|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 5000, |
|
"global_step": 20074, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.004981568197668626, |
|
"grad_norm": 81.31034088134766, |
|
"learning_rate": 1.9999863931243543e-05, |
|
"logits/chosen": -19.35576057434082, |
|
"logits/rejected": -19.391923904418945, |
|
"logps/chosen": -488.51171875, |
|
"logps/rejected": -382.52825927734375, |
|
"loss": 0.6551, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.4239501953125, |
|
"rewards/margins": 1.4092838764190674, |
|
"rewards/rejected": -0.9853336215019226, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.009963136395337252, |
|
"grad_norm": 27.6790828704834, |
|
"learning_rate": 1.9999455728677112e-05, |
|
"logits/chosen": -18.520322799682617, |
|
"logits/rejected": -18.58489227294922, |
|
"logps/chosen": -502.153564453125, |
|
"logps/rejected": -427.2685241699219, |
|
"loss": 1.088, |
|
"rewards/accuracies": 0.4699999988079071, |
|
"rewards/chosen": 1.2840694189071655, |
|
"rewards/margins": -0.12594786286354065, |
|
"rewards/rejected": 1.4100172519683838, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.014944704593005878, |
|
"grad_norm": 14.743182182312012, |
|
"learning_rate": 1.999877540340943e-05, |
|
"logits/chosen": -18.121265411376953, |
|
"logits/rejected": -17.966760635375977, |
|
"logps/chosen": -480.9696960449219, |
|
"logps/rejected": -391.3818359375, |
|
"loss": 0.846, |
|
"rewards/accuracies": 0.6100000143051147, |
|
"rewards/chosen": 2.156686305999756, |
|
"rewards/margins": 0.4148028492927551, |
|
"rewards/rejected": 1.7418835163116455, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.019926272790674503, |
|
"grad_norm": 0.3787066340446472, |
|
"learning_rate": 1.99978229739547e-05, |
|
"logits/chosen": -18.205398559570312, |
|
"logits/rejected": -18.042299270629883, |
|
"logps/chosen": -502.7016296386719, |
|
"logps/rejected": -388.99835205078125, |
|
"loss": 0.7988, |
|
"rewards/accuracies": 0.5799999833106995, |
|
"rewards/chosen": 2.600520372390747, |
|
"rewards/margins": 0.4970521926879883, |
|
"rewards/rejected": 2.1034679412841797, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.02490784098834313, |
|
"grad_norm": 0.6821377873420715, |
|
"learning_rate": 1.9996598466232097e-05, |
|
"logits/chosen": -18.351791381835938, |
|
"logits/rejected": -18.350332260131836, |
|
"logps/chosen": -495.239501953125, |
|
"logps/rejected": -396.9656677246094, |
|
"loss": 0.9516, |
|
"rewards/accuracies": 0.6399999856948853, |
|
"rewards/chosen": 2.7651679515838623, |
|
"rewards/margins": 0.4416518807411194, |
|
"rewards/rejected": 2.3235161304473877, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.029889409186011757, |
|
"grad_norm": 0.07361862808465958, |
|
"learning_rate": 1.9995101913565075e-05, |
|
"logits/chosen": -18.08759117126465, |
|
"logits/rejected": -18.078266143798828, |
|
"logps/chosen": -500.9162902832031, |
|
"logps/rejected": -413.58673095703125, |
|
"loss": 0.8741, |
|
"rewards/accuracies": 0.5799999833106995, |
|
"rewards/chosen": 3.083859443664551, |
|
"rewards/margins": 0.49825409054756165, |
|
"rewards/rejected": 2.5856053829193115, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.034870977383680384, |
|
"grad_norm": 50.79634475708008, |
|
"learning_rate": 1.9993333356680442e-05, |
|
"logits/chosen": -17.93349838256836, |
|
"logits/rejected": -17.859838485717773, |
|
"logps/chosen": -576.14501953125, |
|
"logps/rejected": -481.7210693359375, |
|
"loss": 0.9994, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 2.9873363971710205, |
|
"rewards/margins": 0.3300691843032837, |
|
"rewards/rejected": 2.6572670936584473, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.03985254558134901, |
|
"grad_norm": 11.607032775878906, |
|
"learning_rate": 1.999129284370727e-05, |
|
"logits/chosen": -18.006515502929688, |
|
"logits/rejected": -17.87860107421875, |
|
"logps/chosen": -511.5252990722656, |
|
"logps/rejected": -448.138916015625, |
|
"loss": 1.0173, |
|
"rewards/accuracies": 0.5299999713897705, |
|
"rewards/chosen": 3.2953906059265137, |
|
"rewards/margins": 0.5231221318244934, |
|
"rewards/rejected": 2.772268772125244, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.04483411377901764, |
|
"grad_norm": 9.66622257232666, |
|
"learning_rate": 1.9988980430175565e-05, |
|
"logits/chosen": -17.94629669189453, |
|
"logits/rejected": -17.793624877929688, |
|
"logps/chosen": -471.1706237792969, |
|
"logps/rejected": -380.40625, |
|
"loss": 0.7616, |
|
"rewards/accuracies": 0.6600000262260437, |
|
"rewards/chosen": 3.2259023189544678, |
|
"rewards/margins": 0.9019778966903687, |
|
"rewards/rejected": 2.3239243030548096, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.04981568197668626, |
|
"grad_norm": 33.115692138671875, |
|
"learning_rate": 1.998639617901478e-05, |
|
"logits/chosen": -18.29867935180664, |
|
"logits/rejected": -18.21681785583496, |
|
"logps/chosen": -492.39471435546875, |
|
"logps/rejected": -397.0972900390625, |
|
"loss": 0.8836, |
|
"rewards/accuracies": 0.6399999856948853, |
|
"rewards/chosen": 4.284320831298828, |
|
"rewards/margins": 1.2680495977401733, |
|
"rewards/rejected": 3.0162715911865234, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05479725017435489, |
|
"grad_norm": 14.794266700744629, |
|
"learning_rate": 1.998354016055208e-05, |
|
"logits/chosen": -17.866899490356445, |
|
"logits/rejected": -17.793582916259766, |
|
"logps/chosen": -512.371337890625, |
|
"logps/rejected": -424.1750793457031, |
|
"loss": 1.2634, |
|
"rewards/accuracies": 0.5600000023841858, |
|
"rewards/chosen": 3.433595895767212, |
|
"rewards/margins": 0.3185270428657532, |
|
"rewards/rejected": 3.1150686740875244, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.059778818372023514, |
|
"grad_norm": 177.02552795410156, |
|
"learning_rate": 1.998041245251044e-05, |
|
"logits/chosen": -18.298795700073242, |
|
"logits/rejected": -18.078033447265625, |
|
"logps/chosen": -464.45086669921875, |
|
"logps/rejected": -387.45428466796875, |
|
"loss": 1.0062, |
|
"rewards/accuracies": 0.5799999833106995, |
|
"rewards/chosen": 3.7040328979492188, |
|
"rewards/margins": 0.5431541800498962, |
|
"rewards/rejected": 3.1608786582946777, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.06476038656969214, |
|
"grad_norm": 19.352441787719727, |
|
"learning_rate": 1.997701314000653e-05, |
|
"logits/chosen": -18.20465660095215, |
|
"logits/rejected": -18.182998657226562, |
|
"logps/chosen": -489.5882873535156, |
|
"logps/rejected": -431.6067199707031, |
|
"loss": 0.8781, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 4.215704917907715, |
|
"rewards/margins": 0.7704020738601685, |
|
"rewards/rejected": 3.445302963256836, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.06974195476736077, |
|
"grad_norm": 22.920259475708008, |
|
"learning_rate": 1.9973342315548398e-05, |
|
"logits/chosen": -18.116256713867188, |
|
"logits/rejected": -18.149843215942383, |
|
"logps/chosen": -447.0269775390625, |
|
"logps/rejected": -374.61383056640625, |
|
"loss": 1.0611, |
|
"rewards/accuracies": 0.5600000023841858, |
|
"rewards/chosen": 3.63736629486084, |
|
"rewards/margins": 0.6730349063873291, |
|
"rewards/rejected": 2.96433162689209, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.07472352296502939, |
|
"grad_norm": 20.65612030029297, |
|
"learning_rate": 1.9969400079032947e-05, |
|
"logits/chosen": -18.347074508666992, |
|
"logits/rejected": -18.040178298950195, |
|
"logps/chosen": -453.944091796875, |
|
"logps/rejected": -380.4056396484375, |
|
"loss": 1.0204, |
|
"rewards/accuracies": 0.5799999833106995, |
|
"rewards/chosen": 4.042062282562256, |
|
"rewards/margins": 0.9009463787078857, |
|
"rewards/rejected": 3.1411163806915283, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.07970509116269801, |
|
"grad_norm": 50.09711456298828, |
|
"learning_rate": 1.9965186537743215e-05, |
|
"logits/chosen": -18.355621337890625, |
|
"logits/rejected": -18.051054000854492, |
|
"logps/chosen": -502.7710876464844, |
|
"logps/rejected": -419.5497741699219, |
|
"loss": 1.1749, |
|
"rewards/accuracies": 0.5899999737739563, |
|
"rewards/chosen": 3.215543508529663, |
|
"rewards/margins": 0.48448604345321655, |
|
"rewards/rejected": 2.731057643890381, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.08468665936036664, |
|
"grad_norm": 0.4332411289215088, |
|
"learning_rate": 1.9960701806345472e-05, |
|
"logits/chosen": -18.210161209106445, |
|
"logits/rejected": -18.06623077392578, |
|
"logps/chosen": -449.0904235839844, |
|
"logps/rejected": -365.96173095703125, |
|
"loss": 0.7165, |
|
"rewards/accuracies": 0.6399999856948853, |
|
"rewards/chosen": 4.172327995300293, |
|
"rewards/margins": 1.4230843782424927, |
|
"rewards/rejected": 2.749243974685669, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.08966822755803527, |
|
"grad_norm": 5.466333389282227, |
|
"learning_rate": 1.9955946006886082e-05, |
|
"logits/chosen": -18.5748348236084, |
|
"logits/rejected": -18.16517448425293, |
|
"logps/chosen": -438.857421875, |
|
"logps/rejected": -416.3319396972656, |
|
"loss": 0.8766, |
|
"rewards/accuracies": 0.5699999928474426, |
|
"rewards/chosen": 4.177321910858154, |
|
"rewards/margins": 0.9888943433761597, |
|
"rewards/rejected": 3.188427686691284, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.0946497957557039, |
|
"grad_norm": 82.25857543945312, |
|
"learning_rate": 1.995091926878819e-05, |
|
"logits/chosen": -18.491161346435547, |
|
"logits/rejected": -18.283767700195312, |
|
"logps/chosen": -461.55023193359375, |
|
"logps/rejected": -394.8086242675781, |
|
"loss": 1.0777, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 3.3734805583953857, |
|
"rewards/margins": 0.7351935505867004, |
|
"rewards/rejected": 2.638287305831909, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.09963136395337252, |
|
"grad_norm": 9.021427154541016, |
|
"learning_rate": 1.9945621728848194e-05, |
|
"logits/chosen": -18.71115493774414, |
|
"logits/rejected": -18.314943313598633, |
|
"logps/chosen": -466.3949890136719, |
|
"logps/rejected": -393.8585205078125, |
|
"loss": 0.6798, |
|
"rewards/accuracies": 0.7300000190734863, |
|
"rewards/chosen": 3.843343496322632, |
|
"rewards/margins": 1.1383906602859497, |
|
"rewards/rejected": 2.7049529552459717, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.10461293215104114, |
|
"grad_norm": 146.21475219726562, |
|
"learning_rate": 1.9940053531232028e-05, |
|
"logits/chosen": -18.590173721313477, |
|
"logits/rejected": -18.441553115844727, |
|
"logps/chosen": -454.3846435546875, |
|
"logps/rejected": -387.5030212402344, |
|
"loss": 1.1674, |
|
"rewards/accuracies": 0.5699999928474426, |
|
"rewards/chosen": 4.06519079208374, |
|
"rewards/margins": 0.5631230473518372, |
|
"rewards/rejected": 3.5020673274993896, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.10959450034870978, |
|
"grad_norm": 4.808138847351074, |
|
"learning_rate": 1.9934214827471244e-05, |
|
"logits/chosen": -18.621475219726562, |
|
"logits/rejected": -18.35665512084961, |
|
"logps/chosen": -450.15582275390625, |
|
"logps/rejected": -386.0933532714844, |
|
"loss": 1.0704, |
|
"rewards/accuracies": 0.5699999928474426, |
|
"rewards/chosen": 3.885467290878296, |
|
"rewards/margins": 0.752030611038208, |
|
"rewards/rejected": 3.1334362030029297, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.1145760685463784, |
|
"grad_norm": 5.842870235443115, |
|
"learning_rate": 1.9928105776458864e-05, |
|
"logits/chosen": -18.336530685424805, |
|
"logits/rejected": -18.11532211303711, |
|
"logps/chosen": -466.25738525390625, |
|
"logps/rejected": -393.3591003417969, |
|
"loss": 1.1451, |
|
"rewards/accuracies": 0.5799999833106995, |
|
"rewards/chosen": 3.678475856781006, |
|
"rewards/margins": 0.4539036452770233, |
|
"rewards/rejected": 3.22457218170166, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.11955763674404703, |
|
"grad_norm": 37.61786651611328, |
|
"learning_rate": 1.9921726544445084e-05, |
|
"logits/chosen": -18.296964645385742, |
|
"logits/rejected": -18.364625930786133, |
|
"logps/chosen": -467.84027099609375, |
|
"logps/rejected": -397.1224670410156, |
|
"loss": 0.8784, |
|
"rewards/accuracies": 0.6899999976158142, |
|
"rewards/chosen": 3.5043909549713135, |
|
"rewards/margins": 1.0125629901885986, |
|
"rewards/rejected": 2.4918274879455566, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.12453920494171565, |
|
"grad_norm": 49.04602813720703, |
|
"learning_rate": 1.9915077305032748e-05, |
|
"logits/chosen": -18.40894317626953, |
|
"logits/rejected": -18.2955322265625, |
|
"logps/chosen": -503.1138610839844, |
|
"logps/rejected": -375.34442138671875, |
|
"loss": 0.9985, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 4.6265459060668945, |
|
"rewards/margins": 1.7104928493499756, |
|
"rewards/rejected": 2.91605281829834, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.1295207731393843, |
|
"grad_norm": 16.836668014526367, |
|
"learning_rate": 1.9908158239172596e-05, |
|
"logits/chosen": -18.674049377441406, |
|
"logits/rejected": -18.514965057373047, |
|
"logps/chosen": -455.519775390625, |
|
"logps/rejected": -374.22210693359375, |
|
"loss": 0.9035, |
|
"rewards/accuracies": 0.6800000071525574, |
|
"rewards/chosen": 4.500489711761475, |
|
"rewards/margins": 1.446923017501831, |
|
"rewards/rejected": 3.0535662174224854, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.1345023413370529, |
|
"grad_norm": 0.04983401298522949, |
|
"learning_rate": 1.990096953515836e-05, |
|
"logits/chosen": -18.647964477539062, |
|
"logits/rejected": -18.637880325317383, |
|
"logps/chosen": -465.9200439453125, |
|
"logps/rejected": -411.40838623046875, |
|
"loss": 1.2207, |
|
"rewards/accuracies": 0.5899999737739563, |
|
"rewards/chosen": 3.8630013465881348, |
|
"rewards/margins": 0.7057845592498779, |
|
"rewards/rejected": 3.157216787338257, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.13948390953472153, |
|
"grad_norm": 0.5644310116767883, |
|
"learning_rate": 1.9893511388621652e-05, |
|
"logits/chosen": -18.66870880126953, |
|
"logits/rejected": -18.76462745666504, |
|
"logps/chosen": -513.6793823242188, |
|
"logps/rejected": -469.8594055175781, |
|
"loss": 1.5471, |
|
"rewards/accuracies": 0.6100000143051147, |
|
"rewards/chosen": 3.391056537628174, |
|
"rewards/margins": 0.3123472332954407, |
|
"rewards/rejected": 3.078709363937378, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.14446547773239016, |
|
"grad_norm": 1.6863278150558472, |
|
"learning_rate": 1.9885784002526616e-05, |
|
"logits/chosen": -18.729633331298828, |
|
"logits/rejected": -19.068260192871094, |
|
"logps/chosen": -447.2772521972656, |
|
"logps/rejected": -352.67791748046875, |
|
"loss": 1.0868, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 3.924001693725586, |
|
"rewards/margins": 0.8989758491516113, |
|
"rewards/rejected": 3.0250258445739746, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.14944704593005878, |
|
"grad_norm": 0.10964024066925049, |
|
"learning_rate": 1.987778758716441e-05, |
|
"logits/chosen": -19.014976501464844, |
|
"logits/rejected": -19.501911163330078, |
|
"logps/chosen": -475.1939392089844, |
|
"logps/rejected": -385.38238525390625, |
|
"loss": 1.0756, |
|
"rewards/accuracies": 0.5600000023841858, |
|
"rewards/chosen": 4.359086036682129, |
|
"rewards/margins": 1.1548995971679688, |
|
"rewards/rejected": 3.2041866779327393, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.1544286141277274, |
|
"grad_norm": 7.479519367218018, |
|
"learning_rate": 1.98695223601475e-05, |
|
"logits/chosen": -18.8636531829834, |
|
"logits/rejected": -19.16086196899414, |
|
"logps/chosen": -484.1092529296875, |
|
"logps/rejected": -399.1905212402344, |
|
"loss": 0.8348, |
|
"rewards/accuracies": 0.7099999785423279, |
|
"rewards/chosen": 4.458531379699707, |
|
"rewards/margins": 1.7625274658203125, |
|
"rewards/rejected": 2.6960039138793945, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.15941018232539603, |
|
"grad_norm": 15.998089790344238, |
|
"learning_rate": 1.986098854640371e-05, |
|
"logits/chosen": -18.937522888183594, |
|
"logits/rejected": -19.118017196655273, |
|
"logps/chosen": -463.34149169921875, |
|
"logps/rejected": -415.29827880859375, |
|
"loss": 1.298, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": 4.227731227874756, |
|
"rewards/margins": 0.5569795370101929, |
|
"rewards/rejected": 3.6707510948181152, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.16439175052306465, |
|
"grad_norm": 0.7655884027481079, |
|
"learning_rate": 1.9852186378170136e-05, |
|
"logits/chosen": -18.893104553222656, |
|
"logits/rejected": -19.257871627807617, |
|
"logps/chosen": -531.3560791015625, |
|
"logps/rejected": -465.7754821777344, |
|
"loss": 1.1944, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 4.285092353820801, |
|
"rewards/margins": 1.1729285717010498, |
|
"rewards/rejected": 3.1121633052825928, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.16937331872073327, |
|
"grad_norm": 10.586421012878418, |
|
"learning_rate": 1.9843116094986783e-05, |
|
"logits/chosen": -18.89116859436035, |
|
"logits/rejected": -19.32411003112793, |
|
"logps/chosen": -466.8319091796875, |
|
"logps/rejected": -388.9800109863281, |
|
"loss": 0.8162, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": 4.402754306793213, |
|
"rewards/margins": 1.1992639303207397, |
|
"rewards/rejected": 3.203490972518921, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.17435488691840192, |
|
"grad_norm": 0.0021288192365318537, |
|
"learning_rate": 1.983377794369009e-05, |
|
"logits/chosen": -18.90306854248047, |
|
"logits/rejected": -19.6688289642334, |
|
"logps/chosen": -506.9422912597656, |
|
"logps/rejected": -422.39703369140625, |
|
"loss": 0.9919, |
|
"rewards/accuracies": 0.5600000023841858, |
|
"rewards/chosen": 4.193739414215088, |
|
"rewards/margins": 1.1028869152069092, |
|
"rewards/rejected": 3.0908522605895996, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.17933645511607055, |
|
"grad_norm": 72.208251953125, |
|
"learning_rate": 1.982417217840618e-05, |
|
"logits/chosen": -19.198213577270508, |
|
"logits/rejected": -20.100387573242188, |
|
"logps/chosen": -498.4687805175781, |
|
"logps/rejected": -383.0224914550781, |
|
"loss": 0.9324, |
|
"rewards/accuracies": 0.6600000262260437, |
|
"rewards/chosen": 4.158178806304932, |
|
"rewards/margins": 1.5135530233383179, |
|
"rewards/rejected": 2.6446259021759033, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.18431802331373917, |
|
"grad_norm": 54.43558120727539, |
|
"learning_rate": 1.9814299060543965e-05, |
|
"logits/chosen": -19.100000381469727, |
|
"logits/rejected": -20.164613723754883, |
|
"logps/chosen": -523.7534790039062, |
|
"logps/rejected": -420.5929260253906, |
|
"loss": 1.0645, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": 4.555855751037598, |
|
"rewards/margins": 1.3967076539993286, |
|
"rewards/rejected": 3.1591484546661377, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.1892995915114078, |
|
"grad_norm": 44.80778503417969, |
|
"learning_rate": 1.980415885878801e-05, |
|
"logits/chosen": -19.23442840576172, |
|
"logits/rejected": -20.248851776123047, |
|
"logps/chosen": -470.9892578125, |
|
"logps/rejected": -387.8487548828125, |
|
"loss": 1.1345, |
|
"rewards/accuracies": 0.6299999952316284, |
|
"rewards/chosen": 4.9033355712890625, |
|
"rewards/margins": 1.5088270902633667, |
|
"rewards/rejected": 3.3945086002349854, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.19428115970907642, |
|
"grad_norm": 0.029925603419542313, |
|
"learning_rate": 1.979375184909125e-05, |
|
"logits/chosen": -19.161788940429688, |
|
"logits/rejected": -20.242706298828125, |
|
"logps/chosen": -451.88165283203125, |
|
"logps/rejected": -377.80078125, |
|
"loss": 1.0498, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 3.468371868133545, |
|
"rewards/margins": 1.5844755172729492, |
|
"rewards/rejected": 1.8838963508605957, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.19926272790674504, |
|
"grad_norm": 2.616316795349121, |
|
"learning_rate": 1.9783078314667465e-05, |
|
"logits/chosen": -19.053321838378906, |
|
"logits/rejected": -20.00080108642578, |
|
"logps/chosen": -502.9007568359375, |
|
"logps/rejected": -397.90020751953125, |
|
"loss": 0.9393, |
|
"rewards/accuracies": 0.6600000262260437, |
|
"rewards/chosen": 4.124849796295166, |
|
"rewards/margins": 1.3231381177902222, |
|
"rewards/rejected": 2.801711320877075, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.20424429610441366, |
|
"grad_norm": 44.81772232055664, |
|
"learning_rate": 1.9772138545983554e-05, |
|
"logits/chosen": -18.997940063476562, |
|
"logits/rejected": -19.999465942382812, |
|
"logps/chosen": -494.4920349121094, |
|
"logps/rejected": -401.1126708984375, |
|
"loss": 0.9059, |
|
"rewards/accuracies": 0.6399999856948853, |
|
"rewards/chosen": 3.8888561725616455, |
|
"rewards/margins": 1.703158974647522, |
|
"rewards/rejected": 2.185697317123413, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.2092258643020823, |
|
"grad_norm": 19.425662994384766, |
|
"learning_rate": 1.9760932840751663e-05, |
|
"logits/chosen": -18.9016056060791, |
|
"logits/rejected": -19.416828155517578, |
|
"logps/chosen": -483.6650390625, |
|
"logps/rejected": -388.0088195800781, |
|
"loss": 0.8963, |
|
"rewards/accuracies": 0.6299999952316284, |
|
"rewards/chosen": 5.2860283851623535, |
|
"rewards/margins": 1.5827534198760986, |
|
"rewards/rejected": 3.703275442123413, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.2142074324997509, |
|
"grad_norm": 124.74931335449219, |
|
"learning_rate": 1.9749461503921074e-05, |
|
"logits/chosen": -18.898042678833008, |
|
"logits/rejected": -19.673877716064453, |
|
"logps/chosen": -497.69476318359375, |
|
"logps/rejected": -364.8540954589844, |
|
"loss": 1.0963, |
|
"rewards/accuracies": 0.6299999952316284, |
|
"rewards/chosen": 4.136105537414551, |
|
"rewards/margins": 1.3833444118499756, |
|
"rewards/rejected": 2.752761125564575, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.21918900069741956, |
|
"grad_norm": 5.577426433563232, |
|
"learning_rate": 1.973772484766989e-05, |
|
"logits/chosen": -18.805566787719727, |
|
"logits/rejected": -19.62226104736328, |
|
"logps/chosen": -463.9582824707031, |
|
"logps/rejected": -351.70867919921875, |
|
"loss": 1.0113, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 4.1188740730285645, |
|
"rewards/margins": 1.5241186618804932, |
|
"rewards/rejected": 2.5947556495666504, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.22417056889508818, |
|
"grad_norm": 0.4124658405780792, |
|
"learning_rate": 1.9725723191396557e-05, |
|
"logits/chosen": -18.83307647705078, |
|
"logits/rejected": -19.278696060180664, |
|
"logps/chosen": -466.99859619140625, |
|
"logps/rejected": -398.4786682128906, |
|
"loss": 1.622, |
|
"rewards/accuracies": 0.5899999737739563, |
|
"rewards/chosen": 4.280797958374023, |
|
"rewards/margins": 0.31094062328338623, |
|
"rewards/rejected": 3.9698569774627686, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.2291521370927568, |
|
"grad_norm": 46.63705825805664, |
|
"learning_rate": 1.971345686171116e-05, |
|
"logits/chosen": -18.672901153564453, |
|
"logits/rejected": -19.950056076049805, |
|
"logps/chosen": -500.7174072265625, |
|
"logps/rejected": -417.15191650390625, |
|
"loss": 0.9252, |
|
"rewards/accuracies": 0.6100000143051147, |
|
"rewards/chosen": 4.5458478927612305, |
|
"rewards/margins": 1.576623558998108, |
|
"rewards/rejected": 2.969224452972412, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.23413370529042543, |
|
"grad_norm": 18.882240295410156, |
|
"learning_rate": 1.9700926192426554e-05, |
|
"logits/chosen": -19.082120895385742, |
|
"logits/rejected": -20.37308120727539, |
|
"logps/chosen": -429.0272521972656, |
|
"logps/rejected": -354.2383728027344, |
|
"loss": 1.1165, |
|
"rewards/accuracies": 0.6299999952316284, |
|
"rewards/chosen": 3.655416965484619, |
|
"rewards/margins": 1.2390317916870117, |
|
"rewards/rejected": 2.4163851737976074, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.23911527348809405, |
|
"grad_norm": 336.6611633300781, |
|
"learning_rate": 1.9688131524549242e-05, |
|
"logits/chosen": -19.020198822021484, |
|
"logits/rejected": -19.45013999938965, |
|
"logps/chosen": -459.5777587890625, |
|
"logps/rejected": -409.13922119140625, |
|
"loss": 1.2777, |
|
"rewards/accuracies": 0.5600000023841858, |
|
"rewards/chosen": 2.606769323348999, |
|
"rewards/margins": 0.6296383142471313, |
|
"rewards/rejected": 1.9771310091018677, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.24409684168576268, |
|
"grad_norm": 72.95674133300781, |
|
"learning_rate": 1.9675073206270148e-05, |
|
"logits/chosen": -18.523130416870117, |
|
"logits/rejected": -19.362272262573242, |
|
"logps/chosen": -499.9466552734375, |
|
"logps/rejected": -384.0169982910156, |
|
"loss": 0.9484, |
|
"rewards/accuracies": 0.6600000262260437, |
|
"rewards/chosen": 4.083195209503174, |
|
"rewards/margins": 1.7932839393615723, |
|
"rewards/rejected": 2.2899110317230225, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.2490784098834313, |
|
"grad_norm": 7.091724395751953, |
|
"learning_rate": 1.9661751592955086e-05, |
|
"logits/chosen": -18.576244354248047, |
|
"logits/rejected": -19.872777938842773, |
|
"logps/chosen": -543.4857177734375, |
|
"logps/rejected": -440.7587890625, |
|
"loss": 0.9586, |
|
"rewards/accuracies": 0.6600000262260437, |
|
"rewards/chosen": 4.305512428283691, |
|
"rewards/margins": 1.4709885120391846, |
|
"rewards/rejected": 2.8345236778259277, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.2490784098834313, |
|
"eval_logits/chosen": -21.543413162231445, |
|
"eval_logits/rejected": -22.481142044067383, |
|
"eval_logps/chosen": -475.4376525878906, |
|
"eval_logps/rejected": -402.1141052246094, |
|
"eval_loss": 1.169881820678711, |
|
"eval_rewards/accuracies": 0.6557591557502747, |
|
"eval_rewards/chosen": 4.376668930053711, |
|
"eval_rewards/margins": 1.5471277236938477, |
|
"eval_rewards/rejected": 2.829540729522705, |
|
"eval_runtime": 473.1936, |
|
"eval_samples_per_second": 3.216, |
|
"eval_steps_per_second": 0.404, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.2540599780810999, |
|
"grad_norm": 82.43436431884766, |
|
"learning_rate": 1.9648167047135133e-05, |
|
"logits/chosen": -19.058635711669922, |
|
"logits/rejected": -20.134428024291992, |
|
"logps/chosen": -495.65887451171875, |
|
"logps/rejected": -419.3161315917969, |
|
"loss": 1.1698, |
|
"rewards/accuracies": 0.6600000262260437, |
|
"rewards/chosen": 3.87038516998291, |
|
"rewards/margins": 1.3453155755996704, |
|
"rewards/rejected": 2.52506947517395, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.2590415462787686, |
|
"grad_norm": 3.7819454669952393, |
|
"learning_rate": 1.9634319938496742e-05, |
|
"logits/chosen": -19.017601013183594, |
|
"logits/rejected": -20.623193740844727, |
|
"logps/chosen": -478.9990539550781, |
|
"logps/rejected": -395.05450439453125, |
|
"loss": 1.1279, |
|
"rewards/accuracies": 0.6700000166893005, |
|
"rewards/chosen": 3.3154103755950928, |
|
"rewards/margins": 1.5955133438110352, |
|
"rewards/rejected": 1.7198967933654785, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.26402311447643717, |
|
"grad_norm": 1.351638674736023, |
|
"learning_rate": 1.962021064387168e-05, |
|
"logits/chosen": -18.885652542114258, |
|
"logits/rejected": -19.914079666137695, |
|
"logps/chosen": -510.6768493652344, |
|
"logps/rejected": -441.775634765625, |
|
"loss": 1.1382, |
|
"rewards/accuracies": 0.6100000143051147, |
|
"rewards/chosen": 4.05587911605835, |
|
"rewards/margins": 1.2654640674591064, |
|
"rewards/rejected": 2.790414810180664, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.2690046826741058, |
|
"grad_norm": 32.41090393066406, |
|
"learning_rate": 1.9605839547226785e-05, |
|
"logits/chosen": -19.33073616027832, |
|
"logits/rejected": -20.477949142456055, |
|
"logps/chosen": -492.5516052246094, |
|
"logps/rejected": -420.31036376953125, |
|
"loss": 1.0482, |
|
"rewards/accuracies": 0.6800000071525574, |
|
"rewards/chosen": 3.8395843505859375, |
|
"rewards/margins": 1.6333999633789062, |
|
"rewards/rejected": 2.206184148788452, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.2739862508717744, |
|
"grad_norm": 32.58820724487305, |
|
"learning_rate": 1.9591207039653507e-05, |
|
"logits/chosen": -19.26167106628418, |
|
"logits/rejected": -20.89728546142578, |
|
"logps/chosen": -438.7798156738281, |
|
"logps/rejected": -363.2562255859375, |
|
"loss": 1.243, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": 3.1530792713165283, |
|
"rewards/margins": 1.1595901250839233, |
|
"rewards/rejected": 1.9934889078140259, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.27896781906944307, |
|
"grad_norm": 93.86406707763672, |
|
"learning_rate": 1.9576313519357265e-05, |
|
"logits/chosen": -19.064878463745117, |
|
"logits/rejected": -20.905057907104492, |
|
"logps/chosen": -519.572509765625, |
|
"logps/rejected": -442.0079040527344, |
|
"loss": 1.2293, |
|
"rewards/accuracies": 0.6399999856948853, |
|
"rewards/chosen": 3.4798271656036377, |
|
"rewards/margins": 1.09993577003479, |
|
"rewards/rejected": 2.3798913955688477, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.28394938726711166, |
|
"grad_norm": 26.577350616455078, |
|
"learning_rate": 1.9561159391646618e-05, |
|
"logits/chosen": -19.34862518310547, |
|
"logits/rejected": -22.218530654907227, |
|
"logps/chosen": -500.4524230957031, |
|
"logps/rejected": -402.18389892578125, |
|
"loss": 0.9679, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 3.561279535293579, |
|
"rewards/margins": 1.6947131156921387, |
|
"rewards/rejected": 1.8665661811828613, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.2889309554647803, |
|
"grad_norm": 20.811817169189453, |
|
"learning_rate": 1.9545745068922225e-05, |
|
"logits/chosen": -19.238685607910156, |
|
"logits/rejected": -20.936817169189453, |
|
"logps/chosen": -499.0253601074219, |
|
"logps/rejected": -417.6446533203125, |
|
"loss": 1.1599, |
|
"rewards/accuracies": 0.5299999713897705, |
|
"rewards/chosen": 4.803900241851807, |
|
"rewards/margins": 1.0667366981506348, |
|
"rewards/rejected": 3.7371630668640137, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.29391252366244897, |
|
"grad_norm": 23.467056274414062, |
|
"learning_rate": 1.9530070970665638e-05, |
|
"logits/chosen": -19.428844451904297, |
|
"logits/rejected": -21.77304458618164, |
|
"logps/chosen": -498.2739562988281, |
|
"logps/rejected": -398.3455810546875, |
|
"loss": 0.9347, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 4.099443435668945, |
|
"rewards/margins": 1.4808638095855713, |
|
"rewards/rejected": 2.6185789108276367, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.29889409186011756, |
|
"grad_norm": 44.77325439453125, |
|
"learning_rate": 1.951413752342786e-05, |
|
"logits/chosen": -19.230180740356445, |
|
"logits/rejected": -21.40241050720215, |
|
"logps/chosen": -522.796630859375, |
|
"logps/rejected": -409.90936279296875, |
|
"loss": 1.0653, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 3.7546486854553223, |
|
"rewards/margins": 2.06999135017395, |
|
"rewards/rejected": 1.6846575736999512, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.3038756600577862, |
|
"grad_norm": 0.1034678965806961, |
|
"learning_rate": 1.949794516081777e-05, |
|
"logits/chosen": -19.376697540283203, |
|
"logits/rejected": -21.691804885864258, |
|
"logps/chosen": -482.21807861328125, |
|
"logps/rejected": -379.0809326171875, |
|
"loss": 0.954, |
|
"rewards/accuracies": 0.6600000262260437, |
|
"rewards/chosen": 2.9128754138946533, |
|
"rewards/margins": 1.5702927112579346, |
|
"rewards/rejected": 1.3425830602645874, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.3088572282554548, |
|
"grad_norm": 8.153312683105469, |
|
"learning_rate": 1.9481494323490292e-05, |
|
"logits/chosen": -20.608989715576172, |
|
"logits/rejected": -25.289657592773438, |
|
"logps/chosen": -456.2762756347656, |
|
"logps/rejected": -363.2749938964844, |
|
"loss": 1.0848, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 2.7271530628204346, |
|
"rewards/margins": 1.8869810104370117, |
|
"rewards/rejected": 0.8401720523834229, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.31383879645312346, |
|
"grad_norm": 173.4281463623047, |
|
"learning_rate": 1.9464785459134422e-05, |
|
"logits/chosen": -20.402162551879883, |
|
"logits/rejected": -26.399858474731445, |
|
"logps/chosen": -496.73199462890625, |
|
"logps/rejected": -371.1173400878906, |
|
"loss": 1.0173, |
|
"rewards/accuracies": 0.6600000262260437, |
|
"rewards/chosen": 3.2006542682647705, |
|
"rewards/margins": 2.35373592376709, |
|
"rewards/rejected": 0.8469181060791016, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.31882036465079205, |
|
"grad_norm": 71.35045623779297, |
|
"learning_rate": 1.9447819022461036e-05, |
|
"logits/chosen": -20.591211318969727, |
|
"logits/rejected": -21.949787139892578, |
|
"logps/chosen": -505.4292297363281, |
|
"logps/rejected": -457.5049133300781, |
|
"loss": 1.334, |
|
"rewards/accuracies": 0.5899999737739563, |
|
"rewards/chosen": 3.6680984497070312, |
|
"rewards/margins": 1.0007195472717285, |
|
"rewards/rejected": 2.6673781871795654, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.3238019328484607, |
|
"grad_norm": 71.10392761230469, |
|
"learning_rate": 1.9430595475190528e-05, |
|
"logits/chosen": -20.5976505279541, |
|
"logits/rejected": -22.98895263671875, |
|
"logps/chosen": -485.20758056640625, |
|
"logps/rejected": -426.62078857421875, |
|
"loss": 1.1786, |
|
"rewards/accuracies": 0.6299999952316284, |
|
"rewards/chosen": 3.1840951442718506, |
|
"rewards/margins": 1.0180020332336426, |
|
"rewards/rejected": 2.166092872619629, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.3287835010461293, |
|
"grad_norm": 20.788406372070312, |
|
"learning_rate": 1.9413115286040228e-05, |
|
"logits/chosen": -20.659372329711914, |
|
"logits/rejected": -24.63149642944336, |
|
"logps/chosen": -496.0128173828125, |
|
"logps/rejected": -426.76092529296875, |
|
"loss": 1.2417, |
|
"rewards/accuracies": 0.6100000143051147, |
|
"rewards/chosen": 2.6715264320373535, |
|
"rewards/margins": 1.3509292602539062, |
|
"rewards/rejected": 1.3205969333648682, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.33376506924379795, |
|
"grad_norm": 47.43177032470703, |
|
"learning_rate": 1.9395378930711654e-05, |
|
"logits/chosen": -21.26150894165039, |
|
"logits/rejected": -27.082225799560547, |
|
"logps/chosen": -484.2604675292969, |
|
"logps/rejected": -407.31512451171875, |
|
"loss": 1.0294, |
|
"rewards/accuracies": 0.7099999785423279, |
|
"rewards/chosen": 2.974027633666992, |
|
"rewards/margins": 2.4509451389312744, |
|
"rewards/rejected": 0.5230829119682312, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.33874663744146655, |
|
"grad_norm": 29.3293514251709, |
|
"learning_rate": 1.9377386891877572e-05, |
|
"logits/chosen": -20.902864456176758, |
|
"logits/rejected": -24.1262264251709, |
|
"logps/chosen": -505.2213134765625, |
|
"logps/rejected": -438.3580017089844, |
|
"loss": 1.5894, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 1.993202567100525, |
|
"rewards/margins": 0.9017642736434937, |
|
"rewards/rejected": 1.0914379358291626, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.3437282056391352, |
|
"grad_norm": 85.70783996582031, |
|
"learning_rate": 1.9359139659168845e-05, |
|
"logits/chosen": -19.933032989501953, |
|
"logits/rejected": -23.047008514404297, |
|
"logps/chosen": -496.6371154785156, |
|
"logps/rejected": -435.8622131347656, |
|
"loss": 1.0725, |
|
"rewards/accuracies": 0.6399999856948853, |
|
"rewards/chosen": 1.8428994417190552, |
|
"rewards/margins": 1.234021782875061, |
|
"rewards/rejected": 0.6088778376579285, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.34870977383680385, |
|
"grad_norm": 44.99619674682617, |
|
"learning_rate": 1.9340637729161137e-05, |
|
"logits/chosen": -20.23802375793457, |
|
"logits/rejected": -22.14116668701172, |
|
"logps/chosen": -505.2397155761719, |
|
"logps/rejected": -458.6205139160156, |
|
"loss": 1.4139, |
|
"rewards/accuracies": 0.5699999928474426, |
|
"rewards/chosen": 1.7950440645217896, |
|
"rewards/margins": 1.0467220544815063, |
|
"rewards/rejected": 0.7483220100402832, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.35369134203447244, |
|
"grad_norm": 2.535125886993228e-08, |
|
"learning_rate": 1.9321881605361363e-05, |
|
"logits/chosen": -19.776222229003906, |
|
"logits/rejected": -22.802228927612305, |
|
"logps/chosen": -552.8232421875, |
|
"logps/rejected": -477.6277770996094, |
|
"loss": 1.0137, |
|
"rewards/accuracies": 0.6899999976158142, |
|
"rewards/chosen": 3.883512020111084, |
|
"rewards/margins": 2.6781344413757324, |
|
"rewards/rejected": 1.2053773403167725, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.3586729102321411, |
|
"grad_norm": 104.68194580078125, |
|
"learning_rate": 1.9302871798194005e-05, |
|
"logits/chosen": -21.631492614746094, |
|
"logits/rejected": -25.338726043701172, |
|
"logps/chosen": -458.9781494140625, |
|
"logps/rejected": -421.4054260253906, |
|
"loss": 1.5316, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 1.5002456903457642, |
|
"rewards/margins": 1.058099389076233, |
|
"rewards/rejected": 0.4421464204788208, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.3636544784298097, |
|
"grad_norm": 15.395011901855469, |
|
"learning_rate": 1.9283608824987236e-05, |
|
"logits/chosen": -21.326448440551758, |
|
"logits/rejected": -27.821928024291992, |
|
"logps/chosen": -519.0068969726562, |
|
"logps/rejected": -431.6269226074219, |
|
"loss": 1.4097, |
|
"rewards/accuracies": 0.6600000262260437, |
|
"rewards/chosen": 2.3849658966064453, |
|
"rewards/margins": 2.2866411209106445, |
|
"rewards/rejected": 0.09832416474819183, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.36863604662747834, |
|
"grad_norm": 5.503931999206543, |
|
"learning_rate": 1.9264093209958822e-05, |
|
"logits/chosen": -22.2663631439209, |
|
"logits/rejected": -30.09918212890625, |
|
"logps/chosen": -519.89013671875, |
|
"logps/rejected": -434.9560241699219, |
|
"loss": 0.6146, |
|
"rewards/accuracies": 0.7400000095367432, |
|
"rewards/chosen": 1.8443881273269653, |
|
"rewards/margins": 3.3645260334014893, |
|
"rewards/rejected": -1.5201376676559448, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.37361761482514694, |
|
"grad_norm": 0.397699773311615, |
|
"learning_rate": 1.9244325484201844e-05, |
|
"logits/chosen": -21.55438995361328, |
|
"logits/rejected": -26.417490005493164, |
|
"logps/chosen": -578.6663208007812, |
|
"logps/rejected": -494.0219421386719, |
|
"loss": 1.32, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 2.8062126636505127, |
|
"rewards/margins": 1.9167245626449585, |
|
"rewards/rejected": 0.8894882798194885, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.3785991830228156, |
|
"grad_norm": 1.4006325006484985, |
|
"learning_rate": 1.9224306185670284e-05, |
|
"logits/chosen": -21.26766586303711, |
|
"logits/rejected": -23.203746795654297, |
|
"logps/chosen": -524.5022583007812, |
|
"logps/rejected": -450.9858093261719, |
|
"loss": 1.2222, |
|
"rewards/accuracies": 0.5699999928474426, |
|
"rewards/chosen": 2.8659799098968506, |
|
"rewards/margins": 1.5051510334014893, |
|
"rewards/rejected": 1.3608287572860718, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.3835807512204842, |
|
"grad_norm": 77.18798828125, |
|
"learning_rate": 1.9204035859164346e-05, |
|
"logits/chosen": -20.718629837036133, |
|
"logits/rejected": -24.271589279174805, |
|
"logps/chosen": -482.0295715332031, |
|
"logps/rejected": -402.9772644042969, |
|
"loss": 1.4243, |
|
"rewards/accuracies": 0.5799999833106995, |
|
"rewards/chosen": 2.1575722694396973, |
|
"rewards/margins": 1.1561360359191895, |
|
"rewards/rejected": 1.0014359951019287, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.38856231941815284, |
|
"grad_norm": 7.084451675415039, |
|
"learning_rate": 1.9183515056315664e-05, |
|
"logits/chosen": -20.11510467529297, |
|
"logits/rejected": -22.786483764648438, |
|
"logps/chosen": -521.9429931640625, |
|
"logps/rejected": -447.221923828125, |
|
"loss": 1.2535, |
|
"rewards/accuracies": 0.6800000071525574, |
|
"rewards/chosen": 3.086232900619507, |
|
"rewards/margins": 1.8227348327636719, |
|
"rewards/rejected": 1.2634981870651245, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.3935438876158215, |
|
"grad_norm": 26.435029983520508, |
|
"learning_rate": 1.9162744335572254e-05, |
|
"logits/chosen": -20.078449249267578, |
|
"logits/rejected": -21.64859390258789, |
|
"logps/chosen": -495.9106750488281, |
|
"logps/rejected": -439.6401672363281, |
|
"loss": 1.3286, |
|
"rewards/accuracies": 0.5600000023841858, |
|
"rewards/chosen": 2.769630193710327, |
|
"rewards/margins": 0.8031193017959595, |
|
"rewards/rejected": 1.9665107727050781, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.3985254558134901, |
|
"grad_norm": 55.87978744506836, |
|
"learning_rate": 1.9141724262183347e-05, |
|
"logits/chosen": -19.4700927734375, |
|
"logits/rejected": -24.67208480834961, |
|
"logps/chosen": -476.494873046875, |
|
"logps/rejected": -370.9423828125, |
|
"loss": 1.1024, |
|
"rewards/accuracies": 0.6399999856948853, |
|
"rewards/chosen": 3.3464269638061523, |
|
"rewards/margins": 1.9073596000671387, |
|
"rewards/rejected": 1.4390674829483032, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.40350702401115873, |
|
"grad_norm": 72.6644515991211, |
|
"learning_rate": 1.9120455408183996e-05, |
|
"logits/chosen": -19.84633445739746, |
|
"logits/rejected": -23.482101440429688, |
|
"logps/chosen": -469.52984619140625, |
|
"logps/rejected": -371.6656494140625, |
|
"loss": 1.0347, |
|
"rewards/accuracies": 0.6299999952316284, |
|
"rewards/chosen": 3.2709081172943115, |
|
"rewards/margins": 1.9965094327926636, |
|
"rewards/rejected": 1.2743984460830688, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.40848859220882733, |
|
"grad_norm": 7.036466121673584, |
|
"learning_rate": 1.9098938352379497e-05, |
|
"logits/chosen": -19.558134078979492, |
|
"logits/rejected": -22.82000732421875, |
|
"logps/chosen": -516.8818359375, |
|
"logps/rejected": -426.9209289550781, |
|
"loss": 1.3704, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 2.8734841346740723, |
|
"rewards/margins": 1.5538628101348877, |
|
"rewards/rejected": 1.3196213245391846, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.413470160406496, |
|
"grad_norm": 19.03754997253418, |
|
"learning_rate": 1.9077173680329667e-05, |
|
"logits/chosen": -19.861852645874023, |
|
"logits/rejected": -20.495954513549805, |
|
"logps/chosen": -434.7484436035156, |
|
"logps/rejected": -418.93182373046875, |
|
"loss": 1.4406, |
|
"rewards/accuracies": 0.5400000214576721, |
|
"rewards/chosen": 3.662661552429199, |
|
"rewards/margins": 1.0773922204971313, |
|
"rewards/rejected": 2.5852692127227783, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.4184517286041646, |
|
"grad_norm": 0.007115426007658243, |
|
"learning_rate": 1.9055161984332865e-05, |
|
"logits/chosen": -19.320505142211914, |
|
"logits/rejected": -20.781309127807617, |
|
"logps/chosen": -505.4100036621094, |
|
"logps/rejected": -399.3970031738281, |
|
"loss": 1.2344, |
|
"rewards/accuracies": 0.6399999856948853, |
|
"rewards/chosen": 2.867283582687378, |
|
"rewards/margins": 1.5176202058792114, |
|
"rewards/rejected": 1.3496633768081665, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.4234332968018332, |
|
"grad_norm": 27.222930908203125, |
|
"learning_rate": 1.9032903863409916e-05, |
|
"logits/chosen": -19.227426528930664, |
|
"logits/rejected": -20.481821060180664, |
|
"logps/chosen": -504.85076904296875, |
|
"logps/rejected": -399.4018249511719, |
|
"loss": 1.1368, |
|
"rewards/accuracies": 0.6100000143051147, |
|
"rewards/chosen": 2.5695528984069824, |
|
"rewards/margins": 1.6272262334823608, |
|
"rewards/rejected": 0.9423269033432007, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.4284148649995018, |
|
"grad_norm": 118.92176055908203, |
|
"learning_rate": 1.901039992328779e-05, |
|
"logits/chosen": -19.390897750854492, |
|
"logits/rejected": -20.656518936157227, |
|
"logps/chosen": -502.2049865722656, |
|
"logps/rejected": -406.15740966796875, |
|
"loss": 1.3008, |
|
"rewards/accuracies": 0.6399999856948853, |
|
"rewards/chosen": 2.6097073554992676, |
|
"rewards/margins": 1.4985759258270264, |
|
"rewards/rejected": 1.1111317873001099, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.43339643319717047, |
|
"grad_norm": 135.84136962890625, |
|
"learning_rate": 1.8987650776383116e-05, |
|
"logits/chosen": -19.83563232421875, |
|
"logits/rejected": -20.294017791748047, |
|
"logps/chosen": -496.2801513671875, |
|
"logps/rejected": -452.8775634765625, |
|
"loss": 1.689, |
|
"rewards/accuracies": 0.5699999928474426, |
|
"rewards/chosen": 2.978971481323242, |
|
"rewards/margins": 0.9833757281303406, |
|
"rewards/rejected": 1.9955962896347046, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.4383780013948391, |
|
"grad_norm": 0.03969337046146393, |
|
"learning_rate": 1.896465704178551e-05, |
|
"logits/chosen": -19.218610763549805, |
|
"logits/rejected": -20.0975284576416, |
|
"logps/chosen": -479.8434753417969, |
|
"logps/rejected": -410.504150390625, |
|
"loss": 1.2651, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 2.871466875076294, |
|
"rewards/margins": 1.330002784729004, |
|
"rewards/rejected": 1.5414642095565796, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.4433595695925077, |
|
"grad_norm": 2.9111685752868652, |
|
"learning_rate": 1.8941419345240763e-05, |
|
"logits/chosen": -19.58942222595215, |
|
"logits/rejected": -20.811443328857422, |
|
"logps/chosen": -435.19122314453125, |
|
"logps/rejected": -364.0976867675781, |
|
"loss": 1.164, |
|
"rewards/accuracies": 0.5899999737739563, |
|
"rewards/chosen": 3.163036823272705, |
|
"rewards/margins": 1.286778450012207, |
|
"rewards/rejected": 1.876258373260498, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.44834113779017637, |
|
"grad_norm": 92.33987426757812, |
|
"learning_rate": 1.891793831913376e-05, |
|
"logits/chosen": -19.12569808959961, |
|
"logits/rejected": -20.312471389770508, |
|
"logps/chosen": -539.9608764648438, |
|
"logps/rejected": -456.8437805175781, |
|
"loss": 1.0481, |
|
"rewards/accuracies": 0.6700000166893005, |
|
"rewards/chosen": 2.520881175994873, |
|
"rewards/margins": 1.5386346578598022, |
|
"rewards/rejected": 0.982246458530426, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.45332270598784496, |
|
"grad_norm": 24.77559471130371, |
|
"learning_rate": 1.8894214602471307e-05, |
|
"logits/chosen": -19.473718643188477, |
|
"logits/rejected": -21.318897247314453, |
|
"logps/chosen": -499.727783203125, |
|
"logps/rejected": -432.5865478515625, |
|
"loss": 1.3744, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 2.8033368587493896, |
|
"rewards/margins": 1.5860238075256348, |
|
"rewards/rejected": 1.2173125743865967, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.4583042741855136, |
|
"grad_norm": 2.553715467453003, |
|
"learning_rate": 1.887024884086473e-05, |
|
"logits/chosen": -19.989469528198242, |
|
"logits/rejected": -21.36966323852539, |
|
"logps/chosen": -485.37139892578125, |
|
"logps/rejected": -414.8341369628906, |
|
"loss": 1.2352, |
|
"rewards/accuracies": 0.6299999952316284, |
|
"rewards/chosen": 2.2446627616882324, |
|
"rewards/margins": 2.1422224044799805, |
|
"rewards/rejected": 0.10244012624025345, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.4632858423831822, |
|
"grad_norm": 9.945940017700195, |
|
"learning_rate": 1.88460416865123e-05, |
|
"logits/chosen": -19.838525772094727, |
|
"logits/rejected": -21.396879196166992, |
|
"logps/chosen": -510.5537109375, |
|
"logps/rejected": -417.2762451171875, |
|
"loss": 1.1751, |
|
"rewards/accuracies": 0.6800000071525574, |
|
"rewards/chosen": 1.9794914722442627, |
|
"rewards/margins": 1.851491093635559, |
|
"rewards/rejected": 0.12800025939941406, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.46826741058085086, |
|
"grad_norm": 15.31814193725586, |
|
"learning_rate": 1.88215937981815e-05, |
|
"logits/chosen": -19.403379440307617, |
|
"logits/rejected": -20.53765869140625, |
|
"logps/chosen": -476.90802001953125, |
|
"logps/rejected": -390.69744873046875, |
|
"loss": 1.7465, |
|
"rewards/accuracies": 0.5099999904632568, |
|
"rewards/chosen": 3.1520519256591797, |
|
"rewards/margins": 0.6633343696594238, |
|
"rewards/rejected": 2.488717555999756, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.47324897877851946, |
|
"grad_norm": 2.4341812133789062, |
|
"learning_rate": 1.879690584119108e-05, |
|
"logits/chosen": -18.863977432250977, |
|
"logits/rejected": -19.929393768310547, |
|
"logps/chosen": -451.2972717285156, |
|
"logps/rejected": -356.81878662109375, |
|
"loss": 1.0762, |
|
"rewards/accuracies": 0.6399999856948853, |
|
"rewards/chosen": 3.5310535430908203, |
|
"rewards/margins": 0.9716143012046814, |
|
"rewards/rejected": 2.5594394207000732, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.4782305469761881, |
|
"grad_norm": 0.5188534259796143, |
|
"learning_rate": 1.8771978487392965e-05, |
|
"logits/chosen": -19.067102432250977, |
|
"logits/rejected": -19.693904876708984, |
|
"logps/chosen": -436.10125732421875, |
|
"logps/rejected": -371.744140625, |
|
"loss": 1.2393, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 3.686614990234375, |
|
"rewards/margins": 1.6963415145874023, |
|
"rewards/rejected": 1.9902732372283936, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.4832121151738567, |
|
"grad_norm": 0.9292926788330078, |
|
"learning_rate": 1.874681241515396e-05, |
|
"logits/chosen": -18.863676071166992, |
|
"logits/rejected": -18.958852767944336, |
|
"logps/chosen": -462.2861328125, |
|
"logps/rejected": -413.9620361328125, |
|
"loss": 1.2257, |
|
"rewards/accuracies": 0.5899999737739563, |
|
"rewards/chosen": 3.4554378986358643, |
|
"rewards/margins": 0.9302346110343933, |
|
"rewards/rejected": 2.525202989578247, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.48819368337152536, |
|
"grad_norm": 35.07600402832031, |
|
"learning_rate": 1.8721408309337295e-05, |
|
"logits/chosen": -18.920787811279297, |
|
"logits/rejected": -19.5614070892334, |
|
"logps/chosen": -475.8876647949219, |
|
"logps/rejected": -409.3224182128906, |
|
"loss": 1.2497, |
|
"rewards/accuracies": 0.6399999856948853, |
|
"rewards/chosen": 3.723018169403076, |
|
"rewards/margins": 1.3657230138778687, |
|
"rewards/rejected": 2.357295036315918, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.493175251569194, |
|
"grad_norm": 0.003431697143241763, |
|
"learning_rate": 1.8695766861283987e-05, |
|
"logits/chosen": -18.851102828979492, |
|
"logits/rejected": -19.668804168701172, |
|
"logps/chosen": -505.24945068359375, |
|
"logps/rejected": -430.45428466796875, |
|
"loss": 1.1504, |
|
"rewards/accuracies": 0.6299999952316284, |
|
"rewards/chosen": 3.6089422702789307, |
|
"rewards/margins": 1.7381848096847534, |
|
"rewards/rejected": 1.8707573413848877, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.4981568197668626, |
|
"grad_norm": 65.25814819335938, |
|
"learning_rate": 1.8669888768794024e-05, |
|
"logits/chosen": -18.943655014038086, |
|
"logits/rejected": -19.943601608276367, |
|
"logps/chosen": -456.38531494140625, |
|
"logps/rejected": -394.1759033203125, |
|
"loss": 1.1225, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": 3.0651655197143555, |
|
"rewards/margins": 1.2026199102401733, |
|
"rewards/rejected": 1.8625457286834717, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.4981568197668626, |
|
"eval_logits/chosen": -20.5496826171875, |
|
"eval_logits/rejected": -21.546123504638672, |
|
"eval_logps/chosen": -477.2846984863281, |
|
"eval_logps/rejected": -404.45428466796875, |
|
"eval_loss": 1.1951801776885986, |
|
"eval_rewards/accuracies": 0.6335078477859497, |
|
"eval_rewards/chosen": 4.191972732543945, |
|
"eval_rewards/margins": 1.5964468717575073, |
|
"eval_rewards/rejected": 2.5955255031585693, |
|
"eval_runtime": 472.8478, |
|
"eval_samples_per_second": 3.219, |
|
"eval_steps_per_second": 0.404, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.5031383879645313, |
|
"grad_norm": 94.86180114746094, |
|
"learning_rate": 1.8643774736107384e-05, |
|
"logits/chosen": -18.719505310058594, |
|
"logits/rejected": -19.4000244140625, |
|
"logps/chosen": -508.5538330078125, |
|
"logps/rejected": -445.2476806640625, |
|
"loss": 1.3446, |
|
"rewards/accuracies": 0.5799999833106995, |
|
"rewards/chosen": 3.225598096847534, |
|
"rewards/margins": 1.1447488069534302, |
|
"rewards/rejected": 2.0808494091033936, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.5081199561621998, |
|
"grad_norm": 48.567291259765625, |
|
"learning_rate": 1.8617425473884855e-05, |
|
"logits/chosen": -18.674545288085938, |
|
"logits/rejected": -19.417861938476562, |
|
"logps/chosen": -519.6810302734375, |
|
"logps/rejected": -433.57940673828125, |
|
"loss": 1.3277, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 2.415245771408081, |
|
"rewards/margins": 1.2748632431030273, |
|
"rewards/rejected": 1.1403824090957642, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.5131015243598684, |
|
"grad_norm": 6.8724141120910645, |
|
"learning_rate": 1.859084169918871e-05, |
|
"logits/chosen": -18.91655731201172, |
|
"logits/rejected": -19.582307815551758, |
|
"logps/chosen": -490.5546569824219, |
|
"logps/rejected": -427.4134521484375, |
|
"loss": 1.5115, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 2.807913303375244, |
|
"rewards/margins": 1.6051901578903198, |
|
"rewards/rejected": 1.2027232646942139, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.5180830925575372, |
|
"grad_norm": 11.200637817382812, |
|
"learning_rate": 1.8564024135463173e-05, |
|
"logits/chosen": -18.65709114074707, |
|
"logits/rejected": -18.75111198425293, |
|
"logps/chosen": -456.7597961425781, |
|
"logps/rejected": -418.4756774902344, |
|
"loss": 1.4256, |
|
"rewards/accuracies": 0.5699999928474426, |
|
"rewards/chosen": 1.6474815607070923, |
|
"rewards/margins": 0.9819788336753845, |
|
"rewards/rejected": 0.6655027866363525, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.5230646607552057, |
|
"grad_norm": 14.02713394165039, |
|
"learning_rate": 1.8536973512514762e-05, |
|
"logits/chosen": -18.270898818969727, |
|
"logits/rejected": -18.737123489379883, |
|
"logps/chosen": -495.671875, |
|
"logps/rejected": -404.7850646972656, |
|
"loss": 1.1738, |
|
"rewards/accuracies": 0.6700000166893005, |
|
"rewards/chosen": 3.6311376094818115, |
|
"rewards/margins": 1.3220287561416626, |
|
"rewards/rejected": 2.3091087341308594, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.5280462289528743, |
|
"grad_norm": 88.36447143554688, |
|
"learning_rate": 1.85096905664924e-05, |
|
"logits/chosen": -18.358213424682617, |
|
"logits/rejected": -18.569581985473633, |
|
"logps/chosen": -460.0679931640625, |
|
"logps/rejected": -420.5664978027344, |
|
"loss": 1.4962, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 1.870842456817627, |
|
"rewards/margins": 1.0018635988235474, |
|
"rewards/rejected": 0.8689790368080139, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.5330277971505429, |
|
"grad_norm": 2.6545143127441406, |
|
"learning_rate": 1.848217603986739e-05, |
|
"logits/chosen": -18.19515037536621, |
|
"logits/rejected": -18.503700256347656, |
|
"logps/chosen": -482.6021423339844, |
|
"logps/rejected": -439.8750915527344, |
|
"loss": 1.3315, |
|
"rewards/accuracies": 0.5899999737739563, |
|
"rewards/chosen": 2.491628408432007, |
|
"rewards/margins": 0.7911645770072937, |
|
"rewards/rejected": 1.700463891029358, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.5380093653482116, |
|
"grad_norm": 54.594303131103516, |
|
"learning_rate": 1.845443068141322e-05, |
|
"logits/chosen": -18.29205894470215, |
|
"logits/rejected": -19.060501098632812, |
|
"logps/chosen": -499.4178466796875, |
|
"logps/rejected": -410.4550476074219, |
|
"loss": 1.3476, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": 3.079913377761841, |
|
"rewards/margins": 1.4285519123077393, |
|
"rewards/rejected": 1.6513612270355225, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.5429909335458802, |
|
"grad_norm": 7.620671272277832, |
|
"learning_rate": 1.8426455246185177e-05, |
|
"logits/chosen": -18.482242584228516, |
|
"logits/rejected": -19.06051254272461, |
|
"logps/chosen": -488.9962463378906, |
|
"logps/rejected": -414.5852355957031, |
|
"loss": 1.118, |
|
"rewards/accuracies": 0.7099999785423279, |
|
"rewards/chosen": 3.6191320419311523, |
|
"rewards/margins": 1.9326629638671875, |
|
"rewards/rejected": 1.6864690780639648, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.5479725017435488, |
|
"grad_norm": 44.31614685058594, |
|
"learning_rate": 1.8398250495499796e-05, |
|
"logits/chosen": -18.507394790649414, |
|
"logits/rejected": -19.16678810119629, |
|
"logps/chosen": -496.0931701660156, |
|
"logps/rejected": -424.0758056640625, |
|
"loss": 1.3294, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 3.764357805252075, |
|
"rewards/margins": 1.4526183605194092, |
|
"rewards/rejected": 2.311739444732666, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.5529540699412175, |
|
"grad_norm": 113.41361999511719, |
|
"learning_rate": 1.8369817196914145e-05, |
|
"logits/chosen": -18.77853775024414, |
|
"logits/rejected": -19.550212860107422, |
|
"logps/chosen": -458.4302978515625, |
|
"logps/rejected": -376.8597717285156, |
|
"loss": 1.2723, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": 2.409397602081299, |
|
"rewards/margins": 1.0338749885559082, |
|
"rewards/rejected": 1.375522255897522, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.5579356381388861, |
|
"grad_norm": 39.32530212402344, |
|
"learning_rate": 1.8341156124204943e-05, |
|
"logits/chosen": -18.80110740661621, |
|
"logits/rejected": -19.126850128173828, |
|
"logps/chosen": -444.9359436035156, |
|
"logps/rejected": -403.28546142578125, |
|
"loss": 1.2621, |
|
"rewards/accuracies": 0.6299999952316284, |
|
"rewards/chosen": 3.3543567657470703, |
|
"rewards/margins": 1.4829553365707397, |
|
"rewards/rejected": 1.8714015483856201, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.5629172063365547, |
|
"grad_norm": 0.01231900043785572, |
|
"learning_rate": 1.8312268057347488e-05, |
|
"logits/chosen": -19.005640029907227, |
|
"logits/rejected": -19.596464157104492, |
|
"logps/chosen": -466.5137634277344, |
|
"logps/rejected": -408.3565673828125, |
|
"loss": 1.2726, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 2.5804996490478516, |
|
"rewards/margins": 1.2464163303375244, |
|
"rewards/rejected": 1.3340831995010376, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.5678987745342233, |
|
"grad_norm": 78.91645050048828, |
|
"learning_rate": 1.8283153782494457e-05, |
|
"logits/chosen": -19.66242218017578, |
|
"logits/rejected": -20.03885269165039, |
|
"logps/chosen": -495.5252380371094, |
|
"logps/rejected": -428.739501953125, |
|
"loss": 1.1003, |
|
"rewards/accuracies": 0.6299999952316284, |
|
"rewards/chosen": 0.8916977047920227, |
|
"rewards/margins": 2.0576887130737305, |
|
"rewards/rejected": -1.165990948677063, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.572880342731892, |
|
"grad_norm": 2.7297961711883545, |
|
"learning_rate": 1.8253814091954476e-05, |
|
"logits/chosen": -19.751190185546875, |
|
"logits/rejected": -20.76055335998535, |
|
"logps/chosen": -470.715576171875, |
|
"logps/rejected": -401.6990661621094, |
|
"loss": 1.1496, |
|
"rewards/accuracies": 0.6100000143051147, |
|
"rewards/chosen": -0.058783989399671555, |
|
"rewards/margins": 1.678650975227356, |
|
"rewards/rejected": -1.7374348640441895, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.5778619109295606, |
|
"grad_norm": 11.752656936645508, |
|
"learning_rate": 1.8224249784170595e-05, |
|
"logits/chosen": -19.580923080444336, |
|
"logits/rejected": -20.732593536376953, |
|
"logps/chosen": -517.3013916015625, |
|
"logps/rejected": -441.9253845214844, |
|
"loss": 1.2111, |
|
"rewards/accuracies": 0.6700000166893005, |
|
"rewards/chosen": -0.6827618479728699, |
|
"rewards/margins": 1.910689115524292, |
|
"rewards/rejected": -2.5934510231018066, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.5828434791272292, |
|
"grad_norm": 96.53925323486328, |
|
"learning_rate": 1.8194461663698524e-05, |
|
"logits/chosen": -19.67738914489746, |
|
"logits/rejected": -21.431556701660156, |
|
"logps/chosen": -518.2506103515625, |
|
"logps/rejected": -389.5613708496094, |
|
"loss": 1.3043, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.12440891563892365, |
|
"rewards/margins": 2.5978763103485107, |
|
"rewards/rejected": -2.4734673500061035, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.5878250473248979, |
|
"grad_norm": 62.21799850463867, |
|
"learning_rate": 1.8164450541184768e-05, |
|
"logits/chosen": -19.013898849487305, |
|
"logits/rejected": -19.318574905395508, |
|
"logps/chosen": -564.2166137695312, |
|
"logps/rejected": -528.39111328125, |
|
"loss": 1.3621, |
|
"rewards/accuracies": 0.6299999952316284, |
|
"rewards/chosen": 0.12132181972265244, |
|
"rewards/margins": 1.5959105491638184, |
|
"rewards/rejected": -1.4745885133743286, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.5928066155225665, |
|
"grad_norm": 0.12062743306159973, |
|
"learning_rate": 1.8134217233344556e-05, |
|
"logits/chosen": -19.182098388671875, |
|
"logits/rejected": -19.83804702758789, |
|
"logps/chosen": -525.335693359375, |
|
"logps/rejected": -455.0185852050781, |
|
"loss": 1.3079, |
|
"rewards/accuracies": 0.6299999952316284, |
|
"rewards/chosen": -0.4299677908420563, |
|
"rewards/margins": 1.420630931854248, |
|
"rewards/rejected": -1.8505988121032715, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.5977881837202351, |
|
"grad_norm": 4.478858470916748, |
|
"learning_rate": 1.81037625629396e-05, |
|
"logits/chosen": -18.84477996826172, |
|
"logits/rejected": -19.84359359741211, |
|
"logps/chosen": -519.6325073242188, |
|
"logps/rejected": -437.3699951171875, |
|
"loss": 1.2444, |
|
"rewards/accuracies": 0.6800000071525574, |
|
"rewards/chosen": 1.3549919128417969, |
|
"rewards/margins": 2.0743324756622314, |
|
"rewards/rejected": -0.7193406820297241, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.6027697519179037, |
|
"grad_norm": 43.38969039916992, |
|
"learning_rate": 1.8073087358755735e-05, |
|
"logits/chosen": -18.777620315551758, |
|
"logits/rejected": -19.37495231628418, |
|
"logps/chosen": -485.4015197753906, |
|
"logps/rejected": -413.2806396484375, |
|
"loss": 1.2657, |
|
"rewards/accuracies": 0.6100000143051147, |
|
"rewards/chosen": 2.3500404357910156, |
|
"rewards/margins": 2.2481086254119873, |
|
"rewards/rejected": 0.10193166881799698, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.6077513201155724, |
|
"grad_norm": 29.47319984436035, |
|
"learning_rate": 1.804219245558033e-05, |
|
"logits/chosen": -18.747379302978516, |
|
"logits/rejected": -19.17096519470215, |
|
"logps/chosen": -476.1571044921875, |
|
"logps/rejected": -414.704345703125, |
|
"loss": 1.3264, |
|
"rewards/accuracies": 0.5799999833106995, |
|
"rewards/chosen": 1.9210524559020996, |
|
"rewards/margins": 0.9324368238449097, |
|
"rewards/rejected": 0.9886155724525452, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.612732888313241, |
|
"grad_norm": 57.94328689575195, |
|
"learning_rate": 1.8011078694179602e-05, |
|
"logits/chosen": -18.417835235595703, |
|
"logits/rejected": -18.728105545043945, |
|
"logps/chosen": -466.6083068847656, |
|
"logps/rejected": -417.28936767578125, |
|
"loss": 1.3787, |
|
"rewards/accuracies": 0.5899999737739563, |
|
"rewards/chosen": 2.9616811275482178, |
|
"rewards/margins": 0.9320456981658936, |
|
"rewards/rejected": 2.0296356678009033, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.6177144565109096, |
|
"grad_norm": 0.6535269021987915, |
|
"learning_rate": 1.7979746921275713e-05, |
|
"logits/chosen": -18.470064163208008, |
|
"logits/rejected": -19.071678161621094, |
|
"logps/chosen": -499.7461242675781, |
|
"logps/rejected": -414.9991455078125, |
|
"loss": 1.0886, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 2.9048681259155273, |
|
"rewards/margins": 1.7158997058868408, |
|
"rewards/rejected": 1.188968300819397, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.6226960247085782, |
|
"grad_norm": 47.690162658691406, |
|
"learning_rate": 1.794819798952374e-05, |
|
"logits/chosen": -18.49652862548828, |
|
"logits/rejected": -18.98128318786621, |
|
"logps/chosen": -571.6408081054688, |
|
"logps/rejected": -473.7767333984375, |
|
"loss": 1.2108, |
|
"rewards/accuracies": 0.7099999785423279, |
|
"rewards/chosen": 2.7018349170684814, |
|
"rewards/margins": 2.354581832885742, |
|
"rewards/rejected": 0.34725311398506165, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.6276775929062469, |
|
"grad_norm": 69.55400085449219, |
|
"learning_rate": 1.7916432757488467e-05, |
|
"logits/chosen": -19.46697235107422, |
|
"logits/rejected": -20.09600830078125, |
|
"logps/chosen": -524.7301635742188, |
|
"logps/rejected": -433.07293701171875, |
|
"loss": 1.0183, |
|
"rewards/accuracies": 0.6700000166893005, |
|
"rewards/chosen": 1.3079893589019775, |
|
"rewards/margins": 3.2492458820343018, |
|
"rewards/rejected": -1.9412565231323242, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.6326591611039155, |
|
"grad_norm": 14.792251586914062, |
|
"learning_rate": 1.7884452089621012e-05, |
|
"logits/chosen": -19.28809928894043, |
|
"logits/rejected": -20.2492733001709, |
|
"logps/chosen": -578.1820068359375, |
|
"logps/rejected": -456.3279724121094, |
|
"loss": 1.1159, |
|
"rewards/accuracies": 0.6700000166893005, |
|
"rewards/chosen": 1.191977858543396, |
|
"rewards/margins": 2.5655128955841064, |
|
"rewards/rejected": -1.37353515625, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.6376407293015841, |
|
"grad_norm": 0.21572743356227875, |
|
"learning_rate": 1.7852256856235318e-05, |
|
"logits/chosen": -19.648353576660156, |
|
"logits/rejected": -20.134416580200195, |
|
"logps/chosen": -495.5775146484375, |
|
"logps/rejected": -438.1684265136719, |
|
"loss": 1.387, |
|
"rewards/accuracies": 0.6800000071525574, |
|
"rewards/chosen": -0.19859656691551208, |
|
"rewards/margins": 2.191715955734253, |
|
"rewards/rejected": -2.390312433242798, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.6426222974992528, |
|
"grad_norm": 102.35507202148438, |
|
"learning_rate": 1.7819847933484467e-05, |
|
"logits/chosen": -19.353174209594727, |
|
"logits/rejected": -20.048927307128906, |
|
"logps/chosen": -524.4760131835938, |
|
"logps/rejected": -446.4917907714844, |
|
"loss": 1.1967, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.8889893293380737, |
|
"rewards/margins": 1.9711395502090454, |
|
"rewards/rejected": -1.0821502208709717, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.6476038656969214, |
|
"grad_norm": 0.06541766971349716, |
|
"learning_rate": 1.778722620333681e-05, |
|
"logits/chosen": -19.828271865844727, |
|
"logits/rejected": -20.194868087768555, |
|
"logps/chosen": -602.9769287109375, |
|
"logps/rejected": -530.201904296875, |
|
"loss": 1.6761, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.2178092002868652, |
|
"rewards/margins": 1.5633704662322998, |
|
"rewards/rejected": -3.781179904937744, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.65258543389459, |
|
"grad_norm": 58.162445068359375, |
|
"learning_rate": 1.775439255355201e-05, |
|
"logits/chosen": -19.331708908081055, |
|
"logits/rejected": -19.971097946166992, |
|
"logps/chosen": -570.8577880859375, |
|
"logps/rejected": -473.57196044921875, |
|
"loss": 1.1495, |
|
"rewards/accuracies": 0.6700000166893005, |
|
"rewards/chosen": 0.24211058020591736, |
|
"rewards/margins": 2.4019970893859863, |
|
"rewards/rejected": -2.159886598587036, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.6575670020922586, |
|
"grad_norm": 1.0752054452896118, |
|
"learning_rate": 1.772134787765684e-05, |
|
"logits/chosen": -19.27989959716797, |
|
"logits/rejected": -19.537317276000977, |
|
"logps/chosen": -541.393798828125, |
|
"logps/rejected": -501.2279968261719, |
|
"loss": 1.6051, |
|
"rewards/accuracies": 0.5799999833106995, |
|
"rewards/chosen": -1.2508366107940674, |
|
"rewards/margins": 0.7745574712753296, |
|
"rewards/rejected": -2.0253942012786865, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.6625485702899273, |
|
"grad_norm": 8.162809371948242, |
|
"learning_rate": 1.768809307492089e-05, |
|
"logits/chosen": -18.722593307495117, |
|
"logits/rejected": -19.011571884155273, |
|
"logps/chosen": -513.6095581054688, |
|
"logps/rejected": -469.2226257324219, |
|
"loss": 1.3372, |
|
"rewards/accuracies": 0.5400000214576721, |
|
"rewards/chosen": -0.9962272047996521, |
|
"rewards/margins": 0.9051995873451233, |
|
"rewards/rejected": -1.9014270305633545, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.6675301384875959, |
|
"grad_norm": 97.5845947265625, |
|
"learning_rate": 1.765462905033209e-05, |
|
"logits/chosen": -19.051023483276367, |
|
"logits/rejected": -19.420806884765625, |
|
"logps/chosen": -478.5913391113281, |
|
"logps/rejected": -437.26995849609375, |
|
"loss": 1.3999, |
|
"rewards/accuracies": 0.6299999952316284, |
|
"rewards/chosen": -1.0214941501617432, |
|
"rewards/margins": 0.9453433752059937, |
|
"rewards/rejected": -1.9668372869491577, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.6725117066852645, |
|
"grad_norm": 29.43077850341797, |
|
"learning_rate": 1.762095671457209e-05, |
|
"logits/chosen": -19.13440704345703, |
|
"logits/rejected": -19.925010681152344, |
|
"logps/chosen": -496.3144836425781, |
|
"logps/rejected": -413.5525817871094, |
|
"loss": 1.3997, |
|
"rewards/accuracies": 0.6100000143051147, |
|
"rewards/chosen": -0.08645965903997421, |
|
"rewards/margins": 1.8843421936035156, |
|
"rewards/rejected": -1.970801830291748, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.6774932748829331, |
|
"grad_norm": 0.7588065266609192, |
|
"learning_rate": 1.7587076983991457e-05, |
|
"logits/chosen": -19.021947860717773, |
|
"logits/rejected": -19.49304962158203, |
|
"logps/chosen": -520.0108032226562, |
|
"logps/rejected": -473.15020751953125, |
|
"loss": 1.9126, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 1.3220689296722412, |
|
"rewards/margins": 0.6901782155036926, |
|
"rewards/rejected": 0.631890594959259, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.6824748430806018, |
|
"grad_norm": 110.71784973144531, |
|
"learning_rate": 1.755299078058475e-05, |
|
"logits/chosen": -19.794466018676758, |
|
"logits/rejected": -20.945425033569336, |
|
"logps/chosen": -485.5846862792969, |
|
"logps/rejected": -422.5166931152344, |
|
"loss": 1.0623, |
|
"rewards/accuracies": 0.6800000071525574, |
|
"rewards/chosen": -0.4341191351413727, |
|
"rewards/margins": 1.9454231262207031, |
|
"rewards/rejected": -2.379542589187622, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.6874564112782704, |
|
"grad_norm": 92.97657012939453, |
|
"learning_rate": 1.751869903196543e-05, |
|
"logits/chosen": -19.072101593017578, |
|
"logits/rejected": -20.458724975585938, |
|
"logps/chosen": -550.3908081054688, |
|
"logps/rejected": -454.7740173339844, |
|
"loss": 1.1903, |
|
"rewards/accuracies": 0.6299999952316284, |
|
"rewards/chosen": 0.9165298342704773, |
|
"rewards/margins": 1.906398892402649, |
|
"rewards/rejected": -0.9898689389228821, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.692437979475939, |
|
"grad_norm": 4.199160575866699, |
|
"learning_rate": 1.748420267134062e-05, |
|
"logits/chosen": -18.836036682128906, |
|
"logits/rejected": -19.922813415527344, |
|
"logps/chosen": -539.5211181640625, |
|
"logps/rejected": -479.9672546386719, |
|
"loss": 1.2681, |
|
"rewards/accuracies": 0.6100000143051147, |
|
"rewards/chosen": 1.260705828666687, |
|
"rewards/margins": 1.744788408279419, |
|
"rewards/rejected": -0.48408252000808716, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.6974195476736077, |
|
"grad_norm": 107.6252212524414, |
|
"learning_rate": 1.74495026374857e-05, |
|
"logits/chosen": -19.50172233581543, |
|
"logits/rejected": -20.288314819335938, |
|
"logps/chosen": -514.2687377929688, |
|
"logps/rejected": -449.1997375488281, |
|
"loss": 1.4695, |
|
"rewards/accuracies": 0.6299999952316284, |
|
"rewards/chosen": 0.515940248966217, |
|
"rewards/margins": 1.3783401250839233, |
|
"rewards/rejected": -0.8623998761177063, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.7024011158712763, |
|
"grad_norm": 5.941022872924805, |
|
"learning_rate": 1.7414599874718753e-05, |
|
"logits/chosen": -18.767423629760742, |
|
"logits/rejected": -19.68829917907715, |
|
"logps/chosen": -545.5341796875, |
|
"logps/rejected": -463.9657287597656, |
|
"loss": 1.1235, |
|
"rewards/accuracies": 0.6100000143051147, |
|
"rewards/chosen": 1.2197209596633911, |
|
"rewards/margins": 1.6720809936523438, |
|
"rewards/rejected": -0.4523601531982422, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.7073826840689449, |
|
"grad_norm": 26.461627960205078, |
|
"learning_rate": 1.737949533287489e-05, |
|
"logits/chosen": -18.46575355529785, |
|
"logits/rejected": -19.159351348876953, |
|
"logps/chosen": -517.9618530273438, |
|
"logps/rejected": -417.8922119140625, |
|
"loss": 1.14, |
|
"rewards/accuracies": 0.6800000071525574, |
|
"rewards/chosen": 1.9558275938034058, |
|
"rewards/margins": 2.069073438644409, |
|
"rewards/rejected": -0.11324585229158401, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.7123642522666135, |
|
"grad_norm": 1.1918169260025024, |
|
"learning_rate": 1.7344189967280383e-05, |
|
"logits/chosen": -19.000808715820312, |
|
"logits/rejected": -20.075515747070312, |
|
"logps/chosen": -474.513916015625, |
|
"logps/rejected": -400.20196533203125, |
|
"loss": 0.9665, |
|
"rewards/accuracies": 0.7099999785423279, |
|
"rewards/chosen": 1.894654393196106, |
|
"rewards/margins": 2.998011589050293, |
|
"rewards/rejected": -1.103356957435608, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.7173458204642822, |
|
"grad_norm": 60.40048599243164, |
|
"learning_rate": 1.7308684738726668e-05, |
|
"logits/chosen": -18.980615615844727, |
|
"logits/rejected": -20.142223358154297, |
|
"logps/chosen": -510.573974609375, |
|
"logps/rejected": -441.4659729003906, |
|
"loss": 1.266, |
|
"rewards/accuracies": 0.6100000143051147, |
|
"rewards/chosen": 0.2977685332298279, |
|
"rewards/margins": 1.5726754665374756, |
|
"rewards/rejected": -1.274907112121582, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.7223273886619508, |
|
"grad_norm": 0.5988157391548157, |
|
"learning_rate": 1.7272980613444206e-05, |
|
"logits/chosen": -18.941259384155273, |
|
"logits/rejected": -20.322023391723633, |
|
"logps/chosen": -531.8062744140625, |
|
"logps/rejected": -474.36785888671875, |
|
"loss": 1.2675, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.3074489235877991, |
|
"rewards/margins": 1.548563838005066, |
|
"rewards/rejected": -1.8560125827789307, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.7273089568596194, |
|
"grad_norm": 6.901514530181885, |
|
"learning_rate": 1.7237078563076178e-05, |
|
"logits/chosen": -19.498384475708008, |
|
"logits/rejected": -21.36153793334961, |
|
"logps/chosen": -511.9005126953125, |
|
"logps/rejected": -439.97003173828125, |
|
"loss": 1.2244, |
|
"rewards/accuracies": 0.6399999856948853, |
|
"rewards/chosen": -0.4899732172489166, |
|
"rewards/margins": 1.9954489469528198, |
|
"rewards/rejected": -2.485422372817993, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.7322905250572881, |
|
"grad_norm": 0.03188573196530342, |
|
"learning_rate": 1.7200979564652064e-05, |
|
"logits/chosen": -18.785024642944336, |
|
"logits/rejected": -21.15717124938965, |
|
"logps/chosen": -520.9817504882812, |
|
"logps/rejected": -426.6506652832031, |
|
"loss": 1.3959, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": 1.1841310262680054, |
|
"rewards/margins": 1.9892054796218872, |
|
"rewards/rejected": -0.8050744533538818, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.7372720932549567, |
|
"grad_norm": 30.52501678466797, |
|
"learning_rate": 1.7164684600561018e-05, |
|
"logits/chosen": -18.466907501220703, |
|
"logits/rejected": -20.27123260498047, |
|
"logps/chosen": -531.89501953125, |
|
"logps/rejected": -429.2342834472656, |
|
"loss": 1.3491, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": 2.848252773284912, |
|
"rewards/margins": 2.337670087814331, |
|
"rewards/rejected": 0.5105829834938049, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.7422536614526253, |
|
"grad_norm": 4.541143894195557, |
|
"learning_rate": 1.712819465852517e-05, |
|
"logits/chosen": -18.570043563842773, |
|
"logits/rejected": -20.75904083251953, |
|
"logps/chosen": -493.5054931640625, |
|
"logps/rejected": -380.9721984863281, |
|
"loss": 1.2685, |
|
"rewards/accuracies": 0.6399999856948853, |
|
"rewards/chosen": 1.8648478984832764, |
|
"rewards/margins": 2.2077205181121826, |
|
"rewards/rejected": -0.3428727388381958, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.7472352296502939, |
|
"grad_norm": 0.44224098324775696, |
|
"learning_rate": 1.7091510731572725e-05, |
|
"logits/chosen": -18.91974449157715, |
|
"logits/rejected": -20.655202865600586, |
|
"logps/chosen": -514.9723510742188, |
|
"logps/rejected": -452.38983154296875, |
|
"loss": 1.7715, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 2.1622519493103027, |
|
"rewards/margins": 1.3429455757141113, |
|
"rewards/rejected": 0.8193062543869019, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.7472352296502939, |
|
"eval_logits/chosen": -22.020517349243164, |
|
"eval_logits/rejected": -23.86900520324707, |
|
"eval_logps/chosen": -484.0854187011719, |
|
"eval_logps/rejected": -414.28009033203125, |
|
"eval_loss": 1.2986581325531006, |
|
"eval_rewards/accuracies": 0.6367800831794739, |
|
"eval_rewards/chosen": 3.5119001865386963, |
|
"eval_rewards/margins": 1.8989582061767578, |
|
"eval_rewards/rejected": 1.6129425764083862, |
|
"eval_runtime": 472.8477, |
|
"eval_samples_per_second": 3.219, |
|
"eval_steps_per_second": 0.404, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.7522167978479626, |
|
"grad_norm": 10.687481880187988, |
|
"learning_rate": 1.7054633818010954e-05, |
|
"logits/chosen": -18.568933486938477, |
|
"logits/rejected": -20.1440372467041, |
|
"logps/chosen": -456.9844055175781, |
|
"logps/rejected": -408.1793212890625, |
|
"loss": 1.0277, |
|
"rewards/accuracies": 0.6800000071525574, |
|
"rewards/chosen": 3.049287796020508, |
|
"rewards/margins": 1.8809983730316162, |
|
"rewards/rejected": 1.168289065361023, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.7571983660456312, |
|
"grad_norm": 67.85148620605469, |
|
"learning_rate": 1.7017564921399e-05, |
|
"logits/chosen": -18.714679718017578, |
|
"logits/rejected": -21.288236618041992, |
|
"logps/chosen": -508.66015625, |
|
"logps/rejected": -424.9830322265625, |
|
"loss": 1.362, |
|
"rewards/accuracies": 0.6100000143051147, |
|
"rewards/chosen": 2.725537061691284, |
|
"rewards/margins": 1.661694049835205, |
|
"rewards/rejected": 1.0638428926467896, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.7621799342432998, |
|
"grad_norm": 44.31140899658203, |
|
"learning_rate": 1.698030505052061e-05, |
|
"logits/chosen": -18.928104400634766, |
|
"logits/rejected": -20.428186416625977, |
|
"logps/chosen": -470.02459716796875, |
|
"logps/rejected": -381.4432678222656, |
|
"loss": 1.504, |
|
"rewards/accuracies": 0.5600000023841858, |
|
"rewards/chosen": 2.468132734298706, |
|
"rewards/margins": 0.8910315036773682, |
|
"rewards/rejected": 1.577101469039917, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.7671615024409684, |
|
"grad_norm": 10.858190536499023, |
|
"learning_rate": 1.6942855219356634e-05, |
|
"logits/chosen": -18.520444869995117, |
|
"logits/rejected": -20.245197296142578, |
|
"logps/chosen": -491.31854248046875, |
|
"logps/rejected": -416.8392333984375, |
|
"loss": 1.0534, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 3.1176271438598633, |
|
"rewards/margins": 1.8960695266723633, |
|
"rewards/rejected": 1.2215576171875, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.7721430706386371, |
|
"grad_norm": 0.026119831949472427, |
|
"learning_rate": 1.6905216447057467e-05, |
|
"logits/chosen": -19.040843963623047, |
|
"logits/rejected": -20.839155197143555, |
|
"logps/chosen": -495.08428955078125, |
|
"logps/rejected": -410.4775390625, |
|
"loss": 1.5746, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": 2.2889349460601807, |
|
"rewards/margins": 1.6078674793243408, |
|
"rewards/rejected": 0.6810672283172607, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.7771246388363057, |
|
"grad_norm": 70.28289031982422, |
|
"learning_rate": 1.686738975791529e-05, |
|
"logits/chosen": -18.932214736938477, |
|
"logits/rejected": -20.42650032043457, |
|
"logps/chosen": -493.1498718261719, |
|
"logps/rejected": -429.498779296875, |
|
"loss": 1.4583, |
|
"rewards/accuracies": 0.5699999928474426, |
|
"rewards/chosen": 2.8957221508026123, |
|
"rewards/margins": 1.429540991783142, |
|
"rewards/rejected": 1.4661809206008911, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.7821062070339743, |
|
"grad_norm": 0.8262832164764404, |
|
"learning_rate": 1.6829376181336225e-05, |
|
"logits/chosen": -19.566686630249023, |
|
"logits/rejected": -21.254444122314453, |
|
"logps/chosen": -483.1775817871094, |
|
"logps/rejected": -451.6460266113281, |
|
"loss": 1.238, |
|
"rewards/accuracies": 0.5899999737739563, |
|
"rewards/chosen": 1.937461495399475, |
|
"rewards/margins": 1.0156903266906738, |
|
"rewards/rejected": 0.9217712879180908, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.787087775231643, |
|
"grad_norm": 10.062756538391113, |
|
"learning_rate": 1.6791176751812282e-05, |
|
"logits/chosen": -19.597667694091797, |
|
"logits/rejected": -21.418546676635742, |
|
"logps/chosen": -485.7425537109375, |
|
"logps/rejected": -417.196533203125, |
|
"loss": 1.1054, |
|
"rewards/accuracies": 0.6899999976158142, |
|
"rewards/chosen": 1.6439099311828613, |
|
"rewards/margins": 2.0514931678771973, |
|
"rewards/rejected": -0.40758341550827026, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.7920693434293116, |
|
"grad_norm": 11.171751022338867, |
|
"learning_rate": 1.675279250889324e-05, |
|
"logits/chosen": -19.189016342163086, |
|
"logits/rejected": -21.2204532623291, |
|
"logps/chosen": -523.8226318359375, |
|
"logps/rejected": -432.4778137207031, |
|
"loss": 1.2853, |
|
"rewards/accuracies": 0.6800000071525574, |
|
"rewards/chosen": 2.265528917312622, |
|
"rewards/margins": 2.169790744781494, |
|
"rewards/rejected": 0.0957380086183548, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.7970509116269802, |
|
"grad_norm": 5.786453723907471, |
|
"learning_rate": 1.6714224497158334e-05, |
|
"logits/chosen": -19.52318572998047, |
|
"logits/rejected": -22.174915313720703, |
|
"logps/chosen": -500.9818115234375, |
|
"logps/rejected": -427.1512451171875, |
|
"loss": 1.2191, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.03365034982562065, |
|
"rewards/margins": 2.265183210372925, |
|
"rewards/rejected": -2.2315328121185303, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.8020324798246488, |
|
"grad_norm": 0.03890511766076088, |
|
"learning_rate": 1.667547376618785e-05, |
|
"logits/chosen": -19.749061584472656, |
|
"logits/rejected": -21.982847213745117, |
|
"logps/chosen": -521.017822265625, |
|
"logps/rejected": -434.4532165527344, |
|
"loss": 1.6337, |
|
"rewards/accuracies": 0.5600000023841858, |
|
"rewards/chosen": 1.2707146406173706, |
|
"rewards/margins": 2.021421432495117, |
|
"rewards/rejected": -0.7507067918777466, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.8070140480223175, |
|
"grad_norm": 7.803345680236816, |
|
"learning_rate": 1.6636541370534537e-05, |
|
"logits/chosen": -19.421598434448242, |
|
"logits/rejected": -20.04302406311035, |
|
"logps/chosen": -481.74334716796875, |
|
"logps/rejected": -437.2704162597656, |
|
"loss": 1.5953, |
|
"rewards/accuracies": 0.5799999833106995, |
|
"rewards/chosen": 1.658532738685608, |
|
"rewards/margins": 1.250680685043335, |
|
"rewards/rejected": 0.40785208344459534, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.8119956162199861, |
|
"grad_norm": 0.0178745836019516, |
|
"learning_rate": 1.6597428369694934e-05, |
|
"logits/chosen": -18.97728157043457, |
|
"logits/rejected": -20.238012313842773, |
|
"logps/chosen": -544.66015625, |
|
"logps/rejected": -475.9863586425781, |
|
"loss": 1.2509, |
|
"rewards/accuracies": 0.6600000262260437, |
|
"rewards/chosen": 1.6321719884872437, |
|
"rewards/margins": 1.595544695854187, |
|
"rewards/rejected": 0.03662717714905739, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.8169771844176547, |
|
"grad_norm": 115.91627502441406, |
|
"learning_rate": 1.655813582808051e-05, |
|
"logits/chosen": -19.31316566467285, |
|
"logits/rejected": -20.653505325317383, |
|
"logps/chosen": -523.737060546875, |
|
"logps/rejected": -466.0438232421875, |
|
"loss": 1.4616, |
|
"rewards/accuracies": 0.5600000023841858, |
|
"rewards/chosen": 1.028465986251831, |
|
"rewards/margins": 0.8755133152008057, |
|
"rewards/rejected": 0.15295258164405823, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.8219587526153233, |
|
"grad_norm": 19.2689266204834, |
|
"learning_rate": 1.651866481498873e-05, |
|
"logits/chosen": -19.784204483032227, |
|
"logits/rejected": -21.526857376098633, |
|
"logps/chosen": -484.1365966796875, |
|
"logps/rejected": -423.5486755371094, |
|
"loss": 1.0354, |
|
"rewards/accuracies": 0.6800000071525574, |
|
"rewards/chosen": 1.5439956188201904, |
|
"rewards/margins": 2.2780568599700928, |
|
"rewards/rejected": -0.7340614199638367, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.826940320812992, |
|
"grad_norm": 22.2554988861084, |
|
"learning_rate": 1.6479016404573916e-05, |
|
"logits/chosen": -19.650360107421875, |
|
"logits/rejected": -21.203350067138672, |
|
"logps/chosen": -505.99871826171875, |
|
"logps/rejected": -474.4420166015625, |
|
"loss": 1.6364, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.20957961678504944, |
|
"rewards/margins": 1.9317030906677246, |
|
"rewards/rejected": -2.141282558441162, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.8319218890106606, |
|
"grad_norm": 44.10676574707031, |
|
"learning_rate": 1.6439191675818056e-05, |
|
"logits/chosen": -19.524065017700195, |
|
"logits/rejected": -22.584871292114258, |
|
"logps/chosen": -469.15264892578125, |
|
"logps/rejected": -362.7622985839844, |
|
"loss": 0.9529, |
|
"rewards/accuracies": 0.6800000071525574, |
|
"rewards/chosen": -0.1507752537727356, |
|
"rewards/margins": 2.6078405380249023, |
|
"rewards/rejected": -2.7586159706115723, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.8369034572083291, |
|
"grad_norm": 143.54794311523438, |
|
"learning_rate": 1.6399191712501417e-05, |
|
"logits/chosen": -19.261682510375977, |
|
"logits/rejected": -21.39293670654297, |
|
"logps/chosen": -543.3509521484375, |
|
"logps/rejected": -472.142822265625, |
|
"loss": 1.4329, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": 1.02738356590271, |
|
"rewards/margins": 1.6261159181594849, |
|
"rewards/rejected": -0.5987322926521301, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.8418850254059979, |
|
"grad_norm": 0.004016869701445103, |
|
"learning_rate": 1.6359017603173043e-05, |
|
"logits/chosen": -19.182754516601562, |
|
"logits/rejected": -21.46161651611328, |
|
"logps/chosen": -525.286376953125, |
|
"logps/rejected": -444.2633972167969, |
|
"loss": 1.361, |
|
"rewards/accuracies": 0.6600000262260437, |
|
"rewards/chosen": 0.7399688959121704, |
|
"rewards/margins": 2.1688730716705322, |
|
"rewards/rejected": -1.4289040565490723, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.8468665936036665, |
|
"grad_norm": 85.75580596923828, |
|
"learning_rate": 1.6318670441121157e-05, |
|
"logits/chosen": -19.6074161529541, |
|
"logits/rejected": -20.843006134033203, |
|
"logps/chosen": -514.9994506835938, |
|
"logps/rejected": -458.54803466796875, |
|
"loss": 1.9907, |
|
"rewards/accuracies": 0.5799999833106995, |
|
"rewards/chosen": -0.5896009802818298, |
|
"rewards/margins": 1.107519507408142, |
|
"rewards/rejected": -1.6971205472946167, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.851848161801335, |
|
"grad_norm": 0.005596471484750509, |
|
"learning_rate": 1.6278151324343395e-05, |
|
"logits/chosen": -18.718494415283203, |
|
"logits/rejected": -19.837169647216797, |
|
"logps/chosen": -527.9190673828125, |
|
"logps/rejected": -452.2406005859375, |
|
"loss": 0.9701, |
|
"rewards/accuracies": 0.6299999952316284, |
|
"rewards/chosen": -0.13445429503917694, |
|
"rewards/margins": 2.05269193649292, |
|
"rewards/rejected": -2.1871461868286133, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.8568297299990036, |
|
"grad_norm": 76.1462631225586, |
|
"learning_rate": 1.6237461355516918e-05, |
|
"logits/chosen": -18.651123046875, |
|
"logits/rejected": -19.52649688720703, |
|
"logps/chosen": -539.2634887695312, |
|
"logps/rejected": -478.9902648925781, |
|
"loss": 1.575, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.8822598457336426, |
|
"rewards/margins": 1.2877403497695923, |
|
"rewards/rejected": -0.4054804742336273, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.8618112981966723, |
|
"grad_norm": 90.55237579345703, |
|
"learning_rate": 1.6196601641968425e-05, |
|
"logits/chosen": -18.68253517150879, |
|
"logits/rejected": -20.140342712402344, |
|
"logps/chosen": -523.9414672851562, |
|
"logps/rejected": -457.2998046875, |
|
"loss": 1.1014, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 2.5566680431365967, |
|
"rewards/margins": 1.6561553478240967, |
|
"rewards/rejected": 0.9005125164985657, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.8667928663943409, |
|
"grad_norm": 3.8211495876312256, |
|
"learning_rate": 1.6155573295643993e-05, |
|
"logits/chosen": -19.115205764770508, |
|
"logits/rejected": -19.974811553955078, |
|
"logps/chosen": -527.80419921875, |
|
"logps/rejected": -491.970458984375, |
|
"loss": 1.5425, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": 2.147320032119751, |
|
"rewards/margins": 0.9348466396331787, |
|
"rewards/rejected": 1.2124736309051514, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.8717744345920095, |
|
"grad_norm": 12.620223999023438, |
|
"learning_rate": 1.611437743307884e-05, |
|
"logits/chosen": -19.070911407470703, |
|
"logits/rejected": -20.162216186523438, |
|
"logps/chosen": -515.4886474609375, |
|
"logps/rejected": -440.5010070800781, |
|
"loss": 1.2029, |
|
"rewards/accuracies": 0.7200000286102295, |
|
"rewards/chosen": 2.0181946754455566, |
|
"rewards/margins": 1.9606748819351196, |
|
"rewards/rejected": 0.057520028203725815, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.8767560027896782, |
|
"grad_norm": 59.42154312133789, |
|
"learning_rate": 1.6073015175366914e-05, |
|
"logits/chosen": -18.614526748657227, |
|
"logits/rejected": -19.882549285888672, |
|
"logps/chosen": -515.3157958984375, |
|
"logps/rejected": -447.471435546875, |
|
"loss": 1.3499, |
|
"rewards/accuracies": 0.5600000023841858, |
|
"rewards/chosen": 2.700169563293457, |
|
"rewards/margins": 0.998585045337677, |
|
"rewards/rejected": 1.701583981513977, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.8817375709873468, |
|
"grad_norm": 66.42816925048828, |
|
"learning_rate": 1.603148764813042e-05, |
|
"logits/chosen": -18.400327682495117, |
|
"logits/rejected": -19.4934024810791, |
|
"logps/chosen": -477.818603515625, |
|
"logps/rejected": -401.1089782714844, |
|
"loss": 1.1853, |
|
"rewards/accuracies": 0.5799999833106995, |
|
"rewards/chosen": 3.3441994190216064, |
|
"rewards/margins": 1.493272066116333, |
|
"rewards/rejected": 1.8509272336959839, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.8867191391850154, |
|
"grad_norm": 0.004200187046080828, |
|
"learning_rate": 1.5989795981489155e-05, |
|
"logits/chosen": -18.308795928955078, |
|
"logits/rejected": -19.249317169189453, |
|
"logps/chosen": -512.5999755859375, |
|
"logps/rejected": -438.9172668457031, |
|
"loss": 1.1657, |
|
"rewards/accuracies": 0.6600000262260437, |
|
"rewards/chosen": 3.686070442199707, |
|
"rewards/margins": 2.335679292678833, |
|
"rewards/rejected": 1.3503910303115845, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.891700707382684, |
|
"grad_norm": 4.083571910858154, |
|
"learning_rate": 1.5947941310029755e-05, |
|
"logits/chosen": -18.304054260253906, |
|
"logits/rejected": -19.744394302368164, |
|
"logps/chosen": -475.91485595703125, |
|
"logps/rejected": -379.3888244628906, |
|
"loss": 1.1195, |
|
"rewards/accuracies": 0.6899999976158142, |
|
"rewards/chosen": 3.1028084754943848, |
|
"rewards/margins": 1.9994087219238281, |
|
"rewards/rejected": 1.1033999919891357, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.8966822755803527, |
|
"grad_norm": 51.022762298583984, |
|
"learning_rate": 1.5905924772774855e-05, |
|
"logits/chosen": -18.618383407592773, |
|
"logits/rejected": -19.63192367553711, |
|
"logps/chosen": -479.7633972167969, |
|
"logps/rejected": -392.1904602050781, |
|
"loss": 1.1562, |
|
"rewards/accuracies": 0.6800000071525574, |
|
"rewards/chosen": 2.1094658374786377, |
|
"rewards/margins": 1.7458257675170898, |
|
"rewards/rejected": 0.3636399209499359, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.9016638437780213, |
|
"grad_norm": 0.00022725010057911277, |
|
"learning_rate": 1.586374751315204e-05, |
|
"logits/chosen": -19.1419620513916, |
|
"logits/rejected": -20.252761840820312, |
|
"logps/chosen": -530.3628540039062, |
|
"logps/rejected": -461.61639404296875, |
|
"loss": 1.3509, |
|
"rewards/accuracies": 0.6299999952316284, |
|
"rewards/chosen": 3.2607533931732178, |
|
"rewards/margins": 1.972005009651184, |
|
"rewards/rejected": 1.2887482643127441, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 0.9066454119756899, |
|
"grad_norm": 120.93099212646484, |
|
"learning_rate": 1.5821410678962764e-05, |
|
"logits/chosen": -19.30841636657715, |
|
"logits/rejected": -20.281227111816406, |
|
"logps/chosen": -450.5699157714844, |
|
"logps/rejected": -398.7843017578125, |
|
"loss": 1.4407, |
|
"rewards/accuracies": 0.6100000143051147, |
|
"rewards/chosen": 2.621293067932129, |
|
"rewards/margins": 1.4806004762649536, |
|
"rewards/rejected": 1.1406925916671753, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.9116269801733585, |
|
"grad_norm": 0.014844976365566254, |
|
"learning_rate": 1.5778915422351102e-05, |
|
"logits/chosen": -18.85603141784668, |
|
"logits/rejected": -19.324058532714844, |
|
"logps/chosen": -493.2317199707031, |
|
"logps/rejected": -453.1470947265625, |
|
"loss": 0.913, |
|
"rewards/accuracies": 0.6700000166893005, |
|
"rewards/chosen": 4.14647102355957, |
|
"rewards/margins": 1.9552674293518066, |
|
"rewards/rejected": 2.1912038326263428, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.9166085483710272, |
|
"grad_norm": 90.2131576538086, |
|
"learning_rate": 1.5736262899772407e-05, |
|
"logits/chosen": -19.094078063964844, |
|
"logits/rejected": -20.168027877807617, |
|
"logps/chosen": -512.7498779296875, |
|
"logps/rejected": -448.6697692871094, |
|
"loss": 1.1494, |
|
"rewards/accuracies": 0.6100000143051147, |
|
"rewards/chosen": 2.183354139328003, |
|
"rewards/margins": 1.9273743629455566, |
|
"rewards/rejected": 0.2559796869754791, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.9215901165686958, |
|
"grad_norm": 0.30067598819732666, |
|
"learning_rate": 1.569345427196181e-05, |
|
"logits/chosen": -19.207042694091797, |
|
"logits/rejected": -20.777149200439453, |
|
"logps/chosen": -521.3990478515625, |
|
"logps/rejected": -436.8664855957031, |
|
"loss": 1.2902, |
|
"rewards/accuracies": 0.6299999952316284, |
|
"rewards/chosen": 2.0493509769439697, |
|
"rewards/margins": 1.9620305299758911, |
|
"rewards/rejected": 0.08732038736343384, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.9265716847663644, |
|
"grad_norm": 128.35350036621094, |
|
"learning_rate": 1.5650490703902666e-05, |
|
"logits/chosen": -19.485790252685547, |
|
"logits/rejected": -20.323007583618164, |
|
"logps/chosen": -496.7759094238281, |
|
"logps/rejected": -440.96759033203125, |
|
"loss": 1.4797, |
|
"rewards/accuracies": 0.5899999737739563, |
|
"rewards/chosen": 0.3512002229690552, |
|
"rewards/margins": 1.2790648937225342, |
|
"rewards/rejected": -0.927864670753479, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.9315532529640331, |
|
"grad_norm": 23.355682373046875, |
|
"learning_rate": 1.5607373364794836e-05, |
|
"logits/chosen": -19.615062713623047, |
|
"logits/rejected": -20.484060287475586, |
|
"logps/chosen": -479.2476806640625, |
|
"logps/rejected": -415.4735412597656, |
|
"loss": 1.2546, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 1.3234963417053223, |
|
"rewards/margins": 2.4520487785339355, |
|
"rewards/rejected": -1.1285524368286133, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 0.9365348211617017, |
|
"grad_norm": 41.07186508178711, |
|
"learning_rate": 1.5564103428022855e-05, |
|
"logits/chosen": -19.126056671142578, |
|
"logits/rejected": -19.900794982910156, |
|
"logps/chosen": -530.3897094726562, |
|
"logps/rejected": -466.9271545410156, |
|
"loss": 1.1304, |
|
"rewards/accuracies": 0.6600000262260437, |
|
"rewards/chosen": 1.134138584136963, |
|
"rewards/margins": 2.5866689682006836, |
|
"rewards/rejected": -1.4525303840637207, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.9415163893593703, |
|
"grad_norm": 3.795400381088257, |
|
"learning_rate": 1.552068207112402e-05, |
|
"logits/chosen": -18.972984313964844, |
|
"logits/rejected": -19.319841384887695, |
|
"logps/chosen": -517.9902954101562, |
|
"logps/rejected": -463.4305725097656, |
|
"loss": 1.6333, |
|
"rewards/accuracies": 0.5799999833106995, |
|
"rewards/chosen": 0.04141408950090408, |
|
"rewards/margins": 1.2581309080123901, |
|
"rewards/rejected": -1.2167168855667114, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.9464979575570389, |
|
"grad_norm": 0.2743411362171173, |
|
"learning_rate": 1.547711047575635e-05, |
|
"logits/chosen": -18.696569442749023, |
|
"logits/rejected": -19.06745719909668, |
|
"logps/chosen": -540.3345947265625, |
|
"logps/rejected": -484.9985656738281, |
|
"loss": 1.6534, |
|
"rewards/accuracies": 0.6399999856948853, |
|
"rewards/chosen": 2.271423816680908, |
|
"rewards/margins": 1.7851731777191162, |
|
"rewards/rejected": 0.48625069856643677, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.9514795257547076, |
|
"grad_norm": 27.537288665771484, |
|
"learning_rate": 1.543338982766639e-05, |
|
"logits/chosen": -18.664011001586914, |
|
"logits/rejected": -19.458066940307617, |
|
"logps/chosen": -518.22021484375, |
|
"logps/rejected": -418.8706359863281, |
|
"loss": 1.0862, |
|
"rewards/accuracies": 0.6800000071525574, |
|
"rewards/chosen": 2.7771294116973877, |
|
"rewards/margins": 2.859891891479492, |
|
"rewards/rejected": -0.08276252448558807, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 0.9564610939523762, |
|
"grad_norm": 0.9008951783180237, |
|
"learning_rate": 1.5389521316656992e-05, |
|
"logits/chosen": -18.849185943603516, |
|
"logits/rejected": -19.619192123413086, |
|
"logps/chosen": -494.7686462402344, |
|
"logps/rejected": -408.1366882324219, |
|
"loss": 1.1158, |
|
"rewards/accuracies": 0.7300000190734863, |
|
"rewards/chosen": 2.1583621501922607, |
|
"rewards/margins": 2.396536350250244, |
|
"rewards/rejected": -0.2381744235754013, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.9614426621500448, |
|
"grad_norm": 0.1889144629240036, |
|
"learning_rate": 1.5345506136554898e-05, |
|
"logits/chosen": -18.629066467285156, |
|
"logits/rejected": -19.594579696655273, |
|
"logps/chosen": -524.626708984375, |
|
"logps/rejected": -428.97344970703125, |
|
"loss": 1.1882, |
|
"rewards/accuracies": 0.6100000143051147, |
|
"rewards/chosen": 2.8290598392486572, |
|
"rewards/margins": 2.019918203353882, |
|
"rewards/rejected": 0.8091418743133545, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 0.9664242303477134, |
|
"grad_norm": 1.7383246421813965, |
|
"learning_rate": 1.5301345485178282e-05, |
|
"logits/chosen": -18.85825538635254, |
|
"logits/rejected": -19.791507720947266, |
|
"logps/chosen": -480.490234375, |
|
"logps/rejected": -380.1109619140625, |
|
"loss": 1.2643, |
|
"rewards/accuracies": 0.6700000166893005, |
|
"rewards/chosen": 2.243396520614624, |
|
"rewards/margins": 1.7966461181640625, |
|
"rewards/rejected": 0.44675034284591675, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.9714057985453821, |
|
"grad_norm": 73.9409408569336, |
|
"learning_rate": 1.525704056430412e-05, |
|
"logits/chosen": -18.52286148071289, |
|
"logits/rejected": -18.89972496032715, |
|
"logps/chosen": -528.177001953125, |
|
"logps/rejected": -484.8839111328125, |
|
"loss": 1.5849, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 2.4241716861724854, |
|
"rewards/margins": 1.1765490770339966, |
|
"rewards/rejected": 1.2476229667663574, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.9763873667430507, |
|
"grad_norm": 34.82683563232422, |
|
"learning_rate": 1.5212592579635512e-05, |
|
"logits/chosen": -18.213794708251953, |
|
"logits/rejected": -19.081279754638672, |
|
"logps/chosen": -520.0574340820312, |
|
"logps/rejected": -446.2875061035156, |
|
"loss": 1.171, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 1.9521498680114746, |
|
"rewards/margins": 1.636811375617981, |
|
"rewards/rejected": 0.315338671207428, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.9813689349407193, |
|
"grad_norm": 8.361115455627441, |
|
"learning_rate": 1.5168002740768857e-05, |
|
"logits/chosen": -18.713205337524414, |
|
"logits/rejected": -19.391826629638672, |
|
"logps/chosen": -503.5995178222656, |
|
"logps/rejected": -472.6417236328125, |
|
"loss": 1.1879, |
|
"rewards/accuracies": 0.6399999856948853, |
|
"rewards/chosen": 1.9274494647979736, |
|
"rewards/margins": 1.9319396018981934, |
|
"rewards/rejected": -0.00449012778699398, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 0.986350503138388, |
|
"grad_norm": 0.049951426684856415, |
|
"learning_rate": 1.512327226116094e-05, |
|
"logits/chosen": -19.126710891723633, |
|
"logits/rejected": -20.143205642700195, |
|
"logps/chosen": -521.0743408203125, |
|
"logps/rejected": -409.7085876464844, |
|
"loss": 1.2551, |
|
"rewards/accuracies": 0.6299999952316284, |
|
"rewards/chosen": 1.6364871263504028, |
|
"rewards/margins": 2.0774831771850586, |
|
"rewards/rejected": -0.44099605083465576, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.9913320713360566, |
|
"grad_norm": 0.5147112011909485, |
|
"learning_rate": 1.507840235809591e-05, |
|
"logits/chosen": -18.839317321777344, |
|
"logits/rejected": -19.660390853881836, |
|
"logps/chosen": -511.8052062988281, |
|
"logps/rejected": -445.6004638671875, |
|
"loss": 1.422, |
|
"rewards/accuracies": 0.6299999952316284, |
|
"rewards/chosen": 1.8398367166519165, |
|
"rewards/margins": 1.5229132175445557, |
|
"rewards/rejected": 0.3169235587120056, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 0.9963136395337252, |
|
"grad_norm": 19.235801696777344, |
|
"learning_rate": 1.503339425265215e-05, |
|
"logits/chosen": -18.8520450592041, |
|
"logits/rejected": -19.381229400634766, |
|
"logps/chosen": -479.9813537597656, |
|
"logps/rejected": -451.1719970703125, |
|
"loss": 1.5368, |
|
"rewards/accuracies": 0.6100000143051147, |
|
"rewards/chosen": 1.7022852897644043, |
|
"rewards/margins": 1.5142347812652588, |
|
"rewards/rejected": 0.1880505383014679, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.9963136395337252, |
|
"eval_logits/chosen": -19.80689811706543, |
|
"eval_logits/rejected": -20.71694564819336, |
|
"eval_logps/chosen": -488.4328308105469, |
|
"eval_logps/rejected": -420.06622314453125, |
|
"eval_loss": 1.361470103263855, |
|
"eval_rewards/accuracies": 0.6511780023574829, |
|
"eval_rewards/chosen": 3.077153444290161, |
|
"eval_rewards/margins": 2.0428242683410645, |
|
"eval_rewards/rejected": 1.0343292951583862, |
|
"eval_runtime": 472.868, |
|
"eval_samples_per_second": 3.219, |
|
"eval_steps_per_second": 0.404, |
|
"step": 20000 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 60222, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|