{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9988571428571429,
  "eval_steps": 50,
  "global_step": 437,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.022857142857142857,
      "grad_norm": 8.11412987933583,
      "learning_rate": 1.1363636363636363e-07,
      "logits/chosen": -2.66959810256958,
      "logits/rejected": -2.6077542304992676,
      "logps/chosen": -296.6876220703125,
      "logps/rejected": -254.7753448486328,
      "loss": 0.6933,
      "rewards/accuracies": 0.3812499940395355,
      "rewards/chosen": -0.0003348872414790094,
      "rewards/margins": -0.00012643556692637503,
      "rewards/rejected": -0.0002084516454488039,
      "step": 10
    },
    {
      "epoch": 0.045714285714285714,
      "grad_norm": 9.934680850734814,
      "learning_rate": 2.2727272727272726e-07,
      "logits/chosen": -2.6891586780548096,
      "logits/rejected": -2.6237130165100098,
      "logps/chosen": -294.1405334472656,
      "logps/rejected": -254.05810546875,
      "loss": 0.6922,
      "rewards/accuracies": 0.65625,
      "rewards/chosen": 0.0007635392248630524,
      "rewards/margins": 0.0031762172002345324,
      "rewards/rejected": -0.0024126782082021236,
      "step": 20
    },
    {
      "epoch": 0.06857142857142857,
      "grad_norm": 8.46839233994518,
      "learning_rate": 3.4090909090909085e-07,
      "logits/chosen": -2.657853364944458,
      "logits/rejected": -2.562720775604248,
      "logps/chosen": -285.6708984375,
      "logps/rejected": -247.06838989257812,
      "loss": 0.6879,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": 0.012036588974297047,
      "rewards/margins": 0.01440697442740202,
      "rewards/rejected": -0.002370386151596904,
      "step": 30
    },
    {
      "epoch": 0.09142857142857143,
      "grad_norm": 10.38860192358711,
      "learning_rate": 4.545454545454545e-07,
      "logits/chosen": -2.6249475479125977,
      "logits/rejected": -2.5956408977508545,
      "logps/chosen": -269.8529357910156,
      "logps/rejected": -273.1573791503906,
      "loss": 0.6793,
      "rewards/accuracies": 0.65625,
      "rewards/chosen": 0.02421986497938633,
      "rewards/margins": 0.020207645371556282,
      "rewards/rejected": 0.004012218676507473,
      "step": 40
    },
    {
      "epoch": 0.11428571428571428,
      "grad_norm": 8.812178369158405,
      "learning_rate": 4.997124959943201e-07,
      "logits/chosen": -2.6135976314544678,
      "logits/rejected": -2.5756285190582275,
      "logps/chosen": -305.51312255859375,
      "logps/rejected": -271.22247314453125,
      "loss": 0.6619,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": -0.010274273343384266,
      "rewards/margins": 0.067402184009552,
      "rewards/rejected": -0.0776764526963234,
      "step": 50
    },
    {
      "epoch": 0.11428571428571428,
      "eval_logits/chosen": -2.569648265838623,
      "eval_logits/rejected": -2.4695067405700684,
      "eval_logps/chosen": -277.47930908203125,
      "eval_logps/rejected": -235.5894012451172,
      "eval_loss": 0.6465452313423157,
      "eval_rewards/accuracies": 0.693965494632721,
      "eval_rewards/chosen": -0.018753662705421448,
      "eval_rewards/margins": 0.1463788002729416,
      "eval_rewards/rejected": -0.16513246297836304,
      "eval_runtime": 90.334,
      "eval_samples_per_second": 20.269,
      "eval_steps_per_second": 0.321,
      "step": 50
    },
    {
      "epoch": 0.13714285714285715,
      "grad_norm": 11.6151396248973,
      "learning_rate": 4.979579212164186e-07,
      "logits/chosen": -2.6002144813537598,
      "logits/rejected": -2.501227855682373,
      "logps/chosen": -298.59063720703125,
      "logps/rejected": -302.35577392578125,
      "loss": 0.6307,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": -0.047010406851768494,
      "rewards/margins": 0.16466036438941956,
      "rewards/rejected": -0.21167078614234924,
      "step": 60
    },
    {
      "epoch": 0.16,
      "grad_norm": 14.811620243521006,
      "learning_rate": 4.946196886175515e-07,
      "logits/chosen": -2.6356024742126465,
      "logits/rejected": -2.5619795322418213,
      "logps/chosen": -280.7294921875,
      "logps/rejected": -274.32647705078125,
      "loss": 0.6194,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -0.16827444732189178,
      "rewards/margins": 0.20077195763587952,
      "rewards/rejected": -0.3690463900566101,
      "step": 70
    },
    {
      "epoch": 0.18285714285714286,
      "grad_norm": 16.993875634534675,
      "learning_rate": 4.897191188239667e-07,
      "logits/chosen": -2.642766237258911,
      "logits/rejected": -2.582462787628174,
      "logps/chosen": -321.19744873046875,
      "logps/rejected": -298.94171142578125,
      "loss": 0.6087,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -0.0975220575928688,
      "rewards/margins": 0.27095723152160645,
      "rewards/rejected": -0.36847931146621704,
      "step": 80
    },
    {
      "epoch": 0.2057142857142857,
      "grad_norm": 17.671501985453162,
      "learning_rate": 4.832875107981763e-07,
      "logits/chosen": -2.4848644733428955,
      "logits/rejected": -2.404571533203125,
      "logps/chosen": -310.0569152832031,
      "logps/rejected": -313.3294982910156,
      "loss": 0.6012,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": -0.345020592212677,
      "rewards/margins": 0.33231958746910095,
      "rewards/rejected": -0.6773402690887451,
      "step": 90
    },
    {
      "epoch": 0.22857142857142856,
      "grad_norm": 17.978608029535753,
      "learning_rate": 4.753659419387223e-07,
      "logits/chosen": -1.396976351737976,
      "logits/rejected": -1.3131816387176514,
      "logps/chosen": -309.3507385253906,
      "logps/rejected": -321.52056884765625,
      "loss": 0.5843,
      "rewards/accuracies": 0.643750011920929,
      "rewards/chosen": -0.6027094125747681,
      "rewards/margins": 0.35499778389930725,
      "rewards/rejected": -0.9577071070671082,
      "step": 100
    },
    {
      "epoch": 0.22857142857142856,
      "eval_logits/chosen": -1.3850308656692505,
      "eval_logits/rejected": -1.1914278268814087,
      "eval_logps/chosen": -320.03228759765625,
      "eval_logps/rejected": -321.1168518066406,
      "eval_loss": 0.5719701647758484,
      "eval_rewards/accuracies": 0.7456896305084229,
      "eval_rewards/chosen": -0.44428348541259766,
      "eval_rewards/margins": 0.5761240124702454,
      "eval_rewards/rejected": -1.0204075574874878,
      "eval_runtime": 89.9125,
      "eval_samples_per_second": 20.364,
      "eval_steps_per_second": 0.323,
      "step": 100
    },
    {
      "epoch": 0.25142857142857145,
      "grad_norm": 30.473603379477254,
      "learning_rate": 4.660050057270191e-07,
      "logits/chosen": -1.5161502361297607,
      "logits/rejected": -1.4007251262664795,
      "logps/chosen": -326.7287292480469,
      "logps/rejected": -381.71234130859375,
      "loss": 0.5578,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -0.47587689757347107,
      "rewards/margins": 0.4765087962150574,
      "rewards/rejected": -0.9523857235908508,
      "step": 110
    },
    {
      "epoch": 0.2742857142857143,
      "grad_norm": 16.07677825536776,
      "learning_rate": 4.5526448859687144e-07,
      "logits/chosen": -1.329465627670288,
      "logits/rejected": -1.2029626369476318,
      "logps/chosen": -313.04150390625,
      "logps/rejected": -344.30377197265625,
      "loss": 0.59,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -0.4427226483821869,
      "rewards/margins": 0.44687420129776,
      "rewards/rejected": -0.8895969390869141,
      "step": 120
    },
    {
      "epoch": 0.29714285714285715,
      "grad_norm": 19.25626895436831,
      "learning_rate": 4.432129880904388e-07,
      "logits/chosen": -1.0987221002578735,
      "logits/rejected": -0.7802125215530396,
      "logps/chosen": -344.36712646484375,
      "logps/rejected": -391.39154052734375,
      "loss": 0.5477,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": -0.5906578302383423,
      "rewards/margins": 0.5952860116958618,
      "rewards/rejected": -1.1859437227249146,
      "step": 130
    },
    {
      "epoch": 0.32,
      "grad_norm": 32.057510195911,
      "learning_rate": 4.299274747394055e-07,
      "logits/chosen": -0.759337306022644,
      "logits/rejected": -0.5684966444969177,
      "logps/chosen": -352.746826171875,
      "logps/rejected": -371.2802734375,
      "loss": 0.5676,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": -0.540864884853363,
      "rewards/margins": 0.43386125564575195,
      "rewards/rejected": -0.9747260808944702,
      "step": 140
    },
    {
      "epoch": 0.34285714285714286,
      "grad_norm": 31.167990046129457,
      "learning_rate": 4.1549280046953653e-07,
      "logits/chosen": -1.4192949533462524,
      "logits/rejected": -0.8910300135612488,
      "logps/chosen": -302.2892150878906,
      "logps/rejected": -339.9471435546875,
      "loss": 0.5509,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -0.45886653661727905,
      "rewards/margins": 0.62995845079422,
      "rewards/rejected": -1.0888248682022095,
      "step": 150
    },
    {
      "epoch": 0.34285714285714286,
      "eval_logits/chosen": -1.355178952217102,
      "eval_logits/rejected": -0.6634992361068726,
      "eval_logps/chosen": -323.4683532714844,
      "eval_logps/rejected": -345.5482482910156,
      "eval_loss": 0.5466835498809814,
      "eval_rewards/accuracies": 0.732758641242981,
      "eval_rewards/chosen": -0.4786438047885895,
      "eval_rewards/margins": 0.7860775589942932,
      "eval_rewards/rejected": -1.264721393585205,
      "eval_runtime": 90.391,
      "eval_samples_per_second": 20.256,
      "eval_steps_per_second": 0.321,
      "step": 150
    },
    {
      "epoch": 0.3657142857142857,
      "grad_norm": 24.250974026024966,
      "learning_rate": 4.000011566683401e-07,
      "logits/chosen": -1.3669617176055908,
      "logits/rejected": -0.9052613377571106,
      "logps/chosen": -333.5279846191406,
      "logps/rejected": -360.4810791015625,
      "loss": 0.55,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": -0.6962090730667114,
      "rewards/margins": 0.5538384318351746,
      "rewards/rejected": -1.2500474452972412,
      "step": 160
    },
    {
      "epoch": 0.38857142857142857,
      "grad_norm": 24.704591344446357,
      "learning_rate": 3.8355148537705047e-07,
      "logits/chosen": -1.3166093826293945,
      "logits/rejected": -0.7781628966331482,
      "logps/chosen": -343.19940185546875,
      "logps/rejected": -390.14239501953125,
      "loss": 0.5266,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": -0.6181753873825073,
      "rewards/margins": 0.671627402305603,
      "rewards/rejected": -1.2898027896881104,
      "step": 170
    },
    {
      "epoch": 0.4114285714285714,
      "grad_norm": 22.05889500026718,
      "learning_rate": 3.662488473675315e-07,
      "logits/chosen": -1.254248023033142,
      "logits/rejected": -0.8255653381347656,
      "logps/chosen": -341.5926818847656,
      "logps/rejected": -382.6631774902344,
      "loss": 0.542,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -0.7656871676445007,
      "rewards/margins": 0.5109010338783264,
      "rewards/rejected": -1.2765882015228271,
      "step": 180
    },
    {
      "epoch": 0.4342857142857143,
      "grad_norm": 23.909926187935003,
      "learning_rate": 3.48203751140067e-07,
      "logits/chosen": -1.0761396884918213,
      "logits/rejected": -0.6647660136222839,
      "logps/chosen": -325.56683349609375,
      "logps/rejected": -379.4165344238281,
      "loss": 0.5356,
      "rewards/accuracies": 0.6937500238418579,
      "rewards/chosen": -0.6315457820892334,
      "rewards/margins": 0.5936378240585327,
      "rewards/rejected": -1.2251836061477661,
      "step": 190
    },
    {
      "epoch": 0.45714285714285713,
      "grad_norm": 20.73701530382175,
      "learning_rate": 3.2953144712759537e-07,
      "logits/chosen": -0.5213090181350708,
      "logits/rejected": 0.013022899627685547,
      "logps/chosen": -359.0880432128906,
      "logps/rejected": -410.6299743652344,
      "loss": 0.5275,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": -0.927217960357666,
      "rewards/margins": 0.6191995739936829,
      "rewards/rejected": -1.5464175939559937,
      "step": 200
    },
    {
      "epoch": 0.45714285714285713,
      "eval_logits/chosen": -1.0376836061477661,
      "eval_logits/rejected": -0.08949049562215805,
      "eval_logps/chosen": -327.8020935058594,
      "eval_logps/rejected": -357.148681640625,
      "eval_loss": 0.5396182537078857,
      "eval_rewards/accuracies": 0.7715517282485962,
      "eval_rewards/chosen": -0.5219810605049133,
      "eval_rewards/margins": 0.8587445020675659,
      "eval_rewards/rejected": -1.380725622177124,
      "eval_runtime": 90.1866,
      "eval_samples_per_second": 20.302,
      "eval_steps_per_second": 0.322,
      "step": 200
    },
    {
      "epoch": 0.48,
      "grad_norm": 23.237204035063847,
      "learning_rate": 3.103511916141658e-07,
      "logits/chosen": -0.9574594497680664,
      "logits/rejected": -0.16778725385665894,
      "logps/chosen": -348.38385009765625,
      "logps/rejected": -397.10321044921875,
      "loss": 0.5219,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -0.686768651008606,
      "rewards/margins": 0.7565571665763855,
      "rewards/rejected": -1.4433258771896362,
      "step": 210
    },
    {
      "epoch": 0.5028571428571429,
      "grad_norm": 19.18259747527528,
      "learning_rate": 2.9078548506882117e-07,
      "logits/chosen": -0.37646159529685974,
      "logits/rejected": 0.516203761100769,
      "logps/chosen": -383.25811767578125,
      "logps/rejected": -413.0987243652344,
      "loss": 0.5326,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -0.8871996998786926,
      "rewards/margins": 0.6418129205703735,
      "rewards/rejected": -1.529012680053711,
      "step": 220
    },
    {
      "epoch": 0.5257142857142857,
      "grad_norm": 21.792119076799803,
      "learning_rate": 2.709592897595191e-07,
      "logits/chosen": -0.94196617603302,
      "logits/rejected": -0.2367326021194458,
      "logps/chosen": -343.68157958984375,
      "logps/rejected": -391.78729248046875,
      "loss": 0.5313,
      "rewards/accuracies": 0.768750011920929,
      "rewards/chosen": -0.583425760269165,
      "rewards/margins": 0.6397222280502319,
      "rewards/rejected": -1.2231481075286865,
      "step": 230
    },
    {
      "epoch": 0.5485714285714286,
      "grad_norm": 23.250665470567046,
      "learning_rate": 2.509992316440332e-07,
      "logits/chosen": -0.7590802907943726,
      "logits/rejected": 0.23232534527778625,
      "logps/chosen": -375.7669982910156,
      "logps/rejected": -393.1439208984375,
      "loss": 0.5083,
      "rewards/accuracies": 0.75,
      "rewards/chosen": -0.661091685295105,
      "rewards/margins": 0.7419286370277405,
      "rewards/rejected": -1.4030205011367798,
      "step": 240
    },
    {
      "epoch": 0.5714285714285714,
      "grad_norm": 23.283347220975337,
      "learning_rate": 2.3103279163519918e-07,
      "logits/chosen": 0.3902924656867981,
      "logits/rejected": 1.2702767848968506,
      "logps/chosen": -430.42510986328125,
      "logps/rejected": -429.90240478515625,
      "loss": 0.5665,
      "rewards/accuracies": 0.668749988079071,
      "rewards/chosen": -1.4317457675933838,
      "rewards/margins": 0.5207899808883667,
      "rewards/rejected": -1.95253586769104,
      "step": 250
    },
    {
      "epoch": 0.5714285714285714,
      "eval_logits/chosen": 0.2878158688545227,
      "eval_logits/rejected": 1.5253632068634033,
      "eval_logps/chosen": -384.02301025390625,
      "eval_logps/rejected": -415.1577453613281,
      "eval_loss": 0.5404527187347412,
      "eval_rewards/accuracies": 0.7629310488700867,
      "eval_rewards/chosen": -1.0841907262802124,
      "eval_rewards/margins": 0.8766254782676697,
      "eval_rewards/rejected": -1.9608159065246582,
      "eval_runtime": 91.5614,
      "eval_samples_per_second": 19.998,
      "eval_steps_per_second": 0.317,
      "step": 250
    },
    {
      "epoch": 0.5942857142857143,
      "grad_norm": 28.334390897274133,
      "learning_rate": 2.1118749140573358e-07,
      "logits/chosen": -0.2290700227022171,
      "logits/rejected": 0.8288987278938293,
      "logps/chosen": -408.00201416015625,
      "logps/rejected": -452.023681640625,
      "loss": 0.5197,
      "rewards/accuracies": 0.7875000238418579,
      "rewards/chosen": -0.9032201766967773,
      "rewards/margins": 0.8297585248947144,
      "rewards/rejected": -1.7329788208007812,
      "step": 260
    },
    {
      "epoch": 0.6171428571428571,
      "grad_norm": 23.854460344353054,
      "learning_rate": 1.9159007893272703e-07,
      "logits/chosen": -0.17421701550483704,
      "logits/rejected": 0.6511275172233582,
      "logps/chosen": -342.1952209472656,
      "logps/rejected": -396.54083251953125,
      "loss": 0.5294,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": -0.7613161206245422,
      "rewards/margins": 0.7101620435714722,
      "rewards/rejected": -1.4714782238006592,
      "step": 270
    },
    {
      "epoch": 0.64,
      "grad_norm": 27.414501100794606,
      "learning_rate": 1.7236571898357766e-07,
      "logits/chosen": -0.08564956486225128,
      "logits/rejected": 0.9427372813224792,
      "logps/chosen": -367.49407958984375,
      "logps/rejected": -409.3499755859375,
      "loss": 0.518,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": -0.8473467826843262,
      "rewards/margins": 0.8089747428894043,
      "rewards/rejected": -1.6563212871551514,
      "step": 280
    },
    {
      "epoch": 0.6628571428571428,
      "grad_norm": 23.166387688948994,
      "learning_rate": 1.5363719371356882e-07,
      "logits/chosen": 0.06339935958385468,
      "logits/rejected": 0.5719184875488281,
      "logps/chosen": -360.3900451660156,
      "logps/rejected": -409.3319091796875,
      "loss": 0.5387,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": -0.9427051544189453,
      "rewards/margins": 0.5575781464576721,
      "rewards/rejected": -1.500283122062683,
      "step": 290
    },
    {
      "epoch": 0.6857142857142857,
      "grad_norm": 28.449941474840944,
      "learning_rate": 1.3552411848071565e-07,
      "logits/chosen": -0.3682107627391815,
      "logits/rejected": 0.5333132743835449,
      "logps/chosen": -333.92449951171875,
      "logps/rejected": -425.3599548339844,
      "loss": 0.5202,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": -0.7675741314888,
      "rewards/margins": 0.9044278264045715,
      "rewards/rejected": -1.6720020771026611,
      "step": 300
    },
    {
      "epoch": 0.6857142857142857,
      "eval_logits/chosen": -0.21915225684642792,
      "eval_logits/rejected": 0.9934114813804626,
      "eval_logps/chosen": -349.8433837890625,
      "eval_logps/rejected": -392.549560546875,
      "eval_loss": 0.5274777412414551,
      "eval_rewards/accuracies": 0.7715517282485962,
      "eval_rewards/chosen": -0.7423940300941467,
      "eval_rewards/margins": 0.9923400282859802,
      "eval_rewards/rejected": -1.734734058380127,
      "eval_runtime": 90.6157,
      "eval_samples_per_second": 20.206,
      "eval_steps_per_second": 0.32,
      "step": 300
    },
    {
      "epoch": 0.7085714285714285,
      "grad_norm": 21.530490280501045,
      "learning_rate": 1.1814217788631473e-07,
      "logits/chosen": -0.17677690088748932,
      "logits/rejected": 0.3225722908973694,
      "logps/chosen": -356.5892639160156,
      "logps/rejected": -431.662109375,
      "loss": 0.5328,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -0.8150566816329956,
      "rewards/margins": 0.6877792477607727,
      "rewards/rejected": -1.502835988998413,
      "step": 310
    },
    {
      "epoch": 0.7314285714285714,
      "grad_norm": 26.395907969219987,
      "learning_rate": 1.0160238692045331e-07,
      "logits/chosen": -0.2509198486804962,
      "logits/rejected": 0.4886396527290344,
      "logps/chosen": -359.4185485839844,
      "logps/rejected": -413.3671875,
      "loss": 0.535,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": -0.8422183990478516,
      "rewards/margins": 0.7798849940299988,
      "rewards/rejected": -1.6221033334732056,
      "step": 320
    },
    {
      "epoch": 0.7542857142857143,
      "grad_norm": 25.409253983308545,
      "learning_rate": 8.601038193139438e-08,
      "logits/chosen": -0.13014790415763855,
      "logits/rejected": 0.5298113822937012,
      "logps/chosen": -378.82012939453125,
      "logps/rejected": -402.17156982421875,
      "loss": 0.5302,
      "rewards/accuracies": 0.6937500238418579,
      "rewards/chosen": -0.973560631275177,
      "rewards/margins": 0.6031589508056641,
      "rewards/rejected": -1.5767196416854858,
      "step": 330
    },
    {
      "epoch": 0.7771428571428571,
      "grad_norm": 26.91341277336219,
      "learning_rate": 7.146574594727572e-08,
      "logits/chosen": -0.4229533076286316,
      "logits/rejected": 0.5619645714759827,
      "logps/chosen": -363.5431213378906,
      "logps/rejected": -404.77935791015625,
      "loss": 0.5192,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": -0.8485255241394043,
      "rewards/margins": 0.8047366142272949,
      "rewards/rejected": -1.6532618999481201,
      "step": 340
    },
    {
      "epoch": 0.8,
      "grad_norm": 24.565353732521466,
      "learning_rate": 5.8061372659157306e-08,
      "logits/chosen": -0.17489977180957794,
      "logits/rejected": 0.7508963346481323,
      "logps/chosen": -371.7098693847656,
      "logps/rejected": -395.7353210449219,
      "loss": 0.5261,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -0.8898698091506958,
      "rewards/margins": 0.6660181879997253,
      "rewards/rejected": -1.5558879375457764,
      "step": 350
    },
    {
      "epoch": 0.8,
      "eval_logits/chosen": -0.2696850597858429,
      "eval_logits/rejected": 0.9007923007011414,
      "eval_logps/chosen": -351.73101806640625,
      "eval_logps/rejected": -391.03973388671875,
      "eval_loss": 0.5234382748603821,
      "eval_rewards/accuracies": 0.7586206793785095,
      "eval_rewards/chosen": -0.76127028465271,
      "eval_rewards/margins": 0.9583660364151001,
      "eval_rewards/rejected": -1.71963632106781,
      "eval_runtime": 90.1922,
      "eval_samples_per_second": 20.301,
      "eval_steps_per_second": 0.322,
      "step": 350
    },
    {
      "epoch": 0.8228571428571428,
      "grad_norm": 26.528490406026805,
      "learning_rate": 4.5882873127531614e-08,
      "logits/chosen": -0.18714679777622223,
      "logits/rejected": 0.5642833113670349,
      "logps/chosen": -389.9281921386719,
      "logps/rejected": -425.6780700683594,
      "loss": 0.5053,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": -0.8513727188110352,
      "rewards/margins": 0.6271126866340637,
      "rewards/rejected": -1.478485345840454,
      "step": 360
    },
    {
      "epoch": 0.8457142857142858,
      "grad_norm": 26.160208453826513,
      "learning_rate": 3.500802900154412e-08,
      "logits/chosen": -0.11366554349660873,
      "logits/rejected": 0.7036272287368774,
      "logps/chosen": -315.89349365234375,
      "logps/rejected": -380.2809143066406,
      "loss": 0.5279,
      "rewards/accuracies": 0.6937500238418579,
      "rewards/chosen": -0.7633152008056641,
      "rewards/margins": 0.7076437473297119,
      "rewards/rejected": -1.470958948135376,
      "step": 370
    },
    {
      "epoch": 0.8685714285714285,
      "grad_norm": 22.845593504615394,
      "learning_rate": 2.550629574310309e-08,
      "logits/chosen": -0.2266564667224884,
      "logits/rejected": 0.5173102021217346,
      "logps/chosen": -349.7990417480469,
      "logps/rejected": -402.1055603027344,
      "loss": 0.5236,
      "rewards/accuracies": 0.6937500238418579,
      "rewards/chosen": -0.7807506918907166,
      "rewards/margins": 0.665580689907074,
      "rewards/rejected": -1.4463313817977905,
      "step": 380
    },
    {
      "epoch": 0.8914285714285715,
      "grad_norm": 22.30415183791987,
      "learning_rate": 1.7438359028687983e-08,
      "logits/chosen": -0.18654844164848328,
      "logits/rejected": 0.17555546760559082,
      "logps/chosen": -343.80609130859375,
      "logps/rejected": -413.258544921875,
      "loss": 0.5215,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -0.8466179966926575,
      "rewards/margins": 0.5847481489181519,
      "rewards/rejected": -1.431365966796875,
      "step": 390
    },
    {
      "epoch": 0.9142857142857143,
      "grad_norm": 25.36293321655948,
      "learning_rate": 1.0855747162029361e-08,
      "logits/chosen": -0.39323678612709045,
      "logits/rejected": 0.6321589946746826,
      "logps/chosen": -364.44781494140625,
      "logps/rejected": -393.2674865722656,
      "loss": 0.5343,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -0.7503901720046997,
      "rewards/margins": 0.7446303963661194,
      "rewards/rejected": -1.4950206279754639,
      "step": 400
    },
    {
      "epoch": 0.9142857142857143,
      "eval_logits/chosen": -0.3663737177848816,
      "eval_logits/rejected": 0.7649080157279968,
      "eval_logps/chosen": -345.4830017089844,
      "eval_logps/rejected": -382.0352478027344,
      "eval_loss": 0.5226701498031616,
      "eval_rewards/accuracies": 0.7629310488700867,
      "eval_rewards/chosen": -0.6987906098365784,
      "eval_rewards/margins": 0.9308006763458252,
      "eval_rewards/rejected": -1.6295913457870483,
      "eval_runtime": 91.5738,
      "eval_samples_per_second": 19.995,
      "eval_steps_per_second": 0.317,
      "step": 400
    },
    {
      "epoch": 0.9371428571428572,
      "grad_norm": 21.80106996482806,
      "learning_rate": 5.8005019731033615e-09,
      "logits/chosen": -0.4477500021457672,
      "logits/rejected": 0.4871234893798828,
      "logps/chosen": -340.3522033691406,
      "logps/rejected": -401.26629638671875,
      "loss": 0.5153,
      "rewards/accuracies": 0.7875000238418579,
      "rewards/chosen": -0.6757135391235352,
      "rewards/margins": 0.9294708371162415,
      "rewards/rejected": -1.6051843166351318,
      "step": 410
    },
    {
      "epoch": 0.96,
      "grad_norm": 23.103516202896476,
      "learning_rate": 2.3049103053431886e-09,
      "logits/chosen": -0.3995040953159332,
      "logits/rejected": 0.3316659927368164,
      "logps/chosen": -386.6695861816406,
      "logps/rejected": -383.6183776855469,
      "loss": 0.5348,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": -0.7931796312332153,
      "rewards/margins": 0.6191332936286926,
      "rewards/rejected": -1.4123131036758423,
      "step": 420
    },
    {
      "epoch": 0.9828571428571429,
      "grad_norm": 22.91261742161674,
      "learning_rate": 3.9129780600541397e-10,
      "logits/chosen": -0.2376430332660675,
      "logits/rejected": 0.6147540807723999,
      "logps/chosen": -368.62451171875,
      "logps/rejected": -394.5757751464844,
      "loss": 0.5369,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": -0.8315681219100952,
      "rewards/margins": 0.7056232690811157,
      "rewards/rejected": -1.53719162940979,
      "step": 430
    },
    {
      "epoch": 0.9988571428571429,
      "step": 437,
      "total_flos": 0.0,
      "train_loss": 0.5600041279258117,
      "train_runtime": 11271.3049,
      "train_samples_per_second": 4.968,
      "train_steps_per_second": 0.039
    }
  ],
  "logging_steps": 10,
  "max_steps": 437,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}