|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 50, |
|
"global_step": 436, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.022935779816513763, |
|
"grad_norm": 8.180421521208332, |
|
"learning_rate": 1.1363636363636363e-07, |
|
"logits/chosen": -2.6708250045776367, |
|
"logits/rejected": -2.6100573539733887, |
|
"logps/chosen": -280.91131591796875, |
|
"logps/rejected": -254.9091033935547, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": 2.8257753001525998e-05, |
|
"rewards/margins": -1.8637976609170437e-05, |
|
"rewards/rejected": 4.689567140303552e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.045871559633027525, |
|
"grad_norm": 10.108880291822361, |
|
"learning_rate": 2.2727272727272726e-07, |
|
"logits/chosen": -2.6290431022644043, |
|
"logits/rejected": -2.5482747554779053, |
|
"logps/chosen": -295.01123046875, |
|
"logps/rejected": -235.47012329101562, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.0019573778845369816, |
|
"rewards/margins": 0.002028578193858266, |
|
"rewards/rejected": -7.120029476936907e-05, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06880733944954129, |
|
"grad_norm": 7.7116147316900845, |
|
"learning_rate": 3.4090909090909085e-07, |
|
"logits/chosen": -2.6313440799713135, |
|
"logits/rejected": -2.608391046524048, |
|
"logps/chosen": -299.28961181640625, |
|
"logps/rejected": -286.8223571777344, |
|
"loss": 0.6878, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.011437652632594109, |
|
"rewards/margins": 0.009487219154834747, |
|
"rewards/rejected": 0.0019504327792674303, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.09174311926605505, |
|
"grad_norm": 8.453965686447793, |
|
"learning_rate": 4.545454545454545e-07, |
|
"logits/chosen": -2.6830360889434814, |
|
"logits/rejected": -2.6185193061828613, |
|
"logps/chosen": -314.0823669433594, |
|
"logps/rejected": -274.99749755859375, |
|
"loss": 0.6763, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.04395774006843567, |
|
"rewards/margins": 0.034831658005714417, |
|
"rewards/rejected": 0.009126082062721252, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.11467889908256881, |
|
"grad_norm": 8.520035411779036, |
|
"learning_rate": 4.997110275491701e-07, |
|
"logits/chosen": -2.5832035541534424, |
|
"logits/rejected": -2.560774564743042, |
|
"logps/chosen": -294.65460205078125, |
|
"logps/rejected": -279.75335693359375, |
|
"loss": 0.6573, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.010480286553502083, |
|
"rewards/margins": 0.08574546873569489, |
|
"rewards/rejected": -0.07526517659425735, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11467889908256881, |
|
"eval_logits/chosen": -2.516770124435425, |
|
"eval_logits/rejected": -2.4140138626098633, |
|
"eval_logps/chosen": -289.93359375, |
|
"eval_logps/rejected": -261.017333984375, |
|
"eval_loss": 0.6482123136520386, |
|
"eval_rewards/accuracies": 0.642241358757019, |
|
"eval_rewards/chosen": -0.059276144951581955, |
|
"eval_rewards/margins": 0.09442207962274551, |
|
"eval_rewards/rejected": -0.15369825065135956, |
|
"eval_runtime": 91.2745, |
|
"eval_samples_per_second": 19.918, |
|
"eval_steps_per_second": 0.318, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13761467889908258, |
|
"grad_norm": 15.433607591426341, |
|
"learning_rate": 4.979475034558115e-07, |
|
"logits/chosen": -2.520766496658325, |
|
"logits/rejected": -2.4426732063293457, |
|
"logps/chosen": -295.76214599609375, |
|
"logps/rejected": -292.59765625, |
|
"loss": 0.6376, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.0482044480741024, |
|
"rewards/margins": 0.11990388482809067, |
|
"rewards/rejected": -0.16810834407806396, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.16055045871559634, |
|
"grad_norm": 14.264817414504142, |
|
"learning_rate": 4.945923025551788e-07, |
|
"logits/chosen": -2.4215266704559326, |
|
"logits/rejected": -2.30562162399292, |
|
"logps/chosen": -306.8285217285156, |
|
"logps/rejected": -274.642333984375, |
|
"loss": 0.6185, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.15291962027549744, |
|
"rewards/margins": 0.16322971880435944, |
|
"rewards/rejected": -0.3161493241786957, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1834862385321101, |
|
"grad_norm": 13.495382013613375, |
|
"learning_rate": 4.896669632591651e-07, |
|
"logits/chosen": -2.31144380569458, |
|
"logits/rejected": -2.2500224113464355, |
|
"logps/chosen": -320.6394348144531, |
|
"logps/rejected": -311.4682922363281, |
|
"loss": 0.6172, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.4840007424354553, |
|
"rewards/margins": 0.20117254555225372, |
|
"rewards/rejected": -0.6851732134819031, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.20642201834862386, |
|
"grad_norm": 15.127866867375296, |
|
"learning_rate": 4.832031033425662e-07, |
|
"logits/chosen": -1.7405483722686768, |
|
"logits/rejected": -1.5851049423217773, |
|
"logps/chosen": -324.17535400390625, |
|
"logps/rejected": -355.6795959472656, |
|
"loss": 0.6095, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.6825101971626282, |
|
"rewards/margins": 0.29254621267318726, |
|
"rewards/rejected": -0.9750563502311707, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.22935779816513763, |
|
"grad_norm": 18.548127952359756, |
|
"learning_rate": 4.752422169756047e-07, |
|
"logits/chosen": -1.1513614654541016, |
|
"logits/rejected": -0.8654650449752808, |
|
"logps/chosen": -322.5659484863281, |
|
"logps/rejected": -328.91851806640625, |
|
"loss": 0.5517, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.3547930121421814, |
|
"rewards/margins": 0.43715816736221313, |
|
"rewards/rejected": -0.7919511795043945, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22935779816513763, |
|
"eval_logits/chosen": -0.635834276676178, |
|
"eval_logits/rejected": -0.16341222822666168, |
|
"eval_logps/chosen": -332.67840576171875, |
|
"eval_logps/rejected": -346.1421813964844, |
|
"eval_loss": 0.5831290483474731, |
|
"eval_rewards/accuracies": 0.693965494632721, |
|
"eval_rewards/chosen": -0.48672425746917725, |
|
"eval_rewards/margins": 0.5182227492332458, |
|
"eval_rewards/rejected": -1.0049471855163574, |
|
"eval_runtime": 92.7607, |
|
"eval_samples_per_second": 19.599, |
|
"eval_steps_per_second": 0.313, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.25229357798165136, |
|
"grad_norm": 21.86823905005592, |
|
"learning_rate": 4.658354083558188e-07, |
|
"logits/chosen": -0.5007633566856384, |
|
"logits/rejected": -0.005487987305969, |
|
"logps/chosen": -308.6052551269531, |
|
"logps/rejected": -346.922607421875, |
|
"loss": 0.582, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.43811899423599243, |
|
"rewards/margins": 0.6061130166053772, |
|
"rewards/rejected": -1.0442321300506592, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.27522935779816515, |
|
"grad_norm": 25.537041781100793, |
|
"learning_rate": 4.550430636492389e-07, |
|
"logits/chosen": -0.5123111605644226, |
|
"logits/rejected": 0.09549371898174286, |
|
"logps/chosen": -376.27618408203125, |
|
"logps/rejected": -340.7986145019531, |
|
"loss": 0.5541, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.5627577304840088, |
|
"rewards/margins": 0.48137393593788147, |
|
"rewards/rejected": -1.0441316366195679, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.2981651376146789, |
|
"grad_norm": 23.843161239454492, |
|
"learning_rate": 4.429344633468004e-07, |
|
"logits/chosen": -0.09251334518194199, |
|
"logits/rejected": 0.23536929488182068, |
|
"logps/chosen": -317.888427734375, |
|
"logps/rejected": -373.44952392578125, |
|
"loss": 0.5659, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7276519536972046, |
|
"rewards/margins": 0.4391087591648102, |
|
"rewards/rejected": -1.1667606830596924, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.3211009174311927, |
|
"grad_norm": 31.20904640703908, |
|
"learning_rate": 4.2958733752443187e-07, |
|
"logits/chosen": -0.3974105715751648, |
|
"logits/rejected": 0.24647757411003113, |
|
"logps/chosen": -351.69537353515625, |
|
"logps/rejected": -372.2206115722656, |
|
"loss": 0.5453, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.6338341236114502, |
|
"rewards/margins": 0.5683634877204895, |
|
"rewards/rejected": -1.202197551727295, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3440366972477064, |
|
"grad_norm": 24.781311573851625, |
|
"learning_rate": 4.150873668617898e-07, |
|
"logits/chosen": -0.8739379048347473, |
|
"logits/rejected": -0.45592212677001953, |
|
"logps/chosen": -330.25274658203125, |
|
"logps/rejected": -367.8900146484375, |
|
"loss": 0.5596, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.5034725069999695, |
|
"rewards/margins": 0.5031090974807739, |
|
"rewards/rejected": -1.0065815448760986, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3440366972477064, |
|
"eval_logits/chosen": -1.00446355342865, |
|
"eval_logits/rejected": -0.24214474856853485, |
|
"eval_logps/chosen": -326.3860778808594, |
|
"eval_logps/rejected": -345.77154541015625, |
|
"eval_loss": 0.5497105717658997, |
|
"eval_rewards/accuracies": 0.7241379022598267, |
|
"eval_rewards/chosen": -0.42380115389823914, |
|
"eval_rewards/margins": 0.577439546585083, |
|
"eval_rewards/rejected": -1.001240611076355, |
|
"eval_runtime": 92.0532, |
|
"eval_samples_per_second": 19.749, |
|
"eval_steps_per_second": 0.315, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3669724770642202, |
|
"grad_norm": 22.924194135406648, |
|
"learning_rate": 3.9952763262280397e-07, |
|
"logits/chosen": -0.7640997767448425, |
|
"logits/rejected": 0.009471100755035877, |
|
"logps/chosen": -348.05657958984375, |
|
"logps/rejected": -402.06524658203125, |
|
"loss": 0.5524, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.5906395316123962, |
|
"rewards/margins": 0.6577303409576416, |
|
"rewards/rejected": -1.248369812965393, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.38990825688073394, |
|
"grad_norm": 20.23639815537178, |
|
"learning_rate": 3.8300801912883414e-07, |
|
"logits/chosen": -0.6523529291152954, |
|
"logits/rejected": -0.03978896886110306, |
|
"logps/chosen": -377.78094482421875, |
|
"logps/rejected": -413.15313720703125, |
|
"loss": 0.5447, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.7973796129226685, |
|
"rewards/margins": 0.6492636799812317, |
|
"rewards/rejected": -1.446643352508545, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.41284403669724773, |
|
"grad_norm": 18.90429435262536, |
|
"learning_rate": 3.6563457256020884e-07, |
|
"logits/chosen": -0.7708563208580017, |
|
"logits/rejected": -0.08572294563055038, |
|
"logps/chosen": -347.65289306640625, |
|
"logps/rejected": -362.48541259765625, |
|
"loss": 0.5317, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.7364877462387085, |
|
"rewards/margins": 0.6042462587356567, |
|
"rewards/rejected": -1.3407340049743652, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.43577981651376146, |
|
"grad_norm": 31.220611180756855, |
|
"learning_rate": 3.475188202022617e-07, |
|
"logits/chosen": -0.38854315876960754, |
|
"logits/rejected": 0.21274462342262268, |
|
"logps/chosen": -350.7613525390625, |
|
"logps/rejected": -399.95086669921875, |
|
"loss": 0.5437, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.8297553062438965, |
|
"rewards/margins": 0.6977420449256897, |
|
"rewards/rejected": -1.5274971723556519, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.45871559633027525, |
|
"grad_norm": 30.74591682430673, |
|
"learning_rate": 3.287770545059052e-07, |
|
"logits/chosen": 0.04708825796842575, |
|
"logits/rejected": 0.7175595760345459, |
|
"logps/chosen": -389.9985656738281, |
|
"logps/rejected": -427.6483459472656, |
|
"loss": 0.557, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.0787255764007568, |
|
"rewards/margins": 0.6673825979232788, |
|
"rewards/rejected": -1.7461086511611938, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.45871559633027525, |
|
"eval_logits/chosen": -0.24332188069820404, |
|
"eval_logits/rejected": 0.630587100982666, |
|
"eval_logps/chosen": -360.6939392089844, |
|
"eval_logps/rejected": -391.9894714355469, |
|
"eval_loss": 0.5398454666137695, |
|
"eval_rewards/accuracies": 0.732758641242981, |
|
"eval_rewards/chosen": -0.7668798565864563, |
|
"eval_rewards/margins": 0.6965396404266357, |
|
"eval_rewards/rejected": -1.4634195566177368, |
|
"eval_runtime": 92.2093, |
|
"eval_samples_per_second": 19.716, |
|
"eval_steps_per_second": 0.315, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.481651376146789, |
|
"grad_norm": 20.053636726221647, |
|
"learning_rate": 3.0952958655864954e-07, |
|
"logits/chosen": -0.5000969171524048, |
|
"logits/rejected": 0.287570059299469, |
|
"logps/chosen": -364.197021484375, |
|
"logps/rejected": -397.1741943359375, |
|
"loss": 0.521, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.5910893082618713, |
|
"rewards/margins": 0.7975226044654846, |
|
"rewards/rejected": -1.388611912727356, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.5045871559633027, |
|
"grad_norm": 29.09938855135104, |
|
"learning_rate": 2.898999737583448e-07, |
|
"logits/chosen": -0.06796761602163315, |
|
"logits/rejected": 0.6887415647506714, |
|
"logps/chosen": -362.93719482421875, |
|
"logps/rejected": -396.042724609375, |
|
"loss": 0.5366, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.6787573099136353, |
|
"rewards/margins": 0.7150012850761414, |
|
"rewards/rejected": -1.3937586545944214, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.5275229357798165, |
|
"grad_norm": 30.18521722411796, |
|
"learning_rate": 2.7001422664752333e-07, |
|
"logits/chosen": 0.06652301549911499, |
|
"logits/rejected": 0.9086186289787292, |
|
"logps/chosen": -363.90216064453125, |
|
"logps/rejected": -403.9359436035156, |
|
"loss": 0.5387, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.8593271970748901, |
|
"rewards/margins": 0.7014445066452026, |
|
"rewards/rejected": -1.5607718229293823, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5504587155963303, |
|
"grad_norm": 27.269767101203968, |
|
"learning_rate": 2.5e-07, |
|
"logits/chosen": -0.38139885663986206, |
|
"logits/rejected": 0.7918380498886108, |
|
"logps/chosen": -391.44293212890625, |
|
"logps/rejected": -436.5362243652344, |
|
"loss": 0.5051, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.770719051361084, |
|
"rewards/margins": 0.832466721534729, |
|
"rewards/rejected": -1.6031854152679443, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.573394495412844, |
|
"grad_norm": 25.963088610849898, |
|
"learning_rate": 2.2998577335247667e-07, |
|
"logits/chosen": 0.0945362076163292, |
|
"logits/rejected": 1.0674773454666138, |
|
"logps/chosen": -381.2936706542969, |
|
"logps/rejected": -445.334716796875, |
|
"loss": 0.5483, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.8827805519104004, |
|
"rewards/margins": 0.6815775036811829, |
|
"rewards/rejected": -1.564357876777649, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.573394495412844, |
|
"eval_logits/chosen": 0.15348348021507263, |
|
"eval_logits/rejected": 1.1693741083145142, |
|
"eval_logps/chosen": -374.9230651855469, |
|
"eval_logps/rejected": -410.466064453125, |
|
"eval_loss": 0.5334360599517822, |
|
"eval_rewards/accuracies": 0.7370689511299133, |
|
"eval_rewards/chosen": -0.9091711640357971, |
|
"eval_rewards/margins": 0.7390145659446716, |
|
"eval_rewards/rejected": -1.6481858491897583, |
|
"eval_runtime": 91.8172, |
|
"eval_samples_per_second": 19.8, |
|
"eval_steps_per_second": 0.316, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5963302752293578, |
|
"grad_norm": 24.61101929030597, |
|
"learning_rate": 2.1010002624165524e-07, |
|
"logits/chosen": 0.17619961500167847, |
|
"logits/rejected": 0.9847076535224915, |
|
"logps/chosen": -403.1936340332031, |
|
"logps/rejected": -437.13897705078125, |
|
"loss": 0.5437, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.0121877193450928, |
|
"rewards/margins": 0.678584098815918, |
|
"rewards/rejected": -1.6907718181610107, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.6192660550458715, |
|
"grad_norm": 20.367684952689917, |
|
"learning_rate": 1.9047041344135043e-07, |
|
"logits/chosen": -0.2150786817073822, |
|
"logits/rejected": 0.4295934736728668, |
|
"logps/chosen": -364.59625244140625, |
|
"logps/rejected": -382.64544677734375, |
|
"loss": 0.5429, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.7735622525215149, |
|
"rewards/margins": 0.5857953429222107, |
|
"rewards/rejected": -1.3593575954437256, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6422018348623854, |
|
"grad_norm": 22.118134031317915, |
|
"learning_rate": 1.7122294549409482e-07, |
|
"logits/chosen": 0.00539786834269762, |
|
"logits/rejected": 1.0036160945892334, |
|
"logps/chosen": -326.5462341308594, |
|
"logps/rejected": -401.6683654785156, |
|
"loss": 0.5232, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.7456392645835876, |
|
"rewards/margins": 0.8400524854660034, |
|
"rewards/rejected": -1.5856916904449463, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6651376146788991, |
|
"grad_norm": 25.56546771816306, |
|
"learning_rate": 1.524811797977383e-07, |
|
"logits/chosen": 0.18758408725261688, |
|
"logits/rejected": 1.4861918687820435, |
|
"logps/chosen": -357.71759033203125, |
|
"logps/rejected": -386.7574768066406, |
|
"loss": 0.5156, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.7674424648284912, |
|
"rewards/margins": 0.8558618426322937, |
|
"rewards/rejected": -1.6233041286468506, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6880733944954128, |
|
"grad_norm": 24.341607485547637, |
|
"learning_rate": 1.3436542743979125e-07, |
|
"logits/chosen": 0.2733023762702942, |
|
"logits/rejected": 1.3938772678375244, |
|
"logps/chosen": -372.9563903808594, |
|
"logps/rejected": -409.0280456542969, |
|
"loss": 0.5338, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7974545955657959, |
|
"rewards/margins": 0.7862997055053711, |
|
"rewards/rejected": -1.583754301071167, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6880733944954128, |
|
"eval_logits/chosen": 0.05302007496356964, |
|
"eval_logits/rejected": 1.1201387643814087, |
|
"eval_logps/chosen": -354.7213439941406, |
|
"eval_logps/rejected": -390.7056579589844, |
|
"eval_loss": 0.5226743817329407, |
|
"eval_rewards/accuracies": 0.7241379022598267, |
|
"eval_rewards/chosen": -0.7071539163589478, |
|
"eval_rewards/margins": 0.7434275150299072, |
|
"eval_rewards/rejected": -1.450581431388855, |
|
"eval_runtime": 91.9962, |
|
"eval_samples_per_second": 19.762, |
|
"eval_steps_per_second": 0.315, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.7110091743119266, |
|
"grad_norm": 23.93721697368582, |
|
"learning_rate": 1.1699198087116588e-07, |
|
"logits/chosen": 0.01756982132792473, |
|
"logits/rejected": 0.9820684194564819, |
|
"logps/chosen": -369.3329772949219, |
|
"logps/rejected": -423.09552001953125, |
|
"loss": 0.5194, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.7456832528114319, |
|
"rewards/margins": 0.7584327459335327, |
|
"rewards/rejected": -1.5041159391403198, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.7339449541284404, |
|
"grad_norm": 27.593197014560943, |
|
"learning_rate": 1.00472367377196e-07, |
|
"logits/chosen": 0.2417779266834259, |
|
"logits/rejected": 1.0749890804290771, |
|
"logps/chosen": -362.8450927734375, |
|
"logps/rejected": -409.4270324707031, |
|
"loss": 0.5134, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.771125316619873, |
|
"rewards/margins": 0.7913394570350647, |
|
"rewards/rejected": -1.5624648332595825, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.7568807339449541, |
|
"grad_norm": 24.639500410291397, |
|
"learning_rate": 8.49126331382102e-08, |
|
"logits/chosen": 0.31850242614746094, |
|
"logits/rejected": 1.264216423034668, |
|
"logps/chosen": -382.9512634277344, |
|
"logps/rejected": -421.80419921875, |
|
"loss": 0.5279, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.9504399299621582, |
|
"rewards/margins": 0.6338992118835449, |
|
"rewards/rejected": -1.5843393802642822, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7798165137614679, |
|
"grad_norm": 27.22903391760232, |
|
"learning_rate": 7.041266247556812e-08, |
|
"logits/chosen": 0.4879975914955139, |
|
"logits/rejected": 1.7275480031967163, |
|
"logps/chosen": -377.9282531738281, |
|
"logps/rejected": -405.58038330078125, |
|
"loss": 0.531, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.9674369692802429, |
|
"rewards/margins": 0.7435644865036011, |
|
"rewards/rejected": -1.7110016345977783, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.8027522935779816, |
|
"grad_norm": 23.43707551698937, |
|
"learning_rate": 5.706553665319955e-08, |
|
"logits/chosen": 0.2636318504810333, |
|
"logits/rejected": 1.2225837707519531, |
|
"logps/chosen": -377.68994140625, |
|
"logps/rejected": -407.3072814941406, |
|
"loss": 0.5111, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6589912176132202, |
|
"rewards/margins": 0.8298226594924927, |
|
"rewards/rejected": -1.4888139963150024, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.8027522935779816, |
|
"eval_logits/chosen": 0.2474285364151001, |
|
"eval_logits/rejected": 1.3205074071884155, |
|
"eval_logps/chosen": -361.77734375, |
|
"eval_logps/rejected": -398.4795837402344, |
|
"eval_loss": 0.5173320770263672, |
|
"eval_rewards/accuracies": 0.7284482717514038, |
|
"eval_rewards/chosen": -0.777714192867279, |
|
"eval_rewards/margins": 0.7506070137023926, |
|
"eval_rewards/rejected": -1.528321385383606, |
|
"eval_runtime": 91.2793, |
|
"eval_samples_per_second": 19.917, |
|
"eval_steps_per_second": 0.318, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.8256880733944955, |
|
"grad_norm": 24.131364526405257, |
|
"learning_rate": 4.4956936350761005e-08, |
|
"logits/chosen": 0.3964577317237854, |
|
"logits/rejected": 1.2822027206420898, |
|
"logps/chosen": -376.3506164550781, |
|
"logps/rejected": -434.9208068847656, |
|
"loss": 0.5363, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.8864477276802063, |
|
"rewards/margins": 0.8174076080322266, |
|
"rewards/rejected": -1.703855276107788, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8486238532110092, |
|
"grad_norm": 24.753082391706794, |
|
"learning_rate": 3.416459164418123e-08, |
|
"logits/chosen": 0.3074328601360321, |
|
"logits/rejected": 1.4143335819244385, |
|
"logps/chosen": -383.6210632324219, |
|
"logps/rejected": -398.04010009765625, |
|
"loss": 0.5163, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.8147560954093933, |
|
"rewards/margins": 0.836268424987793, |
|
"rewards/rejected": -1.651024580001831, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8715596330275229, |
|
"grad_norm": 24.185031971952714, |
|
"learning_rate": 2.475778302439524e-08, |
|
"logits/chosen": 0.6229906678199768, |
|
"logits/rejected": 1.3948328495025635, |
|
"logps/chosen": -357.06268310546875, |
|
"logps/rejected": -401.7029724121094, |
|
"loss": 0.5288, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.9318356513977051, |
|
"rewards/margins": 0.685466468334198, |
|
"rewards/rejected": -1.6173019409179688, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8944954128440367, |
|
"grad_norm": 25.966162156707668, |
|
"learning_rate": 1.6796896657433805e-08, |
|
"logits/chosen": 0.46141189336776733, |
|
"logits/rejected": 1.5275745391845703, |
|
"logps/chosen": -373.74420166015625, |
|
"logps/rejected": -420.73968505859375, |
|
"loss": 0.5163, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.8795710802078247, |
|
"rewards/margins": 0.8060371279716492, |
|
"rewards/rejected": -1.685608148574829, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.9174311926605505, |
|
"grad_norm": 22.442914644150154, |
|
"learning_rate": 1.0333036740834855e-08, |
|
"logits/chosen": 0.351333886384964, |
|
"logits/rejected": 1.4142162799835205, |
|
"logps/chosen": -361.7117004394531, |
|
"logps/rejected": -423.2845764160156, |
|
"loss": 0.5185, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.775847315788269, |
|
"rewards/margins": 0.930949866771698, |
|
"rewards/rejected": -1.7067972421646118, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9174311926605505, |
|
"eval_logits/chosen": 0.3004731237888336, |
|
"eval_logits/rejected": 1.3740564584732056, |
|
"eval_logps/chosen": -365.25396728515625, |
|
"eval_logps/rejected": -402.4430847167969, |
|
"eval_loss": 0.5178534984588623, |
|
"eval_rewards/accuracies": 0.7241379022598267, |
|
"eval_rewards/chosen": -0.8124799132347107, |
|
"eval_rewards/margins": 0.7554758191108704, |
|
"eval_rewards/rejected": -1.567955493927002, |
|
"eval_runtime": 92.4415, |
|
"eval_samples_per_second": 19.666, |
|
"eval_steps_per_second": 0.314, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9403669724770642, |
|
"grad_norm": 22.619715289441434, |
|
"learning_rate": 5.4076974448211685e-09, |
|
"logits/chosen": 0.658234715461731, |
|
"logits/rejected": 1.4394605159759521, |
|
"logps/chosen": -335.9058837890625, |
|
"logps/rejected": -414.93212890625, |
|
"loss": 0.5145, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.927649199962616, |
|
"rewards/margins": 0.7706761360168457, |
|
"rewards/rejected": -1.6983253955841064, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.963302752293578, |
|
"grad_norm": 25.83898128993406, |
|
"learning_rate": 2.052496544188487e-09, |
|
"logits/chosen": 0.38683071732521057, |
|
"logits/rejected": 1.4439704418182373, |
|
"logps/chosen": -358.71893310546875, |
|
"logps/rejected": -380.32879638671875, |
|
"loss": 0.5317, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.8638426065444946, |
|
"rewards/margins": 0.7388638257980347, |
|
"rewards/rejected": -1.6027065515518188, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9862385321100917, |
|
"grad_norm": 22.9017291219974, |
|
"learning_rate": 2.889724508297886e-10, |
|
"logits/chosen": 0.3319759964942932, |
|
"logits/rejected": 1.3225994110107422, |
|
"logps/chosen": -365.75030517578125, |
|
"logps/rejected": -427.2379455566406, |
|
"loss": 0.5134, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.9414383172988892, |
|
"rewards/margins": 0.7166720628738403, |
|
"rewards/rejected": -1.6581103801727295, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 436, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5596197334998244, |
|
"train_runtime": 11808.0241, |
|
"train_samples_per_second": 4.722, |
|
"train_steps_per_second": 0.037 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 436, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|