|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 396, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.007575757575757576, |
|
"grad_norm": 15.596037687491558, |
|
"learning_rate": 1.25e-08, |
|
"logits/chosen": -1.5390625, |
|
"logits/rejected": -1.578125, |
|
"logps/chosen": -166.0, |
|
"logps/rejected": -140.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.07575757575757576, |
|
"grad_norm": 15.63845059404282, |
|
"learning_rate": 1.25e-07, |
|
"logits/chosen": -1.59375, |
|
"logits/rejected": -1.546875, |
|
"logps/chosen": -146.0, |
|
"logps/rejected": -132.0, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.1805555522441864, |
|
"rewards/chosen": -0.006622314453125, |
|
"rewards/margins": -0.0020904541015625, |
|
"rewards/rejected": -0.0045166015625, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.15151515151515152, |
|
"grad_norm": 15.541972149316718, |
|
"learning_rate": 2.5e-07, |
|
"logits/chosen": -1.5703125, |
|
"logits/rejected": -1.5703125, |
|
"logps/chosen": -135.0, |
|
"logps/rejected": -139.0, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.1875, |
|
"rewards/chosen": -0.0137939453125, |
|
"rewards/margins": -0.015625, |
|
"rewards/rejected": 0.00188446044921875, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.22727272727272727, |
|
"grad_norm": 16.167428018856896, |
|
"learning_rate": 3.75e-07, |
|
"logits/chosen": -1.578125, |
|
"logits/rejected": -1.5625, |
|
"logps/chosen": -127.5, |
|
"logps/rejected": -119.0, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -0.006256103515625, |
|
"rewards/margins": 0.010986328125, |
|
"rewards/rejected": -0.0172119140625, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.30303030303030304, |
|
"grad_norm": 15.146059864609068, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -1.5859375, |
|
"logits/rejected": -1.5, |
|
"logps/chosen": -137.0, |
|
"logps/rejected": -129.0, |
|
"loss": 0.6851, |
|
"rewards/accuracies": 0.2874999940395355, |
|
"rewards/chosen": -0.03564453125, |
|
"rewards/margins": -0.00186920166015625, |
|
"rewards/rejected": -0.033935546875, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.3787878787878788, |
|
"grad_norm": 16.049039283963904, |
|
"learning_rate": 4.859550561797752e-07, |
|
"logits/chosen": -1.6328125, |
|
"logits/rejected": -1.5546875, |
|
"logps/chosen": -166.0, |
|
"logps/rejected": -138.0, |
|
"loss": 0.6814, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.04931640625, |
|
"rewards/margins": 0.04736328125, |
|
"rewards/rejected": -0.0966796875, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.45454545454545453, |
|
"grad_norm": 16.507555023132472, |
|
"learning_rate": 4.7191011235955054e-07, |
|
"logits/chosen": -1.5625, |
|
"logits/rejected": -1.5, |
|
"logps/chosen": -146.0, |
|
"logps/rejected": -131.0, |
|
"loss": 0.671, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.0888671875, |
|
"rewards/margins": 0.047119140625, |
|
"rewards/rejected": -0.1357421875, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.5303030303030303, |
|
"grad_norm": 15.118251247338051, |
|
"learning_rate": 4.5786516853932584e-07, |
|
"logits/chosen": -1.5390625, |
|
"logits/rejected": -1.5390625, |
|
"logps/chosen": -146.0, |
|
"logps/rejected": -129.0, |
|
"loss": 0.6756, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.1279296875, |
|
"rewards/margins": 0.049072265625, |
|
"rewards/rejected": -0.1767578125, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.6060606060606061, |
|
"grad_norm": 14.88927933446616, |
|
"learning_rate": 4.438202247191011e-07, |
|
"logits/chosen": -1.546875, |
|
"logits/rejected": -1.5390625, |
|
"logps/chosen": -135.0, |
|
"logps/rejected": -129.0, |
|
"loss": 0.6703, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.1318359375, |
|
"rewards/margins": 0.04248046875, |
|
"rewards/rejected": -0.173828125, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.6818181818181818, |
|
"grad_norm": 14.799638412304567, |
|
"learning_rate": 4.297752808988764e-07, |
|
"logits/chosen": -1.53125, |
|
"logits/rejected": -1.5703125, |
|
"logps/chosen": -151.0, |
|
"logps/rejected": -142.0, |
|
"loss": 0.6579, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.138671875, |
|
"rewards/margins": 0.06689453125, |
|
"rewards/rejected": -0.2060546875, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.7575757575757576, |
|
"grad_norm": 14.614214826436204, |
|
"learning_rate": 4.157303370786517e-07, |
|
"logits/chosen": -1.546875, |
|
"logits/rejected": -1.5625, |
|
"logps/chosen": -146.0, |
|
"logps/rejected": -142.0, |
|
"loss": 0.6571, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.181640625, |
|
"rewards/margins": 0.0791015625, |
|
"rewards/rejected": -0.259765625, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.8333333333333334, |
|
"grad_norm": 14.469109228087035, |
|
"learning_rate": 4.0168539325842696e-07, |
|
"logits/chosen": -1.5390625, |
|
"logits/rejected": -1.5625, |
|
"logps/chosen": -143.0, |
|
"logps/rejected": -143.0, |
|
"loss": 0.6619, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.2353515625, |
|
"rewards/margins": 0.05029296875, |
|
"rewards/rejected": -0.28515625, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.9090909090909091, |
|
"grad_norm": 14.828181252975622, |
|
"learning_rate": 3.876404494382022e-07, |
|
"logits/chosen": -1.5, |
|
"logits/rejected": -1.5078125, |
|
"logps/chosen": -139.0, |
|
"logps/rejected": -130.0, |
|
"loss": 0.6482, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.302734375, |
|
"rewards/margins": 0.0732421875, |
|
"rewards/rejected": -0.376953125, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.9848484848484849, |
|
"grad_norm": 14.708424671919467, |
|
"learning_rate": 3.735955056179775e-07, |
|
"logits/chosen": -1.484375, |
|
"logits/rejected": -1.46875, |
|
"logps/chosen": -130.0, |
|
"logps/rejected": -119.0, |
|
"loss": 0.6459, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.248046875, |
|
"rewards/margins": 0.1552734375, |
|
"rewards/rejected": -0.40234375, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -1.484375, |
|
"eval_logits/rejected": -1.4765625, |
|
"eval_logps/chosen": -141.0, |
|
"eval_logps/rejected": -129.0, |
|
"eval_loss": 0.6321874856948853, |
|
"eval_rewards/accuracies": 0.6428571343421936, |
|
"eval_rewards/chosen": -0.265625, |
|
"eval_rewards/margins": 0.1767578125, |
|
"eval_rewards/rejected": -0.443359375, |
|
"eval_runtime": 12.2418, |
|
"eval_samples_per_second": 16.337, |
|
"eval_steps_per_second": 0.572, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 1.0606060606060606, |
|
"grad_norm": 12.113429780384491, |
|
"learning_rate": 3.5955056179775277e-07, |
|
"logits/chosen": -1.515625, |
|
"logits/rejected": -1.453125, |
|
"logps/chosen": -147.0, |
|
"logps/rejected": -141.0, |
|
"loss": 0.5898, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.2314453125, |
|
"rewards/margins": 0.236328125, |
|
"rewards/rejected": -0.466796875, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.1363636363636362, |
|
"grad_norm": 11.88094972407373, |
|
"learning_rate": 3.4550561797752807e-07, |
|
"logits/chosen": -1.5703125, |
|
"logits/rejected": -1.515625, |
|
"logps/chosen": -149.0, |
|
"logps/rejected": -150.0, |
|
"loss": 0.5547, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.1630859375, |
|
"rewards/margins": 0.333984375, |
|
"rewards/rejected": -0.49609375, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.2121212121212122, |
|
"grad_norm": 12.356915867599595, |
|
"learning_rate": 3.314606741573033e-07, |
|
"logits/chosen": -1.5703125, |
|
"logits/rejected": -1.5390625, |
|
"logps/chosen": -142.0, |
|
"logps/rejected": -130.0, |
|
"loss": 0.5473, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.2412109375, |
|
"rewards/margins": 0.369140625, |
|
"rewards/rejected": -0.609375, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.2878787878787878, |
|
"grad_norm": 12.507248800803001, |
|
"learning_rate": 3.1741573033707863e-07, |
|
"logits/chosen": -1.5078125, |
|
"logits/rejected": -1.484375, |
|
"logps/chosen": -143.0, |
|
"logps/rejected": -131.0, |
|
"loss": 0.5301, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.33984375, |
|
"rewards/margins": 0.435546875, |
|
"rewards/rejected": -0.7734375, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.3636363636363638, |
|
"grad_norm": 12.914463649706578, |
|
"learning_rate": 3.0337078651685393e-07, |
|
"logits/chosen": -1.5234375, |
|
"logits/rejected": -1.4921875, |
|
"logps/chosen": -152.0, |
|
"logps/rejected": -144.0, |
|
"loss": 0.5235, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.2470703125, |
|
"rewards/margins": 0.44140625, |
|
"rewards/rejected": -0.6875, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.4393939393939394, |
|
"grad_norm": 14.04555120726488, |
|
"learning_rate": 2.893258426966292e-07, |
|
"logits/chosen": -1.515625, |
|
"logits/rejected": -1.5, |
|
"logps/chosen": -144.0, |
|
"logps/rejected": -137.0, |
|
"loss": 0.5319, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.515625, |
|
"rewards/margins": 0.423828125, |
|
"rewards/rejected": -0.9375, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.5151515151515151, |
|
"grad_norm": 12.20920671777009, |
|
"learning_rate": 2.752808988764045e-07, |
|
"logits/chosen": -1.5234375, |
|
"logits/rejected": -1.5390625, |
|
"logps/chosen": -152.0, |
|
"logps/rejected": -155.0, |
|
"loss": 0.5072, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.40625, |
|
"rewards/margins": 0.5234375, |
|
"rewards/rejected": -0.9296875, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.5909090909090908, |
|
"grad_norm": 12.279499640810553, |
|
"learning_rate": 2.612359550561798e-07, |
|
"logits/chosen": -1.4765625, |
|
"logits/rejected": -1.453125, |
|
"logps/chosen": -138.0, |
|
"logps/rejected": -137.0, |
|
"loss": 0.5292, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.447265625, |
|
"rewards/margins": 0.478515625, |
|
"rewards/rejected": -0.92578125, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 14.135915136852619, |
|
"learning_rate": 2.4719101123595505e-07, |
|
"logits/chosen": -1.5, |
|
"logits/rejected": -1.4609375, |
|
"logps/chosen": -142.0, |
|
"logps/rejected": -133.0, |
|
"loss": 0.5232, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5625, |
|
"rewards/margins": 0.37109375, |
|
"rewards/rejected": -0.93359375, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.7424242424242424, |
|
"grad_norm": 13.704058332324305, |
|
"learning_rate": 2.331460674157303e-07, |
|
"logits/chosen": -1.5546875, |
|
"logits/rejected": -1.546875, |
|
"logps/chosen": -147.0, |
|
"logps/rejected": -145.0, |
|
"loss": 0.5218, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.4375, |
|
"rewards/margins": 0.5390625, |
|
"rewards/rejected": -0.9765625, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.8181818181818183, |
|
"grad_norm": 12.255502416027877, |
|
"learning_rate": 2.191011235955056e-07, |
|
"logits/chosen": -1.5859375, |
|
"logits/rejected": -1.5546875, |
|
"logps/chosen": -163.0, |
|
"logps/rejected": -152.0, |
|
"loss": 0.4985, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.349609375, |
|
"rewards/margins": 0.625, |
|
"rewards/rejected": -0.97265625, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.893939393939394, |
|
"grad_norm": 13.196400764481162, |
|
"learning_rate": 2.0505617977528089e-07, |
|
"logits/chosen": -1.5546875, |
|
"logits/rejected": -1.453125, |
|
"logps/chosen": -162.0, |
|
"logps/rejected": -127.0, |
|
"loss": 0.4993, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.53125, |
|
"rewards/margins": 0.65234375, |
|
"rewards/rejected": -1.1796875, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.9696969696969697, |
|
"grad_norm": 15.136911810706158, |
|
"learning_rate": 1.9101123595505617e-07, |
|
"logits/chosen": -1.5390625, |
|
"logits/rejected": -1.5078125, |
|
"logps/chosen": -148.0, |
|
"logps/rejected": -145.0, |
|
"loss": 0.5393, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6328125, |
|
"rewards/margins": 0.46484375, |
|
"rewards/rejected": -1.09375, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_logits/chosen": -1.484375, |
|
"eval_logits/rejected": -1.46875, |
|
"eval_logps/chosen": -144.0, |
|
"eval_logps/rejected": -133.0, |
|
"eval_loss": 0.6057812571525574, |
|
"eval_rewards/accuracies": 0.6607142686843872, |
|
"eval_rewards/chosen": -0.5625, |
|
"eval_rewards/margins": 0.3515625, |
|
"eval_rewards/rejected": -0.9140625, |
|
"eval_runtime": 14.4828, |
|
"eval_samples_per_second": 13.81, |
|
"eval_steps_per_second": 0.483, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 2.0454545454545454, |
|
"grad_norm": 11.094892248853895, |
|
"learning_rate": 1.7696629213483144e-07, |
|
"logits/chosen": -1.5, |
|
"logits/rejected": -1.4453125, |
|
"logps/chosen": -148.0, |
|
"logps/rejected": -142.0, |
|
"loss": 0.4837, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.58984375, |
|
"rewards/margins": 0.62890625, |
|
"rewards/rejected": -1.21875, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.121212121212121, |
|
"grad_norm": 10.64557888461514, |
|
"learning_rate": 1.6292134831460675e-07, |
|
"logits/chosen": -1.5546875, |
|
"logits/rejected": -1.4765625, |
|
"logps/chosen": -150.0, |
|
"logps/rejected": -134.0, |
|
"loss": 0.4358, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.5078125, |
|
"rewards/margins": 0.734375, |
|
"rewards/rejected": -1.2421875, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.196969696969697, |
|
"grad_norm": 11.580320333983588, |
|
"learning_rate": 1.4887640449438203e-07, |
|
"logits/chosen": -1.5625, |
|
"logits/rejected": -1.5390625, |
|
"logps/chosen": -165.0, |
|
"logps/rejected": -160.0, |
|
"loss": 0.4185, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.546875, |
|
"rewards/margins": 0.765625, |
|
"rewards/rejected": -1.3125, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.2727272727272725, |
|
"grad_norm": 15.056725052177573, |
|
"learning_rate": 1.3483146067415728e-07, |
|
"logits/chosen": -1.546875, |
|
"logits/rejected": -1.5078125, |
|
"logps/chosen": -158.0, |
|
"logps/rejected": -149.0, |
|
"loss": 0.458, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.435546875, |
|
"rewards/margins": 0.75, |
|
"rewards/rejected": -1.1875, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.3484848484848486, |
|
"grad_norm": 12.006511122339738, |
|
"learning_rate": 1.2078651685393259e-07, |
|
"logits/chosen": -1.515625, |
|
"logits/rejected": -1.453125, |
|
"logps/chosen": -154.0, |
|
"logps/rejected": -144.0, |
|
"loss": 0.4472, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.5546875, |
|
"rewards/margins": 0.65625, |
|
"rewards/rejected": -1.2109375, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.4242424242424243, |
|
"grad_norm": 13.746362722926833, |
|
"learning_rate": 1.0674157303370785e-07, |
|
"logits/chosen": -1.5390625, |
|
"logits/rejected": -1.5234375, |
|
"logps/chosen": -140.0, |
|
"logps/rejected": -145.0, |
|
"loss": 0.4728, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.58203125, |
|
"rewards/margins": 0.6015625, |
|
"rewards/rejected": -1.1796875, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 10.459882513773048, |
|
"learning_rate": 9.269662921348314e-08, |
|
"logits/chosen": -1.515625, |
|
"logits/rejected": -1.46875, |
|
"logps/chosen": -153.0, |
|
"logps/rejected": -149.0, |
|
"loss": 0.4405, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.5703125, |
|
"rewards/margins": 0.79296875, |
|
"rewards/rejected": -1.3671875, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.5757575757575757, |
|
"grad_norm": 13.750139706707387, |
|
"learning_rate": 7.865168539325842e-08, |
|
"logits/chosen": -1.46875, |
|
"logits/rejected": -1.5, |
|
"logps/chosen": -147.0, |
|
"logps/rejected": -143.0, |
|
"loss": 0.4422, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.515625, |
|
"rewards/margins": 0.69140625, |
|
"rewards/rejected": -1.2109375, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.6515151515151514, |
|
"grad_norm": 10.53559766204164, |
|
"learning_rate": 6.460674157303371e-08, |
|
"logits/chosen": -1.5390625, |
|
"logits/rejected": -1.4921875, |
|
"logps/chosen": -155.0, |
|
"logps/rejected": -142.0, |
|
"loss": 0.4407, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.54296875, |
|
"rewards/margins": 0.7109375, |
|
"rewards/rejected": -1.25, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.7272727272727275, |
|
"grad_norm": 10.932011907954834, |
|
"learning_rate": 5.056179775280899e-08, |
|
"logits/chosen": -1.5078125, |
|
"logits/rejected": -1.453125, |
|
"logps/chosen": -142.0, |
|
"logps/rejected": -139.0, |
|
"loss": 0.4395, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.57421875, |
|
"rewards/margins": 0.87890625, |
|
"rewards/rejected": -1.453125, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.8030303030303028, |
|
"grad_norm": 10.478220370160118, |
|
"learning_rate": 3.6516853932584266e-08, |
|
"logits/chosen": -1.53125, |
|
"logits/rejected": -1.5, |
|
"logps/chosen": -144.0, |
|
"logps/rejected": -142.0, |
|
"loss": 0.447, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.53515625, |
|
"rewards/margins": 0.71484375, |
|
"rewards/rejected": -1.25, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.878787878787879, |
|
"grad_norm": 11.077008754645805, |
|
"learning_rate": 2.2471910112359548e-08, |
|
"logits/chosen": -1.546875, |
|
"logits/rejected": -1.484375, |
|
"logps/chosen": -150.0, |
|
"logps/rejected": -140.0, |
|
"loss": 0.4625, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.5234375, |
|
"rewards/margins": 0.76953125, |
|
"rewards/rejected": -1.2890625, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.9545454545454546, |
|
"grad_norm": 10.906822523238372, |
|
"learning_rate": 8.42696629213483e-09, |
|
"logits/chosen": -1.5390625, |
|
"logits/rejected": -1.5390625, |
|
"logps/chosen": -156.0, |
|
"logps/rejected": -146.0, |
|
"loss": 0.4113, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.439453125, |
|
"rewards/margins": 0.82421875, |
|
"rewards/rejected": -1.265625, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_logits/chosen": -1.4765625, |
|
"eval_logits/rejected": -1.46875, |
|
"eval_logps/chosen": -144.0, |
|
"eval_logps/rejected": -133.0, |
|
"eval_loss": 0.6071093678474426, |
|
"eval_rewards/accuracies": 0.6607142686843872, |
|
"eval_rewards/chosen": -0.490234375, |
|
"eval_rewards/margins": 0.396484375, |
|
"eval_rewards/rejected": -0.88671875, |
|
"eval_runtime": 14.7701, |
|
"eval_samples_per_second": 13.541, |
|
"eval_steps_per_second": 0.474, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 396, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5475334398674242, |
|
"train_runtime": 2411.1864, |
|
"train_samples_per_second": 5.224, |
|
"train_steps_per_second": 0.164 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 396, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|