|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 1000, |
|
"global_step": 3873, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.5773195876288657e-09, |
|
"logits/chosen": -3.9100074768066406, |
|
"logits/rejected": -4.447928428649902, |
|
"logps/chosen": -252.016845703125, |
|
"logps/rejected": -298.87518310546875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.5773195876288656e-08, |
|
"logits/chosen": -5.264719486236572, |
|
"logits/rejected": -4.7501540184021, |
|
"logps/chosen": -704.29541015625, |
|
"logps/rejected": -532.2731323242188, |
|
"loss": 0.6952, |
|
"rewards/accuracies": 0.4027777910232544, |
|
"rewards/chosen": 0.004859171807765961, |
|
"rewards/margins": 0.00023437623167410493, |
|
"rewards/rejected": 0.004624796565622091, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.154639175257731e-08, |
|
"logits/chosen": -5.434407711029053, |
|
"logits/rejected": -4.95996618270874, |
|
"logps/chosen": -699.14013671875, |
|
"logps/rejected": -476.2240295410156, |
|
"loss": 0.6952, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.002142244251444936, |
|
"rewards/margins": -0.0066454135812819, |
|
"rewards/rejected": 0.00878765620291233, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 7.731958762886598e-08, |
|
"logits/chosen": -5.243380546569824, |
|
"logits/rejected": -5.211713790893555, |
|
"logps/chosen": -525.1171875, |
|
"logps/rejected": -423.39312744140625, |
|
"loss": 0.6951, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.003249790519475937, |
|
"rewards/margins": 0.000919342041015625, |
|
"rewards/rejected": 0.002330448944121599, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.0309278350515462e-07, |
|
"logits/chosen": -5.131182670593262, |
|
"logits/rejected": -4.265445709228516, |
|
"logps/chosen": -661.4071655273438, |
|
"logps/rejected": -430.1532287597656, |
|
"loss": 0.6947, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.011420822702348232, |
|
"rewards/margins": -0.008613145910203457, |
|
"rewards/rejected": -0.0028076765593141317, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.2886597938144328e-07, |
|
"logits/chosen": -5.016252040863037, |
|
"logits/rejected": -5.079930782318115, |
|
"logps/chosen": -700.6941528320312, |
|
"logps/rejected": -517.4772338867188, |
|
"loss": 0.6981, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.0022976198233664036, |
|
"rewards/margins": -0.007870988920331001, |
|
"rewards/rejected": 0.00557337049394846, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.5463917525773197e-07, |
|
"logits/chosen": -4.962490558624268, |
|
"logits/rejected": -5.010842323303223, |
|
"logps/chosen": -555.6851196289062, |
|
"logps/rejected": -501.57110595703125, |
|
"loss": 0.6967, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.0058050318621098995, |
|
"rewards/margins": -0.0007518678903579712, |
|
"rewards/rejected": -0.005053164903074503, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.804123711340206e-07, |
|
"logits/chosen": -5.370819091796875, |
|
"logits/rejected": -5.034182071685791, |
|
"logps/chosen": -683.794921875, |
|
"logps/rejected": -468.4527893066406, |
|
"loss": 0.6901, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0010283368173986673, |
|
"rewards/margins": 0.014723362401127815, |
|
"rewards/rejected": -0.013695026747882366, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.0618556701030925e-07, |
|
"logits/chosen": -4.814556121826172, |
|
"logits/rejected": -4.836775779724121, |
|
"logps/chosen": -626.3643798828125, |
|
"logps/rejected": -469.01177978515625, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.007337172515690327, |
|
"rewards/margins": 0.010705096647143364, |
|
"rewards/rejected": -0.0033679225016385317, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.3195876288659794e-07, |
|
"logits/chosen": -5.1350202560424805, |
|
"logits/rejected": -5.1212358474731445, |
|
"logps/chosen": -515.248779296875, |
|
"logps/rejected": -433.7506408691406, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.00244722468778491, |
|
"rewards/margins": 0.010435061529278755, |
|
"rewards/rejected": -0.007987835444509983, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.5773195876288655e-07, |
|
"logits/chosen": -5.177114009857178, |
|
"logits/rejected": -4.349142551422119, |
|
"logps/chosen": -593.7941284179688, |
|
"logps/rejected": -424.19696044921875, |
|
"loss": 0.6868, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0027124141342937946, |
|
"rewards/margins": 0.011196794919669628, |
|
"rewards/rejected": -0.013909208588302135, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.835051546391752e-07, |
|
"logits/chosen": -4.992671012878418, |
|
"logits/rejected": -4.795473098754883, |
|
"logps/chosen": -564.6749877929688, |
|
"logps/rejected": -494.9117126464844, |
|
"loss": 0.6847, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.008160188794136047, |
|
"rewards/margins": 0.023002145811915398, |
|
"rewards/rejected": -0.0148419588804245, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.0927835051546394e-07, |
|
"logits/chosen": -5.057103157043457, |
|
"logits/rejected": -4.665154457092285, |
|
"logps/chosen": -580.5682373046875, |
|
"logps/rejected": -467.72369384765625, |
|
"loss": 0.6873, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.008997360244393349, |
|
"rewards/margins": 0.010930529795587063, |
|
"rewards/rejected": -0.019927887246012688, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.3505154639175255e-07, |
|
"logits/chosen": -5.1313934326171875, |
|
"logits/rejected": -4.634444236755371, |
|
"logps/chosen": -599.831787109375, |
|
"logps/rejected": -448.5379943847656, |
|
"loss": 0.6848, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.007190053351223469, |
|
"rewards/margins": 0.01588342897593975, |
|
"rewards/rejected": -0.008693376556038857, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.608247422680412e-07, |
|
"logits/chosen": -5.257201194763184, |
|
"logits/rejected": -4.326685905456543, |
|
"logps/chosen": -568.3807373046875, |
|
"logps/rejected": -408.2413024902344, |
|
"loss": 0.6705, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.01816733181476593, |
|
"rewards/margins": 0.05539187043905258, |
|
"rewards/rejected": -0.03722454234957695, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.865979381443299e-07, |
|
"logits/chosen": -5.004805088043213, |
|
"logits/rejected": -4.720073699951172, |
|
"logps/chosen": -584.5308227539062, |
|
"logps/rejected": -476.8841857910156, |
|
"loss": 0.6701, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.020336730405688286, |
|
"rewards/margins": 0.04606650024652481, |
|
"rewards/rejected": -0.025729769840836525, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.123711340206185e-07, |
|
"logits/chosen": -5.245352745056152, |
|
"logits/rejected": -4.824395656585693, |
|
"logps/chosen": -647.1730346679688, |
|
"logps/rejected": -521.8081665039062, |
|
"loss": 0.6615, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.025765161961317062, |
|
"rewards/margins": 0.060902394354343414, |
|
"rewards/rejected": -0.03513722866773605, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.381443298969072e-07, |
|
"logits/chosen": -4.918195724487305, |
|
"logits/rejected": -5.041461944580078, |
|
"logps/chosen": -649.2320556640625, |
|
"logps/rejected": -453.7999572753906, |
|
"loss": 0.6497, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.030253374949097633, |
|
"rewards/margins": 0.10368291288614273, |
|
"rewards/rejected": -0.07342952489852905, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.639175257731959e-07, |
|
"logits/chosen": -4.9767351150512695, |
|
"logits/rejected": -4.5638556480407715, |
|
"logps/chosen": -633.88623046875, |
|
"logps/rejected": -496.74664306640625, |
|
"loss": 0.6699, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.008293787948787212, |
|
"rewards/margins": 0.06771639734506607, |
|
"rewards/rejected": -0.07601018249988556, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.896907216494845e-07, |
|
"logits/chosen": -5.383603096008301, |
|
"logits/rejected": -4.877391815185547, |
|
"logps/chosen": -546.728271484375, |
|
"logps/rejected": -344.8063659667969, |
|
"loss": 0.654, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.007833002135157585, |
|
"rewards/margins": 0.09368343651294708, |
|
"rewards/rejected": -0.10151644051074982, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.991845610220168e-07, |
|
"logits/chosen": -4.733527183532715, |
|
"logits/rejected": -4.868575096130371, |
|
"logps/chosen": -481.6207580566406, |
|
"logps/rejected": -476.0492248535156, |
|
"loss": 0.6565, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.03832811862230301, |
|
"rewards/margins": 0.055693674832582474, |
|
"rewards/rejected": -0.09402180463075638, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.978254960587116e-07, |
|
"logits/chosen": -4.98598051071167, |
|
"logits/rejected": -4.212894916534424, |
|
"logps/chosen": -637.4251098632812, |
|
"logps/rejected": -506.30230712890625, |
|
"loss": 0.6702, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.0035689384676516056, |
|
"rewards/margins": 0.10787747800350189, |
|
"rewards/rejected": -0.10430854558944702, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.964664310954063e-07, |
|
"logits/chosen": -5.308133125305176, |
|
"logits/rejected": -5.002093315124512, |
|
"logps/chosen": -582.2630615234375, |
|
"logps/rejected": -459.553466796875, |
|
"loss": 0.6532, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.03686295077204704, |
|
"rewards/margins": 0.10949943214654922, |
|
"rewards/rejected": -0.14636239409446716, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.951073661321011e-07, |
|
"logits/chosen": -5.136179447174072, |
|
"logits/rejected": -4.420478820800781, |
|
"logps/chosen": -637.677734375, |
|
"logps/rejected": -455.35528564453125, |
|
"loss": 0.6246, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.016099706292152405, |
|
"rewards/margins": 0.15525248646736145, |
|
"rewards/rejected": -0.17135220766067505, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.937483011687959e-07, |
|
"logits/chosen": -5.426546573638916, |
|
"logits/rejected": -4.910277366638184, |
|
"logps/chosen": -543.5972900390625, |
|
"logps/rejected": -451.20599365234375, |
|
"loss": 0.6324, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.05474834516644478, |
|
"rewards/margins": 0.12798623740673065, |
|
"rewards/rejected": -0.18273457884788513, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.923892362054906e-07, |
|
"logits/chosen": -5.028792858123779, |
|
"logits/rejected": -4.966330051422119, |
|
"logps/chosen": -639.6892700195312, |
|
"logps/rejected": -501.51019287109375, |
|
"loss": 0.6341, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.013248622417449951, |
|
"rewards/margins": 0.18953406810760498, |
|
"rewards/rejected": -0.17628543078899384, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.910301712421854e-07, |
|
"logits/chosen": -4.824421405792236, |
|
"logits/rejected": -5.0228495597839355, |
|
"logps/chosen": -572.6714477539062, |
|
"logps/rejected": -469.21148681640625, |
|
"loss": 0.6363, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.042329370975494385, |
|
"rewards/margins": 0.18195411562919617, |
|
"rewards/rejected": -0.22428350150585175, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.8967110627888e-07, |
|
"logits/chosen": -4.958866119384766, |
|
"logits/rejected": -4.671696662902832, |
|
"logps/chosen": -680.0038452148438, |
|
"logps/rejected": -487.1553649902344, |
|
"loss": 0.6307, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.0647299587726593, |
|
"rewards/margins": 0.19074389338493347, |
|
"rewards/rejected": -0.2554738223552704, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.883120413155748e-07, |
|
"logits/chosen": -5.179836750030518, |
|
"logits/rejected": -5.224351406097412, |
|
"logps/chosen": -659.30712890625, |
|
"logps/rejected": -563.93505859375, |
|
"loss": 0.6215, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.005299837794154882, |
|
"rewards/margins": 0.23988893628120422, |
|
"rewards/rejected": -0.23458907008171082, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.869529763522696e-07, |
|
"logits/chosen": -5.175195217132568, |
|
"logits/rejected": -4.513309478759766, |
|
"logps/chosen": -554.3179321289062, |
|
"logps/rejected": -408.87469482421875, |
|
"loss": 0.6048, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.03833787888288498, |
|
"rewards/margins": 0.20562824606895447, |
|
"rewards/rejected": -0.24396613240242004, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.855939113889644e-07, |
|
"logits/chosen": -5.465035915374756, |
|
"logits/rejected": -5.007296562194824, |
|
"logps/chosen": -614.6101684570312, |
|
"logps/rejected": -448.00396728515625, |
|
"loss": 0.6177, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.04801105335354805, |
|
"rewards/margins": 0.2677549719810486, |
|
"rewards/rejected": -0.3157660663127899, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.842348464256592e-07, |
|
"logits/chosen": -5.135851860046387, |
|
"logits/rejected": -4.458438873291016, |
|
"logps/chosen": -693.4744873046875, |
|
"logps/rejected": -477.97735595703125, |
|
"loss": 0.5953, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.026815488934516907, |
|
"rewards/margins": 0.36201024055480957, |
|
"rewards/rejected": -0.33519476652145386, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.828757814623539e-07, |
|
"logits/chosen": -4.8842668533325195, |
|
"logits/rejected": -4.530327796936035, |
|
"logps/chosen": -618.3095703125, |
|
"logps/rejected": -440.5267028808594, |
|
"loss": 0.6121, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.04385804012417793, |
|
"rewards/margins": 0.31809335947036743, |
|
"rewards/rejected": -0.36195147037506104, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.815167164990487e-07, |
|
"logits/chosen": -5.210285186767578, |
|
"logits/rejected": -4.398937225341797, |
|
"logps/chosen": -613.9888916015625, |
|
"logps/rejected": -423.78973388671875, |
|
"loss": 0.5855, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.045169491320848465, |
|
"rewards/margins": 0.3982721269130707, |
|
"rewards/rejected": -0.44344156980514526, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.801576515357433e-07, |
|
"logits/chosen": -5.246552467346191, |
|
"logits/rejected": -4.851002216339111, |
|
"logps/chosen": -602.2916259765625, |
|
"logps/rejected": -460.1776428222656, |
|
"loss": 0.6032, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.023538529872894287, |
|
"rewards/margins": 0.2947237491607666, |
|
"rewards/rejected": -0.3182622492313385, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.787985865724381e-07, |
|
"logits/chosen": -5.230737209320068, |
|
"logits/rejected": -4.5234880447387695, |
|
"logps/chosen": -476.8888244628906, |
|
"logps/rejected": -386.95208740234375, |
|
"loss": 0.5973, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.20046333968639374, |
|
"rewards/margins": 0.22257764637470245, |
|
"rewards/rejected": -0.4230410158634186, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.774395216091329e-07, |
|
"logits/chosen": -4.9621710777282715, |
|
"logits/rejected": -4.4291791915893555, |
|
"logps/chosen": -624.4473266601562, |
|
"logps/rejected": -469.51025390625, |
|
"loss": 0.5782, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.0565962977707386, |
|
"rewards/margins": 0.4176466464996338, |
|
"rewards/rejected": -0.4742429256439209, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.7608045664582765e-07, |
|
"logits/chosen": -5.13443660736084, |
|
"logits/rejected": -4.768548488616943, |
|
"logps/chosen": -580.9453125, |
|
"logps/rejected": -469.3211364746094, |
|
"loss": 0.6232, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.11847039312124252, |
|
"rewards/margins": 0.1889243870973587, |
|
"rewards/rejected": -0.307394802570343, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.747213916825224e-07, |
|
"logits/chosen": -5.269211292266846, |
|
"logits/rejected": -4.984899997711182, |
|
"logps/chosen": -625.7012329101562, |
|
"logps/rejected": -498.78436279296875, |
|
"loss": 0.6085, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.08433916419744492, |
|
"rewards/margins": 0.2882917821407318, |
|
"rewards/rejected": -0.3726309835910797, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.733623267192172e-07, |
|
"logits/chosen": -4.900550365447998, |
|
"logits/rejected": -4.842984199523926, |
|
"logps/chosen": -595.7847290039062, |
|
"logps/rejected": -421.272705078125, |
|
"loss": 0.5801, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.045443516224622726, |
|
"rewards/margins": 0.3703029155731201, |
|
"rewards/rejected": -0.41574639081954956, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.720032617559119e-07, |
|
"logits/chosen": -4.971917152404785, |
|
"logits/rejected": -4.413316249847412, |
|
"logps/chosen": -603.6484375, |
|
"logps/rejected": -390.181884765625, |
|
"loss": 0.6157, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.08832856267690659, |
|
"rewards/margins": 0.370069295167923, |
|
"rewards/rejected": -0.4583978056907654, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.7064419679260665e-07, |
|
"logits/chosen": -4.816274642944336, |
|
"logits/rejected": -4.781431198120117, |
|
"logps/chosen": -532.91650390625, |
|
"logps/rejected": -487.6172790527344, |
|
"loss": 0.6183, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.2775554656982422, |
|
"rewards/margins": 0.12774913012981415, |
|
"rewards/rejected": -0.40530458092689514, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.692851318293014e-07, |
|
"logits/chosen": -4.550357818603516, |
|
"logits/rejected": -4.743869781494141, |
|
"logps/chosen": -503.3206481933594, |
|
"logps/rejected": -449.945068359375, |
|
"loss": 0.6434, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.2160659283399582, |
|
"rewards/margins": 0.1927742063999176, |
|
"rewards/rejected": -0.4088401198387146, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.6792606686599617e-07, |
|
"logits/chosen": -4.8620710372924805, |
|
"logits/rejected": -4.680614471435547, |
|
"logps/chosen": -517.7664184570312, |
|
"logps/rejected": -415.820556640625, |
|
"loss": 0.5822, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.18706198036670685, |
|
"rewards/margins": 0.3389657735824585, |
|
"rewards/rejected": -0.5260277986526489, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.6656700190269095e-07, |
|
"logits/chosen": -4.892120838165283, |
|
"logits/rejected": -4.399613380432129, |
|
"logps/chosen": -616.3198852539062, |
|
"logps/rejected": -455.2386169433594, |
|
"loss": 0.6139, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.2131435126066208, |
|
"rewards/margins": 0.2211327850818634, |
|
"rewards/rejected": -0.434276282787323, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.652079369393857e-07, |
|
"logits/chosen": -4.742907524108887, |
|
"logits/rejected": -4.0981621742248535, |
|
"logps/chosen": -684.6009521484375, |
|
"logps/rejected": -568.4190673828125, |
|
"loss": 0.6183, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.024788271635770798, |
|
"rewards/margins": 0.2745409309864044, |
|
"rewards/rejected": -0.29932913184165955, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.638488719760804e-07, |
|
"logits/chosen": -4.990183353424072, |
|
"logits/rejected": -4.841261386871338, |
|
"logps/chosen": -501.45587158203125, |
|
"logps/rejected": -421.10516357421875, |
|
"loss": 0.6133, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.1533351093530655, |
|
"rewards/margins": 0.2984987199306488, |
|
"rewards/rejected": -0.4518338739871979, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.6248980701277516e-07, |
|
"logits/chosen": -4.963714599609375, |
|
"logits/rejected": -4.383978366851807, |
|
"logps/chosen": -651.7528076171875, |
|
"logps/rejected": -455.96051025390625, |
|
"loss": 0.5702, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.045393429696559906, |
|
"rewards/margins": 0.4336971640586853, |
|
"rewards/rejected": -0.4790906012058258, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.6113074204946995e-07, |
|
"logits/chosen": -5.326716899871826, |
|
"logits/rejected": -4.635982036590576, |
|
"logps/chosen": -634.7332763671875, |
|
"logps/rejected": -447.55010986328125, |
|
"loss": 0.6572, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.18605412542819977, |
|
"rewards/margins": 0.21766987442970276, |
|
"rewards/rejected": -0.4037240147590637, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.5977167708616473e-07, |
|
"logits/chosen": -5.067509651184082, |
|
"logits/rejected": -4.860345840454102, |
|
"logps/chosen": -541.9595947265625, |
|
"logps/rejected": -476.3515625, |
|
"loss": 0.5982, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.13814474642276764, |
|
"rewards/margins": 0.31477227807044983, |
|
"rewards/rejected": -0.4529170095920563, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.5841261212285947e-07, |
|
"logits/chosen": -4.927236080169678, |
|
"logits/rejected": -4.7580389976501465, |
|
"logps/chosen": -611.2810668945312, |
|
"logps/rejected": -467.39501953125, |
|
"loss": 0.6436, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.1771600991487503, |
|
"rewards/margins": 0.2784258723258972, |
|
"rewards/rejected": -0.45558589696884155, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.570535471595542e-07, |
|
"logits/chosen": -5.053236484527588, |
|
"logits/rejected": -4.242236137390137, |
|
"logps/chosen": -641.723876953125, |
|
"logps/rejected": -466.0104064941406, |
|
"loss": 0.6012, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.07538946717977524, |
|
"rewards/margins": 0.42548665404319763, |
|
"rewards/rejected": -0.5008760690689087, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.5569448219624894e-07, |
|
"logits/chosen": -5.104859352111816, |
|
"logits/rejected": -4.797629356384277, |
|
"logps/chosen": -636.1038818359375, |
|
"logps/rejected": -465.8121032714844, |
|
"loss": 0.5827, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.03768538683652878, |
|
"rewards/margins": 0.325167715549469, |
|
"rewards/rejected": -0.36285310983657837, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.543354172329437e-07, |
|
"logits/chosen": -4.918176174163818, |
|
"logits/rejected": -4.973766326904297, |
|
"logps/chosen": -557.7041015625, |
|
"logps/rejected": -446.3299865722656, |
|
"loss": 0.6046, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.08515395224094391, |
|
"rewards/margins": 0.29556483030319214, |
|
"rewards/rejected": -0.38071876764297485, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.5297635226963846e-07, |
|
"logits/chosen": -5.113317012786865, |
|
"logits/rejected": -4.7637176513671875, |
|
"logps/chosen": -674.4705810546875, |
|
"logps/rejected": -501.1588439941406, |
|
"loss": 0.571, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.0015305932611227036, |
|
"rewards/margins": 0.47964540123939514, |
|
"rewards/rejected": -0.4811759889125824, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.5161728730633325e-07, |
|
"logits/chosen": -5.13610315322876, |
|
"logits/rejected": -4.954715251922607, |
|
"logps/chosen": -534.7664794921875, |
|
"logps/rejected": -424.2696228027344, |
|
"loss": 0.607, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.15319520235061646, |
|
"rewards/margins": 0.3650510609149933, |
|
"rewards/rejected": -0.5182462930679321, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.50258222343028e-07, |
|
"logits/chosen": -5.178530693054199, |
|
"logits/rejected": -4.55206298828125, |
|
"logps/chosen": -707.4481811523438, |
|
"logps/rejected": -422.05035400390625, |
|
"loss": 0.5826, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.006741873919963837, |
|
"rewards/margins": 0.4363733232021332, |
|
"rewards/rejected": -0.42963147163391113, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.488991573797227e-07, |
|
"logits/chosen": -5.193602085113525, |
|
"logits/rejected": -4.635763645172119, |
|
"logps/chosen": -541.9158935546875, |
|
"logps/rejected": -412.94415283203125, |
|
"loss": 0.5933, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.12414856255054474, |
|
"rewards/margins": 0.29824933409690857, |
|
"rewards/rejected": -0.4223979115486145, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.475400924164175e-07, |
|
"logits/chosen": -5.302609443664551, |
|
"logits/rejected": -4.3719706535339355, |
|
"logps/chosen": -621.8121337890625, |
|
"logps/rejected": -496.27423095703125, |
|
"loss": 0.5573, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.0030913546215742826, |
|
"rewards/margins": 0.5280329585075378, |
|
"rewards/rejected": -0.5249415636062622, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.4618102745311224e-07, |
|
"logits/chosen": -4.785793781280518, |
|
"logits/rejected": -4.529145240783691, |
|
"logps/chosen": -636.421142578125, |
|
"logps/rejected": -513.9705810546875, |
|
"loss": 0.5954, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.017498258501291275, |
|
"rewards/margins": 0.42999735474586487, |
|
"rewards/rejected": -0.44749563932418823, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.4482196248980697e-07, |
|
"logits/chosen": -5.281703948974609, |
|
"logits/rejected": -4.8774003982543945, |
|
"logps/chosen": -571.9153442382812, |
|
"logps/rejected": -460.17022705078125, |
|
"loss": 0.6264, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.2108837068080902, |
|
"rewards/margins": 0.2501353919506073, |
|
"rewards/rejected": -0.4610190987586975, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.4346289752650176e-07, |
|
"logits/chosen": -5.042937278747559, |
|
"logits/rejected": -4.787137031555176, |
|
"logps/chosen": -647.1984252929688, |
|
"logps/rejected": -505.620361328125, |
|
"loss": 0.6232, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.06373582035303116, |
|
"rewards/margins": 0.34771889448165894, |
|
"rewards/rejected": -0.4114547371864319, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.421038325631965e-07, |
|
"logits/chosen": -4.816788673400879, |
|
"logits/rejected": -4.839449405670166, |
|
"logps/chosen": -594.6710815429688, |
|
"logps/rejected": -410.1011657714844, |
|
"loss": 0.5337, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.058069534599781036, |
|
"rewards/margins": 0.4649524688720703, |
|
"rewards/rejected": -0.4068829417228699, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.407447675998913e-07, |
|
"logits/chosen": -5.119754314422607, |
|
"logits/rejected": -4.477426052093506, |
|
"logps/chosen": -607.0371704101562, |
|
"logps/rejected": -424.75164794921875, |
|
"loss": 0.5504, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.08852221071720123, |
|
"rewards/margins": 0.56838458776474, |
|
"rewards/rejected": -0.65690678358078, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.39385702636586e-07, |
|
"logits/chosen": -4.7978363037109375, |
|
"logits/rejected": -4.382967948913574, |
|
"logps/chosen": -581.29248046875, |
|
"logps/rejected": -446.77886962890625, |
|
"loss": 0.5803, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2044920176267624, |
|
"rewards/margins": 0.3723471164703369, |
|
"rewards/rejected": -0.5768391489982605, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.3802663767328075e-07, |
|
"logits/chosen": -5.002087593078613, |
|
"logits/rejected": -4.465549945831299, |
|
"logps/chosen": -565.1025390625, |
|
"logps/rejected": -443.71881103515625, |
|
"loss": 0.5596, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.0946217030286789, |
|
"rewards/margins": 0.4325682520866394, |
|
"rewards/rejected": -0.5271899700164795, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.366675727099755e-07, |
|
"logits/chosen": -5.237046718597412, |
|
"logits/rejected": -4.730754375457764, |
|
"logps/chosen": -572.2055053710938, |
|
"logps/rejected": -445.2671813964844, |
|
"loss": 0.5831, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.00032152156927622855, |
|
"rewards/margins": 0.5232617855072021, |
|
"rewards/rejected": -0.5235832929611206, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.3530850774667027e-07, |
|
"logits/chosen": -4.660307884216309, |
|
"logits/rejected": -4.82083797454834, |
|
"logps/chosen": -571.3373413085938, |
|
"logps/rejected": -476.90240478515625, |
|
"loss": 0.569, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.10375179350376129, |
|
"rewards/margins": 0.4479514956474304, |
|
"rewards/rejected": -0.5517033338546753, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.3394944278336506e-07, |
|
"logits/chosen": -5.485714912414551, |
|
"logits/rejected": -4.80424690246582, |
|
"logps/chosen": -647.6866455078125, |
|
"logps/rejected": -514.752197265625, |
|
"loss": 0.5722, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.026715148240327835, |
|
"rewards/margins": 0.4548015594482422, |
|
"rewards/rejected": -0.48151668906211853, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.325903778200598e-07, |
|
"logits/chosen": -4.8239054679870605, |
|
"logits/rejected": -4.809833526611328, |
|
"logps/chosen": -474.703857421875, |
|
"logps/rejected": -464.36767578125, |
|
"loss": 0.5746, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.19570307433605194, |
|
"rewards/margins": 0.46426302194595337, |
|
"rewards/rejected": -0.6599661111831665, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.3123131285675453e-07, |
|
"logits/chosen": -4.975176811218262, |
|
"logits/rejected": -4.977343559265137, |
|
"logps/chosen": -562.6448974609375, |
|
"logps/rejected": -458.48065185546875, |
|
"loss": 0.6136, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.13624700903892517, |
|
"rewards/margins": 0.30273160338401794, |
|
"rewards/rejected": -0.4389786124229431, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.2987224789344926e-07, |
|
"logits/chosen": -5.58438777923584, |
|
"logits/rejected": -5.220755577087402, |
|
"logps/chosen": -562.8697509765625, |
|
"logps/rejected": -392.17755126953125, |
|
"loss": 0.599, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.1068628579378128, |
|
"rewards/margins": 0.5429280996322632, |
|
"rewards/rejected": -0.6497910022735596, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.2851318293014405e-07, |
|
"logits/chosen": -4.708364963531494, |
|
"logits/rejected": -4.156419277191162, |
|
"logps/chosen": -605.2653198242188, |
|
"logps/rejected": -456.384033203125, |
|
"loss": 0.5552, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.013813665136694908, |
|
"rewards/margins": 0.5981405973434448, |
|
"rewards/rejected": -0.5843268632888794, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.2715411796683884e-07, |
|
"logits/chosen": -5.233548164367676, |
|
"logits/rejected": -4.437680244445801, |
|
"logps/chosen": -627.4539794921875, |
|
"logps/rejected": -421.85565185546875, |
|
"loss": 0.576, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.025078674778342247, |
|
"rewards/margins": 0.5734429359436035, |
|
"rewards/rejected": -0.5985215902328491, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.257950530035335e-07, |
|
"logits/chosen": -5.098059177398682, |
|
"logits/rejected": -4.827805995941162, |
|
"logps/chosen": -589.9744873046875, |
|
"logps/rejected": -431.1255798339844, |
|
"loss": 0.5876, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.036339785903692245, |
|
"rewards/margins": 0.3825877606868744, |
|
"rewards/rejected": -0.41892752051353455, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.244359880402283e-07, |
|
"logits/chosen": -4.666082859039307, |
|
"logits/rejected": -4.680274963378906, |
|
"logps/chosen": -591.2884521484375, |
|
"logps/rejected": -430.6166076660156, |
|
"loss": 0.615, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.03429872542619705, |
|
"rewards/margins": 0.46906599402427673, |
|
"rewards/rejected": -0.4347672462463379, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.2307692307692304e-07, |
|
"logits/chosen": -5.038529396057129, |
|
"logits/rejected": -5.100456237792969, |
|
"logps/chosen": -573.0460815429688, |
|
"logps/rejected": -454.48968505859375, |
|
"loss": 0.5981, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.15040521323680878, |
|
"rewards/margins": 0.32461416721343994, |
|
"rewards/rejected": -0.4750193655490875, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.2171785811361783e-07, |
|
"logits/chosen": -5.336598873138428, |
|
"logits/rejected": -4.637759208679199, |
|
"logps/chosen": -672.5980224609375, |
|
"logps/rejected": -416.6004333496094, |
|
"loss": 0.6224, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.05844946578145027, |
|
"rewards/margins": 0.6305155158042908, |
|
"rewards/rejected": -0.6889649629592896, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.2035879315031256e-07, |
|
"logits/chosen": -4.696690559387207, |
|
"logits/rejected": -4.829110145568848, |
|
"logps/chosen": -568.7249755859375, |
|
"logps/rejected": -448.14190673828125, |
|
"loss": 0.5622, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.09914873540401459, |
|
"rewards/margins": 0.4367143511772156, |
|
"rewards/rejected": -0.5358631014823914, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.189997281870073e-07, |
|
"logits/chosen": -5.3184123039245605, |
|
"logits/rejected": -4.514608860015869, |
|
"logps/chosen": -576.1703491210938, |
|
"logps/rejected": -419.2657775878906, |
|
"loss": 0.6095, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.049643244594335556, |
|
"rewards/margins": 0.4539434313774109, |
|
"rewards/rejected": -0.5035867094993591, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.176406632237021e-07, |
|
"logits/chosen": -5.1099748611450195, |
|
"logits/rejected": -5.108311653137207, |
|
"logps/chosen": -621.3192138671875, |
|
"logps/rejected": -517.3109130859375, |
|
"loss": 0.6198, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.017213309183716774, |
|
"rewards/margins": 0.4413982033729553, |
|
"rewards/rejected": -0.4586115777492523, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.162815982603968e-07, |
|
"logits/chosen": -4.819340705871582, |
|
"logits/rejected": -4.71310567855835, |
|
"logps/chosen": -657.822509765625, |
|
"logps/rejected": -436.1148376464844, |
|
"loss": 0.5477, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.050711147487163544, |
|
"rewards/margins": 0.4844001233577728, |
|
"rewards/rejected": -0.5351113080978394, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.149225332970916e-07, |
|
"logits/chosen": -5.180826187133789, |
|
"logits/rejected": -4.553278923034668, |
|
"logps/chosen": -616.3976440429688, |
|
"logps/rejected": -428.14501953125, |
|
"loss": 0.5513, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.09903495013713837, |
|
"rewards/margins": 0.43739914894104004, |
|
"rewards/rejected": -0.5364341139793396, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.1356346833378634e-07, |
|
"logits/chosen": -5.0159783363342285, |
|
"logits/rejected": -4.414090633392334, |
|
"logps/chosen": -532.2962646484375, |
|
"logps/rejected": -394.74188232421875, |
|
"loss": 0.5446, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.10811267048120499, |
|
"rewards/margins": 0.3885238468647003, |
|
"rewards/rejected": -0.4966364800930023, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.122044033704811e-07, |
|
"logits/chosen": -4.964326858520508, |
|
"logits/rejected": -4.606993675231934, |
|
"logps/chosen": -636.4017944335938, |
|
"logps/rejected": -483.2483825683594, |
|
"loss": 0.5947, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.03904043883085251, |
|
"rewards/margins": 0.44281521439552307, |
|
"rewards/rejected": -0.48185569047927856, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.108453384071758e-07, |
|
"logits/chosen": -4.908474445343018, |
|
"logits/rejected": -4.149386882781982, |
|
"logps/chosen": -553.094482421875, |
|
"logps/rejected": -415.75115966796875, |
|
"loss": 0.561, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.0005749926203861833, |
|
"rewards/margins": 0.5852338075637817, |
|
"rewards/rejected": -0.5858088135719299, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.094862734438706e-07, |
|
"logits/chosen": -4.999358177185059, |
|
"logits/rejected": -4.587876319885254, |
|
"logps/chosen": -549.2117919921875, |
|
"logps/rejected": -410.4534606933594, |
|
"loss": 0.5268, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.09028232097625732, |
|
"rewards/margins": 0.588280439376831, |
|
"rewards/rejected": -0.6785627603530884, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.081272084805654e-07, |
|
"logits/chosen": -4.444740295410156, |
|
"logits/rejected": -4.594709873199463, |
|
"logps/chosen": -635.481201171875, |
|
"logps/rejected": -535.2267456054688, |
|
"loss": 0.5636, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.06267094612121582, |
|
"rewards/margins": 0.4273379445075989, |
|
"rewards/rejected": -0.4900088906288147, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.067681435172601e-07, |
|
"logits/chosen": -4.936141014099121, |
|
"logits/rejected": -5.240883827209473, |
|
"logps/chosen": -575.3516845703125, |
|
"logps/rejected": -480.3345642089844, |
|
"loss": 0.5654, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.02528349682688713, |
|
"rewards/margins": 0.6120938062667847, |
|
"rewards/rejected": -0.5868103504180908, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.0540907855395485e-07, |
|
"logits/chosen": -5.011609077453613, |
|
"logits/rejected": -4.755931377410889, |
|
"logps/chosen": -560.1868896484375, |
|
"logps/rejected": -472.1454162597656, |
|
"loss": 0.5761, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.17354989051818848, |
|
"rewards/margins": 0.3528314232826233, |
|
"rewards/rejected": -0.5263813138008118, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.040500135906496e-07, |
|
"logits/chosen": -5.133852005004883, |
|
"logits/rejected": -5.064622402191162, |
|
"logps/chosen": -621.9901123046875, |
|
"logps/rejected": -571.9163818359375, |
|
"loss": 0.6164, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.05665457993745804, |
|
"rewards/margins": 0.3919576108455658, |
|
"rewards/rejected": -0.44861215353012085, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.026909486273444e-07, |
|
"logits/chosen": -4.732084274291992, |
|
"logits/rejected": -4.557053565979004, |
|
"logps/chosen": -545.7122802734375, |
|
"logps/rejected": -464.310302734375, |
|
"loss": 0.6249, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.19330564141273499, |
|
"rewards/margins": 0.32232436537742615, |
|
"rewards/rejected": -0.5156300067901611, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.0133188366403916e-07, |
|
"logits/chosen": -4.645724773406982, |
|
"logits/rejected": -4.4923095703125, |
|
"logps/chosen": -622.0107421875, |
|
"logps/rejected": -484.76800537109375, |
|
"loss": 0.6126, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.07238127291202545, |
|
"rewards/margins": 0.4415339529514313, |
|
"rewards/rejected": -0.5139152407646179, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.9997281870073385e-07, |
|
"logits/chosen": -5.044188976287842, |
|
"logits/rejected": -4.4092912673950195, |
|
"logps/chosen": -694.5960083007812, |
|
"logps/rejected": -435.5107421875, |
|
"loss": 0.5151, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.042339447885751724, |
|
"rewards/margins": 0.5750529170036316, |
|
"rewards/rejected": -0.5327135324478149, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.9861375373742863e-07, |
|
"logits/chosen": -4.965481758117676, |
|
"logits/rejected": -5.099541664123535, |
|
"logps/chosen": -572.3638305664062, |
|
"logps/rejected": -371.61663818359375, |
|
"loss": 0.6066, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.2305530607700348, |
|
"rewards/margins": 0.3573130667209625, |
|
"rewards/rejected": -0.5878661274909973, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.9725468877412337e-07, |
|
"logits/chosen": -5.048774719238281, |
|
"logits/rejected": -4.542634010314941, |
|
"logps/chosen": -552.0947875976562, |
|
"logps/rejected": -455.07025146484375, |
|
"loss": 0.5745, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.1393623650074005, |
|
"rewards/margins": 0.44824647903442383, |
|
"rewards/rejected": -0.587608814239502, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.9589562381081816e-07, |
|
"logits/chosen": -5.307036876678467, |
|
"logits/rejected": -5.147672176361084, |
|
"logps/chosen": -506.1968688964844, |
|
"logps/rejected": -428.904296875, |
|
"loss": 0.6057, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.2355213463306427, |
|
"rewards/margins": 0.3196939527988434, |
|
"rewards/rejected": -0.5552152991294861, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.945365588475129e-07, |
|
"logits/chosen": -4.7375311851501465, |
|
"logits/rejected": -4.652678489685059, |
|
"logps/chosen": -612.0572509765625, |
|
"logps/rejected": -466.7972717285156, |
|
"loss": 0.5814, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.0007326111081056297, |
|
"rewards/margins": 0.4344883859157562, |
|
"rewards/rejected": -0.43522095680236816, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.931774938842076e-07, |
|
"logits/chosen": -4.782444953918457, |
|
"logits/rejected": -4.471555233001709, |
|
"logps/chosen": -639.8296508789062, |
|
"logps/rejected": -462.0065002441406, |
|
"loss": 0.5844, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.056622106581926346, |
|
"rewards/margins": 0.39103665947914124, |
|
"rewards/rejected": -0.4476587772369385, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.918184289209024e-07, |
|
"logits/chosen": -4.838658809661865, |
|
"logits/rejected": -4.814244747161865, |
|
"logps/chosen": -602.7030029296875, |
|
"logps/rejected": -435.02081298828125, |
|
"loss": 0.5792, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.030191833153367043, |
|
"rewards/margins": 0.6228463053703308, |
|
"rewards/rejected": -0.6530382037162781, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.9045936395759715e-07, |
|
"logits/chosen": -5.35817289352417, |
|
"logits/rejected": -4.868961334228516, |
|
"logps/chosen": -609.6304931640625, |
|
"logps/rejected": -530.9454956054688, |
|
"loss": 0.6597, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.1846841722726822, |
|
"rewards/margins": 0.1106841117143631, |
|
"rewards/rejected": -0.2953682541847229, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": -5.090588569641113, |
|
"eval_logits/rejected": -4.794471740722656, |
|
"eval_logps/chosen": -589.0576782226562, |
|
"eval_logps/rejected": -448.0440673828125, |
|
"eval_loss": 0.588729202747345, |
|
"eval_rewards/accuracies": 0.6700000166893005, |
|
"eval_rewards/chosen": -0.07880854606628418, |
|
"eval_rewards/margins": 0.4715493321418762, |
|
"eval_rewards/rejected": -0.5503579378128052, |
|
"eval_runtime": 106.4173, |
|
"eval_samples_per_second": 18.794, |
|
"eval_steps_per_second": 1.175, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.8910029899429193e-07, |
|
"logits/chosen": -5.132681846618652, |
|
"logits/rejected": -4.960105895996094, |
|
"logps/chosen": -588.9244995117188, |
|
"logps/rejected": -428.7511291503906, |
|
"loss": 0.6207, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.21401521563529968, |
|
"rewards/margins": 0.3573240637779236, |
|
"rewards/rejected": -0.5713392496109009, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.8774123403098667e-07, |
|
"logits/chosen": -5.2607316970825195, |
|
"logits/rejected": -4.853170394897461, |
|
"logps/chosen": -510.7887268066406, |
|
"logps/rejected": -449.23388671875, |
|
"loss": 0.5792, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.1676936149597168, |
|
"rewards/margins": 0.4615322947502136, |
|
"rewards/rejected": -0.6292259097099304, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.863821690676814e-07, |
|
"logits/chosen": -4.826608180999756, |
|
"logits/rejected": -4.439766883850098, |
|
"logps/chosen": -584.8558349609375, |
|
"logps/rejected": -467.75042724609375, |
|
"loss": 0.571, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.15133224427700043, |
|
"rewards/margins": 0.34150105714797974, |
|
"rewards/rejected": -0.49283328652381897, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.8502310410437614e-07, |
|
"logits/chosen": -4.53078556060791, |
|
"logits/rejected": -4.410236358642578, |
|
"logps/chosen": -554.697509765625, |
|
"logps/rejected": -405.0028076171875, |
|
"loss": 0.6074, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.10526251792907715, |
|
"rewards/margins": 0.43528643250465393, |
|
"rewards/rejected": -0.5405489206314087, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.836640391410709e-07, |
|
"logits/chosen": -5.252594470977783, |
|
"logits/rejected": -4.650245189666748, |
|
"logps/chosen": -598.9107055664062, |
|
"logps/rejected": -387.4427795410156, |
|
"loss": 0.5583, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.08027671277523041, |
|
"rewards/margins": 0.5926394462585449, |
|
"rewards/rejected": -0.6729162931442261, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.823049741777657e-07, |
|
"logits/chosen": -4.956867694854736, |
|
"logits/rejected": -4.863291263580322, |
|
"logps/chosen": -556.4575805664062, |
|
"logps/rejected": -470.60906982421875, |
|
"loss": 0.6279, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.17006051540374756, |
|
"rewards/margins": 0.27765610814094543, |
|
"rewards/rejected": -0.4477166533470154, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.8094590921446045e-07, |
|
"logits/chosen": -5.26237678527832, |
|
"logits/rejected": -4.90781831741333, |
|
"logps/chosen": -585.8365478515625, |
|
"logps/rejected": -457.3870544433594, |
|
"loss": 0.6522, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.08809679746627808, |
|
"rewards/margins": 0.2951076626777649, |
|
"rewards/rejected": -0.38320446014404297, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.795868442511552e-07, |
|
"logits/chosen": -5.097253322601318, |
|
"logits/rejected": -4.8938751220703125, |
|
"logps/chosen": -553.80029296875, |
|
"logps/rejected": -450.6329040527344, |
|
"loss": 0.5777, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.13567152619361877, |
|
"rewards/margins": 0.40010422468185425, |
|
"rewards/rejected": -0.5357757806777954, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.782277792878499e-07, |
|
"logits/chosen": -5.1099653244018555, |
|
"logits/rejected": -4.922682762145996, |
|
"logps/chosen": -599.4254150390625, |
|
"logps/rejected": -531.6323852539062, |
|
"loss": 0.5713, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.05410848930478096, |
|
"rewards/margins": 0.5033014416694641, |
|
"rewards/rejected": -0.44919291138648987, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.768687143245447e-07, |
|
"logits/chosen": -4.754248142242432, |
|
"logits/rejected": -4.799590110778809, |
|
"logps/chosen": -675.2301025390625, |
|
"logps/rejected": -487.0110778808594, |
|
"loss": 0.5704, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.06325565278530121, |
|
"rewards/margins": 0.5369106531143188, |
|
"rewards/rejected": -0.47365492582321167, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.755096493612395e-07, |
|
"logits/chosen": -4.940129280090332, |
|
"logits/rejected": -4.485627174377441, |
|
"logps/chosen": -634.7139892578125, |
|
"logps/rejected": -507.569580078125, |
|
"loss": 0.6026, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.032657913863658905, |
|
"rewards/margins": 0.32800525426864624, |
|
"rewards/rejected": -0.36066314578056335, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.7415058439793417e-07, |
|
"logits/chosen": -5.1175336837768555, |
|
"logits/rejected": -4.806564807891846, |
|
"logps/chosen": -612.5916748046875, |
|
"logps/rejected": -467.4063415527344, |
|
"loss": 0.5517, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.1026410236954689, |
|
"rewards/margins": 0.5207311511039734, |
|
"rewards/rejected": -0.41809016466140747, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.7279151943462896e-07, |
|
"logits/chosen": -5.094006538391113, |
|
"logits/rejected": -4.8595170974731445, |
|
"logps/chosen": -559.3692626953125, |
|
"logps/rejected": -455.74493408203125, |
|
"loss": 0.6017, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.08821893483400345, |
|
"rewards/margins": 0.31291455030441284, |
|
"rewards/rejected": -0.40113353729248047, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.714324544713237e-07, |
|
"logits/chosen": -5.375515460968018, |
|
"logits/rejected": -4.94734001159668, |
|
"logps/chosen": -541.21826171875, |
|
"logps/rejected": -456.67144775390625, |
|
"loss": 0.5765, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.10544973611831665, |
|
"rewards/margins": 0.43963623046875, |
|
"rewards/rejected": -0.5450860261917114, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.700733895080185e-07, |
|
"logits/chosen": -4.819343566894531, |
|
"logits/rejected": -4.394247055053711, |
|
"logps/chosen": -607.6382446289062, |
|
"logps/rejected": -410.609619140625, |
|
"loss": 0.5681, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.15860453248023987, |
|
"rewards/margins": 0.6440173983573914, |
|
"rewards/rejected": -0.48541292548179626, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.687143245447132e-07, |
|
"logits/chosen": -5.074875354766846, |
|
"logits/rejected": -4.492025375366211, |
|
"logps/chosen": -566.6473388671875, |
|
"logps/rejected": -414.416015625, |
|
"loss": 0.5601, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.01561739295721054, |
|
"rewards/margins": 0.39994779229164124, |
|
"rewards/rejected": -0.4155651926994324, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.6735525958140795e-07, |
|
"logits/chosen": -4.827418327331543, |
|
"logits/rejected": -4.503185749053955, |
|
"logps/chosen": -538.5560913085938, |
|
"logps/rejected": -497.1845703125, |
|
"loss": 0.6146, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.04222496598958969, |
|
"rewards/margins": 0.3792383074760437, |
|
"rewards/rejected": -0.4214633107185364, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.6599619461810274e-07, |
|
"logits/chosen": -4.778945446014404, |
|
"logits/rejected": -5.135014533996582, |
|
"logps/chosen": -618.1221923828125, |
|
"logps/rejected": -528.7874755859375, |
|
"loss": 0.5879, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.09977498650550842, |
|
"rewards/margins": 0.4105672836303711, |
|
"rewards/rejected": -0.3107922375202179, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.6463712965479747e-07, |
|
"logits/chosen": -4.888424396514893, |
|
"logits/rejected": -4.577418327331543, |
|
"logps/chosen": -577.0140991210938, |
|
"logps/rejected": -447.93212890625, |
|
"loss": 0.6051, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.053088851273059845, |
|
"rewards/margins": 0.49817705154418945, |
|
"rewards/rejected": -0.4450882077217102, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.6327806469149226e-07, |
|
"logits/chosen": -4.80429744720459, |
|
"logits/rejected": -4.898682594299316, |
|
"logps/chosen": -498.6844787597656, |
|
"logps/rejected": -423.1040954589844, |
|
"loss": 0.6116, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.09284614026546478, |
|
"rewards/margins": 0.33597826957702637, |
|
"rewards/rejected": -0.4288244843482971, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.61918999728187e-07, |
|
"logits/chosen": -4.932044506072998, |
|
"logits/rejected": -4.503706455230713, |
|
"logps/chosen": -721.5752563476562, |
|
"logps/rejected": -512.78369140625, |
|
"loss": 0.5663, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.21977362036705017, |
|
"rewards/margins": 0.5384609699249268, |
|
"rewards/rejected": -0.3186873197555542, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.6055993476488173e-07, |
|
"logits/chosen": -4.647530555725098, |
|
"logits/rejected": -4.263665199279785, |
|
"logps/chosen": -597.6514892578125, |
|
"logps/rejected": -423.85260009765625, |
|
"loss": 0.6171, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.012402093037962914, |
|
"rewards/margins": 0.41996484994888306, |
|
"rewards/rejected": -0.4323669970035553, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.5920086980157646e-07, |
|
"logits/chosen": -5.303116321563721, |
|
"logits/rejected": -4.703645706176758, |
|
"logps/chosen": -552.6138916015625, |
|
"logps/rejected": -469.75811767578125, |
|
"loss": 0.5929, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.12344861030578613, |
|
"rewards/margins": 0.4382708966732025, |
|
"rewards/rejected": -0.314822256565094, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.5784180483827125e-07, |
|
"logits/chosen": -5.118283748626709, |
|
"logits/rejected": -4.543520927429199, |
|
"logps/chosen": -555.630126953125, |
|
"logps/rejected": -487.35498046875, |
|
"loss": 0.6097, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.06331979483366013, |
|
"rewards/margins": 0.3617437481880188, |
|
"rewards/rejected": -0.2984239459037781, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.5648273987496604e-07, |
|
"logits/chosen": -5.459466457366943, |
|
"logits/rejected": -4.4333906173706055, |
|
"logps/chosen": -588.8739624023438, |
|
"logps/rejected": -399.8832702636719, |
|
"loss": 0.5699, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.09981145709753036, |
|
"rewards/margins": 0.4524534344673157, |
|
"rewards/rejected": -0.35264191031455994, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.5512367491166077e-07, |
|
"logits/chosen": -4.973242282867432, |
|
"logits/rejected": -4.562270164489746, |
|
"logps/chosen": -551.7763671875, |
|
"logps/rejected": -402.52655029296875, |
|
"loss": 0.5732, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.16520874202251434, |
|
"rewards/margins": 0.5079749822616577, |
|
"rewards/rejected": -0.3427662253379822, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.537646099483555e-07, |
|
"logits/chosen": -4.993309020996094, |
|
"logits/rejected": -5.029820442199707, |
|
"logps/chosen": -520.9195556640625, |
|
"logps/rejected": -385.7127380371094, |
|
"loss": 0.573, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.0681810975074768, |
|
"rewards/margins": 0.5013130903244019, |
|
"rewards/rejected": -0.4331319332122803, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.5240554498505024e-07, |
|
"logits/chosen": -4.839926242828369, |
|
"logits/rejected": -4.778874397277832, |
|
"logps/chosen": -558.1290283203125, |
|
"logps/rejected": -384.0828552246094, |
|
"loss": 0.5617, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.092664934694767, |
|
"rewards/margins": 0.5813394784927368, |
|
"rewards/rejected": -0.48867446184158325, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.5104648002174503e-07, |
|
"logits/chosen": -5.055529594421387, |
|
"logits/rejected": -4.888724327087402, |
|
"logps/chosen": -568.4841918945312, |
|
"logps/rejected": -433.59930419921875, |
|
"loss": 0.6144, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.021698763594031334, |
|
"rewards/margins": 0.37912648916244507, |
|
"rewards/rejected": -0.40082526206970215, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.496874150584398e-07, |
|
"logits/chosen": -5.203982353210449, |
|
"logits/rejected": -4.94318151473999, |
|
"logps/chosen": -483.5262756347656, |
|
"logps/rejected": -391.9451904296875, |
|
"loss": 0.5664, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.048337481915950775, |
|
"rewards/margins": 0.40044230222702026, |
|
"rewards/rejected": -0.44877976179122925, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.483283500951345e-07, |
|
"logits/chosen": -4.813787460327148, |
|
"logits/rejected": -4.484375953674316, |
|
"logps/chosen": -717.718017578125, |
|
"logps/rejected": -509.1785583496094, |
|
"loss": 0.5485, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.19200703501701355, |
|
"rewards/margins": 0.5683521032333374, |
|
"rewards/rejected": -0.37634506821632385, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.469692851318293e-07, |
|
"logits/chosen": -5.500855445861816, |
|
"logits/rejected": -4.6635870933532715, |
|
"logps/chosen": -628.2833251953125, |
|
"logps/rejected": -458.0882873535156, |
|
"loss": 0.5908, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.07916983217000961, |
|
"rewards/margins": 0.5226668119430542, |
|
"rewards/rejected": -0.4434970021247864, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.45610220168524e-07, |
|
"logits/chosen": -4.949522495269775, |
|
"logits/rejected": -4.809584140777588, |
|
"logps/chosen": -522.9071044921875, |
|
"logps/rejected": -479.11700439453125, |
|
"loss": 0.6266, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.0525810606777668, |
|
"rewards/margins": 0.24236159026622772, |
|
"rewards/rejected": -0.2949426770210266, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.442511552052188e-07, |
|
"logits/chosen": -4.881261825561523, |
|
"logits/rejected": -5.100898265838623, |
|
"logps/chosen": -484.083984375, |
|
"logps/rejected": -514.7659912109375, |
|
"loss": 0.5984, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.06116398051381111, |
|
"rewards/margins": 0.3284783959388733, |
|
"rewards/rejected": -0.2673143744468689, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.4289209024191354e-07, |
|
"logits/chosen": -5.381341457366943, |
|
"logits/rejected": -5.32161808013916, |
|
"logps/chosen": -575.6818237304688, |
|
"logps/rejected": -414.44921875, |
|
"loss": 0.5427, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.06358791887760162, |
|
"rewards/margins": 0.472216933965683, |
|
"rewards/rejected": -0.5358048677444458, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.415330252786083e-07, |
|
"logits/chosen": -5.047477722167969, |
|
"logits/rejected": -4.687131404876709, |
|
"logps/chosen": -623.3069458007812, |
|
"logps/rejected": -538.3536376953125, |
|
"loss": 0.6101, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.029577601701021194, |
|
"rewards/margins": 0.3733888864517212, |
|
"rewards/rejected": -0.3438113033771515, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.4017396031530306e-07, |
|
"logits/chosen": -5.100833415985107, |
|
"logits/rejected": -5.073991298675537, |
|
"logps/chosen": -711.748046875, |
|
"logps/rejected": -509.7862243652344, |
|
"loss": 0.5531, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.15278328955173492, |
|
"rewards/margins": 0.5438005924224854, |
|
"rewards/rejected": -0.3910173773765564, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.388148953519978e-07, |
|
"logits/chosen": -4.926814079284668, |
|
"logits/rejected": -4.619027614593506, |
|
"logps/chosen": -612.6145629882812, |
|
"logps/rejected": -448.18621826171875, |
|
"loss": 0.5861, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.059671830385923386, |
|
"rewards/margins": 0.3946450352668762, |
|
"rewards/rejected": -0.3349732458591461, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.374558303886926e-07, |
|
"logits/chosen": -5.215226173400879, |
|
"logits/rejected": -4.522857189178467, |
|
"logps/chosen": -645.0291748046875, |
|
"logps/rejected": -458.01458740234375, |
|
"loss": 0.5513, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.13360649347305298, |
|
"rewards/margins": 0.6589222550392151, |
|
"rewards/rejected": -0.5253156423568726, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.360967654253873e-07, |
|
"logits/chosen": -5.343368053436279, |
|
"logits/rejected": -4.6446380615234375, |
|
"logps/chosen": -642.7989501953125, |
|
"logps/rejected": -497.9615173339844, |
|
"loss": 0.6191, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.11626561731100082, |
|
"rewards/margins": 0.4873886704444885, |
|
"rewards/rejected": -0.3711230456829071, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.3473770046208206e-07, |
|
"logits/chosen": -5.116816520690918, |
|
"logits/rejected": -4.710324287414551, |
|
"logps/chosen": -493.3133850097656, |
|
"logps/rejected": -378.7680969238281, |
|
"loss": 0.5325, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.07064076513051987, |
|
"rewards/margins": 0.5043250322341919, |
|
"rewards/rejected": -0.5749658346176147, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.3337863549877684e-07, |
|
"logits/chosen": -5.086273193359375, |
|
"logits/rejected": -4.876503944396973, |
|
"logps/chosen": -597.3706665039062, |
|
"logps/rejected": -440.29522705078125, |
|
"loss": 0.5873, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.021745752543210983, |
|
"rewards/margins": 0.42672309279441833, |
|
"rewards/rejected": -0.4484688639640808, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.320195705354716e-07, |
|
"logits/chosen": -5.1945905685424805, |
|
"logits/rejected": -4.673881530761719, |
|
"logps/chosen": -500.9388732910156, |
|
"logps/rejected": -457.3580017089844, |
|
"loss": 0.556, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.20997877418994904, |
|
"rewards/margins": 0.44773179292678833, |
|
"rewards/rejected": -0.6577105522155762, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.3066050557216636e-07, |
|
"logits/chosen": -5.183767795562744, |
|
"logits/rejected": -4.868539333343506, |
|
"logps/chosen": -676.839111328125, |
|
"logps/rejected": -534.795166015625, |
|
"loss": 0.5611, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.12110471725463867, |
|
"rewards/margins": 0.6017159819602966, |
|
"rewards/rejected": -0.48061123490333557, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.293014406088611e-07, |
|
"logits/chosen": -4.892951965332031, |
|
"logits/rejected": -4.403754711151123, |
|
"logps/chosen": -601.4327392578125, |
|
"logps/rejected": -494.6481018066406, |
|
"loss": 0.5855, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.09234030544757843, |
|
"rewards/margins": 0.4433468282222748, |
|
"rewards/rejected": -0.5356870889663696, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.2794237564555583e-07, |
|
"logits/chosen": -5.250612258911133, |
|
"logits/rejected": -5.025344371795654, |
|
"logps/chosen": -717.2879638671875, |
|
"logps/rejected": -576.2543334960938, |
|
"loss": 0.583, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.09430352598428726, |
|
"rewards/margins": 0.5691030025482178, |
|
"rewards/rejected": -0.4747994542121887, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.2658331068225057e-07, |
|
"logits/chosen": -4.8504767417907715, |
|
"logits/rejected": -5.307365417480469, |
|
"logps/chosen": -681.6648559570312, |
|
"logps/rejected": -590.7325439453125, |
|
"loss": 0.6042, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.08806881308555603, |
|
"rewards/margins": 0.5291630029678345, |
|
"rewards/rejected": -0.44109421968460083, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.2522424571894536e-07, |
|
"logits/chosen": -5.252363204956055, |
|
"logits/rejected": -4.908774375915527, |
|
"logps/chosen": -534.4985961914062, |
|
"logps/rejected": -413.527099609375, |
|
"loss": 0.5693, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.09185522794723511, |
|
"rewards/margins": 0.6131922602653503, |
|
"rewards/rejected": -0.52133709192276, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.2386518075564014e-07, |
|
"logits/chosen": -4.975606441497803, |
|
"logits/rejected": -5.050392150878906, |
|
"logps/chosen": -532.093017578125, |
|
"logps/rejected": -474.6080017089844, |
|
"loss": 0.5641, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.04556658864021301, |
|
"rewards/margins": 0.5064549446105957, |
|
"rewards/rejected": -0.4608883261680603, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.225061157923348e-07, |
|
"logits/chosen": -4.692256450653076, |
|
"logits/rejected": -4.6152777671813965, |
|
"logps/chosen": -618.080322265625, |
|
"logps/rejected": -493.81591796875, |
|
"loss": 0.5814, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.06875093281269073, |
|
"rewards/margins": 0.5014289021492004, |
|
"rewards/rejected": -0.4326779246330261, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.211470508290296e-07, |
|
"logits/chosen": -5.330389499664307, |
|
"logits/rejected": -4.813447952270508, |
|
"logps/chosen": -640.2445068359375, |
|
"logps/rejected": -463.66876220703125, |
|
"loss": 0.6027, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.01199892908334732, |
|
"rewards/margins": 0.4153195917606354, |
|
"rewards/rejected": -0.4273185133934021, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.1978798586572435e-07, |
|
"logits/chosen": -5.160652160644531, |
|
"logits/rejected": -5.030102252960205, |
|
"logps/chosen": -672.4517822265625, |
|
"logps/rejected": -530.814697265625, |
|
"loss": 0.567, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.07595814764499664, |
|
"rewards/margins": 0.6065148115158081, |
|
"rewards/rejected": -0.5305566787719727, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.1842892090241913e-07, |
|
"logits/chosen": -4.820624828338623, |
|
"logits/rejected": -4.647487640380859, |
|
"logps/chosen": -591.4603271484375, |
|
"logps/rejected": -454.6068420410156, |
|
"loss": 0.6181, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.07786037027835846, |
|
"rewards/margins": 0.39148765802383423, |
|
"rewards/rejected": -0.4693480134010315, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.1706985593911387e-07, |
|
"logits/chosen": -5.017200469970703, |
|
"logits/rejected": -4.60324239730835, |
|
"logps/chosen": -649.9214477539062, |
|
"logps/rejected": -520.6214599609375, |
|
"loss": 0.6101, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.0727464109659195, |
|
"rewards/margins": 0.34763583540916443, |
|
"rewards/rejected": -0.27488940954208374, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.157107909758086e-07, |
|
"logits/chosen": -4.8267669677734375, |
|
"logits/rejected": -4.830237865447998, |
|
"logps/chosen": -747.1302490234375, |
|
"logps/rejected": -542.8424072265625, |
|
"loss": 0.5405, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.07756751030683517, |
|
"rewards/margins": 0.639870822429657, |
|
"rewards/rejected": -0.5623033046722412, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.143517260125034e-07, |
|
"logits/chosen": -4.978399276733398, |
|
"logits/rejected": -5.0023088455200195, |
|
"logps/chosen": -533.4491577148438, |
|
"logps/rejected": -406.94329833984375, |
|
"loss": 0.6764, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.08706123381853104, |
|
"rewards/margins": 0.35440611839294434, |
|
"rewards/rejected": -0.44146737456321716, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.129926610491981e-07, |
|
"logits/chosen": -5.211610794067383, |
|
"logits/rejected": -4.5253987312316895, |
|
"logps/chosen": -648.8171997070312, |
|
"logps/rejected": -461.8702697753906, |
|
"loss": 0.6241, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.07190613448619843, |
|
"rewards/margins": 0.4674451947212219, |
|
"rewards/rejected": -0.3955390751361847, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.116335960858929e-07, |
|
"logits/chosen": -5.574213981628418, |
|
"logits/rejected": -4.852818012237549, |
|
"logps/chosen": -693.5277099609375, |
|
"logps/rejected": -520.2194213867188, |
|
"loss": 0.5759, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.1971389502286911, |
|
"rewards/margins": 0.5434707403182983, |
|
"rewards/rejected": -0.34633177518844604, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.1027453112258765e-07, |
|
"logits/chosen": -5.164004325866699, |
|
"logits/rejected": -4.828025817871094, |
|
"logps/chosen": -518.7059326171875, |
|
"logps/rejected": -396.88983154296875, |
|
"loss": 0.5758, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.023333771154284477, |
|
"rewards/margins": 0.512725830078125, |
|
"rewards/rejected": -0.48939210176467896, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.089154661592824e-07, |
|
"logits/chosen": -4.937495231628418, |
|
"logits/rejected": -4.612677574157715, |
|
"logps/chosen": -582.4169311523438, |
|
"logps/rejected": -454.89202880859375, |
|
"loss": 0.5465, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.04642937704920769, |
|
"rewards/margins": 0.36292964220046997, |
|
"rewards/rejected": -0.3165002465248108, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.0755640119597717e-07, |
|
"logits/chosen": -4.931238174438477, |
|
"logits/rejected": -4.779752254486084, |
|
"logps/chosen": -601.8740234375, |
|
"logps/rejected": -576.3049926757812, |
|
"loss": 0.5925, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.06317490339279175, |
|
"rewards/margins": 0.41317835450172424, |
|
"rewards/rejected": -0.3500033915042877, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.061973362326719e-07, |
|
"logits/chosen": -5.196651458740234, |
|
"logits/rejected": -4.927478313446045, |
|
"logps/chosen": -576.1328125, |
|
"logps/rejected": -465.5516662597656, |
|
"loss": 0.5701, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.0703674778342247, |
|
"rewards/margins": 0.5130087733268738, |
|
"rewards/rejected": -0.44264134764671326, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.048382712693667e-07, |
|
"logits/chosen": -4.793614864349365, |
|
"logits/rejected": -4.842940807342529, |
|
"logps/chosen": -599.1259765625, |
|
"logps/rejected": -518.8648071289062, |
|
"loss": 0.5838, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.038860831409692764, |
|
"rewards/margins": 0.4439505934715271, |
|
"rewards/rejected": -0.40508976578712463, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.034792063060614e-07, |
|
"logits/chosen": -5.246109962463379, |
|
"logits/rejected": -4.256648063659668, |
|
"logps/chosen": -623.5345458984375, |
|
"logps/rejected": -469.36260986328125, |
|
"loss": 0.5893, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0005898177623748779, |
|
"rewards/margins": 0.333055317401886, |
|
"rewards/rejected": -0.3324654698371887, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.0212014134275616e-07, |
|
"logits/chosen": -4.698599815368652, |
|
"logits/rejected": -4.9277753829956055, |
|
"logps/chosen": -537.7254028320312, |
|
"logps/rejected": -432.79254150390625, |
|
"loss": 0.5716, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.03875481337308884, |
|
"rewards/margins": 0.4677085876464844, |
|
"rewards/rejected": -0.4289538264274597, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.007610763794509e-07, |
|
"logits/chosen": -5.468576431274414, |
|
"logits/rejected": -5.0482497215271, |
|
"logps/chosen": -577.8678588867188, |
|
"logps/rejected": -448.755859375, |
|
"loss": 0.6101, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.03977590426802635, |
|
"rewards/margins": 0.40795421600341797, |
|
"rewards/rejected": -0.3681783080101013, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.994020114161457e-07, |
|
"logits/chosen": -5.248682975769043, |
|
"logits/rejected": -4.796721935272217, |
|
"logps/chosen": -526.3560791015625, |
|
"logps/rejected": -412.30401611328125, |
|
"loss": 0.5786, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.009044056758284569, |
|
"rewards/margins": 0.3918320834636688, |
|
"rewards/rejected": -0.4008761942386627, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.9804294645284047e-07, |
|
"logits/chosen": -5.161639213562012, |
|
"logits/rejected": -5.1383490562438965, |
|
"logps/chosen": -549.3775634765625, |
|
"logps/rejected": -435.27947998046875, |
|
"loss": 0.5723, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.16379335522651672, |
|
"rewards/margins": 0.5625983476638794, |
|
"rewards/rejected": -0.39880499243736267, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.9668388148953515e-07, |
|
"logits/chosen": -4.992415428161621, |
|
"logits/rejected": -4.46937894821167, |
|
"logps/chosen": -583.8298950195312, |
|
"logps/rejected": -446.05267333984375, |
|
"loss": 0.5645, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.13408790528774261, |
|
"rewards/margins": 0.4382530152797699, |
|
"rewards/rejected": -0.3041651248931885, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.9532481652622994e-07, |
|
"logits/chosen": -5.193965911865234, |
|
"logits/rejected": -5.035892963409424, |
|
"logps/chosen": -526.388427734375, |
|
"logps/rejected": -451.8531188964844, |
|
"loss": 0.6048, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.008205227553844452, |
|
"rewards/margins": 0.4235307276248932, |
|
"rewards/rejected": -0.4317359924316406, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.9396575156292467e-07, |
|
"logits/chosen": -4.930581092834473, |
|
"logits/rejected": -4.750922203063965, |
|
"logps/chosen": -611.9539794921875, |
|
"logps/rejected": -419.3814392089844, |
|
"loss": 0.6115, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.06710588932037354, |
|
"rewards/margins": 0.4377163350582123, |
|
"rewards/rejected": -0.37061044573783875, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.9260668659961946e-07, |
|
"logits/chosen": -5.439746856689453, |
|
"logits/rejected": -5.209362983703613, |
|
"logps/chosen": -657.6182861328125, |
|
"logps/rejected": -513.0152587890625, |
|
"loss": 0.5919, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.03497297689318657, |
|
"rewards/margins": 0.4590676724910736, |
|
"rewards/rejected": -0.42409467697143555, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.9124762163631425e-07, |
|
"logits/chosen": -4.818249702453613, |
|
"logits/rejected": -4.768403053283691, |
|
"logps/chosen": -653.0007934570312, |
|
"logps/rejected": -423.8858947753906, |
|
"loss": 0.523, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.21503737568855286, |
|
"rewards/margins": 0.66923987865448, |
|
"rewards/rejected": -0.4542025029659271, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.8988855667300893e-07, |
|
"logits/chosen": -5.165972709655762, |
|
"logits/rejected": -4.13810920715332, |
|
"logps/chosen": -563.0274047851562, |
|
"logps/rejected": -381.11297607421875, |
|
"loss": 0.6047, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.02564326301217079, |
|
"rewards/margins": 0.4730495810508728, |
|
"rewards/rejected": -0.49869289994239807, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.885294917097037e-07, |
|
"logits/chosen": -5.053162574768066, |
|
"logits/rejected": -4.775099277496338, |
|
"logps/chosen": -672.1310424804688, |
|
"logps/rejected": -519.7974243164062, |
|
"loss": 0.5661, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.14415240287780762, |
|
"rewards/margins": 0.4557925760746002, |
|
"rewards/rejected": -0.3116401433944702, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.8717042674639845e-07, |
|
"logits/chosen": -5.262818336486816, |
|
"logits/rejected": -5.100151062011719, |
|
"logps/chosen": -615.7294921875, |
|
"logps/rejected": -467.1444396972656, |
|
"loss": 0.5634, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.004898411221802235, |
|
"rewards/margins": 0.5748053789138794, |
|
"rewards/rejected": -0.569907009601593, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.8581136178309324e-07, |
|
"logits/chosen": -5.110268592834473, |
|
"logits/rejected": -4.89237117767334, |
|
"logps/chosen": -627.622802734375, |
|
"logps/rejected": -493.29803466796875, |
|
"loss": 0.5815, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.06722798198461533, |
|
"rewards/margins": 0.4168556332588196, |
|
"rewards/rejected": -0.3496275842189789, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.84452296819788e-07, |
|
"logits/chosen": -5.134549617767334, |
|
"logits/rejected": -5.063877582550049, |
|
"logps/chosen": -519.733642578125, |
|
"logps/rejected": -483.128662109375, |
|
"loss": 0.6415, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.04302790015935898, |
|
"rewards/margins": 0.2304501086473465, |
|
"rewards/rejected": -0.27347803115844727, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.830932318564827e-07, |
|
"logits/chosen": -5.221256256103516, |
|
"logits/rejected": -4.87565803527832, |
|
"logps/chosen": -588.2337646484375, |
|
"logps/rejected": -459.95770263671875, |
|
"loss": 0.6025, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.08087799698114395, |
|
"rewards/margins": 0.4586857259273529, |
|
"rewards/rejected": -0.37780776619911194, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.817341668931775e-07, |
|
"logits/chosen": -5.366186618804932, |
|
"logits/rejected": -4.619801044464111, |
|
"logps/chosen": -584.1773681640625, |
|
"logps/rejected": -424.52178955078125, |
|
"loss": 0.5836, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.015188613906502724, |
|
"rewards/margins": 0.5031098127365112, |
|
"rewards/rejected": -0.48792123794555664, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.8037510192987223e-07, |
|
"logits/chosen": -5.331225395202637, |
|
"logits/rejected": -4.693115711212158, |
|
"logps/chosen": -568.12939453125, |
|
"logps/rejected": -415.4103088378906, |
|
"loss": 0.579, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.09138574451208115, |
|
"rewards/margins": 0.46304386854171753, |
|
"rewards/rejected": -0.3716581463813782, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.79016036966567e-07, |
|
"logits/chosen": -5.22902774810791, |
|
"logits/rejected": -4.565291881561279, |
|
"logps/chosen": -663.4849243164062, |
|
"logps/rejected": -516.2536010742188, |
|
"loss": 0.5382, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.14414827525615692, |
|
"rewards/margins": 0.7415136098861694, |
|
"rewards/rejected": -0.5973652601242065, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.7765697200326175e-07, |
|
"logits/chosen": -5.32174825668335, |
|
"logits/rejected": -5.004895210266113, |
|
"logps/chosen": -652.4281005859375, |
|
"logps/rejected": -464.1463928222656, |
|
"loss": 0.5295, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.08608388900756836, |
|
"rewards/margins": 0.5602224469184875, |
|
"rewards/rejected": -0.47413843870162964, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.762979070399565e-07, |
|
"logits/chosen": -4.805876731872559, |
|
"logits/rejected": -4.962039947509766, |
|
"logps/chosen": -514.503662109375, |
|
"logps/rejected": -440.2759704589844, |
|
"loss": 0.5884, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.10992634296417236, |
|
"rewards/margins": 0.4109458327293396, |
|
"rewards/rejected": -0.5208722352981567, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.749388420766512e-07, |
|
"logits/chosen": -5.237261772155762, |
|
"logits/rejected": -4.973998546600342, |
|
"logps/chosen": -624.6343383789062, |
|
"logps/rejected": -472.2704162597656, |
|
"loss": 0.5279, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.25611644983291626, |
|
"rewards/margins": 0.7832332849502563, |
|
"rewards/rejected": -0.5271168947219849, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.73579777113346e-07, |
|
"logits/chosen": -5.345922946929932, |
|
"logits/rejected": -5.159511089324951, |
|
"logps/chosen": -544.3359375, |
|
"logps/rejected": -404.75738525390625, |
|
"loss": 0.5395, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.018667107447981834, |
|
"rewards/margins": 0.5646753907203674, |
|
"rewards/rejected": -0.546008288860321, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.722207121500408e-07, |
|
"logits/chosen": -5.2390313148498535, |
|
"logits/rejected": -5.117612838745117, |
|
"logps/chosen": -517.3175048828125, |
|
"logps/rejected": -434.38482666015625, |
|
"loss": 0.5567, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.030095791444182396, |
|
"rewards/margins": 0.4572904706001282, |
|
"rewards/rejected": -0.42719465494155884, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.708616471867355e-07, |
|
"logits/chosen": -4.929625988006592, |
|
"logits/rejected": -4.4647040367126465, |
|
"logps/chosen": -522.1158447265625, |
|
"logps/rejected": -373.1676330566406, |
|
"loss": 0.594, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.08331742882728577, |
|
"rewards/margins": 0.4487723410129547, |
|
"rewards/rejected": -0.5320898294448853, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.6950258222343027e-07, |
|
"logits/chosen": -5.052488327026367, |
|
"logits/rejected": -4.925723552703857, |
|
"logps/chosen": -523.0936889648438, |
|
"logps/rejected": -422.98455810546875, |
|
"loss": 0.5874, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.06005431339144707, |
|
"rewards/margins": 0.5165280103683472, |
|
"rewards/rejected": -0.5765823125839233, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.68143517260125e-07, |
|
"logits/chosen": -5.030156135559082, |
|
"logits/rejected": -5.466166973114014, |
|
"logps/chosen": -566.7584228515625, |
|
"logps/rejected": -479.91705322265625, |
|
"loss": 0.5815, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.009148592129349709, |
|
"rewards/margins": 0.3955579996109009, |
|
"rewards/rejected": -0.4047066271305084, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.667844522968198e-07, |
|
"logits/chosen": -5.068789482116699, |
|
"logits/rejected": -4.769230842590332, |
|
"logps/chosen": -615.9959716796875, |
|
"logps/rejected": -434.10491943359375, |
|
"loss": 0.5802, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.1012858897447586, |
|
"rewards/margins": 0.6581977605819702, |
|
"rewards/rejected": -0.5569119453430176, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.654253873335146e-07, |
|
"logits/chosen": -5.509184837341309, |
|
"logits/rejected": -4.679028511047363, |
|
"logps/chosen": -683.6079711914062, |
|
"logps/rejected": -507.5233459472656, |
|
"loss": 0.5501, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.13926604390144348, |
|
"rewards/margins": 0.7854963541030884, |
|
"rewards/rejected": -0.6462303400039673, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.6406632237020926e-07, |
|
"logits/chosen": -4.794188022613525, |
|
"logits/rejected": -5.179261684417725, |
|
"logps/chosen": -580.4613647460938, |
|
"logps/rejected": -535.4896240234375, |
|
"loss": 0.6086, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.0010952949523925781, |
|
"rewards/margins": 0.41253209114074707, |
|
"rewards/rejected": -0.4114367365837097, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.6270725740690404e-07, |
|
"logits/chosen": -4.888454437255859, |
|
"logits/rejected": -4.942262172698975, |
|
"logps/chosen": -547.9248046875, |
|
"logps/rejected": -410.5484924316406, |
|
"loss": 0.5508, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.097802072763443, |
|
"rewards/margins": 0.5982667207717896, |
|
"rewards/rejected": -0.500464677810669, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.613481924435988e-07, |
|
"logits/chosen": -5.1013078689575195, |
|
"logits/rejected": -4.519289493560791, |
|
"logps/chosen": -571.65966796875, |
|
"logps/rejected": -389.85186767578125, |
|
"loss": 0.5745, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.03590717911720276, |
|
"rewards/margins": 0.5961285829544067, |
|
"rewards/rejected": -0.5602214336395264, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.5998912748029357e-07, |
|
"logits/chosen": -5.285208225250244, |
|
"logits/rejected": -4.495665550231934, |
|
"logps/chosen": -663.1292114257812, |
|
"logps/rejected": -532.8426513671875, |
|
"loss": 0.5878, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.021974461153149605, |
|
"rewards/margins": 0.43202877044677734, |
|
"rewards/rejected": -0.41005435585975647, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.586300625169883e-07, |
|
"logits/chosen": -5.222224235534668, |
|
"logits/rejected": -4.825186729431152, |
|
"logps/chosen": -573.8010864257812, |
|
"logps/rejected": -416.4983825683594, |
|
"loss": 0.5527, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.11975344270467758, |
|
"rewards/margins": 0.6545025110244751, |
|
"rewards/rejected": -0.5347490906715393, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.5727099755368303e-07, |
|
"logits/chosen": -5.289813041687012, |
|
"logits/rejected": -4.617688179016113, |
|
"logps/chosen": -560.8759155273438, |
|
"logps/rejected": -427.9178771972656, |
|
"loss": 0.5826, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.022764435037970543, |
|
"rewards/margins": 0.5342342257499695, |
|
"rewards/rejected": -0.5569986701011658, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.559119325903778e-07, |
|
"logits/chosen": -4.576651096343994, |
|
"logits/rejected": -4.732336521148682, |
|
"logps/chosen": -530.8504638671875, |
|
"logps/rejected": -465.09637451171875, |
|
"loss": 0.5855, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.021763667464256287, |
|
"rewards/margins": 0.4931577146053314, |
|
"rewards/rejected": -0.47139400243759155, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.5455286762707256e-07, |
|
"logits/chosen": -5.359576225280762, |
|
"logits/rejected": -4.76473331451416, |
|
"logps/chosen": -673.8704833984375, |
|
"logps/rejected": -438.1480407714844, |
|
"loss": 0.5306, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.14796891808509827, |
|
"rewards/margins": 0.6954831480979919, |
|
"rewards/rejected": -0.5475142598152161, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -5.148571968078613, |
|
"eval_logits/rejected": -4.8584794998168945, |
|
"eval_logps/chosen": -588.2166137695312, |
|
"eval_logps/rejected": -447.5611572265625, |
|
"eval_loss": 0.5739557147026062, |
|
"eval_rewards/accuracies": 0.6840000152587891, |
|
"eval_rewards/chosen": 0.005295886192470789, |
|
"eval_rewards/margins": 0.5073610544204712, |
|
"eval_rewards/rejected": -0.5020651817321777, |
|
"eval_runtime": 108.0014, |
|
"eval_samples_per_second": 18.518, |
|
"eval_steps_per_second": 1.157, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.5319380266376734e-07, |
|
"logits/chosen": -4.752730369567871, |
|
"logits/rejected": -4.137943267822266, |
|
"logps/chosen": -574.3436279296875, |
|
"logps/rejected": -465.2110290527344, |
|
"loss": 0.6064, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.06843885034322739, |
|
"rewards/margins": 0.35540246963500977, |
|
"rewards/rejected": -0.42384132742881775, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.518347377004621e-07, |
|
"logits/chosen": -4.942543029785156, |
|
"logits/rejected": -4.652976036071777, |
|
"logps/chosen": -639.5670166015625, |
|
"logps/rejected": -511.5428161621094, |
|
"loss": 0.612, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.008806949481368065, |
|
"rewards/margins": 0.37993431091308594, |
|
"rewards/rejected": -0.38874128460884094, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.504756727371568e-07, |
|
"logits/chosen": -5.181532859802246, |
|
"logits/rejected": -5.063170433044434, |
|
"logps/chosen": -585.638671875, |
|
"logps/rejected": -500.2425231933594, |
|
"loss": 0.5583, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.03146061673760414, |
|
"rewards/margins": 0.488603413105011, |
|
"rewards/rejected": -0.45714277029037476, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.4911660777385155e-07, |
|
"logits/chosen": -5.165438652038574, |
|
"logits/rejected": -4.691048622131348, |
|
"logps/chosen": -624.2686767578125, |
|
"logps/rejected": -427.2496032714844, |
|
"loss": 0.5458, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.08551664650440216, |
|
"rewards/margins": 0.6588252186775208, |
|
"rewards/rejected": -0.5733085870742798, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.4775754281054634e-07, |
|
"logits/chosen": -5.296277046203613, |
|
"logits/rejected": -5.355370044708252, |
|
"logps/chosen": -531.8514404296875, |
|
"logps/rejected": -429.61737060546875, |
|
"loss": 0.5616, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.005439861677587032, |
|
"rewards/margins": 0.6196398735046387, |
|
"rewards/rejected": -0.6141999959945679, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.4639847784724107e-07, |
|
"logits/chosen": -4.849526405334473, |
|
"logits/rejected": -4.464692115783691, |
|
"logps/chosen": -627.7634887695312, |
|
"logps/rejected": -506.77215576171875, |
|
"loss": 0.5663, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.09366675466299057, |
|
"rewards/margins": 0.5064713358879089, |
|
"rewards/rejected": -0.4128045439720154, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.4503941288393586e-07, |
|
"logits/chosen": -4.814553260803223, |
|
"logits/rejected": -4.442749500274658, |
|
"logps/chosen": -575.5558471679688, |
|
"logps/rejected": -414.0263671875, |
|
"loss": 0.6023, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.10579456388950348, |
|
"rewards/margins": 0.7066904306411743, |
|
"rewards/rejected": -0.6008957624435425, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.436803479206306e-07, |
|
"logits/chosen": -5.186957359313965, |
|
"logits/rejected": -4.751301288604736, |
|
"logps/chosen": -653.7766723632812, |
|
"logps/rejected": -476.66162109375, |
|
"loss": 0.6414, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.07173504680395126, |
|
"rewards/margins": 0.36066287755966187, |
|
"rewards/rejected": -0.43239790201187134, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.423212829573253e-07, |
|
"logits/chosen": -5.053447246551514, |
|
"logits/rejected": -4.918185234069824, |
|
"logps/chosen": -549.3553466796875, |
|
"logps/rejected": -429.42919921875, |
|
"loss": 0.5791, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.15459008514881134, |
|
"rewards/margins": 0.37957876920700073, |
|
"rewards/rejected": -0.5341688394546509, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.409622179940201e-07, |
|
"logits/chosen": -4.912445545196533, |
|
"logits/rejected": -4.6925859451293945, |
|
"logps/chosen": -432.07452392578125, |
|
"logps/rejected": -340.0139465332031, |
|
"loss": 0.5751, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.029613960534334183, |
|
"rewards/margins": 0.48295989632606506, |
|
"rewards/rejected": -0.512573778629303, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.3960315303071485e-07, |
|
"logits/chosen": -5.279432773590088, |
|
"logits/rejected": -4.552325248718262, |
|
"logps/chosen": -553.3790283203125, |
|
"logps/rejected": -460.07427978515625, |
|
"loss": 0.6008, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.06708192080259323, |
|
"rewards/margins": 0.4622860550880432, |
|
"rewards/rejected": -0.3952041566371918, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.382440880674096e-07, |
|
"logits/chosen": -5.419906139373779, |
|
"logits/rejected": -5.2849860191345215, |
|
"logps/chosen": -593.1560668945312, |
|
"logps/rejected": -437.38720703125, |
|
"loss": 0.5862, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.0501575842499733, |
|
"rewards/margins": 0.41556042432785034, |
|
"rewards/rejected": -0.36540284752845764, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.3688502310410434e-07, |
|
"logits/chosen": -5.2504706382751465, |
|
"logits/rejected": -4.6304168701171875, |
|
"logps/chosen": -570.8204956054688, |
|
"logps/rejected": -433.6552734375, |
|
"loss": 0.6481, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.12808158993721008, |
|
"rewards/margins": 0.49509382247924805, |
|
"rewards/rejected": -0.3670122027397156, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.3552595814079913e-07, |
|
"logits/chosen": -5.319705009460449, |
|
"logits/rejected": -4.581608772277832, |
|
"logps/chosen": -649.8846435546875, |
|
"logps/rejected": -481.82550048828125, |
|
"loss": 0.5648, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.2135562151670456, |
|
"rewards/margins": 0.6773136854171753, |
|
"rewards/rejected": -0.4637575149536133, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.341668931774939e-07, |
|
"logits/chosen": -5.048118591308594, |
|
"logits/rejected": -5.331495761871338, |
|
"logps/chosen": -596.2010498046875, |
|
"logps/rejected": -466.883544921875, |
|
"loss": 0.5177, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.11167912185192108, |
|
"rewards/margins": 0.675338089466095, |
|
"rewards/rejected": -0.5636589527130127, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.3280782821418863e-07, |
|
"logits/chosen": -5.044338703155518, |
|
"logits/rejected": -4.720290660858154, |
|
"logps/chosen": -599.9769287109375, |
|
"logps/rejected": -418.9681091308594, |
|
"loss": 0.5675, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.07666479051113129, |
|
"rewards/margins": 0.48825913667678833, |
|
"rewards/rejected": -0.41159430146217346, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.314487632508834e-07, |
|
"logits/chosen": -5.259824752807617, |
|
"logits/rejected": -4.929041385650635, |
|
"logps/chosen": -552.7124633789062, |
|
"logps/rejected": -404.2115478515625, |
|
"loss": 0.5555, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.050760865211486816, |
|
"rewards/margins": 0.4294905662536621, |
|
"rewards/rejected": -0.3787297308444977, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.3008969828757812e-07, |
|
"logits/chosen": -4.695530414581299, |
|
"logits/rejected": -4.929333686828613, |
|
"logps/chosen": -511.2328186035156, |
|
"logps/rejected": -422.66424560546875, |
|
"loss": 0.6039, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.011992475017905235, |
|
"rewards/margins": 0.38791900873184204, |
|
"rewards/rejected": -0.37592652440071106, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.2873063332427288e-07, |
|
"logits/chosen": -5.204197883605957, |
|
"logits/rejected": -4.672845363616943, |
|
"logps/chosen": -676.1151123046875, |
|
"logps/rejected": -544.1356811523438, |
|
"loss": 0.5929, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.03945215791463852, |
|
"rewards/margins": 0.4905180037021637, |
|
"rewards/rejected": -0.4510658383369446, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.2737156836096762e-07, |
|
"logits/chosen": -4.961050987243652, |
|
"logits/rejected": -5.060594081878662, |
|
"logps/chosen": -606.8833618164062, |
|
"logps/rejected": -515.282470703125, |
|
"loss": 0.5516, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.14587000012397766, |
|
"rewards/margins": 0.57590252161026, |
|
"rewards/rejected": -0.43003249168395996, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.260125033976624e-07, |
|
"logits/chosen": -5.351556301116943, |
|
"logits/rejected": -4.766120910644531, |
|
"logps/chosen": -614.4051513671875, |
|
"logps/rejected": -479.23614501953125, |
|
"loss": 0.56, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.12179956585168839, |
|
"rewards/margins": 0.5328482985496521, |
|
"rewards/rejected": -0.4110487401485443, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.2465343843435717e-07, |
|
"logits/chosen": -5.310830116271973, |
|
"logits/rejected": -4.890834808349609, |
|
"logps/chosen": -605.814453125, |
|
"logps/rejected": -474.8689880371094, |
|
"loss": 0.58, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.025691330432891846, |
|
"rewards/margins": 0.5121801495552063, |
|
"rewards/rejected": -0.4864887595176697, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.232943734710519e-07, |
|
"logits/chosen": -5.086139678955078, |
|
"logits/rejected": -4.888861656188965, |
|
"logps/chosen": -586.828857421875, |
|
"logps/rejected": -441.39117431640625, |
|
"loss": 0.5311, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.05895073339343071, |
|
"rewards/margins": 0.6442952156066895, |
|
"rewards/rejected": -0.5853445529937744, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2193530850774666e-07, |
|
"logits/chosen": -5.551673412322998, |
|
"logits/rejected": -4.797183036804199, |
|
"logps/chosen": -568.6909790039062, |
|
"logps/rejected": -442.3128967285156, |
|
"loss": 0.587, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.08748052269220352, |
|
"rewards/margins": 0.5034887194633484, |
|
"rewards/rejected": -0.4160081744194031, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.205762435444414e-07, |
|
"logits/chosen": -4.75910758972168, |
|
"logits/rejected": -4.565986156463623, |
|
"logps/chosen": -667.8187866210938, |
|
"logps/rejected": -495.55218505859375, |
|
"loss": 0.5571, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.13216033577919006, |
|
"rewards/margins": 0.6989060640335083, |
|
"rewards/rejected": -0.5667458176612854, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.1921717858113616e-07, |
|
"logits/chosen": -4.912562370300293, |
|
"logits/rejected": -4.5314154624938965, |
|
"logps/chosen": -604.3954467773438, |
|
"logps/rejected": -406.6845703125, |
|
"loss": 0.5741, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.11295922845602036, |
|
"rewards/margins": 0.5721455812454224, |
|
"rewards/rejected": -0.459186315536499, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1785811361783094e-07, |
|
"logits/chosen": -5.262351989746094, |
|
"logits/rejected": -4.794454574584961, |
|
"logps/chosen": -634.1242065429688, |
|
"logps/rejected": -520.752685546875, |
|
"loss": 0.5552, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.02443080209195614, |
|
"rewards/margins": 0.573106586933136, |
|
"rewards/rejected": -0.5486757159233093, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1649904865452568e-07, |
|
"logits/chosen": -5.01981782913208, |
|
"logits/rejected": -4.642092704772949, |
|
"logps/chosen": -641.73681640625, |
|
"logps/rejected": -522.1800537109375, |
|
"loss": 0.6323, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.008827829733490944, |
|
"rewards/margins": 0.31641584634780884, |
|
"rewards/rejected": -0.32524368166923523, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1513998369122044e-07, |
|
"logits/chosen": -5.0282673835754395, |
|
"logits/rejected": -4.697837829589844, |
|
"logps/chosen": -707.3143310546875, |
|
"logps/rejected": -492.3343200683594, |
|
"loss": 0.5052, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.1378926932811737, |
|
"rewards/margins": 0.8396459817886353, |
|
"rewards/rejected": -0.7017532587051392, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1378091872791517e-07, |
|
"logits/chosen": -5.313849925994873, |
|
"logits/rejected": -4.932788848876953, |
|
"logps/chosen": -605.4910888671875, |
|
"logps/rejected": -440.771728515625, |
|
"loss": 0.6109, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0297638438642025, |
|
"rewards/margins": 0.40913066267967224, |
|
"rewards/rejected": -0.43889445066452026, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.1242185376460994e-07, |
|
"logits/chosen": -5.261561393737793, |
|
"logits/rejected": -5.213662624359131, |
|
"logps/chosen": -527.4952392578125, |
|
"logps/rejected": -468.1676330566406, |
|
"loss": 0.5384, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.10785374790430069, |
|
"rewards/margins": 0.5355297923088074, |
|
"rewards/rejected": -0.4276760518550873, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.1106278880130467e-07, |
|
"logits/chosen": -5.175314903259277, |
|
"logits/rejected": -4.972376346588135, |
|
"logps/chosen": -579.1510009765625, |
|
"logps/rejected": -558.7515869140625, |
|
"loss": 0.5673, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.01355088222771883, |
|
"rewards/margins": 0.4957160949707031, |
|
"rewards/rejected": -0.48216524720191956, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0970372383799946e-07, |
|
"logits/chosen": -5.064365386962891, |
|
"logits/rejected": -4.731338024139404, |
|
"logps/chosen": -484.03662109375, |
|
"logps/rejected": -456.25750732421875, |
|
"loss": 0.5479, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.035017192363739014, |
|
"rewards/margins": 0.5290455222129822, |
|
"rewards/rejected": -0.5640627145767212, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0834465887469422e-07, |
|
"logits/chosen": -5.324121475219727, |
|
"logits/rejected": -4.988900184631348, |
|
"logps/chosen": -694.1812133789062, |
|
"logps/rejected": -493.7262268066406, |
|
"loss": 0.5393, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.16435925662517548, |
|
"rewards/margins": 0.5418882966041565, |
|
"rewards/rejected": -0.3775290548801422, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.0698559391138895e-07, |
|
"logits/chosen": -5.1242194175720215, |
|
"logits/rejected": -5.189187049865723, |
|
"logps/chosen": -547.3629150390625, |
|
"logps/rejected": -401.1067199707031, |
|
"loss": 0.5793, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.016556579619646072, |
|
"rewards/margins": 0.5332701802253723, |
|
"rewards/rejected": -0.5498267412185669, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.0562652894808371e-07, |
|
"logits/chosen": -5.232357501983643, |
|
"logits/rejected": -4.684348106384277, |
|
"logps/chosen": -600.7597045898438, |
|
"logps/rejected": -423.6444396972656, |
|
"loss": 0.5716, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.05882176756858826, |
|
"rewards/margins": 0.5666114091873169, |
|
"rewards/rejected": -0.5077896118164062, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.0426746398477845e-07, |
|
"logits/chosen": -4.743066310882568, |
|
"logits/rejected": -5.0457682609558105, |
|
"logps/chosen": -621.16943359375, |
|
"logps/rejected": -562.2957763671875, |
|
"loss": 0.5733, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.030701154842972755, |
|
"rewards/margins": 0.49867868423461914, |
|
"rewards/rejected": -0.4679775834083557, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.029083990214732e-07, |
|
"logits/chosen": -5.360964298248291, |
|
"logits/rejected": -4.910816192626953, |
|
"logps/chosen": -541.6188354492188, |
|
"logps/rejected": -409.1690368652344, |
|
"loss": 0.5836, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.00623717624694109, |
|
"rewards/margins": 0.5276843309402466, |
|
"rewards/rejected": -0.5214471817016602, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.0154933405816797e-07, |
|
"logits/chosen": -5.229551792144775, |
|
"logits/rejected": -5.032118797302246, |
|
"logps/chosen": -488.1885681152344, |
|
"logps/rejected": -373.1156311035156, |
|
"loss": 0.6244, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.14348170161247253, |
|
"rewards/margins": 0.24628177285194397, |
|
"rewards/rejected": -0.3897634446620941, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.0019026909486273e-07, |
|
"logits/chosen": -4.7342987060546875, |
|
"logits/rejected": -5.318948268890381, |
|
"logps/chosen": -520.8893432617188, |
|
"logps/rejected": -414.48193359375, |
|
"loss": 0.5789, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.06965414434671402, |
|
"rewards/margins": 0.44729694724082947, |
|
"rewards/rejected": -0.5169510245323181, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.988312041315575e-07, |
|
"logits/chosen": -5.036027431488037, |
|
"logits/rejected": -5.126777172088623, |
|
"logps/chosen": -580.8709716796875, |
|
"logps/rejected": -451.442138671875, |
|
"loss": 0.5281, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.22733895480632782, |
|
"rewards/margins": 0.6606284379959106, |
|
"rewards/rejected": -0.43328937888145447, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9747213916825223e-07, |
|
"logits/chosen": -4.949835300445557, |
|
"logits/rejected": -4.850296974182129, |
|
"logps/chosen": -540.108154296875, |
|
"logps/rejected": -456.7802734375, |
|
"loss": 0.6362, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.01635250262916088, |
|
"rewards/margins": 0.41821298003196716, |
|
"rewards/rejected": -0.4018605351448059, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.96113074204947e-07, |
|
"logits/chosen": -5.230714797973633, |
|
"logits/rejected": -4.870827674865723, |
|
"logps/chosen": -629.0115966796875, |
|
"logps/rejected": -472.3067321777344, |
|
"loss": 0.5569, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.3579154908657074, |
|
"rewards/margins": 0.7859879732131958, |
|
"rewards/rejected": -0.42807239294052124, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.9475400924164172e-07, |
|
"logits/chosen": -5.072863578796387, |
|
"logits/rejected": -5.132185935974121, |
|
"logps/chosen": -481.25732421875, |
|
"logps/rejected": -406.0838317871094, |
|
"loss": 0.5747, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.09596830606460571, |
|
"rewards/margins": 0.4968641698360443, |
|
"rewards/rejected": -0.400895893573761, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.933949442783365e-07, |
|
"logits/chosen": -5.207085609436035, |
|
"logits/rejected": -5.209758281707764, |
|
"logps/chosen": -663.7227172851562, |
|
"logps/rejected": -574.890869140625, |
|
"loss": 0.5902, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.01712667942047119, |
|
"rewards/margins": 0.5995233654975891, |
|
"rewards/rejected": -0.5823966264724731, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.9203587931503127e-07, |
|
"logits/chosen": -5.274188995361328, |
|
"logits/rejected": -4.79066276550293, |
|
"logps/chosen": -641.5878295898438, |
|
"logps/rejected": -467.88970947265625, |
|
"loss": 0.5335, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.10666131973266602, |
|
"rewards/margins": 0.626215934753418, |
|
"rewards/rejected": -0.5195545554161072, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.90676814351726e-07, |
|
"logits/chosen": -4.854439735412598, |
|
"logits/rejected": -4.647269248962402, |
|
"logps/chosen": -645.0352783203125, |
|
"logps/rejected": -485.135009765625, |
|
"loss": 0.5604, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.11006246507167816, |
|
"rewards/margins": 0.5373518466949463, |
|
"rewards/rejected": -0.42728933691978455, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.8931774938842077e-07, |
|
"logits/chosen": -5.332976341247559, |
|
"logits/rejected": -5.005293369293213, |
|
"logps/chosen": -735.2457275390625, |
|
"logps/rejected": -513.5392456054688, |
|
"loss": 0.5612, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.019729632884263992, |
|
"rewards/margins": 0.49942612648010254, |
|
"rewards/rejected": -0.47969645261764526, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.879586844251155e-07, |
|
"logits/chosen": -5.085322856903076, |
|
"logits/rejected": -4.836029052734375, |
|
"logps/chosen": -508.7854919433594, |
|
"logps/rejected": -408.246337890625, |
|
"loss": 0.5654, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.07745769619941711, |
|
"rewards/margins": 0.4990989565849304, |
|
"rewards/rejected": -0.5765566229820251, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.8659961946181026e-07, |
|
"logits/chosen": -5.203367233276367, |
|
"logits/rejected": -4.747181415557861, |
|
"logps/chosen": -633.173583984375, |
|
"logps/rejected": -483.2410583496094, |
|
"loss": 0.5208, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.09929683059453964, |
|
"rewards/margins": 0.6734989881515503, |
|
"rewards/rejected": -0.5742021799087524, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.85240554498505e-07, |
|
"logits/chosen": -5.12204647064209, |
|
"logits/rejected": -5.049520015716553, |
|
"logps/chosen": -613.7943115234375, |
|
"logps/rejected": -492.426513671875, |
|
"loss": 0.5893, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.09632813185453415, |
|
"rewards/margins": 0.46805983781814575, |
|
"rewards/rejected": -0.371731698513031, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.8388148953519978e-07, |
|
"logits/chosen": -5.151005744934082, |
|
"logits/rejected": -4.872684478759766, |
|
"logps/chosen": -588.597900390625, |
|
"logps/rejected": -445.72552490234375, |
|
"loss": 0.5419, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.16517074406147003, |
|
"rewards/margins": 0.5802798867225647, |
|
"rewards/rejected": -0.4151090979576111, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.8252242457189454e-07, |
|
"logits/chosen": -5.086743354797363, |
|
"logits/rejected": -5.152198791503906, |
|
"logps/chosen": -546.5130615234375, |
|
"logps/rejected": -409.61962890625, |
|
"loss": 0.5723, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.08358633518218994, |
|
"rewards/margins": 0.44257277250289917, |
|
"rewards/rejected": -0.5261590480804443, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.8116335960858928e-07, |
|
"logits/chosen": -5.062109470367432, |
|
"logits/rejected": -4.863866329193115, |
|
"logps/chosen": -546.6769409179688, |
|
"logps/rejected": -462.1380310058594, |
|
"loss": 0.5698, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.009900592267513275, |
|
"rewards/margins": 0.36029139161109924, |
|
"rewards/rejected": -0.35039082169532776, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.7980429464528404e-07, |
|
"logits/chosen": -5.212429046630859, |
|
"logits/rejected": -5.2788190841674805, |
|
"logps/chosen": -579.0036010742188, |
|
"logps/rejected": -476.0892028808594, |
|
"loss": 0.5802, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.043891895562410355, |
|
"rewards/margins": 0.44694948196411133, |
|
"rewards/rejected": -0.40305763483047485, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.7844522968197877e-07, |
|
"logits/chosen": -5.256224632263184, |
|
"logits/rejected": -4.5983476638793945, |
|
"logps/chosen": -610.208251953125, |
|
"logps/rejected": -496.9752502441406, |
|
"loss": 0.5869, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.029510384425520897, |
|
"rewards/margins": 0.4173160195350647, |
|
"rewards/rejected": -0.44682639837265015, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.7708616471867354e-07, |
|
"logits/chosen": -5.25832462310791, |
|
"logits/rejected": -4.58776330947876, |
|
"logps/chosen": -606.7193603515625, |
|
"logps/rejected": -458.6946716308594, |
|
"loss": 0.5831, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.15029188990592957, |
|
"rewards/margins": 0.5948890447616577, |
|
"rewards/rejected": -0.4445970952510834, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.757270997553683e-07, |
|
"logits/chosen": -5.031914710998535, |
|
"logits/rejected": -4.41817569732666, |
|
"logps/chosen": -631.19873046875, |
|
"logps/rejected": -482.4442443847656, |
|
"loss": 0.5915, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.216563418507576, |
|
"rewards/margins": 0.6215362548828125, |
|
"rewards/rejected": -0.40497273206710815, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.7436803479206306e-07, |
|
"logits/chosen": -5.128687381744385, |
|
"logits/rejected": -5.052613735198975, |
|
"logps/chosen": -527.7045288085938, |
|
"logps/rejected": -391.2369384765625, |
|
"loss": 0.5519, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.0025056705344468355, |
|
"rewards/margins": 0.5808178782463074, |
|
"rewards/rejected": -0.5833235383033752, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.7300896982875782e-07, |
|
"logits/chosen": -4.958249092102051, |
|
"logits/rejected": -4.750493049621582, |
|
"logps/chosen": -653.8189697265625, |
|
"logps/rejected": -429.29864501953125, |
|
"loss": 0.5353, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.16785474121570587, |
|
"rewards/margins": 0.6959394216537476, |
|
"rewards/rejected": -0.5280846357345581, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.7164990486545255e-07, |
|
"logits/chosen": -5.113970756530762, |
|
"logits/rejected": -5.008670806884766, |
|
"logps/chosen": -636.5609741210938, |
|
"logps/rejected": -528.3571166992188, |
|
"loss": 0.5244, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.10236026346683502, |
|
"rewards/margins": 0.6292668581008911, |
|
"rewards/rejected": -0.5269066095352173, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7029083990214731e-07, |
|
"logits/chosen": -5.374190807342529, |
|
"logits/rejected": -5.041703224182129, |
|
"logps/chosen": -589.2327270507812, |
|
"logps/rejected": -419.8196716308594, |
|
"loss": 0.5488, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.04434085637331009, |
|
"rewards/margins": 0.5901876091957092, |
|
"rewards/rejected": -0.545846700668335, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.6893177493884205e-07, |
|
"logits/chosen": -4.970804691314697, |
|
"logits/rejected": -5.064300060272217, |
|
"logps/chosen": -590.119140625, |
|
"logps/rejected": -529.2374267578125, |
|
"loss": 0.5954, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.036118488758802414, |
|
"rewards/margins": 0.34446951746940613, |
|
"rewards/rejected": -0.3083510398864746, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.6757270997553684e-07, |
|
"logits/chosen": -5.2425689697265625, |
|
"logits/rejected": -4.580435276031494, |
|
"logps/chosen": -681.4793090820312, |
|
"logps/rejected": -452.87677001953125, |
|
"loss": 0.5824, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.19392366707324982, |
|
"rewards/margins": 0.6403516530990601, |
|
"rewards/rejected": -0.44642800092697144, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.662136450122316e-07, |
|
"logits/chosen": -5.0710859298706055, |
|
"logits/rejected": -4.760018348693848, |
|
"logps/chosen": -674.2054443359375, |
|
"logps/rejected": -565.3984985351562, |
|
"loss": 0.5999, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.06712154299020767, |
|
"rewards/margins": 0.37311816215515137, |
|
"rewards/rejected": -0.3059965968132019, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.6485458004892633e-07, |
|
"logits/chosen": -5.095987796783447, |
|
"logits/rejected": -5.052186489105225, |
|
"logps/chosen": -544.0107421875, |
|
"logps/rejected": -435.6206970214844, |
|
"loss": 0.5912, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.01182224415242672, |
|
"rewards/margins": 0.4927147924900055, |
|
"rewards/rejected": -0.4808925986289978, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.634955150856211e-07, |
|
"logits/chosen": -5.013184547424316, |
|
"logits/rejected": -4.417869567871094, |
|
"logps/chosen": -619.71240234375, |
|
"logps/rejected": -465.03167724609375, |
|
"loss": 0.5593, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.10584282875061035, |
|
"rewards/margins": 0.553497850894928, |
|
"rewards/rejected": -0.44765496253967285, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.6213645012231583e-07, |
|
"logits/chosen": -5.066674709320068, |
|
"logits/rejected": -4.569228172302246, |
|
"logps/chosen": -624.6241455078125, |
|
"logps/rejected": -439.173583984375, |
|
"loss": 0.5335, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.07207348197698593, |
|
"rewards/margins": 0.6219204664230347, |
|
"rewards/rejected": -0.5498470067977905, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.607773851590106e-07, |
|
"logits/chosen": -4.710171222686768, |
|
"logits/rejected": -4.891881465911865, |
|
"logps/chosen": -572.812744140625, |
|
"logps/rejected": -463.7294006347656, |
|
"loss": 0.6075, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.02544694021344185, |
|
"rewards/margins": 0.48878225684165955, |
|
"rewards/rejected": -0.4633353352546692, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.5941832019570535e-07, |
|
"logits/chosen": -5.484849452972412, |
|
"logits/rejected": -4.930639266967773, |
|
"logps/chosen": -625.0872802734375, |
|
"logps/rejected": -407.0397033691406, |
|
"loss": 0.6312, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.016370752826333046, |
|
"rewards/margins": 0.48144835233688354, |
|
"rewards/rejected": -0.46507757902145386, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.580592552324001e-07, |
|
"logits/chosen": -4.830108642578125, |
|
"logits/rejected": -4.818240642547607, |
|
"logps/chosen": -765.7501831054688, |
|
"logps/rejected": -527.1919555664062, |
|
"loss": 0.5712, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.25509804487228394, |
|
"rewards/margins": 0.5200859308242798, |
|
"rewards/rejected": -0.2649877965450287, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.5670019026909487e-07, |
|
"logits/chosen": -5.286885738372803, |
|
"logits/rejected": -5.104211807250977, |
|
"logps/chosen": -604.0806884765625, |
|
"logps/rejected": -539.4612426757812, |
|
"loss": 0.5953, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.06256647408008575, |
|
"rewards/margins": 0.41809743642807007, |
|
"rewards/rejected": -0.3555310070514679, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.553411253057896e-07, |
|
"logits/chosen": -5.280417442321777, |
|
"logits/rejected": -5.031546592712402, |
|
"logps/chosen": -564.3517456054688, |
|
"logps/rejected": -442.993408203125, |
|
"loss": 0.5995, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.004562814719974995, |
|
"rewards/margins": 0.47682324051856995, |
|
"rewards/rejected": -0.4722604751586914, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.5398206034248437e-07, |
|
"logits/chosen": -5.209362983703613, |
|
"logits/rejected": -4.777202129364014, |
|
"logps/chosen": -545.34228515625, |
|
"logps/rejected": -498.712890625, |
|
"loss": 0.5853, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.10138173401355743, |
|
"rewards/margins": 0.5011622309684753, |
|
"rewards/rejected": -0.3997804522514343, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.526229953791791e-07, |
|
"logits/chosen": -5.201174736022949, |
|
"logits/rejected": -4.462300777435303, |
|
"logps/chosen": -507.9359436035156, |
|
"logps/rejected": -392.84295654296875, |
|
"loss": 0.5746, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.07436896860599518, |
|
"rewards/margins": 0.6053969264030457, |
|
"rewards/rejected": -0.5310279726982117, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.5126393041587386e-07, |
|
"logits/chosen": -5.354833602905273, |
|
"logits/rejected": -5.260016441345215, |
|
"logps/chosen": -614.5958862304688, |
|
"logps/rejected": -445.5887145996094, |
|
"loss": 0.548, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.12793684005737305, |
|
"rewards/margins": 0.6688116192817688, |
|
"rewards/rejected": -0.540874719619751, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.4990486545256862e-07, |
|
"logits/chosen": -5.430781364440918, |
|
"logits/rejected": -4.701030254364014, |
|
"logps/chosen": -675.9953002929688, |
|
"logps/rejected": -461.70526123046875, |
|
"loss": 0.5407, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.17530107498168945, |
|
"rewards/margins": 0.6282079815864563, |
|
"rewards/rejected": -0.45290690660476685, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.4854580048926338e-07, |
|
"logits/chosen": -5.285678386688232, |
|
"logits/rejected": -5.013982772827148, |
|
"logps/chosen": -570.0582275390625, |
|
"logps/rejected": -415.4815979003906, |
|
"loss": 0.6081, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.014036163687705994, |
|
"rewards/margins": 0.5525720119476318, |
|
"rewards/rejected": -0.5385358929634094, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.4718673552595815e-07, |
|
"logits/chosen": -5.222121238708496, |
|
"logits/rejected": -4.650824546813965, |
|
"logps/chosen": -658.1751708984375, |
|
"logps/rejected": -433.31317138671875, |
|
"loss": 0.5892, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.11640272289514542, |
|
"rewards/margins": 0.46268996596336365, |
|
"rewards/rejected": -0.34628722071647644, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.4582767056265288e-07, |
|
"logits/chosen": -5.346107006072998, |
|
"logits/rejected": -4.362074375152588, |
|
"logps/chosen": -606.3163452148438, |
|
"logps/rejected": -428.46954345703125, |
|
"loss": 0.591, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.07154564559459686, |
|
"rewards/margins": 0.4669966697692871, |
|
"rewards/rejected": -0.5385423302650452, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.4446860559934764e-07, |
|
"logits/chosen": -5.480148792266846, |
|
"logits/rejected": -5.2202863693237305, |
|
"logps/chosen": -478.8058166503906, |
|
"logps/rejected": -412.5455627441406, |
|
"loss": 0.5717, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.02568097785115242, |
|
"rewards/margins": 0.44622331857681274, |
|
"rewards/rejected": -0.42054232954978943, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.4310954063604238e-07, |
|
"logits/chosen": -5.337462902069092, |
|
"logits/rejected": -4.77118444442749, |
|
"logps/chosen": -631.7322998046875, |
|
"logps/rejected": -428.6024475097656, |
|
"loss": 0.5114, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.08751243352890015, |
|
"rewards/margins": 0.6282658576965332, |
|
"rewards/rejected": -0.5407534241676331, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.4175047567273716e-07, |
|
"logits/chosen": -4.882279396057129, |
|
"logits/rejected": -4.7415571212768555, |
|
"logps/chosen": -554.1456298828125, |
|
"logps/rejected": -440.29583740234375, |
|
"loss": 0.5925, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.027923833578824997, |
|
"rewards/margins": 0.5361341238021851, |
|
"rewards/rejected": -0.5640579462051392, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.4039141070943192e-07, |
|
"logits/chosen": -4.445671558380127, |
|
"logits/rejected": -4.818338394165039, |
|
"logps/chosen": -609.9766845703125, |
|
"logps/rejected": -476.7052307128906, |
|
"loss": 0.6746, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.0833519846200943, |
|
"rewards/margins": 0.5305315852165222, |
|
"rewards/rejected": -0.4471796154975891, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.3903234574612666e-07, |
|
"logits/chosen": -4.992600917816162, |
|
"logits/rejected": -4.623105525970459, |
|
"logps/chosen": -629.71875, |
|
"logps/rejected": -438.7481994628906, |
|
"loss": 0.6014, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.04544510692358017, |
|
"rewards/margins": 0.5514675378799438, |
|
"rewards/rejected": -0.5060223937034607, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.3767328078282142e-07, |
|
"logits/chosen": -5.5516252517700195, |
|
"logits/rejected": -5.1864333152771, |
|
"logps/chosen": -638.9982299804688, |
|
"logps/rejected": -552.6722412109375, |
|
"loss": 0.594, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.11496025323867798, |
|
"rewards/margins": 0.4150461256504059, |
|
"rewards/rejected": -0.3000858426094055, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.3631421581951615e-07, |
|
"logits/chosen": -5.008028507232666, |
|
"logits/rejected": -4.577816009521484, |
|
"logps/chosen": -548.2963256835938, |
|
"logps/rejected": -500.7344665527344, |
|
"loss": 0.6102, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.026208871975541115, |
|
"rewards/margins": 0.3533262610435486, |
|
"rewards/rejected": -0.37953513860702515, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.3495515085621091e-07, |
|
"logits/chosen": -5.472646713256836, |
|
"logits/rejected": -4.973770618438721, |
|
"logps/chosen": -724.769287109375, |
|
"logps/rejected": -563.2872924804688, |
|
"loss": 0.623, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.1146833673119545, |
|
"rewards/margins": 0.4099810719490051, |
|
"rewards/rejected": -0.29529768228530884, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.3359608589290568e-07, |
|
"logits/chosen": -4.954279899597168, |
|
"logits/rejected": -5.228877067565918, |
|
"logps/chosen": -581.0999145507812, |
|
"logps/rejected": -530.7953491210938, |
|
"loss": 0.5642, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.11613529920578003, |
|
"rewards/margins": 0.453176885843277, |
|
"rewards/rejected": -0.33704158663749695, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.3223702092960044e-07, |
|
"logits/chosen": -5.242658615112305, |
|
"logits/rejected": -4.768167972564697, |
|
"logps/chosen": -533.3716430664062, |
|
"logps/rejected": -390.54547119140625, |
|
"loss": 0.5637, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.043281190097332, |
|
"rewards/margins": 0.43897300958633423, |
|
"rewards/rejected": -0.48225417733192444, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.308779559662952e-07, |
|
"logits/chosen": -5.201248645782471, |
|
"logits/rejected": -4.676947593688965, |
|
"logps/chosen": -643.8597412109375, |
|
"logps/rejected": -583.13232421875, |
|
"loss": 0.5888, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.14752855896949768, |
|
"rewards/margins": 0.2833397686481476, |
|
"rewards/rejected": -0.13581117987632751, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.2951889100298993e-07, |
|
"logits/chosen": -5.122186183929443, |
|
"logits/rejected": -5.041023254394531, |
|
"logps/chosen": -577.3592529296875, |
|
"logps/rejected": -440.177734375, |
|
"loss": 0.5703, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.05891375616192818, |
|
"rewards/margins": 0.4279320240020752, |
|
"rewards/rejected": -0.36901822686195374, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.281598260396847e-07, |
|
"logits/chosen": -5.211073875427246, |
|
"logits/rejected": -4.9518232345581055, |
|
"logps/chosen": -614.7965698242188, |
|
"logps/rejected": -439.0995178222656, |
|
"loss": 0.6065, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.13381192088127136, |
|
"rewards/margins": 0.6412351131439209, |
|
"rewards/rejected": -0.5074232220649719, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.2680076107637943e-07, |
|
"logits/chosen": -5.193760871887207, |
|
"logits/rejected": -4.404151439666748, |
|
"logps/chosen": -622.8799438476562, |
|
"logps/rejected": -431.04461669921875, |
|
"loss": 0.5656, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.1521753966808319, |
|
"rewards/margins": 0.6701608896255493, |
|
"rewards/rejected": -0.5179855227470398, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.2544169611307421e-07, |
|
"logits/chosen": -5.015482425689697, |
|
"logits/rejected": -4.603137016296387, |
|
"logps/chosen": -594.6808471679688, |
|
"logps/rejected": -462.0057678222656, |
|
"loss": 0.4977, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.07626765221357346, |
|
"rewards/margins": 0.6575822830200195, |
|
"rewards/rejected": -0.5813146829605103, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.2408263114976895e-07, |
|
"logits/chosen": -5.004543781280518, |
|
"logits/rejected": -4.734368801116943, |
|
"logps/chosen": -567.2260131835938, |
|
"logps/rejected": -404.7546081542969, |
|
"loss": 0.5255, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.07884837687015533, |
|
"rewards/margins": 0.6020263433456421, |
|
"rewards/rejected": -0.523177981376648, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.227235661864637e-07, |
|
"logits/chosen": -5.414193153381348, |
|
"logits/rejected": -5.046207904815674, |
|
"logps/chosen": -502.909912109375, |
|
"logps/rejected": -361.7054138183594, |
|
"loss": 0.5857, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.06829454004764557, |
|
"rewards/margins": 0.5591669082641602, |
|
"rewards/rejected": -0.6274614930152893, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2136450122315844e-07, |
|
"logits/chosen": -5.198843955993652, |
|
"logits/rejected": -4.930603981018066, |
|
"logps/chosen": -520.4276733398438, |
|
"logps/rejected": -431.8805236816406, |
|
"loss": 0.6266, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.028726745396852493, |
|
"rewards/margins": 0.35937756299972534, |
|
"rewards/rejected": -0.33065086603164673, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.200054362598532e-07, |
|
"logits/chosen": -5.31064510345459, |
|
"logits/rejected": -4.599266529083252, |
|
"logps/chosen": -619.8414306640625, |
|
"logps/rejected": -396.1475524902344, |
|
"loss": 0.5163, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.18492308259010315, |
|
"rewards/margins": 0.7202444672584534, |
|
"rewards/rejected": -0.5353213548660278, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.1864637129654798e-07, |
|
"logits/chosen": -5.111018180847168, |
|
"logits/rejected": -4.847773551940918, |
|
"logps/chosen": -574.0250854492188, |
|
"logps/rejected": -485.5152282714844, |
|
"loss": 0.6036, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.050168585032224655, |
|
"rewards/margins": 0.470418781042099, |
|
"rewards/rejected": -0.5205873847007751, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_logits/chosen": -5.234292984008789, |
|
"eval_logits/rejected": -4.938849449157715, |
|
"eval_logps/chosen": -587.7192993164062, |
|
"eval_logps/rejected": -447.32525634765625, |
|
"eval_loss": 0.5676125288009644, |
|
"eval_rewards/accuracies": 0.6890000104904175, |
|
"eval_rewards/chosen": 0.05502856895327568, |
|
"eval_rewards/margins": 0.533500075340271, |
|
"eval_rewards/rejected": -0.4784714877605438, |
|
"eval_runtime": 107.5703, |
|
"eval_samples_per_second": 18.592, |
|
"eval_steps_per_second": 1.162, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.1728730633324273e-07, |
|
"logits/chosen": -5.1651787757873535, |
|
"logits/rejected": -5.090694427490234, |
|
"logps/chosen": -506.1864318847656, |
|
"logps/rejected": -488.7061462402344, |
|
"loss": 0.5813, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.18548956513404846, |
|
"rewards/margins": 0.39953848719596863, |
|
"rewards/rejected": -0.5850280523300171, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.1592824136993748e-07, |
|
"logits/chosen": -5.586142539978027, |
|
"logits/rejected": -5.10451602935791, |
|
"logps/chosen": -577.9089965820312, |
|
"logps/rejected": -501.72509765625, |
|
"loss": 0.5691, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.15978381037712097, |
|
"rewards/margins": 0.689250111579895, |
|
"rewards/rejected": -0.5294662714004517, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.1456917640663224e-07, |
|
"logits/chosen": -5.37423849105835, |
|
"logits/rejected": -5.371174335479736, |
|
"logps/chosen": -580.6159057617188, |
|
"logps/rejected": -504.47174072265625, |
|
"loss": 0.5753, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.08985177427530289, |
|
"rewards/margins": 0.42342209815979004, |
|
"rewards/rejected": -0.5132738351821899, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.1321011144332698e-07, |
|
"logits/chosen": -4.83787202835083, |
|
"logits/rejected": -4.70668363571167, |
|
"logps/chosen": -620.3161010742188, |
|
"logps/rejected": -518.7396850585938, |
|
"loss": 0.5764, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.06751275807619095, |
|
"rewards/margins": 0.5135071277618408, |
|
"rewards/rejected": -0.44599437713623047, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1185104648002173e-07, |
|
"logits/chosen": -5.008633136749268, |
|
"logits/rejected": -5.046548843383789, |
|
"logps/chosen": -627.8788452148438, |
|
"logps/rejected": -494.4703063964844, |
|
"loss": 0.5846, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.057392753660678864, |
|
"rewards/margins": 0.4596997797489166, |
|
"rewards/rejected": -0.40230703353881836, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.104919815167165e-07, |
|
"logits/chosen": -4.719240665435791, |
|
"logits/rejected": -4.67408561706543, |
|
"logps/chosen": -576.9584350585938, |
|
"logps/rejected": -475.76263427734375, |
|
"loss": 0.4916, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.07536741346120834, |
|
"rewards/margins": 0.6072790622711182, |
|
"rewards/rejected": -0.5319116115570068, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.0913291655341125e-07, |
|
"logits/chosen": -5.283504962921143, |
|
"logits/rejected": -5.348637580871582, |
|
"logps/chosen": -595.0512084960938, |
|
"logps/rejected": -455.53411865234375, |
|
"loss": 0.6054, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.09285713732242584, |
|
"rewards/margins": 0.38092684745788574, |
|
"rewards/rejected": -0.2880697250366211, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.07773851590106e-07, |
|
"logits/chosen": -5.273705005645752, |
|
"logits/rejected": -5.187539577484131, |
|
"logps/chosen": -588.6239013671875, |
|
"logps/rejected": -506.1387634277344, |
|
"loss": 0.5378, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.06552986800670624, |
|
"rewards/margins": 0.5921922326087952, |
|
"rewards/rejected": -0.5266624093055725, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.0641478662680076e-07, |
|
"logits/chosen": -5.3834028244018555, |
|
"logits/rejected": -4.774810791015625, |
|
"logps/chosen": -650.7406616210938, |
|
"logps/rejected": -485.8941345214844, |
|
"loss": 0.5863, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.07584445178508759, |
|
"rewards/margins": 0.4963054656982422, |
|
"rewards/rejected": -0.4204609990119934, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.0505572166349551e-07, |
|
"logits/chosen": -5.180682182312012, |
|
"logits/rejected": -4.717430591583252, |
|
"logps/chosen": -678.7384643554688, |
|
"logps/rejected": -479.06378173828125, |
|
"loss": 0.5281, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.20926418900489807, |
|
"rewards/margins": 0.6056047081947327, |
|
"rewards/rejected": -0.39634042978286743, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.0369665670019026e-07, |
|
"logits/chosen": -5.130129814147949, |
|
"logits/rejected": -4.669539928436279, |
|
"logps/chosen": -597.5671997070312, |
|
"logps/rejected": -453.7167053222656, |
|
"loss": 0.5428, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.09353788197040558, |
|
"rewards/margins": 0.5858071446418762, |
|
"rewards/rejected": -0.49226921796798706, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.02337591736885e-07, |
|
"logits/chosen": -5.12376594543457, |
|
"logits/rejected": -5.563569068908691, |
|
"logps/chosen": -499.6468811035156, |
|
"logps/rejected": -436.5850524902344, |
|
"loss": 0.6206, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.010913821868598461, |
|
"rewards/margins": 0.49812451004981995, |
|
"rewards/rejected": -0.4872106909751892, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0097852677357978e-07, |
|
"logits/chosen": -5.269471168518066, |
|
"logits/rejected": -5.030025005340576, |
|
"logps/chosen": -533.9810180664062, |
|
"logps/rejected": -399.9656066894531, |
|
"loss": 0.5404, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.11613695323467255, |
|
"rewards/margins": 0.5177056193351746, |
|
"rewards/rejected": -0.4015687108039856, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.961946181027453e-08, |
|
"logits/chosen": -5.219923973083496, |
|
"logits/rejected": -5.189248085021973, |
|
"logps/chosen": -583.900634765625, |
|
"logps/rejected": -441.4908142089844, |
|
"loss": 0.5211, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.12448207288980484, |
|
"rewards/margins": 0.7087076306343079, |
|
"rewards/rejected": -0.5842255353927612, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.826039684696928e-08, |
|
"logits/chosen": -5.200379371643066, |
|
"logits/rejected": -4.829669952392578, |
|
"logps/chosen": -659.9589233398438, |
|
"logps/rejected": -435.9754943847656, |
|
"loss": 0.5538, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.06884004175662994, |
|
"rewards/margins": 0.5907555818557739, |
|
"rewards/rejected": -0.5219155550003052, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.690133188366404e-08, |
|
"logits/chosen": -5.091736793518066, |
|
"logits/rejected": -5.361365795135498, |
|
"logps/chosen": -556.8284912109375, |
|
"logps/rejected": -478.77838134765625, |
|
"loss": 0.5516, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.059255052357912064, |
|
"rewards/margins": 0.5096714496612549, |
|
"rewards/rejected": -0.4504164159297943, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.554226692035878e-08, |
|
"logits/chosen": -5.221390247344971, |
|
"logits/rejected": -4.96212100982666, |
|
"logps/chosen": -653.00341796875, |
|
"logps/rejected": -540.3280639648438, |
|
"loss": 0.5672, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.1990213841199875, |
|
"rewards/margins": 0.5155045390129089, |
|
"rewards/rejected": -0.31648311018943787, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.418320195705353e-08, |
|
"logits/chosen": -5.2392683029174805, |
|
"logits/rejected": -4.992609024047852, |
|
"logps/chosen": -588.1959228515625, |
|
"logps/rejected": -433.735595703125, |
|
"loss": 0.5209, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.21357548236846924, |
|
"rewards/margins": 0.7488683462142944, |
|
"rewards/rejected": -0.5352928042411804, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.28241369937483e-08, |
|
"logits/chosen": -5.30230188369751, |
|
"logits/rejected": -5.239448547363281, |
|
"logps/chosen": -568.1078491210938, |
|
"logps/rejected": -474.8052673339844, |
|
"loss": 0.5752, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.11839760839939117, |
|
"rewards/margins": 0.46041935682296753, |
|
"rewards/rejected": -0.34202176332473755, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.146507203044305e-08, |
|
"logits/chosen": -5.218874931335449, |
|
"logits/rejected": -4.873734474182129, |
|
"logps/chosen": -609.4281005859375, |
|
"logps/rejected": -457.5322265625, |
|
"loss": 0.5636, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.1006079763174057, |
|
"rewards/margins": 0.5693811178207397, |
|
"rewards/rejected": -0.4687730669975281, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.01060070671378e-08, |
|
"logits/chosen": -5.503744125366211, |
|
"logits/rejected": -5.236392498016357, |
|
"logps/chosen": -586.1392822265625, |
|
"logps/rejected": -444.5675354003906, |
|
"loss": 0.5376, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.03827662393450737, |
|
"rewards/margins": 0.5034765005111694, |
|
"rewards/rejected": -0.46519985795021057, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.874694210383256e-08, |
|
"logits/chosen": -5.066947937011719, |
|
"logits/rejected": -4.900802135467529, |
|
"logps/chosen": -595.0233154296875, |
|
"logps/rejected": -496.80877685546875, |
|
"loss": 0.5562, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.029369166120886803, |
|
"rewards/margins": 0.544265866279602, |
|
"rewards/rejected": -0.5148966908454895, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.738787714052731e-08, |
|
"logits/chosen": -5.082206726074219, |
|
"logits/rejected": -5.058773040771484, |
|
"logps/chosen": -645.4029541015625, |
|
"logps/rejected": -446.66766357421875, |
|
"loss": 0.5378, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.19253723323345184, |
|
"rewards/margins": 0.5689257979393005, |
|
"rewards/rejected": -0.3763886094093323, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.602881217722206e-08, |
|
"logits/chosen": -5.70505428314209, |
|
"logits/rejected": -4.710862636566162, |
|
"logps/chosen": -569.6564331054688, |
|
"logps/rejected": -432.09222412109375, |
|
"loss": 0.5233, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.011319099925458431, |
|
"rewards/margins": 0.6908237338066101, |
|
"rewards/rejected": -0.6795046329498291, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.466974721391682e-08, |
|
"logits/chosen": -5.539618015289307, |
|
"logits/rejected": -5.331404209136963, |
|
"logps/chosen": -576.7149658203125, |
|
"logps/rejected": -434.1064453125, |
|
"loss": 0.5512, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.059493519365787506, |
|
"rewards/margins": 0.5593506693840027, |
|
"rewards/rejected": -0.4998571276664734, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.331068225061158e-08, |
|
"logits/chosen": -5.128886699676514, |
|
"logits/rejected": -5.121006011962891, |
|
"logps/chosen": -577.34130859375, |
|
"logps/rejected": -516.7703857421875, |
|
"loss": 0.6106, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.04062025994062424, |
|
"rewards/margins": 0.4897204339504242, |
|
"rewards/rejected": -0.44910019636154175, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.195161728730633e-08, |
|
"logits/chosen": -5.2884345054626465, |
|
"logits/rejected": -4.900949478149414, |
|
"logps/chosen": -692.1761474609375, |
|
"logps/rejected": -483.1785583496094, |
|
"loss": 0.5496, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.1615811139345169, |
|
"rewards/margins": 0.6961567401885986, |
|
"rewards/rejected": -0.5345755815505981, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.059255232400109e-08, |
|
"logits/chosen": -5.02389669418335, |
|
"logits/rejected": -4.964724540710449, |
|
"logps/chosen": -695.4935302734375, |
|
"logps/rejected": -528.0411376953125, |
|
"loss": 0.5498, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.15914632380008698, |
|
"rewards/margins": 0.5235811471939087, |
|
"rewards/rejected": -0.36443477869033813, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.923348736069584e-08, |
|
"logits/chosen": -5.2003889083862305, |
|
"logits/rejected": -5.228209018707275, |
|
"logps/chosen": -537.4274291992188, |
|
"logps/rejected": -456.5669860839844, |
|
"loss": 0.5781, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.11098973453044891, |
|
"rewards/margins": 0.608254611492157, |
|
"rewards/rejected": -0.49726492166519165, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.787442239739058e-08, |
|
"logits/chosen": -5.259240627288818, |
|
"logits/rejected": -4.512152671813965, |
|
"logps/chosen": -690.8038330078125, |
|
"logps/rejected": -444.99761962890625, |
|
"loss": 0.5395, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.23449468612670898, |
|
"rewards/margins": 0.8231126070022583, |
|
"rewards/rejected": -0.5886179208755493, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.651535743408535e-08, |
|
"logits/chosen": -5.13831090927124, |
|
"logits/rejected": -4.875172138214111, |
|
"logps/chosen": -591.1875610351562, |
|
"logps/rejected": -428.94610595703125, |
|
"loss": 0.5582, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.1050385981798172, |
|
"rewards/margins": 0.5936521291732788, |
|
"rewards/rejected": -0.4886136054992676, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.515629247078011e-08, |
|
"logits/chosen": -5.013466835021973, |
|
"logits/rejected": -5.039238929748535, |
|
"logps/chosen": -693.2198486328125, |
|
"logps/rejected": -527.318359375, |
|
"loss": 0.5254, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.19377581775188446, |
|
"rewards/margins": 0.7708918452262878, |
|
"rewards/rejected": -0.5771160125732422, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.379722750747485e-08, |
|
"logits/chosen": -4.996349811553955, |
|
"logits/rejected": -4.909236431121826, |
|
"logps/chosen": -618.8716430664062, |
|
"logps/rejected": -456.98492431640625, |
|
"loss": 0.6004, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.08843658864498138, |
|
"rewards/margins": 0.5962534546852112, |
|
"rewards/rejected": -0.5078169107437134, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.243816254416962e-08, |
|
"logits/chosen": -4.896212100982666, |
|
"logits/rejected": -4.641386985778809, |
|
"logps/chosen": -608.5612182617188, |
|
"logps/rejected": -474.12158203125, |
|
"loss": 0.6051, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.12576648592948914, |
|
"rewards/margins": 0.5437217950820923, |
|
"rewards/rejected": -0.41795530915260315, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.107909758086436e-08, |
|
"logits/chosen": -5.272801399230957, |
|
"logits/rejected": -4.8430891036987305, |
|
"logps/chosen": -544.3641357421875, |
|
"logps/rejected": -426.4873046875, |
|
"loss": 0.5574, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.015302670188248158, |
|
"rewards/margins": 0.5599262714385986, |
|
"rewards/rejected": -0.5446235537528992, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.972003261755911e-08, |
|
"logits/chosen": -4.518991947174072, |
|
"logits/rejected": -4.936086177825928, |
|
"logps/chosen": -601.0823364257812, |
|
"logps/rejected": -546.61376953125, |
|
"loss": 0.633, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.0004491690488066524, |
|
"rewards/margins": 0.4196210503578186, |
|
"rewards/rejected": -0.42007017135620117, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.836096765425386e-08, |
|
"logits/chosen": -5.375284194946289, |
|
"logits/rejected": -5.087422847747803, |
|
"logps/chosen": -652.1438598632812, |
|
"logps/rejected": -542.370849609375, |
|
"loss": 0.5568, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.1580512672662735, |
|
"rewards/margins": 0.5438879132270813, |
|
"rewards/rejected": -0.3858366310596466, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.700190269094863e-08, |
|
"logits/chosen": -5.468593120574951, |
|
"logits/rejected": -4.970207214355469, |
|
"logps/chosen": -604.3890380859375, |
|
"logps/rejected": -451.9310607910156, |
|
"loss": 0.5345, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.19964425265789032, |
|
"rewards/margins": 0.6419768929481506, |
|
"rewards/rejected": -0.44233259558677673, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.564283772764338e-08, |
|
"logits/chosen": -5.108706474304199, |
|
"logits/rejected": -4.714381217956543, |
|
"logps/chosen": -603.6357421875, |
|
"logps/rejected": -479.19287109375, |
|
"loss": 0.6102, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.11323221772909164, |
|
"rewards/margins": 0.3441081643104553, |
|
"rewards/rejected": -0.4573403298854828, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.428377276433813e-08, |
|
"logits/chosen": -5.2289934158325195, |
|
"logits/rejected": -4.821419715881348, |
|
"logps/chosen": -629.8280029296875, |
|
"logps/rejected": -439.3097229003906, |
|
"loss": 0.5733, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.13448044657707214, |
|
"rewards/margins": 0.5138882994651794, |
|
"rewards/rejected": -0.3794078826904297, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.292470780103289e-08, |
|
"logits/chosen": -5.200686931610107, |
|
"logits/rejected": -4.433293342590332, |
|
"logps/chosen": -628.9244995117188, |
|
"logps/rejected": -463.2845153808594, |
|
"loss": 0.5512, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.22049829363822937, |
|
"rewards/margins": 0.5850414633750916, |
|
"rewards/rejected": -0.36454319953918457, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.156564283772764e-08, |
|
"logits/chosen": -5.299391269683838, |
|
"logits/rejected": -4.578976631164551, |
|
"logps/chosen": -617.935302734375, |
|
"logps/rejected": -455.72149658203125, |
|
"loss": 0.5461, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.08323828130960464, |
|
"rewards/margins": 0.7242950201034546, |
|
"rewards/rejected": -0.6410566568374634, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.02065778744224e-08, |
|
"logits/chosen": -5.269377708435059, |
|
"logits/rejected": -5.062786102294922, |
|
"logps/chosen": -640.0068359375, |
|
"logps/rejected": -480.91180419921875, |
|
"loss": 0.546, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.1582534909248352, |
|
"rewards/margins": 0.6524442434310913, |
|
"rewards/rejected": -0.4941907823085785, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.8847512911117146e-08, |
|
"logits/chosen": -4.990743160247803, |
|
"logits/rejected": -5.302047252655029, |
|
"logps/chosen": -524.5138549804688, |
|
"logps/rejected": -541.4100341796875, |
|
"loss": 0.6432, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.05930762737989426, |
|
"rewards/margins": 0.3244546949863434, |
|
"rewards/rejected": -0.2651470899581909, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.74884479478119e-08, |
|
"logits/chosen": -5.07647705078125, |
|
"logits/rejected": -4.627741813659668, |
|
"logps/chosen": -591.9494018554688, |
|
"logps/rejected": -472.5774841308594, |
|
"loss": 0.5645, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.055071644484996796, |
|
"rewards/margins": 0.49125391244888306, |
|
"rewards/rejected": -0.43618226051330566, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.612938298450666e-08, |
|
"logits/chosen": -4.96254301071167, |
|
"logits/rejected": -4.850637912750244, |
|
"logps/chosen": -689.391357421875, |
|
"logps/rejected": -507.0537109375, |
|
"loss": 0.5609, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.1781730204820633, |
|
"rewards/margins": 0.5519202947616577, |
|
"rewards/rejected": -0.3737472593784332, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.477031802120141e-08, |
|
"logits/chosen": -5.282811164855957, |
|
"logits/rejected": -4.793059825897217, |
|
"logps/chosen": -681.777099609375, |
|
"logps/rejected": -499.6626892089844, |
|
"loss": 0.5871, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.20708510279655457, |
|
"rewards/margins": 0.5851645469665527, |
|
"rewards/rejected": -0.378079354763031, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.341125305789616e-08, |
|
"logits/chosen": -5.204301834106445, |
|
"logits/rejected": -5.192577838897705, |
|
"logps/chosen": -614.7431030273438, |
|
"logps/rejected": -475.15447998046875, |
|
"loss": 0.5711, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.06923681497573853, |
|
"rewards/margins": 0.6609135866165161, |
|
"rewards/rejected": -0.5916768312454224, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.2052188094590924e-08, |
|
"logits/chosen": -5.410575866699219, |
|
"logits/rejected": -4.7271928787231445, |
|
"logps/chosen": -562.2875366210938, |
|
"logps/rejected": -470.6141662597656, |
|
"loss": 0.574, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.04732062295079231, |
|
"rewards/margins": 0.45917314291000366, |
|
"rewards/rejected": -0.41185253858566284, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.069312313128567e-08, |
|
"logits/chosen": -4.909255027770996, |
|
"logits/rejected": -5.011034965515137, |
|
"logps/chosen": -578.5535888671875, |
|
"logps/rejected": -488.4493713378906, |
|
"loss": 0.5549, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.09110667556524277, |
|
"rewards/margins": 0.5210335850715637, |
|
"rewards/rejected": -0.4299268126487732, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.9334058167980426e-08, |
|
"logits/chosen": -5.587698936462402, |
|
"logits/rejected": -5.1530914306640625, |
|
"logps/chosen": -583.8742065429688, |
|
"logps/rejected": -433.79083251953125, |
|
"loss": 0.5733, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.08238669484853745, |
|
"rewards/margins": 0.5551365613937378, |
|
"rewards/rejected": -0.4727499485015869, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.797499320467518e-08, |
|
"logits/chosen": -5.150055885314941, |
|
"logits/rejected": -4.939459800720215, |
|
"logps/chosen": -628.3910522460938, |
|
"logps/rejected": -505.53125, |
|
"loss": 0.5523, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.09437895566225052, |
|
"rewards/margins": 0.5040851831436157, |
|
"rewards/rejected": -0.409706175327301, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.6615928241369935e-08, |
|
"logits/chosen": -5.329426288604736, |
|
"logits/rejected": -5.03844690322876, |
|
"logps/chosen": -593.9847412109375, |
|
"logps/rejected": -454.750244140625, |
|
"loss": 0.5544, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.11871786415576935, |
|
"rewards/margins": 0.545038640499115, |
|
"rewards/rejected": -0.426320880651474, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.525686327806469e-08, |
|
"logits/chosen": -5.202639579772949, |
|
"logits/rejected": -4.666885852813721, |
|
"logps/chosen": -557.8953857421875, |
|
"logps/rejected": -449.791015625, |
|
"loss": 0.5633, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.08986867219209671, |
|
"rewards/margins": 0.5092368125915527, |
|
"rewards/rejected": -0.41936811804771423, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.389779831475944e-08, |
|
"logits/chosen": -5.34531307220459, |
|
"logits/rejected": -4.846031188964844, |
|
"logps/chosen": -595.12841796875, |
|
"logps/rejected": -509.6744079589844, |
|
"loss": 0.5969, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.08132925629615784, |
|
"rewards/margins": 0.3766716420650482, |
|
"rewards/rejected": -0.2953423857688904, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.25387333514542e-08, |
|
"logits/chosen": -5.138333320617676, |
|
"logits/rejected": -4.746251106262207, |
|
"logps/chosen": -515.9407958984375, |
|
"logps/rejected": -437.38018798828125, |
|
"loss": 0.5986, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.0017356962198391557, |
|
"rewards/margins": 0.5237280130386353, |
|
"rewards/rejected": -0.5254637002944946, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.117966838814895e-08, |
|
"logits/chosen": -5.134562969207764, |
|
"logits/rejected": -4.96474552154541, |
|
"logps/chosen": -606.036376953125, |
|
"logps/rejected": -466.07867431640625, |
|
"loss": 0.5611, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.08818952739238739, |
|
"rewards/margins": 0.5076228380203247, |
|
"rewards/rejected": -0.4194332957267761, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.98206034248437e-08, |
|
"logits/chosen": -5.315756320953369, |
|
"logits/rejected": -4.949517250061035, |
|
"logps/chosen": -636.086181640625, |
|
"logps/rejected": -445.82904052734375, |
|
"loss": 0.569, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.20392675697803497, |
|
"rewards/margins": 0.6199758052825928, |
|
"rewards/rejected": -0.416049063205719, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.846153846153846e-08, |
|
"logits/chosen": -5.101180076599121, |
|
"logits/rejected": -4.905373573303223, |
|
"logps/chosen": -536.1978759765625, |
|
"logps/rejected": -417.8277282714844, |
|
"loss": 0.5405, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.018841056153178215, |
|
"rewards/margins": 0.4977056086063385, |
|
"rewards/rejected": -0.5165466070175171, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.7102473498233216e-08, |
|
"logits/chosen": -5.034720420837402, |
|
"logits/rejected": -4.62627649307251, |
|
"logps/chosen": -499.31463623046875, |
|
"logps/rejected": -425.21112060546875, |
|
"loss": 0.5939, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.08160565793514252, |
|
"rewards/margins": 0.32524237036705017, |
|
"rewards/rejected": -0.4068480134010315, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.5743408534927963e-08, |
|
"logits/chosen": -5.122437000274658, |
|
"logits/rejected": -5.20498514175415, |
|
"logps/chosen": -485.17962646484375, |
|
"logps/rejected": -416.1468811035156, |
|
"loss": 0.5746, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.07851817458868027, |
|
"rewards/margins": 0.5143343210220337, |
|
"rewards/rejected": -0.4358161389827728, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.4384343571622724e-08, |
|
"logits/chosen": -5.532504081726074, |
|
"logits/rejected": -5.148187160491943, |
|
"logps/chosen": -519.8585815429688, |
|
"logps/rejected": -442.80352783203125, |
|
"loss": 0.6044, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.018877673894166946, |
|
"rewards/margins": 0.41989025473594666, |
|
"rewards/rejected": -0.4010125696659088, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.302527860831748e-08, |
|
"logits/chosen": -5.246006488800049, |
|
"logits/rejected": -4.628513336181641, |
|
"logps/chosen": -557.8768310546875, |
|
"logps/rejected": -396.80230712890625, |
|
"loss": 0.5759, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.0534333661198616, |
|
"rewards/margins": 0.49642905592918396, |
|
"rewards/rejected": -0.44299569725990295, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.1666213645012227e-08, |
|
"logits/chosen": -5.454456806182861, |
|
"logits/rejected": -4.73259973526001, |
|
"logps/chosen": -597.5782470703125, |
|
"logps/rejected": -388.52789306640625, |
|
"loss": 0.5745, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.09610459953546524, |
|
"rewards/margins": 0.6527958512306213, |
|
"rewards/rejected": -0.5566911697387695, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.030714868170698e-08, |
|
"logits/chosen": -5.269309043884277, |
|
"logits/rejected": -4.4948835372924805, |
|
"logps/chosen": -625.8494873046875, |
|
"logps/rejected": -416.91815185546875, |
|
"loss": 0.5644, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.13563071191310883, |
|
"rewards/margins": 0.5831824541091919, |
|
"rewards/rejected": -0.44755178689956665, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.894808371840174e-08, |
|
"logits/chosen": -4.872903347015381, |
|
"logits/rejected": -5.047135353088379, |
|
"logps/chosen": -538.2395629882812, |
|
"logps/rejected": -488.65966796875, |
|
"loss": 0.5254, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.1966354250907898, |
|
"rewards/margins": 0.6410555839538574, |
|
"rewards/rejected": -0.4444200396537781, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.758901875509649e-08, |
|
"logits/chosen": -5.2469682693481445, |
|
"logits/rejected": -4.6465582847595215, |
|
"logps/chosen": -665.9546508789062, |
|
"logps/rejected": -488.3631286621094, |
|
"loss": 0.6068, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.06325142085552216, |
|
"rewards/margins": 0.4522647261619568, |
|
"rewards/rejected": -0.3890133202075958, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.6229953791791247e-08, |
|
"logits/chosen": -5.30975341796875, |
|
"logits/rejected": -5.214649677276611, |
|
"logps/chosen": -625.4133911132812, |
|
"logps/rejected": -527.6849365234375, |
|
"loss": 0.5544, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.031594168394804, |
|
"rewards/margins": 0.4863681197166443, |
|
"rewards/rejected": -0.4547739028930664, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.4870888828486002e-08, |
|
"logits/chosen": -5.154933929443359, |
|
"logits/rejected": -4.615642547607422, |
|
"logps/chosen": -566.6359252929688, |
|
"logps/rejected": -367.35272216796875, |
|
"loss": 0.5399, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.25163334608078003, |
|
"rewards/margins": 0.7026754021644592, |
|
"rewards/rejected": -0.4510420262813568, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.3511823865180753e-08, |
|
"logits/chosen": -4.935416221618652, |
|
"logits/rejected": -4.931743621826172, |
|
"logps/chosen": -531.6405029296875, |
|
"logps/rejected": -465.2660217285156, |
|
"loss": 0.5801, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.0852300152182579, |
|
"rewards/margins": 0.6420767903327942, |
|
"rewards/rejected": -0.5568467378616333, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.215275890187551e-08, |
|
"logits/chosen": -5.1003828048706055, |
|
"logits/rejected": -4.942370891571045, |
|
"logps/chosen": -572.6801147460938, |
|
"logps/rejected": -499.9962463378906, |
|
"loss": 0.5402, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.14501488208770752, |
|
"rewards/margins": 0.6089879274368286, |
|
"rewards/rejected": -0.46397310495376587, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.0793693938570265e-08, |
|
"logits/chosen": -5.139072895050049, |
|
"logits/rejected": -4.563143253326416, |
|
"logps/chosen": -524.9754028320312, |
|
"logps/rejected": -419.37274169921875, |
|
"loss": 0.5539, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.030956823378801346, |
|
"rewards/margins": 0.5671139359474182, |
|
"rewards/rejected": -0.5980707406997681, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.9434628975265016e-08, |
|
"logits/chosen": -5.675312042236328, |
|
"logits/rejected": -4.652360439300537, |
|
"logps/chosen": -580.7689208984375, |
|
"logps/rejected": -404.6139221191406, |
|
"loss": 0.5862, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.012955671176314354, |
|
"rewards/margins": 0.391316294670105, |
|
"rewards/rejected": -0.4042719900608063, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.807556401195977e-08, |
|
"logits/chosen": -5.271977424621582, |
|
"logits/rejected": -4.812222480773926, |
|
"logps/chosen": -672.85986328125, |
|
"logps/rejected": -450.5704040527344, |
|
"loss": 0.5576, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.11036734282970428, |
|
"rewards/margins": 0.5794622898101807, |
|
"rewards/rejected": -0.4690949320793152, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.6716499048654525e-08, |
|
"logits/chosen": -5.003756523132324, |
|
"logits/rejected": -4.732114315032959, |
|
"logps/chosen": -636.2298583984375, |
|
"logps/rejected": -450.95172119140625, |
|
"loss": 0.5784, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.03545001894235611, |
|
"rewards/margins": 0.4769270420074463, |
|
"rewards/rejected": -0.44147706031799316, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.535743408534928e-08, |
|
"logits/chosen": -4.955419063568115, |
|
"logits/rejected": -5.042864799499512, |
|
"logps/chosen": -597.6549072265625, |
|
"logps/rejected": -529.520263671875, |
|
"loss": 0.584, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.06296499073505402, |
|
"rewards/margins": 0.43164101243019104, |
|
"rewards/rejected": -0.36867600679397583, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.3998369122044032e-08, |
|
"logits/chosen": -4.982466697692871, |
|
"logits/rejected": -5.189105987548828, |
|
"logps/chosen": -566.3472290039062, |
|
"logps/rejected": -493.54974365234375, |
|
"loss": 0.5668, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.027576932683587074, |
|
"rewards/margins": 0.46042051911354065, |
|
"rewards/rejected": -0.4328436255455017, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.2639304158738788e-08, |
|
"logits/chosen": -5.176326274871826, |
|
"logits/rejected": -4.8516364097595215, |
|
"logps/chosen": -625.2818603515625, |
|
"logps/rejected": -434.9867248535156, |
|
"loss": 0.5787, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.22398793697357178, |
|
"rewards/margins": 0.6402627229690552, |
|
"rewards/rejected": -0.41627463698387146, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.1280239195433542e-08, |
|
"logits/chosen": -4.825104236602783, |
|
"logits/rejected": -4.390969276428223, |
|
"logps/chosen": -642.5945434570312, |
|
"logps/rejected": -465.6443786621094, |
|
"loss": 0.5685, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.14697226881980896, |
|
"rewards/margins": 0.5980256199836731, |
|
"rewards/rejected": -0.4510533809661865, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 9.921174232128295e-09, |
|
"logits/chosen": -5.257870674133301, |
|
"logits/rejected": -4.686526298522949, |
|
"logps/chosen": -617.8099365234375, |
|
"logps/rejected": -431.343017578125, |
|
"loss": 0.53, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.1597820371389389, |
|
"rewards/margins": 0.6843008995056152, |
|
"rewards/rejected": -0.5245188474655151, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.562109268823049e-09, |
|
"logits/chosen": -5.258317470550537, |
|
"logits/rejected": -5.140551567077637, |
|
"logps/chosen": -546.1046752929688, |
|
"logps/rejected": -436.4046936035156, |
|
"loss": 0.5762, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.011624747887253761, |
|
"rewards/margins": 0.36088308691978455, |
|
"rewards/rejected": -0.37250787019729614, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 7.203044305517803e-09, |
|
"logits/chosen": -4.875320911407471, |
|
"logits/rejected": -4.910046577453613, |
|
"logps/chosen": -613.1431884765625, |
|
"logps/rejected": -506.95098876953125, |
|
"loss": 0.5196, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.1426512897014618, |
|
"rewards/margins": 0.5562176704406738, |
|
"rewards/rejected": -0.4135662913322449, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5.843979342212558e-09, |
|
"logits/chosen": -4.8537468910217285, |
|
"logits/rejected": -4.501283645629883, |
|
"logps/chosen": -642.3465576171875, |
|
"logps/rejected": -467.0890197753906, |
|
"loss": 0.5845, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.1156611293554306, |
|
"rewards/margins": 0.49272075295448303, |
|
"rewards/rejected": -0.37705960869789124, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.4849143789073114e-09, |
|
"logits/chosen": -5.545628070831299, |
|
"logits/rejected": -5.529524803161621, |
|
"logps/chosen": -538.4314575195312, |
|
"logps/rejected": -463.7015686035156, |
|
"loss": 0.553, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.012657539919018745, |
|
"rewards/margins": 0.49136465787887573, |
|
"rewards/rejected": -0.4787071645259857, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.1258494156020658e-09, |
|
"logits/chosen": -5.1411237716674805, |
|
"logits/rejected": -5.270503520965576, |
|
"logps/chosen": -614.623046875, |
|
"logps/rejected": -475.93975830078125, |
|
"loss": 0.5296, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.20060591399669647, |
|
"rewards/margins": 0.6371750831604004, |
|
"rewards/rejected": -0.4365692734718323, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.7667844522968197e-09, |
|
"logits/chosen": -5.237704753875732, |
|
"logits/rejected": -4.802920818328857, |
|
"logps/chosen": -578.4732666015625, |
|
"logps/rejected": -398.55804443359375, |
|
"loss": 0.5499, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.045068494975566864, |
|
"rewards/margins": 0.4510710835456848, |
|
"rewards/rejected": -0.4961395263671875, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.077194889915738e-10, |
|
"logits/chosen": -5.071371555328369, |
|
"logits/rejected": -4.778379917144775, |
|
"logps/chosen": -552.7120361328125, |
|
"logps/rejected": -491.9295349121094, |
|
"loss": 0.5576, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.027319246903061867, |
|
"rewards/margins": 0.4523466229438782, |
|
"rewards/rejected": -0.4796658456325531, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 3873, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5843773977780509, |
|
"train_runtime": 4895.0541, |
|
"train_samples_per_second": 12.659, |
|
"train_steps_per_second": 0.791 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3873, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|