|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 1563, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.1847133757961784e-09, |
|
"logits/generated": -2.8295888900756836, |
|
"logits/real": -2.849569320678711, |
|
"logps/generated": -609.6478881835938, |
|
"logps/real": -115.27798461914062, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.184713375796178e-08, |
|
"logits/generated": -2.6501715183258057, |
|
"logits/real": -2.821021318435669, |
|
"logps/generated": -688.5430297851562, |
|
"logps/real": -131.44122314453125, |
|
"loss": 0.5833, |
|
"rewards/accuracies": 0.6388888955116272, |
|
"rewards/generated": -0.34704259037971497, |
|
"rewards/margins": 0.3495745360851288, |
|
"rewards/real": 0.0025319471023976803, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.369426751592356e-08, |
|
"logits/generated": -2.714224338531494, |
|
"logits/real": -2.780827522277832, |
|
"logps/generated": -677.5689086914062, |
|
"logps/real": -129.50588989257812, |
|
"loss": 0.3907, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -1.4221687316894531, |
|
"rewards/margins": 1.4617396593093872, |
|
"rewards/real": 0.039571087807416916, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.554140127388536e-08, |
|
"logits/generated": -2.7694613933563232, |
|
"logits/real": -2.860546588897705, |
|
"logps/generated": -698.780517578125, |
|
"logps/real": -139.16226196289062, |
|
"loss": 0.2306, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.204209089279175, |
|
"rewards/margins": 2.362700939178467, |
|
"rewards/real": 0.15849189460277557, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.2738853503184713e-07, |
|
"logits/generated": -2.678589344024658, |
|
"logits/real": -2.7937369346618652, |
|
"logps/generated": -756.0526123046875, |
|
"logps/real": -132.27268981933594, |
|
"loss": 0.1129, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.11299467086792, |
|
"rewards/margins": 4.405646800994873, |
|
"rewards/real": 0.29265230894088745, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.592356687898089e-07, |
|
"logits/generated": -2.719285726547241, |
|
"logits/real": -2.8095831871032715, |
|
"logps/generated": -749.7476806640625, |
|
"logps/real": -123.66046142578125, |
|
"loss": 0.0573, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.803214073181152, |
|
"rewards/margins": 5.2087225914001465, |
|
"rewards/real": 0.4055088460445404, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9108280254777072e-07, |
|
"logits/generated": -2.685657024383545, |
|
"logits/real": -2.817525625228882, |
|
"logps/generated": -743.1283569335938, |
|
"logps/real": -132.15084838867188, |
|
"loss": 0.0325, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.882467746734619, |
|
"rewards/margins": 5.401439189910889, |
|
"rewards/real": 0.5189720392227173, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.2292993630573247e-07, |
|
"logits/generated": -2.6736221313476562, |
|
"logits/real": -2.782536029815674, |
|
"logps/generated": -779.1280517578125, |
|
"logps/real": -136.8399200439453, |
|
"loss": 0.0175, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.401805400848389, |
|
"rewards/margins": 5.999195098876953, |
|
"rewards/real": 0.597389817237854, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.5477707006369425e-07, |
|
"logits/generated": -2.716283082962036, |
|
"logits/real": -2.7836098670959473, |
|
"logps/generated": -763.261962890625, |
|
"logps/real": -121.11332702636719, |
|
"loss": 0.0154, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.937032222747803, |
|
"rewards/margins": 6.565484046936035, |
|
"rewards/real": 0.6284510493278503, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.86624203821656e-07, |
|
"logits/generated": -2.752387285232544, |
|
"logits/real": -2.8120365142822266, |
|
"logps/generated": -760.432373046875, |
|
"logps/real": -132.00917053222656, |
|
"loss": 0.0078, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.373486518859863, |
|
"rewards/margins": 7.068659782409668, |
|
"rewards/real": 0.6951735615730286, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.184713375796178e-07, |
|
"logits/generated": -2.702080488204956, |
|
"logits/real": -2.792630910873413, |
|
"logps/generated": -744.688232421875, |
|
"logps/real": -120.1148910522461, |
|
"loss": 0.0086, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.977693557739258, |
|
"rewards/margins": 6.694817543029785, |
|
"rewards/real": 0.7171245217323303, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.5031847133757957e-07, |
|
"logits/generated": -2.6754021644592285, |
|
"logits/real": -2.786886215209961, |
|
"logps/generated": -726.2047119140625, |
|
"logps/real": -138.05221557617188, |
|
"loss": 0.006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.919286251068115, |
|
"rewards/margins": 7.560235500335693, |
|
"rewards/real": 0.6409494280815125, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.8216560509554143e-07, |
|
"logits/generated": -2.7799072265625, |
|
"logits/real": -2.834855556488037, |
|
"logps/generated": -731.4354248046875, |
|
"logps/real": -123.99066162109375, |
|
"loss": 0.0041, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.255575656890869, |
|
"rewards/margins": 7.035998344421387, |
|
"rewards/real": 0.7804235219955444, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.140127388535032e-07, |
|
"logits/generated": -2.695237398147583, |
|
"logits/real": -2.808246612548828, |
|
"logps/generated": -781.197265625, |
|
"logps/real": -134.5619354248047, |
|
"loss": 0.0041, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.42046594619751, |
|
"rewards/margins": 8.036266326904297, |
|
"rewards/real": 0.6158010363578796, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.4585987261146494e-07, |
|
"logits/generated": -2.690974712371826, |
|
"logits/real": -2.7843871116638184, |
|
"logps/generated": -769.6416625976562, |
|
"logps/real": -118.09663391113281, |
|
"loss": 0.0029, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.030922889709473, |
|
"rewards/margins": 8.821279525756836, |
|
"rewards/real": 0.7903567552566528, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.777070063694267e-07, |
|
"logits/generated": -2.6999502182006836, |
|
"logits/real": -2.845780372619629, |
|
"logps/generated": -756.4619750976562, |
|
"logps/real": -128.97555541992188, |
|
"loss": 0.0027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.640128135681152, |
|
"rewards/margins": 8.349076271057129, |
|
"rewards/real": 0.7089481353759766, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.989331436699858e-07, |
|
"logits/generated": -2.770327568054199, |
|
"logits/real": -2.7911763191223145, |
|
"logps/generated": -772.9364624023438, |
|
"logps/real": -125.63675689697266, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.297707557678223, |
|
"rewards/margins": 9.060877799987793, |
|
"rewards/real": 0.7631710767745972, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.953769559032717e-07, |
|
"logits/generated": -2.700230121612549, |
|
"logits/real": -2.770098924636841, |
|
"logps/generated": -789.2719116210938, |
|
"logps/real": -118.48409271240234, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.936752319335938, |
|
"rewards/margins": 9.651647567749023, |
|
"rewards/real": 0.7148973345756531, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.918207681365576e-07, |
|
"logits/generated": -2.715512752532959, |
|
"logits/real": -2.8020758628845215, |
|
"logps/generated": -785.4468383789062, |
|
"logps/real": -141.07501220703125, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.002473831176758, |
|
"rewards/margins": 9.73188591003418, |
|
"rewards/real": 0.7294121980667114, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.882645803698435e-07, |
|
"logits/generated": -2.7685036659240723, |
|
"logits/real": -2.789585590362549, |
|
"logps/generated": -773.3211669921875, |
|
"logps/real": -132.91026306152344, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.269811630249023, |
|
"rewards/margins": 10.108678817749023, |
|
"rewards/real": 0.8388668298721313, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.847083926031294e-07, |
|
"logits/generated": -2.7428407669067383, |
|
"logits/real": -2.7933878898620605, |
|
"logps/generated": -822.3059692382812, |
|
"logps/real": -119.96354675292969, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.959307670593262, |
|
"rewards/margins": 10.764963150024414, |
|
"rewards/real": 0.8056550025939941, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.811522048364154e-07, |
|
"logits/generated": -2.6588082313537598, |
|
"logits/real": -2.753288984298706, |
|
"logps/generated": -809.4946899414062, |
|
"logps/real": -112.04827880859375, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.868528366088867, |
|
"rewards/margins": 10.617731094360352, |
|
"rewards/real": 0.7492026090621948, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.775960170697012e-07, |
|
"logits/generated": -2.6897635459899902, |
|
"logits/real": -2.770383596420288, |
|
"logps/generated": -840.3517456054688, |
|
"logps/real": -111.0347900390625, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.592704772949219, |
|
"rewards/margins": 11.492315292358398, |
|
"rewards/real": 0.8996096849441528, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7403982930298717e-07, |
|
"logits/generated": -2.7826247215270996, |
|
"logits/real": -2.786170482635498, |
|
"logps/generated": -804.2281494140625, |
|
"logps/real": -119.8484115600586, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.724154472351074, |
|
"rewards/margins": 10.552526473999023, |
|
"rewards/real": 0.8283706903457642, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7048364153627306e-07, |
|
"logits/generated": -2.7883107662200928, |
|
"logits/real": -2.7733795642852783, |
|
"logps/generated": -787.4295654296875, |
|
"logps/real": -148.79747009277344, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.756797790527344, |
|
"rewards/margins": 10.487835884094238, |
|
"rewards/real": 0.7310384511947632, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.66927453769559e-07, |
|
"logits/generated": -2.7394678592681885, |
|
"logits/real": -2.790409803390503, |
|
"logps/generated": -819.8644409179688, |
|
"logps/real": -116.63028717041016, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.328279495239258, |
|
"rewards/margins": 11.112763404846191, |
|
"rewards/real": 0.7844842672348022, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.633712660028449e-07, |
|
"logits/generated": -2.727818012237549, |
|
"logits/real": -2.7909488677978516, |
|
"logps/generated": -797.3067016601562, |
|
"logps/real": -117.8537826538086, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.560891151428223, |
|
"rewards/margins": 11.291653633117676, |
|
"rewards/real": 0.7307616472244263, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.5981507823613085e-07, |
|
"logits/generated": -2.767531633377075, |
|
"logits/real": -2.7978005409240723, |
|
"logps/generated": -810.1624755859375, |
|
"logps/real": -121.3199691772461, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.349993705749512, |
|
"rewards/margins": 11.145318984985352, |
|
"rewards/real": 0.7953254580497742, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.562588904694168e-07, |
|
"logits/generated": -2.778958559036255, |
|
"logits/real": -2.767667293548584, |
|
"logps/generated": -821.5338134765625, |
|
"logps/real": -130.97152709960938, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.092814445495605, |
|
"rewards/margins": 11.828493118286133, |
|
"rewards/real": 0.7356794476509094, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.5270270270270264e-07, |
|
"logits/generated": -2.732811450958252, |
|
"logits/real": -2.771510601043701, |
|
"logps/generated": -757.4833374023438, |
|
"logps/real": -126.4487075805664, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.417075157165527, |
|
"rewards/margins": 11.195660591125488, |
|
"rewards/real": 0.7785850167274475, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.491465149359886e-07, |
|
"logits/generated": -2.713351249694824, |
|
"logits/real": -2.756260395050049, |
|
"logps/generated": -806.474609375, |
|
"logps/real": -118.60646057128906, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.823250770568848, |
|
"rewards/margins": 12.591789245605469, |
|
"rewards/real": 0.7685383558273315, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.4559032716927454e-07, |
|
"logits/generated": -2.8016388416290283, |
|
"logits/real": -2.796672821044922, |
|
"logps/generated": -763.6119995117188, |
|
"logps/real": -142.29685974121094, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.680870056152344, |
|
"rewards/margins": 11.322160720825195, |
|
"rewards/real": 0.6412909030914307, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.420341394025605e-07, |
|
"logits/generated": -2.7270781993865967, |
|
"logits/real": -2.7888123989105225, |
|
"logps/generated": -799.5844116210938, |
|
"logps/real": -130.8887481689453, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.834752082824707, |
|
"rewards/margins": 12.512211799621582, |
|
"rewards/real": 0.6774585247039795, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.384779516358463e-07, |
|
"logits/generated": -2.79298734664917, |
|
"logits/real": -2.784541606903076, |
|
"logps/generated": -788.7904052734375, |
|
"logps/real": -134.79293823242188, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.506429672241211, |
|
"rewards/margins": 12.11829948425293, |
|
"rewards/real": 0.6118704080581665, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.3492176386913227e-07, |
|
"logits/generated": -2.8363544940948486, |
|
"logits/real": -2.8054802417755127, |
|
"logps/generated": -768.1728515625, |
|
"logps/real": -139.9955291748047, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.797532081604004, |
|
"rewards/margins": 12.470538139343262, |
|
"rewards/real": 0.6730067133903503, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.313655761024182e-07, |
|
"logits/generated": -2.7400827407836914, |
|
"logits/real": -2.7338576316833496, |
|
"logps/generated": -811.0081787109375, |
|
"logps/real": -131.45535278320312, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -12.710081100463867, |
|
"rewards/margins": 13.333477973937988, |
|
"rewards/real": 0.6233970522880554, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.278093883357041e-07, |
|
"logits/generated": -2.776153087615967, |
|
"logits/real": -2.750797748565674, |
|
"logps/generated": -819.32666015625, |
|
"logps/real": -126.93217468261719, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -12.181897163391113, |
|
"rewards/margins": 12.906854629516602, |
|
"rewards/real": 0.7249582409858704, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.2425320056899e-07, |
|
"logits/generated": -2.7904210090637207, |
|
"logits/real": -2.7961840629577637, |
|
"logps/generated": -789.5787963867188, |
|
"logps/real": -147.4117431640625, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.834062576293945, |
|
"rewards/margins": 12.519264221191406, |
|
"rewards/real": 0.6852015256881714, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.2069701280227595e-07, |
|
"logits/generated": -2.750471830368042, |
|
"logits/real": -2.772777795791626, |
|
"logps/generated": -819.3651123046875, |
|
"logps/real": -135.7245330810547, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -12.894061088562012, |
|
"rewards/margins": 13.609522819519043, |
|
"rewards/real": 0.7154618501663208, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.1714082503556185e-07, |
|
"logits/generated": -2.7443814277648926, |
|
"logits/real": -2.745856285095215, |
|
"logps/generated": -827.7913208007812, |
|
"logps/real": -126.5484390258789, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.79316234588623, |
|
"rewards/margins": 14.394973754882812, |
|
"rewards/real": 0.601812481880188, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.135846372688478e-07, |
|
"logits/generated": -2.812390089035034, |
|
"logits/real": -2.73819637298584, |
|
"logps/generated": -808.9031982421875, |
|
"logps/real": -131.18746948242188, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -12.755112648010254, |
|
"rewards/margins": 13.416218757629395, |
|
"rewards/real": 0.6611047983169556, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.100284495021337e-07, |
|
"logits/generated": -2.820923089981079, |
|
"logits/real": -2.7527689933776855, |
|
"logps/generated": -823.16796875, |
|
"logps/real": -127.68003845214844, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -12.786894798278809, |
|
"rewards/margins": 13.430368423461914, |
|
"rewards/real": 0.6434718370437622, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.064722617354196e-07, |
|
"logits/generated": -2.678879737854004, |
|
"logits/real": -2.7136893272399902, |
|
"logps/generated": -872.8970947265625, |
|
"logps/real": -131.42593383789062, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.947868347167969, |
|
"rewards/margins": 14.58533000946045, |
|
"rewards/real": 0.637461245059967, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.0291607396870553e-07, |
|
"logits/generated": -2.7224462032318115, |
|
"logits/real": -2.7085747718811035, |
|
"logps/generated": -834.8814697265625, |
|
"logps/real": -122.5090560913086, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.601274490356445, |
|
"rewards/margins": 14.206278800964355, |
|
"rewards/real": 0.6050056219100952, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.993598862019915e-07, |
|
"logits/generated": -2.783932685852051, |
|
"logits/real": -2.7148799896240234, |
|
"logps/generated": -828.7579956054688, |
|
"logps/real": -125.92060852050781, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.426411628723145, |
|
"rewards/margins": 14.101513862609863, |
|
"rewards/real": 0.6751025915145874, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.9580369843527737e-07, |
|
"logits/generated": -2.774967670440674, |
|
"logits/real": -2.701488971710205, |
|
"logps/generated": -873.3244018554688, |
|
"logps/real": -123.95247650146484, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -14.389918327331543, |
|
"rewards/margins": 14.915544509887695, |
|
"rewards/real": 0.525626540184021, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.9224751066856327e-07, |
|
"logits/generated": -2.735586643218994, |
|
"logits/real": -2.7377548217773438, |
|
"logps/generated": -805.3878173828125, |
|
"logps/real": -155.18560791015625, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -14.252492904663086, |
|
"rewards/margins": 14.655688285827637, |
|
"rewards/real": 0.4031934142112732, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.886913229018492e-07, |
|
"logits/generated": -2.7526440620422363, |
|
"logits/real": -2.7074286937713623, |
|
"logps/generated": -821.6927490234375, |
|
"logps/real": -128.5563507080078, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -14.784818649291992, |
|
"rewards/margins": 15.3468599319458, |
|
"rewards/real": 0.562040388584137, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.851351351351351e-07, |
|
"logits/generated": -2.7571702003479004, |
|
"logits/real": -2.737308979034424, |
|
"logps/generated": -852.9948120117188, |
|
"logps/real": -132.78759765625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.543283462524414, |
|
"rewards/margins": 16.045238494873047, |
|
"rewards/real": 0.5019546747207642, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.8157894736842105e-07, |
|
"logits/generated": -2.706204891204834, |
|
"logits/real": -2.6772992610931396, |
|
"logps/generated": -849.1552734375, |
|
"logps/real": -120.03173828125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.504430770874023, |
|
"rewards/margins": 15.993593215942383, |
|
"rewards/real": 0.4891592860221863, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.7802275960170695e-07, |
|
"logits/generated": -2.7938857078552246, |
|
"logits/real": -2.740180253982544, |
|
"logps/generated": -782.1716918945312, |
|
"logps/real": -129.24673461914062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -14.547981262207031, |
|
"rewards/margins": 15.090237617492676, |
|
"rewards/real": 0.5422547459602356, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_logits/generated": -2.761601686477661, |
|
"eval_logits/real": -2.705458402633667, |
|
"eval_logps/generated": -827.1112670898438, |
|
"eval_logps/real": -130.07472229003906, |
|
"eval_loss": 5.503268039319664e-05, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/generated": -14.911882400512695, |
|
"eval_rewards/margins": 15.304994583129883, |
|
"eval_rewards/real": 0.3931117355823517, |
|
"eval_runtime": 66.014, |
|
"eval_samples_per_second": 7.574, |
|
"eval_steps_per_second": 0.242, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.7446657183499284e-07, |
|
"logits/generated": -2.8136253356933594, |
|
"logits/real": -2.669490337371826, |
|
"logps/generated": -792.15380859375, |
|
"logps/real": -127.56230163574219, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -14.737091064453125, |
|
"rewards/margins": 15.237916946411133, |
|
"rewards/real": 0.5008259415626526, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.709103840682788e-07, |
|
"logits/generated": -2.787266731262207, |
|
"logits/real": -2.670997142791748, |
|
"logps/generated": -824.7960205078125, |
|
"logps/real": -124.60465240478516, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.136065483093262, |
|
"rewards/margins": 15.58125114440918, |
|
"rewards/real": 0.4451850950717926, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.6735419630156474e-07, |
|
"logits/generated": -2.704446315765381, |
|
"logits/real": -2.6110129356384277, |
|
"logps/generated": -878.9093017578125, |
|
"logps/real": -118.95606994628906, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.33749008178711, |
|
"rewards/margins": 16.747507095336914, |
|
"rewards/real": 0.41001471877098083, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.637980085348506e-07, |
|
"logits/generated": -2.794490098953247, |
|
"logits/real": -2.6710212230682373, |
|
"logps/generated": -834.6387939453125, |
|
"logps/real": -130.42050170898438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.052263259887695, |
|
"rewards/margins": 15.429656982421875, |
|
"rewards/real": 0.37739241123199463, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.602418207681365e-07, |
|
"logits/generated": -2.7407026290893555, |
|
"logits/real": -2.6537132263183594, |
|
"logps/generated": -880.5545043945312, |
|
"logps/real": -135.30288696289062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.217912673950195, |
|
"rewards/margins": 16.690380096435547, |
|
"rewards/real": 0.47246813774108887, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.5668563300142247e-07, |
|
"logits/generated": -2.81174898147583, |
|
"logits/real": -2.6751866340637207, |
|
"logps/generated": -854.27490234375, |
|
"logps/real": -126.11138916015625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.003740310668945, |
|
"rewards/margins": 16.378582000732422, |
|
"rewards/real": 0.3748398423194885, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.5312944523470837e-07, |
|
"logits/generated": -2.741673469543457, |
|
"logits/real": -2.5907273292541504, |
|
"logps/generated": -888.6085815429688, |
|
"logps/real": -128.69569396972656, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.06721305847168, |
|
"rewards/margins": 17.444976806640625, |
|
"rewards/real": 0.37776434421539307, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.495732574679943e-07, |
|
"logits/generated": -2.7112770080566406, |
|
"logits/real": -2.647355794906616, |
|
"logps/generated": -842.6921997070312, |
|
"logps/real": -126.5383529663086, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.2426815032959, |
|
"rewards/margins": 16.684232711791992, |
|
"rewards/real": 0.4415510296821594, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.460170697012802e-07, |
|
"logits/generated": -2.7611031532287598, |
|
"logits/real": -2.587040901184082, |
|
"logps/generated": -856.2335815429688, |
|
"logps/real": -131.83444213867188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.540231704711914, |
|
"rewards/margins": 16.8332576751709, |
|
"rewards/real": 0.29302695393562317, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.424608819345661e-07, |
|
"logits/generated": -2.749112844467163, |
|
"logits/real": -2.5918571949005127, |
|
"logps/generated": -806.6871948242188, |
|
"logps/real": -124.9672622680664, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.760574340820312, |
|
"rewards/margins": 16.10599136352539, |
|
"rewards/real": 0.3454182744026184, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.3890469416785205e-07, |
|
"logits/generated": -2.7701778411865234, |
|
"logits/real": -2.6194324493408203, |
|
"logps/generated": -830.5565185546875, |
|
"logps/real": -129.43264770507812, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.803186416625977, |
|
"rewards/margins": 17.231233596801758, |
|
"rewards/real": 0.42804789543151855, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.35348506401138e-07, |
|
"logits/generated": -2.814532995223999, |
|
"logits/real": -2.5871658325195312, |
|
"logps/generated": -834.9091796875, |
|
"logps/real": -127.82197570800781, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.46357536315918, |
|
"rewards/margins": 17.733022689819336, |
|
"rewards/real": 0.2694476246833801, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.3179231863442384e-07, |
|
"logits/generated": -2.748018741607666, |
|
"logits/real": -2.5878210067749023, |
|
"logps/generated": -869.302734375, |
|
"logps/real": -141.6997833251953, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.122358322143555, |
|
"rewards/margins": 18.34942626953125, |
|
"rewards/real": 0.22706761956214905, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.282361308677098e-07, |
|
"logits/generated": -2.792604684829712, |
|
"logits/real": -2.5345077514648438, |
|
"logps/generated": -875.5255737304688, |
|
"logps/real": -133.2880096435547, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.701038360595703, |
|
"rewards/margins": 19.063264846801758, |
|
"rewards/real": 0.3622281849384308, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.2467994310099573e-07, |
|
"logits/generated": -2.775300979614258, |
|
"logits/real": -2.560939311981201, |
|
"logps/generated": -871.24853515625, |
|
"logps/real": -142.5642852783203, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.207752227783203, |
|
"rewards/margins": 18.60503387451172, |
|
"rewards/real": 0.3972865343093872, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.211237553342817e-07, |
|
"logits/generated": -2.835697650909424, |
|
"logits/real": -2.570935010910034, |
|
"logps/generated": -844.2117309570312, |
|
"logps/real": -129.60166931152344, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.626232147216797, |
|
"rewards/margins": 17.888334274291992, |
|
"rewards/real": 0.26210257411003113, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.175675675675675e-07, |
|
"logits/generated": -2.7552971839904785, |
|
"logits/real": -2.5506412982940674, |
|
"logps/generated": -894.5694580078125, |
|
"logps/real": -119.1685791015625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.028125762939453, |
|
"rewards/margins": 19.389427185058594, |
|
"rewards/real": 0.36130291223526, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.1401137980085347e-07, |
|
"logits/generated": -2.7464940547943115, |
|
"logits/real": -2.5819218158721924, |
|
"logps/generated": -898.6015625, |
|
"logps/real": -131.2238311767578, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.07114601135254, |
|
"rewards/margins": 19.419193267822266, |
|
"rewards/real": 0.34804823994636536, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.104551920341394e-07, |
|
"logits/generated": -2.7470998764038086, |
|
"logits/real": -2.5765349864959717, |
|
"logps/generated": -885.7230224609375, |
|
"logps/real": -134.91915893554688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.37563705444336, |
|
"rewards/margins": 18.524120330810547, |
|
"rewards/real": 0.14848431944847107, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.068990042674253e-07, |
|
"logits/generated": -2.734856128692627, |
|
"logits/real": -2.512298107147217, |
|
"logps/generated": -853.0060424804688, |
|
"logps/real": -120.59394836425781, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.26091194152832, |
|
"rewards/margins": 18.471248626708984, |
|
"rewards/real": 0.21033525466918945, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.033428165007112e-07, |
|
"logits/generated": -2.7379255294799805, |
|
"logits/real": -2.524719715118408, |
|
"logps/generated": -912.4431762695312, |
|
"logps/real": -123.26702880859375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.675243377685547, |
|
"rewards/margins": 19.047819137573242, |
|
"rewards/real": 0.37257617712020874, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.9978662873399715e-07, |
|
"logits/generated": -2.7684216499328613, |
|
"logits/real": -2.531463146209717, |
|
"logps/generated": -881.7340698242188, |
|
"logps/real": -134.31008911132812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.38132667541504, |
|
"rewards/margins": 19.59175682067871, |
|
"rewards/real": 0.21043212711811066, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.9623044096728305e-07, |
|
"logits/generated": -2.7709155082702637, |
|
"logits/real": -2.562648057937622, |
|
"logps/generated": -887.0978393554688, |
|
"logps/real": -145.66043090820312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.221771240234375, |
|
"rewards/margins": 19.359745025634766, |
|
"rewards/real": 0.13797567784786224, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.92674253200569e-07, |
|
"logits/generated": -2.8770792484283447, |
|
"logits/real": -2.5876846313476562, |
|
"logps/generated": -835.0736083984375, |
|
"logps/real": -131.42913818359375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.772159576416016, |
|
"rewards/margins": 19.107730865478516, |
|
"rewards/real": 0.33557194471359253, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.8911806543385494e-07, |
|
"logits/generated": -2.734930992126465, |
|
"logits/real": -2.5578300952911377, |
|
"logps/generated": -874.3038330078125, |
|
"logps/real": -145.36695861816406, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.031780242919922, |
|
"rewards/margins": 20.371191024780273, |
|
"rewards/real": 0.33940908312797546, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.855618776671408e-07, |
|
"logits/generated": -2.6914491653442383, |
|
"logits/real": -2.576624631881714, |
|
"logps/generated": -893.9830322265625, |
|
"logps/real": -153.35513305664062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.32299041748047, |
|
"rewards/margins": 19.533132553100586, |
|
"rewards/real": 0.21014323830604553, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.8200568990042673e-07, |
|
"logits/generated": -2.7508046627044678, |
|
"logits/real": -2.489609956741333, |
|
"logps/generated": -886.3199462890625, |
|
"logps/real": -132.56236267089844, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.033309936523438, |
|
"rewards/margins": 20.369976043701172, |
|
"rewards/real": 0.3366653025150299, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.784495021337127e-07, |
|
"logits/generated": -2.7089645862579346, |
|
"logits/real": -2.4881272315979004, |
|
"logps/generated": -903.6559448242188, |
|
"logps/real": -122.16401672363281, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.558847427368164, |
|
"rewards/margins": 19.884899139404297, |
|
"rewards/real": 0.32605427503585815, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.7489331436699857e-07, |
|
"logits/generated": -2.7490928173065186, |
|
"logits/real": -2.536649703979492, |
|
"logps/generated": -893.4107666015625, |
|
"logps/real": -141.30215454101562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.618242263793945, |
|
"rewards/margins": 19.87813949584961, |
|
"rewards/real": 0.25990021228790283, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.7133712660028446e-07, |
|
"logits/generated": -2.754713535308838, |
|
"logits/real": -2.4825220108032227, |
|
"logps/generated": -877.2716064453125, |
|
"logps/real": -122.8569107055664, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.619098663330078, |
|
"rewards/margins": 19.970170974731445, |
|
"rewards/real": 0.35107091069221497, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.677809388335704e-07, |
|
"logits/generated": -2.7757420539855957, |
|
"logits/real": -2.5053367614746094, |
|
"logps/generated": -907.7996826171875, |
|
"logps/real": -124.29368591308594, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.24595069885254, |
|
"rewards/margins": 20.557300567626953, |
|
"rewards/real": 0.3113483488559723, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.642247510668563e-07, |
|
"logits/generated": -2.7591538429260254, |
|
"logits/real": -2.488976001739502, |
|
"logps/generated": -878.0640869140625, |
|
"logps/real": -129.9168701171875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.200183868408203, |
|
"rewards/margins": 20.459766387939453, |
|
"rewards/real": 0.25958216190338135, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6066856330014225e-07, |
|
"logits/generated": -2.717893600463867, |
|
"logits/real": -2.475963830947876, |
|
"logps/generated": -886.1345825195312, |
|
"logps/real": -136.38031005859375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.064132690429688, |
|
"rewards/margins": 21.22158432006836, |
|
"rewards/real": 0.1574556827545166, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5711237553342815e-07, |
|
"logits/generated": -2.7576115131378174, |
|
"logits/real": -2.520620107650757, |
|
"logps/generated": -919.7344970703125, |
|
"logps/real": -146.73341369628906, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.55472755432129, |
|
"rewards/margins": 21.70217514038086, |
|
"rewards/real": 0.14745107293128967, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5355618776671404e-07, |
|
"logits/generated": -2.758734941482544, |
|
"logits/real": -2.4575321674346924, |
|
"logps/generated": -924.5079956054688, |
|
"logps/real": -137.92759704589844, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.882726669311523, |
|
"rewards/margins": 21.117658615112305, |
|
"rewards/real": 0.23493008315563202, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5e-07, |
|
"logits/generated": -2.76533579826355, |
|
"logits/real": -2.473336696624756, |
|
"logps/generated": -872.0582885742188, |
|
"logps/real": -128.65528869628906, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.465499877929688, |
|
"rewards/margins": 20.760725021362305, |
|
"rewards/real": 0.29522615671157837, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4644381223328594e-07, |
|
"logits/generated": -2.8043251037597656, |
|
"logits/real": -2.4596962928771973, |
|
"logps/generated": -831.9053955078125, |
|
"logps/real": -136.89483642578125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.220478057861328, |
|
"rewards/margins": 20.537092208862305, |
|
"rewards/real": 0.31661272048950195, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4288762446657183e-07, |
|
"logits/generated": -2.7661118507385254, |
|
"logits/real": -2.463319778442383, |
|
"logps/generated": -896.66796875, |
|
"logps/real": -140.58807373046875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.672061920166016, |
|
"rewards/margins": 21.854246139526367, |
|
"rewards/real": 0.18218322098255157, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.393314366998578e-07, |
|
"logits/generated": -2.8111281394958496, |
|
"logits/real": -2.45881986618042, |
|
"logps/generated": -883.6256713867188, |
|
"logps/real": -133.40512084960938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.710323333740234, |
|
"rewards/margins": 22.020999908447266, |
|
"rewards/real": 0.3106769621372223, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.3577524893314365e-07, |
|
"logits/generated": -2.7077133655548096, |
|
"logits/real": -2.4282584190368652, |
|
"logps/generated": -929.71923828125, |
|
"logps/real": -125.48017883300781, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.90422821044922, |
|
"rewards/margins": 22.404098510742188, |
|
"rewards/real": 0.49986690282821655, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.322190611664296e-07, |
|
"logits/generated": -2.8797926902770996, |
|
"logits/real": -2.449512004852295, |
|
"logps/generated": -898.6605224609375, |
|
"logps/real": -145.88131713867188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.765823364257812, |
|
"rewards/margins": 21.118236541748047, |
|
"rewards/real": 0.35241395235061646, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.2866287339971549e-07, |
|
"logits/generated": -2.7840793132781982, |
|
"logits/real": -2.467308282852173, |
|
"logps/generated": -892.7574462890625, |
|
"logps/real": -133.6017303466797, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.309818267822266, |
|
"rewards/margins": 21.834148406982422, |
|
"rewards/real": 0.524328351020813, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.251066856330014e-07, |
|
"logits/generated": -2.7835748195648193, |
|
"logits/real": -2.430983781814575, |
|
"logps/generated": -926.97900390625, |
|
"logps/real": -121.66536712646484, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.335927963256836, |
|
"rewards/margins": 22.66562271118164, |
|
"rewards/real": 0.32969528436660767, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.2155049786628733e-07, |
|
"logits/generated": -2.7988877296447754, |
|
"logits/real": -2.470797061920166, |
|
"logps/generated": -906.01904296875, |
|
"logps/real": -139.35302734375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.41358757019043, |
|
"rewards/margins": 21.88725471496582, |
|
"rewards/real": 0.4736654758453369, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.1799431009957325e-07, |
|
"logits/generated": -2.7693393230438232, |
|
"logits/real": -2.449216842651367, |
|
"logps/generated": -971.3763427734375, |
|
"logps/real": -120.10380554199219, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.712810516357422, |
|
"rewards/margins": 23.097557067871094, |
|
"rewards/real": 0.38474756479263306, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.1443812233285914e-07, |
|
"logits/generated": -2.8496899604797363, |
|
"logits/real": -2.6208953857421875, |
|
"logps/generated": -853.5813598632812, |
|
"logps/real": -140.28988647460938, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.387523651123047, |
|
"rewards/margins": 18.704341888427734, |
|
"rewards/real": 0.31681886315345764, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.108819345661451e-07, |
|
"logits/generated": -2.81449556350708, |
|
"logits/real": -2.7593271732330322, |
|
"logps/generated": -824.1788330078125, |
|
"logps/real": -129.34524536132812, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.003524780273438, |
|
"rewards/margins": 15.52760124206543, |
|
"rewards/real": 0.5240752100944519, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.0732574679943098e-07, |
|
"logits/generated": -2.8338940143585205, |
|
"logits/real": -2.6659107208251953, |
|
"logps/generated": -828.3829956054688, |
|
"logps/real": -113.12556457519531, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.830032348632812, |
|
"rewards/margins": 16.479970932006836, |
|
"rewards/real": 0.6499394178390503, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.0376955903271693e-07, |
|
"logits/generated": -2.8800129890441895, |
|
"logits/real": -2.7851107120513916, |
|
"logps/generated": -825.91015625, |
|
"logps/real": -145.60104370117188, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.22101879119873, |
|
"rewards/margins": 15.671483993530273, |
|
"rewards/real": 0.45046553015708923, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.0021337126600283e-07, |
|
"logits/generated": -2.8783576488494873, |
|
"logits/real": -2.719095468521118, |
|
"logps/generated": -829.0347900390625, |
|
"logps/real": -117.2816162109375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.659259796142578, |
|
"rewards/margins": 17.303306579589844, |
|
"rewards/real": 0.6440474390983582, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_logits/generated": -2.8324971199035645, |
|
"eval_logits/real": -2.7463560104370117, |
|
"eval_logps/generated": -843.258544921875, |
|
"eval_logps/real": -129.80291748046875, |
|
"eval_loss": 2.3505108401877806e-05, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/generated": -16.52660369873047, |
|
"eval_rewards/margins": 16.946895599365234, |
|
"eval_rewards/real": 0.42029163241386414, |
|
"eval_runtime": 65.6308, |
|
"eval_samples_per_second": 7.618, |
|
"eval_steps_per_second": 0.244, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.9665718349928875e-07, |
|
"logits/generated": -2.7597239017486572, |
|
"logits/real": -2.70881986618042, |
|
"logps/generated": -878.791015625, |
|
"logps/real": -121.81756591796875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.580312728881836, |
|
"rewards/margins": 18.086267471313477, |
|
"rewards/real": 0.5059542655944824, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.931009957325747e-07, |
|
"logits/generated": -2.8248562812805176, |
|
"logits/real": -2.6810784339904785, |
|
"logps/generated": -852.2976684570312, |
|
"logps/real": -110.82057189941406, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.118757247924805, |
|
"rewards/margins": 17.73995590209961, |
|
"rewards/real": 0.6211975812911987, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.895448079658606e-07, |
|
"logits/generated": -2.8053200244903564, |
|
"logits/real": -2.70365309715271, |
|
"logps/generated": -900.5974731445312, |
|
"logps/real": -135.4402313232422, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.847537994384766, |
|
"rewards/margins": 18.353679656982422, |
|
"rewards/real": 0.5061434507369995, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.859886201991465e-07, |
|
"logits/generated": -2.898444414138794, |
|
"logits/real": -2.7453322410583496, |
|
"logps/generated": -820.4622802734375, |
|
"logps/real": -140.00946044921875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.400270462036133, |
|
"rewards/margins": 17.893169403076172, |
|
"rewards/real": 0.4929002821445465, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.8243243243243243e-07, |
|
"logits/generated": -2.839688777923584, |
|
"logits/real": -2.6923739910125732, |
|
"logps/generated": -838.73291015625, |
|
"logps/real": -132.70223999023438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.139698028564453, |
|
"rewards/margins": 18.552087783813477, |
|
"rewards/real": 0.41239088773727417, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7887624466571835e-07, |
|
"logits/generated": -2.833216667175293, |
|
"logits/real": -2.7041759490966797, |
|
"logps/generated": -861.8936767578125, |
|
"logps/real": -126.6530990600586, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.731300354003906, |
|
"rewards/margins": 19.197338104248047, |
|
"rewards/real": 0.4660395085811615, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7532005689900424e-07, |
|
"logits/generated": -2.8046717643737793, |
|
"logits/real": -2.69667387008667, |
|
"logps/generated": -875.9267578125, |
|
"logps/real": -128.2639617919922, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.01252555847168, |
|
"rewards/margins": 18.45262908935547, |
|
"rewards/real": 0.44010037183761597, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.717638691322902e-07, |
|
"logits/generated": -2.83022141456604, |
|
"logits/real": -2.692930221557617, |
|
"logps/generated": -854.4952392578125, |
|
"logps/real": -121.74955749511719, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.961904525756836, |
|
"rewards/margins": 18.5115909576416, |
|
"rewards/real": 0.5496853590011597, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.6820768136557609e-07, |
|
"logits/generated": -2.8350539207458496, |
|
"logits/real": -2.6970601081848145, |
|
"logps/generated": -863.2819213867188, |
|
"logps/real": -123.15059661865234, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.864200592041016, |
|
"rewards/margins": 19.36715316772461, |
|
"rewards/real": 0.5029550194740295, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.64651493598862e-07, |
|
"logits/generated": -2.8632559776306152, |
|
"logits/real": -2.677931308746338, |
|
"logps/generated": -879.8753662109375, |
|
"logps/real": -141.77952575683594, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.380382537841797, |
|
"rewards/margins": 18.56157684326172, |
|
"rewards/real": 0.18119129538536072, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.6109530583214793e-07, |
|
"logits/generated": -2.803745746612549, |
|
"logits/real": -2.6646134853363037, |
|
"logps/generated": -901.3361206054688, |
|
"logps/real": -126.1727523803711, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.827922821044922, |
|
"rewards/margins": 20.339210510253906, |
|
"rewards/real": 0.5112860202789307, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5753911806543385e-07, |
|
"logits/generated": -2.8503870964050293, |
|
"logits/real": -2.6645989418029785, |
|
"logps/generated": -888.7047119140625, |
|
"logps/real": -129.1335906982422, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.367877960205078, |
|
"rewards/margins": 19.824716567993164, |
|
"rewards/real": 0.4568362236022949, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5398293029871974e-07, |
|
"logits/generated": -2.7480721473693848, |
|
"logits/real": -2.6507885456085205, |
|
"logps/generated": -934.6629028320312, |
|
"logps/real": -135.6553497314453, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.353116989135742, |
|
"rewards/margins": 20.787641525268555, |
|
"rewards/real": 0.4345230162143707, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.504267425320057e-07, |
|
"logits/generated": -2.797375202178955, |
|
"logits/real": -2.6887059211730957, |
|
"logps/generated": -881.1536254882812, |
|
"logps/real": -125.61927795410156, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.214933395385742, |
|
"rewards/margins": 19.779462814331055, |
|
"rewards/real": 0.5645291209220886, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.4687055476529158e-07, |
|
"logits/generated": -2.870006799697876, |
|
"logits/real": -2.654900550842285, |
|
"logps/generated": -834.4885864257812, |
|
"logps/real": -136.41250610351562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.738508224487305, |
|
"rewards/margins": 19.11133575439453, |
|
"rewards/real": 0.37282687425613403, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.4331436699857753e-07, |
|
"logits/generated": -2.8572959899902344, |
|
"logits/real": -2.7063486576080322, |
|
"logps/generated": -870.64404296875, |
|
"logps/real": -134.53292846679688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.98287582397461, |
|
"rewards/margins": 20.453279495239258, |
|
"rewards/real": 0.4704047739505768, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.3975817923186345e-07, |
|
"logits/generated": -2.871398448944092, |
|
"logits/real": -2.707024335861206, |
|
"logps/generated": -850.9390869140625, |
|
"logps/real": -148.42337036132812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.16036605834961, |
|
"rewards/margins": 20.592147827148438, |
|
"rewards/real": 0.43178051710128784, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.3620199146514935e-07, |
|
"logits/generated": -2.807039976119995, |
|
"logits/real": -2.723776340484619, |
|
"logps/generated": -929.8095703125, |
|
"logps/real": -115.76216125488281, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.871318817138672, |
|
"rewards/margins": 21.316526412963867, |
|
"rewards/real": 0.44520822167396545, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.326458036984353e-07, |
|
"logits/generated": -2.8286869525909424, |
|
"logits/real": -2.6350340843200684, |
|
"logps/generated": -862.5518798828125, |
|
"logps/real": -126.08731842041016, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.82559585571289, |
|
"rewards/margins": 20.303485870361328, |
|
"rewards/real": 0.4778921604156494, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.290896159317212e-07, |
|
"logits/generated": -2.8613972663879395, |
|
"logits/real": -2.638763904571533, |
|
"logps/generated": -921.1492919921875, |
|
"logps/real": -123.17964172363281, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.54897689819336, |
|
"rewards/margins": 20.937849044799805, |
|
"rewards/real": 0.3888731598854065, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.255334281650071e-07, |
|
"logits/generated": -2.876451253890991, |
|
"logits/real": -2.6815617084503174, |
|
"logps/generated": -831.7360229492188, |
|
"logps/real": -139.11557006835938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.613943099975586, |
|
"rewards/margins": 19.776029586791992, |
|
"rewards/real": 0.16208769381046295, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.2197724039829303e-07, |
|
"logits/generated": -2.8330130577087402, |
|
"logits/real": -2.566429615020752, |
|
"logps/generated": -911.4781494140625, |
|
"logps/real": -122.40309143066406, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.14625358581543, |
|
"rewards/margins": 22.701326370239258, |
|
"rewards/real": 0.5550734400749207, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1842105263157894e-07, |
|
"logits/generated": -2.808868885040283, |
|
"logits/real": -2.6208791732788086, |
|
"logps/generated": -873.4892578125, |
|
"logps/real": -114.96858215332031, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.708131790161133, |
|
"rewards/margins": 21.098825454711914, |
|
"rewards/real": 0.3906935155391693, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1486486486486487e-07, |
|
"logits/generated": -2.8322298526763916, |
|
"logits/real": -2.6367688179016113, |
|
"logps/generated": -940.2215576171875, |
|
"logps/real": -132.8704071044922, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.91935920715332, |
|
"rewards/margins": 23.39228630065918, |
|
"rewards/real": 0.4729260802268982, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.1130867709815078e-07, |
|
"logits/generated": -2.8651223182678223, |
|
"logits/real": -2.6476199626922607, |
|
"logps/generated": -869.115234375, |
|
"logps/real": -129.05712890625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.786222457885742, |
|
"rewards/margins": 21.287538528442383, |
|
"rewards/real": 0.5013141632080078, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.077524893314367e-07, |
|
"logits/generated": -2.7978148460388184, |
|
"logits/real": -2.5825142860412598, |
|
"logps/generated": -897.38330078125, |
|
"logps/real": -118.57264709472656, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.1629638671875, |
|
"rewards/margins": 21.7834415435791, |
|
"rewards/real": 0.6204766631126404, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0419630156472262e-07, |
|
"logits/generated": -2.866764783859253, |
|
"logits/real": -2.685533046722412, |
|
"logps/generated": -919.8984375, |
|
"logps/real": -146.31027221679688, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.15250587463379, |
|
"rewards/margins": 21.48689079284668, |
|
"rewards/real": 0.3343891501426697, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.0064011379800854e-07, |
|
"logits/generated": -2.8685061931610107, |
|
"logits/real": -2.649932384490967, |
|
"logps/generated": -902.7081909179688, |
|
"logps/real": -141.6209716796875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.167659759521484, |
|
"rewards/margins": 22.551496505737305, |
|
"rewards/real": 0.38383588194847107, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.708392603129445e-08, |
|
"logits/generated": -2.8944077491760254, |
|
"logits/real": -2.6382641792297363, |
|
"logps/generated": -916.2838745117188, |
|
"logps/real": -130.5482177734375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.300434112548828, |
|
"rewards/margins": 21.502178192138672, |
|
"rewards/real": 0.20173999667167664, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.352773826458037e-08, |
|
"logits/generated": -2.8912646770477295, |
|
"logits/real": -2.6097311973571777, |
|
"logps/generated": -894.2330322265625, |
|
"logps/real": -132.25819396972656, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.95718765258789, |
|
"rewards/margins": 22.406177520751953, |
|
"rewards/real": 0.44899100065231323, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.997155049786629e-08, |
|
"logits/generated": -2.8901562690734863, |
|
"logits/real": -2.551877021789551, |
|
"logps/generated": -932.6094970703125, |
|
"logps/real": -119.0418930053711, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.05977439880371, |
|
"rewards/margins": 22.581966400146484, |
|
"rewards/real": 0.5221914052963257, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.64153627311522e-08, |
|
"logits/generated": -2.8296782970428467, |
|
"logits/real": -2.5993740558624268, |
|
"logps/generated": -896.4122314453125, |
|
"logps/real": -128.8374481201172, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.528562545776367, |
|
"rewards/margins": 22.051563262939453, |
|
"rewards/real": 0.5230005979537964, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.285917496443812e-08, |
|
"logits/generated": -2.860327959060669, |
|
"logits/real": -2.5901761054992676, |
|
"logps/generated": -898.0784912109375, |
|
"logps/real": -132.93478393554688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.043405532836914, |
|
"rewards/margins": 22.58662223815918, |
|
"rewards/real": 0.543217658996582, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.930298719772404e-08, |
|
"logits/generated": -2.8273463249206543, |
|
"logits/real": -2.5543017387390137, |
|
"logps/generated": -961.3826904296875, |
|
"logps/real": -140.56031799316406, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.900615692138672, |
|
"rewards/margins": 24.335386276245117, |
|
"rewards/real": 0.43477168679237366, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.574679943100994e-08, |
|
"logits/generated": -2.8639349937438965, |
|
"logits/real": -2.6260411739349365, |
|
"logps/generated": -885.5602416992188, |
|
"logps/real": -112.6806869506836, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.910247802734375, |
|
"rewards/margins": 21.491928100585938, |
|
"rewards/real": 0.5816811919212341, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.219061166429587e-08, |
|
"logits/generated": -2.8940956592559814, |
|
"logits/real": -2.5862889289855957, |
|
"logps/generated": -917.0275268554688, |
|
"logps/real": -125.59222412109375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.038272857666016, |
|
"rewards/margins": 23.486886978149414, |
|
"rewards/real": 0.44861316680908203, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.863442389758179e-08, |
|
"logits/generated": -2.8932290077209473, |
|
"logits/real": -2.5974230766296387, |
|
"logps/generated": -886.7428588867188, |
|
"logps/real": -143.0746307373047, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.268463134765625, |
|
"rewards/margins": 22.662071228027344, |
|
"rewards/real": 0.3936085104942322, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.507823613086771e-08, |
|
"logits/generated": -2.922111749649048, |
|
"logits/real": -2.594691276550293, |
|
"logps/generated": -906.36767578125, |
|
"logps/real": -140.33889770507812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.82131004333496, |
|
"rewards/margins": 23.025859832763672, |
|
"rewards/real": 0.20455090701580048, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.152204836415363e-08, |
|
"logits/generated": -2.914600133895874, |
|
"logits/real": -2.6084065437316895, |
|
"logps/generated": -941.8387451171875, |
|
"logps/real": -133.2527313232422, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.14767837524414, |
|
"rewards/margins": 23.540363311767578, |
|
"rewards/real": 0.39268168807029724, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.796586059743954e-08, |
|
"logits/generated": -2.8385584354400635, |
|
"logits/real": -2.6445257663726807, |
|
"logps/generated": -907.6339721679688, |
|
"logps/real": -132.3414764404297, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.239797592163086, |
|
"rewards/margins": 24.71152114868164, |
|
"rewards/real": 0.47172126173973083, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.4409672830725456e-08, |
|
"logits/generated": -2.8101682662963867, |
|
"logits/real": -2.6199960708618164, |
|
"logps/generated": -945.5979614257812, |
|
"logps/real": -132.10206604003906, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.88088607788086, |
|
"rewards/margins": 25.367450714111328, |
|
"rewards/real": 0.4865630567073822, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5.0853485064011376e-08, |
|
"logits/generated": -2.8484458923339844, |
|
"logits/real": -2.563117504119873, |
|
"logps/generated": -928.4225463867188, |
|
"logps/real": -119.01268005371094, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.193946838378906, |
|
"rewards/margins": 23.61981201171875, |
|
"rewards/real": 0.4258663058280945, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.72972972972973e-08, |
|
"logits/generated": -2.8349921703338623, |
|
"logits/real": -2.5548834800720215, |
|
"logps/generated": -979.7244873046875, |
|
"logps/real": -130.8174285888672, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.183971405029297, |
|
"rewards/margins": 25.632709503173828, |
|
"rewards/real": 0.44873887300491333, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.374110953058322e-08, |
|
"logits/generated": -2.837606906890869, |
|
"logits/real": -2.537325620651245, |
|
"logps/generated": -992.2394409179688, |
|
"logps/real": -128.49644470214844, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.086801528930664, |
|
"rewards/margins": 25.410724639892578, |
|
"rewards/real": 0.32392334938049316, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.018492176386913e-08, |
|
"logits/generated": -2.91603422164917, |
|
"logits/real": -2.5341243743896484, |
|
"logps/generated": -898.00146484375, |
|
"logps/real": -129.14276123046875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.930097579956055, |
|
"rewards/margins": 23.440217971801758, |
|
"rewards/real": 0.5101193189620972, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.6628733997155046e-08, |
|
"logits/generated": -2.8616137504577637, |
|
"logits/real": -2.5504488945007324, |
|
"logps/generated": -912.74560546875, |
|
"logps/real": -108.04595947265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.54279899597168, |
|
"rewards/margins": 23.969791412353516, |
|
"rewards/real": 0.4269927442073822, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.3072546230440967e-08, |
|
"logits/generated": -2.85672926902771, |
|
"logits/real": -2.5588877201080322, |
|
"logps/generated": -940.3358154296875, |
|
"logps/real": -125.9631118774414, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.594558715820312, |
|
"rewards/margins": 24.99027442932129, |
|
"rewards/real": 0.39571598172187805, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.9516358463726884e-08, |
|
"logits/generated": -2.89031720161438, |
|
"logits/real": -2.5603203773498535, |
|
"logps/generated": -972.2039184570312, |
|
"logps/real": -137.25588989257812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.38454818725586, |
|
"rewards/margins": 24.839740753173828, |
|
"rewards/real": 0.45519551634788513, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.59601706970128e-08, |
|
"logits/generated": -2.890516996383667, |
|
"logits/real": -2.5641016960144043, |
|
"logps/generated": -936.7041015625, |
|
"logps/real": -140.62559509277344, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.00693702697754, |
|
"rewards/margins": 25.536272048950195, |
|
"rewards/real": 0.5293352603912354, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.240398293029872e-08, |
|
"logits/generated": -2.8512871265411377, |
|
"logits/real": -2.5838348865509033, |
|
"logps/generated": -910.3527221679688, |
|
"logps/real": -126.60355377197266, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.73545265197754, |
|
"rewards/margins": 24.106916427612305, |
|
"rewards/real": 0.3714631199836731, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_logits/generated": -2.8901188373565674, |
|
"eval_logits/real": -2.609180450439453, |
|
"eval_logps/generated": -916.0912475585938, |
|
"eval_logps/real": -130.5047149658203, |
|
"eval_loss": 2.595016326267796e-07, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/generated": -23.809871673583984, |
|
"eval_rewards/margins": 24.159982681274414, |
|
"eval_rewards/real": 0.35011160373687744, |
|
"eval_runtime": 65.5536, |
|
"eval_samples_per_second": 7.627, |
|
"eval_steps_per_second": 0.244, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.8847795163584636e-08, |
|
"logits/generated": -2.900836229324341, |
|
"logits/real": -2.5513949394226074, |
|
"logps/generated": -931.4461059570312, |
|
"logps/real": -129.80133056640625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.57596206665039, |
|
"rewards/margins": 24.972017288208008, |
|
"rewards/real": 0.3960537910461426, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.5291607396870554e-08, |
|
"logits/generated": -2.862175941467285, |
|
"logits/real": -2.5267205238342285, |
|
"logps/generated": -934.8350830078125, |
|
"logps/real": -126.36529541015625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.785350799560547, |
|
"rewards/margins": 25.167552947998047, |
|
"rewards/real": 0.38220247626304626, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.1735419630156473e-08, |
|
"logits/generated": -2.877037525177002, |
|
"logits/real": -2.5817883014678955, |
|
"logps/generated": -916.3255004882812, |
|
"logps/real": -128.97787475585938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.590730667114258, |
|
"rewards/margins": 24.93622589111328, |
|
"rewards/real": 0.3454935848712921, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 8.179231863442388e-09, |
|
"logits/generated": -2.9152793884277344, |
|
"logits/real": -2.5522732734680176, |
|
"logps/generated": -968.6594848632812, |
|
"logps/real": -132.7301025390625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.529855728149414, |
|
"rewards/margins": 25.114501953125, |
|
"rewards/real": 0.5846462249755859, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.623044096728307e-09, |
|
"logits/generated": -2.8607215881347656, |
|
"logits/real": -2.5737948417663574, |
|
"logps/generated": -897.6732177734375, |
|
"logps/real": -137.73817443847656, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.409955978393555, |
|
"rewards/margins": 23.72184181213379, |
|
"rewards/real": 0.31188473105430603, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.0668563300142248e-09, |
|
"logits/generated": -2.8556289672851562, |
|
"logits/real": -2.54154896736145, |
|
"logps/generated": -959.1483154296875, |
|
"logps/real": -127.1308822631836, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.255569458007812, |
|
"rewards/margins": 25.72182273864746, |
|
"rewards/real": 0.46625250577926636, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1563, |
|
"total_flos": 0.0, |
|
"train_loss": 0.009621814649877188, |
|
"train_runtime": 14787.3165, |
|
"train_samples_per_second": 3.381, |
|
"train_steps_per_second": 0.106 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1563, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|