|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9998983843105376, |
|
"eval_steps": 500, |
|
"global_step": 1230, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.000812925515699624, |
|
"grad_norm": 34.804351806640625, |
|
"learning_rate": 5.405405405405406e-07, |
|
"loss": 2.022, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.001625851031399248, |
|
"grad_norm": 35.288848876953125, |
|
"learning_rate": 1.0810810810810812e-06, |
|
"loss": 2.1055, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.002438776547098872, |
|
"grad_norm": 37.58893585205078, |
|
"learning_rate": 1.6216216216216219e-06, |
|
"loss": 2.0685, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.003251702062798496, |
|
"grad_norm": 28.51118278503418, |
|
"learning_rate": 2.1621621621621623e-06, |
|
"loss": 2.0364, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.00406462757849812, |
|
"grad_norm": 24.874475479125977, |
|
"learning_rate": 2.702702702702703e-06, |
|
"loss": 1.9688, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.004877553094197744, |
|
"grad_norm": 12.156012535095215, |
|
"learning_rate": 3.2432432432432437e-06, |
|
"loss": 1.8677, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.005690478609897368, |
|
"grad_norm": 7.017012119293213, |
|
"learning_rate": 3.7837837837837844e-06, |
|
"loss": 1.7774, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.006503404125596992, |
|
"grad_norm": 11.024828910827637, |
|
"learning_rate": 4.324324324324325e-06, |
|
"loss": 1.8042, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.007316329641296616, |
|
"grad_norm": 6.988280296325684, |
|
"learning_rate": 4.864864864864866e-06, |
|
"loss": 1.7973, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.00812925515699624, |
|
"grad_norm": 8.541196823120117, |
|
"learning_rate": 5.405405405405406e-06, |
|
"loss": 1.7946, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.008942180672695864, |
|
"grad_norm": 7.084593772888184, |
|
"learning_rate": 5.945945945945947e-06, |
|
"loss": 1.8178, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.009755106188395488, |
|
"grad_norm": 5.755589962005615, |
|
"learning_rate": 6.486486486486487e-06, |
|
"loss": 1.7748, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.010568031704095112, |
|
"grad_norm": 4.855886459350586, |
|
"learning_rate": 7.027027027027028e-06, |
|
"loss": 1.6665, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.011380957219794737, |
|
"grad_norm": 5.280701160430908, |
|
"learning_rate": 7.567567567567569e-06, |
|
"loss": 1.7226, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.01219388273549436, |
|
"grad_norm": 4.513389587402344, |
|
"learning_rate": 8.108108108108109e-06, |
|
"loss": 1.7219, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.013006808251193984, |
|
"grad_norm": 4.92287015914917, |
|
"learning_rate": 8.64864864864865e-06, |
|
"loss": 1.697, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.013819733766893608, |
|
"grad_norm": 4.488801002502441, |
|
"learning_rate": 9.189189189189191e-06, |
|
"loss": 1.6584, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.014632659282593233, |
|
"grad_norm": 3.736851930618286, |
|
"learning_rate": 9.729729729729732e-06, |
|
"loss": 1.6752, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.015445584798292857, |
|
"grad_norm": 3.7089431285858154, |
|
"learning_rate": 1.027027027027027e-05, |
|
"loss": 1.602, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.01625851031399248, |
|
"grad_norm": 3.9821619987487793, |
|
"learning_rate": 1.0810810810810812e-05, |
|
"loss": 1.6492, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.017071435829692106, |
|
"grad_norm": 3.72698974609375, |
|
"learning_rate": 1.1351351351351352e-05, |
|
"loss": 1.6893, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.017884361345391727, |
|
"grad_norm": 3.0124993324279785, |
|
"learning_rate": 1.1891891891891894e-05, |
|
"loss": 1.5879, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.01869728686109135, |
|
"grad_norm": 9.361907005310059, |
|
"learning_rate": 1.2432432432432433e-05, |
|
"loss": 1.6172, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.019510212376790976, |
|
"grad_norm": 3.431147813796997, |
|
"learning_rate": 1.2972972972972975e-05, |
|
"loss": 1.6354, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0203231378924906, |
|
"grad_norm": 3.3041067123413086, |
|
"learning_rate": 1.3513513513513515e-05, |
|
"loss": 1.5998, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.021136063408190225, |
|
"grad_norm": 3.4122121334075928, |
|
"learning_rate": 1.4054054054054055e-05, |
|
"loss": 1.5737, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.02194898892388985, |
|
"grad_norm": 3.538844585418701, |
|
"learning_rate": 1.4594594594594596e-05, |
|
"loss": 1.5737, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.022761914439589474, |
|
"grad_norm": 3.245887041091919, |
|
"learning_rate": 1.5135135135135138e-05, |
|
"loss": 1.5893, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.023574839955289098, |
|
"grad_norm": 3.7787671089172363, |
|
"learning_rate": 1.5675675675675676e-05, |
|
"loss": 1.5923, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.02438776547098872, |
|
"grad_norm": 3.557563066482544, |
|
"learning_rate": 1.6216216216216218e-05, |
|
"loss": 1.5906, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.025200690986688343, |
|
"grad_norm": 3.1536169052124023, |
|
"learning_rate": 1.6756756756756757e-05, |
|
"loss": 1.5976, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.026013616502387968, |
|
"grad_norm": 3.060678005218506, |
|
"learning_rate": 1.72972972972973e-05, |
|
"loss": 1.5239, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.026826542018087592, |
|
"grad_norm": 3.0163331031799316, |
|
"learning_rate": 1.783783783783784e-05, |
|
"loss": 1.5703, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.027639467533787217, |
|
"grad_norm": 3.0648066997528076, |
|
"learning_rate": 1.8378378378378383e-05, |
|
"loss": 1.5421, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.02845239304948684, |
|
"grad_norm": 2.8359413146972656, |
|
"learning_rate": 1.891891891891892e-05, |
|
"loss": 1.5698, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.029265318565186466, |
|
"grad_norm": 3.0256259441375732, |
|
"learning_rate": 1.9459459459459463e-05, |
|
"loss": 1.5258, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.03007824408088609, |
|
"grad_norm": 3.305952548980713, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5979, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.030891169596585714, |
|
"grad_norm": 2.7958834171295166, |
|
"learning_rate": 1.99999653272242e-05, |
|
"loss": 1.5065, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.031704095112285335, |
|
"grad_norm": 3.515479564666748, |
|
"learning_rate": 1.9999861309137232e-05, |
|
"loss": 1.4837, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.03251702062798496, |
|
"grad_norm": 2.7845990657806396, |
|
"learning_rate": 1.999968794646042e-05, |
|
"loss": 1.5634, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.033329946143684584, |
|
"grad_norm": 3.0540645122528076, |
|
"learning_rate": 1.9999445240395953e-05, |
|
"loss": 1.5001, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.03414287165938421, |
|
"grad_norm": 3.059220790863037, |
|
"learning_rate": 1.9999133192626893e-05, |
|
"loss": 1.502, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.03495579717508383, |
|
"grad_norm": 2.594452142715454, |
|
"learning_rate": 1.9998751805317152e-05, |
|
"loss": 1.5245, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.035768722690783454, |
|
"grad_norm": 3.0076844692230225, |
|
"learning_rate": 1.999830108111148e-05, |
|
"loss": 1.5032, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.03658164820648308, |
|
"grad_norm": 2.9521396160125732, |
|
"learning_rate": 1.999778102313545e-05, |
|
"loss": 1.5381, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.0373945737221827, |
|
"grad_norm": 3.280303478240967, |
|
"learning_rate": 1.999719163499543e-05, |
|
"loss": 1.5478, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.03820749923788233, |
|
"grad_norm": 2.9089877605438232, |
|
"learning_rate": 1.999653292077857e-05, |
|
"loss": 1.4783, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.03902042475358195, |
|
"grad_norm": 5.1869635581970215, |
|
"learning_rate": 1.999580488505276e-05, |
|
"loss": 1.5067, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.03983335026928158, |
|
"grad_norm": 3.053921699523926, |
|
"learning_rate": 1.9995007532866594e-05, |
|
"loss": 1.503, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.0406462757849812, |
|
"grad_norm": 2.952059507369995, |
|
"learning_rate": 1.9994140869749366e-05, |
|
"loss": 1.4579, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04145920130068083, |
|
"grad_norm": 2.609379291534424, |
|
"learning_rate": 1.9993204901710995e-05, |
|
"loss": 1.4679, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.04227212681638045, |
|
"grad_norm": 3.41717267036438, |
|
"learning_rate": 1.9992199635241997e-05, |
|
"loss": 1.5197, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.04308505233208007, |
|
"grad_norm": 2.8707101345062256, |
|
"learning_rate": 1.999112507731346e-05, |
|
"loss": 1.5074, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.0438979778477797, |
|
"grad_norm": 3.325697660446167, |
|
"learning_rate": 1.9989981235376956e-05, |
|
"loss": 1.427, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.04471090336347932, |
|
"grad_norm": 2.7196686267852783, |
|
"learning_rate": 1.9988768117364526e-05, |
|
"loss": 1.4868, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.04552382887917895, |
|
"grad_norm": 2.9488351345062256, |
|
"learning_rate": 1.9987485731688595e-05, |
|
"loss": 1.5011, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.04633675439487857, |
|
"grad_norm": 2.7776849269866943, |
|
"learning_rate": 1.998613408724195e-05, |
|
"loss": 1.4664, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.047149679910578196, |
|
"grad_norm": 2.719594717025757, |
|
"learning_rate": 1.998471319339763e-05, |
|
"loss": 1.4905, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.04796260542627782, |
|
"grad_norm": 2.8028323650360107, |
|
"learning_rate": 1.9983223060008908e-05, |
|
"loss": 1.4754, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.04877553094197744, |
|
"grad_norm": 3.1789817810058594, |
|
"learning_rate": 1.9981663697409203e-05, |
|
"loss": 1.4618, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.049588456457677066, |
|
"grad_norm": 3.077449321746826, |
|
"learning_rate": 1.998003511641199e-05, |
|
"loss": 1.453, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.05040138197337669, |
|
"grad_norm": 2.960418939590454, |
|
"learning_rate": 1.997833732831076e-05, |
|
"loss": 1.4564, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.051214307489076315, |
|
"grad_norm": 5.316094875335693, |
|
"learning_rate": 1.9976570344878916e-05, |
|
"loss": 1.4711, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.052027233004775936, |
|
"grad_norm": 3.257415771484375, |
|
"learning_rate": 1.9974734178369702e-05, |
|
"loss": 1.4606, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.052840158520475564, |
|
"grad_norm": 3.0437912940979004, |
|
"learning_rate": 1.997282884151612e-05, |
|
"loss": 1.5075, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.053653084036175185, |
|
"grad_norm": 3.3059332370758057, |
|
"learning_rate": 1.9970854347530828e-05, |
|
"loss": 1.484, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.05446600955187481, |
|
"grad_norm": 4.510897636413574, |
|
"learning_rate": 1.9968810710106065e-05, |
|
"loss": 1.5091, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.05527893506757443, |
|
"grad_norm": 3.3621528148651123, |
|
"learning_rate": 1.9966697943413548e-05, |
|
"loss": 1.4603, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.056091860583274054, |
|
"grad_norm": 2.878563642501831, |
|
"learning_rate": 1.9964516062104377e-05, |
|
"loss": 1.4438, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.05690478609897368, |
|
"grad_norm": 2.8587141036987305, |
|
"learning_rate": 1.996226508130892e-05, |
|
"loss": 1.441, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0577177116146733, |
|
"grad_norm": 3.2675728797912598, |
|
"learning_rate": 1.995994501663674e-05, |
|
"loss": 1.4515, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.05853063713037293, |
|
"grad_norm": 3.018068790435791, |
|
"learning_rate": 1.995755588417644e-05, |
|
"loss": 1.4499, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.05934356264607255, |
|
"grad_norm": 3.715628147125244, |
|
"learning_rate": 1.99550977004956e-05, |
|
"loss": 1.4624, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.06015648816177218, |
|
"grad_norm": 2.7632699012756348, |
|
"learning_rate": 1.9952570482640628e-05, |
|
"loss": 1.4437, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.0609694136774718, |
|
"grad_norm": 3.3581650257110596, |
|
"learning_rate": 1.9949974248136655e-05, |
|
"loss": 1.4865, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.06178233919317143, |
|
"grad_norm": 3.090432643890381, |
|
"learning_rate": 1.9947309014987414e-05, |
|
"loss": 1.4416, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.06259526470887104, |
|
"grad_norm": 3.3709418773651123, |
|
"learning_rate": 1.9944574801675106e-05, |
|
"loss": 1.4184, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.06340819022457067, |
|
"grad_norm": 3.6959853172302246, |
|
"learning_rate": 1.9941771627160287e-05, |
|
"loss": 1.4694, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.0642211157402703, |
|
"grad_norm": 3.2907724380493164, |
|
"learning_rate": 1.9938899510881732e-05, |
|
"loss": 1.4121, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.06503404125596993, |
|
"grad_norm": 2.7885124683380127, |
|
"learning_rate": 1.9935958472756283e-05, |
|
"loss": 1.4033, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06584696677166954, |
|
"grad_norm": 2.8771262168884277, |
|
"learning_rate": 1.993294853317873e-05, |
|
"loss": 1.4466, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.06665989228736917, |
|
"grad_norm": 3.572303056716919, |
|
"learning_rate": 1.9929869713021668e-05, |
|
"loss": 1.3854, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.0674728178030688, |
|
"grad_norm": 2.636934757232666, |
|
"learning_rate": 1.9926722033635343e-05, |
|
"loss": 1.4186, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.06828574331876842, |
|
"grad_norm": 3.1140427589416504, |
|
"learning_rate": 1.9923505516847514e-05, |
|
"loss": 1.424, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.06909866883446804, |
|
"grad_norm": 2.808480739593506, |
|
"learning_rate": 1.9920220184963296e-05, |
|
"loss": 1.4744, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.06991159435016767, |
|
"grad_norm": 3.118234872817993, |
|
"learning_rate": 1.9916866060764994e-05, |
|
"loss": 1.4277, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.0707245198658673, |
|
"grad_norm": 4.0702033042907715, |
|
"learning_rate": 1.991344316751198e-05, |
|
"loss": 1.4236, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.07153744538156691, |
|
"grad_norm": 2.938345193862915, |
|
"learning_rate": 1.9909951528940485e-05, |
|
"loss": 1.4119, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.07235037089726654, |
|
"grad_norm": 2.960853338241577, |
|
"learning_rate": 1.990639116926348e-05, |
|
"loss": 1.471, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.07316329641296616, |
|
"grad_norm": 3.146742343902588, |
|
"learning_rate": 1.9902762113170467e-05, |
|
"loss": 1.4751, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.07397622192866579, |
|
"grad_norm": 3.3954169750213623, |
|
"learning_rate": 1.989906438582734e-05, |
|
"loss": 1.467, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.0747891474443654, |
|
"grad_norm": 2.9790520668029785, |
|
"learning_rate": 1.9895298012876192e-05, |
|
"loss": 1.507, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.07560207296006503, |
|
"grad_norm": 2.577925682067871, |
|
"learning_rate": 1.9891463020435144e-05, |
|
"loss": 1.4728, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.07641499847576466, |
|
"grad_norm": 3.437133550643921, |
|
"learning_rate": 1.9887559435098162e-05, |
|
"loss": 1.4472, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.07722792399146428, |
|
"grad_norm": 2.806886911392212, |
|
"learning_rate": 1.9883587283934875e-05, |
|
"loss": 1.4497, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.0780408495071639, |
|
"grad_norm": 2.703793525695801, |
|
"learning_rate": 1.9879546594490383e-05, |
|
"loss": 1.4643, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.07885377502286353, |
|
"grad_norm": 3.2830615043640137, |
|
"learning_rate": 1.987543739478507e-05, |
|
"loss": 1.4162, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.07966670053856316, |
|
"grad_norm": 2.5376830101013184, |
|
"learning_rate": 1.987125971331441e-05, |
|
"loss": 1.494, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.08047962605426277, |
|
"grad_norm": 2.532893180847168, |
|
"learning_rate": 1.9867013579048765e-05, |
|
"loss": 1.4575, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.0812925515699624, |
|
"grad_norm": 2.838155508041382, |
|
"learning_rate": 1.9862699021433186e-05, |
|
"loss": 1.4007, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.08210547708566203, |
|
"grad_norm": 2.5777368545532227, |
|
"learning_rate": 1.9858316070387208e-05, |
|
"loss": 1.4213, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.08291840260136166, |
|
"grad_norm": 2.634209394454956, |
|
"learning_rate": 1.9853864756304654e-05, |
|
"loss": 1.4544, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.08373132811706127, |
|
"grad_norm": 2.9893202781677246, |
|
"learning_rate": 1.9849345110053405e-05, |
|
"loss": 1.4361, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.0845442536327609, |
|
"grad_norm": 2.668808698654175, |
|
"learning_rate": 1.984475716297519e-05, |
|
"loss": 1.4267, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.08535717914846053, |
|
"grad_norm": 3.2199463844299316, |
|
"learning_rate": 1.984010094688539e-05, |
|
"loss": 1.4731, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.08617010466416014, |
|
"grad_norm": 2.746006965637207, |
|
"learning_rate": 1.9835376494072788e-05, |
|
"loss": 1.385, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.08698303017985977, |
|
"grad_norm": 2.955232620239258, |
|
"learning_rate": 1.9830583837299363e-05, |
|
"loss": 1.3984, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.0877959556955594, |
|
"grad_norm": 5.357511520385742, |
|
"learning_rate": 1.9825723009800058e-05, |
|
"loss": 1.4562, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.08860888121125902, |
|
"grad_norm": 2.5583655834198, |
|
"learning_rate": 1.9820794045282553e-05, |
|
"loss": 1.4222, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.08942180672695864, |
|
"grad_norm": 2.6951992511749268, |
|
"learning_rate": 1.9815796977927015e-05, |
|
"loss": 1.4697, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.09023473224265827, |
|
"grad_norm": 2.714019775390625, |
|
"learning_rate": 1.9810731842385892e-05, |
|
"loss": 1.4696, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.0910476577583579, |
|
"grad_norm": 3.2998311519622803, |
|
"learning_rate": 1.9805598673783644e-05, |
|
"loss": 1.4034, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.09186058327405751, |
|
"grad_norm": 7.7324652671813965, |
|
"learning_rate": 1.980039750771651e-05, |
|
"loss": 1.4697, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.09267350878975714, |
|
"grad_norm": 2.6200242042541504, |
|
"learning_rate": 1.9795128380252263e-05, |
|
"loss": 1.451, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.09348643430545676, |
|
"grad_norm": 2.937061071395874, |
|
"learning_rate": 1.978979132792996e-05, |
|
"loss": 1.4348, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.09429935982115639, |
|
"grad_norm": 3.087085247039795, |
|
"learning_rate": 1.9784386387759684e-05, |
|
"loss": 1.4271, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.095112285336856, |
|
"grad_norm": 2.6796271800994873, |
|
"learning_rate": 1.977891359722229e-05, |
|
"loss": 1.4933, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.09592521085255563, |
|
"grad_norm": 3.4506633281707764, |
|
"learning_rate": 1.9773372994269147e-05, |
|
"loss": 1.427, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.09673813636825526, |
|
"grad_norm": 2.532562732696533, |
|
"learning_rate": 1.976776461732187e-05, |
|
"loss": 1.436, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.09755106188395488, |
|
"grad_norm": 3.619605541229248, |
|
"learning_rate": 1.976208850527206e-05, |
|
"loss": 1.4384, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.0983639873996545, |
|
"grad_norm": 3.5245602130889893, |
|
"learning_rate": 1.9756344697481027e-05, |
|
"loss": 1.4303, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.09917691291535413, |
|
"grad_norm": 2.649686336517334, |
|
"learning_rate": 1.975053323377952e-05, |
|
"loss": 1.4692, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.09998983843105376, |
|
"grad_norm": 2.516016721725464, |
|
"learning_rate": 1.9744654154467468e-05, |
|
"loss": 1.4154, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.10080276394675337, |
|
"grad_norm": 2.591168165206909, |
|
"learning_rate": 1.9738707500313655e-05, |
|
"loss": 1.403, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.101615689462453, |
|
"grad_norm": 2.522486686706543, |
|
"learning_rate": 1.9732693312555492e-05, |
|
"loss": 1.4575, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.10242861497815263, |
|
"grad_norm": 2.8282413482666016, |
|
"learning_rate": 1.9726611632898693e-05, |
|
"loss": 1.377, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.10324154049385226, |
|
"grad_norm": 2.598076820373535, |
|
"learning_rate": 1.9720462503517e-05, |
|
"loss": 1.4382, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.10405446600955187, |
|
"grad_norm": 2.688178777694702, |
|
"learning_rate": 1.971424596705189e-05, |
|
"loss": 1.4132, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.1048673915252515, |
|
"grad_norm": 2.7512471675872803, |
|
"learning_rate": 1.9707962066612278e-05, |
|
"loss": 1.4193, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.10568031704095113, |
|
"grad_norm": 2.8025805950164795, |
|
"learning_rate": 1.970161084577422e-05, |
|
"loss": 1.3829, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.10649324255665074, |
|
"grad_norm": 2.6514623165130615, |
|
"learning_rate": 1.9695192348580606e-05, |
|
"loss": 1.4362, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.10730616807235037, |
|
"grad_norm": 2.4559547901153564, |
|
"learning_rate": 1.9688706619540863e-05, |
|
"loss": 1.4357, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.10811909358805, |
|
"grad_norm": 2.8258724212646484, |
|
"learning_rate": 1.968215370363063e-05, |
|
"loss": 1.4501, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.10893201910374962, |
|
"grad_norm": 2.8553593158721924, |
|
"learning_rate": 1.9675533646291463e-05, |
|
"loss": 1.4841, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.10974494461944924, |
|
"grad_norm": 3.625079870223999, |
|
"learning_rate": 1.9668846493430522e-05, |
|
"loss": 1.47, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.11055787013514887, |
|
"grad_norm": 2.9793193340301514, |
|
"learning_rate": 1.9662092291420233e-05, |
|
"loss": 1.3969, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.1113707956508485, |
|
"grad_norm": 2.5699939727783203, |
|
"learning_rate": 1.965527108709798e-05, |
|
"loss": 1.4258, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.11218372116654811, |
|
"grad_norm": 2.7961106300354004, |
|
"learning_rate": 1.964838292776579e-05, |
|
"loss": 1.4637, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.11299664668224774, |
|
"grad_norm": 3.3331451416015625, |
|
"learning_rate": 1.9641427861189973e-05, |
|
"loss": 1.3976, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.11380957219794736, |
|
"grad_norm": 2.5645205974578857, |
|
"learning_rate": 1.963440593560083e-05, |
|
"loss": 1.409, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.11462249771364699, |
|
"grad_norm": 2.5996487140655518, |
|
"learning_rate": 1.9627317199692287e-05, |
|
"loss": 1.4834, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.1154354232293466, |
|
"grad_norm": 2.9811034202575684, |
|
"learning_rate": 1.962016170262157e-05, |
|
"loss": 1.4508, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.11624834874504623, |
|
"grad_norm": 2.4133377075195312, |
|
"learning_rate": 1.961293949400888e-05, |
|
"loss": 1.4077, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.11706127426074586, |
|
"grad_norm": 2.622091770172119, |
|
"learning_rate": 1.960565062393701e-05, |
|
"loss": 1.4046, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.11787419977644548, |
|
"grad_norm": 2.857346534729004, |
|
"learning_rate": 1.9598295142951035e-05, |
|
"loss": 1.4217, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.1186871252921451, |
|
"grad_norm": 2.600935220718384, |
|
"learning_rate": 1.9590873102057948e-05, |
|
"loss": 1.403, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.11950005080784473, |
|
"grad_norm": 2.820359945297241, |
|
"learning_rate": 1.9583384552726294e-05, |
|
"loss": 1.4358, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.12031297632354436, |
|
"grad_norm": 2.6272051334381104, |
|
"learning_rate": 1.957582954688584e-05, |
|
"loss": 1.4505, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.12112590183924397, |
|
"grad_norm": 2.8003182411193848, |
|
"learning_rate": 1.9568208136927177e-05, |
|
"loss": 1.3977, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.1219388273549436, |
|
"grad_norm": 3.560518264770508, |
|
"learning_rate": 1.9560520375701408e-05, |
|
"loss": 1.3992, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.12275175287064323, |
|
"grad_norm": 2.6377906799316406, |
|
"learning_rate": 1.9552766316519726e-05, |
|
"loss": 1.4022, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.12356467838634286, |
|
"grad_norm": 2.7982730865478516, |
|
"learning_rate": 1.9544946013153093e-05, |
|
"loss": 1.409, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.12437760390204247, |
|
"grad_norm": 2.7179160118103027, |
|
"learning_rate": 1.9537059519831822e-05, |
|
"loss": 1.415, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.12519052941774209, |
|
"grad_norm": 2.959397554397583, |
|
"learning_rate": 1.9529106891245244e-05, |
|
"loss": 1.4296, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.12600345493344173, |
|
"grad_norm": 3.3228979110717773, |
|
"learning_rate": 1.9521088182541298e-05, |
|
"loss": 1.4282, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.12681638044914134, |
|
"grad_norm": 2.763151168823242, |
|
"learning_rate": 1.951300344932616e-05, |
|
"loss": 1.3686, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.12762930596484098, |
|
"grad_norm": 2.863190174102783, |
|
"learning_rate": 1.9504852747663862e-05, |
|
"loss": 1.4227, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.1284422314805406, |
|
"grad_norm": 2.889604330062866, |
|
"learning_rate": 1.9496636134075894e-05, |
|
"loss": 1.4658, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.1292551569962402, |
|
"grad_norm": 3.024122476577759, |
|
"learning_rate": 1.9488353665540813e-05, |
|
"loss": 1.4081, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.13006808251193985, |
|
"grad_norm": 2.4810218811035156, |
|
"learning_rate": 1.9480005399493857e-05, |
|
"loss": 1.4296, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.13088100802763947, |
|
"grad_norm": 2.643673896789551, |
|
"learning_rate": 1.9471591393826536e-05, |
|
"loss": 1.3652, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.13169393354333908, |
|
"grad_norm": 2.888829231262207, |
|
"learning_rate": 1.9463111706886234e-05, |
|
"loss": 1.4003, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.13250685905903872, |
|
"grad_norm": 2.7480149269104004, |
|
"learning_rate": 1.9454566397475813e-05, |
|
"loss": 1.4195, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.13331978457473834, |
|
"grad_norm": 2.68972110748291, |
|
"learning_rate": 1.944595552485319e-05, |
|
"loss": 1.3848, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.13413271009043795, |
|
"grad_norm": 2.8888440132141113, |
|
"learning_rate": 1.943727914873094e-05, |
|
"loss": 1.481, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.1349456356061376, |
|
"grad_norm": 2.8409390449523926, |
|
"learning_rate": 1.9428537329275862e-05, |
|
"loss": 1.4176, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.1357585611218372, |
|
"grad_norm": 2.4992098808288574, |
|
"learning_rate": 1.941973012710859e-05, |
|
"loss": 1.395, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.13657148663753685, |
|
"grad_norm": 4.587447166442871, |
|
"learning_rate": 1.941085760330316e-05, |
|
"loss": 1.3905, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.13738441215323646, |
|
"grad_norm": 2.4778833389282227, |
|
"learning_rate": 1.940191981938657e-05, |
|
"loss": 1.3707, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.13819733766893608, |
|
"grad_norm": 2.7843387126922607, |
|
"learning_rate": 1.9392916837338376e-05, |
|
"loss": 1.3698, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.13901026318463572, |
|
"grad_norm": 2.731437921524048, |
|
"learning_rate": 1.9383848719590257e-05, |
|
"loss": 1.4358, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.13982318870033533, |
|
"grad_norm": 3.079371213912964, |
|
"learning_rate": 1.9374715529025575e-05, |
|
"loss": 1.4027, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.14063611421603495, |
|
"grad_norm": 3.6557998657226562, |
|
"learning_rate": 1.9365517328978943e-05, |
|
"loss": 1.428, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.1414490397317346, |
|
"grad_norm": 2.9291248321533203, |
|
"learning_rate": 1.9356254183235785e-05, |
|
"loss": 1.4039, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.1422619652474342, |
|
"grad_norm": 2.498507499694824, |
|
"learning_rate": 1.93469261560319e-05, |
|
"loss": 1.3731, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.14307489076313382, |
|
"grad_norm": 3.6117923259735107, |
|
"learning_rate": 1.9337533312053002e-05, |
|
"loss": 1.4263, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.14388781627883346, |
|
"grad_norm": 2.490755319595337, |
|
"learning_rate": 1.9328075716434287e-05, |
|
"loss": 1.4215, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.14470074179453307, |
|
"grad_norm": 2.9008986949920654, |
|
"learning_rate": 1.931855343475998e-05, |
|
"loss": 1.3968, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.14551366731023269, |
|
"grad_norm": 5.284730911254883, |
|
"learning_rate": 1.930896653306286e-05, |
|
"loss": 1.418, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.14632659282593233, |
|
"grad_norm": 2.826756000518799, |
|
"learning_rate": 1.929931507782383e-05, |
|
"loss": 1.3996, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.14713951834163194, |
|
"grad_norm": 2.8084652423858643, |
|
"learning_rate": 1.9289599135971437e-05, |
|
"loss": 1.374, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.14795244385733158, |
|
"grad_norm": 2.736046075820923, |
|
"learning_rate": 1.9279818774881418e-05, |
|
"loss": 1.3687, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.1487653693730312, |
|
"grad_norm": 2.7098567485809326, |
|
"learning_rate": 1.9269974062376224e-05, |
|
"loss": 1.4059, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.1495782948887308, |
|
"grad_norm": 2.7764878273010254, |
|
"learning_rate": 1.926006506672456e-05, |
|
"loss": 1.42, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.15039122040443045, |
|
"grad_norm": 2.7715649604797363, |
|
"learning_rate": 1.9250091856640895e-05, |
|
"loss": 1.4549, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.15120414592013007, |
|
"grad_norm": 2.4104158878326416, |
|
"learning_rate": 1.9240054501285015e-05, |
|
"loss": 1.4129, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.15201707143582968, |
|
"grad_norm": 2.75614595413208, |
|
"learning_rate": 1.922995307026151e-05, |
|
"loss": 1.3959, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.15282999695152932, |
|
"grad_norm": 2.813262939453125, |
|
"learning_rate": 1.921978763361931e-05, |
|
"loss": 1.4139, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.15364292246722894, |
|
"grad_norm": 2.5106594562530518, |
|
"learning_rate": 1.9209558261851194e-05, |
|
"loss": 1.3683, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.15445584798292855, |
|
"grad_norm": 2.9257330894470215, |
|
"learning_rate": 1.919926502589331e-05, |
|
"loss": 1.3387, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.1552687734986282, |
|
"grad_norm": 2.5029993057250977, |
|
"learning_rate": 1.9188907997124666e-05, |
|
"loss": 1.3892, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.1560816990143278, |
|
"grad_norm": 2.6917388439178467, |
|
"learning_rate": 1.9178487247366652e-05, |
|
"loss": 1.3946, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.15689462453002745, |
|
"grad_norm": 2.7038626670837402, |
|
"learning_rate": 1.916800284888253e-05, |
|
"loss": 1.4082, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.15770755004572706, |
|
"grad_norm": 2.586545467376709, |
|
"learning_rate": 1.915745487437694e-05, |
|
"loss": 1.3431, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.15852047556142668, |
|
"grad_norm": 3.043938159942627, |
|
"learning_rate": 1.9146843396995396e-05, |
|
"loss": 1.3967, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.15933340107712632, |
|
"grad_norm": 2.80709171295166, |
|
"learning_rate": 1.9136168490323772e-05, |
|
"loss": 1.3617, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.16014632659282593, |
|
"grad_norm": 5.03334903717041, |
|
"learning_rate": 1.9125430228387794e-05, |
|
"loss": 1.4326, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.16095925210852555, |
|
"grad_norm": 4.717489719390869, |
|
"learning_rate": 1.9114628685652535e-05, |
|
"loss": 1.3459, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.1617721776242252, |
|
"grad_norm": 3.0668435096740723, |
|
"learning_rate": 1.9103763937021887e-05, |
|
"loss": 1.3763, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.1625851031399248, |
|
"grad_norm": 2.712122678756714, |
|
"learning_rate": 1.909283605783805e-05, |
|
"loss": 1.3319, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.16339802865562442, |
|
"grad_norm": 2.7631924152374268, |
|
"learning_rate": 1.9081845123881002e-05, |
|
"loss": 1.3641, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.16421095417132406, |
|
"grad_norm": 3.499955654144287, |
|
"learning_rate": 1.9070791211367984e-05, |
|
"loss": 1.3259, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.16502387968702367, |
|
"grad_norm": 2.913755416870117, |
|
"learning_rate": 1.9059674396952963e-05, |
|
"loss": 1.3386, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.16583680520272331, |
|
"grad_norm": 2.5671772956848145, |
|
"learning_rate": 1.90484947577261e-05, |
|
"loss": 1.3301, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.16664973071842293, |
|
"grad_norm": 3.3566508293151855, |
|
"learning_rate": 1.903725237121322e-05, |
|
"loss": 1.3375, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.16746265623412254, |
|
"grad_norm": 2.3617210388183594, |
|
"learning_rate": 1.902594731537527e-05, |
|
"loss": 1.4476, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.16827558174982218, |
|
"grad_norm": 2.8202669620513916, |
|
"learning_rate": 1.901457966860779e-05, |
|
"loss": 1.334, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.1690885072655218, |
|
"grad_norm": 2.5990843772888184, |
|
"learning_rate": 1.9003149509740347e-05, |
|
"loss": 1.4321, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.1699014327812214, |
|
"grad_norm": 5.9826507568359375, |
|
"learning_rate": 1.899165691803601e-05, |
|
"loss": 1.4338, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.17071435829692105, |
|
"grad_norm": 3.9570019245147705, |
|
"learning_rate": 1.8980101973190787e-05, |
|
"loss": 1.3265, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.17152728381262067, |
|
"grad_norm": 2.8985307216644287, |
|
"learning_rate": 1.896848475533309e-05, |
|
"loss": 1.3297, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.17234020932832028, |
|
"grad_norm": 3.2575559616088867, |
|
"learning_rate": 1.8956805345023145e-05, |
|
"loss": 1.4086, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.17315313484401992, |
|
"grad_norm": 3.264796733856201, |
|
"learning_rate": 1.894506382325248e-05, |
|
"loss": 1.391, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.17396606035971954, |
|
"grad_norm": 2.767975330352783, |
|
"learning_rate": 1.8933260271443313e-05, |
|
"loss": 1.3731, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.17477898587541915, |
|
"grad_norm": 2.3556087017059326, |
|
"learning_rate": 1.8921394771448032e-05, |
|
"loss": 1.3288, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.1755919113911188, |
|
"grad_norm": 4.253211975097656, |
|
"learning_rate": 1.89094674055486e-05, |
|
"loss": 1.3776, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.1764048369068184, |
|
"grad_norm": 3.0681605339050293, |
|
"learning_rate": 1.889747825645599e-05, |
|
"loss": 1.4169, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.17721776242251805, |
|
"grad_norm": 2.3741588592529297, |
|
"learning_rate": 1.8885427407309627e-05, |
|
"loss": 1.3392, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.17803068793821766, |
|
"grad_norm": 2.968780279159546, |
|
"learning_rate": 1.887331494167678e-05, |
|
"loss": 1.4019, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.17884361345391728, |
|
"grad_norm": 2.3684914112091064, |
|
"learning_rate": 1.8861140943552014e-05, |
|
"loss": 1.3599, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.17965653896961692, |
|
"grad_norm": 3.0405993461608887, |
|
"learning_rate": 1.884890549735659e-05, |
|
"loss": 1.4245, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.18046946448531653, |
|
"grad_norm": 3.397047281265259, |
|
"learning_rate": 1.8836608687937883e-05, |
|
"loss": 1.392, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.18128239000101615, |
|
"grad_norm": 2.4693644046783447, |
|
"learning_rate": 1.8824250600568798e-05, |
|
"loss": 1.3726, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.1820953155167158, |
|
"grad_norm": 6.75039005279541, |
|
"learning_rate": 1.8811831320947177e-05, |
|
"loss": 1.3473, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.1829082410324154, |
|
"grad_norm": 2.922574758529663, |
|
"learning_rate": 1.879935093519519e-05, |
|
"loss": 1.4221, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.18372116654811502, |
|
"grad_norm": 5.6719136238098145, |
|
"learning_rate": 1.878680952985877e-05, |
|
"loss": 1.3844, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.18453409206381466, |
|
"grad_norm": 2.6967201232910156, |
|
"learning_rate": 1.8774207191906976e-05, |
|
"loss": 1.344, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.18534701757951427, |
|
"grad_norm": 3.049881935119629, |
|
"learning_rate": 1.8761544008731426e-05, |
|
"loss": 1.3912, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.18615994309521391, |
|
"grad_norm": 3.1408843994140625, |
|
"learning_rate": 1.874882006814565e-05, |
|
"loss": 1.4048, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.18697286861091353, |
|
"grad_norm": 2.6653666496276855, |
|
"learning_rate": 1.8736035458384528e-05, |
|
"loss": 1.3844, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.18778579412661314, |
|
"grad_norm": 2.6866488456726074, |
|
"learning_rate": 1.8723190268103634e-05, |
|
"loss": 1.3586, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.18859871964231278, |
|
"grad_norm": 3.2653231620788574, |
|
"learning_rate": 1.8710284586378645e-05, |
|
"loss": 1.3856, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.1894116451580124, |
|
"grad_norm": 2.841388463973999, |
|
"learning_rate": 1.8697318502704734e-05, |
|
"loss": 1.3868, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.190224570673712, |
|
"grad_norm": 2.797558307647705, |
|
"learning_rate": 1.8684292106995916e-05, |
|
"loss": 1.3885, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.19103749618941165, |
|
"grad_norm": 2.915003776550293, |
|
"learning_rate": 1.8671205489584453e-05, |
|
"loss": 1.3434, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.19185042170511127, |
|
"grad_norm": 3.2142281532287598, |
|
"learning_rate": 1.865805874122021e-05, |
|
"loss": 1.3975, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.19266334722081088, |
|
"grad_norm": 3.0831453800201416, |
|
"learning_rate": 1.8644851953070045e-05, |
|
"loss": 1.367, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.19347627273651052, |
|
"grad_norm": 3.2555181980133057, |
|
"learning_rate": 1.863158521671716e-05, |
|
"loss": 1.33, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.19428919825221014, |
|
"grad_norm": 2.8768310546875, |
|
"learning_rate": 1.8618258624160465e-05, |
|
"loss": 1.3867, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.19510212376790975, |
|
"grad_norm": 2.9737942218780518, |
|
"learning_rate": 1.8604872267813954e-05, |
|
"loss": 1.3726, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.1959150492836094, |
|
"grad_norm": 2.5942904949188232, |
|
"learning_rate": 1.859142624050605e-05, |
|
"loss": 1.3704, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.196727974799309, |
|
"grad_norm": 2.6901443004608154, |
|
"learning_rate": 1.8577920635478976e-05, |
|
"loss": 1.3523, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.19754090031500865, |
|
"grad_norm": 2.4508392810821533, |
|
"learning_rate": 1.8564355546388094e-05, |
|
"loss": 1.3758, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.19835382583070826, |
|
"grad_norm": 2.3041279315948486, |
|
"learning_rate": 1.855073106730126e-05, |
|
"loss": 1.3491, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.19916675134640788, |
|
"grad_norm": 2.8388736248016357, |
|
"learning_rate": 1.8537047292698175e-05, |
|
"loss": 1.3578, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.19997967686210752, |
|
"grad_norm": 3.058314085006714, |
|
"learning_rate": 1.852330431746973e-05, |
|
"loss": 1.3547, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.20079260237780713, |
|
"grad_norm": 2.881788492202759, |
|
"learning_rate": 1.8509502236917353e-05, |
|
"loss": 1.3823, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.20160552789350675, |
|
"grad_norm": 2.623408794403076, |
|
"learning_rate": 1.8495641146752322e-05, |
|
"loss": 1.4516, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.2024184534092064, |
|
"grad_norm": 2.662614345550537, |
|
"learning_rate": 1.848172114309513e-05, |
|
"loss": 1.3924, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.203231378924906, |
|
"grad_norm": 2.520263671875, |
|
"learning_rate": 1.8467742322474822e-05, |
|
"loss": 1.4097, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.20404430444060562, |
|
"grad_norm": 4.465703964233398, |
|
"learning_rate": 1.845370478182829e-05, |
|
"loss": 1.3645, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.20485722995630526, |
|
"grad_norm": 2.5109176635742188, |
|
"learning_rate": 1.8439608618499637e-05, |
|
"loss": 1.3238, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.20567015547200487, |
|
"grad_norm": 2.703659772872925, |
|
"learning_rate": 1.842545393023949e-05, |
|
"loss": 1.4027, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.20648308098770451, |
|
"grad_norm": 3.483933448791504, |
|
"learning_rate": 1.841124081520431e-05, |
|
"loss": 1.4167, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.20729600650340413, |
|
"grad_norm": 2.7172889709472656, |
|
"learning_rate": 1.8396969371955724e-05, |
|
"loss": 1.3017, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.20810893201910374, |
|
"grad_norm": 2.512045383453369, |
|
"learning_rate": 1.838263969945985e-05, |
|
"loss": 1.4112, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.20892185753480338, |
|
"grad_norm": 2.4449141025543213, |
|
"learning_rate": 1.836825189708659e-05, |
|
"loss": 1.3396, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.209734783050503, |
|
"grad_norm": 2.9280951023101807, |
|
"learning_rate": 1.8353806064608953e-05, |
|
"loss": 1.3461, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.2105477085662026, |
|
"grad_norm": 3.962769031524658, |
|
"learning_rate": 1.833930230220236e-05, |
|
"loss": 1.3347, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.21136063408190225, |
|
"grad_norm": 3.3168771266937256, |
|
"learning_rate": 1.8324740710443955e-05, |
|
"loss": 1.3264, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.21217355959760187, |
|
"grad_norm": 2.754786252975464, |
|
"learning_rate": 1.831012139031189e-05, |
|
"loss": 1.3859, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.21298648511330148, |
|
"grad_norm": 2.5179426670074463, |
|
"learning_rate": 1.829544444318466e-05, |
|
"loss": 1.3653, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.21379941062900112, |
|
"grad_norm": 2.9228906631469727, |
|
"learning_rate": 1.8280709970840352e-05, |
|
"loss": 1.3929, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.21461233614470074, |
|
"grad_norm": 2.732806921005249, |
|
"learning_rate": 1.8265918075455985e-05, |
|
"loss": 1.3197, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.21542526166040035, |
|
"grad_norm": 2.7236287593841553, |
|
"learning_rate": 1.8251068859606777e-05, |
|
"loss": 1.3156, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.2162381871761, |
|
"grad_norm": 3.677654504776001, |
|
"learning_rate": 1.823616242626542e-05, |
|
"loss": 1.3565, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.2170511126917996, |
|
"grad_norm": 2.4574098587036133, |
|
"learning_rate": 1.8221198878801415e-05, |
|
"loss": 1.3802, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.21786403820749925, |
|
"grad_norm": 3.2601144313812256, |
|
"learning_rate": 1.8206178320980295e-05, |
|
"loss": 1.3606, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.21867696372319886, |
|
"grad_norm": 2.4183156490325928, |
|
"learning_rate": 1.819110085696295e-05, |
|
"loss": 1.3327, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.21948988923889848, |
|
"grad_norm": 2.6820755004882812, |
|
"learning_rate": 1.817596659130489e-05, |
|
"loss": 1.3676, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.22030281475459812, |
|
"grad_norm": 2.619580030441284, |
|
"learning_rate": 1.816077562895551e-05, |
|
"loss": 1.408, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.22111574027029773, |
|
"grad_norm": 2.4499645233154297, |
|
"learning_rate": 1.814552807525738e-05, |
|
"loss": 1.3445, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.22192866578599735, |
|
"grad_norm": 2.5966873168945312, |
|
"learning_rate": 1.81302240359455e-05, |
|
"loss": 1.3354, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.222741591301697, |
|
"grad_norm": 8.227926254272461, |
|
"learning_rate": 1.8114863617146576e-05, |
|
"loss": 1.3495, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.2235545168173966, |
|
"grad_norm": 5.334491729736328, |
|
"learning_rate": 1.8099446925378278e-05, |
|
"loss": 1.3845, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.22436744233309622, |
|
"grad_norm": 2.436473846435547, |
|
"learning_rate": 1.8083974067548506e-05, |
|
"loss": 1.3152, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.22518036784879586, |
|
"grad_norm": 2.4906110763549805, |
|
"learning_rate": 1.806844515095465e-05, |
|
"loss": 1.3213, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.22599329336449547, |
|
"grad_norm": 2.627547264099121, |
|
"learning_rate": 1.8052860283282832e-05, |
|
"loss": 1.3394, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.22680621888019512, |
|
"grad_norm": 3.9034616947174072, |
|
"learning_rate": 1.8037219572607177e-05, |
|
"loss": 1.2956, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.22761914439589473, |
|
"grad_norm": 2.9307639598846436, |
|
"learning_rate": 1.8021523127389066e-05, |
|
"loss": 1.3507, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.22843206991159434, |
|
"grad_norm": 2.6711225509643555, |
|
"learning_rate": 1.800577105647635e-05, |
|
"loss": 1.4043, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.22924499542729398, |
|
"grad_norm": 2.9251246452331543, |
|
"learning_rate": 1.7989963469102643e-05, |
|
"loss": 1.3424, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.2300579209429936, |
|
"grad_norm": 2.2818679809570312, |
|
"learning_rate": 1.797410047488653e-05, |
|
"loss": 1.334, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.2308708464586932, |
|
"grad_norm": 2.6961264610290527, |
|
"learning_rate": 1.7958182183830816e-05, |
|
"loss": 1.3411, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.23168377197439285, |
|
"grad_norm": 2.5082268714904785, |
|
"learning_rate": 1.794220870632177e-05, |
|
"loss": 1.3815, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.23249669749009247, |
|
"grad_norm": 2.6569674015045166, |
|
"learning_rate": 1.7926180153128358e-05, |
|
"loss": 1.4037, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.23330962300579208, |
|
"grad_norm": 2.559483289718628, |
|
"learning_rate": 1.791009663540146e-05, |
|
"loss": 1.333, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.23412254852149172, |
|
"grad_norm": 2.6982040405273438, |
|
"learning_rate": 1.789395826467312e-05, |
|
"loss": 1.4168, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.23493547403719134, |
|
"grad_norm": 2.414900541305542, |
|
"learning_rate": 1.7877765152855757e-05, |
|
"loss": 1.3583, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.23574839955289095, |
|
"grad_norm": 2.465045928955078, |
|
"learning_rate": 1.78615174122414e-05, |
|
"loss": 1.44, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.2365613250685906, |
|
"grad_norm": 2.306795597076416, |
|
"learning_rate": 1.78452151555009e-05, |
|
"loss": 1.3215, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.2373742505842902, |
|
"grad_norm": 2.6841700077056885, |
|
"learning_rate": 1.7828858495683162e-05, |
|
"loss": 1.351, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.23818717609998985, |
|
"grad_norm": 2.4231340885162354, |
|
"learning_rate": 1.781244754621434e-05, |
|
"loss": 1.3923, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.23900010161568946, |
|
"grad_norm": 2.8300161361694336, |
|
"learning_rate": 1.779598242089707e-05, |
|
"loss": 1.3876, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.23981302713138908, |
|
"grad_norm": 2.6287200450897217, |
|
"learning_rate": 1.7779463233909677e-05, |
|
"loss": 1.3609, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.24062595264708872, |
|
"grad_norm": 2.656332015991211, |
|
"learning_rate": 1.7762890099805362e-05, |
|
"loss": 1.3538, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.24143887816278833, |
|
"grad_norm": 2.5331099033355713, |
|
"learning_rate": 1.774626313351145e-05, |
|
"loss": 1.3154, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.24225180367848795, |
|
"grad_norm": 2.8881306648254395, |
|
"learning_rate": 1.7729582450328547e-05, |
|
"loss": 1.3561, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.2430647291941876, |
|
"grad_norm": 2.4491260051727295, |
|
"learning_rate": 1.771284816592978e-05, |
|
"loss": 1.3494, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.2438776547098872, |
|
"grad_norm": 2.8161392211914062, |
|
"learning_rate": 1.7696060396359956e-05, |
|
"loss": 1.3125, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.24469058022558682, |
|
"grad_norm": 2.788238048553467, |
|
"learning_rate": 1.7679219258034798e-05, |
|
"loss": 1.41, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.24550350574128646, |
|
"grad_norm": 3.0948519706726074, |
|
"learning_rate": 1.7662324867740102e-05, |
|
"loss": 1.4138, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.24631643125698607, |
|
"grad_norm": 3.617783308029175, |
|
"learning_rate": 1.7645377342630956e-05, |
|
"loss": 1.3995, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.24712935677268572, |
|
"grad_norm": 2.713531255722046, |
|
"learning_rate": 1.76283768002309e-05, |
|
"loss": 1.354, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.24794228228838533, |
|
"grad_norm": 3.9215407371520996, |
|
"learning_rate": 1.7611323358431145e-05, |
|
"loss": 1.3939, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.24875520780408494, |
|
"grad_norm": 3.519932508468628, |
|
"learning_rate": 1.759421713548971e-05, |
|
"loss": 1.3311, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.24956813331978459, |
|
"grad_norm": 3.0680055618286133, |
|
"learning_rate": 1.757705825003065e-05, |
|
"loss": 1.4131, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.25038105883548417, |
|
"grad_norm": 2.456533908843994, |
|
"learning_rate": 1.7559846821043205e-05, |
|
"loss": 1.3132, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.25119398435118384, |
|
"grad_norm": 2.6937081813812256, |
|
"learning_rate": 1.754258296788097e-05, |
|
"loss": 1.3041, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.25200690986688346, |
|
"grad_norm": 5.319806098937988, |
|
"learning_rate": 1.7525266810261096e-05, |
|
"loss": 1.3544, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.25281983538258307, |
|
"grad_norm": 2.9595742225646973, |
|
"learning_rate": 1.7507898468263422e-05, |
|
"loss": 1.3528, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.2536327608982827, |
|
"grad_norm": 4.085862636566162, |
|
"learning_rate": 1.7490478062329686e-05, |
|
"loss": 1.3314, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.2544456864139823, |
|
"grad_norm": 2.4585909843444824, |
|
"learning_rate": 1.7473005713262644e-05, |
|
"loss": 1.3622, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.25525861192968197, |
|
"grad_norm": 2.4798450469970703, |
|
"learning_rate": 1.7455481542225272e-05, |
|
"loss": 1.3804, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.2560715374453816, |
|
"grad_norm": 2.686068534851074, |
|
"learning_rate": 1.7437905670739893e-05, |
|
"loss": 1.2945, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.2568844629610812, |
|
"grad_norm": 2.7424585819244385, |
|
"learning_rate": 1.7420278220687366e-05, |
|
"loss": 1.3561, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.2576973884767808, |
|
"grad_norm": 2.964237928390503, |
|
"learning_rate": 1.7402599314306207e-05, |
|
"loss": 1.3701, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.2585103139924804, |
|
"grad_norm": 2.7983458042144775, |
|
"learning_rate": 1.7384869074191777e-05, |
|
"loss": 1.3536, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.25932323950818004, |
|
"grad_norm": 2.6008524894714355, |
|
"learning_rate": 1.7367087623295394e-05, |
|
"loss": 1.3394, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.2601361650238797, |
|
"grad_norm": 2.4116249084472656, |
|
"learning_rate": 1.7349255084923517e-05, |
|
"loss": 1.3785, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.2609490905395793, |
|
"grad_norm": 2.9649388790130615, |
|
"learning_rate": 1.7331371582736864e-05, |
|
"loss": 1.3779, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.26176201605527893, |
|
"grad_norm": 2.692847490310669, |
|
"learning_rate": 1.731343724074957e-05, |
|
"loss": 1.3715, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.26257494157097855, |
|
"grad_norm": 2.6246955394744873, |
|
"learning_rate": 1.7295452183328317e-05, |
|
"loss": 1.3856, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.26338786708667816, |
|
"grad_norm": 2.822334051132202, |
|
"learning_rate": 1.7277416535191478e-05, |
|
"loss": 1.3289, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.26420079260237783, |
|
"grad_norm": 2.703158378601074, |
|
"learning_rate": 1.7259330421408247e-05, |
|
"loss": 1.3447, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.26501371811807745, |
|
"grad_norm": 2.5357322692871094, |
|
"learning_rate": 1.7241193967397784e-05, |
|
"loss": 1.3414, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.26582664363377706, |
|
"grad_norm": 2.7839202880859375, |
|
"learning_rate": 1.7223007298928322e-05, |
|
"loss": 1.3725, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.2666395691494767, |
|
"grad_norm": 2.6645684242248535, |
|
"learning_rate": 1.7204770542116326e-05, |
|
"loss": 1.3163, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.2674524946651763, |
|
"grad_norm": 4.677945137023926, |
|
"learning_rate": 1.7186483823425582e-05, |
|
"loss": 1.3583, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.2682654201808759, |
|
"grad_norm": 2.948094367980957, |
|
"learning_rate": 1.7168147269666357e-05, |
|
"loss": 1.3643, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.26907834569657557, |
|
"grad_norm": 2.5047991275787354, |
|
"learning_rate": 1.714976100799449e-05, |
|
"loss": 1.3542, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.2698912712122752, |
|
"grad_norm": 2.680239677429199, |
|
"learning_rate": 1.713132516591053e-05, |
|
"loss": 1.3204, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.2707041967279748, |
|
"grad_norm": 2.703165054321289, |
|
"learning_rate": 1.7112839871258838e-05, |
|
"loss": 1.3467, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.2715171222436744, |
|
"grad_norm": 2.5855846405029297, |
|
"learning_rate": 1.7094305252226713e-05, |
|
"loss": 1.3807, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.272330047759374, |
|
"grad_norm": 2.8401761054992676, |
|
"learning_rate": 1.7075721437343488e-05, |
|
"loss": 1.4032, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.2731429732750737, |
|
"grad_norm": 2.727287530899048, |
|
"learning_rate": 1.705708855547966e-05, |
|
"loss": 1.3416, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.2739558987907733, |
|
"grad_norm": 2.9767589569091797, |
|
"learning_rate": 1.7038406735845967e-05, |
|
"loss": 1.3062, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.2747688243064729, |
|
"grad_norm": 2.6532137393951416, |
|
"learning_rate": 1.7019676107992523e-05, |
|
"loss": 1.3717, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.27558174982217254, |
|
"grad_norm": 5.618951797485352, |
|
"learning_rate": 1.70008968018079e-05, |
|
"loss": 1.4021, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.27639467533787215, |
|
"grad_norm": 2.75219464302063, |
|
"learning_rate": 1.6982068947518235e-05, |
|
"loss": 1.3345, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.27720760085357177, |
|
"grad_norm": 2.7771074771881104, |
|
"learning_rate": 1.6963192675686312e-05, |
|
"loss": 1.3613, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.27802052636927144, |
|
"grad_norm": 2.4822003841400146, |
|
"learning_rate": 1.694426811721069e-05, |
|
"loss": 1.3465, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.27883345188497105, |
|
"grad_norm": 2.684894323348999, |
|
"learning_rate": 1.6925295403324758e-05, |
|
"loss": 1.337, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.27964637740067066, |
|
"grad_norm": 2.804255962371826, |
|
"learning_rate": 1.6906274665595854e-05, |
|
"loss": 1.2862, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.2804593029163703, |
|
"grad_norm": 2.7327306270599365, |
|
"learning_rate": 1.688720603592432e-05, |
|
"loss": 1.3826, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.2812722284320699, |
|
"grad_norm": 3.0967769622802734, |
|
"learning_rate": 1.6868089646542632e-05, |
|
"loss": 1.3406, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.28208515394776956, |
|
"grad_norm": 2.4972376823425293, |
|
"learning_rate": 1.6848925630014445e-05, |
|
"loss": 1.3315, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.2828980794634692, |
|
"grad_norm": 22.60991668701172, |
|
"learning_rate": 1.6829714119233688e-05, |
|
"loss": 1.3325, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.2837110049791688, |
|
"grad_norm": 3.207625389099121, |
|
"learning_rate": 1.6810455247423634e-05, |
|
"loss": 1.3926, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.2845239304948684, |
|
"grad_norm": 2.6568946838378906, |
|
"learning_rate": 1.6791149148136003e-05, |
|
"loss": 1.3464, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.285336856010568, |
|
"grad_norm": 2.9483156204223633, |
|
"learning_rate": 1.677179595525e-05, |
|
"loss": 1.2875, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.28614978152626763, |
|
"grad_norm": 2.841442584991455, |
|
"learning_rate": 1.675239580297141e-05, |
|
"loss": 1.3441, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.2869627070419673, |
|
"grad_norm": 3.3877551555633545, |
|
"learning_rate": 1.6732948825831657e-05, |
|
"loss": 1.3662, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.2877756325576669, |
|
"grad_norm": 2.9442946910858154, |
|
"learning_rate": 1.671345515868688e-05, |
|
"loss": 1.3075, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.28858855807336653, |
|
"grad_norm": 2.672950029373169, |
|
"learning_rate": 1.6693914936716983e-05, |
|
"loss": 1.2982, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.28940148358906614, |
|
"grad_norm": 2.7699198722839355, |
|
"learning_rate": 1.6674328295424723e-05, |
|
"loss": 1.3331, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.29021440910476576, |
|
"grad_norm": 2.578444719314575, |
|
"learning_rate": 1.6654695370634738e-05, |
|
"loss": 1.3768, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.29102733462046537, |
|
"grad_norm": 2.748466968536377, |
|
"learning_rate": 1.6635016298492628e-05, |
|
"loss": 1.3108, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.29184026013616504, |
|
"grad_norm": 2.818321943283081, |
|
"learning_rate": 1.6615291215464005e-05, |
|
"loss": 1.2586, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.29265318565186466, |
|
"grad_norm": 3.6742396354675293, |
|
"learning_rate": 1.6595520258333545e-05, |
|
"loss": 1.3112, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.29346611116756427, |
|
"grad_norm": 2.999140977859497, |
|
"learning_rate": 1.657570356420404e-05, |
|
"loss": 1.2923, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.2942790366832639, |
|
"grad_norm": 2.704463481903076, |
|
"learning_rate": 1.6555841270495456e-05, |
|
"loss": 1.3329, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.2950919621989635, |
|
"grad_norm": 3.2639801502227783, |
|
"learning_rate": 1.6535933514943955e-05, |
|
"loss": 1.3215, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.29590488771466317, |
|
"grad_norm": 3.2200841903686523, |
|
"learning_rate": 1.6515980435600965e-05, |
|
"loss": 1.3792, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.2967178132303628, |
|
"grad_norm": 2.9226245880126953, |
|
"learning_rate": 1.6495982170832224e-05, |
|
"loss": 1.3565, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.2975307387460624, |
|
"grad_norm": 3.096405029296875, |
|
"learning_rate": 1.6475938859316795e-05, |
|
"loss": 1.3857, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.298343664261762, |
|
"grad_norm": 2.7694365978240967, |
|
"learning_rate": 1.6455850640046134e-05, |
|
"loss": 1.3782, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.2991565897774616, |
|
"grad_norm": 3.011751890182495, |
|
"learning_rate": 1.6435717652323097e-05, |
|
"loss": 1.3426, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.29996951529316124, |
|
"grad_norm": 2.7828853130340576, |
|
"learning_rate": 1.6415540035761008e-05, |
|
"loss": 1.3429, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.3007824408088609, |
|
"grad_norm": 2.5543785095214844, |
|
"learning_rate": 1.639531793028265e-05, |
|
"loss": 1.3768, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.3015953663245605, |
|
"grad_norm": 2.8462271690368652, |
|
"learning_rate": 1.637505147611934e-05, |
|
"loss": 1.3203, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.30240829184026013, |
|
"grad_norm": 2.404257297515869, |
|
"learning_rate": 1.6354740813809917e-05, |
|
"loss": 1.3693, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.30322121735595975, |
|
"grad_norm": 2.674553394317627, |
|
"learning_rate": 1.6334386084199787e-05, |
|
"loss": 1.3518, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.30403414287165936, |
|
"grad_norm": 2.4954397678375244, |
|
"learning_rate": 1.631398742843995e-05, |
|
"loss": 1.3669, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.30484706838735903, |
|
"grad_norm": 3.333721876144409, |
|
"learning_rate": 1.629354498798601e-05, |
|
"loss": 1.3358, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.30565999390305865, |
|
"grad_norm": 2.859560966491699, |
|
"learning_rate": 1.627305890459719e-05, |
|
"loss": 1.3334, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.30647291941875826, |
|
"grad_norm": 2.8346803188323975, |
|
"learning_rate": 1.625252932033538e-05, |
|
"loss": 1.3366, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.3072858449344579, |
|
"grad_norm": 2.64909029006958, |
|
"learning_rate": 1.6231956377564095e-05, |
|
"loss": 1.3398, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.3080987704501575, |
|
"grad_norm": 3.935067653656006, |
|
"learning_rate": 1.621134021894756e-05, |
|
"loss": 1.2953, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.3089116959658571, |
|
"grad_norm": 5.056494235992432, |
|
"learning_rate": 1.619068098744965e-05, |
|
"loss": 1.3245, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.30972462148155677, |
|
"grad_norm": 2.9668800830841064, |
|
"learning_rate": 1.6169978826332955e-05, |
|
"loss": 1.3199, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.3105375469972564, |
|
"grad_norm": 2.6101276874542236, |
|
"learning_rate": 1.6149233879157747e-05, |
|
"loss": 1.3317, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.311350472512956, |
|
"grad_norm": 2.677374839782715, |
|
"learning_rate": 1.6128446289781012e-05, |
|
"loss": 1.304, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.3121633980286556, |
|
"grad_norm": 4.049331188201904, |
|
"learning_rate": 1.610761620235543e-05, |
|
"loss": 1.3241, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.31297632354435523, |
|
"grad_norm": 2.566908836364746, |
|
"learning_rate": 1.60867437613284e-05, |
|
"loss": 1.3392, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.3137892490600549, |
|
"grad_norm": 2.550367832183838, |
|
"learning_rate": 1.6065829111441e-05, |
|
"loss": 1.3274, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.3146021745757545, |
|
"grad_norm": 4.543491363525391, |
|
"learning_rate": 1.6044872397727037e-05, |
|
"loss": 1.2993, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.3154151000914541, |
|
"grad_norm": 2.8900489807128906, |
|
"learning_rate": 1.6023873765511993e-05, |
|
"loss": 1.3274, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.31622802560715374, |
|
"grad_norm": 2.4930450916290283, |
|
"learning_rate": 1.6002833360412044e-05, |
|
"loss": 1.3074, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.31704095112285335, |
|
"grad_norm": 3.0221235752105713, |
|
"learning_rate": 1.5981751328333036e-05, |
|
"loss": 1.3077, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.31785387663855297, |
|
"grad_norm": 3.0569851398468018, |
|
"learning_rate": 1.5960627815469486e-05, |
|
"loss": 1.3705, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.31866680215425264, |
|
"grad_norm": 7.261632442474365, |
|
"learning_rate": 1.5939462968303554e-05, |
|
"loss": 1.3564, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.31947972766995225, |
|
"grad_norm": 3.0555789470672607, |
|
"learning_rate": 1.5918256933604047e-05, |
|
"loss": 1.3451, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.32029265318565187, |
|
"grad_norm": 3.360779047012329, |
|
"learning_rate": 1.589700985842538e-05, |
|
"loss": 1.2764, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.3211055787013515, |
|
"grad_norm": 2.9022507667541504, |
|
"learning_rate": 1.5875721890106574e-05, |
|
"loss": 1.3424, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.3219185042170511, |
|
"grad_norm": 5.119380474090576, |
|
"learning_rate": 1.5854393176270205e-05, |
|
"loss": 1.3392, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.32273142973275076, |
|
"grad_norm": 2.7554409503936768, |
|
"learning_rate": 1.5833023864821427e-05, |
|
"loss": 1.3762, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.3235443552484504, |
|
"grad_norm": 2.553323984146118, |
|
"learning_rate": 1.5811614103946905e-05, |
|
"loss": 1.3066, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.32435728076415, |
|
"grad_norm": 3.514381170272827, |
|
"learning_rate": 1.5790164042113805e-05, |
|
"loss": 1.3575, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.3251702062798496, |
|
"grad_norm": 2.89054012298584, |
|
"learning_rate": 1.576867382806877e-05, |
|
"loss": 1.3106, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.3259831317955492, |
|
"grad_norm": 2.9955763816833496, |
|
"learning_rate": 1.5747143610836873e-05, |
|
"loss": 1.3634, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.32679605731124883, |
|
"grad_norm": 3.175438404083252, |
|
"learning_rate": 1.5725573539720592e-05, |
|
"loss": 1.2876, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.3276089828269485, |
|
"grad_norm": 2.6269116401672363, |
|
"learning_rate": 1.570396376429877e-05, |
|
"loss": 1.342, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.3284219083426481, |
|
"grad_norm": 2.900568962097168, |
|
"learning_rate": 1.5682314434425593e-05, |
|
"loss": 1.3133, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.32923483385834773, |
|
"grad_norm": 2.6711323261260986, |
|
"learning_rate": 1.5660625700229526e-05, |
|
"loss": 1.2702, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.33004775937404734, |
|
"grad_norm": 2.8045928478240967, |
|
"learning_rate": 1.5638897712112303e-05, |
|
"loss": 1.3336, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.33086068488974696, |
|
"grad_norm": 2.9632303714752197, |
|
"learning_rate": 1.561713062074785e-05, |
|
"loss": 1.3546, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.33167361040544663, |
|
"grad_norm": 2.5156984329223633, |
|
"learning_rate": 1.5595324577081265e-05, |
|
"loss": 1.3587, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.33248653592114624, |
|
"grad_norm": 2.6634364128112793, |
|
"learning_rate": 1.5573479732327758e-05, |
|
"loss": 1.3317, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.33329946143684586, |
|
"grad_norm": 4.38008451461792, |
|
"learning_rate": 1.555159623797161e-05, |
|
"loss": 1.3078, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.33411238695254547, |
|
"grad_norm": 3.089078903198242, |
|
"learning_rate": 1.552967424576512e-05, |
|
"loss": 1.328, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.3349253124682451, |
|
"grad_norm": 2.9011247158050537, |
|
"learning_rate": 1.5507713907727557e-05, |
|
"loss": 1.349, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.3357382379839447, |
|
"grad_norm": 2.431152582168579, |
|
"learning_rate": 1.5485715376144087e-05, |
|
"loss": 1.383, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.33655116349964437, |
|
"grad_norm": 2.6097633838653564, |
|
"learning_rate": 1.5463678803564753e-05, |
|
"loss": 1.3414, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.337364089015344, |
|
"grad_norm": 2.9973533153533936, |
|
"learning_rate": 1.5441604342803374e-05, |
|
"loss": 1.3359, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.3381770145310436, |
|
"grad_norm": 2.849950075149536, |
|
"learning_rate": 1.5419492146936518e-05, |
|
"loss": 1.3378, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.3389899400467432, |
|
"grad_norm": 2.600947856903076, |
|
"learning_rate": 1.5397342369302425e-05, |
|
"loss": 1.3411, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.3398028655624428, |
|
"grad_norm": 2.946190595626831, |
|
"learning_rate": 1.5375155163499953e-05, |
|
"loss": 1.2981, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.34061579107814244, |
|
"grad_norm": 3.5300893783569336, |
|
"learning_rate": 1.5352930683387502e-05, |
|
"loss": 1.3717, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.3414287165938421, |
|
"grad_norm": 2.342288017272949, |
|
"learning_rate": 1.5330669083081956e-05, |
|
"loss": 1.2734, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.3422416421095417, |
|
"grad_norm": 3.7037856578826904, |
|
"learning_rate": 1.5308370516957617e-05, |
|
"loss": 1.3402, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.34305456762524134, |
|
"grad_norm": 2.5814309120178223, |
|
"learning_rate": 1.528603513964511e-05, |
|
"loss": 1.3207, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.34386749314094095, |
|
"grad_norm": 2.4542317390441895, |
|
"learning_rate": 1.5263663106030347e-05, |
|
"loss": 1.3257, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.34468041865664056, |
|
"grad_norm": 2.689870595932007, |
|
"learning_rate": 1.5241254571253433e-05, |
|
"loss": 1.3105, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.34549334417234023, |
|
"grad_norm": 2.900061845779419, |
|
"learning_rate": 1.5218809690707583e-05, |
|
"loss": 1.3113, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.34630626968803985, |
|
"grad_norm": 2.7165238857269287, |
|
"learning_rate": 1.5196328620038059e-05, |
|
"loss": 1.335, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.34711919520373946, |
|
"grad_norm": 2.3893747329711914, |
|
"learning_rate": 1.5173811515141083e-05, |
|
"loss": 1.3062, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.3479321207194391, |
|
"grad_norm": 2.568575143814087, |
|
"learning_rate": 1.5151258532162771e-05, |
|
"loss": 1.3338, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.3487450462351387, |
|
"grad_norm": 3.406301736831665, |
|
"learning_rate": 1.5128669827498024e-05, |
|
"loss": 1.3189, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.3495579717508383, |
|
"grad_norm": 2.752307653427124, |
|
"learning_rate": 1.5106045557789453e-05, |
|
"loss": 1.331, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.350370897266538, |
|
"grad_norm": 2.570742130279541, |
|
"learning_rate": 1.5083385879926309e-05, |
|
"loss": 1.2887, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.3511838227822376, |
|
"grad_norm": 2.4754555225372314, |
|
"learning_rate": 1.5060690951043385e-05, |
|
"loss": 1.3432, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.3519967482979372, |
|
"grad_norm": 3.853609561920166, |
|
"learning_rate": 1.5037960928519902e-05, |
|
"loss": 1.3625, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.3528096738136368, |
|
"grad_norm": 2.6506130695343018, |
|
"learning_rate": 1.501519596997847e-05, |
|
"loss": 1.2797, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.35362259932933643, |
|
"grad_norm": 2.8529601097106934, |
|
"learning_rate": 1.499239623328394e-05, |
|
"loss": 1.2868, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.3544355248450361, |
|
"grad_norm": 4.091727256774902, |
|
"learning_rate": 1.4969561876542348e-05, |
|
"loss": 1.2648, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.3552484503607357, |
|
"grad_norm": 2.5217483043670654, |
|
"learning_rate": 1.4946693058099802e-05, |
|
"loss": 1.2792, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.3560613758764353, |
|
"grad_norm": 3.035297155380249, |
|
"learning_rate": 1.4923789936541378e-05, |
|
"loss": 1.3267, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.35687430139213494, |
|
"grad_norm": 4.371755599975586, |
|
"learning_rate": 1.4900852670690044e-05, |
|
"loss": 1.3114, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.35768722690783455, |
|
"grad_norm": 2.904101610183716, |
|
"learning_rate": 1.487788141960553e-05, |
|
"loss": 1.3716, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.35850015242353417, |
|
"grad_norm": 2.663241147994995, |
|
"learning_rate": 1.4854876342583246e-05, |
|
"loss": 1.3269, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.35931307793923384, |
|
"grad_norm": 2.626646041870117, |
|
"learning_rate": 1.4831837599153165e-05, |
|
"loss": 1.3077, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.36012600345493345, |
|
"grad_norm": 2.8876073360443115, |
|
"learning_rate": 1.4808765349078729e-05, |
|
"loss": 1.2807, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.36093892897063307, |
|
"grad_norm": 2.5428106784820557, |
|
"learning_rate": 1.4785659752355724e-05, |
|
"loss": 1.3242, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.3617518544863327, |
|
"grad_norm": 2.7515244483947754, |
|
"learning_rate": 1.4762520969211186e-05, |
|
"loss": 1.3356, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.3625647800020323, |
|
"grad_norm": 2.771684408187866, |
|
"learning_rate": 1.4739349160102285e-05, |
|
"loss": 1.3255, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.36337770551773196, |
|
"grad_norm": 2.7270543575286865, |
|
"learning_rate": 1.4716144485715209e-05, |
|
"loss": 1.2797, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.3641906310334316, |
|
"grad_norm": 3.5211868286132812, |
|
"learning_rate": 1.4692907106964051e-05, |
|
"loss": 1.3098, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.3650035565491312, |
|
"grad_norm": 5.923196315765381, |
|
"learning_rate": 1.4669637184989696e-05, |
|
"loss": 1.3212, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.3658164820648308, |
|
"grad_norm": 2.50697922706604, |
|
"learning_rate": 1.4646334881158704e-05, |
|
"loss": 1.3195, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.3666294075805304, |
|
"grad_norm": 3.3721578121185303, |
|
"learning_rate": 1.4623000357062184e-05, |
|
"loss": 1.2747, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.36744233309623003, |
|
"grad_norm": 2.429243803024292, |
|
"learning_rate": 1.459963377451468e-05, |
|
"loss": 1.3122, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.3682552586119297, |
|
"grad_norm": 4.240250587463379, |
|
"learning_rate": 1.457623529555305e-05, |
|
"loss": 1.3447, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.3690681841276293, |
|
"grad_norm": 2.631667137145996, |
|
"learning_rate": 1.4552805082435333e-05, |
|
"loss": 1.3171, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.36988110964332893, |
|
"grad_norm": 2.906388521194458, |
|
"learning_rate": 1.4529343297639638e-05, |
|
"loss": 1.3193, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.37069403515902855, |
|
"grad_norm": 3.047884464263916, |
|
"learning_rate": 1.4505850103863007e-05, |
|
"loss": 1.3181, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.37150696067472816, |
|
"grad_norm": 2.3922433853149414, |
|
"learning_rate": 1.448232566402028e-05, |
|
"loss": 1.3203, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.37231988619042783, |
|
"grad_norm": 3.278813123703003, |
|
"learning_rate": 1.4458770141242992e-05, |
|
"loss": 1.3309, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.37313281170612744, |
|
"grad_norm": 2.7148866653442383, |
|
"learning_rate": 1.4435183698878212e-05, |
|
"loss": 1.3408, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.37394573722182706, |
|
"grad_norm": 2.913823366165161, |
|
"learning_rate": 1.4411566500487425e-05, |
|
"loss": 1.3426, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.37475866273752667, |
|
"grad_norm": 2.435643196105957, |
|
"learning_rate": 1.4387918709845395e-05, |
|
"loss": 1.3357, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.3755715882532263, |
|
"grad_norm": 2.6099560260772705, |
|
"learning_rate": 1.4364240490939032e-05, |
|
"loss": 1.3013, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.3763845137689259, |
|
"grad_norm": 2.7896599769592285, |
|
"learning_rate": 1.4340532007966252e-05, |
|
"loss": 1.3284, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.37719743928462557, |
|
"grad_norm": 2.857205867767334, |
|
"learning_rate": 1.4316793425334836e-05, |
|
"loss": 1.2926, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.3780103648003252, |
|
"grad_norm": 2.4580750465393066, |
|
"learning_rate": 1.4293024907661295e-05, |
|
"loss": 1.3926, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.3788232903160248, |
|
"grad_norm": 2.6340065002441406, |
|
"learning_rate": 1.4269226619769727e-05, |
|
"loss": 1.3315, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.3796362158317244, |
|
"grad_norm": 3.416398525238037, |
|
"learning_rate": 1.424539872669067e-05, |
|
"loss": 1.2822, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.380449141347424, |
|
"grad_norm": 2.4222054481506348, |
|
"learning_rate": 1.4221541393659966e-05, |
|
"loss": 1.2894, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.38126206686312364, |
|
"grad_norm": 2.797074794769287, |
|
"learning_rate": 1.4197654786117604e-05, |
|
"loss": 1.3519, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.3820749923788233, |
|
"grad_norm": 2.563831329345703, |
|
"learning_rate": 1.4173739069706586e-05, |
|
"loss": 1.3474, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.3828879178945229, |
|
"grad_norm": 2.4004971981048584, |
|
"learning_rate": 1.414979441027176e-05, |
|
"loss": 1.3007, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.38370084341022254, |
|
"grad_norm": 2.532390594482422, |
|
"learning_rate": 1.4125820973858693e-05, |
|
"loss": 1.2613, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.38451376892592215, |
|
"grad_norm": 2.5733683109283447, |
|
"learning_rate": 1.41018189267125e-05, |
|
"loss": 1.3212, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.38532669444162176, |
|
"grad_norm": 2.710106134414673, |
|
"learning_rate": 1.4077788435276701e-05, |
|
"loss": 1.3235, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.38613961995732143, |
|
"grad_norm": 2.996795892715454, |
|
"learning_rate": 1.4053729666192067e-05, |
|
"loss": 1.3722, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.38695254547302105, |
|
"grad_norm": 2.4392545223236084, |
|
"learning_rate": 1.4029642786295452e-05, |
|
"loss": 1.3706, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.38776547098872066, |
|
"grad_norm": 2.6843369007110596, |
|
"learning_rate": 1.400552796261866e-05, |
|
"loss": 1.3382, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.3885783965044203, |
|
"grad_norm": 2.405515193939209, |
|
"learning_rate": 1.3981385362387268e-05, |
|
"loss": 1.316, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.3893913220201199, |
|
"grad_norm": 2.425203800201416, |
|
"learning_rate": 1.3957215153019463e-05, |
|
"loss": 1.3578, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.3902042475358195, |
|
"grad_norm": 2.5134634971618652, |
|
"learning_rate": 1.3933017502124897e-05, |
|
"loss": 1.3531, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.3910171730515192, |
|
"grad_norm": 2.4274141788482666, |
|
"learning_rate": 1.3908792577503514e-05, |
|
"loss": 1.3705, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.3918300985672188, |
|
"grad_norm": 2.881443500518799, |
|
"learning_rate": 1.3884540547144393e-05, |
|
"loss": 1.3196, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.3926430240829184, |
|
"grad_norm": 2.5505170822143555, |
|
"learning_rate": 1.3860261579224574e-05, |
|
"loss": 1.3221, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.393455949598618, |
|
"grad_norm": 2.5604939460754395, |
|
"learning_rate": 1.3835955842107897e-05, |
|
"loss": 1.2565, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.39426887511431763, |
|
"grad_norm": 2.8203351497650146, |
|
"learning_rate": 1.3811623504343845e-05, |
|
"loss": 1.323, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.3950818006300173, |
|
"grad_norm": 3.9116978645324707, |
|
"learning_rate": 1.378726473466635e-05, |
|
"loss": 1.3188, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.3958947261457169, |
|
"grad_norm": 2.918548822402954, |
|
"learning_rate": 1.3762879701992642e-05, |
|
"loss": 1.337, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.3967076516614165, |
|
"grad_norm": 3.048039674758911, |
|
"learning_rate": 1.373846857542208e-05, |
|
"loss": 1.3379, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.39752057717711614, |
|
"grad_norm": 2.6825406551361084, |
|
"learning_rate": 1.3714031524234965e-05, |
|
"loss": 1.3096, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.39833350269281576, |
|
"grad_norm": 2.5955066680908203, |
|
"learning_rate": 1.3689568717891381e-05, |
|
"loss": 1.2947, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.39914642820851537, |
|
"grad_norm": 2.5204849243164062, |
|
"learning_rate": 1.3665080326029997e-05, |
|
"loss": 1.2852, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.39995935372421504, |
|
"grad_norm": 3.158151865005493, |
|
"learning_rate": 1.364056651846693e-05, |
|
"loss": 1.3323, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.40077227923991465, |
|
"grad_norm": 2.787951946258545, |
|
"learning_rate": 1.3616027465194525e-05, |
|
"loss": 1.325, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.40158520475561427, |
|
"grad_norm": 3.462423324584961, |
|
"learning_rate": 1.35914633363802e-05, |
|
"loss": 1.2689, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.4023981302713139, |
|
"grad_norm": 3.3612263202667236, |
|
"learning_rate": 1.356687430236526e-05, |
|
"loss": 1.2846, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.4032110557870135, |
|
"grad_norm": 2.521135091781616, |
|
"learning_rate": 1.3542260533663723e-05, |
|
"loss": 1.2845, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.40402398130271316, |
|
"grad_norm": 2.702359914779663, |
|
"learning_rate": 1.351762220096112e-05, |
|
"loss": 1.2982, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.4048369068184128, |
|
"grad_norm": 2.928270101547241, |
|
"learning_rate": 1.3492959475113332e-05, |
|
"loss": 1.2878, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.4056498323341124, |
|
"grad_norm": 2.491701126098633, |
|
"learning_rate": 1.3468272527145388e-05, |
|
"loss": 1.2913, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.406462757849812, |
|
"grad_norm": 2.8777735233306885, |
|
"learning_rate": 1.3443561528250295e-05, |
|
"loss": 1.328, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.4072756833655116, |
|
"grad_norm": 3.4918212890625, |
|
"learning_rate": 1.3418826649787834e-05, |
|
"loss": 1.3415, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.40808860888121123, |
|
"grad_norm": 2.6940505504608154, |
|
"learning_rate": 1.3394068063283387e-05, |
|
"loss": 1.3017, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.4089015343969109, |
|
"grad_norm": 3.9722023010253906, |
|
"learning_rate": 1.3369285940426737e-05, |
|
"loss": 1.3161, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.4097144599126105, |
|
"grad_norm": 2.6105010509490967, |
|
"learning_rate": 1.334448045307088e-05, |
|
"loss": 1.2853, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.41052738542831013, |
|
"grad_norm": 4.094304084777832, |
|
"learning_rate": 1.331965177323084e-05, |
|
"loss": 1.3059, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.41134031094400975, |
|
"grad_norm": 2.5570600032806396, |
|
"learning_rate": 1.3294800073082464e-05, |
|
"loss": 1.2957, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.41215323645970936, |
|
"grad_norm": 2.60870099067688, |
|
"learning_rate": 1.3269925524961237e-05, |
|
"loss": 1.2887, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.41296616197540903, |
|
"grad_norm": 2.3958325386047363, |
|
"learning_rate": 1.3245028301361086e-05, |
|
"loss": 1.3207, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.41377908749110864, |
|
"grad_norm": 2.718470811843872, |
|
"learning_rate": 1.3220108574933185e-05, |
|
"loss": 1.2884, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.41459201300680826, |
|
"grad_norm": 2.9990408420562744, |
|
"learning_rate": 1.3195166518484748e-05, |
|
"loss": 1.3104, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.41540493852250787, |
|
"grad_norm": 3.256333589553833, |
|
"learning_rate": 1.317020230497784e-05, |
|
"loss": 1.2586, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.4162178640382075, |
|
"grad_norm": 3.0497708320617676, |
|
"learning_rate": 1.3145216107528178e-05, |
|
"loss": 1.2946, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.4170307895539071, |
|
"grad_norm": 2.6696412563323975, |
|
"learning_rate": 1.3120208099403926e-05, |
|
"loss": 1.3413, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.41784371506960677, |
|
"grad_norm": 2.592937469482422, |
|
"learning_rate": 1.3095178454024496e-05, |
|
"loss": 1.2827, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.4186566405853064, |
|
"grad_norm": 2.450669288635254, |
|
"learning_rate": 1.3070127344959348e-05, |
|
"loss": 1.2505, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.419469566101006, |
|
"grad_norm": 4.529777526855469, |
|
"learning_rate": 1.3045054945926775e-05, |
|
"loss": 1.3001, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.4202824916167056, |
|
"grad_norm": 3.2491648197174072, |
|
"learning_rate": 1.3019961430792711e-05, |
|
"loss": 1.2932, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.4210954171324052, |
|
"grad_norm": 3.3505818843841553, |
|
"learning_rate": 1.2994846973569524e-05, |
|
"loss": 1.3516, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.4219083426481049, |
|
"grad_norm": 3.5476715564727783, |
|
"learning_rate": 1.2969711748414804e-05, |
|
"loss": 1.2834, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.4227212681638045, |
|
"grad_norm": 2.738903522491455, |
|
"learning_rate": 1.2944555929630152e-05, |
|
"loss": 1.2978, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.4235341936795041, |
|
"grad_norm": 2.5854766368865967, |
|
"learning_rate": 1.2919379691659979e-05, |
|
"loss": 1.293, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.42434711919520374, |
|
"grad_norm": 3.76955246925354, |
|
"learning_rate": 1.2894183209090304e-05, |
|
"loss": 1.2517, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.42516004471090335, |
|
"grad_norm": 2.566361904144287, |
|
"learning_rate": 1.2868966656647522e-05, |
|
"loss": 1.3295, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.42597297022660296, |
|
"grad_norm": 2.7477164268493652, |
|
"learning_rate": 1.2843730209197203e-05, |
|
"loss": 1.3067, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.42678589574230263, |
|
"grad_norm": 3.0560967922210693, |
|
"learning_rate": 1.2818474041742885e-05, |
|
"loss": 1.2951, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.42759882125800225, |
|
"grad_norm": 2.9634625911712646, |
|
"learning_rate": 1.2793198329424858e-05, |
|
"loss": 1.268, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.42841174677370186, |
|
"grad_norm": 2.8108301162719727, |
|
"learning_rate": 1.2767903247518945e-05, |
|
"loss": 1.3319, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.4292246722894015, |
|
"grad_norm": 3.85799241065979, |
|
"learning_rate": 1.2742588971435276e-05, |
|
"loss": 1.3764, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.4300375978051011, |
|
"grad_norm": 2.564434766769409, |
|
"learning_rate": 1.2717255676717106e-05, |
|
"loss": 1.2854, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.4308505233208007, |
|
"grad_norm": 5.098544597625732, |
|
"learning_rate": 1.2691903539039563e-05, |
|
"loss": 1.3143, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.4316634488365004, |
|
"grad_norm": 7.195343017578125, |
|
"learning_rate": 1.2666532734208437e-05, |
|
"loss": 1.3026, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.4324763743522, |
|
"grad_norm": 2.743298053741455, |
|
"learning_rate": 1.264114343815898e-05, |
|
"loss": 1.3124, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.4332892998678996, |
|
"grad_norm": 3.183859348297119, |
|
"learning_rate": 1.2615735826954664e-05, |
|
"loss": 1.3132, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.4341022253835992, |
|
"grad_norm": 7.095142364501953, |
|
"learning_rate": 1.2590310076785974e-05, |
|
"loss": 1.2599, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.43491515089929883, |
|
"grad_norm": 2.91894268989563, |
|
"learning_rate": 1.256486636396917e-05, |
|
"loss": 1.3251, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.4357280764149985, |
|
"grad_norm": 2.931509494781494, |
|
"learning_rate": 1.2539404864945087e-05, |
|
"loss": 1.3347, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.4365410019306981, |
|
"grad_norm": 2.4552268981933594, |
|
"learning_rate": 1.2513925756277894e-05, |
|
"loss": 1.3469, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.43735392744639773, |
|
"grad_norm": 2.846196174621582, |
|
"learning_rate": 1.2488429214653871e-05, |
|
"loss": 1.2654, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.43816685296209734, |
|
"grad_norm": 3.494403600692749, |
|
"learning_rate": 1.24629154168802e-05, |
|
"loss": 1.2688, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.43897977847779696, |
|
"grad_norm": 3.00067138671875, |
|
"learning_rate": 1.2437384539883715e-05, |
|
"loss": 1.2865, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.43979270399349657, |
|
"grad_norm": 3.0412096977233887, |
|
"learning_rate": 1.2411836760709686e-05, |
|
"loss": 1.269, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.44060562950919624, |
|
"grad_norm": 2.3580715656280518, |
|
"learning_rate": 1.2386272256520606e-05, |
|
"loss": 1.2752, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.44141855502489585, |
|
"grad_norm": 9.030720710754395, |
|
"learning_rate": 1.2360691204594937e-05, |
|
"loss": 1.3074, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.44223148054059547, |
|
"grad_norm": 3.970172882080078, |
|
"learning_rate": 1.2335093782325889e-05, |
|
"loss": 1.3117, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.4430444060562951, |
|
"grad_norm": 2.8179943561553955, |
|
"learning_rate": 1.2309480167220203e-05, |
|
"loss": 1.3196, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.4438573315719947, |
|
"grad_norm": 2.9376232624053955, |
|
"learning_rate": 1.2283850536896907e-05, |
|
"loss": 1.2614, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.44467025708769436, |
|
"grad_norm": 2.811709403991699, |
|
"learning_rate": 1.2258205069086082e-05, |
|
"loss": 1.2666, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.445483182603394, |
|
"grad_norm": 3.060638427734375, |
|
"learning_rate": 1.2232543941627641e-05, |
|
"loss": 1.2891, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.4462961081190936, |
|
"grad_norm": 2.581530809402466, |
|
"learning_rate": 1.2206867332470091e-05, |
|
"loss": 1.2875, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.4471090336347932, |
|
"grad_norm": 2.588129997253418, |
|
"learning_rate": 1.2181175419669293e-05, |
|
"loss": 1.2964, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.4479219591504928, |
|
"grad_norm": 3.0943429470062256, |
|
"learning_rate": 1.215546838138723e-05, |
|
"loss": 1.29, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.44873488466619244, |
|
"grad_norm": 2.960190534591675, |
|
"learning_rate": 1.212974639589078e-05, |
|
"loss": 1.2812, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.4495478101818921, |
|
"grad_norm": 2.7364282608032227, |
|
"learning_rate": 1.2104009641550472e-05, |
|
"loss": 1.2783, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.4503607356975917, |
|
"grad_norm": 2.509277105331421, |
|
"learning_rate": 1.2078258296839245e-05, |
|
"loss": 1.2859, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.45117366121329133, |
|
"grad_norm": 2.769371747970581, |
|
"learning_rate": 1.2052492540331218e-05, |
|
"loss": 1.2866, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.45198658672899095, |
|
"grad_norm": 3.057968854904175, |
|
"learning_rate": 1.2026712550700457e-05, |
|
"loss": 1.3051, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.45279951224469056, |
|
"grad_norm": 3.4182374477386475, |
|
"learning_rate": 1.200091850671972e-05, |
|
"loss": 1.3266, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.45361243776039023, |
|
"grad_norm": 2.6871426105499268, |
|
"learning_rate": 1.1975110587259222e-05, |
|
"loss": 1.2596, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.45442536327608984, |
|
"grad_norm": 3.463675022125244, |
|
"learning_rate": 1.1949288971285411e-05, |
|
"loss": 1.2767, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.45523828879178946, |
|
"grad_norm": 2.8260090351104736, |
|
"learning_rate": 1.1923453837859706e-05, |
|
"loss": 1.2734, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.4560512143074891, |
|
"grad_norm": 2.6161341667175293, |
|
"learning_rate": 1.1897605366137264e-05, |
|
"loss": 1.2377, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.4568641398231887, |
|
"grad_norm": 2.847534418106079, |
|
"learning_rate": 1.1871743735365735e-05, |
|
"loss": 1.3128, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.4576770653388883, |
|
"grad_norm": 3.116063117980957, |
|
"learning_rate": 1.1845869124884027e-05, |
|
"loss": 1.3114, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.45848999085458797, |
|
"grad_norm": 3.2849061489105225, |
|
"learning_rate": 1.1819981714121054e-05, |
|
"loss": 1.2761, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.4593029163702876, |
|
"grad_norm": 2.484531879425049, |
|
"learning_rate": 1.1794081682594491e-05, |
|
"loss": 1.2978, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.4601158418859872, |
|
"grad_norm": 3.111940383911133, |
|
"learning_rate": 1.176816920990954e-05, |
|
"loss": 1.2928, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.4609287674016868, |
|
"grad_norm": 3.063422918319702, |
|
"learning_rate": 1.174224447575767e-05, |
|
"loss": 1.3137, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.4617416929173864, |
|
"grad_norm": 4.031757831573486, |
|
"learning_rate": 1.171630765991538e-05, |
|
"loss": 1.2986, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.4625546184330861, |
|
"grad_norm": 2.650336980819702, |
|
"learning_rate": 1.169035894224295e-05, |
|
"loss": 1.3328, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.4633675439487857, |
|
"grad_norm": 2.574526309967041, |
|
"learning_rate": 1.1664398502683194e-05, |
|
"loss": 1.3078, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.4641804694644853, |
|
"grad_norm": 2.3674449920654297, |
|
"learning_rate": 1.1638426521260211e-05, |
|
"loss": 1.2819, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.46499339498018494, |
|
"grad_norm": 2.8870980739593506, |
|
"learning_rate": 1.1612443178078138e-05, |
|
"loss": 1.2661, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.46580632049588455, |
|
"grad_norm": 2.4961047172546387, |
|
"learning_rate": 1.1586448653319908e-05, |
|
"loss": 1.3042, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.46661924601158417, |
|
"grad_norm": 2.6196508407592773, |
|
"learning_rate": 1.156044312724598e-05, |
|
"loss": 1.2306, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.46743217152728384, |
|
"grad_norm": 2.7249913215637207, |
|
"learning_rate": 1.153442678019311e-05, |
|
"loss": 1.3095, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.46824509704298345, |
|
"grad_norm": 2.9108643531799316, |
|
"learning_rate": 1.1508399792573095e-05, |
|
"loss": 1.2513, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.46905802255868306, |
|
"grad_norm": 2.7690494060516357, |
|
"learning_rate": 1.1482362344871514e-05, |
|
"loss": 1.3445, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.4698709480743827, |
|
"grad_norm": 3.629122734069824, |
|
"learning_rate": 1.1456314617646482e-05, |
|
"loss": 1.2616, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.4706838735900823, |
|
"grad_norm": 2.6831417083740234, |
|
"learning_rate": 1.1430256791527406e-05, |
|
"loss": 1.2786, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.4714967991057819, |
|
"grad_norm": 2.5316171646118164, |
|
"learning_rate": 1.1404189047213716e-05, |
|
"loss": 1.3195, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.4723097246214816, |
|
"grad_norm": 4.602120399475098, |
|
"learning_rate": 1.137811156547362e-05, |
|
"loss": 1.2378, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.4731226501371812, |
|
"grad_norm": 2.5073766708374023, |
|
"learning_rate": 1.1352024527142855e-05, |
|
"loss": 1.2426, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.4739355756528808, |
|
"grad_norm": 2.5561444759368896, |
|
"learning_rate": 1.1325928113123431e-05, |
|
"loss": 1.318, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.4747485011685804, |
|
"grad_norm": 2.8386447429656982, |
|
"learning_rate": 1.129982250438237e-05, |
|
"loss": 1.2529, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.47556142668428003, |
|
"grad_norm": 2.3654778003692627, |
|
"learning_rate": 1.1273707881950445e-05, |
|
"loss": 1.2822, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.4763743521999797, |
|
"grad_norm": 3.125446081161499, |
|
"learning_rate": 1.1247584426920962e-05, |
|
"loss": 1.3588, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.4771872777156793, |
|
"grad_norm": 3.600827217102051, |
|
"learning_rate": 1.1221452320448449e-05, |
|
"loss": 1.3023, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.47800020323137893, |
|
"grad_norm": 3.858783483505249, |
|
"learning_rate": 1.1195311743747445e-05, |
|
"loss": 1.2784, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.47881312874707854, |
|
"grad_norm": 2.841679334640503, |
|
"learning_rate": 1.116916287809122e-05, |
|
"loss": 1.3084, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.47962605426277816, |
|
"grad_norm": 2.9722323417663574, |
|
"learning_rate": 1.1143005904810527e-05, |
|
"loss": 1.2983, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.48043897977847777, |
|
"grad_norm": 2.560037136077881, |
|
"learning_rate": 1.1116841005292339e-05, |
|
"loss": 1.3175, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.48125190529417744, |
|
"grad_norm": 3.1770455837249756, |
|
"learning_rate": 1.1090668360978589e-05, |
|
"loss": 1.2603, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.48206483080987705, |
|
"grad_norm": 2.4485607147216797, |
|
"learning_rate": 1.106448815336493e-05, |
|
"loss": 1.2792, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.48287775632557667, |
|
"grad_norm": 3.7001748085021973, |
|
"learning_rate": 1.1038300563999455e-05, |
|
"loss": 1.2846, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.4836906818412763, |
|
"grad_norm": 2.6942710876464844, |
|
"learning_rate": 1.1012105774481446e-05, |
|
"loss": 1.2864, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.4845036073569759, |
|
"grad_norm": 2.5104377269744873, |
|
"learning_rate": 1.0985903966460115e-05, |
|
"loss": 1.256, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.48531653287267557, |
|
"grad_norm": 2.4864704608917236, |
|
"learning_rate": 1.0959695321633346e-05, |
|
"loss": 1.2838, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.4861294583883752, |
|
"grad_norm": 3.2645606994628906, |
|
"learning_rate": 1.0933480021746432e-05, |
|
"loss": 1.2966, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.4869423839040748, |
|
"grad_norm": 28.041383743286133, |
|
"learning_rate": 1.0907258248590816e-05, |
|
"loss": 1.2513, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.4877553094197744, |
|
"grad_norm": 2.736785888671875, |
|
"learning_rate": 1.0881030184002827e-05, |
|
"loss": 1.3217, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.488568234935474, |
|
"grad_norm": 4.294330596923828, |
|
"learning_rate": 1.0854796009862434e-05, |
|
"loss": 1.3007, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.48938116045117364, |
|
"grad_norm": 2.629371404647827, |
|
"learning_rate": 1.0828555908091958e-05, |
|
"loss": 1.2884, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.4901940859668733, |
|
"grad_norm": 3.166304588317871, |
|
"learning_rate": 1.0802310060654832e-05, |
|
"loss": 1.3127, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.4910070114825729, |
|
"grad_norm": 2.5344200134277344, |
|
"learning_rate": 1.0776058649554336e-05, |
|
"loss": 1.249, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.49181993699827253, |
|
"grad_norm": 3.2902913093566895, |
|
"learning_rate": 1.0749801856832325e-05, |
|
"loss": 1.2341, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.49263286251397215, |
|
"grad_norm": 2.5863964557647705, |
|
"learning_rate": 1.0723539864567983e-05, |
|
"loss": 1.3534, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.49344578802967176, |
|
"grad_norm": 3.1407294273376465, |
|
"learning_rate": 1.0697272854876537e-05, |
|
"loss": 1.2452, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.49425871354537143, |
|
"grad_norm": 2.339702844619751, |
|
"learning_rate": 1.0671001009908015e-05, |
|
"loss": 1.2597, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.49507163906107104, |
|
"grad_norm": 2.5861027240753174, |
|
"learning_rate": 1.0644724511845976e-05, |
|
"loss": 1.304, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.49588456457677066, |
|
"grad_norm": 2.6124143600463867, |
|
"learning_rate": 1.0618443542906251e-05, |
|
"loss": 1.2333, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.4966974900924703, |
|
"grad_norm": 2.53468918800354, |
|
"learning_rate": 1.059215828533566e-05, |
|
"loss": 1.2587, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.4975104156081699, |
|
"grad_norm": 5.205654621124268, |
|
"learning_rate": 1.0565868921410776e-05, |
|
"loss": 1.2758, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.4983233411238695, |
|
"grad_norm": 3.3307433128356934, |
|
"learning_rate": 1.0539575633436645e-05, |
|
"loss": 1.3197, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.49913626663956917, |
|
"grad_norm": 2.4654664993286133, |
|
"learning_rate": 1.0513278603745523e-05, |
|
"loss": 1.2733, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.4999491921552688, |
|
"grad_norm": 2.5150272846221924, |
|
"learning_rate": 1.0486978014695606e-05, |
|
"loss": 1.2841, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.5007621176709683, |
|
"grad_norm": 2.660186767578125, |
|
"learning_rate": 1.0460674048669783e-05, |
|
"loss": 1.3007, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.501575043186668, |
|
"grad_norm": 2.7415716648101807, |
|
"learning_rate": 1.0434366888074363e-05, |
|
"loss": 1.2974, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.5023879687023677, |
|
"grad_norm": 2.479142427444458, |
|
"learning_rate": 1.0408056715337797e-05, |
|
"loss": 1.301, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.5032008942180672, |
|
"grad_norm": 2.4590210914611816, |
|
"learning_rate": 1.0381743712909424e-05, |
|
"loss": 1.2253, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.5040138197337669, |
|
"grad_norm": 2.4704954624176025, |
|
"learning_rate": 1.0355428063258224e-05, |
|
"loss": 1.1927, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.5048267452494665, |
|
"grad_norm": 3.5037641525268555, |
|
"learning_rate": 1.0329109948871512e-05, |
|
"loss": 1.2727, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.5056396707651661, |
|
"grad_norm": 2.6537327766418457, |
|
"learning_rate": 1.0302789552253702e-05, |
|
"loss": 1.2295, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.5064525962808658, |
|
"grad_norm": 3.4443886280059814, |
|
"learning_rate": 1.0276467055925044e-05, |
|
"loss": 1.2403, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.5072655217965654, |
|
"grad_norm": 4.377493858337402, |
|
"learning_rate": 1.0250142642420335e-05, |
|
"loss": 1.2667, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.508078447312265, |
|
"grad_norm": 2.712472677230835, |
|
"learning_rate": 1.0223816494287675e-05, |
|
"loss": 1.3323, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.5088913728279646, |
|
"grad_norm": 2.922093152999878, |
|
"learning_rate": 1.0197488794087188e-05, |
|
"loss": 1.2713, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.5097042983436643, |
|
"grad_norm": 11.951809883117676, |
|
"learning_rate": 1.0171159724389766e-05, |
|
"loss": 1.2997, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.5105172238593639, |
|
"grad_norm": 2.5700554847717285, |
|
"learning_rate": 1.0144829467775794e-05, |
|
"loss": 1.261, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.5113301493750635, |
|
"grad_norm": 2.6800413131713867, |
|
"learning_rate": 1.0118498206833886e-05, |
|
"loss": 1.3292, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.5121430748907632, |
|
"grad_norm": 4.24453592300415, |
|
"learning_rate": 1.0092166124159628e-05, |
|
"loss": 1.3281, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.5129560004064627, |
|
"grad_norm": 2.7513749599456787, |
|
"learning_rate": 1.0065833402354302e-05, |
|
"loss": 1.2944, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.5137689259221624, |
|
"grad_norm": 2.610588788986206, |
|
"learning_rate": 1.003950022402361e-05, |
|
"loss": 1.3129, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.5145818514378621, |
|
"grad_norm": 2.949564218521118, |
|
"learning_rate": 1.0013166771776441e-05, |
|
"loss": 1.2961, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.5153947769535616, |
|
"grad_norm": 2.5617198944091797, |
|
"learning_rate": 9.986833228223562e-06, |
|
"loss": 1.2898, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.5162077024692613, |
|
"grad_norm": 2.779733896255493, |
|
"learning_rate": 9.96049977597639e-06, |
|
"loss": 1.2988, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.5170206279849608, |
|
"grad_norm": 2.8505136966705322, |
|
"learning_rate": 9.934166597645703e-06, |
|
"loss": 1.2652, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.5178335535006605, |
|
"grad_norm": 2.847262144088745, |
|
"learning_rate": 9.907833875840374e-06, |
|
"loss": 1.3076, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.5186464790163601, |
|
"grad_norm": 4.957255840301514, |
|
"learning_rate": 9.881501793166117e-06, |
|
"loss": 1.214, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.5194594045320597, |
|
"grad_norm": 2.7829556465148926, |
|
"learning_rate": 9.85517053222421e-06, |
|
"loss": 1.2379, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.5202723300477594, |
|
"grad_norm": 2.7060935497283936, |
|
"learning_rate": 9.82884027561024e-06, |
|
"loss": 1.3016, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.521085255563459, |
|
"grad_norm": 6.336554527282715, |
|
"learning_rate": 9.802511205912815e-06, |
|
"loss": 1.269, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.5218981810791586, |
|
"grad_norm": 3.0378448963165283, |
|
"learning_rate": 9.776183505712327e-06, |
|
"loss": 1.317, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.5227111065948582, |
|
"grad_norm": 5.806065082550049, |
|
"learning_rate": 9.749857357579667e-06, |
|
"loss": 1.3165, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.5235240321105579, |
|
"grad_norm": 2.7738869190216064, |
|
"learning_rate": 9.723532944074961e-06, |
|
"loss": 1.2835, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.5243369576262575, |
|
"grad_norm": 2.6603453159332275, |
|
"learning_rate": 9.6972104477463e-06, |
|
"loss": 1.2673, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.5251498831419571, |
|
"grad_norm": 2.9316189289093018, |
|
"learning_rate": 9.670890051128493e-06, |
|
"loss": 1.249, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.5259628086576568, |
|
"grad_norm": 2.8541407585144043, |
|
"learning_rate": 9.644571936741778e-06, |
|
"loss": 1.2835, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.5267757341733563, |
|
"grad_norm": 2.6935575008392334, |
|
"learning_rate": 9.618256287090576e-06, |
|
"loss": 1.2859, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.527588659689056, |
|
"grad_norm": 3.057039260864258, |
|
"learning_rate": 9.591943284662206e-06, |
|
"loss": 1.2538, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.5284015852047557, |
|
"grad_norm": 3.2430379390716553, |
|
"learning_rate": 9.56563311192564e-06, |
|
"loss": 1.294, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.5292145107204552, |
|
"grad_norm": 2.378072500228882, |
|
"learning_rate": 9.53932595133022e-06, |
|
"loss": 1.2793, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.5300274362361549, |
|
"grad_norm": 3.2185440063476562, |
|
"learning_rate": 9.513021985304399e-06, |
|
"loss": 1.2868, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.5308403617518545, |
|
"grad_norm": 3.272632122039795, |
|
"learning_rate": 9.486721396254484e-06, |
|
"loss": 1.2128, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.5316532872675541, |
|
"grad_norm": 3.163884401321411, |
|
"learning_rate": 9.460424366563355e-06, |
|
"loss": 1.2962, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.5324662127832538, |
|
"grad_norm": 3.096857786178589, |
|
"learning_rate": 9.434131078589224e-06, |
|
"loss": 1.2575, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.5332791382989533, |
|
"grad_norm": 2.711069107055664, |
|
"learning_rate": 9.407841714664343e-06, |
|
"loss": 1.2969, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.534092063814653, |
|
"grad_norm": 4.4655866622924805, |
|
"learning_rate": 9.381556457093752e-06, |
|
"loss": 1.2229, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.5349049893303526, |
|
"grad_norm": 2.7365305423736572, |
|
"learning_rate": 9.355275488154025e-06, |
|
"loss": 1.285, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.5357179148460522, |
|
"grad_norm": 3.4264895915985107, |
|
"learning_rate": 9.32899899009199e-06, |
|
"loss": 1.3222, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.5365308403617518, |
|
"grad_norm": 2.9572296142578125, |
|
"learning_rate": 9.30272714512347e-06, |
|
"loss": 1.2771, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.5373437658774515, |
|
"grad_norm": 3.124464988708496, |
|
"learning_rate": 9.276460135432019e-06, |
|
"loss": 1.2362, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.5381566913931511, |
|
"grad_norm": 3.484861373901367, |
|
"learning_rate": 9.250198143167675e-06, |
|
"loss": 1.2624, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.5389696169088507, |
|
"grad_norm": 3.191455602645874, |
|
"learning_rate": 9.223941350445666e-06, |
|
"loss": 1.3271, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.5397825424245504, |
|
"grad_norm": 3.055478572845459, |
|
"learning_rate": 9.19768993934517e-06, |
|
"loss": 1.2476, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.5405954679402499, |
|
"grad_norm": 2.8661985397338867, |
|
"learning_rate": 9.171444091908046e-06, |
|
"loss": 1.2575, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.5414083934559496, |
|
"grad_norm": 3.042300224304199, |
|
"learning_rate": 9.145203990137571e-06, |
|
"loss": 1.2472, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.5422213189716493, |
|
"grad_norm": 3.324767827987671, |
|
"learning_rate": 9.118969815997174e-06, |
|
"loss": 1.2608, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.5430342444873488, |
|
"grad_norm": 2.8374948501586914, |
|
"learning_rate": 9.092741751409186e-06, |
|
"loss": 1.2865, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.5438471700030485, |
|
"grad_norm": 3.3593552112579346, |
|
"learning_rate": 9.06651997825357e-06, |
|
"loss": 1.2746, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.544660095518748, |
|
"grad_norm": 3.2432382106781006, |
|
"learning_rate": 9.040304678366658e-06, |
|
"loss": 1.2864, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.5454730210344477, |
|
"grad_norm": 2.890409469604492, |
|
"learning_rate": 9.014096033539889e-06, |
|
"loss": 1.2685, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.5462859465501474, |
|
"grad_norm": 3.0769150257110596, |
|
"learning_rate": 8.987894225518556e-06, |
|
"loss": 1.2701, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.547098872065847, |
|
"grad_norm": 3.453287363052368, |
|
"learning_rate": 8.961699436000548e-06, |
|
"loss": 1.2218, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.5479117975815466, |
|
"grad_norm": 3.1950011253356934, |
|
"learning_rate": 8.93551184663507e-06, |
|
"loss": 1.2267, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.5487247230972462, |
|
"grad_norm": 3.445006847381592, |
|
"learning_rate": 8.909331639021414e-06, |
|
"loss": 1.283, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.5495376486129459, |
|
"grad_norm": 2.5453741550445557, |
|
"learning_rate": 8.883158994707666e-06, |
|
"loss": 1.3102, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.5503505741286454, |
|
"grad_norm": 4.167499542236328, |
|
"learning_rate": 8.856994095189477e-06, |
|
"loss": 1.2881, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.5511634996443451, |
|
"grad_norm": 2.6888363361358643, |
|
"learning_rate": 8.830837121908783e-06, |
|
"loss": 1.2332, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.5519764251600447, |
|
"grad_norm": 2.9484667778015137, |
|
"learning_rate": 8.804688256252557e-06, |
|
"loss": 1.2676, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.5527893506757443, |
|
"grad_norm": 2.5477519035339355, |
|
"learning_rate": 8.778547679551555e-06, |
|
"loss": 1.2956, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.553602276191444, |
|
"grad_norm": 2.3307385444641113, |
|
"learning_rate": 8.75241557307904e-06, |
|
"loss": 1.3021, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.5544152017071435, |
|
"grad_norm": 3.1104202270507812, |
|
"learning_rate": 8.726292118049555e-06, |
|
"loss": 1.2861, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.5552281272228432, |
|
"grad_norm": 3.2731287479400635, |
|
"learning_rate": 8.700177495617635e-06, |
|
"loss": 1.33, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.5560410527385429, |
|
"grad_norm": 2.923478364944458, |
|
"learning_rate": 8.674071886876572e-06, |
|
"loss": 1.2946, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.5568539782542424, |
|
"grad_norm": 3.1030538082122803, |
|
"learning_rate": 8.647975472857148e-06, |
|
"loss": 1.2481, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.5576669037699421, |
|
"grad_norm": 2.6904759407043457, |
|
"learning_rate": 8.621888434526382e-06, |
|
"loss": 1.2637, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.5584798292856417, |
|
"grad_norm": 3.6781442165374756, |
|
"learning_rate": 8.595810952786289e-06, |
|
"loss": 1.2875, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.5592927548013413, |
|
"grad_norm": 4.897818565368652, |
|
"learning_rate": 8.569743208472594e-06, |
|
"loss": 1.2804, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.560105680317041, |
|
"grad_norm": 2.9090828895568848, |
|
"learning_rate": 8.543685382353518e-06, |
|
"loss": 1.2817, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.5609186058327406, |
|
"grad_norm": 3.3284378051757812, |
|
"learning_rate": 8.51763765512849e-06, |
|
"loss": 1.2928, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.5617315313484402, |
|
"grad_norm": 3.440209150314331, |
|
"learning_rate": 8.491600207426907e-06, |
|
"loss": 1.2667, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.5625444568641398, |
|
"grad_norm": 3.1297762393951416, |
|
"learning_rate": 8.465573219806893e-06, |
|
"loss": 1.2752, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.5633573823798395, |
|
"grad_norm": 3.460277795791626, |
|
"learning_rate": 8.439556872754025e-06, |
|
"loss": 1.2611, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.5641703078955391, |
|
"grad_norm": 2.6390557289123535, |
|
"learning_rate": 8.413551346680095e-06, |
|
"loss": 1.2339, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.5649832334112387, |
|
"grad_norm": 2.365945339202881, |
|
"learning_rate": 8.38755682192186e-06, |
|
"loss": 1.2333, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.5657961589269384, |
|
"grad_norm": 3.140129804611206, |
|
"learning_rate": 8.36157347873979e-06, |
|
"loss": 1.2614, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.5666090844426379, |
|
"grad_norm": 4.027166366577148, |
|
"learning_rate": 8.335601497316809e-06, |
|
"loss": 1.263, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.5674220099583376, |
|
"grad_norm": 2.6872942447662354, |
|
"learning_rate": 8.309641057757052e-06, |
|
"loss": 1.2479, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.5682349354740371, |
|
"grad_norm": 2.575493574142456, |
|
"learning_rate": 8.283692340084623e-06, |
|
"loss": 1.2818, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.5690478609897368, |
|
"grad_norm": 2.6429176330566406, |
|
"learning_rate": 8.257755524242333e-06, |
|
"loss": 1.2921, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.5698607865054365, |
|
"grad_norm": 4.695654392242432, |
|
"learning_rate": 8.231830790090461e-06, |
|
"loss": 1.2046, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.570673712021136, |
|
"grad_norm": 2.4642715454101562, |
|
"learning_rate": 8.205918317405508e-06, |
|
"loss": 1.3013, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.5714866375368357, |
|
"grad_norm": 2.567474842071533, |
|
"learning_rate": 8.18001828587895e-06, |
|
"loss": 1.3458, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.5722995630525353, |
|
"grad_norm": 2.934668779373169, |
|
"learning_rate": 8.154130875115978e-06, |
|
"loss": 1.2804, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.5731124885682349, |
|
"grad_norm": 2.669285297393799, |
|
"learning_rate": 8.12825626463427e-06, |
|
"loss": 1.2329, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.5739254140839346, |
|
"grad_norm": 2.7390220165252686, |
|
"learning_rate": 8.102394633862743e-06, |
|
"loss": 1.2177, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.5747383395996342, |
|
"grad_norm": 3.19964861869812, |
|
"learning_rate": 8.0765461621403e-06, |
|
"loss": 1.2625, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.5755512651153338, |
|
"grad_norm": 2.753469705581665, |
|
"learning_rate": 8.050711028714589e-06, |
|
"loss": 1.2357, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.5763641906310334, |
|
"grad_norm": 3.3288702964782715, |
|
"learning_rate": 8.02488941274078e-06, |
|
"loss": 1.217, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.5771771161467331, |
|
"grad_norm": 2.808100700378418, |
|
"learning_rate": 7.999081493280283e-06, |
|
"loss": 1.3156, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.5779900416624327, |
|
"grad_norm": 2.8736870288848877, |
|
"learning_rate": 7.973287449299545e-06, |
|
"loss": 1.3122, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.5788029671781323, |
|
"grad_norm": 6.863023281097412, |
|
"learning_rate": 7.947507459668784e-06, |
|
"loss": 1.2218, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.579615892693832, |
|
"grad_norm": 4.454842567443848, |
|
"learning_rate": 7.921741703160758e-06, |
|
"loss": 1.1918, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.5804288182095315, |
|
"grad_norm": 2.4465959072113037, |
|
"learning_rate": 7.895990358449533e-06, |
|
"loss": 1.2705, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.5812417437252312, |
|
"grad_norm": 3.3625428676605225, |
|
"learning_rate": 7.87025360410922e-06, |
|
"loss": 1.2644, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.5820546692409307, |
|
"grad_norm": 2.846947431564331, |
|
"learning_rate": 7.844531618612772e-06, |
|
"loss": 1.2612, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.5828675947566304, |
|
"grad_norm": 3.332118034362793, |
|
"learning_rate": 7.81882458033071e-06, |
|
"loss": 1.2597, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.5836805202723301, |
|
"grad_norm": 2.646106719970703, |
|
"learning_rate": 7.79313266752991e-06, |
|
"loss": 1.2613, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.5844934457880296, |
|
"grad_norm": 2.8592135906219482, |
|
"learning_rate": 7.767456058372362e-06, |
|
"loss": 1.282, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.5853063713037293, |
|
"grad_norm": 2.748481035232544, |
|
"learning_rate": 7.741794930913922e-06, |
|
"loss": 1.2869, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.5861192968194289, |
|
"grad_norm": 2.8134074211120605, |
|
"learning_rate": 7.7161494631031e-06, |
|
"loss": 1.3079, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.5869322223351285, |
|
"grad_norm": 3.059119939804077, |
|
"learning_rate": 7.690519832779799e-06, |
|
"loss": 1.2705, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.5877451478508282, |
|
"grad_norm": 2.6439130306243896, |
|
"learning_rate": 7.664906217674115e-06, |
|
"loss": 1.2413, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.5885580733665278, |
|
"grad_norm": 2.812056303024292, |
|
"learning_rate": 7.639308795405066e-06, |
|
"loss": 1.2543, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.5893709988822274, |
|
"grad_norm": 3.2603330612182617, |
|
"learning_rate": 7.613727743479395e-06, |
|
"loss": 1.2442, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.590183924397927, |
|
"grad_norm": 2.544433116912842, |
|
"learning_rate": 7.588163239290316e-06, |
|
"loss": 1.3034, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.5909968499136267, |
|
"grad_norm": 4.0246262550354, |
|
"learning_rate": 7.562615460116289e-06, |
|
"loss": 1.3188, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.5918097754293263, |
|
"grad_norm": 4.249239444732666, |
|
"learning_rate": 7.537084583119802e-06, |
|
"loss": 1.3091, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.5926227009450259, |
|
"grad_norm": 2.7686362266540527, |
|
"learning_rate": 7.511570785346129e-06, |
|
"loss": 1.2449, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.5934356264607256, |
|
"grad_norm": 2.8529245853424072, |
|
"learning_rate": 7.486074243722109e-06, |
|
"loss": 1.2392, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.5942485519764251, |
|
"grad_norm": 3.073486328125, |
|
"learning_rate": 7.460595135054916e-06, |
|
"loss": 1.2848, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.5950614774921248, |
|
"grad_norm": 3.365366220474243, |
|
"learning_rate": 7.435133636030831e-06, |
|
"loss": 1.2912, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.5958744030078245, |
|
"grad_norm": 2.4938106536865234, |
|
"learning_rate": 7.4096899232140295e-06, |
|
"loss": 1.2965, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.596687328523524, |
|
"grad_norm": 2.9927473068237305, |
|
"learning_rate": 7.384264173045339e-06, |
|
"loss": 1.2748, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.5975002540392237, |
|
"grad_norm": 7.3427205085754395, |
|
"learning_rate": 7.358856561841021e-06, |
|
"loss": 1.2457, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.5983131795549232, |
|
"grad_norm": 3.274311065673828, |
|
"learning_rate": 7.333467265791563e-06, |
|
"loss": 1.2225, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.5991261050706229, |
|
"grad_norm": 4.503856658935547, |
|
"learning_rate": 7.308096460960441e-06, |
|
"loss": 1.2603, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.5999390305863225, |
|
"grad_norm": 3.6017913818359375, |
|
"learning_rate": 7.282744323282895e-06, |
|
"loss": 1.2278, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.6007519561020221, |
|
"grad_norm": 3.0930585861206055, |
|
"learning_rate": 7.2574110285647244e-06, |
|
"loss": 1.2649, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.6015648816177218, |
|
"grad_norm": 2.6793737411499023, |
|
"learning_rate": 7.232096752481061e-06, |
|
"loss": 1.215, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.6023778071334214, |
|
"grad_norm": 3.0066819190979004, |
|
"learning_rate": 7.206801670575145e-06, |
|
"loss": 1.2953, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.603190732649121, |
|
"grad_norm": 3.2586004734039307, |
|
"learning_rate": 7.181525958257116e-06, |
|
"loss": 1.1988, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.6040036581648206, |
|
"grad_norm": 3.186267375946045, |
|
"learning_rate": 7.156269790802801e-06, |
|
"loss": 1.2425, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.6048165836805203, |
|
"grad_norm": 3.919509172439575, |
|
"learning_rate": 7.131033343352483e-06, |
|
"loss": 1.3432, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.6056295091962199, |
|
"grad_norm": 3.8313186168670654, |
|
"learning_rate": 7.105816790909699e-06, |
|
"loss": 1.2491, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.6064424347119195, |
|
"grad_norm": 2.7689011096954346, |
|
"learning_rate": 7.080620308340024e-06, |
|
"loss": 1.2673, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.6072553602276192, |
|
"grad_norm": 4.105691909790039, |
|
"learning_rate": 7.055444070369852e-06, |
|
"loss": 1.2688, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.6080682857433187, |
|
"grad_norm": 3.336580276489258, |
|
"learning_rate": 7.0302882515852025e-06, |
|
"loss": 1.2613, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.6088812112590184, |
|
"grad_norm": 3.7272021770477295, |
|
"learning_rate": 7.005153026430476e-06, |
|
"loss": 1.1882, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.6096941367747181, |
|
"grad_norm": 4.220558166503906, |
|
"learning_rate": 6.980038569207291e-06, |
|
"loss": 1.1853, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.6105070622904176, |
|
"grad_norm": 2.8943638801574707, |
|
"learning_rate": 6.954945054073228e-06, |
|
"loss": 1.2408, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.6113199878061173, |
|
"grad_norm": 2.740449905395508, |
|
"learning_rate": 6.929872655040655e-06, |
|
"loss": 1.2233, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.6121329133218169, |
|
"grad_norm": 3.1293320655822754, |
|
"learning_rate": 6.904821545975507e-06, |
|
"loss": 1.2362, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.6129458388375165, |
|
"grad_norm": 2.9130334854125977, |
|
"learning_rate": 6.879791900596077e-06, |
|
"loss": 1.2525, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.6137587643532162, |
|
"grad_norm": 2.6800663471221924, |
|
"learning_rate": 6.854783892471823e-06, |
|
"loss": 1.2811, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.6145716898689157, |
|
"grad_norm": 2.7140908241271973, |
|
"learning_rate": 6.829797695022163e-06, |
|
"loss": 1.2693, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.6153846153846154, |
|
"grad_norm": 2.687870740890503, |
|
"learning_rate": 6.804833481515256e-06, |
|
"loss": 1.2124, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.616197540900315, |
|
"grad_norm": 3.170487880706787, |
|
"learning_rate": 6.7798914250668154e-06, |
|
"loss": 1.2373, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.6170104664160146, |
|
"grad_norm": 2.6142961978912354, |
|
"learning_rate": 6.7549716986389146e-06, |
|
"loss": 1.2527, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.6178233919317142, |
|
"grad_norm": 3.4092085361480713, |
|
"learning_rate": 6.730074475038766e-06, |
|
"loss": 1.2401, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.6186363174474139, |
|
"grad_norm": 3.256838083267212, |
|
"learning_rate": 6.7051999269175405e-06, |
|
"loss": 1.1863, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.6194492429631135, |
|
"grad_norm": 2.8312947750091553, |
|
"learning_rate": 6.680348226769162e-06, |
|
"loss": 1.241, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.6202621684788131, |
|
"grad_norm": 2.799750804901123, |
|
"learning_rate": 6.655519546929121e-06, |
|
"loss": 1.2601, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.6210750939945128, |
|
"grad_norm": 3.188913106918335, |
|
"learning_rate": 6.630714059573267e-06, |
|
"loss": 1.2719, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.6218880195102123, |
|
"grad_norm": 5.547321796417236, |
|
"learning_rate": 6.6059319367166165e-06, |
|
"loss": 1.2307, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.622700945025912, |
|
"grad_norm": 3.2380361557006836, |
|
"learning_rate": 6.581173350212169e-06, |
|
"loss": 1.2125, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.6235138705416117, |
|
"grad_norm": 2.61883282661438, |
|
"learning_rate": 6.55643847174971e-06, |
|
"loss": 1.2556, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.6243267960573112, |
|
"grad_norm": 3.0079920291900635, |
|
"learning_rate": 6.531727472854617e-06, |
|
"loss": 1.2761, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.6251397215730109, |
|
"grad_norm": 3.995910882949829, |
|
"learning_rate": 6.507040524886672e-06, |
|
"loss": 1.302, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.6259526470887105, |
|
"grad_norm": 2.7787578105926514, |
|
"learning_rate": 6.482377799038882e-06, |
|
"loss": 1.2249, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.6267655726044101, |
|
"grad_norm": 3.6458895206451416, |
|
"learning_rate": 6.45773946633628e-06, |
|
"loss": 1.2833, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.6275784981201098, |
|
"grad_norm": 2.9308435916900635, |
|
"learning_rate": 6.4331256976347434e-06, |
|
"loss": 1.309, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.6283914236358094, |
|
"grad_norm": 3.7917234897613525, |
|
"learning_rate": 6.408536663619803e-06, |
|
"loss": 1.2996, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.629204349151509, |
|
"grad_norm": 8.85531234741211, |
|
"learning_rate": 6.383972534805478e-06, |
|
"loss": 1.2499, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.6300172746672086, |
|
"grad_norm": 4.16661262512207, |
|
"learning_rate": 6.359433481533074e-06, |
|
"loss": 1.1928, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.6308302001829083, |
|
"grad_norm": 3.6679298877716064, |
|
"learning_rate": 6.3349196739700024e-06, |
|
"loss": 1.2917, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.6316431256986078, |
|
"grad_norm": 3.2031593322753906, |
|
"learning_rate": 6.310431282108622e-06, |
|
"loss": 1.2926, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.6324560512143075, |
|
"grad_norm": 2.7538363933563232, |
|
"learning_rate": 6.2859684757650365e-06, |
|
"loss": 1.2634, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.6332689767300071, |
|
"grad_norm": 3.4906575679779053, |
|
"learning_rate": 6.261531424577923e-06, |
|
"loss": 1.2711, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.6340819022457067, |
|
"grad_norm": 3.4287617206573486, |
|
"learning_rate": 6.2371202980073596e-06, |
|
"loss": 1.2412, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.6348948277614064, |
|
"grad_norm": 3.5826241970062256, |
|
"learning_rate": 6.212735265333655e-06, |
|
"loss": 1.1782, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.6357077532771059, |
|
"grad_norm": 3.369983673095703, |
|
"learning_rate": 6.188376495656156e-06, |
|
"loss": 1.2628, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.6365206787928056, |
|
"grad_norm": 3.6163413524627686, |
|
"learning_rate": 6.164044157892102e-06, |
|
"loss": 1.3304, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.6373336043085053, |
|
"grad_norm": 2.6903252601623535, |
|
"learning_rate": 6.13973842077543e-06, |
|
"loss": 1.2458, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.6381465298242048, |
|
"grad_norm": 3.919074296951294, |
|
"learning_rate": 6.11545945285561e-06, |
|
"loss": 1.253, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.6389594553399045, |
|
"grad_norm": 2.9155240058898926, |
|
"learning_rate": 6.091207422496489e-06, |
|
"loss": 1.2661, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.6397723808556041, |
|
"grad_norm": 3.2426347732543945, |
|
"learning_rate": 6.066982497875109e-06, |
|
"loss": 1.2556, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.6405853063713037, |
|
"grad_norm": 3.078899383544922, |
|
"learning_rate": 6.042784846980542e-06, |
|
"loss": 1.2572, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.6413982318870034, |
|
"grad_norm": 3.3044381141662598, |
|
"learning_rate": 6.018614637612733e-06, |
|
"loss": 1.2301, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.642211157402703, |
|
"grad_norm": 2.8474955558776855, |
|
"learning_rate": 5.99447203738134e-06, |
|
"loss": 1.2042, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.6430240829184026, |
|
"grad_norm": 2.9787845611572266, |
|
"learning_rate": 5.9703572137045495e-06, |
|
"loss": 1.2608, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.6438370084341022, |
|
"grad_norm": 3.380209445953369, |
|
"learning_rate": 5.946270333807937e-06, |
|
"loss": 1.2973, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.6446499339498019, |
|
"grad_norm": 2.81736421585083, |
|
"learning_rate": 5.922211564723302e-06, |
|
"loss": 1.2791, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.6454628594655015, |
|
"grad_norm": 2.9054102897644043, |
|
"learning_rate": 5.898181073287504e-06, |
|
"loss": 1.2692, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.6462757849812011, |
|
"grad_norm": 3.2480154037475586, |
|
"learning_rate": 5.87417902614131e-06, |
|
"loss": 1.311, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.6470887104969008, |
|
"grad_norm": 2.8822832107543945, |
|
"learning_rate": 5.850205589728239e-06, |
|
"loss": 1.2528, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.6479016360126003, |
|
"grad_norm": 2.8832008838653564, |
|
"learning_rate": 5.826260930293417e-06, |
|
"loss": 1.2631, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.6487145615283, |
|
"grad_norm": 3.547271490097046, |
|
"learning_rate": 5.802345213882396e-06, |
|
"loss": 1.2543, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.6495274870439995, |
|
"grad_norm": 9.93248176574707, |
|
"learning_rate": 5.778458606340037e-06, |
|
"loss": 1.3218, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.6503404125596992, |
|
"grad_norm": 4.664019584655762, |
|
"learning_rate": 5.754601273309333e-06, |
|
"loss": 1.2487, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.6511533380753989, |
|
"grad_norm": 3.191390037536621, |
|
"learning_rate": 5.730773380230276e-06, |
|
"loss": 1.1966, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.6519662635910984, |
|
"grad_norm": 3.228309392929077, |
|
"learning_rate": 5.70697509233871e-06, |
|
"loss": 1.2556, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.6527791891067981, |
|
"grad_norm": 3.1456098556518555, |
|
"learning_rate": 5.683206574665165e-06, |
|
"loss": 1.2308, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.6535921146224977, |
|
"grad_norm": 2.800039052963257, |
|
"learning_rate": 5.6594679920337514e-06, |
|
"loss": 1.2599, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.6544050401381973, |
|
"grad_norm": 2.9048550128936768, |
|
"learning_rate": 5.635759509060969e-06, |
|
"loss": 1.2707, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.655217965653897, |
|
"grad_norm": 4.015383720397949, |
|
"learning_rate": 5.612081290154607e-06, |
|
"loss": 1.1853, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.6560308911695966, |
|
"grad_norm": 2.6166458129882812, |
|
"learning_rate": 5.58843349951258e-06, |
|
"loss": 1.2589, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.6568438166852962, |
|
"grad_norm": 4.735121726989746, |
|
"learning_rate": 5.564816301121792e-06, |
|
"loss": 1.2395, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.6576567422009958, |
|
"grad_norm": 3.5069589614868164, |
|
"learning_rate": 5.541229858757011e-06, |
|
"loss": 1.2888, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.6584696677166955, |
|
"grad_norm": 2.354539394378662, |
|
"learning_rate": 5.517674335979721e-06, |
|
"loss": 1.1898, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.6592825932323951, |
|
"grad_norm": 3.2337725162506104, |
|
"learning_rate": 5.494149896136998e-06, |
|
"loss": 1.311, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.6600955187480947, |
|
"grad_norm": 2.6511757373809814, |
|
"learning_rate": 5.470656702360367e-06, |
|
"loss": 1.2788, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.6609084442637944, |
|
"grad_norm": 3.772780179977417, |
|
"learning_rate": 5.447194917564671e-06, |
|
"loss": 1.2211, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.6617213697794939, |
|
"grad_norm": 2.540316581726074, |
|
"learning_rate": 5.423764704446954e-06, |
|
"loss": 1.2647, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.6625342952951936, |
|
"grad_norm": 2.792747735977173, |
|
"learning_rate": 5.400366225485326e-06, |
|
"loss": 1.2184, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.6633472208108933, |
|
"grad_norm": 3.32261061668396, |
|
"learning_rate": 5.376999642937817e-06, |
|
"loss": 1.2727, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.6641601463265928, |
|
"grad_norm": 4.128072738647461, |
|
"learning_rate": 5.353665118841296e-06, |
|
"loss": 1.2718, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.6649730718422925, |
|
"grad_norm": 2.9913909435272217, |
|
"learning_rate": 5.330362815010306e-06, |
|
"loss": 1.2698, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.665785997357992, |
|
"grad_norm": 2.9993457794189453, |
|
"learning_rate": 5.307092893035951e-06, |
|
"loss": 1.2447, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.6665989228736917, |
|
"grad_norm": 2.801236629486084, |
|
"learning_rate": 5.2838555142847925e-06, |
|
"loss": 1.209, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.6674118483893913, |
|
"grad_norm": 3.982821464538574, |
|
"learning_rate": 5.260650839897719e-06, |
|
"loss": 1.3099, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.6682247739050909, |
|
"grad_norm": 2.9553382396698, |
|
"learning_rate": 5.237479030788817e-06, |
|
"loss": 1.2652, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.6690376994207906, |
|
"grad_norm": 3.233414888381958, |
|
"learning_rate": 5.214340247644278e-06, |
|
"loss": 1.2256, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.6698506249364902, |
|
"grad_norm": 3.1418299674987793, |
|
"learning_rate": 5.191234650921273e-06, |
|
"loss": 1.2225, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.6706635504521898, |
|
"grad_norm": 2.8071773052215576, |
|
"learning_rate": 5.168162400846835e-06, |
|
"loss": 1.3381, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.6714764759678894, |
|
"grad_norm": 3.2606897354125977, |
|
"learning_rate": 5.145123657416759e-06, |
|
"loss": 1.2671, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.6722894014835891, |
|
"grad_norm": 2.5103461742401123, |
|
"learning_rate": 5.122118580394473e-06, |
|
"loss": 1.2349, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.6731023269992887, |
|
"grad_norm": 2.882448196411133, |
|
"learning_rate": 5.099147329309959e-06, |
|
"loss": 1.2466, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.6739152525149883, |
|
"grad_norm": 3.0320730209350586, |
|
"learning_rate": 5.076210063458622e-06, |
|
"loss": 1.2157, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.674728178030688, |
|
"grad_norm": 3.285125970840454, |
|
"learning_rate": 5.0533069419002e-06, |
|
"loss": 1.3087, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.6755411035463875, |
|
"grad_norm": 3.9807510375976562, |
|
"learning_rate": 5.030438123457655e-06, |
|
"loss": 1.2153, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.6763540290620872, |
|
"grad_norm": 3.12975811958313, |
|
"learning_rate": 5.007603766716063e-06, |
|
"loss": 1.2064, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.6771669545777869, |
|
"grad_norm": 2.9132258892059326, |
|
"learning_rate": 4.984804030021533e-06, |
|
"loss": 1.2132, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.6779798800934864, |
|
"grad_norm": 2.872042417526245, |
|
"learning_rate": 4.962039071480102e-06, |
|
"loss": 1.2618, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.6787928056091861, |
|
"grad_norm": 3.7190613746643066, |
|
"learning_rate": 4.939309048956622e-06, |
|
"loss": 1.2482, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.6796057311248856, |
|
"grad_norm": 5.171625137329102, |
|
"learning_rate": 4.9166141200736885e-06, |
|
"loss": 1.2848, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.6804186566405853, |
|
"grad_norm": 3.5912961959838867, |
|
"learning_rate": 4.89395444221055e-06, |
|
"loss": 1.2525, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.6812315821562849, |
|
"grad_norm": 3.9113729000091553, |
|
"learning_rate": 4.871330172501979e-06, |
|
"loss": 1.2444, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.6820445076719845, |
|
"grad_norm": 5.135432720184326, |
|
"learning_rate": 4.848741467837228e-06, |
|
"loss": 1.2189, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.6828574331876842, |
|
"grad_norm": 3.0934841632843018, |
|
"learning_rate": 4.826188484858918e-06, |
|
"loss": 1.2357, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.6836703587033838, |
|
"grad_norm": 3.951188325881958, |
|
"learning_rate": 4.803671379961945e-06, |
|
"loss": 1.2539, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.6844832842190834, |
|
"grad_norm": 6.205260753631592, |
|
"learning_rate": 4.781190309292421e-06, |
|
"loss": 1.2537, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.685296209734783, |
|
"grad_norm": 4.493546485900879, |
|
"learning_rate": 4.758745428746569e-06, |
|
"loss": 1.252, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.6861091352504827, |
|
"grad_norm": 4.0202436447143555, |
|
"learning_rate": 4.736336893969652e-06, |
|
"loss": 1.1887, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.6869220607661823, |
|
"grad_norm": 2.65285587310791, |
|
"learning_rate": 4.7139648603548925e-06, |
|
"loss": 1.2612, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.6877349862818819, |
|
"grad_norm": 3.629551410675049, |
|
"learning_rate": 4.691629483042387e-06, |
|
"loss": 1.2411, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.6885479117975816, |
|
"grad_norm": 3.20709228515625, |
|
"learning_rate": 4.669330916918043e-06, |
|
"loss": 1.1949, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.6893608373132811, |
|
"grad_norm": 3.19427752494812, |
|
"learning_rate": 4.647069316612502e-06, |
|
"loss": 1.2134, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.6901737628289808, |
|
"grad_norm": 3.6364243030548096, |
|
"learning_rate": 4.624844836500052e-06, |
|
"loss": 1.2915, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.6909866883446805, |
|
"grad_norm": 3.5689237117767334, |
|
"learning_rate": 4.60265763069758e-06, |
|
"loss": 1.2234, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.69179961386038, |
|
"grad_norm": 3.1175014972686768, |
|
"learning_rate": 4.580507853063487e-06, |
|
"loss": 1.1833, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.6926125393760797, |
|
"grad_norm": 2.945756196975708, |
|
"learning_rate": 4.5583956571966295e-06, |
|
"loss": 1.2231, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.6934254648917793, |
|
"grad_norm": 4.729986667633057, |
|
"learning_rate": 4.5363211964352524e-06, |
|
"loss": 1.2578, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 0.6942383904074789, |
|
"grad_norm": 2.7775003910064697, |
|
"learning_rate": 4.514284623855915e-06, |
|
"loss": 1.2678, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.6950513159231786, |
|
"grad_norm": 4.027686595916748, |
|
"learning_rate": 4.4922860922724466e-06, |
|
"loss": 1.1692, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.6958642414388782, |
|
"grad_norm": 3.3442118167877197, |
|
"learning_rate": 4.470325754234881e-06, |
|
"loss": 1.2515, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.6966771669545778, |
|
"grad_norm": 3.197281837463379, |
|
"learning_rate": 4.448403762028391e-06, |
|
"loss": 1.2789, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 0.6974900924702774, |
|
"grad_norm": 3.1467063426971436, |
|
"learning_rate": 4.426520267672244e-06, |
|
"loss": 1.2498, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.698303017985977, |
|
"grad_norm": 8.657835960388184, |
|
"learning_rate": 4.40467542291874e-06, |
|
"loss": 1.2149, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 0.6991159435016766, |
|
"grad_norm": 5.045658111572266, |
|
"learning_rate": 4.382869379252152e-06, |
|
"loss": 1.2143, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.6999288690173763, |
|
"grad_norm": 3.543026924133301, |
|
"learning_rate": 4.361102287887698e-06, |
|
"loss": 1.2727, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 0.700741794533076, |
|
"grad_norm": 3.2592012882232666, |
|
"learning_rate": 4.339374299770477e-06, |
|
"loss": 1.2528, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.7015547200487755, |
|
"grad_norm": 3.284749984741211, |
|
"learning_rate": 4.31768556557441e-06, |
|
"loss": 1.1814, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 0.7023676455644752, |
|
"grad_norm": 2.9172427654266357, |
|
"learning_rate": 4.296036235701235e-06, |
|
"loss": 1.2536, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.7031805710801747, |
|
"grad_norm": 8.07040023803711, |
|
"learning_rate": 4.274426460279412e-06, |
|
"loss": 1.2113, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.7039934965958744, |
|
"grad_norm": 3.0349769592285156, |
|
"learning_rate": 4.252856389163128e-06, |
|
"loss": 1.2279, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.7048064221115741, |
|
"grad_norm": 2.7983269691467285, |
|
"learning_rate": 4.231326171931231e-06, |
|
"loss": 1.2585, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 0.7056193476272736, |
|
"grad_norm": 3.153099775314331, |
|
"learning_rate": 4.209835957886196e-06, |
|
"loss": 1.2576, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.7064322731429733, |
|
"grad_norm": 3.4303712844848633, |
|
"learning_rate": 4.188385896053098e-06, |
|
"loss": 1.2569, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.7072451986586729, |
|
"grad_norm": 3.310842990875244, |
|
"learning_rate": 4.166976135178575e-06, |
|
"loss": 1.2162, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.7080581241743725, |
|
"grad_norm": 3.982365846633911, |
|
"learning_rate": 4.1456068237297964e-06, |
|
"loss": 1.2409, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 0.7088710496900722, |
|
"grad_norm": 3.0641191005706787, |
|
"learning_rate": 4.124278109893432e-06, |
|
"loss": 1.2563, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.7096839752057718, |
|
"grad_norm": 2.9682273864746094, |
|
"learning_rate": 4.10299014157462e-06, |
|
"loss": 1.1857, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 0.7104969007214714, |
|
"grad_norm": 6.076914310455322, |
|
"learning_rate": 4.0817430663959536e-06, |
|
"loss": 1.2108, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.711309826237171, |
|
"grad_norm": 8.528678894042969, |
|
"learning_rate": 4.06053703169645e-06, |
|
"loss": 1.2185, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.7121227517528707, |
|
"grad_norm": 3.4424145221710205, |
|
"learning_rate": 4.039372184530521e-06, |
|
"loss": 1.2461, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.7129356772685703, |
|
"grad_norm": 3.1624224185943604, |
|
"learning_rate": 4.0182486716669656e-06, |
|
"loss": 1.2282, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 0.7137486027842699, |
|
"grad_norm": 4.986435890197754, |
|
"learning_rate": 3.9971666395879605e-06, |
|
"loss": 1.2048, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.7145615282999696, |
|
"grad_norm": 3.537174701690674, |
|
"learning_rate": 3.9761262344880096e-06, |
|
"loss": 1.2752, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 0.7153744538156691, |
|
"grad_norm": 2.7389779090881348, |
|
"learning_rate": 3.9551276022729644e-06, |
|
"loss": 1.2434, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.7161873793313688, |
|
"grad_norm": 3.5238423347473145, |
|
"learning_rate": 3.9341708885590034e-06, |
|
"loss": 1.2409, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 0.7170003048470683, |
|
"grad_norm": 3.9080941677093506, |
|
"learning_rate": 3.913256238671607e-06, |
|
"loss": 1.2019, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.717813230362768, |
|
"grad_norm": 4.038003921508789, |
|
"learning_rate": 3.89238379764457e-06, |
|
"loss": 1.2212, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 0.7186261558784677, |
|
"grad_norm": 3.344622850418091, |
|
"learning_rate": 3.871553710218988e-06, |
|
"loss": 1.2067, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.7194390813941672, |
|
"grad_norm": 3.5090816020965576, |
|
"learning_rate": 3.850766120842252e-06, |
|
"loss": 1.2171, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.7202520069098669, |
|
"grad_norm": 3.003899335861206, |
|
"learning_rate": 3.830021173667048e-06, |
|
"loss": 1.2371, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.7210649324255665, |
|
"grad_norm": 3.3116228580474854, |
|
"learning_rate": 3.809319012550352e-06, |
|
"loss": 1.2123, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 0.7218778579412661, |
|
"grad_norm": 3.532245397567749, |
|
"learning_rate": 3.788659781052444e-06, |
|
"loss": 1.2629, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.7226907834569658, |
|
"grad_norm": 4.061065196990967, |
|
"learning_rate": 3.7680436224359084e-06, |
|
"loss": 1.174, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 0.7235037089726654, |
|
"grad_norm": 3.3992788791656494, |
|
"learning_rate": 3.747470679664624e-06, |
|
"loss": 1.2209, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.724316634488365, |
|
"grad_norm": 3.4010937213897705, |
|
"learning_rate": 3.7269410954028107e-06, |
|
"loss": 1.2426, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 0.7251295600040646, |
|
"grad_norm": 2.854327917098999, |
|
"learning_rate": 3.706455012013994e-06, |
|
"loss": 1.1932, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.7259424855197643, |
|
"grad_norm": 3.451002836227417, |
|
"learning_rate": 3.6860125715600513e-06, |
|
"loss": 1.253, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 0.7267554110354639, |
|
"grad_norm": 3.123344898223877, |
|
"learning_rate": 3.665613915800217e-06, |
|
"loss": 1.2187, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.7275683365511635, |
|
"grad_norm": 3.021973133087158, |
|
"learning_rate": 3.6452591861900886e-06, |
|
"loss": 1.2165, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.7283812620668632, |
|
"grad_norm": 3.234985589981079, |
|
"learning_rate": 3.6249485238806637e-06, |
|
"loss": 1.212, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.7291941875825627, |
|
"grad_norm": 3.7146785259246826, |
|
"learning_rate": 3.6046820697173514e-06, |
|
"loss": 1.2697, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.7300071130982624, |
|
"grad_norm": 3.134507417678833, |
|
"learning_rate": 3.5844599642389965e-06, |
|
"loss": 1.2433, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.7308200386139619, |
|
"grad_norm": 2.9155194759368896, |
|
"learning_rate": 3.564282347676903e-06, |
|
"loss": 1.2403, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.7316329641296616, |
|
"grad_norm": 3.148232936859131, |
|
"learning_rate": 3.54414935995387e-06, |
|
"loss": 1.2575, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.7324458896453613, |
|
"grad_norm": 2.685274124145508, |
|
"learning_rate": 3.524061140683206e-06, |
|
"loss": 1.2124, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 0.7332588151610608, |
|
"grad_norm": 3.4557571411132812, |
|
"learning_rate": 3.5040178291677816e-06, |
|
"loss": 1.2105, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.7340717406767605, |
|
"grad_norm": 2.8230202198028564, |
|
"learning_rate": 3.4840195643990383e-06, |
|
"loss": 1.1745, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.7348846661924601, |
|
"grad_norm": 3.311697483062744, |
|
"learning_rate": 3.464066485056048e-06, |
|
"loss": 1.222, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.7356975917081597, |
|
"grad_norm": 3.2953929901123047, |
|
"learning_rate": 3.444158729504549e-06, |
|
"loss": 1.2688, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.7365105172238594, |
|
"grad_norm": 3.3319778442382812, |
|
"learning_rate": 3.4242964357959597e-06, |
|
"loss": 1.2539, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.737323442739559, |
|
"grad_norm": 3.124361753463745, |
|
"learning_rate": 3.4044797416664564e-06, |
|
"loss": 1.2527, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 0.7381363682552586, |
|
"grad_norm": 2.9690327644348145, |
|
"learning_rate": 3.3847087845359996e-06, |
|
"loss": 1.2722, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.7389492937709582, |
|
"grad_norm": 5.119561672210693, |
|
"learning_rate": 3.364983701507376e-06, |
|
"loss": 1.2233, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 0.7397622192866579, |
|
"grad_norm": 2.818423271179199, |
|
"learning_rate": 3.3453046293652657e-06, |
|
"loss": 1.2438, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.7405751448023575, |
|
"grad_norm": 3.0988523960113525, |
|
"learning_rate": 3.3256717045752794e-06, |
|
"loss": 1.223, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 0.7413880703180571, |
|
"grad_norm": 3.082066297531128, |
|
"learning_rate": 3.3060850632830167e-06, |
|
"loss": 1.244, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.7422009958337568, |
|
"grad_norm": 2.944265127182007, |
|
"learning_rate": 3.286544841313126e-06, |
|
"loss": 1.2308, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 0.7430139213494563, |
|
"grad_norm": 3.608762502670288, |
|
"learning_rate": 3.2670511741683475e-06, |
|
"loss": 1.2018, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.743826846865156, |
|
"grad_norm": 3.958385705947876, |
|
"learning_rate": 3.2476041970285945e-06, |
|
"loss": 1.2136, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.7446397723808557, |
|
"grad_norm": 2.9133267402648926, |
|
"learning_rate": 3.2282040447500063e-06, |
|
"loss": 1.2649, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.7454526978965552, |
|
"grad_norm": 3.8698244094848633, |
|
"learning_rate": 3.208850851863998e-06, |
|
"loss": 1.2265, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 0.7462656234122549, |
|
"grad_norm": 4.550247669219971, |
|
"learning_rate": 3.189544752576369e-06, |
|
"loss": 1.2046, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.7470785489279544, |
|
"grad_norm": 2.9886014461517334, |
|
"learning_rate": 3.1702858807663175e-06, |
|
"loss": 1.2812, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 0.7478914744436541, |
|
"grad_norm": 3.3736209869384766, |
|
"learning_rate": 3.151074369985556e-06, |
|
"loss": 1.2482, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.7487043999593537, |
|
"grad_norm": 2.7061290740966797, |
|
"learning_rate": 3.131910353457369e-06, |
|
"loss": 1.2474, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 0.7495173254750533, |
|
"grad_norm": 4.058886528015137, |
|
"learning_rate": 3.112793964075681e-06, |
|
"loss": 1.1897, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.750330250990753, |
|
"grad_norm": 3.3311798572540283, |
|
"learning_rate": 3.0937253344041507e-06, |
|
"loss": 1.2129, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 0.7511431765064526, |
|
"grad_norm": 3.2716569900512695, |
|
"learning_rate": 3.074704596675242e-06, |
|
"loss": 1.1763, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.7519561020221522, |
|
"grad_norm": 3.360356569290161, |
|
"learning_rate": 3.055731882789311e-06, |
|
"loss": 1.2771, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.7527690275378518, |
|
"grad_norm": 3.9494638442993164, |
|
"learning_rate": 3.0368073243136874e-06, |
|
"loss": 1.2551, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.7535819530535515, |
|
"grad_norm": 3.3180434703826904, |
|
"learning_rate": 3.0179310524817707e-06, |
|
"loss": 1.245, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 0.7543948785692511, |
|
"grad_norm": 4.963752746582031, |
|
"learning_rate": 2.9991031981921026e-06, |
|
"loss": 1.2266, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.7552078040849507, |
|
"grad_norm": 3.1220555305480957, |
|
"learning_rate": 2.9803238920074784e-06, |
|
"loss": 1.2057, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 0.7560207296006504, |
|
"grad_norm": 2.8764801025390625, |
|
"learning_rate": 2.961593264154038e-06, |
|
"loss": 1.2157, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.7568336551163499, |
|
"grad_norm": 2.682791233062744, |
|
"learning_rate": 2.9429114445203423e-06, |
|
"loss": 1.1899, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 0.7576465806320496, |
|
"grad_norm": 5.8080878257751465, |
|
"learning_rate": 2.924278562656514e-06, |
|
"loss": 1.1661, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.7584595061477493, |
|
"grad_norm": 3.5146303176879883, |
|
"learning_rate": 2.90569474777329e-06, |
|
"loss": 1.2712, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 0.7592724316634488, |
|
"grad_norm": 3.092174530029297, |
|
"learning_rate": 2.8871601287411634e-06, |
|
"loss": 1.2297, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 0.7600853571791485, |
|
"grad_norm": 2.807847499847412, |
|
"learning_rate": 2.8686748340894744e-06, |
|
"loss": 1.2369, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.760898282694848, |
|
"grad_norm": 2.8753178119659424, |
|
"learning_rate": 2.850238992005514e-06, |
|
"loss": 1.2812, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.7617112082105477, |
|
"grad_norm": 4.227181434631348, |
|
"learning_rate": 2.8318527303336465e-06, |
|
"loss": 1.2143, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 0.7625241337262473, |
|
"grad_norm": 3.921201229095459, |
|
"learning_rate": 2.81351617657442e-06, |
|
"loss": 1.2446, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 0.763337059241947, |
|
"grad_norm": 3.164557695388794, |
|
"learning_rate": 2.795229457883678e-06, |
|
"loss": 1.2085, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 0.7641499847576466, |
|
"grad_norm": 3.0904717445373535, |
|
"learning_rate": 2.7769927010716814e-06, |
|
"loss": 1.2436, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.7649629102733462, |
|
"grad_norm": 9.615850448608398, |
|
"learning_rate": 2.7588060326022205e-06, |
|
"loss": 1.2179, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 0.7657758357890458, |
|
"grad_norm": 7.9210357666015625, |
|
"learning_rate": 2.740669578591755e-06, |
|
"loss": 1.1704, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.7665887613047454, |
|
"grad_norm": 3.03359055519104, |
|
"learning_rate": 2.7225834648085282e-06, |
|
"loss": 1.1919, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 0.7674016868204451, |
|
"grad_norm": 3.331894636154175, |
|
"learning_rate": 2.7045478166716843e-06, |
|
"loss": 1.2297, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.7682146123361447, |
|
"grad_norm": 2.9995782375335693, |
|
"learning_rate": 2.6865627592504295e-06, |
|
"loss": 1.1936, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.7690275378518443, |
|
"grad_norm": 11.267196655273438, |
|
"learning_rate": 2.668628417263137e-06, |
|
"loss": 1.2385, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.769840463367544, |
|
"grad_norm": 4.058920383453369, |
|
"learning_rate": 2.6507449150764852e-06, |
|
"loss": 1.2078, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 0.7706533888832435, |
|
"grad_norm": 2.8774616718292236, |
|
"learning_rate": 2.632912376704607e-06, |
|
"loss": 1.2585, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.7714663143989432, |
|
"grad_norm": 3.4053540229797363, |
|
"learning_rate": 2.615130925808228e-06, |
|
"loss": 1.2739, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 0.7722792399146429, |
|
"grad_norm": 3.0022501945495605, |
|
"learning_rate": 2.597400685693795e-06, |
|
"loss": 1.2136, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.7730921654303424, |
|
"grad_norm": 3.6466481685638428, |
|
"learning_rate": 2.5797217793126373e-06, |
|
"loss": 1.3104, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 0.7739050909460421, |
|
"grad_norm": 4.021648406982422, |
|
"learning_rate": 2.5620943292601074e-06, |
|
"loss": 1.2621, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.7747180164617417, |
|
"grad_norm": 2.996817111968994, |
|
"learning_rate": 2.5445184577747305e-06, |
|
"loss": 1.2194, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 0.7755309419774413, |
|
"grad_norm": 3.8881189823150635, |
|
"learning_rate": 2.52699428673736e-06, |
|
"loss": 1.2516, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.776343867493141, |
|
"grad_norm": 3.279557228088379, |
|
"learning_rate": 2.5095219376703183e-06, |
|
"loss": 1.2116, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.7771567930088406, |
|
"grad_norm": 3.1030569076538086, |
|
"learning_rate": 2.4921015317365794e-06, |
|
"loss": 1.2902, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.7779697185245402, |
|
"grad_norm": 3.7724967002868652, |
|
"learning_rate": 2.4747331897389103e-06, |
|
"loss": 1.2783, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 0.7787826440402398, |
|
"grad_norm": 2.808138132095337, |
|
"learning_rate": 2.4574170321190305e-06, |
|
"loss": 1.2191, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 0.7795955695559394, |
|
"grad_norm": 2.6033871173858643, |
|
"learning_rate": 2.440153178956798e-06, |
|
"loss": 1.2282, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 0.780408495071639, |
|
"grad_norm": 2.870957612991333, |
|
"learning_rate": 2.42294174996935e-06, |
|
"loss": 1.2118, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.7812214205873387, |
|
"grad_norm": 2.913543462753296, |
|
"learning_rate": 2.40578286451029e-06, |
|
"loss": 1.2352, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 0.7820343461030383, |
|
"grad_norm": 3.7069716453552246, |
|
"learning_rate": 2.38867664156886e-06, |
|
"loss": 1.2218, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 0.7828472716187379, |
|
"grad_norm": 4.073693752288818, |
|
"learning_rate": 2.3716231997691007e-06, |
|
"loss": 1.1997, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 0.7836601971344376, |
|
"grad_norm": 2.7815756797790527, |
|
"learning_rate": 2.3546226573690444e-06, |
|
"loss": 1.1898, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.7844731226501371, |
|
"grad_norm": 3.2033910751342773, |
|
"learning_rate": 2.3376751322599e-06, |
|
"loss": 1.2575, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.7852860481658368, |
|
"grad_norm": 2.805227518081665, |
|
"learning_rate": 2.320780741965206e-06, |
|
"loss": 1.221, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 0.7860989736815365, |
|
"grad_norm": 2.747638463973999, |
|
"learning_rate": 2.3039396036400463e-06, |
|
"loss": 1.2199, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 0.786911899197236, |
|
"grad_norm": 2.758178234100342, |
|
"learning_rate": 2.287151834070226e-06, |
|
"loss": 1.1847, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.7877248247129357, |
|
"grad_norm": 3.467595338821411, |
|
"learning_rate": 2.2704175496714552e-06, |
|
"loss": 1.2456, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 0.7885377502286353, |
|
"grad_norm": 5.487158298492432, |
|
"learning_rate": 2.2537368664885527e-06, |
|
"loss": 1.2061, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.7893506757443349, |
|
"grad_norm": 3.063075542449951, |
|
"learning_rate": 2.2371099001946385e-06, |
|
"loss": 1.264, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 0.7901636012600346, |
|
"grad_norm": 2.6598317623138428, |
|
"learning_rate": 2.2205367660903267e-06, |
|
"loss": 1.1971, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.7909765267757342, |
|
"grad_norm": 3.249379873275757, |
|
"learning_rate": 2.2040175791029305e-06, |
|
"loss": 1.2442, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 0.7917894522914338, |
|
"grad_norm": 3.2312817573547363, |
|
"learning_rate": 2.187552453785662e-06, |
|
"loss": 1.1871, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 0.7926023778071334, |
|
"grad_norm": 3.060171604156494, |
|
"learning_rate": 2.1711415043168395e-06, |
|
"loss": 1.2198, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.793415303322833, |
|
"grad_norm": 3.2674033641815186, |
|
"learning_rate": 2.1547848444991025e-06, |
|
"loss": 1.2343, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.7942282288385327, |
|
"grad_norm": 3.822357654571533, |
|
"learning_rate": 2.138482587758605e-06, |
|
"loss": 1.1876, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 0.7950411543542323, |
|
"grad_norm": 3.4773342609405518, |
|
"learning_rate": 2.1222348471442477e-06, |
|
"loss": 1.1976, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.795854079869932, |
|
"grad_norm": 3.8379478454589844, |
|
"learning_rate": 2.1060417353268845e-06, |
|
"loss": 1.198, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 0.7966670053856315, |
|
"grad_norm": 4.963233470916748, |
|
"learning_rate": 2.0899033645985423e-06, |
|
"loss": 1.2991, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.7974799309013312, |
|
"grad_norm": 3.4560701847076416, |
|
"learning_rate": 2.073819846871646e-06, |
|
"loss": 1.1936, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 0.7982928564170307, |
|
"grad_norm": 2.69124698638916, |
|
"learning_rate": 2.0577912936782317e-06, |
|
"loss": 1.1708, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 0.7991057819327304, |
|
"grad_norm": 2.973618268966675, |
|
"learning_rate": 2.041817816169187e-06, |
|
"loss": 1.2535, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 0.7999187074484301, |
|
"grad_norm": 3.1709506511688232, |
|
"learning_rate": 2.025899525113474e-06, |
|
"loss": 1.2015, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.8007316329641296, |
|
"grad_norm": 2.750272274017334, |
|
"learning_rate": 2.010036530897359e-06, |
|
"loss": 1.2677, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.8015445584798293, |
|
"grad_norm": 2.7218148708343506, |
|
"learning_rate": 1.9942289435236506e-06, |
|
"loss": 1.2679, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 0.8023574839955289, |
|
"grad_norm": 3.0237209796905518, |
|
"learning_rate": 1.978476872610939e-06, |
|
"loss": 1.2425, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 0.8031704095112285, |
|
"grad_norm": 4.8593363761901855, |
|
"learning_rate": 1.962780427392823e-06, |
|
"loss": 1.2754, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.8039833350269282, |
|
"grad_norm": 4.2402544021606445, |
|
"learning_rate": 1.9471397167171714e-06, |
|
"loss": 1.1841, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.8047962605426278, |
|
"grad_norm": 2.8616418838500977, |
|
"learning_rate": 1.931554849045355e-06, |
|
"loss": 1.1712, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.8056091860583274, |
|
"grad_norm": 3.0303030014038086, |
|
"learning_rate": 1.916025932451493e-06, |
|
"loss": 1.2217, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 0.806422111574027, |
|
"grad_norm": 3.096165180206299, |
|
"learning_rate": 1.9005530746217238e-06, |
|
"loss": 1.1515, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.8072350370897267, |
|
"grad_norm": 5.142411231994629, |
|
"learning_rate": 1.8851363828534253e-06, |
|
"loss": 1.167, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 0.8080479626054263, |
|
"grad_norm": 3.1720876693725586, |
|
"learning_rate": 1.869775964054501e-06, |
|
"loss": 1.1896, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 0.8088608881211259, |
|
"grad_norm": 3.833009719848633, |
|
"learning_rate": 1.8544719247426224e-06, |
|
"loss": 1.2517, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.8096738136368256, |
|
"grad_norm": 3.188974618911743, |
|
"learning_rate": 1.8392243710444911e-06, |
|
"loss": 1.2795, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.8104867391525251, |
|
"grad_norm": 3.601663589477539, |
|
"learning_rate": 1.8240334086951117e-06, |
|
"loss": 1.2366, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 0.8112996646682248, |
|
"grad_norm": 3.1258544921875, |
|
"learning_rate": 1.8088991430370506e-06, |
|
"loss": 1.2002, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 0.8121125901839243, |
|
"grad_norm": 2.71299409866333, |
|
"learning_rate": 1.7938216790197071e-06, |
|
"loss": 1.2609, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 0.812925515699624, |
|
"grad_norm": 3.2866601943969727, |
|
"learning_rate": 1.77880112119859e-06, |
|
"loss": 1.2571, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8137384412153237, |
|
"grad_norm": 3.1053292751312256, |
|
"learning_rate": 1.7638375737345804e-06, |
|
"loss": 1.2316, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 0.8145513667310232, |
|
"grad_norm": 2.839862823486328, |
|
"learning_rate": 1.7489311403932274e-06, |
|
"loss": 1.2464, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 0.8153642922467229, |
|
"grad_norm": 2.750040292739868, |
|
"learning_rate": 1.7340819245440166e-06, |
|
"loss": 1.2639, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 0.8161772177624225, |
|
"grad_norm": 3.918286085128784, |
|
"learning_rate": 1.7192900291596493e-06, |
|
"loss": 1.2379, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 0.8169901432781221, |
|
"grad_norm": 3.579942226409912, |
|
"learning_rate": 1.7045555568153415e-06, |
|
"loss": 1.1943, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.8178030687938218, |
|
"grad_norm": 3.2873690128326416, |
|
"learning_rate": 1.6898786096881104e-06, |
|
"loss": 1.2457, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 0.8186159943095214, |
|
"grad_norm": 2.721126079559326, |
|
"learning_rate": 1.6752592895560493e-06, |
|
"loss": 1.2681, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 0.819428919825221, |
|
"grad_norm": 2.9273929595947266, |
|
"learning_rate": 1.6606976977976408e-06, |
|
"loss": 1.1985, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 0.8202418453409206, |
|
"grad_norm": 3.6816606521606445, |
|
"learning_rate": 1.6461939353910494e-06, |
|
"loss": 1.2128, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 0.8210547708566203, |
|
"grad_norm": 2.8991682529449463, |
|
"learning_rate": 1.631748102913412e-06, |
|
"loss": 1.224, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.8218676963723199, |
|
"grad_norm": 3.2517406940460205, |
|
"learning_rate": 1.6173603005401505e-06, |
|
"loss": 1.1936, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 0.8226806218880195, |
|
"grad_norm": 3.0502426624298096, |
|
"learning_rate": 1.6030306280442764e-06, |
|
"loss": 1.2555, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 0.8234935474037192, |
|
"grad_norm": 3.2694664001464844, |
|
"learning_rate": 1.588759184795694e-06, |
|
"loss": 1.2643, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 0.8243064729194187, |
|
"grad_norm": 2.9429259300231934, |
|
"learning_rate": 1.574546069760514e-06, |
|
"loss": 1.2221, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 0.8251193984351184, |
|
"grad_norm": 3.2481369972229004, |
|
"learning_rate": 1.5603913815003634e-06, |
|
"loss": 1.1949, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.8259323239508181, |
|
"grad_norm": 3.006603717803955, |
|
"learning_rate": 1.5462952181717117e-06, |
|
"loss": 1.1593, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 0.8267452494665176, |
|
"grad_norm": 2.8126094341278076, |
|
"learning_rate": 1.532257677525183e-06, |
|
"loss": 1.2094, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 0.8275581749822173, |
|
"grad_norm": 3.258910894393921, |
|
"learning_rate": 1.5182788569048689e-06, |
|
"loss": 1.1524, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 0.8283711004979168, |
|
"grad_norm": 3.097121477127075, |
|
"learning_rate": 1.5043588532476827e-06, |
|
"loss": 1.2063, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 0.8291840260136165, |
|
"grad_norm": 3.5429606437683105, |
|
"learning_rate": 1.49049776308265e-06, |
|
"loss": 1.1579, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.8299969515293161, |
|
"grad_norm": 3.0676991939544678, |
|
"learning_rate": 1.476695682530268e-06, |
|
"loss": 1.2063, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 0.8308098770450157, |
|
"grad_norm": 3.191493272781372, |
|
"learning_rate": 1.4629527073018267e-06, |
|
"loss": 1.2724, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 0.8316228025607154, |
|
"grad_norm": 4.181521415710449, |
|
"learning_rate": 1.449268932698743e-06, |
|
"loss": 1.2627, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 0.832435728076415, |
|
"grad_norm": 3.7330870628356934, |
|
"learning_rate": 1.4356444536119085e-06, |
|
"loss": 1.1875, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.8332486535921146, |
|
"grad_norm": 3.5213124752044678, |
|
"learning_rate": 1.422079364521024e-06, |
|
"loss": 1.2345, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.8340615791078142, |
|
"grad_norm": 3.672848701477051, |
|
"learning_rate": 1.4085737594939497e-06, |
|
"loss": 1.2451, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 0.8348745046235139, |
|
"grad_norm": 3.2613043785095215, |
|
"learning_rate": 1.3951277321860468e-06, |
|
"loss": 1.261, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 0.8356874301392135, |
|
"grad_norm": 3.1444427967071533, |
|
"learning_rate": 1.381741375839537e-06, |
|
"loss": 1.2205, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 0.8365003556549131, |
|
"grad_norm": 3.7306652069091797, |
|
"learning_rate": 1.3684147832828409e-06, |
|
"loss": 1.2343, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 0.8373132811706128, |
|
"grad_norm": 3.6698615550994873, |
|
"learning_rate": 1.355148046929956e-06, |
|
"loss": 1.2195, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.8381262066863123, |
|
"grad_norm": 4.807132244110107, |
|
"learning_rate": 1.3419412587797908e-06, |
|
"loss": 1.1946, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 0.838939132202012, |
|
"grad_norm": 3.0877437591552734, |
|
"learning_rate": 1.3287945104155487e-06, |
|
"loss": 1.1901, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.8397520577177117, |
|
"grad_norm": 6.123032093048096, |
|
"learning_rate": 1.3157078930040856e-06, |
|
"loss": 1.2338, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 0.8405649832334112, |
|
"grad_norm": 3.8207807540893555, |
|
"learning_rate": 1.3026814972952674e-06, |
|
"loss": 1.2064, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 0.8413779087491109, |
|
"grad_norm": 3.591054916381836, |
|
"learning_rate": 1.2897154136213542e-06, |
|
"loss": 1.248, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.8421908342648105, |
|
"grad_norm": 3.14103364944458, |
|
"learning_rate": 1.2768097318963701e-06, |
|
"loss": 1.2247, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 0.8430037597805101, |
|
"grad_norm": 3.2605819702148438, |
|
"learning_rate": 1.2639645416154744e-06, |
|
"loss": 1.2265, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 0.8438166852962098, |
|
"grad_norm": 3.2860848903656006, |
|
"learning_rate": 1.2511799318543493e-06, |
|
"loss": 1.2083, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 0.8446296108119093, |
|
"grad_norm": 3.6271586418151855, |
|
"learning_rate": 1.2384559912685768e-06, |
|
"loss": 1.2562, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 0.845442536327609, |
|
"grad_norm": 3.0439271926879883, |
|
"learning_rate": 1.2257928080930236e-06, |
|
"loss": 1.1838, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.8462554618433086, |
|
"grad_norm": 2.9285664558410645, |
|
"learning_rate": 1.2131904701412345e-06, |
|
"loss": 1.2271, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 0.8470683873590082, |
|
"grad_norm": 4.422233581542969, |
|
"learning_rate": 1.2006490648048118e-06, |
|
"loss": 1.2218, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 0.8478813128747078, |
|
"grad_norm": 3.193469524383545, |
|
"learning_rate": 1.1881686790528279e-06, |
|
"loss": 1.2167, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 0.8486942383904075, |
|
"grad_norm": 2.9041225910186768, |
|
"learning_rate": 1.1757493994312052e-06, |
|
"loss": 1.1652, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 0.8495071639061071, |
|
"grad_norm": 2.902376890182495, |
|
"learning_rate": 1.1633913120621188e-06, |
|
"loss": 1.209, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.8503200894218067, |
|
"grad_norm": 2.7561545372009277, |
|
"learning_rate": 1.151094502643414e-06, |
|
"loss": 1.2105, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 0.8511330149375064, |
|
"grad_norm": 3.4532971382141113, |
|
"learning_rate": 1.1388590564479895e-06, |
|
"loss": 1.2457, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 0.8519459404532059, |
|
"grad_norm": 4.540160179138184, |
|
"learning_rate": 1.1266850583232224e-06, |
|
"loss": 1.1941, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 0.8527588659689056, |
|
"grad_norm": 2.99617075920105, |
|
"learning_rate": 1.1145725926903772e-06, |
|
"loss": 1.2138, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 0.8535717914846053, |
|
"grad_norm": 3.2309064865112305, |
|
"learning_rate": 1.1025217435440116e-06, |
|
"loss": 1.2373, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.8543847170003048, |
|
"grad_norm": 2.7454960346221924, |
|
"learning_rate": 1.0905325944514034e-06, |
|
"loss": 1.2473, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 0.8551976425160045, |
|
"grad_norm": 8.090238571166992, |
|
"learning_rate": 1.078605228551971e-06, |
|
"loss": 1.2342, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 0.8560105680317041, |
|
"grad_norm": 3.7213146686553955, |
|
"learning_rate": 1.0667397285566893e-06, |
|
"loss": 1.2232, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 0.8568234935474037, |
|
"grad_norm": 3.4427578449249268, |
|
"learning_rate": 1.0549361767475241e-06, |
|
"loss": 1.2474, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 0.8576364190631034, |
|
"grad_norm": 3.212726593017578, |
|
"learning_rate": 1.0431946549768567e-06, |
|
"loss": 1.2727, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.858449344578803, |
|
"grad_norm": 3.895224094390869, |
|
"learning_rate": 1.0315152446669142e-06, |
|
"loss": 1.2451, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 0.8592622700945026, |
|
"grad_norm": 2.8261964321136475, |
|
"learning_rate": 1.019898026809214e-06, |
|
"loss": 1.2416, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 0.8600751956102022, |
|
"grad_norm": 3.2146759033203125, |
|
"learning_rate": 1.0083430819639962e-06, |
|
"loss": 1.2258, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 0.8608881211259019, |
|
"grad_norm": 5.239031791687012, |
|
"learning_rate": 9.968504902596566e-07, |
|
"loss": 1.2089, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 0.8617010466416014, |
|
"grad_norm": 3.1487622261047363, |
|
"learning_rate": 9.85420331392214e-07, |
|
"loss": 1.2445, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.8625139721573011, |
|
"grad_norm": 3.5642974376678467, |
|
"learning_rate": 9.74052684624731e-07, |
|
"loss": 1.2724, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 0.8633268976730007, |
|
"grad_norm": 3.3064541816711426, |
|
"learning_rate": 9.62747628786782e-07, |
|
"loss": 1.235, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 0.8641398231887003, |
|
"grad_norm": 2.7583703994750977, |
|
"learning_rate": 9.515052422739035e-07, |
|
"loss": 1.1864, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 0.8649527487044, |
|
"grad_norm": 2.8002755641937256, |
|
"learning_rate": 9.403256030470386e-07, |
|
"loss": 1.1888, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 0.8657656742200995, |
|
"grad_norm": 4.0211710929870605, |
|
"learning_rate": 9.292087886320166e-07, |
|
"loss": 1.2513, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.8665785997357992, |
|
"grad_norm": 3.937668561935425, |
|
"learning_rate": 9.181548761189996e-07, |
|
"loss": 1.2111, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 0.8673915252514989, |
|
"grad_norm": 31.291566848754883, |
|
"learning_rate": 9.071639421619527e-07, |
|
"loss": 1.2234, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 0.8682044507671984, |
|
"grad_norm": 4.150018692016602, |
|
"learning_rate": 8.962360629781164e-07, |
|
"loss": 1.2205, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 0.8690173762828981, |
|
"grad_norm": 2.8017213344573975, |
|
"learning_rate": 8.853713143474685e-07, |
|
"loss": 1.27, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 0.8698303017985977, |
|
"grad_norm": 2.9798476696014404, |
|
"learning_rate": 8.745697716122081e-07, |
|
"loss": 1.2169, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.8706432273142973, |
|
"grad_norm": 4.344991683959961, |
|
"learning_rate": 8.638315096762318e-07, |
|
"loss": 1.2217, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 0.871456152829997, |
|
"grad_norm": 2.9421257972717285, |
|
"learning_rate": 8.531566030046035e-07, |
|
"loss": 1.2399, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 0.8722690783456966, |
|
"grad_norm": 3.4676921367645264, |
|
"learning_rate": 8.425451256230588e-07, |
|
"loss": 1.1957, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 0.8730820038613962, |
|
"grad_norm": 3.2855141162872314, |
|
"learning_rate": 8.319971511174718e-07, |
|
"loss": 1.2399, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 0.8738949293770958, |
|
"grad_norm": 2.990471839904785, |
|
"learning_rate": 8.215127526333499e-07, |
|
"loss": 1.2787, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.8747078548927955, |
|
"grad_norm": 3.183928966522217, |
|
"learning_rate": 8.110920028753355e-07, |
|
"loss": 1.1831, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 0.8755207804084951, |
|
"grad_norm": 2.8277997970581055, |
|
"learning_rate": 8.007349741066939e-07, |
|
"loss": 1.248, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 0.8763337059241947, |
|
"grad_norm": 2.7392983436584473, |
|
"learning_rate": 7.904417381488083e-07, |
|
"loss": 1.23, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 0.8771466314398944, |
|
"grad_norm": 5.617170333862305, |
|
"learning_rate": 7.802123663806938e-07, |
|
"loss": 1.2267, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 0.8779595569555939, |
|
"grad_norm": 2.906653642654419, |
|
"learning_rate": 7.700469297384927e-07, |
|
"loss": 1.2245, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.8787724824712936, |
|
"grad_norm": 2.7728428840637207, |
|
"learning_rate": 7.599454987149868e-07, |
|
"loss": 1.2131, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 0.8795854079869931, |
|
"grad_norm": 2.683861017227173, |
|
"learning_rate": 7.499081433591071e-07, |
|
"loss": 1.1936, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 0.8803983335026928, |
|
"grad_norm": 2.6362993717193604, |
|
"learning_rate": 7.399349332754458e-07, |
|
"loss": 1.2169, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 0.8812112590183925, |
|
"grad_norm": 3.3068742752075195, |
|
"learning_rate": 7.300259376237795e-07, |
|
"loss": 1.2098, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 0.882024184534092, |
|
"grad_norm": 2.825416326522827, |
|
"learning_rate": 7.201812251185869e-07, |
|
"loss": 1.2543, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.8828371100497917, |
|
"grad_norm": 3.172919750213623, |
|
"learning_rate": 7.104008640285642e-07, |
|
"loss": 1.1768, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 0.8836500355654913, |
|
"grad_norm": 3.052677869796753, |
|
"learning_rate": 7.006849221761736e-07, |
|
"loss": 1.2068, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 0.8844629610811909, |
|
"grad_norm": 2.8510589599609375, |
|
"learning_rate": 6.910334669371433e-07, |
|
"loss": 1.2043, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.8852758865968906, |
|
"grad_norm": 3.4369497299194336, |
|
"learning_rate": 6.814465652400237e-07, |
|
"loss": 1.2467, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 0.8860888121125902, |
|
"grad_norm": 2.667567491531372, |
|
"learning_rate": 6.719242835657147e-07, |
|
"loss": 1.2594, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.8869017376282898, |
|
"grad_norm": 2.983642816543579, |
|
"learning_rate": 6.62466687947001e-07, |
|
"loss": 1.2199, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 0.8877146631439894, |
|
"grad_norm": 3.583439350128174, |
|
"learning_rate": 6.530738439681017e-07, |
|
"loss": 1.1827, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 0.8885275886596891, |
|
"grad_norm": 4.706247806549072, |
|
"learning_rate": 6.437458167642164e-07, |
|
"loss": 1.2292, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 0.8893405141753887, |
|
"grad_norm": 4.394626140594482, |
|
"learning_rate": 6.344826710210584e-07, |
|
"loss": 1.2975, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 0.8901534396910883, |
|
"grad_norm": 4.5692572593688965, |
|
"learning_rate": 6.252844709744255e-07, |
|
"loss": 1.1853, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.890966365206788, |
|
"grad_norm": 3.4114434719085693, |
|
"learning_rate": 6.161512804097436e-07, |
|
"loss": 1.2067, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 0.8917792907224875, |
|
"grad_norm": 7.298144340515137, |
|
"learning_rate": 6.070831626616236e-07, |
|
"loss": 1.2149, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 0.8925922162381872, |
|
"grad_norm": 2.7437572479248047, |
|
"learning_rate": 5.980801806134318e-07, |
|
"loss": 1.2002, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 0.8934051417538869, |
|
"grad_norm": 3.101397752761841, |
|
"learning_rate": 5.891423966968413e-07, |
|
"loss": 1.2594, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 0.8942180672695864, |
|
"grad_norm": 3.186479091644287, |
|
"learning_rate": 5.80269872891408e-07, |
|
"loss": 1.1895, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.8950309927852861, |
|
"grad_norm": 3.5605878829956055, |
|
"learning_rate": 5.714626707241411e-07, |
|
"loss": 1.1804, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 0.8958439183009856, |
|
"grad_norm": 3.0213913917541504, |
|
"learning_rate": 5.627208512690641e-07, |
|
"loss": 1.2619, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 0.8966568438166853, |
|
"grad_norm": 3.0476791858673096, |
|
"learning_rate": 5.5404447514681e-07, |
|
"loss": 1.1429, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 0.8974697693323849, |
|
"grad_norm": 2.9802823066711426, |
|
"learning_rate": 5.45433602524188e-07, |
|
"loss": 1.2353, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 0.8982826948480845, |
|
"grad_norm": 3.168029308319092, |
|
"learning_rate": 5.368882931137675e-07, |
|
"loss": 1.1771, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.8990956203637842, |
|
"grad_norm": 2.8624963760375977, |
|
"learning_rate": 5.284086061734672e-07, |
|
"loss": 1.1929, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 0.8999085458794838, |
|
"grad_norm": 3.3826193809509277, |
|
"learning_rate": 5.199946005061462e-07, |
|
"loss": 1.1379, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 0.9007214713951834, |
|
"grad_norm": 3.2084782123565674, |
|
"learning_rate": 5.116463344591893e-07, |
|
"loss": 1.1694, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 0.901534396910883, |
|
"grad_norm": 3.6624932289123535, |
|
"learning_rate": 5.033638659241102e-07, |
|
"loss": 1.219, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 0.9023473224265827, |
|
"grad_norm": 3.2314536571502686, |
|
"learning_rate": 4.951472523361401e-07, |
|
"loss": 1.2457, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.9031602479422823, |
|
"grad_norm": 3.1179494857788086, |
|
"learning_rate": 4.869965506738416e-07, |
|
"loss": 1.232, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 0.9039731734579819, |
|
"grad_norm": 2.875725030899048, |
|
"learning_rate": 4.789118174587071e-07, |
|
"loss": 1.2515, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 0.9047860989736816, |
|
"grad_norm": 2.5742199420928955, |
|
"learning_rate": 4.7089310875475856e-07, |
|
"loss": 1.2554, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 0.9055990244893811, |
|
"grad_norm": 3.2250759601593018, |
|
"learning_rate": 4.6294048016817917e-07, |
|
"loss": 1.2281, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 0.9064119500050808, |
|
"grad_norm": 2.866562843322754, |
|
"learning_rate": 4.550539868469106e-07, |
|
"loss": 1.2559, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.9072248755207805, |
|
"grad_norm": 2.9703938961029053, |
|
"learning_rate": 4.4723368348027375e-07, |
|
"loss": 1.307, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 0.90803780103648, |
|
"grad_norm": 3.0078420639038086, |
|
"learning_rate": 4.394796242985933e-07, |
|
"loss": 1.2285, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 0.9088507265521797, |
|
"grad_norm": 3.0581750869750977, |
|
"learning_rate": 4.317918630728235e-07, |
|
"loss": 1.1751, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 0.9096636520678792, |
|
"grad_norm": 4.224788188934326, |
|
"learning_rate": 4.241704531141633e-07, |
|
"loss": 1.155, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 0.9104765775835789, |
|
"grad_norm": 3.2800920009613037, |
|
"learning_rate": 4.166154472737061e-07, |
|
"loss": 1.199, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.9112895030992785, |
|
"grad_norm": 5.579473495483398, |
|
"learning_rate": 4.091268979420537e-07, |
|
"loss": 1.1558, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 0.9121024286149781, |
|
"grad_norm": 3.660987615585327, |
|
"learning_rate": 4.0170485704896453e-07, |
|
"loss": 1.2258, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 0.9129153541306778, |
|
"grad_norm": 11.064430236816406, |
|
"learning_rate": 3.943493760629924e-07, |
|
"loss": 1.1699, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 0.9137282796463774, |
|
"grad_norm": 4.9747138023376465, |
|
"learning_rate": 3.8706050599112363e-07, |
|
"loss": 1.2415, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 0.914541205162077, |
|
"grad_norm": 3.7896888256073, |
|
"learning_rate": 3.798382973784298e-07, |
|
"loss": 1.2221, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.9153541306777766, |
|
"grad_norm": 3.383769989013672, |
|
"learning_rate": 3.7268280030771655e-07, |
|
"loss": 1.196, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 0.9161670561934763, |
|
"grad_norm": 3.491272211074829, |
|
"learning_rate": 3.655940643991718e-07, |
|
"loss": 1.1786, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 0.9169799817091759, |
|
"grad_norm": 3.1759097576141357, |
|
"learning_rate": 3.585721388100283e-07, |
|
"loss": 1.1696, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 0.9177929072248755, |
|
"grad_norm": 2.7568089962005615, |
|
"learning_rate": 3.516170722342127e-07, |
|
"loss": 1.1703, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 0.9186058327405752, |
|
"grad_norm": 2.992725372314453, |
|
"learning_rate": 3.4472891290201927e-07, |
|
"loss": 1.1739, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.9194187582562747, |
|
"grad_norm": 4.317306041717529, |
|
"learning_rate": 3.3790770857976995e-07, |
|
"loss": 1.184, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 0.9202316837719744, |
|
"grad_norm": 3.9048075675964355, |
|
"learning_rate": 3.3115350656948043e-07, |
|
"loss": 1.2651, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 0.9210446092876741, |
|
"grad_norm": 3.3990674018859863, |
|
"learning_rate": 3.2446635370853686e-07, |
|
"loss": 1.205, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 0.9218575348033736, |
|
"grad_norm": 4.0517754554748535, |
|
"learning_rate": 3.1784629636937404e-07, |
|
"loss": 1.1996, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 0.9226704603190733, |
|
"grad_norm": 3.340564489364624, |
|
"learning_rate": 3.1129338045914004e-07, |
|
"loss": 1.2215, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.9234833858347729, |
|
"grad_norm": 3.5760183334350586, |
|
"learning_rate": 3.0480765141939316e-07, |
|
"loss": 1.2191, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 0.9242963113504725, |
|
"grad_norm": 2.8496994972229004, |
|
"learning_rate": 2.9838915422578e-07, |
|
"loss": 1.2217, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 0.9251092368661722, |
|
"grad_norm": 3.025475025177002, |
|
"learning_rate": 2.920379333877221e-07, |
|
"loss": 1.2332, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 0.9259221623818717, |
|
"grad_norm": 4.238699436187744, |
|
"learning_rate": 2.8575403294811123e-07, |
|
"loss": 1.2223, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 0.9267350878975714, |
|
"grad_norm": 2.9650015830993652, |
|
"learning_rate": 2.795374964830022e-07, |
|
"loss": 1.2149, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.927548013413271, |
|
"grad_norm": 2.731064796447754, |
|
"learning_rate": 2.733883671013082e-07, |
|
"loss": 1.2116, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 0.9283609389289706, |
|
"grad_norm": 4.153676986694336, |
|
"learning_rate": 2.673066874445096e-07, |
|
"loss": 1.1189, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 0.9291738644446702, |
|
"grad_norm": 3.843541383743286, |
|
"learning_rate": 2.612924996863453e-07, |
|
"loss": 1.1933, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 0.9299867899603699, |
|
"grad_norm": 3.0720019340515137, |
|
"learning_rate": 2.5534584553253526e-07, |
|
"loss": 1.1859, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 0.9307997154760695, |
|
"grad_norm": 3.4368112087249756, |
|
"learning_rate": 2.494667662204797e-07, |
|
"loss": 1.22, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.9316126409917691, |
|
"grad_norm": 2.524754285812378, |
|
"learning_rate": 2.436553025189758e-07, |
|
"loss": 1.2561, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 0.9324255665074688, |
|
"grad_norm": 3.2625484466552734, |
|
"learning_rate": 2.3791149472794373e-07, |
|
"loss": 1.2026, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 0.9332384920231683, |
|
"grad_norm": 3.4842891693115234, |
|
"learning_rate": 2.3223538267813317e-07, |
|
"loss": 1.234, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 0.934051417538868, |
|
"grad_norm": 2.9896857738494873, |
|
"learning_rate": 2.2662700573085505e-07, |
|
"loss": 1.2008, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 0.9348643430545677, |
|
"grad_norm": 3.3465092182159424, |
|
"learning_rate": 2.2108640277771153e-07, |
|
"loss": 1.2392, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.9356772685702672, |
|
"grad_norm": 2.6980130672454834, |
|
"learning_rate": 2.156136122403174e-07, |
|
"loss": 1.2083, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 0.9364901940859669, |
|
"grad_norm": 3.4942784309387207, |
|
"learning_rate": 2.1020867207004026e-07, |
|
"loss": 1.2232, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 0.9373031196016665, |
|
"grad_norm": 2.874210834503174, |
|
"learning_rate": 2.048716197477374e-07, |
|
"loss": 1.2447, |
|
"step": 1153 |
|
}, |
|
{ |
|
"epoch": 0.9381160451173661, |
|
"grad_norm": 3.429757833480835, |
|
"learning_rate": 1.996024922834905e-07, |
|
"loss": 1.1562, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 0.9389289706330658, |
|
"grad_norm": 2.96549654006958, |
|
"learning_rate": 1.9440132621635687e-07, |
|
"loss": 1.2543, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.9397418961487654, |
|
"grad_norm": 3.1660540103912354, |
|
"learning_rate": 1.8926815761410867e-07, |
|
"loss": 1.1931, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 0.940554821664465, |
|
"grad_norm": 2.848574161529541, |
|
"learning_rate": 1.8420302207298623e-07, |
|
"loss": 1.1837, |
|
"step": 1157 |
|
}, |
|
{ |
|
"epoch": 0.9413677471801646, |
|
"grad_norm": 4.005343437194824, |
|
"learning_rate": 1.792059547174507e-07, |
|
"loss": 1.2423, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 0.9421806726958643, |
|
"grad_norm": 2.7809975147247314, |
|
"learning_rate": 1.7427699019994415e-07, |
|
"loss": 1.1665, |
|
"step": 1159 |
|
}, |
|
{ |
|
"epoch": 0.9429935982115638, |
|
"grad_norm": 4.211681365966797, |
|
"learning_rate": 1.6941616270063854e-07, |
|
"loss": 1.2526, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.9438065237272635, |
|
"grad_norm": 4.117452144622803, |
|
"learning_rate": 1.6462350592721498e-07, |
|
"loss": 1.1957, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 0.9446194492429631, |
|
"grad_norm": 2.9959964752197266, |
|
"learning_rate": 1.5989905311461274e-07, |
|
"loss": 1.2342, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 0.9454323747586627, |
|
"grad_norm": 3.091280460357666, |
|
"learning_rate": 1.5524283702481158e-07, |
|
"loss": 1.2168, |
|
"step": 1163 |
|
}, |
|
{ |
|
"epoch": 0.9462453002743624, |
|
"grad_norm": 4.000481128692627, |
|
"learning_rate": 1.5065488994659983e-07, |
|
"loss": 1.2206, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 0.9470582257900619, |
|
"grad_norm": 3.2974343299865723, |
|
"learning_rate": 1.461352436953478e-07, |
|
"loss": 1.1955, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.9478711513057616, |
|
"grad_norm": 3.589606285095215, |
|
"learning_rate": 1.4168392961279254e-07, |
|
"loss": 1.1277, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 0.9486840768214613, |
|
"grad_norm": 3.071859121322632, |
|
"learning_rate": 1.3730097856681668e-07, |
|
"loss": 1.1837, |
|
"step": 1167 |
|
}, |
|
{ |
|
"epoch": 0.9494970023371608, |
|
"grad_norm": 3.4584462642669678, |
|
"learning_rate": 1.329864209512377e-07, |
|
"loss": 1.249, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 0.9503099278528605, |
|
"grad_norm": 4.1693434715271, |
|
"learning_rate": 1.2874028668559247e-07, |
|
"loss": 1.2234, |
|
"step": 1169 |
|
}, |
|
{ |
|
"epoch": 0.9511228533685601, |
|
"grad_norm": 3.1776278018951416, |
|
"learning_rate": 1.245626052149318e-07, |
|
"loss": 1.2047, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.9519357788842597, |
|
"grad_norm": 3.347137689590454, |
|
"learning_rate": 1.2045340550961958e-07, |
|
"loss": 1.2995, |
|
"step": 1171 |
|
}, |
|
{ |
|
"epoch": 0.9527487043999594, |
|
"grad_norm": 3.2806451320648193, |
|
"learning_rate": 1.164127160651285e-07, |
|
"loss": 1.1546, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 0.953561629915659, |
|
"grad_norm": 4.498492240905762, |
|
"learning_rate": 1.1244056490184008e-07, |
|
"loss": 1.2469, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 0.9543745554313586, |
|
"grad_norm": 3.0195493698120117, |
|
"learning_rate": 1.0853697956485942e-07, |
|
"loss": 1.2373, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 0.9551874809470582, |
|
"grad_norm": 4.176177501678467, |
|
"learning_rate": 1.0470198712381086e-07, |
|
"loss": 1.2486, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.9560004064627579, |
|
"grad_norm": 3.222987413406372, |
|
"learning_rate": 1.009356141726614e-07, |
|
"loss": 1.1905, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 0.9568133319784575, |
|
"grad_norm": 2.6555376052856445, |
|
"learning_rate": 9.723788682953539e-08, |
|
"loss": 1.1666, |
|
"step": 1177 |
|
}, |
|
{ |
|
"epoch": 0.9576262574941571, |
|
"grad_norm": 4.015134334564209, |
|
"learning_rate": 9.360883073652238e-08, |
|
"loss": 1.2675, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 0.9584391830098568, |
|
"grad_norm": 3.029994487762451, |
|
"learning_rate": 9.004847105951509e-08, |
|
"loss": 1.1977, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 0.9592521085255563, |
|
"grad_norm": 2.7363007068634033, |
|
"learning_rate": 8.655683248802282e-08, |
|
"loss": 1.2359, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.960065034041256, |
|
"grad_norm": 4.360199451446533, |
|
"learning_rate": 8.313393923500613e-08, |
|
"loss": 1.2099, |
|
"step": 1181 |
|
}, |
|
{ |
|
"epoch": 0.9608779595569555, |
|
"grad_norm": 2.9082043170928955, |
|
"learning_rate": 7.977981503670795e-08, |
|
"loss": 1.2632, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 0.9616908850726552, |
|
"grad_norm": 3.0049242973327637, |
|
"learning_rate": 7.64944831524872e-08, |
|
"loss": 1.2128, |
|
"step": 1183 |
|
}, |
|
{ |
|
"epoch": 0.9625038105883549, |
|
"grad_norm": 2.9180142879486084, |
|
"learning_rate": 7.327796636465767e-08, |
|
"loss": 1.2075, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 0.9633167361040544, |
|
"grad_norm": 2.8545587062835693, |
|
"learning_rate": 7.01302869783338e-08, |
|
"loss": 1.1809, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.9641296616197541, |
|
"grad_norm": 3.2359890937805176, |
|
"learning_rate": 6.705146682127184e-08, |
|
"loss": 1.2404, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 0.9649425871354537, |
|
"grad_norm": 7.442730903625488, |
|
"learning_rate": 6.404152724371892e-08, |
|
"loss": 1.2081, |
|
"step": 1187 |
|
}, |
|
{ |
|
"epoch": 0.9657555126511533, |
|
"grad_norm": 2.9155330657958984, |
|
"learning_rate": 6.110048911826871e-08, |
|
"loss": 1.1837, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 0.966568438166853, |
|
"grad_norm": 5.689270496368408, |
|
"learning_rate": 5.82283728397115e-08, |
|
"loss": 1.2039, |
|
"step": 1189 |
|
}, |
|
{ |
|
"epoch": 0.9673813636825526, |
|
"grad_norm": 2.791161060333252, |
|
"learning_rate": 5.542519832489546e-08, |
|
"loss": 1.2032, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.9681942891982522, |
|
"grad_norm": 3.127793312072754, |
|
"learning_rate": 5.269098501259007e-08, |
|
"loss": 1.2016, |
|
"step": 1191 |
|
}, |
|
{ |
|
"epoch": 0.9690072147139518, |
|
"grad_norm": 2.8209614753723145, |
|
"learning_rate": 5.002575186334735e-08, |
|
"loss": 1.1624, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 0.9698201402296515, |
|
"grad_norm": 3.3611080646514893, |
|
"learning_rate": 4.742951735937418e-08, |
|
"loss": 1.2068, |
|
"step": 1193 |
|
}, |
|
{ |
|
"epoch": 0.9706330657453511, |
|
"grad_norm": 5.118293285369873, |
|
"learning_rate": 4.490229950440239e-08, |
|
"loss": 1.2398, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 0.9714459912610507, |
|
"grad_norm": 9.395883560180664, |
|
"learning_rate": 4.2444115823562226e-08, |
|
"loss": 1.3143, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.9722589167767504, |
|
"grad_norm": 3.1017065048217773, |
|
"learning_rate": 4.005498336326463e-08, |
|
"loss": 1.1918, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 0.9730718422924499, |
|
"grad_norm": 3.226966142654419, |
|
"learning_rate": 3.773491869108137e-08, |
|
"loss": 1.2046, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 0.9738847678081496, |
|
"grad_norm": 3.233693838119507, |
|
"learning_rate": 3.548393789562732e-08, |
|
"loss": 1.2325, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 0.9746976933238493, |
|
"grad_norm": 3.159299612045288, |
|
"learning_rate": 3.3302056586453916e-08, |
|
"loss": 1.1693, |
|
"step": 1199 |
|
}, |
|
{ |
|
"epoch": 0.9755106188395488, |
|
"grad_norm": 2.7059924602508545, |
|
"learning_rate": 3.118928989393699e-08, |
|
"loss": 1.2422, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.9763235443552485, |
|
"grad_norm": 3.511061668395996, |
|
"learning_rate": 2.9145652469174666e-08, |
|
"loss": 1.2184, |
|
"step": 1201 |
|
}, |
|
{ |
|
"epoch": 0.977136469870948, |
|
"grad_norm": 4.077070236206055, |
|
"learning_rate": 2.7171158483882963e-08, |
|
"loss": 1.2309, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 0.9779493953866477, |
|
"grad_norm": 3.434537887573242, |
|
"learning_rate": 2.5265821630298116e-08, |
|
"loss": 1.1943, |
|
"step": 1203 |
|
}, |
|
{ |
|
"epoch": 0.9787623209023473, |
|
"grad_norm": 3.698641300201416, |
|
"learning_rate": 2.3429655121085525e-08, |
|
"loss": 1.2671, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 0.9795752464180469, |
|
"grad_norm": 6.674719333648682, |
|
"learning_rate": 2.1662671689242076e-08, |
|
"loss": 1.1961, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.9803881719337466, |
|
"grad_norm": 4.9146952629089355, |
|
"learning_rate": 1.996488358801174e-08, |
|
"loss": 1.2345, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 0.9812010974494462, |
|
"grad_norm": 2.7147114276885986, |
|
"learning_rate": 1.8336302590798992e-08, |
|
"loss": 1.2118, |
|
"step": 1207 |
|
}, |
|
{ |
|
"epoch": 0.9820140229651458, |
|
"grad_norm": 2.809692859649658, |
|
"learning_rate": 1.677693999109109e-08, |
|
"loss": 1.2162, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 0.9828269484808454, |
|
"grad_norm": 3.857846975326538, |
|
"learning_rate": 1.5286806602372583e-08, |
|
"loss": 1.1792, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 0.9836398739965451, |
|
"grad_norm": 3.8911325931549072, |
|
"learning_rate": 1.3865912758054267e-08, |
|
"loss": 1.2332, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.9844527995122447, |
|
"grad_norm": 3.5572190284729004, |
|
"learning_rate": 1.2514268311405452e-08, |
|
"loss": 1.2174, |
|
"step": 1211 |
|
}, |
|
{ |
|
"epoch": 0.9852657250279443, |
|
"grad_norm": 3.22208833694458, |
|
"learning_rate": 1.1231882635477364e-08, |
|
"loss": 1.2146, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 0.986078650543644, |
|
"grad_norm": 4.469923973083496, |
|
"learning_rate": 1.0018764623045407e-08, |
|
"loss": 1.2168, |
|
"step": 1213 |
|
}, |
|
{ |
|
"epoch": 0.9868915760593435, |
|
"grad_norm": 3.1559510231018066, |
|
"learning_rate": 8.874922686541442e-09, |
|
"loss": 1.2074, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 0.9877045015750432, |
|
"grad_norm": 2.6890878677368164, |
|
"learning_rate": 7.800364758002721e-09, |
|
"loss": 1.2358, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.9885174270907429, |
|
"grad_norm": 3.4091622829437256, |
|
"learning_rate": 6.795098289008595e-09, |
|
"loss": 1.2484, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 0.9893303526064424, |
|
"grad_norm": 3.0762569904327393, |
|
"learning_rate": 5.859130250636113e-09, |
|
"loss": 1.1787, |
|
"step": 1217 |
|
}, |
|
{ |
|
"epoch": 0.9901432781221421, |
|
"grad_norm": 2.616163492202759, |
|
"learning_rate": 4.992467133406731e-09, |
|
"loss": 1.2092, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 0.9909562036378416, |
|
"grad_norm": 3.0248591899871826, |
|
"learning_rate": 4.195114947244117e-09, |
|
"loss": 1.1998, |
|
"step": 1219 |
|
}, |
|
{ |
|
"epoch": 0.9917691291535413, |
|
"grad_norm": 5.664068698883057, |
|
"learning_rate": 3.4670792214297476e-09, |
|
"loss": 1.2539, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.9925820546692409, |
|
"grad_norm": 3.449087619781494, |
|
"learning_rate": 2.808365004569602e-09, |
|
"loss": 1.2463, |
|
"step": 1221 |
|
}, |
|
{ |
|
"epoch": 0.9933949801849405, |
|
"grad_norm": 2.958399534225464, |
|
"learning_rate": 2.2189768645519693e-09, |
|
"loss": 1.2076, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 0.9942079057006402, |
|
"grad_norm": 3.4361188411712646, |
|
"learning_rate": 1.6989188885219165e-09, |
|
"loss": 1.2436, |
|
"step": 1223 |
|
}, |
|
{ |
|
"epoch": 0.9950208312163398, |
|
"grad_norm": 3.0529403686523438, |
|
"learning_rate": 1.2481946828502011e-09, |
|
"loss": 1.1955, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 0.9958337567320394, |
|
"grad_norm": 3.090090274810791, |
|
"learning_rate": 8.668073731088467e-10, |
|
"loss": 1.1455, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.996646682247739, |
|
"grad_norm": 3.2662580013275146, |
|
"learning_rate": 5.547596040489378e-10, |
|
"loss": 1.2283, |
|
"step": 1226 |
|
}, |
|
{ |
|
"epoch": 0.9974596077634387, |
|
"grad_norm": 2.7874884605407715, |
|
"learning_rate": 3.1205353958285724e-10, |
|
"loss": 1.2011, |
|
"step": 1227 |
|
}, |
|
{ |
|
"epoch": 0.9982725332791383, |
|
"grad_norm": 2.9483141899108887, |
|
"learning_rate": 1.3869086276985243e-10, |
|
"loss": 1.272, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 0.9990854587948379, |
|
"grad_norm": 3.550588607788086, |
|
"learning_rate": 3.467277580271322e-11, |
|
"loss": 1.1665, |
|
"step": 1229 |
|
}, |
|
{ |
|
"epoch": 0.9998983843105376, |
|
"grad_norm": 3.500861406326294, |
|
"learning_rate": 0.0, |
|
"loss": 1.2382, |
|
"step": 1230 |
|
}, |
|
    {
      "epoch": 0.9998983843105376,
      "step": 1230,
      "total_flos": 3.1215366383127757e+18,
      "train_loss": 1.3087712280149382,
      "train_runtime": 25084.8125,
      "train_samples_per_second": 6.277,
      "train_steps_per_second": 0.049
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 1230,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 7975,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.1215366383127757e+18,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}