{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 200,
  "global_step": 1376,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0014534883720930232,
      "grad_norm": 4.90753978197305,
      "learning_rate": 9.999986968251269e-06,
      "loss": 0.2756,
      "step": 1
    },
    {
      "epoch": 0.0029069767441860465,
      "grad_norm": 2.0826321480239645,
      "learning_rate": 9.99994787307301e-06,
      "loss": 0.2055,
      "step": 2
    },
    {
      "epoch": 0.00436046511627907,
      "grad_norm": 2.0071189367521165,
      "learning_rate": 9.999882714669009e-06,
      "loss": 0.2428,
      "step": 3
    },
    {
      "epoch": 0.005813953488372093,
      "grad_norm": 1.8308409567606185,
      "learning_rate": 9.99979149337892e-06,
      "loss": 0.1988,
      "step": 4
    },
    {
      "epoch": 0.007267441860465116,
      "grad_norm": 2.1523095681813,
      "learning_rate": 9.999674209678253e-06,
      "loss": 0.2308,
      "step": 5
    },
    {
      "epoch": 0.00872093023255814,
      "grad_norm": 2.042843363698041,
      "learning_rate": 9.999530864178371e-06,
      "loss": 0.1847,
      "step": 6
    },
    {
      "epoch": 0.010174418604651164,
      "grad_norm": 1.7333272867252485,
      "learning_rate": 9.999361457626493e-06,
      "loss": 0.2195,
      "step": 7
    },
    {
      "epoch": 0.011627906976744186,
      "grad_norm": 1.68765684619863,
      "learning_rate": 9.999165990905684e-06,
      "loss": 0.1968,
      "step": 8
    },
    {
      "epoch": 0.01308139534883721,
      "grad_norm": 2.3397968827666062,
      "learning_rate": 9.99894446503485e-06,
      "loss": 0.1655,
      "step": 9
    },
    {
      "epoch": 0.014534883720930232,
      "grad_norm": 1.8740327980184355,
      "learning_rate": 9.998696881168743e-06,
      "loss": 0.2018,
      "step": 10
    },
    {
      "epoch": 0.015988372093023256,
      "grad_norm": 1.8903939988289815,
      "learning_rate": 9.998423240597942e-06,
      "loss": 0.1755,
      "step": 11
    },
    {
      "epoch": 0.01744186046511628,
      "grad_norm": 2.536766624700735,
      "learning_rate": 9.998123544748852e-06,
      "loss": 0.2422,
      "step": 12
    },
    {
      "epoch": 0.0188953488372093,
      "grad_norm": 2.019676282269962,
      "learning_rate": 9.997797795183699e-06,
      "loss": 0.1937,
      "step": 13
    },
    {
      "epoch": 0.020348837209302327,
      "grad_norm": 1.717985207230675,
      "learning_rate": 9.997445993600516e-06,
      "loss": 0.1596,
      "step": 14
    },
    {
      "epoch": 0.02180232558139535,
      "grad_norm": 1.7865449356650134,
      "learning_rate": 9.99706814183314e-06,
      "loss": 0.1943,
      "step": 15
    },
    {
      "epoch": 0.023255813953488372,
      "grad_norm": 1.9620864278100416,
      "learning_rate": 9.996664241851197e-06,
      "loss": 0.1623,
      "step": 16
    },
    {
      "epoch": 0.024709302325581394,
      "grad_norm": 1.8373307354674375,
      "learning_rate": 9.996234295760099e-06,
      "loss": 0.2007,
      "step": 17
    },
    {
      "epoch": 0.02616279069767442,
      "grad_norm": 2.227861617466709,
      "learning_rate": 9.995778305801025e-06,
      "loss": 0.18,
      "step": 18
    },
    {
      "epoch": 0.027616279069767442,
      "grad_norm": 1.6895392880355353,
      "learning_rate": 9.995296274350912e-06,
      "loss": 0.1483,
      "step": 19
    },
    {
      "epoch": 0.029069767441860465,
      "grad_norm": 1.7640692153593953,
      "learning_rate": 9.994788203922447e-06,
      "loss": 0.149,
      "step": 20
    },
    {
      "epoch": 0.030523255813953487,
      "grad_norm": 1.455602398611072,
      "learning_rate": 9.994254097164047e-06,
      "loss": 0.1534,
      "step": 21
    },
    {
      "epoch": 0.03197674418604651,
      "grad_norm": 1.932687065520904,
      "learning_rate": 9.993693956859849e-06,
      "loss": 0.1628,
      "step": 22
    },
    {
      "epoch": 0.03343023255813953,
      "grad_norm": 1.86140975395309,
      "learning_rate": 9.9931077859297e-06,
      "loss": 0.189,
      "step": 23
    },
    {
      "epoch": 0.03488372093023256,
      "grad_norm": 1.6940825298691309,
      "learning_rate": 9.99249558742913e-06,
      "loss": 0.1636,
      "step": 24
    },
    {
      "epoch": 0.036337209302325583,
      "grad_norm": 1.9970017291615039,
      "learning_rate": 9.991857364549347e-06,
      "loss": 0.1354,
      "step": 25
    },
    {
      "epoch": 0.0377906976744186,
      "grad_norm": 1.611614767401661,
      "learning_rate": 9.991193120617213e-06,
      "loss": 0.1271,
      "step": 26
    },
    {
      "epoch": 0.03924418604651163,
      "grad_norm": 1.6668640058803244,
      "learning_rate": 9.990502859095234e-06,
      "loss": 0.1471,
      "step": 27
    },
    {
      "epoch": 0.040697674418604654,
      "grad_norm": 1.6279188049671565,
      "learning_rate": 9.989786583581535e-06,
      "loss": 0.1672,
      "step": 28
    },
    {
      "epoch": 0.04215116279069767,
      "grad_norm": 1.7050930977268255,
      "learning_rate": 9.989044297809846e-06,
      "loss": 0.1621,
      "step": 29
    },
    {
      "epoch": 0.0436046511627907,
      "grad_norm": 1.6059761856688315,
      "learning_rate": 9.98827600564948e-06,
      "loss": 0.1413,
      "step": 30
    },
    {
      "epoch": 0.04505813953488372,
      "grad_norm": 1.657144516453862,
      "learning_rate": 9.987481711105312e-06,
      "loss": 0.1747,
      "step": 31
    },
    {
      "epoch": 0.046511627906976744,
      "grad_norm": 1.5640565805511477,
      "learning_rate": 9.986661418317759e-06,
      "loss": 0.1553,
      "step": 32
    },
    {
      "epoch": 0.04796511627906977,
      "grad_norm": 1.3799874670130874,
      "learning_rate": 9.985815131562765e-06,
      "loss": 0.1185,
      "step": 33
    },
    {
      "epoch": 0.04941860465116279,
      "grad_norm": 1.6674345854485941,
      "learning_rate": 9.984942855251765e-06,
      "loss": 0.1666,
      "step": 34
    },
    {
      "epoch": 0.050872093023255814,
      "grad_norm": 1.6727808497465833,
      "learning_rate": 9.984044593931674e-06,
      "loss": 0.1918,
      "step": 35
    },
    {
      "epoch": 0.05232558139534884,
      "grad_norm": 1.5015093467589995,
      "learning_rate": 9.983120352284861e-06,
      "loss": 0.1464,
      "step": 36
    },
    {
      "epoch": 0.05377906976744186,
      "grad_norm": 1.505665041651918,
      "learning_rate": 9.982170135129116e-06,
      "loss": 0.1259,
      "step": 37
    },
    {
      "epoch": 0.055232558139534885,
      "grad_norm": 1.5003368863482498,
      "learning_rate": 9.981193947417638e-06,
      "loss": 0.1187,
      "step": 38
    },
    {
      "epoch": 0.056686046511627904,
      "grad_norm": 1.900977624703376,
      "learning_rate": 9.980191794239e-06,
      "loss": 0.1584,
      "step": 39
    },
    {
      "epoch": 0.05813953488372093,
      "grad_norm": 2.1817255633060855,
      "learning_rate": 9.979163680817124e-06,
      "loss": 0.1711,
      "step": 40
    },
    {
      "epoch": 0.059593023255813955,
      "grad_norm": 1.9211415894532136,
      "learning_rate": 9.978109612511257e-06,
      "loss": 0.1723,
      "step": 41
    },
    {
      "epoch": 0.061046511627906974,
      "grad_norm": 3.230526894351387,
      "learning_rate": 9.977029594815942e-06,
      "loss": 0.2277,
      "step": 42
    },
    {
      "epoch": 0.0625,
      "grad_norm": 1.6529179532438223,
      "learning_rate": 9.975923633360985e-06,
      "loss": 0.1547,
      "step": 43
    },
    {
      "epoch": 0.06395348837209303,
      "grad_norm": 1.7575964357251361,
      "learning_rate": 9.974791733911431e-06,
      "loss": 0.2112,
      "step": 44
    },
    {
      "epoch": 0.06540697674418605,
      "grad_norm": 1.7248448102436156,
      "learning_rate": 9.973633902367532e-06,
      "loss": 0.1583,
      "step": 45
    },
    {
      "epoch": 0.06686046511627906,
      "grad_norm": 1.747621900146478,
      "learning_rate": 9.972450144764713e-06,
      "loss": 0.1747,
      "step": 46
    },
    {
      "epoch": 0.06831395348837209,
      "grad_norm": 1.8290141742667902,
      "learning_rate": 9.971240467273552e-06,
      "loss": 0.1524,
      "step": 47
    },
    {
      "epoch": 0.06976744186046512,
      "grad_norm": 1.8430702236241727,
      "learning_rate": 9.970004876199731e-06,
      "loss": 0.1496,
      "step": 48
    },
    {
      "epoch": 0.07122093023255814,
      "grad_norm": 2.2055666937974614,
      "learning_rate": 9.968743377984013e-06,
      "loss": 0.17,
      "step": 49
    },
    {
      "epoch": 0.07267441860465117,
      "grad_norm": 1.538052515526653,
      "learning_rate": 9.967455979202214e-06,
      "loss": 0.154,
      "step": 50
    },
    {
      "epoch": 0.07412790697674419,
      "grad_norm": 1.633366596107583,
      "learning_rate": 9.966142686565155e-06,
      "loss": 0.1578,
      "step": 51
    },
    {
      "epoch": 0.0755813953488372,
      "grad_norm": 1.6546648500780583,
      "learning_rate": 9.964803506918634e-06,
      "loss": 0.16,
      "step": 52
    },
    {
      "epoch": 0.07703488372093023,
      "grad_norm": 1.6111923238734047,
      "learning_rate": 9.963438447243394e-06,
      "loss": 0.1182,
      "step": 53
    },
    {
      "epoch": 0.07848837209302326,
      "grad_norm": 1.5663545251276552,
      "learning_rate": 9.96204751465508e-06,
      "loss": 0.1348,
      "step": 54
    },
    {
      "epoch": 0.07994186046511628,
      "grad_norm": 1.5491604249911004,
      "learning_rate": 9.960630716404205e-06,
      "loss": 0.1305,
      "step": 55
    },
    {
      "epoch": 0.08139534883720931,
      "grad_norm": 1.5904201245888179,
      "learning_rate": 9.959188059876115e-06,
      "loss": 0.1485,
      "step": 56
    },
    {
      "epoch": 0.08284883720930232,
      "grad_norm": 1.8216437492726838,
      "learning_rate": 9.957719552590944e-06,
      "loss": 0.1787,
      "step": 57
    },
    {
      "epoch": 0.08430232558139535,
      "grad_norm": 1.423635987100293,
      "learning_rate": 9.956225202203576e-06,
      "loss": 0.1335,
      "step": 58
    },
    {
      "epoch": 0.08575581395348837,
      "grad_norm": 1.651615225486003,
      "learning_rate": 9.954705016503614e-06,
      "loss": 0.1339,
      "step": 59
    },
    {
      "epoch": 0.0872093023255814,
      "grad_norm": 1.658410900976534,
      "learning_rate": 9.95315900341533e-06,
      "loss": 0.1618,
      "step": 60
    },
    {
      "epoch": 0.08866279069767442,
      "grad_norm": 1.7421372069577428,
      "learning_rate": 9.951587170997621e-06,
      "loss": 0.1559,
      "step": 61
    },
    {
      "epoch": 0.09011627906976744,
      "grad_norm": 1.6410793659457708,
      "learning_rate": 9.949989527443982e-06,
      "loss": 0.1187,
      "step": 62
    },
    {
      "epoch": 0.09156976744186046,
      "grad_norm": 1.3084758751505552,
      "learning_rate": 9.948366081082446e-06,
      "loss": 0.1078,
      "step": 63
    },
    {
      "epoch": 0.09302325581395349,
      "grad_norm": 1.8532352795685818,
      "learning_rate": 9.946716840375552e-06,
      "loss": 0.1787,
      "step": 64
    },
    {
      "epoch": 0.09447674418604651,
      "grad_norm": 1.7832244807577888,
      "learning_rate": 9.945041813920296e-06,
      "loss": 0.1327,
      "step": 65
    },
    {
      "epoch": 0.09593023255813954,
      "grad_norm": 1.5586347200013577,
      "learning_rate": 9.943341010448086e-06,
      "loss": 0.1254,
      "step": 66
    },
    {
      "epoch": 0.09738372093023256,
      "grad_norm": 1.3472131423550082,
      "learning_rate": 9.941614438824703e-06,
      "loss": 0.1473,
      "step": 67
    },
    {
      "epoch": 0.09883720930232558,
      "grad_norm": 1.6722596945098753,
      "learning_rate": 9.939862108050244e-06,
      "loss": 0.1816,
      "step": 68
    },
    {
      "epoch": 0.1002906976744186,
      "grad_norm": 1.5162995990299708,
      "learning_rate": 9.93808402725908e-06,
      "loss": 0.1418,
      "step": 69
    },
    {
      "epoch": 0.10174418604651163,
      "grad_norm": 1.7264251629725236,
      "learning_rate": 9.936280205719817e-06,
      "loss": 0.1845,
      "step": 70
    },
    {
      "epoch": 0.10319767441860465,
      "grad_norm": 1.8868123011708366,
      "learning_rate": 9.934450652835233e-06,
      "loss": 0.1454,
      "step": 71
    },
    {
      "epoch": 0.10465116279069768,
      "grad_norm": 1.6996113668126296,
      "learning_rate": 9.932595378142233e-06,
      "loss": 0.1611,
      "step": 72
    },
    {
      "epoch": 0.10610465116279069,
      "grad_norm": 1.3926857562718415,
      "learning_rate": 9.930714391311813e-06,
      "loss": 0.13,
      "step": 73
    },
    {
      "epoch": 0.10755813953488372,
      "grad_norm": 1.3730967025306435,
      "learning_rate": 9.928807702148986e-06,
      "loss": 0.14,
      "step": 74
    },
    {
      "epoch": 0.10901162790697674,
      "grad_norm": 1.6148257033625841,
      "learning_rate": 9.926875320592756e-06,
      "loss": 0.2184,
      "step": 75
    },
    {
      "epoch": 0.11046511627906977,
      "grad_norm": 1.6770555686431594,
      "learning_rate": 9.924917256716042e-06,
      "loss": 0.1343,
      "step": 76
    },
    {
      "epoch": 0.1119186046511628,
      "grad_norm": 1.5552428567558088,
      "learning_rate": 9.922933520725645e-06,
      "loss": 0.1228,
      "step": 77
    },
    {
      "epoch": 0.11337209302325581,
      "grad_norm": 1.6238590358211495,
      "learning_rate": 9.920924122962185e-06,
      "loss": 0.1883,
      "step": 78
    },
    {
      "epoch": 0.11482558139534883,
      "grad_norm": 1.3715125415573524,
      "learning_rate": 9.918889073900046e-06,
      "loss": 0.1204,
      "step": 79
    },
    {
      "epoch": 0.11627906976744186,
      "grad_norm": 1.5437673536268413,
      "learning_rate": 9.91682838414733e-06,
      "loss": 0.1443,
      "step": 80
    },
    {
      "epoch": 0.11773255813953488,
      "grad_norm": 1.9238885575325282,
      "learning_rate": 9.914742064445795e-06,
      "loss": 0.151,
      "step": 81
    },
    {
      "epoch": 0.11918604651162791,
      "grad_norm": 1.5000041779719893,
      "learning_rate": 9.912630125670793e-06,
      "loss": 0.1478,
      "step": 82
    },
    {
      "epoch": 0.12063953488372094,
      "grad_norm": 1.4161989982638625,
      "learning_rate": 9.910492578831231e-06,
      "loss": 0.1205,
      "step": 83
    },
    {
      "epoch": 0.12209302325581395,
      "grad_norm": 1.887456331836179,
      "learning_rate": 9.908329435069495e-06,
      "loss": 0.1892,
      "step": 84
    },
    {
      "epoch": 0.12354651162790697,
      "grad_norm": 1.4188322937106472,
      "learning_rate": 9.906140705661406e-06,
      "loss": 0.1354,
      "step": 85
    },
    {
      "epoch": 0.125,
      "grad_norm": 1.4674049503326925,
      "learning_rate": 9.903926402016153e-06,
      "loss": 0.1188,
      "step": 86
    },
    {
      "epoch": 0.12645348837209303,
      "grad_norm": 1.7991239782209092,
      "learning_rate": 9.901686535676233e-06,
      "loss": 0.1758,
      "step": 87
    },
    {
      "epoch": 0.12790697674418605,
      "grad_norm": 1.755012463932795,
      "learning_rate": 9.899421118317399e-06,
      "loss": 0.1811,
      "step": 88
    },
    {
      "epoch": 0.12936046511627908,
      "grad_norm": 1.6961857675852638,
      "learning_rate": 9.897130161748588e-06,
      "loss": 0.1617,
      "step": 89
    },
    {
      "epoch": 0.1308139534883721,
      "grad_norm": 1.8355331445231158,
      "learning_rate": 9.894813677911868e-06,
      "loss": 0.1709,
      "step": 90
    },
    {
      "epoch": 0.13226744186046513,
      "grad_norm": 1.637508412196323,
      "learning_rate": 9.892471678882377e-06,
      "loss": 0.1467,
      "step": 91
    },
    {
      "epoch": 0.13372093023255813,
      "grad_norm": 2.2746840658968677,
      "learning_rate": 9.890104176868246e-06,
      "loss": 0.1416,
      "step": 92
    },
    {
      "epoch": 0.13517441860465115,
      "grad_norm": 1.7646971278296149,
      "learning_rate": 9.887711184210559e-06,
      "loss": 0.1597,
      "step": 93
    },
    {
      "epoch": 0.13662790697674418,
      "grad_norm": 1.8116059351522782,
      "learning_rate": 9.885292713383264e-06,
      "loss": 0.1599,
      "step": 94
    },
    {
      "epoch": 0.1380813953488372,
      "grad_norm": 1.8626165104101766,
      "learning_rate": 9.882848776993119e-06,
      "loss": 0.1377,
      "step": 95
    },
    {
      "epoch": 0.13953488372093023,
      "grad_norm": 1.5910215156996428,
      "learning_rate": 9.880379387779637e-06,
      "loss": 0.1596,
      "step": 96
    },
    {
      "epoch": 0.14098837209302326,
      "grad_norm": 1.3431975319409974,
      "learning_rate": 9.877884558614997e-06,
      "loss": 0.1346,
      "step": 97
    },
    {
      "epoch": 0.14244186046511628,
      "grad_norm": 1.3362805614728352,
      "learning_rate": 9.875364302503995e-06,
      "loss": 0.1127,
      "step": 98
    },
    {
      "epoch": 0.1438953488372093,
      "grad_norm": 1.8199783165196086,
      "learning_rate": 9.872818632583969e-06,
      "loss": 0.1604,
      "step": 99
    },
    {
      "epoch": 0.14534883720930233,
      "grad_norm": 1.6827249026814992,
      "learning_rate": 9.870247562124731e-06,
      "loss": 0.1346,
      "step": 100
    },
    {
      "epoch": 0.14680232558139536,
      "grad_norm": 1.4674119242601242,
      "learning_rate": 9.8676511045285e-06,
      "loss": 0.1168,
      "step": 101
    },
    {
      "epoch": 0.14825581395348839,
      "grad_norm": 1.6729272753403375,
      "learning_rate": 9.865029273329826e-06,
      "loss": 0.1478,
      "step": 102
    },
    {
      "epoch": 0.14970930232558138,
      "grad_norm": 1.716840091927574,
      "learning_rate": 9.862382082195531e-06,
      "loss": 0.1494,
      "step": 103
    },
    {
      "epoch": 0.1511627906976744,
      "grad_norm": 1.8089594266192164,
      "learning_rate": 9.859709544924624e-06,
      "loss": 0.1362,
      "step": 104
    },
    {
      "epoch": 0.15261627906976744,
      "grad_norm": 1.5150282280782363,
      "learning_rate": 9.85701167544824e-06,
      "loss": 0.1684,
      "step": 105
    },
    {
      "epoch": 0.15406976744186046,
      "grad_norm": 1.523111761533529,
      "learning_rate": 9.854288487829561e-06,
      "loss": 0.145,
      "step": 106
    },
    {
      "epoch": 0.1555232558139535,
      "grad_norm": 1.5473976095842301,
      "learning_rate": 9.851539996263748e-06,
      "loss": 0.1349,
      "step": 107
    },
    {
      "epoch": 0.1569767441860465,
      "grad_norm": 1.9497016837218222,
      "learning_rate": 9.848766215077859e-06,
      "loss": 0.1751,
      "step": 108
    },
    {
      "epoch": 0.15843023255813954,
      "grad_norm": 1.8085748930229228,
      "learning_rate": 9.845967158730783e-06,
      "loss": 0.1401,
      "step": 109
    },
    {
      "epoch": 0.15988372093023256,
      "grad_norm": 1.7391260062163252,
      "learning_rate": 9.843142841813158e-06,
      "loss": 0.1599,
      "step": 110
    },
    {
      "epoch": 0.1613372093023256,
      "grad_norm": 1.6195797255004982,
      "learning_rate": 9.840293279047302e-06,
      "loss": 0.1632,
      "step": 111
    },
    {
      "epoch": 0.16279069767441862,
      "grad_norm": 1.82089731812025,
      "learning_rate": 9.837418485287126e-06,
      "loss": 0.1355,
      "step": 112
    },
    {
      "epoch": 0.16424418604651161,
      "grad_norm": 1.7953380194093425,
      "learning_rate": 9.83451847551807e-06,
      "loss": 0.1635,
      "step": 113
    },
    {
      "epoch": 0.16569767441860464,
      "grad_norm": 1.6497571195888396,
      "learning_rate": 9.831593264857011e-06,
      "loss": 0.1563,
      "step": 114
    },
    {
      "epoch": 0.16715116279069767,
      "grad_norm": 1.4404520426344958,
      "learning_rate": 9.828642868552195e-06,
      "loss": 0.142,
      "step": 115
    },
    {
      "epoch": 0.1686046511627907,
      "grad_norm": 1.5292175625418722,
      "learning_rate": 9.825667301983149e-06,
      "loss": 0.1322,
      "step": 116
    },
    {
      "epoch": 0.17005813953488372,
      "grad_norm": 2.0789140113841236,
      "learning_rate": 9.822666580660606e-06,
      "loss": 0.1272,
      "step": 117
    },
    {
      "epoch": 0.17151162790697674,
      "grad_norm": 1.5437780941386026,
      "learning_rate": 9.819640720226429e-06,
      "loss": 0.1699,
      "step": 118
    },
    {
      "epoch": 0.17296511627906977,
      "grad_norm": 1.5471057200671505,
      "learning_rate": 9.816589736453516e-06,
      "loss": 0.1233,
      "step": 119
    },
    {
      "epoch": 0.1744186046511628,
      "grad_norm": 1.5206963754435927,
      "learning_rate": 9.81351364524573e-06,
      "loss": 0.1265,
      "step": 120
    },
    {
      "epoch": 0.17587209302325582,
      "grad_norm": 2.2824687888973947,
      "learning_rate": 9.81041246263781e-06,
      "loss": 0.1806,
      "step": 121
    },
    {
      "epoch": 0.17732558139534885,
      "grad_norm": 1.490348186092453,
      "learning_rate": 9.807286204795287e-06,
      "loss": 0.1254,
      "step": 122
    },
    {
      "epoch": 0.17877906976744187,
      "grad_norm": 1.4547407899623217,
      "learning_rate": 9.804134888014407e-06,
      "loss": 0.1669,
      "step": 123
    },
    {
      "epoch": 0.18023255813953487,
      "grad_norm": 1.3955416003002072,
      "learning_rate": 9.800958528722035e-06,
      "loss": 0.1227,
      "step": 124
    },
    {
      "epoch": 0.1816860465116279,
      "grad_norm": 1.366189834603044,
      "learning_rate": 9.797757143475577e-06,
      "loss": 0.1141,
      "step": 125
    },
    {
      "epoch": 0.18313953488372092,
      "grad_norm": 1.784363750973548,
      "learning_rate": 9.794530748962894e-06,
      "loss": 0.1545,
      "step": 126
    },
    {
      "epoch": 0.18459302325581395,
      "grad_norm": 1.694786135391199,
      "learning_rate": 9.791279362002212e-06,
      "loss": 0.1441,
      "step": 127
    },
    {
      "epoch": 0.18604651162790697,
      "grad_norm": 1.5799996656623245,
      "learning_rate": 9.78800299954203e-06,
      "loss": 0.1495,
      "step": 128
    },
    {
      "epoch": 0.1875,
      "grad_norm": 1.810120051088718,
      "learning_rate": 9.784701678661045e-06,
      "loss": 0.1489,
      "step": 129
    },
    {
      "epoch": 0.18895348837209303,
      "grad_norm": 1.6065641177622767,
      "learning_rate": 9.781375416568048e-06,
      "loss": 0.1498,
      "step": 130
    },
    {
      "epoch": 0.19040697674418605,
      "grad_norm": 1.6146187321984817,
      "learning_rate": 9.778024230601846e-06,
      "loss": 0.1616,
      "step": 131
    },
    {
      "epoch": 0.19186046511627908,
      "grad_norm": 1.5540139065991918,
      "learning_rate": 9.774648138231163e-06,
      "loss": 0.15,
      "step": 132
    },
    {
      "epoch": 0.1933139534883721,
      "grad_norm": 1.692543845359626,
      "learning_rate": 9.771247157054554e-06,
      "loss": 0.1459,
      "step": 133
    },
    {
      "epoch": 0.19476744186046513,
      "grad_norm": 1.9872344731203477,
      "learning_rate": 9.767821304800312e-06,
      "loss": 0.153,
      "step": 134
    },
    {
      "epoch": 0.19622093023255813,
      "grad_norm": 2.1160080220932027,
      "learning_rate": 9.764370599326375e-06,
      "loss": 0.179,
      "step": 135
    },
    {
      "epoch": 0.19767441860465115,
      "grad_norm": 1.6129189257388363,
      "learning_rate": 9.760895058620236e-06,
      "loss": 0.1689,
      "step": 136
    },
    {
      "epoch": 0.19912790697674418,
      "grad_norm": 1.650799570186251,
      "learning_rate": 9.75739470079884e-06,
      "loss": 0.1671,
      "step": 137
    },
    {
      "epoch": 0.2005813953488372,
      "grad_norm": 2.030672631597455,
      "learning_rate": 9.753869544108504e-06,
      "loss": 0.18,
      "step": 138
    },
    {
      "epoch": 0.20203488372093023,
      "grad_norm": 1.319224461569878,
      "learning_rate": 9.75031960692481e-06,
      "loss": 0.1483,
      "step": 139
    },
    {
      "epoch": 0.20348837209302326,
      "grad_norm": 1.407548702023548,
      "learning_rate": 9.74674490775251e-06,
      "loss": 0.1333,
      "step": 140
    },
    {
      "epoch": 0.20494186046511628,
      "grad_norm": 1.4802679481612602,
      "learning_rate": 9.743145465225443e-06,
      "loss": 0.1117,
      "step": 141
    },
    {
      "epoch": 0.2063953488372093,
      "grad_norm": 1.4078486946857556,
      "learning_rate": 9.739521298106417e-06,
      "loss": 0.1307,
      "step": 142
    },
    {
      "epoch": 0.20784883720930233,
      "grad_norm": 1.9988611788754047,
      "learning_rate": 9.735872425287124e-06,
      "loss": 0.1269,
      "step": 143
    },
    {
      "epoch": 0.20930232558139536,
      "grad_norm": 1.4797178696285465,
      "learning_rate": 9.732198865788047e-06,
      "loss": 0.1559,
      "step": 144
    },
    {
      "epoch": 0.21075581395348839,
      "grad_norm": 1.6049649292469328,
      "learning_rate": 9.728500638758345e-06,
      "loss": 0.1381,
      "step": 145
    },
    {
      "epoch": 0.21220930232558138,
      "grad_norm": 1.811631968427576,
      "learning_rate": 9.724777763475765e-06,
      "loss": 0.1637,
      "step": 146
    },
    {
      "epoch": 0.2136627906976744,
      "grad_norm": 1.4755303711009382,
      "learning_rate": 9.721030259346536e-06,
      "loss": 0.1054,
      "step": 147
    },
    {
      "epoch": 0.21511627906976744,
      "grad_norm": 1.5259450627037843,
      "learning_rate": 9.71725814590527e-06,
      "loss": 0.1422,
      "step": 148
    },
    {
      "epoch": 0.21656976744186046,
      "grad_norm": 1.6229825202968946,
      "learning_rate": 9.713461442814862e-06,
      "loss": 0.1298,
      "step": 149
    },
    {
      "epoch": 0.2180232558139535,
      "grad_norm": 1.4725135801149893,
      "learning_rate": 9.709640169866385e-06,
      "loss": 0.1361,
      "step": 150
    },
    {
      "epoch": 0.2194767441860465,
      "grad_norm": 1.5398373955451008,
      "learning_rate": 9.705794346978988e-06,
      "loss": 0.1531,
      "step": 151
    },
    {
      "epoch": 0.22093023255813954,
      "grad_norm": 1.676675600727517,
      "learning_rate": 9.701923994199784e-06,
      "loss": 0.1579,
      "step": 152
    },
    {
      "epoch": 0.22238372093023256,
      "grad_norm": 1.6397792048445756,
      "learning_rate": 9.698029131703766e-06,
      "loss": 0.1446,
      "step": 153
    },
    {
      "epoch": 0.2238372093023256,
      "grad_norm": 1.5894721901549633,
      "learning_rate": 9.694109779793677e-06,
      "loss": 0.1533,
      "step": 154
    },
    {
      "epoch": 0.22529069767441862,
      "grad_norm": 1.6862648872718184,
      "learning_rate": 9.690165958899923e-06,
      "loss": 0.1483,
      "step": 155
    },
    {
      "epoch": 0.22674418604651161,
      "grad_norm": 1.465800326444994,
      "learning_rate": 9.686197689580457e-06,
      "loss": 0.1338,
      "step": 156
    },
    {
      "epoch": 0.22819767441860464,
      "grad_norm": 1.4653415401590757,
      "learning_rate": 9.682204992520674e-06,
      "loss": 0.1045,
      "step": 157
    },
    {
      "epoch": 0.22965116279069767,
      "grad_norm": 1.8737300808849864,
      "learning_rate": 9.678187888533302e-06,
      "loss": 0.1457,
      "step": 158
    },
    {
      "epoch": 0.2311046511627907,
      "grad_norm": 1.9112821169487715,
      "learning_rate": 9.674146398558303e-06,
      "loss": 0.157,
      "step": 159
    },
    {
      "epoch": 0.23255813953488372,
      "grad_norm": 1.6172822347717775,
      "learning_rate": 9.670080543662742e-06,
      "loss": 0.1708,
      "step": 160
    },
    {
      "epoch": 0.23401162790697674,
      "grad_norm": 1.5162309678255983,
      "learning_rate": 9.665990345040702e-06,
      "loss": 0.1407,
      "step": 161
    },
    {
      "epoch": 0.23546511627906977,
      "grad_norm": 1.5939441326156882,
      "learning_rate": 9.66187582401316e-06,
      "loss": 0.155,
      "step": 162
    },
    {
      "epoch": 0.2369186046511628,
      "grad_norm": 1.6355525068645504,
      "learning_rate": 9.657737002027878e-06,
      "loss": 0.1847,
      "step": 163
    },
    {
      "epoch": 0.23837209302325582,
      "grad_norm": 1.3046079540322713,
      "learning_rate": 9.653573900659292e-06,
      "loss": 0.1282,
      "step": 164
    },
    {
      "epoch": 0.23982558139534885,
      "grad_norm": 1.5490255723693547,
      "learning_rate": 9.649386541608395e-06,
      "loss": 0.1477,
      "step": 165
    },
    {
      "epoch": 0.24127906976744187,
      "grad_norm": 1.595779809647306,
      "learning_rate": 9.645174946702634e-06,
      "loss": 0.1678,
      "step": 166
    },
    {
      "epoch": 0.24273255813953487,
      "grad_norm": 1.2880223015165764,
      "learning_rate": 9.640939137895788e-06,
      "loss": 0.1463,
      "step": 167
    },
    {
      "epoch": 0.2441860465116279,
      "grad_norm": 1.5824936872282962,
      "learning_rate": 9.636679137267852e-06,
      "loss": 0.1709,
      "step": 168
    },
    {
      "epoch": 0.24563953488372092,
      "grad_norm": 1.5803584865776295,
      "learning_rate": 9.632394967024934e-06,
      "loss": 0.1744,
      "step": 169
    },
    {
      "epoch": 0.24709302325581395,
      "grad_norm": 1.4525033434391654,
      "learning_rate": 9.628086649499121e-06,
      "loss": 0.1441,
      "step": 170
    },
    {
      "epoch": 0.24854651162790697,
      "grad_norm": 1.3887804855032542,
      "learning_rate": 9.623754207148382e-06,
      "loss": 0.1482,
      "step": 171
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4357196619635686,
      "learning_rate": 9.619397662556434e-06,
      "loss": 0.1281,
      "step": 172
    },
    {
      "epoch": 0.251453488372093,
      "grad_norm": 1.7625555681963379,
      "learning_rate": 9.615017038432636e-06,
      "loss": 0.1772,
      "step": 173
    },
    {
      "epoch": 0.25290697674418605,
      "grad_norm": 1.493762339052703,
      "learning_rate": 9.610612357611868e-06,
      "loss": 0.1158,
      "step": 174
    },
    {
      "epoch": 0.2543604651162791,
      "grad_norm": 1.429067156820555,
      "learning_rate": 9.606183643054401e-06,
      "loss": 0.1427,
      "step": 175
    },
    {
      "epoch": 0.2558139534883721,
      "grad_norm": 1.6273610891658072,
      "learning_rate": 9.601730917845798e-06,
      "loss": 0.1567,
      "step": 176
    },
    {
      "epoch": 0.25726744186046513,
      "grad_norm": 1.7484274417059458,
      "learning_rate": 9.597254205196775e-06,
      "loss": 0.1352,
      "step": 177
    },
    {
      "epoch": 0.25872093023255816,
      "grad_norm": 1.4768497051427878,
      "learning_rate": 9.592753528443092e-06,
      "loss": 0.1554,
      "step": 178
    },
    {
      "epoch": 0.2601744186046512,
      "grad_norm": 1.792017449974729,
      "learning_rate": 9.588228911045423e-06,
      "loss": 0.1611,
      "step": 179
    },
    {
      "epoch": 0.2616279069767442,
      "grad_norm": 1.8929106049989604,
      "learning_rate": 9.58368037658924e-06,
      "loss": 0.1599,
      "step": 180
    },
    {
      "epoch": 0.26308139534883723,
      "grad_norm": 1.6939699226935878,
      "learning_rate": 9.579107948784684e-06,
      "loss": 0.1625,
      "step": 181
    },
    {
      "epoch": 0.26453488372093026,
      "grad_norm": 1.564868234763488,
      "learning_rate": 9.57451165146645e-06,
      "loss": 0.1533,
      "step": 182
    },
    {
      "epoch": 0.26598837209302323,
      "grad_norm": 1.5170878496402254,
      "learning_rate": 9.569891508593654e-06,
      "loss": 0.1435,
      "step": 183
    },
    {
      "epoch": 0.26744186046511625,
      "grad_norm": 1.3597169726380491,
      "learning_rate": 9.565247544249709e-06,
      "loss": 0.1419,
      "step": 184
    },
    {
      "epoch": 0.2688953488372093,
      "grad_norm": 1.3285568618373091,
      "learning_rate": 9.56057978264221e-06,
      "loss": 0.1199,
      "step": 185
    },
    {
      "epoch": 0.2703488372093023,
      "grad_norm": 1.610052139476346,
      "learning_rate": 9.55588824810279e-06,
      "loss": 0.1329,
      "step": 186
    },
    {
      "epoch": 0.27180232558139533,
      "grad_norm": 1.8398199886190283,
      "learning_rate": 9.551172965087017e-06,
      "loss": 0.161,
      "step": 187
    },
    {
      "epoch": 0.27325581395348836,
      "grad_norm": 1.7116096397748537,
      "learning_rate": 9.54643395817424e-06,
      "loss": 0.1918,
      "step": 188
    },
    {
      "epoch": 0.2747093023255814,
      "grad_norm": 1.8785142775891301,
      "learning_rate": 9.541671252067475e-06,
      "loss": 0.2001,
      "step": 189
    },
    {
      "epoch": 0.2761627906976744,
      "grad_norm": 1.9515749354758625,
      "learning_rate": 9.53688487159328e-06,
      "loss": 0.195,
      "step": 190
    },
    {
      "epoch": 0.27761627906976744,
      "grad_norm": 1.539979313310192,
      "learning_rate": 9.532074841701619e-06,
      "loss": 0.1586,
      "step": 191
    },
    {
      "epoch": 0.27906976744186046,
      "grad_norm": 1.6618019039238547,
      "learning_rate": 9.527241187465735e-06,
      "loss": 0.1625,
      "step": 192
    },
    {
      "epoch": 0.2805232558139535,
      "grad_norm": 1.534997769425568,
      "learning_rate": 9.522383934082009e-06,
      "loss": 0.1421,
      "step": 193
    },
    {
      "epoch": 0.2819767441860465,
      "grad_norm": 1.6350100832134988,
      "learning_rate": 9.517503106869845e-06,
      "loss": 0.1254,
      "step": 194
    },
    {
      "epoch": 0.28343023255813954,
      "grad_norm": 1.4730057580868088,
      "learning_rate": 9.512598731271532e-06,
      "loss": 0.1575,
      "step": 195
    },
    {
      "epoch": 0.28488372093023256,
      "grad_norm": 2.071129803071774,
      "learning_rate": 9.507670832852103e-06,
      "loss": 0.1589,
      "step": 196
    },
    {
      "epoch": 0.2863372093023256,
      "grad_norm": 2.002706015726789,
      "learning_rate": 9.502719437299212e-06,
      "loss": 0.1739,
      "step": 197
    },
    {
      "epoch": 0.2877906976744186,
      "grad_norm": 1.0738206830624688,
      "learning_rate": 9.497744570422997e-06,
      "loss": 0.1045,
      "step": 198
    },
    {
      "epoch": 0.28924418604651164,
      "grad_norm": 1.6809668906114832,
      "learning_rate": 9.492746258155944e-06,
      "loss": 0.1688,
      "step": 199
    },
    {
      "epoch": 0.29069767441860467,
      "grad_norm": 1.568763317573355,
      "learning_rate": 9.487724526552753e-06,
      "loss": 0.154,
      "step": 200
    },
    {
      "epoch": 0.29069767441860467,
      "eval_loss": 0.14336518943309784,
      "eval_runtime": 2.2549,
      "eval_samples_per_second": 24.835,
      "eval_steps_per_second": 6.209,
      "step": 200
    },
    {
      "epoch": 0.2921511627906977,
      "grad_norm": 1.4702550707766895,
      "learning_rate": 9.4826794017902e-06,
      "loss": 0.1549,
      "step": 201
    },
    {
      "epoch": 0.2936046511627907,
      "grad_norm": 1.5525125309981525,
      "learning_rate": 9.477610910167005e-06,
      "loss": 0.138,
      "step": 202
    },
    {
      "epoch": 0.29505813953488375,
      "grad_norm": 1.9512544957864502,
      "learning_rate": 9.472519078103693e-06,
      "loss": 0.191,
      "step": 203
    },
    {
      "epoch": 0.29651162790697677,
      "grad_norm": 1.6914763992393016,
      "learning_rate": 9.467403932142452e-06,
      "loss": 0.1415,
      "step": 204
    },
    {
      "epoch": 0.29796511627906974,
      "grad_norm": 1.4472857294705104,
      "learning_rate": 9.462265498947002e-06,
      "loss": 0.1429,
      "step": 205
    },
    {
      "epoch": 0.29941860465116277,
      "grad_norm": 1.5823405481416377,
      "learning_rate": 9.457103805302454e-06,
      "loss": 0.1326,
      "step": 206
    },
    {
      "epoch": 0.3008720930232558,
      "grad_norm": 1.420341202696158,
      "learning_rate": 9.451918878115163e-06,
      "loss": 0.128,
      "step": 207
    },
    {
      "epoch": 0.3023255813953488,
      "grad_norm": 1.3234585094233786,
      "learning_rate": 9.446710744412595e-06,
      "loss": 0.1409,
      "step": 208
    },
    {
      "epoch": 0.30377906976744184,
      "grad_norm": 1.4054727085825658,
      "learning_rate": 9.441479431343189e-06,
      "loss": 0.1295,
      "step": 209
    },
    {
      "epoch": 0.30523255813953487,
      "grad_norm": 1.762998737448682,
      "learning_rate": 9.436224966176205e-06,
      "loss": 0.1832,
      "step": 210
    },
    {
      "epoch": 0.3066860465116279,
      "grad_norm": 1.6833002091383082,
      "learning_rate": 9.430947376301593e-06,
      "loss": 0.1393,
      "step": 211
    },
    {
      "epoch": 0.3081395348837209,
      "grad_norm": 1.4120192730188195,
      "learning_rate": 9.425646689229843e-06,
      "loss": 0.1295,
      "step": 212
    },
    {
      "epoch": 0.30959302325581395,
      "grad_norm": 1.1626004083245838,
      "learning_rate": 9.420322932591842e-06,
      "loss": 0.1202,
      "step": 213
    },
    {
      "epoch": 0.311046511627907,
      "grad_norm": 1.5259387861927707,
      "learning_rate": 9.414976134138736e-06,
      "loss": 0.1193,
      "step": 214
    },
    {
      "epoch": 0.3125,
      "grad_norm": 1.619894973779481,
      "learning_rate": 9.409606321741776e-06,
      "loss": 0.1698,
      "step": 215
    },
    {
      "epoch": 0.313953488372093,
      "grad_norm": 1.7822094110301971,
      "learning_rate": 9.404213523392183e-06,
      "loss": 0.1393,
      "step": 216
    },
    {
      "epoch": 0.31540697674418605,
      "grad_norm": 1.5931388190429225,
      "learning_rate": 9.39879776720099e-06,
      "loss": 0.1383,
      "step": 217
    },
    {
      "epoch": 0.3168604651162791,
      "grad_norm": 2.072625803623837,
      "learning_rate": 9.393359081398914e-06,
      "loss": 0.1834,
      "step": 218
    },
    {
      "epoch": 0.3183139534883721,
      "grad_norm": 1.7269561451971633,
      "learning_rate": 9.387897494336182e-06,
      "loss": 0.2005,
      "step": 219
    },
    {
      "epoch": 0.31976744186046513,
      "grad_norm": 1.9777378749032901,
      "learning_rate": 9.38241303448241e-06,
      "loss": 0.1648,
      "step": 220
    },
    {
      "epoch": 0.32122093023255816,
      "grad_norm": 1.7036324366034152,
      "learning_rate": 9.376905730426438e-06,
      "loss": 0.1661,
      "step": 221
    },
    {
      "epoch": 0.3226744186046512,
      "grad_norm": 1.6114418788776652,
      "learning_rate": 9.371375610876189e-06,
      "loss": 0.1871,
      "step": 222
    },
    {
      "epoch": 0.3241279069767442,
      "grad_norm": 1.6496270048093367,
      "learning_rate": 9.365822704658511e-06,
      "loss": 0.1683,
      "step": 223
    },
    {
      "epoch": 0.32558139534883723,
      "grad_norm": 1.9563162812494876,
      "learning_rate": 9.36024704071904e-06,
      "loss": 0.1941,
      "step": 224
    },
    {
      "epoch": 0.32703488372093026,
      "grad_norm": 2.0820637027256086,
      "learning_rate": 9.354648648122032e-06,
      "loss": 0.1951,
      "step": 225
    },
    {
      "epoch": 0.32848837209302323,
      "grad_norm": 1.5149036427355425,
      "learning_rate": 9.349027556050225e-06,
      "loss": 0.1985,
      "step": 226
    },
    {
      "epoch": 0.32994186046511625,
      "grad_norm": 1.3330622481681265,
      "learning_rate": 9.343383793804688e-06,
      "loss": 0.0971,
      "step": 227
    },
    {
      "epoch": 0.3313953488372093,
      "grad_norm": 1.917698823268353,
      "learning_rate": 9.337717390804653e-06,
      "loss": 0.1743,
      "step": 228
    },
    {
      "epoch": 0.3328488372093023,
      "grad_norm": 1.477889283501478,
      "learning_rate": 9.332028376587377e-06,
      "loss": 0.1367,
      "step": 229
    },
    {
      "epoch": 0.33430232558139533,
      "grad_norm": 1.35154982485599,
      "learning_rate": 9.326316780807982e-06,
      "loss": 0.1498,
      "step": 230
    },
    {
      "epoch": 0.33575581395348836,
      "grad_norm": 1.9401902636815154,
      "learning_rate": 9.320582633239303e-06,
      "loss": 0.1633,
      "step": 231
    },
    {
      "epoch": 0.3372093023255814,
      "grad_norm": 1.5805620021792157,
      "learning_rate": 9.314825963771724e-06,
      "loss": 0.172,
      "step": 232
    },
    {
      "epoch": 0.3386627906976744,
      "grad_norm": 1.2624039358903756,
      "learning_rate": 9.309046802413033e-06,
      "loss": 0.1445,
      "step": 233
    },
    {
      "epoch": 0.34011627906976744,
      "grad_norm": 1.6329242629844094,
      "learning_rate": 9.303245179288265e-06,
      "loss": 0.1617,
      "step": 234
    },
    {
      "epoch": 0.34156976744186046,
      "grad_norm": 1.572417591830697,
      "learning_rate": 9.297421124639534e-06,
      "loss": 0.1901,
      "step": 235
    },
    {
      "epoch": 0.3430232558139535,
      "grad_norm": 1.4594857772846002,
      "learning_rate": 9.29157466882589e-06,
      "loss": 0.1408,
      "step": 236
    },
    {
      "epoch": 0.3444767441860465,
      "grad_norm": 1.341145951847404,
      "learning_rate": 9.28570584232315e-06,
      "loss": 0.1468,
      "step": 237
    },
    {
      "epoch": 0.34593023255813954,
      "grad_norm": 1.3186091526025143,
      "learning_rate": 9.27981467572374e-06,
      "loss": 0.1269,
      "step": 238
    },
    {
      "epoch": 0.34738372093023256,
      "grad_norm": 1.2632212032396861,
      "learning_rate": 9.273901199736544e-06,
      "loss": 0.1329,
      "step": 239
    },
    {
      "epoch": 0.3488372093023256,
      "grad_norm": 1.2397475494586065,
      "learning_rate": 9.267965445186733e-06,
      "loss": 0.1188,
      "step": 240
    },
    {
      "epoch": 0.3502906976744186,
      "grad_norm": 1.4040827076553737,
      "learning_rate": 9.262007443015614e-06,
      "loss": 0.1217,
      "step": 241
    },
    {
      "epoch": 0.35174418604651164,
      "grad_norm": 1.5619651302909152,
      "learning_rate": 9.25602722428046e-06,
      "loss": 0.1372,
      "step": 242
    },
    {
      "epoch": 0.35319767441860467,
      "grad_norm": 1.759985209874878,
      "learning_rate": 9.250024820154356e-06,
      "loss": 0.1545,
      "step": 243
    },
    {
      "epoch": 0.3546511627906977,
      "grad_norm": 1.426120234598387,
      "learning_rate": 9.24400026192603e-06,
      "loss": 0.1254,
      "step": 244
    },
    {
      "epoch": 0.3561046511627907,
      "grad_norm": 2.1333005251482238,
      "learning_rate": 9.237953580999694e-06,
      "loss": 0.1715,
      "step": 245
    },
    {
      "epoch": 0.35755813953488375,
      "grad_norm": 1.4669057043638045,
      "learning_rate": 9.231884808894877e-06,
      "loss": 0.1589,
      "step": 246
    },
    {
      "epoch": 0.35901162790697677,
      "grad_norm": 1.70352194581075,
      "learning_rate": 9.225793977246267e-06,
      "loss": 0.1714,
      "step": 247
    },
    {
      "epoch": 0.36046511627906974,
      "grad_norm": 1.6341050240990393,
      "learning_rate": 9.219681117803537e-06,
      "loss": 0.1715,
      "step": 248
    },
    {
      "epoch": 0.36191860465116277,
      "grad_norm": 1.5094144987016003,
      "learning_rate": 9.213546262431185e-06,
      "loss": 0.1195,
      "step": 249
    },
    {
      "epoch": 0.3633720930232558,
      "grad_norm": 1.4662745386322011,
      "learning_rate": 9.207389443108372e-06,
      "loss": 0.1502,
      "step": 250
    },
    {
      "epoch": 0.3648255813953488,
      "grad_norm": 1.2772800586235213,
      "learning_rate": 9.201210691928745e-06,
      "loss": 0.1211,
      "step": 251
    },
    {
      "epoch": 0.36627906976744184,
      "grad_norm": 1.4892966499782139,
      "learning_rate": 9.195010041100276e-06,
      "loss": 0.1168,
      "step": 252
    },
    {
      "epoch": 0.36773255813953487,
      "grad_norm": 1.423847678645897,
      "learning_rate": 9.188787522945098e-06,
      "loss": 0.1338,
      "step": 253
    },
    {
      "epoch": 0.3691860465116279,
      "grad_norm": 1.5018029337938783,
      "learning_rate": 9.182543169899325e-06,
      "loss": 0.1324,
      "step": 254
    },
    {
      "epoch": 0.3706395348837209,
      "grad_norm": 1.3714052238212613,
      "learning_rate": 9.176277014512894e-06,
      "loss": 0.1568,
      "step": 255
    },
    {
      "epoch": 0.37209302325581395,
      "grad_norm": 1.409010922605016,
      "learning_rate": 9.16998908944939e-06,
      "loss": 0.1604,
      "step": 256
    },
    {
      "epoch": 0.373546511627907,
      "grad_norm": 1.6529698075548698,
      "learning_rate": 9.163679427485878e-06,
      "loss": 0.1567,
      "step": 257
    },
    {
      "epoch": 0.375,
      "grad_norm": 1.3541902426493249,
      "learning_rate": 9.157348061512728e-06,
      "loss": 0.1557,
      "step": 258
    },
    {
      "epoch": 0.376453488372093,
      "grad_norm": 1.4327109786235201,
      "learning_rate": 9.150995024533446e-06,
      "loss": 0.1578,
      "step": 259
    },
    {
      "epoch": 0.37790697674418605,
      "grad_norm": 1.3363028855734624,
      "learning_rate": 9.14462034966451e-06,
      "loss": 0.1404,
      "step": 260
    },
    {
      "epoch": 0.3793604651162791,
      "grad_norm": 1.8919021627256019,
      "learning_rate": 9.138224070135183e-06,
      "loss": 0.1841,
      "step": 261
    },
    {
      "epoch": 0.3808139534883721,
      "grad_norm": 1.48877635650518,
      "learning_rate": 9.131806219287344e-06,
      "loss": 0.1292,
      "step": 262
    },
    {
      "epoch": 0.38226744186046513,
      "grad_norm": 1.4141106832003385,
      "learning_rate": 9.125366830575325e-06,
      "loss": 0.1355,
      "step": 263
    },
    {
      "epoch": 0.38372093023255816,
      "grad_norm": 1.3866461149312288,
      "learning_rate": 9.118905937565723e-06,
      "loss": 0.1493,
      "step": 264
    },
    {
      "epoch": 0.3851744186046512,
      "grad_norm": 1.3927609057507213,
      "learning_rate": 9.112423573937232e-06,
      "loss": 0.1187,
      "step": 265
    },
    {
      "epoch": 0.3866279069767442,
      "grad_norm": 1.63044313572066,
      "learning_rate": 9.105919773480464e-06,
      "loss": 0.1604,
      "step": 266
    },
    {
      "epoch": 0.38808139534883723,
      "grad_norm": 1.6214111304578023,
      "learning_rate": 9.09939457009778e-06,
      "loss": 0.1917,
      "step": 267
    },
    {
      "epoch": 0.38953488372093026,
      "grad_norm": 1.4173048655266718,
      "learning_rate": 9.092847997803098e-06,
      "loss": 0.17,
      "step": 268
    },
    {
      "epoch": 0.39098837209302323,
      "grad_norm": 1.6513577218808286,
      "learning_rate": 9.08628009072174e-06,
      "loss": 0.1725,
      "step": 269
    },
    {
      "epoch": 0.39244186046511625,
      "grad_norm": 1.5628095311374188,
      "learning_rate": 9.079690883090227e-06,
      "loss": 0.1608,
      "step": 270
    },
    {
      "epoch": 0.3938953488372093,
      "grad_norm": 1.9629889905768871,
      "learning_rate": 9.073080409256118e-06,
      "loss": 0.1587,
      "step": 271
    },
    {
      "epoch": 0.3953488372093023,
      "grad_norm": 1.947239430105324,
      "learning_rate": 9.066448703677828e-06,
      "loss": 0.2092,
      "step": 272
    },
    {
      "epoch": 0.39680232558139533,
      "grad_norm": 1.6096007627861755,
      "learning_rate": 9.059795800924445e-06,
      "loss": 0.2076,
      "step": 273
    },
    {
      "epoch": 0.39825581395348836,
      "grad_norm": 1.4682775876201015,
      "learning_rate": 9.053121735675552e-06,
      "loss": 0.1338,
      "step": 274
    },
    {
      "epoch": 0.3997093023255814,
      "grad_norm": 1.541753647412304,
      "learning_rate": 9.046426542721046e-06,
      "loss": 0.1714,
      "step": 275
    },
    {
      "epoch": 0.4011627906976744,
      "grad_norm": 1.413689581379683,
      "learning_rate": 9.039710256960956e-06,
      "loss": 0.1346,
      "step": 276
    },
    {
      "epoch": 0.40261627906976744,
      "grad_norm": 1.6313953237696512,
      "learning_rate": 9.03297291340526e-06,
      "loss": 0.1596,
      "step": 277
    },
    {
      "epoch": 0.40406976744186046,
      "grad_norm": 1.2910181632753897,
      "learning_rate": 9.026214547173706e-06,
      "loss": 0.1553,
      "step": 278
    },
    {
      "epoch": 0.4055232558139535,
      "grad_norm": 1.4228459831893367,
      "learning_rate": 9.019435193495627e-06,
      "loss": 0.1377,
      "step": 279
    },
    {
      "epoch": 0.4069767441860465,
      "grad_norm": 1.459679839660881,
      "learning_rate": 9.012634887709755e-06,
      "loss": 0.1404,
      "step": 280
    },
    {
      "epoch": 0.40843023255813954,
      "grad_norm": 1.6321159835021386,
      "learning_rate": 9.005813665264042e-06,
      "loss": 0.1393,
      "step": 281
    },
    {
      "epoch": 0.40988372093023256,
      "grad_norm": 1.9115197748274007,
      "learning_rate": 8.998971561715468e-06,
      "loss": 0.164,
      "step": 282
    },
    {
      "epoch": 0.4113372093023256,
      "grad_norm": 1.5888048449790153,
      "learning_rate": 8.992108612729868e-06,
      "loss": 0.1422,
      "step": 283
    },
    {
      "epoch": 0.4127906976744186,
      "grad_norm": 2.3019049975561585,
      "learning_rate": 8.985224854081727e-06,
      "loss": 0.1863,
      "step": 284
    },
    {
      "epoch": 0.41424418604651164,
      "grad_norm": 1.5757906262469172,
      "learning_rate": 8.978320321654014e-06,
      "loss": 0.1531,
      "step": 285
    },
    {
      "epoch": 0.41569767441860467,
      "grad_norm": 1.3184585148618004,
      "learning_rate": 8.97139505143798e-06,
      "loss": 0.1035,
      "step": 286
    },
    {
      "epoch": 0.4171511627906977,
      "grad_norm": 1.9152332078193026,
      "learning_rate": 8.964449079532978e-06,
      "loss": 0.1982,
      "step": 287
    },
    {
      "epoch": 0.4186046511627907,
      "grad_norm": 1.3318524817371034,
      "learning_rate": 8.957482442146271e-06,
      "loss": 0.1422,
      "step": 288
    },
    {
      "epoch": 0.42005813953488375,
      "grad_norm": 1.2994676654595296,
      "learning_rate": 8.950495175592849e-06,
      "loss": 0.0954,
      "step": 289
    },
    {
      "epoch": 0.42151162790697677,
      "grad_norm": 1.3863595317355537,
      "learning_rate": 8.94348731629523e-06,
      "loss": 0.1481,
      "step": 290
    },
    {
      "epoch": 0.42296511627906974,
      "grad_norm": 1.4930615065996538,
      "learning_rate": 8.93645890078328e-06,
      "loss": 0.1474,
      "step": 291
    },
    {
      "epoch": 0.42441860465116277,
      "grad_norm": 1.1744911565495457,
      "learning_rate": 8.929409965694016e-06,
      "loss": 0.1228,
      "step": 292
    },
    {
      "epoch": 0.4258720930232558,
      "grad_norm": 1.3229206282158534,
      "learning_rate": 8.92234054777142e-06,
      "loss": 0.1165,
      "step": 293
    },
    {
      "epoch": 0.4273255813953488,
      "grad_norm": 1.500633285810525,
      "learning_rate": 8.915250683866242e-06,
      "loss": 0.1561,
      "step": 294
    },
    {
      "epoch": 0.42877906976744184,
      "grad_norm": 1.4979998345117733,
      "learning_rate": 8.908140410935813e-06,
      "loss": 0.1466,
      "step": 295
    },
    {
      "epoch": 0.43023255813953487,
      "grad_norm": 1.2712924829276973,
      "learning_rate": 8.901009766043846e-06,
      "loss": 0.1026,
      "step": 296
    },
    {
      "epoch": 0.4316860465116279,
      "grad_norm": 1.6863134441377277,
      "learning_rate": 8.893858786360255e-06,
      "loss": 0.1687,
      "step": 297
    },
    {
      "epoch": 0.4331395348837209,
      "grad_norm": 1.5887492952986007,
      "learning_rate": 8.886687509160944e-06,
      "loss": 0.1973,
      "step": 298
    },
    {
      "epoch": 0.43459302325581395,
      "grad_norm": 1.6354886094555063,
      "learning_rate": 8.879495971827628e-06,
      "loss": 0.1881,
      "step": 299
    },
    {
      "epoch": 0.436046511627907,
      "grad_norm": 1.5874859267729486,
      "learning_rate": 8.872284211847629e-06,
      "loss": 0.1105,
      "step": 300
    },
    {
      "epoch": 0.4375,
      "grad_norm": 1.7021255827480994,
      "learning_rate": 8.865052266813686e-06,
      "loss": 0.1592,
      "step": 301
    },
    {
      "epoch": 0.438953488372093,
      "grad_norm": 1.1905429804396237,
      "learning_rate": 8.857800174423754e-06,
      "loss": 0.1569,
      "step": 302
    },
    {
      "epoch": 0.44040697674418605,
      "grad_norm": 1.7925350240691666,
      "learning_rate": 8.850527972480812e-06,
      "loss": 0.1823,
      "step": 303
    },
    {
      "epoch": 0.4418604651162791,
      "grad_norm": 1.682150104412601,
      "learning_rate": 8.843235698892661e-06,
      "loss": 0.1725,
      "step": 304
    },
    {
      "epoch": 0.4433139534883721,
      "grad_norm": 1.4842418533887083,
      "learning_rate": 8.835923391671735e-06,
      "loss": 0.1095,
      "step": 305
    },
    {
      "epoch": 0.44476744186046513,
      "grad_norm": 1.4624470494811705,
      "learning_rate": 8.828591088934894e-06,
      "loss": 0.1286,
      "step": 306
    },
    {
      "epoch": 0.44622093023255816,
      "grad_norm": 1.3843653917366865,
      "learning_rate": 8.821238828903227e-06,
      "loss": 0.1423,
      "step": 307
    },
    {
      "epoch": 0.4476744186046512,
      "grad_norm": 1.4949237211694288,
      "learning_rate": 8.813866649901857e-06,
      "loss": 0.1426,
      "step": 308
    },
    {
      "epoch": 0.4491279069767442,
      "grad_norm": 1.338762423782917,
      "learning_rate": 8.806474590359736e-06,
      "loss": 0.1609,
      "step": 309
    },
    {
      "epoch": 0.45058139534883723,
      "grad_norm": 1.3119459351922982,
      "learning_rate": 8.799062688809452e-06,
      "loss": 0.1508,
      "step": 310
    },
    {
      "epoch": 0.45203488372093026,
      "grad_norm": 1.4964356641654488,
      "learning_rate": 8.79163098388702e-06,
      "loss": 0.1536,
      "step": 311
    },
    {
      "epoch": 0.45348837209302323,
      "grad_norm": 1.8958424478348768,
      "learning_rate": 8.784179514331683e-06,
      "loss": 0.2154,
      "step": 312
    },
    {
      "epoch": 0.45494186046511625,
      "grad_norm": 1.616943621767028,
      "learning_rate": 8.776708318985712e-06,
      "loss": 0.1338,
      "step": 313
    },
    {
      "epoch": 0.4563953488372093,
      "grad_norm": 1.3369024538015155,
      "learning_rate": 8.769217436794205e-06,
      "loss": 0.1481,
      "step": 314
    },
    {
      "epoch": 0.4578488372093023,
      "grad_norm": 1.5002304811430793,
      "learning_rate": 8.761706906804878e-06,
      "loss": 0.1484,
      "step": 315
    },
    {
      "epoch": 0.45930232558139533,
      "grad_norm": 1.5317354250290076,
      "learning_rate": 8.75417676816787e-06,
      "loss": 0.1388,
      "step": 316
    },
    {
      "epoch": 0.46075581395348836,
      "grad_norm": 1.5881770732342904,
      "learning_rate": 8.746627060135528e-06,
      "loss": 0.1607,
      "step": 317
    },
    {
      "epoch": 0.4622093023255814,
      "grad_norm": 1.2933179184781483,
      "learning_rate": 8.73905782206221e-06,
      "loss": 0.1425,
      "step": 318
    },
    {
      "epoch": 0.4636627906976744,
      "grad_norm": 1.4900348199011015,
      "learning_rate": 8.731469093404086e-06,
      "loss": 0.1686,
      "step": 319
    },
    {
      "epoch": 0.46511627906976744,
      "grad_norm": 1.2187036161656735,
      "learning_rate": 8.72386091371891e-06,
      "loss": 0.156,
      "step": 320
    },
    {
      "epoch": 0.46656976744186046,
      "grad_norm": 1.5361392026631566,
      "learning_rate": 8.71623332266584e-06,
      "loss": 0.1697,
      "step": 321
    },
    {
      "epoch": 0.4680232558139535,
      "grad_norm": 1.5624029046245063,
      "learning_rate": 8.708586360005218e-06,
      "loss": 0.1565,
      "step": 322
    },
    {
      "epoch": 0.4694767441860465,
      "grad_norm": 1.2313528832746294,
      "learning_rate": 8.700920065598358e-06,
      "loss": 0.1319,
      "step": 323
    },
    {
      "epoch": 0.47093023255813954,
      "grad_norm": 1.4770385920550462,
      "learning_rate": 8.693234479407353e-06,
      "loss": 0.1212,
      "step": 324
    },
    {
      "epoch": 0.47238372093023256,
      "grad_norm": 2.1148845935494913,
      "learning_rate": 8.685529641494852e-06,
      "loss": 0.1714,
      "step": 325
    },
    {
      "epoch": 0.4738372093023256,
      "grad_norm": 1.3818082146663953,
      "learning_rate": 8.677805592023858e-06,
      "loss": 0.1328,
      "step": 326
    },
    {
      "epoch": 0.4752906976744186,
      "grad_norm": 2.0939762202676935,
      "learning_rate": 8.670062371257525e-06,
      "loss": 0.2428,
      "step": 327
    },
    {
      "epoch": 0.47674418604651164,
      "grad_norm": 1.5066771176481124,
      "learning_rate": 8.662300019558931e-06,
      "loss": 0.1397,
      "step": 328
    },
    {
      "epoch": 0.47819767441860467,
      "grad_norm": 1.3186753628942027,
      "learning_rate": 8.654518577390885e-06,
      "loss": 0.1513,
      "step": 329
    },
    {
      "epoch": 0.4796511627906977,
      "grad_norm": 1.6619235949225295,
      "learning_rate": 8.646718085315707e-06,
      "loss": 0.1637,
      "step": 330
    },
    {
      "epoch": 0.4811046511627907,
      "grad_norm": 1.5149079703407748,
      "learning_rate": 8.638898583995016e-06,
      "loss": 0.1614,
      "step": 331
    },
    {
      "epoch": 0.48255813953488375,
      "grad_norm": 1.6949649519406504,
      "learning_rate": 8.631060114189526e-06,
      "loss": 0.1613,
      "step": 332
    },
    {
      "epoch": 0.48401162790697677,
      "grad_norm": 1.6341150247952272,
      "learning_rate": 8.62320271675882e-06,
      "loss": 0.139,
      "step": 333
    },
    {
      "epoch": 0.48546511627906974,
      "grad_norm": 1.5722940293537715,
      "learning_rate": 8.615326432661155e-06,
      "loss": 0.1389,
      "step": 334
    },
    {
      "epoch": 0.48691860465116277,
      "grad_norm": 1.6784313452366777,
      "learning_rate": 8.607431302953229e-06,
      "loss": 0.1586,
      "step": 335
    },
    {
      "epoch": 0.4883720930232558,
      "grad_norm": 1.579470915019441,
      "learning_rate": 8.599517368789981e-06,
      "loss": 0.1628,
      "step": 336
    },
    {
      "epoch": 0.4898255813953488,
      "grad_norm": 1.8175899396499493,
      "learning_rate": 8.591584671424371e-06,
      "loss": 0.1778,
      "step": 337
    },
    {
      "epoch": 0.49127906976744184,
      "grad_norm": 1.3679144266460825,
      "learning_rate": 8.583633252207171e-06,
      "loss": 0.145,
      "step": 338
    },
    {
      "epoch": 0.49273255813953487,
      "grad_norm": 1.5973543371186025,
      "learning_rate": 8.575663152586735e-06,
      "loss": 0.1371,
      "step": 339
    },
    {
      "epoch": 0.4941860465116279,
      "grad_norm": 1.4223743951319792,
      "learning_rate": 8.5676744141088e-06,
      "loss": 0.1349,
      "step": 340
    },
    {
      "epoch": 0.4956395348837209,
      "grad_norm": 1.7867654597125795,
      "learning_rate": 8.559667078416257e-06,
      "loss": 0.1652,
      "step": 341
    },
    {
      "epoch": 0.49709302325581395,
      "grad_norm": 1.6721772672683666,
      "learning_rate": 8.551641187248942e-06,
      "loss": 0.1827,
      "step": 342
    },
    {
      "epoch": 0.498546511627907,
      "grad_norm": 1.5334190767352909,
      "learning_rate": 8.543596782443415e-06,
      "loss": 0.1367,
      "step": 343
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4823817376465154,
      "learning_rate": 8.535533905932739e-06,
      "loss": 0.1427,
      "step": 344
    },
    {
      "epoch": 0.501453488372093,
      "grad_norm": 1.5846228452591529,
      "learning_rate": 8.527452599746265e-06,
      "loss": 0.1705,
      "step": 345
    },
    {
      "epoch": 0.502906976744186,
      "grad_norm": 1.3811944073529825,
      "learning_rate": 8.519352906009417e-06,
      "loss": 0.135,
      "step": 346
    },
    {
      "epoch": 0.5043604651162791,
      "grad_norm": 1.4875376615322078,
      "learning_rate": 8.511234866943463e-06,
      "loss": 0.1388,
      "step": 347
    },
    {
      "epoch": 0.5058139534883721,
      "grad_norm": 1.402324894315079,
      "learning_rate": 8.5030985248653e-06,
      "loss": 0.1474,
      "step": 348
    },
    {
      "epoch": 0.5072674418604651,
      "grad_norm": 1.463484821168073,
      "learning_rate": 8.494943922187236e-06,
      "loss": 0.1234,
      "step": 349
    },
    {
      "epoch": 0.5087209302325582,
      "grad_norm": 1.6398780382743268,
      "learning_rate": 8.486771101416765e-06,
      "loss": 0.1335,
      "step": 350
    },
    {
      "epoch": 0.5101744186046512,
      "grad_norm": 1.244410077865948,
      "learning_rate": 8.47858010515634e-06,
      "loss": 0.1499,
      "step": 351
    },
    {
      "epoch": 0.5116279069767442,
      "grad_norm": 1.7990830264360216,
      "learning_rate": 8.470370976103171e-06,
      "loss": 0.1662,
      "step": 352
    },
    {
      "epoch": 0.5130813953488372,
      "grad_norm": 1.7219602924486799,
      "learning_rate": 8.462143757048976e-06,
      "loss": 0.1294,
      "step": 353
    },
    {
      "epoch": 0.5145348837209303,
      "grad_norm": 1.1737595986370917,
      "learning_rate": 8.453898490879776e-06,
      "loss": 0.1391,
      "step": 354
    },
    {
      "epoch": 0.5159883720930233,
      "grad_norm": 1.5033196146866203,
      "learning_rate": 8.445635220575663e-06,
      "loss": 0.1207,
      "step": 355
    },
    {
      "epoch": 0.5174418604651163,
      "grad_norm": 1.526089460093771,
      "learning_rate": 8.43735398921059e-06,
      "loss": 0.1575,
      "step": 356
    },
    {
      "epoch": 0.5188953488372093,
      "grad_norm": 1.0613154614063016,
      "learning_rate": 8.429054839952122e-06,
      "loss": 0.107,
      "step": 357
    },
    {
      "epoch": 0.5203488372093024,
      "grad_norm": 1.5465351587584268,
      "learning_rate": 8.42073781606123e-06,
      "loss": 0.1412,
      "step": 358
    },
    {
      "epoch": 0.5218023255813954,
      "grad_norm": 1.6263257643507738,
      "learning_rate": 8.412402960892061e-06,
      "loss": 0.1528,
      "step": 359
    },
    {
      "epoch": 0.5232558139534884,
      "grad_norm": 1.4428369608603286,
      "learning_rate": 8.40405031789171e-06,
      "loss": 0.1229,
      "step": 360
    },
    {
      "epoch": 0.5247093023255814,
      "grad_norm": 1.5887049579267734,
      "learning_rate": 8.395679930599997e-06,
      "loss": 0.1768,
      "step": 361
    },
    {
      "epoch": 0.5261627906976745,
      "grad_norm": 1.6132505441168516,
      "learning_rate": 8.387291842649234e-06,
      "loss": 0.147,
      "step": 362
    },
    {
      "epoch": 0.5276162790697675,
      "grad_norm": 1.6221175313012306,
      "learning_rate": 8.378886097764001e-06,
      "loss": 0.1393,
      "step": 363
    },
    {
      "epoch": 0.5290697674418605,
      "grad_norm": 1.3815088653312464,
      "learning_rate": 8.370462739760922e-06,
      "loss": 0.1346,
      "step": 364
    },
    {
      "epoch": 0.5305232558139535,
      "grad_norm": 1.613218374432377,
      "learning_rate": 8.362021812548433e-06,
      "loss": 0.1296,
      "step": 365
    },
    {
      "epoch": 0.5319767441860465,
      "grad_norm": 1.5239164352989918,
      "learning_rate": 8.353563360126548e-06,
      "loss": 0.2012,
      "step": 366
    },
    {
      "epoch": 0.5334302325581395,
      "grad_norm": 1.4465320300081779,
      "learning_rate": 8.345087426586638e-06,
      "loss": 0.1436,
      "step": 367
    },
    {
      "epoch": 0.5348837209302325,
      "grad_norm": 1.6179330922890798,
      "learning_rate": 8.336594056111197e-06,
      "loss": 0.1699,
      "step": 368
    },
    {
      "epoch": 0.5363372093023255,
      "grad_norm": 1.2917432875508248,
      "learning_rate": 8.328083292973617e-06,
      "loss": 0.1294,
      "step": 369
    },
    {
      "epoch": 0.5377906976744186,
|
"grad_norm": 1.823166535138271, |
|
"learning_rate": 8.319555181537942e-06, |
|
"loss": 0.1693, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.5392441860465116, |
|
"grad_norm": 1.5964311134632778, |
|
"learning_rate": 8.311009766258659e-06, |
|
"loss": 0.1838, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.5406976744186046, |
|
"grad_norm": 1.5287488881908324, |
|
"learning_rate": 8.30244709168045e-06, |
|
"loss": 0.164, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.5421511627906976, |
|
"grad_norm": 1.617466008500704, |
|
"learning_rate": 8.293867202437962e-06, |
|
"loss": 0.1675, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.5436046511627907, |
|
"grad_norm": 1.81589698419327, |
|
"learning_rate": 8.285270143255579e-06, |
|
"loss": 0.158, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.5450581395348837, |
|
"grad_norm": 1.496211076073237, |
|
"learning_rate": 8.27665595894719e-06, |
|
"loss": 0.1332, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.5465116279069767, |
|
"grad_norm": 1.528765465197372, |
|
"learning_rate": 8.268024694415949e-06, |
|
"loss": 0.1313, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.5479651162790697, |
|
"grad_norm": 1.7252370903269931, |
|
"learning_rate": 8.25937639465404e-06, |
|
"loss": 0.1635, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.5494186046511628, |
|
"grad_norm": 1.3299953218632383, |
|
"learning_rate": 8.250711104742453e-06, |
|
"loss": 0.1198, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.5508720930232558, |
|
"grad_norm": 1.6165817454268103, |
|
"learning_rate": 8.242028869850743e-06, |
|
"loss": 0.142, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.5523255813953488, |
|
"grad_norm": 1.4748568202518297, |
|
"learning_rate": 8.23332973523679e-06, |
|
"loss": 0.1134, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.5537790697674418, |
|
"grad_norm": 1.327698827166681, |
|
"learning_rate": 8.224613746246565e-06, |
|
"loss": 0.1465, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.5552325581395349, |
|
"grad_norm": 1.3793313249754087, |
|
"learning_rate": 8.215880948313904e-06, |
|
"loss": 0.1304, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.5566860465116279, |
|
"grad_norm": 1.7504804510682308, |
|
"learning_rate": 8.207131386960256e-06, |
|
"loss": 0.1673, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.5581395348837209, |
|
"grad_norm": 1.595850264406846, |
|
"learning_rate": 8.198365107794457e-06, |
|
"loss": 0.1444, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.559593023255814, |
|
"grad_norm": 1.5020289813450294, |
|
"learning_rate": 8.189582156512484e-06, |
|
"loss": 0.1403, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.561046511627907, |
|
"grad_norm": 1.4326207754311362, |
|
"learning_rate": 8.180782578897225e-06, |
|
"loss": 0.0998, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.5625, |
|
"grad_norm": 1.7057272776604826, |
|
"learning_rate": 8.171966420818227e-06, |
|
"loss": 0.1388, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.563953488372093, |
|
"grad_norm": 1.7222325633827718, |
|
"learning_rate": 8.163133728231482e-06, |
|
"loss": 0.1897, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.565406976744186, |
|
"grad_norm": 1.473100288550089, |
|
"learning_rate": 8.154284547179158e-06, |
|
"loss": 0.1419, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.5668604651162791, |
|
"grad_norm": 1.9904385322120357, |
|
"learning_rate": 8.145418923789375e-06, |
|
"loss": 0.1935, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.5683139534883721, |
|
"grad_norm": 1.8647312855206553, |
|
"learning_rate": 8.136536904275965e-06, |
|
"loss": 0.2022, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.5697674418604651, |
|
"grad_norm": 1.7630172852357997, |
|
"learning_rate": 8.127638534938227e-06, |
|
"loss": 0.1924, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.5712209302325582, |
|
"grad_norm": 1.3019122719624554, |
|
"learning_rate": 8.118723862160687e-06, |
|
"loss": 0.1469, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.5726744186046512, |
|
"grad_norm": 1.860210908405091, |
|
"learning_rate": 8.109792932412853e-06, |
|
"loss": 0.1508, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.5741279069767442, |
|
"grad_norm": 1.345826572185921, |
|
"learning_rate": 8.10084579224898e-06, |
|
"loss": 0.155, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.5755813953488372, |
|
"grad_norm": 1.9559560979112873, |
|
"learning_rate": 8.09188248830782e-06, |
|
"loss": 0.183, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.5770348837209303, |
|
"grad_norm": 1.7929107775519544, |
|
"learning_rate": 8.082903067312384e-06, |
|
"loss": 0.1219, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.5784883720930233, |
|
"grad_norm": 1.4217667098877558, |
|
"learning_rate": 8.073907576069692e-06, |
|
"loss": 0.1615, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.5799418604651163, |
|
"grad_norm": 1.74509835604015, |
|
"learning_rate": 8.064896061470542e-06, |
|
"loss": 0.1638, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.5813953488372093, |
|
"grad_norm": 1.412746062615197, |
|
"learning_rate": 8.055868570489247e-06, |
|
"loss": 0.1497, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.5813953488372093, |
|
"eval_loss": 0.1344260424375534, |
|
"eval_runtime": 2.2004, |
|
"eval_samples_per_second": 25.449, |
|
"eval_steps_per_second": 6.362, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.5828488372093024, |
|
"grad_norm": 1.2068327544738824, |
|
"learning_rate": 8.046825150183406e-06, |
|
"loss": 0.1201, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.5843023255813954, |
|
"grad_norm": 1.6435893838564368, |
|
"learning_rate": 8.037765847693652e-06, |
|
"loss": 0.1145, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.5857558139534884, |
|
"grad_norm": 1.6608170672171527, |
|
"learning_rate": 8.028690710243407e-06, |
|
"loss": 0.1279, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.5872093023255814, |
|
"grad_norm": 1.4328859838303152, |
|
"learning_rate": 8.019599785138635e-06, |
|
"loss": 0.1373, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.5886627906976745, |
|
"grad_norm": 1.4343862675807864, |
|
"learning_rate": 8.010493119767596e-06, |
|
"loss": 0.1498, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.5901162790697675, |
|
"grad_norm": 1.6086081681876832, |
|
"learning_rate": 8.001370761600598e-06, |
|
"loss": 0.141, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.5915697674418605, |
|
"grad_norm": 2.1039051172285, |
|
"learning_rate": 7.992232758189756e-06, |
|
"loss": 0.1973, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.5930232558139535, |
|
"grad_norm": 1.8140948838773037, |
|
"learning_rate": 7.983079157168736e-06, |
|
"loss": 0.1748, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.5944767441860465, |
|
"grad_norm": 1.3362665282466653, |
|
"learning_rate": 7.973910006252508e-06, |
|
"loss": 0.1397, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.5959302325581395, |
|
"grad_norm": 1.5133021367871378, |
|
"learning_rate": 7.9647253532371e-06, |
|
"loss": 0.1227, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.5973837209302325, |
|
"grad_norm": 1.3742813696710579, |
|
"learning_rate": 7.955525245999348e-06, |
|
"loss": 0.1292, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.5988372093023255, |
|
"grad_norm": 1.1612211072347856, |
|
"learning_rate": 7.946309732496646e-06, |
|
"loss": 0.1167, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.6002906976744186, |
|
"grad_norm": 1.2766729466996067, |
|
"learning_rate": 7.9370788607667e-06, |
|
"loss": 0.1229, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.6017441860465116, |
|
"grad_norm": 1.5577927339728042, |
|
"learning_rate": 7.927832678927265e-06, |
|
"loss": 0.1267, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.6031976744186046, |
|
"grad_norm": 1.252636212314083, |
|
"learning_rate": 7.918571235175914e-06, |
|
"loss": 0.1487, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.6046511627906976, |
|
"grad_norm": 1.4878931355701959, |
|
"learning_rate": 7.909294577789765e-06, |
|
"loss": 0.1456, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.6061046511627907, |
|
"grad_norm": 1.6750463601592944, |
|
"learning_rate": 7.900002755125249e-06, |
|
"loss": 0.1539, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.6075581395348837, |
|
"grad_norm": 1.670161301173932, |
|
"learning_rate": 7.890695815617844e-06, |
|
"loss": 0.1588, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.6090116279069767, |
|
"grad_norm": 1.5574686810516989, |
|
"learning_rate": 7.881373807781827e-06, |
|
"loss": 0.1598, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.6104651162790697, |
|
"grad_norm": 1.4407087883766663, |
|
"learning_rate": 7.872036780210025e-06, |
|
"loss": 0.1292, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.6119186046511628, |
|
"grad_norm": 1.532394578818048, |
|
"learning_rate": 7.86268478157356e-06, |
|
"loss": 0.1417, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.6133720930232558, |
|
"grad_norm": 1.558475109143411, |
|
"learning_rate": 7.853317860621586e-06, |
|
"loss": 0.1243, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.6148255813953488, |
|
"grad_norm": 1.2251102819390556, |
|
"learning_rate": 7.843936066181049e-06, |
|
"loss": 0.1218, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.6162790697674418, |
|
"grad_norm": 1.3841095970618222, |
|
"learning_rate": 7.834539447156424e-06, |
|
"loss": 0.1085, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.6177325581395349, |
|
"grad_norm": 1.3356825728851078, |
|
"learning_rate": 7.825128052529462e-06, |
|
"loss": 0.1116, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.6191860465116279, |
|
"grad_norm": 1.5573547752820238, |
|
"learning_rate": 7.815701931358934e-06, |
|
"loss": 0.1388, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.6206395348837209, |
|
"grad_norm": 1.7038054695327371, |
|
"learning_rate": 7.80626113278038e-06, |
|
"loss": 0.1519, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.622093023255814, |
|
"grad_norm": 1.5094414416286532, |
|
"learning_rate": 7.796805706005843e-06, |
|
"loss": 0.1149, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.623546511627907, |
|
"grad_norm": 1.4273226525726925, |
|
"learning_rate": 7.787335700323622e-06, |
|
"loss": 0.1254, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.625, |
|
"grad_norm": 1.4966730289367747, |
|
"learning_rate": 7.777851165098012e-06, |
|
"loss": 0.156, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.626453488372093, |
|
"grad_norm": 1.6315240876401846, |
|
"learning_rate": 7.768352149769044e-06, |
|
"loss": 0.1621, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.627906976744186, |
|
"grad_norm": 1.7333824682174062, |
|
"learning_rate": 7.75883870385223e-06, |
|
"loss": 0.1376, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.6293604651162791, |
|
"grad_norm": 1.602227708449946, |
|
"learning_rate": 7.749310876938306e-06, |
|
"loss": 0.1735, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.6308139534883721, |
|
"grad_norm": 1.3977532588731882, |
|
"learning_rate": 7.739768718692969e-06, |
|
"loss": 0.1289, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.6322674418604651, |
|
"grad_norm": 1.268259604423468, |
|
"learning_rate": 7.730212278856625e-06, |
|
"loss": 0.1017, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.6337209302325582, |
|
"grad_norm": 1.3411672369388048, |
|
"learning_rate": 7.72064160724412e-06, |
|
"loss": 0.1362, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.6351744186046512, |
|
"grad_norm": 1.2808388499819041, |
|
"learning_rate": 7.71105675374449e-06, |
|
"loss": 0.1394, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.6366279069767442, |
|
"grad_norm": 1.295520262094418, |
|
"learning_rate": 7.701457768320696e-06, |
|
"loss": 0.1442, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.6380813953488372, |
|
"grad_norm": 2.109775997490956, |
|
"learning_rate": 7.691844701009365e-06, |
|
"loss": 0.1762, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.6395348837209303, |
|
"grad_norm": 1.8500489048534585, |
|
"learning_rate": 7.682217601920529e-06, |
|
"loss": 0.1796, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.6409883720930233, |
|
"grad_norm": 1.5939072801392646, |
|
"learning_rate": 7.672576521237361e-06, |
|
"loss": 0.1516, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.6424418604651163, |
|
"grad_norm": 1.6138091025138628, |
|
"learning_rate": 7.662921509215916e-06, |
|
"loss": 0.1829, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.6438953488372093, |
|
"grad_norm": 1.676099177115984, |
|
"learning_rate": 7.653252616184875e-06, |
|
"loss": 0.1237, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.6453488372093024, |
|
"grad_norm": 1.2244457381485303, |
|
"learning_rate": 7.643569892545267e-06, |
|
"loss": 0.1306, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.6468023255813954, |
|
"grad_norm": 1.5503434065388142, |
|
"learning_rate": 7.633873388770223e-06, |
|
"loss": 0.1432, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.6482558139534884, |
|
"grad_norm": 1.3861212995292134, |
|
"learning_rate": 7.624163155404702e-06, |
|
"loss": 0.1246, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.6497093023255814, |
|
"grad_norm": 1.4732536785794919, |
|
"learning_rate": 7.614439243065235e-06, |
|
"loss": 0.171, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.6511627906976745, |
|
"grad_norm": 1.9155294406879655, |
|
"learning_rate": 7.604701702439652e-06, |
|
"loss": 0.159, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.6526162790697675, |
|
"grad_norm": 1.866142590003918, |
|
"learning_rate": 7.594950584286826e-06, |
|
"loss": 0.1705, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.6540697674418605, |
|
"grad_norm": 1.3523787659587572, |
|
"learning_rate": 7.585185939436409e-06, |
|
"loss": 0.139, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.6555232558139535, |
|
"grad_norm": 1.314104202140968, |
|
"learning_rate": 7.5754078187885586e-06, |
|
"loss": 0.1222, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.6569767441860465, |
|
"grad_norm": 1.3649972766280627, |
|
"learning_rate": 7.5656162733136776e-06, |
|
"loss": 0.1429, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.6584302325581395, |
|
"grad_norm": 1.3599029511484766, |
|
"learning_rate": 7.555811354052152e-06, |
|
"loss": 0.1483, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.6598837209302325, |
|
"grad_norm": 1.249236779922474, |
|
"learning_rate": 7.545993112114078e-06, |
|
"loss": 0.135, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.6613372093023255, |
|
"grad_norm": 1.4068263051558898, |
|
"learning_rate": 7.536161598679002e-06, |
|
"loss": 0.14, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.6627906976744186, |
|
"grad_norm": 1.873871839563441, |
|
"learning_rate": 7.526316864995648e-06, |
|
"loss": 0.1585, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.6642441860465116, |
|
"grad_norm": 1.506919292093952, |
|
"learning_rate": 7.516458962381654e-06, |
|
"loss": 0.1308, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.6656976744186046, |
|
"grad_norm": 1.561352936275751, |
|
"learning_rate": 7.506587942223305e-06, |
|
"loss": 0.1374, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.6671511627906976, |
|
"grad_norm": 1.520294700430976, |
|
"learning_rate": 7.4967038559752626e-06, |
|
"loss": 0.1181, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.6686046511627907, |
|
"grad_norm": 1.6731627584388915, |
|
"learning_rate": 7.486806755160298e-06, |
|
"loss": 0.1595, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.6700581395348837, |
|
"grad_norm": 1.3704388693068905, |
|
"learning_rate": 7.476896691369023e-06, |
|
"loss": 0.1188, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.6715116279069767, |
|
"grad_norm": 1.4337091782306097, |
|
"learning_rate": 7.466973716259622e-06, |
|
"loss": 0.1132, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.6729651162790697, |
|
"grad_norm": 1.3716017344315858, |
|
"learning_rate": 7.457037881557585e-06, |
|
"loss": 0.1334, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.6744186046511628, |
|
"grad_norm": 1.5766750432421248, |
|
"learning_rate": 7.447089239055428e-06, |
|
"loss": 0.1143, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.6758720930232558, |
|
"grad_norm": 1.5864606252067934, |
|
"learning_rate": 7.437127840612438e-06, |
|
"loss": 0.1309, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.6773255813953488, |
|
"grad_norm": 1.4567542884868516, |
|
"learning_rate": 7.4271537381543916e-06, |
|
"loss": 0.147, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.6787790697674418, |
|
"grad_norm": 1.4514082699066, |
|
"learning_rate": 7.417166983673286e-06, |
|
"loss": 0.1551, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.6802325581395349, |
|
"grad_norm": 1.2445772110855982, |
|
"learning_rate": 7.407167629227072e-06, |
|
"loss": 0.1066, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.6816860465116279, |
|
"grad_norm": 1.370255809316678, |
|
"learning_rate": 7.3971557269393805e-06, |
|
"loss": 0.1377, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.6831395348837209, |
|
"grad_norm": 1.4592622873191892, |
|
"learning_rate": 7.3871313289992466e-06, |
|
"loss": 0.1314, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.684593023255814, |
|
"grad_norm": 1.6311413610361725, |
|
"learning_rate": 7.377094487660847e-06, |
|
"loss": 0.1628, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.686046511627907, |
|
"grad_norm": 1.5762097334368965, |
|
"learning_rate": 7.367045255243217e-06, |
|
"loss": 0.156, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.6875, |
|
"grad_norm": 1.7627902314989499, |
|
"learning_rate": 7.3569836841299905e-06, |
|
"loss": 0.1315, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.688953488372093, |
|
"grad_norm": 2.222043724851836, |
|
"learning_rate": 7.346909826769107e-06, |
|
"loss": 0.1731, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.690406976744186, |
|
"grad_norm": 1.5406069442857668, |
|
"learning_rate": 7.336823735672563e-06, |
|
"loss": 0.1386, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.6918604651162791, |
|
"grad_norm": 1.5796855692443843, |
|
"learning_rate": 7.326725463416118e-06, |
|
"loss": 0.15, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.6933139534883721, |
|
"grad_norm": 1.4662737935999774, |
|
"learning_rate": 7.316615062639031e-06, |
|
"loss": 0.1435, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.6947674418604651, |
|
"grad_norm": 1.2313632203692446, |
|
"learning_rate": 7.306492586043783e-06, |
|
"loss": 0.0919, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.6962209302325582, |
|
"grad_norm": 1.7509061840145403, |
|
"learning_rate": 7.296358086395803e-06, |
|
"loss": 0.1372, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.6976744186046512, |
|
"grad_norm": 1.6433660385495745, |
|
"learning_rate": 7.286211616523193e-06, |
|
"loss": 0.131, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.6991279069767442, |
|
"grad_norm": 1.8160423455057901, |
|
"learning_rate": 7.276053229316451e-06, |
|
"loss": 0.1945, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.7005813953488372, |
|
"grad_norm": 1.4727605332371438, |
|
"learning_rate": 7.265882977728195e-06, |
|
"loss": 0.1382, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.7020348837209303, |
|
"grad_norm": 1.4064135407009675, |
|
"learning_rate": 7.255700914772891e-06, |
|
"loss": 0.1266, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.7034883720930233, |
|
"grad_norm": 1.6003335768968785, |
|
"learning_rate": 7.245507093526575e-06, |
|
"loss": 0.169, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.7049418604651163, |
|
"grad_norm": 1.3092194579961982, |
|
"learning_rate": 7.2353015671265716e-06, |
|
"loss": 0.1184, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.7063953488372093, |
|
"grad_norm": 1.5489983894775146, |
|
"learning_rate": 7.225084388771226e-06, |
|
"loss": 0.1524, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.7078488372093024, |
|
"grad_norm": 1.4796324887099654, |
|
"learning_rate": 7.214855611719616e-06, |
|
"loss": 0.1592, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.7093023255813954, |
|
"grad_norm": 1.4347850977601093, |
|
"learning_rate": 7.204615289291283e-06, |
|
"loss": 0.1618, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.7107558139534884, |
|
"grad_norm": 1.5275682896148945, |
|
"learning_rate": 7.194363474865951e-06, |
|
"loss": 0.1281, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.7122093023255814, |
|
"grad_norm": 1.785937536200274, |
|
"learning_rate": 7.184100221883248e-06, |
|
"loss": 0.1454, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.7136627906976745, |
|
"grad_norm": 1.2976847404347416, |
|
"learning_rate": 7.173825583842427e-06, |
|
"loss": 0.1317, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.7151162790697675, |
|
"grad_norm": 1.166513744560416, |
|
"learning_rate": 7.163539614302088e-06, |
|
"loss": 0.13, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.7165697674418605, |
|
"grad_norm": 1.3524452986795923, |
|
"learning_rate": 7.153242366879903e-06, |
|
"loss": 0.1514, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.7180232558139535, |
|
"grad_norm": 1.1994423678946693, |
|
"learning_rate": 7.142933895252324e-06, |
|
"loss": 0.1194, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.7194767441860465, |
|
"grad_norm": 1.2798330620724652, |
|
"learning_rate": 7.1326142531543184e-06, |
|
"loss": 0.1322, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.7209302325581395, |
|
"grad_norm": 1.2830117846056146, |
|
"learning_rate": 7.122283494379076e-06, |
|
"loss": 0.1368, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.7223837209302325, |
|
"grad_norm": 1.6773615252194154, |
|
"learning_rate": 7.1119416727777414e-06, |
|
"loss": 0.152, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.7238372093023255, |
|
"grad_norm": 1.4919923732243248, |
|
"learning_rate": 7.101588842259122e-06, |
|
"loss": 0.1712, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.7252906976744186, |
|
"grad_norm": 1.4335685593529308, |
|
"learning_rate": 7.09122505678941e-06, |
|
"loss": 0.1476, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.7267441860465116, |
|
"grad_norm": 1.6645358118933977, |
|
"learning_rate": 7.080850370391907e-06, |
|
"loss": 0.1275, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.7281976744186046, |
|
"grad_norm": 1.4570433505689948, |
|
"learning_rate": 7.0704648371467355e-06, |
|
"loss": 0.1173, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.7296511627906976, |
|
"grad_norm": 1.4500084348795894, |
|
"learning_rate": 7.060068511190559e-06, |
|
"loss": 0.1343, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.7311046511627907, |
|
"grad_norm": 1.3583549655605316, |
|
"learning_rate": 7.049661446716298e-06, |
|
"loss": 0.1392, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.7325581395348837, |
|
"grad_norm": 1.678903155981633, |
|
"learning_rate": 7.039243697972856e-06, |
|
"loss": 0.1237, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.7340116279069767, |
|
"grad_norm": 1.1813005908965784, |
|
"learning_rate": 7.028815319264825e-06, |
|
"loss": 0.1239, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.7354651162790697, |
|
"grad_norm": 1.6621537638966262, |
|
"learning_rate": 7.01837636495221e-06, |
|
"loss": 0.1373, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.7369186046511628, |
|
"grad_norm": 1.4624579516595546, |
|
"learning_rate": 7.007926889450142e-06, |
|
"loss": 0.1074, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.7383720930232558, |
|
"grad_norm": 1.6615046264229711, |
|
"learning_rate": 6.997466947228596e-06, |
|
"loss": 0.149, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.7398255813953488, |
|
"grad_norm": 1.4750586970164907, |
|
"learning_rate": 6.9869965928121095e-06, |
|
"loss": 0.11, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.7412790697674418, |
|
"grad_norm": 1.2507783508104329, |
|
"learning_rate": 6.976515880779492e-06, |
|
"loss": 0.1141, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.7427325581395349, |
|
"grad_norm": 1.545121618824877, |
|
"learning_rate": 6.966024865763546e-06, |
|
"loss": 0.1291, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.7441860465116279, |
|
"grad_norm": 1.438312213107679, |
|
"learning_rate": 6.95552360245078e-06, |
|
"loss": 0.1502, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.7456395348837209, |
|
"grad_norm": 1.6592935733066172, |
|
"learning_rate": 6.945012145581127e-06, |
|
"loss": 0.175, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.747093023255814, |
|
"grad_norm": 1.514707717226784, |
|
"learning_rate": 6.9344905499476475e-06, |
|
"loss": 0.15, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.748546511627907, |
|
"grad_norm": 1.604276122246373, |
|
"learning_rate": 6.9239588703962625e-06, |
|
"loss": 0.1572, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 1.3281897884109624, |
|
"learning_rate": 6.913417161825449e-06, |
|
"loss": 0.1519, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.751453488372093, |
|
"grad_norm": 1.4267995539927456, |
|
"learning_rate": 6.90286547918597e-06, |
|
"loss": 0.1335, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.752906976744186, |
|
"grad_norm": 1.5170579839155427, |
|
"learning_rate": 6.8923038774805705e-06, |
|
"loss": 0.1519, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.7543604651162791, |
|
"grad_norm": 1.3797508342143487, |
|
"learning_rate": 6.881732411763712e-06, |
|
"loss": 0.1343, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.7558139534883721, |
|
"grad_norm": 1.5104127395592026, |
|
"learning_rate": 6.871151137141266e-06, |
|
"loss": 0.138, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.7572674418604651, |
|
"grad_norm": 1.3568718550092838, |
|
"learning_rate": 6.860560108770238e-06, |
|
"loss": 0.1367, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.7587209302325582, |
|
"grad_norm": 1.5091260876222339, |
|
"learning_rate": 6.849959381858475e-06, |
|
"loss": 0.1186, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.7601744186046512, |
|
"grad_norm": 1.318107897408335, |
|
"learning_rate": 6.839349011664381e-06, |
|
"loss": 0.111, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.7616279069767442, |
|
"grad_norm": 1.4214672012040783, |
|
"learning_rate": 6.828729053496629e-06, |
|
"loss": 0.1174, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.7630813953488372, |
|
"grad_norm": 1.4659206181489879, |
|
"learning_rate": 6.8180995627138665e-06, |
|
"loss": 0.1492, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.7645348837209303, |
|
"grad_norm": 1.7334326757131346, |
|
"learning_rate": 6.80746059472444e-06, |
|
"loss": 0.1363, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.7659883720930233, |
|
"grad_norm": 1.4971209402674073, |
|
"learning_rate": 6.796812204986087e-06, |
|
"loss": 0.1465, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.7674418604651163, |
|
"grad_norm": 1.4222274425753152, |
|
"learning_rate": 6.786154449005664e-06, |
|
"loss": 0.1356, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.7688953488372093, |
|
"grad_norm": 1.1331281504639361, |
|
"learning_rate": 6.775487382338854e-06, |
|
"loss": 0.0901, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.7703488372093024, |
|
"grad_norm": 1.4096278465211138, |
|
"learning_rate": 6.764811060589867e-06, |
|
"loss": 0.1083, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.7718023255813954, |
|
"grad_norm": 1.4098544395095702, |
|
"learning_rate": 6.754125539411159e-06, |
|
"loss": 0.1217, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.7732558139534884, |
|
"grad_norm": 1.131188089335205, |
|
"learning_rate": 6.743430874503143e-06, |
|
"loss": 0.1337, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.7747093023255814, |
|
"grad_norm": 1.2696620283091575, |
|
"learning_rate": 6.732727121613894e-06, |
|
"loss": 0.097, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.7761627906976745, |
|
"grad_norm": 1.5173238831421139, |
|
"learning_rate": 6.722014336538858e-06, |
|
"loss": 0.1178, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.7776162790697675, |
|
"grad_norm": 1.393380969750145, |
|
"learning_rate": 6.7112925751205636e-06, |
|
"loss": 0.1409, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.7790697674418605, |
|
"grad_norm": 1.5020699777762692, |
|
"learning_rate": 6.700561893248332e-06, |
|
"loss": 0.1635, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.7805232558139535, |
|
"grad_norm": 1.319982727969388, |
|
"learning_rate": 6.689822346857983e-06, |
|
"loss": 0.1047, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.7819767441860465, |
|
"grad_norm": 1.6061685140083173, |
|
"learning_rate": 6.679073991931544e-06, |
|
"loss": 0.1513, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.7834302325581395, |
|
"grad_norm": 1.451224509588678, |
|
"learning_rate": 6.66831688449696e-06, |
|
"loss": 0.1223, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.7848837209302325, |
|
"grad_norm": 1.3257073380514315, |
|
"learning_rate": 6.657551080627801e-06, |
|
"loss": 0.147, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.7863372093023255, |
|
"grad_norm": 1.412854606695532, |
|
"learning_rate": 6.646776636442964e-06, |
|
"loss": 0.1427, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.7877906976744186, |
|
"grad_norm": 1.2307963654566814, |
|
"learning_rate": 6.63599360810639e-06, |
|
"loss": 0.1149, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.7892441860465116, |
|
"grad_norm": 1.2378486319546478, |
|
"learning_rate": 6.6252020518267664e-06, |
|
"loss": 0.145, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.7906976744186046, |
|
"grad_norm": 1.4375410232495538, |
|
"learning_rate": 6.614402023857231e-06, |
|
"loss": 0.1458, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.7921511627906976, |
|
"grad_norm": 1.7682907013920814, |
|
"learning_rate": 6.603593580495088e-06, |
|
"loss": 0.1492, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.7936046511627907, |
|
"grad_norm": 1.5849164073586606, |
|
"learning_rate": 6.5927767780815e-06, |
|
"loss": 0.1244, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.7950581395348837, |
|
"grad_norm": 1.4013284382226423, |
|
"learning_rate": 6.581951673001212e-06, |
|
"loss": 0.1575, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.7965116279069767, |
|
"grad_norm": 1.6703536565707453, |
|
"learning_rate": 6.5711183216822405e-06, |
|
"loss": 0.1345, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.7979651162790697, |
|
"grad_norm": 1.5575391293614544, |
|
"learning_rate": 6.56027678059559e-06, |
|
"loss": 0.1424, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.7994186046511628, |
|
"grad_norm": 1.1506378616299937, |
|
"learning_rate": 6.549427106254959e-06, |
|
"loss": 0.1204, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.8008720930232558, |
|
"grad_norm": 1.6831420345046888, |
|
"learning_rate": 6.5385693552164375e-06, |
|
"loss": 0.1533, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.8023255813953488, |
|
"grad_norm": 1.5433557869849786, |
|
"learning_rate": 6.527703584078219e-06, |
|
"loss": 0.1287, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.8037790697674418, |
|
"grad_norm": 1.37938065902107, |
|
"learning_rate": 6.516829849480304e-06, |
|
"loss": 0.1466, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.8052325581395349, |
|
"grad_norm": 1.4138166190724506, |
|
"learning_rate": 6.505948208104202e-06, |
|
"loss": 0.1336, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.8066860465116279, |
|
"grad_norm": 1.2958346006809633, |
|
"learning_rate": 6.495058716672641e-06, |
|
"loss": 0.1155, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.8081395348837209, |
|
"grad_norm": 1.536894893444166, |
|
"learning_rate": 6.4841614319492665e-06, |
|
"loss": 0.1467, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.809593023255814, |
|
"grad_norm": 1.42823984914434, |
|
"learning_rate": 6.473256410738349e-06, |
|
"loss": 0.1245, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.811046511627907, |
|
"grad_norm": 1.738051363991152, |
|
"learning_rate": 6.462343709884488e-06, |
|
"loss": 0.1431, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.8125, |
|
"grad_norm": 1.199922342897142, |
|
"learning_rate": 6.451423386272312e-06, |
|
"loss": 0.1328, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.813953488372093, |
|
"grad_norm": 1.8691018081903936, |
|
"learning_rate": 6.440495496826189e-06, |
|
"loss": 0.1695, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.815406976744186, |
|
"grad_norm": 1.431507205382704, |
|
"learning_rate": 6.429560098509919e-06, |
|
"loss": 0.1519, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.8168604651162791, |
|
"grad_norm": 1.764161344903358, |
|
"learning_rate": 6.4186172483264505e-06, |
|
"loss": 0.1503, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.8183139534883721, |
|
"grad_norm": 1.3488150562790815, |
|
"learning_rate": 6.4076670033175725e-06, |
|
"loss": 0.1419, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.8197674418604651, |
|
"grad_norm": 1.6253830612966447, |
|
"learning_rate": 6.396709420563621e-06, |
|
"loss": 0.1779, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.8212209302325582, |
|
"grad_norm": 1.628949856290011, |
|
"learning_rate": 6.385744557183181e-06, |
|
"loss": 0.1325, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.8226744186046512, |
|
"grad_norm": 2.1235072067385237, |
|
"learning_rate": 6.374772470332793e-06, |
|
"loss": 0.1833, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.8241279069767442, |
|
"grad_norm": 1.9172827402856294, |
|
"learning_rate": 6.363793217206645e-06, |
|
"loss": 0.1903, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.8255813953488372, |
|
"grad_norm": 1.4322727783843214, |
|
"learning_rate": 6.352806855036287e-06, |
|
"loss": 0.1271, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.8270348837209303, |
|
"grad_norm": 1.2966960086456427, |
|
"learning_rate": 6.341813441090323e-06, |
|
"loss": 0.146, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.8284883720930233, |
|
"grad_norm": 1.550482403879787, |
|
"learning_rate": 6.330813032674116e-06, |
|
"loss": 0.1432, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.8299418604651163, |
|
"grad_norm": 1.3655414999390942, |
|
"learning_rate": 6.3198056871294885e-06, |
|
"loss": 0.109, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.8313953488372093, |
|
"grad_norm": 1.4217140051202517, |
|
"learning_rate": 6.308791461834427e-06, |
|
"loss": 0.1318, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.8328488372093024, |
|
"grad_norm": 1.7714886028240058, |
|
"learning_rate": 6.297770414202778e-06, |
|
"loss": 0.1777, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.8343023255813954, |
|
"grad_norm": 1.4570998125079995, |
|
"learning_rate": 6.286742601683947e-06, |
|
"loss": 0.1237, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.8357558139534884, |
|
"grad_norm": 1.4949515901539128, |
|
"learning_rate": 6.275708081762611e-06, |
|
"loss": 0.131, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.8372093023255814, |
|
"grad_norm": 1.3854089542275734, |
|
"learning_rate": 6.264666911958404e-06, |
|
"loss": 0.1125, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.8386627906976745, |
|
"grad_norm": 1.5479190064330035, |
|
"learning_rate": 6.253619149825627e-06, |
|
"loss": 0.1402, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.8401162790697675, |
|
"grad_norm": 1.4026025728759723, |
|
"learning_rate": 6.242564852952946e-06, |
|
"loss": 0.1209, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.8415697674418605, |
|
"grad_norm": 1.4121319145908862, |
|
"learning_rate": 6.231504078963087e-06, |
|
"loss": 0.138, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.8430232558139535, |
|
"grad_norm": 2.209836743441391, |
|
"learning_rate": 6.220436885512539e-06, |
|
"loss": 0.1362, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.8444767441860465, |
|
"grad_norm": 1.585787336115818, |
|
"learning_rate": 6.209363330291261e-06, |
|
"loss": 0.201, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.8459302325581395, |
|
"grad_norm": 1.9740958700357052, |
|
"learning_rate": 6.198283471022362e-06, |
|
"loss": 0.1469, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.8473837209302325, |
|
"grad_norm": 1.4412498217025638, |
|
"learning_rate": 6.187197365461822e-06, |
|
"loss": 0.1708, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.8488372093023255, |
|
"grad_norm": 1.4582305731121776, |
|
"learning_rate": 6.1761050713981795e-06, |
|
"loss": 0.1266, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.8502906976744186, |
|
"grad_norm": 1.450107224126935, |
|
"learning_rate": 6.165006646652227e-06, |
|
"loss": 0.1561, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.8517441860465116, |
|
"grad_norm": 2.004083462986721, |
|
"learning_rate": 6.1539021490767206e-06, |
|
"loss": 0.2133, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.8531976744186046, |
|
"grad_norm": 1.6247079616074993, |
|
"learning_rate": 6.1427916365560666e-06, |
|
"loss": 0.1742, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.8546511627906976, |
|
"grad_norm": 1.552175859788209, |
|
"learning_rate": 6.1316751670060295e-06, |
|
"loss": 0.136, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.8561046511627907, |
|
"grad_norm": 1.276124479823587, |
|
"learning_rate": 6.120552798373423e-06, |
|
"loss": 0.1694, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.8575581395348837, |
|
"grad_norm": 1.5410147115702781, |
|
"learning_rate": 6.109424588635814e-06, |
|
"loss": 0.105, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.8590116279069767, |
|
"grad_norm": 1.2952469635559278, |
|
"learning_rate": 6.098290595801215e-06, |
|
"loss": 0.1179, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.8604651162790697, |
|
"grad_norm": 1.1306177176882681, |
|
"learning_rate": 6.087150877907786e-06, |
|
"loss": 0.081, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.8619186046511628, |
|
"grad_norm": 1.7371127866399059, |
|
"learning_rate": 6.076005493023527e-06, |
|
"loss": 0.1893, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.8633720930232558, |
|
"grad_norm": 1.4638570381751588, |
|
"learning_rate": 6.0648544992459804e-06, |
|
"loss": 0.1572, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.8648255813953488, |
|
"grad_norm": 1.6634088466354064, |
|
"learning_rate": 6.053697954701927e-06, |
|
"loss": 0.1517, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.8662790697674418, |
|
"grad_norm": 1.620767793264197, |
|
"learning_rate": 6.04253591754708e-06, |
|
"loss": 0.1678, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.8677325581395349, |
|
"grad_norm": 1.6119437429007382, |
|
"learning_rate": 6.031368445965784e-06, |
|
"loss": 0.159, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.8691860465116279, |
|
"grad_norm": 1.78819833988465, |
|
"learning_rate": 6.0201955981707135e-06, |
|
"loss": 0.1993, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.8706395348837209, |
|
"grad_norm": 1.95544535687204, |
|
"learning_rate": 6.009017432402569e-06, |
|
"loss": 0.1759, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.872093023255814, |
|
"grad_norm": 1.5590305502883546, |
|
"learning_rate": 5.997834006929765e-06, |
|
"loss": 0.1469, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.872093023255814, |
|
"eval_loss": 0.1277909129858017, |
|
"eval_runtime": 2.208, |
|
"eval_samples_per_second": 25.362, |
|
"eval_steps_per_second": 6.341, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.873546511627907, |
|
"grad_norm": 1.3164705679840971, |
|
"learning_rate": 5.98664538004814e-06, |
|
"loss": 0.1003, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.875, |
|
"grad_norm": 1.2239480234296696, |
|
"learning_rate": 5.975451610080643e-06, |
|
"loss": 0.1121, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.876453488372093, |
|
"grad_norm": 1.894849529945871, |
|
"learning_rate": 5.964252755377033e-06, |
|
"loss": 0.1654, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.877906976744186, |
|
"grad_norm": 1.2492208085247638, |
|
"learning_rate": 5.953048874313575e-06, |
|
"loss": 0.1122, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.8793604651162791, |
|
"grad_norm": 1.8022455407233433, |
|
"learning_rate": 5.941840025292733e-06, |
|
"loss": 0.1257, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.8808139534883721, |
|
"grad_norm": 1.3177318316240192, |
|
"learning_rate": 5.930626266742871e-06, |
|
"loss": 0.1383, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.8822674418604651, |
|
"grad_norm": 1.140730369983588, |
|
"learning_rate": 5.9194076571179415e-06, |
|
"loss": 0.116, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.8837209302325582, |
|
"grad_norm": 1.3723451817269945, |
|
"learning_rate": 5.908184254897183e-06, |
|
"loss": 0.1549, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.8851744186046512, |
|
"grad_norm": 1.5626353100716068, |
|
"learning_rate": 5.89695611858482e-06, |
|
"loss": 0.1442, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.8866279069767442, |
|
"grad_norm": 1.8839870797491676, |
|
"learning_rate": 5.885723306709754e-06, |
|
"loss": 0.1286, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.8880813953488372, |
|
"grad_norm": 1.082745017222708, |
|
"learning_rate": 5.8744858778252555e-06, |
|
"loss": 0.1223, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.8895348837209303, |
|
"grad_norm": 1.4674626548779213, |
|
"learning_rate": 5.8632438905086685e-06, |
|
"loss": 0.1456, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.8909883720930233, |
|
"grad_norm": 1.358671032886957, |
|
"learning_rate": 5.851997403361089e-06, |
|
"loss": 0.1299, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.8924418604651163, |
|
"grad_norm": 1.296241900835058, |
|
"learning_rate": 5.840746475007079e-06, |
|
"loss": 0.1419, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.8938953488372093, |
|
"grad_norm": 1.3842657938075646, |
|
"learning_rate": 5.8294911640943455e-06, |
|
"loss": 0.1215, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.8953488372093024, |
|
"grad_norm": 1.3467341878188124, |
|
"learning_rate": 5.818231529293441e-06, |
|
"loss": 0.1123, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.8968023255813954, |
|
"grad_norm": 1.4251072382018615, |
|
"learning_rate": 5.80696762929746e-06, |
|
"loss": 0.1423, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.8982558139534884, |
|
"grad_norm": 1.5077208770452775, |
|
"learning_rate": 5.795699522821727e-06, |
|
"loss": 0.1728, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.8997093023255814, |
|
"grad_norm": 1.248864820858094, |
|
"learning_rate": 5.784427268603498e-06, |
|
"loss": 0.1195, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.9011627906976745, |
|
"grad_norm": 1.4092688361917134, |
|
"learning_rate": 5.773150925401642e-06, |
|
"loss": 0.1266, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.9026162790697675, |
|
"grad_norm": 1.688504971336848, |
|
"learning_rate": 5.761870551996349e-06, |
|
"loss": 0.1379, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.9040697674418605, |
|
"grad_norm": 1.2541754034602337, |
|
"learning_rate": 5.750586207188817e-06, |
|
"loss": 0.0917, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.9055232558139535, |
|
"grad_norm": 1.2487120647276626, |
|
"learning_rate": 5.7392979498009445e-06, |
|
"loss": 0.1241, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.9069767441860465, |
|
"grad_norm": 1.445654790451785, |
|
"learning_rate": 5.728005838675026e-06, |
|
"loss": 0.1456, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.9084302325581395, |
|
"grad_norm": 1.7499610557683432, |
|
"learning_rate": 5.7167099326734385e-06, |
|
"loss": 0.1405, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.9098837209302325, |
|
"grad_norm": 1.648183080801681, |
|
"learning_rate": 5.7054102906783526e-06, |
|
"loss": 0.1822, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.9113372093023255, |
|
"grad_norm": 1.6571510451841074, |
|
"learning_rate": 5.6941069715914e-06, |
|
"loss": 0.1181, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.9127906976744186, |
|
"grad_norm": 1.944131860744343, |
|
"learning_rate": 5.6828000343333904e-06, |
|
"loss": 0.1719, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.9142441860465116, |
|
"grad_norm": 1.7309656879851627, |
|
"learning_rate": 5.671489537843987e-06, |
|
"loss": 0.1689, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.9156976744186046, |
|
"grad_norm": 1.7575196308441996, |
|
"learning_rate": 5.660175541081411e-06, |
|
"loss": 0.182, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.9171511627906976, |
|
"grad_norm": 1.277009648528385, |
|
"learning_rate": 5.648858103022128e-06, |
|
"loss": 0.1516, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.9186046511627907, |
|
"grad_norm": 1.340132227949514, |
|
"learning_rate": 5.63753728266054e-06, |
|
"loss": 0.1176, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.9200581395348837, |
|
"grad_norm": 1.7630027193552753, |
|
"learning_rate": 5.626213139008684e-06, |
|
"loss": 0.1677, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.9215116279069767, |
|
"grad_norm": 1.5111942011086947, |
|
"learning_rate": 5.614885731095915e-06, |
|
"loss": 0.1763, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.9229651162790697, |
|
"grad_norm": 1.3355309378554394, |
|
"learning_rate": 5.603555117968607e-06, |
|
"loss": 0.1073, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.9244186046511628, |
|
"grad_norm": 1.404078498056489, |
|
"learning_rate": 5.592221358689843e-06, |
|
"loss": 0.1249, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.9258720930232558, |
|
"grad_norm": 1.4270282877283114, |
|
"learning_rate": 5.580884512339103e-06, |
|
"loss": 0.1035, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.9273255813953488, |
|
"grad_norm": 1.5176928637053217, |
|
"learning_rate": 5.56954463801196e-06, |
|
"loss": 0.1373, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.9287790697674418, |
|
"grad_norm": 1.3456243259305802, |
|
"learning_rate": 5.558201794819773e-06, |
|
"loss": 0.1868, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.9302325581395349, |
|
"grad_norm": 1.5593018977581026, |
|
"learning_rate": 5.546856041889374e-06, |
|
"loss": 0.1429, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.9316860465116279, |
|
"grad_norm": 1.6976998517010211, |
|
"learning_rate": 5.53550743836276e-06, |
|
"loss": 0.1523, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.9331395348837209, |
|
"grad_norm": 1.3798256912079117, |
|
"learning_rate": 5.524156043396796e-06, |
|
"loss": 0.1179, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.934593023255814, |
|
"grad_norm": 1.4294475115030438, |
|
"learning_rate": 5.512801916162891e-06, |
|
"loss": 0.1309, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.936046511627907, |
|
"grad_norm": 2.7198043975539137, |
|
"learning_rate": 5.501445115846697e-06, |
|
"loss": 0.207, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.9375, |
|
"grad_norm": 1.4833934130748365, |
|
"learning_rate": 5.490085701647805e-06, |
|
"loss": 0.1355, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.938953488372093, |
|
"grad_norm": 1.3355964013808177, |
|
"learning_rate": 5.478723732779422e-06, |
|
"loss": 0.1374, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.940406976744186, |
|
"grad_norm": 1.7207500799533988, |
|
"learning_rate": 5.467359268468081e-06, |
|
"loss": 0.1605, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.9418604651162791, |
|
"grad_norm": 1.3090162322795527, |
|
"learning_rate": 5.455992367953318e-06, |
|
"loss": 0.1445, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.9433139534883721, |
|
"grad_norm": 1.2788214859695954, |
|
"learning_rate": 5.444623090487371e-06, |
|
"loss": 0.1047, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.9447674418604651, |
|
"grad_norm": 2.0071938164433365, |
|
"learning_rate": 5.433251495334864e-06, |
|
"loss": 0.1546, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.9462209302325582, |
|
"grad_norm": 1.3782058820605825, |
|
"learning_rate": 5.4218776417725095e-06, |
|
"loss": 0.136, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.9476744186046512, |
|
"grad_norm": 1.281462748132549, |
|
"learning_rate": 5.410501589088786e-06, |
|
"loss": 0.1035, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.9491279069767442, |
|
"grad_norm": 1.9048686906108325, |
|
"learning_rate": 5.3991233965836365e-06, |
|
"loss": 0.1683, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.9505813953488372, |
|
"grad_norm": 1.6316273841656876, |
|
"learning_rate": 5.387743123568161e-06, |
|
"loss": 0.1446, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.9520348837209303, |
|
"grad_norm": 1.4861937009380384, |
|
"learning_rate": 5.376360829364301e-06, |
|
"loss": 0.1527, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.9534883720930233, |
|
"grad_norm": 1.8814574763895897, |
|
"learning_rate": 5.364976573304538e-06, |
|
"loss": 0.1614, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.9549418604651163, |
|
"grad_norm": 1.4646624685889762, |
|
"learning_rate": 5.3535904147315765e-06, |
|
"loss": 0.1543, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.9563953488372093, |
|
"grad_norm": 1.287864663906961, |
|
"learning_rate": 5.34220241299804e-06, |
|
"loss": 0.1279, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.9578488372093024, |
|
"grad_norm": 1.7385545460984706, |
|
"learning_rate": 5.330812627466159e-06, |
|
"loss": 0.1639, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.9593023255813954, |
|
"grad_norm": 1.7081543020679955, |
|
"learning_rate": 5.319421117507461e-06, |
|
"loss": 0.1597, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.9607558139534884, |
|
"grad_norm": 1.3995536288669932, |
|
"learning_rate": 5.308027942502467e-06, |
|
"loss": 0.1103, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.9622093023255814, |
|
"grad_norm": 1.2943413923835272, |
|
"learning_rate": 5.296633161840374e-06, |
|
"loss": 0.159, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.9636627906976745, |
|
"grad_norm": 1.5010261384648311, |
|
"learning_rate": 5.285236834918749e-06, |
|
"loss": 0.1101, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.9651162790697675, |
|
"grad_norm": 1.59839024793064, |
|
"learning_rate": 5.273839021143217e-06, |
|
"loss": 0.1438, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.9665697674418605, |
|
"grad_norm": 1.3887266849885156, |
|
"learning_rate": 5.262439779927163e-06, |
|
"loss": 0.1336, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.9680232558139535, |
|
"grad_norm": 1.2605940576749957, |
|
"learning_rate": 5.251039170691399e-06, |
|
"loss": 0.1168, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.9694767441860465, |
|
"grad_norm": 1.4056700350437759, |
|
"learning_rate": 5.2396372528638785e-06, |
|
"loss": 0.1501, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.9709302325581395, |
|
"grad_norm": 1.4534488094786742, |
|
"learning_rate": 5.22823408587937e-06, |
|
"loss": 0.1336, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.9723837209302325, |
|
"grad_norm": 1.336814955827563, |
|
"learning_rate": 5.216829729179158e-06, |
|
"loss": 0.1126, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.9738372093023255, |
|
"grad_norm": 1.5142125626046417, |
|
"learning_rate": 5.205424242210727e-06, |
|
"loss": 0.1096, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.9752906976744186, |
|
"grad_norm": 1.4755522779468442, |
|
"learning_rate": 5.194017684427453e-06, |
|
"loss": 0.1087, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.9767441860465116, |
|
"grad_norm": 1.5661924416341968, |
|
"learning_rate": 5.182610115288296e-06, |
|
"loss": 0.1274, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.9781976744186046, |
|
"grad_norm": 1.9124058904385859, |
|
"learning_rate": 5.171201594257481e-06, |
|
"loss": 0.1314, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.9796511627906976, |
|
"grad_norm": 1.504304863764823, |
|
"learning_rate": 5.159792180804204e-06, |
|
"loss": 0.1759, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.9811046511627907, |
|
"grad_norm": 1.6754651562035756, |
|
"learning_rate": 5.148381934402306e-06, |
|
"loss": 0.1911, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.9825581395348837, |
|
"grad_norm": 1.4176711979057277, |
|
"learning_rate": 5.136970914529975e-06, |
|
"loss": 0.1305, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.9840116279069767, |
|
"grad_norm": 1.5962091642432297, |
|
"learning_rate": 5.125559180669427e-06, |
|
"loss": 0.1385, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.9854651162790697, |
|
"grad_norm": 1.223911365889511, |
|
"learning_rate": 5.1141467923066016e-06, |
|
"loss": 0.1169, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.9869186046511628, |
|
"grad_norm": 1.5617758354090914, |
|
"learning_rate": 5.102733808930851e-06, |
|
"loss": 0.1172, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.9883720930232558, |
|
"grad_norm": 1.6974851747495288, |
|
"learning_rate": 5.0913202900346246e-06, |
|
"loss": 0.13, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.9898255813953488, |
|
"grad_norm": 1.631411370867565, |
|
"learning_rate": 5.07990629511317e-06, |
|
"loss": 0.1454, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.9912790697674418, |
|
"grad_norm": 1.5300412665655438, |
|
"learning_rate": 5.068491883664212e-06, |
|
"loss": 0.109, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.9927325581395349, |
|
"grad_norm": 1.442218618017608, |
|
"learning_rate": 5.057077115187645e-06, |
|
"loss": 0.1429, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.9941860465116279, |
|
"grad_norm": 1.347588478220433, |
|
"learning_rate": 5.04566204918523e-06, |
|
"loss": 0.1255, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.9956395348837209, |
|
"grad_norm": 1.605701847815844, |
|
"learning_rate": 5.034246745160275e-06, |
|
"loss": 0.1533, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.997093023255814, |
|
"grad_norm": 1.4017801022069876, |
|
"learning_rate": 5.022831262617328e-06, |
|
"loss": 0.1291, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.998546511627907, |
|
"grad_norm": 1.5723087938578602, |
|
"learning_rate": 5.011415661061869e-06, |
|
"loss": 0.106, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.160944506207306, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0753, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 1.001453488372093, |
|
"grad_norm": 0.9804794119294494, |
|
"learning_rate": 4.988584338938133e-06, |
|
"loss": 0.0865, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 1.002906976744186, |
|
"grad_norm": 0.8710757605721173, |
|
"learning_rate": 4.977168737382674e-06, |
|
"loss": 0.0666, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.004360465116279, |
|
"grad_norm": 1.195545295432579, |
|
"learning_rate": 4.965753254839727e-06, |
|
"loss": 0.0836, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 1.005813953488372, |
|
"grad_norm": 1.2127630890866723, |
|
"learning_rate": 4.954337950814771e-06, |
|
"loss": 0.0551, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 1.007267441860465, |
|
"grad_norm": 1.5404241417212168, |
|
"learning_rate": 4.942922884812357e-06, |
|
"loss": 0.1009, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 1.0087209302325582, |
|
"grad_norm": 1.1556879357489478, |
|
"learning_rate": 4.9315081163357905e-06, |
|
"loss": 0.0675, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 1.010174418604651, |
|
"grad_norm": 1.1774111310837216, |
|
"learning_rate": 4.920093704886832e-06, |
|
"loss": 0.0639, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 1.0116279069767442, |
|
"grad_norm": 1.1528720659312301, |
|
"learning_rate": 4.908679709965376e-06, |
|
"loss": 0.0548, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 1.0130813953488371, |
|
"grad_norm": 1.1786682354719682, |
|
"learning_rate": 4.897266191069152e-06, |
|
"loss": 0.0793, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 1.0145348837209303, |
|
"grad_norm": 1.1681775175087292, |
|
"learning_rate": 4.8858532076934e-06, |
|
"loss": 0.0621, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 1.0159883720930232, |
|
"grad_norm": 1.396149038821934, |
|
"learning_rate": 4.874440819330576e-06, |
|
"loss": 0.0595, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 1.0174418604651163, |
|
"grad_norm": 1.0444832188691113, |
|
"learning_rate": 4.8630290854700264e-06, |
|
"loss": 0.06, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.0188953488372092, |
|
"grad_norm": 1.3215192237780464, |
|
"learning_rate": 4.851618065597696e-06, |
|
"loss": 0.0594, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 1.0203488372093024, |
|
"grad_norm": 1.3022953280724392, |
|
"learning_rate": 4.840207819195797e-06, |
|
"loss": 0.0582, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 1.0218023255813953, |
|
"grad_norm": 1.2305716639772417, |
|
"learning_rate": 4.82879840574252e-06, |
|
"loss": 0.0626, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 1.0232558139534884, |
|
"grad_norm": 1.612550300365213, |
|
"learning_rate": 4.817389884711706e-06, |
|
"loss": 0.0739, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 1.0247093023255813, |
|
"grad_norm": 1.2261189219475501, |
|
"learning_rate": 4.805982315572547e-06, |
|
"loss": 0.0842, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 1.0261627906976745, |
|
"grad_norm": 1.5497272626438672, |
|
"learning_rate": 4.794575757789274e-06, |
|
"loss": 0.0808, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 1.0276162790697674, |
|
"grad_norm": 1.5930387295456183, |
|
"learning_rate": 4.7831702708208445e-06, |
|
"loss": 0.0617, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 1.0290697674418605, |
|
"grad_norm": 1.3103541839768624, |
|
"learning_rate": 4.7717659141206315e-06, |
|
"loss": 0.0728, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 1.0305232558139534, |
|
"grad_norm": 1.2251435492506793, |
|
"learning_rate": 4.760362747136125e-06, |
|
"loss": 0.0707, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 1.0319767441860466, |
|
"grad_norm": 1.3796327304677727, |
|
"learning_rate": 4.748960829308601e-06, |
|
"loss": 0.0681, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.0334302325581395, |
|
"grad_norm": 1.1725032499441765, |
|
"learning_rate": 4.737560220072839e-06, |
|
"loss": 0.0565, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 1.0348837209302326, |
|
"grad_norm": 1.245077983095157, |
|
"learning_rate": 4.726160978856782e-06, |
|
"loss": 0.0528, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 1.0363372093023255, |
|
"grad_norm": 1.1988572535104782, |
|
"learning_rate": 4.714763165081253e-06, |
|
"loss": 0.0677, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 1.0377906976744187, |
|
"grad_norm": 1.4960786476723407, |
|
"learning_rate": 4.703366838159627e-06, |
|
"loss": 0.0639, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 1.0392441860465116, |
|
"grad_norm": 1.4091361813904233, |
|
"learning_rate": 4.691972057497534e-06, |
|
"loss": 0.0602, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 1.0406976744186047, |
|
"grad_norm": 1.5037404883411951, |
|
"learning_rate": 4.6805788824925395e-06, |
|
"loss": 0.0705, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 1.0421511627906976, |
|
"grad_norm": 1.2607654648800852, |
|
"learning_rate": 4.669187372533843e-06, |
|
"loss": 0.053, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 1.0436046511627908, |
|
"grad_norm": 1.6239843045120586, |
|
"learning_rate": 4.657797587001961e-06, |
|
"loss": 0.0704, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 1.0450581395348837, |
|
"grad_norm": 1.5045378575805983, |
|
"learning_rate": 4.646409585268425e-06, |
|
"loss": 0.0606, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 1.0465116279069768, |
|
"grad_norm": 1.291286419079524, |
|
"learning_rate": 4.635023426695462e-06, |
|
"loss": 0.0644, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.0479651162790697, |
|
"grad_norm": 1.370959757593235, |
|
"learning_rate": 4.6236391706357e-06, |
|
"loss": 0.0704, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 1.0494186046511629, |
|
"grad_norm": 1.5664627647333818, |
|
"learning_rate": 4.612256876431839e-06, |
|
"loss": 0.0498, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 1.0508720930232558, |
|
"grad_norm": 1.2766329620982255, |
|
"learning_rate": 4.600876603416364e-06, |
|
"loss": 0.0473, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 1.052325581395349, |
|
"grad_norm": 1.5959190709847693, |
|
"learning_rate": 4.589498410911215e-06, |
|
"loss": 0.083, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 1.0537790697674418, |
|
"grad_norm": 1.2654889143492782, |
|
"learning_rate": 4.578122358227492e-06, |
|
"loss": 0.061, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.055232558139535, |
|
"grad_norm": 1.232952014205962, |
|
"learning_rate": 4.566748504665136e-06, |
|
"loss": 0.0601, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 1.056686046511628, |
|
"grad_norm": 1.3155701382257572, |
|
"learning_rate": 4.555376909512631e-06, |
|
"loss": 0.0537, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 1.058139534883721, |
|
"grad_norm": 1.6520885025976517, |
|
"learning_rate": 4.544007632046682e-06, |
|
"loss": 0.0798, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 1.059593023255814, |
|
"grad_norm": 1.2269767633790387, |
|
"learning_rate": 4.532640731531921e-06, |
|
"loss": 0.0454, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 1.0610465116279069, |
|
"grad_norm": 1.6055062515206473, |
|
"learning_rate": 4.52127626722058e-06, |
|
"loss": 0.1011, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.0625, |
|
"grad_norm": 1.5283525172896468, |
|
"learning_rate": 4.509914298352197e-06, |
|
"loss": 0.0507, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 1.0639534883720931, |
|
"grad_norm": 1.4633436434507294, |
|
"learning_rate": 4.4985548841533035e-06, |
|
"loss": 0.0948, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 1.065406976744186, |
|
"grad_norm": 1.3019061971094648, |
|
"learning_rate": 4.487198083837111e-06, |
|
"loss": 0.0631, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 1.066860465116279, |
|
"grad_norm": 1.3266926997772812, |
|
"learning_rate": 4.475843956603205e-06, |
|
"loss": 0.0604, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 1.068313953488372, |
|
"grad_norm": 1.3235367351231275, |
|
"learning_rate": 4.4644925616372405e-06, |
|
"loss": 0.0605, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 1.069767441860465, |
|
"grad_norm": 1.295361077164503, |
|
"learning_rate": 4.4531439581106295e-06, |
|
"loss": 0.06, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 1.0712209302325582, |
|
"grad_norm": 1.6139195727409428, |
|
"learning_rate": 4.441798205180228e-06, |
|
"loss": 0.0797, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 1.072674418604651, |
|
"grad_norm": 1.2563569911360324, |
|
"learning_rate": 4.430455361988041e-06, |
|
"loss": 0.0705, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 1.0741279069767442, |
|
"grad_norm": 1.2037437189059568, |
|
"learning_rate": 4.419115487660899e-06, |
|
"loss": 0.06, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 1.0755813953488371, |
|
"grad_norm": 1.2888284666571121, |
|
"learning_rate": 4.40777864131016e-06, |
|
"loss": 0.0574, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.0770348837209303, |
|
"grad_norm": 1.4666943653511182, |
|
"learning_rate": 4.396444882031394e-06, |
|
"loss": 0.0748, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 1.0784883720930232, |
|
"grad_norm": 1.1651868897402284, |
|
"learning_rate": 4.3851142689040885e-06, |
|
"loss": 0.0624, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 1.0799418604651163, |
|
"grad_norm": 1.6627210680829636, |
|
"learning_rate": 4.373786860991318e-06, |
|
"loss": 0.0521, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 1.0813953488372092, |
|
"grad_norm": 1.7854261575212669, |
|
"learning_rate": 4.3624627173394615e-06, |
|
"loss": 0.0956, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 1.0828488372093024, |
|
"grad_norm": 1.281448055061643, |
|
"learning_rate": 4.351141896977874e-06, |
|
"loss": 0.0618, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 1.0843023255813953, |
|
"grad_norm": 1.9115172580935775, |
|
"learning_rate": 4.339824458918592e-06, |
|
"loss": 0.0778, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 1.0857558139534884, |
|
"grad_norm": 1.1325100660387584, |
|
"learning_rate": 4.328510462156015e-06, |
|
"loss": 0.0447, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 1.0872093023255813, |
|
"grad_norm": 1.2777657933057078, |
|
"learning_rate": 4.317199965666613e-06, |
|
"loss": 0.0757, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 1.0886627906976745, |
|
"grad_norm": 1.3785018962846736, |
|
"learning_rate": 4.305893028408601e-06, |
|
"loss": 0.0654, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 1.0901162790697674, |
|
"grad_norm": 1.2206761011457805, |
|
"learning_rate": 4.294589709321651e-06, |
|
"loss": 0.0724, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.0915697674418605, |
|
"grad_norm": 1.4018327959477974, |
|
"learning_rate": 4.283290067326562e-06, |
|
"loss": 0.0904, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 1.0930232558139534, |
|
"grad_norm": 1.5155226786120202, |
|
"learning_rate": 4.271994161324977e-06, |
|
"loss": 0.0475, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 1.0944767441860466, |
|
"grad_norm": 1.5020821031236455, |
|
"learning_rate": 4.260702050199056e-06, |
|
"loss": 0.0627, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 1.0959302325581395, |
|
"grad_norm": 1.1544970293337866, |
|
"learning_rate": 4.2494137928111835e-06, |
|
"loss": 0.0731, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 1.0973837209302326, |
|
"grad_norm": 1.156709990172585, |
|
"learning_rate": 4.238129448003651e-06, |
|
"loss": 0.0496, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 1.0988372093023255, |
|
"grad_norm": 1.0821603719730972, |
|
"learning_rate": 4.22684907459836e-06, |
|
"loss": 0.0619, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 1.1002906976744187, |
|
"grad_norm": 1.1819763770544474, |
|
"learning_rate": 4.215572731396504e-06, |
|
"loss": 0.0582, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 1.1017441860465116, |
|
"grad_norm": 1.3810737668324913, |
|
"learning_rate": 4.204300477178274e-06, |
|
"loss": 0.0484, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 1.1031976744186047, |
|
"grad_norm": 1.6953078691365866, |
|
"learning_rate": 4.19303237070254e-06, |
|
"loss": 0.0856, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 1.1046511627906976, |
|
"grad_norm": 1.2505887912927889, |
|
"learning_rate": 4.181768470706561e-06, |
|
"loss": 0.0498, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.1061046511627908, |
|
"grad_norm": 1.3867145509235537, |
|
"learning_rate": 4.170508835905655e-06, |
|
"loss": 0.0529, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 1.1075581395348837, |
|
"grad_norm": 1.731884519992483, |
|
"learning_rate": 4.159253524992922e-06, |
|
"loss": 0.0649, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 1.1090116279069768, |
|
"grad_norm": 1.6560829967990272, |
|
"learning_rate": 4.148002596638911e-06, |
|
"loss": 0.0575, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 1.1104651162790697, |
|
"grad_norm": 1.95162595456697, |
|
"learning_rate": 4.136756109491333e-06, |
|
"loss": 0.081, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 1.1119186046511629, |
|
"grad_norm": 1.1885528527482156, |
|
"learning_rate": 4.1255141221747445e-06, |
|
"loss": 0.0531, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 1.1133720930232558, |
|
"grad_norm": 1.5782587513465858, |
|
"learning_rate": 4.1142766932902475e-06, |
|
"loss": 0.0698, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 1.114825581395349, |
|
"grad_norm": 1.7115336320491816, |
|
"learning_rate": 4.103043881415181e-06, |
|
"loss": 0.0743, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 1.1162790697674418, |
|
"grad_norm": 1.3705963951525217, |
|
"learning_rate": 4.091815745102818e-06, |
|
"loss": 0.0634, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 1.117732558139535, |
|
"grad_norm": 1.2666345308767393, |
|
"learning_rate": 4.080592342882059e-06, |
|
"loss": 0.0557, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 1.119186046511628, |
|
"grad_norm": 1.466660608860313, |
|
"learning_rate": 4.069373733257129e-06, |
|
"loss": 0.0532, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.120639534883721, |
|
"grad_norm": 1.9380585631227165, |
|
"learning_rate": 4.058159974707267e-06, |
|
"loss": 0.0838, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 1.122093023255814, |
|
"grad_norm": 1.3123759677425353, |
|
"learning_rate": 4.046951125686427e-06, |
|
"loss": 0.0661, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 1.1235465116279069, |
|
"grad_norm": 1.3861730341247065, |
|
"learning_rate": 4.035747244622968e-06, |
|
"loss": 0.0732, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 1.125, |
|
"grad_norm": 1.5843091362943043, |
|
"learning_rate": 4.02454838991936e-06, |
|
"loss": 0.0601, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 1.1264534883720931, |
|
"grad_norm": 1.1871559950997301, |
|
"learning_rate": 4.013354619951864e-06, |
|
"loss": 0.0542, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.127906976744186, |
|
"grad_norm": 1.4833725641864457, |
|
"learning_rate": 4.002165993070237e-06, |
|
"loss": 0.0421, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 1.129360465116279, |
|
"grad_norm": 1.2607191005902103, |
|
"learning_rate": 3.990982567597434e-06, |
|
"loss": 0.0675, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 1.130813953488372, |
|
"grad_norm": 1.302371033029548, |
|
"learning_rate": 3.979804401829287e-06, |
|
"loss": 0.0552, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 1.1322674418604652, |
|
"grad_norm": 1.0982776342344591, |
|
"learning_rate": 3.968631554034219e-06, |
|
"loss": 0.0417, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 1.1337209302325582, |
|
"grad_norm": 1.0186685111120157, |
|
"learning_rate": 3.957464082452922e-06, |
|
"loss": 0.0433, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.135174418604651, |
|
"grad_norm": 1.405227738570767, |
|
"learning_rate": 3.946302045298076e-06, |
|
"loss": 0.0589, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 1.1366279069767442, |
|
"grad_norm": 1.7360986285508477, |
|
"learning_rate": 3.93514550075402e-06, |
|
"loss": 0.0831, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 1.1380813953488371, |
|
"grad_norm": 1.3982460147182094, |
|
"learning_rate": 3.923994506976475e-06, |
|
"loss": 0.0717, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 1.1395348837209303, |
|
"grad_norm": 1.67771687763503, |
|
"learning_rate": 3.912849122092216e-06, |
|
"loss": 0.0623, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 1.1409883720930232, |
|
"grad_norm": 1.36066170200081, |
|
"learning_rate": 3.901709404198787e-06, |
|
"loss": 0.0632, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 1.1424418604651163, |
|
"grad_norm": 1.724665327424766, |
|
"learning_rate": 3.890575411364187e-06, |
|
"loss": 0.0904, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 1.1438953488372092, |
|
"grad_norm": 1.292310436918187, |
|
"learning_rate": 3.879447201626579e-06, |
|
"loss": 0.0486, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 1.1453488372093024, |
|
"grad_norm": 1.3385498197221257, |
|
"learning_rate": 3.868324832993972e-06, |
|
"loss": 0.0543, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 1.1468023255813953, |
|
"grad_norm": 1.3062276729009943, |
|
"learning_rate": 3.857208363443936e-06, |
|
"loss": 0.0607, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 1.1482558139534884, |
|
"grad_norm": 1.6713853170193331, |
|
"learning_rate": 3.84609785092328e-06, |
|
"loss": 0.0541, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.1497093023255813, |
|
"grad_norm": 1.4621166936575898, |
|
"learning_rate": 3.834993353347774e-06, |
|
"loss": 0.0822, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 1.1511627906976745, |
|
"grad_norm": 1.3920933935968336, |
|
"learning_rate": 3.823894928601822e-06, |
|
"loss": 0.0502, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 1.1526162790697674, |
|
"grad_norm": 1.3690857531456042, |
|
"learning_rate": 3.8128026345381804e-06, |
|
"loss": 0.0726, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 1.1540697674418605, |
|
"grad_norm": 1.435007037574927, |
|
"learning_rate": 3.8017165289776397e-06, |
|
"loss": 0.0733, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 1.1555232558139534, |
|
"grad_norm": 1.3209050161532427, |
|
"learning_rate": 3.7906366697087426e-06, |
|
"loss": 0.0625, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 1.1569767441860466, |
|
"grad_norm": 1.3609715123113075, |
|
"learning_rate": 3.7795631144874607e-06, |
|
"loss": 0.064, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 1.1584302325581395, |
|
"grad_norm": 1.35359644663218, |
|
"learning_rate": 3.768495921036915e-06, |
|
"loss": 0.0786, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 1.1598837209302326, |
|
"grad_norm": 1.4861713191531716, |
|
"learning_rate": 3.7574351470470547e-06, |
|
"loss": 0.0777, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 1.1613372093023255, |
|
"grad_norm": 1.256830182158305, |
|
"learning_rate": 3.7463808501743736e-06, |
|
"loss": 0.0649, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 1.1627906976744187, |
|
"grad_norm": 1.18227664622794, |
|
"learning_rate": 3.7353330880415963e-06, |
|
"loss": 0.0614, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.1627906976744187, |
|
"eval_loss": 0.13292963802814484, |
|
"eval_runtime": 2.205, |
|
"eval_samples_per_second": 25.396, |
|
"eval_steps_per_second": 6.349, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.1642441860465116, |
|
"grad_norm": 1.3697674011754686, |
|
"learning_rate": 3.724291918237391e-06, |
|
"loss": 0.0802, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 1.1656976744186047, |
|
"grad_norm": 1.5758218959573673, |
|
"learning_rate": 3.7132573983160538e-06, |
|
"loss": 0.0555, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 1.1671511627906976, |
|
"grad_norm": 1.4752518250909372, |
|
"learning_rate": 3.7022295857972244e-06, |
|
"loss": 0.0598, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 1.1686046511627908, |
|
"grad_norm": 1.1107721704788946, |
|
"learning_rate": 3.691208538165574e-06, |
|
"loss": 0.0526, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 1.1700581395348837, |
|
"grad_norm": 1.5316239040252175, |
|
"learning_rate": 3.6801943128705128e-06, |
|
"loss": 0.0642, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 1.1715116279069768, |
|
"grad_norm": 1.1009808525064773, |
|
"learning_rate": 3.6691869673258847e-06, |
|
"loss": 0.0579, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 1.1729651162790697, |
|
"grad_norm": 1.2132447629779688, |
|
"learning_rate": 3.6581865589096784e-06, |
|
"loss": 0.0534, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 1.1744186046511629, |
|
"grad_norm": 1.21822644945824, |
|
"learning_rate": 3.6471931449637127e-06, |
|
"loss": 0.0478, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 1.1758720930232558, |
|
"grad_norm": 2.1071653837605484, |
|
"learning_rate": 3.6362067827933555e-06, |
|
"loss": 0.0591, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 1.177325581395349, |
|
"grad_norm": 1.2907948357744514, |
|
"learning_rate": 3.625227529667209e-06, |
|
"loss": 0.0492, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.1787790697674418, |
|
"grad_norm": 1.3081083590425786, |
|
"learning_rate": 3.6142554428168208e-06, |
|
"loss": 0.0641, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 1.1802325581395348, |
|
"grad_norm": 1.497246846467104, |
|
"learning_rate": 3.6032905794363805e-06, |
|
"loss": 0.0657, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 1.181686046511628, |
|
"grad_norm": 1.3989638492911172, |
|
"learning_rate": 3.5923329966824288e-06, |
|
"loss": 0.0677, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 1.183139534883721, |
|
"grad_norm": 1.4702223684938749, |
|
"learning_rate": 3.5813827516735487e-06, |
|
"loss": 0.0858, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 1.184593023255814, |
|
"grad_norm": 1.2980406228178047, |
|
"learning_rate": 3.5704399014900814e-06, |
|
"loss": 0.0547, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 1.1860465116279069, |
|
"grad_norm": 1.2387292846340658, |
|
"learning_rate": 3.5595045031738123e-06, |
|
"loss": 0.0477, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 1.1875, |
|
"grad_norm": 1.2267059696407838, |
|
"learning_rate": 3.5485766137276894e-06, |
|
"loss": 0.0499, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 1.1889534883720931, |
|
"grad_norm": 1.5850681514152112, |
|
"learning_rate": 3.5376562901155138e-06, |
|
"loss": 0.0608, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 1.190406976744186, |
|
"grad_norm": 1.4591340811318565, |
|
"learning_rate": 3.526743589261652e-06, |
|
"loss": 0.0553, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 1.191860465116279, |
|
"grad_norm": 1.3466268312861094, |
|
"learning_rate": 3.5158385680507356e-06, |
|
"loss": 0.0601, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.193313953488372, |
|
"grad_norm": 1.28302549307778, |
|
"learning_rate": 3.50494128332736e-06, |
|
"loss": 0.0614, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 1.1947674418604652, |
|
"grad_norm": 1.5064832595806237, |
|
"learning_rate": 3.4940517918958e-06, |
|
"loss": 0.053, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 1.1962209302325582, |
|
"grad_norm": 1.241785767259479, |
|
"learning_rate": 3.483170150519697e-06, |
|
"loss": 0.0648, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 1.197674418604651, |
|
"grad_norm": 1.9169832841939474, |
|
"learning_rate": 3.472296415921783e-06, |
|
"loss": 0.0653, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 1.1991279069767442, |
|
"grad_norm": 1.1075005753142289, |
|
"learning_rate": 3.4614306447835646e-06, |
|
"loss": 0.0623, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 1.2005813953488371, |
|
"grad_norm": 1.4624159678780777, |
|
"learning_rate": 3.4505728937450437e-06, |
|
"loss": 0.0502, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 1.2020348837209303, |
|
"grad_norm": 1.4873573941031213, |
|
"learning_rate": 3.439723219404411e-06, |
|
"loss": 0.0589, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 1.2034883720930232, |
|
"grad_norm": 1.3071195675919425, |
|
"learning_rate": 3.4288816783177624e-06, |
|
"loss": 0.059, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 1.2049418604651163, |
|
"grad_norm": 1.3306075906173456, |
|
"learning_rate": 3.41804832699879e-06, |
|
"loss": 0.0557, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 1.2063953488372092, |
|
"grad_norm": 1.2154802175356552, |
|
"learning_rate": 3.407223221918501e-06, |
|
"loss": 0.0593, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.2078488372093024, |
|
"grad_norm": 1.368819793191012, |
|
"learning_rate": 3.396406419504914e-06, |
|
"loss": 0.0756, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 1.2093023255813953, |
|
"grad_norm": 1.6765723354799054, |
|
"learning_rate": 3.3855979761427705e-06, |
|
"loss": 0.0546, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 1.2107558139534884, |
|
"grad_norm": 1.4117226452497533, |
|
"learning_rate": 3.3747979481732352e-06, |
|
"loss": 0.0665, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 1.2122093023255813, |
|
"grad_norm": 1.2207966641443189, |
|
"learning_rate": 3.364006391893612e-06, |
|
"loss": 0.0481, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 1.2136627906976745, |
|
"grad_norm": 1.4940343341477946, |
|
"learning_rate": 3.3532233635570377e-06, |
|
"loss": 0.0578, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 1.2151162790697674, |
|
"grad_norm": 1.3800156284226655, |
|
"learning_rate": 3.3424489193722016e-06, |
|
"loss": 0.0642, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 1.2165697674418605, |
|
"grad_norm": 1.5526090076765855, |
|
"learning_rate": 3.331683115503041e-06, |
|
"loss": 0.0639, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 1.2180232558139534, |
|
"grad_norm": 1.2911489672133427, |
|
"learning_rate": 3.320926008068458e-06, |
|
"loss": 0.0577, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 1.2194767441860466, |
|
"grad_norm": 1.546598681848458, |
|
"learning_rate": 3.310177653142018e-06, |
|
"loss": 0.0593, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 1.2209302325581395, |
|
"grad_norm": 1.188966656927106, |
|
"learning_rate": 3.2994381067516702e-06, |
|
"loss": 0.067, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.2223837209302326, |
|
"grad_norm": 1.3683724251027738, |
|
"learning_rate": 3.2887074248794372e-06, |
|
"loss": 0.0661, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 1.2238372093023255, |
|
"grad_norm": 1.4058614221882173, |
|
"learning_rate": 3.2779856634611433e-06, |
|
"loss": 0.0519, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 1.2252906976744187, |
|
"grad_norm": 1.1579274317787078, |
|
"learning_rate": 3.267272878386106e-06, |
|
"loss": 0.0699, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 1.2267441860465116, |
|
"grad_norm": 1.2336765642615921, |
|
"learning_rate": 3.256569125496858e-06, |
|
"loss": 0.0526, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 1.2281976744186047, |
|
"grad_norm": 1.590986386650674, |
|
"learning_rate": 3.2458744605888414e-06, |
|
"loss": 0.0653, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 1.2296511627906976, |
|
"grad_norm": 1.2675149654017217, |
|
"learning_rate": 3.2351889394101356e-06, |
|
"loss": 0.0517, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 1.2311046511627908, |
|
"grad_norm": 1.4746783771397893, |
|
"learning_rate": 3.224512617661147e-06, |
|
"loss": 0.0597, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 1.2325581395348837, |
|
"grad_norm": 1.3951470526945027, |
|
"learning_rate": 3.2138455509943365e-06, |
|
"loss": 0.06, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 1.2340116279069768, |
|
"grad_norm": 1.6377226127179718, |
|
"learning_rate": 3.2031877950139138e-06, |
|
"loss": 0.0634, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 1.2354651162790697, |
|
"grad_norm": 1.3590191499897781, |
|
"learning_rate": 3.192539405275562e-06, |
|
"loss": 0.0585, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.2369186046511629, |
|
"grad_norm": 1.197442084738658, |
|
"learning_rate": 3.181900437286133e-06, |
|
"loss": 0.0522, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 1.2383720930232558, |
|
"grad_norm": 1.457106341008041, |
|
"learning_rate": 3.171270946503373e-06, |
|
"loss": 0.0757, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 1.239825581395349, |
|
"grad_norm": 1.1462139163166636, |
|
"learning_rate": 3.160650988335619e-06, |
|
"loss": 0.066, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 1.2412790697674418, |
|
"grad_norm": 1.422733421409013, |
|
"learning_rate": 3.1500406181415266e-06, |
|
"loss": 0.0574, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 1.2427325581395348, |
|
"grad_norm": 1.3764162372997557, |
|
"learning_rate": 3.1394398912297623e-06, |
|
"loss": 0.0569, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 1.244186046511628, |
|
"grad_norm": 1.3748066305788302, |
|
"learning_rate": 3.1288488628587343e-06, |
|
"loss": 0.079, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 1.245639534883721, |
|
"grad_norm": 1.5587103993805567, |
|
"learning_rate": 3.118267588236288e-06, |
|
"loss": 0.048, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 1.247093023255814, |
|
"grad_norm": 1.0821878347641152, |
|
"learning_rate": 3.1076961225194303e-06, |
|
"loss": 0.0373, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 1.2485465116279069, |
|
"grad_norm": 1.3196026626843278, |
|
"learning_rate": 3.0971345208140315e-06, |
|
"loss": 0.0762, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 1.2188350465078943, |
|
"learning_rate": 3.0865828381745515e-06, |
|
"loss": 0.0603, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.2514534883720931, |
|
"grad_norm": 1.2251652362284926, |
|
"learning_rate": 3.07604112960374e-06, |
|
"loss": 0.0622, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 1.252906976744186, |
|
"grad_norm": 1.3179349347598328, |
|
"learning_rate": 3.065509450052353e-06, |
|
"loss": 0.0834, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 1.254360465116279, |
|
"grad_norm": 1.5906225203629438, |
|
"learning_rate": 3.054987854418876e-06, |
|
"loss": 0.0809, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 1.255813953488372, |
|
"grad_norm": 1.4130566616345097, |
|
"learning_rate": 3.044476397549221e-06, |
|
"loss": 0.0654, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 1.2572674418604652, |
|
"grad_norm": 1.743946208757902, |
|
"learning_rate": 3.0339751342364563e-06, |
|
"loss": 0.0625, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 1.2587209302325582, |
|
"grad_norm": 1.7612769821987542, |
|
"learning_rate": 3.02348411922051e-06, |
|
"loss": 0.0642, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 1.260174418604651, |
|
"grad_norm": 3.522383016452111, |
|
"learning_rate": 3.0130034071878935e-06, |
|
"loss": 0.0452, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 1.2616279069767442, |
|
"grad_norm": 1.8108815533825797, |
|
"learning_rate": 3.002533052771405e-06, |
|
"loss": 0.069, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 1.2630813953488373, |
|
"grad_norm": 1.4705889091500777, |
|
"learning_rate": 2.99207311054986e-06, |
|
"loss": 0.0665, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 1.2645348837209303, |
|
"grad_norm": 1.2345293570590237, |
|
"learning_rate": 2.9816236350477924e-06, |
|
"loss": 0.0471, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.2659883720930232, |
|
"grad_norm": 1.3814272608337272, |
|
"learning_rate": 2.9711846807351775e-06, |
|
"loss": 0.0497, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 1.2674418604651163, |
|
"grad_norm": 1.4737560562363339, |
|
"learning_rate": 2.9607563020271446e-06, |
|
"loss": 0.0754, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 1.2688953488372092, |
|
"grad_norm": 1.2213469734360816, |
|
"learning_rate": 2.950338553283704e-06, |
|
"loss": 0.0439, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 1.2703488372093024, |
|
"grad_norm": 1.0228539211087246, |
|
"learning_rate": 2.939931488809443e-06, |
|
"loss": 0.0453, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 1.2718023255813953, |
|
"grad_norm": 1.3148270871243914, |
|
"learning_rate": 2.9295351628532666e-06, |
|
"loss": 0.0691, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 1.2732558139534884, |
|
"grad_norm": 1.6172892866219821, |
|
"learning_rate": 2.9191496296080935e-06, |
|
"loss": 0.0716, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 1.2747093023255813, |
|
"grad_norm": 1.5950616252422385, |
|
"learning_rate": 2.9087749432105917e-06, |
|
"loss": 0.0691, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 1.2761627906976745, |
|
"grad_norm": 1.465303805672259, |
|
"learning_rate": 2.898411157740879e-06, |
|
"loss": 0.0642, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 1.2776162790697674, |
|
"grad_norm": 1.234388737205486, |
|
"learning_rate": 2.8880583272222594e-06, |
|
"loss": 0.0485, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 1.2790697674418605, |
|
"grad_norm": 1.088371039773677, |
|
"learning_rate": 2.8777165056209256e-06, |
|
"loss": 0.0435, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.2805232558139534, |
|
"grad_norm": 1.2648066111949126, |
|
"learning_rate": 2.867385746845685e-06, |
|
"loss": 0.0707, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 1.2819767441860466, |
|
"grad_norm": 1.429685475726185, |
|
"learning_rate": 2.8570661047476773e-06, |
|
"loss": 0.0561, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 1.2834302325581395, |
|
"grad_norm": 1.9989759755669896, |
|
"learning_rate": 2.8467576331200986e-06, |
|
"loss": 0.0794, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 1.2848837209302326, |
|
"grad_norm": 1.2470700343219876, |
|
"learning_rate": 2.836460385697911e-06, |
|
"loss": 0.046, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 1.2863372093023255, |
|
"grad_norm": 1.3164750906741658, |
|
"learning_rate": 2.8261744161575745e-06, |
|
"loss": 0.0658, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 1.2877906976744187, |
|
"grad_norm": 1.1137226528892454, |
|
"learning_rate": 2.815899778116753e-06, |
|
"loss": 0.0438, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 1.2892441860465116, |
|
"grad_norm": 1.5963496710705627, |
|
"learning_rate": 2.80563652513405e-06, |
|
"loss": 0.0768, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 1.2906976744186047, |
|
"grad_norm": 1.2418886512188267, |
|
"learning_rate": 2.7953847107087173e-06, |
|
"loss": 0.0476, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 1.2921511627906976, |
|
"grad_norm": 1.4960278840419248, |
|
"learning_rate": 2.785144388280385e-06, |
|
"loss": 0.0737, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 1.2936046511627908, |
|
"grad_norm": 1.078019987993341, |
|
"learning_rate": 2.7749156112287746e-06, |
|
"loss": 0.0686, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.2950581395348837, |
|
"grad_norm": 1.3385018973715532, |
|
"learning_rate": 2.7646984328734284e-06, |
|
"loss": 0.0422, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 1.2965116279069768, |
|
"grad_norm": 1.5906816734841456, |
|
"learning_rate": 2.754492906473425e-06, |
|
"loss": 0.0555, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 1.2979651162790697, |
|
"grad_norm": 1.2925137226713737, |
|
"learning_rate": 2.744299085227109e-06, |
|
"loss": 0.0472, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 1.2994186046511627, |
|
"grad_norm": 1.259018228463516, |
|
"learning_rate": 2.7341170222718073e-06, |
|
"loss": 0.0517, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 1.3008720930232558, |
|
"grad_norm": 1.350283378016509, |
|
"learning_rate": 2.723946770683552e-06, |
|
"loss": 0.0496, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 1.302325581395349, |
|
"grad_norm": 1.3897000129677248, |
|
"learning_rate": 2.7137883834768076e-06, |
|
"loss": 0.0555, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 1.3037790697674418, |
|
"grad_norm": 1.4501986699541858, |
|
"learning_rate": 2.703641913604198e-06, |
|
"loss": 0.0734, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 1.3052325581395348, |
|
"grad_norm": 1.316272770181074, |
|
"learning_rate": 2.6935074139562174e-06, |
|
"loss": 0.0598, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 1.306686046511628, |
|
"grad_norm": 1.1543542777127582, |
|
"learning_rate": 2.683384937360971e-06, |
|
"loss": 0.0635, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 1.308139534883721, |
|
"grad_norm": 1.2569738112859057, |
|
"learning_rate": 2.673274536583883e-06, |
|
"loss": 0.0493, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.309593023255814, |
|
"grad_norm": 1.5911605897872676, |
|
"learning_rate": 2.663176264327439e-06, |
|
"loss": 0.064, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 1.3110465116279069, |
|
"grad_norm": 1.398413502674623, |
|
"learning_rate": 2.6530901732308934e-06, |
|
"loss": 0.0542, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 1.3125, |
|
"grad_norm": 1.481895453387704, |
|
"learning_rate": 2.6430163158700116e-06, |
|
"loss": 0.0423, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 1.3139534883720931, |
|
"grad_norm": 1.5404341782721638, |
|
"learning_rate": 2.632954744756784e-06, |
|
"loss": 0.0743, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 1.315406976744186, |
|
"grad_norm": 1.6089449641602316, |
|
"learning_rate": 2.6229055123391545e-06, |
|
"loss": 0.0762, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 1.316860465116279, |
|
"grad_norm": 1.3087109651209734, |
|
"learning_rate": 2.612868671000755e-06, |
|
"loss": 0.063, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 1.318313953488372, |
|
"grad_norm": 1.3502162014519732, |
|
"learning_rate": 2.602844273060623e-06, |
|
"loss": 0.0613, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 1.3197674418604652, |
|
"grad_norm": 1.757116221919893, |
|
"learning_rate": 2.592832370772931e-06, |
|
"loss": 0.0598, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 1.3212209302325582, |
|
"grad_norm": 1.440632285315959, |
|
"learning_rate": 2.582833016326716e-06, |
|
"loss": 0.0603, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 1.322674418604651, |
|
"grad_norm": 1.3695411985419055, |
|
"learning_rate": 2.5728462618456114e-06, |
|
"loss": 0.0603, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.3241279069767442, |
|
"grad_norm": 1.3717727081965483, |
|
"learning_rate": 2.562872159387563e-06, |
|
"loss": 0.0489, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 1.3255813953488373, |
|
"grad_norm": 1.3965140784846173, |
|
"learning_rate": 2.5529107609445737e-06, |
|
"loss": 0.053, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 1.3270348837209303, |
|
"grad_norm": 1.3605550236382897, |
|
"learning_rate": 2.542962118442417e-06, |
|
"loss": 0.0652, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 1.3284883720930232, |
|
"grad_norm": 1.5017426449957705, |
|
"learning_rate": 2.5330262837403795e-06, |
|
"loss": 0.059, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 1.3299418604651163, |
|
"grad_norm": 1.2686790614066732, |
|
"learning_rate": 2.523103308630978e-06, |
|
"loss": 0.059, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 1.3313953488372092, |
|
"grad_norm": 1.4932765351503499, |
|
"learning_rate": 2.513193244839704e-06, |
|
"loss": 0.0801, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 1.3328488372093024, |
|
"grad_norm": 1.277822451358384, |
|
"learning_rate": 2.5032961440247382e-06, |
|
"loss": 0.0563, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 1.3343023255813953, |
|
"grad_norm": 1.1466090066744306, |
|
"learning_rate": 2.4934120577766963e-06, |
|
"loss": 0.0482, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 1.3357558139534884, |
|
"grad_norm": 1.3716592416122075, |
|
"learning_rate": 2.483541037618346e-06, |
|
"loss": 0.0618, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 1.3372093023255813, |
|
"grad_norm": 1.365751544441765, |
|
"learning_rate": 2.473683135004354e-06, |
|
"loss": 0.062, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.3386627906976745, |
|
"grad_norm": 2.264876190842683, |
|
"learning_rate": 2.4638384013210004e-06, |
|
"loss": 0.1066, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 1.3401162790697674, |
|
"grad_norm": 1.6250492792457274, |
|
"learning_rate": 2.4540068878859247e-06, |
|
"loss": 0.0857, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 1.3415697674418605, |
|
"grad_norm": 1.821123714104965, |
|
"learning_rate": 2.4441886459478502e-06, |
|
"loss": 0.0484, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 1.3430232558139534, |
|
"grad_norm": 1.3628978122194464, |
|
"learning_rate": 2.4343837266863245e-06, |
|
"loss": 0.0716, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 1.3444767441860466, |
|
"grad_norm": 1.2104456979218408, |
|
"learning_rate": 2.4245921812114427e-06, |
|
"loss": 0.0413, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 1.3459302325581395, |
|
"grad_norm": 1.4006337692831987, |
|
"learning_rate": 2.4148140605635923e-06, |
|
"loss": 0.0884, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 1.3473837209302326, |
|
"grad_norm": 1.5775325655062982, |
|
"learning_rate": 2.405049415713173e-06, |
|
"loss": 0.0826, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 1.3488372093023255, |
|
"grad_norm": 1.3469035877818512, |
|
"learning_rate": 2.3952982975603494e-06, |
|
"loss": 0.0528, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 1.3502906976744187, |
|
"grad_norm": 1.3912044037862688, |
|
"learning_rate": 2.385560756934765e-06, |
|
"loss": 0.0683, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 1.3517441860465116, |
|
"grad_norm": 1.4330392863334833, |
|
"learning_rate": 2.3758368445952977e-06, |
|
"loss": 0.0615, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.3531976744186047, |
|
"grad_norm": 1.353177391111986, |
|
"learning_rate": 2.3661266112297765e-06, |
|
"loss": 0.0441, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 1.3546511627906976, |
|
"grad_norm": 1.4685441680460056, |
|
"learning_rate": 2.356430107454733e-06, |
|
"loss": 0.0719, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 1.3561046511627908, |
|
"grad_norm": 1.3373925778152866, |
|
"learning_rate": 2.346747383815126e-06, |
|
"loss": 0.0581, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 1.3575581395348837, |
|
"grad_norm": 1.6807992341418785, |
|
"learning_rate": 2.337078490784084e-06, |
|
"loss": 0.083, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 1.3590116279069768, |
|
"grad_norm": 1.3439853412810583, |
|
"learning_rate": 2.32742347876264e-06, |
|
"loss": 0.0601, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 1.3604651162790697, |
|
"grad_norm": 1.3060456175816213, |
|
"learning_rate": 2.317782398079473e-06, |
|
"loss": 0.055, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 1.3619186046511627, |
|
"grad_norm": 1.5527718046211736, |
|
"learning_rate": 2.3081552989906347e-06, |
|
"loss": 0.0734, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 1.3633720930232558, |
|
"grad_norm": 1.5997787553142, |
|
"learning_rate": 2.298542231679305e-06, |
|
"loss": 0.0727, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 1.364825581395349, |
|
"grad_norm": 1.647243597942959, |
|
"learning_rate": 2.2889432462555106e-06, |
|
"loss": 0.069, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 1.3662790697674418, |
|
"grad_norm": 1.4131937959777625, |
|
"learning_rate": 2.279358392755882e-06, |
|
"loss": 0.0572, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.3677325581395348, |
|
"grad_norm": 1.2921469692871526, |
|
"learning_rate": 2.269787721143376e-06, |
|
"loss": 0.0558, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 1.369186046511628, |
|
"grad_norm": 1.5778632142343019, |
|
"learning_rate": 2.2602312813070315e-06, |
|
"loss": 0.0653, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 1.370639534883721, |
|
"grad_norm": 1.1650103416898503, |
|
"learning_rate": 2.250689123061694e-06, |
|
"loss": 0.0583, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 1.372093023255814, |
|
"grad_norm": 1.638353937587653, |
|
"learning_rate": 2.2411612961477704e-06, |
|
"loss": 0.0954, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 1.3735465116279069, |
|
"grad_norm": 1.3332240689990287, |
|
"learning_rate": 2.2316478502309576e-06, |
|
"loss": 0.0686, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 1.375, |
|
"grad_norm": 1.4897450673269428, |
|
"learning_rate": 2.2221488349019903e-06, |
|
"loss": 0.0637, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 1.3764534883720931, |
|
"grad_norm": 1.2433687422683675, |
|
"learning_rate": 2.2126642996763793e-06, |
|
"loss": 0.0636, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 1.377906976744186, |
|
"grad_norm": 1.5438951823738334, |
|
"learning_rate": 2.203194293994159e-06, |
|
"loss": 0.063, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 1.379360465116279, |
|
"grad_norm": 1.353307173474805, |
|
"learning_rate": 2.193738867219623e-06, |
|
"loss": 0.0551, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 1.380813953488372, |
|
"grad_norm": 1.3926599213141382, |
|
"learning_rate": 2.184298068641067e-06, |
|
"loss": 0.0658, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.3822674418604652, |
|
"grad_norm": 1.5876307826339826, |
|
"learning_rate": 2.174871947470541e-06, |
|
"loss": 0.0714, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 1.3837209302325582, |
|
"grad_norm": 1.5940606667679074, |
|
"learning_rate": 2.1654605528435774e-06, |
|
"loss": 0.0984, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 1.385174418604651, |
|
"grad_norm": 1.6476409988363916, |
|
"learning_rate": 2.1560639338189533e-06, |
|
"loss": 0.0618, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 1.3866279069767442, |
|
"grad_norm": 1.3123274183423979, |
|
"learning_rate": 2.1466821393784148e-06, |
|
"loss": 0.0526, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 1.3880813953488373, |
|
"grad_norm": 1.1675619599323548, |
|
"learning_rate": 2.137315218426442e-06, |
|
"loss": 0.0465, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 1.3895348837209303, |
|
"grad_norm": 1.6048177965652641, |
|
"learning_rate": 2.127963219789974e-06, |
|
"loss": 0.1021, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 1.3909883720930232, |
|
"grad_norm": 0.8988094880823243, |
|
"learning_rate": 2.1186261922181746e-06, |
|
"loss": 0.0387, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 1.3924418604651163, |
|
"grad_norm": 1.287189982666728, |
|
"learning_rate": 2.109304184382157e-06, |
|
"loss": 0.065, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 1.3938953488372092, |
|
"grad_norm": 1.1802701284704842, |
|
"learning_rate": 2.0999972448747525e-06, |
|
"loss": 0.0453, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 1.3953488372093024, |
|
"grad_norm": 1.3499018387202095, |
|
"learning_rate": 2.0907054222102367e-06, |
|
"loss": 0.0439, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.3968023255813953, |
|
"grad_norm": 1.620339483385674, |
|
"learning_rate": 2.081428764824089e-06, |
|
"loss": 0.075, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 1.3982558139534884, |
|
"grad_norm": 1.2075011912654863, |
|
"learning_rate": 2.072167321072736e-06, |
|
"loss": 0.0653, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 1.3997093023255813, |
|
"grad_norm": 1.4261627536661536, |
|
"learning_rate": 2.0629211392333033e-06, |
|
"loss": 0.0605, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 1.4011627906976745, |
|
"grad_norm": 1.5799723516279018, |
|
"learning_rate": 2.0536902675033547e-06, |
|
"loss": 0.0667, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 1.4026162790697674, |
|
"grad_norm": 1.4908299612274243, |
|
"learning_rate": 2.044474754000655e-06, |
|
"loss": 0.058, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 1.4040697674418605, |
|
"grad_norm": 1.4424465604879053, |
|
"learning_rate": 2.0352746467629018e-06, |
|
"loss": 0.0749, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 1.4055232558139534, |
|
"grad_norm": 1.3986123725352064, |
|
"learning_rate": 2.0260899937474943e-06, |
|
"loss": 0.0587, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 1.4069767441860466, |
|
"grad_norm": 1.5870575696903628, |
|
"learning_rate": 2.0169208428312647e-06, |
|
"loss": 0.0825, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 1.4084302325581395, |
|
"grad_norm": 1.4921609261163944, |
|
"learning_rate": 2.0077672418102443e-06, |
|
"loss": 0.0796, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 1.4098837209302326, |
|
"grad_norm": 1.5995313772665938, |
|
"learning_rate": 1.998629238399402e-06, |
|
"loss": 0.0659, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.4113372093023255, |
|
"grad_norm": 1.2823207017129816, |
|
"learning_rate": 1.9895068802324065e-06, |
|
"loss": 0.0484, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 1.4127906976744187, |
|
"grad_norm": 1.3944837875822929, |
|
"learning_rate": 1.980400214861367e-06, |
|
"loss": 0.0507, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 1.4142441860465116, |
|
"grad_norm": 1.3570706618634196, |
|
"learning_rate": 1.971309289756595e-06, |
|
"loss": 0.0553, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 1.4156976744186047, |
|
"grad_norm": 1.3823236957095917, |
|
"learning_rate": 1.9622341523063484e-06, |
|
"loss": 0.0604, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 1.4171511627906976, |
|
"grad_norm": 1.5503182481651412, |
|
"learning_rate": 1.953174849816595e-06, |
|
"loss": 0.1027, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 1.4186046511627908, |
|
"grad_norm": 1.25459622511254, |
|
"learning_rate": 1.944131429510754e-06, |
|
"loss": 0.0646, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 1.4200581395348837, |
|
"grad_norm": 1.6351810496565538, |
|
"learning_rate": 1.93510393852946e-06, |
|
"loss": 0.0659, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 1.4215116279069768, |
|
"grad_norm": 1.2975727336300502, |
|
"learning_rate": 1.9260924239303075e-06, |
|
"loss": 0.0672, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 1.4229651162790697, |
|
"grad_norm": 1.3534696019811543, |
|
"learning_rate": 1.9170969326876177e-06, |
|
"loss": 0.0516, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 1.4244186046511627, |
|
"grad_norm": 1.5117325218714435, |
|
"learning_rate": 1.90811751169218e-06, |
|
"loss": 0.0512, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.4258720930232558, |
|
"grad_norm": 1.2260873105062584, |
|
"learning_rate": 1.8991542077510205e-06, |
|
"loss": 0.0535, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 1.427325581395349, |
|
"grad_norm": 1.4430073129022352, |
|
"learning_rate": 1.8902070675871465e-06, |
|
"loss": 0.0623, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 1.4287790697674418, |
|
"grad_norm": 1.4117041417091303, |
|
"learning_rate": 1.881276137839314e-06, |
|
"loss": 0.0367, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 1.4302325581395348, |
|
"grad_norm": 1.3665830676976485, |
|
"learning_rate": 1.8723614650617721e-06, |
|
"loss": 0.0658, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 1.431686046511628, |
|
"grad_norm": 1.4391996547793735, |
|
"learning_rate": 1.8634630957240352e-06, |
|
"loss": 0.0762, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 1.433139534883721, |
|
"grad_norm": 1.5377368090806955, |
|
"learning_rate": 1.8545810762106263e-06, |
|
"loss": 0.0709, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 1.434593023255814, |
|
"grad_norm": 1.3753400620423242, |
|
"learning_rate": 1.845715452820845e-06, |
|
"loss": 0.0501, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 1.4360465116279069, |
|
"grad_norm": 1.593242899745401, |
|
"learning_rate": 1.8368662717685188e-06, |
|
"loss": 0.0799, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 1.4375, |
|
"grad_norm": 1.6328684919856264, |
|
"learning_rate": 1.8280335791817733e-06, |
|
"loss": 0.053, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 1.4389534883720931, |
|
"grad_norm": 1.432266744388245, |
|
"learning_rate": 1.819217421102779e-06, |
|
"loss": 0.0617, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.440406976744186, |
|
"grad_norm": 1.2883150490212059, |
|
"learning_rate": 1.8104178434875175e-06, |
|
"loss": 0.0646, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 1.441860465116279, |
|
"grad_norm": 1.364249538543468, |
|
"learning_rate": 1.8016348922055448e-06, |
|
"loss": 0.0465, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 1.443313953488372, |
|
"grad_norm": 1.3168346576696832, |
|
"learning_rate": 1.7928686130397443e-06, |
|
"loss": 0.0792, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 1.4447674418604652, |
|
"grad_norm": 1.8107406359055818, |
|
"learning_rate": 1.7841190516860973e-06, |
|
"loss": 0.0732, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 1.4462209302325582, |
|
"grad_norm": 1.2063299992756713, |
|
"learning_rate": 1.7753862537534356e-06, |
|
"loss": 0.0762, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 1.447674418604651, |
|
"grad_norm": 1.8674037040685265, |
|
"learning_rate": 1.7666702647632128e-06, |
|
"loss": 0.0695, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 1.4491279069767442, |
|
"grad_norm": 1.1951714079592195, |
|
"learning_rate": 1.7579711301492574e-06, |
|
"loss": 0.0597, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 1.4505813953488373, |
|
"grad_norm": 2.0661364210414725, |
|
"learning_rate": 1.7492888952575475e-06, |
|
"loss": 0.0786, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 1.4520348837209303, |
|
"grad_norm": 1.3804303251215748, |
|
"learning_rate": 1.740623605345963e-06, |
|
"loss": 0.0632, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 1.4534883720930232, |
|
"grad_norm": 1.3877478679606865, |
|
"learning_rate": 1.7319753055840555e-06, |
|
"loss": 0.0467, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.4534883720930232, |
|
"eval_loss": 0.1322409063577652, |
|
"eval_runtime": 2.2065, |
|
"eval_samples_per_second": 25.379, |
|
"eval_steps_per_second": 6.345, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.4549418604651163, |
|
"grad_norm": 1.2437202205338127, |
|
"learning_rate": 1.7233440410528117e-06, |
|
"loss": 0.0504, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 1.4563953488372092, |
|
"grad_norm": 1.6237467009740467, |
|
"learning_rate": 1.7147298567444231e-06, |
|
"loss": 0.0938, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 1.4578488372093024, |
|
"grad_norm": 1.5164187899961117, |
|
"learning_rate": 1.7061327975620402e-06, |
|
"loss": 0.0772, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 1.4593023255813953, |
|
"grad_norm": 1.5187557348768597, |
|
"learning_rate": 1.697552908319553e-06, |
|
"loss": 0.0624, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 1.4607558139534884, |
|
"grad_norm": 1.178181594838555, |
|
"learning_rate": 1.6889902337413415e-06, |
|
"loss": 0.0655, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 1.4622093023255813, |
|
"grad_norm": 1.4575686492434048, |
|
"learning_rate": 1.6804448184620598e-06, |
|
"loss": 0.0631, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 1.4636627906976745, |
|
"grad_norm": 2.0108235684971327, |
|
"learning_rate": 1.6719167070263848e-06, |
|
"loss": 0.093, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 1.4651162790697674, |
|
"grad_norm": 1.3503757474217617, |
|
"learning_rate": 1.6634059438888034e-06, |
|
"loss": 0.0498, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 1.4665697674418605, |
|
"grad_norm": 1.4598338826538206, |
|
"learning_rate": 1.6549125734133625e-06, |
|
"loss": 0.0543, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 1.4680232558139534, |
|
"grad_norm": 1.4195207368199612, |
|
"learning_rate": 1.6464366398734532e-06, |
|
"loss": 0.0598, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.4694767441860466, |
|
"grad_norm": 1.357120211132288, |
|
"learning_rate": 1.6379781874515666e-06, |
|
"loss": 0.0511, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 1.4709302325581395, |
|
"grad_norm": 1.8474857055851184, |
|
"learning_rate": 1.6295372602390768e-06, |
|
"loss": 0.0676, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 1.4723837209302326, |
|
"grad_norm": 1.4685941935514055, |
|
"learning_rate": 1.6211139022359995e-06, |
|
"loss": 0.0616, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 1.4738372093023255, |
|
"grad_norm": 1.379812903529308, |
|
"learning_rate": 1.6127081573507685e-06, |
|
"loss": 0.0589, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 1.4752906976744187, |
|
"grad_norm": 1.2647342925759666, |
|
"learning_rate": 1.6043200694000038e-06, |
|
"loss": 0.0754, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 1.4767441860465116, |
|
"grad_norm": 1.13404075461665, |
|
"learning_rate": 1.5959496821082905e-06, |
|
"loss": 0.0544, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 1.4781976744186047, |
|
"grad_norm": 1.4701585201110707, |
|
"learning_rate": 1.5875970391079393e-06, |
|
"loss": 0.0625, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 1.4796511627906976, |
|
"grad_norm": 1.2418075850932098, |
|
"learning_rate": 1.5792621839387717e-06, |
|
"loss": 0.0489, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 1.4811046511627908, |
|
"grad_norm": 1.191695849684784, |
|
"learning_rate": 1.5709451600478787e-06, |
|
"loss": 0.0439, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 1.4825581395348837, |
|
"grad_norm": 1.2753380078997356, |
|
"learning_rate": 1.562646010789411e-06, |
|
"loss": 0.0616, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.4840116279069768, |
|
"grad_norm": 1.5221939760552072, |
|
"learning_rate": 1.5543647794243355e-06, |
|
"loss": 0.0827, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 1.4854651162790697, |
|
"grad_norm": 1.7991818130452302, |
|
"learning_rate": 1.5461015091202263e-06, |
|
"loss": 0.0717, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 1.4869186046511627, |
|
"grad_norm": 1.3744111180193537, |
|
"learning_rate": 1.5378562429510257e-06, |
|
"loss": 0.0639, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 1.4883720930232558, |
|
"grad_norm": 1.6295469366842923, |
|
"learning_rate": 1.5296290238968303e-06, |
|
"loss": 0.0615, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 1.489825581395349, |
|
"grad_norm": 1.4986295841575483, |
|
"learning_rate": 1.5214198948436604e-06, |
|
"loss": 0.0512, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 1.4912790697674418, |
|
"grad_norm": 1.5258968983298233, |
|
"learning_rate": 1.5132288985832383e-06, |
|
"loss": 0.0567, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 1.4927325581395348, |
|
"grad_norm": 1.3320359313785561, |
|
"learning_rate": 1.5050560778127648e-06, |
|
"loss": 0.0475, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 1.494186046511628, |
|
"grad_norm": 1.4535765609363385, |
|
"learning_rate": 1.496901475134701e-06, |
|
"loss": 0.0491, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 1.495639534883721, |
|
"grad_norm": 1.7901069273417733, |
|
"learning_rate": 1.4887651330565378e-06, |
|
"loss": 0.066, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 1.497093023255814, |
|
"grad_norm": 1.7585562014185228, |
|
"learning_rate": 1.4806470939905842e-06, |
|
"loss": 0.054, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.4985465116279069, |
|
"grad_norm": 1.6626210530787378, |
|
"learning_rate": 1.472547400253735e-06, |
|
"loss": 0.0757, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 1.4156229167835312, |
|
"learning_rate": 1.4644660940672628e-06, |
|
"loss": 0.063, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 1.5014534883720931, |
|
"grad_norm": 1.4973808057494127, |
|
"learning_rate": 1.4564032175565873e-06, |
|
"loss": 0.0539, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 1.502906976744186, |
|
"grad_norm": 1.462014108284378, |
|
"learning_rate": 1.4483588127510585e-06, |
|
"loss": 0.047, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 1.504360465116279, |
|
"grad_norm": 1.511766423540138, |
|
"learning_rate": 1.440332921583744e-06, |
|
"loss": 0.0576, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 1.505813953488372, |
|
"grad_norm": 1.4551371173160883, |
|
"learning_rate": 1.432325585891201e-06, |
|
"loss": 0.0611, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 1.5072674418604652, |
|
"grad_norm": 1.6008143073565722, |
|
"learning_rate": 1.4243368474132663e-06, |
|
"loss": 0.0612, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 1.5087209302325582, |
|
"grad_norm": 1.5177354364722053, |
|
"learning_rate": 1.41636674779283e-06, |
|
"loss": 0.0667, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 1.510174418604651, |
|
"grad_norm": 1.4839839114250544, |
|
"learning_rate": 1.408415328575629e-06, |
|
"loss": 0.0749, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 1.5116279069767442, |
|
"grad_norm": 1.4529745164922419, |
|
"learning_rate": 1.4004826312100218e-06, |
|
"loss": 0.0411, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.5130813953488373, |
|
"grad_norm": 1.1970103229686213, |
|
"learning_rate": 1.3925686970467745e-06, |
|
"loss": 0.0527, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 1.5145348837209303, |
|
"grad_norm": 1.3891332095713593, |
|
"learning_rate": 1.3846735673388473e-06, |
|
"loss": 0.059, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 1.5159883720930232, |
|
"grad_norm": 1.191612023245594, |
|
"learning_rate": 1.3767972832411813e-06, |
|
"loss": 0.0644, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 1.5174418604651163, |
|
"grad_norm": 1.2633634289004876, |
|
"learning_rate": 1.3689398858104753e-06, |
|
"loss": 0.0518, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 1.5188953488372094, |
|
"grad_norm": 1.259697007622928, |
|
"learning_rate": 1.3611014160049846e-06, |
|
"loss": 0.0399, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 1.5203488372093024, |
|
"grad_norm": 1.2500835906399796, |
|
"learning_rate": 1.3532819146842934e-06, |
|
"loss": 0.0555, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 1.5218023255813953, |
|
"grad_norm": 1.2215856052507712, |
|
"learning_rate": 1.3454814226091156e-06, |
|
"loss": 0.0541, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 1.5232558139534884, |
|
"grad_norm": 1.0651853757254646, |
|
"learning_rate": 1.337699980441069e-06, |
|
"loss": 0.0597, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 1.5247093023255816, |
|
"grad_norm": 1.4449454027688438, |
|
"learning_rate": 1.3299376287424763e-06, |
|
"loss": 0.0528, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 1.5261627906976745, |
|
"grad_norm": 1.3052426229372127, |
|
"learning_rate": 1.3221944079761413e-06, |
|
"loss": 0.0575, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.5276162790697674, |
|
"grad_norm": 1.3323801979825336, |
|
"learning_rate": 1.3144703585051498e-06, |
|
"loss": 0.0471, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 1.5290697674418605, |
|
"grad_norm": 1.3097362291071397, |
|
"learning_rate": 1.3067655205926488e-06, |
|
"loss": 0.0686, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 1.5305232558139537, |
|
"grad_norm": 1.556833363521575, |
|
"learning_rate": 1.2990799344016436e-06, |
|
"loss": 0.0567, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 1.5319767441860463, |
|
"grad_norm": 1.2414839248874836, |
|
"learning_rate": 1.2914136399947841e-06, |
|
"loss": 0.0461, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 1.5334302325581395, |
|
"grad_norm": 1.4598506706462977, |
|
"learning_rate": 1.283766677334161e-06, |
|
"loss": 0.0661, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 1.5348837209302326, |
|
"grad_norm": 1.5053648322674895, |
|
"learning_rate": 1.2761390862810907e-06, |
|
"loss": 0.0684, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 1.5363372093023255, |
|
"grad_norm": 1.533086553131052, |
|
"learning_rate": 1.2685309065959168e-06, |
|
"loss": 0.0623, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 1.5377906976744184, |
|
"grad_norm": 1.376493396330026, |
|
"learning_rate": 1.260942177937789e-06, |
|
"loss": 0.0523, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 1.5392441860465116, |
|
"grad_norm": 1.4332881734009741, |
|
"learning_rate": 1.2533729398644735e-06, |
|
"loss": 0.042, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 1.5406976744186047, |
|
"grad_norm": 0.9999597570352821, |
|
"learning_rate": 1.2458232318321306e-06, |
|
"loss": 0.0336, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.5421511627906976, |
|
"grad_norm": 1.211215207564261, |
|
"learning_rate": 1.238293093195122e-06, |
|
"loss": 0.052, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 1.5436046511627906, |
|
"grad_norm": 1.394715555164229, |
|
"learning_rate": 1.2307825632057952e-06, |
|
"loss": 0.0753, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 1.5450581395348837, |
|
"grad_norm": 1.3381942952433081, |
|
"learning_rate": 1.2232916810142886e-06, |
|
"loss": 0.058, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 1.5465116279069768, |
|
"grad_norm": 1.7578672319163982, |
|
"learning_rate": 1.2158204856683176e-06, |
|
"loss": 0.0657, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 1.5479651162790697, |
|
"grad_norm": 1.5353885598622956, |
|
"learning_rate": 1.2083690161129808e-06, |
|
"loss": 0.059, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 1.5494186046511627, |
|
"grad_norm": 1.405316093009901, |
|
"learning_rate": 1.2009373111905487e-06, |
|
"loss": 0.0579, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 1.5508720930232558, |
|
"grad_norm": 1.607692815378601, |
|
"learning_rate": 1.1935254096402655e-06, |
|
"loss": 0.0653, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 1.552325581395349, |
|
"grad_norm": 1.497479074300615, |
|
"learning_rate": 1.1861333500981449e-06, |
|
"loss": 0.0523, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 1.5537790697674418, |
|
"grad_norm": 1.1203473988887975, |
|
"learning_rate": 1.1787611710967751e-06, |
|
"loss": 0.0452, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 1.5552325581395348, |
|
"grad_norm": 1.4222136382752362, |
|
"learning_rate": 1.1714089110651071e-06, |
|
"loss": 0.0635, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.556686046511628, |
|
"grad_norm": 0.9658885164743157, |
|
"learning_rate": 1.1640766083282662e-06, |
|
"loss": 0.0771, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 1.558139534883721, |
|
"grad_norm": 1.3063613380960122, |
|
"learning_rate": 1.1567643011073393e-06, |
|
"loss": 0.0573, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 1.559593023255814, |
|
"grad_norm": 1.6416890877169144, |
|
"learning_rate": 1.1494720275191901e-06, |
|
"loss": 0.0616, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 1.5610465116279069, |
|
"grad_norm": 1.3817386853715168, |
|
"learning_rate": 1.1421998255762468e-06, |
|
"loss": 0.0503, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 1.5625, |
|
"grad_norm": 1.3346841117708024, |
|
"learning_rate": 1.134947733186315e-06, |
|
"loss": 0.047, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 1.5639534883720931, |
|
"grad_norm": 1.3343322414067098, |
|
"learning_rate": 1.127715788152372e-06, |
|
"loss": 0.0636, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 1.565406976744186, |
|
"grad_norm": 1.7154646419432018, |
|
"learning_rate": 1.1205040281723728e-06, |
|
"loss": 0.0645, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 1.566860465116279, |
|
"grad_norm": 1.4443691543089068, |
|
"learning_rate": 1.1133124908390575e-06, |
|
"loss": 0.0569, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 1.568313953488372, |
|
"grad_norm": 1.9549846970535834, |
|
"learning_rate": 1.106141213639747e-06, |
|
"loss": 0.0843, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 1.5697674418604652, |
|
"grad_norm": 1.4036582458578477, |
|
"learning_rate": 1.0989902339561554e-06, |
|
"loss": 0.0541, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.5712209302325582, |
|
"grad_norm": 1.7620858308016654, |
|
"learning_rate": 1.0918595890641891e-06, |
|
"loss": 0.0571, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 1.572674418604651, |
|
"grad_norm": 2.0116431611899657, |
|
"learning_rate": 1.0847493161337602e-06, |
|
"loss": 0.0509, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 1.5741279069767442, |
|
"grad_norm": 1.3745830759736906, |
|
"learning_rate": 1.077659452228581e-06, |
|
"loss": 0.0506, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 1.5755813953488373, |
|
"grad_norm": 1.491007758939216, |
|
"learning_rate": 1.0705900343059856e-06, |
|
"loss": 0.0615, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 1.5770348837209303, |
|
"grad_norm": 1.369265137859038, |
|
"learning_rate": 1.0635410992167212e-06, |
|
"loss": 0.0674, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 1.5784883720930232, |
|
"grad_norm": 1.0623500340337344, |
|
"learning_rate": 1.0565126837047718e-06, |
|
"loss": 0.0467, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 1.5799418604651163, |
|
"grad_norm": 1.3868730182724978, |
|
"learning_rate": 1.049504824407152e-06, |
|
"loss": 0.0583, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 1.5813953488372094, |
|
"grad_norm": 1.5500696399009597, |
|
"learning_rate": 1.04251755785373e-06, |
|
"loss": 0.054, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 1.5828488372093024, |
|
"grad_norm": 1.416069042767884, |
|
"learning_rate": 1.0355509204670234e-06, |
|
"loss": 0.0462, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 1.5843023255813953, |
|
"grad_norm": 1.2069842940803852, |
|
"learning_rate": 1.0286049485620213e-06, |
|
"loss": 0.0483, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.5857558139534884, |
|
"grad_norm": 1.6409720581547536, |
|
"learning_rate": 1.0216796783459866e-06, |
|
"loss": 0.0489, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 1.5872093023255816, |
|
"grad_norm": 1.7346156581015553, |
|
"learning_rate": 1.0147751459182737e-06, |
|
"loss": 0.0452, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 1.5886627906976745, |
|
"grad_norm": 1.304713086631584, |
|
"learning_rate": 1.007891387270134e-06, |
|
"loss": 0.0652, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 1.5901162790697674, |
|
"grad_norm": 1.169058686538856, |
|
"learning_rate": 1.001028438284533e-06, |
|
"loss": 0.0395, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 1.5915697674418605, |
|
"grad_norm": 1.581843571962872, |
|
"learning_rate": 9.941863347359597e-07, |
|
"loss": 0.0593, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 1.5930232558139537, |
|
"grad_norm": 1.492544567921207, |
|
"learning_rate": 9.873651122902472e-07, |
|
"loss": 0.0834, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 1.5944767441860463, |
|
"grad_norm": 1.459822032594516, |
|
"learning_rate": 9.805648065043745e-07, |
|
"loss": 0.0511, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 1.5959302325581395, |
|
"grad_norm": 1.3911943572138405, |
|
"learning_rate": 9.737854528262953e-07, |
|
"loss": 0.0531, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 1.5973837209302326, |
|
"grad_norm": 1.0428194925897607, |
|
"learning_rate": 9.670270865947406e-07, |
|
"loss": 0.0646, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 1.5988372093023255, |
|
"grad_norm": 1.3576620238676198, |
|
"learning_rate": 9.602897430390456e-07, |
|
"loss": 0.0471, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.6002906976744184, |
|
"grad_norm": 1.5766868187890286, |
|
"learning_rate": 9.53573457278954e-07, |
|
"loss": 0.0637, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 1.6017441860465116, |
|
"grad_norm": 1.4800012500421134, |
|
"learning_rate": 9.468782643244484e-07, |
|
"loss": 0.0695, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 1.6031976744186047, |
|
"grad_norm": 1.7029559921767845, |
|
"learning_rate": 9.40204199075555e-07, |
|
"loss": 0.0608, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 1.6046511627906976, |
|
"grad_norm": 1.1192989686723849, |
|
"learning_rate": 9.335512963221732e-07, |
|
"loss": 0.0501, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 1.6061046511627906, |
|
"grad_norm": 1.392049569057171, |
|
"learning_rate": 9.269195907438843e-07, |
|
"loss": 0.0956, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 1.6075581395348837, |
|
"grad_norm": 1.4377153631105137, |
|
"learning_rate": 9.203091169097761e-07, |
|
"loss": 0.0639, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 1.6090116279069768, |
|
"grad_norm": 1.4194367382834814, |
|
"learning_rate": 9.137199092782617e-07, |
|
"loss": 0.0504, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 1.6104651162790697, |
|
"grad_norm": 1.0856317537640523, |
|
"learning_rate": 9.071520021969027e-07, |
|
"loss": 0.0385, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 1.6119186046511627, |
|
"grad_norm": 1.3095443403571647, |
|
"learning_rate": 9.006054299022227e-07, |
|
"loss": 0.058, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 1.6133720930232558, |
|
"grad_norm": 1.377541034515128, |
|
"learning_rate": 8.940802265195375e-07, |
|
"loss": 0.0688, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.614825581395349, |
|
"grad_norm": 1.1095826182633532, |
|
"learning_rate": 8.875764260627695e-07, |
|
"loss": 0.0473, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 1.6162790697674418, |
|
"grad_norm": 1.5224215809822206, |
|
"learning_rate": 8.810940624342784e-07, |
|
"loss": 0.0825, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 1.6177325581395348, |
|
"grad_norm": 1.8482839553995425, |
|
"learning_rate": 8.746331694246756e-07, |
|
"loss": 0.0744, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 1.619186046511628, |
|
"grad_norm": 1.6017104682800916, |
|
"learning_rate": 8.681937807126567e-07, |
|
"loss": 0.069, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 1.620639534883721, |
|
"grad_norm": 1.2376828031612417, |
|
"learning_rate": 8.617759298648182e-07, |
|
"loss": 0.0495, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 1.622093023255814, |
|
"grad_norm": 1.284668072013395, |
|
"learning_rate": 8.553796503354899e-07, |
|
"loss": 0.0771, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 1.6235465116279069, |
|
"grad_norm": 1.3931203949754822, |
|
"learning_rate": 8.490049754665541e-07, |
|
"loss": 0.0574, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 1.625, |
|
"grad_norm": 1.4765649037778599, |
|
"learning_rate": 8.426519384872733e-07, |
|
"loss": 0.0682, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 1.6264534883720931, |
|
"grad_norm": 1.181845082363929, |
|
"learning_rate": 8.363205725141238e-07, |
|
"loss": 0.0494, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 1.627906976744186, |
|
"grad_norm": 1.3928319550967374, |
|
"learning_rate": 8.30010910550611e-07, |
|
"loss": 0.059, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.629360465116279, |
|
"grad_norm": 1.1141954284915958, |
|
"learning_rate": 8.237229854871076e-07, |
|
"loss": 0.0471, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 1.630813953488372, |
|
"grad_norm": 1.4876650888680079, |
|
"learning_rate": 8.174568301006763e-07, |
|
"loss": 0.0805, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 1.6322674418604652, |
|
"grad_norm": 1.4355426352193548, |
|
"learning_rate": 8.11212477054904e-07, |
|
"loss": 0.0412, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 1.6337209302325582, |
|
"grad_norm": 1.3219907781653484, |
|
"learning_rate": 8.049899588997246e-07, |
|
"loss": 0.0644, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 1.635174418604651, |
|
"grad_norm": 1.2661461720313956, |
|
"learning_rate": 7.987893080712572e-07, |
|
"loss": 0.0647, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 1.6366279069767442, |
|
"grad_norm": 1.4112271603442867, |
|
"learning_rate": 7.926105568916292e-07, |
|
"loss": 0.0559, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 1.6380813953488373, |
|
"grad_norm": 1.4885958147808351, |
|
"learning_rate": 7.864537375688164e-07, |
|
"loss": 0.0665, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 1.6395348837209303, |
|
"grad_norm": 1.2255901712760742, |
|
"learning_rate": 7.803188821964652e-07, |
|
"loss": 0.0447, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 1.6409883720930232, |
|
"grad_norm": 1.5452975728529494, |
|
"learning_rate": 7.742060227537351e-07, |
|
"loss": 0.067, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 1.6424418604651163, |
|
"grad_norm": 1.3595014210127743, |
|
"learning_rate": 7.681151911051232e-07, |
|
"loss": 0.0613, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.6438953488372094, |
|
"grad_norm": 1.4222742042571062, |
|
"learning_rate": 7.620464190003074e-07, |
|
"loss": 0.0725, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 1.6453488372093024, |
|
"grad_norm": 1.2218037474493977, |
|
"learning_rate": 7.559997380739714e-07, |
|
"loss": 0.0518, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 1.6468023255813953, |
|
"grad_norm": 1.7245104586600535, |
|
"learning_rate": 7.499751798456456e-07, |
|
"loss": 0.072, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 1.6482558139534884, |
|
"grad_norm": 1.3548373797393403, |
|
"learning_rate": 7.439727757195408e-07, |
|
"loss": 0.0655, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 1.6497093023255816, |
|
"grad_norm": 1.4843313971694068, |
|
"learning_rate": 7.379925569843877e-07, |
|
"loss": 0.0701, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 1.6511627906976745, |
|
"grad_norm": 1.2524145522085295, |
|
"learning_rate": 7.320345548132679e-07, |
|
"loss": 0.0429, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 1.6526162790697674, |
|
"grad_norm": 1.3051416609601543, |
|
"learning_rate": 7.260988002634584e-07, |
|
"loss": 0.0709, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 1.6540697674418605, |
|
"grad_norm": 1.1280777447038597, |
|
"learning_rate": 7.201853242762613e-07, |
|
"loss": 0.0653, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 1.6555232558139537, |
|
"grad_norm": 1.524319939704524, |
|
"learning_rate": 7.142941576768526e-07, |
|
"loss": 0.0671, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 1.6569767441860463, |
|
"grad_norm": 1.1315251169649219, |
|
"learning_rate": 7.084253311741101e-07, |
|
"loss": 0.0415, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.6584302325581395, |
|
"grad_norm": 1.5134458313250108, |
|
"learning_rate": 7.025788753604668e-07, |
|
"loss": 0.0507, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 1.6598837209302326, |
|
"grad_norm": 1.3932510174316692, |
|
"learning_rate": 6.967548207117364e-07, |
|
"loss": 0.0653, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 1.6613372093023255, |
|
"grad_norm": 1.369278789308021, |
|
"learning_rate": 6.909531975869682e-07, |
|
"loss": 0.0602, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 1.6627906976744184, |
|
"grad_norm": 1.335104392777719, |
|
"learning_rate": 6.851740362282788e-07, |
|
"loss": 0.0505, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 1.6642441860465116, |
|
"grad_norm": 1.7252990354329707, |
|
"learning_rate": 6.794173667606995e-07, |
|
"loss": 0.0679, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 1.6656976744186047, |
|
"grad_norm": 1.3931797125192535, |
|
"learning_rate": 6.736832191920184e-07, |
|
"loss": 0.0689, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 1.6671511627906976, |
|
"grad_norm": 1.270578497522883, |
|
"learning_rate": 6.679716234126243e-07, |
|
"loss": 0.0663, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 1.6686046511627906, |
|
"grad_norm": 1.1833811909595593, |
|
"learning_rate": 6.622826091953483e-07, |
|
"loss": 0.043, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 1.6700581395348837, |
|
"grad_norm": 1.2856539604876163, |
|
"learning_rate": 6.566162061953141e-07, |
|
"loss": 0.0689, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 1.6715116279069768, |
|
"grad_norm": 1.2026108031855738, |
|
"learning_rate": 6.50972443949775e-07, |
|
"loss": 0.0436, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.6729651162790697, |
|
"grad_norm": 1.4050042046526028, |
|
"learning_rate": 6.453513518779708e-07, |
|
"loss": 0.0683, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 1.6744186046511627, |
|
"grad_norm": 1.172885792221737, |
|
"learning_rate": 6.397529592809615e-07, |
|
"loss": 0.0457, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 1.6758720930232558, |
|
"grad_norm": 1.6645132359630175, |
|
"learning_rate": 6.341772953414893e-07, |
|
"loss": 0.0656, |
|
"step": 1153 |
|
}, |
|
{ |
|
"epoch": 1.677325581395349, |
|
"grad_norm": 1.203947351057658, |
|
"learning_rate": 6.286243891238114e-07, |
|
"loss": 0.0508, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 1.6787790697674418, |
|
"grad_norm": 1.5653427701253289, |
|
"learning_rate": 6.23094269573562e-07, |
|
"loss": 0.0581, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 1.6802325581395348, |
|
"grad_norm": 1.3692678493335273, |
|
"learning_rate": 6.175869655175898e-07, |
|
"loss": 0.0574, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 1.681686046511628, |
|
"grad_norm": 1.223293248941118, |
|
"learning_rate": 6.121025056638186e-07, |
|
"loss": 0.0645, |
|
"step": 1157 |
|
}, |
|
{ |
|
"epoch": 1.683139534883721, |
|
"grad_norm": 1.1984686741863968, |
|
"learning_rate": 6.06640918601088e-07, |
|
"loss": 0.0492, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 1.684593023255814, |
|
"grad_norm": 1.8607181067232295, |
|
"learning_rate": 6.012022327990097e-07, |
|
"loss": 0.0717, |
|
"step": 1159 |
|
}, |
|
{ |
|
"epoch": 1.6860465116279069, |
|
"grad_norm": 1.3129459081678303, |
|
"learning_rate": 5.957864766078186e-07, |
|
"loss": 0.0527, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.6875, |
|
"grad_norm": 1.2419623459260931, |
|
"learning_rate": 5.903936782582253e-07, |
|
"loss": 0.0381, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 1.6889534883720931, |
|
"grad_norm": 1.5778350174178633, |
|
"learning_rate": 5.850238658612667e-07, |
|
"loss": 0.0657, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 1.690406976744186, |
|
"grad_norm": 1.227703263910834, |
|
"learning_rate": 5.796770674081592e-07, |
|
"loss": 0.0578, |
|
"step": 1163 |
|
}, |
|
{ |
|
"epoch": 1.691860465116279, |
|
"grad_norm": 1.2163654872782266, |
|
"learning_rate": 5.743533107701593e-07, |
|
"loss": 0.0622, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 1.693313953488372, |
|
"grad_norm": 1.7561357257514982, |
|
"learning_rate": 5.690526236984079e-07, |
|
"loss": 0.0706, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 1.6947674418604652, |
|
"grad_norm": 1.236485378770411, |
|
"learning_rate": 5.637750338237963e-07, |
|
"loss": 0.0476, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 1.6962209302325582, |
|
"grad_norm": 1.273190952518188, |
|
"learning_rate": 5.585205686568123e-07, |
|
"loss": 0.0572, |
|
"step": 1167 |
|
}, |
|
{ |
|
"epoch": 1.697674418604651, |
|
"grad_norm": 1.3840145983571588, |
|
"learning_rate": 5.532892555874059e-07, |
|
"loss": 0.0498, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 1.6991279069767442, |
|
"grad_norm": 1.6682935105457355, |
|
"learning_rate": 5.48081121884838e-07, |
|
"loss": 0.0501, |
|
"step": 1169 |
|
}, |
|
{ |
|
"epoch": 1.7005813953488373, |
|
"grad_norm": 1.7215714569023008, |
|
"learning_rate": 5.428961946975464e-07, |
|
"loss": 0.0621, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.7020348837209303, |
|
"grad_norm": 1.2802300699738327, |
|
"learning_rate": 5.377345010529977e-07, |
|
"loss": 0.0507, |
|
"step": 1171 |
|
}, |
|
{ |
|
"epoch": 1.7034883720930232, |
|
"grad_norm": 1.179288667096262, |
|
"learning_rate": 5.325960678575498e-07, |
|
"loss": 0.0622, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 1.7049418604651163, |
|
"grad_norm": 1.397312032777205, |
|
"learning_rate": 5.274809218963089e-07, |
|
"loss": 0.048, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 1.7063953488372094, |
|
"grad_norm": 1.0705846916376756, |
|
"learning_rate": 5.22389089832997e-07, |
|
"loss": 0.0462, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 1.7078488372093024, |
|
"grad_norm": 1.5708429842240574, |
|
"learning_rate": 5.173205982098018e-07, |
|
"loss": 0.0777, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 1.7093023255813953, |
|
"grad_norm": 1.6720126005052158, |
|
"learning_rate": 5.122754734472496e-07, |
|
"loss": 0.0751, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 1.7107558139534884, |
|
"grad_norm": 1.4286721495638015, |
|
"learning_rate": 5.072537418440565e-07, |
|
"loss": 0.0509, |
|
"step": 1177 |
|
}, |
|
{ |
|
"epoch": 1.7122093023255816, |
|
"grad_norm": 1.594631174541215, |
|
"learning_rate": 5.022554295770038e-07, |
|
"loss": 0.0578, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 1.7136627906976745, |
|
"grad_norm": 1.2897023400093752, |
|
"learning_rate": 4.972805627007881e-07, |
|
"loss": 0.059, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 1.7151162790697674, |
|
"grad_norm": 1.0706258806400843, |
|
"learning_rate": 4.92329167147898e-07, |
|
"loss": 0.0403, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.7165697674418605, |
|
"grad_norm": 1.247773190344945, |
|
"learning_rate": 4.874012687284685e-07, |
|
"loss": 0.0588, |
|
"step": 1181 |
|
}, |
|
{ |
|
"epoch": 1.7180232558139537, |
|
"grad_norm": 1.1610372428474574, |
|
"learning_rate": 4.824968931301549e-07, |
|
"loss": 0.0493, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 1.7194767441860463, |
|
"grad_norm": 0.9925566701240426, |
|
"learning_rate": 4.776160659179918e-07, |
|
"loss": 0.0423, |
|
"step": 1183 |
|
}, |
|
{ |
|
"epoch": 1.7209302325581395, |
|
"grad_norm": 1.2198238537851194, |
|
"learning_rate": 4.727588125342669e-07, |
|
"loss": 0.0434, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 1.7223837209302326, |
|
"grad_norm": 1.0814209449015801, |
|
"learning_rate": 4.679251582983807e-07, |
|
"loss": 0.0515, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 1.7238372093023255, |
|
"grad_norm": 1.253442945461474, |
|
"learning_rate": 4.631151284067209e-07, |
|
"loss": 0.0401, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 1.7252906976744184, |
|
"grad_norm": 1.6572342387258825, |
|
"learning_rate": 4.583287479325266e-07, |
|
"loss": 0.0851, |
|
"step": 1187 |
|
}, |
|
{ |
|
"epoch": 1.7267441860465116, |
|
"grad_norm": 1.5674512288501314, |
|
"learning_rate": 4.5356604182576315e-07, |
|
"loss": 0.0748, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 1.7281976744186047, |
|
"grad_norm": 1.4246277140745789, |
|
"learning_rate": 4.4882703491298364e-07, |
|
"loss": 0.0507, |
|
"step": 1189 |
|
}, |
|
{ |
|
"epoch": 1.7296511627906976, |
|
"grad_norm": 1.4692955325669175, |
|
"learning_rate": 4.4411175189720935e-07, |
|
"loss": 0.0599, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.7311046511627906, |
|
"grad_norm": 1.3880435931686506, |
|
"learning_rate": 4.3942021735779163e-07, |
|
"loss": 0.0801, |
|
"step": 1191 |
|
}, |
|
{ |
|
"epoch": 1.7325581395348837, |
|
"grad_norm": 1.2441163102111796, |
|
"learning_rate": 4.347524557502919e-07, |
|
"loss": 0.0656, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 1.7340116279069768, |
|
"grad_norm": 1.333242236616478, |
|
"learning_rate": 4.301084914063475e-07, |
|
"loss": 0.051, |
|
"step": 1193 |
|
}, |
|
{ |
|
"epoch": 1.7354651162790697, |
|
"grad_norm": 1.48014650555478, |
|
"learning_rate": 4.2548834853355036e-07, |
|
"loss": 0.0678, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 1.7369186046511627, |
|
"grad_norm": 1.4136502655814862, |
|
"learning_rate": 4.2089205121531475e-07, |
|
"loss": 0.0423, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 1.7383720930232558, |
|
"grad_norm": 1.466619588107673, |
|
"learning_rate": 4.163196234107603e-07, |
|
"loss": 0.0422, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 1.739825581395349, |
|
"grad_norm": 1.0867167600455685, |
|
"learning_rate": 4.117710889545767e-07, |
|
"loss": 0.0465, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 1.7412790697674418, |
|
"grad_norm": 1.1097337994586927, |
|
"learning_rate": 4.0724647155690855e-07, |
|
"loss": 0.049, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 1.7427325581395348, |
|
"grad_norm": 1.4162752653560682, |
|
"learning_rate": 4.0274579480322485e-07, |
|
"loss": 0.0706, |
|
"step": 1199 |
|
}, |
|
{ |
|
"epoch": 1.744186046511628, |
|
"grad_norm": 1.6314238706254214, |
|
"learning_rate": 3.9826908215420344e-07, |
|
"loss": 0.0648, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.744186046511628, |
|
"eval_loss": 0.13038784265518188, |
|
"eval_runtime": 2.2036, |
|
"eval_samples_per_second": 25.414, |
|
"eval_steps_per_second": 6.353, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.745639534883721, |
|
"grad_norm": 1.4924894866086462, |
|
"learning_rate": 3.938163569455999e-07, |
|
"loss": 0.065, |
|
"step": 1201 |
|
}, |
|
{ |
|
"epoch": 1.747093023255814, |
|
"grad_norm": 1.2812945325918792, |
|
"learning_rate": 3.893876423881343e-07, |
|
"loss": 0.056, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 1.7485465116279069, |
|
"grad_norm": 1.4847911329450767, |
|
"learning_rate": 3.8498296156736336e-07, |
|
"loss": 0.0609, |
|
"step": 1203 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 1.1933603478148918, |
|
"learning_rate": 3.8060233744356634e-07, |
|
"loss": 0.0528, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 1.7514534883720931, |
|
"grad_norm": 1.2151612672851098, |
|
"learning_rate": 3.7624579285161945e-07, |
|
"loss": 0.0448, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 1.752906976744186, |
|
"grad_norm": 1.2721240144688912, |
|
"learning_rate": 3.719133505008793e-07, |
|
"loss": 0.0638, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 1.754360465116279, |
|
"grad_norm": 1.5815876693660877, |
|
"learning_rate": 3.67605032975068e-07, |
|
"loss": 0.0611, |
|
"step": 1207 |
|
}, |
|
{ |
|
"epoch": 1.755813953488372, |
|
"grad_norm": 1.4791508048535231, |
|
"learning_rate": 3.633208627321483e-07, |
|
"loss": 0.0565, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 1.7572674418604652, |
|
"grad_norm": 1.9027980020443158, |
|
"learning_rate": 3.590608621042141e-07, |
|
"loss": 0.0835, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 1.7587209302325582, |
|
"grad_norm": 1.2720247109280667, |
|
"learning_rate": 3.548250532973663e-07, |
|
"loss": 0.041, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.760174418604651, |
|
"grad_norm": 1.3313592730700925, |
|
"learning_rate": 3.50613458391606e-07, |
|
"loss": 0.0662, |
|
"step": 1211 |
|
}, |
|
{ |
|
"epoch": 1.7616279069767442, |
|
"grad_norm": 1.742038899061449, |
|
"learning_rate": 3.464260993407098e-07, |
|
"loss": 0.0625, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 1.7630813953488373, |
|
"grad_norm": 1.313671785304789, |
|
"learning_rate": 3.422629979721226e-07, |
|
"loss": 0.058, |
|
"step": 1213 |
|
}, |
|
{ |
|
"epoch": 1.7645348837209303, |
|
"grad_norm": 1.4381256471907784, |
|
"learning_rate": 3.381241759868403e-07, |
|
"loss": 0.0577, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 1.7659883720930232, |
|
"grad_norm": 1.1247184686299345, |
|
"learning_rate": 3.340096549592997e-07, |
|
"loss": 0.0381, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 1.7674418604651163, |
|
"grad_norm": 1.3749991805427006, |
|
"learning_rate": 3.299194563372604e-07, |
|
"loss": 0.05, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 1.7688953488372094, |
|
"grad_norm": 1.2620255884743692, |
|
"learning_rate": 3.258536014417002e-07, |
|
"loss": 0.0404, |
|
"step": 1217 |
|
}, |
|
{ |
|
"epoch": 1.7703488372093024, |
|
"grad_norm": 1.4310474211303066, |
|
"learning_rate": 3.2181211146669835e-07, |
|
"loss": 0.0637, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 1.7718023255813953, |
|
"grad_norm": 1.372997486141924, |
|
"learning_rate": 3.177950074793279e-07, |
|
"loss": 0.059, |
|
"step": 1219 |
|
}, |
|
{ |
|
"epoch": 1.7732558139534884, |
|
"grad_norm": 1.4514336664056386, |
|
"learning_rate": 3.1380231041954366e-07, |
|
"loss": 0.0802, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.7747093023255816, |
|
"grad_norm": 1.294363423127495, |
|
"learning_rate": 3.0983404110007775e-07, |
|
"loss": 0.0463, |
|
"step": 1221 |
|
}, |
|
{ |
|
"epoch": 1.7761627906976745, |
|
"grad_norm": 1.2489012530557877, |
|
"learning_rate": 3.05890220206323e-07, |
|
"loss": 0.0758, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 1.7776162790697674, |
|
"grad_norm": 1.3733868017342423, |
|
"learning_rate": 3.0197086829623524e-07, |
|
"loss": 0.0433, |
|
"step": 1223 |
|
}, |
|
{ |
|
"epoch": 1.7790697674418605, |
|
"grad_norm": 1.2696169610865513, |
|
"learning_rate": 2.980760058002163e-07, |
|
"loss": 0.0695, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 1.7805232558139537, |
|
"grad_norm": 1.1885642085760084, |
|
"learning_rate": 2.9420565302101467e-07, |
|
"loss": 0.0689, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 1.7819767441860463, |
|
"grad_norm": 1.7975663880717683, |
|
"learning_rate": 2.9035983013361524e-07, |
|
"loss": 0.0832, |
|
"step": 1226 |
|
}, |
|
{ |
|
"epoch": 1.7834302325581395, |
|
"grad_norm": 1.4668495811562468, |
|
"learning_rate": 2.8653855718513867e-07, |
|
"loss": 0.0839, |
|
"step": 1227 |
|
}, |
|
{ |
|
"epoch": 1.7848837209302326, |
|
"grad_norm": 1.7943430675185803, |
|
"learning_rate": 2.827418540947313e-07, |
|
"loss": 0.0734, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 1.7863372093023255, |
|
"grad_norm": 1.414532030794273, |
|
"learning_rate": 2.7896974065346636e-07, |
|
"loss": 0.0591, |
|
"step": 1229 |
|
}, |
|
{ |
|
"epoch": 1.7877906976744184, |
|
"grad_norm": 1.3282221407021015, |
|
"learning_rate": 2.7522223652423627e-07, |
|
"loss": 0.0574, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.7892441860465116, |
|
"grad_norm": 1.4616173540064281, |
|
"learning_rate": 2.7149936124165556e-07, |
|
"loss": 0.088, |
|
"step": 1231 |
|
}, |
|
{ |
|
"epoch": 1.7906976744186047, |
|
"grad_norm": 1.62055262401236, |
|
"learning_rate": 2.67801134211953e-07, |
|
"loss": 0.0555, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 1.7921511627906976, |
|
"grad_norm": 1.5054710143448626, |
|
"learning_rate": 2.6412757471287633e-07, |
|
"loss": 0.0355, |
|
"step": 1233 |
|
}, |
|
{ |
|
"epoch": 1.7936046511627906, |
|
"grad_norm": 1.2551896585231108, |
|
"learning_rate": 2.6047870189358504e-07, |
|
"loss": 0.0619, |
|
"step": 1234 |
|
}, |
|
{ |
|
"epoch": 1.7950581395348837, |
|
"grad_norm": 1.4827616308813487, |
|
"learning_rate": 2.568545347745582e-07, |
|
"loss": 0.0699, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 1.7965116279069768, |
|
"grad_norm": 1.1634489682279106, |
|
"learning_rate": 2.5325509224748965e-07, |
|
"loss": 0.0424, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 1.7979651162790697, |
|
"grad_norm": 1.2805820993346737, |
|
"learning_rate": 2.4968039307519174e-07, |
|
"loss": 0.0738, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 1.7994186046511627, |
|
"grad_norm": 1.2970028495965267, |
|
"learning_rate": 2.461304558914973e-07, |
|
"loss": 0.0483, |
|
"step": 1238 |
|
}, |
|
{ |
|
"epoch": 1.8008720930232558, |
|
"grad_norm": 1.3774251049609605, |
|
"learning_rate": 2.426052992011613e-07, |
|
"loss": 0.0594, |
|
"step": 1239 |
|
}, |
|
{ |
|
"epoch": 1.802325581395349, |
|
"grad_norm": 1.2741543577811263, |
|
"learning_rate": 2.3910494137976526e-07, |
|
"loss": 0.061, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.8037790697674418, |
|
"grad_norm": 1.4946869686916726, |
|
"learning_rate": 2.356294006736254e-07, |
|
"loss": 0.0422, |
|
"step": 1241 |
|
}, |
|
{ |
|
"epoch": 1.8052325581395348, |
|
"grad_norm": 1.1976454792671984, |
|
"learning_rate": 2.321786951996885e-07, |
|
"loss": 0.049, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 1.806686046511628, |
|
"grad_norm": 1.7640200270984723, |
|
"learning_rate": 2.2875284294544663e-07, |
|
"loss": 0.0654, |
|
"step": 1243 |
|
}, |
|
{ |
|
"epoch": 1.808139534883721, |
|
"grad_norm": 1.4131166766489878, |
|
"learning_rate": 2.2535186176883771e-07, |
|
"loss": 0.0615, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 1.809593023255814, |
|
"grad_norm": 1.3382405156149042, |
|
"learning_rate": 2.2197576939815447e-07, |
|
"loss": 0.0596, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 1.8110465116279069, |
|
"grad_norm": 1.0375341190167515, |
|
"learning_rate": 2.186245834319517e-07, |
|
"loss": 0.0378, |
|
"step": 1246 |
|
}, |
|
{ |
|
"epoch": 1.8125, |
|
"grad_norm": 1.26799708959962, |
|
"learning_rate": 2.152983213389559e-07, |
|
"loss": 0.0612, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 1.8139534883720931, |
|
"grad_norm": 0.9680143896503044, |
|
"learning_rate": 2.1199700045797077e-07, |
|
"loss": 0.0391, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 1.815406976744186, |
|
"grad_norm": 1.4351941135226602, |
|
"learning_rate": 2.0872063799778908e-07, |
|
"loss": 0.0465, |
|
"step": 1249 |
|
}, |
|
{ |
|
"epoch": 1.816860465116279, |
|
"grad_norm": 1.2212499026345323, |
|
"learning_rate": 2.054692510371059e-07, |
|
"loss": 0.0596, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.818313953488372, |
|
"grad_norm": 1.4124067831447158, |
|
"learning_rate": 2.0224285652442332e-07, |
|
"loss": 0.0434, |
|
"step": 1251 |
|
}, |
|
{ |
|
"epoch": 1.8197674418604652, |
|
"grad_norm": 1.6900102388116847, |
|
"learning_rate": 1.9904147127796646e-07, |
|
"loss": 0.0633, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 1.8212209302325582, |
|
"grad_norm": 1.1245930914451603, |
|
"learning_rate": 1.9586511198559422e-07, |
|
"loss": 0.0518, |
|
"step": 1253 |
|
}, |
|
{ |
|
"epoch": 1.822674418604651, |
|
"grad_norm": 1.4535155723070285, |
|
"learning_rate": 1.9271379520471366e-07, |
|
"loss": 0.0611, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 1.8241279069767442, |
|
"grad_norm": 1.1651708233377704, |
|
"learning_rate": 1.8958753736219137e-07, |
|
"loss": 0.0437, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 1.8255813953488373, |
|
"grad_norm": 1.698268844667189, |
|
"learning_rate": 1.8648635475427112e-07, |
|
"loss": 0.0661, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 1.8270348837209303, |
|
"grad_norm": 1.2897615950696786, |
|
"learning_rate": 1.8341026354648461e-07, |
|
"loss": 0.0653, |
|
"step": 1257 |
|
}, |
|
{ |
|
"epoch": 1.8284883720930232, |
|
"grad_norm": 1.454033537605278, |
|
"learning_rate": 1.8035927977357204e-07, |
|
"loss": 0.0824, |
|
"step": 1258 |
|
}, |
|
{ |
|
"epoch": 1.8299418604651163, |
|
"grad_norm": 1.4782681121621621, |
|
"learning_rate": 1.773334193393944e-07, |
|
"loss": 0.0695, |
|
"step": 1259 |
|
}, |
|
{ |
|
"epoch": 1.8313953488372094, |
|
"grad_norm": 1.2472901471200917, |
|
"learning_rate": 1.7433269801685304e-07, |
|
"loss": 0.0535, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.8328488372093024, |
|
"grad_norm": 1.411827026048705, |
|
"learning_rate": 1.713571314478063e-07, |
|
"loss": 0.0558, |
|
"step": 1261 |
|
}, |
|
{ |
|
"epoch": 1.8343023255813953, |
|
"grad_norm": 1.3101061668799474, |
|
"learning_rate": 1.684067351429891e-07, |
|
"loss": 0.0847, |
|
"step": 1262 |
|
}, |
|
{ |
|
"epoch": 1.8357558139534884, |
|
"grad_norm": 1.3283465609162761, |
|
"learning_rate": 1.6548152448193021e-07, |
|
"loss": 0.0517, |
|
"step": 1263 |
|
}, |
|
{ |
|
"epoch": 1.8372093023255816, |
|
"grad_norm": 1.073779839976608, |
|
"learning_rate": 1.6258151471287397e-07, |
|
"loss": 0.059, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 1.8386627906976745, |
|
"grad_norm": 1.5768683051386514, |
|
"learning_rate": 1.5970672095269978e-07, |
|
"loss": 0.0715, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 1.8401162790697674, |
|
"grad_norm": 1.6425224461963919, |
|
"learning_rate": 1.5685715818684332e-07, |
|
"loss": 0.0742, |
|
"step": 1266 |
|
}, |
|
{ |
|
"epoch": 1.8415697674418605, |
|
"grad_norm": 1.2460670529438014, |
|
"learning_rate": 1.540328412692188e-07, |
|
"loss": 0.0509, |
|
"step": 1267 |
|
}, |
|
{ |
|
"epoch": 1.8430232558139537, |
|
"grad_norm": 1.1780779637487353, |
|
"learning_rate": 1.512337849221429e-07, |
|
"loss": 0.0558, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 1.8444767441860463, |
|
"grad_norm": 1.3379396790966653, |
|
"learning_rate": 1.4846000373625325e-07, |
|
"loss": 0.039, |
|
"step": 1269 |
|
}, |
|
{ |
|
"epoch": 1.8459302325581395, |
|
"grad_norm": 1.6926159045939793, |
|
"learning_rate": 1.4571151217043944e-07, |
|
"loss": 0.0581, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.8473837209302326, |
|
"grad_norm": 1.358687319927915, |
|
"learning_rate": 1.4298832455176104e-07, |
|
"loss": 0.0442, |
|
"step": 1271 |
|
}, |
|
{ |
|
"epoch": 1.8488372093023255, |
|
"grad_norm": 1.1372613393653885, |
|
"learning_rate": 1.4029045507537696e-07, |
|
"loss": 0.0619, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 1.8502906976744184, |
|
"grad_norm": 1.761528640147089, |
|
"learning_rate": 1.376179178044701e-07, |
|
"loss": 0.1053, |
|
"step": 1273 |
|
}, |
|
{ |
|
"epoch": 1.8517441860465116, |
|
"grad_norm": 1.103400969846263, |
|
"learning_rate": 1.3497072667017497e-07, |
|
"loss": 0.0444, |
|
"step": 1274 |
|
}, |
|
{ |
|
"epoch": 1.8531976744186047, |
|
"grad_norm": 1.6546592470023223, |
|
"learning_rate": 1.3234889547150132e-07, |
|
"loss": 0.0705, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 1.8546511627906976, |
|
"grad_norm": 1.4348034154756075, |
|
"learning_rate": 1.297524378752696e-07, |
|
"loss": 0.0479, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 1.8561046511627906, |
|
"grad_norm": 1.571516172687651, |
|
"learning_rate": 1.2718136741603216e-07, |
|
"loss": 0.0694, |
|
"step": 1277 |
|
}, |
|
{ |
|
"epoch": 1.8575581395348837, |
|
"grad_norm": 1.3840050658107048, |
|
"learning_rate": 1.2463569749600613e-07, |
|
"loss": 0.035, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 1.8590116279069768, |
|
"grad_norm": 1.4947152523030978, |
|
"learning_rate": 1.2211544138500452e-07, |
|
"loss": 0.0561, |
|
"step": 1279 |
|
}, |
|
{ |
|
"epoch": 1.8604651162790697, |
|
"grad_norm": 1.1233071008724007, |
|
"learning_rate": 1.196206122203647e-07, |
|
"loss": 0.036, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.8619186046511627, |
|
"grad_norm": 1.314094801957361, |
|
"learning_rate": 1.1715122300688109e-07, |
|
"loss": 0.0535, |
|
"step": 1281 |
|
}, |
|
{ |
|
"epoch": 1.8633720930232558, |
|
"grad_norm": 1.2805535837964788, |
|
"learning_rate": 1.1470728661673814e-07, |
|
"loss": 0.0556, |
|
"step": 1282 |
|
}, |
|
{ |
|
"epoch": 1.864825581395349, |
|
"grad_norm": 1.6182345359056034, |
|
"learning_rate": 1.122888157894414e-07, |
|
"loss": 0.0446, |
|
"step": 1283 |
|
}, |
|
{ |
|
"epoch": 1.8662790697674418, |
|
"grad_norm": 1.5370590795498948, |
|
"learning_rate": 1.0989582313175373e-07, |
|
"loss": 0.0648, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 1.8677325581395348, |
|
"grad_norm": 1.2775753759686193, |
|
"learning_rate": 1.0752832111762479e-07, |
|
"loss": 0.0485, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 1.869186046511628, |
|
"grad_norm": 1.3721105986516784, |
|
"learning_rate": 1.0518632208813274e-07, |
|
"loss": 0.0461, |
|
"step": 1286 |
|
}, |
|
{ |
|
"epoch": 1.870639534883721, |
|
"grad_norm": 1.3509315346274664, |
|
"learning_rate": 1.0286983825141373e-07, |
|
"loss": 0.0591, |
|
"step": 1287 |
|
}, |
|
{ |
|
"epoch": 1.872093023255814, |
|
"grad_norm": 1.3166068602170886, |
|
"learning_rate": 1.0057888168260311e-07, |
|
"loss": 0.0667, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 1.8735465116279069, |
|
"grad_norm": 1.0841449480013747, |
|
"learning_rate": 9.831346432376765e-08, |
|
"loss": 0.0296, |
|
"step": 1289 |
|
}, |
|
{ |
|
"epoch": 1.875, |
|
"grad_norm": 1.4139829336218244, |
|
"learning_rate": 9.607359798384785e-08, |
|
"loss": 0.0899, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.8764534883720931, |
|
"grad_norm": 1.3681050302392366, |
|
"learning_rate": 9.385929433859353e-08, |
|
"loss": 0.04, |
|
"step": 1291 |
|
}, |
|
{ |
|
"epoch": 1.877906976744186, |
|
"grad_norm": 1.4537407830939912, |
|
"learning_rate": 9.167056493050497e-08, |
|
"loss": 0.0582, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 1.879360465116279, |
|
"grad_norm": 1.437948138954234, |
|
"learning_rate": 8.95074211687702e-08, |
|
"loss": 0.0558, |
|
"step": 1293 |
|
}, |
|
{ |
|
"epoch": 1.880813953488372, |
|
"grad_norm": 1.2655853734528568, |
|
"learning_rate": 8.736987432920785e-08, |
|
"loss": 0.0562, |
|
"step": 1294 |
|
}, |
|
{ |
|
"epoch": 1.8822674418604652, |
|
"grad_norm": 1.2886785360003277, |
|
"learning_rate": 8.525793555420714e-08, |
|
"loss": 0.0642, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 1.8837209302325582, |
|
"grad_norm": 1.5650728826423228, |
|
"learning_rate": 8.317161585266964e-08, |
|
"loss": 0.048, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 1.885174418604651, |
|
"grad_norm": 1.75943043500276, |
|
"learning_rate": 8.111092609995375e-08, |
|
"loss": 0.0516, |
|
"step": 1297 |
|
}, |
|
{ |
|
"epoch": 1.8866279069767442, |
|
"grad_norm": 2.137820572547727, |
|
"learning_rate": 7.907587703781583e-08, |
|
"loss": 0.1197, |
|
"step": 1298 |
|
}, |
|
{ |
|
"epoch": 1.8880813953488373, |
|
"grad_norm": 1.4094974542843408, |
|
"learning_rate": 7.706647927435528e-08, |
|
"loss": 0.0446, |
|
"step": 1299 |
|
}, |
|
{ |
|
"epoch": 1.8895348837209303, |
|
"grad_norm": 1.5046480179949109, |
|
"learning_rate": 7.508274328395848e-08, |
|
"loss": 0.0717, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.8909883720930232, |
|
"grad_norm": 0.9467909119425738, |
|
"learning_rate": 7.312467940724488e-08, |
|
"loss": 0.0391, |
|
"step": 1301 |
|
}, |
|
{ |
|
"epoch": 1.8924418604651163, |
|
"grad_norm": 1.159518695387788, |
|
"learning_rate": 7.119229785101322e-08, |
|
"loss": 0.0499, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 1.8938953488372094, |
|
"grad_norm": 1.1359551931047827, |
|
"learning_rate": 6.928560868818823e-08, |
|
"loss": 0.0528, |
|
"step": 1303 |
|
}, |
|
{ |
|
"epoch": 1.8953488372093024, |
|
"grad_norm": 1.3754138749662996, |
|
"learning_rate": 6.74046218577673e-08, |
|
"loss": 0.0901, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 1.8968023255813953, |
|
"grad_norm": 1.1173372045284649, |
|
"learning_rate": 6.554934716476946e-08, |
|
"loss": 0.0416, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 1.8982558139534884, |
|
"grad_norm": 1.6715320242870557, |
|
"learning_rate": 6.371979428018371e-08, |
|
"loss": 0.051, |
|
"step": 1306 |
|
}, |
|
{ |
|
"epoch": 1.8997093023255816, |
|
"grad_norm": 1.6108507509250944, |
|
"learning_rate": 6.191597274091965e-08, |
|
"loss": 0.048, |
|
"step": 1307 |
|
}, |
|
{ |
|
"epoch": 1.9011627906976745, |
|
"grad_norm": 1.1025808474832854, |
|
"learning_rate": 6.01378919497575e-08, |
|
"loss": 0.0473, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 1.9026162790697674, |
|
"grad_norm": 1.1565768061022645, |
|
"learning_rate": 5.838556117529759e-08, |
|
"loss": 0.0532, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 1.9040697674418605, |
|
"grad_norm": 1.3641230663168908, |
|
"learning_rate": 5.6658989551913736e-08, |
|
"loss": 0.059, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.9055232558139537, |
|
"grad_norm": 1.3356709104324593, |
|
"learning_rate": 5.495818607970549e-08, |
|
"loss": 0.0612, |
|
"step": 1311 |
|
}, |
|
{ |
|
"epoch": 1.9069767441860463, |
|
"grad_norm": 1.660638299272094, |
|
"learning_rate": 5.3283159624448745e-08, |
|
"loss": 0.0496, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 1.9084302325581395, |
|
"grad_norm": 1.7498825229557364, |
|
"learning_rate": 5.16339189175552e-08, |
|
"loss": 0.0577, |
|
"step": 1313 |
|
}, |
|
{ |
|
"epoch": 1.9098837209302326, |
|
"grad_norm": 1.5655598568640097, |
|
"learning_rate": 5.0010472556019096e-08, |
|
"loss": 0.0553, |
|
"step": 1314 |
|
}, |
|
{ |
|
"epoch": 1.9113372093023255, |
|
"grad_norm": 1.3860084050687909, |
|
"learning_rate": 4.841282900237942e-08, |
|
"loss": 0.0631, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 1.9127906976744184, |
|
"grad_norm": 1.3887503495723006, |
|
"learning_rate": 4.684099658467223e-08, |
|
"loss": 0.0699, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 1.9142441860465116, |
|
"grad_norm": 1.2678955663748521, |
|
"learning_rate": 4.529498349638728e-08, |
|
"loss": 0.0402, |
|
"step": 1317 |
|
}, |
|
{ |
|
"epoch": 1.9156976744186047, |
|
"grad_norm": 1.284564731699548, |
|
"learning_rate": 4.377479779642535e-08, |
|
"loss": 0.0502, |
|
"step": 1318 |
|
}, |
|
{ |
|
"epoch": 1.9171511627906976, |
|
"grad_norm": 1.2529895690190205, |
|
"learning_rate": 4.228044740905879e-08, |
|
"loss": 0.0626, |
|
"step": 1319 |
|
}, |
|
{ |
|
"epoch": 1.9186046511627906, |
|
"grad_norm": 1.5762385143793198, |
|
"learning_rate": 4.081194012388601e-08, |
|
"loss": 0.0627, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.9200581395348837, |
|
"grad_norm": 1.6231285336791623, |
|
"learning_rate": 3.936928359579539e-08, |
|
"loss": 0.0667, |
|
"step": 1321 |
|
}, |
|
{ |
|
"epoch": 1.9215116279069768, |
|
"grad_norm": 1.5262840772998052, |
|
"learning_rate": 3.7952485344921465e-08, |
|
"loss": 0.0425, |
|
"step": 1322 |
|
}, |
|
{ |
|
"epoch": 1.9229651162790697, |
|
"grad_norm": 1.145845214252006, |
|
"learning_rate": 3.656155275660711e-08, |
|
"loss": 0.0455, |
|
"step": 1323 |
|
}, |
|
{ |
|
"epoch": 1.9244186046511627, |
|
"grad_norm": 2.0089578761429916, |
|
"learning_rate": 3.5196493081366966e-08, |
|
"loss": 0.0779, |
|
"step": 1324 |
|
}, |
|
{ |
|
"epoch": 1.9258720930232558, |
|
"grad_norm": 1.613161432653411, |
|
"learning_rate": 3.385731343484633e-08, |
|
"loss": 0.0759, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 1.927325581395349, |
|
"grad_norm": 1.1432087881946933, |
|
"learning_rate": 3.254402079778618e-08, |
|
"loss": 0.0547, |
|
"step": 1326 |
|
}, |
|
{ |
|
"epoch": 1.9287790697674418, |
|
"grad_norm": 1.4249087522542705, |
|
"learning_rate": 3.125662201598656e-08, |
|
"loss": 0.0663, |
|
"step": 1327 |
|
}, |
|
{ |
|
"epoch": 1.9302325581395348, |
|
"grad_norm": 1.2132853467810891, |
|
"learning_rate": 2.9995123800270476e-08, |
|
"loss": 0.0499, |
|
"step": 1328 |
|
}, |
|
{ |
|
"epoch": 1.931686046511628, |
|
"grad_norm": 1.193012210008745, |
|
"learning_rate": 2.8759532726448937e-08, |
|
"loss": 0.0523, |
|
"step": 1329 |
|
}, |
|
{ |
|
"epoch": 1.933139534883721, |
|
"grad_norm": 1.358877307198463, |
|
"learning_rate": 2.754985523528708e-08, |
|
"loss": 0.0695, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.934593023255814, |
|
"grad_norm": 1.6866286956608185, |
|
"learning_rate": 2.6366097632469778e-08, |
|
"loss": 0.0625, |
|
"step": 1331 |
|
}, |
|
{ |
|
"epoch": 1.9360465116279069, |
|
"grad_norm": 1.585941621289744, |
|
"learning_rate": 2.5208266088569966e-08, |
|
"loss": 0.0511, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 1.9375, |
|
"grad_norm": 1.3872165917004133, |
|
"learning_rate": 2.4076366639015914e-08, |
|
"loss": 0.0568, |
|
"step": 1333 |
|
}, |
|
{ |
|
"epoch": 1.9389534883720931, |
|
"grad_norm": 1.457747420185785, |
|
"learning_rate": 2.2970405184058463e-08, |
|
"loss": 0.0547, |
|
"step": 1334 |
|
}, |
|
{ |
|
"epoch": 1.940406976744186, |
|
"grad_norm": 1.1949184875255823, |
|
"learning_rate": 2.1890387488742726e-08, |
|
"loss": 0.0399, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 1.941860465116279, |
|
"grad_norm": 1.5940091425661185, |
|
"learning_rate": 2.083631918287643e-08, |
|
"loss": 0.0518, |
|
"step": 1336 |
|
}, |
|
{ |
|
"epoch": 1.943313953488372, |
|
"grad_norm": 1.382910110487527, |
|
"learning_rate": 1.9808205761001065e-08, |
|
"loss": 0.0532, |
|
"step": 1337 |
|
}, |
|
{ |
|
"epoch": 1.9447674418604652, |
|
"grad_norm": 1.4736697924051638, |
|
"learning_rate": 1.880605258236301e-08, |
|
"loss": 0.0588, |
|
"step": 1338 |
|
}, |
|
{ |
|
"epoch": 1.9462209302325582, |
|
"grad_norm": 1.295565534954337, |
|
"learning_rate": 1.782986487088467e-08, |
|
"loss": 0.0724, |
|
"step": 1339 |
|
}, |
|
{ |
|
"epoch": 1.947674418604651, |
|
"grad_norm": 1.3488063432605657, |
|
"learning_rate": 1.6879647715140613e-08, |
|
"loss": 0.0923, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.9491279069767442, |
|
"grad_norm": 1.3676570070169922, |
|
"learning_rate": 1.5955406068326462e-08, |
|
"loss": 0.0515, |
|
"step": 1341 |
|
}, |
|
{ |
|
"epoch": 1.9505813953488373, |
|
"grad_norm": 1.2884297596799819, |
|
"learning_rate": 1.5057144748236162e-08, |
|
"loss": 0.0462, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 1.9520348837209303, |
|
"grad_norm": 1.2443705721719838, |
|
"learning_rate": 1.4184868437236987e-08, |
|
"loss": 0.0576, |
|
"step": 1343 |
|
}, |
|
{ |
|
"epoch": 1.9534883720930232, |
|
"grad_norm": 1.3053643973067475, |
|
"learning_rate": 1.333858168224178e-08, |
|
"loss": 0.0537, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 1.9549418604651163, |
|
"grad_norm": 1.546574318041773, |
|
"learning_rate": 1.2518288894690089e-08, |
|
"loss": 0.0372, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 1.9563953488372094, |
|
"grad_norm": 1.2490005145711904, |
|
"learning_rate": 1.1723994350521518e-08, |
|
"loss": 0.0541, |
|
"step": 1346 |
|
}, |
|
{ |
|
"epoch": 1.9578488372093024, |
|
"grad_norm": 1.5464557013202604, |
|
"learning_rate": 1.0955702190154072e-08, |
|
"loss": 0.0558, |
|
"step": 1347 |
|
}, |
|
{ |
|
"epoch": 1.9593023255813953, |
|
"grad_norm": 1.2582934072656768, |
|
"learning_rate": 1.0213416418465294e-08, |
|
"loss": 0.0567, |
|
"step": 1348 |
|
}, |
|
{ |
|
"epoch": 1.9607558139534884, |
|
"grad_norm": 1.9089698225345129, |
|
"learning_rate": 9.497140904766722e-09, |
|
"loss": 0.0556, |
|
"step": 1349 |
|
}, |
|
{ |
|
"epoch": 1.9622093023255816, |
|
"grad_norm": 1.509899723394668, |
|
"learning_rate": 8.806879382788347e-09, |
|
"loss": 0.0603, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.9636627906976745, |
|
"grad_norm": 1.4511032534590458, |
|
"learning_rate": 8.142635450654746e-09, |
|
"loss": 0.0762, |
|
"step": 1351 |
|
}, |
|
{ |
|
"epoch": 1.9651162790697674, |
|
"grad_norm": 1.6080149924866183, |
|
"learning_rate": 7.5044125708712e-09, |
|
"loss": 0.0609, |
|
"step": 1352 |
|
}, |
|
{ |
|
"epoch": 1.9665697674418605, |
|
"grad_norm": 1.1922215779000969, |
|
"learning_rate": 6.89221407030094e-09, |
|
"loss": 0.0444, |
|
"step": 1353 |
|
}, |
|
{ |
|
"epoch": 1.9680232558139537, |
|
"grad_norm": 1.3256995449929614, |
|
"learning_rate": 6.3060431401512634e-09, |
|
"loss": 0.0474, |
|
"step": 1354 |
|
}, |
|
{ |
|
"epoch": 1.9694767441860463, |
|
"grad_norm": 1.1949382933968422, |
|
"learning_rate": 5.7459028359546645e-09, |
|
"loss": 0.0467, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 1.9709302325581395, |
|
"grad_norm": 1.693812650124421, |
|
"learning_rate": 5.211796077554399e-09, |
|
"loss": 0.0645, |
|
"step": 1356 |
|
}, |
|
{ |
|
"epoch": 1.9723837209302326, |
|
"grad_norm": 1.454630218156985, |
|
"learning_rate": 4.703725649088941e-09, |
|
"loss": 0.0514, |
|
"step": 1357 |
|
}, |
|
{ |
|
"epoch": 1.9738372093023255, |
|
"grad_norm": 1.4686563612295154, |
|
"learning_rate": 4.221694198976445e-09, |
|
"loss": 0.0654, |
|
"step": 1358 |
|
}, |
|
{ |
|
"epoch": 1.9752906976744184, |
|
"grad_norm": 1.7602700723605103, |
|
"learning_rate": 3.765704239901413e-09, |
|
"loss": 0.0678, |
|
"step": 1359 |
|
}, |
|
{ |
|
"epoch": 1.9767441860465116, |
|
"grad_norm": 1.2613024941113347, |
|
"learning_rate": 3.3357581488030476e-09, |
|
"loss": 0.0627, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.9781976744186047, |
|
"grad_norm": 1.7048864018499121, |
|
"learning_rate": 2.9318581668613676e-09, |
|
"loss": 0.0691, |
|
"step": 1361 |
|
}, |
|
{ |
|
"epoch": 1.9796511627906976, |
|
"grad_norm": 1.1475541829943066, |
|
"learning_rate": 2.5540063994849982e-09, |
|
"loss": 0.0512, |
|
"step": 1362 |
|
}, |
|
{ |
|
"epoch": 1.9811046511627906, |
|
"grad_norm": 1.6476817749953838, |
|
"learning_rate": 2.202204816302289e-09, |
|
"loss": 0.05, |
|
"step": 1363 |
|
}, |
|
{ |
|
"epoch": 1.9825581395348837, |
|
"grad_norm": 1.0248136833336827, |
|
"learning_rate": 1.8764552511485457e-09, |
|
"loss": 0.0374, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 1.9840116279069768, |
|
"grad_norm": 1.248745978654899, |
|
"learning_rate": 1.576759402058814e-09, |
|
"loss": 0.0581, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 1.9854651162790697, |
|
"grad_norm": 1.4353092543675106, |
|
"learning_rate": 1.3031188312573328e-09, |
|
"loss": 0.0512, |
|
"step": 1366 |
|
}, |
|
{ |
|
"epoch": 1.9869186046511627, |
|
"grad_norm": 1.6080768674831933, |
|
"learning_rate": 1.0555349651503178e-09, |
|
"loss": 0.063, |
|
"step": 1367 |
|
}, |
|
{ |
|
"epoch": 1.9883720930232558, |
|
"grad_norm": 1.426444153430007, |
|
"learning_rate": 8.340090943176338e-10, |
|
"loss": 0.0526, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 1.989825581395349, |
|
"grad_norm": 1.3214408477997244, |
|
"learning_rate": 6.385423735078e-10, |
|
"loss": 0.0463, |
|
"step": 1369 |
|
}, |
|
{ |
|
"epoch": 1.9912790697674418, |
|
"grad_norm": 1.7983231171481742, |
|
"learning_rate": 4.691358216291075e-10, |
|
"loss": 0.058, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.9927325581395348, |
|
"grad_norm": 1.4390939528927378, |
|
"learning_rate": 3.257903217479541e-10, |
|
"loss": 0.0639, |
|
"step": 1371 |
|
}, |
|
{ |
|
"epoch": 1.994186046511628, |
|
"grad_norm": 1.1021839312652135, |
|
"learning_rate": 2.0850662108051755e-10, |
|
"loss": 0.0402, |
|
"step": 1372 |
|
}, |
|
{ |
|
"epoch": 1.995639534883721, |
|
"grad_norm": 1.3590522618876162, |
|
"learning_rate": 1.1728533099220063e-10, |
|
"loss": 0.0509, |
|
"step": 1373 |
|
}, |
|
{ |
|
"epoch": 1.997093023255814, |
|
"grad_norm": 1.4976019289330444, |
|
"learning_rate": 5.2126926991524774e-11, |
|
"loss": 0.0671, |
|
"step": 1374 |
|
}, |
|
{ |
|
"epoch": 1.9985465116279069, |
|
"grad_norm": 1.3973669217539937, |
|
"learning_rate": 1.3031748730685246e-11, |
|
"loss": 0.0565, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.8399439880889858, |
|
"learning_rate": 0.0, |
|
"loss": 0.0345, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 1376, |
|
"total_flos": 7660524011520.0, |
|
"train_loss": 0.10411996046909629, |
|
"train_runtime": 1065.2694, |
|
"train_samples_per_second": 10.32, |
|
"train_steps_per_second": 1.292 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1376, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 2000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7660524011520.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|