diff --git "a/last-checkpoint/trainer_state.json" "b/last-checkpoint/trainer_state.json" --- "a/last-checkpoint/trainer_state.json" +++ "b/last-checkpoint/trainer_state.json" @@ -1,9 +1,9 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 0.5555555555555556, + "epoch": 0.6666666666666666, "eval_steps": 9000, - "global_step": 25000, + "global_step": 30000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -175023,6 +175023,35014 @@ "learning_rate": 8.890864636585908e-05, "loss": 1.0186, "step": 25000 + }, + { + "epoch": 0.5555777777777777, + "grad_norm": 1.6018104553222656, + "learning_rate": 8.890420093354079e-05, + "loss": 2.7778, + "step": 25001 + }, + { + "epoch": 0.5556, + "grad_norm": 1.4975850582122803, + "learning_rate": 8.88997555012225e-05, + "loss": 2.7011, + "step": 25002 + }, + { + "epoch": 0.5556222222222222, + "grad_norm": 1.3138421773910522, + "learning_rate": 8.889531006890421e-05, + "loss": 2.056, + "step": 25003 + }, + { + "epoch": 0.5556444444444445, + "grad_norm": 1.6650867462158203, + "learning_rate": 8.889086463658592e-05, + "loss": 2.3087, + "step": 25004 + }, + { + "epoch": 0.5556666666666666, + "grad_norm": 1.5537450313568115, + "learning_rate": 8.888641920426763e-05, + "loss": 2.1625, + "step": 25005 + }, + { + "epoch": 0.5556888888888889, + "grad_norm": 1.5213912725448608, + "learning_rate": 8.888197377194932e-05, + "loss": 2.3027, + "step": 25006 + }, + { + "epoch": 0.5557111111111112, + "grad_norm": 1.4587023258209229, + "learning_rate": 8.887752833963103e-05, + "loss": 1.7927, + "step": 25007 + }, + { + "epoch": 0.5557333333333333, + "grad_norm": 1.5978976488113403, + "learning_rate": 8.887308290731274e-05, + "loss": 1.9206, + "step": 25008 + }, + { + "epoch": 0.5557555555555556, + "grad_norm": 1.3036267757415771, + "learning_rate": 8.886863747499444e-05, + "loss": 1.5741, + "step": 25009 + }, + { + "epoch": 0.5557777777777778, + "grad_norm": 1.5057199001312256, + "learning_rate": 8.886419204267615e-05, + "loss": 1.9934, + "step": 25010 + }, + { + "epoch": 0.5558, + "grad_norm": 1.617224931716919, + "learning_rate": 8.885974661035786e-05, + "loss": 1.5776, + "step": 25011 + }, + { + "epoch": 0.5558222222222222, + "grad_norm": 1.4996505975723267, + "learning_rate": 8.885530117803957e-05, + "loss": 1.7387, + "step": 25012 + }, + { + "epoch": 0.5558444444444445, + "grad_norm": 1.9091581106185913, + "learning_rate": 8.885085574572128e-05, + "loss": 2.257, + "step": 25013 + }, + { + "epoch": 0.5558666666666666, + "grad_norm": 2.1841301918029785, + "learning_rate": 8.884641031340299e-05, + "loss": 1.6132, + "step": 25014 + }, + { + "epoch": 0.5558888888888889, + "grad_norm": 1.050742268562317, + "learning_rate": 8.88419648810847e-05, + "loss": 0.9285, + "step": 25015 + }, + { + "epoch": 0.5559111111111111, + "grad_norm": 1.4610708951950073, + "learning_rate": 8.88375194487664e-05, + "loss": 1.7434, + "step": 25016 + }, + { + "epoch": 0.5559333333333333, + "grad_norm": 1.5033701658248901, + "learning_rate": 8.88330740164481e-05, + "loss": 1.4852, + "step": 25017 + }, + { + "epoch": 0.5559555555555555, + "grad_norm": 1.5321849584579468, + "learning_rate": 8.882862858412981e-05, + "loss": 1.6994, + "step": 25018 + }, + { + "epoch": 0.5559777777777778, + "grad_norm": 1.53279709815979, + "learning_rate": 8.882418315181151e-05, + "loss": 1.9177, + "step": 25019 + }, + { + "epoch": 0.556, + "grad_norm": 1.8359917402267456, + "learning_rate": 8.881973771949322e-05, + "loss": 2.1219, + "step": 25020 + }, + { + "epoch": 0.5560222222222222, + "grad_norm": 2.4665095806121826, + "learning_rate": 8.881529228717493e-05, + "loss": 2.1297, + "step": 25021 + }, + { + "epoch": 0.5560444444444445, + "grad_norm": 1.7346540689468384, + "learning_rate": 8.881084685485664e-05, + "loss": 1.7752, + "step": 25022 + }, + { + "epoch": 0.5560666666666667, + "grad_norm": 1.6376831531524658, + "learning_rate": 8.880640142253835e-05, + "loss": 2.2255, + "step": 25023 + }, + { + "epoch": 0.5560888888888889, + "grad_norm": 2.1054024696350098, + "learning_rate": 8.880195599022006e-05, + "loss": 2.4414, + "step": 25024 + }, + { + "epoch": 0.5561111111111111, + "grad_norm": 1.8348150253295898, + "learning_rate": 8.879751055790177e-05, + "loss": 1.9206, + "step": 25025 + }, + { + "epoch": 0.5561333333333334, + "grad_norm": 1.9463988542556763, + "learning_rate": 8.879306512558346e-05, + "loss": 2.6646, + "step": 25026 + }, + { + "epoch": 0.5561555555555555, + "grad_norm": 1.4555976390838623, + "learning_rate": 8.878861969326517e-05, + "loss": 1.7126, + "step": 25027 + }, + { + "epoch": 0.5561777777777778, + "grad_norm": 1.2904547452926636, + "learning_rate": 8.878417426094688e-05, + "loss": 1.0135, + "step": 25028 + }, + { + "epoch": 0.5562, + "grad_norm": 1.8080211877822876, + "learning_rate": 8.877972882862858e-05, + "loss": 2.229, + "step": 25029 + }, + { + "epoch": 0.5562222222222222, + "grad_norm": 1.6538316011428833, + "learning_rate": 8.877528339631029e-05, + "loss": 2.2293, + "step": 25030 + }, + { + "epoch": 0.5562444444444444, + "grad_norm": 1.7188304662704468, + "learning_rate": 8.877083796399201e-05, + "loss": 1.6404, + "step": 25031 + }, + { + "epoch": 0.5562666666666667, + "grad_norm": 1.7964560985565186, + "learning_rate": 8.876639253167372e-05, + "loss": 1.8025, + "step": 25032 + }, + { + "epoch": 0.5562888888888889, + "grad_norm": 1.527963399887085, + "learning_rate": 8.876194709935542e-05, + "loss": 1.8277, + "step": 25033 + }, + { + "epoch": 0.5563111111111111, + "grad_norm": 1.5908809900283813, + "learning_rate": 8.875750166703713e-05, + "loss": 1.4329, + "step": 25034 + }, + { + "epoch": 0.5563333333333333, + "grad_norm": 1.6306854486465454, + "learning_rate": 8.875305623471884e-05, + "loss": 2.056, + "step": 25035 + }, + { + "epoch": 0.5563555555555556, + "grad_norm": 1.3418653011322021, + "learning_rate": 8.874861080240053e-05, + "loss": 1.1482, + "step": 25036 + }, + { + "epoch": 0.5563777777777777, + "grad_norm": 1.7620158195495605, + "learning_rate": 8.874416537008224e-05, + "loss": 1.7257, + "step": 25037 + }, + { + "epoch": 0.5564, + "grad_norm": 1.6331011056900024, + "learning_rate": 8.873971993776395e-05, + "loss": 1.283, + "step": 25038 + }, + { + "epoch": 0.5564222222222223, + "grad_norm": 1.7324144840240479, + "learning_rate": 8.873527450544565e-05, + "loss": 1.9654, + "step": 25039 + }, + { + "epoch": 0.5564444444444444, + "grad_norm": 1.7590289115905762, + "learning_rate": 8.873082907312737e-05, + "loss": 1.881, + "step": 25040 + }, + { + "epoch": 0.5564666666666667, + "grad_norm": 1.5783190727233887, + "learning_rate": 8.872638364080908e-05, + "loss": 1.9342, + "step": 25041 + }, + { + "epoch": 0.5564888888888889, + "grad_norm": 1.7162874937057495, + "learning_rate": 8.872193820849079e-05, + "loss": 1.9172, + "step": 25042 + }, + { + "epoch": 0.5565111111111111, + "grad_norm": 1.4815313816070557, + "learning_rate": 8.871749277617249e-05, + "loss": 1.4027, + "step": 25043 + }, + { + "epoch": 0.5565333333333333, + "grad_norm": 1.5845388174057007, + "learning_rate": 8.87130473438542e-05, + "loss": 1.6193, + "step": 25044 + }, + { + "epoch": 0.5565555555555556, + "grad_norm": 1.7816119194030762, + "learning_rate": 8.87086019115359e-05, + "loss": 1.8064, + "step": 25045 + }, + { + "epoch": 0.5565777777777777, + "grad_norm": 1.853499174118042, + "learning_rate": 8.87041564792176e-05, + "loss": 1.5769, + "step": 25046 + }, + { + "epoch": 0.5566, + "grad_norm": 1.7572011947631836, + "learning_rate": 8.869971104689931e-05, + "loss": 1.5565, + "step": 25047 + }, + { + "epoch": 0.5566222222222222, + "grad_norm": 1.6061574220657349, + "learning_rate": 8.869526561458102e-05, + "loss": 1.5543, + "step": 25048 + }, + { + "epoch": 0.5566444444444445, + "grad_norm": 2.0534188747406006, + "learning_rate": 8.869082018226273e-05, + "loss": 2.089, + "step": 25049 + }, + { + "epoch": 0.5566666666666666, + "grad_norm": 1.7749592065811157, + "learning_rate": 8.868637474994444e-05, + "loss": 1.5679, + "step": 25050 + }, + { + "epoch": 0.5566888888888889, + "grad_norm": 1.4333471059799194, + "learning_rate": 8.868192931762615e-05, + "loss": 2.177, + "step": 25051 + }, + { + "epoch": 0.5567111111111112, + "grad_norm": 1.3617366552352905, + "learning_rate": 8.867748388530786e-05, + "loss": 2.0119, + "step": 25052 + }, + { + "epoch": 0.5567333333333333, + "grad_norm": 1.37482488155365, + "learning_rate": 8.867303845298955e-05, + "loss": 1.9341, + "step": 25053 + }, + { + "epoch": 0.5567555555555556, + "grad_norm": 1.5708028078079224, + "learning_rate": 8.866859302067126e-05, + "loss": 2.3296, + "step": 25054 + }, + { + "epoch": 0.5567777777777778, + "grad_norm": 1.42710280418396, + "learning_rate": 8.866414758835297e-05, + "loss": 2.0739, + "step": 25055 + }, + { + "epoch": 0.5568, + "grad_norm": 1.6513372659683228, + "learning_rate": 8.865970215603467e-05, + "loss": 2.1563, + "step": 25056 + }, + { + "epoch": 0.5568222222222222, + "grad_norm": 1.4783315658569336, + "learning_rate": 8.865525672371638e-05, + "loss": 1.7002, + "step": 25057 + }, + { + "epoch": 0.5568444444444445, + "grad_norm": 1.4798046350479126, + "learning_rate": 8.865081129139809e-05, + "loss": 2.2017, + "step": 25058 + }, + { + "epoch": 0.5568666666666666, + "grad_norm": 1.5378378629684448, + "learning_rate": 8.86463658590798e-05, + "loss": 2.1798, + "step": 25059 + }, + { + "epoch": 0.5568888888888889, + "grad_norm": 1.6290702819824219, + "learning_rate": 8.864192042676151e-05, + "loss": 1.8907, + "step": 25060 + }, + { + "epoch": 0.5569111111111111, + "grad_norm": 1.5109094381332397, + "learning_rate": 8.863747499444322e-05, + "loss": 2.0781, + "step": 25061 + }, + { + "epoch": 0.5569333333333333, + "grad_norm": 1.7988399267196655, + "learning_rate": 8.863302956212493e-05, + "loss": 2.292, + "step": 25062 + }, + { + "epoch": 0.5569555555555555, + "grad_norm": 1.8292760848999023, + "learning_rate": 8.862858412980662e-05, + "loss": 2.1439, + "step": 25063 + }, + { + "epoch": 0.5569777777777778, + "grad_norm": 1.753173828125, + "learning_rate": 8.862413869748833e-05, + "loss": 2.0589, + "step": 25064 + }, + { + "epoch": 0.557, + "grad_norm": 1.4849519729614258, + "learning_rate": 8.861969326517004e-05, + "loss": 1.8589, + "step": 25065 + }, + { + "epoch": 0.5570222222222222, + "grad_norm": 1.563999056816101, + "learning_rate": 8.861524783285174e-05, + "loss": 1.8839, + "step": 25066 + }, + { + "epoch": 0.5570444444444445, + "grad_norm": 2.108994245529175, + "learning_rate": 8.861080240053345e-05, + "loss": 1.4836, + "step": 25067 + }, + { + "epoch": 0.5570666666666667, + "grad_norm": 1.462251901626587, + "learning_rate": 8.860635696821517e-05, + "loss": 2.081, + "step": 25068 + }, + { + "epoch": 0.5570888888888889, + "grad_norm": 1.4455344676971436, + "learning_rate": 8.860191153589688e-05, + "loss": 1.5538, + "step": 25069 + }, + { + "epoch": 0.5571111111111111, + "grad_norm": 1.5053396224975586, + "learning_rate": 8.859746610357858e-05, + "loss": 2.1755, + "step": 25070 + }, + { + "epoch": 0.5571333333333334, + "grad_norm": 2.1108238697052, + "learning_rate": 8.859302067126029e-05, + "loss": 2.1607, + "step": 25071 + }, + { + "epoch": 0.5571555555555555, + "grad_norm": 1.5452802181243896, + "learning_rate": 8.8588575238942e-05, + "loss": 2.1198, + "step": 25072 + }, + { + "epoch": 0.5571777777777778, + "grad_norm": 1.305687427520752, + "learning_rate": 8.858412980662369e-05, + "loss": 1.3974, + "step": 25073 + }, + { + "epoch": 0.5572, + "grad_norm": 1.6314971446990967, + "learning_rate": 8.85796843743054e-05, + "loss": 1.9672, + "step": 25074 + }, + { + "epoch": 0.5572222222222222, + "grad_norm": 1.672951340675354, + "learning_rate": 8.857523894198711e-05, + "loss": 2.2149, + "step": 25075 + }, + { + "epoch": 0.5572444444444444, + "grad_norm": 1.5819165706634521, + "learning_rate": 8.857079350966881e-05, + "loss": 1.5749, + "step": 25076 + }, + { + "epoch": 0.5572666666666667, + "grad_norm": 1.9342191219329834, + "learning_rate": 8.856634807735053e-05, + "loss": 2.265, + "step": 25077 + }, + { + "epoch": 0.5572888888888888, + "grad_norm": 1.6798408031463623, + "learning_rate": 8.856190264503224e-05, + "loss": 2.0166, + "step": 25078 + }, + { + "epoch": 0.5573111111111111, + "grad_norm": 1.69929838180542, + "learning_rate": 8.855745721271395e-05, + "loss": 2.0431, + "step": 25079 + }, + { + "epoch": 0.5573333333333333, + "grad_norm": 1.7669845819473267, + "learning_rate": 8.855301178039565e-05, + "loss": 2.2433, + "step": 25080 + }, + { + "epoch": 0.5573555555555556, + "grad_norm": 1.5253289937973022, + "learning_rate": 8.854856634807736e-05, + "loss": 1.6538, + "step": 25081 + }, + { + "epoch": 0.5573777777777777, + "grad_norm": 1.7386434078216553, + "learning_rate": 8.854412091575907e-05, + "loss": 1.9486, + "step": 25082 + }, + { + "epoch": 0.5574, + "grad_norm": 1.0491255521774292, + "learning_rate": 8.853967548344076e-05, + "loss": 0.7682, + "step": 25083 + }, + { + "epoch": 0.5574222222222223, + "grad_norm": 1.6801350116729736, + "learning_rate": 8.853523005112247e-05, + "loss": 2.12, + "step": 25084 + }, + { + "epoch": 0.5574444444444444, + "grad_norm": 1.7507060766220093, + "learning_rate": 8.853078461880418e-05, + "loss": 2.1324, + "step": 25085 + }, + { + "epoch": 0.5574666666666667, + "grad_norm": 1.7973806858062744, + "learning_rate": 8.852633918648589e-05, + "loss": 2.0655, + "step": 25086 + }, + { + "epoch": 0.5574888888888889, + "grad_norm": 1.6366549730300903, + "learning_rate": 8.85218937541676e-05, + "loss": 2.0583, + "step": 25087 + }, + { + "epoch": 0.5575111111111111, + "grad_norm": 1.4576225280761719, + "learning_rate": 8.851744832184931e-05, + "loss": 1.3184, + "step": 25088 + }, + { + "epoch": 0.5575333333333333, + "grad_norm": 2.1194889545440674, + "learning_rate": 8.851300288953102e-05, + "loss": 1.7683, + "step": 25089 + }, + { + "epoch": 0.5575555555555556, + "grad_norm": 1.4802414178848267, + "learning_rate": 8.850855745721272e-05, + "loss": 1.4868, + "step": 25090 + }, + { + "epoch": 0.5575777777777777, + "grad_norm": 1.659393310546875, + "learning_rate": 8.850411202489443e-05, + "loss": 1.8494, + "step": 25091 + }, + { + "epoch": 0.5576, + "grad_norm": 1.835160493850708, + "learning_rate": 8.849966659257613e-05, + "loss": 1.7385, + "step": 25092 + }, + { + "epoch": 0.5576222222222222, + "grad_norm": 1.8487638235092163, + "learning_rate": 8.849522116025783e-05, + "loss": 1.9471, + "step": 25093 + }, + { + "epoch": 0.5576444444444445, + "grad_norm": 1.6787410974502563, + "learning_rate": 8.849077572793954e-05, + "loss": 1.6944, + "step": 25094 + }, + { + "epoch": 0.5576666666666666, + "grad_norm": 1.7631950378417969, + "learning_rate": 8.848633029562125e-05, + "loss": 1.5408, + "step": 25095 + }, + { + "epoch": 0.5576888888888889, + "grad_norm": 1.939127802848816, + "learning_rate": 8.848188486330296e-05, + "loss": 1.6063, + "step": 25096 + }, + { + "epoch": 0.5577111111111112, + "grad_norm": 1.4154397249221802, + "learning_rate": 8.847743943098467e-05, + "loss": 1.2231, + "step": 25097 + }, + { + "epoch": 0.5577333333333333, + "grad_norm": 1.8410276174545288, + "learning_rate": 8.847299399866638e-05, + "loss": 1.8084, + "step": 25098 + }, + { + "epoch": 0.5577555555555556, + "grad_norm": 1.5185855627059937, + "learning_rate": 8.846854856634809e-05, + "loss": 0.9481, + "step": 25099 + }, + { + "epoch": 0.5577777777777778, + "grad_norm": 1.788804292678833, + "learning_rate": 8.846410313402978e-05, + "loss": 1.6403, + "step": 25100 + }, + { + "epoch": 0.5578, + "grad_norm": 1.5832068920135498, + "learning_rate": 8.84596577017115e-05, + "loss": 1.812, + "step": 25101 + }, + { + "epoch": 0.5578222222222222, + "grad_norm": 1.5860427618026733, + "learning_rate": 8.84552122693932e-05, + "loss": 2.5241, + "step": 25102 + }, + { + "epoch": 0.5578444444444445, + "grad_norm": 1.4625405073165894, + "learning_rate": 8.84507668370749e-05, + "loss": 2.3494, + "step": 25103 + }, + { + "epoch": 0.5578666666666666, + "grad_norm": 1.3589277267456055, + "learning_rate": 8.844632140475661e-05, + "loss": 2.3455, + "step": 25104 + }, + { + "epoch": 0.5578888888888889, + "grad_norm": 1.4070804119110107, + "learning_rate": 8.844187597243833e-05, + "loss": 2.3014, + "step": 25105 + }, + { + "epoch": 0.5579111111111111, + "grad_norm": 1.4124137163162231, + "learning_rate": 8.843743054012003e-05, + "loss": 1.0, + "step": 25106 + }, + { + "epoch": 0.5579333333333333, + "grad_norm": 1.5017024278640747, + "learning_rate": 8.843298510780174e-05, + "loss": 1.8322, + "step": 25107 + }, + { + "epoch": 0.5579555555555555, + "grad_norm": 1.6320382356643677, + "learning_rate": 8.842853967548345e-05, + "loss": 2.1433, + "step": 25108 + }, + { + "epoch": 0.5579777777777778, + "grad_norm": 1.7392261028289795, + "learning_rate": 8.842409424316516e-05, + "loss": 2.0716, + "step": 25109 + }, + { + "epoch": 0.558, + "grad_norm": 1.5695531368255615, + "learning_rate": 8.841964881084685e-05, + "loss": 2.1222, + "step": 25110 + }, + { + "epoch": 0.5580222222222222, + "grad_norm": 1.5330922603607178, + "learning_rate": 8.841520337852856e-05, + "loss": 2.0254, + "step": 25111 + }, + { + "epoch": 0.5580444444444445, + "grad_norm": 1.40812087059021, + "learning_rate": 8.841075794621027e-05, + "loss": 1.6054, + "step": 25112 + }, + { + "epoch": 0.5580666666666667, + "grad_norm": 1.6731535196304321, + "learning_rate": 8.840631251389197e-05, + "loss": 1.9831, + "step": 25113 + }, + { + "epoch": 0.5580888888888889, + "grad_norm": 1.7269052267074585, + "learning_rate": 8.840186708157369e-05, + "loss": 2.197, + "step": 25114 + }, + { + "epoch": 0.5581111111111111, + "grad_norm": 1.8623415231704712, + "learning_rate": 8.83974216492554e-05, + "loss": 1.6876, + "step": 25115 + }, + { + "epoch": 0.5581333333333334, + "grad_norm": 1.8906834125518799, + "learning_rate": 8.839297621693711e-05, + "loss": 2.5045, + "step": 25116 + }, + { + "epoch": 0.5581555555555555, + "grad_norm": 1.070365071296692, + "learning_rate": 8.838853078461881e-05, + "loss": 1.1872, + "step": 25117 + }, + { + "epoch": 0.5581777777777778, + "grad_norm": 0.3382456600666046, + "learning_rate": 8.838408535230052e-05, + "loss": 0.0277, + "step": 25118 + }, + { + "epoch": 0.5582, + "grad_norm": 1.8913952112197876, + "learning_rate": 8.837963991998223e-05, + "loss": 2.2064, + "step": 25119 + }, + { + "epoch": 0.5582222222222222, + "grad_norm": 1.9757825136184692, + "learning_rate": 8.837519448766392e-05, + "loss": 2.3808, + "step": 25120 + }, + { + "epoch": 0.5582444444444444, + "grad_norm": 1.481068730354309, + "learning_rate": 8.837074905534563e-05, + "loss": 1.8185, + "step": 25121 + }, + { + "epoch": 0.5582666666666667, + "grad_norm": 1.7362627983093262, + "learning_rate": 8.836630362302734e-05, + "loss": 2.045, + "step": 25122 + }, + { + "epoch": 0.5582888888888888, + "grad_norm": 1.7237762212753296, + "learning_rate": 8.836185819070905e-05, + "loss": 1.9348, + "step": 25123 + }, + { + "epoch": 0.5583111111111111, + "grad_norm": 1.5723251104354858, + "learning_rate": 8.835741275839076e-05, + "loss": 1.824, + "step": 25124 + }, + { + "epoch": 0.5583333333333333, + "grad_norm": 2.0629472732543945, + "learning_rate": 8.835296732607247e-05, + "loss": 2.4707, + "step": 25125 + }, + { + "epoch": 0.5583555555555556, + "grad_norm": 1.853628396987915, + "learning_rate": 8.834852189375418e-05, + "loss": 1.6413, + "step": 25126 + }, + { + "epoch": 0.5583777777777778, + "grad_norm": 1.5292593240737915, + "learning_rate": 8.834407646143588e-05, + "loss": 1.7817, + "step": 25127 + }, + { + "epoch": 0.5584, + "grad_norm": 1.5123380422592163, + "learning_rate": 8.833963102911759e-05, + "loss": 1.7791, + "step": 25128 + }, + { + "epoch": 0.5584222222222223, + "grad_norm": 2.058326244354248, + "learning_rate": 8.83351855967993e-05, + "loss": 2.4383, + "step": 25129 + }, + { + "epoch": 0.5584444444444444, + "grad_norm": 1.7544281482696533, + "learning_rate": 8.833074016448099e-05, + "loss": 1.9952, + "step": 25130 + }, + { + "epoch": 0.5584666666666667, + "grad_norm": 1.7322680950164795, + "learning_rate": 8.83262947321627e-05, + "loss": 1.9922, + "step": 25131 + }, + { + "epoch": 0.5584888888888889, + "grad_norm": 1.798189640045166, + "learning_rate": 8.832184929984441e-05, + "loss": 2.1415, + "step": 25132 + }, + { + "epoch": 0.5585111111111111, + "grad_norm": 1.7430840730667114, + "learning_rate": 8.831740386752612e-05, + "loss": 1.8528, + "step": 25133 + }, + { + "epoch": 0.5585333333333333, + "grad_norm": 1.6595499515533447, + "learning_rate": 8.831295843520783e-05, + "loss": 1.6088, + "step": 25134 + }, + { + "epoch": 0.5585555555555556, + "grad_norm": 1.873802900314331, + "learning_rate": 8.830851300288954e-05, + "loss": 1.8963, + "step": 25135 + }, + { + "epoch": 0.5585777777777777, + "grad_norm": 1.6037418842315674, + "learning_rate": 8.830406757057125e-05, + "loss": 1.9936, + "step": 25136 + }, + { + "epoch": 0.5586, + "grad_norm": 1.7320588827133179, + "learning_rate": 8.829962213825295e-05, + "loss": 2.0704, + "step": 25137 + }, + { + "epoch": 0.5586222222222222, + "grad_norm": 1.5538798570632935, + "learning_rate": 8.829517670593466e-05, + "loss": 1.9575, + "step": 25138 + }, + { + "epoch": 0.5586444444444445, + "grad_norm": 1.7341487407684326, + "learning_rate": 8.829073127361636e-05, + "loss": 2.1159, + "step": 25139 + }, + { + "epoch": 0.5586666666666666, + "grad_norm": 1.3691928386688232, + "learning_rate": 8.828628584129806e-05, + "loss": 1.4764, + "step": 25140 + }, + { + "epoch": 0.5586888888888889, + "grad_norm": 1.517678141593933, + "learning_rate": 8.828184040897977e-05, + "loss": 1.6072, + "step": 25141 + }, + { + "epoch": 0.5587111111111112, + "grad_norm": 1.7343122959136963, + "learning_rate": 8.82773949766615e-05, + "loss": 2.3817, + "step": 25142 + }, + { + "epoch": 0.5587333333333333, + "grad_norm": 1.5004314184188843, + "learning_rate": 8.827294954434319e-05, + "loss": 1.9208, + "step": 25143 + }, + { + "epoch": 0.5587555555555556, + "grad_norm": 2.1103506088256836, + "learning_rate": 8.82685041120249e-05, + "loss": 2.2693, + "step": 25144 + }, + { + "epoch": 0.5587777777777778, + "grad_norm": 1.7467249631881714, + "learning_rate": 8.826405867970661e-05, + "loss": 2.183, + "step": 25145 + }, + { + "epoch": 0.5588, + "grad_norm": 1.4826949834823608, + "learning_rate": 8.825961324738832e-05, + "loss": 1.545, + "step": 25146 + }, + { + "epoch": 0.5588222222222222, + "grad_norm": 1.5557191371917725, + "learning_rate": 8.825516781507001e-05, + "loss": 1.5978, + "step": 25147 + }, + { + "epoch": 0.5588444444444445, + "grad_norm": 1.6098825931549072, + "learning_rate": 8.825072238275172e-05, + "loss": 1.5343, + "step": 25148 + }, + { + "epoch": 0.5588666666666666, + "grad_norm": 1.8505737781524658, + "learning_rate": 8.824627695043343e-05, + "loss": 1.8377, + "step": 25149 + }, + { + "epoch": 0.5588888888888889, + "grad_norm": 1.4390512704849243, + "learning_rate": 8.824183151811513e-05, + "loss": 1.3804, + "step": 25150 + }, + { + "epoch": 0.5589111111111111, + "grad_norm": 1.4105268716812134, + "learning_rate": 8.823738608579685e-05, + "loss": 2.5203, + "step": 25151 + }, + { + "epoch": 0.5589333333333333, + "grad_norm": 1.429821491241455, + "learning_rate": 8.823294065347856e-05, + "loss": 2.5985, + "step": 25152 + }, + { + "epoch": 0.5589555555555555, + "grad_norm": 1.441236138343811, + "learning_rate": 8.822849522116026e-05, + "loss": 0.6427, + "step": 25153 + }, + { + "epoch": 0.5589777777777778, + "grad_norm": 1.460187554359436, + "learning_rate": 8.822404978884197e-05, + "loss": 1.9902, + "step": 25154 + }, + { + "epoch": 0.559, + "grad_norm": 1.2783151865005493, + "learning_rate": 8.821960435652368e-05, + "loss": 1.0684, + "step": 25155 + }, + { + "epoch": 0.5590222222222222, + "grad_norm": 1.341304063796997, + "learning_rate": 8.821515892420539e-05, + "loss": 1.911, + "step": 25156 + }, + { + "epoch": 0.5590444444444445, + "grad_norm": 1.565393090248108, + "learning_rate": 8.821071349188708e-05, + "loss": 2.0663, + "step": 25157 + }, + { + "epoch": 0.5590666666666667, + "grad_norm": 2.0223257541656494, + "learning_rate": 8.82062680595688e-05, + "loss": 2.627, + "step": 25158 + }, + { + "epoch": 0.5590888888888889, + "grad_norm": 1.4590426683425903, + "learning_rate": 8.82018226272505e-05, + "loss": 1.6162, + "step": 25159 + }, + { + "epoch": 0.5591111111111111, + "grad_norm": 1.4461957216262817, + "learning_rate": 8.819737719493221e-05, + "loss": 1.7326, + "step": 25160 + }, + { + "epoch": 0.5591333333333334, + "grad_norm": 1.526092529296875, + "learning_rate": 8.819293176261392e-05, + "loss": 2.0117, + "step": 25161 + }, + { + "epoch": 0.5591555555555555, + "grad_norm": 1.4501690864562988, + "learning_rate": 8.818848633029563e-05, + "loss": 1.9252, + "step": 25162 + }, + { + "epoch": 0.5591777777777778, + "grad_norm": 1.68050217628479, + "learning_rate": 8.818404089797734e-05, + "loss": 2.2222, + "step": 25163 + }, + { + "epoch": 0.5592, + "grad_norm": 1.6856437921524048, + "learning_rate": 8.817959546565904e-05, + "loss": 1.499, + "step": 25164 + }, + { + "epoch": 0.5592222222222222, + "grad_norm": 1.6665334701538086, + "learning_rate": 8.817515003334075e-05, + "loss": 2.2639, + "step": 25165 + }, + { + "epoch": 0.5592444444444444, + "grad_norm": 1.4440217018127441, + "learning_rate": 8.817070460102246e-05, + "loss": 1.7205, + "step": 25166 + }, + { + "epoch": 0.5592666666666667, + "grad_norm": 1.6238607168197632, + "learning_rate": 8.816625916870415e-05, + "loss": 2.0203, + "step": 25167 + }, + { + "epoch": 0.5592888888888888, + "grad_norm": 1.5175899267196655, + "learning_rate": 8.816181373638586e-05, + "loss": 1.76, + "step": 25168 + }, + { + "epoch": 0.5593111111111111, + "grad_norm": 1.5204845666885376, + "learning_rate": 8.815736830406757e-05, + "loss": 2.231, + "step": 25169 + }, + { + "epoch": 0.5593333333333333, + "grad_norm": 1.8707369565963745, + "learning_rate": 8.815292287174928e-05, + "loss": 2.2178, + "step": 25170 + }, + { + "epoch": 0.5593555555555556, + "grad_norm": 1.6467576026916504, + "learning_rate": 8.814847743943099e-05, + "loss": 2.2364, + "step": 25171 + }, + { + "epoch": 0.5593777777777778, + "grad_norm": 1.8275185823440552, + "learning_rate": 8.81440320071127e-05, + "loss": 2.1331, + "step": 25172 + }, + { + "epoch": 0.5594, + "grad_norm": 1.559929370880127, + "learning_rate": 8.813958657479441e-05, + "loss": 2.1326, + "step": 25173 + }, + { + "epoch": 0.5594222222222223, + "grad_norm": 2.1848270893096924, + "learning_rate": 8.81351411424761e-05, + "loss": 1.8675, + "step": 25174 + }, + { + "epoch": 0.5594444444444444, + "grad_norm": 1.3665002584457397, + "learning_rate": 8.813069571015782e-05, + "loss": 1.4474, + "step": 25175 + }, + { + "epoch": 0.5594666666666667, + "grad_norm": 1.7274236679077148, + "learning_rate": 8.812625027783953e-05, + "loss": 1.9424, + "step": 25176 + }, + { + "epoch": 0.5594888888888889, + "grad_norm": 1.6203581094741821, + "learning_rate": 8.812180484552122e-05, + "loss": 1.6601, + "step": 25177 + }, + { + "epoch": 0.5595111111111111, + "grad_norm": 1.8690255880355835, + "learning_rate": 8.811735941320293e-05, + "loss": 1.8835, + "step": 25178 + }, + { + "epoch": 0.5595333333333333, + "grad_norm": 1.4840466976165771, + "learning_rate": 8.811291398088465e-05, + "loss": 1.9987, + "step": 25179 + }, + { + "epoch": 0.5595555555555556, + "grad_norm": 1.9614216089248657, + "learning_rate": 8.810846854856635e-05, + "loss": 1.9445, + "step": 25180 + }, + { + "epoch": 0.5595777777777777, + "grad_norm": 1.560650110244751, + "learning_rate": 8.810402311624806e-05, + "loss": 1.6483, + "step": 25181 + }, + { + "epoch": 0.5596, + "grad_norm": 1.543728232383728, + "learning_rate": 8.809957768392977e-05, + "loss": 2.0516, + "step": 25182 + }, + { + "epoch": 0.5596222222222222, + "grad_norm": 2.026848316192627, + "learning_rate": 8.809513225161148e-05, + "loss": 2.276, + "step": 25183 + }, + { + "epoch": 0.5596444444444445, + "grad_norm": 2.0482680797576904, + "learning_rate": 8.809068681929318e-05, + "loss": 2.0603, + "step": 25184 + }, + { + "epoch": 0.5596666666666666, + "grad_norm": 1.650883674621582, + "learning_rate": 8.808624138697489e-05, + "loss": 1.6454, + "step": 25185 + }, + { + "epoch": 0.5596888888888889, + "grad_norm": 1.5135639905929565, + "learning_rate": 8.80817959546566e-05, + "loss": 1.7825, + "step": 25186 + }, + { + "epoch": 0.5597111111111112, + "grad_norm": 1.509371042251587, + "learning_rate": 8.807735052233829e-05, + "loss": 1.7063, + "step": 25187 + }, + { + "epoch": 0.5597333333333333, + "grad_norm": 1.7396836280822754, + "learning_rate": 8.807290509002001e-05, + "loss": 2.1483, + "step": 25188 + }, + { + "epoch": 0.5597555555555556, + "grad_norm": 1.742016315460205, + "learning_rate": 8.806845965770172e-05, + "loss": 2.0434, + "step": 25189 + }, + { + "epoch": 0.5597777777777778, + "grad_norm": 1.679200291633606, + "learning_rate": 8.806401422538342e-05, + "loss": 1.5045, + "step": 25190 + }, + { + "epoch": 0.5598, + "grad_norm": 1.8576488494873047, + "learning_rate": 8.805956879306513e-05, + "loss": 2.1748, + "step": 25191 + }, + { + "epoch": 0.5598222222222222, + "grad_norm": 1.8510833978652954, + "learning_rate": 8.805512336074684e-05, + "loss": 1.5863, + "step": 25192 + }, + { + "epoch": 0.5598444444444445, + "grad_norm": 2.1455936431884766, + "learning_rate": 8.805067792842855e-05, + "loss": 2.0357, + "step": 25193 + }, + { + "epoch": 0.5598666666666666, + "grad_norm": 1.841348648071289, + "learning_rate": 8.804623249611024e-05, + "loss": 1.9626, + "step": 25194 + }, + { + "epoch": 0.5598888888888889, + "grad_norm": 1.55734384059906, + "learning_rate": 8.804178706379195e-05, + "loss": 1.6384, + "step": 25195 + }, + { + "epoch": 0.5599111111111111, + "grad_norm": 2.034369945526123, + "learning_rate": 8.803734163147366e-05, + "loss": 1.8983, + "step": 25196 + }, + { + "epoch": 0.5599333333333333, + "grad_norm": 1.7569447755813599, + "learning_rate": 8.803289619915537e-05, + "loss": 1.879, + "step": 25197 + }, + { + "epoch": 0.5599555555555555, + "grad_norm": 2.063053607940674, + "learning_rate": 8.802845076683708e-05, + "loss": 1.8524, + "step": 25198 + }, + { + "epoch": 0.5599777777777778, + "grad_norm": 1.9506410360336304, + "learning_rate": 8.802400533451879e-05, + "loss": 2.1881, + "step": 25199 + }, + { + "epoch": 0.56, + "grad_norm": 1.5537521839141846, + "learning_rate": 8.801955990220049e-05, + "loss": 1.5269, + "step": 25200 + }, + { + "epoch": 0.5600222222222222, + "grad_norm": 1.7377948760986328, + "learning_rate": 8.80151144698822e-05, + "loss": 2.5955, + "step": 25201 + }, + { + "epoch": 0.5600444444444445, + "grad_norm": 1.6233863830566406, + "learning_rate": 8.801066903756391e-05, + "loss": 2.6002, + "step": 25202 + }, + { + "epoch": 0.5600666666666667, + "grad_norm": 1.5851396322250366, + "learning_rate": 8.800622360524562e-05, + "loss": 2.5364, + "step": 25203 + }, + { + "epoch": 0.5600888888888889, + "grad_norm": 1.334736943244934, + "learning_rate": 8.800177817292731e-05, + "loss": 2.1957, + "step": 25204 + }, + { + "epoch": 0.5601111111111111, + "grad_norm": 1.3811289072036743, + "learning_rate": 8.799733274060902e-05, + "loss": 2.08, + "step": 25205 + }, + { + "epoch": 0.5601333333333334, + "grad_norm": 1.5644488334655762, + "learning_rate": 8.799288730829073e-05, + "loss": 2.6846, + "step": 25206 + }, + { + "epoch": 0.5601555555555555, + "grad_norm": 1.5639359951019287, + "learning_rate": 8.798844187597244e-05, + "loss": 2.0832, + "step": 25207 + }, + { + "epoch": 0.5601777777777778, + "grad_norm": 1.5768216848373413, + "learning_rate": 8.798399644365415e-05, + "loss": 2.1764, + "step": 25208 + }, + { + "epoch": 0.5602, + "grad_norm": 1.8076982498168945, + "learning_rate": 8.797955101133586e-05, + "loss": 2.2899, + "step": 25209 + }, + { + "epoch": 0.5602222222222222, + "grad_norm": 1.1235891580581665, + "learning_rate": 8.797510557901756e-05, + "loss": 1.1146, + "step": 25210 + }, + { + "epoch": 0.5602444444444444, + "grad_norm": 1.7415649890899658, + "learning_rate": 8.797066014669927e-05, + "loss": 2.2942, + "step": 25211 + }, + { + "epoch": 0.5602666666666667, + "grad_norm": 1.6006947755813599, + "learning_rate": 8.796621471438098e-05, + "loss": 2.2651, + "step": 25212 + }, + { + "epoch": 0.5602888888888888, + "grad_norm": 1.5128785371780396, + "learning_rate": 8.796176928206269e-05, + "loss": 1.7946, + "step": 25213 + }, + { + "epoch": 0.5603111111111111, + "grad_norm": 1.5808520317077637, + "learning_rate": 8.795732384974438e-05, + "loss": 2.2544, + "step": 25214 + }, + { + "epoch": 0.5603333333333333, + "grad_norm": 1.525814175605774, + "learning_rate": 8.795287841742609e-05, + "loss": 1.4875, + "step": 25215 + }, + { + "epoch": 0.5603555555555556, + "grad_norm": 1.5856590270996094, + "learning_rate": 8.794843298510782e-05, + "loss": 1.7909, + "step": 25216 + }, + { + "epoch": 0.5603777777777778, + "grad_norm": 1.6325963735580444, + "learning_rate": 8.794398755278951e-05, + "loss": 2.1416, + "step": 25217 + }, + { + "epoch": 0.5604, + "grad_norm": 2.059194564819336, + "learning_rate": 8.793954212047122e-05, + "loss": 1.8765, + "step": 25218 + }, + { + "epoch": 0.5604222222222223, + "grad_norm": 1.6079334020614624, + "learning_rate": 8.793509668815293e-05, + "loss": 2.1008, + "step": 25219 + }, + { + "epoch": 0.5604444444444444, + "grad_norm": 1.5019943714141846, + "learning_rate": 8.793065125583464e-05, + "loss": 1.961, + "step": 25220 + }, + { + "epoch": 0.5604666666666667, + "grad_norm": 1.7217684984207153, + "learning_rate": 8.792620582351634e-05, + "loss": 2.2064, + "step": 25221 + }, + { + "epoch": 0.5604888888888889, + "grad_norm": 1.7666687965393066, + "learning_rate": 8.792176039119805e-05, + "loss": 1.9191, + "step": 25222 + }, + { + "epoch": 0.5605111111111111, + "grad_norm": 1.4344292879104614, + "learning_rate": 8.791731495887976e-05, + "loss": 1.1761, + "step": 25223 + }, + { + "epoch": 0.5605333333333333, + "grad_norm": 2.339412212371826, + "learning_rate": 8.791286952656145e-05, + "loss": 1.822, + "step": 25224 + }, + { + "epoch": 0.5605555555555556, + "grad_norm": 1.6369621753692627, + "learning_rate": 8.790842409424318e-05, + "loss": 1.9765, + "step": 25225 + }, + { + "epoch": 0.5605777777777777, + "grad_norm": 1.9765450954437256, + "learning_rate": 8.790397866192488e-05, + "loss": 2.1667, + "step": 25226 + }, + { + "epoch": 0.5606, + "grad_norm": 1.5706331729888916, + "learning_rate": 8.789953322960658e-05, + "loss": 1.859, + "step": 25227 + }, + { + "epoch": 0.5606222222222222, + "grad_norm": 1.4491208791732788, + "learning_rate": 8.789508779728829e-05, + "loss": 1.7216, + "step": 25228 + }, + { + "epoch": 0.5606444444444444, + "grad_norm": 1.8563940525054932, + "learning_rate": 8.789064236497e-05, + "loss": 2.1766, + "step": 25229 + }, + { + "epoch": 0.5606666666666666, + "grad_norm": 1.293811559677124, + "learning_rate": 8.788619693265171e-05, + "loss": 0.8898, + "step": 25230 + }, + { + "epoch": 0.5606888888888889, + "grad_norm": 1.5508888959884644, + "learning_rate": 8.78817515003334e-05, + "loss": 1.8485, + "step": 25231 + }, + { + "epoch": 0.5607111111111112, + "grad_norm": 1.7579081058502197, + "learning_rate": 8.787730606801512e-05, + "loss": 2.1352, + "step": 25232 + }, + { + "epoch": 0.5607333333333333, + "grad_norm": 1.6988152265548706, + "learning_rate": 8.787286063569683e-05, + "loss": 1.8743, + "step": 25233 + }, + { + "epoch": 0.5607555555555556, + "grad_norm": 1.710771918296814, + "learning_rate": 8.786841520337853e-05, + "loss": 1.9996, + "step": 25234 + }, + { + "epoch": 0.5607777777777778, + "grad_norm": 1.6037424802780151, + "learning_rate": 8.786396977106024e-05, + "loss": 1.7988, + "step": 25235 + }, + { + "epoch": 0.5608, + "grad_norm": 1.7195894718170166, + "learning_rate": 8.785952433874195e-05, + "loss": 1.9299, + "step": 25236 + }, + { + "epoch": 0.5608222222222222, + "grad_norm": 1.5542490482330322, + "learning_rate": 8.785507890642365e-05, + "loss": 1.4587, + "step": 25237 + }, + { + "epoch": 0.5608444444444445, + "grad_norm": 1.9157278537750244, + "learning_rate": 8.785063347410536e-05, + "loss": 2.0123, + "step": 25238 + }, + { + "epoch": 0.5608666666666666, + "grad_norm": 1.5922707319259644, + "learning_rate": 8.784618804178707e-05, + "loss": 1.7134, + "step": 25239 + }, + { + "epoch": 0.5608888888888889, + "grad_norm": 1.407324194908142, + "learning_rate": 8.784174260946878e-05, + "loss": 1.3508, + "step": 25240 + }, + { + "epoch": 0.5609111111111111, + "grad_norm": 1.816657543182373, + "learning_rate": 8.783729717715047e-05, + "loss": 2.2265, + "step": 25241 + }, + { + "epoch": 0.5609333333333333, + "grad_norm": 1.7667323350906372, + "learning_rate": 8.783285174483218e-05, + "loss": 1.5843, + "step": 25242 + }, + { + "epoch": 0.5609555555555555, + "grad_norm": 1.7764583826065063, + "learning_rate": 8.78284063125139e-05, + "loss": 1.7084, + "step": 25243 + }, + { + "epoch": 0.5609777777777778, + "grad_norm": 1.8567525148391724, + "learning_rate": 8.78239608801956e-05, + "loss": 1.9548, + "step": 25244 + }, + { + "epoch": 0.561, + "grad_norm": 1.7268896102905273, + "learning_rate": 8.781951544787731e-05, + "loss": 2.1631, + "step": 25245 + }, + { + "epoch": 0.5610222222222222, + "grad_norm": 1.588640570640564, + "learning_rate": 8.781507001555902e-05, + "loss": 1.6146, + "step": 25246 + }, + { + "epoch": 0.5610444444444445, + "grad_norm": 2.132821798324585, + "learning_rate": 8.781062458324072e-05, + "loss": 1.9696, + "step": 25247 + }, + { + "epoch": 0.5610666666666667, + "grad_norm": 1.6377253532409668, + "learning_rate": 8.780617915092243e-05, + "loss": 1.4368, + "step": 25248 + }, + { + "epoch": 0.5610888888888889, + "grad_norm": 2.060591697692871, + "learning_rate": 8.780173371860414e-05, + "loss": 1.7435, + "step": 25249 + }, + { + "epoch": 0.5611111111111111, + "grad_norm": 1.711743712425232, + "learning_rate": 8.779728828628585e-05, + "loss": 1.2941, + "step": 25250 + }, + { + "epoch": 0.5611333333333334, + "grad_norm": 1.6078704595565796, + "learning_rate": 8.779284285396754e-05, + "loss": 2.4519, + "step": 25251 + }, + { + "epoch": 0.5611555555555555, + "grad_norm": 1.683768391609192, + "learning_rate": 8.778839742164925e-05, + "loss": 2.858, + "step": 25252 + }, + { + "epoch": 0.5611777777777778, + "grad_norm": 1.5357310771942139, + "learning_rate": 8.778395198933098e-05, + "loss": 2.8118, + "step": 25253 + }, + { + "epoch": 0.5612, + "grad_norm": 1.3293133974075317, + "learning_rate": 8.777950655701267e-05, + "loss": 2.5063, + "step": 25254 + }, + { + "epoch": 0.5612222222222222, + "grad_norm": 1.4129914045333862, + "learning_rate": 8.777506112469438e-05, + "loss": 2.2103, + "step": 25255 + }, + { + "epoch": 0.5612444444444444, + "grad_norm": 1.5258193016052246, + "learning_rate": 8.777061569237609e-05, + "loss": 1.8931, + "step": 25256 + }, + { + "epoch": 0.5612666666666667, + "grad_norm": 1.6168725490570068, + "learning_rate": 8.776617026005779e-05, + "loss": 1.9514, + "step": 25257 + }, + { + "epoch": 0.5612888888888888, + "grad_norm": 1.647147297859192, + "learning_rate": 8.77617248277395e-05, + "loss": 2.1524, + "step": 25258 + }, + { + "epoch": 0.5613111111111111, + "grad_norm": 1.4901888370513916, + "learning_rate": 8.775727939542121e-05, + "loss": 1.8784, + "step": 25259 + }, + { + "epoch": 0.5613333333333334, + "grad_norm": 1.7015953063964844, + "learning_rate": 8.775283396310292e-05, + "loss": 1.7413, + "step": 25260 + }, + { + "epoch": 0.5613555555555556, + "grad_norm": 1.8106613159179688, + "learning_rate": 8.774838853078463e-05, + "loss": 2.456, + "step": 25261 + }, + { + "epoch": 0.5613777777777778, + "grad_norm": 1.8653677701950073, + "learning_rate": 8.774394309846634e-05, + "loss": 1.8331, + "step": 25262 + }, + { + "epoch": 0.5614, + "grad_norm": 1.485082983970642, + "learning_rate": 8.773949766614805e-05, + "loss": 1.9771, + "step": 25263 + }, + { + "epoch": 0.5614222222222223, + "grad_norm": 1.6807743310928345, + "learning_rate": 8.773505223382974e-05, + "loss": 2.3118, + "step": 25264 + }, + { + "epoch": 0.5614444444444444, + "grad_norm": 1.4277927875518799, + "learning_rate": 8.773060680151145e-05, + "loss": 1.6863, + "step": 25265 + }, + { + "epoch": 0.5614666666666667, + "grad_norm": 1.5671919584274292, + "learning_rate": 8.772616136919316e-05, + "loss": 1.9608, + "step": 25266 + }, + { + "epoch": 0.5614888888888889, + "grad_norm": 1.6028257608413696, + "learning_rate": 8.772171593687486e-05, + "loss": 1.6542, + "step": 25267 + }, + { + "epoch": 0.5615111111111111, + "grad_norm": 1.4625035524368286, + "learning_rate": 8.771727050455657e-05, + "loss": 2.1563, + "step": 25268 + }, + { + "epoch": 0.5615333333333333, + "grad_norm": 1.9192852973937988, + "learning_rate": 8.771282507223828e-05, + "loss": 1.7663, + "step": 25269 + }, + { + "epoch": 0.5615555555555556, + "grad_norm": 1.596518635749817, + "learning_rate": 8.770837963991999e-05, + "loss": 1.8391, + "step": 25270 + }, + { + "epoch": 0.5615777777777777, + "grad_norm": 1.869960904121399, + "learning_rate": 8.77039342076017e-05, + "loss": 1.8417, + "step": 25271 + }, + { + "epoch": 0.5616, + "grad_norm": 1.4895312786102295, + "learning_rate": 8.76994887752834e-05, + "loss": 1.9274, + "step": 25272 + }, + { + "epoch": 0.5616222222222222, + "grad_norm": 1.7752124071121216, + "learning_rate": 8.769504334296511e-05, + "loss": 2.0309, + "step": 25273 + }, + { + "epoch": 0.5616444444444444, + "grad_norm": 1.4135031700134277, + "learning_rate": 8.769059791064681e-05, + "loss": 1.8481, + "step": 25274 + }, + { + "epoch": 0.5616666666666666, + "grad_norm": 1.489335298538208, + "learning_rate": 8.768615247832852e-05, + "loss": 1.9864, + "step": 25275 + }, + { + "epoch": 0.5616888888888889, + "grad_norm": 1.7148730754852295, + "learning_rate": 8.768170704601023e-05, + "loss": 2.2432, + "step": 25276 + }, + { + "epoch": 0.5617111111111112, + "grad_norm": 1.6796962022781372, + "learning_rate": 8.767726161369194e-05, + "loss": 2.3246, + "step": 25277 + }, + { + "epoch": 0.5617333333333333, + "grad_norm": 1.7491155862808228, + "learning_rate": 8.767281618137364e-05, + "loss": 1.9976, + "step": 25278 + }, + { + "epoch": 0.5617555555555556, + "grad_norm": 1.6207523345947266, + "learning_rate": 8.766837074905535e-05, + "loss": 1.8278, + "step": 25279 + }, + { + "epoch": 0.5617777777777778, + "grad_norm": 2.00871205329895, + "learning_rate": 8.766392531673706e-05, + "loss": 2.0722, + "step": 25280 + }, + { + "epoch": 0.5618, + "grad_norm": 1.0934069156646729, + "learning_rate": 8.765947988441876e-05, + "loss": 0.9379, + "step": 25281 + }, + { + "epoch": 0.5618222222222222, + "grad_norm": 1.9000760316848755, + "learning_rate": 8.765503445210047e-05, + "loss": 1.8411, + "step": 25282 + }, + { + "epoch": 0.5618444444444445, + "grad_norm": 1.752655029296875, + "learning_rate": 8.765058901978218e-05, + "loss": 1.9778, + "step": 25283 + }, + { + "epoch": 0.5618666666666666, + "grad_norm": 1.6022640466690063, + "learning_rate": 8.764614358746388e-05, + "loss": 1.6126, + "step": 25284 + }, + { + "epoch": 0.5618888888888889, + "grad_norm": 1.6593174934387207, + "learning_rate": 8.764169815514559e-05, + "loss": 2.3195, + "step": 25285 + }, + { + "epoch": 0.5619111111111111, + "grad_norm": 1.9099777936935425, + "learning_rate": 8.76372527228273e-05, + "loss": 2.2803, + "step": 25286 + }, + { + "epoch": 0.5619333333333333, + "grad_norm": 1.6091148853302002, + "learning_rate": 8.763280729050901e-05, + "loss": 1.8329, + "step": 25287 + }, + { + "epoch": 0.5619555555555555, + "grad_norm": 2.04919171333313, + "learning_rate": 8.76283618581907e-05, + "loss": 2.0668, + "step": 25288 + }, + { + "epoch": 0.5619777777777778, + "grad_norm": 1.6717642545700073, + "learning_rate": 8.762391642587241e-05, + "loss": 1.7885, + "step": 25289 + }, + { + "epoch": 0.562, + "grad_norm": 2.0663769245147705, + "learning_rate": 8.761947099355414e-05, + "loss": 2.1753, + "step": 25290 + }, + { + "epoch": 0.5620222222222222, + "grad_norm": 1.6406569480895996, + "learning_rate": 8.761502556123583e-05, + "loss": 2.0932, + "step": 25291 + }, + { + "epoch": 0.5620444444444445, + "grad_norm": 1.6592798233032227, + "learning_rate": 8.761058012891754e-05, + "loss": 1.8012, + "step": 25292 + }, + { + "epoch": 0.5620666666666667, + "grad_norm": 1.3389942646026611, + "learning_rate": 8.760613469659925e-05, + "loss": 1.4157, + "step": 25293 + }, + { + "epoch": 0.5620888888888889, + "grad_norm": 1.5179102420806885, + "learning_rate": 8.760168926428095e-05, + "loss": 1.7008, + "step": 25294 + }, + { + "epoch": 0.5621111111111111, + "grad_norm": 2.3237197399139404, + "learning_rate": 8.759724383196266e-05, + "loss": 2.5221, + "step": 25295 + }, + { + "epoch": 0.5621333333333334, + "grad_norm": 1.9417719841003418, + "learning_rate": 8.759279839964437e-05, + "loss": 1.0572, + "step": 25296 + }, + { + "epoch": 0.5621555555555555, + "grad_norm": 1.9588453769683838, + "learning_rate": 8.758835296732608e-05, + "loss": 1.8536, + "step": 25297 + }, + { + "epoch": 0.5621777777777778, + "grad_norm": 1.9292761087417603, + "learning_rate": 8.758390753500779e-05, + "loss": 1.6143, + "step": 25298 + }, + { + "epoch": 0.5622, + "grad_norm": 1.8616777658462524, + "learning_rate": 8.75794621026895e-05, + "loss": 1.2399, + "step": 25299 + }, + { + "epoch": 0.5622222222222222, + "grad_norm": 1.3126881122589111, + "learning_rate": 8.757501667037121e-05, + "loss": 0.8374, + "step": 25300 + }, + { + "epoch": 0.5622444444444444, + "grad_norm": 1.904229998588562, + "learning_rate": 8.75705712380529e-05, + "loss": 2.7687, + "step": 25301 + }, + { + "epoch": 0.5622666666666667, + "grad_norm": 1.7405216693878174, + "learning_rate": 8.756612580573461e-05, + "loss": 2.6446, + "step": 25302 + }, + { + "epoch": 0.5622888888888888, + "grad_norm": 1.3169325590133667, + "learning_rate": 8.756168037341632e-05, + "loss": 1.3684, + "step": 25303 + }, + { + "epoch": 0.5623111111111111, + "grad_norm": 1.3349109888076782, + "learning_rate": 8.755723494109802e-05, + "loss": 0.9687, + "step": 25304 + }, + { + "epoch": 0.5623333333333334, + "grad_norm": 1.5008281469345093, + "learning_rate": 8.755278950877973e-05, + "loss": 2.0184, + "step": 25305 + }, + { + "epoch": 0.5623555555555556, + "grad_norm": 1.422032356262207, + "learning_rate": 8.754834407646144e-05, + "loss": 2.0879, + "step": 25306 + }, + { + "epoch": 0.5623777777777778, + "grad_norm": 1.5428098440170288, + "learning_rate": 8.754389864414315e-05, + "loss": 2.2109, + "step": 25307 + }, + { + "epoch": 0.5624, + "grad_norm": 1.5082614421844482, + "learning_rate": 8.753945321182486e-05, + "loss": 1.8282, + "step": 25308 + }, + { + "epoch": 0.5624222222222223, + "grad_norm": 1.5564109086990356, + "learning_rate": 8.753500777950657e-05, + "loss": 2.2583, + "step": 25309 + }, + { + "epoch": 0.5624444444444444, + "grad_norm": 1.4597630500793457, + "learning_rate": 8.753056234718828e-05, + "loss": 1.7068, + "step": 25310 + }, + { + "epoch": 0.5624666666666667, + "grad_norm": 1.603948950767517, + "learning_rate": 8.752611691486997e-05, + "loss": 2.1259, + "step": 25311 + }, + { + "epoch": 0.5624888888888889, + "grad_norm": 1.4440677165985107, + "learning_rate": 8.752167148255168e-05, + "loss": 1.6405, + "step": 25312 + }, + { + "epoch": 0.5625111111111111, + "grad_norm": 0.9187580943107605, + "learning_rate": 8.751722605023339e-05, + "loss": 0.5134, + "step": 25313 + }, + { + "epoch": 0.5625333333333333, + "grad_norm": 1.7306079864501953, + "learning_rate": 8.751278061791509e-05, + "loss": 1.9311, + "step": 25314 + }, + { + "epoch": 0.5625555555555556, + "grad_norm": 1.932061791419983, + "learning_rate": 8.75083351855968e-05, + "loss": 2.4453, + "step": 25315 + }, + { + "epoch": 0.5625777777777777, + "grad_norm": 1.70633065700531, + "learning_rate": 8.75038897532785e-05, + "loss": 2.3742, + "step": 25316 + }, + { + "epoch": 0.5626, + "grad_norm": 1.711786150932312, + "learning_rate": 8.749944432096022e-05, + "loss": 1.9739, + "step": 25317 + }, + { + "epoch": 0.5626222222222222, + "grad_norm": 1.4621399641036987, + "learning_rate": 8.749499888864193e-05, + "loss": 1.5956, + "step": 25318 + }, + { + "epoch": 0.5626444444444444, + "grad_norm": 1.4943276643753052, + "learning_rate": 8.749055345632364e-05, + "loss": 1.9649, + "step": 25319 + }, + { + "epoch": 0.5626666666666666, + "grad_norm": 2.1974916458129883, + "learning_rate": 8.748610802400535e-05, + "loss": 2.1227, + "step": 25320 + }, + { + "epoch": 0.5626888888888889, + "grad_norm": 1.8154371976852417, + "learning_rate": 8.748166259168704e-05, + "loss": 2.022, + "step": 25321 + }, + { + "epoch": 0.5627111111111112, + "grad_norm": 2.9448294639587402, + "learning_rate": 8.747721715936875e-05, + "loss": 2.1413, + "step": 25322 + }, + { + "epoch": 0.5627333333333333, + "grad_norm": 1.6197272539138794, + "learning_rate": 8.747277172705046e-05, + "loss": 1.4278, + "step": 25323 + }, + { + "epoch": 0.5627555555555556, + "grad_norm": 1.4971791505813599, + "learning_rate": 8.746832629473217e-05, + "loss": 2.0016, + "step": 25324 + }, + { + "epoch": 0.5627777777777778, + "grad_norm": 1.629438042640686, + "learning_rate": 8.746388086241387e-05, + "loss": 1.9129, + "step": 25325 + }, + { + "epoch": 0.5628, + "grad_norm": 1.8357394933700562, + "learning_rate": 8.745943543009558e-05, + "loss": 2.146, + "step": 25326 + }, + { + "epoch": 0.5628222222222222, + "grad_norm": 2.109930992126465, + "learning_rate": 8.74549899977773e-05, + "loss": 2.1673, + "step": 25327 + }, + { + "epoch": 0.5628444444444445, + "grad_norm": 1.9761508703231812, + "learning_rate": 8.7450544565459e-05, + "loss": 1.6631, + "step": 25328 + }, + { + "epoch": 0.5628666666666666, + "grad_norm": 2.0955917835235596, + "learning_rate": 8.74460991331407e-05, + "loss": 1.9176, + "step": 25329 + }, + { + "epoch": 0.5628888888888889, + "grad_norm": 1.5698069334030151, + "learning_rate": 8.744165370082241e-05, + "loss": 1.761, + "step": 25330 + }, + { + "epoch": 0.5629111111111111, + "grad_norm": 1.6463110446929932, + "learning_rate": 8.743720826850411e-05, + "loss": 1.8677, + "step": 25331 + }, + { + "epoch": 0.5629333333333333, + "grad_norm": 1.896437406539917, + "learning_rate": 8.743276283618582e-05, + "loss": 2.139, + "step": 25332 + }, + { + "epoch": 0.5629555555555555, + "grad_norm": 1.5840256214141846, + "learning_rate": 8.742831740386753e-05, + "loss": 1.9418, + "step": 25333 + }, + { + "epoch": 0.5629777777777778, + "grad_norm": 1.5916732549667358, + "learning_rate": 8.742387197154924e-05, + "loss": 1.9866, + "step": 25334 + }, + { + "epoch": 0.563, + "grad_norm": 1.9358857870101929, + "learning_rate": 8.741942653923095e-05, + "loss": 1.913, + "step": 25335 + }, + { + "epoch": 0.5630222222222222, + "grad_norm": 1.5716407299041748, + "learning_rate": 8.741498110691266e-05, + "loss": 1.6528, + "step": 25336 + }, + { + "epoch": 0.5630444444444445, + "grad_norm": 1.7800015211105347, + "learning_rate": 8.741053567459437e-05, + "loss": 2.1893, + "step": 25337 + }, + { + "epoch": 0.5630666666666667, + "grad_norm": 1.4720046520233154, + "learning_rate": 8.740609024227606e-05, + "loss": 1.8069, + "step": 25338 + }, + { + "epoch": 0.5630888888888889, + "grad_norm": 2.0705015659332275, + "learning_rate": 8.740164480995777e-05, + "loss": 2.1206, + "step": 25339 + }, + { + "epoch": 0.5631111111111111, + "grad_norm": 1.7749475240707397, + "learning_rate": 8.739719937763948e-05, + "loss": 1.988, + "step": 25340 + }, + { + "epoch": 0.5631333333333334, + "grad_norm": 1.589619517326355, + "learning_rate": 8.739275394532118e-05, + "loss": 1.2761, + "step": 25341 + }, + { + "epoch": 0.5631555555555555, + "grad_norm": 2.2986977100372314, + "learning_rate": 8.738830851300289e-05, + "loss": 2.5454, + "step": 25342 + }, + { + "epoch": 0.5631777777777778, + "grad_norm": 1.552807331085205, + "learning_rate": 8.73838630806846e-05, + "loss": 1.3278, + "step": 25343 + }, + { + "epoch": 0.5632, + "grad_norm": 1.748751163482666, + "learning_rate": 8.737941764836631e-05, + "loss": 2.0745, + "step": 25344 + }, + { + "epoch": 0.5632222222222222, + "grad_norm": 1.6754921674728394, + "learning_rate": 8.737497221604802e-05, + "loss": 1.5605, + "step": 25345 + }, + { + "epoch": 0.5632444444444444, + "grad_norm": 1.5707887411117554, + "learning_rate": 8.737052678372973e-05, + "loss": 1.4676, + "step": 25346 + }, + { + "epoch": 0.5632666666666667, + "grad_norm": 1.522444486618042, + "learning_rate": 8.736608135141144e-05, + "loss": 1.5384, + "step": 25347 + }, + { + "epoch": 0.5632888888888888, + "grad_norm": 1.6658782958984375, + "learning_rate": 8.736163591909313e-05, + "loss": 1.5798, + "step": 25348 + }, + { + "epoch": 0.5633111111111111, + "grad_norm": 1.4224662780761719, + "learning_rate": 8.735719048677484e-05, + "loss": 1.1295, + "step": 25349 + }, + { + "epoch": 0.5633333333333334, + "grad_norm": 1.9147154092788696, + "learning_rate": 8.735274505445655e-05, + "loss": 1.8996, + "step": 25350 + }, + { + "epoch": 0.5633555555555556, + "grad_norm": 1.0534144639968872, + "learning_rate": 8.734829962213825e-05, + "loss": 1.1374, + "step": 25351 + }, + { + "epoch": 0.5633777777777778, + "grad_norm": 1.0356683731079102, + "learning_rate": 8.734385418981996e-05, + "loss": 0.9505, + "step": 25352 + }, + { + "epoch": 0.5634, + "grad_norm": 1.3017328977584839, + "learning_rate": 8.733940875750167e-05, + "loss": 2.0527, + "step": 25353 + }, + { + "epoch": 0.5634222222222223, + "grad_norm": 1.7526426315307617, + "learning_rate": 8.733496332518338e-05, + "loss": 2.1973, + "step": 25354 + }, + { + "epoch": 0.5634444444444444, + "grad_norm": 1.5464191436767578, + "learning_rate": 8.733051789286509e-05, + "loss": 2.1379, + "step": 25355 + }, + { + "epoch": 0.5634666666666667, + "grad_norm": 1.9003633260726929, + "learning_rate": 8.73260724605468e-05, + "loss": 2.214, + "step": 25356 + }, + { + "epoch": 0.5634888888888889, + "grad_norm": 1.7008228302001953, + "learning_rate": 8.73216270282285e-05, + "loss": 2.2851, + "step": 25357 + }, + { + "epoch": 0.5635111111111111, + "grad_norm": 1.871749997138977, + "learning_rate": 8.73171815959102e-05, + "loss": 2.1141, + "step": 25358 + }, + { + "epoch": 0.5635333333333333, + "grad_norm": 1.8267793655395508, + "learning_rate": 8.731273616359191e-05, + "loss": 2.3976, + "step": 25359 + }, + { + "epoch": 0.5635555555555556, + "grad_norm": 1.6229313611984253, + "learning_rate": 8.730829073127362e-05, + "loss": 2.0573, + "step": 25360 + }, + { + "epoch": 0.5635777777777777, + "grad_norm": 1.5267568826675415, + "learning_rate": 8.730384529895532e-05, + "loss": 1.8809, + "step": 25361 + }, + { + "epoch": 0.5636, + "grad_norm": 2.0085060596466064, + "learning_rate": 8.729939986663703e-05, + "loss": 2.2086, + "step": 25362 + }, + { + "epoch": 0.5636222222222222, + "grad_norm": 1.1693706512451172, + "learning_rate": 8.729495443431874e-05, + "loss": 1.1021, + "step": 25363 + }, + { + "epoch": 0.5636444444444444, + "grad_norm": 1.5982754230499268, + "learning_rate": 8.729050900200046e-05, + "loss": 1.9899, + "step": 25364 + }, + { + "epoch": 0.5636666666666666, + "grad_norm": 1.4585381746292114, + "learning_rate": 8.728606356968216e-05, + "loss": 1.9564, + "step": 25365 + }, + { + "epoch": 0.5636888888888889, + "grad_norm": 1.5413857698440552, + "learning_rate": 8.728161813736387e-05, + "loss": 2.1478, + "step": 25366 + }, + { + "epoch": 0.5637111111111112, + "grad_norm": 1.5745688676834106, + "learning_rate": 8.727717270504558e-05, + "loss": 1.9242, + "step": 25367 + }, + { + "epoch": 0.5637333333333333, + "grad_norm": 1.760370135307312, + "learning_rate": 8.727272727272727e-05, + "loss": 1.9975, + "step": 25368 + }, + { + "epoch": 0.5637555555555556, + "grad_norm": 1.6587774753570557, + "learning_rate": 8.726828184040898e-05, + "loss": 2.0057, + "step": 25369 + }, + { + "epoch": 0.5637777777777778, + "grad_norm": 1.70685613155365, + "learning_rate": 8.726383640809069e-05, + "loss": 1.92, + "step": 25370 + }, + { + "epoch": 0.5638, + "grad_norm": 1.9009977579116821, + "learning_rate": 8.725939097577239e-05, + "loss": 1.9608, + "step": 25371 + }, + { + "epoch": 0.5638222222222222, + "grad_norm": 1.8968544006347656, + "learning_rate": 8.725494554345411e-05, + "loss": 1.8163, + "step": 25372 + }, + { + "epoch": 0.5638444444444445, + "grad_norm": 1.626792311668396, + "learning_rate": 8.725050011113582e-05, + "loss": 2.1957, + "step": 25373 + }, + { + "epoch": 0.5638666666666666, + "grad_norm": 1.4381468296051025, + "learning_rate": 8.724605467881753e-05, + "loss": 1.5413, + "step": 25374 + }, + { + "epoch": 0.5638888888888889, + "grad_norm": 2.1948907375335693, + "learning_rate": 8.724160924649923e-05, + "loss": 2.0981, + "step": 25375 + }, + { + "epoch": 0.5639111111111111, + "grad_norm": 1.748815894126892, + "learning_rate": 8.723716381418093e-05, + "loss": 2.2593, + "step": 25376 + }, + { + "epoch": 0.5639333333333333, + "grad_norm": 1.7379071712493896, + "learning_rate": 8.723271838186264e-05, + "loss": 2.4922, + "step": 25377 + }, + { + "epoch": 0.5639555555555555, + "grad_norm": 1.5309514999389648, + "learning_rate": 8.722827294954434e-05, + "loss": 1.9083, + "step": 25378 + }, + { + "epoch": 0.5639777777777778, + "grad_norm": 1.3660348653793335, + "learning_rate": 8.722382751722605e-05, + "loss": 0.8973, + "step": 25379 + }, + { + "epoch": 0.564, + "grad_norm": 1.711879849433899, + "learning_rate": 8.721938208490776e-05, + "loss": 1.8977, + "step": 25380 + }, + { + "epoch": 0.5640222222222222, + "grad_norm": 1.8451247215270996, + "learning_rate": 8.721493665258947e-05, + "loss": 2.0085, + "step": 25381 + }, + { + "epoch": 0.5640444444444445, + "grad_norm": 1.7080289125442505, + "learning_rate": 8.721049122027118e-05, + "loss": 2.0674, + "step": 25382 + }, + { + "epoch": 0.5640666666666667, + "grad_norm": 1.6591728925704956, + "learning_rate": 8.720604578795289e-05, + "loss": 1.9249, + "step": 25383 + }, + { + "epoch": 0.5640888888888889, + "grad_norm": 1.966254711151123, + "learning_rate": 8.72016003556346e-05, + "loss": 1.6539, + "step": 25384 + }, + { + "epoch": 0.5641111111111111, + "grad_norm": 1.779379963874817, + "learning_rate": 8.71971549233163e-05, + "loss": 2.3801, + "step": 25385 + }, + { + "epoch": 0.5641333333333334, + "grad_norm": 1.5594086647033691, + "learning_rate": 8.7192709490998e-05, + "loss": 1.9107, + "step": 25386 + }, + { + "epoch": 0.5641555555555555, + "grad_norm": 2.0875155925750732, + "learning_rate": 8.718826405867971e-05, + "loss": 2.1634, + "step": 25387 + }, + { + "epoch": 0.5641777777777778, + "grad_norm": 1.5301467180252075, + "learning_rate": 8.718381862636141e-05, + "loss": 1.6208, + "step": 25388 + }, + { + "epoch": 0.5642, + "grad_norm": 1.5342662334442139, + "learning_rate": 8.717937319404312e-05, + "loss": 1.7199, + "step": 25389 + }, + { + "epoch": 0.5642222222222222, + "grad_norm": 1.5771845579147339, + "learning_rate": 8.717492776172483e-05, + "loss": 1.8793, + "step": 25390 + }, + { + "epoch": 0.5642444444444444, + "grad_norm": 1.7096185684204102, + "learning_rate": 8.717048232940654e-05, + "loss": 1.5729, + "step": 25391 + }, + { + "epoch": 0.5642666666666667, + "grad_norm": 1.581945538520813, + "learning_rate": 8.716603689708825e-05, + "loss": 1.6892, + "step": 25392 + }, + { + "epoch": 0.5642888888888888, + "grad_norm": 1.7134122848510742, + "learning_rate": 8.716159146476996e-05, + "loss": 1.5713, + "step": 25393 + }, + { + "epoch": 0.5643111111111111, + "grad_norm": 1.9367319345474243, + "learning_rate": 8.715714603245167e-05, + "loss": 1.9274, + "step": 25394 + }, + { + "epoch": 0.5643333333333334, + "grad_norm": 1.7920246124267578, + "learning_rate": 8.715270060013336e-05, + "loss": 1.7583, + "step": 25395 + }, + { + "epoch": 0.5643555555555556, + "grad_norm": 1.793280839920044, + "learning_rate": 8.714825516781507e-05, + "loss": 1.9587, + "step": 25396 + }, + { + "epoch": 0.5643777777777778, + "grad_norm": 1.8033382892608643, + "learning_rate": 8.714380973549678e-05, + "loss": 1.6978, + "step": 25397 + }, + { + "epoch": 0.5644, + "grad_norm": 1.66734778881073, + "learning_rate": 8.713936430317848e-05, + "loss": 1.7054, + "step": 25398 + }, + { + "epoch": 0.5644222222222223, + "grad_norm": 1.5701909065246582, + "learning_rate": 8.713491887086019e-05, + "loss": 1.5448, + "step": 25399 + }, + { + "epoch": 0.5644444444444444, + "grad_norm": 1.4017804861068726, + "learning_rate": 8.71304734385419e-05, + "loss": 0.9378, + "step": 25400 + }, + { + "epoch": 0.5644666666666667, + "grad_norm": 1.4009686708450317, + "learning_rate": 8.712602800622362e-05, + "loss": 2.3716, + "step": 25401 + }, + { + "epoch": 0.5644888888888889, + "grad_norm": 1.7983325719833374, + "learning_rate": 8.712158257390532e-05, + "loss": 2.7815, + "step": 25402 + }, + { + "epoch": 0.5645111111111111, + "grad_norm": 1.7624199390411377, + "learning_rate": 8.711713714158703e-05, + "loss": 1.9048, + "step": 25403 + }, + { + "epoch": 0.5645333333333333, + "grad_norm": 1.4177815914154053, + "learning_rate": 8.711269170926874e-05, + "loss": 2.0578, + "step": 25404 + }, + { + "epoch": 0.5645555555555556, + "grad_norm": 1.2785425186157227, + "learning_rate": 8.710824627695043e-05, + "loss": 1.5198, + "step": 25405 + }, + { + "epoch": 0.5645777777777777, + "grad_norm": 1.5148561000823975, + "learning_rate": 8.710380084463214e-05, + "loss": 2.316, + "step": 25406 + }, + { + "epoch": 0.5646, + "grad_norm": 1.6512395143508911, + "learning_rate": 8.709935541231385e-05, + "loss": 2.118, + "step": 25407 + }, + { + "epoch": 0.5646222222222222, + "grad_norm": 1.554484248161316, + "learning_rate": 8.709490997999555e-05, + "loss": 2.4742, + "step": 25408 + }, + { + "epoch": 0.5646444444444444, + "grad_norm": 1.5429123640060425, + "learning_rate": 8.709046454767727e-05, + "loss": 2.1788, + "step": 25409 + }, + { + "epoch": 0.5646666666666667, + "grad_norm": 1.8350896835327148, + "learning_rate": 8.708601911535898e-05, + "loss": 2.4694, + "step": 25410 + }, + { + "epoch": 0.5646888888888889, + "grad_norm": 1.7665791511535645, + "learning_rate": 8.708157368304069e-05, + "loss": 2.2558, + "step": 25411 + }, + { + "epoch": 0.5647111111111112, + "grad_norm": 1.6597291231155396, + "learning_rate": 8.707712825072239e-05, + "loss": 2.2376, + "step": 25412 + }, + { + "epoch": 0.5647333333333333, + "grad_norm": 1.2979716062545776, + "learning_rate": 8.70726828184041e-05, + "loss": 1.1761, + "step": 25413 + }, + { + "epoch": 0.5647555555555556, + "grad_norm": 1.4011353254318237, + "learning_rate": 8.70682373860858e-05, + "loss": 1.821, + "step": 25414 + }, + { + "epoch": 0.5647777777777778, + "grad_norm": 1.4270352125167847, + "learning_rate": 8.70637919537675e-05, + "loss": 1.8507, + "step": 25415 + }, + { + "epoch": 0.5648, + "grad_norm": 1.5780997276306152, + "learning_rate": 8.705934652144921e-05, + "loss": 1.836, + "step": 25416 + }, + { + "epoch": 0.5648222222222222, + "grad_norm": 1.5082387924194336, + "learning_rate": 8.705490108913092e-05, + "loss": 2.2373, + "step": 25417 + }, + { + "epoch": 0.5648444444444445, + "grad_norm": 1.6113547086715698, + "learning_rate": 8.705045565681263e-05, + "loss": 1.6091, + "step": 25418 + }, + { + "epoch": 0.5648666666666666, + "grad_norm": 1.0369369983673096, + "learning_rate": 8.704601022449434e-05, + "loss": 0.5911, + "step": 25419 + }, + { + "epoch": 0.5648888888888889, + "grad_norm": 1.4776184558868408, + "learning_rate": 8.704156479217605e-05, + "loss": 1.1912, + "step": 25420 + }, + { + "epoch": 0.5649111111111111, + "grad_norm": 1.6801823377609253, + "learning_rate": 8.703711935985776e-05, + "loss": 2.1828, + "step": 25421 + }, + { + "epoch": 0.5649333333333333, + "grad_norm": 1.4651553630828857, + "learning_rate": 8.703267392753946e-05, + "loss": 1.4728, + "step": 25422 + }, + { + "epoch": 0.5649555555555555, + "grad_norm": 1.9302335977554321, + "learning_rate": 8.702822849522116e-05, + "loss": 2.0986, + "step": 25423 + }, + { + "epoch": 0.5649777777777778, + "grad_norm": 1.7884409427642822, + "learning_rate": 8.702378306290287e-05, + "loss": 2.0985, + "step": 25424 + }, + { + "epoch": 0.565, + "grad_norm": 1.600262999534607, + "learning_rate": 8.701933763058457e-05, + "loss": 2.1894, + "step": 25425 + }, + { + "epoch": 0.5650222222222222, + "grad_norm": 2.0447311401367188, + "learning_rate": 8.701489219826628e-05, + "loss": 2.156, + "step": 25426 + }, + { + "epoch": 0.5650444444444445, + "grad_norm": 1.5858628749847412, + "learning_rate": 8.701044676594799e-05, + "loss": 1.6714, + "step": 25427 + }, + { + "epoch": 0.5650666666666667, + "grad_norm": 1.7606112957000732, + "learning_rate": 8.70060013336297e-05, + "loss": 1.4105, + "step": 25428 + }, + { + "epoch": 0.5650888888888889, + "grad_norm": 1.3921977281570435, + "learning_rate": 8.700155590131141e-05, + "loss": 1.7857, + "step": 25429 + }, + { + "epoch": 0.5651111111111111, + "grad_norm": 1.7770886421203613, + "learning_rate": 8.699711046899312e-05, + "loss": 1.7963, + "step": 25430 + }, + { + "epoch": 0.5651333333333334, + "grad_norm": 1.8601276874542236, + "learning_rate": 8.699266503667483e-05, + "loss": 2.1159, + "step": 25431 + }, + { + "epoch": 0.5651555555555555, + "grad_norm": 1.8692203760147095, + "learning_rate": 8.698821960435652e-05, + "loss": 1.8525, + "step": 25432 + }, + { + "epoch": 0.5651777777777778, + "grad_norm": 1.6445821523666382, + "learning_rate": 8.698377417203823e-05, + "loss": 0.6555, + "step": 25433 + }, + { + "epoch": 0.5652, + "grad_norm": 1.7919769287109375, + "learning_rate": 8.697932873971994e-05, + "loss": 2.061, + "step": 25434 + }, + { + "epoch": 0.5652222222222222, + "grad_norm": 2.170060396194458, + "learning_rate": 8.697488330740164e-05, + "loss": 2.4497, + "step": 25435 + }, + { + "epoch": 0.5652444444444444, + "grad_norm": 1.3361024856567383, + "learning_rate": 8.697043787508335e-05, + "loss": 1.0493, + "step": 25436 + }, + { + "epoch": 0.5652666666666667, + "grad_norm": 1.9073244333267212, + "learning_rate": 8.696599244276506e-05, + "loss": 2.2122, + "step": 25437 + }, + { + "epoch": 0.5652888888888888, + "grad_norm": 1.7902787923812866, + "learning_rate": 8.696154701044678e-05, + "loss": 1.9999, + "step": 25438 + }, + { + "epoch": 0.5653111111111111, + "grad_norm": 2.0787904262542725, + "learning_rate": 8.695710157812848e-05, + "loss": 2.064, + "step": 25439 + }, + { + "epoch": 0.5653333333333334, + "grad_norm": 1.44843327999115, + "learning_rate": 8.695265614581019e-05, + "loss": 1.5858, + "step": 25440 + }, + { + "epoch": 0.5653555555555555, + "grad_norm": 1.5655252933502197, + "learning_rate": 8.69482107134919e-05, + "loss": 1.8586, + "step": 25441 + }, + { + "epoch": 0.5653777777777778, + "grad_norm": 1.646780014038086, + "learning_rate": 8.69437652811736e-05, + "loss": 1.7439, + "step": 25442 + }, + { + "epoch": 0.5654, + "grad_norm": 1.7771084308624268, + "learning_rate": 8.69393198488553e-05, + "loss": 1.6262, + "step": 25443 + }, + { + "epoch": 0.5654222222222223, + "grad_norm": 1.6789112091064453, + "learning_rate": 8.693487441653701e-05, + "loss": 1.8269, + "step": 25444 + }, + { + "epoch": 0.5654444444444444, + "grad_norm": 2.0198254585266113, + "learning_rate": 8.693042898421871e-05, + "loss": 1.6695, + "step": 25445 + }, + { + "epoch": 0.5654666666666667, + "grad_norm": 1.8060046434402466, + "learning_rate": 8.692598355190043e-05, + "loss": 1.9623, + "step": 25446 + }, + { + "epoch": 0.5654888888888889, + "grad_norm": 1.9680043458938599, + "learning_rate": 8.692153811958214e-05, + "loss": 1.9075, + "step": 25447 + }, + { + "epoch": 0.5655111111111111, + "grad_norm": 1.6656614542007446, + "learning_rate": 8.691709268726385e-05, + "loss": 1.648, + "step": 25448 + }, + { + "epoch": 0.5655333333333333, + "grad_norm": 1.683111548423767, + "learning_rate": 8.691264725494555e-05, + "loss": 1.0773, + "step": 25449 + }, + { + "epoch": 0.5655555555555556, + "grad_norm": 1.7862014770507812, + "learning_rate": 8.690820182262726e-05, + "loss": 1.8669, + "step": 25450 + }, + { + "epoch": 0.5655777777777777, + "grad_norm": 1.6154983043670654, + "learning_rate": 8.690375639030897e-05, + "loss": 1.9956, + "step": 25451 + }, + { + "epoch": 0.5656, + "grad_norm": 1.482670783996582, + "learning_rate": 8.689931095799066e-05, + "loss": 2.263, + "step": 25452 + }, + { + "epoch": 0.5656222222222222, + "grad_norm": 1.6822906732559204, + "learning_rate": 8.689486552567237e-05, + "loss": 0.0431, + "step": 25453 + }, + { + "epoch": 0.5656444444444444, + "grad_norm": 1.6634759902954102, + "learning_rate": 8.689042009335408e-05, + "loss": 2.292, + "step": 25454 + }, + { + "epoch": 0.5656666666666667, + "grad_norm": 1.638785481452942, + "learning_rate": 8.688597466103579e-05, + "loss": 2.1165, + "step": 25455 + }, + { + "epoch": 0.5656888888888889, + "grad_norm": 1.6883729696273804, + "learning_rate": 8.68815292287175e-05, + "loss": 2.2838, + "step": 25456 + }, + { + "epoch": 0.5657111111111112, + "grad_norm": 1.673374056816101, + "learning_rate": 8.687708379639921e-05, + "loss": 2.4126, + "step": 25457 + }, + { + "epoch": 0.5657333333333333, + "grad_norm": 1.664180040359497, + "learning_rate": 8.687263836408092e-05, + "loss": 2.2606, + "step": 25458 + }, + { + "epoch": 0.5657555555555556, + "grad_norm": 1.4526246786117554, + "learning_rate": 8.686819293176262e-05, + "loss": 2.0279, + "step": 25459 + }, + { + "epoch": 0.5657777777777778, + "grad_norm": 1.7229489088058472, + "learning_rate": 8.686374749944433e-05, + "loss": 2.5118, + "step": 25460 + }, + { + "epoch": 0.5658, + "grad_norm": 1.9437999725341797, + "learning_rate": 8.685930206712604e-05, + "loss": 2.7655, + "step": 25461 + }, + { + "epoch": 0.5658222222222222, + "grad_norm": 1.7487212419509888, + "learning_rate": 8.685485663480773e-05, + "loss": 2.2664, + "step": 25462 + }, + { + "epoch": 0.5658444444444445, + "grad_norm": 1.682439923286438, + "learning_rate": 8.685041120248944e-05, + "loss": 2.0246, + "step": 25463 + }, + { + "epoch": 0.5658666666666666, + "grad_norm": 1.7988824844360352, + "learning_rate": 8.684596577017115e-05, + "loss": 2.1121, + "step": 25464 + }, + { + "epoch": 0.5658888888888889, + "grad_norm": 1.8382154703140259, + "learning_rate": 8.684152033785286e-05, + "loss": 2.0848, + "step": 25465 + }, + { + "epoch": 0.5659111111111111, + "grad_norm": 1.9211585521697998, + "learning_rate": 8.683707490553457e-05, + "loss": 2.1939, + "step": 25466 + }, + { + "epoch": 0.5659333333333333, + "grad_norm": 1.4006104469299316, + "learning_rate": 8.683262947321628e-05, + "loss": 1.895, + "step": 25467 + }, + { + "epoch": 0.5659555555555555, + "grad_norm": 1.5469164848327637, + "learning_rate": 8.682818404089799e-05, + "loss": 2.3618, + "step": 25468 + }, + { + "epoch": 0.5659777777777778, + "grad_norm": 1.4230570793151855, + "learning_rate": 8.682373860857969e-05, + "loss": 1.7998, + "step": 25469 + }, + { + "epoch": 0.566, + "grad_norm": 1.1125915050506592, + "learning_rate": 8.68192931762614e-05, + "loss": 0.9492, + "step": 25470 + }, + { + "epoch": 0.5660222222222222, + "grad_norm": 1.9148786067962646, + "learning_rate": 8.68148477439431e-05, + "loss": 1.5855, + "step": 25471 + }, + { + "epoch": 0.5660444444444445, + "grad_norm": 1.9745311737060547, + "learning_rate": 8.68104023116248e-05, + "loss": 2.2945, + "step": 25472 + }, + { + "epoch": 0.5660666666666667, + "grad_norm": 1.7374578714370728, + "learning_rate": 8.680595687930651e-05, + "loss": 2.0763, + "step": 25473 + }, + { + "epoch": 0.5660888888888889, + "grad_norm": 1.493938684463501, + "learning_rate": 8.680151144698822e-05, + "loss": 2.2095, + "step": 25474 + }, + { + "epoch": 0.5661111111111111, + "grad_norm": 1.388858675956726, + "learning_rate": 8.679706601466993e-05, + "loss": 1.9734, + "step": 25475 + }, + { + "epoch": 0.5661333333333334, + "grad_norm": 1.6583508253097534, + "learning_rate": 8.679262058235164e-05, + "loss": 2.0741, + "step": 25476 + }, + { + "epoch": 0.5661555555555555, + "grad_norm": 1.6548030376434326, + "learning_rate": 8.678817515003335e-05, + "loss": 2.3124, + "step": 25477 + }, + { + "epoch": 0.5661777777777778, + "grad_norm": 1.4862264394760132, + "learning_rate": 8.678372971771506e-05, + "loss": 1.6101, + "step": 25478 + }, + { + "epoch": 0.5662, + "grad_norm": 1.6298364400863647, + "learning_rate": 8.677928428539675e-05, + "loss": 1.9753, + "step": 25479 + }, + { + "epoch": 0.5662222222222222, + "grad_norm": 1.9583399295806885, + "learning_rate": 8.677483885307846e-05, + "loss": 1.9028, + "step": 25480 + }, + { + "epoch": 0.5662444444444444, + "grad_norm": 1.9671558141708374, + "learning_rate": 8.677039342076017e-05, + "loss": 2.0369, + "step": 25481 + }, + { + "epoch": 0.5662666666666667, + "grad_norm": 1.336354374885559, + "learning_rate": 8.676594798844187e-05, + "loss": 1.4759, + "step": 25482 + }, + { + "epoch": 0.5662888888888888, + "grad_norm": 1.658531665802002, + "learning_rate": 8.676150255612359e-05, + "loss": 2.0151, + "step": 25483 + }, + { + "epoch": 0.5663111111111111, + "grad_norm": 1.5404964685440063, + "learning_rate": 8.67570571238053e-05, + "loss": 2.1178, + "step": 25484 + }, + { + "epoch": 0.5663333333333334, + "grad_norm": 1.5054473876953125, + "learning_rate": 8.6752611691487e-05, + "loss": 1.8238, + "step": 25485 + }, + { + "epoch": 0.5663555555555555, + "grad_norm": 1.724499225616455, + "learning_rate": 8.674816625916871e-05, + "loss": 1.7396, + "step": 25486 + }, + { + "epoch": 0.5663777777777778, + "grad_norm": 1.8233646154403687, + "learning_rate": 8.674372082685042e-05, + "loss": 1.731, + "step": 25487 + }, + { + "epoch": 0.5664, + "grad_norm": 1.9924031496047974, + "learning_rate": 8.673927539453213e-05, + "loss": 1.9948, + "step": 25488 + }, + { + "epoch": 0.5664222222222223, + "grad_norm": 1.707899808883667, + "learning_rate": 8.673482996221382e-05, + "loss": 2.2374, + "step": 25489 + }, + { + "epoch": 0.5664444444444444, + "grad_norm": 2.0084919929504395, + "learning_rate": 8.673038452989553e-05, + "loss": 2.199, + "step": 25490 + }, + { + "epoch": 0.5664666666666667, + "grad_norm": 1.6072585582733154, + "learning_rate": 8.672593909757724e-05, + "loss": 1.6715, + "step": 25491 + }, + { + "epoch": 0.5664888888888889, + "grad_norm": 1.8686779737472534, + "learning_rate": 8.672149366525895e-05, + "loss": 1.6971, + "step": 25492 + }, + { + "epoch": 0.5665111111111111, + "grad_norm": 1.6931346654891968, + "learning_rate": 8.671704823294066e-05, + "loss": 1.9617, + "step": 25493 + }, + { + "epoch": 0.5665333333333333, + "grad_norm": 1.8643832206726074, + "learning_rate": 8.671260280062237e-05, + "loss": 2.1704, + "step": 25494 + }, + { + "epoch": 0.5665555555555556, + "grad_norm": 2.1833972930908203, + "learning_rate": 8.670815736830408e-05, + "loss": 2.103, + "step": 25495 + }, + { + "epoch": 0.5665777777777777, + "grad_norm": 2.137716770172119, + "learning_rate": 8.670371193598578e-05, + "loss": 1.9764, + "step": 25496 + }, + { + "epoch": 0.5666, + "grad_norm": 1.7726776599884033, + "learning_rate": 8.669926650366749e-05, + "loss": 1.8766, + "step": 25497 + }, + { + "epoch": 0.5666222222222222, + "grad_norm": 2.3358371257781982, + "learning_rate": 8.66948210713492e-05, + "loss": 2.0126, + "step": 25498 + }, + { + "epoch": 0.5666444444444444, + "grad_norm": 1.7990282773971558, + "learning_rate": 8.669037563903089e-05, + "loss": 1.8925, + "step": 25499 + }, + { + "epoch": 0.5666666666666667, + "grad_norm": 2.0586588382720947, + "learning_rate": 8.66859302067126e-05, + "loss": 1.8382, + "step": 25500 + }, + { + "epoch": 0.5666888888888889, + "grad_norm": 1.2641650438308716, + "learning_rate": 8.668148477439431e-05, + "loss": 2.1141, + "step": 25501 + }, + { + "epoch": 0.5667111111111112, + "grad_norm": 1.080187201499939, + "learning_rate": 8.667703934207602e-05, + "loss": 1.0952, + "step": 25502 + }, + { + "epoch": 0.5667333333333333, + "grad_norm": 1.647226095199585, + "learning_rate": 8.667259390975773e-05, + "loss": 2.2651, + "step": 25503 + }, + { + "epoch": 0.5667555555555556, + "grad_norm": 1.6892226934432983, + "learning_rate": 8.666814847743944e-05, + "loss": 2.348, + "step": 25504 + }, + { + "epoch": 0.5667777777777778, + "grad_norm": 1.515511155128479, + "learning_rate": 8.666370304512115e-05, + "loss": 2.0383, + "step": 25505 + }, + { + "epoch": 0.5668, + "grad_norm": 1.4195060729980469, + "learning_rate": 8.665925761280285e-05, + "loss": 1.9549, + "step": 25506 + }, + { + "epoch": 0.5668222222222222, + "grad_norm": 0.9295563697814941, + "learning_rate": 8.665481218048456e-05, + "loss": 0.0285, + "step": 25507 + }, + { + "epoch": 0.5668444444444445, + "grad_norm": 1.2143354415893555, + "learning_rate": 8.665036674816627e-05, + "loss": 1.6016, + "step": 25508 + }, + { + "epoch": 0.5668666666666666, + "grad_norm": 1.9575049877166748, + "learning_rate": 8.664592131584796e-05, + "loss": 2.2171, + "step": 25509 + }, + { + "epoch": 0.5668888888888889, + "grad_norm": 1.8051297664642334, + "learning_rate": 8.664147588352967e-05, + "loss": 2.6714, + "step": 25510 + }, + { + "epoch": 0.5669111111111111, + "grad_norm": 1.5467302799224854, + "learning_rate": 8.663703045121138e-05, + "loss": 1.867, + "step": 25511 + }, + { + "epoch": 0.5669333333333333, + "grad_norm": 1.3129912614822388, + "learning_rate": 8.663258501889309e-05, + "loss": 1.8731, + "step": 25512 + }, + { + "epoch": 0.5669555555555555, + "grad_norm": 1.4830926656723022, + "learning_rate": 8.66281395865748e-05, + "loss": 1.9213, + "step": 25513 + }, + { + "epoch": 0.5669777777777778, + "grad_norm": 1.6727464199066162, + "learning_rate": 8.662369415425651e-05, + "loss": 2.009, + "step": 25514 + }, + { + "epoch": 0.567, + "grad_norm": 1.5978381633758545, + "learning_rate": 8.661924872193822e-05, + "loss": 2.2661, + "step": 25515 + }, + { + "epoch": 0.5670222222222222, + "grad_norm": 1.3395562171936035, + "learning_rate": 8.661480328961992e-05, + "loss": 1.7839, + "step": 25516 + }, + { + "epoch": 0.5670444444444445, + "grad_norm": 1.7934595346450806, + "learning_rate": 8.661035785730162e-05, + "loss": 2.2136, + "step": 25517 + }, + { + "epoch": 0.5670666666666667, + "grad_norm": 1.6741819381713867, + "learning_rate": 8.660591242498333e-05, + "loss": 1.6916, + "step": 25518 + }, + { + "epoch": 0.5670888888888889, + "grad_norm": 1.6226348876953125, + "learning_rate": 8.660146699266503e-05, + "loss": 1.9363, + "step": 25519 + }, + { + "epoch": 0.5671111111111111, + "grad_norm": 1.6335357427597046, + "learning_rate": 8.659702156034675e-05, + "loss": 1.9537, + "step": 25520 + }, + { + "epoch": 0.5671333333333334, + "grad_norm": 1.6441919803619385, + "learning_rate": 8.659257612802846e-05, + "loss": 1.8217, + "step": 25521 + }, + { + "epoch": 0.5671555555555555, + "grad_norm": 1.528879165649414, + "learning_rate": 8.658813069571016e-05, + "loss": 1.9105, + "step": 25522 + }, + { + "epoch": 0.5671777777777778, + "grad_norm": 1.4856935739517212, + "learning_rate": 8.658368526339187e-05, + "loss": 1.7963, + "step": 25523 + }, + { + "epoch": 0.5672, + "grad_norm": 1.4754873514175415, + "learning_rate": 8.657923983107358e-05, + "loss": 1.2109, + "step": 25524 + }, + { + "epoch": 0.5672222222222222, + "grad_norm": 1.4310427904129028, + "learning_rate": 8.657479439875529e-05, + "loss": 1.24, + "step": 25525 + }, + { + "epoch": 0.5672444444444444, + "grad_norm": 1.9599642753601074, + "learning_rate": 8.657034896643698e-05, + "loss": 2.1241, + "step": 25526 + }, + { + "epoch": 0.5672666666666667, + "grad_norm": 1.650848150253296, + "learning_rate": 8.65659035341187e-05, + "loss": 1.9539, + "step": 25527 + }, + { + "epoch": 0.5672888888888888, + "grad_norm": 1.6401448249816895, + "learning_rate": 8.65614581018004e-05, + "loss": 2.0195, + "step": 25528 + }, + { + "epoch": 0.5673111111111111, + "grad_norm": 1.3462976217269897, + "learning_rate": 8.655701266948211e-05, + "loss": 1.5376, + "step": 25529 + }, + { + "epoch": 0.5673333333333334, + "grad_norm": 1.7441065311431885, + "learning_rate": 8.655256723716382e-05, + "loss": 1.9041, + "step": 25530 + }, + { + "epoch": 0.5673555555555555, + "grad_norm": 1.5822638273239136, + "learning_rate": 8.654812180484553e-05, + "loss": 1.932, + "step": 25531 + }, + { + "epoch": 0.5673777777777778, + "grad_norm": 1.9012054204940796, + "learning_rate": 8.654367637252723e-05, + "loss": 1.9656, + "step": 25532 + }, + { + "epoch": 0.5674, + "grad_norm": 1.0392942428588867, + "learning_rate": 8.653923094020894e-05, + "loss": 0.8862, + "step": 25533 + }, + { + "epoch": 0.5674222222222223, + "grad_norm": 1.622781753540039, + "learning_rate": 8.653478550789065e-05, + "loss": 1.5679, + "step": 25534 + }, + { + "epoch": 0.5674444444444444, + "grad_norm": 1.4929344654083252, + "learning_rate": 8.653034007557236e-05, + "loss": 1.7067, + "step": 25535 + }, + { + "epoch": 0.5674666666666667, + "grad_norm": 1.837020993232727, + "learning_rate": 8.652589464325405e-05, + "loss": 1.6447, + "step": 25536 + }, + { + "epoch": 0.5674888888888889, + "grad_norm": 1.5858163833618164, + "learning_rate": 8.652144921093576e-05, + "loss": 1.742, + "step": 25537 + }, + { + "epoch": 0.5675111111111111, + "grad_norm": 1.4231958389282227, + "learning_rate": 8.651700377861747e-05, + "loss": 1.7786, + "step": 25538 + }, + { + "epoch": 0.5675333333333333, + "grad_norm": 1.6966981887817383, + "learning_rate": 8.651255834629918e-05, + "loss": 1.8127, + "step": 25539 + }, + { + "epoch": 0.5675555555555556, + "grad_norm": 1.7579383850097656, + "learning_rate": 8.650811291398089e-05, + "loss": 1.7069, + "step": 25540 + }, + { + "epoch": 0.5675777777777777, + "grad_norm": 1.9080697298049927, + "learning_rate": 8.65036674816626e-05, + "loss": 1.595, + "step": 25541 + }, + { + "epoch": 0.5676, + "grad_norm": 1.6535087823867798, + "learning_rate": 8.649922204934431e-05, + "loss": 1.7829, + "step": 25542 + }, + { + "epoch": 0.5676222222222223, + "grad_norm": 1.5289617776870728, + "learning_rate": 8.649477661702601e-05, + "loss": 1.5355, + "step": 25543 + }, + { + "epoch": 0.5676444444444444, + "grad_norm": 1.6726182699203491, + "learning_rate": 8.649033118470772e-05, + "loss": 1.6021, + "step": 25544 + }, + { + "epoch": 0.5676666666666667, + "grad_norm": 1.7910929918289185, + "learning_rate": 8.648588575238943e-05, + "loss": 1.7157, + "step": 25545 + }, + { + "epoch": 0.5676888888888889, + "grad_norm": 1.7049421072006226, + "learning_rate": 8.648144032007112e-05, + "loss": 2.0987, + "step": 25546 + }, + { + "epoch": 0.5677111111111112, + "grad_norm": 1.732585072517395, + "learning_rate": 8.647699488775283e-05, + "loss": 1.8902, + "step": 25547 + }, + { + "epoch": 0.5677333333333333, + "grad_norm": 1.5966678857803345, + "learning_rate": 8.647254945543454e-05, + "loss": 1.7052, + "step": 25548 + }, + { + "epoch": 0.5677555555555556, + "grad_norm": 2.260141372680664, + "learning_rate": 8.646810402311625e-05, + "loss": 1.7403, + "step": 25549 + }, + { + "epoch": 0.5677777777777778, + "grad_norm": 0.8711251020431519, + "learning_rate": 8.646365859079796e-05, + "loss": 0.4839, + "step": 25550 + }, + { + "epoch": 0.5678, + "grad_norm": 1.83405601978302, + "learning_rate": 8.645921315847967e-05, + "loss": 2.4266, + "step": 25551 + }, + { + "epoch": 0.5678222222222222, + "grad_norm": 1.4117273092269897, + "learning_rate": 8.645476772616138e-05, + "loss": 2.6935, + "step": 25552 + }, + { + "epoch": 0.5678444444444445, + "grad_norm": 1.2493290901184082, + "learning_rate": 8.645032229384308e-05, + "loss": 2.2871, + "step": 25553 + }, + { + "epoch": 0.5678666666666666, + "grad_norm": 1.019303798675537, + "learning_rate": 8.644587686152479e-05, + "loss": 1.1308, + "step": 25554 + }, + { + "epoch": 0.5678888888888889, + "grad_norm": 1.352089524269104, + "learning_rate": 8.64414314292065e-05, + "loss": 2.0598, + "step": 25555 + }, + { + "epoch": 0.5679111111111111, + "grad_norm": 1.6639633178710938, + "learning_rate": 8.643698599688819e-05, + "loss": 1.6875, + "step": 25556 + }, + { + "epoch": 0.5679333333333333, + "grad_norm": 1.5251015424728394, + "learning_rate": 8.643254056456991e-05, + "loss": 2.0902, + "step": 25557 + }, + { + "epoch": 0.5679555555555555, + "grad_norm": 1.4009424448013306, + "learning_rate": 8.642809513225162e-05, + "loss": 2.28, + "step": 25558 + }, + { + "epoch": 0.5679777777777778, + "grad_norm": 1.5966070890426636, + "learning_rate": 8.642364969993332e-05, + "loss": 2.2866, + "step": 25559 + }, + { + "epoch": 0.568, + "grad_norm": 1.0532625913619995, + "learning_rate": 8.641920426761503e-05, + "loss": 1.2138, + "step": 25560 + }, + { + "epoch": 0.5680222222222222, + "grad_norm": 1.5685704946517944, + "learning_rate": 8.641475883529674e-05, + "loss": 2.1516, + "step": 25561 + }, + { + "epoch": 0.5680444444444445, + "grad_norm": 1.348705768585205, + "learning_rate": 8.641031340297845e-05, + "loss": 1.7455, + "step": 25562 + }, + { + "epoch": 0.5680666666666667, + "grad_norm": 1.7225929498672485, + "learning_rate": 8.640586797066015e-05, + "loss": 2.1124, + "step": 25563 + }, + { + "epoch": 0.5680888888888889, + "grad_norm": 1.5577679872512817, + "learning_rate": 8.640142253834186e-05, + "loss": 1.857, + "step": 25564 + }, + { + "epoch": 0.5681111111111111, + "grad_norm": 2.3156497478485107, + "learning_rate": 8.639697710602356e-05, + "loss": 1.6415, + "step": 25565 + }, + { + "epoch": 0.5681333333333334, + "grad_norm": 1.426537036895752, + "learning_rate": 8.639253167370527e-05, + "loss": 1.5825, + "step": 25566 + }, + { + "epoch": 0.5681555555555555, + "grad_norm": 1.494095802307129, + "learning_rate": 8.638808624138698e-05, + "loss": 1.8617, + "step": 25567 + }, + { + "epoch": 0.5681777777777778, + "grad_norm": 1.7394038438796997, + "learning_rate": 8.63836408090687e-05, + "loss": 2.2626, + "step": 25568 + }, + { + "epoch": 0.5682, + "grad_norm": 1.7255122661590576, + "learning_rate": 8.637919537675039e-05, + "loss": 2.1076, + "step": 25569 + }, + { + "epoch": 0.5682222222222222, + "grad_norm": 1.3343063592910767, + "learning_rate": 8.63747499444321e-05, + "loss": 1.4298, + "step": 25570 + }, + { + "epoch": 0.5682444444444444, + "grad_norm": 1.4318054914474487, + "learning_rate": 8.637030451211381e-05, + "loss": 1.8676, + "step": 25571 + }, + { + "epoch": 0.5682666666666667, + "grad_norm": 1.3568811416625977, + "learning_rate": 8.636585907979552e-05, + "loss": 2.1097, + "step": 25572 + }, + { + "epoch": 0.5682888888888888, + "grad_norm": 1.5402088165283203, + "learning_rate": 8.636141364747721e-05, + "loss": 1.8708, + "step": 25573 + }, + { + "epoch": 0.5683111111111111, + "grad_norm": 1.8865468502044678, + "learning_rate": 8.635696821515892e-05, + "loss": 2.1948, + "step": 25574 + }, + { + "epoch": 0.5683333333333334, + "grad_norm": 1.824686884880066, + "learning_rate": 8.635252278284063e-05, + "loss": 2.2085, + "step": 25575 + }, + { + "epoch": 0.5683555555555555, + "grad_norm": 1.5992943048477173, + "learning_rate": 8.634807735052234e-05, + "loss": 1.893, + "step": 25576 + }, + { + "epoch": 0.5683777777777778, + "grad_norm": 1.4310848712921143, + "learning_rate": 8.634363191820405e-05, + "loss": 1.7461, + "step": 25577 + }, + { + "epoch": 0.5684, + "grad_norm": 1.4332364797592163, + "learning_rate": 8.633918648588576e-05, + "loss": 1.5877, + "step": 25578 + }, + { + "epoch": 0.5684222222222223, + "grad_norm": 1.5231653451919556, + "learning_rate": 8.633474105356746e-05, + "loss": 1.9179, + "step": 25579 + }, + { + "epoch": 0.5684444444444444, + "grad_norm": 1.6611709594726562, + "learning_rate": 8.633029562124917e-05, + "loss": 1.3753, + "step": 25580 + }, + { + "epoch": 0.5684666666666667, + "grad_norm": 1.5533298254013062, + "learning_rate": 8.632585018893088e-05, + "loss": 1.7141, + "step": 25581 + }, + { + "epoch": 0.5684888888888889, + "grad_norm": 1.7611892223358154, + "learning_rate": 8.632140475661259e-05, + "loss": 1.9242, + "step": 25582 + }, + { + "epoch": 0.5685111111111111, + "grad_norm": 1.7563116550445557, + "learning_rate": 8.631695932429428e-05, + "loss": 2.3091, + "step": 25583 + }, + { + "epoch": 0.5685333333333333, + "grad_norm": 1.78745436668396, + "learning_rate": 8.6312513891976e-05, + "loss": 1.7655, + "step": 25584 + }, + { + "epoch": 0.5685555555555556, + "grad_norm": 1.9148340225219727, + "learning_rate": 8.63080684596577e-05, + "loss": 1.9374, + "step": 25585 + }, + { + "epoch": 0.5685777777777777, + "grad_norm": 1.1031118631362915, + "learning_rate": 8.630362302733941e-05, + "loss": 0.9773, + "step": 25586 + }, + { + "epoch": 0.5686, + "grad_norm": 1.5591325759887695, + "learning_rate": 8.629917759502112e-05, + "loss": 1.8767, + "step": 25587 + }, + { + "epoch": 0.5686222222222223, + "grad_norm": 1.592635989189148, + "learning_rate": 8.629473216270283e-05, + "loss": 1.7193, + "step": 25588 + }, + { + "epoch": 0.5686444444444444, + "grad_norm": 1.505963683128357, + "learning_rate": 8.629028673038453e-05, + "loss": 1.8382, + "step": 25589 + }, + { + "epoch": 0.5686666666666667, + "grad_norm": 1.7800480127334595, + "learning_rate": 8.628584129806624e-05, + "loss": 1.7844, + "step": 25590 + }, + { + "epoch": 0.5686888888888889, + "grad_norm": 1.703181505203247, + "learning_rate": 8.628139586574795e-05, + "loss": 1.8816, + "step": 25591 + }, + { + "epoch": 0.5687111111111111, + "grad_norm": 1.9718616008758545, + "learning_rate": 8.627695043342966e-05, + "loss": 2.0569, + "step": 25592 + }, + { + "epoch": 0.5687333333333333, + "grad_norm": 2.016561508178711, + "learning_rate": 8.627250500111135e-05, + "loss": 2.1293, + "step": 25593 + }, + { + "epoch": 0.5687555555555556, + "grad_norm": 1.6165771484375, + "learning_rate": 8.626805956879308e-05, + "loss": 1.7433, + "step": 25594 + }, + { + "epoch": 0.5687777777777778, + "grad_norm": 1.7712388038635254, + "learning_rate": 8.626361413647479e-05, + "loss": 1.6944, + "step": 25595 + }, + { + "epoch": 0.5688, + "grad_norm": 1.5624693632125854, + "learning_rate": 8.625916870415648e-05, + "loss": 0.7768, + "step": 25596 + }, + { + "epoch": 0.5688222222222222, + "grad_norm": 1.7532920837402344, + "learning_rate": 8.625472327183819e-05, + "loss": 1.9357, + "step": 25597 + }, + { + "epoch": 0.5688444444444445, + "grad_norm": 1.4923877716064453, + "learning_rate": 8.62502778395199e-05, + "loss": 1.3285, + "step": 25598 + }, + { + "epoch": 0.5688666666666666, + "grad_norm": 1.7593766450881958, + "learning_rate": 8.624583240720161e-05, + "loss": 1.4869, + "step": 25599 + }, + { + "epoch": 0.5688888888888889, + "grad_norm": 2.146467447280884, + "learning_rate": 8.62413869748833e-05, + "loss": 1.6363, + "step": 25600 + }, + { + "epoch": 0.5689111111111111, + "grad_norm": 1.5496854782104492, + "learning_rate": 8.623694154256502e-05, + "loss": 2.6966, + "step": 25601 + }, + { + "epoch": 0.5689333333333333, + "grad_norm": 0.8772847652435303, + "learning_rate": 8.623249611024673e-05, + "loss": 1.1829, + "step": 25602 + }, + { + "epoch": 0.5689555555555555, + "grad_norm": 1.0426076650619507, + "learning_rate": 8.622805067792844e-05, + "loss": 1.0857, + "step": 25603 + }, + { + "epoch": 0.5689777777777778, + "grad_norm": 1.534347414970398, + "learning_rate": 8.622360524561015e-05, + "loss": 2.4706, + "step": 25604 + }, + { + "epoch": 0.569, + "grad_norm": 1.4586025476455688, + "learning_rate": 8.621915981329185e-05, + "loss": 2.4975, + "step": 25605 + }, + { + "epoch": 0.5690222222222222, + "grad_norm": 1.736374020576477, + "learning_rate": 8.621471438097355e-05, + "loss": 2.3636, + "step": 25606 + }, + { + "epoch": 0.5690444444444445, + "grad_norm": 1.303686499595642, + "learning_rate": 8.621026894865526e-05, + "loss": 1.356, + "step": 25607 + }, + { + "epoch": 0.5690666666666667, + "grad_norm": 1.3341532945632935, + "learning_rate": 8.620582351633697e-05, + "loss": 1.9148, + "step": 25608 + }, + { + "epoch": 0.5690888888888889, + "grad_norm": 1.4841862916946411, + "learning_rate": 8.620137808401868e-05, + "loss": 1.8911, + "step": 25609 + }, + { + "epoch": 0.5691111111111111, + "grad_norm": 1.6789429187774658, + "learning_rate": 8.619693265170038e-05, + "loss": 2.2117, + "step": 25610 + }, + { + "epoch": 0.5691333333333334, + "grad_norm": 0.1538681536912918, + "learning_rate": 8.619248721938209e-05, + "loss": 0.0183, + "step": 25611 + }, + { + "epoch": 0.5691555555555555, + "grad_norm": 1.7826683521270752, + "learning_rate": 8.61880417870638e-05, + "loss": 2.0759, + "step": 25612 + }, + { + "epoch": 0.5691777777777778, + "grad_norm": 1.508593201637268, + "learning_rate": 8.61835963547455e-05, + "loss": 2.2247, + "step": 25613 + }, + { + "epoch": 0.5692, + "grad_norm": 1.4140818119049072, + "learning_rate": 8.617915092242721e-05, + "loss": 1.613, + "step": 25614 + }, + { + "epoch": 0.5692222222222222, + "grad_norm": 1.6381505727767944, + "learning_rate": 8.617470549010892e-05, + "loss": 2.0254, + "step": 25615 + }, + { + "epoch": 0.5692444444444444, + "grad_norm": 1.8345551490783691, + "learning_rate": 8.617026005779062e-05, + "loss": 2.5182, + "step": 25616 + }, + { + "epoch": 0.5692666666666667, + "grad_norm": 1.2683861255645752, + "learning_rate": 8.616581462547233e-05, + "loss": 0.8326, + "step": 25617 + }, + { + "epoch": 0.5692888888888888, + "grad_norm": 1.8822568655014038, + "learning_rate": 8.616136919315404e-05, + "loss": 1.6483, + "step": 25618 + }, + { + "epoch": 0.5693111111111111, + "grad_norm": 1.725543737411499, + "learning_rate": 8.615692376083575e-05, + "loss": 2.064, + "step": 25619 + }, + { + "epoch": 0.5693333333333334, + "grad_norm": 2.3417868614196777, + "learning_rate": 8.615247832851744e-05, + "loss": 2.7043, + "step": 25620 + }, + { + "epoch": 0.5693555555555555, + "grad_norm": 1.4956707954406738, + "learning_rate": 8.614803289619915e-05, + "loss": 1.6743, + "step": 25621 + }, + { + "epoch": 0.5693777777777778, + "grad_norm": 2.0377418994903564, + "learning_rate": 8.614358746388086e-05, + "loss": 1.9123, + "step": 25622 + }, + { + "epoch": 0.5694, + "grad_norm": 1.8327478170394897, + "learning_rate": 8.613914203156257e-05, + "loss": 1.9568, + "step": 25623 + }, + { + "epoch": 0.5694222222222223, + "grad_norm": 1.4546701908111572, + "learning_rate": 8.613469659924428e-05, + "loss": 1.9495, + "step": 25624 + }, + { + "epoch": 0.5694444444444444, + "grad_norm": 1.475493311882019, + "learning_rate": 8.613025116692599e-05, + "loss": 1.5293, + "step": 25625 + }, + { + "epoch": 0.5694666666666667, + "grad_norm": 1.9478663206100464, + "learning_rate": 8.612580573460769e-05, + "loss": 2.33, + "step": 25626 + }, + { + "epoch": 0.5694888888888889, + "grad_norm": 1.5463484525680542, + "learning_rate": 8.61213603022894e-05, + "loss": 1.9062, + "step": 25627 + }, + { + "epoch": 0.5695111111111111, + "grad_norm": 1.953561544418335, + "learning_rate": 8.611691486997111e-05, + "loss": 1.9784, + "step": 25628 + }, + { + "epoch": 0.5695333333333333, + "grad_norm": 1.514666199684143, + "learning_rate": 8.611246943765282e-05, + "loss": 1.4286, + "step": 25629 + }, + { + "epoch": 0.5695555555555556, + "grad_norm": 1.5759919881820679, + "learning_rate": 8.610802400533451e-05, + "loss": 1.9392, + "step": 25630 + }, + { + "epoch": 0.5695777777777777, + "grad_norm": 0.9849022030830383, + "learning_rate": 8.610357857301624e-05, + "loss": 0.8068, + "step": 25631 + }, + { + "epoch": 0.5696, + "grad_norm": 1.7830392122268677, + "learning_rate": 8.609913314069795e-05, + "loss": 1.6613, + "step": 25632 + }, + { + "epoch": 0.5696222222222223, + "grad_norm": 1.3821425437927246, + "learning_rate": 8.609468770837964e-05, + "loss": 0.7226, + "step": 25633 + }, + { + "epoch": 0.5696444444444444, + "grad_norm": 0.15262767672538757, + "learning_rate": 8.609024227606135e-05, + "loss": 0.0273, + "step": 25634 + }, + { + "epoch": 0.5696666666666667, + "grad_norm": 1.5773595571517944, + "learning_rate": 8.608579684374306e-05, + "loss": 1.8314, + "step": 25635 + }, + { + "epoch": 0.5696888888888889, + "grad_norm": 1.7459397315979004, + "learning_rate": 8.608135141142476e-05, + "loss": 1.9914, + "step": 25636 + }, + { + "epoch": 0.5697111111111111, + "grad_norm": 1.83330237865448, + "learning_rate": 8.607690597910647e-05, + "loss": 1.8253, + "step": 25637 + }, + { + "epoch": 0.5697333333333333, + "grad_norm": 1.7723551988601685, + "learning_rate": 8.607246054678818e-05, + "loss": 2.0525, + "step": 25638 + }, + { + "epoch": 0.5697555555555556, + "grad_norm": 2.0471293926239014, + "learning_rate": 8.606801511446989e-05, + "loss": 1.9065, + "step": 25639 + }, + { + "epoch": 0.5697777777777778, + "grad_norm": 1.8356201648712158, + "learning_rate": 8.60635696821516e-05, + "loss": 2.0246, + "step": 25640 + }, + { + "epoch": 0.5698, + "grad_norm": 1.7239627838134766, + "learning_rate": 8.60591242498333e-05, + "loss": 2.2547, + "step": 25641 + }, + { + "epoch": 0.5698222222222222, + "grad_norm": 1.7103173732757568, + "learning_rate": 8.605467881751502e-05, + "loss": 1.8929, + "step": 25642 + }, + { + "epoch": 0.5698444444444445, + "grad_norm": 1.6156359910964966, + "learning_rate": 8.605023338519671e-05, + "loss": 1.8456, + "step": 25643 + }, + { + "epoch": 0.5698666666666666, + "grad_norm": 1.5640565156936646, + "learning_rate": 8.604578795287842e-05, + "loss": 1.6381, + "step": 25644 + }, + { + "epoch": 0.5698888888888889, + "grad_norm": 1.4334174394607544, + "learning_rate": 8.604134252056013e-05, + "loss": 1.3684, + "step": 25645 + }, + { + "epoch": 0.5699111111111111, + "grad_norm": 1.7092499732971191, + "learning_rate": 8.603689708824183e-05, + "loss": 1.8991, + "step": 25646 + }, + { + "epoch": 0.5699333333333333, + "grad_norm": 2.41092586517334, + "learning_rate": 8.603245165592354e-05, + "loss": 1.7, + "step": 25647 + }, + { + "epoch": 0.5699555555555555, + "grad_norm": 1.7601580619812012, + "learning_rate": 8.602800622360525e-05, + "loss": 1.7763, + "step": 25648 + }, + { + "epoch": 0.5699777777777778, + "grad_norm": 1.8733407258987427, + "learning_rate": 8.602356079128696e-05, + "loss": 1.5541, + "step": 25649 + }, + { + "epoch": 0.57, + "grad_norm": 2.374967575073242, + "learning_rate": 8.601911535896867e-05, + "loss": 1.8792, + "step": 25650 + }, + { + "epoch": 0.5700222222222222, + "grad_norm": 1.2829123735427856, + "learning_rate": 8.601466992665038e-05, + "loss": 2.0867, + "step": 25651 + }, + { + "epoch": 0.5700444444444445, + "grad_norm": 1.1918680667877197, + "learning_rate": 8.601022449433208e-05, + "loss": 1.3664, + "step": 25652 + }, + { + "epoch": 0.5700666666666667, + "grad_norm": 1.6968246698379517, + "learning_rate": 8.600577906201378e-05, + "loss": 2.6106, + "step": 25653 + }, + { + "epoch": 0.5700888888888889, + "grad_norm": 1.7334518432617188, + "learning_rate": 8.600133362969549e-05, + "loss": 2.3642, + "step": 25654 + }, + { + "epoch": 0.5701111111111111, + "grad_norm": 1.4948846101760864, + "learning_rate": 8.59968881973772e-05, + "loss": 1.7472, + "step": 25655 + }, + { + "epoch": 0.5701333333333334, + "grad_norm": 1.5138331651687622, + "learning_rate": 8.599244276505891e-05, + "loss": 1.9211, + "step": 25656 + }, + { + "epoch": 0.5701555555555555, + "grad_norm": 1.4922399520874023, + "learning_rate": 8.59879973327406e-05, + "loss": 1.9827, + "step": 25657 + }, + { + "epoch": 0.5701777777777778, + "grad_norm": 1.9236634969711304, + "learning_rate": 8.598355190042232e-05, + "loss": 1.9192, + "step": 25658 + }, + { + "epoch": 0.5702, + "grad_norm": 1.7816553115844727, + "learning_rate": 8.597910646810402e-05, + "loss": 2.12, + "step": 25659 + }, + { + "epoch": 0.5702222222222222, + "grad_norm": 1.5962737798690796, + "learning_rate": 8.597466103578573e-05, + "loss": 1.9601, + "step": 25660 + }, + { + "epoch": 0.5702444444444444, + "grad_norm": 1.6761876344680786, + "learning_rate": 8.597021560346744e-05, + "loss": 2.4774, + "step": 25661 + }, + { + "epoch": 0.5702666666666667, + "grad_norm": 1.5479689836502075, + "learning_rate": 8.596577017114915e-05, + "loss": 1.9717, + "step": 25662 + }, + { + "epoch": 0.5702888888888888, + "grad_norm": 1.867241621017456, + "learning_rate": 8.596132473883085e-05, + "loss": 1.8237, + "step": 25663 + }, + { + "epoch": 0.5703111111111111, + "grad_norm": 1.7697581052780151, + "learning_rate": 8.595687930651256e-05, + "loss": 2.1297, + "step": 25664 + }, + { + "epoch": 0.5703333333333334, + "grad_norm": 1.597586989402771, + "learning_rate": 8.595243387419427e-05, + "loss": 1.8777, + "step": 25665 + }, + { + "epoch": 0.5703555555555555, + "grad_norm": 1.3696242570877075, + "learning_rate": 8.594798844187598e-05, + "loss": 1.1399, + "step": 25666 + }, + { + "epoch": 0.5703777777777778, + "grad_norm": 1.4491081237792969, + "learning_rate": 8.594354300955767e-05, + "loss": 1.5301, + "step": 25667 + }, + { + "epoch": 0.5704, + "grad_norm": 1.6953177452087402, + "learning_rate": 8.59390975772394e-05, + "loss": 2.0117, + "step": 25668 + }, + { + "epoch": 0.5704222222222223, + "grad_norm": 1.524505376815796, + "learning_rate": 8.593465214492111e-05, + "loss": 1.8162, + "step": 25669 + }, + { + "epoch": 0.5704444444444444, + "grad_norm": 1.861527919769287, + "learning_rate": 8.59302067126028e-05, + "loss": 1.4832, + "step": 25670 + }, + { + "epoch": 0.5704666666666667, + "grad_norm": 1.8651635646820068, + "learning_rate": 8.592576128028451e-05, + "loss": 1.6583, + "step": 25671 + }, + { + "epoch": 0.5704888888888889, + "grad_norm": 2.0610780715942383, + "learning_rate": 8.592131584796622e-05, + "loss": 2.4729, + "step": 25672 + }, + { + "epoch": 0.5705111111111111, + "grad_norm": 1.5690243244171143, + "learning_rate": 8.591687041564792e-05, + "loss": 1.846, + "step": 25673 + }, + { + "epoch": 0.5705333333333333, + "grad_norm": 1.7951501607894897, + "learning_rate": 8.591242498332963e-05, + "loss": 2.2787, + "step": 25674 + }, + { + "epoch": 0.5705555555555556, + "grad_norm": 1.767663598060608, + "learning_rate": 8.590797955101134e-05, + "loss": 2.1076, + "step": 25675 + }, + { + "epoch": 0.5705777777777777, + "grad_norm": 1.3374791145324707, + "learning_rate": 8.590353411869305e-05, + "loss": 1.065, + "step": 25676 + }, + { + "epoch": 0.5706, + "grad_norm": 1.9195759296417236, + "learning_rate": 8.589908868637476e-05, + "loss": 2.0019, + "step": 25677 + }, + { + "epoch": 0.5706222222222223, + "grad_norm": 1.859966516494751, + "learning_rate": 8.589464325405647e-05, + "loss": 1.7332, + "step": 25678 + }, + { + "epoch": 0.5706444444444444, + "grad_norm": 1.7546013593673706, + "learning_rate": 8.589019782173818e-05, + "loss": 2.2428, + "step": 25679 + }, + { + "epoch": 0.5706666666666667, + "grad_norm": 1.5671427249908447, + "learning_rate": 8.588575238941987e-05, + "loss": 1.851, + "step": 25680 + }, + { + "epoch": 0.5706888888888889, + "grad_norm": 2.0177090167999268, + "learning_rate": 8.588130695710158e-05, + "loss": 2.1053, + "step": 25681 + }, + { + "epoch": 0.5707111111111111, + "grad_norm": 1.8132013082504272, + "learning_rate": 8.587686152478329e-05, + "loss": 2.0969, + "step": 25682 + }, + { + "epoch": 0.5707333333333333, + "grad_norm": 1.8457845449447632, + "learning_rate": 8.587241609246499e-05, + "loss": 1.8723, + "step": 25683 + }, + { + "epoch": 0.5707555555555556, + "grad_norm": 1.5866272449493408, + "learning_rate": 8.58679706601467e-05, + "loss": 1.4801, + "step": 25684 + }, + { + "epoch": 0.5707777777777778, + "grad_norm": 1.7102094888687134, + "learning_rate": 8.586352522782841e-05, + "loss": 1.9712, + "step": 25685 + }, + { + "epoch": 0.5708, + "grad_norm": 1.9478623867034912, + "learning_rate": 8.585907979551012e-05, + "loss": 2.2846, + "step": 25686 + }, + { + "epoch": 0.5708222222222222, + "grad_norm": 1.900810718536377, + "learning_rate": 8.585463436319183e-05, + "loss": 2.1442, + "step": 25687 + }, + { + "epoch": 0.5708444444444445, + "grad_norm": 1.5752862691879272, + "learning_rate": 8.585018893087354e-05, + "loss": 1.7731, + "step": 25688 + }, + { + "epoch": 0.5708666666666666, + "grad_norm": 1.7529854774475098, + "learning_rate": 8.584574349855525e-05, + "loss": 1.9615, + "step": 25689 + }, + { + "epoch": 0.5708888888888889, + "grad_norm": 1.776617407798767, + "learning_rate": 8.584129806623694e-05, + "loss": 1.8497, + "step": 25690 + }, + { + "epoch": 0.5709111111111111, + "grad_norm": 1.5052199363708496, + "learning_rate": 8.583685263391865e-05, + "loss": 1.7508, + "step": 25691 + }, + { + "epoch": 0.5709333333333333, + "grad_norm": 1.5465022325515747, + "learning_rate": 8.583240720160036e-05, + "loss": 1.6559, + "step": 25692 + }, + { + "epoch": 0.5709555555555555, + "grad_norm": 2.8852856159210205, + "learning_rate": 8.582796176928206e-05, + "loss": 1.7696, + "step": 25693 + }, + { + "epoch": 0.5709777777777778, + "grad_norm": 1.6870107650756836, + "learning_rate": 8.582351633696377e-05, + "loss": 1.3945, + "step": 25694 + }, + { + "epoch": 0.571, + "grad_norm": 1.3791446685791016, + "learning_rate": 8.581907090464548e-05, + "loss": 1.3369, + "step": 25695 + }, + { + "epoch": 0.5710222222222222, + "grad_norm": 1.6492143869400024, + "learning_rate": 8.581462547232719e-05, + "loss": 1.6589, + "step": 25696 + }, + { + "epoch": 0.5710444444444445, + "grad_norm": 1.7930784225463867, + "learning_rate": 8.58101800400089e-05, + "loss": 1.7328, + "step": 25697 + }, + { + "epoch": 0.5710666666666666, + "grad_norm": 1.76011061668396, + "learning_rate": 8.58057346076906e-05, + "loss": 1.3635, + "step": 25698 + }, + { + "epoch": 0.5710888888888889, + "grad_norm": 1.642624020576477, + "learning_rate": 8.580128917537231e-05, + "loss": 1.6686, + "step": 25699 + }, + { + "epoch": 0.5711111111111111, + "grad_norm": 1.4365448951721191, + "learning_rate": 8.579684374305401e-05, + "loss": 0.064, + "step": 25700 + }, + { + "epoch": 0.5711333333333334, + "grad_norm": 1.3209048509597778, + "learning_rate": 8.579239831073572e-05, + "loss": 2.0327, + "step": 25701 + }, + { + "epoch": 0.5711555555555555, + "grad_norm": 1.3216556310653687, + "learning_rate": 8.578795287841743e-05, + "loss": 2.2464, + "step": 25702 + }, + { + "epoch": 0.5711777777777778, + "grad_norm": 1.5542192459106445, + "learning_rate": 8.578350744609913e-05, + "loss": 2.3578, + "step": 25703 + }, + { + "epoch": 0.5712, + "grad_norm": 1.7194514274597168, + "learning_rate": 8.577906201378084e-05, + "loss": 2.7349, + "step": 25704 + }, + { + "epoch": 0.5712222222222222, + "grad_norm": 1.194679856300354, + "learning_rate": 8.577461658146256e-05, + "loss": 1.3133, + "step": 25705 + }, + { + "epoch": 0.5712444444444444, + "grad_norm": 1.6330630779266357, + "learning_rate": 8.577017114914427e-05, + "loss": 2.2848, + "step": 25706 + }, + { + "epoch": 0.5712666666666667, + "grad_norm": 2.1747875213623047, + "learning_rate": 8.576572571682596e-05, + "loss": 2.2944, + "step": 25707 + }, + { + "epoch": 0.5712888888888888, + "grad_norm": 2.29811429977417, + "learning_rate": 8.576128028450767e-05, + "loss": 2.1093, + "step": 25708 + }, + { + "epoch": 0.5713111111111111, + "grad_norm": 1.3702226877212524, + "learning_rate": 8.575683485218938e-05, + "loss": 1.4981, + "step": 25709 + }, + { + "epoch": 0.5713333333333334, + "grad_norm": 1.570976734161377, + "learning_rate": 8.575238941987108e-05, + "loss": 1.6642, + "step": 25710 + }, + { + "epoch": 0.5713555555555555, + "grad_norm": 1.8886852264404297, + "learning_rate": 8.574794398755279e-05, + "loss": 1.6982, + "step": 25711 + }, + { + "epoch": 0.5713777777777778, + "grad_norm": 1.566110610961914, + "learning_rate": 8.57434985552345e-05, + "loss": 1.504, + "step": 25712 + }, + { + "epoch": 0.5714, + "grad_norm": 1.0063490867614746, + "learning_rate": 8.573905312291621e-05, + "loss": 0.7414, + "step": 25713 + }, + { + "epoch": 0.5714222222222223, + "grad_norm": 2.0926096439361572, + "learning_rate": 8.573460769059792e-05, + "loss": 1.762, + "step": 25714 + }, + { + "epoch": 0.5714444444444444, + "grad_norm": 1.5999740362167358, + "learning_rate": 8.573016225827963e-05, + "loss": 2.0492, + "step": 25715 + }, + { + "epoch": 0.5714666666666667, + "grad_norm": 2.4232420921325684, + "learning_rate": 8.572571682596134e-05, + "loss": 1.1813, + "step": 25716 + }, + { + "epoch": 0.5714888888888889, + "grad_norm": 1.5547337532043457, + "learning_rate": 8.572127139364303e-05, + "loss": 1.7063, + "step": 25717 + }, + { + "epoch": 0.5715111111111111, + "grad_norm": 1.738980770111084, + "learning_rate": 8.571682596132474e-05, + "loss": 2.3619, + "step": 25718 + }, + { + "epoch": 0.5715333333333333, + "grad_norm": 1.4263365268707275, + "learning_rate": 8.571238052900645e-05, + "loss": 1.4464, + "step": 25719 + }, + { + "epoch": 0.5715555555555556, + "grad_norm": 1.5924991369247437, + "learning_rate": 8.570793509668815e-05, + "loss": 1.8442, + "step": 25720 + }, + { + "epoch": 0.5715777777777777, + "grad_norm": 1.668595314025879, + "learning_rate": 8.570348966436986e-05, + "loss": 1.5966, + "step": 25721 + }, + { + "epoch": 0.5716, + "grad_norm": 1.7621865272521973, + "learning_rate": 8.569904423205157e-05, + "loss": 1.8259, + "step": 25722 + }, + { + "epoch": 0.5716222222222223, + "grad_norm": 1.6255216598510742, + "learning_rate": 8.569459879973328e-05, + "loss": 2.0912, + "step": 25723 + }, + { + "epoch": 0.5716444444444444, + "grad_norm": 1.5257017612457275, + "learning_rate": 8.569015336741499e-05, + "loss": 1.9858, + "step": 25724 + }, + { + "epoch": 0.5716666666666667, + "grad_norm": 1.5033650398254395, + "learning_rate": 8.56857079350967e-05, + "loss": 1.5099, + "step": 25725 + }, + { + "epoch": 0.5716888888888889, + "grad_norm": 1.9311522245407104, + "learning_rate": 8.568126250277841e-05, + "loss": 2.0615, + "step": 25726 + }, + { + "epoch": 0.5717111111111111, + "grad_norm": 1.7489691972732544, + "learning_rate": 8.56768170704601e-05, + "loss": 2.4009, + "step": 25727 + }, + { + "epoch": 0.5717333333333333, + "grad_norm": 1.7931649684906006, + "learning_rate": 8.567237163814181e-05, + "loss": 2.1929, + "step": 25728 + }, + { + "epoch": 0.5717555555555556, + "grad_norm": 1.84356689453125, + "learning_rate": 8.566792620582352e-05, + "loss": 1.9432, + "step": 25729 + }, + { + "epoch": 0.5717777777777778, + "grad_norm": 1.5121773481369019, + "learning_rate": 8.566348077350522e-05, + "loss": 1.9769, + "step": 25730 + }, + { + "epoch": 0.5718, + "grad_norm": 1.5745084285736084, + "learning_rate": 8.565903534118693e-05, + "loss": 1.8561, + "step": 25731 + }, + { + "epoch": 0.5718222222222222, + "grad_norm": 1.262855887413025, + "learning_rate": 8.565458990886864e-05, + "loss": 1.272, + "step": 25732 + }, + { + "epoch": 0.5718444444444445, + "grad_norm": 1.2928316593170166, + "learning_rate": 8.565014447655035e-05, + "loss": 1.0977, + "step": 25733 + }, + { + "epoch": 0.5718666666666666, + "grad_norm": 0.956841766834259, + "learning_rate": 8.564569904423206e-05, + "loss": 0.7857, + "step": 25734 + }, + { + "epoch": 0.5718888888888889, + "grad_norm": 0.14648161828517914, + "learning_rate": 8.564125361191377e-05, + "loss": 0.0258, + "step": 25735 + }, + { + "epoch": 0.5719111111111111, + "grad_norm": 0.2709256112575531, + "learning_rate": 8.563680817959548e-05, + "loss": 0.0263, + "step": 25736 + }, + { + "epoch": 0.5719333333333333, + "grad_norm": 1.778686761856079, + "learning_rate": 8.563236274727717e-05, + "loss": 2.0404, + "step": 25737 + }, + { + "epoch": 0.5719555555555556, + "grad_norm": 1.7901958227157593, + "learning_rate": 8.562791731495888e-05, + "loss": 1.5201, + "step": 25738 + }, + { + "epoch": 0.5719777777777778, + "grad_norm": 1.8837966918945312, + "learning_rate": 8.562347188264059e-05, + "loss": 2.0632, + "step": 25739 + }, + { + "epoch": 0.572, + "grad_norm": 1.2224178314208984, + "learning_rate": 8.561902645032229e-05, + "loss": 0.9937, + "step": 25740 + }, + { + "epoch": 0.5720222222222222, + "grad_norm": 2.0332577228546143, + "learning_rate": 8.5614581018004e-05, + "loss": 2.1465, + "step": 25741 + }, + { + "epoch": 0.5720444444444445, + "grad_norm": 1.7775815725326538, + "learning_rate": 8.561013558568572e-05, + "loss": 1.8859, + "step": 25742 + }, + { + "epoch": 0.5720666666666666, + "grad_norm": 1.7891740798950195, + "learning_rate": 8.560569015336743e-05, + "loss": 1.9876, + "step": 25743 + }, + { + "epoch": 0.5720888888888889, + "grad_norm": 1.9831715822219849, + "learning_rate": 8.560124472104913e-05, + "loss": 1.9548, + "step": 25744 + }, + { + "epoch": 0.5721111111111111, + "grad_norm": 2.164729356765747, + "learning_rate": 8.559679928873084e-05, + "loss": 2.2877, + "step": 25745 + }, + { + "epoch": 0.5721333333333334, + "grad_norm": 1.6351230144500732, + "learning_rate": 8.559235385641254e-05, + "loss": 1.478, + "step": 25746 + }, + { + "epoch": 0.5721555555555555, + "grad_norm": 2.017198085784912, + "learning_rate": 8.558790842409424e-05, + "loss": 1.6519, + "step": 25747 + }, + { + "epoch": 0.5721777777777778, + "grad_norm": 1.556087851524353, + "learning_rate": 8.558346299177595e-05, + "loss": 1.5572, + "step": 25748 + }, + { + "epoch": 0.5722, + "grad_norm": 1.82135009765625, + "learning_rate": 8.557901755945766e-05, + "loss": 1.771, + "step": 25749 + }, + { + "epoch": 0.5722222222222222, + "grad_norm": 2.233006238937378, + "learning_rate": 8.557457212713936e-05, + "loss": 1.7638, + "step": 25750 + }, + { + "epoch": 0.5722444444444444, + "grad_norm": 1.3495924472808838, + "learning_rate": 8.557012669482108e-05, + "loss": 2.3135, + "step": 25751 + }, + { + "epoch": 0.5722666666666667, + "grad_norm": 1.718217134475708, + "learning_rate": 8.556568126250279e-05, + "loss": 2.3093, + "step": 25752 + }, + { + "epoch": 0.5722888888888888, + "grad_norm": 1.4839550256729126, + "learning_rate": 8.55612358301845e-05, + "loss": 2.6725, + "step": 25753 + }, + { + "epoch": 0.5723111111111111, + "grad_norm": 1.6164158582687378, + "learning_rate": 8.55567903978662e-05, + "loss": 2.3814, + "step": 25754 + }, + { + "epoch": 0.5723333333333334, + "grad_norm": 1.6461539268493652, + "learning_rate": 8.55523449655479e-05, + "loss": 2.1208, + "step": 25755 + }, + { + "epoch": 0.5723555555555555, + "grad_norm": 1.5425986051559448, + "learning_rate": 8.554789953322961e-05, + "loss": 2.1488, + "step": 25756 + }, + { + "epoch": 0.5723777777777778, + "grad_norm": 1.6818119287490845, + "learning_rate": 8.554345410091131e-05, + "loss": 2.2954, + "step": 25757 + }, + { + "epoch": 0.5724, + "grad_norm": 1.653141975402832, + "learning_rate": 8.553900866859302e-05, + "loss": 1.5584, + "step": 25758 + }, + { + "epoch": 0.5724222222222223, + "grad_norm": 1.3412193059921265, + "learning_rate": 8.553456323627473e-05, + "loss": 2.0158, + "step": 25759 + }, + { + "epoch": 0.5724444444444444, + "grad_norm": 1.7241566181182861, + "learning_rate": 8.553011780395644e-05, + "loss": 2.6476, + "step": 25760 + }, + { + "epoch": 0.5724666666666667, + "grad_norm": 1.53416907787323, + "learning_rate": 8.552567237163815e-05, + "loss": 2.087, + "step": 25761 + }, + { + "epoch": 0.5724888888888889, + "grad_norm": 1.8664326667785645, + "learning_rate": 8.552122693931986e-05, + "loss": 1.9608, + "step": 25762 + }, + { + "epoch": 0.5725111111111111, + "grad_norm": 1.7230284214019775, + "learning_rate": 8.551678150700157e-05, + "loss": 2.4159, + "step": 25763 + }, + { + "epoch": 0.5725333333333333, + "grad_norm": 1.5559327602386475, + "learning_rate": 8.551233607468326e-05, + "loss": 2.2237, + "step": 25764 + }, + { + "epoch": 0.5725555555555556, + "grad_norm": 1.481768012046814, + "learning_rate": 8.550789064236497e-05, + "loss": 2.1765, + "step": 25765 + }, + { + "epoch": 0.5725777777777777, + "grad_norm": 1.5719056129455566, + "learning_rate": 8.550344521004668e-05, + "loss": 1.9083, + "step": 25766 + }, + { + "epoch": 0.5726, + "grad_norm": 1.3396432399749756, + "learning_rate": 8.549899977772838e-05, + "loss": 1.7808, + "step": 25767 + }, + { + "epoch": 0.5726222222222223, + "grad_norm": 1.4139114618301392, + "learning_rate": 8.549455434541009e-05, + "loss": 1.8634, + "step": 25768 + }, + { + "epoch": 0.5726444444444444, + "grad_norm": 1.6188405752182007, + "learning_rate": 8.54901089130918e-05, + "loss": 2.0596, + "step": 25769 + }, + { + "epoch": 0.5726666666666667, + "grad_norm": 1.9198650121688843, + "learning_rate": 8.548566348077351e-05, + "loss": 1.7277, + "step": 25770 + }, + { + "epoch": 0.5726888888888889, + "grad_norm": 1.4434568881988525, + "learning_rate": 8.548121804845522e-05, + "loss": 1.742, + "step": 25771 + }, + { + "epoch": 0.5727111111111111, + "grad_norm": 1.572055697441101, + "learning_rate": 8.547677261613693e-05, + "loss": 1.8451, + "step": 25772 + }, + { + "epoch": 0.5727333333333333, + "grad_norm": 1.3459651470184326, + "learning_rate": 8.547232718381864e-05, + "loss": 1.3596, + "step": 25773 + }, + { + "epoch": 0.5727555555555556, + "grad_norm": 1.3569782972335815, + "learning_rate": 8.546788175150033e-05, + "loss": 1.7197, + "step": 25774 + }, + { + "epoch": 0.5727777777777778, + "grad_norm": 1.5443956851959229, + "learning_rate": 8.546343631918204e-05, + "loss": 1.6795, + "step": 25775 + }, + { + "epoch": 0.5728, + "grad_norm": 1.878814935684204, + "learning_rate": 8.545899088686375e-05, + "loss": 1.721, + "step": 25776 + }, + { + "epoch": 0.5728222222222222, + "grad_norm": 1.5908725261688232, + "learning_rate": 8.545454545454545e-05, + "loss": 2.0399, + "step": 25777 + }, + { + "epoch": 0.5728444444444445, + "grad_norm": 1.5259735584259033, + "learning_rate": 8.545010002222716e-05, + "loss": 1.6809, + "step": 25778 + }, + { + "epoch": 0.5728666666666666, + "grad_norm": 1.7970151901245117, + "learning_rate": 8.544565458990888e-05, + "loss": 2.0895, + "step": 25779 + }, + { + "epoch": 0.5728888888888889, + "grad_norm": 1.7510429620742798, + "learning_rate": 8.544120915759059e-05, + "loss": 2.0142, + "step": 25780 + }, + { + "epoch": 0.5729111111111111, + "grad_norm": 1.514723539352417, + "learning_rate": 8.543676372527229e-05, + "loss": 1.5025, + "step": 25781 + }, + { + "epoch": 0.5729333333333333, + "grad_norm": 1.241104245185852, + "learning_rate": 8.5432318292954e-05, + "loss": 1.2434, + "step": 25782 + }, + { + "epoch": 0.5729555555555556, + "grad_norm": 1.9197041988372803, + "learning_rate": 8.54278728606357e-05, + "loss": 1.7614, + "step": 25783 + }, + { + "epoch": 0.5729777777777778, + "grad_norm": 1.567199468612671, + "learning_rate": 8.54234274283174e-05, + "loss": 1.6021, + "step": 25784 + }, + { + "epoch": 0.573, + "grad_norm": 1.7817846536636353, + "learning_rate": 8.541898199599911e-05, + "loss": 1.8707, + "step": 25785 + }, + { + "epoch": 0.5730222222222222, + "grad_norm": 1.7348031997680664, + "learning_rate": 8.541453656368082e-05, + "loss": 1.771, + "step": 25786 + }, + { + "epoch": 0.5730444444444445, + "grad_norm": 2.0711631774902344, + "learning_rate": 8.541009113136252e-05, + "loss": 2.2206, + "step": 25787 + }, + { + "epoch": 0.5730666666666666, + "grad_norm": 1.8702232837677002, + "learning_rate": 8.540564569904424e-05, + "loss": 2.1554, + "step": 25788 + }, + { + "epoch": 0.5730888888888889, + "grad_norm": 2.070375680923462, + "learning_rate": 8.540120026672595e-05, + "loss": 1.9594, + "step": 25789 + }, + { + "epoch": 0.5731111111111111, + "grad_norm": 1.8700990676879883, + "learning_rate": 8.539675483440766e-05, + "loss": 1.8745, + "step": 25790 + }, + { + "epoch": 0.5731333333333334, + "grad_norm": 1.6505186557769775, + "learning_rate": 8.539230940208936e-05, + "loss": 1.6517, + "step": 25791 + }, + { + "epoch": 0.5731555555555555, + "grad_norm": 1.9102988243103027, + "learning_rate": 8.538786396977107e-05, + "loss": 2.0135, + "step": 25792 + }, + { + "epoch": 0.5731777777777778, + "grad_norm": 1.8339016437530518, + "learning_rate": 8.538341853745278e-05, + "loss": 1.7311, + "step": 25793 + }, + { + "epoch": 0.5732, + "grad_norm": 1.8846791982650757, + "learning_rate": 8.537897310513447e-05, + "loss": 2.0282, + "step": 25794 + }, + { + "epoch": 0.5732222222222222, + "grad_norm": 1.6500928401947021, + "learning_rate": 8.537452767281618e-05, + "loss": 1.734, + "step": 25795 + }, + { + "epoch": 0.5732444444444444, + "grad_norm": 1.7177919149398804, + "learning_rate": 8.537008224049789e-05, + "loss": 1.4664, + "step": 25796 + }, + { + "epoch": 0.5732666666666667, + "grad_norm": 1.9090465307235718, + "learning_rate": 8.53656368081796e-05, + "loss": 1.9601, + "step": 25797 + }, + { + "epoch": 0.5732888888888888, + "grad_norm": 1.3065292835235596, + "learning_rate": 8.536119137586131e-05, + "loss": 0.8583, + "step": 25798 + }, + { + "epoch": 0.5733111111111111, + "grad_norm": 1.5860507488250732, + "learning_rate": 8.535674594354302e-05, + "loss": 1.3665, + "step": 25799 + }, + { + "epoch": 0.5733333333333334, + "grad_norm": 1.4950746297836304, + "learning_rate": 8.535230051122473e-05, + "loss": 0.974, + "step": 25800 + }, + { + "epoch": 0.5733555555555555, + "grad_norm": 1.5080500841140747, + "learning_rate": 8.534785507890642e-05, + "loss": 2.2007, + "step": 25801 + }, + { + "epoch": 0.5733777777777778, + "grad_norm": 1.8065223693847656, + "learning_rate": 8.534340964658813e-05, + "loss": 2.7459, + "step": 25802 + }, + { + "epoch": 0.5734, + "grad_norm": 1.1576310396194458, + "learning_rate": 8.533896421426984e-05, + "loss": 1.1024, + "step": 25803 + }, + { + "epoch": 0.5734222222222223, + "grad_norm": 1.37685227394104, + "learning_rate": 8.533451878195154e-05, + "loss": 2.098, + "step": 25804 + }, + { + "epoch": 0.5734444444444444, + "grad_norm": 1.4093037843704224, + "learning_rate": 8.533007334963325e-05, + "loss": 2.2798, + "step": 25805 + }, + { + "epoch": 0.5734666666666667, + "grad_norm": 1.5947177410125732, + "learning_rate": 8.532562791731496e-05, + "loss": 2.0701, + "step": 25806 + }, + { + "epoch": 0.5734888888888889, + "grad_norm": 2.1120340824127197, + "learning_rate": 8.532118248499667e-05, + "loss": 1.6964, + "step": 25807 + }, + { + "epoch": 0.5735111111111111, + "grad_norm": 1.8550314903259277, + "learning_rate": 8.531673705267838e-05, + "loss": 2.5111, + "step": 25808 + }, + { + "epoch": 0.5735333333333333, + "grad_norm": 2.037273645401001, + "learning_rate": 8.531229162036009e-05, + "loss": 1.858, + "step": 25809 + }, + { + "epoch": 0.5735555555555556, + "grad_norm": 1.458878517150879, + "learning_rate": 8.53078461880418e-05, + "loss": 1.8525, + "step": 25810 + }, + { + "epoch": 0.5735777777777777, + "grad_norm": 0.2872360348701477, + "learning_rate": 8.53034007557235e-05, + "loss": 0.0209, + "step": 25811 + }, + { + "epoch": 0.5736, + "grad_norm": 1.529571294784546, + "learning_rate": 8.52989553234052e-05, + "loss": 1.8411, + "step": 25812 + }, + { + "epoch": 0.5736222222222223, + "grad_norm": 1.5923453569412231, + "learning_rate": 8.529450989108691e-05, + "loss": 1.8027, + "step": 25813 + }, + { + "epoch": 0.5736444444444444, + "grad_norm": 1.7380670309066772, + "learning_rate": 8.529006445876861e-05, + "loss": 2.4903, + "step": 25814 + }, + { + "epoch": 0.5736666666666667, + "grad_norm": 2.0399043560028076, + "learning_rate": 8.528561902645032e-05, + "loss": 2.3516, + "step": 25815 + }, + { + "epoch": 0.5736888888888889, + "grad_norm": 1.5405837297439575, + "learning_rate": 8.528117359413204e-05, + "loss": 1.7951, + "step": 25816 + }, + { + "epoch": 0.5737111111111111, + "grad_norm": 1.6100730895996094, + "learning_rate": 8.527672816181375e-05, + "loss": 1.955, + "step": 25817 + }, + { + "epoch": 0.5737333333333333, + "grad_norm": 1.479048252105713, + "learning_rate": 8.527228272949545e-05, + "loss": 1.6049, + "step": 25818 + }, + { + "epoch": 0.5737555555555556, + "grad_norm": 1.6415687799453735, + "learning_rate": 8.526783729717716e-05, + "loss": 2.1413, + "step": 25819 + }, + { + "epoch": 0.5737777777777778, + "grad_norm": 1.633242130279541, + "learning_rate": 8.526339186485887e-05, + "loss": 1.9074, + "step": 25820 + }, + { + "epoch": 0.5738, + "grad_norm": 1.722865343093872, + "learning_rate": 8.525894643254056e-05, + "loss": 2.2105, + "step": 25821 + }, + { + "epoch": 0.5738222222222222, + "grad_norm": 1.6807760000228882, + "learning_rate": 8.525450100022227e-05, + "loss": 2.1285, + "step": 25822 + }, + { + "epoch": 0.5738444444444445, + "grad_norm": 1.3529731035232544, + "learning_rate": 8.525005556790398e-05, + "loss": 1.7808, + "step": 25823 + }, + { + "epoch": 0.5738666666666666, + "grad_norm": 1.7612981796264648, + "learning_rate": 8.524561013558568e-05, + "loss": 1.8677, + "step": 25824 + }, + { + "epoch": 0.5738888888888889, + "grad_norm": 1.543408751487732, + "learning_rate": 8.52411647032674e-05, + "loss": 1.9944, + "step": 25825 + }, + { + "epoch": 0.5739111111111111, + "grad_norm": 1.546540379524231, + "learning_rate": 8.523671927094911e-05, + "loss": 1.942, + "step": 25826 + }, + { + "epoch": 0.5739333333333333, + "grad_norm": 1.7032814025878906, + "learning_rate": 8.523227383863082e-05, + "loss": 1.7724, + "step": 25827 + }, + { + "epoch": 0.5739555555555556, + "grad_norm": 1.6998419761657715, + "learning_rate": 8.522782840631252e-05, + "loss": 2.0003, + "step": 25828 + }, + { + "epoch": 0.5739777777777778, + "grad_norm": 1.5656496286392212, + "learning_rate": 8.522338297399423e-05, + "loss": 1.8617, + "step": 25829 + }, + { + "epoch": 0.574, + "grad_norm": 1.3194990158081055, + "learning_rate": 8.521893754167594e-05, + "loss": 1.3988, + "step": 25830 + }, + { + "epoch": 0.5740222222222222, + "grad_norm": 1.5632909536361694, + "learning_rate": 8.521449210935763e-05, + "loss": 1.8376, + "step": 25831 + }, + { + "epoch": 0.5740444444444445, + "grad_norm": 1.4915846586227417, + "learning_rate": 8.521004667703934e-05, + "loss": 1.9154, + "step": 25832 + }, + { + "epoch": 0.5740666666666666, + "grad_norm": 1.458807349205017, + "learning_rate": 8.520560124472105e-05, + "loss": 1.8826, + "step": 25833 + }, + { + "epoch": 0.5740888888888889, + "grad_norm": 1.8433349132537842, + "learning_rate": 8.520115581240276e-05, + "loss": 2.4107, + "step": 25834 + }, + { + "epoch": 0.5741111111111111, + "grad_norm": 1.5098316669464111, + "learning_rate": 8.519671038008447e-05, + "loss": 1.8326, + "step": 25835 + }, + { + "epoch": 0.5741333333333334, + "grad_norm": 1.8357150554656982, + "learning_rate": 8.519226494776618e-05, + "loss": 1.5214, + "step": 25836 + }, + { + "epoch": 0.5741555555555555, + "grad_norm": 1.6379412412643433, + "learning_rate": 8.518781951544789e-05, + "loss": 1.651, + "step": 25837 + }, + { + "epoch": 0.5741777777777778, + "grad_norm": 1.544201374053955, + "learning_rate": 8.518337408312959e-05, + "loss": 1.8596, + "step": 25838 + }, + { + "epoch": 0.5742, + "grad_norm": 1.7644819021224976, + "learning_rate": 8.51789286508113e-05, + "loss": 2.0177, + "step": 25839 + }, + { + "epoch": 0.5742222222222222, + "grad_norm": 1.0269098281860352, + "learning_rate": 8.5174483218493e-05, + "loss": 0.6541, + "step": 25840 + }, + { + "epoch": 0.5742444444444444, + "grad_norm": 2.171380043029785, + "learning_rate": 8.51700377861747e-05, + "loss": 2.2374, + "step": 25841 + }, + { + "epoch": 0.5742666666666667, + "grad_norm": 1.542111873626709, + "learning_rate": 8.516559235385641e-05, + "loss": 1.855, + "step": 25842 + }, + { + "epoch": 0.5742888888888888, + "grad_norm": 2.249403238296509, + "learning_rate": 8.516114692153812e-05, + "loss": 1.9984, + "step": 25843 + }, + { + "epoch": 0.5743111111111111, + "grad_norm": 1.7134921550750732, + "learning_rate": 8.515670148921983e-05, + "loss": 1.8807, + "step": 25844 + }, + { + "epoch": 0.5743333333333334, + "grad_norm": 1.8306804895401, + "learning_rate": 8.515225605690154e-05, + "loss": 2.2559, + "step": 25845 + }, + { + "epoch": 0.5743555555555555, + "grad_norm": 1.6717336177825928, + "learning_rate": 8.514781062458325e-05, + "loss": 1.7139, + "step": 25846 + }, + { + "epoch": 0.5743777777777778, + "grad_norm": 1.6805630922317505, + "learning_rate": 8.514336519226496e-05, + "loss": 1.9044, + "step": 25847 + }, + { + "epoch": 0.5744, + "grad_norm": 1.1804941892623901, + "learning_rate": 8.513891975994666e-05, + "loss": 0.8815, + "step": 25848 + }, + { + "epoch": 0.5744222222222222, + "grad_norm": 1.9663548469543457, + "learning_rate": 8.513447432762836e-05, + "loss": 1.888, + "step": 25849 + }, + { + "epoch": 0.5744444444444444, + "grad_norm": 1.8257575035095215, + "learning_rate": 8.513002889531007e-05, + "loss": 1.6505, + "step": 25850 + }, + { + "epoch": 0.5744666666666667, + "grad_norm": 1.527230143547058, + "learning_rate": 8.512558346299177e-05, + "loss": 2.4513, + "step": 25851 + }, + { + "epoch": 0.5744888888888889, + "grad_norm": 1.4793553352355957, + "learning_rate": 8.512113803067348e-05, + "loss": 2.4797, + "step": 25852 + }, + { + "epoch": 0.5745111111111111, + "grad_norm": 1.4620087146759033, + "learning_rate": 8.51166925983552e-05, + "loss": 2.487, + "step": 25853 + }, + { + "epoch": 0.5745333333333333, + "grad_norm": 1.2276864051818848, + "learning_rate": 8.51122471660369e-05, + "loss": 1.3152, + "step": 25854 + }, + { + "epoch": 0.5745555555555556, + "grad_norm": 1.73105788230896, + "learning_rate": 8.510780173371861e-05, + "loss": 1.9796, + "step": 25855 + }, + { + "epoch": 0.5745777777777777, + "grad_norm": 1.2348138093948364, + "learning_rate": 8.510335630140032e-05, + "loss": 1.4845, + "step": 25856 + }, + { + "epoch": 0.5746, + "grad_norm": 1.5831315517425537, + "learning_rate": 8.509891086908203e-05, + "loss": 2.0515, + "step": 25857 + }, + { + "epoch": 0.5746222222222223, + "grad_norm": 1.4946595430374146, + "learning_rate": 8.509446543676372e-05, + "loss": 2.227, + "step": 25858 + }, + { + "epoch": 0.5746444444444444, + "grad_norm": 1.8229634761810303, + "learning_rate": 8.509002000444543e-05, + "loss": 2.4852, + "step": 25859 + }, + { + "epoch": 0.5746666666666667, + "grad_norm": 1.4324060678482056, + "learning_rate": 8.508557457212714e-05, + "loss": 2.1294, + "step": 25860 + }, + { + "epoch": 0.5746888888888889, + "grad_norm": 1.3680497407913208, + "learning_rate": 8.508112913980884e-05, + "loss": 1.8083, + "step": 25861 + }, + { + "epoch": 0.5747111111111111, + "grad_norm": 1.4841502904891968, + "learning_rate": 8.507668370749056e-05, + "loss": 2.3155, + "step": 25862 + }, + { + "epoch": 0.5747333333333333, + "grad_norm": 1.6526727676391602, + "learning_rate": 8.507223827517227e-05, + "loss": 2.0219, + "step": 25863 + }, + { + "epoch": 0.5747555555555556, + "grad_norm": 1.7587121725082397, + "learning_rate": 8.506779284285397e-05, + "loss": 1.9555, + "step": 25864 + }, + { + "epoch": 0.5747777777777778, + "grad_norm": 1.8311957120895386, + "learning_rate": 8.506334741053568e-05, + "loss": 2.3431, + "step": 25865 + }, + { + "epoch": 0.5748, + "grad_norm": 1.590606451034546, + "learning_rate": 8.505890197821739e-05, + "loss": 1.7907, + "step": 25866 + }, + { + "epoch": 0.5748222222222222, + "grad_norm": 1.7074474096298218, + "learning_rate": 8.50544565458991e-05, + "loss": 2.0113, + "step": 25867 + }, + { + "epoch": 0.5748444444444445, + "grad_norm": 1.7187414169311523, + "learning_rate": 8.505001111358079e-05, + "loss": 1.7066, + "step": 25868 + }, + { + "epoch": 0.5748666666666666, + "grad_norm": 1.5686930418014526, + "learning_rate": 8.50455656812625e-05, + "loss": 1.9829, + "step": 25869 + }, + { + "epoch": 0.5748888888888889, + "grad_norm": 1.6503736972808838, + "learning_rate": 8.504112024894421e-05, + "loss": 2.0525, + "step": 25870 + }, + { + "epoch": 0.5749111111111112, + "grad_norm": 1.5381848812103271, + "learning_rate": 8.503667481662592e-05, + "loss": 2.1522, + "step": 25871 + }, + { + "epoch": 0.5749333333333333, + "grad_norm": 1.5714423656463623, + "learning_rate": 8.503222938430763e-05, + "loss": 1.8523, + "step": 25872 + }, + { + "epoch": 0.5749555555555556, + "grad_norm": 1.486332893371582, + "learning_rate": 8.502778395198934e-05, + "loss": 1.8179, + "step": 25873 + }, + { + "epoch": 0.5749777777777778, + "grad_norm": 1.8666901588439941, + "learning_rate": 8.502333851967105e-05, + "loss": 1.9923, + "step": 25874 + }, + { + "epoch": 0.575, + "grad_norm": 1.815557837486267, + "learning_rate": 8.501889308735275e-05, + "loss": 2.1112, + "step": 25875 + }, + { + "epoch": 0.5750222222222222, + "grad_norm": 1.8054728507995605, + "learning_rate": 8.501444765503446e-05, + "loss": 1.5906, + "step": 25876 + }, + { + "epoch": 0.5750444444444445, + "grad_norm": 1.6779707670211792, + "learning_rate": 8.501000222271617e-05, + "loss": 1.8326, + "step": 25877 + }, + { + "epoch": 0.5750666666666666, + "grad_norm": 1.5057170391082764, + "learning_rate": 8.500555679039786e-05, + "loss": 1.8389, + "step": 25878 + }, + { + "epoch": 0.5750888888888889, + "grad_norm": 2.190169334411621, + "learning_rate": 8.500111135807957e-05, + "loss": 2.0456, + "step": 25879 + }, + { + "epoch": 0.5751111111111111, + "grad_norm": 1.7067064046859741, + "learning_rate": 8.499666592576128e-05, + "loss": 1.9427, + "step": 25880 + }, + { + "epoch": 0.5751333333333334, + "grad_norm": 1.3328043222427368, + "learning_rate": 8.499222049344299e-05, + "loss": 1.436, + "step": 25881 + }, + { + "epoch": 0.5751555555555555, + "grad_norm": 1.5364593267440796, + "learning_rate": 8.49877750611247e-05, + "loss": 1.7917, + "step": 25882 + }, + { + "epoch": 0.5751777777777778, + "grad_norm": 1.9151912927627563, + "learning_rate": 8.498332962880641e-05, + "loss": 2.1354, + "step": 25883 + }, + { + "epoch": 0.5752, + "grad_norm": 1.7001210451126099, + "learning_rate": 8.497888419648812e-05, + "loss": 2.0169, + "step": 25884 + }, + { + "epoch": 0.5752222222222222, + "grad_norm": 1.3206290006637573, + "learning_rate": 8.497443876416982e-05, + "loss": 1.386, + "step": 25885 + }, + { + "epoch": 0.5752444444444444, + "grad_norm": 1.776057243347168, + "learning_rate": 8.496999333185153e-05, + "loss": 2.0317, + "step": 25886 + }, + { + "epoch": 0.5752666666666667, + "grad_norm": 1.4158116579055786, + "learning_rate": 8.496554789953324e-05, + "loss": 1.4969, + "step": 25887 + }, + { + "epoch": 0.5752888888888888, + "grad_norm": 1.621314287185669, + "learning_rate": 8.496110246721493e-05, + "loss": 1.7737, + "step": 25888 + }, + { + "epoch": 0.5753111111111111, + "grad_norm": 1.9011842012405396, + "learning_rate": 8.495665703489664e-05, + "loss": 2.5415, + "step": 25889 + }, + { + "epoch": 0.5753333333333334, + "grad_norm": 2.2260749340057373, + "learning_rate": 8.495221160257836e-05, + "loss": 1.8284, + "step": 25890 + }, + { + "epoch": 0.5753555555555555, + "grad_norm": 1.7880183458328247, + "learning_rate": 8.494776617026006e-05, + "loss": 1.5225, + "step": 25891 + }, + { + "epoch": 0.5753777777777778, + "grad_norm": 1.56460702419281, + "learning_rate": 8.494332073794177e-05, + "loss": 1.9047, + "step": 25892 + }, + { + "epoch": 0.5754, + "grad_norm": 1.4668025970458984, + "learning_rate": 8.493887530562348e-05, + "loss": 1.4527, + "step": 25893 + }, + { + "epoch": 0.5754222222222222, + "grad_norm": 1.743921160697937, + "learning_rate": 8.493442987330519e-05, + "loss": 1.5777, + "step": 25894 + }, + { + "epoch": 0.5754444444444444, + "grad_norm": 1.5977262258529663, + "learning_rate": 8.492998444098689e-05, + "loss": 1.4955, + "step": 25895 + }, + { + "epoch": 0.5754666666666667, + "grad_norm": 2.3820276260375977, + "learning_rate": 8.49255390086686e-05, + "loss": 2.1317, + "step": 25896 + }, + { + "epoch": 0.5754888888888889, + "grad_norm": 1.623916745185852, + "learning_rate": 8.49210935763503e-05, + "loss": 1.6631, + "step": 25897 + }, + { + "epoch": 0.5755111111111111, + "grad_norm": 2.083130359649658, + "learning_rate": 8.4916648144032e-05, + "loss": 2.0471, + "step": 25898 + }, + { + "epoch": 0.5755333333333333, + "grad_norm": 1.6949437856674194, + "learning_rate": 8.491220271171372e-05, + "loss": 1.7325, + "step": 25899 + }, + { + "epoch": 0.5755555555555556, + "grad_norm": 1.465520977973938, + "learning_rate": 8.490775727939543e-05, + "loss": 1.5087, + "step": 25900 + }, + { + "epoch": 0.5755777777777777, + "grad_norm": 2.0363380908966064, + "learning_rate": 8.490331184707713e-05, + "loss": 2.9875, + "step": 25901 + }, + { + "epoch": 0.5756, + "grad_norm": 1.3037781715393066, + "learning_rate": 8.489886641475884e-05, + "loss": 2.2702, + "step": 25902 + }, + { + "epoch": 0.5756222222222223, + "grad_norm": 1.4280723333358765, + "learning_rate": 8.489442098244055e-05, + "loss": 2.4732, + "step": 25903 + }, + { + "epoch": 0.5756444444444444, + "grad_norm": 0.8468064665794373, + "learning_rate": 8.488997555012226e-05, + "loss": 1.0019, + "step": 25904 + }, + { + "epoch": 0.5756666666666667, + "grad_norm": 1.372684121131897, + "learning_rate": 8.488553011780395e-05, + "loss": 2.4093, + "step": 25905 + }, + { + "epoch": 0.5756888888888889, + "grad_norm": 1.5047813653945923, + "learning_rate": 8.488108468548566e-05, + "loss": 2.5721, + "step": 25906 + }, + { + "epoch": 0.5757111111111111, + "grad_norm": 1.5781632661819458, + "learning_rate": 8.487663925316737e-05, + "loss": 2.0417, + "step": 25907 + }, + { + "epoch": 0.5757333333333333, + "grad_norm": 1.6170930862426758, + "learning_rate": 8.487219382084908e-05, + "loss": 2.135, + "step": 25908 + }, + { + "epoch": 0.5757555555555556, + "grad_norm": 1.679280400276184, + "learning_rate": 8.486774838853079e-05, + "loss": 2.2844, + "step": 25909 + }, + { + "epoch": 0.5757777777777778, + "grad_norm": 1.9144145250320435, + "learning_rate": 8.48633029562125e-05, + "loss": 2.1399, + "step": 25910 + }, + { + "epoch": 0.5758, + "grad_norm": 1.6415542364120483, + "learning_rate": 8.48588575238942e-05, + "loss": 1.2934, + "step": 25911 + }, + { + "epoch": 0.5758222222222222, + "grad_norm": 1.952968955039978, + "learning_rate": 8.485441209157591e-05, + "loss": 2.2187, + "step": 25912 + }, + { + "epoch": 0.5758444444444445, + "grad_norm": 1.1734278202056885, + "learning_rate": 8.484996665925762e-05, + "loss": 1.1814, + "step": 25913 + }, + { + "epoch": 0.5758666666666666, + "grad_norm": 1.1765111684799194, + "learning_rate": 8.484552122693933e-05, + "loss": 1.1921, + "step": 25914 + }, + { + "epoch": 0.5758888888888889, + "grad_norm": 1.738513708114624, + "learning_rate": 8.484107579462102e-05, + "loss": 1.9746, + "step": 25915 + }, + { + "epoch": 0.5759111111111112, + "grad_norm": 1.661946177482605, + "learning_rate": 8.483663036230273e-05, + "loss": 1.9189, + "step": 25916 + }, + { + "epoch": 0.5759333333333333, + "grad_norm": 1.8266701698303223, + "learning_rate": 8.483218492998444e-05, + "loss": 2.4225, + "step": 25917 + }, + { + "epoch": 0.5759555555555556, + "grad_norm": 1.9770067930221558, + "learning_rate": 8.482773949766615e-05, + "loss": 2.1569, + "step": 25918 + }, + { + "epoch": 0.5759777777777778, + "grad_norm": 1.2214926481246948, + "learning_rate": 8.482329406534786e-05, + "loss": 1.1254, + "step": 25919 + }, + { + "epoch": 0.576, + "grad_norm": 1.5936335325241089, + "learning_rate": 8.481884863302957e-05, + "loss": 1.7393, + "step": 25920 + }, + { + "epoch": 0.5760222222222222, + "grad_norm": 1.5994678735733032, + "learning_rate": 8.481440320071128e-05, + "loss": 1.8985, + "step": 25921 + }, + { + "epoch": 0.5760444444444445, + "grad_norm": 1.9072351455688477, + "learning_rate": 8.480995776839298e-05, + "loss": 2.0587, + "step": 25922 + }, + { + "epoch": 0.5760666666666666, + "grad_norm": 1.9863942861557007, + "learning_rate": 8.480551233607469e-05, + "loss": 1.991, + "step": 25923 + }, + { + "epoch": 0.5760888888888889, + "grad_norm": 1.942941427230835, + "learning_rate": 8.48010669037564e-05, + "loss": 2.2744, + "step": 25924 + }, + { + "epoch": 0.5761111111111111, + "grad_norm": 1.8738967180252075, + "learning_rate": 8.479662147143809e-05, + "loss": 1.9959, + "step": 25925 + }, + { + "epoch": 0.5761333333333334, + "grad_norm": 1.520711064338684, + "learning_rate": 8.47921760391198e-05, + "loss": 1.8296, + "step": 25926 + }, + { + "epoch": 0.5761555555555555, + "grad_norm": 1.6332484483718872, + "learning_rate": 8.478773060680153e-05, + "loss": 1.8679, + "step": 25927 + }, + { + "epoch": 0.5761777777777778, + "grad_norm": 1.8324124813079834, + "learning_rate": 8.478328517448322e-05, + "loss": 1.9311, + "step": 25928 + }, + { + "epoch": 0.5762, + "grad_norm": 1.7656601667404175, + "learning_rate": 8.477883974216493e-05, + "loss": 1.8967, + "step": 25929 + }, + { + "epoch": 0.5762222222222222, + "grad_norm": 1.652225375175476, + "learning_rate": 8.477439430984664e-05, + "loss": 1.8419, + "step": 25930 + }, + { + "epoch": 0.5762444444444444, + "grad_norm": 2.149226665496826, + "learning_rate": 8.476994887752835e-05, + "loss": 2.0951, + "step": 25931 + }, + { + "epoch": 0.5762666666666667, + "grad_norm": 1.4774209260940552, + "learning_rate": 8.476550344521005e-05, + "loss": 1.8889, + "step": 25932 + }, + { + "epoch": 0.5762888888888889, + "grad_norm": 1.4891711473464966, + "learning_rate": 8.476105801289176e-05, + "loss": 1.5859, + "step": 25933 + }, + { + "epoch": 0.5763111111111111, + "grad_norm": 2.0572304725646973, + "learning_rate": 8.475661258057347e-05, + "loss": 2.1509, + "step": 25934 + }, + { + "epoch": 0.5763333333333334, + "grad_norm": 2.207815647125244, + "learning_rate": 8.475216714825516e-05, + "loss": 2.0996, + "step": 25935 + }, + { + "epoch": 0.5763555555555555, + "grad_norm": 2.040614128112793, + "learning_rate": 8.474772171593688e-05, + "loss": 2.0951, + "step": 25936 + }, + { + "epoch": 0.5763777777777778, + "grad_norm": 1.738759160041809, + "learning_rate": 8.47432762836186e-05, + "loss": 2.4787, + "step": 25937 + }, + { + "epoch": 0.5764, + "grad_norm": 2.2333452701568604, + "learning_rate": 8.473883085130029e-05, + "loss": 2.0454, + "step": 25938 + }, + { + "epoch": 0.5764222222222222, + "grad_norm": 1.803543210029602, + "learning_rate": 8.4734385418982e-05, + "loss": 1.9241, + "step": 25939 + }, + { + "epoch": 0.5764444444444444, + "grad_norm": 1.8549774885177612, + "learning_rate": 8.472993998666371e-05, + "loss": 2.135, + "step": 25940 + }, + { + "epoch": 0.5764666666666667, + "grad_norm": 2.093702554702759, + "learning_rate": 8.472549455434542e-05, + "loss": 1.8778, + "step": 25941 + }, + { + "epoch": 0.5764888888888889, + "grad_norm": 1.755881428718567, + "learning_rate": 8.472104912202712e-05, + "loss": 2.0415, + "step": 25942 + }, + { + "epoch": 0.5765111111111111, + "grad_norm": 2.4870405197143555, + "learning_rate": 8.471660368970882e-05, + "loss": 1.9493, + "step": 25943 + }, + { + "epoch": 0.5765333333333333, + "grad_norm": 1.4329767227172852, + "learning_rate": 8.471215825739053e-05, + "loss": 1.6417, + "step": 25944 + }, + { + "epoch": 0.5765555555555556, + "grad_norm": 1.9608339071273804, + "learning_rate": 8.470771282507224e-05, + "loss": 2.0607, + "step": 25945 + }, + { + "epoch": 0.5765777777777777, + "grad_norm": 1.8790415525436401, + "learning_rate": 8.470326739275395e-05, + "loss": 1.8563, + "step": 25946 + }, + { + "epoch": 0.5766, + "grad_norm": 2.0572495460510254, + "learning_rate": 8.469882196043566e-05, + "loss": 2.0796, + "step": 25947 + }, + { + "epoch": 0.5766222222222223, + "grad_norm": 2.031140089035034, + "learning_rate": 8.469437652811736e-05, + "loss": 1.7421, + "step": 25948 + }, + { + "epoch": 0.5766444444444444, + "grad_norm": 1.3088346719741821, + "learning_rate": 8.468993109579907e-05, + "loss": 1.0353, + "step": 25949 + }, + { + "epoch": 0.5766666666666667, + "grad_norm": 1.5769364833831787, + "learning_rate": 8.468548566348078e-05, + "loss": 0.9108, + "step": 25950 + }, + { + "epoch": 0.5766888888888889, + "grad_norm": 1.0300639867782593, + "learning_rate": 8.468104023116249e-05, + "loss": 1.2338, + "step": 25951 + }, + { + "epoch": 0.5767111111111111, + "grad_norm": 1.5113728046417236, + "learning_rate": 8.467659479884418e-05, + "loss": 2.6188, + "step": 25952 + }, + { + "epoch": 0.5767333333333333, + "grad_norm": 1.1564249992370605, + "learning_rate": 8.46721493665259e-05, + "loss": 1.4414, + "step": 25953 + }, + { + "epoch": 0.5767555555555556, + "grad_norm": 3.3172104358673096, + "learning_rate": 8.46677039342076e-05, + "loss": 2.6221, + "step": 25954 + }, + { + "epoch": 0.5767777777777777, + "grad_norm": 1.5539910793304443, + "learning_rate": 8.466325850188931e-05, + "loss": 2.5753, + "step": 25955 + }, + { + "epoch": 0.5768, + "grad_norm": 1.4837034940719604, + "learning_rate": 8.465881306957102e-05, + "loss": 2.2811, + "step": 25956 + }, + { + "epoch": 0.5768222222222222, + "grad_norm": 1.4640401601791382, + "learning_rate": 8.465436763725273e-05, + "loss": 1.1172, + "step": 25957 + }, + { + "epoch": 0.5768444444444445, + "grad_norm": 1.5787912607192993, + "learning_rate": 8.464992220493443e-05, + "loss": 2.1486, + "step": 25958 + }, + { + "epoch": 0.5768666666666666, + "grad_norm": 1.4304194450378418, + "learning_rate": 8.464547677261614e-05, + "loss": 2.031, + "step": 25959 + }, + { + "epoch": 0.5768888888888889, + "grad_norm": 1.6707733869552612, + "learning_rate": 8.464103134029785e-05, + "loss": 2.2478, + "step": 25960 + }, + { + "epoch": 0.5769111111111112, + "grad_norm": 1.6346321105957031, + "learning_rate": 8.463658590797956e-05, + "loss": 2.3574, + "step": 25961 + }, + { + "epoch": 0.5769333333333333, + "grad_norm": 1.6079306602478027, + "learning_rate": 8.463214047566125e-05, + "loss": 2.1815, + "step": 25962 + }, + { + "epoch": 0.5769555555555556, + "grad_norm": 1.5131382942199707, + "learning_rate": 8.462769504334296e-05, + "loss": 2.0589, + "step": 25963 + }, + { + "epoch": 0.5769777777777778, + "grad_norm": 1.4120280742645264, + "learning_rate": 8.462324961102469e-05, + "loss": 1.2998, + "step": 25964 + }, + { + "epoch": 0.577, + "grad_norm": 1.5342144966125488, + "learning_rate": 8.461880417870638e-05, + "loss": 1.4498, + "step": 25965 + }, + { + "epoch": 0.5770222222222222, + "grad_norm": 1.4350162744522095, + "learning_rate": 8.461435874638809e-05, + "loss": 1.8269, + "step": 25966 + }, + { + "epoch": 0.5770444444444445, + "grad_norm": 1.6343975067138672, + "learning_rate": 8.46099133140698e-05, + "loss": 2.0455, + "step": 25967 + }, + { + "epoch": 0.5770666666666666, + "grad_norm": 1.7373617887496948, + "learning_rate": 8.46054678817515e-05, + "loss": 2.2496, + "step": 25968 + }, + { + "epoch": 0.5770888888888889, + "grad_norm": 1.7621031999588013, + "learning_rate": 8.460102244943321e-05, + "loss": 2.0521, + "step": 25969 + }, + { + "epoch": 0.5771111111111111, + "grad_norm": 2.0356478691101074, + "learning_rate": 8.459657701711492e-05, + "loss": 1.8139, + "step": 25970 + }, + { + "epoch": 0.5771333333333334, + "grad_norm": 1.6037461757659912, + "learning_rate": 8.459213158479663e-05, + "loss": 1.9601, + "step": 25971 + }, + { + "epoch": 0.5771555555555555, + "grad_norm": 1.9582102298736572, + "learning_rate": 8.458768615247832e-05, + "loss": 2.2421, + "step": 25972 + }, + { + "epoch": 0.5771777777777778, + "grad_norm": 1.8982878923416138, + "learning_rate": 8.458324072016005e-05, + "loss": 2.4973, + "step": 25973 + }, + { + "epoch": 0.5772, + "grad_norm": 1.451770544052124, + "learning_rate": 8.457879528784176e-05, + "loss": 1.8959, + "step": 25974 + }, + { + "epoch": 0.5772222222222222, + "grad_norm": 1.4310729503631592, + "learning_rate": 8.457434985552345e-05, + "loss": 1.3622, + "step": 25975 + }, + { + "epoch": 0.5772444444444444, + "grad_norm": 1.6004141569137573, + "learning_rate": 8.456990442320516e-05, + "loss": 1.6239, + "step": 25976 + }, + { + "epoch": 0.5772666666666667, + "grad_norm": 1.7305545806884766, + "learning_rate": 8.456545899088687e-05, + "loss": 2.0589, + "step": 25977 + }, + { + "epoch": 0.5772888888888889, + "grad_norm": 1.3793563842773438, + "learning_rate": 8.456101355856858e-05, + "loss": 1.6397, + "step": 25978 + }, + { + "epoch": 0.5773111111111111, + "grad_norm": 1.5642708539962769, + "learning_rate": 8.455656812625028e-05, + "loss": 1.9601, + "step": 25979 + }, + { + "epoch": 0.5773333333333334, + "grad_norm": 1.8084590435028076, + "learning_rate": 8.455212269393199e-05, + "loss": 1.5212, + "step": 25980 + }, + { + "epoch": 0.5773555555555555, + "grad_norm": 1.7321137189865112, + "learning_rate": 8.45476772616137e-05, + "loss": 0.9447, + "step": 25981 + }, + { + "epoch": 0.5773777777777778, + "grad_norm": 1.569234013557434, + "learning_rate": 8.45432318292954e-05, + "loss": 1.8806, + "step": 25982 + }, + { + "epoch": 0.5774, + "grad_norm": 1.749790072441101, + "learning_rate": 8.453878639697711e-05, + "loss": 1.9686, + "step": 25983 + }, + { + "epoch": 0.5774222222222222, + "grad_norm": 1.4470043182373047, + "learning_rate": 8.453434096465882e-05, + "loss": 1.5933, + "step": 25984 + }, + { + "epoch": 0.5774444444444444, + "grad_norm": 1.4855726957321167, + "learning_rate": 8.452989553234052e-05, + "loss": 1.76, + "step": 25985 + }, + { + "epoch": 0.5774666666666667, + "grad_norm": 1.7785967588424683, + "learning_rate": 8.452545010002223e-05, + "loss": 1.9297, + "step": 25986 + }, + { + "epoch": 0.5774888888888889, + "grad_norm": 1.0004416704177856, + "learning_rate": 8.452100466770394e-05, + "loss": 0.8414, + "step": 25987 + }, + { + "epoch": 0.5775111111111111, + "grad_norm": 17.7835693359375, + "learning_rate": 8.451655923538565e-05, + "loss": 2.2178, + "step": 25988 + }, + { + "epoch": 0.5775333333333333, + "grad_norm": 1.5151816606521606, + "learning_rate": 8.451211380306735e-05, + "loss": 1.4501, + "step": 25989 + }, + { + "epoch": 0.5775555555555556, + "grad_norm": 1.9943801164627075, + "learning_rate": 8.450766837074906e-05, + "loss": 2.2157, + "step": 25990 + }, + { + "epoch": 0.5775777777777777, + "grad_norm": 1.8530950546264648, + "learning_rate": 8.450322293843076e-05, + "loss": 1.7795, + "step": 25991 + }, + { + "epoch": 0.5776, + "grad_norm": 1.7538650035858154, + "learning_rate": 8.449877750611247e-05, + "loss": 1.6754, + "step": 25992 + }, + { + "epoch": 0.5776222222222223, + "grad_norm": 1.8652743101119995, + "learning_rate": 8.449433207379418e-05, + "loss": 2.1591, + "step": 25993 + }, + { + "epoch": 0.5776444444444444, + "grad_norm": 1.8382536172866821, + "learning_rate": 8.44898866414759e-05, + "loss": 1.8605, + "step": 25994 + }, + { + "epoch": 0.5776666666666667, + "grad_norm": 1.5919816493988037, + "learning_rate": 8.448544120915759e-05, + "loss": 1.7085, + "step": 25995 + }, + { + "epoch": 0.5776888888888889, + "grad_norm": 1.91946542263031, + "learning_rate": 8.44809957768393e-05, + "loss": 1.7697, + "step": 25996 + }, + { + "epoch": 0.5777111111111111, + "grad_norm": 1.5796613693237305, + "learning_rate": 8.447655034452101e-05, + "loss": 1.3837, + "step": 25997 + }, + { + "epoch": 0.5777333333333333, + "grad_norm": 1.1964937448501587, + "learning_rate": 8.447210491220272e-05, + "loss": 0.9521, + "step": 25998 + }, + { + "epoch": 0.5777555555555556, + "grad_norm": 1.6924588680267334, + "learning_rate": 8.446765947988441e-05, + "loss": 1.7327, + "step": 25999 + }, + { + "epoch": 0.5777777777777777, + "grad_norm": 1.842859148979187, + "learning_rate": 8.446321404756612e-05, + "loss": 1.6703, + "step": 26000 + }, + { + "epoch": 0.5778, + "grad_norm": 0.8927009701728821, + "learning_rate": 8.445876861524785e-05, + "loss": 0.8949, + "step": 26001 + }, + { + "epoch": 0.5778222222222222, + "grad_norm": 1.79087233543396, + "learning_rate": 8.445432318292954e-05, + "loss": 2.1933, + "step": 26002 + }, + { + "epoch": 0.5778444444444445, + "grad_norm": 1.7325570583343506, + "learning_rate": 8.444987775061125e-05, + "loss": 2.4312, + "step": 26003 + }, + { + "epoch": 0.5778666666666666, + "grad_norm": 1.6615060567855835, + "learning_rate": 8.444543231829296e-05, + "loss": 1.5557, + "step": 26004 + }, + { + "epoch": 0.5778888888888889, + "grad_norm": 1.3299299478530884, + "learning_rate": 8.444098688597466e-05, + "loss": 1.6282, + "step": 26005 + }, + { + "epoch": 0.5779111111111112, + "grad_norm": 1.5845272541046143, + "learning_rate": 8.443654145365637e-05, + "loss": 2.1, + "step": 26006 + }, + { + "epoch": 0.5779333333333333, + "grad_norm": 1.890878438949585, + "learning_rate": 8.443209602133808e-05, + "loss": 1.3777, + "step": 26007 + }, + { + "epoch": 0.5779555555555556, + "grad_norm": 1.5573099851608276, + "learning_rate": 8.442765058901979e-05, + "loss": 2.2133, + "step": 26008 + }, + { + "epoch": 0.5779777777777778, + "grad_norm": 1.5288937091827393, + "learning_rate": 8.442320515670148e-05, + "loss": 1.7278, + "step": 26009 + }, + { + "epoch": 0.578, + "grad_norm": 1.4313246011734009, + "learning_rate": 8.44187597243832e-05, + "loss": 2.0185, + "step": 26010 + }, + { + "epoch": 0.5780222222222222, + "grad_norm": 1.7366185188293457, + "learning_rate": 8.441431429206492e-05, + "loss": 2.2675, + "step": 26011 + }, + { + "epoch": 0.5780444444444445, + "grad_norm": 1.7425129413604736, + "learning_rate": 8.440986885974661e-05, + "loss": 2.218, + "step": 26012 + }, + { + "epoch": 0.5780666666666666, + "grad_norm": 1.5490167140960693, + "learning_rate": 8.440542342742832e-05, + "loss": 1.7977, + "step": 26013 + }, + { + "epoch": 0.5780888888888889, + "grad_norm": 1.307113528251648, + "learning_rate": 8.440097799511003e-05, + "loss": 1.146, + "step": 26014 + }, + { + "epoch": 0.5781111111111111, + "grad_norm": 1.814853549003601, + "learning_rate": 8.439653256279173e-05, + "loss": 1.9841, + "step": 26015 + }, + { + "epoch": 0.5781333333333334, + "grad_norm": 1.8090673685073853, + "learning_rate": 8.439208713047344e-05, + "loss": 1.7952, + "step": 26016 + }, + { + "epoch": 0.5781555555555555, + "grad_norm": 1.5758663415908813, + "learning_rate": 8.438764169815515e-05, + "loss": 2.2281, + "step": 26017 + }, + { + "epoch": 0.5781777777777778, + "grad_norm": 1.8426498174667358, + "learning_rate": 8.438319626583686e-05, + "loss": 2.4655, + "step": 26018 + }, + { + "epoch": 0.5782, + "grad_norm": 1.5860536098480225, + "learning_rate": 8.437875083351857e-05, + "loss": 1.9361, + "step": 26019 + }, + { + "epoch": 0.5782222222222222, + "grad_norm": 1.7160062789916992, + "learning_rate": 8.437430540120028e-05, + "loss": 1.8582, + "step": 26020 + }, + { + "epoch": 0.5782444444444444, + "grad_norm": 1.8908746242523193, + "learning_rate": 8.436985996888199e-05, + "loss": 2.1268, + "step": 26021 + }, + { + "epoch": 0.5782666666666667, + "grad_norm": 2.027647018432617, + "learning_rate": 8.436541453656368e-05, + "loss": 2.4584, + "step": 26022 + }, + { + "epoch": 0.5782888888888889, + "grad_norm": 1.639888882637024, + "learning_rate": 8.436096910424539e-05, + "loss": 1.3977, + "step": 26023 + }, + { + "epoch": 0.5783111111111111, + "grad_norm": 1.7778737545013428, + "learning_rate": 8.43565236719271e-05, + "loss": 2.2876, + "step": 26024 + }, + { + "epoch": 0.5783333333333334, + "grad_norm": 2.1147873401641846, + "learning_rate": 8.43520782396088e-05, + "loss": 1.7371, + "step": 26025 + }, + { + "epoch": 0.5783555555555555, + "grad_norm": 2.118032217025757, + "learning_rate": 8.43476328072905e-05, + "loss": 2.0833, + "step": 26026 + }, + { + "epoch": 0.5783777777777778, + "grad_norm": 1.5896843671798706, + "learning_rate": 8.434318737497222e-05, + "loss": 2.176, + "step": 26027 + }, + { + "epoch": 0.5784, + "grad_norm": 1.406416893005371, + "learning_rate": 8.433874194265393e-05, + "loss": 1.3327, + "step": 26028 + }, + { + "epoch": 0.5784222222222222, + "grad_norm": 1.8351752758026123, + "learning_rate": 8.433429651033564e-05, + "loss": 1.8609, + "step": 26029 + }, + { + "epoch": 0.5784444444444444, + "grad_norm": 2.0752804279327393, + "learning_rate": 8.432985107801734e-05, + "loss": 2.5484, + "step": 26030 + }, + { + "epoch": 0.5784666666666667, + "grad_norm": 1.6778042316436768, + "learning_rate": 8.432540564569905e-05, + "loss": 1.3947, + "step": 26031 + }, + { + "epoch": 0.5784888888888889, + "grad_norm": 2.0801656246185303, + "learning_rate": 8.432096021338075e-05, + "loss": 2.1063, + "step": 26032 + }, + { + "epoch": 0.5785111111111111, + "grad_norm": 1.9608205556869507, + "learning_rate": 8.431651478106246e-05, + "loss": 2.6487, + "step": 26033 + }, + { + "epoch": 0.5785333333333333, + "grad_norm": 1.5958486795425415, + "learning_rate": 8.431206934874417e-05, + "loss": 1.9128, + "step": 26034 + }, + { + "epoch": 0.5785555555555556, + "grad_norm": 1.6694389581680298, + "learning_rate": 8.430762391642588e-05, + "loss": 1.1725, + "step": 26035 + }, + { + "epoch": 0.5785777777777777, + "grad_norm": 1.5564417839050293, + "learning_rate": 8.430317848410758e-05, + "loss": 0.9956, + "step": 26036 + }, + { + "epoch": 0.5786, + "grad_norm": 1.6584241390228271, + "learning_rate": 8.429873305178929e-05, + "loss": 1.8983, + "step": 26037 + }, + { + "epoch": 0.5786222222222223, + "grad_norm": 1.9169220924377441, + "learning_rate": 8.429428761947101e-05, + "loss": 1.8601, + "step": 26038 + }, + { + "epoch": 0.5786444444444444, + "grad_norm": 1.9622453451156616, + "learning_rate": 8.42898421871527e-05, + "loss": 2.2768, + "step": 26039 + }, + { + "epoch": 0.5786666666666667, + "grad_norm": 1.8803428411483765, + "learning_rate": 8.428539675483441e-05, + "loss": 2.0619, + "step": 26040 + }, + { + "epoch": 0.5786888888888889, + "grad_norm": 1.8330425024032593, + "learning_rate": 8.428095132251612e-05, + "loss": 1.8456, + "step": 26041 + }, + { + "epoch": 0.5787111111111111, + "grad_norm": 1.7811545133590698, + "learning_rate": 8.427650589019782e-05, + "loss": 1.6961, + "step": 26042 + }, + { + "epoch": 0.5787333333333333, + "grad_norm": 1.984512448310852, + "learning_rate": 8.427206045787953e-05, + "loss": 1.7993, + "step": 26043 + }, + { + "epoch": 0.5787555555555556, + "grad_norm": 2.2069578170776367, + "learning_rate": 8.426761502556124e-05, + "loss": 1.8305, + "step": 26044 + }, + { + "epoch": 0.5787777777777777, + "grad_norm": 1.84796941280365, + "learning_rate": 8.426316959324295e-05, + "loss": 1.3626, + "step": 26045 + }, + { + "epoch": 0.5788, + "grad_norm": 2.032742977142334, + "learning_rate": 8.425872416092464e-05, + "loss": 1.8709, + "step": 26046 + }, + { + "epoch": 0.5788222222222222, + "grad_norm": 1.9718986749649048, + "learning_rate": 8.425427872860637e-05, + "loss": 1.7734, + "step": 26047 + }, + { + "epoch": 0.5788444444444445, + "grad_norm": 1.4757591485977173, + "learning_rate": 8.424983329628808e-05, + "loss": 1.3125, + "step": 26048 + }, + { + "epoch": 0.5788666666666666, + "grad_norm": 1.3887970447540283, + "learning_rate": 8.424538786396977e-05, + "loss": 0.9535, + "step": 26049 + }, + { + "epoch": 0.5788888888888889, + "grad_norm": 2.0439767837524414, + "learning_rate": 8.424094243165148e-05, + "loss": 0.9072, + "step": 26050 + }, + { + "epoch": 0.5789111111111112, + "grad_norm": 1.327715277671814, + "learning_rate": 8.423649699933319e-05, + "loss": 2.4519, + "step": 26051 + }, + { + "epoch": 0.5789333333333333, + "grad_norm": 1.6525444984436035, + "learning_rate": 8.423205156701489e-05, + "loss": 2.2627, + "step": 26052 + }, + { + "epoch": 0.5789555555555556, + "grad_norm": 1.309954047203064, + "learning_rate": 8.42276061346966e-05, + "loss": 2.5536, + "step": 26053 + }, + { + "epoch": 0.5789777777777778, + "grad_norm": 0.5188814997673035, + "learning_rate": 8.422316070237831e-05, + "loss": 0.0196, + "step": 26054 + }, + { + "epoch": 0.579, + "grad_norm": 1.337967872619629, + "learning_rate": 8.421871527006002e-05, + "loss": 2.1331, + "step": 26055 + }, + { + "epoch": 0.5790222222222222, + "grad_norm": 1.9190223217010498, + "learning_rate": 8.421426983774173e-05, + "loss": 2.3599, + "step": 26056 + }, + { + "epoch": 0.5790444444444445, + "grad_norm": 1.5431402921676636, + "learning_rate": 8.420982440542344e-05, + "loss": 2.2485, + "step": 26057 + }, + { + "epoch": 0.5790666666666666, + "grad_norm": 1.8881802558898926, + "learning_rate": 8.420537897310515e-05, + "loss": 2.4807, + "step": 26058 + }, + { + "epoch": 0.5790888888888889, + "grad_norm": 1.7573673725128174, + "learning_rate": 8.420093354078684e-05, + "loss": 1.9549, + "step": 26059 + }, + { + "epoch": 0.5791111111111111, + "grad_norm": 1.707239031791687, + "learning_rate": 8.419648810846855e-05, + "loss": 2.4954, + "step": 26060 + }, + { + "epoch": 0.5791333333333334, + "grad_norm": 1.5896327495574951, + "learning_rate": 8.419204267615026e-05, + "loss": 1.9372, + "step": 26061 + }, + { + "epoch": 0.5791555555555555, + "grad_norm": 1.5502749681472778, + "learning_rate": 8.418759724383196e-05, + "loss": 2.0857, + "step": 26062 + }, + { + "epoch": 0.5791777777777778, + "grad_norm": 1.7027544975280762, + "learning_rate": 8.418315181151367e-05, + "loss": 1.9132, + "step": 26063 + }, + { + "epoch": 0.5792, + "grad_norm": 1.5180604457855225, + "learning_rate": 8.417870637919538e-05, + "loss": 1.9358, + "step": 26064 + }, + { + "epoch": 0.5792222222222222, + "grad_norm": 1.7091317176818848, + "learning_rate": 8.417426094687709e-05, + "loss": 2.1182, + "step": 26065 + }, + { + "epoch": 0.5792444444444445, + "grad_norm": 1.6138535737991333, + "learning_rate": 8.41698155145588e-05, + "loss": 2.0723, + "step": 26066 + }, + { + "epoch": 0.5792666666666667, + "grad_norm": 1.7695465087890625, + "learning_rate": 8.41653700822405e-05, + "loss": 2.4244, + "step": 26067 + }, + { + "epoch": 0.5792888888888889, + "grad_norm": 1.6330814361572266, + "learning_rate": 8.416092464992222e-05, + "loss": 1.7658, + "step": 26068 + }, + { + "epoch": 0.5793111111111111, + "grad_norm": 1.3131344318389893, + "learning_rate": 8.415647921760391e-05, + "loss": 1.6413, + "step": 26069 + }, + { + "epoch": 0.5793333333333334, + "grad_norm": 1.4656513929367065, + "learning_rate": 8.415203378528562e-05, + "loss": 1.9481, + "step": 26070 + }, + { + "epoch": 0.5793555555555555, + "grad_norm": 1.9465343952178955, + "learning_rate": 8.414758835296733e-05, + "loss": 1.8912, + "step": 26071 + }, + { + "epoch": 0.5793777777777778, + "grad_norm": 1.7784497737884521, + "learning_rate": 8.414314292064903e-05, + "loss": 1.6295, + "step": 26072 + }, + { + "epoch": 0.5794, + "grad_norm": 1.859359860420227, + "learning_rate": 8.413869748833074e-05, + "loss": 2.1273, + "step": 26073 + }, + { + "epoch": 0.5794222222222222, + "grad_norm": 1.8585387468338013, + "learning_rate": 8.413425205601245e-05, + "loss": 2.2089, + "step": 26074 + }, + { + "epoch": 0.5794444444444444, + "grad_norm": 1.4101988077163696, + "learning_rate": 8.412980662369417e-05, + "loss": 1.3788, + "step": 26075 + }, + { + "epoch": 0.5794666666666667, + "grad_norm": 1.579291582107544, + "learning_rate": 8.412536119137587e-05, + "loss": 1.9922, + "step": 26076 + }, + { + "epoch": 0.5794888888888889, + "grad_norm": 1.8342472314834595, + "learning_rate": 8.412091575905758e-05, + "loss": 1.5453, + "step": 26077 + }, + { + "epoch": 0.5795111111111111, + "grad_norm": 1.6948658227920532, + "learning_rate": 8.411647032673928e-05, + "loss": 2.0403, + "step": 26078 + }, + { + "epoch": 0.5795333333333333, + "grad_norm": 1.831552505493164, + "learning_rate": 8.411202489442098e-05, + "loss": 1.327, + "step": 26079 + }, + { + "epoch": 0.5795555555555556, + "grad_norm": 1.650087594985962, + "learning_rate": 8.410757946210269e-05, + "loss": 1.6081, + "step": 26080 + }, + { + "epoch": 0.5795777777777777, + "grad_norm": 1.7019950151443481, + "learning_rate": 8.41031340297844e-05, + "loss": 2.2084, + "step": 26081 + }, + { + "epoch": 0.5796, + "grad_norm": 1.4378479719161987, + "learning_rate": 8.40986885974661e-05, + "loss": 1.6135, + "step": 26082 + }, + { + "epoch": 0.5796222222222223, + "grad_norm": 2.8144371509552, + "learning_rate": 8.409424316514782e-05, + "loss": 1.8706, + "step": 26083 + }, + { + "epoch": 0.5796444444444444, + "grad_norm": 1.7685115337371826, + "learning_rate": 8.408979773282953e-05, + "loss": 1.8798, + "step": 26084 + }, + { + "epoch": 0.5796666666666667, + "grad_norm": 1.2805466651916504, + "learning_rate": 8.408535230051124e-05, + "loss": 0.905, + "step": 26085 + }, + { + "epoch": 0.5796888888888889, + "grad_norm": 1.7151274681091309, + "learning_rate": 8.408090686819293e-05, + "loss": 2.0463, + "step": 26086 + }, + { + "epoch": 0.5797111111111111, + "grad_norm": 1.7159134149551392, + "learning_rate": 8.407646143587464e-05, + "loss": 1.8117, + "step": 26087 + }, + { + "epoch": 0.5797333333333333, + "grad_norm": 1.7387936115264893, + "learning_rate": 8.407201600355635e-05, + "loss": 1.6479, + "step": 26088 + }, + { + "epoch": 0.5797555555555556, + "grad_norm": 1.5587831735610962, + "learning_rate": 8.406757057123805e-05, + "loss": 1.6855, + "step": 26089 + }, + { + "epoch": 0.5797777777777777, + "grad_norm": 1.9584736824035645, + "learning_rate": 8.406312513891976e-05, + "loss": 1.9174, + "step": 26090 + }, + { + "epoch": 0.5798, + "grad_norm": 2.032670497894287, + "learning_rate": 8.405867970660147e-05, + "loss": 2.5423, + "step": 26091 + }, + { + "epoch": 0.5798222222222222, + "grad_norm": 1.73224937915802, + "learning_rate": 8.405423427428318e-05, + "loss": 1.8867, + "step": 26092 + }, + { + "epoch": 0.5798444444444445, + "grad_norm": 1.6868783235549927, + "learning_rate": 8.404978884196489e-05, + "loss": 2.0453, + "step": 26093 + }, + { + "epoch": 0.5798666666666666, + "grad_norm": 1.8552583456039429, + "learning_rate": 8.40453434096466e-05, + "loss": 2.0552, + "step": 26094 + }, + { + "epoch": 0.5798888888888889, + "grad_norm": 2.3586230278015137, + "learning_rate": 8.404089797732831e-05, + "loss": 2.2544, + "step": 26095 + }, + { + "epoch": 0.5799111111111112, + "grad_norm": 1.809834599494934, + "learning_rate": 8.403645254501e-05, + "loss": 1.547, + "step": 26096 + }, + { + "epoch": 0.5799333333333333, + "grad_norm": 1.7929770946502686, + "learning_rate": 8.403200711269171e-05, + "loss": 1.7148, + "step": 26097 + }, + { + "epoch": 0.5799555555555556, + "grad_norm": 1.786232352256775, + "learning_rate": 8.402756168037342e-05, + "loss": 1.4739, + "step": 26098 + }, + { + "epoch": 0.5799777777777778, + "grad_norm": 1.915028691291809, + "learning_rate": 8.402311624805512e-05, + "loss": 1.9315, + "step": 26099 + }, + { + "epoch": 0.58, + "grad_norm": 1.8769526481628418, + "learning_rate": 8.401867081573683e-05, + "loss": 1.0471, + "step": 26100 + }, + { + "epoch": 0.5800222222222222, + "grad_norm": 1.4817572832107544, + "learning_rate": 8.401422538341854e-05, + "loss": 2.7088, + "step": 26101 + }, + { + "epoch": 0.5800444444444445, + "grad_norm": 1.4244190454483032, + "learning_rate": 8.400977995110025e-05, + "loss": 1.9826, + "step": 26102 + }, + { + "epoch": 0.5800666666666666, + "grad_norm": 1.5300400257110596, + "learning_rate": 8.400533451878196e-05, + "loss": 2.388, + "step": 26103 + }, + { + "epoch": 0.5800888888888889, + "grad_norm": 1.3785556554794312, + "learning_rate": 8.400088908646367e-05, + "loss": 2.2987, + "step": 26104 + }, + { + "epoch": 0.5801111111111111, + "grad_norm": 1.6409354209899902, + "learning_rate": 8.399644365414538e-05, + "loss": 2.3494, + "step": 26105 + }, + { + "epoch": 0.5801333333333333, + "grad_norm": 1.6343281269073486, + "learning_rate": 8.399199822182707e-05, + "loss": 2.0914, + "step": 26106 + }, + { + "epoch": 0.5801555555555555, + "grad_norm": 1.7643954753875732, + "learning_rate": 8.398755278950878e-05, + "loss": 2.4257, + "step": 26107 + }, + { + "epoch": 0.5801777777777778, + "grad_norm": 1.5621308088302612, + "learning_rate": 8.398310735719049e-05, + "loss": 1.8084, + "step": 26108 + }, + { + "epoch": 0.5802, + "grad_norm": 1.443436622619629, + "learning_rate": 8.397866192487219e-05, + "loss": 1.6621, + "step": 26109 + }, + { + "epoch": 0.5802222222222222, + "grad_norm": 1.5355186462402344, + "learning_rate": 8.39742164925539e-05, + "loss": 2.015, + "step": 26110 + }, + { + "epoch": 0.5802444444444445, + "grad_norm": 1.5714714527130127, + "learning_rate": 8.396977106023561e-05, + "loss": 1.6738, + "step": 26111 + }, + { + "epoch": 0.5802666666666667, + "grad_norm": 1.5971449613571167, + "learning_rate": 8.396532562791733e-05, + "loss": 2.032, + "step": 26112 + }, + { + "epoch": 0.5802888888888889, + "grad_norm": 1.4106720685958862, + "learning_rate": 8.396088019559903e-05, + "loss": 1.8086, + "step": 26113 + }, + { + "epoch": 0.5803111111111111, + "grad_norm": 1.46250319480896, + "learning_rate": 8.395643476328074e-05, + "loss": 2.0213, + "step": 26114 + }, + { + "epoch": 0.5803333333333334, + "grad_norm": 1.6457831859588623, + "learning_rate": 8.395198933096245e-05, + "loss": 2.228, + "step": 26115 + }, + { + "epoch": 0.5803555555555555, + "grad_norm": 1.6616058349609375, + "learning_rate": 8.394754389864414e-05, + "loss": 2.2746, + "step": 26116 + }, + { + "epoch": 0.5803777777777778, + "grad_norm": 1.7403942346572876, + "learning_rate": 8.394309846632585e-05, + "loss": 2.221, + "step": 26117 + }, + { + "epoch": 0.5804, + "grad_norm": 1.6714938879013062, + "learning_rate": 8.393865303400756e-05, + "loss": 1.8509, + "step": 26118 + }, + { + "epoch": 0.5804222222222222, + "grad_norm": 1.7840502262115479, + "learning_rate": 8.393420760168926e-05, + "loss": 2.1026, + "step": 26119 + }, + { + "epoch": 0.5804444444444444, + "grad_norm": 1.6288597583770752, + "learning_rate": 8.392976216937098e-05, + "loss": 1.978, + "step": 26120 + }, + { + "epoch": 0.5804666666666667, + "grad_norm": 1.6369847059249878, + "learning_rate": 8.392531673705269e-05, + "loss": 2.2848, + "step": 26121 + }, + { + "epoch": 0.5804888888888889, + "grad_norm": 1.6995961666107178, + "learning_rate": 8.39208713047344e-05, + "loss": 2.2823, + "step": 26122 + }, + { + "epoch": 0.5805111111111111, + "grad_norm": 1.77353835105896, + "learning_rate": 8.39164258724161e-05, + "loss": 2.4403, + "step": 26123 + }, + { + "epoch": 0.5805333333333333, + "grad_norm": 1.6163816452026367, + "learning_rate": 8.39119804400978e-05, + "loss": 2.1503, + "step": 26124 + }, + { + "epoch": 0.5805555555555556, + "grad_norm": 1.5171163082122803, + "learning_rate": 8.390753500777951e-05, + "loss": 1.5474, + "step": 26125 + }, + { + "epoch": 0.5805777777777777, + "grad_norm": 1.759602665901184, + "learning_rate": 8.390308957546121e-05, + "loss": 1.611, + "step": 26126 + }, + { + "epoch": 0.5806, + "grad_norm": 1.63189697265625, + "learning_rate": 8.389864414314292e-05, + "loss": 2.0526, + "step": 26127 + }, + { + "epoch": 0.5806222222222223, + "grad_norm": 2.072521686553955, + "learning_rate": 8.389419871082463e-05, + "loss": 2.1334, + "step": 26128 + }, + { + "epoch": 0.5806444444444444, + "grad_norm": 1.7085297107696533, + "learning_rate": 8.388975327850634e-05, + "loss": 1.8668, + "step": 26129 + }, + { + "epoch": 0.5806666666666667, + "grad_norm": 0.3806315064430237, + "learning_rate": 8.388530784618805e-05, + "loss": 0.0278, + "step": 26130 + }, + { + "epoch": 0.5806888888888889, + "grad_norm": 1.724603533744812, + "learning_rate": 8.388086241386976e-05, + "loss": 1.9284, + "step": 26131 + }, + { + "epoch": 0.5807111111111111, + "grad_norm": 1.47846519947052, + "learning_rate": 8.387641698155147e-05, + "loss": 1.4473, + "step": 26132 + }, + { + "epoch": 0.5807333333333333, + "grad_norm": 1.6238040924072266, + "learning_rate": 8.387197154923316e-05, + "loss": 2.0136, + "step": 26133 + }, + { + "epoch": 0.5807555555555556, + "grad_norm": 1.7846760749816895, + "learning_rate": 8.386752611691487e-05, + "loss": 1.83, + "step": 26134 + }, + { + "epoch": 0.5807777777777777, + "grad_norm": 1.350911021232605, + "learning_rate": 8.386308068459658e-05, + "loss": 1.5452, + "step": 26135 + }, + { + "epoch": 0.5808, + "grad_norm": 1.2389070987701416, + "learning_rate": 8.385863525227828e-05, + "loss": 0.9817, + "step": 26136 + }, + { + "epoch": 0.5808222222222222, + "grad_norm": 1.82937753200531, + "learning_rate": 8.385418981995999e-05, + "loss": 2.1897, + "step": 26137 + }, + { + "epoch": 0.5808444444444445, + "grad_norm": 1.6447229385375977, + "learning_rate": 8.38497443876417e-05, + "loss": 1.5815, + "step": 26138 + }, + { + "epoch": 0.5808666666666666, + "grad_norm": 1.963240385055542, + "learning_rate": 8.384529895532341e-05, + "loss": 1.8421, + "step": 26139 + }, + { + "epoch": 0.5808888888888889, + "grad_norm": 1.9662257432937622, + "learning_rate": 8.384085352300512e-05, + "loss": 2.0641, + "step": 26140 + }, + { + "epoch": 0.5809111111111112, + "grad_norm": 1.741540789604187, + "learning_rate": 8.383640809068683e-05, + "loss": 2.1514, + "step": 26141 + }, + { + "epoch": 0.5809333333333333, + "grad_norm": 1.692657470703125, + "learning_rate": 8.383196265836854e-05, + "loss": 1.6779, + "step": 26142 + }, + { + "epoch": 0.5809555555555556, + "grad_norm": 1.5241749286651611, + "learning_rate": 8.382751722605023e-05, + "loss": 1.7482, + "step": 26143 + }, + { + "epoch": 0.5809777777777778, + "grad_norm": 1.9232747554779053, + "learning_rate": 8.382307179373194e-05, + "loss": 1.9883, + "step": 26144 + }, + { + "epoch": 0.581, + "grad_norm": 2.1884825229644775, + "learning_rate": 8.381862636141365e-05, + "loss": 1.8161, + "step": 26145 + }, + { + "epoch": 0.5810222222222222, + "grad_norm": 1.8066474199295044, + "learning_rate": 8.381418092909535e-05, + "loss": 1.4866, + "step": 26146 + }, + { + "epoch": 0.5810444444444445, + "grad_norm": 1.9330378770828247, + "learning_rate": 8.380973549677706e-05, + "loss": 1.9459, + "step": 26147 + }, + { + "epoch": 0.5810666666666666, + "grad_norm": 1.8063390254974365, + "learning_rate": 8.380529006445877e-05, + "loss": 1.8505, + "step": 26148 + }, + { + "epoch": 0.5810888888888889, + "grad_norm": 1.8492445945739746, + "learning_rate": 8.380084463214049e-05, + "loss": 1.6854, + "step": 26149 + }, + { + "epoch": 0.5811111111111111, + "grad_norm": 0.2438049167394638, + "learning_rate": 8.379639919982219e-05, + "loss": 0.0497, + "step": 26150 + }, + { + "epoch": 0.5811333333333333, + "grad_norm": 1.6242479085922241, + "learning_rate": 8.37919537675039e-05, + "loss": 2.1615, + "step": 26151 + }, + { + "epoch": 0.5811555555555555, + "grad_norm": 1.438272476196289, + "learning_rate": 8.37875083351856e-05, + "loss": 2.4119, + "step": 26152 + }, + { + "epoch": 0.5811777777777778, + "grad_norm": 0.1465410739183426, + "learning_rate": 8.37830629028673e-05, + "loss": 0.017, + "step": 26153 + }, + { + "epoch": 0.5812, + "grad_norm": 1.5817630290985107, + "learning_rate": 8.377861747054901e-05, + "loss": 2.1741, + "step": 26154 + }, + { + "epoch": 0.5812222222222222, + "grad_norm": 1.5494940280914307, + "learning_rate": 8.377417203823072e-05, + "loss": 2.602, + "step": 26155 + }, + { + "epoch": 0.5812444444444445, + "grad_norm": 1.5453170537948608, + "learning_rate": 8.376972660591242e-05, + "loss": 1.8616, + "step": 26156 + }, + { + "epoch": 0.5812666666666667, + "grad_norm": 1.8508343696594238, + "learning_rate": 8.376528117359414e-05, + "loss": 2.2297, + "step": 26157 + }, + { + "epoch": 0.5812888888888889, + "grad_norm": 1.8826130628585815, + "learning_rate": 8.376083574127585e-05, + "loss": 2.3587, + "step": 26158 + }, + { + "epoch": 0.5813111111111111, + "grad_norm": 1.9748693704605103, + "learning_rate": 8.375639030895756e-05, + "loss": 2.1999, + "step": 26159 + }, + { + "epoch": 0.5813333333333334, + "grad_norm": 1.8187892436981201, + "learning_rate": 8.375194487663926e-05, + "loss": 1.4834, + "step": 26160 + }, + { + "epoch": 0.5813555555555555, + "grad_norm": 1.8985687494277954, + "learning_rate": 8.374749944432097e-05, + "loss": 2.2664, + "step": 26161 + }, + { + "epoch": 0.5813777777777778, + "grad_norm": 1.7321456670761108, + "learning_rate": 8.374305401200268e-05, + "loss": 2.1491, + "step": 26162 + }, + { + "epoch": 0.5814, + "grad_norm": 1.2643800973892212, + "learning_rate": 8.373860857968437e-05, + "loss": 1.0812, + "step": 26163 + }, + { + "epoch": 0.5814222222222222, + "grad_norm": 1.6095198392868042, + "learning_rate": 8.373416314736608e-05, + "loss": 2.0306, + "step": 26164 + }, + { + "epoch": 0.5814444444444444, + "grad_norm": 1.8974038362503052, + "learning_rate": 8.372971771504779e-05, + "loss": 1.6857, + "step": 26165 + }, + { + "epoch": 0.5814666666666667, + "grad_norm": 1.8022187948226929, + "learning_rate": 8.37252722827295e-05, + "loss": 1.6285, + "step": 26166 + }, + { + "epoch": 0.5814888888888889, + "grad_norm": 1.73734450340271, + "learning_rate": 8.372082685041121e-05, + "loss": 1.926, + "step": 26167 + }, + { + "epoch": 0.5815111111111111, + "grad_norm": 1.4879589080810547, + "learning_rate": 8.371638141809292e-05, + "loss": 1.7018, + "step": 26168 + }, + { + "epoch": 0.5815333333333333, + "grad_norm": 1.672507882118225, + "learning_rate": 8.371193598577463e-05, + "loss": 2.0912, + "step": 26169 + }, + { + "epoch": 0.5815555555555556, + "grad_norm": 1.5402486324310303, + "learning_rate": 8.370749055345633e-05, + "loss": 2.1801, + "step": 26170 + }, + { + "epoch": 0.5815777777777777, + "grad_norm": 1.5981429815292358, + "learning_rate": 8.370304512113804e-05, + "loss": 2.0361, + "step": 26171 + }, + { + "epoch": 0.5816, + "grad_norm": 1.615914225578308, + "learning_rate": 8.369859968881974e-05, + "loss": 1.7654, + "step": 26172 + }, + { + "epoch": 0.5816222222222223, + "grad_norm": 1.6028060913085938, + "learning_rate": 8.369415425650144e-05, + "loss": 2.3957, + "step": 26173 + }, + { + "epoch": 0.5816444444444444, + "grad_norm": 1.8624187707901, + "learning_rate": 8.368970882418315e-05, + "loss": 2.0461, + "step": 26174 + }, + { + "epoch": 0.5816666666666667, + "grad_norm": 1.5869486331939697, + "learning_rate": 8.368526339186486e-05, + "loss": 1.9551, + "step": 26175 + }, + { + "epoch": 0.5816888888888889, + "grad_norm": 1.9387506246566772, + "learning_rate": 8.368081795954657e-05, + "loss": 2.1581, + "step": 26176 + }, + { + "epoch": 0.5817111111111111, + "grad_norm": 1.8396356105804443, + "learning_rate": 8.367637252722828e-05, + "loss": 1.9605, + "step": 26177 + }, + { + "epoch": 0.5817333333333333, + "grad_norm": 1.579355001449585, + "learning_rate": 8.367192709490999e-05, + "loss": 1.8826, + "step": 26178 + }, + { + "epoch": 0.5817555555555556, + "grad_norm": 2.3210432529449463, + "learning_rate": 8.36674816625917e-05, + "loss": 2.2686, + "step": 26179 + }, + { + "epoch": 0.5817777777777777, + "grad_norm": 1.7856831550598145, + "learning_rate": 8.36630362302734e-05, + "loss": 2.0718, + "step": 26180 + }, + { + "epoch": 0.5818, + "grad_norm": 1.4405708312988281, + "learning_rate": 8.36585907979551e-05, + "loss": 1.4981, + "step": 26181 + }, + { + "epoch": 0.5818222222222222, + "grad_norm": 2.206476926803589, + "learning_rate": 8.365414536563681e-05, + "loss": 1.9997, + "step": 26182 + }, + { + "epoch": 0.5818444444444445, + "grad_norm": 1.7495813369750977, + "learning_rate": 8.364969993331851e-05, + "loss": 1.6429, + "step": 26183 + }, + { + "epoch": 0.5818666666666666, + "grad_norm": 1.9488918781280518, + "learning_rate": 8.364525450100022e-05, + "loss": 2.0269, + "step": 26184 + }, + { + "epoch": 0.5818888888888889, + "grad_norm": 1.624327540397644, + "learning_rate": 8.364080906868193e-05, + "loss": 1.9028, + "step": 26185 + }, + { + "epoch": 0.5819111111111112, + "grad_norm": 1.8741378784179688, + "learning_rate": 8.363636363636364e-05, + "loss": 2.2326, + "step": 26186 + }, + { + "epoch": 0.5819333333333333, + "grad_norm": 1.7637832164764404, + "learning_rate": 8.363191820404535e-05, + "loss": 1.9232, + "step": 26187 + }, + { + "epoch": 0.5819555555555556, + "grad_norm": 1.5867350101470947, + "learning_rate": 8.362747277172706e-05, + "loss": 1.7418, + "step": 26188 + }, + { + "epoch": 0.5819777777777778, + "grad_norm": 1.8549751043319702, + "learning_rate": 8.362302733940877e-05, + "loss": 2.3625, + "step": 26189 + }, + { + "epoch": 0.582, + "grad_norm": 1.6489076614379883, + "learning_rate": 8.361858190709046e-05, + "loss": 1.7887, + "step": 26190 + }, + { + "epoch": 0.5820222222222222, + "grad_norm": 1.6363847255706787, + "learning_rate": 8.361413647477217e-05, + "loss": 1.5879, + "step": 26191 + }, + { + "epoch": 0.5820444444444445, + "grad_norm": 1.8551013469696045, + "learning_rate": 8.360969104245388e-05, + "loss": 2.251, + "step": 26192 + }, + { + "epoch": 0.5820666666666666, + "grad_norm": 1.7150312662124634, + "learning_rate": 8.360524561013558e-05, + "loss": 1.7961, + "step": 26193 + }, + { + "epoch": 0.5820888888888889, + "grad_norm": 1.5141977071762085, + "learning_rate": 8.36008001778173e-05, + "loss": 1.3297, + "step": 26194 + }, + { + "epoch": 0.5821111111111111, + "grad_norm": 1.8479887247085571, + "learning_rate": 8.359635474549901e-05, + "loss": 1.8324, + "step": 26195 + }, + { + "epoch": 0.5821333333333333, + "grad_norm": 1.1966216564178467, + "learning_rate": 8.359190931318072e-05, + "loss": 0.838, + "step": 26196 + }, + { + "epoch": 0.5821555555555555, + "grad_norm": 1.891556978225708, + "learning_rate": 8.358746388086242e-05, + "loss": 2.0078, + "step": 26197 + }, + { + "epoch": 0.5821777777777778, + "grad_norm": 1.81918203830719, + "learning_rate": 8.358301844854413e-05, + "loss": 1.7798, + "step": 26198 + }, + { + "epoch": 0.5822, + "grad_norm": 1.9150341749191284, + "learning_rate": 8.357857301622584e-05, + "loss": 1.8632, + "step": 26199 + }, + { + "epoch": 0.5822222222222222, + "grad_norm": 1.4459576606750488, + "learning_rate": 8.357412758390753e-05, + "loss": 0.9049, + "step": 26200 + }, + { + "epoch": 0.5822444444444445, + "grad_norm": 1.3595621585845947, + "learning_rate": 8.356968215158924e-05, + "loss": 2.1102, + "step": 26201 + }, + { + "epoch": 0.5822666666666667, + "grad_norm": 1.6290918588638306, + "learning_rate": 8.356523671927095e-05, + "loss": 1.3813, + "step": 26202 + }, + { + "epoch": 0.5822888888888889, + "grad_norm": 1.4016199111938477, + "learning_rate": 8.356079128695266e-05, + "loss": 2.1448, + "step": 26203 + }, + { + "epoch": 0.5823111111111111, + "grad_norm": 1.6045475006103516, + "learning_rate": 8.355634585463437e-05, + "loss": 2.1319, + "step": 26204 + }, + { + "epoch": 0.5823333333333334, + "grad_norm": 1.6283369064331055, + "learning_rate": 8.355190042231608e-05, + "loss": 2.0471, + "step": 26205 + }, + { + "epoch": 0.5823555555555555, + "grad_norm": 1.6710686683654785, + "learning_rate": 8.354745498999779e-05, + "loss": 1.8345, + "step": 26206 + }, + { + "epoch": 0.5823777777777778, + "grad_norm": 1.1778653860092163, + "learning_rate": 8.354300955767949e-05, + "loss": 1.231, + "step": 26207 + }, + { + "epoch": 0.5824, + "grad_norm": 1.6818339824676514, + "learning_rate": 8.35385641253612e-05, + "loss": 2.5075, + "step": 26208 + }, + { + "epoch": 0.5824222222222222, + "grad_norm": 1.7084901332855225, + "learning_rate": 8.35341186930429e-05, + "loss": 2.2311, + "step": 26209 + }, + { + "epoch": 0.5824444444444444, + "grad_norm": 1.6797622442245483, + "learning_rate": 8.35296732607246e-05, + "loss": 2.0646, + "step": 26210 + }, + { + "epoch": 0.5824666666666667, + "grad_norm": 1.6726146936416626, + "learning_rate": 8.352522782840631e-05, + "loss": 1.8802, + "step": 26211 + }, + { + "epoch": 0.5824888888888888, + "grad_norm": 1.711694359779358, + "learning_rate": 8.352078239608802e-05, + "loss": 2.2033, + "step": 26212 + }, + { + "epoch": 0.5825111111111111, + "grad_norm": 1.5540680885314941, + "learning_rate": 8.351633696376973e-05, + "loss": 2.0245, + "step": 26213 + }, + { + "epoch": 0.5825333333333333, + "grad_norm": 2.045577049255371, + "learning_rate": 8.351189153145144e-05, + "loss": 2.1828, + "step": 26214 + }, + { + "epoch": 0.5825555555555556, + "grad_norm": 1.543917179107666, + "learning_rate": 8.350744609913315e-05, + "loss": 1.8969, + "step": 26215 + }, + { + "epoch": 0.5825777777777777, + "grad_norm": 1.5529899597167969, + "learning_rate": 8.350300066681486e-05, + "loss": 2.1522, + "step": 26216 + }, + { + "epoch": 0.5826, + "grad_norm": 1.8591609001159668, + "learning_rate": 8.349855523449656e-05, + "loss": 2.0281, + "step": 26217 + }, + { + "epoch": 0.5826222222222223, + "grad_norm": 1.2038625478744507, + "learning_rate": 8.349410980217827e-05, + "loss": 0.4999, + "step": 26218 + }, + { + "epoch": 0.5826444444444444, + "grad_norm": 1.7365057468414307, + "learning_rate": 8.348966436985998e-05, + "loss": 2.2447, + "step": 26219 + }, + { + "epoch": 0.5826666666666667, + "grad_norm": 1.4882011413574219, + "learning_rate": 8.348521893754167e-05, + "loss": 1.5969, + "step": 26220 + }, + { + "epoch": 0.5826888888888889, + "grad_norm": 1.4881402254104614, + "learning_rate": 8.348077350522338e-05, + "loss": 1.8631, + "step": 26221 + }, + { + "epoch": 0.5827111111111111, + "grad_norm": 1.7825554609298706, + "learning_rate": 8.347632807290509e-05, + "loss": 1.9985, + "step": 26222 + }, + { + "epoch": 0.5827333333333333, + "grad_norm": 1.7529854774475098, + "learning_rate": 8.34718826405868e-05, + "loss": 1.7588, + "step": 26223 + }, + { + "epoch": 0.5827555555555556, + "grad_norm": 1.7051653861999512, + "learning_rate": 8.346743720826851e-05, + "loss": 1.8225, + "step": 26224 + }, + { + "epoch": 0.5827777777777777, + "grad_norm": 2.4538986682891846, + "learning_rate": 8.346299177595022e-05, + "loss": 1.5975, + "step": 26225 + }, + { + "epoch": 0.5828, + "grad_norm": 1.4693490266799927, + "learning_rate": 8.345854634363193e-05, + "loss": 1.5349, + "step": 26226 + }, + { + "epoch": 0.5828222222222222, + "grad_norm": 2.286280632019043, + "learning_rate": 8.345410091131362e-05, + "loss": 2.2337, + "step": 26227 + }, + { + "epoch": 0.5828444444444445, + "grad_norm": 2.219813346862793, + "learning_rate": 8.344965547899533e-05, + "loss": 1.8498, + "step": 26228 + }, + { + "epoch": 0.5828666666666666, + "grad_norm": 1.8761414289474487, + "learning_rate": 8.344521004667704e-05, + "loss": 1.7299, + "step": 26229 + }, + { + "epoch": 0.5828888888888889, + "grad_norm": 2.0295374393463135, + "learning_rate": 8.344076461435874e-05, + "loss": 2.1673, + "step": 26230 + }, + { + "epoch": 0.5829111111111112, + "grad_norm": 1.9511631727218628, + "learning_rate": 8.343631918204046e-05, + "loss": 1.7042, + "step": 26231 + }, + { + "epoch": 0.5829333333333333, + "grad_norm": 1.9228847026824951, + "learning_rate": 8.343187374972217e-05, + "loss": 2.0077, + "step": 26232 + }, + { + "epoch": 0.5829555555555556, + "grad_norm": 1.6258912086486816, + "learning_rate": 8.342742831740387e-05, + "loss": 1.3767, + "step": 26233 + }, + { + "epoch": 0.5829777777777778, + "grad_norm": 2.137866497039795, + "learning_rate": 8.342298288508558e-05, + "loss": 1.6094, + "step": 26234 + }, + { + "epoch": 0.583, + "grad_norm": 2.144888162612915, + "learning_rate": 8.341853745276729e-05, + "loss": 1.6773, + "step": 26235 + }, + { + "epoch": 0.5830222222222222, + "grad_norm": 1.9768238067626953, + "learning_rate": 8.3414092020449e-05, + "loss": 1.8659, + "step": 26236 + }, + { + "epoch": 0.5830444444444445, + "grad_norm": 1.9847415685653687, + "learning_rate": 8.34096465881307e-05, + "loss": 2.4647, + "step": 26237 + }, + { + "epoch": 0.5830666666666666, + "grad_norm": 2.0302507877349854, + "learning_rate": 8.34052011558124e-05, + "loss": 2.0827, + "step": 26238 + }, + { + "epoch": 0.5830888888888889, + "grad_norm": 2.288281202316284, + "learning_rate": 8.340075572349411e-05, + "loss": 2.3895, + "step": 26239 + }, + { + "epoch": 0.5831111111111111, + "grad_norm": 2.0385663509368896, + "learning_rate": 8.339631029117582e-05, + "loss": 1.967, + "step": 26240 + }, + { + "epoch": 0.5831333333333333, + "grad_norm": 1.8273963928222656, + "learning_rate": 8.339186485885753e-05, + "loss": 1.7718, + "step": 26241 + }, + { + "epoch": 0.5831555555555555, + "grad_norm": 1.6473088264465332, + "learning_rate": 8.338741942653924e-05, + "loss": 1.8773, + "step": 26242 + }, + { + "epoch": 0.5831777777777778, + "grad_norm": 1.4469784498214722, + "learning_rate": 8.338297399422094e-05, + "loss": 1.0654, + "step": 26243 + }, + { + "epoch": 0.5832, + "grad_norm": 1.5022889375686646, + "learning_rate": 8.337852856190265e-05, + "loss": 1.5355, + "step": 26244 + }, + { + "epoch": 0.5832222222222222, + "grad_norm": 1.4862998723983765, + "learning_rate": 8.337408312958436e-05, + "loss": 1.4346, + "step": 26245 + }, + { + "epoch": 0.5832444444444445, + "grad_norm": 1.8965848684310913, + "learning_rate": 8.336963769726607e-05, + "loss": 1.6884, + "step": 26246 + }, + { + "epoch": 0.5832666666666667, + "grad_norm": 1.659008502960205, + "learning_rate": 8.336519226494776e-05, + "loss": 1.9439, + "step": 26247 + }, + { + "epoch": 0.5832888888888889, + "grad_norm": 1.8410717248916626, + "learning_rate": 8.336074683262947e-05, + "loss": 1.9104, + "step": 26248 + }, + { + "epoch": 0.5833111111111111, + "grad_norm": 1.6476742029190063, + "learning_rate": 8.335630140031118e-05, + "loss": 1.7227, + "step": 26249 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 2.0273349285125732, + "learning_rate": 8.335185596799289e-05, + "loss": 1.697, + "step": 26250 + }, + { + "epoch": 0.5833555555555555, + "grad_norm": 1.3179007768630981, + "learning_rate": 8.33474105356746e-05, + "loss": 1.6495, + "step": 26251 + }, + { + "epoch": 0.5833777777777778, + "grad_norm": 1.3309334516525269, + "learning_rate": 8.334296510335631e-05, + "loss": 2.062, + "step": 26252 + }, + { + "epoch": 0.5834, + "grad_norm": 0.9220927953720093, + "learning_rate": 8.333851967103802e-05, + "loss": 1.149, + "step": 26253 + }, + { + "epoch": 0.5834222222222222, + "grad_norm": 1.560506820678711, + "learning_rate": 8.333407423871972e-05, + "loss": 2.3106, + "step": 26254 + }, + { + "epoch": 0.5834444444444444, + "grad_norm": 1.5376288890838623, + "learning_rate": 8.332962880640143e-05, + "loss": 1.7624, + "step": 26255 + }, + { + "epoch": 0.5834666666666667, + "grad_norm": 1.712773084640503, + "learning_rate": 8.332518337408314e-05, + "loss": 1.9538, + "step": 26256 + }, + { + "epoch": 0.5834888888888888, + "grad_norm": 1.3960744142532349, + "learning_rate": 8.332073794176483e-05, + "loss": 1.8866, + "step": 26257 + }, + { + "epoch": 0.5835111111111111, + "grad_norm": 0.9395681023597717, + "learning_rate": 8.331629250944654e-05, + "loss": 0.9285, + "step": 26258 + }, + { + "epoch": 0.5835333333333333, + "grad_norm": 1.9319789409637451, + "learning_rate": 8.331184707712825e-05, + "loss": 2.5569, + "step": 26259 + }, + { + "epoch": 0.5835555555555556, + "grad_norm": 1.4398587942123413, + "learning_rate": 8.330740164480996e-05, + "loss": 2.1114, + "step": 26260 + }, + { + "epoch": 0.5835777777777778, + "grad_norm": 1.695851445198059, + "learning_rate": 8.330295621249167e-05, + "loss": 2.5833, + "step": 26261 + }, + { + "epoch": 0.5836, + "grad_norm": 1.396723747253418, + "learning_rate": 8.329851078017338e-05, + "loss": 1.66, + "step": 26262 + }, + { + "epoch": 0.5836222222222223, + "grad_norm": 1.5797001123428345, + "learning_rate": 8.329406534785509e-05, + "loss": 2.074, + "step": 26263 + }, + { + "epoch": 0.5836444444444444, + "grad_norm": 1.6313436031341553, + "learning_rate": 8.328961991553679e-05, + "loss": 2.5088, + "step": 26264 + }, + { + "epoch": 0.5836666666666667, + "grad_norm": 1.7688790559768677, + "learning_rate": 8.32851744832185e-05, + "loss": 1.7651, + "step": 26265 + }, + { + "epoch": 0.5836888888888889, + "grad_norm": 1.5532402992248535, + "learning_rate": 8.32807290509002e-05, + "loss": 2.171, + "step": 26266 + }, + { + "epoch": 0.5837111111111111, + "grad_norm": 1.6320881843566895, + "learning_rate": 8.32762836185819e-05, + "loss": 2.4216, + "step": 26267 + }, + { + "epoch": 0.5837333333333333, + "grad_norm": 1.4460666179656982, + "learning_rate": 8.327183818626362e-05, + "loss": 1.8043, + "step": 26268 + }, + { + "epoch": 0.5837555555555556, + "grad_norm": 1.8040688037872314, + "learning_rate": 8.326739275394533e-05, + "loss": 1.9625, + "step": 26269 + }, + { + "epoch": 0.5837777777777777, + "grad_norm": 1.0893901586532593, + "learning_rate": 8.326294732162703e-05, + "loss": 0.9989, + "step": 26270 + }, + { + "epoch": 0.5838, + "grad_norm": 1.9131560325622559, + "learning_rate": 8.325850188930874e-05, + "loss": 2.5563, + "step": 26271 + }, + { + "epoch": 0.5838222222222222, + "grad_norm": 1.5659085512161255, + "learning_rate": 8.325405645699045e-05, + "loss": 2.1676, + "step": 26272 + }, + { + "epoch": 0.5838444444444445, + "grad_norm": 1.5888844728469849, + "learning_rate": 8.324961102467216e-05, + "loss": 2.1525, + "step": 26273 + }, + { + "epoch": 0.5838666666666666, + "grad_norm": 1.691839575767517, + "learning_rate": 8.324516559235385e-05, + "loss": 1.9559, + "step": 26274 + }, + { + "epoch": 0.5838888888888889, + "grad_norm": 1.625472903251648, + "learning_rate": 8.324072016003556e-05, + "loss": 2.118, + "step": 26275 + }, + { + "epoch": 0.5839111111111112, + "grad_norm": 1.6146228313446045, + "learning_rate": 8.323627472771727e-05, + "loss": 1.9359, + "step": 26276 + }, + { + "epoch": 0.5839333333333333, + "grad_norm": 1.5743327140808105, + "learning_rate": 8.323182929539898e-05, + "loss": 1.9866, + "step": 26277 + }, + { + "epoch": 0.5839555555555556, + "grad_norm": 1.5054017305374146, + "learning_rate": 8.32273838630807e-05, + "loss": 1.2454, + "step": 26278 + }, + { + "epoch": 0.5839777777777778, + "grad_norm": 1.793137788772583, + "learning_rate": 8.32229384307624e-05, + "loss": 1.9548, + "step": 26279 + }, + { + "epoch": 0.584, + "grad_norm": 1.9058033227920532, + "learning_rate": 8.32184929984441e-05, + "loss": 2.566, + "step": 26280 + }, + { + "epoch": 0.5840222222222222, + "grad_norm": 1.8583147525787354, + "learning_rate": 8.321404756612581e-05, + "loss": 2.0726, + "step": 26281 + }, + { + "epoch": 0.5840444444444445, + "grad_norm": 1.6046953201293945, + "learning_rate": 8.320960213380752e-05, + "loss": 2.0069, + "step": 26282 + }, + { + "epoch": 0.5840666666666666, + "grad_norm": 1.7175462245941162, + "learning_rate": 8.320515670148923e-05, + "loss": 1.9083, + "step": 26283 + }, + { + "epoch": 0.5840888888888889, + "grad_norm": 1.4772974252700806, + "learning_rate": 8.320071126917092e-05, + "loss": 1.239, + "step": 26284 + }, + { + "epoch": 0.5841111111111111, + "grad_norm": 1.5468647480010986, + "learning_rate": 8.319626583685263e-05, + "loss": 1.6306, + "step": 26285 + }, + { + "epoch": 0.5841333333333333, + "grad_norm": 1.430153489112854, + "learning_rate": 8.319182040453434e-05, + "loss": 1.7573, + "step": 26286 + }, + { + "epoch": 0.5841555555555555, + "grad_norm": 2.067629098892212, + "learning_rate": 8.318737497221605e-05, + "loss": 2.143, + "step": 26287 + }, + { + "epoch": 0.5841777777777778, + "grad_norm": 1.7656430006027222, + "learning_rate": 8.318292953989776e-05, + "loss": 2.0896, + "step": 26288 + }, + { + "epoch": 0.5842, + "grad_norm": 1.560054063796997, + "learning_rate": 8.317848410757947e-05, + "loss": 1.7963, + "step": 26289 + }, + { + "epoch": 0.5842222222222222, + "grad_norm": 1.8045637607574463, + "learning_rate": 8.317403867526117e-05, + "loss": 1.7165, + "step": 26290 + }, + { + "epoch": 0.5842444444444445, + "grad_norm": 1.740210771560669, + "learning_rate": 8.316959324294288e-05, + "loss": 2.3107, + "step": 26291 + }, + { + "epoch": 0.5842666666666667, + "grad_norm": 1.606910228729248, + "learning_rate": 8.316514781062459e-05, + "loss": 1.9242, + "step": 26292 + }, + { + "epoch": 0.5842888888888889, + "grad_norm": 1.793172836303711, + "learning_rate": 8.31607023783063e-05, + "loss": 1.7542, + "step": 26293 + }, + { + "epoch": 0.5843111111111111, + "grad_norm": 1.8749566078186035, + "learning_rate": 8.315625694598799e-05, + "loss": 2.1248, + "step": 26294 + }, + { + "epoch": 0.5843333333333334, + "grad_norm": 2.0712335109710693, + "learning_rate": 8.31518115136697e-05, + "loss": 1.9189, + "step": 26295 + }, + { + "epoch": 0.5843555555555555, + "grad_norm": 1.778473973274231, + "learning_rate": 8.314736608135141e-05, + "loss": 1.6249, + "step": 26296 + }, + { + "epoch": 0.5843777777777778, + "grad_norm": 1.758664846420288, + "learning_rate": 8.314292064903312e-05, + "loss": 1.7072, + "step": 26297 + }, + { + "epoch": 0.5844, + "grad_norm": 1.531131386756897, + "learning_rate": 8.313847521671483e-05, + "loss": 1.216, + "step": 26298 + }, + { + "epoch": 0.5844222222222222, + "grad_norm": 2.2430763244628906, + "learning_rate": 8.313402978439654e-05, + "loss": 2.2344, + "step": 26299 + }, + { + "epoch": 0.5844444444444444, + "grad_norm": 1.7089288234710693, + "learning_rate": 8.312958435207825e-05, + "loss": 1.3861, + "step": 26300 + }, + { + "epoch": 0.5844666666666667, + "grad_norm": 1.5989187955856323, + "learning_rate": 8.312513891975995e-05, + "loss": 2.6309, + "step": 26301 + }, + { + "epoch": 0.5844888888888888, + "grad_norm": 1.3423744440078735, + "learning_rate": 8.312069348744166e-05, + "loss": 1.9436, + "step": 26302 + }, + { + "epoch": 0.5845111111111111, + "grad_norm": 1.9386184215545654, + "learning_rate": 8.311624805512337e-05, + "loss": 1.9313, + "step": 26303 + }, + { + "epoch": 0.5845333333333333, + "grad_norm": 1.4838286638259888, + "learning_rate": 8.311180262280506e-05, + "loss": 1.6181, + "step": 26304 + }, + { + "epoch": 0.5845555555555556, + "grad_norm": 1.5339857339859009, + "learning_rate": 8.310735719048679e-05, + "loss": 1.8669, + "step": 26305 + }, + { + "epoch": 0.5845777777777778, + "grad_norm": 1.5780014991760254, + "learning_rate": 8.31029117581685e-05, + "loss": 1.8785, + "step": 26306 + }, + { + "epoch": 0.5846, + "grad_norm": 2.025686502456665, + "learning_rate": 8.309846632585019e-05, + "loss": 1.478, + "step": 26307 + }, + { + "epoch": 0.5846222222222223, + "grad_norm": 1.8832579851150513, + "learning_rate": 8.30940208935319e-05, + "loss": 2.0248, + "step": 26308 + }, + { + "epoch": 0.5846444444444444, + "grad_norm": 1.3615474700927734, + "learning_rate": 8.308957546121361e-05, + "loss": 1.3794, + "step": 26309 + }, + { + "epoch": 0.5846666666666667, + "grad_norm": 1.5770617723464966, + "learning_rate": 8.308513002889532e-05, + "loss": 1.5432, + "step": 26310 + }, + { + "epoch": 0.5846888888888889, + "grad_norm": 1.994457721710205, + "learning_rate": 8.308068459657702e-05, + "loss": 2.5888, + "step": 26311 + }, + { + "epoch": 0.5847111111111111, + "grad_norm": 1.5343011617660522, + "learning_rate": 8.307623916425873e-05, + "loss": 1.5649, + "step": 26312 + }, + { + "epoch": 0.5847333333333333, + "grad_norm": 1.5088410377502441, + "learning_rate": 8.307179373194044e-05, + "loss": 1.9576, + "step": 26313 + }, + { + "epoch": 0.5847555555555556, + "grad_norm": 1.646855354309082, + "learning_rate": 8.306734829962214e-05, + "loss": 1.722, + "step": 26314 + }, + { + "epoch": 0.5847777777777777, + "grad_norm": 1.5722808837890625, + "learning_rate": 8.306290286730385e-05, + "loss": 1.6116, + "step": 26315 + }, + { + "epoch": 0.5848, + "grad_norm": 1.5221266746520996, + "learning_rate": 8.305845743498556e-05, + "loss": 1.7647, + "step": 26316 + }, + { + "epoch": 0.5848222222222222, + "grad_norm": 1.6071947813034058, + "learning_rate": 8.305401200266726e-05, + "loss": 2.1635, + "step": 26317 + }, + { + "epoch": 0.5848444444444444, + "grad_norm": 1.9324707984924316, + "learning_rate": 8.304956657034897e-05, + "loss": 2.3949, + "step": 26318 + }, + { + "epoch": 0.5848666666666666, + "grad_norm": 1.6427240371704102, + "learning_rate": 8.304512113803068e-05, + "loss": 1.7098, + "step": 26319 + }, + { + "epoch": 0.5848888888888889, + "grad_norm": 1.8649818897247314, + "learning_rate": 8.304067570571239e-05, + "loss": 2.0969, + "step": 26320 + }, + { + "epoch": 0.5849111111111112, + "grad_norm": 1.9790477752685547, + "learning_rate": 8.303623027339409e-05, + "loss": 1.9604, + "step": 26321 + }, + { + "epoch": 0.5849333333333333, + "grad_norm": 1.46359384059906, + "learning_rate": 8.30317848410758e-05, + "loss": 1.4153, + "step": 26322 + }, + { + "epoch": 0.5849555555555556, + "grad_norm": 1.8081319332122803, + "learning_rate": 8.30273394087575e-05, + "loss": 1.932, + "step": 26323 + }, + { + "epoch": 0.5849777777777778, + "grad_norm": 1.7537386417388916, + "learning_rate": 8.302289397643921e-05, + "loss": 2.3716, + "step": 26324 + }, + { + "epoch": 0.585, + "grad_norm": 1.5806208848953247, + "learning_rate": 8.301844854412092e-05, + "loss": 1.8761, + "step": 26325 + }, + { + "epoch": 0.5850222222222222, + "grad_norm": 1.8859691619873047, + "learning_rate": 8.301400311180263e-05, + "loss": 1.997, + "step": 26326 + }, + { + "epoch": 0.5850444444444445, + "grad_norm": 1.6441972255706787, + "learning_rate": 8.300955767948433e-05, + "loss": 1.5677, + "step": 26327 + }, + { + "epoch": 0.5850666666666666, + "grad_norm": 1.6208170652389526, + "learning_rate": 8.300511224716604e-05, + "loss": 1.8993, + "step": 26328 + }, + { + "epoch": 0.5850888888888889, + "grad_norm": 1.696751356124878, + "learning_rate": 8.300066681484775e-05, + "loss": 2.3392, + "step": 26329 + }, + { + "epoch": 0.5851111111111111, + "grad_norm": 1.7575993537902832, + "learning_rate": 8.299622138252946e-05, + "loss": 2.1465, + "step": 26330 + }, + { + "epoch": 0.5851333333333333, + "grad_norm": 1.0533134937286377, + "learning_rate": 8.299177595021115e-05, + "loss": 0.8847, + "step": 26331 + }, + { + "epoch": 0.5851555555555555, + "grad_norm": 1.5377509593963623, + "learning_rate": 8.298733051789286e-05, + "loss": 1.452, + "step": 26332 + }, + { + "epoch": 0.5851777777777778, + "grad_norm": 1.8691941499710083, + "learning_rate": 8.298288508557457e-05, + "loss": 1.5431, + "step": 26333 + }, + { + "epoch": 0.5852, + "grad_norm": 1.8643362522125244, + "learning_rate": 8.297843965325628e-05, + "loss": 1.906, + "step": 26334 + }, + { + "epoch": 0.5852222222222222, + "grad_norm": 1.832690715789795, + "learning_rate": 8.297399422093799e-05, + "loss": 1.5813, + "step": 26335 + }, + { + "epoch": 0.5852444444444445, + "grad_norm": 1.643566608428955, + "learning_rate": 8.29695487886197e-05, + "loss": 1.9578, + "step": 26336 + }, + { + "epoch": 0.5852666666666667, + "grad_norm": 1.567914605140686, + "learning_rate": 8.29651033563014e-05, + "loss": 1.4821, + "step": 26337 + }, + { + "epoch": 0.5852888888888889, + "grad_norm": 1.438464879989624, + "learning_rate": 8.296065792398311e-05, + "loss": 1.47, + "step": 26338 + }, + { + "epoch": 0.5853111111111111, + "grad_norm": 1.6269550323486328, + "learning_rate": 8.295621249166482e-05, + "loss": 2.0255, + "step": 26339 + }, + { + "epoch": 0.5853333333333334, + "grad_norm": 1.3445321321487427, + "learning_rate": 8.295176705934653e-05, + "loss": 1.2979, + "step": 26340 + }, + { + "epoch": 0.5853555555555555, + "grad_norm": 1.9354634284973145, + "learning_rate": 8.294732162702822e-05, + "loss": 2.2849, + "step": 26341 + }, + { + "epoch": 0.5853777777777778, + "grad_norm": 1.5181694030761719, + "learning_rate": 8.294287619470995e-05, + "loss": 1.4888, + "step": 26342 + }, + { + "epoch": 0.5854, + "grad_norm": 1.9285415410995483, + "learning_rate": 8.293843076239166e-05, + "loss": 1.7749, + "step": 26343 + }, + { + "epoch": 0.5854222222222222, + "grad_norm": 2.097766160964966, + "learning_rate": 8.293398533007335e-05, + "loss": 2.3797, + "step": 26344 + }, + { + "epoch": 0.5854444444444444, + "grad_norm": 1.821632742881775, + "learning_rate": 8.292953989775506e-05, + "loss": 1.6612, + "step": 26345 + }, + { + "epoch": 0.5854666666666667, + "grad_norm": 1.9774261713027954, + "learning_rate": 8.292509446543677e-05, + "loss": 1.6549, + "step": 26346 + }, + { + "epoch": 0.5854888888888888, + "grad_norm": 1.726208209991455, + "learning_rate": 8.292064903311847e-05, + "loss": 2.0537, + "step": 26347 + }, + { + "epoch": 0.5855111111111111, + "grad_norm": 1.1972006559371948, + "learning_rate": 8.291620360080018e-05, + "loss": 0.6462, + "step": 26348 + }, + { + "epoch": 0.5855333333333334, + "grad_norm": 0.35262811183929443, + "learning_rate": 8.291175816848189e-05, + "loss": 0.0556, + "step": 26349 + }, + { + "epoch": 0.5855555555555556, + "grad_norm": 1.5765961408615112, + "learning_rate": 8.29073127361636e-05, + "loss": 0.817, + "step": 26350 + }, + { + "epoch": 0.5855777777777778, + "grad_norm": 1.6506803035736084, + "learning_rate": 8.29028673038453e-05, + "loss": 2.6258, + "step": 26351 + }, + { + "epoch": 0.5856, + "grad_norm": 1.4478061199188232, + "learning_rate": 8.289842187152702e-05, + "loss": 1.9368, + "step": 26352 + }, + { + "epoch": 0.5856222222222223, + "grad_norm": 1.4829243421554565, + "learning_rate": 8.289397643920873e-05, + "loss": 1.2137, + "step": 26353 + }, + { + "epoch": 0.5856444444444444, + "grad_norm": 1.551202654838562, + "learning_rate": 8.288953100689042e-05, + "loss": 2.54, + "step": 26354 + }, + { + "epoch": 0.5856666666666667, + "grad_norm": 1.680566430091858, + "learning_rate": 8.288508557457213e-05, + "loss": 2.3305, + "step": 26355 + }, + { + "epoch": 0.5856888888888889, + "grad_norm": 1.6335614919662476, + "learning_rate": 8.288064014225384e-05, + "loss": 1.6486, + "step": 26356 + }, + { + "epoch": 0.5857111111111111, + "grad_norm": 1.60475492477417, + "learning_rate": 8.287619470993555e-05, + "loss": 2.0379, + "step": 26357 + }, + { + "epoch": 0.5857333333333333, + "grad_norm": 1.900649905204773, + "learning_rate": 8.287174927761725e-05, + "loss": 2.5529, + "step": 26358 + }, + { + "epoch": 0.5857555555555556, + "grad_norm": 1.625902533531189, + "learning_rate": 8.286730384529896e-05, + "loss": 2.1112, + "step": 26359 + }, + { + "epoch": 0.5857777777777777, + "grad_norm": 1.5074518918991089, + "learning_rate": 8.286285841298067e-05, + "loss": 2.1067, + "step": 26360 + }, + { + "epoch": 0.5858, + "grad_norm": 1.585428237915039, + "learning_rate": 8.285841298066237e-05, + "loss": 1.8526, + "step": 26361 + }, + { + "epoch": 0.5858222222222222, + "grad_norm": 1.7944355010986328, + "learning_rate": 8.285396754834408e-05, + "loss": 2.4323, + "step": 26362 + }, + { + "epoch": 0.5858444444444444, + "grad_norm": 1.4390625953674316, + "learning_rate": 8.28495221160258e-05, + "loss": 1.7495, + "step": 26363 + }, + { + "epoch": 0.5858666666666666, + "grad_norm": 1.7868386507034302, + "learning_rate": 8.284507668370749e-05, + "loss": 1.6837, + "step": 26364 + }, + { + "epoch": 0.5858888888888889, + "grad_norm": 1.818138837814331, + "learning_rate": 8.28406312513892e-05, + "loss": 1.7053, + "step": 26365 + }, + { + "epoch": 0.5859111111111112, + "grad_norm": 2.056191921234131, + "learning_rate": 8.283618581907091e-05, + "loss": 2.0468, + "step": 26366 + }, + { + "epoch": 0.5859333333333333, + "grad_norm": 1.6591705083847046, + "learning_rate": 8.283174038675262e-05, + "loss": 2.3547, + "step": 26367 + }, + { + "epoch": 0.5859555555555556, + "grad_norm": 1.5027168989181519, + "learning_rate": 8.282729495443432e-05, + "loss": 1.9793, + "step": 26368 + }, + { + "epoch": 0.5859777777777778, + "grad_norm": 1.5128077268600464, + "learning_rate": 8.282284952211602e-05, + "loss": 1.5506, + "step": 26369 + }, + { + "epoch": 0.586, + "grad_norm": 1.9915423393249512, + "learning_rate": 8.281840408979773e-05, + "loss": 2.1444, + "step": 26370 + }, + { + "epoch": 0.5860222222222222, + "grad_norm": 1.9194926023483276, + "learning_rate": 8.281395865747944e-05, + "loss": 2.1724, + "step": 26371 + }, + { + "epoch": 0.5860444444444445, + "grad_norm": 2.282923936843872, + "learning_rate": 8.280951322516115e-05, + "loss": 2.2116, + "step": 26372 + }, + { + "epoch": 0.5860666666666666, + "grad_norm": 1.53107488155365, + "learning_rate": 8.280506779284286e-05, + "loss": 2.062, + "step": 26373 + }, + { + "epoch": 0.5860888888888889, + "grad_norm": 1.673590898513794, + "learning_rate": 8.280062236052456e-05, + "loss": 1.5491, + "step": 26374 + }, + { + "epoch": 0.5861111111111111, + "grad_norm": 2.9730846881866455, + "learning_rate": 8.279617692820627e-05, + "loss": 1.2786, + "step": 26375 + }, + { + "epoch": 0.5861333333333333, + "grad_norm": 1.4333029985427856, + "learning_rate": 8.279173149588798e-05, + "loss": 1.6072, + "step": 26376 + }, + { + "epoch": 0.5861555555555555, + "grad_norm": 1.7236535549163818, + "learning_rate": 8.278728606356969e-05, + "loss": 1.5908, + "step": 26377 + }, + { + "epoch": 0.5861777777777778, + "grad_norm": 1.8024083375930786, + "learning_rate": 8.278284063125138e-05, + "loss": 2.0472, + "step": 26378 + }, + { + "epoch": 0.5862, + "grad_norm": 1.3262346982955933, + "learning_rate": 8.277839519893311e-05, + "loss": 1.7635, + "step": 26379 + }, + { + "epoch": 0.5862222222222222, + "grad_norm": 1.9368542432785034, + "learning_rate": 8.277394976661482e-05, + "loss": 2.1713, + "step": 26380 + }, + { + "epoch": 0.5862444444444445, + "grad_norm": 1.776813268661499, + "learning_rate": 8.276950433429651e-05, + "loss": 2.2592, + "step": 26381 + }, + { + "epoch": 0.5862666666666667, + "grad_norm": 2.261319160461426, + "learning_rate": 8.276505890197822e-05, + "loss": 2.4149, + "step": 26382 + }, + { + "epoch": 0.5862888888888889, + "grad_norm": 1.633979320526123, + "learning_rate": 8.276061346965993e-05, + "loss": 1.6741, + "step": 26383 + }, + { + "epoch": 0.5863111111111111, + "grad_norm": 1.6456549167633057, + "learning_rate": 8.275616803734163e-05, + "loss": 2.0613, + "step": 26384 + }, + { + "epoch": 0.5863333333333334, + "grad_norm": 1.6004148721694946, + "learning_rate": 8.275172260502334e-05, + "loss": 1.8647, + "step": 26385 + }, + { + "epoch": 0.5863555555555555, + "grad_norm": 1.6349995136260986, + "learning_rate": 8.274727717270505e-05, + "loss": 1.946, + "step": 26386 + }, + { + "epoch": 0.5863777777777778, + "grad_norm": 1.856724500656128, + "learning_rate": 8.274283174038676e-05, + "loss": 2.0633, + "step": 26387 + }, + { + "epoch": 0.5864, + "grad_norm": 1.818117618560791, + "learning_rate": 8.273838630806847e-05, + "loss": 2.2893, + "step": 26388 + }, + { + "epoch": 0.5864222222222222, + "grad_norm": 1.3961563110351562, + "learning_rate": 8.273394087575018e-05, + "loss": 1.608, + "step": 26389 + }, + { + "epoch": 0.5864444444444444, + "grad_norm": 1.90252685546875, + "learning_rate": 8.272949544343189e-05, + "loss": 2.1006, + "step": 26390 + }, + { + "epoch": 0.5864666666666667, + "grad_norm": 1.7041651010513306, + "learning_rate": 8.272505001111358e-05, + "loss": 1.7053, + "step": 26391 + }, + { + "epoch": 0.5864888888888888, + "grad_norm": 1.704955816268921, + "learning_rate": 8.272060457879529e-05, + "loss": 1.8423, + "step": 26392 + }, + { + "epoch": 0.5865111111111111, + "grad_norm": 2.031508445739746, + "learning_rate": 8.2716159146477e-05, + "loss": 1.7539, + "step": 26393 + }, + { + "epoch": 0.5865333333333334, + "grad_norm": 2.1782689094543457, + "learning_rate": 8.27117137141587e-05, + "loss": 1.8534, + "step": 26394 + }, + { + "epoch": 0.5865555555555556, + "grad_norm": 1.6237221956253052, + "learning_rate": 8.270726828184041e-05, + "loss": 1.8039, + "step": 26395 + }, + { + "epoch": 0.5865777777777778, + "grad_norm": 1.854526162147522, + "learning_rate": 8.270282284952212e-05, + "loss": 1.671, + "step": 26396 + }, + { + "epoch": 0.5866, + "grad_norm": 1.7956544160842896, + "learning_rate": 8.269837741720383e-05, + "loss": 1.6, + "step": 26397 + }, + { + "epoch": 0.5866222222222223, + "grad_norm": 2.6054320335388184, + "learning_rate": 8.269393198488554e-05, + "loss": 1.3538, + "step": 26398 + }, + { + "epoch": 0.5866444444444444, + "grad_norm": 1.5763986110687256, + "learning_rate": 8.268948655256725e-05, + "loss": 1.6026, + "step": 26399 + }, + { + "epoch": 0.5866666666666667, + "grad_norm": 1.8800048828125, + "learning_rate": 8.268504112024896e-05, + "loss": 1.4492, + "step": 26400 + }, + { + "epoch": 0.5866888888888889, + "grad_norm": 1.9422786235809326, + "learning_rate": 8.268059568793065e-05, + "loss": 2.3528, + "step": 26401 + }, + { + "epoch": 0.5867111111111111, + "grad_norm": 1.5031019449234009, + "learning_rate": 8.267615025561236e-05, + "loss": 2.304, + "step": 26402 + }, + { + "epoch": 0.5867333333333333, + "grad_norm": 1.8519642353057861, + "learning_rate": 8.267170482329407e-05, + "loss": 2.9398, + "step": 26403 + }, + { + "epoch": 0.5867555555555556, + "grad_norm": 1.6615465879440308, + "learning_rate": 8.266725939097577e-05, + "loss": 2.2684, + "step": 26404 + }, + { + "epoch": 0.5867777777777777, + "grad_norm": 1.7758432626724243, + "learning_rate": 8.266281395865748e-05, + "loss": 2.2158, + "step": 26405 + }, + { + "epoch": 0.5868, + "grad_norm": 1.4606879949569702, + "learning_rate": 8.265836852633919e-05, + "loss": 1.6692, + "step": 26406 + }, + { + "epoch": 0.5868222222222222, + "grad_norm": 1.423619270324707, + "learning_rate": 8.26539230940209e-05, + "loss": 2.2713, + "step": 26407 + }, + { + "epoch": 0.5868444444444444, + "grad_norm": 1.7291810512542725, + "learning_rate": 8.26494776617026e-05, + "loss": 2.4045, + "step": 26408 + }, + { + "epoch": 0.5868666666666666, + "grad_norm": 1.4429683685302734, + "learning_rate": 8.264503222938431e-05, + "loss": 1.9806, + "step": 26409 + }, + { + "epoch": 0.5868888888888889, + "grad_norm": 1.526571273803711, + "learning_rate": 8.264058679706602e-05, + "loss": 2.2861, + "step": 26410 + }, + { + "epoch": 0.5869111111111112, + "grad_norm": 1.5247364044189453, + "learning_rate": 8.263614136474772e-05, + "loss": 1.6332, + "step": 26411 + }, + { + "epoch": 0.5869333333333333, + "grad_norm": 1.7640910148620605, + "learning_rate": 8.263169593242943e-05, + "loss": 2.6262, + "step": 26412 + }, + { + "epoch": 0.5869555555555556, + "grad_norm": 1.8370826244354248, + "learning_rate": 8.262725050011114e-05, + "loss": 1.9565, + "step": 26413 + }, + { + "epoch": 0.5869777777777778, + "grad_norm": 1.6487253904342651, + "learning_rate": 8.262280506779285e-05, + "loss": 2.071, + "step": 26414 + }, + { + "epoch": 0.587, + "grad_norm": 1.5205276012420654, + "learning_rate": 8.261835963547455e-05, + "loss": 1.8026, + "step": 26415 + }, + { + "epoch": 0.5870222222222222, + "grad_norm": 1.7748128175735474, + "learning_rate": 8.261391420315627e-05, + "loss": 1.8529, + "step": 26416 + }, + { + "epoch": 0.5870444444444445, + "grad_norm": 1.5548124313354492, + "learning_rate": 8.260946877083798e-05, + "loss": 1.9147, + "step": 26417 + }, + { + "epoch": 0.5870666666666666, + "grad_norm": 1.6629937887191772, + "learning_rate": 8.260502333851967e-05, + "loss": 2.1774, + "step": 26418 + }, + { + "epoch": 0.5870888888888889, + "grad_norm": 1.7149839401245117, + "learning_rate": 8.260057790620138e-05, + "loss": 2.023, + "step": 26419 + }, + { + "epoch": 0.5871111111111111, + "grad_norm": 1.7205604314804077, + "learning_rate": 8.25961324738831e-05, + "loss": 2.1347, + "step": 26420 + }, + { + "epoch": 0.5871333333333333, + "grad_norm": 1.8264589309692383, + "learning_rate": 8.259168704156479e-05, + "loss": 2.0622, + "step": 26421 + }, + { + "epoch": 0.5871555555555555, + "grad_norm": 1.5748111009597778, + "learning_rate": 8.25872416092465e-05, + "loss": 1.7845, + "step": 26422 + }, + { + "epoch": 0.5871777777777778, + "grad_norm": 2.0303964614868164, + "learning_rate": 8.258279617692821e-05, + "loss": 2.1154, + "step": 26423 + }, + { + "epoch": 0.5872, + "grad_norm": 1.772826910018921, + "learning_rate": 8.257835074460992e-05, + "loss": 1.9938, + "step": 26424 + }, + { + "epoch": 0.5872222222222222, + "grad_norm": 1.7206312417984009, + "learning_rate": 8.257390531229163e-05, + "loss": 2.008, + "step": 26425 + }, + { + "epoch": 0.5872444444444445, + "grad_norm": 1.6457266807556152, + "learning_rate": 8.256945987997334e-05, + "loss": 2.0749, + "step": 26426 + }, + { + "epoch": 0.5872666666666667, + "grad_norm": 1.5992521047592163, + "learning_rate": 8.256501444765505e-05, + "loss": 2.2439, + "step": 26427 + }, + { + "epoch": 0.5872888888888889, + "grad_norm": 1.3555350303649902, + "learning_rate": 8.256056901533674e-05, + "loss": 1.2614, + "step": 26428 + }, + { + "epoch": 0.5873111111111111, + "grad_norm": 1.5201590061187744, + "learning_rate": 8.255612358301845e-05, + "loss": 1.8837, + "step": 26429 + }, + { + "epoch": 0.5873333333333334, + "grad_norm": 1.8117382526397705, + "learning_rate": 8.255167815070016e-05, + "loss": 2.3655, + "step": 26430 + }, + { + "epoch": 0.5873555555555555, + "grad_norm": 1.8975117206573486, + "learning_rate": 8.254723271838186e-05, + "loss": 2.1152, + "step": 26431 + }, + { + "epoch": 0.5873777777777778, + "grad_norm": 1.8291747570037842, + "learning_rate": 8.254278728606357e-05, + "loss": 1.8039, + "step": 26432 + }, + { + "epoch": 0.5874, + "grad_norm": 1.5239486694335938, + "learning_rate": 8.253834185374528e-05, + "loss": 1.3966, + "step": 26433 + }, + { + "epoch": 0.5874222222222222, + "grad_norm": 1.609764814376831, + "learning_rate": 8.253389642142699e-05, + "loss": 1.8746, + "step": 26434 + }, + { + "epoch": 0.5874444444444444, + "grad_norm": 1.4939262866973877, + "learning_rate": 8.25294509891087e-05, + "loss": 1.4485, + "step": 26435 + }, + { + "epoch": 0.5874666666666667, + "grad_norm": 1.7408115863800049, + "learning_rate": 8.25250055567904e-05, + "loss": 1.9704, + "step": 26436 + }, + { + "epoch": 0.5874888888888888, + "grad_norm": 1.6214052438735962, + "learning_rate": 8.252056012447212e-05, + "loss": 1.5611, + "step": 26437 + }, + { + "epoch": 0.5875111111111111, + "grad_norm": 1.5068247318267822, + "learning_rate": 8.251611469215381e-05, + "loss": 1.6896, + "step": 26438 + }, + { + "epoch": 0.5875333333333334, + "grad_norm": 1.753875494003296, + "learning_rate": 8.251166925983552e-05, + "loss": 1.7141, + "step": 26439 + }, + { + "epoch": 0.5875555555555556, + "grad_norm": 1.8102775812149048, + "learning_rate": 8.250722382751723e-05, + "loss": 1.956, + "step": 26440 + }, + { + "epoch": 0.5875777777777778, + "grad_norm": 1.626371145248413, + "learning_rate": 8.250277839519893e-05, + "loss": 1.7825, + "step": 26441 + }, + { + "epoch": 0.5876, + "grad_norm": 1.577762484550476, + "learning_rate": 8.249833296288064e-05, + "loss": 1.7609, + "step": 26442 + }, + { + "epoch": 0.5876222222222223, + "grad_norm": 1.9608583450317383, + "learning_rate": 8.249388753056235e-05, + "loss": 2.1549, + "step": 26443 + }, + { + "epoch": 0.5876444444444444, + "grad_norm": 1.8894906044006348, + "learning_rate": 8.248944209824406e-05, + "loss": 1.9205, + "step": 26444 + }, + { + "epoch": 0.5876666666666667, + "grad_norm": 1.5059431791305542, + "learning_rate": 8.248499666592577e-05, + "loss": 1.4438, + "step": 26445 + }, + { + "epoch": 0.5876888888888889, + "grad_norm": 1.8880411386489868, + "learning_rate": 8.248055123360748e-05, + "loss": 1.9452, + "step": 26446 + }, + { + "epoch": 0.5877111111111111, + "grad_norm": 1.5395166873931885, + "learning_rate": 8.247610580128919e-05, + "loss": 1.8412, + "step": 26447 + }, + { + "epoch": 0.5877333333333333, + "grad_norm": 1.722232460975647, + "learning_rate": 8.247166036897088e-05, + "loss": 1.7644, + "step": 26448 + }, + { + "epoch": 0.5877555555555556, + "grad_norm": 1.1945700645446777, + "learning_rate": 8.246721493665259e-05, + "loss": 0.5181, + "step": 26449 + }, + { + "epoch": 0.5877777777777777, + "grad_norm": 1.5664265155792236, + "learning_rate": 8.24627695043343e-05, + "loss": 0.8745, + "step": 26450 + }, + { + "epoch": 0.5878, + "grad_norm": 1.278299331665039, + "learning_rate": 8.2458324072016e-05, + "loss": 1.6046, + "step": 26451 + }, + { + "epoch": 0.5878222222222222, + "grad_norm": 1.794995665550232, + "learning_rate": 8.24538786396977e-05, + "loss": 2.6412, + "step": 26452 + }, + { + "epoch": 0.5878444444444444, + "grad_norm": 1.5944641828536987, + "learning_rate": 8.244943320737943e-05, + "loss": 2.638, + "step": 26453 + }, + { + "epoch": 0.5878666666666666, + "grad_norm": 1.603827953338623, + "learning_rate": 8.244498777506114e-05, + "loss": 1.2712, + "step": 26454 + }, + { + "epoch": 0.5878888888888889, + "grad_norm": 1.6544184684753418, + "learning_rate": 8.244054234274284e-05, + "loss": 1.7048, + "step": 26455 + }, + { + "epoch": 0.5879111111111112, + "grad_norm": 1.5918264389038086, + "learning_rate": 8.243609691042454e-05, + "loss": 2.0802, + "step": 26456 + }, + { + "epoch": 0.5879333333333333, + "grad_norm": 1.6266483068466187, + "learning_rate": 8.243165147810625e-05, + "loss": 1.6888, + "step": 26457 + }, + { + "epoch": 0.5879555555555556, + "grad_norm": 1.4718546867370605, + "learning_rate": 8.242720604578795e-05, + "loss": 1.9847, + "step": 26458 + }, + { + "epoch": 0.5879777777777778, + "grad_norm": 1.6215400695800781, + "learning_rate": 8.242276061346966e-05, + "loss": 2.5943, + "step": 26459 + }, + { + "epoch": 0.588, + "grad_norm": 1.3390930891036987, + "learning_rate": 8.241831518115137e-05, + "loss": 0.9272, + "step": 26460 + }, + { + "epoch": 0.5880222222222222, + "grad_norm": 1.5605331659317017, + "learning_rate": 8.241386974883307e-05, + "loss": 2.1027, + "step": 26461 + }, + { + "epoch": 0.5880444444444445, + "grad_norm": 1.332210898399353, + "learning_rate": 8.240942431651479e-05, + "loss": 1.7529, + "step": 26462 + }, + { + "epoch": 0.5880666666666666, + "grad_norm": 1.414040207862854, + "learning_rate": 8.24049788841965e-05, + "loss": 1.9332, + "step": 26463 + }, + { + "epoch": 0.5880888888888889, + "grad_norm": 1.5289276838302612, + "learning_rate": 8.240053345187821e-05, + "loss": 2.1359, + "step": 26464 + }, + { + "epoch": 0.5881111111111111, + "grad_norm": 1.1083582639694214, + "learning_rate": 8.23960880195599e-05, + "loss": 1.1907, + "step": 26465 + }, + { + "epoch": 0.5881333333333333, + "grad_norm": 1.4909306764602661, + "learning_rate": 8.239164258724161e-05, + "loss": 1.8534, + "step": 26466 + }, + { + "epoch": 0.5881555555555555, + "grad_norm": 1.5297155380249023, + "learning_rate": 8.238719715492332e-05, + "loss": 1.7557, + "step": 26467 + }, + { + "epoch": 0.5881777777777778, + "grad_norm": 1.879301905632019, + "learning_rate": 8.238275172260502e-05, + "loss": 2.0541, + "step": 26468 + }, + { + "epoch": 0.5882, + "grad_norm": 1.8957713842391968, + "learning_rate": 8.237830629028673e-05, + "loss": 2.1396, + "step": 26469 + }, + { + "epoch": 0.5882222222222222, + "grad_norm": 1.3897382020950317, + "learning_rate": 8.237386085796844e-05, + "loss": 1.9988, + "step": 26470 + }, + { + "epoch": 0.5882444444444445, + "grad_norm": 1.6075379848480225, + "learning_rate": 8.236941542565015e-05, + "loss": 2.2945, + "step": 26471 + }, + { + "epoch": 0.5882666666666667, + "grad_norm": 1.8518240451812744, + "learning_rate": 8.236496999333186e-05, + "loss": 2.1176, + "step": 26472 + }, + { + "epoch": 0.5882888888888889, + "grad_norm": 1.6546710729599, + "learning_rate": 8.236052456101357e-05, + "loss": 1.8829, + "step": 26473 + }, + { + "epoch": 0.5883111111111111, + "grad_norm": 1.332216501235962, + "learning_rate": 8.235607912869528e-05, + "loss": 1.315, + "step": 26474 + }, + { + "epoch": 0.5883333333333334, + "grad_norm": 1.7014000415802002, + "learning_rate": 8.235163369637697e-05, + "loss": 2.2719, + "step": 26475 + }, + { + "epoch": 0.5883555555555555, + "grad_norm": 1.8507986068725586, + "learning_rate": 8.234718826405868e-05, + "loss": 1.9988, + "step": 26476 + }, + { + "epoch": 0.5883777777777778, + "grad_norm": 2.463884115219116, + "learning_rate": 8.234274283174039e-05, + "loss": 2.3464, + "step": 26477 + }, + { + "epoch": 0.5884, + "grad_norm": 3.2078611850738525, + "learning_rate": 8.233829739942209e-05, + "loss": 2.4075, + "step": 26478 + }, + { + "epoch": 0.5884222222222222, + "grad_norm": 1.769322156906128, + "learning_rate": 8.23338519671038e-05, + "loss": 1.9338, + "step": 26479 + }, + { + "epoch": 0.5884444444444444, + "grad_norm": 1.3190253973007202, + "learning_rate": 8.232940653478551e-05, + "loss": 1.2321, + "step": 26480 + }, + { + "epoch": 0.5884666666666667, + "grad_norm": 1.6068058013916016, + "learning_rate": 8.232496110246722e-05, + "loss": 1.9387, + "step": 26481 + }, + { + "epoch": 0.5884888888888888, + "grad_norm": 1.6955701112747192, + "learning_rate": 8.232051567014893e-05, + "loss": 2.0747, + "step": 26482 + }, + { + "epoch": 0.5885111111111111, + "grad_norm": 1.8967593908309937, + "learning_rate": 8.231607023783064e-05, + "loss": 1.7893, + "step": 26483 + }, + { + "epoch": 0.5885333333333334, + "grad_norm": 1.5220470428466797, + "learning_rate": 8.231162480551235e-05, + "loss": 1.8033, + "step": 26484 + }, + { + "epoch": 0.5885555555555556, + "grad_norm": 1.6682027578353882, + "learning_rate": 8.230717937319404e-05, + "loss": 2.1241, + "step": 26485 + }, + { + "epoch": 0.5885777777777778, + "grad_norm": 1.5411769151687622, + "learning_rate": 8.230273394087575e-05, + "loss": 1.5139, + "step": 26486 + }, + { + "epoch": 0.5886, + "grad_norm": 1.8869585990905762, + "learning_rate": 8.229828850855746e-05, + "loss": 2.0045, + "step": 26487 + }, + { + "epoch": 0.5886222222222223, + "grad_norm": 1.6759885549545288, + "learning_rate": 8.229384307623916e-05, + "loss": 1.6491, + "step": 26488 + }, + { + "epoch": 0.5886444444444444, + "grad_norm": 1.7548519372940063, + "learning_rate": 8.228939764392087e-05, + "loss": 2.0672, + "step": 26489 + }, + { + "epoch": 0.5886666666666667, + "grad_norm": 1.571402907371521, + "learning_rate": 8.228495221160259e-05, + "loss": 1.3744, + "step": 26490 + }, + { + "epoch": 0.5886888888888889, + "grad_norm": 1.9174574613571167, + "learning_rate": 8.22805067792843e-05, + "loss": 1.8788, + "step": 26491 + }, + { + "epoch": 0.5887111111111111, + "grad_norm": 2.084259033203125, + "learning_rate": 8.2276061346966e-05, + "loss": 2.3798, + "step": 26492 + }, + { + "epoch": 0.5887333333333333, + "grad_norm": 1.5270769596099854, + "learning_rate": 8.22716159146477e-05, + "loss": 1.8474, + "step": 26493 + }, + { + "epoch": 0.5887555555555556, + "grad_norm": 1.9949065446853638, + "learning_rate": 8.226717048232942e-05, + "loss": 2.2836, + "step": 26494 + }, + { + "epoch": 0.5887777777777777, + "grad_norm": 1.8067631721496582, + "learning_rate": 8.226272505001111e-05, + "loss": 1.8638, + "step": 26495 + }, + { + "epoch": 0.5888, + "grad_norm": 1.8312886953353882, + "learning_rate": 8.225827961769282e-05, + "loss": 2.3125, + "step": 26496 + }, + { + "epoch": 0.5888222222222222, + "grad_norm": 1.6324427127838135, + "learning_rate": 8.225383418537453e-05, + "loss": 1.4562, + "step": 26497 + }, + { + "epoch": 0.5888444444444444, + "grad_norm": 2.074530601501465, + "learning_rate": 8.224938875305623e-05, + "loss": 1.9811, + "step": 26498 + }, + { + "epoch": 0.5888666666666666, + "grad_norm": 1.7900502681732178, + "learning_rate": 8.224494332073795e-05, + "loss": 1.7443, + "step": 26499 + }, + { + "epoch": 0.5888888888888889, + "grad_norm": 1.8668699264526367, + "learning_rate": 8.224049788841966e-05, + "loss": 1.913, + "step": 26500 + }, + { + "epoch": 0.5889111111111112, + "grad_norm": 1.4245249032974243, + "learning_rate": 8.223605245610137e-05, + "loss": 2.6009, + "step": 26501 + }, + { + "epoch": 0.5889333333333333, + "grad_norm": 1.4777202606201172, + "learning_rate": 8.223160702378307e-05, + "loss": 2.6983, + "step": 26502 + }, + { + "epoch": 0.5889555555555556, + "grad_norm": 1.3658816814422607, + "learning_rate": 8.222716159146477e-05, + "loss": 2.0879, + "step": 26503 + }, + { + "epoch": 0.5889777777777778, + "grad_norm": 0.2192663699388504, + "learning_rate": 8.222271615914648e-05, + "loss": 0.0216, + "step": 26504 + }, + { + "epoch": 0.589, + "grad_norm": 1.0416237115859985, + "learning_rate": 8.221827072682818e-05, + "loss": 1.2148, + "step": 26505 + }, + { + "epoch": 0.5890222222222222, + "grad_norm": 1.6372934579849243, + "learning_rate": 8.221382529450989e-05, + "loss": 2.2565, + "step": 26506 + }, + { + "epoch": 0.5890444444444445, + "grad_norm": 1.6254594326019287, + "learning_rate": 8.22093798621916e-05, + "loss": 2.5999, + "step": 26507 + }, + { + "epoch": 0.5890666666666666, + "grad_norm": 1.6181801557540894, + "learning_rate": 8.220493442987331e-05, + "loss": 2.3094, + "step": 26508 + }, + { + "epoch": 0.5890888888888889, + "grad_norm": 1.6073154211044312, + "learning_rate": 8.220048899755502e-05, + "loss": 2.3929, + "step": 26509 + }, + { + "epoch": 0.5891111111111111, + "grad_norm": 1.4710923433303833, + "learning_rate": 8.219604356523673e-05, + "loss": 1.9685, + "step": 26510 + }, + { + "epoch": 0.5891333333333333, + "grad_norm": 1.5624597072601318, + "learning_rate": 8.219159813291844e-05, + "loss": 2.0818, + "step": 26511 + }, + { + "epoch": 0.5891555555555555, + "grad_norm": 1.6579220294952393, + "learning_rate": 8.218715270060013e-05, + "loss": 1.7105, + "step": 26512 + }, + { + "epoch": 0.5891777777777778, + "grad_norm": 1.7512640953063965, + "learning_rate": 8.218270726828184e-05, + "loss": 2.4301, + "step": 26513 + }, + { + "epoch": 0.5892, + "grad_norm": 1.403272271156311, + "learning_rate": 8.217826183596355e-05, + "loss": 1.5019, + "step": 26514 + }, + { + "epoch": 0.5892222222222222, + "grad_norm": 1.9031537771224976, + "learning_rate": 8.217381640364525e-05, + "loss": 2.4452, + "step": 26515 + }, + { + "epoch": 0.5892444444444445, + "grad_norm": 1.6179159879684448, + "learning_rate": 8.216937097132696e-05, + "loss": 2.1208, + "step": 26516 + }, + { + "epoch": 0.5892666666666667, + "grad_norm": 1.5489249229431152, + "learning_rate": 8.216492553900867e-05, + "loss": 1.6462, + "step": 26517 + }, + { + "epoch": 0.5892888888888889, + "grad_norm": 1.8130542039871216, + "learning_rate": 8.216048010669038e-05, + "loss": 1.6512, + "step": 26518 + }, + { + "epoch": 0.5893111111111111, + "grad_norm": 1.5840333700180054, + "learning_rate": 8.215603467437209e-05, + "loss": 1.6867, + "step": 26519 + }, + { + "epoch": 0.5893333333333334, + "grad_norm": 1.7145648002624512, + "learning_rate": 8.21515892420538e-05, + "loss": 2.2308, + "step": 26520 + }, + { + "epoch": 0.5893555555555555, + "grad_norm": 2.2711832523345947, + "learning_rate": 8.214714380973551e-05, + "loss": 2.3032, + "step": 26521 + }, + { + "epoch": 0.5893777777777778, + "grad_norm": 1.7569622993469238, + "learning_rate": 8.21426983774172e-05, + "loss": 1.9934, + "step": 26522 + }, + { + "epoch": 0.5894, + "grad_norm": 2.0704503059387207, + "learning_rate": 8.213825294509891e-05, + "loss": 2.0143, + "step": 26523 + }, + { + "epoch": 0.5894222222222222, + "grad_norm": 1.657915472984314, + "learning_rate": 8.213380751278062e-05, + "loss": 1.697, + "step": 26524 + }, + { + "epoch": 0.5894444444444444, + "grad_norm": 1.6642396450042725, + "learning_rate": 8.212936208046232e-05, + "loss": 1.6574, + "step": 26525 + }, + { + "epoch": 0.5894666666666667, + "grad_norm": 1.705404281616211, + "learning_rate": 8.212491664814403e-05, + "loss": 1.8843, + "step": 26526 + }, + { + "epoch": 0.5894888888888888, + "grad_norm": 1.951427936553955, + "learning_rate": 8.212047121582575e-05, + "loss": 2.2169, + "step": 26527 + }, + { + "epoch": 0.5895111111111111, + "grad_norm": 1.76736319065094, + "learning_rate": 8.211602578350746e-05, + "loss": 2.4436, + "step": 26528 + }, + { + "epoch": 0.5895333333333334, + "grad_norm": 1.6907325983047485, + "learning_rate": 8.211158035118916e-05, + "loss": 1.9543, + "step": 26529 + }, + { + "epoch": 0.5895555555555556, + "grad_norm": 1.6716514825820923, + "learning_rate": 8.210713491887087e-05, + "loss": 1.7508, + "step": 26530 + }, + { + "epoch": 0.5895777777777778, + "grad_norm": 1.727203130722046, + "learning_rate": 8.210268948655258e-05, + "loss": 2.0613, + "step": 26531 + }, + { + "epoch": 0.5896, + "grad_norm": 1.8019435405731201, + "learning_rate": 8.209824405423427e-05, + "loss": 1.9505, + "step": 26532 + }, + { + "epoch": 0.5896222222222223, + "grad_norm": 1.71755051612854, + "learning_rate": 8.209379862191598e-05, + "loss": 2.134, + "step": 26533 + }, + { + "epoch": 0.5896444444444444, + "grad_norm": 1.2752341032028198, + "learning_rate": 8.208935318959769e-05, + "loss": 0.8956, + "step": 26534 + }, + { + "epoch": 0.5896666666666667, + "grad_norm": 1.8847476243972778, + "learning_rate": 8.208490775727939e-05, + "loss": 2.0627, + "step": 26535 + }, + { + "epoch": 0.5896888888888889, + "grad_norm": 1.639291763305664, + "learning_rate": 8.208046232496111e-05, + "loss": 1.9175, + "step": 26536 + }, + { + "epoch": 0.5897111111111111, + "grad_norm": 2.0569376945495605, + "learning_rate": 8.207601689264282e-05, + "loss": 2.1619, + "step": 26537 + }, + { + "epoch": 0.5897333333333333, + "grad_norm": 1.775004267692566, + "learning_rate": 8.207157146032453e-05, + "loss": 1.8134, + "step": 26538 + }, + { + "epoch": 0.5897555555555556, + "grad_norm": 2.516251564025879, + "learning_rate": 8.206712602800623e-05, + "loss": 2.2565, + "step": 26539 + }, + { + "epoch": 0.5897777777777777, + "grad_norm": 1.5780636072158813, + "learning_rate": 8.206268059568794e-05, + "loss": 1.658, + "step": 26540 + }, + { + "epoch": 0.5898, + "grad_norm": 1.934320330619812, + "learning_rate": 8.205823516336965e-05, + "loss": 2.1937, + "step": 26541 + }, + { + "epoch": 0.5898222222222222, + "grad_norm": 1.937772274017334, + "learning_rate": 8.205378973105134e-05, + "loss": 2.0649, + "step": 26542 + }, + { + "epoch": 0.5898444444444444, + "grad_norm": 1.5901800394058228, + "learning_rate": 8.204934429873305e-05, + "loss": 1.6263, + "step": 26543 + }, + { + "epoch": 0.5898666666666667, + "grad_norm": 1.6580713987350464, + "learning_rate": 8.204489886641476e-05, + "loss": 1.7261, + "step": 26544 + }, + { + "epoch": 0.5898888888888889, + "grad_norm": 1.5570964813232422, + "learning_rate": 8.204045343409647e-05, + "loss": 1.4863, + "step": 26545 + }, + { + "epoch": 0.5899111111111112, + "grad_norm": 1.8327727317810059, + "learning_rate": 8.203600800177818e-05, + "loss": 1.8743, + "step": 26546 + }, + { + "epoch": 0.5899333333333333, + "grad_norm": 1.509567141532898, + "learning_rate": 8.203156256945989e-05, + "loss": 1.591, + "step": 26547 + }, + { + "epoch": 0.5899555555555556, + "grad_norm": 2.091273307800293, + "learning_rate": 8.20271171371416e-05, + "loss": 1.8144, + "step": 26548 + }, + { + "epoch": 0.5899777777777778, + "grad_norm": 1.8163344860076904, + "learning_rate": 8.20226717048233e-05, + "loss": 1.7362, + "step": 26549 + }, + { + "epoch": 0.59, + "grad_norm": 2.0135276317596436, + "learning_rate": 8.2018226272505e-05, + "loss": 1.6102, + "step": 26550 + }, + { + "epoch": 0.5900222222222222, + "grad_norm": 1.781471610069275, + "learning_rate": 8.201378084018671e-05, + "loss": 2.6554, + "step": 26551 + }, + { + "epoch": 0.5900444444444445, + "grad_norm": 1.0032708644866943, + "learning_rate": 8.200933540786841e-05, + "loss": 0.8867, + "step": 26552 + }, + { + "epoch": 0.5900666666666666, + "grad_norm": 1.761935830116272, + "learning_rate": 8.200488997555012e-05, + "loss": 2.6627, + "step": 26553 + }, + { + "epoch": 0.5900888888888889, + "grad_norm": 1.5583018064498901, + "learning_rate": 8.200044454323183e-05, + "loss": 2.2719, + "step": 26554 + }, + { + "epoch": 0.5901111111111111, + "grad_norm": 1.6104395389556885, + "learning_rate": 8.199599911091354e-05, + "loss": 2.3873, + "step": 26555 + }, + { + "epoch": 0.5901333333333333, + "grad_norm": 1.6542582511901855, + "learning_rate": 8.199155367859525e-05, + "loss": 2.2377, + "step": 26556 + }, + { + "epoch": 0.5901555555555555, + "grad_norm": 1.507717251777649, + "learning_rate": 8.198710824627696e-05, + "loss": 1.8727, + "step": 26557 + }, + { + "epoch": 0.5901777777777778, + "grad_norm": 1.5798872709274292, + "learning_rate": 8.198266281395867e-05, + "loss": 2.4807, + "step": 26558 + }, + { + "epoch": 0.5902, + "grad_norm": 2.0253896713256836, + "learning_rate": 8.197821738164036e-05, + "loss": 2.1833, + "step": 26559 + }, + { + "epoch": 0.5902222222222222, + "grad_norm": 1.9096221923828125, + "learning_rate": 8.197377194932207e-05, + "loss": 2.0666, + "step": 26560 + }, + { + "epoch": 0.5902444444444445, + "grad_norm": 1.6258680820465088, + "learning_rate": 8.196932651700378e-05, + "loss": 1.9742, + "step": 26561 + }, + { + "epoch": 0.5902666666666667, + "grad_norm": 1.657233715057373, + "learning_rate": 8.196488108468548e-05, + "loss": 2.2751, + "step": 26562 + }, + { + "epoch": 0.5902888888888889, + "grad_norm": 2.256463050842285, + "learning_rate": 8.196043565236719e-05, + "loss": 2.3546, + "step": 26563 + }, + { + "epoch": 0.5903111111111111, + "grad_norm": 1.5345555543899536, + "learning_rate": 8.195599022004891e-05, + "loss": 2.0553, + "step": 26564 + }, + { + "epoch": 0.5903333333333334, + "grad_norm": 1.6566272974014282, + "learning_rate": 8.195154478773061e-05, + "loss": 1.8394, + "step": 26565 + }, + { + "epoch": 0.5903555555555555, + "grad_norm": 1.3703186511993408, + "learning_rate": 8.194709935541232e-05, + "loss": 1.2446, + "step": 26566 + }, + { + "epoch": 0.5903777777777778, + "grad_norm": 1.0013676881790161, + "learning_rate": 8.194265392309403e-05, + "loss": 0.0459, + "step": 26567 + }, + { + "epoch": 0.5904, + "grad_norm": 1.772273063659668, + "learning_rate": 8.193820849077574e-05, + "loss": 2.2504, + "step": 26568 + }, + { + "epoch": 0.5904222222222222, + "grad_norm": 1.7223542928695679, + "learning_rate": 8.193376305845743e-05, + "loss": 1.9782, + "step": 26569 + }, + { + "epoch": 0.5904444444444444, + "grad_norm": 1.4310003519058228, + "learning_rate": 8.192931762613914e-05, + "loss": 1.9249, + "step": 26570 + }, + { + "epoch": 0.5904666666666667, + "grad_norm": 1.986963152885437, + "learning_rate": 8.192487219382085e-05, + "loss": 2.1059, + "step": 26571 + }, + { + "epoch": 0.5904888888888888, + "grad_norm": 1.6062281131744385, + "learning_rate": 8.192042676150255e-05, + "loss": 1.9521, + "step": 26572 + }, + { + "epoch": 0.5905111111111111, + "grad_norm": 1.5529038906097412, + "learning_rate": 8.191598132918427e-05, + "loss": 1.7418, + "step": 26573 + }, + { + "epoch": 0.5905333333333334, + "grad_norm": 2.3296256065368652, + "learning_rate": 8.191153589686598e-05, + "loss": 2.0563, + "step": 26574 + }, + { + "epoch": 0.5905555555555555, + "grad_norm": 1.8755871057510376, + "learning_rate": 8.190709046454769e-05, + "loss": 2.1776, + "step": 26575 + }, + { + "epoch": 0.5905777777777778, + "grad_norm": 1.5734074115753174, + "learning_rate": 8.190264503222939e-05, + "loss": 1.8932, + "step": 26576 + }, + { + "epoch": 0.5906, + "grad_norm": 1.6150445938110352, + "learning_rate": 8.18981995999111e-05, + "loss": 1.8276, + "step": 26577 + }, + { + "epoch": 0.5906222222222223, + "grad_norm": 1.3324120044708252, + "learning_rate": 8.18937541675928e-05, + "loss": 1.8167, + "step": 26578 + }, + { + "epoch": 0.5906444444444444, + "grad_norm": 1.2015331983566284, + "learning_rate": 8.18893087352745e-05, + "loss": 1.0707, + "step": 26579 + }, + { + "epoch": 0.5906666666666667, + "grad_norm": 1.3903979063034058, + "learning_rate": 8.188486330295621e-05, + "loss": 1.3774, + "step": 26580 + }, + { + "epoch": 0.5906888888888889, + "grad_norm": 1.6445354223251343, + "learning_rate": 8.188041787063792e-05, + "loss": 1.4733, + "step": 26581 + }, + { + "epoch": 0.5907111111111111, + "grad_norm": 1.9705824851989746, + "learning_rate": 8.187597243831963e-05, + "loss": 1.829, + "step": 26582 + }, + { + "epoch": 0.5907333333333333, + "grad_norm": 1.145692229270935, + "learning_rate": 8.187152700600134e-05, + "loss": 0.9194, + "step": 26583 + }, + { + "epoch": 0.5907555555555556, + "grad_norm": 1.5932261943817139, + "learning_rate": 8.186708157368305e-05, + "loss": 1.5765, + "step": 26584 + }, + { + "epoch": 0.5907777777777777, + "grad_norm": 1.630245566368103, + "learning_rate": 8.186263614136476e-05, + "loss": 1.8371, + "step": 26585 + }, + { + "epoch": 0.5908, + "grad_norm": 1.6606966257095337, + "learning_rate": 8.185819070904646e-05, + "loss": 1.5915, + "step": 26586 + }, + { + "epoch": 0.5908222222222222, + "grad_norm": 1.1430854797363281, + "learning_rate": 8.185374527672817e-05, + "loss": 0.745, + "step": 26587 + }, + { + "epoch": 0.5908444444444444, + "grad_norm": 1.4599413871765137, + "learning_rate": 8.184929984440988e-05, + "loss": 1.1224, + "step": 26588 + }, + { + "epoch": 0.5908666666666667, + "grad_norm": 2.151262044906616, + "learning_rate": 8.184485441209157e-05, + "loss": 2.0447, + "step": 26589 + }, + { + "epoch": 0.5908888888888889, + "grad_norm": 1.7323678731918335, + "learning_rate": 8.184040897977328e-05, + "loss": 1.9831, + "step": 26590 + }, + { + "epoch": 0.5909111111111112, + "grad_norm": 1.604820728302002, + "learning_rate": 8.183596354745499e-05, + "loss": 1.4933, + "step": 26591 + }, + { + "epoch": 0.5909333333333333, + "grad_norm": 1.5943677425384521, + "learning_rate": 8.18315181151367e-05, + "loss": 1.6485, + "step": 26592 + }, + { + "epoch": 0.5909555555555556, + "grad_norm": 1.757006287574768, + "learning_rate": 8.182707268281841e-05, + "loss": 2.0849, + "step": 26593 + }, + { + "epoch": 0.5909777777777778, + "grad_norm": 1.4215257167816162, + "learning_rate": 8.182262725050012e-05, + "loss": 1.4631, + "step": 26594 + }, + { + "epoch": 0.591, + "grad_norm": 1.69182550907135, + "learning_rate": 8.181818181818183e-05, + "loss": 1.7487, + "step": 26595 + }, + { + "epoch": 0.5910222222222222, + "grad_norm": 2.1581408977508545, + "learning_rate": 8.181373638586353e-05, + "loss": 2.0966, + "step": 26596 + }, + { + "epoch": 0.5910444444444445, + "grad_norm": 2.17958927154541, + "learning_rate": 8.180929095354524e-05, + "loss": 1.7987, + "step": 26597 + }, + { + "epoch": 0.5910666666666666, + "grad_norm": 1.9482837915420532, + "learning_rate": 8.180484552122694e-05, + "loss": 1.793, + "step": 26598 + }, + { + "epoch": 0.5910888888888889, + "grad_norm": 1.6424596309661865, + "learning_rate": 8.180040008890864e-05, + "loss": 1.4073, + "step": 26599 + }, + { + "epoch": 0.5911111111111111, + "grad_norm": 2.3620777130126953, + "learning_rate": 8.179595465659035e-05, + "loss": 1.3749, + "step": 26600 + }, + { + "epoch": 0.5911333333333333, + "grad_norm": 1.8508235216140747, + "learning_rate": 8.179150922427207e-05, + "loss": 2.9108, + "step": 26601 + }, + { + "epoch": 0.5911555555555555, + "grad_norm": 1.1262704133987427, + "learning_rate": 8.178706379195377e-05, + "loss": 0.9763, + "step": 26602 + }, + { + "epoch": 0.5911777777777778, + "grad_norm": 1.5308245420455933, + "learning_rate": 8.178261835963548e-05, + "loss": 2.0178, + "step": 26603 + }, + { + "epoch": 0.5912, + "grad_norm": 1.6389973163604736, + "learning_rate": 8.177817292731719e-05, + "loss": 2.4368, + "step": 26604 + }, + { + "epoch": 0.5912222222222222, + "grad_norm": 1.7720253467559814, + "learning_rate": 8.17737274949989e-05, + "loss": 2.2833, + "step": 26605 + }, + { + "epoch": 0.5912444444444445, + "grad_norm": 1.401301383972168, + "learning_rate": 8.17692820626806e-05, + "loss": 2.1505, + "step": 26606 + }, + { + "epoch": 0.5912666666666667, + "grad_norm": 1.1152511835098267, + "learning_rate": 8.17648366303623e-05, + "loss": 1.1234, + "step": 26607 + }, + { + "epoch": 0.5912888888888889, + "grad_norm": 1.4615529775619507, + "learning_rate": 8.176039119804401e-05, + "loss": 1.8951, + "step": 26608 + }, + { + "epoch": 0.5913111111111111, + "grad_norm": 1.5637952089309692, + "learning_rate": 8.175594576572571e-05, + "loss": 2.4163, + "step": 26609 + }, + { + "epoch": 0.5913333333333334, + "grad_norm": 1.5488580465316772, + "learning_rate": 8.175150033340743e-05, + "loss": 2.0755, + "step": 26610 + }, + { + "epoch": 0.5913555555555555, + "grad_norm": 1.5902295112609863, + "learning_rate": 8.174705490108914e-05, + "loss": 1.8796, + "step": 26611 + }, + { + "epoch": 0.5913777777777778, + "grad_norm": 1.5014410018920898, + "learning_rate": 8.174260946877084e-05, + "loss": 2.2305, + "step": 26612 + }, + { + "epoch": 0.5914, + "grad_norm": 1.3349800109863281, + "learning_rate": 8.173816403645255e-05, + "loss": 1.4973, + "step": 26613 + }, + { + "epoch": 0.5914222222222222, + "grad_norm": 1.8313910961151123, + "learning_rate": 8.173371860413426e-05, + "loss": 2.1185, + "step": 26614 + }, + { + "epoch": 0.5914444444444444, + "grad_norm": 1.560365915298462, + "learning_rate": 8.172927317181597e-05, + "loss": 1.798, + "step": 26615 + }, + { + "epoch": 0.5914666666666667, + "grad_norm": 1.5687285661697388, + "learning_rate": 8.172482773949766e-05, + "loss": 1.802, + "step": 26616 + }, + { + "epoch": 0.5914888888888888, + "grad_norm": 1.6726188659667969, + "learning_rate": 8.172038230717937e-05, + "loss": 1.9302, + "step": 26617 + }, + { + "epoch": 0.5915111111111111, + "grad_norm": 2.1939918994903564, + "learning_rate": 8.171593687486108e-05, + "loss": 2.1833, + "step": 26618 + }, + { + "epoch": 0.5915333333333334, + "grad_norm": 1.5094565153121948, + "learning_rate": 8.171149144254279e-05, + "loss": 2.179, + "step": 26619 + }, + { + "epoch": 0.5915555555555555, + "grad_norm": 1.5371365547180176, + "learning_rate": 8.17070460102245e-05, + "loss": 1.798, + "step": 26620 + }, + { + "epoch": 0.5915777777777778, + "grad_norm": 1.4002341032028198, + "learning_rate": 8.170260057790621e-05, + "loss": 1.8906, + "step": 26621 + }, + { + "epoch": 0.5916, + "grad_norm": 1.7280828952789307, + "learning_rate": 8.169815514558791e-05, + "loss": 2.034, + "step": 26622 + }, + { + "epoch": 0.5916222222222223, + "grad_norm": 1.7399758100509644, + "learning_rate": 8.169370971326962e-05, + "loss": 2.2769, + "step": 26623 + }, + { + "epoch": 0.5916444444444444, + "grad_norm": 1.539891004562378, + "learning_rate": 8.168926428095133e-05, + "loss": 1.9422, + "step": 26624 + }, + { + "epoch": 0.5916666666666667, + "grad_norm": 1.6626595258712769, + "learning_rate": 8.168481884863304e-05, + "loss": 1.9142, + "step": 26625 + }, + { + "epoch": 0.5916888888888889, + "grad_norm": 1.5796253681182861, + "learning_rate": 8.168037341631473e-05, + "loss": 2.0004, + "step": 26626 + }, + { + "epoch": 0.5917111111111111, + "grad_norm": 1.6983789205551147, + "learning_rate": 8.167592798399644e-05, + "loss": 1.935, + "step": 26627 + }, + { + "epoch": 0.5917333333333333, + "grad_norm": 1.405887246131897, + "learning_rate": 8.167148255167815e-05, + "loss": 1.5105, + "step": 26628 + }, + { + "epoch": 0.5917555555555556, + "grad_norm": 1.676605463027954, + "learning_rate": 8.166703711935986e-05, + "loss": 1.7027, + "step": 26629 + }, + { + "epoch": 0.5917777777777777, + "grad_norm": 1.861316204071045, + "learning_rate": 8.166259168704157e-05, + "loss": 1.9841, + "step": 26630 + }, + { + "epoch": 0.5918, + "grad_norm": 1.7648253440856934, + "learning_rate": 8.165814625472328e-05, + "loss": 2.1883, + "step": 26631 + }, + { + "epoch": 0.5918222222222222, + "grad_norm": 1.637048363685608, + "learning_rate": 8.165370082240499e-05, + "loss": 1.5399, + "step": 26632 + }, + { + "epoch": 0.5918444444444444, + "grad_norm": 1.3285629749298096, + "learning_rate": 8.164925539008669e-05, + "loss": 1.1909, + "step": 26633 + }, + { + "epoch": 0.5918666666666667, + "grad_norm": 1.8250073194503784, + "learning_rate": 8.16448099577684e-05, + "loss": 2.0721, + "step": 26634 + }, + { + "epoch": 0.5918888888888889, + "grad_norm": 1.6213438510894775, + "learning_rate": 8.16403645254501e-05, + "loss": 1.9153, + "step": 26635 + }, + { + "epoch": 0.5919111111111112, + "grad_norm": 1.490524172782898, + "learning_rate": 8.16359190931318e-05, + "loss": 1.2641, + "step": 26636 + }, + { + "epoch": 0.5919333333333333, + "grad_norm": 1.869890570640564, + "learning_rate": 8.163147366081351e-05, + "loss": 1.3881, + "step": 26637 + }, + { + "epoch": 0.5919555555555556, + "grad_norm": 1.9931570291519165, + "learning_rate": 8.162702822849523e-05, + "loss": 1.5897, + "step": 26638 + }, + { + "epoch": 0.5919777777777778, + "grad_norm": 2.059723377227783, + "learning_rate": 8.162258279617693e-05, + "loss": 1.8027, + "step": 26639 + }, + { + "epoch": 0.592, + "grad_norm": 1.8755348920822144, + "learning_rate": 8.161813736385864e-05, + "loss": 1.7455, + "step": 26640 + }, + { + "epoch": 0.5920222222222222, + "grad_norm": 1.8518706560134888, + "learning_rate": 8.161369193154035e-05, + "loss": 1.8116, + "step": 26641 + }, + { + "epoch": 0.5920444444444445, + "grad_norm": 2.008342981338501, + "learning_rate": 8.160924649922206e-05, + "loss": 2.3095, + "step": 26642 + }, + { + "epoch": 0.5920666666666666, + "grad_norm": 1.9992581605911255, + "learning_rate": 8.160480106690376e-05, + "loss": 1.831, + "step": 26643 + }, + { + "epoch": 0.5920888888888889, + "grad_norm": 1.7105638980865479, + "learning_rate": 8.160035563458547e-05, + "loss": 1.7077, + "step": 26644 + }, + { + "epoch": 0.5921111111111111, + "grad_norm": 1.5414973497390747, + "learning_rate": 8.159591020226717e-05, + "loss": 1.4228, + "step": 26645 + }, + { + "epoch": 0.5921333333333333, + "grad_norm": 1.8841001987457275, + "learning_rate": 8.159146476994887e-05, + "loss": 1.8394, + "step": 26646 + }, + { + "epoch": 0.5921555555555555, + "grad_norm": 1.8836796283721924, + "learning_rate": 8.15870193376306e-05, + "loss": 1.7217, + "step": 26647 + }, + { + "epoch": 0.5921777777777778, + "grad_norm": 2.0066640377044678, + "learning_rate": 8.15825739053123e-05, + "loss": 1.984, + "step": 26648 + }, + { + "epoch": 0.5922, + "grad_norm": 2.158877372741699, + "learning_rate": 8.1578128472994e-05, + "loss": 2.005, + "step": 26649 + }, + { + "epoch": 0.5922222222222222, + "grad_norm": 1.6916780471801758, + "learning_rate": 8.157368304067571e-05, + "loss": 1.3073, + "step": 26650 + }, + { + "epoch": 0.5922444444444445, + "grad_norm": 1.0765386819839478, + "learning_rate": 8.156923760835742e-05, + "loss": 1.078, + "step": 26651 + }, + { + "epoch": 0.5922666666666667, + "grad_norm": 1.3867547512054443, + "learning_rate": 8.156479217603913e-05, + "loss": 2.3633, + "step": 26652 + }, + { + "epoch": 0.5922888888888889, + "grad_norm": 1.541346788406372, + "learning_rate": 8.156034674372082e-05, + "loss": 2.5147, + "step": 26653 + }, + { + "epoch": 0.5923111111111111, + "grad_norm": 1.302173376083374, + "learning_rate": 8.155590131140253e-05, + "loss": 1.8573, + "step": 26654 + }, + { + "epoch": 0.5923333333333334, + "grad_norm": 1.4141172170639038, + "learning_rate": 8.155145587908424e-05, + "loss": 1.8953, + "step": 26655 + }, + { + "epoch": 0.5923555555555555, + "grad_norm": 1.8537484407424927, + "learning_rate": 8.154701044676595e-05, + "loss": 2.6232, + "step": 26656 + }, + { + "epoch": 0.5923777777777778, + "grad_norm": 1.457228660583496, + "learning_rate": 8.154256501444766e-05, + "loss": 2.0644, + "step": 26657 + }, + { + "epoch": 0.5924, + "grad_norm": 1.6981463432312012, + "learning_rate": 8.153811958212937e-05, + "loss": 1.7786, + "step": 26658 + }, + { + "epoch": 0.5924222222222222, + "grad_norm": 1.579250693321228, + "learning_rate": 8.153367414981107e-05, + "loss": 2.5667, + "step": 26659 + }, + { + "epoch": 0.5924444444444444, + "grad_norm": 1.561820387840271, + "learning_rate": 8.152922871749278e-05, + "loss": 2.0428, + "step": 26660 + }, + { + "epoch": 0.5924666666666667, + "grad_norm": 1.5794981718063354, + "learning_rate": 8.152478328517449e-05, + "loss": 2.0572, + "step": 26661 + }, + { + "epoch": 0.5924888888888888, + "grad_norm": 2.4824812412261963, + "learning_rate": 8.15203378528562e-05, + "loss": 1.0123, + "step": 26662 + }, + { + "epoch": 0.5925111111111111, + "grad_norm": 1.656442403793335, + "learning_rate": 8.15158924205379e-05, + "loss": 2.3156, + "step": 26663 + }, + { + "epoch": 0.5925333333333334, + "grad_norm": 1.7113794088363647, + "learning_rate": 8.15114469882196e-05, + "loss": 2.2904, + "step": 26664 + }, + { + "epoch": 0.5925555555555555, + "grad_norm": 1.4261407852172852, + "learning_rate": 8.150700155590131e-05, + "loss": 1.5333, + "step": 26665 + }, + { + "epoch": 0.5925777777777778, + "grad_norm": 1.7409135103225708, + "learning_rate": 8.150255612358302e-05, + "loss": 2.3401, + "step": 26666 + }, + { + "epoch": 0.5926, + "grad_norm": 1.390152096748352, + "learning_rate": 8.149811069126473e-05, + "loss": 1.2631, + "step": 26667 + }, + { + "epoch": 0.5926222222222223, + "grad_norm": 1.422816514968872, + "learning_rate": 8.149366525894644e-05, + "loss": 1.8948, + "step": 26668 + }, + { + "epoch": 0.5926444444444444, + "grad_norm": 1.5520660877227783, + "learning_rate": 8.148921982662814e-05, + "loss": 2.1724, + "step": 26669 + }, + { + "epoch": 0.5926666666666667, + "grad_norm": 1.649167776107788, + "learning_rate": 8.148477439430985e-05, + "loss": 2.0536, + "step": 26670 + }, + { + "epoch": 0.5926888888888889, + "grad_norm": 1.961836576461792, + "learning_rate": 8.148032896199156e-05, + "loss": 2.153, + "step": 26671 + }, + { + "epoch": 0.5927111111111111, + "grad_norm": 1.7347272634506226, + "learning_rate": 8.147588352967327e-05, + "loss": 2.1467, + "step": 26672 + }, + { + "epoch": 0.5927333333333333, + "grad_norm": 1.6931570768356323, + "learning_rate": 8.147143809735496e-05, + "loss": 1.6617, + "step": 26673 + }, + { + "epoch": 0.5927555555555556, + "grad_norm": 1.6291333436965942, + "learning_rate": 8.146699266503667e-05, + "loss": 1.7673, + "step": 26674 + }, + { + "epoch": 0.5927777777777777, + "grad_norm": 1.5067996978759766, + "learning_rate": 8.14625472327184e-05, + "loss": 1.8728, + "step": 26675 + }, + { + "epoch": 0.5928, + "grad_norm": 1.7630198001861572, + "learning_rate": 8.145810180040009e-05, + "loss": 1.6776, + "step": 26676 + }, + { + "epoch": 0.5928222222222223, + "grad_norm": 1.9861774444580078, + "learning_rate": 8.14536563680818e-05, + "loss": 1.7904, + "step": 26677 + }, + { + "epoch": 0.5928444444444444, + "grad_norm": 1.9535640478134155, + "learning_rate": 8.144921093576351e-05, + "loss": 2.577, + "step": 26678 + }, + { + "epoch": 0.5928666666666667, + "grad_norm": 1.636690616607666, + "learning_rate": 8.144476550344522e-05, + "loss": 2.0611, + "step": 26679 + }, + { + "epoch": 0.5928888888888889, + "grad_norm": 1.8471348285675049, + "learning_rate": 8.144032007112692e-05, + "loss": 1.5789, + "step": 26680 + }, + { + "epoch": 0.5929111111111112, + "grad_norm": 1.6389126777648926, + "learning_rate": 8.143587463880863e-05, + "loss": 2.0829, + "step": 26681 + }, + { + "epoch": 0.5929333333333333, + "grad_norm": 2.013946771621704, + "learning_rate": 8.143142920649034e-05, + "loss": 2.1446, + "step": 26682 + }, + { + "epoch": 0.5929555555555556, + "grad_norm": 1.6818547248840332, + "learning_rate": 8.142698377417203e-05, + "loss": 1.6905, + "step": 26683 + }, + { + "epoch": 0.5929777777777778, + "grad_norm": 1.9250750541687012, + "learning_rate": 8.142253834185376e-05, + "loss": 2.1326, + "step": 26684 + }, + { + "epoch": 0.593, + "grad_norm": 1.8171052932739258, + "learning_rate": 8.141809290953546e-05, + "loss": 1.8738, + "step": 26685 + }, + { + "epoch": 0.5930222222222222, + "grad_norm": 1.7382375001907349, + "learning_rate": 8.141364747721716e-05, + "loss": 1.8583, + "step": 26686 + }, + { + "epoch": 0.5930444444444445, + "grad_norm": 1.8500169515609741, + "learning_rate": 8.140920204489887e-05, + "loss": 1.7788, + "step": 26687 + }, + { + "epoch": 0.5930666666666666, + "grad_norm": 1.9859305620193481, + "learning_rate": 8.140475661258058e-05, + "loss": 1.9631, + "step": 26688 + }, + { + "epoch": 0.5930888888888889, + "grad_norm": 2.013277769088745, + "learning_rate": 8.140031118026229e-05, + "loss": 2.091, + "step": 26689 + }, + { + "epoch": 0.5931111111111111, + "grad_norm": 1.6553006172180176, + "learning_rate": 8.139586574794399e-05, + "loss": 2.0575, + "step": 26690 + }, + { + "epoch": 0.5931333333333333, + "grad_norm": 1.632489800453186, + "learning_rate": 8.13914203156257e-05, + "loss": 1.5623, + "step": 26691 + }, + { + "epoch": 0.5931555555555555, + "grad_norm": 1.8890914916992188, + "learning_rate": 8.13869748833074e-05, + "loss": 1.8982, + "step": 26692 + }, + { + "epoch": 0.5931777777777778, + "grad_norm": 1.7432690858840942, + "learning_rate": 8.138252945098911e-05, + "loss": 2.0591, + "step": 26693 + }, + { + "epoch": 0.5932, + "grad_norm": 1.7689228057861328, + "learning_rate": 8.137808401867082e-05, + "loss": 1.8778, + "step": 26694 + }, + { + "epoch": 0.5932222222222222, + "grad_norm": 1.571394920349121, + "learning_rate": 8.137363858635253e-05, + "loss": 1.9016, + "step": 26695 + }, + { + "epoch": 0.5932444444444445, + "grad_norm": 1.9821078777313232, + "learning_rate": 8.136919315403423e-05, + "loss": 2.078, + "step": 26696 + }, + { + "epoch": 0.5932666666666667, + "grad_norm": 1.533720850944519, + "learning_rate": 8.136474772171594e-05, + "loss": 1.6173, + "step": 26697 + }, + { + "epoch": 0.5932888888888889, + "grad_norm": 2.0955474376678467, + "learning_rate": 8.136030228939765e-05, + "loss": 1.8444, + "step": 26698 + }, + { + "epoch": 0.5933111111111111, + "grad_norm": 1.8464515209197998, + "learning_rate": 8.135585685707936e-05, + "loss": 1.9813, + "step": 26699 + }, + { + "epoch": 0.5933333333333334, + "grad_norm": 1.3876664638519287, + "learning_rate": 8.135141142476105e-05, + "loss": 1.0008, + "step": 26700 + }, + { + "epoch": 0.5933555555555555, + "grad_norm": 1.6724517345428467, + "learning_rate": 8.134696599244276e-05, + "loss": 2.8459, + "step": 26701 + }, + { + "epoch": 0.5933777777777778, + "grad_norm": 1.4604490995407104, + "learning_rate": 8.134252056012447e-05, + "loss": 2.2689, + "step": 26702 + }, + { + "epoch": 0.5934, + "grad_norm": 1.372445821762085, + "learning_rate": 8.133807512780618e-05, + "loss": 2.0997, + "step": 26703 + }, + { + "epoch": 0.5934222222222222, + "grad_norm": 1.5143563747406006, + "learning_rate": 8.13336296954879e-05, + "loss": 2.3801, + "step": 26704 + }, + { + "epoch": 0.5934444444444444, + "grad_norm": 1.5440425872802734, + "learning_rate": 8.13291842631696e-05, + "loss": 2.1438, + "step": 26705 + }, + { + "epoch": 0.5934666666666667, + "grad_norm": 1.5724350214004517, + "learning_rate": 8.13247388308513e-05, + "loss": 2.0209, + "step": 26706 + }, + { + "epoch": 0.5934888888888888, + "grad_norm": 1.6102594137191772, + "learning_rate": 8.132029339853301e-05, + "loss": 1.9863, + "step": 26707 + }, + { + "epoch": 0.5935111111111111, + "grad_norm": 1.4920109510421753, + "learning_rate": 8.131584796621472e-05, + "loss": 2.2177, + "step": 26708 + }, + { + "epoch": 0.5935333333333334, + "grad_norm": 1.2547584772109985, + "learning_rate": 8.131140253389643e-05, + "loss": 1.5696, + "step": 26709 + }, + { + "epoch": 0.5935555555555555, + "grad_norm": 1.7736376523971558, + "learning_rate": 8.130695710157812e-05, + "loss": 1.8298, + "step": 26710 + }, + { + "epoch": 0.5935777777777778, + "grad_norm": 1.727866530418396, + "learning_rate": 8.130251166925983e-05, + "loss": 2.2899, + "step": 26711 + }, + { + "epoch": 0.5936, + "grad_norm": 1.626160979270935, + "learning_rate": 8.129806623694156e-05, + "loss": 2.3531, + "step": 26712 + }, + { + "epoch": 0.5936222222222223, + "grad_norm": 1.6291509866714478, + "learning_rate": 8.129362080462325e-05, + "loss": 1.8856, + "step": 26713 + }, + { + "epoch": 0.5936444444444444, + "grad_norm": 1.7645621299743652, + "learning_rate": 8.128917537230496e-05, + "loss": 1.5955, + "step": 26714 + }, + { + "epoch": 0.5936666666666667, + "grad_norm": 1.6563748121261597, + "learning_rate": 8.128472993998667e-05, + "loss": 2.3929, + "step": 26715 + }, + { + "epoch": 0.5936888888888889, + "grad_norm": 1.5260820388793945, + "learning_rate": 8.128028450766837e-05, + "loss": 2.425, + "step": 26716 + }, + { + "epoch": 0.5937111111111111, + "grad_norm": 0.6744555830955505, + "learning_rate": 8.127583907535008e-05, + "loss": 0.0275, + "step": 26717 + }, + { + "epoch": 0.5937333333333333, + "grad_norm": 1.311850905418396, + "learning_rate": 8.127139364303179e-05, + "loss": 1.7537, + "step": 26718 + }, + { + "epoch": 0.5937555555555556, + "grad_norm": 1.5667086839675903, + "learning_rate": 8.12669482107135e-05, + "loss": 2.1051, + "step": 26719 + }, + { + "epoch": 0.5937777777777777, + "grad_norm": 1.6409789323806763, + "learning_rate": 8.126250277839519e-05, + "loss": 2.4806, + "step": 26720 + }, + { + "epoch": 0.5938, + "grad_norm": 1.585787057876587, + "learning_rate": 8.125805734607692e-05, + "loss": 2.4312, + "step": 26721 + }, + { + "epoch": 0.5938222222222223, + "grad_norm": 1.5881736278533936, + "learning_rate": 8.125361191375863e-05, + "loss": 2.2744, + "step": 26722 + }, + { + "epoch": 0.5938444444444444, + "grad_norm": 1.6588823795318604, + "learning_rate": 8.124916648144032e-05, + "loss": 1.711, + "step": 26723 + }, + { + "epoch": 0.5938666666666667, + "grad_norm": 1.717848300933838, + "learning_rate": 8.124472104912203e-05, + "loss": 2.1547, + "step": 26724 + }, + { + "epoch": 0.5938888888888889, + "grad_norm": 1.8647304773330688, + "learning_rate": 8.124027561680374e-05, + "loss": 2.0538, + "step": 26725 + }, + { + "epoch": 0.5939111111111111, + "grad_norm": 1.7777903079986572, + "learning_rate": 8.123583018448544e-05, + "loss": 2.1023, + "step": 26726 + }, + { + "epoch": 0.5939333333333333, + "grad_norm": 1.7060556411743164, + "learning_rate": 8.123138475216715e-05, + "loss": 1.4759, + "step": 26727 + }, + { + "epoch": 0.5939555555555556, + "grad_norm": 1.4728893041610718, + "learning_rate": 8.122693931984886e-05, + "loss": 1.4794, + "step": 26728 + }, + { + "epoch": 0.5939777777777778, + "grad_norm": 1.678420901298523, + "learning_rate": 8.122249388753057e-05, + "loss": 1.7817, + "step": 26729 + }, + { + "epoch": 0.594, + "grad_norm": 1.4201163053512573, + "learning_rate": 8.121804845521228e-05, + "loss": 1.6628, + "step": 26730 + }, + { + "epoch": 0.5940222222222222, + "grad_norm": 1.512225866317749, + "learning_rate": 8.121360302289399e-05, + "loss": 1.4771, + "step": 26731 + }, + { + "epoch": 0.5940444444444445, + "grad_norm": 1.8955464363098145, + "learning_rate": 8.12091575905757e-05, + "loss": 1.166, + "step": 26732 + }, + { + "epoch": 0.5940666666666666, + "grad_norm": 1.1601372957229614, + "learning_rate": 8.120471215825739e-05, + "loss": 1.0652, + "step": 26733 + }, + { + "epoch": 0.5940888888888889, + "grad_norm": 1.6053649187088013, + "learning_rate": 8.12002667259391e-05, + "loss": 1.7017, + "step": 26734 + }, + { + "epoch": 0.5941111111111111, + "grad_norm": 1.9037258625030518, + "learning_rate": 8.119582129362081e-05, + "loss": 2.0369, + "step": 26735 + }, + { + "epoch": 0.5941333333333333, + "grad_norm": 1.9601517915725708, + "learning_rate": 8.119137586130252e-05, + "loss": 1.8534, + "step": 26736 + }, + { + "epoch": 0.5941555555555555, + "grad_norm": 1.8852641582489014, + "learning_rate": 8.118693042898422e-05, + "loss": 1.8805, + "step": 26737 + }, + { + "epoch": 0.5941777777777778, + "grad_norm": 1.643731713294983, + "learning_rate": 8.118248499666593e-05, + "loss": 2.1002, + "step": 26738 + }, + { + "epoch": 0.5942, + "grad_norm": 1.9783738851547241, + "learning_rate": 8.117803956434764e-05, + "loss": 1.8242, + "step": 26739 + }, + { + "epoch": 0.5942222222222222, + "grad_norm": 1.477020502090454, + "learning_rate": 8.117359413202934e-05, + "loss": 1.3409, + "step": 26740 + }, + { + "epoch": 0.5942444444444445, + "grad_norm": 2.2099764347076416, + "learning_rate": 8.116914869971105e-05, + "loss": 1.9382, + "step": 26741 + }, + { + "epoch": 0.5942666666666667, + "grad_norm": 1.8193410634994507, + "learning_rate": 8.116470326739276e-05, + "loss": 1.6036, + "step": 26742 + }, + { + "epoch": 0.5942888888888889, + "grad_norm": 1.7083810567855835, + "learning_rate": 8.116025783507446e-05, + "loss": 1.7845, + "step": 26743 + }, + { + "epoch": 0.5943111111111111, + "grad_norm": 1.6544417142868042, + "learning_rate": 8.115581240275617e-05, + "loss": 1.7903, + "step": 26744 + }, + { + "epoch": 0.5943333333333334, + "grad_norm": 1.5880299806594849, + "learning_rate": 8.115136697043788e-05, + "loss": 1.7088, + "step": 26745 + }, + { + "epoch": 0.5943555555555555, + "grad_norm": 1.7833446264266968, + "learning_rate": 8.114692153811959e-05, + "loss": 2.0259, + "step": 26746 + }, + { + "epoch": 0.5943777777777778, + "grad_norm": 1.6049708127975464, + "learning_rate": 8.114247610580128e-05, + "loss": 1.682, + "step": 26747 + }, + { + "epoch": 0.5944, + "grad_norm": 2.0446813106536865, + "learning_rate": 8.1138030673483e-05, + "loss": 2.0802, + "step": 26748 + }, + { + "epoch": 0.5944222222222222, + "grad_norm": 2.010500192642212, + "learning_rate": 8.113358524116472e-05, + "loss": 2.2086, + "step": 26749 + }, + { + "epoch": 0.5944444444444444, + "grad_norm": 2.6268837451934814, + "learning_rate": 8.112913980884641e-05, + "loss": 1.9425, + "step": 26750 + }, + { + "epoch": 0.5944666666666667, + "grad_norm": 1.661972999572754, + "learning_rate": 8.112469437652812e-05, + "loss": 1.2295, + "step": 26751 + }, + { + "epoch": 0.5944888888888888, + "grad_norm": 1.4331251382827759, + "learning_rate": 8.112024894420983e-05, + "loss": 2.305, + "step": 26752 + }, + { + "epoch": 0.5945111111111111, + "grad_norm": 1.3672977685928345, + "learning_rate": 8.111580351189153e-05, + "loss": 2.128, + "step": 26753 + }, + { + "epoch": 0.5945333333333334, + "grad_norm": 1.5451977252960205, + "learning_rate": 8.111135807957324e-05, + "loss": 2.3535, + "step": 26754 + }, + { + "epoch": 0.5945555555555555, + "grad_norm": 1.458953619003296, + "learning_rate": 8.110691264725495e-05, + "loss": 2.1002, + "step": 26755 + }, + { + "epoch": 0.5945777777777778, + "grad_norm": 1.6705247163772583, + "learning_rate": 8.110246721493666e-05, + "loss": 2.0621, + "step": 26756 + }, + { + "epoch": 0.5946, + "grad_norm": 1.4237427711486816, + "learning_rate": 8.109802178261835e-05, + "loss": 2.0543, + "step": 26757 + }, + { + "epoch": 0.5946222222222223, + "grad_norm": 1.5021898746490479, + "learning_rate": 8.109357635030008e-05, + "loss": 1.9559, + "step": 26758 + }, + { + "epoch": 0.5946444444444444, + "grad_norm": 1.3429763317108154, + "learning_rate": 8.108913091798179e-05, + "loss": 1.5675, + "step": 26759 + }, + { + "epoch": 0.5946666666666667, + "grad_norm": 2.030367851257324, + "learning_rate": 8.108468548566348e-05, + "loss": 2.4005, + "step": 26760 + }, + { + "epoch": 0.5946888888888889, + "grad_norm": 1.536736011505127, + "learning_rate": 8.108024005334519e-05, + "loss": 1.945, + "step": 26761 + }, + { + "epoch": 0.5947111111111111, + "grad_norm": 1.4390614032745361, + "learning_rate": 8.10757946210269e-05, + "loss": 1.7824, + "step": 26762 + }, + { + "epoch": 0.5947333333333333, + "grad_norm": 1.8407021760940552, + "learning_rate": 8.10713491887086e-05, + "loss": 2.376, + "step": 26763 + }, + { + "epoch": 0.5947555555555556, + "grad_norm": 1.6480615139007568, + "learning_rate": 8.106690375639031e-05, + "loss": 2.0424, + "step": 26764 + }, + { + "epoch": 0.5947777777777777, + "grad_norm": 1.8025591373443604, + "learning_rate": 8.106245832407202e-05, + "loss": 1.9488, + "step": 26765 + }, + { + "epoch": 0.5948, + "grad_norm": 1.4991320371627808, + "learning_rate": 8.105801289175373e-05, + "loss": 0.7819, + "step": 26766 + }, + { + "epoch": 0.5948222222222223, + "grad_norm": 1.5261831283569336, + "learning_rate": 8.105356745943544e-05, + "loss": 2.0639, + "step": 26767 + }, + { + "epoch": 0.5948444444444444, + "grad_norm": 1.889927625656128, + "learning_rate": 8.104912202711715e-05, + "loss": 1.8238, + "step": 26768 + }, + { + "epoch": 0.5948666666666667, + "grad_norm": 1.794587254524231, + "learning_rate": 8.104467659479886e-05, + "loss": 2.3345, + "step": 26769 + }, + { + "epoch": 0.5948888888888889, + "grad_norm": 1.5195122957229614, + "learning_rate": 8.104023116248055e-05, + "loss": 1.3055, + "step": 26770 + }, + { + "epoch": 0.5949111111111111, + "grad_norm": 2.0479865074157715, + "learning_rate": 8.103578573016226e-05, + "loss": 2.0186, + "step": 26771 + }, + { + "epoch": 0.5949333333333333, + "grad_norm": 1.65352201461792, + "learning_rate": 8.103134029784397e-05, + "loss": 1.87, + "step": 26772 + }, + { + "epoch": 0.5949555555555556, + "grad_norm": 1.6952732801437378, + "learning_rate": 8.102689486552567e-05, + "loss": 1.7593, + "step": 26773 + }, + { + "epoch": 0.5949777777777778, + "grad_norm": 1.7520891427993774, + "learning_rate": 8.102244943320738e-05, + "loss": 2.2394, + "step": 26774 + }, + { + "epoch": 0.595, + "grad_norm": 1.7385798692703247, + "learning_rate": 8.101800400088909e-05, + "loss": 2.1476, + "step": 26775 + }, + { + "epoch": 0.5950222222222222, + "grad_norm": 1.6520934104919434, + "learning_rate": 8.10135585685708e-05, + "loss": 2.0333, + "step": 26776 + }, + { + "epoch": 0.5950444444444445, + "grad_norm": 1.8042887449264526, + "learning_rate": 8.10091131362525e-05, + "loss": 1.724, + "step": 26777 + }, + { + "epoch": 0.5950666666666666, + "grad_norm": 1.5326650142669678, + "learning_rate": 8.100466770393422e-05, + "loss": 1.7221, + "step": 26778 + }, + { + "epoch": 0.5950888888888889, + "grad_norm": 1.6813794374465942, + "learning_rate": 8.100022227161593e-05, + "loss": 1.9858, + "step": 26779 + }, + { + "epoch": 0.5951111111111111, + "grad_norm": 1.7051405906677246, + "learning_rate": 8.099577683929762e-05, + "loss": 1.801, + "step": 26780 + }, + { + "epoch": 0.5951333333333333, + "grad_norm": 1.1973179578781128, + "learning_rate": 8.099133140697933e-05, + "loss": 0.7489, + "step": 26781 + }, + { + "epoch": 0.5951555555555555, + "grad_norm": 1.8290960788726807, + "learning_rate": 8.098688597466104e-05, + "loss": 1.9373, + "step": 26782 + }, + { + "epoch": 0.5951777777777778, + "grad_norm": 1.7020169496536255, + "learning_rate": 8.098244054234274e-05, + "loss": 1.494, + "step": 26783 + }, + { + "epoch": 0.5952, + "grad_norm": 1.893183708190918, + "learning_rate": 8.097799511002445e-05, + "loss": 2.2692, + "step": 26784 + }, + { + "epoch": 0.5952222222222222, + "grad_norm": 1.7478630542755127, + "learning_rate": 8.097354967770616e-05, + "loss": 1.8236, + "step": 26785 + }, + { + "epoch": 0.5952444444444445, + "grad_norm": 1.9741427898406982, + "learning_rate": 8.096910424538788e-05, + "loss": 2.2287, + "step": 26786 + }, + { + "epoch": 0.5952666666666667, + "grad_norm": 1.6652030944824219, + "learning_rate": 8.096465881306957e-05, + "loss": 1.9211, + "step": 26787 + }, + { + "epoch": 0.5952888888888889, + "grad_norm": 1.9754842519760132, + "learning_rate": 8.096021338075128e-05, + "loss": 1.8296, + "step": 26788 + }, + { + "epoch": 0.5953111111111111, + "grad_norm": 1.5150765180587769, + "learning_rate": 8.0955767948433e-05, + "loss": 1.3967, + "step": 26789 + }, + { + "epoch": 0.5953333333333334, + "grad_norm": 1.212402582168579, + "learning_rate": 8.095132251611469e-05, + "loss": 0.8705, + "step": 26790 + }, + { + "epoch": 0.5953555555555555, + "grad_norm": 1.5794655084609985, + "learning_rate": 8.09468770837964e-05, + "loss": 1.8254, + "step": 26791 + }, + { + "epoch": 0.5953777777777778, + "grad_norm": 1.7709836959838867, + "learning_rate": 8.094243165147811e-05, + "loss": 1.8013, + "step": 26792 + }, + { + "epoch": 0.5954, + "grad_norm": 1.4090503454208374, + "learning_rate": 8.093798621915982e-05, + "loss": 1.6456, + "step": 26793 + }, + { + "epoch": 0.5954222222222222, + "grad_norm": 1.6370187997817993, + "learning_rate": 8.093354078684152e-05, + "loss": 1.549, + "step": 26794 + }, + { + "epoch": 0.5954444444444444, + "grad_norm": 1.7430535554885864, + "learning_rate": 8.092909535452324e-05, + "loss": 1.9142, + "step": 26795 + }, + { + "epoch": 0.5954666666666667, + "grad_norm": 1.836463212966919, + "learning_rate": 8.092464992220495e-05, + "loss": 1.9958, + "step": 26796 + }, + { + "epoch": 0.5954888888888888, + "grad_norm": 1.962188720703125, + "learning_rate": 8.092020448988664e-05, + "loss": 2.1326, + "step": 26797 + }, + { + "epoch": 0.5955111111111111, + "grad_norm": 1.878197431564331, + "learning_rate": 8.091575905756835e-05, + "loss": 2.121, + "step": 26798 + }, + { + "epoch": 0.5955333333333334, + "grad_norm": 1.8089852333068848, + "learning_rate": 8.091131362525006e-05, + "loss": 1.5793, + "step": 26799 + }, + { + "epoch": 0.5955555555555555, + "grad_norm": 1.736074686050415, + "learning_rate": 8.090686819293176e-05, + "loss": 0.9735, + "step": 26800 + }, + { + "epoch": 0.5955777777777778, + "grad_norm": 1.3884100914001465, + "learning_rate": 8.090242276061347e-05, + "loss": 1.0546, + "step": 26801 + }, + { + "epoch": 0.5956, + "grad_norm": 1.429738163948059, + "learning_rate": 8.089797732829518e-05, + "loss": 2.3998, + "step": 26802 + }, + { + "epoch": 0.5956222222222223, + "grad_norm": 1.3541944026947021, + "learning_rate": 8.089353189597689e-05, + "loss": 2.2969, + "step": 26803 + }, + { + "epoch": 0.5956444444444444, + "grad_norm": 1.5858944654464722, + "learning_rate": 8.08890864636586e-05, + "loss": 2.3351, + "step": 26804 + }, + { + "epoch": 0.5956666666666667, + "grad_norm": 1.5756072998046875, + "learning_rate": 8.088464103134031e-05, + "loss": 2.4645, + "step": 26805 + }, + { + "epoch": 0.5956888888888889, + "grad_norm": 1.969294548034668, + "learning_rate": 8.088019559902202e-05, + "loss": 2.1922, + "step": 26806 + }, + { + "epoch": 0.5957111111111111, + "grad_norm": 1.397650122642517, + "learning_rate": 8.087575016670371e-05, + "loss": 2.1963, + "step": 26807 + }, + { + "epoch": 0.5957333333333333, + "grad_norm": 1.8563101291656494, + "learning_rate": 8.087130473438542e-05, + "loss": 1.9459, + "step": 26808 + }, + { + "epoch": 0.5957555555555556, + "grad_norm": 1.721784234046936, + "learning_rate": 8.086685930206713e-05, + "loss": 2.3333, + "step": 26809 + }, + { + "epoch": 0.5957777777777777, + "grad_norm": 1.3764325380325317, + "learning_rate": 8.086241386974883e-05, + "loss": 2.0502, + "step": 26810 + }, + { + "epoch": 0.5958, + "grad_norm": 1.489363670349121, + "learning_rate": 8.085796843743054e-05, + "loss": 2.0739, + "step": 26811 + }, + { + "epoch": 0.5958222222222223, + "grad_norm": 1.897639274597168, + "learning_rate": 8.085352300511225e-05, + "loss": 2.2408, + "step": 26812 + }, + { + "epoch": 0.5958444444444444, + "grad_norm": 1.3819931745529175, + "learning_rate": 8.084907757279396e-05, + "loss": 1.6007, + "step": 26813 + }, + { + "epoch": 0.5958666666666667, + "grad_norm": 1.9608386754989624, + "learning_rate": 8.084463214047567e-05, + "loss": 1.6374, + "step": 26814 + }, + { + "epoch": 0.5958888888888889, + "grad_norm": 1.374138593673706, + "learning_rate": 8.084018670815738e-05, + "loss": 1.6545, + "step": 26815 + }, + { + "epoch": 0.5959111111111111, + "grad_norm": 1.7151923179626465, + "learning_rate": 8.083574127583909e-05, + "loss": 1.9196, + "step": 26816 + }, + { + "epoch": 0.5959333333333333, + "grad_norm": 1.099605917930603, + "learning_rate": 8.083129584352078e-05, + "loss": 1.0795, + "step": 26817 + }, + { + "epoch": 0.5959555555555556, + "grad_norm": 1.6276981830596924, + "learning_rate": 8.082685041120249e-05, + "loss": 1.5975, + "step": 26818 + }, + { + "epoch": 0.5959777777777778, + "grad_norm": 1.174397587776184, + "learning_rate": 8.08224049788842e-05, + "loss": 0.9732, + "step": 26819 + }, + { + "epoch": 0.596, + "grad_norm": 1.9741249084472656, + "learning_rate": 8.08179595465659e-05, + "loss": 2.1333, + "step": 26820 + }, + { + "epoch": 0.5960222222222222, + "grad_norm": 1.6376827955245972, + "learning_rate": 8.081351411424761e-05, + "loss": 1.8163, + "step": 26821 + }, + { + "epoch": 0.5960444444444445, + "grad_norm": 1.5402346849441528, + "learning_rate": 8.080906868192932e-05, + "loss": 1.5948, + "step": 26822 + }, + { + "epoch": 0.5960666666666666, + "grad_norm": 1.6858675479888916, + "learning_rate": 8.080462324961104e-05, + "loss": 2.2483, + "step": 26823 + }, + { + "epoch": 0.5960888888888889, + "grad_norm": 1.6498875617980957, + "learning_rate": 8.080017781729274e-05, + "loss": 1.7901, + "step": 26824 + }, + { + "epoch": 0.5961111111111111, + "grad_norm": 1.6973052024841309, + "learning_rate": 8.079573238497445e-05, + "loss": 2.3223, + "step": 26825 + }, + { + "epoch": 0.5961333333333333, + "grad_norm": 1.4297351837158203, + "learning_rate": 8.079128695265616e-05, + "loss": 1.8346, + "step": 26826 + }, + { + "epoch": 0.5961555555555555, + "grad_norm": 1.4189152717590332, + "learning_rate": 8.078684152033785e-05, + "loss": 1.1534, + "step": 26827 + }, + { + "epoch": 0.5961777777777778, + "grad_norm": 1.5987457036972046, + "learning_rate": 8.078239608801956e-05, + "loss": 1.3904, + "step": 26828 + }, + { + "epoch": 0.5962, + "grad_norm": 1.690332055091858, + "learning_rate": 8.077795065570127e-05, + "loss": 1.9059, + "step": 26829 + }, + { + "epoch": 0.5962222222222222, + "grad_norm": 1.9026538133621216, + "learning_rate": 8.077350522338297e-05, + "loss": 1.976, + "step": 26830 + }, + { + "epoch": 0.5962444444444445, + "grad_norm": 1.623248815536499, + "learning_rate": 8.076905979106468e-05, + "loss": 1.9548, + "step": 26831 + }, + { + "epoch": 0.5962666666666666, + "grad_norm": 1.5890661478042603, + "learning_rate": 8.07646143587464e-05, + "loss": 1.2765, + "step": 26832 + }, + { + "epoch": 0.5962888888888889, + "grad_norm": 2.1992969512939453, + "learning_rate": 8.076016892642811e-05, + "loss": 2.1102, + "step": 26833 + }, + { + "epoch": 0.5963111111111111, + "grad_norm": 1.1912901401519775, + "learning_rate": 8.07557234941098e-05, + "loss": 0.7625, + "step": 26834 + }, + { + "epoch": 0.5963333333333334, + "grad_norm": 1.461975336074829, + "learning_rate": 8.075127806179151e-05, + "loss": 1.4908, + "step": 26835 + }, + { + "epoch": 0.5963555555555555, + "grad_norm": 2.027536630630493, + "learning_rate": 8.074683262947322e-05, + "loss": 2.1627, + "step": 26836 + }, + { + "epoch": 0.5963777777777778, + "grad_norm": 1.6185896396636963, + "learning_rate": 8.074238719715492e-05, + "loss": 1.3973, + "step": 26837 + }, + { + "epoch": 0.5964, + "grad_norm": 1.4064449071884155, + "learning_rate": 8.073794176483663e-05, + "loss": 0.8511, + "step": 26838 + }, + { + "epoch": 0.5964222222222222, + "grad_norm": 1.5895582437515259, + "learning_rate": 8.073349633251834e-05, + "loss": 1.9084, + "step": 26839 + }, + { + "epoch": 0.5964444444444444, + "grad_norm": 1.683510661125183, + "learning_rate": 8.072905090020005e-05, + "loss": 1.9935, + "step": 26840 + }, + { + "epoch": 0.5964666666666667, + "grad_norm": 1.5518105030059814, + "learning_rate": 8.072460546788176e-05, + "loss": 1.7772, + "step": 26841 + }, + { + "epoch": 0.5964888888888888, + "grad_norm": 1.6962943077087402, + "learning_rate": 8.072016003556347e-05, + "loss": 1.6512, + "step": 26842 + }, + { + "epoch": 0.5965111111111111, + "grad_norm": 1.9162324666976929, + "learning_rate": 8.071571460324518e-05, + "loss": 2.0387, + "step": 26843 + }, + { + "epoch": 0.5965333333333334, + "grad_norm": 1.843002438545227, + "learning_rate": 8.071126917092687e-05, + "loss": 1.7871, + "step": 26844 + }, + { + "epoch": 0.5965555555555555, + "grad_norm": 2.334334373474121, + "learning_rate": 8.070682373860858e-05, + "loss": 2.2448, + "step": 26845 + }, + { + "epoch": 0.5965777777777778, + "grad_norm": 1.9153010845184326, + "learning_rate": 8.07023783062903e-05, + "loss": 1.8737, + "step": 26846 + }, + { + "epoch": 0.5966, + "grad_norm": 1.7890713214874268, + "learning_rate": 8.069793287397199e-05, + "loss": 1.7194, + "step": 26847 + }, + { + "epoch": 0.5966222222222223, + "grad_norm": 2.0024993419647217, + "learning_rate": 8.06934874416537e-05, + "loss": 1.9438, + "step": 26848 + }, + { + "epoch": 0.5966444444444444, + "grad_norm": 1.8416032791137695, + "learning_rate": 8.068904200933541e-05, + "loss": 1.8522, + "step": 26849 + }, + { + "epoch": 0.5966666666666667, + "grad_norm": 1.50460946559906, + "learning_rate": 8.068459657701712e-05, + "loss": 1.2711, + "step": 26850 + }, + { + "epoch": 0.5966888888888889, + "grad_norm": 1.4480715990066528, + "learning_rate": 8.068015114469883e-05, + "loss": 2.5689, + "step": 26851 + }, + { + "epoch": 0.5967111111111111, + "grad_norm": 1.4781830310821533, + "learning_rate": 8.067570571238054e-05, + "loss": 2.4561, + "step": 26852 + }, + { + "epoch": 0.5967333333333333, + "grad_norm": 1.2978445291519165, + "learning_rate": 8.067126028006225e-05, + "loss": 1.1158, + "step": 26853 + }, + { + "epoch": 0.5967555555555556, + "grad_norm": 1.3282469511032104, + "learning_rate": 8.066681484774394e-05, + "loss": 2.0565, + "step": 26854 + }, + { + "epoch": 0.5967777777777777, + "grad_norm": 1.4978200197219849, + "learning_rate": 8.066236941542565e-05, + "loss": 1.8874, + "step": 26855 + }, + { + "epoch": 0.5968, + "grad_norm": 1.4156322479248047, + "learning_rate": 8.065792398310736e-05, + "loss": 2.0859, + "step": 26856 + }, + { + "epoch": 0.5968222222222223, + "grad_norm": 1.2539492845535278, + "learning_rate": 8.065347855078906e-05, + "loss": 1.0677, + "step": 26857 + }, + { + "epoch": 0.5968444444444444, + "grad_norm": 2.2956056594848633, + "learning_rate": 8.064903311847077e-05, + "loss": 2.5987, + "step": 26858 + }, + { + "epoch": 0.5968666666666667, + "grad_norm": 1.740151286125183, + "learning_rate": 8.064458768615248e-05, + "loss": 2.402, + "step": 26859 + }, + { + "epoch": 0.5968888888888889, + "grad_norm": 1.8559595346450806, + "learning_rate": 8.06401422538342e-05, + "loss": 1.8741, + "step": 26860 + }, + { + "epoch": 0.5969111111111111, + "grad_norm": 1.9488989114761353, + "learning_rate": 8.06356968215159e-05, + "loss": 2.3833, + "step": 26861 + }, + { + "epoch": 0.5969333333333333, + "grad_norm": 1.630929946899414, + "learning_rate": 8.06312513891976e-05, + "loss": 2.1018, + "step": 26862 + }, + { + "epoch": 0.5969555555555556, + "grad_norm": 1.4589755535125732, + "learning_rate": 8.062680595687932e-05, + "loss": 2.262, + "step": 26863 + }, + { + "epoch": 0.5969777777777778, + "grad_norm": 1.5186175107955933, + "learning_rate": 8.062236052456101e-05, + "loss": 2.0959, + "step": 26864 + }, + { + "epoch": 0.597, + "grad_norm": 1.5367112159729004, + "learning_rate": 8.061791509224272e-05, + "loss": 2.2553, + "step": 26865 + }, + { + "epoch": 0.5970222222222222, + "grad_norm": 1.5911413431167603, + "learning_rate": 8.061346965992443e-05, + "loss": 2.0367, + "step": 26866 + }, + { + "epoch": 0.5970444444444445, + "grad_norm": 1.7494949102401733, + "learning_rate": 8.060902422760613e-05, + "loss": 2.2267, + "step": 26867 + }, + { + "epoch": 0.5970666666666666, + "grad_norm": 1.4874684810638428, + "learning_rate": 8.060457879528784e-05, + "loss": 1.7659, + "step": 26868 + }, + { + "epoch": 0.5970888888888889, + "grad_norm": 1.7232106924057007, + "learning_rate": 8.060013336296956e-05, + "loss": 2.2392, + "step": 26869 + }, + { + "epoch": 0.5971111111111111, + "grad_norm": 1.575861930847168, + "learning_rate": 8.059568793065127e-05, + "loss": 1.9078, + "step": 26870 + }, + { + "epoch": 0.5971333333333333, + "grad_norm": 1.568634033203125, + "learning_rate": 8.059124249833297e-05, + "loss": 1.5753, + "step": 26871 + }, + { + "epoch": 0.5971555555555556, + "grad_norm": 1.902392864227295, + "learning_rate": 8.058679706601468e-05, + "loss": 2.2982, + "step": 26872 + }, + { + "epoch": 0.5971777777777778, + "grad_norm": 1.3850717544555664, + "learning_rate": 8.058235163369639e-05, + "loss": 1.8568, + "step": 26873 + }, + { + "epoch": 0.5972, + "grad_norm": 1.8104287385940552, + "learning_rate": 8.057790620137808e-05, + "loss": 1.9714, + "step": 26874 + }, + { + "epoch": 0.5972222222222222, + "grad_norm": 1.636537790298462, + "learning_rate": 8.057346076905979e-05, + "loss": 2.0433, + "step": 26875 + }, + { + "epoch": 0.5972444444444445, + "grad_norm": 1.733601689338684, + "learning_rate": 8.05690153367415e-05, + "loss": 1.7444, + "step": 26876 + }, + { + "epoch": 0.5972666666666666, + "grad_norm": 1.402464509010315, + "learning_rate": 8.056456990442321e-05, + "loss": 1.478, + "step": 26877 + }, + { + "epoch": 0.5972888888888889, + "grad_norm": 1.607865571975708, + "learning_rate": 8.056012447210492e-05, + "loss": 1.7318, + "step": 26878 + }, + { + "epoch": 0.5973111111111111, + "grad_norm": 1.7006738185882568, + "learning_rate": 8.055567903978663e-05, + "loss": 2.1914, + "step": 26879 + }, + { + "epoch": 0.5973333333333334, + "grad_norm": 1.8816903829574585, + "learning_rate": 8.055123360746834e-05, + "loss": 1.2202, + "step": 26880 + }, + { + "epoch": 0.5973555555555555, + "grad_norm": 1.9753090143203735, + "learning_rate": 8.054678817515004e-05, + "loss": 1.8315, + "step": 26881 + }, + { + "epoch": 0.5973777777777778, + "grad_norm": 1.8738881349563599, + "learning_rate": 8.054234274283174e-05, + "loss": 2.2523, + "step": 26882 + }, + { + "epoch": 0.5974, + "grad_norm": 1.477479338645935, + "learning_rate": 8.053789731051345e-05, + "loss": 1.6772, + "step": 26883 + }, + { + "epoch": 0.5974222222222222, + "grad_norm": 1.8067989349365234, + "learning_rate": 8.053345187819515e-05, + "loss": 1.8221, + "step": 26884 + }, + { + "epoch": 0.5974444444444444, + "grad_norm": 1.6162313222885132, + "learning_rate": 8.052900644587686e-05, + "loss": 1.7678, + "step": 26885 + }, + { + "epoch": 0.5974666666666667, + "grad_norm": 1.8906008005142212, + "learning_rate": 8.052456101355857e-05, + "loss": 1.9851, + "step": 26886 + }, + { + "epoch": 0.5974888888888888, + "grad_norm": 1.5765184164047241, + "learning_rate": 8.052011558124028e-05, + "loss": 1.7963, + "step": 26887 + }, + { + "epoch": 0.5975111111111111, + "grad_norm": 1.4867256879806519, + "learning_rate": 8.051567014892199e-05, + "loss": 1.7625, + "step": 26888 + }, + { + "epoch": 0.5975333333333334, + "grad_norm": 1.643934965133667, + "learning_rate": 8.05112247166037e-05, + "loss": 1.8958, + "step": 26889 + }, + { + "epoch": 0.5975555555555555, + "grad_norm": 1.6014454364776611, + "learning_rate": 8.050677928428541e-05, + "loss": 1.7788, + "step": 26890 + }, + { + "epoch": 0.5975777777777778, + "grad_norm": 3.079517126083374, + "learning_rate": 8.05023338519671e-05, + "loss": 1.5165, + "step": 26891 + }, + { + "epoch": 0.5976, + "grad_norm": 1.854453682899475, + "learning_rate": 8.049788841964881e-05, + "loss": 1.7971, + "step": 26892 + }, + { + "epoch": 0.5976222222222223, + "grad_norm": 1.4640413522720337, + "learning_rate": 8.049344298733052e-05, + "loss": 1.5388, + "step": 26893 + }, + { + "epoch": 0.5976444444444444, + "grad_norm": 1.8597694635391235, + "learning_rate": 8.048899755501222e-05, + "loss": 2.039, + "step": 26894 + }, + { + "epoch": 0.5976666666666667, + "grad_norm": 2.2400176525115967, + "learning_rate": 8.048455212269393e-05, + "loss": 2.0969, + "step": 26895 + }, + { + "epoch": 0.5976888888888889, + "grad_norm": 1.6783058643341064, + "learning_rate": 8.048010669037564e-05, + "loss": 1.7719, + "step": 26896 + }, + { + "epoch": 0.5977111111111111, + "grad_norm": 1.8262856006622314, + "learning_rate": 8.047566125805736e-05, + "loss": 1.6248, + "step": 26897 + }, + { + "epoch": 0.5977333333333333, + "grad_norm": 1.9277127981185913, + "learning_rate": 8.047121582573906e-05, + "loss": 1.6219, + "step": 26898 + }, + { + "epoch": 0.5977555555555556, + "grad_norm": 1.7251594066619873, + "learning_rate": 8.046677039342077e-05, + "loss": 1.7427, + "step": 26899 + }, + { + "epoch": 0.5977777777777777, + "grad_norm": 1.6436690092086792, + "learning_rate": 8.046232496110248e-05, + "loss": 1.4936, + "step": 26900 + }, + { + "epoch": 0.5978, + "grad_norm": 1.403838872909546, + "learning_rate": 8.045787952878417e-05, + "loss": 2.6362, + "step": 26901 + }, + { + "epoch": 0.5978222222222223, + "grad_norm": 1.383715271949768, + "learning_rate": 8.045343409646588e-05, + "loss": 2.1609, + "step": 26902 + }, + { + "epoch": 0.5978444444444444, + "grad_norm": 1.8453351259231567, + "learning_rate": 8.044898866414759e-05, + "loss": 2.2438, + "step": 26903 + }, + { + "epoch": 0.5978666666666667, + "grad_norm": 1.7187947034835815, + "learning_rate": 8.044454323182929e-05, + "loss": 2.2648, + "step": 26904 + }, + { + "epoch": 0.5978888888888889, + "grad_norm": 1.3632493019104004, + "learning_rate": 8.044009779951101e-05, + "loss": 2.1545, + "step": 26905 + }, + { + "epoch": 0.5979111111111111, + "grad_norm": 1.9141936302185059, + "learning_rate": 8.043565236719272e-05, + "loss": 1.6925, + "step": 26906 + }, + { + "epoch": 0.5979333333333333, + "grad_norm": 1.5372122526168823, + "learning_rate": 8.043120693487443e-05, + "loss": 2.392, + "step": 26907 + }, + { + "epoch": 0.5979555555555556, + "grad_norm": 1.2757940292358398, + "learning_rate": 8.042676150255613e-05, + "loss": 1.7866, + "step": 26908 + }, + { + "epoch": 0.5979777777777778, + "grad_norm": 1.9118685722351074, + "learning_rate": 8.042231607023784e-05, + "loss": 2.2627, + "step": 26909 + }, + { + "epoch": 0.598, + "grad_norm": 1.775957703590393, + "learning_rate": 8.041787063791955e-05, + "loss": 2.1612, + "step": 26910 + }, + { + "epoch": 0.5980222222222222, + "grad_norm": 1.5586014986038208, + "learning_rate": 8.041342520560124e-05, + "loss": 2.2685, + "step": 26911 + }, + { + "epoch": 0.5980444444444445, + "grad_norm": 1.7741633653640747, + "learning_rate": 8.040897977328295e-05, + "loss": 1.98, + "step": 26912 + }, + { + "epoch": 0.5980666666666666, + "grad_norm": 1.7802754640579224, + "learning_rate": 8.040453434096466e-05, + "loss": 2.5607, + "step": 26913 + }, + { + "epoch": 0.5980888888888889, + "grad_norm": 1.3747230768203735, + "learning_rate": 8.040008890864637e-05, + "loss": 1.3905, + "step": 26914 + }, + { + "epoch": 0.5981111111111111, + "grad_norm": 1.478218913078308, + "learning_rate": 8.039564347632808e-05, + "loss": 1.9997, + "step": 26915 + }, + { + "epoch": 0.5981333333333333, + "grad_norm": 2.119713544845581, + "learning_rate": 8.039119804400979e-05, + "loss": 1.7586, + "step": 26916 + }, + { + "epoch": 0.5981555555555556, + "grad_norm": 2.079786777496338, + "learning_rate": 8.03867526116915e-05, + "loss": 2.2577, + "step": 26917 + }, + { + "epoch": 0.5981777777777778, + "grad_norm": 1.048282504081726, + "learning_rate": 8.03823071793732e-05, + "loss": 0.9193, + "step": 26918 + }, + { + "epoch": 0.5982, + "grad_norm": 1.0226231813430786, + "learning_rate": 8.03778617470549e-05, + "loss": 1.0035, + "step": 26919 + }, + { + "epoch": 0.5982222222222222, + "grad_norm": 1.470382571220398, + "learning_rate": 8.037341631473662e-05, + "loss": 1.7619, + "step": 26920 + }, + { + "epoch": 0.5982444444444445, + "grad_norm": 1.536065697669983, + "learning_rate": 8.036897088241831e-05, + "loss": 1.8363, + "step": 26921 + }, + { + "epoch": 0.5982666666666666, + "grad_norm": 1.9655731916427612, + "learning_rate": 8.036452545010002e-05, + "loss": 2.2304, + "step": 26922 + }, + { + "epoch": 0.5982888888888889, + "grad_norm": 1.4935956001281738, + "learning_rate": 8.036008001778173e-05, + "loss": 1.5833, + "step": 26923 + }, + { + "epoch": 0.5983111111111111, + "grad_norm": 1.8283042907714844, + "learning_rate": 8.035563458546344e-05, + "loss": 2.0601, + "step": 26924 + }, + { + "epoch": 0.5983333333333334, + "grad_norm": 1.7895570993423462, + "learning_rate": 8.035118915314515e-05, + "loss": 2.0856, + "step": 26925 + }, + { + "epoch": 0.5983555555555555, + "grad_norm": 1.5136226415634155, + "learning_rate": 8.034674372082686e-05, + "loss": 2.1683, + "step": 26926 + }, + { + "epoch": 0.5983777777777778, + "grad_norm": 1.888890027999878, + "learning_rate": 8.034229828850857e-05, + "loss": 1.9483, + "step": 26927 + }, + { + "epoch": 0.5984, + "grad_norm": 1.9367108345031738, + "learning_rate": 8.033785285619027e-05, + "loss": 2.0916, + "step": 26928 + }, + { + "epoch": 0.5984222222222222, + "grad_norm": 1.6923370361328125, + "learning_rate": 8.033340742387197e-05, + "loss": 1.8858, + "step": 26929 + }, + { + "epoch": 0.5984444444444444, + "grad_norm": 1.7384182214736938, + "learning_rate": 8.032896199155368e-05, + "loss": 2.0751, + "step": 26930 + }, + { + "epoch": 0.5984666666666667, + "grad_norm": 1.5326626300811768, + "learning_rate": 8.032451655923538e-05, + "loss": 1.0639, + "step": 26931 + }, + { + "epoch": 0.5984888888888888, + "grad_norm": 1.4723471403121948, + "learning_rate": 8.032007112691709e-05, + "loss": 1.4927, + "step": 26932 + }, + { + "epoch": 0.5985111111111111, + "grad_norm": 1.6398212909698486, + "learning_rate": 8.03156256945988e-05, + "loss": 1.9547, + "step": 26933 + }, + { + "epoch": 0.5985333333333334, + "grad_norm": 1.6803056001663208, + "learning_rate": 8.031118026228051e-05, + "loss": 1.9571, + "step": 26934 + }, + { + "epoch": 0.5985555555555555, + "grad_norm": 1.5296698808670044, + "learning_rate": 8.030673482996222e-05, + "loss": 1.4227, + "step": 26935 + }, + { + "epoch": 0.5985777777777778, + "grad_norm": 1.6332881450653076, + "learning_rate": 8.030228939764393e-05, + "loss": 2.2374, + "step": 26936 + }, + { + "epoch": 0.5986, + "grad_norm": 1.9761298894882202, + "learning_rate": 8.029784396532564e-05, + "loss": 2.4395, + "step": 26937 + }, + { + "epoch": 0.5986222222222222, + "grad_norm": 1.8694782257080078, + "learning_rate": 8.029339853300733e-05, + "loss": 1.6662, + "step": 26938 + }, + { + "epoch": 0.5986444444444444, + "grad_norm": 1.7730891704559326, + "learning_rate": 8.028895310068904e-05, + "loss": 2.1371, + "step": 26939 + }, + { + "epoch": 0.5986666666666667, + "grad_norm": 1.6914303302764893, + "learning_rate": 8.028450766837075e-05, + "loss": 1.9322, + "step": 26940 + }, + { + "epoch": 0.5986888888888889, + "grad_norm": 1.6608036756515503, + "learning_rate": 8.028006223605245e-05, + "loss": 1.8645, + "step": 26941 + }, + { + "epoch": 0.5987111111111111, + "grad_norm": 1.7254583835601807, + "learning_rate": 8.027561680373417e-05, + "loss": 1.6468, + "step": 26942 + }, + { + "epoch": 0.5987333333333333, + "grad_norm": 1.714959979057312, + "learning_rate": 8.027117137141588e-05, + "loss": 1.8889, + "step": 26943 + }, + { + "epoch": 0.5987555555555556, + "grad_norm": 1.6050472259521484, + "learning_rate": 8.026672593909758e-05, + "loss": 1.8922, + "step": 26944 + }, + { + "epoch": 0.5987777777777777, + "grad_norm": 1.7325646877288818, + "learning_rate": 8.026228050677929e-05, + "loss": 1.9524, + "step": 26945 + }, + { + "epoch": 0.5988, + "grad_norm": 1.3277366161346436, + "learning_rate": 8.0257835074461e-05, + "loss": 0.8642, + "step": 26946 + }, + { + "epoch": 0.5988222222222223, + "grad_norm": 1.1544034481048584, + "learning_rate": 8.025338964214271e-05, + "loss": 0.9541, + "step": 26947 + }, + { + "epoch": 0.5988444444444444, + "grad_norm": 1.8778469562530518, + "learning_rate": 8.02489442098244e-05, + "loss": 2.0342, + "step": 26948 + }, + { + "epoch": 0.5988666666666667, + "grad_norm": 0.2326299399137497, + "learning_rate": 8.024449877750611e-05, + "loss": 0.0492, + "step": 26949 + }, + { + "epoch": 0.5988888888888889, + "grad_norm": 2.3978614807128906, + "learning_rate": 8.024005334518782e-05, + "loss": 2.062, + "step": 26950 + }, + { + "epoch": 0.5989111111111111, + "grad_norm": 1.5422933101654053, + "learning_rate": 8.023560791286953e-05, + "loss": 2.7222, + "step": 26951 + }, + { + "epoch": 0.5989333333333333, + "grad_norm": 1.4547905921936035, + "learning_rate": 8.023116248055124e-05, + "loss": 1.699, + "step": 26952 + }, + { + "epoch": 0.5989555555555556, + "grad_norm": 1.5786837339401245, + "learning_rate": 8.022671704823295e-05, + "loss": 3.1059, + "step": 26953 + }, + { + "epoch": 0.5989777777777778, + "grad_norm": 1.6623071432113647, + "learning_rate": 8.022227161591466e-05, + "loss": 2.5863, + "step": 26954 + }, + { + "epoch": 0.599, + "grad_norm": 1.663651704788208, + "learning_rate": 8.021782618359636e-05, + "loss": 1.9413, + "step": 26955 + }, + { + "epoch": 0.5990222222222222, + "grad_norm": 1.5665733814239502, + "learning_rate": 8.021338075127807e-05, + "loss": 2.3541, + "step": 26956 + }, + { + "epoch": 0.5990444444444445, + "grad_norm": 1.2850035429000854, + "learning_rate": 8.020893531895978e-05, + "loss": 2.0893, + "step": 26957 + }, + { + "epoch": 0.5990666666666666, + "grad_norm": 1.481261968612671, + "learning_rate": 8.020448988664147e-05, + "loss": 2.3503, + "step": 26958 + }, + { + "epoch": 0.5990888888888889, + "grad_norm": 1.7478474378585815, + "learning_rate": 8.020004445432318e-05, + "loss": 1.5484, + "step": 26959 + }, + { + "epoch": 0.5991111111111111, + "grad_norm": 1.60469651222229, + "learning_rate": 8.019559902200489e-05, + "loss": 2.0309, + "step": 26960 + }, + { + "epoch": 0.5991333333333333, + "grad_norm": 1.6925408840179443, + "learning_rate": 8.01911535896866e-05, + "loss": 0.9794, + "step": 26961 + }, + { + "epoch": 0.5991555555555556, + "grad_norm": 1.7684733867645264, + "learning_rate": 8.018670815736831e-05, + "loss": 2.2351, + "step": 26962 + }, + { + "epoch": 0.5991777777777778, + "grad_norm": 1.7226217985153198, + "learning_rate": 8.018226272505002e-05, + "loss": 1.963, + "step": 26963 + }, + { + "epoch": 0.5992, + "grad_norm": 1.6714963912963867, + "learning_rate": 8.017781729273173e-05, + "loss": 2.0087, + "step": 26964 + }, + { + "epoch": 0.5992222222222222, + "grad_norm": 1.5926263332366943, + "learning_rate": 8.017337186041343e-05, + "loss": 2.0484, + "step": 26965 + }, + { + "epoch": 0.5992444444444445, + "grad_norm": 1.4017750024795532, + "learning_rate": 8.016892642809514e-05, + "loss": 1.2894, + "step": 26966 + }, + { + "epoch": 0.5992666666666666, + "grad_norm": 1.0954105854034424, + "learning_rate": 8.016448099577685e-05, + "loss": 0.9788, + "step": 26967 + }, + { + "epoch": 0.5992888888888889, + "grad_norm": 1.7288366556167603, + "learning_rate": 8.016003556345854e-05, + "loss": 1.8245, + "step": 26968 + }, + { + "epoch": 0.5993111111111111, + "grad_norm": 1.6629772186279297, + "learning_rate": 8.015559013114025e-05, + "loss": 2.171, + "step": 26969 + }, + { + "epoch": 0.5993333333333334, + "grad_norm": 1.8875101804733276, + "learning_rate": 8.015114469882196e-05, + "loss": 2.114, + "step": 26970 + }, + { + "epoch": 0.5993555555555555, + "grad_norm": 1.5006781816482544, + "learning_rate": 8.014669926650367e-05, + "loss": 2.1082, + "step": 26971 + }, + { + "epoch": 0.5993777777777778, + "grad_norm": 1.7864024639129639, + "learning_rate": 8.014225383418538e-05, + "loss": 2.197, + "step": 26972 + }, + { + "epoch": 0.5994, + "grad_norm": 1.6741145849227905, + "learning_rate": 8.013780840186709e-05, + "loss": 1.6411, + "step": 26973 + }, + { + "epoch": 0.5994222222222222, + "grad_norm": 1.6694917678833008, + "learning_rate": 8.01333629695488e-05, + "loss": 1.8141, + "step": 26974 + }, + { + "epoch": 0.5994444444444444, + "grad_norm": 1.9536409378051758, + "learning_rate": 8.01289175372305e-05, + "loss": 2.1622, + "step": 26975 + }, + { + "epoch": 0.5994666666666667, + "grad_norm": 1.9600883722305298, + "learning_rate": 8.01244721049122e-05, + "loss": 1.9236, + "step": 26976 + }, + { + "epoch": 0.5994888888888888, + "grad_norm": 2.1006920337677, + "learning_rate": 8.012002667259391e-05, + "loss": 2.3782, + "step": 26977 + }, + { + "epoch": 0.5995111111111111, + "grad_norm": 1.7578033208847046, + "learning_rate": 8.011558124027561e-05, + "loss": 2.1157, + "step": 26978 + }, + { + "epoch": 0.5995333333333334, + "grad_norm": 1.7216506004333496, + "learning_rate": 8.011113580795733e-05, + "loss": 1.7657, + "step": 26979 + }, + { + "epoch": 0.5995555555555555, + "grad_norm": 1.7706249952316284, + "learning_rate": 8.010669037563904e-05, + "loss": 2.1157, + "step": 26980 + }, + { + "epoch": 0.5995777777777778, + "grad_norm": 1.936853051185608, + "learning_rate": 8.010224494332074e-05, + "loss": 1.9105, + "step": 26981 + }, + { + "epoch": 0.5996, + "grad_norm": 1.5771408081054688, + "learning_rate": 8.009779951100245e-05, + "loss": 1.8554, + "step": 26982 + }, + { + "epoch": 0.5996222222222222, + "grad_norm": 1.5390323400497437, + "learning_rate": 8.009335407868416e-05, + "loss": 1.734, + "step": 26983 + }, + { + "epoch": 0.5996444444444444, + "grad_norm": 1.6512367725372314, + "learning_rate": 8.008890864636587e-05, + "loss": 1.7134, + "step": 26984 + }, + { + "epoch": 0.5996666666666667, + "grad_norm": 2.0488808155059814, + "learning_rate": 8.008446321404756e-05, + "loss": 1.7957, + "step": 26985 + }, + { + "epoch": 0.5996888888888889, + "grad_norm": 1.9845051765441895, + "learning_rate": 8.008001778172927e-05, + "loss": 1.9532, + "step": 26986 + }, + { + "epoch": 0.5997111111111111, + "grad_norm": 1.6982089281082153, + "learning_rate": 8.007557234941098e-05, + "loss": 2.023, + "step": 26987 + }, + { + "epoch": 0.5997333333333333, + "grad_norm": 1.7800291776657104, + "learning_rate": 8.00711269170927e-05, + "loss": 1.9438, + "step": 26988 + }, + { + "epoch": 0.5997555555555556, + "grad_norm": 3.0561060905456543, + "learning_rate": 8.00666814847744e-05, + "loss": 2.0598, + "step": 26989 + }, + { + "epoch": 0.5997777777777777, + "grad_norm": 1.765212059020996, + "learning_rate": 8.006223605245611e-05, + "loss": 1.9058, + "step": 26990 + }, + { + "epoch": 0.5998, + "grad_norm": 1.9671481847763062, + "learning_rate": 8.005779062013781e-05, + "loss": 2.1272, + "step": 26991 + }, + { + "epoch": 0.5998222222222223, + "grad_norm": 1.9428707361221313, + "learning_rate": 8.005334518781952e-05, + "loss": 1.9046, + "step": 26992 + }, + { + "epoch": 0.5998444444444444, + "grad_norm": 1.8527716398239136, + "learning_rate": 8.004889975550123e-05, + "loss": 1.847, + "step": 26993 + }, + { + "epoch": 0.5998666666666667, + "grad_norm": 1.6317662000656128, + "learning_rate": 8.004445432318294e-05, + "loss": 1.5198, + "step": 26994 + }, + { + "epoch": 0.5998888888888889, + "grad_norm": 1.8692249059677124, + "learning_rate": 8.004000889086463e-05, + "loss": 1.895, + "step": 26995 + }, + { + "epoch": 0.5999111111111111, + "grad_norm": 1.947574496269226, + "learning_rate": 8.003556345854634e-05, + "loss": 2.2449, + "step": 26996 + }, + { + "epoch": 0.5999333333333333, + "grad_norm": 1.7928720712661743, + "learning_rate": 8.003111802622805e-05, + "loss": 1.9345, + "step": 26997 + }, + { + "epoch": 0.5999555555555556, + "grad_norm": 2.0767455101013184, + "learning_rate": 8.002667259390976e-05, + "loss": 2.2322, + "step": 26998 + }, + { + "epoch": 0.5999777777777778, + "grad_norm": 1.093999981880188, + "learning_rate": 8.002222716159147e-05, + "loss": 0.7272, + "step": 26999 + }, + { + "epoch": 0.6, + "grad_norm": 1.6920832395553589, + "learning_rate": 8.001778172927318e-05, + "loss": 1.0741, + "step": 27000 + }, + { + "epoch": 0.6, + "eval_loss": 1.8736392259597778, + "eval_runtime": 2159.761, + "eval_samples_per_second": 4.63, + "eval_steps_per_second": 4.63, + "step": 27000 + }, + { + "epoch": 0.6000222222222222, + "grad_norm": 1.6577028036117554, + "learning_rate": 8.001333629695488e-05, + "loss": 2.6101, + "step": 27001 + }, + { + "epoch": 0.6000444444444445, + "grad_norm": 1.6142709255218506, + "learning_rate": 8.000889086463659e-05, + "loss": 2.748, + "step": 27002 + }, + { + "epoch": 0.6000666666666666, + "grad_norm": 1.508734941482544, + "learning_rate": 8.00044454323183e-05, + "loss": 2.6064, + "step": 27003 + }, + { + "epoch": 0.6000888888888889, + "grad_norm": 1.804945945739746, + "learning_rate": 8e-05, + "loss": 2.5162, + "step": 27004 + }, + { + "epoch": 0.6001111111111112, + "grad_norm": 1.4495036602020264, + "learning_rate": 7.99955545676817e-05, + "loss": 2.216, + "step": 27005 + }, + { + "epoch": 0.6001333333333333, + "grad_norm": 1.4844017028808594, + "learning_rate": 7.999110913536341e-05, + "loss": 2.0562, + "step": 27006 + }, + { + "epoch": 0.6001555555555556, + "grad_norm": 1.463944673538208, + "learning_rate": 7.998666370304512e-05, + "loss": 2.2244, + "step": 27007 + }, + { + "epoch": 0.6001777777777778, + "grad_norm": 1.755519151687622, + "learning_rate": 7.998221827072683e-05, + "loss": 2.3888, + "step": 27008 + }, + { + "epoch": 0.6002, + "grad_norm": 1.373860239982605, + "learning_rate": 7.997777283840854e-05, + "loss": 1.5492, + "step": 27009 + }, + { + "epoch": 0.6002222222222222, + "grad_norm": 1.4735991954803467, + "learning_rate": 7.997332740609025e-05, + "loss": 1.7861, + "step": 27010 + }, + { + "epoch": 0.6002444444444445, + "grad_norm": 1.7138311862945557, + "learning_rate": 7.996888197377196e-05, + "loss": 2.1563, + "step": 27011 + }, + { + "epoch": 0.6002666666666666, + "grad_norm": 1.6711690425872803, + "learning_rate": 7.996443654145366e-05, + "loss": 2.3283, + "step": 27012 + }, + { + "epoch": 0.6002888888888889, + "grad_norm": 2.0114669799804688, + "learning_rate": 7.995999110913537e-05, + "loss": 2.2527, + "step": 27013 + }, + { + "epoch": 0.6003111111111111, + "grad_norm": 1.8923662900924683, + "learning_rate": 7.995554567681708e-05, + "loss": 2.3929, + "step": 27014 + }, + { + "epoch": 0.6003333333333334, + "grad_norm": 1.4820717573165894, + "learning_rate": 7.995110024449877e-05, + "loss": 1.7815, + "step": 27015 + }, + { + "epoch": 0.6003555555555555, + "grad_norm": 1.8462886810302734, + "learning_rate": 7.99466548121805e-05, + "loss": 2.6735, + "step": 27016 + }, + { + "epoch": 0.6003777777777778, + "grad_norm": 1.7554908990859985, + "learning_rate": 7.99422093798622e-05, + "loss": 2.0213, + "step": 27017 + }, + { + "epoch": 0.6004, + "grad_norm": 2.3965280055999756, + "learning_rate": 7.99377639475439e-05, + "loss": 1.7916, + "step": 27018 + }, + { + "epoch": 0.6004222222222222, + "grad_norm": 1.4135266542434692, + "learning_rate": 7.993331851522561e-05, + "loss": 1.7435, + "step": 27019 + }, + { + "epoch": 0.6004444444444444, + "grad_norm": 1.4737776517868042, + "learning_rate": 7.992887308290732e-05, + "loss": 1.3768, + "step": 27020 + }, + { + "epoch": 0.6004666666666667, + "grad_norm": 1.8706743717193604, + "learning_rate": 7.992442765058903e-05, + "loss": 1.7092, + "step": 27021 + }, + { + "epoch": 0.6004888888888888, + "grad_norm": 1.7776674032211304, + "learning_rate": 7.991998221827073e-05, + "loss": 2.5811, + "step": 27022 + }, + { + "epoch": 0.6005111111111111, + "grad_norm": 1.6870381832122803, + "learning_rate": 7.991553678595244e-05, + "loss": 1.5549, + "step": 27023 + }, + { + "epoch": 0.6005333333333334, + "grad_norm": 1.9651492834091187, + "learning_rate": 7.991109135363414e-05, + "loss": 2.1999, + "step": 27024 + }, + { + "epoch": 0.6005555555555555, + "grad_norm": 1.5777024030685425, + "learning_rate": 7.990664592131585e-05, + "loss": 1.7351, + "step": 27025 + }, + { + "epoch": 0.6005777777777778, + "grad_norm": 1.9379805326461792, + "learning_rate": 7.990220048899756e-05, + "loss": 2.4563, + "step": 27026 + }, + { + "epoch": 0.6006, + "grad_norm": 1.9654110670089722, + "learning_rate": 7.989775505667927e-05, + "loss": 2.2118, + "step": 27027 + }, + { + "epoch": 0.6006222222222222, + "grad_norm": 1.7272353172302246, + "learning_rate": 7.989330962436097e-05, + "loss": 2.0969, + "step": 27028 + }, + { + "epoch": 0.6006444444444444, + "grad_norm": 1.3812412023544312, + "learning_rate": 7.988886419204268e-05, + "loss": 0.7072, + "step": 27029 + }, + { + "epoch": 0.6006666666666667, + "grad_norm": 1.810765266418457, + "learning_rate": 7.988441875972439e-05, + "loss": 1.5445, + "step": 27030 + }, + { + "epoch": 0.6006888888888889, + "grad_norm": 1.8113470077514648, + "learning_rate": 7.98799733274061e-05, + "loss": 1.9716, + "step": 27031 + }, + { + "epoch": 0.6007111111111111, + "grad_norm": 1.2004268169403076, + "learning_rate": 7.98755278950878e-05, + "loss": 0.9292, + "step": 27032 + }, + { + "epoch": 0.6007333333333333, + "grad_norm": 1.4961955547332764, + "learning_rate": 7.98710824627695e-05, + "loss": 1.6421, + "step": 27033 + }, + { + "epoch": 0.6007555555555556, + "grad_norm": 1.735058307647705, + "learning_rate": 7.986663703045121e-05, + "loss": 1.4196, + "step": 27034 + }, + { + "epoch": 0.6007777777777777, + "grad_norm": 1.5329277515411377, + "learning_rate": 7.986219159813292e-05, + "loss": 1.5782, + "step": 27035 + }, + { + "epoch": 0.6008, + "grad_norm": 1.7740753889083862, + "learning_rate": 7.985774616581463e-05, + "loss": 2.0045, + "step": 27036 + }, + { + "epoch": 0.6008222222222223, + "grad_norm": 1.8819478750228882, + "learning_rate": 7.985330073349634e-05, + "loss": 2.2053, + "step": 27037 + }, + { + "epoch": 0.6008444444444444, + "grad_norm": 2.052328586578369, + "learning_rate": 7.984885530117804e-05, + "loss": 2.061, + "step": 27038 + }, + { + "epoch": 0.6008666666666667, + "grad_norm": 1.8363018035888672, + "learning_rate": 7.984440986885975e-05, + "loss": 1.8221, + "step": 27039 + }, + { + "epoch": 0.6008888888888889, + "grad_norm": 1.8349599838256836, + "learning_rate": 7.983996443654146e-05, + "loss": 1.7493, + "step": 27040 + }, + { + "epoch": 0.6009111111111111, + "grad_norm": 1.7365055084228516, + "learning_rate": 7.983551900422317e-05, + "loss": 1.6845, + "step": 27041 + }, + { + "epoch": 0.6009333333333333, + "grad_norm": 1.5604342222213745, + "learning_rate": 7.983107357190486e-05, + "loss": 1.9154, + "step": 27042 + }, + { + "epoch": 0.6009555555555556, + "grad_norm": 1.7715667486190796, + "learning_rate": 7.982662813958657e-05, + "loss": 1.9098, + "step": 27043 + }, + { + "epoch": 0.6009777777777778, + "grad_norm": 1.7074742317199707, + "learning_rate": 7.982218270726828e-05, + "loss": 1.4472, + "step": 27044 + }, + { + "epoch": 0.601, + "grad_norm": 1.6556566953659058, + "learning_rate": 7.981773727494999e-05, + "loss": 1.8303, + "step": 27045 + }, + { + "epoch": 0.6010222222222222, + "grad_norm": 1.5229953527450562, + "learning_rate": 7.98132918426317e-05, + "loss": 1.9378, + "step": 27046 + }, + { + "epoch": 0.6010444444444445, + "grad_norm": 1.802293300628662, + "learning_rate": 7.980884641031341e-05, + "loss": 1.7766, + "step": 27047 + }, + { + "epoch": 0.6010666666666666, + "grad_norm": 1.222768783569336, + "learning_rate": 7.980440097799511e-05, + "loss": 0.7822, + "step": 27048 + }, + { + "epoch": 0.6010888888888889, + "grad_norm": 1.9860477447509766, + "learning_rate": 7.979995554567682e-05, + "loss": 1.8711, + "step": 27049 + }, + { + "epoch": 0.6011111111111112, + "grad_norm": 1.6315582990646362, + "learning_rate": 7.979551011335853e-05, + "loss": 0.8217, + "step": 27050 + }, + { + "epoch": 0.6011333333333333, + "grad_norm": 1.3168281316757202, + "learning_rate": 7.979106468104024e-05, + "loss": 2.2222, + "step": 27051 + }, + { + "epoch": 0.6011555555555556, + "grad_norm": 1.5810824632644653, + "learning_rate": 7.978661924872193e-05, + "loss": 2.4733, + "step": 27052 + }, + { + "epoch": 0.6011777777777778, + "grad_norm": 1.3996162414550781, + "learning_rate": 7.978217381640366e-05, + "loss": 2.0982, + "step": 27053 + }, + { + "epoch": 0.6012, + "grad_norm": 0.9520947933197021, + "learning_rate": 7.977772838408537e-05, + "loss": 1.1722, + "step": 27054 + }, + { + "epoch": 0.6012222222222222, + "grad_norm": 1.687771201133728, + "learning_rate": 7.977328295176706e-05, + "loss": 2.2697, + "step": 27055 + }, + { + "epoch": 0.6012444444444445, + "grad_norm": 1.4503023624420166, + "learning_rate": 7.976883751944877e-05, + "loss": 2.1093, + "step": 27056 + }, + { + "epoch": 0.6012666666666666, + "grad_norm": 1.4794197082519531, + "learning_rate": 7.976439208713048e-05, + "loss": 1.9424, + "step": 27057 + }, + { + "epoch": 0.6012888888888889, + "grad_norm": 1.83533775806427, + "learning_rate": 7.975994665481218e-05, + "loss": 2.3112, + "step": 27058 + }, + { + "epoch": 0.6013111111111111, + "grad_norm": 1.5439540147781372, + "learning_rate": 7.975550122249389e-05, + "loss": 1.4641, + "step": 27059 + }, + { + "epoch": 0.6013333333333334, + "grad_norm": 1.8201035261154175, + "learning_rate": 7.97510557901756e-05, + "loss": 1.9752, + "step": 27060 + }, + { + "epoch": 0.6013555555555555, + "grad_norm": 1.8052176237106323, + "learning_rate": 7.97466103578573e-05, + "loss": 2.1018, + "step": 27061 + }, + { + "epoch": 0.6013777777777778, + "grad_norm": 1.7336097955703735, + "learning_rate": 7.974216492553902e-05, + "loss": 1.8685, + "step": 27062 + }, + { + "epoch": 0.6014, + "grad_norm": 1.6656413078308105, + "learning_rate": 7.973771949322072e-05, + "loss": 1.6962, + "step": 27063 + }, + { + "epoch": 0.6014222222222222, + "grad_norm": 1.5584288835525513, + "learning_rate": 7.973327406090243e-05, + "loss": 1.8788, + "step": 27064 + }, + { + "epoch": 0.6014444444444444, + "grad_norm": 1.5663453340530396, + "learning_rate": 7.972882862858413e-05, + "loss": 2.4941, + "step": 27065 + }, + { + "epoch": 0.6014666666666667, + "grad_norm": 1.6916759014129639, + "learning_rate": 7.972438319626584e-05, + "loss": 2.1018, + "step": 27066 + }, + { + "epoch": 0.6014888888888889, + "grad_norm": 1.6306703090667725, + "learning_rate": 7.971993776394755e-05, + "loss": 1.6678, + "step": 27067 + }, + { + "epoch": 0.6015111111111111, + "grad_norm": 1.8184791803359985, + "learning_rate": 7.971549233162926e-05, + "loss": 0.8785, + "step": 27068 + }, + { + "epoch": 0.6015333333333334, + "grad_norm": 1.5695523023605347, + "learning_rate": 7.971104689931096e-05, + "loss": 2.0016, + "step": 27069 + }, + { + "epoch": 0.6015555555555555, + "grad_norm": 1.766474962234497, + "learning_rate": 7.970660146699267e-05, + "loss": 2.144, + "step": 27070 + }, + { + "epoch": 0.6015777777777778, + "grad_norm": 1.989862322807312, + "learning_rate": 7.970215603467437e-05, + "loss": 2.3518, + "step": 27071 + }, + { + "epoch": 0.6016, + "grad_norm": 1.598899483680725, + "learning_rate": 7.969771060235608e-05, + "loss": 1.8039, + "step": 27072 + }, + { + "epoch": 0.6016222222222222, + "grad_norm": 1.6440620422363281, + "learning_rate": 7.96932651700378e-05, + "loss": 1.9172, + "step": 27073 + }, + { + "epoch": 0.6016444444444444, + "grad_norm": 1.8292713165283203, + "learning_rate": 7.96888197377195e-05, + "loss": 1.6827, + "step": 27074 + }, + { + "epoch": 0.6016666666666667, + "grad_norm": 1.7788729667663574, + "learning_rate": 7.96843743054012e-05, + "loss": 2.0841, + "step": 27075 + }, + { + "epoch": 0.6016888888888889, + "grad_norm": 1.5874347686767578, + "learning_rate": 7.967992887308291e-05, + "loss": 1.6457, + "step": 27076 + }, + { + "epoch": 0.6017111111111111, + "grad_norm": 1.4802864789962769, + "learning_rate": 7.967548344076462e-05, + "loss": 2.049, + "step": 27077 + }, + { + "epoch": 0.6017333333333333, + "grad_norm": 1.557401180267334, + "learning_rate": 7.967103800844633e-05, + "loss": 0.8245, + "step": 27078 + }, + { + "epoch": 0.6017555555555556, + "grad_norm": 1.8262344598770142, + "learning_rate": 7.966659257612802e-05, + "loss": 2.1202, + "step": 27079 + }, + { + "epoch": 0.6017777777777777, + "grad_norm": 1.4381197690963745, + "learning_rate": 7.966214714380973e-05, + "loss": 1.3392, + "step": 27080 + }, + { + "epoch": 0.6018, + "grad_norm": 1.579521894454956, + "learning_rate": 7.965770171149144e-05, + "loss": 1.6266, + "step": 27081 + }, + { + "epoch": 0.6018222222222223, + "grad_norm": 1.4592291116714478, + "learning_rate": 7.965325627917315e-05, + "loss": 1.678, + "step": 27082 + }, + { + "epoch": 0.6018444444444444, + "grad_norm": 1.5992571115493774, + "learning_rate": 7.964881084685486e-05, + "loss": 1.6441, + "step": 27083 + }, + { + "epoch": 0.6018666666666667, + "grad_norm": 1.9749088287353516, + "learning_rate": 7.964436541453657e-05, + "loss": 2.295, + "step": 27084 + }, + { + "epoch": 0.6018888888888889, + "grad_norm": 1.8254121541976929, + "learning_rate": 7.963991998221827e-05, + "loss": 2.1561, + "step": 27085 + }, + { + "epoch": 0.6019111111111111, + "grad_norm": 2.735177993774414, + "learning_rate": 7.963547454989998e-05, + "loss": 2.914, + "step": 27086 + }, + { + "epoch": 0.6019333333333333, + "grad_norm": 1.5838193893432617, + "learning_rate": 7.963102911758169e-05, + "loss": 1.8465, + "step": 27087 + }, + { + "epoch": 0.6019555555555556, + "grad_norm": 1.8101228475570679, + "learning_rate": 7.96265836852634e-05, + "loss": 1.9933, + "step": 27088 + }, + { + "epoch": 0.6019777777777777, + "grad_norm": 1.8943954706192017, + "learning_rate": 7.96221382529451e-05, + "loss": 1.4846, + "step": 27089 + }, + { + "epoch": 0.602, + "grad_norm": 1.3854761123657227, + "learning_rate": 7.961769282062682e-05, + "loss": 0.8323, + "step": 27090 + }, + { + "epoch": 0.6020222222222222, + "grad_norm": 1.6953483819961548, + "learning_rate": 7.961324738830853e-05, + "loss": 1.8218, + "step": 27091 + }, + { + "epoch": 0.6020444444444445, + "grad_norm": 1.7189913988113403, + "learning_rate": 7.960880195599022e-05, + "loss": 1.8765, + "step": 27092 + }, + { + "epoch": 0.6020666666666666, + "grad_norm": 1.7521131038665771, + "learning_rate": 7.960435652367193e-05, + "loss": 2.0528, + "step": 27093 + }, + { + "epoch": 0.6020888888888889, + "grad_norm": 1.5336365699768066, + "learning_rate": 7.959991109135364e-05, + "loss": 1.6545, + "step": 27094 + }, + { + "epoch": 0.6021111111111112, + "grad_norm": 1.950052261352539, + "learning_rate": 7.959546565903534e-05, + "loss": 2.2977, + "step": 27095 + }, + { + "epoch": 0.6021333333333333, + "grad_norm": 2.1121826171875, + "learning_rate": 7.959102022671705e-05, + "loss": 2.1558, + "step": 27096 + }, + { + "epoch": 0.6021555555555556, + "grad_norm": 1.7638791799545288, + "learning_rate": 7.958657479439876e-05, + "loss": 1.8762, + "step": 27097 + }, + { + "epoch": 0.6021777777777778, + "grad_norm": 1.2896921634674072, + "learning_rate": 7.958212936208047e-05, + "loss": 0.9417, + "step": 27098 + }, + { + "epoch": 0.6022, + "grad_norm": 1.7924249172210693, + "learning_rate": 7.957768392976218e-05, + "loss": 1.9258, + "step": 27099 + }, + { + "epoch": 0.6022222222222222, + "grad_norm": 1.7163163423538208, + "learning_rate": 7.957323849744389e-05, + "loss": 1.5863, + "step": 27100 + }, + { + "epoch": 0.6022444444444445, + "grad_norm": 1.3778804540634155, + "learning_rate": 7.95687930651256e-05, + "loss": 2.2595, + "step": 27101 + }, + { + "epoch": 0.6022666666666666, + "grad_norm": 1.197900652885437, + "learning_rate": 7.956434763280729e-05, + "loss": 1.1102, + "step": 27102 + }, + { + "epoch": 0.6022888888888889, + "grad_norm": 1.3239737749099731, + "learning_rate": 7.9559902200489e-05, + "loss": 2.09, + "step": 27103 + }, + { + "epoch": 0.6023111111111111, + "grad_norm": 1.4392409324645996, + "learning_rate": 7.955545676817071e-05, + "loss": 1.5999, + "step": 27104 + }, + { + "epoch": 0.6023333333333334, + "grad_norm": 1.487004041671753, + "learning_rate": 7.955101133585241e-05, + "loss": 2.2757, + "step": 27105 + }, + { + "epoch": 0.6023555555555555, + "grad_norm": 2.519434690475464, + "learning_rate": 7.954656590353412e-05, + "loss": 1.6574, + "step": 27106 + }, + { + "epoch": 0.6023777777777778, + "grad_norm": 1.5450921058654785, + "learning_rate": 7.954212047121583e-05, + "loss": 2.3597, + "step": 27107 + }, + { + "epoch": 0.6024, + "grad_norm": 1.5056664943695068, + "learning_rate": 7.953767503889754e-05, + "loss": 1.8347, + "step": 27108 + }, + { + "epoch": 0.6024222222222222, + "grad_norm": 1.5468626022338867, + "learning_rate": 7.953322960657925e-05, + "loss": 1.9519, + "step": 27109 + }, + { + "epoch": 0.6024444444444444, + "grad_norm": 1.6627495288848877, + "learning_rate": 7.952878417426096e-05, + "loss": 1.8685, + "step": 27110 + }, + { + "epoch": 0.6024666666666667, + "grad_norm": 1.7398810386657715, + "learning_rate": 7.952433874194266e-05, + "loss": 2.2111, + "step": 27111 + }, + { + "epoch": 0.6024888888888889, + "grad_norm": 1.4979416131973267, + "learning_rate": 7.951989330962436e-05, + "loss": 2.2772, + "step": 27112 + }, + { + "epoch": 0.6025111111111111, + "grad_norm": 1.7348592281341553, + "learning_rate": 7.951544787730607e-05, + "loss": 2.2954, + "step": 27113 + }, + { + "epoch": 0.6025333333333334, + "grad_norm": 1.4830193519592285, + "learning_rate": 7.951100244498778e-05, + "loss": 2.1515, + "step": 27114 + }, + { + "epoch": 0.6025555555555555, + "grad_norm": 1.6449952125549316, + "learning_rate": 7.950655701266949e-05, + "loss": 2.1656, + "step": 27115 + }, + { + "epoch": 0.6025777777777778, + "grad_norm": 1.628627061843872, + "learning_rate": 7.950211158035119e-05, + "loss": 1.8167, + "step": 27116 + }, + { + "epoch": 0.6026, + "grad_norm": 1.6549086570739746, + "learning_rate": 7.94976661480329e-05, + "loss": 2.0722, + "step": 27117 + }, + { + "epoch": 0.6026222222222222, + "grad_norm": 1.1302205324172974, + "learning_rate": 7.94932207157146e-05, + "loss": 1.0459, + "step": 27118 + }, + { + "epoch": 0.6026444444444444, + "grad_norm": 1.723571538925171, + "learning_rate": 7.948877528339631e-05, + "loss": 1.9982, + "step": 27119 + }, + { + "epoch": 0.6026666666666667, + "grad_norm": 1.8001817464828491, + "learning_rate": 7.948432985107802e-05, + "loss": 2.4451, + "step": 27120 + }, + { + "epoch": 0.6026888888888889, + "grad_norm": 1.7819477319717407, + "learning_rate": 7.947988441875973e-05, + "loss": 2.1226, + "step": 27121 + }, + { + "epoch": 0.6027111111111111, + "grad_norm": 1.7987935543060303, + "learning_rate": 7.947543898644143e-05, + "loss": 2.1471, + "step": 27122 + }, + { + "epoch": 0.6027333333333333, + "grad_norm": 1.8866928815841675, + "learning_rate": 7.947099355412314e-05, + "loss": 2.2982, + "step": 27123 + }, + { + "epoch": 0.6027555555555556, + "grad_norm": 2.157236337661743, + "learning_rate": 7.946654812180485e-05, + "loss": 2.5947, + "step": 27124 + }, + { + "epoch": 0.6027777777777777, + "grad_norm": 1.7541967630386353, + "learning_rate": 7.946210268948656e-05, + "loss": 2.1122, + "step": 27125 + }, + { + "epoch": 0.6028, + "grad_norm": 1.5409049987792969, + "learning_rate": 7.945765725716825e-05, + "loss": 2.2893, + "step": 27126 + }, + { + "epoch": 0.6028222222222223, + "grad_norm": 1.9576064348220825, + "learning_rate": 7.945321182484998e-05, + "loss": 1.741, + "step": 27127 + }, + { + "epoch": 0.6028444444444444, + "grad_norm": 1.5584945678710938, + "learning_rate": 7.944876639253169e-05, + "loss": 2.0153, + "step": 27128 + }, + { + "epoch": 0.6028666666666667, + "grad_norm": 2.0886170864105225, + "learning_rate": 7.944432096021338e-05, + "loss": 2.3039, + "step": 27129 + }, + { + "epoch": 0.6028888888888889, + "grad_norm": 1.5287795066833496, + "learning_rate": 7.94398755278951e-05, + "loss": 1.5705, + "step": 27130 + }, + { + "epoch": 0.6029111111111111, + "grad_norm": 1.9877333641052246, + "learning_rate": 7.94354300955768e-05, + "loss": 2.0677, + "step": 27131 + }, + { + "epoch": 0.6029333333333333, + "grad_norm": 1.8227648735046387, + "learning_rate": 7.94309846632585e-05, + "loss": 2.0896, + "step": 27132 + }, + { + "epoch": 0.6029555555555556, + "grad_norm": 1.9447449445724487, + "learning_rate": 7.942653923094021e-05, + "loss": 1.9537, + "step": 27133 + }, + { + "epoch": 0.6029777777777777, + "grad_norm": 0.20771636068820953, + "learning_rate": 7.942209379862192e-05, + "loss": 0.0279, + "step": 27134 + }, + { + "epoch": 0.603, + "grad_norm": 2.1986615657806396, + "learning_rate": 7.941764836630363e-05, + "loss": 2.0665, + "step": 27135 + }, + { + "epoch": 0.6030222222222222, + "grad_norm": 1.7483482360839844, + "learning_rate": 7.941320293398534e-05, + "loss": 1.8281, + "step": 27136 + }, + { + "epoch": 0.6030444444444445, + "grad_norm": 1.5220845937728882, + "learning_rate": 7.940875750166705e-05, + "loss": 1.44, + "step": 27137 + }, + { + "epoch": 0.6030666666666666, + "grad_norm": 1.4676389694213867, + "learning_rate": 7.940431206934876e-05, + "loss": 1.396, + "step": 27138 + }, + { + "epoch": 0.6030888888888889, + "grad_norm": 1.8141095638275146, + "learning_rate": 7.939986663703045e-05, + "loss": 1.8311, + "step": 27139 + }, + { + "epoch": 0.6031111111111112, + "grad_norm": 2.3061838150024414, + "learning_rate": 7.939542120471216e-05, + "loss": 2.5424, + "step": 27140 + }, + { + "epoch": 0.6031333333333333, + "grad_norm": 1.7188256978988647, + "learning_rate": 7.939097577239387e-05, + "loss": 2.0499, + "step": 27141 + }, + { + "epoch": 0.6031555555555556, + "grad_norm": 1.8286235332489014, + "learning_rate": 7.938653034007557e-05, + "loss": 1.9255, + "step": 27142 + }, + { + "epoch": 0.6031777777777778, + "grad_norm": 1.7270867824554443, + "learning_rate": 7.938208490775728e-05, + "loss": 1.5199, + "step": 27143 + }, + { + "epoch": 0.6032, + "grad_norm": 1.7218559980392456, + "learning_rate": 7.937763947543899e-05, + "loss": 1.9342, + "step": 27144 + }, + { + "epoch": 0.6032222222222222, + "grad_norm": 1.8925360441207886, + "learning_rate": 7.93731940431207e-05, + "loss": 1.7951, + "step": 27145 + }, + { + "epoch": 0.6032444444444445, + "grad_norm": 1.7069977521896362, + "learning_rate": 7.93687486108024e-05, + "loss": 1.7802, + "step": 27146 + }, + { + "epoch": 0.6032666666666666, + "grad_norm": 1.6345694065093994, + "learning_rate": 7.936430317848412e-05, + "loss": 1.7784, + "step": 27147 + }, + { + "epoch": 0.6032888888888889, + "grad_norm": 1.5925861597061157, + "learning_rate": 7.935985774616583e-05, + "loss": 1.5868, + "step": 27148 + }, + { + "epoch": 0.6033111111111111, + "grad_norm": 1.817430019378662, + "learning_rate": 7.935541231384752e-05, + "loss": 1.6037, + "step": 27149 + }, + { + "epoch": 0.6033333333333334, + "grad_norm": 1.9700731039047241, + "learning_rate": 7.935096688152923e-05, + "loss": 1.9079, + "step": 27150 + }, + { + "epoch": 0.6033555555555555, + "grad_norm": 1.0064431428909302, + "learning_rate": 7.934652144921094e-05, + "loss": 1.0072, + "step": 27151 + }, + { + "epoch": 0.6033777777777778, + "grad_norm": 1.370635986328125, + "learning_rate": 7.934207601689264e-05, + "loss": 1.9392, + "step": 27152 + }, + { + "epoch": 0.6034, + "grad_norm": 0.9711331129074097, + "learning_rate": 7.933763058457435e-05, + "loss": 1.1252, + "step": 27153 + }, + { + "epoch": 0.6034222222222222, + "grad_norm": 1.2324848175048828, + "learning_rate": 7.933318515225606e-05, + "loss": 1.3029, + "step": 27154 + }, + { + "epoch": 0.6034444444444444, + "grad_norm": 1.7871129512786865, + "learning_rate": 7.932873971993777e-05, + "loss": 2.4237, + "step": 27155 + }, + { + "epoch": 0.6034666666666667, + "grad_norm": 1.6308362483978271, + "learning_rate": 7.932429428761948e-05, + "loss": 2.4098, + "step": 27156 + }, + { + "epoch": 0.6034888888888889, + "grad_norm": 1.7439035177230835, + "learning_rate": 7.931984885530119e-05, + "loss": 2.3384, + "step": 27157 + }, + { + "epoch": 0.6035111111111111, + "grad_norm": 1.5378587245941162, + "learning_rate": 7.93154034229829e-05, + "loss": 2.3004, + "step": 27158 + }, + { + "epoch": 0.6035333333333334, + "grad_norm": 1.8610527515411377, + "learning_rate": 7.931095799066459e-05, + "loss": 2.3126, + "step": 27159 + }, + { + "epoch": 0.6035555555555555, + "grad_norm": 1.5449259281158447, + "learning_rate": 7.93065125583463e-05, + "loss": 2.3821, + "step": 27160 + }, + { + "epoch": 0.6035777777777778, + "grad_norm": 1.38969886302948, + "learning_rate": 7.930206712602801e-05, + "loss": 1.047, + "step": 27161 + }, + { + "epoch": 0.6036, + "grad_norm": 1.143798828125, + "learning_rate": 7.92976216937097e-05, + "loss": 0.9785, + "step": 27162 + }, + { + "epoch": 0.6036222222222222, + "grad_norm": 1.6547763347625732, + "learning_rate": 7.929317626139142e-05, + "loss": 1.6648, + "step": 27163 + }, + { + "epoch": 0.6036444444444444, + "grad_norm": 1.617087960243225, + "learning_rate": 7.928873082907314e-05, + "loss": 1.9899, + "step": 27164 + }, + { + "epoch": 0.6036666666666667, + "grad_norm": 1.8198072910308838, + "learning_rate": 7.928428539675485e-05, + "loss": 1.7478, + "step": 27165 + }, + { + "epoch": 0.6036888888888889, + "grad_norm": 1.6863237619400024, + "learning_rate": 7.927983996443654e-05, + "loss": 1.818, + "step": 27166 + }, + { + "epoch": 0.6037111111111111, + "grad_norm": 1.8025907278060913, + "learning_rate": 7.927539453211825e-05, + "loss": 2.3106, + "step": 27167 + }, + { + "epoch": 0.6037333333333333, + "grad_norm": 1.6756824254989624, + "learning_rate": 7.927094909979996e-05, + "loss": 1.7787, + "step": 27168 + }, + { + "epoch": 0.6037555555555556, + "grad_norm": 1.456647276878357, + "learning_rate": 7.926650366748166e-05, + "loss": 2.0248, + "step": 27169 + }, + { + "epoch": 0.6037777777777777, + "grad_norm": 1.53804612159729, + "learning_rate": 7.926205823516337e-05, + "loss": 1.9316, + "step": 27170 + }, + { + "epoch": 0.6038, + "grad_norm": 1.5869379043579102, + "learning_rate": 7.925761280284508e-05, + "loss": 1.799, + "step": 27171 + }, + { + "epoch": 0.6038222222222223, + "grad_norm": 1.6954032182693481, + "learning_rate": 7.925316737052679e-05, + "loss": 2.29, + "step": 27172 + }, + { + "epoch": 0.6038444444444444, + "grad_norm": 1.6798923015594482, + "learning_rate": 7.92487219382085e-05, + "loss": 1.4173, + "step": 27173 + }, + { + "epoch": 0.6038666666666667, + "grad_norm": 1.5482616424560547, + "learning_rate": 7.924427650589021e-05, + "loss": 2.2306, + "step": 27174 + }, + { + "epoch": 0.6038888888888889, + "grad_norm": 1.8107613325119019, + "learning_rate": 7.923983107357192e-05, + "loss": 1.8407, + "step": 27175 + }, + { + "epoch": 0.6039111111111111, + "grad_norm": 1.5991325378417969, + "learning_rate": 7.923538564125361e-05, + "loss": 1.8254, + "step": 27176 + }, + { + "epoch": 0.6039333333333333, + "grad_norm": 1.5964834690093994, + "learning_rate": 7.923094020893532e-05, + "loss": 1.9868, + "step": 27177 + }, + { + "epoch": 0.6039555555555556, + "grad_norm": 1.3201031684875488, + "learning_rate": 7.922649477661703e-05, + "loss": 1.4255, + "step": 27178 + }, + { + "epoch": 0.6039777777777777, + "grad_norm": 1.61896550655365, + "learning_rate": 7.922204934429873e-05, + "loss": 1.9333, + "step": 27179 + }, + { + "epoch": 0.604, + "grad_norm": 1.6375930309295654, + "learning_rate": 7.921760391198044e-05, + "loss": 1.5589, + "step": 27180 + }, + { + "epoch": 0.6040222222222222, + "grad_norm": 1.830711841583252, + "learning_rate": 7.921315847966215e-05, + "loss": 1.9801, + "step": 27181 + }, + { + "epoch": 0.6040444444444445, + "grad_norm": 1.8794705867767334, + "learning_rate": 7.920871304734386e-05, + "loss": 2.5178, + "step": 27182 + }, + { + "epoch": 0.6040666666666666, + "grad_norm": 2.103665590286255, + "learning_rate": 7.920426761502557e-05, + "loss": 2.5723, + "step": 27183 + }, + { + "epoch": 0.6040888888888889, + "grad_norm": 1.7538213729858398, + "learning_rate": 7.919982218270728e-05, + "loss": 1.7259, + "step": 27184 + }, + { + "epoch": 0.6041111111111112, + "grad_norm": 1.6424554586410522, + "learning_rate": 7.919537675038899e-05, + "loss": 1.8007, + "step": 27185 + }, + { + "epoch": 0.6041333333333333, + "grad_norm": 1.4428913593292236, + "learning_rate": 7.919093131807068e-05, + "loss": 1.6839, + "step": 27186 + }, + { + "epoch": 0.6041555555555556, + "grad_norm": 0.2574564516544342, + "learning_rate": 7.918648588575239e-05, + "loss": 0.0344, + "step": 27187 + }, + { + "epoch": 0.6041777777777778, + "grad_norm": 1.7294193506240845, + "learning_rate": 7.91820404534341e-05, + "loss": 1.8667, + "step": 27188 + }, + { + "epoch": 0.6042, + "grad_norm": 1.966111660003662, + "learning_rate": 7.91775950211158e-05, + "loss": 1.6948, + "step": 27189 + }, + { + "epoch": 0.6042222222222222, + "grad_norm": 1.6431033611297607, + "learning_rate": 7.917314958879751e-05, + "loss": 1.8165, + "step": 27190 + }, + { + "epoch": 0.6042444444444445, + "grad_norm": 1.6426076889038086, + "learning_rate": 7.916870415647922e-05, + "loss": 1.5127, + "step": 27191 + }, + { + "epoch": 0.6042666666666666, + "grad_norm": 1.80479896068573, + "learning_rate": 7.916425872416093e-05, + "loss": 1.668, + "step": 27192 + }, + { + "epoch": 0.6042888888888889, + "grad_norm": 1.8984283208847046, + "learning_rate": 7.915981329184264e-05, + "loss": 1.628, + "step": 27193 + }, + { + "epoch": 0.6043111111111111, + "grad_norm": 1.841007947921753, + "learning_rate": 7.915536785952435e-05, + "loss": 1.7181, + "step": 27194 + }, + { + "epoch": 0.6043333333333333, + "grad_norm": 1.8353080749511719, + "learning_rate": 7.915092242720606e-05, + "loss": 1.7911, + "step": 27195 + }, + { + "epoch": 0.6043555555555555, + "grad_norm": 2.1403040885925293, + "learning_rate": 7.914647699488775e-05, + "loss": 2.1268, + "step": 27196 + }, + { + "epoch": 0.6043777777777778, + "grad_norm": 1.801777958869934, + "learning_rate": 7.914203156256946e-05, + "loss": 1.7257, + "step": 27197 + }, + { + "epoch": 0.6044, + "grad_norm": 1.9252780675888062, + "learning_rate": 7.913758613025117e-05, + "loss": 1.7415, + "step": 27198 + }, + { + "epoch": 0.6044222222222222, + "grad_norm": 2.0512959957122803, + "learning_rate": 7.913314069793287e-05, + "loss": 1.5827, + "step": 27199 + }, + { + "epoch": 0.6044444444444445, + "grad_norm": 2.0335724353790283, + "learning_rate": 7.912869526561458e-05, + "loss": 1.6761, + "step": 27200 + }, + { + "epoch": 0.6044666666666667, + "grad_norm": 1.3120962381362915, + "learning_rate": 7.91242498332963e-05, + "loss": 2.4573, + "step": 27201 + }, + { + "epoch": 0.6044888888888889, + "grad_norm": 1.1846799850463867, + "learning_rate": 7.911980440097801e-05, + "loss": 1.0285, + "step": 27202 + }, + { + "epoch": 0.6045111111111111, + "grad_norm": 1.609234094619751, + "learning_rate": 7.91153589686597e-05, + "loss": 2.4472, + "step": 27203 + }, + { + "epoch": 0.6045333333333334, + "grad_norm": 1.491317868232727, + "learning_rate": 7.911091353634142e-05, + "loss": 2.3505, + "step": 27204 + }, + { + "epoch": 0.6045555555555555, + "grad_norm": 1.569449782371521, + "learning_rate": 7.910646810402312e-05, + "loss": 2.0256, + "step": 27205 + }, + { + "epoch": 0.6045777777777778, + "grad_norm": 1.4014590978622437, + "learning_rate": 7.910202267170482e-05, + "loss": 2.0003, + "step": 27206 + }, + { + "epoch": 0.6046, + "grad_norm": 1.7583537101745605, + "learning_rate": 7.909757723938653e-05, + "loss": 2.4809, + "step": 27207 + }, + { + "epoch": 0.6046222222222222, + "grad_norm": 1.4294236898422241, + "learning_rate": 7.909313180706824e-05, + "loss": 2.1107, + "step": 27208 + }, + { + "epoch": 0.6046444444444444, + "grad_norm": 1.4733035564422607, + "learning_rate": 7.908868637474994e-05, + "loss": 1.9086, + "step": 27209 + }, + { + "epoch": 0.6046666666666667, + "grad_norm": 2.2566213607788086, + "learning_rate": 7.908424094243166e-05, + "loss": 2.483, + "step": 27210 + }, + { + "epoch": 0.6046888888888889, + "grad_norm": 1.5477021932601929, + "learning_rate": 7.907979551011337e-05, + "loss": 1.8564, + "step": 27211 + }, + { + "epoch": 0.6047111111111111, + "grad_norm": 1.6028445959091187, + "learning_rate": 7.907535007779508e-05, + "loss": 2.3341, + "step": 27212 + }, + { + "epoch": 0.6047333333333333, + "grad_norm": 1.5999501943588257, + "learning_rate": 7.907090464547677e-05, + "loss": 2.1941, + "step": 27213 + }, + { + "epoch": 0.6047555555555556, + "grad_norm": 1.3347578048706055, + "learning_rate": 7.906645921315848e-05, + "loss": 1.9165, + "step": 27214 + }, + { + "epoch": 0.6047777777777777, + "grad_norm": 1.7065314054489136, + "learning_rate": 7.90620137808402e-05, + "loss": 1.665, + "step": 27215 + }, + { + "epoch": 0.6048, + "grad_norm": 1.4490126371383667, + "learning_rate": 7.905756834852189e-05, + "loss": 1.9074, + "step": 27216 + }, + { + "epoch": 0.6048222222222223, + "grad_norm": 1.6703357696533203, + "learning_rate": 7.90531229162036e-05, + "loss": 2.252, + "step": 27217 + }, + { + "epoch": 0.6048444444444444, + "grad_norm": 1.8422366380691528, + "learning_rate": 7.904867748388531e-05, + "loss": 2.2362, + "step": 27218 + }, + { + "epoch": 0.6048666666666667, + "grad_norm": 1.5340640544891357, + "learning_rate": 7.904423205156702e-05, + "loss": 1.6657, + "step": 27219 + }, + { + "epoch": 0.6048888888888889, + "grad_norm": 1.7700345516204834, + "learning_rate": 7.903978661924873e-05, + "loss": 2.0036, + "step": 27220 + }, + { + "epoch": 0.6049111111111111, + "grad_norm": 1.6051772832870483, + "learning_rate": 7.903534118693044e-05, + "loss": 1.916, + "step": 27221 + }, + { + "epoch": 0.6049333333333333, + "grad_norm": 1.0932059288024902, + "learning_rate": 7.903089575461215e-05, + "loss": 0.9623, + "step": 27222 + }, + { + "epoch": 0.6049555555555556, + "grad_norm": 1.5146722793579102, + "learning_rate": 7.902645032229384e-05, + "loss": 1.7762, + "step": 27223 + }, + { + "epoch": 0.6049777777777777, + "grad_norm": 1.7188857793807983, + "learning_rate": 7.902200488997555e-05, + "loss": 2.0552, + "step": 27224 + }, + { + "epoch": 0.605, + "grad_norm": 1.9077479839324951, + "learning_rate": 7.901755945765726e-05, + "loss": 2.2053, + "step": 27225 + }, + { + "epoch": 0.6050222222222222, + "grad_norm": 1.4720269441604614, + "learning_rate": 7.901311402533896e-05, + "loss": 1.8231, + "step": 27226 + }, + { + "epoch": 0.6050444444444445, + "grad_norm": 2.066772222518921, + "learning_rate": 7.900866859302067e-05, + "loss": 1.6994, + "step": 27227 + }, + { + "epoch": 0.6050666666666666, + "grad_norm": 1.4600918292999268, + "learning_rate": 7.900422316070238e-05, + "loss": 1.5678, + "step": 27228 + }, + { + "epoch": 0.6050888888888889, + "grad_norm": 1.7550400495529175, + "learning_rate": 7.899977772838409e-05, + "loss": 1.6418, + "step": 27229 + }, + { + "epoch": 0.6051111111111112, + "grad_norm": 1.729282021522522, + "learning_rate": 7.89953322960658e-05, + "loss": 2.0124, + "step": 27230 + }, + { + "epoch": 0.6051333333333333, + "grad_norm": 1.740505576133728, + "learning_rate": 7.899088686374751e-05, + "loss": 1.8965, + "step": 27231 + }, + { + "epoch": 0.6051555555555556, + "grad_norm": 1.786176085472107, + "learning_rate": 7.898644143142922e-05, + "loss": 2.0738, + "step": 27232 + }, + { + "epoch": 0.6051777777777778, + "grad_norm": 1.7551746368408203, + "learning_rate": 7.898199599911091e-05, + "loss": 2.156, + "step": 27233 + }, + { + "epoch": 0.6052, + "grad_norm": 1.7817493677139282, + "learning_rate": 7.897755056679262e-05, + "loss": 2.104, + "step": 27234 + }, + { + "epoch": 0.6052222222222222, + "grad_norm": 1.8015249967575073, + "learning_rate": 7.897310513447433e-05, + "loss": 1.7312, + "step": 27235 + }, + { + "epoch": 0.6052444444444445, + "grad_norm": 1.0835497379302979, + "learning_rate": 7.896865970215603e-05, + "loss": 0.8679, + "step": 27236 + }, + { + "epoch": 0.6052666666666666, + "grad_norm": 1.620829701423645, + "learning_rate": 7.896421426983774e-05, + "loss": 1.9263, + "step": 27237 + }, + { + "epoch": 0.6052888888888889, + "grad_norm": 1.481078863143921, + "learning_rate": 7.895976883751946e-05, + "loss": 1.5824, + "step": 27238 + }, + { + "epoch": 0.6053111111111111, + "grad_norm": 2.2054390907287598, + "learning_rate": 7.895532340520117e-05, + "loss": 2.0811, + "step": 27239 + }, + { + "epoch": 0.6053333333333333, + "grad_norm": 1.6682418584823608, + "learning_rate": 7.895087797288287e-05, + "loss": 1.8911, + "step": 27240 + }, + { + "epoch": 0.6053555555555555, + "grad_norm": 1.5624669790267944, + "learning_rate": 7.894643254056458e-05, + "loss": 1.5574, + "step": 27241 + }, + { + "epoch": 0.6053777777777778, + "grad_norm": 1.5307351350784302, + "learning_rate": 7.894198710824629e-05, + "loss": 1.6266, + "step": 27242 + }, + { + "epoch": 0.6054, + "grad_norm": 1.8009110689163208, + "learning_rate": 7.893754167592798e-05, + "loss": 1.8157, + "step": 27243 + }, + { + "epoch": 0.6054222222222222, + "grad_norm": 2.319335460662842, + "learning_rate": 7.893309624360969e-05, + "loss": 1.9652, + "step": 27244 + }, + { + "epoch": 0.6054444444444445, + "grad_norm": 1.7203480005264282, + "learning_rate": 7.89286508112914e-05, + "loss": 1.9449, + "step": 27245 + }, + { + "epoch": 0.6054666666666667, + "grad_norm": 1.9069678783416748, + "learning_rate": 7.89242053789731e-05, + "loss": 1.9355, + "step": 27246 + }, + { + "epoch": 0.6054888888888889, + "grad_norm": 2.3656368255615234, + "learning_rate": 7.891975994665482e-05, + "loss": 2.183, + "step": 27247 + }, + { + "epoch": 0.6055111111111111, + "grad_norm": 1.825256109237671, + "learning_rate": 7.891531451433653e-05, + "loss": 1.6806, + "step": 27248 + }, + { + "epoch": 0.6055333333333334, + "grad_norm": 1.5662660598754883, + "learning_rate": 7.891086908201824e-05, + "loss": 1.3733, + "step": 27249 + }, + { + "epoch": 0.6055555555555555, + "grad_norm": 1.6754958629608154, + "learning_rate": 7.890642364969994e-05, + "loss": 1.415, + "step": 27250 + }, + { + "epoch": 0.6055777777777778, + "grad_norm": 1.008105754852295, + "learning_rate": 7.890197821738165e-05, + "loss": 1.1714, + "step": 27251 + }, + { + "epoch": 0.6056, + "grad_norm": 1.5311484336853027, + "learning_rate": 7.889753278506336e-05, + "loss": 2.2801, + "step": 27252 + }, + { + "epoch": 0.6056222222222222, + "grad_norm": 1.440260648727417, + "learning_rate": 7.889308735274505e-05, + "loss": 1.8973, + "step": 27253 + }, + { + "epoch": 0.6056444444444444, + "grad_norm": 1.7567195892333984, + "learning_rate": 7.888864192042676e-05, + "loss": 2.3299, + "step": 27254 + }, + { + "epoch": 0.6056666666666667, + "grad_norm": 1.4583789110183716, + "learning_rate": 7.888419648810847e-05, + "loss": 2.1668, + "step": 27255 + }, + { + "epoch": 0.6056888888888889, + "grad_norm": 1.5148966312408447, + "learning_rate": 7.887975105579018e-05, + "loss": 2.1548, + "step": 27256 + }, + { + "epoch": 0.6057111111111111, + "grad_norm": 1.9175471067428589, + "learning_rate": 7.887530562347189e-05, + "loss": 2.3519, + "step": 27257 + }, + { + "epoch": 0.6057333333333333, + "grad_norm": 1.5939949750900269, + "learning_rate": 7.88708601911536e-05, + "loss": 2.2127, + "step": 27258 + }, + { + "epoch": 0.6057555555555556, + "grad_norm": 0.9889760613441467, + "learning_rate": 7.886641475883531e-05, + "loss": 1.006, + "step": 27259 + }, + { + "epoch": 0.6057777777777777, + "grad_norm": 1.8088592290878296, + "learning_rate": 7.8861969326517e-05, + "loss": 1.9756, + "step": 27260 + }, + { + "epoch": 0.6058, + "grad_norm": 1.4391329288482666, + "learning_rate": 7.885752389419871e-05, + "loss": 1.9602, + "step": 27261 + }, + { + "epoch": 0.6058222222222223, + "grad_norm": 1.4422990083694458, + "learning_rate": 7.885307846188042e-05, + "loss": 1.9709, + "step": 27262 + }, + { + "epoch": 0.6058444444444444, + "grad_norm": 1.6840225458145142, + "learning_rate": 7.884863302956212e-05, + "loss": 2.2694, + "step": 27263 + }, + { + "epoch": 0.6058666666666667, + "grad_norm": 1.5895969867706299, + "learning_rate": 7.884418759724383e-05, + "loss": 1.5241, + "step": 27264 + }, + { + "epoch": 0.6058888888888889, + "grad_norm": 1.3911423683166504, + "learning_rate": 7.883974216492554e-05, + "loss": 1.8146, + "step": 27265 + }, + { + "epoch": 0.6059111111111111, + "grad_norm": 1.4521762132644653, + "learning_rate": 7.883529673260725e-05, + "loss": 1.9944, + "step": 27266 + }, + { + "epoch": 0.6059333333333333, + "grad_norm": 1.5626418590545654, + "learning_rate": 7.883085130028896e-05, + "loss": 1.9758, + "step": 27267 + }, + { + "epoch": 0.6059555555555556, + "grad_norm": 1.6539586782455444, + "learning_rate": 7.882640586797067e-05, + "loss": 1.8796, + "step": 27268 + }, + { + "epoch": 0.6059777777777777, + "grad_norm": 1.6141746044158936, + "learning_rate": 7.882196043565238e-05, + "loss": 1.9365, + "step": 27269 + }, + { + "epoch": 0.606, + "grad_norm": 1.7146342992782593, + "learning_rate": 7.881751500333407e-05, + "loss": 1.8579, + "step": 27270 + }, + { + "epoch": 0.6060222222222222, + "grad_norm": 1.6857411861419678, + "learning_rate": 7.881306957101578e-05, + "loss": 1.7205, + "step": 27271 + }, + { + "epoch": 0.6060444444444445, + "grad_norm": 1.569933295249939, + "learning_rate": 7.88086241386975e-05, + "loss": 1.335, + "step": 27272 + }, + { + "epoch": 0.6060666666666666, + "grad_norm": 1.507858395576477, + "learning_rate": 7.880417870637919e-05, + "loss": 1.7434, + "step": 27273 + }, + { + "epoch": 0.6060888888888889, + "grad_norm": 1.8040244579315186, + "learning_rate": 7.87997332740609e-05, + "loss": 2.213, + "step": 27274 + }, + { + "epoch": 0.6061111111111112, + "grad_norm": 1.3667738437652588, + "learning_rate": 7.879528784174262e-05, + "loss": 1.572, + "step": 27275 + }, + { + "epoch": 0.6061333333333333, + "grad_norm": 1.5162640810012817, + "learning_rate": 7.879084240942433e-05, + "loss": 1.8487, + "step": 27276 + }, + { + "epoch": 0.6061555555555556, + "grad_norm": 1.6777446269989014, + "learning_rate": 7.878639697710603e-05, + "loss": 1.6855, + "step": 27277 + }, + { + "epoch": 0.6061777777777778, + "grad_norm": 1.7804574966430664, + "learning_rate": 7.878195154478774e-05, + "loss": 2.2795, + "step": 27278 + }, + { + "epoch": 0.6062, + "grad_norm": 1.5175312757492065, + "learning_rate": 7.877750611246945e-05, + "loss": 0.8556, + "step": 27279 + }, + { + "epoch": 0.6062222222222222, + "grad_norm": 2.4243533611297607, + "learning_rate": 7.877306068015114e-05, + "loss": 2.2855, + "step": 27280 + }, + { + "epoch": 0.6062444444444445, + "grad_norm": 1.8361667394638062, + "learning_rate": 7.876861524783285e-05, + "loss": 1.5862, + "step": 27281 + }, + { + "epoch": 0.6062666666666666, + "grad_norm": 1.846785068511963, + "learning_rate": 7.876416981551456e-05, + "loss": 1.634, + "step": 27282 + }, + { + "epoch": 0.6062888888888889, + "grad_norm": 1.6902236938476562, + "learning_rate": 7.875972438319626e-05, + "loss": 1.8415, + "step": 27283 + }, + { + "epoch": 0.6063111111111111, + "grad_norm": 1.7941762208938599, + "learning_rate": 7.875527895087798e-05, + "loss": 2.0218, + "step": 27284 + }, + { + "epoch": 0.6063333333333333, + "grad_norm": 2.0850422382354736, + "learning_rate": 7.875083351855969e-05, + "loss": 2.2714, + "step": 27285 + }, + { + "epoch": 0.6063555555555555, + "grad_norm": 1.5774202346801758, + "learning_rate": 7.87463880862414e-05, + "loss": 1.7006, + "step": 27286 + }, + { + "epoch": 0.6063777777777778, + "grad_norm": 1.839981198310852, + "learning_rate": 7.87419426539231e-05, + "loss": 1.9587, + "step": 27287 + }, + { + "epoch": 0.6064, + "grad_norm": 2.1352789402008057, + "learning_rate": 7.87374972216048e-05, + "loss": 1.5797, + "step": 27288 + }, + { + "epoch": 0.6064222222222222, + "grad_norm": 1.870865821838379, + "learning_rate": 7.873305178928652e-05, + "loss": 2.1551, + "step": 27289 + }, + { + "epoch": 0.6064444444444445, + "grad_norm": 1.6629317998886108, + "learning_rate": 7.872860635696821e-05, + "loss": 1.7822, + "step": 27290 + }, + { + "epoch": 0.6064666666666667, + "grad_norm": 1.589384913444519, + "learning_rate": 7.872416092464992e-05, + "loss": 1.4413, + "step": 27291 + }, + { + "epoch": 0.6064888888888889, + "grad_norm": 2.1463875770568848, + "learning_rate": 7.871971549233163e-05, + "loss": 1.874, + "step": 27292 + }, + { + "epoch": 0.6065111111111111, + "grad_norm": 1.9512488842010498, + "learning_rate": 7.871527006001334e-05, + "loss": 1.79, + "step": 27293 + }, + { + "epoch": 0.6065333333333334, + "grad_norm": 1.5353994369506836, + "learning_rate": 7.871082462769505e-05, + "loss": 1.5228, + "step": 27294 + }, + { + "epoch": 0.6065555555555555, + "grad_norm": 1.8420060873031616, + "learning_rate": 7.870637919537676e-05, + "loss": 2.0819, + "step": 27295 + }, + { + "epoch": 0.6065777777777778, + "grad_norm": 1.686511754989624, + "learning_rate": 7.870193376305847e-05, + "loss": 1.87, + "step": 27296 + }, + { + "epoch": 0.6066, + "grad_norm": 1.0739492177963257, + "learning_rate": 7.869748833074017e-05, + "loss": 0.8274, + "step": 27297 + }, + { + "epoch": 0.6066222222222222, + "grad_norm": 1.7083632946014404, + "learning_rate": 7.869304289842188e-05, + "loss": 1.4793, + "step": 27298 + }, + { + "epoch": 0.6066444444444444, + "grad_norm": 1.871471643447876, + "learning_rate": 7.868859746610359e-05, + "loss": 1.7419, + "step": 27299 + }, + { + "epoch": 0.6066666666666667, + "grad_norm": 2.1125330924987793, + "learning_rate": 7.868415203378528e-05, + "loss": 2.0606, + "step": 27300 + }, + { + "epoch": 0.6066888888888889, + "grad_norm": 1.3407315015792847, + "learning_rate": 7.867970660146699e-05, + "loss": 2.1311, + "step": 27301 + }, + { + "epoch": 0.6067111111111111, + "grad_norm": 1.8985822200775146, + "learning_rate": 7.86752611691487e-05, + "loss": 2.2525, + "step": 27302 + }, + { + "epoch": 0.6067333333333333, + "grad_norm": 1.5596638917922974, + "learning_rate": 7.867081573683041e-05, + "loss": 2.4506, + "step": 27303 + }, + { + "epoch": 0.6067555555555556, + "grad_norm": 2.5087380409240723, + "learning_rate": 7.866637030451212e-05, + "loss": 2.2414, + "step": 27304 + }, + { + "epoch": 0.6067777777777777, + "grad_norm": 1.667316198348999, + "learning_rate": 7.866192487219383e-05, + "loss": 2.4115, + "step": 27305 + }, + { + "epoch": 0.6068, + "grad_norm": 1.6813075542449951, + "learning_rate": 7.865747943987554e-05, + "loss": 2.1599, + "step": 27306 + }, + { + "epoch": 0.6068222222222223, + "grad_norm": 1.5888385772705078, + "learning_rate": 7.865303400755724e-05, + "loss": 2.2666, + "step": 27307 + }, + { + "epoch": 0.6068444444444444, + "grad_norm": 1.5145535469055176, + "learning_rate": 7.864858857523894e-05, + "loss": 2.2976, + "step": 27308 + }, + { + "epoch": 0.6068666666666667, + "grad_norm": 1.3751749992370605, + "learning_rate": 7.864414314292065e-05, + "loss": 1.326, + "step": 27309 + }, + { + "epoch": 0.6068888888888889, + "grad_norm": 1.3869388103485107, + "learning_rate": 7.863969771060235e-05, + "loss": 1.7573, + "step": 27310 + }, + { + "epoch": 0.6069111111111111, + "grad_norm": 1.439608097076416, + "learning_rate": 7.863525227828406e-05, + "loss": 1.6373, + "step": 27311 + }, + { + "epoch": 0.6069333333333333, + "grad_norm": 1.6022894382476807, + "learning_rate": 7.863080684596578e-05, + "loss": 1.9089, + "step": 27312 + }, + { + "epoch": 0.6069555555555556, + "grad_norm": 1.7709941864013672, + "learning_rate": 7.862636141364748e-05, + "loss": 2.3322, + "step": 27313 + }, + { + "epoch": 0.6069777777777777, + "grad_norm": 1.721430778503418, + "learning_rate": 7.862191598132919e-05, + "loss": 2.0007, + "step": 27314 + }, + { + "epoch": 0.607, + "grad_norm": 1.7934560775756836, + "learning_rate": 7.86174705490109e-05, + "loss": 2.5062, + "step": 27315 + }, + { + "epoch": 0.6070222222222222, + "grad_norm": 1.1028038263320923, + "learning_rate": 7.861302511669261e-05, + "loss": 0.6188, + "step": 27316 + }, + { + "epoch": 0.6070444444444445, + "grad_norm": 1.4227097034454346, + "learning_rate": 7.86085796843743e-05, + "loss": 1.8313, + "step": 27317 + }, + { + "epoch": 0.6070666666666666, + "grad_norm": 1.8850692510604858, + "learning_rate": 7.860413425205601e-05, + "loss": 2.3957, + "step": 27318 + }, + { + "epoch": 0.6070888888888889, + "grad_norm": 1.5272574424743652, + "learning_rate": 7.859968881973772e-05, + "loss": 1.9499, + "step": 27319 + }, + { + "epoch": 0.6071111111111112, + "grad_norm": 1.9343817234039307, + "learning_rate": 7.859524338741942e-05, + "loss": 2.4507, + "step": 27320 + }, + { + "epoch": 0.6071333333333333, + "grad_norm": 2.05100679397583, + "learning_rate": 7.859079795510114e-05, + "loss": 2.0323, + "step": 27321 + }, + { + "epoch": 0.6071555555555556, + "grad_norm": 1.5332660675048828, + "learning_rate": 7.858635252278285e-05, + "loss": 1.4606, + "step": 27322 + }, + { + "epoch": 0.6071777777777778, + "grad_norm": 1.5789637565612793, + "learning_rate": 7.858190709046455e-05, + "loss": 2.0889, + "step": 27323 + }, + { + "epoch": 0.6072, + "grad_norm": 1.5830676555633545, + "learning_rate": 7.857746165814626e-05, + "loss": 1.7474, + "step": 27324 + }, + { + "epoch": 0.6072222222222222, + "grad_norm": 1.554005742073059, + "learning_rate": 7.857301622582797e-05, + "loss": 1.7042, + "step": 27325 + }, + { + "epoch": 0.6072444444444445, + "grad_norm": 1.6211838722229004, + "learning_rate": 7.856857079350968e-05, + "loss": 2.1875, + "step": 27326 + }, + { + "epoch": 0.6072666666666666, + "grad_norm": 1.5243436098098755, + "learning_rate": 7.856412536119137e-05, + "loss": 1.3351, + "step": 27327 + }, + { + "epoch": 0.6072888888888889, + "grad_norm": 1.7410411834716797, + "learning_rate": 7.855967992887308e-05, + "loss": 2.1272, + "step": 27328 + }, + { + "epoch": 0.6073111111111111, + "grad_norm": 1.3875596523284912, + "learning_rate": 7.855523449655479e-05, + "loss": 1.3673, + "step": 27329 + }, + { + "epoch": 0.6073333333333333, + "grad_norm": 1.7309584617614746, + "learning_rate": 7.85507890642365e-05, + "loss": 1.8283, + "step": 27330 + }, + { + "epoch": 0.6073555555555555, + "grad_norm": 1.6569769382476807, + "learning_rate": 7.854634363191821e-05, + "loss": 1.8252, + "step": 27331 + }, + { + "epoch": 0.6073777777777778, + "grad_norm": 1.6704628467559814, + "learning_rate": 7.854189819959992e-05, + "loss": 1.3384, + "step": 27332 + }, + { + "epoch": 0.6074, + "grad_norm": 2.1140072345733643, + "learning_rate": 7.853745276728163e-05, + "loss": 2.4161, + "step": 27333 + }, + { + "epoch": 0.6074222222222222, + "grad_norm": 1.9944144487380981, + "learning_rate": 7.853300733496333e-05, + "loss": 1.5231, + "step": 27334 + }, + { + "epoch": 0.6074444444444445, + "grad_norm": 0.5336458086967468, + "learning_rate": 7.852856190264504e-05, + "loss": 0.0307, + "step": 27335 + }, + { + "epoch": 0.6074666666666667, + "grad_norm": 1.9870555400848389, + "learning_rate": 7.852411647032675e-05, + "loss": 2.0026, + "step": 27336 + }, + { + "epoch": 0.6074888888888889, + "grad_norm": 1.7697476148605347, + "learning_rate": 7.851967103800844e-05, + "loss": 1.9533, + "step": 27337 + }, + { + "epoch": 0.6075111111111111, + "grad_norm": 1.5129780769348145, + "learning_rate": 7.851522560569015e-05, + "loss": 1.7916, + "step": 27338 + }, + { + "epoch": 0.6075333333333334, + "grad_norm": 1.7161614894866943, + "learning_rate": 7.851078017337186e-05, + "loss": 1.9484, + "step": 27339 + }, + { + "epoch": 0.6075555555555555, + "grad_norm": 2.0948965549468994, + "learning_rate": 7.850633474105357e-05, + "loss": 1.9074, + "step": 27340 + }, + { + "epoch": 0.6075777777777778, + "grad_norm": 1.6959728002548218, + "learning_rate": 7.850188930873528e-05, + "loss": 1.0733, + "step": 27341 + }, + { + "epoch": 0.6076, + "grad_norm": 2.467322826385498, + "learning_rate": 7.849744387641699e-05, + "loss": 2.4551, + "step": 27342 + }, + { + "epoch": 0.6076222222222222, + "grad_norm": 2.0165164470672607, + "learning_rate": 7.84929984440987e-05, + "loss": 2.0362, + "step": 27343 + }, + { + "epoch": 0.6076444444444444, + "grad_norm": 1.6614750623703003, + "learning_rate": 7.84885530117804e-05, + "loss": 1.601, + "step": 27344 + }, + { + "epoch": 0.6076666666666667, + "grad_norm": 1.7933533191680908, + "learning_rate": 7.84841075794621e-05, + "loss": 1.6952, + "step": 27345 + }, + { + "epoch": 0.6076888888888888, + "grad_norm": 1.201366901397705, + "learning_rate": 7.847966214714382e-05, + "loss": 0.7091, + "step": 27346 + }, + { + "epoch": 0.6077111111111111, + "grad_norm": 1.7066757678985596, + "learning_rate": 7.847521671482551e-05, + "loss": 1.773, + "step": 27347 + }, + { + "epoch": 0.6077333333333333, + "grad_norm": 2.016390800476074, + "learning_rate": 7.847077128250722e-05, + "loss": 1.7361, + "step": 27348 + }, + { + "epoch": 0.6077555555555556, + "grad_norm": 2.0306246280670166, + "learning_rate": 7.846632585018894e-05, + "loss": 2.0121, + "step": 27349 + }, + { + "epoch": 0.6077777777777778, + "grad_norm": 2.100975751876831, + "learning_rate": 7.846188041787064e-05, + "loss": 1.523, + "step": 27350 + }, + { + "epoch": 0.6078, + "grad_norm": 1.7427887916564941, + "learning_rate": 7.845743498555235e-05, + "loss": 3.027, + "step": 27351 + }, + { + "epoch": 0.6078222222222223, + "grad_norm": 1.467203974723816, + "learning_rate": 7.845298955323406e-05, + "loss": 2.4911, + "step": 27352 + }, + { + "epoch": 0.6078444444444444, + "grad_norm": 1.4917911291122437, + "learning_rate": 7.844854412091577e-05, + "loss": 1.8796, + "step": 27353 + }, + { + "epoch": 0.6078666666666667, + "grad_norm": 1.814010500907898, + "learning_rate": 7.844409868859747e-05, + "loss": 2.5444, + "step": 27354 + }, + { + "epoch": 0.6078888888888889, + "grad_norm": 1.3394798040390015, + "learning_rate": 7.843965325627917e-05, + "loss": 2.0518, + "step": 27355 + }, + { + "epoch": 0.6079111111111111, + "grad_norm": 1.4140102863311768, + "learning_rate": 7.843520782396088e-05, + "loss": 1.8226, + "step": 27356 + }, + { + "epoch": 0.6079333333333333, + "grad_norm": 1.5619611740112305, + "learning_rate": 7.843076239164258e-05, + "loss": 2.2057, + "step": 27357 + }, + { + "epoch": 0.6079555555555556, + "grad_norm": 1.7596157789230347, + "learning_rate": 7.84263169593243e-05, + "loss": 2.0465, + "step": 27358 + }, + { + "epoch": 0.6079777777777777, + "grad_norm": 1.662670612335205, + "learning_rate": 7.842187152700601e-05, + "loss": 2.4204, + "step": 27359 + }, + { + "epoch": 0.608, + "grad_norm": 1.7003495693206787, + "learning_rate": 7.841742609468771e-05, + "loss": 2.2705, + "step": 27360 + }, + { + "epoch": 0.6080222222222222, + "grad_norm": 1.5685595273971558, + "learning_rate": 7.841298066236942e-05, + "loss": 1.9704, + "step": 27361 + }, + { + "epoch": 0.6080444444444445, + "grad_norm": 1.3184891939163208, + "learning_rate": 7.840853523005113e-05, + "loss": 1.5464, + "step": 27362 + }, + { + "epoch": 0.6080666666666666, + "grad_norm": 1.4261938333511353, + "learning_rate": 7.840408979773284e-05, + "loss": 1.9069, + "step": 27363 + }, + { + "epoch": 0.6080888888888889, + "grad_norm": 1.6896491050720215, + "learning_rate": 7.839964436541453e-05, + "loss": 2.0931, + "step": 27364 + }, + { + "epoch": 0.6081111111111112, + "grad_norm": 1.500627040863037, + "learning_rate": 7.839519893309624e-05, + "loss": 2.434, + "step": 27365 + }, + { + "epoch": 0.6081333333333333, + "grad_norm": 1.5913405418395996, + "learning_rate": 7.839075350077795e-05, + "loss": 1.9811, + "step": 27366 + }, + { + "epoch": 0.6081555555555556, + "grad_norm": 1.6660000085830688, + "learning_rate": 7.838630806845966e-05, + "loss": 2.097, + "step": 27367 + }, + { + "epoch": 0.6081777777777778, + "grad_norm": 1.4471626281738281, + "learning_rate": 7.838186263614137e-05, + "loss": 1.7771, + "step": 27368 + }, + { + "epoch": 0.6082, + "grad_norm": 1.5671677589416504, + "learning_rate": 7.837741720382308e-05, + "loss": 1.8492, + "step": 27369 + }, + { + "epoch": 0.6082222222222222, + "grad_norm": 1.9340373277664185, + "learning_rate": 7.837297177150478e-05, + "loss": 2.2269, + "step": 27370 + }, + { + "epoch": 0.6082444444444445, + "grad_norm": 1.7883458137512207, + "learning_rate": 7.836852633918649e-05, + "loss": 2.0126, + "step": 27371 + }, + { + "epoch": 0.6082666666666666, + "grad_norm": 1.9143633842468262, + "learning_rate": 7.83640809068682e-05, + "loss": 1.8424, + "step": 27372 + }, + { + "epoch": 0.6082888888888889, + "grad_norm": 1.63086998462677, + "learning_rate": 7.835963547454991e-05, + "loss": 1.5828, + "step": 27373 + }, + { + "epoch": 0.6083111111111111, + "grad_norm": 1.1815743446350098, + "learning_rate": 7.83551900422316e-05, + "loss": 1.0505, + "step": 27374 + }, + { + "epoch": 0.6083333333333333, + "grad_norm": 1.5303187370300293, + "learning_rate": 7.835074460991331e-05, + "loss": 1.6238, + "step": 27375 + }, + { + "epoch": 0.6083555555555555, + "grad_norm": 1.7506102323532104, + "learning_rate": 7.834629917759502e-05, + "loss": 1.8319, + "step": 27376 + }, + { + "epoch": 0.6083777777777778, + "grad_norm": 1.247265338897705, + "learning_rate": 7.834185374527673e-05, + "loss": 0.7261, + "step": 27377 + }, + { + "epoch": 0.6084, + "grad_norm": 1.6127877235412598, + "learning_rate": 7.833740831295844e-05, + "loss": 1.714, + "step": 27378 + }, + { + "epoch": 0.6084222222222222, + "grad_norm": 1.9505119323730469, + "learning_rate": 7.833296288064015e-05, + "loss": 1.7232, + "step": 27379 + }, + { + "epoch": 0.6084444444444445, + "grad_norm": 1.985364317893982, + "learning_rate": 7.832851744832185e-05, + "loss": 2.0324, + "step": 27380 + }, + { + "epoch": 0.6084666666666667, + "grad_norm": 1.7504750490188599, + "learning_rate": 7.832407201600356e-05, + "loss": 1.4993, + "step": 27381 + }, + { + "epoch": 0.6084888888888889, + "grad_norm": 1.5885565280914307, + "learning_rate": 7.831962658368527e-05, + "loss": 1.5342, + "step": 27382 + }, + { + "epoch": 0.6085111111111111, + "grad_norm": 1.6928197145462036, + "learning_rate": 7.831518115136698e-05, + "loss": 1.4337, + "step": 27383 + }, + { + "epoch": 0.6085333333333334, + "grad_norm": 1.6309256553649902, + "learning_rate": 7.831073571904867e-05, + "loss": 2.0033, + "step": 27384 + }, + { + "epoch": 0.6085555555555555, + "grad_norm": 1.6423968076705933, + "learning_rate": 7.830629028673038e-05, + "loss": 1.3243, + "step": 27385 + }, + { + "epoch": 0.6085777777777778, + "grad_norm": 1.1410245895385742, + "learning_rate": 7.83018448544121e-05, + "loss": 0.9618, + "step": 27386 + }, + { + "epoch": 0.6086, + "grad_norm": 1.119278073310852, + "learning_rate": 7.82973994220938e-05, + "loss": 0.7651, + "step": 27387 + }, + { + "epoch": 0.6086222222222222, + "grad_norm": 1.5896695852279663, + "learning_rate": 7.829295398977551e-05, + "loss": 1.9563, + "step": 27388 + }, + { + "epoch": 0.6086444444444444, + "grad_norm": 1.5537821054458618, + "learning_rate": 7.828850855745722e-05, + "loss": 1.7107, + "step": 27389 + }, + { + "epoch": 0.6086666666666667, + "grad_norm": 1.7940185070037842, + "learning_rate": 7.828406312513893e-05, + "loss": 1.8262, + "step": 27390 + }, + { + "epoch": 0.6086888888888888, + "grad_norm": 1.3464748859405518, + "learning_rate": 7.827961769282063e-05, + "loss": 1.406, + "step": 27391 + }, + { + "epoch": 0.6087111111111111, + "grad_norm": 1.7492023706436157, + "learning_rate": 7.827517226050234e-05, + "loss": 1.7909, + "step": 27392 + }, + { + "epoch": 0.6087333333333333, + "grad_norm": 1.7525969743728638, + "learning_rate": 7.827072682818405e-05, + "loss": 1.8682, + "step": 27393 + }, + { + "epoch": 0.6087555555555556, + "grad_norm": 1.7507290840148926, + "learning_rate": 7.826628139586574e-05, + "loss": 1.6276, + "step": 27394 + }, + { + "epoch": 0.6087777777777778, + "grad_norm": 1.8546593189239502, + "learning_rate": 7.826183596354746e-05, + "loss": 1.8663, + "step": 27395 + }, + { + "epoch": 0.6088, + "grad_norm": 1.7273077964782715, + "learning_rate": 7.825739053122917e-05, + "loss": 1.669, + "step": 27396 + }, + { + "epoch": 0.6088222222222223, + "grad_norm": 1.8696497678756714, + "learning_rate": 7.825294509891087e-05, + "loss": 1.6732, + "step": 27397 + }, + { + "epoch": 0.6088444444444444, + "grad_norm": 1.6298273801803589, + "learning_rate": 7.824849966659258e-05, + "loss": 1.4251, + "step": 27398 + }, + { + "epoch": 0.6088666666666667, + "grad_norm": 1.6738899946212769, + "learning_rate": 7.824405423427429e-05, + "loss": 1.6784, + "step": 27399 + }, + { + "epoch": 0.6088888888888889, + "grad_norm": 1.6951320171356201, + "learning_rate": 7.8239608801956e-05, + "loss": 1.3885, + "step": 27400 + }, + { + "epoch": 0.6089111111111111, + "grad_norm": 1.6318590641021729, + "learning_rate": 7.82351633696377e-05, + "loss": 2.4896, + "step": 27401 + }, + { + "epoch": 0.6089333333333333, + "grad_norm": 0.9586099982261658, + "learning_rate": 7.82307179373194e-05, + "loss": 0.0219, + "step": 27402 + }, + { + "epoch": 0.6089555555555556, + "grad_norm": 1.6465784311294556, + "learning_rate": 7.822627250500111e-05, + "loss": 2.6767, + "step": 27403 + }, + { + "epoch": 0.6089777777777777, + "grad_norm": 1.5100815296173096, + "learning_rate": 7.822182707268282e-05, + "loss": 2.0998, + "step": 27404 + }, + { + "epoch": 0.609, + "grad_norm": 1.8771125078201294, + "learning_rate": 7.821738164036453e-05, + "loss": 1.9324, + "step": 27405 + }, + { + "epoch": 0.6090222222222222, + "grad_norm": 1.4821887016296387, + "learning_rate": 7.821293620804624e-05, + "loss": 2.3447, + "step": 27406 + }, + { + "epoch": 0.6090444444444445, + "grad_norm": 1.6751375198364258, + "learning_rate": 7.820849077572794e-05, + "loss": 2.2993, + "step": 27407 + }, + { + "epoch": 0.6090666666666666, + "grad_norm": 1.3872592449188232, + "learning_rate": 7.820404534340965e-05, + "loss": 1.4902, + "step": 27408 + }, + { + "epoch": 0.6090888888888889, + "grad_norm": 1.9620487689971924, + "learning_rate": 7.819959991109136e-05, + "loss": 2.845, + "step": 27409 + }, + { + "epoch": 0.6091111111111112, + "grad_norm": 2.082174777984619, + "learning_rate": 7.819515447877307e-05, + "loss": 2.0938, + "step": 27410 + }, + { + "epoch": 0.6091333333333333, + "grad_norm": 1.4058952331542969, + "learning_rate": 7.819070904645476e-05, + "loss": 2.0692, + "step": 27411 + }, + { + "epoch": 0.6091555555555556, + "grad_norm": 1.7376422882080078, + "learning_rate": 7.818626361413647e-05, + "loss": 2.0961, + "step": 27412 + }, + { + "epoch": 0.6091777777777778, + "grad_norm": 1.646878719329834, + "learning_rate": 7.818181818181818e-05, + "loss": 1.6845, + "step": 27413 + }, + { + "epoch": 0.6092, + "grad_norm": 1.623456358909607, + "learning_rate": 7.817737274949989e-05, + "loss": 2.2751, + "step": 27414 + }, + { + "epoch": 0.6092222222222222, + "grad_norm": 1.3822822570800781, + "learning_rate": 7.81729273171816e-05, + "loss": 1.9871, + "step": 27415 + }, + { + "epoch": 0.6092444444444445, + "grad_norm": 1.3027005195617676, + "learning_rate": 7.816848188486331e-05, + "loss": 1.0903, + "step": 27416 + }, + { + "epoch": 0.6092666666666666, + "grad_norm": 1.6102577447891235, + "learning_rate": 7.816403645254501e-05, + "loss": 2.0865, + "step": 27417 + }, + { + "epoch": 0.6092888888888889, + "grad_norm": 1.9533787965774536, + "learning_rate": 7.815959102022672e-05, + "loss": 1.7968, + "step": 27418 + }, + { + "epoch": 0.6093111111111111, + "grad_norm": 1.5359073877334595, + "learning_rate": 7.815514558790843e-05, + "loss": 2.1051, + "step": 27419 + }, + { + "epoch": 0.6093333333333333, + "grad_norm": 1.6860942840576172, + "learning_rate": 7.815070015559014e-05, + "loss": 2.169, + "step": 27420 + }, + { + "epoch": 0.6093555555555555, + "grad_norm": 1.7165453433990479, + "learning_rate": 7.814625472327183e-05, + "loss": 0.0347, + "step": 27421 + }, + { + "epoch": 0.6093777777777778, + "grad_norm": 1.7087652683258057, + "learning_rate": 7.814180929095354e-05, + "loss": 1.7233, + "step": 27422 + }, + { + "epoch": 0.6094, + "grad_norm": 1.8145941495895386, + "learning_rate": 7.813736385863527e-05, + "loss": 2.054, + "step": 27423 + }, + { + "epoch": 0.6094222222222222, + "grad_norm": 1.6878859996795654, + "learning_rate": 7.813291842631696e-05, + "loss": 2.2226, + "step": 27424 + }, + { + "epoch": 0.6094444444444445, + "grad_norm": 1.7352796792984009, + "learning_rate": 7.812847299399867e-05, + "loss": 1.7145, + "step": 27425 + }, + { + "epoch": 0.6094666666666667, + "grad_norm": 1.5544723272323608, + "learning_rate": 7.812402756168038e-05, + "loss": 1.967, + "step": 27426 + }, + { + "epoch": 0.6094888888888889, + "grad_norm": 1.5665216445922852, + "learning_rate": 7.811958212936208e-05, + "loss": 1.8513, + "step": 27427 + }, + { + "epoch": 0.6095111111111111, + "grad_norm": 1.6698379516601562, + "learning_rate": 7.811513669704379e-05, + "loss": 1.9505, + "step": 27428 + }, + { + "epoch": 0.6095333333333334, + "grad_norm": 1.8726260662078857, + "learning_rate": 7.81106912647255e-05, + "loss": 2.0823, + "step": 27429 + }, + { + "epoch": 0.6095555555555555, + "grad_norm": 1.5356457233428955, + "learning_rate": 7.81062458324072e-05, + "loss": 1.6717, + "step": 27430 + }, + { + "epoch": 0.6095777777777778, + "grad_norm": 1.660161018371582, + "learning_rate": 7.81018004000889e-05, + "loss": 2.0189, + "step": 27431 + }, + { + "epoch": 0.6096, + "grad_norm": 1.7959243059158325, + "learning_rate": 7.809735496777063e-05, + "loss": 2.1529, + "step": 27432 + }, + { + "epoch": 0.6096222222222222, + "grad_norm": 1.4932820796966553, + "learning_rate": 7.809290953545234e-05, + "loss": 1.0458, + "step": 27433 + }, + { + "epoch": 0.6096444444444444, + "grad_norm": 1.8300398588180542, + "learning_rate": 7.808846410313403e-05, + "loss": 1.8499, + "step": 27434 + }, + { + "epoch": 0.6096666666666667, + "grad_norm": 1.5831258296966553, + "learning_rate": 7.808401867081574e-05, + "loss": 1.7654, + "step": 27435 + }, + { + "epoch": 0.6096888888888888, + "grad_norm": 0.13884931802749634, + "learning_rate": 7.807957323849745e-05, + "loss": 0.0269, + "step": 27436 + }, + { + "epoch": 0.6097111111111111, + "grad_norm": 1.4457045793533325, + "learning_rate": 7.807512780617915e-05, + "loss": 1.692, + "step": 27437 + }, + { + "epoch": 0.6097333333333333, + "grad_norm": 1.3845202922821045, + "learning_rate": 7.807068237386086e-05, + "loss": 1.7402, + "step": 27438 + }, + { + "epoch": 0.6097555555555556, + "grad_norm": 1.6479185819625854, + "learning_rate": 7.806623694154257e-05, + "loss": 1.9473, + "step": 27439 + }, + { + "epoch": 0.6097777777777778, + "grad_norm": 2.167131185531616, + "learning_rate": 7.806179150922428e-05, + "loss": 1.6456, + "step": 27440 + }, + { + "epoch": 0.6098, + "grad_norm": 1.7285823822021484, + "learning_rate": 7.805734607690599e-05, + "loss": 1.9564, + "step": 27441 + }, + { + "epoch": 0.6098222222222223, + "grad_norm": 1.8743493556976318, + "learning_rate": 7.80529006445877e-05, + "loss": 1.9648, + "step": 27442 + }, + { + "epoch": 0.6098444444444444, + "grad_norm": 1.6637998819351196, + "learning_rate": 7.80484552122694e-05, + "loss": 1.7091, + "step": 27443 + }, + { + "epoch": 0.6098666666666667, + "grad_norm": 1.7053593397140503, + "learning_rate": 7.80440097799511e-05, + "loss": 1.613, + "step": 27444 + }, + { + "epoch": 0.6098888888888889, + "grad_norm": 1.8503854274749756, + "learning_rate": 7.803956434763281e-05, + "loss": 1.8533, + "step": 27445 + }, + { + "epoch": 0.6099111111111111, + "grad_norm": 1.8011876344680786, + "learning_rate": 7.803511891531452e-05, + "loss": 1.6717, + "step": 27446 + }, + { + "epoch": 0.6099333333333333, + "grad_norm": 1.5371845960617065, + "learning_rate": 7.803067348299623e-05, + "loss": 1.4231, + "step": 27447 + }, + { + "epoch": 0.6099555555555556, + "grad_norm": 2.444550037384033, + "learning_rate": 7.802622805067793e-05, + "loss": 2.5735, + "step": 27448 + }, + { + "epoch": 0.6099777777777777, + "grad_norm": 0.2376312017440796, + "learning_rate": 7.802178261835964e-05, + "loss": 0.0471, + "step": 27449 + }, + { + "epoch": 0.61, + "grad_norm": 1.6536933183670044, + "learning_rate": 7.801733718604134e-05, + "loss": 0.9774, + "step": 27450 + }, + { + "epoch": 0.6100222222222222, + "grad_norm": 1.3775696754455566, + "learning_rate": 7.801289175372305e-05, + "loss": 1.3143, + "step": 27451 + }, + { + "epoch": 0.6100444444444444, + "grad_norm": 1.582527995109558, + "learning_rate": 7.800844632140476e-05, + "loss": 2.2777, + "step": 27452 + }, + { + "epoch": 0.6100666666666666, + "grad_norm": 1.4803003072738647, + "learning_rate": 7.800400088908647e-05, + "loss": 1.9213, + "step": 27453 + }, + { + "epoch": 0.6100888888888889, + "grad_norm": 1.5033972263336182, + "learning_rate": 7.799955545676817e-05, + "loss": 2.2959, + "step": 27454 + }, + { + "epoch": 0.6101111111111112, + "grad_norm": 1.5518680810928345, + "learning_rate": 7.799511002444988e-05, + "loss": 2.2657, + "step": 27455 + }, + { + "epoch": 0.6101333333333333, + "grad_norm": 1.8300862312316895, + "learning_rate": 7.799066459213159e-05, + "loss": 2.2406, + "step": 27456 + }, + { + "epoch": 0.6101555555555556, + "grad_norm": 1.669783353805542, + "learning_rate": 7.79862191598133e-05, + "loss": 2.5407, + "step": 27457 + }, + { + "epoch": 0.6101777777777778, + "grad_norm": 1.7596453428268433, + "learning_rate": 7.7981773727495e-05, + "loss": 2.4514, + "step": 27458 + }, + { + "epoch": 0.6102, + "grad_norm": 1.6170973777770996, + "learning_rate": 7.79773282951767e-05, + "loss": 1.728, + "step": 27459 + }, + { + "epoch": 0.6102222222222222, + "grad_norm": 1.6608338356018066, + "learning_rate": 7.797288286285843e-05, + "loss": 2.3238, + "step": 27460 + }, + { + "epoch": 0.6102444444444445, + "grad_norm": 1.5813875198364258, + "learning_rate": 7.796843743054012e-05, + "loss": 1.9771, + "step": 27461 + }, + { + "epoch": 0.6102666666666666, + "grad_norm": 1.5008454322814941, + "learning_rate": 7.796399199822183e-05, + "loss": 1.8478, + "step": 27462 + }, + { + "epoch": 0.6102888888888889, + "grad_norm": 1.8809629678726196, + "learning_rate": 7.795954656590354e-05, + "loss": 2.0884, + "step": 27463 + }, + { + "epoch": 0.6103111111111111, + "grad_norm": 1.7680113315582275, + "learning_rate": 7.795510113358524e-05, + "loss": 2.0763, + "step": 27464 + }, + { + "epoch": 0.6103333333333333, + "grad_norm": 1.5036847591400146, + "learning_rate": 7.795065570126695e-05, + "loss": 2.0254, + "step": 27465 + }, + { + "epoch": 0.6103555555555555, + "grad_norm": 2.1038553714752197, + "learning_rate": 7.794621026894866e-05, + "loss": 2.4336, + "step": 27466 + }, + { + "epoch": 0.6103777777777778, + "grad_norm": 1.8677781820297241, + "learning_rate": 7.794176483663037e-05, + "loss": 2.2394, + "step": 27467 + }, + { + "epoch": 0.6104, + "grad_norm": 1.5848768949508667, + "learning_rate": 7.793731940431206e-05, + "loss": 2.1178, + "step": 27468 + }, + { + "epoch": 0.6104222222222222, + "grad_norm": 1.632435917854309, + "learning_rate": 7.793287397199379e-05, + "loss": 2.0393, + "step": 27469 + }, + { + "epoch": 0.6104444444444445, + "grad_norm": 2.08906626701355, + "learning_rate": 7.79284285396755e-05, + "loss": 1.511, + "step": 27470 + }, + { + "epoch": 0.6104666666666667, + "grad_norm": 1.5188167095184326, + "learning_rate": 7.792398310735719e-05, + "loss": 1.4935, + "step": 27471 + }, + { + "epoch": 0.6104888888888889, + "grad_norm": 1.579158067703247, + "learning_rate": 7.79195376750389e-05, + "loss": 1.81, + "step": 27472 + }, + { + "epoch": 0.6105111111111111, + "grad_norm": 1.6517826318740845, + "learning_rate": 7.791509224272061e-05, + "loss": 1.4328, + "step": 27473 + }, + { + "epoch": 0.6105333333333334, + "grad_norm": 1.608399510383606, + "learning_rate": 7.791064681040231e-05, + "loss": 1.9586, + "step": 27474 + }, + { + "epoch": 0.6105555555555555, + "grad_norm": 1.4978665113449097, + "learning_rate": 7.790620137808402e-05, + "loss": 1.7627, + "step": 27475 + }, + { + "epoch": 0.6105777777777778, + "grad_norm": 1.7245503664016724, + "learning_rate": 7.790175594576573e-05, + "loss": 2.1325, + "step": 27476 + }, + { + "epoch": 0.6106, + "grad_norm": 1.6567022800445557, + "learning_rate": 7.789731051344744e-05, + "loss": 1.58, + "step": 27477 + }, + { + "epoch": 0.6106222222222222, + "grad_norm": 1.8098033666610718, + "learning_rate": 7.789286508112915e-05, + "loss": 1.4043, + "step": 27478 + }, + { + "epoch": 0.6106444444444444, + "grad_norm": 1.8955461978912354, + "learning_rate": 7.788841964881086e-05, + "loss": 2.1242, + "step": 27479 + }, + { + "epoch": 0.6106666666666667, + "grad_norm": 1.7299959659576416, + "learning_rate": 7.788397421649257e-05, + "loss": 1.9613, + "step": 27480 + }, + { + "epoch": 0.6106888888888888, + "grad_norm": 1.5184121131896973, + "learning_rate": 7.787952878417426e-05, + "loss": 1.9766, + "step": 27481 + }, + { + "epoch": 0.6107111111111111, + "grad_norm": 1.6224559545516968, + "learning_rate": 7.787508335185597e-05, + "loss": 1.9477, + "step": 27482 + }, + { + "epoch": 0.6107333333333334, + "grad_norm": 1.9605573415756226, + "learning_rate": 7.787063791953768e-05, + "loss": 1.6935, + "step": 27483 + }, + { + "epoch": 0.6107555555555556, + "grad_norm": 1.8318862915039062, + "learning_rate": 7.786619248721938e-05, + "loss": 2.1745, + "step": 27484 + }, + { + "epoch": 0.6107777777777778, + "grad_norm": 1.8009825944900513, + "learning_rate": 7.786174705490109e-05, + "loss": 1.6737, + "step": 27485 + }, + { + "epoch": 0.6108, + "grad_norm": 1.9569668769836426, + "learning_rate": 7.78573016225828e-05, + "loss": 1.6701, + "step": 27486 + }, + { + "epoch": 0.6108222222222223, + "grad_norm": 1.9783457517623901, + "learning_rate": 7.78528561902645e-05, + "loss": 2.1003, + "step": 27487 + }, + { + "epoch": 0.6108444444444444, + "grad_norm": 1.848065972328186, + "learning_rate": 7.784841075794622e-05, + "loss": 1.8746, + "step": 27488 + }, + { + "epoch": 0.6108666666666667, + "grad_norm": 1.9032918214797974, + "learning_rate": 7.784396532562792e-05, + "loss": 1.8572, + "step": 27489 + }, + { + "epoch": 0.6108888888888889, + "grad_norm": 1.7915080785751343, + "learning_rate": 7.783951989330963e-05, + "loss": 1.771, + "step": 27490 + }, + { + "epoch": 0.6109111111111111, + "grad_norm": 1.5801305770874023, + "learning_rate": 7.783507446099133e-05, + "loss": 1.8012, + "step": 27491 + }, + { + "epoch": 0.6109333333333333, + "grad_norm": 1.9359170198440552, + "learning_rate": 7.783062902867304e-05, + "loss": 2.2787, + "step": 27492 + }, + { + "epoch": 0.6109555555555556, + "grad_norm": 1.6912680864334106, + "learning_rate": 7.782618359635475e-05, + "loss": 1.6391, + "step": 27493 + }, + { + "epoch": 0.6109777777777777, + "grad_norm": 1.9229602813720703, + "learning_rate": 7.782173816403646e-05, + "loss": 1.5608, + "step": 27494 + }, + { + "epoch": 0.611, + "grad_norm": 1.6610921621322632, + "learning_rate": 7.781729273171816e-05, + "loss": 1.9686, + "step": 27495 + }, + { + "epoch": 0.6110222222222222, + "grad_norm": 1.874182939529419, + "learning_rate": 7.781284729939987e-05, + "loss": 2.2633, + "step": 27496 + }, + { + "epoch": 0.6110444444444444, + "grad_norm": 1.746493935585022, + "learning_rate": 7.780840186708159e-05, + "loss": 1.7766, + "step": 27497 + }, + { + "epoch": 0.6110666666666666, + "grad_norm": 1.7898156642913818, + "learning_rate": 7.780395643476328e-05, + "loss": 1.9386, + "step": 27498 + }, + { + "epoch": 0.6110888888888889, + "grad_norm": 1.5614951848983765, + "learning_rate": 7.7799511002445e-05, + "loss": 1.7405, + "step": 27499 + }, + { + "epoch": 0.6111111111111112, + "grad_norm": 1.543195366859436, + "learning_rate": 7.77950655701267e-05, + "loss": 1.1225, + "step": 27500 + }, + { + "epoch": 0.6111333333333333, + "grad_norm": 1.5423308610916138, + "learning_rate": 7.77906201378084e-05, + "loss": 2.1288, + "step": 27501 + }, + { + "epoch": 0.6111555555555556, + "grad_norm": 1.339261531829834, + "learning_rate": 7.778617470549011e-05, + "loss": 2.4123, + "step": 27502 + }, + { + "epoch": 0.6111777777777778, + "grad_norm": 1.3870769739151, + "learning_rate": 7.778172927317182e-05, + "loss": 2.0975, + "step": 27503 + }, + { + "epoch": 0.6112, + "grad_norm": 1.0712703466415405, + "learning_rate": 7.777728384085353e-05, + "loss": 1.2014, + "step": 27504 + }, + { + "epoch": 0.6112222222222222, + "grad_norm": 1.5670156478881836, + "learning_rate": 7.777283840853522e-05, + "loss": 1.9654, + "step": 27505 + }, + { + "epoch": 0.6112444444444445, + "grad_norm": 1.445992350578308, + "learning_rate": 7.776839297621695e-05, + "loss": 2.1042, + "step": 27506 + }, + { + "epoch": 0.6112666666666666, + "grad_norm": 1.7465412616729736, + "learning_rate": 7.776394754389866e-05, + "loss": 2.0741, + "step": 27507 + }, + { + "epoch": 0.6112888888888889, + "grad_norm": 1.6744312047958374, + "learning_rate": 7.775950211158035e-05, + "loss": 2.7767, + "step": 27508 + }, + { + "epoch": 0.6113111111111111, + "grad_norm": 1.4504873752593994, + "learning_rate": 7.775505667926206e-05, + "loss": 2.3511, + "step": 27509 + }, + { + "epoch": 0.6113333333333333, + "grad_norm": 1.8150758743286133, + "learning_rate": 7.775061124694377e-05, + "loss": 2.4931, + "step": 27510 + }, + { + "epoch": 0.6113555555555555, + "grad_norm": 1.5724668502807617, + "learning_rate": 7.774616581462547e-05, + "loss": 2.251, + "step": 27511 + }, + { + "epoch": 0.6113777777777778, + "grad_norm": 1.3290016651153564, + "learning_rate": 7.774172038230718e-05, + "loss": 1.1945, + "step": 27512 + }, + { + "epoch": 0.6114, + "grad_norm": 1.7260475158691406, + "learning_rate": 7.773727494998889e-05, + "loss": 1.8431, + "step": 27513 + }, + { + "epoch": 0.6114222222222222, + "grad_norm": 1.6018413305282593, + "learning_rate": 7.77328295176706e-05, + "loss": 2.1677, + "step": 27514 + }, + { + "epoch": 0.6114444444444445, + "grad_norm": 1.3727264404296875, + "learning_rate": 7.772838408535231e-05, + "loss": 1.2625, + "step": 27515 + }, + { + "epoch": 0.6114666666666667, + "grad_norm": 1.6395866870880127, + "learning_rate": 7.772393865303402e-05, + "loss": 2.2097, + "step": 27516 + }, + { + "epoch": 0.6114888888888889, + "grad_norm": 1.7023524045944214, + "learning_rate": 7.771949322071573e-05, + "loss": 2.1454, + "step": 27517 + }, + { + "epoch": 0.6115111111111111, + "grad_norm": 1.579649567604065, + "learning_rate": 7.771504778839742e-05, + "loss": 1.9694, + "step": 27518 + }, + { + "epoch": 0.6115333333333334, + "grad_norm": 1.4795308113098145, + "learning_rate": 7.771060235607913e-05, + "loss": 2.1181, + "step": 27519 + }, + { + "epoch": 0.6115555555555555, + "grad_norm": 1.7093156576156616, + "learning_rate": 7.770615692376084e-05, + "loss": 2.3267, + "step": 27520 + }, + { + "epoch": 0.6115777777777778, + "grad_norm": 1.8103622198104858, + "learning_rate": 7.770171149144254e-05, + "loss": 1.9654, + "step": 27521 + }, + { + "epoch": 0.6116, + "grad_norm": 1.6107460260391235, + "learning_rate": 7.769726605912425e-05, + "loss": 1.3434, + "step": 27522 + }, + { + "epoch": 0.6116222222222222, + "grad_norm": 1.6847106218338013, + "learning_rate": 7.769282062680596e-05, + "loss": 2.5195, + "step": 27523 + }, + { + "epoch": 0.6116444444444444, + "grad_norm": 1.7250392436981201, + "learning_rate": 7.768837519448767e-05, + "loss": 1.5947, + "step": 27524 + }, + { + "epoch": 0.6116666666666667, + "grad_norm": 1.9823029041290283, + "learning_rate": 7.768392976216938e-05, + "loss": 2.2627, + "step": 27525 + }, + { + "epoch": 0.6116888888888888, + "grad_norm": 1.8412489891052246, + "learning_rate": 7.767948432985109e-05, + "loss": 2.0382, + "step": 27526 + }, + { + "epoch": 0.6117111111111111, + "grad_norm": 1.7352039813995361, + "learning_rate": 7.76750388975328e-05, + "loss": 2.2732, + "step": 27527 + }, + { + "epoch": 0.6117333333333334, + "grad_norm": 1.6988086700439453, + "learning_rate": 7.767059346521449e-05, + "loss": 2.2386, + "step": 27528 + }, + { + "epoch": 0.6117555555555556, + "grad_norm": 1.6477222442626953, + "learning_rate": 7.76661480328962e-05, + "loss": 1.4523, + "step": 27529 + }, + { + "epoch": 0.6117777777777778, + "grad_norm": 2.1043894290924072, + "learning_rate": 7.766170260057791e-05, + "loss": 2.5293, + "step": 27530 + }, + { + "epoch": 0.6118, + "grad_norm": 1.773444652557373, + "learning_rate": 7.765725716825961e-05, + "loss": 2.1363, + "step": 27531 + }, + { + "epoch": 0.6118222222222223, + "grad_norm": 1.4800810813903809, + "learning_rate": 7.765281173594132e-05, + "loss": 1.8682, + "step": 27532 + }, + { + "epoch": 0.6118444444444444, + "grad_norm": 1.6080175638198853, + "learning_rate": 7.764836630362303e-05, + "loss": 1.6381, + "step": 27533 + }, + { + "epoch": 0.6118666666666667, + "grad_norm": 2.55741548538208, + "learning_rate": 7.764392087130475e-05, + "loss": 1.8665, + "step": 27534 + }, + { + "epoch": 0.6118888888888889, + "grad_norm": 1.5693540573120117, + "learning_rate": 7.763947543898645e-05, + "loss": 1.8233, + "step": 27535 + }, + { + "epoch": 0.6119111111111111, + "grad_norm": 1.696226954460144, + "learning_rate": 7.763503000666816e-05, + "loss": 1.6646, + "step": 27536 + }, + { + "epoch": 0.6119333333333333, + "grad_norm": 2.2691214084625244, + "learning_rate": 7.763058457434986e-05, + "loss": 2.2198, + "step": 27537 + }, + { + "epoch": 0.6119555555555556, + "grad_norm": 1.8994804620742798, + "learning_rate": 7.762613914203156e-05, + "loss": 2.2349, + "step": 27538 + }, + { + "epoch": 0.6119777777777777, + "grad_norm": 1.870898962020874, + "learning_rate": 7.762169370971327e-05, + "loss": 1.89, + "step": 27539 + }, + { + "epoch": 0.612, + "grad_norm": 1.6274160146713257, + "learning_rate": 7.761724827739498e-05, + "loss": 1.7772, + "step": 27540 + }, + { + "epoch": 0.6120222222222222, + "grad_norm": 1.788335919380188, + "learning_rate": 7.761280284507668e-05, + "loss": 1.9265, + "step": 27541 + }, + { + "epoch": 0.6120444444444444, + "grad_norm": 1.7671217918395996, + "learning_rate": 7.760835741275839e-05, + "loss": 1.5955, + "step": 27542 + }, + { + "epoch": 0.6120666666666666, + "grad_norm": 1.9399702548980713, + "learning_rate": 7.760391198044011e-05, + "loss": 1.886, + "step": 27543 + }, + { + "epoch": 0.6120888888888889, + "grad_norm": 1.8233189582824707, + "learning_rate": 7.759946654812182e-05, + "loss": 1.7791, + "step": 27544 + }, + { + "epoch": 0.6121111111111112, + "grad_norm": 1.925369143486023, + "learning_rate": 7.759502111580351e-05, + "loss": 1.9089, + "step": 27545 + }, + { + "epoch": 0.6121333333333333, + "grad_norm": 1.8397341966629028, + "learning_rate": 7.759057568348522e-05, + "loss": 1.9789, + "step": 27546 + }, + { + "epoch": 0.6121555555555556, + "grad_norm": 1.8775615692138672, + "learning_rate": 7.758613025116693e-05, + "loss": 1.6122, + "step": 27547 + }, + { + "epoch": 0.6121777777777778, + "grad_norm": 2.0925540924072266, + "learning_rate": 7.758168481884863e-05, + "loss": 2.0339, + "step": 27548 + }, + { + "epoch": 0.6122, + "grad_norm": 2.579909086227417, + "learning_rate": 7.757723938653034e-05, + "loss": 1.9227, + "step": 27549 + }, + { + "epoch": 0.6122222222222222, + "grad_norm": 1.542546033859253, + "learning_rate": 7.757279395421205e-05, + "loss": 1.2056, + "step": 27550 + }, + { + "epoch": 0.6122444444444445, + "grad_norm": 1.1781761646270752, + "learning_rate": 7.756834852189376e-05, + "loss": 1.4263, + "step": 27551 + }, + { + "epoch": 0.6122666666666666, + "grad_norm": 1.4291971921920776, + "learning_rate": 7.756390308957547e-05, + "loss": 2.5189, + "step": 27552 + }, + { + "epoch": 0.6122888888888889, + "grad_norm": 1.0243903398513794, + "learning_rate": 7.755945765725718e-05, + "loss": 1.1948, + "step": 27553 + }, + { + "epoch": 0.6123111111111111, + "grad_norm": 1.0803135633468628, + "learning_rate": 7.755501222493889e-05, + "loss": 1.1821, + "step": 27554 + }, + { + "epoch": 0.6123333333333333, + "grad_norm": 1.4516526460647583, + "learning_rate": 7.755056679262058e-05, + "loss": 2.0488, + "step": 27555 + }, + { + "epoch": 0.6123555555555555, + "grad_norm": 1.5359574556350708, + "learning_rate": 7.754612136030229e-05, + "loss": 2.3512, + "step": 27556 + }, + { + "epoch": 0.6123777777777778, + "grad_norm": 1.5224683284759521, + "learning_rate": 7.7541675927984e-05, + "loss": 2.1224, + "step": 27557 + }, + { + "epoch": 0.6124, + "grad_norm": 1.4551442861557007, + "learning_rate": 7.75372304956657e-05, + "loss": 1.8269, + "step": 27558 + }, + { + "epoch": 0.6124222222222222, + "grad_norm": 1.4387001991271973, + "learning_rate": 7.753278506334741e-05, + "loss": 2.0894, + "step": 27559 + }, + { + "epoch": 0.6124444444444445, + "grad_norm": 2.2286064624786377, + "learning_rate": 7.752833963102912e-05, + "loss": 2.4909, + "step": 27560 + }, + { + "epoch": 0.6124666666666667, + "grad_norm": 1.8037059307098389, + "learning_rate": 7.752389419871083e-05, + "loss": 2.1049, + "step": 27561 + }, + { + "epoch": 0.6124888888888889, + "grad_norm": 1.8678205013275146, + "learning_rate": 7.751944876639254e-05, + "loss": 2.0448, + "step": 27562 + }, + { + "epoch": 0.6125111111111111, + "grad_norm": 1.6603857278823853, + "learning_rate": 7.751500333407425e-05, + "loss": 1.2267, + "step": 27563 + }, + { + "epoch": 0.6125333333333334, + "grad_norm": 1.5285018682479858, + "learning_rate": 7.751055790175596e-05, + "loss": 2.072, + "step": 27564 + }, + { + "epoch": 0.6125555555555555, + "grad_norm": 1.6152604818344116, + "learning_rate": 7.750611246943765e-05, + "loss": 2.062, + "step": 27565 + }, + { + "epoch": 0.6125777777777778, + "grad_norm": 1.51948082447052, + "learning_rate": 7.750166703711936e-05, + "loss": 1.5071, + "step": 27566 + }, + { + "epoch": 0.6126, + "grad_norm": 1.1879931688308716, + "learning_rate": 7.749722160480107e-05, + "loss": 1.1506, + "step": 27567 + }, + { + "epoch": 0.6126222222222222, + "grad_norm": 1.9524271488189697, + "learning_rate": 7.749277617248277e-05, + "loss": 2.4532, + "step": 27568 + }, + { + "epoch": 0.6126444444444444, + "grad_norm": 1.6999812126159668, + "learning_rate": 7.748833074016448e-05, + "loss": 1.6778, + "step": 27569 + }, + { + "epoch": 0.6126666666666667, + "grad_norm": 1.7853630781173706, + "learning_rate": 7.748388530784619e-05, + "loss": 2.3016, + "step": 27570 + }, + { + "epoch": 0.6126888888888888, + "grad_norm": 1.782258152961731, + "learning_rate": 7.747943987552791e-05, + "loss": 1.65, + "step": 27571 + }, + { + "epoch": 0.6127111111111111, + "grad_norm": 1.5772422552108765, + "learning_rate": 7.74749944432096e-05, + "loss": 2.018, + "step": 27572 + }, + { + "epoch": 0.6127333333333334, + "grad_norm": 2.0711746215820312, + "learning_rate": 7.747054901089132e-05, + "loss": 2.2871, + "step": 27573 + }, + { + "epoch": 0.6127555555555556, + "grad_norm": 1.7900344133377075, + "learning_rate": 7.746610357857303e-05, + "loss": 1.9562, + "step": 27574 + }, + { + "epoch": 0.6127777777777778, + "grad_norm": 1.6933283805847168, + "learning_rate": 7.746165814625472e-05, + "loss": 1.9733, + "step": 27575 + }, + { + "epoch": 0.6128, + "grad_norm": 1.7561581134796143, + "learning_rate": 7.745721271393643e-05, + "loss": 2.1765, + "step": 27576 + }, + { + "epoch": 0.6128222222222223, + "grad_norm": 1.5848567485809326, + "learning_rate": 7.745276728161814e-05, + "loss": 1.8434, + "step": 27577 + }, + { + "epoch": 0.6128444444444444, + "grad_norm": 1.4822124242782593, + "learning_rate": 7.744832184929984e-05, + "loss": 2.0103, + "step": 27578 + }, + { + "epoch": 0.6128666666666667, + "grad_norm": 1.7740105390548706, + "learning_rate": 7.744387641698155e-05, + "loss": 2.0244, + "step": 27579 + }, + { + "epoch": 0.6128888888888889, + "grad_norm": 1.7334976196289062, + "learning_rate": 7.743943098466327e-05, + "loss": 1.9153, + "step": 27580 + }, + { + "epoch": 0.6129111111111111, + "grad_norm": 1.635332465171814, + "learning_rate": 7.743498555234498e-05, + "loss": 1.9374, + "step": 27581 + }, + { + "epoch": 0.6129333333333333, + "grad_norm": 1.4835823774337769, + "learning_rate": 7.743054012002668e-05, + "loss": 2.0732, + "step": 27582 + }, + { + "epoch": 0.6129555555555556, + "grad_norm": 1.4527803659439087, + "learning_rate": 7.742609468770839e-05, + "loss": 1.6363, + "step": 27583 + }, + { + "epoch": 0.6129777777777777, + "grad_norm": 1.216811180114746, + "learning_rate": 7.74216492553901e-05, + "loss": 0.8762, + "step": 27584 + }, + { + "epoch": 0.613, + "grad_norm": 1.8603177070617676, + "learning_rate": 7.741720382307179e-05, + "loss": 2.3087, + "step": 27585 + }, + { + "epoch": 0.6130222222222222, + "grad_norm": 1.8066385984420776, + "learning_rate": 7.74127583907535e-05, + "loss": 1.8244, + "step": 27586 + }, + { + "epoch": 0.6130444444444444, + "grad_norm": 1.5782830715179443, + "learning_rate": 7.740831295843521e-05, + "loss": 1.5668, + "step": 27587 + }, + { + "epoch": 0.6130666666666666, + "grad_norm": 1.8840564489364624, + "learning_rate": 7.740386752611692e-05, + "loss": 1.7731, + "step": 27588 + }, + { + "epoch": 0.6130888888888889, + "grad_norm": 2.1401329040527344, + "learning_rate": 7.739942209379863e-05, + "loss": 2.0632, + "step": 27589 + }, + { + "epoch": 0.6131111111111112, + "grad_norm": 1.8816437721252441, + "learning_rate": 7.739497666148034e-05, + "loss": 1.6173, + "step": 27590 + }, + { + "epoch": 0.6131333333333333, + "grad_norm": 2.0451958179473877, + "learning_rate": 7.739053122916205e-05, + "loss": 2.0848, + "step": 27591 + }, + { + "epoch": 0.6131555555555556, + "grad_norm": 1.6593228578567505, + "learning_rate": 7.738608579684374e-05, + "loss": 1.7136, + "step": 27592 + }, + { + "epoch": 0.6131777777777778, + "grad_norm": 1.5360110998153687, + "learning_rate": 7.738164036452545e-05, + "loss": 1.747, + "step": 27593 + }, + { + "epoch": 0.6132, + "grad_norm": 2.1586971282958984, + "learning_rate": 7.737719493220716e-05, + "loss": 1.9396, + "step": 27594 + }, + { + "epoch": 0.6132222222222222, + "grad_norm": 1.9175387620925903, + "learning_rate": 7.737274949988886e-05, + "loss": 1.7093, + "step": 27595 + }, + { + "epoch": 0.6132444444444445, + "grad_norm": 1.982151985168457, + "learning_rate": 7.736830406757057e-05, + "loss": 2.1863, + "step": 27596 + }, + { + "epoch": 0.6132666666666666, + "grad_norm": 3.3169479370117188, + "learning_rate": 7.736385863525228e-05, + "loss": 1.985, + "step": 27597 + }, + { + "epoch": 0.6132888888888889, + "grad_norm": 1.7234865427017212, + "learning_rate": 7.735941320293399e-05, + "loss": 1.9525, + "step": 27598 + }, + { + "epoch": 0.6133111111111111, + "grad_norm": 1.8913743495941162, + "learning_rate": 7.73549677706157e-05, + "loss": 2.1799, + "step": 27599 + }, + { + "epoch": 0.6133333333333333, + "grad_norm": 1.910362720489502, + "learning_rate": 7.735052233829741e-05, + "loss": 1.761, + "step": 27600 + }, + { + "epoch": 0.6133555555555555, + "grad_norm": 1.485051155090332, + "learning_rate": 7.734607690597912e-05, + "loss": 2.3293, + "step": 27601 + }, + { + "epoch": 0.6133777777777778, + "grad_norm": 1.4898614883422852, + "learning_rate": 7.734163147366081e-05, + "loss": 2.4178, + "step": 27602 + }, + { + "epoch": 0.6134, + "grad_norm": 1.735337734222412, + "learning_rate": 7.733718604134252e-05, + "loss": 2.3771, + "step": 27603 + }, + { + "epoch": 0.6134222222222222, + "grad_norm": 0.31236639618873596, + "learning_rate": 7.733274060902423e-05, + "loss": 0.0177, + "step": 27604 + }, + { + "epoch": 0.6134444444444445, + "grad_norm": 1.4100191593170166, + "learning_rate": 7.732829517670593e-05, + "loss": 2.1479, + "step": 27605 + }, + { + "epoch": 0.6134666666666667, + "grad_norm": 1.3722394704818726, + "learning_rate": 7.732384974438764e-05, + "loss": 2.2028, + "step": 27606 + }, + { + "epoch": 0.6134888888888889, + "grad_norm": 1.4756319522857666, + "learning_rate": 7.731940431206935e-05, + "loss": 2.1144, + "step": 27607 + }, + { + "epoch": 0.6135111111111111, + "grad_norm": 1.1815156936645508, + "learning_rate": 7.731495887975107e-05, + "loss": 1.5594, + "step": 27608 + }, + { + "epoch": 0.6135333333333334, + "grad_norm": 1.2476978302001953, + "learning_rate": 7.731051344743277e-05, + "loss": 1.2661, + "step": 27609 + }, + { + "epoch": 0.6135555555555555, + "grad_norm": 1.892336368560791, + "learning_rate": 7.730606801511448e-05, + "loss": 2.3488, + "step": 27610 + }, + { + "epoch": 0.6135777777777778, + "grad_norm": 1.6618032455444336, + "learning_rate": 7.730162258279619e-05, + "loss": 1.6773, + "step": 27611 + }, + { + "epoch": 0.6136, + "grad_norm": 1.2995035648345947, + "learning_rate": 7.729717715047788e-05, + "loss": 1.4888, + "step": 27612 + }, + { + "epoch": 0.6136222222222222, + "grad_norm": 1.5355890989303589, + "learning_rate": 7.729273171815959e-05, + "loss": 1.7641, + "step": 27613 + }, + { + "epoch": 0.6136444444444444, + "grad_norm": 1.7926889657974243, + "learning_rate": 7.72882862858413e-05, + "loss": 1.903, + "step": 27614 + }, + { + "epoch": 0.6136666666666667, + "grad_norm": 1.98298978805542, + "learning_rate": 7.7283840853523e-05, + "loss": 2.0607, + "step": 27615 + }, + { + "epoch": 0.6136888888888888, + "grad_norm": 1.5406136512756348, + "learning_rate": 7.727939542120471e-05, + "loss": 1.96, + "step": 27616 + }, + { + "epoch": 0.6137111111111111, + "grad_norm": 1.7651652097702026, + "learning_rate": 7.727494998888643e-05, + "loss": 1.8261, + "step": 27617 + }, + { + "epoch": 0.6137333333333334, + "grad_norm": 1.607982873916626, + "learning_rate": 7.727050455656814e-05, + "loss": 1.7859, + "step": 27618 + }, + { + "epoch": 0.6137555555555556, + "grad_norm": 1.7538888454437256, + "learning_rate": 7.726605912424984e-05, + "loss": 2.2275, + "step": 27619 + }, + { + "epoch": 0.6137777777777778, + "grad_norm": 2.276658773422241, + "learning_rate": 7.726161369193155e-05, + "loss": 2.1569, + "step": 27620 + }, + { + "epoch": 0.6138, + "grad_norm": 1.7184829711914062, + "learning_rate": 7.725716825961326e-05, + "loss": 2.0479, + "step": 27621 + }, + { + "epoch": 0.6138222222222223, + "grad_norm": 1.8018218278884888, + "learning_rate": 7.725272282729495e-05, + "loss": 1.9534, + "step": 27622 + }, + { + "epoch": 0.6138444444444444, + "grad_norm": 1.7157447338104248, + "learning_rate": 7.724827739497666e-05, + "loss": 1.936, + "step": 27623 + }, + { + "epoch": 0.6138666666666667, + "grad_norm": 1.4542107582092285, + "learning_rate": 7.724383196265837e-05, + "loss": 1.5425, + "step": 27624 + }, + { + "epoch": 0.6138888888888889, + "grad_norm": 2.096475124359131, + "learning_rate": 7.723938653034008e-05, + "loss": 2.4732, + "step": 27625 + }, + { + "epoch": 0.6139111111111111, + "grad_norm": 1.5585896968841553, + "learning_rate": 7.723494109802179e-05, + "loss": 2.1209, + "step": 27626 + }, + { + "epoch": 0.6139333333333333, + "grad_norm": 2.434840679168701, + "learning_rate": 7.72304956657035e-05, + "loss": 2.603, + "step": 27627 + }, + { + "epoch": 0.6139555555555556, + "grad_norm": 1.888709545135498, + "learning_rate": 7.722605023338521e-05, + "loss": 2.3049, + "step": 27628 + }, + { + "epoch": 0.6139777777777777, + "grad_norm": 1.717698335647583, + "learning_rate": 7.72216048010669e-05, + "loss": 2.0233, + "step": 27629 + }, + { + "epoch": 0.614, + "grad_norm": 1.5587272644042969, + "learning_rate": 7.721715936874862e-05, + "loss": 1.8363, + "step": 27630 + }, + { + "epoch": 0.6140222222222222, + "grad_norm": 1.546492099761963, + "learning_rate": 7.721271393643032e-05, + "loss": 2.0981, + "step": 27631 + }, + { + "epoch": 0.6140444444444444, + "grad_norm": 1.110864520072937, + "learning_rate": 7.720826850411202e-05, + "loss": 1.0081, + "step": 27632 + }, + { + "epoch": 0.6140666666666666, + "grad_norm": 1.1011080741882324, + "learning_rate": 7.720382307179373e-05, + "loss": 0.9227, + "step": 27633 + }, + { + "epoch": 0.6140888888888889, + "grad_norm": 1.4774861335754395, + "learning_rate": 7.719937763947544e-05, + "loss": 1.7885, + "step": 27634 + }, + { + "epoch": 0.6141111111111112, + "grad_norm": 1.8963218927383423, + "learning_rate": 7.719493220715715e-05, + "loss": 1.8461, + "step": 27635 + }, + { + "epoch": 0.6141333333333333, + "grad_norm": 1.8257931470870972, + "learning_rate": 7.719048677483886e-05, + "loss": 1.7892, + "step": 27636 + }, + { + "epoch": 0.6141555555555556, + "grad_norm": 1.488949179649353, + "learning_rate": 7.718604134252057e-05, + "loss": 1.2848, + "step": 27637 + }, + { + "epoch": 0.6141777777777778, + "grad_norm": 1.8301564455032349, + "learning_rate": 7.718159591020228e-05, + "loss": 2.1069, + "step": 27638 + }, + { + "epoch": 0.6142, + "grad_norm": 1.6307073831558228, + "learning_rate": 7.717715047788397e-05, + "loss": 1.7145, + "step": 27639 + }, + { + "epoch": 0.6142222222222222, + "grad_norm": 1.733677864074707, + "learning_rate": 7.717270504556568e-05, + "loss": 2.2545, + "step": 27640 + }, + { + "epoch": 0.6142444444444445, + "grad_norm": 1.5401047468185425, + "learning_rate": 7.71682596132474e-05, + "loss": 1.225, + "step": 27641 + }, + { + "epoch": 0.6142666666666666, + "grad_norm": 0.9527256488800049, + "learning_rate": 7.716381418092909e-05, + "loss": 0.762, + "step": 27642 + }, + { + "epoch": 0.6142888888888889, + "grad_norm": 1.9015424251556396, + "learning_rate": 7.71593687486108e-05, + "loss": 1.8785, + "step": 27643 + }, + { + "epoch": 0.6143111111111111, + "grad_norm": 1.754401445388794, + "learning_rate": 7.715492331629251e-05, + "loss": 2.1387, + "step": 27644 + }, + { + "epoch": 0.6143333333333333, + "grad_norm": 1.8856477737426758, + "learning_rate": 7.715047788397422e-05, + "loss": 1.8633, + "step": 27645 + }, + { + "epoch": 0.6143555555555555, + "grad_norm": 1.6553912162780762, + "learning_rate": 7.714603245165593e-05, + "loss": 1.6771, + "step": 27646 + }, + { + "epoch": 0.6143777777777778, + "grad_norm": 1.5054494142532349, + "learning_rate": 7.714158701933764e-05, + "loss": 1.3938, + "step": 27647 + }, + { + "epoch": 0.6144, + "grad_norm": 1.715761661529541, + "learning_rate": 7.713714158701935e-05, + "loss": 2.0282, + "step": 27648 + }, + { + "epoch": 0.6144222222222222, + "grad_norm": 1.7670443058013916, + "learning_rate": 7.713269615470104e-05, + "loss": 0.9393, + "step": 27649 + }, + { + "epoch": 0.6144444444444445, + "grad_norm": 1.943627119064331, + "learning_rate": 7.712825072238275e-05, + "loss": 1.3714, + "step": 27650 + }, + { + "epoch": 0.6144666666666667, + "grad_norm": 1.4573408365249634, + "learning_rate": 7.712380529006446e-05, + "loss": 2.2397, + "step": 27651 + }, + { + "epoch": 0.6144888888888889, + "grad_norm": 1.8467220067977905, + "learning_rate": 7.711935985774616e-05, + "loss": 2.8679, + "step": 27652 + }, + { + "epoch": 0.6145111111111111, + "grad_norm": 0.8519179224967957, + "learning_rate": 7.711491442542787e-05, + "loss": 0.9065, + "step": 27653 + }, + { + "epoch": 0.6145333333333334, + "grad_norm": 1.5563408136367798, + "learning_rate": 7.711046899310959e-05, + "loss": 2.1656, + "step": 27654 + }, + { + "epoch": 0.6145555555555555, + "grad_norm": 2.1301660537719727, + "learning_rate": 7.71060235607913e-05, + "loss": 2.4985, + "step": 27655 + }, + { + "epoch": 0.6145777777777778, + "grad_norm": 1.8298200368881226, + "learning_rate": 7.7101578128473e-05, + "loss": 2.1938, + "step": 27656 + }, + { + "epoch": 0.6146, + "grad_norm": 1.7975974082946777, + "learning_rate": 7.709713269615471e-05, + "loss": 2.0496, + "step": 27657 + }, + { + "epoch": 0.6146222222222222, + "grad_norm": 1.5617163181304932, + "learning_rate": 7.709268726383642e-05, + "loss": 2.2042, + "step": 27658 + }, + { + "epoch": 0.6146444444444444, + "grad_norm": 1.6176177263259888, + "learning_rate": 7.708824183151811e-05, + "loss": 1.6636, + "step": 27659 + }, + { + "epoch": 0.6146666666666667, + "grad_norm": 1.4682356119155884, + "learning_rate": 7.708379639919982e-05, + "loss": 1.7942, + "step": 27660 + }, + { + "epoch": 0.6146888888888888, + "grad_norm": 1.9298056364059448, + "learning_rate": 7.707935096688153e-05, + "loss": 2.0287, + "step": 27661 + }, + { + "epoch": 0.6147111111111111, + "grad_norm": 1.5962305068969727, + "learning_rate": 7.707490553456324e-05, + "loss": 1.9764, + "step": 27662 + }, + { + "epoch": 0.6147333333333334, + "grad_norm": 1.8374494314193726, + "learning_rate": 7.707046010224495e-05, + "loss": 2.0601, + "step": 27663 + }, + { + "epoch": 0.6147555555555556, + "grad_norm": 1.7612316608428955, + "learning_rate": 7.706601466992666e-05, + "loss": 2.5086, + "step": 27664 + }, + { + "epoch": 0.6147777777777778, + "grad_norm": 1.8300474882125854, + "learning_rate": 7.706156923760837e-05, + "loss": 1.891, + "step": 27665 + }, + { + "epoch": 0.6148, + "grad_norm": 1.481309175491333, + "learning_rate": 7.705712380529007e-05, + "loss": 1.8192, + "step": 27666 + }, + { + "epoch": 0.6148222222222223, + "grad_norm": 1.5958552360534668, + "learning_rate": 7.705267837297178e-05, + "loss": 2.1367, + "step": 27667 + }, + { + "epoch": 0.6148444444444444, + "grad_norm": 1.4381794929504395, + "learning_rate": 7.704823294065349e-05, + "loss": 1.667, + "step": 27668 + }, + { + "epoch": 0.6148666666666667, + "grad_norm": 1.7751753330230713, + "learning_rate": 7.704378750833518e-05, + "loss": 2.3798, + "step": 27669 + }, + { + "epoch": 0.6148888888888889, + "grad_norm": 1.361076831817627, + "learning_rate": 7.703934207601689e-05, + "loss": 1.3767, + "step": 27670 + }, + { + "epoch": 0.6149111111111111, + "grad_norm": 1.8791605234146118, + "learning_rate": 7.70348966436986e-05, + "loss": 2.2393, + "step": 27671 + }, + { + "epoch": 0.6149333333333333, + "grad_norm": 1.0605690479278564, + "learning_rate": 7.703045121138031e-05, + "loss": 0.9758, + "step": 27672 + }, + { + "epoch": 0.6149555555555556, + "grad_norm": 1.5713154077529907, + "learning_rate": 7.702600577906202e-05, + "loss": 1.5473, + "step": 27673 + }, + { + "epoch": 0.6149777777777777, + "grad_norm": 1.6930948495864868, + "learning_rate": 7.702156034674373e-05, + "loss": 2.4817, + "step": 27674 + }, + { + "epoch": 0.615, + "grad_norm": 1.694022297859192, + "learning_rate": 7.701711491442544e-05, + "loss": 2.0426, + "step": 27675 + }, + { + "epoch": 0.6150222222222222, + "grad_norm": 1.4905400276184082, + "learning_rate": 7.701266948210714e-05, + "loss": 1.7319, + "step": 27676 + }, + { + "epoch": 0.6150444444444444, + "grad_norm": 1.6683911085128784, + "learning_rate": 7.700822404978885e-05, + "loss": 2.1189, + "step": 27677 + }, + { + "epoch": 0.6150666666666667, + "grad_norm": 1.7028203010559082, + "learning_rate": 7.700377861747055e-05, + "loss": 2.313, + "step": 27678 + }, + { + "epoch": 0.6150888888888889, + "grad_norm": 1.4560718536376953, + "learning_rate": 7.699933318515225e-05, + "loss": 1.8408, + "step": 27679 + }, + { + "epoch": 0.6151111111111112, + "grad_norm": 1.5160682201385498, + "learning_rate": 7.699488775283396e-05, + "loss": 2.0621, + "step": 27680 + }, + { + "epoch": 0.6151333333333333, + "grad_norm": 1.6277612447738647, + "learning_rate": 7.699044232051567e-05, + "loss": 1.9481, + "step": 27681 + }, + { + "epoch": 0.6151555555555556, + "grad_norm": 1.7742987871170044, + "learning_rate": 7.698599688819738e-05, + "loss": 2.1653, + "step": 27682 + }, + { + "epoch": 0.6151777777777778, + "grad_norm": 1.5646719932556152, + "learning_rate": 7.698155145587909e-05, + "loss": 1.8406, + "step": 27683 + }, + { + "epoch": 0.6152, + "grad_norm": 1.6861202716827393, + "learning_rate": 7.69771060235608e-05, + "loss": 2.6977, + "step": 27684 + }, + { + "epoch": 0.6152222222222222, + "grad_norm": 2.004915475845337, + "learning_rate": 7.697266059124251e-05, + "loss": 2.252, + "step": 27685 + }, + { + "epoch": 0.6152444444444445, + "grad_norm": 0.9852021336555481, + "learning_rate": 7.69682151589242e-05, + "loss": 0.8603, + "step": 27686 + }, + { + "epoch": 0.6152666666666666, + "grad_norm": 1.5224124193191528, + "learning_rate": 7.696376972660591e-05, + "loss": 1.9327, + "step": 27687 + }, + { + "epoch": 0.6152888888888889, + "grad_norm": 1.6800435781478882, + "learning_rate": 7.695932429428762e-05, + "loss": 1.7287, + "step": 27688 + }, + { + "epoch": 0.6153111111111111, + "grad_norm": 1.6324905157089233, + "learning_rate": 7.695487886196932e-05, + "loss": 1.5948, + "step": 27689 + }, + { + "epoch": 0.6153333333333333, + "grad_norm": 1.5132930278778076, + "learning_rate": 7.695043342965103e-05, + "loss": 1.4836, + "step": 27690 + }, + { + "epoch": 0.6153555555555555, + "grad_norm": 2.3151564598083496, + "learning_rate": 7.694598799733275e-05, + "loss": 1.8424, + "step": 27691 + }, + { + "epoch": 0.6153777777777778, + "grad_norm": 1.6328611373901367, + "learning_rate": 7.694154256501445e-05, + "loss": 1.5843, + "step": 27692 + }, + { + "epoch": 0.6154, + "grad_norm": 2.0141165256500244, + "learning_rate": 7.693709713269616e-05, + "loss": 1.8026, + "step": 27693 + }, + { + "epoch": 0.6154222222222222, + "grad_norm": 1.8666143417358398, + "learning_rate": 7.693265170037787e-05, + "loss": 2.0635, + "step": 27694 + }, + { + "epoch": 0.6154444444444445, + "grad_norm": 2.37695574760437, + "learning_rate": 7.692820626805958e-05, + "loss": 1.9471, + "step": 27695 + }, + { + "epoch": 0.6154666666666667, + "grad_norm": 1.7559750080108643, + "learning_rate": 7.692376083574127e-05, + "loss": 1.5826, + "step": 27696 + }, + { + "epoch": 0.6154888888888889, + "grad_norm": 1.5847703218460083, + "learning_rate": 7.691931540342298e-05, + "loss": 1.3723, + "step": 27697 + }, + { + "epoch": 0.6155111111111111, + "grad_norm": 1.7543034553527832, + "learning_rate": 7.691486997110469e-05, + "loss": 1.6881, + "step": 27698 + }, + { + "epoch": 0.6155333333333334, + "grad_norm": 1.7109777927398682, + "learning_rate": 7.69104245387864e-05, + "loss": 1.6384, + "step": 27699 + }, + { + "epoch": 0.6155555555555555, + "grad_norm": 1.8934999704360962, + "learning_rate": 7.690597910646811e-05, + "loss": 1.4734, + "step": 27700 + }, + { + "epoch": 0.6155777777777778, + "grad_norm": 1.2288439273834229, + "learning_rate": 7.690153367414982e-05, + "loss": 1.3923, + "step": 27701 + }, + { + "epoch": 0.6156, + "grad_norm": 1.6553082466125488, + "learning_rate": 7.689708824183152e-05, + "loss": 1.8585, + "step": 27702 + }, + { + "epoch": 0.6156222222222222, + "grad_norm": 1.480249285697937, + "learning_rate": 7.689264280951323e-05, + "loss": 2.0241, + "step": 27703 + }, + { + "epoch": 0.6156444444444444, + "grad_norm": 1.4849505424499512, + "learning_rate": 7.688819737719494e-05, + "loss": 2.1186, + "step": 27704 + }, + { + "epoch": 0.6156666666666667, + "grad_norm": 1.8371179103851318, + "learning_rate": 7.688375194487665e-05, + "loss": 2.0136, + "step": 27705 + }, + { + "epoch": 0.6156888888888888, + "grad_norm": 2.0982840061187744, + "learning_rate": 7.687930651255834e-05, + "loss": 2.2286, + "step": 27706 + }, + { + "epoch": 0.6157111111111111, + "grad_norm": 1.4270573854446411, + "learning_rate": 7.687486108024005e-05, + "loss": 2.0153, + "step": 27707 + }, + { + "epoch": 0.6157333333333334, + "grad_norm": 1.5784318447113037, + "learning_rate": 7.687041564792176e-05, + "loss": 1.8786, + "step": 27708 + }, + { + "epoch": 0.6157555555555555, + "grad_norm": 1.4389392137527466, + "learning_rate": 7.686597021560347e-05, + "loss": 1.6763, + "step": 27709 + }, + { + "epoch": 0.6157777777777778, + "grad_norm": 1.6586129665374756, + "learning_rate": 7.686152478328518e-05, + "loss": 2.0527, + "step": 27710 + }, + { + "epoch": 0.6158, + "grad_norm": 1.6005334854125977, + "learning_rate": 7.685707935096689e-05, + "loss": 2.1591, + "step": 27711 + }, + { + "epoch": 0.6158222222222223, + "grad_norm": 1.971131443977356, + "learning_rate": 7.68526339186486e-05, + "loss": 1.7389, + "step": 27712 + }, + { + "epoch": 0.6158444444444444, + "grad_norm": 1.5543437004089355, + "learning_rate": 7.68481884863303e-05, + "loss": 1.9799, + "step": 27713 + }, + { + "epoch": 0.6158666666666667, + "grad_norm": 1.8680613040924072, + "learning_rate": 7.6843743054012e-05, + "loss": 1.5808, + "step": 27714 + }, + { + "epoch": 0.6158888888888889, + "grad_norm": 1.8699873685836792, + "learning_rate": 7.683929762169372e-05, + "loss": 2.1983, + "step": 27715 + }, + { + "epoch": 0.6159111111111111, + "grad_norm": 1.506440281867981, + "learning_rate": 7.683485218937541e-05, + "loss": 2.2206, + "step": 27716 + }, + { + "epoch": 0.6159333333333333, + "grad_norm": 1.7313804626464844, + "learning_rate": 7.683040675705712e-05, + "loss": 1.9819, + "step": 27717 + }, + { + "epoch": 0.6159555555555556, + "grad_norm": 1.4775224924087524, + "learning_rate": 7.682596132473883e-05, + "loss": 2.0975, + "step": 27718 + }, + { + "epoch": 0.6159777777777777, + "grad_norm": 1.656072974205017, + "learning_rate": 7.682151589242054e-05, + "loss": 2.4082, + "step": 27719 + }, + { + "epoch": 0.616, + "grad_norm": 1.63306725025177, + "learning_rate": 7.681707046010225e-05, + "loss": 1.7959, + "step": 27720 + }, + { + "epoch": 0.6160222222222222, + "grad_norm": 2.13395357131958, + "learning_rate": 7.681262502778396e-05, + "loss": 0.0452, + "step": 27721 + }, + { + "epoch": 0.6160444444444444, + "grad_norm": 1.8045676946640015, + "learning_rate": 7.680817959546567e-05, + "loss": 2.08, + "step": 27722 + }, + { + "epoch": 0.6160666666666667, + "grad_norm": 1.8881685733795166, + "learning_rate": 7.680373416314737e-05, + "loss": 2.011, + "step": 27723 + }, + { + "epoch": 0.6160888888888889, + "grad_norm": 1.9771424531936646, + "learning_rate": 7.679928873082908e-05, + "loss": 2.2206, + "step": 27724 + }, + { + "epoch": 0.6161111111111112, + "grad_norm": 1.520660161972046, + "learning_rate": 7.679484329851079e-05, + "loss": 2.0039, + "step": 27725 + }, + { + "epoch": 0.6161333333333333, + "grad_norm": 1.7060950994491577, + "learning_rate": 7.679039786619248e-05, + "loss": 2.3537, + "step": 27726 + }, + { + "epoch": 0.6161555555555556, + "grad_norm": 2.0422251224517822, + "learning_rate": 7.67859524338742e-05, + "loss": 1.8996, + "step": 27727 + }, + { + "epoch": 0.6161777777777778, + "grad_norm": 1.5210211277008057, + "learning_rate": 7.678150700155591e-05, + "loss": 1.9079, + "step": 27728 + }, + { + "epoch": 0.6162, + "grad_norm": 1.5915676355361938, + "learning_rate": 7.677706156923761e-05, + "loss": 2.0076, + "step": 27729 + }, + { + "epoch": 0.6162222222222222, + "grad_norm": 1.6983424425125122, + "learning_rate": 7.677261613691932e-05, + "loss": 2.2466, + "step": 27730 + }, + { + "epoch": 0.6162444444444445, + "grad_norm": 1.7953312397003174, + "learning_rate": 7.676817070460103e-05, + "loss": 2.1523, + "step": 27731 + }, + { + "epoch": 0.6162666666666666, + "grad_norm": 1.9210249185562134, + "learning_rate": 7.676372527228274e-05, + "loss": 2.0222, + "step": 27732 + }, + { + "epoch": 0.6162888888888889, + "grad_norm": 1.5903412103652954, + "learning_rate": 7.675927983996443e-05, + "loss": 1.6526, + "step": 27733 + }, + { + "epoch": 0.6163111111111111, + "grad_norm": 1.300310730934143, + "learning_rate": 7.675483440764614e-05, + "loss": 0.0304, + "step": 27734 + }, + { + "epoch": 0.6163333333333333, + "grad_norm": 1.4345909357070923, + "learning_rate": 7.675038897532785e-05, + "loss": 1.624, + "step": 27735 + }, + { + "epoch": 0.6163555555555555, + "grad_norm": 1.777416467666626, + "learning_rate": 7.674594354300956e-05, + "loss": 1.6793, + "step": 27736 + }, + { + "epoch": 0.6163777777777778, + "grad_norm": 1.7176923751831055, + "learning_rate": 7.674149811069127e-05, + "loss": 1.6987, + "step": 27737 + }, + { + "epoch": 0.6164, + "grad_norm": 1.9084837436676025, + "learning_rate": 7.673705267837298e-05, + "loss": 1.9548, + "step": 27738 + }, + { + "epoch": 0.6164222222222222, + "grad_norm": 1.5448215007781982, + "learning_rate": 7.673260724605468e-05, + "loss": 1.2958, + "step": 27739 + }, + { + "epoch": 0.6164444444444445, + "grad_norm": 1.9638711214065552, + "learning_rate": 7.672816181373639e-05, + "loss": 1.3227, + "step": 27740 + }, + { + "epoch": 0.6164666666666667, + "grad_norm": 1.9058576822280884, + "learning_rate": 7.67237163814181e-05, + "loss": 1.6331, + "step": 27741 + }, + { + "epoch": 0.6164888888888889, + "grad_norm": 1.6180567741394043, + "learning_rate": 7.671927094909981e-05, + "loss": 1.9669, + "step": 27742 + }, + { + "epoch": 0.6165111111111111, + "grad_norm": 1.7356921434402466, + "learning_rate": 7.67148255167815e-05, + "loss": 2.1797, + "step": 27743 + }, + { + "epoch": 0.6165333333333334, + "grad_norm": 1.7215243577957153, + "learning_rate": 7.671038008446321e-05, + "loss": 1.6665, + "step": 27744 + }, + { + "epoch": 0.6165555555555555, + "grad_norm": 2.344080924987793, + "learning_rate": 7.670593465214492e-05, + "loss": 2.1785, + "step": 27745 + }, + { + "epoch": 0.6165777777777778, + "grad_norm": 1.594612956047058, + "learning_rate": 7.670148921982663e-05, + "loss": 1.5843, + "step": 27746 + }, + { + "epoch": 0.6166, + "grad_norm": 2.3588497638702393, + "learning_rate": 7.669704378750834e-05, + "loss": 1.8844, + "step": 27747 + }, + { + "epoch": 0.6166222222222222, + "grad_norm": 1.6239612102508545, + "learning_rate": 7.669259835519005e-05, + "loss": 1.6552, + "step": 27748 + }, + { + "epoch": 0.6166444444444444, + "grad_norm": 1.8449560403823853, + "learning_rate": 7.668815292287175e-05, + "loss": 1.7764, + "step": 27749 + }, + { + "epoch": 0.6166666666666667, + "grad_norm": 1.4692752361297607, + "learning_rate": 7.668370749055346e-05, + "loss": 1.2563, + "step": 27750 + }, + { + "epoch": 0.6166888888888888, + "grad_norm": 1.3470090627670288, + "learning_rate": 7.667926205823517e-05, + "loss": 2.1952, + "step": 27751 + }, + { + "epoch": 0.6167111111111111, + "grad_norm": 1.4804136753082275, + "learning_rate": 7.667481662591688e-05, + "loss": 2.3699, + "step": 27752 + }, + { + "epoch": 0.6167333333333334, + "grad_norm": 1.5747419595718384, + "learning_rate": 7.667037119359857e-05, + "loss": 2.253, + "step": 27753 + }, + { + "epoch": 0.6167555555555555, + "grad_norm": 1.3297574520111084, + "learning_rate": 7.666592576128028e-05, + "loss": 1.1228, + "step": 27754 + }, + { + "epoch": 0.6167777777777778, + "grad_norm": 1.3217922449111938, + "learning_rate": 7.666148032896199e-05, + "loss": 1.8382, + "step": 27755 + }, + { + "epoch": 0.6168, + "grad_norm": 1.4520697593688965, + "learning_rate": 7.66570348966437e-05, + "loss": 2.1111, + "step": 27756 + }, + { + "epoch": 0.6168222222222223, + "grad_norm": 1.4051185846328735, + "learning_rate": 7.665258946432541e-05, + "loss": 1.8979, + "step": 27757 + }, + { + "epoch": 0.6168444444444444, + "grad_norm": 1.55694580078125, + "learning_rate": 7.664814403200712e-05, + "loss": 2.0987, + "step": 27758 + }, + { + "epoch": 0.6168666666666667, + "grad_norm": 1.452420711517334, + "learning_rate": 7.664369859968882e-05, + "loss": 1.866, + "step": 27759 + }, + { + "epoch": 0.6168888888888889, + "grad_norm": 1.4672553539276123, + "learning_rate": 7.663925316737053e-05, + "loss": 1.8999, + "step": 27760 + }, + { + "epoch": 0.6169111111111111, + "grad_norm": 1.9965301752090454, + "learning_rate": 7.663480773505224e-05, + "loss": 2.7316, + "step": 27761 + }, + { + "epoch": 0.6169333333333333, + "grad_norm": 1.617195725440979, + "learning_rate": 7.663036230273395e-05, + "loss": 2.4018, + "step": 27762 + }, + { + "epoch": 0.6169555555555556, + "grad_norm": 1.7019859552383423, + "learning_rate": 7.662591687041564e-05, + "loss": 2.1988, + "step": 27763 + }, + { + "epoch": 0.6169777777777777, + "grad_norm": 1.8571925163269043, + "learning_rate": 7.662147143809737e-05, + "loss": 2.2452, + "step": 27764 + }, + { + "epoch": 0.617, + "grad_norm": 1.922149658203125, + "learning_rate": 7.661702600577907e-05, + "loss": 2.1675, + "step": 27765 + }, + { + "epoch": 0.6170222222222222, + "grad_norm": 1.533893346786499, + "learning_rate": 7.661258057346077e-05, + "loss": 1.4528, + "step": 27766 + }, + { + "epoch": 0.6170444444444444, + "grad_norm": 1.5106412172317505, + "learning_rate": 7.660813514114248e-05, + "loss": 1.8997, + "step": 27767 + }, + { + "epoch": 0.6170666666666667, + "grad_norm": 1.6652741432189941, + "learning_rate": 7.660368970882419e-05, + "loss": 2.0417, + "step": 27768 + }, + { + "epoch": 0.6170888888888889, + "grad_norm": 1.8946317434310913, + "learning_rate": 7.65992442765059e-05, + "loss": 2.3318, + "step": 27769 + }, + { + "epoch": 0.6171111111111112, + "grad_norm": 2.1442677974700928, + "learning_rate": 7.65947988441876e-05, + "loss": 1.679, + "step": 27770 + }, + { + "epoch": 0.6171333333333333, + "grad_norm": 1.441885232925415, + "learning_rate": 7.65903534118693e-05, + "loss": 1.6786, + "step": 27771 + }, + { + "epoch": 0.6171555555555556, + "grad_norm": 1.5308915376663208, + "learning_rate": 7.658590797955102e-05, + "loss": 1.8118, + "step": 27772 + }, + { + "epoch": 0.6171777777777778, + "grad_norm": 2.0713369846343994, + "learning_rate": 7.658146254723272e-05, + "loss": 2.2014, + "step": 27773 + }, + { + "epoch": 0.6172, + "grad_norm": 1.5334278345108032, + "learning_rate": 7.657701711491443e-05, + "loss": 2.0471, + "step": 27774 + }, + { + "epoch": 0.6172222222222222, + "grad_norm": 1.7738068103790283, + "learning_rate": 7.657257168259614e-05, + "loss": 1.888, + "step": 27775 + }, + { + "epoch": 0.6172444444444445, + "grad_norm": 2.077033519744873, + "learning_rate": 7.656812625027784e-05, + "loss": 1.7194, + "step": 27776 + }, + { + "epoch": 0.6172666666666666, + "grad_norm": 1.8221032619476318, + "learning_rate": 7.656368081795955e-05, + "loss": 1.9039, + "step": 27777 + }, + { + "epoch": 0.6172888888888889, + "grad_norm": 1.6100168228149414, + "learning_rate": 7.655923538564126e-05, + "loss": 1.8672, + "step": 27778 + }, + { + "epoch": 0.6173111111111111, + "grad_norm": 2.5112087726593018, + "learning_rate": 7.655478995332297e-05, + "loss": 2.4041, + "step": 27779 + }, + { + "epoch": 0.6173333333333333, + "grad_norm": 1.9133623838424683, + "learning_rate": 7.655034452100467e-05, + "loss": 1.8831, + "step": 27780 + }, + { + "epoch": 0.6173555555555555, + "grad_norm": 1.3843563795089722, + "learning_rate": 7.654589908868637e-05, + "loss": 1.1407, + "step": 27781 + }, + { + "epoch": 0.6173777777777778, + "grad_norm": 1.566394567489624, + "learning_rate": 7.654145365636808e-05, + "loss": 1.4861, + "step": 27782 + }, + { + "epoch": 0.6174, + "grad_norm": 1.7080655097961426, + "learning_rate": 7.65370082240498e-05, + "loss": 1.7517, + "step": 27783 + }, + { + "epoch": 0.6174222222222222, + "grad_norm": 1.7616134881973267, + "learning_rate": 7.65325627917315e-05, + "loss": 1.8535, + "step": 27784 + }, + { + "epoch": 0.6174444444444445, + "grad_norm": 1.7613402605056763, + "learning_rate": 7.652811735941321e-05, + "loss": 1.4388, + "step": 27785 + }, + { + "epoch": 0.6174666666666667, + "grad_norm": 1.4484586715698242, + "learning_rate": 7.652367192709491e-05, + "loss": 1.7956, + "step": 27786 + }, + { + "epoch": 0.6174888888888889, + "grad_norm": 1.606410264968872, + "learning_rate": 7.651922649477662e-05, + "loss": 1.9883, + "step": 27787 + }, + { + "epoch": 0.6175111111111111, + "grad_norm": 2.0317225456237793, + "learning_rate": 7.651478106245833e-05, + "loss": 1.5763, + "step": 27788 + }, + { + "epoch": 0.6175333333333334, + "grad_norm": 1.7747282981872559, + "learning_rate": 7.651033563014004e-05, + "loss": 1.9427, + "step": 27789 + }, + { + "epoch": 0.6175555555555555, + "grad_norm": 1.4215421676635742, + "learning_rate": 7.650589019782173e-05, + "loss": 1.4721, + "step": 27790 + }, + { + "epoch": 0.6175777777777778, + "grad_norm": 1.934670090675354, + "learning_rate": 7.650144476550344e-05, + "loss": 1.6565, + "step": 27791 + }, + { + "epoch": 0.6176, + "grad_norm": 2.0426065921783447, + "learning_rate": 7.649699933318515e-05, + "loss": 1.6775, + "step": 27792 + }, + { + "epoch": 0.6176222222222222, + "grad_norm": 1.7949936389923096, + "learning_rate": 7.649255390086686e-05, + "loss": 1.7326, + "step": 27793 + }, + { + "epoch": 0.6176444444444444, + "grad_norm": 1.9961637258529663, + "learning_rate": 7.648810846854857e-05, + "loss": 1.7102, + "step": 27794 + }, + { + "epoch": 0.6176666666666667, + "grad_norm": 1.7532471418380737, + "learning_rate": 7.648366303623028e-05, + "loss": 1.7731, + "step": 27795 + }, + { + "epoch": 0.6176888888888888, + "grad_norm": 2.2291626930236816, + "learning_rate": 7.647921760391198e-05, + "loss": 2.2091, + "step": 27796 + }, + { + "epoch": 0.6177111111111111, + "grad_norm": 2.012691020965576, + "learning_rate": 7.647477217159369e-05, + "loss": 1.9455, + "step": 27797 + }, + { + "epoch": 0.6177333333333334, + "grad_norm": 1.7636815309524536, + "learning_rate": 7.64703267392754e-05, + "loss": 1.5035, + "step": 27798 + }, + { + "epoch": 0.6177555555555555, + "grad_norm": 1.8447431325912476, + "learning_rate": 7.646588130695711e-05, + "loss": 1.3523, + "step": 27799 + }, + { + "epoch": 0.6177777777777778, + "grad_norm": 1.5177545547485352, + "learning_rate": 7.64614358746388e-05, + "loss": 1.147, + "step": 27800 + }, + { + "epoch": 0.6178, + "grad_norm": 1.4865962266921997, + "learning_rate": 7.645699044232053e-05, + "loss": 2.593, + "step": 27801 + }, + { + "epoch": 0.6178222222222223, + "grad_norm": 1.5389937162399292, + "learning_rate": 7.645254501000224e-05, + "loss": 2.2661, + "step": 27802 + }, + { + "epoch": 0.6178444444444444, + "grad_norm": 1.3967429399490356, + "learning_rate": 7.644809957768393e-05, + "loss": 2.368, + "step": 27803 + }, + { + "epoch": 0.6178666666666667, + "grad_norm": 1.512739896774292, + "learning_rate": 7.644365414536564e-05, + "loss": 2.3682, + "step": 27804 + }, + { + "epoch": 0.6178888888888889, + "grad_norm": 1.7297879457473755, + "learning_rate": 7.643920871304735e-05, + "loss": 2.2775, + "step": 27805 + }, + { + "epoch": 0.6179111111111111, + "grad_norm": 1.5569535493850708, + "learning_rate": 7.643476328072905e-05, + "loss": 1.8969, + "step": 27806 + }, + { + "epoch": 0.6179333333333333, + "grad_norm": 2.3133952617645264, + "learning_rate": 7.643031784841076e-05, + "loss": 2.1335, + "step": 27807 + }, + { + "epoch": 0.6179555555555556, + "grad_norm": 1.477544903755188, + "learning_rate": 7.642587241609247e-05, + "loss": 1.9267, + "step": 27808 + }, + { + "epoch": 0.6179777777777777, + "grad_norm": 1.5673898458480835, + "learning_rate": 7.642142698377418e-05, + "loss": 2.1177, + "step": 27809 + }, + { + "epoch": 0.618, + "grad_norm": 1.7615025043487549, + "learning_rate": 7.641698155145589e-05, + "loss": 2.2152, + "step": 27810 + }, + { + "epoch": 0.6180222222222223, + "grad_norm": 2.3260927200317383, + "learning_rate": 7.64125361191376e-05, + "loss": 2.2751, + "step": 27811 + }, + { + "epoch": 0.6180444444444444, + "grad_norm": 1.756048321723938, + "learning_rate": 7.64080906868193e-05, + "loss": 2.406, + "step": 27812 + }, + { + "epoch": 0.6180666666666667, + "grad_norm": 2.2361507415771484, + "learning_rate": 7.6403645254501e-05, + "loss": 1.9301, + "step": 27813 + }, + { + "epoch": 0.6180888888888889, + "grad_norm": 1.5776044130325317, + "learning_rate": 7.639919982218271e-05, + "loss": 2.2465, + "step": 27814 + }, + { + "epoch": 0.6181111111111111, + "grad_norm": 1.456405520439148, + "learning_rate": 7.639475438986442e-05, + "loss": 2.1656, + "step": 27815 + }, + { + "epoch": 0.6181333333333333, + "grad_norm": 1.5151910781860352, + "learning_rate": 7.639030895754612e-05, + "loss": 1.9655, + "step": 27816 + }, + { + "epoch": 0.6181555555555556, + "grad_norm": 1.819975733757019, + "learning_rate": 7.638586352522783e-05, + "loss": 1.7451, + "step": 27817 + }, + { + "epoch": 0.6181777777777778, + "grad_norm": 1.6116926670074463, + "learning_rate": 7.638141809290954e-05, + "loss": 1.9346, + "step": 27818 + }, + { + "epoch": 0.6182, + "grad_norm": 1.3958470821380615, + "learning_rate": 7.637697266059125e-05, + "loss": 1.57, + "step": 27819 + }, + { + "epoch": 0.6182222222222222, + "grad_norm": 1.6095985174179077, + "learning_rate": 7.637252722827295e-05, + "loss": 2.1294, + "step": 27820 + }, + { + "epoch": 0.6182444444444445, + "grad_norm": 1.5986542701721191, + "learning_rate": 7.636808179595466e-05, + "loss": 2.2341, + "step": 27821 + }, + { + "epoch": 0.6182666666666666, + "grad_norm": 1.6214473247528076, + "learning_rate": 7.636363636363637e-05, + "loss": 2.133, + "step": 27822 + }, + { + "epoch": 0.6182888888888889, + "grad_norm": 1.593881368637085, + "learning_rate": 7.635919093131807e-05, + "loss": 1.7351, + "step": 27823 + }, + { + "epoch": 0.6183111111111111, + "grad_norm": 1.630756139755249, + "learning_rate": 7.635474549899978e-05, + "loss": 1.7785, + "step": 27824 + }, + { + "epoch": 0.6183333333333333, + "grad_norm": 1.658084750175476, + "learning_rate": 7.635030006668149e-05, + "loss": 1.5986, + "step": 27825 + }, + { + "epoch": 0.6183555555555555, + "grad_norm": 1.6406036615371704, + "learning_rate": 7.63458546343632e-05, + "loss": 1.8687, + "step": 27826 + }, + { + "epoch": 0.6183777777777778, + "grad_norm": 1.6786532402038574, + "learning_rate": 7.63414092020449e-05, + "loss": 1.6849, + "step": 27827 + }, + { + "epoch": 0.6184, + "grad_norm": 1.719092607498169, + "learning_rate": 7.63369637697266e-05, + "loss": 1.7264, + "step": 27828 + }, + { + "epoch": 0.6184222222222222, + "grad_norm": 1.5456290245056152, + "learning_rate": 7.633251833740831e-05, + "loss": 1.8574, + "step": 27829 + }, + { + "epoch": 0.6184444444444445, + "grad_norm": 1.4805980920791626, + "learning_rate": 7.632807290509002e-05, + "loss": 1.6378, + "step": 27830 + }, + { + "epoch": 0.6184666666666667, + "grad_norm": 1.7630740404129028, + "learning_rate": 7.632362747277173e-05, + "loss": 1.7688, + "step": 27831 + }, + { + "epoch": 0.6184888888888889, + "grad_norm": 1.7586926221847534, + "learning_rate": 7.631918204045344e-05, + "loss": 2.1503, + "step": 27832 + }, + { + "epoch": 0.6185111111111111, + "grad_norm": 1.1839295625686646, + "learning_rate": 7.631473660813514e-05, + "loss": 0.9218, + "step": 27833 + }, + { + "epoch": 0.6185333333333334, + "grad_norm": 1.8422901630401611, + "learning_rate": 7.631029117581685e-05, + "loss": 2.0849, + "step": 27834 + }, + { + "epoch": 0.6185555555555555, + "grad_norm": 2.327899217605591, + "learning_rate": 7.630584574349856e-05, + "loss": 2.2619, + "step": 27835 + }, + { + "epoch": 0.6185777777777778, + "grad_norm": 1.7383606433868408, + "learning_rate": 7.630140031118027e-05, + "loss": 1.9783, + "step": 27836 + }, + { + "epoch": 0.6186, + "grad_norm": 1.705854892730713, + "learning_rate": 7.629695487886196e-05, + "loss": 1.8867, + "step": 27837 + }, + { + "epoch": 0.6186222222222222, + "grad_norm": 1.2072322368621826, + "learning_rate": 7.629250944654369e-05, + "loss": 1.3474, + "step": 27838 + }, + { + "epoch": 0.6186444444444444, + "grad_norm": 1.545650601387024, + "learning_rate": 7.62880640142254e-05, + "loss": 1.4679, + "step": 27839 + }, + { + "epoch": 0.6186666666666667, + "grad_norm": 1.7425636053085327, + "learning_rate": 7.628361858190709e-05, + "loss": 2.004, + "step": 27840 + }, + { + "epoch": 0.6186888888888888, + "grad_norm": 1.946216344833374, + "learning_rate": 7.62791731495888e-05, + "loss": 1.6654, + "step": 27841 + }, + { + "epoch": 0.6187111111111111, + "grad_norm": 1.860919713973999, + "learning_rate": 7.627472771727051e-05, + "loss": 1.8677, + "step": 27842 + }, + { + "epoch": 0.6187333333333334, + "grad_norm": 1.6177150011062622, + "learning_rate": 7.627028228495221e-05, + "loss": 1.694, + "step": 27843 + }, + { + "epoch": 0.6187555555555555, + "grad_norm": 1.7288682460784912, + "learning_rate": 7.626583685263392e-05, + "loss": 1.6698, + "step": 27844 + }, + { + "epoch": 0.6187777777777778, + "grad_norm": 2.123058795928955, + "learning_rate": 7.626139142031563e-05, + "loss": 1.6972, + "step": 27845 + }, + { + "epoch": 0.6188, + "grad_norm": 1.999112844467163, + "learning_rate": 7.625694598799734e-05, + "loss": 1.6694, + "step": 27846 + }, + { + "epoch": 0.6188222222222223, + "grad_norm": 1.8956773281097412, + "learning_rate": 7.625250055567905e-05, + "loss": 1.946, + "step": 27847 + }, + { + "epoch": 0.6188444444444444, + "grad_norm": 2.0588977336883545, + "learning_rate": 7.624805512336076e-05, + "loss": 1.6527, + "step": 27848 + }, + { + "epoch": 0.6188666666666667, + "grad_norm": 1.6914948225021362, + "learning_rate": 7.624360969104247e-05, + "loss": 1.3385, + "step": 27849 + }, + { + "epoch": 0.6188888888888889, + "grad_norm": 1.8900210857391357, + "learning_rate": 7.623916425872416e-05, + "loss": 1.3768, + "step": 27850 + }, + { + "epoch": 0.6189111111111111, + "grad_norm": 1.0682233572006226, + "learning_rate": 7.623471882640587e-05, + "loss": 1.192, + "step": 27851 + }, + { + "epoch": 0.6189333333333333, + "grad_norm": 1.622100591659546, + "learning_rate": 7.623027339408758e-05, + "loss": 2.828, + "step": 27852 + }, + { + "epoch": 0.6189555555555556, + "grad_norm": 1.6287745237350464, + "learning_rate": 7.622582796176928e-05, + "loss": 2.5527, + "step": 27853 + }, + { + "epoch": 0.6189777777777777, + "grad_norm": 1.8082754611968994, + "learning_rate": 7.622138252945099e-05, + "loss": 2.5409, + "step": 27854 + }, + { + "epoch": 0.619, + "grad_norm": 1.6269252300262451, + "learning_rate": 7.62169370971327e-05, + "loss": 2.1211, + "step": 27855 + }, + { + "epoch": 0.6190222222222223, + "grad_norm": 1.4222098588943481, + "learning_rate": 7.62124916648144e-05, + "loss": 2.0527, + "step": 27856 + }, + { + "epoch": 0.6190444444444444, + "grad_norm": 1.8875646591186523, + "learning_rate": 7.620804623249612e-05, + "loss": 2.3583, + "step": 27857 + }, + { + "epoch": 0.6190666666666667, + "grad_norm": 1.4680495262145996, + "learning_rate": 7.620360080017783e-05, + "loss": 2.3106, + "step": 27858 + }, + { + "epoch": 0.6190888888888889, + "grad_norm": 1.6367533206939697, + "learning_rate": 7.619915536785954e-05, + "loss": 2.1393, + "step": 27859 + }, + { + "epoch": 0.6191111111111111, + "grad_norm": 1.9394482374191284, + "learning_rate": 7.619470993554123e-05, + "loss": 1.9793, + "step": 27860 + }, + { + "epoch": 0.6191333333333333, + "grad_norm": 1.4326545000076294, + "learning_rate": 7.619026450322294e-05, + "loss": 2.1417, + "step": 27861 + }, + { + "epoch": 0.6191555555555556, + "grad_norm": 1.5940648317337036, + "learning_rate": 7.618581907090465e-05, + "loss": 2.3789, + "step": 27862 + }, + { + "epoch": 0.6191777777777778, + "grad_norm": 1.5996965169906616, + "learning_rate": 7.618137363858635e-05, + "loss": 1.6626, + "step": 27863 + }, + { + "epoch": 0.6192, + "grad_norm": 1.4322491884231567, + "learning_rate": 7.617692820626806e-05, + "loss": 2.0181, + "step": 27864 + }, + { + "epoch": 0.6192222222222222, + "grad_norm": 1.4913889169692993, + "learning_rate": 7.617248277394977e-05, + "loss": 2.2868, + "step": 27865 + }, + { + "epoch": 0.6192444444444445, + "grad_norm": 1.576112151145935, + "learning_rate": 7.616803734163148e-05, + "loss": 2.0001, + "step": 27866 + }, + { + "epoch": 0.6192666666666666, + "grad_norm": 2.043811082839966, + "learning_rate": 7.616359190931319e-05, + "loss": 2.1336, + "step": 27867 + }, + { + "epoch": 0.6192888888888889, + "grad_norm": 1.5992106199264526, + "learning_rate": 7.61591464769949e-05, + "loss": 1.9645, + "step": 27868 + }, + { + "epoch": 0.6193111111111111, + "grad_norm": 1.496248483657837, + "learning_rate": 7.61547010446766e-05, + "loss": 1.5838, + "step": 27869 + }, + { + "epoch": 0.6193333333333333, + "grad_norm": 2.008216381072998, + "learning_rate": 7.61502556123583e-05, + "loss": 2.4949, + "step": 27870 + }, + { + "epoch": 0.6193555555555555, + "grad_norm": 1.7011308670043945, + "learning_rate": 7.614581018004001e-05, + "loss": 2.3068, + "step": 27871 + }, + { + "epoch": 0.6193777777777778, + "grad_norm": 1.6385869979858398, + "learning_rate": 7.614136474772172e-05, + "loss": 1.1203, + "step": 27872 + }, + { + "epoch": 0.6194, + "grad_norm": 1.4843419790267944, + "learning_rate": 7.613691931540343e-05, + "loss": 1.6449, + "step": 27873 + }, + { + "epoch": 0.6194222222222222, + "grad_norm": 1.7454240322113037, + "learning_rate": 7.613247388308513e-05, + "loss": 1.6117, + "step": 27874 + }, + { + "epoch": 0.6194444444444445, + "grad_norm": 2.1531989574432373, + "learning_rate": 7.612802845076685e-05, + "loss": 2.2732, + "step": 27875 + }, + { + "epoch": 0.6194666666666667, + "grad_norm": 1.6188414096832275, + "learning_rate": 7.612358301844856e-05, + "loss": 1.614, + "step": 27876 + }, + { + "epoch": 0.6194888888888889, + "grad_norm": 1.970890760421753, + "learning_rate": 7.611913758613025e-05, + "loss": 2.3413, + "step": 27877 + }, + { + "epoch": 0.6195111111111111, + "grad_norm": 1.5219156742095947, + "learning_rate": 7.611469215381196e-05, + "loss": 1.6374, + "step": 27878 + }, + { + "epoch": 0.6195333333333334, + "grad_norm": 1.6971971988677979, + "learning_rate": 7.611024672149367e-05, + "loss": 1.5241, + "step": 27879 + }, + { + "epoch": 0.6195555555555555, + "grad_norm": 1.7241084575653076, + "learning_rate": 7.610580128917537e-05, + "loss": 2.0711, + "step": 27880 + }, + { + "epoch": 0.6195777777777778, + "grad_norm": 1.8422069549560547, + "learning_rate": 7.610135585685708e-05, + "loss": 2.0101, + "step": 27881 + }, + { + "epoch": 0.6196, + "grad_norm": 1.7453213930130005, + "learning_rate": 7.609691042453879e-05, + "loss": 1.7256, + "step": 27882 + }, + { + "epoch": 0.6196222222222222, + "grad_norm": 1.6272207498550415, + "learning_rate": 7.60924649922205e-05, + "loss": 1.8167, + "step": 27883 + }, + { + "epoch": 0.6196444444444444, + "grad_norm": 2.016037940979004, + "learning_rate": 7.608801955990221e-05, + "loss": 1.7058, + "step": 27884 + }, + { + "epoch": 0.6196666666666667, + "grad_norm": 2.0873775482177734, + "learning_rate": 7.608357412758392e-05, + "loss": 1.9221, + "step": 27885 + }, + { + "epoch": 0.6196888888888888, + "grad_norm": 1.8145103454589844, + "learning_rate": 7.607912869526563e-05, + "loss": 1.9094, + "step": 27886 + }, + { + "epoch": 0.6197111111111111, + "grad_norm": 2.046666145324707, + "learning_rate": 7.607468326294732e-05, + "loss": 2.0466, + "step": 27887 + }, + { + "epoch": 0.6197333333333334, + "grad_norm": 1.5662391185760498, + "learning_rate": 7.607023783062903e-05, + "loss": 2.0985, + "step": 27888 + }, + { + "epoch": 0.6197555555555555, + "grad_norm": 1.8663203716278076, + "learning_rate": 7.606579239831074e-05, + "loss": 1.8951, + "step": 27889 + }, + { + "epoch": 0.6197777777777778, + "grad_norm": 1.8117085695266724, + "learning_rate": 7.606134696599244e-05, + "loss": 1.6704, + "step": 27890 + }, + { + "epoch": 0.6198, + "grad_norm": 1.5927486419677734, + "learning_rate": 7.605690153367415e-05, + "loss": 1.6549, + "step": 27891 + }, + { + "epoch": 0.6198222222222223, + "grad_norm": 1.8937994241714478, + "learning_rate": 7.605245610135586e-05, + "loss": 2.0085, + "step": 27892 + }, + { + "epoch": 0.6198444444444444, + "grad_norm": 1.9662067890167236, + "learning_rate": 7.604801066903757e-05, + "loss": 2.1213, + "step": 27893 + }, + { + "epoch": 0.6198666666666667, + "grad_norm": 2.182756185531616, + "learning_rate": 7.604356523671928e-05, + "loss": 1.8149, + "step": 27894 + }, + { + "epoch": 0.6198888888888889, + "grad_norm": 1.598151683807373, + "learning_rate": 7.603911980440099e-05, + "loss": 1.779, + "step": 27895 + }, + { + "epoch": 0.6199111111111111, + "grad_norm": 1.8498104810714722, + "learning_rate": 7.60346743720827e-05, + "loss": 1.5005, + "step": 27896 + }, + { + "epoch": 0.6199333333333333, + "grad_norm": 1.8257795572280884, + "learning_rate": 7.603022893976439e-05, + "loss": 1.6445, + "step": 27897 + }, + { + "epoch": 0.6199555555555556, + "grad_norm": 1.6474151611328125, + "learning_rate": 7.60257835074461e-05, + "loss": 1.7728, + "step": 27898 + }, + { + "epoch": 0.6199777777777777, + "grad_norm": 1.864945888519287, + "learning_rate": 7.602133807512781e-05, + "loss": 1.8965, + "step": 27899 + }, + { + "epoch": 0.62, + "grad_norm": 1.7285076379776, + "learning_rate": 7.601689264280951e-05, + "loss": 1.7502, + "step": 27900 + }, + { + "epoch": 0.6200222222222223, + "grad_norm": 1.3599979877471924, + "learning_rate": 7.601244721049122e-05, + "loss": 2.4592, + "step": 27901 + }, + { + "epoch": 0.6200444444444444, + "grad_norm": 1.5312752723693848, + "learning_rate": 7.600800177817293e-05, + "loss": 2.6304, + "step": 27902 + }, + { + "epoch": 0.6200666666666667, + "grad_norm": 1.5203238725662231, + "learning_rate": 7.600355634585464e-05, + "loss": 2.3161, + "step": 27903 + }, + { + "epoch": 0.6200888888888889, + "grad_norm": 1.6090893745422363, + "learning_rate": 7.599911091353635e-05, + "loss": 2.5759, + "step": 27904 + }, + { + "epoch": 0.6201111111111111, + "grad_norm": 1.3716859817504883, + "learning_rate": 7.599466548121806e-05, + "loss": 2.0005, + "step": 27905 + }, + { + "epoch": 0.6201333333333333, + "grad_norm": 1.4314205646514893, + "learning_rate": 7.599022004889977e-05, + "loss": 1.8578, + "step": 27906 + }, + { + "epoch": 0.6201555555555556, + "grad_norm": 1.3883862495422363, + "learning_rate": 7.598577461658146e-05, + "loss": 1.7013, + "step": 27907 + }, + { + "epoch": 0.6201777777777778, + "grad_norm": 1.5877196788787842, + "learning_rate": 7.598132918426317e-05, + "loss": 1.9532, + "step": 27908 + }, + { + "epoch": 0.6202, + "grad_norm": 1.5918232202529907, + "learning_rate": 7.597688375194488e-05, + "loss": 1.0095, + "step": 27909 + }, + { + "epoch": 0.6202222222222222, + "grad_norm": 1.5913398265838623, + "learning_rate": 7.597243831962658e-05, + "loss": 2.2015, + "step": 27910 + }, + { + "epoch": 0.6202444444444445, + "grad_norm": 1.7465524673461914, + "learning_rate": 7.596799288730829e-05, + "loss": 2.2779, + "step": 27911 + }, + { + "epoch": 0.6202666666666666, + "grad_norm": 1.722979187965393, + "learning_rate": 7.596354745499001e-05, + "loss": 2.114, + "step": 27912 + }, + { + "epoch": 0.6202888888888889, + "grad_norm": 1.6596341133117676, + "learning_rate": 7.595910202267172e-05, + "loss": 2.0469, + "step": 27913 + }, + { + "epoch": 0.6203111111111111, + "grad_norm": 1.748468041419983, + "learning_rate": 7.595465659035342e-05, + "loss": 1.9282, + "step": 27914 + }, + { + "epoch": 0.6203333333333333, + "grad_norm": 1.206313967704773, + "learning_rate": 7.595021115803512e-05, + "loss": 1.0193, + "step": 27915 + }, + { + "epoch": 0.6203555555555555, + "grad_norm": 2.0730957984924316, + "learning_rate": 7.594576572571683e-05, + "loss": 1.5116, + "step": 27916 + }, + { + "epoch": 0.6203777777777778, + "grad_norm": 1.7398715019226074, + "learning_rate": 7.594132029339853e-05, + "loss": 1.719, + "step": 27917 + }, + { + "epoch": 0.6204, + "grad_norm": 1.5374395847320557, + "learning_rate": 7.593687486108024e-05, + "loss": 1.5994, + "step": 27918 + }, + { + "epoch": 0.6204222222222222, + "grad_norm": 1.6957192420959473, + "learning_rate": 7.593242942876195e-05, + "loss": 2.3119, + "step": 27919 + }, + { + "epoch": 0.6204444444444445, + "grad_norm": 1.5393242835998535, + "learning_rate": 7.592798399644365e-05, + "loss": 2.1632, + "step": 27920 + }, + { + "epoch": 0.6204666666666667, + "grad_norm": 1.851560354232788, + "learning_rate": 7.592353856412537e-05, + "loss": 1.865, + "step": 27921 + }, + { + "epoch": 0.6204888888888889, + "grad_norm": 1.640065312385559, + "learning_rate": 7.591909313180708e-05, + "loss": 1.5166, + "step": 27922 + }, + { + "epoch": 0.6205111111111111, + "grad_norm": 1.5717978477478027, + "learning_rate": 7.591464769948879e-05, + "loss": 1.854, + "step": 27923 + }, + { + "epoch": 0.6205333333333334, + "grad_norm": 1.7012500762939453, + "learning_rate": 7.591020226717048e-05, + "loss": 1.856, + "step": 27924 + }, + { + "epoch": 0.6205555555555555, + "grad_norm": 2.284024477005005, + "learning_rate": 7.59057568348522e-05, + "loss": 1.8496, + "step": 27925 + }, + { + "epoch": 0.6205777777777778, + "grad_norm": 1.928219199180603, + "learning_rate": 7.59013114025339e-05, + "loss": 1.7534, + "step": 27926 + }, + { + "epoch": 0.6206, + "grad_norm": 1.737140417098999, + "learning_rate": 7.58968659702156e-05, + "loss": 1.5632, + "step": 27927 + }, + { + "epoch": 0.6206222222222222, + "grad_norm": 1.9052971601486206, + "learning_rate": 7.589242053789731e-05, + "loss": 2.1908, + "step": 27928 + }, + { + "epoch": 0.6206444444444444, + "grad_norm": 1.4619766473770142, + "learning_rate": 7.588797510557902e-05, + "loss": 1.838, + "step": 27929 + }, + { + "epoch": 0.6206666666666667, + "grad_norm": 1.62483811378479, + "learning_rate": 7.588352967326073e-05, + "loss": 1.7586, + "step": 27930 + }, + { + "epoch": 0.6206888888888888, + "grad_norm": 1.580270767211914, + "learning_rate": 7.587908424094244e-05, + "loss": 1.9294, + "step": 27931 + }, + { + "epoch": 0.6207111111111111, + "grad_norm": 1.4798740148544312, + "learning_rate": 7.587463880862415e-05, + "loss": 1.0451, + "step": 27932 + }, + { + "epoch": 0.6207333333333334, + "grad_norm": 1.5176098346710205, + "learning_rate": 7.587019337630586e-05, + "loss": 1.2598, + "step": 27933 + }, + { + "epoch": 0.6207555555555555, + "grad_norm": 1.3974021673202515, + "learning_rate": 7.586574794398755e-05, + "loss": 1.5877, + "step": 27934 + }, + { + "epoch": 0.6207777777777778, + "grad_norm": 1.6418516635894775, + "learning_rate": 7.586130251166926e-05, + "loss": 1.785, + "step": 27935 + }, + { + "epoch": 0.6208, + "grad_norm": 1.3830448389053345, + "learning_rate": 7.585685707935097e-05, + "loss": 1.246, + "step": 27936 + }, + { + "epoch": 0.6208222222222223, + "grad_norm": 1.5359796285629272, + "learning_rate": 7.585241164703267e-05, + "loss": 1.7877, + "step": 27937 + }, + { + "epoch": 0.6208444444444444, + "grad_norm": 2.0273587703704834, + "learning_rate": 7.584796621471438e-05, + "loss": 2.0674, + "step": 27938 + }, + { + "epoch": 0.6208666666666667, + "grad_norm": 1.653097152709961, + "learning_rate": 7.584352078239609e-05, + "loss": 2.0481, + "step": 27939 + }, + { + "epoch": 0.6208888888888889, + "grad_norm": 1.8791024684906006, + "learning_rate": 7.58390753500778e-05, + "loss": 1.8427, + "step": 27940 + }, + { + "epoch": 0.6209111111111111, + "grad_norm": 1.7230693101882935, + "learning_rate": 7.583462991775951e-05, + "loss": 1.6818, + "step": 27941 + }, + { + "epoch": 0.6209333333333333, + "grad_norm": 1.7749496698379517, + "learning_rate": 7.583018448544122e-05, + "loss": 1.9271, + "step": 27942 + }, + { + "epoch": 0.6209555555555556, + "grad_norm": 1.8635108470916748, + "learning_rate": 7.582573905312293e-05, + "loss": 1.7799, + "step": 27943 + }, + { + "epoch": 0.6209777777777777, + "grad_norm": 2.007655620574951, + "learning_rate": 7.582129362080462e-05, + "loss": 1.8257, + "step": 27944 + }, + { + "epoch": 0.621, + "grad_norm": 1.7758183479309082, + "learning_rate": 7.581684818848633e-05, + "loss": 2.0091, + "step": 27945 + }, + { + "epoch": 0.6210222222222223, + "grad_norm": 2.0555312633514404, + "learning_rate": 7.581240275616804e-05, + "loss": 2.1853, + "step": 27946 + }, + { + "epoch": 0.6210444444444444, + "grad_norm": 1.8315494060516357, + "learning_rate": 7.580795732384974e-05, + "loss": 1.8743, + "step": 27947 + }, + { + "epoch": 0.6210666666666667, + "grad_norm": 1.6850496530532837, + "learning_rate": 7.580351189153145e-05, + "loss": 1.6656, + "step": 27948 + }, + { + "epoch": 0.6210888888888889, + "grad_norm": 1.6388370990753174, + "learning_rate": 7.579906645921317e-05, + "loss": 1.2549, + "step": 27949 + }, + { + "epoch": 0.6211111111111111, + "grad_norm": 1.9473472833633423, + "learning_rate": 7.579462102689488e-05, + "loss": 1.1684, + "step": 27950 + }, + { + "epoch": 0.6211333333333333, + "grad_norm": 1.6398197412490845, + "learning_rate": 7.579017559457658e-05, + "loss": 2.7279, + "step": 27951 + }, + { + "epoch": 0.6211555555555556, + "grad_norm": 1.6046913862228394, + "learning_rate": 7.578573016225829e-05, + "loss": 2.8082, + "step": 27952 + }, + { + "epoch": 0.6211777777777778, + "grad_norm": 1.714526891708374, + "learning_rate": 7.578128472994e-05, + "loss": 3.4, + "step": 27953 + }, + { + "epoch": 0.6212, + "grad_norm": 1.4914047718048096, + "learning_rate": 7.577683929762169e-05, + "loss": 1.0971, + "step": 27954 + }, + { + "epoch": 0.6212222222222222, + "grad_norm": 1.3853058815002441, + "learning_rate": 7.57723938653034e-05, + "loss": 1.858, + "step": 27955 + }, + { + "epoch": 0.6212444444444445, + "grad_norm": 1.5049077272415161, + "learning_rate": 7.576794843298511e-05, + "loss": 2.2232, + "step": 27956 + }, + { + "epoch": 0.6212666666666666, + "grad_norm": 1.389599323272705, + "learning_rate": 7.576350300066681e-05, + "loss": 2.0723, + "step": 27957 + }, + { + "epoch": 0.6212888888888889, + "grad_norm": 1.340166449546814, + "learning_rate": 7.575905756834853e-05, + "loss": 1.3373, + "step": 27958 + }, + { + "epoch": 0.6213111111111111, + "grad_norm": 1.8471781015396118, + "learning_rate": 7.575461213603024e-05, + "loss": 2.4835, + "step": 27959 + }, + { + "epoch": 0.6213333333333333, + "grad_norm": 1.6483877897262573, + "learning_rate": 7.575016670371195e-05, + "loss": 2.3488, + "step": 27960 + }, + { + "epoch": 0.6213555555555555, + "grad_norm": 1.5012586116790771, + "learning_rate": 7.574572127139365e-05, + "loss": 1.88, + "step": 27961 + }, + { + "epoch": 0.6213777777777778, + "grad_norm": 1.6846944093704224, + "learning_rate": 7.574127583907535e-05, + "loss": 1.9666, + "step": 27962 + }, + { + "epoch": 0.6214, + "grad_norm": 1.699049711227417, + "learning_rate": 7.573683040675706e-05, + "loss": 2.4547, + "step": 27963 + }, + { + "epoch": 0.6214222222222222, + "grad_norm": 1.53487229347229, + "learning_rate": 7.573238497443876e-05, + "loss": 1.9296, + "step": 27964 + }, + { + "epoch": 0.6214444444444445, + "grad_norm": 1.4856736660003662, + "learning_rate": 7.572793954212047e-05, + "loss": 1.4113, + "step": 27965 + }, + { + "epoch": 0.6214666666666666, + "grad_norm": 1.408342719078064, + "learning_rate": 7.572349410980218e-05, + "loss": 1.6205, + "step": 27966 + }, + { + "epoch": 0.6214888888888889, + "grad_norm": 1.648368239402771, + "learning_rate": 7.571904867748389e-05, + "loss": 2.2225, + "step": 27967 + }, + { + "epoch": 0.6215111111111111, + "grad_norm": 1.4867284297943115, + "learning_rate": 7.57146032451656e-05, + "loss": 2.0689, + "step": 27968 + }, + { + "epoch": 0.6215333333333334, + "grad_norm": 1.564137578010559, + "learning_rate": 7.571015781284731e-05, + "loss": 2.3272, + "step": 27969 + }, + { + "epoch": 0.6215555555555555, + "grad_norm": 1.6785781383514404, + "learning_rate": 7.570571238052902e-05, + "loss": 1.8413, + "step": 27970 + }, + { + "epoch": 0.6215777777777778, + "grad_norm": 1.7516751289367676, + "learning_rate": 7.570126694821071e-05, + "loss": 1.9945, + "step": 27971 + }, + { + "epoch": 0.6216, + "grad_norm": 1.6698040962219238, + "learning_rate": 7.569682151589242e-05, + "loss": 2.0592, + "step": 27972 + }, + { + "epoch": 0.6216222222222222, + "grad_norm": 1.8508039712905884, + "learning_rate": 7.569237608357413e-05, + "loss": 1.9574, + "step": 27973 + }, + { + "epoch": 0.6216444444444444, + "grad_norm": 1.6818801164627075, + "learning_rate": 7.568793065125583e-05, + "loss": 1.4262, + "step": 27974 + }, + { + "epoch": 0.6216666666666667, + "grad_norm": 1.641690969467163, + "learning_rate": 7.568348521893754e-05, + "loss": 1.9255, + "step": 27975 + }, + { + "epoch": 0.6216888888888888, + "grad_norm": 1.6074247360229492, + "learning_rate": 7.567903978661925e-05, + "loss": 2.1551, + "step": 27976 + }, + { + "epoch": 0.6217111111111111, + "grad_norm": 1.5521165132522583, + "learning_rate": 7.567459435430096e-05, + "loss": 1.7597, + "step": 27977 + }, + { + "epoch": 0.6217333333333334, + "grad_norm": 1.5441497564315796, + "learning_rate": 7.567014892198267e-05, + "loss": 1.7756, + "step": 27978 + }, + { + "epoch": 0.6217555555555555, + "grad_norm": 1.7832576036453247, + "learning_rate": 7.566570348966438e-05, + "loss": 1.7453, + "step": 27979 + }, + { + "epoch": 0.6217777777777778, + "grad_norm": 1.5918539762496948, + "learning_rate": 7.566125805734609e-05, + "loss": 1.6821, + "step": 27980 + }, + { + "epoch": 0.6218, + "grad_norm": 1.6350274085998535, + "learning_rate": 7.565681262502778e-05, + "loss": 2.1195, + "step": 27981 + }, + { + "epoch": 0.6218222222222223, + "grad_norm": 1.7357590198516846, + "learning_rate": 7.565236719270949e-05, + "loss": 1.8812, + "step": 27982 + }, + { + "epoch": 0.6218444444444444, + "grad_norm": 1.4452167749404907, + "learning_rate": 7.56479217603912e-05, + "loss": 0.903, + "step": 27983 + }, + { + "epoch": 0.6218666666666667, + "grad_norm": 1.1117844581604004, + "learning_rate": 7.56434763280729e-05, + "loss": 0.8629, + "step": 27984 + }, + { + "epoch": 0.6218888888888889, + "grad_norm": 1.4665799140930176, + "learning_rate": 7.563903089575461e-05, + "loss": 1.6197, + "step": 27985 + }, + { + "epoch": 0.6219111111111111, + "grad_norm": 1.592340111732483, + "learning_rate": 7.563458546343633e-05, + "loss": 1.5545, + "step": 27986 + }, + { + "epoch": 0.6219333333333333, + "grad_norm": 1.722834587097168, + "learning_rate": 7.563014003111804e-05, + "loss": 1.8558, + "step": 27987 + }, + { + "epoch": 0.6219555555555556, + "grad_norm": 2.186478614807129, + "learning_rate": 7.562569459879974e-05, + "loss": 1.8553, + "step": 27988 + }, + { + "epoch": 0.6219777777777777, + "grad_norm": 1.7810604572296143, + "learning_rate": 7.562124916648145e-05, + "loss": 1.935, + "step": 27989 + }, + { + "epoch": 0.622, + "grad_norm": 1.6212259531021118, + "learning_rate": 7.561680373416316e-05, + "loss": 1.8521, + "step": 27990 + }, + { + "epoch": 0.6220222222222223, + "grad_norm": 2.047581672668457, + "learning_rate": 7.561235830184485e-05, + "loss": 2.0427, + "step": 27991 + }, + { + "epoch": 0.6220444444444444, + "grad_norm": 1.82438063621521, + "learning_rate": 7.560791286952656e-05, + "loss": 1.4275, + "step": 27992 + }, + { + "epoch": 0.6220666666666667, + "grad_norm": 1.806922197341919, + "learning_rate": 7.560346743720827e-05, + "loss": 1.6137, + "step": 27993 + }, + { + "epoch": 0.6220888888888889, + "grad_norm": 1.7153993844985962, + "learning_rate": 7.559902200488997e-05, + "loss": 2.0712, + "step": 27994 + }, + { + "epoch": 0.6221111111111111, + "grad_norm": 1.5733906030654907, + "learning_rate": 7.559457657257169e-05, + "loss": 1.3682, + "step": 27995 + }, + { + "epoch": 0.6221333333333333, + "grad_norm": 2.5927658081054688, + "learning_rate": 7.55901311402534e-05, + "loss": 2.0729, + "step": 27996 + }, + { + "epoch": 0.6221555555555556, + "grad_norm": 1.656896948814392, + "learning_rate": 7.558568570793511e-05, + "loss": 1.3535, + "step": 27997 + }, + { + "epoch": 0.6221777777777778, + "grad_norm": 1.611059308052063, + "learning_rate": 7.55812402756168e-05, + "loss": 1.6851, + "step": 27998 + }, + { + "epoch": 0.6222, + "grad_norm": 1.9701584577560425, + "learning_rate": 7.557679484329852e-05, + "loss": 2.047, + "step": 27999 + }, + { + "epoch": 0.6222222222222222, + "grad_norm": 1.6851133108139038, + "learning_rate": 7.557234941098023e-05, + "loss": 1.1879, + "step": 28000 + }, + { + "epoch": 0.6222444444444445, + "grad_norm": 1.6479599475860596, + "learning_rate": 7.556790397866192e-05, + "loss": 2.4432, + "step": 28001 + }, + { + "epoch": 0.6222666666666666, + "grad_norm": 1.5716572999954224, + "learning_rate": 7.556345854634363e-05, + "loss": 2.1938, + "step": 28002 + }, + { + "epoch": 0.6222888888888889, + "grad_norm": 2.1492650508880615, + "learning_rate": 7.555901311402534e-05, + "loss": 2.1888, + "step": 28003 + }, + { + "epoch": 0.6223111111111111, + "grad_norm": 1.7056307792663574, + "learning_rate": 7.555456768170705e-05, + "loss": 2.339, + "step": 28004 + }, + { + "epoch": 0.6223333333333333, + "grad_norm": 1.5766496658325195, + "learning_rate": 7.555012224938876e-05, + "loss": 2.9654, + "step": 28005 + }, + { + "epoch": 0.6223555555555556, + "grad_norm": 1.76160728931427, + "learning_rate": 7.554567681707047e-05, + "loss": 2.2328, + "step": 28006 + }, + { + "epoch": 0.6223777777777778, + "grad_norm": 1.5594091415405273, + "learning_rate": 7.554123138475218e-05, + "loss": 2.0131, + "step": 28007 + }, + { + "epoch": 0.6224, + "grad_norm": 1.8020453453063965, + "learning_rate": 7.553678595243388e-05, + "loss": 1.6641, + "step": 28008 + }, + { + "epoch": 0.6224222222222222, + "grad_norm": 1.7662761211395264, + "learning_rate": 7.553234052011559e-05, + "loss": 2.189, + "step": 28009 + }, + { + "epoch": 0.6224444444444445, + "grad_norm": 1.743079423904419, + "learning_rate": 7.55278950877973e-05, + "loss": 2.2716, + "step": 28010 + }, + { + "epoch": 0.6224666666666666, + "grad_norm": 1.5527716875076294, + "learning_rate": 7.552344965547899e-05, + "loss": 1.6795, + "step": 28011 + }, + { + "epoch": 0.6224888888888889, + "grad_norm": 1.5938984155654907, + "learning_rate": 7.55190042231607e-05, + "loss": 1.5725, + "step": 28012 + }, + { + "epoch": 0.6225111111111111, + "grad_norm": 1.3984183073043823, + "learning_rate": 7.551455879084241e-05, + "loss": 1.5888, + "step": 28013 + }, + { + "epoch": 0.6225333333333334, + "grad_norm": 1.4453216791152954, + "learning_rate": 7.551011335852412e-05, + "loss": 1.8982, + "step": 28014 + }, + { + "epoch": 0.6225555555555555, + "grad_norm": 1.5529475212097168, + "learning_rate": 7.550566792620583e-05, + "loss": 2.0408, + "step": 28015 + }, + { + "epoch": 0.6225777777777778, + "grad_norm": 1.4605592489242554, + "learning_rate": 7.550122249388754e-05, + "loss": 1.8066, + "step": 28016 + }, + { + "epoch": 0.6226, + "grad_norm": 1.8840243816375732, + "learning_rate": 7.549677706156925e-05, + "loss": 2.7807, + "step": 28017 + }, + { + "epoch": 0.6226222222222222, + "grad_norm": 1.7677241563796997, + "learning_rate": 7.549233162925094e-05, + "loss": 1.994, + "step": 28018 + }, + { + "epoch": 0.6226444444444444, + "grad_norm": 1.7343677282333374, + "learning_rate": 7.548788619693265e-05, + "loss": 2.0937, + "step": 28019 + }, + { + "epoch": 0.6226666666666667, + "grad_norm": 1.4782507419586182, + "learning_rate": 7.548344076461436e-05, + "loss": 1.6051, + "step": 28020 + }, + { + "epoch": 0.6226888888888888, + "grad_norm": 1.741336703300476, + "learning_rate": 7.547899533229606e-05, + "loss": 2.2404, + "step": 28021 + }, + { + "epoch": 0.6227111111111111, + "grad_norm": 1.725960612297058, + "learning_rate": 7.547454989997777e-05, + "loss": 1.658, + "step": 28022 + }, + { + "epoch": 0.6227333333333334, + "grad_norm": 1.852859377861023, + "learning_rate": 7.547010446765949e-05, + "loss": 2.0704, + "step": 28023 + }, + { + "epoch": 0.6227555555555555, + "grad_norm": 1.7915359735488892, + "learning_rate": 7.546565903534119e-05, + "loss": 2.1554, + "step": 28024 + }, + { + "epoch": 0.6227777777777778, + "grad_norm": 1.321905255317688, + "learning_rate": 7.54612136030229e-05, + "loss": 1.337, + "step": 28025 + }, + { + "epoch": 0.6228, + "grad_norm": 1.7915064096450806, + "learning_rate": 7.545676817070461e-05, + "loss": 1.6202, + "step": 28026 + }, + { + "epoch": 0.6228222222222223, + "grad_norm": 1.7633399963378906, + "learning_rate": 7.545232273838632e-05, + "loss": 1.8875, + "step": 28027 + }, + { + "epoch": 0.6228444444444444, + "grad_norm": 1.0724031925201416, + "learning_rate": 7.544787730606801e-05, + "loss": 0.8833, + "step": 28028 + }, + { + "epoch": 0.6228666666666667, + "grad_norm": 1.6533682346343994, + "learning_rate": 7.544343187374972e-05, + "loss": 1.6894, + "step": 28029 + }, + { + "epoch": 0.6228888888888889, + "grad_norm": 1.649261236190796, + "learning_rate": 7.543898644143143e-05, + "loss": 2.0422, + "step": 28030 + }, + { + "epoch": 0.6229111111111111, + "grad_norm": 1.9536653757095337, + "learning_rate": 7.543454100911313e-05, + "loss": 2.3201, + "step": 28031 + }, + { + "epoch": 0.6229333333333333, + "grad_norm": 1.7542853355407715, + "learning_rate": 7.543009557679485e-05, + "loss": 1.7455, + "step": 28032 + }, + { + "epoch": 0.6229555555555556, + "grad_norm": 1.3951656818389893, + "learning_rate": 7.542565014447656e-05, + "loss": 1.2113, + "step": 28033 + }, + { + "epoch": 0.6229777777777777, + "grad_norm": 1.873515009880066, + "learning_rate": 7.542120471215827e-05, + "loss": 1.6083, + "step": 28034 + }, + { + "epoch": 0.623, + "grad_norm": 1.6619421243667603, + "learning_rate": 7.541675927983997e-05, + "loss": 1.9856, + "step": 28035 + }, + { + "epoch": 0.6230222222222223, + "grad_norm": 1.208182692527771, + "learning_rate": 7.541231384752168e-05, + "loss": 0.8777, + "step": 28036 + }, + { + "epoch": 0.6230444444444444, + "grad_norm": 1.8139747381210327, + "learning_rate": 7.540786841520339e-05, + "loss": 1.6417, + "step": 28037 + }, + { + "epoch": 0.6230666666666667, + "grad_norm": 1.5409775972366333, + "learning_rate": 7.540342298288508e-05, + "loss": 1.7358, + "step": 28038 + }, + { + "epoch": 0.6230888888888889, + "grad_norm": 1.7688947916030884, + "learning_rate": 7.539897755056679e-05, + "loss": 2.0103, + "step": 28039 + }, + { + "epoch": 0.6231111111111111, + "grad_norm": 1.628895878791809, + "learning_rate": 7.53945321182485e-05, + "loss": 1.6595, + "step": 28040 + }, + { + "epoch": 0.6231333333333333, + "grad_norm": 1.785688877105713, + "learning_rate": 7.539008668593021e-05, + "loss": 1.721, + "step": 28041 + }, + { + "epoch": 0.6231555555555556, + "grad_norm": 1.9412195682525635, + "learning_rate": 7.538564125361192e-05, + "loss": 1.8939, + "step": 28042 + }, + { + "epoch": 0.6231777777777778, + "grad_norm": 2.1074423789978027, + "learning_rate": 7.538119582129363e-05, + "loss": 1.5976, + "step": 28043 + }, + { + "epoch": 0.6232, + "grad_norm": 2.227391242980957, + "learning_rate": 7.537675038897534e-05, + "loss": 1.9225, + "step": 28044 + }, + { + "epoch": 0.6232222222222222, + "grad_norm": 1.847145915031433, + "learning_rate": 7.537230495665704e-05, + "loss": 1.5274, + "step": 28045 + }, + { + "epoch": 0.6232444444444445, + "grad_norm": 2.542794942855835, + "learning_rate": 7.536785952433875e-05, + "loss": 2.4347, + "step": 28046 + }, + { + "epoch": 0.6232666666666666, + "grad_norm": 1.9294025897979736, + "learning_rate": 7.536341409202046e-05, + "loss": 1.9662, + "step": 28047 + }, + { + "epoch": 0.6232888888888889, + "grad_norm": 1.1842116117477417, + "learning_rate": 7.535896865970215e-05, + "loss": 0.8846, + "step": 28048 + }, + { + "epoch": 0.6233111111111111, + "grad_norm": 1.9983824491500854, + "learning_rate": 7.535452322738386e-05, + "loss": 1.8756, + "step": 28049 + }, + { + "epoch": 0.6233333333333333, + "grad_norm": 1.9489094018936157, + "learning_rate": 7.535007779506557e-05, + "loss": 1.6686, + "step": 28050 + }, + { + "epoch": 0.6233555555555556, + "grad_norm": 1.8719598054885864, + "learning_rate": 7.534563236274728e-05, + "loss": 2.0377, + "step": 28051 + }, + { + "epoch": 0.6233777777777778, + "grad_norm": 1.7876286506652832, + "learning_rate": 7.534118693042899e-05, + "loss": 2.6911, + "step": 28052 + }, + { + "epoch": 0.6234, + "grad_norm": 1.4274696111679077, + "learning_rate": 7.53367414981107e-05, + "loss": 1.9744, + "step": 28053 + }, + { + "epoch": 0.6234222222222222, + "grad_norm": 1.7839244604110718, + "learning_rate": 7.533229606579241e-05, + "loss": 2.4189, + "step": 28054 + }, + { + "epoch": 0.6234444444444445, + "grad_norm": 1.5739293098449707, + "learning_rate": 7.53278506334741e-05, + "loss": 1.8991, + "step": 28055 + }, + { + "epoch": 0.6234666666666666, + "grad_norm": 2.45786190032959, + "learning_rate": 7.532340520115582e-05, + "loss": 2.428, + "step": 28056 + }, + { + "epoch": 0.6234888888888889, + "grad_norm": 2.522510290145874, + "learning_rate": 7.531895976883752e-05, + "loss": 2.4247, + "step": 28057 + }, + { + "epoch": 0.6235111111111111, + "grad_norm": 1.6587966680526733, + "learning_rate": 7.531451433651922e-05, + "loss": 2.0945, + "step": 28058 + }, + { + "epoch": 0.6235333333333334, + "grad_norm": 1.7723037004470825, + "learning_rate": 7.531006890420093e-05, + "loss": 2.1222, + "step": 28059 + }, + { + "epoch": 0.6235555555555555, + "grad_norm": 1.5230958461761475, + "learning_rate": 7.530562347188265e-05, + "loss": 2.4104, + "step": 28060 + }, + { + "epoch": 0.6235777777777778, + "grad_norm": 1.460243582725525, + "learning_rate": 7.530117803956435e-05, + "loss": 0.8193, + "step": 28061 + }, + { + "epoch": 0.6236, + "grad_norm": 1.7510052919387817, + "learning_rate": 7.529673260724606e-05, + "loss": 1.968, + "step": 28062 + }, + { + "epoch": 0.6236222222222222, + "grad_norm": 1.5463873147964478, + "learning_rate": 7.529228717492777e-05, + "loss": 2.3151, + "step": 28063 + }, + { + "epoch": 0.6236444444444444, + "grad_norm": 1.510756492614746, + "learning_rate": 7.528784174260948e-05, + "loss": 1.5273, + "step": 28064 + }, + { + "epoch": 0.6236666666666667, + "grad_norm": 1.7299911975860596, + "learning_rate": 7.528339631029117e-05, + "loss": 2.1592, + "step": 28065 + }, + { + "epoch": 0.6236888888888888, + "grad_norm": 1.775102972984314, + "learning_rate": 7.527895087797288e-05, + "loss": 2.5766, + "step": 28066 + }, + { + "epoch": 0.6237111111111111, + "grad_norm": 1.8978391885757446, + "learning_rate": 7.52745054456546e-05, + "loss": 2.8868, + "step": 28067 + }, + { + "epoch": 0.6237333333333334, + "grad_norm": 1.4233916997909546, + "learning_rate": 7.527006001333629e-05, + "loss": 1.7611, + "step": 28068 + }, + { + "epoch": 0.6237555555555555, + "grad_norm": 1.5011509656906128, + "learning_rate": 7.526561458101801e-05, + "loss": 2.0248, + "step": 28069 + }, + { + "epoch": 0.6237777777777778, + "grad_norm": 1.5596365928649902, + "learning_rate": 7.526116914869972e-05, + "loss": 2.1135, + "step": 28070 + }, + { + "epoch": 0.6238, + "grad_norm": 0.19666370749473572, + "learning_rate": 7.525672371638142e-05, + "loss": 0.0258, + "step": 28071 + }, + { + "epoch": 0.6238222222222222, + "grad_norm": 1.6305148601531982, + "learning_rate": 7.525227828406313e-05, + "loss": 1.9894, + "step": 28072 + }, + { + "epoch": 0.6238444444444444, + "grad_norm": 1.5677214860916138, + "learning_rate": 7.524783285174484e-05, + "loss": 1.9228, + "step": 28073 + }, + { + "epoch": 0.6238666666666667, + "grad_norm": 1.7453768253326416, + "learning_rate": 7.524338741942655e-05, + "loss": 2.0331, + "step": 28074 + }, + { + "epoch": 0.6238888888888889, + "grad_norm": 1.7649633884429932, + "learning_rate": 7.523894198710824e-05, + "loss": 2.5632, + "step": 28075 + }, + { + "epoch": 0.6239111111111111, + "grad_norm": 1.564621925354004, + "learning_rate": 7.523449655478995e-05, + "loss": 1.9003, + "step": 28076 + }, + { + "epoch": 0.6239333333333333, + "grad_norm": 1.7973852157592773, + "learning_rate": 7.523005112247166e-05, + "loss": 2.2091, + "step": 28077 + }, + { + "epoch": 0.6239555555555556, + "grad_norm": 1.7107487916946411, + "learning_rate": 7.522560569015337e-05, + "loss": 1.9508, + "step": 28078 + }, + { + "epoch": 0.6239777777777777, + "grad_norm": 1.6050406694412231, + "learning_rate": 7.522116025783508e-05, + "loss": 1.8418, + "step": 28079 + }, + { + "epoch": 0.624, + "grad_norm": 1.5532035827636719, + "learning_rate": 7.521671482551679e-05, + "loss": 1.7462, + "step": 28080 + }, + { + "epoch": 0.6240222222222223, + "grad_norm": 1.7981599569320679, + "learning_rate": 7.521226939319849e-05, + "loss": 2.0927, + "step": 28081 + }, + { + "epoch": 0.6240444444444444, + "grad_norm": 2.0693376064300537, + "learning_rate": 7.52078239608802e-05, + "loss": 2.339, + "step": 28082 + }, + { + "epoch": 0.6240666666666667, + "grad_norm": 2.317495822906494, + "learning_rate": 7.520337852856191e-05, + "loss": 2.6896, + "step": 28083 + }, + { + "epoch": 0.6240888888888889, + "grad_norm": 1.6972509622573853, + "learning_rate": 7.519893309624362e-05, + "loss": 2.0378, + "step": 28084 + }, + { + "epoch": 0.6241111111111111, + "grad_norm": 1.830173373222351, + "learning_rate": 7.519448766392531e-05, + "loss": 2.1773, + "step": 28085 + }, + { + "epoch": 0.6241333333333333, + "grad_norm": 2.4986438751220703, + "learning_rate": 7.519004223160702e-05, + "loss": 1.8763, + "step": 28086 + }, + { + "epoch": 0.6241555555555556, + "grad_norm": 1.9697855710983276, + "learning_rate": 7.518559679928873e-05, + "loss": 1.5322, + "step": 28087 + }, + { + "epoch": 0.6241777777777778, + "grad_norm": 1.905541181564331, + "learning_rate": 7.518115136697044e-05, + "loss": 1.6298, + "step": 28088 + }, + { + "epoch": 0.6242, + "grad_norm": 1.5465525388717651, + "learning_rate": 7.517670593465215e-05, + "loss": 1.4808, + "step": 28089 + }, + { + "epoch": 0.6242222222222222, + "grad_norm": 1.9321972131729126, + "learning_rate": 7.517226050233386e-05, + "loss": 1.6778, + "step": 28090 + }, + { + "epoch": 0.6242444444444445, + "grad_norm": 1.919901728630066, + "learning_rate": 7.516781507001557e-05, + "loss": 2.01, + "step": 28091 + }, + { + "epoch": 0.6242666666666666, + "grad_norm": 1.8274459838867188, + "learning_rate": 7.516336963769727e-05, + "loss": 2.2137, + "step": 28092 + }, + { + "epoch": 0.6242888888888889, + "grad_norm": 1.757928729057312, + "learning_rate": 7.515892420537898e-05, + "loss": 1.6233, + "step": 28093 + }, + { + "epoch": 0.6243111111111111, + "grad_norm": 1.7088501453399658, + "learning_rate": 7.515447877306069e-05, + "loss": 1.7571, + "step": 28094 + }, + { + "epoch": 0.6243333333333333, + "grad_norm": 2.1138648986816406, + "learning_rate": 7.515003334074238e-05, + "loss": 2.3092, + "step": 28095 + }, + { + "epoch": 0.6243555555555556, + "grad_norm": 1.675551414489746, + "learning_rate": 7.514558790842409e-05, + "loss": 1.7107, + "step": 28096 + }, + { + "epoch": 0.6243777777777778, + "grad_norm": 1.9008680582046509, + "learning_rate": 7.514114247610581e-05, + "loss": 2.0442, + "step": 28097 + }, + { + "epoch": 0.6244, + "grad_norm": 2.154712438583374, + "learning_rate": 7.513669704378751e-05, + "loss": 1.9904, + "step": 28098 + }, + { + "epoch": 0.6244222222222222, + "grad_norm": 1.4223718643188477, + "learning_rate": 7.513225161146922e-05, + "loss": 0.9587, + "step": 28099 + }, + { + "epoch": 0.6244444444444445, + "grad_norm": 2.5552480220794678, + "learning_rate": 7.512780617915093e-05, + "loss": 1.6455, + "step": 28100 + }, + { + "epoch": 0.6244666666666666, + "grad_norm": 1.4165558815002441, + "learning_rate": 7.512336074683264e-05, + "loss": 2.0838, + "step": 28101 + }, + { + "epoch": 0.6244888888888889, + "grad_norm": 1.561410665512085, + "learning_rate": 7.511891531451434e-05, + "loss": 2.2894, + "step": 28102 + }, + { + "epoch": 0.6245111111111111, + "grad_norm": 1.31568443775177, + "learning_rate": 7.511446988219605e-05, + "loss": 2.087, + "step": 28103 + }, + { + "epoch": 0.6245333333333334, + "grad_norm": 1.6914708614349365, + "learning_rate": 7.511002444987775e-05, + "loss": 1.9268, + "step": 28104 + }, + { + "epoch": 0.6245555555555555, + "grad_norm": 1.6146295070648193, + "learning_rate": 7.510557901755945e-05, + "loss": 2.0463, + "step": 28105 + }, + { + "epoch": 0.6245777777777778, + "grad_norm": 1.5655505657196045, + "learning_rate": 7.510113358524117e-05, + "loss": 2.1122, + "step": 28106 + }, + { + "epoch": 0.6246, + "grad_norm": 1.5095285177230835, + "learning_rate": 7.509668815292288e-05, + "loss": 2.0841, + "step": 28107 + }, + { + "epoch": 0.6246222222222222, + "grad_norm": 1.6935125589370728, + "learning_rate": 7.509224272060458e-05, + "loss": 1.5109, + "step": 28108 + }, + { + "epoch": 0.6246444444444444, + "grad_norm": 1.5695096254348755, + "learning_rate": 7.508779728828629e-05, + "loss": 2.0201, + "step": 28109 + }, + { + "epoch": 0.6246666666666667, + "grad_norm": 1.5828744173049927, + "learning_rate": 7.5083351855968e-05, + "loss": 2.3864, + "step": 28110 + }, + { + "epoch": 0.6246888888888888, + "grad_norm": 1.9256597757339478, + "learning_rate": 7.507890642364971e-05, + "loss": 1.3124, + "step": 28111 + }, + { + "epoch": 0.6247111111111111, + "grad_norm": 1.8132530450820923, + "learning_rate": 7.50744609913314e-05, + "loss": 1.9949, + "step": 28112 + }, + { + "epoch": 0.6247333333333334, + "grad_norm": 1.9781066179275513, + "learning_rate": 7.507001555901311e-05, + "loss": 2.2829, + "step": 28113 + }, + { + "epoch": 0.6247555555555555, + "grad_norm": 1.0637400150299072, + "learning_rate": 7.506557012669482e-05, + "loss": 1.1418, + "step": 28114 + }, + { + "epoch": 0.6247777777777778, + "grad_norm": 1.4971493482589722, + "learning_rate": 7.506112469437653e-05, + "loss": 1.9301, + "step": 28115 + }, + { + "epoch": 0.6248, + "grad_norm": 1.9875372648239136, + "learning_rate": 7.505667926205824e-05, + "loss": 1.9572, + "step": 28116 + }, + { + "epoch": 0.6248222222222222, + "grad_norm": 1.5765823125839233, + "learning_rate": 7.505223382973995e-05, + "loss": 2.1686, + "step": 28117 + }, + { + "epoch": 0.6248444444444444, + "grad_norm": 1.5952030420303345, + "learning_rate": 7.504778839742165e-05, + "loss": 1.7775, + "step": 28118 + }, + { + "epoch": 0.6248666666666667, + "grad_norm": 1.9303405284881592, + "learning_rate": 7.504334296510336e-05, + "loss": 2.2577, + "step": 28119 + }, + { + "epoch": 0.6248888888888889, + "grad_norm": 1.6546903848648071, + "learning_rate": 7.503889753278507e-05, + "loss": 1.9196, + "step": 28120 + }, + { + "epoch": 0.6249111111111111, + "grad_norm": 1.4748766422271729, + "learning_rate": 7.503445210046678e-05, + "loss": 1.9852, + "step": 28121 + }, + { + "epoch": 0.6249333333333333, + "grad_norm": 1.8407598733901978, + "learning_rate": 7.503000666814847e-05, + "loss": 1.7293, + "step": 28122 + }, + { + "epoch": 0.6249555555555556, + "grad_norm": 1.8829072713851929, + "learning_rate": 7.502556123583018e-05, + "loss": 2.2661, + "step": 28123 + }, + { + "epoch": 0.6249777777777777, + "grad_norm": 1.5948944091796875, + "learning_rate": 7.502111580351189e-05, + "loss": 1.7127, + "step": 28124 + }, + { + "epoch": 0.625, + "grad_norm": 1.6878756284713745, + "learning_rate": 7.50166703711936e-05, + "loss": 2.1244, + "step": 28125 + }, + { + "epoch": 0.6250222222222223, + "grad_norm": 1.5194244384765625, + "learning_rate": 7.501222493887531e-05, + "loss": 1.5466, + "step": 28126 + }, + { + "epoch": 0.6250444444444444, + "grad_norm": 1.6358795166015625, + "learning_rate": 7.500777950655702e-05, + "loss": 1.8983, + "step": 28127 + }, + { + "epoch": 0.6250666666666667, + "grad_norm": 1.9146581888198853, + "learning_rate": 7.500333407423872e-05, + "loss": 2.1941, + "step": 28128 + }, + { + "epoch": 0.6250888888888889, + "grad_norm": 1.477559208869934, + "learning_rate": 7.499888864192043e-05, + "loss": 1.1678, + "step": 28129 + }, + { + "epoch": 0.6251111111111111, + "grad_norm": 1.3219830989837646, + "learning_rate": 7.499444320960214e-05, + "loss": 1.3111, + "step": 28130 + }, + { + "epoch": 0.6251333333333333, + "grad_norm": 1.3379400968551636, + "learning_rate": 7.498999777728385e-05, + "loss": 1.2058, + "step": 28131 + }, + { + "epoch": 0.6251555555555556, + "grad_norm": 1.565895676612854, + "learning_rate": 7.498555234496554e-05, + "loss": 2.0447, + "step": 28132 + }, + { + "epoch": 0.6251777777777778, + "grad_norm": 1.7458685636520386, + "learning_rate": 7.498110691264725e-05, + "loss": 1.6086, + "step": 28133 + }, + { + "epoch": 0.6252, + "grad_norm": 1.5337018966674805, + "learning_rate": 7.497666148032898e-05, + "loss": 1.7856, + "step": 28134 + }, + { + "epoch": 0.6252222222222222, + "grad_norm": 2.4415087699890137, + "learning_rate": 7.497221604801067e-05, + "loss": 2.4248, + "step": 28135 + }, + { + "epoch": 0.6252444444444445, + "grad_norm": 1.7598092555999756, + "learning_rate": 7.496777061569238e-05, + "loss": 2.0031, + "step": 28136 + }, + { + "epoch": 0.6252666666666666, + "grad_norm": 1.657050609588623, + "learning_rate": 7.496332518337409e-05, + "loss": 1.6827, + "step": 28137 + }, + { + "epoch": 0.6252888888888889, + "grad_norm": 1.4472593069076538, + "learning_rate": 7.495887975105579e-05, + "loss": 1.5198, + "step": 28138 + }, + { + "epoch": 0.6253111111111112, + "grad_norm": 0.7550368309020996, + "learning_rate": 7.49544343187375e-05, + "loss": 0.033, + "step": 28139 + }, + { + "epoch": 0.6253333333333333, + "grad_norm": 1.9193103313446045, + "learning_rate": 7.49499888864192e-05, + "loss": 1.9755, + "step": 28140 + }, + { + "epoch": 0.6253555555555556, + "grad_norm": 2.672001361846924, + "learning_rate": 7.494554345410092e-05, + "loss": 1.7431, + "step": 28141 + }, + { + "epoch": 0.6253777777777778, + "grad_norm": 1.5714386701583862, + "learning_rate": 7.494109802178261e-05, + "loss": 1.9451, + "step": 28142 + }, + { + "epoch": 0.6254, + "grad_norm": 1.6924715042114258, + "learning_rate": 7.493665258946434e-05, + "loss": 1.7028, + "step": 28143 + }, + { + "epoch": 0.6254222222222222, + "grad_norm": 1.9398430585861206, + "learning_rate": 7.493220715714604e-05, + "loss": 2.1162, + "step": 28144 + }, + { + "epoch": 0.6254444444444445, + "grad_norm": 1.5882272720336914, + "learning_rate": 7.492776172482774e-05, + "loss": 1.4288, + "step": 28145 + }, + { + "epoch": 0.6254666666666666, + "grad_norm": 1.8803850412368774, + "learning_rate": 7.492331629250945e-05, + "loss": 1.7391, + "step": 28146 + }, + { + "epoch": 0.6254888888888889, + "grad_norm": 1.6644755601882935, + "learning_rate": 7.491887086019116e-05, + "loss": 1.9229, + "step": 28147 + }, + { + "epoch": 0.6255111111111111, + "grad_norm": 2.1229488849639893, + "learning_rate": 7.491442542787287e-05, + "loss": 1.6049, + "step": 28148 + }, + { + "epoch": 0.6255333333333334, + "grad_norm": 1.5672968626022339, + "learning_rate": 7.490997999555457e-05, + "loss": 1.3858, + "step": 28149 + }, + { + "epoch": 0.6255555555555555, + "grad_norm": 1.2970843315124512, + "learning_rate": 7.490553456323628e-05, + "loss": 0.5406, + "step": 28150 + }, + { + "epoch": 0.6255777777777778, + "grad_norm": 1.9396559000015259, + "learning_rate": 7.490108913091799e-05, + "loss": 2.4668, + "step": 28151 + }, + { + "epoch": 0.6256, + "grad_norm": 1.4108716249465942, + "learning_rate": 7.48966436985997e-05, + "loss": 1.8329, + "step": 28152 + }, + { + "epoch": 0.6256222222222222, + "grad_norm": 1.3884369134902954, + "learning_rate": 7.48921982662814e-05, + "loss": 1.9971, + "step": 28153 + }, + { + "epoch": 0.6256444444444444, + "grad_norm": 1.5661218166351318, + "learning_rate": 7.488775283396311e-05, + "loss": 2.4888, + "step": 28154 + }, + { + "epoch": 0.6256666666666667, + "grad_norm": 1.4409959316253662, + "learning_rate": 7.488330740164481e-05, + "loss": 1.4596, + "step": 28155 + }, + { + "epoch": 0.6256888888888889, + "grad_norm": 1.6241331100463867, + "learning_rate": 7.487886196932652e-05, + "loss": 2.219, + "step": 28156 + }, + { + "epoch": 0.6257111111111111, + "grad_norm": 1.4938559532165527, + "learning_rate": 7.487441653700823e-05, + "loss": 2.295, + "step": 28157 + }, + { + "epoch": 0.6257333333333334, + "grad_norm": 1.578965187072754, + "learning_rate": 7.486997110468994e-05, + "loss": 2.276, + "step": 28158 + }, + { + "epoch": 0.6257555555555555, + "grad_norm": 1.4421511888504028, + "learning_rate": 7.486552567237163e-05, + "loss": 1.905, + "step": 28159 + }, + { + "epoch": 0.6257777777777778, + "grad_norm": 1.6099575757980347, + "learning_rate": 7.486108024005334e-05, + "loss": 1.9368, + "step": 28160 + }, + { + "epoch": 0.6258, + "grad_norm": 1.6032575368881226, + "learning_rate": 7.485663480773505e-05, + "loss": 1.7127, + "step": 28161 + }, + { + "epoch": 0.6258222222222222, + "grad_norm": 1.719819188117981, + "learning_rate": 7.485218937541676e-05, + "loss": 2.3936, + "step": 28162 + }, + { + "epoch": 0.6258444444444444, + "grad_norm": 1.682454228401184, + "learning_rate": 7.484774394309847e-05, + "loss": 2.2743, + "step": 28163 + }, + { + "epoch": 0.6258666666666667, + "grad_norm": 1.3777753114700317, + "learning_rate": 7.484329851078018e-05, + "loss": 0.9647, + "step": 28164 + }, + { + "epoch": 0.6258888888888889, + "grad_norm": 1.7213762998580933, + "learning_rate": 7.483885307846188e-05, + "loss": 2.2361, + "step": 28165 + }, + { + "epoch": 0.6259111111111111, + "grad_norm": 1.4945802688598633, + "learning_rate": 7.483440764614359e-05, + "loss": 1.7347, + "step": 28166 + }, + { + "epoch": 0.6259333333333333, + "grad_norm": 1.432998538017273, + "learning_rate": 7.48299622138253e-05, + "loss": 1.5117, + "step": 28167 + }, + { + "epoch": 0.6259555555555556, + "grad_norm": 1.8089045286178589, + "learning_rate": 7.482551678150701e-05, + "loss": 2.1483, + "step": 28168 + }, + { + "epoch": 0.6259777777777777, + "grad_norm": 1.4634894132614136, + "learning_rate": 7.48210713491887e-05, + "loss": 1.7531, + "step": 28169 + }, + { + "epoch": 0.626, + "grad_norm": 1.5178799629211426, + "learning_rate": 7.481662591687041e-05, + "loss": 1.5721, + "step": 28170 + }, + { + "epoch": 0.6260222222222223, + "grad_norm": 1.5224968194961548, + "learning_rate": 7.481218048455214e-05, + "loss": 1.9887, + "step": 28171 + }, + { + "epoch": 0.6260444444444444, + "grad_norm": 1.3402128219604492, + "learning_rate": 7.480773505223383e-05, + "loss": 1.6201, + "step": 28172 + }, + { + "epoch": 0.6260666666666667, + "grad_norm": 1.4766627550125122, + "learning_rate": 7.480328961991554e-05, + "loss": 1.7079, + "step": 28173 + }, + { + "epoch": 0.6260888888888889, + "grad_norm": 1.459479570388794, + "learning_rate": 7.479884418759725e-05, + "loss": 1.4734, + "step": 28174 + }, + { + "epoch": 0.6261111111111111, + "grad_norm": 1.8193565607070923, + "learning_rate": 7.479439875527895e-05, + "loss": 2.2156, + "step": 28175 + }, + { + "epoch": 0.6261333333333333, + "grad_norm": 1.553693413734436, + "learning_rate": 7.478995332296066e-05, + "loss": 1.5702, + "step": 28176 + }, + { + "epoch": 0.6261555555555556, + "grad_norm": 2.1361544132232666, + "learning_rate": 7.478550789064237e-05, + "loss": 1.8119, + "step": 28177 + }, + { + "epoch": 0.6261777777777778, + "grad_norm": 1.7945271730422974, + "learning_rate": 7.478106245832408e-05, + "loss": 2.1368, + "step": 28178 + }, + { + "epoch": 0.6262, + "grad_norm": 1.1177501678466797, + "learning_rate": 7.477661702600577e-05, + "loss": 0.7338, + "step": 28179 + }, + { + "epoch": 0.6262222222222222, + "grad_norm": 1.8593310117721558, + "learning_rate": 7.47721715936875e-05, + "loss": 2.0467, + "step": 28180 + }, + { + "epoch": 0.6262444444444445, + "grad_norm": 2.0093624591827393, + "learning_rate": 7.47677261613692e-05, + "loss": 1.9046, + "step": 28181 + }, + { + "epoch": 0.6262666666666666, + "grad_norm": 2.9138686656951904, + "learning_rate": 7.47632807290509e-05, + "loss": 2.1785, + "step": 28182 + }, + { + "epoch": 0.6262888888888889, + "grad_norm": 1.8873893022537231, + "learning_rate": 7.475883529673261e-05, + "loss": 1.7869, + "step": 28183 + }, + { + "epoch": 0.6263111111111112, + "grad_norm": 1.9957176446914673, + "learning_rate": 7.475438986441432e-05, + "loss": 1.9079, + "step": 28184 + }, + { + "epoch": 0.6263333333333333, + "grad_norm": 1.9087276458740234, + "learning_rate": 7.474994443209602e-05, + "loss": 2.3609, + "step": 28185 + }, + { + "epoch": 0.6263555555555556, + "grad_norm": 1.6395312547683716, + "learning_rate": 7.474549899977773e-05, + "loss": 1.5422, + "step": 28186 + }, + { + "epoch": 0.6263777777777778, + "grad_norm": 1.694113850593567, + "learning_rate": 7.474105356745944e-05, + "loss": 1.4948, + "step": 28187 + }, + { + "epoch": 0.6264, + "grad_norm": 2.090855121612549, + "learning_rate": 7.473660813514115e-05, + "loss": 2.3313, + "step": 28188 + }, + { + "epoch": 0.6264222222222222, + "grad_norm": 1.4489983320236206, + "learning_rate": 7.473216270282286e-05, + "loss": 1.4332, + "step": 28189 + }, + { + "epoch": 0.6264444444444445, + "grad_norm": 1.713574767112732, + "learning_rate": 7.472771727050457e-05, + "loss": 1.8765, + "step": 28190 + }, + { + "epoch": 0.6264666666666666, + "grad_norm": 1.9305528402328491, + "learning_rate": 7.472327183818627e-05, + "loss": 1.434, + "step": 28191 + }, + { + "epoch": 0.6264888888888889, + "grad_norm": 2.201230764389038, + "learning_rate": 7.471882640586797e-05, + "loss": 1.412, + "step": 28192 + }, + { + "epoch": 0.6265111111111111, + "grad_norm": 1.8815728425979614, + "learning_rate": 7.471438097354968e-05, + "loss": 1.5491, + "step": 28193 + }, + { + "epoch": 0.6265333333333334, + "grad_norm": 1.3930691480636597, + "learning_rate": 7.470993554123139e-05, + "loss": 1.5035, + "step": 28194 + }, + { + "epoch": 0.6265555555555555, + "grad_norm": 1.7491741180419922, + "learning_rate": 7.470549010891309e-05, + "loss": 1.6875, + "step": 28195 + }, + { + "epoch": 0.6265777777777778, + "grad_norm": 2.281614303588867, + "learning_rate": 7.47010446765948e-05, + "loss": 2.1702, + "step": 28196 + }, + { + "epoch": 0.6266, + "grad_norm": 1.478087306022644, + "learning_rate": 7.46965992442765e-05, + "loss": 1.4802, + "step": 28197 + }, + { + "epoch": 0.6266222222222222, + "grad_norm": 2.5004618167877197, + "learning_rate": 7.469215381195822e-05, + "loss": 1.9388, + "step": 28198 + }, + { + "epoch": 0.6266444444444444, + "grad_norm": 3.1494030952453613, + "learning_rate": 7.468770837963992e-05, + "loss": 2.192, + "step": 28199 + }, + { + "epoch": 0.6266666666666667, + "grad_norm": 1.9602372646331787, + "learning_rate": 7.468326294732163e-05, + "loss": 1.2015, + "step": 28200 + }, + { + "epoch": 0.6266888888888889, + "grad_norm": 1.803371548652649, + "learning_rate": 7.467881751500334e-05, + "loss": 2.1237, + "step": 28201 + }, + { + "epoch": 0.6267111111111111, + "grad_norm": 1.682560682296753, + "learning_rate": 7.467437208268504e-05, + "loss": 2.3588, + "step": 28202 + }, + { + "epoch": 0.6267333333333334, + "grad_norm": 1.5700569152832031, + "learning_rate": 7.466992665036675e-05, + "loss": 2.4099, + "step": 28203 + }, + { + "epoch": 0.6267555555555555, + "grad_norm": 1.806165099143982, + "learning_rate": 7.466548121804846e-05, + "loss": 2.2097, + "step": 28204 + }, + { + "epoch": 0.6267777777777778, + "grad_norm": 1.451766014099121, + "learning_rate": 7.466103578573017e-05, + "loss": 1.657, + "step": 28205 + }, + { + "epoch": 0.6268, + "grad_norm": 1.5212419033050537, + "learning_rate": 7.465659035341186e-05, + "loss": 2.3481, + "step": 28206 + }, + { + "epoch": 0.6268222222222222, + "grad_norm": 1.485065221786499, + "learning_rate": 7.465214492109357e-05, + "loss": 2.3113, + "step": 28207 + }, + { + "epoch": 0.6268444444444444, + "grad_norm": 1.5031648874282837, + "learning_rate": 7.46476994887753e-05, + "loss": 1.9426, + "step": 28208 + }, + { + "epoch": 0.6268666666666667, + "grad_norm": 1.8476406335830688, + "learning_rate": 7.4643254056457e-05, + "loss": 1.9221, + "step": 28209 + }, + { + "epoch": 0.6268888888888889, + "grad_norm": 1.5286202430725098, + "learning_rate": 7.46388086241387e-05, + "loss": 1.9807, + "step": 28210 + }, + { + "epoch": 0.6269111111111111, + "grad_norm": 1.6897189617156982, + "learning_rate": 7.463436319182041e-05, + "loss": 1.8853, + "step": 28211 + }, + { + "epoch": 0.6269333333333333, + "grad_norm": 1.8951705694198608, + "learning_rate": 7.462991775950211e-05, + "loss": 1.9416, + "step": 28212 + }, + { + "epoch": 0.6269555555555556, + "grad_norm": 1.5698425769805908, + "learning_rate": 7.462547232718382e-05, + "loss": 1.5634, + "step": 28213 + }, + { + "epoch": 0.6269777777777777, + "grad_norm": 1.63398277759552, + "learning_rate": 7.462102689486553e-05, + "loss": 2.0327, + "step": 28214 + }, + { + "epoch": 0.627, + "grad_norm": 1.5755960941314697, + "learning_rate": 7.461658146254724e-05, + "loss": 1.6802, + "step": 28215 + }, + { + "epoch": 0.6270222222222223, + "grad_norm": 0.6174606084823608, + "learning_rate": 7.461213603022893e-05, + "loss": 0.0305, + "step": 28216 + }, + { + "epoch": 0.6270444444444444, + "grad_norm": 1.692030429840088, + "learning_rate": 7.460769059791066e-05, + "loss": 2.2251, + "step": 28217 + }, + { + "epoch": 0.6270666666666667, + "grad_norm": 1.5224628448486328, + "learning_rate": 7.460324516559237e-05, + "loss": 1.624, + "step": 28218 + }, + { + "epoch": 0.6270888888888889, + "grad_norm": 1.9173451662063599, + "learning_rate": 7.459879973327406e-05, + "loss": 2.2157, + "step": 28219 + }, + { + "epoch": 0.6271111111111111, + "grad_norm": 1.9144426584243774, + "learning_rate": 7.459435430095577e-05, + "loss": 2.6556, + "step": 28220 + }, + { + "epoch": 0.6271333333333333, + "grad_norm": 1.8819726705551147, + "learning_rate": 7.458990886863748e-05, + "loss": 2.0855, + "step": 28221 + }, + { + "epoch": 0.6271555555555556, + "grad_norm": 2.056820869445801, + "learning_rate": 7.458546343631918e-05, + "loss": 2.3333, + "step": 28222 + }, + { + "epoch": 0.6271777777777777, + "grad_norm": 1.8170225620269775, + "learning_rate": 7.458101800400089e-05, + "loss": 1.8746, + "step": 28223 + }, + { + "epoch": 0.6272, + "grad_norm": 1.4878606796264648, + "learning_rate": 7.45765725716826e-05, + "loss": 1.4699, + "step": 28224 + }, + { + "epoch": 0.6272222222222222, + "grad_norm": 2.008486747741699, + "learning_rate": 7.457212713936431e-05, + "loss": 2.4197, + "step": 28225 + }, + { + "epoch": 0.6272444444444445, + "grad_norm": 1.7091706991195679, + "learning_rate": 7.456768170704602e-05, + "loss": 2.0768, + "step": 28226 + }, + { + "epoch": 0.6272666666666666, + "grad_norm": 1.7279984951019287, + "learning_rate": 7.456323627472773e-05, + "loss": 1.8261, + "step": 28227 + }, + { + "epoch": 0.6272888888888889, + "grad_norm": 1.9838591814041138, + "learning_rate": 7.455879084240944e-05, + "loss": 1.7549, + "step": 28228 + }, + { + "epoch": 0.6273111111111112, + "grad_norm": 1.8756340742111206, + "learning_rate": 7.455434541009113e-05, + "loss": 1.8588, + "step": 28229 + }, + { + "epoch": 0.6273333333333333, + "grad_norm": 2.0535311698913574, + "learning_rate": 7.454989997777284e-05, + "loss": 2.3669, + "step": 28230 + }, + { + "epoch": 0.6273555555555556, + "grad_norm": 1.7964155673980713, + "learning_rate": 7.454545454545455e-05, + "loss": 1.7613, + "step": 28231 + }, + { + "epoch": 0.6273777777777778, + "grad_norm": 2.2725765705108643, + "learning_rate": 7.454100911313625e-05, + "loss": 2.5016, + "step": 28232 + }, + { + "epoch": 0.6274, + "grad_norm": 1.988587498664856, + "learning_rate": 7.453656368081796e-05, + "loss": 1.9788, + "step": 28233 + }, + { + "epoch": 0.6274222222222222, + "grad_norm": 1.7059104442596436, + "learning_rate": 7.453211824849967e-05, + "loss": 1.9183, + "step": 28234 + }, + { + "epoch": 0.6274444444444445, + "grad_norm": 1.7577725648880005, + "learning_rate": 7.452767281618138e-05, + "loss": 2.0483, + "step": 28235 + }, + { + "epoch": 0.6274666666666666, + "grad_norm": 1.6931707859039307, + "learning_rate": 7.452322738386309e-05, + "loss": 1.6994, + "step": 28236 + }, + { + "epoch": 0.6274888888888889, + "grad_norm": 1.6592637300491333, + "learning_rate": 7.45187819515448e-05, + "loss": 1.5483, + "step": 28237 + }, + { + "epoch": 0.6275111111111111, + "grad_norm": 1.3451963663101196, + "learning_rate": 7.45143365192265e-05, + "loss": 1.0713, + "step": 28238 + }, + { + "epoch": 0.6275333333333334, + "grad_norm": 1.2414436340332031, + "learning_rate": 7.45098910869082e-05, + "loss": 0.9416, + "step": 28239 + }, + { + "epoch": 0.6275555555555555, + "grad_norm": 1.9038292169570923, + "learning_rate": 7.450544565458991e-05, + "loss": 1.8877, + "step": 28240 + }, + { + "epoch": 0.6275777777777778, + "grad_norm": 1.881089210510254, + "learning_rate": 7.450100022227162e-05, + "loss": 1.6985, + "step": 28241 + }, + { + "epoch": 0.6276, + "grad_norm": 1.404329776763916, + "learning_rate": 7.449655478995332e-05, + "loss": 1.4275, + "step": 28242 + }, + { + "epoch": 0.6276222222222222, + "grad_norm": 1.655401349067688, + "learning_rate": 7.449210935763503e-05, + "loss": 2.0994, + "step": 28243 + }, + { + "epoch": 0.6276444444444444, + "grad_norm": 2.3409383296966553, + "learning_rate": 7.448766392531674e-05, + "loss": 2.3209, + "step": 28244 + }, + { + "epoch": 0.6276666666666667, + "grad_norm": 1.8269544839859009, + "learning_rate": 7.448321849299846e-05, + "loss": 1.9272, + "step": 28245 + }, + { + "epoch": 0.6276888888888889, + "grad_norm": 1.9554951190948486, + "learning_rate": 7.447877306068015e-05, + "loss": 2.1167, + "step": 28246 + }, + { + "epoch": 0.6277111111111111, + "grad_norm": 1.4238810539245605, + "learning_rate": 7.447432762836186e-05, + "loss": 1.0804, + "step": 28247 + }, + { + "epoch": 0.6277333333333334, + "grad_norm": 1.9184352159500122, + "learning_rate": 7.446988219604357e-05, + "loss": 1.945, + "step": 28248 + }, + { + "epoch": 0.6277555555555555, + "grad_norm": 1.8719063997268677, + "learning_rate": 7.446543676372527e-05, + "loss": 1.7791, + "step": 28249 + }, + { + "epoch": 0.6277777777777778, + "grad_norm": 1.7169616222381592, + "learning_rate": 7.446099133140698e-05, + "loss": 0.9318, + "step": 28250 + }, + { + "epoch": 0.6278, + "grad_norm": 1.0853959321975708, + "learning_rate": 7.445654589908869e-05, + "loss": 0.9651, + "step": 28251 + }, + { + "epoch": 0.6278222222222222, + "grad_norm": 1.5064071416854858, + "learning_rate": 7.44521004667704e-05, + "loss": 2.7501, + "step": 28252 + }, + { + "epoch": 0.6278444444444444, + "grad_norm": 1.6041384935379028, + "learning_rate": 7.44476550344521e-05, + "loss": 2.051, + "step": 28253 + }, + { + "epoch": 0.6278666666666667, + "grad_norm": 1.5042846202850342, + "learning_rate": 7.444320960213382e-05, + "loss": 2.0497, + "step": 28254 + }, + { + "epoch": 0.6278888888888889, + "grad_norm": 1.3406095504760742, + "learning_rate": 7.443876416981553e-05, + "loss": 2.2745, + "step": 28255 + }, + { + "epoch": 0.6279111111111111, + "grad_norm": 1.767425537109375, + "learning_rate": 7.443431873749722e-05, + "loss": 2.3426, + "step": 28256 + }, + { + "epoch": 0.6279333333333333, + "grad_norm": 1.8688346147537231, + "learning_rate": 7.442987330517893e-05, + "loss": 2.042, + "step": 28257 + }, + { + "epoch": 0.6279555555555556, + "grad_norm": 2.109346628189087, + "learning_rate": 7.442542787286064e-05, + "loss": 2.2495, + "step": 28258 + }, + { + "epoch": 0.6279777777777777, + "grad_norm": 2.2193105220794678, + "learning_rate": 7.442098244054234e-05, + "loss": 2.1782, + "step": 28259 + }, + { + "epoch": 0.628, + "grad_norm": 1.6535050868988037, + "learning_rate": 7.441653700822405e-05, + "loss": 2.3659, + "step": 28260 + }, + { + "epoch": 0.6280222222222223, + "grad_norm": 0.8908278346061707, + "learning_rate": 7.441209157590576e-05, + "loss": 0.6832, + "step": 28261 + }, + { + "epoch": 0.6280444444444444, + "grad_norm": 1.612201452255249, + "learning_rate": 7.440764614358747e-05, + "loss": 2.3098, + "step": 28262 + }, + { + "epoch": 0.6280666666666667, + "grad_norm": 1.5728148221969604, + "learning_rate": 7.440320071126918e-05, + "loss": 1.993, + "step": 28263 + }, + { + "epoch": 0.6280888888888889, + "grad_norm": 1.5704433917999268, + "learning_rate": 7.439875527895089e-05, + "loss": 2.0002, + "step": 28264 + }, + { + "epoch": 0.6281111111111111, + "grad_norm": 1.6616250276565552, + "learning_rate": 7.43943098466326e-05, + "loss": 2.2513, + "step": 28265 + }, + { + "epoch": 0.6281333333333333, + "grad_norm": 1.6211087703704834, + "learning_rate": 7.438986441431429e-05, + "loss": 1.7105, + "step": 28266 + }, + { + "epoch": 0.6281555555555556, + "grad_norm": 1.5077217817306519, + "learning_rate": 7.4385418981996e-05, + "loss": 1.9318, + "step": 28267 + }, + { + "epoch": 0.6281777777777777, + "grad_norm": 1.5681517124176025, + "learning_rate": 7.438097354967771e-05, + "loss": 1.7493, + "step": 28268 + }, + { + "epoch": 0.6282, + "grad_norm": 1.7847591638565063, + "learning_rate": 7.437652811735941e-05, + "loss": 2.3265, + "step": 28269 + }, + { + "epoch": 0.6282222222222222, + "grad_norm": 1.3881011009216309, + "learning_rate": 7.437208268504112e-05, + "loss": 1.7574, + "step": 28270 + }, + { + "epoch": 0.6282444444444445, + "grad_norm": 1.2813900709152222, + "learning_rate": 7.436763725272283e-05, + "loss": 1.5656, + "step": 28271 + }, + { + "epoch": 0.6282666666666666, + "grad_norm": 1.3996785879135132, + "learning_rate": 7.436319182040454e-05, + "loss": 1.1872, + "step": 28272 + }, + { + "epoch": 0.6282888888888889, + "grad_norm": 1.6524691581726074, + "learning_rate": 7.435874638808625e-05, + "loss": 1.8171, + "step": 28273 + }, + { + "epoch": 0.6283111111111112, + "grad_norm": 1.781345009803772, + "learning_rate": 7.435430095576796e-05, + "loss": 2.0514, + "step": 28274 + }, + { + "epoch": 0.6283333333333333, + "grad_norm": 1.6943475008010864, + "learning_rate": 7.434985552344967e-05, + "loss": 2.1368, + "step": 28275 + }, + { + "epoch": 0.6283555555555556, + "grad_norm": 1.695285439491272, + "learning_rate": 7.434541009113136e-05, + "loss": 1.9909, + "step": 28276 + }, + { + "epoch": 0.6283777777777778, + "grad_norm": 1.768095850944519, + "learning_rate": 7.434096465881307e-05, + "loss": 2.0456, + "step": 28277 + }, + { + "epoch": 0.6284, + "grad_norm": 1.7602506875991821, + "learning_rate": 7.433651922649478e-05, + "loss": 2.0481, + "step": 28278 + }, + { + "epoch": 0.6284222222222222, + "grad_norm": 1.5751560926437378, + "learning_rate": 7.433207379417648e-05, + "loss": 1.7103, + "step": 28279 + }, + { + "epoch": 0.6284444444444445, + "grad_norm": 1.458869218826294, + "learning_rate": 7.432762836185819e-05, + "loss": 1.6771, + "step": 28280 + }, + { + "epoch": 0.6284666666666666, + "grad_norm": 1.6494125127792358, + "learning_rate": 7.43231829295399e-05, + "loss": 1.7331, + "step": 28281 + }, + { + "epoch": 0.6284888888888889, + "grad_norm": 1.9761335849761963, + "learning_rate": 7.431873749722162e-05, + "loss": 2.1751, + "step": 28282 + }, + { + "epoch": 0.6285111111111111, + "grad_norm": 1.5494686365127563, + "learning_rate": 7.431429206490332e-05, + "loss": 1.9114, + "step": 28283 + }, + { + "epoch": 0.6285333333333334, + "grad_norm": 1.946640133857727, + "learning_rate": 7.430984663258503e-05, + "loss": 1.9682, + "step": 28284 + }, + { + "epoch": 0.6285555555555555, + "grad_norm": 1.6203198432922363, + "learning_rate": 7.430540120026674e-05, + "loss": 1.9045, + "step": 28285 + }, + { + "epoch": 0.6285777777777778, + "grad_norm": 1.078140377998352, + "learning_rate": 7.430095576794843e-05, + "loss": 0.8918, + "step": 28286 + }, + { + "epoch": 0.6286, + "grad_norm": 1.5796608924865723, + "learning_rate": 7.429651033563014e-05, + "loss": 1.6461, + "step": 28287 + }, + { + "epoch": 0.6286222222222222, + "grad_norm": 2.0373597145080566, + "learning_rate": 7.429206490331185e-05, + "loss": 2.0898, + "step": 28288 + }, + { + "epoch": 0.6286444444444445, + "grad_norm": 1.6334196329116821, + "learning_rate": 7.428761947099355e-05, + "loss": 1.8604, + "step": 28289 + }, + { + "epoch": 0.6286666666666667, + "grad_norm": 1.8665622472763062, + "learning_rate": 7.428317403867526e-05, + "loss": 1.7337, + "step": 28290 + }, + { + "epoch": 0.6286888888888889, + "grad_norm": 1.641268014907837, + "learning_rate": 7.427872860635698e-05, + "loss": 2.0156, + "step": 28291 + }, + { + "epoch": 0.6287111111111111, + "grad_norm": 1.8151181936264038, + "learning_rate": 7.427428317403869e-05, + "loss": 2.0465, + "step": 28292 + }, + { + "epoch": 0.6287333333333334, + "grad_norm": 2.0497701168060303, + "learning_rate": 7.426983774172038e-05, + "loss": 2.0673, + "step": 28293 + }, + { + "epoch": 0.6287555555555555, + "grad_norm": 1.5897787809371948, + "learning_rate": 7.42653923094021e-05, + "loss": 1.8692, + "step": 28294 + }, + { + "epoch": 0.6287777777777778, + "grad_norm": 1.913669228553772, + "learning_rate": 7.42609468770838e-05, + "loss": 1.6572, + "step": 28295 + }, + { + "epoch": 0.6288, + "grad_norm": 1.6216816902160645, + "learning_rate": 7.42565014447655e-05, + "loss": 1.7076, + "step": 28296 + }, + { + "epoch": 0.6288222222222222, + "grad_norm": 1.8761065006256104, + "learning_rate": 7.425205601244721e-05, + "loss": 1.7828, + "step": 28297 + }, + { + "epoch": 0.6288444444444444, + "grad_norm": 1.8865710496902466, + "learning_rate": 7.424761058012892e-05, + "loss": 1.6179, + "step": 28298 + }, + { + "epoch": 0.6288666666666667, + "grad_norm": 2.1235756874084473, + "learning_rate": 7.424316514781063e-05, + "loss": 2.0475, + "step": 28299 + }, + { + "epoch": 0.6288888888888889, + "grad_norm": 2.2243402004241943, + "learning_rate": 7.423871971549234e-05, + "loss": 1.7504, + "step": 28300 + }, + { + "epoch": 0.6289111111111111, + "grad_norm": 1.769464373588562, + "learning_rate": 7.423427428317405e-05, + "loss": 2.6118, + "step": 28301 + }, + { + "epoch": 0.6289333333333333, + "grad_norm": 1.8621481657028198, + "learning_rate": 7.422982885085576e-05, + "loss": 1.1403, + "step": 28302 + }, + { + "epoch": 0.6289555555555556, + "grad_norm": 1.5293608903884888, + "learning_rate": 7.422538341853745e-05, + "loss": 2.2813, + "step": 28303 + }, + { + "epoch": 0.6289777777777777, + "grad_norm": 1.328939437866211, + "learning_rate": 7.422093798621916e-05, + "loss": 1.8391, + "step": 28304 + }, + { + "epoch": 0.629, + "grad_norm": 1.330453634262085, + "learning_rate": 7.421649255390087e-05, + "loss": 2.2479, + "step": 28305 + }, + { + "epoch": 0.6290222222222223, + "grad_norm": 1.6832678318023682, + "learning_rate": 7.421204712158257e-05, + "loss": 2.405, + "step": 28306 + }, + { + "epoch": 0.6290444444444444, + "grad_norm": 1.228036642074585, + "learning_rate": 7.420760168926428e-05, + "loss": 1.966, + "step": 28307 + }, + { + "epoch": 0.6290666666666667, + "grad_norm": 1.3511638641357422, + "learning_rate": 7.420315625694599e-05, + "loss": 2.014, + "step": 28308 + }, + { + "epoch": 0.6290888888888889, + "grad_norm": 1.5867738723754883, + "learning_rate": 7.41987108246277e-05, + "loss": 2.1594, + "step": 28309 + }, + { + "epoch": 0.6291111111111111, + "grad_norm": 1.6926277875900269, + "learning_rate": 7.419426539230941e-05, + "loss": 2.2151, + "step": 28310 + }, + { + "epoch": 0.6291333333333333, + "grad_norm": 2.021162271499634, + "learning_rate": 7.418981995999112e-05, + "loss": 2.1444, + "step": 28311 + }, + { + "epoch": 0.6291555555555556, + "grad_norm": 2.1428580284118652, + "learning_rate": 7.418537452767283e-05, + "loss": 2.575, + "step": 28312 + }, + { + "epoch": 0.6291777777777777, + "grad_norm": 1.5130378007888794, + "learning_rate": 7.418092909535452e-05, + "loss": 1.8039, + "step": 28313 + }, + { + "epoch": 0.6292, + "grad_norm": 1.5649083852767944, + "learning_rate": 7.417648366303623e-05, + "loss": 1.6873, + "step": 28314 + }, + { + "epoch": 0.6292222222222222, + "grad_norm": 1.5888735055923462, + "learning_rate": 7.417203823071794e-05, + "loss": 2.0904, + "step": 28315 + }, + { + "epoch": 0.6292444444444445, + "grad_norm": 1.8082695007324219, + "learning_rate": 7.416759279839964e-05, + "loss": 2.3681, + "step": 28316 + }, + { + "epoch": 0.6292666666666666, + "grad_norm": 1.7941712141036987, + "learning_rate": 7.416314736608135e-05, + "loss": 2.2749, + "step": 28317 + }, + { + "epoch": 0.6292888888888889, + "grad_norm": 1.4473190307617188, + "learning_rate": 7.415870193376306e-05, + "loss": 1.7183, + "step": 28318 + }, + { + "epoch": 0.6293111111111112, + "grad_norm": 1.321277141571045, + "learning_rate": 7.415425650144478e-05, + "loss": 1.3812, + "step": 28319 + }, + { + "epoch": 0.6293333333333333, + "grad_norm": 2.0893523693084717, + "learning_rate": 7.414981106912648e-05, + "loss": 2.7359, + "step": 28320 + }, + { + "epoch": 0.6293555555555556, + "grad_norm": 1.4881926774978638, + "learning_rate": 7.414536563680819e-05, + "loss": 1.6233, + "step": 28321 + }, + { + "epoch": 0.6293777777777778, + "grad_norm": 1.9817962646484375, + "learning_rate": 7.41409202044899e-05, + "loss": 2.1383, + "step": 28322 + }, + { + "epoch": 0.6294, + "grad_norm": 1.7774596214294434, + "learning_rate": 7.413647477217159e-05, + "loss": 2.2503, + "step": 28323 + }, + { + "epoch": 0.6294222222222222, + "grad_norm": 1.7046529054641724, + "learning_rate": 7.41320293398533e-05, + "loss": 2.0507, + "step": 28324 + }, + { + "epoch": 0.6294444444444445, + "grad_norm": 1.4740560054779053, + "learning_rate": 7.412758390753501e-05, + "loss": 1.6524, + "step": 28325 + }, + { + "epoch": 0.6294666666666666, + "grad_norm": 1.5283254384994507, + "learning_rate": 7.412313847521671e-05, + "loss": 2.2177, + "step": 28326 + }, + { + "epoch": 0.6294888888888889, + "grad_norm": 1.4951704740524292, + "learning_rate": 7.411869304289842e-05, + "loss": 1.9466, + "step": 28327 + }, + { + "epoch": 0.6295111111111111, + "grad_norm": 1.6033143997192383, + "learning_rate": 7.411424761058014e-05, + "loss": 1.9761, + "step": 28328 + }, + { + "epoch": 0.6295333333333333, + "grad_norm": 1.7867376804351807, + "learning_rate": 7.410980217826185e-05, + "loss": 1.8702, + "step": 28329 + }, + { + "epoch": 0.6295555555555555, + "grad_norm": 1.4749178886413574, + "learning_rate": 7.410535674594355e-05, + "loss": 1.5522, + "step": 28330 + }, + { + "epoch": 0.6295777777777778, + "grad_norm": 1.5684312582015991, + "learning_rate": 7.410091131362526e-05, + "loss": 1.2988, + "step": 28331 + }, + { + "epoch": 0.6296, + "grad_norm": 1.4363404512405396, + "learning_rate": 7.409646588130697e-05, + "loss": 1.7482, + "step": 28332 + }, + { + "epoch": 0.6296222222222222, + "grad_norm": 1.8172701597213745, + "learning_rate": 7.409202044898866e-05, + "loss": 1.7153, + "step": 28333 + }, + { + "epoch": 0.6296444444444445, + "grad_norm": 1.687948226928711, + "learning_rate": 7.408757501667037e-05, + "loss": 1.9432, + "step": 28334 + }, + { + "epoch": 0.6296666666666667, + "grad_norm": 1.816715955734253, + "learning_rate": 7.408312958435208e-05, + "loss": 1.9648, + "step": 28335 + }, + { + "epoch": 0.6296888888888889, + "grad_norm": 1.7293776273727417, + "learning_rate": 7.407868415203379e-05, + "loss": 1.9419, + "step": 28336 + }, + { + "epoch": 0.6297111111111111, + "grad_norm": 1.5770736932754517, + "learning_rate": 7.40742387197155e-05, + "loss": 1.8593, + "step": 28337 + }, + { + "epoch": 0.6297333333333334, + "grad_norm": 1.74238920211792, + "learning_rate": 7.406979328739721e-05, + "loss": 2.2409, + "step": 28338 + }, + { + "epoch": 0.6297555555555555, + "grad_norm": 1.8684200048446655, + "learning_rate": 7.406534785507892e-05, + "loss": 1.859, + "step": 28339 + }, + { + "epoch": 0.6297777777777778, + "grad_norm": 1.713669776916504, + "learning_rate": 7.406090242276062e-05, + "loss": 1.7262, + "step": 28340 + }, + { + "epoch": 0.6298, + "grad_norm": 1.7168631553649902, + "learning_rate": 7.405645699044232e-05, + "loss": 2.173, + "step": 28341 + }, + { + "epoch": 0.6298222222222222, + "grad_norm": 1.9766662120819092, + "learning_rate": 7.405201155812403e-05, + "loss": 1.9445, + "step": 28342 + }, + { + "epoch": 0.6298444444444444, + "grad_norm": 1.9945155382156372, + "learning_rate": 7.404756612580573e-05, + "loss": 1.865, + "step": 28343 + }, + { + "epoch": 0.6298666666666667, + "grad_norm": 1.841382622718811, + "learning_rate": 7.404312069348744e-05, + "loss": 1.8165, + "step": 28344 + }, + { + "epoch": 0.6298888888888889, + "grad_norm": 1.9434353113174438, + "learning_rate": 7.403867526116915e-05, + "loss": 1.7789, + "step": 28345 + }, + { + "epoch": 0.6299111111111111, + "grad_norm": 2.029454469680786, + "learning_rate": 7.403422982885086e-05, + "loss": 2.1717, + "step": 28346 + }, + { + "epoch": 0.6299333333333333, + "grad_norm": 2.521643877029419, + "learning_rate": 7.402978439653257e-05, + "loss": 2.3886, + "step": 28347 + }, + { + "epoch": 0.6299555555555556, + "grad_norm": 1.5658351182937622, + "learning_rate": 7.402533896421428e-05, + "loss": 1.7562, + "step": 28348 + }, + { + "epoch": 0.6299777777777777, + "grad_norm": 1.8885512351989746, + "learning_rate": 7.402089353189599e-05, + "loss": 1.4809, + "step": 28349 + }, + { + "epoch": 0.63, + "grad_norm": 1.3135809898376465, + "learning_rate": 7.401644809957768e-05, + "loss": 0.8233, + "step": 28350 + }, + { + "epoch": 0.6300222222222223, + "grad_norm": 1.5412646532058716, + "learning_rate": 7.40120026672594e-05, + "loss": 2.08, + "step": 28351 + }, + { + "epoch": 0.6300444444444444, + "grad_norm": 0.986265242099762, + "learning_rate": 7.40075572349411e-05, + "loss": 0.8639, + "step": 28352 + }, + { + "epoch": 0.6300666666666667, + "grad_norm": 7.895050048828125, + "learning_rate": 7.40031118026228e-05, + "loss": 2.2206, + "step": 28353 + }, + { + "epoch": 0.6300888888888889, + "grad_norm": 1.8083707094192505, + "learning_rate": 7.399866637030451e-05, + "loss": 2.5821, + "step": 28354 + }, + { + "epoch": 0.6301111111111111, + "grad_norm": 1.620503544807434, + "learning_rate": 7.399422093798622e-05, + "loss": 2.2363, + "step": 28355 + }, + { + "epoch": 0.6301333333333333, + "grad_norm": 1.4891921281814575, + "learning_rate": 7.398977550566793e-05, + "loss": 1.24, + "step": 28356 + }, + { + "epoch": 0.6301555555555556, + "grad_norm": 1.437220573425293, + "learning_rate": 7.398533007334964e-05, + "loss": 2.2225, + "step": 28357 + }, + { + "epoch": 0.6301777777777777, + "grad_norm": 1.757455587387085, + "learning_rate": 7.398088464103135e-05, + "loss": 2.4739, + "step": 28358 + }, + { + "epoch": 0.6302, + "grad_norm": 1.5432251691818237, + "learning_rate": 7.397643920871306e-05, + "loss": 1.9358, + "step": 28359 + }, + { + "epoch": 0.6302222222222222, + "grad_norm": 1.6928318738937378, + "learning_rate": 7.397199377639475e-05, + "loss": 2.0875, + "step": 28360 + }, + { + "epoch": 0.6302444444444445, + "grad_norm": 1.3686691522598267, + "learning_rate": 7.396754834407646e-05, + "loss": 1.8747, + "step": 28361 + }, + { + "epoch": 0.6302666666666666, + "grad_norm": 1.6149375438690186, + "learning_rate": 7.396310291175817e-05, + "loss": 2.1837, + "step": 28362 + }, + { + "epoch": 0.6302888888888889, + "grad_norm": 1.6666003465652466, + "learning_rate": 7.395865747943987e-05, + "loss": 2.0275, + "step": 28363 + }, + { + "epoch": 0.6303111111111112, + "grad_norm": 1.603553295135498, + "learning_rate": 7.395421204712158e-05, + "loss": 2.3148, + "step": 28364 + }, + { + "epoch": 0.6303333333333333, + "grad_norm": 1.6051143407821655, + "learning_rate": 7.39497666148033e-05, + "loss": 2.2403, + "step": 28365 + }, + { + "epoch": 0.6303555555555556, + "grad_norm": 1.8546210527420044, + "learning_rate": 7.394532118248501e-05, + "loss": 2.2779, + "step": 28366 + }, + { + "epoch": 0.6303777777777778, + "grad_norm": 1.7641934156417847, + "learning_rate": 7.394087575016671e-05, + "loss": 2.2434, + "step": 28367 + }, + { + "epoch": 0.6304, + "grad_norm": 1.4324891567230225, + "learning_rate": 7.393643031784842e-05, + "loss": 1.8032, + "step": 28368 + }, + { + "epoch": 0.6304222222222222, + "grad_norm": 1.5796635150909424, + "learning_rate": 7.393198488553013e-05, + "loss": 1.8798, + "step": 28369 + }, + { + "epoch": 0.6304444444444445, + "grad_norm": 1.614767074584961, + "learning_rate": 7.392753945321182e-05, + "loss": 1.9635, + "step": 28370 + }, + { + "epoch": 0.6304666666666666, + "grad_norm": 1.954885482788086, + "learning_rate": 7.392309402089353e-05, + "loss": 2.3244, + "step": 28371 + }, + { + "epoch": 0.6304888888888889, + "grad_norm": 1.6378158330917358, + "learning_rate": 7.391864858857524e-05, + "loss": 2.1, + "step": 28372 + }, + { + "epoch": 0.6305111111111111, + "grad_norm": 1.7912436723709106, + "learning_rate": 7.391420315625695e-05, + "loss": 2.1831, + "step": 28373 + }, + { + "epoch": 0.6305333333333333, + "grad_norm": 1.5908149480819702, + "learning_rate": 7.390975772393866e-05, + "loss": 1.5954, + "step": 28374 + }, + { + "epoch": 0.6305555555555555, + "grad_norm": 1.681992769241333, + "learning_rate": 7.390531229162037e-05, + "loss": 2.1747, + "step": 28375 + }, + { + "epoch": 0.6305777777777778, + "grad_norm": 1.6367912292480469, + "learning_rate": 7.390086685930208e-05, + "loss": 2.3634, + "step": 28376 + }, + { + "epoch": 0.6306, + "grad_norm": 1.787254810333252, + "learning_rate": 7.389642142698378e-05, + "loss": 2.2269, + "step": 28377 + }, + { + "epoch": 0.6306222222222222, + "grad_norm": 1.8347268104553223, + "learning_rate": 7.389197599466549e-05, + "loss": 1.8225, + "step": 28378 + }, + { + "epoch": 0.6306444444444445, + "grad_norm": 1.7412904500961304, + "learning_rate": 7.38875305623472e-05, + "loss": 1.6935, + "step": 28379 + }, + { + "epoch": 0.6306666666666667, + "grad_norm": 1.8452202081680298, + "learning_rate": 7.388308513002889e-05, + "loss": 1.8675, + "step": 28380 + }, + { + "epoch": 0.6306888888888889, + "grad_norm": 1.8450442552566528, + "learning_rate": 7.38786396977106e-05, + "loss": 2.0222, + "step": 28381 + }, + { + "epoch": 0.6307111111111111, + "grad_norm": 1.9819624423980713, + "learning_rate": 7.387419426539231e-05, + "loss": 1.5903, + "step": 28382 + }, + { + "epoch": 0.6307333333333334, + "grad_norm": 2.1070683002471924, + "learning_rate": 7.386974883307402e-05, + "loss": 2.2376, + "step": 28383 + }, + { + "epoch": 0.6307555555555555, + "grad_norm": 2.4826745986938477, + "learning_rate": 7.386530340075573e-05, + "loss": 2.0857, + "step": 28384 + }, + { + "epoch": 0.6307777777777778, + "grad_norm": 1.4686522483825684, + "learning_rate": 7.386085796843744e-05, + "loss": 1.6862, + "step": 28385 + }, + { + "epoch": 0.6308, + "grad_norm": 1.6939491033554077, + "learning_rate": 7.385641253611915e-05, + "loss": 1.5444, + "step": 28386 + }, + { + "epoch": 0.6308222222222222, + "grad_norm": 1.892615556716919, + "learning_rate": 7.385196710380085e-05, + "loss": 1.6596, + "step": 28387 + }, + { + "epoch": 0.6308444444444444, + "grad_norm": 1.5864721536636353, + "learning_rate": 7.384752167148255e-05, + "loss": 1.8505, + "step": 28388 + }, + { + "epoch": 0.6308666666666667, + "grad_norm": 1.7852271795272827, + "learning_rate": 7.384307623916426e-05, + "loss": 2.1614, + "step": 28389 + }, + { + "epoch": 0.6308888888888889, + "grad_norm": 2.2662484645843506, + "learning_rate": 7.383863080684596e-05, + "loss": 1.9352, + "step": 28390 + }, + { + "epoch": 0.6309111111111111, + "grad_norm": 1.8422645330429077, + "learning_rate": 7.383418537452767e-05, + "loss": 1.5843, + "step": 28391 + }, + { + "epoch": 0.6309333333333333, + "grad_norm": 1.7014566659927368, + "learning_rate": 7.382973994220938e-05, + "loss": 1.9234, + "step": 28392 + }, + { + "epoch": 0.6309555555555556, + "grad_norm": 1.7305951118469238, + "learning_rate": 7.382529450989109e-05, + "loss": 2.058, + "step": 28393 + }, + { + "epoch": 0.6309777777777777, + "grad_norm": 1.942842721939087, + "learning_rate": 7.38208490775728e-05, + "loss": 2.0942, + "step": 28394 + }, + { + "epoch": 0.631, + "grad_norm": 1.5892136096954346, + "learning_rate": 7.381640364525451e-05, + "loss": 1.4916, + "step": 28395 + }, + { + "epoch": 0.6310222222222223, + "grad_norm": 1.6735446453094482, + "learning_rate": 7.381195821293622e-05, + "loss": 1.6573, + "step": 28396 + }, + { + "epoch": 0.6310444444444444, + "grad_norm": 1.4542067050933838, + "learning_rate": 7.380751278061791e-05, + "loss": 1.4651, + "step": 28397 + }, + { + "epoch": 0.6310666666666667, + "grad_norm": 1.794935941696167, + "learning_rate": 7.380306734829962e-05, + "loss": 1.508, + "step": 28398 + }, + { + "epoch": 0.6310888888888889, + "grad_norm": 1.9414783716201782, + "learning_rate": 7.379862191598133e-05, + "loss": 1.8128, + "step": 28399 + }, + { + "epoch": 0.6311111111111111, + "grad_norm": 1.4216324090957642, + "learning_rate": 7.379417648366303e-05, + "loss": 0.9064, + "step": 28400 + }, + { + "epoch": 0.6311333333333333, + "grad_norm": 1.4880794286727905, + "learning_rate": 7.378973105134474e-05, + "loss": 2.2743, + "step": 28401 + }, + { + "epoch": 0.6311555555555556, + "grad_norm": 1.5550297498703003, + "learning_rate": 7.378528561902646e-05, + "loss": 1.9959, + "step": 28402 + }, + { + "epoch": 0.6311777777777777, + "grad_norm": 1.4357762336730957, + "learning_rate": 7.378084018670816e-05, + "loss": 2.1209, + "step": 28403 + }, + { + "epoch": 0.6312, + "grad_norm": 1.5185269117355347, + "learning_rate": 7.377639475438987e-05, + "loss": 1.9705, + "step": 28404 + }, + { + "epoch": 0.6312222222222222, + "grad_norm": 1.398624300956726, + "learning_rate": 7.377194932207158e-05, + "loss": 1.5216, + "step": 28405 + }, + { + "epoch": 0.6312444444444445, + "grad_norm": 1.5025349855422974, + "learning_rate": 7.376750388975329e-05, + "loss": 2.089, + "step": 28406 + }, + { + "epoch": 0.6312666666666666, + "grad_norm": 1.5183087587356567, + "learning_rate": 7.376305845743498e-05, + "loss": 1.9401, + "step": 28407 + }, + { + "epoch": 0.6312888888888889, + "grad_norm": 1.4337327480316162, + "learning_rate": 7.375861302511669e-05, + "loss": 1.6506, + "step": 28408 + }, + { + "epoch": 0.6313111111111112, + "grad_norm": 1.5042972564697266, + "learning_rate": 7.37541675927984e-05, + "loss": 2.1496, + "step": 28409 + }, + { + "epoch": 0.6313333333333333, + "grad_norm": 2.031554698944092, + "learning_rate": 7.374972216048011e-05, + "loss": 2.2798, + "step": 28410 + }, + { + "epoch": 0.6313555555555556, + "grad_norm": 1.6585062742233276, + "learning_rate": 7.374527672816182e-05, + "loss": 2.2681, + "step": 28411 + }, + { + "epoch": 0.6313777777777778, + "grad_norm": 1.9968764781951904, + "learning_rate": 7.374083129584353e-05, + "loss": 2.1939, + "step": 28412 + }, + { + "epoch": 0.6314, + "grad_norm": 1.8754717111587524, + "learning_rate": 7.373638586352523e-05, + "loss": 2.2713, + "step": 28413 + }, + { + "epoch": 0.6314222222222222, + "grad_norm": 1.6223303079605103, + "learning_rate": 7.373194043120694e-05, + "loss": 2.0536, + "step": 28414 + }, + { + "epoch": 0.6314444444444445, + "grad_norm": 1.6929534673690796, + "learning_rate": 7.372749499888865e-05, + "loss": 1.9036, + "step": 28415 + }, + { + "epoch": 0.6314666666666666, + "grad_norm": 1.6671748161315918, + "learning_rate": 7.372304956657036e-05, + "loss": 1.9505, + "step": 28416 + }, + { + "epoch": 0.6314888888888889, + "grad_norm": 2.055662155151367, + "learning_rate": 7.371860413425205e-05, + "loss": 2.4632, + "step": 28417 + }, + { + "epoch": 0.6315111111111111, + "grad_norm": 1.5693776607513428, + "learning_rate": 7.371415870193376e-05, + "loss": 1.7384, + "step": 28418 + }, + { + "epoch": 0.6315333333333333, + "grad_norm": 1.3068820238113403, + "learning_rate": 7.370971326961547e-05, + "loss": 0.6118, + "step": 28419 + }, + { + "epoch": 0.6315555555555555, + "grad_norm": 1.467512845993042, + "learning_rate": 7.370526783729718e-05, + "loss": 2.0226, + "step": 28420 + }, + { + "epoch": 0.6315777777777778, + "grad_norm": 1.5475867986679077, + "learning_rate": 7.370082240497889e-05, + "loss": 1.931, + "step": 28421 + }, + { + "epoch": 0.6316, + "grad_norm": 1.5756876468658447, + "learning_rate": 7.36963769726606e-05, + "loss": 1.6972, + "step": 28422 + }, + { + "epoch": 0.6316222222222222, + "grad_norm": 1.9265605211257935, + "learning_rate": 7.369193154034231e-05, + "loss": 2.3031, + "step": 28423 + }, + { + "epoch": 0.6316444444444445, + "grad_norm": 1.5181357860565186, + "learning_rate": 7.3687486108024e-05, + "loss": 2.1034, + "step": 28424 + }, + { + "epoch": 0.6316666666666667, + "grad_norm": 1.7012661695480347, + "learning_rate": 7.368304067570572e-05, + "loss": 2.1504, + "step": 28425 + }, + { + "epoch": 0.6316888888888889, + "grad_norm": 1.4558168649673462, + "learning_rate": 7.367859524338743e-05, + "loss": 1.533, + "step": 28426 + }, + { + "epoch": 0.6317111111111111, + "grad_norm": 1.610884189605713, + "learning_rate": 7.367414981106912e-05, + "loss": 2.0492, + "step": 28427 + }, + { + "epoch": 0.6317333333333334, + "grad_norm": 1.6316362619400024, + "learning_rate": 7.366970437875083e-05, + "loss": 1.604, + "step": 28428 + }, + { + "epoch": 0.6317555555555555, + "grad_norm": 1.0601921081542969, + "learning_rate": 7.366525894643254e-05, + "loss": 0.8132, + "step": 28429 + }, + { + "epoch": 0.6317777777777778, + "grad_norm": 1.6722947359085083, + "learning_rate": 7.366081351411425e-05, + "loss": 1.6607, + "step": 28430 + }, + { + "epoch": 0.6318, + "grad_norm": 2.0034799575805664, + "learning_rate": 7.365636808179596e-05, + "loss": 1.9938, + "step": 28431 + }, + { + "epoch": 0.6318222222222222, + "grad_norm": 1.1785577535629272, + "learning_rate": 7.365192264947767e-05, + "loss": 0.8286, + "step": 28432 + }, + { + "epoch": 0.6318444444444444, + "grad_norm": 1.7210696935653687, + "learning_rate": 7.364747721715938e-05, + "loss": 1.7816, + "step": 28433 + }, + { + "epoch": 0.6318666666666667, + "grad_norm": 1.9599621295928955, + "learning_rate": 7.364303178484108e-05, + "loss": 2.3561, + "step": 28434 + }, + { + "epoch": 0.6318888888888889, + "grad_norm": 1.6815145015716553, + "learning_rate": 7.363858635252278e-05, + "loss": 1.9247, + "step": 28435 + }, + { + "epoch": 0.6319111111111111, + "grad_norm": 1.673786997795105, + "learning_rate": 7.36341409202045e-05, + "loss": 1.5815, + "step": 28436 + }, + { + "epoch": 0.6319333333333333, + "grad_norm": 1.7454754114151, + "learning_rate": 7.362969548788619e-05, + "loss": 1.6624, + "step": 28437 + }, + { + "epoch": 0.6319555555555556, + "grad_norm": 1.573633074760437, + "learning_rate": 7.36252500555679e-05, + "loss": 1.5709, + "step": 28438 + }, + { + "epoch": 0.6319777777777777, + "grad_norm": 1.6286156177520752, + "learning_rate": 7.362080462324962e-05, + "loss": 1.5022, + "step": 28439 + }, + { + "epoch": 0.632, + "grad_norm": 1.5434051752090454, + "learning_rate": 7.361635919093132e-05, + "loss": 1.8551, + "step": 28440 + }, + { + "epoch": 0.6320222222222223, + "grad_norm": 1.667515754699707, + "learning_rate": 7.361191375861303e-05, + "loss": 1.4745, + "step": 28441 + }, + { + "epoch": 0.6320444444444444, + "grad_norm": 1.844184160232544, + "learning_rate": 7.360746832629474e-05, + "loss": 1.8393, + "step": 28442 + }, + { + "epoch": 0.6320666666666667, + "grad_norm": 1.635435938835144, + "learning_rate": 7.360302289397645e-05, + "loss": 1.4518, + "step": 28443 + }, + { + "epoch": 0.6320888888888889, + "grad_norm": 2.1414828300476074, + "learning_rate": 7.359857746165814e-05, + "loss": 1.9476, + "step": 28444 + }, + { + "epoch": 0.6321111111111111, + "grad_norm": 1.8981890678405762, + "learning_rate": 7.359413202933985e-05, + "loss": 1.882, + "step": 28445 + }, + { + "epoch": 0.6321333333333333, + "grad_norm": 1.939850091934204, + "learning_rate": 7.358968659702156e-05, + "loss": 1.6742, + "step": 28446 + }, + { + "epoch": 0.6321555555555556, + "grad_norm": 1.9543533325195312, + "learning_rate": 7.358524116470327e-05, + "loss": 1.6316, + "step": 28447 + }, + { + "epoch": 0.6321777777777777, + "grad_norm": 1.5883665084838867, + "learning_rate": 7.358079573238498e-05, + "loss": 1.7852, + "step": 28448 + }, + { + "epoch": 0.6322, + "grad_norm": 2.1732687950134277, + "learning_rate": 7.357635030006669e-05, + "loss": 1.5089, + "step": 28449 + }, + { + "epoch": 0.6322222222222222, + "grad_norm": 2.041788101196289, + "learning_rate": 7.357190486774839e-05, + "loss": 1.3564, + "step": 28450 + }, + { + "epoch": 0.6322444444444445, + "grad_norm": 1.011060118675232, + "learning_rate": 7.35674594354301e-05, + "loss": 1.1324, + "step": 28451 + }, + { + "epoch": 0.6322666666666666, + "grad_norm": 2.093306064605713, + "learning_rate": 7.356301400311181e-05, + "loss": 2.4241, + "step": 28452 + }, + { + "epoch": 0.6322888888888889, + "grad_norm": 1.4873263835906982, + "learning_rate": 7.355856857079352e-05, + "loss": 2.338, + "step": 28453 + }, + { + "epoch": 0.6323111111111112, + "grad_norm": 2.758570432662964, + "learning_rate": 7.355412313847521e-05, + "loss": 1.6346, + "step": 28454 + }, + { + "epoch": 0.6323333333333333, + "grad_norm": 1.6452229022979736, + "learning_rate": 7.354967770615692e-05, + "loss": 1.3205, + "step": 28455 + }, + { + "epoch": 0.6323555555555556, + "grad_norm": 1.439057469367981, + "learning_rate": 7.354523227383863e-05, + "loss": 1.6358, + "step": 28456 + }, + { + "epoch": 0.6323777777777778, + "grad_norm": 1.928806185722351, + "learning_rate": 7.354078684152034e-05, + "loss": 2.5795, + "step": 28457 + }, + { + "epoch": 0.6324, + "grad_norm": 1.6794370412826538, + "learning_rate": 7.353634140920205e-05, + "loss": 2.255, + "step": 28458 + }, + { + "epoch": 0.6324222222222222, + "grad_norm": 1.8662517070770264, + "learning_rate": 7.353189597688376e-05, + "loss": 1.6204, + "step": 28459 + }, + { + "epoch": 0.6324444444444445, + "grad_norm": 1.5139602422714233, + "learning_rate": 7.352745054456546e-05, + "loss": 1.911, + "step": 28460 + }, + { + "epoch": 0.6324666666666666, + "grad_norm": 1.6585510969161987, + "learning_rate": 7.352300511224717e-05, + "loss": 2.3561, + "step": 28461 + }, + { + "epoch": 0.6324888888888889, + "grad_norm": 1.6817216873168945, + "learning_rate": 7.351855967992888e-05, + "loss": 2.1024, + "step": 28462 + }, + { + "epoch": 0.6325111111111111, + "grad_norm": 1.553934097290039, + "learning_rate": 7.351411424761059e-05, + "loss": 1.975, + "step": 28463 + }, + { + "epoch": 0.6325333333333333, + "grad_norm": 1.6149582862854004, + "learning_rate": 7.350966881529228e-05, + "loss": 2.3645, + "step": 28464 + }, + { + "epoch": 0.6325555555555555, + "grad_norm": 1.5581504106521606, + "learning_rate": 7.350522338297399e-05, + "loss": 2.1551, + "step": 28465 + }, + { + "epoch": 0.6325777777777778, + "grad_norm": 1.6197482347488403, + "learning_rate": 7.35007779506557e-05, + "loss": 2.0389, + "step": 28466 + }, + { + "epoch": 0.6326, + "grad_norm": 2.02691388130188, + "learning_rate": 7.349633251833741e-05, + "loss": 1.8135, + "step": 28467 + }, + { + "epoch": 0.6326222222222222, + "grad_norm": 1.5820540189743042, + "learning_rate": 7.349188708601912e-05, + "loss": 1.8064, + "step": 28468 + }, + { + "epoch": 0.6326444444444445, + "grad_norm": 1.6677908897399902, + "learning_rate": 7.348744165370083e-05, + "loss": 1.9711, + "step": 28469 + }, + { + "epoch": 0.6326666666666667, + "grad_norm": 1.846671223640442, + "learning_rate": 7.348299622138254e-05, + "loss": 1.7647, + "step": 28470 + }, + { + "epoch": 0.6326888888888889, + "grad_norm": 1.6714377403259277, + "learning_rate": 7.347855078906424e-05, + "loss": 1.6715, + "step": 28471 + }, + { + "epoch": 0.6327111111111111, + "grad_norm": 2.223407745361328, + "learning_rate": 7.347410535674595e-05, + "loss": 2.3233, + "step": 28472 + }, + { + "epoch": 0.6327333333333334, + "grad_norm": 1.6671288013458252, + "learning_rate": 7.346965992442766e-05, + "loss": 1.8601, + "step": 28473 + }, + { + "epoch": 0.6327555555555555, + "grad_norm": 1.7181869745254517, + "learning_rate": 7.346521449210935e-05, + "loss": 1.8613, + "step": 28474 + }, + { + "epoch": 0.6327777777777778, + "grad_norm": 1.8806204795837402, + "learning_rate": 7.346076905979106e-05, + "loss": 2.8201, + "step": 28475 + }, + { + "epoch": 0.6328, + "grad_norm": 1.6484366655349731, + "learning_rate": 7.345632362747278e-05, + "loss": 1.7206, + "step": 28476 + }, + { + "epoch": 0.6328222222222222, + "grad_norm": 1.443996548652649, + "learning_rate": 7.345187819515448e-05, + "loss": 1.6574, + "step": 28477 + }, + { + "epoch": 0.6328444444444444, + "grad_norm": 1.6608508825302124, + "learning_rate": 7.344743276283619e-05, + "loss": 1.6401, + "step": 28478 + }, + { + "epoch": 0.6328666666666667, + "grad_norm": 2.0114076137542725, + "learning_rate": 7.34429873305179e-05, + "loss": 2.4515, + "step": 28479 + }, + { + "epoch": 0.6328888888888888, + "grad_norm": 1.9120980501174927, + "learning_rate": 7.343854189819961e-05, + "loss": 2.0641, + "step": 28480 + }, + { + "epoch": 0.6329111111111111, + "grad_norm": 1.7209848165512085, + "learning_rate": 7.34340964658813e-05, + "loss": 1.9164, + "step": 28481 + }, + { + "epoch": 0.6329333333333333, + "grad_norm": 1.8291016817092896, + "learning_rate": 7.342965103356302e-05, + "loss": 2.1635, + "step": 28482 + }, + { + "epoch": 0.6329555555555556, + "grad_norm": 1.7169533967971802, + "learning_rate": 7.342520560124472e-05, + "loss": 1.9718, + "step": 28483 + }, + { + "epoch": 0.6329777777777778, + "grad_norm": 1.7712876796722412, + "learning_rate": 7.342076016892643e-05, + "loss": 1.8347, + "step": 28484 + }, + { + "epoch": 0.633, + "grad_norm": 1.8500925302505493, + "learning_rate": 7.341631473660814e-05, + "loss": 1.8675, + "step": 28485 + }, + { + "epoch": 0.6330222222222223, + "grad_norm": 1.7853578329086304, + "learning_rate": 7.341186930428985e-05, + "loss": 1.9601, + "step": 28486 + }, + { + "epoch": 0.6330444444444444, + "grad_norm": 1.840130090713501, + "learning_rate": 7.340742387197155e-05, + "loss": 2.0153, + "step": 28487 + }, + { + "epoch": 0.6330666666666667, + "grad_norm": 2.2379651069641113, + "learning_rate": 7.340297843965326e-05, + "loss": 2.0073, + "step": 28488 + }, + { + "epoch": 0.6330888888888889, + "grad_norm": 1.6185786724090576, + "learning_rate": 7.339853300733497e-05, + "loss": 1.7823, + "step": 28489 + }, + { + "epoch": 0.6331111111111111, + "grad_norm": 6.099532604217529, + "learning_rate": 7.339408757501668e-05, + "loss": 1.8272, + "step": 28490 + }, + { + "epoch": 0.6331333333333333, + "grad_norm": 1.8324321508407593, + "learning_rate": 7.338964214269837e-05, + "loss": 2.173, + "step": 28491 + }, + { + "epoch": 0.6331555555555556, + "grad_norm": 1.8781834840774536, + "learning_rate": 7.338519671038008e-05, + "loss": 1.8743, + "step": 28492 + }, + { + "epoch": 0.6331777777777777, + "grad_norm": 2.0203893184661865, + "learning_rate": 7.33807512780618e-05, + "loss": 2.1924, + "step": 28493 + }, + { + "epoch": 0.6332, + "grad_norm": 1.485815405845642, + "learning_rate": 7.33763058457435e-05, + "loss": 1.4762, + "step": 28494 + }, + { + "epoch": 0.6332222222222222, + "grad_norm": 2.0100176334381104, + "learning_rate": 7.337186041342521e-05, + "loss": 2.186, + "step": 28495 + }, + { + "epoch": 0.6332444444444445, + "grad_norm": 1.780142903327942, + "learning_rate": 7.336741498110692e-05, + "loss": 1.6422, + "step": 28496 + }, + { + "epoch": 0.6332666666666666, + "grad_norm": 2.057020425796509, + "learning_rate": 7.336296954878862e-05, + "loss": 1.7511, + "step": 28497 + }, + { + "epoch": 0.6332888888888889, + "grad_norm": 1.6470258235931396, + "learning_rate": 7.335852411647033e-05, + "loss": 1.7918, + "step": 28498 + }, + { + "epoch": 0.6333111111111112, + "grad_norm": 1.5285764932632446, + "learning_rate": 7.335407868415204e-05, + "loss": 1.5708, + "step": 28499 + }, + { + "epoch": 0.6333333333333333, + "grad_norm": 1.337584376335144, + "learning_rate": 7.334963325183375e-05, + "loss": 1.0091, + "step": 28500 + }, + { + "epoch": 0.6333555555555556, + "grad_norm": 1.5156707763671875, + "learning_rate": 7.334518781951544e-05, + "loss": 2.3721, + "step": 28501 + }, + { + "epoch": 0.6333777777777778, + "grad_norm": 1.9771760702133179, + "learning_rate": 7.334074238719715e-05, + "loss": 2.5071, + "step": 28502 + }, + { + "epoch": 0.6334, + "grad_norm": 1.4453988075256348, + "learning_rate": 7.333629695487886e-05, + "loss": 2.2172, + "step": 28503 + }, + { + "epoch": 0.6334222222222222, + "grad_norm": 1.5911939144134521, + "learning_rate": 7.333185152256057e-05, + "loss": 1.7264, + "step": 28504 + }, + { + "epoch": 0.6334444444444445, + "grad_norm": 1.421743631362915, + "learning_rate": 7.332740609024228e-05, + "loss": 1.9622, + "step": 28505 + }, + { + "epoch": 0.6334666666666666, + "grad_norm": 1.416795015335083, + "learning_rate": 7.332296065792399e-05, + "loss": 1.7702, + "step": 28506 + }, + { + "epoch": 0.6334888888888889, + "grad_norm": 1.7996914386749268, + "learning_rate": 7.331851522560569e-05, + "loss": 1.6658, + "step": 28507 + }, + { + "epoch": 0.6335111111111111, + "grad_norm": 1.7646291255950928, + "learning_rate": 7.33140697932874e-05, + "loss": 2.6733, + "step": 28508 + }, + { + "epoch": 0.6335333333333333, + "grad_norm": 1.6536164283752441, + "learning_rate": 7.330962436096911e-05, + "loss": 1.9984, + "step": 28509 + }, + { + "epoch": 0.6335555555555555, + "grad_norm": 1.6147255897521973, + "learning_rate": 7.330517892865082e-05, + "loss": 1.9761, + "step": 28510 + }, + { + "epoch": 0.6335777777777778, + "grad_norm": 1.5083590745925903, + "learning_rate": 7.330073349633251e-05, + "loss": 1.7461, + "step": 28511 + }, + { + "epoch": 0.6336, + "grad_norm": 1.7501157522201538, + "learning_rate": 7.329628806401422e-05, + "loss": 2.2495, + "step": 28512 + }, + { + "epoch": 0.6336222222222222, + "grad_norm": 1.9548697471618652, + "learning_rate": 7.329184263169595e-05, + "loss": 2.637, + "step": 28513 + }, + { + "epoch": 0.6336444444444445, + "grad_norm": 1.6512237787246704, + "learning_rate": 7.328739719937764e-05, + "loss": 2.0011, + "step": 28514 + }, + { + "epoch": 0.6336666666666667, + "grad_norm": 1.7872507572174072, + "learning_rate": 7.328295176705935e-05, + "loss": 2.345, + "step": 28515 + }, + { + "epoch": 0.6336888888888889, + "grad_norm": 1.7773979902267456, + "learning_rate": 7.327850633474106e-05, + "loss": 1.9037, + "step": 28516 + }, + { + "epoch": 0.6337111111111111, + "grad_norm": 2.1018433570861816, + "learning_rate": 7.327406090242276e-05, + "loss": 2.0568, + "step": 28517 + }, + { + "epoch": 0.6337333333333334, + "grad_norm": 1.9714939594268799, + "learning_rate": 7.326961547010447e-05, + "loss": 1.5208, + "step": 28518 + }, + { + "epoch": 0.6337555555555555, + "grad_norm": 1.7774789333343506, + "learning_rate": 7.326517003778618e-05, + "loss": 2.0657, + "step": 28519 + }, + { + "epoch": 0.6337777777777778, + "grad_norm": 1.48650324344635, + "learning_rate": 7.326072460546789e-05, + "loss": 2.0511, + "step": 28520 + }, + { + "epoch": 0.6338, + "grad_norm": 1.7070021629333496, + "learning_rate": 7.32562791731496e-05, + "loss": 2.0907, + "step": 28521 + }, + { + "epoch": 0.6338222222222222, + "grad_norm": 1.827212929725647, + "learning_rate": 7.32518337408313e-05, + "loss": 2.1822, + "step": 28522 + }, + { + "epoch": 0.6338444444444444, + "grad_norm": 1.3869909048080444, + "learning_rate": 7.324738830851301e-05, + "loss": 1.672, + "step": 28523 + }, + { + "epoch": 0.6338666666666667, + "grad_norm": 1.6951630115509033, + "learning_rate": 7.324294287619471e-05, + "loss": 1.6874, + "step": 28524 + }, + { + "epoch": 0.6338888888888888, + "grad_norm": 1.5163898468017578, + "learning_rate": 7.323849744387642e-05, + "loss": 2.0067, + "step": 28525 + }, + { + "epoch": 0.6339111111111111, + "grad_norm": 1.5956902503967285, + "learning_rate": 7.323405201155813e-05, + "loss": 1.7679, + "step": 28526 + }, + { + "epoch": 0.6339333333333333, + "grad_norm": 1.5486507415771484, + "learning_rate": 7.322960657923984e-05, + "loss": 1.7618, + "step": 28527 + }, + { + "epoch": 0.6339555555555556, + "grad_norm": 1.6986682415008545, + "learning_rate": 7.322516114692154e-05, + "loss": 2.2877, + "step": 28528 + }, + { + "epoch": 0.6339777777777778, + "grad_norm": 1.615833044052124, + "learning_rate": 7.322071571460325e-05, + "loss": 1.9716, + "step": 28529 + }, + { + "epoch": 0.634, + "grad_norm": 1.906920313835144, + "learning_rate": 7.321627028228495e-05, + "loss": 1.8578, + "step": 28530 + }, + { + "epoch": 0.6340222222222223, + "grad_norm": 1.7437498569488525, + "learning_rate": 7.321182484996666e-05, + "loss": 1.7576, + "step": 28531 + }, + { + "epoch": 0.6340444444444444, + "grad_norm": 1.6995993852615356, + "learning_rate": 7.320737941764837e-05, + "loss": 1.9975, + "step": 28532 + }, + { + "epoch": 0.6340666666666667, + "grad_norm": 1.756561279296875, + "learning_rate": 7.320293398533008e-05, + "loss": 0.8284, + "step": 28533 + }, + { + "epoch": 0.6340888888888889, + "grad_norm": 1.932459831237793, + "learning_rate": 7.319848855301178e-05, + "loss": 2.5953, + "step": 28534 + }, + { + "epoch": 0.6341111111111111, + "grad_norm": 1.888073444366455, + "learning_rate": 7.319404312069349e-05, + "loss": 2.0259, + "step": 28535 + }, + { + "epoch": 0.6341333333333333, + "grad_norm": 2.1089727878570557, + "learning_rate": 7.31895976883752e-05, + "loss": 2.2318, + "step": 28536 + }, + { + "epoch": 0.6341555555555556, + "grad_norm": 1.951337218284607, + "learning_rate": 7.318515225605691e-05, + "loss": 1.7462, + "step": 28537 + }, + { + "epoch": 0.6341777777777777, + "grad_norm": 1.523807168006897, + "learning_rate": 7.31807068237386e-05, + "loss": 1.8149, + "step": 28538 + }, + { + "epoch": 0.6342, + "grad_norm": 1.58031165599823, + "learning_rate": 7.317626139142031e-05, + "loss": 1.8405, + "step": 28539 + }, + { + "epoch": 0.6342222222222222, + "grad_norm": 1.4024109840393066, + "learning_rate": 7.317181595910202e-05, + "loss": 1.3146, + "step": 28540 + }, + { + "epoch": 0.6342444444444445, + "grad_norm": 2.466278076171875, + "learning_rate": 7.316737052678373e-05, + "loss": 1.946, + "step": 28541 + }, + { + "epoch": 0.6342666666666666, + "grad_norm": 1.586089849472046, + "learning_rate": 7.316292509446544e-05, + "loss": 1.9332, + "step": 28542 + }, + { + "epoch": 0.6342888888888889, + "grad_norm": 1.5611326694488525, + "learning_rate": 7.315847966214715e-05, + "loss": 1.6498, + "step": 28543 + }, + { + "epoch": 0.6343111111111112, + "grad_norm": 1.8813029527664185, + "learning_rate": 7.315403422982885e-05, + "loss": 1.7946, + "step": 28544 + }, + { + "epoch": 0.6343333333333333, + "grad_norm": 1.7111068964004517, + "learning_rate": 7.314958879751056e-05, + "loss": 1.7456, + "step": 28545 + }, + { + "epoch": 0.6343555555555556, + "grad_norm": 2.0285494327545166, + "learning_rate": 7.314514336519227e-05, + "loss": 1.8342, + "step": 28546 + }, + { + "epoch": 0.6343777777777778, + "grad_norm": 1.7374845743179321, + "learning_rate": 7.314069793287398e-05, + "loss": 1.7883, + "step": 28547 + }, + { + "epoch": 0.6344, + "grad_norm": 1.5826327800750732, + "learning_rate": 7.313625250055567e-05, + "loss": 1.3873, + "step": 28548 + }, + { + "epoch": 0.6344222222222222, + "grad_norm": 1.2411465644836426, + "learning_rate": 7.31318070682374e-05, + "loss": 0.7282, + "step": 28549 + }, + { + "epoch": 0.6344444444444445, + "grad_norm": 1.8891007900238037, + "learning_rate": 7.31273616359191e-05, + "loss": 1.24, + "step": 28550 + }, + { + "epoch": 0.6344666666666666, + "grad_norm": 1.4046944379806519, + "learning_rate": 7.31229162036008e-05, + "loss": 2.2538, + "step": 28551 + }, + { + "epoch": 0.6344888888888889, + "grad_norm": 1.4603004455566406, + "learning_rate": 7.311847077128251e-05, + "loss": 2.1524, + "step": 28552 + }, + { + "epoch": 0.6345111111111111, + "grad_norm": 1.4375442266464233, + "learning_rate": 7.311402533896422e-05, + "loss": 2.4616, + "step": 28553 + }, + { + "epoch": 0.6345333333333333, + "grad_norm": 1.6095811128616333, + "learning_rate": 7.310957990664592e-05, + "loss": 2.2056, + "step": 28554 + }, + { + "epoch": 0.6345555555555555, + "grad_norm": 1.7685246467590332, + "learning_rate": 7.310513447432763e-05, + "loss": 2.2868, + "step": 28555 + }, + { + "epoch": 0.6345777777777778, + "grad_norm": 0.9971567988395691, + "learning_rate": 7.310068904200934e-05, + "loss": 0.9219, + "step": 28556 + }, + { + "epoch": 0.6346, + "grad_norm": 1.743350625038147, + "learning_rate": 7.309624360969105e-05, + "loss": 2.5285, + "step": 28557 + }, + { + "epoch": 0.6346222222222222, + "grad_norm": 1.6786539554595947, + "learning_rate": 7.309179817737276e-05, + "loss": 1.6679, + "step": 28558 + }, + { + "epoch": 0.6346444444444445, + "grad_norm": 1.6530226469039917, + "learning_rate": 7.308735274505447e-05, + "loss": 1.8407, + "step": 28559 + }, + { + "epoch": 0.6346666666666667, + "grad_norm": 1.5131018161773682, + "learning_rate": 7.308290731273618e-05, + "loss": 2.0245, + "step": 28560 + }, + { + "epoch": 0.6346888888888889, + "grad_norm": 1.5645263195037842, + "learning_rate": 7.307846188041787e-05, + "loss": 1.9042, + "step": 28561 + }, + { + "epoch": 0.6347111111111111, + "grad_norm": 1.5649160146713257, + "learning_rate": 7.307401644809958e-05, + "loss": 2.1702, + "step": 28562 + }, + { + "epoch": 0.6347333333333334, + "grad_norm": 1.6197746992111206, + "learning_rate": 7.306957101578129e-05, + "loss": 2.2538, + "step": 28563 + }, + { + "epoch": 0.6347555555555555, + "grad_norm": 1.6465293169021606, + "learning_rate": 7.306512558346299e-05, + "loss": 2.389, + "step": 28564 + }, + { + "epoch": 0.6347777777777778, + "grad_norm": 1.5573149919509888, + "learning_rate": 7.30606801511447e-05, + "loss": 2.0219, + "step": 28565 + }, + { + "epoch": 0.6348, + "grad_norm": 1.5480624437332153, + "learning_rate": 7.30562347188264e-05, + "loss": 1.5438, + "step": 28566 + }, + { + "epoch": 0.6348222222222222, + "grad_norm": 1.59479558467865, + "learning_rate": 7.305178928650812e-05, + "loss": 1.1533, + "step": 28567 + }, + { + "epoch": 0.6348444444444444, + "grad_norm": 1.8960281610488892, + "learning_rate": 7.304734385418983e-05, + "loss": 2.0588, + "step": 28568 + }, + { + "epoch": 0.6348666666666667, + "grad_norm": 1.7194708585739136, + "learning_rate": 7.304289842187154e-05, + "loss": 2.1623, + "step": 28569 + }, + { + "epoch": 0.6348888888888888, + "grad_norm": 1.7426247596740723, + "learning_rate": 7.303845298955324e-05, + "loss": 2.4881, + "step": 28570 + }, + { + "epoch": 0.6349111111111111, + "grad_norm": 1.7681390047073364, + "learning_rate": 7.303400755723494e-05, + "loss": 1.7168, + "step": 28571 + }, + { + "epoch": 0.6349333333333333, + "grad_norm": 1.7570207118988037, + "learning_rate": 7.302956212491665e-05, + "loss": 1.8496, + "step": 28572 + }, + { + "epoch": 0.6349555555555556, + "grad_norm": 1.7781468629837036, + "learning_rate": 7.302511669259836e-05, + "loss": 1.9237, + "step": 28573 + }, + { + "epoch": 0.6349777777777778, + "grad_norm": 2.170135498046875, + "learning_rate": 7.302067126028006e-05, + "loss": 2.6366, + "step": 28574 + }, + { + "epoch": 0.635, + "grad_norm": 1.726895809173584, + "learning_rate": 7.301622582796177e-05, + "loss": 1.9751, + "step": 28575 + }, + { + "epoch": 0.6350222222222223, + "grad_norm": 1.7290587425231934, + "learning_rate": 7.301178039564348e-05, + "loss": 2.4883, + "step": 28576 + }, + { + "epoch": 0.6350444444444444, + "grad_norm": 1.654848337173462, + "learning_rate": 7.300733496332518e-05, + "loss": 2.0482, + "step": 28577 + }, + { + "epoch": 0.6350666666666667, + "grad_norm": 1.8695530891418457, + "learning_rate": 7.30028895310069e-05, + "loss": 1.987, + "step": 28578 + }, + { + "epoch": 0.6350888888888889, + "grad_norm": 1.7678459882736206, + "learning_rate": 7.29984440986886e-05, + "loss": 2.162, + "step": 28579 + }, + { + "epoch": 0.6351111111111111, + "grad_norm": 1.806392788887024, + "learning_rate": 7.299399866637031e-05, + "loss": 1.9873, + "step": 28580 + }, + { + "epoch": 0.6351333333333333, + "grad_norm": 1.878972053527832, + "learning_rate": 7.298955323405201e-05, + "loss": 2.217, + "step": 28581 + }, + { + "epoch": 0.6351555555555556, + "grad_norm": 1.5210174322128296, + "learning_rate": 7.298510780173372e-05, + "loss": 1.8135, + "step": 28582 + }, + { + "epoch": 0.6351777777777777, + "grad_norm": 1.3945029973983765, + "learning_rate": 7.298066236941543e-05, + "loss": 1.2624, + "step": 28583 + }, + { + "epoch": 0.6352, + "grad_norm": 1.7245527505874634, + "learning_rate": 7.297621693709714e-05, + "loss": 1.9487, + "step": 28584 + }, + { + "epoch": 0.6352222222222222, + "grad_norm": 1.6345226764678955, + "learning_rate": 7.297177150477883e-05, + "loss": 1.7475, + "step": 28585 + }, + { + "epoch": 0.6352444444444444, + "grad_norm": 1.9497441053390503, + "learning_rate": 7.296732607246056e-05, + "loss": 1.898, + "step": 28586 + }, + { + "epoch": 0.6352666666666666, + "grad_norm": 2.098942279815674, + "learning_rate": 7.296288064014227e-05, + "loss": 1.8164, + "step": 28587 + }, + { + "epoch": 0.6352888888888889, + "grad_norm": 2.027961492538452, + "learning_rate": 7.295843520782396e-05, + "loss": 2.0378, + "step": 28588 + }, + { + "epoch": 0.6353111111111112, + "grad_norm": 1.3239794969558716, + "learning_rate": 7.295398977550567e-05, + "loss": 1.2233, + "step": 28589 + }, + { + "epoch": 0.6353333333333333, + "grad_norm": 1.8406566381454468, + "learning_rate": 7.294954434318738e-05, + "loss": 1.9132, + "step": 28590 + }, + { + "epoch": 0.6353555555555556, + "grad_norm": 1.7158334255218506, + "learning_rate": 7.294509891086908e-05, + "loss": 1.5789, + "step": 28591 + }, + { + "epoch": 0.6353777777777778, + "grad_norm": 1.7892802953720093, + "learning_rate": 7.294065347855079e-05, + "loss": 1.9597, + "step": 28592 + }, + { + "epoch": 0.6354, + "grad_norm": 1.775794506072998, + "learning_rate": 7.29362080462325e-05, + "loss": 2.115, + "step": 28593 + }, + { + "epoch": 0.6354222222222222, + "grad_norm": 1.8277240991592407, + "learning_rate": 7.293176261391421e-05, + "loss": 1.7255, + "step": 28594 + }, + { + "epoch": 0.6354444444444445, + "grad_norm": 1.9447377920150757, + "learning_rate": 7.292731718159592e-05, + "loss": 1.9657, + "step": 28595 + }, + { + "epoch": 0.6354666666666666, + "grad_norm": 1.974876046180725, + "learning_rate": 7.292287174927763e-05, + "loss": 1.7476, + "step": 28596 + }, + { + "epoch": 0.6354888888888889, + "grad_norm": 2.3403728008270264, + "learning_rate": 7.291842631695934e-05, + "loss": 2.101, + "step": 28597 + }, + { + "epoch": 0.6355111111111111, + "grad_norm": 1.9675029516220093, + "learning_rate": 7.291398088464103e-05, + "loss": 2.0985, + "step": 28598 + }, + { + "epoch": 0.6355333333333333, + "grad_norm": 1.6708178520202637, + "learning_rate": 7.290953545232274e-05, + "loss": 1.4453, + "step": 28599 + }, + { + "epoch": 0.6355555555555555, + "grad_norm": 2.2771944999694824, + "learning_rate": 7.290509002000445e-05, + "loss": 1.4697, + "step": 28600 + }, + { + "epoch": 0.6355777777777778, + "grad_norm": 1.8394581079483032, + "learning_rate": 7.290064458768615e-05, + "loss": 2.1661, + "step": 28601 + }, + { + "epoch": 0.6356, + "grad_norm": 1.8911298513412476, + "learning_rate": 7.289619915536786e-05, + "loss": 2.4016, + "step": 28602 + }, + { + "epoch": 0.6356222222222222, + "grad_norm": 1.4113638401031494, + "learning_rate": 7.289175372304957e-05, + "loss": 2.1418, + "step": 28603 + }, + { + "epoch": 0.6356444444444445, + "grad_norm": 0.29859021306037903, + "learning_rate": 7.288730829073128e-05, + "loss": 0.0205, + "step": 28604 + }, + { + "epoch": 0.6356666666666667, + "grad_norm": 1.9816783666610718, + "learning_rate": 7.288286285841299e-05, + "loss": 1.8118, + "step": 28605 + }, + { + "epoch": 0.6356888888888889, + "grad_norm": 1.4286977052688599, + "learning_rate": 7.28784174260947e-05, + "loss": 1.9265, + "step": 28606 + }, + { + "epoch": 0.6357111111111111, + "grad_norm": 1.6861002445220947, + "learning_rate": 7.28739719937764e-05, + "loss": 2.3091, + "step": 28607 + }, + { + "epoch": 0.6357333333333334, + "grad_norm": 1.5298811197280884, + "learning_rate": 7.28695265614581e-05, + "loss": 2.1259, + "step": 28608 + }, + { + "epoch": 0.6357555555555555, + "grad_norm": 1.7088412046432495, + "learning_rate": 7.286508112913981e-05, + "loss": 1.7914, + "step": 28609 + }, + { + "epoch": 0.6357777777777778, + "grad_norm": 1.6431035995483398, + "learning_rate": 7.286063569682152e-05, + "loss": 2.349, + "step": 28610 + }, + { + "epoch": 0.6358, + "grad_norm": 1.3370503187179565, + "learning_rate": 7.285619026450322e-05, + "loss": 1.8768, + "step": 28611 + }, + { + "epoch": 0.6358222222222222, + "grad_norm": 1.6042437553405762, + "learning_rate": 7.285174483218493e-05, + "loss": 1.9377, + "step": 28612 + }, + { + "epoch": 0.6358444444444444, + "grad_norm": 0.9689003229141235, + "learning_rate": 7.284729939986664e-05, + "loss": 0.9605, + "step": 28613 + }, + { + "epoch": 0.6358666666666667, + "grad_norm": 1.4642431735992432, + "learning_rate": 7.284285396754835e-05, + "loss": 1.7675, + "step": 28614 + }, + { + "epoch": 0.6358888888888888, + "grad_norm": 1.6015061140060425, + "learning_rate": 7.283840853523006e-05, + "loss": 2.0033, + "step": 28615 + }, + { + "epoch": 0.6359111111111111, + "grad_norm": 1.7440736293792725, + "learning_rate": 7.283396310291177e-05, + "loss": 2.25, + "step": 28616 + }, + { + "epoch": 0.6359333333333334, + "grad_norm": 1.5764706134796143, + "learning_rate": 7.282951767059347e-05, + "loss": 1.6149, + "step": 28617 + }, + { + "epoch": 0.6359555555555556, + "grad_norm": 1.6555157899856567, + "learning_rate": 7.282507223827517e-05, + "loss": 2.1503, + "step": 28618 + }, + { + "epoch": 0.6359777777777778, + "grad_norm": 1.5757826566696167, + "learning_rate": 7.282062680595688e-05, + "loss": 1.9042, + "step": 28619 + }, + { + "epoch": 0.636, + "grad_norm": 2.0371313095092773, + "learning_rate": 7.281618137363859e-05, + "loss": 2.2666, + "step": 28620 + }, + { + "epoch": 0.6360222222222223, + "grad_norm": 1.907381296157837, + "learning_rate": 7.281173594132029e-05, + "loss": 1.9729, + "step": 28621 + }, + { + "epoch": 0.6360444444444444, + "grad_norm": 1.9274381399154663, + "learning_rate": 7.2807290509002e-05, + "loss": 2.2086, + "step": 28622 + }, + { + "epoch": 0.6360666666666667, + "grad_norm": 1.6238957643508911, + "learning_rate": 7.280284507668372e-05, + "loss": 1.9729, + "step": 28623 + }, + { + "epoch": 0.6360888888888889, + "grad_norm": 1.978137731552124, + "learning_rate": 7.279839964436543e-05, + "loss": 2.1478, + "step": 28624 + }, + { + "epoch": 0.6361111111111111, + "grad_norm": 1.505639910697937, + "learning_rate": 7.279395421204712e-05, + "loss": 1.7419, + "step": 28625 + }, + { + "epoch": 0.6361333333333333, + "grad_norm": 1.651392936706543, + "learning_rate": 7.278950877972883e-05, + "loss": 1.9836, + "step": 28626 + }, + { + "epoch": 0.6361555555555556, + "grad_norm": 2.5465140342712402, + "learning_rate": 7.278506334741054e-05, + "loss": 2.0878, + "step": 28627 + }, + { + "epoch": 0.6361777777777777, + "grad_norm": 1.9380546808242798, + "learning_rate": 7.278061791509224e-05, + "loss": 2.1909, + "step": 28628 + }, + { + "epoch": 0.6362, + "grad_norm": 1.6133519411087036, + "learning_rate": 7.277617248277395e-05, + "loss": 1.9931, + "step": 28629 + }, + { + "epoch": 0.6362222222222222, + "grad_norm": 1.7483313083648682, + "learning_rate": 7.277172705045566e-05, + "loss": 1.6586, + "step": 28630 + }, + { + "epoch": 0.6362444444444444, + "grad_norm": 1.6443977355957031, + "learning_rate": 7.276728161813737e-05, + "loss": 1.9233, + "step": 28631 + }, + { + "epoch": 0.6362666666666666, + "grad_norm": 2.09979510307312, + "learning_rate": 7.276283618581908e-05, + "loss": 1.6895, + "step": 28632 + }, + { + "epoch": 0.6362888888888889, + "grad_norm": 1.843942642211914, + "learning_rate": 7.275839075350079e-05, + "loss": 1.7228, + "step": 28633 + }, + { + "epoch": 0.6363111111111112, + "grad_norm": 1.4837838411331177, + "learning_rate": 7.27539453211825e-05, + "loss": 1.6651, + "step": 28634 + }, + { + "epoch": 0.6363333333333333, + "grad_norm": 1.5741617679595947, + "learning_rate": 7.27494998888642e-05, + "loss": 1.6383, + "step": 28635 + }, + { + "epoch": 0.6363555555555556, + "grad_norm": 2.0141563415527344, + "learning_rate": 7.27450544565459e-05, + "loss": 2.3839, + "step": 28636 + }, + { + "epoch": 0.6363777777777778, + "grad_norm": 1.6650718450546265, + "learning_rate": 7.274060902422761e-05, + "loss": 1.8759, + "step": 28637 + }, + { + "epoch": 0.6364, + "grad_norm": 1.608781337738037, + "learning_rate": 7.273616359190931e-05, + "loss": 1.4356, + "step": 28638 + }, + { + "epoch": 0.6364222222222222, + "grad_norm": 1.8974065780639648, + "learning_rate": 7.273171815959102e-05, + "loss": 2.0662, + "step": 28639 + }, + { + "epoch": 0.6364444444444445, + "grad_norm": 1.67244291305542, + "learning_rate": 7.272727272727273e-05, + "loss": 1.7112, + "step": 28640 + }, + { + "epoch": 0.6364666666666666, + "grad_norm": 2.118295431137085, + "learning_rate": 7.272282729495444e-05, + "loss": 1.9977, + "step": 28641 + }, + { + "epoch": 0.6364888888888889, + "grad_norm": 2.3757729530334473, + "learning_rate": 7.271838186263615e-05, + "loss": 2.4056, + "step": 28642 + }, + { + "epoch": 0.6365111111111111, + "grad_norm": 1.6669895648956299, + "learning_rate": 7.271393643031786e-05, + "loss": 1.5257, + "step": 28643 + }, + { + "epoch": 0.6365333333333333, + "grad_norm": 1.4742716550827026, + "learning_rate": 7.270949099799957e-05, + "loss": 1.361, + "step": 28644 + }, + { + "epoch": 0.6365555555555555, + "grad_norm": 2.1252920627593994, + "learning_rate": 7.270504556568126e-05, + "loss": 1.7919, + "step": 28645 + }, + { + "epoch": 0.6365777777777778, + "grad_norm": 1.695860505104065, + "learning_rate": 7.270060013336297e-05, + "loss": 1.8634, + "step": 28646 + }, + { + "epoch": 0.6366, + "grad_norm": 2.3226397037506104, + "learning_rate": 7.269615470104468e-05, + "loss": 1.8964, + "step": 28647 + }, + { + "epoch": 0.6366222222222222, + "grad_norm": 1.852975606918335, + "learning_rate": 7.269170926872638e-05, + "loss": 1.5989, + "step": 28648 + }, + { + "epoch": 0.6366444444444445, + "grad_norm": 1.5640804767608643, + "learning_rate": 7.268726383640809e-05, + "loss": 1.5557, + "step": 28649 + }, + { + "epoch": 0.6366666666666667, + "grad_norm": 2.081127882003784, + "learning_rate": 7.26828184040898e-05, + "loss": 1.8513, + "step": 28650 + }, + { + "epoch": 0.6366888888888889, + "grad_norm": 1.040666103363037, + "learning_rate": 7.267837297177151e-05, + "loss": 0.0264, + "step": 28651 + }, + { + "epoch": 0.6367111111111111, + "grad_norm": 0.20972059667110443, + "learning_rate": 7.267392753945322e-05, + "loss": 0.0186, + "step": 28652 + }, + { + "epoch": 0.6367333333333334, + "grad_norm": 1.5281065702438354, + "learning_rate": 7.266948210713493e-05, + "loss": 1.9655, + "step": 28653 + }, + { + "epoch": 0.6367555555555555, + "grad_norm": 1.4604177474975586, + "learning_rate": 7.266503667481664e-05, + "loss": 1.9184, + "step": 28654 + }, + { + "epoch": 0.6367777777777778, + "grad_norm": 1.5252203941345215, + "learning_rate": 7.266059124249833e-05, + "loss": 1.9986, + "step": 28655 + }, + { + "epoch": 0.6368, + "grad_norm": 1.656562328338623, + "learning_rate": 7.265614581018004e-05, + "loss": 1.8098, + "step": 28656 + }, + { + "epoch": 0.6368222222222222, + "grad_norm": 1.582373857498169, + "learning_rate": 7.265170037786175e-05, + "loss": 1.4481, + "step": 28657 + }, + { + "epoch": 0.6368444444444444, + "grad_norm": 2.045361280441284, + "learning_rate": 7.264725494554345e-05, + "loss": 2.5536, + "step": 28658 + }, + { + "epoch": 0.6368666666666667, + "grad_norm": 1.7420368194580078, + "learning_rate": 7.264280951322516e-05, + "loss": 2.2029, + "step": 28659 + }, + { + "epoch": 0.6368888888888888, + "grad_norm": 1.4882012605667114, + "learning_rate": 7.263836408090688e-05, + "loss": 1.6128, + "step": 28660 + }, + { + "epoch": 0.6369111111111111, + "grad_norm": 1.2847424745559692, + "learning_rate": 7.263391864858859e-05, + "loss": 1.1251, + "step": 28661 + }, + { + "epoch": 0.6369333333333334, + "grad_norm": 1.6982362270355225, + "learning_rate": 7.262947321627029e-05, + "loss": 2.031, + "step": 28662 + }, + { + "epoch": 0.6369555555555556, + "grad_norm": 2.1967759132385254, + "learning_rate": 7.2625027783952e-05, + "loss": 2.1992, + "step": 28663 + }, + { + "epoch": 0.6369777777777778, + "grad_norm": 1.2060320377349854, + "learning_rate": 7.26205823516337e-05, + "loss": 0.92, + "step": 28664 + }, + { + "epoch": 0.637, + "grad_norm": 1.366573691368103, + "learning_rate": 7.26161369193154e-05, + "loss": 1.611, + "step": 28665 + }, + { + "epoch": 0.6370222222222223, + "grad_norm": 1.792948842048645, + "learning_rate": 7.261169148699711e-05, + "loss": 1.9003, + "step": 28666 + }, + { + "epoch": 0.6370444444444444, + "grad_norm": 1.3751440048217773, + "learning_rate": 7.260724605467882e-05, + "loss": 1.7083, + "step": 28667 + }, + { + "epoch": 0.6370666666666667, + "grad_norm": 1.6102418899536133, + "learning_rate": 7.260280062236052e-05, + "loss": 1.9591, + "step": 28668 + }, + { + "epoch": 0.6370888888888889, + "grad_norm": 2.0655267238616943, + "learning_rate": 7.259835519004224e-05, + "loss": 2.3119, + "step": 28669 + }, + { + "epoch": 0.6371111111111111, + "grad_norm": 1.8483701944351196, + "learning_rate": 7.259390975772395e-05, + "loss": 2.1266, + "step": 28670 + }, + { + "epoch": 0.6371333333333333, + "grad_norm": 1.8502991199493408, + "learning_rate": 7.258946432540566e-05, + "loss": 2.212, + "step": 28671 + }, + { + "epoch": 0.6371555555555556, + "grad_norm": 1.7456656694412231, + "learning_rate": 7.258501889308735e-05, + "loss": 1.7485, + "step": 28672 + }, + { + "epoch": 0.6371777777777777, + "grad_norm": 1.6352704763412476, + "learning_rate": 7.258057346076906e-05, + "loss": 1.7125, + "step": 28673 + }, + { + "epoch": 0.6372, + "grad_norm": 1.7602741718292236, + "learning_rate": 7.257612802845077e-05, + "loss": 2.0005, + "step": 28674 + }, + { + "epoch": 0.6372222222222222, + "grad_norm": 1.2574185132980347, + "learning_rate": 7.257168259613247e-05, + "loss": 1.2635, + "step": 28675 + }, + { + "epoch": 0.6372444444444444, + "grad_norm": 1.5207939147949219, + "learning_rate": 7.256723716381418e-05, + "loss": 1.473, + "step": 28676 + }, + { + "epoch": 0.6372666666666666, + "grad_norm": 1.6920651197433472, + "learning_rate": 7.256279173149589e-05, + "loss": 1.9603, + "step": 28677 + }, + { + "epoch": 0.6372888888888889, + "grad_norm": 1.7033255100250244, + "learning_rate": 7.25583462991776e-05, + "loss": 1.6898, + "step": 28678 + }, + { + "epoch": 0.6373111111111112, + "grad_norm": 1.9950025081634521, + "learning_rate": 7.255390086685931e-05, + "loss": 2.1322, + "step": 28679 + }, + { + "epoch": 0.6373333333333333, + "grad_norm": 1.4635943174362183, + "learning_rate": 7.254945543454102e-05, + "loss": 1.3099, + "step": 28680 + }, + { + "epoch": 0.6373555555555556, + "grad_norm": 1.6194703578948975, + "learning_rate": 7.254501000222273e-05, + "loss": 1.7211, + "step": 28681 + }, + { + "epoch": 0.6373777777777778, + "grad_norm": 1.7633742094039917, + "learning_rate": 7.254056456990442e-05, + "loss": 1.8104, + "step": 28682 + }, + { + "epoch": 0.6374, + "grad_norm": 1.9420922994613647, + "learning_rate": 7.253611913758613e-05, + "loss": 1.694, + "step": 28683 + }, + { + "epoch": 0.6374222222222222, + "grad_norm": 1.8654829263687134, + "learning_rate": 7.253167370526784e-05, + "loss": 1.8214, + "step": 28684 + }, + { + "epoch": 0.6374444444444445, + "grad_norm": 2.2684643268585205, + "learning_rate": 7.252722827294954e-05, + "loss": 2.2231, + "step": 28685 + }, + { + "epoch": 0.6374666666666666, + "grad_norm": 2.0050923824310303, + "learning_rate": 7.252278284063125e-05, + "loss": 2.0718, + "step": 28686 + }, + { + "epoch": 0.6374888888888889, + "grad_norm": 1.8627880811691284, + "learning_rate": 7.251833740831296e-05, + "loss": 1.8782, + "step": 28687 + }, + { + "epoch": 0.6375111111111111, + "grad_norm": 1.5710184574127197, + "learning_rate": 7.251389197599467e-05, + "loss": 1.5789, + "step": 28688 + }, + { + "epoch": 0.6375333333333333, + "grad_norm": 1.7555488348007202, + "learning_rate": 7.250944654367638e-05, + "loss": 1.8122, + "step": 28689 + }, + { + "epoch": 0.6375555555555555, + "grad_norm": 1.7349294424057007, + "learning_rate": 7.250500111135809e-05, + "loss": 1.8575, + "step": 28690 + }, + { + "epoch": 0.6375777777777778, + "grad_norm": 1.8430469036102295, + "learning_rate": 7.25005556790398e-05, + "loss": 1.6942, + "step": 28691 + }, + { + "epoch": 0.6376, + "grad_norm": 1.8953179121017456, + "learning_rate": 7.249611024672149e-05, + "loss": 1.504, + "step": 28692 + }, + { + "epoch": 0.6376222222222222, + "grad_norm": 1.7643966674804688, + "learning_rate": 7.24916648144032e-05, + "loss": 1.7468, + "step": 28693 + }, + { + "epoch": 0.6376444444444445, + "grad_norm": 1.9069668054580688, + "learning_rate": 7.248721938208491e-05, + "loss": 2.0379, + "step": 28694 + }, + { + "epoch": 0.6376666666666667, + "grad_norm": 1.7193044424057007, + "learning_rate": 7.248277394976661e-05, + "loss": 1.5639, + "step": 28695 + }, + { + "epoch": 0.6376888888888889, + "grad_norm": 1.7010250091552734, + "learning_rate": 7.247832851744832e-05, + "loss": 1.7881, + "step": 28696 + }, + { + "epoch": 0.6377111111111111, + "grad_norm": 1.6988699436187744, + "learning_rate": 7.247388308513004e-05, + "loss": 1.9145, + "step": 28697 + }, + { + "epoch": 0.6377333333333334, + "grad_norm": 1.434458613395691, + "learning_rate": 7.246943765281175e-05, + "loss": 1.2502, + "step": 28698 + }, + { + "epoch": 0.6377555555555555, + "grad_norm": 1.992437481880188, + "learning_rate": 7.246499222049345e-05, + "loss": 1.8448, + "step": 28699 + }, + { + "epoch": 0.6377777777777778, + "grad_norm": 1.5702959299087524, + "learning_rate": 7.246054678817516e-05, + "loss": 1.3579, + "step": 28700 + }, + { + "epoch": 0.6378, + "grad_norm": 1.6802828311920166, + "learning_rate": 7.245610135585687e-05, + "loss": 2.341, + "step": 28701 + }, + { + "epoch": 0.6378222222222222, + "grad_norm": 1.627329707145691, + "learning_rate": 7.245165592353856e-05, + "loss": 2.8105, + "step": 28702 + }, + { + "epoch": 0.6378444444444444, + "grad_norm": 1.342093586921692, + "learning_rate": 7.244721049122027e-05, + "loss": 1.9832, + "step": 28703 + }, + { + "epoch": 0.6378666666666667, + "grad_norm": 1.5474640130996704, + "learning_rate": 7.244276505890198e-05, + "loss": 2.4607, + "step": 28704 + }, + { + "epoch": 0.6378888888888888, + "grad_norm": 1.531585454940796, + "learning_rate": 7.243831962658368e-05, + "loss": 2.0354, + "step": 28705 + }, + { + "epoch": 0.6379111111111111, + "grad_norm": 1.5042400360107422, + "learning_rate": 7.24338741942654e-05, + "loss": 2.1718, + "step": 28706 + }, + { + "epoch": 0.6379333333333334, + "grad_norm": 1.3284921646118164, + "learning_rate": 7.242942876194711e-05, + "loss": 1.6979, + "step": 28707 + }, + { + "epoch": 0.6379555555555556, + "grad_norm": 1.4425671100616455, + "learning_rate": 7.242498332962882e-05, + "loss": 1.9872, + "step": 28708 + }, + { + "epoch": 0.6379777777777778, + "grad_norm": 1.7384395599365234, + "learning_rate": 7.242053789731052e-05, + "loss": 2.2414, + "step": 28709 + }, + { + "epoch": 0.638, + "grad_norm": 1.554915189743042, + "learning_rate": 7.241609246499223e-05, + "loss": 2.3454, + "step": 28710 + }, + { + "epoch": 0.6380222222222223, + "grad_norm": 1.8486429452896118, + "learning_rate": 7.241164703267394e-05, + "loss": 2.168, + "step": 28711 + }, + { + "epoch": 0.6380444444444444, + "grad_norm": 1.7188366651535034, + "learning_rate": 7.240720160035563e-05, + "loss": 2.25, + "step": 28712 + }, + { + "epoch": 0.6380666666666667, + "grad_norm": 1.8007780313491821, + "learning_rate": 7.240275616803734e-05, + "loss": 2.4567, + "step": 28713 + }, + { + "epoch": 0.6380888888888889, + "grad_norm": 1.6362093687057495, + "learning_rate": 7.239831073571905e-05, + "loss": 2.0876, + "step": 28714 + }, + { + "epoch": 0.6381111111111111, + "grad_norm": 1.621285319328308, + "learning_rate": 7.239386530340076e-05, + "loss": 2.183, + "step": 28715 + }, + { + "epoch": 0.6381333333333333, + "grad_norm": 1.888231635093689, + "learning_rate": 7.238941987108247e-05, + "loss": 2.0216, + "step": 28716 + }, + { + "epoch": 0.6381555555555556, + "grad_norm": 1.4312587976455688, + "learning_rate": 7.238497443876418e-05, + "loss": 1.6035, + "step": 28717 + }, + { + "epoch": 0.6381777777777777, + "grad_norm": 1.1737911701202393, + "learning_rate": 7.238052900644589e-05, + "loss": 0.6206, + "step": 28718 + }, + { + "epoch": 0.6382, + "grad_norm": 1.8380234241485596, + "learning_rate": 7.237608357412758e-05, + "loss": 2.3758, + "step": 28719 + }, + { + "epoch": 0.6382222222222222, + "grad_norm": 1.6413174867630005, + "learning_rate": 7.23716381418093e-05, + "loss": 1.7652, + "step": 28720 + }, + { + "epoch": 0.6382444444444444, + "grad_norm": 1.5186172723770142, + "learning_rate": 7.2367192709491e-05, + "loss": 1.9663, + "step": 28721 + }, + { + "epoch": 0.6382666666666666, + "grad_norm": 1.8024673461914062, + "learning_rate": 7.23627472771727e-05, + "loss": 2.0255, + "step": 28722 + }, + { + "epoch": 0.6382888888888889, + "grad_norm": 1.5912338495254517, + "learning_rate": 7.235830184485441e-05, + "loss": 1.7869, + "step": 28723 + }, + { + "epoch": 0.6383111111111112, + "grad_norm": 1.888502836227417, + "learning_rate": 7.235385641253612e-05, + "loss": 2.0113, + "step": 28724 + }, + { + "epoch": 0.6383333333333333, + "grad_norm": 1.4829522371292114, + "learning_rate": 7.234941098021783e-05, + "loss": 1.7279, + "step": 28725 + }, + { + "epoch": 0.6383555555555556, + "grad_norm": 1.761410117149353, + "learning_rate": 7.234496554789954e-05, + "loss": 1.6857, + "step": 28726 + }, + { + "epoch": 0.6383777777777778, + "grad_norm": 1.7559994459152222, + "learning_rate": 7.234052011558125e-05, + "loss": 2.2451, + "step": 28727 + }, + { + "epoch": 0.6384, + "grad_norm": 1.6013765335083008, + "learning_rate": 7.233607468326296e-05, + "loss": 1.6825, + "step": 28728 + }, + { + "epoch": 0.6384222222222222, + "grad_norm": 1.9847111701965332, + "learning_rate": 7.233162925094465e-05, + "loss": 2.1672, + "step": 28729 + }, + { + "epoch": 0.6384444444444445, + "grad_norm": 1.9318690299987793, + "learning_rate": 7.232718381862636e-05, + "loss": 1.7432, + "step": 28730 + }, + { + "epoch": 0.6384666666666666, + "grad_norm": 1.307256817817688, + "learning_rate": 7.232273838630807e-05, + "loss": 0.9803, + "step": 28731 + }, + { + "epoch": 0.6384888888888889, + "grad_norm": 1.7495099306106567, + "learning_rate": 7.231829295398977e-05, + "loss": 1.606, + "step": 28732 + }, + { + "epoch": 0.6385111111111111, + "grad_norm": 1.1634020805358887, + "learning_rate": 7.231384752167148e-05, + "loss": 0.9347, + "step": 28733 + }, + { + "epoch": 0.6385333333333333, + "grad_norm": 2.0177419185638428, + "learning_rate": 7.23094020893532e-05, + "loss": 2.1003, + "step": 28734 + }, + { + "epoch": 0.6385555555555555, + "grad_norm": 1.7657959461212158, + "learning_rate": 7.23049566570349e-05, + "loss": 1.7731, + "step": 28735 + }, + { + "epoch": 0.6385777777777778, + "grad_norm": 1.8546388149261475, + "learning_rate": 7.230051122471661e-05, + "loss": 1.5486, + "step": 28736 + }, + { + "epoch": 0.6386, + "grad_norm": 1.8040077686309814, + "learning_rate": 7.229606579239832e-05, + "loss": 2.0225, + "step": 28737 + }, + { + "epoch": 0.6386222222222222, + "grad_norm": 1.7609293460845947, + "learning_rate": 7.229162036008003e-05, + "loss": 2.2548, + "step": 28738 + }, + { + "epoch": 0.6386444444444445, + "grad_norm": 2.3687591552734375, + "learning_rate": 7.228717492776172e-05, + "loss": 2.1545, + "step": 28739 + }, + { + "epoch": 0.6386666666666667, + "grad_norm": 3.026139259338379, + "learning_rate": 7.228272949544343e-05, + "loss": 2.0457, + "step": 28740 + }, + { + "epoch": 0.6386888888888889, + "grad_norm": 2.389371395111084, + "learning_rate": 7.227828406312514e-05, + "loss": 2.1006, + "step": 28741 + }, + { + "epoch": 0.6387111111111111, + "grad_norm": 1.8829636573791504, + "learning_rate": 7.227383863080684e-05, + "loss": 2.157, + "step": 28742 + }, + { + "epoch": 0.6387333333333334, + "grad_norm": 1.4691171646118164, + "learning_rate": 7.226939319848856e-05, + "loss": 1.1827, + "step": 28743 + }, + { + "epoch": 0.6387555555555555, + "grad_norm": 1.8725281953811646, + "learning_rate": 7.226494776617027e-05, + "loss": 2.0211, + "step": 28744 + }, + { + "epoch": 0.6387777777777778, + "grad_norm": 2.3019354343414307, + "learning_rate": 7.226050233385198e-05, + "loss": 1.9218, + "step": 28745 + }, + { + "epoch": 0.6388, + "grad_norm": 2.649057626724243, + "learning_rate": 7.225605690153368e-05, + "loss": 2.4973, + "step": 28746 + }, + { + "epoch": 0.6388222222222222, + "grad_norm": 2.0913870334625244, + "learning_rate": 7.225161146921539e-05, + "loss": 1.7488, + "step": 28747 + }, + { + "epoch": 0.6388444444444444, + "grad_norm": 1.7658172845840454, + "learning_rate": 7.22471660368971e-05, + "loss": 1.3801, + "step": 28748 + }, + { + "epoch": 0.6388666666666667, + "grad_norm": 1.7374156713485718, + "learning_rate": 7.224272060457879e-05, + "loss": 1.6473, + "step": 28749 + }, + { + "epoch": 0.6388888888888888, + "grad_norm": 1.4828306436538696, + "learning_rate": 7.22382751722605e-05, + "loss": 0.9041, + "step": 28750 + }, + { + "epoch": 0.6389111111111111, + "grad_norm": 1.3354045152664185, + "learning_rate": 7.223382973994221e-05, + "loss": 2.0015, + "step": 28751 + }, + { + "epoch": 0.6389333333333334, + "grad_norm": 2.6666393280029297, + "learning_rate": 7.222938430762392e-05, + "loss": 0.0448, + "step": 28752 + }, + { + "epoch": 0.6389555555555556, + "grad_norm": 1.3696006536483765, + "learning_rate": 7.222493887530563e-05, + "loss": 1.8281, + "step": 28753 + }, + { + "epoch": 0.6389777777777778, + "grad_norm": 1.4097464084625244, + "learning_rate": 7.222049344298734e-05, + "loss": 1.2002, + "step": 28754 + }, + { + "epoch": 0.639, + "grad_norm": 1.7855876684188843, + "learning_rate": 7.221604801066905e-05, + "loss": 2.0857, + "step": 28755 + }, + { + "epoch": 0.6390222222222223, + "grad_norm": 1.5594568252563477, + "learning_rate": 7.221160257835075e-05, + "loss": 1.6816, + "step": 28756 + }, + { + "epoch": 0.6390444444444444, + "grad_norm": 1.421626091003418, + "learning_rate": 7.220715714603246e-05, + "loss": 1.7824, + "step": 28757 + }, + { + "epoch": 0.6390666666666667, + "grad_norm": 2.0007307529449463, + "learning_rate": 7.220271171371417e-05, + "loss": 2.2619, + "step": 28758 + }, + { + "epoch": 0.6390888888888889, + "grad_norm": 1.9237414598464966, + "learning_rate": 7.219826628139586e-05, + "loss": 2.0441, + "step": 28759 + }, + { + "epoch": 0.6391111111111111, + "grad_norm": 1.8039405345916748, + "learning_rate": 7.219382084907757e-05, + "loss": 1.708, + "step": 28760 + }, + { + "epoch": 0.6391333333333333, + "grad_norm": 1.5210480690002441, + "learning_rate": 7.218937541675928e-05, + "loss": 2.385, + "step": 28761 + }, + { + "epoch": 0.6391555555555556, + "grad_norm": 1.5081499814987183, + "learning_rate": 7.218492998444099e-05, + "loss": 1.2788, + "step": 28762 + }, + { + "epoch": 0.6391777777777777, + "grad_norm": 1.9141021966934204, + "learning_rate": 7.21804845521227e-05, + "loss": 1.3567, + "step": 28763 + }, + { + "epoch": 0.6392, + "grad_norm": 3.3286590576171875, + "learning_rate": 7.217603911980441e-05, + "loss": 1.9065, + "step": 28764 + }, + { + "epoch": 0.6392222222222222, + "grad_norm": 0.9982495307922363, + "learning_rate": 7.217159368748612e-05, + "loss": 0.7088, + "step": 28765 + }, + { + "epoch": 0.6392444444444444, + "grad_norm": 1.485310673713684, + "learning_rate": 7.216714825516782e-05, + "loss": 1.1706, + "step": 28766 + }, + { + "epoch": 0.6392666666666666, + "grad_norm": 1.6460174322128296, + "learning_rate": 7.216270282284952e-05, + "loss": 1.7584, + "step": 28767 + }, + { + "epoch": 0.6392888888888889, + "grad_norm": 1.8000679016113281, + "learning_rate": 7.215825739053123e-05, + "loss": 1.9528, + "step": 28768 + }, + { + "epoch": 0.6393111111111112, + "grad_norm": 1.925246000289917, + "learning_rate": 7.215381195821293e-05, + "loss": 2.0873, + "step": 28769 + }, + { + "epoch": 0.6393333333333333, + "grad_norm": 1.6026747226715088, + "learning_rate": 7.214936652589464e-05, + "loss": 1.6128, + "step": 28770 + }, + { + "epoch": 0.6393555555555556, + "grad_norm": 2.411868095397949, + "learning_rate": 7.214492109357636e-05, + "loss": 2.344, + "step": 28771 + }, + { + "epoch": 0.6393777777777778, + "grad_norm": 1.7101866006851196, + "learning_rate": 7.214047566125806e-05, + "loss": 1.7117, + "step": 28772 + }, + { + "epoch": 0.6394, + "grad_norm": 1.8014975786209106, + "learning_rate": 7.213603022893977e-05, + "loss": 1.6977, + "step": 28773 + }, + { + "epoch": 0.6394222222222222, + "grad_norm": 1.1885337829589844, + "learning_rate": 7.213158479662148e-05, + "loss": 1.0145, + "step": 28774 + }, + { + "epoch": 0.6394444444444445, + "grad_norm": 1.8605204820632935, + "learning_rate": 7.212713936430319e-05, + "loss": 2.1966, + "step": 28775 + }, + { + "epoch": 0.6394666666666666, + "grad_norm": 1.8864420652389526, + "learning_rate": 7.212269393198488e-05, + "loss": 1.8072, + "step": 28776 + }, + { + "epoch": 0.6394888888888889, + "grad_norm": 1.7543187141418457, + "learning_rate": 7.21182484996666e-05, + "loss": 1.6595, + "step": 28777 + }, + { + "epoch": 0.6395111111111111, + "grad_norm": 1.7286779880523682, + "learning_rate": 7.21138030673483e-05, + "loss": 1.7274, + "step": 28778 + }, + { + "epoch": 0.6395333333333333, + "grad_norm": 1.0491361618041992, + "learning_rate": 7.210935763503e-05, + "loss": 0.7481, + "step": 28779 + }, + { + "epoch": 0.6395555555555555, + "grad_norm": 1.604767084121704, + "learning_rate": 7.210491220271172e-05, + "loss": 1.6491, + "step": 28780 + }, + { + "epoch": 0.6395777777777778, + "grad_norm": 1.9371610879898071, + "learning_rate": 7.210046677039343e-05, + "loss": 1.691, + "step": 28781 + }, + { + "epoch": 0.6396, + "grad_norm": 1.6699118614196777, + "learning_rate": 7.209602133807513e-05, + "loss": 1.7054, + "step": 28782 + }, + { + "epoch": 0.6396222222222222, + "grad_norm": 2.032633066177368, + "learning_rate": 7.209157590575684e-05, + "loss": 1.6071, + "step": 28783 + }, + { + "epoch": 0.6396444444444445, + "grad_norm": 2.003582239151001, + "learning_rate": 7.208713047343855e-05, + "loss": 2.2553, + "step": 28784 + }, + { + "epoch": 0.6396666666666667, + "grad_norm": 1.4603201150894165, + "learning_rate": 7.208268504112026e-05, + "loss": 1.6134, + "step": 28785 + }, + { + "epoch": 0.6396888888888889, + "grad_norm": 2.1268177032470703, + "learning_rate": 7.207823960880195e-05, + "loss": 1.8333, + "step": 28786 + }, + { + "epoch": 0.6397111111111111, + "grad_norm": 2.0532491207122803, + "learning_rate": 7.207379417648366e-05, + "loss": 1.9751, + "step": 28787 + }, + { + "epoch": 0.6397333333333334, + "grad_norm": 2.148063898086548, + "learning_rate": 7.206934874416537e-05, + "loss": 2.6461, + "step": 28788 + }, + { + "epoch": 0.6397555555555555, + "grad_norm": 2.0925416946411133, + "learning_rate": 7.206490331184708e-05, + "loss": 1.6654, + "step": 28789 + }, + { + "epoch": 0.6397777777777778, + "grad_norm": 1.6371979713439941, + "learning_rate": 7.206045787952879e-05, + "loss": 1.7088, + "step": 28790 + }, + { + "epoch": 0.6398, + "grad_norm": 2.2275896072387695, + "learning_rate": 7.20560124472105e-05, + "loss": 2.0222, + "step": 28791 + }, + { + "epoch": 0.6398222222222222, + "grad_norm": 1.7818533182144165, + "learning_rate": 7.20515670148922e-05, + "loss": 1.7862, + "step": 28792 + }, + { + "epoch": 0.6398444444444444, + "grad_norm": 1.8481240272521973, + "learning_rate": 7.204712158257391e-05, + "loss": 1.4648, + "step": 28793 + }, + { + "epoch": 0.6398666666666667, + "grad_norm": 1.8847888708114624, + "learning_rate": 7.204267615025562e-05, + "loss": 2.2493, + "step": 28794 + }, + { + "epoch": 0.6398888888888888, + "grad_norm": 2.177549362182617, + "learning_rate": 7.203823071793733e-05, + "loss": 2.3858, + "step": 28795 + }, + { + "epoch": 0.6399111111111111, + "grad_norm": 1.6145027875900269, + "learning_rate": 7.203378528561902e-05, + "loss": 1.475, + "step": 28796 + }, + { + "epoch": 0.6399333333333334, + "grad_norm": 1.8633002042770386, + "learning_rate": 7.202933985330073e-05, + "loss": 2.1072, + "step": 28797 + }, + { + "epoch": 0.6399555555555556, + "grad_norm": 1.697845220565796, + "learning_rate": 7.202489442098244e-05, + "loss": 1.5116, + "step": 28798 + }, + { + "epoch": 0.6399777777777778, + "grad_norm": 1.8304460048675537, + "learning_rate": 7.202044898866415e-05, + "loss": 2.1342, + "step": 28799 + }, + { + "epoch": 0.64, + "grad_norm": 1.6719493865966797, + "learning_rate": 7.201600355634586e-05, + "loss": 1.5713, + "step": 28800 + }, + { + "epoch": 0.6400222222222223, + "grad_norm": 1.6436892747879028, + "learning_rate": 7.201155812402757e-05, + "loss": 2.2734, + "step": 28801 + }, + { + "epoch": 0.6400444444444444, + "grad_norm": 0.18774180114269257, + "learning_rate": 7.200711269170928e-05, + "loss": 0.0162, + "step": 28802 + }, + { + "epoch": 0.6400666666666667, + "grad_norm": 1.8824647665023804, + "learning_rate": 7.200266725939098e-05, + "loss": 2.5967, + "step": 28803 + }, + { + "epoch": 0.6400888888888889, + "grad_norm": 1.5691263675689697, + "learning_rate": 7.199822182707269e-05, + "loss": 2.3886, + "step": 28804 + }, + { + "epoch": 0.6401111111111111, + "grad_norm": 1.7049403190612793, + "learning_rate": 7.19937763947544e-05, + "loss": 2.1433, + "step": 28805 + }, + { + "epoch": 0.6401333333333333, + "grad_norm": 1.461273193359375, + "learning_rate": 7.198933096243609e-05, + "loss": 1.9639, + "step": 28806 + }, + { + "epoch": 0.6401555555555556, + "grad_norm": 1.6055644750595093, + "learning_rate": 7.19848855301178e-05, + "loss": 2.2601, + "step": 28807 + }, + { + "epoch": 0.6401777777777777, + "grad_norm": 1.67502760887146, + "learning_rate": 7.198044009779952e-05, + "loss": 2.0551, + "step": 28808 + }, + { + "epoch": 0.6402, + "grad_norm": 1.8794890642166138, + "learning_rate": 7.197599466548122e-05, + "loss": 2.1515, + "step": 28809 + }, + { + "epoch": 0.6402222222222222, + "grad_norm": 1.5921684503555298, + "learning_rate": 7.197154923316293e-05, + "loss": 2.0577, + "step": 28810 + }, + { + "epoch": 0.6402444444444444, + "grad_norm": 1.6569201946258545, + "learning_rate": 7.196710380084464e-05, + "loss": 1.8993, + "step": 28811 + }, + { + "epoch": 0.6402666666666667, + "grad_norm": 1.6281111240386963, + "learning_rate": 7.196265836852635e-05, + "loss": 2.0419, + "step": 28812 + }, + { + "epoch": 0.6402888888888889, + "grad_norm": 1.9701122045516968, + "learning_rate": 7.195821293620805e-05, + "loss": 2.1372, + "step": 28813 + }, + { + "epoch": 0.6403111111111112, + "grad_norm": 1.0676236152648926, + "learning_rate": 7.195376750388975e-05, + "loss": 1.1958, + "step": 28814 + }, + { + "epoch": 0.6403333333333333, + "grad_norm": 1.5588091611862183, + "learning_rate": 7.194932207157146e-05, + "loss": 1.4684, + "step": 28815 + }, + { + "epoch": 0.6403555555555556, + "grad_norm": 1.7817189693450928, + "learning_rate": 7.194487663925316e-05, + "loss": 2.1338, + "step": 28816 + }, + { + "epoch": 0.6403777777777778, + "grad_norm": 1.8440243005752563, + "learning_rate": 7.194043120693488e-05, + "loss": 1.8603, + "step": 28817 + }, + { + "epoch": 0.6404, + "grad_norm": 1.5460695028305054, + "learning_rate": 7.19359857746166e-05, + "loss": 1.6515, + "step": 28818 + }, + { + "epoch": 0.6404222222222222, + "grad_norm": 1.8269490003585815, + "learning_rate": 7.193154034229829e-05, + "loss": 2.3875, + "step": 28819 + }, + { + "epoch": 0.6404444444444445, + "grad_norm": 1.5759791135787964, + "learning_rate": 7.192709490998e-05, + "loss": 1.9152, + "step": 28820 + }, + { + "epoch": 0.6404666666666666, + "grad_norm": 1.8602632284164429, + "learning_rate": 7.192264947766171e-05, + "loss": 2.1399, + "step": 28821 + }, + { + "epoch": 0.6404888888888889, + "grad_norm": 1.6041455268859863, + "learning_rate": 7.191820404534342e-05, + "loss": 1.9356, + "step": 28822 + }, + { + "epoch": 0.6405111111111111, + "grad_norm": 1.8999677896499634, + "learning_rate": 7.191375861302511e-05, + "loss": 2.1473, + "step": 28823 + }, + { + "epoch": 0.6405333333333333, + "grad_norm": 1.381639838218689, + "learning_rate": 7.190931318070682e-05, + "loss": 1.7347, + "step": 28824 + }, + { + "epoch": 0.6405555555555555, + "grad_norm": 2.126990795135498, + "learning_rate": 7.190486774838853e-05, + "loss": 2.4627, + "step": 28825 + }, + { + "epoch": 0.6405777777777778, + "grad_norm": 1.8442226648330688, + "learning_rate": 7.190042231607024e-05, + "loss": 1.6449, + "step": 28826 + }, + { + "epoch": 0.6406, + "grad_norm": 1.4026455879211426, + "learning_rate": 7.189597688375195e-05, + "loss": 1.5099, + "step": 28827 + }, + { + "epoch": 0.6406222222222222, + "grad_norm": 2.226283550262451, + "learning_rate": 7.189153145143366e-05, + "loss": 1.5443, + "step": 28828 + }, + { + "epoch": 0.6406444444444445, + "grad_norm": 1.6985044479370117, + "learning_rate": 7.188708601911536e-05, + "loss": 1.6088, + "step": 28829 + }, + { + "epoch": 0.6406666666666667, + "grad_norm": 1.2343090772628784, + "learning_rate": 7.188264058679707e-05, + "loss": 0.8093, + "step": 28830 + }, + { + "epoch": 0.6406888888888889, + "grad_norm": 0.5151288509368896, + "learning_rate": 7.187819515447878e-05, + "loss": 0.0383, + "step": 28831 + }, + { + "epoch": 0.6407111111111111, + "grad_norm": 1.7729060649871826, + "learning_rate": 7.187374972216049e-05, + "loss": 1.5273, + "step": 28832 + }, + { + "epoch": 0.6407333333333334, + "grad_norm": 2.00065541267395, + "learning_rate": 7.186930428984218e-05, + "loss": 2.1591, + "step": 28833 + }, + { + "epoch": 0.6407555555555555, + "grad_norm": 1.6418817043304443, + "learning_rate": 7.186485885752389e-05, + "loss": 2.2355, + "step": 28834 + }, + { + "epoch": 0.6407777777777778, + "grad_norm": 1.126987099647522, + "learning_rate": 7.18604134252056e-05, + "loss": 0.65, + "step": 28835 + }, + { + "epoch": 0.6408, + "grad_norm": 1.324263572692871, + "learning_rate": 7.185596799288731e-05, + "loss": 1.1635, + "step": 28836 + }, + { + "epoch": 0.6408222222222222, + "grad_norm": 1.718005895614624, + "learning_rate": 7.185152256056902e-05, + "loss": 1.6372, + "step": 28837 + }, + { + "epoch": 0.6408444444444444, + "grad_norm": 1.81002676486969, + "learning_rate": 7.184707712825073e-05, + "loss": 2.1625, + "step": 28838 + }, + { + "epoch": 0.6408666666666667, + "grad_norm": 1.6128833293914795, + "learning_rate": 7.184263169593243e-05, + "loss": 1.8423, + "step": 28839 + }, + { + "epoch": 0.6408888888888888, + "grad_norm": 1.8957856893539429, + "learning_rate": 7.183818626361414e-05, + "loss": 1.791, + "step": 28840 + }, + { + "epoch": 0.6409111111111111, + "grad_norm": 1.8511956930160522, + "learning_rate": 7.183374083129585e-05, + "loss": 1.7306, + "step": 28841 + }, + { + "epoch": 0.6409333333333334, + "grad_norm": 1.8138313293457031, + "learning_rate": 7.182929539897756e-05, + "loss": 1.9937, + "step": 28842 + }, + { + "epoch": 0.6409555555555555, + "grad_norm": 2.3374853134155273, + "learning_rate": 7.182484996665925e-05, + "loss": 2.0165, + "step": 28843 + }, + { + "epoch": 0.6409777777777778, + "grad_norm": 1.9973797798156738, + "learning_rate": 7.182040453434096e-05, + "loss": 2.0304, + "step": 28844 + }, + { + "epoch": 0.641, + "grad_norm": 1.9620558023452759, + "learning_rate": 7.181595910202269e-05, + "loss": 1.7683, + "step": 28845 + }, + { + "epoch": 0.6410222222222223, + "grad_norm": 1.861804723739624, + "learning_rate": 7.181151366970438e-05, + "loss": 1.8745, + "step": 28846 + }, + { + "epoch": 0.6410444444444444, + "grad_norm": 1.9343196153640747, + "learning_rate": 7.180706823738609e-05, + "loss": 2.1023, + "step": 28847 + }, + { + "epoch": 0.6410666666666667, + "grad_norm": 1.6749043464660645, + "learning_rate": 7.18026228050678e-05, + "loss": 1.4079, + "step": 28848 + }, + { + "epoch": 0.6410888888888889, + "grad_norm": 1.8276622295379639, + "learning_rate": 7.179817737274951e-05, + "loss": 1.7603, + "step": 28849 + }, + { + "epoch": 0.6411111111111111, + "grad_norm": 2.2006306648254395, + "learning_rate": 7.17937319404312e-05, + "loss": 1.4094, + "step": 28850 + }, + { + "epoch": 0.6411333333333333, + "grad_norm": 1.417955756187439, + "learning_rate": 7.178928650811292e-05, + "loss": 1.0786, + "step": 28851 + }, + { + "epoch": 0.6411555555555556, + "grad_norm": 1.5691580772399902, + "learning_rate": 7.178484107579463e-05, + "loss": 0.7504, + "step": 28852 + }, + { + "epoch": 0.6411777777777777, + "grad_norm": 1.7956329584121704, + "learning_rate": 7.178039564347632e-05, + "loss": 2.7077, + "step": 28853 + }, + { + "epoch": 0.6412, + "grad_norm": 0.820685088634491, + "learning_rate": 7.177595021115804e-05, + "loss": 0.9948, + "step": 28854 + }, + { + "epoch": 0.6412222222222222, + "grad_norm": 1.52669358253479, + "learning_rate": 7.177150477883975e-05, + "loss": 2.2596, + "step": 28855 + }, + { + "epoch": 0.6412444444444444, + "grad_norm": 1.7392785549163818, + "learning_rate": 7.176705934652145e-05, + "loss": 2.2223, + "step": 28856 + }, + { + "epoch": 0.6412666666666667, + "grad_norm": 1.745495319366455, + "learning_rate": 7.176261391420316e-05, + "loss": 0.0464, + "step": 28857 + }, + { + "epoch": 0.6412888888888889, + "grad_norm": 1.4235334396362305, + "learning_rate": 7.175816848188487e-05, + "loss": 2.0477, + "step": 28858 + }, + { + "epoch": 0.6413111111111112, + "grad_norm": 1.6636027097702026, + "learning_rate": 7.175372304956658e-05, + "loss": 2.2592, + "step": 28859 + }, + { + "epoch": 0.6413333333333333, + "grad_norm": 1.500736951828003, + "learning_rate": 7.174927761724828e-05, + "loss": 1.5237, + "step": 28860 + }, + { + "epoch": 0.6413555555555556, + "grad_norm": 1.7351739406585693, + "learning_rate": 7.174483218492998e-05, + "loss": 1.9768, + "step": 28861 + }, + { + "epoch": 0.6413777777777778, + "grad_norm": 1.5094364881515503, + "learning_rate": 7.17403867526117e-05, + "loss": 2.0292, + "step": 28862 + }, + { + "epoch": 0.6414, + "grad_norm": 1.594172716140747, + "learning_rate": 7.17359413202934e-05, + "loss": 2.2337, + "step": 28863 + }, + { + "epoch": 0.6414222222222222, + "grad_norm": 2.1875925064086914, + "learning_rate": 7.173149588797511e-05, + "loss": 2.3735, + "step": 28864 + }, + { + "epoch": 0.6414444444444445, + "grad_norm": 1.6282321214675903, + "learning_rate": 7.172705045565682e-05, + "loss": 2.1575, + "step": 28865 + }, + { + "epoch": 0.6414666666666666, + "grad_norm": 1.6158971786499023, + "learning_rate": 7.172260502333852e-05, + "loss": 1.9549, + "step": 28866 + }, + { + "epoch": 0.6414888888888889, + "grad_norm": 1.8376556634902954, + "learning_rate": 7.171815959102023e-05, + "loss": 1.9984, + "step": 28867 + }, + { + "epoch": 0.6415111111111111, + "grad_norm": 1.7575677633285522, + "learning_rate": 7.171371415870194e-05, + "loss": 2.0017, + "step": 28868 + }, + { + "epoch": 0.6415333333333333, + "grad_norm": 1.7002308368682861, + "learning_rate": 7.170926872638365e-05, + "loss": 1.9536, + "step": 28869 + }, + { + "epoch": 0.6415555555555555, + "grad_norm": 1.6438381671905518, + "learning_rate": 7.170482329406534e-05, + "loss": 1.4856, + "step": 28870 + }, + { + "epoch": 0.6415777777777778, + "grad_norm": 1.813004493713379, + "learning_rate": 7.170037786174705e-05, + "loss": 1.8227, + "step": 28871 + }, + { + "epoch": 0.6416, + "grad_norm": 1.6936862468719482, + "learning_rate": 7.169593242942876e-05, + "loss": 2.3135, + "step": 28872 + }, + { + "epoch": 0.6416222222222222, + "grad_norm": 1.5419673919677734, + "learning_rate": 7.169148699711047e-05, + "loss": 1.9524, + "step": 28873 + }, + { + "epoch": 0.6416444444444445, + "grad_norm": 1.786665916442871, + "learning_rate": 7.168704156479218e-05, + "loss": 2.0306, + "step": 28874 + }, + { + "epoch": 0.6416666666666667, + "grad_norm": 1.6579208374023438, + "learning_rate": 7.168259613247389e-05, + "loss": 1.9428, + "step": 28875 + }, + { + "epoch": 0.6416888888888889, + "grad_norm": 1.7095891237258911, + "learning_rate": 7.167815070015559e-05, + "loss": 1.9264, + "step": 28876 + }, + { + "epoch": 0.6417111111111111, + "grad_norm": 1.598313570022583, + "learning_rate": 7.16737052678373e-05, + "loss": 1.7624, + "step": 28877 + }, + { + "epoch": 0.6417333333333334, + "grad_norm": 1.8565610647201538, + "learning_rate": 7.166925983551901e-05, + "loss": 1.9579, + "step": 28878 + }, + { + "epoch": 0.6417555555555555, + "grad_norm": 1.7740898132324219, + "learning_rate": 7.166481440320072e-05, + "loss": 1.817, + "step": 28879 + }, + { + "epoch": 0.6417777777777778, + "grad_norm": 1.8127964735031128, + "learning_rate": 7.166036897088241e-05, + "loss": 2.2289, + "step": 28880 + }, + { + "epoch": 0.6418, + "grad_norm": 1.7820124626159668, + "learning_rate": 7.165592353856412e-05, + "loss": 1.7684, + "step": 28881 + }, + { + "epoch": 0.6418222222222222, + "grad_norm": 1.5686150789260864, + "learning_rate": 7.165147810624585e-05, + "loss": 2.0081, + "step": 28882 + }, + { + "epoch": 0.6418444444444444, + "grad_norm": 1.3537559509277344, + "learning_rate": 7.164703267392754e-05, + "loss": 1.4353, + "step": 28883 + }, + { + "epoch": 0.6418666666666667, + "grad_norm": 1.7080589532852173, + "learning_rate": 7.164258724160925e-05, + "loss": 1.9397, + "step": 28884 + }, + { + "epoch": 0.6418888888888888, + "grad_norm": 1.935981035232544, + "learning_rate": 7.163814180929096e-05, + "loss": 1.7046, + "step": 28885 + }, + { + "epoch": 0.6419111111111111, + "grad_norm": 1.734411597251892, + "learning_rate": 7.163369637697266e-05, + "loss": 2.0956, + "step": 28886 + }, + { + "epoch": 0.6419333333333334, + "grad_norm": 1.9430949687957764, + "learning_rate": 7.162925094465437e-05, + "loss": 1.619, + "step": 28887 + }, + { + "epoch": 0.6419555555555555, + "grad_norm": 1.137198805809021, + "learning_rate": 7.162480551233608e-05, + "loss": 0.6831, + "step": 28888 + }, + { + "epoch": 0.6419777777777778, + "grad_norm": 1.6652976274490356, + "learning_rate": 7.162036008001779e-05, + "loss": 1.8329, + "step": 28889 + }, + { + "epoch": 0.642, + "grad_norm": 1.7454249858856201, + "learning_rate": 7.161591464769948e-05, + "loss": 1.8221, + "step": 28890 + }, + { + "epoch": 0.6420222222222223, + "grad_norm": 1.548720359802246, + "learning_rate": 7.16114692153812e-05, + "loss": 1.8067, + "step": 28891 + }, + { + "epoch": 0.6420444444444444, + "grad_norm": 1.8045750856399536, + "learning_rate": 7.160702378306292e-05, + "loss": 1.3626, + "step": 28892 + }, + { + "epoch": 0.6420666666666667, + "grad_norm": 1.8985451459884644, + "learning_rate": 7.160257835074461e-05, + "loss": 1.9183, + "step": 28893 + }, + { + "epoch": 0.6420888888888889, + "grad_norm": 1.6870826482772827, + "learning_rate": 7.159813291842632e-05, + "loss": 1.8397, + "step": 28894 + }, + { + "epoch": 0.6421111111111111, + "grad_norm": 1.941114902496338, + "learning_rate": 7.159368748610803e-05, + "loss": 2.1521, + "step": 28895 + }, + { + "epoch": 0.6421333333333333, + "grad_norm": 1.5407544374465942, + "learning_rate": 7.158924205378973e-05, + "loss": 1.6867, + "step": 28896 + }, + { + "epoch": 0.6421555555555556, + "grad_norm": 1.2025772333145142, + "learning_rate": 7.158479662147144e-05, + "loss": 0.8019, + "step": 28897 + }, + { + "epoch": 0.6421777777777777, + "grad_norm": 1.5493876934051514, + "learning_rate": 7.158035118915315e-05, + "loss": 1.5128, + "step": 28898 + }, + { + "epoch": 0.6422, + "grad_norm": 1.7517184019088745, + "learning_rate": 7.157590575683486e-05, + "loss": 1.6096, + "step": 28899 + }, + { + "epoch": 0.6422222222222222, + "grad_norm": 1.934634804725647, + "learning_rate": 7.157146032451657e-05, + "loss": 1.3253, + "step": 28900 + }, + { + "epoch": 0.6422444444444444, + "grad_norm": 1.406434416770935, + "learning_rate": 7.156701489219827e-05, + "loss": 1.9834, + "step": 28901 + }, + { + "epoch": 0.6422666666666667, + "grad_norm": 1.0627119541168213, + "learning_rate": 7.156256945987998e-05, + "loss": 1.5022, + "step": 28902 + }, + { + "epoch": 0.6422888888888889, + "grad_norm": 1.4767934083938599, + "learning_rate": 7.155812402756168e-05, + "loss": 2.177, + "step": 28903 + }, + { + "epoch": 0.6423111111111112, + "grad_norm": 1.4371474981307983, + "learning_rate": 7.155367859524339e-05, + "loss": 2.339, + "step": 28904 + }, + { + "epoch": 0.6423333333333333, + "grad_norm": 1.7908233404159546, + "learning_rate": 7.15492331629251e-05, + "loss": 2.4955, + "step": 28905 + }, + { + "epoch": 0.6423555555555556, + "grad_norm": 1.7578809261322021, + "learning_rate": 7.154478773060681e-05, + "loss": 2.1955, + "step": 28906 + }, + { + "epoch": 0.6423777777777778, + "grad_norm": 1.7062569856643677, + "learning_rate": 7.15403422982885e-05, + "loss": 2.7585, + "step": 28907 + }, + { + "epoch": 0.6424, + "grad_norm": 1.4424821138381958, + "learning_rate": 7.153589686597021e-05, + "loss": 2.0638, + "step": 28908 + }, + { + "epoch": 0.6424222222222222, + "grad_norm": 1.797548770904541, + "learning_rate": 7.153145143365192e-05, + "loss": 2.3194, + "step": 28909 + }, + { + "epoch": 0.6424444444444445, + "grad_norm": 2.008291721343994, + "learning_rate": 7.152700600133363e-05, + "loss": 2.2287, + "step": 28910 + }, + { + "epoch": 0.6424666666666666, + "grad_norm": 1.5889003276824951, + "learning_rate": 7.152256056901534e-05, + "loss": 1.4463, + "step": 28911 + }, + { + "epoch": 0.6424888888888889, + "grad_norm": 1.6907141208648682, + "learning_rate": 7.151811513669705e-05, + "loss": 1.9515, + "step": 28912 + }, + { + "epoch": 0.6425111111111111, + "grad_norm": 1.3028802871704102, + "learning_rate": 7.151366970437875e-05, + "loss": 1.6399, + "step": 28913 + }, + { + "epoch": 0.6425333333333333, + "grad_norm": 1.5053068399429321, + "learning_rate": 7.150922427206046e-05, + "loss": 1.8337, + "step": 28914 + }, + { + "epoch": 0.6425555555555555, + "grad_norm": 1.4605801105499268, + "learning_rate": 7.150477883974217e-05, + "loss": 1.8094, + "step": 28915 + }, + { + "epoch": 0.6425777777777778, + "grad_norm": 1.552448034286499, + "learning_rate": 7.150033340742388e-05, + "loss": 1.659, + "step": 28916 + }, + { + "epoch": 0.6426, + "grad_norm": 1.7648966312408447, + "learning_rate": 7.149588797510557e-05, + "loss": 2.2957, + "step": 28917 + }, + { + "epoch": 0.6426222222222222, + "grad_norm": 1.7324978113174438, + "learning_rate": 7.149144254278728e-05, + "loss": 2.1141, + "step": 28918 + }, + { + "epoch": 0.6426444444444445, + "grad_norm": 2.1639018058776855, + "learning_rate": 7.148699711046901e-05, + "loss": 2.5954, + "step": 28919 + }, + { + "epoch": 0.6426666666666667, + "grad_norm": 1.7023972272872925, + "learning_rate": 7.14825516781507e-05, + "loss": 1.3876, + "step": 28920 + }, + { + "epoch": 0.6426888888888889, + "grad_norm": 2.214062213897705, + "learning_rate": 7.147810624583241e-05, + "loss": 2.5949, + "step": 28921 + }, + { + "epoch": 0.6427111111111111, + "grad_norm": 1.7753064632415771, + "learning_rate": 7.147366081351412e-05, + "loss": 2.0713, + "step": 28922 + }, + { + "epoch": 0.6427333333333334, + "grad_norm": 1.664215326309204, + "learning_rate": 7.146921538119582e-05, + "loss": 2.0425, + "step": 28923 + }, + { + "epoch": 0.6427555555555555, + "grad_norm": 1.6645792722702026, + "learning_rate": 7.146476994887753e-05, + "loss": 1.7387, + "step": 28924 + }, + { + "epoch": 0.6427777777777778, + "grad_norm": 1.8174889087677002, + "learning_rate": 7.146032451655924e-05, + "loss": 1.9186, + "step": 28925 + }, + { + "epoch": 0.6428, + "grad_norm": 1.781592607498169, + "learning_rate": 7.145587908424095e-05, + "loss": 2.0698, + "step": 28926 + }, + { + "epoch": 0.6428222222222222, + "grad_norm": 1.753316879272461, + "learning_rate": 7.145143365192264e-05, + "loss": 2.1797, + "step": 28927 + }, + { + "epoch": 0.6428444444444444, + "grad_norm": 1.7087000608444214, + "learning_rate": 7.144698821960437e-05, + "loss": 1.826, + "step": 28928 + }, + { + "epoch": 0.6428666666666667, + "grad_norm": 1.5608490705490112, + "learning_rate": 7.144254278728608e-05, + "loss": 1.7394, + "step": 28929 + }, + { + "epoch": 0.6428888888888888, + "grad_norm": 2.120506525039673, + "learning_rate": 7.143809735496777e-05, + "loss": 1.8856, + "step": 28930 + }, + { + "epoch": 0.6429111111111111, + "grad_norm": 2.612682342529297, + "learning_rate": 7.143365192264948e-05, + "loss": 2.0027, + "step": 28931 + }, + { + "epoch": 0.6429333333333334, + "grad_norm": 1.8839622735977173, + "learning_rate": 7.142920649033119e-05, + "loss": 1.8474, + "step": 28932 + }, + { + "epoch": 0.6429555555555555, + "grad_norm": 1.8573490381240845, + "learning_rate": 7.142476105801289e-05, + "loss": 2.2052, + "step": 28933 + }, + { + "epoch": 0.6429777777777778, + "grad_norm": 1.7652058601379395, + "learning_rate": 7.14203156256946e-05, + "loss": 1.7029, + "step": 28934 + }, + { + "epoch": 0.643, + "grad_norm": 1.7231435775756836, + "learning_rate": 7.141587019337631e-05, + "loss": 1.7502, + "step": 28935 + }, + { + "epoch": 0.6430222222222223, + "grad_norm": 1.7900536060333252, + "learning_rate": 7.141142476105802e-05, + "loss": 2.1998, + "step": 28936 + }, + { + "epoch": 0.6430444444444444, + "grad_norm": 1.664600133895874, + "learning_rate": 7.140697932873973e-05, + "loss": 1.7662, + "step": 28937 + }, + { + "epoch": 0.6430666666666667, + "grad_norm": 2.0292153358459473, + "learning_rate": 7.140253389642144e-05, + "loss": 2.5704, + "step": 28938 + }, + { + "epoch": 0.6430888888888889, + "grad_norm": 1.509979248046875, + "learning_rate": 7.139808846410315e-05, + "loss": 1.0218, + "step": 28939 + }, + { + "epoch": 0.6431111111111111, + "grad_norm": 0.9186019897460938, + "learning_rate": 7.139364303178484e-05, + "loss": 0.5662, + "step": 28940 + }, + { + "epoch": 0.6431333333333333, + "grad_norm": 1.5803574323654175, + "learning_rate": 7.138919759946655e-05, + "loss": 1.5752, + "step": 28941 + }, + { + "epoch": 0.6431555555555556, + "grad_norm": 1.9634132385253906, + "learning_rate": 7.138475216714826e-05, + "loss": 1.9959, + "step": 28942 + }, + { + "epoch": 0.6431777777777777, + "grad_norm": 1.919405221939087, + "learning_rate": 7.138030673482996e-05, + "loss": 2.1137, + "step": 28943 + }, + { + "epoch": 0.6432, + "grad_norm": 1.5899920463562012, + "learning_rate": 7.137586130251167e-05, + "loss": 1.9365, + "step": 28944 + }, + { + "epoch": 0.6432222222222223, + "grad_norm": 2.470597267150879, + "learning_rate": 7.137141587019338e-05, + "loss": 1.7956, + "step": 28945 + }, + { + "epoch": 0.6432444444444444, + "grad_norm": 1.7212203741073608, + "learning_rate": 7.136697043787509e-05, + "loss": 1.8821, + "step": 28946 + }, + { + "epoch": 0.6432666666666667, + "grad_norm": 1.7492668628692627, + "learning_rate": 7.13625250055568e-05, + "loss": 1.6948, + "step": 28947 + }, + { + "epoch": 0.6432888888888889, + "grad_norm": 1.3097518682479858, + "learning_rate": 7.13580795732385e-05, + "loss": 0.8045, + "step": 28948 + }, + { + "epoch": 0.6433111111111111, + "grad_norm": 1.538679838180542, + "learning_rate": 7.135363414092021e-05, + "loss": 1.4235, + "step": 28949 + }, + { + "epoch": 0.6433333333333333, + "grad_norm": 1.3475881814956665, + "learning_rate": 7.134918870860191e-05, + "loss": 0.836, + "step": 28950 + }, + { + "epoch": 0.6433555555555556, + "grad_norm": 1.8413110971450806, + "learning_rate": 7.134474327628362e-05, + "loss": 2.3781, + "step": 28951 + }, + { + "epoch": 0.6433777777777778, + "grad_norm": 1.575206995010376, + "learning_rate": 7.134029784396533e-05, + "loss": 2.3801, + "step": 28952 + }, + { + "epoch": 0.6434, + "grad_norm": 1.3493009805679321, + "learning_rate": 7.133585241164703e-05, + "loss": 1.9338, + "step": 28953 + }, + { + "epoch": 0.6434222222222222, + "grad_norm": 2.175967216491699, + "learning_rate": 7.133140697932874e-05, + "loss": 2.695, + "step": 28954 + }, + { + "epoch": 0.6434444444444445, + "grad_norm": 1.4450469017028809, + "learning_rate": 7.132696154701045e-05, + "loss": 1.7778, + "step": 28955 + }, + { + "epoch": 0.6434666666666666, + "grad_norm": 1.4620064496994019, + "learning_rate": 7.132251611469217e-05, + "loss": 1.841, + "step": 28956 + }, + { + "epoch": 0.6434888888888889, + "grad_norm": 1.7964931726455688, + "learning_rate": 7.131807068237386e-05, + "loss": 2.0961, + "step": 28957 + }, + { + "epoch": 0.6435111111111111, + "grad_norm": 1.6290562152862549, + "learning_rate": 7.131362525005557e-05, + "loss": 2.2417, + "step": 28958 + }, + { + "epoch": 0.6435333333333333, + "grad_norm": 1.3939601182937622, + "learning_rate": 7.130917981773728e-05, + "loss": 1.4903, + "step": 28959 + }, + { + "epoch": 0.6435555555555555, + "grad_norm": 1.6482905149459839, + "learning_rate": 7.130473438541898e-05, + "loss": 2.6648, + "step": 28960 + }, + { + "epoch": 0.6435777777777778, + "grad_norm": 2.1006858348846436, + "learning_rate": 7.130028895310069e-05, + "loss": 2.8309, + "step": 28961 + }, + { + "epoch": 0.6436, + "grad_norm": 1.5466973781585693, + "learning_rate": 7.12958435207824e-05, + "loss": 2.219, + "step": 28962 + }, + { + "epoch": 0.6436222222222222, + "grad_norm": 1.6188929080963135, + "learning_rate": 7.129139808846411e-05, + "loss": 1.6512, + "step": 28963 + }, + { + "epoch": 0.6436444444444445, + "grad_norm": 1.6394859552383423, + "learning_rate": 7.12869526561458e-05, + "loss": 1.9599, + "step": 28964 + }, + { + "epoch": 0.6436666666666667, + "grad_norm": 1.5032265186309814, + "learning_rate": 7.128250722382753e-05, + "loss": 2.1569, + "step": 28965 + }, + { + "epoch": 0.6436888888888889, + "grad_norm": 1.6356178522109985, + "learning_rate": 7.127806179150924e-05, + "loss": 2.1785, + "step": 28966 + }, + { + "epoch": 0.6437111111111111, + "grad_norm": 1.3364404439926147, + "learning_rate": 7.127361635919093e-05, + "loss": 1.3715, + "step": 28967 + }, + { + "epoch": 0.6437333333333334, + "grad_norm": 2.5799660682678223, + "learning_rate": 7.126917092687264e-05, + "loss": 1.8913, + "step": 28968 + }, + { + "epoch": 0.6437555555555555, + "grad_norm": 1.5566381216049194, + "learning_rate": 7.126472549455435e-05, + "loss": 1.7737, + "step": 28969 + }, + { + "epoch": 0.6437777777777778, + "grad_norm": 1.677599549293518, + "learning_rate": 7.126028006223605e-05, + "loss": 1.8859, + "step": 28970 + }, + { + "epoch": 0.6438, + "grad_norm": 1.5886155366897583, + "learning_rate": 7.125583462991776e-05, + "loss": 1.9092, + "step": 28971 + }, + { + "epoch": 0.6438222222222222, + "grad_norm": 1.4532115459442139, + "learning_rate": 7.125138919759947e-05, + "loss": 1.3206, + "step": 28972 + }, + { + "epoch": 0.6438444444444444, + "grad_norm": 1.7453795671463013, + "learning_rate": 7.124694376528118e-05, + "loss": 1.73, + "step": 28973 + }, + { + "epoch": 0.6438666666666667, + "grad_norm": 1.896207571029663, + "learning_rate": 7.124249833296289e-05, + "loss": 1.9166, + "step": 28974 + }, + { + "epoch": 0.6438888888888888, + "grad_norm": 1.6417932510375977, + "learning_rate": 7.12380529006446e-05, + "loss": 1.9912, + "step": 28975 + }, + { + "epoch": 0.6439111111111111, + "grad_norm": 2.062929630279541, + "learning_rate": 7.12336074683263e-05, + "loss": 2.1088, + "step": 28976 + }, + { + "epoch": 0.6439333333333334, + "grad_norm": 1.370573878288269, + "learning_rate": 7.1229162036008e-05, + "loss": 1.4623, + "step": 28977 + }, + { + "epoch": 0.6439555555555555, + "grad_norm": 1.727644443511963, + "learning_rate": 7.122471660368971e-05, + "loss": 1.547, + "step": 28978 + }, + { + "epoch": 0.6439777777777778, + "grad_norm": 1.6774473190307617, + "learning_rate": 7.122027117137142e-05, + "loss": 2.0309, + "step": 28979 + }, + { + "epoch": 0.644, + "grad_norm": 1.7936469316482544, + "learning_rate": 7.121582573905312e-05, + "loss": 1.8048, + "step": 28980 + }, + { + "epoch": 0.6440222222222223, + "grad_norm": 1.6021851301193237, + "learning_rate": 7.121138030673483e-05, + "loss": 1.8476, + "step": 28981 + }, + { + "epoch": 0.6440444444444444, + "grad_norm": 1.6705433130264282, + "learning_rate": 7.120693487441654e-05, + "loss": 1.8819, + "step": 28982 + }, + { + "epoch": 0.6440666666666667, + "grad_norm": 2.086313009262085, + "learning_rate": 7.120248944209825e-05, + "loss": 2.1044, + "step": 28983 + }, + { + "epoch": 0.6440888888888889, + "grad_norm": 2.1750640869140625, + "learning_rate": 7.119804400977996e-05, + "loss": 1.7156, + "step": 28984 + }, + { + "epoch": 0.6441111111111111, + "grad_norm": 1.8231173753738403, + "learning_rate": 7.119359857746167e-05, + "loss": 1.8609, + "step": 28985 + }, + { + "epoch": 0.6441333333333333, + "grad_norm": 2.0555918216705322, + "learning_rate": 7.118915314514338e-05, + "loss": 2.4212, + "step": 28986 + }, + { + "epoch": 0.6441555555555556, + "grad_norm": 2.163342237472534, + "learning_rate": 7.118470771282507e-05, + "loss": 1.8877, + "step": 28987 + }, + { + "epoch": 0.6441777777777777, + "grad_norm": 1.6176615953445435, + "learning_rate": 7.118026228050678e-05, + "loss": 1.5502, + "step": 28988 + }, + { + "epoch": 0.6442, + "grad_norm": 1.878800630569458, + "learning_rate": 7.117581684818849e-05, + "loss": 1.7773, + "step": 28989 + }, + { + "epoch": 0.6442222222222223, + "grad_norm": 1.7244913578033447, + "learning_rate": 7.117137141587019e-05, + "loss": 1.9062, + "step": 28990 + }, + { + "epoch": 0.6442444444444444, + "grad_norm": 1.8530195951461792, + "learning_rate": 7.11669259835519e-05, + "loss": 1.9271, + "step": 28991 + }, + { + "epoch": 0.6442666666666667, + "grad_norm": 1.9446215629577637, + "learning_rate": 7.11624805512336e-05, + "loss": 2.044, + "step": 28992 + }, + { + "epoch": 0.6442888888888889, + "grad_norm": 1.711171269416809, + "learning_rate": 7.115803511891533e-05, + "loss": 1.6565, + "step": 28993 + }, + { + "epoch": 0.6443111111111111, + "grad_norm": 1.7592583894729614, + "learning_rate": 7.115358968659703e-05, + "loss": 1.8452, + "step": 28994 + }, + { + "epoch": 0.6443333333333333, + "grad_norm": 1.7510658502578735, + "learning_rate": 7.114914425427873e-05, + "loss": 1.9349, + "step": 28995 + }, + { + "epoch": 0.6443555555555556, + "grad_norm": 2.019502639770508, + "learning_rate": 7.114469882196044e-05, + "loss": 2.0444, + "step": 28996 + }, + { + "epoch": 0.6443777777777778, + "grad_norm": 1.7224868535995483, + "learning_rate": 7.114025338964214e-05, + "loss": 2.0977, + "step": 28997 + }, + { + "epoch": 0.6444, + "grad_norm": 0.34710970520973206, + "learning_rate": 7.113580795732385e-05, + "loss": 0.0366, + "step": 28998 + }, + { + "epoch": 0.6444222222222222, + "grad_norm": 2.390554428100586, + "learning_rate": 7.113136252500556e-05, + "loss": 2.1436, + "step": 28999 + }, + { + "epoch": 0.6444444444444445, + "grad_norm": 1.99862539768219, + "learning_rate": 7.112691709268726e-05, + "loss": 1.5966, + "step": 29000 + }, + { + "epoch": 0.6444666666666666, + "grad_norm": 1.686181902885437, + "learning_rate": 7.112247166036897e-05, + "loss": 2.3817, + "step": 29001 + }, + { + "epoch": 0.6444888888888889, + "grad_norm": 1.5259734392166138, + "learning_rate": 7.111802622805069e-05, + "loss": 1.845, + "step": 29002 + }, + { + "epoch": 0.6445111111111111, + "grad_norm": 1.4012967348098755, + "learning_rate": 7.11135807957324e-05, + "loss": 2.2198, + "step": 29003 + }, + { + "epoch": 0.6445333333333333, + "grad_norm": 1.4382514953613281, + "learning_rate": 7.11091353634141e-05, + "loss": 1.8372, + "step": 29004 + }, + { + "epoch": 0.6445555555555555, + "grad_norm": 1.8115453720092773, + "learning_rate": 7.11046899310958e-05, + "loss": 1.7997, + "step": 29005 + }, + { + "epoch": 0.6445777777777778, + "grad_norm": 1.711234211921692, + "learning_rate": 7.110024449877751e-05, + "loss": 2.125, + "step": 29006 + }, + { + "epoch": 0.6446, + "grad_norm": 1.3862836360931396, + "learning_rate": 7.109579906645921e-05, + "loss": 2.1216, + "step": 29007 + }, + { + "epoch": 0.6446222222222222, + "grad_norm": 1.8051152229309082, + "learning_rate": 7.109135363414092e-05, + "loss": 1.5909, + "step": 29008 + }, + { + "epoch": 0.6446444444444445, + "grad_norm": 1.8862489461898804, + "learning_rate": 7.108690820182263e-05, + "loss": 2.0823, + "step": 29009 + }, + { + "epoch": 0.6446666666666667, + "grad_norm": 1.46207594871521, + "learning_rate": 7.108246276950434e-05, + "loss": 2.1865, + "step": 29010 + }, + { + "epoch": 0.6446888888888889, + "grad_norm": 0.224213108420372, + "learning_rate": 7.107801733718605e-05, + "loss": 0.025, + "step": 29011 + }, + { + "epoch": 0.6447111111111111, + "grad_norm": 1.5012792348861694, + "learning_rate": 7.107357190486776e-05, + "loss": 2.1706, + "step": 29012 + }, + { + "epoch": 0.6447333333333334, + "grad_norm": 1.9338184595108032, + "learning_rate": 7.106912647254947e-05, + "loss": 2.2592, + "step": 29013 + }, + { + "epoch": 0.6447555555555555, + "grad_norm": 1.810837745666504, + "learning_rate": 7.106468104023116e-05, + "loss": 1.7871, + "step": 29014 + }, + { + "epoch": 0.6447777777777778, + "grad_norm": 1.4336885213851929, + "learning_rate": 7.106023560791287e-05, + "loss": 1.8581, + "step": 29015 + }, + { + "epoch": 0.6448, + "grad_norm": 1.5249687433242798, + "learning_rate": 7.105579017559458e-05, + "loss": 1.3569, + "step": 29016 + }, + { + "epoch": 0.6448222222222222, + "grad_norm": 2.3387036323547363, + "learning_rate": 7.105134474327628e-05, + "loss": 2.1901, + "step": 29017 + }, + { + "epoch": 0.6448444444444444, + "grad_norm": 1.683835744857788, + "learning_rate": 7.104689931095799e-05, + "loss": 2.242, + "step": 29018 + }, + { + "epoch": 0.6448666666666667, + "grad_norm": 1.5120434761047363, + "learning_rate": 7.10424538786397e-05, + "loss": 1.9611, + "step": 29019 + }, + { + "epoch": 0.6448888888888888, + "grad_norm": 1.8133294582366943, + "learning_rate": 7.103800844632141e-05, + "loss": 2.2278, + "step": 29020 + }, + { + "epoch": 0.6449111111111111, + "grad_norm": 1.528519630432129, + "learning_rate": 7.103356301400312e-05, + "loss": 1.7234, + "step": 29021 + }, + { + "epoch": 0.6449333333333334, + "grad_norm": 1.6087287664413452, + "learning_rate": 7.102911758168483e-05, + "loss": 1.8672, + "step": 29022 + }, + { + "epoch": 0.6449555555555555, + "grad_norm": 1.694104790687561, + "learning_rate": 7.102467214936654e-05, + "loss": 2.2416, + "step": 29023 + }, + { + "epoch": 0.6449777777777778, + "grad_norm": 2.1608152389526367, + "learning_rate": 7.102022671704823e-05, + "loss": 2.186, + "step": 29024 + }, + { + "epoch": 0.645, + "grad_norm": 1.8009564876556396, + "learning_rate": 7.101578128472994e-05, + "loss": 2.168, + "step": 29025 + }, + { + "epoch": 0.6450222222222223, + "grad_norm": 1.6181777715682983, + "learning_rate": 7.101133585241165e-05, + "loss": 2.2455, + "step": 29026 + }, + { + "epoch": 0.6450444444444444, + "grad_norm": 1.9097341299057007, + "learning_rate": 7.100689042009335e-05, + "loss": 2.0104, + "step": 29027 + }, + { + "epoch": 0.6450666666666667, + "grad_norm": 1.472928524017334, + "learning_rate": 7.100244498777506e-05, + "loss": 1.7127, + "step": 29028 + }, + { + "epoch": 0.6450888888888889, + "grad_norm": 1.8379104137420654, + "learning_rate": 7.099799955545677e-05, + "loss": 2.4129, + "step": 29029 + }, + { + "epoch": 0.6451111111111111, + "grad_norm": 1.4409955739974976, + "learning_rate": 7.099355412313849e-05, + "loss": 1.5263, + "step": 29030 + }, + { + "epoch": 0.6451333333333333, + "grad_norm": 1.8723595142364502, + "learning_rate": 7.098910869082019e-05, + "loss": 1.9664, + "step": 29031 + }, + { + "epoch": 0.6451555555555556, + "grad_norm": 0.896340012550354, + "learning_rate": 7.09846632585019e-05, + "loss": 0.6383, + "step": 29032 + }, + { + "epoch": 0.6451777777777777, + "grad_norm": 1.6818281412124634, + "learning_rate": 7.09802178261836e-05, + "loss": 2.082, + "step": 29033 + }, + { + "epoch": 0.6452, + "grad_norm": 1.591972827911377, + "learning_rate": 7.09757723938653e-05, + "loss": 1.7117, + "step": 29034 + }, + { + "epoch": 0.6452222222222223, + "grad_norm": 1.9729094505310059, + "learning_rate": 7.097132696154701e-05, + "loss": 2.056, + "step": 29035 + }, + { + "epoch": 0.6452444444444444, + "grad_norm": 1.9406650066375732, + "learning_rate": 7.096688152922872e-05, + "loss": 2.3653, + "step": 29036 + }, + { + "epoch": 0.6452666666666667, + "grad_norm": 1.635969877243042, + "learning_rate": 7.096243609691042e-05, + "loss": 1.4225, + "step": 29037 + }, + { + "epoch": 0.6452888888888889, + "grad_norm": 1.8061898946762085, + "learning_rate": 7.095799066459213e-05, + "loss": 2.1575, + "step": 29038 + }, + { + "epoch": 0.6453111111111111, + "grad_norm": 0.9610072374343872, + "learning_rate": 7.095354523227385e-05, + "loss": 0.7144, + "step": 29039 + }, + { + "epoch": 0.6453333333333333, + "grad_norm": 1.6564658880233765, + "learning_rate": 7.094909979995556e-05, + "loss": 1.6961, + "step": 29040 + }, + { + "epoch": 0.6453555555555556, + "grad_norm": 2.188620090484619, + "learning_rate": 7.094465436763726e-05, + "loss": 1.992, + "step": 29041 + }, + { + "epoch": 0.6453777777777778, + "grad_norm": 2.1346263885498047, + "learning_rate": 7.094020893531897e-05, + "loss": 1.7606, + "step": 29042 + }, + { + "epoch": 0.6454, + "grad_norm": 1.584122896194458, + "learning_rate": 7.093576350300067e-05, + "loss": 1.9503, + "step": 29043 + }, + { + "epoch": 0.6454222222222222, + "grad_norm": 2.2534492015838623, + "learning_rate": 7.093131807068237e-05, + "loss": 2.2768, + "step": 29044 + }, + { + "epoch": 0.6454444444444445, + "grad_norm": 1.9511295557022095, + "learning_rate": 7.092687263836408e-05, + "loss": 2.1601, + "step": 29045 + }, + { + "epoch": 0.6454666666666666, + "grad_norm": 1.7901535034179688, + "learning_rate": 7.092242720604579e-05, + "loss": 1.8901, + "step": 29046 + }, + { + "epoch": 0.6454888888888889, + "grad_norm": 1.7285199165344238, + "learning_rate": 7.09179817737275e-05, + "loss": 1.5072, + "step": 29047 + }, + { + "epoch": 0.6455111111111111, + "grad_norm": 1.8059173822402954, + "learning_rate": 7.091353634140921e-05, + "loss": 1.8646, + "step": 29048 + }, + { + "epoch": 0.6455333333333333, + "grad_norm": 1.863149881362915, + "learning_rate": 7.090909090909092e-05, + "loss": 1.971, + "step": 29049 + }, + { + "epoch": 0.6455555555555555, + "grad_norm": 1.723754644393921, + "learning_rate": 7.090464547677263e-05, + "loss": 1.5036, + "step": 29050 + }, + { + "epoch": 0.6455777777777778, + "grad_norm": 1.1470249891281128, + "learning_rate": 7.090020004445432e-05, + "loss": 1.0734, + "step": 29051 + }, + { + "epoch": 0.6456, + "grad_norm": 1.4927550554275513, + "learning_rate": 7.089575461213603e-05, + "loss": 2.4021, + "step": 29052 + }, + { + "epoch": 0.6456222222222222, + "grad_norm": 1.577982783317566, + "learning_rate": 7.089130917981774e-05, + "loss": 2.2874, + "step": 29053 + }, + { + "epoch": 0.6456444444444445, + "grad_norm": 1.6243376731872559, + "learning_rate": 7.088686374749944e-05, + "loss": 1.0384, + "step": 29054 + }, + { + "epoch": 0.6456666666666667, + "grad_norm": 1.6546640396118164, + "learning_rate": 7.088241831518115e-05, + "loss": 1.1537, + "step": 29055 + }, + { + "epoch": 0.6456888888888889, + "grad_norm": 1.5087653398513794, + "learning_rate": 7.087797288286286e-05, + "loss": 2.3756, + "step": 29056 + }, + { + "epoch": 0.6457111111111111, + "grad_norm": 1.4392410516738892, + "learning_rate": 7.087352745054457e-05, + "loss": 1.8451, + "step": 29057 + }, + { + "epoch": 0.6457333333333334, + "grad_norm": 1.604750156402588, + "learning_rate": 7.086908201822628e-05, + "loss": 2.3932, + "step": 29058 + }, + { + "epoch": 0.6457555555555555, + "grad_norm": 1.3697820901870728, + "learning_rate": 7.086463658590799e-05, + "loss": 1.99, + "step": 29059 + }, + { + "epoch": 0.6457777777777778, + "grad_norm": 1.5392061471939087, + "learning_rate": 7.08601911535897e-05, + "loss": 1.8891, + "step": 29060 + }, + { + "epoch": 0.6458, + "grad_norm": 1.8606305122375488, + "learning_rate": 7.08557457212714e-05, + "loss": 2.2007, + "step": 29061 + }, + { + "epoch": 0.6458222222222222, + "grad_norm": 1.821648359298706, + "learning_rate": 7.08513002889531e-05, + "loss": 2.2572, + "step": 29062 + }, + { + "epoch": 0.6458444444444444, + "grad_norm": 1.64143967628479, + "learning_rate": 7.084685485663481e-05, + "loss": 1.8566, + "step": 29063 + }, + { + "epoch": 0.6458666666666667, + "grad_norm": 1.5850093364715576, + "learning_rate": 7.084240942431651e-05, + "loss": 1.8496, + "step": 29064 + }, + { + "epoch": 0.6458888888888888, + "grad_norm": 1.6152373552322388, + "learning_rate": 7.083796399199822e-05, + "loss": 1.1328, + "step": 29065 + }, + { + "epoch": 0.6459111111111111, + "grad_norm": 1.1689858436584473, + "learning_rate": 7.083351855967993e-05, + "loss": 1.2043, + "step": 29066 + }, + { + "epoch": 0.6459333333333334, + "grad_norm": 1.7024846076965332, + "learning_rate": 7.082907312736165e-05, + "loss": 1.9357, + "step": 29067 + }, + { + "epoch": 0.6459555555555555, + "grad_norm": 1.3479031324386597, + "learning_rate": 7.082462769504335e-05, + "loss": 1.3946, + "step": 29068 + }, + { + "epoch": 0.6459777777777778, + "grad_norm": 1.5222748517990112, + "learning_rate": 7.082018226272506e-05, + "loss": 1.8374, + "step": 29069 + }, + { + "epoch": 0.646, + "grad_norm": 1.5448720455169678, + "learning_rate": 7.081573683040677e-05, + "loss": 1.7917, + "step": 29070 + }, + { + "epoch": 0.6460222222222223, + "grad_norm": 1.8289965391159058, + "learning_rate": 7.081129139808846e-05, + "loss": 1.3985, + "step": 29071 + }, + { + "epoch": 0.6460444444444444, + "grad_norm": 1.918680191040039, + "learning_rate": 7.080684596577017e-05, + "loss": 1.5956, + "step": 29072 + }, + { + "epoch": 0.6460666666666667, + "grad_norm": 1.5386064052581787, + "learning_rate": 7.080240053345188e-05, + "loss": 1.7289, + "step": 29073 + }, + { + "epoch": 0.6460888888888889, + "grad_norm": 1.6020234823226929, + "learning_rate": 7.079795510113358e-05, + "loss": 1.7265, + "step": 29074 + }, + { + "epoch": 0.6461111111111111, + "grad_norm": 1.6040480136871338, + "learning_rate": 7.079350966881529e-05, + "loss": 1.6565, + "step": 29075 + }, + { + "epoch": 0.6461333333333333, + "grad_norm": 1.5008918046951294, + "learning_rate": 7.078906423649701e-05, + "loss": 1.9662, + "step": 29076 + }, + { + "epoch": 0.6461555555555556, + "grad_norm": 1.6811549663543701, + "learning_rate": 7.078461880417872e-05, + "loss": 1.83, + "step": 29077 + }, + { + "epoch": 0.6461777777777777, + "grad_norm": 1.627158284187317, + "learning_rate": 7.078017337186042e-05, + "loss": 2.421, + "step": 29078 + }, + { + "epoch": 0.6462, + "grad_norm": 1.6065196990966797, + "learning_rate": 7.077572793954213e-05, + "loss": 1.8298, + "step": 29079 + }, + { + "epoch": 0.6462222222222223, + "grad_norm": 1.4701594114303589, + "learning_rate": 7.077128250722384e-05, + "loss": 1.3981, + "step": 29080 + }, + { + "epoch": 0.6462444444444444, + "grad_norm": 1.77341890335083, + "learning_rate": 7.076683707490553e-05, + "loss": 2.2195, + "step": 29081 + }, + { + "epoch": 0.6462666666666667, + "grad_norm": 1.2583370208740234, + "learning_rate": 7.076239164258724e-05, + "loss": 0.795, + "step": 29082 + }, + { + "epoch": 0.6462888888888889, + "grad_norm": 1.5209894180297852, + "learning_rate": 7.075794621026895e-05, + "loss": 1.8441, + "step": 29083 + }, + { + "epoch": 0.6463111111111111, + "grad_norm": 1.5885052680969238, + "learning_rate": 7.075350077795066e-05, + "loss": 2.0454, + "step": 29084 + }, + { + "epoch": 0.6463333333333333, + "grad_norm": 1.8185741901397705, + "learning_rate": 7.074905534563237e-05, + "loss": 1.8766, + "step": 29085 + }, + { + "epoch": 0.6463555555555556, + "grad_norm": 1.6103633642196655, + "learning_rate": 7.074460991331408e-05, + "loss": 1.5191, + "step": 29086 + }, + { + "epoch": 0.6463777777777778, + "grad_norm": 1.9912450313568115, + "learning_rate": 7.074016448099579e-05, + "loss": 2.1519, + "step": 29087 + }, + { + "epoch": 0.6464, + "grad_norm": 2.1047565937042236, + "learning_rate": 7.073571904867749e-05, + "loss": 2.1204, + "step": 29088 + }, + { + "epoch": 0.6464222222222222, + "grad_norm": 1.7408231496810913, + "learning_rate": 7.07312736163592e-05, + "loss": 1.687, + "step": 29089 + }, + { + "epoch": 0.6464444444444445, + "grad_norm": 2.2416300773620605, + "learning_rate": 7.07268281840409e-05, + "loss": 2.5354, + "step": 29090 + }, + { + "epoch": 0.6464666666666666, + "grad_norm": 2.1599998474121094, + "learning_rate": 7.07223827517226e-05, + "loss": 2.36, + "step": 29091 + }, + { + "epoch": 0.6464888888888889, + "grad_norm": 1.4476076364517212, + "learning_rate": 7.071793731940431e-05, + "loss": 1.1872, + "step": 29092 + }, + { + "epoch": 0.6465111111111111, + "grad_norm": 1.974564552307129, + "learning_rate": 7.071349188708602e-05, + "loss": 2.0381, + "step": 29093 + }, + { + "epoch": 0.6465333333333333, + "grad_norm": 1.7401434183120728, + "learning_rate": 7.070904645476773e-05, + "loss": 1.646, + "step": 29094 + }, + { + "epoch": 0.6465555555555556, + "grad_norm": 1.8770774602890015, + "learning_rate": 7.070460102244944e-05, + "loss": 1.6141, + "step": 29095 + }, + { + "epoch": 0.6465777777777778, + "grad_norm": 1.7361961603164673, + "learning_rate": 7.070015559013115e-05, + "loss": 1.3437, + "step": 29096 + }, + { + "epoch": 0.6466, + "grad_norm": 1.6656427383422852, + "learning_rate": 7.069571015781286e-05, + "loss": 1.5471, + "step": 29097 + }, + { + "epoch": 0.6466222222222222, + "grad_norm": 1.9020168781280518, + "learning_rate": 7.069126472549455e-05, + "loss": 1.9409, + "step": 29098 + }, + { + "epoch": 0.6466444444444445, + "grad_norm": 2.2219109535217285, + "learning_rate": 7.068681929317626e-05, + "loss": 2.1355, + "step": 29099 + }, + { + "epoch": 0.6466666666666666, + "grad_norm": 2.644800901412964, + "learning_rate": 7.068237386085797e-05, + "loss": 1.2337, + "step": 29100 + }, + { + "epoch": 0.6466888888888889, + "grad_norm": 1.3294848203659058, + "learning_rate": 7.067792842853967e-05, + "loss": 1.0227, + "step": 29101 + }, + { + "epoch": 0.6467111111111111, + "grad_norm": 1.480413556098938, + "learning_rate": 7.067348299622138e-05, + "loss": 2.1486, + "step": 29102 + }, + { + "epoch": 0.6467333333333334, + "grad_norm": 1.4161555767059326, + "learning_rate": 7.066903756390309e-05, + "loss": 1.8185, + "step": 29103 + }, + { + "epoch": 0.6467555555555555, + "grad_norm": 0.9094601273536682, + "learning_rate": 7.06645921315848e-05, + "loss": 1.1822, + "step": 29104 + }, + { + "epoch": 0.6467777777777778, + "grad_norm": 1.4612770080566406, + "learning_rate": 7.066014669926651e-05, + "loss": 2.2132, + "step": 29105 + }, + { + "epoch": 0.6468, + "grad_norm": 1.5787962675094604, + "learning_rate": 7.065570126694822e-05, + "loss": 2.2369, + "step": 29106 + }, + { + "epoch": 0.6468222222222222, + "grad_norm": 1.8740125894546509, + "learning_rate": 7.065125583462993e-05, + "loss": 2.5167, + "step": 29107 + }, + { + "epoch": 0.6468444444444444, + "grad_norm": 1.66010582447052, + "learning_rate": 7.064681040231162e-05, + "loss": 2.4951, + "step": 29108 + }, + { + "epoch": 0.6468666666666667, + "grad_norm": 1.5209728479385376, + "learning_rate": 7.064236496999333e-05, + "loss": 2.2145, + "step": 29109 + }, + { + "epoch": 0.6468888888888888, + "grad_norm": 1.2550688982009888, + "learning_rate": 7.063791953767504e-05, + "loss": 1.5124, + "step": 29110 + }, + { + "epoch": 0.6469111111111111, + "grad_norm": 1.596299648284912, + "learning_rate": 7.063347410535674e-05, + "loss": 2.3205, + "step": 29111 + }, + { + "epoch": 0.6469333333333334, + "grad_norm": 1.7290644645690918, + "learning_rate": 7.062902867303845e-05, + "loss": 1.424, + "step": 29112 + }, + { + "epoch": 0.6469555555555555, + "grad_norm": 1.6497602462768555, + "learning_rate": 7.062458324072017e-05, + "loss": 1.7248, + "step": 29113 + }, + { + "epoch": 0.6469777777777778, + "grad_norm": 1.732588768005371, + "learning_rate": 7.062013780840187e-05, + "loss": 2.3204, + "step": 29114 + }, + { + "epoch": 0.647, + "grad_norm": 1.65004563331604, + "learning_rate": 7.061569237608358e-05, + "loss": 2.0739, + "step": 29115 + }, + { + "epoch": 0.6470222222222223, + "grad_norm": 1.4575328826904297, + "learning_rate": 7.061124694376529e-05, + "loss": 1.1476, + "step": 29116 + }, + { + "epoch": 0.6470444444444444, + "grad_norm": 1.8705321550369263, + "learning_rate": 7.0606801511447e-05, + "loss": 2.2397, + "step": 29117 + }, + { + "epoch": 0.6470666666666667, + "grad_norm": 1.4159882068634033, + "learning_rate": 7.060235607912869e-05, + "loss": 2.1124, + "step": 29118 + }, + { + "epoch": 0.6470888888888889, + "grad_norm": 1.5113205909729004, + "learning_rate": 7.05979106468104e-05, + "loss": 1.5342, + "step": 29119 + }, + { + "epoch": 0.6471111111111111, + "grad_norm": 1.9303370714187622, + "learning_rate": 7.059346521449211e-05, + "loss": 1.8784, + "step": 29120 + }, + { + "epoch": 0.6471333333333333, + "grad_norm": 1.6839933395385742, + "learning_rate": 7.058901978217382e-05, + "loss": 1.967, + "step": 29121 + }, + { + "epoch": 0.6471555555555556, + "grad_norm": 1.7867637872695923, + "learning_rate": 7.058457434985553e-05, + "loss": 2.0391, + "step": 29122 + }, + { + "epoch": 0.6471777777777777, + "grad_norm": 1.8341662883758545, + "learning_rate": 7.058012891753724e-05, + "loss": 1.9629, + "step": 29123 + }, + { + "epoch": 0.6472, + "grad_norm": 1.563461184501648, + "learning_rate": 7.057568348521895e-05, + "loss": 1.4533, + "step": 29124 + }, + { + "epoch": 0.6472222222222223, + "grad_norm": 1.56820547580719, + "learning_rate": 7.057123805290065e-05, + "loss": 2.1632, + "step": 29125 + }, + { + "epoch": 0.6472444444444444, + "grad_norm": 2.0252976417541504, + "learning_rate": 7.056679262058236e-05, + "loss": 2.6123, + "step": 29126 + }, + { + "epoch": 0.6472666666666667, + "grad_norm": 1.8063596487045288, + "learning_rate": 7.056234718826407e-05, + "loss": 1.9402, + "step": 29127 + }, + { + "epoch": 0.6472888888888889, + "grad_norm": 1.1451598405838013, + "learning_rate": 7.055790175594576e-05, + "loss": 1.0699, + "step": 29128 + }, + { + "epoch": 0.6473111111111111, + "grad_norm": 1.7466984987258911, + "learning_rate": 7.055345632362747e-05, + "loss": 1.846, + "step": 29129 + }, + { + "epoch": 0.6473333333333333, + "grad_norm": 1.801710605621338, + "learning_rate": 7.054901089130918e-05, + "loss": 1.5419, + "step": 29130 + }, + { + "epoch": 0.6473555555555556, + "grad_norm": 1.829551100730896, + "learning_rate": 7.054456545899089e-05, + "loss": 2.5472, + "step": 29131 + }, + { + "epoch": 0.6473777777777778, + "grad_norm": 2.013662099838257, + "learning_rate": 7.05401200266726e-05, + "loss": 2.0472, + "step": 29132 + }, + { + "epoch": 0.6474, + "grad_norm": 1.3499835729599, + "learning_rate": 7.053567459435431e-05, + "loss": 0.7647, + "step": 29133 + }, + { + "epoch": 0.6474222222222222, + "grad_norm": 2.016878843307495, + "learning_rate": 7.053122916203602e-05, + "loss": 2.2325, + "step": 29134 + }, + { + "epoch": 0.6474444444444445, + "grad_norm": 1.7505043745040894, + "learning_rate": 7.052678372971772e-05, + "loss": 2.1693, + "step": 29135 + }, + { + "epoch": 0.6474666666666666, + "grad_norm": 1.9273511171340942, + "learning_rate": 7.052233829739943e-05, + "loss": 1.8722, + "step": 29136 + }, + { + "epoch": 0.6474888888888889, + "grad_norm": 1.839815616607666, + "learning_rate": 7.051789286508113e-05, + "loss": 2.2612, + "step": 29137 + }, + { + "epoch": 0.6475111111111111, + "grad_norm": 1.7973805665969849, + "learning_rate": 7.051344743276283e-05, + "loss": 1.6756, + "step": 29138 + }, + { + "epoch": 0.6475333333333333, + "grad_norm": 1.2534786462783813, + "learning_rate": 7.050900200044454e-05, + "loss": 0.8786, + "step": 29139 + }, + { + "epoch": 0.6475555555555556, + "grad_norm": 1.5299891233444214, + "learning_rate": 7.050455656812625e-05, + "loss": 1.3872, + "step": 29140 + }, + { + "epoch": 0.6475777777777778, + "grad_norm": 1.5156304836273193, + "learning_rate": 7.050011113580796e-05, + "loss": 1.6405, + "step": 29141 + }, + { + "epoch": 0.6476, + "grad_norm": 1.7062276601791382, + "learning_rate": 7.049566570348967e-05, + "loss": 2.0486, + "step": 29142 + }, + { + "epoch": 0.6476222222222222, + "grad_norm": 1.7123117446899414, + "learning_rate": 7.049122027117138e-05, + "loss": 1.9122, + "step": 29143 + }, + { + "epoch": 0.6476444444444445, + "grad_norm": 1.8518922328948975, + "learning_rate": 7.048677483885309e-05, + "loss": 2.0751, + "step": 29144 + }, + { + "epoch": 0.6476666666666666, + "grad_norm": 1.8861069679260254, + "learning_rate": 7.048232940653478e-05, + "loss": 1.831, + "step": 29145 + }, + { + "epoch": 0.6476888888888889, + "grad_norm": 1.7695550918579102, + "learning_rate": 7.04778839742165e-05, + "loss": 1.9029, + "step": 29146 + }, + { + "epoch": 0.6477111111111111, + "grad_norm": 2.139415979385376, + "learning_rate": 7.04734385418982e-05, + "loss": 2.0564, + "step": 29147 + }, + { + "epoch": 0.6477333333333334, + "grad_norm": 1.5689301490783691, + "learning_rate": 7.04689931095799e-05, + "loss": 1.7203, + "step": 29148 + }, + { + "epoch": 0.6477555555555555, + "grad_norm": 1.9944018125534058, + "learning_rate": 7.046454767726161e-05, + "loss": 1.9389, + "step": 29149 + }, + { + "epoch": 0.6477777777777778, + "grad_norm": 2.172898054122925, + "learning_rate": 7.046010224494333e-05, + "loss": 1.5803, + "step": 29150 + }, + { + "epoch": 0.6478, + "grad_norm": 1.5711785554885864, + "learning_rate": 7.045565681262503e-05, + "loss": 2.575, + "step": 29151 + }, + { + "epoch": 0.6478222222222222, + "grad_norm": 1.8806873559951782, + "learning_rate": 7.045121138030674e-05, + "loss": 2.7309, + "step": 29152 + }, + { + "epoch": 0.6478444444444444, + "grad_norm": 1.4180324077606201, + "learning_rate": 7.044676594798845e-05, + "loss": 2.1319, + "step": 29153 + }, + { + "epoch": 0.6478666666666667, + "grad_norm": 1.5303846597671509, + "learning_rate": 7.044232051567016e-05, + "loss": 2.1468, + "step": 29154 + }, + { + "epoch": 0.6478888888888888, + "grad_norm": 1.6324728727340698, + "learning_rate": 7.043787508335185e-05, + "loss": 1.595, + "step": 29155 + }, + { + "epoch": 0.6479111111111111, + "grad_norm": 1.6591416597366333, + "learning_rate": 7.043342965103356e-05, + "loss": 2.4564, + "step": 29156 + }, + { + "epoch": 0.6479333333333334, + "grad_norm": 1.2871074676513672, + "learning_rate": 7.042898421871527e-05, + "loss": 1.7271, + "step": 29157 + }, + { + "epoch": 0.6479555555555555, + "grad_norm": 1.3471856117248535, + "learning_rate": 7.042453878639698e-05, + "loss": 2.1456, + "step": 29158 + }, + { + "epoch": 0.6479777777777778, + "grad_norm": 1.4971389770507812, + "learning_rate": 7.042009335407869e-05, + "loss": 1.8597, + "step": 29159 + }, + { + "epoch": 0.648, + "grad_norm": 1.63566255569458, + "learning_rate": 7.04156479217604e-05, + "loss": 2.0335, + "step": 29160 + }, + { + "epoch": 0.6480222222222223, + "grad_norm": 1.4445345401763916, + "learning_rate": 7.04112024894421e-05, + "loss": 1.5174, + "step": 29161 + }, + { + "epoch": 0.6480444444444444, + "grad_norm": 1.633418083190918, + "learning_rate": 7.040675705712381e-05, + "loss": 1.92, + "step": 29162 + }, + { + "epoch": 0.6480666666666667, + "grad_norm": 0.33277857303619385, + "learning_rate": 7.040231162480552e-05, + "loss": 0.0243, + "step": 29163 + }, + { + "epoch": 0.6480888888888889, + "grad_norm": 1.222267746925354, + "learning_rate": 7.039786619248723e-05, + "loss": 1.1443, + "step": 29164 + }, + { + "epoch": 0.6481111111111111, + "grad_norm": 1.243935227394104, + "learning_rate": 7.039342076016892e-05, + "loss": 0.7909, + "step": 29165 + }, + { + "epoch": 0.6481333333333333, + "grad_norm": 1.5980591773986816, + "learning_rate": 7.038897532785063e-05, + "loss": 2.0629, + "step": 29166 + }, + { + "epoch": 0.6481555555555556, + "grad_norm": 1.7505806684494019, + "learning_rate": 7.038452989553234e-05, + "loss": 2.0045, + "step": 29167 + }, + { + "epoch": 0.6481777777777777, + "grad_norm": 1.5095384120941162, + "learning_rate": 7.038008446321405e-05, + "loss": 1.6692, + "step": 29168 + }, + { + "epoch": 0.6482, + "grad_norm": 1.7710570096969604, + "learning_rate": 7.037563903089576e-05, + "loss": 2.0661, + "step": 29169 + }, + { + "epoch": 0.6482222222222223, + "grad_norm": 1.5297256708145142, + "learning_rate": 7.037119359857747e-05, + "loss": 1.8635, + "step": 29170 + }, + { + "epoch": 0.6482444444444444, + "grad_norm": 1.8935678005218506, + "learning_rate": 7.036674816625917e-05, + "loss": 2.1259, + "step": 29171 + }, + { + "epoch": 0.6482666666666667, + "grad_norm": 1.6577333211898804, + "learning_rate": 7.036230273394088e-05, + "loss": 1.5838, + "step": 29172 + }, + { + "epoch": 0.6482888888888889, + "grad_norm": 1.8225229978561401, + "learning_rate": 7.035785730162259e-05, + "loss": 2.3188, + "step": 29173 + }, + { + "epoch": 0.6483111111111111, + "grad_norm": 1.8094227313995361, + "learning_rate": 7.03534118693043e-05, + "loss": 2.2508, + "step": 29174 + }, + { + "epoch": 0.6483333333333333, + "grad_norm": 1.651315689086914, + "learning_rate": 7.034896643698599e-05, + "loss": 1.8218, + "step": 29175 + }, + { + "epoch": 0.6483555555555556, + "grad_norm": 1.819054126739502, + "learning_rate": 7.03445210046677e-05, + "loss": 1.7227, + "step": 29176 + }, + { + "epoch": 0.6483777777777778, + "grad_norm": 1.9914813041687012, + "learning_rate": 7.034007557234941e-05, + "loss": 2.3753, + "step": 29177 + }, + { + "epoch": 0.6484, + "grad_norm": 2.263993501663208, + "learning_rate": 7.033563014003112e-05, + "loss": 1.438, + "step": 29178 + }, + { + "epoch": 0.6484222222222222, + "grad_norm": 1.7326619625091553, + "learning_rate": 7.033118470771283e-05, + "loss": 2.0003, + "step": 29179 + }, + { + "epoch": 0.6484444444444445, + "grad_norm": 2.027819871902466, + "learning_rate": 7.032673927539454e-05, + "loss": 2.2009, + "step": 29180 + }, + { + "epoch": 0.6484666666666666, + "grad_norm": 1.8078641891479492, + "learning_rate": 7.032229384307625e-05, + "loss": 1.7488, + "step": 29181 + }, + { + "epoch": 0.6484888888888889, + "grad_norm": 1.6092551946640015, + "learning_rate": 7.031784841075795e-05, + "loss": 1.5736, + "step": 29182 + }, + { + "epoch": 0.6485111111111111, + "grad_norm": 1.5341358184814453, + "learning_rate": 7.031340297843966e-05, + "loss": 1.9011, + "step": 29183 + }, + { + "epoch": 0.6485333333333333, + "grad_norm": 1.4618282318115234, + "learning_rate": 7.030895754612137e-05, + "loss": 1.5799, + "step": 29184 + }, + { + "epoch": 0.6485555555555556, + "grad_norm": 1.7872344255447388, + "learning_rate": 7.030451211380306e-05, + "loss": 1.6422, + "step": 29185 + }, + { + "epoch": 0.6485777777777778, + "grad_norm": 1.677580714225769, + "learning_rate": 7.030006668148477e-05, + "loss": 1.9763, + "step": 29186 + }, + { + "epoch": 0.6486, + "grad_norm": 1.6863222122192383, + "learning_rate": 7.02956212491665e-05, + "loss": 1.8678, + "step": 29187 + }, + { + "epoch": 0.6486222222222222, + "grad_norm": 1.24997079372406, + "learning_rate": 7.029117581684819e-05, + "loss": 1.3101, + "step": 29188 + }, + { + "epoch": 0.6486444444444445, + "grad_norm": 1.3846231698989868, + "learning_rate": 7.02867303845299e-05, + "loss": 1.7414, + "step": 29189 + }, + { + "epoch": 0.6486666666666666, + "grad_norm": 1.944069504737854, + "learning_rate": 7.028228495221161e-05, + "loss": 2.381, + "step": 29190 + }, + { + "epoch": 0.6486888888888889, + "grad_norm": 1.7412798404693604, + "learning_rate": 7.027783951989332e-05, + "loss": 1.7451, + "step": 29191 + }, + { + "epoch": 0.6487111111111111, + "grad_norm": 1.9458290338516235, + "learning_rate": 7.027339408757501e-05, + "loss": 2.1176, + "step": 29192 + }, + { + "epoch": 0.6487333333333334, + "grad_norm": 1.7362381219863892, + "learning_rate": 7.026894865525672e-05, + "loss": 1.4444, + "step": 29193 + }, + { + "epoch": 0.6487555555555555, + "grad_norm": 1.9106467962265015, + "learning_rate": 7.026450322293843e-05, + "loss": 1.8456, + "step": 29194 + }, + { + "epoch": 0.6487777777777778, + "grad_norm": 1.5858999490737915, + "learning_rate": 7.026005779062014e-05, + "loss": 1.6961, + "step": 29195 + }, + { + "epoch": 0.6488, + "grad_norm": 1.915849208831787, + "learning_rate": 7.025561235830185e-05, + "loss": 1.7903, + "step": 29196 + }, + { + "epoch": 0.6488222222222222, + "grad_norm": 1.544602394104004, + "learning_rate": 7.025116692598356e-05, + "loss": 1.1969, + "step": 29197 + }, + { + "epoch": 0.6488444444444444, + "grad_norm": 1.9226042032241821, + "learning_rate": 7.024672149366526e-05, + "loss": 1.826, + "step": 29198 + }, + { + "epoch": 0.6488666666666667, + "grad_norm": 1.8153972625732422, + "learning_rate": 7.024227606134697e-05, + "loss": 1.5014, + "step": 29199 + }, + { + "epoch": 0.6488888888888888, + "grad_norm": 1.5163156986236572, + "learning_rate": 7.023783062902868e-05, + "loss": 1.1973, + "step": 29200 + }, + { + "epoch": 0.6489111111111111, + "grad_norm": 1.4240121841430664, + "learning_rate": 7.023338519671039e-05, + "loss": 2.5114, + "step": 29201 + }, + { + "epoch": 0.6489333333333334, + "grad_norm": 1.366631031036377, + "learning_rate": 7.022893976439208e-05, + "loss": 2.4166, + "step": 29202 + }, + { + "epoch": 0.6489555555555555, + "grad_norm": 1.802046298980713, + "learning_rate": 7.02244943320738e-05, + "loss": 3.01, + "step": 29203 + }, + { + "epoch": 0.6489777777777778, + "grad_norm": 1.4731266498565674, + "learning_rate": 7.02200488997555e-05, + "loss": 2.2481, + "step": 29204 + }, + { + "epoch": 0.649, + "grad_norm": 1.8114069700241089, + "learning_rate": 7.021560346743721e-05, + "loss": 2.2668, + "step": 29205 + }, + { + "epoch": 0.6490222222222222, + "grad_norm": 1.574411153793335, + "learning_rate": 7.021115803511892e-05, + "loss": 2.3854, + "step": 29206 + }, + { + "epoch": 0.6490444444444444, + "grad_norm": 1.6780638694763184, + "learning_rate": 7.020671260280063e-05, + "loss": 2.4121, + "step": 29207 + }, + { + "epoch": 0.6490666666666667, + "grad_norm": 1.9986330270767212, + "learning_rate": 7.020226717048233e-05, + "loss": 2.5218, + "step": 29208 + }, + { + "epoch": 0.6490888888888889, + "grad_norm": 1.8758296966552734, + "learning_rate": 7.019782173816404e-05, + "loss": 1.8691, + "step": 29209 + }, + { + "epoch": 0.6491111111111111, + "grad_norm": 1.7497591972351074, + "learning_rate": 7.019337630584575e-05, + "loss": 2.0199, + "step": 29210 + }, + { + "epoch": 0.6491333333333333, + "grad_norm": 2.0398762226104736, + "learning_rate": 7.018893087352746e-05, + "loss": 2.1921, + "step": 29211 + }, + { + "epoch": 0.6491555555555556, + "grad_norm": 1.2331268787384033, + "learning_rate": 7.018448544120915e-05, + "loss": 0.9171, + "step": 29212 + }, + { + "epoch": 0.6491777777777777, + "grad_norm": 1.537521243095398, + "learning_rate": 7.018004000889086e-05, + "loss": 1.5539, + "step": 29213 + }, + { + "epoch": 0.6492, + "grad_norm": 1.4166390895843506, + "learning_rate": 7.017559457657257e-05, + "loss": 1.7561, + "step": 29214 + }, + { + "epoch": 0.6492222222222223, + "grad_norm": 1.5925884246826172, + "learning_rate": 7.017114914425428e-05, + "loss": 1.7955, + "step": 29215 + }, + { + "epoch": 0.6492444444444444, + "grad_norm": 1.5275793075561523, + "learning_rate": 7.016670371193599e-05, + "loss": 1.911, + "step": 29216 + }, + { + "epoch": 0.6492666666666667, + "grad_norm": 2.0152485370635986, + "learning_rate": 7.01622582796177e-05, + "loss": 1.8886, + "step": 29217 + }, + { + "epoch": 0.6492888888888889, + "grad_norm": 1.747724175453186, + "learning_rate": 7.01578128472994e-05, + "loss": 2.0026, + "step": 29218 + }, + { + "epoch": 0.6493111111111111, + "grad_norm": 1.6655484437942505, + "learning_rate": 7.015336741498111e-05, + "loss": 2.2749, + "step": 29219 + }, + { + "epoch": 0.6493333333333333, + "grad_norm": 1.7254546880722046, + "learning_rate": 7.014892198266282e-05, + "loss": 1.9314, + "step": 29220 + }, + { + "epoch": 0.6493555555555556, + "grad_norm": 1.608613133430481, + "learning_rate": 7.014447655034453e-05, + "loss": 1.9615, + "step": 29221 + }, + { + "epoch": 0.6493777777777778, + "grad_norm": 1.9358397722244263, + "learning_rate": 7.014003111802622e-05, + "loss": 2.3336, + "step": 29222 + }, + { + "epoch": 0.6494, + "grad_norm": 1.6523768901824951, + "learning_rate": 7.013558568570793e-05, + "loss": 2.1089, + "step": 29223 + }, + { + "epoch": 0.6494222222222222, + "grad_norm": 2.0118861198425293, + "learning_rate": 7.013114025338965e-05, + "loss": 2.4732, + "step": 29224 + }, + { + "epoch": 0.6494444444444445, + "grad_norm": 1.8435945510864258, + "learning_rate": 7.012669482107135e-05, + "loss": 1.9642, + "step": 29225 + }, + { + "epoch": 0.6494666666666666, + "grad_norm": 1.514154076576233, + "learning_rate": 7.012224938875306e-05, + "loss": 1.6705, + "step": 29226 + }, + { + "epoch": 0.6494888888888889, + "grad_norm": 1.780166745185852, + "learning_rate": 7.011780395643477e-05, + "loss": 1.512, + "step": 29227 + }, + { + "epoch": 0.6495111111111112, + "grad_norm": 1.433593511581421, + "learning_rate": 7.011335852411648e-05, + "loss": 1.7877, + "step": 29228 + }, + { + "epoch": 0.6495333333333333, + "grad_norm": 1.7081892490386963, + "learning_rate": 7.010891309179818e-05, + "loss": 1.9008, + "step": 29229 + }, + { + "epoch": 0.6495555555555556, + "grad_norm": 1.8547171354293823, + "learning_rate": 7.010446765947989e-05, + "loss": 2.4596, + "step": 29230 + }, + { + "epoch": 0.6495777777777778, + "grad_norm": 1.7276898622512817, + "learning_rate": 7.01000222271616e-05, + "loss": 1.9145, + "step": 29231 + }, + { + "epoch": 0.6496, + "grad_norm": 2.084520101547241, + "learning_rate": 7.00955767948433e-05, + "loss": 2.3716, + "step": 29232 + }, + { + "epoch": 0.6496222222222222, + "grad_norm": 1.6999021768569946, + "learning_rate": 7.009113136252501e-05, + "loss": 1.7156, + "step": 29233 + }, + { + "epoch": 0.6496444444444445, + "grad_norm": 1.4727013111114502, + "learning_rate": 7.008668593020672e-05, + "loss": 1.7079, + "step": 29234 + }, + { + "epoch": 0.6496666666666666, + "grad_norm": 1.6207228899002075, + "learning_rate": 7.008224049788842e-05, + "loss": 1.4759, + "step": 29235 + }, + { + "epoch": 0.6496888888888889, + "grad_norm": 1.706359624862671, + "learning_rate": 7.007779506557013e-05, + "loss": 1.8351, + "step": 29236 + }, + { + "epoch": 0.6497111111111111, + "grad_norm": 1.4816555976867676, + "learning_rate": 7.007334963325184e-05, + "loss": 1.5791, + "step": 29237 + }, + { + "epoch": 0.6497333333333334, + "grad_norm": 1.713639259338379, + "learning_rate": 7.006890420093355e-05, + "loss": 1.7698, + "step": 29238 + }, + { + "epoch": 0.6497555555555555, + "grad_norm": 1.8787704706192017, + "learning_rate": 7.006445876861525e-05, + "loss": 1.9489, + "step": 29239 + }, + { + "epoch": 0.6497777777777778, + "grad_norm": 1.6118589639663696, + "learning_rate": 7.006001333629695e-05, + "loss": 1.2888, + "step": 29240 + }, + { + "epoch": 0.6498, + "grad_norm": 1.7838908433914185, + "learning_rate": 7.005556790397866e-05, + "loss": 2.0457, + "step": 29241 + }, + { + "epoch": 0.6498222222222222, + "grad_norm": 2.315001964569092, + "learning_rate": 7.005112247166037e-05, + "loss": 2.044, + "step": 29242 + }, + { + "epoch": 0.6498444444444444, + "grad_norm": 1.7782366275787354, + "learning_rate": 7.004667703934208e-05, + "loss": 1.7675, + "step": 29243 + }, + { + "epoch": 0.6498666666666667, + "grad_norm": 1.7710777521133423, + "learning_rate": 7.004223160702379e-05, + "loss": 1.7482, + "step": 29244 + }, + { + "epoch": 0.6498888888888888, + "grad_norm": 2.2236485481262207, + "learning_rate": 7.003778617470549e-05, + "loss": 1.7651, + "step": 29245 + }, + { + "epoch": 0.6499111111111111, + "grad_norm": 1.7950549125671387, + "learning_rate": 7.00333407423872e-05, + "loss": 1.6507, + "step": 29246 + }, + { + "epoch": 0.6499333333333334, + "grad_norm": 1.7493401765823364, + "learning_rate": 7.002889531006891e-05, + "loss": 1.5964, + "step": 29247 + }, + { + "epoch": 0.6499555555555555, + "grad_norm": 2.2070393562316895, + "learning_rate": 7.002444987775062e-05, + "loss": 1.8369, + "step": 29248 + }, + { + "epoch": 0.6499777777777778, + "grad_norm": 2.0941531658172607, + "learning_rate": 7.002000444543231e-05, + "loss": 2.0079, + "step": 29249 + }, + { + "epoch": 0.65, + "grad_norm": 2.0086233615875244, + "learning_rate": 7.001555901311402e-05, + "loss": 1.8565, + "step": 29250 + }, + { + "epoch": 0.6500222222222222, + "grad_norm": 1.5135737657546997, + "learning_rate": 7.001111358079573e-05, + "loss": 2.3019, + "step": 29251 + }, + { + "epoch": 0.6500444444444444, + "grad_norm": 1.2567074298858643, + "learning_rate": 7.000666814847744e-05, + "loss": 1.8777, + "step": 29252 + }, + { + "epoch": 0.6500666666666667, + "grad_norm": 0.17498348653316498, + "learning_rate": 7.000222271615915e-05, + "loss": 0.018, + "step": 29253 + }, + { + "epoch": 0.6500888888888889, + "grad_norm": 1.7214148044586182, + "learning_rate": 6.999777728384086e-05, + "loss": 2.4307, + "step": 29254 + }, + { + "epoch": 0.6501111111111111, + "grad_norm": 1.6908961534500122, + "learning_rate": 6.999333185152256e-05, + "loss": 2.3565, + "step": 29255 + }, + { + "epoch": 0.6501333333333333, + "grad_norm": 2.346676826477051, + "learning_rate": 6.998888641920427e-05, + "loss": 2.1449, + "step": 29256 + }, + { + "epoch": 0.6501555555555556, + "grad_norm": 1.7402980327606201, + "learning_rate": 6.998444098688598e-05, + "loss": 2.0284, + "step": 29257 + }, + { + "epoch": 0.6501777777777777, + "grad_norm": 1.891755223274231, + "learning_rate": 6.997999555456769e-05, + "loss": 2.0583, + "step": 29258 + }, + { + "epoch": 0.6502, + "grad_norm": 1.889899730682373, + "learning_rate": 6.997555012224938e-05, + "loss": 1.9627, + "step": 29259 + }, + { + "epoch": 0.6502222222222223, + "grad_norm": 1.6953115463256836, + "learning_rate": 6.997110468993109e-05, + "loss": 2.3591, + "step": 29260 + }, + { + "epoch": 0.6502444444444444, + "grad_norm": 1.7562839984893799, + "learning_rate": 6.996665925761282e-05, + "loss": 2.2369, + "step": 29261 + }, + { + "epoch": 0.6502666666666667, + "grad_norm": 1.4049495458602905, + "learning_rate": 6.996221382529451e-05, + "loss": 2.1355, + "step": 29262 + }, + { + "epoch": 0.6502888888888889, + "grad_norm": 1.5074312686920166, + "learning_rate": 6.995776839297622e-05, + "loss": 2.13, + "step": 29263 + }, + { + "epoch": 0.6503111111111111, + "grad_norm": 1.7136636972427368, + "learning_rate": 6.995332296065793e-05, + "loss": 1.8946, + "step": 29264 + }, + { + "epoch": 0.6503333333333333, + "grad_norm": 1.636090636253357, + "learning_rate": 6.994887752833963e-05, + "loss": 2.1277, + "step": 29265 + }, + { + "epoch": 0.6503555555555556, + "grad_norm": 2.028170347213745, + "learning_rate": 6.994443209602134e-05, + "loss": 2.0288, + "step": 29266 + }, + { + "epoch": 0.6503777777777778, + "grad_norm": 1.5597490072250366, + "learning_rate": 6.993998666370305e-05, + "loss": 1.7009, + "step": 29267 + }, + { + "epoch": 0.6504, + "grad_norm": 1.6211811304092407, + "learning_rate": 6.993554123138476e-05, + "loss": 2.1396, + "step": 29268 + }, + { + "epoch": 0.6504222222222222, + "grad_norm": 1.767333745956421, + "learning_rate": 6.993109579906647e-05, + "loss": 2.4782, + "step": 29269 + }, + { + "epoch": 0.6504444444444445, + "grad_norm": 1.4722760915756226, + "learning_rate": 6.992665036674818e-05, + "loss": 1.9787, + "step": 29270 + }, + { + "epoch": 0.6504666666666666, + "grad_norm": 1.608223557472229, + "learning_rate": 6.992220493442989e-05, + "loss": 2.326, + "step": 29271 + }, + { + "epoch": 0.6504888888888889, + "grad_norm": 1.988062858581543, + "learning_rate": 6.991775950211158e-05, + "loss": 2.6228, + "step": 29272 + }, + { + "epoch": 0.6505111111111112, + "grad_norm": 1.5838956832885742, + "learning_rate": 6.991331406979329e-05, + "loss": 1.75, + "step": 29273 + }, + { + "epoch": 0.6505333333333333, + "grad_norm": 1.4592198133468628, + "learning_rate": 6.9908868637475e-05, + "loss": 1.7813, + "step": 29274 + }, + { + "epoch": 0.6505555555555556, + "grad_norm": 1.73031485080719, + "learning_rate": 6.99044232051567e-05, + "loss": 2.2714, + "step": 29275 + }, + { + "epoch": 0.6505777777777778, + "grad_norm": 1.753232717514038, + "learning_rate": 6.98999777728384e-05, + "loss": 1.9807, + "step": 29276 + }, + { + "epoch": 0.6506, + "grad_norm": 1.6164661645889282, + "learning_rate": 6.989553234052012e-05, + "loss": 1.4149, + "step": 29277 + }, + { + "epoch": 0.6506222222222222, + "grad_norm": 1.8052868843078613, + "learning_rate": 6.989108690820183e-05, + "loss": 1.9208, + "step": 29278 + }, + { + "epoch": 0.6506444444444445, + "grad_norm": 1.4451102018356323, + "learning_rate": 6.988664147588353e-05, + "loss": 2.1409, + "step": 29279 + }, + { + "epoch": 0.6506666666666666, + "grad_norm": 1.5738019943237305, + "learning_rate": 6.988219604356524e-05, + "loss": 1.9391, + "step": 29280 + }, + { + "epoch": 0.6506888888888889, + "grad_norm": 1.637847661972046, + "learning_rate": 6.987775061124695e-05, + "loss": 1.9187, + "step": 29281 + }, + { + "epoch": 0.6507111111111111, + "grad_norm": 1.3089234828948975, + "learning_rate": 6.987330517892865e-05, + "loss": 0.885, + "step": 29282 + }, + { + "epoch": 0.6507333333333334, + "grad_norm": 1.7746233940124512, + "learning_rate": 6.986885974661036e-05, + "loss": 1.9604, + "step": 29283 + }, + { + "epoch": 0.6507555555555555, + "grad_norm": 1.536644458770752, + "learning_rate": 6.986441431429207e-05, + "loss": 1.9093, + "step": 29284 + }, + { + "epoch": 0.6507777777777778, + "grad_norm": 1.560268759727478, + "learning_rate": 6.985996888197378e-05, + "loss": 1.367, + "step": 29285 + }, + { + "epoch": 0.6508, + "grad_norm": 1.840164065361023, + "learning_rate": 6.985552344965548e-05, + "loss": 1.9394, + "step": 29286 + }, + { + "epoch": 0.6508222222222222, + "grad_norm": 1.6413569450378418, + "learning_rate": 6.985107801733718e-05, + "loss": 1.4838, + "step": 29287 + }, + { + "epoch": 0.6508444444444444, + "grad_norm": 1.576214075088501, + "learning_rate": 6.98466325850189e-05, + "loss": 1.8984, + "step": 29288 + }, + { + "epoch": 0.6508666666666667, + "grad_norm": 1.3859788179397583, + "learning_rate": 6.98421871527006e-05, + "loss": 1.4899, + "step": 29289 + }, + { + "epoch": 0.6508888888888889, + "grad_norm": 1.5814870595932007, + "learning_rate": 6.983774172038231e-05, + "loss": 1.4805, + "step": 29290 + }, + { + "epoch": 0.6509111111111111, + "grad_norm": 1.8000617027282715, + "learning_rate": 6.983329628806402e-05, + "loss": 1.8311, + "step": 29291 + }, + { + "epoch": 0.6509333333333334, + "grad_norm": 1.9427064657211304, + "learning_rate": 6.982885085574572e-05, + "loss": 1.9634, + "step": 29292 + }, + { + "epoch": 0.6509555555555555, + "grad_norm": 1.5224941968917847, + "learning_rate": 6.982440542342743e-05, + "loss": 1.4516, + "step": 29293 + }, + { + "epoch": 0.6509777777777778, + "grad_norm": 1.658179521560669, + "learning_rate": 6.981995999110914e-05, + "loss": 1.682, + "step": 29294 + }, + { + "epoch": 0.651, + "grad_norm": 1.6062688827514648, + "learning_rate": 6.981551455879085e-05, + "loss": 1.7161, + "step": 29295 + }, + { + "epoch": 0.6510222222222222, + "grad_norm": 2.0045063495635986, + "learning_rate": 6.981106912647254e-05, + "loss": 2.073, + "step": 29296 + }, + { + "epoch": 0.6510444444444444, + "grad_norm": 1.722097396850586, + "learning_rate": 6.980662369415425e-05, + "loss": 1.7127, + "step": 29297 + }, + { + "epoch": 0.6510666666666667, + "grad_norm": 1.8101304769515991, + "learning_rate": 6.980217826183598e-05, + "loss": 1.6494, + "step": 29298 + }, + { + "epoch": 0.6510888888888889, + "grad_norm": 1.9141604900360107, + "learning_rate": 6.979773282951767e-05, + "loss": 1.4045, + "step": 29299 + }, + { + "epoch": 0.6511111111111111, + "grad_norm": 2.0712220668792725, + "learning_rate": 6.979328739719938e-05, + "loss": 1.5308, + "step": 29300 + }, + { + "epoch": 0.6511333333333333, + "grad_norm": 1.2344119548797607, + "learning_rate": 6.978884196488109e-05, + "loss": 1.1758, + "step": 29301 + }, + { + "epoch": 0.6511555555555556, + "grad_norm": 1.4297155141830444, + "learning_rate": 6.978439653256279e-05, + "loss": 2.1887, + "step": 29302 + }, + { + "epoch": 0.6511777777777777, + "grad_norm": 0.8754487633705139, + "learning_rate": 6.97799511002445e-05, + "loss": 0.8814, + "step": 29303 + }, + { + "epoch": 0.6512, + "grad_norm": 1.4717179536819458, + "learning_rate": 6.977550566792621e-05, + "loss": 2.1349, + "step": 29304 + }, + { + "epoch": 0.6512222222222223, + "grad_norm": 1.4106658697128296, + "learning_rate": 6.977106023560792e-05, + "loss": 2.1573, + "step": 29305 + }, + { + "epoch": 0.6512444444444444, + "grad_norm": 1.9039552211761475, + "learning_rate": 6.976661480328963e-05, + "loss": 1.8249, + "step": 29306 + }, + { + "epoch": 0.6512666666666667, + "grad_norm": 1.3200098276138306, + "learning_rate": 6.976216937097134e-05, + "loss": 1.7248, + "step": 29307 + }, + { + "epoch": 0.6512888888888889, + "grad_norm": 1.861236810684204, + "learning_rate": 6.975772393865305e-05, + "loss": 2.4081, + "step": 29308 + }, + { + "epoch": 0.6513111111111111, + "grad_norm": 1.6973249912261963, + "learning_rate": 6.975327850633474e-05, + "loss": 1.9996, + "step": 29309 + }, + { + "epoch": 0.6513333333333333, + "grad_norm": 1.8404150009155273, + "learning_rate": 6.974883307401645e-05, + "loss": 2.4652, + "step": 29310 + }, + { + "epoch": 0.6513555555555556, + "grad_norm": 1.7387663125991821, + "learning_rate": 6.974438764169816e-05, + "loss": 1.4416, + "step": 29311 + }, + { + "epoch": 0.6513777777777778, + "grad_norm": 1.6357557773590088, + "learning_rate": 6.973994220937986e-05, + "loss": 1.6109, + "step": 29312 + }, + { + "epoch": 0.6514, + "grad_norm": 1.6437475681304932, + "learning_rate": 6.973549677706157e-05, + "loss": 1.9761, + "step": 29313 + }, + { + "epoch": 0.6514222222222222, + "grad_norm": 1.6953421831130981, + "learning_rate": 6.973105134474328e-05, + "loss": 2.1356, + "step": 29314 + }, + { + "epoch": 0.6514444444444445, + "grad_norm": 1.6046780347824097, + "learning_rate": 6.972660591242499e-05, + "loss": 2.1672, + "step": 29315 + }, + { + "epoch": 0.6514666666666666, + "grad_norm": 1.550767421722412, + "learning_rate": 6.97221604801067e-05, + "loss": 1.653, + "step": 29316 + }, + { + "epoch": 0.6514888888888889, + "grad_norm": 1.5348842144012451, + "learning_rate": 6.97177150477884e-05, + "loss": 1.8861, + "step": 29317 + }, + { + "epoch": 0.6515111111111112, + "grad_norm": 1.7914471626281738, + "learning_rate": 6.971326961547012e-05, + "loss": 2.2415, + "step": 29318 + }, + { + "epoch": 0.6515333333333333, + "grad_norm": 1.428574800491333, + "learning_rate": 6.970882418315181e-05, + "loss": 1.4008, + "step": 29319 + }, + { + "epoch": 0.6515555555555556, + "grad_norm": 1.382114052772522, + "learning_rate": 6.970437875083352e-05, + "loss": 1.7083, + "step": 29320 + }, + { + "epoch": 0.6515777777777778, + "grad_norm": 2.0291502475738525, + "learning_rate": 6.969993331851523e-05, + "loss": 1.5898, + "step": 29321 + }, + { + "epoch": 0.6516, + "grad_norm": 1.5214177370071411, + "learning_rate": 6.969548788619693e-05, + "loss": 1.9985, + "step": 29322 + }, + { + "epoch": 0.6516222222222222, + "grad_norm": 1.516836166381836, + "learning_rate": 6.969104245387864e-05, + "loss": 1.6018, + "step": 29323 + }, + { + "epoch": 0.6516444444444445, + "grad_norm": 1.7093638181686401, + "learning_rate": 6.968659702156035e-05, + "loss": 2.0104, + "step": 29324 + }, + { + "epoch": 0.6516666666666666, + "grad_norm": 1.858313798904419, + "learning_rate": 6.968215158924206e-05, + "loss": 2.0254, + "step": 29325 + }, + { + "epoch": 0.6516888888888889, + "grad_norm": 2.083031177520752, + "learning_rate": 6.967770615692377e-05, + "loss": 2.307, + "step": 29326 + }, + { + "epoch": 0.6517111111111111, + "grad_norm": 1.7572811841964722, + "learning_rate": 6.967326072460547e-05, + "loss": 1.9557, + "step": 29327 + }, + { + "epoch": 0.6517333333333334, + "grad_norm": 1.7568235397338867, + "learning_rate": 6.966881529228718e-05, + "loss": 2.1862, + "step": 29328 + }, + { + "epoch": 0.6517555555555555, + "grad_norm": 1.9669950008392334, + "learning_rate": 6.966436985996888e-05, + "loss": 1.9378, + "step": 29329 + }, + { + "epoch": 0.6517777777777778, + "grad_norm": 1.1136808395385742, + "learning_rate": 6.965992442765059e-05, + "loss": 0.8185, + "step": 29330 + }, + { + "epoch": 0.6518, + "grad_norm": 1.4626590013504028, + "learning_rate": 6.96554789953323e-05, + "loss": 1.6177, + "step": 29331 + }, + { + "epoch": 0.6518222222222222, + "grad_norm": 1.4172871112823486, + "learning_rate": 6.9651033563014e-05, + "loss": 1.1709, + "step": 29332 + }, + { + "epoch": 0.6518444444444444, + "grad_norm": 1.4712958335876465, + "learning_rate": 6.96465881306957e-05, + "loss": 1.5426, + "step": 29333 + }, + { + "epoch": 0.6518666666666667, + "grad_norm": 1.5287601947784424, + "learning_rate": 6.964214269837741e-05, + "loss": 1.3882, + "step": 29334 + }, + { + "epoch": 0.6518888888888889, + "grad_norm": 1.9561853408813477, + "learning_rate": 6.963769726605914e-05, + "loss": 1.9832, + "step": 29335 + }, + { + "epoch": 0.6519111111111111, + "grad_norm": 1.5903472900390625, + "learning_rate": 6.963325183374083e-05, + "loss": 1.5891, + "step": 29336 + }, + { + "epoch": 0.6519333333333334, + "grad_norm": 1.7223854064941406, + "learning_rate": 6.962880640142254e-05, + "loss": 2.1605, + "step": 29337 + }, + { + "epoch": 0.6519555555555555, + "grad_norm": 1.8184937238693237, + "learning_rate": 6.962436096910425e-05, + "loss": 1.7764, + "step": 29338 + }, + { + "epoch": 0.6519777777777778, + "grad_norm": 2.029592752456665, + "learning_rate": 6.961991553678595e-05, + "loss": 2.023, + "step": 29339 + }, + { + "epoch": 0.652, + "grad_norm": 1.8846449851989746, + "learning_rate": 6.961547010446766e-05, + "loss": 1.8893, + "step": 29340 + }, + { + "epoch": 0.6520222222222222, + "grad_norm": 1.637250542640686, + "learning_rate": 6.961102467214937e-05, + "loss": 1.5786, + "step": 29341 + }, + { + "epoch": 0.6520444444444444, + "grad_norm": 1.8435598611831665, + "learning_rate": 6.960657923983108e-05, + "loss": 1.6511, + "step": 29342 + }, + { + "epoch": 0.6520666666666667, + "grad_norm": 1.94805109500885, + "learning_rate": 6.960213380751279e-05, + "loss": 1.8842, + "step": 29343 + }, + { + "epoch": 0.6520888888888889, + "grad_norm": 1.7755887508392334, + "learning_rate": 6.95976883751945e-05, + "loss": 2.0506, + "step": 29344 + }, + { + "epoch": 0.6521111111111111, + "grad_norm": 2.023674964904785, + "learning_rate": 6.959324294287621e-05, + "loss": 2.1951, + "step": 29345 + }, + { + "epoch": 0.6521333333333333, + "grad_norm": 1.8801541328430176, + "learning_rate": 6.95887975105579e-05, + "loss": 2.0137, + "step": 29346 + }, + { + "epoch": 0.6521555555555556, + "grad_norm": 1.7253317832946777, + "learning_rate": 6.958435207823961e-05, + "loss": 1.9039, + "step": 29347 + }, + { + "epoch": 0.6521777777777777, + "grad_norm": 1.8239405155181885, + "learning_rate": 6.957990664592132e-05, + "loss": 1.5129, + "step": 29348 + }, + { + "epoch": 0.6522, + "grad_norm": 1.4606218338012695, + "learning_rate": 6.957546121360302e-05, + "loss": 0.9085, + "step": 29349 + }, + { + "epoch": 0.6522222222222223, + "grad_norm": 2.2330949306488037, + "learning_rate": 6.957101578128473e-05, + "loss": 1.3404, + "step": 29350 + }, + { + "epoch": 0.6522444444444444, + "grad_norm": 1.858919382095337, + "learning_rate": 6.956657034896644e-05, + "loss": 2.718, + "step": 29351 + }, + { + "epoch": 0.6522666666666667, + "grad_norm": 0.23877856135368347, + "learning_rate": 6.956212491664815e-05, + "loss": 0.0166, + "step": 29352 + }, + { + "epoch": 0.6522888888888889, + "grad_norm": 0.24272207915782928, + "learning_rate": 6.955767948432986e-05, + "loss": 0.0162, + "step": 29353 + }, + { + "epoch": 0.6523111111111111, + "grad_norm": 1.4344298839569092, + "learning_rate": 6.955323405201157e-05, + "loss": 2.3705, + "step": 29354 + }, + { + "epoch": 0.6523333333333333, + "grad_norm": 1.3044419288635254, + "learning_rate": 6.954878861969328e-05, + "loss": 2.1032, + "step": 29355 + }, + { + "epoch": 0.6523555555555556, + "grad_norm": 1.8512741327285767, + "learning_rate": 6.954434318737497e-05, + "loss": 2.3973, + "step": 29356 + }, + { + "epoch": 0.6523777777777777, + "grad_norm": 1.987452745437622, + "learning_rate": 6.953989775505668e-05, + "loss": 1.2002, + "step": 29357 + }, + { + "epoch": 0.6524, + "grad_norm": 1.310753583908081, + "learning_rate": 6.953545232273839e-05, + "loss": 1.9942, + "step": 29358 + }, + { + "epoch": 0.6524222222222222, + "grad_norm": 1.4221051931381226, + "learning_rate": 6.953100689042009e-05, + "loss": 1.8441, + "step": 29359 + }, + { + "epoch": 0.6524444444444445, + "grad_norm": 1.6217492818832397, + "learning_rate": 6.95265614581018e-05, + "loss": 2.3557, + "step": 29360 + }, + { + "epoch": 0.6524666666666666, + "grad_norm": 1.9682235717773438, + "learning_rate": 6.952211602578351e-05, + "loss": 1.7465, + "step": 29361 + }, + { + "epoch": 0.6524888888888889, + "grad_norm": 1.491863489151001, + "learning_rate": 6.951767059346522e-05, + "loss": 1.6969, + "step": 29362 + }, + { + "epoch": 0.6525111111111112, + "grad_norm": 1.5127167701721191, + "learning_rate": 6.951322516114693e-05, + "loss": 2.0036, + "step": 29363 + }, + { + "epoch": 0.6525333333333333, + "grad_norm": 1.4923393726348877, + "learning_rate": 6.950877972882864e-05, + "loss": 1.715, + "step": 29364 + }, + { + "epoch": 0.6525555555555556, + "grad_norm": 1.6749690771102905, + "learning_rate": 6.950433429651035e-05, + "loss": 2.1604, + "step": 29365 + }, + { + "epoch": 0.6525777777777778, + "grad_norm": 1.8960293531417847, + "learning_rate": 6.949988886419204e-05, + "loss": 1.997, + "step": 29366 + }, + { + "epoch": 0.6526, + "grad_norm": 1.4093700647354126, + "learning_rate": 6.949544343187375e-05, + "loss": 2.076, + "step": 29367 + }, + { + "epoch": 0.6526222222222222, + "grad_norm": 1.5399001836776733, + "learning_rate": 6.949099799955546e-05, + "loss": 1.9729, + "step": 29368 + }, + { + "epoch": 0.6526444444444445, + "grad_norm": 1.6583781242370605, + "learning_rate": 6.948655256723716e-05, + "loss": 1.9131, + "step": 29369 + }, + { + "epoch": 0.6526666666666666, + "grad_norm": 1.5324383974075317, + "learning_rate": 6.948210713491887e-05, + "loss": 1.7619, + "step": 29370 + }, + { + "epoch": 0.6526888888888889, + "grad_norm": 1.7012816667556763, + "learning_rate": 6.947766170260059e-05, + "loss": 2.1841, + "step": 29371 + }, + { + "epoch": 0.6527111111111111, + "grad_norm": 1.626502275466919, + "learning_rate": 6.94732162702823e-05, + "loss": 1.9042, + "step": 29372 + }, + { + "epoch": 0.6527333333333334, + "grad_norm": 1.9852179288864136, + "learning_rate": 6.9468770837964e-05, + "loss": 1.9659, + "step": 29373 + }, + { + "epoch": 0.6527555555555555, + "grad_norm": 1.7495781183242798, + "learning_rate": 6.94643254056457e-05, + "loss": 2.356, + "step": 29374 + }, + { + "epoch": 0.6527777777777778, + "grad_norm": 1.5500662326812744, + "learning_rate": 6.945987997332741e-05, + "loss": 1.9053, + "step": 29375 + }, + { + "epoch": 0.6528, + "grad_norm": 1.7778003215789795, + "learning_rate": 6.945543454100911e-05, + "loss": 1.7993, + "step": 29376 + }, + { + "epoch": 0.6528222222222222, + "grad_norm": 1.6801308393478394, + "learning_rate": 6.945098910869082e-05, + "loss": 1.7258, + "step": 29377 + }, + { + "epoch": 0.6528444444444444, + "grad_norm": 1.6005624532699585, + "learning_rate": 6.944654367637253e-05, + "loss": 1.1401, + "step": 29378 + }, + { + "epoch": 0.6528666666666667, + "grad_norm": 2.02657151222229, + "learning_rate": 6.944209824405423e-05, + "loss": 1.6297, + "step": 29379 + }, + { + "epoch": 0.6528888888888889, + "grad_norm": 1.8680578470230103, + "learning_rate": 6.943765281173595e-05, + "loss": 2.0188, + "step": 29380 + }, + { + "epoch": 0.6529111111111111, + "grad_norm": 1.6274778842926025, + "learning_rate": 6.943320737941766e-05, + "loss": 1.6062, + "step": 29381 + }, + { + "epoch": 0.6529333333333334, + "grad_norm": 1.5393065214157104, + "learning_rate": 6.942876194709937e-05, + "loss": 0.8731, + "step": 29382 + }, + { + "epoch": 0.6529555555555555, + "grad_norm": 1.7478028535842896, + "learning_rate": 6.942431651478106e-05, + "loss": 2.1741, + "step": 29383 + }, + { + "epoch": 0.6529777777777778, + "grad_norm": 1.7424736022949219, + "learning_rate": 6.941987108246277e-05, + "loss": 2.0114, + "step": 29384 + }, + { + "epoch": 0.653, + "grad_norm": 1.6595288515090942, + "learning_rate": 6.941542565014448e-05, + "loss": 1.7124, + "step": 29385 + }, + { + "epoch": 0.6530222222222222, + "grad_norm": 1.862471103668213, + "learning_rate": 6.941098021782618e-05, + "loss": 1.3923, + "step": 29386 + }, + { + "epoch": 0.6530444444444444, + "grad_norm": 1.7211300134658813, + "learning_rate": 6.940653478550789e-05, + "loss": 2.1222, + "step": 29387 + }, + { + "epoch": 0.6530666666666667, + "grad_norm": 1.7551764249801636, + "learning_rate": 6.94020893531896e-05, + "loss": 2.3051, + "step": 29388 + }, + { + "epoch": 0.6530888888888889, + "grad_norm": 2.34627628326416, + "learning_rate": 6.939764392087131e-05, + "loss": 2.3686, + "step": 29389 + }, + { + "epoch": 0.6531111111111111, + "grad_norm": 1.6594637632369995, + "learning_rate": 6.939319848855302e-05, + "loss": 2.0798, + "step": 29390 + }, + { + "epoch": 0.6531333333333333, + "grad_norm": 1.7297558784484863, + "learning_rate": 6.938875305623473e-05, + "loss": 1.7862, + "step": 29391 + }, + { + "epoch": 0.6531555555555556, + "grad_norm": 1.5960259437561035, + "learning_rate": 6.938430762391644e-05, + "loss": 1.7059, + "step": 29392 + }, + { + "epoch": 0.6531777777777777, + "grad_norm": 1.6867831945419312, + "learning_rate": 6.937986219159813e-05, + "loss": 1.8962, + "step": 29393 + }, + { + "epoch": 0.6532, + "grad_norm": 1.8363149166107178, + "learning_rate": 6.937541675927984e-05, + "loss": 1.6911, + "step": 29394 + }, + { + "epoch": 0.6532222222222223, + "grad_norm": 1.8267245292663574, + "learning_rate": 6.937097132696155e-05, + "loss": 1.9225, + "step": 29395 + }, + { + "epoch": 0.6532444444444444, + "grad_norm": 1.8893781900405884, + "learning_rate": 6.936652589464325e-05, + "loss": 1.8525, + "step": 29396 + }, + { + "epoch": 0.6532666666666667, + "grad_norm": 1.3311126232147217, + "learning_rate": 6.936208046232496e-05, + "loss": 1.0012, + "step": 29397 + }, + { + "epoch": 0.6532888888888889, + "grad_norm": 1.7276990413665771, + "learning_rate": 6.935763503000667e-05, + "loss": 1.8642, + "step": 29398 + }, + { + "epoch": 0.6533111111111111, + "grad_norm": 1.8954962491989136, + "learning_rate": 6.935318959768838e-05, + "loss": 1.9947, + "step": 29399 + }, + { + "epoch": 0.6533333333333333, + "grad_norm": 2.5243430137634277, + "learning_rate": 6.934874416537009e-05, + "loss": 1.7075, + "step": 29400 + }, + { + "epoch": 0.6533555555555556, + "grad_norm": 1.5330208539962769, + "learning_rate": 6.93442987330518e-05, + "loss": 2.5989, + "step": 29401 + }, + { + "epoch": 0.6533777777777777, + "grad_norm": 1.3771998882293701, + "learning_rate": 6.93398533007335e-05, + "loss": 2.0617, + "step": 29402 + }, + { + "epoch": 0.6534, + "grad_norm": 1.2721847295761108, + "learning_rate": 6.93354078684152e-05, + "loss": 1.001, + "step": 29403 + }, + { + "epoch": 0.6534222222222222, + "grad_norm": 1.885785698890686, + "learning_rate": 6.933096243609691e-05, + "loss": 2.2274, + "step": 29404 + }, + { + "epoch": 0.6534444444444445, + "grad_norm": 1.6090855598449707, + "learning_rate": 6.932651700377862e-05, + "loss": 2.017, + "step": 29405 + }, + { + "epoch": 0.6534666666666666, + "grad_norm": 1.3860564231872559, + "learning_rate": 6.932207157146032e-05, + "loss": 2.1833, + "step": 29406 + }, + { + "epoch": 0.6534888888888889, + "grad_norm": 1.5746746063232422, + "learning_rate": 6.931762613914203e-05, + "loss": 1.9449, + "step": 29407 + }, + { + "epoch": 0.6535111111111112, + "grad_norm": 1.605556607246399, + "learning_rate": 6.931318070682375e-05, + "loss": 2.3436, + "step": 29408 + }, + { + "epoch": 0.6535333333333333, + "grad_norm": 1.6281412839889526, + "learning_rate": 6.930873527450546e-05, + "loss": 1.9975, + "step": 29409 + }, + { + "epoch": 0.6535555555555556, + "grad_norm": 1.5633022785186768, + "learning_rate": 6.930428984218716e-05, + "loss": 1.875, + "step": 29410 + }, + { + "epoch": 0.6535777777777778, + "grad_norm": 1.6272820234298706, + "learning_rate": 6.929984440986887e-05, + "loss": 2.2241, + "step": 29411 + }, + { + "epoch": 0.6536, + "grad_norm": 1.4241611957550049, + "learning_rate": 6.929539897755058e-05, + "loss": 1.9127, + "step": 29412 + }, + { + "epoch": 0.6536222222222222, + "grad_norm": 1.8110153675079346, + "learning_rate": 6.929095354523227e-05, + "loss": 2.2737, + "step": 29413 + }, + { + "epoch": 0.6536444444444445, + "grad_norm": 1.5204417705535889, + "learning_rate": 6.928650811291398e-05, + "loss": 1.8794, + "step": 29414 + }, + { + "epoch": 0.6536666666666666, + "grad_norm": 1.456246256828308, + "learning_rate": 6.928206268059569e-05, + "loss": 2.1589, + "step": 29415 + }, + { + "epoch": 0.6536888888888889, + "grad_norm": 1.581737756729126, + "learning_rate": 6.927761724827739e-05, + "loss": 2.0204, + "step": 29416 + }, + { + "epoch": 0.6537111111111111, + "grad_norm": 1.9131207466125488, + "learning_rate": 6.927317181595911e-05, + "loss": 2.2468, + "step": 29417 + }, + { + "epoch": 0.6537333333333334, + "grad_norm": 1.531806468963623, + "learning_rate": 6.926872638364082e-05, + "loss": 1.9102, + "step": 29418 + }, + { + "epoch": 0.6537555555555555, + "grad_norm": 1.7774204015731812, + "learning_rate": 6.926428095132253e-05, + "loss": 2.2159, + "step": 29419 + }, + { + "epoch": 0.6537777777777778, + "grad_norm": 1.632561206817627, + "learning_rate": 6.925983551900423e-05, + "loss": 1.7789, + "step": 29420 + }, + { + "epoch": 0.6538, + "grad_norm": 1.4562997817993164, + "learning_rate": 6.925539008668593e-05, + "loss": 1.7284, + "step": 29421 + }, + { + "epoch": 0.6538222222222222, + "grad_norm": 1.8392057418823242, + "learning_rate": 6.925094465436764e-05, + "loss": 1.7259, + "step": 29422 + }, + { + "epoch": 0.6538444444444445, + "grad_norm": 1.8033900260925293, + "learning_rate": 6.924649922204934e-05, + "loss": 2.0409, + "step": 29423 + }, + { + "epoch": 0.6538666666666667, + "grad_norm": 1.638187289237976, + "learning_rate": 6.924205378973105e-05, + "loss": 1.9508, + "step": 29424 + }, + { + "epoch": 0.6538888888888889, + "grad_norm": 2.0364911556243896, + "learning_rate": 6.923760835741276e-05, + "loss": 2.0946, + "step": 29425 + }, + { + "epoch": 0.6539111111111111, + "grad_norm": 1.5568419694900513, + "learning_rate": 6.923316292509447e-05, + "loss": 1.9651, + "step": 29426 + }, + { + "epoch": 0.6539333333333334, + "grad_norm": 1.6838810443878174, + "learning_rate": 6.922871749277618e-05, + "loss": 2.1729, + "step": 29427 + }, + { + "epoch": 0.6539555555555555, + "grad_norm": 1.4847619533538818, + "learning_rate": 6.922427206045789e-05, + "loss": 1.8529, + "step": 29428 + }, + { + "epoch": 0.6539777777777778, + "grad_norm": 1.2320556640625, + "learning_rate": 6.92198266281396e-05, + "loss": 0.7937, + "step": 29429 + }, + { + "epoch": 0.654, + "grad_norm": 1.8546630144119263, + "learning_rate": 6.92153811958213e-05, + "loss": 1.6273, + "step": 29430 + }, + { + "epoch": 0.6540222222222222, + "grad_norm": 1.7034964561462402, + "learning_rate": 6.9210935763503e-05, + "loss": 1.9266, + "step": 29431 + }, + { + "epoch": 0.6540444444444444, + "grad_norm": 1.6947805881500244, + "learning_rate": 6.920649033118471e-05, + "loss": 1.9812, + "step": 29432 + }, + { + "epoch": 0.6540666666666667, + "grad_norm": 1.985138177871704, + "learning_rate": 6.920204489886641e-05, + "loss": 2.1235, + "step": 29433 + }, + { + "epoch": 0.6540888888888889, + "grad_norm": 1.9874866008758545, + "learning_rate": 6.919759946654812e-05, + "loss": 1.7606, + "step": 29434 + }, + { + "epoch": 0.6541111111111111, + "grad_norm": 1.6188753843307495, + "learning_rate": 6.919315403422983e-05, + "loss": 1.6674, + "step": 29435 + }, + { + "epoch": 0.6541333333333333, + "grad_norm": 1.4949100017547607, + "learning_rate": 6.918870860191154e-05, + "loss": 1.8744, + "step": 29436 + }, + { + "epoch": 0.6541555555555556, + "grad_norm": 1.6058498620986938, + "learning_rate": 6.918426316959325e-05, + "loss": 1.6328, + "step": 29437 + }, + { + "epoch": 0.6541777777777777, + "grad_norm": 1.6887714862823486, + "learning_rate": 6.917981773727496e-05, + "loss": 1.6641, + "step": 29438 + }, + { + "epoch": 0.6542, + "grad_norm": 1.9345225095748901, + "learning_rate": 6.917537230495667e-05, + "loss": 1.7995, + "step": 29439 + }, + { + "epoch": 0.6542222222222223, + "grad_norm": 1.9309325218200684, + "learning_rate": 6.917092687263836e-05, + "loss": 1.731, + "step": 29440 + }, + { + "epoch": 0.6542444444444444, + "grad_norm": 1.5484614372253418, + "learning_rate": 6.916648144032007e-05, + "loss": 1.8296, + "step": 29441 + }, + { + "epoch": 0.6542666666666667, + "grad_norm": 1.6554497480392456, + "learning_rate": 6.916203600800178e-05, + "loss": 1.7379, + "step": 29442 + }, + { + "epoch": 0.6542888888888889, + "grad_norm": 2.0249950885772705, + "learning_rate": 6.915759057568348e-05, + "loss": 1.827, + "step": 29443 + }, + { + "epoch": 0.6543111111111111, + "grad_norm": 1.835463523864746, + "learning_rate": 6.915314514336519e-05, + "loss": 1.9925, + "step": 29444 + }, + { + "epoch": 0.6543333333333333, + "grad_norm": 2.0246810913085938, + "learning_rate": 6.914869971104691e-05, + "loss": 2.0876, + "step": 29445 + }, + { + "epoch": 0.6543555555555556, + "grad_norm": 2.1907918453216553, + "learning_rate": 6.914425427872862e-05, + "loss": 1.8498, + "step": 29446 + }, + { + "epoch": 0.6543777777777777, + "grad_norm": 2.0203280448913574, + "learning_rate": 6.913980884641032e-05, + "loss": 1.9762, + "step": 29447 + }, + { + "epoch": 0.6544, + "grad_norm": 2.1965038776397705, + "learning_rate": 6.913536341409203e-05, + "loss": 2.0273, + "step": 29448 + }, + { + "epoch": 0.6544222222222222, + "grad_norm": 2.1303956508636475, + "learning_rate": 6.913091798177374e-05, + "loss": 1.9217, + "step": 29449 + }, + { + "epoch": 0.6544444444444445, + "grad_norm": 1.9999853372573853, + "learning_rate": 6.912647254945543e-05, + "loss": 1.485, + "step": 29450 + }, + { + "epoch": 0.6544666666666666, + "grad_norm": 1.4747973680496216, + "learning_rate": 6.912202711713714e-05, + "loss": 2.2189, + "step": 29451 + }, + { + "epoch": 0.6544888888888889, + "grad_norm": 1.6959315538406372, + "learning_rate": 6.911758168481885e-05, + "loss": 2.6387, + "step": 29452 + }, + { + "epoch": 0.6545111111111112, + "grad_norm": 1.709522008895874, + "learning_rate": 6.911313625250055e-05, + "loss": 2.4668, + "step": 29453 + }, + { + "epoch": 0.6545333333333333, + "grad_norm": 1.0732325315475464, + "learning_rate": 6.910869082018227e-05, + "loss": 1.189, + "step": 29454 + }, + { + "epoch": 0.6545555555555556, + "grad_norm": 1.5143368244171143, + "learning_rate": 6.910424538786398e-05, + "loss": 2.1954, + "step": 29455 + }, + { + "epoch": 0.6545777777777778, + "grad_norm": 1.6703866720199585, + "learning_rate": 6.909979995554569e-05, + "loss": 2.5903, + "step": 29456 + }, + { + "epoch": 0.6546, + "grad_norm": 1.806408166885376, + "learning_rate": 6.909535452322739e-05, + "loss": 2.0729, + "step": 29457 + }, + { + "epoch": 0.6546222222222222, + "grad_norm": 1.9158971309661865, + "learning_rate": 6.90909090909091e-05, + "loss": 2.732, + "step": 29458 + }, + { + "epoch": 0.6546444444444445, + "grad_norm": 1.7235842943191528, + "learning_rate": 6.90864636585908e-05, + "loss": 1.8045, + "step": 29459 + }, + { + "epoch": 0.6546666666666666, + "grad_norm": 2.2138214111328125, + "learning_rate": 6.90820182262725e-05, + "loss": 2.4499, + "step": 29460 + }, + { + "epoch": 0.6546888888888889, + "grad_norm": 1.5769482851028442, + "learning_rate": 6.907757279395421e-05, + "loss": 1.8384, + "step": 29461 + }, + { + "epoch": 0.6547111111111111, + "grad_norm": 1.7914637327194214, + "learning_rate": 6.907312736163592e-05, + "loss": 2.0621, + "step": 29462 + }, + { + "epoch": 0.6547333333333333, + "grad_norm": 1.412074327468872, + "learning_rate": 6.906868192931763e-05, + "loss": 1.6855, + "step": 29463 + }, + { + "epoch": 0.6547555555555555, + "grad_norm": 1.8703691959381104, + "learning_rate": 6.906423649699934e-05, + "loss": 2.2053, + "step": 29464 + }, + { + "epoch": 0.6547777777777778, + "grad_norm": 1.5958142280578613, + "learning_rate": 6.905979106468105e-05, + "loss": 2.143, + "step": 29465 + }, + { + "epoch": 0.6548, + "grad_norm": 1.4695812463760376, + "learning_rate": 6.905534563236276e-05, + "loss": 1.7765, + "step": 29466 + }, + { + "epoch": 0.6548222222222222, + "grad_norm": 1.472147822380066, + "learning_rate": 6.905090020004446e-05, + "loss": 1.1977, + "step": 29467 + }, + { + "epoch": 0.6548444444444445, + "grad_norm": 1.6705714464187622, + "learning_rate": 6.904645476772617e-05, + "loss": 2.2475, + "step": 29468 + }, + { + "epoch": 0.6548666666666667, + "grad_norm": 1.4583940505981445, + "learning_rate": 6.904200933540787e-05, + "loss": 1.7501, + "step": 29469 + }, + { + "epoch": 0.6548888888888889, + "grad_norm": 1.5646191835403442, + "learning_rate": 6.903756390308957e-05, + "loss": 1.6958, + "step": 29470 + }, + { + "epoch": 0.6549111111111111, + "grad_norm": 1.6197688579559326, + "learning_rate": 6.903311847077128e-05, + "loss": 2.0638, + "step": 29471 + }, + { + "epoch": 0.6549333333333334, + "grad_norm": 1.7141706943511963, + "learning_rate": 6.902867303845299e-05, + "loss": 1.8158, + "step": 29472 + }, + { + "epoch": 0.6549555555555555, + "grad_norm": 1.6812819242477417, + "learning_rate": 6.90242276061347e-05, + "loss": 2.1096, + "step": 29473 + }, + { + "epoch": 0.6549777777777778, + "grad_norm": 1.5177154541015625, + "learning_rate": 6.901978217381641e-05, + "loss": 1.5467, + "step": 29474 + }, + { + "epoch": 0.655, + "grad_norm": 1.914524793624878, + "learning_rate": 6.901533674149812e-05, + "loss": 2.4122, + "step": 29475 + }, + { + "epoch": 0.6550222222222222, + "grad_norm": 1.8134791851043701, + "learning_rate": 6.901089130917983e-05, + "loss": 1.9971, + "step": 29476 + }, + { + "epoch": 0.6550444444444444, + "grad_norm": 2.0190398693084717, + "learning_rate": 6.900644587686152e-05, + "loss": 2.315, + "step": 29477 + }, + { + "epoch": 0.6550666666666667, + "grad_norm": 1.5984684228897095, + "learning_rate": 6.900200044454323e-05, + "loss": 2.2506, + "step": 29478 + }, + { + "epoch": 0.6550888888888889, + "grad_norm": 1.6222925186157227, + "learning_rate": 6.899755501222494e-05, + "loss": 2.2784, + "step": 29479 + }, + { + "epoch": 0.6551111111111111, + "grad_norm": 1.7684059143066406, + "learning_rate": 6.899310957990664e-05, + "loss": 1.4624, + "step": 29480 + }, + { + "epoch": 0.6551333333333333, + "grad_norm": 1.797141671180725, + "learning_rate": 6.898866414758835e-05, + "loss": 1.712, + "step": 29481 + }, + { + "epoch": 0.6551555555555556, + "grad_norm": 1.7618950605392456, + "learning_rate": 6.898421871527007e-05, + "loss": 1.8547, + "step": 29482 + }, + { + "epoch": 0.6551777777777777, + "grad_norm": 1.7545932531356812, + "learning_rate": 6.897977328295177e-05, + "loss": 2.2939, + "step": 29483 + }, + { + "epoch": 0.6552, + "grad_norm": 2.0590357780456543, + "learning_rate": 6.897532785063348e-05, + "loss": 2.5174, + "step": 29484 + }, + { + "epoch": 0.6552222222222223, + "grad_norm": 1.5144672393798828, + "learning_rate": 6.897088241831519e-05, + "loss": 1.5754, + "step": 29485 + }, + { + "epoch": 0.6552444444444444, + "grad_norm": 0.1713659167289734, + "learning_rate": 6.89664369859969e-05, + "loss": 0.0272, + "step": 29486 + }, + { + "epoch": 0.6552666666666667, + "grad_norm": 1.5620533227920532, + "learning_rate": 6.89619915536786e-05, + "loss": 1.5878, + "step": 29487 + }, + { + "epoch": 0.6552888888888889, + "grad_norm": 1.8646880388259888, + "learning_rate": 6.89575461213603e-05, + "loss": 1.9633, + "step": 29488 + }, + { + "epoch": 0.6553111111111111, + "grad_norm": 1.9776960611343384, + "learning_rate": 6.895310068904201e-05, + "loss": 2.3859, + "step": 29489 + }, + { + "epoch": 0.6553333333333333, + "grad_norm": 2.0050442218780518, + "learning_rate": 6.894865525672371e-05, + "loss": 1.9738, + "step": 29490 + }, + { + "epoch": 0.6553555555555556, + "grad_norm": 1.5091774463653564, + "learning_rate": 6.894420982440543e-05, + "loss": 1.4317, + "step": 29491 + }, + { + "epoch": 0.6553777777777777, + "grad_norm": 1.7000620365142822, + "learning_rate": 6.893976439208714e-05, + "loss": 2.1876, + "step": 29492 + }, + { + "epoch": 0.6554, + "grad_norm": 1.7755290269851685, + "learning_rate": 6.893531895976884e-05, + "loss": 1.7824, + "step": 29493 + }, + { + "epoch": 0.6554222222222222, + "grad_norm": 1.6783421039581299, + "learning_rate": 6.893087352745055e-05, + "loss": 1.6337, + "step": 29494 + }, + { + "epoch": 0.6554444444444445, + "grad_norm": 2.275012254714966, + "learning_rate": 6.892642809513226e-05, + "loss": 1.7613, + "step": 29495 + }, + { + "epoch": 0.6554666666666666, + "grad_norm": 1.8266583681106567, + "learning_rate": 6.892198266281397e-05, + "loss": 2.0478, + "step": 29496 + }, + { + "epoch": 0.6554888888888889, + "grad_norm": 1.857939600944519, + "learning_rate": 6.891753723049566e-05, + "loss": 1.874, + "step": 29497 + }, + { + "epoch": 0.6555111111111112, + "grad_norm": 2.2057530879974365, + "learning_rate": 6.891309179817737e-05, + "loss": 2.0207, + "step": 29498 + }, + { + "epoch": 0.6555333333333333, + "grad_norm": 2.1313059329986572, + "learning_rate": 6.890864636585908e-05, + "loss": 2.0267, + "step": 29499 + }, + { + "epoch": 0.6555555555555556, + "grad_norm": 1.5375425815582275, + "learning_rate": 6.890420093354079e-05, + "loss": 1.0636, + "step": 29500 + }, + { + "epoch": 0.6555777777777778, + "grad_norm": 1.5120632648468018, + "learning_rate": 6.88997555012225e-05, + "loss": 1.5956, + "step": 29501 + }, + { + "epoch": 0.6556, + "grad_norm": 1.6631743907928467, + "learning_rate": 6.889531006890421e-05, + "loss": 2.4788, + "step": 29502 + }, + { + "epoch": 0.6556222222222222, + "grad_norm": 1.5857105255126953, + "learning_rate": 6.889086463658592e-05, + "loss": 2.9431, + "step": 29503 + }, + { + "epoch": 0.6556444444444445, + "grad_norm": 1.843294382095337, + "learning_rate": 6.888641920426762e-05, + "loss": 1.8507, + "step": 29504 + }, + { + "epoch": 0.6556666666666666, + "grad_norm": 2.2526984214782715, + "learning_rate": 6.888197377194933e-05, + "loss": 2.0335, + "step": 29505 + }, + { + "epoch": 0.6556888888888889, + "grad_norm": 1.6078662872314453, + "learning_rate": 6.887752833963104e-05, + "loss": 2.5839, + "step": 29506 + }, + { + "epoch": 0.6557111111111111, + "grad_norm": 1.6662286520004272, + "learning_rate": 6.887308290731273e-05, + "loss": 2.2819, + "step": 29507 + }, + { + "epoch": 0.6557333333333333, + "grad_norm": 1.4887229204177856, + "learning_rate": 6.886863747499444e-05, + "loss": 2.1366, + "step": 29508 + }, + { + "epoch": 0.6557555555555555, + "grad_norm": 1.8119449615478516, + "learning_rate": 6.886419204267615e-05, + "loss": 2.6567, + "step": 29509 + }, + { + "epoch": 0.6557777777777778, + "grad_norm": 1.9445807933807373, + "learning_rate": 6.885974661035786e-05, + "loss": 2.3424, + "step": 29510 + }, + { + "epoch": 0.6558, + "grad_norm": 1.461766004562378, + "learning_rate": 6.885530117803957e-05, + "loss": 1.5104, + "step": 29511 + }, + { + "epoch": 0.6558222222222222, + "grad_norm": 1.448891520500183, + "learning_rate": 6.885085574572128e-05, + "loss": 1.9026, + "step": 29512 + }, + { + "epoch": 0.6558444444444445, + "grad_norm": 1.5717157125473022, + "learning_rate": 6.884641031340299e-05, + "loss": 2.3704, + "step": 29513 + }, + { + "epoch": 0.6558666666666667, + "grad_norm": 1.8957597017288208, + "learning_rate": 6.884196488108469e-05, + "loss": 2.3059, + "step": 29514 + }, + { + "epoch": 0.6558888888888889, + "grad_norm": 1.5091511011123657, + "learning_rate": 6.88375194487664e-05, + "loss": 2.1742, + "step": 29515 + }, + { + "epoch": 0.6559111111111111, + "grad_norm": 1.6314390897750854, + "learning_rate": 6.88330740164481e-05, + "loss": 1.9497, + "step": 29516 + }, + { + "epoch": 0.6559333333333334, + "grad_norm": 1.682511568069458, + "learning_rate": 6.88286285841298e-05, + "loss": 2.5928, + "step": 29517 + }, + { + "epoch": 0.6559555555555555, + "grad_norm": 1.6695607900619507, + "learning_rate": 6.882418315181151e-05, + "loss": 2.4622, + "step": 29518 + }, + { + "epoch": 0.6559777777777778, + "grad_norm": 2.0376710891723633, + "learning_rate": 6.881973771949323e-05, + "loss": 2.1434, + "step": 29519 + }, + { + "epoch": 0.656, + "grad_norm": 1.7357299327850342, + "learning_rate": 6.881529228717493e-05, + "loss": 1.02, + "step": 29520 + }, + { + "epoch": 0.6560222222222222, + "grad_norm": 1.7393115758895874, + "learning_rate": 6.881084685485664e-05, + "loss": 2.2984, + "step": 29521 + }, + { + "epoch": 0.6560444444444444, + "grad_norm": 1.6580684185028076, + "learning_rate": 6.880640142253835e-05, + "loss": 2.1213, + "step": 29522 + }, + { + "epoch": 0.6560666666666667, + "grad_norm": 1.7607481479644775, + "learning_rate": 6.880195599022006e-05, + "loss": 1.9981, + "step": 29523 + }, + { + "epoch": 0.6560888888888889, + "grad_norm": 1.508754014968872, + "learning_rate": 6.879751055790175e-05, + "loss": 1.4985, + "step": 29524 + }, + { + "epoch": 0.6561111111111111, + "grad_norm": 1.6106895208358765, + "learning_rate": 6.879306512558346e-05, + "loss": 2.0425, + "step": 29525 + }, + { + "epoch": 0.6561333333333333, + "grad_norm": 1.9137887954711914, + "learning_rate": 6.878861969326517e-05, + "loss": 1.9829, + "step": 29526 + }, + { + "epoch": 0.6561555555555556, + "grad_norm": 1.5618802309036255, + "learning_rate": 6.878417426094687e-05, + "loss": 1.7582, + "step": 29527 + }, + { + "epoch": 0.6561777777777777, + "grad_norm": 1.4684094190597534, + "learning_rate": 6.877972882862859e-05, + "loss": 1.7052, + "step": 29528 + }, + { + "epoch": 0.6562, + "grad_norm": 1.937695026397705, + "learning_rate": 6.87752833963103e-05, + "loss": 2.0951, + "step": 29529 + }, + { + "epoch": 0.6562222222222223, + "grad_norm": 1.691579818725586, + "learning_rate": 6.8770837963992e-05, + "loss": 1.9227, + "step": 29530 + }, + { + "epoch": 0.6562444444444444, + "grad_norm": 1.5953030586242676, + "learning_rate": 6.876639253167371e-05, + "loss": 1.5954, + "step": 29531 + }, + { + "epoch": 0.6562666666666667, + "grad_norm": 1.897382378578186, + "learning_rate": 6.876194709935542e-05, + "loss": 2.3019, + "step": 29532 + }, + { + "epoch": 0.6562888888888889, + "grad_norm": 1.8537498712539673, + "learning_rate": 6.875750166703713e-05, + "loss": 2.1086, + "step": 29533 + }, + { + "epoch": 0.6563111111111111, + "grad_norm": 1.8957946300506592, + "learning_rate": 6.875305623471882e-05, + "loss": 1.8338, + "step": 29534 + }, + { + "epoch": 0.6563333333333333, + "grad_norm": 1.5951693058013916, + "learning_rate": 6.874861080240053e-05, + "loss": 1.7943, + "step": 29535 + }, + { + "epoch": 0.6563555555555556, + "grad_norm": 1.6607433557510376, + "learning_rate": 6.874416537008224e-05, + "loss": 1.9024, + "step": 29536 + }, + { + "epoch": 0.6563777777777777, + "grad_norm": 1.5129278898239136, + "learning_rate": 6.873971993776395e-05, + "loss": 1.636, + "step": 29537 + }, + { + "epoch": 0.6564, + "grad_norm": 1.4948451519012451, + "learning_rate": 6.873527450544566e-05, + "loss": 1.9594, + "step": 29538 + }, + { + "epoch": 0.6564222222222222, + "grad_norm": 2.0458195209503174, + "learning_rate": 6.873082907312737e-05, + "loss": 2.0937, + "step": 29539 + }, + { + "epoch": 0.6564444444444445, + "grad_norm": 1.569808840751648, + "learning_rate": 6.872638364080907e-05, + "loss": 1.3614, + "step": 29540 + }, + { + "epoch": 0.6564666666666666, + "grad_norm": 1.8471095561981201, + "learning_rate": 6.872193820849078e-05, + "loss": 1.6355, + "step": 29541 + }, + { + "epoch": 0.6564888888888889, + "grad_norm": 1.5188415050506592, + "learning_rate": 6.871749277617249e-05, + "loss": 1.2968, + "step": 29542 + }, + { + "epoch": 0.6565111111111112, + "grad_norm": 1.6712753772735596, + "learning_rate": 6.87130473438542e-05, + "loss": 1.5632, + "step": 29543 + }, + { + "epoch": 0.6565333333333333, + "grad_norm": 1.6549158096313477, + "learning_rate": 6.870860191153589e-05, + "loss": 2.0788, + "step": 29544 + }, + { + "epoch": 0.6565555555555556, + "grad_norm": 1.7481123208999634, + "learning_rate": 6.87041564792176e-05, + "loss": 1.4649, + "step": 29545 + }, + { + "epoch": 0.6565777777777778, + "grad_norm": 1.8728467226028442, + "learning_rate": 6.869971104689931e-05, + "loss": 1.9749, + "step": 29546 + }, + { + "epoch": 0.6566, + "grad_norm": 1.7548919916152954, + "learning_rate": 6.869526561458102e-05, + "loss": 2.0778, + "step": 29547 + }, + { + "epoch": 0.6566222222222222, + "grad_norm": 1.713470220565796, + "learning_rate": 6.869082018226273e-05, + "loss": 1.6491, + "step": 29548 + }, + { + "epoch": 0.6566444444444445, + "grad_norm": 1.8220151662826538, + "learning_rate": 6.868637474994444e-05, + "loss": 2.0425, + "step": 29549 + }, + { + "epoch": 0.6566666666666666, + "grad_norm": 1.7327014207839966, + "learning_rate": 6.868192931762614e-05, + "loss": 1.7185, + "step": 29550 + }, + { + "epoch": 0.6566888888888889, + "grad_norm": 0.9733756184577942, + "learning_rate": 6.867748388530785e-05, + "loss": 1.1545, + "step": 29551 + }, + { + "epoch": 0.6567111111111111, + "grad_norm": 1.092980146408081, + "learning_rate": 6.867303845298956e-05, + "loss": 1.2458, + "step": 29552 + }, + { + "epoch": 0.6567333333333333, + "grad_norm": 1.8089306354522705, + "learning_rate": 6.866859302067127e-05, + "loss": 1.9041, + "step": 29553 + }, + { + "epoch": 0.6567555555555555, + "grad_norm": 1.3253194093704224, + "learning_rate": 6.866414758835296e-05, + "loss": 1.9735, + "step": 29554 + }, + { + "epoch": 0.6567777777777778, + "grad_norm": 2.6289620399475098, + "learning_rate": 6.865970215603467e-05, + "loss": 1.8887, + "step": 29555 + }, + { + "epoch": 0.6568, + "grad_norm": 1.636061191558838, + "learning_rate": 6.86552567237164e-05, + "loss": 2.3801, + "step": 29556 + }, + { + "epoch": 0.6568222222222222, + "grad_norm": 1.466566801071167, + "learning_rate": 6.865081129139809e-05, + "loss": 1.0834, + "step": 29557 + }, + { + "epoch": 0.6568444444444445, + "grad_norm": 1.4488520622253418, + "learning_rate": 6.86463658590798e-05, + "loss": 1.9759, + "step": 29558 + }, + { + "epoch": 0.6568666666666667, + "grad_norm": 1.8230533599853516, + "learning_rate": 6.864192042676151e-05, + "loss": 2.4713, + "step": 29559 + }, + { + "epoch": 0.6568888888888889, + "grad_norm": 1.695978045463562, + "learning_rate": 6.863747499444322e-05, + "loss": 2.4114, + "step": 29560 + }, + { + "epoch": 0.6569111111111111, + "grad_norm": 1.4497877359390259, + "learning_rate": 6.863302956212492e-05, + "loss": 2.0624, + "step": 29561 + }, + { + "epoch": 0.6569333333333334, + "grad_norm": 1.780235767364502, + "learning_rate": 6.862858412980663e-05, + "loss": 1.9743, + "step": 29562 + }, + { + "epoch": 0.6569555555555555, + "grad_norm": 1.5658392906188965, + "learning_rate": 6.862413869748833e-05, + "loss": 1.8545, + "step": 29563 + }, + { + "epoch": 0.6569777777777778, + "grad_norm": 1.272002100944519, + "learning_rate": 6.861969326517003e-05, + "loss": 1.5825, + "step": 29564 + }, + { + "epoch": 0.657, + "grad_norm": 1.6310688257217407, + "learning_rate": 6.861524783285175e-05, + "loss": 2.2191, + "step": 29565 + }, + { + "epoch": 0.6570222222222222, + "grad_norm": 1.7843564748764038, + "learning_rate": 6.861080240053346e-05, + "loss": 2.2956, + "step": 29566 + }, + { + "epoch": 0.6570444444444444, + "grad_norm": 1.9026020765304565, + "learning_rate": 6.860635696821516e-05, + "loss": 2.1435, + "step": 29567 + }, + { + "epoch": 0.6570666666666667, + "grad_norm": 1.5643749237060547, + "learning_rate": 6.860191153589687e-05, + "loss": 2.0786, + "step": 29568 + }, + { + "epoch": 0.6570888888888888, + "grad_norm": 1.9233726263046265, + "learning_rate": 6.859746610357858e-05, + "loss": 1.6316, + "step": 29569 + }, + { + "epoch": 0.6571111111111111, + "grad_norm": 1.754144549369812, + "learning_rate": 6.859302067126029e-05, + "loss": 2.0501, + "step": 29570 + }, + { + "epoch": 0.6571333333333333, + "grad_norm": 1.6095532178878784, + "learning_rate": 6.858857523894198e-05, + "loss": 1.973, + "step": 29571 + }, + { + "epoch": 0.6571555555555556, + "grad_norm": 1.6992303133010864, + "learning_rate": 6.85841298066237e-05, + "loss": 1.8782, + "step": 29572 + }, + { + "epoch": 0.6571777777777777, + "grad_norm": 2.066359043121338, + "learning_rate": 6.85796843743054e-05, + "loss": 2.3603, + "step": 29573 + }, + { + "epoch": 0.6572, + "grad_norm": 1.9374815225601196, + "learning_rate": 6.857523894198711e-05, + "loss": 1.8272, + "step": 29574 + }, + { + "epoch": 0.6572222222222223, + "grad_norm": 1.6850558519363403, + "learning_rate": 6.857079350966882e-05, + "loss": 2.108, + "step": 29575 + }, + { + "epoch": 0.6572444444444444, + "grad_norm": 1.5528672933578491, + "learning_rate": 6.856634807735053e-05, + "loss": 1.6883, + "step": 29576 + }, + { + "epoch": 0.6572666666666667, + "grad_norm": 1.6508656740188599, + "learning_rate": 6.856190264503223e-05, + "loss": 1.9709, + "step": 29577 + }, + { + "epoch": 0.6572888888888889, + "grad_norm": 1.7889775037765503, + "learning_rate": 6.855745721271394e-05, + "loss": 1.8791, + "step": 29578 + }, + { + "epoch": 0.6573111111111111, + "grad_norm": 2.086968421936035, + "learning_rate": 6.855301178039565e-05, + "loss": 2.4607, + "step": 29579 + }, + { + "epoch": 0.6573333333333333, + "grad_norm": 1.2391284704208374, + "learning_rate": 6.854856634807736e-05, + "loss": 0.9328, + "step": 29580 + }, + { + "epoch": 0.6573555555555556, + "grad_norm": 1.818240761756897, + "learning_rate": 6.854412091575905e-05, + "loss": 1.5874, + "step": 29581 + }, + { + "epoch": 0.6573777777777777, + "grad_norm": 1.5828113555908203, + "learning_rate": 6.853967548344076e-05, + "loss": 1.6431, + "step": 29582 + }, + { + "epoch": 0.6574, + "grad_norm": 1.8962316513061523, + "learning_rate": 6.853523005112247e-05, + "loss": 2.2709, + "step": 29583 + }, + { + "epoch": 0.6574222222222222, + "grad_norm": 1.5729304552078247, + "learning_rate": 6.853078461880418e-05, + "loss": 1.5839, + "step": 29584 + }, + { + "epoch": 0.6574444444444445, + "grad_norm": 2.0226142406463623, + "learning_rate": 6.852633918648589e-05, + "loss": 2.0012, + "step": 29585 + }, + { + "epoch": 0.6574666666666666, + "grad_norm": 1.720937728881836, + "learning_rate": 6.85218937541676e-05, + "loss": 1.717, + "step": 29586 + }, + { + "epoch": 0.6574888888888889, + "grad_norm": 1.8267759084701538, + "learning_rate": 6.85174483218493e-05, + "loss": 1.6517, + "step": 29587 + }, + { + "epoch": 0.6575111111111112, + "grad_norm": 1.7748316526412964, + "learning_rate": 6.851300288953101e-05, + "loss": 1.7497, + "step": 29588 + }, + { + "epoch": 0.6575333333333333, + "grad_norm": 1.5970873832702637, + "learning_rate": 6.850855745721272e-05, + "loss": 1.9194, + "step": 29589 + }, + { + "epoch": 0.6575555555555556, + "grad_norm": 1.6664336919784546, + "learning_rate": 6.850411202489443e-05, + "loss": 1.8788, + "step": 29590 + }, + { + "epoch": 0.6575777777777778, + "grad_norm": 1.6667951345443726, + "learning_rate": 6.849966659257612e-05, + "loss": 1.8426, + "step": 29591 + }, + { + "epoch": 0.6576, + "grad_norm": 1.5941898822784424, + "learning_rate": 6.849522116025783e-05, + "loss": 1.5277, + "step": 29592 + }, + { + "epoch": 0.6576222222222222, + "grad_norm": 1.630222201347351, + "learning_rate": 6.849077572793956e-05, + "loss": 1.6931, + "step": 29593 + }, + { + "epoch": 0.6576444444444445, + "grad_norm": 1.4186203479766846, + "learning_rate": 6.848633029562125e-05, + "loss": 1.4202, + "step": 29594 + }, + { + "epoch": 0.6576666666666666, + "grad_norm": 1.4970699548721313, + "learning_rate": 6.848188486330296e-05, + "loss": 1.3966, + "step": 29595 + }, + { + "epoch": 0.6576888888888889, + "grad_norm": 1.7796052694320679, + "learning_rate": 6.847743943098467e-05, + "loss": 1.799, + "step": 29596 + }, + { + "epoch": 0.6577111111111111, + "grad_norm": 1.919205904006958, + "learning_rate": 6.847299399866637e-05, + "loss": 1.7903, + "step": 29597 + }, + { + "epoch": 0.6577333333333333, + "grad_norm": 1.979422926902771, + "learning_rate": 6.846854856634808e-05, + "loss": 1.9165, + "step": 29598 + }, + { + "epoch": 0.6577555555555555, + "grad_norm": 1.5138273239135742, + "learning_rate": 6.846410313402979e-05, + "loss": 1.5262, + "step": 29599 + }, + { + "epoch": 0.6577777777777778, + "grad_norm": 1.6930948495864868, + "learning_rate": 6.84596577017115e-05, + "loss": 1.0738, + "step": 29600 + }, + { + "epoch": 0.6578, + "grad_norm": 1.5009393692016602, + "learning_rate": 6.845521226939319e-05, + "loss": 1.9558, + "step": 29601 + }, + { + "epoch": 0.6578222222222222, + "grad_norm": 1.225827693939209, + "learning_rate": 6.845076683707492e-05, + "loss": 1.5175, + "step": 29602 + }, + { + "epoch": 0.6578444444444445, + "grad_norm": 1.4594206809997559, + "learning_rate": 6.844632140475662e-05, + "loss": 2.0969, + "step": 29603 + }, + { + "epoch": 0.6578666666666667, + "grad_norm": 1.7906701564788818, + "learning_rate": 6.844187597243832e-05, + "loss": 2.1246, + "step": 29604 + }, + { + "epoch": 0.6578888888888889, + "grad_norm": 1.4477447271347046, + "learning_rate": 6.843743054012003e-05, + "loss": 1.8149, + "step": 29605 + }, + { + "epoch": 0.6579111111111111, + "grad_norm": 1.7752348184585571, + "learning_rate": 6.843298510780174e-05, + "loss": 2.6142, + "step": 29606 + }, + { + "epoch": 0.6579333333333334, + "grad_norm": 1.4473702907562256, + "learning_rate": 6.842853967548345e-05, + "loss": 1.5544, + "step": 29607 + }, + { + "epoch": 0.6579555555555555, + "grad_norm": 1.456236720085144, + "learning_rate": 6.842409424316515e-05, + "loss": 2.0661, + "step": 29608 + }, + { + "epoch": 0.6579777777777778, + "grad_norm": 1.6258362531661987, + "learning_rate": 6.841964881084686e-05, + "loss": 1.7835, + "step": 29609 + }, + { + "epoch": 0.658, + "grad_norm": 1.791027307510376, + "learning_rate": 6.841520337852857e-05, + "loss": 2.0022, + "step": 29610 + }, + { + "epoch": 0.6580222222222222, + "grad_norm": 2.289262056350708, + "learning_rate": 6.841075794621027e-05, + "loss": 1.7229, + "step": 29611 + }, + { + "epoch": 0.6580444444444444, + "grad_norm": 1.7048383951187134, + "learning_rate": 6.840631251389198e-05, + "loss": 2.1676, + "step": 29612 + }, + { + "epoch": 0.6580666666666667, + "grad_norm": 1.8120087385177612, + "learning_rate": 6.84018670815737e-05, + "loss": 2.9178, + "step": 29613 + }, + { + "epoch": 0.6580888888888888, + "grad_norm": 1.4685407876968384, + "learning_rate": 6.839742164925539e-05, + "loss": 1.1795, + "step": 29614 + }, + { + "epoch": 0.6581111111111111, + "grad_norm": 1.9184573888778687, + "learning_rate": 6.83929762169371e-05, + "loss": 2.4737, + "step": 29615 + }, + { + "epoch": 0.6581333333333333, + "grad_norm": 1.8086332082748413, + "learning_rate": 6.838853078461881e-05, + "loss": 1.844, + "step": 29616 + }, + { + "epoch": 0.6581555555555556, + "grad_norm": 1.885086178779602, + "learning_rate": 6.838408535230052e-05, + "loss": 1.7616, + "step": 29617 + }, + { + "epoch": 0.6581777777777778, + "grad_norm": 1.7152103185653687, + "learning_rate": 6.837963991998221e-05, + "loss": 2.3195, + "step": 29618 + }, + { + "epoch": 0.6582, + "grad_norm": 2.053940773010254, + "learning_rate": 6.837519448766392e-05, + "loss": 2.235, + "step": 29619 + }, + { + "epoch": 0.6582222222222223, + "grad_norm": 1.9013762474060059, + "learning_rate": 6.837074905534563e-05, + "loss": 2.3344, + "step": 29620 + }, + { + "epoch": 0.6582444444444444, + "grad_norm": 1.1086158752441406, + "learning_rate": 6.836630362302734e-05, + "loss": 0.9228, + "step": 29621 + }, + { + "epoch": 0.6582666666666667, + "grad_norm": 1.6535439491271973, + "learning_rate": 6.836185819070905e-05, + "loss": 1.9664, + "step": 29622 + }, + { + "epoch": 0.6582888888888889, + "grad_norm": 1.2858459949493408, + "learning_rate": 6.835741275839076e-05, + "loss": 0.9314, + "step": 29623 + }, + { + "epoch": 0.6583111111111111, + "grad_norm": 1.4932442903518677, + "learning_rate": 6.835296732607246e-05, + "loss": 1.8218, + "step": 29624 + }, + { + "epoch": 0.6583333333333333, + "grad_norm": 1.3918441534042358, + "learning_rate": 6.834852189375417e-05, + "loss": 1.7047, + "step": 29625 + }, + { + "epoch": 0.6583555555555556, + "grad_norm": 2.063030242919922, + "learning_rate": 6.834407646143588e-05, + "loss": 2.3652, + "step": 29626 + }, + { + "epoch": 0.6583777777777777, + "grad_norm": 2.071507453918457, + "learning_rate": 6.833963102911759e-05, + "loss": 1.9228, + "step": 29627 + }, + { + "epoch": 0.6584, + "grad_norm": 1.5261411666870117, + "learning_rate": 6.833518559679928e-05, + "loss": 1.667, + "step": 29628 + }, + { + "epoch": 0.6584222222222222, + "grad_norm": 1.715639591217041, + "learning_rate": 6.8330740164481e-05, + "loss": 1.8281, + "step": 29629 + }, + { + "epoch": 0.6584444444444445, + "grad_norm": 1.7601677179336548, + "learning_rate": 6.832629473216272e-05, + "loss": 2.1907, + "step": 29630 + }, + { + "epoch": 0.6584666666666666, + "grad_norm": 1.5039981603622437, + "learning_rate": 6.832184929984441e-05, + "loss": 1.7468, + "step": 29631 + }, + { + "epoch": 0.6584888888888889, + "grad_norm": 1.8185672760009766, + "learning_rate": 6.831740386752612e-05, + "loss": 1.5048, + "step": 29632 + }, + { + "epoch": 0.6585111111111112, + "grad_norm": 1.8447353839874268, + "learning_rate": 6.831295843520783e-05, + "loss": 1.9108, + "step": 29633 + }, + { + "epoch": 0.6585333333333333, + "grad_norm": 2.0437545776367188, + "learning_rate": 6.830851300288953e-05, + "loss": 2.3156, + "step": 29634 + }, + { + "epoch": 0.6585555555555556, + "grad_norm": 1.719464659690857, + "learning_rate": 6.830406757057124e-05, + "loss": 1.751, + "step": 29635 + }, + { + "epoch": 0.6585777777777778, + "grad_norm": 1.796617865562439, + "learning_rate": 6.829962213825295e-05, + "loss": 1.6782, + "step": 29636 + }, + { + "epoch": 0.6586, + "grad_norm": 1.913540244102478, + "learning_rate": 6.829517670593466e-05, + "loss": 2.1384, + "step": 29637 + }, + { + "epoch": 0.6586222222222222, + "grad_norm": 1.398923397064209, + "learning_rate": 6.829073127361635e-05, + "loss": 1.5806, + "step": 29638 + }, + { + "epoch": 0.6586444444444445, + "grad_norm": 1.6090314388275146, + "learning_rate": 6.828628584129808e-05, + "loss": 1.4752, + "step": 29639 + }, + { + "epoch": 0.6586666666666666, + "grad_norm": 2.2967145442962646, + "learning_rate": 6.828184040897979e-05, + "loss": 2.758, + "step": 29640 + }, + { + "epoch": 0.6586888888888889, + "grad_norm": 1.6961264610290527, + "learning_rate": 6.827739497666148e-05, + "loss": 1.7788, + "step": 29641 + }, + { + "epoch": 0.6587111111111111, + "grad_norm": 1.4948949813842773, + "learning_rate": 6.827294954434319e-05, + "loss": 1.3806, + "step": 29642 + }, + { + "epoch": 0.6587333333333333, + "grad_norm": 1.6879496574401855, + "learning_rate": 6.82685041120249e-05, + "loss": 2.105, + "step": 29643 + }, + { + "epoch": 0.6587555555555555, + "grad_norm": 1.7877131700515747, + "learning_rate": 6.82640586797066e-05, + "loss": 1.9023, + "step": 29644 + }, + { + "epoch": 0.6587777777777778, + "grad_norm": 1.640920639038086, + "learning_rate": 6.825961324738831e-05, + "loss": 1.5046, + "step": 29645 + }, + { + "epoch": 0.6588, + "grad_norm": 1.6340906620025635, + "learning_rate": 6.825516781507002e-05, + "loss": 1.6889, + "step": 29646 + }, + { + "epoch": 0.6588222222222222, + "grad_norm": 1.9517959356307983, + "learning_rate": 6.825072238275173e-05, + "loss": 1.8836, + "step": 29647 + }, + { + "epoch": 0.6588444444444445, + "grad_norm": 2.099510669708252, + "learning_rate": 6.824627695043344e-05, + "loss": 1.684, + "step": 29648 + }, + { + "epoch": 0.6588666666666667, + "grad_norm": 1.5277636051177979, + "learning_rate": 6.824183151811515e-05, + "loss": 1.368, + "step": 29649 + }, + { + "epoch": 0.6588888888888889, + "grad_norm": 2.4442501068115234, + "learning_rate": 6.823738608579685e-05, + "loss": 2.1152, + "step": 29650 + }, + { + "epoch": 0.6589111111111111, + "grad_norm": 0.9391299486160278, + "learning_rate": 6.823294065347855e-05, + "loss": 1.2782, + "step": 29651 + }, + { + "epoch": 0.6589333333333334, + "grad_norm": 1.7166022062301636, + "learning_rate": 6.822849522116026e-05, + "loss": 2.52, + "step": 29652 + }, + { + "epoch": 0.6589555555555555, + "grad_norm": 0.9608427286148071, + "learning_rate": 6.822404978884197e-05, + "loss": 0.9397, + "step": 29653 + }, + { + "epoch": 0.6589777777777778, + "grad_norm": 0.9675276279449463, + "learning_rate": 6.821960435652367e-05, + "loss": 0.9316, + "step": 29654 + }, + { + "epoch": 0.659, + "grad_norm": 1.6645857095718384, + "learning_rate": 6.821515892420538e-05, + "loss": 2.3069, + "step": 29655 + }, + { + "epoch": 0.6590222222222222, + "grad_norm": 1.3648736476898193, + "learning_rate": 6.821071349188709e-05, + "loss": 1.6865, + "step": 29656 + }, + { + "epoch": 0.6590444444444444, + "grad_norm": 1.2589092254638672, + "learning_rate": 6.82062680595688e-05, + "loss": 2.0334, + "step": 29657 + }, + { + "epoch": 0.6590666666666667, + "grad_norm": 1.7342948913574219, + "learning_rate": 6.82018226272505e-05, + "loss": 2.4378, + "step": 29658 + }, + { + "epoch": 0.6590888888888888, + "grad_norm": 1.6458474397659302, + "learning_rate": 6.819737719493221e-05, + "loss": 1.6645, + "step": 29659 + }, + { + "epoch": 0.6591111111111111, + "grad_norm": 1.6723769903182983, + "learning_rate": 6.819293176261392e-05, + "loss": 1.7515, + "step": 29660 + }, + { + "epoch": 0.6591333333333333, + "grad_norm": 1.7363861799240112, + "learning_rate": 6.818848633029562e-05, + "loss": 2.6189, + "step": 29661 + }, + { + "epoch": 0.6591555555555556, + "grad_norm": 1.4819042682647705, + "learning_rate": 6.818404089797733e-05, + "loss": 1.5243, + "step": 29662 + }, + { + "epoch": 0.6591777777777778, + "grad_norm": 1.7086176872253418, + "learning_rate": 6.817959546565904e-05, + "loss": 2.3058, + "step": 29663 + }, + { + "epoch": 0.6592, + "grad_norm": 1.4702590703964233, + "learning_rate": 6.817515003334075e-05, + "loss": 1.3932, + "step": 29664 + }, + { + "epoch": 0.6592222222222223, + "grad_norm": 1.6508628129959106, + "learning_rate": 6.817070460102244e-05, + "loss": 2.3402, + "step": 29665 + }, + { + "epoch": 0.6592444444444444, + "grad_norm": 1.8322089910507202, + "learning_rate": 6.816625916870415e-05, + "loss": 2.0941, + "step": 29666 + }, + { + "epoch": 0.6592666666666667, + "grad_norm": 1.5012660026550293, + "learning_rate": 6.816181373638588e-05, + "loss": 1.861, + "step": 29667 + }, + { + "epoch": 0.6592888888888889, + "grad_norm": 1.5552964210510254, + "learning_rate": 6.815736830406757e-05, + "loss": 1.7842, + "step": 29668 + }, + { + "epoch": 0.6593111111111111, + "grad_norm": 1.3963440656661987, + "learning_rate": 6.815292287174928e-05, + "loss": 1.2243, + "step": 29669 + }, + { + "epoch": 0.6593333333333333, + "grad_norm": 1.7548903226852417, + "learning_rate": 6.814847743943099e-05, + "loss": 1.9696, + "step": 29670 + }, + { + "epoch": 0.6593555555555556, + "grad_norm": 1.929069995880127, + "learning_rate": 6.814403200711269e-05, + "loss": 2.0156, + "step": 29671 + }, + { + "epoch": 0.6593777777777777, + "grad_norm": 1.6079157590866089, + "learning_rate": 6.81395865747944e-05, + "loss": 2.0887, + "step": 29672 + }, + { + "epoch": 0.6594, + "grad_norm": 2.9583001136779785, + "learning_rate": 6.813514114247611e-05, + "loss": 1.8755, + "step": 29673 + }, + { + "epoch": 0.6594222222222222, + "grad_norm": 1.3937418460845947, + "learning_rate": 6.813069571015782e-05, + "loss": 1.1554, + "step": 29674 + }, + { + "epoch": 0.6594444444444445, + "grad_norm": 1.1485793590545654, + "learning_rate": 6.812625027783951e-05, + "loss": 1.0502, + "step": 29675 + }, + { + "epoch": 0.6594666666666666, + "grad_norm": 1.6714928150177002, + "learning_rate": 6.812180484552124e-05, + "loss": 1.9522, + "step": 29676 + }, + { + "epoch": 0.6594888888888889, + "grad_norm": 1.5369539260864258, + "learning_rate": 6.811735941320295e-05, + "loss": 1.8008, + "step": 29677 + }, + { + "epoch": 0.6595111111111112, + "grad_norm": 1.638718843460083, + "learning_rate": 6.811291398088464e-05, + "loss": 1.6649, + "step": 29678 + }, + { + "epoch": 0.6595333333333333, + "grad_norm": 2.0578885078430176, + "learning_rate": 6.810846854856635e-05, + "loss": 2.3217, + "step": 29679 + }, + { + "epoch": 0.6595555555555556, + "grad_norm": 1.6226433515548706, + "learning_rate": 6.810402311624806e-05, + "loss": 1.8039, + "step": 29680 + }, + { + "epoch": 0.6595777777777778, + "grad_norm": 1.656620740890503, + "learning_rate": 6.809957768392976e-05, + "loss": 1.5798, + "step": 29681 + }, + { + "epoch": 0.6596, + "grad_norm": 1.6753404140472412, + "learning_rate": 6.809513225161147e-05, + "loss": 1.861, + "step": 29682 + }, + { + "epoch": 0.6596222222222222, + "grad_norm": 1.7894854545593262, + "learning_rate": 6.809068681929318e-05, + "loss": 1.6227, + "step": 29683 + }, + { + "epoch": 0.6596444444444445, + "grad_norm": 1.937540888786316, + "learning_rate": 6.808624138697489e-05, + "loss": 2.182, + "step": 29684 + }, + { + "epoch": 0.6596666666666666, + "grad_norm": 1.7234638929367065, + "learning_rate": 6.80817959546566e-05, + "loss": 2.4864, + "step": 29685 + }, + { + "epoch": 0.6596888888888889, + "grad_norm": 1.9724160432815552, + "learning_rate": 6.80773505223383e-05, + "loss": 2.3914, + "step": 29686 + }, + { + "epoch": 0.6597111111111111, + "grad_norm": 1.872678518295288, + "learning_rate": 6.807290509002002e-05, + "loss": 1.9283, + "step": 29687 + }, + { + "epoch": 0.6597333333333333, + "grad_norm": 1.7543476819992065, + "learning_rate": 6.806845965770171e-05, + "loss": 1.6584, + "step": 29688 + }, + { + "epoch": 0.6597555555555555, + "grad_norm": 1.8167245388031006, + "learning_rate": 6.806401422538342e-05, + "loss": 1.7004, + "step": 29689 + }, + { + "epoch": 0.6597777777777778, + "grad_norm": 1.886895775794983, + "learning_rate": 6.805956879306513e-05, + "loss": 1.8245, + "step": 29690 + }, + { + "epoch": 0.6598, + "grad_norm": 1.7559186220169067, + "learning_rate": 6.805512336074683e-05, + "loss": 1.9585, + "step": 29691 + }, + { + "epoch": 0.6598222222222222, + "grad_norm": 1.783811330795288, + "learning_rate": 6.805067792842854e-05, + "loss": 1.9343, + "step": 29692 + }, + { + "epoch": 0.6598444444444445, + "grad_norm": 1.630228877067566, + "learning_rate": 6.804623249611025e-05, + "loss": 1.4084, + "step": 29693 + }, + { + "epoch": 0.6598666666666667, + "grad_norm": 1.8122942447662354, + "learning_rate": 6.804178706379196e-05, + "loss": 1.8979, + "step": 29694 + }, + { + "epoch": 0.6598888888888889, + "grad_norm": 2.2165892124176025, + "learning_rate": 6.803734163147367e-05, + "loss": 2.2585, + "step": 29695 + }, + { + "epoch": 0.6599111111111111, + "grad_norm": 1.2415522336959839, + "learning_rate": 6.803289619915538e-05, + "loss": 1.1165, + "step": 29696 + }, + { + "epoch": 0.6599333333333334, + "grad_norm": 1.784896969795227, + "learning_rate": 6.802845076683709e-05, + "loss": 1.7723, + "step": 29697 + }, + { + "epoch": 0.6599555555555555, + "grad_norm": 2.093536615371704, + "learning_rate": 6.802400533451878e-05, + "loss": 1.4105, + "step": 29698 + }, + { + "epoch": 0.6599777777777778, + "grad_norm": 1.7735121250152588, + "learning_rate": 6.801955990220049e-05, + "loss": 1.7523, + "step": 29699 + }, + { + "epoch": 0.66, + "grad_norm": 1.0457617044448853, + "learning_rate": 6.80151144698822e-05, + "loss": 0.6506, + "step": 29700 + }, + { + "epoch": 0.6600222222222222, + "grad_norm": 1.3916157484054565, + "learning_rate": 6.80106690375639e-05, + "loss": 2.4461, + "step": 29701 + }, + { + "epoch": 0.6600444444444444, + "grad_norm": 1.639994502067566, + "learning_rate": 6.80062236052456e-05, + "loss": 2.5601, + "step": 29702 + }, + { + "epoch": 0.6600666666666667, + "grad_norm": 1.5185401439666748, + "learning_rate": 6.800177817292732e-05, + "loss": 2.6104, + "step": 29703 + }, + { + "epoch": 0.6600888888888888, + "grad_norm": 1.7068262100219727, + "learning_rate": 6.799733274060904e-05, + "loss": 1.9843, + "step": 29704 + }, + { + "epoch": 0.6601111111111111, + "grad_norm": 1.891868233680725, + "learning_rate": 6.799288730829073e-05, + "loss": 3.0391, + "step": 29705 + }, + { + "epoch": 0.6601333333333333, + "grad_norm": 1.5163244009017944, + "learning_rate": 6.798844187597244e-05, + "loss": 2.0175, + "step": 29706 + }, + { + "epoch": 0.6601555555555556, + "grad_norm": 1.5959887504577637, + "learning_rate": 6.798399644365415e-05, + "loss": 2.1754, + "step": 29707 + }, + { + "epoch": 0.6601777777777778, + "grad_norm": 1.3765621185302734, + "learning_rate": 6.797955101133585e-05, + "loss": 2.1678, + "step": 29708 + }, + { + "epoch": 0.6602, + "grad_norm": 1.5355892181396484, + "learning_rate": 6.797510557901756e-05, + "loss": 1.8159, + "step": 29709 + }, + { + "epoch": 0.6602222222222223, + "grad_norm": 1.7196125984191895, + "learning_rate": 6.797066014669927e-05, + "loss": 1.6837, + "step": 29710 + }, + { + "epoch": 0.6602444444444444, + "grad_norm": 1.511362910270691, + "learning_rate": 6.796621471438097e-05, + "loss": 2.4021, + "step": 29711 + }, + { + "epoch": 0.6602666666666667, + "grad_norm": 1.6308833360671997, + "learning_rate": 6.796176928206268e-05, + "loss": 1.6508, + "step": 29712 + }, + { + "epoch": 0.6602888888888889, + "grad_norm": 1.5413122177124023, + "learning_rate": 6.79573238497444e-05, + "loss": 2.0749, + "step": 29713 + }, + { + "epoch": 0.6603111111111111, + "grad_norm": 1.774201512336731, + "learning_rate": 6.795287841742611e-05, + "loss": 1.9161, + "step": 29714 + }, + { + "epoch": 0.6603333333333333, + "grad_norm": 1.5780080556869507, + "learning_rate": 6.79484329851078e-05, + "loss": 2.2094, + "step": 29715 + }, + { + "epoch": 0.6603555555555556, + "grad_norm": 1.2944010496139526, + "learning_rate": 6.794398755278951e-05, + "loss": 1.785, + "step": 29716 + }, + { + "epoch": 0.6603777777777777, + "grad_norm": 1.6052595376968384, + "learning_rate": 6.793954212047122e-05, + "loss": 2.0328, + "step": 29717 + }, + { + "epoch": 0.6604, + "grad_norm": 1.6314421892166138, + "learning_rate": 6.793509668815292e-05, + "loss": 1.9605, + "step": 29718 + }, + { + "epoch": 0.6604222222222222, + "grad_norm": 1.7301579713821411, + "learning_rate": 6.793065125583463e-05, + "loss": 1.9727, + "step": 29719 + }, + { + "epoch": 0.6604444444444444, + "grad_norm": 1.8894845247268677, + "learning_rate": 6.792620582351634e-05, + "loss": 2.1291, + "step": 29720 + }, + { + "epoch": 0.6604666666666666, + "grad_norm": 1.5691381692886353, + "learning_rate": 6.792176039119805e-05, + "loss": 1.5028, + "step": 29721 + }, + { + "epoch": 0.6604888888888889, + "grad_norm": 1.8337043523788452, + "learning_rate": 6.791731495887976e-05, + "loss": 1.5781, + "step": 29722 + }, + { + "epoch": 0.6605111111111112, + "grad_norm": 1.8304129838943481, + "learning_rate": 6.791286952656147e-05, + "loss": 2.2604, + "step": 29723 + }, + { + "epoch": 0.6605333333333333, + "grad_norm": 1.7375537157058716, + "learning_rate": 6.790842409424318e-05, + "loss": 2.2333, + "step": 29724 + }, + { + "epoch": 0.6605555555555556, + "grad_norm": 1.5077272653579712, + "learning_rate": 6.790397866192487e-05, + "loss": 1.47, + "step": 29725 + }, + { + "epoch": 0.6605777777777778, + "grad_norm": 1.7780345678329468, + "learning_rate": 6.789953322960658e-05, + "loss": 1.7045, + "step": 29726 + }, + { + "epoch": 0.6606, + "grad_norm": 1.7432992458343506, + "learning_rate": 6.789508779728829e-05, + "loss": 1.9861, + "step": 29727 + }, + { + "epoch": 0.6606222222222222, + "grad_norm": 1.5569027662277222, + "learning_rate": 6.789064236496999e-05, + "loss": 1.6343, + "step": 29728 + }, + { + "epoch": 0.6606444444444445, + "grad_norm": 1.9147087335586548, + "learning_rate": 6.78861969326517e-05, + "loss": 1.8007, + "step": 29729 + }, + { + "epoch": 0.6606666666666666, + "grad_norm": 1.8529517650604248, + "learning_rate": 6.788175150033341e-05, + "loss": 2.0208, + "step": 29730 + }, + { + "epoch": 0.6606888888888889, + "grad_norm": 1.5898747444152832, + "learning_rate": 6.787730606801512e-05, + "loss": 1.9657, + "step": 29731 + }, + { + "epoch": 0.6607111111111111, + "grad_norm": 2.055760622024536, + "learning_rate": 6.787286063569683e-05, + "loss": 2.0554, + "step": 29732 + }, + { + "epoch": 0.6607333333333333, + "grad_norm": 2.072956085205078, + "learning_rate": 6.786841520337854e-05, + "loss": 2.523, + "step": 29733 + }, + { + "epoch": 0.6607555555555555, + "grad_norm": 1.6523401737213135, + "learning_rate": 6.786396977106025e-05, + "loss": 1.6765, + "step": 29734 + }, + { + "epoch": 0.6607777777777778, + "grad_norm": 1.8183640241622925, + "learning_rate": 6.785952433874194e-05, + "loss": 2.185, + "step": 29735 + }, + { + "epoch": 0.6608, + "grad_norm": 1.4948359727859497, + "learning_rate": 6.785507890642365e-05, + "loss": 1.467, + "step": 29736 + }, + { + "epoch": 0.6608222222222222, + "grad_norm": 1.782305121421814, + "learning_rate": 6.785063347410536e-05, + "loss": 2.0008, + "step": 29737 + }, + { + "epoch": 0.6608444444444445, + "grad_norm": 1.9820513725280762, + "learning_rate": 6.784618804178706e-05, + "loss": 1.8032, + "step": 29738 + }, + { + "epoch": 0.6608666666666667, + "grad_norm": 1.8400990962982178, + "learning_rate": 6.784174260946877e-05, + "loss": 2.0825, + "step": 29739 + }, + { + "epoch": 0.6608888888888889, + "grad_norm": 1.880311131477356, + "learning_rate": 6.783729717715048e-05, + "loss": 2.0094, + "step": 29740 + }, + { + "epoch": 0.6609111111111111, + "grad_norm": 1.5467296838760376, + "learning_rate": 6.78328517448322e-05, + "loss": 1.53, + "step": 29741 + }, + { + "epoch": 0.6609333333333334, + "grad_norm": 1.9151309728622437, + "learning_rate": 6.78284063125139e-05, + "loss": 2.0012, + "step": 29742 + }, + { + "epoch": 0.6609555555555555, + "grad_norm": 1.48209547996521, + "learning_rate": 6.78239608801956e-05, + "loss": 1.5927, + "step": 29743 + }, + { + "epoch": 0.6609777777777778, + "grad_norm": 2.134293556213379, + "learning_rate": 6.781951544787732e-05, + "loss": 1.918, + "step": 29744 + }, + { + "epoch": 0.661, + "grad_norm": 1.6198315620422363, + "learning_rate": 6.781507001555901e-05, + "loss": 1.6545, + "step": 29745 + }, + { + "epoch": 0.6610222222222222, + "grad_norm": 1.608734130859375, + "learning_rate": 6.781062458324072e-05, + "loss": 1.5489, + "step": 29746 + }, + { + "epoch": 0.6610444444444444, + "grad_norm": 1.6038824319839478, + "learning_rate": 6.780617915092243e-05, + "loss": 1.4575, + "step": 29747 + }, + { + "epoch": 0.6610666666666667, + "grad_norm": 1.6160492897033691, + "learning_rate": 6.780173371860413e-05, + "loss": 1.6108, + "step": 29748 + }, + { + "epoch": 0.6610888888888888, + "grad_norm": 1.9526495933532715, + "learning_rate": 6.779728828628584e-05, + "loss": 2.0151, + "step": 29749 + }, + { + "epoch": 0.6611111111111111, + "grad_norm": 1.8424237966537476, + "learning_rate": 6.779284285396756e-05, + "loss": 1.6796, + "step": 29750 + }, + { + "epoch": 0.6611333333333334, + "grad_norm": 1.365674376487732, + "learning_rate": 6.778839742164927e-05, + "loss": 2.4502, + "step": 29751 + }, + { + "epoch": 0.6611555555555556, + "grad_norm": 1.7497609853744507, + "learning_rate": 6.778395198933096e-05, + "loss": 2.7399, + "step": 29752 + }, + { + "epoch": 0.6611777777777778, + "grad_norm": 1.6796815395355225, + "learning_rate": 6.777950655701267e-05, + "loss": 2.0876, + "step": 29753 + }, + { + "epoch": 0.6612, + "grad_norm": 1.3631795644760132, + "learning_rate": 6.777506112469438e-05, + "loss": 1.9842, + "step": 29754 + }, + { + "epoch": 0.6612222222222223, + "grad_norm": 1.490350365638733, + "learning_rate": 6.777061569237608e-05, + "loss": 1.9246, + "step": 29755 + }, + { + "epoch": 0.6612444444444444, + "grad_norm": 1.5525401830673218, + "learning_rate": 6.776617026005779e-05, + "loss": 1.9768, + "step": 29756 + }, + { + "epoch": 0.6612666666666667, + "grad_norm": 2.369767904281616, + "learning_rate": 6.77617248277395e-05, + "loss": 1.8494, + "step": 29757 + }, + { + "epoch": 0.6612888888888889, + "grad_norm": 1.6807938814163208, + "learning_rate": 6.775727939542121e-05, + "loss": 2.0463, + "step": 29758 + }, + { + "epoch": 0.6613111111111111, + "grad_norm": 1.5899170637130737, + "learning_rate": 6.775283396310292e-05, + "loss": 2.4831, + "step": 29759 + }, + { + "epoch": 0.6613333333333333, + "grad_norm": 1.462839961051941, + "learning_rate": 6.774838853078463e-05, + "loss": 2.0605, + "step": 29760 + }, + { + "epoch": 0.6613555555555556, + "grad_norm": 1.7161911725997925, + "learning_rate": 6.774394309846634e-05, + "loss": 2.0418, + "step": 29761 + }, + { + "epoch": 0.6613777777777777, + "grad_norm": 1.541351318359375, + "learning_rate": 6.773949766614803e-05, + "loss": 1.7762, + "step": 29762 + }, + { + "epoch": 0.6614, + "grad_norm": 1.732049822807312, + "learning_rate": 6.773505223382974e-05, + "loss": 2.1298, + "step": 29763 + }, + { + "epoch": 0.6614222222222222, + "grad_norm": 1.5366971492767334, + "learning_rate": 6.773060680151145e-05, + "loss": 1.7534, + "step": 29764 + }, + { + "epoch": 0.6614444444444444, + "grad_norm": 1.4801750183105469, + "learning_rate": 6.772616136919315e-05, + "loss": 1.811, + "step": 29765 + }, + { + "epoch": 0.6614666666666666, + "grad_norm": 1.6979097127914429, + "learning_rate": 6.772171593687486e-05, + "loss": 2.5855, + "step": 29766 + }, + { + "epoch": 0.6614888888888889, + "grad_norm": 1.709199070930481, + "learning_rate": 6.771727050455657e-05, + "loss": 2.179, + "step": 29767 + }, + { + "epoch": 0.6615111111111112, + "grad_norm": 1.6825566291809082, + "learning_rate": 6.771282507223828e-05, + "loss": 2.3518, + "step": 29768 + }, + { + "epoch": 0.6615333333333333, + "grad_norm": 1.5685398578643799, + "learning_rate": 6.770837963991999e-05, + "loss": 1.6449, + "step": 29769 + }, + { + "epoch": 0.6615555555555556, + "grad_norm": 2.124915838241577, + "learning_rate": 6.77039342076017e-05, + "loss": 0.7097, + "step": 29770 + }, + { + "epoch": 0.6615777777777778, + "grad_norm": 1.4958488941192627, + "learning_rate": 6.769948877528341e-05, + "loss": 1.2729, + "step": 29771 + }, + { + "epoch": 0.6616, + "grad_norm": 2.002549409866333, + "learning_rate": 6.76950433429651e-05, + "loss": 2.3574, + "step": 29772 + }, + { + "epoch": 0.6616222222222222, + "grad_norm": 1.654229760169983, + "learning_rate": 6.769059791064681e-05, + "loss": 2.0806, + "step": 29773 + }, + { + "epoch": 0.6616444444444445, + "grad_norm": 1.7626105546951294, + "learning_rate": 6.768615247832852e-05, + "loss": 1.9968, + "step": 29774 + }, + { + "epoch": 0.6616666666666666, + "grad_norm": 1.9881726503372192, + "learning_rate": 6.768170704601022e-05, + "loss": 2.1822, + "step": 29775 + }, + { + "epoch": 0.6616888888888889, + "grad_norm": 1.5868173837661743, + "learning_rate": 6.767726161369193e-05, + "loss": 1.8892, + "step": 29776 + }, + { + "epoch": 0.6617111111111111, + "grad_norm": 1.8402587175369263, + "learning_rate": 6.767281618137364e-05, + "loss": 1.5504, + "step": 29777 + }, + { + "epoch": 0.6617333333333333, + "grad_norm": 1.462702751159668, + "learning_rate": 6.766837074905536e-05, + "loss": 2.1335, + "step": 29778 + }, + { + "epoch": 0.6617555555555555, + "grad_norm": 1.7339056730270386, + "learning_rate": 6.766392531673706e-05, + "loss": 2.0455, + "step": 29779 + }, + { + "epoch": 0.6617777777777778, + "grad_norm": 1.7997807264328003, + "learning_rate": 6.765947988441877e-05, + "loss": 1.7998, + "step": 29780 + }, + { + "epoch": 0.6618, + "grad_norm": 1.674707055091858, + "learning_rate": 6.765503445210048e-05, + "loss": 1.8069, + "step": 29781 + }, + { + "epoch": 0.6618222222222222, + "grad_norm": 1.5384355783462524, + "learning_rate": 6.765058901978217e-05, + "loss": 1.8479, + "step": 29782 + }, + { + "epoch": 0.6618444444444445, + "grad_norm": 2.0576751232147217, + "learning_rate": 6.764614358746388e-05, + "loss": 1.7632, + "step": 29783 + }, + { + "epoch": 0.6618666666666667, + "grad_norm": 1.397933840751648, + "learning_rate": 6.764169815514559e-05, + "loss": 0.9408, + "step": 29784 + }, + { + "epoch": 0.6618888888888889, + "grad_norm": 1.7931183576583862, + "learning_rate": 6.763725272282729e-05, + "loss": 1.9449, + "step": 29785 + }, + { + "epoch": 0.6619111111111111, + "grad_norm": 1.3988561630249023, + "learning_rate": 6.7632807290509e-05, + "loss": 1.2232, + "step": 29786 + }, + { + "epoch": 0.6619333333333334, + "grad_norm": 1.7735909223556519, + "learning_rate": 6.762836185819072e-05, + "loss": 1.8449, + "step": 29787 + }, + { + "epoch": 0.6619555555555555, + "grad_norm": 1.4180495738983154, + "learning_rate": 6.762391642587243e-05, + "loss": 1.471, + "step": 29788 + }, + { + "epoch": 0.6619777777777778, + "grad_norm": 1.846642255783081, + "learning_rate": 6.761947099355413e-05, + "loss": 2.0046, + "step": 29789 + }, + { + "epoch": 0.662, + "grad_norm": 1.1563552618026733, + "learning_rate": 6.761502556123584e-05, + "loss": 0.9774, + "step": 29790 + }, + { + "epoch": 0.6620222222222222, + "grad_norm": 1.5811171531677246, + "learning_rate": 6.761058012891755e-05, + "loss": 1.7614, + "step": 29791 + }, + { + "epoch": 0.6620444444444444, + "grad_norm": 1.9264756441116333, + "learning_rate": 6.760613469659924e-05, + "loss": 1.8206, + "step": 29792 + }, + { + "epoch": 0.6620666666666667, + "grad_norm": 1.555372953414917, + "learning_rate": 6.760168926428095e-05, + "loss": 1.3343, + "step": 29793 + }, + { + "epoch": 0.6620888888888888, + "grad_norm": 2.0197927951812744, + "learning_rate": 6.759724383196266e-05, + "loss": 1.5174, + "step": 29794 + }, + { + "epoch": 0.6621111111111111, + "grad_norm": 1.9780981540679932, + "learning_rate": 6.759279839964437e-05, + "loss": 1.6895, + "step": 29795 + }, + { + "epoch": 0.6621333333333334, + "grad_norm": 1.7140216827392578, + "learning_rate": 6.758835296732608e-05, + "loss": 1.5978, + "step": 29796 + }, + { + "epoch": 0.6621555555555556, + "grad_norm": 2.1798079013824463, + "learning_rate": 6.758390753500779e-05, + "loss": 1.1335, + "step": 29797 + }, + { + "epoch": 0.6621777777777778, + "grad_norm": 1.043080449104309, + "learning_rate": 6.75794621026895e-05, + "loss": 0.719, + "step": 29798 + }, + { + "epoch": 0.6622, + "grad_norm": 2.2078845500946045, + "learning_rate": 6.75750166703712e-05, + "loss": 2.1393, + "step": 29799 + }, + { + "epoch": 0.6622222222222223, + "grad_norm": 2.1851742267608643, + "learning_rate": 6.75705712380529e-05, + "loss": 1.9053, + "step": 29800 + }, + { + "epoch": 0.6622444444444444, + "grad_norm": 2.015225648880005, + "learning_rate": 6.756612580573461e-05, + "loss": 3.1311, + "step": 29801 + }, + { + "epoch": 0.6622666666666667, + "grad_norm": 1.6868116855621338, + "learning_rate": 6.756168037341631e-05, + "loss": 2.4089, + "step": 29802 + }, + { + "epoch": 0.6622888888888889, + "grad_norm": 1.5408720970153809, + "learning_rate": 6.755723494109802e-05, + "loss": 1.1948, + "step": 29803 + }, + { + "epoch": 0.6623111111111111, + "grad_norm": 1.2142361402511597, + "learning_rate": 6.755278950877973e-05, + "loss": 1.1249, + "step": 29804 + }, + { + "epoch": 0.6623333333333333, + "grad_norm": 1.530510425567627, + "learning_rate": 6.754834407646144e-05, + "loss": 2.4555, + "step": 29805 + }, + { + "epoch": 0.6623555555555556, + "grad_norm": 1.4871829748153687, + "learning_rate": 6.754389864414315e-05, + "loss": 2.1614, + "step": 29806 + }, + { + "epoch": 0.6623777777777777, + "grad_norm": 1.8419668674468994, + "learning_rate": 6.753945321182486e-05, + "loss": 2.5486, + "step": 29807 + }, + { + "epoch": 0.6624, + "grad_norm": 1.5700019598007202, + "learning_rate": 6.753500777950657e-05, + "loss": 1.9956, + "step": 29808 + }, + { + "epoch": 0.6624222222222222, + "grad_norm": 1.4468845129013062, + "learning_rate": 6.753056234718826e-05, + "loss": 1.6379, + "step": 29809 + }, + { + "epoch": 0.6624444444444444, + "grad_norm": 1.574005365371704, + "learning_rate": 6.752611691486997e-05, + "loss": 2.0273, + "step": 29810 + }, + { + "epoch": 0.6624666666666666, + "grad_norm": 1.4133375883102417, + "learning_rate": 6.752167148255168e-05, + "loss": 1.6354, + "step": 29811 + }, + { + "epoch": 0.6624888888888889, + "grad_norm": 1.696982502937317, + "learning_rate": 6.751722605023338e-05, + "loss": 2.2724, + "step": 29812 + }, + { + "epoch": 0.6625111111111112, + "grad_norm": 1.4631052017211914, + "learning_rate": 6.751278061791509e-05, + "loss": 2.0235, + "step": 29813 + }, + { + "epoch": 0.6625333333333333, + "grad_norm": 1.6357436180114746, + "learning_rate": 6.75083351855968e-05, + "loss": 1.7853, + "step": 29814 + }, + { + "epoch": 0.6625555555555556, + "grad_norm": 1.5390557050704956, + "learning_rate": 6.750388975327851e-05, + "loss": 1.7458, + "step": 29815 + }, + { + "epoch": 0.6625777777777778, + "grad_norm": 1.714276909828186, + "learning_rate": 6.749944432096022e-05, + "loss": 1.8244, + "step": 29816 + }, + { + "epoch": 0.6626, + "grad_norm": 1.4015839099884033, + "learning_rate": 6.749499888864193e-05, + "loss": 1.6646, + "step": 29817 + }, + { + "epoch": 0.6626222222222222, + "grad_norm": 1.5322781801223755, + "learning_rate": 6.749055345632364e-05, + "loss": 1.5663, + "step": 29818 + }, + { + "epoch": 0.6626444444444445, + "grad_norm": 1.8195284605026245, + "learning_rate": 6.748610802400533e-05, + "loss": 1.7811, + "step": 29819 + }, + { + "epoch": 0.6626666666666666, + "grad_norm": 1.9035009145736694, + "learning_rate": 6.748166259168704e-05, + "loss": 1.8438, + "step": 29820 + }, + { + "epoch": 0.6626888888888889, + "grad_norm": 1.8950119018554688, + "learning_rate": 6.747721715936875e-05, + "loss": 1.9304, + "step": 29821 + }, + { + "epoch": 0.6627111111111111, + "grad_norm": 1.8354183435440063, + "learning_rate": 6.747277172705045e-05, + "loss": 1.8252, + "step": 29822 + }, + { + "epoch": 0.6627333333333333, + "grad_norm": 1.8345448970794678, + "learning_rate": 6.746832629473216e-05, + "loss": 1.8646, + "step": 29823 + }, + { + "epoch": 0.6627555555555555, + "grad_norm": 1.8485467433929443, + "learning_rate": 6.746388086241388e-05, + "loss": 2.2144, + "step": 29824 + }, + { + "epoch": 0.6627777777777778, + "grad_norm": 1.7133246660232544, + "learning_rate": 6.745943543009559e-05, + "loss": 1.832, + "step": 29825 + }, + { + "epoch": 0.6628, + "grad_norm": 1.6095284223556519, + "learning_rate": 6.745498999777729e-05, + "loss": 1.5356, + "step": 29826 + }, + { + "epoch": 0.6628222222222222, + "grad_norm": 1.4461475610733032, + "learning_rate": 6.7450544565459e-05, + "loss": 1.6122, + "step": 29827 + }, + { + "epoch": 0.6628444444444445, + "grad_norm": 1.8822609186172485, + "learning_rate": 6.74460991331407e-05, + "loss": 1.7766, + "step": 29828 + }, + { + "epoch": 0.6628666666666667, + "grad_norm": 1.944409966468811, + "learning_rate": 6.74416537008224e-05, + "loss": 1.8874, + "step": 29829 + }, + { + "epoch": 0.6628888888888889, + "grad_norm": 1.6179362535476685, + "learning_rate": 6.743720826850411e-05, + "loss": 1.8056, + "step": 29830 + }, + { + "epoch": 0.6629111111111111, + "grad_norm": 2.0456881523132324, + "learning_rate": 6.743276283618582e-05, + "loss": 1.6853, + "step": 29831 + }, + { + "epoch": 0.6629333333333334, + "grad_norm": 1.715853214263916, + "learning_rate": 6.742831740386753e-05, + "loss": 1.886, + "step": 29832 + }, + { + "epoch": 0.6629555555555555, + "grad_norm": 1.9937889575958252, + "learning_rate": 6.742387197154924e-05, + "loss": 1.998, + "step": 29833 + }, + { + "epoch": 0.6629777777777778, + "grad_norm": 1.687606692314148, + "learning_rate": 6.741942653923095e-05, + "loss": 1.9692, + "step": 29834 + }, + { + "epoch": 0.663, + "grad_norm": 1.661903977394104, + "learning_rate": 6.741498110691266e-05, + "loss": 1.675, + "step": 29835 + }, + { + "epoch": 0.6630222222222222, + "grad_norm": 2.170902729034424, + "learning_rate": 6.741053567459436e-05, + "loss": 2.1342, + "step": 29836 + }, + { + "epoch": 0.6630444444444444, + "grad_norm": 0.8865088820457458, + "learning_rate": 6.740609024227607e-05, + "loss": 0.7062, + "step": 29837 + }, + { + "epoch": 0.6630666666666667, + "grad_norm": 2.08332896232605, + "learning_rate": 6.740164480995778e-05, + "loss": 2.2277, + "step": 29838 + }, + { + "epoch": 0.6630888888888888, + "grad_norm": 1.8963425159454346, + "learning_rate": 6.739719937763947e-05, + "loss": 2.13, + "step": 29839 + }, + { + "epoch": 0.6631111111111111, + "grad_norm": 1.5987082719802856, + "learning_rate": 6.739275394532118e-05, + "loss": 1.5478, + "step": 29840 + }, + { + "epoch": 0.6631333333333334, + "grad_norm": 1.8892338275909424, + "learning_rate": 6.738830851300289e-05, + "loss": 2.3368, + "step": 29841 + }, + { + "epoch": 0.6631555555555556, + "grad_norm": 1.368084192276001, + "learning_rate": 6.73838630806846e-05, + "loss": 1.2946, + "step": 29842 + }, + { + "epoch": 0.6631777777777778, + "grad_norm": 2.3991076946258545, + "learning_rate": 6.737941764836631e-05, + "loss": 2.455, + "step": 29843 + }, + { + "epoch": 0.6632, + "grad_norm": 1.8040879964828491, + "learning_rate": 6.737497221604802e-05, + "loss": 1.7914, + "step": 29844 + }, + { + "epoch": 0.6632222222222223, + "grad_norm": 1.668006181716919, + "learning_rate": 6.737052678372973e-05, + "loss": 1.7811, + "step": 29845 + }, + { + "epoch": 0.6632444444444444, + "grad_norm": 1.5654646158218384, + "learning_rate": 6.736608135141143e-05, + "loss": 1.384, + "step": 29846 + }, + { + "epoch": 0.6632666666666667, + "grad_norm": 2.4598138332366943, + "learning_rate": 6.736163591909313e-05, + "loss": 1.9209, + "step": 29847 + }, + { + "epoch": 0.6632888888888889, + "grad_norm": 1.0733261108398438, + "learning_rate": 6.735719048677484e-05, + "loss": 0.789, + "step": 29848 + }, + { + "epoch": 0.6633111111111111, + "grad_norm": 1.8876014947891235, + "learning_rate": 6.735274505445654e-05, + "loss": 1.9376, + "step": 29849 + }, + { + "epoch": 0.6633333333333333, + "grad_norm": 2.126352548599243, + "learning_rate": 6.734829962213825e-05, + "loss": 1.7392, + "step": 29850 + }, + { + "epoch": 0.6633555555555556, + "grad_norm": 0.9994049668312073, + "learning_rate": 6.734385418981996e-05, + "loss": 1.4126, + "step": 29851 + }, + { + "epoch": 0.6633777777777777, + "grad_norm": 1.020012378692627, + "learning_rate": 6.733940875750167e-05, + "loss": 1.054, + "step": 29852 + }, + { + "epoch": 0.6634, + "grad_norm": 1.5305405855178833, + "learning_rate": 6.733496332518338e-05, + "loss": 2.3882, + "step": 29853 + }, + { + "epoch": 0.6634222222222222, + "grad_norm": 1.1586368083953857, + "learning_rate": 6.733051789286509e-05, + "loss": 1.6271, + "step": 29854 + }, + { + "epoch": 0.6634444444444444, + "grad_norm": 1.5261671543121338, + "learning_rate": 6.73260724605468e-05, + "loss": 1.771, + "step": 29855 + }, + { + "epoch": 0.6634666666666666, + "grad_norm": 1.3771371841430664, + "learning_rate": 6.73216270282285e-05, + "loss": 1.4175, + "step": 29856 + }, + { + "epoch": 0.6634888888888889, + "grad_norm": 1.535698413848877, + "learning_rate": 6.73171815959102e-05, + "loss": 1.0387, + "step": 29857 + }, + { + "epoch": 0.6635111111111112, + "grad_norm": 1.5857796669006348, + "learning_rate": 6.731273616359191e-05, + "loss": 1.9065, + "step": 29858 + }, + { + "epoch": 0.6635333333333333, + "grad_norm": 1.5167961120605469, + "learning_rate": 6.730829073127361e-05, + "loss": 1.8251, + "step": 29859 + }, + { + "epoch": 0.6635555555555556, + "grad_norm": 1.5984530448913574, + "learning_rate": 6.730384529895532e-05, + "loss": 1.9663, + "step": 29860 + }, + { + "epoch": 0.6635777777777778, + "grad_norm": 1.4273908138275146, + "learning_rate": 6.729939986663704e-05, + "loss": 1.9222, + "step": 29861 + }, + { + "epoch": 0.6636, + "grad_norm": 1.4247217178344727, + "learning_rate": 6.729495443431874e-05, + "loss": 1.7142, + "step": 29862 + }, + { + "epoch": 0.6636222222222222, + "grad_norm": 1.9986554384231567, + "learning_rate": 6.729050900200045e-05, + "loss": 2.0371, + "step": 29863 + }, + { + "epoch": 0.6636444444444445, + "grad_norm": 1.9894723892211914, + "learning_rate": 6.728606356968216e-05, + "loss": 2.0388, + "step": 29864 + }, + { + "epoch": 0.6636666666666666, + "grad_norm": 2.0761454105377197, + "learning_rate": 6.728161813736387e-05, + "loss": 1.1724, + "step": 29865 + }, + { + "epoch": 0.6636888888888889, + "grad_norm": 1.590334415435791, + "learning_rate": 6.727717270504556e-05, + "loss": 1.8133, + "step": 29866 + }, + { + "epoch": 0.6637111111111111, + "grad_norm": 1.7695194482803345, + "learning_rate": 6.727272727272727e-05, + "loss": 1.9001, + "step": 29867 + }, + { + "epoch": 0.6637333333333333, + "grad_norm": 1.504016399383545, + "learning_rate": 6.726828184040898e-05, + "loss": 2.0085, + "step": 29868 + }, + { + "epoch": 0.6637555555555555, + "grad_norm": 1.9227676391601562, + "learning_rate": 6.726383640809069e-05, + "loss": 1.9244, + "step": 29869 + }, + { + "epoch": 0.6637777777777778, + "grad_norm": 1.641811728477478, + "learning_rate": 6.72593909757724e-05, + "loss": 1.7412, + "step": 29870 + }, + { + "epoch": 0.6638, + "grad_norm": 2.3565618991851807, + "learning_rate": 6.725494554345411e-05, + "loss": 1.8609, + "step": 29871 + }, + { + "epoch": 0.6638222222222222, + "grad_norm": 2.0382018089294434, + "learning_rate": 6.725050011113581e-05, + "loss": 2.0848, + "step": 29872 + }, + { + "epoch": 0.6638444444444445, + "grad_norm": 2.107382297515869, + "learning_rate": 6.724605467881752e-05, + "loss": 1.7436, + "step": 29873 + }, + { + "epoch": 0.6638666666666667, + "grad_norm": 1.684561848640442, + "learning_rate": 6.724160924649923e-05, + "loss": 1.8383, + "step": 29874 + }, + { + "epoch": 0.6638888888888889, + "grad_norm": 1.0685566663742065, + "learning_rate": 6.723716381418094e-05, + "loss": 0.9968, + "step": 29875 + }, + { + "epoch": 0.6639111111111111, + "grad_norm": 2.0837154388427734, + "learning_rate": 6.723271838186263e-05, + "loss": 2.1355, + "step": 29876 + }, + { + "epoch": 0.6639333333333334, + "grad_norm": 1.7028639316558838, + "learning_rate": 6.722827294954434e-05, + "loss": 2.1359, + "step": 29877 + }, + { + "epoch": 0.6639555555555555, + "grad_norm": 1.3876352310180664, + "learning_rate": 6.722382751722605e-05, + "loss": 1.3667, + "step": 29878 + }, + { + "epoch": 0.6639777777777778, + "grad_norm": 1.7416679859161377, + "learning_rate": 6.721938208490776e-05, + "loss": 1.8754, + "step": 29879 + }, + { + "epoch": 0.664, + "grad_norm": 1.6241660118103027, + "learning_rate": 6.721493665258947e-05, + "loss": 1.6207, + "step": 29880 + }, + { + "epoch": 0.6640222222222222, + "grad_norm": 1.7644826173782349, + "learning_rate": 6.721049122027118e-05, + "loss": 1.7229, + "step": 29881 + }, + { + "epoch": 0.6640444444444444, + "grad_norm": 1.7775423526763916, + "learning_rate": 6.720604578795289e-05, + "loss": 1.9053, + "step": 29882 + }, + { + "epoch": 0.6640666666666667, + "grad_norm": 1.7751245498657227, + "learning_rate": 6.720160035563459e-05, + "loss": 1.8768, + "step": 29883 + }, + { + "epoch": 0.6640888888888888, + "grad_norm": 2.0301311016082764, + "learning_rate": 6.71971549233163e-05, + "loss": 2.4465, + "step": 29884 + }, + { + "epoch": 0.6641111111111111, + "grad_norm": 1.7487518787384033, + "learning_rate": 6.7192709490998e-05, + "loss": 1.6696, + "step": 29885 + }, + { + "epoch": 0.6641333333333334, + "grad_norm": 2.3853297233581543, + "learning_rate": 6.71882640586797e-05, + "loss": 2.2918, + "step": 29886 + }, + { + "epoch": 0.6641555555555556, + "grad_norm": 2.0184779167175293, + "learning_rate": 6.718381862636141e-05, + "loss": 1.7051, + "step": 29887 + }, + { + "epoch": 0.6641777777777778, + "grad_norm": 1.8643485307693481, + "learning_rate": 6.717937319404312e-05, + "loss": 1.9038, + "step": 29888 + }, + { + "epoch": 0.6642, + "grad_norm": 2.1959586143493652, + "learning_rate": 6.717492776172483e-05, + "loss": 1.5555, + "step": 29889 + }, + { + "epoch": 0.6642222222222223, + "grad_norm": 1.7016730308532715, + "learning_rate": 6.717048232940654e-05, + "loss": 1.5396, + "step": 29890 + }, + { + "epoch": 0.6642444444444444, + "grad_norm": 2.1555588245391846, + "learning_rate": 6.716603689708825e-05, + "loss": 2.1698, + "step": 29891 + }, + { + "epoch": 0.6642666666666667, + "grad_norm": 1.6451393365859985, + "learning_rate": 6.716159146476996e-05, + "loss": 1.3959, + "step": 29892 + }, + { + "epoch": 0.6642888888888889, + "grad_norm": 1.5219990015029907, + "learning_rate": 6.715714603245166e-05, + "loss": 1.5174, + "step": 29893 + }, + { + "epoch": 0.6643111111111111, + "grad_norm": 1.9876725673675537, + "learning_rate": 6.715270060013336e-05, + "loss": 2.0229, + "step": 29894 + }, + { + "epoch": 0.6643333333333333, + "grad_norm": 1.7877013683319092, + "learning_rate": 6.714825516781507e-05, + "loss": 1.8763, + "step": 29895 + }, + { + "epoch": 0.6643555555555556, + "grad_norm": 2.0413553714752197, + "learning_rate": 6.714380973549677e-05, + "loss": 1.671, + "step": 29896 + }, + { + "epoch": 0.6643777777777777, + "grad_norm": 2.1173155307769775, + "learning_rate": 6.713936430317848e-05, + "loss": 2.178, + "step": 29897 + }, + { + "epoch": 0.6644, + "grad_norm": 1.778635859489441, + "learning_rate": 6.71349188708602e-05, + "loss": 1.5821, + "step": 29898 + }, + { + "epoch": 0.6644222222222222, + "grad_norm": 1.8207401037216187, + "learning_rate": 6.71304734385419e-05, + "loss": 1.6871, + "step": 29899 + }, + { + "epoch": 0.6644444444444444, + "grad_norm": 1.787549376487732, + "learning_rate": 6.712602800622361e-05, + "loss": 1.468, + "step": 29900 + }, + { + "epoch": 0.6644666666666666, + "grad_norm": 1.3397949934005737, + "learning_rate": 6.712158257390532e-05, + "loss": 2.3533, + "step": 29901 + }, + { + "epoch": 0.6644888888888889, + "grad_norm": 1.6535183191299438, + "learning_rate": 6.711713714158703e-05, + "loss": 2.1798, + "step": 29902 + }, + { + "epoch": 0.6645111111111112, + "grad_norm": 1.3034520149230957, + "learning_rate": 6.711269170926872e-05, + "loss": 2.2776, + "step": 29903 + }, + { + "epoch": 0.6645333333333333, + "grad_norm": 1.4036054611206055, + "learning_rate": 6.710824627695043e-05, + "loss": 2.2437, + "step": 29904 + }, + { + "epoch": 0.6645555555555556, + "grad_norm": 1.8223963975906372, + "learning_rate": 6.710380084463214e-05, + "loss": 2.2021, + "step": 29905 + }, + { + "epoch": 0.6645777777777778, + "grad_norm": 2.307182788848877, + "learning_rate": 6.709935541231385e-05, + "loss": 1.7815, + "step": 29906 + }, + { + "epoch": 0.6646, + "grad_norm": 1.9300739765167236, + "learning_rate": 6.709490997999556e-05, + "loss": 2.2553, + "step": 29907 + }, + { + "epoch": 0.6646222222222222, + "grad_norm": 1.8276855945587158, + "learning_rate": 6.709046454767727e-05, + "loss": 2.4208, + "step": 29908 + }, + { + "epoch": 0.6646444444444445, + "grad_norm": 2.037367820739746, + "learning_rate": 6.708601911535897e-05, + "loss": 2.1848, + "step": 29909 + }, + { + "epoch": 0.6646666666666666, + "grad_norm": 1.8194398880004883, + "learning_rate": 6.708157368304068e-05, + "loss": 2.4673, + "step": 29910 + }, + { + "epoch": 0.6646888888888889, + "grad_norm": 1.8202264308929443, + "learning_rate": 6.707712825072239e-05, + "loss": 2.5849, + "step": 29911 + }, + { + "epoch": 0.6647111111111111, + "grad_norm": 1.4894503355026245, + "learning_rate": 6.70726828184041e-05, + "loss": 1.5413, + "step": 29912 + }, + { + "epoch": 0.6647333333333333, + "grad_norm": 1.4747028350830078, + "learning_rate": 6.70682373860858e-05, + "loss": 1.8897, + "step": 29913 + }, + { + "epoch": 0.6647555555555555, + "grad_norm": 1.6377867460250854, + "learning_rate": 6.70637919537675e-05, + "loss": 2.2015, + "step": 29914 + }, + { + "epoch": 0.6647777777777778, + "grad_norm": 1.5551769733428955, + "learning_rate": 6.705934652144921e-05, + "loss": 2.0039, + "step": 29915 + }, + { + "epoch": 0.6648, + "grad_norm": 1.4956742525100708, + "learning_rate": 6.705490108913092e-05, + "loss": 1.9786, + "step": 29916 + }, + { + "epoch": 0.6648222222222222, + "grad_norm": 1.1861640214920044, + "learning_rate": 6.705045565681263e-05, + "loss": 1.133, + "step": 29917 + }, + { + "epoch": 0.6648444444444445, + "grad_norm": 1.2967723608016968, + "learning_rate": 6.704601022449434e-05, + "loss": 1.218, + "step": 29918 + }, + { + "epoch": 0.6648666666666667, + "grad_norm": 1.7160927057266235, + "learning_rate": 6.704156479217604e-05, + "loss": 2.0236, + "step": 29919 + }, + { + "epoch": 0.6648888888888889, + "grad_norm": 2.133582353591919, + "learning_rate": 6.703711935985775e-05, + "loss": 1.696, + "step": 29920 + }, + { + "epoch": 0.6649111111111111, + "grad_norm": 1.6214841604232788, + "learning_rate": 6.703267392753946e-05, + "loss": 2.2105, + "step": 29921 + }, + { + "epoch": 0.6649333333333334, + "grad_norm": 1.700264811515808, + "learning_rate": 6.702822849522117e-05, + "loss": 2.034, + "step": 29922 + }, + { + "epoch": 0.6649555555555555, + "grad_norm": 1.8656233549118042, + "learning_rate": 6.702378306290286e-05, + "loss": 1.8581, + "step": 29923 + }, + { + "epoch": 0.6649777777777778, + "grad_norm": 1.5610407590866089, + "learning_rate": 6.701933763058457e-05, + "loss": 1.808, + "step": 29924 + }, + { + "epoch": 0.665, + "grad_norm": 1.8312547206878662, + "learning_rate": 6.701489219826628e-05, + "loss": 2.0996, + "step": 29925 + }, + { + "epoch": 0.6650222222222222, + "grad_norm": 1.595229983329773, + "learning_rate": 6.701044676594799e-05, + "loss": 1.597, + "step": 29926 + }, + { + "epoch": 0.6650444444444444, + "grad_norm": 1.6952110528945923, + "learning_rate": 6.70060013336297e-05, + "loss": 1.5797, + "step": 29927 + }, + { + "epoch": 0.6650666666666667, + "grad_norm": 1.8080729246139526, + "learning_rate": 6.700155590131141e-05, + "loss": 2.0955, + "step": 29928 + }, + { + "epoch": 0.6650888888888888, + "grad_norm": 1.844958782196045, + "learning_rate": 6.69971104689931e-05, + "loss": 2.0051, + "step": 29929 + }, + { + "epoch": 0.6651111111111111, + "grad_norm": 1.6383330821990967, + "learning_rate": 6.699266503667482e-05, + "loss": 1.7088, + "step": 29930 + }, + { + "epoch": 0.6651333333333334, + "grad_norm": 1.9203208684921265, + "learning_rate": 6.698821960435653e-05, + "loss": 2.2203, + "step": 29931 + }, + { + "epoch": 0.6651555555555556, + "grad_norm": 1.5951393842697144, + "learning_rate": 6.698377417203824e-05, + "loss": 1.7076, + "step": 29932 + }, + { + "epoch": 0.6651777777777778, + "grad_norm": 1.5344406366348267, + "learning_rate": 6.697932873971993e-05, + "loss": 1.6437, + "step": 29933 + }, + { + "epoch": 0.6652, + "grad_norm": 1.807194709777832, + "learning_rate": 6.697488330740164e-05, + "loss": 2.2013, + "step": 29934 + }, + { + "epoch": 0.6652222222222223, + "grad_norm": 1.5656250715255737, + "learning_rate": 6.697043787508336e-05, + "loss": 1.6515, + "step": 29935 + }, + { + "epoch": 0.6652444444444444, + "grad_norm": 2.082768678665161, + "learning_rate": 6.696599244276506e-05, + "loss": 2.3978, + "step": 29936 + }, + { + "epoch": 0.6652666666666667, + "grad_norm": 1.8907321691513062, + "learning_rate": 6.696154701044677e-05, + "loss": 1.5345, + "step": 29937 + }, + { + "epoch": 0.6652888888888889, + "grad_norm": 2.222169876098633, + "learning_rate": 6.695710157812848e-05, + "loss": 1.7997, + "step": 29938 + }, + { + "epoch": 0.6653111111111111, + "grad_norm": 1.9868658781051636, + "learning_rate": 6.695265614581019e-05, + "loss": 2.0057, + "step": 29939 + }, + { + "epoch": 0.6653333333333333, + "grad_norm": 1.4449933767318726, + "learning_rate": 6.694821071349189e-05, + "loss": 1.4912, + "step": 29940 + }, + { + "epoch": 0.6653555555555556, + "grad_norm": 1.9717739820480347, + "learning_rate": 6.69437652811736e-05, + "loss": 2.2573, + "step": 29941 + }, + { + "epoch": 0.6653777777777777, + "grad_norm": 2.050900936126709, + "learning_rate": 6.69393198488553e-05, + "loss": 1.6612, + "step": 29942 + }, + { + "epoch": 0.6654, + "grad_norm": 1.7138173580169678, + "learning_rate": 6.693487441653701e-05, + "loss": 1.7574, + "step": 29943 + }, + { + "epoch": 0.6654222222222222, + "grad_norm": 1.945564866065979, + "learning_rate": 6.693042898421872e-05, + "loss": 2.1009, + "step": 29944 + }, + { + "epoch": 0.6654444444444444, + "grad_norm": 1.7058165073394775, + "learning_rate": 6.692598355190043e-05, + "loss": 1.7286, + "step": 29945 + }, + { + "epoch": 0.6654666666666667, + "grad_norm": 1.0451241731643677, + "learning_rate": 6.692153811958213e-05, + "loss": 0.7271, + "step": 29946 + }, + { + "epoch": 0.6654888888888889, + "grad_norm": 1.9874522686004639, + "learning_rate": 6.691709268726384e-05, + "loss": 2.2219, + "step": 29947 + }, + { + "epoch": 0.6655111111111112, + "grad_norm": 1.9588372707366943, + "learning_rate": 6.691264725494555e-05, + "loss": 1.9927, + "step": 29948 + }, + { + "epoch": 0.6655333333333333, + "grad_norm": 2.0496973991394043, + "learning_rate": 6.690820182262726e-05, + "loss": 2.1667, + "step": 29949 + }, + { + "epoch": 0.6655555555555556, + "grad_norm": 1.554182767868042, + "learning_rate": 6.690375639030895e-05, + "loss": 0.7582, + "step": 29950 + }, + { + "epoch": 0.6655777777777778, + "grad_norm": 0.9341526031494141, + "learning_rate": 6.689931095799066e-05, + "loss": 1.1776, + "step": 29951 + }, + { + "epoch": 0.6656, + "grad_norm": 1.5718581676483154, + "learning_rate": 6.689486552567237e-05, + "loss": 2.8773, + "step": 29952 + }, + { + "epoch": 0.6656222222222222, + "grad_norm": 1.5068167448043823, + "learning_rate": 6.689042009335408e-05, + "loss": 2.6586, + "step": 29953 + }, + { + "epoch": 0.6656444444444445, + "grad_norm": 1.533138394355774, + "learning_rate": 6.688597466103579e-05, + "loss": 2.4295, + "step": 29954 + }, + { + "epoch": 0.6656666666666666, + "grad_norm": 1.6191515922546387, + "learning_rate": 6.68815292287175e-05, + "loss": 2.2264, + "step": 29955 + }, + { + "epoch": 0.6656888888888889, + "grad_norm": 1.8643866777420044, + "learning_rate": 6.68770837963992e-05, + "loss": 2.3549, + "step": 29956 + }, + { + "epoch": 0.6657111111111111, + "grad_norm": 2.198772430419922, + "learning_rate": 6.687263836408091e-05, + "loss": 1.9581, + "step": 29957 + }, + { + "epoch": 0.6657333333333333, + "grad_norm": 1.8021020889282227, + "learning_rate": 6.686819293176262e-05, + "loss": 2.2015, + "step": 29958 + }, + { + "epoch": 0.6657555555555555, + "grad_norm": 1.3431627750396729, + "learning_rate": 6.686374749944433e-05, + "loss": 1.7711, + "step": 29959 + }, + { + "epoch": 0.6657777777777778, + "grad_norm": 1.6012687683105469, + "learning_rate": 6.685930206712602e-05, + "loss": 1.8353, + "step": 29960 + }, + { + "epoch": 0.6658, + "grad_norm": 1.1746835708618164, + "learning_rate": 6.685485663480773e-05, + "loss": 0.8971, + "step": 29961 + }, + { + "epoch": 0.6658222222222222, + "grad_norm": 1.4977556467056274, + "learning_rate": 6.685041120248944e-05, + "loss": 1.6268, + "step": 29962 + }, + { + "epoch": 0.6658444444444445, + "grad_norm": 1.93625009059906, + "learning_rate": 6.684596577017115e-05, + "loss": 1.6674, + "step": 29963 + }, + { + "epoch": 0.6658666666666667, + "grad_norm": 1.4827030897140503, + "learning_rate": 6.684152033785286e-05, + "loss": 1.3579, + "step": 29964 + }, + { + "epoch": 0.6658888888888889, + "grad_norm": 1.4736237525939941, + "learning_rate": 6.683707490553457e-05, + "loss": 1.5683, + "step": 29965 + }, + { + "epoch": 0.6659111111111111, + "grad_norm": 1.9679960012435913, + "learning_rate": 6.683262947321627e-05, + "loss": 1.6845, + "step": 29966 + }, + { + "epoch": 0.6659333333333334, + "grad_norm": 1.2724868059158325, + "learning_rate": 6.682818404089798e-05, + "loss": 1.2223, + "step": 29967 + }, + { + "epoch": 0.6659555555555555, + "grad_norm": 1.369015097618103, + "learning_rate": 6.682373860857969e-05, + "loss": 1.5704, + "step": 29968 + }, + { + "epoch": 0.6659777777777778, + "grad_norm": 1.5374850034713745, + "learning_rate": 6.68192931762614e-05, + "loss": 2.1142, + "step": 29969 + }, + { + "epoch": 0.666, + "grad_norm": 0.48563823103904724, + "learning_rate": 6.681484774394309e-05, + "loss": 0.0228, + "step": 29970 + }, + { + "epoch": 0.6660222222222222, + "grad_norm": 1.7181105613708496, + "learning_rate": 6.68104023116248e-05, + "loss": 1.9205, + "step": 29971 + }, + { + "epoch": 0.6660444444444444, + "grad_norm": 1.7128758430480957, + "learning_rate": 6.680595687930653e-05, + "loss": 2.1458, + "step": 29972 + }, + { + "epoch": 0.6660666666666667, + "grad_norm": 1.8142510652542114, + "learning_rate": 6.680151144698822e-05, + "loss": 1.9488, + "step": 29973 + }, + { + "epoch": 0.6660888888888888, + "grad_norm": 1.813297986984253, + "learning_rate": 6.679706601466993e-05, + "loss": 2.392, + "step": 29974 + }, + { + "epoch": 0.6661111111111111, + "grad_norm": 1.5008163452148438, + "learning_rate": 6.679262058235164e-05, + "loss": 1.7734, + "step": 29975 + }, + { + "epoch": 0.6661333333333334, + "grad_norm": 2.0730113983154297, + "learning_rate": 6.678817515003334e-05, + "loss": 1.8511, + "step": 29976 + }, + { + "epoch": 0.6661555555555555, + "grad_norm": 2.2350423336029053, + "learning_rate": 6.678372971771505e-05, + "loss": 1.9735, + "step": 29977 + }, + { + "epoch": 0.6661777777777778, + "grad_norm": 1.4494450092315674, + "learning_rate": 6.677928428539676e-05, + "loss": 1.6645, + "step": 29978 + }, + { + "epoch": 0.6662, + "grad_norm": 1.9463860988616943, + "learning_rate": 6.677483885307847e-05, + "loss": 1.9717, + "step": 29979 + }, + { + "epoch": 0.6662222222222223, + "grad_norm": 1.710606336593628, + "learning_rate": 6.677039342076018e-05, + "loss": 1.9876, + "step": 29980 + }, + { + "epoch": 0.6662444444444444, + "grad_norm": 1.8882859945297241, + "learning_rate": 6.676594798844188e-05, + "loss": 1.5495, + "step": 29981 + }, + { + "epoch": 0.6662666666666667, + "grad_norm": 1.8677129745483398, + "learning_rate": 6.67615025561236e-05, + "loss": 2.0978, + "step": 29982 + }, + { + "epoch": 0.6662888888888889, + "grad_norm": 1.759299397468567, + "learning_rate": 6.675705712380529e-05, + "loss": 1.9628, + "step": 29983 + }, + { + "epoch": 0.6663111111111111, + "grad_norm": 2.7281928062438965, + "learning_rate": 6.6752611691487e-05, + "loss": 2.2807, + "step": 29984 + }, + { + "epoch": 0.6663333333333333, + "grad_norm": 1.6504846811294556, + "learning_rate": 6.674816625916871e-05, + "loss": 1.9276, + "step": 29985 + }, + { + "epoch": 0.6663555555555556, + "grad_norm": 1.9323383569717407, + "learning_rate": 6.674372082685042e-05, + "loss": 2.0836, + "step": 29986 + }, + { + "epoch": 0.6663777777777777, + "grad_norm": 1.8040101528167725, + "learning_rate": 6.673927539453212e-05, + "loss": 2.2852, + "step": 29987 + }, + { + "epoch": 0.6664, + "grad_norm": 1.518248200416565, + "learning_rate": 6.673482996221383e-05, + "loss": 1.6423, + "step": 29988 + }, + { + "epoch": 0.6664222222222222, + "grad_norm": 1.5416886806488037, + "learning_rate": 6.673038452989553e-05, + "loss": 1.4105, + "step": 29989 + }, + { + "epoch": 0.6664444444444444, + "grad_norm": 1.7516173124313354, + "learning_rate": 6.672593909757724e-05, + "loss": 1.8827, + "step": 29990 + }, + { + "epoch": 0.6664666666666667, + "grad_norm": 1.6546638011932373, + "learning_rate": 6.672149366525895e-05, + "loss": 1.7423, + "step": 29991 + }, + { + "epoch": 0.6664888888888889, + "grad_norm": 1.5564340353012085, + "learning_rate": 6.671704823294066e-05, + "loss": 1.6448, + "step": 29992 + }, + { + "epoch": 0.6665111111111112, + "grad_norm": 1.8928364515304565, + "learning_rate": 6.671260280062236e-05, + "loss": 1.7723, + "step": 29993 + }, + { + "epoch": 0.6665333333333333, + "grad_norm": 1.846963882446289, + "learning_rate": 6.670815736830407e-05, + "loss": 2.0836, + "step": 29994 + }, + { + "epoch": 0.6665555555555556, + "grad_norm": 1.6592938899993896, + "learning_rate": 6.670371193598578e-05, + "loss": 1.7189, + "step": 29995 + }, + { + "epoch": 0.6665777777777778, + "grad_norm": 1.7135471105575562, + "learning_rate": 6.669926650366749e-05, + "loss": 1.8557, + "step": 29996 + }, + { + "epoch": 0.6666, + "grad_norm": 1.8523329496383667, + "learning_rate": 6.669482107134918e-05, + "loss": 1.9346, + "step": 29997 + }, + { + "epoch": 0.6666222222222222, + "grad_norm": 1.719085693359375, + "learning_rate": 6.66903756390309e-05, + "loss": 1.7526, + "step": 29998 + }, + { + "epoch": 0.6666444444444445, + "grad_norm": 2.335409164428711, + "learning_rate": 6.66859302067126e-05, + "loss": 1.9978, + "step": 29999 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 1.673316240310669, + "learning_rate": 6.668148477439431e-05, + "loss": 1.5546, + "step": 30000 } ], "logging_steps": 1, @@ -175042,7 +210050,7 @@ "attributes": {} } }, - "total_flos": 2.470864784640553e+17, + "total_flos": 2.96467995936897e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null