{ "best_metric": null, "best_model_checkpoint": null, "epoch": 12.054150955059653, "eval_steps": 50000, "global_step": 390000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0009272423811584348, "grad_norm": 0.1638532280921936, "learning_rate": 1.9999536378809423e-05, "loss": 0.018, "step": 30 }, { "epoch": 0.0018544847623168696, "grad_norm": 0.22779884934425354, "learning_rate": 1.999907275761884e-05, "loss": 0.0141, "step": 60 }, { "epoch": 0.0027817271434753045, "grad_norm": 0.3011741042137146, "learning_rate": 1.9998609136428263e-05, "loss": 0.0132, "step": 90 }, { "epoch": 0.003708969524633739, "grad_norm": 0.11500360816717148, "learning_rate": 1.9998145515237684e-05, "loss": 0.0125, "step": 120 }, { "epoch": 0.004636211905792174, "grad_norm": 0.15819363296031952, "learning_rate": 1.9997681894047106e-05, "loss": 0.0127, "step": 150 }, { "epoch": 0.005563454286950609, "grad_norm": 0.1754404753446579, "learning_rate": 1.9997218272856527e-05, "loss": 0.0134, "step": 180 }, { "epoch": 0.006490696668109044, "grad_norm": 0.1880694478750229, "learning_rate": 1.9996754651665945e-05, "loss": 0.0129, "step": 210 }, { "epoch": 0.007417939049267478, "grad_norm": 0.15779966115951538, "learning_rate": 1.9996291030475367e-05, "loss": 0.0127, "step": 240 }, { "epoch": 0.008345181430425914, "grad_norm": 0.18808868527412415, "learning_rate": 1.999582740928479e-05, "loss": 0.0124, "step": 270 }, { "epoch": 0.009272423811584348, "grad_norm": 0.1451554149389267, "learning_rate": 1.999536378809421e-05, "loss": 0.0125, "step": 300 }, { "epoch": 0.010199666192742783, "grad_norm": 0.162284255027771, "learning_rate": 1.999490016690363e-05, "loss": 0.0128, "step": 330 }, { "epoch": 0.011126908573901218, "grad_norm": 0.20637871325016022, "learning_rate": 1.9994436545713053e-05, "loss": 0.0123, "step": 360 }, { "epoch": 0.012054150955059652, "grad_norm": 0.12386586517095566, "learning_rate": 
1.999397292452247e-05, "loss": 0.0117, "step": 390 }, { "epoch": 0.012981393336218087, "grad_norm": 0.1481143981218338, "learning_rate": 1.9993509303331893e-05, "loss": 0.0122, "step": 420 }, { "epoch": 0.013908635717376522, "grad_norm": 0.13544076681137085, "learning_rate": 1.9993045682141314e-05, "loss": 0.0127, "step": 450 }, { "epoch": 0.014835878098534956, "grad_norm": 0.15923643112182617, "learning_rate": 1.9992582060950732e-05, "loss": 0.012, "step": 480 }, { "epoch": 0.01576312047969339, "grad_norm": 0.1551590859889984, "learning_rate": 1.9992118439760154e-05, "loss": 0.0121, "step": 510 }, { "epoch": 0.016690362860851828, "grad_norm": 0.1450086086988449, "learning_rate": 1.9991654818569575e-05, "loss": 0.012, "step": 540 }, { "epoch": 0.01761760524201026, "grad_norm": 0.1472288966178894, "learning_rate": 1.9991191197378997e-05, "loss": 0.0126, "step": 570 }, { "epoch": 0.018544847623168697, "grad_norm": 0.21153061091899872, "learning_rate": 1.9990727576188418e-05, "loss": 0.0115, "step": 600 }, { "epoch": 0.01947209000432713, "grad_norm": 0.1357986330986023, "learning_rate": 1.9990263954997836e-05, "loss": 0.0125, "step": 630 }, { "epoch": 0.020399332385485566, "grad_norm": 0.15329468250274658, "learning_rate": 1.9989800333807258e-05, "loss": 0.0123, "step": 660 }, { "epoch": 0.021326574766644, "grad_norm": 0.1228242963552475, "learning_rate": 1.998933671261668e-05, "loss": 0.0121, "step": 690 }, { "epoch": 0.022253817147802436, "grad_norm": 0.14257122576236725, "learning_rate": 1.99888730914261e-05, "loss": 0.0127, "step": 720 }, { "epoch": 0.023181059528960872, "grad_norm": 0.1672751009464264, "learning_rate": 1.9988409470235522e-05, "loss": 0.0121, "step": 750 }, { "epoch": 0.024108301910119305, "grad_norm": 0.137558713555336, "learning_rate": 1.9987945849044944e-05, "loss": 0.0119, "step": 780 }, { "epoch": 0.02503554429127774, "grad_norm": 0.17862604558467865, "learning_rate": 1.9987482227854362e-05, "loss": 0.0119, "step": 810 }, { "epoch": 
0.025962786672436174, "grad_norm": 0.18835781514644623, "learning_rate": 1.9987018606663783e-05, "loss": 0.0126, "step": 840 }, { "epoch": 0.02689002905359461, "grad_norm": 0.14080563187599182, "learning_rate": 1.99865549854732e-05, "loss": 0.0115, "step": 870 }, { "epoch": 0.027817271434753044, "grad_norm": 0.15246932208538055, "learning_rate": 1.9986091364282626e-05, "loss": 0.012, "step": 900 }, { "epoch": 0.02874451381591148, "grad_norm": 0.15116548538208008, "learning_rate": 1.9985627743092048e-05, "loss": 0.0119, "step": 930 }, { "epoch": 0.029671756197069913, "grad_norm": 0.17396464943885803, "learning_rate": 1.9985164121901466e-05, "loss": 0.0123, "step": 960 }, { "epoch": 0.03059899857822835, "grad_norm": 0.2706860899925232, "learning_rate": 1.9984700500710888e-05, "loss": 0.0123, "step": 990 }, { "epoch": 0.03152624095938678, "grad_norm": 0.1573820561170578, "learning_rate": 1.998423687952031e-05, "loss": 0.0127, "step": 1020 }, { "epoch": 0.032453483340545215, "grad_norm": 0.15245282649993896, "learning_rate": 1.9983773258329727e-05, "loss": 0.0121, "step": 1050 }, { "epoch": 0.033380725721703655, "grad_norm": 0.19036678969860077, "learning_rate": 1.998330963713915e-05, "loss": 0.0123, "step": 1080 }, { "epoch": 0.03430796810286209, "grad_norm": 0.17689216136932373, "learning_rate": 1.998284601594857e-05, "loss": 0.0113, "step": 1110 }, { "epoch": 0.03523521048402052, "grad_norm": 0.11005578190088272, "learning_rate": 1.9982382394757992e-05, "loss": 0.0122, "step": 1140 }, { "epoch": 0.03616245286517896, "grad_norm": 0.18516843020915985, "learning_rate": 1.9981918773567413e-05, "loss": 0.012, "step": 1170 }, { "epoch": 0.037089695246337394, "grad_norm": 0.19611108303070068, "learning_rate": 1.998145515237683e-05, "loss": 0.0116, "step": 1200 }, { "epoch": 0.03801693762749583, "grad_norm": 0.15067140758037567, "learning_rate": 1.9980991531186253e-05, "loss": 0.0128, "step": 1230 }, { "epoch": 0.03894418000865426, "grad_norm": 0.1785999834537506, 
"learning_rate": 1.9980527909995674e-05, "loss": 0.0112, "step": 1260 }, { "epoch": 0.0398714223898127, "grad_norm": 0.1541200578212738, "learning_rate": 1.9980064288805096e-05, "loss": 0.0112, "step": 1290 }, { "epoch": 0.04079866477097113, "grad_norm": 0.1889958679676056, "learning_rate": 1.9979600667614517e-05, "loss": 0.0129, "step": 1320 }, { "epoch": 0.041725907152129565, "grad_norm": 0.1332894265651703, "learning_rate": 1.997913704642394e-05, "loss": 0.0124, "step": 1350 }, { "epoch": 0.042653149533288, "grad_norm": 0.1866230070590973, "learning_rate": 1.9978673425233357e-05, "loss": 0.0123, "step": 1380 }, { "epoch": 0.04358039191444644, "grad_norm": 0.11638156324625015, "learning_rate": 1.997820980404278e-05, "loss": 0.0122, "step": 1410 }, { "epoch": 0.04450763429560487, "grad_norm": 0.09268539398908615, "learning_rate": 1.99777461828522e-05, "loss": 0.0119, "step": 1440 }, { "epoch": 0.045434876676763304, "grad_norm": 0.11723676323890686, "learning_rate": 1.9977282561661618e-05, "loss": 0.0119, "step": 1470 }, { "epoch": 0.046362119057921744, "grad_norm": 0.15469707548618317, "learning_rate": 1.997681894047104e-05, "loss": 0.0111, "step": 1500 }, { "epoch": 0.04728936143908018, "grad_norm": 0.1315491944551468, "learning_rate": 1.997635531928046e-05, "loss": 0.0106, "step": 1530 }, { "epoch": 0.04821660382023861, "grad_norm": 0.11381359398365021, "learning_rate": 1.9975891698089883e-05, "loss": 0.0122, "step": 1560 }, { "epoch": 0.04914384620139704, "grad_norm": 0.16818124055862427, "learning_rate": 1.9975428076899304e-05, "loss": 0.0114, "step": 1590 }, { "epoch": 0.05007108858255548, "grad_norm": 0.16137473285198212, "learning_rate": 1.9974964455708722e-05, "loss": 0.0125, "step": 1620 }, { "epoch": 0.050998330963713916, "grad_norm": 0.18651790916919708, "learning_rate": 1.9974500834518144e-05, "loss": 0.0117, "step": 1650 }, { "epoch": 0.05192557334487235, "grad_norm": 0.1545749306678772, "learning_rate": 1.9974037213327565e-05, "loss": 0.0112, "step": 
1680 }, { "epoch": 0.05285281572603078, "grad_norm": 0.15960855782032013, "learning_rate": 1.9973573592136987e-05, "loss": 0.0121, "step": 1710 }, { "epoch": 0.05378005810718922, "grad_norm": 0.1781289130449295, "learning_rate": 1.997310997094641e-05, "loss": 0.0112, "step": 1740 }, { "epoch": 0.054707300488347654, "grad_norm": 0.15499824285507202, "learning_rate": 1.997264634975583e-05, "loss": 0.0125, "step": 1770 }, { "epoch": 0.05563454286950609, "grad_norm": 0.1786467581987381, "learning_rate": 1.9972182728565248e-05, "loss": 0.0128, "step": 1800 }, { "epoch": 0.05656178525066453, "grad_norm": 0.18428528308868408, "learning_rate": 1.997171910737467e-05, "loss": 0.0116, "step": 1830 }, { "epoch": 0.05748902763182296, "grad_norm": 0.16435745358467102, "learning_rate": 1.9971255486184088e-05, "loss": 0.0127, "step": 1860 }, { "epoch": 0.05841627001298139, "grad_norm": 0.3035704493522644, "learning_rate": 1.997079186499351e-05, "loss": 0.0116, "step": 1890 }, { "epoch": 0.059343512394139826, "grad_norm": 0.11720012873411179, "learning_rate": 1.997032824380293e-05, "loss": 0.0116, "step": 1920 }, { "epoch": 0.060270754775298266, "grad_norm": 0.17466124892234802, "learning_rate": 1.9969864622612352e-05, "loss": 0.0128, "step": 1950 }, { "epoch": 0.0611979971564567, "grad_norm": 0.1788652241230011, "learning_rate": 1.9969401001421774e-05, "loss": 0.0129, "step": 1980 }, { "epoch": 0.06212523953761513, "grad_norm": 0.15693792700767517, "learning_rate": 1.9968937380231195e-05, "loss": 0.0118, "step": 2010 }, { "epoch": 0.06305248191877356, "grad_norm": 0.15108437836170197, "learning_rate": 1.9968473759040613e-05, "loss": 0.0117, "step": 2040 }, { "epoch": 0.063979724299932, "grad_norm": 0.13107357919216156, "learning_rate": 1.9968010137850035e-05, "loss": 0.0118, "step": 2070 }, { "epoch": 0.06490696668109043, "grad_norm": 0.10856608301401138, "learning_rate": 1.9967546516659456e-05, "loss": 0.0126, "step": 2100 }, { "epoch": 0.06583420906224888, "grad_norm": 
0.17243517935276031, "learning_rate": 1.9967082895468878e-05, "loss": 0.0113, "step": 2130 }, { "epoch": 0.06676145144340731, "grad_norm": 0.16601836681365967, "learning_rate": 1.99666192742783e-05, "loss": 0.0116, "step": 2160 }, { "epoch": 0.06768869382456574, "grad_norm": 0.1730329543352127, "learning_rate": 1.996615565308772e-05, "loss": 0.012, "step": 2190 }, { "epoch": 0.06861593620572418, "grad_norm": 0.14996053278446198, "learning_rate": 1.996569203189714e-05, "loss": 0.0113, "step": 2220 }, { "epoch": 0.06954317858688261, "grad_norm": 0.1300431340932846, "learning_rate": 1.996522841070656e-05, "loss": 0.0118, "step": 2250 }, { "epoch": 0.07047042096804104, "grad_norm": 0.10595303028821945, "learning_rate": 1.996476478951598e-05, "loss": 0.0116, "step": 2280 }, { "epoch": 0.07139766334919947, "grad_norm": 0.16038645803928375, "learning_rate": 1.9964301168325403e-05, "loss": 0.0124, "step": 2310 }, { "epoch": 0.07232490573035792, "grad_norm": 0.13681264221668243, "learning_rate": 1.9963837547134825e-05, "loss": 0.0119, "step": 2340 }, { "epoch": 0.07325214811151635, "grad_norm": 0.15593959391117096, "learning_rate": 1.9963373925944243e-05, "loss": 0.0125, "step": 2370 }, { "epoch": 0.07417939049267479, "grad_norm": 0.15990868210792542, "learning_rate": 1.9962910304753665e-05, "loss": 0.0119, "step": 2400 }, { "epoch": 0.07510663287383322, "grad_norm": 0.1498151272535324, "learning_rate": 1.9962446683563086e-05, "loss": 0.0122, "step": 2430 }, { "epoch": 0.07603387525499165, "grad_norm": 0.0990406796336174, "learning_rate": 1.9961983062372504e-05, "loss": 0.0121, "step": 2460 }, { "epoch": 0.07696111763615009, "grad_norm": 0.20319393277168274, "learning_rate": 1.9961519441181926e-05, "loss": 0.0124, "step": 2490 }, { "epoch": 0.07788836001730852, "grad_norm": 0.42328718304634094, "learning_rate": 1.9961055819991347e-05, "loss": 0.0116, "step": 2520 }, { "epoch": 0.07881560239846695, "grad_norm": 0.15908204019069672, "learning_rate": 1.996059219880077e-05, 
"loss": 0.0115, "step": 2550 }, { "epoch": 0.0797428447796254, "grad_norm": 0.17834916710853577, "learning_rate": 1.996012857761019e-05, "loss": 0.0119, "step": 2580 }, { "epoch": 0.08067008716078383, "grad_norm": 0.11459767818450928, "learning_rate": 1.9959664956419608e-05, "loss": 0.0111, "step": 2610 }, { "epoch": 0.08159732954194227, "grad_norm": 0.16798309981822968, "learning_rate": 1.995920133522903e-05, "loss": 0.0118, "step": 2640 }, { "epoch": 0.0825245719231007, "grad_norm": 0.12844550609588623, "learning_rate": 1.995873771403845e-05, "loss": 0.0111, "step": 2670 }, { "epoch": 0.08345181430425913, "grad_norm": 0.1184278279542923, "learning_rate": 1.9958274092847873e-05, "loss": 0.0115, "step": 2700 }, { "epoch": 0.08437905668541756, "grad_norm": 0.1516464799642563, "learning_rate": 1.9957810471657294e-05, "loss": 0.0114, "step": 2730 }, { "epoch": 0.085306299066576, "grad_norm": 0.14283674955368042, "learning_rate": 1.9957346850466716e-05, "loss": 0.0122, "step": 2760 }, { "epoch": 0.08623354144773444, "grad_norm": 0.16056415438652039, "learning_rate": 1.9956883229276134e-05, "loss": 0.0118, "step": 2790 }, { "epoch": 0.08716078382889288, "grad_norm": 0.10949695855379105, "learning_rate": 1.9956419608085555e-05, "loss": 0.0121, "step": 2820 }, { "epoch": 0.08808802621005131, "grad_norm": 0.15344224870204926, "learning_rate": 1.9955955986894977e-05, "loss": 0.0121, "step": 2850 }, { "epoch": 0.08901526859120974, "grad_norm": 0.21913252770900726, "learning_rate": 1.9955492365704395e-05, "loss": 0.012, "step": 2880 }, { "epoch": 0.08994251097236818, "grad_norm": 0.16412845253944397, "learning_rate": 1.9955028744513817e-05, "loss": 0.0112, "step": 2910 }, { "epoch": 0.09086975335352661, "grad_norm": 0.20833870768547058, "learning_rate": 1.9954565123323238e-05, "loss": 0.0118, "step": 2940 }, { "epoch": 0.09179699573468504, "grad_norm": 0.1550578624010086, "learning_rate": 1.995410150213266e-05, "loss": 0.0109, "step": 2970 }, { "epoch": 0.09272423811584349, 
"grad_norm": 0.2026883065700531, "learning_rate": 1.995363788094208e-05, "loss": 0.0119, "step": 3000 }, { "epoch": 0.09365148049700192, "grad_norm": 0.1692405790090561, "learning_rate": 1.99531742597515e-05, "loss": 0.0113, "step": 3030 }, { "epoch": 0.09457872287816035, "grad_norm": 0.18874527513980865, "learning_rate": 1.995271063856092e-05, "loss": 0.0117, "step": 3060 }, { "epoch": 0.09550596525931879, "grad_norm": 0.14070002734661102, "learning_rate": 1.9952247017370342e-05, "loss": 0.0121, "step": 3090 }, { "epoch": 0.09643320764047722, "grad_norm": 0.17347273230552673, "learning_rate": 1.9951783396179764e-05, "loss": 0.012, "step": 3120 }, { "epoch": 0.09736045002163565, "grad_norm": 0.16344496607780457, "learning_rate": 1.9951319774989185e-05, "loss": 0.0118, "step": 3150 }, { "epoch": 0.09828769240279409, "grad_norm": 0.1429017335176468, "learning_rate": 1.9950856153798607e-05, "loss": 0.0106, "step": 3180 }, { "epoch": 0.09921493478395252, "grad_norm": 0.16180144250392914, "learning_rate": 1.9950392532608025e-05, "loss": 0.012, "step": 3210 }, { "epoch": 0.10014217716511097, "grad_norm": 0.1445780098438263, "learning_rate": 1.9949928911417446e-05, "loss": 0.0124, "step": 3240 }, { "epoch": 0.1010694195462694, "grad_norm": 0.15510070323944092, "learning_rate": 1.9949465290226864e-05, "loss": 0.0116, "step": 3270 }, { "epoch": 0.10199666192742783, "grad_norm": 0.14975541830062866, "learning_rate": 1.9949001669036286e-05, "loss": 0.0118, "step": 3300 }, { "epoch": 0.10292390430858626, "grad_norm": 0.14262330532073975, "learning_rate": 1.9948538047845707e-05, "loss": 0.0127, "step": 3330 }, { "epoch": 0.1038511466897447, "grad_norm": 0.15400759875774384, "learning_rate": 1.994807442665513e-05, "loss": 0.0119, "step": 3360 }, { "epoch": 0.10477838907090313, "grad_norm": 0.17536458373069763, "learning_rate": 1.994761080546455e-05, "loss": 0.0115, "step": 3390 }, { "epoch": 0.10570563145206156, "grad_norm": 0.14111503958702087, "learning_rate": 
1.9947147184273972e-05, "loss": 0.0115, "step": 3420 }, { "epoch": 0.10663287383322001, "grad_norm": 0.14547236263751984, "learning_rate": 1.994668356308339e-05, "loss": 0.0118, "step": 3450 }, { "epoch": 0.10756011621437844, "grad_norm": 0.13688117265701294, "learning_rate": 1.994621994189281e-05, "loss": 0.0115, "step": 3480 }, { "epoch": 0.10848735859553688, "grad_norm": 0.12432358413934708, "learning_rate": 1.9945756320702233e-05, "loss": 0.011, "step": 3510 }, { "epoch": 0.10941460097669531, "grad_norm": 0.165272057056427, "learning_rate": 1.9945292699511655e-05, "loss": 0.0116, "step": 3540 }, { "epoch": 0.11034184335785374, "grad_norm": 0.12315840274095535, "learning_rate": 1.9944829078321076e-05, "loss": 0.0117, "step": 3570 }, { "epoch": 0.11126908573901217, "grad_norm": 0.1442781239748001, "learning_rate": 1.9944365457130494e-05, "loss": 0.0117, "step": 3600 }, { "epoch": 0.11219632812017061, "grad_norm": 0.17888033390045166, "learning_rate": 1.9943901835939916e-05, "loss": 0.0118, "step": 3630 }, { "epoch": 0.11312357050132905, "grad_norm": 0.17265847325325012, "learning_rate": 1.9943438214749337e-05, "loss": 0.0122, "step": 3660 }, { "epoch": 0.11405081288248749, "grad_norm": 0.11911609768867493, "learning_rate": 1.9942974593558755e-05, "loss": 0.0119, "step": 3690 }, { "epoch": 0.11497805526364592, "grad_norm": 0.19884465634822845, "learning_rate": 1.994251097236818e-05, "loss": 0.0117, "step": 3720 }, { "epoch": 0.11590529764480435, "grad_norm": 0.1213579773902893, "learning_rate": 1.9942047351177602e-05, "loss": 0.0114, "step": 3750 }, { "epoch": 0.11683254002596279, "grad_norm": 0.15959061682224274, "learning_rate": 1.994158372998702e-05, "loss": 0.0114, "step": 3780 }, { "epoch": 0.11775978240712122, "grad_norm": 0.1291438192129135, "learning_rate": 1.994112010879644e-05, "loss": 0.0124, "step": 3810 }, { "epoch": 0.11868702478827965, "grad_norm": 0.11936940997838974, "learning_rate": 1.9940656487605863e-05, "loss": 0.0118, "step": 3840 }, { 
"epoch": 0.11961426716943808, "grad_norm": 0.10639103502035141, "learning_rate": 1.994019286641528e-05, "loss": 0.0129, "step": 3870 }, { "epoch": 0.12054150955059653, "grad_norm": 0.14213669300079346, "learning_rate": 1.9939729245224703e-05, "loss": 0.0121, "step": 3900 }, { "epoch": 0.12146875193175496, "grad_norm": 0.14756685495376587, "learning_rate": 1.9939265624034124e-05, "loss": 0.0124, "step": 3930 }, { "epoch": 0.1223959943129134, "grad_norm": 0.13682521879673004, "learning_rate": 1.9938802002843546e-05, "loss": 0.0112, "step": 3960 }, { "epoch": 0.12332323669407183, "grad_norm": 0.15887029469013214, "learning_rate": 1.9938338381652967e-05, "loss": 0.0119, "step": 3990 }, { "epoch": 0.12425047907523026, "grad_norm": 0.15965710580348969, "learning_rate": 1.9937874760462385e-05, "loss": 0.0118, "step": 4020 }, { "epoch": 0.1251777214563887, "grad_norm": 0.13950075209140778, "learning_rate": 1.9937411139271807e-05, "loss": 0.0119, "step": 4050 }, { "epoch": 0.12610496383754713, "grad_norm": 0.15551650524139404, "learning_rate": 1.9936947518081228e-05, "loss": 0.0122, "step": 4080 }, { "epoch": 0.12703220621870556, "grad_norm": 0.1322879195213318, "learning_rate": 1.993648389689065e-05, "loss": 0.012, "step": 4110 }, { "epoch": 0.127959448599864, "grad_norm": 0.16118811070919037, "learning_rate": 1.993602027570007e-05, "loss": 0.0115, "step": 4140 }, { "epoch": 0.12888669098102243, "grad_norm": 0.12056667357683182, "learning_rate": 1.9935556654509493e-05, "loss": 0.0104, "step": 4170 }, { "epoch": 0.12981393336218086, "grad_norm": 0.14641250669956207, "learning_rate": 1.993509303331891e-05, "loss": 0.0122, "step": 4200 }, { "epoch": 0.13074117574333932, "grad_norm": 0.1472238302230835, "learning_rate": 1.9934629412128332e-05, "loss": 0.0113, "step": 4230 }, { "epoch": 0.13166841812449775, "grad_norm": 0.11005036532878876, "learning_rate": 1.993416579093775e-05, "loss": 0.011, "step": 4260 }, { "epoch": 0.1325956605056562, "grad_norm": 0.1692676544189453, 
"learning_rate": 1.9933702169747172e-05, "loss": 0.0124, "step": 4290 }, { "epoch": 0.13352290288681462, "grad_norm": 0.1464112102985382, "learning_rate": 1.9933238548556593e-05, "loss": 0.0118, "step": 4320 }, { "epoch": 0.13445014526797305, "grad_norm": 0.14881689846515656, "learning_rate": 1.9932774927366015e-05, "loss": 0.0116, "step": 4350 }, { "epoch": 0.1353773876491315, "grad_norm": 0.15761300921440125, "learning_rate": 1.9932311306175436e-05, "loss": 0.0119, "step": 4380 }, { "epoch": 0.13630463003028992, "grad_norm": 0.10027972608804703, "learning_rate": 1.9931847684984858e-05, "loss": 0.0114, "step": 4410 }, { "epoch": 0.13723187241144835, "grad_norm": 0.19624949991703033, "learning_rate": 1.9931384063794276e-05, "loss": 0.0114, "step": 4440 }, { "epoch": 0.13815911479260679, "grad_norm": 0.1595603972673416, "learning_rate": 1.9930920442603698e-05, "loss": 0.0117, "step": 4470 }, { "epoch": 0.13908635717376522, "grad_norm": 0.1403309404850006, "learning_rate": 1.993045682141312e-05, "loss": 0.0124, "step": 4500 }, { "epoch": 0.14001359955492365, "grad_norm": 0.14781560003757477, "learning_rate": 1.992999320022254e-05, "loss": 0.0111, "step": 4530 }, { "epoch": 0.14094084193608208, "grad_norm": 0.16119930148124695, "learning_rate": 1.9929529579031962e-05, "loss": 0.0108, "step": 4560 }, { "epoch": 0.14186808431724052, "grad_norm": 0.14867869019508362, "learning_rate": 1.992906595784138e-05, "loss": 0.0116, "step": 4590 }, { "epoch": 0.14279532669839895, "grad_norm": 0.1569111943244934, "learning_rate": 1.9928602336650802e-05, "loss": 0.0121, "step": 4620 }, { "epoch": 0.14372256907955738, "grad_norm": 0.1304953694343567, "learning_rate": 1.9928138715460223e-05, "loss": 0.0121, "step": 4650 }, { "epoch": 0.14464981146071584, "grad_norm": 0.1778661161661148, "learning_rate": 1.992767509426964e-05, "loss": 0.0115, "step": 4680 }, { "epoch": 0.14557705384187428, "grad_norm": 0.11943686008453369, "learning_rate": 1.9927211473079063e-05, "loss": 0.0116, "step": 
4710 }, { "epoch": 0.1465042962230327, "grad_norm": 0.14344817399978638, "learning_rate": 1.9926747851888484e-05, "loss": 0.0119, "step": 4740 }, { "epoch": 0.14743153860419114, "grad_norm": 0.1658024936914444, "learning_rate": 1.9926284230697906e-05, "loss": 0.0107, "step": 4770 }, { "epoch": 0.14835878098534958, "grad_norm": 0.16240152716636658, "learning_rate": 1.9925820609507327e-05, "loss": 0.0113, "step": 4800 }, { "epoch": 0.149286023366508, "grad_norm": 0.1680387258529663, "learning_rate": 1.992535698831675e-05, "loss": 0.0109, "step": 4830 }, { "epoch": 0.15021326574766644, "grad_norm": 0.13924817740917206, "learning_rate": 1.9924893367126167e-05, "loss": 0.0117, "step": 4860 }, { "epoch": 0.15114050812882487, "grad_norm": 0.16914813220500946, "learning_rate": 1.992442974593559e-05, "loss": 0.0119, "step": 4890 }, { "epoch": 0.1520677505099833, "grad_norm": 0.12571659684181213, "learning_rate": 1.992396612474501e-05, "loss": 0.0125, "step": 4920 }, { "epoch": 0.15299499289114174, "grad_norm": 0.15637660026550293, "learning_rate": 1.992350250355443e-05, "loss": 0.0128, "step": 4950 }, { "epoch": 0.15392223527230017, "grad_norm": 0.1367233246564865, "learning_rate": 1.9923038882363853e-05, "loss": 0.0125, "step": 4980 }, { "epoch": 0.1548494776534586, "grad_norm": 0.1725272536277771, "learning_rate": 1.992257526117327e-05, "loss": 0.0118, "step": 5010 }, { "epoch": 0.15577672003461704, "grad_norm": 0.32370761036872864, "learning_rate": 1.9922111639982693e-05, "loss": 0.0118, "step": 5040 }, { "epoch": 0.15670396241577547, "grad_norm": 0.13288190960884094, "learning_rate": 1.9921648018792114e-05, "loss": 0.0117, "step": 5070 }, { "epoch": 0.1576312047969339, "grad_norm": 0.16906672716140747, "learning_rate": 1.9921184397601536e-05, "loss": 0.0119, "step": 5100 }, { "epoch": 0.15855844717809237, "grad_norm": 0.12446191161870956, "learning_rate": 1.9920720776410957e-05, "loss": 0.0105, "step": 5130 }, { "epoch": 0.1594856895592508, "grad_norm": 
0.15891976654529572, "learning_rate": 1.992025715522038e-05, "loss": 0.0111, "step": 5160 }, { "epoch": 0.16041293194040923, "grad_norm": 0.12846866250038147, "learning_rate": 1.9919793534029797e-05, "loss": 0.0121, "step": 5190 }, { "epoch": 0.16134017432156766, "grad_norm": 0.11923234909772873, "learning_rate": 1.991932991283922e-05, "loss": 0.0117, "step": 5220 }, { "epoch": 0.1622674167027261, "grad_norm": 0.142263725399971, "learning_rate": 1.9918866291648636e-05, "loss": 0.0122, "step": 5250 }, { "epoch": 0.16319465908388453, "grad_norm": 0.1639038473367691, "learning_rate": 1.9918402670458058e-05, "loss": 0.0115, "step": 5280 }, { "epoch": 0.16412190146504296, "grad_norm": 0.1391741931438446, "learning_rate": 1.991793904926748e-05, "loss": 0.011, "step": 5310 }, { "epoch": 0.1650491438462014, "grad_norm": 0.1815851330757141, "learning_rate": 1.99174754280769e-05, "loss": 0.0113, "step": 5340 }, { "epoch": 0.16597638622735983, "grad_norm": 0.1509413719177246, "learning_rate": 1.9917011806886322e-05, "loss": 0.0122, "step": 5370 }, { "epoch": 0.16690362860851826, "grad_norm": 0.1459558606147766, "learning_rate": 1.9916548185695744e-05, "loss": 0.0112, "step": 5400 }, { "epoch": 0.1678308709896767, "grad_norm": 0.14355354011058807, "learning_rate": 1.9916084564505162e-05, "loss": 0.0109, "step": 5430 }, { "epoch": 0.16875811337083513, "grad_norm": 0.2610926032066345, "learning_rate": 1.9915620943314584e-05, "loss": 0.0114, "step": 5460 }, { "epoch": 0.16968535575199356, "grad_norm": 0.1558261662721634, "learning_rate": 1.9915157322124005e-05, "loss": 0.0115, "step": 5490 }, { "epoch": 0.170612598133152, "grad_norm": 0.14375154674053192, "learning_rate": 1.9914693700933427e-05, "loss": 0.0112, "step": 5520 }, { "epoch": 0.17153984051431045, "grad_norm": 0.12216426432132721, "learning_rate": 1.9914230079742848e-05, "loss": 0.011, "step": 5550 }, { "epoch": 0.1724670828954689, "grad_norm": 0.1404140740633011, "learning_rate": 1.9913766458552266e-05, "loss": 
0.0108, "step": 5580 }, { "epoch": 0.17339432527662732, "grad_norm": 0.1506173014640808, "learning_rate": 1.9913302837361688e-05, "loss": 0.0121, "step": 5610 }, { "epoch": 0.17432156765778575, "grad_norm": 0.18408668041229248, "learning_rate": 1.991283921617111e-05, "loss": 0.0118, "step": 5640 }, { "epoch": 0.17524881003894419, "grad_norm": 0.1965809315443039, "learning_rate": 1.9912375594980527e-05, "loss": 0.0115, "step": 5670 }, { "epoch": 0.17617605242010262, "grad_norm": 0.15520329773426056, "learning_rate": 1.991191197378995e-05, "loss": 0.0121, "step": 5700 }, { "epoch": 0.17710329480126105, "grad_norm": 0.14089910686016083, "learning_rate": 1.991144835259937e-05, "loss": 0.0108, "step": 5730 }, { "epoch": 0.17803053718241948, "grad_norm": 0.12024790048599243, "learning_rate": 1.9910984731408792e-05, "loss": 0.0119, "step": 5760 }, { "epoch": 0.17895777956357792, "grad_norm": 0.14454089105129242, "learning_rate": 1.9910521110218213e-05, "loss": 0.0111, "step": 5790 }, { "epoch": 0.17988502194473635, "grad_norm": 0.1527293473482132, "learning_rate": 1.9910057489027635e-05, "loss": 0.011, "step": 5820 }, { "epoch": 0.18081226432589478, "grad_norm": 0.16068576276302338, "learning_rate": 1.9909593867837053e-05, "loss": 0.011, "step": 5850 }, { "epoch": 0.18173950670705322, "grad_norm": 0.14727212488651276, "learning_rate": 1.9909130246646475e-05, "loss": 0.012, "step": 5880 }, { "epoch": 0.18266674908821165, "grad_norm": 0.14564207196235657, "learning_rate": 1.9908666625455896e-05, "loss": 0.0122, "step": 5910 }, { "epoch": 0.18359399146937008, "grad_norm": 0.12158086150884628, "learning_rate": 1.9908203004265318e-05, "loss": 0.0114, "step": 5940 }, { "epoch": 0.18452123385052852, "grad_norm": 0.12684834003448486, "learning_rate": 1.990773938307474e-05, "loss": 0.0119, "step": 5970 }, { "epoch": 0.18544847623168698, "grad_norm": 0.13370554149150848, "learning_rate": 1.9907275761884157e-05, "loss": 0.0117, "step": 6000 }, { "epoch": 0.1863757186128454, 
"grad_norm": 0.14909164607524872, "learning_rate": 1.990681214069358e-05, "loss": 0.0122, "step": 6030 }, { "epoch": 0.18730296099400384, "grad_norm": 0.1588449478149414, "learning_rate": 1.9906348519503e-05, "loss": 0.0117, "step": 6060 }, { "epoch": 0.18823020337516227, "grad_norm": 0.1614857167005539, "learning_rate": 1.9905884898312418e-05, "loss": 0.0106, "step": 6090 }, { "epoch": 0.1891574457563207, "grad_norm": 0.16850101947784424, "learning_rate": 1.990542127712184e-05, "loss": 0.0108, "step": 6120 }, { "epoch": 0.19008468813747914, "grad_norm": 0.4009515345096588, "learning_rate": 1.990495765593126e-05, "loss": 0.0116, "step": 6150 }, { "epoch": 0.19101193051863757, "grad_norm": 0.16089241206645966, "learning_rate": 1.9904494034740683e-05, "loss": 0.0109, "step": 6180 }, { "epoch": 0.191939172899796, "grad_norm": 0.16613070666790009, "learning_rate": 1.9904030413550104e-05, "loss": 0.012, "step": 6210 }, { "epoch": 0.19286641528095444, "grad_norm": 0.14632222056388855, "learning_rate": 1.9903566792359526e-05, "loss": 0.0114, "step": 6240 }, { "epoch": 0.19379365766211287, "grad_norm": 0.1911221146583557, "learning_rate": 1.9903103171168944e-05, "loss": 0.0111, "step": 6270 }, { "epoch": 0.1947209000432713, "grad_norm": 0.1362852305173874, "learning_rate": 1.9902639549978365e-05, "loss": 0.0117, "step": 6300 }, { "epoch": 0.19564814242442974, "grad_norm": 0.1608174741268158, "learning_rate": 1.9902175928787787e-05, "loss": 0.0111, "step": 6330 }, { "epoch": 0.19657538480558817, "grad_norm": 0.16837799549102783, "learning_rate": 1.990171230759721e-05, "loss": 0.0113, "step": 6360 }, { "epoch": 0.1975026271867466, "grad_norm": 0.11734677851200104, "learning_rate": 1.990124868640663e-05, "loss": 0.0112, "step": 6390 }, { "epoch": 0.19842986956790504, "grad_norm": 0.1214800626039505, "learning_rate": 1.9900785065216048e-05, "loss": 0.0115, "step": 6420 }, { "epoch": 0.1993571119490635, "grad_norm": 0.15694832801818848, "learning_rate": 1.990032144402547e-05, 
"loss": 0.0112, "step": 6450 }, { "epoch": 0.20028435433022193, "grad_norm": 0.16361165046691895, "learning_rate": 1.989985782283489e-05, "loss": 0.0116, "step": 6480 }, { "epoch": 0.20121159671138036, "grad_norm": 0.11277832090854645, "learning_rate": 1.9899394201644313e-05, "loss": 0.0114, "step": 6510 }, { "epoch": 0.2021388390925388, "grad_norm": 0.15097928047180176, "learning_rate": 1.9898930580453734e-05, "loss": 0.0107, "step": 6540 }, { "epoch": 0.20306608147369723, "grad_norm": 0.14473706483840942, "learning_rate": 1.9898466959263156e-05, "loss": 0.0112, "step": 6570 }, { "epoch": 0.20399332385485566, "grad_norm": null, "learning_rate": 1.989801879211226e-05, "loss": 0.0111, "step": 6600 }, { "epoch": 0.2049205662360141, "grad_norm": 0.1209929957985878, "learning_rate": 1.989755517092168e-05, "loss": 0.0121, "step": 6630 }, { "epoch": 0.20584780861717253, "grad_norm": 0.13794147968292236, "learning_rate": 1.9897091549731103e-05, "loss": 0.0116, "step": 6660 }, { "epoch": 0.20677505099833096, "grad_norm": 0.13448581099510193, "learning_rate": 1.989662792854052e-05, "loss": 0.0122, "step": 6690 }, { "epoch": 0.2077022933794894, "grad_norm": 0.12994475662708282, "learning_rate": 1.9896164307349943e-05, "loss": 0.0118, "step": 6720 }, { "epoch": 0.20862953576064783, "grad_norm": 0.18589365482330322, "learning_rate": 1.9895700686159364e-05, "loss": 0.012, "step": 6750 }, { "epoch": 0.20955677814180626, "grad_norm": 0.1379929631948471, "learning_rate": 1.9895237064968786e-05, "loss": 0.011, "step": 6780 }, { "epoch": 0.2104840205229647, "grad_norm": 0.138004869222641, "learning_rate": 1.9894773443778207e-05, "loss": 0.0113, "step": 6810 }, { "epoch": 0.21141126290412313, "grad_norm": 0.1849716454744339, "learning_rate": 1.9894309822587625e-05, "loss": 0.0116, "step": 6840 }, { "epoch": 0.21233850528528156, "grad_norm": 0.14123991131782532, "learning_rate": 1.9893846201397047e-05, "loss": 0.0121, "step": 6870 }, { "epoch": 0.21326574766644002, "grad_norm": 
0.1818026900291443, "learning_rate": 1.989338258020647e-05, "loss": 0.0121, "step": 6900 }, { "epoch": 0.21419299004759845, "grad_norm": 0.14563556015491486, "learning_rate": 1.9892918959015887e-05, "loss": 0.0105, "step": 6930 }, { "epoch": 0.21512023242875689, "grad_norm": 0.17094004154205322, "learning_rate": 1.9892455337825308e-05, "loss": 0.0115, "step": 6960 }, { "epoch": 0.21604747480991532, "grad_norm": 0.12089026719331741, "learning_rate": 1.989199171663473e-05, "loss": 0.0112, "step": 6990 }, { "epoch": 0.21697471719107375, "grad_norm": 0.15891049802303314, "learning_rate": 1.989152809544415e-05, "loss": 0.0106, "step": 7020 }, { "epoch": 0.21790195957223218, "grad_norm": 0.13235603272914886, "learning_rate": 1.9891064474253573e-05, "loss": 0.0108, "step": 7050 }, { "epoch": 0.21882920195339062, "grad_norm": 0.12356830388307571, "learning_rate": 1.989060085306299e-05, "loss": 0.0115, "step": 7080 }, { "epoch": 0.21975644433454905, "grad_norm": 0.1347808986902237, "learning_rate": 1.9890137231872412e-05, "loss": 0.0124, "step": 7110 }, { "epoch": 0.22068368671570748, "grad_norm": 0.13858920335769653, "learning_rate": 1.9889673610681834e-05, "loss": 0.0118, "step": 7140 }, { "epoch": 0.22161092909686592, "grad_norm": 0.11920204013586044, "learning_rate": 1.9889209989491255e-05, "loss": 0.0115, "step": 7170 }, { "epoch": 0.22253817147802435, "grad_norm": 0.11066605150699615, "learning_rate": 1.9888746368300677e-05, "loss": 0.011, "step": 7200 }, { "epoch": 0.22346541385918278, "grad_norm": 0.1258588284254074, "learning_rate": 1.9888282747110098e-05, "loss": 0.0121, "step": 7230 }, { "epoch": 0.22439265624034122, "grad_norm": 0.186114102602005, "learning_rate": 1.9887819125919516e-05, "loss": 0.0114, "step": 7260 }, { "epoch": 0.22531989862149965, "grad_norm": 0.18203586339950562, "learning_rate": 1.9887355504728938e-05, "loss": 0.0114, "step": 7290 }, { "epoch": 0.2262471410026581, "grad_norm": 0.14298152923583984, "learning_rate": 1.988689188353836e-05, 
"loss": 0.0111, "step": 7320 }, { "epoch": 0.22717438338381654, "grad_norm": 0.151611328125, "learning_rate": 1.9886428262347777e-05, "loss": 0.0116, "step": 7350 }, { "epoch": 0.22810162576497497, "grad_norm": 0.9072340726852417, "learning_rate": 1.98859646411572e-05, "loss": 0.0116, "step": 7380 }, { "epoch": 0.2290288681461334, "grad_norm": 0.13344086706638336, "learning_rate": 1.988550101996662e-05, "loss": 0.0111, "step": 7410 }, { "epoch": 0.22995611052729184, "grad_norm": 0.137989804148674, "learning_rate": 1.9885037398776042e-05, "loss": 0.0112, "step": 7440 }, { "epoch": 0.23088335290845027, "grad_norm": 0.15519069135189056, "learning_rate": 1.9884573777585463e-05, "loss": 0.011, "step": 7470 }, { "epoch": 0.2318105952896087, "grad_norm": 0.1381489336490631, "learning_rate": 1.988411015639488e-05, "loss": 0.0121, "step": 7500 }, { "epoch": 0.23273783767076714, "grad_norm": 0.14939753711223602, "learning_rate": 1.9883646535204303e-05, "loss": 0.0113, "step": 7530 }, { "epoch": 0.23366508005192557, "grad_norm": 0.15637439489364624, "learning_rate": 1.9883182914013725e-05, "loss": 0.0117, "step": 7560 }, { "epoch": 0.234592322433084, "grad_norm": 0.17385205626487732, "learning_rate": 1.9882719292823146e-05, "loss": 0.0114, "step": 7590 }, { "epoch": 0.23551956481424244, "grad_norm": 0.1611182540655136, "learning_rate": 1.9882255671632568e-05, "loss": 0.0117, "step": 7620 }, { "epoch": 0.23644680719540087, "grad_norm": 0.16272680461406708, "learning_rate": 1.988179205044199e-05, "loss": 0.0116, "step": 7650 }, { "epoch": 0.2373740495765593, "grad_norm": 0.10000737756490707, "learning_rate": 1.9881328429251407e-05, "loss": 0.0118, "step": 7680 }, { "epoch": 0.23830129195771774, "grad_norm": 0.1614050716161728, "learning_rate": 1.988086480806083e-05, "loss": 0.0114, "step": 7710 }, { "epoch": 0.23922853433887617, "grad_norm": 0.14760708808898926, "learning_rate": 1.9880401186870247e-05, "loss": 0.0108, "step": 7740 }, { "epoch": 0.24015577672003463, "grad_norm": 
0.17550432682037354, "learning_rate": 1.987993756567967e-05, "loss": 0.0117, "step": 7770 }, { "epoch": 0.24108301910119306, "grad_norm": 0.143015056848526, "learning_rate": 1.9879473944489093e-05, "loss": 0.0118, "step": 7800 }, { "epoch": 0.2420102614823515, "grad_norm": 0.14959968626499176, "learning_rate": 1.987901032329851e-05, "loss": 0.0113, "step": 7830 }, { "epoch": 0.24293750386350993, "grad_norm": 0.1407766044139862, "learning_rate": 1.9878546702107933e-05, "loss": 0.0121, "step": 7860 }, { "epoch": 0.24386474624466836, "grad_norm": 0.1537831574678421, "learning_rate": 1.9878083080917354e-05, "loss": 0.0115, "step": 7890 }, { "epoch": 0.2447919886258268, "grad_norm": 0.1855214685201645, "learning_rate": 1.9877619459726773e-05, "loss": 0.0112, "step": 7920 }, { "epoch": 0.24571923100698523, "grad_norm": 0.1298413872718811, "learning_rate": 1.9877155838536194e-05, "loss": 0.0113, "step": 7950 }, { "epoch": 0.24664647338814366, "grad_norm": 0.12115942686796188, "learning_rate": 1.9876692217345616e-05, "loss": 0.0114, "step": 7980 }, { "epoch": 0.2475737157693021, "grad_norm": 0.11830762773752213, "learning_rate": 1.9876228596155037e-05, "loss": 0.0112, "step": 8010 }, { "epoch": 0.24850095815046053, "grad_norm": 0.15367205440998077, "learning_rate": 1.987576497496446e-05, "loss": 0.011, "step": 8040 }, { "epoch": 0.24942820053161896, "grad_norm": 0.1217503622174263, "learning_rate": 1.9875301353773877e-05, "loss": 0.0105, "step": 8070 }, { "epoch": 0.2503554429127774, "grad_norm": 0.12943443655967712, "learning_rate": 1.9874837732583298e-05, "loss": 0.0118, "step": 8100 }, { "epoch": 0.2512826852939358, "grad_norm": 0.1825013905763626, "learning_rate": 1.987437411139272e-05, "loss": 0.0117, "step": 8130 }, { "epoch": 0.25220992767509426, "grad_norm": 0.14473342895507812, "learning_rate": 1.987391049020214e-05, "loss": 0.0126, "step": 8160 }, { "epoch": 0.2531371700562527, "grad_norm": 0.15052497386932373, "learning_rate": 1.9873446869011563e-05, "loss": 
0.0121, "step": 8190 }, { "epoch": 0.2540644124374111, "grad_norm": 0.12829282879829407, "learning_rate": 1.9872983247820984e-05, "loss": 0.0109, "step": 8220 }, { "epoch": 0.25499165481856956, "grad_norm": 0.13104604184627533, "learning_rate": 1.9872519626630402e-05, "loss": 0.0109, "step": 8250 }, { "epoch": 0.255918897199728, "grad_norm": 0.15770044922828674, "learning_rate": 1.9872056005439824e-05, "loss": 0.0114, "step": 8280 }, { "epoch": 0.2568461395808864, "grad_norm": 0.16095954179763794, "learning_rate": 1.9871592384249245e-05, "loss": 0.0124, "step": 8310 }, { "epoch": 0.25777338196204486, "grad_norm": 0.23386655747890472, "learning_rate": 1.9871128763058663e-05, "loss": 0.0116, "step": 8340 }, { "epoch": 0.2587006243432033, "grad_norm": 0.16140364110469818, "learning_rate": 1.9870665141868085e-05, "loss": 0.0106, "step": 8370 }, { "epoch": 0.2596278667243617, "grad_norm": 0.17416268587112427, "learning_rate": 1.9870201520677506e-05, "loss": 0.0119, "step": 8400 }, { "epoch": 0.2605551091055202, "grad_norm": 0.14950339496135712, "learning_rate": 1.9869737899486928e-05, "loss": 0.0106, "step": 8430 }, { "epoch": 0.26148235148667864, "grad_norm": 0.18756259977817535, "learning_rate": 1.986927427829635e-05, "loss": 0.0111, "step": 8460 }, { "epoch": 0.2624095938678371, "grad_norm": 0.13959966599941254, "learning_rate": 1.9868810657105768e-05, "loss": 0.011, "step": 8490 }, { "epoch": 0.2633368362489955, "grad_norm": 0.14915312826633453, "learning_rate": 1.986834703591519e-05, "loss": 0.0113, "step": 8520 }, { "epoch": 0.26426407863015394, "grad_norm": 0.13579893112182617, "learning_rate": 1.986788341472461e-05, "loss": 0.0116, "step": 8550 }, { "epoch": 0.2651913210113124, "grad_norm": 0.17177793383598328, "learning_rate": 1.9867419793534032e-05, "loss": 0.0115, "step": 8580 }, { "epoch": 0.2661185633924708, "grad_norm": 0.16653135418891907, "learning_rate": 1.9866956172343454e-05, "loss": 0.0116, "step": 8610 }, { "epoch": 0.26704580577362924, "grad_norm": 
0.10810311883687973, "learning_rate": 1.9866492551152875e-05, "loss": 0.0115, "step": 8640 }, { "epoch": 0.2679730481547877, "grad_norm": 0.15830102562904358, "learning_rate": 1.9866028929962293e-05, "loss": 0.0116, "step": 8670 }, { "epoch": 0.2689002905359461, "grad_norm": 0.1652713567018509, "learning_rate": 1.9865565308771715e-05, "loss": 0.0109, "step": 8700 }, { "epoch": 0.26982753291710454, "grad_norm": 0.14021942019462585, "learning_rate": 1.9865101687581136e-05, "loss": 0.0119, "step": 8730 }, { "epoch": 0.270754775298263, "grad_norm": 0.11354702711105347, "learning_rate": 1.9864638066390554e-05, "loss": 0.0112, "step": 8760 }, { "epoch": 0.2716820176794214, "grad_norm": 0.14638416469097137, "learning_rate": 1.9864174445199976e-05, "loss": 0.0111, "step": 8790 }, { "epoch": 0.27260926006057984, "grad_norm": 0.14928920567035675, "learning_rate": 1.9863710824009397e-05, "loss": 0.0114, "step": 8820 }, { "epoch": 0.27353650244173827, "grad_norm": 0.10313010215759277, "learning_rate": 1.986324720281882e-05, "loss": 0.0114, "step": 8850 }, { "epoch": 0.2744637448228967, "grad_norm": 0.144300639629364, "learning_rate": 1.986278358162824e-05, "loss": 0.0119, "step": 8880 }, { "epoch": 0.27539098720405514, "grad_norm": 0.13070087134838104, "learning_rate": 1.986231996043766e-05, "loss": 0.0113, "step": 8910 }, { "epoch": 0.27631822958521357, "grad_norm": 0.19819149374961853, "learning_rate": 1.986185633924708e-05, "loss": 0.0109, "step": 8940 }, { "epoch": 0.277245471966372, "grad_norm": 0.14839182794094086, "learning_rate": 1.9861408172096188e-05, "loss": 0.0117, "step": 8970 }, { "epoch": 0.27817271434753044, "grad_norm": 0.1318449229001999, "learning_rate": 1.9860944550905606e-05, "loss": 0.0107, "step": 9000 }, { "epoch": 0.27909995672868887, "grad_norm": 0.1858850121498108, "learning_rate": 1.9860480929715028e-05, "loss": 0.0111, "step": 9030 }, { "epoch": 0.2800271991098473, "grad_norm": 0.13583645224571228, "learning_rate": 1.9860017308524452e-05, "loss": 
0.0116, "step": 9060 }, { "epoch": 0.28095444149100574, "grad_norm": 0.1082659438252449, "learning_rate": 1.985955368733387e-05, "loss": 0.0116, "step": 9090 }, { "epoch": 0.28188168387216417, "grad_norm": 0.1821129471063614, "learning_rate": 1.9859090066143292e-05, "loss": 0.011, "step": 9120 }, { "epoch": 0.2828089262533226, "grad_norm": 0.1822529435157776, "learning_rate": 1.9858626444952714e-05, "loss": 0.0107, "step": 9150 }, { "epoch": 0.28373616863448103, "grad_norm": 0.1635167896747589, "learning_rate": 1.985816282376213e-05, "loss": 0.0122, "step": 9180 }, { "epoch": 0.28466341101563947, "grad_norm": 0.13503924012184143, "learning_rate": 1.9857699202571553e-05, "loss": 0.0109, "step": 9210 }, { "epoch": 0.2855906533967979, "grad_norm": 0.10900534689426422, "learning_rate": 1.9857235581380975e-05, "loss": 0.0108, "step": 9240 }, { "epoch": 0.28651789577795633, "grad_norm": 0.16569878160953522, "learning_rate": 1.9856771960190396e-05, "loss": 0.0112, "step": 9270 }, { "epoch": 0.28744513815911477, "grad_norm": 0.19507841765880585, "learning_rate": 1.9856308338999818e-05, "loss": 0.0114, "step": 9300 }, { "epoch": 0.28837238054027325, "grad_norm": 0.14021535217761993, "learning_rate": 1.9855844717809236e-05, "loss": 0.0114, "step": 9330 }, { "epoch": 0.2892996229214317, "grad_norm": 0.12532436847686768, "learning_rate": 1.9855381096618657e-05, "loss": 0.011, "step": 9360 }, { "epoch": 0.2902268653025901, "grad_norm": 0.15686772763729095, "learning_rate": 1.985491747542808e-05, "loss": 0.0111, "step": 9390 }, { "epoch": 0.29115410768374855, "grad_norm": 0.1124226376414299, "learning_rate": 1.98544538542375e-05, "loss": 0.0112, "step": 9420 }, { "epoch": 0.292081350064907, "grad_norm": 0.13426342606544495, "learning_rate": 1.9853990233046922e-05, "loss": 0.012, "step": 9450 }, { "epoch": 0.2930085924460654, "grad_norm": 0.14461062848567963, "learning_rate": 1.9853526611856343e-05, "loss": 0.0106, "step": 9480 }, { "epoch": 0.29393583482722385, "grad_norm": 
0.11342272162437439, "learning_rate": 1.985306299066576e-05, "loss": 0.0116, "step": 9510 }, { "epoch": 0.2948630772083823, "grad_norm": 0.17991814017295837, "learning_rate": 1.9852599369475183e-05, "loss": 0.0115, "step": 9540 }, { "epoch": 0.2957903195895407, "grad_norm": 0.14832162857055664, "learning_rate": 1.98521357482846e-05, "loss": 0.0108, "step": 9570 }, { "epoch": 0.29671756197069915, "grad_norm": 0.13052034378051758, "learning_rate": 1.9851672127094023e-05, "loss": 0.0118, "step": 9600 }, { "epoch": 0.2976448043518576, "grad_norm": 0.15216808021068573, "learning_rate": 1.9851208505903444e-05, "loss": 0.0104, "step": 9630 }, { "epoch": 0.298572046733016, "grad_norm": 0.12808045744895935, "learning_rate": 1.9850744884712866e-05, "loss": 0.0114, "step": 9660 }, { "epoch": 0.29949928911417445, "grad_norm": 0.1520978957414627, "learning_rate": 1.9850281263522287e-05, "loss": 0.0103, "step": 9690 }, { "epoch": 0.3004265314953329, "grad_norm": 0.13173256814479828, "learning_rate": 1.984981764233171e-05, "loss": 0.0125, "step": 9720 }, { "epoch": 0.3013537738764913, "grad_norm": 0.14505286514759064, "learning_rate": 1.9849354021141127e-05, "loss": 0.011, "step": 9750 }, { "epoch": 0.30228101625764975, "grad_norm": 0.1599726378917694, "learning_rate": 1.9848890399950548e-05, "loss": 0.0112, "step": 9780 }, { "epoch": 0.3032082586388082, "grad_norm": 0.13948954641819, "learning_rate": 1.984842677875997e-05, "loss": 0.0114, "step": 9810 }, { "epoch": 0.3041355010199666, "grad_norm": 0.1153598204255104, "learning_rate": 1.984796315756939e-05, "loss": 0.0108, "step": 9840 }, { "epoch": 0.30506274340112505, "grad_norm": 0.1416090428829193, "learning_rate": 1.9847499536378813e-05, "loss": 0.0111, "step": 9870 }, { "epoch": 0.3059899857822835, "grad_norm": 0.20779788494110107, "learning_rate": 1.984703591518823e-05, "loss": 0.0116, "step": 9900 }, { "epoch": 0.3069172281634419, "grad_norm": 0.1529664546251297, "learning_rate": 1.9846572293997652e-05, "loss": 0.0112, 
"step": 9930 }, { "epoch": 0.30784447054460035, "grad_norm": 0.14496023952960968, "learning_rate": 1.9846108672807074e-05, "loss": 0.0109, "step": 9960 }, { "epoch": 0.3087717129257588, "grad_norm": 0.15674734115600586, "learning_rate": 1.9845645051616492e-05, "loss": 0.0121, "step": 9990 }, { "epoch": 0.3096989553069172, "grad_norm": 0.12837307155132294, "learning_rate": 1.9845181430425914e-05, "loss": 0.0114, "step": 10020 }, { "epoch": 0.31062619768807564, "grad_norm": 0.11187253892421722, "learning_rate": 1.9844717809235335e-05, "loss": 0.0118, "step": 10050 }, { "epoch": 0.3115534400692341, "grad_norm": 0.1310831904411316, "learning_rate": 1.9844254188044757e-05, "loss": 0.0114, "step": 10080 }, { "epoch": 0.3124806824503925, "grad_norm": 0.11833212524652481, "learning_rate": 1.9843790566854178e-05, "loss": 0.0116, "step": 10110 }, { "epoch": 0.31340792483155094, "grad_norm": 0.13200756907463074, "learning_rate": 1.98433269456636e-05, "loss": 0.0106, "step": 10140 }, { "epoch": 0.3143351672127094, "grad_norm": 0.16668395698070526, "learning_rate": 1.9842863324473018e-05, "loss": 0.0113, "step": 10170 }, { "epoch": 0.3152624095938678, "grad_norm": 0.146733820438385, "learning_rate": 1.984239970328244e-05, "loss": 0.0108, "step": 10200 }, { "epoch": 0.3161896519750263, "grad_norm": 0.15480907261371613, "learning_rate": 1.984193608209186e-05, "loss": 0.0107, "step": 10230 }, { "epoch": 0.31711689435618473, "grad_norm": 0.1579483300447464, "learning_rate": 1.9841472460901282e-05, "loss": 0.0106, "step": 10260 }, { "epoch": 0.31804413673734316, "grad_norm": 0.13968975841999054, "learning_rate": 1.9841008839710704e-05, "loss": 0.0114, "step": 10290 }, { "epoch": 0.3189713791185016, "grad_norm": 0.14403727650642395, "learning_rate": 1.9840545218520122e-05, "loss": 0.0112, "step": 10320 }, { "epoch": 0.31989862149966003, "grad_norm": 0.17049701511859894, "learning_rate": 1.9840081597329543e-05, "loss": 0.0117, "step": 10350 }, { "epoch": 0.32082586388081846, 
"grad_norm": 0.1345260590314865, "learning_rate": 1.9839617976138965e-05, "loss": 0.0116, "step": 10380 }, { "epoch": 0.3217531062619769, "grad_norm": 0.12479573488235474, "learning_rate": 1.9839154354948383e-05, "loss": 0.0112, "step": 10410 }, { "epoch": 0.32268034864313533, "grad_norm": 0.08282722532749176, "learning_rate": 1.9838690733757804e-05, "loss": 0.0111, "step": 10440 }, { "epoch": 0.32360759102429376, "grad_norm": 0.1147284209728241, "learning_rate": 1.983822711256723e-05, "loss": 0.0109, "step": 10470 }, { "epoch": 0.3245348334054522, "grad_norm": 0.14327378571033478, "learning_rate": 1.9837763491376647e-05, "loss": 0.0117, "step": 10500 }, { "epoch": 0.3254620757866106, "grad_norm": 0.11629188060760498, "learning_rate": 1.983729987018607e-05, "loss": 0.0114, "step": 10530 }, { "epoch": 0.32638931816776906, "grad_norm": 0.136122465133667, "learning_rate": 1.9836836248995487e-05, "loss": 0.0117, "step": 10560 }, { "epoch": 0.3273165605489275, "grad_norm": 0.1614139974117279, "learning_rate": 1.983637262780491e-05, "loss": 0.0113, "step": 10590 }, { "epoch": 0.3282438029300859, "grad_norm": 0.14974412322044373, "learning_rate": 1.983590900661433e-05, "loss": 0.0112, "step": 10620 }, { "epoch": 0.32917104531124436, "grad_norm": 0.1436106562614441, "learning_rate": 1.983544538542375e-05, "loss": 0.0111, "step": 10650 }, { "epoch": 0.3300982876924028, "grad_norm": 0.1434728354215622, "learning_rate": 1.9834981764233173e-05, "loss": 0.011, "step": 10680 }, { "epoch": 0.3310255300735612, "grad_norm": 0.1285828799009323, "learning_rate": 1.9834518143042595e-05, "loss": 0.0118, "step": 10710 }, { "epoch": 0.33195277245471966, "grad_norm": 0.16394366323947906, "learning_rate": 1.9834054521852013e-05, "loss": 0.0119, "step": 10740 }, { "epoch": 0.3328800148358781, "grad_norm": 0.169382706284523, "learning_rate": 1.9833590900661434e-05, "loss": 0.0121, "step": 10770 }, { "epoch": 0.3338072572170365, "grad_norm": 0.13655491173267365, "learning_rate": 
1.9833127279470856e-05, "loss": 0.0116, "step": 10800 }, { "epoch": 0.33473449959819496, "grad_norm": 0.1816062033176422, "learning_rate": 1.9832663658280277e-05, "loss": 0.0108, "step": 10830 }, { "epoch": 0.3356617419793534, "grad_norm": 0.1492711901664734, "learning_rate": 1.98322000370897e-05, "loss": 0.0109, "step": 10860 }, { "epoch": 0.3365889843605118, "grad_norm": 0.09310702234506607, "learning_rate": 1.9831736415899117e-05, "loss": 0.0108, "step": 10890 }, { "epoch": 0.33751622674167026, "grad_norm": 0.19297969341278076, "learning_rate": 1.983127279470854e-05, "loss": 0.0115, "step": 10920 }, { "epoch": 0.3384434691228287, "grad_norm": 0.12931479513645172, "learning_rate": 1.983080917351796e-05, "loss": 0.0119, "step": 10950 }, { "epoch": 0.3393707115039871, "grad_norm": 0.1133192926645279, "learning_rate": 1.9830345552327378e-05, "loss": 0.0102, "step": 10980 }, { "epoch": 0.34029795388514555, "grad_norm": 0.19055162370204926, "learning_rate": 1.98298819311368e-05, "loss": 0.0112, "step": 11010 }, { "epoch": 0.341225196266304, "grad_norm": 0.16808393597602844, "learning_rate": 1.982941830994622e-05, "loss": 0.0115, "step": 11040 }, { "epoch": 0.3421524386474624, "grad_norm": 0.18603599071502686, "learning_rate": 1.9828954688755643e-05, "loss": 0.011, "step": 11070 }, { "epoch": 0.3430796810286209, "grad_norm": 0.15790607035160065, "learning_rate": 1.9828491067565064e-05, "loss": 0.0113, "step": 11100 }, { "epoch": 0.34400692340977934, "grad_norm": 0.13516488671302795, "learning_rate": 1.9828027446374486e-05, "loss": 0.0111, "step": 11130 }, { "epoch": 0.3449341657909378, "grad_norm": 0.16013328731060028, "learning_rate": 1.9827563825183904e-05, "loss": 0.011, "step": 11160 }, { "epoch": 0.3458614081720962, "grad_norm": 0.1916416585445404, "learning_rate": 1.9827100203993325e-05, "loss": 0.0115, "step": 11190 }, { "epoch": 0.34678865055325464, "grad_norm": 0.12891234457492828, "learning_rate": 1.9826636582802747e-05, "loss": 0.0109, "step": 11220 }, { 
"epoch": 0.3477158929344131, "grad_norm": 0.15920047461986542, "learning_rate": 1.9826172961612168e-05, "loss": 0.0099, "step": 11250 }, { "epoch": 0.3486431353155715, "grad_norm": 0.1358756422996521, "learning_rate": 1.982570934042159e-05, "loss": 0.0117, "step": 11280 }, { "epoch": 0.34957037769672994, "grad_norm": 0.110017329454422, "learning_rate": 1.9825245719231008e-05, "loss": 0.0115, "step": 11310 }, { "epoch": 0.35049762007788837, "grad_norm": 0.13068543374538422, "learning_rate": 1.982478209804043e-05, "loss": 0.0113, "step": 11340 }, { "epoch": 0.3514248624590468, "grad_norm": 0.32361915707588196, "learning_rate": 1.982431847684985e-05, "loss": 0.011, "step": 11370 }, { "epoch": 0.35235210484020524, "grad_norm": 0.11055938154459, "learning_rate": 1.982385485565927e-05, "loss": 0.0122, "step": 11400 }, { "epoch": 0.35327934722136367, "grad_norm": 0.1457458883523941, "learning_rate": 1.982339123446869e-05, "loss": 0.01, "step": 11430 }, { "epoch": 0.3542065896025221, "grad_norm": 0.12355484068393707, "learning_rate": 1.9822927613278112e-05, "loss": 0.0111, "step": 11460 }, { "epoch": 0.35513383198368054, "grad_norm": 0.1109640896320343, "learning_rate": 1.9822463992087533e-05, "loss": 0.0115, "step": 11490 }, { "epoch": 0.35606107436483897, "grad_norm": 0.15337657928466797, "learning_rate": 1.9822000370896955e-05, "loss": 0.0111, "step": 11520 }, { "epoch": 0.3569883167459974, "grad_norm": 0.13346615433692932, "learning_rate": 1.9821536749706373e-05, "loss": 0.0116, "step": 11550 }, { "epoch": 0.35791555912715584, "grad_norm": 0.15459662675857544, "learning_rate": 1.9821073128515795e-05, "loss": 0.0111, "step": 11580 }, { "epoch": 0.35884280150831427, "grad_norm": 0.16509433090686798, "learning_rate": 1.9820609507325216e-05, "loss": 0.011, "step": 11610 }, { "epoch": 0.3597700438894727, "grad_norm": 0.15595482289791107, "learning_rate": 1.9820145886134638e-05, "loss": 0.0111, "step": 11640 }, { "epoch": 0.36069728627063113, "grad_norm": 
0.12362854927778244, "learning_rate": 1.981968226494406e-05, "loss": 0.0115, "step": 11670 }, { "epoch": 0.36162452865178957, "grad_norm": 0.15109333395957947, "learning_rate": 1.981921864375348e-05, "loss": 0.0112, "step": 11700 }, { "epoch": 0.362551771032948, "grad_norm": 0.19275827705860138, "learning_rate": 1.98187550225629e-05, "loss": 0.0116, "step": 11730 }, { "epoch": 0.36347901341410643, "grad_norm": 0.15680351853370667, "learning_rate": 1.981829140137232e-05, "loss": 0.0116, "step": 11760 }, { "epoch": 0.36440625579526487, "grad_norm": 0.1653236448764801, "learning_rate": 1.9817827780181742e-05, "loss": 0.0118, "step": 11790 }, { "epoch": 0.3653334981764233, "grad_norm": 0.13641513884067535, "learning_rate": 1.981736415899116e-05, "loss": 0.0116, "step": 11820 }, { "epoch": 0.36626074055758173, "grad_norm": 0.1006283164024353, "learning_rate": 1.981690053780058e-05, "loss": 0.0116, "step": 11850 }, { "epoch": 0.36718798293874017, "grad_norm": 0.18624696135520935, "learning_rate": 1.981645237064969e-05, "loss": 0.0113, "step": 11880 }, { "epoch": 0.3681152253198986, "grad_norm": 0.11926878988742828, "learning_rate": 1.981598874945911e-05, "loss": 0.0102, "step": 11910 }, { "epoch": 0.36904246770105703, "grad_norm": 0.16613160073757172, "learning_rate": 1.9815525128268532e-05, "loss": 0.0116, "step": 11940 }, { "epoch": 0.36996971008221546, "grad_norm": 0.15728864073753357, "learning_rate": 1.9815061507077954e-05, "loss": 0.0112, "step": 11970 }, { "epoch": 0.37089695246337395, "grad_norm": 0.16255664825439453, "learning_rate": 1.9814597885887372e-05, "loss": 0.0122, "step": 12000 }, { "epoch": 0.3718241948445324, "grad_norm": 0.171084925532341, "learning_rate": 1.9814134264696793e-05, "loss": 0.0108, "step": 12030 }, { "epoch": 0.3727514372256908, "grad_norm": 0.13140620291233063, "learning_rate": 1.981367064350621e-05, "loss": 0.0108, "step": 12060 }, { "epoch": 0.37367867960684925, "grad_norm": 0.2089879959821701, "learning_rate": 
1.9813207022315636e-05, "loss": 0.011, "step": 12090 }, { "epoch": 0.3746059219880077, "grad_norm": 0.20163212716579437, "learning_rate": 1.9812743401125058e-05, "loss": 0.0107, "step": 12120 }, { "epoch": 0.3755331643691661, "grad_norm": 0.12433648109436035, "learning_rate": 1.9812279779934476e-05, "loss": 0.0102, "step": 12150 }, { "epoch": 0.37646040675032455, "grad_norm": 0.18142108619213104, "learning_rate": 1.9811816158743898e-05, "loss": 0.0108, "step": 12180 }, { "epoch": 0.377387649131483, "grad_norm": 0.10551220178604126, "learning_rate": 1.981135253755332e-05, "loss": 0.0113, "step": 12210 }, { "epoch": 0.3783148915126414, "grad_norm": 0.0951574444770813, "learning_rate": 1.9810888916362737e-05, "loss": 0.0112, "step": 12240 }, { "epoch": 0.37924213389379985, "grad_norm": 0.17203642427921295, "learning_rate": 1.981042529517216e-05, "loss": 0.0106, "step": 12270 }, { "epoch": 0.3801693762749583, "grad_norm": 0.12327639758586884, "learning_rate": 1.980996167398158e-05, "loss": 0.01, "step": 12300 }, { "epoch": 0.3810966186561167, "grad_norm": 0.1307629644870758, "learning_rate": 1.9809498052791e-05, "loss": 0.0117, "step": 12330 }, { "epoch": 0.38202386103727515, "grad_norm": 0.2066798210144043, "learning_rate": 1.9809034431600423e-05, "loss": 0.0116, "step": 12360 }, { "epoch": 0.3829511034184336, "grad_norm": 0.16595342755317688, "learning_rate": 1.980857081040984e-05, "loss": 0.0114, "step": 12390 }, { "epoch": 0.383878345799592, "grad_norm": 0.13339926302433014, "learning_rate": 1.9808107189219263e-05, "loss": 0.011, "step": 12420 }, { "epoch": 0.38480558818075045, "grad_norm": 0.1992148458957672, "learning_rate": 1.9807643568028684e-05, "loss": 0.0113, "step": 12450 }, { "epoch": 0.3857328305619089, "grad_norm": 0.18934965133666992, "learning_rate": 1.9807179946838106e-05, "loss": 0.0109, "step": 12480 }, { "epoch": 0.3866600729430673, "grad_norm": 0.13220654428005219, "learning_rate": 1.9806716325647527e-05, "loss": 0.0108, "step": 12510 }, { 
"epoch": 0.38758731532422575, "grad_norm": 0.17376932501792908, "learning_rate": 1.980625270445695e-05, "loss": 0.0123, "step": 12540 }, { "epoch": 0.3885145577053842, "grad_norm": 0.1915857493877411, "learning_rate": 1.9805789083266367e-05, "loss": 0.0115, "step": 12570 }, { "epoch": 0.3894418000865426, "grad_norm": 0.1544165313243866, "learning_rate": 1.980532546207579e-05, "loss": 0.0112, "step": 12600 }, { "epoch": 0.39036904246770104, "grad_norm": 0.16934797167778015, "learning_rate": 1.980486184088521e-05, "loss": 0.0107, "step": 12630 }, { "epoch": 0.3912962848488595, "grad_norm": 0.12536969780921936, "learning_rate": 1.9804398219694628e-05, "loss": 0.0103, "step": 12660 }, { "epoch": 0.3922235272300179, "grad_norm": 0.19992008805274963, "learning_rate": 1.980393459850405e-05, "loss": 0.0112, "step": 12690 }, { "epoch": 0.39315076961117634, "grad_norm": 0.11244340986013412, "learning_rate": 1.980347097731347e-05, "loss": 0.0112, "step": 12720 }, { "epoch": 0.3940780119923348, "grad_norm": 0.14152537286281586, "learning_rate": 1.9803007356122893e-05, "loss": 0.0112, "step": 12750 }, { "epoch": 0.3950052543734932, "grad_norm": 0.14930760860443115, "learning_rate": 1.9802543734932314e-05, "loss": 0.0107, "step": 12780 }, { "epoch": 0.39593249675465164, "grad_norm": 0.13682101666927338, "learning_rate": 1.9802080113741732e-05, "loss": 0.0119, "step": 12810 }, { "epoch": 0.3968597391358101, "grad_norm": 0.10455325990915298, "learning_rate": 1.9801616492551154e-05, "loss": 0.0109, "step": 12840 }, { "epoch": 0.39778698151696856, "grad_norm": 0.14510197937488556, "learning_rate": 1.9801152871360575e-05, "loss": 0.0105, "step": 12870 }, { "epoch": 0.398714223898127, "grad_norm": 0.09486869722604752, "learning_rate": 1.9800689250169997e-05, "loss": 0.0115, "step": 12900 }, { "epoch": 0.39964146627928543, "grad_norm": 0.15586893260478973, "learning_rate": 1.9800225628979418e-05, "loss": 0.0111, "step": 12930 }, { "epoch": 0.40056870866044386, "grad_norm": 
0.14421053230762482, "learning_rate": 1.979976200778884e-05, "loss": 0.011, "step": 12960 }, { "epoch": 0.4014959510416023, "grad_norm": 0.17253579199314117, "learning_rate": 1.9799298386598258e-05, "loss": 0.0113, "step": 12990 }, { "epoch": 0.4024231934227607, "grad_norm": 0.16507063806056976, "learning_rate": 1.979883476540768e-05, "loss": 0.0108, "step": 13020 }, { "epoch": 0.40335043580391916, "grad_norm": 0.1142890527844429, "learning_rate": 1.9798371144217097e-05, "loss": 0.0116, "step": 13050 }, { "epoch": 0.4042776781850776, "grad_norm": 0.1496378779411316, "learning_rate": 1.979790752302652e-05, "loss": 0.011, "step": 13080 }, { "epoch": 0.405204920566236, "grad_norm": 0.319779634475708, "learning_rate": 1.979744390183594e-05, "loss": 0.0111, "step": 13110 }, { "epoch": 0.40613216294739446, "grad_norm": 0.1454552412033081, "learning_rate": 1.9796980280645362e-05, "loss": 0.0105, "step": 13140 }, { "epoch": 0.4070594053285529, "grad_norm": 0.10423721373081207, "learning_rate": 1.9796516659454784e-05, "loss": 0.0113, "step": 13170 }, { "epoch": 0.4079866477097113, "grad_norm": 0.14256003499031067, "learning_rate": 1.9796053038264205e-05, "loss": 0.0114, "step": 13200 }, { "epoch": 0.40891389009086976, "grad_norm": 0.11011745035648346, "learning_rate": 1.9795589417073623e-05, "loss": 0.0112, "step": 13230 }, { "epoch": 0.4098411324720282, "grad_norm": 0.10417099297046661, "learning_rate": 1.9795125795883045e-05, "loss": 0.0104, "step": 13260 }, { "epoch": 0.4107683748531866, "grad_norm": 0.12770701944828033, "learning_rate": 1.9794662174692466e-05, "loss": 0.0108, "step": 13290 }, { "epoch": 0.41169561723434506, "grad_norm": 0.13587158918380737, "learning_rate": 1.9794198553501888e-05, "loss": 0.0114, "step": 13320 }, { "epoch": 0.4126228596155035, "grad_norm": 0.14058151841163635, "learning_rate": 1.979373493231131e-05, "loss": 0.0104, "step": 13350 }, { "epoch": 0.4135501019966619, "grad_norm": 0.13275578618049622, "learning_rate": 1.9793271311120727e-05, 
"loss": 0.0111, "step": 13380 }, { "epoch": 0.41447734437782036, "grad_norm": 0.20822390913963318, "learning_rate": 1.979280768993015e-05, "loss": 0.011, "step": 13410 }, { "epoch": 0.4154045867589788, "grad_norm": 0.13399675488471985, "learning_rate": 1.979234406873957e-05, "loss": 0.0117, "step": 13440 }, { "epoch": 0.4163318291401372, "grad_norm": 0.10696171969175339, "learning_rate": 1.979188044754899e-05, "loss": 0.0114, "step": 13470 }, { "epoch": 0.41725907152129565, "grad_norm": 0.11326509714126587, "learning_rate": 1.9791416826358413e-05, "loss": 0.0118, "step": 13500 }, { "epoch": 0.4181863139024541, "grad_norm": 0.14315557479858398, "learning_rate": 1.9790968659207518e-05, "loss": 0.0114, "step": 13530 }, { "epoch": 0.4191135562836125, "grad_norm": 0.14048266410827637, "learning_rate": 1.979050503801694e-05, "loss": 0.0101, "step": 13560 }, { "epoch": 0.42004079866477095, "grad_norm": 0.19906365871429443, "learning_rate": 1.979004141682636e-05, "loss": 0.011, "step": 13590 }, { "epoch": 0.4209680410459294, "grad_norm": 0.14246176183223724, "learning_rate": 1.9789577795635782e-05, "loss": 0.0113, "step": 13620 }, { "epoch": 0.4218952834270878, "grad_norm": 0.19815756380558014, "learning_rate": 1.97891141744452e-05, "loss": 0.0113, "step": 13650 }, { "epoch": 0.42282252580824625, "grad_norm": 0.13454632461071014, "learning_rate": 1.9788650553254622e-05, "loss": 0.0104, "step": 13680 }, { "epoch": 0.4237497681894047, "grad_norm": 0.39795833826065063, "learning_rate": 1.9788186932064043e-05, "loss": 0.0109, "step": 13710 }, { "epoch": 0.4246770105705631, "grad_norm": 0.1440640687942505, "learning_rate": 1.9787723310873465e-05, "loss": 0.0117, "step": 13740 }, { "epoch": 0.4256042529517216, "grad_norm": 0.14893043041229248, "learning_rate": 1.9787259689682886e-05, "loss": 0.0109, "step": 13770 }, { "epoch": 0.42653149533288004, "grad_norm": 0.16754408180713654, "learning_rate": 1.9786796068492308e-05, "loss": 0.0111, "step": 13800 }, { "epoch": 
0.4274587377140385, "grad_norm": 0.14534471929073334, "learning_rate": 1.9786332447301726e-05, "loss": 0.0114, "step": 13830 }, { "epoch": 0.4283859800951969, "grad_norm": 0.1311120092868805, "learning_rate": 1.9785868826111148e-05, "loss": 0.0108, "step": 13860 }, { "epoch": 0.42931322247635534, "grad_norm": 0.11852199584245682, "learning_rate": 1.9785405204920566e-05, "loss": 0.0104, "step": 13890 }, { "epoch": 0.43024046485751377, "grad_norm": 0.21745778620243073, "learning_rate": 1.9784941583729987e-05, "loss": 0.0115, "step": 13920 }, { "epoch": 0.4311677072386722, "grad_norm": 0.11198741942644119, "learning_rate": 1.978447796253941e-05, "loss": 0.0106, "step": 13950 }, { "epoch": 0.43209494961983064, "grad_norm": 0.15288135409355164, "learning_rate": 1.978401434134883e-05, "loss": 0.0111, "step": 13980 }, { "epoch": 0.43302219200098907, "grad_norm": 0.1267995834350586, "learning_rate": 1.9783550720158252e-05, "loss": 0.0118, "step": 14010 }, { "epoch": 0.4339494343821475, "grad_norm": 0.14455971121788025, "learning_rate": 1.9783087098967673e-05, "loss": 0.0109, "step": 14040 }, { "epoch": 0.43487667676330594, "grad_norm": 0.15423157811164856, "learning_rate": 1.978262347777709e-05, "loss": 0.0106, "step": 14070 }, { "epoch": 0.43580391914446437, "grad_norm": 0.1500750482082367, "learning_rate": 1.9782159856586513e-05, "loss": 0.0117, "step": 14100 }, { "epoch": 0.4367311615256228, "grad_norm": 0.17914001643657684, "learning_rate": 1.9781696235395934e-05, "loss": 0.0098, "step": 14130 }, { "epoch": 0.43765840390678123, "grad_norm": 0.12782762944698334, "learning_rate": 1.9781232614205356e-05, "loss": 0.0112, "step": 14160 }, { "epoch": 0.43858564628793967, "grad_norm": 0.12267909198999405, "learning_rate": 1.9780768993014777e-05, "loss": 0.0115, "step": 14190 }, { "epoch": 0.4395128886690981, "grad_norm": 0.16791710257530212, "learning_rate": 1.9780305371824196e-05, "loss": 0.0108, "step": 14220 }, { "epoch": 0.44044013105025653, "grad_norm": 
0.1596802920103073, "learning_rate": 1.9779841750633617e-05, "loss": 0.0113, "step": 14250 }, { "epoch": 0.44136737343141497, "grad_norm": 0.15017160773277283, "learning_rate": 1.977937812944304e-05, "loss": 0.0111, "step": 14280 }, { "epoch": 0.4422946158125734, "grad_norm": 0.14193257689476013, "learning_rate": 1.9778914508252457e-05, "loss": 0.0104, "step": 14310 }, { "epoch": 0.44322185819373183, "grad_norm": 0.17363493144512177, "learning_rate": 1.9778450887061878e-05, "loss": 0.0118, "step": 14340 }, { "epoch": 0.44414910057489027, "grad_norm": 0.1378113478422165, "learning_rate": 1.97779872658713e-05, "loss": 0.0107, "step": 14370 }, { "epoch": 0.4450763429560487, "grad_norm": 0.12291575968265533, "learning_rate": 1.977752364468072e-05, "loss": 0.0108, "step": 14400 }, { "epoch": 0.44600358533720713, "grad_norm": 0.10949061810970306, "learning_rate": 1.9777060023490143e-05, "loss": 0.0113, "step": 14430 }, { "epoch": 0.44693082771836556, "grad_norm": 0.12933921813964844, "learning_rate": 1.9776596402299564e-05, "loss": 0.0111, "step": 14460 }, { "epoch": 0.447858070099524, "grad_norm": 0.19648374617099762, "learning_rate": 1.9776132781108982e-05, "loss": 0.0107, "step": 14490 }, { "epoch": 0.44878531248068243, "grad_norm": 0.13444839417934418, "learning_rate": 1.9775669159918404e-05, "loss": 0.0115, "step": 14520 }, { "epoch": 0.44971255486184086, "grad_norm": 0.10550806671380997, "learning_rate": 1.9775205538727825e-05, "loss": 0.0109, "step": 14550 }, { "epoch": 0.4506397972429993, "grad_norm": 0.13259588181972504, "learning_rate": 1.9774741917537247e-05, "loss": 0.0117, "step": 14580 }, { "epoch": 0.45156703962415773, "grad_norm": 0.1595287173986435, "learning_rate": 1.977427829634667e-05, "loss": 0.0115, "step": 14610 }, { "epoch": 0.4524942820053162, "grad_norm": 0.17247788608074188, "learning_rate": 1.9773814675156086e-05, "loss": 0.0117, "step": 14640 }, { "epoch": 0.45342152438647465, "grad_norm": 0.1309216469526291, "learning_rate": 
1.9773351053965508e-05, "loss": 0.0118, "step": 14670 }, { "epoch": 0.4543487667676331, "grad_norm": 0.141946941614151, "learning_rate": 1.977288743277493e-05, "loss": 0.0115, "step": 14700 }, { "epoch": 0.4552760091487915, "grad_norm": 0.14929181337356567, "learning_rate": 1.9772423811584348e-05, "loss": 0.011, "step": 14730 }, { "epoch": 0.45620325152994995, "grad_norm": 0.13850441575050354, "learning_rate": 1.9771960190393772e-05, "loss": 0.0109, "step": 14760 }, { "epoch": 0.4571304939111084, "grad_norm": 0.119786337018013, "learning_rate": 1.9771496569203194e-05, "loss": 0.0111, "step": 14790 }, { "epoch": 0.4580577362922668, "grad_norm": 0.11005456000566483, "learning_rate": 1.9771032948012612e-05, "loss": 0.0112, "step": 14820 }, { "epoch": 0.45898497867342525, "grad_norm": 0.12615413963794708, "learning_rate": 1.9770569326822034e-05, "loss": 0.0109, "step": 14850 }, { "epoch": 0.4599122210545837, "grad_norm": 0.11778329312801361, "learning_rate": 1.9770105705631452e-05, "loss": 0.0112, "step": 14880 }, { "epoch": 0.4608394634357421, "grad_norm": 0.14141227304935455, "learning_rate": 1.9769642084440873e-05, "loss": 0.0109, "step": 14910 }, { "epoch": 0.46176670581690055, "grad_norm": 0.14961731433868408, "learning_rate": 1.9769178463250295e-05, "loss": 0.0103, "step": 14940 }, { "epoch": 0.462693948198059, "grad_norm": 0.15315207839012146, "learning_rate": 1.9768714842059716e-05, "loss": 0.0117, "step": 14970 }, { "epoch": 0.4636211905792174, "grad_norm": 0.1742749810218811, "learning_rate": 1.9768251220869138e-05, "loss": 0.0117, "step": 15000 }, { "epoch": 0.46454843296037585, "grad_norm": 0.1547149419784546, "learning_rate": 1.976778759967856e-05, "loss": 0.0107, "step": 15030 }, { "epoch": 0.4654756753415343, "grad_norm": 0.18222281336784363, "learning_rate": 1.9767323978487977e-05, "loss": 0.0108, "step": 15060 }, { "epoch": 0.4664029177226927, "grad_norm": 0.17831669747829437, "learning_rate": 1.97668603572974e-05, "loss": 0.0114, "step": 15090 }, { 
"epoch": 0.46733016010385114, "grad_norm": 0.13622137904167175, "learning_rate": 1.976639673610682e-05, "loss": 0.0114, "step": 15120 }, { "epoch": 0.4682574024850096, "grad_norm": 0.12895211577415466, "learning_rate": 1.9765933114916242e-05, "loss": 0.0105, "step": 15150 }, { "epoch": 0.469184644866168, "grad_norm": 0.1450720727443695, "learning_rate": 1.9765469493725663e-05, "loss": 0.0105, "step": 15180 }, { "epoch": 0.47011188724732644, "grad_norm": 0.139903262257576, "learning_rate": 1.976500587253508e-05, "loss": 0.0114, "step": 15210 }, { "epoch": 0.4710391296284849, "grad_norm": 0.1515028029680252, "learning_rate": 1.9764542251344503e-05, "loss": 0.0114, "step": 15240 }, { "epoch": 0.4719663720096433, "grad_norm": 0.1418166309595108, "learning_rate": 1.9764078630153925e-05, "loss": 0.0119, "step": 15270 }, { "epoch": 0.47289361439080174, "grad_norm": 0.12409061193466187, "learning_rate": 1.9763615008963343e-05, "loss": 0.0114, "step": 15300 }, { "epoch": 0.4738208567719602, "grad_norm": 0.27732494473457336, "learning_rate": 1.9763151387772764e-05, "loss": 0.0105, "step": 15330 }, { "epoch": 0.4747480991531186, "grad_norm": 0.1457599401473999, "learning_rate": 1.9762687766582186e-05, "loss": 0.0112, "step": 15360 }, { "epoch": 0.47567534153427704, "grad_norm": 0.13710662722587585, "learning_rate": 1.9762224145391607e-05, "loss": 0.0102, "step": 15390 }, { "epoch": 0.4766025839154355, "grad_norm": 0.14741382002830505, "learning_rate": 1.976176052420103e-05, "loss": 0.012, "step": 15420 }, { "epoch": 0.4775298262965939, "grad_norm": 0.11261238902807236, "learning_rate": 1.976129690301045e-05, "loss": 0.0107, "step": 15450 }, { "epoch": 0.47845706867775234, "grad_norm": 0.12006154656410217, "learning_rate": 1.9760833281819868e-05, "loss": 0.0108, "step": 15480 }, { "epoch": 0.4793843110589108, "grad_norm": 0.16782879829406738, "learning_rate": 1.976036966062929e-05, "loss": 0.0108, "step": 15510 }, { "epoch": 0.48031155344006926, "grad_norm": 
0.15319256484508514, "learning_rate": 1.975990603943871e-05, "loss": 0.0116, "step": 15540 }, { "epoch": 0.4812387958212277, "grad_norm": 0.15482097864151, "learning_rate": 1.9759442418248133e-05, "loss": 0.0113, "step": 15570 }, { "epoch": 0.4821660382023861, "grad_norm": 0.1682938039302826, "learning_rate": 1.9758978797057554e-05, "loss": 0.0112, "step": 15600 }, { "epoch": 0.48309328058354456, "grad_norm": 0.1555566042661667, "learning_rate": 1.9758515175866972e-05, "loss": 0.0106, "step": 15630 }, { "epoch": 0.484020522964703, "grad_norm": 0.12842746078968048, "learning_rate": 1.9758051554676394e-05, "loss": 0.0112, "step": 15660 }, { "epoch": 0.4849477653458614, "grad_norm": 0.15598492324352264, "learning_rate": 1.9757587933485815e-05, "loss": 0.0106, "step": 15690 }, { "epoch": 0.48587500772701986, "grad_norm": 0.1573287844657898, "learning_rate": 1.9757124312295234e-05, "loss": 0.0108, "step": 15720 }, { "epoch": 0.4868022501081783, "grad_norm": 0.13735492527484894, "learning_rate": 1.9756660691104655e-05, "loss": 0.0108, "step": 15750 }, { "epoch": 0.4877294924893367, "grad_norm": 0.17496006190776825, "learning_rate": 1.9756197069914077e-05, "loss": 0.0111, "step": 15780 }, { "epoch": 0.48865673487049516, "grad_norm": 0.17873094975948334, "learning_rate": 1.9755733448723498e-05, "loss": 0.0111, "step": 15810 }, { "epoch": 0.4895839772516536, "grad_norm": 0.14948290586471558, "learning_rate": 1.975526982753292e-05, "loss": 0.0111, "step": 15840 }, { "epoch": 0.490511219632812, "grad_norm": 0.1015981063246727, "learning_rate": 1.9754806206342338e-05, "loss": 0.0121, "step": 15870 }, { "epoch": 0.49143846201397046, "grad_norm": 0.15646769106388092, "learning_rate": 1.975434258515176e-05, "loss": 0.0111, "step": 15900 }, { "epoch": 0.4923657043951289, "grad_norm": 0.11788052320480347, "learning_rate": 1.975387896396118e-05, "loss": 0.0112, "step": 15930 }, { "epoch": 0.4932929467762873, "grad_norm": 0.16695596277713776, "learning_rate": 1.9753415342770602e-05, 
"loss": 0.011, "step": 15960 }, { "epoch": 0.49422018915744576, "grad_norm": 0.12330469489097595, "learning_rate": 1.9752951721580024e-05, "loss": 0.0113, "step": 15990 }, { "epoch": 0.4951474315386042, "grad_norm": 0.16799937188625336, "learning_rate": 1.9752488100389445e-05, "loss": 0.0111, "step": 16020 }, { "epoch": 0.4960746739197626, "grad_norm": 0.146440789103508, "learning_rate": 1.9752024479198863e-05, "loss": 0.0111, "step": 16050 }, { "epoch": 0.49700191630092105, "grad_norm": 0.1455446183681488, "learning_rate": 1.9751560858008285e-05, "loss": 0.0108, "step": 16080 }, { "epoch": 0.4979291586820795, "grad_norm": 0.1497688889503479, "learning_rate": 1.9751097236817706e-05, "loss": 0.0114, "step": 16110 }, { "epoch": 0.4988564010632379, "grad_norm": 0.18637433648109436, "learning_rate": 1.9750633615627124e-05, "loss": 0.0104, "step": 16140 }, { "epoch": 0.49978364344439635, "grad_norm": 0.09844199568033218, "learning_rate": 1.975016999443655e-05, "loss": 0.0111, "step": 16170 }, { "epoch": 0.5007108858255548, "grad_norm": 0.17865383625030518, "learning_rate": 1.9749706373245967e-05, "loss": 0.0113, "step": 16200 }, { "epoch": 0.5016381282067133, "grad_norm": 0.17511294782161713, "learning_rate": 1.974924275205539e-05, "loss": 0.0104, "step": 16230 }, { "epoch": 0.5025653705878717, "grad_norm": 0.2122095376253128, "learning_rate": 1.974877913086481e-05, "loss": 0.0111, "step": 16260 }, { "epoch": 0.5034926129690301, "grad_norm": 0.15994173288345337, "learning_rate": 1.974831550967423e-05, "loss": 0.0108, "step": 16290 }, { "epoch": 0.5044198553501885, "grad_norm": 0.1498800665140152, "learning_rate": 1.974785188848365e-05, "loss": 0.0112, "step": 16320 }, { "epoch": 0.505347097731347, "grad_norm": 0.11768174916505814, "learning_rate": 1.974738826729307e-05, "loss": 0.0107, "step": 16350 }, { "epoch": 0.5062743401125054, "grad_norm": 0.14521424472332, "learning_rate": 1.9746924646102493e-05, "loss": 0.0115, "step": 16380 }, { "epoch": 0.5072015824936639, 
"grad_norm": 0.11522214859724045, "learning_rate": 1.9746461024911915e-05, "loss": 0.0109, "step": 16410 }, { "epoch": 0.5081288248748222, "grad_norm": 0.13075843453407288, "learning_rate": 1.9745997403721336e-05, "loss": 0.0108, "step": 16440 }, { "epoch": 0.5090560672559807, "grad_norm": 0.15582314133644104, "learning_rate": 1.9745533782530754e-05, "loss": 0.0106, "step": 16470 }, { "epoch": 0.5099833096371391, "grad_norm": 0.1819498986005783, "learning_rate": 1.9745070161340176e-05, "loss": 0.0104, "step": 16500 }, { "epoch": 0.5109105520182976, "grad_norm": 0.1500265747308731, "learning_rate": 1.9744606540149597e-05, "loss": 0.0117, "step": 16530 }, { "epoch": 0.511837794399456, "grad_norm": 0.14031992852687836, "learning_rate": 1.974414291895902e-05, "loss": 0.0105, "step": 16560 }, { "epoch": 0.5127650367806145, "grad_norm": 0.11498759686946869, "learning_rate": 1.974367929776844e-05, "loss": 0.012, "step": 16590 }, { "epoch": 0.5136922791617728, "grad_norm": 0.1483055055141449, "learning_rate": 1.974321567657786e-05, "loss": 0.0101, "step": 16620 }, { "epoch": 0.5146195215429313, "grad_norm": 0.1600915789604187, "learning_rate": 1.974275205538728e-05, "loss": 0.011, "step": 16650 }, { "epoch": 0.5155467639240897, "grad_norm": 0.11965183168649673, "learning_rate": 1.97422884341967e-05, "loss": 0.011, "step": 16680 }, { "epoch": 0.5164740063052482, "grad_norm": 0.19211125373840332, "learning_rate": 1.974182481300612e-05, "loss": 0.0117, "step": 16710 }, { "epoch": 0.5174012486864066, "grad_norm": 0.1554003208875656, "learning_rate": 1.974136119181554e-05, "loss": 0.0114, "step": 16740 }, { "epoch": 0.5183284910675651, "grad_norm": 0.13877448439598083, "learning_rate": 1.9740897570624963e-05, "loss": 0.0103, "step": 16770 }, { "epoch": 0.5192557334487234, "grad_norm": 0.13544303178787231, "learning_rate": 1.9740433949434384e-05, "loss": 0.011, "step": 16800 }, { "epoch": 0.5201829758298819, "grad_norm": 0.17708604037761688, "learning_rate": 
1.9739970328243806e-05, "loss": 0.0106, "step": 16830 }, { "epoch": 0.5211102182110404, "grad_norm": 0.1740335077047348, "learning_rate": 1.9739506707053224e-05, "loss": 0.0103, "step": 16860 }, { "epoch": 0.5220374605921988, "grad_norm": 0.14314977824687958, "learning_rate": 1.9739043085862645e-05, "loss": 0.0106, "step": 16890 }, { "epoch": 0.5229647029733573, "grad_norm": 0.17561118304729462, "learning_rate": 1.9738579464672067e-05, "loss": 0.0117, "step": 16920 }, { "epoch": 0.5238919453545157, "grad_norm": 0.1606060117483139, "learning_rate": 1.9738115843481488e-05, "loss": 0.0111, "step": 16950 }, { "epoch": 0.5248191877356742, "grad_norm": 0.14647898077964783, "learning_rate": 1.973765222229091e-05, "loss": 0.0104, "step": 16980 }, { "epoch": 0.5257464301168325, "grad_norm": 0.18634577095508575, "learning_rate": 1.973718860110033e-05, "loss": 0.0119, "step": 17010 }, { "epoch": 0.526673672497991, "grad_norm": 0.17249546945095062, "learning_rate": 1.973672497990975e-05, "loss": 0.011, "step": 17040 }, { "epoch": 0.5276009148791494, "grad_norm": 0.09247303754091263, "learning_rate": 1.973626135871917e-05, "loss": 0.0113, "step": 17070 }, { "epoch": 0.5285281572603079, "grad_norm": 0.13393516838550568, "learning_rate": 1.9735797737528592e-05, "loss": 0.0111, "step": 17100 }, { "epoch": 0.5294553996414663, "grad_norm": 0.12860962748527527, "learning_rate": 1.973533411633801e-05, "loss": 0.0113, "step": 17130 }, { "epoch": 0.5303826420226248, "grad_norm": 0.1558118313550949, "learning_rate": 1.9734870495147432e-05, "loss": 0.0119, "step": 17160 }, { "epoch": 0.5313098844037831, "grad_norm": 0.14543479681015015, "learning_rate": 1.9734406873956853e-05, "loss": 0.0114, "step": 17190 }, { "epoch": 0.5322371267849416, "grad_norm": 0.07575354725122452, "learning_rate": 1.9733943252766275e-05, "loss": 0.0106, "step": 17220 }, { "epoch": 0.5331643691661, "grad_norm": 0.15373536944389343, "learning_rate": 1.9733479631575696e-05, "loss": 0.0104, "step": 17250 }, { 
"epoch": 0.5340916115472585, "grad_norm": 0.13065002858638763, "learning_rate": 1.9733016010385115e-05, "loss": 0.01, "step": 17280 }, { "epoch": 0.5350188539284169, "grad_norm": 0.1295432299375534, "learning_rate": 1.9732552389194536e-05, "loss": 0.0107, "step": 17310 }, { "epoch": 0.5359460963095753, "grad_norm": 0.1259675770998001, "learning_rate": 1.9732088768003958e-05, "loss": 0.0118, "step": 17340 }, { "epoch": 0.5368733386907337, "grad_norm": 0.16108573973178864, "learning_rate": 1.973162514681338e-05, "loss": 0.0115, "step": 17370 }, { "epoch": 0.5378005810718922, "grad_norm": 0.1371215134859085, "learning_rate": 1.97311615256228e-05, "loss": 0.011, "step": 17400 }, { "epoch": 0.5387278234530506, "grad_norm": 0.09765796363353729, "learning_rate": 1.9730697904432222e-05, "loss": 0.0115, "step": 17430 }, { "epoch": 0.5396550658342091, "grad_norm": 0.12412647902965546, "learning_rate": 1.973023428324164e-05, "loss": 0.0103, "step": 17460 }, { "epoch": 0.5405823082153675, "grad_norm": 0.14737875759601593, "learning_rate": 1.9729770662051062e-05, "loss": 0.0109, "step": 17490 }, { "epoch": 0.541509550596526, "grad_norm": 0.11230660229921341, "learning_rate": 1.9729307040860483e-05, "loss": 0.0115, "step": 17520 }, { "epoch": 0.5424367929776843, "grad_norm": 0.1141422763466835, "learning_rate": 1.97288434196699e-05, "loss": 0.0106, "step": 17550 }, { "epoch": 0.5433640353588428, "grad_norm": 0.12016434967517853, "learning_rate": 1.9728379798479326e-05, "loss": 0.0115, "step": 17580 }, { "epoch": 0.5442912777400012, "grad_norm": 0.13615207374095917, "learning_rate": 1.9727916177288744e-05, "loss": 0.011, "step": 17610 }, { "epoch": 0.5452185201211597, "grad_norm": 0.1334221363067627, "learning_rate": 1.9727452556098166e-05, "loss": 0.0116, "step": 17640 }, { "epoch": 0.546145762502318, "grad_norm": 0.11807431280612946, "learning_rate": 1.9726988934907587e-05, "loss": 0.0116, "step": 17670 }, { "epoch": 0.5470730048834765, "grad_norm": 0.13560529053211212, 
"learning_rate": 1.9726525313717006e-05, "loss": 0.0111, "step": 17700 }, { "epoch": 0.5480002472646349, "grad_norm": 0.17897607386112213, "learning_rate": 1.9726061692526427e-05, "loss": 0.0118, "step": 17730 }, { "epoch": 0.5489274896457934, "grad_norm": 0.206075519323349, "learning_rate": 1.972559807133585e-05, "loss": 0.0113, "step": 17760 }, { "epoch": 0.5498547320269519, "grad_norm": 0.13741877675056458, "learning_rate": 1.972513445014527e-05, "loss": 0.0106, "step": 17790 }, { "epoch": 0.5507819744081103, "grad_norm": 0.1166204884648323, "learning_rate": 1.972467082895469e-05, "loss": 0.0114, "step": 17820 }, { "epoch": 0.5517092167892688, "grad_norm": 0.18695688247680664, "learning_rate": 1.9724207207764113e-05, "loss": 0.011, "step": 17850 }, { "epoch": 0.5526364591704271, "grad_norm": 0.11755209416151047, "learning_rate": 1.972374358657353e-05, "loss": 0.0104, "step": 17880 }, { "epoch": 0.5535637015515856, "grad_norm": 0.1512221097946167, "learning_rate": 1.9723279965382953e-05, "loss": 0.0115, "step": 17910 }, { "epoch": 0.554490943932744, "grad_norm": 0.2666775584220886, "learning_rate": 1.9722816344192374e-05, "loss": 0.0101, "step": 17940 }, { "epoch": 0.5554181863139025, "grad_norm": 0.1656980812549591, "learning_rate": 1.9722352723001796e-05, "loss": 0.0106, "step": 17970 }, { "epoch": 0.5563454286950609, "grad_norm": 0.14059163630008698, "learning_rate": 1.9721889101811217e-05, "loss": 0.0116, "step": 18000 }, { "epoch": 0.5572726710762194, "grad_norm": 0.1456231027841568, "learning_rate": 1.9721425480620635e-05, "loss": 0.0117, "step": 18030 }, { "epoch": 0.5581999134573777, "grad_norm": 0.15551233291625977, "learning_rate": 1.9720961859430057e-05, "loss": 0.011, "step": 18060 }, { "epoch": 0.5591271558385362, "grad_norm": 0.15648788213729858, "learning_rate": 1.972049823823948e-05, "loss": 0.0115, "step": 18090 }, { "epoch": 0.5600543982196946, "grad_norm": 0.13963231444358826, "learning_rate": 1.9720034617048896e-05, "loss": 0.011, "step": 
18120 }, { "epoch": 0.5609816406008531, "grad_norm": 0.178186297416687, "learning_rate": 1.9719570995858318e-05, "loss": 0.0107, "step": 18150 }, { "epoch": 0.5619088829820115, "grad_norm": 0.11234034597873688, "learning_rate": 1.971910737466774e-05, "loss": 0.0108, "step": 18180 }, { "epoch": 0.56283612536317, "grad_norm": 0.11878681182861328, "learning_rate": 1.971864375347716e-05, "loss": 0.0106, "step": 18210 }, { "epoch": 0.5637633677443283, "grad_norm": 0.16639955341815948, "learning_rate": 1.9718180132286582e-05, "loss": 0.0109, "step": 18240 }, { "epoch": 0.5646906101254868, "grad_norm": 0.1685299277305603, "learning_rate": 1.9717716511096e-05, "loss": 0.0111, "step": 18270 }, { "epoch": 0.5656178525066452, "grad_norm": 0.13424406945705414, "learning_rate": 1.9717252889905422e-05, "loss": 0.0108, "step": 18300 }, { "epoch": 0.5665450948878037, "grad_norm": 0.14570076763629913, "learning_rate": 1.9716789268714844e-05, "loss": 0.0108, "step": 18330 }, { "epoch": 0.5674723372689621, "grad_norm": 0.11698706448078156, "learning_rate": 1.9716325647524265e-05, "loss": 0.0111, "step": 18360 }, { "epoch": 0.5683995796501206, "grad_norm": 0.1734720766544342, "learning_rate": 1.9715862026333687e-05, "loss": 0.0108, "step": 18390 }, { "epoch": 0.5693268220312789, "grad_norm": 0.20620308816432953, "learning_rate": 1.9715398405143108e-05, "loss": 0.0104, "step": 18420 }, { "epoch": 0.5702540644124374, "grad_norm": 0.1889072060585022, "learning_rate": 1.9714934783952526e-05, "loss": 0.0116, "step": 18450 }, { "epoch": 0.5711813067935958, "grad_norm": 0.12345515191555023, "learning_rate": 1.9714471162761948e-05, "loss": 0.0116, "step": 18480 }, { "epoch": 0.5721085491747543, "grad_norm": 0.1576390117406845, "learning_rate": 1.971400754157137e-05, "loss": 0.0112, "step": 18510 }, { "epoch": 0.5730357915559127, "grad_norm": 0.13220590353012085, "learning_rate": 1.9713543920380787e-05, "loss": 0.0114, "step": 18540 }, { "epoch": 0.5739630339370712, "grad_norm": 
0.14561784267425537, "learning_rate": 1.971308029919021e-05, "loss": 0.0116, "step": 18570 }, { "epoch": 0.5748902763182295, "grad_norm": 0.1624085009098053, "learning_rate": 1.971261667799963e-05, "loss": 0.011, "step": 18600 }, { "epoch": 0.575817518699388, "grad_norm": 0.15127071738243103, "learning_rate": 1.9712153056809052e-05, "loss": 0.0109, "step": 18630 }, { "epoch": 0.5767447610805465, "grad_norm": 0.19874811172485352, "learning_rate": 1.9711689435618473e-05, "loss": 0.0118, "step": 18660 }, { "epoch": 0.5776720034617049, "grad_norm": 0.12631240487098694, "learning_rate": 1.971122581442789e-05, "loss": 0.0119, "step": 18690 }, { "epoch": 0.5785992458428634, "grad_norm": 0.16969221830368042, "learning_rate": 1.9710762193237313e-05, "loss": 0.0116, "step": 18720 }, { "epoch": 0.5795264882240218, "grad_norm": 0.14823880791664124, "learning_rate": 1.9710298572046735e-05, "loss": 0.0107, "step": 18750 }, { "epoch": 0.5804537306051802, "grad_norm": 0.11717385798692703, "learning_rate": 1.9709834950856156e-05, "loss": 0.0108, "step": 18780 }, { "epoch": 0.5813809729863386, "grad_norm": 0.138312429189682, "learning_rate": 1.9709371329665578e-05, "loss": 0.01, "step": 18810 }, { "epoch": 0.5823082153674971, "grad_norm": 0.11770165711641312, "learning_rate": 1.9708907708475e-05, "loss": 0.0111, "step": 18840 }, { "epoch": 0.5832354577486555, "grad_norm": 0.18042947351932526, "learning_rate": 1.9708444087284417e-05, "loss": 0.0107, "step": 18870 }, { "epoch": 0.584162700129814, "grad_norm": 0.1403278410434723, "learning_rate": 1.970798046609384e-05, "loss": 0.011, "step": 18900 }, { "epoch": 0.5850899425109723, "grad_norm": 0.14570821821689606, "learning_rate": 1.9707516844903257e-05, "loss": 0.0109, "step": 18930 }, { "epoch": 0.5860171848921308, "grad_norm": 0.174841970205307, "learning_rate": 1.9707053223712678e-05, "loss": 0.0111, "step": 18960 }, { "epoch": 0.5869444272732892, "grad_norm": 0.1217731460928917, "learning_rate": 1.9706589602522103e-05, "loss": 
0.0109, "step": 18990 }, { "epoch": 0.5878716696544477, "grad_norm": 0.1423633098602295, "learning_rate": 1.970612598133152e-05, "loss": 0.0108, "step": 19020 }, { "epoch": 0.5887989120356061, "grad_norm": 0.16417445242404938, "learning_rate": 1.9705662360140943e-05, "loss": 0.0101, "step": 19050 }, { "epoch": 0.5897261544167646, "grad_norm": 0.11511775851249695, "learning_rate": 1.9705198738950364e-05, "loss": 0.0103, "step": 19080 }, { "epoch": 0.590653396797923, "grad_norm": 0.13160157203674316, "learning_rate": 1.9704735117759782e-05, "loss": 0.0109, "step": 19110 }, { "epoch": 0.5915806391790814, "grad_norm": 0.2624340057373047, "learning_rate": 1.9704271496569204e-05, "loss": 0.0112, "step": 19140 }, { "epoch": 0.5925078815602398, "grad_norm": 0.13895557820796967, "learning_rate": 1.9703807875378625e-05, "loss": 0.0106, "step": 19170 }, { "epoch": 0.5934351239413983, "grad_norm": 0.13287252187728882, "learning_rate": 1.9703344254188047e-05, "loss": 0.0113, "step": 19200 }, { "epoch": 0.5943623663225567, "grad_norm": 0.13132213056087494, "learning_rate": 1.970288063299747e-05, "loss": 0.0105, "step": 19230 }, { "epoch": 0.5952896087037152, "grad_norm": 0.10387607663869858, "learning_rate": 1.9702417011806887e-05, "loss": 0.011, "step": 19260 }, { "epoch": 0.5962168510848735, "grad_norm": 0.15990789234638214, "learning_rate": 1.9701953390616308e-05, "loss": 0.0108, "step": 19290 }, { "epoch": 0.597144093466032, "grad_norm": 0.14809280633926392, "learning_rate": 1.970148976942573e-05, "loss": 0.0112, "step": 19320 }, { "epoch": 0.5980713358471904, "grad_norm": 0.13073743879795074, "learning_rate": 1.970102614823515e-05, "loss": 0.0107, "step": 19350 }, { "epoch": 0.5989985782283489, "grad_norm": 0.12297141551971436, "learning_rate": 1.9700562527044573e-05, "loss": 0.0103, "step": 19380 }, { "epoch": 0.5999258206095073, "grad_norm": 0.1540883332490921, "learning_rate": 1.9700098905853994e-05, "loss": 0.0107, "step": 19410 }, { "epoch": 0.6008530629906658, 
"grad_norm": 0.12956982851028442, "learning_rate": 1.9699635284663412e-05, "loss": 0.0115, "step": 19440 }, { "epoch": 0.6017803053718241, "grad_norm": 0.16077589988708496, "learning_rate": 1.9699171663472834e-05, "loss": 0.01, "step": 19470 }, { "epoch": 0.6027075477529826, "grad_norm": 0.13807636499404907, "learning_rate": 1.9698708042282255e-05, "loss": 0.0115, "step": 19500 }, { "epoch": 0.6036347901341411, "grad_norm": 0.11394845694303513, "learning_rate": 1.9698244421091673e-05, "loss": 0.0104, "step": 19530 }, { "epoch": 0.6045620325152995, "grad_norm": 0.16035230457782745, "learning_rate": 1.9697780799901095e-05, "loss": 0.0106, "step": 19560 }, { "epoch": 0.605489274896458, "grad_norm": 0.15413151681423187, "learning_rate": 1.9697317178710516e-05, "loss": 0.0105, "step": 19590 }, { "epoch": 0.6064165172776164, "grad_norm": 0.12395604699850082, "learning_rate": 1.9696853557519938e-05, "loss": 0.0115, "step": 19620 }, { "epoch": 0.6073437596587749, "grad_norm": 0.1340208798646927, "learning_rate": 1.969638993632936e-05, "loss": 0.0106, "step": 19650 }, { "epoch": 0.6082710020399332, "grad_norm": 0.1469482183456421, "learning_rate": 1.9695926315138777e-05, "loss": 0.0107, "step": 19680 }, { "epoch": 0.6091982444210917, "grad_norm": 0.1482105404138565, "learning_rate": 1.96954626939482e-05, "loss": 0.0107, "step": 19710 }, { "epoch": 0.6101254868022501, "grad_norm": 0.10288722813129425, "learning_rate": 1.969499907275762e-05, "loss": 0.0107, "step": 19740 }, { "epoch": 0.6110527291834086, "grad_norm": 0.14000536501407623, "learning_rate": 1.9694535451567042e-05, "loss": 0.0101, "step": 19770 }, { "epoch": 0.611979971564567, "grad_norm": 0.13950161635875702, "learning_rate": 1.9694071830376464e-05, "loss": 0.0103, "step": 19800 }, { "epoch": 0.6129072139457254, "grad_norm": 0.1045997366309166, "learning_rate": 1.9693608209185885e-05, "loss": 0.0107, "step": 19830 }, { "epoch": 0.6138344563268838, "grad_norm": 0.11684412509202957, "learning_rate": 
1.9693144587995303e-05, "loss": 0.0116, "step": 19860 }, { "epoch": 0.6147616987080423, "grad_norm": 0.12157295644283295, "learning_rate": 1.9692680966804725e-05, "loss": 0.0108, "step": 19890 }, { "epoch": 0.6156889410892007, "grad_norm": 0.12909547984600067, "learning_rate": 1.9692217345614143e-05, "loss": 0.0116, "step": 19920 }, { "epoch": 0.6166161834703592, "grad_norm": 0.14208202064037323, "learning_rate": 1.9691753724423564e-05, "loss": 0.0116, "step": 19950 }, { "epoch": 0.6175434258515176, "grad_norm": 0.16134332120418549, "learning_rate": 1.9691290103232986e-05, "loss": 0.0114, "step": 19980 }, { "epoch": 0.618470668232676, "grad_norm": 0.1051783412694931, "learning_rate": 1.9690826482042407e-05, "loss": 0.0101, "step": 20010 }, { "epoch": 0.6193979106138344, "grad_norm": 0.1699976921081543, "learning_rate": 1.969036286085183e-05, "loss": 0.0105, "step": 20040 }, { "epoch": 0.6203251529949929, "grad_norm": 0.1221729964017868, "learning_rate": 1.968989923966125e-05, "loss": 0.0102, "step": 20070 }, { "epoch": 0.6212523953761513, "grad_norm": 0.09658984839916229, "learning_rate": 1.968943561847067e-05, "loss": 0.0105, "step": 20100 }, { "epoch": 0.6221796377573098, "grad_norm": 0.1710032820701599, "learning_rate": 1.968897199728009e-05, "loss": 0.0106, "step": 20130 }, { "epoch": 0.6231068801384682, "grad_norm": 0.12585148215293884, "learning_rate": 1.968850837608951e-05, "loss": 0.0106, "step": 20160 }, { "epoch": 0.6240341225196266, "grad_norm": 0.13396285474300385, "learning_rate": 1.9688044754898933e-05, "loss": 0.0115, "step": 20190 }, { "epoch": 0.624961364900785, "grad_norm": 0.19498120248317719, "learning_rate": 1.9687581133708354e-05, "loss": 0.0106, "step": 20220 }, { "epoch": 0.6258886072819435, "grad_norm": 0.1718875765800476, "learning_rate": 1.9687117512517773e-05, "loss": 0.0101, "step": 20250 }, { "epoch": 0.6268158496631019, "grad_norm": 0.20032888650894165, "learning_rate": 1.9686653891327194e-05, "loss": 0.0112, "step": 20280 }, { 
"epoch": 0.6277430920442604, "grad_norm": 0.1483587920665741, "learning_rate": 1.9686190270136616e-05, "loss": 0.0112, "step": 20310 }, { "epoch": 0.6286703344254188, "grad_norm": 0.1184084564447403, "learning_rate": 1.9685726648946034e-05, "loss": 0.0104, "step": 20340 }, { "epoch": 0.6295975768065772, "grad_norm": 0.1851278692483902, "learning_rate": 1.9685263027755455e-05, "loss": 0.0118, "step": 20370 }, { "epoch": 0.6305248191877356, "grad_norm": 0.19694185256958008, "learning_rate": 1.968479940656488e-05, "loss": 0.0111, "step": 20400 }, { "epoch": 0.6314520615688941, "grad_norm": 0.1660080999135971, "learning_rate": 1.9684335785374298e-05, "loss": 0.0114, "step": 20430 }, { "epoch": 0.6323793039500526, "grad_norm": 0.18938466906547546, "learning_rate": 1.968387216418372e-05, "loss": 0.0106, "step": 20460 }, { "epoch": 0.633306546331211, "grad_norm": 0.152815043926239, "learning_rate": 1.968340854299314e-05, "loss": 0.0107, "step": 20490 }, { "epoch": 0.6342337887123695, "grad_norm": 0.1433444321155548, "learning_rate": 1.968294492180256e-05, "loss": 0.0107, "step": 20520 }, { "epoch": 0.6351610310935278, "grad_norm": 0.11339518427848816, "learning_rate": 1.968248130061198e-05, "loss": 0.0102, "step": 20550 }, { "epoch": 0.6360882734746863, "grad_norm": 0.14251305162906647, "learning_rate": 1.9682017679421402e-05, "loss": 0.0108, "step": 20580 }, { "epoch": 0.6370155158558447, "grad_norm": 0.15844962000846863, "learning_rate": 1.9681554058230824e-05, "loss": 0.0108, "step": 20610 }, { "epoch": 0.6379427582370032, "grad_norm": 0.13714487850666046, "learning_rate": 1.9681090437040245e-05, "loss": 0.0118, "step": 20640 }, { "epoch": 0.6388700006181616, "grad_norm": 0.15218786895275116, "learning_rate": 1.9680626815849663e-05, "loss": 0.0104, "step": 20670 }, { "epoch": 0.6397972429993201, "grad_norm": 0.10393033921718597, "learning_rate": 1.9680163194659085e-05, "loss": 0.0109, "step": 20700 }, { "epoch": 0.6407244853804784, "grad_norm": 0.1047113686800003, 
"learning_rate": 1.9679699573468506e-05, "loss": 0.0118, "step": 20730 }, { "epoch": 0.6416517277616369, "grad_norm": 0.13883335888385773, "learning_rate": 1.9679235952277928e-05, "loss": 0.0113, "step": 20760 }, { "epoch": 0.6425789701427953, "grad_norm": 0.16248537600040436, "learning_rate": 1.967877233108735e-05, "loss": 0.0109, "step": 20790 }, { "epoch": 0.6435062125239538, "grad_norm": 0.14050905406475067, "learning_rate": 1.967830870989677e-05, "loss": 0.0109, "step": 20820 }, { "epoch": 0.6444334549051122, "grad_norm": 0.18135398626327515, "learning_rate": 1.967784508870619e-05, "loss": 0.0108, "step": 20850 }, { "epoch": 0.6453606972862707, "grad_norm": 0.16382360458374023, "learning_rate": 1.967738146751561e-05, "loss": 0.011, "step": 20880 }, { "epoch": 0.646287939667429, "grad_norm": 0.14652054011821747, "learning_rate": 1.967691784632503e-05, "loss": 0.0103, "step": 20910 }, { "epoch": 0.6472151820485875, "grad_norm": 0.12952165305614471, "learning_rate": 1.967645422513445e-05, "loss": 0.0107, "step": 20940 }, { "epoch": 0.6481424244297459, "grad_norm": 0.17503808438777924, "learning_rate": 1.9675990603943872e-05, "loss": 0.011, "step": 20970 }, { "epoch": 0.6490696668109044, "grad_norm": 0.12232805788516998, "learning_rate": 1.9675526982753293e-05, "loss": 0.0107, "step": 21000 }, { "epoch": 0.6499969091920628, "grad_norm": 0.141217902302742, "learning_rate": 1.9675063361562715e-05, "loss": 0.0116, "step": 21030 }, { "epoch": 0.6509241515732213, "grad_norm": 0.1509045511484146, "learning_rate": 1.9674599740372136e-05, "loss": 0.0102, "step": 21060 }, { "epoch": 0.6518513939543796, "grad_norm": 0.11254977434873581, "learning_rate": 1.9674136119181554e-05, "loss": 0.0104, "step": 21090 }, { "epoch": 0.6527786363355381, "grad_norm": 0.13542582094669342, "learning_rate": 1.9673672497990976e-05, "loss": 0.0101, "step": 21120 }, { "epoch": 0.6537058787166965, "grad_norm": 0.23376986384391785, "learning_rate": 1.9673208876800397e-05, "loss": 0.0105, "step": 
21150 }, { "epoch": 0.654633121097855, "grad_norm": 0.14030662178993225, "learning_rate": 1.967274525560982e-05, "loss": 0.0108, "step": 21180 }, { "epoch": 0.6555603634790134, "grad_norm": 0.11941663175821304, "learning_rate": 1.967228163441924e-05, "loss": 0.0104, "step": 21210 }, { "epoch": 0.6564876058601719, "grad_norm": 0.13119135797023773, "learning_rate": 1.967181801322866e-05, "loss": 0.0105, "step": 21240 }, { "epoch": 0.6574148482413302, "grad_norm": 0.18197275698184967, "learning_rate": 1.967135439203808e-05, "loss": 0.011, "step": 21270 }, { "epoch": 0.6583420906224887, "grad_norm": 0.16185222566127777, "learning_rate": 1.96708907708475e-05, "loss": 0.0113, "step": 21300 }, { "epoch": 0.6592693330036472, "grad_norm": 0.1478484719991684, "learning_rate": 1.967042714965692e-05, "loss": 0.0099, "step": 21330 }, { "epoch": 0.6601965753848056, "grad_norm": 0.13554543256759644, "learning_rate": 1.966996352846634e-05, "loss": 0.0101, "step": 21360 }, { "epoch": 0.6611238177659641, "grad_norm": 0.1609603315591812, "learning_rate": 1.966951536131545e-05, "loss": 0.0115, "step": 21390 }, { "epoch": 0.6620510601471224, "grad_norm": 0.18124592304229736, "learning_rate": 1.966905174012487e-05, "loss": 0.0109, "step": 21420 }, { "epoch": 0.6629783025282809, "grad_norm": 0.09979747980833054, "learning_rate": 1.9668588118934292e-05, "loss": 0.0104, "step": 21450 }, { "epoch": 0.6639055449094393, "grad_norm": 0.12567797303199768, "learning_rate": 1.9668124497743714e-05, "loss": 0.0112, "step": 21480 }, { "epoch": 0.6648327872905978, "grad_norm": 0.17854486405849457, "learning_rate": 1.9667660876553132e-05, "loss": 0.0107, "step": 21510 }, { "epoch": 0.6657600296717562, "grad_norm": 0.13201989233493805, "learning_rate": 1.9667197255362553e-05, "loss": 0.0113, "step": 21540 }, { "epoch": 0.6666872720529147, "grad_norm": 0.1718299388885498, "learning_rate": 1.9666733634171975e-05, "loss": 0.0103, "step": 21570 }, { "epoch": 0.667614514434073, "grad_norm": 
0.1717633455991745, "learning_rate": 1.9666270012981393e-05, "loss": 0.0104, "step": 21600 }, { "epoch": 0.6685417568152315, "grad_norm": 0.16848677396774292, "learning_rate": 1.9665806391790814e-05, "loss": 0.0103, "step": 21630 }, { "epoch": 0.6694689991963899, "grad_norm": 0.1344890594482422, "learning_rate": 1.966534277060024e-05, "loss": 0.0106, "step": 21660 }, { "epoch": 0.6703962415775484, "grad_norm": 0.1707814335823059, "learning_rate": 1.9664879149409657e-05, "loss": 0.0117, "step": 21690 }, { "epoch": 0.6713234839587068, "grad_norm": 0.2230360358953476, "learning_rate": 1.966441552821908e-05, "loss": 0.0109, "step": 21720 }, { "epoch": 0.6722507263398653, "grad_norm": 0.13025912642478943, "learning_rate": 1.9663951907028497e-05, "loss": 0.0109, "step": 21750 }, { "epoch": 0.6731779687210236, "grad_norm": 0.13415636122226715, "learning_rate": 1.966348828583792e-05, "loss": 0.0106, "step": 21780 }, { "epoch": 0.6741052111021821, "grad_norm": 0.12410006672143936, "learning_rate": 1.966302466464734e-05, "loss": 0.0109, "step": 21810 }, { "epoch": 0.6750324534833405, "grad_norm": 0.16657215356826782, "learning_rate": 1.966256104345676e-05, "loss": 0.0109, "step": 21840 }, { "epoch": 0.675959695864499, "grad_norm": 0.12938612699508667, "learning_rate": 1.9662097422266183e-05, "loss": 0.0114, "step": 21870 }, { "epoch": 0.6768869382456574, "grad_norm": 0.1887212097644806, "learning_rate": 1.9661633801075605e-05, "loss": 0.0105, "step": 21900 }, { "epoch": 0.6778141806268159, "grad_norm": 0.11065803468227386, "learning_rate": 1.966118563392471e-05, "loss": 0.0098, "step": 21930 }, { "epoch": 0.6787414230079742, "grad_norm": 0.14481179416179657, "learning_rate": 1.966072201273413e-05, "loss": 0.011, "step": 21960 }, { "epoch": 0.6796686653891327, "grad_norm": 0.13505086302757263, "learning_rate": 1.9660258391543552e-05, "loss": 0.0101, "step": 21990 }, { "epoch": 0.6805959077702911, "grad_norm": 0.15838079154491425, "learning_rate": 1.965979477035297e-05, 
"loss": 0.0106, "step": 22020 }, { "epoch": 0.6815231501514496, "grad_norm": 0.1366211622953415, "learning_rate": 1.965933114916239e-05, "loss": 0.0112, "step": 22050 }, { "epoch": 0.682450392532608, "grad_norm": 0.2598154544830322, "learning_rate": 1.9658867527971813e-05, "loss": 0.0104, "step": 22080 }, { "epoch": 0.6833776349137665, "grad_norm": 0.13103698194026947, "learning_rate": 1.9658403906781235e-05, "loss": 0.0109, "step": 22110 }, { "epoch": 0.6843048772949248, "grad_norm": 0.11876718699932098, "learning_rate": 1.9657940285590656e-05, "loss": 0.0109, "step": 22140 }, { "epoch": 0.6852321196760833, "grad_norm": 0.17607197165489197, "learning_rate": 1.9657476664400074e-05, "loss": 0.0103, "step": 22170 }, { "epoch": 0.6861593620572418, "grad_norm": 0.1490725725889206, "learning_rate": 1.9657013043209496e-05, "loss": 0.0103, "step": 22200 }, { "epoch": 0.6870866044384002, "grad_norm": 0.13106711208820343, "learning_rate": 1.9656549422018917e-05, "loss": 0.0114, "step": 22230 }, { "epoch": 0.6880138468195587, "grad_norm": 0.14891047775745392, "learning_rate": 1.965608580082834e-05, "loss": 0.0101, "step": 22260 }, { "epoch": 0.6889410892007171, "grad_norm": 0.10606863349676132, "learning_rate": 1.965562217963776e-05, "loss": 0.0104, "step": 22290 }, { "epoch": 0.6898683315818755, "grad_norm": 0.12561169266700745, "learning_rate": 1.9655158558447182e-05, "loss": 0.0108, "step": 22320 }, { "epoch": 0.6907955739630339, "grad_norm": 0.15355020761489868, "learning_rate": 1.96546949372566e-05, "loss": 0.0098, "step": 22350 }, { "epoch": 0.6917228163441924, "grad_norm": 0.11139534413814545, "learning_rate": 1.965423131606602e-05, "loss": 0.0103, "step": 22380 }, { "epoch": 0.6926500587253508, "grad_norm": 0.1802678406238556, "learning_rate": 1.9653767694875443e-05, "loss": 0.0105, "step": 22410 }, { "epoch": 0.6935773011065093, "grad_norm": 0.1225091889500618, "learning_rate": 1.965330407368486e-05, "loss": 0.0106, "step": 22440 }, { "epoch": 0.6945045434876677, 
"grad_norm": 0.1083742082118988, "learning_rate": 1.9652840452494283e-05, "loss": 0.0104, "step": 22470 }, { "epoch": 0.6954317858688261, "grad_norm": 0.15114031732082367, "learning_rate": 1.9652376831303704e-05, "loss": 0.0106, "step": 22500 }, { "epoch": 0.6963590282499845, "grad_norm": 0.1286889910697937, "learning_rate": 1.9651913210113126e-05, "loss": 0.0098, "step": 22530 }, { "epoch": 0.697286270631143, "grad_norm": 0.1737942397594452, "learning_rate": 1.9651449588922547e-05, "loss": 0.0107, "step": 22560 }, { "epoch": 0.6982135130123014, "grad_norm": 0.16390171647071838, "learning_rate": 1.9650985967731965e-05, "loss": 0.0106, "step": 22590 }, { "epoch": 0.6991407553934599, "grad_norm": 0.11053313314914703, "learning_rate": 1.9650522346541387e-05, "loss": 0.0108, "step": 22620 }, { "epoch": 0.7000679977746183, "grad_norm": 0.1604512482881546, "learning_rate": 1.9650058725350808e-05, "loss": 0.0111, "step": 22650 }, { "epoch": 0.7009952401557767, "grad_norm": 0.19298644363880157, "learning_rate": 1.964959510416023e-05, "loss": 0.0113, "step": 22680 }, { "epoch": 0.7019224825369351, "grad_norm": 0.2749721109867096, "learning_rate": 1.964913148296965e-05, "loss": 0.0103, "step": 22710 }, { "epoch": 0.7028497249180936, "grad_norm": 0.10749609023332596, "learning_rate": 1.9648667861779073e-05, "loss": 0.0106, "step": 22740 }, { "epoch": 0.703776967299252, "grad_norm": 0.13725101947784424, "learning_rate": 1.964820424058849e-05, "loss": 0.0108, "step": 22770 }, { "epoch": 0.7047042096804105, "grad_norm": 0.14936412870883942, "learning_rate": 1.9647740619397912e-05, "loss": 0.01, "step": 22800 }, { "epoch": 0.7056314520615689, "grad_norm": 0.13317477703094482, "learning_rate": 1.964727699820733e-05, "loss": 0.0105, "step": 22830 }, { "epoch": 0.7065586944427273, "grad_norm": 0.1467142403125763, "learning_rate": 1.9646813377016752e-05, "loss": 0.011, "step": 22860 }, { "epoch": 0.7074859368238857, "grad_norm": 0.1495506763458252, "learning_rate": 
1.9646349755826174e-05, "loss": 0.0101, "step": 22890 }, { "epoch": 0.7084131792050442, "grad_norm": 0.14246061444282532, "learning_rate": 1.9645886134635595e-05, "loss": 0.0105, "step": 22920 }, { "epoch": 0.7093404215862026, "grad_norm": 0.19877555966377258, "learning_rate": 1.9645422513445017e-05, "loss": 0.0104, "step": 22950 }, { "epoch": 0.7102676639673611, "grad_norm": 0.09770684689283371, "learning_rate": 1.9644958892254438e-05, "loss": 0.01, "step": 22980 }, { "epoch": 0.7111949063485195, "grad_norm": 0.1393786370754242, "learning_rate": 1.9644495271063856e-05, "loss": 0.0106, "step": 23010 }, { "epoch": 0.7121221487296779, "grad_norm": 0.11217056959867477, "learning_rate": 1.9644031649873278e-05, "loss": 0.0112, "step": 23040 }, { "epoch": 0.7130493911108364, "grad_norm": 0.1840437948703766, "learning_rate": 1.96435680286827e-05, "loss": 0.0108, "step": 23070 }, { "epoch": 0.7139766334919948, "grad_norm": 0.1373140960931778, "learning_rate": 1.964310440749212e-05, "loss": 0.0104, "step": 23100 }, { "epoch": 0.7149038758731533, "grad_norm": 0.1342286318540573, "learning_rate": 1.9642640786301542e-05, "loss": 0.0106, "step": 23130 }, { "epoch": 0.7158311182543117, "grad_norm": 0.15362820029258728, "learning_rate": 1.964217716511096e-05, "loss": 0.0106, "step": 23160 }, { "epoch": 0.7167583606354702, "grad_norm": 0.1843593567609787, "learning_rate": 1.9641713543920382e-05, "loss": 0.0104, "step": 23190 }, { "epoch": 0.7176856030166285, "grad_norm": 0.1569281816482544, "learning_rate": 1.9641249922729803e-05, "loss": 0.0102, "step": 23220 }, { "epoch": 0.718612845397787, "grad_norm": 0.139905646443367, "learning_rate": 1.964078630153922e-05, "loss": 0.0102, "step": 23250 }, { "epoch": 0.7195400877789454, "grad_norm": 0.12406637519598007, "learning_rate": 1.9640322680348646e-05, "loss": 0.01, "step": 23280 }, { "epoch": 0.7204673301601039, "grad_norm": 0.15604321658611298, "learning_rate": 1.9639859059158068e-05, "loss": 0.0111, "step": 23310 }, { "epoch": 
0.7213945725412623, "grad_norm": 0.12473416328430176, "learning_rate": 1.9639395437967486e-05, "loss": 0.0111, "step": 23340 }, { "epoch": 0.7223218149224208, "grad_norm": 0.10518327355384827, "learning_rate": 1.9638931816776907e-05, "loss": 0.0104, "step": 23370 }, { "epoch": 0.7232490573035791, "grad_norm": 0.13876180350780487, "learning_rate": 1.963846819558633e-05, "loss": 0.0104, "step": 23400 }, { "epoch": 0.7241762996847376, "grad_norm": 0.15004809200763702, "learning_rate": 1.9638004574395747e-05, "loss": 0.0102, "step": 23430 }, { "epoch": 0.725103542065896, "grad_norm": 0.12494517862796783, "learning_rate": 1.963754095320517e-05, "loss": 0.011, "step": 23460 }, { "epoch": 0.7260307844470545, "grad_norm": 0.16083678603172302, "learning_rate": 1.963707733201459e-05, "loss": 0.0101, "step": 23490 }, { "epoch": 0.7269580268282129, "grad_norm": 0.2052803784608841, "learning_rate": 1.963661371082401e-05, "loss": 0.0114, "step": 23520 }, { "epoch": 0.7278852692093714, "grad_norm": 0.16548952460289001, "learning_rate": 1.9636150089633433e-05, "loss": 0.0117, "step": 23550 }, { "epoch": 0.7288125115905297, "grad_norm": 0.2400340884923935, "learning_rate": 1.963568646844285e-05, "loss": 0.011, "step": 23580 }, { "epoch": 0.7297397539716882, "grad_norm": 0.18960875272750854, "learning_rate": 1.9635222847252273e-05, "loss": 0.01, "step": 23610 }, { "epoch": 0.7306669963528466, "grad_norm": 0.1324130743741989, "learning_rate": 1.9634759226061694e-05, "loss": 0.0103, "step": 23640 }, { "epoch": 0.7315942387340051, "grad_norm": 0.13898129761219025, "learning_rate": 1.9634295604871116e-05, "loss": 0.0104, "step": 23670 }, { "epoch": 0.7325214811151635, "grad_norm": 0.14267148077487946, "learning_rate": 1.9633831983680537e-05, "loss": 0.0105, "step": 23700 }, { "epoch": 0.733448723496322, "grad_norm": 0.12074214965105057, "learning_rate": 1.963336836248996e-05, "loss": 0.0104, "step": 23730 }, { "epoch": 0.7343759658774803, "grad_norm": 0.15600258111953735, 
"learning_rate": 1.9632904741299377e-05, "loss": 0.0111, "step": 23760 }, { "epoch": 0.7353032082586388, "grad_norm": 0.13810376822948456, "learning_rate": 1.96324411201088e-05, "loss": 0.0104, "step": 23790 }, { "epoch": 0.7362304506397972, "grad_norm": 0.18209311366081238, "learning_rate": 1.963197749891822e-05, "loss": 0.0123, "step": 23820 }, { "epoch": 0.7371576930209557, "grad_norm": 0.1387079954147339, "learning_rate": 1.9631513877727638e-05, "loss": 0.0111, "step": 23850 }, { "epoch": 0.7380849354021141, "grad_norm": 0.1338684856891632, "learning_rate": 1.963105025653706e-05, "loss": 0.0108, "step": 23880 }, { "epoch": 0.7390121777832726, "grad_norm": 0.1294497698545456, "learning_rate": 1.963058663534648e-05, "loss": 0.01, "step": 23910 }, { "epoch": 0.7399394201644309, "grad_norm": 0.145687073469162, "learning_rate": 1.9630123014155902e-05, "loss": 0.0106, "step": 23940 }, { "epoch": 0.7408666625455894, "grad_norm": 0.15462340414524078, "learning_rate": 1.9629659392965324e-05, "loss": 0.0106, "step": 23970 }, { "epoch": 0.7417939049267479, "grad_norm": 0.1549011468887329, "learning_rate": 1.9629195771774742e-05, "loss": 0.0112, "step": 24000 }, { "epoch": 0.7427211473079063, "grad_norm": 0.1274493783712387, "learning_rate": 1.9628732150584164e-05, "loss": 0.0112, "step": 24030 }, { "epoch": 0.7436483896890648, "grad_norm": 0.17054013907909393, "learning_rate": 1.9628268529393585e-05, "loss": 0.0097, "step": 24060 }, { "epoch": 0.7445756320702231, "grad_norm": 0.1312279850244522, "learning_rate": 1.9627804908203007e-05, "loss": 0.0106, "step": 24090 }, { "epoch": 0.7455028744513816, "grad_norm": 0.12537410855293274, "learning_rate": 1.9627341287012428e-05, "loss": 0.0107, "step": 24120 }, { "epoch": 0.74643011683254, "grad_norm": 0.16354672610759735, "learning_rate": 1.962687766582185e-05, "loss": 0.0108, "step": 24150 }, { "epoch": 0.7473573592136985, "grad_norm": 0.14861637353897095, "learning_rate": 1.9626414044631268e-05, "loss": 0.0111, "step": 24180 
}, { "epoch": 0.7482846015948569, "grad_norm": 0.17199063301086426, "learning_rate": 1.962595042344069e-05, "loss": 0.0108, "step": 24210 }, { "epoch": 0.7492118439760154, "grad_norm": 0.1828068494796753, "learning_rate": 1.9625486802250107e-05, "loss": 0.0102, "step": 24240 }, { "epoch": 0.7501390863571737, "grad_norm": 0.17171557247638702, "learning_rate": 1.962502318105953e-05, "loss": 0.0107, "step": 24270 }, { "epoch": 0.7510663287383322, "grad_norm": 0.1086016297340393, "learning_rate": 1.962455955986895e-05, "loss": 0.0106, "step": 24300 }, { "epoch": 0.7519935711194906, "grad_norm": 0.15765012800693512, "learning_rate": 1.9624095938678372e-05, "loss": 0.0096, "step": 24330 }, { "epoch": 0.7529208135006491, "grad_norm": 0.17492449283599854, "learning_rate": 1.9623632317487793e-05, "loss": 0.0103, "step": 24360 }, { "epoch": 0.7538480558818075, "grad_norm": 0.1605881303548813, "learning_rate": 1.9623168696297215e-05, "loss": 0.0107, "step": 24390 }, { "epoch": 0.754775298262966, "grad_norm": 0.22354120016098022, "learning_rate": 1.9622705075106633e-05, "loss": 0.0099, "step": 24420 }, { "epoch": 0.7557025406441243, "grad_norm": 0.16061992943286896, "learning_rate": 1.9622241453916055e-05, "loss": 0.0103, "step": 24450 }, { "epoch": 0.7566297830252828, "grad_norm": 0.13511791825294495, "learning_rate": 1.9621793286765162e-05, "loss": 0.0105, "step": 24480 }, { "epoch": 0.7575570254064412, "grad_norm": 0.13650743663311005, "learning_rate": 1.962132966557458e-05, "loss": 0.0111, "step": 24510 }, { "epoch": 0.7584842677875997, "grad_norm": 0.11807099729776382, "learning_rate": 1.9620866044384005e-05, "loss": 0.0114, "step": 24540 }, { "epoch": 0.7594115101687581, "grad_norm": 0.11842277646064758, "learning_rate": 1.9620402423193427e-05, "loss": 0.0105, "step": 24570 }, { "epoch": 0.7603387525499166, "grad_norm": 0.10607994347810745, "learning_rate": 1.9619938802002845e-05, "loss": 0.0099, "step": 24600 }, { "epoch": 0.7612659949310749, "grad_norm": 
0.16705180704593658, "learning_rate": 1.9619475180812267e-05, "loss": 0.0099, "step": 24630 }, { "epoch": 0.7621932373122334, "grad_norm": 0.1280556321144104, "learning_rate": 1.9619011559621685e-05, "loss": 0.0105, "step": 24660 }, { "epoch": 0.7631204796933918, "grad_norm": 0.14667744934558868, "learning_rate": 1.9618547938431106e-05, "loss": 0.0097, "step": 24690 }, { "epoch": 0.7640477220745503, "grad_norm": 0.14611978828907013, "learning_rate": 1.9618084317240528e-05, "loss": 0.0104, "step": 24720 }, { "epoch": 0.7649749644557087, "grad_norm": 0.09779754281044006, "learning_rate": 1.961762069604995e-05, "loss": 0.0109, "step": 24750 }, { "epoch": 0.7659022068368672, "grad_norm": 0.12345808744430542, "learning_rate": 1.961715707485937e-05, "loss": 0.0104, "step": 24780 }, { "epoch": 0.7668294492180255, "grad_norm": 0.12747137248516083, "learning_rate": 1.9616693453668792e-05, "loss": 0.0098, "step": 24810 }, { "epoch": 0.767756691599184, "grad_norm": 0.12263495475053787, "learning_rate": 1.961622983247821e-05, "loss": 0.0111, "step": 24840 }, { "epoch": 0.7686839339803425, "grad_norm": 0.10848915576934814, "learning_rate": 1.9615766211287632e-05, "loss": 0.0119, "step": 24870 }, { "epoch": 0.7696111763615009, "grad_norm": 0.12249802052974701, "learning_rate": 1.9615302590097053e-05, "loss": 0.0102, "step": 24900 }, { "epoch": 0.7705384187426594, "grad_norm": 0.12829901278018951, "learning_rate": 1.9614838968906475e-05, "loss": 0.0099, "step": 24930 }, { "epoch": 0.7714656611238178, "grad_norm": 0.12436488270759583, "learning_rate": 1.9614375347715896e-05, "loss": 0.0101, "step": 24960 }, { "epoch": 0.7723929035049762, "grad_norm": 0.1472853124141693, "learning_rate": 1.9613911726525315e-05, "loss": 0.0099, "step": 24990 }, { "epoch": 0.7733201458861346, "grad_norm": 0.16694360971450806, "learning_rate": 1.9613448105334736e-05, "loss": 0.0106, "step": 25020 }, { "epoch": 0.7742473882672931, "grad_norm": 0.13734154403209686, "learning_rate": 
1.9612984484144158e-05, "loss": 0.0095, "step": 25050 }, { "epoch": 0.7751746306484515, "grad_norm": 0.13226653635501862, "learning_rate": 1.9612520862953576e-05, "loss": 0.0109, "step": 25080 }, { "epoch": 0.77610187302961, "grad_norm": 0.14038865268230438, "learning_rate": 1.9612057241762997e-05, "loss": 0.0107, "step": 25110 }, { "epoch": 0.7770291154107684, "grad_norm": 0.14121894538402557, "learning_rate": 1.961159362057242e-05, "loss": 0.0115, "step": 25140 }, { "epoch": 0.7779563577919268, "grad_norm": 0.14763228595256805, "learning_rate": 1.961112999938184e-05, "loss": 0.0109, "step": 25170 }, { "epoch": 0.7788836001730852, "grad_norm": 0.11016196012496948, "learning_rate": 1.961066637819126e-05, "loss": 0.0101, "step": 25200 }, { "epoch": 0.7798108425542437, "grad_norm": 0.09767867624759674, "learning_rate": 1.9610202757000683e-05, "loss": 0.0106, "step": 25230 }, { "epoch": 0.7807380849354021, "grad_norm": 0.14262355864048004, "learning_rate": 1.96097391358101e-05, "loss": 0.011, "step": 25260 }, { "epoch": 0.7816653273165606, "grad_norm": 0.13480566442012787, "learning_rate": 1.9609275514619523e-05, "loss": 0.0109, "step": 25290 }, { "epoch": 0.782592569697719, "grad_norm": 0.10434292256832123, "learning_rate": 1.9608811893428944e-05, "loss": 0.0099, "step": 25320 }, { "epoch": 0.7835198120788774, "grad_norm": 0.15682266652584076, "learning_rate": 1.9608348272238366e-05, "loss": 0.0105, "step": 25350 }, { "epoch": 0.7844470544600358, "grad_norm": 0.17194856703281403, "learning_rate": 1.9607884651047787e-05, "loss": 0.0101, "step": 25380 }, { "epoch": 0.7853742968411943, "grad_norm": 0.15843357145786285, "learning_rate": 1.9607421029857205e-05, "loss": 0.0112, "step": 25410 }, { "epoch": 0.7863015392223527, "grad_norm": 0.16078777611255646, "learning_rate": 1.9606957408666627e-05, "loss": 0.0111, "step": 25440 }, { "epoch": 0.7872287816035112, "grad_norm": 0.15555913746356964, "learning_rate": 1.960649378747605e-05, "loss": 0.0103, "step": 25470 }, { 
"epoch": 0.7881560239846696, "grad_norm": 0.12802782654762268, "learning_rate": 1.9606030166285467e-05, "loss": 0.0108, "step": 25500 }, { "epoch": 0.789083266365828, "grad_norm": 0.15200209617614746, "learning_rate": 1.9605566545094888e-05, "loss": 0.0106, "step": 25530 }, { "epoch": 0.7900105087469864, "grad_norm": 0.11743868887424469, "learning_rate": 1.960510292390431e-05, "loss": 0.0101, "step": 25560 }, { "epoch": 0.7909377511281449, "grad_norm": 0.17723707854747772, "learning_rate": 1.960463930271373e-05, "loss": 0.0108, "step": 25590 }, { "epoch": 0.7918649935093033, "grad_norm": 0.16618022322654724, "learning_rate": 1.9604175681523153e-05, "loss": 0.0102, "step": 25620 }, { "epoch": 0.7927922358904618, "grad_norm": 0.15643423795700073, "learning_rate": 1.960371206033257e-05, "loss": 0.0102, "step": 25650 }, { "epoch": 0.7937194782716201, "grad_norm": 0.12282592803239822, "learning_rate": 1.9603248439141992e-05, "loss": 0.0107, "step": 25680 }, { "epoch": 0.7946467206527786, "grad_norm": 0.1297512799501419, "learning_rate": 1.9602784817951414e-05, "loss": 0.0103, "step": 25710 }, { "epoch": 0.7955739630339371, "grad_norm": 0.18836601078510284, "learning_rate": 1.9602321196760835e-05, "loss": 0.0104, "step": 25740 }, { "epoch": 0.7965012054150955, "grad_norm": 0.21309618651866913, "learning_rate": 1.9601857575570257e-05, "loss": 0.0109, "step": 25770 }, { "epoch": 0.797428447796254, "grad_norm": 0.11946742981672287, "learning_rate": 1.9601393954379678e-05, "loss": 0.0102, "step": 25800 }, { "epoch": 0.7983556901774124, "grad_norm": 0.15536849200725555, "learning_rate": 1.9600930333189096e-05, "loss": 0.0109, "step": 25830 }, { "epoch": 0.7992829325585709, "grad_norm": 0.14737233519554138, "learning_rate": 1.9600466711998518e-05, "loss": 0.0104, "step": 25860 }, { "epoch": 0.8002101749397292, "grad_norm": 0.09525593370199203, "learning_rate": 1.960000309080794e-05, "loss": 0.0106, "step": 25890 }, { "epoch": 0.8011374173208877, "grad_norm": 
0.1439526081085205, "learning_rate": 1.9599539469617357e-05, "loss": 0.0111, "step": 25920 }, { "epoch": 0.8020646597020461, "grad_norm": 0.15843050181865692, "learning_rate": 1.9599075848426782e-05, "loss": 0.0108, "step": 25950 }, { "epoch": 0.8029919020832046, "grad_norm": 0.15759846568107605, "learning_rate": 1.95986122272362e-05, "loss": 0.0101, "step": 25980 }, { "epoch": 0.803919144464363, "grad_norm": 0.15653857588768005, "learning_rate": 1.9598148606045622e-05, "loss": 0.0106, "step": 26010 }, { "epoch": 0.8048463868455215, "grad_norm": 0.1498725712299347, "learning_rate": 1.9597684984855044e-05, "loss": 0.0112, "step": 26040 }, { "epoch": 0.8057736292266798, "grad_norm": 0.12088173627853394, "learning_rate": 1.959722136366446e-05, "loss": 0.0101, "step": 26070 }, { "epoch": 0.8067008716078383, "grad_norm": 0.1721748411655426, "learning_rate": 1.9596757742473883e-05, "loss": 0.0106, "step": 26100 }, { "epoch": 0.8076281139889967, "grad_norm": 0.15741917490959167, "learning_rate": 1.9596294121283305e-05, "loss": 0.0107, "step": 26130 }, { "epoch": 0.8085553563701552, "grad_norm": 0.17974017560482025, "learning_rate": 1.9595830500092726e-05, "loss": 0.011, "step": 26160 }, { "epoch": 0.8094825987513136, "grad_norm": 0.15539591014385223, "learning_rate": 1.9595366878902148e-05, "loss": 0.0102, "step": 26190 }, { "epoch": 0.810409841132472, "grad_norm": 0.11472824960947037, "learning_rate": 1.959490325771157e-05, "loss": 0.0097, "step": 26220 }, { "epoch": 0.8113370835136304, "grad_norm": 0.16355004906654358, "learning_rate": 1.9594439636520987e-05, "loss": 0.0108, "step": 26250 }, { "epoch": 0.8122643258947889, "grad_norm": 0.12253107875585556, "learning_rate": 1.959397601533041e-05, "loss": 0.0104, "step": 26280 }, { "epoch": 0.8131915682759473, "grad_norm": 0.15832988917827606, "learning_rate": 1.959351239413983e-05, "loss": 0.0102, "step": 26310 }, { "epoch": 0.8141188106571058, "grad_norm": 0.12542270123958588, "learning_rate": 1.9593048772949252e-05, 
"loss": 0.0097, "step": 26340 }, { "epoch": 0.8150460530382642, "grad_norm": 0.14216552674770355, "learning_rate": 1.9592585151758673e-05, "loss": 0.0099, "step": 26370 }, { "epoch": 0.8159732954194227, "grad_norm": 0.1342097669839859, "learning_rate": 1.959212153056809e-05, "loss": 0.0112, "step": 26400 }, { "epoch": 0.816900537800581, "grad_norm": 0.1554272472858429, "learning_rate": 1.9591657909377513e-05, "loss": 0.0099, "step": 26430 }, { "epoch": 0.8178277801817395, "grad_norm": 0.1246778666973114, "learning_rate": 1.9591194288186934e-05, "loss": 0.0102, "step": 26460 }, { "epoch": 0.8187550225628979, "grad_norm": 0.13803289830684662, "learning_rate": 1.9590730666996353e-05, "loss": 0.0116, "step": 26490 }, { "epoch": 0.8196822649440564, "grad_norm": 0.1459980309009552, "learning_rate": 1.9590267045805774e-05, "loss": 0.0101, "step": 26520 }, { "epoch": 0.8206095073252148, "grad_norm": 0.15976214408874512, "learning_rate": 1.9589803424615196e-05, "loss": 0.0108, "step": 26550 }, { "epoch": 0.8215367497063732, "grad_norm": 0.18104307353496552, "learning_rate": 1.9589339803424617e-05, "loss": 0.0103, "step": 26580 }, { "epoch": 0.8224639920875316, "grad_norm": 0.15361934900283813, "learning_rate": 1.958887618223404e-05, "loss": 0.0105, "step": 26610 }, { "epoch": 0.8233912344686901, "grad_norm": 0.12334848195314407, "learning_rate": 1.958841256104346e-05, "loss": 0.0103, "step": 26640 }, { "epoch": 0.8243184768498486, "grad_norm": 0.13135434687137604, "learning_rate": 1.9587948939852878e-05, "loss": 0.0105, "step": 26670 }, { "epoch": 0.825245719231007, "grad_norm": 0.17657335102558136, "learning_rate": 1.95874853186623e-05, "loss": 0.0109, "step": 26700 }, { "epoch": 0.8261729616121655, "grad_norm": 0.14345771074295044, "learning_rate": 1.958702169747172e-05, "loss": 0.0101, "step": 26730 }, { "epoch": 0.8271002039933238, "grad_norm": 0.15636752545833588, "learning_rate": 1.9586558076281143e-05, "loss": 0.0094, "step": 26760 }, { "epoch": 0.8280274463744823, 
"grad_norm": 0.1724989414215088, "learning_rate": 1.9586094455090564e-05, "loss": 0.0105, "step": 26790 }, { "epoch": 0.8289546887556407, "grad_norm": 0.12026019394397736, "learning_rate": 1.9585630833899982e-05, "loss": 0.0104, "step": 26820 }, { "epoch": 0.8298819311367992, "grad_norm": 0.11256767064332962, "learning_rate": 1.9585167212709404e-05, "loss": 0.0101, "step": 26850 }, { "epoch": 0.8308091735179576, "grad_norm": 0.1131991520524025, "learning_rate": 1.9584703591518825e-05, "loss": 0.0098, "step": 26880 }, { "epoch": 0.8317364158991161, "grad_norm": 0.11625966429710388, "learning_rate": 1.9584239970328243e-05, "loss": 0.0115, "step": 26910 }, { "epoch": 0.8326636582802744, "grad_norm": 0.1800490766763687, "learning_rate": 1.9583776349137665e-05, "loss": 0.0114, "step": 26940 }, { "epoch": 0.8335909006614329, "grad_norm": 0.12596991658210754, "learning_rate": 1.9583312727947086e-05, "loss": 0.0112, "step": 26970 }, { "epoch": 0.8345181430425913, "grad_norm": 0.13882051408290863, "learning_rate": 1.9582849106756508e-05, "loss": 0.0111, "step": 27000 }, { "epoch": 0.8354453854237498, "grad_norm": 0.12672291696071625, "learning_rate": 1.958238548556593e-05, "loss": 0.0098, "step": 27030 }, { "epoch": 0.8363726278049082, "grad_norm": 0.14286762475967407, "learning_rate": 1.9581921864375348e-05, "loss": 0.0106, "step": 27060 }, { "epoch": 0.8372998701860667, "grad_norm": 0.13392741978168488, "learning_rate": 1.958145824318477e-05, "loss": 0.0109, "step": 27090 }, { "epoch": 0.838227112567225, "grad_norm": 0.13683576881885529, "learning_rate": 1.958099462199419e-05, "loss": 0.0098, "step": 27120 }, { "epoch": 0.8391543549483835, "grad_norm": 0.1763872504234314, "learning_rate": 1.9580531000803612e-05, "loss": 0.0116, "step": 27150 }, { "epoch": 0.8400815973295419, "grad_norm": 0.1336519420146942, "learning_rate": 1.9580067379613034e-05, "loss": 0.0102, "step": 27180 }, { "epoch": 0.8410088397107004, "grad_norm": 0.171179860830307, "learning_rate": 
1.9579603758422455e-05, "loss": 0.0101, "step": 27210 }, { "epoch": 0.8419360820918588, "grad_norm": 0.11407764256000519, "learning_rate": 1.9579140137231873e-05, "loss": 0.01, "step": 27240 }, { "epoch": 0.8428633244730173, "grad_norm": 0.1523221880197525, "learning_rate": 1.9578676516041295e-05, "loss": 0.0094, "step": 27270 }, { "epoch": 0.8437905668541756, "grad_norm": 0.13665278255939484, "learning_rate": 1.9578212894850716e-05, "loss": 0.0101, "step": 27300 }, { "epoch": 0.8447178092353341, "grad_norm": 0.1470496654510498, "learning_rate": 1.9577749273660134e-05, "loss": 0.0113, "step": 27330 }, { "epoch": 0.8456450516164925, "grad_norm": 0.12531396746635437, "learning_rate": 1.9577301106509242e-05, "loss": 0.0105, "step": 27360 }, { "epoch": 0.846572293997651, "grad_norm": 0.1426687389612198, "learning_rate": 1.9576837485318664e-05, "loss": 0.0101, "step": 27390 }, { "epoch": 0.8474995363788094, "grad_norm": 0.13463148474693298, "learning_rate": 1.9576373864128085e-05, "loss": 0.0097, "step": 27420 }, { "epoch": 0.8484267787599679, "grad_norm": 0.13500350713729858, "learning_rate": 1.9575910242937507e-05, "loss": 0.0095, "step": 27450 }, { "epoch": 0.8493540211411262, "grad_norm": 0.11668331921100616, "learning_rate": 1.9575446621746925e-05, "loss": 0.0104, "step": 27480 }, { "epoch": 0.8502812635222847, "grad_norm": 0.18076808750629425, "learning_rate": 1.9574983000556346e-05, "loss": 0.0107, "step": 27510 }, { "epoch": 0.8512085059034432, "grad_norm": 0.1427670270204544, "learning_rate": 1.9574519379365768e-05, "loss": 0.0105, "step": 27540 }, { "epoch": 0.8521357482846016, "grad_norm": 0.11931096017360687, "learning_rate": 1.957405575817519e-05, "loss": 0.0114, "step": 27570 }, { "epoch": 0.8530629906657601, "grad_norm": 0.14580246806144714, "learning_rate": 1.957359213698461e-05, "loss": 0.0111, "step": 27600 }, { "epoch": 0.8539902330469185, "grad_norm": 0.11945343017578125, "learning_rate": 1.9573128515794032e-05, "loss": 0.0105, "step": 27630 }, { 
"epoch": 0.854917475428077, "grad_norm": 0.12932758033275604, "learning_rate": 1.957266489460345e-05, "loss": 0.0101, "step": 27660 }, { "epoch": 0.8558447178092353, "grad_norm": 0.17068512737751007, "learning_rate": 1.9572201273412872e-05, "loss": 0.0114, "step": 27690 }, { "epoch": 0.8567719601903938, "grad_norm": 0.10813874751329422, "learning_rate": 1.9571737652222294e-05, "loss": 0.0115, "step": 27720 }, { "epoch": 0.8576992025715522, "grad_norm": 0.11785928905010223, "learning_rate": 1.9571274031031712e-05, "loss": 0.0111, "step": 27750 }, { "epoch": 0.8586264449527107, "grad_norm": 0.15302570164203644, "learning_rate": 1.9570810409841133e-05, "loss": 0.0101, "step": 27780 }, { "epoch": 0.859553687333869, "grad_norm": 0.11724339425563812, "learning_rate": 1.9570346788650555e-05, "loss": 0.011, "step": 27810 }, { "epoch": 0.8604809297150275, "grad_norm": 0.13124890625476837, "learning_rate": 1.9569883167459976e-05, "loss": 0.0102, "step": 27840 }, { "epoch": 0.8614081720961859, "grad_norm": 0.14294826984405518, "learning_rate": 1.9569419546269398e-05, "loss": 0.0104, "step": 27870 }, { "epoch": 0.8623354144773444, "grad_norm": 0.11142890900373459, "learning_rate": 1.9568955925078816e-05, "loss": 0.0112, "step": 27900 }, { "epoch": 0.8632626568585028, "grad_norm": 0.1277155727148056, "learning_rate": 1.9568492303888237e-05, "loss": 0.0106, "step": 27930 }, { "epoch": 0.8641898992396613, "grad_norm": 0.11281192302703857, "learning_rate": 1.956802868269766e-05, "loss": 0.0106, "step": 27960 }, { "epoch": 0.8651171416208197, "grad_norm": 0.12655314803123474, "learning_rate": 1.956756506150708e-05, "loss": 0.0102, "step": 27990 }, { "epoch": 0.8660443840019781, "grad_norm": 0.12139441072940826, "learning_rate": 1.9567101440316502e-05, "loss": 0.0102, "step": 28020 }, { "epoch": 0.8669716263831365, "grad_norm": 0.1479601263999939, "learning_rate": 1.9566637819125923e-05, "loss": 0.0095, "step": 28050 }, { "epoch": 0.867898868764295, "grad_norm": 0.17215463519096375, 
"learning_rate": 1.956617419793534e-05, "loss": 0.0101, "step": 28080 }, { "epoch": 0.8688261111454534, "grad_norm": 0.1311594545841217, "learning_rate": 1.9565710576744763e-05, "loss": 0.0103, "step": 28110 }, { "epoch": 0.8697533535266119, "grad_norm": 0.15616732835769653, "learning_rate": 1.956524695555418e-05, "loss": 0.0111, "step": 28140 }, { "epoch": 0.8706805959077702, "grad_norm": 0.10624350607395172, "learning_rate": 1.9564783334363603e-05, "loss": 0.0109, "step": 28170 }, { "epoch": 0.8716078382889287, "grad_norm": 0.10852344334125519, "learning_rate": 1.9564319713173024e-05, "loss": 0.0104, "step": 28200 }, { "epoch": 0.8725350806700871, "grad_norm": 0.12826849520206451, "learning_rate": 1.9563856091982446e-05, "loss": 0.0105, "step": 28230 }, { "epoch": 0.8734623230512456, "grad_norm": 0.1460951268672943, "learning_rate": 1.9563392470791867e-05, "loss": 0.0092, "step": 28260 }, { "epoch": 0.874389565432404, "grad_norm": 0.1467992514371872, "learning_rate": 1.956292884960129e-05, "loss": 0.0112, "step": 28290 }, { "epoch": 0.8753168078135625, "grad_norm": 0.15741851925849915, "learning_rate": 1.9562465228410707e-05, "loss": 0.0111, "step": 28320 }, { "epoch": 0.8762440501947208, "grad_norm": 0.1173175722360611, "learning_rate": 1.9562001607220128e-05, "loss": 0.0109, "step": 28350 }, { "epoch": 0.8771712925758793, "grad_norm": 0.20475371181964874, "learning_rate": 1.956153798602955e-05, "loss": 0.0107, "step": 28380 }, { "epoch": 0.8780985349570378, "grad_norm": 0.12673446536064148, "learning_rate": 1.956107436483897e-05, "loss": 0.0096, "step": 28410 }, { "epoch": 0.8790257773381962, "grad_norm": 0.19214878976345062, "learning_rate": 1.9560610743648393e-05, "loss": 0.0097, "step": 28440 }, { "epoch": 0.8799530197193547, "grad_norm": 0.11743417382240295, "learning_rate": 1.956014712245781e-05, "loss": 0.0107, "step": 28470 }, { "epoch": 0.8808802621005131, "grad_norm": 0.12940219044685364, "learning_rate": 1.9559683501267232e-05, "loss": 0.0111, "step": 
28500 }, { "epoch": 0.8818075044816716, "grad_norm": 0.11061205714941025, "learning_rate": 1.9559219880076654e-05, "loss": 0.01, "step": 28530 }, { "epoch": 0.8827347468628299, "grad_norm": 0.1848234385251999, "learning_rate": 1.9558756258886072e-05, "loss": 0.0099, "step": 28560 }, { "epoch": 0.8836619892439884, "grad_norm": 0.17153625190258026, "learning_rate": 1.9558292637695494e-05, "loss": 0.0109, "step": 28590 }, { "epoch": 0.8845892316251468, "grad_norm": 0.11701735854148865, "learning_rate": 1.9557829016504915e-05, "loss": 0.0109, "step": 28620 }, { "epoch": 0.8855164740063053, "grad_norm": 0.12982513010501862, "learning_rate": 1.9557365395314337e-05, "loss": 0.0095, "step": 28650 }, { "epoch": 0.8864437163874637, "grad_norm": 0.13801456987857819, "learning_rate": 1.9556901774123758e-05, "loss": 0.0107, "step": 28680 }, { "epoch": 0.8873709587686222, "grad_norm": 0.09260576218366623, "learning_rate": 1.955643815293318e-05, "loss": 0.0105, "step": 28710 }, { "epoch": 0.8882982011497805, "grad_norm": 0.1039632260799408, "learning_rate": 1.9555974531742598e-05, "loss": 0.0108, "step": 28740 }, { "epoch": 0.889225443530939, "grad_norm": 0.1408616006374359, "learning_rate": 1.955551091055202e-05, "loss": 0.01, "step": 28770 }, { "epoch": 0.8901526859120974, "grad_norm": 0.13390640914440155, "learning_rate": 1.955504728936144e-05, "loss": 0.0105, "step": 28800 }, { "epoch": 0.8910799282932559, "grad_norm": 0.1330823004245758, "learning_rate": 1.9554583668170862e-05, "loss": 0.0098, "step": 28830 }, { "epoch": 0.8920071706744143, "grad_norm": 0.16644588112831116, "learning_rate": 1.9554120046980284e-05, "loss": 0.0105, "step": 28860 }, { "epoch": 0.8929344130555728, "grad_norm": 0.1822705715894699, "learning_rate": 1.9553656425789702e-05, "loss": 0.0099, "step": 28890 }, { "epoch": 0.8938616554367311, "grad_norm": 0.19486865401268005, "learning_rate": 1.9553192804599123e-05, "loss": 0.0101, "step": 28920 }, { "epoch": 0.8947888978178896, "grad_norm": 
0.13516107201576233, "learning_rate": 1.9552729183408545e-05, "loss": 0.0112, "step": 28950 }, { "epoch": 0.895716140199048, "grad_norm": 0.1798097938299179, "learning_rate": 1.9552265562217966e-05, "loss": 0.0116, "step": 28980 }, { "epoch": 0.8966433825802065, "grad_norm": 0.11729099601507187, "learning_rate": 1.9551801941027388e-05, "loss": 0.0091, "step": 29010 }, { "epoch": 0.8975706249613649, "grad_norm": 0.11080125719308853, "learning_rate": 1.955133831983681e-05, "loss": 0.0107, "step": 29040 }, { "epoch": 0.8984978673425233, "grad_norm": 0.1238224059343338, "learning_rate": 1.9550874698646227e-05, "loss": 0.0097, "step": 29070 }, { "epoch": 0.8994251097236817, "grad_norm": 0.17074823379516602, "learning_rate": 1.955041107745565e-05, "loss": 0.0101, "step": 29100 }, { "epoch": 0.9003523521048402, "grad_norm": 0.13491256535053253, "learning_rate": 1.9549947456265067e-05, "loss": 0.0103, "step": 29130 }, { "epoch": 0.9012795944859986, "grad_norm": 0.14148780703544617, "learning_rate": 1.954948383507449e-05, "loss": 0.0102, "step": 29160 }, { "epoch": 0.9022068368671571, "grad_norm": 0.22930121421813965, "learning_rate": 1.954902021388391e-05, "loss": 0.0106, "step": 29190 }, { "epoch": 0.9031340792483155, "grad_norm": 0.16468314826488495, "learning_rate": 1.954855659269333e-05, "loss": 0.0109, "step": 29220 }, { "epoch": 0.904061321629474, "grad_norm": 0.1470540165901184, "learning_rate": 1.9548092971502753e-05, "loss": 0.0104, "step": 29250 }, { "epoch": 0.9049885640106324, "grad_norm": 0.08858689665794373, "learning_rate": 1.9547629350312175e-05, "loss": 0.0102, "step": 29280 }, { "epoch": 0.9059158063917908, "grad_norm": 0.09545585513114929, "learning_rate": 1.9547165729121593e-05, "loss": 0.0106, "step": 29310 }, { "epoch": 0.9068430487729493, "grad_norm": 0.1421983540058136, "learning_rate": 1.9546702107931014e-05, "loss": 0.0113, "step": 29340 }, { "epoch": 0.9077702911541077, "grad_norm": 0.1679130345582962, "learning_rate": 1.9546238486740436e-05, 
"loss": 0.0112, "step": 29370 }, { "epoch": 0.9086975335352662, "grad_norm": 0.09180338680744171, "learning_rate": 1.9545774865549857e-05, "loss": 0.0103, "step": 29400 }, { "epoch": 0.9096247759164245, "grad_norm": 0.11787530779838562, "learning_rate": 1.954531124435928e-05, "loss": 0.0102, "step": 29430 }, { "epoch": 0.910552018297583, "grad_norm": 0.16902294754981995, "learning_rate": 1.95448476231687e-05, "loss": 0.0105, "step": 29460 }, { "epoch": 0.9114792606787414, "grad_norm": 0.1642059087753296, "learning_rate": 1.954438400197812e-05, "loss": 0.0099, "step": 29490 }, { "epoch": 0.9124065030598999, "grad_norm": 0.11565437912940979, "learning_rate": 1.954392038078754e-05, "loss": 0.0105, "step": 29520 }, { "epoch": 0.9133337454410583, "grad_norm": 0.17358510196208954, "learning_rate": 1.9543456759596958e-05, "loss": 0.0105, "step": 29550 }, { "epoch": 0.9142609878222168, "grad_norm": 0.10795272141695023, "learning_rate": 1.954299313840638e-05, "loss": 0.0101, "step": 29580 }, { "epoch": 0.9151882302033751, "grad_norm": 0.11939966678619385, "learning_rate": 1.95425295172158e-05, "loss": 0.0101, "step": 29610 }, { "epoch": 0.9161154725845336, "grad_norm": 0.09388712048530579, "learning_rate": 1.9542065896025223e-05, "loss": 0.0101, "step": 29640 }, { "epoch": 0.917042714965692, "grad_norm": 0.1667623668909073, "learning_rate": 1.9541602274834644e-05, "loss": 0.0103, "step": 29670 }, { "epoch": 0.9179699573468505, "grad_norm": 0.16669020056724548, "learning_rate": 1.9541138653644066e-05, "loss": 0.0103, "step": 29700 }, { "epoch": 0.9188971997280089, "grad_norm": 0.1597282737493515, "learning_rate": 1.9540675032453484e-05, "loss": 0.011, "step": 29730 }, { "epoch": 0.9198244421091674, "grad_norm": 0.15668092668056488, "learning_rate": 1.9540211411262905e-05, "loss": 0.0105, "step": 29760 }, { "epoch": 0.9207516844903257, "grad_norm": 0.13728152215480804, "learning_rate": 1.9539747790072327e-05, "loss": 0.0105, "step": 29790 }, { "epoch": 0.9216789268714842, 
"grad_norm": 0.10351379215717316, "learning_rate": 1.9539284168881748e-05, "loss": 0.0107, "step": 29820 }, { "epoch": 0.9226061692526426, "grad_norm": 0.11544520407915115, "learning_rate": 1.953882054769117e-05, "loss": 0.0098, "step": 29850 }, { "epoch": 0.9235334116338011, "grad_norm": 0.17946027219295502, "learning_rate": 1.9538356926500588e-05, "loss": 0.0099, "step": 29880 }, { "epoch": 0.9244606540149595, "grad_norm": 0.15539829432964325, "learning_rate": 1.953789330531001e-05, "loss": 0.0099, "step": 29910 }, { "epoch": 0.925387896396118, "grad_norm": 0.1369847059249878, "learning_rate": 1.953742968411943e-05, "loss": 0.0103, "step": 29940 }, { "epoch": 0.9263151387772763, "grad_norm": 0.18713867664337158, "learning_rate": 1.953696606292885e-05, "loss": 0.0109, "step": 29970 }, { "epoch": 0.9272423811584348, "grad_norm": 0.16122247278690338, "learning_rate": 1.953650244173827e-05, "loss": 0.0105, "step": 30000 }, { "epoch": 0.9281696235395932, "grad_norm": 0.21481163799762726, "learning_rate": 1.9536038820547692e-05, "loss": 0.0103, "step": 30030 }, { "epoch": 0.9290968659207517, "grad_norm": 0.15142469108104706, "learning_rate": 1.9535575199357113e-05, "loss": 0.0107, "step": 30060 }, { "epoch": 0.9300241083019101, "grad_norm": 0.17728132009506226, "learning_rate": 1.9535111578166535e-05, "loss": 0.0109, "step": 30090 }, { "epoch": 0.9309513506830686, "grad_norm": 0.12847939133644104, "learning_rate": 1.9534647956975956e-05, "loss": 0.0107, "step": 30120 }, { "epoch": 0.9318785930642269, "grad_norm": 0.14404620230197906, "learning_rate": 1.9534184335785375e-05, "loss": 0.0101, "step": 30150 }, { "epoch": 0.9328058354453854, "grad_norm": 0.10215981304645538, "learning_rate": 1.9533720714594796e-05, "loss": 0.0102, "step": 30180 }, { "epoch": 0.9337330778265439, "grad_norm": 0.10401603579521179, "learning_rate": 1.9533257093404218e-05, "loss": 0.0112, "step": 30210 }, { "epoch": 0.9346603202077023, "grad_norm": 0.13442771136760712, "learning_rate": 
1.953279347221364e-05, "loss": 0.01, "step": 30240 }, { "epoch": 0.9355875625888608, "grad_norm": 0.14753110706806183, "learning_rate": 1.953232985102306e-05, "loss": 0.0095, "step": 30270 }, { "epoch": 0.9365148049700192, "grad_norm": 0.09361498057842255, "learning_rate": 1.953186622983248e-05, "loss": 0.0091, "step": 30300 }, { "epoch": 0.9374420473511776, "grad_norm": 0.14071495831012726, "learning_rate": 1.95314026086419e-05, "loss": 0.0102, "step": 30330 }, { "epoch": 0.938369289732336, "grad_norm": 0.1727423369884491, "learning_rate": 1.9530938987451322e-05, "loss": 0.0095, "step": 30360 }, { "epoch": 0.9392965321134945, "grad_norm": 0.3718450665473938, "learning_rate": 1.9530475366260743e-05, "loss": 0.0107, "step": 30390 }, { "epoch": 0.9402237744946529, "grad_norm": 0.12286089360713959, "learning_rate": 1.9530011745070165e-05, "loss": 0.0103, "step": 30420 }, { "epoch": 0.9411510168758114, "grad_norm": 0.1691594123840332, "learning_rate": 1.9529548123879586e-05, "loss": 0.0095, "step": 30450 }, { "epoch": 0.9420782592569698, "grad_norm": 0.14085109531879425, "learning_rate": 1.9529084502689004e-05, "loss": 0.0099, "step": 30480 }, { "epoch": 0.9430055016381282, "grad_norm": 0.18192769587039948, "learning_rate": 1.9528620881498426e-05, "loss": 0.0101, "step": 30510 }, { "epoch": 0.9439327440192866, "grad_norm": 0.12856383621692657, "learning_rate": 1.9528157260307844e-05, "loss": 0.0101, "step": 30540 }, { "epoch": 0.9448599864004451, "grad_norm": 0.14312247931957245, "learning_rate": 1.9527693639117266e-05, "loss": 0.01, "step": 30570 }, { "epoch": 0.9457872287816035, "grad_norm": 0.1339237540960312, "learning_rate": 1.9527230017926687e-05, "loss": 0.0095, "step": 30600 }, { "epoch": 0.946714471162762, "grad_norm": 0.10748368501663208, "learning_rate": 1.952676639673611e-05, "loss": 0.0107, "step": 30630 }, { "epoch": 0.9476417135439203, "grad_norm": 0.14337974786758423, "learning_rate": 1.952630277554553e-05, "loss": 0.01, "step": 30660 }, { "epoch": 
0.9485689559250788, "grad_norm": 0.1824871301651001, "learning_rate": 1.952583915435495e-05, "loss": 0.0113, "step": 30690 }, { "epoch": 0.9494961983062372, "grad_norm": 0.17678356170654297, "learning_rate": 1.9525390987204056e-05, "loss": 0.0107, "step": 30720 }, { "epoch": 0.9504234406873957, "grad_norm": 0.3082156777381897, "learning_rate": 1.9524927366013478e-05, "loss": 0.0113, "step": 30750 }, { "epoch": 0.9513506830685541, "grad_norm": 0.1579640805721283, "learning_rate": 1.95244637448229e-05, "loss": 0.0099, "step": 30780 }, { "epoch": 0.9522779254497126, "grad_norm": 0.11085548251867294, "learning_rate": 1.9524000123632317e-05, "loss": 0.0102, "step": 30810 }, { "epoch": 0.953205167830871, "grad_norm": 0.17379029095172882, "learning_rate": 1.952353650244174e-05, "loss": 0.0107, "step": 30840 }, { "epoch": 0.9541324102120294, "grad_norm": 0.15838640928268433, "learning_rate": 1.952307288125116e-05, "loss": 0.0105, "step": 30870 }, { "epoch": 0.9550596525931878, "grad_norm": 0.12278188019990921, "learning_rate": 1.9522609260060582e-05, "loss": 0.0092, "step": 30900 }, { "epoch": 0.9559868949743463, "grad_norm": 0.11618715524673462, "learning_rate": 1.9522145638870003e-05, "loss": 0.01, "step": 30930 }, { "epoch": 0.9569141373555047, "grad_norm": 0.17169547080993652, "learning_rate": 1.952168201767942e-05, "loss": 0.0104, "step": 30960 }, { "epoch": 0.9578413797366632, "grad_norm": 0.13035085797309875, "learning_rate": 1.9521218396488843e-05, "loss": 0.0106, "step": 30990 }, { "epoch": 0.9587686221178215, "grad_norm": 0.14783158898353577, "learning_rate": 1.9520754775298264e-05, "loss": 0.0104, "step": 31020 }, { "epoch": 0.95969586449898, "grad_norm": 0.1539580523967743, "learning_rate": 1.9520291154107686e-05, "loss": 0.0099, "step": 31050 }, { "epoch": 0.9606231068801385, "grad_norm": 0.12812523543834686, "learning_rate": 1.9519827532917107e-05, "loss": 0.0101, "step": 31080 }, { "epoch": 0.9615503492612969, "grad_norm": 0.15188440680503845, 
"learning_rate": 1.951936391172653e-05, "loss": 0.0098, "step": 31110 }, { "epoch": 0.9624775916424554, "grad_norm": 0.13555318117141724, "learning_rate": 1.9518900290535947e-05, "loss": 0.0102, "step": 31140 }, { "epoch": 0.9634048340236138, "grad_norm": 0.19393488764762878, "learning_rate": 1.951843666934537e-05, "loss": 0.0103, "step": 31170 }, { "epoch": 0.9643320764047723, "grad_norm": 0.11865700781345367, "learning_rate": 1.951797304815479e-05, "loss": 0.0106, "step": 31200 }, { "epoch": 0.9652593187859306, "grad_norm": 0.11719061434268951, "learning_rate": 1.9517509426964208e-05, "loss": 0.0104, "step": 31230 }, { "epoch": 0.9661865611670891, "grad_norm": 0.12498758733272552, "learning_rate": 1.951704580577363e-05, "loss": 0.0099, "step": 31260 }, { "epoch": 0.9671138035482475, "grad_norm": 0.12046651542186737, "learning_rate": 1.951658218458305e-05, "loss": 0.0103, "step": 31290 }, { "epoch": 0.968041045929406, "grad_norm": 0.11983208358287811, "learning_rate": 1.9516118563392473e-05, "loss": 0.0104, "step": 31320 }, { "epoch": 0.9689682883105644, "grad_norm": 0.20224547386169434, "learning_rate": 1.9515654942201894e-05, "loss": 0.0111, "step": 31350 }, { "epoch": 0.9698955306917229, "grad_norm": 0.17893078923225403, "learning_rate": 1.9515191321011312e-05, "loss": 0.0108, "step": 31380 }, { "epoch": 0.9708227730728812, "grad_norm": 0.11806561052799225, "learning_rate": 1.9514727699820734e-05, "loss": 0.0099, "step": 31410 }, { "epoch": 0.9717500154540397, "grad_norm": 0.1670357584953308, "learning_rate": 1.9514264078630155e-05, "loss": 0.0106, "step": 31440 }, { "epoch": 0.9726772578351981, "grad_norm": 0.1309916377067566, "learning_rate": 1.9513800457439577e-05, "loss": 0.0103, "step": 31470 }, { "epoch": 0.9736045002163566, "grad_norm": 0.11974647641181946, "learning_rate": 1.9513336836248998e-05, "loss": 0.011, "step": 31500 }, { "epoch": 0.974531742597515, "grad_norm": 0.18252629041671753, "learning_rate": 1.951287321505842e-05, "loss": 0.0111, "step": 
31530 }, { "epoch": 0.9754589849786734, "grad_norm": 0.14146754145622253, "learning_rate": 1.9512409593867838e-05, "loss": 0.0095, "step": 31560 }, { "epoch": 0.9763862273598318, "grad_norm": 0.1251847743988037, "learning_rate": 1.951194597267726e-05, "loss": 0.0102, "step": 31590 }, { "epoch": 0.9773134697409903, "grad_norm": 0.14479295909404755, "learning_rate": 1.9511482351486678e-05, "loss": 0.0111, "step": 31620 }, { "epoch": 0.9782407121221487, "grad_norm": 0.13783372938632965, "learning_rate": 1.9511018730296102e-05, "loss": 0.0105, "step": 31650 }, { "epoch": 0.9791679545033072, "grad_norm": 0.14375264942646027, "learning_rate": 1.9510555109105524e-05, "loss": 0.0107, "step": 31680 }, { "epoch": 0.9800951968844656, "grad_norm": 0.2233307659626007, "learning_rate": 1.9510091487914942e-05, "loss": 0.0103, "step": 31710 }, { "epoch": 0.981022439265624, "grad_norm": 0.1705060750246048, "learning_rate": 1.9509627866724364e-05, "loss": 0.0103, "step": 31740 }, { "epoch": 0.9819496816467824, "grad_norm": 0.15316961705684662, "learning_rate": 1.9509164245533785e-05, "loss": 0.0099, "step": 31770 }, { "epoch": 0.9828769240279409, "grad_norm": 0.11986293643712997, "learning_rate": 1.9508700624343203e-05, "loss": 0.0093, "step": 31800 }, { "epoch": 0.9838041664090993, "grad_norm": 0.1639423817396164, "learning_rate": 1.9508237003152625e-05, "loss": 0.0103, "step": 31830 }, { "epoch": 0.9847314087902578, "grad_norm": 0.1444067656993866, "learning_rate": 1.9507773381962046e-05, "loss": 0.0114, "step": 31860 }, { "epoch": 0.9856586511714162, "grad_norm": 0.1159743145108223, "learning_rate": 1.9507309760771468e-05, "loss": 0.0101, "step": 31890 }, { "epoch": 0.9865858935525746, "grad_norm": 0.14385798573493958, "learning_rate": 1.950684613958089e-05, "loss": 0.011, "step": 31920 }, { "epoch": 0.9875131359337331, "grad_norm": 0.14646178483963013, "learning_rate": 1.9506382518390307e-05, "loss": 0.0104, "step": 31950 }, { "epoch": 0.9884403783148915, "grad_norm": 
0.1557178944349289, "learning_rate": 1.950591889719973e-05, "loss": 0.0105, "step": 31980 }, { "epoch": 0.98936762069605, "grad_norm": 0.14214742183685303, "learning_rate": 1.950545527600915e-05, "loss": 0.011, "step": 32010 }, { "epoch": 0.9902948630772084, "grad_norm": 0.1310066133737564, "learning_rate": 1.9504991654818572e-05, "loss": 0.0106, "step": 32040 }, { "epoch": 0.9912221054583669, "grad_norm": 0.127274289727211, "learning_rate": 1.9504528033627993e-05, "loss": 0.0102, "step": 32070 }, { "epoch": 0.9921493478395252, "grad_norm": 0.14554499089717865, "learning_rate": 1.9504064412437415e-05, "loss": 0.0103, "step": 32100 }, { "epoch": 0.9930765902206837, "grad_norm": 0.10675456374883652, "learning_rate": 1.9503600791246833e-05, "loss": 0.0095, "step": 32130 }, { "epoch": 0.9940038326018421, "grad_norm": 0.10777793079614639, "learning_rate": 1.9503137170056254e-05, "loss": 0.0097, "step": 32160 }, { "epoch": 0.9949310749830006, "grad_norm": 0.1751488894224167, "learning_rate": 1.9502673548865676e-05, "loss": 0.0096, "step": 32190 }, { "epoch": 0.995858317364159, "grad_norm": 0.09978543221950531, "learning_rate": 1.9502209927675094e-05, "loss": 0.0109, "step": 32220 }, { "epoch": 0.9967855597453175, "grad_norm": 0.16980716586112976, "learning_rate": 1.9501746306484516e-05, "loss": 0.0103, "step": 32250 }, { "epoch": 0.9977128021264758, "grad_norm": 0.10347730666399002, "learning_rate": 1.9501282685293937e-05, "loss": 0.0101, "step": 32280 }, { "epoch": 0.9986400445076343, "grad_norm": 0.12681429088115692, "learning_rate": 1.950081906410336e-05, "loss": 0.0114, "step": 32310 }, { "epoch": 0.9995672868887927, "grad_norm": 0.11633099615573883, "learning_rate": 1.950035544291278e-05, "loss": 0.0107, "step": 32340 }, { "epoch": 1.0004945292699512, "grad_norm": 0.1388934701681137, "learning_rate": 1.9499891821722198e-05, "loss": 0.0109, "step": 32370 }, { "epoch": 1.0014217716511096, "grad_norm": 0.09523776918649673, "learning_rate": 1.949942820053162e-05, 
"loss": 0.0097, "step": 32400 }, { "epoch": 1.002349014032268, "grad_norm": 0.15373149514198303, "learning_rate": 1.949896457934104e-05, "loss": 0.0106, "step": 32430 }, { "epoch": 1.0032762564134265, "grad_norm": 0.14451244473457336, "learning_rate": 1.9498500958150463e-05, "loss": 0.0101, "step": 32460 }, { "epoch": 1.004203498794585, "grad_norm": 0.12027052789926529, "learning_rate": 1.9498037336959884e-05, "loss": 0.0107, "step": 32490 }, { "epoch": 1.0051307411757433, "grad_norm": 0.15820251405239105, "learning_rate": 1.9497573715769306e-05, "loss": 0.0096, "step": 32520 }, { "epoch": 1.0060579835569017, "grad_norm": 0.1072096973657608, "learning_rate": 1.9497110094578724e-05, "loss": 0.0106, "step": 32550 }, { "epoch": 1.0069852259380603, "grad_norm": 0.14151984453201294, "learning_rate": 1.9496646473388145e-05, "loss": 0.01, "step": 32580 }, { "epoch": 1.0079124683192187, "grad_norm": 0.26245638728141785, "learning_rate": 1.9496182852197567e-05, "loss": 0.0108, "step": 32610 }, { "epoch": 1.008839710700377, "grad_norm": 0.13837318122386932, "learning_rate": 1.9495719231006985e-05, "loss": 0.0096, "step": 32640 }, { "epoch": 1.0097669530815354, "grad_norm": 0.1483849734067917, "learning_rate": 1.9495255609816407e-05, "loss": 0.0092, "step": 32670 }, { "epoch": 1.010694195462694, "grad_norm": 0.1291530877351761, "learning_rate": 1.9494791988625828e-05, "loss": 0.0104, "step": 32700 }, { "epoch": 1.0116214378438524, "grad_norm": 0.17314477264881134, "learning_rate": 1.949432836743525e-05, "loss": 0.0098, "step": 32730 }, { "epoch": 1.0125486802250108, "grad_norm": 0.1739080846309662, "learning_rate": 1.949386474624467e-05, "loss": 0.0099, "step": 32760 }, { "epoch": 1.0134759226061691, "grad_norm": 0.15653568506240845, "learning_rate": 1.949340112505409e-05, "loss": 0.0101, "step": 32790 }, { "epoch": 1.0144031649873277, "grad_norm": 0.153305783867836, "learning_rate": 1.949293750386351e-05, "loss": 0.0098, "step": 32820 }, { "epoch": 1.0153304073684861, 
"grad_norm": 0.13061876595020294, "learning_rate": 1.9492473882672932e-05, "loss": 0.0098, "step": 32850 }, { "epoch": 1.0162576497496445, "grad_norm": 0.13316050171852112, "learning_rate": 1.9492010261482354e-05, "loss": 0.0097, "step": 32880 }, { "epoch": 1.017184892130803, "grad_norm": 0.2807864248752594, "learning_rate": 1.9491546640291775e-05, "loss": 0.0106, "step": 32910 }, { "epoch": 1.0181121345119615, "grad_norm": 0.16865280270576477, "learning_rate": 1.9491083019101197e-05, "loss": 0.0102, "step": 32940 }, { "epoch": 1.0190393768931199, "grad_norm": 0.1773238331079483, "learning_rate": 1.9490619397910615e-05, "loss": 0.0102, "step": 32970 }, { "epoch": 1.0199666192742782, "grad_norm": 0.1225300282239914, "learning_rate": 1.9490155776720036e-05, "loss": 0.01, "step": 33000 }, { "epoch": 1.0208938616554368, "grad_norm": 0.14690649509429932, "learning_rate": 1.9489692155529454e-05, "loss": 0.0102, "step": 33030 }, { "epoch": 1.0218211040365952, "grad_norm": 0.12160659581422806, "learning_rate": 1.948922853433888e-05, "loss": 0.0103, "step": 33060 }, { "epoch": 1.0227483464177536, "grad_norm": 0.14907792210578918, "learning_rate": 1.94887649131483e-05, "loss": 0.0099, "step": 33090 }, { "epoch": 1.023675588798912, "grad_norm": 0.14165416359901428, "learning_rate": 1.948830129195772e-05, "loss": 0.0094, "step": 33120 }, { "epoch": 1.0246028311800706, "grad_norm": 0.1446605771780014, "learning_rate": 1.948783767076714e-05, "loss": 0.0093, "step": 33150 }, { "epoch": 1.025530073561229, "grad_norm": 0.12140592932701111, "learning_rate": 1.9487374049576562e-05, "loss": 0.0099, "step": 33180 }, { "epoch": 1.0264573159423873, "grad_norm": 0.11077461391687393, "learning_rate": 1.948691042838598e-05, "loss": 0.0099, "step": 33210 }, { "epoch": 1.0273845583235457, "grad_norm": 0.14734628796577454, "learning_rate": 1.94864468071954e-05, "loss": 0.0096, "step": 33240 }, { "epoch": 1.0283118007047043, "grad_norm": 0.10921402275562286, "learning_rate": 
1.9485983186004823e-05, "loss": 0.01, "step": 33270 }, { "epoch": 1.0292390430858627, "grad_norm": 0.09588339179754257, "learning_rate": 1.9485519564814245e-05, "loss": 0.0102, "step": 33300 }, { "epoch": 1.030166285467021, "grad_norm": 0.15871886909008026, "learning_rate": 1.9485055943623666e-05, "loss": 0.011, "step": 33330 }, { "epoch": 1.0310935278481794, "grad_norm": 0.16036361455917358, "learning_rate": 1.9484592322433084e-05, "loss": 0.0092, "step": 33360 }, { "epoch": 1.032020770229338, "grad_norm": 0.13292501866817474, "learning_rate": 1.9484128701242506e-05, "loss": 0.0102, "step": 33390 }, { "epoch": 1.0329480126104964, "grad_norm": 0.15553243458271027, "learning_rate": 1.9483665080051927e-05, "loss": 0.011, "step": 33420 }, { "epoch": 1.0338752549916548, "grad_norm": 0.17727160453796387, "learning_rate": 1.948320145886135e-05, "loss": 0.01, "step": 33450 }, { "epoch": 1.0348024973728132, "grad_norm": 0.10606896132230759, "learning_rate": 1.948273783767077e-05, "loss": 0.0096, "step": 33480 }, { "epoch": 1.0357297397539718, "grad_norm": 0.17466050386428833, "learning_rate": 1.9482274216480192e-05, "loss": 0.01, "step": 33510 }, { "epoch": 1.0366569821351301, "grad_norm": 0.14464224874973297, "learning_rate": 1.948181059528961e-05, "loss": 0.0103, "step": 33540 }, { "epoch": 1.0375842245162885, "grad_norm": 0.20701110363006592, "learning_rate": 1.948134697409903e-05, "loss": 0.0089, "step": 33570 }, { "epoch": 1.0385114668974469, "grad_norm": 0.12086112797260284, "learning_rate": 1.948089880694814e-05, "loss": 0.0101, "step": 33600 }, { "epoch": 1.0394387092786055, "grad_norm": 0.11641248315572739, "learning_rate": 1.9480435185757557e-05, "loss": 0.0108, "step": 33630 }, { "epoch": 1.0403659516597639, "grad_norm": 0.1789770871400833, "learning_rate": 1.947997156456698e-05, "loss": 0.0097, "step": 33660 }, { "epoch": 1.0412931940409222, "grad_norm": 0.11733080446720123, "learning_rate": 1.94795079433764e-05, "loss": 0.0095, "step": 33690 }, { "epoch": 
1.0422204364220806, "grad_norm": 0.1260131150484085, "learning_rate": 1.9479044322185822e-05, "loss": 0.0095, "step": 33720 }, { "epoch": 1.0431476788032392, "grad_norm": 0.1030770018696785, "learning_rate": 1.9478580700995243e-05, "loss": 0.0099, "step": 33750 }, { "epoch": 1.0440749211843976, "grad_norm": 0.11511421203613281, "learning_rate": 1.947811707980466e-05, "loss": 0.0104, "step": 33780 }, { "epoch": 1.045002163565556, "grad_norm": 0.14781862497329712, "learning_rate": 1.9477653458614083e-05, "loss": 0.0098, "step": 33810 }, { "epoch": 1.0459294059467146, "grad_norm": 0.17974963784217834, "learning_rate": 1.9477189837423505e-05, "loss": 0.0103, "step": 33840 }, { "epoch": 1.046856648327873, "grad_norm": 0.1342313587665558, "learning_rate": 1.9476726216232923e-05, "loss": 0.0101, "step": 33870 }, { "epoch": 1.0477838907090313, "grad_norm": 0.1080104187130928, "learning_rate": 1.9476262595042344e-05, "loss": 0.0103, "step": 33900 }, { "epoch": 1.0487111330901897, "grad_norm": 0.09354769438505173, "learning_rate": 1.9475798973851766e-05, "loss": 0.0096, "step": 33930 }, { "epoch": 1.0496383754713483, "grad_norm": 0.14355513453483582, "learning_rate": 1.9475335352661187e-05, "loss": 0.0106, "step": 33960 }, { "epoch": 1.0505656178525067, "grad_norm": 0.1416493058204651, "learning_rate": 1.947487173147061e-05, "loss": 0.0101, "step": 33990 }, { "epoch": 1.051492860233665, "grad_norm": 0.09909727424383163, "learning_rate": 1.947440811028003e-05, "loss": 0.0101, "step": 34020 }, { "epoch": 1.0524201026148234, "grad_norm": 0.17160369455814362, "learning_rate": 1.947394448908945e-05, "loss": 0.0098, "step": 34050 }, { "epoch": 1.053347344995982, "grad_norm": 0.15703952312469482, "learning_rate": 1.947348086789887e-05, "loss": 0.0098, "step": 34080 }, { "epoch": 1.0542745873771404, "grad_norm": 0.11413668096065521, "learning_rate": 1.947301724670829e-05, "loss": 0.0103, "step": 34110 }, { "epoch": 1.0552018297582988, "grad_norm": 0.13696733117103577, 
"learning_rate": 1.9472553625517713e-05, "loss": 0.0105, "step": 34140 }, { "epoch": 1.0561290721394572, "grad_norm": 0.1135011687874794, "learning_rate": 1.9472090004327134e-05, "loss": 0.0096, "step": 34170 }, { "epoch": 1.0570563145206158, "grad_norm": 0.18881669640541077, "learning_rate": 1.9471626383136552e-05, "loss": 0.0105, "step": 34200 }, { "epoch": 1.0579835569017741, "grad_norm": 0.13246992230415344, "learning_rate": 1.9471162761945974e-05, "loss": 0.0109, "step": 34230 }, { "epoch": 1.0589107992829325, "grad_norm": 0.16972258687019348, "learning_rate": 1.9470699140755395e-05, "loss": 0.0095, "step": 34260 }, { "epoch": 1.059838041664091, "grad_norm": 0.18332742154598236, "learning_rate": 1.9470235519564814e-05, "loss": 0.0098, "step": 34290 }, { "epoch": 1.0607652840452495, "grad_norm": 0.14079688489437103, "learning_rate": 1.9469771898374235e-05, "loss": 0.0101, "step": 34320 }, { "epoch": 1.0616925264264079, "grad_norm": 0.1401091367006302, "learning_rate": 1.946930827718366e-05, "loss": 0.0091, "step": 34350 }, { "epoch": 1.0626197688075663, "grad_norm": 0.1265421211719513, "learning_rate": 1.9468844655993078e-05, "loss": 0.0105, "step": 34380 }, { "epoch": 1.0635470111887246, "grad_norm": 0.1649467498064041, "learning_rate": 1.94683810348025e-05, "loss": 0.01, "step": 34410 }, { "epoch": 1.0644742535698832, "grad_norm": 0.17060862481594086, "learning_rate": 1.9467917413611918e-05, "loss": 0.0105, "step": 34440 }, { "epoch": 1.0654014959510416, "grad_norm": 0.13365741074085236, "learning_rate": 1.946745379242134e-05, "loss": 0.0101, "step": 34470 }, { "epoch": 1.0663287383322, "grad_norm": 0.13291510939598083, "learning_rate": 1.946699017123076e-05, "loss": 0.0099, "step": 34500 }, { "epoch": 1.0672559807133584, "grad_norm": 0.17610692977905273, "learning_rate": 1.9466526550040182e-05, "loss": 0.0096, "step": 34530 }, { "epoch": 1.068183223094517, "grad_norm": 0.10533946007490158, "learning_rate": 1.9466062928849604e-05, "loss": 0.0096, "step": 
34560 }, { "epoch": 1.0691104654756753, "grad_norm": 0.12113987654447556, "learning_rate": 1.9465599307659025e-05, "loss": 0.0115, "step": 34590 }, { "epoch": 1.0700377078568337, "grad_norm": 0.12044841051101685, "learning_rate": 1.9465135686468443e-05, "loss": 0.0097, "step": 34620 }, { "epoch": 1.0709649502379923, "grad_norm": 0.22501035034656525, "learning_rate": 1.9464672065277865e-05, "loss": 0.01, "step": 34650 }, { "epoch": 1.0718921926191507, "grad_norm": 0.16986390948295593, "learning_rate": 1.9464208444087286e-05, "loss": 0.0105, "step": 34680 }, { "epoch": 1.072819435000309, "grad_norm": 0.15584400296211243, "learning_rate": 1.9463744822896708e-05, "loss": 0.0101, "step": 34710 }, { "epoch": 1.0737466773814675, "grad_norm": 0.1579132378101349, "learning_rate": 1.946328120170613e-05, "loss": 0.0098, "step": 34740 }, { "epoch": 1.074673919762626, "grad_norm": 0.1535457819700241, "learning_rate": 1.9462817580515548e-05, "loss": 0.0097, "step": 34770 }, { "epoch": 1.0756011621437844, "grad_norm": 0.13067880272865295, "learning_rate": 1.946235395932497e-05, "loss": 0.0099, "step": 34800 }, { "epoch": 1.0765284045249428, "grad_norm": 0.15930773317813873, "learning_rate": 1.946189033813439e-05, "loss": 0.0095, "step": 34830 }, { "epoch": 1.0774556469061012, "grad_norm": 0.13918161392211914, "learning_rate": 1.946142671694381e-05, "loss": 0.01, "step": 34860 }, { "epoch": 1.0783828892872598, "grad_norm": 0.16773083806037903, "learning_rate": 1.946096309575323e-05, "loss": 0.0097, "step": 34890 }, { "epoch": 1.0793101316684182, "grad_norm": 0.14369383454322815, "learning_rate": 1.946049947456265e-05, "loss": 0.0099, "step": 34920 }, { "epoch": 1.0802373740495765, "grad_norm": 0.15487195551395416, "learning_rate": 1.9460035853372073e-05, "loss": 0.01, "step": 34950 }, { "epoch": 1.081164616430735, "grad_norm": 0.12814001739025116, "learning_rate": 1.9459572232181495e-05, "loss": 0.0101, "step": 34980 }, { "epoch": 1.0820918588118935, "grad_norm": 
0.1057596504688263, "learning_rate": 1.9459108610990916e-05, "loss": 0.0096, "step": 35010 }, { "epoch": 1.083019101193052, "grad_norm": 0.12797509133815765, "learning_rate": 1.9458644989800334e-05, "loss": 0.0103, "step": 35040 }, { "epoch": 1.0839463435742103, "grad_norm": 0.19044697284698486, "learning_rate": 1.9458181368609756e-05, "loss": 0.0102, "step": 35070 }, { "epoch": 1.0848735859553686, "grad_norm": 0.14435169100761414, "learning_rate": 1.9457717747419177e-05, "loss": 0.0096, "step": 35100 }, { "epoch": 1.0858008283365272, "grad_norm": 0.16554707288742065, "learning_rate": 1.94572541262286e-05, "loss": 0.0099, "step": 35130 }, { "epoch": 1.0867280707176856, "grad_norm": 0.10546158999204636, "learning_rate": 1.945679050503802e-05, "loss": 0.0095, "step": 35160 }, { "epoch": 1.087655313098844, "grad_norm": 0.10732924938201904, "learning_rate": 1.945632688384744e-05, "loss": 0.0094, "step": 35190 }, { "epoch": 1.0885825554800024, "grad_norm": 0.08728913962841034, "learning_rate": 1.945586326265686e-05, "loss": 0.0103, "step": 35220 }, { "epoch": 1.089509797861161, "grad_norm": 0.11772685497999191, "learning_rate": 1.945539964146628e-05, "loss": 0.0105, "step": 35250 }, { "epoch": 1.0904370402423194, "grad_norm": 0.1601562201976776, "learning_rate": 1.94549360202757e-05, "loss": 0.0109, "step": 35280 }, { "epoch": 1.0913642826234777, "grad_norm": 0.13381876051425934, "learning_rate": 1.945447239908512e-05, "loss": 0.0097, "step": 35310 }, { "epoch": 1.092291525004636, "grad_norm": 0.168488010764122, "learning_rate": 1.9454008777894543e-05, "loss": 0.0096, "step": 35340 }, { "epoch": 1.0932187673857947, "grad_norm": 0.11687427759170532, "learning_rate": 1.9453545156703964e-05, "loss": 0.0106, "step": 35370 }, { "epoch": 1.094146009766953, "grad_norm": 0.11201674491167068, "learning_rate": 1.9453081535513386e-05, "loss": 0.0097, "step": 35400 }, { "epoch": 1.0950732521481115, "grad_norm": 0.16657891869544983, "learning_rate": 1.9452617914322807e-05, "loss": 
0.0103, "step": 35430 }, { "epoch": 1.09600049452927, "grad_norm": 0.1800008863210678, "learning_rate": 1.9452154293132225e-05, "loss": 0.01, "step": 35460 }, { "epoch": 1.0969277369104284, "grad_norm": 0.14142769575119019, "learning_rate": 1.9451690671941647e-05, "loss": 0.0101, "step": 35490 }, { "epoch": 1.0978549792915868, "grad_norm": 0.17022483050823212, "learning_rate": 1.9451227050751068e-05, "loss": 0.0097, "step": 35520 }, { "epoch": 1.0987822216727452, "grad_norm": 0.14693398773670197, "learning_rate": 1.945076342956049e-05, "loss": 0.0098, "step": 35550 }, { "epoch": 1.0997094640539036, "grad_norm": 0.19098339974880219, "learning_rate": 1.945029980836991e-05, "loss": 0.0098, "step": 35580 }, { "epoch": 1.1006367064350622, "grad_norm": 0.13580141961574554, "learning_rate": 1.944983618717933e-05, "loss": 0.0096, "step": 35610 }, { "epoch": 1.1015639488162206, "grad_norm": 0.1257988065481186, "learning_rate": 1.944937256598875e-05, "loss": 0.0103, "step": 35640 }, { "epoch": 1.102491191197379, "grad_norm": 0.10509399324655533, "learning_rate": 1.9448908944798172e-05, "loss": 0.0095, "step": 35670 }, { "epoch": 1.1034184335785375, "grad_norm": 0.131899893283844, "learning_rate": 1.944844532360759e-05, "loss": 0.0101, "step": 35700 }, { "epoch": 1.104345675959696, "grad_norm": 0.11872909218072891, "learning_rate": 1.9447981702417012e-05, "loss": 0.0099, "step": 35730 }, { "epoch": 1.1052729183408543, "grad_norm": 0.22213688492774963, "learning_rate": 1.9447518081226437e-05, "loss": 0.0103, "step": 35760 }, { "epoch": 1.1062001607220127, "grad_norm": 0.1313295215368271, "learning_rate": 1.9447054460035855e-05, "loss": 0.0103, "step": 35790 }, { "epoch": 1.1071274031031713, "grad_norm": 0.19214262068271637, "learning_rate": 1.9446590838845277e-05, "loss": 0.0097, "step": 35820 }, { "epoch": 1.1080546454843296, "grad_norm": 0.1615900993347168, "learning_rate": 1.9446127217654695e-05, "loss": 0.0096, "step": 35850 }, { "epoch": 1.108981887865488, "grad_norm": 
0.1562500298023224, "learning_rate": 1.9445663596464116e-05, "loss": 0.0107, "step": 35880 }, { "epoch": 1.1099091302466464, "grad_norm": 0.18546907603740692, "learning_rate": 1.9445199975273538e-05, "loss": 0.0099, "step": 35910 }, { "epoch": 1.110836372627805, "grad_norm": 0.1612282693386078, "learning_rate": 1.944473635408296e-05, "loss": 0.0096, "step": 35940 }, { "epoch": 1.1117636150089634, "grad_norm": 0.14918465912342072, "learning_rate": 1.944427273289238e-05, "loss": 0.0104, "step": 35970 }, { "epoch": 1.1126908573901217, "grad_norm": 0.1451747864484787, "learning_rate": 1.9443809111701802e-05, "loss": 0.0097, "step": 36000 }, { "epoch": 1.1136180997712801, "grad_norm": 0.16418671607971191, "learning_rate": 1.944334549051122e-05, "loss": 0.0107, "step": 36030 }, { "epoch": 1.1145453421524387, "grad_norm": 0.14507657289505005, "learning_rate": 1.9442881869320642e-05, "loss": 0.0098, "step": 36060 }, { "epoch": 1.115472584533597, "grad_norm": 0.1588558554649353, "learning_rate": 1.9442418248130063e-05, "loss": 0.0102, "step": 36090 }, { "epoch": 1.1163998269147555, "grad_norm": 0.12200423330068588, "learning_rate": 1.9441954626939485e-05, "loss": 0.0095, "step": 36120 }, { "epoch": 1.1173270692959139, "grad_norm": 0.12481945008039474, "learning_rate": 1.9441491005748906e-05, "loss": 0.0103, "step": 36150 }, { "epoch": 1.1182543116770725, "grad_norm": 0.14485055208206177, "learning_rate": 1.9441027384558324e-05, "loss": 0.0093, "step": 36180 }, { "epoch": 1.1191815540582308, "grad_norm": 0.15107417106628418, "learning_rate": 1.9440563763367746e-05, "loss": 0.0096, "step": 36210 }, { "epoch": 1.1201087964393892, "grad_norm": 0.10571879893541336, "learning_rate": 1.9440100142177167e-05, "loss": 0.0099, "step": 36240 }, { "epoch": 1.1210360388205478, "grad_norm": 0.12914879620075226, "learning_rate": 1.9439636520986586e-05, "loss": 0.0096, "step": 36270 }, { "epoch": 1.1219632812017062, "grad_norm": 0.13391166925430298, "learning_rate": 1.9439172899796007e-05, 
"loss": 0.01, "step": 36300 }, { "epoch": 1.1228905235828646, "grad_norm": 0.14640313386917114, "learning_rate": 1.943870927860543e-05, "loss": 0.0099, "step": 36330 }, { "epoch": 1.123817765964023, "grad_norm": 0.1353132128715515, "learning_rate": 1.943824565741485e-05, "loss": 0.0092, "step": 36360 }, { "epoch": 1.1247450083451813, "grad_norm": 0.15348805487155914, "learning_rate": 1.943778203622427e-05, "loss": 0.0102, "step": 36390 }, { "epoch": 1.12567225072634, "grad_norm": 0.15322577953338623, "learning_rate": 1.9437318415033693e-05, "loss": 0.0089, "step": 36420 }, { "epoch": 1.1265994931074983, "grad_norm": 0.1272343248128891, "learning_rate": 1.943685479384311e-05, "loss": 0.0098, "step": 36450 }, { "epoch": 1.1275267354886567, "grad_norm": 0.11916729807853699, "learning_rate": 1.9436391172652533e-05, "loss": 0.0099, "step": 36480 }, { "epoch": 1.1284539778698153, "grad_norm": 0.14329126477241516, "learning_rate": 1.9435927551461954e-05, "loss": 0.0096, "step": 36510 }, { "epoch": 1.1293812202509736, "grad_norm": 0.11813582479953766, "learning_rate": 1.9435463930271376e-05, "loss": 0.0097, "step": 36540 }, { "epoch": 1.130308462632132, "grad_norm": 0.12455583363771439, "learning_rate": 1.9435000309080797e-05, "loss": 0.0088, "step": 36570 }, { "epoch": 1.1312357050132904, "grad_norm": 0.16248907148838043, "learning_rate": 1.9434536687890215e-05, "loss": 0.0103, "step": 36600 }, { "epoch": 1.132162947394449, "grad_norm": 0.15603284537792206, "learning_rate": 1.9434073066699637e-05, "loss": 0.0098, "step": 36630 }, { "epoch": 1.1330901897756074, "grad_norm": 0.08950382471084595, "learning_rate": 1.943360944550906e-05, "loss": 0.0096, "step": 36660 }, { "epoch": 1.1340174321567658, "grad_norm": 0.1440577656030655, "learning_rate": 1.9433145824318476e-05, "loss": 0.01, "step": 36690 }, { "epoch": 1.1349446745379241, "grad_norm": 0.15700002014636993, "learning_rate": 1.9432682203127898e-05, "loss": 0.0106, "step": 36720 }, { "epoch": 1.1358719169190827, 
"grad_norm": 0.09210638701915741, "learning_rate": 1.943221858193732e-05, "loss": 0.01, "step": 36750 }, { "epoch": 1.1367991593002411, "grad_norm": 0.14137540757656097, "learning_rate": 1.943175496074674e-05, "loss": 0.01, "step": 36780 }, { "epoch": 1.1377264016813995, "grad_norm": 0.18400785326957703, "learning_rate": 1.9431291339556162e-05, "loss": 0.0098, "step": 36810 }, { "epoch": 1.1386536440625579, "grad_norm": 0.2002107799053192, "learning_rate": 1.943082771836558e-05, "loss": 0.0103, "step": 36840 }, { "epoch": 1.1395808864437165, "grad_norm": 0.12342993170022964, "learning_rate": 1.9430364097175002e-05, "loss": 0.0097, "step": 36870 }, { "epoch": 1.1405081288248748, "grad_norm": 0.08840873092412949, "learning_rate": 1.9429900475984424e-05, "loss": 0.0106, "step": 36900 }, { "epoch": 1.1414353712060332, "grad_norm": 0.13715186715126038, "learning_rate": 1.9429436854793845e-05, "loss": 0.0097, "step": 36930 }, { "epoch": 1.1423626135871916, "grad_norm": 0.15597553551197052, "learning_rate": 1.9428973233603267e-05, "loss": 0.0098, "step": 36960 }, { "epoch": 1.1432898559683502, "grad_norm": 0.1217803955078125, "learning_rate": 1.9428509612412688e-05, "loss": 0.0098, "step": 36990 }, { "epoch": 1.1442170983495086, "grad_norm": 0.13489513099193573, "learning_rate": 1.9428045991222106e-05, "loss": 0.0097, "step": 37020 }, { "epoch": 1.145144340730667, "grad_norm": 0.10803089290857315, "learning_rate": 1.9427582370031528e-05, "loss": 0.0102, "step": 37050 }, { "epoch": 1.1460715831118256, "grad_norm": 0.14150722324848175, "learning_rate": 1.942711874884095e-05, "loss": 0.0096, "step": 37080 }, { "epoch": 1.146998825492984, "grad_norm": 0.0849340483546257, "learning_rate": 1.9426655127650367e-05, "loss": 0.0097, "step": 37110 }, { "epoch": 1.1479260678741423, "grad_norm": 0.12866099178791046, "learning_rate": 1.942619150645979e-05, "loss": 0.0102, "step": 37140 }, { "epoch": 1.1488533102553007, "grad_norm": 0.17727048695087433, "learning_rate": 
1.942572788526921e-05, "loss": 0.0099, "step": 37170 }, { "epoch": 1.149780552636459, "grad_norm": 0.15304800868034363, "learning_rate": 1.9425264264078632e-05, "loss": 0.0108, "step": 37200 }, { "epoch": 1.1507077950176177, "grad_norm": 0.14143036305904388, "learning_rate": 1.9424800642888053e-05, "loss": 0.0106, "step": 37230 }, { "epoch": 1.151635037398776, "grad_norm": 0.15591411292552948, "learning_rate": 1.942433702169747e-05, "loss": 0.0098, "step": 37260 }, { "epoch": 1.1525622797799344, "grad_norm": 0.10836917161941528, "learning_rate": 1.9423873400506893e-05, "loss": 0.0105, "step": 37290 }, { "epoch": 1.153489522161093, "grad_norm": 0.13930240273475647, "learning_rate": 1.9423409779316315e-05, "loss": 0.0107, "step": 37320 }, { "epoch": 1.1544167645422514, "grad_norm": 0.16543267667293549, "learning_rate": 1.9422946158125736e-05, "loss": 0.01, "step": 37350 }, { "epoch": 1.1553440069234098, "grad_norm": 0.11831526458263397, "learning_rate": 1.9422482536935158e-05, "loss": 0.0099, "step": 37380 }, { "epoch": 1.1562712493045681, "grad_norm": 0.10302194952964783, "learning_rate": 1.942201891574458e-05, "loss": 0.0099, "step": 37410 }, { "epoch": 1.1571984916857265, "grad_norm": 0.10405894368886948, "learning_rate": 1.9421555294553997e-05, "loss": 0.0103, "step": 37440 }, { "epoch": 1.1581257340668851, "grad_norm": 0.1121474951505661, "learning_rate": 1.942109167336342e-05, "loss": 0.0095, "step": 37470 }, { "epoch": 1.1590529764480435, "grad_norm": 0.14188213646411896, "learning_rate": 1.942062805217284e-05, "loss": 0.0103, "step": 37500 }, { "epoch": 1.1599802188292019, "grad_norm": 0.1552407592535019, "learning_rate": 1.942016443098226e-05, "loss": 0.0096, "step": 37530 }, { "epoch": 1.1609074612103605, "grad_norm": 0.1376008540391922, "learning_rate": 1.9419700809791683e-05, "loss": 0.0103, "step": 37560 }, { "epoch": 1.1618347035915189, "grad_norm": 0.14242205023765564, "learning_rate": 1.94192371886011e-05, "loss": 0.0098, "step": 37590 }, { "epoch": 
1.1627619459726772, "grad_norm": 0.16172154247760773, "learning_rate": 1.941878902145021e-05, "loss": 0.0106, "step": 37620 }, { "epoch": 1.1636891883538356, "grad_norm": 0.10541028529405594, "learning_rate": 1.941832540025963e-05, "loss": 0.0103, "step": 37650 }, { "epoch": 1.1646164307349942, "grad_norm": 0.16445297002792358, "learning_rate": 1.941786177906905e-05, "loss": 0.01, "step": 37680 }, { "epoch": 1.1655436731161526, "grad_norm": 0.14423568546772003, "learning_rate": 1.941739815787847e-05, "loss": 0.0104, "step": 37710 }, { "epoch": 1.166470915497311, "grad_norm": 0.13846218585968018, "learning_rate": 1.9416934536687892e-05, "loss": 0.0091, "step": 37740 }, { "epoch": 1.1673981578784693, "grad_norm": 0.16291335225105286, "learning_rate": 1.9416470915497313e-05, "loss": 0.0099, "step": 37770 }, { "epoch": 1.168325400259628, "grad_norm": 0.12655191123485565, "learning_rate": 1.9416007294306735e-05, "loss": 0.0095, "step": 37800 }, { "epoch": 1.1692526426407863, "grad_norm": 0.11272448301315308, "learning_rate": 1.9415543673116156e-05, "loss": 0.0097, "step": 37830 }, { "epoch": 1.1701798850219447, "grad_norm": 0.1456725001335144, "learning_rate": 1.9415080051925574e-05, "loss": 0.0097, "step": 37860 }, { "epoch": 1.171107127403103, "grad_norm": 0.14356279373168945, "learning_rate": 1.9414616430734996e-05, "loss": 0.0107, "step": 37890 }, { "epoch": 1.1720343697842617, "grad_norm": 0.1722131073474884, "learning_rate": 1.9414152809544414e-05, "loss": 0.0098, "step": 37920 }, { "epoch": 1.17296161216542, "grad_norm": 0.22130611538887024, "learning_rate": 1.9413689188353836e-05, "loss": 0.01, "step": 37950 }, { "epoch": 1.1738888545465784, "grad_norm": 0.13683514297008514, "learning_rate": 1.9413225567163257e-05, "loss": 0.0102, "step": 37980 }, { "epoch": 1.1748160969277368, "grad_norm": 0.15743893384933472, "learning_rate": 1.941276194597268e-05, "loss": 0.0106, "step": 38010 }, { "epoch": 1.1757433393088954, "grad_norm": 0.12358296662569046, 
"learning_rate": 1.94122983247821e-05, "loss": 0.0099, "step": 38040 }, { "epoch": 1.1766705816900538, "grad_norm": 0.14761659502983093, "learning_rate": 1.941183470359152e-05, "loss": 0.0095, "step": 38070 }, { "epoch": 1.1775978240712122, "grad_norm": 0.09743492305278778, "learning_rate": 1.941137108240094e-05, "loss": 0.0099, "step": 38100 }, { "epoch": 1.1785250664523708, "grad_norm": 0.13945776224136353, "learning_rate": 1.941090746121036e-05, "loss": 0.0105, "step": 38130 }, { "epoch": 1.1794523088335291, "grad_norm": 0.1458735316991806, "learning_rate": 1.9410443840019783e-05, "loss": 0.0101, "step": 38160 }, { "epoch": 1.1803795512146875, "grad_norm": 0.12406901270151138, "learning_rate": 1.9409980218829204e-05, "loss": 0.0096, "step": 38190 }, { "epoch": 1.181306793595846, "grad_norm": 0.1036820039153099, "learning_rate": 1.9409516597638626e-05, "loss": 0.0102, "step": 38220 }, { "epoch": 1.1822340359770043, "grad_norm": 0.1408422738313675, "learning_rate": 1.9409052976448047e-05, "loss": 0.0096, "step": 38250 }, { "epoch": 1.1831612783581629, "grad_norm": 0.13666819036006927, "learning_rate": 1.9408589355257465e-05, "loss": 0.0101, "step": 38280 }, { "epoch": 1.1840885207393212, "grad_norm": 0.16975803673267365, "learning_rate": 1.9408125734066887e-05, "loss": 0.0099, "step": 38310 }, { "epoch": 1.1850157631204796, "grad_norm": 0.1930142343044281, "learning_rate": 1.9407662112876305e-05, "loss": 0.0095, "step": 38340 }, { "epoch": 1.1859430055016382, "grad_norm": 0.12256772816181183, "learning_rate": 1.9407198491685727e-05, "loss": 0.0099, "step": 38370 }, { "epoch": 1.1868702478827966, "grad_norm": 0.17187932133674622, "learning_rate": 1.9406734870495148e-05, "loss": 0.0097, "step": 38400 }, { "epoch": 1.187797490263955, "grad_norm": 0.3963204026222229, "learning_rate": 1.940627124930457e-05, "loss": 0.0106, "step": 38430 }, { "epoch": 1.1887247326451134, "grad_norm": 0.1278136670589447, "learning_rate": 1.940580762811399e-05, "loss": 0.0096, "step": 
38460 }, { "epoch": 1.189651975026272, "grad_norm": 0.10489697754383087, "learning_rate": 1.9405344006923413e-05, "loss": 0.0097, "step": 38490 }, { "epoch": 1.1905792174074303, "grad_norm": 0.1288975477218628, "learning_rate": 1.940488038573283e-05, "loss": 0.0101, "step": 38520 }, { "epoch": 1.1915064597885887, "grad_norm": 0.10277805477380753, "learning_rate": 1.9404416764542252e-05, "loss": 0.0095, "step": 38550 }, { "epoch": 1.192433702169747, "grad_norm": 0.15121419727802277, "learning_rate": 1.9403953143351674e-05, "loss": 0.0093, "step": 38580 }, { "epoch": 1.1933609445509057, "grad_norm": 0.14006897807121277, "learning_rate": 1.9403489522161095e-05, "loss": 0.01, "step": 38610 }, { "epoch": 1.194288186932064, "grad_norm": 0.13060308992862701, "learning_rate": 1.9403025900970517e-05, "loss": 0.0097, "step": 38640 }, { "epoch": 1.1952154293132224, "grad_norm": 0.11539765447378159, "learning_rate": 1.9402562279779935e-05, "loss": 0.0099, "step": 38670 }, { "epoch": 1.1961426716943808, "grad_norm": 0.12231142818927765, "learning_rate": 1.9402098658589356e-05, "loss": 0.0098, "step": 38700 }, { "epoch": 1.1970699140755394, "grad_norm": 0.15903855860233307, "learning_rate": 1.9401635037398778e-05, "loss": 0.0092, "step": 38730 }, { "epoch": 1.1979971564566978, "grad_norm": 0.11971570551395416, "learning_rate": 1.94011714162082e-05, "loss": 0.0096, "step": 38760 }, { "epoch": 1.1989243988378562, "grad_norm": 0.17320114374160767, "learning_rate": 1.940070779501762e-05, "loss": 0.0103, "step": 38790 }, { "epoch": 1.1998516412190146, "grad_norm": 0.13392198085784912, "learning_rate": 1.9400244173827042e-05, "loss": 0.0101, "step": 38820 }, { "epoch": 1.2007788836001732, "grad_norm": 0.16567495465278625, "learning_rate": 1.939978055263646e-05, "loss": 0.0099, "step": 38850 }, { "epoch": 1.2017061259813315, "grad_norm": 0.16357013583183289, "learning_rate": 1.9399316931445882e-05, "loss": 0.0093, "step": 38880 }, { "epoch": 1.20263336836249, "grad_norm": 
0.18659989535808563, "learning_rate": 1.9398853310255303e-05, "loss": 0.0097, "step": 38910 }, { "epoch": 1.2035606107436485, "grad_norm": 0.15806886553764343, "learning_rate": 1.939838968906472e-05, "loss": 0.0104, "step": 38940 }, { "epoch": 1.2044878531248069, "grad_norm": 0.1393982172012329, "learning_rate": 1.9397926067874143e-05, "loss": 0.0109, "step": 38970 }, { "epoch": 1.2054150955059653, "grad_norm": 0.1824716180562973, "learning_rate": 1.9397462446683565e-05, "loss": 0.01, "step": 39000 }, { "epoch": 1.2063423378871236, "grad_norm": 0.11774330586194992, "learning_rate": 1.9396998825492986e-05, "loss": 0.0098, "step": 39030 }, { "epoch": 1.207269580268282, "grad_norm": 0.11890225112438202, "learning_rate": 1.9396535204302408e-05, "loss": 0.0098, "step": 39060 }, { "epoch": 1.2081968226494406, "grad_norm": 0.10907123982906342, "learning_rate": 1.9396071583111826e-05, "loss": 0.0097, "step": 39090 }, { "epoch": 1.209124065030599, "grad_norm": 0.17712970077991486, "learning_rate": 1.9395607961921247e-05, "loss": 0.0102, "step": 39120 }, { "epoch": 1.2100513074117574, "grad_norm": 0.15125684440135956, "learning_rate": 1.939514434073067e-05, "loss": 0.0093, "step": 39150 }, { "epoch": 1.210978549792916, "grad_norm": 0.15437306463718414, "learning_rate": 1.939468071954009e-05, "loss": 0.0104, "step": 39180 }, { "epoch": 1.2119057921740743, "grad_norm": 0.169686958193779, "learning_rate": 1.9394217098349512e-05, "loss": 0.0105, "step": 39210 }, { "epoch": 1.2128330345552327, "grad_norm": 0.11649049818515778, "learning_rate": 1.9393753477158933e-05, "loss": 0.0105, "step": 39240 }, { "epoch": 1.213760276936391, "grad_norm": 0.12962350249290466, "learning_rate": 1.939328985596835e-05, "loss": 0.0108, "step": 39270 }, { "epoch": 1.2146875193175497, "grad_norm": 0.15621963143348694, "learning_rate": 1.9392826234777773e-05, "loss": 0.0093, "step": 39300 }, { "epoch": 1.215614761698708, "grad_norm": 0.17883452773094177, "learning_rate": 1.939236261358719e-05, "loss": 
0.0095, "step": 39330 }, { "epoch": 1.2165420040798665, "grad_norm": 0.1353161484003067, "learning_rate": 1.9391898992396613e-05, "loss": 0.0103, "step": 39360 }, { "epoch": 1.2174692464610248, "grad_norm": 0.1107260063290596, "learning_rate": 1.9391435371206034e-05, "loss": 0.0088, "step": 39390 }, { "epoch": 1.2183964888421834, "grad_norm": 0.16918453574180603, "learning_rate": 1.9390971750015456e-05, "loss": 0.0104, "step": 39420 }, { "epoch": 1.2193237312233418, "grad_norm": 0.15229879319667816, "learning_rate": 1.9390508128824877e-05, "loss": 0.0095, "step": 39450 }, { "epoch": 1.2202509736045002, "grad_norm": 0.15304787456989288, "learning_rate": 1.93900445076343e-05, "loss": 0.0095, "step": 39480 }, { "epoch": 1.2211782159856586, "grad_norm": 0.13534873723983765, "learning_rate": 1.9389580886443717e-05, "loss": 0.0097, "step": 39510 }, { "epoch": 1.2221054583668172, "grad_norm": 0.13528189063072205, "learning_rate": 1.9389117265253138e-05, "loss": 0.0093, "step": 39540 }, { "epoch": 1.2230327007479755, "grad_norm": 0.13307373225688934, "learning_rate": 1.9388669098102246e-05, "loss": 0.0094, "step": 39570 }, { "epoch": 1.223959943129134, "grad_norm": 0.12752601504325867, "learning_rate": 1.9388205476911664e-05, "loss": 0.0099, "step": 39600 }, { "epoch": 1.2248871855102923, "grad_norm": 0.09536135941743851, "learning_rate": 1.9387741855721086e-05, "loss": 0.0096, "step": 39630 }, { "epoch": 1.225814427891451, "grad_norm": 0.14239300787448883, "learning_rate": 1.9387278234530507e-05, "loss": 0.0093, "step": 39660 }, { "epoch": 1.2267416702726093, "grad_norm": 0.17687296867370605, "learning_rate": 1.938681461333993e-05, "loss": 0.0096, "step": 39690 }, { "epoch": 1.2276689126537677, "grad_norm": 0.15559448301792145, "learning_rate": 1.938635099214935e-05, "loss": 0.0103, "step": 39720 }, { "epoch": 1.2285961550349263, "grad_norm": 0.16627903282642365, "learning_rate": 1.938588737095877e-05, "loss": 0.0091, "step": 39750 }, { "epoch": 1.2295233974160846, 
"grad_norm": 0.09868670254945755, "learning_rate": 1.938542374976819e-05, "loss": 0.0106, "step": 39780 }, { "epoch": 1.230450639797243, "grad_norm": 0.1794576346874237, "learning_rate": 1.938496012857761e-05, "loss": 0.0094, "step": 39810 }, { "epoch": 1.2313778821784014, "grad_norm": 0.14979185163974762, "learning_rate": 1.9384496507387033e-05, "loss": 0.01, "step": 39840 }, { "epoch": 1.2323051245595598, "grad_norm": 0.14982298016548157, "learning_rate": 1.9384032886196454e-05, "loss": 0.0096, "step": 39870 }, { "epoch": 1.2332323669407184, "grad_norm": 0.1560695320367813, "learning_rate": 1.9383569265005876e-05, "loss": 0.0099, "step": 39900 }, { "epoch": 1.2341596093218767, "grad_norm": 0.1693585216999054, "learning_rate": 1.9383105643815294e-05, "loss": 0.0095, "step": 39930 }, { "epoch": 1.2350868517030351, "grad_norm": 0.1433252990245819, "learning_rate": 1.9382642022624715e-05, "loss": 0.0096, "step": 39960 }, { "epoch": 1.2360140940841937, "grad_norm": 0.17063690721988678, "learning_rate": 1.9382178401434137e-05, "loss": 0.0096, "step": 39990 }, { "epoch": 1.236941336465352, "grad_norm": 0.12897926568984985, "learning_rate": 1.9381714780243555e-05, "loss": 0.0094, "step": 40020 }, { "epoch": 1.2378685788465105, "grad_norm": 0.1105082705616951, "learning_rate": 1.938125115905298e-05, "loss": 0.0104, "step": 40050 }, { "epoch": 1.2387958212276688, "grad_norm": 0.11381847411394119, "learning_rate": 1.9380787537862398e-05, "loss": 0.01, "step": 40080 }, { "epoch": 1.2397230636088272, "grad_norm": 0.26289549469947815, "learning_rate": 1.938032391667182e-05, "loss": 0.0096, "step": 40110 }, { "epoch": 1.2406503059899858, "grad_norm": 0.13575169444084167, "learning_rate": 1.937986029548124e-05, "loss": 0.0099, "step": 40140 }, { "epoch": 1.2415775483711442, "grad_norm": 0.18954433500766754, "learning_rate": 1.937939667429066e-05, "loss": 0.0095, "step": 40170 }, { "epoch": 1.2425047907523026, "grad_norm": 0.12234604358673096, "learning_rate": 
1.937893305310008e-05, "loss": 0.0099, "step": 40200 }, { "epoch": 1.2434320331334612, "grad_norm": 0.21626605093479156, "learning_rate": 1.9378469431909502e-05, "loss": 0.0099, "step": 40230 }, { "epoch": 1.2443592755146196, "grad_norm": 0.11947640776634216, "learning_rate": 1.9378005810718924e-05, "loss": 0.0093, "step": 40260 }, { "epoch": 1.245286517895778, "grad_norm": 0.10579235106706619, "learning_rate": 1.9377542189528345e-05, "loss": 0.0095, "step": 40290 }, { "epoch": 1.2462137602769363, "grad_norm": 0.22110219299793243, "learning_rate": 1.9377078568337767e-05, "loss": 0.0094, "step": 40320 }, { "epoch": 1.247141002658095, "grad_norm": 0.13767109811306, "learning_rate": 1.9376614947147185e-05, "loss": 0.0092, "step": 40350 }, { "epoch": 1.2480682450392533, "grad_norm": 0.10156524181365967, "learning_rate": 1.9376151325956606e-05, "loss": 0.0104, "step": 40380 }, { "epoch": 1.2489954874204117, "grad_norm": 0.12273012101650238, "learning_rate": 1.9375687704766028e-05, "loss": 0.0098, "step": 40410 }, { "epoch": 1.24992272980157, "grad_norm": 0.08254643529653549, "learning_rate": 1.937522408357545e-05, "loss": 0.0092, "step": 40440 }, { "epoch": 1.2508499721827286, "grad_norm": 0.15621818602085114, "learning_rate": 1.937476046238487e-05, "loss": 0.0105, "step": 40470 }, { "epoch": 1.251777214563887, "grad_norm": 0.15590178966522217, "learning_rate": 1.937429684119429e-05, "loss": 0.0098, "step": 40500 }, { "epoch": 1.2527044569450454, "grad_norm": 0.1532873809337616, "learning_rate": 1.937383322000371e-05, "loss": 0.0103, "step": 40530 }, { "epoch": 1.253631699326204, "grad_norm": 0.15894892811775208, "learning_rate": 1.9373369598813132e-05, "loss": 0.0104, "step": 40560 }, { "epoch": 1.2545589417073624, "grad_norm": 0.16404438018798828, "learning_rate": 1.937290597762255e-05, "loss": 0.0099, "step": 40590 }, { "epoch": 1.2554861840885208, "grad_norm": 0.16523513197898865, "learning_rate": 1.937244235643197e-05, "loss": 0.0098, "step": 40620 }, { "epoch": 
1.2564134264696791, "grad_norm": 0.1068335622549057, "learning_rate": 1.9371978735241393e-05, "loss": 0.0099, "step": 40650 }, { "epoch": 1.2573406688508375, "grad_norm": 0.15168210864067078, "learning_rate": 1.9371515114050815e-05, "loss": 0.0098, "step": 40680 }, { "epoch": 1.258267911231996, "grad_norm": 0.16336332261562347, "learning_rate": 1.9371051492860236e-05, "loss": 0.0096, "step": 40710 }, { "epoch": 1.2591951536131545, "grad_norm": 0.13347195088863373, "learning_rate": 1.9370587871669654e-05, "loss": 0.0101, "step": 40740 }, { "epoch": 1.2601223959943129, "grad_norm": 0.15228013694286346, "learning_rate": 1.9370124250479076e-05, "loss": 0.0097, "step": 40770 }, { "epoch": 1.2610496383754715, "grad_norm": 0.13808861374855042, "learning_rate": 1.9369660629288497e-05, "loss": 0.0094, "step": 40800 }, { "epoch": 1.2619768807566298, "grad_norm": 0.15696440637111664, "learning_rate": 1.936919700809792e-05, "loss": 0.0094, "step": 40830 }, { "epoch": 1.2629041231377882, "grad_norm": 0.16538427770137787, "learning_rate": 1.936873338690734e-05, "loss": 0.0099, "step": 40860 }, { "epoch": 1.2638313655189466, "grad_norm": 0.14463524520397186, "learning_rate": 1.9368269765716762e-05, "loss": 0.0094, "step": 40890 }, { "epoch": 1.264758607900105, "grad_norm": 0.13104894757270813, "learning_rate": 1.936780614452618e-05, "loss": 0.0097, "step": 40920 }, { "epoch": 1.2656858502812636, "grad_norm": 0.17612744867801666, "learning_rate": 1.93673425233356e-05, "loss": 0.0095, "step": 40950 }, { "epoch": 1.266613092662422, "grad_norm": 0.13631165027618408, "learning_rate": 1.9366878902145023e-05, "loss": 0.0092, "step": 40980 }, { "epoch": 1.2675403350435803, "grad_norm": 0.12255091965198517, "learning_rate": 1.936641528095444e-05, "loss": 0.0111, "step": 41010 }, { "epoch": 1.268467577424739, "grad_norm": 0.17109882831573486, "learning_rate": 1.9365951659763863e-05, "loss": 0.0105, "step": 41040 }, { "epoch": 1.2693948198058973, "grad_norm": 0.12696711719036102, 
"learning_rate": 1.9365488038573284e-05, "loss": 0.0094, "step": 41070 }, { "epoch": 1.2703220621870557, "grad_norm": 0.11803048849105835, "learning_rate": 1.9365024417382706e-05, "loss": 0.0095, "step": 41100 }, { "epoch": 1.271249304568214, "grad_norm": 0.1408001035451889, "learning_rate": 1.9364560796192127e-05, "loss": 0.0094, "step": 41130 }, { "epoch": 1.2721765469493724, "grad_norm": 0.15360252559185028, "learning_rate": 1.9364112629041235e-05, "loss": 0.0102, "step": 41160 }, { "epoch": 1.273103789330531, "grad_norm": 0.12400442361831665, "learning_rate": 1.9363649007850653e-05, "loss": 0.0089, "step": 41190 }, { "epoch": 1.2740310317116894, "grad_norm": 0.12682650983333588, "learning_rate": 1.9363185386660075e-05, "loss": 0.0098, "step": 41220 }, { "epoch": 1.2749582740928478, "grad_norm": 0.09790973365306854, "learning_rate": 1.9362721765469493e-05, "loss": 0.0093, "step": 41250 }, { "epoch": 1.2758855164740064, "grad_norm": 0.10748005658388138, "learning_rate": 1.9362258144278914e-05, "loss": 0.0103, "step": 41280 }, { "epoch": 1.2768127588551648, "grad_norm": 0.21705591678619385, "learning_rate": 1.936179452308834e-05, "loss": 0.0098, "step": 41310 }, { "epoch": 1.2777400012363231, "grad_norm": 0.09401357173919678, "learning_rate": 1.9361330901897757e-05, "loss": 0.0097, "step": 41340 }, { "epoch": 1.2786672436174817, "grad_norm": 0.13465026021003723, "learning_rate": 1.936086728070718e-05, "loss": 0.0101, "step": 41370 }, { "epoch": 1.2795944859986401, "grad_norm": 0.12775449454784393, "learning_rate": 1.93604036595166e-05, "loss": 0.0104, "step": 41400 }, { "epoch": 1.2805217283797985, "grad_norm": 0.11745551973581314, "learning_rate": 1.935994003832602e-05, "loss": 0.0099, "step": 41430 }, { "epoch": 1.2814489707609569, "grad_norm": 0.13269154727458954, "learning_rate": 1.935947641713544e-05, "loss": 0.0091, "step": 41460 }, { "epoch": 1.2823762131421153, "grad_norm": 0.17286986112594604, "learning_rate": 1.935901279594486e-05, "loss": 0.0097, 
"step": 41490 }, { "epoch": 1.2833034555232739, "grad_norm": 0.11456038057804108, "learning_rate": 1.9358549174754283e-05, "loss": 0.01, "step": 41520 }, { "epoch": 1.2842306979044322, "grad_norm": 0.1208593100309372, "learning_rate": 1.9358085553563704e-05, "loss": 0.0098, "step": 41550 }, { "epoch": 1.2851579402855906, "grad_norm": 0.11044719815254211, "learning_rate": 1.9357621932373123e-05, "loss": 0.0093, "step": 41580 }, { "epoch": 1.2860851826667492, "grad_norm": 0.1078275665640831, "learning_rate": 1.9357158311182544e-05, "loss": 0.0096, "step": 41610 }, { "epoch": 1.2870124250479076, "grad_norm": 0.10757624357938766, "learning_rate": 1.9356694689991966e-05, "loss": 0.0091, "step": 41640 }, { "epoch": 1.287939667429066, "grad_norm": 0.17637521028518677, "learning_rate": 1.9356231068801387e-05, "loss": 0.0098, "step": 41670 }, { "epoch": 1.2888669098102243, "grad_norm": 0.11411935091018677, "learning_rate": 1.935576744761081e-05, "loss": 0.0102, "step": 41700 }, { "epoch": 1.2897941521913827, "grad_norm": 0.14789685606956482, "learning_rate": 1.935530382642023e-05, "loss": 0.0098, "step": 41730 }, { "epoch": 1.2907213945725413, "grad_norm": 0.1283310353755951, "learning_rate": 1.9354840205229648e-05, "loss": 0.0094, "step": 41760 }, { "epoch": 1.2916486369536997, "grad_norm": 0.14964035153388977, "learning_rate": 1.935437658403907e-05, "loss": 0.0106, "step": 41790 }, { "epoch": 1.292575879334858, "grad_norm": 0.1585545688867569, "learning_rate": 1.935391296284849e-05, "loss": 0.0089, "step": 41820 }, { "epoch": 1.2935031217160167, "grad_norm": 0.13573133945465088, "learning_rate": 1.935344934165791e-05, "loss": 0.0094, "step": 41850 }, { "epoch": 1.294430364097175, "grad_norm": 0.13661280274391174, "learning_rate": 1.935298572046733e-05, "loss": 0.0104, "step": 41880 }, { "epoch": 1.2953576064783334, "grad_norm": 0.1184845119714737, "learning_rate": 1.9352522099276752e-05, "loss": 0.0093, "step": 41910 }, { "epoch": 1.2962848488594918, "grad_norm": 
0.13419349491596222, "learning_rate": 1.9352058478086174e-05, "loss": 0.0103, "step": 41940 }, { "epoch": 1.2972120912406502, "grad_norm": 0.1477489322423935, "learning_rate": 1.9351594856895595e-05, "loss": 0.0103, "step": 41970 }, { "epoch": 1.2981393336218088, "grad_norm": 0.18402239680290222, "learning_rate": 1.9351131235705013e-05, "loss": 0.0102, "step": 42000 }, { "epoch": 1.2990665760029672, "grad_norm": 0.13978451490402222, "learning_rate": 1.9350667614514435e-05, "loss": 0.0096, "step": 42030 }, { "epoch": 1.2999938183841255, "grad_norm": 0.1334972232580185, "learning_rate": 1.9350203993323857e-05, "loss": 0.0094, "step": 42060 }, { "epoch": 1.3009210607652841, "grad_norm": 0.10967112332582474, "learning_rate": 1.9349740372133278e-05, "loss": 0.0098, "step": 42090 }, { "epoch": 1.3018483031464425, "grad_norm": 0.10409344732761383, "learning_rate": 1.93492767509427e-05, "loss": 0.0096, "step": 42120 }, { "epoch": 1.3027755455276009, "grad_norm": 0.1623001992702484, "learning_rate": 1.934881312975212e-05, "loss": 0.0099, "step": 42150 }, { "epoch": 1.3037027879087595, "grad_norm": 0.18686982989311218, "learning_rate": 1.934834950856154e-05, "loss": 0.0102, "step": 42180 }, { "epoch": 1.3046300302899179, "grad_norm": 0.1665397733449936, "learning_rate": 1.934788588737096e-05, "loss": 0.0097, "step": 42210 }, { "epoch": 1.3055572726710762, "grad_norm": 0.12506654858589172, "learning_rate": 1.934742226618038e-05, "loss": 0.0096, "step": 42240 }, { "epoch": 1.3064845150522346, "grad_norm": 0.1885235756635666, "learning_rate": 1.93469586449898e-05, "loss": 0.0091, "step": 42270 }, { "epoch": 1.307411757433393, "grad_norm": 0.12629254162311554, "learning_rate": 1.9346495023799222e-05, "loss": 0.0102, "step": 42300 }, { "epoch": 1.3083389998145516, "grad_norm": 0.12458830326795578, "learning_rate": 1.9346031402608643e-05, "loss": 0.0095, "step": 42330 }, { "epoch": 1.30926624219571, "grad_norm": 0.15353864431381226, "learning_rate": 1.9345567781418065e-05, "loss": 
0.0096, "step": 42360 }, { "epoch": 1.3101934845768684, "grad_norm": 0.10979277640581131, "learning_rate": 1.9345104160227486e-05, "loss": 0.0099, "step": 42390 }, { "epoch": 1.311120726958027, "grad_norm": 0.2093656212091446, "learning_rate": 1.9344640539036904e-05, "loss": 0.0105, "step": 42420 }, { "epoch": 1.3120479693391853, "grad_norm": 0.19527794420719147, "learning_rate": 1.9344176917846326e-05, "loss": 0.0097, "step": 42450 }, { "epoch": 1.3129752117203437, "grad_norm": 0.21789956092834473, "learning_rate": 1.9343713296655747e-05, "loss": 0.0096, "step": 42480 }, { "epoch": 1.313902454101502, "grad_norm": 0.1621646136045456, "learning_rate": 1.934324967546517e-05, "loss": 0.0094, "step": 42510 }, { "epoch": 1.3148296964826605, "grad_norm": 0.15770259499549866, "learning_rate": 1.934278605427459e-05, "loss": 0.0096, "step": 42540 }, { "epoch": 1.315756938863819, "grad_norm": 0.1522708386182785, "learning_rate": 1.934232243308401e-05, "loss": 0.009, "step": 42570 }, { "epoch": 1.3166841812449774, "grad_norm": 0.12348338216543198, "learning_rate": 1.934185881189343e-05, "loss": 0.0099, "step": 42600 }, { "epoch": 1.3176114236261358, "grad_norm": 0.1210988312959671, "learning_rate": 1.934139519070285e-05, "loss": 0.0094, "step": 42630 }, { "epoch": 1.3185386660072944, "grad_norm": 0.13431526720523834, "learning_rate": 1.934093156951227e-05, "loss": 0.0084, "step": 42660 }, { "epoch": 1.3194659083884528, "grad_norm": 0.14159180223941803, "learning_rate": 1.934046794832169e-05, "loss": 0.0094, "step": 42690 }, { "epoch": 1.3203931507696112, "grad_norm": 0.15002261102199554, "learning_rate": 1.9340004327131116e-05, "loss": 0.0094, "step": 42720 }, { "epoch": 1.3213203931507695, "grad_norm": 0.16137181222438812, "learning_rate": 1.9339540705940534e-05, "loss": 0.0098, "step": 42750 }, { "epoch": 1.322247635531928, "grad_norm": 0.1578523814678192, "learning_rate": 1.9339077084749956e-05, "loss": 0.0093, "step": 42780 }, { "epoch": 1.3231748779130865, "grad_norm": 
0.16220831871032715, "learning_rate": 1.9338613463559377e-05, "loss": 0.011, "step": 42810 }, { "epoch": 1.324102120294245, "grad_norm": 0.14755982160568237, "learning_rate": 1.9338149842368795e-05, "loss": 0.0101, "step": 42840 }, { "epoch": 1.3250293626754033, "grad_norm": 0.2031804472208023, "learning_rate": 1.9337686221178217e-05, "loss": 0.0098, "step": 42870 }, { "epoch": 1.3259566050565619, "grad_norm": 0.13053549826145172, "learning_rate": 1.933722259998764e-05, "loss": 0.0088, "step": 42900 }, { "epoch": 1.3268838474377203, "grad_norm": 0.10885687172412872, "learning_rate": 1.933675897879706e-05, "loss": 0.0099, "step": 42930 }, { "epoch": 1.3278110898188786, "grad_norm": 0.14118161797523499, "learning_rate": 1.933629535760648e-05, "loss": 0.009, "step": 42960 }, { "epoch": 1.328738332200037, "grad_norm": 0.10056724399328232, "learning_rate": 1.93358317364159e-05, "loss": 0.01, "step": 42990 }, { "epoch": 1.3296655745811954, "grad_norm": 0.12797583639621735, "learning_rate": 1.933536811522532e-05, "loss": 0.0093, "step": 43020 }, { "epoch": 1.330592816962354, "grad_norm": 0.11022935807704926, "learning_rate": 1.9334904494034742e-05, "loss": 0.0106, "step": 43050 }, { "epoch": 1.3315200593435124, "grad_norm": 0.1281905174255371, "learning_rate": 1.9334440872844164e-05, "loss": 0.0091, "step": 43080 }, { "epoch": 1.3324473017246707, "grad_norm": 0.1619054079055786, "learning_rate": 1.9333977251653585e-05, "loss": 0.0095, "step": 43110 }, { "epoch": 1.3333745441058293, "grad_norm": 0.10594852268695831, "learning_rate": 1.9333513630463007e-05, "loss": 0.0092, "step": 43140 }, { "epoch": 1.3343017864869877, "grad_norm": 0.11116695404052734, "learning_rate": 1.9333050009272425e-05, "loss": 0.0091, "step": 43170 }, { "epoch": 1.335229028868146, "grad_norm": 0.1529129147529602, "learning_rate": 1.9332586388081847e-05, "loss": 0.0103, "step": 43200 }, { "epoch": 1.3361562712493047, "grad_norm": 0.15643824636936188, "learning_rate": 1.9332122766891265e-05, "loss": 
0.0096, "step": 43230 }, { "epoch": 1.337083513630463, "grad_norm": 0.14660117030143738, "learning_rate": 1.9331659145700686e-05, "loss": 0.0097, "step": 43260 }, { "epoch": 1.3380107560116214, "grad_norm": 0.16686803102493286, "learning_rate": 1.9331195524510108e-05, "loss": 0.0098, "step": 43290 }, { "epoch": 1.3389379983927798, "grad_norm": 0.08398181945085526, "learning_rate": 1.933073190331953e-05, "loss": 0.01, "step": 43320 }, { "epoch": 1.3398652407739382, "grad_norm": 0.1410205066204071, "learning_rate": 1.933026828212895e-05, "loss": 0.0095, "step": 43350 }, { "epoch": 1.3407924831550968, "grad_norm": 0.13396607339382172, "learning_rate": 1.9329804660938372e-05, "loss": 0.0097, "step": 43380 }, { "epoch": 1.3417197255362552, "grad_norm": 0.10474668443202972, "learning_rate": 1.932934103974779e-05, "loss": 0.009, "step": 43410 }, { "epoch": 1.3426469679174136, "grad_norm": 0.1338723748922348, "learning_rate": 1.9328877418557212e-05, "loss": 0.0098, "step": 43440 }, { "epoch": 1.3435742102985722, "grad_norm": 0.12704727053642273, "learning_rate": 1.9328413797366633e-05, "loss": 0.0099, "step": 43470 }, { "epoch": 1.3445014526797305, "grad_norm": 0.16717588901519775, "learning_rate": 1.9327950176176055e-05, "loss": 0.0095, "step": 43500 }, { "epoch": 1.345428695060889, "grad_norm": 0.09584976732730865, "learning_rate": 1.9327486554985476e-05, "loss": 0.0104, "step": 43530 }, { "epoch": 1.3463559374420473, "grad_norm": 0.1608995944261551, "learning_rate": 1.9327022933794895e-05, "loss": 0.0096, "step": 43560 }, { "epoch": 1.3472831798232057, "grad_norm": 0.09906839579343796, "learning_rate": 1.9326559312604316e-05, "loss": 0.0089, "step": 43590 }, { "epoch": 1.3482104222043643, "grad_norm": 0.12248405069112778, "learning_rate": 1.9326095691413738e-05, "loss": 0.0101, "step": 43620 }, { "epoch": 1.3491376645855226, "grad_norm": 0.14213143289089203, "learning_rate": 1.9325632070223156e-05, "loss": 0.0095, "step": 43650 }, { "epoch": 1.350064906966681, 
"grad_norm": 0.10279853641986847, "learning_rate": 1.9325168449032577e-05, "loss": 0.0085, "step": 43680 }, { "epoch": 1.3509921493478396, "grad_norm": 0.11240057647228241, "learning_rate": 1.9324704827842e-05, "loss": 0.0095, "step": 43710 }, { "epoch": 1.351919391728998, "grad_norm": 0.11670602113008499, "learning_rate": 1.932424120665142e-05, "loss": 0.009, "step": 43740 }, { "epoch": 1.3528466341101564, "grad_norm": 0.10376632213592529, "learning_rate": 1.932377758546084e-05, "loss": 0.0094, "step": 43770 }, { "epoch": 1.3537738764913148, "grad_norm": 0.10545159131288528, "learning_rate": 1.9323313964270263e-05, "loss": 0.0097, "step": 43800 }, { "epoch": 1.3547011188724731, "grad_norm": 0.15568681061267853, "learning_rate": 1.932285034307968e-05, "loss": 0.0102, "step": 43830 }, { "epoch": 1.3556283612536317, "grad_norm": 0.16665047407150269, "learning_rate": 1.9322386721889103e-05, "loss": 0.01, "step": 43860 }, { "epoch": 1.35655560363479, "grad_norm": 0.25485026836395264, "learning_rate": 1.9321923100698524e-05, "loss": 0.0098, "step": 43890 }, { "epoch": 1.3574828460159485, "grad_norm": 0.08989837020635605, "learning_rate": 1.9321459479507946e-05, "loss": 0.0108, "step": 43920 }, { "epoch": 1.358410088397107, "grad_norm": 0.10231315344572067, "learning_rate": 1.9320995858317367e-05, "loss": 0.0098, "step": 43950 }, { "epoch": 1.3593373307782655, "grad_norm": 0.1742917001247406, "learning_rate": 1.9320532237126785e-05, "loss": 0.0095, "step": 43980 }, { "epoch": 1.3602645731594238, "grad_norm": 0.14758968353271484, "learning_rate": 1.9320068615936207e-05, "loss": 0.0097, "step": 44010 }, { "epoch": 1.3611918155405824, "grad_norm": 0.16156749427318573, "learning_rate": 1.931960499474563e-05, "loss": 0.0088, "step": 44040 }, { "epoch": 1.3621190579217408, "grad_norm": 0.14050178229808807, "learning_rate": 1.9319141373555047e-05, "loss": 0.0097, "step": 44070 }, { "epoch": 1.3630463003028992, "grad_norm": 0.15599453449249268, "learning_rate": 
1.9318677752364468e-05, "loss": 0.0094, "step": 44100 }, { "epoch": 1.3639735426840576, "grad_norm": 0.09204850345849991, "learning_rate": 1.9318214131173893e-05, "loss": 0.0097, "step": 44130 }, { "epoch": 1.364900785065216, "grad_norm": 0.13839352130889893, "learning_rate": 1.931775050998331e-05, "loss": 0.0097, "step": 44160 }, { "epoch": 1.3658280274463745, "grad_norm": 0.14394205808639526, "learning_rate": 1.9317286888792733e-05, "loss": 0.01, "step": 44190 }, { "epoch": 1.366755269827533, "grad_norm": 0.12282363325357437, "learning_rate": 1.9316823267602154e-05, "loss": 0.01, "step": 44220 }, { "epoch": 1.3676825122086913, "grad_norm": 0.11648645251989365, "learning_rate": 1.9316359646411572e-05, "loss": 0.0097, "step": 44250 }, { "epoch": 1.36860975458985, "grad_norm": 0.13112352788448334, "learning_rate": 1.9315896025220994e-05, "loss": 0.0085, "step": 44280 }, { "epoch": 1.3695369969710083, "grad_norm": 0.13149431347846985, "learning_rate": 1.9315432404030415e-05, "loss": 0.0102, "step": 44310 }, { "epoch": 1.3704642393521667, "grad_norm": 0.16201671957969666, "learning_rate": 1.9314968782839837e-05, "loss": 0.0103, "step": 44340 }, { "epoch": 1.371391481733325, "grad_norm": 0.11749322712421417, "learning_rate": 1.9314505161649258e-05, "loss": 0.0094, "step": 44370 }, { "epoch": 1.3723187241144834, "grad_norm": 0.1278185397386551, "learning_rate": 1.9314041540458676e-05, "loss": 0.0096, "step": 44400 }, { "epoch": 1.373245966495642, "grad_norm": 0.19534847140312195, "learning_rate": 1.9313577919268098e-05, "loss": 0.0099, "step": 44430 }, { "epoch": 1.3741732088768004, "grad_norm": 0.16261234879493713, "learning_rate": 1.931311429807752e-05, "loss": 0.0097, "step": 44460 }, { "epoch": 1.3751004512579588, "grad_norm": 0.14654837548732758, "learning_rate": 1.931265067688694e-05, "loss": 0.0098, "step": 44490 }, { "epoch": 1.3760276936391174, "grad_norm": 0.1287396252155304, "learning_rate": 1.9312187055696362e-05, "loss": 0.0097, "step": 44520 }, { "epoch": 
1.3769549360202757, "grad_norm": 0.1637963056564331, "learning_rate": 1.9311723434505784e-05, "loss": 0.0094, "step": 44550 }, { "epoch": 1.3778821784014341, "grad_norm": 0.11489313095808029, "learning_rate": 1.9311259813315202e-05, "loss": 0.0105, "step": 44580 }, { "epoch": 1.3788094207825925, "grad_norm": 0.12646456062793732, "learning_rate": 1.9310796192124624e-05, "loss": 0.0098, "step": 44610 }, { "epoch": 1.3797366631637509, "grad_norm": 0.11426539719104767, "learning_rate": 1.931033257093404e-05, "loss": 0.0108, "step": 44640 }, { "epoch": 1.3806639055449095, "grad_norm": 0.11411666870117188, "learning_rate": 1.9309868949743463e-05, "loss": 0.0093, "step": 44670 }, { "epoch": 1.3815911479260679, "grad_norm": 0.10227779299020767, "learning_rate": 1.9309405328552885e-05, "loss": 0.0096, "step": 44700 }, { "epoch": 1.3825183903072262, "grad_norm": 0.09074218571186066, "learning_rate": 1.9308941707362306e-05, "loss": 0.0095, "step": 44730 }, { "epoch": 1.3834456326883848, "grad_norm": 0.1774221807718277, "learning_rate": 1.9308478086171728e-05, "loss": 0.0087, "step": 44760 }, { "epoch": 1.3843728750695432, "grad_norm": 0.1385614275932312, "learning_rate": 1.930801446498115e-05, "loss": 0.0091, "step": 44790 }, { "epoch": 1.3853001174507016, "grad_norm": 0.1240461990237236, "learning_rate": 1.9307550843790567e-05, "loss": 0.0101, "step": 44820 }, { "epoch": 1.3862273598318602, "grad_norm": 0.15871401131153107, "learning_rate": 1.930708722259999e-05, "loss": 0.0096, "step": 44850 }, { "epoch": 1.3871546022130186, "grad_norm": 0.11096545308828354, "learning_rate": 1.930662360140941e-05, "loss": 0.0101, "step": 44880 }, { "epoch": 1.388081844594177, "grad_norm": 0.1292717009782791, "learning_rate": 1.9306159980218832e-05, "loss": 0.0094, "step": 44910 }, { "epoch": 1.3890090869753353, "grad_norm": 0.11317145079374313, "learning_rate": 1.9305696359028253e-05, "loss": 0.0094, "step": 44940 }, { "epoch": 1.3899363293564937, "grad_norm": 0.09849058091640472, 
"learning_rate": 1.930523273783767e-05, "loss": 0.0092, "step": 44970 }, { "epoch": 1.3908635717376523, "grad_norm": 0.14303342998027802, "learning_rate": 1.9304769116647093e-05, "loss": 0.01, "step": 45000 }, { "epoch": 1.3917908141188107, "grad_norm": 0.14045266807079315, "learning_rate": 1.9304305495456514e-05, "loss": 0.009, "step": 45030 }, { "epoch": 1.392718056499969, "grad_norm": 0.18170809745788574, "learning_rate": 1.9303841874265933e-05, "loss": 0.0097, "step": 45060 }, { "epoch": 1.3936452988811276, "grad_norm": 0.1552629917860031, "learning_rate": 1.9303378253075354e-05, "loss": 0.0093, "step": 45090 }, { "epoch": 1.394572541262286, "grad_norm": 0.18788155913352966, "learning_rate": 1.9302914631884776e-05, "loss": 0.0093, "step": 45120 }, { "epoch": 1.3954997836434444, "grad_norm": 0.15484917163848877, "learning_rate": 1.9302451010694197e-05, "loss": 0.0104, "step": 45150 }, { "epoch": 1.3964270260246028, "grad_norm": 0.11511819809675217, "learning_rate": 1.930198738950362e-05, "loss": 0.0098, "step": 45180 }, { "epoch": 1.3973542684057612, "grad_norm": 0.1598670482635498, "learning_rate": 1.930152376831304e-05, "loss": 0.0098, "step": 45210 }, { "epoch": 1.3982815107869198, "grad_norm": 0.15243253111839294, "learning_rate": 1.9301060147122458e-05, "loss": 0.0086, "step": 45240 }, { "epoch": 1.3992087531680781, "grad_norm": 0.15170632302761078, "learning_rate": 1.930059652593188e-05, "loss": 0.0096, "step": 45270 }, { "epoch": 1.4001359955492365, "grad_norm": 0.2878711521625519, "learning_rate": 1.93001329047413e-05, "loss": 0.0099, "step": 45300 }, { "epoch": 1.4010632379303951, "grad_norm": 0.1886281967163086, "learning_rate": 1.9299669283550723e-05, "loss": 0.0096, "step": 45330 }, { "epoch": 1.4019904803115535, "grad_norm": 0.11519009619951248, "learning_rate": 1.9299205662360144e-05, "loss": 0.0102, "step": 45360 }, { "epoch": 1.4029177226927119, "grad_norm": 0.12238399684429169, "learning_rate": 1.9298742041169562e-05, "loss": 0.0096, "step": 
45390 }, { "epoch": 1.4038449650738702, "grad_norm": 0.17442694306373596, "learning_rate": 1.9298278419978984e-05, "loss": 0.0094, "step": 45420 }, { "epoch": 1.4047722074550286, "grad_norm": 0.19181925058364868, "learning_rate": 1.9297814798788405e-05, "loss": 0.0102, "step": 45450 }, { "epoch": 1.4056994498361872, "grad_norm": 0.12132903188467026, "learning_rate": 1.9297351177597823e-05, "loss": 0.0095, "step": 45480 }, { "epoch": 1.4066266922173456, "grad_norm": 0.1244729608297348, "learning_rate": 1.9296887556407245e-05, "loss": 0.0099, "step": 45510 }, { "epoch": 1.407553934598504, "grad_norm": 0.14180779457092285, "learning_rate": 1.929642393521667e-05, "loss": 0.0099, "step": 45540 }, { "epoch": 1.4084811769796626, "grad_norm": 0.11773888766765594, "learning_rate": 1.9295960314026088e-05, "loss": 0.0098, "step": 45570 }, { "epoch": 1.409408419360821, "grad_norm": 0.15132468938827515, "learning_rate": 1.929549669283551e-05, "loss": 0.0093, "step": 45600 }, { "epoch": 1.4103356617419793, "grad_norm": 0.13650880753993988, "learning_rate": 1.9295033071644928e-05, "loss": 0.0098, "step": 45630 }, { "epoch": 1.4112629041231377, "grad_norm": 0.14783047139644623, "learning_rate": 1.929456945045435e-05, "loss": 0.01, "step": 45660 }, { "epoch": 1.412190146504296, "grad_norm": 0.10870374739170074, "learning_rate": 1.929410582926377e-05, "loss": 0.0103, "step": 45690 }, { "epoch": 1.4131173888854547, "grad_norm": 0.22586514055728912, "learning_rate": 1.9293642208073192e-05, "loss": 0.0093, "step": 45720 }, { "epoch": 1.414044631266613, "grad_norm": 0.1295841783285141, "learning_rate": 1.9293178586882614e-05, "loss": 0.0098, "step": 45750 }, { "epoch": 1.4149718736477714, "grad_norm": 0.1018872857093811, "learning_rate": 1.9292714965692035e-05, "loss": 0.0098, "step": 45780 }, { "epoch": 1.41589911602893, "grad_norm": 0.32290175557136536, "learning_rate": 1.9292251344501453e-05, "loss": 0.0093, "step": 45810 }, { "epoch": 1.4168263584100884, "grad_norm": 
0.18325766921043396, "learning_rate": 1.9291787723310875e-05, "loss": 0.0092, "step": 45840 }, { "epoch": 1.4177536007912468, "grad_norm": 0.10027280449867249, "learning_rate": 1.9291324102120296e-05, "loss": 0.0097, "step": 45870 }, { "epoch": 1.4186808431724054, "grad_norm": 0.1484086811542511, "learning_rate": 1.9290860480929718e-05, "loss": 0.0101, "step": 45900 }, { "epoch": 1.4196080855535638, "grad_norm": 0.11070256680250168, "learning_rate": 1.929039685973914e-05, "loss": 0.0099, "step": 45930 }, { "epoch": 1.4205353279347221, "grad_norm": 0.13895608484745026, "learning_rate": 1.9289933238548557e-05, "loss": 0.01, "step": 45960 }, { "epoch": 1.4214625703158805, "grad_norm": 0.1415352076292038, "learning_rate": 1.928946961735798e-05, "loss": 0.0095, "step": 45990 }, { "epoch": 1.422389812697039, "grad_norm": 0.11337880790233612, "learning_rate": 1.92890059961674e-05, "loss": 0.0097, "step": 46020 }, { "epoch": 1.4233170550781975, "grad_norm": 0.13699333369731903, "learning_rate": 1.928854237497682e-05, "loss": 0.0092, "step": 46050 }, { "epoch": 1.4242442974593559, "grad_norm": 0.13759247958660126, "learning_rate": 1.928807875378624e-05, "loss": 0.0093, "step": 46080 }, { "epoch": 1.4251715398405143, "grad_norm": 0.15350960195064545, "learning_rate": 1.928761513259566e-05, "loss": 0.0096, "step": 46110 }, { "epoch": 1.4260987822216729, "grad_norm": 0.16113942861557007, "learning_rate": 1.9287151511405083e-05, "loss": 0.0104, "step": 46140 }, { "epoch": 1.4270260246028312, "grad_norm": 0.16575117409229279, "learning_rate": 1.9286687890214505e-05, "loss": 0.0093, "step": 46170 }, { "epoch": 1.4279532669839896, "grad_norm": 0.1398855745792389, "learning_rate": 1.9286224269023926e-05, "loss": 0.0092, "step": 46200 }, { "epoch": 1.428880509365148, "grad_norm": 0.16142059862613678, "learning_rate": 1.9285760647833344e-05, "loss": 0.0095, "step": 46230 }, { "epoch": 1.4298077517463064, "grad_norm": 0.14813700318336487, "learning_rate": 1.9285312480682452e-05, 
"loss": 0.0095, "step": 46260 }, { "epoch": 1.430734994127465, "grad_norm": 0.10589031130075455, "learning_rate": 1.9284848859491874e-05, "loss": 0.0092, "step": 46290 }, { "epoch": 1.4316622365086233, "grad_norm": 0.1111166849732399, "learning_rate": 1.9284385238301292e-05, "loss": 0.01, "step": 46320 }, { "epoch": 1.4325894788897817, "grad_norm": 0.11735191941261292, "learning_rate": 1.9283921617110713e-05, "loss": 0.0096, "step": 46350 }, { "epoch": 1.4335167212709403, "grad_norm": 0.13968631625175476, "learning_rate": 1.9283457995920135e-05, "loss": 0.0094, "step": 46380 }, { "epoch": 1.4344439636520987, "grad_norm": 0.16659127175807953, "learning_rate": 1.9282994374729556e-05, "loss": 0.0094, "step": 46410 }, { "epoch": 1.435371206033257, "grad_norm": 0.13163556158542633, "learning_rate": 1.9282530753538978e-05, "loss": 0.0097, "step": 46440 }, { "epoch": 1.4362984484144155, "grad_norm": 0.1983731985092163, "learning_rate": 1.9282067132348396e-05, "loss": 0.0097, "step": 46470 }, { "epoch": 1.4372256907955738, "grad_norm": 0.22138060629367828, "learning_rate": 1.9281603511157817e-05, "loss": 0.0101, "step": 46500 }, { "epoch": 1.4381529331767324, "grad_norm": 0.10715129226446152, "learning_rate": 1.928113988996724e-05, "loss": 0.0096, "step": 46530 }, { "epoch": 1.4390801755578908, "grad_norm": 0.12874117493629456, "learning_rate": 1.928067626877666e-05, "loss": 0.0096, "step": 46560 }, { "epoch": 1.4400074179390492, "grad_norm": 0.18497486412525177, "learning_rate": 1.9280212647586082e-05, "loss": 0.0097, "step": 46590 }, { "epoch": 1.4409346603202078, "grad_norm": 0.1300419569015503, "learning_rate": 1.9279749026395503e-05, "loss": 0.0104, "step": 46620 }, { "epoch": 1.4418619027013662, "grad_norm": 0.12455590069293976, "learning_rate": 1.927928540520492e-05, "loss": 0.0099, "step": 46650 }, { "epoch": 1.4427891450825245, "grad_norm": 0.12309641391038895, "learning_rate": 1.9278821784014343e-05, "loss": 0.0096, "step": 46680 }, { "epoch": 1.4437163874636831, 
"grad_norm": 0.2545463740825653, "learning_rate": 1.927835816282376e-05, "loss": 0.0103, "step": 46710 }, { "epoch": 1.4446436298448415, "grad_norm": 0.15456247329711914, "learning_rate": 1.9277894541633183e-05, "loss": 0.0092, "step": 46740 }, { "epoch": 1.445570872226, "grad_norm": 0.13643598556518555, "learning_rate": 1.9277430920442604e-05, "loss": 0.0094, "step": 46770 }, { "epoch": 1.4464981146071583, "grad_norm": 0.12029001861810684, "learning_rate": 1.9276967299252026e-05, "loss": 0.0091, "step": 46800 }, { "epoch": 1.4474253569883166, "grad_norm": 0.11417023092508316, "learning_rate": 1.9276503678061447e-05, "loss": 0.0095, "step": 46830 }, { "epoch": 1.4483525993694752, "grad_norm": 0.18955156207084656, "learning_rate": 1.927604005687087e-05, "loss": 0.0091, "step": 46860 }, { "epoch": 1.4492798417506336, "grad_norm": 0.183878093957901, "learning_rate": 1.9275576435680287e-05, "loss": 0.0094, "step": 46890 }, { "epoch": 1.450207084131792, "grad_norm": 0.12621872127056122, "learning_rate": 1.9275112814489708e-05, "loss": 0.0098, "step": 46920 }, { "epoch": 1.4511343265129506, "grad_norm": 0.12486520409584045, "learning_rate": 1.927464919329913e-05, "loss": 0.0097, "step": 46950 }, { "epoch": 1.452061568894109, "grad_norm": 0.11134640872478485, "learning_rate": 1.927418557210855e-05, "loss": 0.01, "step": 46980 }, { "epoch": 1.4529888112752674, "grad_norm": 0.11623376607894897, "learning_rate": 1.9273721950917973e-05, "loss": 0.0099, "step": 47010 }, { "epoch": 1.4539160536564257, "grad_norm": 0.16449394822120667, "learning_rate": 1.9273258329727394e-05, "loss": 0.0091, "step": 47040 }, { "epoch": 1.454843296037584, "grad_norm": 0.12645195424556732, "learning_rate": 1.9272794708536812e-05, "loss": 0.0095, "step": 47070 }, { "epoch": 1.4557705384187427, "grad_norm": 0.16126175224781036, "learning_rate": 1.9272331087346234e-05, "loss": 0.0098, "step": 47100 }, { "epoch": 1.456697780799901, "grad_norm": 0.17905184626579285, "learning_rate": 
1.9271867466155652e-05, "loss": 0.0097, "step": 47130 }, { "epoch": 1.4576250231810595, "grad_norm": 0.17743679881095886, "learning_rate": 1.9271403844965077e-05, "loss": 0.0097, "step": 47160 }, { "epoch": 1.458552265562218, "grad_norm": 0.1431826949119568, "learning_rate": 1.92709402237745e-05, "loss": 0.0093, "step": 47190 }, { "epoch": 1.4594795079433764, "grad_norm": 0.12102874368429184, "learning_rate": 1.9270476602583917e-05, "loss": 0.0102, "step": 47220 }, { "epoch": 1.4604067503245348, "grad_norm": 0.11329863965511322, "learning_rate": 1.9270012981393338e-05, "loss": 0.0102, "step": 47250 }, { "epoch": 1.4613339927056932, "grad_norm": 0.16904839873313904, "learning_rate": 1.926954936020276e-05, "loss": 0.0101, "step": 47280 }, { "epoch": 1.4622612350868516, "grad_norm": 0.10920751839876175, "learning_rate": 1.9269085739012178e-05, "loss": 0.0094, "step": 47310 }, { "epoch": 1.4631884774680102, "grad_norm": 0.16065941751003265, "learning_rate": 1.92686221178216e-05, "loss": 0.0091, "step": 47340 }, { "epoch": 1.4641157198491686, "grad_norm": 0.14328458905220032, "learning_rate": 1.926815849663102e-05, "loss": 0.0101, "step": 47370 }, { "epoch": 1.465042962230327, "grad_norm": 0.16084934771060944, "learning_rate": 1.9267694875440442e-05, "loss": 0.0099, "step": 47400 }, { "epoch": 1.4659702046114855, "grad_norm": 0.1507655680179596, "learning_rate": 1.9267231254249864e-05, "loss": 0.01, "step": 47430 }, { "epoch": 1.466897446992644, "grad_norm": 0.15478891134262085, "learning_rate": 1.9266767633059282e-05, "loss": 0.0105, "step": 47460 }, { "epoch": 1.4678246893738023, "grad_norm": 0.12051159143447876, "learning_rate": 1.9266304011868703e-05, "loss": 0.01, "step": 47490 }, { "epoch": 1.4687519317549609, "grad_norm": 0.1660822033882141, "learning_rate": 1.9265840390678125e-05, "loss": 0.0095, "step": 47520 }, { "epoch": 1.4696791741361193, "grad_norm": 0.1412389576435089, "learning_rate": 1.9265376769487546e-05, "loss": 0.0091, "step": 47550 }, { "epoch": 
1.4706064165172776, "grad_norm": 0.2320871651172638, "learning_rate": 1.9264913148296968e-05, "loss": 0.01, "step": 47580 }, { "epoch": 1.471533658898436, "grad_norm": 0.10116700083017349, "learning_rate": 1.926444952710639e-05, "loss": 0.009, "step": 47610 }, { "epoch": 1.4724609012795944, "grad_norm": 0.11700137704610825, "learning_rate": 1.9263985905915808e-05, "loss": 0.0098, "step": 47640 }, { "epoch": 1.473388143660753, "grad_norm": 0.1918482929468155, "learning_rate": 1.926352228472523e-05, "loss": 0.0106, "step": 47670 }, { "epoch": 1.4743153860419114, "grad_norm": 0.15227003395557404, "learning_rate": 1.926305866353465e-05, "loss": 0.0103, "step": 47700 }, { "epoch": 1.4752426284230697, "grad_norm": 0.140887051820755, "learning_rate": 1.926259504234407e-05, "loss": 0.009, "step": 47730 }, { "epoch": 1.4761698708042283, "grad_norm": 0.13153178989887238, "learning_rate": 1.926213142115349e-05, "loss": 0.0098, "step": 47760 }, { "epoch": 1.4770971131853867, "grad_norm": 0.13178084790706635, "learning_rate": 1.926166779996291e-05, "loss": 0.0101, "step": 47790 }, { "epoch": 1.478024355566545, "grad_norm": 0.22907550632953644, "learning_rate": 1.9261204178772333e-05, "loss": 0.0087, "step": 47820 }, { "epoch": 1.4789515979477035, "grad_norm": 0.2010449916124344, "learning_rate": 1.9260740557581755e-05, "loss": 0.0098, "step": 47850 }, { "epoch": 1.4798788403288619, "grad_norm": 0.1654026061296463, "learning_rate": 1.9260276936391173e-05, "loss": 0.0095, "step": 47880 }, { "epoch": 1.4808060827100205, "grad_norm": 0.11710470169782639, "learning_rate": 1.9259813315200594e-05, "loss": 0.0097, "step": 47910 }, { "epoch": 1.4817333250911788, "grad_norm": 0.1181873083114624, "learning_rate": 1.9259349694010016e-05, "loss": 0.0096, "step": 47940 }, { "epoch": 1.4826605674723372, "grad_norm": 0.15281298756599426, "learning_rate": 1.9258886072819437e-05, "loss": 0.0095, "step": 47970 }, { "epoch": 1.4835878098534958, "grad_norm": 0.1119583249092102, "learning_rate": 
1.925842245162886e-05, "loss": 0.0095, "step": 48000 }, { "epoch": 1.4845150522346542, "grad_norm": 0.08605809509754181, "learning_rate": 1.925795883043828e-05, "loss": 0.0095, "step": 48030 }, { "epoch": 1.4854422946158126, "grad_norm": 0.14609645307064056, "learning_rate": 1.92574952092477e-05, "loss": 0.0095, "step": 48060 }, { "epoch": 1.486369536996971, "grad_norm": 0.1784377098083496, "learning_rate": 1.925703158805712e-05, "loss": 0.0095, "step": 48090 }, { "epoch": 1.4872967793781293, "grad_norm": 0.1389724165201187, "learning_rate": 1.9256567966866538e-05, "loss": 0.01, "step": 48120 }, { "epoch": 1.488224021759288, "grad_norm": 0.14989978075027466, "learning_rate": 1.925610434567596e-05, "loss": 0.0099, "step": 48150 }, { "epoch": 1.4891512641404463, "grad_norm": 0.1372492015361786, "learning_rate": 1.925564072448538e-05, "loss": 0.0088, "step": 48180 }, { "epoch": 1.4900785065216047, "grad_norm": 0.15601222217082977, "learning_rate": 1.9255177103294803e-05, "loss": 0.0099, "step": 48210 }, { "epoch": 1.4910057489027633, "grad_norm": 0.19537174701690674, "learning_rate": 1.9254713482104224e-05, "loss": 0.0101, "step": 48240 }, { "epoch": 1.4919329912839217, "grad_norm": 0.14853228628635406, "learning_rate": 1.9254249860913646e-05, "loss": 0.0095, "step": 48270 }, { "epoch": 1.49286023366508, "grad_norm": 0.13122498989105225, "learning_rate": 1.9253786239723064e-05, "loss": 0.0094, "step": 48300 }, { "epoch": 1.4937874760462384, "grad_norm": 0.18477042019367218, "learning_rate": 1.9253322618532485e-05, "loss": 0.0089, "step": 48330 }, { "epoch": 1.494714718427397, "grad_norm": 0.12615007162094116, "learning_rate": 1.9252858997341907e-05, "loss": 0.01, "step": 48360 }, { "epoch": 1.4956419608085554, "grad_norm": 0.16904385387897491, "learning_rate": 1.9252395376151328e-05, "loss": 0.0098, "step": 48390 }, { "epoch": 1.4965692031897138, "grad_norm": 0.12534336745738983, "learning_rate": 1.925193175496075e-05, "loss": 0.0092, "step": 48420 }, { "epoch": 
1.4974964455708721, "grad_norm": 0.12175725400447845, "learning_rate": 1.9251468133770168e-05, "loss": 0.0092, "step": 48450 }, { "epoch": 1.4984236879520307, "grad_norm": 0.09141551703214645, "learning_rate": 1.925100451257959e-05, "loss": 0.0094, "step": 48480 }, { "epoch": 1.4993509303331891, "grad_norm": 0.12459276616573334, "learning_rate": 1.925054089138901e-05, "loss": 0.0096, "step": 48510 }, { "epoch": 1.5002781727143475, "grad_norm": 0.1483525186777115, "learning_rate": 1.925007727019843e-05, "loss": 0.0097, "step": 48540 }, { "epoch": 1.501205415095506, "grad_norm": 0.16884151101112366, "learning_rate": 1.9249613649007854e-05, "loss": 0.0086, "step": 48570 }, { "epoch": 1.5021326574766642, "grad_norm": 0.13718192279338837, "learning_rate": 1.9249150027817275e-05, "loss": 0.0094, "step": 48600 }, { "epoch": 1.5030598998578228, "grad_norm": 0.14917141199111938, "learning_rate": 1.9248686406626693e-05, "loss": 0.0097, "step": 48630 }, { "epoch": 1.5039871422389812, "grad_norm": 0.24633976817131042, "learning_rate": 1.9248222785436115e-05, "loss": 0.0095, "step": 48660 }, { "epoch": 1.5049143846201396, "grad_norm": 0.18098463118076324, "learning_rate": 1.9247759164245536e-05, "loss": 0.0103, "step": 48690 }, { "epoch": 1.5058416270012982, "grad_norm": 0.1520099639892578, "learning_rate": 1.9247295543054955e-05, "loss": 0.0089, "step": 48720 }, { "epoch": 1.5067688693824566, "grad_norm": 0.16213862597942352, "learning_rate": 1.9246831921864376e-05, "loss": 0.0089, "step": 48750 }, { "epoch": 1.507696111763615, "grad_norm": 0.09101473540067673, "learning_rate": 1.9246368300673798e-05, "loss": 0.0098, "step": 48780 }, { "epoch": 1.5086233541447736, "grad_norm": 0.1879040002822876, "learning_rate": 1.924590467948322e-05, "loss": 0.0095, "step": 48810 }, { "epoch": 1.509550596525932, "grad_norm": 0.14592543244361877, "learning_rate": 1.924544105829264e-05, "loss": 0.0095, "step": 48840 }, { "epoch": 1.5104778389070903, "grad_norm": 0.11115190386772156, 
"learning_rate": 1.924497743710206e-05, "loss": 0.0092, "step": 48870 }, { "epoch": 1.511405081288249, "grad_norm": 0.17127321660518646, "learning_rate": 1.924451381591148e-05, "loss": 0.0093, "step": 48900 }, { "epoch": 1.512332323669407, "grad_norm": 0.08015548437833786, "learning_rate": 1.9244050194720902e-05, "loss": 0.0095, "step": 48930 }, { "epoch": 1.5132595660505657, "grad_norm": 0.12987497448921204, "learning_rate": 1.9243586573530323e-05, "loss": 0.0095, "step": 48960 }, { "epoch": 1.514186808431724, "grad_norm": 0.1651998907327652, "learning_rate": 1.9243122952339745e-05, "loss": 0.0097, "step": 48990 }, { "epoch": 1.5151140508128824, "grad_norm": 0.08935632556676865, "learning_rate": 1.9242659331149166e-05, "loss": 0.0097, "step": 49020 }, { "epoch": 1.516041293194041, "grad_norm": 0.12644417583942413, "learning_rate": 1.9242195709958584e-05, "loss": 0.0089, "step": 49050 }, { "epoch": 1.5169685355751994, "grad_norm": 0.12699303030967712, "learning_rate": 1.9241732088768006e-05, "loss": 0.0092, "step": 49080 }, { "epoch": 1.5178957779563578, "grad_norm": 0.15760111808776855, "learning_rate": 1.9241268467577424e-05, "loss": 0.0092, "step": 49110 }, { "epoch": 1.5188230203375164, "grad_norm": 0.15269140899181366, "learning_rate": 1.9240804846386846e-05, "loss": 0.0092, "step": 49140 }, { "epoch": 1.5197502627186745, "grad_norm": 0.09311418980360031, "learning_rate": 1.9240341225196267e-05, "loss": 0.0099, "step": 49170 }, { "epoch": 1.5206775050998331, "grad_norm": 0.10121689736843109, "learning_rate": 1.923987760400569e-05, "loss": 0.0097, "step": 49200 }, { "epoch": 1.5216047474809915, "grad_norm": 0.13810919225215912, "learning_rate": 1.923941398281511e-05, "loss": 0.0093, "step": 49230 }, { "epoch": 1.5225319898621499, "grad_norm": 0.15873965620994568, "learning_rate": 1.923895036162453e-05, "loss": 0.0098, "step": 49260 }, { "epoch": 1.5234592322433085, "grad_norm": 0.10924283415079117, "learning_rate": 1.923848674043395e-05, "loss": 0.0093, "step": 
49290 }, { "epoch": 1.5243864746244669, "grad_norm": 0.1544143259525299, "learning_rate": 1.923802311924337e-05, "loss": 0.0092, "step": 49320 }, { "epoch": 1.5253137170056252, "grad_norm": 0.16904467344284058, "learning_rate": 1.9237559498052793e-05, "loss": 0.0096, "step": 49350 }, { "epoch": 1.5262409593867838, "grad_norm": 0.1343633532524109, "learning_rate": 1.9237095876862214e-05, "loss": 0.0094, "step": 49380 }, { "epoch": 1.527168201767942, "grad_norm": 0.130093514919281, "learning_rate": 1.9236632255671636e-05, "loss": 0.0099, "step": 49410 }, { "epoch": 1.5280954441491006, "grad_norm": 0.1455693244934082, "learning_rate": 1.9236168634481054e-05, "loss": 0.0101, "step": 49440 }, { "epoch": 1.529022686530259, "grad_norm": 0.11982125788927078, "learning_rate": 1.9235705013290475e-05, "loss": 0.0092, "step": 49470 }, { "epoch": 1.5299499289114173, "grad_norm": 0.1096549704670906, "learning_rate": 1.9235241392099897e-05, "loss": 0.0096, "step": 49500 }, { "epoch": 1.530877171292576, "grad_norm": 0.11243181675672531, "learning_rate": 1.9234777770909315e-05, "loss": 0.0093, "step": 49530 }, { "epoch": 1.5318044136737343, "grad_norm": 0.1448419690132141, "learning_rate": 1.9234314149718736e-05, "loss": 0.0091, "step": 49560 }, { "epoch": 1.5327316560548927, "grad_norm": 0.1255633533000946, "learning_rate": 1.9233850528528158e-05, "loss": 0.0096, "step": 49590 }, { "epoch": 1.5336588984360513, "grad_norm": 0.14078247547149658, "learning_rate": 1.923338690733758e-05, "loss": 0.0095, "step": 49620 }, { "epoch": 1.5345861408172095, "grad_norm": 0.1610621064901352, "learning_rate": 1.9232923286147e-05, "loss": 0.0097, "step": 49650 }, { "epoch": 1.535513383198368, "grad_norm": 0.11686121672391891, "learning_rate": 1.9232459664956422e-05, "loss": 0.01, "step": 49680 }, { "epoch": 1.5364406255795267, "grad_norm": 0.16634871065616608, "learning_rate": 1.923199604376584e-05, "loss": 0.0095, "step": 49710 }, { "epoch": 1.5373678679606848, "grad_norm": 0.14145205914974213, 
"learning_rate": 1.9231532422575262e-05, "loss": 0.0088, "step": 49740 }, { "epoch": 1.5382951103418434, "grad_norm": 0.17106138169765472, "learning_rate": 1.9231068801384684e-05, "loss": 0.0095, "step": 49770 }, { "epoch": 1.5392223527230018, "grad_norm": 0.16263146698474884, "learning_rate": 1.9230605180194105e-05, "loss": 0.0091, "step": 49800 }, { "epoch": 1.5401495951041602, "grad_norm": 0.14079351723194122, "learning_rate": 1.9230141559003527e-05, "loss": 0.0094, "step": 49830 }, { "epoch": 1.5410768374853188, "grad_norm": 0.1088135614991188, "learning_rate": 1.9229677937812945e-05, "loss": 0.0103, "step": 49860 }, { "epoch": 1.5420040798664771, "grad_norm": 0.09699127823114395, "learning_rate": 1.9229214316622366e-05, "loss": 0.0095, "step": 49890 }, { "epoch": 1.5429313222476355, "grad_norm": 0.10424819588661194, "learning_rate": 1.9228750695431788e-05, "loss": 0.0102, "step": 49920 }, { "epoch": 1.5438585646287941, "grad_norm": 0.10783172398805618, "learning_rate": 1.9228287074241206e-05, "loss": 0.0089, "step": 49950 }, { "epoch": 1.5447858070099523, "grad_norm": 0.21555323898792267, "learning_rate": 1.922782345305063e-05, "loss": 0.0085, "step": 49980 }, { "epoch": 1.5454039685973915, "eval_f1": 0.9969671947873243, "eval_loss": 0.009696862660348415, "eval_precision": 0.9969643640165787, "eval_recall": 0.9969722256007961, "eval_runtime": 4254.1518, "eval_samples_per_second": 277.658, "eval_steps_per_second": 8.677, "step": 50000 }, { "epoch": 1.5457130493911109, "grad_norm": 0.17913877964019775, "learning_rate": 1.9227359831860052e-05, "loss": 0.0092, "step": 50010 }, { "epoch": 1.5466402917722692, "grad_norm": 0.12794901430606842, "learning_rate": 1.922689621066947e-05, "loss": 0.0092, "step": 50040 }, { "epoch": 1.5475675341534276, "grad_norm": 0.1582464724779129, "learning_rate": 1.9226432589478892e-05, "loss": 0.0096, "step": 50070 }, { "epoch": 1.5484947765345862, "grad_norm": 0.17681871354579926, "learning_rate": 1.922596896828831e-05, "loss": 
0.0099, "step": 50100 }, { "epoch": 1.5494220189157446, "grad_norm": 0.13899938762187958, "learning_rate": 1.922550534709773e-05, "loss": 0.0092, "step": 50130 }, { "epoch": 1.550349261296903, "grad_norm": 0.1638915240764618, "learning_rate": 1.9225041725907153e-05, "loss": 0.0097, "step": 50160 }, { "epoch": 1.5512765036780616, "grad_norm": 0.13840323686599731, "learning_rate": 1.9224578104716575e-05, "loss": 0.0094, "step": 50190 }, { "epoch": 1.5522037460592197, "grad_norm": 0.13591980934143066, "learning_rate": 1.9224114483525996e-05, "loss": 0.009, "step": 50220 }, { "epoch": 1.5531309884403783, "grad_norm": 0.1090109571814537, "learning_rate": 1.9223650862335418e-05, "loss": 0.0097, "step": 50250 }, { "epoch": 1.5540582308215367, "grad_norm": 0.18672215938568115, "learning_rate": 1.9223187241144836e-05, "loss": 0.0099, "step": 50280 }, { "epoch": 1.554985473202695, "grad_norm": 0.12414762377738953, "learning_rate": 1.9222723619954257e-05, "loss": 0.0093, "step": 50310 }, { "epoch": 1.5559127155838537, "grad_norm": 0.09658060222864151, "learning_rate": 1.922225999876368e-05, "loss": 0.0094, "step": 50340 }, { "epoch": 1.556839957965012, "grad_norm": 0.16103999316692352, "learning_rate": 1.92217963775731e-05, "loss": 0.0089, "step": 50370 }, { "epoch": 1.5577672003461704, "grad_norm": 0.1187373548746109, "learning_rate": 1.922133275638252e-05, "loss": 0.0095, "step": 50400 }, { "epoch": 1.558694442727329, "grad_norm": 0.16834302246570587, "learning_rate": 1.922086913519194e-05, "loss": 0.0095, "step": 50430 }, { "epoch": 1.5596216851084872, "grad_norm": 0.10655603557825089, "learning_rate": 1.922040551400136e-05, "loss": 0.0091, "step": 50460 }, { "epoch": 1.5605489274896458, "grad_norm": 0.1625603884458542, "learning_rate": 1.9219941892810783e-05, "loss": 0.0095, "step": 50490 }, { "epoch": 1.5614761698708042, "grad_norm": 0.1365915834903717, "learning_rate": 1.92194782716202e-05, "loss": 0.009, "step": 50520 }, { "epoch": 1.5624034122519626, "grad_norm": 
0.15909062325954437, "learning_rate": 1.9219014650429622e-05, "loss": 0.0085, "step": 50550 }, { "epoch": 1.5633306546331212, "grad_norm": 0.16600386798381805, "learning_rate": 1.9218551029239044e-05, "loss": 0.0093, "step": 50580 }, { "epoch": 1.5642578970142795, "grad_norm": 0.07621962577104568, "learning_rate": 1.9218087408048465e-05, "loss": 0.0097, "step": 50610 }, { "epoch": 1.565185139395438, "grad_norm": 0.13161897659301758, "learning_rate": 1.9217623786857887e-05, "loss": 0.0098, "step": 50640 }, { "epoch": 1.5661123817765965, "grad_norm": 0.12562263011932373, "learning_rate": 1.921716016566731e-05, "loss": 0.0102, "step": 50670 }, { "epoch": 1.5670396241577549, "grad_norm": 0.11936542391777039, "learning_rate": 1.9216696544476727e-05, "loss": 0.01, "step": 50700 }, { "epoch": 1.5679668665389133, "grad_norm": 0.15751071274280548, "learning_rate": 1.9216232923286148e-05, "loss": 0.0096, "step": 50730 }, { "epoch": 1.5688941089200719, "grad_norm": 0.1469218134880066, "learning_rate": 1.921576930209557e-05, "loss": 0.0092, "step": 50760 }, { "epoch": 1.56982135130123, "grad_norm": 0.18954558670520782, "learning_rate": 1.921530568090499e-05, "loss": 0.0095, "step": 50790 }, { "epoch": 1.5707485936823886, "grad_norm": 0.12735134363174438, "learning_rate": 1.9214842059714413e-05, "loss": 0.0098, "step": 50820 }, { "epoch": 1.571675836063547, "grad_norm": 0.14252237975597382, "learning_rate": 1.921437843852383e-05, "loss": 0.0095, "step": 50850 }, { "epoch": 1.5726030784447054, "grad_norm": 0.13952960073947906, "learning_rate": 1.9213914817333252e-05, "loss": 0.009, "step": 50880 }, { "epoch": 1.573530320825864, "grad_norm": 0.11086856573820114, "learning_rate": 1.9213451196142674e-05, "loss": 0.0093, "step": 50910 }, { "epoch": 1.5744575632070223, "grad_norm": 0.14413772523403168, "learning_rate": 1.9212987574952092e-05, "loss": 0.0094, "step": 50940 }, { "epoch": 1.5753848055881807, "grad_norm": 0.18182632327079773, "learning_rate": 1.9212523953761513e-05, 
"loss": 0.0107, "step": 50970 }, { "epoch": 1.5763120479693393, "grad_norm": 0.21864250302314758, "learning_rate": 1.9212060332570935e-05, "loss": 0.0088, "step": 51000 }, { "epoch": 1.5772392903504975, "grad_norm": 0.14293724298477173, "learning_rate": 1.9211596711380356e-05, "loss": 0.0096, "step": 51030 }, { "epoch": 1.578166532731656, "grad_norm": 0.19166423380374908, "learning_rate": 1.9211133090189778e-05, "loss": 0.0101, "step": 51060 }, { "epoch": 1.5790937751128145, "grad_norm": 0.15941683948040009, "learning_rate": 1.92106694689992e-05, "loss": 0.01, "step": 51090 }, { "epoch": 1.5800210174939728, "grad_norm": 0.12575973570346832, "learning_rate": 1.9210205847808617e-05, "loss": 0.0093, "step": 51120 }, { "epoch": 1.5809482598751314, "grad_norm": 0.3184037208557129, "learning_rate": 1.920974222661804e-05, "loss": 0.0092, "step": 51150 }, { "epoch": 1.5818755022562898, "grad_norm": 0.14921867847442627, "learning_rate": 1.920927860542746e-05, "loss": 0.0096, "step": 51180 }, { "epoch": 1.5828027446374482, "grad_norm": 0.1514192372560501, "learning_rate": 1.9208814984236882e-05, "loss": 0.0097, "step": 51210 }, { "epoch": 1.5837299870186068, "grad_norm": 0.17328055202960968, "learning_rate": 1.9208351363046304e-05, "loss": 0.0091, "step": 51240 }, { "epoch": 1.584657229399765, "grad_norm": 0.15126366913318634, "learning_rate": 1.920788774185572e-05, "loss": 0.0093, "step": 51270 }, { "epoch": 1.5855844717809235, "grad_norm": 0.08810719102621078, "learning_rate": 1.9207424120665143e-05, "loss": 0.0092, "step": 51300 }, { "epoch": 1.586511714162082, "grad_norm": 0.11617647111415863, "learning_rate": 1.9206960499474565e-05, "loss": 0.0094, "step": 51330 }, { "epoch": 1.5874389565432403, "grad_norm": 0.12962676584720612, "learning_rate": 1.9206496878283983e-05, "loss": 0.0087, "step": 51360 }, { "epoch": 1.588366198924399, "grad_norm": 0.11993799358606339, "learning_rate": 1.9206033257093408e-05, "loss": 0.0098, "step": 51390 }, { "epoch": 1.5892934413055573, 
"grad_norm": 0.09296266734600067, "learning_rate": 1.920556963590283e-05, "loss": 0.0087, "step": 51420 }, { "epoch": 1.5902206836867157, "grad_norm": 0.18137672543525696, "learning_rate": 1.9205106014712247e-05, "loss": 0.0096, "step": 51450 }, { "epoch": 1.5911479260678743, "grad_norm": 0.10692484676837921, "learning_rate": 1.920464239352167e-05, "loss": 0.0094, "step": 51480 }, { "epoch": 1.5920751684490326, "grad_norm": 0.11979269981384277, "learning_rate": 1.9204194226370777e-05, "loss": 0.0084, "step": 51510 }, { "epoch": 1.593002410830191, "grad_norm": 0.16203047335147858, "learning_rate": 1.9203730605180195e-05, "loss": 0.0097, "step": 51540 }, { "epoch": 1.5939296532113496, "grad_norm": 0.13626447319984436, "learning_rate": 1.9203266983989616e-05, "loss": 0.0098, "step": 51570 }, { "epoch": 1.5948568955925078, "grad_norm": 0.14116331934928894, "learning_rate": 1.9202803362799038e-05, "loss": 0.0089, "step": 51600 }, { "epoch": 1.5957841379736664, "grad_norm": 0.1254875659942627, "learning_rate": 1.920233974160846e-05, "loss": 0.0094, "step": 51630 }, { "epoch": 1.5967113803548247, "grad_norm": 0.08816734701395035, "learning_rate": 1.920187612041788e-05, "loss": 0.0085, "step": 51660 }, { "epoch": 1.5976386227359831, "grad_norm": 0.12914961576461792, "learning_rate": 1.92014124992273e-05, "loss": 0.0094, "step": 51690 }, { "epoch": 1.5985658651171417, "grad_norm": 0.1434967964887619, "learning_rate": 1.920094887803672e-05, "loss": 0.0086, "step": 51720 }, { "epoch": 1.5994931074983, "grad_norm": 0.140420064330101, "learning_rate": 1.9200485256846142e-05, "loss": 0.0094, "step": 51750 }, { "epoch": 1.6004203498794585, "grad_norm": 0.13500522077083588, "learning_rate": 1.920002163565556e-05, "loss": 0.0099, "step": 51780 }, { "epoch": 1.601347592260617, "grad_norm": 0.13464060425758362, "learning_rate": 1.919955801446498e-05, "loss": 0.0094, "step": 51810 }, { "epoch": 1.6022748346417752, "grad_norm": 0.11276684701442719, "learning_rate": 
1.9199094393274403e-05, "loss": 0.0094, "step": 51840 }, { "epoch": 1.6032020770229338, "grad_norm": 0.11805001646280289, "learning_rate": 1.9198630772083825e-05, "loss": 0.0094, "step": 51870 }, { "epoch": 1.6041293194040922, "grad_norm": 0.16129083931446075, "learning_rate": 1.9198167150893246e-05, "loss": 0.0093, "step": 51900 }, { "epoch": 1.6050565617852506, "grad_norm": 0.11470159143209457, "learning_rate": 1.9197703529702664e-05, "loss": 0.0089, "step": 51930 }, { "epoch": 1.6059838041664092, "grad_norm": 0.2109963595867157, "learning_rate": 1.9197239908512086e-05, "loss": 0.0098, "step": 51960 }, { "epoch": 1.6069110465475676, "grad_norm": 0.14319279789924622, "learning_rate": 1.9196776287321507e-05, "loss": 0.01, "step": 51990 }, { "epoch": 1.607838288928726, "grad_norm": 0.13672903180122375, "learning_rate": 1.919631266613093e-05, "loss": 0.0084, "step": 52020 }, { "epoch": 1.6087655313098845, "grad_norm": 0.16045495867729187, "learning_rate": 1.919584904494035e-05, "loss": 0.0089, "step": 52050 }, { "epoch": 1.6096927736910427, "grad_norm": 0.1417803019285202, "learning_rate": 1.9195385423749772e-05, "loss": 0.0091, "step": 52080 }, { "epoch": 1.6106200160722013, "grad_norm": 0.13423936069011688, "learning_rate": 1.919492180255919e-05, "loss": 0.0092, "step": 52110 }, { "epoch": 1.6115472584533597, "grad_norm": 0.13794130086898804, "learning_rate": 1.919445818136861e-05, "loss": 0.009, "step": 52140 }, { "epoch": 1.612474500834518, "grad_norm": 0.1327725350856781, "learning_rate": 1.9193994560178033e-05, "loss": 0.0101, "step": 52170 }, { "epoch": 1.6134017432156766, "grad_norm": 0.12194669246673584, "learning_rate": 1.919353093898745e-05, "loss": 0.0089, "step": 52200 }, { "epoch": 1.614328985596835, "grad_norm": 0.13510173559188843, "learning_rate": 1.9193067317796873e-05, "loss": 0.0093, "step": 52230 }, { "epoch": 1.6152562279779934, "grad_norm": 0.11988970637321472, "learning_rate": 1.9192603696606294e-05, "loss": 0.0091, "step": 52260 }, { "epoch": 
1.616183470359152, "grad_norm": 0.1110687255859375, "learning_rate": 1.9192140075415716e-05, "loss": 0.0101, "step": 52290 }, { "epoch": 1.6171107127403102, "grad_norm": 0.14389757812023163, "learning_rate": 1.9191676454225137e-05, "loss": 0.0092, "step": 52320 }, { "epoch": 1.6180379551214688, "grad_norm": 0.15718917548656464, "learning_rate": 1.9191212833034555e-05, "loss": 0.0091, "step": 52350 }, { "epoch": 1.6189651975026274, "grad_norm": 0.13931840658187866, "learning_rate": 1.9190749211843977e-05, "loss": 0.0094, "step": 52380 }, { "epoch": 1.6198924398837855, "grad_norm": 0.13862594962120056, "learning_rate": 1.9190285590653398e-05, "loss": 0.0092, "step": 52410 }, { "epoch": 1.620819682264944, "grad_norm": 0.13352888822555542, "learning_rate": 1.918982196946282e-05, "loss": 0.0096, "step": 52440 }, { "epoch": 1.6217469246461025, "grad_norm": 0.1035979762673378, "learning_rate": 1.918935834827224e-05, "loss": 0.0087, "step": 52470 }, { "epoch": 1.6226741670272609, "grad_norm": 0.16948071122169495, "learning_rate": 1.9188894727081663e-05, "loss": 0.01, "step": 52500 }, { "epoch": 1.6236014094084195, "grad_norm": 0.13679096102714539, "learning_rate": 1.918843110589108e-05, "loss": 0.0099, "step": 52530 }, { "epoch": 1.6245286517895778, "grad_norm": 0.1796470731496811, "learning_rate": 1.9187967484700502e-05, "loss": 0.0094, "step": 52560 }, { "epoch": 1.6254558941707362, "grad_norm": 0.14241860806941986, "learning_rate": 1.918750386350992e-05, "loss": 0.0088, "step": 52590 }, { "epoch": 1.6263831365518948, "grad_norm": 0.13280513882637024, "learning_rate": 1.918705569635903e-05, "loss": 0.009, "step": 52620 }, { "epoch": 1.627310378933053, "grad_norm": 0.10208094120025635, "learning_rate": 1.918659207516845e-05, "loss": 0.0099, "step": 52650 }, { "epoch": 1.6282376213142116, "grad_norm": 0.1282786726951599, "learning_rate": 1.918612845397787e-05, "loss": 0.0093, "step": 52680 }, { "epoch": 1.62916486369537, "grad_norm": 0.09611595422029495, "learning_rate": 
1.9185664832787293e-05, "loss": 0.0097, "step": 52710 }, { "epoch": 1.6300921060765283, "grad_norm": 0.15586107969284058, "learning_rate": 1.9185201211596714e-05, "loss": 0.0094, "step": 52740 }, { "epoch": 1.631019348457687, "grad_norm": 0.17779968678951263, "learning_rate": 1.9184737590406132e-05, "loss": 0.0094, "step": 52770 }, { "epoch": 1.6319465908388453, "grad_norm": 0.14495053887367249, "learning_rate": 1.9184273969215554e-05, "loss": 0.0091, "step": 52800 }, { "epoch": 1.6328738332200037, "grad_norm": 0.13374333083629608, "learning_rate": 1.9183810348024975e-05, "loss": 0.01, "step": 52830 }, { "epoch": 1.6338010756011623, "grad_norm": 0.12610729038715363, "learning_rate": 1.9183346726834397e-05, "loss": 0.0091, "step": 52860 }, { "epoch": 1.6347283179823204, "grad_norm": 0.10151669383049011, "learning_rate": 1.918288310564382e-05, "loss": 0.0102, "step": 52890 }, { "epoch": 1.635655560363479, "grad_norm": 0.1059807762503624, "learning_rate": 1.918241948445324e-05, "loss": 0.0095, "step": 52920 }, { "epoch": 1.6365828027446374, "grad_norm": 0.15262019634246826, "learning_rate": 1.9181955863262658e-05, "loss": 0.0102, "step": 52950 }, { "epoch": 1.6375100451257958, "grad_norm": 0.10901451110839844, "learning_rate": 1.918149224207208e-05, "loss": 0.0095, "step": 52980 }, { "epoch": 1.6384372875069544, "grad_norm": 0.17048171162605286, "learning_rate": 1.91810286208815e-05, "loss": 0.0096, "step": 53010 }, { "epoch": 1.6393645298881128, "grad_norm": 0.09015143662691116, "learning_rate": 1.918056499969092e-05, "loss": 0.0094, "step": 53040 }, { "epoch": 1.6402917722692711, "grad_norm": 0.13316532969474792, "learning_rate": 1.918010137850034e-05, "loss": 0.0093, "step": 53070 }, { "epoch": 1.6412190146504297, "grad_norm": 0.15543310344219208, "learning_rate": 1.9179637757309762e-05, "loss": 0.0099, "step": 53100 }, { "epoch": 1.642146257031588, "grad_norm": 0.1851482391357422, "learning_rate": 1.9179174136119184e-05, "loss": 0.0094, "step": 53130 }, { "epoch": 
1.6430734994127465, "grad_norm": 0.13724984228610992, "learning_rate": 1.9178710514928605e-05, "loss": 0.0089, "step": 53160 }, { "epoch": 1.6440007417939049, "grad_norm": 0.12769708037376404, "learning_rate": 1.9178246893738023e-05, "loss": 0.0096, "step": 53190 }, { "epoch": 1.6449279841750633, "grad_norm": 0.1660904735326767, "learning_rate": 1.9177783272547445e-05, "loss": 0.0104, "step": 53220 }, { "epoch": 1.6458552265562219, "grad_norm": 0.1161818578839302, "learning_rate": 1.9177319651356866e-05, "loss": 0.0088, "step": 53250 }, { "epoch": 1.6467824689373802, "grad_norm": 0.11766833811998367, "learning_rate": 1.9176856030166288e-05, "loss": 0.0097, "step": 53280 }, { "epoch": 1.6477097113185386, "grad_norm": 0.10271690040826797, "learning_rate": 1.917639240897571e-05, "loss": 0.0097, "step": 53310 }, { "epoch": 1.6486369536996972, "grad_norm": 0.14822208881378174, "learning_rate": 1.917592878778513e-05, "loss": 0.0094, "step": 53340 }, { "epoch": 1.6495641960808556, "grad_norm": 0.1488986611366272, "learning_rate": 1.917546516659455e-05, "loss": 0.0098, "step": 53370 }, { "epoch": 1.650491438462014, "grad_norm": 0.18712139129638672, "learning_rate": 1.917500154540397e-05, "loss": 0.0093, "step": 53400 }, { "epoch": 1.6514186808431726, "grad_norm": 0.09408722072839737, "learning_rate": 1.917453792421339e-05, "loss": 0.0088, "step": 53430 }, { "epoch": 1.6523459232243307, "grad_norm": 0.13388709723949432, "learning_rate": 1.917407430302281e-05, "loss": 0.0103, "step": 53460 }, { "epoch": 1.6532731656054893, "grad_norm": 0.150857076048851, "learning_rate": 1.917361068183223e-05, "loss": 0.0094, "step": 53490 }, { "epoch": 1.6542004079866477, "grad_norm": 0.10530157387256622, "learning_rate": 1.9173147060641653e-05, "loss": 0.0095, "step": 53520 }, { "epoch": 1.655127650367806, "grad_norm": 0.09941335767507553, "learning_rate": 1.9172683439451075e-05, "loss": 0.0101, "step": 53550 }, { "epoch": 1.6560548927489647, "grad_norm": 0.14865027368068695, 
"learning_rate": 1.9172219818260496e-05, "loss": 0.009, "step": 53580 }, { "epoch": 1.656982135130123, "grad_norm": 0.1467655450105667, "learning_rate": 1.9171756197069914e-05, "loss": 0.009, "step": 53610 }, { "epoch": 1.6579093775112814, "grad_norm": 0.08962097764015198, "learning_rate": 1.9171292575879336e-05, "loss": 0.0096, "step": 53640 }, { "epoch": 1.65883661989244, "grad_norm": 0.13861003518104553, "learning_rate": 1.9170828954688757e-05, "loss": 0.0098, "step": 53670 }, { "epoch": 1.6597638622735982, "grad_norm": 0.16868984699249268, "learning_rate": 1.917036533349818e-05, "loss": 0.009, "step": 53700 }, { "epoch": 1.6606911046547568, "grad_norm": 0.1342468410730362, "learning_rate": 1.91699017123076e-05, "loss": 0.0097, "step": 53730 }, { "epoch": 1.6616183470359152, "grad_norm": 0.1119002252817154, "learning_rate": 1.916943809111702e-05, "loss": 0.0097, "step": 53760 }, { "epoch": 1.6625455894170735, "grad_norm": 0.14368779957294464, "learning_rate": 1.916897446992644e-05, "loss": 0.0096, "step": 53790 }, { "epoch": 1.6634728317982321, "grad_norm": 0.1421126127243042, "learning_rate": 1.916851084873586e-05, "loss": 0.0099, "step": 53820 }, { "epoch": 1.6644000741793905, "grad_norm": 0.10994177311658859, "learning_rate": 1.916804722754528e-05, "loss": 0.009, "step": 53850 }, { "epoch": 1.6653273165605489, "grad_norm": 0.10876211524009705, "learning_rate": 1.91675836063547e-05, "loss": 0.0099, "step": 53880 }, { "epoch": 1.6662545589417075, "grad_norm": 0.16765227913856506, "learning_rate": 1.9167119985164126e-05, "loss": 0.01, "step": 53910 }, { "epoch": 1.6671818013228656, "grad_norm": 0.170677050948143, "learning_rate": 1.9166656363973544e-05, "loss": 0.0096, "step": 53940 }, { "epoch": 1.6681090437040242, "grad_norm": 0.19664275646209717, "learning_rate": 1.9166192742782966e-05, "loss": 0.0093, "step": 53970 }, { "epoch": 1.6690362860851826, "grad_norm": 0.1395527571439743, "learning_rate": 1.9165729121592387e-05, "loss": 0.0095, "step": 54000 }, { 
"epoch": 1.669963528466341, "grad_norm": 0.13767150044441223, "learning_rate": 1.9165265500401805e-05, "loss": 0.0101, "step": 54030 }, { "epoch": 1.6708907708474996, "grad_norm": 0.13334769010543823, "learning_rate": 1.9164801879211227e-05, "loss": 0.0094, "step": 54060 }, { "epoch": 1.671818013228658, "grad_norm": 0.12909670174121857, "learning_rate": 1.9164338258020648e-05, "loss": 0.0097, "step": 54090 }, { "epoch": 1.6727452556098164, "grad_norm": 0.11720459908246994, "learning_rate": 1.916387463683007e-05, "loss": 0.009, "step": 54120 }, { "epoch": 1.673672497990975, "grad_norm": 0.24290640652179718, "learning_rate": 1.916341101563949e-05, "loss": 0.0087, "step": 54150 }, { "epoch": 1.6745997403721333, "grad_norm": 0.12326133251190186, "learning_rate": 1.916294739444891e-05, "loss": 0.0095, "step": 54180 }, { "epoch": 1.6755269827532917, "grad_norm": 0.1361721009016037, "learning_rate": 1.916248377325833e-05, "loss": 0.0099, "step": 54210 }, { "epoch": 1.6764542251344503, "grad_norm": 0.13287442922592163, "learning_rate": 1.9162020152067752e-05, "loss": 0.0094, "step": 54240 }, { "epoch": 1.6773814675156085, "grad_norm": 0.11923348903656006, "learning_rate": 1.9161556530877174e-05, "loss": 0.0095, "step": 54270 }, { "epoch": 1.678308709896767, "grad_norm": 0.15064279735088348, "learning_rate": 1.9161092909686595e-05, "loss": 0.009, "step": 54300 }, { "epoch": 1.6792359522779254, "grad_norm": 0.17028802633285522, "learning_rate": 1.9160629288496017e-05, "loss": 0.0095, "step": 54330 }, { "epoch": 1.6801631946590838, "grad_norm": 0.11273153126239777, "learning_rate": 1.9160165667305435e-05, "loss": 0.0091, "step": 54360 }, { "epoch": 1.6810904370402424, "grad_norm": 0.10572308301925659, "learning_rate": 1.9159702046114857e-05, "loss": 0.0102, "step": 54390 }, { "epoch": 1.6820176794214008, "grad_norm": 0.12953117489814758, "learning_rate": 1.9159238424924275e-05, "loss": 0.0095, "step": 54420 }, { "epoch": 1.6829449218025592, "grad_norm": 0.10780920088291168, 
"learning_rate": 1.9158774803733696e-05, "loss": 0.0092, "step": 54450 }, { "epoch": 1.6838721641837178, "grad_norm": 0.11831291019916534, "learning_rate": 1.9158311182543118e-05, "loss": 0.0102, "step": 54480 }, { "epoch": 1.684799406564876, "grad_norm": 0.13420821726322174, "learning_rate": 1.915784756135254e-05, "loss": 0.0088, "step": 54510 }, { "epoch": 1.6857266489460345, "grad_norm": 0.19302931427955627, "learning_rate": 1.915738394016196e-05, "loss": 0.0094, "step": 54540 }, { "epoch": 1.686653891327193, "grad_norm": 0.19178350269794464, "learning_rate": 1.9156920318971382e-05, "loss": 0.0093, "step": 54570 }, { "epoch": 1.6875811337083513, "grad_norm": 0.10883525013923645, "learning_rate": 1.91564566977808e-05, "loss": 0.0096, "step": 54600 }, { "epoch": 1.6885083760895099, "grad_norm": 0.16872438788414001, "learning_rate": 1.9155993076590222e-05, "loss": 0.0098, "step": 54630 }, { "epoch": 1.6894356184706683, "grad_norm": 0.20986542105674744, "learning_rate": 1.9155529455399643e-05, "loss": 0.0091, "step": 54660 }, { "epoch": 1.6903628608518266, "grad_norm": 0.1171252429485321, "learning_rate": 1.9155065834209065e-05, "loss": 0.0097, "step": 54690 }, { "epoch": 1.6912901032329852, "grad_norm": 0.17706051468849182, "learning_rate": 1.9154602213018486e-05, "loss": 0.0109, "step": 54720 }, { "epoch": 1.6922173456141434, "grad_norm": 0.14497560262680054, "learning_rate": 1.9154138591827904e-05, "loss": 0.0096, "step": 54750 }, { "epoch": 1.693144587995302, "grad_norm": 0.18789975345134735, "learning_rate": 1.9153674970637326e-05, "loss": 0.0094, "step": 54780 }, { "epoch": 1.6940718303764604, "grad_norm": 0.1284540295600891, "learning_rate": 1.9153211349446747e-05, "loss": 0.0095, "step": 54810 }, { "epoch": 1.6949990727576187, "grad_norm": 0.16070914268493652, "learning_rate": 1.9152747728256166e-05, "loss": 0.0102, "step": 54840 }, { "epoch": 1.6959263151387773, "grad_norm": 0.15256452560424805, "learning_rate": 1.9152284107065587e-05, "loss": 0.0094, 
"step": 54870 }, { "epoch": 1.6968535575199357, "grad_norm": 0.12167240679264069, "learning_rate": 1.915182048587501e-05, "loss": 0.009, "step": 54900 }, { "epoch": 1.697780799901094, "grad_norm": 0.10163354873657227, "learning_rate": 1.915135686468443e-05, "loss": 0.0096, "step": 54930 }, { "epoch": 1.6987080422822527, "grad_norm": 0.16005441546440125, "learning_rate": 1.915089324349385e-05, "loss": 0.0093, "step": 54960 }, { "epoch": 1.6996352846634109, "grad_norm": 0.1471225768327713, "learning_rate": 1.9150429622303273e-05, "loss": 0.0094, "step": 54990 }, { "epoch": 1.7005625270445694, "grad_norm": 0.1488265097141266, "learning_rate": 1.914996600111269e-05, "loss": 0.0098, "step": 55020 }, { "epoch": 1.701489769425728, "grad_norm": 0.16626474261283875, "learning_rate": 1.9149502379922113e-05, "loss": 0.01, "step": 55050 }, { "epoch": 1.7024170118068862, "grad_norm": 0.12388648837804794, "learning_rate": 1.9149038758731534e-05, "loss": 0.0092, "step": 55080 }, { "epoch": 1.7033442541880448, "grad_norm": 0.1475176364183426, "learning_rate": 1.9148575137540956e-05, "loss": 0.0093, "step": 55110 }, { "epoch": 1.7042714965692032, "grad_norm": 0.12750722467899323, "learning_rate": 1.9148111516350377e-05, "loss": 0.0099, "step": 55140 }, { "epoch": 1.7051987389503616, "grad_norm": 0.16431891918182373, "learning_rate": 1.9147647895159795e-05, "loss": 0.0091, "step": 55170 }, { "epoch": 1.7061259813315202, "grad_norm": 0.12907744944095612, "learning_rate": 1.9147184273969217e-05, "loss": 0.0094, "step": 55200 }, { "epoch": 1.7070532237126785, "grad_norm": 0.06637873500585556, "learning_rate": 1.914672065277864e-05, "loss": 0.0091, "step": 55230 }, { "epoch": 1.707980466093837, "grad_norm": 0.19886338710784912, "learning_rate": 1.9146257031588056e-05, "loss": 0.0091, "step": 55260 }, { "epoch": 1.7089077084749955, "grad_norm": 0.14564409852027893, "learning_rate": 1.9145793410397478e-05, "loss": 0.0089, "step": 55290 }, { "epoch": 1.7098349508561537, "grad_norm": 
0.11837296932935715, "learning_rate": 1.9145329789206903e-05, "loss": 0.0094, "step": 55320 }, { "epoch": 1.7107621932373123, "grad_norm": 0.16335999965667725, "learning_rate": 1.914486616801632e-05, "loss": 0.0094, "step": 55350 }, { "epoch": 1.7116894356184706, "grad_norm": 0.1536480337381363, "learning_rate": 1.9144402546825743e-05, "loss": 0.0102, "step": 55380 }, { "epoch": 1.712616677999629, "grad_norm": 0.11580019444227219, "learning_rate": 1.914393892563516e-05, "loss": 0.0099, "step": 55410 }, { "epoch": 1.7135439203807876, "grad_norm": 0.1549271047115326, "learning_rate": 1.9143475304444582e-05, "loss": 0.01, "step": 55440 }, { "epoch": 1.714471162761946, "grad_norm": 0.1688505858182907, "learning_rate": 1.9143011683254004e-05, "loss": 0.0091, "step": 55470 }, { "epoch": 1.7153984051431044, "grad_norm": 0.15756696462631226, "learning_rate": 1.9142548062063425e-05, "loss": 0.0096, "step": 55500 }, { "epoch": 1.716325647524263, "grad_norm": 0.10049308091402054, "learning_rate": 1.9142084440872847e-05, "loss": 0.0088, "step": 55530 }, { "epoch": 1.7172528899054211, "grad_norm": 0.32078489661216736, "learning_rate": 1.9141620819682268e-05, "loss": 0.0099, "step": 55560 }, { "epoch": 1.7181801322865797, "grad_norm": 0.10990991443395615, "learning_rate": 1.9141157198491686e-05, "loss": 0.0097, "step": 55590 }, { "epoch": 1.719107374667738, "grad_norm": 0.10550989955663681, "learning_rate": 1.9140693577301108e-05, "loss": 0.0085, "step": 55620 }, { "epoch": 1.7200346170488965, "grad_norm": 0.12818893790245056, "learning_rate": 1.914022995611053e-05, "loss": 0.0089, "step": 55650 }, { "epoch": 1.720961859430055, "grad_norm": 0.16319526731967926, "learning_rate": 1.913976633491995e-05, "loss": 0.0095, "step": 55680 }, { "epoch": 1.7218891018112135, "grad_norm": 0.1432582288980484, "learning_rate": 1.9139302713729372e-05, "loss": 0.0092, "step": 55710 }, { "epoch": 1.7228163441923718, "grad_norm": 0.14118653535842896, "learning_rate": 1.913883909253879e-05, "loss": 
0.0096, "step": 55740 }, { "epoch": 1.7237435865735304, "grad_norm": 0.1348247528076172, "learning_rate": 1.9138375471348212e-05, "loss": 0.0099, "step": 55770 }, { "epoch": 1.7246708289546886, "grad_norm": 0.12489266693592072, "learning_rate": 1.9137911850157633e-05, "loss": 0.0081, "step": 55800 }, { "epoch": 1.7255980713358472, "grad_norm": 0.12218639254570007, "learning_rate": 1.913746368300674e-05, "loss": 0.0092, "step": 55830 }, { "epoch": 1.7265253137170056, "grad_norm": 0.1989259123802185, "learning_rate": 1.913700006181616e-05, "loss": 0.0089, "step": 55860 }, { "epoch": 1.727452556098164, "grad_norm": 0.1484067440032959, "learning_rate": 1.913653644062558e-05, "loss": 0.0095, "step": 55890 }, { "epoch": 1.7283797984793225, "grad_norm": 0.1592060774564743, "learning_rate": 1.9136072819435002e-05, "loss": 0.0101, "step": 55920 }, { "epoch": 1.729307040860481, "grad_norm": 0.15433984994888306, "learning_rate": 1.9135609198244424e-05, "loss": 0.0096, "step": 55950 }, { "epoch": 1.7302342832416393, "grad_norm": 0.12774841487407684, "learning_rate": 1.9135145577053845e-05, "loss": 0.0097, "step": 55980 }, { "epoch": 1.731161525622798, "grad_norm": 0.18127135932445526, "learning_rate": 1.9134681955863264e-05, "loss": 0.0099, "step": 56010 }, { "epoch": 1.7320887680039563, "grad_norm": 0.11238892376422882, "learning_rate": 1.9134218334672685e-05, "loss": 0.0088, "step": 56040 }, { "epoch": 1.7330160103851147, "grad_norm": 0.12405003607273102, "learning_rate": 1.9133754713482107e-05, "loss": 0.0099, "step": 56070 }, { "epoch": 1.7339432527662733, "grad_norm": 0.13921202719211578, "learning_rate": 1.9133291092291525e-05, "loss": 0.01, "step": 56100 }, { "epoch": 1.7348704951474314, "grad_norm": 0.19013284146785736, "learning_rate": 1.9132827471100946e-05, "loss": 0.0091, "step": 56130 }, { "epoch": 1.73579773752859, "grad_norm": 0.12127307057380676, "learning_rate": 1.9132363849910368e-05, "loss": 0.0093, "step": 56160 }, { "epoch": 1.7367249799097484, 
"grad_norm": 0.16938094794750214, "learning_rate": 1.913190022871979e-05, "loss": 0.0089, "step": 56190 }, { "epoch": 1.7376522222909068, "grad_norm": 0.15857680141925812, "learning_rate": 1.913143660752921e-05, "loss": 0.0097, "step": 56220 }, { "epoch": 1.7385794646720654, "grad_norm": 0.11882348358631134, "learning_rate": 1.913097298633863e-05, "loss": 0.0088, "step": 56250 }, { "epoch": 1.7395067070532237, "grad_norm": 0.15856103599071503, "learning_rate": 1.913050936514805e-05, "loss": 0.01, "step": 56280 }, { "epoch": 1.7404339494343821, "grad_norm": 0.20786434412002563, "learning_rate": 1.9130045743957472e-05, "loss": 0.0099, "step": 56310 }, { "epoch": 1.7413611918155407, "grad_norm": 0.14965207874774933, "learning_rate": 1.9129582122766893e-05, "loss": 0.0088, "step": 56340 }, { "epoch": 1.7422884341966989, "grad_norm": 0.08363660424947739, "learning_rate": 1.9129118501576315e-05, "loss": 0.0091, "step": 56370 }, { "epoch": 1.7432156765778575, "grad_norm": 0.1501367837190628, "learning_rate": 1.9128654880385736e-05, "loss": 0.0092, "step": 56400 }, { "epoch": 1.7441429189590159, "grad_norm": 0.11382821947336197, "learning_rate": 1.9128191259195155e-05, "loss": 0.0092, "step": 56430 }, { "epoch": 1.7450701613401742, "grad_norm": 0.1429881453514099, "learning_rate": 1.9127727638004576e-05, "loss": 0.0092, "step": 56460 }, { "epoch": 1.7459974037213328, "grad_norm": 0.14989063143730164, "learning_rate": 1.9127264016813998e-05, "loss": 0.0089, "step": 56490 }, { "epoch": 1.7469246461024912, "grad_norm": 0.0941828116774559, "learning_rate": 1.9126800395623416e-05, "loss": 0.0101, "step": 56520 }, { "epoch": 1.7478518884836496, "grad_norm": 0.17130373418331146, "learning_rate": 1.9126336774432837e-05, "loss": 0.0092, "step": 56550 }, { "epoch": 1.7487791308648082, "grad_norm": 0.1459270715713501, "learning_rate": 1.912587315324226e-05, "loss": 0.009, "step": 56580 }, { "epoch": 1.7497063732459663, "grad_norm": 0.17441968619823456, "learning_rate": 
1.912540953205168e-05, "loss": 0.0092, "step": 56610 }, { "epoch": 1.750633615627125, "grad_norm": 0.1702623963356018, "learning_rate": 1.91249459108611e-05, "loss": 0.0093, "step": 56640 }, { "epoch": 1.7515608580082833, "grad_norm": 0.1644851267337799, "learning_rate": 1.912448228967052e-05, "loss": 0.0098, "step": 56670 }, { "epoch": 1.7524881003894417, "grad_norm": 0.1811038702726364, "learning_rate": 1.912401866847994e-05, "loss": 0.0097, "step": 56700 }, { "epoch": 1.7534153427706003, "grad_norm": 0.12836286425590515, "learning_rate": 1.9123555047289363e-05, "loss": 0.0096, "step": 56730 }, { "epoch": 1.7543425851517587, "grad_norm": 0.1694982498884201, "learning_rate": 1.9123091426098784e-05, "loss": 0.0099, "step": 56760 }, { "epoch": 1.755269827532917, "grad_norm": 0.11523500829935074, "learning_rate": 1.9122627804908206e-05, "loss": 0.0096, "step": 56790 }, { "epoch": 1.7561970699140756, "grad_norm": 0.12877310812473297, "learning_rate": 1.9122164183717627e-05, "loss": 0.0094, "step": 56820 }, { "epoch": 1.757124312295234, "grad_norm": 0.14352649450302124, "learning_rate": 1.9121700562527045e-05, "loss": 0.0092, "step": 56850 }, { "epoch": 1.7580515546763924, "grad_norm": 0.1453847736120224, "learning_rate": 1.9121236941336467e-05, "loss": 0.009, "step": 56880 }, { "epoch": 1.758978797057551, "grad_norm": 0.1685023009777069, "learning_rate": 1.9120773320145885e-05, "loss": 0.0093, "step": 56910 }, { "epoch": 1.7599060394387092, "grad_norm": 0.11498740315437317, "learning_rate": 1.912030969895531e-05, "loss": 0.0092, "step": 56940 }, { "epoch": 1.7608332818198678, "grad_norm": 0.11391297727823257, "learning_rate": 1.911984607776473e-05, "loss": 0.0094, "step": 56970 }, { "epoch": 1.7617605242010261, "grad_norm": 0.14003457129001617, "learning_rate": 1.911938245657415e-05, "loss": 0.0102, "step": 57000 }, { "epoch": 1.7626877665821845, "grad_norm": 0.1557619869709015, "learning_rate": 1.911891883538357e-05, "loss": 0.0099, "step": 57030 }, { "epoch": 
1.7636150089633431, "grad_norm": 0.10675251483917236, "learning_rate": 1.9118455214192993e-05, "loss": 0.0096, "step": 57060 }, { "epoch": 1.7645422513445015, "grad_norm": 0.14731018245220184, "learning_rate": 1.911799159300241e-05, "loss": 0.0103, "step": 57090 }, { "epoch": 1.7654694937256599, "grad_norm": 0.1403256207704544, "learning_rate": 1.9117527971811832e-05, "loss": 0.0097, "step": 57120 }, { "epoch": 1.7663967361068185, "grad_norm": 0.08880030363798141, "learning_rate": 1.9117064350621254e-05, "loss": 0.009, "step": 57150 }, { "epoch": 1.7673239784879766, "grad_norm": 0.13318701088428497, "learning_rate": 1.9116600729430675e-05, "loss": 0.0091, "step": 57180 }, { "epoch": 1.7682512208691352, "grad_norm": 0.17154277861118317, "learning_rate": 1.9116137108240097e-05, "loss": 0.0101, "step": 57210 }, { "epoch": 1.7691784632502936, "grad_norm": 0.10519508272409439, "learning_rate": 1.9115673487049515e-05, "loss": 0.0093, "step": 57240 }, { "epoch": 1.770105705631452, "grad_norm": 0.12178880721330643, "learning_rate": 1.9115209865858936e-05, "loss": 0.0094, "step": 57270 }, { "epoch": 1.7710329480126106, "grad_norm": 0.12247934937477112, "learning_rate": 1.9114746244668358e-05, "loss": 0.009, "step": 57300 }, { "epoch": 1.771960190393769, "grad_norm": 0.18209509551525116, "learning_rate": 1.911428262347778e-05, "loss": 0.0096, "step": 57330 }, { "epoch": 1.7728874327749273, "grad_norm": 0.11330309510231018, "learning_rate": 1.91138190022872e-05, "loss": 0.0091, "step": 57360 }, { "epoch": 1.773814675156086, "grad_norm": 0.10543116182088852, "learning_rate": 1.9113355381096622e-05, "loss": 0.0094, "step": 57390 }, { "epoch": 1.774741917537244, "grad_norm": 0.1423691362142563, "learning_rate": 1.911289175990604e-05, "loss": 0.0095, "step": 57420 }, { "epoch": 1.7756691599184027, "grad_norm": 0.12785960733890533, "learning_rate": 1.9112428138715462e-05, "loss": 0.0099, "step": 57450 }, { "epoch": 1.776596402299561, "grad_norm": 0.15297368168830872, 
"learning_rate": 1.9111964517524884e-05, "loss": 0.0091, "step": 57480 }, { "epoch": 1.7775236446807194, "grad_norm": 0.09806019812822342, "learning_rate": 1.91115008963343e-05, "loss": 0.0096, "step": 57510 }, { "epoch": 1.778450887061878, "grad_norm": 0.3151446282863617, "learning_rate": 1.9111037275143723e-05, "loss": 0.0097, "step": 57540 }, { "epoch": 1.7793781294430364, "grad_norm": 0.11838164180517197, "learning_rate": 1.9110573653953145e-05, "loss": 0.0092, "step": 57570 }, { "epoch": 1.7803053718241948, "grad_norm": 0.14801162481307983, "learning_rate": 1.9110110032762566e-05, "loss": 0.0089, "step": 57600 }, { "epoch": 1.7812326142053534, "grad_norm": 0.2099505513906479, "learning_rate": 1.9109646411571988e-05, "loss": 0.0094, "step": 57630 }, { "epoch": 1.7821598565865115, "grad_norm": 0.14530889689922333, "learning_rate": 1.9109182790381406e-05, "loss": 0.0093, "step": 57660 }, { "epoch": 1.7830870989676701, "grad_norm": 0.16612043976783752, "learning_rate": 1.9108719169190827e-05, "loss": 0.01, "step": 57690 }, { "epoch": 1.7840143413488287, "grad_norm": 0.2378138154745102, "learning_rate": 1.910825554800025e-05, "loss": 0.0092, "step": 57720 }, { "epoch": 1.784941583729987, "grad_norm": 0.13058830797672272, "learning_rate": 1.910779192680967e-05, "loss": 0.0098, "step": 57750 }, { "epoch": 1.7858688261111455, "grad_norm": 0.10912700742483139, "learning_rate": 1.9107328305619092e-05, "loss": 0.0088, "step": 57780 }, { "epoch": 1.7867960684923039, "grad_norm": 0.12460000813007355, "learning_rate": 1.9106864684428513e-05, "loss": 0.0095, "step": 57810 }, { "epoch": 1.7877233108734623, "grad_norm": 0.1489262878894806, "learning_rate": 1.910640106323793e-05, "loss": 0.0092, "step": 57840 }, { "epoch": 1.7886505532546209, "grad_norm": 0.11183461546897888, "learning_rate": 1.9105937442047353e-05, "loss": 0.0093, "step": 57870 }, { "epoch": 1.7895777956357792, "grad_norm": 0.14622057974338531, "learning_rate": 1.910547382085677e-05, "loss": 0.0097, "step": 
57900 }, { "epoch": 1.7905050380169376, "grad_norm": 0.13182926177978516, "learning_rate": 1.9105010199666193e-05, "loss": 0.0098, "step": 57930 }, { "epoch": 1.7914322803980962, "grad_norm": 0.09324318170547485, "learning_rate": 1.9104546578475614e-05, "loss": 0.0091, "step": 57960 }, { "epoch": 1.7923595227792544, "grad_norm": 0.1586207151412964, "learning_rate": 1.9104082957285036e-05, "loss": 0.0093, "step": 57990 }, { "epoch": 1.793286765160413, "grad_norm": 0.1095605120062828, "learning_rate": 1.9103619336094457e-05, "loss": 0.009, "step": 58020 }, { "epoch": 1.7942140075415713, "grad_norm": 0.12339567393064499, "learning_rate": 1.910315571490388e-05, "loss": 0.0097, "step": 58050 }, { "epoch": 1.7951412499227297, "grad_norm": 0.12609724700450897, "learning_rate": 1.9102692093713297e-05, "loss": 0.0098, "step": 58080 }, { "epoch": 1.7960684923038883, "grad_norm": 0.12668557465076447, "learning_rate": 1.9102228472522718e-05, "loss": 0.0097, "step": 58110 }, { "epoch": 1.7969957346850467, "grad_norm": 0.12492331117391586, "learning_rate": 1.910176485133214e-05, "loss": 0.0093, "step": 58140 }, { "epoch": 1.797922977066205, "grad_norm": 0.18879801034927368, "learning_rate": 1.910130123014156e-05, "loss": 0.0098, "step": 58170 }, { "epoch": 1.7988502194473637, "grad_norm": 0.11470051854848862, "learning_rate": 1.9100837608950983e-05, "loss": 0.0092, "step": 58200 }, { "epoch": 1.7997774618285218, "grad_norm": 0.2197941094636917, "learning_rate": 1.91003739877604e-05, "loss": 0.0098, "step": 58230 }, { "epoch": 1.8007047042096804, "grad_norm": 0.1137099415063858, "learning_rate": 1.9099910366569822e-05, "loss": 0.009, "step": 58260 }, { "epoch": 1.8016319465908388, "grad_norm": 0.14055195450782776, "learning_rate": 1.9099446745379244e-05, "loss": 0.009, "step": 58290 }, { "epoch": 1.8025591889719972, "grad_norm": 0.12624052166938782, "learning_rate": 1.9098983124188662e-05, "loss": 0.0092, "step": 58320 }, { "epoch": 1.8034864313531558, "grad_norm": 
0.1372157782316208, "learning_rate": 1.9098519502998087e-05, "loss": 0.0095, "step": 58350 }, { "epoch": 1.8044136737343142, "grad_norm": 0.11069510877132416, "learning_rate": 1.909805588180751e-05, "loss": 0.0089, "step": 58380 }, { "epoch": 1.8053409161154725, "grad_norm": 0.13277314603328705, "learning_rate": 1.9097592260616926e-05, "loss": 0.0083, "step": 58410 }, { "epoch": 1.8062681584966311, "grad_norm": 0.13979008793830872, "learning_rate": 1.9097128639426348e-05, "loss": 0.0095, "step": 58440 }, { "epoch": 1.8071954008777893, "grad_norm": 0.106143057346344, "learning_rate": 1.909666501823577e-05, "loss": 0.0097, "step": 58470 }, { "epoch": 1.808122643258948, "grad_norm": 0.14058811962604523, "learning_rate": 1.9096201397045188e-05, "loss": 0.0098, "step": 58500 }, { "epoch": 1.8090498856401063, "grad_norm": 0.14922446012496948, "learning_rate": 1.909573777585461e-05, "loss": 0.0094, "step": 58530 }, { "epoch": 1.8099771280212646, "grad_norm": 0.138026162981987, "learning_rate": 1.909527415466403e-05, "loss": 0.0091, "step": 58560 }, { "epoch": 1.8109043704024232, "grad_norm": 0.16290608048439026, "learning_rate": 1.9094810533473452e-05, "loss": 0.01, "step": 58590 }, { "epoch": 1.8118316127835816, "grad_norm": 0.140450581908226, "learning_rate": 1.9094346912282874e-05, "loss": 0.0094, "step": 58620 }, { "epoch": 1.81275885516474, "grad_norm": 0.1488400101661682, "learning_rate": 1.9093883291092292e-05, "loss": 0.0095, "step": 58650 }, { "epoch": 1.8136860975458986, "grad_norm": 0.16881272196769714, "learning_rate": 1.9093419669901713e-05, "loss": 0.0088, "step": 58680 }, { "epoch": 1.814613339927057, "grad_norm": 0.0795733854174614, "learning_rate": 1.9092956048711135e-05, "loss": 0.0089, "step": 58710 }, { "epoch": 1.8155405823082154, "grad_norm": 0.16176141798496246, "learning_rate": 1.9092492427520556e-05, "loss": 0.0093, "step": 58740 }, { "epoch": 1.816467824689374, "grad_norm": 0.12653523683547974, "learning_rate": 1.9092028806329978e-05, "loss": 
0.0102, "step": 58770 }, { "epoch": 1.817395067070532, "grad_norm": 0.1458776444196701, "learning_rate": 1.90915651851394e-05, "loss": 0.0097, "step": 58800 }, { "epoch": 1.8183223094516907, "grad_norm": 0.14672403037548065, "learning_rate": 1.9091101563948817e-05, "loss": 0.0093, "step": 58830 }, { "epoch": 1.819249551832849, "grad_norm": 0.19287113845348358, "learning_rate": 1.909063794275824e-05, "loss": 0.0097, "step": 58860 }, { "epoch": 1.8201767942140075, "grad_norm": 0.1762712597846985, "learning_rate": 1.9090174321567657e-05, "loss": 0.0095, "step": 58890 }, { "epoch": 1.821104036595166, "grad_norm": 0.12777851521968842, "learning_rate": 1.908971070037708e-05, "loss": 0.0096, "step": 58920 }, { "epoch": 1.8220312789763244, "grad_norm": 0.1380002796649933, "learning_rate": 1.90892470791865e-05, "loss": 0.0098, "step": 58950 }, { "epoch": 1.8229585213574828, "grad_norm": 0.11182005703449249, "learning_rate": 1.908878345799592e-05, "loss": 0.0094, "step": 58980 }, { "epoch": 1.8238857637386414, "grad_norm": 0.15216749906539917, "learning_rate": 1.9088319836805343e-05, "loss": 0.0096, "step": 59010 }, { "epoch": 1.8248130061197996, "grad_norm": 0.16489852964878082, "learning_rate": 1.9087856215614765e-05, "loss": 0.0093, "step": 59040 }, { "epoch": 1.8257402485009582, "grad_norm": 0.13042420148849487, "learning_rate": 1.9087392594424183e-05, "loss": 0.009, "step": 59070 }, { "epoch": 1.8266674908821166, "grad_norm": 0.14320331811904907, "learning_rate": 1.9086928973233604e-05, "loss": 0.0094, "step": 59100 }, { "epoch": 1.827594733263275, "grad_norm": 0.12307153642177582, "learning_rate": 1.9086465352043026e-05, "loss": 0.0085, "step": 59130 }, { "epoch": 1.8285219756444335, "grad_norm": 0.1353633552789688, "learning_rate": 1.9086001730852447e-05, "loss": 0.0095, "step": 59160 }, { "epoch": 1.829449218025592, "grad_norm": 0.13079820573329926, "learning_rate": 1.908553810966187e-05, "loss": 0.0092, "step": 59190 }, { "epoch": 1.8303764604067503, "grad_norm": 
0.13820411264896393, "learning_rate": 1.9085074488471287e-05, "loss": 0.0095, "step": 59220 }, { "epoch": 1.8313037027879089, "grad_norm": 0.2079014778137207, "learning_rate": 1.908461086728071e-05, "loss": 0.0092, "step": 59250 }, { "epoch": 1.832230945169067, "grad_norm": 0.09954431653022766, "learning_rate": 1.908414724609013e-05, "loss": 0.0093, "step": 59280 }, { "epoch": 1.8331581875502256, "grad_norm": 0.14853043854236603, "learning_rate": 1.9083683624899548e-05, "loss": 0.0089, "step": 59310 }, { "epoch": 1.834085429931384, "grad_norm": 0.15565913915634155, "learning_rate": 1.908322000370897e-05, "loss": 0.0088, "step": 59340 }, { "epoch": 1.8350126723125424, "grad_norm": 0.12933789193630219, "learning_rate": 1.908275638251839e-05, "loss": 0.0084, "step": 59370 }, { "epoch": 1.835939914693701, "grad_norm": 0.1294112205505371, "learning_rate": 1.9082292761327812e-05, "loss": 0.0098, "step": 59400 }, { "epoch": 1.8368671570748594, "grad_norm": 0.09510964155197144, "learning_rate": 1.9081829140137234e-05, "loss": 0.0092, "step": 59430 }, { "epoch": 1.8377943994560177, "grad_norm": 0.15951798856258392, "learning_rate": 1.9081365518946655e-05, "loss": 0.0097, "step": 59460 }, { "epoch": 1.8387216418371763, "grad_norm": 0.10656768083572388, "learning_rate": 1.9080901897756074e-05, "loss": 0.0089, "step": 59490 }, { "epoch": 1.8396488842183347, "grad_norm": 0.16272607445716858, "learning_rate": 1.9080438276565495e-05, "loss": 0.0099, "step": 59520 }, { "epoch": 1.840576126599493, "grad_norm": 0.13074111938476562, "learning_rate": 1.9079974655374917e-05, "loss": 0.0087, "step": 59550 }, { "epoch": 1.8415033689806517, "grad_norm": 0.10480541735887527, "learning_rate": 1.9079511034184338e-05, "loss": 0.0088, "step": 59580 }, { "epoch": 1.8424306113618099, "grad_norm": 0.09175165742635727, "learning_rate": 1.907904741299376e-05, "loss": 0.0103, "step": 59610 }, { "epoch": 1.8433578537429685, "grad_norm": 0.1139521524310112, "learning_rate": 1.9078583791803178e-05, 
"loss": 0.01, "step": 59640 }, { "epoch": 1.8442850961241268, "grad_norm": 0.12340928614139557, "learning_rate": 1.90781201706126e-05, "loss": 0.0102, "step": 59670 }, { "epoch": 1.8452123385052852, "grad_norm": 0.10071641206741333, "learning_rate": 1.907765654942202e-05, "loss": 0.0096, "step": 59700 }, { "epoch": 1.8461395808864438, "grad_norm": 0.1530892550945282, "learning_rate": 1.907719292823144e-05, "loss": 0.0088, "step": 59730 }, { "epoch": 1.8470668232676022, "grad_norm": 0.11075989156961441, "learning_rate": 1.9076729307040864e-05, "loss": 0.0103, "step": 59760 }, { "epoch": 1.8479940656487606, "grad_norm": 0.16041330993175507, "learning_rate": 1.9076265685850285e-05, "loss": 0.0092, "step": 59790 }, { "epoch": 1.8489213080299192, "grad_norm": 0.13961970806121826, "learning_rate": 1.9075802064659703e-05, "loss": 0.0091, "step": 59820 }, { "epoch": 1.8498485504110773, "grad_norm": 0.13643532991409302, "learning_rate": 1.9075338443469125e-05, "loss": 0.0099, "step": 59850 }, { "epoch": 1.850775792792236, "grad_norm": 0.1500256359577179, "learning_rate": 1.9074874822278546e-05, "loss": 0.0096, "step": 59880 }, { "epoch": 1.8517030351733943, "grad_norm": 0.23553705215454102, "learning_rate": 1.9074411201087965e-05, "loss": 0.0096, "step": 59910 }, { "epoch": 1.8526302775545527, "grad_norm": 0.15305650234222412, "learning_rate": 1.9073947579897386e-05, "loss": 0.0098, "step": 59940 }, { "epoch": 1.8535575199357113, "grad_norm": 0.1456664651632309, "learning_rate": 1.9073483958706808e-05, "loss": 0.0095, "step": 59970 }, { "epoch": 1.8544847623168697, "grad_norm": 0.13518153131008148, "learning_rate": 1.907302033751623e-05, "loss": 0.0092, "step": 60000 }, { "epoch": 1.855412004698028, "grad_norm": 0.18454313278198242, "learning_rate": 1.907255671632565e-05, "loss": 0.0087, "step": 60030 }, { "epoch": 1.8563392470791866, "grad_norm": 0.10428538173437119, "learning_rate": 1.907209309513507e-05, "loss": 0.009, "step": 60060 }, { "epoch": 1.8572664894603448, 
"grad_norm": 0.14127443730831146, "learning_rate": 1.9071644927984177e-05, "loss": 0.0094, "step": 60090 }, { "epoch": 1.8581937318415034, "grad_norm": 0.1017933264374733, "learning_rate": 1.9071181306793598e-05, "loss": 0.0086, "step": 60120 }, { "epoch": 1.8591209742226618, "grad_norm": 0.14192751049995422, "learning_rate": 1.9070717685603016e-05, "loss": 0.0097, "step": 60150 }, { "epoch": 1.8600482166038201, "grad_norm": 0.13043881952762604, "learning_rate": 1.9070254064412438e-05, "loss": 0.0086, "step": 60180 }, { "epoch": 1.8609754589849787, "grad_norm": 0.1340523362159729, "learning_rate": 1.906979044322186e-05, "loss": 0.0088, "step": 60210 }, { "epoch": 1.8619027013661371, "grad_norm": 0.11857260018587112, "learning_rate": 1.906932682203128e-05, "loss": 0.0092, "step": 60240 }, { "epoch": 1.8628299437472955, "grad_norm": 0.12999200820922852, "learning_rate": 1.9068863200840702e-05, "loss": 0.0091, "step": 60270 }, { "epoch": 1.863757186128454, "grad_norm": 0.15751798450946808, "learning_rate": 1.9068399579650124e-05, "loss": 0.0088, "step": 60300 }, { "epoch": 1.8646844285096122, "grad_norm": 0.12903964519500732, "learning_rate": 1.9067935958459542e-05, "loss": 0.0092, "step": 60330 }, { "epoch": 1.8656116708907708, "grad_norm": 0.11230449378490448, "learning_rate": 1.9067472337268963e-05, "loss": 0.0089, "step": 60360 }, { "epoch": 1.8665389132719294, "grad_norm": 0.13289953768253326, "learning_rate": 1.9067008716078385e-05, "loss": 0.0098, "step": 60390 }, { "epoch": 1.8674661556530876, "grad_norm": 0.11573710292577744, "learning_rate": 1.9066545094887806e-05, "loss": 0.009, "step": 60420 }, { "epoch": 1.8683933980342462, "grad_norm": 0.13184423744678497, "learning_rate": 1.9066081473697228e-05, "loss": 0.0086, "step": 60450 }, { "epoch": 1.8693206404154046, "grad_norm": 0.1487945020198822, "learning_rate": 1.9065617852506646e-05, "loss": 0.009, "step": 60480 }, { "epoch": 1.870247882796563, "grad_norm": 0.13500678539276123, "learning_rate": 
1.9065154231316067e-05, "loss": 0.0095, "step": 60510 }, { "epoch": 1.8711751251777216, "grad_norm": 0.1343154013156891, "learning_rate": 1.906469061012549e-05, "loss": 0.0093, "step": 60540 }, { "epoch": 1.87210236755888, "grad_norm": 0.18538621068000793, "learning_rate": 1.9064226988934907e-05, "loss": 0.0091, "step": 60570 }, { "epoch": 1.8730296099400383, "grad_norm": 0.11275152862071991, "learning_rate": 1.906376336774433e-05, "loss": 0.0101, "step": 60600 }, { "epoch": 1.873956852321197, "grad_norm": 0.1220078095793724, "learning_rate": 1.906329974655375e-05, "loss": 0.0096, "step": 60630 }, { "epoch": 1.874884094702355, "grad_norm": 0.10824988782405853, "learning_rate": 1.906283612536317e-05, "loss": 0.0086, "step": 60660 }, { "epoch": 1.8758113370835137, "grad_norm": 0.13964611291885376, "learning_rate": 1.9062372504172593e-05, "loss": 0.0091, "step": 60690 }, { "epoch": 1.876738579464672, "grad_norm": 0.1350889503955841, "learning_rate": 1.906190888298201e-05, "loss": 0.0085, "step": 60720 }, { "epoch": 1.8776658218458304, "grad_norm": 0.13655762374401093, "learning_rate": 1.9061445261791433e-05, "loss": 0.0095, "step": 60750 }, { "epoch": 1.878593064226989, "grad_norm": 0.1113176941871643, "learning_rate": 1.9060981640600854e-05, "loss": 0.009, "step": 60780 }, { "epoch": 1.8795203066081474, "grad_norm": 0.11275703459978104, "learning_rate": 1.9060518019410276e-05, "loss": 0.0092, "step": 60810 }, { "epoch": 1.8804475489893058, "grad_norm": 0.10507626086473465, "learning_rate": 1.9060054398219697e-05, "loss": 0.0093, "step": 60840 }, { "epoch": 1.8813747913704644, "grad_norm": 0.12415433675050735, "learning_rate": 1.905959077702912e-05, "loss": 0.0085, "step": 60870 }, { "epoch": 1.8823020337516225, "grad_norm": 0.13976049423217773, "learning_rate": 1.9059127155838537e-05, "loss": 0.0092, "step": 60900 }, { "epoch": 1.8832292761327811, "grad_norm": 0.12405195087194443, "learning_rate": 1.905866353464796e-05, "loss": 0.0086, "step": 60930 }, { "epoch": 
1.8841565185139395, "grad_norm": 0.11321035027503967, "learning_rate": 1.905819991345738e-05, "loss": 0.0095, "step": 60960 }, { "epoch": 1.8850837608950979, "grad_norm": 0.15693306922912598, "learning_rate": 1.9057736292266798e-05, "loss": 0.0091, "step": 60990 }, { "epoch": 1.8860110032762565, "grad_norm": 0.18756218254566193, "learning_rate": 1.9057272671076223e-05, "loss": 0.0093, "step": 61020 }, { "epoch": 1.8869382456574149, "grad_norm": 0.1325468271970749, "learning_rate": 1.905680904988564e-05, "loss": 0.0097, "step": 61050 }, { "epoch": 1.8878654880385732, "grad_norm": 0.1329220086336136, "learning_rate": 1.9056345428695063e-05, "loss": 0.0097, "step": 61080 }, { "epoch": 1.8887927304197318, "grad_norm": 0.1144622415304184, "learning_rate": 1.9055881807504484e-05, "loss": 0.0097, "step": 61110 }, { "epoch": 1.88971997280089, "grad_norm": 0.1259905844926834, "learning_rate": 1.9055418186313902e-05, "loss": 0.0096, "step": 61140 }, { "epoch": 1.8906472151820486, "grad_norm": 0.13827066123485565, "learning_rate": 1.9054954565123324e-05, "loss": 0.0092, "step": 61170 }, { "epoch": 1.891574457563207, "grad_norm": 0.11490660160779953, "learning_rate": 1.9054490943932745e-05, "loss": 0.0089, "step": 61200 }, { "epoch": 1.8925016999443653, "grad_norm": 0.19296786189079285, "learning_rate": 1.9054027322742167e-05, "loss": 0.0091, "step": 61230 }, { "epoch": 1.893428942325524, "grad_norm": 0.1746770143508911, "learning_rate": 1.9053563701551588e-05, "loss": 0.0094, "step": 61260 }, { "epoch": 1.8943561847066823, "grad_norm": 0.1452881097793579, "learning_rate": 1.905310008036101e-05, "loss": 0.0091, "step": 61290 }, { "epoch": 1.8952834270878407, "grad_norm": 0.15034690499305725, "learning_rate": 1.9052636459170428e-05, "loss": 0.0096, "step": 61320 }, { "epoch": 1.8962106694689993, "grad_norm": 0.1274152547121048, "learning_rate": 1.905217283797985e-05, "loss": 0.0094, "step": 61350 }, { "epoch": 1.8971379118501577, "grad_norm": 0.10763230174779892, 
"learning_rate": 1.905170921678927e-05, "loss": 0.0089, "step": 61380 }, { "epoch": 1.898065154231316, "grad_norm": 0.11832676082849503, "learning_rate": 1.9051245595598692e-05, "loss": 0.0092, "step": 61410 }, { "epoch": 1.8989923966124747, "grad_norm": 0.09381038695573807, "learning_rate": 1.9050781974408114e-05, "loss": 0.0086, "step": 61440 }, { "epoch": 1.8999196389936328, "grad_norm": 0.14295919239521027, "learning_rate": 1.9050318353217532e-05, "loss": 0.0093, "step": 61470 }, { "epoch": 1.9008468813747914, "grad_norm": 0.11144337058067322, "learning_rate": 1.9049854732026953e-05, "loss": 0.0098, "step": 61500 }, { "epoch": 1.9017741237559498, "grad_norm": 0.19241581857204437, "learning_rate": 1.9049391110836375e-05, "loss": 0.009, "step": 61530 }, { "epoch": 1.9027013661371082, "grad_norm": 0.138858824968338, "learning_rate": 1.9048927489645793e-05, "loss": 0.0095, "step": 61560 }, { "epoch": 1.9036286085182668, "grad_norm": 0.10097570717334747, "learning_rate": 1.9048463868455215e-05, "loss": 0.0086, "step": 61590 }, { "epoch": 1.9045558508994251, "grad_norm": 0.11737438291311264, "learning_rate": 1.9048000247264636e-05, "loss": 0.0099, "step": 61620 }, { "epoch": 1.9054830932805835, "grad_norm": 0.1274745613336563, "learning_rate": 1.9047536626074058e-05, "loss": 0.0095, "step": 61650 }, { "epoch": 1.9064103356617421, "grad_norm": 0.12672437727451324, "learning_rate": 1.904707300488348e-05, "loss": 0.0092, "step": 61680 }, { "epoch": 1.9073375780429003, "grad_norm": 0.11251486092805862, "learning_rate": 1.9046609383692897e-05, "loss": 0.0099, "step": 61710 }, { "epoch": 1.9082648204240589, "grad_norm": 0.20637133717536926, "learning_rate": 1.904614576250232e-05, "loss": 0.0088, "step": 61740 }, { "epoch": 1.9091920628052172, "grad_norm": 0.11758559197187424, "learning_rate": 1.904568214131174e-05, "loss": 0.0092, "step": 61770 }, { "epoch": 1.9101193051863756, "grad_norm": 0.17867404222488403, "learning_rate": 1.9045218520121162e-05, "loss": 0.0094, 
"step": 61800 }, { "epoch": 1.9110465475675342, "grad_norm": 0.16294452548027039, "learning_rate": 1.9044754898930583e-05, "loss": 0.0097, "step": 61830 }, { "epoch": 1.9119737899486926, "grad_norm": 0.1393040269613266, "learning_rate": 1.9044291277740005e-05, "loss": 0.0091, "step": 61860 }, { "epoch": 1.912901032329851, "grad_norm": 0.13124382495880127, "learning_rate": 1.9043827656549423e-05, "loss": 0.0096, "step": 61890 }, { "epoch": 1.9138282747110096, "grad_norm": 0.08335110545158386, "learning_rate": 1.9043364035358844e-05, "loss": 0.0089, "step": 61920 }, { "epoch": 1.9147555170921677, "grad_norm": 0.09705237299203873, "learning_rate": 1.9042900414168266e-05, "loss": 0.0093, "step": 61950 }, { "epoch": 1.9156827594733263, "grad_norm": 0.1063515916466713, "learning_rate": 1.9042436792977684e-05, "loss": 0.0089, "step": 61980 }, { "epoch": 1.9166100018544847, "grad_norm": 0.14963437616825104, "learning_rate": 1.9041973171787106e-05, "loss": 0.0086, "step": 62010 }, { "epoch": 1.917537244235643, "grad_norm": 0.1541127860546112, "learning_rate": 1.9041509550596527e-05, "loss": 0.0091, "step": 62040 }, { "epoch": 1.9184644866168017, "grad_norm": 0.09581714123487473, "learning_rate": 1.904104592940595e-05, "loss": 0.0095, "step": 62070 }, { "epoch": 1.91939172899796, "grad_norm": 0.1103588119149208, "learning_rate": 1.904058230821537e-05, "loss": 0.0092, "step": 62100 }, { "epoch": 1.9203189713791184, "grad_norm": 0.120629221200943, "learning_rate": 1.9040118687024788e-05, "loss": 0.0092, "step": 62130 }, { "epoch": 1.921246213760277, "grad_norm": 0.12340272217988968, "learning_rate": 1.903965506583421e-05, "loss": 0.0095, "step": 62160 }, { "epoch": 1.9221734561414354, "grad_norm": 0.14694437384605408, "learning_rate": 1.903919144464363e-05, "loss": 0.0096, "step": 62190 }, { "epoch": 1.9231006985225938, "grad_norm": 0.1375669687986374, "learning_rate": 1.9038727823453053e-05, "loss": 0.0098, "step": 62220 }, { "epoch": 1.9240279409037524, "grad_norm": 
0.14238828420639038, "learning_rate": 1.9038264202262474e-05, "loss": 0.0102, "step": 62250 }, { "epoch": 1.9249551832849106, "grad_norm": 0.18961717188358307, "learning_rate": 1.9037800581071896e-05, "loss": 0.0098, "step": 62280 }, { "epoch": 1.9258824256660692, "grad_norm": 0.149754598736763, "learning_rate": 1.9037336959881314e-05, "loss": 0.0097, "step": 62310 }, { "epoch": 1.9268096680472275, "grad_norm": 0.14573989808559418, "learning_rate": 1.9036873338690735e-05, "loss": 0.0094, "step": 62340 }, { "epoch": 1.927736910428386, "grad_norm": 0.13750629127025604, "learning_rate": 1.9036409717500157e-05, "loss": 0.0086, "step": 62370 }, { "epoch": 1.9286641528095445, "grad_norm": 0.09911882132291794, "learning_rate": 1.9035946096309575e-05, "loss": 0.0088, "step": 62400 }, { "epoch": 1.9295913951907029, "grad_norm": 0.19665664434432983, "learning_rate": 1.9035482475119e-05, "loss": 0.0097, "step": 62430 }, { "epoch": 1.9305186375718613, "grad_norm": 0.17365387082099915, "learning_rate": 1.9035018853928418e-05, "loss": 0.01, "step": 62460 }, { "epoch": 1.9314458799530199, "grad_norm": 0.13629050552845, "learning_rate": 1.903455523273784e-05, "loss": 0.0088, "step": 62490 }, { "epoch": 1.932373122334178, "grad_norm": 0.10273543000221252, "learning_rate": 1.903409161154726e-05, "loss": 0.0091, "step": 62520 }, { "epoch": 1.9333003647153366, "grad_norm": 0.09896998107433319, "learning_rate": 1.903362799035668e-05, "loss": 0.0094, "step": 62550 }, { "epoch": 1.934227607096495, "grad_norm": 0.14015135169029236, "learning_rate": 1.90331643691661e-05, "loss": 0.0087, "step": 62580 }, { "epoch": 1.9351548494776534, "grad_norm": 0.17226681113243103, "learning_rate": 1.9032700747975522e-05, "loss": 0.0107, "step": 62610 }, { "epoch": 1.936082091858812, "grad_norm": 0.12491384148597717, "learning_rate": 1.9032237126784944e-05, "loss": 0.0094, "step": 62640 }, { "epoch": 1.9370093342399703, "grad_norm": 0.15217767655849457, "learning_rate": 1.9031773505594365e-05, "loss": 
0.0084, "step": 62670 }, { "epoch": 1.9379365766211287, "grad_norm": 0.19567051529884338, "learning_rate": 1.9031309884403787e-05, "loss": 0.0092, "step": 62700 }, { "epoch": 1.9388638190022873, "grad_norm": 0.16607308387756348, "learning_rate": 1.9030846263213205e-05, "loss": 0.0092, "step": 62730 }, { "epoch": 1.9397910613834455, "grad_norm": 0.15475702285766602, "learning_rate": 1.9030382642022626e-05, "loss": 0.0083, "step": 62760 }, { "epoch": 1.940718303764604, "grad_norm": 0.1401626169681549, "learning_rate": 1.9029919020832048e-05, "loss": 0.0096, "step": 62790 }, { "epoch": 1.9416455461457625, "grad_norm": 0.11708253622055054, "learning_rate": 1.902945539964147e-05, "loss": 0.0091, "step": 62820 }, { "epoch": 1.9425727885269208, "grad_norm": 0.19589698314666748, "learning_rate": 1.902899177845089e-05, "loss": 0.0096, "step": 62850 }, { "epoch": 1.9435000309080794, "grad_norm": 0.2052047699689865, "learning_rate": 1.902852815726031e-05, "loss": 0.0095, "step": 62880 }, { "epoch": 1.9444272732892378, "grad_norm": 0.09994105994701385, "learning_rate": 1.902806453606973e-05, "loss": 0.0097, "step": 62910 }, { "epoch": 1.9453545156703962, "grad_norm": 0.11955256015062332, "learning_rate": 1.9027600914879152e-05, "loss": 0.0096, "step": 62940 }, { "epoch": 1.9462817580515548, "grad_norm": 0.11118430644273758, "learning_rate": 1.902713729368857e-05, "loss": 0.0091, "step": 62970 }, { "epoch": 1.9472090004327132, "grad_norm": 0.1869446486234665, "learning_rate": 1.902667367249799e-05, "loss": 0.0086, "step": 63000 }, { "epoch": 1.9481362428138715, "grad_norm": 0.14018604159355164, "learning_rate": 1.9026210051307413e-05, "loss": 0.0088, "step": 63030 }, { "epoch": 1.9490634851950301, "grad_norm": 0.17518387734889984, "learning_rate": 1.9025746430116835e-05, "loss": 0.0094, "step": 63060 }, { "epoch": 1.9499907275761883, "grad_norm": 0.09303198754787445, "learning_rate": 1.9025282808926256e-05, "loss": 0.0088, "step": 63090 }, { "epoch": 1.950917969957347, 
"grad_norm": 0.169603630900383, "learning_rate": 1.9024819187735674e-05, "loss": 0.0086, "step": 63120 }, { "epoch": 1.9518452123385053, "grad_norm": 0.11652740836143494, "learning_rate": 1.9024355566545096e-05, "loss": 0.0097, "step": 63150 }, { "epoch": 1.9527724547196637, "grad_norm": 0.17860741913318634, "learning_rate": 1.9023891945354517e-05, "loss": 0.009, "step": 63180 }, { "epoch": 1.9536996971008223, "grad_norm": 0.12186767905950546, "learning_rate": 1.902342832416394e-05, "loss": 0.0104, "step": 63210 }, { "epoch": 1.9546269394819806, "grad_norm": 0.15101395547389984, "learning_rate": 1.9022980157013043e-05, "loss": 0.0096, "step": 63240 }, { "epoch": 1.955554181863139, "grad_norm": 0.26696833968162537, "learning_rate": 1.9022516535822465e-05, "loss": 0.0095, "step": 63270 }, { "epoch": 1.9564814242442976, "grad_norm": 0.1088373064994812, "learning_rate": 1.9022052914631886e-05, "loss": 0.0097, "step": 63300 }, { "epoch": 1.9574086666254558, "grad_norm": 0.0767214223742485, "learning_rate": 1.9021589293441308e-05, "loss": 0.0095, "step": 63330 }, { "epoch": 1.9583359090066144, "grad_norm": 0.0849837213754654, "learning_rate": 1.902112567225073e-05, "loss": 0.0089, "step": 63360 }, { "epoch": 1.9592631513877727, "grad_norm": 0.19714535772800446, "learning_rate": 1.9020662051060147e-05, "loss": 0.0092, "step": 63390 }, { "epoch": 1.9601903937689311, "grad_norm": 0.11759000271558762, "learning_rate": 1.902019842986957e-05, "loss": 0.0092, "step": 63420 }, { "epoch": 1.9611176361500897, "grad_norm": 0.17578251659870148, "learning_rate": 1.901973480867899e-05, "loss": 0.0091, "step": 63450 }, { "epoch": 1.962044878531248, "grad_norm": 0.1105835810303688, "learning_rate": 1.9019271187488412e-05, "loss": 0.0106, "step": 63480 }, { "epoch": 1.9629721209124065, "grad_norm": 0.16292352974414825, "learning_rate": 1.9018807566297833e-05, "loss": 0.01, "step": 63510 }, { "epoch": 1.963899363293565, "grad_norm": 0.1264432668685913, "learning_rate": 
1.901834394510725e-05, "loss": 0.0093, "step": 63540 }, { "epoch": 1.9648266056747232, "grad_norm": 0.15947729349136353, "learning_rate": 1.9017880323916673e-05, "loss": 0.0096, "step": 63570 }, { "epoch": 1.9657538480558818, "grad_norm": 0.09663262218236923, "learning_rate": 1.9017416702726094e-05, "loss": 0.0099, "step": 63600 }, { "epoch": 1.9666810904370402, "grad_norm": 0.1371634155511856, "learning_rate": 1.9016953081535513e-05, "loss": 0.0096, "step": 63630 }, { "epoch": 1.9676083328181986, "grad_norm": 0.12497830390930176, "learning_rate": 1.9016489460344934e-05, "loss": 0.0091, "step": 63660 }, { "epoch": 1.9685355751993572, "grad_norm": 0.15984059870243073, "learning_rate": 1.9016025839154356e-05, "loss": 0.0091, "step": 63690 }, { "epoch": 1.9694628175805156, "grad_norm": 0.12621347606182098, "learning_rate": 1.9015562217963777e-05, "loss": 0.0086, "step": 63720 }, { "epoch": 1.970390059961674, "grad_norm": 0.16863922774791718, "learning_rate": 1.90150985967732e-05, "loss": 0.0092, "step": 63750 }, { "epoch": 1.9713173023428325, "grad_norm": 0.15847687423229218, "learning_rate": 1.901463497558262e-05, "loss": 0.009, "step": 63780 }, { "epoch": 1.9722445447239907, "grad_norm": 0.11528904736042023, "learning_rate": 1.9014171354392038e-05, "loss": 0.0092, "step": 63810 }, { "epoch": 1.9731717871051493, "grad_norm": 0.1577402651309967, "learning_rate": 1.901370773320146e-05, "loss": 0.0089, "step": 63840 }, { "epoch": 1.9740990294863077, "grad_norm": 0.11878088116645813, "learning_rate": 1.901324411201088e-05, "loss": 0.009, "step": 63870 }, { "epoch": 1.975026271867466, "grad_norm": 0.15793371200561523, "learning_rate": 1.9012780490820303e-05, "loss": 0.0082, "step": 63900 }, { "epoch": 1.9759535142486246, "grad_norm": 0.13721726834774017, "learning_rate": 1.9012316869629724e-05, "loss": 0.009, "step": 63930 }, { "epoch": 1.976880756629783, "grad_norm": 0.1303936392068863, "learning_rate": 1.901186870247883e-05, "loss": 0.0093, "step": 63960 }, { "epoch": 
1.9778079990109414, "grad_norm": 0.16049392521381378, "learning_rate": 1.901140508128825e-05, "loss": 0.0093, "step": 63990 }, { "epoch": 1.9787352413921, "grad_norm": 0.1618276685476303, "learning_rate": 1.9010941460097672e-05, "loss": 0.01, "step": 64020 }, { "epoch": 1.9796624837732584, "grad_norm": 0.16271555423736572, "learning_rate": 1.901047783890709e-05, "loss": 0.0083, "step": 64050 }, { "epoch": 1.9805897261544168, "grad_norm": 0.10620540380477905, "learning_rate": 1.901001421771651e-05, "loss": 0.0088, "step": 64080 }, { "epoch": 1.9815169685355754, "grad_norm": 0.11167493462562561, "learning_rate": 1.9009550596525933e-05, "loss": 0.0095, "step": 64110 }, { "epoch": 1.9824442109167335, "grad_norm": 0.16313228011131287, "learning_rate": 1.9009086975335354e-05, "loss": 0.0089, "step": 64140 }, { "epoch": 1.983371453297892, "grad_norm": 0.11637566983699799, "learning_rate": 1.9008623354144776e-05, "loss": 0.0096, "step": 64170 }, { "epoch": 1.9842986956790505, "grad_norm": 0.10372761636972427, "learning_rate": 1.9008159732954197e-05, "loss": 0.0099, "step": 64200 }, { "epoch": 1.9852259380602089, "grad_norm": 0.1633736789226532, "learning_rate": 1.9007696111763616e-05, "loss": 0.0087, "step": 64230 }, { "epoch": 1.9861531804413675, "grad_norm": 0.11466097086668015, "learning_rate": 1.9007232490573037e-05, "loss": 0.0091, "step": 64260 }, { "epoch": 1.9870804228225258, "grad_norm": 0.13708263635635376, "learning_rate": 1.900676886938246e-05, "loss": 0.0088, "step": 64290 }, { "epoch": 1.9880076652036842, "grad_norm": 0.1355397254228592, "learning_rate": 1.900630524819188e-05, "loss": 0.0091, "step": 64320 }, { "epoch": 1.9889349075848428, "grad_norm": 0.1417113095521927, "learning_rate": 1.90058416270013e-05, "loss": 0.0093, "step": 64350 }, { "epoch": 1.989862149966001, "grad_norm": 0.18434807658195496, "learning_rate": 1.900537800581072e-05, "loss": 0.009, "step": 64380 }, { "epoch": 1.9907893923471596, "grad_norm": 0.19822551310062408, "learning_rate": 
1.900491438462014e-05, "loss": 0.0095, "step": 64410 }, { "epoch": 1.991716634728318, "grad_norm": 0.10598516464233398, "learning_rate": 1.9004450763429563e-05, "loss": 0.0089, "step": 64440 }, { "epoch": 1.9926438771094763, "grad_norm": 0.1367039978504181, "learning_rate": 1.900398714223898e-05, "loss": 0.0089, "step": 64470 }, { "epoch": 1.993571119490635, "grad_norm": 0.10684409737586975, "learning_rate": 1.9003523521048402e-05, "loss": 0.0097, "step": 64500 }, { "epoch": 1.9944983618717933, "grad_norm": 0.09892908483743668, "learning_rate": 1.9003059899857824e-05, "loss": 0.0094, "step": 64530 }, { "epoch": 1.9954256042529517, "grad_norm": 0.12868237495422363, "learning_rate": 1.9002596278667245e-05, "loss": 0.0095, "step": 64560 }, { "epoch": 1.9963528466341103, "grad_norm": 0.12531247735023499, "learning_rate": 1.9002132657476667e-05, "loss": 0.0081, "step": 64590 }, { "epoch": 1.9972800890152684, "grad_norm": 0.14696528017520905, "learning_rate": 1.900166903628609e-05, "loss": 0.009, "step": 64620 }, { "epoch": 1.998207331396427, "grad_norm": 0.1545921415090561, "learning_rate": 1.9001205415095506e-05, "loss": 0.0092, "step": 64650 }, { "epoch": 1.9991345737775854, "grad_norm": 0.1880977898836136, "learning_rate": 1.9000741793904928e-05, "loss": 0.009, "step": 64680 }, { "epoch": 2.000061816158744, "grad_norm": 0.11475808173418045, "learning_rate": 1.900027817271435e-05, "loss": 0.0096, "step": 64710 }, { "epoch": 2.0009890585399024, "grad_norm": 0.12773172557353973, "learning_rate": 1.899981455152377e-05, "loss": 0.0097, "step": 64740 }, { "epoch": 2.001916300921061, "grad_norm": 0.15816722810268402, "learning_rate": 1.8999350930333193e-05, "loss": 0.0084, "step": 64770 }, { "epoch": 2.002843543302219, "grad_norm": 0.13860182464122772, "learning_rate": 1.899888730914261e-05, "loss": 0.0084, "step": 64800 }, { "epoch": 2.0037707856833777, "grad_norm": 0.1457679718732834, "learning_rate": 1.8998423687952032e-05, "loss": 0.0091, "step": 64830 }, { "epoch": 
2.004698028064536, "grad_norm": 0.13077963888645172, "learning_rate": 1.8997960066761454e-05, "loss": 0.0093, "step": 64860 }, { "epoch": 2.0056252704456945, "grad_norm": 0.11316544562578201, "learning_rate": 1.8997496445570872e-05, "loss": 0.0089, "step": 64890 }, { "epoch": 2.006552512826853, "grad_norm": 0.11615769565105438, "learning_rate": 1.8997032824380293e-05, "loss": 0.009, "step": 64920 }, { "epoch": 2.0074797552080113, "grad_norm": 0.19580519199371338, "learning_rate": 1.8996569203189715e-05, "loss": 0.0085, "step": 64950 }, { "epoch": 2.00840699758917, "grad_norm": 0.19162940979003906, "learning_rate": 1.8996105581999136e-05, "loss": 0.0096, "step": 64980 }, { "epoch": 2.0093342399703285, "grad_norm": 0.11968031525611877, "learning_rate": 1.8995641960808558e-05, "loss": 0.009, "step": 65010 }, { "epoch": 2.0102614823514866, "grad_norm": 0.13664618134498596, "learning_rate": 1.8995178339617976e-05, "loss": 0.0085, "step": 65040 }, { "epoch": 2.011188724732645, "grad_norm": 0.10808546096086502, "learning_rate": 1.8994714718427397e-05, "loss": 0.0092, "step": 65070 }, { "epoch": 2.0121159671138034, "grad_norm": 0.15749041736125946, "learning_rate": 1.899425109723682e-05, "loss": 0.0098, "step": 65100 }, { "epoch": 2.013043209494962, "grad_norm": 0.12906228005886078, "learning_rate": 1.899378747604624e-05, "loss": 0.0095, "step": 65130 }, { "epoch": 2.0139704518761206, "grad_norm": 0.11465371400117874, "learning_rate": 1.8993323854855662e-05, "loss": 0.0092, "step": 65160 }, { "epoch": 2.0148976942572787, "grad_norm": 0.15533116459846497, "learning_rate": 1.8992860233665083e-05, "loss": 0.0103, "step": 65190 }, { "epoch": 2.0158249366384373, "grad_norm": 0.23096424341201782, "learning_rate": 1.89923966124745e-05, "loss": 0.0096, "step": 65220 }, { "epoch": 2.016752179019596, "grad_norm": 0.10848797857761383, "learning_rate": 1.8991932991283923e-05, "loss": 0.0085, "step": 65250 }, { "epoch": 2.017679421400754, "grad_norm": 0.15202327072620392, 
"learning_rate": 1.8991469370093345e-05, "loss": 0.008, "step": 65280 }, { "epoch": 2.0186066637819127, "grad_norm": 0.08730794489383698, "learning_rate": 1.8991005748902766e-05, "loss": 0.0087, "step": 65310 }, { "epoch": 2.019533906163071, "grad_norm": 0.10692930966615677, "learning_rate": 1.8990542127712188e-05, "loss": 0.0097, "step": 65340 }, { "epoch": 2.0204611485442294, "grad_norm": 0.14468488097190857, "learning_rate": 1.8990078506521606e-05, "loss": 0.0095, "step": 65370 }, { "epoch": 2.021388390925388, "grad_norm": 0.1579744815826416, "learning_rate": 1.8989614885331027e-05, "loss": 0.0086, "step": 65400 }, { "epoch": 2.022315633306546, "grad_norm": 0.14883960783481598, "learning_rate": 1.898915126414045e-05, "loss": 0.009, "step": 65430 }, { "epoch": 2.023242875687705, "grad_norm": 0.14120815694332123, "learning_rate": 1.8988687642949867e-05, "loss": 0.0089, "step": 65460 }, { "epoch": 2.0241701180688634, "grad_norm": 0.0958087369799614, "learning_rate": 1.898822402175929e-05, "loss": 0.009, "step": 65490 }, { "epoch": 2.0250973604500215, "grad_norm": 0.1257772296667099, "learning_rate": 1.898776040056871e-05, "loss": 0.0094, "step": 65520 }, { "epoch": 2.02602460283118, "grad_norm": 0.11547397077083588, "learning_rate": 1.898729677937813e-05, "loss": 0.0086, "step": 65550 }, { "epoch": 2.0269518452123383, "grad_norm": 0.13431216776371002, "learning_rate": 1.8986833158187553e-05, "loss": 0.0087, "step": 65580 }, { "epoch": 2.027879087593497, "grad_norm": 0.13823598623275757, "learning_rate": 1.8986369536996974e-05, "loss": 0.0087, "step": 65610 }, { "epoch": 2.0288063299746555, "grad_norm": 0.10067378729581833, "learning_rate": 1.8985905915806392e-05, "loss": 0.0086, "step": 65640 }, { "epoch": 2.0297335723558136, "grad_norm": 0.14060132205486298, "learning_rate": 1.8985442294615814e-05, "loss": 0.0088, "step": 65670 }, { "epoch": 2.0306608147369722, "grad_norm": 0.11318201571702957, "learning_rate": 1.8984978673425235e-05, "loss": 0.0091, "step": 65700 
}, { "epoch": 2.031588057118131, "grad_norm": 0.12335211038589478, "learning_rate": 1.8984515052234657e-05, "loss": 0.0086, "step": 65730 }, { "epoch": 2.032515299499289, "grad_norm": 0.25289386510849, "learning_rate": 1.898405143104408e-05, "loss": 0.009, "step": 65760 }, { "epoch": 2.0334425418804476, "grad_norm": 0.1182640939950943, "learning_rate": 1.8983587809853497e-05, "loss": 0.0092, "step": 65790 }, { "epoch": 2.034369784261606, "grad_norm": 0.12467535585165024, "learning_rate": 1.8983124188662918e-05, "loss": 0.0089, "step": 65820 }, { "epoch": 2.0352970266427644, "grad_norm": 0.138853520154953, "learning_rate": 1.898266056747234e-05, "loss": 0.0086, "step": 65850 }, { "epoch": 2.036224269023923, "grad_norm": 0.11774081736803055, "learning_rate": 1.8982196946281758e-05, "loss": 0.0098, "step": 65880 }, { "epoch": 2.037151511405081, "grad_norm": 0.1453554630279541, "learning_rate": 1.898173332509118e-05, "loss": 0.0092, "step": 65910 }, { "epoch": 2.0380787537862397, "grad_norm": 0.10599801689386368, "learning_rate": 1.89812697039006e-05, "loss": 0.0087, "step": 65940 }, { "epoch": 2.0390059961673983, "grad_norm": 0.1475299894809723, "learning_rate": 1.8980806082710022e-05, "loss": 0.0087, "step": 65970 }, { "epoch": 2.0399332385485565, "grad_norm": 0.12606190145015717, "learning_rate": 1.8980342461519444e-05, "loss": 0.0095, "step": 66000 }, { "epoch": 2.040860480929715, "grad_norm": 0.1375729888677597, "learning_rate": 1.8979878840328862e-05, "loss": 0.0088, "step": 66030 }, { "epoch": 2.0417877233108737, "grad_norm": 0.1380685716867447, "learning_rate": 1.8979415219138283e-05, "loss": 0.0092, "step": 66060 }, { "epoch": 2.042714965692032, "grad_norm": 0.10499219596385956, "learning_rate": 1.8978951597947705e-05, "loss": 0.0081, "step": 66090 }, { "epoch": 2.0436422080731904, "grad_norm": 0.12475612759590149, "learning_rate": 1.8978487976757126e-05, "loss": 0.0091, "step": 66120 }, { "epoch": 2.0445694504543486, "grad_norm": 0.14613312482833862, 
"learning_rate": 1.8978024355566548e-05, "loss": 0.0088, "step": 66150 }, { "epoch": 2.045496692835507, "grad_norm": 0.12583017349243164, "learning_rate": 1.897756073437597e-05, "loss": 0.009, "step": 66180 }, { "epoch": 2.0464239352166658, "grad_norm": 0.14173677563667297, "learning_rate": 1.8977097113185388e-05, "loss": 0.0089, "step": 66210 }, { "epoch": 2.047351177597824, "grad_norm": 0.10217849165201187, "learning_rate": 1.897663349199481e-05, "loss": 0.0097, "step": 66240 }, { "epoch": 2.0482784199789825, "grad_norm": 0.11388631165027618, "learning_rate": 1.897616987080423e-05, "loss": 0.0094, "step": 66270 }, { "epoch": 2.049205662360141, "grad_norm": 0.18763694167137146, "learning_rate": 1.897570624961365e-05, "loss": 0.0089, "step": 66300 }, { "epoch": 2.0501329047412993, "grad_norm": 0.15172521770000458, "learning_rate": 1.897524262842307e-05, "loss": 0.0093, "step": 66330 }, { "epoch": 2.051060147122458, "grad_norm": 0.1428171992301941, "learning_rate": 1.897477900723249e-05, "loss": 0.0083, "step": 66360 }, { "epoch": 2.051987389503616, "grad_norm": 0.14987333118915558, "learning_rate": 1.8974315386041913e-05, "loss": 0.0091, "step": 66390 }, { "epoch": 2.0529146318847746, "grad_norm": 0.12561367452144623, "learning_rate": 1.8973851764851335e-05, "loss": 0.0092, "step": 66420 }, { "epoch": 2.0538418742659332, "grad_norm": 0.15525296330451965, "learning_rate": 1.8973388143660753e-05, "loss": 0.0092, "step": 66450 }, { "epoch": 2.0547691166470914, "grad_norm": 0.1708170771598816, "learning_rate": 1.8972924522470174e-05, "loss": 0.0091, "step": 66480 }, { "epoch": 2.05569635902825, "grad_norm": 0.13217654824256897, "learning_rate": 1.8972460901279596e-05, "loss": 0.0087, "step": 66510 }, { "epoch": 2.0566236014094086, "grad_norm": 0.15263767540454865, "learning_rate": 1.8971997280089017e-05, "loss": 0.0093, "step": 66540 }, { "epoch": 2.0575508437905667, "grad_norm": 0.12828969955444336, "learning_rate": 1.897153365889844e-05, "loss": 0.0089, "step": 66570 
}, { "epoch": 2.0584780861717253, "grad_norm": 0.14098788797855377, "learning_rate": 1.897107003770786e-05, "loss": 0.0091, "step": 66600 }, { "epoch": 2.059405328552884, "grad_norm": 0.09310761839151382, "learning_rate": 1.897060641651728e-05, "loss": 0.0089, "step": 66630 }, { "epoch": 2.060332570934042, "grad_norm": 0.16830267012119293, "learning_rate": 1.89701427953267e-05, "loss": 0.009, "step": 66660 }, { "epoch": 2.0612598133152007, "grad_norm": 0.2279578447341919, "learning_rate": 1.8969679174136118e-05, "loss": 0.0085, "step": 66690 }, { "epoch": 2.062187055696359, "grad_norm": 0.1812441349029541, "learning_rate": 1.8969215552945543e-05, "loss": 0.0094, "step": 66720 }, { "epoch": 2.0631142980775175, "grad_norm": 0.17217817902565002, "learning_rate": 1.8968751931754964e-05, "loss": 0.0089, "step": 66750 }, { "epoch": 2.064041540458676, "grad_norm": 0.1246933564543724, "learning_rate": 1.8968288310564383e-05, "loss": 0.0085, "step": 66780 }, { "epoch": 2.064968782839834, "grad_norm": 0.10423219949007034, "learning_rate": 1.8967824689373804e-05, "loss": 0.0091, "step": 66810 }, { "epoch": 2.065896025220993, "grad_norm": 0.13233426213264465, "learning_rate": 1.8967361068183226e-05, "loss": 0.0092, "step": 66840 }, { "epoch": 2.0668232676021514, "grad_norm": 0.20915277302265167, "learning_rate": 1.8966897446992644e-05, "loss": 0.0087, "step": 66870 }, { "epoch": 2.0677505099833096, "grad_norm": 0.08850126713514328, "learning_rate": 1.8966433825802065e-05, "loss": 0.0087, "step": 66900 }, { "epoch": 2.068677752364468, "grad_norm": 0.12746590375900269, "learning_rate": 1.8965970204611487e-05, "loss": 0.0091, "step": 66930 }, { "epoch": 2.0696049947456263, "grad_norm": 0.12976905703544617, "learning_rate": 1.8965506583420908e-05, "loss": 0.0086, "step": 66960 }, { "epoch": 2.070532237126785, "grad_norm": 0.1600741446018219, "learning_rate": 1.896504296223033e-05, "loss": 0.0089, "step": 66990 }, { "epoch": 2.0714594795079435, "grad_norm": 0.16356977820396423, 
"learning_rate": 1.8964579341039748e-05, "loss": 0.0092, "step": 67020 }, { "epoch": 2.0723867218891017, "grad_norm": 0.15789812803268433, "learning_rate": 1.896411571984917e-05, "loss": 0.0086, "step": 67050 }, { "epoch": 2.0733139642702603, "grad_norm": 0.11779878288507462, "learning_rate": 1.896365209865859e-05, "loss": 0.0087, "step": 67080 }, { "epoch": 2.074241206651419, "grad_norm": 0.11191772669553757, "learning_rate": 1.8963188477468012e-05, "loss": 0.0093, "step": 67110 }, { "epoch": 2.075168449032577, "grad_norm": 0.168721541762352, "learning_rate": 1.8962724856277434e-05, "loss": 0.009, "step": 67140 }, { "epoch": 2.0760956914137356, "grad_norm": 0.14309649169445038, "learning_rate": 1.8962261235086855e-05, "loss": 0.0084, "step": 67170 }, { "epoch": 2.0770229337948938, "grad_norm": 0.10397924482822418, "learning_rate": 1.8961797613896274e-05, "loss": 0.0086, "step": 67200 }, { "epoch": 2.0779501761760524, "grad_norm": 0.10500927269458771, "learning_rate": 1.8961333992705695e-05, "loss": 0.009, "step": 67230 }, { "epoch": 2.078877418557211, "grad_norm": 0.1136791780591011, "learning_rate": 1.8960870371515117e-05, "loss": 0.0086, "step": 67260 }, { "epoch": 2.079804660938369, "grad_norm": 0.16977989673614502, "learning_rate": 1.8960406750324535e-05, "loss": 0.0092, "step": 67290 }, { "epoch": 2.0807319033195277, "grad_norm": 0.17049583792686462, "learning_rate": 1.8959943129133956e-05, "loss": 0.0091, "step": 67320 }, { "epoch": 2.0816591457006863, "grad_norm": 0.15765345096588135, "learning_rate": 1.8959479507943378e-05, "loss": 0.0078, "step": 67350 }, { "epoch": 2.0825863880818445, "grad_norm": 0.12697376310825348, "learning_rate": 1.89590158867528e-05, "loss": 0.0093, "step": 67380 }, { "epoch": 2.083513630463003, "grad_norm": 0.14551502466201782, "learning_rate": 1.895855226556222e-05, "loss": 0.0091, "step": 67410 }, { "epoch": 2.0844408728441612, "grad_norm": 0.11650168150663376, "learning_rate": 1.895808864437164e-05, "loss": 0.0091, "step": 
67440 }, { "epoch": 2.08536811522532, "grad_norm": 0.12619873881340027, "learning_rate": 1.895762502318106e-05, "loss": 0.0093, "step": 67470 }, { "epoch": 2.0862953576064784, "grad_norm": 0.11821484565734863, "learning_rate": 1.8957161401990482e-05, "loss": 0.0086, "step": 67500 }, { "epoch": 2.0872225999876366, "grad_norm": 0.12292352318763733, "learning_rate": 1.8956697780799903e-05, "loss": 0.0084, "step": 67530 }, { "epoch": 2.088149842368795, "grad_norm": 0.12670384347438812, "learning_rate": 1.8956234159609325e-05, "loss": 0.0084, "step": 67560 }, { "epoch": 2.089077084749954, "grad_norm": 0.11425792425870895, "learning_rate": 1.8955770538418746e-05, "loss": 0.0081, "step": 67590 }, { "epoch": 2.090004327131112, "grad_norm": 0.13008829951286316, "learning_rate": 1.8955306917228164e-05, "loss": 0.0095, "step": 67620 }, { "epoch": 2.0909315695122705, "grad_norm": 0.15753936767578125, "learning_rate": 1.8954843296037586e-05, "loss": 0.0095, "step": 67650 }, { "epoch": 2.091858811893429, "grad_norm": 0.12383640557527542, "learning_rate": 1.8954379674847004e-05, "loss": 0.0095, "step": 67680 }, { "epoch": 2.0927860542745873, "grad_norm": 0.10606686770915985, "learning_rate": 1.8953916053656426e-05, "loss": 0.0094, "step": 67710 }, { "epoch": 2.093713296655746, "grad_norm": 0.12386137247085571, "learning_rate": 1.8953452432465847e-05, "loss": 0.0085, "step": 67740 }, { "epoch": 2.094640539036904, "grad_norm": 0.17757362127304077, "learning_rate": 1.895298881127527e-05, "loss": 0.0085, "step": 67770 }, { "epoch": 2.0955677814180627, "grad_norm": 0.09576290845870972, "learning_rate": 1.895252519008469e-05, "loss": 0.0085, "step": 67800 }, { "epoch": 2.0964950237992213, "grad_norm": 0.14230401813983917, "learning_rate": 1.895206156889411e-05, "loss": 0.0094, "step": 67830 }, { "epoch": 2.0974222661803794, "grad_norm": 0.16352061927318573, "learning_rate": 1.895159794770353e-05, "loss": 0.0102, "step": 67860 }, { "epoch": 2.098349508561538, "grad_norm": 
0.12838277220726013, "learning_rate": 1.895113432651295e-05, "loss": 0.0092, "step": 67890 }, { "epoch": 2.0992767509426966, "grad_norm": 0.09615806490182877, "learning_rate": 1.8950670705322373e-05, "loss": 0.0082, "step": 67920 }, { "epoch": 2.1002039933238548, "grad_norm": 0.16167783737182617, "learning_rate": 1.8950207084131794e-05, "loss": 0.0091, "step": 67950 }, { "epoch": 2.1011312357050134, "grad_norm": 0.08501963317394257, "learning_rate": 1.8949743462941216e-05, "loss": 0.0083, "step": 67980 }, { "epoch": 2.1020584780861715, "grad_norm": 0.16387620568275452, "learning_rate": 1.8949279841750634e-05, "loss": 0.0091, "step": 68010 }, { "epoch": 2.10298572046733, "grad_norm": 0.09395553171634674, "learning_rate": 1.8948816220560055e-05, "loss": 0.0083, "step": 68040 }, { "epoch": 2.1039129628484887, "grad_norm": 0.07801681756973267, "learning_rate": 1.8948352599369477e-05, "loss": 0.0091, "step": 68070 }, { "epoch": 2.104840205229647, "grad_norm": 0.13253183662891388, "learning_rate": 1.8947888978178895e-05, "loss": 0.0093, "step": 68100 }, { "epoch": 2.1057674476108055, "grad_norm": 0.14533409476280212, "learning_rate": 1.894742535698832e-05, "loss": 0.0098, "step": 68130 }, { "epoch": 2.106694689991964, "grad_norm": 0.11559165269136429, "learning_rate": 1.894696173579774e-05, "loss": 0.0083, "step": 68160 }, { "epoch": 2.1076219323731222, "grad_norm": 0.10860398411750793, "learning_rate": 1.894649811460716e-05, "loss": 0.0088, "step": 68190 }, { "epoch": 2.108549174754281, "grad_norm": 0.15254497528076172, "learning_rate": 1.894603449341658e-05, "loss": 0.0086, "step": 68220 }, { "epoch": 2.109476417135439, "grad_norm": 0.08755811303853989, "learning_rate": 1.8945570872226003e-05, "loss": 0.0087, "step": 68250 }, { "epoch": 2.1104036595165976, "grad_norm": 0.12163553386926651, "learning_rate": 1.894510725103542e-05, "loss": 0.0086, "step": 68280 }, { "epoch": 2.111330901897756, "grad_norm": 0.2019680142402649, "learning_rate": 1.8944643629844842e-05, 
"loss": 0.0083, "step": 68310 }, { "epoch": 2.1122581442789143, "grad_norm": 0.1410064995288849, "learning_rate": 1.8944180008654264e-05, "loss": 0.0094, "step": 68340 }, { "epoch": 2.113185386660073, "grad_norm": 0.17468823492527008, "learning_rate": 1.8943716387463685e-05, "loss": 0.0082, "step": 68370 }, { "epoch": 2.1141126290412315, "grad_norm": 0.12097126245498657, "learning_rate": 1.8943252766273107e-05, "loss": 0.0094, "step": 68400 }, { "epoch": 2.1150398714223897, "grad_norm": 0.13267478346824646, "learning_rate": 1.8942789145082525e-05, "loss": 0.0085, "step": 68430 }, { "epoch": 2.1159671138035483, "grad_norm": 0.10222849249839783, "learning_rate": 1.8942340977931633e-05, "loss": 0.009, "step": 68460 }, { "epoch": 2.116894356184707, "grad_norm": 0.125253364443779, "learning_rate": 1.8941877356741054e-05, "loss": 0.0086, "step": 68490 }, { "epoch": 2.117821598565865, "grad_norm": 0.13151666522026062, "learning_rate": 1.8941413735550472e-05, "loss": 0.0081, "step": 68520 }, { "epoch": 2.1187488409470236, "grad_norm": 0.13030564785003662, "learning_rate": 1.8940950114359894e-05, "loss": 0.0098, "step": 68550 }, { "epoch": 2.119676083328182, "grad_norm": 0.11686971783638, "learning_rate": 1.8940486493169315e-05, "loss": 0.0088, "step": 68580 }, { "epoch": 2.1206033257093404, "grad_norm": 0.1871291548013687, "learning_rate": 1.8940022871978737e-05, "loss": 0.0086, "step": 68610 }, { "epoch": 2.121530568090499, "grad_norm": 0.10454609245061874, "learning_rate": 1.893955925078816e-05, "loss": 0.0085, "step": 68640 }, { "epoch": 2.122457810471657, "grad_norm": 0.17025896906852722, "learning_rate": 1.893909562959758e-05, "loss": 0.0085, "step": 68670 }, { "epoch": 2.1233850528528158, "grad_norm": 0.18512579798698425, "learning_rate": 1.8938632008406998e-05, "loss": 0.0094, "step": 68700 }, { "epoch": 2.1243122952339744, "grad_norm": 0.24098490178585052, "learning_rate": 1.893816838721642e-05, "loss": 0.0098, "step": 68730 }, { "epoch": 2.1252395376151325, 
"grad_norm": 0.18122804164886475, "learning_rate": 1.893770476602584e-05, "loss": 0.0092, "step": 68760 }, { "epoch": 2.126166779996291, "grad_norm": 0.14257536828517914, "learning_rate": 1.8937241144835262e-05, "loss": 0.0096, "step": 68790 }, { "epoch": 2.1270940223774493, "grad_norm": 0.13276635110378265, "learning_rate": 1.8936777523644684e-05, "loss": 0.0089, "step": 68820 }, { "epoch": 2.128021264758608, "grad_norm": 0.1384515017271042, "learning_rate": 1.8936313902454102e-05, "loss": 0.0093, "step": 68850 }, { "epoch": 2.1289485071397665, "grad_norm": 0.11156316101551056, "learning_rate": 1.8935850281263524e-05, "loss": 0.0089, "step": 68880 }, { "epoch": 2.1298757495209246, "grad_norm": 0.15153776109218597, "learning_rate": 1.8935386660072945e-05, "loss": 0.0087, "step": 68910 }, { "epoch": 2.130802991902083, "grad_norm": 0.15474291145801544, "learning_rate": 1.8934923038882363e-05, "loss": 0.0101, "step": 68940 }, { "epoch": 2.131730234283242, "grad_norm": 0.17571423947811127, "learning_rate": 1.8934459417691785e-05, "loss": 0.0085, "step": 68970 }, { "epoch": 2.1326574766644, "grad_norm": 0.11417470872402191, "learning_rate": 1.8933995796501206e-05, "loss": 0.0093, "step": 69000 }, { "epoch": 2.1335847190455586, "grad_norm": 0.13590805232524872, "learning_rate": 1.8933532175310628e-05, "loss": 0.0095, "step": 69030 }, { "epoch": 2.1345119614267167, "grad_norm": 0.1487530618906021, "learning_rate": 1.893306855412005e-05, "loss": 0.0085, "step": 69060 }, { "epoch": 2.1354392038078753, "grad_norm": 0.0748179629445076, "learning_rate": 1.893260493292947e-05, "loss": 0.0087, "step": 69090 }, { "epoch": 2.136366446189034, "grad_norm": 0.1396723985671997, "learning_rate": 1.893214131173889e-05, "loss": 0.0093, "step": 69120 }, { "epoch": 2.137293688570192, "grad_norm": 0.40192583203315735, "learning_rate": 1.893167769054831e-05, "loss": 0.0082, "step": 69150 }, { "epoch": 2.1382209309513507, "grad_norm": 0.1580619215965271, "learning_rate": 
1.8931214069357732e-05, "loss": 0.0087, "step": 69180 }, { "epoch": 2.1391481733325093, "grad_norm": 0.11349703371524811, "learning_rate": 1.8930750448167153e-05, "loss": 0.0092, "step": 69210 }, { "epoch": 2.1400754157136674, "grad_norm": 0.11525145918130875, "learning_rate": 1.8930286826976575e-05, "loss": 0.0095, "step": 69240 }, { "epoch": 2.141002658094826, "grad_norm": 0.15722735226154327, "learning_rate": 1.8929823205785993e-05, "loss": 0.0084, "step": 69270 }, { "epoch": 2.1419299004759846, "grad_norm": 0.14131617546081543, "learning_rate": 1.8929359584595415e-05, "loss": 0.0096, "step": 69300 }, { "epoch": 2.142857142857143, "grad_norm": 0.139816552400589, "learning_rate": 1.8928895963404836e-05, "loss": 0.0087, "step": 69330 }, { "epoch": 2.1437843852383014, "grad_norm": 0.09025824815034866, "learning_rate": 1.8928432342214254e-05, "loss": 0.0094, "step": 69360 }, { "epoch": 2.1447116276194595, "grad_norm": 0.16685907542705536, "learning_rate": 1.8927968721023676e-05, "loss": 0.0099, "step": 69390 }, { "epoch": 2.145638870000618, "grad_norm": 0.11043151468038559, "learning_rate": 1.89275050998331e-05, "loss": 0.0084, "step": 69420 }, { "epoch": 2.1465661123817767, "grad_norm": 0.1615799516439438, "learning_rate": 1.892704147864252e-05, "loss": 0.0078, "step": 69450 }, { "epoch": 2.147493354762935, "grad_norm": 0.15646037459373474, "learning_rate": 1.892657785745194e-05, "loss": 0.0091, "step": 69480 }, { "epoch": 2.1484205971440935, "grad_norm": 0.10793735831975937, "learning_rate": 1.8926114236261358e-05, "loss": 0.0093, "step": 69510 }, { "epoch": 2.149347839525252, "grad_norm": 0.10871328413486481, "learning_rate": 1.892565061507078e-05, "loss": 0.0089, "step": 69540 }, { "epoch": 2.1502750819064103, "grad_norm": 0.20696048438549042, "learning_rate": 1.89251869938802e-05, "loss": 0.0089, "step": 69570 }, { "epoch": 2.151202324287569, "grad_norm": 0.13094669580459595, "learning_rate": 1.8924723372689623e-05, "loss": 0.0091, "step": 69600 }, { "epoch": 
2.152129566668727, "grad_norm": 0.13827069103717804, "learning_rate": 1.8924259751499044e-05, "loss": 0.0095, "step": 69630 }, { "epoch": 2.1530568090498856, "grad_norm": 0.16554994881153107, "learning_rate": 1.8923796130308466e-05, "loss": 0.0092, "step": 69660 }, { "epoch": 2.153984051431044, "grad_norm": 0.10166825354099274, "learning_rate": 1.8923332509117884e-05, "loss": 0.0086, "step": 69690 }, { "epoch": 2.1549112938122024, "grad_norm": 0.14612141251564026, "learning_rate": 1.8922868887927305e-05, "loss": 0.0083, "step": 69720 }, { "epoch": 2.155838536193361, "grad_norm": 0.15411648154258728, "learning_rate": 1.8922405266736727e-05, "loss": 0.0089, "step": 69750 }, { "epoch": 2.1567657785745196, "grad_norm": 0.17111580073833466, "learning_rate": 1.892194164554615e-05, "loss": 0.0083, "step": 69780 }, { "epoch": 2.1576930209556777, "grad_norm": 0.11129394173622131, "learning_rate": 1.892147802435557e-05, "loss": 0.0089, "step": 69810 }, { "epoch": 2.1586202633368363, "grad_norm": 0.10386613011360168, "learning_rate": 1.8921014403164988e-05, "loss": 0.0091, "step": 69840 }, { "epoch": 2.1595475057179945, "grad_norm": 0.11056216806173325, "learning_rate": 1.892055078197441e-05, "loss": 0.0092, "step": 69870 }, { "epoch": 2.160474748099153, "grad_norm": 0.15415820479393005, "learning_rate": 1.892008716078383e-05, "loss": 0.0092, "step": 69900 }, { "epoch": 2.1614019904803117, "grad_norm": 0.1822110116481781, "learning_rate": 1.891962353959325e-05, "loss": 0.0081, "step": 69930 }, { "epoch": 2.16232923286147, "grad_norm": 0.15298330783843994, "learning_rate": 1.891915991840267e-05, "loss": 0.0084, "step": 69960 }, { "epoch": 2.1632564752426284, "grad_norm": 0.15450450778007507, "learning_rate": 1.8918696297212092e-05, "loss": 0.0092, "step": 69990 }, { "epoch": 2.164183717623787, "grad_norm": 0.09891186654567719, "learning_rate": 1.8918232676021514e-05, "loss": 0.0087, "step": 70020 }, { "epoch": 2.165110960004945, "grad_norm": 0.17991402745246887, 
"learning_rate": 1.8917769054830935e-05, "loss": 0.0091, "step": 70050 }, { "epoch": 2.166038202386104, "grad_norm": 0.14245396852493286, "learning_rate": 1.8917305433640357e-05, "loss": 0.0089, "step": 70080 }, { "epoch": 2.1669654447672624, "grad_norm": 0.11628829687833786, "learning_rate": 1.8916841812449775e-05, "loss": 0.008, "step": 70110 }, { "epoch": 2.1678926871484205, "grad_norm": 0.1272253543138504, "learning_rate": 1.8916378191259196e-05, "loss": 0.0086, "step": 70140 }, { "epoch": 2.168819929529579, "grad_norm": 0.1308063268661499, "learning_rate": 1.8915914570068618e-05, "loss": 0.0086, "step": 70170 }, { "epoch": 2.1697471719107373, "grad_norm": 0.10678290575742722, "learning_rate": 1.891545094887804e-05, "loss": 0.0094, "step": 70200 }, { "epoch": 2.170674414291896, "grad_norm": 0.16830222308635712, "learning_rate": 1.891498732768746e-05, "loss": 0.0091, "step": 70230 }, { "epoch": 2.1716016566730545, "grad_norm": 0.14412182569503784, "learning_rate": 1.891452370649688e-05, "loss": 0.0087, "step": 70260 }, { "epoch": 2.1725288990542126, "grad_norm": 0.11741902679204941, "learning_rate": 1.89140600853063e-05, "loss": 0.009, "step": 70290 }, { "epoch": 2.1734561414353712, "grad_norm": 0.16648481786251068, "learning_rate": 1.8913596464115722e-05, "loss": 0.009, "step": 70320 }, { "epoch": 2.17438338381653, "grad_norm": 0.12810702621936798, "learning_rate": 1.891313284292514e-05, "loss": 0.0089, "step": 70350 }, { "epoch": 2.175310626197688, "grad_norm": 0.14085452258586884, "learning_rate": 1.891266922173456e-05, "loss": 0.0096, "step": 70380 }, { "epoch": 2.1762378685788466, "grad_norm": 0.13843321800231934, "learning_rate": 1.8912205600543983e-05, "loss": 0.0085, "step": 70410 }, { "epoch": 2.1771651109600048, "grad_norm": 0.1245063841342926, "learning_rate": 1.8911741979353405e-05, "loss": 0.0096, "step": 70440 }, { "epoch": 2.1780923533411634, "grad_norm": 0.14684456586837769, "learning_rate": 1.8911278358162826e-05, "loss": 0.0092, "step": 70470 
}, { "epoch": 2.179019595722322, "grad_norm": 0.15975366532802582, "learning_rate": 1.8910814736972244e-05, "loss": 0.0079, "step": 70500 }, { "epoch": 2.17994683810348, "grad_norm": 0.11974488943815231, "learning_rate": 1.8910351115781666e-05, "loss": 0.0092, "step": 70530 }, { "epoch": 2.1808740804846387, "grad_norm": 0.12652868032455444, "learning_rate": 1.8909887494591087e-05, "loss": 0.0092, "step": 70560 }, { "epoch": 2.1818013228657973, "grad_norm": 0.09854988008737564, "learning_rate": 1.8909439327440195e-05, "loss": 0.009, "step": 70590 }, { "epoch": 2.1827285652469555, "grad_norm": 0.19675464928150177, "learning_rate": 1.8908975706249613e-05, "loss": 0.0088, "step": 70620 }, { "epoch": 2.183655807628114, "grad_norm": 0.12877976894378662, "learning_rate": 1.8908512085059035e-05, "loss": 0.0094, "step": 70650 }, { "epoch": 2.184583050009272, "grad_norm": 0.1481362283229828, "learning_rate": 1.8908048463868456e-05, "loss": 0.0087, "step": 70680 }, { "epoch": 2.185510292390431, "grad_norm": 0.12870502471923828, "learning_rate": 1.8907584842677878e-05, "loss": 0.0086, "step": 70710 }, { "epoch": 2.1864375347715894, "grad_norm": 0.16580922901630402, "learning_rate": 1.89071212214873e-05, "loss": 0.0085, "step": 70740 }, { "epoch": 2.1873647771527476, "grad_norm": 0.18394939601421356, "learning_rate": 1.8906657600296717e-05, "loss": 0.0093, "step": 70770 }, { "epoch": 2.188292019533906, "grad_norm": 0.12719479203224182, "learning_rate": 1.890619397910614e-05, "loss": 0.0088, "step": 70800 }, { "epoch": 2.1892192619150648, "grad_norm": 0.14633497595787048, "learning_rate": 1.890573035791556e-05, "loss": 0.0095, "step": 70830 }, { "epoch": 2.190146504296223, "grad_norm": 0.09643109142780304, "learning_rate": 1.8905266736724982e-05, "loss": 0.0089, "step": 70860 }, { "epoch": 2.1910737466773815, "grad_norm": 0.16548655927181244, "learning_rate": 1.8904803115534403e-05, "loss": 0.0089, "step": 70890 }, { "epoch": 2.19200098905854, "grad_norm": 0.1491905301809311, 
"learning_rate": 1.8904339494343825e-05, "loss": 0.0094, "step": 70920 }, { "epoch": 2.1929282314396983, "grad_norm": 0.14683310687541962, "learning_rate": 1.8903875873153243e-05, "loss": 0.0089, "step": 70950 }, { "epoch": 2.193855473820857, "grad_norm": 0.12418724596500397, "learning_rate": 1.8903412251962665e-05, "loss": 0.0087, "step": 70980 }, { "epoch": 2.194782716202015, "grad_norm": 0.15240062773227692, "learning_rate": 1.8902948630772086e-05, "loss": 0.0083, "step": 71010 }, { "epoch": 2.1957099585831736, "grad_norm": 0.1495690941810608, "learning_rate": 1.8902485009581508e-05, "loss": 0.0081, "step": 71040 }, { "epoch": 2.1966372009643322, "grad_norm": 0.14796452224254608, "learning_rate": 1.890202138839093e-05, "loss": 0.0093, "step": 71070 }, { "epoch": 2.1975644433454904, "grad_norm": 0.19263876974582672, "learning_rate": 1.8901557767200347e-05, "loss": 0.0089, "step": 71100 }, { "epoch": 2.198491685726649, "grad_norm": 0.16011062264442444, "learning_rate": 1.890109414600977e-05, "loss": 0.0095, "step": 71130 }, { "epoch": 2.199418928107807, "grad_norm": 0.10791882872581482, "learning_rate": 1.890063052481919e-05, "loss": 0.0089, "step": 71160 }, { "epoch": 2.2003461704889657, "grad_norm": 0.18085575103759766, "learning_rate": 1.890016690362861e-05, "loss": 0.0093, "step": 71190 }, { "epoch": 2.2012734128701243, "grad_norm": 0.09394484013319016, "learning_rate": 1.889970328243803e-05, "loss": 0.0082, "step": 71220 }, { "epoch": 2.2022006552512825, "grad_norm": 0.21017324924468994, "learning_rate": 1.889923966124745e-05, "loss": 0.0082, "step": 71250 }, { "epoch": 2.203127897632441, "grad_norm": 0.163272887468338, "learning_rate": 1.8898776040056873e-05, "loss": 0.009, "step": 71280 }, { "epoch": 2.2040551400135997, "grad_norm": 0.16225232183933258, "learning_rate": 1.8898312418866294e-05, "loss": 0.0094, "step": 71310 }, { "epoch": 2.204982382394758, "grad_norm": 0.09801054000854492, "learning_rate": 1.8897848797675713e-05, "loss": 0.0088, "step": 
71340 }, { "epoch": 2.2059096247759165, "grad_norm": 0.1248030811548233, "learning_rate": 1.8897385176485134e-05, "loss": 0.0089, "step": 71370 }, { "epoch": 2.206836867157075, "grad_norm": 0.11771856248378754, "learning_rate": 1.8896921555294556e-05, "loss": 0.0086, "step": 71400 }, { "epoch": 2.207764109538233, "grad_norm": 0.12281326204538345, "learning_rate": 1.8896457934103977e-05, "loss": 0.0093, "step": 71430 }, { "epoch": 2.208691351919392, "grad_norm": 0.31952977180480957, "learning_rate": 1.88959943129134e-05, "loss": 0.0092, "step": 71460 }, { "epoch": 2.20961859430055, "grad_norm": 0.09996087849140167, "learning_rate": 1.889553069172282e-05, "loss": 0.0088, "step": 71490 }, { "epoch": 2.2105458366817086, "grad_norm": 0.12061703950166702, "learning_rate": 1.8895067070532238e-05, "loss": 0.0084, "step": 71520 }, { "epoch": 2.211473079062867, "grad_norm": 0.1389547437429428, "learning_rate": 1.889460344934166e-05, "loss": 0.0091, "step": 71550 }, { "epoch": 2.2124003214440253, "grad_norm": 0.11041004955768585, "learning_rate": 1.889413982815108e-05, "loss": 0.009, "step": 71580 }, { "epoch": 2.213327563825184, "grad_norm": 0.14489056169986725, "learning_rate": 1.88936762069605e-05, "loss": 0.0087, "step": 71610 }, { "epoch": 2.2142548062063425, "grad_norm": 0.1850181221961975, "learning_rate": 1.889321258576992e-05, "loss": 0.0084, "step": 71640 }, { "epoch": 2.2151820485875007, "grad_norm": 0.1333666890859604, "learning_rate": 1.8892748964579342e-05, "loss": 0.0097, "step": 71670 }, { "epoch": 2.2161092909686593, "grad_norm": 0.1311594545841217, "learning_rate": 1.8892285343388764e-05, "loss": 0.0083, "step": 71700 }, { "epoch": 2.217036533349818, "grad_norm": 0.1530425101518631, "learning_rate": 1.8891821722198185e-05, "loss": 0.0093, "step": 71730 }, { "epoch": 2.217963775730976, "grad_norm": 0.15149125456809998, "learning_rate": 1.8891358101007603e-05, "loss": 0.009, "step": 71760 }, { "epoch": 2.2188910181121346, "grad_norm": 0.1322949379682541, 
"learning_rate": 1.8890894479817025e-05, "loss": 0.0084, "step": 71790 }, { "epoch": 2.219818260493293, "grad_norm": 0.14221623539924622, "learning_rate": 1.8890430858626446e-05, "loss": 0.0097, "step": 71820 }, { "epoch": 2.2207455028744514, "grad_norm": 0.17533144354820251, "learning_rate": 1.8889967237435868e-05, "loss": 0.0089, "step": 71850 }, { "epoch": 2.22167274525561, "grad_norm": 0.13972420990467072, "learning_rate": 1.888950361624529e-05, "loss": 0.0089, "step": 71880 }, { "epoch": 2.222599987636768, "grad_norm": 0.17145711183547974, "learning_rate": 1.888903999505471e-05, "loss": 0.0092, "step": 71910 }, { "epoch": 2.2235272300179267, "grad_norm": 0.15872696042060852, "learning_rate": 1.888857637386413e-05, "loss": 0.0084, "step": 71940 }, { "epoch": 2.224454472399085, "grad_norm": 0.13763616979122162, "learning_rate": 1.888811275267355e-05, "loss": 0.0095, "step": 71970 }, { "epoch": 2.2253817147802435, "grad_norm": 0.11473555117845535, "learning_rate": 1.888764913148297e-05, "loss": 0.0098, "step": 72000 }, { "epoch": 2.226308957161402, "grad_norm": 0.14158859848976135, "learning_rate": 1.888718551029239e-05, "loss": 0.0088, "step": 72030 }, { "epoch": 2.2272361995425602, "grad_norm": 0.10756170004606247, "learning_rate": 1.8886721889101812e-05, "loss": 0.0086, "step": 72060 }, { "epoch": 2.228163441923719, "grad_norm": 0.12372111529111862, "learning_rate": 1.8886258267911233e-05, "loss": 0.0089, "step": 72090 }, { "epoch": 2.2290906843048774, "grad_norm": 0.13207387924194336, "learning_rate": 1.8885794646720655e-05, "loss": 0.0086, "step": 72120 }, { "epoch": 2.2300179266860356, "grad_norm": 0.14404727518558502, "learning_rate": 1.8885331025530076e-05, "loss": 0.0091, "step": 72150 }, { "epoch": 2.230945169067194, "grad_norm": 0.13198868930339813, "learning_rate": 1.8884867404339494e-05, "loss": 0.0096, "step": 72180 }, { "epoch": 2.231872411448353, "grad_norm": 0.13135038316249847, "learning_rate": 1.8884403783148916e-05, "loss": 0.0088, "step": 
72210 }, { "epoch": 2.232799653829511, "grad_norm": 0.1281643509864807, "learning_rate": 1.8883940161958337e-05, "loss": 0.0083, "step": 72240 }, { "epoch": 2.2337268962106696, "grad_norm": 0.1256420612335205, "learning_rate": 1.888347654076776e-05, "loss": 0.009, "step": 72270 }, { "epoch": 2.2346541385918277, "grad_norm": 0.1490791290998459, "learning_rate": 1.888301291957718e-05, "loss": 0.0089, "step": 72300 }, { "epoch": 2.2355813809729863, "grad_norm": 0.17233188450336456, "learning_rate": 1.88825492983866e-05, "loss": 0.0092, "step": 72330 }, { "epoch": 2.236508623354145, "grad_norm": 0.13731978833675385, "learning_rate": 1.888208567719602e-05, "loss": 0.0082, "step": 72360 }, { "epoch": 2.237435865735303, "grad_norm": 0.1100035011768341, "learning_rate": 1.888162205600544e-05, "loss": 0.0087, "step": 72390 }, { "epoch": 2.2383631081164617, "grad_norm": 0.12508642673492432, "learning_rate": 1.8881158434814863e-05, "loss": 0.0089, "step": 72420 }, { "epoch": 2.2392903504976203, "grad_norm": 0.11783348023891449, "learning_rate": 1.8880694813624285e-05, "loss": 0.0083, "step": 72450 }, { "epoch": 2.2402175928787784, "grad_norm": 0.1623716503381729, "learning_rate": 1.8880231192433706e-05, "loss": 0.0093, "step": 72480 }, { "epoch": 2.241144835259937, "grad_norm": 0.1348728984594345, "learning_rate": 1.8879767571243124e-05, "loss": 0.0089, "step": 72510 }, { "epoch": 2.2420720776410956, "grad_norm": 0.16939108073711395, "learning_rate": 1.8879303950052546e-05, "loss": 0.0092, "step": 72540 }, { "epoch": 2.2429993200222538, "grad_norm": 0.12319344282150269, "learning_rate": 1.8878840328861967e-05, "loss": 0.0089, "step": 72570 }, { "epoch": 2.2439265624034124, "grad_norm": 0.0907304659485817, "learning_rate": 1.8878376707671385e-05, "loss": 0.0095, "step": 72600 }, { "epoch": 2.2448538047845705, "grad_norm": 0.1291521191596985, "learning_rate": 1.8877913086480807e-05, "loss": 0.0083, "step": 72630 }, { "epoch": 2.245781047165729, "grad_norm": 0.12700653076171875, 
"learning_rate": 1.8877464919329915e-05, "loss": 0.0092, "step": 72660 }, { "epoch": 2.2467082895468877, "grad_norm": 0.10512715578079224, "learning_rate": 1.8877001298139336e-05, "loss": 0.0089, "step": 72690 }, { "epoch": 2.247635531928046, "grad_norm": 0.12982504069805145, "learning_rate": 1.8876537676948758e-05, "loss": 0.0086, "step": 72720 }, { "epoch": 2.2485627743092045, "grad_norm": 0.2718728482723236, "learning_rate": 1.8876074055758176e-05, "loss": 0.0091, "step": 72750 }, { "epoch": 2.2494900166903626, "grad_norm": 0.2009500414133072, "learning_rate": 1.8875610434567597e-05, "loss": 0.0088, "step": 72780 }, { "epoch": 2.2504172590715212, "grad_norm": 0.08626282215118408, "learning_rate": 1.887514681337702e-05, "loss": 0.0081, "step": 72810 }, { "epoch": 2.25134450145268, "grad_norm": 0.14024610817432404, "learning_rate": 1.8874683192186437e-05, "loss": 0.0092, "step": 72840 }, { "epoch": 2.252271743833838, "grad_norm": 0.1387394219636917, "learning_rate": 1.887421957099586e-05, "loss": 0.0086, "step": 72870 }, { "epoch": 2.2531989862149966, "grad_norm": 0.08926853537559509, "learning_rate": 1.887375594980528e-05, "loss": 0.0091, "step": 72900 }, { "epoch": 2.254126228596155, "grad_norm": 0.15835779905319214, "learning_rate": 1.88732923286147e-05, "loss": 0.0089, "step": 72930 }, { "epoch": 2.2550534709773133, "grad_norm": 0.09361131489276886, "learning_rate": 1.8872828707424123e-05, "loss": 0.0081, "step": 72960 }, { "epoch": 2.255980713358472, "grad_norm": 0.1452685445547104, "learning_rate": 1.8872365086233544e-05, "loss": 0.0086, "step": 72990 }, { "epoch": 2.2569079557396305, "grad_norm": 0.1345299333333969, "learning_rate": 1.8871901465042963e-05, "loss": 0.0084, "step": 73020 }, { "epoch": 2.2578351981207887, "grad_norm": 0.17026135325431824, "learning_rate": 1.8871437843852384e-05, "loss": 0.0081, "step": 73050 }, { "epoch": 2.2587624405019473, "grad_norm": 0.12752577662467957, "learning_rate": 1.8870974222661806e-05, "loss": 0.0092, "step": 
73080 }, { "epoch": 2.2596896828831055, "grad_norm": 0.09900791943073273, "learning_rate": 1.8870510601471227e-05, "loss": 0.0087, "step": 73110 }, { "epoch": 2.260616925264264, "grad_norm": 0.10658922791481018, "learning_rate": 1.887004698028065e-05, "loss": 0.0089, "step": 73140 }, { "epoch": 2.2615441676454227, "grad_norm": 0.17126788198947906, "learning_rate": 1.8869583359090067e-05, "loss": 0.0086, "step": 73170 }, { "epoch": 2.262471410026581, "grad_norm": 0.10094423592090607, "learning_rate": 1.8869119737899488e-05, "loss": 0.0086, "step": 73200 }, { "epoch": 2.2633986524077394, "grad_norm": 0.1033947765827179, "learning_rate": 1.886865611670891e-05, "loss": 0.0088, "step": 73230 }, { "epoch": 2.264325894788898, "grad_norm": 0.15351808071136475, "learning_rate": 1.8868192495518328e-05, "loss": 0.0083, "step": 73260 }, { "epoch": 2.265253137170056, "grad_norm": 0.1365838348865509, "learning_rate": 1.886772887432775e-05, "loss": 0.0095, "step": 73290 }, { "epoch": 2.2661803795512148, "grad_norm": 0.12750144302845, "learning_rate": 1.886726525313717e-05, "loss": 0.0086, "step": 73320 }, { "epoch": 2.2671076219323734, "grad_norm": 0.15665072202682495, "learning_rate": 1.8866801631946592e-05, "loss": 0.0092, "step": 73350 }, { "epoch": 2.2680348643135315, "grad_norm": 0.10841574519872665, "learning_rate": 1.8866338010756014e-05, "loss": 0.0087, "step": 73380 }, { "epoch": 2.26896210669469, "grad_norm": 0.1495928317308426, "learning_rate": 1.8865874389565435e-05, "loss": 0.008, "step": 73410 }, { "epoch": 2.2698893490758483, "grad_norm": 0.18516689538955688, "learning_rate": 1.8865410768374854e-05, "loss": 0.0088, "step": 73440 }, { "epoch": 2.270816591457007, "grad_norm": 0.11128291487693787, "learning_rate": 1.8864947147184275e-05, "loss": 0.0082, "step": 73470 }, { "epoch": 2.2717438338381655, "grad_norm": 0.18804824352264404, "learning_rate": 1.8864483525993697e-05, "loss": 0.0094, "step": 73500 }, { "epoch": 2.2726710762193236, "grad_norm": 
0.1250469833612442, "learning_rate": 1.8864019904803118e-05, "loss": 0.0085, "step": 73530 }, { "epoch": 2.2735983186004822, "grad_norm": 0.13956324756145477, "learning_rate": 1.886355628361254e-05, "loss": 0.0087, "step": 73560 }, { "epoch": 2.2745255609816404, "grad_norm": 0.17182214558124542, "learning_rate": 1.8863092662421958e-05, "loss": 0.0088, "step": 73590 }, { "epoch": 2.275452803362799, "grad_norm": 0.1759544163942337, "learning_rate": 1.886262904123138e-05, "loss": 0.0083, "step": 73620 }, { "epoch": 2.2763800457439576, "grad_norm": 0.12783929705619812, "learning_rate": 1.88621654200408e-05, "loss": 0.0079, "step": 73650 }, { "epoch": 2.2773072881251157, "grad_norm": 0.12960253655910492, "learning_rate": 1.886170179885022e-05, "loss": 0.0082, "step": 73680 }, { "epoch": 2.2782345305062743, "grad_norm": 0.2172284871339798, "learning_rate": 1.8861238177659644e-05, "loss": 0.0086, "step": 73710 }, { "epoch": 2.279161772887433, "grad_norm": 0.15971016883850098, "learning_rate": 1.8860774556469065e-05, "loss": 0.0093, "step": 73740 }, { "epoch": 2.280089015268591, "grad_norm": 0.21730943024158478, "learning_rate": 1.8860310935278483e-05, "loss": 0.0086, "step": 73770 }, { "epoch": 2.2810162576497497, "grad_norm": 0.132557675242424, "learning_rate": 1.8859847314087905e-05, "loss": 0.009, "step": 73800 }, { "epoch": 2.2819435000309083, "grad_norm": 0.1418829709291458, "learning_rate": 1.8859383692897323e-05, "loss": 0.0086, "step": 73830 }, { "epoch": 2.2828707424120664, "grad_norm": 0.1313936412334442, "learning_rate": 1.8858920071706744e-05, "loss": 0.0087, "step": 73860 }, { "epoch": 2.283797984793225, "grad_norm": 0.09573974460363388, "learning_rate": 1.8858456450516166e-05, "loss": 0.008, "step": 73890 }, { "epoch": 2.284725227174383, "grad_norm": 0.18170644342899323, "learning_rate": 1.8857992829325587e-05, "loss": 0.0089, "step": 73920 }, { "epoch": 2.285652469555542, "grad_norm": 0.17393308877944946, "learning_rate": 1.885752920813501e-05, "loss": 
0.0096, "step": 73950 }, { "epoch": 2.2865797119367004, "grad_norm": 0.15272578597068787, "learning_rate": 1.885706558694443e-05, "loss": 0.0086, "step": 73980 }, { "epoch": 2.2875069543178586, "grad_norm": 0.16750074923038483, "learning_rate": 1.885660196575385e-05, "loss": 0.009, "step": 74010 }, { "epoch": 2.288434196699017, "grad_norm": 0.14809376001358032, "learning_rate": 1.885613834456327e-05, "loss": 0.0085, "step": 74040 }, { "epoch": 2.2893614390801753, "grad_norm": 0.11909930408000946, "learning_rate": 1.885567472337269e-05, "loss": 0.009, "step": 74070 }, { "epoch": 2.290288681461334, "grad_norm": 0.1542603224515915, "learning_rate": 1.8855211102182113e-05, "loss": 0.0091, "step": 74100 }, { "epoch": 2.2912159238424925, "grad_norm": 0.13722600042819977, "learning_rate": 1.8854747480991535e-05, "loss": 0.0085, "step": 74130 }, { "epoch": 2.292143166223651, "grad_norm": 0.10010780394077301, "learning_rate": 1.8854283859800953e-05, "loss": 0.0096, "step": 74160 }, { "epoch": 2.2930704086048093, "grad_norm": 0.12684530019760132, "learning_rate": 1.8853820238610374e-05, "loss": 0.0088, "step": 74190 }, { "epoch": 2.293997650985968, "grad_norm": 0.19932232797145844, "learning_rate": 1.8853356617419796e-05, "loss": 0.0093, "step": 74220 }, { "epoch": 2.294924893367126, "grad_norm": 0.11821864545345306, "learning_rate": 1.8852892996229214e-05, "loss": 0.0092, "step": 74250 }, { "epoch": 2.2958521357482846, "grad_norm": 0.16085079312324524, "learning_rate": 1.8852429375038635e-05, "loss": 0.0101, "step": 74280 }, { "epoch": 2.296779378129443, "grad_norm": 0.08482006937265396, "learning_rate": 1.8851965753848057e-05, "loss": 0.0094, "step": 74310 }, { "epoch": 2.2977066205106014, "grad_norm": 0.10123047232627869, "learning_rate": 1.885150213265748e-05, "loss": 0.0088, "step": 74340 }, { "epoch": 2.29863386289176, "grad_norm": 0.1315576732158661, "learning_rate": 1.88510385114669e-05, "loss": 0.0089, "step": 74370 }, { "epoch": 2.299561105272918, "grad_norm": 
0.12873077392578125, "learning_rate": 1.885057489027632e-05, "loss": 0.0093, "step": 74400 }, { "epoch": 2.3004883476540767, "grad_norm": 0.16195860505104065, "learning_rate": 1.885011126908574e-05, "loss": 0.0088, "step": 74430 }, { "epoch": 2.3014155900352353, "grad_norm": 0.14522325992584229, "learning_rate": 1.884964764789516e-05, "loss": 0.0087, "step": 74460 }, { "epoch": 2.3023428324163935, "grad_norm": 0.10039913654327393, "learning_rate": 1.8849184026704583e-05, "loss": 0.0086, "step": 74490 }, { "epoch": 2.303270074797552, "grad_norm": 0.11097509413957596, "learning_rate": 1.8848720405514004e-05, "loss": 0.009, "step": 74520 }, { "epoch": 2.3041973171787107, "grad_norm": 0.09466470032930374, "learning_rate": 1.8848256784323426e-05, "loss": 0.0096, "step": 74550 }, { "epoch": 2.305124559559869, "grad_norm": 0.16630716621875763, "learning_rate": 1.8847793163132844e-05, "loss": 0.0082, "step": 74580 }, { "epoch": 2.3060518019410274, "grad_norm": 0.20133961737155914, "learning_rate": 1.8847329541942265e-05, "loss": 0.0086, "step": 74610 }, { "epoch": 2.306979044322186, "grad_norm": 0.13147056102752686, "learning_rate": 1.8846865920751687e-05, "loss": 0.0086, "step": 74640 }, { "epoch": 2.307906286703344, "grad_norm": 0.15197718143463135, "learning_rate": 1.8846402299561105e-05, "loss": 0.0099, "step": 74670 }, { "epoch": 2.308833529084503, "grad_norm": 0.18132458627223969, "learning_rate": 1.8845938678370526e-05, "loss": 0.0094, "step": 74700 }, { "epoch": 2.309760771465661, "grad_norm": 0.12127023935317993, "learning_rate": 1.8845475057179948e-05, "loss": 0.0084, "step": 74730 }, { "epoch": 2.3106880138468195, "grad_norm": 0.21165399253368378, "learning_rate": 1.884501143598937e-05, "loss": 0.0088, "step": 74760 }, { "epoch": 2.311615256227978, "grad_norm": 0.09430490434169769, "learning_rate": 1.884454781479879e-05, "loss": 0.0086, "step": 74790 }, { "epoch": 2.3125424986091363, "grad_norm": 0.11644085496664047, "learning_rate": 1.884408419360821e-05, 
"loss": 0.0094, "step": 74820 }, { "epoch": 2.313469740990295, "grad_norm": 0.11686800420284271, "learning_rate": 1.884362057241763e-05, "loss": 0.0092, "step": 74850 }, { "epoch": 2.314396983371453, "grad_norm": 0.194910928606987, "learning_rate": 1.884317240526674e-05, "loss": 0.0089, "step": 74880 }, { "epoch": 2.3153242257526117, "grad_norm": 0.16339494287967682, "learning_rate": 1.8842708784076156e-05, "loss": 0.0091, "step": 74910 }, { "epoch": 2.3162514681337703, "grad_norm": 0.14438965916633606, "learning_rate": 1.8842245162885578e-05, "loss": 0.0091, "step": 74940 }, { "epoch": 2.3171787105149284, "grad_norm": 0.11943770200014114, "learning_rate": 1.8841781541695003e-05, "loss": 0.009, "step": 74970 }, { "epoch": 2.318105952896087, "grad_norm": 0.11169414967298508, "learning_rate": 1.884131792050442e-05, "loss": 0.0092, "step": 75000 }, { "epoch": 2.3190331952772456, "grad_norm": 0.17709872126579285, "learning_rate": 1.8840854299313842e-05, "loss": 0.0093, "step": 75030 }, { "epoch": 2.3199604376584038, "grad_norm": 0.11170468479394913, "learning_rate": 1.8840390678123264e-05, "loss": 0.0088, "step": 75060 }, { "epoch": 2.3208876800395624, "grad_norm": 0.16096435487270355, "learning_rate": 1.8839927056932682e-05, "loss": 0.0091, "step": 75090 }, { "epoch": 2.321814922420721, "grad_norm": 0.13746283948421478, "learning_rate": 1.8839463435742104e-05, "loss": 0.0096, "step": 75120 }, { "epoch": 2.322742164801879, "grad_norm": 0.24748066067695618, "learning_rate": 1.8838999814551525e-05, "loss": 0.0083, "step": 75150 }, { "epoch": 2.3236694071830377, "grad_norm": 0.13065913319587708, "learning_rate": 1.8838536193360947e-05, "loss": 0.0087, "step": 75180 }, { "epoch": 2.324596649564196, "grad_norm": 0.16684673726558685, "learning_rate": 1.8838072572170368e-05, "loss": 0.0084, "step": 75210 }, { "epoch": 2.3255238919453545, "grad_norm": 0.15574394166469574, "learning_rate": 1.8837608950979786e-05, "loss": 0.0089, "step": 75240 }, { "epoch": 2.326451134326513, 
"grad_norm": 0.1908269077539444, "learning_rate": 1.8837145329789208e-05, "loss": 0.0083, "step": 75270 }, { "epoch": 2.3273783767076712, "grad_norm": 0.11776213347911835, "learning_rate": 1.883668170859863e-05, "loss": 0.0085, "step": 75300 }, { "epoch": 2.32830561908883, "grad_norm": 0.1164964884519577, "learning_rate": 1.883621808740805e-05, "loss": 0.0095, "step": 75330 }, { "epoch": 2.3292328614699884, "grad_norm": 0.14617545902729034, "learning_rate": 1.8835754466217472e-05, "loss": 0.0089, "step": 75360 }, { "epoch": 2.3301601038511466, "grad_norm": 0.13098369538784027, "learning_rate": 1.8835290845026894e-05, "loss": 0.0093, "step": 75390 }, { "epoch": 2.331087346232305, "grad_norm": 0.14681804180145264, "learning_rate": 1.8834827223836312e-05, "loss": 0.0086, "step": 75420 }, { "epoch": 2.332014588613464, "grad_norm": 0.17139966785907745, "learning_rate": 1.8834363602645733e-05, "loss": 0.0089, "step": 75450 }, { "epoch": 2.332941830994622, "grad_norm": 0.14468665421009064, "learning_rate": 1.8833899981455155e-05, "loss": 0.0089, "step": 75480 }, { "epoch": 2.3338690733757805, "grad_norm": 0.11322130262851715, "learning_rate": 1.8833436360264573e-05, "loss": 0.0089, "step": 75510 }, { "epoch": 2.3347963157569387, "grad_norm": 0.13798370957374573, "learning_rate": 1.8832972739073995e-05, "loss": 0.0084, "step": 75540 }, { "epoch": 2.3357235581380973, "grad_norm": 0.12144910544157028, "learning_rate": 1.8832509117883416e-05, "loss": 0.0094, "step": 75570 }, { "epoch": 2.336650800519256, "grad_norm": 0.1305384486913681, "learning_rate": 1.8832045496692838e-05, "loss": 0.0082, "step": 75600 }, { "epoch": 2.337578042900414, "grad_norm": 0.11054778099060059, "learning_rate": 1.883158187550226e-05, "loss": 0.0103, "step": 75630 }, { "epoch": 2.3385052852815726, "grad_norm": 0.12285984307527542, "learning_rate": 1.8831118254311677e-05, "loss": 0.0089, "step": 75660 }, { "epoch": 2.339432527662731, "grad_norm": 0.13046592473983765, "learning_rate": 
1.88306546331211e-05, "loss": 0.0092, "step": 75690 }, { "epoch": 2.3403597700438894, "grad_norm": 0.13936516642570496, "learning_rate": 1.883019101193052e-05, "loss": 0.008, "step": 75720 }, { "epoch": 2.341287012425048, "grad_norm": 0.11979890614748001, "learning_rate": 1.882972739073994e-05, "loss": 0.0085, "step": 75750 }, { "epoch": 2.342214254806206, "grad_norm": 0.09984098374843597, "learning_rate": 1.8829263769549363e-05, "loss": 0.0084, "step": 75780 }, { "epoch": 2.3431414971873648, "grad_norm": 0.13953642547130585, "learning_rate": 1.8828800148358785e-05, "loss": 0.0084, "step": 75810 }, { "epoch": 2.3440687395685234, "grad_norm": 0.12482649832963943, "learning_rate": 1.8828336527168203e-05, "loss": 0.0077, "step": 75840 }, { "epoch": 2.3449959819496815, "grad_norm": 0.1804075986146927, "learning_rate": 1.8827872905977624e-05, "loss": 0.0083, "step": 75870 }, { "epoch": 2.34592322433084, "grad_norm": 0.16126272082328796, "learning_rate": 1.8827409284787042e-05, "loss": 0.0099, "step": 75900 }, { "epoch": 2.3468504667119987, "grad_norm": 0.09347030520439148, "learning_rate": 1.8826945663596464e-05, "loss": 0.0083, "step": 75930 }, { "epoch": 2.347777709093157, "grad_norm": 0.08166996389627457, "learning_rate": 1.8826482042405885e-05, "loss": 0.0085, "step": 75960 }, { "epoch": 2.3487049514743155, "grad_norm": 0.11639881879091263, "learning_rate": 1.8826018421215307e-05, "loss": 0.0088, "step": 75990 }, { "epoch": 2.3496321938554736, "grad_norm": 0.16229262948036194, "learning_rate": 1.882555480002473e-05, "loss": 0.0091, "step": 76020 }, { "epoch": 2.350559436236632, "grad_norm": 0.1248411312699318, "learning_rate": 1.882509117883415e-05, "loss": 0.009, "step": 76050 }, { "epoch": 2.351486678617791, "grad_norm": 0.14677976071834564, "learning_rate": 1.8824627557643568e-05, "loss": 0.0084, "step": 76080 }, { "epoch": 2.352413920998949, "grad_norm": 0.16593830287456512, "learning_rate": 1.882416393645299e-05, "loss": 0.0085, "step": 76110 }, { "epoch": 
2.3533411633801076, "grad_norm": 0.12686456739902496, "learning_rate": 1.882370031526241e-05, "loss": 0.0089, "step": 76140 }, { "epoch": 2.354268405761266, "grad_norm": 0.11726818233728409, "learning_rate": 1.8823236694071833e-05, "loss": 0.0095, "step": 76170 }, { "epoch": 2.3551956481424243, "grad_norm": 0.13045185804367065, "learning_rate": 1.8822773072881254e-05, "loss": 0.0082, "step": 76200 }, { "epoch": 2.356122890523583, "grad_norm": 0.1532270461320877, "learning_rate": 1.8822309451690676e-05, "loss": 0.0089, "step": 76230 }, { "epoch": 2.3570501329047415, "grad_norm": 0.1641763299703598, "learning_rate": 1.8821845830500094e-05, "loss": 0.0095, "step": 76260 }, { "epoch": 2.3579773752858997, "grad_norm": 0.11482513695955276, "learning_rate": 1.8821382209309515e-05, "loss": 0.0084, "step": 76290 }, { "epoch": 2.3589046176670583, "grad_norm": 0.14296168088912964, "learning_rate": 1.8820918588118933e-05, "loss": 0.0087, "step": 76320 }, { "epoch": 2.3598318600482164, "grad_norm": 0.12292762100696564, "learning_rate": 1.8820454966928355e-05, "loss": 0.0082, "step": 76350 }, { "epoch": 2.360759102429375, "grad_norm": 0.09331880509853363, "learning_rate": 1.881999134573778e-05, "loss": 0.0083, "step": 76380 }, { "epoch": 2.3616863448105336, "grad_norm": 0.11191748827695847, "learning_rate": 1.8819527724547198e-05, "loss": 0.0089, "step": 76410 }, { "epoch": 2.362613587191692, "grad_norm": 0.0803728923201561, "learning_rate": 1.881906410335662e-05, "loss": 0.0087, "step": 76440 }, { "epoch": 2.3635408295728504, "grad_norm": 0.09934933483600616, "learning_rate": 1.881860048216604e-05, "loss": 0.0095, "step": 76470 }, { "epoch": 2.3644680719540085, "grad_norm": 0.15693044662475586, "learning_rate": 1.881813686097546e-05, "loss": 0.0085, "step": 76500 }, { "epoch": 2.365395314335167, "grad_norm": 0.13296152651309967, "learning_rate": 1.881767323978488e-05, "loss": 0.0091, "step": 76530 }, { "epoch": 2.3663225567163257, "grad_norm": 0.14104163646697998, 
"learning_rate": 1.8817209618594302e-05, "loss": 0.0081, "step": 76560 }, { "epoch": 2.367249799097484, "grad_norm": 0.12936557829380035, "learning_rate": 1.8816745997403724e-05, "loss": 0.0095, "step": 76590 }, { "epoch": 2.3681770414786425, "grad_norm": 0.14456813037395477, "learning_rate": 1.8816282376213145e-05, "loss": 0.009, "step": 76620 }, { "epoch": 2.369104283859801, "grad_norm": 0.1490224003791809, "learning_rate": 1.8815818755022563e-05, "loss": 0.0083, "step": 76650 }, { "epoch": 2.3700315262409593, "grad_norm": 0.1352522075176239, "learning_rate": 1.8815355133831985e-05, "loss": 0.0085, "step": 76680 }, { "epoch": 2.370958768622118, "grad_norm": 0.14040938019752502, "learning_rate": 1.8814891512641406e-05, "loss": 0.0097, "step": 76710 }, { "epoch": 2.3718860110032765, "grad_norm": 0.10637863725423813, "learning_rate": 1.8814427891450828e-05, "loss": 0.0091, "step": 76740 }, { "epoch": 2.3728132533844346, "grad_norm": 0.185690775513649, "learning_rate": 1.881396427026025e-05, "loss": 0.0088, "step": 76770 }, { "epoch": 2.373740495765593, "grad_norm": 0.08623024076223373, "learning_rate": 1.881350064906967e-05, "loss": 0.0087, "step": 76800 }, { "epoch": 2.3746677381467514, "grad_norm": 0.13053886592388153, "learning_rate": 1.881303702787909e-05, "loss": 0.0087, "step": 76830 }, { "epoch": 2.37559498052791, "grad_norm": 0.14961187541484833, "learning_rate": 1.881257340668851e-05, "loss": 0.0091, "step": 76860 }, { "epoch": 2.3765222229090686, "grad_norm": 0.16369985044002533, "learning_rate": 1.8812109785497932e-05, "loss": 0.0089, "step": 76890 }, { "epoch": 2.3774494652902267, "grad_norm": 0.17418837547302246, "learning_rate": 1.881164616430735e-05, "loss": 0.0098, "step": 76920 }, { "epoch": 2.3783767076713853, "grad_norm": 0.11170440912246704, "learning_rate": 1.881118254311677e-05, "loss": 0.0086, "step": 76950 }, { "epoch": 2.379303950052544, "grad_norm": 0.13598869740962982, "learning_rate": 1.8810718921926193e-05, "loss": 0.0088, "step": 76980 
}, { "epoch": 2.380231192433702, "grad_norm": 0.14196845889091492, "learning_rate": 1.8810255300735614e-05, "loss": 0.0085, "step": 77010 }, { "epoch": 2.3811584348148607, "grad_norm": 0.13873696327209473, "learning_rate": 1.8809791679545036e-05, "loss": 0.0087, "step": 77040 }, { "epoch": 2.3820856771960193, "grad_norm": 0.1666746735572815, "learning_rate": 1.8809328058354454e-05, "loss": 0.0087, "step": 77070 }, { "epoch": 2.3830129195771774, "grad_norm": 0.14203017950057983, "learning_rate": 1.8808864437163876e-05, "loss": 0.0092, "step": 77100 }, { "epoch": 2.383940161958336, "grad_norm": 0.13405928015708923, "learning_rate": 1.8808400815973297e-05, "loss": 0.0092, "step": 77130 }, { "epoch": 2.384867404339494, "grad_norm": 0.1705392748117447, "learning_rate": 1.880793719478272e-05, "loss": 0.0092, "step": 77160 }, { "epoch": 2.385794646720653, "grad_norm": 0.12378732115030289, "learning_rate": 1.880747357359214e-05, "loss": 0.0087, "step": 77190 }, { "epoch": 2.3867218891018114, "grad_norm": 0.1587781310081482, "learning_rate": 1.8807025406441245e-05, "loss": 0.009, "step": 77220 }, { "epoch": 2.3876491314829695, "grad_norm": 0.11840275675058365, "learning_rate": 1.8806561785250666e-05, "loss": 0.0095, "step": 77250 }, { "epoch": 2.388576373864128, "grad_norm": 0.0933840423822403, "learning_rate": 1.8806098164060088e-05, "loss": 0.0089, "step": 77280 }, { "epoch": 2.3895036162452863, "grad_norm": 0.13950538635253906, "learning_rate": 1.880563454286951e-05, "loss": 0.0093, "step": 77310 }, { "epoch": 2.390430858626445, "grad_norm": 0.15270930528640747, "learning_rate": 1.8805170921678927e-05, "loss": 0.0088, "step": 77340 }, { "epoch": 2.3913581010076035, "grad_norm": 0.14163032174110413, "learning_rate": 1.880470730048835e-05, "loss": 0.0097, "step": 77370 }, { "epoch": 2.3922853433887616, "grad_norm": 0.15284672379493713, "learning_rate": 1.880424367929777e-05, "loss": 0.0091, "step": 77400 }, { "epoch": 2.3932125857699202, "grad_norm": 0.11155938357114792, 
"learning_rate": 1.8803780058107192e-05, "loss": 0.0086, "step": 77430 }, { "epoch": 2.394139828151079, "grad_norm": 0.12135271728038788, "learning_rate": 1.8803316436916613e-05, "loss": 0.0091, "step": 77460 }, { "epoch": 2.395067070532237, "grad_norm": 0.12385198473930359, "learning_rate": 1.880285281572603e-05, "loss": 0.0082, "step": 77490 }, { "epoch": 2.3959943129133956, "grad_norm": 0.10250342637300491, "learning_rate": 1.8802389194535453e-05, "loss": 0.009, "step": 77520 }, { "epoch": 2.396921555294554, "grad_norm": 0.16134126484394073, "learning_rate": 1.8801925573344874e-05, "loss": 0.0079, "step": 77550 }, { "epoch": 2.3978487976757124, "grad_norm": 0.16359932720661163, "learning_rate": 1.8801461952154293e-05, "loss": 0.0089, "step": 77580 }, { "epoch": 2.398776040056871, "grad_norm": 0.13000300526618958, "learning_rate": 1.8800998330963714e-05, "loss": 0.0095, "step": 77610 }, { "epoch": 2.399703282438029, "grad_norm": 0.10755154490470886, "learning_rate": 1.880053470977314e-05, "loss": 0.0086, "step": 77640 }, { "epoch": 2.4006305248191877, "grad_norm": 0.12594200670719147, "learning_rate": 1.8800071088582557e-05, "loss": 0.0088, "step": 77670 }, { "epoch": 2.4015577672003463, "grad_norm": 0.17045018076896667, "learning_rate": 1.879960746739198e-05, "loss": 0.0089, "step": 77700 }, { "epoch": 2.4024850095815045, "grad_norm": 0.1077188104391098, "learning_rate": 1.8799143846201397e-05, "loss": 0.0085, "step": 77730 }, { "epoch": 2.403412251962663, "grad_norm": 0.09939396381378174, "learning_rate": 1.8798680225010818e-05, "loss": 0.0085, "step": 77760 }, { "epoch": 2.4043394943438217, "grad_norm": 0.21822403371334076, "learning_rate": 1.879821660382024e-05, "loss": 0.008, "step": 77790 }, { "epoch": 2.40526673672498, "grad_norm": 0.13640953600406647, "learning_rate": 1.879775298262966e-05, "loss": 0.0089, "step": 77820 }, { "epoch": 2.4061939791061384, "grad_norm": 0.10769758373498917, "learning_rate": 1.8797289361439083e-05, "loss": 0.009, "step": 77850 
}, { "epoch": 2.407121221487297, "grad_norm": 0.18811997771263123, "learning_rate": 1.8796825740248504e-05, "loss": 0.0084, "step": 77880 }, { "epoch": 2.408048463868455, "grad_norm": 0.11369206011295319, "learning_rate": 1.8796362119057922e-05, "loss": 0.0088, "step": 77910 }, { "epoch": 2.4089757062496138, "grad_norm": 0.13493448495864868, "learning_rate": 1.8795898497867344e-05, "loss": 0.0084, "step": 77940 }, { "epoch": 2.409902948630772, "grad_norm": 0.11459315568208694, "learning_rate": 1.8795434876676765e-05, "loss": 0.009, "step": 77970 }, { "epoch": 2.4108301910119305, "grad_norm": 0.1177598387002945, "learning_rate": 1.8794971255486187e-05, "loss": 0.0086, "step": 78000 }, { "epoch": 2.411757433393089, "grad_norm": 0.17122313380241394, "learning_rate": 1.879450763429561e-05, "loss": 0.0089, "step": 78030 }, { "epoch": 2.4126846757742473, "grad_norm": 0.18141156435012817, "learning_rate": 1.8794044013105026e-05, "loss": 0.0082, "step": 78060 }, { "epoch": 2.413611918155406, "grad_norm": 0.15442384779453278, "learning_rate": 1.8793580391914448e-05, "loss": 0.0096, "step": 78090 }, { "epoch": 2.414539160536564, "grad_norm": 0.1382388025522232, "learning_rate": 1.879311677072387e-05, "loss": 0.0087, "step": 78120 }, { "epoch": 2.4154664029177226, "grad_norm": 0.1616453379392624, "learning_rate": 1.8792653149533288e-05, "loss": 0.0088, "step": 78150 }, { "epoch": 2.4163936452988812, "grad_norm": 0.10994719713926315, "learning_rate": 1.879218952834271e-05, "loss": 0.0082, "step": 78180 }, { "epoch": 2.4173208876800394, "grad_norm": 0.11726092547178268, "learning_rate": 1.879172590715213e-05, "loss": 0.0092, "step": 78210 }, { "epoch": 2.418248130061198, "grad_norm": 0.12625840306282043, "learning_rate": 1.8791262285961552e-05, "loss": 0.0088, "step": 78240 }, { "epoch": 2.4191753724423566, "grad_norm": 0.07084941864013672, "learning_rate": 1.8790798664770974e-05, "loss": 0.0087, "step": 78270 }, { "epoch": 2.4201026148235147, "grad_norm": 0.15003038942813873, 
"learning_rate": 1.8790335043580395e-05, "loss": 0.0094, "step": 78300 }, { "epoch": 2.4210298572046733, "grad_norm": 0.16172361373901367, "learning_rate": 1.8789871422389813e-05, "loss": 0.0089, "step": 78330 }, { "epoch": 2.421957099585832, "grad_norm": 0.11249765008687973, "learning_rate": 1.8789407801199235e-05, "loss": 0.0087, "step": 78360 }, { "epoch": 2.42288434196699, "grad_norm": 0.08312156051397324, "learning_rate": 1.8788944180008656e-05, "loss": 0.0084, "step": 78390 }, { "epoch": 2.4238115843481487, "grad_norm": 0.1622050702571869, "learning_rate": 1.8788480558818078e-05, "loss": 0.0088, "step": 78420 }, { "epoch": 2.424738826729307, "grad_norm": 0.13083821535110474, "learning_rate": 1.87880169376275e-05, "loss": 0.0097, "step": 78450 }, { "epoch": 2.4256660691104655, "grad_norm": 0.13480915129184723, "learning_rate": 1.8787553316436917e-05, "loss": 0.0088, "step": 78480 }, { "epoch": 2.426593311491624, "grad_norm": 0.12061762809753418, "learning_rate": 1.878708969524634e-05, "loss": 0.0086, "step": 78510 }, { "epoch": 2.427520553872782, "grad_norm": 0.1260618418455124, "learning_rate": 1.878662607405576e-05, "loss": 0.0096, "step": 78540 }, { "epoch": 2.428447796253941, "grad_norm": 0.13787256181240082, "learning_rate": 1.878616245286518e-05, "loss": 0.0088, "step": 78570 }, { "epoch": 2.4293750386350994, "grad_norm": 0.108407162129879, "learning_rate": 1.87856988316746e-05, "loss": 0.009, "step": 78600 }, { "epoch": 2.4303022810162576, "grad_norm": 0.17161695659160614, "learning_rate": 1.878523521048402e-05, "loss": 0.0081, "step": 78630 }, { "epoch": 2.431229523397416, "grad_norm": 0.1719065010547638, "learning_rate": 1.8784771589293443e-05, "loss": 0.0084, "step": 78660 }, { "epoch": 2.4321567657785748, "grad_norm": 0.12402104586362839, "learning_rate": 1.8784307968102865e-05, "loss": 0.0085, "step": 78690 }, { "epoch": 2.433084008159733, "grad_norm": 0.11257883906364441, "learning_rate": 1.8783844346912283e-05, "loss": 0.0086, "step": 78720 }, { 
"epoch": 2.4340112505408915, "grad_norm": 0.10237425565719604, "learning_rate": 1.8783380725721704e-05, "loss": 0.0093, "step": 78750 }, { "epoch": 2.4349384929220497, "grad_norm": 0.16165576875209808, "learning_rate": 1.8782917104531126e-05, "loss": 0.0077, "step": 78780 }, { "epoch": 2.4358657353032083, "grad_norm": 0.15033291280269623, "learning_rate": 1.8782453483340547e-05, "loss": 0.0085, "step": 78810 }, { "epoch": 2.436792977684367, "grad_norm": 0.14412614703178406, "learning_rate": 1.878198986214997e-05, "loss": 0.0087, "step": 78840 }, { "epoch": 2.437720220065525, "grad_norm": 0.15754248201847076, "learning_rate": 1.878152624095939e-05, "loss": 0.0087, "step": 78870 }, { "epoch": 2.4386474624466836, "grad_norm": 0.11940871924161911, "learning_rate": 1.8781062619768808e-05, "loss": 0.0086, "step": 78900 }, { "epoch": 2.4395747048278418, "grad_norm": 0.12952829897403717, "learning_rate": 1.878059899857823e-05, "loss": 0.0087, "step": 78930 }, { "epoch": 2.4405019472090004, "grad_norm": 0.1343623250722885, "learning_rate": 1.878013537738765e-05, "loss": 0.0092, "step": 78960 }, { "epoch": 2.441429189590159, "grad_norm": 0.16332915425300598, "learning_rate": 1.877967175619707e-05, "loss": 0.0086, "step": 78990 }, { "epoch": 2.442356431971317, "grad_norm": 0.1550234854221344, "learning_rate": 1.877920813500649e-05, "loss": 0.0086, "step": 79020 }, { "epoch": 2.4432836743524757, "grad_norm": 0.20137035846710205, "learning_rate": 1.8778744513815916e-05, "loss": 0.0089, "step": 79050 }, { "epoch": 2.4442109167336343, "grad_norm": 0.20418862998485565, "learning_rate": 1.8778280892625334e-05, "loss": 0.0089, "step": 79080 }, { "epoch": 2.4451381591147925, "grad_norm": null, "learning_rate": 1.8777832725474442e-05, "loss": 0.0092, "step": 79110 }, { "epoch": 2.446065401495951, "grad_norm": 0.13513608276844025, "learning_rate": 1.8777369104283863e-05, "loss": 0.0082, "step": 79140 }, { "epoch": 2.4469926438771097, "grad_norm": 0.1674344688653946,
"learning_rate": 1.877690548309328e-05, "loss": 0.0084, "step": 79170 }, { "epoch": 2.447919886258268, "grad_norm": 0.18775440752506256, "learning_rate": 1.8776441861902703e-05, "loss": 0.0094, "step": 79200 }, { "epoch": 2.4488471286394264, "grad_norm": 0.1533244103193283, "learning_rate": 1.877597824071212e-05, "loss": 0.0087, "step": 79230 }, { "epoch": 2.4497743710205846, "grad_norm": 0.10873398929834366, "learning_rate": 1.8775514619521546e-05, "loss": 0.0086, "step": 79260 }, { "epoch": 2.450701613401743, "grad_norm": 0.0980750024318695, "learning_rate": 1.8775050998330967e-05, "loss": 0.0079, "step": 79290 }, { "epoch": 2.451628855782902, "grad_norm": 0.14497636258602142, "learning_rate": 1.8774587377140386e-05, "loss": 0.0085, "step": 79320 }, { "epoch": 2.45255609816406, "grad_norm": 0.11082825809717178, "learning_rate": 1.8774123755949807e-05, "loss": 0.0092, "step": 79350 }, { "epoch": 2.4534833405452185, "grad_norm": 0.1351863145828247, "learning_rate": 1.877366013475923e-05, "loss": 0.0083, "step": 79380 }, { "epoch": 2.454410582926377, "grad_norm": 0.11677920073270798, "learning_rate": 1.8773196513568647e-05, "loss": 0.0086, "step": 79410 }, { "epoch": 2.4553378253075353, "grad_norm": 0.12915626168251038, "learning_rate": 1.8772732892378068e-05, "loss": 0.0083, "step": 79440 }, { "epoch": 2.456265067688694, "grad_norm": 0.09940023720264435, "learning_rate": 1.877226927118749e-05, "loss": 0.0085, "step": 79470 }, { "epoch": 2.4571923100698525, "grad_norm": 0.10301085561513901, "learning_rate": 1.877180564999691e-05, "loss": 0.0082, "step": 79500 }, { "epoch": 2.4581195524510107, "grad_norm": 0.0993812158703804, "learning_rate": 1.8771342028806333e-05, "loss": 0.0093, "step": 79530 }, { "epoch": 2.4590467948321693, "grad_norm": 0.13228176534175873, "learning_rate": 1.877087840761575e-05, "loss": 0.0089, "step": 79560 }, { "epoch": 2.4599740372133274, "grad_norm": 0.12160558253526688, "learning_rate": 1.8770414786425172e-05, "loss": 0.0088, "step": 79590 
}, { "epoch": 2.460901279594486, "grad_norm": 0.12942536175251007, "learning_rate": 1.8769951165234594e-05, "loss": 0.0087, "step": 79620 }, { "epoch": 2.4618285219756446, "grad_norm": 0.10193166136741638, "learning_rate": 1.8769487544044015e-05, "loss": 0.0082, "step": 79650 }, { "epoch": 2.4627557643568028, "grad_norm": 0.12022865563631058, "learning_rate": 1.8769023922853437e-05, "loss": 0.0076, "step": 79680 }, { "epoch": 2.4636830067379614, "grad_norm": 0.09125329554080963, "learning_rate": 1.876856030166286e-05, "loss": 0.0091, "step": 79710 }, { "epoch": 2.4646102491191195, "grad_norm": 0.1252610981464386, "learning_rate": 1.8768096680472277e-05, "loss": 0.0085, "step": 79740 }, { "epoch": 2.465537491500278, "grad_norm": 0.15328440070152283, "learning_rate": 1.8767633059281698e-05, "loss": 0.0089, "step": 79770 }, { "epoch": 2.4664647338814367, "grad_norm": 0.15554173290729523, "learning_rate": 1.876716943809112e-05, "loss": 0.0084, "step": 79800 }, { "epoch": 2.467391976262595, "grad_norm": 0.17437000572681427, "learning_rate": 1.8766705816900538e-05, "loss": 0.0089, "step": 79830 }, { "epoch": 2.4683192186437535, "grad_norm": 0.15518683195114136, "learning_rate": 1.876624219570996e-05, "loss": 0.0081, "step": 79860 }, { "epoch": 2.469246461024912, "grad_norm": 0.1389426440000534, "learning_rate": 1.876577857451938e-05, "loss": 0.0091, "step": 79890 }, { "epoch": 2.4701737034060702, "grad_norm": 0.07793077826499939, "learning_rate": 1.8765314953328802e-05, "loss": 0.0089, "step": 79920 }, { "epoch": 2.471100945787229, "grad_norm": 0.12450376152992249, "learning_rate": 1.8764851332138224e-05, "loss": 0.0085, "step": 79950 }, { "epoch": 2.4720281881683874, "grad_norm": 0.1293283998966217, "learning_rate": 1.8764387710947642e-05, "loss": 0.0087, "step": 79980 }, { "epoch": 2.4729554305495456, "grad_norm": 0.07659190893173218, "learning_rate": 1.8763924089757063e-05, "loss": 0.0081, "step": 80010 }, { "epoch": 2.473882672930704, "grad_norm": 
0.14978617429733276, "learning_rate": 1.8763460468566485e-05, "loss": 0.0089, "step": 80040 }, { "epoch": 2.4748099153118623, "grad_norm": 0.17355941236019135, "learning_rate": 1.8762996847375906e-05, "loss": 0.0088, "step": 80070 }, { "epoch": 2.475737157693021, "grad_norm": 0.14947311580181122, "learning_rate": 1.8762533226185328e-05, "loss": 0.0092, "step": 80100 }, { "epoch": 2.4766644000741795, "grad_norm": 0.13580703735351562, "learning_rate": 1.876206960499475e-05, "loss": 0.0088, "step": 80130 }, { "epoch": 2.4775916424553377, "grad_norm": 0.1696145087480545, "learning_rate": 1.8761605983804167e-05, "loss": 0.009, "step": 80160 }, { "epoch": 2.4785188848364963, "grad_norm": 0.20830953121185303, "learning_rate": 1.876114236261359e-05, "loss": 0.0089, "step": 80190 }, { "epoch": 2.4794461272176544, "grad_norm": 0.15247535705566406, "learning_rate": 1.8760678741423007e-05, "loss": 0.0088, "step": 80220 }, { "epoch": 2.480373369598813, "grad_norm": 0.15293602645397186, "learning_rate": 1.876021512023243e-05, "loss": 0.0081, "step": 80250 }, { "epoch": 2.4813006119799716, "grad_norm": 0.16944509744644165, "learning_rate": 1.875975149904185e-05, "loss": 0.0081, "step": 80280 }, { "epoch": 2.4822278543611302, "grad_norm": 0.09930860996246338, "learning_rate": 1.875928787785127e-05, "loss": 0.008, "step": 80310 }, { "epoch": 2.4831550967422884, "grad_norm": 0.13376520574092865, "learning_rate": 1.8758824256660693e-05, "loss": 0.0076, "step": 80340 }, { "epoch": 2.484082339123447, "grad_norm": 0.1358371526002884, "learning_rate": 1.8758360635470115e-05, "loss": 0.0088, "step": 80370 }, { "epoch": 2.485009581504605, "grad_norm": 0.15927430987358093, "learning_rate": 1.8757897014279533e-05, "loss": 0.008, "step": 80400 }, { "epoch": 2.4859368238857638, "grad_norm": 0.14467744529247284, "learning_rate": 1.8757433393088954e-05, "loss": 0.009, "step": 80430 }, { "epoch": 2.4868640662669224, "grad_norm": 0.10617701709270477, "learning_rate": 1.8756969771898376e-05, 
"loss": 0.0092, "step": 80460 }, { "epoch": 2.4877913086480805, "grad_norm": 0.18855726718902588, "learning_rate": 1.8756506150707797e-05, "loss": 0.0077, "step": 80490 }, { "epoch": 2.488718551029239, "grad_norm": 0.13442391157150269, "learning_rate": 1.875604252951722e-05, "loss": 0.0087, "step": 80520 }, { "epoch": 2.4896457934103973, "grad_norm": 0.10965793579816818, "learning_rate": 1.8755578908326637e-05, "loss": 0.0087, "step": 80550 }, { "epoch": 2.490573035791556, "grad_norm": 0.12208891659975052, "learning_rate": 1.875511528713606e-05, "loss": 0.0088, "step": 80580 }, { "epoch": 2.4915002781727145, "grad_norm": 0.18067143857479095, "learning_rate": 1.875465166594548e-05, "loss": 0.0094, "step": 80610 }, { "epoch": 2.4924275205538726, "grad_norm": 0.09234073758125305, "learning_rate": 1.8754188044754898e-05, "loss": 0.0089, "step": 80640 }, { "epoch": 2.493354762935031, "grad_norm": 0.12804247438907623, "learning_rate": 1.8753724423564323e-05, "loss": 0.0085, "step": 80670 }, { "epoch": 2.49428200531619, "grad_norm": 0.12986846268177032, "learning_rate": 1.8753260802373744e-05, "loss": 0.0085, "step": 80700 }, { "epoch": 2.495209247697348, "grad_norm": 0.1535433828830719, "learning_rate": 1.8752797181183162e-05, "loss": 0.0088, "step": 80730 }, { "epoch": 2.4961364900785066, "grad_norm": 0.1537960171699524, "learning_rate": 1.8752333559992584e-05, "loss": 0.0091, "step": 80760 }, { "epoch": 2.497063732459665, "grad_norm": 0.18439540266990662, "learning_rate": 1.8751869938802006e-05, "loss": 0.0085, "step": 80790 }, { "epoch": 2.4979909748408233, "grad_norm": 0.13243679702281952, "learning_rate": 1.8751406317611424e-05, "loss": 0.0078, "step": 80820 }, { "epoch": 2.498918217221982, "grad_norm": 0.13984040915966034, "learning_rate": 1.8750942696420845e-05, "loss": 0.0085, "step": 80850 }, { "epoch": 2.49984545960314, "grad_norm": 0.12441185861825943, "learning_rate": 1.8750479075230267e-05, "loss": 0.0084, "step": 80880 }, { "epoch": 2.5007727019842987, 
"grad_norm": 0.11459699273109436, "learning_rate": 1.8750015454039688e-05, "loss": 0.0089, "step": 80910 }, { "epoch": 2.5016999443654573, "grad_norm": 0.1122569888830185, "learning_rate": 1.874955183284911e-05, "loss": 0.0091, "step": 80940 }, { "epoch": 2.5026271867466154, "grad_norm": 0.14716710150241852, "learning_rate": 1.8749088211658528e-05, "loss": 0.0086, "step": 80970 }, { "epoch": 2.503554429127774, "grad_norm": 0.1202312558889389, "learning_rate": 1.874862459046795e-05, "loss": 0.0095, "step": 81000 }, { "epoch": 2.504481671508932, "grad_norm": 0.15878424048423767, "learning_rate": 1.874816096927737e-05, "loss": 0.0085, "step": 81030 }, { "epoch": 2.505408913890091, "grad_norm": 0.12479099631309509, "learning_rate": 1.8747697348086792e-05, "loss": 0.0082, "step": 81060 }, { "epoch": 2.5063361562712494, "grad_norm": 0.15736249089241028, "learning_rate": 1.8747233726896214e-05, "loss": 0.0083, "step": 81090 }, { "epoch": 2.507263398652408, "grad_norm": 0.13870181143283844, "learning_rate": 1.8746770105705635e-05, "loss": 0.0087, "step": 81120 }, { "epoch": 2.508190641033566, "grad_norm": 0.13003161549568176, "learning_rate": 1.8746306484515053e-05, "loss": 0.009, "step": 81150 }, { "epoch": 2.5091178834147247, "grad_norm": 0.10836382955312729, "learning_rate": 1.8745842863324475e-05, "loss": 0.0089, "step": 81180 }, { "epoch": 2.510045125795883, "grad_norm": 0.14216303825378418, "learning_rate": 1.8745379242133893e-05, "loss": 0.0085, "step": 81210 }, { "epoch": 2.5109723681770415, "grad_norm": 0.11363524198532104, "learning_rate": 1.8744915620943315e-05, "loss": 0.0087, "step": 81240 }, { "epoch": 2.5118996105582, "grad_norm": 0.12202872335910797, "learning_rate": 1.8744451999752736e-05, "loss": 0.0081, "step": 81270 }, { "epoch": 2.5128268529393583, "grad_norm": 0.11422145366668701, "learning_rate": 1.8743988378562158e-05, "loss": 0.0089, "step": 81300 }, { "epoch": 2.513754095320517, "grad_norm": 0.17279550433158875, "learning_rate": 
1.874352475737158e-05, "loss": 0.0088, "step": 81330 }, { "epoch": 2.514681337701675, "grad_norm": 0.17195099592208862, "learning_rate": 1.8743061136181e-05, "loss": 0.0085, "step": 81360 }, { "epoch": 2.5156085800828336, "grad_norm": 0.12432810664176941, "learning_rate": 1.874259751499042e-05, "loss": 0.0082, "step": 81390 }, { "epoch": 2.516535822463992, "grad_norm": 0.1355486810207367, "learning_rate": 1.874213389379984e-05, "loss": 0.0086, "step": 81420 }, { "epoch": 2.5174630648451504, "grad_norm": 0.14894573390483856, "learning_rate": 1.8741670272609262e-05, "loss": 0.0082, "step": 81450 }, { "epoch": 2.518390307226309, "grad_norm": 0.12448050081729889, "learning_rate": 1.8741206651418683e-05, "loss": 0.0089, "step": 81480 }, { "epoch": 2.519317549607467, "grad_norm": 0.114827960729599, "learning_rate": 1.8740743030228105e-05, "loss": 0.0085, "step": 81510 }, { "epoch": 2.5202447919886257, "grad_norm": 0.11880979686975479, "learning_rate": 1.8740279409037523e-05, "loss": 0.0081, "step": 81540 }, { "epoch": 2.5211720343697843, "grad_norm": 0.1394691914319992, "learning_rate": 1.8739815787846944e-05, "loss": 0.0087, "step": 81570 }, { "epoch": 2.522099276750943, "grad_norm": 0.16083015501499176, "learning_rate": 1.8739352166656366e-05, "loss": 0.0091, "step": 81600 }, { "epoch": 2.523026519132101, "grad_norm": 0.10144852101802826, "learning_rate": 1.8738888545465784e-05, "loss": 0.0083, "step": 81630 }, { "epoch": 2.5239537615132597, "grad_norm": 0.14725539088249207, "learning_rate": 1.8738424924275205e-05, "loss": 0.0088, "step": 81660 }, { "epoch": 2.524881003894418, "grad_norm": 0.10248062759637833, "learning_rate": 1.8737961303084627e-05, "loss": 0.0095, "step": 81690 }, { "epoch": 2.5258082462755764, "grad_norm": 0.16555500030517578, "learning_rate": 1.873749768189405e-05, "loss": 0.0091, "step": 81720 }, { "epoch": 2.526735488656735, "grad_norm": 0.13076305389404297, "learning_rate": 1.873703406070347e-05, "loss": 0.009, "step": 81750 }, { "epoch": 
2.527662731037893, "grad_norm": 0.0995512530207634, "learning_rate": 1.873657043951289e-05, "loss": 0.0085, "step": 81780 }, { "epoch": 2.528589973419052, "grad_norm": 0.16120386123657227, "learning_rate": 1.873610681832231e-05, "loss": 0.0072, "step": 81810 }, { "epoch": 2.52951721580021, "grad_norm": 0.12601813673973083, "learning_rate": 1.873564319713173e-05, "loss": 0.0088, "step": 81840 }, { "epoch": 2.5304444581813685, "grad_norm": 0.11159754544496536, "learning_rate": 1.8735179575941153e-05, "loss": 0.0087, "step": 81870 }, { "epoch": 2.531371700562527, "grad_norm": 0.11113303899765015, "learning_rate": 1.8734715954750574e-05, "loss": 0.0087, "step": 81900 }, { "epoch": 2.5322989429436857, "grad_norm": 0.11072549223899841, "learning_rate": 1.8734252333559996e-05, "loss": 0.0098, "step": 81930 }, { "epoch": 2.533226185324844, "grad_norm": 0.11904916167259216, "learning_rate": 1.8733788712369414e-05, "loss": 0.0086, "step": 81960 }, { "epoch": 2.5341534277060025, "grad_norm": 0.1518901288509369, "learning_rate": 1.8733325091178835e-05, "loss": 0.0084, "step": 81990 }, { "epoch": 2.5350806700871606, "grad_norm": 0.07523313164710999, "learning_rate": 1.8732861469988257e-05, "loss": 0.0082, "step": 82020 }, { "epoch": 2.5360079124683192, "grad_norm": 0.1824711114168167, "learning_rate": 1.8732397848797675e-05, "loss": 0.0089, "step": 82050 }, { "epoch": 2.536935154849478, "grad_norm": 0.14571939408779144, "learning_rate": 1.87319342276071e-05, "loss": 0.009, "step": 82080 }, { "epoch": 2.537862397230636, "grad_norm": 0.10460414737462997, "learning_rate": 1.873147060641652e-05, "loss": 0.0081, "step": 82110 }, { "epoch": 2.5387896396117946, "grad_norm": 0.15768958628177643, "learning_rate": 1.873100698522594e-05, "loss": 0.009, "step": 82140 }, { "epoch": 2.5397168819929528, "grad_norm": 0.13291418552398682, "learning_rate": 1.873054336403536e-05, "loss": 0.0088, "step": 82170 }, { "epoch": 2.5406441243741114, "grad_norm": 0.1860613077878952, "learning_rate": 
1.8730079742844782e-05, "loss": 0.0087, "step": 82200 }, { "epoch": 2.54157136675527, "grad_norm": 0.14964739978313446, "learning_rate": 1.87296161216542e-05, "loss": 0.0087, "step": 82230 }, { "epoch": 2.542498609136428, "grad_norm": 0.12159821391105652, "learning_rate": 1.8729152500463622e-05, "loss": 0.0079, "step": 82260 }, { "epoch": 2.5434258515175867, "grad_norm": 0.10106454789638519, "learning_rate": 1.8728688879273044e-05, "loss": 0.0085, "step": 82290 }, { "epoch": 2.544353093898745, "grad_norm": 0.11363514512777328, "learning_rate": 1.8728225258082465e-05, "loss": 0.009, "step": 82320 }, { "epoch": 2.5452803362799035, "grad_norm": 0.17592498660087585, "learning_rate": 1.8727761636891887e-05, "loss": 0.0081, "step": 82350 }, { "epoch": 2.546207578661062, "grad_norm": 0.10325367003679276, "learning_rate": 1.8727298015701305e-05, "loss": 0.0091, "step": 82380 }, { "epoch": 2.5471348210422207, "grad_norm": 0.10917796194553375, "learning_rate": 1.8726834394510726e-05, "loss": 0.0093, "step": 82410 }, { "epoch": 2.548062063423379, "grad_norm": 0.1344439685344696, "learning_rate": 1.8726370773320148e-05, "loss": 0.0083, "step": 82440 }, { "epoch": 2.5489893058045374, "grad_norm": 0.14398439228534698, "learning_rate": 1.872590715212957e-05, "loss": 0.0084, "step": 82470 }, { "epoch": 2.5499165481856956, "grad_norm": 0.17929525673389435, "learning_rate": 1.872544353093899e-05, "loss": 0.0086, "step": 82500 }, { "epoch": 2.550843790566854, "grad_norm": 0.12475304305553436, "learning_rate": 1.8724979909748412e-05, "loss": 0.0097, "step": 82530 }, { "epoch": 2.5517710329480128, "grad_norm": 0.13135647773742676, "learning_rate": 1.872451628855783e-05, "loss": 0.0092, "step": 82560 }, { "epoch": 2.552698275329171, "grad_norm": 0.10748768597841263, "learning_rate": 1.8724052667367252e-05, "loss": 0.0086, "step": 82590 }, { "epoch": 2.5536255177103295, "grad_norm": 0.10229167342185974, "learning_rate": 1.872358904617667e-05, "loss": 0.0087, "step": 82620 }, { "epoch": 
2.5545527600914877, "grad_norm": 0.1133623719215393, "learning_rate": 1.872312542498609e-05, "loss": 0.0078, "step": 82650 }, { "epoch": 2.5554800024726463, "grad_norm": 0.1393238753080368, "learning_rate": 1.8722661803795513e-05, "loss": 0.009, "step": 82680 }, { "epoch": 2.556407244853805, "grad_norm": 0.1676507592201233, "learning_rate": 1.8722198182604934e-05, "loss": 0.0091, "step": 82710 }, { "epoch": 2.5573344872349635, "grad_norm": 0.12299422919750214, "learning_rate": 1.8721734561414356e-05, "loss": 0.009, "step": 82740 }, { "epoch": 2.5582617296161216, "grad_norm": 0.1406802386045456, "learning_rate": 1.8721270940223777e-05, "loss": 0.0083, "step": 82770 }, { "epoch": 2.5591889719972802, "grad_norm": 0.13432759046554565, "learning_rate": 1.8720807319033196e-05, "loss": 0.0087, "step": 82800 }, { "epoch": 2.5601162143784384, "grad_norm": 0.16960175335407257, "learning_rate": 1.8720343697842617e-05, "loss": 0.0088, "step": 82830 }, { "epoch": 2.561043456759597, "grad_norm": 0.12580354511737823, "learning_rate": 1.871988007665204e-05, "loss": 0.0084, "step": 82860 }, { "epoch": 2.5619706991407556, "grad_norm": 0.11002588272094727, "learning_rate": 1.871941645546146e-05, "loss": 0.0084, "step": 82890 }, { "epoch": 2.5628979415219137, "grad_norm": 0.12829343974590302, "learning_rate": 1.871895283427088e-05, "loss": 0.0086, "step": 82920 }, { "epoch": 2.5638251839030723, "grad_norm": 0.12011834979057312, "learning_rate": 1.87184892130803e-05, "loss": 0.0082, "step": 82950 }, { "epoch": 2.5647524262842305, "grad_norm": 0.15150627493858337, "learning_rate": 1.871802559188972e-05, "loss": 0.0092, "step": 82980 }, { "epoch": 2.565679668665389, "grad_norm": 0.14587225019931793, "learning_rate": 1.8717561970699143e-05, "loss": 0.008, "step": 83010 }, { "epoch": 2.5666069110465477, "grad_norm": 0.1199047714471817, "learning_rate": 1.871709834950856e-05, "loss": 0.0083, "step": 83040 }, { "epoch": 2.567534153427706, "grad_norm": 0.18161755800247192, "learning_rate": 
1.8716634728317982e-05, "loss": 0.0085, "step": 83070 }, { "epoch": 2.5684613958088645, "grad_norm": 0.09636133164167404, "learning_rate": 1.8716171107127404e-05, "loss": 0.009, "step": 83100 }, { "epoch": 2.5693886381900226, "grad_norm": 0.10247247666120529, "learning_rate": 1.8715707485936825e-05, "loss": 0.0083, "step": 83130 }, { "epoch": 2.570315880571181, "grad_norm": 0.15617945790290833, "learning_rate": 1.8715243864746247e-05, "loss": 0.0084, "step": 83160 }, { "epoch": 2.57124312295234, "grad_norm": 0.11562894284725189, "learning_rate": 1.871478024355567e-05, "loss": 0.008, "step": 83190 }, { "epoch": 2.5721703653334984, "grad_norm": 0.17164431512355804, "learning_rate": 1.8714316622365087e-05, "loss": 0.0088, "step": 83220 }, { "epoch": 2.5730976077146566, "grad_norm": 0.10453260689973831, "learning_rate": 1.8713868455214194e-05, "loss": 0.0088, "step": 83250 }, { "epoch": 2.574024850095815, "grad_norm": 0.14669805765151978, "learning_rate": 1.8713404834023616e-05, "loss": 0.0079, "step": 83280 }, { "epoch": 2.5749520924769733, "grad_norm": 0.09950917214155197, "learning_rate": 1.8712941212833034e-05, "loss": 0.0095, "step": 83310 }, { "epoch": 2.575879334858132, "grad_norm": 0.14577051997184753, "learning_rate": 1.871247759164246e-05, "loss": 0.0089, "step": 83340 }, { "epoch": 2.5768065772392905, "grad_norm": 0.1588582694530487, "learning_rate": 1.8712013970451877e-05, "loss": 0.0094, "step": 83370 }, { "epoch": 2.5777338196204487, "grad_norm": 0.14398448169231415, "learning_rate": 1.87115503492613e-05, "loss": 0.009, "step": 83400 }, { "epoch": 2.5786610620016073, "grad_norm": 0.19722291827201843, "learning_rate": 1.871108672807072e-05, "loss": 0.0087, "step": 83430 }, { "epoch": 2.5795883043827654, "grad_norm": 0.10374266654253006, "learning_rate": 1.8710623106880138e-05, "loss": 0.0079, "step": 83460 }, { "epoch": 2.580515546763924, "grad_norm": 0.1824256330728531, "learning_rate": 1.871015948568956e-05, "loss": 0.0089, "step": 83490 }, { "epoch": 
2.5814427891450826, "grad_norm": 0.14635881781578064, "learning_rate": 1.870969586449898e-05, "loss": 0.0087, "step": 83520 }, { "epoch": 2.5823700315262412, "grad_norm": 0.11381163448095322, "learning_rate": 1.8709232243308403e-05, "loss": 0.0083, "step": 83550 }, { "epoch": 2.5832972739073994, "grad_norm": 0.14550189673900604, "learning_rate": 1.8708768622117824e-05, "loss": 0.0089, "step": 83580 }, { "epoch": 2.584224516288558, "grad_norm": 0.18732666969299316, "learning_rate": 1.8708305000927246e-05, "loss": 0.0087, "step": 83610 }, { "epoch": 2.585151758669716, "grad_norm": 0.16688835620880127, "learning_rate": 1.8707841379736664e-05, "loss": 0.0094, "step": 83640 }, { "epoch": 2.5860790010508747, "grad_norm": 0.15800811350345612, "learning_rate": 1.8707377758546085e-05, "loss": 0.0088, "step": 83670 }, { "epoch": 2.5870062434320333, "grad_norm": 0.12759670615196228, "learning_rate": 1.8706914137355507e-05, "loss": 0.0087, "step": 83700 }, { "epoch": 2.5879334858131915, "grad_norm": 0.127017542719841, "learning_rate": 1.870645051616493e-05, "loss": 0.0077, "step": 83730 }, { "epoch": 2.58886072819435, "grad_norm": 0.1289612352848053, "learning_rate": 1.870598689497435e-05, "loss": 0.0088, "step": 83760 }, { "epoch": 2.5897879705755082, "grad_norm": 0.13559868931770325, "learning_rate": 1.8705523273783768e-05, "loss": 0.0078, "step": 83790 }, { "epoch": 2.590715212956667, "grad_norm": 0.14078553020954132, "learning_rate": 1.870505965259319e-05, "loss": 0.0091, "step": 83820 }, { "epoch": 2.5916424553378254, "grad_norm": 0.1020689532160759, "learning_rate": 1.870459603140261e-05, "loss": 0.0084, "step": 83850 }, { "epoch": 2.5925696977189836, "grad_norm": 0.11977100372314453, "learning_rate": 1.870413241021203e-05, "loss": 0.0088, "step": 83880 }, { "epoch": 2.593496940100142, "grad_norm": 0.12971675395965576, "learning_rate": 1.870366878902145e-05, "loss": 0.0088, "step": 83910 }, { "epoch": 2.5944241824813004, "grad_norm": 0.0898328423500061, "learning_rate": 
1.8703205167830872e-05, "loss": 0.0093, "step": 83940 }, { "epoch": 2.595351424862459, "grad_norm": 0.17606475949287415, "learning_rate": 1.8702741546640294e-05, "loss": 0.0086, "step": 83970 }, { "epoch": 2.5962786672436176, "grad_norm": 0.1137506291270256, "learning_rate": 1.8702277925449715e-05, "loss": 0.008, "step": 84000 }, { "epoch": 2.597205909624776, "grad_norm": 0.11598740518093109, "learning_rate": 1.8701814304259133e-05, "loss": 0.0085, "step": 84030 }, { "epoch": 2.5981331520059343, "grad_norm": 0.11294408142566681, "learning_rate": 1.8701350683068555e-05, "loss": 0.0088, "step": 84060 }, { "epoch": 2.599060394387093, "grad_norm": 0.1413615643978119, "learning_rate": 1.8700887061877976e-05, "loss": 0.0076, "step": 84090 }, { "epoch": 2.599987636768251, "grad_norm": 0.11809457093477249, "learning_rate": 1.8700423440687398e-05, "loss": 0.0084, "step": 84120 }, { "epoch": 2.6009148791494097, "grad_norm": 0.1790398210287094, "learning_rate": 1.869995981949682e-05, "loss": 0.0083, "step": 84150 }, { "epoch": 2.6018421215305683, "grad_norm": 0.12099386751651764, "learning_rate": 1.869949619830624e-05, "loss": 0.0086, "step": 84180 }, { "epoch": 2.6027693639117264, "grad_norm": 0.12272170931100845, "learning_rate": 1.869903257711566e-05, "loss": 0.0088, "step": 84210 }, { "epoch": 2.603696606292885, "grad_norm": 0.09890399873256683, "learning_rate": 1.869856895592508e-05, "loss": 0.0087, "step": 84240 }, { "epoch": 2.604623848674043, "grad_norm": 0.1229422390460968, "learning_rate": 1.8698105334734502e-05, "loss": 0.008, "step": 84270 }, { "epoch": 2.6055510910552018, "grad_norm": 0.12582425773143768, "learning_rate": 1.869764171354392e-05, "loss": 0.0087, "step": 84300 }, { "epoch": 2.6064783334363604, "grad_norm": 0.18591783940792084, "learning_rate": 1.869717809235334e-05, "loss": 0.0088, "step": 84330 }, { "epoch": 2.607405575817519, "grad_norm": 0.1338305026292801, "learning_rate": 1.8696714471162763e-05, "loss": 0.008, "step": 84360 }, { "epoch": 
2.608332818198677, "grad_norm": 0.1539323925971985, "learning_rate": 1.8696250849972185e-05, "loss": 0.0081, "step": 84390 }, { "epoch": 2.6092600605798357, "grad_norm": 0.10190358012914658, "learning_rate": 1.8695787228781606e-05, "loss": 0.0086, "step": 84420 }, { "epoch": 2.610187302960994, "grad_norm": 0.14482611417770386, "learning_rate": 1.8695323607591024e-05, "loss": 0.0087, "step": 84450 }, { "epoch": 2.6111145453421525, "grad_norm": 0.15430624783039093, "learning_rate": 1.8694859986400446e-05, "loss": 0.0096, "step": 84480 }, { "epoch": 2.612041787723311, "grad_norm": 0.20419147610664368, "learning_rate": 1.8694396365209867e-05, "loss": 0.0086, "step": 84510 }, { "epoch": 2.6129690301044692, "grad_norm": 0.12547484040260315, "learning_rate": 1.869393274401929e-05, "loss": 0.0086, "step": 84540 }, { "epoch": 2.613896272485628, "grad_norm": 0.12979641556739807, "learning_rate": 1.8693484576868393e-05, "loss": 0.0076, "step": 84570 }, { "epoch": 2.614823514866786, "grad_norm": 0.12258392572402954, "learning_rate": 1.8693020955677815e-05, "loss": 0.0083, "step": 84600 }, { "epoch": 2.6157507572479446, "grad_norm": 0.1632450670003891, "learning_rate": 1.8692557334487236e-05, "loss": 0.0082, "step": 84630 }, { "epoch": 2.616677999629103, "grad_norm": 0.1281362920999527, "learning_rate": 1.8692093713296658e-05, "loss": 0.0091, "step": 84660 }, { "epoch": 2.6176052420102613, "grad_norm": 0.11907164007425308, "learning_rate": 1.869163009210608e-05, "loss": 0.0083, "step": 84690 }, { "epoch": 2.61853248439142, "grad_norm": 0.13265672326087952, "learning_rate": 1.8691166470915497e-05, "loss": 0.0083, "step": 84720 }, { "epoch": 2.619459726772578, "grad_norm": 0.12362153083086014, "learning_rate": 1.869070284972492e-05, "loss": 0.0083, "step": 84750 }, { "epoch": 2.6203869691537367, "grad_norm": 0.11257553100585938, "learning_rate": 1.869023922853434e-05, "loss": 0.0084, "step": 84780 }, { "epoch": 2.6213142115348953, "grad_norm": 0.15145526826381683, 
"learning_rate": 1.8689775607343762e-05, "loss": 0.0085, "step": 84810 }, { "epoch": 2.622241453916054, "grad_norm": 0.08838054537773132, "learning_rate": 1.8689311986153183e-05, "loss": 0.0086, "step": 84840 }, { "epoch": 2.623168696297212, "grad_norm": 0.12125760316848755, "learning_rate": 1.86888483649626e-05, "loss": 0.0088, "step": 84870 }, { "epoch": 2.6240959386783707, "grad_norm": 0.09850654751062393, "learning_rate": 1.8688384743772023e-05, "loss": 0.0078, "step": 84900 }, { "epoch": 2.625023181059529, "grad_norm": 0.16382183134555817, "learning_rate": 1.8687921122581445e-05, "loss": 0.0091, "step": 84930 }, { "epoch": 2.6259504234406874, "grad_norm": 0.14469730854034424, "learning_rate": 1.8687457501390866e-05, "loss": 0.0087, "step": 84960 }, { "epoch": 2.626877665821846, "grad_norm": 0.179117351770401, "learning_rate": 1.8686993880200288e-05, "loss": 0.0092, "step": 84990 }, { "epoch": 2.627804908203004, "grad_norm": 0.11572559922933578, "learning_rate": 1.868653025900971e-05, "loss": 0.0082, "step": 85020 }, { "epoch": 2.6287321505841628, "grad_norm": 0.103205107152462, "learning_rate": 1.8686066637819127e-05, "loss": 0.0089, "step": 85050 }, { "epoch": 2.629659392965321, "grad_norm": 0.1445525586605072, "learning_rate": 1.868560301662855e-05, "loss": 0.0086, "step": 85080 }, { "epoch": 2.6305866353464795, "grad_norm": 0.14662952721118927, "learning_rate": 1.868513939543797e-05, "loss": 0.0083, "step": 85110 }, { "epoch": 2.631513877727638, "grad_norm": 0.10314887017011642, "learning_rate": 1.8684675774247388e-05, "loss": 0.0084, "step": 85140 }, { "epoch": 2.6324411201087963, "grad_norm": 0.12339344620704651, "learning_rate": 1.868421215305681e-05, "loss": 0.0087, "step": 85170 }, { "epoch": 2.633368362489955, "grad_norm": 0.11903191357851028, "learning_rate": 1.868374853186623e-05, "loss": 0.0089, "step": 85200 }, { "epoch": 2.6342956048711135, "grad_norm": 0.11822497844696045, "learning_rate": 1.8683284910675653e-05, "loss": 0.0079, "step": 85230 }, 
{ "epoch": 2.6352228472522716, "grad_norm": 0.10058464854955673, "learning_rate": 1.8682821289485074e-05, "loss": 0.0083, "step": 85260 }, { "epoch": 2.6361500896334302, "grad_norm": 0.2078515887260437, "learning_rate": 1.8682357668294492e-05, "loss": 0.009, "step": 85290 }, { "epoch": 2.637077332014589, "grad_norm": 0.12074626982212067, "learning_rate": 1.8681894047103914e-05, "loss": 0.009, "step": 85320 }, { "epoch": 2.638004574395747, "grad_norm": 0.1255297064781189, "learning_rate": 1.8681430425913335e-05, "loss": 0.0093, "step": 85350 }, { "epoch": 2.6389318167769056, "grad_norm": 0.19273145496845245, "learning_rate": 1.8680966804722757e-05, "loss": 0.0086, "step": 85380 }, { "epoch": 2.6398590591580637, "grad_norm": 0.1550952047109604, "learning_rate": 1.868050318353218e-05, "loss": 0.0084, "step": 85410 }, { "epoch": 2.6407863015392223, "grad_norm": 0.1640343964099884, "learning_rate": 1.86800395623416e-05, "loss": 0.0088, "step": 85440 }, { "epoch": 2.641713543920381, "grad_norm": 0.12181587517261505, "learning_rate": 1.8679575941151018e-05, "loss": 0.0082, "step": 85470 }, { "epoch": 2.642640786301539, "grad_norm": 0.15538346767425537, "learning_rate": 1.867911231996044e-05, "loss": 0.0089, "step": 85500 }, { "epoch": 2.6435680286826977, "grad_norm": 0.14633819460868835, "learning_rate": 1.8678648698769858e-05, "loss": 0.0082, "step": 85530 }, { "epoch": 2.644495271063856, "grad_norm": 0.1048721969127655, "learning_rate": 1.867818507757928e-05, "loss": 0.0083, "step": 85560 }, { "epoch": 2.6454225134450144, "grad_norm": 0.1365603357553482, "learning_rate": 1.86777214563887e-05, "loss": 0.0084, "step": 85590 }, { "epoch": 2.646349755826173, "grad_norm": 0.10917264968156815, "learning_rate": 1.8677257835198122e-05, "loss": 0.0088, "step": 85620 }, { "epoch": 2.6472769982073316, "grad_norm": 0.1125091165304184, "learning_rate": 1.8676794214007544e-05, "loss": 0.0087, "step": 85650 }, { "epoch": 2.64820424058849, "grad_norm": 0.20187056064605713, 
"learning_rate": 1.8676330592816965e-05, "loss": 0.0087, "step": 85680 }, { "epoch": 2.6491314829696484, "grad_norm": 0.13428279757499695, "learning_rate": 1.8675866971626383e-05, "loss": 0.0086, "step": 85710 }, { "epoch": 2.6500587253508066, "grad_norm": 0.09832122921943665, "learning_rate": 1.8675403350435805e-05, "loss": 0.0084, "step": 85740 }, { "epoch": 2.650985967731965, "grad_norm": 0.10087072104215622, "learning_rate": 1.8674939729245226e-05, "loss": 0.0085, "step": 85770 }, { "epoch": 2.6519132101131238, "grad_norm": 0.11738874763250351, "learning_rate": 1.8674476108054648e-05, "loss": 0.0088, "step": 85800 }, { "epoch": 2.652840452494282, "grad_norm": 0.18463149666786194, "learning_rate": 1.867401248686407e-05, "loss": 0.0082, "step": 85830 }, { "epoch": 2.6537676948754405, "grad_norm": 0.08683522790670395, "learning_rate": 1.8673548865673487e-05, "loss": 0.0089, "step": 85860 }, { "epoch": 2.6546949372565987, "grad_norm": 0.15725469589233398, "learning_rate": 1.867308524448291e-05, "loss": 0.0084, "step": 85890 }, { "epoch": 2.6556221796377573, "grad_norm": 0.1025790199637413, "learning_rate": 1.867262162329233e-05, "loss": 0.009, "step": 85920 }, { "epoch": 2.656549422018916, "grad_norm": 0.11292483657598495, "learning_rate": 1.867215800210175e-05, "loss": 0.0091, "step": 85950 }, { "epoch": 2.657476664400074, "grad_norm": 0.10978427529335022, "learning_rate": 1.867169438091117e-05, "loss": 0.0084, "step": 85980 }, { "epoch": 2.6584039067812326, "grad_norm": 0.19320349395275116, "learning_rate": 1.867123075972059e-05, "loss": 0.009, "step": 86010 }, { "epoch": 2.6593311491623908, "grad_norm": 0.15667004883289337, "learning_rate": 1.8670767138530013e-05, "loss": 0.0089, "step": 86040 }, { "epoch": 2.6602583915435494, "grad_norm": 0.11178553104400635, "learning_rate": 1.8670303517339435e-05, "loss": 0.0082, "step": 86070 }, { "epoch": 2.661185633924708, "grad_norm": 0.11244656145572662, "learning_rate": 1.8669839896148856e-05, "loss": 0.0088, "step": 
86100 }, { "epoch": 2.6621128763058666, "grad_norm": 0.1444675177335739, "learning_rate": 1.8669376274958274e-05, "loss": 0.0081, "step": 86130 }, { "epoch": 2.6630401186870247, "grad_norm": 0.10138285160064697, "learning_rate": 1.8668912653767696e-05, "loss": 0.0084, "step": 86160 }, { "epoch": 2.6639673610681833, "grad_norm": 0.09901793301105499, "learning_rate": 1.8668449032577117e-05, "loss": 0.0086, "step": 86190 }, { "epoch": 2.6648946034493415, "grad_norm": 0.18112054467201233, "learning_rate": 1.8668000865426222e-05, "loss": 0.0089, "step": 86220 }, { "epoch": 2.6658218458305, "grad_norm": 0.11614803969860077, "learning_rate": 1.8667537244235647e-05, "loss": 0.0084, "step": 86250 }, { "epoch": 2.6667490882116587, "grad_norm": 0.13773025572299957, "learning_rate": 1.8667073623045065e-05, "loss": 0.0088, "step": 86280 }, { "epoch": 2.667676330592817, "grad_norm": 0.14757825434207916, "learning_rate": 1.8666610001854486e-05, "loss": 0.0082, "step": 86310 }, { "epoch": 2.6686035729739754, "grad_norm": 0.1006697416305542, "learning_rate": 1.8666146380663908e-05, "loss": 0.0087, "step": 86340 }, { "epoch": 2.6695308153551336, "grad_norm": 0.13692615926265717, "learning_rate": 1.8665682759473326e-05, "loss": 0.0087, "step": 86370 }, { "epoch": 2.670458057736292, "grad_norm": 0.12743917107582092, "learning_rate": 1.8665219138282747e-05, "loss": 0.0089, "step": 86400 }, { "epoch": 2.671385300117451, "grad_norm": 0.13204078376293182, "learning_rate": 1.866475551709217e-05, "loss": 0.0075, "step": 86430 }, { "epoch": 2.6723125424986094, "grad_norm": 0.1479799598455429, "learning_rate": 1.866429189590159e-05, "loss": 0.0079, "step": 86460 }, { "epoch": 2.6732397848797675, "grad_norm": 0.18815340101718903, "learning_rate": 1.8663828274711012e-05, "loss": 0.0085, "step": 86490 }, { "epoch": 2.674167027260926, "grad_norm": 0.19707423448562622, "learning_rate": 1.8663364653520433e-05, "loss": 0.0084, "step": 86520 }, { "epoch": 2.6750942696420843, "grad_norm": 
0.12828145921230316, "learning_rate": 1.866290103232985e-05, "loss": 0.008, "step": 86550 }, { "epoch": 2.676021512023243, "grad_norm": 0.14439275860786438, "learning_rate": 1.8662437411139273e-05, "loss": 0.0093, "step": 86580 }, { "epoch": 2.6769487544044015, "grad_norm": 0.14215877652168274, "learning_rate": 1.8661973789948695e-05, "loss": 0.0079, "step": 86610 }, { "epoch": 2.6778759967855597, "grad_norm": 0.4008859395980835, "learning_rate": 1.8661510168758116e-05, "loss": 0.0085, "step": 86640 }, { "epoch": 2.6788032391667183, "grad_norm": 0.1539941281080246, "learning_rate": 1.8661046547567538e-05, "loss": 0.0083, "step": 86670 }, { "epoch": 2.6797304815478764, "grad_norm": 0.1280483901500702, "learning_rate": 1.8660582926376956e-05, "loss": 0.0089, "step": 86700 }, { "epoch": 2.680657723929035, "grad_norm": 0.18633581697940826, "learning_rate": 1.8660119305186377e-05, "loss": 0.0089, "step": 86730 }, { "epoch": 2.6815849663101936, "grad_norm": 0.11792843788862228, "learning_rate": 1.86596556839958e-05, "loss": 0.0084, "step": 86760 }, { "epoch": 2.6825122086913518, "grad_norm": 0.13055694103240967, "learning_rate": 1.8659192062805217e-05, "loss": 0.0083, "step": 86790 }, { "epoch": 2.6834394510725104, "grad_norm": 0.13933469355106354, "learning_rate": 1.865872844161464e-05, "loss": 0.0083, "step": 86820 }, { "epoch": 2.6843666934536685, "grad_norm": 0.1444053053855896, "learning_rate": 1.865826482042406e-05, "loss": 0.0093, "step": 86850 }, { "epoch": 2.685293935834827, "grad_norm": 0.20206475257873535, "learning_rate": 1.865780119923348e-05, "loss": 0.0086, "step": 86880 }, { "epoch": 2.6862211782159857, "grad_norm": 0.12605780363082886, "learning_rate": 1.8657337578042903e-05, "loss": 0.0087, "step": 86910 }, { "epoch": 2.6871484205971443, "grad_norm": 0.10700727254152298, "learning_rate": 1.8656873956852324e-05, "loss": 0.0082, "step": 86940 }, { "epoch": 2.6880756629783025, "grad_norm": 0.18665194511413574, "learning_rate": 1.8656410335661742e-05, 
"loss": 0.0084, "step": 86970 }, { "epoch": 2.689002905359461, "grad_norm": 0.1005401611328125, "learning_rate": 1.8655946714471164e-05, "loss": 0.0084, "step": 87000 }, { "epoch": 2.6899301477406192, "grad_norm": 0.09731214493513107, "learning_rate": 1.8655483093280586e-05, "loss": 0.0087, "step": 87030 }, { "epoch": 2.690857390121778, "grad_norm": 0.18287114799022675, "learning_rate": 1.8655019472090007e-05, "loss": 0.0092, "step": 87060 }, { "epoch": 2.6917846325029364, "grad_norm": 0.1536656618118286, "learning_rate": 1.865455585089943e-05, "loss": 0.0084, "step": 87090 }, { "epoch": 2.6927118748840946, "grad_norm": 0.11163792759180069, "learning_rate": 1.8654092229708847e-05, "loss": 0.0081, "step": 87120 }, { "epoch": 2.693639117265253, "grad_norm": 0.12848307192325592, "learning_rate": 1.8653628608518268e-05, "loss": 0.0085, "step": 87150 }, { "epoch": 2.6945663596464113, "grad_norm": 0.1095590591430664, "learning_rate": 1.865316498732769e-05, "loss": 0.0083, "step": 87180 }, { "epoch": 2.69549360202757, "grad_norm": 0.12900914251804352, "learning_rate": 1.8652701366137108e-05, "loss": 0.0086, "step": 87210 }, { "epoch": 2.6964208444087285, "grad_norm": 0.13115191459655762, "learning_rate": 1.865223774494653e-05, "loss": 0.0086, "step": 87240 }, { "epoch": 2.697348086789887, "grad_norm": 0.13575927913188934, "learning_rate": 1.865177412375595e-05, "loss": 0.0086, "step": 87270 }, { "epoch": 2.6982753291710453, "grad_norm": 0.112447589635849, "learning_rate": 1.8651310502565372e-05, "loss": 0.0089, "step": 87300 }, { "epoch": 2.699202571552204, "grad_norm": 0.13927561044692993, "learning_rate": 1.8650846881374794e-05, "loss": 0.0083, "step": 87330 }, { "epoch": 2.700129813933362, "grad_norm": 0.13068045675754547, "learning_rate": 1.8650383260184212e-05, "loss": 0.0086, "step": 87360 }, { "epoch": 2.7010570563145206, "grad_norm": 0.0911865308880806, "learning_rate": 1.8649919638993633e-05, "loss": 0.0091, "step": 87390 }, { "epoch": 2.7019842986956792, 
"grad_norm": 0.19008716940879822, "learning_rate": 1.8649456017803055e-05, "loss": 0.0084, "step": 87420 }, { "epoch": 2.7029115410768374, "grad_norm": 0.14832358062267303, "learning_rate": 1.8648992396612476e-05, "loss": 0.0092, "step": 87450 }, { "epoch": 2.703838783457996, "grad_norm": 0.1205102875828743, "learning_rate": 1.8648528775421898e-05, "loss": 0.0089, "step": 87480 }, { "epoch": 2.704766025839154, "grad_norm": 0.1700468212366104, "learning_rate": 1.864806515423132e-05, "loss": 0.0086, "step": 87510 }, { "epoch": 2.7056932682203128, "grad_norm": 0.10592833161354065, "learning_rate": 1.8647601533040738e-05, "loss": 0.0088, "step": 87540 }, { "epoch": 2.7066205106014714, "grad_norm": 0.24454836547374725, "learning_rate": 1.864713791185016e-05, "loss": 0.0089, "step": 87570 }, { "epoch": 2.7075477529826295, "grad_norm": 0.13956472277641296, "learning_rate": 1.864667429065958e-05, "loss": 0.0088, "step": 87600 }, { "epoch": 2.708474995363788, "grad_norm": 0.09915021806955338, "learning_rate": 1.8646210669469e-05, "loss": 0.0082, "step": 87630 }, { "epoch": 2.7094022377449463, "grad_norm": 0.1574244648218155, "learning_rate": 1.8645747048278424e-05, "loss": 0.0092, "step": 87660 }, { "epoch": 2.710329480126105, "grad_norm": 0.1148134171962738, "learning_rate": 1.8645283427087842e-05, "loss": 0.0091, "step": 87690 }, { "epoch": 2.7112567225072635, "grad_norm": 0.11643565446138382, "learning_rate": 1.8644819805897263e-05, "loss": 0.0081, "step": 87720 }, { "epoch": 2.712183964888422, "grad_norm": 0.22726906836032867, "learning_rate": 1.8644356184706685e-05, "loss": 0.009, "step": 87750 }, { "epoch": 2.71311120726958, "grad_norm": 0.15644009411334991, "learning_rate": 1.8643892563516103e-05, "loss": 0.0093, "step": 87780 }, { "epoch": 2.714038449650739, "grad_norm": 0.129537433385849, "learning_rate": 1.8643428942325524e-05, "loss": 0.0091, "step": 87810 }, { "epoch": 2.714965692031897, "grad_norm": 0.12698793411254883, "learning_rate": 1.8642965321134946e-05, 
"loss": 0.0087, "step": 87840 }, { "epoch": 2.7158929344130556, "grad_norm": 0.11267630010843277, "learning_rate": 1.8642501699944367e-05, "loss": 0.0086, "step": 87870 }, { "epoch": 2.716820176794214, "grad_norm": 0.12636758387088776, "learning_rate": 1.864203807875379e-05, "loss": 0.0086, "step": 87900 }, { "epoch": 2.7177474191753723, "grad_norm": 0.11855950951576233, "learning_rate": 1.864157445756321e-05, "loss": 0.0083, "step": 87930 }, { "epoch": 2.718674661556531, "grad_norm": 0.12393512576818466, "learning_rate": 1.864111083637263e-05, "loss": 0.0081, "step": 87960 }, { "epoch": 2.719601903937689, "grad_norm": 0.19822511076927185, "learning_rate": 1.864064721518205e-05, "loss": 0.0082, "step": 87990 }, { "epoch": 2.7205291463188477, "grad_norm": 0.14973318576812744, "learning_rate": 1.864018359399147e-05, "loss": 0.0088, "step": 88020 }, { "epoch": 2.7214563887000063, "grad_norm": 0.09929432719945908, "learning_rate": 1.8639719972800893e-05, "loss": 0.0089, "step": 88050 }, { "epoch": 2.722383631081165, "grad_norm": 0.20295777916908264, "learning_rate": 1.8639256351610315e-05, "loss": 0.0088, "step": 88080 }, { "epoch": 2.723310873462323, "grad_norm": 0.10770698636770248, "learning_rate": 1.8638792730419733e-05, "loss": 0.0085, "step": 88110 }, { "epoch": 2.7242381158434816, "grad_norm": 0.14503268897533417, "learning_rate": 1.8638329109229154e-05, "loss": 0.0087, "step": 88140 }, { "epoch": 2.72516535822464, "grad_norm": 0.09084682911634445, "learning_rate": 1.8637865488038576e-05, "loss": 0.0092, "step": 88170 }, { "epoch": 2.7260926006057984, "grad_norm": 0.13872790336608887, "learning_rate": 1.8637401866847994e-05, "loss": 0.0084, "step": 88200 }, { "epoch": 2.727019842986957, "grad_norm": 0.1385885775089264, "learning_rate": 1.8636938245657415e-05, "loss": 0.0091, "step": 88230 }, { "epoch": 2.727947085368115, "grad_norm": 0.17457671463489532, "learning_rate": 1.8636474624466837e-05, "loss": 0.009, "step": 88260 }, { "epoch": 2.7288743277492737, 
"grad_norm": 0.11509042978286743, "learning_rate": 1.8636011003276258e-05, "loss": 0.009, "step": 88290 }, { "epoch": 2.729801570130432, "grad_norm": 0.12333668768405914, "learning_rate": 1.863554738208568e-05, "loss": 0.0084, "step": 88320 }, { "epoch": 2.7307288125115905, "grad_norm": 0.17393136024475098, "learning_rate": 1.8635083760895098e-05, "loss": 0.0092, "step": 88350 }, { "epoch": 2.731656054892749, "grad_norm": 0.1173294261097908, "learning_rate": 1.863462013970452e-05, "loss": 0.0084, "step": 88380 }, { "epoch": 2.7325832972739073, "grad_norm": 0.1437629759311676, "learning_rate": 1.863415651851394e-05, "loss": 0.0085, "step": 88410 }, { "epoch": 2.733510539655066, "grad_norm": 0.11529382318258286, "learning_rate": 1.8633692897323362e-05, "loss": 0.0087, "step": 88440 }, { "epoch": 2.734437782036224, "grad_norm": 0.17349869012832642, "learning_rate": 1.8633229276132784e-05, "loss": 0.0089, "step": 88470 }, { "epoch": 2.7353650244173826, "grad_norm": 0.10471516847610474, "learning_rate": 1.8632765654942205e-05, "loss": 0.0087, "step": 88500 }, { "epoch": 2.736292266798541, "grad_norm": 0.09604345262050629, "learning_rate": 1.8632302033751624e-05, "loss": 0.0095, "step": 88530 }, { "epoch": 2.7372195091797, "grad_norm": 0.10696335881948471, "learning_rate": 1.8631838412561045e-05, "loss": 0.0076, "step": 88560 }, { "epoch": 2.738146751560858, "grad_norm": 0.12681317329406738, "learning_rate": 1.8631374791370467e-05, "loss": 0.0084, "step": 88590 }, { "epoch": 2.7390739939420166, "grad_norm": 0.12728339433670044, "learning_rate": 1.8630911170179885e-05, "loss": 0.0086, "step": 88620 }, { "epoch": 2.7400012363231747, "grad_norm": 0.13086722791194916, "learning_rate": 1.8630447548989306e-05, "loss": 0.0081, "step": 88650 }, { "epoch": 2.7409284787043333, "grad_norm": 0.11631613969802856, "learning_rate": 1.8629983927798728e-05, "loss": 0.008, "step": 88680 }, { "epoch": 2.741855721085492, "grad_norm": 0.16702356934547424, "learning_rate": 
1.862952030660815e-05, "loss": 0.0086, "step": 88710 }, { "epoch": 2.74278296346665, "grad_norm": 0.17187392711639404, "learning_rate": 1.862905668541757e-05, "loss": 0.008, "step": 88740 }, { "epoch": 2.7437102058478087, "grad_norm": 0.10984040796756744, "learning_rate": 1.862859306422699e-05, "loss": 0.0079, "step": 88770 }, { "epoch": 2.744637448228967, "grad_norm": 0.1428414136171341, "learning_rate": 1.862812944303641e-05, "loss": 0.0083, "step": 88800 }, { "epoch": 2.7455646906101254, "grad_norm": 0.09793814271688461, "learning_rate": 1.8627665821845832e-05, "loss": 0.0081, "step": 88830 }, { "epoch": 2.746491932991284, "grad_norm": 0.10678979754447937, "learning_rate": 1.8627202200655253e-05, "loss": 0.0084, "step": 88860 }, { "epoch": 2.7474191753724426, "grad_norm": 0.11471214890480042, "learning_rate": 1.8626738579464675e-05, "loss": 0.0083, "step": 88890 }, { "epoch": 2.748346417753601, "grad_norm": 0.18693940341472626, "learning_rate": 1.8626274958274096e-05, "loss": 0.009, "step": 88920 }, { "epoch": 2.7492736601347594, "grad_norm": 0.1541987657546997, "learning_rate": 1.8625811337083514e-05, "loss": 0.008, "step": 88950 }, { "epoch": 2.7502009025159175, "grad_norm": 0.10604231059551239, "learning_rate": 1.8625347715892936e-05, "loss": 0.0087, "step": 88980 }, { "epoch": 2.751128144897076, "grad_norm": 0.11335533857345581, "learning_rate": 1.8624884094702354e-05, "loss": 0.0089, "step": 89010 }, { "epoch": 2.7520553872782347, "grad_norm": 0.11030709743499756, "learning_rate": 1.862442047351178e-05, "loss": 0.0085, "step": 89040 }, { "epoch": 2.752982629659393, "grad_norm": 0.12230734527111053, "learning_rate": 1.86239568523212e-05, "loss": 0.0091, "step": 89070 }, { "epoch": 2.7539098720405515, "grad_norm": 0.07782137393951416, "learning_rate": 1.862349323113062e-05, "loss": 0.0096, "step": 89100 }, { "epoch": 2.7548371144217096, "grad_norm": 0.12529060244560242, "learning_rate": 1.862302960994004e-05, "loss": 0.0089, "step": 89130 }, { "epoch": 
2.7557643568028682, "grad_norm": 0.10348793119192123, "learning_rate": 1.862256598874946e-05, "loss": 0.0089, "step": 89160 }, { "epoch": 2.756691599184027, "grad_norm": 0.12804590165615082, "learning_rate": 1.862210236755888e-05, "loss": 0.0081, "step": 89190 }, { "epoch": 2.757618841565185, "grad_norm": 0.1413261741399765, "learning_rate": 1.86216387463683e-05, "loss": 0.008, "step": 89220 }, { "epoch": 2.7585460839463436, "grad_norm": 0.17208924889564514, "learning_rate": 1.8621175125177723e-05, "loss": 0.0086, "step": 89250 }, { "epoch": 2.7594733263275018, "grad_norm": 0.09126507490873337, "learning_rate": 1.8620711503987144e-05, "loss": 0.009, "step": 89280 }, { "epoch": 2.7604005687086604, "grad_norm": 0.11617173254489899, "learning_rate": 1.8620247882796566e-05, "loss": 0.0079, "step": 89310 }, { "epoch": 2.761327811089819, "grad_norm": 0.15829944610595703, "learning_rate": 1.8619784261605984e-05, "loss": 0.0081, "step": 89340 }, { "epoch": 2.7622550534709776, "grad_norm": 0.10610859096050262, "learning_rate": 1.8619320640415405e-05, "loss": 0.0092, "step": 89370 }, { "epoch": 2.7631822958521357, "grad_norm": 0.15822455286979675, "learning_rate": 1.8618857019224827e-05, "loss": 0.0087, "step": 89400 }, { "epoch": 2.7641095382332943, "grad_norm": 0.12626484036445618, "learning_rate": 1.861839339803425e-05, "loss": 0.0088, "step": 89430 }, { "epoch": 2.7650367806144525, "grad_norm": 0.11276178807020187, "learning_rate": 1.861792977684367e-05, "loss": 0.0088, "step": 89460 }, { "epoch": 2.765964022995611, "grad_norm": 0.11831784248352051, "learning_rate": 1.861746615565309e-05, "loss": 0.008, "step": 89490 }, { "epoch": 2.7668912653767697, "grad_norm": 0.19830825924873352, "learning_rate": 1.861700253446251e-05, "loss": 0.0086, "step": 89520 }, { "epoch": 2.767818507757928, "grad_norm": 0.21531720459461212, "learning_rate": 1.861653891327193e-05, "loss": 0.0092, "step": 89550 }, { "epoch": 2.7687457501390864, "grad_norm": 0.08417746424674988, "learning_rate": 
1.8616075292081353e-05, "loss": 0.0093, "step": 89580 }, { "epoch": 2.7696729925202446, "grad_norm": 0.18002420663833618, "learning_rate": 1.861561167089077e-05, "loss": 0.0083, "step": 89610 }, { "epoch": 2.770600234901403, "grad_norm": 0.14169329404830933, "learning_rate": 1.8615148049700192e-05, "loss": 0.0091, "step": 89640 }, { "epoch": 2.7715274772825618, "grad_norm": 0.0844830721616745, "learning_rate": 1.8614684428509614e-05, "loss": 0.0084, "step": 89670 }, { "epoch": 2.7724547196637204, "grad_norm": 0.10926453024148941, "learning_rate": 1.8614220807319035e-05, "loss": 0.0087, "step": 89700 }, { "epoch": 2.7733819620448785, "grad_norm": 0.12972241640090942, "learning_rate": 1.8613757186128457e-05, "loss": 0.0092, "step": 89730 }, { "epoch": 2.774309204426037, "grad_norm": 0.11640885472297668, "learning_rate": 1.8613293564937875e-05, "loss": 0.0091, "step": 89760 }, { "epoch": 2.7752364468071953, "grad_norm": 0.1302075833082199, "learning_rate": 1.8612829943747296e-05, "loss": 0.0082, "step": 89790 }, { "epoch": 2.776163689188354, "grad_norm": 0.09797530621290207, "learning_rate": 1.8612366322556718e-05, "loss": 0.008, "step": 89820 }, { "epoch": 2.7770909315695125, "grad_norm": 0.1814849078655243, "learning_rate": 1.861190270136614e-05, "loss": 0.0082, "step": 89850 }, { "epoch": 2.7780181739506706, "grad_norm": 0.13836582005023956, "learning_rate": 1.861143908017556e-05, "loss": 0.0083, "step": 89880 }, { "epoch": 2.7789454163318292, "grad_norm": 0.13631808757781982, "learning_rate": 1.8610975458984982e-05, "loss": 0.0087, "step": 89910 }, { "epoch": 2.7798726587129874, "grad_norm": 0.16467426717281342, "learning_rate": 1.86105118377944e-05, "loss": 0.0081, "step": 89940 }, { "epoch": 2.780799901094146, "grad_norm": 0.11474920064210892, "learning_rate": 1.8610048216603822e-05, "loss": 0.0079, "step": 89970 }, { "epoch": 2.7817271434753046, "grad_norm": 0.1408492475748062, "learning_rate": 1.860958459541324e-05, "loss": 0.0085, "step": 90000 }, { "epoch": 
2.7826543858564627, "grad_norm": 0.11728565394878387, "learning_rate": 1.860912097422266e-05, "loss": 0.0095, "step": 90030 }, { "epoch": 2.7835816282376213, "grad_norm": 0.14239588379859924, "learning_rate": 1.8608657353032083e-05, "loss": 0.0085, "step": 90060 }, { "epoch": 2.7845088706187795, "grad_norm": 0.12092181295156479, "learning_rate": 1.8608193731841505e-05, "loss": 0.0084, "step": 90090 }, { "epoch": 2.785436112999938, "grad_norm": 0.14780163764953613, "learning_rate": 1.8607730110650926e-05, "loss": 0.0091, "step": 90120 }, { "epoch": 2.7863633553810967, "grad_norm": 0.16345135867595673, "learning_rate": 1.8607266489460348e-05, "loss": 0.0091, "step": 90150 }, { "epoch": 2.7872905977622553, "grad_norm": 0.14326147735118866, "learning_rate": 1.8606802868269766e-05, "loss": 0.0082, "step": 90180 }, { "epoch": 2.7882178401434135, "grad_norm": 0.1410682648420334, "learning_rate": 1.8606339247079187e-05, "loss": 0.0082, "step": 90210 }, { "epoch": 2.789145082524572, "grad_norm": 0.11945252865552902, "learning_rate": 1.860587562588861e-05, "loss": 0.009, "step": 90240 }, { "epoch": 2.79007232490573, "grad_norm": 0.13335807621479034, "learning_rate": 1.860541200469803e-05, "loss": 0.0088, "step": 90270 }, { "epoch": 2.790999567286889, "grad_norm": 0.13506770133972168, "learning_rate": 1.8604948383507452e-05, "loss": 0.0089, "step": 90300 }, { "epoch": 2.7919268096680474, "grad_norm": 0.12684841454029083, "learning_rate": 1.860448476231687e-05, "loss": 0.0089, "step": 90330 }, { "epoch": 2.7928540520492056, "grad_norm": 0.13992303609848022, "learning_rate": 1.860402114112629e-05, "loss": 0.0081, "step": 90360 }, { "epoch": 2.793781294430364, "grad_norm": 0.1126607358455658, "learning_rate": 1.8603557519935713e-05, "loss": 0.0091, "step": 90390 }, { "epoch": 2.7947085368115223, "grad_norm": 0.11071283370256424, "learning_rate": 1.860309389874513e-05, "loss": 0.009, "step": 90420 }, { "epoch": 2.795635779192681, "grad_norm": 0.13500753045082092, "learning_rate": 
1.8602630277554556e-05, "loss": 0.0086, "step": 90450 }, { "epoch": 2.7965630215738395, "grad_norm": 0.11496052891016006, "learning_rate": 1.8602166656363977e-05, "loss": 0.0082, "step": 90480 }, { "epoch": 2.797490263954998, "grad_norm": 0.13442881405353546, "learning_rate": 1.8601703035173396e-05, "loss": 0.0092, "step": 90510 }, { "epoch": 2.7984175063361563, "grad_norm": 0.12855885922908783, "learning_rate": 1.8601239413982817e-05, "loss": 0.0092, "step": 90540 }, { "epoch": 2.799344748717315, "grad_norm": 0.1196671575307846, "learning_rate": 1.860077579279224e-05, "loss": 0.0083, "step": 90570 }, { "epoch": 2.800271991098473, "grad_norm": 0.13405795395374298, "learning_rate": 1.8600312171601657e-05, "loss": 0.0088, "step": 90600 }, { "epoch": 2.8011992334796316, "grad_norm": 0.1430569440126419, "learning_rate": 1.8599848550411078e-05, "loss": 0.0091, "step": 90630 }, { "epoch": 2.8021264758607902, "grad_norm": 0.1175832450389862, "learning_rate": 1.85993849292205e-05, "loss": 0.0085, "step": 90660 }, { "epoch": 2.8030537182419484, "grad_norm": 0.16968503594398499, "learning_rate": 1.859892130802992e-05, "loss": 0.0083, "step": 90690 }, { "epoch": 2.803980960623107, "grad_norm": 0.14726297557353973, "learning_rate": 1.8598457686839343e-05, "loss": 0.0083, "step": 90720 }, { "epoch": 2.804908203004265, "grad_norm": 0.1238597184419632, "learning_rate": 1.859799406564876e-05, "loss": 0.0089, "step": 90750 }, { "epoch": 2.8058354453854237, "grad_norm": 0.13885989785194397, "learning_rate": 1.8597530444458182e-05, "loss": 0.0082, "step": 90780 }, { "epoch": 2.8067626877665823, "grad_norm": 0.10051357746124268, "learning_rate": 1.8597066823267604e-05, "loss": 0.0084, "step": 90810 }, { "epoch": 2.8076899301477405, "grad_norm": 0.1617790311574936, "learning_rate": 1.8596603202077025e-05, "loss": 0.0082, "step": 90840 }, { "epoch": 2.808617172528899, "grad_norm": 0.13988438248634338, "learning_rate": 1.8596139580886447e-05, "loss": 0.0087, "step": 90870 }, { "epoch": 
2.8095444149100572, "grad_norm": 0.13014934957027435, "learning_rate": 1.859567595969587e-05, "loss": 0.0089, "step": 90900 }, { "epoch": 2.810471657291216, "grad_norm": 0.10864031314849854, "learning_rate": 1.8595212338505286e-05, "loss": 0.0089, "step": 90930 }, { "epoch": 2.8113988996723744, "grad_norm": 0.12369391322135925, "learning_rate": 1.8594764171354394e-05, "loss": 0.0086, "step": 90960 }, { "epoch": 2.812326142053533, "grad_norm": 0.11728804558515549, "learning_rate": 1.8594300550163816e-05, "loss": 0.0076, "step": 90990 }, { "epoch": 2.813253384434691, "grad_norm": 0.1419326215982437, "learning_rate": 1.8593836928973234e-05, "loss": 0.0084, "step": 91020 }, { "epoch": 2.81418062681585, "grad_norm": 0.15460416674613953, "learning_rate": 1.8593373307782655e-05, "loss": 0.0086, "step": 91050 }, { "epoch": 2.815107869197008, "grad_norm": 0.16225691139698029, "learning_rate": 1.8592909686592077e-05, "loss": 0.0097, "step": 91080 }, { "epoch": 2.8160351115781665, "grad_norm": 0.2368593066930771, "learning_rate": 1.85924460654015e-05, "loss": 0.0087, "step": 91110 }, { "epoch": 2.816962353959325, "grad_norm": 0.12322594970464706, "learning_rate": 1.859198244421092e-05, "loss": 0.009, "step": 91140 }, { "epoch": 2.8178895963404833, "grad_norm": 0.1076345145702362, "learning_rate": 1.8591518823020338e-05, "loss": 0.0082, "step": 91170 }, { "epoch": 2.818816838721642, "grad_norm": 0.1386338770389557, "learning_rate": 1.859105520182976e-05, "loss": 0.0084, "step": 91200 }, { "epoch": 2.8197440811028, "grad_norm": 0.13370254635810852, "learning_rate": 1.859059158063918e-05, "loss": 0.0082, "step": 91230 }, { "epoch": 2.8206713234839587, "grad_norm": 0.12221098691225052, "learning_rate": 1.85901279594486e-05, "loss": 0.0079, "step": 91260 }, { "epoch": 2.8215985658651173, "grad_norm": 0.15380242466926575, "learning_rate": 1.858966433825802e-05, "loss": 0.0085, "step": 91290 }, { "epoch": 2.8225258082462754, "grad_norm": 0.10492274165153503, "learning_rate": 
1.8589200717067442e-05, "loss": 0.0084, "step": 91320 }, { "epoch": 2.823453050627434, "grad_norm": 0.14491182565689087, "learning_rate": 1.8588737095876864e-05, "loss": 0.0093, "step": 91350 }, { "epoch": 2.824380293008592, "grad_norm": 0.1421395093202591, "learning_rate": 1.8588273474686285e-05, "loss": 0.008, "step": 91380 }, { "epoch": 2.8253075353897508, "grad_norm": 0.2093840390443802, "learning_rate": 1.8587809853495707e-05, "loss": 0.009, "step": 91410 }, { "epoch": 2.8262347777709094, "grad_norm": 0.1454947590827942, "learning_rate": 1.8587346232305125e-05, "loss": 0.0085, "step": 91440 }, { "epoch": 2.827162020152068, "grad_norm": 0.1822793334722519, "learning_rate": 1.8586882611114546e-05, "loss": 0.0084, "step": 91470 }, { "epoch": 2.828089262533226, "grad_norm": 0.12545859813690186, "learning_rate": 1.8586418989923968e-05, "loss": 0.0089, "step": 91500 }, { "epoch": 2.8290165049143847, "grad_norm": 0.09764917194843292, "learning_rate": 1.858595536873339e-05, "loss": 0.0083, "step": 91530 }, { "epoch": 2.829943747295543, "grad_norm": 0.13562104105949402, "learning_rate": 1.858549174754281e-05, "loss": 0.0094, "step": 91560 }, { "epoch": 2.8308709896767015, "grad_norm": 0.1171429306268692, "learning_rate": 1.858502812635223e-05, "loss": 0.0086, "step": 91590 }, { "epoch": 2.83179823205786, "grad_norm": 0.11422930657863617, "learning_rate": 1.858456450516165e-05, "loss": 0.0084, "step": 91620 }, { "epoch": 2.8327254744390182, "grad_norm": 0.18126286566257477, "learning_rate": 1.8584100883971072e-05, "loss": 0.0085, "step": 91650 }, { "epoch": 2.833652716820177, "grad_norm": 0.13607874512672424, "learning_rate": 1.858363726278049e-05, "loss": 0.008, "step": 91680 }, { "epoch": 2.834579959201335, "grad_norm": 0.11191564798355103, "learning_rate": 1.858317364158991e-05, "loss": 0.0084, "step": 91710 }, { "epoch": 2.8355072015824936, "grad_norm": 0.12621836364269257, "learning_rate": 1.8582710020399337e-05, "loss": 0.0088, "step": 91740 }, { "epoch": 
2.836434443963652, "grad_norm": 0.14118127524852753, "learning_rate": 1.8582246399208755e-05, "loss": 0.0083, "step": 91770 }, { "epoch": 2.837361686344811, "grad_norm": 0.11645210534334183, "learning_rate": 1.8581782778018176e-05, "loss": 0.0096, "step": 91800 }, { "epoch": 2.838288928725969, "grad_norm": 0.1296171396970749, "learning_rate": 1.8581319156827594e-05, "loss": 0.0085, "step": 91830 }, { "epoch": 2.8392161711071275, "grad_norm": 0.1983984112739563, "learning_rate": 1.8580855535637016e-05, "loss": 0.0083, "step": 91860 }, { "epoch": 2.8401434134882857, "grad_norm": 0.1077771931886673, "learning_rate": 1.8580391914446437e-05, "loss": 0.0083, "step": 91890 }, { "epoch": 2.8410706558694443, "grad_norm": 0.11242585629224777, "learning_rate": 1.857992829325586e-05, "loss": 0.0094, "step": 91920 }, { "epoch": 2.841997898250603, "grad_norm": 0.08204063773155212, "learning_rate": 1.857946467206528e-05, "loss": 0.0086, "step": 91950 }, { "epoch": 2.842925140631761, "grad_norm": 0.10523824393749237, "learning_rate": 1.8579001050874702e-05, "loss": 0.0084, "step": 91980 }, { "epoch": 2.8438523830129196, "grad_norm": 0.13908317685127258, "learning_rate": 1.857853742968412e-05, "loss": 0.0092, "step": 92010 }, { "epoch": 2.844779625394078, "grad_norm": 0.20090460777282715, "learning_rate": 1.857807380849354e-05, "loss": 0.0081, "step": 92040 }, { "epoch": 2.8457068677752364, "grad_norm": 0.2248997837305069, "learning_rate": 1.8577610187302963e-05, "loss": 0.0083, "step": 92070 }, { "epoch": 2.846634110156395, "grad_norm": 0.10269434005022049, "learning_rate": 1.8577146566112384e-05, "loss": 0.0088, "step": 92100 }, { "epoch": 2.847561352537553, "grad_norm": 0.17311516404151917, "learning_rate": 1.8576682944921806e-05, "loss": 0.009, "step": 92130 }, { "epoch": 2.8484885949187118, "grad_norm": 0.11820591241121292, "learning_rate": 1.8576219323731224e-05, "loss": 0.0086, "step": 92160 }, { "epoch": 2.84941583729987, "grad_norm": 0.13092494010925293, "learning_rate": 
1.8575755702540646e-05, "loss": 0.0089, "step": 92190 }, { "epoch": 2.8503430796810285, "grad_norm": 0.14582288265228271, "learning_rate": 1.8575292081350067e-05, "loss": 0.0091, "step": 92220 }, { "epoch": 2.851270322062187, "grad_norm": 0.20914195477962494, "learning_rate": 1.8574828460159485e-05, "loss": 0.0081, "step": 92250 }, { "epoch": 2.8521975644433457, "grad_norm": 0.08632322400808334, "learning_rate": 1.8574364838968907e-05, "loss": 0.0084, "step": 92280 }, { "epoch": 2.853124806824504, "grad_norm": 0.13025735318660736, "learning_rate": 1.8573901217778328e-05, "loss": 0.0087, "step": 92310 }, { "epoch": 2.8540520492056625, "grad_norm": 0.12039626389741898, "learning_rate": 1.857343759658775e-05, "loss": 0.0089, "step": 92340 }, { "epoch": 2.8549792915868206, "grad_norm": 0.1515360176563263, "learning_rate": 1.857297397539717e-05, "loss": 0.0088, "step": 92370 }, { "epoch": 2.855906533967979, "grad_norm": 0.13808509707450867, "learning_rate": 1.8572510354206593e-05, "loss": 0.0086, "step": 92400 }, { "epoch": 2.856833776349138, "grad_norm": 0.1320321261882782, "learning_rate": 1.857204673301601e-05, "loss": 0.0088, "step": 92430 }, { "epoch": 2.857761018730296, "grad_norm": 0.14445891976356506, "learning_rate": 1.8571583111825432e-05, "loss": 0.0086, "step": 92460 }, { "epoch": 2.8586882611114546, "grad_norm": 0.13731224834918976, "learning_rate": 1.8571119490634854e-05, "loss": 0.0084, "step": 92490 }, { "epoch": 2.8596155034926127, "grad_norm": 0.10762965679168701, "learning_rate": 1.8570655869444275e-05, "loss": 0.0093, "step": 92520 }, { "epoch": 2.8605427458737713, "grad_norm": 0.15148703753948212, "learning_rate": 1.8570192248253697e-05, "loss": 0.0086, "step": 92550 }, { "epoch": 2.86146998825493, "grad_norm": 0.11471084505319595, "learning_rate": 1.8569728627063115e-05, "loss": 0.0084, "step": 92580 }, { "epoch": 2.8623972306360885, "grad_norm": 0.12873798608779907, "learning_rate": 1.8569265005872537e-05, "loss": 0.0086, "step": 92610 }, { 
"epoch": 2.8633244730172467, "grad_norm": 0.1546454280614853, "learning_rate": 1.8568801384681958e-05, "loss": 0.0095, "step": 92640 }, { "epoch": 2.8642517153984053, "grad_norm": 0.12572748959064484, "learning_rate": 1.8568337763491376e-05, "loss": 0.0084, "step": 92670 }, { "epoch": 2.8651789577795634, "grad_norm": 0.09484290331602097, "learning_rate": 1.8567874142300798e-05, "loss": 0.0096, "step": 92700 }, { "epoch": 2.866106200160722, "grad_norm": 0.11526168882846832, "learning_rate": 1.856741052111022e-05, "loss": 0.0087, "step": 92730 }, { "epoch": 2.8670334425418806, "grad_norm": 0.09865675866603851, "learning_rate": 1.856694689991964e-05, "loss": 0.0088, "step": 92760 }, { "epoch": 2.867960684923039, "grad_norm": 0.10678531229496002, "learning_rate": 1.8566483278729062e-05, "loss": 0.0093, "step": 92790 }, { "epoch": 2.8688879273041974, "grad_norm": 0.12766322493553162, "learning_rate": 1.856601965753848e-05, "loss": 0.0082, "step": 92820 }, { "epoch": 2.8698151696853555, "grad_norm": 0.10749982297420502, "learning_rate": 1.8565556036347902e-05, "loss": 0.0089, "step": 92850 }, { "epoch": 2.870742412066514, "grad_norm": 0.15095318853855133, "learning_rate": 1.8565092415157323e-05, "loss": 0.0089, "step": 92880 }, { "epoch": 2.8716696544476727, "grad_norm": 0.18062609434127808, "learning_rate": 1.8564628793966745e-05, "loss": 0.0094, "step": 92910 }, { "epoch": 2.872596896828831, "grad_norm": 0.11590752750635147, "learning_rate": 1.8564165172776166e-05, "loss": 0.0084, "step": 92940 }, { "epoch": 2.8735241392099895, "grad_norm": 0.17584210634231567, "learning_rate": 1.856371700562527e-05, "loss": 0.0088, "step": 92970 }, { "epoch": 2.8744513815911477, "grad_norm": 0.09179487824440002, "learning_rate": 1.8563253384434692e-05, "loss": 0.0087, "step": 93000 }, { "epoch": 2.8753786239723063, "grad_norm": 0.1034398004412651, "learning_rate": 1.8562789763244114e-05, "loss": 0.0081, "step": 93030 }, { "epoch": 2.876305866353465, "grad_norm": 0.1361175775527954, 
"learning_rate": 1.8562326142053535e-05, "loss": 0.0086, "step": 93060 }, { "epoch": 2.8772331087346235, "grad_norm": 0.14660552144050598, "learning_rate": 1.8561862520862953e-05, "loss": 0.0082, "step": 93090 }, { "epoch": 2.8781603511157816, "grad_norm": 0.12689760327339172, "learning_rate": 1.8561398899672375e-05, "loss": 0.0084, "step": 93120 }, { "epoch": 2.87908759349694, "grad_norm": 0.11624161899089813, "learning_rate": 1.8560935278481796e-05, "loss": 0.0089, "step": 93150 }, { "epoch": 2.8800148358780984, "grad_norm": 0.10275036096572876, "learning_rate": 1.8560471657291218e-05, "loss": 0.009, "step": 93180 }, { "epoch": 2.880942078259257, "grad_norm": 0.11894278228282928, "learning_rate": 1.856000803610064e-05, "loss": 0.0082, "step": 93210 }, { "epoch": 2.8818693206404156, "grad_norm": 0.14577727019786835, "learning_rate": 1.855954441491006e-05, "loss": 0.0089, "step": 93240 }, { "epoch": 2.8827965630215737, "grad_norm": 0.09799131006002426, "learning_rate": 1.855908079371948e-05, "loss": 0.0096, "step": 93270 }, { "epoch": 2.8837238054027323, "grad_norm": 0.10185334086418152, "learning_rate": 1.85586171725289e-05, "loss": 0.0091, "step": 93300 }, { "epoch": 2.8846510477838905, "grad_norm": 0.1036679670214653, "learning_rate": 1.855815355133832e-05, "loss": 0.0081, "step": 93330 }, { "epoch": 2.885578290165049, "grad_norm": 0.1555018573999405, "learning_rate": 1.8557689930147744e-05, "loss": 0.0084, "step": 93360 }, { "epoch": 2.8865055325462077, "grad_norm": 0.10523844510316849, "learning_rate": 1.8557226308957165e-05, "loss": 0.0092, "step": 93390 }, { "epoch": 2.8874327749273663, "grad_norm": 0.12467095255851746, "learning_rate": 1.8556762687766583e-05, "loss": 0.0087, "step": 93420 }, { "epoch": 2.8883600173085244, "grad_norm": 0.07676834613084793, "learning_rate": 1.8556299066576005e-05, "loss": 0.0082, "step": 93450 }, { "epoch": 2.889287259689683, "grad_norm": 0.14997775852680206, "learning_rate": 1.8555835445385426e-05, "loss": 0.0091, "step": 
93480 }, { "epoch": 2.890214502070841, "grad_norm": 0.10713930428028107, "learning_rate": 1.8555371824194844e-05, "loss": 0.0084, "step": 93510 }, { "epoch": 2.891141744452, "grad_norm": 0.09179970622062683, "learning_rate": 1.8554908203004266e-05, "loss": 0.0084, "step": 93540 }, { "epoch": 2.8920689868331584, "grad_norm": 0.11327464878559113, "learning_rate": 1.8554444581813687e-05, "loss": 0.0084, "step": 93570 }, { "epoch": 2.8929962292143165, "grad_norm": 0.1468013972043991, "learning_rate": 1.855398096062311e-05, "loss": 0.0086, "step": 93600 }, { "epoch": 2.893923471595475, "grad_norm": 0.12654666602611542, "learning_rate": 1.855351733943253e-05, "loss": 0.0092, "step": 93630 }, { "epoch": 2.8948507139766333, "grad_norm": 0.13310733437538147, "learning_rate": 1.855305371824195e-05, "loss": 0.0089, "step": 93660 }, { "epoch": 2.895777956357792, "grad_norm": 0.20286405086517334, "learning_rate": 1.855259009705137e-05, "loss": 0.0073, "step": 93690 }, { "epoch": 2.8967051987389505, "grad_norm": 0.10918466746807098, "learning_rate": 1.855212647586079e-05, "loss": 0.0091, "step": 93720 }, { "epoch": 2.8976324411201086, "grad_norm": 0.12612344324588776, "learning_rate": 1.8551662854670213e-05, "loss": 0.0089, "step": 93750 }, { "epoch": 2.8985596835012672, "grad_norm": 0.11129084974527359, "learning_rate": 1.8551199233479635e-05, "loss": 0.0087, "step": 93780 }, { "epoch": 2.8994869258824254, "grad_norm": 0.0995628833770752, "learning_rate": 1.8550735612289056e-05, "loss": 0.008, "step": 93810 }, { "epoch": 2.900414168263584, "grad_norm": 0.11288429796695709, "learning_rate": 1.8550271991098474e-05, "loss": 0.0085, "step": 93840 }, { "epoch": 2.9013414106447426, "grad_norm": 0.16816814243793488, "learning_rate": 1.8549808369907896e-05, "loss": 0.008, "step": 93870 }, { "epoch": 2.902268653025901, "grad_norm": 0.1787322461605072, "learning_rate": 1.8549344748717317e-05, "loss": 0.0084, "step": 93900 }, { "epoch": 2.9031958954070594, "grad_norm": 
0.13829544186592102, "learning_rate": 1.8548881127526735e-05, "loss": 0.0091, "step": 93930 }, { "epoch": 2.904123137788218, "grad_norm": 0.14435073733329773, "learning_rate": 1.8548417506336157e-05, "loss": 0.0083, "step": 93960 }, { "epoch": 2.905050380169376, "grad_norm": 0.15822739899158478, "learning_rate": 1.8547953885145578e-05, "loss": 0.009, "step": 93990 }, { "epoch": 2.9059776225505347, "grad_norm": 0.13656415045261383, "learning_rate": 1.8547490263955e-05, "loss": 0.0083, "step": 94020 }, { "epoch": 2.9069048649316933, "grad_norm": 0.16183915734291077, "learning_rate": 1.854702664276442e-05, "loss": 0.0083, "step": 94050 }, { "epoch": 2.9078321073128515, "grad_norm": 0.10690449923276901, "learning_rate": 1.854656302157384e-05, "loss": 0.0086, "step": 94080 }, { "epoch": 2.90875934969401, "grad_norm": 0.09808778017759323, "learning_rate": 1.854609940038326e-05, "loss": 0.0081, "step": 94110 }, { "epoch": 2.909686592075168, "grad_norm": 0.09353505820035934, "learning_rate": 1.8545635779192682e-05, "loss": 0.0087, "step": 94140 }, { "epoch": 2.910613834456327, "grad_norm": 0.21144580841064453, "learning_rate": 1.8545172158002104e-05, "loss": 0.0086, "step": 94170 }, { "epoch": 2.9115410768374854, "grad_norm": 0.12975060939788818, "learning_rate": 1.8544708536811525e-05, "loss": 0.0088, "step": 94200 }, { "epoch": 2.912468319218644, "grad_norm": 0.1301783323287964, "learning_rate": 1.8544244915620947e-05, "loss": 0.0083, "step": 94230 }, { "epoch": 2.913395561599802, "grad_norm": 0.118833526968956, "learning_rate": 1.8543781294430365e-05, "loss": 0.009, "step": 94260 }, { "epoch": 2.9143228039809608, "grad_norm": 0.14614026248455048, "learning_rate": 1.8543317673239787e-05, "loss": 0.0088, "step": 94290 }, { "epoch": 2.915250046362119, "grad_norm": 0.14576590061187744, "learning_rate": 1.8542854052049205e-05, "loss": 0.008, "step": 94320 }, { "epoch": 2.9161772887432775, "grad_norm": 0.14708206057548523, "learning_rate": 1.8542390430858626e-05, "loss": 
0.0089, "step": 94350 }, { "epoch": 2.917104531124436, "grad_norm": 0.126750186085701, "learning_rate": 1.8541926809668048e-05, "loss": 0.009, "step": 94380 }, { "epoch": 2.9180317735055943, "grad_norm": 0.14337153732776642, "learning_rate": 1.854146318847747e-05, "loss": 0.0086, "step": 94410 }, { "epoch": 2.918959015886753, "grad_norm": 0.09205970168113708, "learning_rate": 1.854099956728689e-05, "loss": 0.0089, "step": 94440 }, { "epoch": 2.919886258267911, "grad_norm": 0.14648060500621796, "learning_rate": 1.8540535946096312e-05, "loss": 0.0088, "step": 94470 }, { "epoch": 2.9208135006490696, "grad_norm": 0.12796416878700256, "learning_rate": 1.854007232490573e-05, "loss": 0.009, "step": 94500 }, { "epoch": 2.9217407430302282, "grad_norm": 0.09306251257658005, "learning_rate": 1.8539608703715152e-05, "loss": 0.0087, "step": 94530 }, { "epoch": 2.9226679854113864, "grad_norm": 0.13896818459033966, "learning_rate": 1.8539145082524573e-05, "loss": 0.0081, "step": 94560 }, { "epoch": 2.923595227792545, "grad_norm": 0.10473315417766571, "learning_rate": 1.8538681461333995e-05, "loss": 0.0092, "step": 94590 }, { "epoch": 2.924522470173703, "grad_norm": 0.1256120353937149, "learning_rate": 1.8538217840143416e-05, "loss": 0.0081, "step": 94620 }, { "epoch": 2.9254497125548617, "grad_norm": 0.13494308292865753, "learning_rate": 1.8537754218952834e-05, "loss": 0.0093, "step": 94650 }, { "epoch": 2.9263769549360203, "grad_norm": 0.13113562762737274, "learning_rate": 1.8537290597762256e-05, "loss": 0.0082, "step": 94680 }, { "epoch": 2.927304197317179, "grad_norm": 0.15377749502658844, "learning_rate": 1.8536826976571678e-05, "loss": 0.0081, "step": 94710 }, { "epoch": 2.928231439698337, "grad_norm": 0.09684314578771591, "learning_rate": 1.85363633553811e-05, "loss": 0.0076, "step": 94740 }, { "epoch": 2.9291586820794957, "grad_norm": 0.130441352725029, "learning_rate": 1.853589973419052e-05, "loss": 0.0087, "step": 94770 }, { "epoch": 2.930085924460654, "grad_norm": 
0.1265760213136673, "learning_rate": 1.8535436112999942e-05, "loss": 0.0088, "step": 94800 }, { "epoch": 2.9310131668418125, "grad_norm": 0.09499923884868622, "learning_rate": 1.853497249180936e-05, "loss": 0.0084, "step": 94830 }, { "epoch": 2.931940409222971, "grad_norm": 0.12100382149219513, "learning_rate": 1.853450887061878e-05, "loss": 0.0088, "step": 94860 }, { "epoch": 2.932867651604129, "grad_norm": 0.15605619549751282, "learning_rate": 1.8534045249428203e-05, "loss": 0.008, "step": 94890 }, { "epoch": 2.933794893985288, "grad_norm": 0.12449372559785843, "learning_rate": 1.853358162823762e-05, "loss": 0.0083, "step": 94920 }, { "epoch": 2.934722136366446, "grad_norm": 0.16309304535388947, "learning_rate": 1.8533118007047043e-05, "loss": 0.0083, "step": 94950 }, { "epoch": 2.9356493787476046, "grad_norm": 0.15381142497062683, "learning_rate": 1.8532654385856464e-05, "loss": 0.0091, "step": 94980 }, { "epoch": 2.936576621128763, "grad_norm": 0.11742822825908661, "learning_rate": 1.8532190764665886e-05, "loss": 0.0091, "step": 95010 }, { "epoch": 2.9375038635099218, "grad_norm": 0.12088004499673843, "learning_rate": 1.8531727143475307e-05, "loss": 0.0081, "step": 95040 }, { "epoch": 2.93843110589108, "grad_norm": 0.10524341464042664, "learning_rate": 1.8531263522284725e-05, "loss": 0.0084, "step": 95070 }, { "epoch": 2.9393583482722385, "grad_norm": 0.1077144593000412, "learning_rate": 1.8530799901094147e-05, "loss": 0.0088, "step": 95100 }, { "epoch": 2.9402855906533967, "grad_norm": 0.09031736850738525, "learning_rate": 1.853033627990357e-05, "loss": 0.0084, "step": 95130 }, { "epoch": 2.9412128330345553, "grad_norm": 0.1014927327632904, "learning_rate": 1.852987265871299e-05, "loss": 0.0087, "step": 95160 }, { "epoch": 2.942140075415714, "grad_norm": 0.11147831380367279, "learning_rate": 1.852940903752241e-05, "loss": 0.0088, "step": 95190 }, { "epoch": 2.943067317796872, "grad_norm": 0.09087756276130676, "learning_rate": 1.8528945416331833e-05, "loss": 
0.009, "step": 95220 }, { "epoch": 2.9439945601780306, "grad_norm": 0.13084863126277924, "learning_rate": 1.852848179514125e-05, "loss": 0.0081, "step": 95250 }, { "epoch": 2.944921802559189, "grad_norm": 0.12178432196378708, "learning_rate": 1.8528018173950673e-05, "loss": 0.0094, "step": 95280 }, { "epoch": 2.9458490449403474, "grad_norm": 0.09112653881311417, "learning_rate": 1.852755455276009e-05, "loss": 0.0091, "step": 95310 }, { "epoch": 2.946776287321506, "grad_norm": 0.10483210533857346, "learning_rate": 1.8527090931569512e-05, "loss": 0.0085, "step": 95340 }, { "epoch": 2.947703529702664, "grad_norm": 0.14777801930904388, "learning_rate": 1.8526627310378934e-05, "loss": 0.0082, "step": 95370 }, { "epoch": 2.9486307720838227, "grad_norm": 0.1329668015241623, "learning_rate": 1.8526163689188355e-05, "loss": 0.0082, "step": 95400 }, { "epoch": 2.949558014464981, "grad_norm": 0.113923579454422, "learning_rate": 1.8525700067997777e-05, "loss": 0.0094, "step": 95430 }, { "epoch": 2.9504852568461395, "grad_norm": 0.1366647183895111, "learning_rate": 1.8525236446807198e-05, "loss": 0.0087, "step": 95460 }, { "epoch": 2.951412499227298, "grad_norm": 0.14724576473236084, "learning_rate": 1.8524772825616616e-05, "loss": 0.0087, "step": 95490 }, { "epoch": 2.9523397416084567, "grad_norm": 0.16716459393501282, "learning_rate": 1.8524309204426038e-05, "loss": 0.009, "step": 95520 }, { "epoch": 2.953266983989615, "grad_norm": 0.12177246063947678, "learning_rate": 1.852384558323546e-05, "loss": 0.008, "step": 95550 }, { "epoch": 2.9541942263707734, "grad_norm": 0.1436130553483963, "learning_rate": 1.852338196204488e-05, "loss": 0.0082, "step": 95580 }, { "epoch": 2.9551214687519316, "grad_norm": 0.11257430911064148, "learning_rate": 1.8522918340854302e-05, "loss": 0.0081, "step": 95610 }, { "epoch": 2.95604871113309, "grad_norm": 0.12696370482444763, "learning_rate": 1.852245471966372e-05, "loss": 0.0087, "step": 95640 }, { "epoch": 2.956975953514249, "grad_norm": 
0.15154941380023956, "learning_rate": 1.8521991098473142e-05, "loss": 0.0082, "step": 95670 }, { "epoch": 2.957903195895407, "grad_norm": 0.1564200073480606, "learning_rate": 1.8521527477282563e-05, "loss": 0.0089, "step": 95700 }, { "epoch": 2.9588304382765656, "grad_norm": 0.26006683707237244, "learning_rate": 1.852106385609198e-05, "loss": 0.0084, "step": 95730 }, { "epoch": 2.9597576806577237, "grad_norm": 0.10534998774528503, "learning_rate": 1.8520600234901403e-05, "loss": 0.0083, "step": 95760 }, { "epoch": 2.9606849230388823, "grad_norm": 0.10097362101078033, "learning_rate": 1.8520136613710825e-05, "loss": 0.0076, "step": 95790 }, { "epoch": 2.961612165420041, "grad_norm": 0.12678001821041107, "learning_rate": 1.8519672992520246e-05, "loss": 0.0081, "step": 95820 }, { "epoch": 2.9625394078011995, "grad_norm": 0.09448982775211334, "learning_rate": 1.8519209371329668e-05, "loss": 0.0088, "step": 95850 }, { "epoch": 2.9634666501823577, "grad_norm": 0.32674768567085266, "learning_rate": 1.851874575013909e-05, "loss": 0.0095, "step": 95880 }, { "epoch": 2.9643938925635163, "grad_norm": 0.1227155476808548, "learning_rate": 1.8518282128948507e-05, "loss": 0.0085, "step": 95910 }, { "epoch": 2.9653211349446744, "grad_norm": 0.11824636161327362, "learning_rate": 1.851781850775793e-05, "loss": 0.008, "step": 95940 }, { "epoch": 2.966248377325833, "grad_norm": 0.16277915239334106, "learning_rate": 1.851735488656735e-05, "loss": 0.009, "step": 95970 }, { "epoch": 2.9671756197069916, "grad_norm": 0.1328493058681488, "learning_rate": 1.8516891265376772e-05, "loss": 0.0079, "step": 96000 }, { "epoch": 2.9681028620881498, "grad_norm": 0.10132215917110443, "learning_rate": 1.8516427644186193e-05, "loss": 0.0084, "step": 96030 }, { "epoch": 2.9690301044693084, "grad_norm": 0.13432230055332184, "learning_rate": 1.851596402299561e-05, "loss": 0.0082, "step": 96060 }, { "epoch": 2.9699573468504665, "grad_norm": 0.1514066457748413, "learning_rate": 1.8515500401805033e-05, 
"loss": 0.0082, "step": 96090 }, { "epoch": 2.970884589231625, "grad_norm": 0.1342611163854599, "learning_rate": 1.8515036780614454e-05, "loss": 0.0088, "step": 96120 }, { "epoch": 2.9718118316127837, "grad_norm": 0.11398923397064209, "learning_rate": 1.8514573159423876e-05, "loss": 0.0087, "step": 96150 }, { "epoch": 2.972739073993942, "grad_norm": 0.07628985494375229, "learning_rate": 1.8514109538233297e-05, "loss": 0.0081, "step": 96180 }, { "epoch": 2.9736663163751005, "grad_norm": 0.12477681785821915, "learning_rate": 1.851364591704272e-05, "loss": 0.0081, "step": 96210 }, { "epoch": 2.9745935587562586, "grad_norm": 0.16294847428798676, "learning_rate": 1.8513182295852137e-05, "loss": 0.0081, "step": 96240 }, { "epoch": 2.9755208011374172, "grad_norm": 0.12379492819309235, "learning_rate": 1.8512734128701245e-05, "loss": 0.0082, "step": 96270 }, { "epoch": 2.976448043518576, "grad_norm": 0.11745769530534744, "learning_rate": 1.8512270507510666e-05, "loss": 0.0092, "step": 96300 }, { "epoch": 2.9773752858997344, "grad_norm": 0.19374430179595947, "learning_rate": 1.8511806886320085e-05, "loss": 0.0084, "step": 96330 }, { "epoch": 2.9783025282808926, "grad_norm": 0.1423642486333847, "learning_rate": 1.8511343265129506e-05, "loss": 0.0077, "step": 96360 }, { "epoch": 2.979229770662051, "grad_norm": 0.13231876492500305, "learning_rate": 1.8510879643938928e-05, "loss": 0.0075, "step": 96390 }, { "epoch": 2.9801570130432093, "grad_norm": 0.14509879052639008, "learning_rate": 1.851041602274835e-05, "loss": 0.0087, "step": 96420 }, { "epoch": 2.981084255424368, "grad_norm": 0.1556706726551056, "learning_rate": 1.850995240155777e-05, "loss": 0.0074, "step": 96450 }, { "epoch": 2.9820114978055265, "grad_norm": 0.1340479999780655, "learning_rate": 1.850948878036719e-05, "loss": 0.0084, "step": 96480 }, { "epoch": 2.9829387401866847, "grad_norm": 0.16759666800498962, "learning_rate": 1.850902515917661e-05, "loss": 0.0081, "step": 96510 }, { "epoch": 2.9838659825678433, 
"grad_norm": 0.1558028757572174, "learning_rate": 1.8508561537986032e-05, "loss": 0.0086, "step": 96540 }, { "epoch": 2.9847932249490015, "grad_norm": 0.10385628044605255, "learning_rate": 1.850809791679545e-05, "loss": 0.0083, "step": 96570 }, { "epoch": 2.98572046733016, "grad_norm": 0.1576617807149887, "learning_rate": 1.850763429560487e-05, "loss": 0.0092, "step": 96600 }, { "epoch": 2.9866477097113187, "grad_norm": 0.17541295289993286, "learning_rate": 1.8507170674414293e-05, "loss": 0.009, "step": 96630 }, { "epoch": 2.987574952092477, "grad_norm": 0.1473584622144699, "learning_rate": 1.8506707053223714e-05, "loss": 0.0086, "step": 96660 }, { "epoch": 2.9885021944736354, "grad_norm": 0.10208375751972198, "learning_rate": 1.8506243432033136e-05, "loss": 0.0085, "step": 96690 }, { "epoch": 2.989429436854794, "grad_norm": 0.1269853115081787, "learning_rate": 1.8505779810842557e-05, "loss": 0.0083, "step": 96720 }, { "epoch": 2.990356679235952, "grad_norm": 0.12085272371768951, "learning_rate": 1.8505316189651976e-05, "loss": 0.0076, "step": 96750 }, { "epoch": 2.9912839216171108, "grad_norm": 0.12460831552743912, "learning_rate": 1.8504852568461397e-05, "loss": 0.0082, "step": 96780 }, { "epoch": 2.9922111639982694, "grad_norm": 0.1236172467470169, "learning_rate": 1.850438894727082e-05, "loss": 0.0084, "step": 96810 }, { "epoch": 2.9931384063794275, "grad_norm": 0.1245904415845871, "learning_rate": 1.850392532608024e-05, "loss": 0.0093, "step": 96840 }, { "epoch": 2.994065648760586, "grad_norm": 0.16722384095191956, "learning_rate": 1.850346170488966e-05, "loss": 0.0083, "step": 96870 }, { "epoch": 2.9949928911417443, "grad_norm": 0.09547737240791321, "learning_rate": 1.850299808369908e-05, "loss": 0.0081, "step": 96900 }, { "epoch": 2.995920133522903, "grad_norm": 0.15034884214401245, "learning_rate": 1.85025344625085e-05, "loss": 0.009, "step": 96930 }, { "epoch": 2.9968473759040615, "grad_norm": 0.14478018879890442, "learning_rate": 1.8502070841317923e-05, 
"loss": 0.0081, "step": 96960 }, { "epoch": 2.9977746182852196, "grad_norm": 0.1146443709731102, "learning_rate": 1.850160722012734e-05, "loss": 0.0087, "step": 96990 }, { "epoch": 2.9987018606663782, "grad_norm": 0.1465146541595459, "learning_rate": 1.8501143598936762e-05, "loss": 0.0079, "step": 97020 }, { "epoch": 2.9996291030475364, "grad_norm": 0.12791137397289276, "learning_rate": 1.8500679977746184e-05, "loss": 0.0087, "step": 97050 }, { "epoch": 3.000556345428695, "grad_norm": 0.17301997542381287, "learning_rate": 1.8500216356555605e-05, "loss": 0.0085, "step": 97080 }, { "epoch": 3.0014835878098536, "grad_norm": 0.11766210943460464, "learning_rate": 1.8499752735365027e-05, "loss": 0.0085, "step": 97110 }, { "epoch": 3.0024108301910117, "grad_norm": 0.1368464082479477, "learning_rate": 1.8499289114174445e-05, "loss": 0.0083, "step": 97140 }, { "epoch": 3.0033380725721703, "grad_norm": 0.1391049474477768, "learning_rate": 1.8498825492983866e-05, "loss": 0.0081, "step": 97170 }, { "epoch": 3.004265314953329, "grad_norm": 0.14123746752738953, "learning_rate": 1.8498361871793288e-05, "loss": 0.0086, "step": 97200 }, { "epoch": 3.005192557334487, "grad_norm": 0.11303326487541199, "learning_rate": 1.849789825060271e-05, "loss": 0.0086, "step": 97230 }, { "epoch": 3.0061197997156457, "grad_norm": 0.1161809116601944, "learning_rate": 1.849743462941213e-05, "loss": 0.0082, "step": 97260 }, { "epoch": 3.0070470420968043, "grad_norm": 0.1751079559326172, "learning_rate": 1.8496971008221552e-05, "loss": 0.0081, "step": 97290 }, { "epoch": 3.0079742844779624, "grad_norm": 0.08687733113765717, "learning_rate": 1.849650738703097e-05, "loss": 0.009, "step": 97320 }, { "epoch": 3.008901526859121, "grad_norm": 0.10946592688560486, "learning_rate": 1.8496043765840392e-05, "loss": 0.008, "step": 97350 }, { "epoch": 3.009828769240279, "grad_norm": 0.11813241988420486, "learning_rate": 1.8495580144649814e-05, "loss": 0.0082, "step": 97380 }, { "epoch": 3.010756011621438, 
"grad_norm": 0.11210690438747406, "learning_rate": 1.849511652345923e-05, "loss": 0.0082, "step": 97410 }, { "epoch": 3.0116832540025964, "grad_norm": 0.14756910502910614, "learning_rate": 1.8494652902268657e-05, "loss": 0.0086, "step": 97440 }, { "epoch": 3.0126104963837546, "grad_norm": 0.16912846267223358, "learning_rate": 1.8494189281078075e-05, "loss": 0.0082, "step": 97470 }, { "epoch": 3.013537738764913, "grad_norm": 0.10417933017015457, "learning_rate": 1.8493725659887496e-05, "loss": 0.0082, "step": 97500 }, { "epoch": 3.0144649811460718, "grad_norm": 0.10140187293291092, "learning_rate": 1.8493262038696918e-05, "loss": 0.0083, "step": 97530 }, { "epoch": 3.01539222352723, "grad_norm": 0.10571268945932388, "learning_rate": 1.8492798417506336e-05, "loss": 0.0083, "step": 97560 }, { "epoch": 3.0163194659083885, "grad_norm": 0.2334173619747162, "learning_rate": 1.8492334796315757e-05, "loss": 0.0081, "step": 97590 }, { "epoch": 3.017246708289547, "grad_norm": 0.13466012477874756, "learning_rate": 1.849187117512518e-05, "loss": 0.0084, "step": 97620 }, { "epoch": 3.0181739506707053, "grad_norm": 0.15808260440826416, "learning_rate": 1.84914075539346e-05, "loss": 0.0089, "step": 97650 }, { "epoch": 3.019101193051864, "grad_norm": 0.18779857456684113, "learning_rate": 1.8490943932744022e-05, "loss": 0.0082, "step": 97680 }, { "epoch": 3.020028435433022, "grad_norm": 0.14652521908283234, "learning_rate": 1.8490480311553443e-05, "loss": 0.0083, "step": 97710 }, { "epoch": 3.0209556778141806, "grad_norm": 0.1369020640850067, "learning_rate": 1.849001669036286e-05, "loss": 0.0089, "step": 97740 }, { "epoch": 3.021882920195339, "grad_norm": 0.11740074306726456, "learning_rate": 1.8489553069172283e-05, "loss": 0.0077, "step": 97770 }, { "epoch": 3.0228101625764974, "grad_norm": 0.11918993294239044, "learning_rate": 1.8489089447981704e-05, "loss": 0.0088, "step": 97800 }, { "epoch": 3.023737404957656, "grad_norm": 0.2059757560491562, "learning_rate": 
1.8488625826791126e-05, "loss": 0.0082, "step": 97830 }, { "epoch": 3.0246646473388146, "grad_norm": 0.08696670085191727, "learning_rate": 1.8488162205600548e-05, "loss": 0.0079, "step": 97860 }, { "epoch": 3.0255918897199727, "grad_norm": 0.1408277153968811, "learning_rate": 1.8487698584409966e-05, "loss": 0.0083, "step": 97890 }, { "epoch": 3.0265191321011313, "grad_norm": 0.16855181753635406, "learning_rate": 1.8487234963219387e-05, "loss": 0.0087, "step": 97920 }, { "epoch": 3.0274463744822895, "grad_norm": 0.13420413434505463, "learning_rate": 1.848677134202881e-05, "loss": 0.0084, "step": 97950 }, { "epoch": 3.028373616863448, "grad_norm": 0.1725984811782837, "learning_rate": 1.8486307720838227e-05, "loss": 0.008, "step": 97980 }, { "epoch": 3.0293008592446067, "grad_norm": 0.12505246698856354, "learning_rate": 1.8485844099647648e-05, "loss": 0.0081, "step": 98010 }, { "epoch": 3.030228101625765, "grad_norm": 0.15531425178050995, "learning_rate": 1.848538047845707e-05, "loss": 0.0088, "step": 98040 }, { "epoch": 3.0311553440069234, "grad_norm": 0.11028123646974564, "learning_rate": 1.848491685726649e-05, "loss": 0.0086, "step": 98070 }, { "epoch": 3.032082586388082, "grad_norm": 0.116666778922081, "learning_rate": 1.8484453236075913e-05, "loss": 0.0087, "step": 98100 }, { "epoch": 3.03300982876924, "grad_norm": 0.13247500360012054, "learning_rate": 1.848400506892502e-05, "loss": 0.0082, "step": 98130 }, { "epoch": 3.033937071150399, "grad_norm": 0.12238973379135132, "learning_rate": 1.848354144773444e-05, "loss": 0.0076, "step": 98160 }, { "epoch": 3.034864313531557, "grad_norm": 0.1273692101240158, "learning_rate": 1.848307782654386e-05, "loss": 0.008, "step": 98190 }, { "epoch": 3.0357915559127155, "grad_norm": 0.12392578274011612, "learning_rate": 1.848261420535328e-05, "loss": 0.0082, "step": 98220 }, { "epoch": 3.036718798293874, "grad_norm": 0.12125787138938904, "learning_rate": 1.84821505841627e-05, "loss": 0.0082, "step": 98250 }, { "epoch": 
3.0376460406750323, "grad_norm": 0.07717643678188324, "learning_rate": 1.848168696297212e-05, "loss": 0.0082, "step": 98280 }, { "epoch": 3.038573283056191, "grad_norm": 0.11602535843849182, "learning_rate": 1.8481223341781543e-05, "loss": 0.0078, "step": 98310 }, { "epoch": 3.0395005254373495, "grad_norm": 0.1363963931798935, "learning_rate": 1.8480759720590964e-05, "loss": 0.0084, "step": 98340 }, { "epoch": 3.0404277678185077, "grad_norm": 0.13305062055587769, "learning_rate": 1.8480296099400386e-05, "loss": 0.0079, "step": 98370 }, { "epoch": 3.0413550101996663, "grad_norm": 0.12968887388706207, "learning_rate": 1.8479832478209804e-05, "loss": 0.0073, "step": 98400 }, { "epoch": 3.042282252580825, "grad_norm": 0.1542646437883377, "learning_rate": 1.8479368857019226e-05, "loss": 0.0083, "step": 98430 }, { "epoch": 3.043209494961983, "grad_norm": 0.1876635104417801, "learning_rate": 1.8478905235828647e-05, "loss": 0.0077, "step": 98460 }, { "epoch": 3.0441367373431416, "grad_norm": 0.08711368590593338, "learning_rate": 1.847844161463807e-05, "loss": 0.0081, "step": 98490 }, { "epoch": 3.0450639797242998, "grad_norm": 0.17960205674171448, "learning_rate": 1.847797799344749e-05, "loss": 0.0088, "step": 98520 }, { "epoch": 3.0459912221054584, "grad_norm": 0.10337238758802414, "learning_rate": 1.8477514372256908e-05, "loss": 0.009, "step": 98550 }, { "epoch": 3.046918464486617, "grad_norm": 0.11240050941705704, "learning_rate": 1.847705075106633e-05, "loss": 0.0078, "step": 98580 }, { "epoch": 3.047845706867775, "grad_norm": 0.1566145122051239, "learning_rate": 1.847658712987575e-05, "loss": 0.0077, "step": 98610 }, { "epoch": 3.0487729492489337, "grad_norm": 0.10601182281970978, "learning_rate": 1.847612350868517e-05, "loss": 0.0075, "step": 98640 }, { "epoch": 3.0497001916300923, "grad_norm": 0.12738144397735596, "learning_rate": 1.847565988749459e-05, "loss": 0.0087, "step": 98670 }, { "epoch": 3.0506274340112505, "grad_norm": 0.1374865472316742, "learning_rate": 
1.8475196266304016e-05, "loss": 0.0076, "step": 98700 }, { "epoch": 3.051554676392409, "grad_norm": 0.1419951319694519, "learning_rate": 1.8474732645113434e-05, "loss": 0.0081, "step": 98730 }, { "epoch": 3.0524819187735672, "grad_norm": 0.10714120417833328, "learning_rate": 1.8474269023922855e-05, "loss": 0.0088, "step": 98760 }, { "epoch": 3.053409161154726, "grad_norm": 0.12978176772594452, "learning_rate": 1.8473805402732277e-05, "loss": 0.0083, "step": 98790 }, { "epoch": 3.0543364035358844, "grad_norm": 0.16391527652740479, "learning_rate": 1.8473341781541695e-05, "loss": 0.0089, "step": 98820 }, { "epoch": 3.0552636459170426, "grad_norm": 0.13526907563209534, "learning_rate": 1.8472878160351117e-05, "loss": 0.0081, "step": 98850 }, { "epoch": 3.056190888298201, "grad_norm": 0.12376457452774048, "learning_rate": 1.8472414539160538e-05, "loss": 0.0082, "step": 98880 }, { "epoch": 3.05711813067936, "grad_norm": 0.18332451581954956, "learning_rate": 1.847195091796996e-05, "loss": 0.0076, "step": 98910 }, { "epoch": 3.058045373060518, "grad_norm": 0.12273937463760376, "learning_rate": 1.847148729677938e-05, "loss": 0.0081, "step": 98940 }, { "epoch": 3.0589726154416765, "grad_norm": 0.16268685460090637, "learning_rate": 1.84710236755888e-05, "loss": 0.008, "step": 98970 }, { "epoch": 3.0598998578228347, "grad_norm": 0.1155691146850586, "learning_rate": 1.847056005439822e-05, "loss": 0.0087, "step": 99000 }, { "epoch": 3.0608271002039933, "grad_norm": 0.14969179034233093, "learning_rate": 1.8470096433207642e-05, "loss": 0.0084, "step": 99030 }, { "epoch": 3.061754342585152, "grad_norm": 0.14028246700763702, "learning_rate": 1.8469632812017064e-05, "loss": 0.0081, "step": 99060 }, { "epoch": 3.06268158496631, "grad_norm": 0.13638117909431458, "learning_rate": 1.8469169190826485e-05, "loss": 0.0087, "step": 99090 }, { "epoch": 3.0636088273474686, "grad_norm": 0.14781703054904938, "learning_rate": 1.8468705569635907e-05, "loss": 0.008, "step": 99120 }, { "epoch": 
3.0645360697286272, "grad_norm": 0.13032525777816772, "learning_rate": 1.8468241948445325e-05, "loss": 0.0082, "step": 99150 }, { "epoch": 3.0654633121097854, "grad_norm": 0.08900028467178345, "learning_rate": 1.8467778327254746e-05, "loss": 0.0081, "step": 99180 }, { "epoch": 3.066390554490944, "grad_norm": 0.167524516582489, "learning_rate": 1.8467314706064168e-05, "loss": 0.0083, "step": 99210 }, { "epoch": 3.067317796872102, "grad_norm": 0.10560484230518341, "learning_rate": 1.8466851084873586e-05, "loss": 0.0085, "step": 99240 }, { "epoch": 3.0682450392532608, "grad_norm": 0.15823334455490112, "learning_rate": 1.8466387463683007e-05, "loss": 0.0081, "step": 99270 }, { "epoch": 3.0691722816344194, "grad_norm": 0.160282701253891, "learning_rate": 1.846592384249243e-05, "loss": 0.0081, "step": 99300 }, { "epoch": 3.0700995240155775, "grad_norm": 0.07350581139326096, "learning_rate": 1.846546022130185e-05, "loss": 0.0083, "step": 99330 }, { "epoch": 3.071026766396736, "grad_norm": 0.12563875317573547, "learning_rate": 1.8464996600111272e-05, "loss": 0.0084, "step": 99360 }, { "epoch": 3.0719540087778947, "grad_norm": 0.12747521698474884, "learning_rate": 1.846453297892069e-05, "loss": 0.0078, "step": 99390 }, { "epoch": 3.072881251159053, "grad_norm": 0.15366318821907043, "learning_rate": 1.846406935773011e-05, "loss": 0.0079, "step": 99420 }, { "epoch": 3.0738084935402115, "grad_norm": 0.12868782877922058, "learning_rate": 1.8463605736539533e-05, "loss": 0.0087, "step": 99450 }, { "epoch": 3.07473573592137, "grad_norm": 0.12291885167360306, "learning_rate": 1.8463142115348955e-05, "loss": 0.0081, "step": 99480 }, { "epoch": 3.075662978302528, "grad_norm": 0.08502746373414993, "learning_rate": 1.8462678494158376e-05, "loss": 0.0085, "step": 99510 }, { "epoch": 3.076590220683687, "grad_norm": 0.15466776490211487, "learning_rate": 1.8462214872967798e-05, "loss": 0.0089, "step": 99540 }, { "epoch": 3.077517463064845, "grad_norm": 0.12330694496631622, "learning_rate": 
1.8461751251777216e-05, "loss": 0.0089, "step": 99570 }, { "epoch": 3.0784447054460036, "grad_norm": 0.13068729639053345, "learning_rate": 1.8461287630586637e-05, "loss": 0.0078, "step": 99600 }, { "epoch": 3.079371947827162, "grad_norm": 0.11955488473176956, "learning_rate": 1.8460824009396055e-05, "loss": 0.0081, "step": 99630 }, { "epoch": 3.0802991902083203, "grad_norm": 0.1064491868019104, "learning_rate": 1.8460360388205477e-05, "loss": 0.0087, "step": 99660 }, { "epoch": 3.081226432589479, "grad_norm": 0.14056247472763062, "learning_rate": 1.84598967670149e-05, "loss": 0.0078, "step": 99690 }, { "epoch": 3.0821536749706375, "grad_norm": 0.16910676658153534, "learning_rate": 1.845943314582432e-05, "loss": 0.0078, "step": 99720 }, { "epoch": 3.0830809173517957, "grad_norm": 0.14823657274246216, "learning_rate": 1.845896952463374e-05, "loss": 0.0085, "step": 99750 }, { "epoch": 3.0840081597329543, "grad_norm": 0.0984150618314743, "learning_rate": 1.8458505903443163e-05, "loss": 0.0085, "step": 99780 }, { "epoch": 3.0849354021141124, "grad_norm": 0.11981356143951416, "learning_rate": 1.845804228225258e-05, "loss": 0.0086, "step": 99810 }, { "epoch": 3.085862644495271, "grad_norm": 0.12913021445274353, "learning_rate": 1.8457578661062002e-05, "loss": 0.0078, "step": 99840 }, { "epoch": 3.0867898868764296, "grad_norm": 0.12388936430215836, "learning_rate": 1.8457115039871424e-05, "loss": 0.0078, "step": 99870 }, { "epoch": 3.087717129257588, "grad_norm": 0.1181371659040451, "learning_rate": 1.8456651418680845e-05, "loss": 0.0081, "step": 99900 }, { "epoch": 3.0886443716387464, "grad_norm": 0.15989188849925995, "learning_rate": 1.8456187797490267e-05, "loss": 0.0084, "step": 99930 }, { "epoch": 3.089571614019905, "grad_norm": 0.11976885050535202, "learning_rate": 1.8455724176299685e-05, "loss": 0.008, "step": 99960 }, { "epoch": 3.090498856401063, "grad_norm": 0.10742703080177307, "learning_rate": 1.8455260555109107e-05, "loss": 0.0074, "step": 99990 }, { "epoch": 
3.0908079371947825, "eval_f1": 0.9973667865186562, "eval_loss": 0.008742357604205608, "eval_precision": 0.9973647018761722, "eval_recall": 0.997372136296386, "eval_runtime": 4335.7742, "eval_samples_per_second": 272.431, "eval_steps_per_second": 8.514, "step": 100000 }, { "epoch": 3.0914260987822217, "grad_norm": 0.09459880739450455, "learning_rate": 1.8454796933918528e-05, "loss": 0.0084, "step": 100020 }, { "epoch": 3.09235334116338, "grad_norm": 0.1581123024225235, "learning_rate": 1.8454333312727946e-05, "loss": 0.0081, "step": 100050 }, { "epoch": 3.0932805835445385, "grad_norm": 0.12209558486938477, "learning_rate": 1.8453869691537368e-05, "loss": 0.0081, "step": 100080 }, { "epoch": 3.094207825925697, "grad_norm": 0.16225886344909668, "learning_rate": 1.8453406070346793e-05, "loss": 0.0082, "step": 100110 }, { "epoch": 3.0951350683068553, "grad_norm": 0.13976836204528809, "learning_rate": 1.845294244915621e-05, "loss": 0.0084, "step": 100140 }, { "epoch": 3.096062310688014, "grad_norm": 0.08809863030910492, "learning_rate": 1.8452478827965632e-05, "loss": 0.0078, "step": 100170 }, { "epoch": 3.0969895530691725, "grad_norm": 0.13172772526741028, "learning_rate": 1.8452015206775054e-05, "loss": 0.0088, "step": 100200 }, { "epoch": 3.0979167954503306, "grad_norm": 0.15518079698085785, "learning_rate": 1.8451551585584472e-05, "loss": 0.0089, "step": 100230 }, { "epoch": 3.098844037831489, "grad_norm": 0.16698946058750153, "learning_rate": 1.8451087964393893e-05, "loss": 0.0077, "step": 100260 }, { "epoch": 3.0997712802126474, "grad_norm": 0.16835670173168182, "learning_rate": 1.8450624343203315e-05, "loss": 0.0076, "step": 100290 }, { "epoch": 3.100698522593806, "grad_norm": 0.12033651769161224, "learning_rate": 1.8450160722012736e-05, "loss": 0.0078, "step": 100320 }, { "epoch": 3.1016257649749646, "grad_norm": 0.10131445527076721, "learning_rate": 1.8449697100822158e-05, "loss": 0.0078, "step": 100350 }, { "epoch": 3.1025530073561227, "grad_norm": 
0.08333709836006165, "learning_rate": 1.8449233479631576e-05, "loss": 0.0086, "step": 100380 }, { "epoch": 3.1034802497372813, "grad_norm": 0.15354426205158234, "learning_rate": 1.8448769858440998e-05, "loss": 0.0081, "step": 100410 }, { "epoch": 3.10440749211844, "grad_norm": 0.15530206263065338, "learning_rate": 1.844830623725042e-05, "loss": 0.0076, "step": 100440 }, { "epoch": 3.105334734499598, "grad_norm": 0.12677592039108276, "learning_rate": 1.844784261605984e-05, "loss": 0.0075, "step": 100470 }, { "epoch": 3.1062619768807567, "grad_norm": 0.21500848233699799, "learning_rate": 1.8447378994869262e-05, "loss": 0.008, "step": 100500 }, { "epoch": 3.1071892192619153, "grad_norm": 0.11700476706027985, "learning_rate": 1.8446915373678684e-05, "loss": 0.0082, "step": 100530 }, { "epoch": 3.1081164616430734, "grad_norm": 0.09208942949771881, "learning_rate": 1.84464517524881e-05, "loss": 0.0079, "step": 100560 }, { "epoch": 3.109043704024232, "grad_norm": 0.10517438501119614, "learning_rate": 1.8445988131297523e-05, "loss": 0.0089, "step": 100590 }, { "epoch": 3.10997094640539, "grad_norm": 0.13290072977542877, "learning_rate": 1.844552451010694e-05, "loss": 0.0076, "step": 100620 }, { "epoch": 3.110898188786549, "grad_norm": 0.1392398178577423, "learning_rate": 1.8445060888916363e-05, "loss": 0.0082, "step": 100650 }, { "epoch": 3.1118254311677074, "grad_norm": 0.13695746660232544, "learning_rate": 1.8444597267725784e-05, "loss": 0.0084, "step": 100680 }, { "epoch": 3.1127526735488655, "grad_norm": 0.14131532609462738, "learning_rate": 1.8444133646535206e-05, "loss": 0.0084, "step": 100710 }, { "epoch": 3.113679915930024, "grad_norm": 0.12648330628871918, "learning_rate": 1.8443670025344627e-05, "loss": 0.0088, "step": 100740 }, { "epoch": 3.1146071583111827, "grad_norm": 0.10085126757621765, "learning_rate": 1.844320640415405e-05, "loss": 0.0081, "step": 100770 }, { "epoch": 3.115534400692341, "grad_norm": 0.10368482023477554, "learning_rate": 
1.8442742782963467e-05, "loss": 0.0081, "step": 100800 }, { "epoch": 3.1164616430734995, "grad_norm": 0.10885415226221085, "learning_rate": 1.844227916177289e-05, "loss": 0.0077, "step": 100830 }, { "epoch": 3.1173888854546576, "grad_norm": 0.15345443785190582, "learning_rate": 1.844181554058231e-05, "loss": 0.0084, "step": 100860 }, { "epoch": 3.1183161278358162, "grad_norm": 0.1703343242406845, "learning_rate": 1.844135191939173e-05, "loss": 0.0076, "step": 100890 }, { "epoch": 3.119243370216975, "grad_norm": 0.15433140099048615, "learning_rate": 1.8440888298201153e-05, "loss": 0.0084, "step": 100920 }, { "epoch": 3.120170612598133, "grad_norm": 0.09448756277561188, "learning_rate": 1.844042467701057e-05, "loss": 0.0085, "step": 100950 }, { "epoch": 3.1210978549792916, "grad_norm": 0.1094212457537651, "learning_rate": 1.8439961055819993e-05, "loss": 0.0084, "step": 100980 }, { "epoch": 3.12202509736045, "grad_norm": 0.1245197206735611, "learning_rate": 1.8439497434629414e-05, "loss": 0.0086, "step": 101010 }, { "epoch": 3.1229523397416084, "grad_norm": 0.1612682044506073, "learning_rate": 1.8439033813438832e-05, "loss": 0.0081, "step": 101040 }, { "epoch": 3.123879582122767, "grad_norm": 0.14855548739433289, "learning_rate": 1.8438570192248254e-05, "loss": 0.0085, "step": 101070 }, { "epoch": 3.124806824503925, "grad_norm": 0.14388969540596008, "learning_rate": 1.8438106571057675e-05, "loss": 0.0083, "step": 101100 }, { "epoch": 3.1257340668850837, "grad_norm": 0.20922857522964478, "learning_rate": 1.8437642949867097e-05, "loss": 0.008, "step": 101130 }, { "epoch": 3.1266613092662423, "grad_norm": 0.19238539040088654, "learning_rate": 1.8437179328676518e-05, "loss": 0.0089, "step": 101160 }, { "epoch": 3.1275885516474005, "grad_norm": 0.16897623240947723, "learning_rate": 1.843671570748594e-05, "loss": 0.0081, "step": 101190 }, { "epoch": 3.128515794028559, "grad_norm": 0.11540330946445465, "learning_rate": 1.8436252086295358e-05, "loss": 0.0082, "step": 101220 
}, { "epoch": 3.1294430364097177, "grad_norm": 0.09599994122982025, "learning_rate": 1.843578846510478e-05, "loss": 0.0085, "step": 101250 }, { "epoch": 3.130370278790876, "grad_norm": 0.11834778636693954, "learning_rate": 1.84353248439142e-05, "loss": 0.0086, "step": 101280 }, { "epoch": 3.1312975211720344, "grad_norm": 0.1859632432460785, "learning_rate": 1.8434861222723622e-05, "loss": 0.0082, "step": 101310 }, { "epoch": 3.132224763553193, "grad_norm": 0.18008238077163696, "learning_rate": 1.8434397601533044e-05, "loss": 0.0076, "step": 101340 }, { "epoch": 3.133152005934351, "grad_norm": 0.18477989733219147, "learning_rate": 1.8433933980342462e-05, "loss": 0.0087, "step": 101370 }, { "epoch": 3.1340792483155098, "grad_norm": 0.08824071288108826, "learning_rate": 1.8433470359151884e-05, "loss": 0.008, "step": 101400 }, { "epoch": 3.135006490696668, "grad_norm": 0.11691128462553024, "learning_rate": 1.8433006737961305e-05, "loss": 0.0088, "step": 101430 }, { "epoch": 3.1359337330778265, "grad_norm": 0.15848472714424133, "learning_rate": 1.8432543116770723e-05, "loss": 0.0076, "step": 101460 }, { "epoch": 3.136860975458985, "grad_norm": 0.1935313194990158, "learning_rate": 1.8432079495580145e-05, "loss": 0.0095, "step": 101490 }, { "epoch": 3.1377882178401433, "grad_norm": 0.13169287145137787, "learning_rate": 1.843161587438957e-05, "loss": 0.0081, "step": 101520 }, { "epoch": 3.138715460221302, "grad_norm": 0.1117553785443306, "learning_rate": 1.8431152253198988e-05, "loss": 0.0086, "step": 101550 }, { "epoch": 3.1396427026024605, "grad_norm": 0.10505997389554977, "learning_rate": 1.843068863200841e-05, "loss": 0.008, "step": 101580 }, { "epoch": 3.1405699449836186, "grad_norm": 0.181939035654068, "learning_rate": 1.8430225010817827e-05, "loss": 0.008, "step": 101610 }, { "epoch": 3.1414971873647772, "grad_norm": 0.14463743567466736, "learning_rate": 1.842976138962725e-05, "loss": 0.0084, "step": 101640 }, { "epoch": 3.1424244297459354, "grad_norm": 
0.1418609321117401, "learning_rate": 1.842929776843667e-05, "loss": 0.0088, "step": 101670 }, { "epoch": 3.143351672127094, "grad_norm": 0.10300297290086746, "learning_rate": 1.8428834147246092e-05, "loss": 0.0083, "step": 101700 }, { "epoch": 3.1442789145082526, "grad_norm": 0.16146598756313324, "learning_rate": 1.8428370526055513e-05, "loss": 0.0086, "step": 101730 }, { "epoch": 3.1452061568894107, "grad_norm": 0.09219073504209518, "learning_rate": 1.8427906904864935e-05, "loss": 0.009, "step": 101760 }, { "epoch": 3.1461333992705693, "grad_norm": 0.1192464604973793, "learning_rate": 1.8427443283674353e-05, "loss": 0.0081, "step": 101790 }, { "epoch": 3.147060641651728, "grad_norm": 0.11573787033557892, "learning_rate": 1.8426979662483774e-05, "loss": 0.0075, "step": 101820 }, { "epoch": 3.147987884032886, "grad_norm": 0.14545293152332306, "learning_rate": 1.8426516041293196e-05, "loss": 0.0075, "step": 101850 }, { "epoch": 3.1489151264140447, "grad_norm": 0.11414206773042679, "learning_rate": 1.8426052420102617e-05, "loss": 0.0077, "step": 101880 }, { "epoch": 3.149842368795203, "grad_norm": 0.13551203906536102, "learning_rate": 1.842558879891204e-05, "loss": 0.0079, "step": 101910 }, { "epoch": 3.1507696111763615, "grad_norm": 0.1278255581855774, "learning_rate": 1.8425125177721457e-05, "loss": 0.0085, "step": 101940 }, { "epoch": 3.15169685355752, "grad_norm": 0.13464580476284027, "learning_rate": 1.842466155653088e-05, "loss": 0.0087, "step": 101970 }, { "epoch": 3.152624095938678, "grad_norm": 0.14404979348182678, "learning_rate": 1.84241979353403e-05, "loss": 0.0082, "step": 102000 }, { "epoch": 3.153551338319837, "grad_norm": 0.1001371517777443, "learning_rate": 1.8423734314149718e-05, "loss": 0.008, "step": 102030 }, { "epoch": 3.1544785807009954, "grad_norm": 0.13096320629119873, "learning_rate": 1.842327069295914e-05, "loss": 0.0077, "step": 102060 }, { "epoch": 3.1554058230821536, "grad_norm": 0.1680983603000641, "learning_rate": 1.842280707176856e-05, 
"loss": 0.0087, "step": 102090 }, { "epoch": 3.156333065463312, "grad_norm": 0.12471675127744675, "learning_rate": 1.8422343450577983e-05, "loss": 0.0085, "step": 102120 }, { "epoch": 3.1572603078444708, "grad_norm": 0.09109923243522644, "learning_rate": 1.8421879829387404e-05, "loss": 0.008, "step": 102150 }, { "epoch": 3.158187550225629, "grad_norm": 0.16463273763656616, "learning_rate": 1.8421416208196826e-05, "loss": 0.0088, "step": 102180 }, { "epoch": 3.1591147926067875, "grad_norm": 0.10834204405546188, "learning_rate": 1.8420952587006244e-05, "loss": 0.0079, "step": 102210 }, { "epoch": 3.1600420349879457, "grad_norm": 0.13540267944335938, "learning_rate": 1.8420488965815665e-05, "loss": 0.008, "step": 102240 }, { "epoch": 3.1609692773691043, "grad_norm": 0.14770832657814026, "learning_rate": 1.8420025344625087e-05, "loss": 0.0081, "step": 102270 }, { "epoch": 3.161896519750263, "grad_norm": 0.11157308518886566, "learning_rate": 1.841956172343451e-05, "loss": 0.0081, "step": 102300 }, { "epoch": 3.162823762131421, "grad_norm": 0.12389900535345078, "learning_rate": 1.841909810224393e-05, "loss": 0.0088, "step": 102330 }, { "epoch": 3.1637510045125796, "grad_norm": 0.23711714148521423, "learning_rate": 1.8418634481053348e-05, "loss": 0.0084, "step": 102360 }, { "epoch": 3.1646782468937382, "grad_norm": 0.15308032929897308, "learning_rate": 1.841817085986277e-05, "loss": 0.0082, "step": 102390 }, { "epoch": 3.1656054892748964, "grad_norm": 0.1374216079711914, "learning_rate": 1.841770723867219e-05, "loss": 0.0088, "step": 102420 }, { "epoch": 3.166532731656055, "grad_norm": 0.16146202385425568, "learning_rate": 1.841724361748161e-05, "loss": 0.0086, "step": 102450 }, { "epoch": 3.167459974037213, "grad_norm": 0.1271718591451645, "learning_rate": 1.841677999629103e-05, "loss": 0.0085, "step": 102480 }, { "epoch": 3.1683872164183717, "grad_norm": 0.12487583607435226, "learning_rate": 1.8416316375100452e-05, "loss": 0.0086, "step": 102510 }, { "epoch": 
3.1693144587995303, "grad_norm": 0.10312239080667496, "learning_rate": 1.8415852753909874e-05, "loss": 0.0087, "step": 102540 }, { "epoch": 3.1702417011806885, "grad_norm": 0.1651381105184555, "learning_rate": 1.8415389132719295e-05, "loss": 0.0078, "step": 102570 }, { "epoch": 3.171168943561847, "grad_norm": 0.15549200773239136, "learning_rate": 1.8414925511528717e-05, "loss": 0.0075, "step": 102600 }, { "epoch": 3.1720961859430057, "grad_norm": 0.15114463865756989, "learning_rate": 1.8414461890338135e-05, "loss": 0.0078, "step": 102630 }, { "epoch": 3.173023428324164, "grad_norm": 0.11362305283546448, "learning_rate": 1.8413998269147556e-05, "loss": 0.0079, "step": 102660 }, { "epoch": 3.1739506707053224, "grad_norm": 0.10225860029459, "learning_rate": 1.8413534647956978e-05, "loss": 0.0091, "step": 102690 }, { "epoch": 3.1748779130864806, "grad_norm": 0.12213977426290512, "learning_rate": 1.8413086480806082e-05, "loss": 0.0085, "step": 102720 }, { "epoch": 3.175805155467639, "grad_norm": 0.1462858021259308, "learning_rate": 1.8412622859615504e-05, "loss": 0.0088, "step": 102750 }, { "epoch": 3.176732397848798, "grad_norm": 0.10618723928928375, "learning_rate": 1.8412159238424925e-05, "loss": 0.0085, "step": 102780 }, { "epoch": 3.177659640229956, "grad_norm": 0.08744430541992188, "learning_rate": 1.8411695617234347e-05, "loss": 0.0087, "step": 102810 }, { "epoch": 3.1785868826111146, "grad_norm": 0.12282086908817291, "learning_rate": 1.841123199604377e-05, "loss": 0.0081, "step": 102840 }, { "epoch": 3.179514124992273, "grad_norm": 0.1371600478887558, "learning_rate": 1.8410768374853186e-05, "loss": 0.008, "step": 102870 }, { "epoch": 3.1804413673734313, "grad_norm": 0.1181427463889122, "learning_rate": 1.8410304753662608e-05, "loss": 0.0083, "step": 102900 }, { "epoch": 3.18136860975459, "grad_norm": 0.11463901400566101, "learning_rate": 1.840984113247203e-05, "loss": 0.008, "step": 102930 }, { "epoch": 3.1822958521357485, "grad_norm": 0.11790738999843597, 
"learning_rate": 1.840937751128145e-05, "loss": 0.0081, "step": 102960 }, { "epoch": 3.1832230945169067, "grad_norm": 0.1354987621307373, "learning_rate": 1.8408913890090872e-05, "loss": 0.0077, "step": 102990 }, { "epoch": 3.1841503368980653, "grad_norm": 0.07785102725028992, "learning_rate": 1.8408450268900294e-05, "loss": 0.009, "step": 103020 }, { "epoch": 3.1850775792792234, "grad_norm": 0.1317695826292038, "learning_rate": 1.8407986647709712e-05, "loss": 0.0087, "step": 103050 }, { "epoch": 3.186004821660382, "grad_norm": 0.1406124085187912, "learning_rate": 1.8407523026519134e-05, "loss": 0.0087, "step": 103080 }, { "epoch": 3.1869320640415406, "grad_norm": 0.17949573695659637, "learning_rate": 1.8407059405328552e-05, "loss": 0.008, "step": 103110 }, { "epoch": 3.1878593064226988, "grad_norm": 0.12080986797809601, "learning_rate": 1.8406595784137977e-05, "loss": 0.0077, "step": 103140 }, { "epoch": 3.1887865488038574, "grad_norm": 0.09506355226039886, "learning_rate": 1.8406132162947398e-05, "loss": 0.0084, "step": 103170 }, { "epoch": 3.189713791185016, "grad_norm": 0.1272953599691391, "learning_rate": 1.8405668541756816e-05, "loss": 0.0081, "step": 103200 }, { "epoch": 3.190641033566174, "grad_norm": 0.11863699555397034, "learning_rate": 1.8405204920566238e-05, "loss": 0.0089, "step": 103230 }, { "epoch": 3.1915682759473327, "grad_norm": 0.08478176593780518, "learning_rate": 1.840474129937566e-05, "loss": 0.0083, "step": 103260 }, { "epoch": 3.192495518328491, "grad_norm": 0.15749363601207733, "learning_rate": 1.8404277678185077e-05, "loss": 0.0083, "step": 103290 }, { "epoch": 3.1934227607096495, "grad_norm": 0.08859844505786896, "learning_rate": 1.84038140569945e-05, "loss": 0.0078, "step": 103320 }, { "epoch": 3.194350003090808, "grad_norm": 0.13509243726730347, "learning_rate": 1.840335043580392e-05, "loss": 0.0087, "step": 103350 }, { "epoch": 3.1952772454719662, "grad_norm": 0.12172698974609375, "learning_rate": 1.8402886814613342e-05, "loss": 
0.0084, "step": 103380 }, { "epoch": 3.196204487853125, "grad_norm": 0.09224339574575424, "learning_rate": 1.8402423193422763e-05, "loss": 0.0088, "step": 103410 }, { "epoch": 3.1971317302342834, "grad_norm": 0.09689203649759293, "learning_rate": 1.840195957223218e-05, "loss": 0.0076, "step": 103440 }, { "epoch": 3.1980589726154416, "grad_norm": 0.08367349952459335, "learning_rate": 1.8401495951041603e-05, "loss": 0.0081, "step": 103470 }, { "epoch": 3.1989862149966, "grad_norm": 0.12915758788585663, "learning_rate": 1.8401032329851025e-05, "loss": 0.0083, "step": 103500 }, { "epoch": 3.1999134573777583, "grad_norm": 0.13832664489746094, "learning_rate": 1.8400568708660446e-05, "loss": 0.0083, "step": 103530 }, { "epoch": 3.200840699758917, "grad_norm": 0.16538234055042267, "learning_rate": 1.8400105087469868e-05, "loss": 0.0082, "step": 103560 }, { "epoch": 3.2017679421400755, "grad_norm": 0.09535195678472519, "learning_rate": 1.839964146627929e-05, "loss": 0.008, "step": 103590 }, { "epoch": 3.2026951845212337, "grad_norm": 0.20014053583145142, "learning_rate": 1.8399177845088707e-05, "loss": 0.0082, "step": 103620 }, { "epoch": 3.2036224269023923, "grad_norm": 0.1363668590784073, "learning_rate": 1.839871422389813e-05, "loss": 0.0079, "step": 103650 }, { "epoch": 3.204549669283551, "grad_norm": 0.16534487903118134, "learning_rate": 1.839825060270755e-05, "loss": 0.0082, "step": 103680 }, { "epoch": 3.205476911664709, "grad_norm": 0.18177981674671173, "learning_rate": 1.8397786981516968e-05, "loss": 0.0082, "step": 103710 }, { "epoch": 3.2064041540458676, "grad_norm": 0.16372136771678925, "learning_rate": 1.839732336032639e-05, "loss": 0.0083, "step": 103740 }, { "epoch": 3.2073313964270262, "grad_norm": 0.16839900612831116, "learning_rate": 1.839685973913581e-05, "loss": 0.0092, "step": 103770 }, { "epoch": 3.2082586388081844, "grad_norm": 0.1703179031610489, "learning_rate": 1.8396396117945233e-05, "loss": 0.0081, "step": 103800 }, { "epoch": 3.209185881189343, 
"grad_norm": 0.1308167576789856, "learning_rate": 1.8395932496754654e-05, "loss": 0.0084, "step": 103830 }, { "epoch": 3.210113123570501, "grad_norm": 0.13242705166339874, "learning_rate": 1.8395468875564072e-05, "loss": 0.008, "step": 103860 }, { "epoch": 3.2110403659516598, "grad_norm": 0.0884060487151146, "learning_rate": 1.8395005254373494e-05, "loss": 0.0074, "step": 103890 }, { "epoch": 3.2119676083328184, "grad_norm": 0.1890478879213333, "learning_rate": 1.8394541633182915e-05, "loss": 0.0079, "step": 103920 }, { "epoch": 3.2128948507139765, "grad_norm": 0.14672677218914032, "learning_rate": 1.8394078011992337e-05, "loss": 0.0087, "step": 103950 }, { "epoch": 3.213822093095135, "grad_norm": 0.10897182673215866, "learning_rate": 1.839361439080176e-05, "loss": 0.0082, "step": 103980 }, { "epoch": 3.2147493354762933, "grad_norm": 0.11979885399341583, "learning_rate": 1.839315076961118e-05, "loss": 0.0079, "step": 104010 }, { "epoch": 3.215676577857452, "grad_norm": 0.1253642737865448, "learning_rate": 1.8392687148420598e-05, "loss": 0.0081, "step": 104040 }, { "epoch": 3.2166038202386105, "grad_norm": 0.14487536251544952, "learning_rate": 1.839222352723002e-05, "loss": 0.0088, "step": 104070 }, { "epoch": 3.2175310626197686, "grad_norm": 0.14612720906734467, "learning_rate": 1.8391759906039438e-05, "loss": 0.0074, "step": 104100 }, { "epoch": 3.218458305000927, "grad_norm": 0.10298236459493637, "learning_rate": 1.839129628484886e-05, "loss": 0.0078, "step": 104130 }, { "epoch": 3.219385547382086, "grad_norm": 0.1082545593380928, "learning_rate": 1.839083266365828e-05, "loss": 0.0079, "step": 104160 }, { "epoch": 3.220312789763244, "grad_norm": 0.1177036464214325, "learning_rate": 1.8390369042467702e-05, "loss": 0.0083, "step": 104190 }, { "epoch": 3.2212400321444026, "grad_norm": 0.11700400710105896, "learning_rate": 1.8389905421277124e-05, "loss": 0.0077, "step": 104220 }, { "epoch": 3.222167274525561, "grad_norm": 0.1396879255771637, "learning_rate": 
1.8389441800086545e-05, "loss": 0.008, "step": 104250 }, { "epoch": 3.2230945169067193, "grad_norm": 0.13465215265750885, "learning_rate": 1.8388978178895963e-05, "loss": 0.0085, "step": 104280 }, { "epoch": 3.224021759287878, "grad_norm": 0.13598762452602386, "learning_rate": 1.8388514557705385e-05, "loss": 0.0083, "step": 104310 }, { "epoch": 3.224949001669036, "grad_norm": 0.12540887296199799, "learning_rate": 1.8388050936514806e-05, "loss": 0.0083, "step": 104340 }, { "epoch": 3.2258762440501947, "grad_norm": 0.12254876643419266, "learning_rate": 1.8387587315324228e-05, "loss": 0.0083, "step": 104370 }, { "epoch": 3.2268034864313533, "grad_norm": 0.1547633409500122, "learning_rate": 1.838712369413365e-05, "loss": 0.0079, "step": 104400 }, { "epoch": 3.2277307288125114, "grad_norm": 0.15870054066181183, "learning_rate": 1.8386660072943068e-05, "loss": 0.0082, "step": 104430 }, { "epoch": 3.22865797119367, "grad_norm": 0.1653970628976822, "learning_rate": 1.838619645175249e-05, "loss": 0.0081, "step": 104460 }, { "epoch": 3.2295852135748286, "grad_norm": 0.14198356866836548, "learning_rate": 1.838573283056191e-05, "loss": 0.0086, "step": 104490 }, { "epoch": 3.230512455955987, "grad_norm": 0.0987839326262474, "learning_rate": 1.838526920937133e-05, "loss": 0.0084, "step": 104520 }, { "epoch": 3.2314396983371454, "grad_norm": 0.1515866070985794, "learning_rate": 1.8384805588180754e-05, "loss": 0.0084, "step": 104550 }, { "epoch": 3.232366940718304, "grad_norm": 0.1402536928653717, "learning_rate": 1.8384341966990175e-05, "loss": 0.0081, "step": 104580 }, { "epoch": 3.233294183099462, "grad_norm": 0.12023887038230896, "learning_rate": 1.8383878345799593e-05, "loss": 0.0086, "step": 104610 }, { "epoch": 3.2342214254806207, "grad_norm": 0.11479323357343674, "learning_rate": 1.8383414724609015e-05, "loss": 0.008, "step": 104640 }, { "epoch": 3.235148667861779, "grad_norm": 0.1375497281551361, "learning_rate": 1.8382951103418436e-05, "loss": 0.0071, "step": 104670 }, { 
"epoch": 3.2360759102429375, "grad_norm": 0.14262790977954865, "learning_rate": 1.8382487482227854e-05, "loss": 0.0079, "step": 104700 }, { "epoch": 3.237003152624096, "grad_norm": 0.09746945649385452, "learning_rate": 1.8382023861037276e-05, "loss": 0.0087, "step": 104730 }, { "epoch": 3.2379303950052543, "grad_norm": 0.15864889323711395, "learning_rate": 1.8381560239846697e-05, "loss": 0.0081, "step": 104760 }, { "epoch": 3.238857637386413, "grad_norm": 0.1485477089881897, "learning_rate": 1.838109661865612e-05, "loss": 0.0081, "step": 104790 }, { "epoch": 3.239784879767571, "grad_norm": 0.31800374388694763, "learning_rate": 1.838063299746554e-05, "loss": 0.0079, "step": 104820 }, { "epoch": 3.2407121221487296, "grad_norm": 0.11669903993606567, "learning_rate": 1.838016937627496e-05, "loss": 0.0088, "step": 104850 }, { "epoch": 3.241639364529888, "grad_norm": 0.12052738666534424, "learning_rate": 1.837970575508438e-05, "loss": 0.0086, "step": 104880 }, { "epoch": 3.2425666069110464, "grad_norm": 0.10850119590759277, "learning_rate": 1.8379257587933488e-05, "loss": 0.0079, "step": 104910 }, { "epoch": 3.243493849292205, "grad_norm": 0.14872652292251587, "learning_rate": 1.8378793966742906e-05, "loss": 0.0084, "step": 104940 }, { "epoch": 3.2444210916733636, "grad_norm": 0.09695352613925934, "learning_rate": 1.8378330345552327e-05, "loss": 0.0082, "step": 104970 }, { "epoch": 3.2453483340545217, "grad_norm": 0.14877551794052124, "learning_rate": 1.837786672436175e-05, "loss": 0.0085, "step": 105000 }, { "epoch": 3.2462755764356803, "grad_norm": 0.1205933690071106, "learning_rate": 1.837740310317117e-05, "loss": 0.0083, "step": 105030 }, { "epoch": 3.247202818816839, "grad_norm": 0.1810852736234665, "learning_rate": 1.8376939481980592e-05, "loss": 0.0083, "step": 105060 }, { "epoch": 3.248130061197997, "grad_norm": 0.1331174522638321, "learning_rate": 1.8376475860790013e-05, "loss": 0.008, "step": 105090 }, { "epoch": 3.2490573035791557, "grad_norm": 
0.10872344672679901, "learning_rate": 1.837601223959943e-05, "loss": 0.0081, "step": 105120 }, { "epoch": 3.249984545960314, "grad_norm": 0.17149727046489716, "learning_rate": 1.8375548618408853e-05, "loss": 0.0076, "step": 105150 }, { "epoch": 3.2509117883414724, "grad_norm": 0.1530533879995346, "learning_rate": 1.8375084997218275e-05, "loss": 0.0074, "step": 105180 }, { "epoch": 3.251839030722631, "grad_norm": 0.1284940093755722, "learning_rate": 1.8374621376027696e-05, "loss": 0.0077, "step": 105210 }, { "epoch": 3.252766273103789, "grad_norm": 0.11872290819883347, "learning_rate": 1.8374157754837118e-05, "loss": 0.0082, "step": 105240 }, { "epoch": 3.253693515484948, "grad_norm": 0.14583267271518707, "learning_rate": 1.8373694133646536e-05, "loss": 0.0077, "step": 105270 }, { "epoch": 3.2546207578661064, "grad_norm": 0.0962076485157013, "learning_rate": 1.8373230512455957e-05, "loss": 0.0081, "step": 105300 }, { "epoch": 3.2555480002472645, "grad_norm": 0.12222538888454437, "learning_rate": 1.837276689126538e-05, "loss": 0.008, "step": 105330 }, { "epoch": 3.256475242628423, "grad_norm": 0.09949534386396408, "learning_rate": 1.8372303270074797e-05, "loss": 0.0078, "step": 105360 }, { "epoch": 3.2574024850095817, "grad_norm": 0.1841236799955368, "learning_rate": 1.837183964888422e-05, "loss": 0.0086, "step": 105390 }, { "epoch": 3.25832972739074, "grad_norm": 0.12217816710472107, "learning_rate": 1.837137602769364e-05, "loss": 0.0083, "step": 105420 }, { "epoch": 3.2592569697718985, "grad_norm": 0.1348239779472351, "learning_rate": 1.837091240650306e-05, "loss": 0.0082, "step": 105450 }, { "epoch": 3.2601842121530566, "grad_norm": 0.10958919674158096, "learning_rate": 1.8370448785312483e-05, "loss": 0.0084, "step": 105480 }, { "epoch": 3.2611114545342152, "grad_norm": 0.12335113435983658, "learning_rate": 1.8369985164121904e-05, "loss": 0.0079, "step": 105510 }, { "epoch": 3.262038696915374, "grad_norm": 0.1130380928516388, "learning_rate": 
1.8369521542931323e-05, "loss": 0.0089, "step": 105540 }, { "epoch": 3.262965939296532, "grad_norm": 0.10086685419082642, "learning_rate": 1.8369057921740744e-05, "loss": 0.0088, "step": 105570 }, { "epoch": 3.2638931816776906, "grad_norm": 0.16625024378299713, "learning_rate": 1.8368594300550166e-05, "loss": 0.008, "step": 105600 }, { "epoch": 3.2648204240588488, "grad_norm": 0.17265670001506805, "learning_rate": 1.8368130679359587e-05, "loss": 0.0087, "step": 105630 }, { "epoch": 3.2657476664400074, "grad_norm": 0.0961090549826622, "learning_rate": 1.836766705816901e-05, "loss": 0.008, "step": 105660 }, { "epoch": 3.266674908821166, "grad_norm": 0.11458256095647812, "learning_rate": 1.8367203436978427e-05, "loss": 0.0082, "step": 105690 }, { "epoch": 3.267602151202324, "grad_norm": 0.17855823040008545, "learning_rate": 1.8366739815787848e-05, "loss": 0.0081, "step": 105720 }, { "epoch": 3.2685293935834827, "grad_norm": 0.0939449816942215, "learning_rate": 1.836627619459727e-05, "loss": 0.0085, "step": 105750 }, { "epoch": 3.2694566359646413, "grad_norm": 0.16969658434391022, "learning_rate": 1.8365812573406688e-05, "loss": 0.0079, "step": 105780 }, { "epoch": 3.2703838783457995, "grad_norm": 0.14658470451831818, "learning_rate": 1.8365348952216113e-05, "loss": 0.0079, "step": 105810 }, { "epoch": 3.271311120726958, "grad_norm": 0.11847195029258728, "learning_rate": 1.8364885331025534e-05, "loss": 0.0083, "step": 105840 }, { "epoch": 3.2722383631081167, "grad_norm": 0.14603792130947113, "learning_rate": 1.8364421709834952e-05, "loss": 0.0079, "step": 105870 }, { "epoch": 3.273165605489275, "grad_norm": 0.1297709345817566, "learning_rate": 1.8363958088644374e-05, "loss": 0.0083, "step": 105900 }, { "epoch": 3.2740928478704334, "grad_norm": 0.1315571665763855, "learning_rate": 1.8363494467453792e-05, "loss": 0.0083, "step": 105930 }, { "epoch": 3.2750200902515916, "grad_norm": 0.18559791147708893, "learning_rate": 1.8363030846263213e-05, "loss": 0.0079, "step": 
105960 }, { "epoch": 3.27594733263275, "grad_norm": 0.14307399094104767, "learning_rate": 1.8362567225072635e-05, "loss": 0.0079, "step": 105990 }, { "epoch": 3.2768745750139088, "grad_norm": 0.12617476284503937, "learning_rate": 1.8362103603882056e-05, "loss": 0.008, "step": 106020 }, { "epoch": 3.277801817395067, "grad_norm": 0.1268637627363205, "learning_rate": 1.8361639982691478e-05, "loss": 0.0081, "step": 106050 }, { "epoch": 3.2787290597762255, "grad_norm": 0.1024925634264946, "learning_rate": 1.83611763615009e-05, "loss": 0.0085, "step": 106080 }, { "epoch": 3.279656302157384, "grad_norm": 0.12680889666080475, "learning_rate": 1.8360712740310318e-05, "loss": 0.008, "step": 106110 }, { "epoch": 3.2805835445385423, "grad_norm": 0.08871777355670929, "learning_rate": 1.836024911911974e-05, "loss": 0.0074, "step": 106140 }, { "epoch": 3.281510786919701, "grad_norm": 0.13568943738937378, "learning_rate": 1.835978549792916e-05, "loss": 0.0084, "step": 106170 }, { "epoch": 3.2824380293008595, "grad_norm": 0.11448521167039871, "learning_rate": 1.8359337330778265e-05, "loss": 0.0079, "step": 106200 }, { "epoch": 3.2833652716820176, "grad_norm": 0.12124025821685791, "learning_rate": 1.8358873709587687e-05, "loss": 0.0079, "step": 106230 }, { "epoch": 3.2842925140631762, "grad_norm": 0.1442449390888214, "learning_rate": 1.8358410088397108e-05, "loss": 0.0086, "step": 106260 }, { "epoch": 3.2852197564443344, "grad_norm": 0.09920551627874374, "learning_rate": 1.835794646720653e-05, "loss": 0.0085, "step": 106290 }, { "epoch": 3.286146998825493, "grad_norm": 0.16558559238910675, "learning_rate": 1.835748284601595e-05, "loss": 0.0089, "step": 106320 }, { "epoch": 3.2870742412066516, "grad_norm": 0.09249119460582733, "learning_rate": 1.835701922482537e-05, "loss": 0.008, "step": 106350 }, { "epoch": 3.2880014835878097, "grad_norm": 0.10121524333953857, "learning_rate": 1.835655560363479e-05, "loss": 0.0085, "step": 106380 }, { "epoch": 3.2889287259689683, "grad_norm": 
0.15737830102443695, "learning_rate": 1.8356091982444212e-05, "loss": 0.0082, "step": 106410 }, { "epoch": 3.2898559683501265, "grad_norm": 0.13682162761688232, "learning_rate": 1.8355628361253634e-05, "loss": 0.0073, "step": 106440 }, { "epoch": 3.290783210731285, "grad_norm": 0.13867175579071045, "learning_rate": 1.8355164740063055e-05, "loss": 0.0082, "step": 106470 }, { "epoch": 3.2917104531124437, "grad_norm": 0.10858272016048431, "learning_rate": 1.8354701118872477e-05, "loss": 0.0075, "step": 106500 }, { "epoch": 3.292637695493602, "grad_norm": 0.13812801241874695, "learning_rate": 1.8354237497681895e-05, "loss": 0.0078, "step": 106530 }, { "epoch": 3.2935649378747605, "grad_norm": 0.11812686175107956, "learning_rate": 1.8353773876491316e-05, "loss": 0.0084, "step": 106560 }, { "epoch": 3.294492180255919, "grad_norm": 0.1930733174085617, "learning_rate": 1.8353310255300738e-05, "loss": 0.0077, "step": 106590 }, { "epoch": 3.295419422637077, "grad_norm": 0.10954590141773224, "learning_rate": 1.8352846634110156e-05, "loss": 0.0089, "step": 106620 }, { "epoch": 3.296346665018236, "grad_norm": 0.12478135526180267, "learning_rate": 1.8352383012919578e-05, "loss": 0.0082, "step": 106650 }, { "epoch": 3.2972739073993944, "grad_norm": 0.08509889990091324, "learning_rate": 1.8351919391729e-05, "loss": 0.0081, "step": 106680 }, { "epoch": 3.2982011497805526, "grad_norm": 0.11412026733160019, "learning_rate": 1.835145577053842e-05, "loss": 0.0083, "step": 106710 }, { "epoch": 3.299128392161711, "grad_norm": 0.14728973805904388, "learning_rate": 1.8350992149347842e-05, "loss": 0.009, "step": 106740 }, { "epoch": 3.3000556345428693, "grad_norm": 0.1353444755077362, "learning_rate": 1.835052852815726e-05, "loss": 0.008, "step": 106770 }, { "epoch": 3.300982876924028, "grad_norm": 0.1353350579738617, "learning_rate": 1.835006490696668e-05, "loss": 0.0074, "step": 106800 }, { "epoch": 3.3019101193051865, "grad_norm": 0.11685993522405624, "learning_rate": 
1.8349601285776103e-05, "loss": 0.0079, "step": 106830 }, { "epoch": 3.3028373616863447, "grad_norm": 0.12873545289039612, "learning_rate": 1.8349137664585525e-05, "loss": 0.0088, "step": 106860 }, { "epoch": 3.3037646040675033, "grad_norm": 0.11249644309282303, "learning_rate": 1.8348674043394946e-05, "loss": 0.0088, "step": 106890 }, { "epoch": 3.304691846448662, "grad_norm": 0.15390795469284058, "learning_rate": 1.8348210422204368e-05, "loss": 0.009, "step": 106920 }, { "epoch": 3.30561908882982, "grad_norm": 0.12133028358221054, "learning_rate": 1.8347746801013786e-05, "loss": 0.0084, "step": 106950 }, { "epoch": 3.3065463312109786, "grad_norm": 0.11764845252037048, "learning_rate": 1.8347283179823207e-05, "loss": 0.0084, "step": 106980 }, { "epoch": 3.3074735735921372, "grad_norm": 0.1454475373029709, "learning_rate": 1.8346819558632625e-05, "loss": 0.0078, "step": 107010 }, { "epoch": 3.3084008159732954, "grad_norm": 0.15150998532772064, "learning_rate": 1.8346355937442047e-05, "loss": 0.0083, "step": 107040 }, { "epoch": 3.309328058354454, "grad_norm": 0.14129102230072021, "learning_rate": 1.834589231625147e-05, "loss": 0.0092, "step": 107070 }, { "epoch": 3.310255300735612, "grad_norm": 0.13693569600582123, "learning_rate": 1.834542869506089e-05, "loss": 0.0081, "step": 107100 }, { "epoch": 3.3111825431167707, "grad_norm": 0.12598952651023865, "learning_rate": 1.834496507387031e-05, "loss": 0.0075, "step": 107130 }, { "epoch": 3.3121097854979293, "grad_norm": 0.14600084722042084, "learning_rate": 1.8344501452679733e-05, "loss": 0.0083, "step": 107160 }, { "epoch": 3.3130370278790875, "grad_norm": 0.14024458825588226, "learning_rate": 1.834403783148915e-05, "loss": 0.0092, "step": 107190 }, { "epoch": 3.313964270260246, "grad_norm": 0.09908128529787064, "learning_rate": 1.8343574210298573e-05, "loss": 0.0079, "step": 107220 }, { "epoch": 3.3148915126414042, "grad_norm": 0.19208325445652008, "learning_rate": 1.8343110589107994e-05, "loss": 0.008, "step": 
107250 }, { "epoch": 3.315818755022563, "grad_norm": 0.14018385112285614, "learning_rate": 1.8342646967917416e-05, "loss": 0.0086, "step": 107280 }, { "epoch": 3.3167459974037214, "grad_norm": 0.13017354905605316, "learning_rate": 1.8342183346726837e-05, "loss": 0.0087, "step": 107310 }, { "epoch": 3.3176732397848796, "grad_norm": 0.12047970294952393, "learning_rate": 1.8341719725536255e-05, "loss": 0.0084, "step": 107340 }, { "epoch": 3.318600482166038, "grad_norm": 0.11978892236948013, "learning_rate": 1.8341256104345677e-05, "loss": 0.0085, "step": 107370 }, { "epoch": 3.319527724547197, "grad_norm": 0.1419883817434311, "learning_rate": 1.8340792483155098e-05, "loss": 0.009, "step": 107400 }, { "epoch": 3.320454966928355, "grad_norm": 0.1518547534942627, "learning_rate": 1.834032886196452e-05, "loss": 0.0075, "step": 107430 }, { "epoch": 3.3213822093095136, "grad_norm": 0.10892272740602493, "learning_rate": 1.833986524077394e-05, "loss": 0.0078, "step": 107460 }, { "epoch": 3.322309451690672, "grad_norm": 0.31344887614250183, "learning_rate": 1.8339401619583363e-05, "loss": 0.0082, "step": 107490 }, { "epoch": 3.3232366940718303, "grad_norm": 0.18206796050071716, "learning_rate": 1.833893799839278e-05, "loss": 0.0084, "step": 107520 }, { "epoch": 3.324163936452989, "grad_norm": 0.1491541564464569, "learning_rate": 1.8338474377202202e-05, "loss": 0.008, "step": 107550 }, { "epoch": 3.325091178834147, "grad_norm": 0.09389735758304596, "learning_rate": 1.8338010756011624e-05, "loss": 0.0082, "step": 107580 }, { "epoch": 3.3260184212153057, "grad_norm": 0.08751418441534042, "learning_rate": 1.8337547134821042e-05, "loss": 0.0082, "step": 107610 }, { "epoch": 3.3269456635964643, "grad_norm": 0.10854653269052505, "learning_rate": 1.8337083513630464e-05, "loss": 0.0081, "step": 107640 }, { "epoch": 3.3278729059776224, "grad_norm": 0.09140148013830185, "learning_rate": 1.8336619892439885e-05, "loss": 0.0082, "step": 107670 }, { "epoch": 3.328800148358781, "grad_norm": 
0.11593981087207794, "learning_rate": 1.8336156271249307e-05, "loss": 0.0085, "step": 107700 }, { "epoch": 3.329727390739939, "grad_norm": 0.1462603062391281, "learning_rate": 1.8335692650058728e-05, "loss": 0.0084, "step": 107730 }, { "epoch": 3.3306546331210978, "grad_norm": 0.12523356080055237, "learning_rate": 1.8335229028868146e-05, "loss": 0.0084, "step": 107760 }, { "epoch": 3.3315818755022564, "grad_norm": 0.17146196961402893, "learning_rate": 1.8334765407677568e-05, "loss": 0.0077, "step": 107790 }, { "epoch": 3.332509117883415, "grad_norm": 0.09303944557905197, "learning_rate": 1.833430178648699e-05, "loss": 0.0084, "step": 107820 }, { "epoch": 3.333436360264573, "grad_norm": 0.13172508776187897, "learning_rate": 1.833383816529641e-05, "loss": 0.0078, "step": 107850 }, { "epoch": 3.3343636026457317, "grad_norm": 0.11711015552282333, "learning_rate": 1.8333374544105832e-05, "loss": 0.0087, "step": 107880 }, { "epoch": 3.33529084502689, "grad_norm": 0.13577063381671906, "learning_rate": 1.8332910922915254e-05, "loss": 0.0077, "step": 107910 }, { "epoch": 3.3362180874080485, "grad_norm": 0.3088992238044739, "learning_rate": 1.8332447301724672e-05, "loss": 0.0087, "step": 107940 }, { "epoch": 3.337145329789207, "grad_norm": 0.08759712427854538, "learning_rate": 1.8331983680534093e-05, "loss": 0.0082, "step": 107970 }, { "epoch": 3.3380725721703652, "grad_norm": 0.09502461552619934, "learning_rate": 1.8331520059343515e-05, "loss": 0.0075, "step": 108000 }, { "epoch": 3.338999814551524, "grad_norm": 0.12537315487861633, "learning_rate": 1.8331056438152933e-05, "loss": 0.0074, "step": 108030 }, { "epoch": 3.339927056932682, "grad_norm": 0.08894362300634384, "learning_rate": 1.8330592816962354e-05, "loss": 0.008, "step": 108060 }, { "epoch": 3.3408542993138406, "grad_norm": 0.09921267628669739, "learning_rate": 1.8330129195771776e-05, "loss": 0.009, "step": 108090 }, { "epoch": 3.341781541694999, "grad_norm": 0.12993919849395752, "learning_rate": 
1.8329665574581197e-05, "loss": 0.0082, "step": 108120 }, { "epoch": 3.3427087840761573, "grad_norm": 0.14503920078277588, "learning_rate": 1.832920195339062e-05, "loss": 0.0073, "step": 108150 }, { "epoch": 3.343636026457316, "grad_norm": 0.09432554990053177, "learning_rate": 1.8328738332200037e-05, "loss": 0.0078, "step": 108180 }, { "epoch": 3.3445632688384745, "grad_norm": 0.12086700648069382, "learning_rate": 1.832827471100946e-05, "loss": 0.0071, "step": 108210 }, { "epoch": 3.3454905112196327, "grad_norm": 0.1780165582895279, "learning_rate": 1.832781108981888e-05, "loss": 0.0081, "step": 108240 }, { "epoch": 3.3464177536007913, "grad_norm": 0.13841553032398224, "learning_rate": 1.83273474686283e-05, "loss": 0.0081, "step": 108270 }, { "epoch": 3.34734499598195, "grad_norm": 0.1325562596321106, "learning_rate": 1.8326883847437723e-05, "loss": 0.008, "step": 108300 }, { "epoch": 3.348272238363108, "grad_norm": 0.05770470201969147, "learning_rate": 1.8326420226247145e-05, "loss": 0.0082, "step": 108330 }, { "epoch": 3.3491994807442667, "grad_norm": 0.11042402684688568, "learning_rate": 1.8325956605056563e-05, "loss": 0.0087, "step": 108360 }, { "epoch": 3.350126723125425, "grad_norm": 0.1217421367764473, "learning_rate": 1.8325492983865984e-05, "loss": 0.0076, "step": 108390 }, { "epoch": 3.3510539655065834, "grad_norm": 0.09767142683267593, "learning_rate": 1.8325029362675402e-05, "loss": 0.0084, "step": 108420 }, { "epoch": 3.351981207887742, "grad_norm": 0.11793980002403259, "learning_rate": 1.8324565741484824e-05, "loss": 0.0084, "step": 108450 }, { "epoch": 3.3529084502689, "grad_norm": 0.15514931082725525, "learning_rate": 1.832410212029425e-05, "loss": 0.0074, "step": 108480 }, { "epoch": 3.3538356926500588, "grad_norm": 0.11244075745344162, "learning_rate": 1.8323638499103667e-05, "loss": 0.0078, "step": 108510 }, { "epoch": 3.354762935031217, "grad_norm": 0.19463859498500824, "learning_rate": 1.832317487791309e-05, "loss": 0.0089, "step": 108540 }, { 
"epoch": 3.3556901774123755, "grad_norm": 0.09701947867870331, "learning_rate": 1.832271125672251e-05, "loss": 0.0078, "step": 108570 }, { "epoch": 3.356617419793534, "grad_norm": 0.16508749127388, "learning_rate": 1.8322247635531928e-05, "loss": 0.0084, "step": 108600 }, { "epoch": 3.3575446621746923, "grad_norm": 0.1219353973865509, "learning_rate": 1.832178401434135e-05, "loss": 0.0075, "step": 108630 }, { "epoch": 3.358471904555851, "grad_norm": 0.11422885954380035, "learning_rate": 1.832132039315077e-05, "loss": 0.0073, "step": 108660 }, { "epoch": 3.3593991469370095, "grad_norm": 0.10102053731679916, "learning_rate": 1.8320856771960193e-05, "loss": 0.0081, "step": 108690 }, { "epoch": 3.3603263893181676, "grad_norm": 0.1832076609134674, "learning_rate": 1.8320393150769614e-05, "loss": 0.008, "step": 108720 }, { "epoch": 3.3612536316993262, "grad_norm": 0.09938611835241318, "learning_rate": 1.8319929529579032e-05, "loss": 0.0079, "step": 108750 }, { "epoch": 3.362180874080485, "grad_norm": 0.1325431913137436, "learning_rate": 1.8319465908388454e-05, "loss": 0.009, "step": 108780 }, { "epoch": 3.363108116461643, "grad_norm": 0.08958464860916138, "learning_rate": 1.8319002287197875e-05, "loss": 0.0083, "step": 108810 }, { "epoch": 3.3640353588428016, "grad_norm": 0.0935661643743515, "learning_rate": 1.8318538666007297e-05, "loss": 0.008, "step": 108840 }, { "epoch": 3.3649626012239597, "grad_norm": 0.11501895636320114, "learning_rate": 1.8318075044816718e-05, "loss": 0.0084, "step": 108870 }, { "epoch": 3.3658898436051183, "grad_norm": 0.10206559300422668, "learning_rate": 1.831761142362614e-05, "loss": 0.0081, "step": 108900 }, { "epoch": 3.366817085986277, "grad_norm": 0.09624968469142914, "learning_rate": 1.8317147802435558e-05, "loss": 0.0077, "step": 108930 }, { "epoch": 3.367744328367435, "grad_norm": 0.15184107422828674, "learning_rate": 1.831668418124498e-05, "loss": 0.0083, "step": 108960 }, { "epoch": 3.3686715707485937, "grad_norm": 
0.10763302445411682, "learning_rate": 1.83162205600544e-05, "loss": 0.0078, "step": 108990 }, { "epoch": 3.3695988131297523, "grad_norm": 0.10045313835144043, "learning_rate": 1.831575693886382e-05, "loss": 0.0079, "step": 109020 }, { "epoch": 3.3705260555109104, "grad_norm": 0.1364290416240692, "learning_rate": 1.831529331767324e-05, "loss": 0.0078, "step": 109050 }, { "epoch": 3.371453297892069, "grad_norm": 0.10211575031280518, "learning_rate": 1.8314829696482662e-05, "loss": 0.0075, "step": 109080 }, { "epoch": 3.3723805402732276, "grad_norm": 0.13629619777202606, "learning_rate": 1.8314366075292083e-05, "loss": 0.0077, "step": 109110 }, { "epoch": 3.373307782654386, "grad_norm": 0.14242103695869446, "learning_rate": 1.8313902454101505e-05, "loss": 0.0078, "step": 109140 }, { "epoch": 3.3742350250355444, "grad_norm": 0.1589682251214981, "learning_rate": 1.8313438832910923e-05, "loss": 0.0079, "step": 109170 }, { "epoch": 3.3751622674167026, "grad_norm": 0.134429469704628, "learning_rate": 1.8312975211720345e-05, "loss": 0.0082, "step": 109200 }, { "epoch": 3.376089509797861, "grad_norm": 0.10803438723087311, "learning_rate": 1.8312511590529766e-05, "loss": 0.0084, "step": 109230 }, { "epoch": 3.3770167521790198, "grad_norm": 0.11982069909572601, "learning_rate": 1.8312047969339188e-05, "loss": 0.0075, "step": 109260 }, { "epoch": 3.377943994560178, "grad_norm": 0.14434470236301422, "learning_rate": 1.831158434814861e-05, "loss": 0.0078, "step": 109290 }, { "epoch": 3.3788712369413365, "grad_norm": 0.12651823461055756, "learning_rate": 1.831112072695803e-05, "loss": 0.0081, "step": 109320 }, { "epoch": 3.3797984793224947, "grad_norm": 0.07898402959108353, "learning_rate": 1.831065710576745e-05, "loss": 0.0083, "step": 109350 }, { "epoch": 3.3807257217036533, "grad_norm": 0.12386193126440048, "learning_rate": 1.831019348457687e-05, "loss": 0.0081, "step": 109380 }, { "epoch": 3.381652964084812, "grad_norm": 0.13946926593780518, "learning_rate": 
1.830972986338629e-05, "loss": 0.008, "step": 109410 }, { "epoch": 3.38258020646597, "grad_norm": 0.1952924281358719, "learning_rate": 1.830926624219571e-05, "loss": 0.0081, "step": 109440 }, { "epoch": 3.3835074488471286, "grad_norm": 0.12294316291809082, "learning_rate": 1.830880262100513e-05, "loss": 0.0084, "step": 109470 }, { "epoch": 3.384434691228287, "grad_norm": 0.13371561467647552, "learning_rate": 1.8308338999814553e-05, "loss": 0.008, "step": 109500 }, { "epoch": 3.3853619336094454, "grad_norm": 0.17461082339286804, "learning_rate": 1.8307875378623974e-05, "loss": 0.0078, "step": 109530 }, { "epoch": 3.386289175990604, "grad_norm": 0.10565973818302155, "learning_rate": 1.8307411757433396e-05, "loss": 0.008, "step": 109560 }, { "epoch": 3.3872164183717626, "grad_norm": 0.1331189125776291, "learning_rate": 1.8306948136242814e-05, "loss": 0.0079, "step": 109590 }, { "epoch": 3.3881436607529207, "grad_norm": 0.1288798600435257, "learning_rate": 1.8306484515052235e-05, "loss": 0.0092, "step": 109620 }, { "epoch": 3.3890709031340793, "grad_norm": 0.10887886583805084, "learning_rate": 1.8306020893861657e-05, "loss": 0.0076, "step": 109650 }, { "epoch": 3.3899981455152375, "grad_norm": 0.1173979863524437, "learning_rate": 1.830555727267108e-05, "loss": 0.0082, "step": 109680 }, { "epoch": 3.390925387896396, "grad_norm": 0.11643943935632706, "learning_rate": 1.83050936514805e-05, "loss": 0.0083, "step": 109710 }, { "epoch": 3.3918526302775547, "grad_norm": 0.1715647280216217, "learning_rate": 1.8304630030289918e-05, "loss": 0.0083, "step": 109740 }, { "epoch": 3.392779872658713, "grad_norm": 0.16107410192489624, "learning_rate": 1.830416640909934e-05, "loss": 0.0077, "step": 109770 }, { "epoch": 3.3937071150398714, "grad_norm": 0.08610054850578308, "learning_rate": 1.830370278790876e-05, "loss": 0.0069, "step": 109800 }, { "epoch": 3.39463435742103, "grad_norm": 0.21556946635246277, "learning_rate": 1.830323916671818e-05, "loss": 0.0078, "step": 109830 }, { 
"epoch": 3.395561599802188, "grad_norm": 0.13167023658752441, "learning_rate": 1.83027755455276e-05, "loss": 0.0085, "step": 109860 }, { "epoch": 3.396488842183347, "grad_norm": 0.21004638075828552, "learning_rate": 1.8302311924337026e-05, "loss": 0.008, "step": 109890 }, { "epoch": 3.3974160845645054, "grad_norm": 0.12430498749017715, "learning_rate": 1.8301848303146444e-05, "loss": 0.008, "step": 109920 }, { "epoch": 3.3983433269456635, "grad_norm": 0.11965309083461761, "learning_rate": 1.8301384681955865e-05, "loss": 0.0082, "step": 109950 }, { "epoch": 3.399270569326822, "grad_norm": 0.10556712001562119, "learning_rate": 1.8300921060765287e-05, "loss": 0.0082, "step": 109980 }, { "epoch": 3.4001978117079803, "grad_norm": 0.12282989919185638, "learning_rate": 1.8300457439574705e-05, "loss": 0.0091, "step": 110010 }, { "epoch": 3.401125054089139, "grad_norm": 0.12872552871704102, "learning_rate": 1.8299993818384126e-05, "loss": 0.0077, "step": 110040 }, { "epoch": 3.4020522964702975, "grad_norm": 0.0965452492237091, "learning_rate": 1.8299530197193548e-05, "loss": 0.0077, "step": 110070 }, { "epoch": 3.4029795388514557, "grad_norm": 0.19155994057655334, "learning_rate": 1.829906657600297e-05, "loss": 0.0077, "step": 110100 }, { "epoch": 3.4039067812326143, "grad_norm": 0.14455546438694, "learning_rate": 1.829860295481239e-05, "loss": 0.0078, "step": 110130 }, { "epoch": 3.4048340236137724, "grad_norm": 0.08366556465625763, "learning_rate": 1.829813933362181e-05, "loss": 0.008, "step": 110160 }, { "epoch": 3.405761265994931, "grad_norm": 0.12854647636413574, "learning_rate": 1.829767571243123e-05, "loss": 0.0091, "step": 110190 }, { "epoch": 3.4066885083760896, "grad_norm": 0.10172423720359802, "learning_rate": 1.8297212091240652e-05, "loss": 0.0074, "step": 110220 }, { "epoch": 3.4076157507572478, "grad_norm": 0.13294345140457153, "learning_rate": 1.8296748470050074e-05, "loss": 0.008, "step": 110250 }, { "epoch": 3.4085429931384064, "grad_norm": 
0.10157804936170578, "learning_rate": 1.8296284848859495e-05, "loss": 0.0074, "step": 110280 }, { "epoch": 3.409470235519565, "grad_norm": 0.1233581155538559, "learning_rate": 1.8295821227668917e-05, "loss": 0.0085, "step": 110310 }, { "epoch": 3.410397477900723, "grad_norm": 0.13836944103240967, "learning_rate": 1.8295357606478335e-05, "loss": 0.0081, "step": 110340 }, { "epoch": 3.4113247202818817, "grad_norm": 0.17525318264961243, "learning_rate": 1.8294893985287756e-05, "loss": 0.0079, "step": 110370 }, { "epoch": 3.4122519626630403, "grad_norm": 0.18546810746192932, "learning_rate": 1.8294430364097174e-05, "loss": 0.0086, "step": 110400 }, { "epoch": 3.4131792050441985, "grad_norm": 0.1104738712310791, "learning_rate": 1.8293966742906596e-05, "loss": 0.008, "step": 110430 }, { "epoch": 3.414106447425357, "grad_norm": 0.11795436590909958, "learning_rate": 1.8293503121716017e-05, "loss": 0.0083, "step": 110460 }, { "epoch": 3.4150336898065152, "grad_norm": 0.15530312061309814, "learning_rate": 1.829303950052544e-05, "loss": 0.0085, "step": 110490 }, { "epoch": 3.415960932187674, "grad_norm": 0.14500953257083893, "learning_rate": 1.829257587933486e-05, "loss": 0.0081, "step": 110520 }, { "epoch": 3.4168881745688324, "grad_norm": 0.13009458780288696, "learning_rate": 1.8292112258144282e-05, "loss": 0.0086, "step": 110550 }, { "epoch": 3.4178154169499906, "grad_norm": 0.14581404626369476, "learning_rate": 1.82916486369537e-05, "loss": 0.0073, "step": 110580 }, { "epoch": 3.418742659331149, "grad_norm": 0.13712170720100403, "learning_rate": 1.829118501576312e-05, "loss": 0.0083, "step": 110610 }, { "epoch": 3.419669901712308, "grad_norm": 0.10256226360797882, "learning_rate": 1.8290721394572543e-05, "loss": 0.0077, "step": 110640 }, { "epoch": 3.420597144093466, "grad_norm": 0.1704181730747223, "learning_rate": 1.8290257773381964e-05, "loss": 0.009, "step": 110670 }, { "epoch": 3.4215243864746245, "grad_norm": 0.09149094671010971, "learning_rate": 
1.8289794152191386e-05, "loss": 0.0074, "step": 110700 }, { "epoch": 3.422451628855783, "grad_norm": 0.12590110301971436, "learning_rate": 1.8289330531000804e-05, "loss": 0.0072, "step": 110730 }, { "epoch": 3.4233788712369413, "grad_norm": 0.14240290224552155, "learning_rate": 1.8288866909810226e-05, "loss": 0.0077, "step": 110760 }, { "epoch": 3.4243061136181, "grad_norm": 0.10753019154071808, "learning_rate": 1.8288403288619647e-05, "loss": 0.0087, "step": 110790 }, { "epoch": 3.425233355999258, "grad_norm": 0.1491657942533493, "learning_rate": 1.8287939667429065e-05, "loss": 0.008, "step": 110820 }, { "epoch": 3.4261605983804166, "grad_norm": 0.09455230087041855, "learning_rate": 1.8287476046238487e-05, "loss": 0.0079, "step": 110850 }, { "epoch": 3.4270878407615752, "grad_norm": 0.11304278671741486, "learning_rate": 1.8287012425047908e-05, "loss": 0.0078, "step": 110880 }, { "epoch": 3.4280150831427334, "grad_norm": 0.07876136898994446, "learning_rate": 1.828654880385733e-05, "loss": 0.0073, "step": 110910 }, { "epoch": 3.428942325523892, "grad_norm": 0.11411268264055252, "learning_rate": 1.828608518266675e-05, "loss": 0.0085, "step": 110940 }, { "epoch": 3.42986956790505, "grad_norm": 0.1034374013543129, "learning_rate": 1.8285621561476173e-05, "loss": 0.0082, "step": 110970 }, { "epoch": 3.4307968102862088, "grad_norm": 0.10488907992839813, "learning_rate": 1.828515794028559e-05, "loss": 0.0084, "step": 111000 }, { "epoch": 3.4317240526673674, "grad_norm": 0.12228064239025116, "learning_rate": 1.8284694319095012e-05, "loss": 0.0083, "step": 111030 }, { "epoch": 3.4326512950485255, "grad_norm": 0.16529324650764465, "learning_rate": 1.8284230697904434e-05, "loss": 0.0082, "step": 111060 }, { "epoch": 3.433578537429684, "grad_norm": 0.1710965931415558, "learning_rate": 1.8283767076713855e-05, "loss": 0.0076, "step": 111090 }, { "epoch": 3.4345057798108427, "grad_norm": 0.11809089779853821, "learning_rate": 1.8283303455523277e-05, "loss": 0.0072, "step": 111120 
}, { "epoch": 3.435433022192001, "grad_norm": 0.11667387932538986, "learning_rate": 1.8282839834332695e-05, "loss": 0.0084, "step": 111150 }, { "epoch": 3.4363602645731595, "grad_norm": 0.180836021900177, "learning_rate": 1.8282376213142117e-05, "loss": 0.0077, "step": 111180 }, { "epoch": 3.437287506954318, "grad_norm": 0.13640031218528748, "learning_rate": 1.8281912591951538e-05, "loss": 0.0084, "step": 111210 }, { "epoch": 3.438214749335476, "grad_norm": 0.1083144024014473, "learning_rate": 1.8281448970760956e-05, "loss": 0.0074, "step": 111240 }, { "epoch": 3.439141991716635, "grad_norm": 0.150522843003273, "learning_rate": 1.8280985349570378e-05, "loss": 0.0081, "step": 111270 }, { "epoch": 3.440069234097793, "grad_norm": 0.13978488743305206, "learning_rate": 1.8280521728379803e-05, "loss": 0.0089, "step": 111300 }, { "epoch": 3.4409964764789516, "grad_norm": 0.11682113260030746, "learning_rate": 1.828005810718922e-05, "loss": 0.0082, "step": 111330 }, { "epoch": 3.44192371886011, "grad_norm": 0.1245095357298851, "learning_rate": 1.8279594485998642e-05, "loss": 0.0077, "step": 111360 }, { "epoch": 3.4428509612412683, "grad_norm": 0.1329326629638672, "learning_rate": 1.8279130864808064e-05, "loss": 0.0083, "step": 111390 }, { "epoch": 3.443778203622427, "grad_norm": 0.12364096939563751, "learning_rate": 1.8278667243617482e-05, "loss": 0.0081, "step": 111420 }, { "epoch": 3.4447054460035855, "grad_norm": 0.1403578668832779, "learning_rate": 1.8278203622426903e-05, "loss": 0.0081, "step": 111450 }, { "epoch": 3.4456326883847437, "grad_norm": 0.1650015264749527, "learning_rate": 1.8277740001236325e-05, "loss": 0.0081, "step": 111480 }, { "epoch": 3.4465599307659023, "grad_norm": 0.10548876971006393, "learning_rate": 1.8277276380045746e-05, "loss": 0.0073, "step": 111510 }, { "epoch": 3.447487173147061, "grad_norm": 0.11068691313266754, "learning_rate": 1.8276812758855168e-05, "loss": 0.0083, "step": 111540 }, { "epoch": 3.448414415528219, "grad_norm": 
0.10304587334394455, "learning_rate": 1.8276349137664586e-05, "loss": 0.0076, "step": 111570 }, { "epoch": 3.4493416579093776, "grad_norm": 0.11254573613405228, "learning_rate": 1.8275885516474007e-05, "loss": 0.0077, "step": 111600 }, { "epoch": 3.450268900290536, "grad_norm": 0.12884262204170227, "learning_rate": 1.827542189528343e-05, "loss": 0.0085, "step": 111630 }, { "epoch": 3.4511961426716944, "grad_norm": 0.12768985331058502, "learning_rate": 1.827495827409285e-05, "loss": 0.0084, "step": 111660 }, { "epoch": 3.452123385052853, "grad_norm": 0.16277442872524261, "learning_rate": 1.8274494652902272e-05, "loss": 0.0088, "step": 111690 }, { "epoch": 3.453050627434011, "grad_norm": 0.16791783273220062, "learning_rate": 1.8274031031711693e-05, "loss": 0.0079, "step": 111720 }, { "epoch": 3.4539778698151697, "grad_norm": 0.19456088542938232, "learning_rate": 1.827356741052111e-05, "loss": 0.0085, "step": 111750 }, { "epoch": 3.454905112196328, "grad_norm": 0.10675299167633057, "learning_rate": 1.8273103789330533e-05, "loss": 0.0075, "step": 111780 }, { "epoch": 3.4558323545774865, "grad_norm": 0.13475903868675232, "learning_rate": 1.827264016813995e-05, "loss": 0.0078, "step": 111810 }, { "epoch": 3.456759596958645, "grad_norm": 0.254241943359375, "learning_rate": 1.8272176546949373e-05, "loss": 0.0077, "step": 111840 }, { "epoch": 3.4576868393398033, "grad_norm": 0.17448382079601288, "learning_rate": 1.8271712925758794e-05, "loss": 0.008, "step": 111870 }, { "epoch": 3.458614081720962, "grad_norm": 0.13330848515033722, "learning_rate": 1.8271249304568216e-05, "loss": 0.008, "step": 111900 }, { "epoch": 3.4595413241021205, "grad_norm": 0.12932325899600983, "learning_rate": 1.8270785683377637e-05, "loss": 0.0078, "step": 111930 }, { "epoch": 3.4604685664832786, "grad_norm": 0.09985451400279999, "learning_rate": 1.827032206218706e-05, "loss": 0.0082, "step": 111960 }, { "epoch": 3.461395808864437, "grad_norm": 0.10967565327882767, "learning_rate": 
1.8269858440996477e-05, "loss": 0.008, "step": 111990 }, { "epoch": 3.462323051245596, "grad_norm": 0.09841161221265793, "learning_rate": 1.82693948198059e-05, "loss": 0.0081, "step": 112020 }, { "epoch": 3.463250293626754, "grad_norm": 0.11684148758649826, "learning_rate": 1.826893119861532e-05, "loss": 0.0079, "step": 112050 }, { "epoch": 3.4641775360079126, "grad_norm": 0.10635727643966675, "learning_rate": 1.826846757742474e-05, "loss": 0.0082, "step": 112080 }, { "epoch": 3.4651047783890707, "grad_norm": 0.16047123074531555, "learning_rate": 1.8268003956234163e-05, "loss": 0.0083, "step": 112110 }, { "epoch": 3.4660320207702293, "grad_norm": 0.09859541058540344, "learning_rate": 1.826754033504358e-05, "loss": 0.0083, "step": 112140 }, { "epoch": 3.466959263151388, "grad_norm": 0.12522125244140625, "learning_rate": 1.8267076713853003e-05, "loss": 0.0075, "step": 112170 }, { "epoch": 3.467886505532546, "grad_norm": 0.14147734642028809, "learning_rate": 1.8266613092662424e-05, "loss": 0.0088, "step": 112200 }, { "epoch": 3.4688137479137047, "grad_norm": 0.10759259760379791, "learning_rate": 1.8266149471471842e-05, "loss": 0.0089, "step": 112230 }, { "epoch": 3.4697409902948633, "grad_norm": 0.13930906355381012, "learning_rate": 1.8265685850281264e-05, "loss": 0.008, "step": 112260 }, { "epoch": 3.4706682326760214, "grad_norm": 0.14537912607192993, "learning_rate": 1.8265222229090685e-05, "loss": 0.0083, "step": 112290 }, { "epoch": 3.47159547505718, "grad_norm": 0.11640504002571106, "learning_rate": 1.8264758607900107e-05, "loss": 0.0076, "step": 112320 }, { "epoch": 3.4725227174383386, "grad_norm": 0.12353353947401047, "learning_rate": 1.8264294986709528e-05, "loss": 0.0084, "step": 112350 }, { "epoch": 3.473449959819497, "grad_norm": 0.11591926217079163, "learning_rate": 1.826383136551895e-05, "loss": 0.0081, "step": 112380 }, { "epoch": 3.4743772022006554, "grad_norm": 0.18198394775390625, "learning_rate": 1.8263367744328368e-05, "loss": 0.0082, "step": 112410 
}, { "epoch": 3.4753044445818135, "grad_norm": 0.10663142055273056, "learning_rate": 1.826290412313779e-05, "loss": 0.008, "step": 112440 }, { "epoch": 3.476231686962972, "grad_norm": 0.164102703332901, "learning_rate": 1.826244050194721e-05, "loss": 0.0079, "step": 112470 }, { "epoch": 3.4771589293441307, "grad_norm": 0.10792583972215652, "learning_rate": 1.8261976880756632e-05, "loss": 0.0075, "step": 112500 }, { "epoch": 3.478086171725289, "grad_norm": 0.10117162019014359, "learning_rate": 1.8261513259566054e-05, "loss": 0.0081, "step": 112530 }, { "epoch": 3.4790134141064475, "grad_norm": 0.10819587111473083, "learning_rate": 1.8261049638375472e-05, "loss": 0.0083, "step": 112560 }, { "epoch": 3.4799406564876056, "grad_norm": 0.13231904804706573, "learning_rate": 1.8260586017184893e-05, "loss": 0.0077, "step": 112590 }, { "epoch": 3.4808678988687642, "grad_norm": 0.12159226089715958, "learning_rate": 1.8260122395994315e-05, "loss": 0.008, "step": 112620 }, { "epoch": 3.481795141249923, "grad_norm": 0.12198486179113388, "learning_rate": 1.8259658774803733e-05, "loss": 0.0084, "step": 112650 }, { "epoch": 3.482722383631081, "grad_norm": 0.16055789589881897, "learning_rate": 1.8259195153613155e-05, "loss": 0.0086, "step": 112680 }, { "epoch": 3.4836496260122396, "grad_norm": 0.10826107859611511, "learning_rate": 1.825873153242258e-05, "loss": 0.008, "step": 112710 }, { "epoch": 3.484576868393398, "grad_norm": 0.08271689713001251, "learning_rate": 1.8258267911231998e-05, "loss": 0.008, "step": 112740 }, { "epoch": 3.4855041107745564, "grad_norm": 0.17465150356292725, "learning_rate": 1.825780429004142e-05, "loss": 0.0081, "step": 112770 }, { "epoch": 3.486431353155715, "grad_norm": 0.11844614893198013, "learning_rate": 1.8257340668850837e-05, "loss": 0.0084, "step": 112800 }, { "epoch": 3.4873585955368736, "grad_norm": 0.1365048885345459, "learning_rate": 1.825687704766026e-05, "loss": 0.0083, "step": 112830 }, { "epoch": 3.4882858379180317, "grad_norm": 
0.08380492031574249, "learning_rate": 1.825641342646968e-05, "loss": 0.008, "step": 112860 }, { "epoch": 3.4892130802991903, "grad_norm": 0.16429179906845093, "learning_rate": 1.8255949805279102e-05, "loss": 0.0075, "step": 112890 }, { "epoch": 3.4901403226803485, "grad_norm": 0.18949481844902039, "learning_rate": 1.8255486184088523e-05, "loss": 0.0088, "step": 112920 }, { "epoch": 3.491067565061507, "grad_norm": 0.09504175186157227, "learning_rate": 1.8255022562897945e-05, "loss": 0.0072, "step": 112950 }, { "epoch": 3.4919948074426657, "grad_norm": 0.16583991050720215, "learning_rate": 1.8254558941707363e-05, "loss": 0.0085, "step": 112980 }, { "epoch": 3.492922049823824, "grad_norm": 0.15161849558353424, "learning_rate": 1.8254095320516784e-05, "loss": 0.0078, "step": 113010 }, { "epoch": 3.4938492922049824, "grad_norm": 0.07592923194169998, "learning_rate": 1.8253631699326206e-05, "loss": 0.008, "step": 113040 }, { "epoch": 3.4947765345861406, "grad_norm": 0.13251550495624542, "learning_rate": 1.8253168078135627e-05, "loss": 0.0079, "step": 113070 }, { "epoch": 3.495703776967299, "grad_norm": 0.1508098840713501, "learning_rate": 1.825270445694505e-05, "loss": 0.0074, "step": 113100 }, { "epoch": 3.4966310193484578, "grad_norm": 0.13992056250572205, "learning_rate": 1.8252240835754467e-05, "loss": 0.0084, "step": 113130 }, { "epoch": 3.4975582617296164, "grad_norm": 0.07681751996278763, "learning_rate": 1.825177721456389e-05, "loss": 0.0079, "step": 113160 }, { "epoch": 3.4984855041107745, "grad_norm": 0.14129143953323364, "learning_rate": 1.8251329047412996e-05, "loss": 0.0085, "step": 113190 }, { "epoch": 3.499412746491933, "grad_norm": 0.10117676854133606, "learning_rate": 1.8250865426222415e-05, "loss": 0.0084, "step": 113220 }, { "epoch": 3.5003399888730913, "grad_norm": 0.12388333678245544, "learning_rate": 1.8250401805031836e-05, "loss": 0.0075, "step": 113250 }, { "epoch": 3.50126723125425, "grad_norm": 0.14431364834308624, "learning_rate": 
1.8249938183841258e-05, "loss": 0.008, "step": 113280 }, { "epoch": 3.5021944736354085, "grad_norm": 0.12566278874874115, "learning_rate": 1.824947456265068e-05, "loss": 0.0076, "step": 113310 }, { "epoch": 3.5031217160165666, "grad_norm": 0.10024154931306839, "learning_rate": 1.82490109414601e-05, "loss": 0.0078, "step": 113340 }, { "epoch": 3.5040489583977252, "grad_norm": 0.16020235419273376, "learning_rate": 1.8248547320269522e-05, "loss": 0.0084, "step": 113370 }, { "epoch": 3.5049762007788834, "grad_norm": 0.10360162705183029, "learning_rate": 1.824808369907894e-05, "loss": 0.0079, "step": 113400 }, { "epoch": 3.505903443160042, "grad_norm": 0.11487597227096558, "learning_rate": 1.824762007788836e-05, "loss": 0.0079, "step": 113430 }, { "epoch": 3.5068306855412006, "grad_norm": 0.15095315873622894, "learning_rate": 1.8247156456697783e-05, "loss": 0.0083, "step": 113460 }, { "epoch": 3.5077579279223587, "grad_norm": 0.15357448160648346, "learning_rate": 1.82466928355072e-05, "loss": 0.008, "step": 113490 }, { "epoch": 3.5086851703035173, "grad_norm": 0.1539233922958374, "learning_rate": 1.8246229214316623e-05, "loss": 0.0074, "step": 113520 }, { "epoch": 3.5096124126846755, "grad_norm": 0.166009321808815, "learning_rate": 1.8245765593126044e-05, "loss": 0.0084, "step": 113550 }, { "epoch": 3.510539655065834, "grad_norm": 0.13082562386989594, "learning_rate": 1.8245301971935466e-05, "loss": 0.0078, "step": 113580 }, { "epoch": 3.5114668974469927, "grad_norm": 0.1447136253118515, "learning_rate": 1.8244838350744887e-05, "loss": 0.0083, "step": 113610 }, { "epoch": 3.5123941398281513, "grad_norm": 0.102235347032547, "learning_rate": 1.8244374729554305e-05, "loss": 0.0081, "step": 113640 }, { "epoch": 3.5133213822093095, "grad_norm": 0.10055695474147797, "learning_rate": 1.8243911108363727e-05, "loss": 0.0081, "step": 113670 }, { "epoch": 3.514248624590468, "grad_norm": 0.09434635192155838, "learning_rate": 1.824344748717315e-05, "loss": 0.0079, "step": 113700 }, 
{ "epoch": 3.515175866971626, "grad_norm": 0.08356805890798569, "learning_rate": 1.824298386598257e-05, "loss": 0.0086, "step": 113730 }, { "epoch": 3.516103109352785, "grad_norm": 0.133201465010643, "learning_rate": 1.824252024479199e-05, "loss": 0.0083, "step": 113760 }, { "epoch": 3.5170303517339434, "grad_norm": 0.13155783712863922, "learning_rate": 1.8242056623601413e-05, "loss": 0.0081, "step": 113790 }, { "epoch": 3.5179575941151016, "grad_norm": 0.1311688870191574, "learning_rate": 1.824159300241083e-05, "loss": 0.008, "step": 113820 }, { "epoch": 3.51888483649626, "grad_norm": 0.13369256258010864, "learning_rate": 1.8241129381220253e-05, "loss": 0.0087, "step": 113850 }, { "epoch": 3.5198120788774183, "grad_norm": 0.12048374861478806, "learning_rate": 1.824066576002967e-05, "loss": 0.0086, "step": 113880 }, { "epoch": 3.520739321258577, "grad_norm": 0.1320374608039856, "learning_rate": 1.8240202138839092e-05, "loss": 0.0079, "step": 113910 }, { "epoch": 3.5216665636397355, "grad_norm": 0.1242765560746193, "learning_rate": 1.8239738517648514e-05, "loss": 0.0086, "step": 113940 }, { "epoch": 3.522593806020894, "grad_norm": 0.12726062536239624, "learning_rate": 1.8239274896457935e-05, "loss": 0.0084, "step": 113970 }, { "epoch": 3.5235210484020523, "grad_norm": 0.17018087208271027, "learning_rate": 1.8238811275267357e-05, "loss": 0.0083, "step": 114000 }, { "epoch": 3.524448290783211, "grad_norm": 0.11612843722105026, "learning_rate": 1.8238347654076778e-05, "loss": 0.0076, "step": 114030 }, { "epoch": 3.525375533164369, "grad_norm": 0.14703316986560822, "learning_rate": 1.8237884032886196e-05, "loss": 0.0075, "step": 114060 }, { "epoch": 3.5263027755455276, "grad_norm": 0.17400933802127838, "learning_rate": 1.8237420411695618e-05, "loss": 0.008, "step": 114090 }, { "epoch": 3.5272300179266862, "grad_norm": 0.05523867532610893, "learning_rate": 1.823695679050504e-05, "loss": 0.0081, "step": 114120 }, { "epoch": 3.5281572603078444, "grad_norm": 
0.11787325888872147, "learning_rate": 1.823649316931446e-05, "loss": 0.0084, "step": 114150 }, { "epoch": 3.529084502689003, "grad_norm": 0.13153858482837677, "learning_rate": 1.8236029548123882e-05, "loss": 0.0083, "step": 114180 }, { "epoch": 3.530011745070161, "grad_norm": 0.09089378267526627, "learning_rate": 1.8235565926933304e-05, "loss": 0.0082, "step": 114210 }, { "epoch": 3.5309389874513197, "grad_norm": 0.12849421799182892, "learning_rate": 1.8235102305742722e-05, "loss": 0.0085, "step": 114240 }, { "epoch": 3.5318662298324783, "grad_norm": 0.11912986636161804, "learning_rate": 1.8234638684552144e-05, "loss": 0.0077, "step": 114270 }, { "epoch": 3.5327934722136365, "grad_norm": 0.12810738384723663, "learning_rate": 1.823417506336156e-05, "loss": 0.0078, "step": 114300 }, { "epoch": 3.533720714594795, "grad_norm": 0.12671393156051636, "learning_rate": 1.8233711442170987e-05, "loss": 0.008, "step": 114330 }, { "epoch": 3.5346479569759532, "grad_norm": 0.08861023932695389, "learning_rate": 1.8233247820980408e-05, "loss": 0.0082, "step": 114360 }, { "epoch": 3.535575199357112, "grad_norm": 0.13489368557929993, "learning_rate": 1.8232784199789826e-05, "loss": 0.0087, "step": 114390 }, { "epoch": 3.5365024417382704, "grad_norm": 0.1314043551683426, "learning_rate": 1.8232320578599248e-05, "loss": 0.0075, "step": 114420 }, { "epoch": 3.537429684119429, "grad_norm": 0.1756552755832672, "learning_rate": 1.823185695740867e-05, "loss": 0.008, "step": 114450 }, { "epoch": 3.538356926500587, "grad_norm": 0.11926868557929993, "learning_rate": 1.8231393336218087e-05, "loss": 0.0077, "step": 114480 }, { "epoch": 3.539284168881746, "grad_norm": 0.14967912435531616, "learning_rate": 1.823092971502751e-05, "loss": 0.0078, "step": 114510 }, { "epoch": 3.540211411262904, "grad_norm": 0.12298878282308578, "learning_rate": 1.823046609383693e-05, "loss": 0.0078, "step": 114540 }, { "epoch": 3.5411386536440626, "grad_norm": 0.1520959585905075, "learning_rate": 
1.8230002472646352e-05, "loss": 0.0082, "step": 114570 }, { "epoch": 3.542065896025221, "grad_norm": 0.07237017899751663, "learning_rate": 1.8229538851455773e-05, "loss": 0.008, "step": 114600 }, { "epoch": 3.5429931384063793, "grad_norm": 0.19247126579284668, "learning_rate": 1.822907523026519e-05, "loss": 0.0077, "step": 114630 }, { "epoch": 3.543920380787538, "grad_norm": 0.1513088196516037, "learning_rate": 1.8228611609074613e-05, "loss": 0.0081, "step": 114660 }, { "epoch": 3.544847623168696, "grad_norm": 0.08677954226732254, "learning_rate": 1.8228147987884034e-05, "loss": 0.0071, "step": 114690 }, { "epoch": 3.5457748655498547, "grad_norm": 0.11467532068490982, "learning_rate": 1.8227684366693456e-05, "loss": 0.0078, "step": 114720 }, { "epoch": 3.5467021079310133, "grad_norm": 0.10629726946353912, "learning_rate": 1.8227220745502877e-05, "loss": 0.0083, "step": 114750 }, { "epoch": 3.547629350312172, "grad_norm": 0.12049513310194016, "learning_rate": 1.82267571243123e-05, "loss": 0.0091, "step": 114780 }, { "epoch": 3.54855659269333, "grad_norm": 0.0762409195303917, "learning_rate": 1.8226293503121717e-05, "loss": 0.008, "step": 114810 }, { "epoch": 3.5494838350744886, "grad_norm": 0.13170690834522247, "learning_rate": 1.822582988193114e-05, "loss": 0.0077, "step": 114840 }, { "epoch": 3.5504110774556468, "grad_norm": 0.12247452884912491, "learning_rate": 1.822536626074056e-05, "loss": 0.0092, "step": 114870 }, { "epoch": 3.5513383198368054, "grad_norm": 0.12234190106391907, "learning_rate": 1.8224902639549978e-05, "loss": 0.0081, "step": 114900 }, { "epoch": 3.552265562217964, "grad_norm": 0.12365664541721344, "learning_rate": 1.82244390183594e-05, "loss": 0.0078, "step": 114930 }, { "epoch": 3.553192804599122, "grad_norm": 0.1103430837392807, "learning_rate": 1.822397539716882e-05, "loss": 0.0089, "step": 114960 }, { "epoch": 3.5541200469802807, "grad_norm": 0.1422974318265915, "learning_rate": 1.8223511775978243e-05, "loss": 0.0083, "step": 114990 }, { 
"epoch": 3.555047289361439, "grad_norm": 0.11694323271512985, "learning_rate": 1.8223048154787664e-05, "loss": 0.0076, "step": 115020 }, { "epoch": 3.5559745317425975, "grad_norm": 0.14883510768413544, "learning_rate": 1.8222584533597082e-05, "loss": 0.0083, "step": 115050 }, { "epoch": 3.556901774123756, "grad_norm": 0.13799665868282318, "learning_rate": 1.8222120912406504e-05, "loss": 0.0083, "step": 115080 }, { "epoch": 3.5578290165049142, "grad_norm": 0.13397713005542755, "learning_rate": 1.8221657291215925e-05, "loss": 0.0085, "step": 115110 }, { "epoch": 3.558756258886073, "grad_norm": 0.13372541964054108, "learning_rate": 1.8221193670025347e-05, "loss": 0.0088, "step": 115140 }, { "epoch": 3.559683501267231, "grad_norm": 0.14905565977096558, "learning_rate": 1.822073004883477e-05, "loss": 0.008, "step": 115170 }, { "epoch": 3.5606107436483896, "grad_norm": 0.1981203556060791, "learning_rate": 1.822026642764419e-05, "loss": 0.0081, "step": 115200 }, { "epoch": 3.561537986029548, "grad_norm": 0.18683522939682007, "learning_rate": 1.8219802806453608e-05, "loss": 0.0081, "step": 115230 }, { "epoch": 3.562465228410707, "grad_norm": 0.1591271460056305, "learning_rate": 1.821933918526303e-05, "loss": 0.0073, "step": 115260 }, { "epoch": 3.563392470791865, "grad_norm": 0.19810152053833008, "learning_rate": 1.8218875564072448e-05, "loss": 0.0088, "step": 115290 }, { "epoch": 3.5643197131730235, "grad_norm": 0.24428075551986694, "learning_rate": 1.821841194288187e-05, "loss": 0.0087, "step": 115320 }, { "epoch": 3.5652469555541817, "grad_norm": 0.10179830342531204, "learning_rate": 1.821794832169129e-05, "loss": 0.008, "step": 115350 }, { "epoch": 3.5661741979353403, "grad_norm": 0.15605612099170685, "learning_rate": 1.8217484700500712e-05, "loss": 0.0083, "step": 115380 }, { "epoch": 3.567101440316499, "grad_norm": 0.1107044592499733, "learning_rate": 1.8217021079310134e-05, "loss": 0.0085, "step": 115410 }, { "epoch": 3.568028682697657, "grad_norm": 
0.12357164919376373, "learning_rate": 1.8216557458119555e-05, "loss": 0.0077, "step": 115440 }, { "epoch": 3.5689559250788156, "grad_norm": 0.0877528265118599, "learning_rate": 1.8216093836928973e-05, "loss": 0.0081, "step": 115470 }, { "epoch": 3.569883167459974, "grad_norm": 0.11744006723165512, "learning_rate": 1.8215630215738395e-05, "loss": 0.0078, "step": 115500 }, { "epoch": 3.5708104098411324, "grad_norm": 0.12081371247768402, "learning_rate": 1.8215166594547816e-05, "loss": 0.0075, "step": 115530 }, { "epoch": 3.571737652222291, "grad_norm": 0.1460162103176117, "learning_rate": 1.8214702973357238e-05, "loss": 0.0079, "step": 115560 }, { "epoch": 3.5726648946034496, "grad_norm": 0.15214666724205017, "learning_rate": 1.821423935216666e-05, "loss": 0.0079, "step": 115590 }, { "epoch": 3.5735921369846078, "grad_norm": 0.10062669962644577, "learning_rate": 1.8213775730976077e-05, "loss": 0.0078, "step": 115620 }, { "epoch": 3.5745193793657664, "grad_norm": 0.16469129920005798, "learning_rate": 1.82133121097855e-05, "loss": 0.0076, "step": 115650 }, { "epoch": 3.5754466217469245, "grad_norm": 0.14586405456066132, "learning_rate": 1.821284848859492e-05, "loss": 0.0087, "step": 115680 }, { "epoch": 3.576373864128083, "grad_norm": 0.13878396153450012, "learning_rate": 1.821238486740434e-05, "loss": 0.0079, "step": 115710 }, { "epoch": 3.5773011065092417, "grad_norm": 0.16074097156524658, "learning_rate": 1.8211921246213763e-05, "loss": 0.0075, "step": 115740 }, { "epoch": 3.5782283488904, "grad_norm": 0.114118792116642, "learning_rate": 1.8211457625023185e-05, "loss": 0.0082, "step": 115770 }, { "epoch": 3.5791555912715585, "grad_norm": 0.10240211337804794, "learning_rate": 1.8210994003832603e-05, "loss": 0.0083, "step": 115800 }, { "epoch": 3.5800828336527166, "grad_norm": 0.14468497037887573, "learning_rate": 1.8210530382642025e-05, "loss": 0.0081, "step": 115830 }, { "epoch": 3.5810100760338752, "grad_norm": 0.1690642088651657, "learning_rate": 
1.8210066761451446e-05, "loss": 0.0079, "step": 115860 }, { "epoch": 3.581937318415034, "grad_norm": 0.07668804377317429, "learning_rate": 1.8209603140260864e-05, "loss": 0.0083, "step": 115890 }, { "epoch": 3.582864560796192, "grad_norm": 0.13831457495689392, "learning_rate": 1.8209139519070286e-05, "loss": 0.0091, "step": 115920 }, { "epoch": 3.5837918031773506, "grad_norm": 0.1221984326839447, "learning_rate": 1.8208675897879707e-05, "loss": 0.0075, "step": 115950 }, { "epoch": 3.5847190455585087, "grad_norm": 0.13043753802776337, "learning_rate": 1.820821227668913e-05, "loss": 0.0087, "step": 115980 }, { "epoch": 3.5856462879396673, "grad_norm": 0.13155679404735565, "learning_rate": 1.820774865549855e-05, "loss": 0.0076, "step": 116010 }, { "epoch": 3.586573530320826, "grad_norm": 0.14735837280750275, "learning_rate": 1.820728503430797e-05, "loss": 0.0083, "step": 116040 }, { "epoch": 3.5875007727019845, "grad_norm": 0.1272050142288208, "learning_rate": 1.820682141311739e-05, "loss": 0.0076, "step": 116070 }, { "epoch": 3.5884280150831427, "grad_norm": 0.11904411017894745, "learning_rate": 1.820635779192681e-05, "loss": 0.0079, "step": 116100 }, { "epoch": 3.5893552574643013, "grad_norm": 0.10984945297241211, "learning_rate": 1.8205894170736233e-05, "loss": 0.0081, "step": 116130 }, { "epoch": 3.5902824998454594, "grad_norm": 0.14105834066867828, "learning_rate": 1.8205430549545654e-05, "loss": 0.0081, "step": 116160 }, { "epoch": 3.591209742226618, "grad_norm": 0.10170961916446686, "learning_rate": 1.8204966928355076e-05, "loss": 0.0074, "step": 116190 }, { "epoch": 3.5921369846077766, "grad_norm": 0.09563864022493362, "learning_rate": 1.8204503307164494e-05, "loss": 0.008, "step": 116220 }, { "epoch": 3.593064226988935, "grad_norm": 0.13875795900821686, "learning_rate": 1.8204039685973915e-05, "loss": 0.0075, "step": 116250 }, { "epoch": 3.5939914693700934, "grad_norm": 0.09532289206981659, "learning_rate": 1.8203576064783334e-05, "loss": 0.0082, "step": 
116280 }, { "epoch": 3.5949187117512515, "grad_norm": 0.13236001133918762, "learning_rate": 1.8203112443592755e-05, "loss": 0.0081, "step": 116310 }, { "epoch": 3.59584595413241, "grad_norm": 0.10277732461690903, "learning_rate": 1.8202648822402177e-05, "loss": 0.0083, "step": 116340 }, { "epoch": 3.5967731965135687, "grad_norm": 0.0845486968755722, "learning_rate": 1.8202185201211598e-05, "loss": 0.0073, "step": 116370 }, { "epoch": 3.5977004388947273, "grad_norm": 0.11059334874153137, "learning_rate": 1.820172158002102e-05, "loss": 0.0081, "step": 116400 }, { "epoch": 3.5986276812758855, "grad_norm": 0.14223797619342804, "learning_rate": 1.820125795883044e-05, "loss": 0.0087, "step": 116430 }, { "epoch": 3.599554923657044, "grad_norm": 0.13428670167922974, "learning_rate": 1.820079433763986e-05, "loss": 0.0073, "step": 116460 }, { "epoch": 3.6004821660382023, "grad_norm": 0.12003948539495468, "learning_rate": 1.820033071644928e-05, "loss": 0.0075, "step": 116490 }, { "epoch": 3.601409408419361, "grad_norm": 0.11862187832593918, "learning_rate": 1.8199867095258702e-05, "loss": 0.0078, "step": 116520 }, { "epoch": 3.6023366508005195, "grad_norm": 0.174494206905365, "learning_rate": 1.8199403474068124e-05, "loss": 0.0084, "step": 116550 }, { "epoch": 3.6032638931816776, "grad_norm": 0.1134699285030365, "learning_rate": 1.8198939852877545e-05, "loss": 0.0078, "step": 116580 }, { "epoch": 3.604191135562836, "grad_norm": 0.11208406090736389, "learning_rate": 1.8198476231686963e-05, "loss": 0.0082, "step": 116610 }, { "epoch": 3.6051183779439944, "grad_norm": 0.13211792707443237, "learning_rate": 1.8198012610496385e-05, "loss": 0.0083, "step": 116640 }, { "epoch": 3.606045620325153, "grad_norm": 0.1611974984407425, "learning_rate": 1.8197548989305806e-05, "loss": 0.0083, "step": 116670 }, { "epoch": 3.6069728627063116, "grad_norm": 0.16840019822120667, "learning_rate": 1.8197085368115225e-05, "loss": 0.0075, "step": 116700 }, { "epoch": 3.6079001050874697, "grad_norm": 
0.11946713924407959, "learning_rate": 1.8196621746924646e-05, "loss": 0.0092, "step": 116730 }, { "epoch": 3.6088273474686283, "grad_norm": 0.13938041031360626, "learning_rate": 1.8196158125734068e-05, "loss": 0.008, "step": 116760 }, { "epoch": 3.6097545898497865, "grad_norm": 0.12545520067214966, "learning_rate": 1.819569450454349e-05, "loss": 0.0081, "step": 116790 }, { "epoch": 3.610681832230945, "grad_norm": 0.09578878432512283, "learning_rate": 1.819523088335291e-05, "loss": 0.008, "step": 116820 }, { "epoch": 3.6116090746121037, "grad_norm": 0.13541381061077118, "learning_rate": 1.8194767262162332e-05, "loss": 0.0073, "step": 116850 }, { "epoch": 3.6125363169932623, "grad_norm": 0.13833795487880707, "learning_rate": 1.819430364097175e-05, "loss": 0.0086, "step": 116880 }, { "epoch": 3.6134635593744204, "grad_norm": 0.12542611360549927, "learning_rate": 1.819384001978117e-05, "loss": 0.0086, "step": 116910 }, { "epoch": 3.614390801755579, "grad_norm": 0.16586093604564667, "learning_rate": 1.8193376398590593e-05, "loss": 0.0085, "step": 116940 }, { "epoch": 3.615318044136737, "grad_norm": 0.11709494888782501, "learning_rate": 1.8192912777400015e-05, "loss": 0.0081, "step": 116970 }, { "epoch": 3.616245286517896, "grad_norm": 0.19686758518218994, "learning_rate": 1.8192449156209436e-05, "loss": 0.0075, "step": 117000 }, { "epoch": 3.6171725288990544, "grad_norm": 0.08521337807178497, "learning_rate": 1.8191985535018854e-05, "loss": 0.0081, "step": 117030 }, { "epoch": 3.6180997712802125, "grad_norm": 0.07973572611808777, "learning_rate": 1.8191521913828276e-05, "loss": 0.0086, "step": 117060 }, { "epoch": 3.619027013661371, "grad_norm": 0.14019301533699036, "learning_rate": 1.8191058292637697e-05, "loss": 0.0081, "step": 117090 }, { "epoch": 3.6199542560425293, "grad_norm": 0.1624777466058731, "learning_rate": 1.8190594671447115e-05, "loss": 0.0081, "step": 117120 }, { "epoch": 3.620881498423688, "grad_norm": 0.11743370443582535, "learning_rate": 
1.819013105025654e-05, "loss": 0.0081, "step": 117150 }, { "epoch": 3.6218087408048465, "grad_norm": 0.12443020939826965, "learning_rate": 1.8189667429065962e-05, "loss": 0.0081, "step": 117180 }, { "epoch": 3.622735983186005, "grad_norm": 0.10224783420562744, "learning_rate": 1.818920380787538e-05, "loss": 0.0079, "step": 117210 }, { "epoch": 3.6236632255671632, "grad_norm": 0.10292903333902359, "learning_rate": 1.81887401866848e-05, "loss": 0.0078, "step": 117240 }, { "epoch": 3.624590467948322, "grad_norm": 0.11726408451795578, "learning_rate": 1.818827656549422e-05, "loss": 0.0079, "step": 117270 }, { "epoch": 3.62551771032948, "grad_norm": 0.12687227129936218, "learning_rate": 1.8187828398343327e-05, "loss": 0.0082, "step": 117300 }, { "epoch": 3.6264449527106386, "grad_norm": 0.11573716998100281, "learning_rate": 1.818736477715275e-05, "loss": 0.0087, "step": 117330 }, { "epoch": 3.627372195091797, "grad_norm": 0.08935608714818954, "learning_rate": 1.818690115596217e-05, "loss": 0.0078, "step": 117360 }, { "epoch": 3.6282994374729554, "grad_norm": 0.15892130136489868, "learning_rate": 1.8186437534771592e-05, "loss": 0.0083, "step": 117390 }, { "epoch": 3.629226679854114, "grad_norm": 0.09334686398506165, "learning_rate": 1.8185973913581014e-05, "loss": 0.0084, "step": 117420 }, { "epoch": 3.630153922235272, "grad_norm": 0.09567971527576447, "learning_rate": 1.818551029239043e-05, "loss": 0.008, "step": 117450 }, { "epoch": 3.6310811646164307, "grad_norm": 0.15643027424812317, "learning_rate": 1.8185046671199853e-05, "loss": 0.0085, "step": 117480 }, { "epoch": 3.6320084069975893, "grad_norm": 0.10664965957403183, "learning_rate": 1.8184583050009275e-05, "loss": 0.0087, "step": 117510 }, { "epoch": 3.6329356493787475, "grad_norm": 0.15537457168102264, "learning_rate": 1.8184119428818693e-05, "loss": 0.0079, "step": 117540 }, { "epoch": 3.633862891759906, "grad_norm": 0.0884210392832756, "learning_rate": 1.8183655807628114e-05, "loss": 0.0082, "step": 117570 }, 
{ "epoch": 3.634790134141064, "grad_norm": 0.14505259692668915, "learning_rate": 1.8183192186437536e-05, "loss": 0.008, "step": 117600 }, { "epoch": 3.635717376522223, "grad_norm": 0.12793125212192535, "learning_rate": 1.8182728565246957e-05, "loss": 0.008, "step": 117630 }, { "epoch": 3.6366446189033814, "grad_norm": 0.1381228268146515, "learning_rate": 1.818226494405638e-05, "loss": 0.0077, "step": 117660 }, { "epoch": 3.63757186128454, "grad_norm": 0.13011334836483002, "learning_rate": 1.81818013228658e-05, "loss": 0.008, "step": 117690 }, { "epoch": 3.638499103665698, "grad_norm": 0.1285460740327835, "learning_rate": 1.818133770167522e-05, "loss": 0.009, "step": 117720 }, { "epoch": 3.6394263460468568, "grad_norm": 0.14884209632873535, "learning_rate": 1.818087408048464e-05, "loss": 0.0082, "step": 117750 }, { "epoch": 3.640353588428015, "grad_norm": 0.10961679369211197, "learning_rate": 1.818041045929406e-05, "loss": 0.0081, "step": 117780 }, { "epoch": 3.6412808308091735, "grad_norm": 0.18532636761665344, "learning_rate": 1.8179946838103483e-05, "loss": 0.0079, "step": 117810 }, { "epoch": 3.642208073190332, "grad_norm": 0.14264056086540222, "learning_rate": 1.8179483216912904e-05, "loss": 0.0084, "step": 117840 }, { "epoch": 3.6431353155714903, "grad_norm": 0.1038099080324173, "learning_rate": 1.8179019595722323e-05, "loss": 0.008, "step": 117870 }, { "epoch": 3.644062557952649, "grad_norm": 0.1748819500207901, "learning_rate": 1.8178555974531744e-05, "loss": 0.0074, "step": 117900 }, { "epoch": 3.644989800333807, "grad_norm": 0.10423043370246887, "learning_rate": 1.8178092353341166e-05, "loss": 0.0086, "step": 117930 }, { "epoch": 3.6459170427149656, "grad_norm": 0.13197460770606995, "learning_rate": 1.8177628732150584e-05, "loss": 0.008, "step": 117960 }, { "epoch": 3.6468442850961242, "grad_norm": 0.12784293293952942, "learning_rate": 1.8177165110960005e-05, "loss": 0.0079, "step": 117990 }, { "epoch": 3.647771527477283, "grad_norm": 0.16531071066856384, 
"learning_rate": 1.8176701489769427e-05, "loss": 0.0085, "step": 118020 }, { "epoch": 3.648698769858441, "grad_norm": 0.10807972401380539, "learning_rate": 1.8176237868578848e-05, "loss": 0.0081, "step": 118050 }, { "epoch": 3.6496260122395996, "grad_norm": 0.12962916493415833, "learning_rate": 1.817577424738827e-05, "loss": 0.008, "step": 118080 }, { "epoch": 3.6505532546207577, "grad_norm": 0.1056046113371849, "learning_rate": 1.8175310626197688e-05, "loss": 0.0082, "step": 118110 }, { "epoch": 3.6514804970019163, "grad_norm": 0.10113202035427094, "learning_rate": 1.817484700500711e-05, "loss": 0.0088, "step": 118140 }, { "epoch": 3.652407739383075, "grad_norm": 0.13094177842140198, "learning_rate": 1.817438338381653e-05, "loss": 0.0083, "step": 118170 }, { "epoch": 3.653334981764233, "grad_norm": 0.14762109518051147, "learning_rate": 1.8173919762625952e-05, "loss": 0.0084, "step": 118200 }, { "epoch": 3.6542622241453917, "grad_norm": 0.12714336812496185, "learning_rate": 1.8173456141435374e-05, "loss": 0.0074, "step": 118230 }, { "epoch": 3.65518946652655, "grad_norm": 0.12227367609739304, "learning_rate": 1.8172992520244795e-05, "loss": 0.0072, "step": 118260 }, { "epoch": 3.6561167089077085, "grad_norm": 0.16235800087451935, "learning_rate": 1.8172528899054213e-05, "loss": 0.008, "step": 118290 }, { "epoch": 3.657043951288867, "grad_norm": 0.13146018981933594, "learning_rate": 1.8172065277863635e-05, "loss": 0.0083, "step": 118320 }, { "epoch": 3.657971193670025, "grad_norm": 0.11970271170139313, "learning_rate": 1.8171601656673056e-05, "loss": 0.0079, "step": 118350 }, { "epoch": 3.658898436051184, "grad_norm": 0.1392654925584793, "learning_rate": 1.8171138035482475e-05, "loss": 0.0082, "step": 118380 }, { "epoch": 3.659825678432342, "grad_norm": 0.2053743451833725, "learning_rate": 1.81706744142919e-05, "loss": 0.0086, "step": 118410 }, { "epoch": 3.6607529208135006, "grad_norm": 0.12643122673034668, "learning_rate": 1.8170210793101318e-05, "loss": 0.0078, 
"step": 118440 }, { "epoch": 3.661680163194659, "grad_norm": 0.10458928346633911, "learning_rate": 1.816974717191074e-05, "loss": 0.0082, "step": 118470 }, { "epoch": 3.6626074055758178, "grad_norm": 0.1176496148109436, "learning_rate": 1.816928355072016e-05, "loss": 0.0077, "step": 118500 }, { "epoch": 3.663534647956976, "grad_norm": 0.14647363126277924, "learning_rate": 1.816881992952958e-05, "loss": 0.0076, "step": 118530 }, { "epoch": 3.6644618903381345, "grad_norm": 0.16313272714614868, "learning_rate": 1.8168356308339e-05, "loss": 0.0078, "step": 118560 }, { "epoch": 3.6653891327192927, "grad_norm": 0.14535175263881683, "learning_rate": 1.8167892687148422e-05, "loss": 0.0075, "step": 118590 }, { "epoch": 3.6663163751004513, "grad_norm": 0.11908460408449173, "learning_rate": 1.8167429065957843e-05, "loss": 0.0076, "step": 118620 }, { "epoch": 3.66724361748161, "grad_norm": 0.09485573321580887, "learning_rate": 1.8166965444767265e-05, "loss": 0.0083, "step": 118650 }, { "epoch": 3.668170859862768, "grad_norm": 0.10076744854450226, "learning_rate": 1.8166501823576686e-05, "loss": 0.0087, "step": 118680 }, { "epoch": 3.6690981022439266, "grad_norm": 0.12435971945524216, "learning_rate": 1.8166038202386104e-05, "loss": 0.0076, "step": 118710 }, { "epoch": 3.670025344625085, "grad_norm": 0.1309240460395813, "learning_rate": 1.8165574581195526e-05, "loss": 0.0067, "step": 118740 }, { "epoch": 3.6709525870062434, "grad_norm": 0.1351226419210434, "learning_rate": 1.8165110960004947e-05, "loss": 0.0086, "step": 118770 }, { "epoch": 3.671879829387402, "grad_norm": 0.12273991852998734, "learning_rate": 1.816464733881437e-05, "loss": 0.0082, "step": 118800 }, { "epoch": 3.67280707176856, "grad_norm": 0.09458436071872711, "learning_rate": 1.816418371762379e-05, "loss": 0.0088, "step": 118830 }, { "epoch": 3.6737343141497187, "grad_norm": 0.14882884919643402, "learning_rate": 1.816372009643321e-05, "loss": 0.0086, "step": 118860 }, { "epoch": 3.6746615565308773, 
"grad_norm": 0.13135308027267456, "learning_rate": 1.816325647524263e-05, "loss": 0.0076, "step": 118890 }, { "epoch": 3.6755887989120355, "grad_norm": 0.16879390180110931, "learning_rate": 1.816279285405205e-05, "loss": 0.0083, "step": 118920 }, { "epoch": 3.676516041293194, "grad_norm": 0.07361263036727905, "learning_rate": 1.816232923286147e-05, "loss": 0.0083, "step": 118950 }, { "epoch": 3.6774432836743527, "grad_norm": 0.12805874645709991, "learning_rate": 1.816186561167089e-05, "loss": 0.0078, "step": 118980 }, { "epoch": 3.678370526055511, "grad_norm": 0.13695384562015533, "learning_rate": 1.8161401990480313e-05, "loss": 0.0086, "step": 119010 }, { "epoch": 3.6792977684366694, "grad_norm": 0.15153110027313232, "learning_rate": 1.8160938369289734e-05, "loss": 0.008, "step": 119040 }, { "epoch": 3.6802250108178276, "grad_norm": 0.08139652013778687, "learning_rate": 1.8160474748099156e-05, "loss": 0.0072, "step": 119070 }, { "epoch": 3.681152253198986, "grad_norm": 0.0852152556180954, "learning_rate": 1.8160011126908574e-05, "loss": 0.0083, "step": 119100 }, { "epoch": 3.682079495580145, "grad_norm": 0.11527495086193085, "learning_rate": 1.8159547505717995e-05, "loss": 0.0093, "step": 119130 }, { "epoch": 3.683006737961303, "grad_norm": 0.13175126910209656, "learning_rate": 1.8159083884527417e-05, "loss": 0.0077, "step": 119160 }, { "epoch": 3.6839339803424616, "grad_norm": 0.12826766073703766, "learning_rate": 1.815862026333684e-05, "loss": 0.0085, "step": 119190 }, { "epoch": 3.6848612227236197, "grad_norm": 0.10989135503768921, "learning_rate": 1.815815664214626e-05, "loss": 0.0082, "step": 119220 }, { "epoch": 3.6857884651047783, "grad_norm": 0.14646069705486298, "learning_rate": 1.815769302095568e-05, "loss": 0.0079, "step": 119250 }, { "epoch": 3.686715707485937, "grad_norm": 0.12946489453315735, "learning_rate": 1.81572293997651e-05, "loss": 0.0088, "step": 119280 }, { "epoch": 3.6876429498670955, "grad_norm": 0.10503479838371277, "learning_rate": 
1.815676577857452e-05, "loss": 0.0072, "step": 119310 }, { "epoch": 3.6885701922482537, "grad_norm": 0.16430848836898804, "learning_rate": 1.8156302157383942e-05, "loss": 0.0078, "step": 119340 }, { "epoch": 3.6894974346294123, "grad_norm": 0.13162539899349213, "learning_rate": 1.815583853619336e-05, "loss": 0.008, "step": 119370 }, { "epoch": 3.6904246770105704, "grad_norm": 0.17879866063594818, "learning_rate": 1.8155374915002782e-05, "loss": 0.0081, "step": 119400 }, { "epoch": 3.691351919391729, "grad_norm": 0.12945307791233063, "learning_rate": 1.8154911293812204e-05, "loss": 0.0084, "step": 119430 }, { "epoch": 3.6922791617728876, "grad_norm": 0.16465257108211517, "learning_rate": 1.8154447672621625e-05, "loss": 0.0081, "step": 119460 }, { "epoch": 3.6932064041540458, "grad_norm": 0.08418288826942444, "learning_rate": 1.8153984051431047e-05, "loss": 0.007, "step": 119490 }, { "epoch": 3.6941336465352044, "grad_norm": 0.1507236659526825, "learning_rate": 1.8153520430240465e-05, "loss": 0.0079, "step": 119520 }, { "epoch": 3.6950608889163625, "grad_norm": 0.143387109041214, "learning_rate": 1.8153056809049886e-05, "loss": 0.008, "step": 119550 }, { "epoch": 3.695988131297521, "grad_norm": 0.17465659976005554, "learning_rate": 1.8152593187859308e-05, "loss": 0.0081, "step": 119580 }, { "epoch": 3.6969153736786797, "grad_norm": 0.1423025131225586, "learning_rate": 1.815212956666873e-05, "loss": 0.0082, "step": 119610 }, { "epoch": 3.697842616059838, "grad_norm": 0.13425639271736145, "learning_rate": 1.815166594547815e-05, "loss": 0.0077, "step": 119640 }, { "epoch": 3.6987698584409965, "grad_norm": 0.1762930154800415, "learning_rate": 1.8151202324287572e-05, "loss": 0.008, "step": 119670 }, { "epoch": 3.6996971008221546, "grad_norm": 0.079365074634552, "learning_rate": 1.815073870309699e-05, "loss": 0.0086, "step": 119700 }, { "epoch": 3.7006243432033132, "grad_norm": 0.10577601939439774, "learning_rate": 1.8150275081906412e-05, "loss": 0.0087, "step": 119730 }, 
{ "epoch": 3.701551585584472, "grad_norm": 0.15350401401519775, "learning_rate": 1.814981146071583e-05, "loss": 0.0084, "step": 119760 }, { "epoch": 3.7024788279656304, "grad_norm": 0.09126746654510498, "learning_rate": 1.814934783952525e-05, "loss": 0.0082, "step": 119790 }, { "epoch": 3.7034060703467886, "grad_norm": 0.1540314108133316, "learning_rate": 1.8148884218334676e-05, "loss": 0.0074, "step": 119820 }, { "epoch": 3.704333312727947, "grad_norm": 0.12784618139266968, "learning_rate": 1.8148420597144095e-05, "loss": 0.0068, "step": 119850 }, { "epoch": 3.7052605551091053, "grad_norm": 0.14070458710193634, "learning_rate": 1.8147956975953516e-05, "loss": 0.0074, "step": 119880 }, { "epoch": 3.706187797490264, "grad_norm": 0.1251015067100525, "learning_rate": 1.814752426284231e-05, "loss": 0.0088, "step": 119910 }, { "epoch": 3.7071150398714225, "grad_norm": 0.10169024020433426, "learning_rate": 1.8147060641651732e-05, "loss": 0.0074, "step": 119940 }, { "epoch": 3.7080422822525807, "grad_norm": 0.08187046647071838, "learning_rate": 1.814659702046115e-05, "loss": 0.009, "step": 119970 }, { "epoch": 3.7089695246337393, "grad_norm": 0.0944724753499031, "learning_rate": 1.814613339927057e-05, "loss": 0.0079, "step": 120000 }, { "epoch": 3.7098967670148975, "grad_norm": 0.1349627822637558, "learning_rate": 1.814566977807999e-05, "loss": 0.0084, "step": 120030 }, { "epoch": 3.710824009396056, "grad_norm": 0.13565164804458618, "learning_rate": 1.814520615688941e-05, "loss": 0.0072, "step": 120060 }, { "epoch": 3.7117512517772147, "grad_norm": 0.1504800021648407, "learning_rate": 1.8144742535698833e-05, "loss": 0.008, "step": 120090 }, { "epoch": 3.7126784941583733, "grad_norm": 0.10205909609794617, "learning_rate": 1.8144278914508254e-05, "loss": 0.0084, "step": 120120 }, { "epoch": 3.7136057365395314, "grad_norm": 0.20826736092567444, "learning_rate": 1.8143815293317676e-05, "loss": 0.0089, "step": 120150 }, { "epoch": 3.71453297892069, "grad_norm": 
0.1417473554611206, "learning_rate": 1.8143351672127097e-05, "loss": 0.0089, "step": 120180 }, { "epoch": 3.715460221301848, "grad_norm": 0.16649433970451355, "learning_rate": 1.8142888050936515e-05, "loss": 0.008, "step": 120210 }, { "epoch": 3.7163874636830068, "grad_norm": 0.15490631759166718, "learning_rate": 1.8142424429745937e-05, "loss": 0.0076, "step": 120240 }, { "epoch": 3.7173147060641654, "grad_norm": 0.1671963483095169, "learning_rate": 1.8141960808555358e-05, "loss": 0.0078, "step": 120270 }, { "epoch": 3.7182419484453235, "grad_norm": 0.11454898864030838, "learning_rate": 1.814149718736478e-05, "loss": 0.0083, "step": 120300 }, { "epoch": 3.719169190826482, "grad_norm": 0.13125605881214142, "learning_rate": 1.81410335661742e-05, "loss": 0.0084, "step": 120330 }, { "epoch": 3.7200964332076403, "grad_norm": 0.11667096614837646, "learning_rate": 1.814056994498362e-05, "loss": 0.0078, "step": 120360 }, { "epoch": 3.721023675588799, "grad_norm": 0.1759253889322281, "learning_rate": 1.814010632379304e-05, "loss": 0.008, "step": 120390 }, { "epoch": 3.7219509179699575, "grad_norm": 0.15758271515369415, "learning_rate": 1.8139642702602462e-05, "loss": 0.0078, "step": 120420 }, { "epoch": 3.7228781603511156, "grad_norm": 0.12792794406414032, "learning_rate": 1.813917908141188e-05, "loss": 0.008, "step": 120450 }, { "epoch": 3.7238054027322742, "grad_norm": 0.14673930406570435, "learning_rate": 1.8138715460221302e-05, "loss": 0.0078, "step": 120480 }, { "epoch": 3.7247326451134324, "grad_norm": 0.1276085078716278, "learning_rate": 1.8138251839030724e-05, "loss": 0.0084, "step": 120510 }, { "epoch": 3.725659887494591, "grad_norm": 0.11245516687631607, "learning_rate": 1.8137788217840145e-05, "loss": 0.0076, "step": 120540 }, { "epoch": 3.7265871298757496, "grad_norm": 0.0922490805387497, "learning_rate": 1.8137324596649567e-05, "loss": 0.0083, "step": 120570 }, { "epoch": 3.727514372256908, "grad_norm": 0.12439174950122833, "learning_rate": 
1.8136860975458988e-05, "loss": 0.0082, "step": 120600 }, { "epoch": 3.7284416146380663, "grad_norm": 0.1968047320842743, "learning_rate": 1.8136397354268406e-05, "loss": 0.0084, "step": 120630 }, { "epoch": 3.729368857019225, "grad_norm": 0.13048365712165833, "learning_rate": 1.8135933733077828e-05, "loss": 0.0077, "step": 120660 }, { "epoch": 3.730296099400383, "grad_norm": 0.17642123997211456, "learning_rate": 1.813547011188725e-05, "loss": 0.0085, "step": 120690 }, { "epoch": 3.7312233417815417, "grad_norm": 0.1032821461558342, "learning_rate": 1.813500649069667e-05, "loss": 0.0083, "step": 120720 }, { "epoch": 3.7321505841627003, "grad_norm": 0.12553110718727112, "learning_rate": 1.8134542869506092e-05, "loss": 0.0076, "step": 120750 }, { "epoch": 3.7330778265438584, "grad_norm": 0.16814449429512024, "learning_rate": 1.813407924831551e-05, "loss": 0.0086, "step": 120780 }, { "epoch": 3.734005068925017, "grad_norm": 0.0956915020942688, "learning_rate": 1.8133615627124932e-05, "loss": 0.0073, "step": 120810 }, { "epoch": 3.734932311306175, "grad_norm": 0.13960425555706024, "learning_rate": 1.8133152005934353e-05, "loss": 0.008, "step": 120840 }, { "epoch": 3.735859553687334, "grad_norm": 0.16865208745002747, "learning_rate": 1.813268838474377e-05, "loss": 0.0072, "step": 120870 }, { "epoch": 3.7367867960684924, "grad_norm": 0.1297546774148941, "learning_rate": 1.8132224763553193e-05, "loss": 0.0083, "step": 120900 }, { "epoch": 3.737714038449651, "grad_norm": 0.13086503744125366, "learning_rate": 1.8131761142362614e-05, "loss": 0.008, "step": 120930 }, { "epoch": 3.738641280830809, "grad_norm": 0.1300152689218521, "learning_rate": 1.8131297521172036e-05, "loss": 0.0082, "step": 120960 }, { "epoch": 3.7395685232119678, "grad_norm": 0.08579818159341812, "learning_rate": 1.8130833899981457e-05, "loss": 0.0081, "step": 120990 }, { "epoch": 3.740495765593126, "grad_norm": 0.1801765114068985, "learning_rate": 1.8130370278790876e-05, "loss": 0.0084, "step": 121020 }, { 
"epoch": 3.7414230079742845, "grad_norm": 0.12828285992145538, "learning_rate": 1.8129906657600297e-05, "loss": 0.0082, "step": 121050 }, { "epoch": 3.742350250355443, "grad_norm": 0.12217877805233002, "learning_rate": 1.8129458490449405e-05, "loss": 0.0078, "step": 121080 }, { "epoch": 3.7432774927366013, "grad_norm": 0.14113907516002655, "learning_rate": 1.8128994869258823e-05, "loss": 0.0084, "step": 121110 }, { "epoch": 3.74420473511776, "grad_norm": 0.09513013064861298, "learning_rate": 1.8128531248068245e-05, "loss": 0.008, "step": 121140 }, { "epoch": 3.745131977498918, "grad_norm": 0.1805957704782486, "learning_rate": 1.812806762687767e-05, "loss": 0.0078, "step": 121170 }, { "epoch": 3.7460592198800766, "grad_norm": 0.13526876270771027, "learning_rate": 1.8127604005687088e-05, "loss": 0.008, "step": 121200 }, { "epoch": 3.746986462261235, "grad_norm": 0.12814433872699738, "learning_rate": 1.812714038449651e-05, "loss": 0.0075, "step": 121230 }, { "epoch": 3.7479137046423934, "grad_norm": 0.1279151886701584, "learning_rate": 1.812667676330593e-05, "loss": 0.0081, "step": 121260 }, { "epoch": 3.748840947023552, "grad_norm": 0.1286272257566452, "learning_rate": 1.812621314211535e-05, "loss": 0.0084, "step": 121290 }, { "epoch": 3.74976818940471, "grad_norm": 0.10865768045186996, "learning_rate": 1.812574952092477e-05, "loss": 0.0083, "step": 121320 }, { "epoch": 3.7506954317858687, "grad_norm": 0.11769051849842072, "learning_rate": 1.8125285899734192e-05, "loss": 0.0084, "step": 121350 }, { "epoch": 3.7516226741670273, "grad_norm": 0.11551350355148315, "learning_rate": 1.8124822278543613e-05, "loss": 0.0074, "step": 121380 }, { "epoch": 3.752549916548186, "grad_norm": 0.12981387972831726, "learning_rate": 1.8124358657353035e-05, "loss": 0.0076, "step": 121410 }, { "epoch": 3.753477158929344, "grad_norm": 0.18964019417762756, "learning_rate": 1.8123895036162453e-05, "loss": 0.0079, "step": 121440 }, { "epoch": 3.7544044013105027, "grad_norm": 
0.10663186758756638, "learning_rate": 1.8123431414971874e-05, "loss": 0.0083, "step": 121470 }, { "epoch": 3.755331643691661, "grad_norm": 0.14633168280124664, "learning_rate": 1.8122967793781296e-05, "loss": 0.0075, "step": 121500 }, { "epoch": 3.7562588860728194, "grad_norm": 0.11641968786716461, "learning_rate": 1.8122504172590717e-05, "loss": 0.0086, "step": 121530 }, { "epoch": 3.757186128453978, "grad_norm": 0.11757657676935196, "learning_rate": 1.812204055140014e-05, "loss": 0.0084, "step": 121560 }, { "epoch": 3.758113370835136, "grad_norm": 0.2098539173603058, "learning_rate": 1.812157693020956e-05, "loss": 0.0085, "step": 121590 }, { "epoch": 3.759040613216295, "grad_norm": 0.1152629628777504, "learning_rate": 1.812111330901898e-05, "loss": 0.0077, "step": 121620 }, { "epoch": 3.759967855597453, "grad_norm": 0.12581691145896912, "learning_rate": 1.81206496878284e-05, "loss": 0.0076, "step": 121650 }, { "epoch": 3.7608950979786115, "grad_norm": 0.11009199917316437, "learning_rate": 1.812018606663782e-05, "loss": 0.0076, "step": 121680 }, { "epoch": 3.76182234035977, "grad_norm": 0.13507094979286194, "learning_rate": 1.811972244544724e-05, "loss": 0.0082, "step": 121710 }, { "epoch": 3.7627495827409287, "grad_norm": 0.1510290801525116, "learning_rate": 1.811925882425666e-05, "loss": 0.0084, "step": 121740 }, { "epoch": 3.763676825122087, "grad_norm": 0.14034785330295563, "learning_rate": 1.8118795203066083e-05, "loss": 0.0076, "step": 121770 }, { "epoch": 3.7646040675032455, "grad_norm": 0.10778360813856125, "learning_rate": 1.8118331581875504e-05, "loss": 0.0075, "step": 121800 }, { "epoch": 3.7655313098844037, "grad_norm": 0.1437978595495224, "learning_rate": 1.8117867960684926e-05, "loss": 0.0076, "step": 121830 }, { "epoch": 3.7664585522655623, "grad_norm": 0.1663195937871933, "learning_rate": 1.8117404339494344e-05, "loss": 0.0085, "step": 121860 }, { "epoch": 3.767385794646721, "grad_norm": 1.241632103919983, "learning_rate": 1.8116940718303765e-05, 
"loss": 0.0077, "step": 121890 }, { "epoch": 3.768313037027879, "grad_norm": 0.14764447510242462, "learning_rate": 1.8116477097113187e-05, "loss": 0.0088, "step": 121920 }, { "epoch": 3.7692402794090376, "grad_norm": 0.1394069343805313, "learning_rate": 1.811601347592261e-05, "loss": 0.0085, "step": 121950 }, { "epoch": 3.7701675217901958, "grad_norm": 0.0824093222618103, "learning_rate": 1.811554985473203e-05, "loss": 0.0078, "step": 121980 }, { "epoch": 3.7710947641713544, "grad_norm": 0.13641512393951416, "learning_rate": 1.811508623354145e-05, "loss": 0.0079, "step": 122010 }, { "epoch": 3.772022006552513, "grad_norm": 0.12313011288642883, "learning_rate": 1.811462261235087e-05, "loss": 0.0078, "step": 122040 }, { "epoch": 3.772949248933671, "grad_norm": 0.15797407925128937, "learning_rate": 1.811415899116029e-05, "loss": 0.0086, "step": 122070 }, { "epoch": 3.7738764913148297, "grad_norm": 0.08051083236932755, "learning_rate": 1.8113695369969712e-05, "loss": 0.0087, "step": 122100 }, { "epoch": 3.774803733695988, "grad_norm": 0.15782231092453003, "learning_rate": 1.811323174877913e-05, "loss": 0.0083, "step": 122130 }, { "epoch": 3.7757309760771465, "grad_norm": 0.07120130211114883, "learning_rate": 1.8112768127588552e-05, "loss": 0.0076, "step": 122160 }, { "epoch": 3.776658218458305, "grad_norm": 0.10544900596141815, "learning_rate": 1.8112304506397974e-05, "loss": 0.0078, "step": 122190 }, { "epoch": 3.7775854608394637, "grad_norm": 0.11483177542686462, "learning_rate": 1.8111840885207395e-05, "loss": 0.0078, "step": 122220 }, { "epoch": 3.778512703220622, "grad_norm": 0.11517966538667679, "learning_rate": 1.8111377264016817e-05, "loss": 0.0084, "step": 122250 }, { "epoch": 3.7794399456017804, "grad_norm": 0.12113375216722488, "learning_rate": 1.8110913642826235e-05, "loss": 0.0071, "step": 122280 }, { "epoch": 3.7803671879829386, "grad_norm": 0.12127669155597687, "learning_rate": 1.8110450021635656e-05, "loss": 0.0081, "step": 122310 }, { "epoch": 
3.781294430364097, "grad_norm": 0.10540524870157242, "learning_rate": 1.8109986400445078e-05, "loss": 0.008, "step": 122340 }, { "epoch": 3.782221672745256, "grad_norm": 0.14437782764434814, "learning_rate": 1.81095227792545e-05, "loss": 0.0081, "step": 122370 }, { "epoch": 3.783148915126414, "grad_norm": 0.1388515830039978, "learning_rate": 1.810905915806392e-05, "loss": 0.008, "step": 122400 }, { "epoch": 3.7840761575075725, "grad_norm": 0.1440693587064743, "learning_rate": 1.8108595536873342e-05, "loss": 0.0083, "step": 122430 }, { "epoch": 3.7850033998887307, "grad_norm": 0.12575878202915192, "learning_rate": 1.810813191568276e-05, "loss": 0.0079, "step": 122460 }, { "epoch": 3.7859306422698893, "grad_norm": 0.10180613398551941, "learning_rate": 1.8107668294492182e-05, "loss": 0.0076, "step": 122490 }, { "epoch": 3.786857884651048, "grad_norm": 0.17967310547828674, "learning_rate": 1.81072046733016e-05, "loss": 0.0082, "step": 122520 }, { "epoch": 3.7877851270322065, "grad_norm": 0.12969709932804108, "learning_rate": 1.810674105211102e-05, "loss": 0.0078, "step": 122550 }, { "epoch": 3.7887123694133646, "grad_norm": 0.1011432632803917, "learning_rate": 1.8106277430920446e-05, "loss": 0.0085, "step": 122580 }, { "epoch": 3.7896396117945232, "grad_norm": 0.11030440777540207, "learning_rate": 1.8105813809729865e-05, "loss": 0.0076, "step": 122610 }, { "epoch": 3.7905668541756814, "grad_norm": 0.10401671379804611, "learning_rate": 1.8105350188539286e-05, "loss": 0.0072, "step": 122640 }, { "epoch": 3.79149409655684, "grad_norm": 0.14586728811264038, "learning_rate": 1.8104886567348708e-05, "loss": 0.0085, "step": 122670 }, { "epoch": 3.7924213389379986, "grad_norm": 0.12596186995506287, "learning_rate": 1.8104422946158126e-05, "loss": 0.0077, "step": 122700 }, { "epoch": 3.7933485813191568, "grad_norm": 0.11973842978477478, "learning_rate": 1.8103959324967547e-05, "loss": 0.0079, "step": 122730 }, { "epoch": 3.7942758237003154, "grad_norm": 0.1500464379787445, 
"learning_rate": 1.810349570377697e-05, "loss": 0.0089, "step": 122760 }, { "epoch": 3.7952030660814735, "grad_norm": 0.1621190458536148, "learning_rate": 1.810303208258639e-05, "loss": 0.0081, "step": 122790 }, { "epoch": 3.796130308462632, "grad_norm": 0.10114305466413498, "learning_rate": 1.810256846139581e-05, "loss": 0.0084, "step": 122820 }, { "epoch": 3.7970575508437907, "grad_norm": 0.11614688485860825, "learning_rate": 1.810210484020523e-05, "loss": 0.0078, "step": 122850 }, { "epoch": 3.797984793224949, "grad_norm": 0.13171876966953278, "learning_rate": 1.810164121901465e-05, "loss": 0.0072, "step": 122880 }, { "epoch": 3.7989120356061075, "grad_norm": 0.1516057699918747, "learning_rate": 1.8101177597824073e-05, "loss": 0.0081, "step": 122910 }, { "epoch": 3.7998392779872656, "grad_norm": 0.09468051791191101, "learning_rate": 1.8100713976633494e-05, "loss": 0.0078, "step": 122940 }, { "epoch": 3.800766520368424, "grad_norm": 0.10290413349866867, "learning_rate": 1.8100250355442916e-05, "loss": 0.0075, "step": 122970 }, { "epoch": 3.801693762749583, "grad_norm": 0.11771654337644577, "learning_rate": 1.8099786734252337e-05, "loss": 0.0077, "step": 123000 }, { "epoch": 3.8026210051307414, "grad_norm": 0.10625947266817093, "learning_rate": 1.8099323113061755e-05, "loss": 0.0084, "step": 123030 }, { "epoch": 3.8035482475118996, "grad_norm": 0.12502749264240265, "learning_rate": 1.8098859491871177e-05, "loss": 0.0079, "step": 123060 }, { "epoch": 3.804475489893058, "grad_norm": 0.11785713583230972, "learning_rate": 1.80983958706806e-05, "loss": 0.0079, "step": 123090 }, { "epoch": 3.8054027322742163, "grad_norm": 0.117913156747818, "learning_rate": 1.8097932249490017e-05, "loss": 0.0082, "step": 123120 }, { "epoch": 3.806329974655375, "grad_norm": 0.1882917582988739, "learning_rate": 1.8097468628299438e-05, "loss": 0.0083, "step": 123150 }, { "epoch": 3.8072572170365335, "grad_norm": 0.11626938730478287, "learning_rate": 1.809700500710886e-05, "loss": 0.0076, 
"step": 123180 }, { "epoch": 3.8081844594176917, "grad_norm": 0.19319774210453033, "learning_rate": 1.809654138591828e-05, "loss": 0.0075, "step": 123210 }, { "epoch": 3.8091117017988503, "grad_norm": 0.15898728370666504, "learning_rate": 1.8096077764727703e-05, "loss": 0.0078, "step": 123240 }, { "epoch": 3.8100389441800084, "grad_norm": 0.12441060692071915, "learning_rate": 1.809561414353712e-05, "loss": 0.0074, "step": 123270 }, { "epoch": 3.810966186561167, "grad_norm": 0.13664312660694122, "learning_rate": 1.8095150522346542e-05, "loss": 0.0078, "step": 123300 }, { "epoch": 3.8118934289423256, "grad_norm": 0.10670898854732513, "learning_rate": 1.8094686901155964e-05, "loss": 0.0082, "step": 123330 }, { "epoch": 3.8128206713234842, "grad_norm": 0.11809548735618591, "learning_rate": 1.8094223279965385e-05, "loss": 0.0082, "step": 123360 }, { "epoch": 3.8137479137046424, "grad_norm": 0.07844429463148117, "learning_rate": 1.8093759658774807e-05, "loss": 0.0078, "step": 123390 }, { "epoch": 3.814675156085801, "grad_norm": 0.1561068892478943, "learning_rate": 1.8093296037584228e-05, "loss": 0.0076, "step": 123420 }, { "epoch": 3.815602398466959, "grad_norm": 0.1462811380624771, "learning_rate": 1.8092832416393646e-05, "loss": 0.0082, "step": 123450 }, { "epoch": 3.8165296408481177, "grad_norm": 0.13831305503845215, "learning_rate": 1.8092368795203068e-05, "loss": 0.0074, "step": 123480 }, { "epoch": 3.8174568832292763, "grad_norm": 0.08000694960355759, "learning_rate": 1.8091905174012486e-05, "loss": 0.0074, "step": 123510 }, { "epoch": 3.8183841256104345, "grad_norm": 0.24763788282871246, "learning_rate": 1.8091441552821907e-05, "loss": 0.0079, "step": 123540 }, { "epoch": 3.819311367991593, "grad_norm": 0.13641653954982758, "learning_rate": 1.809097793163133e-05, "loss": 0.0089, "step": 123570 }, { "epoch": 3.8202386103727513, "grad_norm": 0.17702771723270416, "learning_rate": 1.809051431044075e-05, "loss": 0.0083, "step": 123600 }, { "epoch": 3.82116585275391, 
"grad_norm": 0.10862215608358383, "learning_rate": 1.8090050689250172e-05, "loss": 0.0078, "step": 123630 }, { "epoch": 3.8220930951350685, "grad_norm": 0.08993857353925705, "learning_rate": 1.8089587068059594e-05, "loss": 0.0081, "step": 123660 }, { "epoch": 3.8230203375162266, "grad_norm": 0.13338683545589447, "learning_rate": 1.808912344686901e-05, "loss": 0.0081, "step": 123690 }, { "epoch": 3.823947579897385, "grad_norm": 0.1492851972579956, "learning_rate": 1.8088659825678433e-05, "loss": 0.0075, "step": 123720 }, { "epoch": 3.8248748222785434, "grad_norm": 0.07954173535108566, "learning_rate": 1.8088196204487855e-05, "loss": 0.0082, "step": 123750 }, { "epoch": 3.825802064659702, "grad_norm": 0.1471700668334961, "learning_rate": 1.8087732583297276e-05, "loss": 0.0082, "step": 123780 }, { "epoch": 3.8267293070408606, "grad_norm": 0.09938060492277145, "learning_rate": 1.8087268962106698e-05, "loss": 0.0081, "step": 123810 }, { "epoch": 3.827656549422019, "grad_norm": 0.09775839000940323, "learning_rate": 1.8086805340916116e-05, "loss": 0.0076, "step": 123840 }, { "epoch": 3.8285837918031773, "grad_norm": 0.11986198276281357, "learning_rate": 1.8086341719725537e-05, "loss": 0.0077, "step": 123870 }, { "epoch": 3.829511034184336, "grad_norm": 0.09924204647541046, "learning_rate": 1.808587809853496e-05, "loss": 0.0078, "step": 123900 }, { "epoch": 3.830438276565494, "grad_norm": 0.08894223719835281, "learning_rate": 1.8085414477344377e-05, "loss": 0.0083, "step": 123930 }, { "epoch": 3.8313655189466527, "grad_norm": 0.13797789812088013, "learning_rate": 1.80849508561538e-05, "loss": 0.0082, "step": 123960 }, { "epoch": 3.8322927613278113, "grad_norm": 0.11284887045621872, "learning_rate": 1.8084487234963223e-05, "loss": 0.0082, "step": 123990 }, { "epoch": 3.8332200037089694, "grad_norm": 0.12923812866210938, "learning_rate": 1.808402361377264e-05, "loss": 0.0078, "step": 124020 }, { "epoch": 3.834147246090128, "grad_norm": 0.1520715206861496, "learning_rate": 
1.8083559992582063e-05, "loss": 0.0082, "step": 124050 }, { "epoch": 3.835074488471286, "grad_norm": 0.11658784747123718, "learning_rate": 1.8083096371391484e-05, "loss": 0.0083, "step": 124080 }, { "epoch": 3.836001730852445, "grad_norm": 0.11594454944133759, "learning_rate": 1.8082632750200903e-05, "loss": 0.0078, "step": 124110 }, { "epoch": 3.8369289732336034, "grad_norm": 0.11815297603607178, "learning_rate": 1.8082169129010324e-05, "loss": 0.0077, "step": 124140 }, { "epoch": 3.8378562156147615, "grad_norm": 0.09091242402791977, "learning_rate": 1.8081705507819746e-05, "loss": 0.0076, "step": 124170 }, { "epoch": 3.83878345799592, "grad_norm": 0.0951206237077713, "learning_rate": 1.8081241886629167e-05, "loss": 0.0078, "step": 124200 }, { "epoch": 3.8397107003770787, "grad_norm": 0.18376436829566956, "learning_rate": 1.808077826543859e-05, "loss": 0.007, "step": 124230 }, { "epoch": 3.840637942758237, "grad_norm": 0.138031005859375, "learning_rate": 1.8080314644248007e-05, "loss": 0.0086, "step": 124260 }, { "epoch": 3.8415651851393955, "grad_norm": 0.10387805849313736, "learning_rate": 1.8079851023057428e-05, "loss": 0.0083, "step": 124290 }, { "epoch": 3.842492427520554, "grad_norm": 0.10865048319101334, "learning_rate": 1.807938740186685e-05, "loss": 0.0082, "step": 124320 }, { "epoch": 3.8434196699017122, "grad_norm": 0.17619608342647552, "learning_rate": 1.807892378067627e-05, "loss": 0.008, "step": 124350 }, { "epoch": 3.844346912282871, "grad_norm": 0.059509050101041794, "learning_rate": 1.8078460159485693e-05, "loss": 0.0077, "step": 124380 }, { "epoch": 3.845274154664029, "grad_norm": 0.1368538737297058, "learning_rate": 1.8077996538295114e-05, "loss": 0.0073, "step": 124410 }, { "epoch": 3.8462013970451876, "grad_norm": 0.15249882638454437, "learning_rate": 1.8077532917104532e-05, "loss": 0.0084, "step": 124440 }, { "epoch": 3.847128639426346, "grad_norm": 0.14453019201755524, "learning_rate": 1.8077069295913954e-05, "loss": 0.0075, "step": 124470 
}, { "epoch": 3.8480558818075044, "grad_norm": 0.14370010793209076, "learning_rate": 1.8076605674723372e-05, "loss": 0.0074, "step": 124500 }, { "epoch": 3.848983124188663, "grad_norm": 0.1715235859155655, "learning_rate": 1.8076142053532793e-05, "loss": 0.0077, "step": 124530 }, { "epoch": 3.849910366569821, "grad_norm": 0.13961179554462433, "learning_rate": 1.8075678432342215e-05, "loss": 0.0079, "step": 124560 }, { "epoch": 3.8508376089509797, "grad_norm": 0.14055143296718597, "learning_rate": 1.8075214811151636e-05, "loss": 0.0085, "step": 124590 }, { "epoch": 3.8517648513321383, "grad_norm": 0.14665494859218597, "learning_rate": 1.8074751189961058e-05, "loss": 0.0077, "step": 124620 }, { "epoch": 3.852692093713297, "grad_norm": 0.13661113381385803, "learning_rate": 1.807428756877048e-05, "loss": 0.0075, "step": 124650 }, { "epoch": 3.853619336094455, "grad_norm": 0.08813063055276871, "learning_rate": 1.8073823947579898e-05, "loss": 0.0084, "step": 124680 }, { "epoch": 3.8545465784756137, "grad_norm": 0.08236559480428696, "learning_rate": 1.807336032638932e-05, "loss": 0.0086, "step": 124710 }, { "epoch": 3.855473820856772, "grad_norm": 0.14947554469108582, "learning_rate": 1.807289670519874e-05, "loss": 0.0073, "step": 124740 }, { "epoch": 3.8564010632379304, "grad_norm": 0.14029532670974731, "learning_rate": 1.8072433084008162e-05, "loss": 0.0077, "step": 124770 }, { "epoch": 3.857328305619089, "grad_norm": 0.1497596949338913, "learning_rate": 1.8071969462817584e-05, "loss": 0.0085, "step": 124800 }, { "epoch": 3.858255548000247, "grad_norm": 0.14711116254329681, "learning_rate": 1.8071505841627002e-05, "loss": 0.0079, "step": 124830 }, { "epoch": 3.8591827903814058, "grad_norm": 0.13378460705280304, "learning_rate": 1.8071042220436423e-05, "loss": 0.0076, "step": 124860 }, { "epoch": 3.860110032762564, "grad_norm": 0.12722405791282654, "learning_rate": 1.8070578599245845e-05, "loss": 0.0077, "step": 124890 }, { "epoch": 3.8610372751437225, "grad_norm": 
0.10433605313301086, "learning_rate": 1.8070114978055263e-05, "loss": 0.0073, "step": 124920 }, { "epoch": 3.861964517524881, "grad_norm": 0.17289750277996063, "learning_rate": 1.8069651356864684e-05, "loss": 0.0084, "step": 124950 }, { "epoch": 3.8628917599060393, "grad_norm": 0.12118003517389297, "learning_rate": 1.8069187735674106e-05, "loss": 0.0081, "step": 124980 }, { "epoch": 3.863819002287198, "grad_norm": 0.10651051998138428, "learning_rate": 1.8068724114483527e-05, "loss": 0.0079, "step": 125010 }, { "epoch": 3.864746244668356, "grad_norm": 0.10316120833158493, "learning_rate": 1.806826049329295e-05, "loss": 0.0082, "step": 125040 }, { "epoch": 3.8656734870495146, "grad_norm": 0.12989778816699982, "learning_rate": 1.806779687210237e-05, "loss": 0.0085, "step": 125070 }, { "epoch": 3.8666007294306732, "grad_norm": 0.1146363839507103, "learning_rate": 1.806733325091179e-05, "loss": 0.0078, "step": 125100 }, { "epoch": 3.867527971811832, "grad_norm": 0.14515268802642822, "learning_rate": 1.806686962972121e-05, "loss": 0.0082, "step": 125130 }, { "epoch": 3.86845521419299, "grad_norm": 0.09566944092512131, "learning_rate": 1.806640600853063e-05, "loss": 0.0081, "step": 125160 }, { "epoch": 3.8693824565741486, "grad_norm": 0.09817391633987427, "learning_rate": 1.8065942387340053e-05, "loss": 0.0087, "step": 125190 }, { "epoch": 3.8703096989553067, "grad_norm": 0.14104199409484863, "learning_rate": 1.8065478766149475e-05, "loss": 0.0077, "step": 125220 }, { "epoch": 3.8712369413364653, "grad_norm": 0.14005254209041595, "learning_rate": 1.8065015144958893e-05, "loss": 0.0089, "step": 125250 }, { "epoch": 3.872164183717624, "grad_norm": 0.08993400633335114, "learning_rate": 1.8064551523768314e-05, "loss": 0.0077, "step": 125280 }, { "epoch": 3.873091426098782, "grad_norm": 0.09888715296983719, "learning_rate": 1.8064087902577736e-05, "loss": 0.0078, "step": 125310 }, { "epoch": 3.8740186684799407, "grad_norm": 0.10428255051374435, "learning_rate": 
1.8063624281387154e-05, "loss": 0.0079, "step": 125340 }, { "epoch": 3.874945910861099, "grad_norm": 0.16898533701896667, "learning_rate": 1.8063160660196575e-05, "loss": 0.008, "step": 125370 }, { "epoch": 3.8758731532422575, "grad_norm": 0.11620552837848663, "learning_rate": 1.8062697039006e-05, "loss": 0.0081, "step": 125400 }, { "epoch": 3.876800395623416, "grad_norm": 0.14223551750183105, "learning_rate": 1.806223341781542e-05, "loss": 0.0081, "step": 125430 }, { "epoch": 3.8777276380045747, "grad_norm": 0.16748668253421783, "learning_rate": 1.806176979662484e-05, "loss": 0.0083, "step": 125460 }, { "epoch": 3.878654880385733, "grad_norm": 0.17086607217788696, "learning_rate": 1.8061306175434258e-05, "loss": 0.0077, "step": 125490 }, { "epoch": 3.8795821227668914, "grad_norm": 0.09484773874282837, "learning_rate": 1.806084255424368e-05, "loss": 0.0083, "step": 125520 }, { "epoch": 3.8805093651480496, "grad_norm": 0.1250600814819336, "learning_rate": 1.80603789330531e-05, "loss": 0.008, "step": 125550 }, { "epoch": 3.881436607529208, "grad_norm": 0.11786874383687973, "learning_rate": 1.8059915311862522e-05, "loss": 0.0075, "step": 125580 }, { "epoch": 3.8823638499103668, "grad_norm": 0.17272323369979858, "learning_rate": 1.8059451690671944e-05, "loss": 0.0075, "step": 125610 }, { "epoch": 3.883291092291525, "grad_norm": 0.0964648425579071, "learning_rate": 1.8058988069481365e-05, "loss": 0.0075, "step": 125640 }, { "epoch": 3.8842183346726835, "grad_norm": 0.10591693222522736, "learning_rate": 1.8058524448290784e-05, "loss": 0.0083, "step": 125670 }, { "epoch": 3.8851455770538417, "grad_norm": 0.10571250319480896, "learning_rate": 1.8058060827100205e-05, "loss": 0.0085, "step": 125700 }, { "epoch": 3.8860728194350003, "grad_norm": 0.11131220310926437, "learning_rate": 1.8057597205909627e-05, "loss": 0.0079, "step": 125730 }, { "epoch": 3.887000061816159, "grad_norm": 0.15054956078529358, "learning_rate": 1.8057133584719048e-05, "loss": 0.0083, "step": 125760 }, 
{ "epoch": 3.887927304197317, "grad_norm": 0.11237400025129318, "learning_rate": 1.805666996352847e-05, "loss": 0.0083, "step": 125790 }, { "epoch": 3.8888545465784756, "grad_norm": 0.1302414834499359, "learning_rate": 1.8056206342337888e-05, "loss": 0.0079, "step": 125820 }, { "epoch": 3.889781788959634, "grad_norm": 0.11004883050918579, "learning_rate": 1.805574272114731e-05, "loss": 0.0082, "step": 125850 }, { "epoch": 3.8907090313407924, "grad_norm": 0.10827922821044922, "learning_rate": 1.805527909995673e-05, "loss": 0.0072, "step": 125880 }, { "epoch": 3.891636273721951, "grad_norm": 0.13674502074718475, "learning_rate": 1.805481547876615e-05, "loss": 0.008, "step": 125910 }, { "epoch": 3.8925635161031096, "grad_norm": 0.1137402132153511, "learning_rate": 1.805435185757557e-05, "loss": 0.0076, "step": 125940 }, { "epoch": 3.8934907584842677, "grad_norm": 0.09919696301221848, "learning_rate": 1.8053888236384992e-05, "loss": 0.0081, "step": 125970 }, { "epoch": 3.8944180008654263, "grad_norm": 0.13721922039985657, "learning_rate": 1.8053424615194413e-05, "loss": 0.0081, "step": 126000 }, { "epoch": 3.8953452432465845, "grad_norm": 0.10136844962835312, "learning_rate": 1.8052960994003835e-05, "loss": 0.0081, "step": 126030 }, { "epoch": 3.896272485627743, "grad_norm": 0.13987553119659424, "learning_rate": 1.8052497372813256e-05, "loss": 0.008, "step": 126060 }, { "epoch": 3.8971997280089017, "grad_norm": 0.10875638574361801, "learning_rate": 1.8052033751622675e-05, "loss": 0.008, "step": 126090 }, { "epoch": 3.89812697039006, "grad_norm": 0.11830208450555801, "learning_rate": 1.8051570130432096e-05, "loss": 0.0077, "step": 126120 }, { "epoch": 3.8990542127712184, "grad_norm": 0.15171046555042267, "learning_rate": 1.8051106509241518e-05, "loss": 0.0077, "step": 126150 }, { "epoch": 3.8999814551523766, "grad_norm": 0.1458015739917755, "learning_rate": 1.805064288805094e-05, "loss": 0.0078, "step": 126180 }, { "epoch": 3.900908697533535, "grad_norm": 
0.11642541736364365, "learning_rate": 1.805017926686036e-05, "loss": 0.0079, "step": 126210 }, { "epoch": 3.901835939914694, "grad_norm": 0.12067073583602905, "learning_rate": 1.804971564566978e-05, "loss": 0.0075, "step": 126240 }, { "epoch": 3.9027631822958524, "grad_norm": 0.08724424242973328, "learning_rate": 1.80492520244792e-05, "loss": 0.0074, "step": 126270 }, { "epoch": 3.9036904246770106, "grad_norm": 0.13110609352588654, "learning_rate": 1.804878840328862e-05, "loss": 0.0086, "step": 126300 }, { "epoch": 3.904617667058169, "grad_norm": 0.1256094127893448, "learning_rate": 1.804832478209804e-05, "loss": 0.0073, "step": 126330 }, { "epoch": 3.9055449094393273, "grad_norm": 0.15216928720474243, "learning_rate": 1.804786116090746e-05, "loss": 0.0081, "step": 126360 }, { "epoch": 3.906472151820486, "grad_norm": 0.14511220157146454, "learning_rate": 1.8047397539716883e-05, "loss": 0.0082, "step": 126390 }, { "epoch": 3.9073993942016445, "grad_norm": 0.12660017609596252, "learning_rate": 1.8046933918526304e-05, "loss": 0.0078, "step": 126420 }, { "epoch": 3.9083266365828027, "grad_norm": 0.11905729025602341, "learning_rate": 1.8046470297335726e-05, "loss": 0.007, "step": 126450 }, { "epoch": 3.9092538789639613, "grad_norm": 0.13277572393417358, "learning_rate": 1.8046006676145147e-05, "loss": 0.0072, "step": 126480 }, { "epoch": 3.9101811213451194, "grad_norm": 0.20128217339515686, "learning_rate": 1.8045543054954565e-05, "loss": 0.0089, "step": 126510 }, { "epoch": 3.911108363726278, "grad_norm": 0.11736565083265305, "learning_rate": 1.8045079433763987e-05, "loss": 0.0084, "step": 126540 }, { "epoch": 3.9120356061074366, "grad_norm": 0.11184646934270859, "learning_rate": 1.804461581257341e-05, "loss": 0.0081, "step": 126570 }, { "epoch": 3.9129628484885948, "grad_norm": 0.10971062630414963, "learning_rate": 1.804415219138283e-05, "loss": 0.0079, "step": 126600 }, { "epoch": 3.9138900908697534, "grad_norm": 0.15453393757343292, "learning_rate": 
1.804368857019225e-05, "loss": 0.0077, "step": 126630 }, { "epoch": 3.9148173332509115, "grad_norm": 0.1543821543455124, "learning_rate": 1.804322494900167e-05, "loss": 0.0081, "step": 126660 }, { "epoch": 3.91574457563207, "grad_norm": 0.09844567626714706, "learning_rate": 1.804276132781109e-05, "loss": 0.0076, "step": 126690 }, { "epoch": 3.9166718180132287, "grad_norm": 0.1656315177679062, "learning_rate": 1.8042297706620513e-05, "loss": 0.0087, "step": 126720 }, { "epoch": 3.9175990603943873, "grad_norm": 0.12266018241643906, "learning_rate": 1.804183408542993e-05, "loss": 0.0087, "step": 126750 }, { "epoch": 3.9185263027755455, "grad_norm": 0.1599063128232956, "learning_rate": 1.8041370464239352e-05, "loss": 0.0077, "step": 126780 }, { "epoch": 3.919453545156704, "grad_norm": 0.15377844870090485, "learning_rate": 1.8040906843048777e-05, "loss": 0.0081, "step": 126810 }, { "epoch": 3.9203807875378622, "grad_norm": 0.13414689898490906, "learning_rate": 1.8040443221858195e-05, "loss": 0.0077, "step": 126840 }, { "epoch": 3.921308029919021, "grad_norm": 0.15732060372829437, "learning_rate": 1.8039979600667617e-05, "loss": 0.0077, "step": 126870 }, { "epoch": 3.9222352723001794, "grad_norm": 0.1683844029903412, "learning_rate": 1.8039515979477035e-05, "loss": 0.0075, "step": 126900 }, { "epoch": 3.9231625146813376, "grad_norm": 0.12442179769277573, "learning_rate": 1.8039052358286456e-05, "loss": 0.0084, "step": 126930 }, { "epoch": 3.924089757062496, "grad_norm": 0.1231749951839447, "learning_rate": 1.8038588737095878e-05, "loss": 0.0078, "step": 126960 }, { "epoch": 3.9250169994436543, "grad_norm": 0.14916956424713135, "learning_rate": 1.80381251159053e-05, "loss": 0.0079, "step": 126990 }, { "epoch": 3.925944241824813, "grad_norm": 0.1364298164844513, "learning_rate": 1.803766149471472e-05, "loss": 0.0081, "step": 127020 }, { "epoch": 3.9268714842059715, "grad_norm": 0.13185566663742065, "learning_rate": 1.8037197873524142e-05, "loss": 0.0084, "step": 127050 }, 
{ "epoch": 3.92779872658713, "grad_norm": 0.15468363463878632, "learning_rate": 1.803673425233356e-05, "loss": 0.0081, "step": 127080 }, { "epoch": 3.9287259689682883, "grad_norm": 0.08768816292285919, "learning_rate": 1.8036270631142982e-05, "loss": 0.0086, "step": 127110 }, { "epoch": 3.929653211349447, "grad_norm": 0.07887507230043411, "learning_rate": 1.8035807009952404e-05, "loss": 0.008, "step": 127140 }, { "epoch": 3.930580453730605, "grad_norm": 0.14508327841758728, "learning_rate": 1.8035343388761825e-05, "loss": 0.0076, "step": 127170 }, { "epoch": 3.9315076961117637, "grad_norm": 0.14313454926013947, "learning_rate": 1.8034879767571247e-05, "loss": 0.0079, "step": 127200 }, { "epoch": 3.9324349384929222, "grad_norm": 0.15425485372543335, "learning_rate": 1.8034416146380665e-05, "loss": 0.0078, "step": 127230 }, { "epoch": 3.9333621808740804, "grad_norm": 0.1678168773651123, "learning_rate": 1.8033952525190086e-05, "loss": 0.008, "step": 127260 }, { "epoch": 3.934289423255239, "grad_norm": 0.16188593208789825, "learning_rate": 1.8033504358039194e-05, "loss": 0.0081, "step": 127290 }, { "epoch": 3.935216665636397, "grad_norm": 0.155975341796875, "learning_rate": 1.8033040736848612e-05, "loss": 0.0076, "step": 127320 }, { "epoch": 3.9361439080175558, "grad_norm": 0.16268032789230347, "learning_rate": 1.8032577115658034e-05, "loss": 0.0077, "step": 127350 }, { "epoch": 3.9370711503987144, "grad_norm": 0.1300899237394333, "learning_rate": 1.8032113494467455e-05, "loss": 0.0076, "step": 127380 }, { "epoch": 3.9379983927798725, "grad_norm": 0.11954429745674133, "learning_rate": 1.8031649873276877e-05, "loss": 0.0072, "step": 127410 }, { "epoch": 3.938925635161031, "grad_norm": 0.07991782575845718, "learning_rate": 1.8031186252086298e-05, "loss": 0.0079, "step": 127440 }, { "epoch": 3.9398528775421893, "grad_norm": 0.13318853080272675, "learning_rate": 1.803072263089572e-05, "loss": 0.0081, "step": 127470 }, { "epoch": 3.940780119923348, "grad_norm": 
0.08670535683631897, "learning_rate": 1.8030259009705138e-05, "loss": 0.0075, "step": 127500 }, { "epoch": 3.9417073623045065, "grad_norm": 0.11157066375017166, "learning_rate": 1.802979538851456e-05, "loss": 0.0078, "step": 127530 }, { "epoch": 3.942634604685665, "grad_norm": 0.11758280545473099, "learning_rate": 1.802933176732398e-05, "loss": 0.0081, "step": 127560 }, { "epoch": 3.9435618470668232, "grad_norm": 0.12076164782047272, "learning_rate": 1.80288681461334e-05, "loss": 0.0082, "step": 127590 }, { "epoch": 3.944489089447982, "grad_norm": 0.12955184280872345, "learning_rate": 1.802840452494282e-05, "loss": 0.0078, "step": 127620 }, { "epoch": 3.94541633182914, "grad_norm": 0.109381765127182, "learning_rate": 1.8027940903752242e-05, "loss": 0.0076, "step": 127650 }, { "epoch": 3.9463435742102986, "grad_norm": 0.15013234317302704, "learning_rate": 1.8027477282561663e-05, "loss": 0.008, "step": 127680 }, { "epoch": 3.947270816591457, "grad_norm": 0.12686511874198914, "learning_rate": 1.8027013661371085e-05, "loss": 0.0078, "step": 127710 }, { "epoch": 3.9481980589726153, "grad_norm": 0.1506604254245758, "learning_rate": 1.8026550040180503e-05, "loss": 0.0081, "step": 127740 }, { "epoch": 3.949125301353774, "grad_norm": 0.1542629599571228, "learning_rate": 1.8026086418989925e-05, "loss": 0.0085, "step": 127770 }, { "epoch": 3.950052543734932, "grad_norm": 0.16435667872428894, "learning_rate": 1.8025622797799346e-05, "loss": 0.0075, "step": 127800 }, { "epoch": 3.9509797861160907, "grad_norm": 0.12235792726278305, "learning_rate": 1.8025159176608768e-05, "loss": 0.0081, "step": 127830 }, { "epoch": 3.9519070284972493, "grad_norm": 0.12119663506746292, "learning_rate": 1.802469555541819e-05, "loss": 0.0071, "step": 127860 }, { "epoch": 3.952834270878408, "grad_norm": 0.1042666807770729, "learning_rate": 1.802423193422761e-05, "loss": 0.0083, "step": 127890 }, { "epoch": 3.953761513259566, "grad_norm": 0.14477424323558807, "learning_rate": 1.802376831303703e-05, 
"loss": 0.0079, "step": 127920 }, { "epoch": 3.9546887556407246, "grad_norm": 0.15882444381713867, "learning_rate": 1.802330469184645e-05, "loss": 0.0069, "step": 127950 }, { "epoch": 3.955615998021883, "grad_norm": 0.14018367230892181, "learning_rate": 1.802284107065587e-05, "loss": 0.0079, "step": 127980 }, { "epoch": 3.9565432404030414, "grad_norm": 0.10356174409389496, "learning_rate": 1.802237744946529e-05, "loss": 0.007, "step": 128010 }, { "epoch": 3.9574704827842, "grad_norm": 0.08202103525400162, "learning_rate": 1.802191382827471e-05, "loss": 0.0076, "step": 128040 }, { "epoch": 3.958397725165358, "grad_norm": 0.13635534048080444, "learning_rate": 1.8021450207084133e-05, "loss": 0.0078, "step": 128070 }, { "epoch": 3.9593249675465167, "grad_norm": 0.14654995501041412, "learning_rate": 1.8020986585893554e-05, "loss": 0.0082, "step": 128100 }, { "epoch": 3.960252209927675, "grad_norm": 0.09844822436571121, "learning_rate": 1.8020522964702976e-05, "loss": 0.0081, "step": 128130 }, { "epoch": 3.9611794523088335, "grad_norm": 0.14532245695590973, "learning_rate": 1.8020059343512394e-05, "loss": 0.0078, "step": 128160 }, { "epoch": 3.962106694689992, "grad_norm": 0.1334424614906311, "learning_rate": 1.8019595722321816e-05, "loss": 0.0079, "step": 128190 }, { "epoch": 3.9630339370711503, "grad_norm": 0.18222498893737793, "learning_rate": 1.8019132101131237e-05, "loss": 0.0078, "step": 128220 }, { "epoch": 3.963961179452309, "grad_norm": 0.14841046929359436, "learning_rate": 1.801866847994066e-05, "loss": 0.0076, "step": 128250 }, { "epoch": 3.964888421833467, "grad_norm": 0.10789448767900467, "learning_rate": 1.801820485875008e-05, "loss": 0.0077, "step": 128280 }, { "epoch": 3.9658156642146256, "grad_norm": 0.12242002785205841, "learning_rate": 1.8017741237559498e-05, "loss": 0.0081, "step": 128310 }, { "epoch": 3.966742906595784, "grad_norm": 0.1673567295074463, "learning_rate": 1.801727761636892e-05, "loss": 0.009, "step": 128340 }, { "epoch": 
3.967670148976943, "grad_norm": 0.12530694901943207, "learning_rate": 1.801681399517834e-05, "loss": 0.007, "step": 128370 }, { "epoch": 3.968597391358101, "grad_norm": 0.13635435700416565, "learning_rate": 1.8016350373987763e-05, "loss": 0.008, "step": 128400 }, { "epoch": 3.9695246337392596, "grad_norm": 0.11833300441503525, "learning_rate": 1.8015886752797184e-05, "loss": 0.0085, "step": 128430 }, { "epoch": 3.9704518761204177, "grad_norm": 0.11583725363016129, "learning_rate": 1.8015423131606606e-05, "loss": 0.0075, "step": 128460 }, { "epoch": 3.9713791185015763, "grad_norm": 0.10750550031661987, "learning_rate": 1.8014959510416024e-05, "loss": 0.0074, "step": 128490 }, { "epoch": 3.972306360882735, "grad_norm": 0.12884779274463654, "learning_rate": 1.8014495889225445e-05, "loss": 0.0079, "step": 128520 }, { "epoch": 3.973233603263893, "grad_norm": 0.1189359724521637, "learning_rate": 1.8014032268034867e-05, "loss": 0.0071, "step": 128550 }, { "epoch": 3.9741608456450517, "grad_norm": 0.10390626639127731, "learning_rate": 1.8013568646844285e-05, "loss": 0.0075, "step": 128580 }, { "epoch": 3.97508808802621, "grad_norm": 0.10967977344989777, "learning_rate": 1.8013105025653706e-05, "loss": 0.0084, "step": 128610 }, { "epoch": 3.9760153304073684, "grad_norm": 0.0869687870144844, "learning_rate": 1.8012641404463128e-05, "loss": 0.0087, "step": 128640 }, { "epoch": 3.976942572788527, "grad_norm": 0.1617935299873352, "learning_rate": 1.801217778327255e-05, "loss": 0.0073, "step": 128670 }, { "epoch": 3.9778698151696856, "grad_norm": 0.07998383790254593, "learning_rate": 1.801171416208197e-05, "loss": 0.0086, "step": 128700 }, { "epoch": 3.978797057550844, "grad_norm": 0.11470500379800797, "learning_rate": 1.801125054089139e-05, "loss": 0.0082, "step": 128730 }, { "epoch": 3.9797242999320024, "grad_norm": 0.10801786929368973, "learning_rate": 1.801078691970081e-05, "loss": 0.008, "step": 128760 }, { "epoch": 3.9806515423131605, "grad_norm": 0.11818356066942215, 
"learning_rate": 1.801033875254992e-05, "loss": 0.0081, "step": 128790 }, { "epoch": 3.981578784694319, "grad_norm": 0.0795990601181984, "learning_rate": 1.8009875131359337e-05, "loss": 0.0086, "step": 128820 }, { "epoch": 3.9825060270754777, "grad_norm": 0.09642798453569412, "learning_rate": 1.8009411510168758e-05, "loss": 0.0082, "step": 128850 }, { "epoch": 3.983433269456636, "grad_norm": 0.1255480796098709, "learning_rate": 1.800894788897818e-05, "loss": 0.0077, "step": 128880 }, { "epoch": 3.9843605118377945, "grad_norm": 0.1586267203092575, "learning_rate": 1.80084842677876e-05, "loss": 0.0073, "step": 128910 }, { "epoch": 3.9852877542189526, "grad_norm": 0.16676120460033417, "learning_rate": 1.8008020646597023e-05, "loss": 0.0081, "step": 128940 }, { "epoch": 3.9862149966001112, "grad_norm": 0.18106231093406677, "learning_rate": 1.8007557025406444e-05, "loss": 0.0074, "step": 128970 }, { "epoch": 3.98714223898127, "grad_norm": 0.11883413791656494, "learning_rate": 1.8007093404215862e-05, "loss": 0.0084, "step": 129000 }, { "epoch": 3.988069481362428, "grad_norm": 0.13498547673225403, "learning_rate": 1.8006629783025284e-05, "loss": 0.0074, "step": 129030 }, { "epoch": 3.9889967237435866, "grad_norm": 0.17085608839988708, "learning_rate": 1.8006166161834705e-05, "loss": 0.0075, "step": 129060 }, { "epoch": 3.9899239661247448, "grad_norm": 0.12326138466596603, "learning_rate": 1.8005702540644127e-05, "loss": 0.0074, "step": 129090 }, { "epoch": 3.9908512085059034, "grad_norm": 0.13607320189476013, "learning_rate": 1.8005238919453548e-05, "loss": 0.0085, "step": 129120 }, { "epoch": 3.991778450887062, "grad_norm": 0.14998821914196014, "learning_rate": 1.8004775298262966e-05, "loss": 0.0082, "step": 129150 }, { "epoch": 3.9927056932682206, "grad_norm": 0.08498384803533554, "learning_rate": 1.8004311677072388e-05, "loss": 0.008, "step": 129180 }, { "epoch": 3.9936329356493787, "grad_norm": 0.14276325702667236, "learning_rate": 1.800384805588181e-05, "loss": 
0.0079, "step": 129210 }, { "epoch": 3.9945601780305373, "grad_norm": 0.126481831073761, "learning_rate": 1.8003384434691228e-05, "loss": 0.0081, "step": 129240 }, { "epoch": 3.9954874204116955, "grad_norm": 0.13922886550426483, "learning_rate": 1.800292081350065e-05, "loss": 0.0075, "step": 129270 }, { "epoch": 3.996414662792854, "grad_norm": 0.1253785640001297, "learning_rate": 1.800245719231007e-05, "loss": 0.0078, "step": 129300 }, { "epoch": 3.9973419051740127, "grad_norm": 0.09933047741651535, "learning_rate": 1.8001993571119492e-05, "loss": 0.0079, "step": 129330 }, { "epoch": 3.998269147555171, "grad_norm": 0.1222730502486229, "learning_rate": 1.8001529949928914e-05, "loss": 0.0076, "step": 129360 }, { "epoch": 3.9991963899363294, "grad_norm": 0.13382066786289215, "learning_rate": 1.8001066328738335e-05, "loss": 0.0084, "step": 129390 }, { "epoch": 4.000123632317488, "grad_norm": 0.14938730001449585, "learning_rate": 1.8000602707547753e-05, "loss": 0.0084, "step": 129420 }, { "epoch": 4.001050874698646, "grad_norm": 0.10139600932598114, "learning_rate": 1.8000139086357175e-05, "loss": 0.0078, "step": 129450 }, { "epoch": 4.001978117079805, "grad_norm": 0.13267692923545837, "learning_rate": 1.7999675465166596e-05, "loss": 0.0074, "step": 129480 }, { "epoch": 4.002905359460963, "grad_norm": 0.1417175531387329, "learning_rate": 1.7999211843976018e-05, "loss": 0.008, "step": 129510 }, { "epoch": 4.003832601842122, "grad_norm": 0.10580160468816757, "learning_rate": 1.799874822278544e-05, "loss": 0.0073, "step": 129540 }, { "epoch": 4.00475984422328, "grad_norm": 0.11739666014909744, "learning_rate": 1.7998284601594857e-05, "loss": 0.0073, "step": 129570 }, { "epoch": 4.005687086604438, "grad_norm": 0.1643565446138382, "learning_rate": 1.799782098040428e-05, "loss": 0.0069, "step": 129600 }, { "epoch": 4.006614328985597, "grad_norm": 0.11615020036697388, "learning_rate": 1.79973573592137e-05, "loss": 0.0078, "step": 129630 }, { "epoch": 4.0075415713667555, 
"grad_norm": 0.12681220471858978, "learning_rate": 1.799689373802312e-05, "loss": 0.0076, "step": 129660 }, { "epoch": 4.008468813747914, "grad_norm": 0.08577977865934372, "learning_rate": 1.7996430116832543e-05, "loss": 0.0087, "step": 129690 }, { "epoch": 4.009396056129072, "grad_norm": 0.14764931797981262, "learning_rate": 1.7995966495641965e-05, "loss": 0.0076, "step": 129720 }, { "epoch": 4.01032329851023, "grad_norm": 0.17287296056747437, "learning_rate": 1.7995502874451383e-05, "loss": 0.0072, "step": 129750 }, { "epoch": 4.011250540891389, "grad_norm": 0.11462659388780594, "learning_rate": 1.7995039253260804e-05, "loss": 0.0074, "step": 129780 }, { "epoch": 4.012177783272548, "grad_norm": 0.09754712879657745, "learning_rate": 1.7994575632070223e-05, "loss": 0.0081, "step": 129810 }, { "epoch": 4.013105025653706, "grad_norm": 0.10459887981414795, "learning_rate": 1.7994112010879644e-05, "loss": 0.0077, "step": 129840 }, { "epoch": 4.014032268034864, "grad_norm": 0.16517393290996552, "learning_rate": 1.7993648389689066e-05, "loss": 0.007, "step": 129870 }, { "epoch": 4.0149595104160225, "grad_norm": 0.0901302844285965, "learning_rate": 1.7993184768498487e-05, "loss": 0.0068, "step": 129900 }, { "epoch": 4.015886752797181, "grad_norm": 0.09698948264122009, "learning_rate": 1.799272114730791e-05, "loss": 0.0083, "step": 129930 }, { "epoch": 4.01681399517834, "grad_norm": 0.1212879940867424, "learning_rate": 1.799225752611733e-05, "loss": 0.0073, "step": 129960 }, { "epoch": 4.017741237559498, "grad_norm": 0.0992366150021553, "learning_rate": 1.7991793904926748e-05, "loss": 0.0074, "step": 129990 }, { "epoch": 4.018668479940657, "grad_norm": 0.09393645823001862, "learning_rate": 1.799133028373617e-05, "loss": 0.0086, "step": 130020 }, { "epoch": 4.019595722321815, "grad_norm": 0.1536663919687271, "learning_rate": 1.799086666254559e-05, "loss": 0.0082, "step": 130050 }, { "epoch": 4.020522964702973, "grad_norm": 0.10065682232379913, "learning_rate": 
1.7990403041355013e-05, "loss": 0.0077, "step": 130080 }, { "epoch": 4.021450207084132, "grad_norm": 0.10054944455623627, "learning_rate": 1.7989939420164434e-05, "loss": 0.007, "step": 130110 }, { "epoch": 4.02237744946529, "grad_norm": 0.148135706782341, "learning_rate": 1.7989475798973852e-05, "loss": 0.0082, "step": 130140 }, { "epoch": 4.023304691846449, "grad_norm": 0.10569805651903152, "learning_rate": 1.7989012177783274e-05, "loss": 0.0079, "step": 130170 }, { "epoch": 4.024231934227607, "grad_norm": 0.10759950429201126, "learning_rate": 1.7988548556592695e-05, "loss": 0.0084, "step": 130200 }, { "epoch": 4.025159176608765, "grad_norm": 0.08212367445230484, "learning_rate": 1.7988084935402114e-05, "loss": 0.0079, "step": 130230 }, { "epoch": 4.026086418989924, "grad_norm": 0.1325373500585556, "learning_rate": 1.7987621314211535e-05, "loss": 0.008, "step": 130260 }, { "epoch": 4.0270136613710825, "grad_norm": 0.1463516801595688, "learning_rate": 1.7987157693020957e-05, "loss": 0.008, "step": 130290 }, { "epoch": 4.027940903752241, "grad_norm": 0.15320324897766113, "learning_rate": 1.7986694071830378e-05, "loss": 0.008, "step": 130320 }, { "epoch": 4.028868146133399, "grad_norm": 0.13034936785697937, "learning_rate": 1.79862304506398e-05, "loss": 0.0076, "step": 130350 }, { "epoch": 4.029795388514557, "grad_norm": 0.16463680565357208, "learning_rate": 1.798576682944922e-05, "loss": 0.0077, "step": 130380 }, { "epoch": 4.030722630895716, "grad_norm": 0.12481920421123505, "learning_rate": 1.798530320825864e-05, "loss": 0.0078, "step": 130410 }, { "epoch": 4.031649873276875, "grad_norm": 0.09046881645917892, "learning_rate": 1.798483958706806e-05, "loss": 0.0071, "step": 130440 }, { "epoch": 4.032577115658033, "grad_norm": 0.10848797112703323, "learning_rate": 1.7984375965877482e-05, "loss": 0.0079, "step": 130470 }, { "epoch": 4.033504358039192, "grad_norm": 0.17632918059825897, "learning_rate": 1.7983912344686904e-05, "loss": 0.008, "step": 130500 }, { 
"epoch": 4.0344316004203495, "grad_norm": 0.1557653248310089, "learning_rate": 1.7983448723496325e-05, "loss": 0.008, "step": 130530 }, { "epoch": 4.035358842801508, "grad_norm": 0.14344221353530884, "learning_rate": 1.7982985102305743e-05, "loss": 0.0075, "step": 130560 }, { "epoch": 4.036286085182667, "grad_norm": 0.14648157358169556, "learning_rate": 1.7982521481115165e-05, "loss": 0.0077, "step": 130590 }, { "epoch": 4.037213327563825, "grad_norm": 0.14594382047653198, "learning_rate": 1.7982057859924586e-05, "loss": 0.0076, "step": 130620 }, { "epoch": 4.038140569944984, "grad_norm": 0.10582228749990463, "learning_rate": 1.7981594238734004e-05, "loss": 0.0077, "step": 130650 }, { "epoch": 4.039067812326142, "grad_norm": 0.11843997985124588, "learning_rate": 1.7981130617543426e-05, "loss": 0.0072, "step": 130680 }, { "epoch": 4.0399950547073, "grad_norm": 0.1107873022556305, "learning_rate": 1.7980666996352847e-05, "loss": 0.0072, "step": 130710 }, { "epoch": 4.040922297088459, "grad_norm": 0.14993701875209808, "learning_rate": 1.798020337516227e-05, "loss": 0.0076, "step": 130740 }, { "epoch": 4.0418495394696174, "grad_norm": 0.10703025013208389, "learning_rate": 1.797973975397169e-05, "loss": 0.0074, "step": 130770 }, { "epoch": 4.042776781850776, "grad_norm": 0.15770059823989868, "learning_rate": 1.797927613278111e-05, "loss": 0.0079, "step": 130800 }, { "epoch": 4.043704024231935, "grad_norm": 0.1295211762189865, "learning_rate": 1.797881251159053e-05, "loss": 0.0082, "step": 130830 }, { "epoch": 4.044631266613092, "grad_norm": 0.20027455687522888, "learning_rate": 1.797834889039995e-05, "loss": 0.0083, "step": 130860 }, { "epoch": 4.045558508994251, "grad_norm": 0.16623584926128387, "learning_rate": 1.7977885269209373e-05, "loss": 0.0085, "step": 130890 }, { "epoch": 4.04648575137541, "grad_norm": 0.14285629987716675, "learning_rate": 1.7977421648018795e-05, "loss": 0.0079, "step": 130920 }, { "epoch": 4.047412993756568, "grad_norm": 0.14035506546497345, 
"learning_rate": 1.7976958026828216e-05, "loss": 0.0082, "step": 130950 }, { "epoch": 4.048340236137727, "grad_norm": 0.09353161603212357, "learning_rate": 1.797650985967732e-05, "loss": 0.008, "step": 130980 }, { "epoch": 4.0492674785188845, "grad_norm": 0.1701636165380478, "learning_rate": 1.7976046238486742e-05, "loss": 0.0076, "step": 131010 }, { "epoch": 4.050194720900043, "grad_norm": 0.07977306842803955, "learning_rate": 1.7975582617296164e-05, "loss": 0.0076, "step": 131040 }, { "epoch": 4.051121963281202, "grad_norm": 0.1069093644618988, "learning_rate": 1.7975118996105582e-05, "loss": 0.0073, "step": 131070 }, { "epoch": 4.05204920566236, "grad_norm": 0.15062940120697021, "learning_rate": 1.7974655374915003e-05, "loss": 0.0081, "step": 131100 }, { "epoch": 4.052976448043519, "grad_norm": 0.11506178975105286, "learning_rate": 1.7974191753724425e-05, "loss": 0.0084, "step": 131130 }, { "epoch": 4.053903690424677, "grad_norm": 0.14136749505996704, "learning_rate": 1.7973728132533846e-05, "loss": 0.0085, "step": 131160 }, { "epoch": 4.054830932805835, "grad_norm": 0.12098030000925064, "learning_rate": 1.7973264511343268e-05, "loss": 0.0077, "step": 131190 }, { "epoch": 4.055758175186994, "grad_norm": 0.09098857641220093, "learning_rate": 1.797280089015269e-05, "loss": 0.0074, "step": 131220 }, { "epoch": 4.056685417568152, "grad_norm": 0.0926334336400032, "learning_rate": 1.7972337268962107e-05, "loss": 0.0082, "step": 131250 }, { "epoch": 4.057612659949311, "grad_norm": 0.1347314715385437, "learning_rate": 1.797187364777153e-05, "loss": 0.0075, "step": 131280 }, { "epoch": 4.05853990233047, "grad_norm": 0.14888140559196472, "learning_rate": 1.797141002658095e-05, "loss": 0.008, "step": 131310 }, { "epoch": 4.059467144711627, "grad_norm": 0.14458996057510376, "learning_rate": 1.7970946405390372e-05, "loss": 0.0072, "step": 131340 }, { "epoch": 4.060394387092786, "grad_norm": 0.1125316470861435, "learning_rate": 1.7970482784199793e-05, "loss": 0.008, "step": 
131370 }, { "epoch": 4.0613216294739445, "grad_norm": 0.22512905299663544, "learning_rate": 1.797001916300921e-05, "loss": 0.0084, "step": 131400 }, { "epoch": 4.062248871855103, "grad_norm": 0.14550834894180298, "learning_rate": 1.7969555541818633e-05, "loss": 0.0076, "step": 131430 }, { "epoch": 4.063176114236262, "grad_norm": 0.11040198057889938, "learning_rate": 1.7969091920628055e-05, "loss": 0.0075, "step": 131460 }, { "epoch": 4.064103356617419, "grad_norm": 0.08588742464780807, "learning_rate": 1.7968628299437473e-05, "loss": 0.0072, "step": 131490 }, { "epoch": 4.065030598998578, "grad_norm": 0.07731255143880844, "learning_rate": 1.7968164678246894e-05, "loss": 0.0078, "step": 131520 }, { "epoch": 4.065957841379737, "grad_norm": 0.12353474646806717, "learning_rate": 1.7967701057056316e-05, "loss": 0.0077, "step": 131550 }, { "epoch": 4.066885083760895, "grad_norm": 0.13262119889259338, "learning_rate": 1.7967237435865737e-05, "loss": 0.0078, "step": 131580 }, { "epoch": 4.067812326142054, "grad_norm": 0.12651698291301727, "learning_rate": 1.796677381467516e-05, "loss": 0.0072, "step": 131610 }, { "epoch": 4.068739568523212, "grad_norm": 0.0885661244392395, "learning_rate": 1.7966310193484577e-05, "loss": 0.0078, "step": 131640 }, { "epoch": 4.06966681090437, "grad_norm": 0.14070983231067657, "learning_rate": 1.7965846572294e-05, "loss": 0.007, "step": 131670 }, { "epoch": 4.070594053285529, "grad_norm": 0.13378340005874634, "learning_rate": 1.796538295110342e-05, "loss": 0.0082, "step": 131700 }, { "epoch": 4.071521295666687, "grad_norm": 0.12715306878089905, "learning_rate": 1.796491932991284e-05, "loss": 0.0073, "step": 131730 }, { "epoch": 4.072448538047846, "grad_norm": 0.14247746765613556, "learning_rate": 1.7964455708722263e-05, "loss": 0.0082, "step": 131760 }, { "epoch": 4.0733757804290045, "grad_norm": 0.09810046851634979, "learning_rate": 1.7963992087531684e-05, "loss": 0.0078, "step": 131790 }, { "epoch": 4.074303022810162, "grad_norm": 
0.11068812757730484, "learning_rate": 1.7963528466341102e-05, "loss": 0.0084, "step": 131820 }, { "epoch": 4.075230265191321, "grad_norm": 0.10293274372816086, "learning_rate": 1.7963064845150524e-05, "loss": 0.008, "step": 131850 }, { "epoch": 4.076157507572479, "grad_norm": 0.1762697398662567, "learning_rate": 1.7962601223959945e-05, "loss": 0.0078, "step": 131880 }, { "epoch": 4.077084749953638, "grad_norm": 0.14567989110946655, "learning_rate": 1.7962137602769364e-05, "loss": 0.0079, "step": 131910 }, { "epoch": 4.078011992334797, "grad_norm": 0.14292176067829132, "learning_rate": 1.7961673981578785e-05, "loss": 0.0069, "step": 131940 }, { "epoch": 4.078939234715954, "grad_norm": 0.10602311044931412, "learning_rate": 1.7961210360388207e-05, "loss": 0.0077, "step": 131970 }, { "epoch": 4.079866477097113, "grad_norm": 0.10776689648628235, "learning_rate": 1.7960746739197628e-05, "loss": 0.0078, "step": 132000 }, { "epoch": 4.0807937194782715, "grad_norm": 0.09656447172164917, "learning_rate": 1.796028311800705e-05, "loss": 0.0073, "step": 132030 }, { "epoch": 4.08172096185943, "grad_norm": 0.16954798996448517, "learning_rate": 1.7959819496816468e-05, "loss": 0.008, "step": 132060 }, { "epoch": 4.082648204240589, "grad_norm": 0.10751048475503922, "learning_rate": 1.795935587562589e-05, "loss": 0.0071, "step": 132090 }, { "epoch": 4.083575446621747, "grad_norm": 0.10170537978410721, "learning_rate": 1.795889225443531e-05, "loss": 0.0074, "step": 132120 }, { "epoch": 4.084502689002905, "grad_norm": 0.1728229522705078, "learning_rate": 1.7958428633244732e-05, "loss": 0.0079, "step": 132150 }, { "epoch": 4.085429931384064, "grad_norm": 0.16267187893390656, "learning_rate": 1.7957965012054154e-05, "loss": 0.0077, "step": 132180 }, { "epoch": 4.086357173765222, "grad_norm": 0.1499231606721878, "learning_rate": 1.7957501390863575e-05, "loss": 0.0076, "step": 132210 }, { "epoch": 4.087284416146381, "grad_norm": 0.13369275629520416, "learning_rate": 1.7957037769672993e-05, 
"loss": 0.007, "step": 132240 }, { "epoch": 4.088211658527539, "grad_norm": 0.10757968574762344, "learning_rate": 1.7956574148482415e-05, "loss": 0.0074, "step": 132270 }, { "epoch": 4.089138900908697, "grad_norm": 0.1356998234987259, "learning_rate": 1.7956110527291833e-05, "loss": 0.0083, "step": 132300 }, { "epoch": 4.090066143289856, "grad_norm": 0.13352195918560028, "learning_rate": 1.7955646906101255e-05, "loss": 0.0077, "step": 132330 }, { "epoch": 4.090993385671014, "grad_norm": 0.11752789467573166, "learning_rate": 1.795518328491068e-05, "loss": 0.0081, "step": 132360 }, { "epoch": 4.091920628052173, "grad_norm": 0.13614895939826965, "learning_rate": 1.7954719663720098e-05, "loss": 0.0077, "step": 132390 }, { "epoch": 4.0928478704333315, "grad_norm": 0.07253704220056534, "learning_rate": 1.795425604252952e-05, "loss": 0.0076, "step": 132420 }, { "epoch": 4.09377511281449, "grad_norm": 0.11362693458795547, "learning_rate": 1.795379242133894e-05, "loss": 0.0081, "step": 132450 }, { "epoch": 4.094702355195648, "grad_norm": 0.08474447578191757, "learning_rate": 1.795332880014836e-05, "loss": 0.0075, "step": 132480 }, { "epoch": 4.0956295975768064, "grad_norm": 0.11741628497838974, "learning_rate": 1.795286517895778e-05, "loss": 0.0074, "step": 132510 }, { "epoch": 4.096556839957965, "grad_norm": 0.18058843910694122, "learning_rate": 1.79524015577672e-05, "loss": 0.0073, "step": 132540 }, { "epoch": 4.097484082339124, "grad_norm": 0.14723332226276398, "learning_rate": 1.7951937936576623e-05, "loss": 0.0073, "step": 132570 }, { "epoch": 4.098411324720282, "grad_norm": 0.15065744519233704, "learning_rate": 1.7951474315386045e-05, "loss": 0.0079, "step": 132600 }, { "epoch": 4.09933856710144, "grad_norm": 0.09456325322389603, "learning_rate": 1.7951010694195463e-05, "loss": 0.008, "step": 132630 }, { "epoch": 4.1002658094825986, "grad_norm": 0.10682754963636398, "learning_rate": 1.7950547073004884e-05, "loss": 0.0077, "step": 132660 }, { "epoch": 
4.101193051863757, "grad_norm": 0.1306118369102478, "learning_rate": 1.7950083451814306e-05, "loss": 0.0072, "step": 132690 }, { "epoch": 4.102120294244916, "grad_norm": 0.20135274529457092, "learning_rate": 1.7949619830623727e-05, "loss": 0.0078, "step": 132720 }, { "epoch": 4.103047536626074, "grad_norm": 0.10750208050012589, "learning_rate": 1.794915620943315e-05, "loss": 0.0081, "step": 132750 }, { "epoch": 4.103974779007232, "grad_norm": 0.15974067151546478, "learning_rate": 1.794869258824257e-05, "loss": 0.0075, "step": 132780 }, { "epoch": 4.104902021388391, "grad_norm": 0.12117225676774979, "learning_rate": 1.794822896705199e-05, "loss": 0.0083, "step": 132810 }, { "epoch": 4.105829263769549, "grad_norm": 0.1081472635269165, "learning_rate": 1.794776534586141e-05, "loss": 0.0074, "step": 132840 }, { "epoch": 4.106756506150708, "grad_norm": 0.0818278044462204, "learning_rate": 1.794730172467083e-05, "loss": 0.0082, "step": 132870 }, { "epoch": 4.1076837485318665, "grad_norm": 0.1386931836605072, "learning_rate": 1.794683810348025e-05, "loss": 0.0076, "step": 132900 }, { "epoch": 4.108610990913025, "grad_norm": 0.09118567407131195, "learning_rate": 1.794637448228967e-05, "loss": 0.008, "step": 132930 }, { "epoch": 4.109538233294183, "grad_norm": 0.19258271157741547, "learning_rate": 1.7945910861099093e-05, "loss": 0.0077, "step": 132960 }, { "epoch": 4.110465475675341, "grad_norm": 0.11926763504743576, "learning_rate": 1.7945447239908514e-05, "loss": 0.008, "step": 132990 }, { "epoch": 4.1113927180565, "grad_norm": 0.10702764242887497, "learning_rate": 1.7944983618717936e-05, "loss": 0.0075, "step": 133020 }, { "epoch": 4.112319960437659, "grad_norm": 0.12574538588523865, "learning_rate": 1.7944519997527354e-05, "loss": 0.0078, "step": 133050 }, { "epoch": 4.113247202818817, "grad_norm": 0.12163501232862473, "learning_rate": 1.7944056376336775e-05, "loss": 0.0071, "step": 133080 }, { "epoch": 4.114174445199975, "grad_norm": 0.12122626602649689, 
"learning_rate": 1.7943592755146197e-05, "loss": 0.0082, "step": 133110 }, { "epoch": 4.1151016875811335, "grad_norm": 0.09287907928228378, "learning_rate": 1.7943129133955618e-05, "loss": 0.0078, "step": 133140 }, { "epoch": 4.116028929962292, "grad_norm": 0.09513799846172333, "learning_rate": 1.794266551276504e-05, "loss": 0.0075, "step": 133170 }, { "epoch": 4.116956172343451, "grad_norm": 0.12084096670150757, "learning_rate": 1.794220189157446e-05, "loss": 0.008, "step": 133200 }, { "epoch": 4.117883414724609, "grad_norm": 0.10880518704652786, "learning_rate": 1.794173827038388e-05, "loss": 0.0075, "step": 133230 }, { "epoch": 4.118810657105768, "grad_norm": 0.14652973413467407, "learning_rate": 1.79412746491933e-05, "loss": 0.0069, "step": 133260 }, { "epoch": 4.119737899486926, "grad_norm": 0.11913669109344482, "learning_rate": 1.794081102800272e-05, "loss": 0.0076, "step": 133290 }, { "epoch": 4.120665141868084, "grad_norm": 0.14945881068706512, "learning_rate": 1.794034740681214e-05, "loss": 0.0072, "step": 133320 }, { "epoch": 4.121592384249243, "grad_norm": 0.10081756860017776, "learning_rate": 1.7939883785621562e-05, "loss": 0.0076, "step": 133350 }, { "epoch": 4.122519626630401, "grad_norm": 0.1058572307229042, "learning_rate": 1.7939420164430984e-05, "loss": 0.0076, "step": 133380 }, { "epoch": 4.12344686901156, "grad_norm": 0.11694315075874329, "learning_rate": 1.7938956543240405e-05, "loss": 0.0076, "step": 133410 }, { "epoch": 4.124374111392718, "grad_norm": 0.13294747471809387, "learning_rate": 1.7938492922049827e-05, "loss": 0.0071, "step": 133440 }, { "epoch": 4.125301353773876, "grad_norm": 0.1260756403207779, "learning_rate": 1.793804475489893e-05, "loss": 0.0082, "step": 133470 }, { "epoch": 4.126228596155035, "grad_norm": 0.11549487709999084, "learning_rate": 1.7937581133708353e-05, "loss": 0.0084, "step": 133500 }, { "epoch": 4.1271558385361935, "grad_norm": 0.10858603566884995, "learning_rate": 1.7937117512517774e-05, "loss": 0.0081, 
"step": 133530 }, { "epoch": 4.128083080917352, "grad_norm": 0.09486912935972214, "learning_rate": 1.7936653891327192e-05, "loss": 0.0074, "step": 133560 }, { "epoch": 4.12901032329851, "grad_norm": 0.11505357176065445, "learning_rate": 1.7936190270136614e-05, "loss": 0.0086, "step": 133590 }, { "epoch": 4.129937565679668, "grad_norm": 0.12391208112239838, "learning_rate": 1.793572664894604e-05, "loss": 0.0078, "step": 133620 }, { "epoch": 4.130864808060827, "grad_norm": 0.13456246256828308, "learning_rate": 1.7935263027755457e-05, "loss": 0.0077, "step": 133650 }, { "epoch": 4.131792050441986, "grad_norm": 0.17389869689941406, "learning_rate": 1.7934799406564878e-05, "loss": 0.008, "step": 133680 }, { "epoch": 4.132719292823144, "grad_norm": 0.1653982400894165, "learning_rate": 1.7934335785374296e-05, "loss": 0.0083, "step": 133710 }, { "epoch": 4.133646535204303, "grad_norm": 0.12310171872377396, "learning_rate": 1.7933872164183718e-05, "loss": 0.0074, "step": 133740 }, { "epoch": 4.1345737775854605, "grad_norm": 0.07750450074672699, "learning_rate": 1.793340854299314e-05, "loss": 0.0071, "step": 133770 }, { "epoch": 4.135501019966619, "grad_norm": 0.11882323771715164, "learning_rate": 1.793294492180256e-05, "loss": 0.0074, "step": 133800 }, { "epoch": 4.136428262347778, "grad_norm": 0.10694760084152222, "learning_rate": 1.7932481300611982e-05, "loss": 0.0074, "step": 133830 }, { "epoch": 4.137355504728936, "grad_norm": 0.09970377385616302, "learning_rate": 1.7932017679421404e-05, "loss": 0.008, "step": 133860 }, { "epoch": 4.138282747110095, "grad_norm": 0.14380617439746857, "learning_rate": 1.7931554058230822e-05, "loss": 0.0079, "step": 133890 }, { "epoch": 4.139209989491253, "grad_norm": 0.11057461053133011, "learning_rate": 1.7931090437040243e-05, "loss": 0.0079, "step": 133920 }, { "epoch": 4.140137231872411, "grad_norm": 0.15907832980155945, "learning_rate": 1.7930626815849665e-05, "loss": 0.0074, "step": 133950 }, { "epoch": 4.14106447425357, "grad_norm": 
0.12238583713769913, "learning_rate": 1.7930163194659086e-05, "loss": 0.0087, "step": 133980 }, { "epoch": 4.141991716634728, "grad_norm": 0.12678390741348267, "learning_rate": 1.7929699573468508e-05, "loss": 0.007, "step": 134010 }, { "epoch": 4.142918959015887, "grad_norm": 0.16050119698047638, "learning_rate": 1.792923595227793e-05, "loss": 0.0077, "step": 134040 }, { "epoch": 4.143846201397045, "grad_norm": 0.1067274883389473, "learning_rate": 1.7928772331087348e-05, "loss": 0.0075, "step": 134070 }, { "epoch": 4.144773443778203, "grad_norm": 0.12755516171455383, "learning_rate": 1.792830870989677e-05, "loss": 0.0078, "step": 134100 }, { "epoch": 4.145700686159362, "grad_norm": 0.12515652179718018, "learning_rate": 1.7927845088706187e-05, "loss": 0.0074, "step": 134130 }, { "epoch": 4.1466279285405205, "grad_norm": 0.10877413302659988, "learning_rate": 1.792738146751561e-05, "loss": 0.0078, "step": 134160 }, { "epoch": 4.147555170921679, "grad_norm": 0.08519995212554932, "learning_rate": 1.792691784632503e-05, "loss": 0.0081, "step": 134190 }, { "epoch": 4.148482413302838, "grad_norm": 0.1111951470375061, "learning_rate": 1.7926454225134452e-05, "loss": 0.0079, "step": 134220 }, { "epoch": 4.1494096556839954, "grad_norm": 0.10849592089653015, "learning_rate": 1.7925990603943873e-05, "loss": 0.0083, "step": 134250 }, { "epoch": 4.150336898065154, "grad_norm": 0.1507599651813507, "learning_rate": 1.7925526982753295e-05, "loss": 0.0074, "step": 134280 }, { "epoch": 4.151264140446313, "grad_norm": 0.12339285761117935, "learning_rate": 1.7925063361562713e-05, "loss": 0.0081, "step": 134310 }, { "epoch": 4.152191382827471, "grad_norm": 0.11195474117994308, "learning_rate": 1.7924599740372134e-05, "loss": 0.0076, "step": 134340 }, { "epoch": 4.15311862520863, "grad_norm": 0.09214905649423599, "learning_rate": 1.7924136119181556e-05, "loss": 0.0081, "step": 134370 }, { "epoch": 4.1540458675897876, "grad_norm": 0.15658220648765564, "learning_rate": 
1.7923672497990977e-05, "loss": 0.0076, "step": 134400 }, { "epoch": 4.154973109970946, "grad_norm": 0.13088305294513702, "learning_rate": 1.79232088768004e-05, "loss": 0.0076, "step": 134430 }, { "epoch": 4.155900352352105, "grad_norm": 0.11254215240478516, "learning_rate": 1.7922745255609817e-05, "loss": 0.008, "step": 134460 }, { "epoch": 4.156827594733263, "grad_norm": 0.09529317170381546, "learning_rate": 1.792228163441924e-05, "loss": 0.007, "step": 134490 }, { "epoch": 4.157754837114422, "grad_norm": 0.13429683446884155, "learning_rate": 1.792181801322866e-05, "loss": 0.0074, "step": 134520 }, { "epoch": 4.1586820794955806, "grad_norm": 0.1335681974887848, "learning_rate": 1.7921354392038078e-05, "loss": 0.0079, "step": 134550 }, { "epoch": 4.159609321876738, "grad_norm": 0.10877745598554611, "learning_rate": 1.79208907708475e-05, "loss": 0.0074, "step": 134580 }, { "epoch": 4.160536564257897, "grad_norm": 0.17448632419109344, "learning_rate": 1.792042714965692e-05, "loss": 0.008, "step": 134610 }, { "epoch": 4.1614638066390555, "grad_norm": 0.11586928367614746, "learning_rate": 1.7919963528466343e-05, "loss": 0.0077, "step": 134640 }, { "epoch": 4.162391049020214, "grad_norm": 0.14520534873008728, "learning_rate": 1.7919499907275764e-05, "loss": 0.0078, "step": 134670 }, { "epoch": 4.163318291401373, "grad_norm": 0.1289711892604828, "learning_rate": 1.7919036286085186e-05, "loss": 0.0076, "step": 134700 }, { "epoch": 4.16424553378253, "grad_norm": 0.15762203931808472, "learning_rate": 1.7918572664894604e-05, "loss": 0.0075, "step": 134730 }, { "epoch": 4.165172776163689, "grad_norm": 0.10574372857809067, "learning_rate": 1.7918109043704025e-05, "loss": 0.0082, "step": 134760 }, { "epoch": 4.166100018544848, "grad_norm": 0.21140773594379425, "learning_rate": 1.7917645422513447e-05, "loss": 0.0071, "step": 134790 }, { "epoch": 4.167027260926006, "grad_norm": 0.1889595240354538, "learning_rate": 1.791718180132287e-05, "loss": 0.0081, "step": 134820 }, { 
"epoch": 4.167954503307165, "grad_norm": 0.1368233859539032, "learning_rate": 1.791671818013229e-05, "loss": 0.0075, "step": 134850 }, { "epoch": 4.1688817456883225, "grad_norm": 0.08977579325437546, "learning_rate": 1.7916254558941708e-05, "loss": 0.0075, "step": 134880 }, { "epoch": 4.169808988069481, "grad_norm": 0.13096769154071808, "learning_rate": 1.791579093775113e-05, "loss": 0.0077, "step": 134910 }, { "epoch": 4.17073623045064, "grad_norm": 0.2424074411392212, "learning_rate": 1.791532731656055e-05, "loss": 0.0083, "step": 134940 }, { "epoch": 4.171663472831798, "grad_norm": 0.12828004360198975, "learning_rate": 1.791486369536997e-05, "loss": 0.0078, "step": 134970 }, { "epoch": 4.172590715212957, "grad_norm": 0.15075017511844635, "learning_rate": 1.791440007417939e-05, "loss": 0.0086, "step": 135000 }, { "epoch": 4.1735179575941155, "grad_norm": 0.09814751893281937, "learning_rate": 1.7913936452988815e-05, "loss": 0.008, "step": 135030 }, { "epoch": 4.174445199975273, "grad_norm": 0.07265017926692963, "learning_rate": 1.7913472831798234e-05, "loss": 0.0081, "step": 135060 }, { "epoch": 4.175372442356432, "grad_norm": 0.10746470093727112, "learning_rate": 1.7913009210607655e-05, "loss": 0.0077, "step": 135090 }, { "epoch": 4.17629968473759, "grad_norm": 0.08075996488332748, "learning_rate": 1.7912545589417073e-05, "loss": 0.0073, "step": 135120 }, { "epoch": 4.177226927118749, "grad_norm": 0.10567345470190048, "learning_rate": 1.7912081968226495e-05, "loss": 0.0077, "step": 135150 }, { "epoch": 4.178154169499908, "grad_norm": 0.13595862686634064, "learning_rate": 1.7911618347035916e-05, "loss": 0.0077, "step": 135180 }, { "epoch": 4.179081411881065, "grad_norm": 0.12152441591024399, "learning_rate": 1.7911154725845338e-05, "loss": 0.0077, "step": 135210 }, { "epoch": 4.180008654262224, "grad_norm": 0.1544717401266098, "learning_rate": 1.791069110465476e-05, "loss": 0.0067, "step": 135240 }, { "epoch": 4.1809358966433825, "grad_norm": 0.124333456158638, 
"learning_rate": 1.791022748346418e-05, "loss": 0.0069, "step": 135270 }, { "epoch": 4.181863139024541, "grad_norm": 0.14380568265914917, "learning_rate": 1.79097638622736e-05, "loss": 0.0082, "step": 135300 }, { "epoch": 4.1827903814057, "grad_norm": 0.15047810971736908, "learning_rate": 1.790930024108302e-05, "loss": 0.0076, "step": 135330 }, { "epoch": 4.183717623786858, "grad_norm": 0.12249570339918137, "learning_rate": 1.7908836619892442e-05, "loss": 0.0082, "step": 135360 }, { "epoch": 4.184644866168016, "grad_norm": 0.1487228125333786, "learning_rate": 1.7908372998701863e-05, "loss": 0.0077, "step": 135390 }, { "epoch": 4.185572108549175, "grad_norm": 0.11289842426776886, "learning_rate": 1.7907909377511285e-05, "loss": 0.0068, "step": 135420 }, { "epoch": 4.186499350930333, "grad_norm": 0.08759872615337372, "learning_rate": 1.7907445756320703e-05, "loss": 0.008, "step": 135450 }, { "epoch": 4.187426593311492, "grad_norm": 0.14454416930675507, "learning_rate": 1.790699758916981e-05, "loss": 0.0079, "step": 135480 }, { "epoch": 4.18835383569265, "grad_norm": 0.13753901422023773, "learning_rate": 1.7906533967979232e-05, "loss": 0.0077, "step": 135510 }, { "epoch": 4.189281078073808, "grad_norm": 0.14374977350234985, "learning_rate": 1.790607034678865e-05, "loss": 0.0077, "step": 135540 }, { "epoch": 4.190208320454967, "grad_norm": 0.16559569537639618, "learning_rate": 1.7905606725598072e-05, "loss": 0.0078, "step": 135570 }, { "epoch": 4.191135562836125, "grad_norm": 0.14022304117679596, "learning_rate": 1.7905143104407494e-05, "loss": 0.0076, "step": 135600 }, { "epoch": 4.192062805217284, "grad_norm": 0.13936980068683624, "learning_rate": 1.7904679483216915e-05, "loss": 0.0075, "step": 135630 }, { "epoch": 4.1929900475984425, "grad_norm": 0.10650769621133804, "learning_rate": 1.7904215862026337e-05, "loss": 0.0073, "step": 135660 }, { "epoch": 4.1939172899796, "grad_norm": 0.12274541705846786, "learning_rate": 1.7903752240835758e-05, "loss": 0.0076, "step": 
135690 }, { "epoch": 4.194844532360759, "grad_norm": 0.1321442425251007, "learning_rate": 1.7903288619645176e-05, "loss": 0.0075, "step": 135720 }, { "epoch": 4.195771774741917, "grad_norm": 0.08216425031423569, "learning_rate": 1.7902824998454598e-05, "loss": 0.0075, "step": 135750 }, { "epoch": 4.196699017123076, "grad_norm": 0.08993197977542877, "learning_rate": 1.790236137726402e-05, "loss": 0.0073, "step": 135780 }, { "epoch": 4.197626259504235, "grad_norm": 0.14365729689598083, "learning_rate": 1.7901897756073437e-05, "loss": 0.0079, "step": 135810 }, { "epoch": 4.198553501885393, "grad_norm": 0.12699058651924133, "learning_rate": 1.790143413488286e-05, "loss": 0.0073, "step": 135840 }, { "epoch": 4.199480744266551, "grad_norm": 0.17583493888378143, "learning_rate": 1.790097051369228e-05, "loss": 0.0078, "step": 135870 }, { "epoch": 4.2004079866477095, "grad_norm": 0.1662192940711975, "learning_rate": 1.7900506892501702e-05, "loss": 0.0081, "step": 135900 }, { "epoch": 4.201335229028868, "grad_norm": 0.11289922893047333, "learning_rate": 1.7900043271311123e-05, "loss": 0.008, "step": 135930 }, { "epoch": 4.202262471410027, "grad_norm": 0.07107041031122208, "learning_rate": 1.789957965012054e-05, "loss": 0.0084, "step": 135960 }, { "epoch": 4.203189713791185, "grad_norm": 0.1431051790714264, "learning_rate": 1.7899116028929963e-05, "loss": 0.0079, "step": 135990 }, { "epoch": 4.204116956172343, "grad_norm": 0.14122842252254486, "learning_rate": 1.7898652407739384e-05, "loss": 0.0075, "step": 136020 }, { "epoch": 4.205044198553502, "grad_norm": 0.16614322364330292, "learning_rate": 1.7898188786548806e-05, "loss": 0.0076, "step": 136050 }, { "epoch": 4.20597144093466, "grad_norm": 0.15209445357322693, "learning_rate": 1.7897725165358227e-05, "loss": 0.0075, "step": 136080 }, { "epoch": 4.206898683315819, "grad_norm": 0.13459162414073944, "learning_rate": 1.789726154416765e-05, "loss": 0.0076, "step": 136110 }, { "epoch": 4.207825925696977, "grad_norm": 
0.11071515828371048, "learning_rate": 1.7896797922977067e-05, "loss": 0.0072, "step": 136140 }, { "epoch": 4.208753168078136, "grad_norm": 0.10316558927297592, "learning_rate": 1.789633430178649e-05, "loss": 0.0078, "step": 136170 }, { "epoch": 4.209680410459294, "grad_norm": 0.09621255099773407, "learning_rate": 1.7895870680595907e-05, "loss": 0.007, "step": 136200 }, { "epoch": 4.210607652840452, "grad_norm": 0.12820982933044434, "learning_rate": 1.7895407059405328e-05, "loss": 0.0078, "step": 136230 }, { "epoch": 4.211534895221611, "grad_norm": 0.12015386670827866, "learning_rate": 1.789494343821475e-05, "loss": 0.0076, "step": 136260 }, { "epoch": 4.2124621376027696, "grad_norm": 0.12681584060192108, "learning_rate": 1.789447981702417e-05, "loss": 0.0078, "step": 136290 }, { "epoch": 4.213389379983928, "grad_norm": 0.13843700289726257, "learning_rate": 1.7894016195833593e-05, "loss": 0.0073, "step": 136320 }, { "epoch": 4.214316622365086, "grad_norm": 0.16758975386619568, "learning_rate": 1.7893552574643014e-05, "loss": 0.0072, "step": 136350 }, { "epoch": 4.2152438647462445, "grad_norm": 0.10558799654245377, "learning_rate": 1.7893088953452432e-05, "loss": 0.0076, "step": 136380 }, { "epoch": 4.216171107127403, "grad_norm": 0.13708597421646118, "learning_rate": 1.7892625332261854e-05, "loss": 0.0077, "step": 136410 }, { "epoch": 4.217098349508562, "grad_norm": 0.11012642085552216, "learning_rate": 1.7892161711071275e-05, "loss": 0.0075, "step": 136440 }, { "epoch": 4.21802559188972, "grad_norm": 0.11835169792175293, "learning_rate": 1.7891698089880697e-05, "loss": 0.0082, "step": 136470 }, { "epoch": 4.218952834270878, "grad_norm": 0.11715035140514374, "learning_rate": 1.789123446869012e-05, "loss": 0.0078, "step": 136500 }, { "epoch": 4.219880076652037, "grad_norm": 0.12659583985805511, "learning_rate": 1.7890770847499537e-05, "loss": 0.0074, "step": 136530 }, { "epoch": 4.220807319033195, "grad_norm": 0.18023397028446198, "learning_rate": 
1.7890307226308958e-05, "loss": 0.0077, "step": 136560 }, { "epoch": 4.221734561414354, "grad_norm": 0.14468537271022797, "learning_rate": 1.788984360511838e-05, "loss": 0.007, "step": 136590 }, { "epoch": 4.222661803795512, "grad_norm": 0.13414216041564941, "learning_rate": 1.7889379983927798e-05, "loss": 0.0075, "step": 136620 }, { "epoch": 4.223589046176671, "grad_norm": 0.13056468963623047, "learning_rate": 1.7888916362737223e-05, "loss": 0.0084, "step": 136650 }, { "epoch": 4.224516288557829, "grad_norm": 0.1219404861330986, "learning_rate": 1.7888452741546644e-05, "loss": 0.0074, "step": 136680 }, { "epoch": 4.225443530938987, "grad_norm": 0.14664727449417114, "learning_rate": 1.7887989120356062e-05, "loss": 0.0071, "step": 136710 }, { "epoch": 4.226370773320146, "grad_norm": 0.1496819108724594, "learning_rate": 1.7887525499165484e-05, "loss": 0.007, "step": 136740 }, { "epoch": 4.2272980157013045, "grad_norm": 0.10284573584794998, "learning_rate": 1.7887061877974905e-05, "loss": 0.0078, "step": 136770 }, { "epoch": 4.228225258082463, "grad_norm": 0.11175142973661423, "learning_rate": 1.7886598256784323e-05, "loss": 0.0069, "step": 136800 }, { "epoch": 4.229152500463621, "grad_norm": 0.09802564978599548, "learning_rate": 1.7886134635593745e-05, "loss": 0.0082, "step": 136830 }, { "epoch": 4.230079742844779, "grad_norm": 0.1052405834197998, "learning_rate": 1.7885671014403166e-05, "loss": 0.0075, "step": 136860 }, { "epoch": 4.231006985225938, "grad_norm": 0.16250568628311157, "learning_rate": 1.7885207393212588e-05, "loss": 0.0077, "step": 136890 }, { "epoch": 4.231934227607097, "grad_norm": 0.11297982186079025, "learning_rate": 1.788474377202201e-05, "loss": 0.0072, "step": 136920 }, { "epoch": 4.232861469988255, "grad_norm": 0.14890934526920319, "learning_rate": 1.7884280150831427e-05, "loss": 0.0077, "step": 136950 }, { "epoch": 4.233788712369414, "grad_norm": 0.08747988939285278, "learning_rate": 1.788381652964085e-05, "loss": 0.0083, "step": 136980 }, { 
"epoch": 4.2347159547505715, "grad_norm": 0.1425870954990387, "learning_rate": 1.788335290845027e-05, "loss": 0.0077, "step": 137010 }, { "epoch": 4.23564319713173, "grad_norm": 0.10158837586641312, "learning_rate": 1.7882889287259692e-05, "loss": 0.0077, "step": 137040 }, { "epoch": 4.236570439512889, "grad_norm": 0.06472762674093246, "learning_rate": 1.7882425666069113e-05, "loss": 0.007, "step": 137070 }, { "epoch": 4.237497681894047, "grad_norm": 0.1746249496936798, "learning_rate": 1.7881962044878535e-05, "loss": 0.0079, "step": 137100 }, { "epoch": 4.238424924275206, "grad_norm": 0.13942883908748627, "learning_rate": 1.7881498423687953e-05, "loss": 0.0071, "step": 137130 }, { "epoch": 4.239352166656364, "grad_norm": 0.1943032443523407, "learning_rate": 1.7881034802497375e-05, "loss": 0.0074, "step": 137160 }, { "epoch": 4.240279409037522, "grad_norm": 0.17373745143413544, "learning_rate": 1.7880571181306796e-05, "loss": 0.0076, "step": 137190 }, { "epoch": 4.241206651418681, "grad_norm": 0.1890280395746231, "learning_rate": 1.7880107560116214e-05, "loss": 0.0078, "step": 137220 }, { "epoch": 4.242133893799839, "grad_norm": 0.14619126915931702, "learning_rate": 1.7879643938925636e-05, "loss": 0.0079, "step": 137250 }, { "epoch": 4.243061136180998, "grad_norm": 0.14833827316761017, "learning_rate": 1.7879180317735057e-05, "loss": 0.0074, "step": 137280 }, { "epoch": 4.243988378562156, "grad_norm": 0.11849244683980942, "learning_rate": 1.787871669654448e-05, "loss": 0.0076, "step": 137310 }, { "epoch": 4.244915620943314, "grad_norm": 0.15523530542850494, "learning_rate": 1.78782530753539e-05, "loss": 0.0076, "step": 137340 }, { "epoch": 4.245842863324473, "grad_norm": 0.0841788575053215, "learning_rate": 1.787778945416332e-05, "loss": 0.0077, "step": 137370 }, { "epoch": 4.2467701057056315, "grad_norm": 0.07423537969589233, "learning_rate": 1.787732583297274e-05, "loss": 0.0077, "step": 137400 }, { "epoch": 4.24769734808679, "grad_norm": 0.11168840527534485, 
"learning_rate": 1.787686221178216e-05, "loss": 0.008, "step": 137430 }, { "epoch": 4.248624590467949, "grad_norm": 0.19511379301548004, "learning_rate": 1.7876398590591583e-05, "loss": 0.0076, "step": 137460 }, { "epoch": 4.249551832849106, "grad_norm": 0.1165245771408081, "learning_rate": 1.7875934969401004e-05, "loss": 0.0075, "step": 137490 }, { "epoch": 4.250479075230265, "grad_norm": 0.12326252460479736, "learning_rate": 1.7875471348210426e-05, "loss": 0.0078, "step": 137520 }, { "epoch": 4.251406317611424, "grad_norm": 0.124460369348526, "learning_rate": 1.7875007727019844e-05, "loss": 0.0081, "step": 137550 }, { "epoch": 4.252333559992582, "grad_norm": 0.11768350005149841, "learning_rate": 1.7874544105829266e-05, "loss": 0.0071, "step": 137580 }, { "epoch": 4.253260802373741, "grad_norm": 0.0947045087814331, "learning_rate": 1.7874080484638684e-05, "loss": 0.0081, "step": 137610 }, { "epoch": 4.2541880447548985, "grad_norm": 0.11992837488651276, "learning_rate": 1.7873616863448105e-05, "loss": 0.0083, "step": 137640 }, { "epoch": 4.255115287136057, "grad_norm": 0.11747949570417404, "learning_rate": 1.7873153242257527e-05, "loss": 0.0076, "step": 137670 }, { "epoch": 4.256042529517216, "grad_norm": 0.13079416751861572, "learning_rate": 1.7872689621066948e-05, "loss": 0.0077, "step": 137700 }, { "epoch": 4.256969771898374, "grad_norm": 0.14485518634319305, "learning_rate": 1.787222599987637e-05, "loss": 0.0077, "step": 137730 }, { "epoch": 4.257897014279533, "grad_norm": 0.11138258874416351, "learning_rate": 1.787176237868579e-05, "loss": 0.0071, "step": 137760 }, { "epoch": 4.2588242566606915, "grad_norm": 0.1228642389178276, "learning_rate": 1.787129875749521e-05, "loss": 0.0077, "step": 137790 }, { "epoch": 4.259751499041849, "grad_norm": 0.14766879379749298, "learning_rate": 1.787083513630463e-05, "loss": 0.0075, "step": 137820 }, { "epoch": 4.260678741423008, "grad_norm": 0.10570461302995682, "learning_rate": 1.7870371515114052e-05, "loss": 0.0075, 
"step": 137850 }, { "epoch": 4.261605983804166, "grad_norm": 0.11781474202871323, "learning_rate": 1.7869907893923474e-05, "loss": 0.0081, "step": 137880 }, { "epoch": 4.262533226185325, "grad_norm": 0.15138664841651917, "learning_rate": 1.7869444272732895e-05, "loss": 0.0069, "step": 137910 }, { "epoch": 4.263460468566484, "grad_norm": 0.15151400864124298, "learning_rate": 1.7868980651542313e-05, "loss": 0.0079, "step": 137940 }, { "epoch": 4.264387710947641, "grad_norm": 0.14767037332057953, "learning_rate": 1.7868517030351735e-05, "loss": 0.0076, "step": 137970 }, { "epoch": 4.2653149533288, "grad_norm": 0.11174730956554413, "learning_rate": 1.7868053409161156e-05, "loss": 0.0075, "step": 138000 }, { "epoch": 4.2662421957099586, "grad_norm": 0.16695301234722137, "learning_rate": 1.7867589787970575e-05, "loss": 0.0076, "step": 138030 }, { "epoch": 4.267169438091117, "grad_norm": 0.1539904773235321, "learning_rate": 1.786712616678e-05, "loss": 0.0077, "step": 138060 }, { "epoch": 4.268096680472276, "grad_norm": 0.0947163850069046, "learning_rate": 1.786666254558942e-05, "loss": 0.007, "step": 138090 }, { "epoch": 4.2690239228534335, "grad_norm": 0.14556460082530975, "learning_rate": 1.786619892439884e-05, "loss": 0.0073, "step": 138120 }, { "epoch": 4.269951165234592, "grad_norm": 0.10058998316526413, "learning_rate": 1.786573530320826e-05, "loss": 0.0068, "step": 138150 }, { "epoch": 4.270878407615751, "grad_norm": 0.2017301619052887, "learning_rate": 1.7865271682017682e-05, "loss": 0.0079, "step": 138180 }, { "epoch": 4.271805649996909, "grad_norm": 0.0827050432562828, "learning_rate": 1.78648080608271e-05, "loss": 0.0075, "step": 138210 }, { "epoch": 4.272732892378068, "grad_norm": 0.09003029763698578, "learning_rate": 1.7864359893676208e-05, "loss": 0.0078, "step": 138240 }, { "epoch": 4.2736601347592265, "grad_norm": 0.10038290917873383, "learning_rate": 1.786389627248563e-05, "loss": 0.0075, "step": 138270 }, { "epoch": 4.274587377140384, "grad_norm": 
0.12135578691959381, "learning_rate": 1.786343265129505e-05, "loss": 0.0078, "step": 138300 }, { "epoch": 4.275514619521543, "grad_norm": 0.13211464881896973, "learning_rate": 1.7862969030104473e-05, "loss": 0.0073, "step": 138330 }, { "epoch": 4.276441861902701, "grad_norm": 0.11236780881881714, "learning_rate": 1.786250540891389e-05, "loss": 0.0072, "step": 138360 }, { "epoch": 4.27736910428386, "grad_norm": 0.12078630924224854, "learning_rate": 1.7862041787723312e-05, "loss": 0.0073, "step": 138390 }, { "epoch": 4.278296346665019, "grad_norm": 0.11102848500013351, "learning_rate": 1.7861578166532734e-05, "loss": 0.0077, "step": 138420 }, { "epoch": 4.279223589046176, "grad_norm": 0.13753663003444672, "learning_rate": 1.7861114545342152e-05, "loss": 0.0077, "step": 138450 }, { "epoch": 4.280150831427335, "grad_norm": 0.1252647489309311, "learning_rate": 1.7860650924151573e-05, "loss": 0.008, "step": 138480 }, { "epoch": 4.2810780738084935, "grad_norm": 0.1037081703543663, "learning_rate": 1.7860187302960995e-05, "loss": 0.0077, "step": 138510 }, { "epoch": 4.282005316189652, "grad_norm": 0.11051721125841141, "learning_rate": 1.7859723681770416e-05, "loss": 0.0079, "step": 138540 }, { "epoch": 4.282932558570811, "grad_norm": 0.17664283514022827, "learning_rate": 1.7859260060579838e-05, "loss": 0.0082, "step": 138570 }, { "epoch": 4.283859800951969, "grad_norm": 0.13865219056606293, "learning_rate": 1.785879643938926e-05, "loss": 0.0075, "step": 138600 }, { "epoch": 4.284787043333127, "grad_norm": 0.10374722629785538, "learning_rate": 1.7858332818198678e-05, "loss": 0.0078, "step": 138630 }, { "epoch": 4.285714285714286, "grad_norm": 0.10976599156856537, "learning_rate": 1.78578691970081e-05, "loss": 0.0073, "step": 138660 }, { "epoch": 4.286641528095444, "grad_norm": 0.06711684167385101, "learning_rate": 1.785740557581752e-05, "loss": 0.0072, "step": 138690 }, { "epoch": 4.287568770476603, "grad_norm": 0.1323816329240799, "learning_rate": 1.7856941954626942e-05, 
"loss": 0.0075, "step": 138720 }, { "epoch": 4.288496012857761, "grad_norm": 0.14940603077411652, "learning_rate": 1.7856478333436364e-05, "loss": 0.0075, "step": 138750 }, { "epoch": 4.289423255238919, "grad_norm": 0.09469068795442581, "learning_rate": 1.785601471224578e-05, "loss": 0.0075, "step": 138780 }, { "epoch": 4.290350497620078, "grad_norm": 0.07947387546300888, "learning_rate": 1.7855551091055203e-05, "loss": 0.0076, "step": 138810 }, { "epoch": 4.291277740001236, "grad_norm": 0.1040000319480896, "learning_rate": 1.7855087469864625e-05, "loss": 0.0082, "step": 138840 }, { "epoch": 4.292204982382395, "grad_norm": 0.15089818835258484, "learning_rate": 1.7854623848674043e-05, "loss": 0.008, "step": 138870 }, { "epoch": 4.2931322247635535, "grad_norm": 0.07723446190357208, "learning_rate": 1.7854160227483464e-05, "loss": 0.0075, "step": 138900 }, { "epoch": 4.294059467144711, "grad_norm": 0.06418800354003906, "learning_rate": 1.7853696606292886e-05, "loss": 0.0072, "step": 138930 }, { "epoch": 4.29498670952587, "grad_norm": 0.10968206822872162, "learning_rate": 1.7853232985102307e-05, "loss": 0.0075, "step": 138960 }, { "epoch": 4.295913951907028, "grad_norm": 0.09900601953268051, "learning_rate": 1.785276936391173e-05, "loss": 0.0078, "step": 138990 }, { "epoch": 4.296841194288187, "grad_norm": 0.1516730785369873, "learning_rate": 1.7852305742721147e-05, "loss": 0.0074, "step": 139020 }, { "epoch": 4.297768436669346, "grad_norm": 0.12817099690437317, "learning_rate": 1.785184212153057e-05, "loss": 0.0084, "step": 139050 }, { "epoch": 4.298695679050504, "grad_norm": 0.14440327882766724, "learning_rate": 1.785137850033999e-05, "loss": 0.0077, "step": 139080 }, { "epoch": 4.299622921431662, "grad_norm": 0.2506627142429352, "learning_rate": 1.785091487914941e-05, "loss": 0.0073, "step": 139110 }, { "epoch": 4.3005501638128205, "grad_norm": 0.1076522096991539, "learning_rate": 1.7850451257958833e-05, "loss": 0.0087, "step": 139140 }, { "epoch": 
4.301477406193979, "grad_norm": 0.10564753413200378, "learning_rate": 1.7849987636768254e-05, "loss": 0.0076, "step": 139170 }, { "epoch": 4.302404648575138, "grad_norm": 0.15728892385959625, "learning_rate": 1.7849524015577673e-05, "loss": 0.0074, "step": 139200 }, { "epoch": 4.303331890956296, "grad_norm": 0.11242575198411942, "learning_rate": 1.7849060394387094e-05, "loss": 0.0081, "step": 139230 }, { "epoch": 4.304259133337454, "grad_norm": 0.10256769508123398, "learning_rate": 1.7848596773196516e-05, "loss": 0.0078, "step": 139260 }, { "epoch": 4.305186375718613, "grad_norm": 0.17277033627033234, "learning_rate": 1.7848133152005934e-05, "loss": 0.0075, "step": 139290 }, { "epoch": 4.306113618099771, "grad_norm": 0.17073573172092438, "learning_rate": 1.784766953081536e-05, "loss": 0.0089, "step": 139320 }, { "epoch": 4.30704086048093, "grad_norm": 0.13664963841438293, "learning_rate": 1.7847205909624777e-05, "loss": 0.0075, "step": 139350 }, { "epoch": 4.307968102862088, "grad_norm": 0.11333553493022919, "learning_rate": 1.7846742288434198e-05, "loss": 0.0068, "step": 139380 }, { "epoch": 4.308895345243247, "grad_norm": 0.11398225277662277, "learning_rate": 1.784627866724362e-05, "loss": 0.0081, "step": 139410 }, { "epoch": 4.309822587624405, "grad_norm": 0.13435810804367065, "learning_rate": 1.7845815046053038e-05, "loss": 0.0069, "step": 139440 }, { "epoch": 4.310749830005563, "grad_norm": 0.16561922430992126, "learning_rate": 1.784535142486246e-05, "loss": 0.0075, "step": 139470 }, { "epoch": 4.311677072386722, "grad_norm": 0.13186170160770416, "learning_rate": 1.784488780367188e-05, "loss": 0.0077, "step": 139500 }, { "epoch": 4.3126043147678805, "grad_norm": 0.23061132431030273, "learning_rate": 1.7844424182481302e-05, "loss": 0.0082, "step": 139530 }, { "epoch": 4.313531557149039, "grad_norm": 0.1022549420595169, "learning_rate": 1.7843960561290724e-05, "loss": 0.0082, "step": 139560 }, { "epoch": 4.314458799530197, "grad_norm": 0.15815654397010803, 
"learning_rate": 1.7843496940100145e-05, "loss": 0.0082, "step": 139590 }, { "epoch": 4.315386041911355, "grad_norm": 0.11770161241292953, "learning_rate": 1.7843033318909564e-05, "loss": 0.0074, "step": 139620 }, { "epoch": 4.316313284292514, "grad_norm": 0.12229833751916885, "learning_rate": 1.7842569697718985e-05, "loss": 0.0067, "step": 139650 }, { "epoch": 4.317240526673673, "grad_norm": 0.15249651670455933, "learning_rate": 1.7842106076528407e-05, "loss": 0.0071, "step": 139680 }, { "epoch": 4.318167769054831, "grad_norm": 0.10200540721416473, "learning_rate": 1.7841642455337828e-05, "loss": 0.0077, "step": 139710 }, { "epoch": 4.319095011435989, "grad_norm": 0.14202681183815002, "learning_rate": 1.784117883414725e-05, "loss": 0.0069, "step": 139740 }, { "epoch": 4.3200222538171476, "grad_norm": 0.14790692925453186, "learning_rate": 1.7840715212956668e-05, "loss": 0.0079, "step": 139770 }, { "epoch": 4.320949496198306, "grad_norm": 0.1402231901884079, "learning_rate": 1.784025159176609e-05, "loss": 0.0075, "step": 139800 }, { "epoch": 4.321876738579465, "grad_norm": 0.13437169790267944, "learning_rate": 1.783978797057551e-05, "loss": 0.0069, "step": 139830 }, { "epoch": 4.322803980960623, "grad_norm": 0.09246131032705307, "learning_rate": 1.783932434938493e-05, "loss": 0.0072, "step": 139860 }, { "epoch": 4.323731223341782, "grad_norm": 0.13318024575710297, "learning_rate": 1.783886072819435e-05, "loss": 0.0084, "step": 139890 }, { "epoch": 4.32465846572294, "grad_norm": 0.1069723516702652, "learning_rate": 1.7838397107003772e-05, "loss": 0.0079, "step": 139920 }, { "epoch": 4.325585708104098, "grad_norm": 0.10741695761680603, "learning_rate": 1.7837933485813193e-05, "loss": 0.0072, "step": 139950 }, { "epoch": 4.326512950485257, "grad_norm": 0.11955549567937851, "learning_rate": 1.7837469864622615e-05, "loss": 0.0072, "step": 139980 }, { "epoch": 4.3274401928664155, "grad_norm": 0.1040564551949501, "learning_rate": 1.7837006243432036e-05, "loss": 0.0075, 
"step": 140010 }, { "epoch": 4.328367435247574, "grad_norm": 0.11155141890048981, "learning_rate": 1.7836542622241454e-05, "loss": 0.0073, "step": 140040 }, { "epoch": 4.329294677628732, "grad_norm": 0.14314034581184387, "learning_rate": 1.7836079001050876e-05, "loss": 0.0082, "step": 140070 }, { "epoch": 4.33022192000989, "grad_norm": 0.13916248083114624, "learning_rate": 1.7835615379860297e-05, "loss": 0.0081, "step": 140100 }, { "epoch": 4.331149162391049, "grad_norm": 0.13600081205368042, "learning_rate": 1.783515175866972e-05, "loss": 0.0067, "step": 140130 }, { "epoch": 4.332076404772208, "grad_norm": 0.12562303245067596, "learning_rate": 1.783468813747914e-05, "loss": 0.0078, "step": 140160 }, { "epoch": 4.333003647153366, "grad_norm": 0.11796186864376068, "learning_rate": 1.783422451628856e-05, "loss": 0.0076, "step": 140190 }, { "epoch": 4.333930889534525, "grad_norm": 0.11728834360837936, "learning_rate": 1.783376089509798e-05, "loss": 0.0076, "step": 140220 }, { "epoch": 4.3348581319156825, "grad_norm": 0.1395537108182907, "learning_rate": 1.78332972739074e-05, "loss": 0.0077, "step": 140250 }, { "epoch": 4.335785374296841, "grad_norm": 0.0880308598279953, "learning_rate": 1.783283365271682e-05, "loss": 0.0076, "step": 140280 }, { "epoch": 4.336712616678, "grad_norm": 0.14949363470077515, "learning_rate": 1.783237003152624e-05, "loss": 0.0081, "step": 140310 }, { "epoch": 4.337639859059158, "grad_norm": 0.13437099754810333, "learning_rate": 1.7831906410335663e-05, "loss": 0.0077, "step": 140340 }, { "epoch": 4.338567101440317, "grad_norm": 0.11945544183254242, "learning_rate": 1.7831442789145084e-05, "loss": 0.0075, "step": 140370 }, { "epoch": 4.339494343821475, "grad_norm": 0.09162548929452896, "learning_rate": 1.7830979167954506e-05, "loss": 0.0072, "step": 140400 }, { "epoch": 4.340421586202633, "grad_norm": 0.18508663773536682, "learning_rate": 1.7830515546763924e-05, "loss": 0.008, "step": 140430 }, { "epoch": 4.341348828583792, "grad_norm": 
0.12826301157474518, "learning_rate": 1.7830051925573345e-05, "loss": 0.0075, "step": 140460 }, { "epoch": 4.34227607096495, "grad_norm": 0.1174173429608345, "learning_rate": 1.7829588304382767e-05, "loss": 0.0079, "step": 140490 }, { "epoch": 4.343203313346109, "grad_norm": 0.13835269212722778, "learning_rate": 1.782912468319219e-05, "loss": 0.0075, "step": 140520 }, { "epoch": 4.344130555727267, "grad_norm": 0.16723181307315826, "learning_rate": 1.782866106200161e-05, "loss": 0.0075, "step": 140550 }, { "epoch": 4.345057798108425, "grad_norm": 0.08087880909442902, "learning_rate": 1.782819744081103e-05, "loss": 0.0075, "step": 140580 }, { "epoch": 4.345985040489584, "grad_norm": 0.12118934839963913, "learning_rate": 1.782773381962045e-05, "loss": 0.0071, "step": 140610 }, { "epoch": 4.3469122828707425, "grad_norm": 0.15444190800189972, "learning_rate": 1.782727019842987e-05, "loss": 0.0072, "step": 140640 }, { "epoch": 4.347839525251901, "grad_norm": 0.09779056161642075, "learning_rate": 1.7826806577239292e-05, "loss": 0.0072, "step": 140670 }, { "epoch": 4.34876676763306, "grad_norm": 0.14977046847343445, "learning_rate": 1.782634295604871e-05, "loss": 0.0078, "step": 140700 }, { "epoch": 4.349694010014217, "grad_norm": 0.12159384042024612, "learning_rate": 1.7825879334858136e-05, "loss": 0.0078, "step": 140730 }, { "epoch": 4.350621252395376, "grad_norm": 0.11163755506277084, "learning_rate": 1.7825415713667554e-05, "loss": 0.0072, "step": 140760 }, { "epoch": 4.351548494776535, "grad_norm": 0.1610022634267807, "learning_rate": 1.7824952092476975e-05, "loss": 0.0083, "step": 140790 }, { "epoch": 4.352475737157693, "grad_norm": 0.10527195036411285, "learning_rate": 1.7824488471286397e-05, "loss": 0.008, "step": 140820 }, { "epoch": 4.353402979538852, "grad_norm": 0.11796286702156067, "learning_rate": 1.7824024850095815e-05, "loss": 0.0074, "step": 140850 }, { "epoch": 4.3543302219200095, "grad_norm": 0.16457590460777283, "learning_rate": 1.7823561228905236e-05, 
"loss": 0.0075, "step": 140880 }, { "epoch": 4.355257464301168, "grad_norm": 0.14657220244407654, "learning_rate": 1.7823097607714658e-05, "loss": 0.0079, "step": 140910 }, { "epoch": 4.356184706682327, "grad_norm": 0.11749359965324402, "learning_rate": 1.782263398652408e-05, "loss": 0.0079, "step": 140940 }, { "epoch": 4.357111949063485, "grad_norm": 0.11516252905130386, "learning_rate": 1.78221703653335e-05, "loss": 0.0081, "step": 140970 }, { "epoch": 4.358039191444644, "grad_norm": 0.09955707937479019, "learning_rate": 1.7821706744142922e-05, "loss": 0.0077, "step": 141000 }, { "epoch": 4.3589664338258025, "grad_norm": 0.0969972237944603, "learning_rate": 1.782124312295234e-05, "loss": 0.0071, "step": 141030 }, { "epoch": 4.35989367620696, "grad_norm": 0.16025713086128235, "learning_rate": 1.7820779501761762e-05, "loss": 0.0076, "step": 141060 }, { "epoch": 4.360820918588119, "grad_norm": 0.10891643911600113, "learning_rate": 1.7820315880571183e-05, "loss": 0.0073, "step": 141090 }, { "epoch": 4.361748160969277, "grad_norm": 0.11469168215990067, "learning_rate": 1.7819852259380605e-05, "loss": 0.0073, "step": 141120 }, { "epoch": 4.362675403350436, "grad_norm": 0.110169418156147, "learning_rate": 1.7819388638190026e-05, "loss": 0.0075, "step": 141150 }, { "epoch": 4.363602645731595, "grad_norm": 0.12631213665008545, "learning_rate": 1.7818925016999445e-05, "loss": 0.0074, "step": 141180 }, { "epoch": 4.364529888112752, "grad_norm": 0.08569841831922531, "learning_rate": 1.7818461395808866e-05, "loss": 0.0082, "step": 141210 }, { "epoch": 4.365457130493911, "grad_norm": 0.0971873551607132, "learning_rate": 1.7817997774618288e-05, "loss": 0.0084, "step": 141240 }, { "epoch": 4.3663843728750695, "grad_norm": 0.15038056671619415, "learning_rate": 1.7817534153427706e-05, "loss": 0.0088, "step": 141270 }, { "epoch": 4.367311615256228, "grad_norm": 0.12079136073589325, "learning_rate": 1.7817070532237127e-05, "loss": 0.0071, "step": 141300 }, { "epoch": 
4.368238857637387, "grad_norm": 0.12128613144159317, "learning_rate": 1.781660691104655e-05, "loss": 0.0072, "step": 141330 }, { "epoch": 4.369166100018544, "grad_norm": 0.1076437383890152, "learning_rate": 1.781614328985597e-05, "loss": 0.0074, "step": 141360 }, { "epoch": 4.370093342399703, "grad_norm": 0.11495501548051834, "learning_rate": 1.7815679668665392e-05, "loss": 0.0074, "step": 141390 }, { "epoch": 4.371020584780862, "grad_norm": 0.09904185682535172, "learning_rate": 1.781521604747481e-05, "loss": 0.0073, "step": 141420 }, { "epoch": 4.37194782716202, "grad_norm": 0.0893084928393364, "learning_rate": 1.781475242628423e-05, "loss": 0.0074, "step": 141450 }, { "epoch": 4.372875069543179, "grad_norm": 0.15081055462360382, "learning_rate": 1.7814288805093653e-05, "loss": 0.0077, "step": 141480 }, { "epoch": 4.3738023119243365, "grad_norm": 0.10136350244283676, "learning_rate": 1.7813825183903074e-05, "loss": 0.0079, "step": 141510 }, { "epoch": 4.374729554305495, "grad_norm": 0.16271178424358368, "learning_rate": 1.7813361562712496e-05, "loss": 0.0069, "step": 141540 }, { "epoch": 4.375656796686654, "grad_norm": 0.14366386830806732, "learning_rate": 1.7812897941521917e-05, "loss": 0.0074, "step": 141570 }, { "epoch": 4.376584039067812, "grad_norm": 0.13914576172828674, "learning_rate": 1.7812434320331335e-05, "loss": 0.0077, "step": 141600 }, { "epoch": 4.377511281448971, "grad_norm": 0.11001505702733994, "learning_rate": 1.7811970699140757e-05, "loss": 0.0072, "step": 141630 }, { "epoch": 4.3784385238301295, "grad_norm": 0.1713004857301712, "learning_rate": 1.781150707795018e-05, "loss": 0.0073, "step": 141660 }, { "epoch": 4.379365766211287, "grad_norm": 0.1260102540254593, "learning_rate": 1.7811043456759597e-05, "loss": 0.0074, "step": 141690 }, { "epoch": 4.380293008592446, "grad_norm": 0.12742042541503906, "learning_rate": 1.7810579835569018e-05, "loss": 0.0076, "step": 141720 }, { "epoch": 4.3812202509736045, "grad_norm": 0.12663429975509644, 
"learning_rate": 1.781011621437844e-05, "loss": 0.0075, "step": 141750 }, { "epoch": 4.382147493354763, "grad_norm": 0.10938280075788498, "learning_rate": 1.780965259318786e-05, "loss": 0.0081, "step": 141780 }, { "epoch": 4.383074735735922, "grad_norm": 0.14981700479984283, "learning_rate": 1.7809188971997283e-05, "loss": 0.0084, "step": 141810 }, { "epoch": 4.38400197811708, "grad_norm": 0.13938109576702118, "learning_rate": 1.78087253508067e-05, "loss": 0.0074, "step": 141840 }, { "epoch": 4.384929220498238, "grad_norm": 0.12850947678089142, "learning_rate": 1.7808261729616122e-05, "loss": 0.007, "step": 141870 }, { "epoch": 4.385856462879397, "grad_norm": 0.13853617012500763, "learning_rate": 1.7807798108425544e-05, "loss": 0.0083, "step": 141900 }, { "epoch": 4.386783705260555, "grad_norm": 0.1097806990146637, "learning_rate": 1.7807334487234965e-05, "loss": 0.0087, "step": 141930 }, { "epoch": 4.387710947641714, "grad_norm": 0.13921475410461426, "learning_rate": 1.7806870866044387e-05, "loss": 0.0079, "step": 141960 }, { "epoch": 4.388638190022872, "grad_norm": 0.13199599087238312, "learning_rate": 1.7806407244853808e-05, "loss": 0.0071, "step": 141990 }, { "epoch": 4.38956543240403, "grad_norm": 0.089664988219738, "learning_rate": 1.7805943623663226e-05, "loss": 0.0079, "step": 142020 }, { "epoch": 4.390492674785189, "grad_norm": 0.11503759026527405, "learning_rate": 1.7805480002472648e-05, "loss": 0.0078, "step": 142050 }, { "epoch": 4.391419917166347, "grad_norm": 0.06971501559019089, "learning_rate": 1.7805016381282066e-05, "loss": 0.0081, "step": 142080 }, { "epoch": 4.392347159547506, "grad_norm": 0.10550288110971451, "learning_rate": 1.7804552760091488e-05, "loss": 0.0072, "step": 142110 }, { "epoch": 4.3932744019286645, "grad_norm": 0.12750393152236938, "learning_rate": 1.7804089138900912e-05, "loss": 0.0078, "step": 142140 }, { "epoch": 4.394201644309822, "grad_norm": 0.1558866649866104, "learning_rate": 1.780362551771033e-05, "loss": 0.0073, "step": 
142170 }, { "epoch": 4.395128886690981, "grad_norm": 0.12270717322826385, "learning_rate": 1.7803161896519752e-05, "loss": 0.0074, "step": 142200 }, { "epoch": 4.396056129072139, "grad_norm": 0.15997028350830078, "learning_rate": 1.7802698275329174e-05, "loss": 0.0082, "step": 142230 }, { "epoch": 4.396983371453298, "grad_norm": 0.12363751977682114, "learning_rate": 1.780223465413859e-05, "loss": 0.0087, "step": 142260 }, { "epoch": 4.397910613834457, "grad_norm": 0.19335134327411652, "learning_rate": 1.7801771032948013e-05, "loss": 0.0072, "step": 142290 }, { "epoch": 4.398837856215614, "grad_norm": 0.08031613379716873, "learning_rate": 1.7801307411757435e-05, "loss": 0.0075, "step": 142320 }, { "epoch": 4.399765098596773, "grad_norm": 0.11150027066469193, "learning_rate": 1.7800843790566856e-05, "loss": 0.0081, "step": 142350 }, { "epoch": 4.4006923409779315, "grad_norm": 0.11421022564172745, "learning_rate": 1.7800380169376278e-05, "loss": 0.0079, "step": 142380 }, { "epoch": 4.40161958335909, "grad_norm": 0.12463527172803879, "learning_rate": 1.7799916548185696e-05, "loss": 0.0069, "step": 142410 }, { "epoch": 4.402546825740249, "grad_norm": 0.18747207522392273, "learning_rate": 1.7799452926995117e-05, "loss": 0.0085, "step": 142440 }, { "epoch": 4.403474068121407, "grad_norm": 0.09210150688886642, "learning_rate": 1.779898930580454e-05, "loss": 0.0073, "step": 142470 }, { "epoch": 4.404401310502565, "grad_norm": 0.19298909604549408, "learning_rate": 1.779852568461396e-05, "loss": 0.0075, "step": 142500 }, { "epoch": 4.405328552883724, "grad_norm": 0.10652025789022446, "learning_rate": 1.7798062063423382e-05, "loss": 0.0081, "step": 142530 }, { "epoch": 4.406255795264882, "grad_norm": 0.12298984080553055, "learning_rate": 1.7797598442232803e-05, "loss": 0.0076, "step": 142560 }, { "epoch": 4.407183037646041, "grad_norm": 0.10574322938919067, "learning_rate": 1.779713482104222e-05, "loss": 0.0076, "step": 142590 }, { "epoch": 4.408110280027199, "grad_norm": 
0.1278662532567978, "learning_rate": 1.7796671199851643e-05, "loss": 0.0081, "step": 142620 }, { "epoch": 4.409037522408358, "grad_norm": 0.13756972551345825, "learning_rate": 1.7796207578661064e-05, "loss": 0.0079, "step": 142650 }, { "epoch": 4.409964764789516, "grad_norm": 0.11249973624944687, "learning_rate": 1.7795743957470483e-05, "loss": 0.008, "step": 142680 }, { "epoch": 4.410892007170674, "grad_norm": 0.12676844000816345, "learning_rate": 1.7795280336279904e-05, "loss": 0.0072, "step": 142710 }, { "epoch": 4.411819249551833, "grad_norm": 0.15522441267967224, "learning_rate": 1.7794816715089326e-05, "loss": 0.0077, "step": 142740 }, { "epoch": 4.4127464919329915, "grad_norm": 0.10922496020793915, "learning_rate": 1.7794353093898747e-05, "loss": 0.0075, "step": 142770 }, { "epoch": 4.41367373431415, "grad_norm": 0.09917556494474411, "learning_rate": 1.779388947270817e-05, "loss": 0.0076, "step": 142800 }, { "epoch": 4.414600976695308, "grad_norm": 0.1121457889676094, "learning_rate": 1.7793441305557277e-05, "loss": 0.0074, "step": 142830 }, { "epoch": 4.415528219076466, "grad_norm": 0.13054528832435608, "learning_rate": 1.7792977684366695e-05, "loss": 0.0072, "step": 142860 }, { "epoch": 4.416455461457625, "grad_norm": 0.13623684644699097, "learning_rate": 1.7792529517215803e-05, "loss": 0.008, "step": 142890 }, { "epoch": 4.417382703838784, "grad_norm": 0.16268250346183777, "learning_rate": 1.7792065896025224e-05, "loss": 0.0072, "step": 142920 }, { "epoch": 4.418309946219942, "grad_norm": 0.12074761092662811, "learning_rate": 1.7791602274834642e-05, "loss": 0.007, "step": 142950 }, { "epoch": 4.4192371886011, "grad_norm": 0.12190932035446167, "learning_rate": 1.7791138653644064e-05, "loss": 0.0084, "step": 142980 }, { "epoch": 4.4201644309822585, "grad_norm": 0.15361295640468597, "learning_rate": 1.7790675032453485e-05, "loss": 0.0074, "step": 143010 }, { "epoch": 4.421091673363417, "grad_norm": 0.12495763599872589, "learning_rate": 
1.7790211411262907e-05, "loss": 0.0084, "step": 143040 }, { "epoch": 4.422018915744576, "grad_norm": 0.11277907341718674, "learning_rate": 1.7789747790072328e-05, "loss": 0.0075, "step": 143070 }, { "epoch": 4.422946158125734, "grad_norm": 0.11218865215778351, "learning_rate": 1.7789284168881746e-05, "loss": 0.0077, "step": 143100 }, { "epoch": 4.423873400506892, "grad_norm": 0.14487586915493011, "learning_rate": 1.7788820547691168e-05, "loss": 0.0069, "step": 143130 }, { "epoch": 4.424800642888051, "grad_norm": 0.12944138050079346, "learning_rate": 1.778835692650059e-05, "loss": 0.007, "step": 143160 }, { "epoch": 4.425727885269209, "grad_norm": 0.1745924949645996, "learning_rate": 1.7787893305310007e-05, "loss": 0.0077, "step": 143190 }, { "epoch": 4.426655127650368, "grad_norm": 0.1457984298467636, "learning_rate": 1.778742968411943e-05, "loss": 0.0072, "step": 143220 }, { "epoch": 4.427582370031526, "grad_norm": 0.08386215567588806, "learning_rate": 1.778696606292885e-05, "loss": 0.0074, "step": 143250 }, { "epoch": 4.428509612412685, "grad_norm": 0.06848037242889404, "learning_rate": 1.7786502441738272e-05, "loss": 0.0075, "step": 143280 }, { "epoch": 4.429436854793843, "grad_norm": 0.13868176937103271, "learning_rate": 1.7786038820547693e-05, "loss": 0.0074, "step": 143310 }, { "epoch": 4.430364097175001, "grad_norm": 0.11281488090753555, "learning_rate": 1.778557519935711e-05, "loss": 0.0081, "step": 143340 }, { "epoch": 4.43129133955616, "grad_norm": 0.11754710227251053, "learning_rate": 1.7785111578166533e-05, "loss": 0.0074, "step": 143370 }, { "epoch": 4.4322185819373185, "grad_norm": 0.1352853626012802, "learning_rate": 1.7784647956975955e-05, "loss": 0.0074, "step": 143400 }, { "epoch": 4.433145824318477, "grad_norm": 0.17788034677505493, "learning_rate": 1.7784184335785376e-05, "loss": 0.0078, "step": 143430 }, { "epoch": 4.434073066699636, "grad_norm": 0.11892340332269669, "learning_rate": 1.7783720714594798e-05, "loss": 0.0074, "step": 143460 }, { 
"epoch": 4.4350003090807935, "grad_norm": 0.1339990347623825, "learning_rate": 1.778325709340422e-05, "loss": 0.0077, "step": 143490 }, { "epoch": 4.435927551461952, "grad_norm": 0.1292540580034256, "learning_rate": 1.7782793472213637e-05, "loss": 0.0085, "step": 143520 }, { "epoch": 4.436854793843111, "grad_norm": 0.09798040241003036, "learning_rate": 1.778232985102306e-05, "loss": 0.0078, "step": 143550 }, { "epoch": 4.437782036224269, "grad_norm": 0.14078755676746368, "learning_rate": 1.778186622983248e-05, "loss": 0.0077, "step": 143580 }, { "epoch": 4.438709278605428, "grad_norm": 0.10414775460958481, "learning_rate": 1.77814026086419e-05, "loss": 0.0077, "step": 143610 }, { "epoch": 4.439636520986586, "grad_norm": 0.09104146808385849, "learning_rate": 1.7780938987451323e-05, "loss": 0.0077, "step": 143640 }, { "epoch": 4.440563763367744, "grad_norm": 0.14262516796588898, "learning_rate": 1.778047536626074e-05, "loss": 0.0074, "step": 143670 }, { "epoch": 4.441491005748903, "grad_norm": 0.12294989824295044, "learning_rate": 1.7780011745070163e-05, "loss": 0.0081, "step": 143700 }, { "epoch": 4.442418248130061, "grad_norm": 0.13460490107536316, "learning_rate": 1.7779548123879584e-05, "loss": 0.0074, "step": 143730 }, { "epoch": 4.44334549051122, "grad_norm": 0.10047691315412521, "learning_rate": 1.7779084502689002e-05, "loss": 0.0079, "step": 143760 }, { "epoch": 4.444272732892378, "grad_norm": 0.08770579844713211, "learning_rate": 1.7778620881498424e-05, "loss": 0.007, "step": 143790 }, { "epoch": 4.445199975273536, "grad_norm": 0.1401616334915161, "learning_rate": 1.7778157260307846e-05, "loss": 0.0085, "step": 143820 }, { "epoch": 4.446127217654695, "grad_norm": 0.07870367914438248, "learning_rate": 1.7777693639117267e-05, "loss": 0.0077, "step": 143850 }, { "epoch": 4.4470544600358535, "grad_norm": 0.12003014236688614, "learning_rate": 1.777723001792669e-05, "loss": 0.0075, "step": 143880 }, { "epoch": 4.447981702417012, "grad_norm": 0.09402447193861008, 
"learning_rate": 1.777676639673611e-05, "loss": 0.0075, "step": 143910 }, { "epoch": 4.44890894479817, "grad_norm": 0.09830935299396515, "learning_rate": 1.7776302775545528e-05, "loss": 0.007, "step": 143940 }, { "epoch": 4.449836187179328, "grad_norm": 0.17419004440307617, "learning_rate": 1.777583915435495e-05, "loss": 0.0077, "step": 143970 }, { "epoch": 4.450763429560487, "grad_norm": 0.13525977730751038, "learning_rate": 1.777537553316437e-05, "loss": 0.0074, "step": 144000 }, { "epoch": 4.451690671941646, "grad_norm": 0.09531675279140472, "learning_rate": 1.7774911911973793e-05, "loss": 0.008, "step": 144030 }, { "epoch": 4.452617914322804, "grad_norm": 0.12336616218090057, "learning_rate": 1.7774448290783214e-05, "loss": 0.0076, "step": 144060 }, { "epoch": 4.453545156703963, "grad_norm": 0.09522704035043716, "learning_rate": 1.7773984669592632e-05, "loss": 0.0082, "step": 144090 }, { "epoch": 4.4544723990851205, "grad_norm": 0.12372840940952301, "learning_rate": 1.7773521048402054e-05, "loss": 0.0081, "step": 144120 }, { "epoch": 4.455399641466279, "grad_norm": 0.12031423300504684, "learning_rate": 1.7773057427211475e-05, "loss": 0.0077, "step": 144150 }, { "epoch": 4.456326883847438, "grad_norm": 0.07964415103197098, "learning_rate": 1.7772593806020893e-05, "loss": 0.0081, "step": 144180 }, { "epoch": 4.457254126228596, "grad_norm": 0.13692007958889008, "learning_rate": 1.7772130184830315e-05, "loss": 0.0078, "step": 144210 }, { "epoch": 4.458181368609755, "grad_norm": 0.1018601804971695, "learning_rate": 1.7771666563639736e-05, "loss": 0.0082, "step": 144240 }, { "epoch": 4.4591086109909135, "grad_norm": 0.11264209449291229, "learning_rate": 1.7771202942449158e-05, "loss": 0.0086, "step": 144270 }, { "epoch": 4.460035853372071, "grad_norm": 0.180088073015213, "learning_rate": 1.777073932125858e-05, "loss": 0.0078, "step": 144300 }, { "epoch": 4.46096309575323, "grad_norm": 0.13193881511688232, "learning_rate": 1.7770275700067998e-05, "loss": 0.0077, 
"step": 144330 }, { "epoch": 4.461890338134388, "grad_norm": 0.13938410580158234, "learning_rate": 1.776981207887742e-05, "loss": 0.0069, "step": 144360 }, { "epoch": 4.462817580515547, "grad_norm": 0.18625515699386597, "learning_rate": 1.776934845768684e-05, "loss": 0.0077, "step": 144390 }, { "epoch": 4.463744822896706, "grad_norm": 0.17835277318954468, "learning_rate": 1.7768884836496262e-05, "loss": 0.008, "step": 144420 }, { "epoch": 4.464672065277863, "grad_norm": 0.09979695826768875, "learning_rate": 1.7768421215305684e-05, "loss": 0.0073, "step": 144450 }, { "epoch": 4.465599307659022, "grad_norm": 0.11489000916481018, "learning_rate": 1.7767957594115105e-05, "loss": 0.0079, "step": 144480 }, { "epoch": 4.4665265500401805, "grad_norm": 0.13607265055179596, "learning_rate": 1.7767493972924523e-05, "loss": 0.0069, "step": 144510 }, { "epoch": 4.467453792421339, "grad_norm": 0.19533243775367737, "learning_rate": 1.7767030351733945e-05, "loss": 0.0077, "step": 144540 }, { "epoch": 4.468381034802498, "grad_norm": 0.12626270949840546, "learning_rate": 1.7766566730543366e-05, "loss": 0.0075, "step": 144570 }, { "epoch": 4.469308277183655, "grad_norm": 0.150742769241333, "learning_rate": 1.7766103109352784e-05, "loss": 0.0075, "step": 144600 }, { "epoch": 4.470235519564814, "grad_norm": 0.11047095060348511, "learning_rate": 1.7765639488162206e-05, "loss": 0.0074, "step": 144630 }, { "epoch": 4.471162761945973, "grad_norm": 0.10541094094514847, "learning_rate": 1.7765175866971627e-05, "loss": 0.0074, "step": 144660 }, { "epoch": 4.472090004327131, "grad_norm": 0.14088140428066254, "learning_rate": 1.776471224578105e-05, "loss": 0.0076, "step": 144690 }, { "epoch": 4.47301724670829, "grad_norm": 0.13118283450603485, "learning_rate": 1.776424862459047e-05, "loss": 0.0078, "step": 144720 }, { "epoch": 4.4739444890894475, "grad_norm": 0.06028030067682266, "learning_rate": 1.776378500339989e-05, "loss": 0.0077, "step": 144750 }, { "epoch": 4.474871731470606, "grad_norm": 
0.11989560723304749, "learning_rate": 1.776332138220931e-05, "loss": 0.0077, "step": 144780 }, { "epoch": 4.475798973851765, "grad_norm": 0.07984425127506256, "learning_rate": 1.776285776101873e-05, "loss": 0.0078, "step": 144810 }, { "epoch": 4.476726216232923, "grad_norm": 0.1250176876783371, "learning_rate": 1.7762394139828153e-05, "loss": 0.008, "step": 144840 }, { "epoch": 4.477653458614082, "grad_norm": 0.12880556285381317, "learning_rate": 1.7761930518637575e-05, "loss": 0.0081, "step": 144870 }, { "epoch": 4.4785807009952405, "grad_norm": 0.1259760558605194, "learning_rate": 1.7761466897446996e-05, "loss": 0.0072, "step": 144900 }, { "epoch": 4.479507943376398, "grad_norm": 0.13860400021076202, "learning_rate": 1.7761003276256414e-05, "loss": 0.0076, "step": 144930 }, { "epoch": 4.480435185757557, "grad_norm": 0.13566072285175323, "learning_rate": 1.7760539655065836e-05, "loss": 0.0078, "step": 144960 }, { "epoch": 4.481362428138715, "grad_norm": 0.11207734048366547, "learning_rate": 1.7760076033875254e-05, "loss": 0.0073, "step": 144990 }, { "epoch": 4.482289670519874, "grad_norm": 0.08781290054321289, "learning_rate": 1.7759612412684675e-05, "loss": 0.0072, "step": 145020 }, { "epoch": 4.483216912901033, "grad_norm": 0.12761074304580688, "learning_rate": 1.77591487914941e-05, "loss": 0.0081, "step": 145050 }, { "epoch": 4.484144155282191, "grad_norm": 0.12806448340415955, "learning_rate": 1.7758685170303518e-05, "loss": 0.0081, "step": 145080 }, { "epoch": 4.485071397663349, "grad_norm": 0.10703037679195404, "learning_rate": 1.775822154911294e-05, "loss": 0.0078, "step": 145110 }, { "epoch": 4.4859986400445075, "grad_norm": 0.13580740988254547, "learning_rate": 1.775775792792236e-05, "loss": 0.0081, "step": 145140 }, { "epoch": 4.486925882425666, "grad_norm": 0.14224082231521606, "learning_rate": 1.775729430673178e-05, "loss": 0.0075, "step": 145170 }, { "epoch": 4.487853124806825, "grad_norm": 0.09580780565738678, "learning_rate": 1.77568306855412e-05, 
"loss": 0.0069, "step": 145200 }, { "epoch": 4.488780367187983, "grad_norm": 0.1409773975610733, "learning_rate": 1.7756367064350622e-05, "loss": 0.0073, "step": 145230 }, { "epoch": 4.489707609569141, "grad_norm": 0.07995839416980743, "learning_rate": 1.7755903443160044e-05, "loss": 0.0067, "step": 145260 }, { "epoch": 4.4906348519503, "grad_norm": 0.13059002161026, "learning_rate": 1.7755439821969465e-05, "loss": 0.008, "step": 145290 }, { "epoch": 4.491562094331458, "grad_norm": 0.07870828360319138, "learning_rate": 1.7754976200778884e-05, "loss": 0.0076, "step": 145320 }, { "epoch": 4.492489336712617, "grad_norm": 0.1836526244878769, "learning_rate": 1.7754512579588305e-05, "loss": 0.0076, "step": 145350 }, { "epoch": 4.4934165790937755, "grad_norm": 0.09724230319261551, "learning_rate": 1.7754048958397727e-05, "loss": 0.007, "step": 145380 }, { "epoch": 4.494343821474933, "grad_norm": 0.13495154678821564, "learning_rate": 1.7753585337207148e-05, "loss": 0.0078, "step": 145410 }, { "epoch": 4.495271063856092, "grad_norm": 0.1248454749584198, "learning_rate": 1.775312171601657e-05, "loss": 0.0068, "step": 145440 }, { "epoch": 4.49619830623725, "grad_norm": 0.06986160576343536, "learning_rate": 1.775265809482599e-05, "loss": 0.008, "step": 145470 }, { "epoch": 4.497125548618409, "grad_norm": 0.1502290517091751, "learning_rate": 1.775219447363541e-05, "loss": 0.008, "step": 145500 }, { "epoch": 4.498052790999568, "grad_norm": 0.11230990290641785, "learning_rate": 1.775173085244483e-05, "loss": 0.0071, "step": 145530 }, { "epoch": 4.498980033380725, "grad_norm": 0.11255662888288498, "learning_rate": 1.7751267231254252e-05, "loss": 0.0072, "step": 145560 }, { "epoch": 4.499907275761884, "grad_norm": 0.07055949419736862, "learning_rate": 1.775080361006367e-05, "loss": 0.0075, "step": 145590 }, { "epoch": 4.5008345181430425, "grad_norm": 0.12479166686534882, "learning_rate": 1.7750339988873092e-05, "loss": 0.0087, "step": 145620 }, { "epoch": 4.501761760524201, 
"grad_norm": 0.08252373337745667, "learning_rate": 1.7749876367682513e-05, "loss": 0.0073, "step": 145650 }, { "epoch": 4.50268900290536, "grad_norm": 0.13182416558265686, "learning_rate": 1.7749412746491935e-05, "loss": 0.0076, "step": 145680 }, { "epoch": 4.503616245286518, "grad_norm": 0.10989271104335785, "learning_rate": 1.7748949125301356e-05, "loss": 0.0075, "step": 145710 }, { "epoch": 4.504543487667676, "grad_norm": 0.08051234483718872, "learning_rate": 1.7748485504110774e-05, "loss": 0.0078, "step": 145740 }, { "epoch": 4.505470730048835, "grad_norm": 0.1294327974319458, "learning_rate": 1.7748021882920196e-05, "loss": 0.0075, "step": 145770 }, { "epoch": 4.506397972429993, "grad_norm": 0.12116780877113342, "learning_rate": 1.7747558261729617e-05, "loss": 0.0066, "step": 145800 }, { "epoch": 4.507325214811152, "grad_norm": 0.15063653886318207, "learning_rate": 1.774709464053904e-05, "loss": 0.0074, "step": 145830 }, { "epoch": 4.50825245719231, "grad_norm": 0.16870830953121185, "learning_rate": 1.774663101934846e-05, "loss": 0.0078, "step": 145860 }, { "epoch": 4.509179699573469, "grad_norm": 0.10918789356946945, "learning_rate": 1.7746167398157882e-05, "loss": 0.0081, "step": 145890 }, { "epoch": 4.510106941954627, "grad_norm": 0.11514927446842194, "learning_rate": 1.77457037769673e-05, "loss": 0.0068, "step": 145920 }, { "epoch": 4.511034184335785, "grad_norm": 0.12717227637767792, "learning_rate": 1.774524015577672e-05, "loss": 0.0068, "step": 145950 }, { "epoch": 4.511961426716944, "grad_norm": 0.08586136251688004, "learning_rate": 1.7744776534586143e-05, "loss": 0.008, "step": 145980 }, { "epoch": 4.5128886690981025, "grad_norm": 0.12824735045433044, "learning_rate": 1.774431291339556e-05, "loss": 0.0076, "step": 146010 }, { "epoch": 4.513815911479261, "grad_norm": 0.11373312771320343, "learning_rate": 1.7743849292204983e-05, "loss": 0.0078, "step": 146040 }, { "epoch": 4.514743153860419, "grad_norm": 0.09227073192596436, "learning_rate": 
1.7743385671014404e-05, "loss": 0.0075, "step": 146070 }, { "epoch": 4.515670396241577, "grad_norm": 0.1424662321805954, "learning_rate": 1.7742922049823826e-05, "loss": 0.0083, "step": 146100 }, { "epoch": 4.516597638622736, "grad_norm": 0.14079776406288147, "learning_rate": 1.7742458428633247e-05, "loss": 0.0083, "step": 146130 }, { "epoch": 4.517524881003895, "grad_norm": 0.17080801725387573, "learning_rate": 1.7741994807442665e-05, "loss": 0.0078, "step": 146160 }, { "epoch": 4.518452123385053, "grad_norm": 0.14621001482009888, "learning_rate": 1.7741531186252087e-05, "loss": 0.0072, "step": 146190 }, { "epoch": 4.519379365766211, "grad_norm": 0.16215848922729492, "learning_rate": 1.774106756506151e-05, "loss": 0.0074, "step": 146220 }, { "epoch": 4.5203066081473695, "grad_norm": 0.10160928219556808, "learning_rate": 1.774060394387093e-05, "loss": 0.0082, "step": 146250 }, { "epoch": 4.521233850528528, "grad_norm": 0.13066466152668, "learning_rate": 1.774014032268035e-05, "loss": 0.0074, "step": 146280 }, { "epoch": 4.522161092909687, "grad_norm": 0.5072054266929626, "learning_rate": 1.7739676701489773e-05, "loss": 0.0074, "step": 146310 }, { "epoch": 4.523088335290845, "grad_norm": 0.10676614940166473, "learning_rate": 1.773921308029919e-05, "loss": 0.008, "step": 146340 }, { "epoch": 4.524015577672003, "grad_norm": 0.10195165872573853, "learning_rate": 1.7738749459108613e-05, "loss": 0.0075, "step": 146370 }, { "epoch": 4.524942820053162, "grad_norm": 0.12106718122959137, "learning_rate": 1.773828583791803e-05, "loss": 0.0073, "step": 146400 }, { "epoch": 4.52587006243432, "grad_norm": 0.11271371692419052, "learning_rate": 1.7737822216727456e-05, "loss": 0.0079, "step": 146430 }, { "epoch": 4.526797304815479, "grad_norm": 0.10541433840990067, "learning_rate": 1.7737358595536877e-05, "loss": 0.0073, "step": 146460 }, { "epoch": 4.527724547196637, "grad_norm": 0.10724107921123505, "learning_rate": 1.7736894974346295e-05, "loss": 0.0075, "step": 146490 }, { 
"epoch": 4.528651789577796, "grad_norm": 0.07924504578113556, "learning_rate": 1.7736431353155717e-05, "loss": 0.0074, "step": 146520 }, { "epoch": 4.529579031958954, "grad_norm": 0.12076960504055023, "learning_rate": 1.7735967731965138e-05, "loss": 0.0075, "step": 146550 }, { "epoch": 4.530506274340112, "grad_norm": 0.06512390077114105, "learning_rate": 1.7735504110774556e-05, "loss": 0.0068, "step": 146580 }, { "epoch": 4.531433516721271, "grad_norm": 0.13034182786941528, "learning_rate": 1.7735040489583978e-05, "loss": 0.0081, "step": 146610 }, { "epoch": 4.5323607591024295, "grad_norm": 0.14131900668144226, "learning_rate": 1.77345768683934e-05, "loss": 0.0078, "step": 146640 }, { "epoch": 4.533288001483588, "grad_norm": 0.12525834143161774, "learning_rate": 1.773411324720282e-05, "loss": 0.0079, "step": 146670 }, { "epoch": 4.534215243864747, "grad_norm": 0.10005570203065872, "learning_rate": 1.7733649626012242e-05, "loss": 0.0073, "step": 146700 }, { "epoch": 4.535142486245904, "grad_norm": 0.10546138137578964, "learning_rate": 1.773318600482166e-05, "loss": 0.0075, "step": 146730 }, { "epoch": 4.536069728627063, "grad_norm": 0.15775184333324432, "learning_rate": 1.7732722383631082e-05, "loss": 0.0069, "step": 146760 }, { "epoch": 4.536996971008222, "grad_norm": 0.11830465495586395, "learning_rate": 1.7732258762440503e-05, "loss": 0.0074, "step": 146790 }, { "epoch": 4.53792421338938, "grad_norm": 0.13840998709201813, "learning_rate": 1.7731795141249925e-05, "loss": 0.0069, "step": 146820 }, { "epoch": 4.538851455770539, "grad_norm": 0.12604033946990967, "learning_rate": 1.7731331520059346e-05, "loss": 0.0069, "step": 146850 }, { "epoch": 4.5397786981516965, "grad_norm": 0.109989233314991, "learning_rate": 1.7730867898868768e-05, "loss": 0.008, "step": 146880 }, { "epoch": 4.540705940532855, "grad_norm": 0.15211787819862366, "learning_rate": 1.7730404277678186e-05, "loss": 0.0078, "step": 146910 }, { "epoch": 4.541633182914014, "grad_norm": 
0.13778476417064667, "learning_rate": 1.7729940656487608e-05, "loss": 0.0073, "step": 146940 }, { "epoch": 4.542560425295172, "grad_norm": 0.11253995448350906, "learning_rate": 1.772947703529703e-05, "loss": 0.0082, "step": 146970 }, { "epoch": 4.543487667676331, "grad_norm": 0.12400108575820923, "learning_rate": 1.7729013414106447e-05, "loss": 0.0074, "step": 147000 }, { "epoch": 4.544414910057489, "grad_norm": 0.1469539999961853, "learning_rate": 1.772854979291587e-05, "loss": 0.0078, "step": 147030 }, { "epoch": 4.545342152438647, "grad_norm": 0.20448632538318634, "learning_rate": 1.772808617172529e-05, "loss": 0.0077, "step": 147060 }, { "epoch": 4.546269394819806, "grad_norm": 0.12646400928497314, "learning_rate": 1.7727622550534712e-05, "loss": 0.0075, "step": 147090 }, { "epoch": 4.5471966372009645, "grad_norm": 0.16711962223052979, "learning_rate": 1.7727158929344133e-05, "loss": 0.007, "step": 147120 }, { "epoch": 4.548123879582123, "grad_norm": 0.14088094234466553, "learning_rate": 1.772669530815355e-05, "loss": 0.0077, "step": 147150 }, { "epoch": 4.549051121963281, "grad_norm": 0.10744577646255493, "learning_rate": 1.7726231686962973e-05, "loss": 0.0075, "step": 147180 }, { "epoch": 4.549978364344439, "grad_norm": 0.1784728318452835, "learning_rate": 1.7725768065772394e-05, "loss": 0.008, "step": 147210 }, { "epoch": 4.550905606725598, "grad_norm": 0.12698574364185333, "learning_rate": 1.7725304444581816e-05, "loss": 0.0074, "step": 147240 }, { "epoch": 4.551832849106757, "grad_norm": 0.1258862465620041, "learning_rate": 1.7724840823391237e-05, "loss": 0.0075, "step": 147270 }, { "epoch": 4.552760091487915, "grad_norm": 0.14268960058689117, "learning_rate": 1.772437720220066e-05, "loss": 0.0082, "step": 147300 }, { "epoch": 4.553687333869074, "grad_norm": 0.12724469602108002, "learning_rate": 1.7723913581010077e-05, "loss": 0.008, "step": 147330 }, { "epoch": 4.5546145762502315, "grad_norm": 0.11819019168615341, "learning_rate": 1.77234499598195e-05, 
"loss": 0.0081, "step": 147360 }, { "epoch": 4.55554181863139, "grad_norm": 0.19770976901054382, "learning_rate": 1.7722986338628917e-05, "loss": 0.0078, "step": 147390 }, { "epoch": 4.556469061012549, "grad_norm": 0.09462856501340866, "learning_rate": 1.7722522717438338e-05, "loss": 0.0082, "step": 147420 }, { "epoch": 4.557396303393707, "grad_norm": 0.08907128125429153, "learning_rate": 1.772205909624776e-05, "loss": 0.0075, "step": 147450 }, { "epoch": 4.558323545774866, "grad_norm": 0.11109469830989838, "learning_rate": 1.772159547505718e-05, "loss": 0.0081, "step": 147480 }, { "epoch": 4.5592507881560245, "grad_norm": 0.20437611639499664, "learning_rate": 1.7721131853866603e-05, "loss": 0.0075, "step": 147510 }, { "epoch": 4.560178030537182, "grad_norm": 0.14668239653110504, "learning_rate": 1.7720668232676024e-05, "loss": 0.0078, "step": 147540 }, { "epoch": 4.561105272918341, "grad_norm": 0.11152627319097519, "learning_rate": 1.7720204611485442e-05, "loss": 0.0078, "step": 147570 }, { "epoch": 4.562032515299499, "grad_norm": 0.13721202313899994, "learning_rate": 1.7719740990294864e-05, "loss": 0.0072, "step": 147600 }, { "epoch": 4.562959757680658, "grad_norm": 0.1386169195175171, "learning_rate": 1.7719277369104285e-05, "loss": 0.0078, "step": 147630 }, { "epoch": 4.563887000061817, "grad_norm": 0.14664068818092346, "learning_rate": 1.7718813747913707e-05, "loss": 0.0072, "step": 147660 }, { "epoch": 4.564814242442974, "grad_norm": 0.1719646453857422, "learning_rate": 1.771835012672313e-05, "loss": 0.0069, "step": 147690 }, { "epoch": 4.565741484824133, "grad_norm": 0.09568733721971512, "learning_rate": 1.7717901959572236e-05, "loss": 0.0077, "step": 147720 }, { "epoch": 4.5666687272052915, "grad_norm": 0.12361951917409897, "learning_rate": 1.7717438338381654e-05, "loss": 0.0079, "step": 147750 }, { "epoch": 4.56759596958645, "grad_norm": 0.1895270198583603, "learning_rate": 1.7716974717191076e-05, "loss": 0.008, "step": 147780 }, { "epoch": 
4.568523211967609, "grad_norm": 0.13683627545833588, "learning_rate": 1.7716511096000494e-05, "loss": 0.0074, "step": 147810 }, { "epoch": 4.569450454348766, "grad_norm": 0.11770327389240265, "learning_rate": 1.7716047474809915e-05, "loss": 0.0075, "step": 147840 }, { "epoch": 4.570377696729925, "grad_norm": 0.12918564677238464, "learning_rate": 1.7715583853619337e-05, "loss": 0.0083, "step": 147870 }, { "epoch": 4.571304939111084, "grad_norm": 0.09686256945133209, "learning_rate": 1.771512023242876e-05, "loss": 0.0079, "step": 147900 }, { "epoch": 4.572232181492242, "grad_norm": 0.1813722848892212, "learning_rate": 1.771465661123818e-05, "loss": 0.0079, "step": 147930 }, { "epoch": 4.573159423873401, "grad_norm": 0.13079579174518585, "learning_rate": 1.77141929900476e-05, "loss": 0.0071, "step": 147960 }, { "epoch": 4.5740866662545585, "grad_norm": 0.13199611008167267, "learning_rate": 1.771372936885702e-05, "loss": 0.0084, "step": 147990 }, { "epoch": 4.575013908635717, "grad_norm": 0.18087239563465118, "learning_rate": 1.771326574766644e-05, "loss": 0.0076, "step": 148020 }, { "epoch": 4.575941151016876, "grad_norm": 0.13708527386188507, "learning_rate": 1.7712802126475863e-05, "loss": 0.007, "step": 148050 }, { "epoch": 4.576868393398034, "grad_norm": 0.14144174754619598, "learning_rate": 1.7712338505285284e-05, "loss": 0.0069, "step": 148080 }, { "epoch": 4.577795635779193, "grad_norm": 0.09131938219070435, "learning_rate": 1.7711874884094706e-05, "loss": 0.0077, "step": 148110 }, { "epoch": 4.578722878160351, "grad_norm": 0.1273912936449051, "learning_rate": 1.7711411262904124e-05, "loss": 0.0075, "step": 148140 }, { "epoch": 4.579650120541509, "grad_norm": 0.11777228862047195, "learning_rate": 1.7710947641713545e-05, "loss": 0.0074, "step": 148170 }, { "epoch": 4.580577362922668, "grad_norm": 0.10849854350090027, "learning_rate": 1.7710484020522967e-05, "loss": 0.008, "step": 148200 }, { "epoch": 4.581504605303826, "grad_norm": 0.1446896344423294, 
"learning_rate": 1.7710020399332385e-05, "loss": 0.0079, "step": 148230 }, { "epoch": 4.582431847684985, "grad_norm": 0.14094839990139008, "learning_rate": 1.7709556778141806e-05, "loss": 0.0074, "step": 148260 }, { "epoch": 4.583359090066144, "grad_norm": 0.09088858217000961, "learning_rate": 1.7709093156951228e-05, "loss": 0.0079, "step": 148290 }, { "epoch": 4.584286332447302, "grad_norm": 0.1483944207429886, "learning_rate": 1.770862953576065e-05, "loss": 0.0076, "step": 148320 }, { "epoch": 4.58521357482846, "grad_norm": 0.12790539860725403, "learning_rate": 1.770816591457007e-05, "loss": 0.0079, "step": 148350 }, { "epoch": 4.5861408172096185, "grad_norm": 0.11429519206285477, "learning_rate": 1.7707702293379492e-05, "loss": 0.0074, "step": 148380 }, { "epoch": 4.587068059590777, "grad_norm": 0.140118807554245, "learning_rate": 1.770723867218891e-05, "loss": 0.0074, "step": 148410 }, { "epoch": 4.587995301971936, "grad_norm": 0.12126567959785461, "learning_rate": 1.7706775050998332e-05, "loss": 0.0077, "step": 148440 }, { "epoch": 4.588922544353094, "grad_norm": 0.0999356359243393, "learning_rate": 1.7706311429807754e-05, "loss": 0.0076, "step": 148470 }, { "epoch": 4.589849786734252, "grad_norm": 0.1154092475771904, "learning_rate": 1.7705847808617175e-05, "loss": 0.0078, "step": 148500 }, { "epoch": 4.590777029115411, "grad_norm": 0.140669584274292, "learning_rate": 1.7705384187426597e-05, "loss": 0.0071, "step": 148530 }, { "epoch": 4.591704271496569, "grad_norm": 0.13854347169399261, "learning_rate": 1.7704920566236015e-05, "loss": 0.0073, "step": 148560 }, { "epoch": 4.592631513877728, "grad_norm": 0.1318151354789734, "learning_rate": 1.7704456945045436e-05, "loss": 0.0077, "step": 148590 }, { "epoch": 4.593558756258886, "grad_norm": 0.11374860256910324, "learning_rate": 1.7703993323854858e-05, "loss": 0.0078, "step": 148620 }, { "epoch": 4.594485998640044, "grad_norm": 0.11679628491401672, "learning_rate": 1.7703529702664276e-05, "loss": 0.0083, "step": 
148650 }, { "epoch": 4.595413241021203, "grad_norm": 0.09602635353803635, "learning_rate": 1.7703066081473697e-05, "loss": 0.0084, "step": 148680 }, { "epoch": 4.596340483402361, "grad_norm": 0.10370725393295288, "learning_rate": 1.770260246028312e-05, "loss": 0.0081, "step": 148710 }, { "epoch": 4.59726772578352, "grad_norm": 0.11088865995407104, "learning_rate": 1.770213883909254e-05, "loss": 0.0074, "step": 148740 }, { "epoch": 4.5981949681646785, "grad_norm": 0.1323672980070114, "learning_rate": 1.7701675217901962e-05, "loss": 0.0073, "step": 148770 }, { "epoch": 4.599122210545836, "grad_norm": 0.11344920843839645, "learning_rate": 1.7701211596711383e-05, "loss": 0.0071, "step": 148800 }, { "epoch": 4.600049452926995, "grad_norm": 0.16126327216625214, "learning_rate": 1.77007479755208e-05, "loss": 0.0075, "step": 148830 }, { "epoch": 4.6009766953081535, "grad_norm": 0.1675584316253662, "learning_rate": 1.7700284354330223e-05, "loss": 0.0083, "step": 148860 }, { "epoch": 4.601903937689312, "grad_norm": 0.09805308282375336, "learning_rate": 1.7699820733139644e-05, "loss": 0.0077, "step": 148890 }, { "epoch": 4.602831180070471, "grad_norm": 0.1380365788936615, "learning_rate": 1.7699357111949066e-05, "loss": 0.007, "step": 148920 }, { "epoch": 4.603758422451628, "grad_norm": 0.12485463917255402, "learning_rate": 1.7698893490758487e-05, "loss": 0.0076, "step": 148950 }, { "epoch": 4.604685664832787, "grad_norm": 0.13616086542606354, "learning_rate": 1.7698429869567906e-05, "loss": 0.0081, "step": 148980 }, { "epoch": 4.605612907213946, "grad_norm": 0.14573918282985687, "learning_rate": 1.7697966248377327e-05, "loss": 0.0073, "step": 149010 }, { "epoch": 4.606540149595104, "grad_norm": 0.10774239152669907, "learning_rate": 1.769750262718675e-05, "loss": 0.0066, "step": 149040 }, { "epoch": 4.607467391976263, "grad_norm": 0.10096266120672226, "learning_rate": 1.7697039005996167e-05, "loss": 0.0072, "step": 149070 }, { "epoch": 4.608394634357421, "grad_norm": 
0.09121248871088028, "learning_rate": 1.7696575384805588e-05, "loss": 0.0076, "step": 149100 }, { "epoch": 4.60932187673858, "grad_norm": 0.19118209183216095, "learning_rate": 1.7696111763615013e-05, "loss": 0.0075, "step": 149130 }, { "epoch": 4.610249119119738, "grad_norm": 0.11794822663068771, "learning_rate": 1.769564814242443e-05, "loss": 0.0078, "step": 149160 }, { "epoch": 4.611176361500896, "grad_norm": 0.13826368749141693, "learning_rate": 1.7695184521233853e-05, "loss": 0.0069, "step": 149190 }, { "epoch": 4.612103603882055, "grad_norm": 0.10969781875610352, "learning_rate": 1.769472090004327e-05, "loss": 0.0082, "step": 149220 }, { "epoch": 4.6130308462632135, "grad_norm": 0.11054999381303787, "learning_rate": 1.7694257278852692e-05, "loss": 0.0079, "step": 149250 }, { "epoch": 4.613958088644372, "grad_norm": 0.1410619467496872, "learning_rate": 1.7693793657662114e-05, "loss": 0.0076, "step": 149280 }, { "epoch": 4.61488533102553, "grad_norm": 0.11050041019916534, "learning_rate": 1.7693330036471535e-05, "loss": 0.0079, "step": 149310 }, { "epoch": 4.615812573406688, "grad_norm": 0.11404857784509659, "learning_rate": 1.7692866415280957e-05, "loss": 0.0074, "step": 149340 }, { "epoch": 4.616739815787847, "grad_norm": 0.14479310810565948, "learning_rate": 1.769240279409038e-05, "loss": 0.0078, "step": 149370 }, { "epoch": 4.617667058169006, "grad_norm": 0.13864319026470184, "learning_rate": 1.7691939172899797e-05, "loss": 0.0084, "step": 149400 }, { "epoch": 4.618594300550164, "grad_norm": 0.12114877998828888, "learning_rate": 1.7691475551709218e-05, "loss": 0.0076, "step": 149430 }, { "epoch": 4.619521542931322, "grad_norm": 0.09464185684919357, "learning_rate": 1.769101193051864e-05, "loss": 0.0067, "step": 149460 }, { "epoch": 4.6204487853124805, "grad_norm": 0.09610247611999512, "learning_rate": 1.769054830932806e-05, "loss": 0.0076, "step": 149490 }, { "epoch": 4.621376027693639, "grad_norm": 0.11177004873752594, "learning_rate": 
1.7690084688137483e-05, "loss": 0.0078, "step": 149520 }, { "epoch": 4.622303270074798, "grad_norm": 0.16138775646686554, "learning_rate": 1.76896210669469e-05, "loss": 0.0074, "step": 149550 }, { "epoch": 4.623230512455956, "grad_norm": 0.08428120613098145, "learning_rate": 1.7689157445756322e-05, "loss": 0.0074, "step": 149580 }, { "epoch": 4.624157754837114, "grad_norm": 0.14781968295574188, "learning_rate": 1.7688693824565744e-05, "loss": 0.0071, "step": 149610 }, { "epoch": 4.625084997218273, "grad_norm": 0.19110897183418274, "learning_rate": 1.7688230203375162e-05, "loss": 0.0073, "step": 149640 }, { "epoch": 4.626012239599431, "grad_norm": 0.13608478009700775, "learning_rate": 1.7687766582184583e-05, "loss": 0.008, "step": 149670 }, { "epoch": 4.62693948198059, "grad_norm": 0.10825950652360916, "learning_rate": 1.7687302960994005e-05, "loss": 0.0073, "step": 149700 }, { "epoch": 4.627866724361748, "grad_norm": 0.12609358131885529, "learning_rate": 1.7686839339803426e-05, "loss": 0.0079, "step": 149730 }, { "epoch": 4.628793966742906, "grad_norm": 0.08918404579162598, "learning_rate": 1.7686375718612848e-05, "loss": 0.0082, "step": 149760 }, { "epoch": 4.629721209124065, "grad_norm": 0.12873885035514832, "learning_rate": 1.768591209742227e-05, "loss": 0.008, "step": 149790 }, { "epoch": 4.630648451505223, "grad_norm": 0.13572745025157928, "learning_rate": 1.7685448476231687e-05, "loss": 0.0073, "step": 149820 }, { "epoch": 4.631575693886382, "grad_norm": 0.13974972069263458, "learning_rate": 1.768498485504111e-05, "loss": 0.0079, "step": 149850 }, { "epoch": 4.6325029362675405, "grad_norm": 0.1407906860113144, "learning_rate": 1.768452123385053e-05, "loss": 0.0072, "step": 149880 }, { "epoch": 4.633430178648699, "grad_norm": 0.09393014013767242, "learning_rate": 1.7684057612659952e-05, "loss": 0.0066, "step": 149910 }, { "epoch": 4.634357421029857, "grad_norm": 0.12894055247306824, "learning_rate": 1.7683593991469373e-05, "loss": 0.0074, "step": 149940 }, { 
"epoch": 4.635284663411015, "grad_norm": 0.07363326102495193, "learning_rate": 1.768313037027879e-05, "loss": 0.0074, "step": 149970 }, { "epoch": 4.636211905792174, "grad_norm": 0.15174515545368195, "learning_rate": 1.7682666749088213e-05, "loss": 0.0081, "step": 150000 }, { "epoch": 4.636211905792174, "eval_f1": 0.9975795012850921, "eval_loss": 0.00788673385977745, "eval_precision": 0.9975776797665495, "eval_recall": 0.9975828488998025, "eval_runtime": 4183.9384, "eval_samples_per_second": 282.318, "eval_steps_per_second": 8.823, "step": 150000 }, { "epoch": 4.637139148173333, "grad_norm": 0.1503363698720932, "learning_rate": 1.7682203127897635e-05, "loss": 0.0071, "step": 150030 }, { "epoch": 4.638066390554491, "grad_norm": 0.16517585515975952, "learning_rate": 1.7681739506707053e-05, "loss": 0.0083, "step": 150060 }, { "epoch": 4.63899363293565, "grad_norm": 0.11561283469200134, "learning_rate": 1.7681275885516474e-05, "loss": 0.0079, "step": 150090 }, { "epoch": 4.6399208753168075, "grad_norm": 0.14705954492092133, "learning_rate": 1.7680812264325896e-05, "loss": 0.0089, "step": 150120 }, { "epoch": 4.640848117697966, "grad_norm": 0.10565178841352463, "learning_rate": 1.7680348643135317e-05, "loss": 0.0081, "step": 150150 }, { "epoch": 4.641775360079125, "grad_norm": 0.1259392648935318, "learning_rate": 1.767988502194474e-05, "loss": 0.0075, "step": 150180 }, { "epoch": 4.642702602460283, "grad_norm": 0.1217338889837265, "learning_rate": 1.7679421400754157e-05, "loss": 0.0074, "step": 150210 }, { "epoch": 4.643629844841442, "grad_norm": 0.17026184499263763, "learning_rate": 1.767895777956358e-05, "loss": 0.0075, "step": 150240 }, { "epoch": 4.6445570872226, "grad_norm": 0.11175204068422318, "learning_rate": 1.7678494158373e-05, "loss": 0.0076, "step": 150270 }, { "epoch": 4.645484329603758, "grad_norm": 0.13173048198223114, "learning_rate": 1.767803053718242e-05, "loss": 0.0075, "step": 150300 }, { "epoch": 4.646411571984917, "grad_norm": 0.11052161455154419, 
"learning_rate": 1.7677566915991843e-05, "loss": 0.0077, "step": 150330 }, { "epoch": 4.647338814366075, "grad_norm": 0.11297579854726791, "learning_rate": 1.7677103294801264e-05, "loss": 0.0081, "step": 150360 }, { "epoch": 4.648266056747234, "grad_norm": 0.11499317735433578, "learning_rate": 1.7676639673610682e-05, "loss": 0.0073, "step": 150390 }, { "epoch": 4.649193299128392, "grad_norm": 0.15685491263866425, "learning_rate": 1.7676176052420104e-05, "loss": 0.0077, "step": 150420 }, { "epoch": 4.65012054150955, "grad_norm": 0.13910332322120667, "learning_rate": 1.7675712431229526e-05, "loss": 0.0076, "step": 150450 }, { "epoch": 4.651047783890709, "grad_norm": 0.13990963995456696, "learning_rate": 1.7675248810038944e-05, "loss": 0.0077, "step": 150480 }, { "epoch": 4.6519750262718675, "grad_norm": 0.11688588559627533, "learning_rate": 1.7674785188848365e-05, "loss": 0.007, "step": 150510 }, { "epoch": 4.652902268653026, "grad_norm": 0.13179424405097961, "learning_rate": 1.7674321567657787e-05, "loss": 0.0075, "step": 150540 }, { "epoch": 4.653829511034184, "grad_norm": 0.09647632390260696, "learning_rate": 1.7673857946467208e-05, "loss": 0.0078, "step": 150570 }, { "epoch": 4.6547567534153425, "grad_norm": 0.10774902254343033, "learning_rate": 1.767339432527663e-05, "loss": 0.0079, "step": 150600 }, { "epoch": 4.655683995796501, "grad_norm": 0.10326775163412094, "learning_rate": 1.7672930704086048e-05, "loss": 0.0084, "step": 150630 }, { "epoch": 4.65661123817766, "grad_norm": 0.0850168988108635, "learning_rate": 1.767246708289547e-05, "loss": 0.0073, "step": 150660 }, { "epoch": 4.657538480558818, "grad_norm": 0.17810487747192383, "learning_rate": 1.767200346170489e-05, "loss": 0.0072, "step": 150690 }, { "epoch": 4.658465722939977, "grad_norm": 0.1306060403585434, "learning_rate": 1.7671539840514312e-05, "loss": 0.0081, "step": 150720 }, { "epoch": 4.659392965321135, "grad_norm": 0.21010248363018036, "learning_rate": 1.7671076219323734e-05, "loss": 0.0078, 
"step": 150750 }, { "epoch": 4.660320207702293, "grad_norm": 0.13926804065704346, "learning_rate": 1.7670612598133155e-05, "loss": 0.0075, "step": 150780 }, { "epoch": 4.661247450083452, "grad_norm": 0.11404723674058914, "learning_rate": 1.7670148976942573e-05, "loss": 0.0079, "step": 150810 }, { "epoch": 4.66217469246461, "grad_norm": 0.154031902551651, "learning_rate": 1.7669685355751995e-05, "loss": 0.0078, "step": 150840 }, { "epoch": 4.663101934845769, "grad_norm": 0.11289053410291672, "learning_rate": 1.7669221734561416e-05, "loss": 0.0076, "step": 150870 }, { "epoch": 4.664029177226928, "grad_norm": 0.13523446023464203, "learning_rate": 1.7668758113370838e-05, "loss": 0.0068, "step": 150900 }, { "epoch": 4.664956419608085, "grad_norm": 0.1555851548910141, "learning_rate": 1.766829449218026e-05, "loss": 0.0074, "step": 150930 }, { "epoch": 4.665883661989244, "grad_norm": 0.11015929281711578, "learning_rate": 1.7667830870989678e-05, "loss": 0.0078, "step": 150960 }, { "epoch": 4.6668109043704025, "grad_norm": 0.10632864385843277, "learning_rate": 1.76673672497991e-05, "loss": 0.0074, "step": 150990 }, { "epoch": 4.667738146751561, "grad_norm": 0.18474841117858887, "learning_rate": 1.766690362860852e-05, "loss": 0.0077, "step": 151020 }, { "epoch": 4.66866538913272, "grad_norm": 0.13637782633304596, "learning_rate": 1.766644000741794e-05, "loss": 0.0071, "step": 151050 }, { "epoch": 4.669592631513877, "grad_norm": 0.16030707955360413, "learning_rate": 1.766597638622736e-05, "loss": 0.0077, "step": 151080 }, { "epoch": 4.670519873895036, "grad_norm": 0.09765080362558365, "learning_rate": 1.7665512765036782e-05, "loss": 0.0068, "step": 151110 }, { "epoch": 4.671447116276195, "grad_norm": 0.15030857920646667, "learning_rate": 1.7665049143846203e-05, "loss": 0.0087, "step": 151140 }, { "epoch": 4.672374358657353, "grad_norm": 0.11211097240447998, "learning_rate": 1.7664585522655625e-05, "loss": 0.009, "step": 151170 }, { "epoch": 4.673301601038512, "grad_norm": 
0.16350455582141876, "learning_rate": 1.7664121901465043e-05, "loss": 0.0076, "step": 151200 }, { "epoch": 4.6742288434196695, "grad_norm": 0.1399257630109787, "learning_rate": 1.7663658280274464e-05, "loss": 0.0074, "step": 151230 }, { "epoch": 4.675156085800828, "grad_norm": 0.11114632338285446, "learning_rate": 1.7663194659083886e-05, "loss": 0.0076, "step": 151260 }, { "epoch": 4.676083328181987, "grad_norm": 0.15231232345104218, "learning_rate": 1.7662731037893307e-05, "loss": 0.0076, "step": 151290 }, { "epoch": 4.677010570563145, "grad_norm": 0.12323766201734543, "learning_rate": 1.766226741670273e-05, "loss": 0.0072, "step": 151320 }, { "epoch": 4.677937812944304, "grad_norm": 0.18683646619319916, "learning_rate": 1.766180379551215e-05, "loss": 0.0073, "step": 151350 }, { "epoch": 4.678865055325462, "grad_norm": 0.12874175608158112, "learning_rate": 1.766134017432157e-05, "loss": 0.0077, "step": 151380 }, { "epoch": 4.67979229770662, "grad_norm": 0.1880853921175003, "learning_rate": 1.766087655313099e-05, "loss": 0.0082, "step": 151410 }, { "epoch": 4.680719540087779, "grad_norm": 0.12129819393157959, "learning_rate": 1.766041293194041e-05, "loss": 0.0076, "step": 151440 }, { "epoch": 4.681646782468937, "grad_norm": 0.10596541315317154, "learning_rate": 1.765994931074983e-05, "loss": 0.0076, "step": 151470 }, { "epoch": 4.682574024850096, "grad_norm": 0.11696387082338333, "learning_rate": 1.765948568955925e-05, "loss": 0.0072, "step": 151500 }, { "epoch": 4.683501267231255, "grad_norm": 0.12795841693878174, "learning_rate": 1.7659022068368673e-05, "loss": 0.0079, "step": 151530 }, { "epoch": 4.684428509612412, "grad_norm": 0.10147170722484589, "learning_rate": 1.7658558447178094e-05, "loss": 0.0072, "step": 151560 }, { "epoch": 4.685355751993571, "grad_norm": 0.1179114356637001, "learning_rate": 1.7658094825987516e-05, "loss": 0.0074, "step": 151590 }, { "epoch": 4.6862829943747295, "grad_norm": 0.12931543588638306, "learning_rate": 1.7657631204796934e-05, 
"loss": 0.007, "step": 151620 }, { "epoch": 4.687210236755888, "grad_norm": 0.12913909554481506, "learning_rate": 1.7657167583606355e-05, "loss": 0.0075, "step": 151650 }, { "epoch": 4.688137479137047, "grad_norm": 0.13880881667137146, "learning_rate": 1.7656703962415777e-05, "loss": 0.0068, "step": 151680 }, { "epoch": 4.689064721518205, "grad_norm": 0.11010642349720001, "learning_rate": 1.7656240341225198e-05, "loss": 0.0083, "step": 151710 }, { "epoch": 4.689991963899363, "grad_norm": 0.1301361471414566, "learning_rate": 1.7655792174074303e-05, "loss": 0.0079, "step": 151740 }, { "epoch": 4.690919206280522, "grad_norm": 0.08908285945653915, "learning_rate": 1.7655328552883724e-05, "loss": 0.0069, "step": 151770 }, { "epoch": 4.69184644866168, "grad_norm": 0.13413876295089722, "learning_rate": 1.7654864931693146e-05, "loss": 0.0077, "step": 151800 }, { "epoch": 4.692773691042839, "grad_norm": 0.11715666204690933, "learning_rate": 1.7654401310502567e-05, "loss": 0.0081, "step": 151830 }, { "epoch": 4.693700933423997, "grad_norm": 0.14722810685634613, "learning_rate": 1.765393768931199e-05, "loss": 0.0076, "step": 151860 }, { "epoch": 4.694628175805155, "grad_norm": 0.08925475180149078, "learning_rate": 1.7653474068121407e-05, "loss": 0.0084, "step": 151890 }, { "epoch": 4.695555418186314, "grad_norm": 0.14209523797035217, "learning_rate": 1.765301044693083e-05, "loss": 0.0075, "step": 151920 }, { "epoch": 4.696482660567472, "grad_norm": 0.13131177425384521, "learning_rate": 1.765254682574025e-05, "loss": 0.0075, "step": 151950 }, { "epoch": 4.697409902948631, "grad_norm": 0.16967542469501495, "learning_rate": 1.765208320454967e-05, "loss": 0.0078, "step": 151980 }, { "epoch": 4.6983371453297895, "grad_norm": 0.12872318923473358, "learning_rate": 1.7651619583359093e-05, "loss": 0.0081, "step": 152010 }, { "epoch": 4.699264387710947, "grad_norm": 0.12237045168876648, "learning_rate": 1.765115596216851e-05, "loss": 0.0077, "step": 152040 }, { "epoch": 
4.700191630092106, "grad_norm": 0.11313438415527344, "learning_rate": 1.7650692340977933e-05, "loss": 0.0074, "step": 152070 }, { "epoch": 4.701118872473264, "grad_norm": 0.14890949428081512, "learning_rate": 1.7650228719787354e-05, "loss": 0.0075, "step": 152100 }, { "epoch": 4.702046114854423, "grad_norm": 0.10618309676647186, "learning_rate": 1.7649765098596776e-05, "loss": 0.0075, "step": 152130 }, { "epoch": 4.702973357235582, "grad_norm": 0.16240036487579346, "learning_rate": 1.7649301477406197e-05, "loss": 0.0074, "step": 152160 }, { "epoch": 4.703900599616739, "grad_norm": 0.11130160093307495, "learning_rate": 1.764883785621562e-05, "loss": 0.0076, "step": 152190 }, { "epoch": 4.704827841997898, "grad_norm": 0.14677542448043823, "learning_rate": 1.7648374235025037e-05, "loss": 0.0084, "step": 152220 }, { "epoch": 4.7057550843790565, "grad_norm": 0.10439508408308029, "learning_rate": 1.7647910613834458e-05, "loss": 0.0078, "step": 152250 }, { "epoch": 4.706682326760215, "grad_norm": 0.10277349501848221, "learning_rate": 1.764744699264388e-05, "loss": 0.0081, "step": 152280 }, { "epoch": 4.707609569141374, "grad_norm": 0.0762588307261467, "learning_rate": 1.7646983371453298e-05, "loss": 0.0072, "step": 152310 }, { "epoch": 4.708536811522532, "grad_norm": 0.15663520991802216, "learning_rate": 1.764651975026272e-05, "loss": 0.0076, "step": 152340 }, { "epoch": 4.70946405390369, "grad_norm": 0.19293953478336334, "learning_rate": 1.764605612907214e-05, "loss": 0.0072, "step": 152370 }, { "epoch": 4.710391296284849, "grad_norm": 0.12235824018716812, "learning_rate": 1.7645592507881562e-05, "loss": 0.0077, "step": 152400 }, { "epoch": 4.711318538666007, "grad_norm": 0.12569309771060944, "learning_rate": 1.7645128886690984e-05, "loss": 0.0077, "step": 152430 }, { "epoch": 4.712245781047166, "grad_norm": 0.10811951756477356, "learning_rate": 1.7644665265500402e-05, "loss": 0.0081, "step": 152460 }, { "epoch": 4.7131730234283244, "grad_norm": 0.14796623587608337, 
"learning_rate": 1.7644201644309823e-05, "loss": 0.0076, "step": 152490 }, { "epoch": 4.714100265809483, "grad_norm": 0.09894729405641556, "learning_rate": 1.7643738023119245e-05, "loss": 0.0072, "step": 152520 }, { "epoch": 4.715027508190641, "grad_norm": 0.12526631355285645, "learning_rate": 1.7643274401928667e-05, "loss": 0.0076, "step": 152550 }, { "epoch": 4.715954750571799, "grad_norm": 0.09605897217988968, "learning_rate": 1.7642810780738088e-05, "loss": 0.0068, "step": 152580 }, { "epoch": 4.716881992952958, "grad_norm": 0.0838436633348465, "learning_rate": 1.7642362613587193e-05, "loss": 0.0073, "step": 152610 }, { "epoch": 4.717809235334117, "grad_norm": 0.1487833559513092, "learning_rate": 1.7641898992396614e-05, "loss": 0.0073, "step": 152640 }, { "epoch": 4.718736477715275, "grad_norm": 0.14362908899784088, "learning_rate": 1.7641435371206036e-05, "loss": 0.0066, "step": 152670 }, { "epoch": 4.719663720096433, "grad_norm": 0.11566905677318573, "learning_rate": 1.7640971750015457e-05, "loss": 0.0074, "step": 152700 }, { "epoch": 4.7205909624775915, "grad_norm": 0.13515117764472961, "learning_rate": 1.7640508128824875e-05, "loss": 0.0075, "step": 152730 }, { "epoch": 4.72151820485875, "grad_norm": 0.10878629982471466, "learning_rate": 1.7640059961673983e-05, "loss": 0.0084, "step": 152760 }, { "epoch": 4.722445447239909, "grad_norm": 0.12456575781106949, "learning_rate": 1.7639596340483405e-05, "loss": 0.0075, "step": 152790 }, { "epoch": 4.723372689621067, "grad_norm": 0.11882930994033813, "learning_rate": 1.7639132719292823e-05, "loss": 0.0078, "step": 152820 }, { "epoch": 4.724299932002225, "grad_norm": 0.12501686811447144, "learning_rate": 1.7638669098102244e-05, "loss": 0.0071, "step": 152850 }, { "epoch": 4.725227174383384, "grad_norm": 0.14570477604866028, "learning_rate": 1.7638205476911666e-05, "loss": 0.0078, "step": 152880 }, { "epoch": 4.726154416764542, "grad_norm": 0.10243307799100876, "learning_rate": 1.7637741855721087e-05, "loss": 
0.0081, "step": 152910 }, { "epoch": 4.727081659145701, "grad_norm": 0.09375493228435516, "learning_rate": 1.763727823453051e-05, "loss": 0.0077, "step": 152940 }, { "epoch": 4.728008901526859, "grad_norm": 0.14656035602092743, "learning_rate": 1.7636814613339927e-05, "loss": 0.0075, "step": 152970 }, { "epoch": 4.728936143908017, "grad_norm": 0.09293659031391144, "learning_rate": 1.763635099214935e-05, "loss": 0.0074, "step": 153000 }, { "epoch": 4.729863386289176, "grad_norm": 0.11630482226610184, "learning_rate": 1.763588737095877e-05, "loss": 0.0075, "step": 153030 }, { "epoch": 4.730790628670334, "grad_norm": 0.12244120985269547, "learning_rate": 1.763542374976819e-05, "loss": 0.0074, "step": 153060 }, { "epoch": 4.731717871051493, "grad_norm": 0.17393182218074799, "learning_rate": 1.7634960128577613e-05, "loss": 0.0066, "step": 153090 }, { "epoch": 4.7326451134326515, "grad_norm": 0.11016713082790375, "learning_rate": 1.7634496507387034e-05, "loss": 0.0076, "step": 153120 }, { "epoch": 4.73357235581381, "grad_norm": 0.15123748779296875, "learning_rate": 1.7634032886196452e-05, "loss": 0.0075, "step": 153150 }, { "epoch": 4.734499598194968, "grad_norm": 0.1613844782114029, "learning_rate": 1.7633569265005874e-05, "loss": 0.0079, "step": 153180 }, { "epoch": 4.735426840576126, "grad_norm": 0.09762314707040787, "learning_rate": 1.7633105643815292e-05, "loss": 0.0076, "step": 153210 }, { "epoch": 4.736354082957285, "grad_norm": 0.16852574050426483, "learning_rate": 1.7632642022624714e-05, "loss": 0.0069, "step": 153240 }, { "epoch": 4.737281325338444, "grad_norm": 0.1289166361093521, "learning_rate": 1.763217840143414e-05, "loss": 0.0072, "step": 153270 }, { "epoch": 4.738208567719602, "grad_norm": 0.10049624741077423, "learning_rate": 1.7631714780243557e-05, "loss": 0.0078, "step": 153300 }, { "epoch": 4.739135810100761, "grad_norm": 0.12585361301898956, "learning_rate": 1.7631251159052978e-05, "loss": 0.0072, "step": 153330 }, { "epoch": 4.7400630524819185, 
"grad_norm": 0.15505428612232208, "learning_rate": 1.76307875378624e-05, "loss": 0.0067, "step": 153360 }, { "epoch": 4.740990294863077, "grad_norm": 0.10143953561782837, "learning_rate": 1.7630323916671818e-05, "loss": 0.0075, "step": 153390 }, { "epoch": 4.741917537244236, "grad_norm": 0.21975329518318176, "learning_rate": 1.762986029548124e-05, "loss": 0.007, "step": 153420 }, { "epoch": 4.742844779625394, "grad_norm": 0.12522757053375244, "learning_rate": 1.762939667429066e-05, "loss": 0.0078, "step": 153450 }, { "epoch": 4.743772022006553, "grad_norm": 0.08415010571479797, "learning_rate": 1.7628933053100082e-05, "loss": 0.0078, "step": 153480 }, { "epoch": 4.744699264387711, "grad_norm": 0.09389259666204453, "learning_rate": 1.7628469431909504e-05, "loss": 0.0072, "step": 153510 }, { "epoch": 4.745626506768869, "grad_norm": 0.10276704281568527, "learning_rate": 1.7628005810718925e-05, "loss": 0.0076, "step": 153540 }, { "epoch": 4.746553749150028, "grad_norm": 0.11330967396497726, "learning_rate": 1.7627542189528343e-05, "loss": 0.0076, "step": 153570 }, { "epoch": 4.747480991531186, "grad_norm": 0.1012779027223587, "learning_rate": 1.7627078568337765e-05, "loss": 0.0067, "step": 153600 }, { "epoch": 4.748408233912345, "grad_norm": 0.14682911336421967, "learning_rate": 1.7626614947147186e-05, "loss": 0.0079, "step": 153630 }, { "epoch": 4.749335476293503, "grad_norm": 0.1390252709388733, "learning_rate": 1.7626151325956608e-05, "loss": 0.008, "step": 153660 }, { "epoch": 4.750262718674661, "grad_norm": 0.1364862024784088, "learning_rate": 1.762568770476603e-05, "loss": 0.0077, "step": 153690 }, { "epoch": 4.75118996105582, "grad_norm": 0.4526296555995941, "learning_rate": 1.7625224083575448e-05, "loss": 0.0076, "step": 153720 }, { "epoch": 4.7521172034369785, "grad_norm": 0.11537552624940872, "learning_rate": 1.762476046238487e-05, "loss": 0.0071, "step": 153750 }, { "epoch": 4.753044445818137, "grad_norm": 0.1572064310312271, "learning_rate": 
1.762429684119429e-05, "loss": 0.0077, "step": 153780 }, { "epoch": 4.753971688199295, "grad_norm": 0.16625350713729858, "learning_rate": 1.762383322000371e-05, "loss": 0.0082, "step": 153810 }, { "epoch": 4.754898930580453, "grad_norm": 0.13885830342769623, "learning_rate": 1.762336959881313e-05, "loss": 0.0079, "step": 153840 }, { "epoch": 4.755826172961612, "grad_norm": 0.15174540877342224, "learning_rate": 1.762290597762255e-05, "loss": 0.008, "step": 153870 }, { "epoch": 4.756753415342771, "grad_norm": 0.13191327452659607, "learning_rate": 1.7622442356431973e-05, "loss": 0.0077, "step": 153900 }, { "epoch": 4.757680657723929, "grad_norm": 0.1927606165409088, "learning_rate": 1.7621978735241395e-05, "loss": 0.0077, "step": 153930 }, { "epoch": 4.758607900105088, "grad_norm": 0.13645650446414948, "learning_rate": 1.7621515114050813e-05, "loss": 0.0074, "step": 153960 }, { "epoch": 4.7595351424862455, "grad_norm": 0.1400548815727234, "learning_rate": 1.7621051492860234e-05, "loss": 0.008, "step": 153990 }, { "epoch": 4.760462384867404, "grad_norm": 0.08234061300754547, "learning_rate": 1.7620587871669656e-05, "loss": 0.0071, "step": 154020 }, { "epoch": 4.761389627248563, "grad_norm": 0.12678605318069458, "learning_rate": 1.7620124250479077e-05, "loss": 0.0068, "step": 154050 }, { "epoch": 4.762316869629721, "grad_norm": 0.10357221961021423, "learning_rate": 1.76196606292885e-05, "loss": 0.0076, "step": 154080 }, { "epoch": 4.76324411201088, "grad_norm": 0.18744595348834991, "learning_rate": 1.761919700809792e-05, "loss": 0.0082, "step": 154110 }, { "epoch": 4.7641713543920385, "grad_norm": 0.09634369611740112, "learning_rate": 1.761873338690734e-05, "loss": 0.0078, "step": 154140 }, { "epoch": 4.765098596773196, "grad_norm": 0.13067378103733063, "learning_rate": 1.761826976571676e-05, "loss": 0.0076, "step": 154170 }, { "epoch": 4.766025839154355, "grad_norm": 0.08418267965316772, "learning_rate": 1.761780614452618e-05, "loss": 0.0075, "step": 154200 }, { 
"epoch": 4.7669530815355134, "grad_norm": 0.13420841097831726, "learning_rate": 1.76173425233356e-05, "loss": 0.0072, "step": 154230 }, { "epoch": 4.767880323916672, "grad_norm": 0.13031861186027527, "learning_rate": 1.761687890214502e-05, "loss": 0.008, "step": 154260 }, { "epoch": 4.768807566297831, "grad_norm": 0.12013163417577744, "learning_rate": 1.7616415280954443e-05, "loss": 0.0072, "step": 154290 }, { "epoch": 4.769734808678988, "grad_norm": 0.10078064352273941, "learning_rate": 1.7615951659763864e-05, "loss": 0.0077, "step": 154320 }, { "epoch": 4.770662051060147, "grad_norm": 0.16029320657253265, "learning_rate": 1.7615488038573286e-05, "loss": 0.0071, "step": 154350 }, { "epoch": 4.771589293441306, "grad_norm": 0.13007421791553497, "learning_rate": 1.7615024417382704e-05, "loss": 0.0071, "step": 154380 }, { "epoch": 4.772516535822464, "grad_norm": 0.13964739441871643, "learning_rate": 1.7614560796192125e-05, "loss": 0.0072, "step": 154410 }, { "epoch": 4.773443778203623, "grad_norm": 0.13191303610801697, "learning_rate": 1.7614097175001547e-05, "loss": 0.0073, "step": 154440 }, { "epoch": 4.7743710205847805, "grad_norm": 0.13117459416389465, "learning_rate": 1.761364900785065e-05, "loss": 0.0078, "step": 154470 }, { "epoch": 4.775298262965939, "grad_norm": 0.06382203102111816, "learning_rate": 1.7613185386660073e-05, "loss": 0.0078, "step": 154500 }, { "epoch": 4.776225505347098, "grad_norm": 0.09107507765293121, "learning_rate": 1.7612721765469494e-05, "loss": 0.007, "step": 154530 }, { "epoch": 4.777152747728256, "grad_norm": 0.10210587084293365, "learning_rate": 1.7612258144278916e-05, "loss": 0.0079, "step": 154560 }, { "epoch": 4.778079990109415, "grad_norm": 0.1558595448732376, "learning_rate": 1.7611794523088337e-05, "loss": 0.0082, "step": 154590 }, { "epoch": 4.779007232490573, "grad_norm": 0.1348859667778015, "learning_rate": 1.761133090189776e-05, "loss": 0.0074, "step": 154620 }, { "epoch": 4.779934474871731, "grad_norm": 
0.10061666369438171, "learning_rate": 1.7610867280707177e-05, "loss": 0.0071, "step": 154650 }, { "epoch": 4.78086171725289, "grad_norm": 0.08061203360557556, "learning_rate": 1.76104036595166e-05, "loss": 0.0083, "step": 154680 }, { "epoch": 4.781788959634048, "grad_norm": 0.1275140643119812, "learning_rate": 1.760994003832602e-05, "loss": 0.0078, "step": 154710 }, { "epoch": 4.782716202015207, "grad_norm": 0.12339965254068375, "learning_rate": 1.760947641713544e-05, "loss": 0.0076, "step": 154740 }, { "epoch": 4.783643444396366, "grad_norm": 0.17627210915088654, "learning_rate": 1.7609012795944863e-05, "loss": 0.0079, "step": 154770 }, { "epoch": 4.784570686777523, "grad_norm": 0.14073440432548523, "learning_rate": 1.760854917475428e-05, "loss": 0.0074, "step": 154800 }, { "epoch": 4.785497929158682, "grad_norm": 0.16517610847949982, "learning_rate": 1.7608085553563703e-05, "loss": 0.008, "step": 154830 }, { "epoch": 4.7864251715398405, "grad_norm": 0.10163113474845886, "learning_rate": 1.7607621932373124e-05, "loss": 0.0071, "step": 154860 }, { "epoch": 4.787352413920999, "grad_norm": 0.1461084485054016, "learning_rate": 1.7607158311182546e-05, "loss": 0.0071, "step": 154890 }, { "epoch": 4.788279656302158, "grad_norm": 0.13563352823257446, "learning_rate": 1.7606694689991967e-05, "loss": 0.0074, "step": 154920 }, { "epoch": 4.789206898683316, "grad_norm": 0.14735890924930573, "learning_rate": 1.760623106880139e-05, "loss": 0.0074, "step": 154950 }, { "epoch": 4.790134141064474, "grad_norm": 0.1265452802181244, "learning_rate": 1.7605767447610807e-05, "loss": 0.007, "step": 154980 }, { "epoch": 4.791061383445633, "grad_norm": 0.09404253214597702, "learning_rate": 1.7605303826420228e-05, "loss": 0.0077, "step": 155010 }, { "epoch": 4.791988625826791, "grad_norm": 0.1170065775513649, "learning_rate": 1.7604840205229646e-05, "loss": 0.0077, "step": 155040 }, { "epoch": 4.79291586820795, "grad_norm": 0.13602176308631897, "learning_rate": 1.7604376584039068e-05, 
"loss": 0.0069, "step": 155070 }, { "epoch": 4.793843110589108, "grad_norm": 0.17359493672847748, "learning_rate": 1.760391296284849e-05, "loss": 0.0067, "step": 155100 }, { "epoch": 4.794770352970266, "grad_norm": 0.1195748969912529, "learning_rate": 1.760344934165791e-05, "loss": 0.0082, "step": 155130 }, { "epoch": 4.795697595351425, "grad_norm": 0.11379114538431168, "learning_rate": 1.7602985720467332e-05, "loss": 0.008, "step": 155160 }, { "epoch": 4.796624837732583, "grad_norm": 0.11269999295473099, "learning_rate": 1.7602522099276754e-05, "loss": 0.0079, "step": 155190 }, { "epoch": 4.797552080113742, "grad_norm": 0.15757162868976593, "learning_rate": 1.7602058478086172e-05, "loss": 0.007, "step": 155220 }, { "epoch": 4.7984793224949005, "grad_norm": 0.17851419746875763, "learning_rate": 1.7601594856895593e-05, "loss": 0.0081, "step": 155250 }, { "epoch": 4.799406564876058, "grad_norm": 0.1299140900373459, "learning_rate": 1.7601131235705015e-05, "loss": 0.007, "step": 155280 }, { "epoch": 4.800333807257217, "grad_norm": 0.1426197737455368, "learning_rate": 1.7600667614514437e-05, "loss": 0.0077, "step": 155310 }, { "epoch": 4.801261049638375, "grad_norm": 0.1190279871225357, "learning_rate": 1.7600203993323858e-05, "loss": 0.0079, "step": 155340 }, { "epoch": 4.802188292019534, "grad_norm": 0.11872486025094986, "learning_rate": 1.7599740372133276e-05, "loss": 0.0074, "step": 155370 }, { "epoch": 4.803115534400693, "grad_norm": 0.11318349093198776, "learning_rate": 1.7599276750942698e-05, "loss": 0.0072, "step": 155400 }, { "epoch": 4.80404277678185, "grad_norm": 0.1379072517156601, "learning_rate": 1.759881312975212e-05, "loss": 0.0077, "step": 155430 }, { "epoch": 4.804970019163009, "grad_norm": 0.09869755804538727, "learning_rate": 1.7598349508561537e-05, "loss": 0.0077, "step": 155460 }, { "epoch": 4.8058972615441675, "grad_norm": 0.07154048979282379, "learning_rate": 1.759788588737096e-05, "loss": 0.0076, "step": 155490 }, { "epoch": 4.806824503925326, 
"grad_norm": 0.1312808394432068, "learning_rate": 1.759742226618038e-05, "loss": 0.0075, "step": 155520 }, { "epoch": 4.807751746306485, "grad_norm": 0.12414688616991043, "learning_rate": 1.7596958644989802e-05, "loss": 0.0073, "step": 155550 }, { "epoch": 4.808678988687643, "grad_norm": 0.12672396004199982, "learning_rate": 1.7596495023799223e-05, "loss": 0.0078, "step": 155580 }, { "epoch": 4.809606231068801, "grad_norm": 0.12700602412223816, "learning_rate": 1.7596031402608645e-05, "loss": 0.0074, "step": 155610 }, { "epoch": 4.81053347344996, "grad_norm": 0.165926992893219, "learning_rate": 1.7595567781418063e-05, "loss": 0.0072, "step": 155640 }, { "epoch": 4.811460715831118, "grad_norm": 0.1462850570678711, "learning_rate": 1.7595104160227484e-05, "loss": 0.0072, "step": 155670 }, { "epoch": 4.812387958212277, "grad_norm": 0.10664214193820953, "learning_rate": 1.7594640539036906e-05, "loss": 0.0079, "step": 155700 }, { "epoch": 4.813315200593435, "grad_norm": 0.10887693613767624, "learning_rate": 1.7594176917846327e-05, "loss": 0.0072, "step": 155730 }, { "epoch": 4.814242442974594, "grad_norm": 0.11229437589645386, "learning_rate": 1.759371329665575e-05, "loss": 0.0078, "step": 155760 }, { "epoch": 4.815169685355752, "grad_norm": 0.13857659697532654, "learning_rate": 1.7593249675465167e-05, "loss": 0.0065, "step": 155790 }, { "epoch": 4.81609692773691, "grad_norm": 0.08290574699640274, "learning_rate": 1.759278605427459e-05, "loss": 0.0072, "step": 155820 }, { "epoch": 4.817024170118069, "grad_norm": 0.12659016251564026, "learning_rate": 1.759232243308401e-05, "loss": 0.0076, "step": 155850 }, { "epoch": 4.8179514124992275, "grad_norm": 0.10731244832277298, "learning_rate": 1.7591858811893428e-05, "loss": 0.0072, "step": 155880 }, { "epoch": 4.818878654880386, "grad_norm": 0.1977066993713379, "learning_rate": 1.759139519070285e-05, "loss": 0.0066, "step": 155910 }, { "epoch": 4.819805897261544, "grad_norm": 0.14930696785449982, "learning_rate": 
1.759093156951227e-05, "loss": 0.0079, "step": 155940 }, { "epoch": 4.8207331396427024, "grad_norm": 0.13199691474437714, "learning_rate": 1.7590467948321693e-05, "loss": 0.0075, "step": 155970 }, { "epoch": 4.821660382023861, "grad_norm": 0.10039552301168442, "learning_rate": 1.7590004327131114e-05, "loss": 0.0081, "step": 156000 }, { "epoch": 4.82258762440502, "grad_norm": 0.10038246959447861, "learning_rate": 1.7589540705940532e-05, "loss": 0.0074, "step": 156030 }, { "epoch": 4.823514866786178, "grad_norm": 0.14350713789463043, "learning_rate": 1.7589077084749954e-05, "loss": 0.0082, "step": 156060 }, { "epoch": 4.824442109167336, "grad_norm": 0.10305672138929367, "learning_rate": 1.7588613463559375e-05, "loss": 0.0074, "step": 156090 }, { "epoch": 4.825369351548495, "grad_norm": 0.19539010524749756, "learning_rate": 1.7588149842368797e-05, "loss": 0.0072, "step": 156120 }, { "epoch": 4.826296593929653, "grad_norm": 0.1309889703989029, "learning_rate": 1.758768622117822e-05, "loss": 0.0072, "step": 156150 }, { "epoch": 4.827223836310812, "grad_norm": 0.11228398233652115, "learning_rate": 1.758722259998764e-05, "loss": 0.0069, "step": 156180 }, { "epoch": 4.82815107869197, "grad_norm": 0.15642087161540985, "learning_rate": 1.7586758978797058e-05, "loss": 0.0075, "step": 156210 }, { "epoch": 4.829078321073128, "grad_norm": 0.10282242298126221, "learning_rate": 1.758629535760648e-05, "loss": 0.0068, "step": 156240 }, { "epoch": 4.830005563454287, "grad_norm": 0.12884609401226044, "learning_rate": 1.75858317364159e-05, "loss": 0.0075, "step": 156270 }, { "epoch": 4.830932805835445, "grad_norm": 0.1116701140999794, "learning_rate": 1.7585368115225322e-05, "loss": 0.0075, "step": 156300 }, { "epoch": 4.831860048216604, "grad_norm": 0.10828559845685959, "learning_rate": 1.7584904494034744e-05, "loss": 0.0069, "step": 156330 }, { "epoch": 4.8327872905977625, "grad_norm": 0.09635424613952637, "learning_rate": 1.7584440872844165e-05, "loss": 0.0074, "step": 156360 }, { 
"epoch": 4.833714532978921, "grad_norm": 0.13888125121593475, "learning_rate": 1.7583977251653584e-05, "loss": 0.0077, "step": 156390 }, { "epoch": 4.834641775360079, "grad_norm": 0.09854333102703094, "learning_rate": 1.7583513630463005e-05, "loss": 0.0069, "step": 156420 }, { "epoch": 4.835569017741237, "grad_norm": 0.16128389537334442, "learning_rate": 1.7583050009272423e-05, "loss": 0.0076, "step": 156450 }, { "epoch": 4.836496260122396, "grad_norm": 0.10811527818441391, "learning_rate": 1.7582586388081845e-05, "loss": 0.0077, "step": 156480 }, { "epoch": 4.837423502503555, "grad_norm": 0.13507024943828583, "learning_rate": 1.7582122766891266e-05, "loss": 0.0071, "step": 156510 }, { "epoch": 4.838350744884713, "grad_norm": 0.11902371793985367, "learning_rate": 1.7581659145700688e-05, "loss": 0.0077, "step": 156540 }, { "epoch": 4.839277987265872, "grad_norm": 0.11790937185287476, "learning_rate": 1.758119552451011e-05, "loss": 0.0069, "step": 156570 }, { "epoch": 4.8402052296470295, "grad_norm": 0.16364233195781708, "learning_rate": 1.758073190331953e-05, "loss": 0.008, "step": 156600 }, { "epoch": 4.841132472028188, "grad_norm": 0.11354954540729523, "learning_rate": 1.758026828212895e-05, "loss": 0.0077, "step": 156630 }, { "epoch": 4.842059714409347, "grad_norm": 0.17091155052185059, "learning_rate": 1.757980466093837e-05, "loss": 0.0074, "step": 156660 }, { "epoch": 4.842986956790505, "grad_norm": 0.08564923703670502, "learning_rate": 1.7579341039747792e-05, "loss": 0.0077, "step": 156690 }, { "epoch": 4.843914199171664, "grad_norm": 0.10546241700649261, "learning_rate": 1.7578877418557213e-05, "loss": 0.0076, "step": 156720 }, { "epoch": 4.844841441552822, "grad_norm": 0.14829978346824646, "learning_rate": 1.7578413797366635e-05, "loss": 0.0071, "step": 156750 }, { "epoch": 4.84576868393398, "grad_norm": 0.1495857536792755, "learning_rate": 1.7577950176176053e-05, "loss": 0.008, "step": 156780 }, { "epoch": 4.846695926315139, "grad_norm": 
0.11449125409126282, "learning_rate": 1.7577486554985475e-05, "loss": 0.0082, "step": 156810 }, { "epoch": 4.847623168696297, "grad_norm": 0.11898341774940491, "learning_rate": 1.7577022933794896e-05, "loss": 0.007, "step": 156840 }, { "epoch": 4.848550411077456, "grad_norm": 0.09242479503154755, "learning_rate": 1.7576559312604314e-05, "loss": 0.008, "step": 156870 }, { "epoch": 4.849477653458614, "grad_norm": 0.13898108899593353, "learning_rate": 1.7576095691413736e-05, "loss": 0.007, "step": 156900 }, { "epoch": 4.850404895839772, "grad_norm": 0.1583225578069687, "learning_rate": 1.7575632070223157e-05, "loss": 0.008, "step": 156930 }, { "epoch": 4.851332138220931, "grad_norm": 0.14425835013389587, "learning_rate": 1.757516844903258e-05, "loss": 0.0075, "step": 156960 }, { "epoch": 4.8522593806020895, "grad_norm": 0.09974212944507599, "learning_rate": 1.7574704827842e-05, "loss": 0.0076, "step": 156990 }, { "epoch": 4.853186622983248, "grad_norm": 0.08313441276550293, "learning_rate": 1.757424120665142e-05, "loss": 0.0072, "step": 157020 }, { "epoch": 4.854113865364406, "grad_norm": 0.14934054017066956, "learning_rate": 1.757377758546084e-05, "loss": 0.008, "step": 157050 }, { "epoch": 4.855041107745564, "grad_norm": 0.12337655574083328, "learning_rate": 1.757331396427026e-05, "loss": 0.0073, "step": 157080 }, { "epoch": 4.855968350126723, "grad_norm": 0.14684998989105225, "learning_rate": 1.7572850343079683e-05, "loss": 0.0079, "step": 157110 }, { "epoch": 4.856895592507882, "grad_norm": 0.09002646803855896, "learning_rate": 1.7572386721889104e-05, "loss": 0.0069, "step": 157140 }, { "epoch": 4.85782283488904, "grad_norm": 0.14636117219924927, "learning_rate": 1.7571923100698526e-05, "loss": 0.0077, "step": 157170 }, { "epoch": 4.858750077270199, "grad_norm": 0.14607901871204376, "learning_rate": 1.7571459479507944e-05, "loss": 0.0076, "step": 157200 }, { "epoch": 4.8596773196513565, "grad_norm": 0.17219538986682892, "learning_rate": 1.7570995858317365e-05, 
"loss": 0.0078, "step": 157230 }, { "epoch": 4.860604562032515, "grad_norm": 0.14477472007274628, "learning_rate": 1.7570532237126787e-05, "loss": 0.0078, "step": 157260 }, { "epoch": 4.861531804413674, "grad_norm": 0.1294659972190857, "learning_rate": 1.7570068615936205e-05, "loss": 0.0073, "step": 157290 }, { "epoch": 4.862459046794832, "grad_norm": 0.1343969702720642, "learning_rate": 1.7569604994745627e-05, "loss": 0.0077, "step": 157320 }, { "epoch": 4.863386289175991, "grad_norm": 0.09068270772695541, "learning_rate": 1.7569141373555048e-05, "loss": 0.0071, "step": 157350 }, { "epoch": 4.8643135315571495, "grad_norm": 0.11396805942058563, "learning_rate": 1.756867775236447e-05, "loss": 0.0068, "step": 157380 }, { "epoch": 4.865240773938307, "grad_norm": 0.218610942363739, "learning_rate": 1.756821413117389e-05, "loss": 0.0068, "step": 157410 }, { "epoch": 4.866168016319466, "grad_norm": 0.1994808465242386, "learning_rate": 1.756775050998331e-05, "loss": 0.0079, "step": 157440 }, { "epoch": 4.867095258700624, "grad_norm": 0.11955064535140991, "learning_rate": 1.756728688879273e-05, "loss": 0.0074, "step": 157470 }, { "epoch": 4.868022501081783, "grad_norm": 0.11644437164068222, "learning_rate": 1.7566823267602152e-05, "loss": 0.0072, "step": 157500 }, { "epoch": 4.868949743462942, "grad_norm": 0.12209809571504593, "learning_rate": 1.7566359646411574e-05, "loss": 0.0073, "step": 157530 }, { "epoch": 4.869876985844099, "grad_norm": 0.15328480303287506, "learning_rate": 1.7565896025220995e-05, "loss": 0.0072, "step": 157560 }, { "epoch": 4.870804228225258, "grad_norm": 0.11719213426113129, "learning_rate": 1.7565432404030417e-05, "loss": 0.0073, "step": 157590 }, { "epoch": 4.8717314706064165, "grad_norm": 0.17245139181613922, "learning_rate": 1.7564968782839835e-05, "loss": 0.0065, "step": 157620 }, { "epoch": 4.872658712987575, "grad_norm": 0.11117279529571533, "learning_rate": 1.7564505161649256e-05, "loss": 0.0077, "step": 157650 }, { "epoch": 
4.873585955368734, "grad_norm": 0.09652625024318695, "learning_rate": 1.7564041540458678e-05, "loss": 0.0071, "step": 157680 }, { "epoch": 4.8745131977498914, "grad_norm": 0.1294788122177124, "learning_rate": 1.75635779192681e-05, "loss": 0.0068, "step": 157710 }, { "epoch": 4.87544044013105, "grad_norm": 0.1279054582118988, "learning_rate": 1.756311429807752e-05, "loss": 0.0076, "step": 157740 }, { "epoch": 4.876367682512209, "grad_norm": 0.16190747916698456, "learning_rate": 1.756265067688694e-05, "loss": 0.0071, "step": 157770 }, { "epoch": 4.877294924893367, "grad_norm": 0.1691199392080307, "learning_rate": 1.756218705569636e-05, "loss": 0.0073, "step": 157800 }, { "epoch": 4.878222167274526, "grad_norm": 0.1147417277097702, "learning_rate": 1.7561723434505782e-05, "loss": 0.0073, "step": 157830 }, { "epoch": 4.8791494096556836, "grad_norm": 0.1295657455921173, "learning_rate": 1.75612598133152e-05, "loss": 0.0078, "step": 157860 }, { "epoch": 4.880076652036842, "grad_norm": 0.09971607476472855, "learning_rate": 1.756079619212462e-05, "loss": 0.0079, "step": 157890 }, { "epoch": 4.881003894418001, "grad_norm": 0.10699129104614258, "learning_rate": 1.7560332570934043e-05, "loss": 0.007, "step": 157920 }, { "epoch": 4.881931136799159, "grad_norm": 0.14464642107486725, "learning_rate": 1.7559868949743465e-05, "loss": 0.0073, "step": 157950 }, { "epoch": 4.882858379180318, "grad_norm": 0.1193290427327156, "learning_rate": 1.7559405328552886e-05, "loss": 0.0077, "step": 157980 }, { "epoch": 4.8837856215614766, "grad_norm": 0.16978982090950012, "learning_rate": 1.7558941707362308e-05, "loss": 0.0068, "step": 158010 }, { "epoch": 4.884712863942634, "grad_norm": 0.10900181531906128, "learning_rate": 1.7558478086171726e-05, "loss": 0.0074, "step": 158040 }, { "epoch": 4.885640106323793, "grad_norm": 0.11083262413740158, "learning_rate": 1.7558014464981147e-05, "loss": 0.0072, "step": 158070 }, { "epoch": 4.8865673487049515, "grad_norm": 0.13128794729709625, 
"learning_rate": 1.755755084379057e-05, "loss": 0.0071, "step": 158100 }, { "epoch": 4.88749459108611, "grad_norm": 0.1498374491930008, "learning_rate": 1.755708722259999e-05, "loss": 0.0073, "step": 158130 }, { "epoch": 4.888421833467269, "grad_norm": 0.13680291175842285, "learning_rate": 1.7556623601409412e-05, "loss": 0.0077, "step": 158160 }, { "epoch": 4.889349075848427, "grad_norm": 0.15497516095638275, "learning_rate": 1.755615998021883e-05, "loss": 0.0072, "step": 158190 }, { "epoch": 4.890276318229585, "grad_norm": 0.1443401724100113, "learning_rate": 1.755569635902825e-05, "loss": 0.0078, "step": 158220 }, { "epoch": 4.891203560610744, "grad_norm": 0.08936183899641037, "learning_rate": 1.7555232737837673e-05, "loss": 0.0083, "step": 158250 }, { "epoch": 4.892130802991902, "grad_norm": 0.10721143335103989, "learning_rate": 1.755476911664709e-05, "loss": 0.0079, "step": 158280 }, { "epoch": 4.893058045373061, "grad_norm": 0.10698700696229935, "learning_rate": 1.7554305495456513e-05, "loss": 0.0078, "step": 158310 }, { "epoch": 4.893985287754219, "grad_norm": 0.10023888200521469, "learning_rate": 1.7553841874265934e-05, "loss": 0.0079, "step": 158340 }, { "epoch": 4.894912530135377, "grad_norm": 0.17285297811031342, "learning_rate": 1.7553378253075356e-05, "loss": 0.0077, "step": 158370 }, { "epoch": 4.895839772516536, "grad_norm": 0.1387130618095398, "learning_rate": 1.7552914631884777e-05, "loss": 0.0074, "step": 158400 }, { "epoch": 4.896767014897694, "grad_norm": 0.09325949102640152, "learning_rate": 1.7552451010694195e-05, "loss": 0.0074, "step": 158430 }, { "epoch": 4.897694257278853, "grad_norm": 0.05508545786142349, "learning_rate": 1.7551987389503617e-05, "loss": 0.0077, "step": 158460 }, { "epoch": 4.8986214996600115, "grad_norm": 0.10496322810649872, "learning_rate": 1.7551523768313038e-05, "loss": 0.0073, "step": 158490 }, { "epoch": 4.899548742041169, "grad_norm": 0.12847256660461426, "learning_rate": 1.755106014712246e-05, "loss": 0.0078, 
"step": 158520 }, { "epoch": 4.900475984422328, "grad_norm": 0.10622482746839523, "learning_rate": 1.755059652593188e-05, "loss": 0.0074, "step": 158550 }, { "epoch": 4.901403226803486, "grad_norm": 0.137132465839386, "learning_rate": 1.7550132904741303e-05, "loss": 0.0074, "step": 158580 }, { "epoch": 4.902330469184645, "grad_norm": 0.13949251174926758, "learning_rate": 1.754966928355072e-05, "loss": 0.0072, "step": 158610 }, { "epoch": 4.903257711565804, "grad_norm": 0.08257464319467545, "learning_rate": 1.7549205662360142e-05, "loss": 0.0077, "step": 158640 }, { "epoch": 4.904184953946961, "grad_norm": 0.10967805981636047, "learning_rate": 1.7548742041169564e-05, "loss": 0.0073, "step": 158670 }, { "epoch": 4.90511219632812, "grad_norm": 0.09973084181547165, "learning_rate": 1.7548278419978982e-05, "loss": 0.0074, "step": 158700 }, { "epoch": 4.9060394387092785, "grad_norm": 0.11089131236076355, "learning_rate": 1.7547814798788403e-05, "loss": 0.0069, "step": 158730 }, { "epoch": 4.906966681090437, "grad_norm": 0.1150914803147316, "learning_rate": 1.7547351177597825e-05, "loss": 0.0072, "step": 158760 }, { "epoch": 4.907893923471596, "grad_norm": 0.12484666705131531, "learning_rate": 1.7546887556407247e-05, "loss": 0.0079, "step": 158790 }, { "epoch": 4.908821165852754, "grad_norm": 0.08902730792760849, "learning_rate": 1.7546423935216668e-05, "loss": 0.0076, "step": 158820 }, { "epoch": 4.909748408233912, "grad_norm": 0.0965014100074768, "learning_rate": 1.7545960314026086e-05, "loss": 0.0078, "step": 158850 }, { "epoch": 4.910675650615071, "grad_norm": 0.0809628814458847, "learning_rate": 1.7545496692835508e-05, "loss": 0.0076, "step": 158880 }, { "epoch": 4.911602892996229, "grad_norm": 0.10979782044887543, "learning_rate": 1.754503307164493e-05, "loss": 0.0069, "step": 158910 }, { "epoch": 4.912530135377388, "grad_norm": 0.1150323674082756, "learning_rate": 1.754456945045435e-05, "loss": 0.0066, "step": 158940 }, { "epoch": 4.913457377758546, "grad_norm": 
0.2427011877298355, "learning_rate": 1.7544105829263772e-05, "loss": 0.0071, "step": 158970 }, { "epoch": 4.914384620139705, "grad_norm": 0.10813760757446289, "learning_rate": 1.7543642208073194e-05, "loss": 0.0077, "step": 159000 }, { "epoch": 4.915311862520863, "grad_norm": 0.15007346868515015, "learning_rate": 1.7543178586882612e-05, "loss": 0.0076, "step": 159030 }, { "epoch": 4.916239104902021, "grad_norm": 0.1923598349094391, "learning_rate": 1.7542714965692033e-05, "loss": 0.007, "step": 159060 }, { "epoch": 4.91716634728318, "grad_norm": 0.14031489193439484, "learning_rate": 1.754225134450145e-05, "loss": 0.0082, "step": 159090 }, { "epoch": 4.9180935896643385, "grad_norm": 0.11816397309303284, "learning_rate": 1.7541787723310876e-05, "loss": 0.0076, "step": 159120 }, { "epoch": 4.919020832045497, "grad_norm": 0.09946399182081223, "learning_rate": 1.7541324102120298e-05, "loss": 0.0073, "step": 159150 }, { "epoch": 4.919948074426655, "grad_norm": 0.15115411579608917, "learning_rate": 1.7540860480929716e-05, "loss": 0.0075, "step": 159180 }, { "epoch": 4.920875316807813, "grad_norm": 0.06614378839731216, "learning_rate": 1.7540396859739137e-05, "loss": 0.0073, "step": 159210 }, { "epoch": 4.921802559188972, "grad_norm": 0.1898435801267624, "learning_rate": 1.753993323854856e-05, "loss": 0.0073, "step": 159240 }, { "epoch": 4.922729801570131, "grad_norm": 0.09845539182424545, "learning_rate": 1.7539469617357977e-05, "loss": 0.0076, "step": 159270 }, { "epoch": 4.923657043951289, "grad_norm": 0.1253083348274231, "learning_rate": 1.75390059961674e-05, "loss": 0.0076, "step": 159300 }, { "epoch": 4.924584286332447, "grad_norm": 0.10110145807266235, "learning_rate": 1.753854237497682e-05, "loss": 0.0077, "step": 159330 }, { "epoch": 4.9255115287136055, "grad_norm": 0.20262768864631653, "learning_rate": 1.753807875378624e-05, "loss": 0.007, "step": 159360 }, { "epoch": 4.926438771094764, "grad_norm": 0.12334411591291428, "learning_rate": 1.7537615132595663e-05, 
"loss": 0.0066, "step": 159390 }, { "epoch": 4.927366013475923, "grad_norm": 0.1236196905374527, "learning_rate": 1.753715151140508e-05, "loss": 0.008, "step": 159420 }, { "epoch": 4.928293255857081, "grad_norm": 0.16953203082084656, "learning_rate": 1.7536687890214503e-05, "loss": 0.0075, "step": 159450 }, { "epoch": 4.929220498238239, "grad_norm": 0.12793444097042084, "learning_rate": 1.7536224269023924e-05, "loss": 0.0077, "step": 159480 }, { "epoch": 4.930147740619398, "grad_norm": 0.12657874822616577, "learning_rate": 1.7535760647833346e-05, "loss": 0.0069, "step": 159510 }, { "epoch": 4.931074983000556, "grad_norm": 0.11006152629852295, "learning_rate": 1.7535297026642767e-05, "loss": 0.008, "step": 159540 }, { "epoch": 4.932002225381715, "grad_norm": 0.06553195416927338, "learning_rate": 1.753483340545219e-05, "loss": 0.0079, "step": 159570 }, { "epoch": 4.932929467762873, "grad_norm": 0.128460094332695, "learning_rate": 1.7534369784261607e-05, "loss": 0.0073, "step": 159600 }, { "epoch": 4.933856710144031, "grad_norm": 0.11116764694452286, "learning_rate": 1.753390616307103e-05, "loss": 0.0071, "step": 159630 }, { "epoch": 4.93478395252519, "grad_norm": 0.10491380840539932, "learning_rate": 1.753344254188045e-05, "loss": 0.0069, "step": 159660 }, { "epoch": 4.935711194906348, "grad_norm": 0.1439782977104187, "learning_rate": 1.7532978920689868e-05, "loss": 0.0076, "step": 159690 }, { "epoch": 4.936638437287507, "grad_norm": 0.11652590334415436, "learning_rate": 1.753251529949929e-05, "loss": 0.0075, "step": 159720 }, { "epoch": 4.9375656796686656, "grad_norm": 0.16826878488063812, "learning_rate": 1.753205167830871e-05, "loss": 0.0082, "step": 159750 }, { "epoch": 4.938492922049824, "grad_norm": 0.14291808009147644, "learning_rate": 1.7531588057118132e-05, "loss": 0.0081, "step": 159780 }, { "epoch": 4.939420164430983, "grad_norm": 0.13138121366500854, "learning_rate": 1.7531124435927554e-05, "loss": 0.0072, "step": 159810 }, { "epoch": 4.9403474068121405, 
"grad_norm": 0.10919874906539917, "learning_rate": 1.7530660814736972e-05, "loss": 0.0079, "step": 159840 }, { "epoch": 4.941274649193299, "grad_norm": 0.09419435262680054, "learning_rate": 1.7530197193546394e-05, "loss": 0.0075, "step": 159870 }, { "epoch": 4.942201891574458, "grad_norm": 0.14088863134384155, "learning_rate": 1.7529733572355815e-05, "loss": 0.0069, "step": 159900 }, { "epoch": 4.943129133955616, "grad_norm": 0.06863661110401154, "learning_rate": 1.7529269951165237e-05, "loss": 0.007, "step": 159930 }, { "epoch": 4.944056376336775, "grad_norm": 0.12032978981733322, "learning_rate": 1.7528806329974658e-05, "loss": 0.0074, "step": 159960 }, { "epoch": 4.944983618717933, "grad_norm": 0.17480145394802094, "learning_rate": 1.752834270878408e-05, "loss": 0.0071, "step": 159990 }, { "epoch": 4.945910861099091, "grad_norm": 0.12238478660583496, "learning_rate": 1.7527879087593498e-05, "loss": 0.0064, "step": 160020 }, { "epoch": 4.94683810348025, "grad_norm": 0.11076578497886658, "learning_rate": 1.752741546640292e-05, "loss": 0.0079, "step": 160050 }, { "epoch": 4.947765345861408, "grad_norm": 0.11457434296607971, "learning_rate": 1.7526967299252027e-05, "loss": 0.0081, "step": 160080 }, { "epoch": 4.948692588242567, "grad_norm": 0.10537590831518173, "learning_rate": 1.7526503678061445e-05, "loss": 0.0065, "step": 160110 }, { "epoch": 4.949619830623725, "grad_norm": 0.17463046312332153, "learning_rate": 1.7526040056870867e-05, "loss": 0.0074, "step": 160140 }, { "epoch": 4.950547073004883, "grad_norm": 0.1398235708475113, "learning_rate": 1.7525591889719975e-05, "loss": 0.0072, "step": 160170 }, { "epoch": 4.951474315386042, "grad_norm": 0.11775997281074524, "learning_rate": 1.7525128268529393e-05, "loss": 0.0067, "step": 160200 }, { "epoch": 4.9524015577672005, "grad_norm": 0.10145576298236847, "learning_rate": 1.7524664647338814e-05, "loss": 0.0077, "step": 160230 }, { "epoch": 4.953328800148359, "grad_norm": 0.13297021389007568, "learning_rate": 
1.752420102614824e-05, "loss": 0.0079, "step": 160260 }, { "epoch": 4.954256042529517, "grad_norm": 0.14076389372348785, "learning_rate": 1.7523737404957657e-05, "loss": 0.0078, "step": 160290 }, { "epoch": 4.955183284910675, "grad_norm": 0.14529335498809814, "learning_rate": 1.752327378376708e-05, "loss": 0.0067, "step": 160320 }, { "epoch": 4.956110527291834, "grad_norm": 0.09113836288452148, "learning_rate": 1.7522810162576497e-05, "loss": 0.0074, "step": 160350 }, { "epoch": 4.957037769672993, "grad_norm": 0.12645089626312256, "learning_rate": 1.752234654138592e-05, "loss": 0.0076, "step": 160380 }, { "epoch": 4.957965012054151, "grad_norm": 0.12760451436042786, "learning_rate": 1.752188292019534e-05, "loss": 0.0073, "step": 160410 }, { "epoch": 4.958892254435309, "grad_norm": 0.19445869326591492, "learning_rate": 1.752141929900476e-05, "loss": 0.0075, "step": 160440 }, { "epoch": 4.9598194968164675, "grad_norm": 0.10587280243635178, "learning_rate": 1.7520955677814183e-05, "loss": 0.0072, "step": 160470 }, { "epoch": 4.960746739197626, "grad_norm": 0.19665175676345825, "learning_rate": 1.7520492056623604e-05, "loss": 0.0074, "step": 160500 }, { "epoch": 4.961673981578785, "grad_norm": 0.4821053445339203, "learning_rate": 1.7520028435433023e-05, "loss": 0.0072, "step": 160530 }, { "epoch": 4.962601223959943, "grad_norm": 0.1317213922739029, "learning_rate": 1.7519564814242444e-05, "loss": 0.008, "step": 160560 }, { "epoch": 4.963528466341102, "grad_norm": 0.10619110614061356, "learning_rate": 1.7519101193051866e-05, "loss": 0.0071, "step": 160590 }, { "epoch": 4.9644557087222605, "grad_norm": 0.09393712878227234, "learning_rate": 1.7518637571861287e-05, "loss": 0.0072, "step": 160620 }, { "epoch": 4.965382951103418, "grad_norm": 0.12280314415693283, "learning_rate": 1.751817395067071e-05, "loss": 0.0073, "step": 160650 }, { "epoch": 4.966310193484577, "grad_norm": 0.10114023089408875, "learning_rate": 1.7517710329480127e-05, "loss": 0.0073, "step": 160680 }, { 
"epoch": 4.967237435865735, "grad_norm": 0.10564298927783966, "learning_rate": 1.7517246708289548e-05, "loss": 0.008, "step": 160710 }, { "epoch": 4.968164678246894, "grad_norm": 0.1109955683350563, "learning_rate": 1.751678308709897e-05, "loss": 0.0077, "step": 160740 }, { "epoch": 4.969091920628053, "grad_norm": 0.1581471860408783, "learning_rate": 1.7516319465908388e-05, "loss": 0.0076, "step": 160770 }, { "epoch": 4.97001916300921, "grad_norm": 0.13754810392856598, "learning_rate": 1.751585584471781e-05, "loss": 0.0071, "step": 160800 }, { "epoch": 4.970946405390369, "grad_norm": 0.1249186247587204, "learning_rate": 1.751539222352723e-05, "loss": 0.0072, "step": 160830 }, { "epoch": 4.9718736477715275, "grad_norm": 0.10342936962842941, "learning_rate": 1.7514928602336652e-05, "loss": 0.0076, "step": 160860 }, { "epoch": 4.972800890152686, "grad_norm": 0.10568511486053467, "learning_rate": 1.7514464981146074e-05, "loss": 0.0077, "step": 160890 }, { "epoch": 4.973728132533845, "grad_norm": 0.0779712051153183, "learning_rate": 1.7514001359955495e-05, "loss": 0.0071, "step": 160920 }, { "epoch": 4.974655374915002, "grad_norm": 0.11493860185146332, "learning_rate": 1.7513537738764914e-05, "loss": 0.0078, "step": 160950 }, { "epoch": 4.975582617296161, "grad_norm": 0.09069942682981491, "learning_rate": 1.7513074117574335e-05, "loss": 0.0065, "step": 160980 }, { "epoch": 4.97650985967732, "grad_norm": 0.13559424877166748, "learning_rate": 1.7512610496383757e-05, "loss": 0.0073, "step": 161010 }, { "epoch": 4.977437102058478, "grad_norm": 0.1501857340335846, "learning_rate": 1.7512146875193178e-05, "loss": 0.0071, "step": 161040 }, { "epoch": 4.978364344439637, "grad_norm": 0.13683266937732697, "learning_rate": 1.75116832540026e-05, "loss": 0.0072, "step": 161070 }, { "epoch": 4.9792915868207945, "grad_norm": 0.10318165272474289, "learning_rate": 1.7511219632812018e-05, "loss": 0.0073, "step": 161100 }, { "epoch": 4.980218829201953, "grad_norm": 0.12340828776359558, 
"learning_rate": 1.751075601162144e-05, "loss": 0.0078, "step": 161130 }, { "epoch": 4.981146071583112, "grad_norm": 0.11027143895626068, "learning_rate": 1.751029239043086e-05, "loss": 0.0081, "step": 161160 }, { "epoch": 4.98207331396427, "grad_norm": 0.10717801004648209, "learning_rate": 1.750982876924028e-05, "loss": 0.0076, "step": 161190 }, { "epoch": 4.983000556345429, "grad_norm": 0.1124313697218895, "learning_rate": 1.75093651480497e-05, "loss": 0.0078, "step": 161220 }, { "epoch": 4.983927798726587, "grad_norm": 0.15392524003982544, "learning_rate": 1.7508901526859122e-05, "loss": 0.0074, "step": 161250 }, { "epoch": 4.984855041107745, "grad_norm": 0.14626412093639374, "learning_rate": 1.7508437905668543e-05, "loss": 0.0072, "step": 161280 }, { "epoch": 4.985782283488904, "grad_norm": 0.1007731705904007, "learning_rate": 1.7507974284477965e-05, "loss": 0.0072, "step": 161310 }, { "epoch": 4.986709525870062, "grad_norm": 0.1578971892595291, "learning_rate": 1.7507510663287383e-05, "loss": 0.0088, "step": 161340 }, { "epoch": 4.987636768251221, "grad_norm": 0.10526420176029205, "learning_rate": 1.7507047042096804e-05, "loss": 0.0072, "step": 161370 }, { "epoch": 4.98856401063238, "grad_norm": 0.11597985774278641, "learning_rate": 1.7506583420906226e-05, "loss": 0.0071, "step": 161400 }, { "epoch": 4.989491253013537, "grad_norm": 0.10804571211338043, "learning_rate": 1.7506119799715647e-05, "loss": 0.0079, "step": 161430 }, { "epoch": 4.990418495394696, "grad_norm": 0.10227999091148376, "learning_rate": 1.750565617852507e-05, "loss": 0.0067, "step": 161460 }, { "epoch": 4.9913457377758546, "grad_norm": 0.11440455913543701, "learning_rate": 1.750519255733449e-05, "loss": 0.0079, "step": 161490 }, { "epoch": 4.992272980157013, "grad_norm": 0.1682019978761673, "learning_rate": 1.750472893614391e-05, "loss": 0.0076, "step": 161520 }, { "epoch": 4.993200222538172, "grad_norm": 0.09959305077791214, "learning_rate": 1.750426531495333e-05, "loss": 0.007, "step": 
161550 }, { "epoch": 4.99412746491933, "grad_norm": 0.12509682774543762, "learning_rate": 1.750380169376275e-05, "loss": 0.008, "step": 161580 }, { "epoch": 4.995054707300488, "grad_norm": 0.13964642584323883, "learning_rate": 1.750333807257217e-05, "loss": 0.0076, "step": 161610 }, { "epoch": 4.995981949681647, "grad_norm": 0.16710712015628815, "learning_rate": 1.750287445138159e-05, "loss": 0.0069, "step": 161640 }, { "epoch": 4.996909192062805, "grad_norm": 0.11278828233480453, "learning_rate": 1.7502410830191013e-05, "loss": 0.0075, "step": 161670 }, { "epoch": 4.997836434443964, "grad_norm": 0.1210939958691597, "learning_rate": 1.7501947209000434e-05, "loss": 0.0072, "step": 161700 }, { "epoch": 4.9987636768251225, "grad_norm": 0.11347106099128723, "learning_rate": 1.7501483587809856e-05, "loss": 0.0079, "step": 161730 }, { "epoch": 4.99969091920628, "grad_norm": 0.13430608808994293, "learning_rate": 1.7501019966619274e-05, "loss": 0.0074, "step": 161760 }, { "epoch": 5.000618161587439, "grad_norm": 0.13327138125896454, "learning_rate": 1.7500556345428695e-05, "loss": 0.0065, "step": 161790 }, { "epoch": 5.001545403968597, "grad_norm": 0.11229462176561356, "learning_rate": 1.7500092724238117e-05, "loss": 0.0068, "step": 161820 }, { "epoch": 5.002472646349756, "grad_norm": 0.10307755321264267, "learning_rate": 1.749962910304754e-05, "loss": 0.0068, "step": 161850 }, { "epoch": 5.003399888730915, "grad_norm": 0.0868978276848793, "learning_rate": 1.749916548185696e-05, "loss": 0.0069, "step": 161880 }, { "epoch": 5.004327131112072, "grad_norm": 0.10723195225000381, "learning_rate": 1.749870186066638e-05, "loss": 0.0069, "step": 161910 }, { "epoch": 5.005254373493231, "grad_norm": 0.14285212755203247, "learning_rate": 1.74982382394758e-05, "loss": 0.0072, "step": 161940 }, { "epoch": 5.0061816158743895, "grad_norm": 0.06991103291511536, "learning_rate": 1.749777461828522e-05, "loss": 0.0071, "step": 161970 }, { "epoch": 5.007108858255548, "grad_norm": 
0.1284433752298355, "learning_rate": 1.7497310997094643e-05, "loss": 0.0073, "step": 162000 }, { "epoch": 5.008036100636707, "grad_norm": 0.08719570189714432, "learning_rate": 1.7496847375904064e-05, "loss": 0.0073, "step": 162030 }, { "epoch": 5.008963343017865, "grad_norm": 0.11156509816646576, "learning_rate": 1.7496383754713486e-05, "loss": 0.007, "step": 162060 }, { "epoch": 5.009890585399023, "grad_norm": 0.13383913040161133, "learning_rate": 1.7495920133522904e-05, "loss": 0.0073, "step": 162090 }, { "epoch": 5.010817827780182, "grad_norm": 0.14612673223018646, "learning_rate": 1.7495456512332325e-05, "loss": 0.0079, "step": 162120 }, { "epoch": 5.01174507016134, "grad_norm": 0.07079488039016724, "learning_rate": 1.7494992891141747e-05, "loss": 0.0072, "step": 162150 }, { "epoch": 5.012672312542499, "grad_norm": 0.07298697531223297, "learning_rate": 1.7494529269951165e-05, "loss": 0.0079, "step": 162180 }, { "epoch": 5.013599554923657, "grad_norm": 0.11068940162658691, "learning_rate": 1.7494065648760586e-05, "loss": 0.0068, "step": 162210 }, { "epoch": 5.014526797304815, "grad_norm": 0.13095250725746155, "learning_rate": 1.7493602027570008e-05, "loss": 0.0073, "step": 162240 }, { "epoch": 5.015454039685974, "grad_norm": 0.1157107949256897, "learning_rate": 1.749313840637943e-05, "loss": 0.0073, "step": 162270 }, { "epoch": 5.016381282067132, "grad_norm": 0.14790625870227814, "learning_rate": 1.749267478518885e-05, "loss": 0.0066, "step": 162300 }, { "epoch": 5.017308524448291, "grad_norm": 0.0933152511715889, "learning_rate": 1.7492211163998272e-05, "loss": 0.0082, "step": 162330 }, { "epoch": 5.0182357668294495, "grad_norm": 0.09328426420688629, "learning_rate": 1.749174754280769e-05, "loss": 0.0072, "step": 162360 }, { "epoch": 5.019163009210608, "grad_norm": 0.11505948007106781, "learning_rate": 1.7491283921617112e-05, "loss": 0.0074, "step": 162390 }, { "epoch": 5.020090251591766, "grad_norm": 0.11269128322601318, "learning_rate": 
1.7490820300426533e-05, "loss": 0.0073, "step": 162420 }, { "epoch": 5.021017493972924, "grad_norm": 0.09996984899044037, "learning_rate": 1.7490356679235955e-05, "loss": 0.007, "step": 162450 }, { "epoch": 5.021944736354083, "grad_norm": 0.08093591779470444, "learning_rate": 1.7489893058045376e-05, "loss": 0.007, "step": 162480 }, { "epoch": 5.022871978735242, "grad_norm": 0.06658287346363068, "learning_rate": 1.7489429436854795e-05, "loss": 0.0076, "step": 162510 }, { "epoch": 5.0237992211164, "grad_norm": 0.11496284604072571, "learning_rate": 1.7488965815664216e-05, "loss": 0.008, "step": 162540 }, { "epoch": 5.024726463497558, "grad_norm": 0.11016331613063812, "learning_rate": 1.7488502194473638e-05, "loss": 0.0069, "step": 162570 }, { "epoch": 5.0256537058787165, "grad_norm": 0.14720240235328674, "learning_rate": 1.7488038573283056e-05, "loss": 0.0075, "step": 162600 }, { "epoch": 5.026580948259875, "grad_norm": 0.1326962113380432, "learning_rate": 1.7487574952092477e-05, "loss": 0.0077, "step": 162630 }, { "epoch": 5.027508190641034, "grad_norm": 0.1408575028181076, "learning_rate": 1.74871113309019e-05, "loss": 0.0074, "step": 162660 }, { "epoch": 5.028435433022192, "grad_norm": 0.0952037051320076, "learning_rate": 1.748664770971132e-05, "loss": 0.0074, "step": 162690 }, { "epoch": 5.02936267540335, "grad_norm": 0.09775017946958542, "learning_rate": 1.7486184088520742e-05, "loss": 0.0072, "step": 162720 }, { "epoch": 5.030289917784509, "grad_norm": 0.12833741307258606, "learning_rate": 1.748572046733016e-05, "loss": 0.0069, "step": 162750 }, { "epoch": 5.031217160165667, "grad_norm": 0.07549944519996643, "learning_rate": 1.748525684613958e-05, "loss": 0.0077, "step": 162780 }, { "epoch": 5.032144402546826, "grad_norm": 0.08002232015132904, "learning_rate": 1.7484793224949003e-05, "loss": 0.007, "step": 162810 }, { "epoch": 5.033071644927984, "grad_norm": 0.12762358784675598, "learning_rate": 1.7484329603758424e-05, "loss": 0.0067, "step": 162840 }, { 
"epoch": 5.033998887309143, "grad_norm": 0.1420973241329193, "learning_rate": 1.7483865982567846e-05, "loss": 0.0068, "step": 162870 }, { "epoch": 5.034926129690301, "grad_norm": 0.10876469314098358, "learning_rate": 1.7483402361377267e-05, "loss": 0.0071, "step": 162900 }, { "epoch": 5.035853372071459, "grad_norm": 0.133980855345726, "learning_rate": 1.7482938740186685e-05, "loss": 0.0072, "step": 162930 }, { "epoch": 5.036780614452618, "grad_norm": 0.19485169649124146, "learning_rate": 1.7482475118996107e-05, "loss": 0.0079, "step": 162960 }, { "epoch": 5.0377078568337765, "grad_norm": 0.08569381386041641, "learning_rate": 1.748201149780553e-05, "loss": 0.0064, "step": 162990 }, { "epoch": 5.038635099214935, "grad_norm": 0.1523876041173935, "learning_rate": 1.7481563330654633e-05, "loss": 0.0073, "step": 163020 }, { "epoch": 5.039562341596093, "grad_norm": 0.10451126843690872, "learning_rate": 1.7481099709464055e-05, "loss": 0.007, "step": 163050 }, { "epoch": 5.040489583977251, "grad_norm": 0.09862964600324631, "learning_rate": 1.7480636088273476e-05, "loss": 0.0071, "step": 163080 }, { "epoch": 5.04141682635841, "grad_norm": 0.12436240166425705, "learning_rate": 1.7480172467082898e-05, "loss": 0.0079, "step": 163110 }, { "epoch": 5.042344068739569, "grad_norm": 0.11412505060434341, "learning_rate": 1.747970884589232e-05, "loss": 0.0069, "step": 163140 }, { "epoch": 5.043271311120727, "grad_norm": 0.13689827919006348, "learning_rate": 1.7479245224701737e-05, "loss": 0.007, "step": 163170 }, { "epoch": 5.044198553501886, "grad_norm": 0.0898902639746666, "learning_rate": 1.747878160351116e-05, "loss": 0.0075, "step": 163200 }, { "epoch": 5.0451257958830436, "grad_norm": 0.0855514407157898, "learning_rate": 1.747831798232058e-05, "loss": 0.0074, "step": 163230 }, { "epoch": 5.046053038264202, "grad_norm": 0.10328518599271774, "learning_rate": 1.747785436113e-05, "loss": 0.0068, "step": 163260 }, { "epoch": 5.046980280645361, "grad_norm": 0.11578080803155899, 
"learning_rate": 1.7477390739939423e-05, "loss": 0.0068, "step": 163290 }, { "epoch": 5.047907523026519, "grad_norm": 0.09116586297750473, "learning_rate": 1.7476927118748845e-05, "loss": 0.0068, "step": 163320 }, { "epoch": 5.048834765407678, "grad_norm": 0.08337472379207611, "learning_rate": 1.7476463497558263e-05, "loss": 0.0074, "step": 163350 }, { "epoch": 5.049762007788836, "grad_norm": 0.6102595925331116, "learning_rate": 1.7475999876367684e-05, "loss": 0.0071, "step": 163380 }, { "epoch": 5.050689250169994, "grad_norm": 0.18072733283042908, "learning_rate": 1.7475536255177106e-05, "loss": 0.0077, "step": 163410 }, { "epoch": 5.051616492551153, "grad_norm": 0.06674933433532715, "learning_rate": 1.7475072633986524e-05, "loss": 0.008, "step": 163440 }, { "epoch": 5.0525437349323115, "grad_norm": 0.11303811520338058, "learning_rate": 1.7474609012795945e-05, "loss": 0.0072, "step": 163470 }, { "epoch": 5.05347097731347, "grad_norm": 0.12720175087451935, "learning_rate": 1.7474145391605367e-05, "loss": 0.0068, "step": 163500 }, { "epoch": 5.054398219694628, "grad_norm": 0.10688415169715881, "learning_rate": 1.747368177041479e-05, "loss": 0.0067, "step": 163530 }, { "epoch": 5.055325462075786, "grad_norm": 0.0747612863779068, "learning_rate": 1.747321814922421e-05, "loss": 0.007, "step": 163560 }, { "epoch": 5.056252704456945, "grad_norm": 0.10699962824583054, "learning_rate": 1.7472754528033628e-05, "loss": 0.0068, "step": 163590 }, { "epoch": 5.057179946838104, "grad_norm": 0.1230161264538765, "learning_rate": 1.747229090684305e-05, "loss": 0.0077, "step": 163620 }, { "epoch": 5.058107189219262, "grad_norm": 0.12107528746128082, "learning_rate": 1.747182728565247e-05, "loss": 0.007, "step": 163650 }, { "epoch": 5.059034431600421, "grad_norm": 0.09405803680419922, "learning_rate": 1.7471363664461893e-05, "loss": 0.0075, "step": 163680 }, { "epoch": 5.0599616739815785, "grad_norm": 0.11002689599990845, "learning_rate": 1.7470900043271314e-05, "loss": 0.0078, 
"step": 163710 }, { "epoch": 5.060888916362737, "grad_norm": 0.10908028483390808, "learning_rate": 1.7470436422080736e-05, "loss": 0.0071, "step": 163740 }, { "epoch": 5.061816158743896, "grad_norm": 0.14670707285404205, "learning_rate": 1.7469972800890154e-05, "loss": 0.0076, "step": 163770 }, { "epoch": 5.062743401125054, "grad_norm": 0.1266733855009079, "learning_rate": 1.7469509179699575e-05, "loss": 0.007, "step": 163800 }, { "epoch": 5.063670643506213, "grad_norm": 0.12980753183364868, "learning_rate": 1.7469045558508993e-05, "loss": 0.0075, "step": 163830 }, { "epoch": 5.064597885887371, "grad_norm": 0.13860632479190826, "learning_rate": 1.7468581937318415e-05, "loss": 0.0073, "step": 163860 }, { "epoch": 5.065525128268529, "grad_norm": 0.12370602786540985, "learning_rate": 1.7468118316127836e-05, "loss": 0.0078, "step": 163890 }, { "epoch": 5.066452370649688, "grad_norm": 0.13028383255004883, "learning_rate": 1.7467654694937258e-05, "loss": 0.0082, "step": 163920 }, { "epoch": 5.067379613030846, "grad_norm": 0.12377215176820755, "learning_rate": 1.746719107374668e-05, "loss": 0.0079, "step": 163950 }, { "epoch": 5.068306855412005, "grad_norm": 0.11745817214250565, "learning_rate": 1.74667274525561e-05, "loss": 0.007, "step": 163980 }, { "epoch": 5.069234097793163, "grad_norm": 0.17074927687644958, "learning_rate": 1.746626383136552e-05, "loss": 0.0062, "step": 164010 }, { "epoch": 5.070161340174321, "grad_norm": 0.144013449549675, "learning_rate": 1.746580021017494e-05, "loss": 0.0074, "step": 164040 }, { "epoch": 5.07108858255548, "grad_norm": 0.24484704434871674, "learning_rate": 1.7465336588984362e-05, "loss": 0.0075, "step": 164070 }, { "epoch": 5.0720158249366385, "grad_norm": 0.15789452195167542, "learning_rate": 1.7464872967793784e-05, "loss": 0.0075, "step": 164100 }, { "epoch": 5.072943067317797, "grad_norm": 0.07556537538766861, "learning_rate": 1.7464409346603205e-05, "loss": 0.0071, "step": 164130 }, { "epoch": 5.073870309698956, "grad_norm": 
0.07130338251590729, "learning_rate": 1.7463945725412623e-05, "loss": 0.0071, "step": 164160 }, { "epoch": 5.074797552080113, "grad_norm": 0.17026391625404358, "learning_rate": 1.7463482104222045e-05, "loss": 0.0072, "step": 164190 }, { "epoch": 5.075724794461272, "grad_norm": 0.11512193083763123, "learning_rate": 1.7463018483031466e-05, "loss": 0.0069, "step": 164220 }, { "epoch": 5.076652036842431, "grad_norm": 0.10403994470834732, "learning_rate": 1.7462554861840884e-05, "loss": 0.007, "step": 164250 }, { "epoch": 5.077579279223589, "grad_norm": 0.12723125517368317, "learning_rate": 1.7462091240650306e-05, "loss": 0.0075, "step": 164280 }, { "epoch": 5.078506521604748, "grad_norm": 0.09073852747678757, "learning_rate": 1.7461627619459727e-05, "loss": 0.0079, "step": 164310 }, { "epoch": 5.0794337639859055, "grad_norm": 0.14256513118743896, "learning_rate": 1.746116399826915e-05, "loss": 0.0072, "step": 164340 }, { "epoch": 5.080361006367064, "grad_norm": 0.09849561750888824, "learning_rate": 1.746070037707857e-05, "loss": 0.0073, "step": 164370 }, { "epoch": 5.081288248748223, "grad_norm": 0.09311923384666443, "learning_rate": 1.7460236755887992e-05, "loss": 0.0078, "step": 164400 }, { "epoch": 5.082215491129381, "grad_norm": 0.12854430079460144, "learning_rate": 1.745977313469741e-05, "loss": 0.0075, "step": 164430 }, { "epoch": 5.08314273351054, "grad_norm": 0.06727920472621918, "learning_rate": 1.745930951350683e-05, "loss": 0.0073, "step": 164460 }, { "epoch": 5.0840699758916985, "grad_norm": 0.14005936682224274, "learning_rate": 1.7458845892316253e-05, "loss": 0.0073, "step": 164490 }, { "epoch": 5.084997218272856, "grad_norm": 0.17444783449172974, "learning_rate": 1.7458382271125674e-05, "loss": 0.0073, "step": 164520 }, { "epoch": 5.085924460654015, "grad_norm": 0.1360025405883789, "learning_rate": 1.7457918649935096e-05, "loss": 0.0075, "step": 164550 }, { "epoch": 5.086851703035173, "grad_norm": 0.17125855386257172, "learning_rate": 
1.7457455028744514e-05, "loss": 0.0077, "step": 164580 }, { "epoch": 5.087778945416332, "grad_norm": 0.15304091572761536, "learning_rate": 1.7456991407553936e-05, "loss": 0.0073, "step": 164610 }, { "epoch": 5.088706187797491, "grad_norm": 0.10759209841489792, "learning_rate": 1.7456527786363357e-05, "loss": 0.0073, "step": 164640 }, { "epoch": 5.089633430178648, "grad_norm": 0.1492759883403778, "learning_rate": 1.745606416517278e-05, "loss": 0.0072, "step": 164670 }, { "epoch": 5.090560672559807, "grad_norm": 0.13886307179927826, "learning_rate": 1.74556005439822e-05, "loss": 0.0079, "step": 164700 }, { "epoch": 5.0914879149409655, "grad_norm": 0.10760897397994995, "learning_rate": 1.745513692279162e-05, "loss": 0.0071, "step": 164730 }, { "epoch": 5.092415157322124, "grad_norm": 0.14359265565872192, "learning_rate": 1.745467330160104e-05, "loss": 0.0073, "step": 164760 }, { "epoch": 5.093342399703283, "grad_norm": 0.10283003002405167, "learning_rate": 1.745420968041046e-05, "loss": 0.0077, "step": 164790 }, { "epoch": 5.09426964208444, "grad_norm": 0.1169285848736763, "learning_rate": 1.745374605921988e-05, "loss": 0.0072, "step": 164820 }, { "epoch": 5.095196884465599, "grad_norm": 0.1186181977391243, "learning_rate": 1.74532824380293e-05, "loss": 0.0075, "step": 164850 }, { "epoch": 5.096124126846758, "grad_norm": 0.13259395956993103, "learning_rate": 1.7452818816838722e-05, "loss": 0.0074, "step": 164880 }, { "epoch": 5.097051369227916, "grad_norm": 0.13466718792915344, "learning_rate": 1.7452355195648144e-05, "loss": 0.0076, "step": 164910 }, { "epoch": 5.097978611609075, "grad_norm": 0.17252092063426971, "learning_rate": 1.7451891574457565e-05, "loss": 0.0079, "step": 164940 }, { "epoch": 5.098905853990233, "grad_norm": 0.10738153755664825, "learning_rate": 1.7451427953266987e-05, "loss": 0.0074, "step": 164970 }, { "epoch": 5.099833096371391, "grad_norm": 0.07756676524877548, "learning_rate": 1.7450964332076405e-05, "loss": 0.0081, "step": 165000 }, { 
"epoch": 5.10076033875255, "grad_norm": 0.08447550982236862, "learning_rate": 1.7450500710885826e-05, "loss": 0.0074, "step": 165030 }, { "epoch": 5.101687581133708, "grad_norm": 0.07752320915460587, "learning_rate": 1.7450037089695248e-05, "loss": 0.0077, "step": 165060 }, { "epoch": 5.102614823514867, "grad_norm": 0.24540716409683228, "learning_rate": 1.744957346850467e-05, "loss": 0.0081, "step": 165090 }, { "epoch": 5.1035420658960255, "grad_norm": 0.10456685721874237, "learning_rate": 1.744910984731409e-05, "loss": 0.0075, "step": 165120 }, { "epoch": 5.104469308277183, "grad_norm": 0.16637426614761353, "learning_rate": 1.7448646226123513e-05, "loss": 0.0081, "step": 165150 }, { "epoch": 5.105396550658342, "grad_norm": 0.15094329416751862, "learning_rate": 1.744818260493293e-05, "loss": 0.0079, "step": 165180 }, { "epoch": 5.1063237930395005, "grad_norm": 0.08841516077518463, "learning_rate": 1.7447718983742352e-05, "loss": 0.0077, "step": 165210 }, { "epoch": 5.107251035420659, "grad_norm": 0.16788113117218018, "learning_rate": 1.744727081659146e-05, "loss": 0.0075, "step": 165240 }, { "epoch": 5.108178277801818, "grad_norm": 0.17193889617919922, "learning_rate": 1.7446807195400878e-05, "loss": 0.0071, "step": 165270 }, { "epoch": 5.109105520182976, "grad_norm": 0.11391213536262512, "learning_rate": 1.74463435742103e-05, "loss": 0.0065, "step": 165300 }, { "epoch": 5.110032762564134, "grad_norm": 0.18383660912513733, "learning_rate": 1.744587995301972e-05, "loss": 0.0072, "step": 165330 }, { "epoch": 5.110960004945293, "grad_norm": 0.13283470273017883, "learning_rate": 1.7445416331829143e-05, "loss": 0.007, "step": 165360 }, { "epoch": 5.111887247326451, "grad_norm": 0.1123579889535904, "learning_rate": 1.7444952710638564e-05, "loss": 0.0074, "step": 165390 }, { "epoch": 5.11281448970761, "grad_norm": 0.1357964128255844, "learning_rate": 1.7444489089447982e-05, "loss": 0.0071, "step": 165420 }, { "epoch": 5.113741732088768, "grad_norm": 0.12424826622009277, 
"learning_rate": 1.7444025468257404e-05, "loss": 0.0074, "step": 165450 }, { "epoch": 5.114668974469926, "grad_norm": 0.1345798224210739, "learning_rate": 1.7443561847066825e-05, "loss": 0.0076, "step": 165480 }, { "epoch": 5.115596216851085, "grad_norm": 0.09903095662593842, "learning_rate": 1.7443098225876243e-05, "loss": 0.0077, "step": 165510 }, { "epoch": 5.116523459232243, "grad_norm": 0.15518023073673248, "learning_rate": 1.7442634604685665e-05, "loss": 0.0071, "step": 165540 }, { "epoch": 5.117450701613402, "grad_norm": 0.15539956092834473, "learning_rate": 1.7442170983495086e-05, "loss": 0.0081, "step": 165570 }, { "epoch": 5.1183779439945605, "grad_norm": 0.13546158373355865, "learning_rate": 1.7441707362304508e-05, "loss": 0.0075, "step": 165600 }, { "epoch": 5.119305186375718, "grad_norm": 0.12742462754249573, "learning_rate": 1.744124374111393e-05, "loss": 0.0074, "step": 165630 }, { "epoch": 5.120232428756877, "grad_norm": 0.1267356276512146, "learning_rate": 1.7440780119923348e-05, "loss": 0.0071, "step": 165660 }, { "epoch": 5.121159671138035, "grad_norm": 0.11980008333921432, "learning_rate": 1.744031649873277e-05, "loss": 0.0079, "step": 165690 }, { "epoch": 5.122086913519194, "grad_norm": 0.1788824051618576, "learning_rate": 1.743985287754219e-05, "loss": 0.0077, "step": 165720 }, { "epoch": 5.123014155900353, "grad_norm": 0.08346962928771973, "learning_rate": 1.7439389256351612e-05, "loss": 0.0076, "step": 165750 }, { "epoch": 5.123941398281511, "grad_norm": 0.137568861246109, "learning_rate": 1.7438925635161034e-05, "loss": 0.0073, "step": 165780 }, { "epoch": 5.124868640662669, "grad_norm": 0.12472490966320038, "learning_rate": 1.7438462013970455e-05, "loss": 0.0071, "step": 165810 }, { "epoch": 5.1257958830438275, "grad_norm": 0.07995685935020447, "learning_rate": 1.7437998392779873e-05, "loss": 0.0071, "step": 165840 }, { "epoch": 5.126723125424986, "grad_norm": 0.08601266890764236, "learning_rate": 1.7437534771589295e-05, "loss": 0.0079, 
"step": 165870 }, { "epoch": 5.127650367806145, "grad_norm": 0.12779271602630615, "learning_rate": 1.7437071150398716e-05, "loss": 0.0074, "step": 165900 }, { "epoch": 5.128577610187303, "grad_norm": 0.10279455780982971, "learning_rate": 1.7436607529208134e-05, "loss": 0.0077, "step": 165930 }, { "epoch": 5.129504852568461, "grad_norm": 0.1062554195523262, "learning_rate": 1.743614390801756e-05, "loss": 0.007, "step": 165960 }, { "epoch": 5.13043209494962, "grad_norm": 0.16124632954597473, "learning_rate": 1.7435680286826977e-05, "loss": 0.0068, "step": 165990 }, { "epoch": 5.131359337330778, "grad_norm": 0.16949543356895447, "learning_rate": 1.74352166656364e-05, "loss": 0.0075, "step": 166020 }, { "epoch": 5.132286579711937, "grad_norm": 0.09112982451915741, "learning_rate": 1.743475304444582e-05, "loss": 0.007, "step": 166050 }, { "epoch": 5.133213822093095, "grad_norm": 0.13539579510688782, "learning_rate": 1.743428942325524e-05, "loss": 0.0071, "step": 166080 }, { "epoch": 5.134141064474253, "grad_norm": 0.09961791336536407, "learning_rate": 1.743382580206466e-05, "loss": 0.0075, "step": 166110 }, { "epoch": 5.135068306855412, "grad_norm": 0.141546830534935, "learning_rate": 1.743336218087408e-05, "loss": 0.0075, "step": 166140 }, { "epoch": 5.13599554923657, "grad_norm": 0.17504483461380005, "learning_rate": 1.7432898559683503e-05, "loss": 0.0078, "step": 166170 }, { "epoch": 5.136922791617729, "grad_norm": 0.09951739013195038, "learning_rate": 1.7432434938492925e-05, "loss": 0.0072, "step": 166200 }, { "epoch": 5.1378500339988875, "grad_norm": 0.17419187724590302, "learning_rate": 1.7431971317302346e-05, "loss": 0.0067, "step": 166230 }, { "epoch": 5.138777276380046, "grad_norm": 0.0873958095908165, "learning_rate": 1.7431507696111764e-05, "loss": 0.0075, "step": 166260 }, { "epoch": 5.139704518761204, "grad_norm": 0.1389508694410324, "learning_rate": 1.7431044074921186e-05, "loss": 0.0079, "step": 166290 }, { "epoch": 5.140631761142362, "grad_norm": 
0.11854586005210876, "learning_rate": 1.7430580453730607e-05, "loss": 0.0074, "step": 166320 }, { "epoch": 5.141559003523521, "grad_norm": 0.14368349313735962, "learning_rate": 1.743013228657971e-05, "loss": 0.0066, "step": 166350 }, { "epoch": 5.14248624590468, "grad_norm": 0.14001230895519257, "learning_rate": 1.7429668665389133e-05, "loss": 0.0072, "step": 166380 }, { "epoch": 5.143413488285838, "grad_norm": 0.09316588938236237, "learning_rate": 1.7429205044198555e-05, "loss": 0.0074, "step": 166410 }, { "epoch": 5.144340730666996, "grad_norm": 0.1694176346063614, "learning_rate": 1.7428741423007976e-05, "loss": 0.008, "step": 166440 }, { "epoch": 5.1452679730481545, "grad_norm": 0.1277886927127838, "learning_rate": 1.7428277801817398e-05, "loss": 0.0076, "step": 166470 }, { "epoch": 5.146195215429313, "grad_norm": 0.11884039640426636, "learning_rate": 1.7427814180626816e-05, "loss": 0.007, "step": 166500 }, { "epoch": 5.147122457810472, "grad_norm": 0.07787522673606873, "learning_rate": 1.7427350559436237e-05, "loss": 0.0074, "step": 166530 }, { "epoch": 5.14804970019163, "grad_norm": 0.0815851166844368, "learning_rate": 1.742688693824566e-05, "loss": 0.007, "step": 166560 }, { "epoch": 5.148976942572789, "grad_norm": 0.12260587513446808, "learning_rate": 1.742642331705508e-05, "loss": 0.0071, "step": 166590 }, { "epoch": 5.149904184953947, "grad_norm": 0.15411196649074554, "learning_rate": 1.7425959695864502e-05, "loss": 0.0071, "step": 166620 }, { "epoch": 5.150831427335105, "grad_norm": 0.1154007762670517, "learning_rate": 1.7425496074673923e-05, "loss": 0.0077, "step": 166650 }, { "epoch": 5.151758669716264, "grad_norm": 0.09367243200540543, "learning_rate": 1.742503245348334e-05, "loss": 0.0069, "step": 166680 }, { "epoch": 5.152685912097422, "grad_norm": 0.1431475579738617, "learning_rate": 1.7424568832292763e-05, "loss": 0.0076, "step": 166710 }, { "epoch": 5.153613154478581, "grad_norm": 0.11867686361074448, "learning_rate": 1.742410521110218e-05, 
"loss": 0.0073, "step": 166740 }, { "epoch": 5.154540396859739, "grad_norm": 4.45499324798584, "learning_rate": 1.7423641589911603e-05, "loss": 0.0076, "step": 166770 }, { "epoch": 5.155467639240897, "grad_norm": 0.09311158955097198, "learning_rate": 1.7423177968721024e-05, "loss": 0.0074, "step": 166800 }, { "epoch": 5.156394881622056, "grad_norm": 0.0974188894033432, "learning_rate": 1.7422714347530446e-05, "loss": 0.0077, "step": 166830 }, { "epoch": 5.1573221240032145, "grad_norm": 0.10785020142793655, "learning_rate": 1.7422250726339867e-05, "loss": 0.007, "step": 166860 }, { "epoch": 5.158249366384373, "grad_norm": 0.15054363012313843, "learning_rate": 1.742178710514929e-05, "loss": 0.007, "step": 166890 }, { "epoch": 5.159176608765531, "grad_norm": 0.09987311065196991, "learning_rate": 1.7421323483958707e-05, "loss": 0.0064, "step": 166920 }, { "epoch": 5.1601038511466895, "grad_norm": 0.1304345428943634, "learning_rate": 1.7420859862768128e-05, "loss": 0.0065, "step": 166950 }, { "epoch": 5.161031093527848, "grad_norm": 0.1621256321668625, "learning_rate": 1.742039624157755e-05, "loss": 0.0067, "step": 166980 }, { "epoch": 5.161958335909007, "grad_norm": 0.10130096226930618, "learning_rate": 1.741993262038697e-05, "loss": 0.0072, "step": 167010 }, { "epoch": 5.162885578290165, "grad_norm": 0.10856659710407257, "learning_rate": 1.7419468999196393e-05, "loss": 0.0077, "step": 167040 }, { "epoch": 5.163812820671324, "grad_norm": 0.13915391266345978, "learning_rate": 1.741900537800581e-05, "loss": 0.0077, "step": 167070 }, { "epoch": 5.164740063052482, "grad_norm": 0.4586617052555084, "learning_rate": 1.7418541756815232e-05, "loss": 0.0076, "step": 167100 }, { "epoch": 5.16566730543364, "grad_norm": 0.14038127660751343, "learning_rate": 1.7418078135624654e-05, "loss": 0.0064, "step": 167130 }, { "epoch": 5.166594547814799, "grad_norm": 0.1345939040184021, "learning_rate": 1.7417614514434072e-05, "loss": 0.0079, "step": 167160 }, { "epoch": 5.167521790195957, 
"grad_norm": 0.12722498178482056, "learning_rate": 1.7417150893243494e-05, "loss": 0.0066, "step": 167190 }, { "epoch": 5.168449032577116, "grad_norm": 0.14832627773284912, "learning_rate": 1.741668727205292e-05, "loss": 0.0075, "step": 167220 }, { "epoch": 5.169376274958274, "grad_norm": 0.08888151496648788, "learning_rate": 1.7416223650862337e-05, "loss": 0.0067, "step": 167250 }, { "epoch": 5.170303517339432, "grad_norm": 0.12245669960975647, "learning_rate": 1.7415760029671758e-05, "loss": 0.0069, "step": 167280 }, { "epoch": 5.171230759720591, "grad_norm": 0.17811518907546997, "learning_rate": 1.741529640848118e-05, "loss": 0.0073, "step": 167310 }, { "epoch": 5.1721580021017495, "grad_norm": 0.13326632976531982, "learning_rate": 1.7414832787290598e-05, "loss": 0.0072, "step": 167340 }, { "epoch": 5.173085244482908, "grad_norm": 0.14082370698451996, "learning_rate": 1.741436916610002e-05, "loss": 0.0074, "step": 167370 }, { "epoch": 5.174012486864067, "grad_norm": 0.1404765099287033, "learning_rate": 1.741390554490944e-05, "loss": 0.007, "step": 167400 }, { "epoch": 5.174939729245224, "grad_norm": 0.15010522305965424, "learning_rate": 1.7413441923718862e-05, "loss": 0.0071, "step": 167430 }, { "epoch": 5.175866971626383, "grad_norm": 0.09972722083330154, "learning_rate": 1.7412978302528284e-05, "loss": 0.0074, "step": 167460 }, { "epoch": 5.176794214007542, "grad_norm": 0.12072300910949707, "learning_rate": 1.7412514681337702e-05, "loss": 0.0068, "step": 167490 }, { "epoch": 5.1777214563887, "grad_norm": 0.18630647659301758, "learning_rate": 1.7412051060147123e-05, "loss": 0.0067, "step": 167520 }, { "epoch": 5.178648698769859, "grad_norm": 0.12568801641464233, "learning_rate": 1.7411587438956545e-05, "loss": 0.0069, "step": 167550 }, { "epoch": 5.1795759411510165, "grad_norm": 0.15762364864349365, "learning_rate": 1.7411123817765966e-05, "loss": 0.0066, "step": 167580 }, { "epoch": 5.180503183532175, "grad_norm": 0.08654771745204926, "learning_rate": 
1.7410660196575388e-05, "loss": 0.0068, "step": 167610 }, { "epoch": 5.181430425913334, "grad_norm": 0.2841488718986511, "learning_rate": 1.741019657538481e-05, "loss": 0.0075, "step": 167640 }, { "epoch": 5.182357668294492, "grad_norm": 0.13687977194786072, "learning_rate": 1.7409732954194227e-05, "loss": 0.0074, "step": 167670 }, { "epoch": 5.183284910675651, "grad_norm": 0.14212185144424438, "learning_rate": 1.740926933300365e-05, "loss": 0.0075, "step": 167700 }, { "epoch": 5.184212153056809, "grad_norm": 0.13830214738845825, "learning_rate": 1.740880571181307e-05, "loss": 0.0067, "step": 167730 }, { "epoch": 5.185139395437967, "grad_norm": 0.12083302438259125, "learning_rate": 1.740834209062249e-05, "loss": 0.0075, "step": 167760 }, { "epoch": 5.186066637819126, "grad_norm": 0.0954093411564827, "learning_rate": 1.740787846943191e-05, "loss": 0.0069, "step": 167790 }, { "epoch": 5.186993880200284, "grad_norm": 0.2152736335992813, "learning_rate": 1.740741484824133e-05, "loss": 0.0077, "step": 167820 }, { "epoch": 5.187921122581443, "grad_norm": 0.15129947662353516, "learning_rate": 1.7406951227050753e-05, "loss": 0.0076, "step": 167850 }, { "epoch": 5.188848364962602, "grad_norm": 0.1374928504228592, "learning_rate": 1.7406487605860175e-05, "loss": 0.0078, "step": 167880 }, { "epoch": 5.189775607343759, "grad_norm": 0.1588524878025055, "learning_rate": 1.7406023984669593e-05, "loss": 0.0072, "step": 167910 }, { "epoch": 5.190702849724918, "grad_norm": 0.17594707012176514, "learning_rate": 1.7405560363479014e-05, "loss": 0.007, "step": 167940 }, { "epoch": 5.1916300921060765, "grad_norm": 0.15635521709918976, "learning_rate": 1.7405096742288436e-05, "loss": 0.0074, "step": 167970 }, { "epoch": 5.192557334487235, "grad_norm": 0.1073574423789978, "learning_rate": 1.7404633121097857e-05, "loss": 0.0076, "step": 168000 }, { "epoch": 5.193484576868394, "grad_norm": 0.1100224182009697, "learning_rate": 1.740416949990728e-05, "loss": 0.0072, "step": 168030 }, { 
"epoch": 5.194411819249551, "grad_norm": 0.19316549599170685, "learning_rate": 1.74037058787167e-05, "loss": 0.0077, "step": 168060 }, { "epoch": 5.19533906163071, "grad_norm": 0.09285102039575577, "learning_rate": 1.740324225752612e-05, "loss": 0.0064, "step": 168090 }, { "epoch": 5.196266304011869, "grad_norm": 0.14178244769573212, "learning_rate": 1.740277863633554e-05, "loss": 0.0075, "step": 168120 }, { "epoch": 5.197193546393027, "grad_norm": 0.11265362799167633, "learning_rate": 1.7402315015144958e-05, "loss": 0.0075, "step": 168150 }, { "epoch": 5.198120788774186, "grad_norm": 0.08118470758199692, "learning_rate": 1.740185139395438e-05, "loss": 0.0073, "step": 168180 }, { "epoch": 5.199048031155344, "grad_norm": 0.12226169556379318, "learning_rate": 1.74013877727638e-05, "loss": 0.0068, "step": 168210 }, { "epoch": 5.199975273536502, "grad_norm": 0.12156407535076141, "learning_rate": 1.7400924151573223e-05, "loss": 0.0073, "step": 168240 }, { "epoch": 5.200902515917661, "grad_norm": 0.09186214953660965, "learning_rate": 1.7400460530382644e-05, "loss": 0.0073, "step": 168270 }, { "epoch": 5.201829758298819, "grad_norm": 0.12078210711479187, "learning_rate": 1.7399996909192066e-05, "loss": 0.0067, "step": 168300 }, { "epoch": 5.202757000679978, "grad_norm": 0.19772404432296753, "learning_rate": 1.7399533288001484e-05, "loss": 0.007, "step": 168330 }, { "epoch": 5.2036842430611365, "grad_norm": 0.10745128244161606, "learning_rate": 1.7399069666810905e-05, "loss": 0.0083, "step": 168360 }, { "epoch": 5.204611485442294, "grad_norm": 0.12598110735416412, "learning_rate": 1.7398606045620327e-05, "loss": 0.0079, "step": 168390 }, { "epoch": 5.205538727823453, "grad_norm": 0.1103743314743042, "learning_rate": 1.7398142424429748e-05, "loss": 0.008, "step": 168420 }, { "epoch": 5.206465970204611, "grad_norm": 0.12530331313610077, "learning_rate": 1.739767880323917e-05, "loss": 0.0069, "step": 168450 }, { "epoch": 5.20739321258577, "grad_norm": 0.1265975534915924, 
"learning_rate": 1.7397215182048588e-05, "loss": 0.0076, "step": 168480 }, { "epoch": 5.208320454966929, "grad_norm": 0.1328212320804596, "learning_rate": 1.739675156085801e-05, "loss": 0.0075, "step": 168510 }, { "epoch": 5.209247697348086, "grad_norm": 0.13522428274154663, "learning_rate": 1.739628793966743e-05, "loss": 0.0079, "step": 168540 }, { "epoch": 5.210174939729245, "grad_norm": 0.15003710985183716, "learning_rate": 1.739582431847685e-05, "loss": 0.0086, "step": 168570 }, { "epoch": 5.2111021821104035, "grad_norm": 0.09822821617126465, "learning_rate": 1.739536069728627e-05, "loss": 0.0073, "step": 168600 }, { "epoch": 5.212029424491562, "grad_norm": 0.13021592795848846, "learning_rate": 1.7394897076095695e-05, "loss": 0.0073, "step": 168630 }, { "epoch": 5.212956666872721, "grad_norm": 0.05547688528895378, "learning_rate": 1.7394433454905113e-05, "loss": 0.0076, "step": 168660 }, { "epoch": 5.213883909253879, "grad_norm": 0.11997146159410477, "learning_rate": 1.7393969833714535e-05, "loss": 0.0066, "step": 168690 }, { "epoch": 5.214811151635037, "grad_norm": 0.10908771306276321, "learning_rate": 1.7393506212523956e-05, "loss": 0.0071, "step": 168720 }, { "epoch": 5.215738394016196, "grad_norm": 0.14629589021205902, "learning_rate": 1.7393042591333375e-05, "loss": 0.0078, "step": 168750 }, { "epoch": 5.216665636397354, "grad_norm": 0.12380073219537735, "learning_rate": 1.7392578970142796e-05, "loss": 0.0075, "step": 168780 }, { "epoch": 5.217592878778513, "grad_norm": 0.12511269748210907, "learning_rate": 1.7392115348952218e-05, "loss": 0.007, "step": 168810 }, { "epoch": 5.2185201211596715, "grad_norm": 0.12924128770828247, "learning_rate": 1.739165172776164e-05, "loss": 0.0072, "step": 168840 }, { "epoch": 5.219447363540829, "grad_norm": 0.12245651334524155, "learning_rate": 1.739118810657106e-05, "loss": 0.0067, "step": 168870 }, { "epoch": 5.220374605921988, "grad_norm": 0.20127829909324646, "learning_rate": 1.739072448538048e-05, "loss": 0.0078, 
"step": 168900 }, { "epoch": 5.221301848303146, "grad_norm": 0.09144824743270874, "learning_rate": 1.73902608641899e-05, "loss": 0.0068, "step": 168930 }, { "epoch": 5.222229090684305, "grad_norm": 0.10424090176820755, "learning_rate": 1.7389797242999322e-05, "loss": 0.0072, "step": 168960 }, { "epoch": 5.223156333065464, "grad_norm": 0.10076867043972015, "learning_rate": 1.7389333621808743e-05, "loss": 0.0064, "step": 168990 }, { "epoch": 5.224083575446622, "grad_norm": 0.12170730531215668, "learning_rate": 1.7388870000618165e-05, "loss": 0.0064, "step": 169020 }, { "epoch": 5.22501081782778, "grad_norm": 0.12312142550945282, "learning_rate": 1.7388406379427586e-05, "loss": 0.0075, "step": 169050 }, { "epoch": 5.2259380602089385, "grad_norm": 0.112940214574337, "learning_rate": 1.7387942758237004e-05, "loss": 0.007, "step": 169080 }, { "epoch": 5.226865302590097, "grad_norm": 0.1840941458940506, "learning_rate": 1.7387479137046426e-05, "loss": 0.0075, "step": 169110 }, { "epoch": 5.227792544971256, "grad_norm": 0.09877459704875946, "learning_rate": 1.7387015515855844e-05, "loss": 0.0069, "step": 169140 }, { "epoch": 5.228719787352414, "grad_norm": 0.12946750223636627, "learning_rate": 1.7386551894665265e-05, "loss": 0.0073, "step": 169170 }, { "epoch": 5.229647029733572, "grad_norm": 0.1293441504240036, "learning_rate": 1.7386088273474687e-05, "loss": 0.0073, "step": 169200 }, { "epoch": 5.230574272114731, "grad_norm": 0.11985059827566147, "learning_rate": 1.738562465228411e-05, "loss": 0.0075, "step": 169230 }, { "epoch": 5.231501514495889, "grad_norm": 0.11521230638027191, "learning_rate": 1.738516103109353e-05, "loss": 0.0078, "step": 169260 }, { "epoch": 5.232428756877048, "grad_norm": 0.18534184992313385, "learning_rate": 1.738469740990295e-05, "loss": 0.0073, "step": 169290 }, { "epoch": 5.233355999258206, "grad_norm": 0.14262552559375763, "learning_rate": 1.738423378871237e-05, "loss": 0.007, "step": 169320 }, { "epoch": 5.234283241639364, "grad_norm": 
0.10144330561161041, "learning_rate": 1.738377016752179e-05, "loss": 0.0074, "step": 169350 }, { "epoch": 5.235210484020523, "grad_norm": 0.16615471243858337, "learning_rate": 1.7383306546331213e-05, "loss": 0.007, "step": 169380 }, { "epoch": 5.236137726401681, "grad_norm": 0.19240570068359375, "learning_rate": 1.7382842925140634e-05, "loss": 0.0063, "step": 169410 }, { "epoch": 5.23706496878284, "grad_norm": 0.1082504540681839, "learning_rate": 1.7382379303950056e-05, "loss": 0.0078, "step": 169440 }, { "epoch": 5.2379922111639985, "grad_norm": 0.058200664818286896, "learning_rate": 1.7381915682759474e-05, "loss": 0.0067, "step": 169470 }, { "epoch": 5.238919453545157, "grad_norm": 0.1864430457353592, "learning_rate": 1.7381452061568895e-05, "loss": 0.0069, "step": 169500 }, { "epoch": 5.239846695926315, "grad_norm": 0.10720140486955643, "learning_rate": 1.7380988440378317e-05, "loss": 0.007, "step": 169530 }, { "epoch": 5.240773938307473, "grad_norm": 0.12139959633350372, "learning_rate": 1.7380524819187735e-05, "loss": 0.0076, "step": 169560 }, { "epoch": 5.241701180688632, "grad_norm": 0.07153069972991943, "learning_rate": 1.7380061197997156e-05, "loss": 0.0076, "step": 169590 }, { "epoch": 5.242628423069791, "grad_norm": 0.07707148045301437, "learning_rate": 1.7379597576806578e-05, "loss": 0.008, "step": 169620 }, { "epoch": 5.243555665450949, "grad_norm": 0.13080501556396484, "learning_rate": 1.7379133955616e-05, "loss": 0.007, "step": 169650 }, { "epoch": 5.244482907832107, "grad_norm": 0.1512555330991745, "learning_rate": 1.737867033442542e-05, "loss": 0.0076, "step": 169680 }, { "epoch": 5.2454101502132655, "grad_norm": 0.14040905237197876, "learning_rate": 1.7378206713234842e-05, "loss": 0.0065, "step": 169710 }, { "epoch": 5.246337392594424, "grad_norm": 0.08456911891698837, "learning_rate": 1.737774309204426e-05, "loss": 0.0072, "step": 169740 }, { "epoch": 5.247264634975583, "grad_norm": 0.10668550431728363, "learning_rate": 1.7377279470853682e-05, 
"loss": 0.0072, "step": 169770 }, { "epoch": 5.248191877356741, "grad_norm": 0.17553235590457916, "learning_rate": 1.7376815849663104e-05, "loss": 0.0072, "step": 169800 }, { "epoch": 5.2491191197379, "grad_norm": 0.13870394229888916, "learning_rate": 1.7376352228472525e-05, "loss": 0.0075, "step": 169830 }, { "epoch": 5.250046362119058, "grad_norm": 0.2028796672821045, "learning_rate": 1.7375888607281947e-05, "loss": 0.0071, "step": 169860 }, { "epoch": 5.250973604500216, "grad_norm": 0.14775094389915466, "learning_rate": 1.7375424986091365e-05, "loss": 0.0072, "step": 169890 }, { "epoch": 5.251900846881375, "grad_norm": 0.09397595375776291, "learning_rate": 1.7374961364900786e-05, "loss": 0.0071, "step": 169920 }, { "epoch": 5.252828089262533, "grad_norm": 0.08113345503807068, "learning_rate": 1.7374497743710208e-05, "loss": 0.0073, "step": 169950 }, { "epoch": 5.253755331643692, "grad_norm": 0.23353081941604614, "learning_rate": 1.7374034122519626e-05, "loss": 0.0073, "step": 169980 }, { "epoch": 5.25468257402485, "grad_norm": 0.11527610570192337, "learning_rate": 1.7373570501329047e-05, "loss": 0.0072, "step": 170010 }, { "epoch": 5.255609816406008, "grad_norm": 0.11165726184844971, "learning_rate": 1.7373106880138472e-05, "loss": 0.0071, "step": 170040 }, { "epoch": 5.256537058787167, "grad_norm": 0.15063132345676422, "learning_rate": 1.737264325894789e-05, "loss": 0.0081, "step": 170070 }, { "epoch": 5.2574643011683255, "grad_norm": 0.11380444467067719, "learning_rate": 1.7372179637757312e-05, "loss": 0.0071, "step": 170100 }, { "epoch": 5.258391543549484, "grad_norm": 0.11553621292114258, "learning_rate": 1.737171601656673e-05, "loss": 0.0067, "step": 170130 }, { "epoch": 5.259318785930642, "grad_norm": 0.12652269005775452, "learning_rate": 1.737125239537615e-05, "loss": 0.0067, "step": 170160 }, { "epoch": 5.2602460283118, "grad_norm": 0.14327329397201538, "learning_rate": 1.7370788774185573e-05, "loss": 0.0073, "step": 170190 }, { "epoch": 
5.261173270692959, "grad_norm": 0.10280174762010574, "learning_rate": 1.7370325152994994e-05, "loss": 0.0068, "step": 170220 }, { "epoch": 5.262100513074118, "grad_norm": 0.09594044089317322, "learning_rate": 1.7369861531804416e-05, "loss": 0.0071, "step": 170250 }, { "epoch": 5.263027755455276, "grad_norm": 0.14584681391716003, "learning_rate": 1.7369397910613837e-05, "loss": 0.0069, "step": 170280 }, { "epoch": 5.263954997836435, "grad_norm": 0.14834164083003998, "learning_rate": 1.7368934289423256e-05, "loss": 0.0076, "step": 170310 }, { "epoch": 5.2648822402175925, "grad_norm": 0.09786476939916611, "learning_rate": 1.7368470668232677e-05, "loss": 0.0082, "step": 170340 }, { "epoch": 5.265809482598751, "grad_norm": 0.09895646572113037, "learning_rate": 1.73680070470421e-05, "loss": 0.0068, "step": 170370 }, { "epoch": 5.26673672497991, "grad_norm": 0.11936220526695251, "learning_rate": 1.736754342585152e-05, "loss": 0.0082, "step": 170400 }, { "epoch": 5.267663967361068, "grad_norm": 0.2265918105840683, "learning_rate": 1.736707980466094e-05, "loss": 0.0072, "step": 170430 }, { "epoch": 5.268591209742227, "grad_norm": 0.15416505932807922, "learning_rate": 1.736661618347036e-05, "loss": 0.0074, "step": 170460 }, { "epoch": 5.269518452123385, "grad_norm": 0.10596991330385208, "learning_rate": 1.736615256227978e-05, "loss": 0.0074, "step": 170490 }, { "epoch": 5.270445694504543, "grad_norm": 0.4043160080909729, "learning_rate": 1.7365688941089203e-05, "loss": 0.0069, "step": 170520 }, { "epoch": 5.271372936885702, "grad_norm": 0.08840671926736832, "learning_rate": 1.736522531989862e-05, "loss": 0.0079, "step": 170550 }, { "epoch": 5.2723001792668605, "grad_norm": 0.09968718886375427, "learning_rate": 1.7364761698708042e-05, "loss": 0.0078, "step": 170580 }, { "epoch": 5.273227421648019, "grad_norm": 0.10957728326320648, "learning_rate": 1.7364298077517464e-05, "loss": 0.0078, "step": 170610 }, { "epoch": 5.274154664029178, "grad_norm": 0.1715749204158783, 
"learning_rate": 1.7363834456326885e-05, "loss": 0.0072, "step": 170640 }, { "epoch": 5.275081906410335, "grad_norm": 0.10411082208156586, "learning_rate": 1.7363370835136307e-05, "loss": 0.0079, "step": 170670 }, { "epoch": 5.276009148791494, "grad_norm": 0.09669441729784012, "learning_rate": 1.736290721394573e-05, "loss": 0.0074, "step": 170700 }, { "epoch": 5.276936391172653, "grad_norm": 0.12898674607276917, "learning_rate": 1.7362443592755147e-05, "loss": 0.008, "step": 170730 }, { "epoch": 5.277863633553811, "grad_norm": 0.1072465181350708, "learning_rate": 1.7361979971564568e-05, "loss": 0.0077, "step": 170760 }, { "epoch": 5.27879087593497, "grad_norm": 0.11240682750940323, "learning_rate": 1.736151635037399e-05, "loss": 0.0067, "step": 170790 }, { "epoch": 5.2797181183161275, "grad_norm": 0.18139363825321198, "learning_rate": 1.736105272918341e-05, "loss": 0.0076, "step": 170820 }, { "epoch": 5.280645360697286, "grad_norm": 0.08086758106946945, "learning_rate": 1.7360589107992833e-05, "loss": 0.0078, "step": 170850 }, { "epoch": 5.281572603078445, "grad_norm": 0.14431075751781464, "learning_rate": 1.736012548680225e-05, "loss": 0.0074, "step": 170880 }, { "epoch": 5.282499845459603, "grad_norm": 0.12595079839229584, "learning_rate": 1.7359661865611672e-05, "loss": 0.0073, "step": 170910 }, { "epoch": 5.283427087840762, "grad_norm": 0.2026320993900299, "learning_rate": 1.7359198244421094e-05, "loss": 0.008, "step": 170940 }, { "epoch": 5.28435433022192, "grad_norm": 0.16015997529029846, "learning_rate": 1.7358734623230512e-05, "loss": 0.0066, "step": 170970 }, { "epoch": 5.285281572603078, "grad_norm": 0.09314583986997604, "learning_rate": 1.7358271002039933e-05, "loss": 0.0072, "step": 171000 }, { "epoch": 5.286208814984237, "grad_norm": 0.06474786251783371, "learning_rate": 1.7357807380849355e-05, "loss": 0.0063, "step": 171030 }, { "epoch": 5.287136057365395, "grad_norm": 0.11800336837768555, "learning_rate": 1.7357343759658776e-05, "loss": 0.0068, 
"step": 171060 }, { "epoch": 5.288063299746554, "grad_norm": 0.14357523620128632, "learning_rate": 1.7356880138468198e-05, "loss": 0.0071, "step": 171090 }, { "epoch": 5.288990542127713, "grad_norm": 0.11631201952695847, "learning_rate": 1.735641651727762e-05, "loss": 0.0077, "step": 171120 }, { "epoch": 5.28991778450887, "grad_norm": 0.10498958081007004, "learning_rate": 1.7355952896087037e-05, "loss": 0.0075, "step": 171150 }, { "epoch": 5.290845026890029, "grad_norm": 0.11467855423688889, "learning_rate": 1.735548927489646e-05, "loss": 0.0068, "step": 171180 }, { "epoch": 5.2917722692711875, "grad_norm": 0.0968148335814476, "learning_rate": 1.735502565370588e-05, "loss": 0.0069, "step": 171210 }, { "epoch": 5.292699511652346, "grad_norm": 0.11427681148052216, "learning_rate": 1.7354562032515302e-05, "loss": 0.0077, "step": 171240 }, { "epoch": 5.293626754033505, "grad_norm": 0.08229750394821167, "learning_rate": 1.7354098411324723e-05, "loss": 0.0073, "step": 171270 }, { "epoch": 5.294553996414662, "grad_norm": 0.13058502972126007, "learning_rate": 1.735363479013414e-05, "loss": 0.0075, "step": 171300 }, { "epoch": 5.295481238795821, "grad_norm": 0.10387169569730759, "learning_rate": 1.7353171168943563e-05, "loss": 0.0075, "step": 171330 }, { "epoch": 5.29640848117698, "grad_norm": 0.07158692181110382, "learning_rate": 1.7352707547752985e-05, "loss": 0.007, "step": 171360 }, { "epoch": 5.297335723558138, "grad_norm": 0.11026150733232498, "learning_rate": 1.7352243926562403e-05, "loss": 0.0072, "step": 171390 }, { "epoch": 5.298262965939297, "grad_norm": 0.13845320045948029, "learning_rate": 1.7351780305371824e-05, "loss": 0.0081, "step": 171420 }, { "epoch": 5.299190208320455, "grad_norm": 0.12144298106431961, "learning_rate": 1.735131668418125e-05, "loss": 0.0073, "step": 171450 }, { "epoch": 5.300117450701613, "grad_norm": 0.12270079553127289, "learning_rate": 1.7350853062990667e-05, "loss": 0.0073, "step": 171480 }, { "epoch": 5.301044693082772, "grad_norm": 
0.1005663275718689, "learning_rate": 1.735038944180009e-05, "loss": 0.0072, "step": 171510 }, { "epoch": 5.30197193546393, "grad_norm": 0.10024897754192352, "learning_rate": 1.7349925820609507e-05, "loss": 0.0071, "step": 171540 }, { "epoch": 5.302899177845089, "grad_norm": 0.09814739972352982, "learning_rate": 1.734946219941893e-05, "loss": 0.0071, "step": 171570 }, { "epoch": 5.3038264202262475, "grad_norm": 0.11732474714517593, "learning_rate": 1.734899857822835e-05, "loss": 0.0063, "step": 171600 }, { "epoch": 5.304753662607405, "grad_norm": 0.12177685648202896, "learning_rate": 1.734853495703777e-05, "loss": 0.0066, "step": 171630 }, { "epoch": 5.305680904988564, "grad_norm": 0.16610164940357208, "learning_rate": 1.7348071335847193e-05, "loss": 0.0071, "step": 171660 }, { "epoch": 5.306608147369722, "grad_norm": 0.10159400850534439, "learning_rate": 1.7347607714656614e-05, "loss": 0.0071, "step": 171690 }, { "epoch": 5.307535389750881, "grad_norm": 0.08978956192731857, "learning_rate": 1.7347144093466033e-05, "loss": 0.0068, "step": 171720 }, { "epoch": 5.30846263213204, "grad_norm": 0.1625763326883316, "learning_rate": 1.7346680472275454e-05, "loss": 0.0075, "step": 171750 }, { "epoch": 5.309389874513197, "grad_norm": 0.1278800070285797, "learning_rate": 1.7346216851084876e-05, "loss": 0.0073, "step": 171780 }, { "epoch": 5.310317116894356, "grad_norm": 0.08772356063127518, "learning_rate": 1.7345753229894297e-05, "loss": 0.0074, "step": 171810 }, { "epoch": 5.3112443592755145, "grad_norm": 0.1737421303987503, "learning_rate": 1.734528960870372e-05, "loss": 0.0076, "step": 171840 }, { "epoch": 5.312171601656673, "grad_norm": 0.13172809779644012, "learning_rate": 1.7344825987513137e-05, "loss": 0.0073, "step": 171870 }, { "epoch": 5.313098844037832, "grad_norm": 0.13932277262210846, "learning_rate": 1.7344362366322558e-05, "loss": 0.0073, "step": 171900 }, { "epoch": 5.31402608641899, "grad_norm": 0.17175287008285522, "learning_rate": 1.734389874513198e-05, 
"loss": 0.0072, "step": 171930 }, { "epoch": 5.314953328800148, "grad_norm": 0.11707237362861633, "learning_rate": 1.7343435123941398e-05, "loss": 0.0075, "step": 171960 }, { "epoch": 5.315880571181307, "grad_norm": 0.15728306770324707, "learning_rate": 1.734297150275082e-05, "loss": 0.0079, "step": 171990 }, { "epoch": 5.316807813562465, "grad_norm": 0.13907656073570251, "learning_rate": 1.734250788156024e-05, "loss": 0.0074, "step": 172020 }, { "epoch": 5.317735055943624, "grad_norm": 0.11373654752969742, "learning_rate": 1.734205971440935e-05, "loss": 0.0074, "step": 172050 }, { "epoch": 5.318662298324782, "grad_norm": 0.12066805362701416, "learning_rate": 1.734159609321877e-05, "loss": 0.0077, "step": 172080 }, { "epoch": 5.31958954070594, "grad_norm": 0.1740320920944214, "learning_rate": 1.7341132472028192e-05, "loss": 0.0071, "step": 172110 }, { "epoch": 5.320516783087099, "grad_norm": 0.11428117007017136, "learning_rate": 1.734066885083761e-05, "loss": 0.0074, "step": 172140 }, { "epoch": 5.321444025468257, "grad_norm": 0.10800988227128983, "learning_rate": 1.734020522964703e-05, "loss": 0.0074, "step": 172170 }, { "epoch": 5.322371267849416, "grad_norm": 0.08368247002363205, "learning_rate": 1.7339741608456453e-05, "loss": 0.007, "step": 172200 }, { "epoch": 5.3232985102305745, "grad_norm": 0.10272407531738281, "learning_rate": 1.733927798726587e-05, "loss": 0.0076, "step": 172230 }, { "epoch": 5.324225752611733, "grad_norm": 0.14546018838882446, "learning_rate": 1.7338814366075292e-05, "loss": 0.0069, "step": 172260 }, { "epoch": 5.325152994992891, "grad_norm": 0.1032387912273407, "learning_rate": 1.7338350744884714e-05, "loss": 0.0071, "step": 172290 }, { "epoch": 5.3260802373740495, "grad_norm": 0.06889548152685165, "learning_rate": 1.7337887123694135e-05, "loss": 0.0076, "step": 172320 }, { "epoch": 5.327007479755208, "grad_norm": 0.06424369663000107, "learning_rate": 1.7337423502503557e-05, "loss": 0.0075, "step": 172350 }, { "epoch": 
5.327934722136367, "grad_norm": 0.09601959586143494, "learning_rate": 1.7336959881312975e-05, "loss": 0.007, "step": 172380 }, { "epoch": 5.328861964517525, "grad_norm": 0.11710167676210403, "learning_rate": 1.7336496260122397e-05, "loss": 0.0067, "step": 172410 }, { "epoch": 5.329789206898683, "grad_norm": 0.13594289124011993, "learning_rate": 1.7336032638931818e-05, "loss": 0.0066, "step": 172440 }, { "epoch": 5.330716449279842, "grad_norm": 0.13339222967624664, "learning_rate": 1.733556901774124e-05, "loss": 0.0069, "step": 172470 }, { "epoch": 5.331643691661, "grad_norm": 0.14234231412410736, "learning_rate": 1.733510539655066e-05, "loss": 0.0075, "step": 172500 }, { "epoch": 5.332570934042159, "grad_norm": 0.11876264959573746, "learning_rate": 1.7334641775360083e-05, "loss": 0.0072, "step": 172530 }, { "epoch": 5.333498176423317, "grad_norm": 0.11915400624275208, "learning_rate": 1.73341781541695e-05, "loss": 0.0075, "step": 172560 }, { "epoch": 5.334425418804475, "grad_norm": 0.1559576541185379, "learning_rate": 1.7333714532978922e-05, "loss": 0.0072, "step": 172590 }, { "epoch": 5.335352661185634, "grad_norm": 0.09322857856750488, "learning_rate": 1.733325091178834e-05, "loss": 0.0076, "step": 172620 }, { "epoch": 5.336279903566792, "grad_norm": 0.22550880908966064, "learning_rate": 1.7332787290597762e-05, "loss": 0.0071, "step": 172650 }, { "epoch": 5.337207145947951, "grad_norm": 0.13123276829719543, "learning_rate": 1.7332323669407183e-05, "loss": 0.0073, "step": 172680 }, { "epoch": 5.3381343883291095, "grad_norm": 0.10818494856357574, "learning_rate": 1.7331860048216605e-05, "loss": 0.0076, "step": 172710 }, { "epoch": 5.339061630710268, "grad_norm": 0.10067521035671234, "learning_rate": 1.7331396427026026e-05, "loss": 0.0074, "step": 172740 }, { "epoch": 5.339988873091426, "grad_norm": 0.0909004881978035, "learning_rate": 1.7330932805835448e-05, "loss": 0.0071, "step": 172770 }, { "epoch": 5.340916115472584, "grad_norm": 0.15523561835289001, 
"learning_rate": 1.7330469184644866e-05, "loss": 0.0082, "step": 172800 }, { "epoch": 5.341843357853743, "grad_norm": 0.1089000552892685, "learning_rate": 1.7330005563454288e-05, "loss": 0.0079, "step": 172830 }, { "epoch": 5.342770600234902, "grad_norm": 0.13599421083927155, "learning_rate": 1.732954194226371e-05, "loss": 0.0071, "step": 172860 }, { "epoch": 5.34369784261606, "grad_norm": 0.1972542703151703, "learning_rate": 1.732907832107313e-05, "loss": 0.0075, "step": 172890 }, { "epoch": 5.344625084997218, "grad_norm": 0.1353914886713028, "learning_rate": 1.7328614699882552e-05, "loss": 0.0066, "step": 172920 }, { "epoch": 5.3455523273783765, "grad_norm": 0.11966238170862198, "learning_rate": 1.732815107869197e-05, "loss": 0.0073, "step": 172950 }, { "epoch": 5.346479569759535, "grad_norm": 0.1201058030128479, "learning_rate": 1.732768745750139e-05, "loss": 0.0068, "step": 172980 }, { "epoch": 5.347406812140694, "grad_norm": 0.1355055570602417, "learning_rate": 1.7327223836310813e-05, "loss": 0.0075, "step": 173010 }, { "epoch": 5.348334054521852, "grad_norm": 0.1121373102068901, "learning_rate": 1.732676021512023e-05, "loss": 0.007, "step": 173040 }, { "epoch": 5.349261296903011, "grad_norm": 0.12133758515119553, "learning_rate": 1.7326296593929656e-05, "loss": 0.0071, "step": 173070 }, { "epoch": 5.350188539284169, "grad_norm": 0.1306222677230835, "learning_rate": 1.7325832972739078e-05, "loss": 0.0068, "step": 173100 }, { "epoch": 5.351115781665327, "grad_norm": 0.13644437491893768, "learning_rate": 1.7325369351548496e-05, "loss": 0.0075, "step": 173130 }, { "epoch": 5.352043024046486, "grad_norm": 0.13564547896385193, "learning_rate": 1.7324905730357917e-05, "loss": 0.0068, "step": 173160 }, { "epoch": 5.352970266427644, "grad_norm": 0.15806342661380768, "learning_rate": 1.732444210916734e-05, "loss": 0.0075, "step": 173190 }, { "epoch": 5.353897508808803, "grad_norm": 0.1252353936433792, "learning_rate": 1.7323978487976757e-05, "loss": 0.0074, "step": 
173220 }, { "epoch": 5.354824751189961, "grad_norm": 0.14066652953624725, "learning_rate": 1.732351486678618e-05, "loss": 0.0073, "step": 173250 }, { "epoch": 5.355751993571119, "grad_norm": 0.13960996270179749, "learning_rate": 1.73230512455956e-05, "loss": 0.0073, "step": 173280 }, { "epoch": 5.356679235952278, "grad_norm": 0.15847939252853394, "learning_rate": 1.732258762440502e-05, "loss": 0.0076, "step": 173310 }, { "epoch": 5.3576064783334365, "grad_norm": 0.1949182003736496, "learning_rate": 1.7322124003214443e-05, "loss": 0.0074, "step": 173340 }, { "epoch": 5.358533720714595, "grad_norm": 0.18638400733470917, "learning_rate": 1.732166038202386e-05, "loss": 0.0068, "step": 173370 }, { "epoch": 5.359460963095753, "grad_norm": 0.16360042989253998, "learning_rate": 1.7321196760833283e-05, "loss": 0.0064, "step": 173400 }, { "epoch": 5.360388205476911, "grad_norm": 0.09743097424507141, "learning_rate": 1.7320733139642704e-05, "loss": 0.0074, "step": 173430 }, { "epoch": 5.36131544785807, "grad_norm": 0.13543608784675598, "learning_rate": 1.7320269518452126e-05, "loss": 0.0069, "step": 173460 }, { "epoch": 5.362242690239229, "grad_norm": 0.12954221665859222, "learning_rate": 1.7319805897261547e-05, "loss": 0.0072, "step": 173490 }, { "epoch": 5.363169932620387, "grad_norm": 0.1862960308790207, "learning_rate": 1.731934227607097e-05, "loss": 0.0073, "step": 173520 }, { "epoch": 5.364097175001546, "grad_norm": 0.10582060366868973, "learning_rate": 1.7318878654880387e-05, "loss": 0.0076, "step": 173550 }, { "epoch": 5.3650244173827035, "grad_norm": 0.10024761408567429, "learning_rate": 1.7318415033689808e-05, "loss": 0.007, "step": 173580 }, { "epoch": 5.365951659763862, "grad_norm": 0.22284674644470215, "learning_rate": 1.7317951412499226e-05, "loss": 0.0068, "step": 173610 }, { "epoch": 5.366878902145021, "grad_norm": 0.0915282666683197, "learning_rate": 1.7317487791308648e-05, "loss": 0.0079, "step": 173640 }, { "epoch": 5.367806144526179, "grad_norm": 
0.157382532954216, "learning_rate": 1.731702417011807e-05, "loss": 0.0076, "step": 173670 }, { "epoch": 5.368733386907338, "grad_norm": 0.12898530066013336, "learning_rate": 1.731656054892749e-05, "loss": 0.0067, "step": 173700 }, { "epoch": 5.369660629288496, "grad_norm": 0.10853663086891174, "learning_rate": 1.7316096927736912e-05, "loss": 0.0072, "step": 173730 }, { "epoch": 5.370587871669654, "grad_norm": 0.10040731728076935, "learning_rate": 1.7315633306546334e-05, "loss": 0.0066, "step": 173760 }, { "epoch": 5.371515114050813, "grad_norm": 0.17060784995555878, "learning_rate": 1.7315169685355752e-05, "loss": 0.0071, "step": 173790 }, { "epoch": 5.372442356431971, "grad_norm": 0.0956885889172554, "learning_rate": 1.7314706064165174e-05, "loss": 0.0069, "step": 173820 }, { "epoch": 5.37336959881313, "grad_norm": 0.08551713079214096, "learning_rate": 1.7314242442974595e-05, "loss": 0.0075, "step": 173850 }, { "epoch": 5.374296841194289, "grad_norm": 0.08552981168031693, "learning_rate": 1.7313778821784017e-05, "loss": 0.0073, "step": 173880 }, { "epoch": 5.375224083575446, "grad_norm": 0.1231515035033226, "learning_rate": 1.7313315200593438e-05, "loss": 0.0076, "step": 173910 }, { "epoch": 5.376151325956605, "grad_norm": 0.11406168341636658, "learning_rate": 1.731285157940286e-05, "loss": 0.007, "step": 173940 }, { "epoch": 5.3770785683377635, "grad_norm": 0.10568580031394958, "learning_rate": 1.7312387958212278e-05, "loss": 0.007, "step": 173970 }, { "epoch": 5.378005810718922, "grad_norm": 0.12540067732334137, "learning_rate": 1.73119243370217e-05, "loss": 0.0077, "step": 174000 }, { "epoch": 5.378933053100081, "grad_norm": 0.1969130039215088, "learning_rate": 1.7311460715831117e-05, "loss": 0.0077, "step": 174030 }, { "epoch": 5.3798602954812385, "grad_norm": 0.13902637362480164, "learning_rate": 1.731099709464054e-05, "loss": 0.0063, "step": 174060 }, { "epoch": 5.380787537862397, "grad_norm": 0.07309257984161377, "learning_rate": 1.731053347344996e-05, 
"loss": 0.0074, "step": 174090 }, { "epoch": 5.381714780243556, "grad_norm": 0.0810125395655632, "learning_rate": 1.7310069852259382e-05, "loss": 0.0071, "step": 174120 }, { "epoch": 5.382642022624714, "grad_norm": 0.13425223529338837, "learning_rate": 1.7309606231068803e-05, "loss": 0.0072, "step": 174150 }, { "epoch": 5.383569265005873, "grad_norm": 0.11046617478132248, "learning_rate": 1.7309142609878225e-05, "loss": 0.0071, "step": 174180 }, { "epoch": 5.384496507387031, "grad_norm": 0.0951225683093071, "learning_rate": 1.7308678988687643e-05, "loss": 0.0067, "step": 174210 }, { "epoch": 5.385423749768189, "grad_norm": 0.1401468813419342, "learning_rate": 1.7308215367497064e-05, "loss": 0.0069, "step": 174240 }, { "epoch": 5.386350992149348, "grad_norm": 0.13048560917377472, "learning_rate": 1.7307751746306486e-05, "loss": 0.0073, "step": 174270 }, { "epoch": 5.387278234530506, "grad_norm": 0.15661419928073883, "learning_rate": 1.7307288125115907e-05, "loss": 0.0071, "step": 174300 }, { "epoch": 5.388205476911665, "grad_norm": 0.11932108551263809, "learning_rate": 1.730682450392533e-05, "loss": 0.0078, "step": 174330 }, { "epoch": 5.389132719292824, "grad_norm": 0.14472909271717072, "learning_rate": 1.7306360882734747e-05, "loss": 0.0076, "step": 174360 }, { "epoch": 5.390059961673981, "grad_norm": 0.12597614526748657, "learning_rate": 1.730589726154417e-05, "loss": 0.0068, "step": 174390 }, { "epoch": 5.39098720405514, "grad_norm": 0.12236731499433517, "learning_rate": 1.730543364035359e-05, "loss": 0.0077, "step": 174420 }, { "epoch": 5.3919144464362985, "grad_norm": 0.13241636753082275, "learning_rate": 1.7304970019163008e-05, "loss": 0.0071, "step": 174450 }, { "epoch": 5.392841688817457, "grad_norm": 0.09212060272693634, "learning_rate": 1.7304521852012116e-05, "loss": 0.0068, "step": 174480 }, { "epoch": 5.393768931198616, "grad_norm": 0.10659506171941757, "learning_rate": 1.7304058230821538e-05, "loss": 0.0071, "step": 174510 }, { "epoch": 
5.394696173579773, "grad_norm": 0.11049255728721619, "learning_rate": 1.730359460963096e-05, "loss": 0.0073, "step": 174540 }, { "epoch": 5.395623415960932, "grad_norm": 0.07616493850946426, "learning_rate": 1.730313098844038e-05, "loss": 0.0073, "step": 174570 }, { "epoch": 5.396550658342091, "grad_norm": 0.15102878212928772, "learning_rate": 1.7302667367249802e-05, "loss": 0.0069, "step": 174600 }, { "epoch": 5.397477900723249, "grad_norm": 0.06887795031070709, "learning_rate": 1.730220374605922e-05, "loss": 0.0076, "step": 174630 }, { "epoch": 5.398405143104408, "grad_norm": 0.10256149619817734, "learning_rate": 1.7301740124868642e-05, "loss": 0.0077, "step": 174660 }, { "epoch": 5.399332385485566, "grad_norm": 0.1639256328344345, "learning_rate": 1.7301276503678063e-05, "loss": 0.0065, "step": 174690 }, { "epoch": 5.400259627866724, "grad_norm": 0.08141203969717026, "learning_rate": 1.7300812882487485e-05, "loss": 0.0072, "step": 174720 }, { "epoch": 5.401186870247883, "grad_norm": 0.1431759148836136, "learning_rate": 1.7300349261296906e-05, "loss": 0.0074, "step": 174750 }, { "epoch": 5.402114112629041, "grad_norm": 0.11221031844615936, "learning_rate": 1.7299885640106324e-05, "loss": 0.0078, "step": 174780 }, { "epoch": 5.4030413550102, "grad_norm": 0.12811583280563354, "learning_rate": 1.7299422018915746e-05, "loss": 0.0074, "step": 174810 }, { "epoch": 5.4039685973913585, "grad_norm": 0.17847023904323578, "learning_rate": 1.7298958397725167e-05, "loss": 0.0069, "step": 174840 }, { "epoch": 5.404895839772516, "grad_norm": 0.11594855785369873, "learning_rate": 1.7298494776534586e-05, "loss": 0.0075, "step": 174870 }, { "epoch": 5.405823082153675, "grad_norm": 0.11795026808977127, "learning_rate": 1.7298031155344007e-05, "loss": 0.007, "step": 174900 }, { "epoch": 5.406750324534833, "grad_norm": 0.09558378159999847, "learning_rate": 1.729756753415343e-05, "loss": 0.0073, "step": 174930 }, { "epoch": 5.407677566915992, "grad_norm": 0.10128765553236008, 
"learning_rate": 1.729710391296285e-05, "loss": 0.0074, "step": 174960 }, { "epoch": 5.408604809297151, "grad_norm": 0.08324280381202698, "learning_rate": 1.729664029177227e-05, "loss": 0.0071, "step": 174990 }, { "epoch": 5.409532051678308, "grad_norm": 0.11155514419078827, "learning_rate": 1.7296176670581693e-05, "loss": 0.0073, "step": 175020 }, { "epoch": 5.410459294059467, "grad_norm": 0.08842270821332932, "learning_rate": 1.729571304939111e-05, "loss": 0.0072, "step": 175050 }, { "epoch": 5.4113865364406255, "grad_norm": 0.13374833762645721, "learning_rate": 1.7295249428200533e-05, "loss": 0.0071, "step": 175080 }, { "epoch": 5.412313778821784, "grad_norm": 0.07834585756063461, "learning_rate": 1.7294785807009954e-05, "loss": 0.0073, "step": 175110 }, { "epoch": 5.413241021202943, "grad_norm": 0.07462462782859802, "learning_rate": 1.7294322185819376e-05, "loss": 0.0069, "step": 175140 }, { "epoch": 5.4141682635841, "grad_norm": 0.16377924382686615, "learning_rate": 1.7293858564628797e-05, "loss": 0.0071, "step": 175170 }, { "epoch": 5.415095505965259, "grad_norm": 0.09887346625328064, "learning_rate": 1.7293394943438215e-05, "loss": 0.0073, "step": 175200 }, { "epoch": 5.416022748346418, "grad_norm": 0.0987900048494339, "learning_rate": 1.7292931322247637e-05, "loss": 0.007, "step": 175230 }, { "epoch": 5.416949990727576, "grad_norm": 0.07732535898685455, "learning_rate": 1.729246770105706e-05, "loss": 0.0074, "step": 175260 }, { "epoch": 5.417877233108735, "grad_norm": 0.09841900318861008, "learning_rate": 1.7292004079866476e-05, "loss": 0.0073, "step": 175290 }, { "epoch": 5.418804475489893, "grad_norm": 0.18128319084644318, "learning_rate": 1.7291540458675898e-05, "loss": 0.0062, "step": 175320 }, { "epoch": 5.419731717871051, "grad_norm": 0.10837372392416, "learning_rate": 1.729107683748532e-05, "loss": 0.0079, "step": 175350 }, { "epoch": 5.42065896025221, "grad_norm": 0.13990932703018188, "learning_rate": 1.729061321629474e-05, "loss": 0.0076, "step": 
175380 }, { "epoch": 5.421586202633368, "grad_norm": 0.15252703428268433, "learning_rate": 1.7290149595104162e-05, "loss": 0.0075, "step": 175410 }, { "epoch": 5.422513445014527, "grad_norm": 0.10105236619710922, "learning_rate": 1.728968597391358e-05, "loss": 0.0072, "step": 175440 }, { "epoch": 5.4234406873956855, "grad_norm": 0.12288753688335419, "learning_rate": 1.7289222352723002e-05, "loss": 0.0074, "step": 175470 }, { "epoch": 5.424367929776844, "grad_norm": 0.0738365575671196, "learning_rate": 1.7288758731532424e-05, "loss": 0.0075, "step": 175500 }, { "epoch": 5.425295172158002, "grad_norm": 0.09098488092422485, "learning_rate": 1.7288295110341845e-05, "loss": 0.0081, "step": 175530 }, { "epoch": 5.42622241453916, "grad_norm": 0.16970528662204742, "learning_rate": 1.7287831489151267e-05, "loss": 0.0073, "step": 175560 }, { "epoch": 5.427149656920319, "grad_norm": 0.14409880340099335, "learning_rate": 1.7287367867960688e-05, "loss": 0.0076, "step": 175590 }, { "epoch": 5.428076899301478, "grad_norm": 0.08822670578956604, "learning_rate": 1.7286904246770106e-05, "loss": 0.0069, "step": 175620 }, { "epoch": 5.429004141682636, "grad_norm": 0.11370483040809631, "learning_rate": 1.7286440625579528e-05, "loss": 0.007, "step": 175650 }, { "epoch": 5.429931384063794, "grad_norm": 0.07291629910469055, "learning_rate": 1.728597700438895e-05, "loss": 0.0068, "step": 175680 }, { "epoch": 5.4308586264449525, "grad_norm": 0.15423929691314697, "learning_rate": 1.7285513383198367e-05, "loss": 0.007, "step": 175710 }, { "epoch": 5.431785868826111, "grad_norm": 0.12311119586229324, "learning_rate": 1.7285049762007792e-05, "loss": 0.0074, "step": 175740 }, { "epoch": 5.43271311120727, "grad_norm": 0.15412931144237518, "learning_rate": 1.728458614081721e-05, "loss": 0.0074, "step": 175770 }, { "epoch": 5.433640353588428, "grad_norm": 0.0966198742389679, "learning_rate": 1.7284122519626632e-05, "loss": 0.0076, "step": 175800 }, { "epoch": 5.434567595969586, "grad_norm": 
0.11502576619386673, "learning_rate": 1.7283658898436053e-05, "loss": 0.0075, "step": 175830 }, { "epoch": 5.435494838350745, "grad_norm": 0.10031119734048843, "learning_rate": 1.728319527724547e-05, "loss": 0.0076, "step": 175860 }, { "epoch": 5.436422080731903, "grad_norm": 0.10814383625984192, "learning_rate": 1.7282731656054893e-05, "loss": 0.0071, "step": 175890 }, { "epoch": 5.437349323113062, "grad_norm": 0.13008370995521545, "learning_rate": 1.7282268034864315e-05, "loss": 0.0076, "step": 175920 }, { "epoch": 5.4382765654942204, "grad_norm": 0.11751308292150497, "learning_rate": 1.7281804413673736e-05, "loss": 0.0076, "step": 175950 }, { "epoch": 5.439203807875378, "grad_norm": 0.09600755572319031, "learning_rate": 1.7281340792483158e-05, "loss": 0.0074, "step": 175980 }, { "epoch": 5.440131050256537, "grad_norm": 0.11579109728336334, "learning_rate": 1.728087717129258e-05, "loss": 0.0071, "step": 176010 }, { "epoch": 5.441058292637695, "grad_norm": 0.10588262975215912, "learning_rate": 1.7280413550101997e-05, "loss": 0.0068, "step": 176040 }, { "epoch": 5.441985535018854, "grad_norm": 0.1230815201997757, "learning_rate": 1.727994992891142e-05, "loss": 0.0072, "step": 176070 }, { "epoch": 5.442912777400013, "grad_norm": 0.1310173124074936, "learning_rate": 1.727948630772084e-05, "loss": 0.0072, "step": 176100 }, { "epoch": 5.443840019781171, "grad_norm": 0.12694679200649261, "learning_rate": 1.727902268653026e-05, "loss": 0.0071, "step": 176130 }, { "epoch": 5.444767262162329, "grad_norm": 0.06829682737588882, "learning_rate": 1.7278559065339683e-05, "loss": 0.0071, "step": 176160 }, { "epoch": 5.4456945045434875, "grad_norm": 0.1507551372051239, "learning_rate": 1.72780954441491e-05, "loss": 0.0072, "step": 176190 }, { "epoch": 5.446621746924646, "grad_norm": 0.1110716313123703, "learning_rate": 1.7277631822958523e-05, "loss": 0.0071, "step": 176220 }, { "epoch": 5.447548989305805, "grad_norm": 0.09029579162597656, "learning_rate": 1.7277168201767944e-05, 
"loss": 0.0075, "step": 176250 }, { "epoch": 5.448476231686963, "grad_norm": 0.10612848401069641, "learning_rate": 1.7276704580577362e-05, "loss": 0.0066, "step": 176280 }, { "epoch": 5.449403474068122, "grad_norm": 0.09694907814264297, "learning_rate": 1.7276240959386784e-05, "loss": 0.007, "step": 176310 }, { "epoch": 5.45033071644928, "grad_norm": 0.1309709995985031, "learning_rate": 1.7275777338196205e-05, "loss": 0.0074, "step": 176340 }, { "epoch": 5.451257958830438, "grad_norm": 0.1407550722360611, "learning_rate": 1.7275313717005627e-05, "loss": 0.0076, "step": 176370 }, { "epoch": 5.452185201211597, "grad_norm": 0.14561864733695984, "learning_rate": 1.727485009581505e-05, "loss": 0.0075, "step": 176400 }, { "epoch": 5.453112443592755, "grad_norm": 0.11124301701784134, "learning_rate": 1.7274386474624467e-05, "loss": 0.0077, "step": 176430 }, { "epoch": 5.454039685973914, "grad_norm": 0.07359571754932404, "learning_rate": 1.7273922853433888e-05, "loss": 0.0075, "step": 176460 }, { "epoch": 5.454966928355072, "grad_norm": 0.10295191407203674, "learning_rate": 1.727345923224331e-05, "loss": 0.007, "step": 176490 }, { "epoch": 5.45589417073623, "grad_norm": 0.13173890113830566, "learning_rate": 1.727299561105273e-05, "loss": 0.0067, "step": 176520 }, { "epoch": 5.456821413117389, "grad_norm": 0.1356201022863388, "learning_rate": 1.7272531989862153e-05, "loss": 0.0071, "step": 176550 }, { "epoch": 5.4577486554985475, "grad_norm": 0.0922384113073349, "learning_rate": 1.7272068368671574e-05, "loss": 0.0067, "step": 176580 }, { "epoch": 5.458675897879706, "grad_norm": 0.11534874141216278, "learning_rate": 1.7271604747480992e-05, "loss": 0.0073, "step": 176610 }, { "epoch": 5.459603140260864, "grad_norm": 0.12085279822349548, "learning_rate": 1.7271141126290414e-05, "loss": 0.0069, "step": 176640 }, { "epoch": 5.460530382642022, "grad_norm": 0.12705664336681366, "learning_rate": 1.7270677505099835e-05, "loss": 0.0071, "step": 176670 }, { "epoch": 5.461457625023181, 
"grad_norm": 0.1075434535741806, "learning_rate": 1.7270213883909253e-05, "loss": 0.007, "step": 176700 }, { "epoch": 5.46238486740434, "grad_norm": 0.14427785575389862, "learning_rate": 1.7269750262718675e-05, "loss": 0.0075, "step": 176730 }, { "epoch": 5.463312109785498, "grad_norm": 0.12843894958496094, "learning_rate": 1.7269286641528096e-05, "loss": 0.0068, "step": 176760 }, { "epoch": 5.464239352166656, "grad_norm": 0.12196192145347595, "learning_rate": 1.7268823020337518e-05, "loss": 0.0072, "step": 176790 }, { "epoch": 5.4651665945478145, "grad_norm": 0.0971074253320694, "learning_rate": 1.726835939914694e-05, "loss": 0.007, "step": 176820 }, { "epoch": 5.466093836928973, "grad_norm": 0.12801770865917206, "learning_rate": 1.7267895777956357e-05, "loss": 0.0072, "step": 176850 }, { "epoch": 5.467021079310132, "grad_norm": 0.0889064148068428, "learning_rate": 1.726743215676578e-05, "loss": 0.0066, "step": 176880 }, { "epoch": 5.46794832169129, "grad_norm": 0.09632284939289093, "learning_rate": 1.72669685355752e-05, "loss": 0.0073, "step": 176910 }, { "epoch": 5.468875564072449, "grad_norm": 0.14920081198215485, "learning_rate": 1.7266504914384622e-05, "loss": 0.0071, "step": 176940 }, { "epoch": 5.469802806453607, "grad_norm": 0.08830522000789642, "learning_rate": 1.7266041293194044e-05, "loss": 0.0067, "step": 176970 }, { "epoch": 5.470730048834765, "grad_norm": 0.12334975600242615, "learning_rate": 1.7265577672003465e-05, "loss": 0.0077, "step": 177000 }, { "epoch": 5.471657291215924, "grad_norm": 0.3353404700756073, "learning_rate": 1.7265114050812883e-05, "loss": 0.0077, "step": 177030 }, { "epoch": 5.472584533597082, "grad_norm": 0.08264059573411942, "learning_rate": 1.7264650429622305e-05, "loss": 0.0072, "step": 177060 }, { "epoch": 5.473511775978241, "grad_norm": 0.15340757369995117, "learning_rate": 1.7264186808431726e-05, "loss": 0.0065, "step": 177090 }, { "epoch": 5.4744390183594, "grad_norm": 0.11758896708488464, "learning_rate": 
1.7263723187241144e-05, "loss": 0.0077, "step": 177120 }, { "epoch": 5.475366260740557, "grad_norm": 0.16684573888778687, "learning_rate": 1.726325956605057e-05, "loss": 0.0071, "step": 177150 }, { "epoch": 5.476293503121716, "grad_norm": 0.14671041071414948, "learning_rate": 1.7262795944859987e-05, "loss": 0.0069, "step": 177180 }, { "epoch": 5.4772207455028745, "grad_norm": 0.10817205905914307, "learning_rate": 1.726233232366941e-05, "loss": 0.0064, "step": 177210 }, { "epoch": 5.478147987884033, "grad_norm": 0.08084943890571594, "learning_rate": 1.726186870247883e-05, "loss": 0.0075, "step": 177240 }, { "epoch": 5.479075230265192, "grad_norm": 0.10651744902133942, "learning_rate": 1.726140508128825e-05, "loss": 0.0079, "step": 177270 }, { "epoch": 5.480002472646349, "grad_norm": 0.10851634293794632, "learning_rate": 1.726094146009767e-05, "loss": 0.007, "step": 177300 }, { "epoch": 5.480929715027508, "grad_norm": 0.1565800905227661, "learning_rate": 1.726047783890709e-05, "loss": 0.0075, "step": 177330 }, { "epoch": 5.481856957408667, "grad_norm": 0.12206102162599564, "learning_rate": 1.7260014217716513e-05, "loss": 0.0071, "step": 177360 }, { "epoch": 5.482784199789825, "grad_norm": 0.13155339658260345, "learning_rate": 1.7259550596525934e-05, "loss": 0.0064, "step": 177390 }, { "epoch": 5.483711442170984, "grad_norm": 0.09201142191886902, "learning_rate": 1.7259086975335356e-05, "loss": 0.0076, "step": 177420 }, { "epoch": 5.4846386845521415, "grad_norm": 0.12964846193790436, "learning_rate": 1.7258623354144774e-05, "loss": 0.0084, "step": 177450 }, { "epoch": 5.4855659269333, "grad_norm": 0.07892105728387833, "learning_rate": 1.7258159732954196e-05, "loss": 0.0077, "step": 177480 }, { "epoch": 5.486493169314459, "grad_norm": 0.12410780787467957, "learning_rate": 1.7257696111763617e-05, "loss": 0.007, "step": 177510 }, { "epoch": 5.487420411695617, "grad_norm": 0.11972735077142715, "learning_rate": 1.725723249057304e-05, "loss": 0.0078, "step": 177540 }, { 
"epoch": 5.488347654076776, "grad_norm": 0.061466820538043976, "learning_rate": 1.725676886938246e-05, "loss": 0.0069, "step": 177570 }, { "epoch": 5.489274896457934, "grad_norm": 0.11864389479160309, "learning_rate": 1.7256305248191878e-05, "loss": 0.0074, "step": 177600 }, { "epoch": 5.490202138839092, "grad_norm": 0.12641479074954987, "learning_rate": 1.72558416270013e-05, "loss": 0.0082, "step": 177630 }, { "epoch": 5.491129381220251, "grad_norm": 0.08696693181991577, "learning_rate": 1.725537800581072e-05, "loss": 0.0077, "step": 177660 }, { "epoch": 5.4920566236014094, "grad_norm": 0.12031629681587219, "learning_rate": 1.725491438462014e-05, "loss": 0.0075, "step": 177690 }, { "epoch": 5.492983865982568, "grad_norm": 0.08274286240339279, "learning_rate": 1.725445076342956e-05, "loss": 0.0067, "step": 177720 }, { "epoch": 5.493911108363727, "grad_norm": 0.1161055639386177, "learning_rate": 1.7253987142238982e-05, "loss": 0.0077, "step": 177750 }, { "epoch": 5.494838350744884, "grad_norm": 0.16147683560848236, "learning_rate": 1.7253523521048404e-05, "loss": 0.0073, "step": 177780 }, { "epoch": 5.495765593126043, "grad_norm": 0.12036909908056259, "learning_rate": 1.7253059899857825e-05, "loss": 0.0074, "step": 177810 }, { "epoch": 5.496692835507202, "grad_norm": 0.12482209503650665, "learning_rate": 1.7252596278667243e-05, "loss": 0.0074, "step": 177840 }, { "epoch": 5.49762007788836, "grad_norm": 0.12274429947137833, "learning_rate": 1.7252132657476665e-05, "loss": 0.0075, "step": 177870 }, { "epoch": 5.498547320269519, "grad_norm": 0.0971699208021164, "learning_rate": 1.7251669036286086e-05, "loss": 0.0074, "step": 177900 }, { "epoch": 5.499474562650677, "grad_norm": 0.14812368154525757, "learning_rate": 1.7251205415095508e-05, "loss": 0.0079, "step": 177930 }, { "epoch": 5.500401805031835, "grad_norm": 0.0994105190038681, "learning_rate": 1.725074179390493e-05, "loss": 0.0074, "step": 177960 }, { "epoch": 5.501329047412994, "grad_norm": 0.16827328503131866, 
"learning_rate": 1.725027817271435e-05, "loss": 0.0066, "step": 177990 }, { "epoch": 5.502256289794152, "grad_norm": 0.10489611327648163, "learning_rate": 1.724981455152377e-05, "loss": 0.007, "step": 178020 }, { "epoch": 5.503183532175311, "grad_norm": 0.1380695253610611, "learning_rate": 1.724935093033319e-05, "loss": 0.0068, "step": 178050 }, { "epoch": 5.5041107745564695, "grad_norm": 0.11361493915319443, "learning_rate": 1.7248887309142612e-05, "loss": 0.0075, "step": 178080 }, { "epoch": 5.505038016937627, "grad_norm": 0.10934922099113464, "learning_rate": 1.724842368795203e-05, "loss": 0.0069, "step": 178110 }, { "epoch": 5.505965259318786, "grad_norm": 0.10552550852298737, "learning_rate": 1.7247960066761452e-05, "loss": 0.0072, "step": 178140 }, { "epoch": 5.506892501699944, "grad_norm": 0.10233369469642639, "learning_rate": 1.7247496445570873e-05, "loss": 0.0065, "step": 178170 }, { "epoch": 5.507819744081103, "grad_norm": 0.1304703950881958, "learning_rate": 1.7247032824380295e-05, "loss": 0.0068, "step": 178200 }, { "epoch": 5.508746986462262, "grad_norm": 0.10401525348424911, "learning_rate": 1.7246569203189716e-05, "loss": 0.007, "step": 178230 }, { "epoch": 5.509674228843419, "grad_norm": 0.1460328847169876, "learning_rate": 1.7246105581999134e-05, "loss": 0.0069, "step": 178260 }, { "epoch": 5.510601471224578, "grad_norm": 0.13233882188796997, "learning_rate": 1.7245641960808556e-05, "loss": 0.0069, "step": 178290 }, { "epoch": 5.5115287136057365, "grad_norm": 0.1557711809873581, "learning_rate": 1.7245178339617977e-05, "loss": 0.007, "step": 178320 }, { "epoch": 5.512455955986895, "grad_norm": 0.12002724409103394, "learning_rate": 1.72447147184274e-05, "loss": 0.0076, "step": 178350 }, { "epoch": 5.513383198368054, "grad_norm": 0.1280704289674759, "learning_rate": 1.724425109723682e-05, "loss": 0.0074, "step": 178380 }, { "epoch": 5.514310440749211, "grad_norm": 0.12511087954044342, "learning_rate": 1.7243787476046242e-05, "loss": 0.0073, "step": 
178410 }, { "epoch": 5.51523768313037, "grad_norm": 4.2772393226623535, "learning_rate": 1.7243339308895346e-05, "loss": 0.007, "step": 178440 }, { "epoch": 5.516164925511529, "grad_norm": 0.1601174771785736, "learning_rate": 1.7242875687704768e-05, "loss": 0.0077, "step": 178470 }, { "epoch": 5.517092167892687, "grad_norm": 0.12245414406061172, "learning_rate": 1.724241206651419e-05, "loss": 0.0074, "step": 178500 }, { "epoch": 5.518019410273846, "grad_norm": 0.08721228688955307, "learning_rate": 1.7241948445323608e-05, "loss": 0.0072, "step": 178530 }, { "epoch": 5.518946652655004, "grad_norm": 0.13616566359996796, "learning_rate": 1.724148482413303e-05, "loss": 0.0071, "step": 178560 }, { "epoch": 5.519873895036162, "grad_norm": 0.13243761658668518, "learning_rate": 1.724102120294245e-05, "loss": 0.0079, "step": 178590 }, { "epoch": 5.520801137417321, "grad_norm": 0.114283487200737, "learning_rate": 1.7240557581751872e-05, "loss": 0.0071, "step": 178620 }, { "epoch": 5.521728379798479, "grad_norm": 0.1277729868888855, "learning_rate": 1.7240093960561294e-05, "loss": 0.0073, "step": 178650 }, { "epoch": 5.522655622179638, "grad_norm": 0.11737778037786484, "learning_rate": 1.7239630339370712e-05, "loss": 0.0063, "step": 178680 }, { "epoch": 5.5235828645607965, "grad_norm": 0.11544404923915863, "learning_rate": 1.7239166718180133e-05, "loss": 0.0064, "step": 178710 }, { "epoch": 5.524510106941955, "grad_norm": 0.09678374975919724, "learning_rate": 1.7238703096989555e-05, "loss": 0.0076, "step": 178740 }, { "epoch": 5.525437349323113, "grad_norm": 0.10416711121797562, "learning_rate": 1.7238239475798976e-05, "loss": 0.0064, "step": 178770 }, { "epoch": 5.526364591704271, "grad_norm": 0.1855379343032837, "learning_rate": 1.7237775854608398e-05, "loss": 0.0068, "step": 178800 }, { "epoch": 5.52729183408543, "grad_norm": 0.1327715516090393, "learning_rate": 1.723731223341782e-05, "loss": 0.0075, "step": 178830 }, { "epoch": 5.528219076466589, "grad_norm": 
0.1323244571685791, "learning_rate": 1.7236848612227237e-05, "loss": 0.0071, "step": 178860 }, { "epoch": 5.529146318847747, "grad_norm": 0.09749005734920502, "learning_rate": 1.723638499103666e-05, "loss": 0.0069, "step": 178890 }, { "epoch": 5.530073561228905, "grad_norm": 0.4554156959056854, "learning_rate": 1.7235921369846077e-05, "loss": 0.0072, "step": 178920 }, { "epoch": 5.5310008036100635, "grad_norm": 0.0968242958188057, "learning_rate": 1.72354577486555e-05, "loss": 0.0073, "step": 178950 }, { "epoch": 5.531928045991222, "grad_norm": 0.1502067595720291, "learning_rate": 1.723499412746492e-05, "loss": 0.0073, "step": 178980 }, { "epoch": 5.532855288372381, "grad_norm": 0.12649555504322052, "learning_rate": 1.723453050627434e-05, "loss": 0.0067, "step": 179010 }, { "epoch": 5.533782530753539, "grad_norm": 0.1237441897392273, "learning_rate": 1.7234066885083763e-05, "loss": 0.0077, "step": 179040 }, { "epoch": 5.534709773134697, "grad_norm": 0.14507336914539337, "learning_rate": 1.7233603263893185e-05, "loss": 0.0076, "step": 179070 }, { "epoch": 5.535637015515856, "grad_norm": 0.12449965626001358, "learning_rate": 1.7233139642702603e-05, "loss": 0.0062, "step": 179100 }, { "epoch": 5.536564257897014, "grad_norm": 0.16347458958625793, "learning_rate": 1.7232676021512024e-05, "loss": 0.0065, "step": 179130 }, { "epoch": 5.537491500278173, "grad_norm": 0.10529737174510956, "learning_rate": 1.7232212400321446e-05, "loss": 0.0072, "step": 179160 }, { "epoch": 5.538418742659331, "grad_norm": 0.17311881482601166, "learning_rate": 1.7231748779130867e-05, "loss": 0.0078, "step": 179190 }, { "epoch": 5.539345985040489, "grad_norm": 0.09134750813245773, "learning_rate": 1.723128515794029e-05, "loss": 0.0068, "step": 179220 }, { "epoch": 5.540273227421648, "grad_norm": 0.12394454330205917, "learning_rate": 1.7230821536749707e-05, "loss": 0.0075, "step": 179250 }, { "epoch": 5.541200469802806, "grad_norm": 0.08884634077548981, "learning_rate": 1.7230357915559128e-05, 
"loss": 0.0072, "step": 179280 }, { "epoch": 5.542127712183965, "grad_norm": 0.14707735180854797, "learning_rate": 1.722989429436855e-05, "loss": 0.0068, "step": 179310 }, { "epoch": 5.5430549545651235, "grad_norm": 0.12774646282196045, "learning_rate": 1.7229430673177968e-05, "loss": 0.007, "step": 179340 }, { "epoch": 5.543982196946282, "grad_norm": 0.11682549864053726, "learning_rate": 1.722896705198739e-05, "loss": 0.0071, "step": 179370 }, { "epoch": 5.54490943932744, "grad_norm": 0.14850006997585297, "learning_rate": 1.722850343079681e-05, "loss": 0.0081, "step": 179400 }, { "epoch": 5.5458366817085984, "grad_norm": 0.09815952926874161, "learning_rate": 1.7228039809606232e-05, "loss": 0.0076, "step": 179430 }, { "epoch": 5.546763924089757, "grad_norm": 0.1314583718776703, "learning_rate": 1.7227576188415654e-05, "loss": 0.0074, "step": 179460 }, { "epoch": 5.547691166470916, "grad_norm": 0.1568811684846878, "learning_rate": 1.7227112567225075e-05, "loss": 0.0081, "step": 179490 }, { "epoch": 5.548618408852074, "grad_norm": 0.12038043141365051, "learning_rate": 1.7226648946034494e-05, "loss": 0.0067, "step": 179520 }, { "epoch": 5.549545651233233, "grad_norm": 0.14077205955982208, "learning_rate": 1.7226185324843915e-05, "loss": 0.0076, "step": 179550 }, { "epoch": 5.550472893614391, "grad_norm": 0.11430477350950241, "learning_rate": 1.7225721703653337e-05, "loss": 0.007, "step": 179580 }, { "epoch": 5.551400135995549, "grad_norm": 0.16840873658657074, "learning_rate": 1.7225258082462758e-05, "loss": 0.0069, "step": 179610 }, { "epoch": 5.552327378376708, "grad_norm": 0.10294557362794876, "learning_rate": 1.722479446127218e-05, "loss": 0.0071, "step": 179640 }, { "epoch": 5.553254620757866, "grad_norm": 0.07589542865753174, "learning_rate": 1.7224330840081598e-05, "loss": 0.0076, "step": 179670 }, { "epoch": 5.554181863139025, "grad_norm": 0.10698775202035904, "learning_rate": 1.722386721889102e-05, "loss": 0.0068, "step": 179700 }, { "epoch": 
5.555109105520183, "grad_norm": 0.12248562276363373, "learning_rate": 1.722340359770044e-05, "loss": 0.0075, "step": 179730 }, { "epoch": 5.556036347901341, "grad_norm": 0.1435779333114624, "learning_rate": 1.722293997650986e-05, "loss": 0.0072, "step": 179760 }, { "epoch": 5.5569635902825, "grad_norm": 0.11519574373960495, "learning_rate": 1.722247635531928e-05, "loss": 0.008, "step": 179790 }, { "epoch": 5.5578908326636585, "grad_norm": 0.11587048321962357, "learning_rate": 1.7222012734128705e-05, "loss": 0.0072, "step": 179820 }, { "epoch": 5.558818075044817, "grad_norm": 0.10689973831176758, "learning_rate": 1.7221549112938123e-05, "loss": 0.0065, "step": 179850 }, { "epoch": 5.559745317425975, "grad_norm": 0.08929166197776794, "learning_rate": 1.7221085491747545e-05, "loss": 0.0079, "step": 179880 }, { "epoch": 5.560672559807133, "grad_norm": 0.1089097261428833, "learning_rate": 1.7220621870556966e-05, "loss": 0.008, "step": 179910 }, { "epoch": 5.561599802188292, "grad_norm": 0.1118648573756218, "learning_rate": 1.7220158249366384e-05, "loss": 0.0069, "step": 179940 }, { "epoch": 5.562527044569451, "grad_norm": 0.07483990490436554, "learning_rate": 1.7219694628175806e-05, "loss": 0.0069, "step": 179970 }, { "epoch": 5.563454286950609, "grad_norm": 0.1291658878326416, "learning_rate": 1.7219231006985227e-05, "loss": 0.0072, "step": 180000 }, { "epoch": 5.564381529331767, "grad_norm": 0.10743194073438644, "learning_rate": 1.721876738579465e-05, "loss": 0.0075, "step": 180030 }, { "epoch": 5.5653087717129255, "grad_norm": 0.07312756031751633, "learning_rate": 1.721830376460407e-05, "loss": 0.0072, "step": 180060 }, { "epoch": 5.566236014094084, "grad_norm": 0.1199503168463707, "learning_rate": 1.721784014341349e-05, "loss": 0.007, "step": 180090 }, { "epoch": 5.567163256475243, "grad_norm": 0.13671936094760895, "learning_rate": 1.721737652222291e-05, "loss": 0.0068, "step": 180120 }, { "epoch": 5.568090498856401, "grad_norm": 0.18340237438678741, 
"learning_rate": 1.721691290103233e-05, "loss": 0.0076, "step": 180150 }, { "epoch": 5.56901774123756, "grad_norm": 0.09492375701665878, "learning_rate": 1.7216449279841753e-05, "loss": 0.0066, "step": 180180 }, { "epoch": 5.569944983618718, "grad_norm": 0.08255494385957718, "learning_rate": 1.7215985658651175e-05, "loss": 0.0073, "step": 180210 }, { "epoch": 5.570872225999876, "grad_norm": 0.10938059538602829, "learning_rate": 1.7215522037460596e-05, "loss": 0.0071, "step": 180240 }, { "epoch": 5.571799468381035, "grad_norm": 0.09352477639913559, "learning_rate": 1.7215058416270014e-05, "loss": 0.0067, "step": 180270 }, { "epoch": 5.572726710762193, "grad_norm": 0.13716904819011688, "learning_rate": 1.7214594795079436e-05, "loss": 0.0075, "step": 180300 }, { "epoch": 5.573653953143352, "grad_norm": 0.12615688145160675, "learning_rate": 1.7214131173888854e-05, "loss": 0.0071, "step": 180330 }, { "epoch": 5.574581195524511, "grad_norm": 0.1139281615614891, "learning_rate": 1.7213667552698275e-05, "loss": 0.0075, "step": 180360 }, { "epoch": 5.575508437905668, "grad_norm": 0.13441000878810883, "learning_rate": 1.7213203931507697e-05, "loss": 0.0079, "step": 180390 }, { "epoch": 5.576435680286827, "grad_norm": 0.08808334171772003, "learning_rate": 1.721274031031712e-05, "loss": 0.0068, "step": 180420 }, { "epoch": 5.5773629226679855, "grad_norm": 0.13385523855686188, "learning_rate": 1.721227668912654e-05, "loss": 0.0075, "step": 180450 }, { "epoch": 5.578290165049144, "grad_norm": 0.11375266313552856, "learning_rate": 1.721181306793596e-05, "loss": 0.0074, "step": 180480 }, { "epoch": 5.579217407430303, "grad_norm": 0.1462855339050293, "learning_rate": 1.721134944674538e-05, "loss": 0.0075, "step": 180510 }, { "epoch": 5.58014464981146, "grad_norm": 0.10438968241214752, "learning_rate": 1.72108858255548e-05, "loss": 0.0076, "step": 180540 }, { "epoch": 5.581071892192619, "grad_norm": 0.16849657893180847, "learning_rate": 1.7210422204364223e-05, "loss": 0.0066, 
"step": 180570 }, { "epoch": 5.581999134573778, "grad_norm": 0.09745771437883377, "learning_rate": 1.7209958583173644e-05, "loss": 0.0069, "step": 180600 }, { "epoch": 5.582926376954936, "grad_norm": 0.11299241334199905, "learning_rate": 1.7209494961983066e-05, "loss": 0.0066, "step": 180630 }, { "epoch": 5.583853619336095, "grad_norm": 0.1840876340866089, "learning_rate": 1.7209031340792484e-05, "loss": 0.0077, "step": 180660 }, { "epoch": 5.5847808617172525, "grad_norm": 0.08857358992099762, "learning_rate": 1.7208567719601905e-05, "loss": 0.0071, "step": 180690 }, { "epoch": 5.585708104098411, "grad_norm": 0.10946249216794968, "learning_rate": 1.7208104098411327e-05, "loss": 0.0069, "step": 180720 }, { "epoch": 5.58663534647957, "grad_norm": 0.1408139169216156, "learning_rate": 1.7207640477220745e-05, "loss": 0.0072, "step": 180750 }, { "epoch": 5.587562588860728, "grad_norm": 0.09152037650346756, "learning_rate": 1.7207176856030166e-05, "loss": 0.0068, "step": 180780 }, { "epoch": 5.588489831241887, "grad_norm": 0.09848146885633469, "learning_rate": 1.7206713234839588e-05, "loss": 0.0069, "step": 180810 }, { "epoch": 5.589417073623045, "grad_norm": 0.13805924355983734, "learning_rate": 1.720624961364901e-05, "loss": 0.0065, "step": 180840 }, { "epoch": 5.590344316004203, "grad_norm": 0.10119634121656418, "learning_rate": 1.720578599245843e-05, "loss": 0.0073, "step": 180870 }, { "epoch": 5.591271558385362, "grad_norm": 0.08610323071479797, "learning_rate": 1.7205322371267852e-05, "loss": 0.0069, "step": 180900 }, { "epoch": 5.59219880076652, "grad_norm": 0.11058124899864197, "learning_rate": 1.720485875007727e-05, "loss": 0.007, "step": 180930 }, { "epoch": 5.593126043147679, "grad_norm": 0.09804341942071915, "learning_rate": 1.7204395128886692e-05, "loss": 0.0074, "step": 180960 }, { "epoch": 5.594053285528837, "grad_norm": 0.11069290339946747, "learning_rate": 1.7203931507696113e-05, "loss": 0.0071, "step": 180990 }, { "epoch": 5.594980527909995, "grad_norm": 
0.09102346748113632, "learning_rate": 1.7203467886505535e-05, "loss": 0.0074, "step": 181020 }, { "epoch": 5.595907770291154, "grad_norm": 0.14021217823028564, "learning_rate": 1.7203004265314956e-05, "loss": 0.0072, "step": 181050 }, { "epoch": 5.5968350126723125, "grad_norm": 0.1037406474351883, "learning_rate": 1.7202540644124375e-05, "loss": 0.0068, "step": 181080 }, { "epoch": 5.597762255053471, "grad_norm": 0.09527139365673065, "learning_rate": 1.7202077022933796e-05, "loss": 0.0077, "step": 181110 }, { "epoch": 5.59868949743463, "grad_norm": 0.10656420141458511, "learning_rate": 1.7201613401743218e-05, "loss": 0.0072, "step": 181140 }, { "epoch": 5.599616739815788, "grad_norm": 0.23780786991119385, "learning_rate": 1.7201149780552636e-05, "loss": 0.007, "step": 181170 }, { "epoch": 5.600543982196946, "grad_norm": 0.08284714072942734, "learning_rate": 1.7200686159362057e-05, "loss": 0.0073, "step": 181200 }, { "epoch": 5.601471224578105, "grad_norm": 0.15071852505207062, "learning_rate": 1.7200237992211165e-05, "loss": 0.0072, "step": 181230 }, { "epoch": 5.602398466959263, "grad_norm": 0.11021072417497635, "learning_rate": 1.7199774371020587e-05, "loss": 0.0077, "step": 181260 }, { "epoch": 5.603325709340422, "grad_norm": 0.1460202932357788, "learning_rate": 1.7199310749830008e-05, "loss": 0.0071, "step": 181290 }, { "epoch": 5.6042529517215804, "grad_norm": 0.12473976612091064, "learning_rate": 1.719884712863943e-05, "loss": 0.0073, "step": 181320 }, { "epoch": 5.605180194102738, "grad_norm": 0.16450262069702148, "learning_rate": 1.7198383507448848e-05, "loss": 0.0071, "step": 181350 }, { "epoch": 5.606107436483897, "grad_norm": 0.09969521313905716, "learning_rate": 1.719791988625827e-05, "loss": 0.0068, "step": 181380 }, { "epoch": 5.607034678865055, "grad_norm": 0.15355803072452545, "learning_rate": 1.7197456265067687e-05, "loss": 0.0072, "step": 181410 }, { "epoch": 5.607961921246214, "grad_norm": 0.11696271598339081, "learning_rate": 
1.7196992643877112e-05, "loss": 0.0072, "step": 181440 }, { "epoch": 5.6088891636273726, "grad_norm": 0.1320488154888153, "learning_rate": 1.7196529022686534e-05, "loss": 0.007, "step": 181470 }, { "epoch": 5.60981640600853, "grad_norm": 0.10581374168395996, "learning_rate": 1.7196065401495952e-05, "loss": 0.0073, "step": 181500 }, { "epoch": 5.610743648389689, "grad_norm": 0.09603074193000793, "learning_rate": 1.7195601780305373e-05, "loss": 0.007, "step": 181530 }, { "epoch": 5.6116708907708475, "grad_norm": 0.13141822814941406, "learning_rate": 1.7195138159114795e-05, "loss": 0.007, "step": 181560 }, { "epoch": 5.612598133152006, "grad_norm": 0.14518186450004578, "learning_rate": 1.7194674537924213e-05, "loss": 0.0071, "step": 181590 }, { "epoch": 5.613525375533165, "grad_norm": 0.14845605194568634, "learning_rate": 1.7194210916733635e-05, "loss": 0.0069, "step": 181620 }, { "epoch": 5.614452617914322, "grad_norm": 0.13167597353458405, "learning_rate": 1.7193747295543056e-05, "loss": 0.007, "step": 181650 }, { "epoch": 5.615379860295481, "grad_norm": 0.08876509964466095, "learning_rate": 1.7193283674352478e-05, "loss": 0.0072, "step": 181680 }, { "epoch": 5.61630710267664, "grad_norm": 0.1361968070268631, "learning_rate": 1.71928200531619e-05, "loss": 0.0068, "step": 181710 }, { "epoch": 5.617234345057798, "grad_norm": 0.11197341978549957, "learning_rate": 1.7192356431971317e-05, "loss": 0.007, "step": 181740 }, { "epoch": 5.618161587438957, "grad_norm": 0.1222882866859436, "learning_rate": 1.719189281078074e-05, "loss": 0.0072, "step": 181770 }, { "epoch": 5.6190888298201145, "grad_norm": 0.10879869759082794, "learning_rate": 1.719142918959016e-05, "loss": 0.0073, "step": 181800 }, { "epoch": 5.620016072201273, "grad_norm": 0.11786952614784241, "learning_rate": 1.7190965568399582e-05, "loss": 0.0072, "step": 181830 }, { "epoch": 5.620943314582432, "grad_norm": 0.1331382393836975, "learning_rate": 1.7190501947209003e-05, "loss": 0.0075, "step": 181860 }, { 
"epoch": 5.62187055696359, "grad_norm": 0.15488120913505554, "learning_rate": 1.7190038326018425e-05, "loss": 0.007, "step": 181890 }, { "epoch": 5.622797799344749, "grad_norm": 0.11517711728811264, "learning_rate": 1.7189574704827843e-05, "loss": 0.0073, "step": 181920 }, { "epoch": 5.6237250417259075, "grad_norm": 0.12355715781450272, "learning_rate": 1.7189111083637264e-05, "loss": 0.0068, "step": 181950 }, { "epoch": 5.624652284107066, "grad_norm": 0.09733191877603531, "learning_rate": 1.7188647462446686e-05, "loss": 0.0073, "step": 181980 }, { "epoch": 5.625579526488224, "grad_norm": 0.10589388757944107, "learning_rate": 1.7188183841256104e-05, "loss": 0.0074, "step": 182010 }, { "epoch": 5.626506768869382, "grad_norm": 0.09053117781877518, "learning_rate": 1.7187720220065525e-05, "loss": 0.0076, "step": 182040 }, { "epoch": 5.627434011250541, "grad_norm": 0.15278097987174988, "learning_rate": 1.7187256598874947e-05, "loss": 0.0071, "step": 182070 }, { "epoch": 5.6283612536317, "grad_norm": 0.15002883970737457, "learning_rate": 1.718679297768437e-05, "loss": 0.0067, "step": 182100 }, { "epoch": 5.629288496012858, "grad_norm": 0.10245786607265472, "learning_rate": 1.718632935649379e-05, "loss": 0.0072, "step": 182130 }, { "epoch": 5.630215738394016, "grad_norm": 0.10033448040485382, "learning_rate": 1.7185865735303208e-05, "loss": 0.007, "step": 182160 }, { "epoch": 5.6311429807751745, "grad_norm": 0.18462111055850983, "learning_rate": 1.718540211411263e-05, "loss": 0.007, "step": 182190 }, { "epoch": 5.632070223156333, "grad_norm": 0.10858836770057678, "learning_rate": 1.718493849292205e-05, "loss": 0.0066, "step": 182220 }, { "epoch": 5.632997465537492, "grad_norm": 0.07334112375974655, "learning_rate": 1.7184474871731473e-05, "loss": 0.0073, "step": 182250 }, { "epoch": 5.63392470791865, "grad_norm": 0.11991719156503677, "learning_rate": 1.7184011250540894e-05, "loss": 0.0066, "step": 182280 }, { "epoch": 5.634851950299808, "grad_norm": 0.13091199100017548, 
"learning_rate": 1.7183547629350316e-05, "loss": 0.0069, "step": 182310 }, { "epoch": 5.635779192680967, "grad_norm": 0.15665583312511444, "learning_rate": 1.7183084008159734e-05, "loss": 0.0074, "step": 182340 }, { "epoch": 5.636706435062125, "grad_norm": 0.1388876736164093, "learning_rate": 1.7182620386969155e-05, "loss": 0.0063, "step": 182370 }, { "epoch": 5.637633677443284, "grad_norm": 0.18829932808876038, "learning_rate": 1.7182156765778573e-05, "loss": 0.007, "step": 182400 }, { "epoch": 5.638560919824442, "grad_norm": 0.13461141288280487, "learning_rate": 1.7181693144587995e-05, "loss": 0.0078, "step": 182430 }, { "epoch": 5.6394881622056, "grad_norm": 0.12221287190914154, "learning_rate": 1.7181229523397416e-05, "loss": 0.0072, "step": 182460 }, { "epoch": 5.640415404586759, "grad_norm": 0.11550445854663849, "learning_rate": 1.7180765902206838e-05, "loss": 0.007, "step": 182490 }, { "epoch": 5.641342646967917, "grad_norm": 0.17677748203277588, "learning_rate": 1.718030228101626e-05, "loss": 0.0078, "step": 182520 }, { "epoch": 5.642269889349076, "grad_norm": 0.14225855469703674, "learning_rate": 1.717983865982568e-05, "loss": 0.0072, "step": 182550 }, { "epoch": 5.6431971317302345, "grad_norm": 0.07362604141235352, "learning_rate": 1.71793750386351e-05, "loss": 0.007, "step": 182580 }, { "epoch": 5.644124374111392, "grad_norm": 0.15064705908298492, "learning_rate": 1.717891141744452e-05, "loss": 0.0069, "step": 182610 }, { "epoch": 5.645051616492551, "grad_norm": 0.11683127284049988, "learning_rate": 1.7178447796253942e-05, "loss": 0.0073, "step": 182640 }, { "epoch": 5.645978858873709, "grad_norm": 0.14050403237342834, "learning_rate": 1.7177984175063364e-05, "loss": 0.0068, "step": 182670 }, { "epoch": 5.646906101254868, "grad_norm": 0.09523702412843704, "learning_rate": 1.7177520553872785e-05, "loss": 0.0078, "step": 182700 }, { "epoch": 5.647833343636027, "grad_norm": 0.15363283455371857, "learning_rate": 1.7177056932682207e-05, "loss": 0.0074, 
"step": 182730 }, { "epoch": 5.648760586017185, "grad_norm": 0.1378404051065445, "learning_rate": 1.7176593311491625e-05, "loss": 0.0078, "step": 182760 }, { "epoch": 5.649687828398344, "grad_norm": 0.10416989028453827, "learning_rate": 1.7176129690301046e-05, "loss": 0.0071, "step": 182790 }, { "epoch": 5.6506150707795015, "grad_norm": 0.11944326013326645, "learning_rate": 1.7175666069110464e-05, "loss": 0.0075, "step": 182820 }, { "epoch": 5.65154231316066, "grad_norm": 0.11364820599555969, "learning_rate": 1.717520244791989e-05, "loss": 0.0065, "step": 182850 }, { "epoch": 5.652469555541819, "grad_norm": 0.13153645396232605, "learning_rate": 1.717473882672931e-05, "loss": 0.0065, "step": 182880 }, { "epoch": 5.653396797922977, "grad_norm": 0.15560302138328552, "learning_rate": 1.717427520553873e-05, "loss": 0.0073, "step": 182910 }, { "epoch": 5.654324040304136, "grad_norm": 0.12591055035591125, "learning_rate": 1.717381158434815e-05, "loss": 0.0071, "step": 182940 }, { "epoch": 5.655251282685294, "grad_norm": 0.14469681680202484, "learning_rate": 1.7173347963157572e-05, "loss": 0.0073, "step": 182970 }, { "epoch": 5.656178525066452, "grad_norm": 0.09369268268346786, "learning_rate": 1.717288434196699e-05, "loss": 0.0068, "step": 183000 }, { "epoch": 5.657105767447611, "grad_norm": 0.10899284482002258, "learning_rate": 1.717242072077641e-05, "loss": 0.007, "step": 183030 }, { "epoch": 5.658033009828769, "grad_norm": 0.138469398021698, "learning_rate": 1.7171957099585833e-05, "loss": 0.0072, "step": 183060 }, { "epoch": 5.658960252209928, "grad_norm": 0.11091162264347076, "learning_rate": 1.7171493478395254e-05, "loss": 0.0073, "step": 183090 }, { "epoch": 5.659887494591086, "grad_norm": 0.09444806724786758, "learning_rate": 1.7171029857204676e-05, "loss": 0.0071, "step": 183120 }, { "epoch": 5.660814736972244, "grad_norm": 0.0792180523276329, "learning_rate": 1.7170566236014094e-05, "loss": 0.0068, "step": 183150 }, { "epoch": 5.661741979353403, "grad_norm": 
0.11542337387800217, "learning_rate": 1.7170102614823516e-05, "loss": 0.0071, "step": 183180 }, { "epoch": 5.6626692217345616, "grad_norm": 0.0892806202173233, "learning_rate": 1.7169638993632937e-05, "loss": 0.0067, "step": 183210 }, { "epoch": 5.66359646411572, "grad_norm": 0.11029722541570663, "learning_rate": 1.716917537244236e-05, "loss": 0.0074, "step": 183240 }, { "epoch": 5.664523706496878, "grad_norm": 0.09463023394346237, "learning_rate": 1.716871175125178e-05, "loss": 0.0069, "step": 183270 }, { "epoch": 5.6654509488780365, "grad_norm": 0.10611561685800552, "learning_rate": 1.71682481300612e-05, "loss": 0.0064, "step": 183300 }, { "epoch": 5.666378191259195, "grad_norm": 0.11880587041378021, "learning_rate": 1.716778450887062e-05, "loss": 0.0074, "step": 183330 }, { "epoch": 5.667305433640354, "grad_norm": 0.1474553942680359, "learning_rate": 1.716732088768004e-05, "loss": 0.0075, "step": 183360 }, { "epoch": 5.668232676021512, "grad_norm": 0.1093902587890625, "learning_rate": 1.7166857266489463e-05, "loss": 0.0073, "step": 183390 }, { "epoch": 5.66915991840267, "grad_norm": 0.10336429625749588, "learning_rate": 1.716639364529888e-05, "loss": 0.007, "step": 183420 }, { "epoch": 5.670087160783829, "grad_norm": 0.10269911587238312, "learning_rate": 1.7165930024108302e-05, "loss": 0.0074, "step": 183450 }, { "epoch": 5.671014403164987, "grad_norm": 0.13819533586502075, "learning_rate": 1.7165466402917724e-05, "loss": 0.007, "step": 183480 }, { "epoch": 5.671941645546146, "grad_norm": 0.13330134749412537, "learning_rate": 1.7165002781727145e-05, "loss": 0.0064, "step": 183510 }, { "epoch": 5.672868887927304, "grad_norm": 0.10203659534454346, "learning_rate": 1.7164539160536567e-05, "loss": 0.007, "step": 183540 }, { "epoch": 5.673796130308463, "grad_norm": 0.10540967434644699, "learning_rate": 1.7164075539345985e-05, "loss": 0.0065, "step": 183570 }, { "epoch": 5.674723372689621, "grad_norm": 0.09420829266309738, "learning_rate": 1.7163611918155407e-05, 
"loss": 0.0075, "step": 183600 }, { "epoch": 5.675650615070779, "grad_norm": 0.11112627387046814, "learning_rate": 1.7163148296964828e-05, "loss": 0.0072, "step": 183630 }, { "epoch": 5.676577857451938, "grad_norm": 0.06861045211553574, "learning_rate": 1.716268467577425e-05, "loss": 0.0074, "step": 183660 }, { "epoch": 5.6775050998330965, "grad_norm": 0.1220807358622551, "learning_rate": 1.716222105458367e-05, "loss": 0.0075, "step": 183690 }, { "epoch": 5.678432342214255, "grad_norm": 0.15092504024505615, "learning_rate": 1.7161757433393093e-05, "loss": 0.0072, "step": 183720 }, { "epoch": 5.679359584595414, "grad_norm": 0.14522984623908997, "learning_rate": 1.716129381220251e-05, "loss": 0.0081, "step": 183750 }, { "epoch": 5.680286826976571, "grad_norm": 0.10560116916894913, "learning_rate": 1.7160830191011932e-05, "loss": 0.0077, "step": 183780 }, { "epoch": 5.68121406935773, "grad_norm": 0.07510977983474731, "learning_rate": 1.716036656982135e-05, "loss": 0.0078, "step": 183810 }, { "epoch": 5.682141311738889, "grad_norm": 0.05682849511504173, "learning_rate": 1.7159902948630772e-05, "loss": 0.0065, "step": 183840 }, { "epoch": 5.683068554120047, "grad_norm": 0.12112375348806381, "learning_rate": 1.7159439327440193e-05, "loss": 0.0072, "step": 183870 }, { "epoch": 5.683995796501206, "grad_norm": 0.09391804784536362, "learning_rate": 1.7158975706249615e-05, "loss": 0.0064, "step": 183900 }, { "epoch": 5.6849230388823635, "grad_norm": 0.10764341801404953, "learning_rate": 1.7158512085059036e-05, "loss": 0.0069, "step": 183930 }, { "epoch": 5.685850281263522, "grad_norm": 0.15788595378398895, "learning_rate": 1.7158048463868458e-05, "loss": 0.0072, "step": 183960 }, { "epoch": 5.686777523644681, "grad_norm": 0.12170471251010895, "learning_rate": 1.7157584842677876e-05, "loss": 0.0072, "step": 183990 }, { "epoch": 5.687704766025839, "grad_norm": 0.10956704616546631, "learning_rate": 1.7157121221487297e-05, "loss": 0.0076, "step": 184020 }, { "epoch": 
5.688632008406998, "grad_norm": 0.1291358470916748, "learning_rate": 1.715665760029672e-05, "loss": 0.0075, "step": 184050 }, { "epoch": 5.689559250788156, "grad_norm": 0.09972970932722092, "learning_rate": 1.715619397910614e-05, "loss": 0.0071, "step": 184080 }, { "epoch": 5.690486493169314, "grad_norm": 0.13144944608211517, "learning_rate": 1.7155730357915562e-05, "loss": 0.0075, "step": 184110 }, { "epoch": 5.691413735550473, "grad_norm": 0.16668231785297394, "learning_rate": 1.715526673672498e-05, "loss": 0.0064, "step": 184140 }, { "epoch": 5.692340977931631, "grad_norm": 0.10982945561408997, "learning_rate": 1.71548031155344e-05, "loss": 0.0076, "step": 184170 }, { "epoch": 5.69326822031279, "grad_norm": 0.11878916621208191, "learning_rate": 1.7154339494343823e-05, "loss": 0.0076, "step": 184200 }, { "epoch": 5.694195462693948, "grad_norm": 0.12469825148582458, "learning_rate": 1.715387587315324e-05, "loss": 0.0074, "step": 184230 }, { "epoch": 5.695122705075106, "grad_norm": 0.11651457101106644, "learning_rate": 1.7153412251962666e-05, "loss": 0.0073, "step": 184260 }, { "epoch": 5.696049947456265, "grad_norm": 0.15376465022563934, "learning_rate": 1.7152948630772088e-05, "loss": 0.0072, "step": 184290 }, { "epoch": 5.6969771898374235, "grad_norm": 0.12971344590187073, "learning_rate": 1.7152485009581506e-05, "loss": 0.0068, "step": 184320 }, { "epoch": 5.697904432218582, "grad_norm": 0.12225448340177536, "learning_rate": 1.7152021388390927e-05, "loss": 0.0073, "step": 184350 }, { "epoch": 5.698831674599741, "grad_norm": 0.14872001111507416, "learning_rate": 1.715155776720035e-05, "loss": 0.0068, "step": 184380 }, { "epoch": 5.699758916980898, "grad_norm": 0.08625183254480362, "learning_rate": 1.7151094146009767e-05, "loss": 0.0068, "step": 184410 }, { "epoch": 5.700686159362057, "grad_norm": 0.12203576415777206, "learning_rate": 1.715063052481919e-05, "loss": 0.007, "step": 184440 }, { "epoch": 5.701613401743216, "grad_norm": 0.09302552789449692, 
"learning_rate": 1.715016690362861e-05, "loss": 0.0075, "step": 184470 }, { "epoch": 5.702540644124374, "grad_norm": 0.09899160265922546, "learning_rate": 1.714970328243803e-05, "loss": 0.007, "step": 184500 }, { "epoch": 5.703467886505533, "grad_norm": 0.13183511793613434, "learning_rate": 1.7149239661247453e-05, "loss": 0.0073, "step": 184530 }, { "epoch": 5.704395128886691, "grad_norm": 0.10566695779561996, "learning_rate": 1.714877604005687e-05, "loss": 0.0074, "step": 184560 }, { "epoch": 5.705322371267849, "grad_norm": 0.11507317423820496, "learning_rate": 1.7148312418866293e-05, "loss": 0.0078, "step": 184590 }, { "epoch": 5.706249613649008, "grad_norm": 0.14391621947288513, "learning_rate": 1.7147848797675714e-05, "loss": 0.0069, "step": 184620 }, { "epoch": 5.707176856030166, "grad_norm": 0.11702164262533188, "learning_rate": 1.7147385176485136e-05, "loss": 0.0072, "step": 184650 }, { "epoch": 5.708104098411325, "grad_norm": 0.11081768572330475, "learning_rate": 1.7146921555294557e-05, "loss": 0.0074, "step": 184680 }, { "epoch": 5.7090313407924835, "grad_norm": 0.07189731299877167, "learning_rate": 1.714645793410398e-05, "loss": 0.0073, "step": 184710 }, { "epoch": 5.709958583173641, "grad_norm": 0.13133084774017334, "learning_rate": 1.7145994312913397e-05, "loss": 0.0078, "step": 184740 }, { "epoch": 5.7108858255548, "grad_norm": 0.09723467379808426, "learning_rate": 1.7145530691722818e-05, "loss": 0.0076, "step": 184770 }, { "epoch": 5.711813067935958, "grad_norm": 0.1611168533563614, "learning_rate": 1.7145067070532236e-05, "loss": 0.007, "step": 184800 }, { "epoch": 5.712740310317117, "grad_norm": 0.11809813976287842, "learning_rate": 1.7144603449341658e-05, "loss": 0.0068, "step": 184830 }, { "epoch": 5.713667552698276, "grad_norm": 0.11982464045286179, "learning_rate": 1.714413982815108e-05, "loss": 0.0071, "step": 184860 }, { "epoch": 5.714594795079433, "grad_norm": 0.15156270563602448, "learning_rate": 1.71436762069605e-05, "loss": 0.0073, "step": 
184890 }, { "epoch": 5.715522037460592, "grad_norm": 0.12821529805660248, "learning_rate": 1.7143212585769922e-05, "loss": 0.0073, "step": 184920 }, { "epoch": 5.7164492798417506, "grad_norm": 0.06708837300539017, "learning_rate": 1.7142748964579344e-05, "loss": 0.0076, "step": 184950 }, { "epoch": 5.717376522222909, "grad_norm": 0.1537117063999176, "learning_rate": 1.7142285343388762e-05, "loss": 0.008, "step": 184980 }, { "epoch": 5.718303764604068, "grad_norm": 0.1401980072259903, "learning_rate": 1.7141821722198183e-05, "loss": 0.0071, "step": 185010 }, { "epoch": 5.7192310069852255, "grad_norm": 0.16113023459911346, "learning_rate": 1.7141358101007605e-05, "loss": 0.0067, "step": 185040 }, { "epoch": 5.720158249366384, "grad_norm": 0.11921516805887222, "learning_rate": 1.7140894479817026e-05, "loss": 0.0069, "step": 185070 }, { "epoch": 5.721085491747543, "grad_norm": 0.08027754724025726, "learning_rate": 1.7140430858626448e-05, "loss": 0.0068, "step": 185100 }, { "epoch": 5.722012734128701, "grad_norm": 0.12868282198905945, "learning_rate": 1.7139967237435866e-05, "loss": 0.0078, "step": 185130 }, { "epoch": 5.72293997650986, "grad_norm": 0.1565908044576645, "learning_rate": 1.7139503616245288e-05, "loss": 0.007, "step": 185160 }, { "epoch": 5.7238672188910185, "grad_norm": 0.09382166713476181, "learning_rate": 1.713903999505471e-05, "loss": 0.0073, "step": 185190 }, { "epoch": 5.724794461272176, "grad_norm": 0.12760759890079498, "learning_rate": 1.7138576373864127e-05, "loss": 0.0077, "step": 185220 }, { "epoch": 5.725721703653335, "grad_norm": 0.10029327869415283, "learning_rate": 1.713811275267355e-05, "loss": 0.0071, "step": 185250 }, { "epoch": 5.726648946034493, "grad_norm": 0.0899726003408432, "learning_rate": 1.713764913148297e-05, "loss": 0.0067, "step": 185280 }, { "epoch": 5.727576188415652, "grad_norm": 0.16067565977573395, "learning_rate": 1.7137185510292392e-05, "loss": 0.0072, "step": 185310 }, { "epoch": 5.728503430796811, "grad_norm": 
0.13967162370681763, "learning_rate": 1.7136721889101813e-05, "loss": 0.0076, "step": 185340 }, { "epoch": 5.729430673177969, "grad_norm": 0.15621495246887207, "learning_rate": 1.7136258267911235e-05, "loss": 0.0073, "step": 185370 }, { "epoch": 5.730357915559127, "grad_norm": 0.11132778972387314, "learning_rate": 1.7135794646720653e-05, "loss": 0.007, "step": 185400 }, { "epoch": 5.7312851579402855, "grad_norm": 0.10825181007385254, "learning_rate": 1.7135331025530074e-05, "loss": 0.0069, "step": 185430 }, { "epoch": 5.732212400321444, "grad_norm": 0.16358038783073425, "learning_rate": 1.7134867404339496e-05, "loss": 0.0072, "step": 185460 }, { "epoch": 5.733139642702603, "grad_norm": 0.1147538423538208, "learning_rate": 1.7134403783148917e-05, "loss": 0.0074, "step": 185490 }, { "epoch": 5.734066885083761, "grad_norm": 0.07761052250862122, "learning_rate": 1.713394016195834e-05, "loss": 0.0066, "step": 185520 }, { "epoch": 5.734994127464919, "grad_norm": 0.08791635185480118, "learning_rate": 1.7133476540767757e-05, "loss": 0.0069, "step": 185550 }, { "epoch": 5.735921369846078, "grad_norm": 0.10466460883617401, "learning_rate": 1.713301291957718e-05, "loss": 0.0067, "step": 185580 }, { "epoch": 5.736848612227236, "grad_norm": 0.17459365725517273, "learning_rate": 1.71325492983866e-05, "loss": 0.0076, "step": 185610 }, { "epoch": 5.737775854608395, "grad_norm": 0.12738969922065735, "learning_rate": 1.7132085677196018e-05, "loss": 0.0079, "step": 185640 }, { "epoch": 5.738703096989553, "grad_norm": 0.10725262016057968, "learning_rate": 1.7131622056005443e-05, "loss": 0.0071, "step": 185670 }, { "epoch": 5.739630339370711, "grad_norm": 0.13746201992034912, "learning_rate": 1.7131158434814865e-05, "loss": 0.007, "step": 185700 }, { "epoch": 5.74055758175187, "grad_norm": 0.17950303852558136, "learning_rate": 1.7130694813624283e-05, "loss": 0.007, "step": 185730 }, { "epoch": 5.741484824133028, "grad_norm": 0.10899720340967178, "learning_rate": 1.7130231192433704e-05, 
"loss": 0.0069, "step": 185760 }, { "epoch": 5.742412066514187, "grad_norm": 0.14693060517311096, "learning_rate": 1.7129767571243122e-05, "loss": 0.0072, "step": 185790 }, { "epoch": 5.7433393088953455, "grad_norm": 0.11318731307983398, "learning_rate": 1.7129303950052544e-05, "loss": 0.0069, "step": 185820 }, { "epoch": 5.744266551276503, "grad_norm": 0.23740065097808838, "learning_rate": 1.7128840328861965e-05, "loss": 0.0073, "step": 185850 }, { "epoch": 5.745193793657662, "grad_norm": 0.10476627200841904, "learning_rate": 1.7128376707671387e-05, "loss": 0.007, "step": 185880 }, { "epoch": 5.74612103603882, "grad_norm": 0.1148785725235939, "learning_rate": 1.7127913086480808e-05, "loss": 0.0078, "step": 185910 }, { "epoch": 5.747048278419979, "grad_norm": 0.168053537607193, "learning_rate": 1.712744946529023e-05, "loss": 0.007, "step": 185940 }, { "epoch": 5.747975520801138, "grad_norm": 0.1399369090795517, "learning_rate": 1.7126985844099648e-05, "loss": 0.007, "step": 185970 }, { "epoch": 5.748902763182296, "grad_norm": 0.10081921517848969, "learning_rate": 1.712652222290907e-05, "loss": 0.0072, "step": 186000 }, { "epoch": 5.749830005563454, "grad_norm": 0.17799629271030426, "learning_rate": 1.712605860171849e-05, "loss": 0.0071, "step": 186030 }, { "epoch": 5.7507572479446125, "grad_norm": 0.12803830206394196, "learning_rate": 1.7125594980527912e-05, "loss": 0.0076, "step": 186060 }, { "epoch": 5.751684490325771, "grad_norm": 0.15963773429393768, "learning_rate": 1.7125131359337334e-05, "loss": 0.0074, "step": 186090 }, { "epoch": 5.75261173270693, "grad_norm": 0.14569224417209625, "learning_rate": 1.7124667738146752e-05, "loss": 0.0076, "step": 186120 }, { "epoch": 5.753538975088088, "grad_norm": 0.1054210364818573, "learning_rate": 1.7124204116956174e-05, "loss": 0.0072, "step": 186150 }, { "epoch": 5.754466217469247, "grad_norm": 0.12715327739715576, "learning_rate": 1.7123740495765595e-05, "loss": 0.0076, "step": 186180 }, { "epoch": 5.755393459850405, 
"grad_norm": 0.160819873213768, "learning_rate": 1.7123276874575013e-05, "loss": 0.0068, "step": 186210 }, { "epoch": 5.756320702231563, "grad_norm": 0.13466067612171173, "learning_rate": 1.7122813253384435e-05, "loss": 0.0069, "step": 186240 }, { "epoch": 5.757247944612722, "grad_norm": 0.10028139501810074, "learning_rate": 1.7122349632193856e-05, "loss": 0.0075, "step": 186270 }, { "epoch": 5.75817518699388, "grad_norm": 0.12241511046886444, "learning_rate": 1.7121886011003278e-05, "loss": 0.0068, "step": 186300 }, { "epoch": 5.759102429375039, "grad_norm": 0.1258196383714676, "learning_rate": 1.71214223898127e-05, "loss": 0.0073, "step": 186330 }, { "epoch": 5.760029671756197, "grad_norm": 0.12431927025318146, "learning_rate": 1.712095876862212e-05, "loss": 0.0073, "step": 186360 }, { "epoch": 5.760956914137355, "grad_norm": 0.142597958445549, "learning_rate": 1.712049514743154e-05, "loss": 0.0068, "step": 186390 }, { "epoch": 5.761884156518514, "grad_norm": 0.17555609345436096, "learning_rate": 1.712003152624096e-05, "loss": 0.0073, "step": 186420 }, { "epoch": 5.7628113988996725, "grad_norm": 0.09266341477632523, "learning_rate": 1.7119567905050382e-05, "loss": 0.0072, "step": 186450 }, { "epoch": 5.763738641280831, "grad_norm": 0.13160836696624756, "learning_rate": 1.7119104283859803e-05, "loss": 0.0074, "step": 186480 }, { "epoch": 5.764665883661989, "grad_norm": 0.22970740497112274, "learning_rate": 1.7118640662669225e-05, "loss": 0.0071, "step": 186510 }, { "epoch": 5.765593126043147, "grad_norm": 0.18511654436588287, "learning_rate": 1.7118177041478643e-05, "loss": 0.007, "step": 186540 }, { "epoch": 5.766520368424306, "grad_norm": 0.09283715486526489, "learning_rate": 1.7117713420288064e-05, "loss": 0.0062, "step": 186570 }, { "epoch": 5.767447610805465, "grad_norm": 0.14527177810668945, "learning_rate": 1.7117249799097486e-05, "loss": 0.0077, "step": 186600 }, { "epoch": 5.768374853186623, "grad_norm": 0.11340104788541794, "learning_rate": 
1.7116786177906904e-05, "loss": 0.0076, "step": 186630 }, { "epoch": 5.769302095567781, "grad_norm": 0.1122056320309639, "learning_rate": 1.7116322556716326e-05, "loss": 0.0073, "step": 186660 }, { "epoch": 5.7702293379489396, "grad_norm": 0.141196608543396, "learning_rate": 1.7115858935525747e-05, "loss": 0.007, "step": 186690 }, { "epoch": 5.771156580330098, "grad_norm": 0.11708498001098633, "learning_rate": 1.711539531433517e-05, "loss": 0.0068, "step": 186720 }, { "epoch": 5.772083822711257, "grad_norm": 0.12659353017807007, "learning_rate": 1.711493169314459e-05, "loss": 0.0074, "step": 186750 }, { "epoch": 5.773011065092415, "grad_norm": 0.19809108972549438, "learning_rate": 1.7114483525993698e-05, "loss": 0.0081, "step": 186780 }, { "epoch": 5.773938307473574, "grad_norm": 0.07702194899320602, "learning_rate": 1.7114019904803116e-05, "loss": 0.0071, "step": 186810 }, { "epoch": 5.774865549854732, "grad_norm": 0.1188855916261673, "learning_rate": 1.7113556283612538e-05, "loss": 0.0078, "step": 186840 }, { "epoch": 5.77579279223589, "grad_norm": 0.12478522956371307, "learning_rate": 1.711309266242196e-05, "loss": 0.007, "step": 186870 }, { "epoch": 5.776720034617049, "grad_norm": 0.36138710379600525, "learning_rate": 1.7112629041231377e-05, "loss": 0.0075, "step": 186900 }, { "epoch": 5.7776472769982075, "grad_norm": 0.13456764817237854, "learning_rate": 1.7112165420040802e-05, "loss": 0.0072, "step": 186930 }, { "epoch": 5.778574519379366, "grad_norm": 0.06297002732753754, "learning_rate": 1.7111717252889907e-05, "loss": 0.0071, "step": 186960 }, { "epoch": 5.779501761760525, "grad_norm": 0.11867687851190567, "learning_rate": 1.7111253631699328e-05, "loss": 0.0073, "step": 186990 }, { "epoch": 5.780429004141682, "grad_norm": 0.11798860877752304, "learning_rate": 1.711079001050875e-05, "loss": 0.0065, "step": 187020 }, { "epoch": 5.781356246522841, "grad_norm": 0.15957435965538025, "learning_rate": 1.7110326389318168e-05, "loss": 0.0075, "step": 187050 }, { 
"epoch": 5.782283488904, "grad_norm": 0.15635798871517181, "learning_rate": 1.710986276812759e-05, "loss": 0.0078, "step": 187080 }, { "epoch": 5.783210731285158, "grad_norm": 0.07201837003231049, "learning_rate": 1.710939914693701e-05, "loss": 0.0076, "step": 187110 }, { "epoch": 5.784137973666317, "grad_norm": 0.07174143195152283, "learning_rate": 1.7108935525746432e-05, "loss": 0.0068, "step": 187140 }, { "epoch": 5.7850652160474745, "grad_norm": 0.16894395649433136, "learning_rate": 1.7108471904555854e-05, "loss": 0.0065, "step": 187170 }, { "epoch": 5.785992458428633, "grad_norm": 0.17702922224998474, "learning_rate": 1.7108008283365275e-05, "loss": 0.0075, "step": 187200 }, { "epoch": 5.786919700809792, "grad_norm": 0.14807310700416565, "learning_rate": 1.7107544662174693e-05, "loss": 0.0067, "step": 187230 }, { "epoch": 5.78784694319095, "grad_norm": 0.10182681679725647, "learning_rate": 1.7107081040984115e-05, "loss": 0.0065, "step": 187260 }, { "epoch": 5.788774185572109, "grad_norm": 0.12383129447698593, "learning_rate": 1.7106617419793536e-05, "loss": 0.0068, "step": 187290 }, { "epoch": 5.789701427953267, "grad_norm": 0.12937931716442108, "learning_rate": 1.7106153798602955e-05, "loss": 0.0072, "step": 187320 }, { "epoch": 5.790628670334425, "grad_norm": 0.2534468173980713, "learning_rate": 1.7105690177412376e-05, "loss": 0.0065, "step": 187350 }, { "epoch": 5.791555912715584, "grad_norm": 0.3080061972141266, "learning_rate": 1.7105226556221798e-05, "loss": 0.0068, "step": 187380 }, { "epoch": 5.792483155096742, "grad_norm": 0.08666443079710007, "learning_rate": 1.710476293503122e-05, "loss": 0.0068, "step": 187410 }, { "epoch": 5.793410397477901, "grad_norm": 0.12736362218856812, "learning_rate": 1.710429931384064e-05, "loss": 0.007, "step": 187440 }, { "epoch": 5.794337639859059, "grad_norm": 0.16053153574466705, "learning_rate": 1.710383569265006e-05, "loss": 0.0074, "step": 187470 }, { "epoch": 5.795264882240217, "grad_norm": 0.09985247999429703, 
"learning_rate": 1.710337207145948e-05, "loss": 0.0071, "step": 187500 }, { "epoch": 5.796192124621376, "grad_norm": 0.13069485127925873, "learning_rate": 1.7102908450268902e-05, "loss": 0.0073, "step": 187530 }, { "epoch": 5.7971193670025345, "grad_norm": 0.11728464812040329, "learning_rate": 1.7102444829078323e-05, "loss": 0.0077, "step": 187560 }, { "epoch": 5.798046609383693, "grad_norm": 0.20301200449466705, "learning_rate": 1.7101981207887745e-05, "loss": 0.0074, "step": 187590 }, { "epoch": 5.798973851764852, "grad_norm": 0.09453226625919342, "learning_rate": 1.7101517586697166e-05, "loss": 0.0077, "step": 187620 }, { "epoch": 5.799901094146009, "grad_norm": 0.11990011483430862, "learning_rate": 1.7101053965506584e-05, "loss": 0.0074, "step": 187650 }, { "epoch": 5.800828336527168, "grad_norm": 0.1399373710155487, "learning_rate": 1.7100590344316006e-05, "loss": 0.0068, "step": 187680 }, { "epoch": 5.801755578908327, "grad_norm": 0.12770456075668335, "learning_rate": 1.7100126723125424e-05, "loss": 0.0071, "step": 187710 }, { "epoch": 5.802682821289485, "grad_norm": 0.08968063443899155, "learning_rate": 1.7099663101934846e-05, "loss": 0.0075, "step": 187740 }, { "epoch": 5.803610063670644, "grad_norm": 0.10864236950874329, "learning_rate": 1.7099199480744267e-05, "loss": 0.0068, "step": 187770 }, { "epoch": 5.804537306051802, "grad_norm": 0.1544281542301178, "learning_rate": 1.709873585955369e-05, "loss": 0.0077, "step": 187800 }, { "epoch": 5.80546454843296, "grad_norm": 0.13630712032318115, "learning_rate": 1.709827223836311e-05, "loss": 0.0069, "step": 187830 }, { "epoch": 5.806391790814119, "grad_norm": 0.11688698083162308, "learning_rate": 1.709780861717253e-05, "loss": 0.0076, "step": 187860 }, { "epoch": 5.807319033195277, "grad_norm": 0.12585562467575073, "learning_rate": 1.709734499598195e-05, "loss": 0.0075, "step": 187890 }, { "epoch": 5.808246275576436, "grad_norm": 0.1725558489561081, "learning_rate": 1.709688137479137e-05, "loss": 0.007, 
"step": 187920 }, { "epoch": 5.8091735179575945, "grad_norm": 0.13622064888477325, "learning_rate": 1.7096417753600793e-05, "loss": 0.0069, "step": 187950 }, { "epoch": 5.810100760338752, "grad_norm": 0.12937162816524506, "learning_rate": 1.7095954132410214e-05, "loss": 0.0076, "step": 187980 }, { "epoch": 5.811028002719911, "grad_norm": 0.10157639533281326, "learning_rate": 1.7095490511219636e-05, "loss": 0.0063, "step": 188010 }, { "epoch": 5.811955245101069, "grad_norm": 0.1518232226371765, "learning_rate": 1.7095026890029054e-05, "loss": 0.0078, "step": 188040 }, { "epoch": 5.812882487482228, "grad_norm": 0.2066008448600769, "learning_rate": 1.7094563268838475e-05, "loss": 0.0076, "step": 188070 }, { "epoch": 5.813809729863387, "grad_norm": 0.117085762321949, "learning_rate": 1.7094099647647897e-05, "loss": 0.0071, "step": 188100 }, { "epoch": 5.814736972244544, "grad_norm": 0.13454343378543854, "learning_rate": 1.7093636026457315e-05, "loss": 0.0071, "step": 188130 }, { "epoch": 5.815664214625703, "grad_norm": 0.11480982601642609, "learning_rate": 1.7093172405266736e-05, "loss": 0.0079, "step": 188160 }, { "epoch": 5.8165914570068615, "grad_norm": 0.10861824452877045, "learning_rate": 1.7092708784076158e-05, "loss": 0.0074, "step": 188190 }, { "epoch": 5.81751869938802, "grad_norm": 0.09225627034902573, "learning_rate": 1.709224516288558e-05, "loss": 0.0069, "step": 188220 }, { "epoch": 5.818445941769179, "grad_norm": 0.1066984310746193, "learning_rate": 1.7091781541695e-05, "loss": 0.0074, "step": 188250 }, { "epoch": 5.819373184150336, "grad_norm": 0.19487114250659943, "learning_rate": 1.7091317920504422e-05, "loss": 0.0075, "step": 188280 }, { "epoch": 5.820300426531495, "grad_norm": 0.11208205670118332, "learning_rate": 1.709085429931384e-05, "loss": 0.0074, "step": 188310 }, { "epoch": 5.821227668912654, "grad_norm": 0.08845870196819305, "learning_rate": 1.7090390678123262e-05, "loss": 0.0073, "step": 188340 }, { "epoch": 5.822154911293812, "grad_norm": 
0.10164837539196014, "learning_rate": 1.7089927056932684e-05, "loss": 0.0065, "step": 188370 }, { "epoch": 5.823082153674971, "grad_norm": 0.14542162418365479, "learning_rate": 1.7089463435742105e-05, "loss": 0.0074, "step": 188400 }, { "epoch": 5.824009396056129, "grad_norm": 0.08812384307384491, "learning_rate": 1.7088999814551527e-05, "loss": 0.0074, "step": 188430 }, { "epoch": 5.824936638437287, "grad_norm": 0.1161966323852539, "learning_rate": 1.7088536193360945e-05, "loss": 0.0075, "step": 188460 }, { "epoch": 5.825863880818446, "grad_norm": 0.12436578422784805, "learning_rate": 1.7088072572170366e-05, "loss": 0.0069, "step": 188490 }, { "epoch": 5.826791123199604, "grad_norm": 0.12753719091415405, "learning_rate": 1.7087608950979788e-05, "loss": 0.0078, "step": 188520 }, { "epoch": 5.827718365580763, "grad_norm": 0.09070590883493423, "learning_rate": 1.708714532978921e-05, "loss": 0.0072, "step": 188550 }, { "epoch": 5.8286456079619215, "grad_norm": 0.16642595827579498, "learning_rate": 1.708668170859863e-05, "loss": 0.0071, "step": 188580 }, { "epoch": 5.82957285034308, "grad_norm": 0.11236762255430222, "learning_rate": 1.7086218087408052e-05, "loss": 0.0073, "step": 188610 }, { "epoch": 5.830500092724238, "grad_norm": 0.12393709272146225, "learning_rate": 1.708575446621747e-05, "loss": 0.0071, "step": 188640 }, { "epoch": 5.8314273351053965, "grad_norm": 0.11177992075681686, "learning_rate": 1.7085290845026892e-05, "loss": 0.0069, "step": 188670 }, { "epoch": 5.832354577486555, "grad_norm": 0.12917453050613403, "learning_rate": 1.7084827223836313e-05, "loss": 0.0067, "step": 188700 }, { "epoch": 5.833281819867714, "grad_norm": 0.1277666836977005, "learning_rate": 1.708436360264573e-05, "loss": 0.0073, "step": 188730 }, { "epoch": 5.834209062248872, "grad_norm": 0.10990093648433685, "learning_rate": 1.7083899981455153e-05, "loss": 0.0069, "step": 188760 }, { "epoch": 5.83513630463003, "grad_norm": 0.08756580203771591, "learning_rate": 
1.7083436360264575e-05, "loss": 0.007, "step": 188790 }, { "epoch": 5.836063547011189, "grad_norm": 0.117132768034935, "learning_rate": 1.7082972739073996e-05, "loss": 0.0064, "step": 188820 }, { "epoch": 5.836990789392347, "grad_norm": 0.1366443932056427, "learning_rate": 1.7082509117883418e-05, "loss": 0.0071, "step": 188850 }, { "epoch": 5.837918031773506, "grad_norm": 0.10087288171052933, "learning_rate": 1.7082045496692836e-05, "loss": 0.007, "step": 188880 }, { "epoch": 5.838845274154664, "grad_norm": 0.12353358417749405, "learning_rate": 1.7081581875502257e-05, "loss": 0.0075, "step": 188910 }, { "epoch": 5.839772516535822, "grad_norm": 0.11250976473093033, "learning_rate": 1.708111825431168e-05, "loss": 0.0073, "step": 188940 }, { "epoch": 5.840699758916981, "grad_norm": 0.16858093440532684, "learning_rate": 1.70806546331211e-05, "loss": 0.0069, "step": 188970 }, { "epoch": 5.841627001298139, "grad_norm": 0.15400761365890503, "learning_rate": 1.708019101193052e-05, "loss": 0.007, "step": 189000 }, { "epoch": 5.842554243679298, "grad_norm": 0.15272875130176544, "learning_rate": 1.7079727390739943e-05, "loss": 0.007, "step": 189030 }, { "epoch": 5.8434814860604565, "grad_norm": 0.1257394552230835, "learning_rate": 1.707926376954936e-05, "loss": 0.007, "step": 189060 }, { "epoch": 5.844408728441614, "grad_norm": 0.132265105843544, "learning_rate": 1.7078800148358783e-05, "loss": 0.0071, "step": 189090 }, { "epoch": 5.845335970822773, "grad_norm": 0.12425919622182846, "learning_rate": 1.70783365271682e-05, "loss": 0.0073, "step": 189120 }, { "epoch": 5.846263213203931, "grad_norm": 0.1351500153541565, "learning_rate": 1.7077872905977622e-05, "loss": 0.0069, "step": 189150 }, { "epoch": 5.84719045558509, "grad_norm": 0.09720392525196075, "learning_rate": 1.7077409284787044e-05, "loss": 0.0073, "step": 189180 }, { "epoch": 5.848117697966249, "grad_norm": 0.11125524342060089, "learning_rate": 1.7076945663596465e-05, "loss": 0.0076, "step": 189210 }, { "epoch": 
5.849044940347407, "grad_norm": 0.11685994267463684, "learning_rate": 1.7076482042405887e-05, "loss": 0.0074, "step": 189240 }, { "epoch": 5.849972182728565, "grad_norm": 0.19406621158123016, "learning_rate": 1.707601842121531e-05, "loss": 0.0069, "step": 189270 }, { "epoch": 5.8508994251097235, "grad_norm": 0.07202266901731491, "learning_rate": 1.7075554800024727e-05, "loss": 0.0067, "step": 189300 }, { "epoch": 5.851826667490882, "grad_norm": 0.11865735799074173, "learning_rate": 1.7075091178834148e-05, "loss": 0.0069, "step": 189330 }, { "epoch": 5.852753909872041, "grad_norm": 0.11521381884813309, "learning_rate": 1.707462755764357e-05, "loss": 0.0075, "step": 189360 }, { "epoch": 5.853681152253199, "grad_norm": 0.12530028820037842, "learning_rate": 1.707416393645299e-05, "loss": 0.0073, "step": 189390 }, { "epoch": 5.854608394634358, "grad_norm": 0.13997775316238403, "learning_rate": 1.7073700315262413e-05, "loss": 0.0069, "step": 189420 }, { "epoch": 5.855535637015516, "grad_norm": 0.11785058677196503, "learning_rate": 1.707323669407183e-05, "loss": 0.0071, "step": 189450 }, { "epoch": 5.856462879396674, "grad_norm": 0.11826924979686737, "learning_rate": 1.7072773072881252e-05, "loss": 0.0068, "step": 189480 }, { "epoch": 5.857390121777833, "grad_norm": 0.0826655849814415, "learning_rate": 1.7072309451690674e-05, "loss": 0.0071, "step": 189510 }, { "epoch": 5.858317364158991, "grad_norm": 0.17849351465702057, "learning_rate": 1.7071845830500092e-05, "loss": 0.0075, "step": 189540 }, { "epoch": 5.85924460654015, "grad_norm": 0.08640854805707932, "learning_rate": 1.7071382209309513e-05, "loss": 0.0071, "step": 189570 }, { "epoch": 5.860171848921308, "grad_norm": 0.12113334983587265, "learning_rate": 1.7070918588118935e-05, "loss": 0.0072, "step": 189600 }, { "epoch": 5.861099091302466, "grad_norm": 0.12444142252206802, "learning_rate": 1.7070454966928356e-05, "loss": 0.0073, "step": 189630 }, { "epoch": 5.862026333683625, "grad_norm": 0.15928363800048828, 
"learning_rate": 1.7070006799777464e-05, "loss": 0.0072, "step": 189660 }, { "epoch": 5.8629535760647835, "grad_norm": 0.14125826954841614, "learning_rate": 1.7069543178586886e-05, "loss": 0.0074, "step": 189690 }, { "epoch": 5.863880818445942, "grad_norm": 0.12009206414222717, "learning_rate": 1.7069079557396304e-05, "loss": 0.0069, "step": 189720 }, { "epoch": 5.8648080608271, "grad_norm": 0.1450047641992569, "learning_rate": 1.7068615936205725e-05, "loss": 0.0069, "step": 189750 }, { "epoch": 5.865735303208258, "grad_norm": 0.14754226803779602, "learning_rate": 1.7068152315015147e-05, "loss": 0.0076, "step": 189780 }, { "epoch": 5.866662545589417, "grad_norm": 0.11472117900848389, "learning_rate": 1.706768869382457e-05, "loss": 0.0066, "step": 189810 }, { "epoch": 5.867589787970576, "grad_norm": 0.11231469362974167, "learning_rate": 1.706722507263399e-05, "loss": 0.0071, "step": 189840 }, { "epoch": 5.868517030351734, "grad_norm": 0.12314298003911972, "learning_rate": 1.7066761451443408e-05, "loss": 0.0064, "step": 189870 }, { "epoch": 5.869444272732892, "grad_norm": 0.10534258931875229, "learning_rate": 1.706629783025283e-05, "loss": 0.0071, "step": 189900 }, { "epoch": 5.8703715151140505, "grad_norm": 0.08061770349740982, "learning_rate": 1.706583420906225e-05, "loss": 0.0072, "step": 189930 }, { "epoch": 5.871298757495209, "grad_norm": 0.09185560047626495, "learning_rate": 1.706537058787167e-05, "loss": 0.0074, "step": 189960 }, { "epoch": 5.872225999876368, "grad_norm": 0.1376512199640274, "learning_rate": 1.706490696668109e-05, "loss": 0.0074, "step": 189990 }, { "epoch": 5.873153242257526, "grad_norm": 0.10092996805906296, "learning_rate": 1.7064443345490512e-05, "loss": 0.0077, "step": 190020 }, { "epoch": 5.874080484638685, "grad_norm": 0.11000051349401474, "learning_rate": 1.7063979724299934e-05, "loss": 0.0073, "step": 190050 }, { "epoch": 5.875007727019843, "grad_norm": 0.11492660641670227, "learning_rate": 1.7063516103109355e-05, "loss": 0.0072, 
"step": 190080 }, { "epoch": 5.875934969401001, "grad_norm": 0.19901436567306519, "learning_rate": 1.7063052481918777e-05, "loss": 0.0071, "step": 190110 }, { "epoch": 5.87686221178216, "grad_norm": 0.08786620199680328, "learning_rate": 1.7062588860728195e-05, "loss": 0.0071, "step": 190140 }, { "epoch": 5.877789454163318, "grad_norm": 0.15439173579216003, "learning_rate": 1.7062125239537616e-05, "loss": 0.0067, "step": 190170 }, { "epoch": 5.878716696544477, "grad_norm": 0.12273036688566208, "learning_rate": 1.7061661618347038e-05, "loss": 0.0074, "step": 190200 }, { "epoch": 5.879643938925636, "grad_norm": 0.13475558161735535, "learning_rate": 1.706119799715646e-05, "loss": 0.0069, "step": 190230 }, { "epoch": 5.880571181306793, "grad_norm": 0.1391417384147644, "learning_rate": 1.706073437596588e-05, "loss": 0.0068, "step": 190260 }, { "epoch": 5.881498423687952, "grad_norm": 0.1423911452293396, "learning_rate": 1.70602707547753e-05, "loss": 0.0077, "step": 190290 }, { "epoch": 5.8824256660691105, "grad_norm": 0.20636068284511566, "learning_rate": 1.705980713358472e-05, "loss": 0.0069, "step": 190320 }, { "epoch": 5.883352908450269, "grad_norm": 0.12404841929674149, "learning_rate": 1.7059343512394142e-05, "loss": 0.0072, "step": 190350 }, { "epoch": 5.884280150831428, "grad_norm": 0.11812005192041397, "learning_rate": 1.705887989120356e-05, "loss": 0.0065, "step": 190380 }, { "epoch": 5.8852073932125855, "grad_norm": 0.1529613435268402, "learning_rate": 1.705841627001298e-05, "loss": 0.0066, "step": 190410 }, { "epoch": 5.886134635593744, "grad_norm": 0.15586185455322266, "learning_rate": 1.7057952648822403e-05, "loss": 0.0073, "step": 190440 }, { "epoch": 5.887061877974903, "grad_norm": 0.12257947027683258, "learning_rate": 1.7057489027631825e-05, "loss": 0.0072, "step": 190470 }, { "epoch": 5.887989120356061, "grad_norm": 0.08669926971197128, "learning_rate": 1.7057025406441246e-05, "loss": 0.0074, "step": 190500 }, { "epoch": 5.88891636273722, "grad_norm": 
0.11951444298028946, "learning_rate": 1.7056561785250664e-05, "loss": 0.0064, "step": 190530 }, { "epoch": 5.889843605118378, "grad_norm": 0.1393895000219345, "learning_rate": 1.7056098164060086e-05, "loss": 0.0068, "step": 190560 }, { "epoch": 5.890770847499536, "grad_norm": 0.09776974469423294, "learning_rate": 1.7055634542869507e-05, "loss": 0.0067, "step": 190590 }, { "epoch": 5.891698089880695, "grad_norm": 0.15952545404434204, "learning_rate": 1.705517092167893e-05, "loss": 0.0079, "step": 190620 }, { "epoch": 5.892625332261853, "grad_norm": 0.10641151666641235, "learning_rate": 1.705470730048835e-05, "loss": 0.0066, "step": 190650 }, { "epoch": 5.893552574643012, "grad_norm": 0.16283823549747467, "learning_rate": 1.7054243679297772e-05, "loss": 0.007, "step": 190680 }, { "epoch": 5.89447981702417, "grad_norm": 0.12092981487512589, "learning_rate": 1.705378005810719e-05, "loss": 0.0081, "step": 190710 }, { "epoch": 5.895407059405328, "grad_norm": 0.1362561583518982, "learning_rate": 1.705331643691661e-05, "loss": 0.0076, "step": 190740 }, { "epoch": 5.896334301786487, "grad_norm": 0.13907112181186676, "learning_rate": 1.7052852815726033e-05, "loss": 0.0071, "step": 190770 }, { "epoch": 5.8972615441676455, "grad_norm": 0.09743884950876236, "learning_rate": 1.705238919453545e-05, "loss": 0.0066, "step": 190800 }, { "epoch": 5.898188786548804, "grad_norm": 0.0851893201470375, "learning_rate": 1.7051925573344873e-05, "loss": 0.0063, "step": 190830 }, { "epoch": 5.899116028929963, "grad_norm": 0.12101925909519196, "learning_rate": 1.7051461952154294e-05, "loss": 0.0067, "step": 190860 }, { "epoch": 5.90004327131112, "grad_norm": 0.13668960332870483, "learning_rate": 1.7050998330963716e-05, "loss": 0.0075, "step": 190890 }, { "epoch": 5.900970513692279, "grad_norm": 0.1938113123178482, "learning_rate": 1.7050534709773137e-05, "loss": 0.0072, "step": 190920 }, { "epoch": 5.901897756073438, "grad_norm": 0.1392449289560318, "learning_rate": 1.7050071088582555e-05, 
"loss": 0.0068, "step": 190950 }, { "epoch": 5.902824998454596, "grad_norm": 0.07957901060581207, "learning_rate": 1.7049607467391977e-05, "loss": 0.0069, "step": 190980 }, { "epoch": 5.903752240835755, "grad_norm": 0.12315705418586731, "learning_rate": 1.7049143846201398e-05, "loss": 0.0072, "step": 191010 }, { "epoch": 5.904679483216913, "grad_norm": 0.12896795570850372, "learning_rate": 1.704868022501082e-05, "loss": 0.007, "step": 191040 }, { "epoch": 5.905606725598071, "grad_norm": 0.10398700088262558, "learning_rate": 1.704821660382024e-05, "loss": 0.0072, "step": 191070 }, { "epoch": 5.90653396797923, "grad_norm": 0.15213508903980255, "learning_rate": 1.7047752982629663e-05, "loss": 0.0068, "step": 191100 }, { "epoch": 5.907461210360388, "grad_norm": 0.11623694002628326, "learning_rate": 1.7047304815478767e-05, "loss": 0.0069, "step": 191130 }, { "epoch": 5.908388452741547, "grad_norm": 0.10181767493486404, "learning_rate": 1.704684119428819e-05, "loss": 0.0072, "step": 191160 }, { "epoch": 5.9093156951227055, "grad_norm": 0.12465579807758331, "learning_rate": 1.704637757309761e-05, "loss": 0.007, "step": 191190 }, { "epoch": 5.910242937503863, "grad_norm": 0.08456426858901978, "learning_rate": 1.704591395190703e-05, "loss": 0.0065, "step": 191220 }, { "epoch": 5.911170179885022, "grad_norm": 0.10180408507585526, "learning_rate": 1.704545033071645e-05, "loss": 0.0078, "step": 191250 }, { "epoch": 5.91209742226618, "grad_norm": 0.15928250551223755, "learning_rate": 1.704498670952587e-05, "loss": 0.0063, "step": 191280 }, { "epoch": 5.913024664647339, "grad_norm": 0.1395707130432129, "learning_rate": 1.7044523088335293e-05, "loss": 0.0068, "step": 191310 }, { "epoch": 5.913951907028498, "grad_norm": 0.11152388155460358, "learning_rate": 1.7044059467144714e-05, "loss": 0.0074, "step": 191340 }, { "epoch": 5.914879149409655, "grad_norm": 0.14502449333667755, "learning_rate": 1.7043595845954132e-05, "loss": 0.0073, "step": 191370 }, { "epoch": 5.915806391790814, 
"grad_norm": 0.13252057135105133, "learning_rate": 1.7043132224763554e-05, "loss": 0.0063, "step": 191400 }, { "epoch": 5.9167336341719725, "grad_norm": 0.12396277487277985, "learning_rate": 1.7042668603572975e-05, "loss": 0.007, "step": 191430 }, { "epoch": 5.917660876553131, "grad_norm": 0.14509528875350952, "learning_rate": 1.7042204982382397e-05, "loss": 0.0072, "step": 191460 }, { "epoch": 5.91858811893429, "grad_norm": 0.10232868790626526, "learning_rate": 1.704174136119182e-05, "loss": 0.0074, "step": 191490 }, { "epoch": 5.919515361315447, "grad_norm": 0.1273469775915146, "learning_rate": 1.704127774000124e-05, "loss": 0.0072, "step": 191520 }, { "epoch": 5.920442603696606, "grad_norm": 0.07732483744621277, "learning_rate": 1.7040814118810658e-05, "loss": 0.0074, "step": 191550 }, { "epoch": 5.921369846077765, "grad_norm": 0.12603873014450073, "learning_rate": 1.704035049762008e-05, "loss": 0.0075, "step": 191580 }, { "epoch": 5.922297088458923, "grad_norm": 0.09030661731958389, "learning_rate": 1.70398868764295e-05, "loss": 0.0077, "step": 191610 }, { "epoch": 5.923224330840082, "grad_norm": 0.12559868395328522, "learning_rate": 1.703942325523892e-05, "loss": 0.007, "step": 191640 }, { "epoch": 5.92415157322124, "grad_norm": 0.1956549882888794, "learning_rate": 1.703895963404834e-05, "loss": 0.0076, "step": 191670 }, { "epoch": 5.925078815602398, "grad_norm": 0.10884297639131546, "learning_rate": 1.7038496012857762e-05, "loss": 0.0071, "step": 191700 }, { "epoch": 5.926006057983557, "grad_norm": 0.1626385748386383, "learning_rate": 1.7038032391667184e-05, "loss": 0.0077, "step": 191730 }, { "epoch": 5.926933300364715, "grad_norm": 0.14710141718387604, "learning_rate": 1.7037568770476605e-05, "loss": 0.0068, "step": 191760 }, { "epoch": 5.927860542745874, "grad_norm": 0.15183384716510773, "learning_rate": 1.7037105149286023e-05, "loss": 0.0075, "step": 191790 }, { "epoch": 5.9287877851270325, "grad_norm": 0.10954813659191132, "learning_rate": 
1.7036641528095445e-05, "loss": 0.0068, "step": 191820 }, { "epoch": 5.929715027508191, "grad_norm": 0.14806729555130005, "learning_rate": 1.7036177906904866e-05, "loss": 0.0068, "step": 191850 }, { "epoch": 5.930642269889349, "grad_norm": 0.19167622923851013, "learning_rate": 1.7035714285714288e-05, "loss": 0.007, "step": 191880 }, { "epoch": 5.931569512270507, "grad_norm": 0.16389474272727966, "learning_rate": 1.703525066452371e-05, "loss": 0.0075, "step": 191910 }, { "epoch": 5.932496754651666, "grad_norm": 0.1288822442293167, "learning_rate": 1.703478704333313e-05, "loss": 0.0081, "step": 191940 }, { "epoch": 5.933423997032825, "grad_norm": 0.1560802459716797, "learning_rate": 1.703432342214255e-05, "loss": 0.0074, "step": 191970 }, { "epoch": 5.934351239413983, "grad_norm": 0.10953033715486526, "learning_rate": 1.703385980095197e-05, "loss": 0.0068, "step": 192000 }, { "epoch": 5.935278481795141, "grad_norm": 0.08357425779104233, "learning_rate": 1.703339617976139e-05, "loss": 0.0075, "step": 192030 }, { "epoch": 5.9362057241762995, "grad_norm": 0.13773676753044128, "learning_rate": 1.703293255857081e-05, "loss": 0.007, "step": 192060 }, { "epoch": 5.937132966557458, "grad_norm": 0.07446298748254776, "learning_rate": 1.703246893738023e-05, "loss": 0.0072, "step": 192090 }, { "epoch": 5.938060208938617, "grad_norm": 0.13213622570037842, "learning_rate": 1.7032005316189653e-05, "loss": 0.0079, "step": 192120 }, { "epoch": 5.938987451319775, "grad_norm": 0.11471540480852127, "learning_rate": 1.7031541694999075e-05, "loss": 0.0066, "step": 192150 }, { "epoch": 5.939914693700933, "grad_norm": 0.15029895305633545, "learning_rate": 1.7031078073808496e-05, "loss": 0.0072, "step": 192180 }, { "epoch": 5.940841936082092, "grad_norm": 0.17423857748508453, "learning_rate": 1.7030614452617914e-05, "loss": 0.0067, "step": 192210 }, { "epoch": 5.94176917846325, "grad_norm": 0.11583422124385834, "learning_rate": 1.7030150831427336e-05, "loss": 0.0075, "step": 192240 }, { 
"epoch": 5.942696420844409, "grad_norm": 0.1020580381155014, "learning_rate": 1.7029687210236757e-05, "loss": 0.0065, "step": 192270 }, { "epoch": 5.9436236632255675, "grad_norm": 0.11284272372722626, "learning_rate": 1.702922358904618e-05, "loss": 0.0075, "step": 192300 }, { "epoch": 5.944550905606725, "grad_norm": 0.1355983018875122, "learning_rate": 1.70287599678556e-05, "loss": 0.0071, "step": 192330 }, { "epoch": 5.945478147987884, "grad_norm": 0.11959733068943024, "learning_rate": 1.702829634666502e-05, "loss": 0.0068, "step": 192360 }, { "epoch": 5.946405390369042, "grad_norm": 0.08450602740049362, "learning_rate": 1.702783272547444e-05, "loss": 0.0073, "step": 192390 }, { "epoch": 5.947332632750201, "grad_norm": 0.10556121915578842, "learning_rate": 1.702736910428386e-05, "loss": 0.0068, "step": 192420 }, { "epoch": 5.94825987513136, "grad_norm": 0.13178059458732605, "learning_rate": 1.702690548309328e-05, "loss": 0.0079, "step": 192450 }, { "epoch": 5.949187117512517, "grad_norm": 0.09903409332036972, "learning_rate": 1.70264418619027e-05, "loss": 0.007, "step": 192480 }, { "epoch": 5.950114359893676, "grad_norm": 0.15042242407798767, "learning_rate": 1.7025978240712126e-05, "loss": 0.0073, "step": 192510 }, { "epoch": 5.9510416022748345, "grad_norm": 0.18558822572231293, "learning_rate": 1.7025514619521544e-05, "loss": 0.0069, "step": 192540 }, { "epoch": 5.951968844655993, "grad_norm": 0.16317053139209747, "learning_rate": 1.7025050998330966e-05, "loss": 0.0073, "step": 192570 }, { "epoch": 5.952896087037152, "grad_norm": 0.18941393494606018, "learning_rate": 1.7024587377140387e-05, "loss": 0.007, "step": 192600 }, { "epoch": 5.95382332941831, "grad_norm": 0.15507230162620544, "learning_rate": 1.7024123755949805e-05, "loss": 0.0064, "step": 192630 }, { "epoch": 5.954750571799469, "grad_norm": 0.10316964238882065, "learning_rate": 1.7023660134759227e-05, "loss": 0.0077, "step": 192660 }, { "epoch": 5.955677814180627, "grad_norm": 0.089139424264431, 
"learning_rate": 1.7023196513568648e-05, "loss": 0.0068, "step": 192690 }, { "epoch": 5.956605056561785, "grad_norm": 0.1438133418560028, "learning_rate": 1.702273289237807e-05, "loss": 0.0069, "step": 192720 }, { "epoch": 5.957532298942944, "grad_norm": 0.10273869335651398, "learning_rate": 1.702226927118749e-05, "loss": 0.0073, "step": 192750 }, { "epoch": 5.958459541324102, "grad_norm": 0.13703878223896027, "learning_rate": 1.702180564999691e-05, "loss": 0.0067, "step": 192780 }, { "epoch": 5.959386783705261, "grad_norm": 0.08443824201822281, "learning_rate": 1.702134202880633e-05, "loss": 0.0073, "step": 192810 }, { "epoch": 5.960314026086419, "grad_norm": 0.12399347126483917, "learning_rate": 1.7020878407615752e-05, "loss": 0.0069, "step": 192840 }, { "epoch": 5.961241268467577, "grad_norm": 0.08195889741182327, "learning_rate": 1.7020414786425174e-05, "loss": 0.0069, "step": 192870 }, { "epoch": 5.962168510848736, "grad_norm": 0.1288631707429886, "learning_rate": 1.7019951165234595e-05, "loss": 0.0068, "step": 192900 }, { "epoch": 5.9630957532298945, "grad_norm": 0.1297687292098999, "learning_rate": 1.7019487544044017e-05, "loss": 0.0062, "step": 192930 }, { "epoch": 5.964022995611053, "grad_norm": 0.11434469372034073, "learning_rate": 1.7019023922853435e-05, "loss": 0.0072, "step": 192960 }, { "epoch": 5.964950237992211, "grad_norm": 0.12327061593532562, "learning_rate": 1.7018560301662857e-05, "loss": 0.0072, "step": 192990 }, { "epoch": 5.965877480373369, "grad_norm": 0.1481127291917801, "learning_rate": 1.7018096680472275e-05, "loss": 0.0071, "step": 193020 }, { "epoch": 5.966804722754528, "grad_norm": 0.13306742906570435, "learning_rate": 1.7017633059281696e-05, "loss": 0.0068, "step": 193050 }, { "epoch": 5.967731965135687, "grad_norm": 0.12584462761878967, "learning_rate": 1.7017169438091118e-05, "loss": 0.0068, "step": 193080 }, { "epoch": 5.968659207516845, "grad_norm": 0.11937983334064484, "learning_rate": 1.701670581690054e-05, "loss": 0.0072, 
"step": 193110 }, { "epoch": 5.969586449898003, "grad_norm": 0.08954542130231857, "learning_rate": 1.701624219570996e-05, "loss": 0.0062, "step": 193140 }, { "epoch": 5.9705136922791615, "grad_norm": 0.13314782083034515, "learning_rate": 1.7015778574519382e-05, "loss": 0.0071, "step": 193170 }, { "epoch": 5.97144093466032, "grad_norm": 0.1140175610780716, "learning_rate": 1.70153149533288e-05, "loss": 0.0066, "step": 193200 }, { "epoch": 5.972368177041479, "grad_norm": 0.14080096781253815, "learning_rate": 1.7014851332138222e-05, "loss": 0.0067, "step": 193230 }, { "epoch": 5.973295419422637, "grad_norm": 0.13050533831119537, "learning_rate": 1.701440316498733e-05, "loss": 0.0074, "step": 193260 }, { "epoch": 5.974222661803795, "grad_norm": 0.15458939969539642, "learning_rate": 1.7013939543796748e-05, "loss": 0.0065, "step": 193290 }, { "epoch": 5.975149904184954, "grad_norm": 0.13699038326740265, "learning_rate": 1.701347592260617e-05, "loss": 0.0071, "step": 193320 }, { "epoch": 5.976077146566112, "grad_norm": 0.09914907813072205, "learning_rate": 1.701301230141559e-05, "loss": 0.0072, "step": 193350 }, { "epoch": 5.977004388947271, "grad_norm": 0.11741071194410324, "learning_rate": 1.7012548680225012e-05, "loss": 0.0072, "step": 193380 }, { "epoch": 5.977931631328429, "grad_norm": 0.08860047906637192, "learning_rate": 1.7012085059034434e-05, "loss": 0.0069, "step": 193410 }, { "epoch": 5.978858873709588, "grad_norm": 0.14782290160655975, "learning_rate": 1.7011621437843852e-05, "loss": 0.0071, "step": 193440 }, { "epoch": 5.979786116090747, "grad_norm": 0.08189874142408371, "learning_rate": 1.7011157816653273e-05, "loss": 0.0074, "step": 193470 }, { "epoch": 5.980713358471904, "grad_norm": 0.0826958492398262, "learning_rate": 1.7010694195462695e-05, "loss": 0.0072, "step": 193500 }, { "epoch": 5.981640600853063, "grad_norm": 0.13397757709026337, "learning_rate": 1.7010230574272116e-05, "loss": 0.0073, "step": 193530 }, { "epoch": 5.9825678432342215, "grad_norm": 
0.08373875916004181, "learning_rate": 1.7009766953081538e-05, "loss": 0.0063, "step": 193560 }, { "epoch": 5.98349508561538, "grad_norm": 0.15875326097011566, "learning_rate": 1.700930333189096e-05, "loss": 0.008, "step": 193590 }, { "epoch": 5.984422327996539, "grad_norm": 0.08093606680631638, "learning_rate": 1.7008839710700378e-05, "loss": 0.0074, "step": 193620 }, { "epoch": 5.985349570377696, "grad_norm": 0.11419952660799026, "learning_rate": 1.70083760895098e-05, "loss": 0.0074, "step": 193650 }, { "epoch": 5.986276812758855, "grad_norm": 0.10917720943689346, "learning_rate": 1.700791246831922e-05, "loss": 0.0065, "step": 193680 }, { "epoch": 5.987204055140014, "grad_norm": 0.1248556524515152, "learning_rate": 1.700744884712864e-05, "loss": 0.0069, "step": 193710 }, { "epoch": 5.988131297521172, "grad_norm": 0.0997123271226883, "learning_rate": 1.700698522593806e-05, "loss": 0.0066, "step": 193740 }, { "epoch": 5.989058539902331, "grad_norm": 0.14991740882396698, "learning_rate": 1.7006521604747485e-05, "loss": 0.0069, "step": 193770 }, { "epoch": 5.9899857822834885, "grad_norm": 0.11372887343168259, "learning_rate": 1.7006057983556903e-05, "loss": 0.0067, "step": 193800 }, { "epoch": 5.990913024664647, "grad_norm": 0.13597698509693146, "learning_rate": 1.7005594362366325e-05, "loss": 0.0066, "step": 193830 }, { "epoch": 5.991840267045806, "grad_norm": 0.08567865192890167, "learning_rate": 1.7005130741175743e-05, "loss": 0.0068, "step": 193860 }, { "epoch": 5.992767509426964, "grad_norm": 0.10967150330543518, "learning_rate": 1.7004667119985164e-05, "loss": 0.0075, "step": 193890 }, { "epoch": 5.993694751808123, "grad_norm": 0.12384409457445145, "learning_rate": 1.7004203498794586e-05, "loss": 0.0069, "step": 193920 }, { "epoch": 5.994621994189281, "grad_norm": 0.13245615363121033, "learning_rate": 1.7003739877604007e-05, "loss": 0.0065, "step": 193950 }, { "epoch": 5.995549236570439, "grad_norm": 0.044565942138433456, "learning_rate": 1.700327625641343e-05, 
"loss": 0.007, "step": 193980 }, { "epoch": 5.996476478951598, "grad_norm": 0.11077797412872314, "learning_rate": 1.700281263522285e-05, "loss": 0.007, "step": 194010 }, { "epoch": 5.9974037213327565, "grad_norm": 0.10085821896791458, "learning_rate": 1.700234901403227e-05, "loss": 0.0067, "step": 194040 }, { "epoch": 5.998330963713915, "grad_norm": 0.09820086508989334, "learning_rate": 1.700188539284169e-05, "loss": 0.0069, "step": 194070 }, { "epoch": 5.999258206095073, "grad_norm": 0.13681109249591827, "learning_rate": 1.700142177165111e-05, "loss": 0.0072, "step": 194100 }, { "epoch": 6.000185448476231, "grad_norm": 0.10415679961442947, "learning_rate": 1.7000958150460533e-05, "loss": 0.007, "step": 194130 }, { "epoch": 6.00111269085739, "grad_norm": 0.14675824344158173, "learning_rate": 1.7000494529269955e-05, "loss": 0.0068, "step": 194160 }, { "epoch": 6.002039933238549, "grad_norm": 0.08121483027935028, "learning_rate": 1.7000030908079373e-05, "loss": 0.0071, "step": 194190 }, { "epoch": 6.002967175619707, "grad_norm": 0.09833698719739914, "learning_rate": 1.6999567286888794e-05, "loss": 0.0067, "step": 194220 }, { "epoch": 6.003894418000866, "grad_norm": 0.11878317594528198, "learning_rate": 1.6999103665698216e-05, "loss": 0.0065, "step": 194250 }, { "epoch": 6.0048216603820235, "grad_norm": 0.1366177499294281, "learning_rate": 1.6998640044507634e-05, "loss": 0.0069, "step": 194280 }, { "epoch": 6.005748902763182, "grad_norm": 0.13172723352909088, "learning_rate": 1.6998176423317055e-05, "loss": 0.0073, "step": 194310 }, { "epoch": 6.006676145144341, "grad_norm": 0.12217213213443756, "learning_rate": 1.6997712802126477e-05, "loss": 0.0062, "step": 194340 }, { "epoch": 6.007603387525499, "grad_norm": 0.11617530882358551, "learning_rate": 1.69972491809359e-05, "loss": 0.0071, "step": 194370 }, { "epoch": 6.008530629906658, "grad_norm": 0.16963127255439758, "learning_rate": 1.699678555974532e-05, "loss": 0.0071, "step": 194400 }, { "epoch": 
6.0094578722878165, "grad_norm": 0.10426275432109833, "learning_rate": 1.699632193855474e-05, "loss": 0.0072, "step": 194430 }, { "epoch": 6.010385114668974, "grad_norm": 0.12320408225059509, "learning_rate": 1.699585831736416e-05, "loss": 0.0066, "step": 194460 }, { "epoch": 6.011312357050133, "grad_norm": 0.08039303869009018, "learning_rate": 1.699539469617358e-05, "loss": 0.0073, "step": 194490 }, { "epoch": 6.012239599431291, "grad_norm": 0.16596247255802155, "learning_rate": 1.6994931074983002e-05, "loss": 0.0072, "step": 194520 }, { "epoch": 6.01316684181245, "grad_norm": 0.11478251963853836, "learning_rate": 1.6994467453792424e-05, "loss": 0.0067, "step": 194550 }, { "epoch": 6.014094084193609, "grad_norm": 0.12215802073478699, "learning_rate": 1.6994003832601845e-05, "loss": 0.0065, "step": 194580 }, { "epoch": 6.015021326574766, "grad_norm": 0.13180166482925415, "learning_rate": 1.6993540211411264e-05, "loss": 0.0069, "step": 194610 }, { "epoch": 6.015948568955925, "grad_norm": 0.11748894304037094, "learning_rate": 1.6993076590220685e-05, "loss": 0.0073, "step": 194640 }, { "epoch": 6.0168758113370835, "grad_norm": 0.11938311159610748, "learning_rate": 1.6992612969030107e-05, "loss": 0.0072, "step": 194670 }, { "epoch": 6.017803053718242, "grad_norm": 0.15436188876628876, "learning_rate": 1.6992149347839525e-05, "loss": 0.007, "step": 194700 }, { "epoch": 6.018730296099401, "grad_norm": 0.14067579805850983, "learning_rate": 1.6991685726648946e-05, "loss": 0.0068, "step": 194730 }, { "epoch": 6.019657538480558, "grad_norm": 0.10957080125808716, "learning_rate": 1.6991222105458368e-05, "loss": 0.0068, "step": 194760 }, { "epoch": 6.020584780861717, "grad_norm": 0.12124953418970108, "learning_rate": 1.699075848426779e-05, "loss": 0.0065, "step": 194790 }, { "epoch": 6.021512023242876, "grad_norm": 0.11448497325181961, "learning_rate": 1.699029486307721e-05, "loss": 0.0071, "step": 194820 }, { "epoch": 6.022439265624034, "grad_norm": 0.116889588534832, 
"learning_rate": 1.698983124188663e-05, "loss": 0.0072, "step": 194850 }, { "epoch": 6.023366508005193, "grad_norm": 0.17000062763690948, "learning_rate": 1.698936762069605e-05, "loss": 0.0066, "step": 194880 }, { "epoch": 6.024293750386351, "grad_norm": 0.15072254836559296, "learning_rate": 1.6988903999505472e-05, "loss": 0.0078, "step": 194910 }, { "epoch": 6.025220992767509, "grad_norm": 0.10107021778821945, "learning_rate": 1.6988440378314893e-05, "loss": 0.0062, "step": 194940 }, { "epoch": 6.026148235148668, "grad_norm": 0.09852844476699829, "learning_rate": 1.6987976757124315e-05, "loss": 0.0076, "step": 194970 }, { "epoch": 6.027075477529826, "grad_norm": 0.12499500811100006, "learning_rate": 1.6987513135933736e-05, "loss": 0.0068, "step": 195000 }, { "epoch": 6.028002719910985, "grad_norm": 0.16770386695861816, "learning_rate": 1.6987049514743155e-05, "loss": 0.0065, "step": 195030 }, { "epoch": 6.0289299622921435, "grad_norm": 0.12817399203777313, "learning_rate": 1.6986585893552576e-05, "loss": 0.0066, "step": 195060 }, { "epoch": 6.029857204673301, "grad_norm": 0.11210813373327255, "learning_rate": 1.6986122272361998e-05, "loss": 0.0073, "step": 195090 }, { "epoch": 6.03078444705446, "grad_norm": 0.09273353219032288, "learning_rate": 1.6985658651171416e-05, "loss": 0.007, "step": 195120 }, { "epoch": 6.031711689435618, "grad_norm": 0.09711186587810516, "learning_rate": 1.6985195029980837e-05, "loss": 0.0068, "step": 195150 }, { "epoch": 6.032638931816777, "grad_norm": 0.1742587685585022, "learning_rate": 1.698473140879026e-05, "loss": 0.0067, "step": 195180 }, { "epoch": 6.033566174197936, "grad_norm": 0.11677074432373047, "learning_rate": 1.698426778759968e-05, "loss": 0.0071, "step": 195210 }, { "epoch": 6.034493416579094, "grad_norm": 0.12011399120092392, "learning_rate": 1.69838041664091e-05, "loss": 0.0075, "step": 195240 }, { "epoch": 6.035420658960252, "grad_norm": 0.098607137799263, "learning_rate": 1.698334054521852e-05, "loss": 0.0066, "step": 
195270 }, { "epoch": 6.0363479013414105, "grad_norm": 0.07012826204299927, "learning_rate": 1.698287692402794e-05, "loss": 0.0071, "step": 195300 }, { "epoch": 6.037275143722569, "grad_norm": 0.1142946407198906, "learning_rate": 1.6982413302837363e-05, "loss": 0.007, "step": 195330 }, { "epoch": 6.038202386103728, "grad_norm": 0.14118807017803192, "learning_rate": 1.6981949681646784e-05, "loss": 0.0062, "step": 195360 }, { "epoch": 6.039129628484886, "grad_norm": 0.09943905472755432, "learning_rate": 1.6981486060456206e-05, "loss": 0.0073, "step": 195390 }, { "epoch": 6.040056870866044, "grad_norm": 0.11939524114131927, "learning_rate": 1.6981022439265627e-05, "loss": 0.0072, "step": 195420 }, { "epoch": 6.040984113247203, "grad_norm": 0.10125478357076645, "learning_rate": 1.6980558818075045e-05, "loss": 0.0068, "step": 195450 }, { "epoch": 6.041911355628361, "grad_norm": 0.12042688578367233, "learning_rate": 1.6980095196884467e-05, "loss": 0.0066, "step": 195480 }, { "epoch": 6.04283859800952, "grad_norm": 0.1112159714102745, "learning_rate": 1.697963157569389e-05, "loss": 0.0077, "step": 195510 }, { "epoch": 6.043765840390678, "grad_norm": 0.11159605532884598, "learning_rate": 1.697916795450331e-05, "loss": 0.0069, "step": 195540 }, { "epoch": 6.044693082771836, "grad_norm": 0.11639701575040817, "learning_rate": 1.697870433331273e-05, "loss": 0.0076, "step": 195570 }, { "epoch": 6.045620325152995, "grad_norm": 0.15667259693145752, "learning_rate": 1.697824071212215e-05, "loss": 0.0073, "step": 195600 }, { "epoch": 6.046547567534153, "grad_norm": 0.0896238163113594, "learning_rate": 1.697777709093157e-05, "loss": 0.0068, "step": 195630 }, { "epoch": 6.047474809915312, "grad_norm": 0.13665355741977692, "learning_rate": 1.6977313469740993e-05, "loss": 0.0069, "step": 195660 }, { "epoch": 6.0484020522964705, "grad_norm": 0.10011137276887894, "learning_rate": 1.697684984855041e-05, "loss": 0.0069, "step": 195690 }, { "epoch": 6.049329294677629, "grad_norm": 
0.07763887941837311, "learning_rate": 1.6976386227359832e-05, "loss": 0.0067, "step": 195720 }, { "epoch": 6.050256537058787, "grad_norm": 0.11956050246953964, "learning_rate": 1.6975922606169254e-05, "loss": 0.0071, "step": 195750 }, { "epoch": 6.0511837794399455, "grad_norm": 0.10191451013088226, "learning_rate": 1.6975458984978675e-05, "loss": 0.0071, "step": 195780 }, { "epoch": 6.052111021821104, "grad_norm": 0.10247265547513962, "learning_rate": 1.6974995363788097e-05, "loss": 0.007, "step": 195810 }, { "epoch": 6.053038264202263, "grad_norm": 0.11693903803825378, "learning_rate": 1.6974531742597515e-05, "loss": 0.0069, "step": 195840 }, { "epoch": 6.053965506583421, "grad_norm": 0.1310906708240509, "learning_rate": 1.6974068121406936e-05, "loss": 0.0078, "step": 195870 }, { "epoch": 6.054892748964579, "grad_norm": 0.09471938759088516, "learning_rate": 1.6973604500216358e-05, "loss": 0.0068, "step": 195900 }, { "epoch": 6.055819991345738, "grad_norm": 0.13263817131519318, "learning_rate": 1.697314087902578e-05, "loss": 0.0066, "step": 195930 }, { "epoch": 6.056747233726896, "grad_norm": 0.1299484223127365, "learning_rate": 1.69726772578352e-05, "loss": 0.0069, "step": 195960 }, { "epoch": 6.057674476108055, "grad_norm": 0.0866088941693306, "learning_rate": 1.6972213636644622e-05, "loss": 0.0064, "step": 195990 }, { "epoch": 6.058601718489213, "grad_norm": 0.12384804338216782, "learning_rate": 1.697175001545404e-05, "loss": 0.0069, "step": 196020 }, { "epoch": 6.059528960870372, "grad_norm": 0.11889585107564926, "learning_rate": 1.6971286394263462e-05, "loss": 0.0073, "step": 196050 }, { "epoch": 6.06045620325153, "grad_norm": 0.13635315001010895, "learning_rate": 1.6970822773072884e-05, "loss": 0.0066, "step": 196080 }, { "epoch": 6.061383445632688, "grad_norm": 0.13312995433807373, "learning_rate": 1.69703591518823e-05, "loss": 0.0069, "step": 196110 }, { "epoch": 6.062310688013847, "grad_norm": 0.14301100373268127, "learning_rate": 1.6969895530691723e-05, 
"loss": 0.0066, "step": 196140 }, { "epoch": 6.0632379303950055, "grad_norm": 0.13929763436317444, "learning_rate": 1.6969431909501145e-05, "loss": 0.007, "step": 196170 }, { "epoch": 6.064165172776164, "grad_norm": 0.09694914519786835, "learning_rate": 1.6968968288310566e-05, "loss": 0.0069, "step": 196200 }, { "epoch": 6.065092415157322, "grad_norm": 0.1082649901509285, "learning_rate": 1.6968504667119988e-05, "loss": 0.007, "step": 196230 }, { "epoch": 6.06601965753848, "grad_norm": 0.12121406942605972, "learning_rate": 1.6968041045929406e-05, "loss": 0.0075, "step": 196260 }, { "epoch": 6.066946899919639, "grad_norm": 0.1386193484067917, "learning_rate": 1.6967577424738827e-05, "loss": 0.0071, "step": 196290 }, { "epoch": 6.067874142300798, "grad_norm": 0.14480207860469818, "learning_rate": 1.696711380354825e-05, "loss": 0.007, "step": 196320 }, { "epoch": 6.068801384681956, "grad_norm": 0.11939758062362671, "learning_rate": 1.696665018235767e-05, "loss": 0.007, "step": 196350 }, { "epoch": 6.069728627063114, "grad_norm": 0.1064077615737915, "learning_rate": 1.6966186561167092e-05, "loss": 0.0071, "step": 196380 }, { "epoch": 6.0706558694442725, "grad_norm": 0.09733089804649353, "learning_rate": 1.6965722939976513e-05, "loss": 0.0069, "step": 196410 }, { "epoch": 6.071583111825431, "grad_norm": 0.0970403328537941, "learning_rate": 1.696525931878593e-05, "loss": 0.0069, "step": 196440 }, { "epoch": 6.07251035420659, "grad_norm": 0.18465878069400787, "learning_rate": 1.6964795697595353e-05, "loss": 0.0069, "step": 196470 }, { "epoch": 6.073437596587748, "grad_norm": 0.12483329325914383, "learning_rate": 1.696433207640477e-05, "loss": 0.0068, "step": 196500 }, { "epoch": 6.074364838968907, "grad_norm": 0.15653690695762634, "learning_rate": 1.6963868455214193e-05, "loss": 0.0071, "step": 196530 }, { "epoch": 6.075292081350065, "grad_norm": 0.12409032136201859, "learning_rate": 1.6963404834023614e-05, "loss": 0.0062, "step": 196560 }, { "epoch": 6.076219323731223, 
"grad_norm": 0.1104772686958313, "learning_rate": 1.6962941212833036e-05, "loss": 0.0068, "step": 196590 }, { "epoch": 6.077146566112382, "grad_norm": 0.15348011255264282, "learning_rate": 1.6962477591642457e-05, "loss": 0.0066, "step": 196620 }, { "epoch": 6.07807380849354, "grad_norm": 0.10075996816158295, "learning_rate": 1.696201397045188e-05, "loss": 0.0067, "step": 196650 }, { "epoch": 6.079001050874699, "grad_norm": 0.088174968957901, "learning_rate": 1.6961550349261297e-05, "loss": 0.0066, "step": 196680 }, { "epoch": 6.079928293255857, "grad_norm": 0.15154874324798584, "learning_rate": 1.6961086728070718e-05, "loss": 0.0072, "step": 196710 }, { "epoch": 6.080855535637015, "grad_norm": 0.08902809768915176, "learning_rate": 1.696062310688014e-05, "loss": 0.0065, "step": 196740 }, { "epoch": 6.081782778018174, "grad_norm": 0.11307142674922943, "learning_rate": 1.696015948568956e-05, "loss": 0.0076, "step": 196770 }, { "epoch": 6.0827100203993325, "grad_norm": 0.15476523339748383, "learning_rate": 1.6959695864498983e-05, "loss": 0.0067, "step": 196800 }, { "epoch": 6.083637262780491, "grad_norm": 0.13645733892917633, "learning_rate": 1.69592322433084e-05, "loss": 0.0074, "step": 196830 }, { "epoch": 6.08456450516165, "grad_norm": 0.1623660773038864, "learning_rate": 1.6958768622117822e-05, "loss": 0.0067, "step": 196860 }, { "epoch": 6.085491747542807, "grad_norm": 0.12419017404317856, "learning_rate": 1.6958305000927244e-05, "loss": 0.0067, "step": 196890 }, { "epoch": 6.086418989923966, "grad_norm": 0.09396208822727203, "learning_rate": 1.6957841379736665e-05, "loss": 0.0068, "step": 196920 }, { "epoch": 6.087346232305125, "grad_norm": 0.26559939980506897, "learning_rate": 1.6957377758546087e-05, "loss": 0.0061, "step": 196950 }, { "epoch": 6.088273474686283, "grad_norm": 0.21920493245124817, "learning_rate": 1.695691413735551e-05, "loss": 0.007, "step": 196980 }, { "epoch": 6.089200717067442, "grad_norm": 0.10916461795568466, "learning_rate": 
1.6956450516164926e-05, "loss": 0.0069, "step": 197010 }, { "epoch": 6.0901279594485995, "grad_norm": 0.11732544749975204, "learning_rate": 1.6955986894974348e-05, "loss": 0.0064, "step": 197040 }, { "epoch": 6.091055201829758, "grad_norm": 0.17126469314098358, "learning_rate": 1.695552327378377e-05, "loss": 0.0068, "step": 197070 }, { "epoch": 6.091982444210917, "grad_norm": 0.10238152742385864, "learning_rate": 1.6955059652593188e-05, "loss": 0.0074, "step": 197100 }, { "epoch": 6.092909686592075, "grad_norm": 0.1794992983341217, "learning_rate": 1.695459603140261e-05, "loss": 0.0064, "step": 197130 }, { "epoch": 6.093836928973234, "grad_norm": 0.14070133864879608, "learning_rate": 1.695413241021203e-05, "loss": 0.0062, "step": 197160 }, { "epoch": 6.094764171354392, "grad_norm": 0.122765451669693, "learning_rate": 1.6953668789021452e-05, "loss": 0.0072, "step": 197190 }, { "epoch": 6.09569141373555, "grad_norm": 0.1875840127468109, "learning_rate": 1.6953205167830874e-05, "loss": 0.0066, "step": 197220 }, { "epoch": 6.096618656116709, "grad_norm": 0.13358725607395172, "learning_rate": 1.6952741546640292e-05, "loss": 0.0067, "step": 197250 }, { "epoch": 6.097545898497867, "grad_norm": 0.1647392064332962, "learning_rate": 1.6952277925449713e-05, "loss": 0.0071, "step": 197280 }, { "epoch": 6.098473140879026, "grad_norm": 0.1161874383687973, "learning_rate": 1.6951814304259135e-05, "loss": 0.0066, "step": 197310 }, { "epoch": 6.099400383260185, "grad_norm": 0.11281886696815491, "learning_rate": 1.6951350683068556e-05, "loss": 0.0076, "step": 197340 }, { "epoch": 6.100327625641342, "grad_norm": 0.14778906106948853, "learning_rate": 1.6950887061877978e-05, "loss": 0.0072, "step": 197370 }, { "epoch": 6.101254868022501, "grad_norm": 0.09455560147762299, "learning_rate": 1.69504234406874e-05, "loss": 0.007, "step": 197400 }, { "epoch": 6.1021821104036595, "grad_norm": 0.09131788462400436, "learning_rate": 1.6949959819496817e-05, "loss": 0.0067, "step": 197430 }, { 
"epoch": 6.103109352784818, "grad_norm": 0.08625137060880661, "learning_rate": 1.694949619830624e-05, "loss": 0.0067, "step": 197460 }, { "epoch": 6.104036595165977, "grad_norm": 0.13257604837417603, "learning_rate": 1.694903257711566e-05, "loss": 0.0062, "step": 197490 }, { "epoch": 6.1049638375471345, "grad_norm": 0.1493774801492691, "learning_rate": 1.694856895592508e-05, "loss": 0.0076, "step": 197520 }, { "epoch": 6.105891079928293, "grad_norm": 0.09725165367126465, "learning_rate": 1.69481053347345e-05, "loss": 0.0068, "step": 197550 }, { "epoch": 6.106818322309452, "grad_norm": 0.13965868949890137, "learning_rate": 1.694764171354392e-05, "loss": 0.0068, "step": 197580 }, { "epoch": 6.10774556469061, "grad_norm": 0.13445086777210236, "learning_rate": 1.6947178092353343e-05, "loss": 0.0064, "step": 197610 }, { "epoch": 6.108672807071769, "grad_norm": 0.09544359892606735, "learning_rate": 1.6946714471162765e-05, "loss": 0.0068, "step": 197640 }, { "epoch": 6.109600049452927, "grad_norm": 0.08452162146568298, "learning_rate": 1.6946250849972183e-05, "loss": 0.0074, "step": 197670 }, { "epoch": 6.110527291834085, "grad_norm": 0.14704886078834534, "learning_rate": 1.6945787228781604e-05, "loss": 0.0064, "step": 197700 }, { "epoch": 6.111454534215244, "grad_norm": 0.11374540627002716, "learning_rate": 1.6945323607591026e-05, "loss": 0.0064, "step": 197730 }, { "epoch": 6.112381776596402, "grad_norm": 0.15377655625343323, "learning_rate": 1.6944859986400447e-05, "loss": 0.0068, "step": 197760 }, { "epoch": 6.113309018977561, "grad_norm": 0.13596834242343903, "learning_rate": 1.694439636520987e-05, "loss": 0.0068, "step": 197790 }, { "epoch": 6.11423626135872, "grad_norm": 0.09956619143486023, "learning_rate": 1.694393274401929e-05, "loss": 0.0067, "step": 197820 }, { "epoch": 6.115163503739877, "grad_norm": 0.12596118450164795, "learning_rate": 1.694346912282871e-05, "loss": 0.0073, "step": 197850 }, { "epoch": 6.116090746121036, "grad_norm": 0.18601979315280914, 
"learning_rate": 1.694300550163813e-05, "loss": 0.0069, "step": 197880 }, { "epoch": 6.1170179885021945, "grad_norm": 0.12348531931638718, "learning_rate": 1.6942541880447548e-05, "loss": 0.0074, "step": 197910 }, { "epoch": 6.117945230883353, "grad_norm": 0.09312906861305237, "learning_rate": 1.694207825925697e-05, "loss": 0.0069, "step": 197940 }, { "epoch": 6.118872473264512, "grad_norm": 0.12827621400356293, "learning_rate": 1.694161463806639e-05, "loss": 0.0066, "step": 197970 }, { "epoch": 6.119799715645669, "grad_norm": 0.11234632879495621, "learning_rate": 1.6941151016875812e-05, "loss": 0.0067, "step": 198000 }, { "epoch": 6.120726958026828, "grad_norm": 0.11468595266342163, "learning_rate": 1.6940687395685234e-05, "loss": 0.0063, "step": 198030 }, { "epoch": 6.121654200407987, "grad_norm": 0.15473030507564545, "learning_rate": 1.6940223774494655e-05, "loss": 0.0066, "step": 198060 }, { "epoch": 6.122581442789145, "grad_norm": 0.11672262847423553, "learning_rate": 1.6939760153304074e-05, "loss": 0.0069, "step": 198090 }, { "epoch": 6.123508685170304, "grad_norm": 0.14329572021961212, "learning_rate": 1.6939296532113495e-05, "loss": 0.0071, "step": 198120 }, { "epoch": 6.124435927551462, "grad_norm": 0.16280236840248108, "learning_rate": 1.6938832910922917e-05, "loss": 0.0064, "step": 198150 }, { "epoch": 6.12536316993262, "grad_norm": 0.09354718774557114, "learning_rate": 1.6938369289732338e-05, "loss": 0.0069, "step": 198180 }, { "epoch": 6.126290412313779, "grad_norm": 0.13854792714118958, "learning_rate": 1.693790566854176e-05, "loss": 0.0077, "step": 198210 }, { "epoch": 6.127217654694937, "grad_norm": 0.09288833290338516, "learning_rate": 1.6937442047351178e-05, "loss": 0.0067, "step": 198240 }, { "epoch": 6.128144897076096, "grad_norm": 0.15810200572013855, "learning_rate": 1.69369784261606e-05, "loss": 0.0068, "step": 198270 }, { "epoch": 6.1290721394572545, "grad_norm": 0.12253119796514511, "learning_rate": 1.693651480497002e-05, "loss": 0.0066, 
"step": 198300 }, { "epoch": 6.129999381838412, "grad_norm": 0.10140147060155869, "learning_rate": 1.6936051183779442e-05, "loss": 0.0067, "step": 198330 }, { "epoch": 6.130926624219571, "grad_norm": 0.14381064474582672, "learning_rate": 1.6935587562588864e-05, "loss": 0.0071, "step": 198360 }, { "epoch": 6.131853866600729, "grad_norm": 0.12673240900039673, "learning_rate": 1.6935139395437968e-05, "loss": 0.0065, "step": 198390 }, { "epoch": 6.132781108981888, "grad_norm": 0.13868093490600586, "learning_rate": 1.693467577424739e-05, "loss": 0.0064, "step": 198420 }, { "epoch": 6.133708351363047, "grad_norm": 0.11186333000659943, "learning_rate": 1.693421215305681e-05, "loss": 0.0064, "step": 198450 }, { "epoch": 6.134635593744204, "grad_norm": 0.11119774729013443, "learning_rate": 1.6933748531866233e-05, "loss": 0.0074, "step": 198480 }, { "epoch": 6.135562836125363, "grad_norm": 0.12790518999099731, "learning_rate": 1.693328491067565e-05, "loss": 0.0072, "step": 198510 }, { "epoch": 6.1364900785065215, "grad_norm": 0.10356923937797546, "learning_rate": 1.6932821289485072e-05, "loss": 0.0071, "step": 198540 }, { "epoch": 6.13741732088768, "grad_norm": 0.13242170214653015, "learning_rate": 1.6932357668294494e-05, "loss": 0.0067, "step": 198570 }, { "epoch": 6.138344563268839, "grad_norm": 0.14294420182704926, "learning_rate": 1.6931894047103915e-05, "loss": 0.0074, "step": 198600 }, { "epoch": 6.139271805649997, "grad_norm": 0.12934806942939758, "learning_rate": 1.6931430425913337e-05, "loss": 0.0069, "step": 198630 }, { "epoch": 6.140199048031155, "grad_norm": 0.09913332015275955, "learning_rate": 1.6930966804722755e-05, "loss": 0.0064, "step": 198660 }, { "epoch": 6.141126290412314, "grad_norm": 0.08918213844299316, "learning_rate": 1.6930503183532177e-05, "loss": 0.0074, "step": 198690 }, { "epoch": 6.142053532793472, "grad_norm": 0.18167953193187714, "learning_rate": 1.6930039562341598e-05, "loss": 0.0068, "step": 198720 }, { "epoch": 6.142980775174631, 
"grad_norm": 0.14296993613243103, "learning_rate": 1.6929575941151016e-05, "loss": 0.0073, "step": 198750 }, { "epoch": 6.143908017555789, "grad_norm": 0.16695065796375275, "learning_rate": 1.6929112319960438e-05, "loss": 0.0068, "step": 198780 }, { "epoch": 6.144835259936947, "grad_norm": 0.11813700944185257, "learning_rate": 1.692864869876986e-05, "loss": 0.0073, "step": 198810 }, { "epoch": 6.145762502318106, "grad_norm": 0.08745688945055008, "learning_rate": 1.692818507757928e-05, "loss": 0.0072, "step": 198840 }, { "epoch": 6.146689744699264, "grad_norm": 0.09696047753095627, "learning_rate": 1.6927721456388702e-05, "loss": 0.0066, "step": 198870 }, { "epoch": 6.147616987080423, "grad_norm": 0.08729918301105499, "learning_rate": 1.6927257835198124e-05, "loss": 0.0065, "step": 198900 }, { "epoch": 6.1485442294615815, "grad_norm": 0.14668020606040955, "learning_rate": 1.6926794214007542e-05, "loss": 0.0072, "step": 198930 }, { "epoch": 6.14947147184274, "grad_norm": 0.15823760628700256, "learning_rate": 1.6926330592816963e-05, "loss": 0.0073, "step": 198960 }, { "epoch": 6.150398714223898, "grad_norm": 0.1627683937549591, "learning_rate": 1.6925866971626385e-05, "loss": 0.0073, "step": 198990 }, { "epoch": 6.151325956605056, "grad_norm": 0.08402393758296967, "learning_rate": 1.6925403350435806e-05, "loss": 0.0066, "step": 199020 }, { "epoch": 6.152253198986215, "grad_norm": 0.15119095146656036, "learning_rate": 1.6924939729245228e-05, "loss": 0.0069, "step": 199050 }, { "epoch": 6.153180441367374, "grad_norm": 0.1084417849779129, "learning_rate": 1.6924476108054646e-05, "loss": 0.0066, "step": 199080 }, { "epoch": 6.154107683748532, "grad_norm": 0.11388779431581497, "learning_rate": 1.6924012486864067e-05, "loss": 0.0068, "step": 199110 }, { "epoch": 6.15503492612969, "grad_norm": 0.10843993723392487, "learning_rate": 1.692354886567349e-05, "loss": 0.0075, "step": 199140 }, { "epoch": 6.1559621685108485, "grad_norm": 0.12690012156963348, "learning_rate": 
1.6923085244482907e-05, "loss": 0.007, "step": 199170 }, { "epoch": 6.156889410892007, "grad_norm": 0.1055103987455368, "learning_rate": 1.692262162329233e-05, "loss": 0.0066, "step": 199200 }, { "epoch": 6.157816653273166, "grad_norm": 0.1356867104768753, "learning_rate": 1.692215800210175e-05, "loss": 0.0066, "step": 199230 }, { "epoch": 6.158743895654324, "grad_norm": 0.13564446568489075, "learning_rate": 1.692169438091117e-05, "loss": 0.0071, "step": 199260 }, { "epoch": 6.159671138035482, "grad_norm": 0.10249362140893936, "learning_rate": 1.6921230759720593e-05, "loss": 0.0069, "step": 199290 }, { "epoch": 6.160598380416641, "grad_norm": 0.11600743979215622, "learning_rate": 1.692076713853001e-05, "loss": 0.0066, "step": 199320 }, { "epoch": 6.161525622797799, "grad_norm": 0.1154041513800621, "learning_rate": 1.6920303517339433e-05, "loss": 0.0076, "step": 199350 }, { "epoch": 6.162452865178958, "grad_norm": 0.0818675309419632, "learning_rate": 1.6919839896148854e-05, "loss": 0.0067, "step": 199380 }, { "epoch": 6.1633801075601164, "grad_norm": 0.11573608219623566, "learning_rate": 1.6919376274958276e-05, "loss": 0.0064, "step": 199410 }, { "epoch": 6.164307349941275, "grad_norm": 0.08498375862836838, "learning_rate": 1.6918912653767697e-05, "loss": 0.0069, "step": 199440 }, { "epoch": 6.165234592322433, "grad_norm": 0.1342819184064865, "learning_rate": 1.691844903257712e-05, "loss": 0.0073, "step": 199470 }, { "epoch": 6.166161834703591, "grad_norm": 0.08443453162908554, "learning_rate": 1.6917985411386537e-05, "loss": 0.0073, "step": 199500 }, { "epoch": 6.16708907708475, "grad_norm": 0.11067397147417068, "learning_rate": 1.691752179019596e-05, "loss": 0.0073, "step": 199530 }, { "epoch": 6.168016319465909, "grad_norm": 0.09885276108980179, "learning_rate": 1.691705816900538e-05, "loss": 0.0066, "step": 199560 }, { "epoch": 6.168943561847067, "grad_norm": 0.15529654920101166, "learning_rate": 1.6916594547814798e-05, "loss": 0.008, "step": 199590 }, { 
"epoch": 6.169870804228225, "grad_norm": 0.14077146351337433, "learning_rate": 1.6916130926624223e-05, "loss": 0.0071, "step": 199620 }, { "epoch": 6.1707980466093835, "grad_norm": 0.08703923225402832, "learning_rate": 1.691566730543364e-05, "loss": 0.0079, "step": 199650 }, { "epoch": 6.171725288990542, "grad_norm": 0.12424229830503464, "learning_rate": 1.6915203684243063e-05, "loss": 0.0072, "step": 199680 }, { "epoch": 6.172652531371701, "grad_norm": 0.16063006222248077, "learning_rate": 1.6914740063052484e-05, "loss": 0.0069, "step": 199710 }, { "epoch": 6.173579773752859, "grad_norm": 0.10964534431695938, "learning_rate": 1.6914276441861902e-05, "loss": 0.0069, "step": 199740 }, { "epoch": 6.174507016134017, "grad_norm": 0.09850499778985977, "learning_rate": 1.6913812820671324e-05, "loss": 0.0068, "step": 199770 }, { "epoch": 6.175434258515176, "grad_norm": 0.1254219263792038, "learning_rate": 1.6913349199480745e-05, "loss": 0.007, "step": 199800 }, { "epoch": 6.176361500896334, "grad_norm": 0.11184919625520706, "learning_rate": 1.6912885578290167e-05, "loss": 0.007, "step": 199830 }, { "epoch": 6.177288743277493, "grad_norm": 0.13768723607063293, "learning_rate": 1.6912421957099588e-05, "loss": 0.0072, "step": 199860 }, { "epoch": 6.178215985658651, "grad_norm": 0.13069021701812744, "learning_rate": 1.691195833590901e-05, "loss": 0.0065, "step": 199890 }, { "epoch": 6.17914322803981, "grad_norm": 0.1256827861070633, "learning_rate": 1.6911494714718428e-05, "loss": 0.0068, "step": 199920 }, { "epoch": 6.180070470420968, "grad_norm": 0.12265090644359589, "learning_rate": 1.691103109352785e-05, "loss": 0.0067, "step": 199950 }, { "epoch": 6.180997712802126, "grad_norm": 0.09930244833230972, "learning_rate": 1.691056747233727e-05, "loss": 0.0067, "step": 199980 }, { "epoch": 6.181615874389565, "eval_f1": 0.9977372776944012, "eval_loss": 0.00758362328633666, "eval_precision": 0.997735781903205, "eval_recall": 0.9977396448771155, "eval_runtime": 4182.7447, 
"eval_samples_per_second": 282.399, "eval_steps_per_second": 8.825, "step": 200000 }, { "epoch": 6.181924955183285, "grad_norm": 0.0863494724035263, "learning_rate": 1.6910103851146692e-05, "loss": 0.0061, "step": 200010 }, { "epoch": 6.1828521975644435, "grad_norm": 0.0835685208439827, "learning_rate": 1.6909640229956114e-05, "loss": 0.0067, "step": 200040 }, { "epoch": 6.183779439945602, "grad_norm": 0.13501127064228058, "learning_rate": 1.6909176608765532e-05, "loss": 0.0068, "step": 200070 }, { "epoch": 6.18470668232676, "grad_norm": 0.11717486381530762, "learning_rate": 1.6908712987574953e-05, "loss": 0.0066, "step": 200100 }, { "epoch": 6.185633924707918, "grad_norm": 0.11647889763116837, "learning_rate": 1.6908249366384375e-05, "loss": 0.0061, "step": 200130 }, { "epoch": 6.186561167089077, "grad_norm": 0.1398460865020752, "learning_rate": 1.6907785745193793e-05, "loss": 0.0064, "step": 200160 }, { "epoch": 6.187488409470236, "grad_norm": 0.09003426134586334, "learning_rate": 1.6907322124003215e-05, "loss": 0.007, "step": 200190 }, { "epoch": 6.188415651851394, "grad_norm": 0.10013366490602493, "learning_rate": 1.6906858502812636e-05, "loss": 0.0069, "step": 200220 }, { "epoch": 6.189342894232553, "grad_norm": 0.09993542730808258, "learning_rate": 1.6906394881622058e-05, "loss": 0.0064, "step": 200250 }, { "epoch": 6.1902701366137105, "grad_norm": 0.11177488416433334, "learning_rate": 1.690593126043148e-05, "loss": 0.0072, "step": 200280 }, { "epoch": 6.191197378994869, "grad_norm": 0.11364465951919556, "learning_rate": 1.69054676392409e-05, "loss": 0.0071, "step": 200310 }, { "epoch": 6.192124621376028, "grad_norm": 0.16817723214626312, "learning_rate": 1.690500401805032e-05, "loss": 0.0068, "step": 200340 }, { "epoch": 6.193051863757186, "grad_norm": 0.1349310427904129, "learning_rate": 1.690454039685974e-05, "loss": 0.0069, "step": 200370 }, { "epoch": 6.193979106138345, "grad_norm": 0.0867416188120842, "learning_rate": 1.6904076775669162e-05, "loss": 
0.0074, "step": 200400 }, { "epoch": 6.194906348519503, "grad_norm": 0.12170007824897766, "learning_rate": 1.6903613154478583e-05, "loss": 0.0065, "step": 200430 }, { "epoch": 6.195833590900661, "grad_norm": 0.1400425285100937, "learning_rate": 1.6903149533288005e-05, "loss": 0.0068, "step": 200460 }, { "epoch": 6.19676083328182, "grad_norm": 0.20151881873607635, "learning_rate": 1.6902685912097423e-05, "loss": 0.0066, "step": 200490 }, { "epoch": 6.197688075662978, "grad_norm": 0.17742682993412018, "learning_rate": 1.6902222290906844e-05, "loss": 0.0071, "step": 200520 }, { "epoch": 6.198615318044137, "grad_norm": 0.15556760132312775, "learning_rate": 1.6901758669716266e-05, "loss": 0.007, "step": 200550 }, { "epoch": 6.199542560425295, "grad_norm": 0.17573556303977966, "learning_rate": 1.6901295048525684e-05, "loss": 0.0066, "step": 200580 }, { "epoch": 6.200469802806453, "grad_norm": 0.16334222257137299, "learning_rate": 1.6900831427335106e-05, "loss": 0.0076, "step": 200610 }, { "epoch": 6.201397045187612, "grad_norm": 0.10067225247621536, "learning_rate": 1.6900367806144527e-05, "loss": 0.0078, "step": 200640 }, { "epoch": 6.2023242875687705, "grad_norm": 0.1532747894525528, "learning_rate": 1.689990418495395e-05, "loss": 0.0064, "step": 200670 }, { "epoch": 6.203251529949929, "grad_norm": 0.09279467165470123, "learning_rate": 1.689944056376337e-05, "loss": 0.0067, "step": 200700 }, { "epoch": 6.204178772331088, "grad_norm": 0.10027851164340973, "learning_rate": 1.6898976942572788e-05, "loss": 0.0064, "step": 200730 }, { "epoch": 6.205106014712245, "grad_norm": 0.12654730677604675, "learning_rate": 1.6898544229461582e-05, "loss": 0.0067, "step": 200760 }, { "epoch": 6.206033257093404, "grad_norm": 0.10266076773405075, "learning_rate": 1.6898080608271004e-05, "loss": 0.0066, "step": 200790 }, { "epoch": 6.206960499474563, "grad_norm": 0.15920771658420563, "learning_rate": 1.6897616987080425e-05, "loss": 0.0065, "step": 200820 }, { "epoch": 6.207887741855721, 
"grad_norm": 0.14028635621070862, "learning_rate": 1.6897153365889844e-05, "loss": 0.0071, "step": 200850 }, { "epoch": 6.20881498423688, "grad_norm": 0.11954990774393082, "learning_rate": 1.6896689744699265e-05, "loss": 0.0079, "step": 200880 }, { "epoch": 6.2097422266180375, "grad_norm": 0.09932807087898254, "learning_rate": 1.6896226123508687e-05, "loss": 0.0071, "step": 200910 }, { "epoch": 6.210669468999196, "grad_norm": 0.12307319045066833, "learning_rate": 1.6895762502318108e-05, "loss": 0.006, "step": 200940 }, { "epoch": 6.211596711380355, "grad_norm": 0.08397147804498672, "learning_rate": 1.689529888112753e-05, "loss": 0.0073, "step": 200970 }, { "epoch": 6.212523953761513, "grad_norm": 0.10945069044828415, "learning_rate": 1.6894835259936948e-05, "loss": 0.0063, "step": 201000 }, { "epoch": 6.213451196142672, "grad_norm": 0.11270319670438766, "learning_rate": 1.689437163874637e-05, "loss": 0.0067, "step": 201030 }, { "epoch": 6.2143784385238305, "grad_norm": 0.12339692562818527, "learning_rate": 1.689390801755579e-05, "loss": 0.0066, "step": 201060 }, { "epoch": 6.215305680904988, "grad_norm": 0.08018703013658524, "learning_rate": 1.6893444396365212e-05, "loss": 0.0068, "step": 201090 }, { "epoch": 6.216232923286147, "grad_norm": 0.11325030028820038, "learning_rate": 1.6892980775174634e-05, "loss": 0.0064, "step": 201120 }, { "epoch": 6.2171601656673054, "grad_norm": 0.15511848032474518, "learning_rate": 1.6892517153984055e-05, "loss": 0.0063, "step": 201150 }, { "epoch": 6.218087408048464, "grad_norm": 0.10602276772260666, "learning_rate": 1.689206898683316e-05, "loss": 0.0066, "step": 201180 }, { "epoch": 6.219014650429623, "grad_norm": 0.11747659742832184, "learning_rate": 1.689160536564258e-05, "loss": 0.0071, "step": 201210 }, { "epoch": 6.21994189281078, "grad_norm": 0.13565289974212646, "learning_rate": 1.6891141744452003e-05, "loss": 0.0076, "step": 201240 }, { "epoch": 6.220869135191939, "grad_norm": 0.09500136971473694, "learning_rate": 
1.689067812326142e-05, "loss": 0.0066, "step": 201270 }, { "epoch": 6.221796377573098, "grad_norm": 0.144953653216362, "learning_rate": 1.6890214502070842e-05, "loss": 0.0071, "step": 201300 }, { "epoch": 6.222723619954256, "grad_norm": 0.1909025013446808, "learning_rate": 1.6889750880880264e-05, "loss": 0.0077, "step": 201330 }, { "epoch": 6.223650862335415, "grad_norm": 0.16617758572101593, "learning_rate": 1.6889287259689685e-05, "loss": 0.0068, "step": 201360 }, { "epoch": 6.2245781047165725, "grad_norm": 0.14017383754253387, "learning_rate": 1.6888823638499107e-05, "loss": 0.007, "step": 201390 }, { "epoch": 6.225505347097731, "grad_norm": 0.08918240666389465, "learning_rate": 1.6888360017308525e-05, "loss": 0.0072, "step": 201420 }, { "epoch": 6.22643258947889, "grad_norm": 0.128618061542511, "learning_rate": 1.6887896396117947e-05, "loss": 0.0069, "step": 201450 }, { "epoch": 6.227359831860048, "grad_norm": 0.05488007515668869, "learning_rate": 1.6887432774927368e-05, "loss": 0.0071, "step": 201480 }, { "epoch": 6.228287074241207, "grad_norm": 0.09817805886268616, "learning_rate": 1.6886969153736786e-05, "loss": 0.0077, "step": 201510 }, { "epoch": 6.2292143166223655, "grad_norm": 0.11906830966472626, "learning_rate": 1.6886505532546208e-05, "loss": 0.0071, "step": 201540 }, { "epoch": 6.230141559003523, "grad_norm": 0.11423703283071518, "learning_rate": 1.688604191135563e-05, "loss": 0.0063, "step": 201570 }, { "epoch": 6.231068801384682, "grad_norm": 0.06490360200405121, "learning_rate": 1.688557829016505e-05, "loss": 0.0066, "step": 201600 }, { "epoch": 6.23199604376584, "grad_norm": 0.13041889667510986, "learning_rate": 1.6885114668974472e-05, "loss": 0.0067, "step": 201630 }, { "epoch": 6.232923286146999, "grad_norm": 0.12204357236623764, "learning_rate": 1.6884651047783894e-05, "loss": 0.0066, "step": 201660 }, { "epoch": 6.233850528528158, "grad_norm": 0.11238055676221848, "learning_rate": 1.6884187426593312e-05, "loss": 0.0068, "step": 201690 }, { 
"epoch": 6.234777770909315, "grad_norm": 0.09310929477214813, "learning_rate": 1.6883723805402733e-05, "loss": 0.0065, "step": 201720 }, { "epoch": 6.235705013290474, "grad_norm": 0.14287735521793365, "learning_rate": 1.6883260184212155e-05, "loss": 0.0074, "step": 201750 }, { "epoch": 6.2366322556716325, "grad_norm": 0.11483893543481827, "learning_rate": 1.6882796563021576e-05, "loss": 0.0066, "step": 201780 }, { "epoch": 6.237559498052791, "grad_norm": 0.10144514590501785, "learning_rate": 1.6882332941830998e-05, "loss": 0.0066, "step": 201810 }, { "epoch": 6.23848674043395, "grad_norm": 0.11938696354627609, "learning_rate": 1.6881869320640416e-05, "loss": 0.0069, "step": 201840 }, { "epoch": 6.239413982815108, "grad_norm": 0.11358854174613953, "learning_rate": 1.6881405699449837e-05, "loss": 0.0064, "step": 201870 }, { "epoch": 6.240341225196266, "grad_norm": 0.09585771709680557, "learning_rate": 1.688094207825926e-05, "loss": 0.0069, "step": 201900 }, { "epoch": 6.241268467577425, "grad_norm": 0.08079175651073456, "learning_rate": 1.6880478457068677e-05, "loss": 0.0071, "step": 201930 }, { "epoch": 6.242195709958583, "grad_norm": 0.12080620974302292, "learning_rate": 1.68800148358781e-05, "loss": 0.0075, "step": 201960 }, { "epoch": 6.243122952339742, "grad_norm": 0.09165628254413605, "learning_rate": 1.687955121468752e-05, "loss": 0.0066, "step": 201990 }, { "epoch": 6.2440501947209, "grad_norm": 0.16245579719543457, "learning_rate": 1.687908759349694e-05, "loss": 0.0063, "step": 202020 }, { "epoch": 6.244977437102058, "grad_norm": 0.1551934778690338, "learning_rate": 1.6878623972306363e-05, "loss": 0.0078, "step": 202050 }, { "epoch": 6.245904679483217, "grad_norm": 0.12539850175380707, "learning_rate": 1.687816035111578e-05, "loss": 0.0071, "step": 202080 }, { "epoch": 6.246831921864375, "grad_norm": 0.1445969045162201, "learning_rate": 1.6877696729925203e-05, "loss": 0.0066, "step": 202110 }, { "epoch": 6.247759164245534, "grad_norm": 0.1285250037908554, 
"learning_rate": 1.6877233108734624e-05, "loss": 0.0064, "step": 202140 }, { "epoch": 6.2486864066266925, "grad_norm": 0.10403125733137131, "learning_rate": 1.6876769487544046e-05, "loss": 0.0067, "step": 202170 }, { "epoch": 6.24961364900785, "grad_norm": 0.10169189423322678, "learning_rate": 1.6876305866353467e-05, "loss": 0.0067, "step": 202200 }, { "epoch": 6.250540891389009, "grad_norm": 0.11319846659898758, "learning_rate": 1.687584224516289e-05, "loss": 0.0067, "step": 202230 }, { "epoch": 6.251468133770167, "grad_norm": 0.07493367046117783, "learning_rate": 1.6875378623972307e-05, "loss": 0.0068, "step": 202260 }, { "epoch": 6.252395376151326, "grad_norm": 0.1266324818134308, "learning_rate": 1.687491500278173e-05, "loss": 0.007, "step": 202290 }, { "epoch": 6.253322618532485, "grad_norm": 0.13736547529697418, "learning_rate": 1.687445138159115e-05, "loss": 0.0071, "step": 202320 }, { "epoch": 6.254249860913643, "grad_norm": 0.09306496381759644, "learning_rate": 1.687398776040057e-05, "loss": 0.0063, "step": 202350 }, { "epoch": 6.255177103294801, "grad_norm": 0.11548775434494019, "learning_rate": 1.6873524139209993e-05, "loss": 0.0071, "step": 202380 }, { "epoch": 6.2561043456759595, "grad_norm": 0.11227481067180634, "learning_rate": 1.687306051801941e-05, "loss": 0.0072, "step": 202410 }, { "epoch": 6.257031588057118, "grad_norm": 0.1653938591480255, "learning_rate": 1.6872596896828833e-05, "loss": 0.0068, "step": 202440 }, { "epoch": 6.257958830438277, "grad_norm": 0.13207802176475525, "learning_rate": 1.6872133275638254e-05, "loss": 0.007, "step": 202470 }, { "epoch": 6.258886072819435, "grad_norm": 0.07805941253900528, "learning_rate": 1.6871669654447672e-05, "loss": 0.0065, "step": 202500 }, { "epoch": 6.259813315200593, "grad_norm": 0.10005497187376022, "learning_rate": 1.6871206033257094e-05, "loss": 0.0073, "step": 202530 }, { "epoch": 6.260740557581752, "grad_norm": 0.12859025597572327, "learning_rate": 1.6870742412066515e-05, "loss": 0.0068, 
"step": 202560 }, { "epoch": 6.26166779996291, "grad_norm": 0.09697561711072922, "learning_rate": 1.6870278790875937e-05, "loss": 0.0069, "step": 202590 }, { "epoch": 6.262595042344069, "grad_norm": 0.16376660764217377, "learning_rate": 1.6869815169685358e-05, "loss": 0.0067, "step": 202620 }, { "epoch": 6.263522284725227, "grad_norm": 0.15227444469928741, "learning_rate": 1.686935154849478e-05, "loss": 0.007, "step": 202650 }, { "epoch": 6.264449527106386, "grad_norm": 0.1738913655281067, "learning_rate": 1.6868887927304198e-05, "loss": 0.0062, "step": 202680 }, { "epoch": 6.265376769487544, "grad_norm": 0.09091150015592575, "learning_rate": 1.686842430611362e-05, "loss": 0.0064, "step": 202710 }, { "epoch": 6.266304011868702, "grad_norm": 0.10015274584293365, "learning_rate": 1.686796068492304e-05, "loss": 0.0063, "step": 202740 }, { "epoch": 6.267231254249861, "grad_norm": 0.12985652685165405, "learning_rate": 1.6867497063732462e-05, "loss": 0.0069, "step": 202770 }, { "epoch": 6.2681584966310195, "grad_norm": 0.14084741473197937, "learning_rate": 1.6867033442541884e-05, "loss": 0.0071, "step": 202800 }, { "epoch": 6.269085739012178, "grad_norm": 0.09362046420574188, "learning_rate": 1.6866569821351302e-05, "loss": 0.0061, "step": 202830 }, { "epoch": 6.270012981393336, "grad_norm": 0.12622009217739105, "learning_rate": 1.6866106200160723e-05, "loss": 0.0066, "step": 202860 }, { "epoch": 6.2709402237744944, "grad_norm": 0.1615564078092575, "learning_rate": 1.6865642578970145e-05, "loss": 0.0061, "step": 202890 }, { "epoch": 6.271867466155653, "grad_norm": 0.13126641511917114, "learning_rate": 1.6865178957779563e-05, "loss": 0.0062, "step": 202920 }, { "epoch": 6.272794708536812, "grad_norm": 0.14645515382289886, "learning_rate": 1.6864715336588985e-05, "loss": 0.0066, "step": 202950 }, { "epoch": 6.27372195091797, "grad_norm": 0.10597436130046844, "learning_rate": 1.6864251715398406e-05, "loss": 0.0063, "step": 202980 }, { "epoch": 6.274649193299128, 
"grad_norm": 0.12112567573785782, "learning_rate": 1.6863788094207828e-05, "loss": 0.007, "step": 203010 }, { "epoch": 6.275576435680287, "grad_norm": 0.154898539185524, "learning_rate": 1.686332447301725e-05, "loss": 0.0079, "step": 203040 }, { "epoch": 6.276503678061445, "grad_norm": 0.13471433520317078, "learning_rate": 1.6862860851826667e-05, "loss": 0.0076, "step": 203070 }, { "epoch": 6.277430920442604, "grad_norm": 0.11659957468509674, "learning_rate": 1.686239723063609e-05, "loss": 0.0069, "step": 203100 }, { "epoch": 6.278358162823762, "grad_norm": 0.11499584466218948, "learning_rate": 1.686193360944551e-05, "loss": 0.0071, "step": 203130 }, { "epoch": 6.279285405204921, "grad_norm": 0.13042087852954865, "learning_rate": 1.6861469988254932e-05, "loss": 0.006, "step": 203160 }, { "epoch": 6.280212647586079, "grad_norm": 0.09492238610982895, "learning_rate": 1.6861006367064353e-05, "loss": 0.0065, "step": 203190 }, { "epoch": 6.281139889967237, "grad_norm": 0.106651172041893, "learning_rate": 1.6860542745873775e-05, "loss": 0.0068, "step": 203220 }, { "epoch": 6.282067132348396, "grad_norm": 0.1513689160346985, "learning_rate": 1.6860079124683193e-05, "loss": 0.0073, "step": 203250 }, { "epoch": 6.2829943747295545, "grad_norm": 0.08371749520301819, "learning_rate": 1.6859615503492614e-05, "loss": 0.0065, "step": 203280 }, { "epoch": 6.283921617110713, "grad_norm": 0.14960406720638275, "learning_rate": 1.6859151882302036e-05, "loss": 0.0068, "step": 203310 }, { "epoch": 6.284848859491871, "grad_norm": 0.13324770331382751, "learning_rate": 1.6858688261111454e-05, "loss": 0.0072, "step": 203340 }, { "epoch": 6.285776101873029, "grad_norm": 0.13306626677513123, "learning_rate": 1.6858224639920876e-05, "loss": 0.0069, "step": 203370 }, { "epoch": 6.286703344254188, "grad_norm": 0.14927054941654205, "learning_rate": 1.6857761018730297e-05, "loss": 0.007, "step": 203400 }, { "epoch": 6.287630586635347, "grad_norm": 0.12583716213703156, "learning_rate": 
1.685729739753972e-05, "loss": 0.0068, "step": 203430 }, { "epoch": 6.288557829016505, "grad_norm": 0.07983948290348053, "learning_rate": 1.685683377634914e-05, "loss": 0.0067, "step": 203460 }, { "epoch": 6.289485071397664, "grad_norm": 0.13221639394760132, "learning_rate": 1.6856370155158558e-05, "loss": 0.0072, "step": 203490 }, { "epoch": 6.2904123137788215, "grad_norm": 0.10152684152126312, "learning_rate": 1.685590653396798e-05, "loss": 0.0072, "step": 203520 }, { "epoch": 6.29133955615998, "grad_norm": 0.1080191358923912, "learning_rate": 1.68554429127774e-05, "loss": 0.0064, "step": 203550 }, { "epoch": 6.292266798541139, "grad_norm": 0.11875060945749283, "learning_rate": 1.6854979291586823e-05, "loss": 0.007, "step": 203580 }, { "epoch": 6.293194040922297, "grad_norm": 0.12775035202503204, "learning_rate": 1.6854515670396244e-05, "loss": 0.0066, "step": 203610 }, { "epoch": 6.294121283303456, "grad_norm": 0.08082447946071625, "learning_rate": 1.6854052049205666e-05, "loss": 0.0065, "step": 203640 }, { "epoch": 6.295048525684614, "grad_norm": 0.093834787607193, "learning_rate": 1.6853588428015084e-05, "loss": 0.007, "step": 203670 }, { "epoch": 6.295975768065772, "grad_norm": 0.10226502269506454, "learning_rate": 1.6853124806824505e-05, "loss": 0.0076, "step": 203700 }, { "epoch": 6.296903010446931, "grad_norm": 0.1049601212143898, "learning_rate": 1.6852661185633923e-05, "loss": 0.0066, "step": 203730 }, { "epoch": 6.297830252828089, "grad_norm": 0.08881913125514984, "learning_rate": 1.685219756444335e-05, "loss": 0.0068, "step": 203760 }, { "epoch": 6.298757495209248, "grad_norm": 0.09511199593544006, "learning_rate": 1.685173394325277e-05, "loss": 0.007, "step": 203790 }, { "epoch": 6.299684737590406, "grad_norm": 0.12771949172019958, "learning_rate": 1.6851270322062188e-05, "loss": 0.0068, "step": 203820 }, { "epoch": 6.300611979971564, "grad_norm": 0.13601456582546234, "learning_rate": 1.685080670087161e-05, "loss": 0.0067, "step": 203850 }, { "epoch": 
6.301539222352723, "grad_norm": 0.16345225274562836, "learning_rate": 1.685034307968103e-05, "loss": 0.0071, "step": 203880 }, { "epoch": 6.3024664647338815, "grad_norm": 0.10899519175291061, "learning_rate": 1.684987945849045e-05, "loss": 0.0065, "step": 203910 }, { "epoch": 6.30339370711504, "grad_norm": 0.12211804836988449, "learning_rate": 1.684941583729987e-05, "loss": 0.007, "step": 203940 }, { "epoch": 6.304320949496199, "grad_norm": 0.16354838013648987, "learning_rate": 1.6848952216109292e-05, "loss": 0.0068, "step": 203970 }, { "epoch": 6.305248191877356, "grad_norm": 0.14872464537620544, "learning_rate": 1.6848488594918714e-05, "loss": 0.0066, "step": 204000 }, { "epoch": 6.306175434258515, "grad_norm": 0.14481262862682343, "learning_rate": 1.6848024973728135e-05, "loss": 0.0069, "step": 204030 }, { "epoch": 6.307102676639674, "grad_norm": 0.10270091146230698, "learning_rate": 1.6847561352537553e-05, "loss": 0.007, "step": 204060 }, { "epoch": 6.308029919020832, "grad_norm": 0.14468610286712646, "learning_rate": 1.6847097731346975e-05, "loss": 0.0069, "step": 204090 }, { "epoch": 6.308957161401991, "grad_norm": 0.11432592570781708, "learning_rate": 1.6846634110156396e-05, "loss": 0.0064, "step": 204120 }, { "epoch": 6.3098844037831485, "grad_norm": 0.17153041064739227, "learning_rate": 1.6846170488965818e-05, "loss": 0.0066, "step": 204150 }, { "epoch": 6.310811646164307, "grad_norm": 0.06844071298837662, "learning_rate": 1.684570686777524e-05, "loss": 0.0064, "step": 204180 }, { "epoch": 6.311738888545466, "grad_norm": 0.15003128349781036, "learning_rate": 1.684524324658466e-05, "loss": 0.0075, "step": 204210 }, { "epoch": 6.312666130926624, "grad_norm": 0.1353512704372406, "learning_rate": 1.684477962539408e-05, "loss": 0.0073, "step": 204240 }, { "epoch": 6.313593373307783, "grad_norm": 0.14096836745738983, "learning_rate": 1.68443160042035e-05, "loss": 0.0068, "step": 204270 }, { "epoch": 6.3145206156889415, "grad_norm": 0.12960566580295563, 
"learning_rate": 1.6843852383012922e-05, "loss": 0.0067, "step": 204300 }, { "epoch": 6.315447858070099, "grad_norm": 0.12836101651191711, "learning_rate": 1.684338876182234e-05, "loss": 0.0074, "step": 204330 }, { "epoch": 6.316375100451258, "grad_norm": 0.13345025479793549, "learning_rate": 1.684292514063176e-05, "loss": 0.0072, "step": 204360 }, { "epoch": 6.317302342832416, "grad_norm": 0.10505904257297516, "learning_rate": 1.6842461519441183e-05, "loss": 0.0064, "step": 204390 }, { "epoch": 6.318229585213575, "grad_norm": 0.14290787279605865, "learning_rate": 1.6841997898250605e-05, "loss": 0.0063, "step": 204420 }, { "epoch": 6.319156827594734, "grad_norm": 0.11799254268407822, "learning_rate": 1.6841534277060026e-05, "loss": 0.0069, "step": 204450 }, { "epoch": 6.320084069975891, "grad_norm": 0.14162704348564148, "learning_rate": 1.6841070655869444e-05, "loss": 0.0067, "step": 204480 }, { "epoch": 6.32101131235705, "grad_norm": 0.10796229541301727, "learning_rate": 1.6840607034678866e-05, "loss": 0.0071, "step": 204510 }, { "epoch": 6.3219385547382085, "grad_norm": 0.12120895832777023, "learning_rate": 1.6840143413488287e-05, "loss": 0.0072, "step": 204540 }, { "epoch": 6.322865797119367, "grad_norm": 0.12206985801458359, "learning_rate": 1.683967979229771e-05, "loss": 0.0064, "step": 204570 }, { "epoch": 6.323793039500526, "grad_norm": 0.1116853654384613, "learning_rate": 1.683921617110713e-05, "loss": 0.0074, "step": 204600 }, { "epoch": 6.3247202818816834, "grad_norm": 0.11326074600219727, "learning_rate": 1.683875254991655e-05, "loss": 0.0062, "step": 204630 }, { "epoch": 6.325647524262842, "grad_norm": 0.10858674347400665, "learning_rate": 1.683828892872597e-05, "loss": 0.0067, "step": 204660 }, { "epoch": 6.326574766644001, "grad_norm": 0.12131545692682266, "learning_rate": 1.683782530753539e-05, "loss": 0.0069, "step": 204690 }, { "epoch": 6.327502009025159, "grad_norm": 0.1047106683254242, "learning_rate": 1.683736168634481e-05, "loss": 0.0073, 
"step": 204720 }, { "epoch": 6.328429251406318, "grad_norm": 0.2650162875652313, "learning_rate": 1.683689806515423e-05, "loss": 0.0067, "step": 204750 }, { "epoch": 6.3293564937874764, "grad_norm": 0.1364443004131317, "learning_rate": 1.6836434443963652e-05, "loss": 0.0067, "step": 204780 }, { "epoch": 6.330283736168634, "grad_norm": 0.08931270241737366, "learning_rate": 1.6835970822773074e-05, "loss": 0.0065, "step": 204810 }, { "epoch": 6.331210978549793, "grad_norm": 0.08393227308988571, "learning_rate": 1.6835507201582495e-05, "loss": 0.0083, "step": 204840 }, { "epoch": 6.332138220930951, "grad_norm": 0.111463762819767, "learning_rate": 1.6835043580391917e-05, "loss": 0.0071, "step": 204870 }, { "epoch": 6.33306546331211, "grad_norm": 0.12815314531326294, "learning_rate": 1.6834579959201335e-05, "loss": 0.0072, "step": 204900 }, { "epoch": 6.3339927056932686, "grad_norm": 0.08473075926303864, "learning_rate": 1.6834116338010757e-05, "loss": 0.0062, "step": 204930 }, { "epoch": 6.334919948074426, "grad_norm": 0.12175467610359192, "learning_rate": 1.6833652716820178e-05, "loss": 0.0062, "step": 204960 }, { "epoch": 6.335847190455585, "grad_norm": 0.10344778746366501, "learning_rate": 1.68331890956296e-05, "loss": 0.0075, "step": 204990 }, { "epoch": 6.3367744328367435, "grad_norm": 0.12476940453052521, "learning_rate": 1.683272547443902e-05, "loss": 0.0068, "step": 205020 }, { "epoch": 6.337701675217902, "grad_norm": 0.0907168984413147, "learning_rate": 1.683226185324844e-05, "loss": 0.0068, "step": 205050 }, { "epoch": 6.338628917599061, "grad_norm": 0.11554528027772903, "learning_rate": 1.683179823205786e-05, "loss": 0.0073, "step": 205080 }, { "epoch": 6.339556159980219, "grad_norm": 0.1352292150259018, "learning_rate": 1.6831334610867282e-05, "loss": 0.0076, "step": 205110 }, { "epoch": 6.340483402361377, "grad_norm": 0.14069396257400513, "learning_rate": 1.68308709896767e-05, "loss": 0.0069, "step": 205140 }, { "epoch": 6.341410644742536, "grad_norm": 
0.10101346671581268, "learning_rate": 1.6830407368486125e-05, "loss": 0.007, "step": 205170 }, { "epoch": 6.342337887123694, "grad_norm": 0.08620449155569077, "learning_rate": 1.6829943747295547e-05, "loss": 0.0069, "step": 205200 }, { "epoch": 6.343265129504853, "grad_norm": 0.12429709732532501, "learning_rate": 1.6829480126104965e-05, "loss": 0.0068, "step": 205230 }, { "epoch": 6.344192371886011, "grad_norm": 0.16315442323684692, "learning_rate": 1.6829016504914386e-05, "loss": 0.0066, "step": 205260 }, { "epoch": 6.345119614267169, "grad_norm": 0.11831611394882202, "learning_rate": 1.6828552883723808e-05, "loss": 0.0072, "step": 205290 }, { "epoch": 6.346046856648328, "grad_norm": 0.09720511734485626, "learning_rate": 1.6828089262533226e-05, "loss": 0.0065, "step": 205320 }, { "epoch": 6.346974099029486, "grad_norm": 0.09535227715969086, "learning_rate": 1.6827625641342647e-05, "loss": 0.0071, "step": 205350 }, { "epoch": 6.347901341410645, "grad_norm": 0.2637181282043457, "learning_rate": 1.682716202015207e-05, "loss": 0.0068, "step": 205380 }, { "epoch": 6.3488285837918035, "grad_norm": 0.1519998162984848, "learning_rate": 1.682669839896149e-05, "loss": 0.0068, "step": 205410 }, { "epoch": 6.349755826172961, "grad_norm": 0.11841917783021927, "learning_rate": 1.6826234777770912e-05, "loss": 0.0064, "step": 205440 }, { "epoch": 6.35068306855412, "grad_norm": 0.10536032170057297, "learning_rate": 1.682577115658033e-05, "loss": 0.0068, "step": 205470 }, { "epoch": 6.351610310935278, "grad_norm": 0.16176508367061615, "learning_rate": 1.682530753538975e-05, "loss": 0.0069, "step": 205500 }, { "epoch": 6.352537553316437, "grad_norm": 0.1317651867866516, "learning_rate": 1.6824843914199173e-05, "loss": 0.0065, "step": 205530 }, { "epoch": 6.353464795697596, "grad_norm": 0.09215550124645233, "learning_rate": 1.6824380293008595e-05, "loss": 0.0071, "step": 205560 }, { "epoch": 6.354392038078754, "grad_norm": 0.08819093555212021, "learning_rate": 1.6823916671818016e-05, 
"loss": 0.007, "step": 205590 }, { "epoch": 6.355319280459912, "grad_norm": 0.12189217656850815, "learning_rate": 1.6823453050627438e-05, "loss": 0.0066, "step": 205620 }, { "epoch": 6.3562465228410705, "grad_norm": 0.15463665127754211, "learning_rate": 1.6822989429436856e-05, "loss": 0.0066, "step": 205650 }, { "epoch": 6.357173765222229, "grad_norm": 0.1284402310848236, "learning_rate": 1.6822525808246277e-05, "loss": 0.0065, "step": 205680 }, { "epoch": 6.358101007603388, "grad_norm": 0.10163424909114838, "learning_rate": 1.68220621870557e-05, "loss": 0.0069, "step": 205710 }, { "epoch": 6.359028249984546, "grad_norm": 0.10991363972425461, "learning_rate": 1.6821598565865117e-05, "loss": 0.0069, "step": 205740 }, { "epoch": 6.359955492365704, "grad_norm": 0.10618893802165985, "learning_rate": 1.682113494467454e-05, "loss": 0.0066, "step": 205770 }, { "epoch": 6.360882734746863, "grad_norm": 0.11662182211875916, "learning_rate": 1.682067132348396e-05, "loss": 0.006, "step": 205800 }, { "epoch": 6.361809977128021, "grad_norm": 0.12607143819332123, "learning_rate": 1.682020770229338e-05, "loss": 0.0068, "step": 205830 }, { "epoch": 6.36273721950918, "grad_norm": 0.15549485385417938, "learning_rate": 1.6819744081102803e-05, "loss": 0.0071, "step": 205860 }, { "epoch": 6.363664461890338, "grad_norm": 0.07377181947231293, "learning_rate": 1.681928045991222e-05, "loss": 0.0068, "step": 205890 }, { "epoch": 6.364591704271497, "grad_norm": 0.11750483512878418, "learning_rate": 1.6818816838721643e-05, "loss": 0.0065, "step": 205920 }, { "epoch": 6.365518946652655, "grad_norm": 0.11940392106771469, "learning_rate": 1.6818353217531064e-05, "loss": 0.0066, "step": 205950 }, { "epoch": 6.366446189033813, "grad_norm": 0.14806991815567017, "learning_rate": 1.6817889596340486e-05, "loss": 0.0069, "step": 205980 }, { "epoch": 6.367373431414972, "grad_norm": 0.08017181605100632, "learning_rate": 1.6817425975149907e-05, "loss": 0.0066, "step": 206010 }, { "epoch": 
6.3683006737961305, "grad_norm": 0.0756189152598381, "learning_rate": 1.681696235395933e-05, "loss": 0.0069, "step": 206040 }, { "epoch": 6.369227916177289, "grad_norm": 0.0744447112083435, "learning_rate": 1.6816498732768747e-05, "loss": 0.0068, "step": 206070 }, { "epoch": 6.370155158558447, "grad_norm": 0.15143993496894836, "learning_rate": 1.6816035111578168e-05, "loss": 0.0071, "step": 206100 }, { "epoch": 6.371082400939605, "grad_norm": 0.13972240686416626, "learning_rate": 1.6815571490387586e-05, "loss": 0.0069, "step": 206130 }, { "epoch": 6.372009643320764, "grad_norm": 0.15117932856082916, "learning_rate": 1.6815107869197008e-05, "loss": 0.0074, "step": 206160 }, { "epoch": 6.372936885701923, "grad_norm": 0.12643451988697052, "learning_rate": 1.681464424800643e-05, "loss": 0.0068, "step": 206190 }, { "epoch": 6.373864128083081, "grad_norm": 0.11513436585664749, "learning_rate": 1.681418062681585e-05, "loss": 0.0071, "step": 206220 }, { "epoch": 6.374791370464239, "grad_norm": 0.12021946907043457, "learning_rate": 1.6813717005625272e-05, "loss": 0.0066, "step": 206250 }, { "epoch": 6.3757186128453975, "grad_norm": 0.12020986527204514, "learning_rate": 1.6813253384434694e-05, "loss": 0.0066, "step": 206280 }, { "epoch": 6.376645855226556, "grad_norm": 0.0929340347647667, "learning_rate": 1.6812789763244112e-05, "loss": 0.007, "step": 206310 }, { "epoch": 6.377573097607715, "grad_norm": 0.15518341958522797, "learning_rate": 1.6812326142053533e-05, "loss": 0.0071, "step": 206340 }, { "epoch": 6.378500339988873, "grad_norm": 0.08594135195016861, "learning_rate": 1.6811862520862955e-05, "loss": 0.0065, "step": 206370 }, { "epoch": 6.379427582370032, "grad_norm": 0.1261453628540039, "learning_rate": 1.6811398899672376e-05, "loss": 0.0066, "step": 206400 }, { "epoch": 6.38035482475119, "grad_norm": 0.12705190479755402, "learning_rate": 1.6810935278481798e-05, "loss": 0.0068, "step": 206430 }, { "epoch": 6.381282067132348, "grad_norm": 0.1001802533864975, 
"learning_rate": 1.6810471657291216e-05, "loss": 0.0065, "step": 206460 }, { "epoch": 6.382209309513507, "grad_norm": 0.08949047327041626, "learning_rate": 1.6810008036100638e-05, "loss": 0.0059, "step": 206490 }, { "epoch": 6.3831365518946654, "grad_norm": 0.12318059056997299, "learning_rate": 1.680954441491006e-05, "loss": 0.0072, "step": 206520 }, { "epoch": 6.384063794275824, "grad_norm": 0.12025900185108185, "learning_rate": 1.6809080793719477e-05, "loss": 0.007, "step": 206550 }, { "epoch": 6.384991036656982, "grad_norm": 0.11333530396223068, "learning_rate": 1.6808617172528902e-05, "loss": 0.007, "step": 206580 }, { "epoch": 6.38591827903814, "grad_norm": 0.09346511214971542, "learning_rate": 1.6808153551338324e-05, "loss": 0.006, "step": 206610 }, { "epoch": 6.386845521419299, "grad_norm": 0.14579632878303528, "learning_rate": 1.6807689930147742e-05, "loss": 0.007, "step": 206640 }, { "epoch": 6.3877727638004576, "grad_norm": 0.12766475975513458, "learning_rate": 1.6807226308957163e-05, "loss": 0.0066, "step": 206670 }, { "epoch": 6.388700006181616, "grad_norm": 0.18587544560432434, "learning_rate": 1.6806762687766585e-05, "loss": 0.0068, "step": 206700 }, { "epoch": 6.389627248562775, "grad_norm": 0.11128716170787811, "learning_rate": 1.6806299066576003e-05, "loss": 0.0071, "step": 206730 }, { "epoch": 6.3905544909439325, "grad_norm": 0.10840035229921341, "learning_rate": 1.6805835445385424e-05, "loss": 0.0068, "step": 206760 }, { "epoch": 6.391481733325091, "grad_norm": 0.14314314723014832, "learning_rate": 1.6805371824194846e-05, "loss": 0.0065, "step": 206790 }, { "epoch": 6.39240897570625, "grad_norm": 0.10329533368349075, "learning_rate": 1.6804908203004267e-05, "loss": 0.0075, "step": 206820 }, { "epoch": 6.393336218087408, "grad_norm": 0.10485269874334335, "learning_rate": 1.680444458181369e-05, "loss": 0.0072, "step": 206850 }, { "epoch": 6.394263460468567, "grad_norm": 0.09788252413272858, "learning_rate": 1.6803980960623107e-05, "loss": 0.007, 
"step": 206880 }, { "epoch": 6.395190702849725, "grad_norm": 0.12588216364383698, "learning_rate": 1.680351733943253e-05, "loss": 0.0072, "step": 206910 }, { "epoch": 6.396117945230883, "grad_norm": 0.09750398248434067, "learning_rate": 1.680305371824195e-05, "loss": 0.0066, "step": 206940 }, { "epoch": 6.397045187612042, "grad_norm": 0.10961290448904037, "learning_rate": 1.680259009705137e-05, "loss": 0.0067, "step": 206970 }, { "epoch": 6.3979724299932, "grad_norm": 0.11575670540332794, "learning_rate": 1.6802126475860793e-05, "loss": 0.0074, "step": 207000 }, { "epoch": 6.398899672374359, "grad_norm": 0.14583522081375122, "learning_rate": 1.6801662854670215e-05, "loss": 0.0065, "step": 207030 }, { "epoch": 6.399826914755517, "grad_norm": 0.15521028637886047, "learning_rate": 1.6801199233479633e-05, "loss": 0.0065, "step": 207060 }, { "epoch": 6.400754157136675, "grad_norm": 0.10912735015153885, "learning_rate": 1.6800735612289054e-05, "loss": 0.0069, "step": 207090 }, { "epoch": 6.401681399517834, "grad_norm": 0.06259705126285553, "learning_rate": 1.6800271991098472e-05, "loss": 0.007, "step": 207120 }, { "epoch": 6.4026086418989925, "grad_norm": 0.07087863236665726, "learning_rate": 1.6799808369907894e-05, "loss": 0.0069, "step": 207150 }, { "epoch": 6.403535884280151, "grad_norm": 0.10531526803970337, "learning_rate": 1.6799344748717315e-05, "loss": 0.0071, "step": 207180 }, { "epoch": 6.40446312666131, "grad_norm": 0.1401234120130539, "learning_rate": 1.6798881127526737e-05, "loss": 0.0068, "step": 207210 }, { "epoch": 6.405390369042467, "grad_norm": 0.09166572988033295, "learning_rate": 1.679841750633616e-05, "loss": 0.0074, "step": 207240 }, { "epoch": 6.406317611423626, "grad_norm": 0.124425508081913, "learning_rate": 1.679795388514558e-05, "loss": 0.0068, "step": 207270 }, { "epoch": 6.407244853804785, "grad_norm": 0.07772152870893478, "learning_rate": 1.6797490263954998e-05, "loss": 0.0067, "step": 207300 }, { "epoch": 6.408172096185943, "grad_norm": 
0.12891501188278198, "learning_rate": 1.679702664276442e-05, "loss": 0.0073, "step": 207330 }, { "epoch": 6.409099338567102, "grad_norm": 0.08961248397827148, "learning_rate": 1.679656302157384e-05, "loss": 0.0064, "step": 207360 }, { "epoch": 6.4100265809482595, "grad_norm": 0.07884785532951355, "learning_rate": 1.6796099400383262e-05, "loss": 0.0061, "step": 207390 }, { "epoch": 6.410953823329418, "grad_norm": 0.12888039648532867, "learning_rate": 1.6795635779192684e-05, "loss": 0.0065, "step": 207420 }, { "epoch": 6.411881065710577, "grad_norm": 0.12307606637477875, "learning_rate": 1.6795172158002102e-05, "loss": 0.007, "step": 207450 }, { "epoch": 6.412808308091735, "grad_norm": 0.12323159724473953, "learning_rate": 1.6794708536811524e-05, "loss": 0.0069, "step": 207480 }, { "epoch": 6.413735550472894, "grad_norm": 0.13887083530426025, "learning_rate": 1.6794244915620945e-05, "loss": 0.0063, "step": 207510 }, { "epoch": 6.4146627928540525, "grad_norm": 0.10719522833824158, "learning_rate": 1.6793781294430363e-05, "loss": 0.007, "step": 207540 }, { "epoch": 6.41559003523521, "grad_norm": 0.09447789937257767, "learning_rate": 1.6793317673239785e-05, "loss": 0.0075, "step": 207570 }, { "epoch": 6.416517277616369, "grad_norm": 0.18400324881076813, "learning_rate": 1.6792854052049206e-05, "loss": 0.0069, "step": 207600 }, { "epoch": 6.417444519997527, "grad_norm": 0.14005766808986664, "learning_rate": 1.6792390430858628e-05, "loss": 0.0063, "step": 207630 }, { "epoch": 6.418371762378686, "grad_norm": 0.17680248618125916, "learning_rate": 1.679192680966805e-05, "loss": 0.0067, "step": 207660 }, { "epoch": 6.419299004759845, "grad_norm": 0.08054833859205246, "learning_rate": 1.679146318847747e-05, "loss": 0.0059, "step": 207690 }, { "epoch": 6.420226247141002, "grad_norm": 0.09459126740694046, "learning_rate": 1.679099956728689e-05, "loss": 0.0068, "step": 207720 }, { "epoch": 6.421153489522161, "grad_norm": 0.09431096166372299, "learning_rate": 
1.679053594609631e-05, "loss": 0.0068, "step": 207750 }, { "epoch": 6.4220807319033195, "grad_norm": 0.12777280807495117, "learning_rate": 1.6790072324905732e-05, "loss": 0.0071, "step": 207780 }, { "epoch": 6.423007974284478, "grad_norm": 0.11213363707065582, "learning_rate": 1.6789608703715153e-05, "loss": 0.0068, "step": 207810 }, { "epoch": 6.423935216665637, "grad_norm": 0.0969708040356636, "learning_rate": 1.6789145082524575e-05, "loss": 0.0068, "step": 207840 }, { "epoch": 6.424862459046794, "grad_norm": 0.11073305457830429, "learning_rate": 1.6788681461333993e-05, "loss": 0.0074, "step": 207870 }, { "epoch": 6.425789701427953, "grad_norm": 0.1389363408088684, "learning_rate": 1.6788217840143415e-05, "loss": 0.0073, "step": 207900 }, { "epoch": 6.426716943809112, "grad_norm": 0.07155200839042664, "learning_rate": 1.6787754218952836e-05, "loss": 0.0074, "step": 207930 }, { "epoch": 6.42764418619027, "grad_norm": 0.1299440711736679, "learning_rate": 1.678730605180194e-05, "loss": 0.0071, "step": 207960 }, { "epoch": 6.428571428571429, "grad_norm": 0.10715389251708984, "learning_rate": 1.6786842430611362e-05, "loss": 0.0072, "step": 207990 }, { "epoch": 6.4294986709525865, "grad_norm": 0.15517638623714447, "learning_rate": 1.6786378809420784e-05, "loss": 0.0068, "step": 208020 }, { "epoch": 6.430425913333745, "grad_norm": 0.15180152654647827, "learning_rate": 1.6785915188230205e-05, "loss": 0.0062, "step": 208050 }, { "epoch": 6.431353155714904, "grad_norm": 0.14507366716861725, "learning_rate": 1.6785451567039627e-05, "loss": 0.007, "step": 208080 }, { "epoch": 6.432280398096062, "grad_norm": 0.13671214878559113, "learning_rate": 1.6784987945849048e-05, "loss": 0.0059, "step": 208110 }, { "epoch": 6.433207640477221, "grad_norm": 0.15713529288768768, "learning_rate": 1.6784524324658466e-05, "loss": 0.0066, "step": 208140 }, { "epoch": 6.4341348828583795, "grad_norm": 0.08468668907880783, "learning_rate": 1.6784060703467888e-05, "loss": 0.0073, "step": 208170 }, 
{ "epoch": 6.435062125239537, "grad_norm": 0.09505951404571533, "learning_rate": 1.678359708227731e-05, "loss": 0.0068, "step": 208200 }, { "epoch": 6.435989367620696, "grad_norm": 0.14010681211948395, "learning_rate": 1.678313346108673e-05, "loss": 0.0068, "step": 208230 }, { "epoch": 6.436916610001854, "grad_norm": 0.17720410227775574, "learning_rate": 1.6782669839896152e-05, "loss": 0.0065, "step": 208260 }, { "epoch": 6.437843852383013, "grad_norm": 0.10274357348680496, "learning_rate": 1.678220621870557e-05, "loss": 0.0071, "step": 208290 }, { "epoch": 6.438771094764172, "grad_norm": 0.15083828568458557, "learning_rate": 1.6781742597514992e-05, "loss": 0.0064, "step": 208320 }, { "epoch": 6.43969833714533, "grad_norm": 0.09752960503101349, "learning_rate": 1.6781278976324413e-05, "loss": 0.007, "step": 208350 }, { "epoch": 6.440625579526488, "grad_norm": 0.10297174751758575, "learning_rate": 1.678081535513383e-05, "loss": 0.0068, "step": 208380 }, { "epoch": 6.4415528219076466, "grad_norm": 0.1638256311416626, "learning_rate": 1.6780351733943253e-05, "loss": 0.0072, "step": 208410 }, { "epoch": 6.442480064288805, "grad_norm": 0.10151796042919159, "learning_rate": 1.6779888112752674e-05, "loss": 0.0064, "step": 208440 }, { "epoch": 6.443407306669964, "grad_norm": 0.07964120805263519, "learning_rate": 1.6779424491562096e-05, "loss": 0.0067, "step": 208470 }, { "epoch": 6.444334549051122, "grad_norm": 0.08481158316135406, "learning_rate": 1.6778960870371517e-05, "loss": 0.0069, "step": 208500 }, { "epoch": 6.44526179143228, "grad_norm": 0.10372017323970795, "learning_rate": 1.677849724918094e-05, "loss": 0.0064, "step": 208530 }, { "epoch": 6.446189033813439, "grad_norm": 0.1630113571882248, "learning_rate": 1.6778033627990357e-05, "loss": 0.0066, "step": 208560 }, { "epoch": 6.447116276194597, "grad_norm": 0.1278224140405655, "learning_rate": 1.677757000679978e-05, "loss": 0.007, "step": 208590 }, { "epoch": 6.448043518575756, "grad_norm": 0.11742053180932999, 
"learning_rate": 1.67771063856092e-05, "loss": 0.0068, "step": 208620 }, { "epoch": 6.4489707609569145, "grad_norm": 0.12001626193523407, "learning_rate": 1.677664276441862e-05, "loss": 0.007, "step": 208650 }, { "epoch": 6.449898003338072, "grad_norm": 0.10144262760877609, "learning_rate": 1.6776179143228043e-05, "loss": 0.0073, "step": 208680 }, { "epoch": 6.450825245719231, "grad_norm": 0.10625915974378586, "learning_rate": 1.677571552203746e-05, "loss": 0.0067, "step": 208710 }, { "epoch": 6.451752488100389, "grad_norm": 0.16427047550678253, "learning_rate": 1.6775251900846883e-05, "loss": 0.0067, "step": 208740 }, { "epoch": 6.452679730481548, "grad_norm": 0.12194312363862991, "learning_rate": 1.6774788279656304e-05, "loss": 0.007, "step": 208770 }, { "epoch": 6.453606972862707, "grad_norm": 0.09791230410337448, "learning_rate": 1.6774324658465722e-05, "loss": 0.007, "step": 208800 }, { "epoch": 6.454534215243864, "grad_norm": 0.13551297783851624, "learning_rate": 1.6773861037275144e-05, "loss": 0.0065, "step": 208830 }, { "epoch": 6.455461457625023, "grad_norm": 0.13534806668758392, "learning_rate": 1.6773397416084565e-05, "loss": 0.007, "step": 208860 }, { "epoch": 6.4563887000061815, "grad_norm": 0.1263367384672165, "learning_rate": 1.6772933794893987e-05, "loss": 0.0061, "step": 208890 }, { "epoch": 6.45731594238734, "grad_norm": 0.10561060160398483, "learning_rate": 1.677247017370341e-05, "loss": 0.0066, "step": 208920 }, { "epoch": 6.458243184768499, "grad_norm": 0.1358533650636673, "learning_rate": 1.6772006552512827e-05, "loss": 0.0068, "step": 208950 }, { "epoch": 6.459170427149657, "grad_norm": 0.13028548657894135, "learning_rate": 1.6771542931322248e-05, "loss": 0.0069, "step": 208980 }, { "epoch": 6.460097669530815, "grad_norm": 0.14938302338123322, "learning_rate": 1.677107931013167e-05, "loss": 0.0066, "step": 209010 }, { "epoch": 6.461024911911974, "grad_norm": 0.16144759953022003, "learning_rate": 1.677061568894109e-05, "loss": 0.0063, "step": 
209040 }, { "epoch": 6.461952154293132, "grad_norm": 0.06951676309108734, "learning_rate": 1.6770152067750513e-05, "loss": 0.0065, "step": 209070 }, { "epoch": 6.462879396674291, "grad_norm": 0.09809704124927521, "learning_rate": 1.6769688446559934e-05, "loss": 0.0066, "step": 209100 }, { "epoch": 6.463806639055449, "grad_norm": 0.0958462506532669, "learning_rate": 1.6769224825369352e-05, "loss": 0.0068, "step": 209130 }, { "epoch": 6.464733881436608, "grad_norm": 0.13375864923000336, "learning_rate": 1.6768761204178774e-05, "loss": 0.0069, "step": 209160 }, { "epoch": 6.465661123817766, "grad_norm": 0.13323481380939484, "learning_rate": 1.6768297582988195e-05, "loss": 0.0067, "step": 209190 }, { "epoch": 6.466588366198924, "grad_norm": 0.11861289292573929, "learning_rate": 1.6767833961797613e-05, "loss": 0.0068, "step": 209220 }, { "epoch": 6.467515608580083, "grad_norm": 0.12031423300504684, "learning_rate": 1.6767370340607038e-05, "loss": 0.0066, "step": 209250 }, { "epoch": 6.4684428509612415, "grad_norm": 0.07980392873287201, "learning_rate": 1.6766906719416456e-05, "loss": 0.0064, "step": 209280 }, { "epoch": 6.4693700933424, "grad_norm": 0.138712540268898, "learning_rate": 1.6766443098225878e-05, "loss": 0.0074, "step": 209310 }, { "epoch": 6.470297335723558, "grad_norm": 0.06647691875696182, "learning_rate": 1.67659794770353e-05, "loss": 0.0067, "step": 209340 }, { "epoch": 6.471224578104716, "grad_norm": 0.13092125952243805, "learning_rate": 1.6765515855844717e-05, "loss": 0.0073, "step": 209370 }, { "epoch": 6.472151820485875, "grad_norm": 0.10374454408884048, "learning_rate": 1.676505223465414e-05, "loss": 0.0075, "step": 209400 }, { "epoch": 6.473079062867034, "grad_norm": 0.14275330305099487, "learning_rate": 1.676458861346356e-05, "loss": 0.0063, "step": 209430 }, { "epoch": 6.474006305248192, "grad_norm": 0.16604165732860565, "learning_rate": 1.6764124992272982e-05, "loss": 0.0069, "step": 209460 }, { "epoch": 6.47493354762935, "grad_norm": 
0.14564673602581024, "learning_rate": 1.6763661371082403e-05, "loss": 0.007, "step": 209490 }, { "epoch": 6.4758607900105085, "grad_norm": 0.07759857177734375, "learning_rate": 1.6763197749891825e-05, "loss": 0.0071, "step": 209520 }, { "epoch": 6.476788032391667, "grad_norm": 0.14416639506816864, "learning_rate": 1.6762734128701243e-05, "loss": 0.0068, "step": 209550 }, { "epoch": 6.477715274772826, "grad_norm": 0.07017771154642105, "learning_rate": 1.6762270507510665e-05, "loss": 0.0073, "step": 209580 }, { "epoch": 6.478642517153984, "grad_norm": 0.1063012033700943, "learning_rate": 1.6761806886320086e-05, "loss": 0.0066, "step": 209610 }, { "epoch": 6.479569759535142, "grad_norm": 0.13180650770664215, "learning_rate": 1.6761343265129508e-05, "loss": 0.0073, "step": 209640 }, { "epoch": 6.480497001916301, "grad_norm": 0.14475242793560028, "learning_rate": 1.676087964393893e-05, "loss": 0.0068, "step": 209670 }, { "epoch": 6.481424244297459, "grad_norm": 0.2347293645143509, "learning_rate": 1.6760416022748347e-05, "loss": 0.007, "step": 209700 }, { "epoch": 6.482351486678618, "grad_norm": 0.10556650161743164, "learning_rate": 1.675995240155777e-05, "loss": 0.0061, "step": 209730 }, { "epoch": 6.483278729059776, "grad_norm": 0.0914987102150917, "learning_rate": 1.675948878036719e-05, "loss": 0.0076, "step": 209760 }, { "epoch": 6.484205971440935, "grad_norm": 0.08274151384830475, "learning_rate": 1.675902515917661e-05, "loss": 0.007, "step": 209790 }, { "epoch": 6.485133213822093, "grad_norm": 0.12688226997852325, "learning_rate": 1.675856153798603e-05, "loss": 0.0057, "step": 209820 }, { "epoch": 6.486060456203251, "grad_norm": 0.14880326390266418, "learning_rate": 1.675809791679545e-05, "loss": 0.0079, "step": 209850 }, { "epoch": 6.48698769858441, "grad_norm": 0.11080677062273026, "learning_rate": 1.6757634295604873e-05, "loss": 0.0066, "step": 209880 }, { "epoch": 6.4879149409655685, "grad_norm": 0.14455530047416687, "learning_rate": 1.6757170674414294e-05, 
"loss": 0.0063, "step": 209910 }, { "epoch": 6.488842183346727, "grad_norm": 0.17421792447566986, "learning_rate": 1.6756707053223712e-05, "loss": 0.007, "step": 209940 }, { "epoch": 6.489769425727886, "grad_norm": 0.12867164611816406, "learning_rate": 1.6756243432033134e-05, "loss": 0.0067, "step": 209970 }, { "epoch": 6.490696668109043, "grad_norm": 0.12756755948066711, "learning_rate": 1.6755779810842556e-05, "loss": 0.0066, "step": 210000 }, { "epoch": 6.491623910490202, "grad_norm": 0.07815233618021011, "learning_rate": 1.6755316189651977e-05, "loss": 0.0071, "step": 210030 }, { "epoch": 6.492551152871361, "grad_norm": 0.15258322656154633, "learning_rate": 1.67548525684614e-05, "loss": 0.007, "step": 210060 }, { "epoch": 6.493478395252519, "grad_norm": 0.1429935097694397, "learning_rate": 1.675438894727082e-05, "loss": 0.007, "step": 210090 }, { "epoch": 6.494405637633678, "grad_norm": 0.0988350361585617, "learning_rate": 1.6753925326080238e-05, "loss": 0.0065, "step": 210120 }, { "epoch": 6.4953328800148356, "grad_norm": 0.11245137453079224, "learning_rate": 1.675346170488966e-05, "loss": 0.0072, "step": 210150 }, { "epoch": 6.496260122395994, "grad_norm": 0.096986323595047, "learning_rate": 1.675299808369908e-05, "loss": 0.0064, "step": 210180 }, { "epoch": 6.497187364777153, "grad_norm": 0.13741794228553772, "learning_rate": 1.67525344625085e-05, "loss": 0.0065, "step": 210210 }, { "epoch": 6.498114607158311, "grad_norm": 0.14383026957511902, "learning_rate": 1.675207084131792e-05, "loss": 0.0066, "step": 210240 }, { "epoch": 6.49904184953947, "grad_norm": 0.12767650187015533, "learning_rate": 1.6751607220127342e-05, "loss": 0.0069, "step": 210270 }, { "epoch": 6.499969091920628, "grad_norm": 0.18534623086452484, "learning_rate": 1.6751143598936764e-05, "loss": 0.0065, "step": 210300 }, { "epoch": 6.500896334301786, "grad_norm": 0.14495548605918884, "learning_rate": 1.6750679977746185e-05, "loss": 0.0063, "step": 210330 }, { "epoch": 6.501823576682945, 
"grad_norm": 0.1143215224146843, "learning_rate": 1.6750216356555603e-05, "loss": 0.0066, "step": 210360 }, { "epoch": 6.5027508190641035, "grad_norm": 0.1479305773973465, "learning_rate": 1.6749752735365025e-05, "loss": 0.0066, "step": 210390 }, { "epoch": 6.503678061445262, "grad_norm": 0.09874947369098663, "learning_rate": 1.6749289114174446e-05, "loss": 0.0065, "step": 210420 }, { "epoch": 6.50460530382642, "grad_norm": 0.13801956176757812, "learning_rate": 1.6748825492983868e-05, "loss": 0.0069, "step": 210450 }, { "epoch": 6.505532546207578, "grad_norm": 0.1037505492568016, "learning_rate": 1.674836187179329e-05, "loss": 0.0067, "step": 210480 }, { "epoch": 6.506459788588737, "grad_norm": 0.09119787067174911, "learning_rate": 1.674789825060271e-05, "loss": 0.0067, "step": 210510 }, { "epoch": 6.507387030969896, "grad_norm": 0.12911571562290192, "learning_rate": 1.674743462941213e-05, "loss": 0.0065, "step": 210540 }, { "epoch": 6.508314273351054, "grad_norm": 0.11027967929840088, "learning_rate": 1.674697100822155e-05, "loss": 0.0065, "step": 210570 }, { "epoch": 6.509241515732213, "grad_norm": 0.11494141817092896, "learning_rate": 1.674650738703097e-05, "loss": 0.0065, "step": 210600 }, { "epoch": 6.5101687581133705, "grad_norm": 0.12359157204627991, "learning_rate": 1.674604376584039e-05, "loss": 0.0076, "step": 210630 }, { "epoch": 6.511096000494529, "grad_norm": 0.22069895267486572, "learning_rate": 1.6745580144649815e-05, "loss": 0.0072, "step": 210660 }, { "epoch": 6.512023242875688, "grad_norm": 0.0959216058254242, "learning_rate": 1.6745116523459233e-05, "loss": 0.0073, "step": 210690 }, { "epoch": 6.512950485256846, "grad_norm": 0.11262114346027374, "learning_rate": 1.6744652902268655e-05, "loss": 0.0068, "step": 210720 }, { "epoch": 6.513877727638005, "grad_norm": 0.13211378455162048, "learning_rate": 1.6744189281078076e-05, "loss": 0.0064, "step": 210750 }, { "epoch": 6.5148049700191635, "grad_norm": 0.09600316733121872, "learning_rate": 
1.6743725659887494e-05, "loss": 0.0071, "step": 210780 }, { "epoch": 6.515732212400321, "grad_norm": 0.08108343929052353, "learning_rate": 1.6743262038696916e-05, "loss": 0.0068, "step": 210810 }, { "epoch": 6.51665945478148, "grad_norm": 0.11953362077474594, "learning_rate": 1.6742798417506337e-05, "loss": 0.007, "step": 210840 }, { "epoch": 6.517586697162638, "grad_norm": 0.15395112335681915, "learning_rate": 1.674233479631576e-05, "loss": 0.0056, "step": 210870 }, { "epoch": 6.518513939543797, "grad_norm": 0.10172230750322342, "learning_rate": 1.674187117512518e-05, "loss": 0.0066, "step": 210900 }, { "epoch": 6.519441181924956, "grad_norm": 0.10503967106342316, "learning_rate": 1.67414075539346e-05, "loss": 0.0073, "step": 210930 }, { "epoch": 6.520368424306113, "grad_norm": 0.11079144477844238, "learning_rate": 1.674094393274402e-05, "loss": 0.0073, "step": 210960 }, { "epoch": 6.521295666687272, "grad_norm": 0.11805416643619537, "learning_rate": 1.674048031155344e-05, "loss": 0.0066, "step": 210990 }, { "epoch": 6.5222229090684305, "grad_norm": 0.13077902793884277, "learning_rate": 1.6740016690362863e-05, "loss": 0.0066, "step": 211020 }, { "epoch": 6.523150151449589, "grad_norm": 0.11322735995054245, "learning_rate": 1.6739553069172284e-05, "loss": 0.0072, "step": 211050 }, { "epoch": 6.524077393830748, "grad_norm": 0.09281488507986069, "learning_rate": 1.6739089447981706e-05, "loss": 0.0066, "step": 211080 }, { "epoch": 6.525004636211905, "grad_norm": 0.12620426714420319, "learning_rate": 1.6738625826791124e-05, "loss": 0.0069, "step": 211110 }, { "epoch": 6.525931878593064, "grad_norm": 0.14738662540912628, "learning_rate": 1.6738162205600546e-05, "loss": 0.0071, "step": 211140 }, { "epoch": 6.526859120974223, "grad_norm": 0.15343637764453888, "learning_rate": 1.6737698584409967e-05, "loss": 0.0076, "step": 211170 }, { "epoch": 6.527786363355381, "grad_norm": 0.09442656487226486, "learning_rate": 1.6737234963219385e-05, "loss": 0.0071, "step": 211200 }, { 
"epoch": 6.52871360573654, "grad_norm": 0.11883113533258438, "learning_rate": 1.6736771342028807e-05, "loss": 0.0068, "step": 211230 }, { "epoch": 6.5296408481176975, "grad_norm": 0.11022955179214478, "learning_rate": 1.6736307720838228e-05, "loss": 0.0069, "step": 211260 }, { "epoch": 6.530568090498856, "grad_norm": 0.1277323216199875, "learning_rate": 1.673584409964765e-05, "loss": 0.0066, "step": 211290 }, { "epoch": 6.531495332880015, "grad_norm": 0.12338738143444061, "learning_rate": 1.673538047845707e-05, "loss": 0.0074, "step": 211320 }, { "epoch": 6.532422575261173, "grad_norm": 0.13582031428813934, "learning_rate": 1.673491685726649e-05, "loss": 0.007, "step": 211350 }, { "epoch": 6.533349817642332, "grad_norm": 0.10037732124328613, "learning_rate": 1.673445323607591e-05, "loss": 0.0073, "step": 211380 }, { "epoch": 6.5342770600234905, "grad_norm": 0.10787557810544968, "learning_rate": 1.6733989614885332e-05, "loss": 0.0067, "step": 211410 }, { "epoch": 6.535204302404648, "grad_norm": 0.13397997617721558, "learning_rate": 1.6733525993694754e-05, "loss": 0.0073, "step": 211440 }, { "epoch": 6.536131544785807, "grad_norm": 0.12465820461511612, "learning_rate": 1.6733062372504175e-05, "loss": 0.0068, "step": 211470 }, { "epoch": 6.537058787166965, "grad_norm": 0.14983220398426056, "learning_rate": 1.6732598751313597e-05, "loss": 0.0066, "step": 211500 }, { "epoch": 6.537986029548124, "grad_norm": 0.07388046383857727, "learning_rate": 1.6732135130123015e-05, "loss": 0.0063, "step": 211530 }, { "epoch": 6.538913271929283, "grad_norm": 0.07954476028680801, "learning_rate": 1.6731671508932437e-05, "loss": 0.0068, "step": 211560 }, { "epoch": 6.539840514310441, "grad_norm": 0.061194028705358505, "learning_rate": 1.6731207887741855e-05, "loss": 0.0071, "step": 211590 }, { "epoch": 6.540767756691599, "grad_norm": 0.07072525471448898, "learning_rate": 1.6730744266551276e-05, "loss": 0.0067, "step": 211620 }, { "epoch": 6.5416949990727575, "grad_norm": 
0.14478829503059387, "learning_rate": 1.6730280645360698e-05, "loss": 0.007, "step": 211650 }, { "epoch": 6.542622241453916, "grad_norm": 0.1591370701789856, "learning_rate": 1.672981702417012e-05, "loss": 0.0067, "step": 211680 }, { "epoch": 6.543549483835075, "grad_norm": 0.11868146806955338, "learning_rate": 1.672935340297954e-05, "loss": 0.0066, "step": 211710 }, { "epoch": 6.544476726216233, "grad_norm": 0.1721382439136505, "learning_rate": 1.6728889781788962e-05, "loss": 0.0069, "step": 211740 }, { "epoch": 6.545403968597391, "grad_norm": 0.08478853851556778, "learning_rate": 1.672842616059838e-05, "loss": 0.0065, "step": 211770 }, { "epoch": 6.54633121097855, "grad_norm": 0.1508803814649582, "learning_rate": 1.6727962539407802e-05, "loss": 0.0068, "step": 211800 }, { "epoch": 6.547258453359708, "grad_norm": 0.0981978178024292, "learning_rate": 1.6727498918217223e-05, "loss": 0.007, "step": 211830 }, { "epoch": 6.548185695740867, "grad_norm": 0.15240490436553955, "learning_rate": 1.6727035297026645e-05, "loss": 0.0072, "step": 211860 }, { "epoch": 6.549112938122025, "grad_norm": 0.1473669558763504, "learning_rate": 1.6726571675836066e-05, "loss": 0.0067, "step": 211890 }, { "epoch": 6.550040180503183, "grad_norm": 0.1517263948917389, "learning_rate": 1.6726108054645484e-05, "loss": 0.0065, "step": 211920 }, { "epoch": 6.550967422884342, "grad_norm": 0.07988671213388443, "learning_rate": 1.6725644433454906e-05, "loss": 0.0067, "step": 211950 }, { "epoch": 6.5518946652655, "grad_norm": 0.14461465179920197, "learning_rate": 1.6725180812264327e-05, "loss": 0.007, "step": 211980 }, { "epoch": 6.552821907646659, "grad_norm": 0.12219499796628952, "learning_rate": 1.6724717191073746e-05, "loss": 0.0069, "step": 212010 }, { "epoch": 6.5537491500278175, "grad_norm": 0.18038013577461243, "learning_rate": 1.6724253569883167e-05, "loss": 0.0063, "step": 212040 }, { "epoch": 6.554676392408975, "grad_norm": 0.11034735292196274, "learning_rate": 1.6723789948692592e-05, 
"loss": 0.0065, "step": 212070 }, { "epoch": 6.555603634790134, "grad_norm": 0.12343838065862656, "learning_rate": 1.672332632750201e-05, "loss": 0.0072, "step": 212100 }, { "epoch": 6.5565308771712925, "grad_norm": 0.12535178661346436, "learning_rate": 1.672286270631143e-05, "loss": 0.0064, "step": 212130 }, { "epoch": 6.557458119552451, "grad_norm": 0.10433994233608246, "learning_rate": 1.6722399085120853e-05, "loss": 0.0067, "step": 212160 }, { "epoch": 6.55838536193361, "grad_norm": 0.13382500410079956, "learning_rate": 1.672193546393027e-05, "loss": 0.0074, "step": 212190 }, { "epoch": 6.559312604314768, "grad_norm": 0.10343810170888901, "learning_rate": 1.6721471842739693e-05, "loss": 0.007, "step": 212220 }, { "epoch": 6.560239846695926, "grad_norm": 0.12561415135860443, "learning_rate": 1.6721008221549114e-05, "loss": 0.0064, "step": 212250 }, { "epoch": 6.561167089077085, "grad_norm": 0.18220151960849762, "learning_rate": 1.6720544600358536e-05, "loss": 0.0076, "step": 212280 }, { "epoch": 6.562094331458243, "grad_norm": 0.11403897404670715, "learning_rate": 1.6720080979167957e-05, "loss": 0.0068, "step": 212310 }, { "epoch": 6.563021573839402, "grad_norm": 0.14199285209178925, "learning_rate": 1.6719617357977375e-05, "loss": 0.0067, "step": 212340 }, { "epoch": 6.56394881622056, "grad_norm": 0.11890515685081482, "learning_rate": 1.6719153736786797e-05, "loss": 0.0081, "step": 212370 }, { "epoch": 6.564876058601719, "grad_norm": 0.12390828877687454, "learning_rate": 1.671869011559622e-05, "loss": 0.0073, "step": 212400 }, { "epoch": 6.565803300982877, "grad_norm": 0.1261163055896759, "learning_rate": 1.671822649440564e-05, "loss": 0.0072, "step": 212430 }, { "epoch": 6.566730543364035, "grad_norm": 0.125251904129982, "learning_rate": 1.671776287321506e-05, "loss": 0.0064, "step": 212460 }, { "epoch": 6.567657785745194, "grad_norm": 0.09423727542161942, "learning_rate": 1.6717299252024483e-05, "loss": 0.0065, "step": 212490 }, { "epoch": 6.5685850281263525, 
"grad_norm": 0.26539304852485657, "learning_rate": 1.67168356308339e-05, "loss": 0.0068, "step": 212520 }, { "epoch": 6.569512270507511, "grad_norm": 0.0964246466755867, "learning_rate": 1.6716372009643323e-05, "loss": 0.0066, "step": 212550 }, { "epoch": 6.570439512888669, "grad_norm": 0.11423394829034805, "learning_rate": 1.6715908388452744e-05, "loss": 0.0068, "step": 212580 }, { "epoch": 6.571366755269827, "grad_norm": 0.11829835921525955, "learning_rate": 1.6715444767262162e-05, "loss": 0.0066, "step": 212610 }, { "epoch": 6.572293997650986, "grad_norm": 0.07228963077068329, "learning_rate": 1.6714981146071584e-05, "loss": 0.007, "step": 212640 }, { "epoch": 6.573221240032145, "grad_norm": 0.0893048420548439, "learning_rate": 1.6714517524881005e-05, "loss": 0.0072, "step": 212670 }, { "epoch": 6.574148482413303, "grad_norm": 0.1176612451672554, "learning_rate": 1.6714053903690427e-05, "loss": 0.0073, "step": 212700 }, { "epoch": 6.575075724794461, "grad_norm": 0.098300039768219, "learning_rate": 1.6713590282499848e-05, "loss": 0.0067, "step": 212730 }, { "epoch": 6.5760029671756195, "grad_norm": 0.07982306182384491, "learning_rate": 1.6713126661309266e-05, "loss": 0.0064, "step": 212760 }, { "epoch": 6.576930209556778, "grad_norm": 0.1109389066696167, "learning_rate": 1.6712663040118688e-05, "loss": 0.0064, "step": 212790 }, { "epoch": 6.577857451937937, "grad_norm": 0.08708814531564713, "learning_rate": 1.6712214872967796e-05, "loss": 0.0067, "step": 212820 }, { "epoch": 6.578784694319095, "grad_norm": 0.12204902619123459, "learning_rate": 1.6711751251777214e-05, "loss": 0.0073, "step": 212850 }, { "epoch": 6.579711936700253, "grad_norm": 0.10945702344179153, "learning_rate": 1.6711287630586635e-05, "loss": 0.0067, "step": 212880 }, { "epoch": 6.580639179081412, "grad_norm": 0.06522005796432495, "learning_rate": 1.6710824009396057e-05, "loss": 0.0061, "step": 212910 }, { "epoch": 6.58156642146257, "grad_norm": 0.10440848767757416, "learning_rate": 
1.671036038820548e-05, "loss": 0.0065, "step": 212940 }, { "epoch": 6.582493663843729, "grad_norm": 0.1072811409831047, "learning_rate": 1.67098967670149e-05, "loss": 0.0071, "step": 212970 }, { "epoch": 6.583420906224887, "grad_norm": 0.12167050689458847, "learning_rate": 1.670943314582432e-05, "loss": 0.0069, "step": 213000 }, { "epoch": 6.584348148606046, "grad_norm": 0.13071084022521973, "learning_rate": 1.670896952463374e-05, "loss": 0.0072, "step": 213030 }, { "epoch": 6.585275390987204, "grad_norm": 0.11747697740793228, "learning_rate": 1.670850590344316e-05, "loss": 0.0064, "step": 213060 }, { "epoch": 6.586202633368362, "grad_norm": 0.1119549497961998, "learning_rate": 1.6708042282252582e-05, "loss": 0.007, "step": 213090 }, { "epoch": 6.587129875749521, "grad_norm": 0.11314625293016434, "learning_rate": 1.6707578661062004e-05, "loss": 0.0065, "step": 213120 }, { "epoch": 6.5880571181306795, "grad_norm": 0.1162201315164566, "learning_rate": 1.6707115039871426e-05, "loss": 0.0063, "step": 213150 }, { "epoch": 6.588984360511838, "grad_norm": 0.11694468557834625, "learning_rate": 1.6706651418680844e-05, "loss": 0.0069, "step": 213180 }, { "epoch": 6.589911602892997, "grad_norm": 0.14463432133197784, "learning_rate": 1.6706187797490265e-05, "loss": 0.006, "step": 213210 }, { "epoch": 6.590838845274154, "grad_norm": 0.1015770360827446, "learning_rate": 1.6705724176299687e-05, "loss": 0.0061, "step": 213240 }, { "epoch": 6.591766087655313, "grad_norm": 0.151952862739563, "learning_rate": 1.6705260555109105e-05, "loss": 0.0069, "step": 213270 }, { "epoch": 6.592693330036472, "grad_norm": 0.1359025537967682, "learning_rate": 1.6704796933918526e-05, "loss": 0.0074, "step": 213300 }, { "epoch": 6.59362057241763, "grad_norm": 0.21905402839183807, "learning_rate": 1.6704333312727948e-05, "loss": 0.0068, "step": 213330 }, { "epoch": 6.594547814798789, "grad_norm": 0.12064213305711746, "learning_rate": 1.670386969153737e-05, "loss": 0.006, "step": 213360 }, { "epoch": 
6.5954750571799465, "grad_norm": 0.10286117345094681, "learning_rate": 1.670340607034679e-05, "loss": 0.0068, "step": 213390 }, { "epoch": 6.596402299561105, "grad_norm": 0.10941694676876068, "learning_rate": 1.670294244915621e-05, "loss": 0.0069, "step": 213420 }, { "epoch": 6.597329541942264, "grad_norm": 0.09933742135763168, "learning_rate": 1.670247882796563e-05, "loss": 0.007, "step": 213450 }, { "epoch": 6.598256784323422, "grad_norm": 0.09415999054908752, "learning_rate": 1.6702015206775052e-05, "loss": 0.0065, "step": 213480 }, { "epoch": 6.599184026704581, "grad_norm": 0.13477115333080292, "learning_rate": 1.6701551585584473e-05, "loss": 0.0069, "step": 213510 }, { "epoch": 6.600111269085739, "grad_norm": 0.11570493876934052, "learning_rate": 1.6701087964393895e-05, "loss": 0.0068, "step": 213540 }, { "epoch": 6.601038511466897, "grad_norm": 0.11908814311027527, "learning_rate": 1.6700624343203316e-05, "loss": 0.0075, "step": 213570 }, { "epoch": 6.601965753848056, "grad_norm": 0.08689512312412262, "learning_rate": 1.6700160722012735e-05, "loss": 0.0066, "step": 213600 }, { "epoch": 6.602892996229214, "grad_norm": 0.13603198528289795, "learning_rate": 1.6699697100822156e-05, "loss": 0.0067, "step": 213630 }, { "epoch": 6.603820238610373, "grad_norm": 0.10723570734262466, "learning_rate": 1.6699233479631578e-05, "loss": 0.0069, "step": 213660 }, { "epoch": 6.604747480991531, "grad_norm": 0.10069446265697479, "learning_rate": 1.6698769858441e-05, "loss": 0.0067, "step": 213690 }, { "epoch": 6.605674723372689, "grad_norm": 0.09809190034866333, "learning_rate": 1.669830623725042e-05, "loss": 0.0072, "step": 213720 }, { "epoch": 6.606601965753848, "grad_norm": 0.12703709304332733, "learning_rate": 1.669784261605984e-05, "loss": 0.0072, "step": 213750 }, { "epoch": 6.6075292081350065, "grad_norm": 0.13065947592258453, "learning_rate": 1.669737899486926e-05, "loss": 0.0072, "step": 213780 }, { "epoch": 6.608456450516165, "grad_norm": 0.11223307996988297, 
"learning_rate": 1.669691537367868e-05, "loss": 0.006, "step": 213810 }, { "epoch": 6.609383692897324, "grad_norm": 0.08565757423639297, "learning_rate": 1.66964517524881e-05, "loss": 0.0064, "step": 213840 }, { "epoch": 6.6103109352784815, "grad_norm": 0.11509454995393753, "learning_rate": 1.669598813129752e-05, "loss": 0.0065, "step": 213870 }, { "epoch": 6.61123817765964, "grad_norm": 0.15427973866462708, "learning_rate": 1.6695524510106943e-05, "loss": 0.0072, "step": 213900 }, { "epoch": 6.612165420040799, "grad_norm": 0.0955590307712555, "learning_rate": 1.6695060888916364e-05, "loss": 0.0065, "step": 213930 }, { "epoch": 6.613092662421957, "grad_norm": 0.10290203243494034, "learning_rate": 1.6694597267725786e-05, "loss": 0.0067, "step": 213960 }, { "epoch": 6.614019904803116, "grad_norm": 0.09290821105241776, "learning_rate": 1.6694133646535207e-05, "loss": 0.0074, "step": 213990 }, { "epoch": 6.6149471471842745, "grad_norm": 0.10071594268083572, "learning_rate": 1.6693670025344625e-05, "loss": 0.0067, "step": 214020 }, { "epoch": 6.615874389565432, "grad_norm": 0.11080798506736755, "learning_rate": 1.6693206404154047e-05, "loss": 0.0073, "step": 214050 }, { "epoch": 6.616801631946591, "grad_norm": 0.11663137376308441, "learning_rate": 1.669274278296347e-05, "loss": 0.0064, "step": 214080 }, { "epoch": 6.617728874327749, "grad_norm": 0.10690411925315857, "learning_rate": 1.669227916177289e-05, "loss": 0.0064, "step": 214110 }, { "epoch": 6.618656116708908, "grad_norm": 0.1189749613404274, "learning_rate": 1.669181554058231e-05, "loss": 0.0071, "step": 214140 }, { "epoch": 6.619583359090067, "grad_norm": 0.1680896282196045, "learning_rate": 1.669135191939173e-05, "loss": 0.0063, "step": 214170 }, { "epoch": 6.620510601471224, "grad_norm": 0.11891434341669083, "learning_rate": 1.669088829820115e-05, "loss": 0.0066, "step": 214200 }, { "epoch": 6.621437843852383, "grad_norm": 0.11233536154031754, "learning_rate": 1.6690424677010573e-05, "loss": 0.0074, "step": 
214230 }, { "epoch": 6.6223650862335415, "grad_norm": 0.10328234732151031, "learning_rate": 1.668996105581999e-05, "loss": 0.0065, "step": 214260 }, { "epoch": 6.6232923286147, "grad_norm": 0.09819765388965607, "learning_rate": 1.6689497434629412e-05, "loss": 0.0074, "step": 214290 }, { "epoch": 6.624219570995859, "grad_norm": 0.09227444976568222, "learning_rate": 1.6689033813438834e-05, "loss": 0.0072, "step": 214320 }, { "epoch": 6.625146813377016, "grad_norm": 0.22872529923915863, "learning_rate": 1.6688570192248255e-05, "loss": 0.0075, "step": 214350 }, { "epoch": 6.626074055758175, "grad_norm": 0.13870468735694885, "learning_rate": 1.6688106571057677e-05, "loss": 0.008, "step": 214380 }, { "epoch": 6.627001298139334, "grad_norm": 0.19211578369140625, "learning_rate": 1.6687642949867095e-05, "loss": 0.0068, "step": 214410 }, { "epoch": 6.627928540520492, "grad_norm": 0.07272512465715408, "learning_rate": 1.6687179328676516e-05, "loss": 0.0066, "step": 214440 }, { "epoch": 6.628855782901651, "grad_norm": 0.14014360308647156, "learning_rate": 1.6686715707485938e-05, "loss": 0.0066, "step": 214470 }, { "epoch": 6.6297830252828085, "grad_norm": 0.11184421926736832, "learning_rate": 1.668625208629536e-05, "loss": 0.0075, "step": 214500 }, { "epoch": 6.630710267663967, "grad_norm": 0.1258832812309265, "learning_rate": 1.668578846510478e-05, "loss": 0.0066, "step": 214530 }, { "epoch": 6.631637510045126, "grad_norm": 0.18857651948928833, "learning_rate": 1.6685324843914202e-05, "loss": 0.0068, "step": 214560 }, { "epoch": 6.632564752426284, "grad_norm": 0.13909785449504852, "learning_rate": 1.668486122272362e-05, "loss": 0.0066, "step": 214590 }, { "epoch": 6.633491994807443, "grad_norm": 0.0785147100687027, "learning_rate": 1.6684397601533042e-05, "loss": 0.0067, "step": 214620 }, { "epoch": 6.634419237188601, "grad_norm": 0.1330125331878662, "learning_rate": 1.6683933980342464e-05, "loss": 0.0066, "step": 214650 }, { "epoch": 6.635346479569759, "grad_norm": 
0.08802423626184464, "learning_rate": 1.668347035915188e-05, "loss": 0.0066, "step": 214680 }, { "epoch": 6.636273721950918, "grad_norm": 0.13995444774627686, "learning_rate": 1.6683006737961303e-05, "loss": 0.0067, "step": 214710 }, { "epoch": 6.637200964332076, "grad_norm": 0.08052020519971848, "learning_rate": 1.6682543116770725e-05, "loss": 0.0064, "step": 214740 }, { "epoch": 6.638128206713235, "grad_norm": 0.17203238606452942, "learning_rate": 1.6682079495580146e-05, "loss": 0.007, "step": 214770 }, { "epoch": 6.639055449094394, "grad_norm": 0.11884748190641403, "learning_rate": 1.6681615874389568e-05, "loss": 0.0069, "step": 214800 }, { "epoch": 6.639982691475552, "grad_norm": 0.12912485003471375, "learning_rate": 1.6681152253198986e-05, "loss": 0.0069, "step": 214830 }, { "epoch": 6.64090993385671, "grad_norm": 0.22816912829875946, "learning_rate": 1.6680688632008407e-05, "loss": 0.0072, "step": 214860 }, { "epoch": 6.6418371762378685, "grad_norm": 0.055501099675893784, "learning_rate": 1.668022501081783e-05, "loss": 0.0058, "step": 214890 }, { "epoch": 6.642764418619027, "grad_norm": 0.19608867168426514, "learning_rate": 1.667976138962725e-05, "loss": 0.0071, "step": 214920 }, { "epoch": 6.643691661000186, "grad_norm": 0.08869903534650803, "learning_rate": 1.6679297768436672e-05, "loss": 0.0074, "step": 214950 }, { "epoch": 6.644618903381344, "grad_norm": 0.15530724823474884, "learning_rate": 1.6678834147246093e-05, "loss": 0.0072, "step": 214980 }, { "epoch": 6.645546145762502, "grad_norm": 0.07692775130271912, "learning_rate": 1.667837052605551e-05, "loss": 0.0062, "step": 215010 }, { "epoch": 6.646473388143661, "grad_norm": 0.1302281767129898, "learning_rate": 1.6677906904864933e-05, "loss": 0.0064, "step": 215040 }, { "epoch": 6.647400630524819, "grad_norm": 0.1193646639585495, "learning_rate": 1.6677443283674354e-05, "loss": 0.0065, "step": 215070 }, { "epoch": 6.648327872905978, "grad_norm": 0.15277598798274994, "learning_rate": 
1.6676979662483776e-05, "loss": 0.0066, "step": 215100 }, { "epoch": 6.649255115287136, "grad_norm": 0.18911531567573547, "learning_rate": 1.6676516041293197e-05, "loss": 0.0065, "step": 215130 }, { "epoch": 6.650182357668294, "grad_norm": 0.13616225123405457, "learning_rate": 1.6676052420102616e-05, "loss": 0.0068, "step": 215160 }, { "epoch": 6.651109600049453, "grad_norm": 0.1069486066699028, "learning_rate": 1.6675588798912037e-05, "loss": 0.0073, "step": 215190 }, { "epoch": 6.652036842430611, "grad_norm": 0.10408101975917816, "learning_rate": 1.667512517772146e-05, "loss": 0.0067, "step": 215220 }, { "epoch": 6.65296408481177, "grad_norm": 0.17702148854732513, "learning_rate": 1.6674661556530877e-05, "loss": 0.0062, "step": 215250 }, { "epoch": 6.6538913271929285, "grad_norm": 0.1289423257112503, "learning_rate": 1.6674197935340298e-05, "loss": 0.0071, "step": 215280 }, { "epoch": 6.654818569574086, "grad_norm": 0.13442951440811157, "learning_rate": 1.667373431414972e-05, "loss": 0.0067, "step": 215310 }, { "epoch": 6.655745811955245, "grad_norm": 0.10340479016304016, "learning_rate": 1.667327069295914e-05, "loss": 0.0068, "step": 215340 }, { "epoch": 6.656673054336403, "grad_norm": 0.10739283263683319, "learning_rate": 1.6672807071768563e-05, "loss": 0.0076, "step": 215370 }, { "epoch": 6.657600296717562, "grad_norm": 0.07823199778795242, "learning_rate": 1.6672343450577984e-05, "loss": 0.0073, "step": 215400 }, { "epoch": 6.658527539098721, "grad_norm": 0.08930569887161255, "learning_rate": 1.6671879829387402e-05, "loss": 0.0063, "step": 215430 }, { "epoch": 6.659454781479878, "grad_norm": 0.09255938977003098, "learning_rate": 1.6671416208196824e-05, "loss": 0.0069, "step": 215460 }, { "epoch": 6.660382023861037, "grad_norm": 0.15653513371944427, "learning_rate": 1.6670952587006245e-05, "loss": 0.0072, "step": 215490 }, { "epoch": 6.6613092662421955, "grad_norm": 0.05621834844350815, "learning_rate": 1.6670488965815667e-05, "loss": 0.0065, "step": 215520 }, 
{ "epoch": 6.662236508623354, "grad_norm": 0.1176978051662445, "learning_rate": 1.667002534462509e-05, "loss": 0.0063, "step": 215550 }, { "epoch": 6.663163751004513, "grad_norm": 0.12854528427124023, "learning_rate": 1.6669561723434507e-05, "loss": 0.0061, "step": 215580 }, { "epoch": 6.664090993385671, "grad_norm": 0.1437530219554901, "learning_rate": 1.6669098102243928e-05, "loss": 0.0062, "step": 215610 }, { "epoch": 6.66501823576683, "grad_norm": 0.0943167582154274, "learning_rate": 1.666863448105335e-05, "loss": 0.0069, "step": 215640 }, { "epoch": 6.665945478147988, "grad_norm": 0.11197052896022797, "learning_rate": 1.6668170859862768e-05, "loss": 0.0064, "step": 215670 }, { "epoch": 6.666872720529146, "grad_norm": 0.14849671721458435, "learning_rate": 1.666770723867219e-05, "loss": 0.0068, "step": 215700 }, { "epoch": 6.667799962910305, "grad_norm": 0.09695716202259064, "learning_rate": 1.666724361748161e-05, "loss": 0.007, "step": 215730 }, { "epoch": 6.6687272052914635, "grad_norm": 0.09775086492300034, "learning_rate": 1.6666779996291032e-05, "loss": 0.0069, "step": 215760 }, { "epoch": 6.669654447672622, "grad_norm": 0.12531425058841705, "learning_rate": 1.6666316375100454e-05, "loss": 0.0064, "step": 215790 }, { "epoch": 6.67058169005378, "grad_norm": 0.12236679345369339, "learning_rate": 1.6665852753909872e-05, "loss": 0.0065, "step": 215820 }, { "epoch": 6.671508932434938, "grad_norm": 0.13314874470233917, "learning_rate": 1.6665389132719293e-05, "loss": 0.0065, "step": 215850 }, { "epoch": 6.672436174816097, "grad_norm": 0.11818353831768036, "learning_rate": 1.6664925511528715e-05, "loss": 0.0074, "step": 215880 }, { "epoch": 6.673363417197256, "grad_norm": 0.13079017400741577, "learning_rate": 1.6664461890338136e-05, "loss": 0.0061, "step": 215910 }, { "epoch": 6.674290659578414, "grad_norm": 0.10318050533533096, "learning_rate": 1.6663998269147558e-05, "loss": 0.0073, "step": 215940 }, { "epoch": 6.675217901959572, "grad_norm": 
0.13784539699554443, "learning_rate": 1.666353464795698e-05, "loss": 0.0069, "step": 215970 }, { "epoch": 6.6761451443407305, "grad_norm": 0.10801097750663757, "learning_rate": 1.6663071026766397e-05, "loss": 0.0063, "step": 216000 }, { "epoch": 6.677072386721889, "grad_norm": 0.15752020478248596, "learning_rate": 1.666260740557582e-05, "loss": 0.0066, "step": 216030 }, { "epoch": 6.677999629103048, "grad_norm": 0.1013152152299881, "learning_rate": 1.666214378438524e-05, "loss": 0.0064, "step": 216060 }, { "epoch": 6.678926871484206, "grad_norm": 0.10771851986646652, "learning_rate": 1.666168016319466e-05, "loss": 0.0071, "step": 216090 }, { "epoch": 6.679854113865364, "grad_norm": 0.12147283554077148, "learning_rate": 1.666121654200408e-05, "loss": 0.0065, "step": 216120 }, { "epoch": 6.680781356246523, "grad_norm": 0.10403239727020264, "learning_rate": 1.66607529208135e-05, "loss": 0.0071, "step": 216150 }, { "epoch": 6.681708598627681, "grad_norm": 0.07967869937419891, "learning_rate": 1.6660289299622923e-05, "loss": 0.0071, "step": 216180 }, { "epoch": 6.68263584100884, "grad_norm": 0.1382605880498886, "learning_rate": 1.6659825678432345e-05, "loss": 0.0069, "step": 216210 }, { "epoch": 6.683563083389998, "grad_norm": 0.06624667346477509, "learning_rate": 1.6659362057241763e-05, "loss": 0.0061, "step": 216240 }, { "epoch": 6.684490325771156, "grad_norm": 0.09028830379247665, "learning_rate": 1.6658898436051184e-05, "loss": 0.0059, "step": 216270 }, { "epoch": 6.685417568152315, "grad_norm": 0.13171179592609406, "learning_rate": 1.6658434814860606e-05, "loss": 0.007, "step": 216300 }, { "epoch": 6.686344810533473, "grad_norm": 0.09085663408041, "learning_rate": 1.6657971193670027e-05, "loss": 0.0065, "step": 216330 }, { "epoch": 6.687272052914632, "grad_norm": 0.10952872782945633, "learning_rate": 1.665750757247945e-05, "loss": 0.0073, "step": 216360 }, { "epoch": 6.6881992952957905, "grad_norm": 0.16118642687797546, "learning_rate": 1.665704395128887e-05, 
"loss": 0.0077, "step": 216390 }, { "epoch": 6.689126537676949, "grad_norm": 0.11099553108215332, "learning_rate": 1.665658033009829e-05, "loss": 0.0065, "step": 216420 }, { "epoch": 6.690053780058107, "grad_norm": 0.12865294516086578, "learning_rate": 1.665611670890771e-05, "loss": 0.0076, "step": 216450 }, { "epoch": 6.690981022439265, "grad_norm": 0.10036913305521011, "learning_rate": 1.6655653087717128e-05, "loss": 0.0077, "step": 216480 }, { "epoch": 6.691908264820424, "grad_norm": 0.08057805150747299, "learning_rate": 1.6655189466526553e-05, "loss": 0.0067, "step": 216510 }, { "epoch": 6.692835507201583, "grad_norm": 0.08672299981117249, "learning_rate": 1.6654725845335974e-05, "loss": 0.007, "step": 216540 }, { "epoch": 6.693762749582741, "grad_norm": 0.07525616884231567, "learning_rate": 1.6654262224145392e-05, "loss": 0.0065, "step": 216570 }, { "epoch": 6.6946899919639, "grad_norm": 0.10579311847686768, "learning_rate": 1.6653798602954814e-05, "loss": 0.0072, "step": 216600 }, { "epoch": 6.6956172343450575, "grad_norm": 0.1472739279270172, "learning_rate": 1.6653334981764235e-05, "loss": 0.0066, "step": 216630 }, { "epoch": 6.696544476726216, "grad_norm": 0.10608994215726852, "learning_rate": 1.6652871360573654e-05, "loss": 0.0064, "step": 216660 }, { "epoch": 6.697471719107375, "grad_norm": 0.16811873018741608, "learning_rate": 1.6652407739383075e-05, "loss": 0.0064, "step": 216690 }, { "epoch": 6.698398961488533, "grad_norm": 0.10093029588460922, "learning_rate": 1.6651944118192497e-05, "loss": 0.0072, "step": 216720 }, { "epoch": 6.699326203869692, "grad_norm": 0.06984079629182816, "learning_rate": 1.6651480497001918e-05, "loss": 0.0069, "step": 216750 }, { "epoch": 6.70025344625085, "grad_norm": 0.12149040400981903, "learning_rate": 1.665101687581134e-05, "loss": 0.0069, "step": 216780 }, { "epoch": 6.701180688632008, "grad_norm": 0.10226695984601974, "learning_rate": 1.6650553254620758e-05, "loss": 0.0072, "step": 216810 }, { "epoch": 
6.702107931013167, "grad_norm": 0.08088677376508713, "learning_rate": 1.665008963343018e-05, "loss": 0.0064, "step": 216840 }, { "epoch": 6.703035173394325, "grad_norm": 0.10490935295820236, "learning_rate": 1.66496260122396e-05, "loss": 0.0068, "step": 216870 }, { "epoch": 6.703962415775484, "grad_norm": 0.11471214890480042, "learning_rate": 1.6649162391049022e-05, "loss": 0.0062, "step": 216900 }, { "epoch": 6.704889658156642, "grad_norm": 0.13510559499263763, "learning_rate": 1.6648698769858444e-05, "loss": 0.0069, "step": 216930 }, { "epoch": 6.7058169005378, "grad_norm": 0.18289197981357574, "learning_rate": 1.6648250602707548e-05, "loss": 0.0068, "step": 216960 }, { "epoch": 6.706744142918959, "grad_norm": 0.10226546972990036, "learning_rate": 1.664778698151697e-05, "loss": 0.0065, "step": 216990 }, { "epoch": 6.7076713853001175, "grad_norm": 0.09001630544662476, "learning_rate": 1.664732336032639e-05, "loss": 0.0066, "step": 217020 }, { "epoch": 6.708598627681276, "grad_norm": 0.09517693519592285, "learning_rate": 1.6646859739135813e-05, "loss": 0.006, "step": 217050 }, { "epoch": 6.709525870062434, "grad_norm": 0.12962183356285095, "learning_rate": 1.664639611794523e-05, "loss": 0.0071, "step": 217080 }, { "epoch": 6.710453112443592, "grad_norm": 0.08310204744338989, "learning_rate": 1.6645932496754652e-05, "loss": 0.0068, "step": 217110 }, { "epoch": 6.711380354824751, "grad_norm": 0.18376970291137695, "learning_rate": 1.6645468875564074e-05, "loss": 0.007, "step": 217140 }, { "epoch": 6.71230759720591, "grad_norm": 0.10898535698652267, "learning_rate": 1.6645005254373495e-05, "loss": 0.0064, "step": 217170 }, { "epoch": 6.713234839587068, "grad_norm": 0.1278771311044693, "learning_rate": 1.6644541633182917e-05, "loss": 0.0064, "step": 217200 }, { "epoch": 6.714162081968227, "grad_norm": 0.12323667854070663, "learning_rate": 1.6644078011992335e-05, "loss": 0.0071, "step": 217230 }, { "epoch": 6.7150893243493845, "grad_norm": 0.2257101684808731, 
"learning_rate": 1.6643614390801757e-05, "loss": 0.0069, "step": 217260 }, { "epoch": 6.716016566730543, "grad_norm": 0.11324617266654968, "learning_rate": 1.6643150769611178e-05, "loss": 0.0072, "step": 217290 }, { "epoch": 6.716943809111702, "grad_norm": 0.0775521844625473, "learning_rate": 1.6642687148420596e-05, "loss": 0.0064, "step": 217320 }, { "epoch": 6.71787105149286, "grad_norm": 0.10056030750274658, "learning_rate": 1.6642223527230018e-05, "loss": 0.0076, "step": 217350 }, { "epoch": 6.718798293874019, "grad_norm": 0.10164546221494675, "learning_rate": 1.664175990603944e-05, "loss": 0.0069, "step": 217380 }, { "epoch": 6.7197255362551775, "grad_norm": 0.07851433753967285, "learning_rate": 1.664129628484886e-05, "loss": 0.0064, "step": 217410 }, { "epoch": 6.720652778636335, "grad_norm": 0.11677014082670212, "learning_rate": 1.6640832663658282e-05, "loss": 0.0075, "step": 217440 }, { "epoch": 6.721580021017494, "grad_norm": 0.11925647407770157, "learning_rate": 1.6640369042467704e-05, "loss": 0.0071, "step": 217470 }, { "epoch": 6.7225072633986525, "grad_norm": 0.10777787119150162, "learning_rate": 1.6639905421277122e-05, "loss": 0.0069, "step": 217500 }, { "epoch": 6.723434505779811, "grad_norm": 0.14391179382801056, "learning_rate": 1.6639441800086543e-05, "loss": 0.0064, "step": 217530 }, { "epoch": 6.72436174816097, "grad_norm": 0.08095601201057434, "learning_rate": 1.6638978178895965e-05, "loss": 0.0063, "step": 217560 }, { "epoch": 6.725288990542127, "grad_norm": 0.10821591317653656, "learning_rate": 1.6638514557705386e-05, "loss": 0.0067, "step": 217590 }, { "epoch": 6.726216232923286, "grad_norm": 0.133844256401062, "learning_rate": 1.6638050936514808e-05, "loss": 0.0073, "step": 217620 }, { "epoch": 6.727143475304445, "grad_norm": 0.09286193549633026, "learning_rate": 1.6637587315324226e-05, "loss": 0.0063, "step": 217650 }, { "epoch": 6.728070717685603, "grad_norm": 0.1445331573486328, "learning_rate": 1.6637123694133648e-05, "loss": 0.0066, 
"step": 217680 }, { "epoch": 6.728997960066762, "grad_norm": 0.12260215729475021, "learning_rate": 1.663666007294307e-05, "loss": 0.0067, "step": 217710 }, { "epoch": 6.7299252024479195, "grad_norm": 0.07768591493368149, "learning_rate": 1.6636196451752487e-05, "loss": 0.0065, "step": 217740 }, { "epoch": 6.730852444829078, "grad_norm": 0.09977264702320099, "learning_rate": 1.6635732830561912e-05, "loss": 0.0073, "step": 217770 }, { "epoch": 6.731779687210237, "grad_norm": 0.11241482198238373, "learning_rate": 1.6635269209371334e-05, "loss": 0.0067, "step": 217800 }, { "epoch": 6.732706929591395, "grad_norm": 0.10291510820388794, "learning_rate": 1.663480558818075e-05, "loss": 0.0064, "step": 217830 }, { "epoch": 6.733634171972554, "grad_norm": 0.08756325393915176, "learning_rate": 1.6634341966990173e-05, "loss": 0.0074, "step": 217860 }, { "epoch": 6.734561414353712, "grad_norm": 0.10299757122993469, "learning_rate": 1.663387834579959e-05, "loss": 0.0064, "step": 217890 }, { "epoch": 6.73548865673487, "grad_norm": 0.13955476880073547, "learning_rate": 1.6633414724609013e-05, "loss": 0.0065, "step": 217920 }, { "epoch": 6.736415899116029, "grad_norm": 0.09840158373117447, "learning_rate": 1.6632951103418434e-05, "loss": 0.0068, "step": 217950 }, { "epoch": 6.737343141497187, "grad_norm": 0.10342539846897125, "learning_rate": 1.6632487482227856e-05, "loss": 0.0069, "step": 217980 }, { "epoch": 6.738270383878346, "grad_norm": 0.10910646617412567, "learning_rate": 1.6632023861037277e-05, "loss": 0.0069, "step": 218010 }, { "epoch": 6.739197626259505, "grad_norm": 0.11415237933397293, "learning_rate": 1.66315602398467e-05, "loss": 0.0068, "step": 218040 }, { "epoch": 6.740124868640662, "grad_norm": 0.10274941474199295, "learning_rate": 1.6631096618656117e-05, "loss": 0.0061, "step": 218070 }, { "epoch": 6.741052111021821, "grad_norm": 0.11738670617341995, "learning_rate": 1.663063299746554e-05, "loss": 0.0066, "step": 218100 }, { "epoch": 6.7419793534029795, 
"grad_norm": 0.1256629228591919, "learning_rate": 1.663016937627496e-05, "loss": 0.007, "step": 218130 }, { "epoch": 6.742906595784138, "grad_norm": 0.08471283316612244, "learning_rate": 1.662970575508438e-05, "loss": 0.007, "step": 218160 }, { "epoch": 6.743833838165297, "grad_norm": 0.11750545352697372, "learning_rate": 1.6629242133893803e-05, "loss": 0.0073, "step": 218190 }, { "epoch": 6.744761080546455, "grad_norm": 0.11365807056427002, "learning_rate": 1.6628778512703224e-05, "loss": 0.0067, "step": 218220 }, { "epoch": 6.745688322927613, "grad_norm": 0.12410526722669601, "learning_rate": 1.6628314891512643e-05, "loss": 0.0066, "step": 218250 }, { "epoch": 6.746615565308772, "grad_norm": 0.11182851344347, "learning_rate": 1.6627851270322064e-05, "loss": 0.0069, "step": 218280 }, { "epoch": 6.74754280768993, "grad_norm": 0.10931484401226044, "learning_rate": 1.6627387649131482e-05, "loss": 0.0068, "step": 218310 }, { "epoch": 6.748470050071089, "grad_norm": 0.05815606936812401, "learning_rate": 1.6626924027940904e-05, "loss": 0.0063, "step": 218340 }, { "epoch": 6.749397292452247, "grad_norm": 0.11353457719087601, "learning_rate": 1.6626460406750325e-05, "loss": 0.0067, "step": 218370 }, { "epoch": 6.750324534833405, "grad_norm": 0.1329115778207779, "learning_rate": 1.6625996785559747e-05, "loss": 0.007, "step": 218400 }, { "epoch": 6.751251777214564, "grad_norm": 0.09669420123100281, "learning_rate": 1.6625533164369168e-05, "loss": 0.0062, "step": 218430 }, { "epoch": 6.752179019595722, "grad_norm": 0.09874731302261353, "learning_rate": 1.662506954317859e-05, "loss": 0.0075, "step": 218460 }, { "epoch": 6.753106261976881, "grad_norm": 0.10181214660406113, "learning_rate": 1.6624605921988008e-05, "loss": 0.0072, "step": 218490 }, { "epoch": 6.7540335043580395, "grad_norm": 0.14584152400493622, "learning_rate": 1.662414230079743e-05, "loss": 0.0062, "step": 218520 }, { "epoch": 6.754960746739197, "grad_norm": 0.10931218415498734, "learning_rate": 
1.662367867960685e-05, "loss": 0.0065, "step": 218550 }, { "epoch": 6.755887989120356, "grad_norm": 0.1096019446849823, "learning_rate": 1.6623215058416272e-05, "loss": 0.0071, "step": 218580 }, { "epoch": 6.756815231501514, "grad_norm": 0.12316737323999405, "learning_rate": 1.6622751437225694e-05, "loss": 0.0068, "step": 218610 }, { "epoch": 6.757742473882673, "grad_norm": 0.14319458603858948, "learning_rate": 1.6622287816035112e-05, "loss": 0.0069, "step": 218640 }, { "epoch": 6.758669716263832, "grad_norm": 0.2181040495634079, "learning_rate": 1.6621824194844533e-05, "loss": 0.007, "step": 218670 }, { "epoch": 6.759596958644989, "grad_norm": 0.09195408225059509, "learning_rate": 1.6621360573653955e-05, "loss": 0.0072, "step": 218700 }, { "epoch": 6.760524201026148, "grad_norm": 0.10741183161735535, "learning_rate": 1.6620896952463373e-05, "loss": 0.0069, "step": 218730 }, { "epoch": 6.7614514434073065, "grad_norm": 0.12087181955575943, "learning_rate": 1.6620433331272795e-05, "loss": 0.0066, "step": 218760 }, { "epoch": 6.762378685788465, "grad_norm": 0.07416723668575287, "learning_rate": 1.6619969710082216e-05, "loss": 0.0062, "step": 218790 }, { "epoch": 6.763305928169624, "grad_norm": 0.13167543709278107, "learning_rate": 1.6619506088891638e-05, "loss": 0.0064, "step": 218820 }, { "epoch": 6.764233170550782, "grad_norm": 0.11767512559890747, "learning_rate": 1.661904246770106e-05, "loss": 0.0071, "step": 218850 }, { "epoch": 6.76516041293194, "grad_norm": 0.11525760591030121, "learning_rate": 1.661857884651048e-05, "loss": 0.0072, "step": 218880 }, { "epoch": 6.766087655313099, "grad_norm": 0.06971430033445358, "learning_rate": 1.66181152253199e-05, "loss": 0.0072, "step": 218910 }, { "epoch": 6.767014897694257, "grad_norm": 0.10772276669740677, "learning_rate": 1.661765160412932e-05, "loss": 0.0064, "step": 218940 }, { "epoch": 6.767942140075416, "grad_norm": 0.15084673464298248, "learning_rate": 1.6617187982938742e-05, "loss": 0.0064, "step": 218970 }, { 
"epoch": 6.768869382456574, "grad_norm": 0.12453760951757431, "learning_rate": 1.6616724361748163e-05, "loss": 0.0062, "step": 219000 }, { "epoch": 6.769796624837733, "grad_norm": 0.15685343742370605, "learning_rate": 1.6616260740557585e-05, "loss": 0.0076, "step": 219030 }, { "epoch": 6.770723867218891, "grad_norm": 0.1676337867975235, "learning_rate": 1.6615797119367003e-05, "loss": 0.0061, "step": 219060 }, { "epoch": 6.771651109600049, "grad_norm": 0.11107809096574783, "learning_rate": 1.6615333498176424e-05, "loss": 0.0065, "step": 219090 }, { "epoch": 6.772578351981208, "grad_norm": 0.11462664604187012, "learning_rate": 1.6614869876985846e-05, "loss": 0.0071, "step": 219120 }, { "epoch": 6.7735055943623665, "grad_norm": 0.10255120694637299, "learning_rate": 1.6614406255795264e-05, "loss": 0.0069, "step": 219150 }, { "epoch": 6.774432836743525, "grad_norm": 0.07775206118822098, "learning_rate": 1.661394263460469e-05, "loss": 0.0068, "step": 219180 }, { "epoch": 6.775360079124683, "grad_norm": 0.17607305943965912, "learning_rate": 1.661347901341411e-05, "loss": 0.0062, "step": 219210 }, { "epoch": 6.7762873215058415, "grad_norm": 0.12656435370445251, "learning_rate": 1.661301539222353e-05, "loss": 0.0078, "step": 219240 }, { "epoch": 6.777214563887, "grad_norm": 0.10554333031177521, "learning_rate": 1.661255177103295e-05, "loss": 0.0065, "step": 219270 }, { "epoch": 6.778141806268159, "grad_norm": 0.08550220727920532, "learning_rate": 1.6612088149842368e-05, "loss": 0.007, "step": 219300 }, { "epoch": 6.779069048649317, "grad_norm": 0.15929608047008514, "learning_rate": 1.661162452865179e-05, "loss": 0.0063, "step": 219330 }, { "epoch": 6.779996291030475, "grad_norm": 0.09663520753383636, "learning_rate": 1.661116090746121e-05, "loss": 0.0069, "step": 219360 }, { "epoch": 6.780923533411634, "grad_norm": 0.09461046755313873, "learning_rate": 1.6610697286270633e-05, "loss": 0.0067, "step": 219390 }, { "epoch": 6.781850775792792, "grad_norm": 0.23141396045684814, 
"learning_rate": 1.6610233665080054e-05, "loss": 0.0073, "step": 219420 }, { "epoch": 6.782778018173951, "grad_norm": 0.10457944869995117, "learning_rate": 1.6609785497929162e-05, "loss": 0.0066, "step": 219450 }, { "epoch": 6.783705260555109, "grad_norm": 0.11088473349809647, "learning_rate": 1.660932187673858e-05, "loss": 0.0065, "step": 219480 }, { "epoch": 6.784632502936267, "grad_norm": 0.1413537710905075, "learning_rate": 1.6608858255548002e-05, "loss": 0.0072, "step": 219510 }, { "epoch": 6.785559745317426, "grad_norm": 0.11882088333368301, "learning_rate": 1.6608394634357423e-05, "loss": 0.0074, "step": 219540 }, { "epoch": 6.786486987698584, "grad_norm": 0.118675597012043, "learning_rate": 1.660793101316684e-05, "loss": 0.0067, "step": 219570 }, { "epoch": 6.787414230079743, "grad_norm": 0.1301857978105545, "learning_rate": 1.6607467391976263e-05, "loss": 0.0065, "step": 219600 }, { "epoch": 6.7883414724609015, "grad_norm": 0.12570033967494965, "learning_rate": 1.6607003770785684e-05, "loss": 0.0065, "step": 219630 }, { "epoch": 6.78926871484206, "grad_norm": 0.10636074841022491, "learning_rate": 1.6606540149595106e-05, "loss": 0.0069, "step": 219660 }, { "epoch": 6.790195957223218, "grad_norm": 0.10239957273006439, "learning_rate": 1.6606076528404527e-05, "loss": 0.0065, "step": 219690 }, { "epoch": 6.791123199604376, "grad_norm": 0.10871639847755432, "learning_rate": 1.6605612907213945e-05, "loss": 0.0068, "step": 219720 }, { "epoch": 6.792050441985535, "grad_norm": 0.14533604681491852, "learning_rate": 1.6605149286023367e-05, "loss": 0.0067, "step": 219750 }, { "epoch": 6.792977684366694, "grad_norm": 0.1055879071354866, "learning_rate": 1.660468566483279e-05, "loss": 0.0069, "step": 219780 }, { "epoch": 6.793904926747852, "grad_norm": 0.1265963464975357, "learning_rate": 1.660422204364221e-05, "loss": 0.0068, "step": 219810 }, { "epoch": 6.794832169129011, "grad_norm": 0.1294436752796173, "learning_rate": 1.660375842245163e-05, "loss": 0.0073, "step": 
219840 }, { "epoch": 6.7957594115101685, "grad_norm": 0.10824637860059738, "learning_rate": 1.6603294801261053e-05, "loss": 0.0068, "step": 219870 }, { "epoch": 6.796686653891327, "grad_norm": 0.1066485121846199, "learning_rate": 1.660283118007047e-05, "loss": 0.0064, "step": 219900 }, { "epoch": 6.797613896272486, "grad_norm": 0.1269397735595703, "learning_rate": 1.6602367558879893e-05, "loss": 0.0076, "step": 219930 }, { "epoch": 6.798541138653644, "grad_norm": 0.1484232395887375, "learning_rate": 1.6601903937689314e-05, "loss": 0.0072, "step": 219960 }, { "epoch": 6.799468381034803, "grad_norm": 0.13303835690021515, "learning_rate": 1.6601440316498732e-05, "loss": 0.0065, "step": 219990 }, { "epoch": 6.800395623415961, "grad_norm": 0.11802944540977478, "learning_rate": 1.6600976695308154e-05, "loss": 0.0072, "step": 220020 }, { "epoch": 6.801322865797119, "grad_norm": 0.10013256222009659, "learning_rate": 1.6600513074117575e-05, "loss": 0.007, "step": 220050 }, { "epoch": 6.802250108178278, "grad_norm": 0.13330425322055817, "learning_rate": 1.6600049452926997e-05, "loss": 0.0065, "step": 220080 }, { "epoch": 6.803177350559436, "grad_norm": 0.12357717752456665, "learning_rate": 1.6599585831736418e-05, "loss": 0.0068, "step": 220110 }, { "epoch": 6.804104592940595, "grad_norm": 0.10767675191164017, "learning_rate": 1.6599122210545836e-05, "loss": 0.0063, "step": 220140 }, { "epoch": 6.805031835321753, "grad_norm": 0.13483595848083496, "learning_rate": 1.6598658589355258e-05, "loss": 0.0071, "step": 220170 }, { "epoch": 6.805959077702911, "grad_norm": 0.12818080186843872, "learning_rate": 1.659819496816468e-05, "loss": 0.0066, "step": 220200 }, { "epoch": 6.80688632008407, "grad_norm": 0.11035718768835068, "learning_rate": 1.65977313469741e-05, "loss": 0.0071, "step": 220230 }, { "epoch": 6.8078135624652285, "grad_norm": 0.08432400226593018, "learning_rate": 1.6597267725783522e-05, "loss": 0.0069, "step": 220260 }, { "epoch": 6.808740804846387, "grad_norm": 
0.08281715959310532, "learning_rate": 1.6596804104592944e-05, "loss": 0.0063, "step": 220290 }, { "epoch": 6.809668047227545, "grad_norm": 0.10143683850765228, "learning_rate": 1.6596340483402362e-05, "loss": 0.0068, "step": 220320 }, { "epoch": 6.810595289608703, "grad_norm": 0.08614695072174072, "learning_rate": 1.6595876862211784e-05, "loss": 0.0066, "step": 220350 }, { "epoch": 6.811522531989862, "grad_norm": 0.1534758061170578, "learning_rate": 1.65954132410212e-05, "loss": 0.0075, "step": 220380 }, { "epoch": 6.812449774371021, "grad_norm": 0.13417194783687592, "learning_rate": 1.6594949619830623e-05, "loss": 0.0066, "step": 220410 }, { "epoch": 6.813377016752179, "grad_norm": 0.13667091727256775, "learning_rate": 1.6594485998640045e-05, "loss": 0.0068, "step": 220440 }, { "epoch": 6.814304259133338, "grad_norm": 0.15176960825920105, "learning_rate": 1.6594022377449466e-05, "loss": 0.0065, "step": 220470 }, { "epoch": 6.8152315015144955, "grad_norm": 0.14827348291873932, "learning_rate": 1.6593558756258888e-05, "loss": 0.007, "step": 220500 }, { "epoch": 6.816158743895654, "grad_norm": 0.14749875664710999, "learning_rate": 1.659309513506831e-05, "loss": 0.0066, "step": 220530 }, { "epoch": 6.817085986276813, "grad_norm": 0.11390510201454163, "learning_rate": 1.6592631513877727e-05, "loss": 0.0062, "step": 220560 }, { "epoch": 6.818013228657971, "grad_norm": 0.11673945188522339, "learning_rate": 1.659216789268715e-05, "loss": 0.0072, "step": 220590 }, { "epoch": 6.81894047103913, "grad_norm": 0.09445738792419434, "learning_rate": 1.659170427149657e-05, "loss": 0.0063, "step": 220620 }, { "epoch": 6.8198677134202885, "grad_norm": 0.15451733767986298, "learning_rate": 1.6591240650305992e-05, "loss": 0.0069, "step": 220650 }, { "epoch": 6.820794955801446, "grad_norm": 0.11643660068511963, "learning_rate": 1.6590777029115413e-05, "loss": 0.007, "step": 220680 }, { "epoch": 6.821722198182605, "grad_norm": 0.1794782429933548, "learning_rate": 1.659031340792483e-05, 
"loss": 0.0068, "step": 220710 }, { "epoch": 6.822649440563763, "grad_norm": 0.07758241891860962, "learning_rate": 1.6589849786734253e-05, "loss": 0.0068, "step": 220740 }, { "epoch": 6.823576682944922, "grad_norm": 0.13273347914218903, "learning_rate": 1.6589386165543674e-05, "loss": 0.0072, "step": 220770 }, { "epoch": 6.824503925326081, "grad_norm": 0.0978551059961319, "learning_rate": 1.6588922544353096e-05, "loss": 0.0065, "step": 220800 }, { "epoch": 6.825431167707238, "grad_norm": 0.09587259590625763, "learning_rate": 1.6588458923162518e-05, "loss": 0.0064, "step": 220830 }, { "epoch": 6.826358410088397, "grad_norm": 0.1180201917886734, "learning_rate": 1.658799530197194e-05, "loss": 0.0068, "step": 220860 }, { "epoch": 6.8272856524695555, "grad_norm": 0.1549198031425476, "learning_rate": 1.6587531680781357e-05, "loss": 0.0067, "step": 220890 }, { "epoch": 6.828212894850714, "grad_norm": 0.13445723056793213, "learning_rate": 1.658706805959078e-05, "loss": 0.007, "step": 220920 }, { "epoch": 6.829140137231873, "grad_norm": 0.15645457804203033, "learning_rate": 1.65866044384002e-05, "loss": 0.0069, "step": 220950 }, { "epoch": 6.8300673796130305, "grad_norm": 0.11564476042985916, "learning_rate": 1.6586140817209618e-05, "loss": 0.0063, "step": 220980 }, { "epoch": 6.830994621994189, "grad_norm": 0.16375824809074402, "learning_rate": 1.658567719601904e-05, "loss": 0.0063, "step": 221010 }, { "epoch": 6.831921864375348, "grad_norm": 0.09274241328239441, "learning_rate": 1.658521357482846e-05, "loss": 0.0073, "step": 221040 }, { "epoch": 6.832849106756506, "grad_norm": 0.1082690879702568, "learning_rate": 1.6584749953637883e-05, "loss": 0.0067, "step": 221070 }, { "epoch": 6.833776349137665, "grad_norm": 0.13126802444458008, "learning_rate": 1.6584286332447304e-05, "loss": 0.0066, "step": 221100 }, { "epoch": 6.834703591518823, "grad_norm": 0.11518643796443939, "learning_rate": 1.6583822711256722e-05, "loss": 0.0069, "step": 221130 }, { "epoch": 
6.835630833899981, "grad_norm": 0.10320433229207993, "learning_rate": 1.6583359090066144e-05, "loss": 0.0066, "step": 221160 }, { "epoch": 6.83655807628114, "grad_norm": 0.09414331614971161, "learning_rate": 1.6582895468875565e-05, "loss": 0.0062, "step": 221190 }, { "epoch": 6.837485318662298, "grad_norm": 0.09747131168842316, "learning_rate": 1.6582431847684987e-05, "loss": 0.0061, "step": 221220 }, { "epoch": 6.838412561043457, "grad_norm": 0.1533145010471344, "learning_rate": 1.658196822649441e-05, "loss": 0.0072, "step": 221250 }, { "epoch": 6.839339803424616, "grad_norm": 0.09332787990570068, "learning_rate": 1.658150460530383e-05, "loss": 0.006, "step": 221280 }, { "epoch": 6.840267045805773, "grad_norm": 0.1595311164855957, "learning_rate": 1.6581040984113248e-05, "loss": 0.0061, "step": 221310 }, { "epoch": 6.841194288186932, "grad_norm": 0.07920733094215393, "learning_rate": 1.658057736292267e-05, "loss": 0.007, "step": 221340 }, { "epoch": 6.8421215305680905, "grad_norm": 0.08722444623708725, "learning_rate": 1.658011374173209e-05, "loss": 0.0063, "step": 221370 }, { "epoch": 6.843048772949249, "grad_norm": 0.10995221138000488, "learning_rate": 1.657965012054151e-05, "loss": 0.007, "step": 221400 }, { "epoch": 6.843976015330408, "grad_norm": 0.09590472280979156, "learning_rate": 1.657918649935093e-05, "loss": 0.007, "step": 221430 }, { "epoch": 6.844903257711566, "grad_norm": 0.07851625233888626, "learning_rate": 1.6578722878160352e-05, "loss": 0.0063, "step": 221460 }, { "epoch": 6.845830500092724, "grad_norm": 0.09141571819782257, "learning_rate": 1.6578259256969774e-05, "loss": 0.0065, "step": 221490 }, { "epoch": 6.846757742473883, "grad_norm": 0.09020315855741501, "learning_rate": 1.6577795635779195e-05, "loss": 0.0068, "step": 221520 }, { "epoch": 6.847684984855041, "grad_norm": 0.09574558585882187, "learning_rate": 1.6577332014588613e-05, "loss": 0.007, "step": 221550 }, { "epoch": 6.8486122272362, "grad_norm": 0.11276879161596298, 
"learning_rate": 1.6576868393398035e-05, "loss": 0.0067, "step": 221580 }, { "epoch": 6.849539469617358, "grad_norm": 0.10814716666936874, "learning_rate": 1.6576404772207456e-05, "loss": 0.0075, "step": 221610 }, { "epoch": 6.850466711998516, "grad_norm": 0.13825996220111847, "learning_rate": 1.6575941151016878e-05, "loss": 0.0068, "step": 221640 }, { "epoch": 6.851393954379675, "grad_norm": 0.17164577543735504, "learning_rate": 1.6575492983865982e-05, "loss": 0.0074, "step": 221670 }, { "epoch": 6.852321196760833, "grad_norm": 0.09505561739206314, "learning_rate": 1.6575029362675404e-05, "loss": 0.0066, "step": 221700 }, { "epoch": 6.853248439141992, "grad_norm": 0.15033654868602753, "learning_rate": 1.6574565741484825e-05, "loss": 0.0074, "step": 221730 }, { "epoch": 6.8541756815231505, "grad_norm": 0.16428610682487488, "learning_rate": 1.6574102120294247e-05, "loss": 0.0072, "step": 221760 }, { "epoch": 6.855102923904308, "grad_norm": 0.1253894567489624, "learning_rate": 1.657363849910367e-05, "loss": 0.0069, "step": 221790 }, { "epoch": 6.856030166285467, "grad_norm": 0.10050732642412186, "learning_rate": 1.6573174877913086e-05, "loss": 0.0069, "step": 221820 }, { "epoch": 6.856957408666625, "grad_norm": 0.09954019635915756, "learning_rate": 1.6572711256722508e-05, "loss": 0.0067, "step": 221850 }, { "epoch": 6.857884651047784, "grad_norm": 0.11604122072458267, "learning_rate": 1.657224763553193e-05, "loss": 0.0064, "step": 221880 }, { "epoch": 6.858811893428943, "grad_norm": 0.09067603945732117, "learning_rate": 1.657178401434135e-05, "loss": 0.007, "step": 221910 }, { "epoch": 6.8597391358101, "grad_norm": 0.14500457048416138, "learning_rate": 1.6571320393150773e-05, "loss": 0.0069, "step": 221940 }, { "epoch": 6.860666378191259, "grad_norm": 0.10223928838968277, "learning_rate": 1.657085677196019e-05, "loss": 0.0068, "step": 221970 }, { "epoch": 6.8615936205724175, "grad_norm": 0.07612396776676178, "learning_rate": 1.6570393150769612e-05, "loss": 0.0065, 
"step": 222000 }, { "epoch": 6.862520862953576, "grad_norm": 0.1032695472240448, "learning_rate": 1.6569929529579034e-05, "loss": 0.0072, "step": 222030 }, { "epoch": 6.863448105334735, "grad_norm": 0.09269289672374725, "learning_rate": 1.6569465908388455e-05, "loss": 0.0069, "step": 222060 }, { "epoch": 6.864375347715893, "grad_norm": 0.17171552777290344, "learning_rate": 1.6569002287197877e-05, "loss": 0.0067, "step": 222090 }, { "epoch": 6.865302590097051, "grad_norm": 0.16797468066215515, "learning_rate": 1.6568538666007298e-05, "loss": 0.0064, "step": 222120 }, { "epoch": 6.86622983247821, "grad_norm": 0.15487076342105865, "learning_rate": 1.6568075044816716e-05, "loss": 0.0072, "step": 222150 }, { "epoch": 6.867157074859368, "grad_norm": 0.13342903554439545, "learning_rate": 1.6567611423626138e-05, "loss": 0.0062, "step": 222180 }, { "epoch": 6.868084317240527, "grad_norm": 0.13247820734977722, "learning_rate": 1.6567147802435556e-05, "loss": 0.0068, "step": 222210 }, { "epoch": 6.869011559621685, "grad_norm": 0.08604052662849426, "learning_rate": 1.6566684181244977e-05, "loss": 0.007, "step": 222240 }, { "epoch": 6.869938802002844, "grad_norm": 0.10853429138660431, "learning_rate": 1.65662205600544e-05, "loss": 0.0068, "step": 222270 }, { "epoch": 6.870866044384002, "grad_norm": 0.09197378903627396, "learning_rate": 1.656575693886382e-05, "loss": 0.0071, "step": 222300 }, { "epoch": 6.87179328676516, "grad_norm": 0.14536532759666443, "learning_rate": 1.6565293317673242e-05, "loss": 0.0061, "step": 222330 }, { "epoch": 6.872720529146319, "grad_norm": 0.09236340969800949, "learning_rate": 1.6564829696482663e-05, "loss": 0.0069, "step": 222360 }, { "epoch": 6.8736477715274775, "grad_norm": 0.0959508866071701, "learning_rate": 1.656436607529208e-05, "loss": 0.0067, "step": 222390 }, { "epoch": 6.874575013908636, "grad_norm": 0.11778402328491211, "learning_rate": 1.6563902454101503e-05, "loss": 0.0072, "step": 222420 }, { "epoch": 6.875502256289794, "grad_norm": 
0.11733091622591019, "learning_rate": 1.6563438832910925e-05, "loss": 0.0064, "step": 222450 }, { "epoch": 6.876429498670952, "grad_norm": 0.10763933509588242, "learning_rate": 1.6562975211720346e-05, "loss": 0.0062, "step": 222480 }, { "epoch": 6.877356741052111, "grad_norm": 0.11066634953022003, "learning_rate": 1.6562511590529768e-05, "loss": 0.0072, "step": 222510 }, { "epoch": 6.87828398343327, "grad_norm": 0.10404069721698761, "learning_rate": 1.6562047969339186e-05, "loss": 0.0065, "step": 222540 }, { "epoch": 6.879211225814428, "grad_norm": 0.09827882051467896, "learning_rate": 1.6561584348148607e-05, "loss": 0.007, "step": 222570 }, { "epoch": 6.880138468195586, "grad_norm": 0.09477496892213821, "learning_rate": 1.656112072695803e-05, "loss": 0.0071, "step": 222600 }, { "epoch": 6.8810657105767445, "grad_norm": 0.10497896373271942, "learning_rate": 1.6560657105767447e-05, "loss": 0.0067, "step": 222630 }, { "epoch": 6.881992952957903, "grad_norm": 0.17442497611045837, "learning_rate": 1.656019348457687e-05, "loss": 0.0065, "step": 222660 }, { "epoch": 6.882920195339062, "grad_norm": 0.11900433897972107, "learning_rate": 1.655972986338629e-05, "loss": 0.0066, "step": 222690 }, { "epoch": 6.88384743772022, "grad_norm": 0.11259345710277557, "learning_rate": 1.655926624219571e-05, "loss": 0.0061, "step": 222720 }, { "epoch": 6.884774680101378, "grad_norm": 0.12502457201480865, "learning_rate": 1.6558802621005133e-05, "loss": 0.0069, "step": 222750 }, { "epoch": 6.885701922482537, "grad_norm": 0.10995306819677353, "learning_rate": 1.6558338999814554e-05, "loss": 0.0064, "step": 222780 }, { "epoch": 6.886629164863695, "grad_norm": 0.10268859565258026, "learning_rate": 1.6557875378623972e-05, "loss": 0.0065, "step": 222810 }, { "epoch": 6.887556407244854, "grad_norm": 0.07535845786333084, "learning_rate": 1.6557411757433394e-05, "loss": 0.0067, "step": 222840 }, { "epoch": 6.8884836496260125, "grad_norm": 0.11877270042896271, "learning_rate": 
1.6556948136242815e-05, "loss": 0.0065, "step": 222870 }, { "epoch": 6.889410892007171, "grad_norm": 0.20267505943775177, "learning_rate": 1.6556484515052237e-05, "loss": 0.0071, "step": 222900 }, { "epoch": 6.890338134388329, "grad_norm": 0.08540593087673187, "learning_rate": 1.655602089386166e-05, "loss": 0.006, "step": 222930 }, { "epoch": 6.891265376769487, "grad_norm": 0.08822552859783173, "learning_rate": 1.6555557272671077e-05, "loss": 0.0069, "step": 222960 }, { "epoch": 6.892192619150646, "grad_norm": 0.18208743631839752, "learning_rate": 1.6555093651480498e-05, "loss": 0.0065, "step": 222990 }, { "epoch": 6.893119861531805, "grad_norm": 0.18167629837989807, "learning_rate": 1.655463003028992e-05, "loss": 0.0069, "step": 223020 }, { "epoch": 6.894047103912963, "grad_norm": 0.10013867169618607, "learning_rate": 1.6554166409099338e-05, "loss": 0.007, "step": 223050 }, { "epoch": 6.894974346294122, "grad_norm": 0.08253391832113266, "learning_rate": 1.655370278790876e-05, "loss": 0.0066, "step": 223080 }, { "epoch": 6.8959015886752795, "grad_norm": 0.14434492588043213, "learning_rate": 1.655323916671818e-05, "loss": 0.0064, "step": 223110 }, { "epoch": 6.896828831056438, "grad_norm": 0.07102813571691513, "learning_rate": 1.6552775545527602e-05, "loss": 0.0073, "step": 223140 }, { "epoch": 6.897756073437597, "grad_norm": 0.1089794859290123, "learning_rate": 1.6552311924337024e-05, "loss": 0.0068, "step": 223170 }, { "epoch": 6.898683315818755, "grad_norm": 0.17873549461364746, "learning_rate": 1.6551848303146442e-05, "loss": 0.0062, "step": 223200 }, { "epoch": 6.899610558199914, "grad_norm": 0.10552658140659332, "learning_rate": 1.6551384681955863e-05, "loss": 0.0069, "step": 223230 }, { "epoch": 6.900537800581072, "grad_norm": 0.13269923627376556, "learning_rate": 1.6550921060765285e-05, "loss": 0.0071, "step": 223260 }, { "epoch": 6.90146504296223, "grad_norm": 0.09188712388277054, "learning_rate": 1.6550457439574706e-05, "loss": 0.007, "step": 223290 }, { 
"epoch": 6.902392285343389, "grad_norm": 0.09595640748739243, "learning_rate": 1.6549993818384128e-05, "loss": 0.0067, "step": 223320 }, { "epoch": 6.903319527724547, "grad_norm": 0.12186142057180405, "learning_rate": 1.654953019719355e-05, "loss": 0.0064, "step": 223350 }, { "epoch": 6.904246770105706, "grad_norm": 0.18066899478435516, "learning_rate": 1.6549066576002968e-05, "loss": 0.0069, "step": 223380 }, { "epoch": 6.905174012486864, "grad_norm": 0.10240895301103592, "learning_rate": 1.654860295481239e-05, "loss": 0.0065, "step": 223410 }, { "epoch": 6.906101254868022, "grad_norm": 0.094961978495121, "learning_rate": 1.654813933362181e-05, "loss": 0.0067, "step": 223440 }, { "epoch": 6.907028497249181, "grad_norm": 0.10715587437152863, "learning_rate": 1.6547675712431232e-05, "loss": 0.0066, "step": 223470 }, { "epoch": 6.9079557396303395, "grad_norm": 0.1248101219534874, "learning_rate": 1.6547212091240654e-05, "loss": 0.0069, "step": 223500 }, { "epoch": 6.908882982011498, "grad_norm": 0.14016608893871307, "learning_rate": 1.654674847005007e-05, "loss": 0.0069, "step": 223530 }, { "epoch": 6.909810224392656, "grad_norm": 0.10572398453950882, "learning_rate": 1.6546284848859493e-05, "loss": 0.0063, "step": 223560 }, { "epoch": 6.910737466773814, "grad_norm": 0.11579158902168274, "learning_rate": 1.6545821227668915e-05, "loss": 0.0071, "step": 223590 }, { "epoch": 6.911664709154973, "grad_norm": 0.15692800283432007, "learning_rate": 1.6545357606478333e-05, "loss": 0.0075, "step": 223620 }, { "epoch": 6.912591951536132, "grad_norm": 0.09737402945756912, "learning_rate": 1.6544893985287754e-05, "loss": 0.007, "step": 223650 }, { "epoch": 6.91351919391729, "grad_norm": 0.10688658058643341, "learning_rate": 1.6544430364097176e-05, "loss": 0.007, "step": 223680 }, { "epoch": 6.914446436298449, "grad_norm": 0.11162807047367096, "learning_rate": 1.6543966742906597e-05, "loss": 0.0074, "step": 223710 }, { "epoch": 6.9153736786796065, "grad_norm": 0.18869948387145996, 
"learning_rate": 1.654350312171602e-05, "loss": 0.0073, "step": 223740 }, { "epoch": 6.916300921060765, "grad_norm": 0.1456955373287201, "learning_rate": 1.654303950052544e-05, "loss": 0.0071, "step": 223770 }, { "epoch": 6.917228163441924, "grad_norm": 0.1582430750131607, "learning_rate": 1.654257587933486e-05, "loss": 0.0063, "step": 223800 }, { "epoch": 6.918155405823082, "grad_norm": 0.1291474848985672, "learning_rate": 1.654211225814428e-05, "loss": 0.0071, "step": 223830 }, { "epoch": 6.919082648204241, "grad_norm": 0.11478815972805023, "learning_rate": 1.65416486369537e-05, "loss": 0.0065, "step": 223860 }, { "epoch": 6.9200098905853995, "grad_norm": 0.1076747328042984, "learning_rate": 1.6541185015763123e-05, "loss": 0.0074, "step": 223890 }, { "epoch": 6.920937132966557, "grad_norm": 0.12778493762016296, "learning_rate": 1.6540721394572544e-05, "loss": 0.0066, "step": 223920 }, { "epoch": 6.921864375347716, "grad_norm": 0.13696810603141785, "learning_rate": 1.6540257773381963e-05, "loss": 0.007, "step": 223950 }, { "epoch": 6.922791617728874, "grad_norm": 0.0767354741692543, "learning_rate": 1.6539794152191384e-05, "loss": 0.0065, "step": 223980 }, { "epoch": 6.923718860110033, "grad_norm": Infinity, "learning_rate": 1.6539345985040492e-05, "loss": 0.0071, "step": 224010 }, { "epoch": 6.924646102491192, "grad_norm": 0.1129116415977478, "learning_rate": 1.653888236384991e-05, "loss": 0.0067, "step": 224040 }, { "epoch": 6.925573344872349, "grad_norm": 0.13388097286224365, "learning_rate": 1.653841874265933e-05, "loss": 0.0065, "step": 224070 }, { "epoch": 6.926500587253508, "grad_norm": 0.15006676316261292, "learning_rate": 1.6537955121468753e-05, "loss": 0.0072, "step": 224100 }, { "epoch": 6.9274278296346665, "grad_norm": 0.10898460447788239, "learning_rate": 1.6537491500278175e-05, "loss": 0.0072, "step": 224130 }, { "epoch": 6.928355072015825, "grad_norm": 0.11456931382417679, "learning_rate": 1.6537027879087596e-05, "loss": 0.0062, "step": 224160 }, { 
"epoch": 6.929282314396984, "grad_norm": 0.09979680180549622, "learning_rate": 1.6536564257897018e-05, "loss": 0.0065, "step": 224190 }, { "epoch": 6.930209556778141, "grad_norm": 0.16691194474697113, "learning_rate": 1.6536100636706436e-05, "loss": 0.0072, "step": 224220 }, { "epoch": 6.9311367991593, "grad_norm": 0.11305861175060272, "learning_rate": 1.6535637015515857e-05, "loss": 0.0062, "step": 224250 }, { "epoch": 6.932064041540459, "grad_norm": 0.11096669733524323, "learning_rate": 1.653517339432528e-05, "loss": 0.0072, "step": 224280 }, { "epoch": 6.932991283921617, "grad_norm": 0.12830466032028198, "learning_rate": 1.6534709773134697e-05, "loss": 0.0069, "step": 224310 }, { "epoch": 6.933918526302776, "grad_norm": 0.07167498767375946, "learning_rate": 1.653424615194412e-05, "loss": 0.0067, "step": 224340 }, { "epoch": 6.9348457686839335, "grad_norm": 0.10030365735292435, "learning_rate": 1.653378253075354e-05, "loss": 0.0069, "step": 224370 }, { "epoch": 6.935773011065092, "grad_norm": 0.12518079578876495, "learning_rate": 1.653331890956296e-05, "loss": 0.0066, "step": 224400 }, { "epoch": 6.936700253446251, "grad_norm": 0.08051613718271255, "learning_rate": 1.6532855288372383e-05, "loss": 0.0056, "step": 224430 }, { "epoch": 6.937627495827409, "grad_norm": 0.13715896010398865, "learning_rate": 1.65323916671818e-05, "loss": 0.0066, "step": 224460 }, { "epoch": 6.938554738208568, "grad_norm": 0.12308710813522339, "learning_rate": 1.6531928045991223e-05, "loss": 0.0068, "step": 224490 }, { "epoch": 6.9394819805897265, "grad_norm": 0.15222008526325226, "learning_rate": 1.6531464424800644e-05, "loss": 0.0067, "step": 224520 }, { "epoch": 6.940409222970884, "grad_norm": 0.10509131848812103, "learning_rate": 1.6531000803610066e-05, "loss": 0.0072, "step": 224550 }, { "epoch": 6.941336465352043, "grad_norm": 0.10765907168388367, "learning_rate": 1.6530537182419487e-05, "loss": 0.0066, "step": 224580 }, { "epoch": 6.9422637077332014, "grad_norm": 
0.1051688939332962, "learning_rate": 1.653007356122891e-05, "loss": 0.0075, "step": 224610 }, { "epoch": 6.94319095011436, "grad_norm": 0.08622770756483078, "learning_rate": 1.6529609940038327e-05, "loss": 0.0067, "step": 224640 }, { "epoch": 6.944118192495519, "grad_norm": 0.13437849283218384, "learning_rate": 1.6529146318847748e-05, "loss": 0.0068, "step": 224670 }, { "epoch": 6.945045434876677, "grad_norm": 0.04700527712702751, "learning_rate": 1.6528682697657166e-05, "loss": 0.0063, "step": 224700 }, { "epoch": 6.945972677257835, "grad_norm": 0.14602763950824738, "learning_rate": 1.6528219076466588e-05, "loss": 0.0069, "step": 224730 }, { "epoch": 6.946899919638994, "grad_norm": 0.08851426094770432, "learning_rate": 1.6527755455276013e-05, "loss": 0.0065, "step": 224760 }, { "epoch": 6.947827162020152, "grad_norm": 0.10780211538076401, "learning_rate": 1.652729183408543e-05, "loss": 0.0067, "step": 224790 }, { "epoch": 6.948754404401311, "grad_norm": 0.08896194398403168, "learning_rate": 1.6526828212894852e-05, "loss": 0.0067, "step": 224820 }, { "epoch": 6.949681646782469, "grad_norm": 0.15775734186172485, "learning_rate": 1.6526364591704274e-05, "loss": 0.0065, "step": 224850 }, { "epoch": 6.950608889163627, "grad_norm": 0.2107236087322235, "learning_rate": 1.6525900970513692e-05, "loss": 0.0063, "step": 224880 }, { "epoch": 6.951536131544786, "grad_norm": 0.1447429358959198, "learning_rate": 1.6525437349323113e-05, "loss": 0.0072, "step": 224910 }, { "epoch": 6.952463373925944, "grad_norm": 0.09573939442634583, "learning_rate": 1.6524973728132535e-05, "loss": 0.007, "step": 224940 }, { "epoch": 6.953390616307103, "grad_norm": 0.08333004266023636, "learning_rate": 1.6524510106941956e-05, "loss": 0.0069, "step": 224970 }, { "epoch": 6.9543178586882615, "grad_norm": 0.11372128129005432, "learning_rate": 1.6524046485751378e-05, "loss": 0.0071, "step": 225000 }, { "epoch": 6.955245101069419, "grad_norm": 0.11096614599227905, "learning_rate": 
1.6523582864560796e-05, "loss": 0.0064, "step": 225030 }, { "epoch": 6.956172343450578, "grad_norm": 0.13521189987659454, "learning_rate": 1.6523119243370218e-05, "loss": 0.0066, "step": 225060 }, { "epoch": 6.957099585831736, "grad_norm": 0.11898843199014664, "learning_rate": 1.652265562217964e-05, "loss": 0.0071, "step": 225090 }, { "epoch": 6.958026828212895, "grad_norm": 0.11018873751163483, "learning_rate": 1.652219200098906e-05, "loss": 0.0074, "step": 225120 }, { "epoch": 6.958954070594054, "grad_norm": 0.11138518899679184, "learning_rate": 1.6521728379798482e-05, "loss": 0.006, "step": 225150 }, { "epoch": 6.959881312975211, "grad_norm": 0.14601445198059082, "learning_rate": 1.6521264758607904e-05, "loss": 0.0061, "step": 225180 }, { "epoch": 6.96080855535637, "grad_norm": 0.14236025512218475, "learning_rate": 1.6520801137417322e-05, "loss": 0.0066, "step": 225210 }, { "epoch": 6.9617357977375285, "grad_norm": 0.1984749436378479, "learning_rate": 1.6520337516226743e-05, "loss": 0.0067, "step": 225240 }, { "epoch": 6.962663040118687, "grad_norm": 0.16174942255020142, "learning_rate": 1.6519873895036165e-05, "loss": 0.0063, "step": 225270 }, { "epoch": 6.963590282499846, "grad_norm": 0.15663497149944305, "learning_rate": 1.6519410273845583e-05, "loss": 0.0065, "step": 225300 }, { "epoch": 6.964517524881003, "grad_norm": 0.12187173217535019, "learning_rate": 1.6518946652655004e-05, "loss": 0.0073, "step": 225330 }, { "epoch": 6.965444767262162, "grad_norm": 0.07982661575078964, "learning_rate": 1.6518483031464426e-05, "loss": 0.0065, "step": 225360 }, { "epoch": 6.966372009643321, "grad_norm": 0.19239024817943573, "learning_rate": 1.6518019410273847e-05, "loss": 0.0065, "step": 225390 }, { "epoch": 6.967299252024479, "grad_norm": 0.1072753444314003, "learning_rate": 1.651755578908327e-05, "loss": 0.0065, "step": 225420 }, { "epoch": 6.968226494405638, "grad_norm": 0.09264370054006577, "learning_rate": 1.6517092167892687e-05, "loss": 0.0067, "step": 225450 }, { 
"epoch": 6.969153736786796, "grad_norm": 0.10562299937009811, "learning_rate": 1.651662854670211e-05, "loss": 0.0066, "step": 225480 }, { "epoch": 6.970080979167955, "grad_norm": 0.17367804050445557, "learning_rate": 1.651616492551153e-05, "loss": 0.007, "step": 225510 }, { "epoch": 6.971008221549113, "grad_norm": 0.078802689909935, "learning_rate": 1.651570130432095e-05, "loss": 0.0068, "step": 225540 }, { "epoch": 6.971935463930271, "grad_norm": 0.15803438425064087, "learning_rate": 1.6515237683130373e-05, "loss": 0.006, "step": 225570 }, { "epoch": 6.97286270631143, "grad_norm": 0.12848001718521118, "learning_rate": 1.6514774061939795e-05, "loss": 0.007, "step": 225600 }, { "epoch": 6.9737899486925885, "grad_norm": 0.14066049456596375, "learning_rate": 1.6514310440749213e-05, "loss": 0.0062, "step": 225630 }, { "epoch": 6.974717191073747, "grad_norm": 0.1029183566570282, "learning_rate": 1.6513846819558634e-05, "loss": 0.0061, "step": 225660 }, { "epoch": 6.975644433454905, "grad_norm": 0.13181579113006592, "learning_rate": 1.6513383198368052e-05, "loss": 0.007, "step": 225690 }, { "epoch": 6.976571675836063, "grad_norm": 0.1476864069700241, "learning_rate": 1.6512919577177474e-05, "loss": 0.0072, "step": 225720 }, { "epoch": 6.977498918217222, "grad_norm": 0.0816551148891449, "learning_rate": 1.6512455955986895e-05, "loss": 0.0066, "step": 225750 }, { "epoch": 6.978426160598381, "grad_norm": 0.1263979822397232, "learning_rate": 1.6511992334796317e-05, "loss": 0.0059, "step": 225780 }, { "epoch": 6.979353402979539, "grad_norm": 0.11554472893476486, "learning_rate": 1.651152871360574e-05, "loss": 0.0071, "step": 225810 }, { "epoch": 6.980280645360697, "grad_norm": 0.1356220543384552, "learning_rate": 1.651106509241516e-05, "loss": 0.0068, "step": 225840 }, { "epoch": 6.9812078877418555, "grad_norm": 0.1608932763338089, "learning_rate": 1.6510601471224578e-05, "loss": 0.0067, "step": 225870 }, { "epoch": 6.982135130123014, "grad_norm": 0.113584004342556, 
"learning_rate": 1.6510137850034e-05, "loss": 0.007, "step": 225900 }, { "epoch": 6.983062372504173, "grad_norm": 0.12317882478237152, "learning_rate": 1.650967422884342e-05, "loss": 0.0059, "step": 225930 }, { "epoch": 6.983989614885331, "grad_norm": 0.16616152226924896, "learning_rate": 1.6509210607652842e-05, "loss": 0.0069, "step": 225960 }, { "epoch": 6.984916857266489, "grad_norm": 0.11049474775791168, "learning_rate": 1.6508746986462264e-05, "loss": 0.0069, "step": 225990 }, { "epoch": 6.985844099647648, "grad_norm": 0.1352139264345169, "learning_rate": 1.6508283365271682e-05, "loss": 0.0066, "step": 226020 }, { "epoch": 6.986771342028806, "grad_norm": 0.16911569237709045, "learning_rate": 1.6507819744081104e-05, "loss": 0.0068, "step": 226050 }, { "epoch": 6.987698584409965, "grad_norm": 0.1294577568769455, "learning_rate": 1.6507356122890525e-05, "loss": 0.0065, "step": 226080 }, { "epoch": 6.988625826791123, "grad_norm": 0.11895552277565002, "learning_rate": 1.6506892501699943e-05, "loss": 0.0068, "step": 226110 }, { "epoch": 6.989553069172281, "grad_norm": 0.14972016215324402, "learning_rate": 1.6506428880509365e-05, "loss": 0.0074, "step": 226140 }, { "epoch": 6.99048031155344, "grad_norm": 0.1076376661658287, "learning_rate": 1.650596525931879e-05, "loss": 0.0068, "step": 226170 }, { "epoch": 6.991407553934598, "grad_norm": 0.12837767601013184, "learning_rate": 1.6505501638128208e-05, "loss": 0.0062, "step": 226200 }, { "epoch": 6.992334796315757, "grad_norm": 0.10125672817230225, "learning_rate": 1.650503801693763e-05, "loss": 0.007, "step": 226230 }, { "epoch": 6.9932620386969155, "grad_norm": 0.09835970401763916, "learning_rate": 1.650457439574705e-05, "loss": 0.0067, "step": 226260 }, { "epoch": 6.994189281078074, "grad_norm": 0.1358226090669632, "learning_rate": 1.650411077455647e-05, "loss": 0.007, "step": 226290 }, { "epoch": 6.995116523459233, "grad_norm": 0.06691098213195801, "learning_rate": 1.650364715336589e-05, "loss": 0.0069, "step": 
226320 }, { "epoch": 6.9960437658403904, "grad_norm": 0.12672638893127441, "learning_rate": 1.6503183532175312e-05, "loss": 0.0068, "step": 226350 }, { "epoch": 6.996971008221549, "grad_norm": 0.15105944871902466, "learning_rate": 1.6502719910984733e-05, "loss": 0.0072, "step": 226380 }, { "epoch": 6.997898250602708, "grad_norm": 0.1417655646800995, "learning_rate": 1.6502256289794155e-05, "loss": 0.0071, "step": 226410 }, { "epoch": 6.998825492983866, "grad_norm": 0.10842515528202057, "learning_rate": 1.6501792668603573e-05, "loss": 0.0076, "step": 226440 }, { "epoch": 6.999752735365025, "grad_norm": 0.07345357537269592, "learning_rate": 1.6501329047412995e-05, "loss": 0.0061, "step": 226470 }, { "epoch": 7.000679977746183, "grad_norm": 0.11589142680168152, "learning_rate": 1.6500865426222416e-05, "loss": 0.0065, "step": 226500 }, { "epoch": 7.001607220127341, "grad_norm": 0.09056547284126282, "learning_rate": 1.6500401805031838e-05, "loss": 0.0064, "step": 226530 }, { "epoch": 7.0025344625085, "grad_norm": 0.13963183760643005, "learning_rate": 1.649993818384126e-05, "loss": 0.0065, "step": 226560 }, { "epoch": 7.003461704889658, "grad_norm": 0.10479274392127991, "learning_rate": 1.649947456265068e-05, "loss": 0.0069, "step": 226590 }, { "epoch": 7.004388947270817, "grad_norm": 0.10104598850011826, "learning_rate": 1.64990109414601e-05, "loss": 0.007, "step": 226620 }, { "epoch": 7.005316189651975, "grad_norm": 0.09112194925546646, "learning_rate": 1.649854732026952e-05, "loss": 0.0064, "step": 226650 }, { "epoch": 7.006243432033133, "grad_norm": 0.18462087213993073, "learning_rate": 1.6498083699078938e-05, "loss": 0.0064, "step": 226680 }, { "epoch": 7.007170674414292, "grad_norm": 0.1279703974723816, "learning_rate": 1.649762007788836e-05, "loss": 0.0067, "step": 226710 }, { "epoch": 7.0080979167954505, "grad_norm": 0.10282403230667114, "learning_rate": 1.649715645669778e-05, "loss": 0.0064, "step": 226740 }, { "epoch": 7.009025159176609, "grad_norm": 
0.09790060669183731, "learning_rate": 1.6496692835507203e-05, "loss": 0.0069, "step": 226770 }, { "epoch": 7.009952401557767, "grad_norm": 0.09052672237157822, "learning_rate": 1.6496229214316624e-05, "loss": 0.0067, "step": 226800 }, { "epoch": 7.010879643938925, "grad_norm": 0.11335504800081253, "learning_rate": 1.6495765593126046e-05, "loss": 0.0064, "step": 226830 }, { "epoch": 7.011806886320084, "grad_norm": 0.11644397675991058, "learning_rate": 1.6495301971935464e-05, "loss": 0.0064, "step": 226860 }, { "epoch": 7.012734128701243, "grad_norm": 0.10984858870506287, "learning_rate": 1.6494838350744885e-05, "loss": 0.0066, "step": 226890 }, { "epoch": 7.013661371082401, "grad_norm": 0.11136128008365631, "learning_rate": 1.6494374729554307e-05, "loss": 0.0068, "step": 226920 }, { "epoch": 7.01458861346356, "grad_norm": 0.17463867366313934, "learning_rate": 1.649391110836373e-05, "loss": 0.0066, "step": 226950 }, { "epoch": 7.0155158558447175, "grad_norm": 0.1365506798028946, "learning_rate": 1.649344748717315e-05, "loss": 0.0064, "step": 226980 }, { "epoch": 7.016443098225876, "grad_norm": 0.15627282857894897, "learning_rate": 1.649298386598257e-05, "loss": 0.0069, "step": 227010 }, { "epoch": 7.017370340607035, "grad_norm": 0.0794108435511589, "learning_rate": 1.649252024479199e-05, "loss": 0.0068, "step": 227040 }, { "epoch": 7.018297582988193, "grad_norm": 0.059405308216810226, "learning_rate": 1.649205662360141e-05, "loss": 0.0066, "step": 227070 }, { "epoch": 7.019224825369352, "grad_norm": 0.12320305407047272, "learning_rate": 1.649159300241083e-05, "loss": 0.0065, "step": 227100 }, { "epoch": 7.02015206775051, "grad_norm": 0.12107912451028824, "learning_rate": 1.649112938122025e-05, "loss": 0.0071, "step": 227130 }, { "epoch": 7.021079310131668, "grad_norm": 0.13123780488967896, "learning_rate": 1.6490665760029672e-05, "loss": 0.0059, "step": 227160 }, { "epoch": 7.022006552512827, "grad_norm": 0.1084880605340004, "learning_rate": 1.6490202138839094e-05, 
"loss": 0.0063, "step": 227190 }, { "epoch": 7.022933794893985, "grad_norm": 0.10886916518211365, "learning_rate": 1.6489738517648515e-05, "loss": 0.0067, "step": 227220 }, { "epoch": 7.023861037275144, "grad_norm": 0.07022734731435776, "learning_rate": 1.6489274896457937e-05, "loss": 0.0073, "step": 227250 }, { "epoch": 7.024788279656303, "grad_norm": 0.10604600608348846, "learning_rate": 1.6488811275267355e-05, "loss": 0.0068, "step": 227280 }, { "epoch": 7.02571552203746, "grad_norm": 0.0985383465886116, "learning_rate": 1.6488347654076776e-05, "loss": 0.0062, "step": 227310 }, { "epoch": 7.026642764418619, "grad_norm": 0.1744658648967743, "learning_rate": 1.6487884032886198e-05, "loss": 0.0061, "step": 227340 }, { "epoch": 7.0275700067997775, "grad_norm": 0.08967715501785278, "learning_rate": 1.648742041169562e-05, "loss": 0.0062, "step": 227370 }, { "epoch": 7.028497249180936, "grad_norm": 0.12382000684738159, "learning_rate": 1.648695679050504e-05, "loss": 0.0061, "step": 227400 }, { "epoch": 7.029424491562095, "grad_norm": 0.2224029153585434, "learning_rate": 1.648649316931446e-05, "loss": 0.0073, "step": 227430 }, { "epoch": 7.030351733943252, "grad_norm": 0.13222894072532654, "learning_rate": 1.6486045002163567e-05, "loss": 0.0065, "step": 227460 }, { "epoch": 7.031278976324411, "grad_norm": 0.13992077112197876, "learning_rate": 1.648558138097299e-05, "loss": 0.0059, "step": 227490 }, { "epoch": 7.03220621870557, "grad_norm": 0.18771034479141235, "learning_rate": 1.6485117759782407e-05, "loss": 0.0063, "step": 227520 }, { "epoch": 7.033133461086728, "grad_norm": 0.09760615229606628, "learning_rate": 1.6484669592631514e-05, "loss": 0.0063, "step": 227550 }, { "epoch": 7.034060703467887, "grad_norm": 0.12477829307317734, "learning_rate": 1.6484205971440936e-05, "loss": 0.0066, "step": 227580 }, { "epoch": 7.0349879458490445, "grad_norm": 1.048256516456604, "learning_rate": 1.6483742350250354e-05, "loss": 0.0064, "step": 227610 }, { "epoch": 
7.035915188230203, "grad_norm": 0.13464799523353577, "learning_rate": 1.648327872905978e-05, "loss": 0.0067, "step": 227640 }, { "epoch": 7.036842430611362, "grad_norm": 0.12090989947319031, "learning_rate": 1.64828151078692e-05, "loss": 0.007, "step": 227670 }, { "epoch": 7.03776967299252, "grad_norm": 0.1403605192899704, "learning_rate": 1.648235148667862e-05, "loss": 0.0066, "step": 227700 }, { "epoch": 7.038696915373679, "grad_norm": 0.1108538880944252, "learning_rate": 1.648188786548804e-05, "loss": 0.006, "step": 227730 }, { "epoch": 7.0396241577548375, "grad_norm": 0.10785575211048126, "learning_rate": 1.648142424429746e-05, "loss": 0.0069, "step": 227760 }, { "epoch": 7.040551400135995, "grad_norm": 0.10337018966674805, "learning_rate": 1.648096062310688e-05, "loss": 0.0068, "step": 227790 }, { "epoch": 7.041478642517154, "grad_norm": 0.17324231564998627, "learning_rate": 1.64804970019163e-05, "loss": 0.0067, "step": 227820 }, { "epoch": 7.042405884898312, "grad_norm": 0.10879155993461609, "learning_rate": 1.6480033380725723e-05, "loss": 0.0072, "step": 227850 }, { "epoch": 7.043333127279471, "grad_norm": 0.10406602919101715, "learning_rate": 1.6479569759535144e-05, "loss": 0.007, "step": 227880 }, { "epoch": 7.04426036966063, "grad_norm": 0.13545414805412292, "learning_rate": 1.6479106138344566e-05, "loss": 0.007, "step": 227910 }, { "epoch": 7.045187612041787, "grad_norm": 0.1466689556837082, "learning_rate": 1.6478642517153984e-05, "loss": 0.0063, "step": 227940 }, { "epoch": 7.046114854422946, "grad_norm": 0.14621417224407196, "learning_rate": 1.6478178895963405e-05, "loss": 0.0065, "step": 227970 }, { "epoch": 7.0470420968041045, "grad_norm": 0.1356775462627411, "learning_rate": 1.6477715274772827e-05, "loss": 0.0065, "step": 228000 }, { "epoch": 7.047969339185263, "grad_norm": 0.1135576069355011, "learning_rate": 1.647725165358225e-05, "loss": 0.0066, "step": 228030 }, { "epoch": 7.048896581566422, "grad_norm": 0.10054710507392883, "learning_rate": 
1.647678803239167e-05, "loss": 0.0066, "step": 228060 }, { "epoch": 7.04982382394758, "grad_norm": 0.11670149117708206, "learning_rate": 1.647632441120109e-05, "loss": 0.0067, "step": 228090 }, { "epoch": 7.050751066328738, "grad_norm": 0.16730044782161713, "learning_rate": 1.647586079001051e-05, "loss": 0.0065, "step": 228120 }, { "epoch": 7.051678308709897, "grad_norm": 0.10984911024570465, "learning_rate": 1.647539716881993e-05, "loss": 0.0065, "step": 228150 }, { "epoch": 7.052605551091055, "grad_norm": 0.09643925726413727, "learning_rate": 1.6474933547629353e-05, "loss": 0.0064, "step": 228180 }, { "epoch": 7.053532793472214, "grad_norm": 0.08333822339773178, "learning_rate": 1.647446992643877e-05, "loss": 0.0066, "step": 228210 }, { "epoch": 7.0544600358533724, "grad_norm": 0.10927069187164307, "learning_rate": 1.6474006305248192e-05, "loss": 0.0068, "step": 228240 }, { "epoch": 7.05538727823453, "grad_norm": 0.14018140733242035, "learning_rate": 1.6473542684057614e-05, "loss": 0.0061, "step": 228270 }, { "epoch": 7.056314520615689, "grad_norm": 0.10477762669324875, "learning_rate": 1.6473079062867035e-05, "loss": 0.0066, "step": 228300 }, { "epoch": 7.057241762996847, "grad_norm": 0.10039416700601578, "learning_rate": 1.6472615441676457e-05, "loss": 0.0065, "step": 228330 }, { "epoch": 7.058169005378006, "grad_norm": 0.12269620597362518, "learning_rate": 1.6472151820485875e-05, "loss": 0.0062, "step": 228360 }, { "epoch": 7.0590962477591646, "grad_norm": 0.08701686561107635, "learning_rate": 1.6471688199295296e-05, "loss": 0.0059, "step": 228390 }, { "epoch": 7.060023490140322, "grad_norm": 0.1111074760556221, "learning_rate": 1.6471224578104718e-05, "loss": 0.0071, "step": 228420 }, { "epoch": 7.060950732521481, "grad_norm": 0.09982295334339142, "learning_rate": 1.647076095691414e-05, "loss": 0.0066, "step": 228450 }, { "epoch": 7.0618779749026395, "grad_norm": 0.15896816551685333, "learning_rate": 1.647029733572356e-05, "loss": 0.0074, "step": 228480 }, { 
"epoch": 7.062805217283798, "grad_norm": 0.09202875941991806, "learning_rate": 1.6469833714532982e-05, "loss": 0.007, "step": 228510 }, { "epoch": 7.063732459664957, "grad_norm": 0.16592414677143097, "learning_rate": 1.64693700933424e-05, "loss": 0.0067, "step": 228540 }, { "epoch": 7.064659702046115, "grad_norm": 0.10518649965524673, "learning_rate": 1.6468906472151822e-05, "loss": 0.0066, "step": 228570 }, { "epoch": 7.065586944427273, "grad_norm": 0.09124016761779785, "learning_rate": 1.646844285096124e-05, "loss": 0.0063, "step": 228600 }, { "epoch": 7.066514186808432, "grad_norm": 0.08624863624572754, "learning_rate": 1.646797922977066e-05, "loss": 0.0068, "step": 228630 }, { "epoch": 7.06744142918959, "grad_norm": 0.09992612153291702, "learning_rate": 1.6467515608580083e-05, "loss": 0.0064, "step": 228660 }, { "epoch": 7.068368671570749, "grad_norm": 0.12150786072015762, "learning_rate": 1.6467051987389505e-05, "loss": 0.006, "step": 228690 }, { "epoch": 7.069295913951907, "grad_norm": 0.1252291202545166, "learning_rate": 1.6466588366198926e-05, "loss": 0.0065, "step": 228720 }, { "epoch": 7.070223156333065, "grad_norm": 0.08572115749120712, "learning_rate": 1.6466124745008348e-05, "loss": 0.006, "step": 228750 }, { "epoch": 7.071150398714224, "grad_norm": 0.14780230820178986, "learning_rate": 1.6465661123817766e-05, "loss": 0.0067, "step": 228780 }, { "epoch": 7.072077641095382, "grad_norm": 0.1170472726225853, "learning_rate": 1.6465197502627187e-05, "loss": 0.006, "step": 228810 }, { "epoch": 7.073004883476541, "grad_norm": 0.09222814440727234, "learning_rate": 1.646473388143661e-05, "loss": 0.0068, "step": 228840 }, { "epoch": 7.0739321258576995, "grad_norm": 0.0972791388630867, "learning_rate": 1.646427026024603e-05, "loss": 0.0067, "step": 228870 }, { "epoch": 7.074859368238858, "grad_norm": 0.12977851927280426, "learning_rate": 1.6463806639055452e-05, "loss": 0.0064, "step": 228900 }, { "epoch": 7.075786610620016, "grad_norm": 0.12290555983781815, 
"learning_rate": 1.6463343017864873e-05, "loss": 0.0066, "step": 228930 }, { "epoch": 7.076713853001174, "grad_norm": 0.10198665410280228, "learning_rate": 1.646287939667429e-05, "loss": 0.0061, "step": 228960 }, { "epoch": 7.077641095382333, "grad_norm": 0.13388380408287048, "learning_rate": 1.6462415775483713e-05, "loss": 0.0069, "step": 228990 }, { "epoch": 7.078568337763492, "grad_norm": 0.08923473209142685, "learning_rate": 1.646195215429313e-05, "loss": 0.0072, "step": 229020 }, { "epoch": 7.07949558014465, "grad_norm": 0.1373167634010315, "learning_rate": 1.6461488533102556e-05, "loss": 0.0071, "step": 229050 }, { "epoch": 7.080422822525808, "grad_norm": 0.13771899044513702, "learning_rate": 1.6461024911911977e-05, "loss": 0.0072, "step": 229080 }, { "epoch": 7.0813500649069665, "grad_norm": 0.08934378623962402, "learning_rate": 1.6460561290721395e-05, "loss": 0.0063, "step": 229110 }, { "epoch": 7.082277307288125, "grad_norm": 0.1327170431613922, "learning_rate": 1.6460097669530817e-05, "loss": 0.0065, "step": 229140 }, { "epoch": 7.083204549669284, "grad_norm": 0.08879700303077698, "learning_rate": 1.645963404834024e-05, "loss": 0.006, "step": 229170 }, { "epoch": 7.084131792050442, "grad_norm": 0.10678574442863464, "learning_rate": 1.6459170427149657e-05, "loss": 0.0063, "step": 229200 }, { "epoch": 7.0850590344316, "grad_norm": 0.09308350831270218, "learning_rate": 1.6458706805959078e-05, "loss": 0.0065, "step": 229230 }, { "epoch": 7.085986276812759, "grad_norm": 0.12277963757514954, "learning_rate": 1.64582431847685e-05, "loss": 0.0059, "step": 229260 }, { "epoch": 7.086913519193917, "grad_norm": 0.12960591912269592, "learning_rate": 1.645777956357792e-05, "loss": 0.0063, "step": 229290 }, { "epoch": 7.087840761575076, "grad_norm": 0.13728801906108856, "learning_rate": 1.6457315942387343e-05, "loss": 0.0066, "step": 229320 }, { "epoch": 7.088768003956234, "grad_norm": 0.1467755287885666, "learning_rate": 1.645685232119676e-05, "loss": 0.0066, "step": 
229350 }, { "epoch": 7.089695246337393, "grad_norm": 0.08096513152122498, "learning_rate": 1.6456388700006182e-05, "loss": 0.0071, "step": 229380 }, { "epoch": 7.090622488718551, "grad_norm": 0.1569415032863617, "learning_rate": 1.6455925078815604e-05, "loss": 0.0065, "step": 229410 }, { "epoch": 7.091549731099709, "grad_norm": 0.10299892723560333, "learning_rate": 1.6455461457625025e-05, "loss": 0.0059, "step": 229440 }, { "epoch": 7.092476973480868, "grad_norm": 0.1694817990064621, "learning_rate": 1.6454997836434447e-05, "loss": 0.0066, "step": 229470 }, { "epoch": 7.0934042158620265, "grad_norm": 0.09382739663124084, "learning_rate": 1.6454534215243868e-05, "loss": 0.0067, "step": 229500 }, { "epoch": 7.094331458243185, "grad_norm": 0.09652373939752579, "learning_rate": 1.6454070594053286e-05, "loss": 0.0056, "step": 229530 }, { "epoch": 7.095258700624343, "grad_norm": 0.08617249131202698, "learning_rate": 1.6453606972862708e-05, "loss": 0.007, "step": 229560 }, { "epoch": 7.096185943005501, "grad_norm": 0.06926263123750687, "learning_rate": 1.645314335167213e-05, "loss": 0.0063, "step": 229590 }, { "epoch": 7.09711318538666, "grad_norm": 0.10366813093423843, "learning_rate": 1.6452679730481548e-05, "loss": 0.0069, "step": 229620 }, { "epoch": 7.098040427767819, "grad_norm": 0.11683480441570282, "learning_rate": 1.645221610929097e-05, "loss": 0.0064, "step": 229650 }, { "epoch": 7.098967670148977, "grad_norm": 0.11026590317487717, "learning_rate": 1.645175248810039e-05, "loss": 0.0069, "step": 229680 }, { "epoch": 7.099894912530136, "grad_norm": 0.14450755715370178, "learning_rate": 1.6451288866909812e-05, "loss": 0.0059, "step": 229710 }, { "epoch": 7.1008221549112935, "grad_norm": 0.16785846650600433, "learning_rate": 1.6450825245719234e-05, "loss": 0.0067, "step": 229740 }, { "epoch": 7.101749397292452, "grad_norm": 0.09636176377534866, "learning_rate": 1.645036162452865e-05, "loss": 0.0067, "step": 229770 }, { "epoch": 7.102676639673611, "grad_norm": 
0.11711758375167847, "learning_rate": 1.6449898003338073e-05, "loss": 0.0065, "step": 229800 }, { "epoch": 7.103603882054769, "grad_norm": 0.1211957037448883, "learning_rate": 1.6449434382147495e-05, "loss": 0.0062, "step": 229830 }, { "epoch": 7.104531124435928, "grad_norm": 0.1564331203699112, "learning_rate": 1.6448970760956916e-05, "loss": 0.0067, "step": 229860 }, { "epoch": 7.105458366817086, "grad_norm": 0.11521514505147934, "learning_rate": 1.6448507139766338e-05, "loss": 0.0057, "step": 229890 }, { "epoch": 7.106385609198244, "grad_norm": 0.12534667551517487, "learning_rate": 1.644804351857576e-05, "loss": 0.0062, "step": 229920 }, { "epoch": 7.107312851579403, "grad_norm": 0.14205124974250793, "learning_rate": 1.6447579897385177e-05, "loss": 0.0059, "step": 229950 }, { "epoch": 7.1082400939605614, "grad_norm": 0.0938674658536911, "learning_rate": 1.64471162761946e-05, "loss": 0.007, "step": 229980 }, { "epoch": 7.10916733634172, "grad_norm": 0.14696989953517914, "learning_rate": 1.6446652655004017e-05, "loss": 0.0066, "step": 230010 }, { "epoch": 7.110094578722878, "grad_norm": 0.14194199442863464, "learning_rate": 1.644618903381344e-05, "loss": 0.0063, "step": 230040 }, { "epoch": 7.111021821104036, "grad_norm": 0.09054766595363617, "learning_rate": 1.644572541262286e-05, "loss": 0.0066, "step": 230070 }, { "epoch": 7.111949063485195, "grad_norm": 0.10772712528705597, "learning_rate": 1.644526179143228e-05, "loss": 0.0066, "step": 230100 }, { "epoch": 7.1128763058663536, "grad_norm": 0.10308925807476044, "learning_rate": 1.6444798170241703e-05, "loss": 0.0068, "step": 230130 }, { "epoch": 7.113803548247512, "grad_norm": 0.12822812795639038, "learning_rate": 1.6444334549051124e-05, "loss": 0.0065, "step": 230160 }, { "epoch": 7.114730790628671, "grad_norm": 0.1333005577325821, "learning_rate": 1.6443870927860543e-05, "loss": 0.0071, "step": 230190 }, { "epoch": 7.1156580330098285, "grad_norm": 0.10397886484861374, "learning_rate": 1.6443407306669964e-05, 
"loss": 0.0067, "step": 230220 }, { "epoch": 7.116585275390987, "grad_norm": 0.11769385635852814, "learning_rate": 1.6442943685479386e-05, "loss": 0.007, "step": 230250 }, { "epoch": 7.117512517772146, "grad_norm": 0.11881125718355179, "learning_rate": 1.6442480064288807e-05, "loss": 0.0062, "step": 230280 }, { "epoch": 7.118439760153304, "grad_norm": 0.11902764439582825, "learning_rate": 1.644201644309823e-05, "loss": 0.0064, "step": 230310 }, { "epoch": 7.119367002534463, "grad_norm": 0.12093471735715866, "learning_rate": 1.6441552821907647e-05, "loss": 0.0069, "step": 230340 }, { "epoch": 7.120294244915621, "grad_norm": 0.1453372985124588, "learning_rate": 1.6441089200717068e-05, "loss": 0.0067, "step": 230370 }, { "epoch": 7.121221487296779, "grad_norm": 0.11622298508882523, "learning_rate": 1.644062557952649e-05, "loss": 0.0074, "step": 230400 }, { "epoch": 7.122148729677938, "grad_norm": 0.08570653200149536, "learning_rate": 1.6440161958335908e-05, "loss": 0.0063, "step": 230430 }, { "epoch": 7.123075972059096, "grad_norm": 0.12934105098247528, "learning_rate": 1.6439698337145333e-05, "loss": 0.0062, "step": 230460 }, { "epoch": 7.124003214440255, "grad_norm": 0.0852944403886795, "learning_rate": 1.6439234715954754e-05, "loss": 0.0068, "step": 230490 }, { "epoch": 7.124930456821413, "grad_norm": 0.10945500433444977, "learning_rate": 1.6438771094764172e-05, "loss": 0.0066, "step": 230520 }, { "epoch": 7.125857699202571, "grad_norm": 0.20547433197498322, "learning_rate": 1.6438307473573594e-05, "loss": 0.0061, "step": 230550 }, { "epoch": 7.12678494158373, "grad_norm": 0.11953926086425781, "learning_rate": 1.6437843852383015e-05, "loss": 0.006, "step": 230580 }, { "epoch": 7.1277121839648885, "grad_norm": 0.14029331505298615, "learning_rate": 1.6437380231192434e-05, "loss": 0.0066, "step": 230610 }, { "epoch": 7.128639426346047, "grad_norm": 0.10822351276874542, "learning_rate": 1.6436916610001855e-05, "loss": 0.0066, "step": 230640 }, { "epoch": 
7.129566668727206, "grad_norm": 0.15118157863616943, "learning_rate": 1.6436452988811277e-05, "loss": 0.0062, "step": 230670 }, { "epoch": 7.130493911108363, "grad_norm": 0.12518347799777985, "learning_rate": 1.6435989367620698e-05, "loss": 0.0064, "step": 230700 }, { "epoch": 7.131421153489522, "grad_norm": 0.11023104190826416, "learning_rate": 1.643552574643012e-05, "loss": 0.0068, "step": 230730 }, { "epoch": 7.132348395870681, "grad_norm": 0.1045999526977539, "learning_rate": 1.6435062125239538e-05, "loss": 0.0064, "step": 230760 }, { "epoch": 7.133275638251839, "grad_norm": 0.11859269440174103, "learning_rate": 1.643459850404896e-05, "loss": 0.0069, "step": 230790 }, { "epoch": 7.134202880632998, "grad_norm": 0.12633079290390015, "learning_rate": 1.643413488285838e-05, "loss": 0.0065, "step": 230820 }, { "epoch": 7.1351301230141555, "grad_norm": 0.08258303999900818, "learning_rate": 1.6433671261667802e-05, "loss": 0.0063, "step": 230850 }, { "epoch": 7.136057365395314, "grad_norm": 0.09509081393480301, "learning_rate": 1.6433207640477224e-05, "loss": 0.007, "step": 230880 }, { "epoch": 7.136984607776473, "grad_norm": 0.08708567917346954, "learning_rate": 1.6432744019286645e-05, "loss": 0.0063, "step": 230910 }, { "epoch": 7.137911850157631, "grad_norm": 0.1115984320640564, "learning_rate": 1.6432280398096063e-05, "loss": 0.0063, "step": 230940 }, { "epoch": 7.13883909253879, "grad_norm": 0.10085293650627136, "learning_rate": 1.6431816776905485e-05, "loss": 0.0068, "step": 230970 }, { "epoch": 7.1397663349199485, "grad_norm": 0.11541865766048431, "learning_rate": 1.6431353155714903e-05, "loss": 0.0066, "step": 231000 }, { "epoch": 7.140693577301106, "grad_norm": 0.16232024133205414, "learning_rate": 1.6430889534524324e-05, "loss": 0.0065, "step": 231030 }, { "epoch": 7.141620819682265, "grad_norm": 0.1361454725265503, "learning_rate": 1.6430425913333746e-05, "loss": 0.0073, "step": 231060 }, { "epoch": 7.142548062063423, "grad_norm": 0.09380155056715012, 
"learning_rate": 1.6429962292143167e-05, "loss": 0.0071, "step": 231090 }, { "epoch": 7.143475304444582, "grad_norm": 0.12264488637447357, "learning_rate": 1.642949867095259e-05, "loss": 0.0068, "step": 231120 }, { "epoch": 7.144402546825741, "grad_norm": 0.12563787400722504, "learning_rate": 1.642903504976201e-05, "loss": 0.0063, "step": 231150 }, { "epoch": 7.145329789206898, "grad_norm": 0.09051503241062164, "learning_rate": 1.642857142857143e-05, "loss": 0.0067, "step": 231180 }, { "epoch": 7.146257031588057, "grad_norm": 0.09022000432014465, "learning_rate": 1.642810780738085e-05, "loss": 0.0069, "step": 231210 }, { "epoch": 7.1471842739692155, "grad_norm": 0.10909941047430038, "learning_rate": 1.642764418619027e-05, "loss": 0.007, "step": 231240 }, { "epoch": 7.148111516350374, "grad_norm": 0.07278861105442047, "learning_rate": 1.6427180564999693e-05, "loss": 0.0067, "step": 231270 }, { "epoch": 7.149038758731533, "grad_norm": 0.08053479343652725, "learning_rate": 1.6426716943809115e-05, "loss": 0.0067, "step": 231300 }, { "epoch": 7.14996600111269, "grad_norm": 0.11999720335006714, "learning_rate": 1.6426253322618533e-05, "loss": 0.0065, "step": 231330 }, { "epoch": 7.150893243493849, "grad_norm": 0.12911735475063324, "learning_rate": 1.6425789701427954e-05, "loss": 0.0068, "step": 231360 }, { "epoch": 7.151820485875008, "grad_norm": 0.12206240743398666, "learning_rate": 1.6425326080237376e-05, "loss": 0.0065, "step": 231390 }, { "epoch": 7.152747728256166, "grad_norm": 0.11552835255861282, "learning_rate": 1.6424862459046794e-05, "loss": 0.0066, "step": 231420 }, { "epoch": 7.153674970637325, "grad_norm": 0.10643771290779114, "learning_rate": 1.6424398837856215e-05, "loss": 0.0068, "step": 231450 }, { "epoch": 7.154602213018483, "grad_norm": 0.12978099286556244, "learning_rate": 1.6423935216665637e-05, "loss": 0.0057, "step": 231480 }, { "epoch": 7.155529455399641, "grad_norm": 0.08204349875450134, "learning_rate": 1.642347159547506e-05, "loss": 0.0065, 
"step": 231510 }, { "epoch": 7.1564566977808, "grad_norm": 0.11217708140611649, "learning_rate": 1.642300797428448e-05, "loss": 0.0066, "step": 231540 }, { "epoch": 7.157383940161958, "grad_norm": 0.13319504261016846, "learning_rate": 1.64225443530939e-05, "loss": 0.0066, "step": 231570 }, { "epoch": 7.158311182543117, "grad_norm": 0.13760147988796234, "learning_rate": 1.642208073190332e-05, "loss": 0.0068, "step": 231600 }, { "epoch": 7.1592384249242755, "grad_norm": 0.13876385986804962, "learning_rate": 1.642161711071274e-05, "loss": 0.0061, "step": 231630 }, { "epoch": 7.160165667305433, "grad_norm": 0.07971808314323425, "learning_rate": 1.642116894356185e-05, "loss": 0.0062, "step": 231660 }, { "epoch": 7.161092909686592, "grad_norm": 0.12218579649925232, "learning_rate": 1.6420705322371267e-05, "loss": 0.006, "step": 231690 }, { "epoch": 7.1620201520677504, "grad_norm": 0.1273048222064972, "learning_rate": 1.6420241701180692e-05, "loss": 0.0064, "step": 231720 }, { "epoch": 7.162947394448909, "grad_norm": 0.08903961628675461, "learning_rate": 1.6419778079990113e-05, "loss": 0.0066, "step": 231750 }, { "epoch": 7.163874636830068, "grad_norm": 0.12294113636016846, "learning_rate": 1.641931445879953e-05, "loss": 0.0072, "step": 231780 }, { "epoch": 7.164801879211226, "grad_norm": 0.11044217646121979, "learning_rate": 1.6418850837608953e-05, "loss": 0.0065, "step": 231810 }, { "epoch": 7.165729121592384, "grad_norm": 0.06692035496234894, "learning_rate": 1.641838721641837e-05, "loss": 0.0061, "step": 231840 }, { "epoch": 7.1666563639735426, "grad_norm": 0.16730841994285583, "learning_rate": 1.6417923595227793e-05, "loss": 0.0067, "step": 231870 }, { "epoch": 7.167583606354701, "grad_norm": 0.1366318017244339, "learning_rate": 1.6417459974037214e-05, "loss": 0.0062, "step": 231900 }, { "epoch": 7.16851084873586, "grad_norm": 0.07987471669912338, "learning_rate": 1.6416996352846636e-05, "loss": 0.0067, "step": 231930 }, { "epoch": 7.169438091117018, "grad_norm": 
0.10167054086923599, "learning_rate": 1.6416532731656057e-05, "loss": 0.0067, "step": 231960 }, { "epoch": 7.170365333498176, "grad_norm": 0.0751948207616806, "learning_rate": 1.641606911046548e-05, "loss": 0.0065, "step": 231990 }, { "epoch": 7.171292575879335, "grad_norm": 0.11320968717336655, "learning_rate": 1.6415605489274897e-05, "loss": 0.0067, "step": 232020 }, { "epoch": 7.172219818260493, "grad_norm": 0.1492445170879364, "learning_rate": 1.641514186808432e-05, "loss": 0.0056, "step": 232050 }, { "epoch": 7.173147060641652, "grad_norm": 0.1064622551202774, "learning_rate": 1.641467824689374e-05, "loss": 0.0058, "step": 232080 }, { "epoch": 7.1740743030228105, "grad_norm": 0.0765906497836113, "learning_rate": 1.641421462570316e-05, "loss": 0.0065, "step": 232110 }, { "epoch": 7.175001545403968, "grad_norm": 0.1159062534570694, "learning_rate": 1.6413751004512583e-05, "loss": 0.0066, "step": 232140 }, { "epoch": 7.175928787785127, "grad_norm": 0.12166810780763626, "learning_rate": 1.6413287383322e-05, "loss": 0.0065, "step": 232170 }, { "epoch": 7.176856030166285, "grad_norm": 0.12886418402194977, "learning_rate": 1.6412823762131422e-05, "loss": 0.0067, "step": 232200 }, { "epoch": 7.177783272547444, "grad_norm": 0.14372442662715912, "learning_rate": 1.6412360140940844e-05, "loss": 0.0068, "step": 232230 }, { "epoch": 7.178710514928603, "grad_norm": 0.14280591905117035, "learning_rate": 1.6411896519750262e-05, "loss": 0.006, "step": 232260 }, { "epoch": 7.179637757309761, "grad_norm": 0.12074983865022659, "learning_rate": 1.6411432898559684e-05, "loss": 0.0067, "step": 232290 }, { "epoch": 7.180564999690919, "grad_norm": 0.05622198060154915, "learning_rate": 1.6410969277369105e-05, "loss": 0.0064, "step": 232320 }, { "epoch": 7.1814922420720775, "grad_norm": 0.1480737030506134, "learning_rate": 1.6410505656178527e-05, "loss": 0.0069, "step": 232350 }, { "epoch": 7.182419484453236, "grad_norm": 0.08243891596794128, "learning_rate": 1.6410042034987948e-05, 
"loss": 0.0063, "step": 232380 }, { "epoch": 7.183346726834395, "grad_norm": 0.13074684143066406, "learning_rate": 1.640957841379737e-05, "loss": 0.0065, "step": 232410 }, { "epoch": 7.184273969215553, "grad_norm": 0.15139533579349518, "learning_rate": 1.6409114792606788e-05, "loss": 0.0075, "step": 232440 }, { "epoch": 7.185201211596711, "grad_norm": 0.10450194031000137, "learning_rate": 1.640865117141621e-05, "loss": 0.007, "step": 232470 }, { "epoch": 7.18612845397787, "grad_norm": 0.09124422818422318, "learning_rate": 1.640818755022563e-05, "loss": 0.0063, "step": 232500 }, { "epoch": 7.187055696359028, "grad_norm": 0.08418348431587219, "learning_rate": 1.6407723929035052e-05, "loss": 0.007, "step": 232530 }, { "epoch": 7.187982938740187, "grad_norm": 0.12285257130861282, "learning_rate": 1.6407260307844474e-05, "loss": 0.0065, "step": 232560 }, { "epoch": 7.188910181121345, "grad_norm": 0.12570035457611084, "learning_rate": 1.6406796686653892e-05, "loss": 0.0063, "step": 232590 }, { "epoch": 7.189837423502503, "grad_norm": 0.1038513258099556, "learning_rate": 1.6406333065463313e-05, "loss": 0.0053, "step": 232620 }, { "epoch": 7.190764665883662, "grad_norm": 0.1160687804222107, "learning_rate": 1.6405869444272735e-05, "loss": 0.006, "step": 232650 }, { "epoch": 7.19169190826482, "grad_norm": 0.09652630984783173, "learning_rate": 1.6405405823082153e-05, "loss": 0.0064, "step": 232680 }, { "epoch": 7.192619150645979, "grad_norm": 0.0843791589140892, "learning_rate": 1.6404942201891575e-05, "loss": 0.0067, "step": 232710 }, { "epoch": 7.1935463930271375, "grad_norm": 0.10260210186243057, "learning_rate": 1.6404478580700996e-05, "loss": 0.0065, "step": 232740 }, { "epoch": 7.194473635408296, "grad_norm": 0.080850750207901, "learning_rate": 1.6404014959510418e-05, "loss": 0.0069, "step": 232770 }, { "epoch": 7.195400877789454, "grad_norm": 0.13638506829738617, "learning_rate": 1.640355133831984e-05, "loss": 0.0066, "step": 232800 }, { "epoch": 7.196328120170612, 
"grad_norm": 0.10055702924728394, "learning_rate": 1.6403087717129257e-05, "loss": 0.0072, "step": 232830 }, { "epoch": 7.197255362551771, "grad_norm": 0.1007748618721962, "learning_rate": 1.640262409593868e-05, "loss": 0.007, "step": 232860 }, { "epoch": 7.19818260493293, "grad_norm": 0.11870256811380386, "learning_rate": 1.64021604747481e-05, "loss": 0.007, "step": 232890 }, { "epoch": 7.199109847314088, "grad_norm": 0.10586423426866531, "learning_rate": 1.640169685355752e-05, "loss": 0.0071, "step": 232920 }, { "epoch": 7.200037089695246, "grad_norm": 0.17642128467559814, "learning_rate": 1.6401233232366943e-05, "loss": 0.0065, "step": 232950 }, { "epoch": 7.2009643320764045, "grad_norm": 0.07957758009433746, "learning_rate": 1.6400769611176365e-05, "loss": 0.0068, "step": 232980 }, { "epoch": 7.201891574457563, "grad_norm": 0.12850044667720795, "learning_rate": 1.6400305989985783e-05, "loss": 0.0066, "step": 233010 }, { "epoch": 7.202818816838722, "grad_norm": 0.10499503463506699, "learning_rate": 1.6399842368795204e-05, "loss": 0.0067, "step": 233040 }, { "epoch": 7.20374605921988, "grad_norm": 0.11688540875911713, "learning_rate": 1.6399378747604626e-05, "loss": 0.0068, "step": 233070 }, { "epoch": 7.204673301601039, "grad_norm": 0.1440809667110443, "learning_rate": 1.6398915126414044e-05, "loss": 0.0066, "step": 233100 }, { "epoch": 7.205600543982197, "grad_norm": 0.13456980884075165, "learning_rate": 1.639845150522347e-05, "loss": 0.0067, "step": 233130 }, { "epoch": 7.206527786363355, "grad_norm": 0.11666613072156906, "learning_rate": 1.6397987884032887e-05, "loss": 0.0063, "step": 233160 }, { "epoch": 7.207455028744514, "grad_norm": 0.15475940704345703, "learning_rate": 1.639752426284231e-05, "loss": 0.0072, "step": 233190 }, { "epoch": 7.208382271125672, "grad_norm": 0.11309485137462616, "learning_rate": 1.639706064165173e-05, "loss": 0.0066, "step": 233220 }, { "epoch": 7.209309513506831, "grad_norm": 0.11378993093967438, "learning_rate": 
1.6396597020461148e-05, "loss": 0.0062, "step": 233250 }, { "epoch": 7.210236755887989, "grad_norm": 0.1027950569987297, "learning_rate": 1.639613339927057e-05, "loss": 0.0067, "step": 233280 }, { "epoch": 7.211163998269147, "grad_norm": 0.10864464938640594, "learning_rate": 1.639566977807999e-05, "loss": 0.0068, "step": 233310 }, { "epoch": 7.212091240650306, "grad_norm": 0.13448315858840942, "learning_rate": 1.6395206156889413e-05, "loss": 0.0059, "step": 233340 }, { "epoch": 7.2130184830314645, "grad_norm": 0.12441537529230118, "learning_rate": 1.6394742535698834e-05, "loss": 0.0064, "step": 233370 }, { "epoch": 7.213945725412623, "grad_norm": 0.1097174733877182, "learning_rate": 1.6394278914508256e-05, "loss": 0.0065, "step": 233400 }, { "epoch": 7.214872967793781, "grad_norm": 0.15019014477729797, "learning_rate": 1.6393815293317674e-05, "loss": 0.0064, "step": 233430 }, { "epoch": 7.2158002101749394, "grad_norm": 0.1371033936738968, "learning_rate": 1.6393351672127095e-05, "loss": 0.0057, "step": 233460 }, { "epoch": 7.216727452556098, "grad_norm": 0.08848278224468231, "learning_rate": 1.6392888050936517e-05, "loss": 0.0071, "step": 233490 }, { "epoch": 7.217654694937257, "grad_norm": 0.10812931507825851, "learning_rate": 1.6392424429745938e-05, "loss": 0.0065, "step": 233520 }, { "epoch": 7.218581937318415, "grad_norm": 0.07663879543542862, "learning_rate": 1.639196080855536e-05, "loss": 0.0064, "step": 233550 }, { "epoch": 7.219509179699574, "grad_norm": 0.10159424692392349, "learning_rate": 1.6391497187364778e-05, "loss": 0.007, "step": 233580 }, { "epoch": 7.2204364220807316, "grad_norm": 0.07747025042772293, "learning_rate": 1.63910335661742e-05, "loss": 0.0065, "step": 233610 }, { "epoch": 7.22136366446189, "grad_norm": 0.14689290523529053, "learning_rate": 1.639056994498362e-05, "loss": 0.0068, "step": 233640 }, { "epoch": 7.222290906843049, "grad_norm": 0.0973997563123703, "learning_rate": 1.639010632379304e-05, "loss": 0.006, "step": 233670 }, { 
"epoch": 7.223218149224207, "grad_norm": 0.13843511044979095, "learning_rate": 1.638964270260246e-05, "loss": 0.0064, "step": 233700 }, { "epoch": 7.224145391605366, "grad_norm": 0.05449235066771507, "learning_rate": 1.6389179081411882e-05, "loss": 0.0061, "step": 233730 }, { "epoch": 7.225072633986524, "grad_norm": 0.06174711883068085, "learning_rate": 1.6388715460221304e-05, "loss": 0.0059, "step": 233760 }, { "epoch": 7.225999876367682, "grad_norm": 0.11383002251386642, "learning_rate": 1.6388251839030725e-05, "loss": 0.0068, "step": 233790 }, { "epoch": 7.226927118748841, "grad_norm": 0.19467632472515106, "learning_rate": 1.6387788217840143e-05, "loss": 0.0068, "step": 233820 }, { "epoch": 7.2278543611299995, "grad_norm": 0.13610926270484924, "learning_rate": 1.6387324596649565e-05, "loss": 0.0063, "step": 233850 }, { "epoch": 7.228781603511158, "grad_norm": 0.10048159211874008, "learning_rate": 1.6386860975458986e-05, "loss": 0.006, "step": 233880 }, { "epoch": 7.229708845892317, "grad_norm": 0.1412714570760727, "learning_rate": 1.6386397354268408e-05, "loss": 0.0058, "step": 233910 }, { "epoch": 7.230636088273474, "grad_norm": 0.10880863666534424, "learning_rate": 1.638593373307783e-05, "loss": 0.0072, "step": 233940 }, { "epoch": 7.231563330654633, "grad_norm": 0.08837112039327621, "learning_rate": 1.638547011188725e-05, "loss": 0.0064, "step": 233970 }, { "epoch": 7.232490573035792, "grad_norm": 0.13093134760856628, "learning_rate": 1.638500649069667e-05, "loss": 0.0066, "step": 234000 }, { "epoch": 7.23341781541695, "grad_norm": 0.14478692412376404, "learning_rate": 1.638454286950609e-05, "loss": 0.0074, "step": 234030 }, { "epoch": 7.234345057798109, "grad_norm": 0.0723753273487091, "learning_rate": 1.6384079248315512e-05, "loss": 0.0063, "step": 234060 }, { "epoch": 7.2352723001792665, "grad_norm": 0.11261790245771408, "learning_rate": 1.638361562712493e-05, "loss": 0.007, "step": 234090 }, { "epoch": 7.236199542560425, "grad_norm": 0.15425552427768707, 
"learning_rate": 1.638315200593435e-05, "loss": 0.0055, "step": 234120 }, { "epoch": 7.237126784941584, "grad_norm": 0.12147454172372818, "learning_rate": 1.6382688384743773e-05, "loss": 0.0062, "step": 234150 }, { "epoch": 7.238054027322742, "grad_norm": 0.11866473406553268, "learning_rate": 1.6382224763553194e-05, "loss": 0.0072, "step": 234180 }, { "epoch": 7.238981269703901, "grad_norm": 0.11787906289100647, "learning_rate": 1.6381761142362616e-05, "loss": 0.0071, "step": 234210 }, { "epoch": 7.239908512085059, "grad_norm": 0.11178229004144669, "learning_rate": 1.6381297521172034e-05, "loss": 0.0065, "step": 234240 }, { "epoch": 7.240835754466217, "grad_norm": 0.11421159654855728, "learning_rate": 1.6380849354021142e-05, "loss": 0.0065, "step": 234270 }, { "epoch": 7.241762996847376, "grad_norm": 0.13437171280384064, "learning_rate": 1.6380385732830563e-05, "loss": 0.0069, "step": 234300 }, { "epoch": 7.242690239228534, "grad_norm": 0.11621599644422531, "learning_rate": 1.637992211163998e-05, "loss": 0.0061, "step": 234330 }, { "epoch": 7.243617481609693, "grad_norm": 0.14783315360546112, "learning_rate": 1.6379458490449403e-05, "loss": 0.0064, "step": 234360 }, { "epoch": 7.244544723990852, "grad_norm": 0.07621314376592636, "learning_rate": 1.6378994869258828e-05, "loss": 0.0063, "step": 234390 }, { "epoch": 7.245471966372009, "grad_norm": 0.10292597860097885, "learning_rate": 1.6378531248068246e-05, "loss": 0.0064, "step": 234420 }, { "epoch": 7.246399208753168, "grad_norm": 0.10768215358257294, "learning_rate": 1.6378067626877668e-05, "loss": 0.0074, "step": 234450 }, { "epoch": 7.2473264511343265, "grad_norm": 0.12987034022808075, "learning_rate": 1.637760400568709e-05, "loss": 0.0066, "step": 234480 }, { "epoch": 7.248253693515485, "grad_norm": 0.11939380317926407, "learning_rate": 1.6377140384496507e-05, "loss": 0.0061, "step": 234510 }, { "epoch": 7.249180935896644, "grad_norm": 0.13783712685108185, "learning_rate": 1.637667676330593e-05, "loss": 0.0065, 
"step": 234540 }, { "epoch": 7.250108178277801, "grad_norm": 0.09279058873653412, "learning_rate": 1.637621314211535e-05, "loss": 0.0064, "step": 234570 }, { "epoch": 7.25103542065896, "grad_norm": 0.10453340411186218, "learning_rate": 1.6375749520924772e-05, "loss": 0.0069, "step": 234600 }, { "epoch": 7.251962663040119, "grad_norm": 0.12321721017360687, "learning_rate": 1.6375285899734193e-05, "loss": 0.0071, "step": 234630 }, { "epoch": 7.252889905421277, "grad_norm": 0.11529494822025299, "learning_rate": 1.637482227854361e-05, "loss": 0.0073, "step": 234660 }, { "epoch": 7.253817147802436, "grad_norm": 0.07951723039150238, "learning_rate": 1.6374358657353033e-05, "loss": 0.0064, "step": 234690 }, { "epoch": 7.254744390183594, "grad_norm": 0.12466181069612503, "learning_rate": 1.6373895036162454e-05, "loss": 0.0065, "step": 234720 }, { "epoch": 7.255671632564752, "grad_norm": 0.13877370953559875, "learning_rate": 1.6373431414971876e-05, "loss": 0.0061, "step": 234750 }, { "epoch": 7.256598874945911, "grad_norm": 0.1933547705411911, "learning_rate": 1.6372967793781297e-05, "loss": 0.0068, "step": 234780 }, { "epoch": 7.257526117327069, "grad_norm": 0.12757404148578644, "learning_rate": 1.637250417259072e-05, "loss": 0.0067, "step": 234810 }, { "epoch": 7.258453359708228, "grad_norm": 0.13771392405033112, "learning_rate": 1.6372040551400137e-05, "loss": 0.0065, "step": 234840 }, { "epoch": 7.2593806020893865, "grad_norm": 0.07595193386077881, "learning_rate": 1.6371592384249245e-05, "loss": 0.0061, "step": 234870 }, { "epoch": 7.260307844470544, "grad_norm": 0.056851375848054886, "learning_rate": 1.6371128763058666e-05, "loss": 0.0067, "step": 234900 }, { "epoch": 7.261235086851703, "grad_norm": 0.10547599196434021, "learning_rate": 1.6370665141868085e-05, "loss": 0.0062, "step": 234930 }, { "epoch": 7.262162329232861, "grad_norm": 0.16061845421791077, "learning_rate": 1.6370201520677506e-05, "loss": 0.0067, "step": 234960 }, { "epoch": 7.26308957161402, 
"grad_norm": 0.11487387865781784, "learning_rate": 1.6369737899486928e-05, "loss": 0.0066, "step": 234990 }, { "epoch": 7.264016813995179, "grad_norm": 0.11667715758085251, "learning_rate": 1.636927427829635e-05, "loss": 0.0066, "step": 235020 }, { "epoch": 7.264944056376336, "grad_norm": 0.11741410195827484, "learning_rate": 1.636881065710577e-05, "loss": 0.0066, "step": 235050 }, { "epoch": 7.265871298757495, "grad_norm": 0.08113949745893478, "learning_rate": 1.636834703591519e-05, "loss": 0.0063, "step": 235080 }, { "epoch": 7.2667985411386535, "grad_norm": 0.1259126514196396, "learning_rate": 1.636788341472461e-05, "loss": 0.0061, "step": 235110 }, { "epoch": 7.267725783519812, "grad_norm": 0.07716957479715347, "learning_rate": 1.6367419793534032e-05, "loss": 0.0065, "step": 235140 }, { "epoch": 7.268653025900971, "grad_norm": 0.1320224404335022, "learning_rate": 1.636695617234345e-05, "loss": 0.0069, "step": 235170 }, { "epoch": 7.269580268282129, "grad_norm": 0.11276174336671829, "learning_rate": 1.636649255115287e-05, "loss": 0.006, "step": 235200 }, { "epoch": 7.270507510663287, "grad_norm": 0.1559033840894699, "learning_rate": 1.6366028929962293e-05, "loss": 0.0071, "step": 235230 }, { "epoch": 7.271434753044446, "grad_norm": 0.09101134538650513, "learning_rate": 1.6365565308771714e-05, "loss": 0.0064, "step": 235260 }, { "epoch": 7.272361995425604, "grad_norm": 0.10745802521705627, "learning_rate": 1.6365101687581136e-05, "loss": 0.007, "step": 235290 }, { "epoch": 7.273289237806763, "grad_norm": 0.14568880200386047, "learning_rate": 1.6364638066390557e-05, "loss": 0.0062, "step": 235320 }, { "epoch": 7.274216480187921, "grad_norm": 0.10362443327903748, "learning_rate": 1.6364174445199975e-05, "loss": 0.0064, "step": 235350 }, { "epoch": 7.275143722569079, "grad_norm": 0.13507702946662903, "learning_rate": 1.6363710824009397e-05, "loss": 0.0068, "step": 235380 }, { "epoch": 7.276070964950238, "grad_norm": 0.0890592485666275, "learning_rate": 
1.636324720281882e-05, "loss": 0.0065, "step": 235410 }, { "epoch": 7.276998207331396, "grad_norm": 0.09849938750267029, "learning_rate": 1.636278358162824e-05, "loss": 0.0074, "step": 235440 }, { "epoch": 7.277925449712555, "grad_norm": 0.10320770740509033, "learning_rate": 1.636231996043766e-05, "loss": 0.0063, "step": 235470 }, { "epoch": 7.2788526920937136, "grad_norm": 0.0962492898106575, "learning_rate": 1.636185633924708e-05, "loss": 0.0059, "step": 235500 }, { "epoch": 7.279779934474872, "grad_norm": 0.2441704124212265, "learning_rate": 1.63613927180565e-05, "loss": 0.0069, "step": 235530 }, { "epoch": 7.28070717685603, "grad_norm": 0.09185861051082611, "learning_rate": 1.6360929096865923e-05, "loss": 0.0065, "step": 235560 }, { "epoch": 7.2816344192371885, "grad_norm": 0.15248805284500122, "learning_rate": 1.636046547567534e-05, "loss": 0.0067, "step": 235590 }, { "epoch": 7.282561661618347, "grad_norm": 0.12673240900039673, "learning_rate": 1.6360001854484762e-05, "loss": 0.0065, "step": 235620 }, { "epoch": 7.283488903999506, "grad_norm": 0.09472090750932693, "learning_rate": 1.6359538233294184e-05, "loss": 0.0064, "step": 235650 }, { "epoch": 7.284416146380664, "grad_norm": 0.13837677240371704, "learning_rate": 1.6359074612103605e-05, "loss": 0.0069, "step": 235680 }, { "epoch": 7.285343388761822, "grad_norm": 0.16580693423748016, "learning_rate": 1.6358610990913027e-05, "loss": 0.0077, "step": 235710 }, { "epoch": 7.286270631142981, "grad_norm": 0.08379077166318893, "learning_rate": 1.6358147369722445e-05, "loss": 0.0071, "step": 235740 }, { "epoch": 7.287197873524139, "grad_norm": 0.11119858175516129, "learning_rate": 1.6357683748531866e-05, "loss": 0.0066, "step": 235770 }, { "epoch": 7.288125115905298, "grad_norm": 0.13317611813545227, "learning_rate": 1.6357220127341288e-05, "loss": 0.0065, "step": 235800 }, { "epoch": 7.289052358286456, "grad_norm": 0.11829110234975815, "learning_rate": 1.635675650615071e-05, "loss": 0.0074, "step": 235830 }, { 
"epoch": 7.289979600667614, "grad_norm": 0.10472559183835983, "learning_rate": 1.635629288496013e-05, "loss": 0.0066, "step": 235860 }, { "epoch": 7.290906843048773, "grad_norm": 0.10250045359134674, "learning_rate": 1.6355829263769552e-05, "loss": 0.0061, "step": 235890 }, { "epoch": 7.291834085429931, "grad_norm": 0.10310421139001846, "learning_rate": 1.635536564257897e-05, "loss": 0.0073, "step": 235920 }, { "epoch": 7.29276132781109, "grad_norm": 0.11266718804836273, "learning_rate": 1.6354902021388392e-05, "loss": 0.0069, "step": 235950 }, { "epoch": 7.2936885701922485, "grad_norm": 0.11224159598350525, "learning_rate": 1.6354438400197814e-05, "loss": 0.007, "step": 235980 }, { "epoch": 7.294615812573407, "grad_norm": 0.11292468011379242, "learning_rate": 1.6353974779007235e-05, "loss": 0.0072, "step": 236010 }, { "epoch": 7.295543054954565, "grad_norm": 0.1842224895954132, "learning_rate": 1.6353511157816657e-05, "loss": 0.0062, "step": 236040 }, { "epoch": 7.296470297335723, "grad_norm": 0.12371649593114853, "learning_rate": 1.6353047536626075e-05, "loss": 0.0069, "step": 236070 }, { "epoch": 7.297397539716882, "grad_norm": 0.09738674014806747, "learning_rate": 1.6352583915435496e-05, "loss": 0.0064, "step": 236100 }, { "epoch": 7.298324782098041, "grad_norm": 0.0882495567202568, "learning_rate": 1.6352120294244918e-05, "loss": 0.0073, "step": 236130 }, { "epoch": 7.299252024479199, "grad_norm": 0.07694720476865768, "learning_rate": 1.6351656673054336e-05, "loss": 0.0068, "step": 236160 }, { "epoch": 7.300179266860357, "grad_norm": 0.14263299107551575, "learning_rate": 1.6351193051863757e-05, "loss": 0.0064, "step": 236190 }, { "epoch": 7.3011065092415155, "grad_norm": 0.08743519335985184, "learning_rate": 1.635072943067318e-05, "loss": 0.007, "step": 236220 }, { "epoch": 7.302033751622674, "grad_norm": 0.10239870101213455, "learning_rate": 1.63502658094826e-05, "loss": 0.0068, "step": 236250 }, { "epoch": 7.302960994003833, "grad_norm": 0.17396721243858337, 
"learning_rate": 1.6349802188292022e-05, "loss": 0.006, "step": 236280 }, { "epoch": 7.303888236384991, "grad_norm": 0.1238526701927185, "learning_rate": 1.6349338567101443e-05, "loss": 0.0069, "step": 236310 }, { "epoch": 7.30481547876615, "grad_norm": 0.08194182068109512, "learning_rate": 1.634887494591086e-05, "loss": 0.0064, "step": 236340 }, { "epoch": 7.305742721147308, "grad_norm": 0.10973646491765976, "learning_rate": 1.6348411324720283e-05, "loss": 0.0064, "step": 236370 }, { "epoch": 7.306669963528466, "grad_norm": 0.08594056218862534, "learning_rate": 1.6347947703529704e-05, "loss": 0.0068, "step": 236400 }, { "epoch": 7.307597205909625, "grad_norm": 0.13361585140228271, "learning_rate": 1.6347484082339126e-05, "loss": 0.0061, "step": 236430 }, { "epoch": 7.308524448290783, "grad_norm": 0.08588261157274246, "learning_rate": 1.6347020461148547e-05, "loss": 0.0061, "step": 236460 }, { "epoch": 7.309451690671942, "grad_norm": 0.32565587759017944, "learning_rate": 1.6346556839957966e-05, "loss": 0.0061, "step": 236490 }, { "epoch": 7.3103789330531, "grad_norm": 0.1936940997838974, "learning_rate": 1.6346093218767387e-05, "loss": 0.007, "step": 236520 }, { "epoch": 7.311306175434258, "grad_norm": 0.09195920825004578, "learning_rate": 1.634562959757681e-05, "loss": 0.0065, "step": 236550 }, { "epoch": 7.312233417815417, "grad_norm": 0.13488808274269104, "learning_rate": 1.6345165976386227e-05, "loss": 0.0064, "step": 236580 }, { "epoch": 7.3131606601965755, "grad_norm": 0.0934363454580307, "learning_rate": 1.6344702355195648e-05, "loss": 0.0061, "step": 236610 }, { "epoch": 7.314087902577734, "grad_norm": 0.11735491454601288, "learning_rate": 1.634423873400507e-05, "loss": 0.0072, "step": 236640 }, { "epoch": 7.315015144958892, "grad_norm": 0.08983900398015976, "learning_rate": 1.634377511281449e-05, "loss": 0.0072, "step": 236670 }, { "epoch": 7.31594238734005, "grad_norm": 0.17113345861434937, "learning_rate": 1.6343311491623913e-05, "loss": 0.0066, "step": 
236700 }, { "epoch": 7.316869629721209, "grad_norm": 0.09770702570676804, "learning_rate": 1.634284787043333e-05, "loss": 0.0073, "step": 236730 }, { "epoch": 7.317796872102368, "grad_norm": 0.09953297674655914, "learning_rate": 1.6342384249242752e-05, "loss": 0.0058, "step": 236760 }, { "epoch": 7.318724114483526, "grad_norm": 0.13360588252544403, "learning_rate": 1.6341920628052174e-05, "loss": 0.0064, "step": 236790 }, { "epoch": 7.319651356864685, "grad_norm": 0.08655055612325668, "learning_rate": 1.6341457006861595e-05, "loss": 0.0064, "step": 236820 }, { "epoch": 7.3205785992458425, "grad_norm": 0.10609475523233414, "learning_rate": 1.6340993385671017e-05, "loss": 0.0061, "step": 236850 }, { "epoch": 7.321505841627001, "grad_norm": 0.12507064640522003, "learning_rate": 1.634052976448044e-05, "loss": 0.0069, "step": 236880 }, { "epoch": 7.32243308400816, "grad_norm": 0.15919730067253113, "learning_rate": 1.6340066143289857e-05, "loss": 0.0064, "step": 236910 }, { "epoch": 7.323360326389318, "grad_norm": 0.0948873907327652, "learning_rate": 1.6339602522099278e-05, "loss": 0.0062, "step": 236940 }, { "epoch": 7.324287568770477, "grad_norm": 0.16070988774299622, "learning_rate": 1.63391389009087e-05, "loss": 0.0067, "step": 236970 }, { "epoch": 7.325214811151635, "grad_norm": 0.13933458924293518, "learning_rate": 1.6338675279718118e-05, "loss": 0.007, "step": 237000 }, { "epoch": 7.326142053532793, "grad_norm": 0.1018405482172966, "learning_rate": 1.633821165852754e-05, "loss": 0.0067, "step": 237030 }, { "epoch": 7.327069295913952, "grad_norm": 0.11042755097150803, "learning_rate": 1.633774803733696e-05, "loss": 0.0069, "step": 237060 }, { "epoch": 7.32799653829511, "grad_norm": 0.11120987683534622, "learning_rate": 1.6337284416146382e-05, "loss": 0.0064, "step": 237090 }, { "epoch": 7.328923780676269, "grad_norm": 0.09428470581769943, "learning_rate": 1.6336820794955804e-05, "loss": 0.0069, "step": 237120 }, { "epoch": 7.329851023057428, "grad_norm": 
0.11060994118452072, "learning_rate": 1.6336357173765222e-05, "loss": 0.007, "step": 237150 }, { "epoch": 7.330778265438585, "grad_norm": 0.1377953439950943, "learning_rate": 1.6335893552574643e-05, "loss": 0.0064, "step": 237180 }, { "epoch": 7.331705507819744, "grad_norm": 0.1004500538110733, "learning_rate": 1.6335429931384065e-05, "loss": 0.0064, "step": 237210 }, { "epoch": 7.3326327502009025, "grad_norm": 0.14346106350421906, "learning_rate": 1.6334966310193486e-05, "loss": 0.0064, "step": 237240 }, { "epoch": 7.333559992582061, "grad_norm": 0.08602721989154816, "learning_rate": 1.6334502689002908e-05, "loss": 0.0061, "step": 237270 }, { "epoch": 7.33448723496322, "grad_norm": 0.18994823098182678, "learning_rate": 1.633403906781233e-05, "loss": 0.0067, "step": 237300 }, { "epoch": 7.3354144773443775, "grad_norm": 0.12451697885990143, "learning_rate": 1.6333575446621747e-05, "loss": 0.0058, "step": 237330 }, { "epoch": 7.336341719725536, "grad_norm": 0.13137564063072205, "learning_rate": 1.633311182543117e-05, "loss": 0.006, "step": 237360 }, { "epoch": 7.337268962106695, "grad_norm": 0.1245449036359787, "learning_rate": 1.6332648204240587e-05, "loss": 0.0062, "step": 237390 }, { "epoch": 7.338196204487853, "grad_norm": 0.12009026110172272, "learning_rate": 1.6332184583050012e-05, "loss": 0.0064, "step": 237420 }, { "epoch": 7.339123446869012, "grad_norm": 0.15462779998779297, "learning_rate": 1.6331720961859433e-05, "loss": 0.0068, "step": 237450 }, { "epoch": 7.34005068925017, "grad_norm": 0.12954770028591156, "learning_rate": 1.633125734066885e-05, "loss": 0.0062, "step": 237480 }, { "epoch": 7.340977931631328, "grad_norm": 0.12147129327058792, "learning_rate": 1.6330793719478273e-05, "loss": 0.0065, "step": 237510 }, { "epoch": 7.341905174012487, "grad_norm": 0.17186042666435242, "learning_rate": 1.6330330098287695e-05, "loss": 0.0064, "step": 237540 }, { "epoch": 7.342832416393645, "grad_norm": 0.14662043750286102, "learning_rate": 1.6329866477097113e-05, 
"loss": 0.0073, "step": 237570 }, { "epoch": 7.343759658774804, "grad_norm": 0.07224024087190628, "learning_rate": 1.6329402855906534e-05, "loss": 0.0063, "step": 237600 }, { "epoch": 7.344686901155963, "grad_norm": 0.1007336750626564, "learning_rate": 1.6328939234715956e-05, "loss": 0.0062, "step": 237630 }, { "epoch": 7.34561414353712, "grad_norm": 0.1466047763824463, "learning_rate": 1.6328475613525377e-05, "loss": 0.0071, "step": 237660 }, { "epoch": 7.346541385918279, "grad_norm": 0.10559242218732834, "learning_rate": 1.63280119923348e-05, "loss": 0.0065, "step": 237690 }, { "epoch": 7.3474686282994375, "grad_norm": 0.1916278600692749, "learning_rate": 1.632754837114422e-05, "loss": 0.0065, "step": 237720 }, { "epoch": 7.348395870680596, "grad_norm": 0.11579587310552597, "learning_rate": 1.632708474995364e-05, "loss": 0.0069, "step": 237750 }, { "epoch": 7.349323113061755, "grad_norm": 0.09194061160087585, "learning_rate": 1.632662112876306e-05, "loss": 0.006, "step": 237780 }, { "epoch": 7.350250355442912, "grad_norm": 0.10024584829807281, "learning_rate": 1.632615750757248e-05, "loss": 0.0061, "step": 237810 }, { "epoch": 7.351177597824071, "grad_norm": 0.12556715309619904, "learning_rate": 1.6325693886381903e-05, "loss": 0.0065, "step": 237840 }, { "epoch": 7.35210484020523, "grad_norm": 0.1414806842803955, "learning_rate": 1.6325230265191324e-05, "loss": 0.0061, "step": 237870 }, { "epoch": 7.353032082586388, "grad_norm": 0.12027337402105331, "learning_rate": 1.6324766644000743e-05, "loss": 0.0067, "step": 237900 }, { "epoch": 7.353959324967547, "grad_norm": 0.12753769755363464, "learning_rate": 1.6324303022810164e-05, "loss": 0.0065, "step": 237930 }, { "epoch": 7.354886567348705, "grad_norm": 0.098586805164814, "learning_rate": 1.6323839401619586e-05, "loss": 0.0074, "step": 237960 }, { "epoch": 7.355813809729863, "grad_norm": 0.10708757489919662, "learning_rate": 1.6323375780429004e-05, "loss": 0.0059, "step": 237990 }, { "epoch": 7.356741052111022, 
"grad_norm": 0.155901700258255, "learning_rate": 1.6322912159238425e-05, "loss": 0.0066, "step": 238020 }, { "epoch": 7.35766829449218, "grad_norm": 0.08646564185619354, "learning_rate": 1.6322448538047847e-05, "loss": 0.0066, "step": 238050 }, { "epoch": 7.358595536873339, "grad_norm": 0.10808165371417999, "learning_rate": 1.6321984916857268e-05, "loss": 0.0066, "step": 238080 }, { "epoch": 7.3595227792544975, "grad_norm": 0.16065657138824463, "learning_rate": 1.632152129566669e-05, "loss": 0.0066, "step": 238110 }, { "epoch": 7.360450021635655, "grad_norm": 0.14511002600193024, "learning_rate": 1.6321057674476108e-05, "loss": 0.0067, "step": 238140 }, { "epoch": 7.361377264016814, "grad_norm": 0.16990895569324493, "learning_rate": 1.632059405328553e-05, "loss": 0.0063, "step": 238170 }, { "epoch": 7.362304506397972, "grad_norm": 0.09174870699644089, "learning_rate": 1.632013043209495e-05, "loss": 0.0065, "step": 238200 }, { "epoch": 7.363231748779131, "grad_norm": 0.1485319882631302, "learning_rate": 1.6319666810904372e-05, "loss": 0.0062, "step": 238230 }, { "epoch": 7.36415899116029, "grad_norm": 0.13300572335720062, "learning_rate": 1.6319203189713794e-05, "loss": 0.0062, "step": 238260 }, { "epoch": 7.365086233541447, "grad_norm": 0.10923095047473907, "learning_rate": 1.6318739568523215e-05, "loss": 0.0065, "step": 238290 }, { "epoch": 7.366013475922606, "grad_norm": 0.12137947976589203, "learning_rate": 1.6318275947332633e-05, "loss": 0.007, "step": 238320 }, { "epoch": 7.3669407183037645, "grad_norm": 0.09971069544553757, "learning_rate": 1.6317812326142055e-05, "loss": 0.0074, "step": 238350 }, { "epoch": 7.367867960684923, "grad_norm": 0.16006794571876526, "learning_rate": 1.6317348704951476e-05, "loss": 0.0067, "step": 238380 }, { "epoch": 7.368795203066082, "grad_norm": 0.10579380393028259, "learning_rate": 1.6316885083760895e-05, "loss": 0.0063, "step": 238410 }, { "epoch": 7.36972244544724, "grad_norm": 0.06712755560874939, "learning_rate": 
1.6316421462570316e-05, "loss": 0.0066, "step": 238440 }, { "epoch": 7.370649687828398, "grad_norm": 0.13295768201351166, "learning_rate": 1.6315957841379738e-05, "loss": 0.0068, "step": 238470 }, { "epoch": 7.371576930209557, "grad_norm": 0.12597203254699707, "learning_rate": 1.631549422018916e-05, "loss": 0.0066, "step": 238500 }, { "epoch": 7.372504172590715, "grad_norm": 0.12925775349140167, "learning_rate": 1.631503059899858e-05, "loss": 0.0061, "step": 238530 }, { "epoch": 7.373431414971874, "grad_norm": 0.14694270491600037, "learning_rate": 1.6314566977808e-05, "loss": 0.0059, "step": 238560 }, { "epoch": 7.374358657353032, "grad_norm": 0.15396980941295624, "learning_rate": 1.631410335661742e-05, "loss": 0.0062, "step": 238590 }, { "epoch": 7.37528589973419, "grad_norm": 0.1845511794090271, "learning_rate": 1.6313639735426842e-05, "loss": 0.0074, "step": 238620 }, { "epoch": 7.376213142115349, "grad_norm": 0.1165221631526947, "learning_rate": 1.6313176114236263e-05, "loss": 0.0068, "step": 238650 }, { "epoch": 7.377140384496507, "grad_norm": 0.11177902668714523, "learning_rate": 1.6312712493045685e-05, "loss": 0.0066, "step": 238680 }, { "epoch": 7.378067626877666, "grad_norm": 0.11702011525630951, "learning_rate": 1.6312248871855106e-05, "loss": 0.0062, "step": 238710 }, { "epoch": 7.3789948692588245, "grad_norm": 0.08706527948379517, "learning_rate": 1.6311785250664524e-05, "loss": 0.0056, "step": 238740 }, { "epoch": 7.379922111639983, "grad_norm": 0.15539748966693878, "learning_rate": 1.6311321629473946e-05, "loss": 0.0067, "step": 238770 }, { "epoch": 7.380849354021141, "grad_norm": 0.11484234035015106, "learning_rate": 1.6310858008283364e-05, "loss": 0.0065, "step": 238800 }, { "epoch": 7.381776596402299, "grad_norm": 0.10395780205726624, "learning_rate": 1.631039438709279e-05, "loss": 0.0063, "step": 238830 }, { "epoch": 7.382703838783458, "grad_norm": 0.10895422846078873, "learning_rate": 1.630993076590221e-05, "loss": 0.0061, "step": 238860 }, { 
"epoch": 7.383631081164617, "grad_norm": 0.07298419624567032, "learning_rate": 1.630946714471163e-05, "loss": 0.0061, "step": 238890 }, { "epoch": 7.384558323545775, "grad_norm": 0.08885262161493301, "learning_rate": 1.630900352352105e-05, "loss": 0.0069, "step": 238920 }, { "epoch": 7.385485565926933, "grad_norm": 0.12853582203388214, "learning_rate": 1.630853990233047e-05, "loss": 0.0061, "step": 238950 }, { "epoch": 7.3864128083080915, "grad_norm": 0.0949968695640564, "learning_rate": 1.630807628113989e-05, "loss": 0.0069, "step": 238980 }, { "epoch": 7.38734005068925, "grad_norm": 0.12010273337364197, "learning_rate": 1.630761265994931e-05, "loss": 0.0063, "step": 239010 }, { "epoch": 7.388267293070409, "grad_norm": 0.15972468256950378, "learning_rate": 1.6307149038758733e-05, "loss": 0.0069, "step": 239040 }, { "epoch": 7.389194535451567, "grad_norm": 0.18319781124591827, "learning_rate": 1.6306685417568154e-05, "loss": 0.0065, "step": 239070 }, { "epoch": 7.390121777832725, "grad_norm": 0.13923406600952148, "learning_rate": 1.6306221796377576e-05, "loss": 0.0063, "step": 239100 }, { "epoch": 7.391049020213884, "grad_norm": 0.11521785706281662, "learning_rate": 1.6305758175186994e-05, "loss": 0.0066, "step": 239130 }, { "epoch": 7.391976262595042, "grad_norm": 0.14635974168777466, "learning_rate": 1.6305294553996415e-05, "loss": 0.0064, "step": 239160 }, { "epoch": 7.392903504976201, "grad_norm": 0.10259141027927399, "learning_rate": 1.6304830932805837e-05, "loss": 0.0073, "step": 239190 }, { "epoch": 7.3938307473573595, "grad_norm": 0.1079912856221199, "learning_rate": 1.6304367311615258e-05, "loss": 0.0064, "step": 239220 }, { "epoch": 7.394757989738518, "grad_norm": 0.06465861201286316, "learning_rate": 1.630390369042468e-05, "loss": 0.0065, "step": 239250 }, { "epoch": 7.395685232119676, "grad_norm": 0.1362462341785431, "learning_rate": 1.63034400692341e-05, "loss": 0.0067, "step": 239280 }, { "epoch": 7.396612474500834, "grad_norm": 0.10169325023889542, 
"learning_rate": 1.630297644804352e-05, "loss": 0.0066, "step": 239310 }, { "epoch": 7.397539716881993, "grad_norm": 0.1256963312625885, "learning_rate": 1.630251282685294e-05, "loss": 0.0066, "step": 239340 }, { "epoch": 7.398466959263152, "grad_norm": 0.11882191896438599, "learning_rate": 1.6302049205662362e-05, "loss": 0.0066, "step": 239370 }, { "epoch": 7.39939420164431, "grad_norm": 0.1194119080901146, "learning_rate": 1.630158558447178e-05, "loss": 0.0063, "step": 239400 }, { "epoch": 7.400321444025468, "grad_norm": 0.14794865250587463, "learning_rate": 1.6301121963281202e-05, "loss": 0.0062, "step": 239430 }, { "epoch": 7.4012486864066265, "grad_norm": 0.07802434265613556, "learning_rate": 1.6300658342090624e-05, "loss": 0.0062, "step": 239460 }, { "epoch": 7.402175928787785, "grad_norm": Infinity, "learning_rate": 1.630021017493973e-05, "loss": 0.0056, "step": 239490 }, { "epoch": 7.403103171168944, "grad_norm": 0.11390011012554169, "learning_rate": 1.6299746553749153e-05, "loss": 0.0071, "step": 239520 }, { "epoch": 7.404030413550102, "grad_norm": 0.11568614840507507, "learning_rate": 1.629928293255857e-05, "loss": 0.0064, "step": 239550 }, { "epoch": 7.404957655931261, "grad_norm": 0.09699056297540665, "learning_rate": 1.6298819311367993e-05, "loss": 0.0062, "step": 239580 }, { "epoch": 7.405884898312419, "grad_norm": 0.13892526924610138, "learning_rate": 1.6298355690177414e-05, "loss": 0.0067, "step": 239610 }, { "epoch": 7.406812140693577, "grad_norm": 0.0932636484503746, "learning_rate": 1.6297892068986832e-05, "loss": 0.0062, "step": 239640 }, { "epoch": 7.407739383074736, "grad_norm": 0.13322344422340393, "learning_rate": 1.6297428447796254e-05, "loss": 0.0062, "step": 239670 }, { "epoch": 7.408666625455894, "grad_norm": 0.10673286020755768, "learning_rate": 1.6296964826605675e-05, "loss": 0.0076, "step": 239700 }, { "epoch": 7.409593867837053, "grad_norm": 0.09788484871387482, "learning_rate": 1.6296501205415097e-05, "loss": 0.0066, "step": 239730 
}, { "epoch": 7.410521110218211, "grad_norm": 0.12236464023590088, "learning_rate": 1.6296037584224518e-05, "loss": 0.0069, "step": 239760 }, { "epoch": 7.411448352599369, "grad_norm": 0.14374613761901855, "learning_rate": 1.629557396303394e-05, "loss": 0.0062, "step": 239790 }, { "epoch": 7.412375594980528, "grad_norm": 0.10523929446935654, "learning_rate": 1.6295110341843358e-05, "loss": 0.0073, "step": 239820 }, { "epoch": 7.4133028373616865, "grad_norm": 0.10262275487184525, "learning_rate": 1.629464672065278e-05, "loss": 0.0065, "step": 239850 }, { "epoch": 7.414230079742845, "grad_norm": 0.09239719063043594, "learning_rate": 1.62941830994622e-05, "loss": 0.0063, "step": 239880 }, { "epoch": 7.415157322124003, "grad_norm": 0.16577479243278503, "learning_rate": 1.6293719478271622e-05, "loss": 0.006, "step": 239910 }, { "epoch": 7.416084564505161, "grad_norm": 0.09316403418779373, "learning_rate": 1.6293255857081044e-05, "loss": 0.006, "step": 239940 }, { "epoch": 7.41701180688632, "grad_norm": 0.13974927365779877, "learning_rate": 1.6292792235890462e-05, "loss": 0.0062, "step": 239970 }, { "epoch": 7.417939049267479, "grad_norm": 0.10860332101583481, "learning_rate": 1.6292328614699884e-05, "loss": 0.0071, "step": 240000 }, { "epoch": 7.418866291648637, "grad_norm": 0.09461675584316254, "learning_rate": 1.6291864993509305e-05, "loss": 0.0063, "step": 240030 }, { "epoch": 7.419793534029796, "grad_norm": 0.08411239832639694, "learning_rate": 1.6291401372318723e-05, "loss": 0.0064, "step": 240060 }, { "epoch": 7.4207207764109535, "grad_norm": 0.10170117020606995, "learning_rate": 1.6290937751128148e-05, "loss": 0.0065, "step": 240090 }, { "epoch": 7.421648018792112, "grad_norm": 0.12058232724666595, "learning_rate": 1.629047412993757e-05, "loss": 0.0065, "step": 240120 }, { "epoch": 7.422575261173271, "grad_norm": 0.16242890059947968, "learning_rate": 1.6290010508746988e-05, "loss": 0.0064, "step": 240150 }, { "epoch": 7.423502503554429, "grad_norm": 
0.13092690706253052, "learning_rate": 1.628954688755641e-05, "loss": 0.0064, "step": 240180 }, { "epoch": 7.424429745935588, "grad_norm": 0.1781591922044754, "learning_rate": 1.6289083266365827e-05, "loss": 0.0065, "step": 240210 }, { "epoch": 7.425356988316746, "grad_norm": 0.12430813908576965, "learning_rate": 1.628861964517525e-05, "loss": 0.0059, "step": 240240 }, { "epoch": 7.426284230697904, "grad_norm": 0.12066590040922165, "learning_rate": 1.628815602398467e-05, "loss": 0.0063, "step": 240270 }, { "epoch": 7.427211473079063, "grad_norm": 0.14860033988952637, "learning_rate": 1.6287692402794092e-05, "loss": 0.0071, "step": 240300 }, { "epoch": 7.428138715460221, "grad_norm": 0.06299642473459244, "learning_rate": 1.6287228781603513e-05, "loss": 0.0063, "step": 240330 }, { "epoch": 7.42906595784138, "grad_norm": 0.12195917963981628, "learning_rate": 1.6286765160412935e-05, "loss": 0.0066, "step": 240360 }, { "epoch": 7.429993200222539, "grad_norm": 0.1505005806684494, "learning_rate": 1.6286301539222353e-05, "loss": 0.0066, "step": 240390 }, { "epoch": 7.430920442603696, "grad_norm": 0.1100088581442833, "learning_rate": 1.6285837918031774e-05, "loss": 0.0067, "step": 240420 }, { "epoch": 7.431847684984855, "grad_norm": 0.11869078129529953, "learning_rate": 1.6285374296841196e-05, "loss": 0.0067, "step": 240450 }, { "epoch": 7.4327749273660135, "grad_norm": 0.136506587266922, "learning_rate": 1.6284910675650617e-05, "loss": 0.006, "step": 240480 }, { "epoch": 7.433702169747172, "grad_norm": 0.12592408061027527, "learning_rate": 1.628444705446004e-05, "loss": 0.006, "step": 240510 }, { "epoch": 7.434629412128331, "grad_norm": 0.13691481947898865, "learning_rate": 1.628398343326946e-05, "loss": 0.0071, "step": 240540 }, { "epoch": 7.435556654509488, "grad_norm": 0.12264905869960785, "learning_rate": 1.628351981207888e-05, "loss": 0.0064, "step": 240570 }, { "epoch": 7.436483896890647, "grad_norm": 0.09181416779756546, "learning_rate": 1.62830561908883e-05, 
"loss": 0.0065, "step": 240600 }, { "epoch": 7.437411139271806, "grad_norm": 0.13832619786262512, "learning_rate": 1.6282592569697718e-05, "loss": 0.0066, "step": 240630 }, { "epoch": 7.438338381652964, "grad_norm": 0.15857379138469696, "learning_rate": 1.628212894850714e-05, "loss": 0.0059, "step": 240660 }, { "epoch": 7.439265624034123, "grad_norm": 0.11458947509527206, "learning_rate": 1.628166532731656e-05, "loss": 0.0066, "step": 240690 }, { "epoch": 7.4401928664152805, "grad_norm": 0.11176929622888565, "learning_rate": 1.6281201706125983e-05, "loss": 0.007, "step": 240720 }, { "epoch": 7.441120108796439, "grad_norm": 0.10520141571760178, "learning_rate": 1.6280738084935404e-05, "loss": 0.0067, "step": 240750 }, { "epoch": 7.442047351177598, "grad_norm": 0.0643080621957779, "learning_rate": 1.6280274463744826e-05, "loss": 0.0064, "step": 240780 }, { "epoch": 7.442974593558756, "grad_norm": 0.07909896969795227, "learning_rate": 1.6279810842554244e-05, "loss": 0.0062, "step": 240810 }, { "epoch": 7.443901835939915, "grad_norm": 0.12631075084209442, "learning_rate": 1.6279347221363665e-05, "loss": 0.0061, "step": 240840 }, { "epoch": 7.444829078321073, "grad_norm": 0.08061293512582779, "learning_rate": 1.6278883600173087e-05, "loss": 0.0074, "step": 240870 }, { "epoch": 7.445756320702231, "grad_norm": 0.08585253357887268, "learning_rate": 1.627841997898251e-05, "loss": 0.0058, "step": 240900 }, { "epoch": 7.44668356308339, "grad_norm": 0.16514137387275696, "learning_rate": 1.627795635779193e-05, "loss": 0.0067, "step": 240930 }, { "epoch": 7.4476108054645485, "grad_norm": 0.15657928586006165, "learning_rate": 1.6277492736601348e-05, "loss": 0.0066, "step": 240960 }, { "epoch": 7.448538047845707, "grad_norm": 0.12275055050849915, "learning_rate": 1.627702911541077e-05, "loss": 0.0055, "step": 240990 }, { "epoch": 7.449465290226866, "grad_norm": 0.1341811865568161, "learning_rate": 1.627656549422019e-05, "loss": 0.0071, "step": 241020 }, { "epoch": 
7.450392532608023, "grad_norm": 0.1319596767425537, "learning_rate": 1.627610187302961e-05, "loss": 0.0068, "step": 241050 }, { "epoch": 7.451319774989182, "grad_norm": 0.12831149995326996, "learning_rate": 1.627563825183903e-05, "loss": 0.0066, "step": 241080 }, { "epoch": 7.452247017370341, "grad_norm": 0.14000417292118073, "learning_rate": 1.627519008468814e-05, "loss": 0.0061, "step": 241110 }, { "epoch": 7.453174259751499, "grad_norm": 0.14049533009529114, "learning_rate": 1.627472646349756e-05, "loss": 0.0066, "step": 241140 }, { "epoch": 7.454101502132658, "grad_norm": 0.1324874311685562, "learning_rate": 1.627426284230698e-05, "loss": 0.0066, "step": 241170 }, { "epoch": 7.455028744513816, "grad_norm": 0.12984798848628998, "learning_rate": 1.6273799221116403e-05, "loss": 0.0064, "step": 241200 }, { "epoch": 7.455955986894974, "grad_norm": 0.10215192288160324, "learning_rate": 1.627333559992582e-05, "loss": 0.0069, "step": 241230 }, { "epoch": 7.456883229276133, "grad_norm": 0.10167579352855682, "learning_rate": 1.6272871978735243e-05, "loss": 0.0065, "step": 241260 }, { "epoch": 7.457810471657291, "grad_norm": 0.14346285164356232, "learning_rate": 1.6272408357544664e-05, "loss": 0.0063, "step": 241290 }, { "epoch": 7.45873771403845, "grad_norm": 0.1803533434867859, "learning_rate": 1.6271944736354082e-05, "loss": 0.0075, "step": 241320 }, { "epoch": 7.4596649564196085, "grad_norm": 0.11164150387048721, "learning_rate": 1.6271481115163504e-05, "loss": 0.0066, "step": 241350 }, { "epoch": 7.460592198800766, "grad_norm": 0.14803797006607056, "learning_rate": 1.6271017493972925e-05, "loss": 0.0062, "step": 241380 }, { "epoch": 7.461519441181925, "grad_norm": 0.0852029100060463, "learning_rate": 1.6270553872782347e-05, "loss": 0.0068, "step": 241410 }, { "epoch": 7.462446683563083, "grad_norm": 0.14647987484931946, "learning_rate": 1.627009025159177e-05, "loss": 0.0068, "step": 241440 }, { "epoch": 7.463373925944242, "grad_norm": 0.14301714301109314, 
"learning_rate": 1.6269626630401186e-05, "loss": 0.0069, "step": 241470 }, { "epoch": 7.464301168325401, "grad_norm": 0.06588441878557205, "learning_rate": 1.6269163009210608e-05, "loss": 0.0064, "step": 241500 }, { "epoch": 7.465228410706558, "grad_norm": 0.09841304272413254, "learning_rate": 1.626869938802003e-05, "loss": 0.0066, "step": 241530 }, { "epoch": 7.466155653087717, "grad_norm": 0.07406891137361526, "learning_rate": 1.626823576682945e-05, "loss": 0.007, "step": 241560 }, { "epoch": 7.4670828954688755, "grad_norm": 0.1249440610408783, "learning_rate": 1.6267772145638872e-05, "loss": 0.0068, "step": 241590 }, { "epoch": 7.468010137850034, "grad_norm": 0.09267181903123856, "learning_rate": 1.6267308524448294e-05, "loss": 0.0063, "step": 241620 }, { "epoch": 7.468937380231193, "grad_norm": 0.09852717071771622, "learning_rate": 1.6266844903257712e-05, "loss": 0.0063, "step": 241650 }, { "epoch": 7.46986462261235, "grad_norm": 0.17114950716495514, "learning_rate": 1.6266381282067134e-05, "loss": 0.0066, "step": 241680 }, { "epoch": 7.470791864993509, "grad_norm": 0.13637124001979828, "learning_rate": 1.6265917660876555e-05, "loss": 0.0065, "step": 241710 }, { "epoch": 7.471719107374668, "grad_norm": 0.15405623614788055, "learning_rate": 1.6265454039685977e-05, "loss": 0.0066, "step": 241740 }, { "epoch": 7.472646349755826, "grad_norm": 0.24252763390541077, "learning_rate": 1.6264990418495398e-05, "loss": 0.0061, "step": 241770 }, { "epoch": 7.473573592136985, "grad_norm": 0.09717698395252228, "learning_rate": 1.6264526797304816e-05, "loss": 0.007, "step": 241800 }, { "epoch": 7.474500834518143, "grad_norm": 0.0761769562959671, "learning_rate": 1.6264063176114238e-05, "loss": 0.0066, "step": 241830 }, { "epoch": 7.475428076899301, "grad_norm": 0.06135798618197441, "learning_rate": 1.626359955492366e-05, "loss": 0.0063, "step": 241860 }, { "epoch": 7.47635531928046, "grad_norm": 0.141836479306221, "learning_rate": 1.6263135933733077e-05, "loss": 0.007, "step": 
241890 }, { "epoch": 7.477282561661618, "grad_norm": 0.08071072399616241, "learning_rate": 1.62626723125425e-05, "loss": 0.0067, "step": 241920 }, { "epoch": 7.478209804042777, "grad_norm": 0.055976830422878265, "learning_rate": 1.626220869135192e-05, "loss": 0.0058, "step": 241950 }, { "epoch": 7.4791370464239355, "grad_norm": 0.16622981429100037, "learning_rate": 1.6261745070161342e-05, "loss": 0.007, "step": 241980 }, { "epoch": 7.480064288805094, "grad_norm": 0.12287551909685135, "learning_rate": 1.6261281448970763e-05, "loss": 0.0066, "step": 242010 }, { "epoch": 7.480991531186252, "grad_norm": 0.13403131067752838, "learning_rate": 1.626081782778018e-05, "loss": 0.0066, "step": 242040 }, { "epoch": 7.48191877356741, "grad_norm": 0.15032483637332916, "learning_rate": 1.6260354206589603e-05, "loss": 0.0069, "step": 242070 }, { "epoch": 7.482846015948569, "grad_norm": 0.11009042710065842, "learning_rate": 1.6259890585399025e-05, "loss": 0.006, "step": 242100 }, { "epoch": 7.483773258329728, "grad_norm": 0.14394505321979523, "learning_rate": 1.6259426964208446e-05, "loss": 0.0063, "step": 242130 }, { "epoch": 7.484700500710886, "grad_norm": 0.12545573711395264, "learning_rate": 1.6258963343017868e-05, "loss": 0.0071, "step": 242160 }, { "epoch": 7.485627743092044, "grad_norm": 0.14152540266513824, "learning_rate": 1.625849972182729e-05, "loss": 0.0069, "step": 242190 }, { "epoch": 7.4865549854732025, "grad_norm": 0.12977614998817444, "learning_rate": 1.6258036100636707e-05, "loss": 0.0069, "step": 242220 }, { "epoch": 7.487482227854361, "grad_norm": 0.11785924434661865, "learning_rate": 1.625757247944613e-05, "loss": 0.0072, "step": 242250 }, { "epoch": 7.48840947023552, "grad_norm": 0.1531507968902588, "learning_rate": 1.625710885825555e-05, "loss": 0.0071, "step": 242280 }, { "epoch": 7.489336712616678, "grad_norm": 0.12327972799539566, "learning_rate": 1.6256645237064968e-05, "loss": 0.0066, "step": 242310 }, { "epoch": 7.490263954997836, "grad_norm": 
0.11268164962530136, "learning_rate": 1.625618161587439e-05, "loss": 0.0069, "step": 242340 }, { "epoch": 7.491191197378995, "grad_norm": 0.09984046965837479, "learning_rate": 1.625571799468381e-05, "loss": 0.0061, "step": 242370 }, { "epoch": 7.492118439760153, "grad_norm": 0.15461233258247375, "learning_rate": 1.6255254373493233e-05, "loss": 0.0063, "step": 242400 }, { "epoch": 7.493045682141312, "grad_norm": 0.10726547241210938, "learning_rate": 1.6254790752302654e-05, "loss": 0.007, "step": 242430 }, { "epoch": 7.49397292452247, "grad_norm": 0.13155882060527802, "learning_rate": 1.6254327131112072e-05, "loss": 0.0066, "step": 242460 }, { "epoch": 7.494900166903628, "grad_norm": 0.12688696384429932, "learning_rate": 1.6253863509921494e-05, "loss": 0.0076, "step": 242490 }, { "epoch": 7.495827409284787, "grad_norm": 0.10782118886709213, "learning_rate": 1.6253399888730915e-05, "loss": 0.007, "step": 242520 }, { "epoch": 7.496754651665945, "grad_norm": 0.10122150182723999, "learning_rate": 1.6252936267540337e-05, "loss": 0.0061, "step": 242550 }, { "epoch": 7.497681894047104, "grad_norm": 0.11384671926498413, "learning_rate": 1.625247264634976e-05, "loss": 0.0067, "step": 242580 }, { "epoch": 7.4986091364282625, "grad_norm": 0.11679688096046448, "learning_rate": 1.625200902515918e-05, "loss": 0.0065, "step": 242610 }, { "epoch": 7.499536378809421, "grad_norm": 0.162469744682312, "learning_rate": 1.6251545403968598e-05, "loss": 0.0066, "step": 242640 }, { "epoch": 7.500463621190579, "grad_norm": 0.1394236981868744, "learning_rate": 1.6251097236817706e-05, "loss": 0.0066, "step": 242670 }, { "epoch": 7.5013908635717375, "grad_norm": 0.09179965406656265, "learning_rate": 1.6250633615627127e-05, "loss": 0.0069, "step": 242700 }, { "epoch": 7.502318105952896, "grad_norm": 0.11447495967149734, "learning_rate": 1.6250169994436546e-05, "loss": 0.0071, "step": 242730 }, { "epoch": 7.503245348334055, "grad_norm": 0.11351615935564041, "learning_rate": 1.6249706373245967e-05, 
"loss": 0.0058, "step": 242760 }, { "epoch": 7.504172590715213, "grad_norm": 0.07489201426506042, "learning_rate": 1.624924275205539e-05, "loss": 0.0063, "step": 242790 }, { "epoch": 7.505099833096372, "grad_norm": 0.07288851588964462, "learning_rate": 1.624877913086481e-05, "loss": 0.0069, "step": 242820 }, { "epoch": 7.50602707547753, "grad_norm": 0.09583888947963715, "learning_rate": 1.624831550967423e-05, "loss": 0.0062, "step": 242850 }, { "epoch": 7.506954317858688, "grad_norm": 0.14293719828128815, "learning_rate": 1.624785188848365e-05, "loss": 0.0057, "step": 242880 }, { "epoch": 7.507881560239847, "grad_norm": 0.07707107067108154, "learning_rate": 1.624738826729307e-05, "loss": 0.0069, "step": 242910 }, { "epoch": 7.508808802621005, "grad_norm": 0.16564510762691498, "learning_rate": 1.6246924646102493e-05, "loss": 0.0066, "step": 242940 }, { "epoch": 7.509736045002164, "grad_norm": 0.11025729030370712, "learning_rate": 1.624646102491191e-05, "loss": 0.0069, "step": 242970 }, { "epoch": 7.510663287383322, "grad_norm": 0.15906885266304016, "learning_rate": 1.6245997403721336e-05, "loss": 0.0065, "step": 243000 }, { "epoch": 7.51159052976448, "grad_norm": 0.11038514971733093, "learning_rate": 1.6245533782530757e-05, "loss": 0.0062, "step": 243030 }, { "epoch": 7.512517772145639, "grad_norm": 0.0865306556224823, "learning_rate": 1.6245070161340175e-05, "loss": 0.006, "step": 243060 }, { "epoch": 7.5134450145267975, "grad_norm": 0.1394459456205368, "learning_rate": 1.6244606540149597e-05, "loss": 0.0063, "step": 243090 }, { "epoch": 7.514372256907956, "grad_norm": 0.10553315281867981, "learning_rate": 1.624414291895902e-05, "loss": 0.007, "step": 243120 }, { "epoch": 7.515299499289114, "grad_norm": 0.20021694898605347, "learning_rate": 1.6243679297768437e-05, "loss": 0.0065, "step": 243150 }, { "epoch": 7.516226741670272, "grad_norm": 0.11187133193016052, "learning_rate": 1.6243215676577858e-05, "loss": 0.0063, "step": 243180 }, { "epoch": 7.517153984051431, 
"grad_norm": 0.09095274657011032, "learning_rate": 1.624275205538728e-05, "loss": 0.0063, "step": 243210 }, { "epoch": 7.51808122643259, "grad_norm": 0.13476677238941193, "learning_rate": 1.62422884341967e-05, "loss": 0.0062, "step": 243240 }, { "epoch": 7.519008468813748, "grad_norm": 0.08119344711303711, "learning_rate": 1.6241824813006123e-05, "loss": 0.0065, "step": 243270 }, { "epoch": 7.519935711194906, "grad_norm": 0.1086132600903511, "learning_rate": 1.624136119181554e-05, "loss": 0.0063, "step": 243300 }, { "epoch": 7.5208629535760645, "grad_norm": 0.08615745604038239, "learning_rate": 1.6240897570624962e-05, "loss": 0.0066, "step": 243330 }, { "epoch": 7.521790195957223, "grad_norm": 0.1373950093984604, "learning_rate": 1.6240433949434384e-05, "loss": 0.0061, "step": 243360 }, { "epoch": 7.522717438338382, "grad_norm": 0.1332528293132782, "learning_rate": 1.6239970328243805e-05, "loss": 0.0065, "step": 243390 }, { "epoch": 7.52364468071954, "grad_norm": 0.10620950907468796, "learning_rate": 1.6239506707053227e-05, "loss": 0.0062, "step": 243420 }, { "epoch": 7.524571923100699, "grad_norm": 0.16455134749412537, "learning_rate": 1.6239043085862648e-05, "loss": 0.0064, "step": 243450 }, { "epoch": 7.525499165481857, "grad_norm": 0.12798099219799042, "learning_rate": 1.6238579464672066e-05, "loss": 0.0066, "step": 243480 }, { "epoch": 7.526426407863015, "grad_norm": 0.07551660388708115, "learning_rate": 1.6238115843481488e-05, "loss": 0.006, "step": 243510 }, { "epoch": 7.527353650244174, "grad_norm": 0.15149074792861938, "learning_rate": 1.6237652222290906e-05, "loss": 0.0066, "step": 243540 }, { "epoch": 7.528280892625332, "grad_norm": 0.15028661489486694, "learning_rate": 1.6237188601100327e-05, "loss": 0.0061, "step": 243570 }, { "epoch": 7.529208135006491, "grad_norm": 0.10130281746387482, "learning_rate": 1.623672497990975e-05, "loss": 0.0066, "step": 243600 }, { "epoch": 7.53013537738765, "grad_norm": 0.12316566705703735, "learning_rate": 
1.623626135871917e-05, "loss": 0.0065, "step": 243630 }, { "epoch": 7.531062619768807, "grad_norm": 0.08115466684103012, "learning_rate": 1.6235797737528592e-05, "loss": 0.0068, "step": 243660 }, { "epoch": 7.531989862149966, "grad_norm": 0.10132281482219696, "learning_rate": 1.6235334116338013e-05, "loss": 0.0066, "step": 243690 }, { "epoch": 7.5329171045311245, "grad_norm": 0.15369994938373566, "learning_rate": 1.623487049514743e-05, "loss": 0.0062, "step": 243720 }, { "epoch": 7.533844346912283, "grad_norm": 0.12383634597063065, "learning_rate": 1.6234406873956853e-05, "loss": 0.0055, "step": 243750 }, { "epoch": 7.534771589293442, "grad_norm": 0.11482653766870499, "learning_rate": 1.6233943252766275e-05, "loss": 0.0069, "step": 243780 }, { "epoch": 7.535698831674599, "grad_norm": 0.1665819138288498, "learning_rate": 1.6233479631575696e-05, "loss": 0.0065, "step": 243810 }, { "epoch": 7.536626074055758, "grad_norm": 0.15051916241645813, "learning_rate": 1.6233016010385118e-05, "loss": 0.0063, "step": 243840 }, { "epoch": 7.537553316436917, "grad_norm": 0.09984426200389862, "learning_rate": 1.6232552389194536e-05, "loss": 0.0066, "step": 243870 }, { "epoch": 7.538480558818075, "grad_norm": 0.12100689858198166, "learning_rate": 1.6232088768003957e-05, "loss": 0.0065, "step": 243900 }, { "epoch": 7.539407801199234, "grad_norm": 0.13852928578853607, "learning_rate": 1.623162514681338e-05, "loss": 0.0066, "step": 243930 }, { "epoch": 7.5403350435803915, "grad_norm": 0.11982624232769012, "learning_rate": 1.6231161525622797e-05, "loss": 0.007, "step": 243960 }, { "epoch": 7.54126228596155, "grad_norm": 0.13418321311473846, "learning_rate": 1.623069790443222e-05, "loss": 0.0058, "step": 243990 }, { "epoch": 7.542189528342709, "grad_norm": 0.11226826161146164, "learning_rate": 1.623023428324164e-05, "loss": 0.0066, "step": 244020 }, { "epoch": 7.543116770723867, "grad_norm": 0.07385748624801636, "learning_rate": 1.622977066205106e-05, "loss": 0.0065, "step": 244050 }, { 
"epoch": 7.544044013105026, "grad_norm": 0.17133639752864838, "learning_rate": 1.6229307040860483e-05, "loss": 0.0058, "step": 244080 }, { "epoch": 7.544971255486184, "grad_norm": 0.14296387135982513, "learning_rate": 1.6228843419669904e-05, "loss": 0.0067, "step": 244110 }, { "epoch": 7.545898497867342, "grad_norm": 0.11836466193199158, "learning_rate": 1.6228379798479323e-05, "loss": 0.0066, "step": 244140 }, { "epoch": 7.546825740248501, "grad_norm": 0.11605197191238403, "learning_rate": 1.6227916177288744e-05, "loss": 0.0062, "step": 244170 }, { "epoch": 7.547752982629659, "grad_norm": 0.12102890759706497, "learning_rate": 1.6227452556098166e-05, "loss": 0.0067, "step": 244200 }, { "epoch": 7.548680225010818, "grad_norm": 0.11444148421287537, "learning_rate": 1.6226988934907587e-05, "loss": 0.0066, "step": 244230 }, { "epoch": 7.549607467391977, "grad_norm": 0.11138924956321716, "learning_rate": 1.622652531371701e-05, "loss": 0.0072, "step": 244260 }, { "epoch": 7.550534709773134, "grad_norm": 0.08160414546728134, "learning_rate": 1.6226061692526427e-05, "loss": 0.0061, "step": 244290 }, { "epoch": 7.551461952154293, "grad_norm": 0.14906419813632965, "learning_rate": 1.6225598071335848e-05, "loss": 0.0064, "step": 244320 }, { "epoch": 7.5523891945354515, "grad_norm": 0.10883079469203949, "learning_rate": 1.622513445014527e-05, "loss": 0.0067, "step": 244350 }, { "epoch": 7.55331643691661, "grad_norm": 0.09126152843236923, "learning_rate": 1.622467082895469e-05, "loss": 0.0063, "step": 244380 }, { "epoch": 7.554243679297769, "grad_norm": 0.10701170563697815, "learning_rate": 1.6224207207764113e-05, "loss": 0.0061, "step": 244410 }, { "epoch": 7.555170921678927, "grad_norm": 0.07849206775426865, "learning_rate": 1.6223743586573534e-05, "loss": 0.0063, "step": 244440 }, { "epoch": 7.556098164060085, "grad_norm": 0.09827978909015656, "learning_rate": 1.6223279965382952e-05, "loss": 0.0067, "step": 244470 }, { "epoch": 7.557025406441244, "grad_norm": 
0.08855143189430237, "learning_rate": 1.6222816344192374e-05, "loss": 0.0062, "step": 244500 }, { "epoch": 7.557952648822402, "grad_norm": 0.09809146076440811, "learning_rate": 1.6222352723001792e-05, "loss": 0.0057, "step": 244530 }, { "epoch": 7.558879891203561, "grad_norm": 0.15663011372089386, "learning_rate": 1.6221889101811213e-05, "loss": 0.0072, "step": 244560 }, { "epoch": 7.5598071335847195, "grad_norm": 0.14045321941375732, "learning_rate": 1.6221425480620635e-05, "loss": 0.007, "step": 244590 }, { "epoch": 7.560734375965877, "grad_norm": 0.06511541455984116, "learning_rate": 1.6220961859430056e-05, "loss": 0.0061, "step": 244620 }, { "epoch": 7.561661618347036, "grad_norm": 0.11825263500213623, "learning_rate": 1.6220498238239478e-05, "loss": 0.0065, "step": 244650 }, { "epoch": 7.562588860728194, "grad_norm": 0.14055486023426056, "learning_rate": 1.62200346170489e-05, "loss": 0.007, "step": 244680 }, { "epoch": 7.563516103109353, "grad_norm": 0.15373235940933228, "learning_rate": 1.6219570995858318e-05, "loss": 0.0064, "step": 244710 }, { "epoch": 7.564443345490512, "grad_norm": 0.17178477346897125, "learning_rate": 1.621910737466774e-05, "loss": 0.0063, "step": 244740 }, { "epoch": 7.565370587871669, "grad_norm": 0.18062201142311096, "learning_rate": 1.621864375347716e-05, "loss": 0.0058, "step": 244770 }, { "epoch": 7.566297830252828, "grad_norm": 0.13131961226463318, "learning_rate": 1.6218180132286582e-05, "loss": 0.0064, "step": 244800 }, { "epoch": 7.5672250726339865, "grad_norm": 0.13276542723178864, "learning_rate": 1.6217716511096004e-05, "loss": 0.0056, "step": 244830 }, { "epoch": 7.568152315015145, "grad_norm": 0.12326063215732574, "learning_rate": 1.6217252889905422e-05, "loss": 0.0067, "step": 244860 }, { "epoch": 7.569079557396304, "grad_norm": 0.14219191670417786, "learning_rate": 1.6216789268714843e-05, "loss": 0.007, "step": 244890 }, { "epoch": 7.570006799777461, "grad_norm": 0.09972205758094788, "learning_rate": 
1.6216325647524265e-05, "loss": 0.0065, "step": 244920 }, { "epoch": 7.57093404215862, "grad_norm": 0.08850075304508209, "learning_rate": 1.6215862026333683e-05, "loss": 0.0063, "step": 244950 }, { "epoch": 7.571861284539779, "grad_norm": 0.12496367841959, "learning_rate": 1.6215398405143104e-05, "loss": 0.0066, "step": 244980 }, { "epoch": 7.572788526920937, "grad_norm": 0.13465926051139832, "learning_rate": 1.6214934783952526e-05, "loss": 0.0068, "step": 245010 }, { "epoch": 7.573715769302096, "grad_norm": 0.13587136566638947, "learning_rate": 1.6214471162761947e-05, "loss": 0.0062, "step": 245040 }, { "epoch": 7.574643011683254, "grad_norm": 0.1277521401643753, "learning_rate": 1.621400754157137e-05, "loss": 0.0067, "step": 245070 }, { "epoch": 7.575570254064412, "grad_norm": 0.1053781509399414, "learning_rate": 1.621354392038079e-05, "loss": 0.0065, "step": 245100 }, { "epoch": 7.576497496445571, "grad_norm": 0.138630211353302, "learning_rate": 1.621308029919021e-05, "loss": 0.0066, "step": 245130 }, { "epoch": 7.577424738826729, "grad_norm": 0.17985151708126068, "learning_rate": 1.621261667799963e-05, "loss": 0.0062, "step": 245160 }, { "epoch": 7.578351981207888, "grad_norm": 0.12192896753549576, "learning_rate": 1.621215305680905e-05, "loss": 0.006, "step": 245190 }, { "epoch": 7.5792792235890465, "grad_norm": 0.09622862190008163, "learning_rate": 1.6211689435618473e-05, "loss": 0.0071, "step": 245220 }, { "epoch": 7.580206465970205, "grad_norm": 0.1919167935848236, "learning_rate": 1.6211225814427895e-05, "loss": 0.0077, "step": 245250 }, { "epoch": 7.581133708351363, "grad_norm": 0.1067129597067833, "learning_rate": 1.6210762193237313e-05, "loss": 0.0065, "step": 245280 }, { "epoch": 7.582060950732521, "grad_norm": 0.11524079740047455, "learning_rate": 1.6210298572046734e-05, "loss": 0.0072, "step": 245310 }, { "epoch": 7.58298819311368, "grad_norm": 0.13477723300457, "learning_rate": 1.6209834950856156e-05, "loss": 0.0065, "step": 245340 }, { "epoch": 
7.583915435494839, "grad_norm": 0.09487593919038773, "learning_rate": 1.6209371329665574e-05, "loss": 0.0063, "step": 245370 }, { "epoch": 7.584842677875997, "grad_norm": 0.11280181258916855, "learning_rate": 1.6208907708474995e-05, "loss": 0.0059, "step": 245400 }, { "epoch": 7.585769920257155, "grad_norm": 0.07176393270492554, "learning_rate": 1.6208444087284417e-05, "loss": 0.0064, "step": 245430 }, { "epoch": 7.5866971626383135, "grad_norm": 0.17797008156776428, "learning_rate": 1.6207980466093838e-05, "loss": 0.0063, "step": 245460 }, { "epoch": 7.587624405019472, "grad_norm": 0.09169333428144455, "learning_rate": 1.620751684490326e-05, "loss": 0.0066, "step": 245490 }, { "epoch": 7.588551647400631, "grad_norm": 0.1282367706298828, "learning_rate": 1.6207053223712678e-05, "loss": 0.0065, "step": 245520 }, { "epoch": 7.589478889781789, "grad_norm": 0.11426004022359848, "learning_rate": 1.62065896025221e-05, "loss": 0.0068, "step": 245550 }, { "epoch": 7.590406132162947, "grad_norm": 0.14112737774848938, "learning_rate": 1.620612598133152e-05, "loss": 0.0066, "step": 245580 }, { "epoch": 7.591333374544106, "grad_norm": 0.18820489943027496, "learning_rate": 1.6205662360140942e-05, "loss": 0.0068, "step": 245610 }, { "epoch": 7.592260616925264, "grad_norm": 0.12913493812084198, "learning_rate": 1.6205198738950364e-05, "loss": 0.0062, "step": 245640 }, { "epoch": 7.593187859306423, "grad_norm": 0.10601896792650223, "learning_rate": 1.6204735117759785e-05, "loss": 0.0061, "step": 245670 }, { "epoch": 7.594115101687581, "grad_norm": 0.11928235739469528, "learning_rate": 1.6204271496569204e-05, "loss": 0.0068, "step": 245700 }, { "epoch": 7.595042344068739, "grad_norm": 0.15634100139141083, "learning_rate": 1.6203807875378625e-05, "loss": 0.007, "step": 245730 }, { "epoch": 7.595969586449898, "grad_norm": 0.11900001764297485, "learning_rate": 1.6203344254188047e-05, "loss": 0.0074, "step": 245760 }, { "epoch": 7.596896828831056, "grad_norm": 0.133588507771492, 
"learning_rate": 1.6202880632997468e-05, "loss": 0.0062, "step": 245790 }, { "epoch": 7.597824071212215, "grad_norm": 0.10086575150489807, "learning_rate": 1.620241701180689e-05, "loss": 0.0064, "step": 245820 }, { "epoch": 7.5987513135933735, "grad_norm": 0.09119705855846405, "learning_rate": 1.6201953390616308e-05, "loss": 0.0071, "step": 245850 }, { "epoch": 7.599678555974532, "grad_norm": 0.12603294849395752, "learning_rate": 1.620148976942573e-05, "loss": 0.0065, "step": 245880 }, { "epoch": 7.60060579835569, "grad_norm": 0.10915225744247437, "learning_rate": 1.620102614823515e-05, "loss": 0.0065, "step": 245910 }, { "epoch": 7.601533040736848, "grad_norm": 0.1387907713651657, "learning_rate": 1.620056252704457e-05, "loss": 0.0064, "step": 245940 }, { "epoch": 7.602460283118007, "grad_norm": 0.12382151931524277, "learning_rate": 1.620009890585399e-05, "loss": 0.0062, "step": 245970 }, { "epoch": 7.603387525499166, "grad_norm": 0.09217330813407898, "learning_rate": 1.6199635284663412e-05, "loss": 0.0062, "step": 246000 }, { "epoch": 7.604314767880324, "grad_norm": 0.13930636644363403, "learning_rate": 1.6199171663472833e-05, "loss": 0.0067, "step": 246030 }, { "epoch": 7.605242010261483, "grad_norm": 0.15465207397937775, "learning_rate": 1.6198708042282255e-05, "loss": 0.0062, "step": 246060 }, { "epoch": 7.6061692526426405, "grad_norm": 0.10938696563243866, "learning_rate": 1.6198244421091676e-05, "loss": 0.007, "step": 246090 }, { "epoch": 7.607096495023799, "grad_norm": 0.13119767606258392, "learning_rate": 1.6197780799901094e-05, "loss": 0.0065, "step": 246120 }, { "epoch": 7.608023737404958, "grad_norm": 0.1511934995651245, "learning_rate": 1.6197317178710516e-05, "loss": 0.0066, "step": 246150 }, { "epoch": 7.608950979786116, "grad_norm": 0.0755951777100563, "learning_rate": 1.6196853557519937e-05, "loss": 0.0065, "step": 246180 }, { "epoch": 7.609878222167275, "grad_norm": 0.1848667860031128, "learning_rate": 1.619638993632936e-05, "loss": 0.0071, 
"step": 246210 }, { "epoch": 7.610805464548433, "grad_norm": 0.11277540028095245, "learning_rate": 1.619592631513878e-05, "loss": 0.0066, "step": 246240 }, { "epoch": 7.611732706929591, "grad_norm": 0.16087843477725983, "learning_rate": 1.6195478147987885e-05, "loss": 0.0066, "step": 246270 }, { "epoch": 7.61265994931075, "grad_norm": 0.11244695633649826, "learning_rate": 1.6195014526797307e-05, "loss": 0.0067, "step": 246300 }, { "epoch": 7.6135871916919085, "grad_norm": 0.12475960701704025, "learning_rate": 1.6194550905606728e-05, "loss": 0.0063, "step": 246330 }, { "epoch": 7.614514434073067, "grad_norm": 0.10591203719377518, "learning_rate": 1.6194087284416146e-05, "loss": 0.0064, "step": 246360 }, { "epoch": 7.615441676454225, "grad_norm": 0.11446181684732437, "learning_rate": 1.6193623663225568e-05, "loss": 0.007, "step": 246390 }, { "epoch": 7.616368918835383, "grad_norm": 0.11969120800495148, "learning_rate": 1.619316004203499e-05, "loss": 0.0065, "step": 246420 }, { "epoch": 7.617296161216542, "grad_norm": 0.09559926390647888, "learning_rate": 1.619269642084441e-05, "loss": 0.0064, "step": 246450 }, { "epoch": 7.618223403597701, "grad_norm": 0.1524505615234375, "learning_rate": 1.6192232799653832e-05, "loss": 0.006, "step": 246480 }, { "epoch": 7.619150645978859, "grad_norm": 0.10375362634658813, "learning_rate": 1.6191769178463254e-05, "loss": 0.0067, "step": 246510 }, { "epoch": 7.620077888360017, "grad_norm": 0.1144079864025116, "learning_rate": 1.6191305557272672e-05, "loss": 0.0066, "step": 246540 }, { "epoch": 7.6210051307411755, "grad_norm": 0.15142765641212463, "learning_rate": 1.6190841936082093e-05, "loss": 0.0065, "step": 246570 }, { "epoch": 7.621932373122334, "grad_norm": 0.1839558482170105, "learning_rate": 1.6190378314891515e-05, "loss": 0.0065, "step": 246600 }, { "epoch": 7.622859615503493, "grad_norm": 0.11942339688539505, "learning_rate": 1.6189914693700933e-05, "loss": 0.0063, "step": 246630 }, { "epoch": 7.623786857884651, "grad_norm": 
0.11764656752347946, "learning_rate": 1.6189451072510354e-05, "loss": 0.0066, "step": 246660 }, { "epoch": 7.62471410026581, "grad_norm": 0.16357524693012238, "learning_rate": 1.6188987451319776e-05, "loss": 0.0061, "step": 246690 }, { "epoch": 7.625641342646968, "grad_norm": 0.10223913192749023, "learning_rate": 1.6188523830129197e-05, "loss": 0.0063, "step": 246720 }, { "epoch": 7.626568585028126, "grad_norm": 0.14315374195575714, "learning_rate": 1.618806020893862e-05, "loss": 0.0057, "step": 246750 }, { "epoch": 7.627495827409285, "grad_norm": 0.12759800255298615, "learning_rate": 1.6187596587748037e-05, "loss": 0.0066, "step": 246780 }, { "epoch": 7.628423069790443, "grad_norm": 0.10836327821016312, "learning_rate": 1.618713296655746e-05, "loss": 0.0065, "step": 246810 }, { "epoch": 7.629350312171602, "grad_norm": 0.09836213290691376, "learning_rate": 1.618666934536688e-05, "loss": 0.0064, "step": 246840 }, { "epoch": 7.630277554552761, "grad_norm": 0.14391548931598663, "learning_rate": 1.61862057241763e-05, "loss": 0.0055, "step": 246870 }, { "epoch": 7.631204796933918, "grad_norm": 0.12103112787008286, "learning_rate": 1.6185742102985723e-05, "loss": 0.007, "step": 246900 }, { "epoch": 7.632132039315077, "grad_norm": 0.09157466143369675, "learning_rate": 1.6185278481795145e-05, "loss": 0.0061, "step": 246930 }, { "epoch": 7.6330592816962355, "grad_norm": 0.11824598908424377, "learning_rate": 1.6184814860604563e-05, "loss": 0.0066, "step": 246960 }, { "epoch": 7.633986524077394, "grad_norm": 0.08409513533115387, "learning_rate": 1.6184351239413984e-05, "loss": 0.007, "step": 246990 }, { "epoch": 7.634913766458553, "grad_norm": 0.09706436842679977, "learning_rate": 1.6183887618223402e-05, "loss": 0.0063, "step": 247020 }, { "epoch": 7.63584100883971, "grad_norm": 0.129959374666214, "learning_rate": 1.6183423997032824e-05, "loss": 0.0062, "step": 247050 }, { "epoch": 7.636768251220869, "grad_norm": 0.06534481048583984, "learning_rate": 1.618296037584225e-05, 
"loss": 0.0062, "step": 247080 }, { "epoch": 7.637695493602028, "grad_norm": 0.14178377389907837, "learning_rate": 1.6182496754651667e-05, "loss": 0.0063, "step": 247110 }, { "epoch": 7.638622735983186, "grad_norm": 0.09195102006196976, "learning_rate": 1.618203313346109e-05, "loss": 0.0063, "step": 247140 }, { "epoch": 7.639549978364345, "grad_norm": 0.08725125342607498, "learning_rate": 1.618156951227051e-05, "loss": 0.0065, "step": 247170 }, { "epoch": 7.6404772207455025, "grad_norm": 0.09694714844226837, "learning_rate": 1.6181105891079928e-05, "loss": 0.0065, "step": 247200 }, { "epoch": 7.641404463126661, "grad_norm": 0.10862704366445541, "learning_rate": 1.618064226988935e-05, "loss": 0.0062, "step": 247230 }, { "epoch": 7.64233170550782, "grad_norm": 0.08566774427890778, "learning_rate": 1.618017864869877e-05, "loss": 0.0061, "step": 247260 }, { "epoch": 7.643258947888978, "grad_norm": 0.10122048109769821, "learning_rate": 1.6179715027508193e-05, "loss": 0.0059, "step": 247290 }, { "epoch": 7.644186190270137, "grad_norm": 0.2177661955356598, "learning_rate": 1.6179251406317614e-05, "loss": 0.0073, "step": 247320 }, { "epoch": 7.645113432651295, "grad_norm": 0.0854915976524353, "learning_rate": 1.6178787785127032e-05, "loss": 0.0068, "step": 247350 }, { "epoch": 7.646040675032453, "grad_norm": 0.11656788736581802, "learning_rate": 1.6178324163936454e-05, "loss": 0.0067, "step": 247380 }, { "epoch": 7.646967917413612, "grad_norm": 0.1166267991065979, "learning_rate": 1.6177860542745875e-05, "loss": 0.0066, "step": 247410 }, { "epoch": 7.64789515979477, "grad_norm": 0.10318230092525482, "learning_rate": 1.6177396921555297e-05, "loss": 0.0065, "step": 247440 }, { "epoch": 7.648822402175929, "grad_norm": 0.11172541975975037, "learning_rate": 1.6176933300364718e-05, "loss": 0.0069, "step": 247470 }, { "epoch": 7.649749644557087, "grad_norm": 0.13581116497516632, "learning_rate": 1.617646967917414e-05, "loss": 0.0062, "step": 247500 }, { "epoch": 
7.650676886938245, "grad_norm": 0.11743386834859848, "learning_rate": 1.6176006057983558e-05, "loss": 0.0066, "step": 247530 }, { "epoch": 7.651604129319404, "grad_norm": 0.09756746888160706, "learning_rate": 1.617554243679298e-05, "loss": 0.0062, "step": 247560 }, { "epoch": 7.6525313717005625, "grad_norm": 0.053448498249053955, "learning_rate": 1.61750788156024e-05, "loss": 0.0069, "step": 247590 }, { "epoch": 7.653458614081721, "grad_norm": 0.14688929915428162, "learning_rate": 1.617461519441182e-05, "loss": 0.0068, "step": 247620 }, { "epoch": 7.65438585646288, "grad_norm": 0.12371209263801575, "learning_rate": 1.617415157322124e-05, "loss": 0.0073, "step": 247650 }, { "epoch": 7.655313098844038, "grad_norm": 0.12935037910938263, "learning_rate": 1.6173687952030662e-05, "loss": 0.0067, "step": 247680 }, { "epoch": 7.656240341225196, "grad_norm": 0.11613618582487106, "learning_rate": 1.6173224330840083e-05, "loss": 0.0063, "step": 247710 }, { "epoch": 7.657167583606355, "grad_norm": 0.06814577430486679, "learning_rate": 1.6172760709649505e-05, "loss": 0.0057, "step": 247740 }, { "epoch": 7.658094825987513, "grad_norm": 0.09122530370950699, "learning_rate": 1.6172297088458923e-05, "loss": 0.0064, "step": 247770 }, { "epoch": 7.659022068368672, "grad_norm": 0.13621540367603302, "learning_rate": 1.6171833467268345e-05, "loss": 0.0067, "step": 247800 }, { "epoch": 7.65994931074983, "grad_norm": 0.09116895496845245, "learning_rate": 1.6171369846077766e-05, "loss": 0.0064, "step": 247830 }, { "epoch": 7.660876553130988, "grad_norm": 0.10754502564668655, "learning_rate": 1.6170906224887188e-05, "loss": 0.0066, "step": 247860 }, { "epoch": 7.661803795512147, "grad_norm": 0.09232422709465027, "learning_rate": 1.617044260369661e-05, "loss": 0.0073, "step": 247890 }, { "epoch": 7.662731037893305, "grad_norm": 0.12334433943033218, "learning_rate": 1.616997898250603e-05, "loss": 0.0067, "step": 247920 }, { "epoch": 7.663658280274464, "grad_norm": 0.13220210373401642, 
"learning_rate": 1.616951536131545e-05, "loss": 0.0067, "step": 247950 }, { "epoch": 7.6645855226556225, "grad_norm": 0.11722910404205322, "learning_rate": 1.616905174012487e-05, "loss": 0.0063, "step": 247980 }, { "epoch": 7.66551276503678, "grad_norm": 0.06416573375463486, "learning_rate": 1.616858811893429e-05, "loss": 0.0066, "step": 248010 }, { "epoch": 7.666440007417939, "grad_norm": 0.12895210087299347, "learning_rate": 1.616812449774371e-05, "loss": 0.0062, "step": 248040 }, { "epoch": 7.6673672497990975, "grad_norm": 0.13914914429187775, "learning_rate": 1.616766087655313e-05, "loss": 0.0062, "step": 248070 }, { "epoch": 7.668294492180256, "grad_norm": 0.09512194246053696, "learning_rate": 1.6167197255362553e-05, "loss": 0.0066, "step": 248100 }, { "epoch": 7.669221734561415, "grad_norm": 0.14202427864074707, "learning_rate": 1.6166733634171974e-05, "loss": 0.0072, "step": 248130 }, { "epoch": 7.670148976942572, "grad_norm": 0.10322931408882141, "learning_rate": 1.6166270012981396e-05, "loss": 0.0069, "step": 248160 }, { "epoch": 7.671076219323731, "grad_norm": 0.1447301208972931, "learning_rate": 1.6165806391790814e-05, "loss": 0.0063, "step": 248190 }, { "epoch": 7.67200346170489, "grad_norm": 0.11695919185876846, "learning_rate": 1.6165342770600235e-05, "loss": 0.0065, "step": 248220 }, { "epoch": 7.672930704086048, "grad_norm": 0.09956331551074982, "learning_rate": 1.6164879149409657e-05, "loss": 0.0058, "step": 248250 }, { "epoch": 7.673857946467207, "grad_norm": 0.09866171330213547, "learning_rate": 1.616441552821908e-05, "loss": 0.0061, "step": 248280 }, { "epoch": 7.6747851888483645, "grad_norm": 0.11781986057758331, "learning_rate": 1.61639519070285e-05, "loss": 0.0068, "step": 248310 }, { "epoch": 7.675712431229523, "grad_norm": 0.06677079945802689, "learning_rate": 1.6163488285837918e-05, "loss": 0.0058, "step": 248340 }, { "epoch": 7.676639673610682, "grad_norm": 0.09765508025884628, "learning_rate": 1.616302466464734e-05, "loss": 0.0061, 
"step": 248370 }, { "epoch": 7.67756691599184, "grad_norm": 0.07702649384737015, "learning_rate": 1.616256104345676e-05, "loss": 0.0067, "step": 248400 }, { "epoch": 7.678494158372999, "grad_norm": 0.11046947538852692, "learning_rate": 1.616209742226618e-05, "loss": 0.0064, "step": 248430 }, { "epoch": 7.6794214007541575, "grad_norm": 0.1451181322336197, "learning_rate": 1.61616338010756e-05, "loss": 0.0066, "step": 248460 }, { "epoch": 7.680348643135316, "grad_norm": 0.11765379458665848, "learning_rate": 1.6161170179885026e-05, "loss": 0.0066, "step": 248490 }, { "epoch": 7.681275885516474, "grad_norm": 0.07868774980306625, "learning_rate": 1.6160706558694444e-05, "loss": 0.006, "step": 248520 }, { "epoch": 7.682203127897632, "grad_norm": 0.10395583510398865, "learning_rate": 1.6160242937503865e-05, "loss": 0.0065, "step": 248550 }, { "epoch": 7.683130370278791, "grad_norm": 0.08762434870004654, "learning_rate": 1.6159779316313287e-05, "loss": 0.0071, "step": 248580 }, { "epoch": 7.68405761265995, "grad_norm": 0.07328033447265625, "learning_rate": 1.6159315695122705e-05, "loss": 0.0063, "step": 248610 }, { "epoch": 7.684984855041108, "grad_norm": 0.1740662157535553, "learning_rate": 1.6158852073932126e-05, "loss": 0.0061, "step": 248640 }, { "epoch": 7.685912097422266, "grad_norm": 0.14451299607753754, "learning_rate": 1.6158388452741548e-05, "loss": 0.0065, "step": 248670 }, { "epoch": 7.6868393398034245, "grad_norm": 0.1683211475610733, "learning_rate": 1.615792483155097e-05, "loss": 0.0068, "step": 248700 }, { "epoch": 7.687766582184583, "grad_norm": 0.14791783690452576, "learning_rate": 1.615746121036039e-05, "loss": 0.007, "step": 248730 }, { "epoch": 7.688693824565742, "grad_norm": 0.12082086503505707, "learning_rate": 1.615699758916981e-05, "loss": 0.0065, "step": 248760 }, { "epoch": 7.6896210669469, "grad_norm": 0.12391980737447739, "learning_rate": 1.615653396797923e-05, "loss": 0.0061, "step": 248790 }, { "epoch": 7.690548309328058, "grad_norm": 
0.12702421844005585, "learning_rate": 1.6156070346788652e-05, "loss": 0.0068, "step": 248820 }, { "epoch": 7.691475551709217, "grad_norm": 0.174412801861763, "learning_rate": 1.6155606725598074e-05, "loss": 0.0074, "step": 248850 }, { "epoch": 7.692402794090375, "grad_norm": 0.103548564016819, "learning_rate": 1.6155143104407495e-05, "loss": 0.0062, "step": 248880 }, { "epoch": 7.693330036471534, "grad_norm": 0.12704791128635406, "learning_rate": 1.6154679483216917e-05, "loss": 0.0067, "step": 248910 }, { "epoch": 7.694257278852692, "grad_norm": 0.10937044769525528, "learning_rate": 1.6154215862026335e-05, "loss": 0.0062, "step": 248940 }, { "epoch": 7.69518452123385, "grad_norm": 0.11163213849067688, "learning_rate": 1.6153752240835756e-05, "loss": 0.0059, "step": 248970 }, { "epoch": 7.696111763615009, "grad_norm": 0.15219685435295105, "learning_rate": 1.6153288619645174e-05, "loss": 0.006, "step": 249000 }, { "epoch": 7.697039005996167, "grad_norm": 0.1145668774843216, "learning_rate": 1.6152824998454596e-05, "loss": 0.0065, "step": 249030 }, { "epoch": 7.697966248377326, "grad_norm": 0.1310037225484848, "learning_rate": 1.6152361377264017e-05, "loss": 0.0066, "step": 249060 }, { "epoch": 7.6988934907584845, "grad_norm": 0.09001876413822174, "learning_rate": 1.615189775607344e-05, "loss": 0.006, "step": 249090 }, { "epoch": 7.699820733139642, "grad_norm": 0.1319434642791748, "learning_rate": 1.615143413488286e-05, "loss": 0.0063, "step": 249120 }, { "epoch": 7.700747975520801, "grad_norm": 0.09067052602767944, "learning_rate": 1.6150970513692282e-05, "loss": 0.0068, "step": 249150 }, { "epoch": 7.701675217901959, "grad_norm": 0.08924787491559982, "learning_rate": 1.61505068925017e-05, "loss": 0.0061, "step": 249180 }, { "epoch": 7.702602460283118, "grad_norm": 0.13740283250808716, "learning_rate": 1.615004327131112e-05, "loss": 0.0069, "step": 249210 }, { "epoch": 7.703529702664277, "grad_norm": 0.09664816409349442, "learning_rate": 1.6149579650120543e-05, 
"loss": 0.0062, "step": 249240 }, { "epoch": 7.704456945045435, "grad_norm": 0.1533876359462738, "learning_rate": 1.6149116028929964e-05, "loss": 0.0059, "step": 249270 }, { "epoch": 7.705384187426593, "grad_norm": 0.150326669216156, "learning_rate": 1.6148652407739386e-05, "loss": 0.0063, "step": 249300 }, { "epoch": 7.7063114298077515, "grad_norm": 0.1490834355354309, "learning_rate": 1.6148188786548807e-05, "loss": 0.0061, "step": 249330 }, { "epoch": 7.70723867218891, "grad_norm": 0.16083061695098877, "learning_rate": 1.6147725165358226e-05, "loss": 0.0064, "step": 249360 }, { "epoch": 7.708165914570069, "grad_norm": 0.11534876376390457, "learning_rate": 1.6147261544167647e-05, "loss": 0.0071, "step": 249390 }, { "epoch": 7.709093156951227, "grad_norm": 0.09440892189741135, "learning_rate": 1.6146797922977065e-05, "loss": 0.0059, "step": 249420 }, { "epoch": 7.710020399332386, "grad_norm": 0.14708958566188812, "learning_rate": 1.6146334301786487e-05, "loss": 0.0061, "step": 249450 }, { "epoch": 7.710947641713544, "grad_norm": 0.18796265125274658, "learning_rate": 1.6145870680595908e-05, "loss": 0.0062, "step": 249480 }, { "epoch": 7.711874884094702, "grad_norm": 0.11663846671581268, "learning_rate": 1.614540705940533e-05, "loss": 0.0061, "step": 249510 }, { "epoch": 7.712802126475861, "grad_norm": 0.11818720400333405, "learning_rate": 1.614494343821475e-05, "loss": 0.0059, "step": 249540 }, { "epoch": 7.713729368857019, "grad_norm": 0.08801490068435669, "learning_rate": 1.6144479817024173e-05, "loss": 0.0065, "step": 249570 }, { "epoch": 7.714656611238178, "grad_norm": 0.14108924567699432, "learning_rate": 1.614401619583359e-05, "loss": 0.0061, "step": 249600 }, { "epoch": 7.715583853619336, "grad_norm": 0.09024456888437271, "learning_rate": 1.6143552574643012e-05, "loss": 0.0072, "step": 249630 }, { "epoch": 7.716511096000494, "grad_norm": 0.08732395619153976, "learning_rate": 1.6143088953452434e-05, "loss": 0.0071, "step": 249660 }, { "epoch": 
7.717438338381653, "grad_norm": 0.16107670962810516, "learning_rate": 1.6142625332261855e-05, "loss": 0.007, "step": 249690 }, { "epoch": 7.7183655807628115, "grad_norm": 0.11450481414794922, "learning_rate": 1.6142161711071277e-05, "loss": 0.0071, "step": 249720 }, { "epoch": 7.71929282314397, "grad_norm": 0.12888269126415253, "learning_rate": 1.6141698089880695e-05, "loss": 0.0064, "step": 249750 }, { "epoch": 7.720220065525128, "grad_norm": 0.11593568325042725, "learning_rate": 1.6141234468690117e-05, "loss": 0.0064, "step": 249780 }, { "epoch": 7.7211473079062864, "grad_norm": 0.09500470012426376, "learning_rate": 1.6140770847499538e-05, "loss": 0.0067, "step": 249810 }, { "epoch": 7.722074550287445, "grad_norm": 0.12109628319740295, "learning_rate": 1.6140307226308956e-05, "loss": 0.0065, "step": 249840 }, { "epoch": 7.723001792668604, "grad_norm": 0.1343594193458557, "learning_rate": 1.6139843605118378e-05, "loss": 0.0064, "step": 249870 }, { "epoch": 7.723929035049762, "grad_norm": 0.17895042896270752, "learning_rate": 1.6139379983927803e-05, "loss": 0.0062, "step": 249900 }, { "epoch": 7.72485627743092, "grad_norm": 0.09851329773664474, "learning_rate": 1.613891636273722e-05, "loss": 0.0066, "step": 249930 }, { "epoch": 7.725783519812079, "grad_norm": 0.1300269067287445, "learning_rate": 1.6138452741546642e-05, "loss": 0.0071, "step": 249960 }, { "epoch": 7.726710762193237, "grad_norm": 0.08456982672214508, "learning_rate": 1.6137989120356064e-05, "loss": 0.0068, "step": 249990 }, { "epoch": 7.727019842986957, "eval_f1": 0.9978758231426141, "eval_loss": 0.007011482026427984, "eval_precision": 0.9978744297044838, "eval_recall": 0.9978783073778889, "eval_runtime": 4176.0077, "eval_samples_per_second": 282.854, "eval_steps_per_second": 8.839, "step": 250000 }, { "epoch": 7.727638004574396, "grad_norm": 0.07545635104179382, "learning_rate": 1.6137525499165482e-05, "loss": 0.0062, "step": 250020 }, { "epoch": 7.728565246955554, "grad_norm": 0.10039230436086655, 
"learning_rate": 1.6137061877974903e-05, "loss": 0.0067, "step": 250050 }, { "epoch": 7.729492489336713, "grad_norm": 0.1089882180094719, "learning_rate": 1.6136598256784325e-05, "loss": 0.0068, "step": 250080 }, { "epoch": 7.730419731717871, "grad_norm": 0.054105810821056366, "learning_rate": 1.6136134635593746e-05, "loss": 0.0071, "step": 250110 }, { "epoch": 7.731346974099029, "grad_norm": 0.10006685554981232, "learning_rate": 1.6135671014403168e-05, "loss": 0.0061, "step": 250140 }, { "epoch": 7.732274216480188, "grad_norm": 0.08892451226711273, "learning_rate": 1.6135207393212586e-05, "loss": 0.0066, "step": 250170 }, { "epoch": 7.7332014588613465, "grad_norm": 0.15103527903556824, "learning_rate": 1.6134743772022007e-05, "loss": 0.0068, "step": 250200 }, { "epoch": 7.734128701242505, "grad_norm": 0.10635857284069061, "learning_rate": 1.613428015083143e-05, "loss": 0.0068, "step": 250230 }, { "epoch": 7.735055943623664, "grad_norm": 0.08348477631807327, "learning_rate": 1.613381652964085e-05, "loss": 0.0069, "step": 250260 }, { "epoch": 7.735983186004821, "grad_norm": 0.13835659623146057, "learning_rate": 1.6133352908450272e-05, "loss": 0.0064, "step": 250290 }, { "epoch": 7.73691042838598, "grad_norm": 0.1342790573835373, "learning_rate": 1.6132889287259693e-05, "loss": 0.0063, "step": 250320 }, { "epoch": 7.737837670767139, "grad_norm": 0.11907787621021271, "learning_rate": 1.613242566606911e-05, "loss": 0.0072, "step": 250350 }, { "epoch": 7.738764913148297, "grad_norm": 0.1364368498325348, "learning_rate": 1.6131962044878533e-05, "loss": 0.0061, "step": 250380 }, { "epoch": 7.739692155529456, "grad_norm": 0.12330559641122818, "learning_rate": 1.613149842368795e-05, "loss": 0.0058, "step": 250410 }, { "epoch": 7.7406193979106135, "grad_norm": 0.09876614809036255, "learning_rate": 1.6131034802497373e-05, "loss": 0.0063, "step": 250440 }, { "epoch": 7.741546640291772, "grad_norm": 0.10667028278112411, "learning_rate": 1.6130571181306794e-05, "loss": 0.0068, 
"step": 250470 }, { "epoch": 7.742473882672931, "grad_norm": 0.10123665630817413, "learning_rate": 1.6130107560116216e-05, "loss": 0.0076, "step": 250500 }, { "epoch": 7.743401125054089, "grad_norm": 0.14129087328910828, "learning_rate": 1.6129643938925637e-05, "loss": 0.0062, "step": 250530 }, { "epoch": 7.744328367435248, "grad_norm": 0.10606019198894501, "learning_rate": 1.612918031773506e-05, "loss": 0.0061, "step": 250560 }, { "epoch": 7.745255609816406, "grad_norm": 0.1207132488489151, "learning_rate": 1.6128716696544477e-05, "loss": 0.0071, "step": 250590 }, { "epoch": 7.746182852197564, "grad_norm": 0.1285819560289383, "learning_rate": 1.61282530753539e-05, "loss": 0.006, "step": 250620 }, { "epoch": 7.747110094578723, "grad_norm": 0.1716717630624771, "learning_rate": 1.612778945416332e-05, "loss": 0.0065, "step": 250650 }, { "epoch": 7.748037336959881, "grad_norm": 0.1200098767876625, "learning_rate": 1.612732583297274e-05, "loss": 0.0069, "step": 250680 }, { "epoch": 7.74896457934104, "grad_norm": 0.09214561432600021, "learning_rate": 1.6126862211782163e-05, "loss": 0.0068, "step": 250710 }, { "epoch": 7.749891821722198, "grad_norm": 0.09966088086366653, "learning_rate": 1.612639859059158e-05, "loss": 0.0066, "step": 250740 }, { "epoch": 7.750819064103356, "grad_norm": 0.09575513750314713, "learning_rate": 1.6125934969401003e-05, "loss": 0.0065, "step": 250770 }, { "epoch": 7.751746306484515, "grad_norm": 0.13359542191028595, "learning_rate": 1.6125471348210424e-05, "loss": 0.0066, "step": 250800 }, { "epoch": 7.7526735488656735, "grad_norm": 0.1166343167424202, "learning_rate": 1.6125007727019842e-05, "loss": 0.0067, "step": 250830 }, { "epoch": 7.753600791246832, "grad_norm": 0.1439131498336792, "learning_rate": 1.6124544105829264e-05, "loss": 0.0062, "step": 250860 }, { "epoch": 7.754528033627991, "grad_norm": 0.17571406066417694, "learning_rate": 1.6124080484638685e-05, "loss": 0.0063, "step": 250890 }, { "epoch": 7.755455276009148, "grad_norm": 
0.18457230925559998, "learning_rate": 1.6123616863448107e-05, "loss": 0.0065, "step": 250920 }, { "epoch": 7.756382518390307, "grad_norm": 0.1275651901960373, "learning_rate": 1.6123153242257528e-05, "loss": 0.0067, "step": 250950 }, { "epoch": 7.757309760771466, "grad_norm": 0.1710156500339508, "learning_rate": 1.612268962106695e-05, "loss": 0.0066, "step": 250980 }, { "epoch": 7.758237003152624, "grad_norm": 0.08214729279279709, "learning_rate": 1.6122225999876368e-05, "loss": 0.0069, "step": 251010 }, { "epoch": 7.759164245533783, "grad_norm": 0.09320946037769318, "learning_rate": 1.612176237868579e-05, "loss": 0.0071, "step": 251040 }, { "epoch": 7.760091487914941, "grad_norm": 0.11557309329509735, "learning_rate": 1.612129875749521e-05, "loss": 0.0068, "step": 251070 }, { "epoch": 7.761018730296099, "grad_norm": 0.12047084420919418, "learning_rate": 1.6120835136304632e-05, "loss": 0.0063, "step": 251100 }, { "epoch": 7.761945972677258, "grad_norm": 0.1412891149520874, "learning_rate": 1.6120371515114054e-05, "loss": 0.0067, "step": 251130 }, { "epoch": 7.762873215058416, "grad_norm": 0.09138938039541245, "learning_rate": 1.6119907893923472e-05, "loss": 0.0069, "step": 251160 }, { "epoch": 7.763800457439575, "grad_norm": 0.12211722880601883, "learning_rate": 1.6119444272732893e-05, "loss": 0.0062, "step": 251190 }, { "epoch": 7.7647276998207335, "grad_norm": 0.11557751148939133, "learning_rate": 1.6118980651542315e-05, "loss": 0.0067, "step": 251220 }, { "epoch": 7.765654942201891, "grad_norm": 0.07530538737773895, "learning_rate": 1.6118517030351733e-05, "loss": 0.0062, "step": 251250 }, { "epoch": 7.76658218458305, "grad_norm": 0.07491336762905121, "learning_rate": 1.6118053409161155e-05, "loss": 0.0061, "step": 251280 }, { "epoch": 7.767509426964208, "grad_norm": 0.10299519449472427, "learning_rate": 1.611758978797058e-05, "loss": 0.0068, "step": 251310 }, { "epoch": 7.768436669345367, "grad_norm": 0.14479218423366547, "learning_rate": 
1.6117126166779998e-05, "loss": 0.0065, "step": 251340 }, { "epoch": 7.769363911726526, "grad_norm": 0.16354641318321228, "learning_rate": 1.611666254558942e-05, "loss": 0.0065, "step": 251370 }, { "epoch": 7.770291154107683, "grad_norm": 0.08805253356695175, "learning_rate": 1.6116198924398837e-05, "loss": 0.0072, "step": 251400 }, { "epoch": 7.771218396488842, "grad_norm": 0.11156129091978073, "learning_rate": 1.611573530320826e-05, "loss": 0.0068, "step": 251430 }, { "epoch": 7.7721456388700005, "grad_norm": 0.10906113684177399, "learning_rate": 1.611527168201768e-05, "loss": 0.006, "step": 251460 }, { "epoch": 7.773072881251159, "grad_norm": 0.14726212620735168, "learning_rate": 1.6114808060827102e-05, "loss": 0.0067, "step": 251490 }, { "epoch": 7.774000123632318, "grad_norm": 0.09868704527616501, "learning_rate": 1.6114344439636523e-05, "loss": 0.0055, "step": 251520 }, { "epoch": 7.7749273660134754, "grad_norm": 0.16426199674606323, "learning_rate": 1.6113880818445945e-05, "loss": 0.0066, "step": 251550 }, { "epoch": 7.775854608394634, "grad_norm": 0.14343217015266418, "learning_rate": 1.6113417197255363e-05, "loss": 0.0063, "step": 251580 }, { "epoch": 7.776781850775793, "grad_norm": 0.11107524484395981, "learning_rate": 1.6112953576064784e-05, "loss": 0.006, "step": 251610 }, { "epoch": 7.777709093156951, "grad_norm": 0.1290958970785141, "learning_rate": 1.6112489954874206e-05, "loss": 0.0069, "step": 251640 }, { "epoch": 7.77863633553811, "grad_norm": 0.09185510128736496, "learning_rate": 1.6112026333683627e-05, "loss": 0.0064, "step": 251670 }, { "epoch": 7.7795635779192684, "grad_norm": 0.14548702538013458, "learning_rate": 1.611156271249305e-05, "loss": 0.0072, "step": 251700 }, { "epoch": 7.780490820300426, "grad_norm": 0.15366125106811523, "learning_rate": 1.6111099091302467e-05, "loss": 0.0068, "step": 251730 }, { "epoch": 7.781418062681585, "grad_norm": 0.13238167762756348, "learning_rate": 1.611063547011189e-05, "loss": 0.0067, "step": 251760 }, { 
"epoch": 7.782345305062743, "grad_norm": 0.13534830510616302, "learning_rate": 1.611017184892131e-05, "loss": 0.0066, "step": 251790 }, { "epoch": 7.783272547443902, "grad_norm": 0.13688886165618896, "learning_rate": 1.6109708227730728e-05, "loss": 0.0065, "step": 251820 }, { "epoch": 7.7841997898250606, "grad_norm": 0.12290609627962112, "learning_rate": 1.610924460654015e-05, "loss": 0.0069, "step": 251850 }, { "epoch": 7.785127032206219, "grad_norm": 0.14601080119609833, "learning_rate": 1.610878098534957e-05, "loss": 0.0066, "step": 251880 }, { "epoch": 7.786054274587377, "grad_norm": 0.14810122549533844, "learning_rate": 1.6108317364158993e-05, "loss": 0.0065, "step": 251910 }, { "epoch": 7.7869815169685355, "grad_norm": 0.09353742748498917, "learning_rate": 1.6107853742968414e-05, "loss": 0.0065, "step": 251940 }, { "epoch": 7.787908759349694, "grad_norm": 0.1358995884656906, "learning_rate": 1.6107390121777836e-05, "loss": 0.0063, "step": 251970 }, { "epoch": 7.788836001730853, "grad_norm": 0.08588557690382004, "learning_rate": 1.6106926500587254e-05, "loss": 0.0062, "step": 252000 }, { "epoch": 7.789763244112011, "grad_norm": 0.15087616443634033, "learning_rate": 1.6106462879396675e-05, "loss": 0.0063, "step": 252030 }, { "epoch": 7.790690486493169, "grad_norm": 0.13231447339057922, "learning_rate": 1.6105999258206097e-05, "loss": 0.0064, "step": 252060 }, { "epoch": 7.791617728874328, "grad_norm": 0.12969259917736053, "learning_rate": 1.6105535637015518e-05, "loss": 0.0066, "step": 252090 }, { "epoch": 7.792544971255486, "grad_norm": 0.09893635660409927, "learning_rate": 1.610507201582494e-05, "loss": 0.0071, "step": 252120 }, { "epoch": 7.793472213636645, "grad_norm": 0.09376908838748932, "learning_rate": 1.6104608394634358e-05, "loss": 0.0063, "step": 252150 }, { "epoch": 7.794399456017803, "grad_norm": 0.1208738386631012, "learning_rate": 1.610414477344378e-05, "loss": 0.0063, "step": 252180 }, { "epoch": 7.795326698398961, "grad_norm": 
0.15285545587539673, "learning_rate": 1.61036811522532e-05, "loss": 0.0062, "step": 252210 }, { "epoch": 7.79625394078012, "grad_norm": 0.1086038127541542, "learning_rate": 1.610321753106262e-05, "loss": 0.0071, "step": 252240 }, { "epoch": 7.797181183161278, "grad_norm": 0.09359411895275116, "learning_rate": 1.610275390987204e-05, "loss": 0.0068, "step": 252270 }, { "epoch": 7.798108425542437, "grad_norm": 0.10521497577428818, "learning_rate": 1.6102290288681462e-05, "loss": 0.0064, "step": 252300 }, { "epoch": 7.7990356679235955, "grad_norm": 0.13277578353881836, "learning_rate": 1.6101826667490884e-05, "loss": 0.0071, "step": 252330 }, { "epoch": 7.799962910304753, "grad_norm": 0.09964949637651443, "learning_rate": 1.6101363046300305e-05, "loss": 0.0053, "step": 252360 }, { "epoch": 7.800890152685912, "grad_norm": 0.10044752806425095, "learning_rate": 1.6100899425109723e-05, "loss": 0.0064, "step": 252390 }, { "epoch": 7.80181739506707, "grad_norm": 0.10982845723628998, "learning_rate": 1.6100435803919145e-05, "loss": 0.0069, "step": 252420 }, { "epoch": 7.802744637448229, "grad_norm": 0.16885143518447876, "learning_rate": 1.6099972182728566e-05, "loss": 0.0063, "step": 252450 }, { "epoch": 7.803671879829388, "grad_norm": 0.06764963269233704, "learning_rate": 1.6099508561537988e-05, "loss": 0.0065, "step": 252480 }, { "epoch": 7.804599122210546, "grad_norm": 0.1293339729309082, "learning_rate": 1.609904494034741e-05, "loss": 0.0066, "step": 252510 }, { "epoch": 7.805526364591704, "grad_norm": 0.14502133429050446, "learning_rate": 1.609858131915683e-05, "loss": 0.0063, "step": 252540 }, { "epoch": 7.8064536069728625, "grad_norm": 0.09519924968481064, "learning_rate": 1.609811769796625e-05, "loss": 0.0069, "step": 252570 }, { "epoch": 7.807380849354021, "grad_norm": 0.12114094942808151, "learning_rate": 1.609765407677567e-05, "loss": 0.0059, "step": 252600 }, { "epoch": 7.80830809173518, "grad_norm": 0.12509644031524658, "learning_rate": 1.6097190455585092e-05, 
"loss": 0.0068, "step": 252630 }, { "epoch": 7.809235334116338, "grad_norm": 0.09483925253152847, "learning_rate": 1.609672683439451e-05, "loss": 0.0068, "step": 252660 }, { "epoch": 7.810162576497497, "grad_norm": 0.12439915537834167, "learning_rate": 1.609626321320393e-05, "loss": 0.0062, "step": 252690 }, { "epoch": 7.811089818878655, "grad_norm": 0.09384333342313766, "learning_rate": 1.6095799592013353e-05, "loss": 0.0067, "step": 252720 }, { "epoch": 7.812017061259813, "grad_norm": 0.10089088976383209, "learning_rate": 1.6095335970822774e-05, "loss": 0.0064, "step": 252750 }, { "epoch": 7.812944303640972, "grad_norm": 0.13931937515735626, "learning_rate": 1.6094872349632196e-05, "loss": 0.0062, "step": 252780 }, { "epoch": 7.81387154602213, "grad_norm": 0.13330437242984772, "learning_rate": 1.6094408728441614e-05, "loss": 0.0066, "step": 252810 }, { "epoch": 7.814798788403289, "grad_norm": 0.12258747220039368, "learning_rate": 1.6093945107251036e-05, "loss": 0.0066, "step": 252840 }, { "epoch": 7.815726030784447, "grad_norm": 0.1356317549943924, "learning_rate": 1.6093496940100144e-05, "loss": 0.0067, "step": 252870 }, { "epoch": 7.816653273165605, "grad_norm": 0.14142754673957825, "learning_rate": 1.6093033318909565e-05, "loss": 0.0073, "step": 252900 }, { "epoch": 7.817580515546764, "grad_norm": 0.10810063034296036, "learning_rate": 1.609258515175867e-05, "loss": 0.0062, "step": 252930 }, { "epoch": 7.8185077579279225, "grad_norm": 0.1230507642030716, "learning_rate": 1.609212153056809e-05, "loss": 0.0062, "step": 252960 }, { "epoch": 7.819435000309081, "grad_norm": 0.10638703405857086, "learning_rate": 1.6091657909377513e-05, "loss": 0.0066, "step": 252990 }, { "epoch": 7.820362242690239, "grad_norm": 0.09841697663068771, "learning_rate": 1.6091194288186934e-05, "loss": 0.0061, "step": 253020 }, { "epoch": 7.821289485071397, "grad_norm": 0.1561156064271927, "learning_rate": 1.6090730666996356e-05, "loss": 0.0066, "step": 253050 }, { "epoch": 
7.822216727452556, "grad_norm": 0.09045660495758057, "learning_rate": 1.6090267045805774e-05, "loss": 0.007, "step": 253080 }, { "epoch": 7.823143969833715, "grad_norm": 0.16313940286636353, "learning_rate": 1.6089803424615195e-05, "loss": 0.0066, "step": 253110 }, { "epoch": 7.824071212214873, "grad_norm": 0.1274496167898178, "learning_rate": 1.6089339803424617e-05, "loss": 0.0065, "step": 253140 }, { "epoch": 7.824998454596031, "grad_norm": 0.16705018281936646, "learning_rate": 1.6088876182234038e-05, "loss": 0.0063, "step": 253170 }, { "epoch": 7.8259256969771895, "grad_norm": 0.10217129439115524, "learning_rate": 1.608841256104346e-05, "loss": 0.0061, "step": 253200 }, { "epoch": 7.826852939358348, "grad_norm": 0.1565684825181961, "learning_rate": 1.608794893985288e-05, "loss": 0.0068, "step": 253230 }, { "epoch": 7.827780181739507, "grad_norm": 0.14325223863124847, "learning_rate": 1.60874853186623e-05, "loss": 0.0069, "step": 253260 }, { "epoch": 7.828707424120665, "grad_norm": 0.11942372471094131, "learning_rate": 1.608702169747172e-05, "loss": 0.0061, "step": 253290 }, { "epoch": 7.829634666501824, "grad_norm": 0.13822242617607117, "learning_rate": 1.608655807628114e-05, "loss": 0.0065, "step": 253320 }, { "epoch": 7.830561908882982, "grad_norm": 0.08291695266962051, "learning_rate": 1.608609445509056e-05, "loss": 0.007, "step": 253350 }, { "epoch": 7.83148915126414, "grad_norm": 0.07500869035720825, "learning_rate": 1.6085630833899982e-05, "loss": 0.0061, "step": 253380 }, { "epoch": 7.832416393645299, "grad_norm": 0.1452002227306366, "learning_rate": 1.6085167212709403e-05, "loss": 0.0065, "step": 253410 }, { "epoch": 7.8333436360264574, "grad_norm": 0.09841704368591309, "learning_rate": 1.6084703591518825e-05, "loss": 0.0065, "step": 253440 }, { "epoch": 7.834270878407616, "grad_norm": 0.1181599423289299, "learning_rate": 1.6084239970328246e-05, "loss": 0.0067, "step": 253470 }, { "epoch": 7.835198120788775, "grad_norm": 0.10232418030500412, 
"learning_rate": 1.6083776349137665e-05, "loss": 0.0062, "step": 253500 }, { "epoch": 7.836125363169932, "grad_norm": 0.09909509867429733, "learning_rate": 1.6083312727947086e-05, "loss": 0.0067, "step": 253530 }, { "epoch": 7.837052605551091, "grad_norm": 0.07928662747144699, "learning_rate": 1.6082849106756508e-05, "loss": 0.0062, "step": 253560 }, { "epoch": 7.8379798479322496, "grad_norm": 0.16893087327480316, "learning_rate": 1.608238548556593e-05, "loss": 0.0063, "step": 253590 }, { "epoch": 7.838907090313408, "grad_norm": 0.12630055844783783, "learning_rate": 1.608192186437535e-05, "loss": 0.0069, "step": 253620 }, { "epoch": 7.839834332694567, "grad_norm": 0.09799101948738098, "learning_rate": 1.608145824318477e-05, "loss": 0.0066, "step": 253650 }, { "epoch": 7.8407615750757245, "grad_norm": 0.13932836055755615, "learning_rate": 1.608099462199419e-05, "loss": 0.0066, "step": 253680 }, { "epoch": 7.841688817456883, "grad_norm": 0.07970548421144485, "learning_rate": 1.6080531000803612e-05, "loss": 0.0062, "step": 253710 }, { "epoch": 7.842616059838042, "grad_norm": 0.12633149325847626, "learning_rate": 1.608006737961303e-05, "loss": 0.0067, "step": 253740 }, { "epoch": 7.8435433022192, "grad_norm": 0.08501357585191727, "learning_rate": 1.607960375842245e-05, "loss": 0.0072, "step": 253770 }, { "epoch": 7.844470544600359, "grad_norm": 0.11804655939340591, "learning_rate": 1.6079140137231873e-05, "loss": 0.0064, "step": 253800 }, { "epoch": 7.845397786981517, "grad_norm": 0.1066821739077568, "learning_rate": 1.6078676516041294e-05, "loss": 0.0066, "step": 253830 }, { "epoch": 7.846325029362675, "grad_norm": 0.05804669111967087, "learning_rate": 1.6078212894850716e-05, "loss": 0.0061, "step": 253860 }, { "epoch": 7.847252271743834, "grad_norm": 0.15805745124816895, "learning_rate": 1.6077749273660137e-05, "loss": 0.0069, "step": 253890 }, { "epoch": 7.848179514124992, "grad_norm": 0.10251742601394653, "learning_rate": 1.6077285652469556e-05, "loss": 0.007, 
"step": 253920 }, { "epoch": 7.849106756506151, "grad_norm": 0.15075089037418365, "learning_rate": 1.6076822031278977e-05, "loss": 0.0072, "step": 253950 }, { "epoch": 7.850033998887309, "grad_norm": 0.10829544067382812, "learning_rate": 1.60763584100884e-05, "loss": 0.0066, "step": 253980 }, { "epoch": 7.850961241268467, "grad_norm": 0.117493636906147, "learning_rate": 1.607589478889782e-05, "loss": 0.0067, "step": 254010 }, { "epoch": 7.851888483649626, "grad_norm": 0.10247272253036499, "learning_rate": 1.607543116770724e-05, "loss": 0.0066, "step": 254040 }, { "epoch": 7.8528157260307845, "grad_norm": 0.14634822309017181, "learning_rate": 1.607496754651666e-05, "loss": 0.0072, "step": 254070 }, { "epoch": 7.853742968411943, "grad_norm": 0.11318529397249222, "learning_rate": 1.607450392532608e-05, "loss": 0.007, "step": 254100 }, { "epoch": 7.854670210793102, "grad_norm": 0.13318008184432983, "learning_rate": 1.6074040304135503e-05, "loss": 0.007, "step": 254130 }, { "epoch": 7.855597453174259, "grad_norm": 0.1603381633758545, "learning_rate": 1.607357668294492e-05, "loss": 0.0067, "step": 254160 }, { "epoch": 7.856524695555418, "grad_norm": 0.12449946999549866, "learning_rate": 1.6073113061754346e-05, "loss": 0.0067, "step": 254190 }, { "epoch": 7.857451937936577, "grad_norm": 0.11887132376432419, "learning_rate": 1.6072649440563767e-05, "loss": 0.0068, "step": 254220 }, { "epoch": 7.858379180317735, "grad_norm": 0.1695040911436081, "learning_rate": 1.6072185819373185e-05, "loss": 0.0063, "step": 254250 }, { "epoch": 7.859306422698894, "grad_norm": 0.10598941892385483, "learning_rate": 1.6071722198182607e-05, "loss": 0.0069, "step": 254280 }, { "epoch": 7.860233665080052, "grad_norm": 0.14869363605976105, "learning_rate": 1.6071258576992025e-05, "loss": 0.007, "step": 254310 }, { "epoch": 7.86116090746121, "grad_norm": 0.1101418286561966, "learning_rate": 1.6070794955801446e-05, "loss": 0.0064, "step": 254340 }, { "epoch": 7.862088149842369, "grad_norm": 
0.0945172980427742, "learning_rate": 1.6070331334610868e-05, "loss": 0.0067, "step": 254370 }, { "epoch": 7.863015392223527, "grad_norm": 0.14687375724315643, "learning_rate": 1.606986771342029e-05, "loss": 0.0067, "step": 254400 }, { "epoch": 7.863942634604686, "grad_norm": 0.1700812131166458, "learning_rate": 1.606940409222971e-05, "loss": 0.0066, "step": 254430 }, { "epoch": 7.8648698769858445, "grad_norm": 0.12619836628437042, "learning_rate": 1.6068940471039132e-05, "loss": 0.0066, "step": 254460 }, { "epoch": 7.865797119367002, "grad_norm": 0.1089802086353302, "learning_rate": 1.606847684984855e-05, "loss": 0.0061, "step": 254490 }, { "epoch": 7.866724361748161, "grad_norm": 0.1848813146352768, "learning_rate": 1.6068013228657972e-05, "loss": 0.0062, "step": 254520 }, { "epoch": 7.867651604129319, "grad_norm": 0.07902821898460388, "learning_rate": 1.6067549607467394e-05, "loss": 0.0066, "step": 254550 }, { "epoch": 7.868578846510478, "grad_norm": 0.17363488674163818, "learning_rate": 1.6067085986276815e-05, "loss": 0.0074, "step": 254580 }, { "epoch": 7.869506088891637, "grad_norm": 0.13589821755886078, "learning_rate": 1.6066622365086237e-05, "loss": 0.0067, "step": 254610 }, { "epoch": 7.870433331272794, "grad_norm": 0.11166832596063614, "learning_rate": 1.6066158743895655e-05, "loss": 0.0066, "step": 254640 }, { "epoch": 7.871360573653953, "grad_norm": 0.09398381412029266, "learning_rate": 1.6065695122705076e-05, "loss": 0.006, "step": 254670 }, { "epoch": 7.8722878160351115, "grad_norm": 0.09263265877962112, "learning_rate": 1.6065231501514498e-05, "loss": 0.0066, "step": 254700 }, { "epoch": 7.87321505841627, "grad_norm": 0.14547213912010193, "learning_rate": 1.6064767880323916e-05, "loss": 0.006, "step": 254730 }, { "epoch": 7.874142300797429, "grad_norm": 0.064608134329319, "learning_rate": 1.6064304259133337e-05, "loss": 0.0062, "step": 254760 }, { "epoch": 7.875069543178586, "grad_norm": 0.15898199379444122, "learning_rate": 1.606384063794276e-05, 
"loss": 0.0061, "step": 254790 }, { "epoch": 7.875996785559745, "grad_norm": 0.10286059975624084, "learning_rate": 1.606337701675218e-05, "loss": 0.0066, "step": 254820 }, { "epoch": 7.876924027940904, "grad_norm": 0.16914372146129608, "learning_rate": 1.6062913395561602e-05, "loss": 0.0068, "step": 254850 }, { "epoch": 7.877851270322062, "grad_norm": 0.1156095415353775, "learning_rate": 1.6062449774371023e-05, "loss": 0.0062, "step": 254880 }, { "epoch": 7.878778512703221, "grad_norm": 0.17387396097183228, "learning_rate": 1.606198615318044e-05, "loss": 0.0063, "step": 254910 }, { "epoch": 7.879705755084379, "grad_norm": 0.13439323008060455, "learning_rate": 1.6061522531989863e-05, "loss": 0.0068, "step": 254940 }, { "epoch": 7.880632997465537, "grad_norm": 0.0995272770524025, "learning_rate": 1.6061058910799285e-05, "loss": 0.0065, "step": 254970 }, { "epoch": 7.881560239846696, "grad_norm": 0.14563439786434174, "learning_rate": 1.6060595289608706e-05, "loss": 0.0071, "step": 255000 }, { "epoch": 7.882487482227854, "grad_norm": 0.14535589516162872, "learning_rate": 1.6060131668418128e-05, "loss": 0.0065, "step": 255030 }, { "epoch": 7.883414724609013, "grad_norm": 0.08480267226696014, "learning_rate": 1.6059668047227546e-05, "loss": 0.0068, "step": 255060 }, { "epoch": 7.8843419669901715, "grad_norm": 0.0943484678864479, "learning_rate": 1.6059204426036967e-05, "loss": 0.0069, "step": 255090 }, { "epoch": 7.88526920937133, "grad_norm": 0.12102094292640686, "learning_rate": 1.605874080484639e-05, "loss": 0.0065, "step": 255120 }, { "epoch": 7.886196451752488, "grad_norm": 0.1669941246509552, "learning_rate": 1.6058277183655807e-05, "loss": 0.0061, "step": 255150 }, { "epoch": 7.8871236941336464, "grad_norm": 0.15311899781227112, "learning_rate": 1.6057813562465228e-05, "loss": 0.0063, "step": 255180 }, { "epoch": 7.888050936514805, "grad_norm": 0.152015820145607, "learning_rate": 1.605734994127465e-05, "loss": 0.0069, "step": 255210 }, { "epoch": 
7.888978178895964, "grad_norm": 0.06507182121276855, "learning_rate": 1.605688632008407e-05, "loss": 0.0071, "step": 255240 }, { "epoch": 7.889905421277122, "grad_norm": 0.13583904504776, "learning_rate": 1.6056422698893493e-05, "loss": 0.0065, "step": 255270 }, { "epoch": 7.89083266365828, "grad_norm": 0.09884151071310043, "learning_rate": 1.6055959077702914e-05, "loss": 0.0065, "step": 255300 }, { "epoch": 7.8917599060394386, "grad_norm": 0.11569132655858994, "learning_rate": 1.6055495456512332e-05, "loss": 0.0066, "step": 255330 }, { "epoch": 7.892687148420597, "grad_norm": 0.12729087471961975, "learning_rate": 1.6055031835321754e-05, "loss": 0.0063, "step": 255360 }, { "epoch": 7.893614390801756, "grad_norm": 0.08925287425518036, "learning_rate": 1.6054568214131175e-05, "loss": 0.0069, "step": 255390 }, { "epoch": 7.894541633182914, "grad_norm": 0.11521942168474197, "learning_rate": 1.6054104592940597e-05, "loss": 0.0069, "step": 255420 }, { "epoch": 7.895468875564072, "grad_norm": 0.10838160663843155, "learning_rate": 1.605364097175002e-05, "loss": 0.007, "step": 255450 }, { "epoch": 7.896396117945231, "grad_norm": 0.17669473588466644, "learning_rate": 1.6053177350559437e-05, "loss": 0.0064, "step": 255480 }, { "epoch": 7.897323360326389, "grad_norm": 0.12895162403583527, "learning_rate": 1.6052713729368858e-05, "loss": 0.0058, "step": 255510 }, { "epoch": 7.898250602707548, "grad_norm": 0.14151784777641296, "learning_rate": 1.605225010817828e-05, "loss": 0.0063, "step": 255540 }, { "epoch": 7.8991778450887065, "grad_norm": 0.13857939839363098, "learning_rate": 1.6051786486987698e-05, "loss": 0.0069, "step": 255570 }, { "epoch": 7.900105087469864, "grad_norm": 0.11692880094051361, "learning_rate": 1.6051322865797123e-05, "loss": 0.0065, "step": 255600 }, { "epoch": 7.901032329851023, "grad_norm": 0.14398889243602753, "learning_rate": 1.6050859244606544e-05, "loss": 0.0061, "step": 255630 }, { "epoch": 7.901959572232181, "grad_norm": 0.11040893942117691, 
"learning_rate": 1.6050395623415962e-05, "loss": 0.0072, "step": 255660 }, { "epoch": 7.90288681461334, "grad_norm": 0.10717212408781052, "learning_rate": 1.6049932002225384e-05, "loss": 0.0073, "step": 255690 }, { "epoch": 7.903814056994499, "grad_norm": 0.13103201985359192, "learning_rate": 1.6049468381034802e-05, "loss": 0.0061, "step": 255720 }, { "epoch": 7.904741299375657, "grad_norm": 0.08829069137573242, "learning_rate": 1.6049004759844223e-05, "loss": 0.0061, "step": 255750 }, { "epoch": 7.905668541756815, "grad_norm": 0.13113680481910706, "learning_rate": 1.6048541138653645e-05, "loss": 0.0068, "step": 255780 }, { "epoch": 7.9065957841379735, "grad_norm": 0.11287353932857513, "learning_rate": 1.6048077517463066e-05, "loss": 0.0064, "step": 255810 }, { "epoch": 7.907523026519132, "grad_norm": 0.08914849907159805, "learning_rate": 1.6047613896272488e-05, "loss": 0.0064, "step": 255840 }, { "epoch": 7.908450268900291, "grad_norm": 0.10226843506097794, "learning_rate": 1.604715027508191e-05, "loss": 0.0071, "step": 255870 }, { "epoch": 7.909377511281449, "grad_norm": 0.16094191372394562, "learning_rate": 1.6046686653891327e-05, "loss": 0.0075, "step": 255900 }, { "epoch": 7.910304753662608, "grad_norm": 0.18223795294761658, "learning_rate": 1.604622303270075e-05, "loss": 0.0068, "step": 255930 }, { "epoch": 7.911231996043766, "grad_norm": 0.1786722093820572, "learning_rate": 1.604575941151017e-05, "loss": 0.0064, "step": 255960 }, { "epoch": 7.912159238424924, "grad_norm": 0.08906826376914978, "learning_rate": 1.6045295790319592e-05, "loss": 0.0057, "step": 255990 }, { "epoch": 7.913086480806083, "grad_norm": 0.1551407277584076, "learning_rate": 1.6044832169129014e-05, "loss": 0.006, "step": 256020 }, { "epoch": 7.914013723187241, "grad_norm": 0.130243182182312, "learning_rate": 1.604436854793843e-05, "loss": 0.0068, "step": 256050 }, { "epoch": 7.9149409655684, "grad_norm": 0.0833439826965332, "learning_rate": 1.6043904926747853e-05, "loss": 0.0066, "step": 
256080 }, { "epoch": 7.915868207949558, "grad_norm": 0.10337832570075989, "learning_rate": 1.6043441305557275e-05, "loss": 0.0069, "step": 256110 }, { "epoch": 7.916795450330716, "grad_norm": 0.06313792616128922, "learning_rate": 1.6042977684366693e-05, "loss": 0.0065, "step": 256140 }, { "epoch": 7.917722692711875, "grad_norm": 0.11583340913057327, "learning_rate": 1.6042514063176114e-05, "loss": 0.0066, "step": 256170 }, { "epoch": 7.9186499350930335, "grad_norm": 0.10944069921970367, "learning_rate": 1.6042050441985536e-05, "loss": 0.0059, "step": 256200 }, { "epoch": 7.919577177474192, "grad_norm": 0.047870032489299774, "learning_rate": 1.6041586820794957e-05, "loss": 0.0063, "step": 256230 }, { "epoch": 7.92050441985535, "grad_norm": 0.08332972228527069, "learning_rate": 1.604112319960438e-05, "loss": 0.006, "step": 256260 }, { "epoch": 7.921431662236508, "grad_norm": 0.16023316979408264, "learning_rate": 1.60406595784138e-05, "loss": 0.0063, "step": 256290 }, { "epoch": 7.922358904617667, "grad_norm": 0.13495449721813202, "learning_rate": 1.604019595722322e-05, "loss": 0.0061, "step": 256320 }, { "epoch": 7.923286146998826, "grad_norm": 0.0612267330288887, "learning_rate": 1.603973233603264e-05, "loss": 0.007, "step": 256350 }, { "epoch": 7.924213389379984, "grad_norm": 0.12537547945976257, "learning_rate": 1.603926871484206e-05, "loss": 0.0062, "step": 256380 }, { "epoch": 7.925140631761142, "grad_norm": 0.11543365567922592, "learning_rate": 1.6038805093651483e-05, "loss": 0.0072, "step": 256410 }, { "epoch": 7.9260678741423005, "grad_norm": 0.12920090556144714, "learning_rate": 1.6038341472460904e-05, "loss": 0.007, "step": 256440 }, { "epoch": 7.926995116523459, "grad_norm": 0.1292908638715744, "learning_rate": 1.6037877851270323e-05, "loss": 0.0062, "step": 256470 }, { "epoch": 7.927922358904618, "grad_norm": 0.1081785038113594, "learning_rate": 1.6037414230079744e-05, "loss": 0.0061, "step": 256500 }, { "epoch": 7.928849601285776, "grad_norm": 
0.0874052420258522, "learning_rate": 1.6036950608889166e-05, "loss": 0.0061, "step": 256530 }, { "epoch": 7.929776843666935, "grad_norm": 0.19560444355010986, "learning_rate": 1.6036486987698584e-05, "loss": 0.0065, "step": 256560 }, { "epoch": 7.930704086048093, "grad_norm": 0.1324206292629242, "learning_rate": 1.6036023366508005e-05, "loss": 0.006, "step": 256590 }, { "epoch": 7.931631328429251, "grad_norm": 0.12532556056976318, "learning_rate": 1.6035559745317427e-05, "loss": 0.0066, "step": 256620 }, { "epoch": 7.93255857081041, "grad_norm": 0.08333567529916763, "learning_rate": 1.6035096124126848e-05, "loss": 0.007, "step": 256650 }, { "epoch": 7.933485813191568, "grad_norm": 0.09252263605594635, "learning_rate": 1.603463250293627e-05, "loss": 0.0061, "step": 256680 }, { "epoch": 7.934413055572727, "grad_norm": 0.09658551961183548, "learning_rate": 1.6034168881745688e-05, "loss": 0.0074, "step": 256710 }, { "epoch": 7.935340297953886, "grad_norm": 0.08895634114742279, "learning_rate": 1.603370526055511e-05, "loss": 0.0064, "step": 256740 }, { "epoch": 7.936267540335043, "grad_norm": 0.09382734447717667, "learning_rate": 1.603324163936453e-05, "loss": 0.0065, "step": 256770 }, { "epoch": 7.937194782716202, "grad_norm": 0.10104874521493912, "learning_rate": 1.6032778018173952e-05, "loss": 0.0063, "step": 256800 }, { "epoch": 7.9381220250973605, "grad_norm": 0.06229988858103752, "learning_rate": 1.6032314396983374e-05, "loss": 0.0058, "step": 256830 }, { "epoch": 7.939049267478519, "grad_norm": 0.09765923768281937, "learning_rate": 1.6031850775792795e-05, "loss": 0.0065, "step": 256860 }, { "epoch": 7.939976509859678, "grad_norm": 0.12445090711116791, "learning_rate": 1.6031387154602213e-05, "loss": 0.0062, "step": 256890 }, { "epoch": 7.9409037522408354, "grad_norm": 0.1491481512784958, "learning_rate": 1.6030923533411635e-05, "loss": 0.0074, "step": 256920 }, { "epoch": 7.941830994621994, "grad_norm": 0.09729032218456268, "learning_rate": 
1.6030459912221056e-05, "loss": 0.0066, "step": 256950 }, { "epoch": 7.942758237003153, "grad_norm": 0.12684084475040436, "learning_rate": 1.6029996291030475e-05, "loss": 0.0061, "step": 256980 }, { "epoch": 7.943685479384311, "grad_norm": 0.11726552993059158, "learning_rate": 1.60295326698399e-05, "loss": 0.0067, "step": 257010 }, { "epoch": 7.94461272176547, "grad_norm": 0.1211608424782753, "learning_rate": 1.6029069048649318e-05, "loss": 0.006, "step": 257040 }, { "epoch": 7.9455399641466276, "grad_norm": 0.09537781029939651, "learning_rate": 1.602860542745874e-05, "loss": 0.0061, "step": 257070 }, { "epoch": 7.946467206527786, "grad_norm": 0.07178912311792374, "learning_rate": 1.602814180626816e-05, "loss": 0.0062, "step": 257100 }, { "epoch": 7.947394448908945, "grad_norm": 0.0739271491765976, "learning_rate": 1.602767818507758e-05, "loss": 0.0066, "step": 257130 }, { "epoch": 7.948321691290103, "grad_norm": 0.08049822598695755, "learning_rate": 1.6027214563887e-05, "loss": 0.0069, "step": 257160 }, { "epoch": 7.949248933671262, "grad_norm": 0.12657493352890015, "learning_rate": 1.6026750942696422e-05, "loss": 0.0066, "step": 257190 }, { "epoch": 7.95017617605242, "grad_norm": 0.13867077231407166, "learning_rate": 1.6026287321505843e-05, "loss": 0.0067, "step": 257220 }, { "epoch": 7.951103418433578, "grad_norm": 0.11567935347557068, "learning_rate": 1.6025823700315265e-05, "loss": 0.0071, "step": 257250 }, { "epoch": 7.952030660814737, "grad_norm": 0.12646470963954926, "learning_rate": 1.6025360079124686e-05, "loss": 0.007, "step": 257280 }, { "epoch": 7.9529579031958955, "grad_norm": 0.10020571202039719, "learning_rate": 1.6024896457934104e-05, "loss": 0.0073, "step": 257310 }, { "epoch": 7.953885145577054, "grad_norm": 0.13764716684818268, "learning_rate": 1.6024432836743526e-05, "loss": 0.0066, "step": 257340 }, { "epoch": 7.954812387958213, "grad_norm": 0.14870992302894592, "learning_rate": 1.6023969215552947e-05, "loss": 0.0064, "step": 257370 }, { 
"epoch": 7.95573963033937, "grad_norm": 0.13970419764518738, "learning_rate": 1.602350559436237e-05, "loss": 0.0062, "step": 257400 }, { "epoch": 7.956666872720529, "grad_norm": 0.1156754344701767, "learning_rate": 1.602304197317179e-05, "loss": 0.0062, "step": 257430 }, { "epoch": 7.957594115101688, "grad_norm": 0.13202358782291412, "learning_rate": 1.602257835198121e-05, "loss": 0.0065, "step": 257460 }, { "epoch": 7.958521357482846, "grad_norm": 0.13126736879348755, "learning_rate": 1.602211473079063e-05, "loss": 0.0061, "step": 257490 }, { "epoch": 7.959448599864005, "grad_norm": 0.15537859499454498, "learning_rate": 1.602165110960005e-05, "loss": 0.0062, "step": 257520 }, { "epoch": 7.960375842245163, "grad_norm": 0.106498584151268, "learning_rate": 1.602118748840947e-05, "loss": 0.0067, "step": 257550 }, { "epoch": 7.961303084626321, "grad_norm": 0.09294113516807556, "learning_rate": 1.602072386721889e-05, "loss": 0.006, "step": 257580 }, { "epoch": 7.96223032700748, "grad_norm": 0.11123965680599213, "learning_rate": 1.6020260246028313e-05, "loss": 0.0063, "step": 257610 }, { "epoch": 7.963157569388638, "grad_norm": 0.08956565707921982, "learning_rate": 1.6019796624837734e-05, "loss": 0.0065, "step": 257640 }, { "epoch": 7.964084811769797, "grad_norm": 0.1316249966621399, "learning_rate": 1.6019333003647156e-05, "loss": 0.0072, "step": 257670 }, { "epoch": 7.9650120541509555, "grad_norm": 0.07909387350082397, "learning_rate": 1.6018869382456574e-05, "loss": 0.0056, "step": 257700 }, { "epoch": 7.965939296532113, "grad_norm": 0.08765719830989838, "learning_rate": 1.6018405761265995e-05, "loss": 0.0063, "step": 257730 }, { "epoch": 7.966866538913272, "grad_norm": 0.12010404467582703, "learning_rate": 1.6017942140075417e-05, "loss": 0.0064, "step": 257760 }, { "epoch": 7.96779378129443, "grad_norm": 0.15097776055335999, "learning_rate": 1.601747851888484e-05, "loss": 0.0065, "step": 257790 }, { "epoch": 7.968721023675589, "grad_norm": 0.09084730595350266, 
"learning_rate": 1.601701489769426e-05, "loss": 0.0066, "step": 257820 }, { "epoch": 7.969648266056748, "grad_norm": 0.15199759602546692, "learning_rate": 1.601655127650368e-05, "loss": 0.0056, "step": 257850 }, { "epoch": 7.970575508437905, "grad_norm": 0.08487532287836075, "learning_rate": 1.60160876553131e-05, "loss": 0.0065, "step": 257880 }, { "epoch": 7.971502750819064, "grad_norm": 0.12393496185541153, "learning_rate": 1.601562403412252e-05, "loss": 0.0061, "step": 257910 }, { "epoch": 7.9724299932002225, "grad_norm": 0.14977386593818665, "learning_rate": 1.6015160412931942e-05, "loss": 0.007, "step": 257940 }, { "epoch": 7.973357235581381, "grad_norm": 0.10833677649497986, "learning_rate": 1.601469679174136e-05, "loss": 0.0063, "step": 257970 }, { "epoch": 7.97428447796254, "grad_norm": 0.14221587777137756, "learning_rate": 1.6014233170550782e-05, "loss": 0.0061, "step": 258000 }, { "epoch": 7.975211720343697, "grad_norm": 0.10576646029949188, "learning_rate": 1.6013769549360204e-05, "loss": 0.0066, "step": 258030 }, { "epoch": 7.976138962724856, "grad_norm": 0.14826253056526184, "learning_rate": 1.6013305928169625e-05, "loss": 0.0065, "step": 258060 }, { "epoch": 7.977066205106015, "grad_norm": 0.09236666560173035, "learning_rate": 1.6012842306979047e-05, "loss": 0.0064, "step": 258090 }, { "epoch": 7.977993447487173, "grad_norm": 0.12484254688024521, "learning_rate": 1.6012378685788465e-05, "loss": 0.0063, "step": 258120 }, { "epoch": 7.978920689868332, "grad_norm": 0.11996736377477646, "learning_rate": 1.6011915064597886e-05, "loss": 0.0067, "step": 258150 }, { "epoch": 7.9798479322494895, "grad_norm": 0.1126125305891037, "learning_rate": 1.6011451443407308e-05, "loss": 0.0064, "step": 258180 }, { "epoch": 7.980775174630648, "grad_norm": 0.1124480590224266, "learning_rate": 1.601098782221673e-05, "loss": 0.0067, "step": 258210 }, { "epoch": 7.981702417011807, "grad_norm": 0.0865790843963623, "learning_rate": 1.601052420102615e-05, "loss": 0.0066, "step": 
258240 }, { "epoch": 7.982629659392965, "grad_norm": 0.10333722829818726, "learning_rate": 1.6010060579835572e-05, "loss": 0.0066, "step": 258270 }, { "epoch": 7.983556901774124, "grad_norm": 0.10221466422080994, "learning_rate": 1.600959695864499e-05, "loss": 0.0068, "step": 258300 }, { "epoch": 7.9844841441552825, "grad_norm": 0.1339605301618576, "learning_rate": 1.6009133337454412e-05, "loss": 0.0069, "step": 258330 }, { "epoch": 7.985411386536441, "grad_norm": 0.13134342432022095, "learning_rate": 1.600866971626383e-05, "loss": 0.0071, "step": 258360 }, { "epoch": 7.986338628917599, "grad_norm": 0.20176956057548523, "learning_rate": 1.6008221549112938e-05, "loss": 0.0065, "step": 258390 }, { "epoch": 7.987265871298757, "grad_norm": 0.12835431098937988, "learning_rate": 1.600775792792236e-05, "loss": 0.007, "step": 258420 }, { "epoch": 7.988193113679916, "grad_norm": 0.11092667281627655, "learning_rate": 1.600729430673178e-05, "loss": 0.006, "step": 258450 }, { "epoch": 7.989120356061075, "grad_norm": 0.07904869318008423, "learning_rate": 1.6006830685541202e-05, "loss": 0.0072, "step": 258480 }, { "epoch": 7.990047598442233, "grad_norm": 0.1306617558002472, "learning_rate": 1.6006367064350624e-05, "loss": 0.0063, "step": 258510 }, { "epoch": 7.990974840823391, "grad_norm": 0.08622704446315765, "learning_rate": 1.6005903443160042e-05, "loss": 0.0071, "step": 258540 }, { "epoch": 7.9919020832045495, "grad_norm": 0.12570269405841827, "learning_rate": 1.6005439821969464e-05, "loss": 0.0063, "step": 258570 }, { "epoch": 7.992829325585708, "grad_norm": 0.1439700573682785, "learning_rate": 1.6004976200778885e-05, "loss": 0.0063, "step": 258600 }, { "epoch": 7.993756567966867, "grad_norm": 0.19277624785900116, "learning_rate": 1.6004512579588307e-05, "loss": 0.0066, "step": 258630 }, { "epoch": 7.994683810348025, "grad_norm": 0.06870109587907791, "learning_rate": 1.6004048958397728e-05, "loss": 0.0065, "step": 258660 }, { "epoch": 7.995611052729183, "grad_norm": 
0.18028171360492706, "learning_rate": 1.600358533720715e-05, "loss": 0.0069, "step": 258690 }, { "epoch": 7.996538295110342, "grad_norm": 0.11602684110403061, "learning_rate": 1.6003121716016568e-05, "loss": 0.0064, "step": 258720 }, { "epoch": 7.9974655374915, "grad_norm": 0.11869219690561295, "learning_rate": 1.600265809482599e-05, "loss": 0.0072, "step": 258750 }, { "epoch": 7.998392779872659, "grad_norm": 0.09894774109125137, "learning_rate": 1.600219447363541e-05, "loss": 0.0066, "step": 258780 }, { "epoch": 7.999320022253817, "grad_norm": 0.15984956920146942, "learning_rate": 1.600173085244483e-05, "loss": 0.0062, "step": 258810 }, { "epoch": 8.000247264634975, "grad_norm": 0.14436835050582886, "learning_rate": 1.600126723125425e-05, "loss": 0.007, "step": 258840 }, { "epoch": 8.001174507016135, "grad_norm": 0.06939083337783813, "learning_rate": 1.6000803610063672e-05, "loss": 0.0066, "step": 258870 }, { "epoch": 8.002101749397292, "grad_norm": 0.12926743924617767, "learning_rate": 1.6000339988873093e-05, "loss": 0.0063, "step": 258900 }, { "epoch": 8.00302899177845, "grad_norm": 0.1347460150718689, "learning_rate": 1.5999876367682515e-05, "loss": 0.0068, "step": 258930 }, { "epoch": 8.00395623415961, "grad_norm": 0.1501941680908203, "learning_rate": 1.5999412746491933e-05, "loss": 0.0067, "step": 258960 }, { "epoch": 8.004883476540767, "grad_norm": 0.10081412643194199, "learning_rate": 1.5998949125301354e-05, "loss": 0.0069, "step": 258990 }, { "epoch": 8.005810718921927, "grad_norm": 0.11145273596048355, "learning_rate": 1.5998485504110776e-05, "loss": 0.0066, "step": 259020 }, { "epoch": 8.006737961303084, "grad_norm": 0.1194317415356636, "learning_rate": 1.5998021882920197e-05, "loss": 0.0064, "step": 259050 }, { "epoch": 8.007665203684244, "grad_norm": 0.1560990959405899, "learning_rate": 1.599755826172962e-05, "loss": 0.0071, "step": 259080 }, { "epoch": 8.008592446065402, "grad_norm": 0.11043158918619156, "learning_rate": 1.599709464053904e-05, "loss": 
0.0068, "step": 259110 }, { "epoch": 8.00951968844656, "grad_norm": 0.13531287014484406, "learning_rate": 1.599663101934846e-05, "loss": 0.0057, "step": 259140 }, { "epoch": 8.010446930827719, "grad_norm": 0.13598352670669556, "learning_rate": 1.599616739815788e-05, "loss": 0.0067, "step": 259170 }, { "epoch": 8.011374173208877, "grad_norm": 0.08506104350090027, "learning_rate": 1.5995703776967298e-05, "loss": 0.0064, "step": 259200 }, { "epoch": 8.012301415590036, "grad_norm": 0.15599092841148376, "learning_rate": 1.599524015577672e-05, "loss": 0.0063, "step": 259230 }, { "epoch": 8.013228657971194, "grad_norm": 0.13324810564517975, "learning_rate": 1.599477653458614e-05, "loss": 0.0062, "step": 259260 }, { "epoch": 8.014155900352351, "grad_norm": 0.09311795979738235, "learning_rate": 1.5994312913395563e-05, "loss": 0.0059, "step": 259290 }, { "epoch": 8.015083142733511, "grad_norm": 0.1768573671579361, "learning_rate": 1.5993849292204984e-05, "loss": 0.006, "step": 259320 }, { "epoch": 8.016010385114669, "grad_norm": 0.1539720892906189, "learning_rate": 1.5993385671014406e-05, "loss": 0.0065, "step": 259350 }, { "epoch": 8.016937627495828, "grad_norm": 0.11312985420227051, "learning_rate": 1.5992922049823824e-05, "loss": 0.0069, "step": 259380 }, { "epoch": 8.017864869876986, "grad_norm": 0.12452046573162079, "learning_rate": 1.5992458428633245e-05, "loss": 0.0059, "step": 259410 }, { "epoch": 8.018792112258144, "grad_norm": 0.10352639108896255, "learning_rate": 1.5991994807442667e-05, "loss": 0.0072, "step": 259440 }, { "epoch": 8.019719354639303, "grad_norm": 0.07284451276063919, "learning_rate": 1.599153118625209e-05, "loss": 0.006, "step": 259470 }, { "epoch": 8.02064659702046, "grad_norm": 0.14255958795547485, "learning_rate": 1.599106756506151e-05, "loss": 0.0059, "step": 259500 }, { "epoch": 8.02157383940162, "grad_norm": 0.09251603484153748, "learning_rate": 1.5990603943870928e-05, "loss": 0.0067, "step": 259530 }, { "epoch": 8.022501081782778, 
"grad_norm": 0.09310510754585266, "learning_rate": 1.599014032268035e-05, "loss": 0.0062, "step": 259560 }, { "epoch": 8.023428324163936, "grad_norm": 0.1295253485441208, "learning_rate": 1.598967670148977e-05, "loss": 0.006, "step": 259590 }, { "epoch": 8.024355566545095, "grad_norm": 0.1344931721687317, "learning_rate": 1.598921308029919e-05, "loss": 0.0064, "step": 259620 }, { "epoch": 8.025282808926253, "grad_norm": 0.09966069459915161, "learning_rate": 1.598874945910861e-05, "loss": 0.0064, "step": 259650 }, { "epoch": 8.026210051307412, "grad_norm": 0.09050475060939789, "learning_rate": 1.5988285837918036e-05, "loss": 0.0064, "step": 259680 }, { "epoch": 8.02713729368857, "grad_norm": 0.10712116956710815, "learning_rate": 1.5987822216727454e-05, "loss": 0.0062, "step": 259710 }, { "epoch": 8.028064536069728, "grad_norm": 0.0917188972234726, "learning_rate": 1.5987358595536875e-05, "loss": 0.0063, "step": 259740 }, { "epoch": 8.028991778450887, "grad_norm": 0.10027489811182022, "learning_rate": 1.5986894974346297e-05, "loss": 0.0066, "step": 259770 }, { "epoch": 8.029919020832045, "grad_norm": 0.050986453890800476, "learning_rate": 1.5986431353155715e-05, "loss": 0.0055, "step": 259800 }, { "epoch": 8.030846263213204, "grad_norm": 0.09183148294687271, "learning_rate": 1.5985967731965136e-05, "loss": 0.0065, "step": 259830 }, { "epoch": 8.031773505594362, "grad_norm": 0.10573738068342209, "learning_rate": 1.5985504110774558e-05, "loss": 0.0062, "step": 259860 }, { "epoch": 8.03270074797552, "grad_norm": 0.1414700448513031, "learning_rate": 1.598504048958398e-05, "loss": 0.0063, "step": 259890 }, { "epoch": 8.03362799035668, "grad_norm": 0.12663115561008453, "learning_rate": 1.59845768683934e-05, "loss": 0.0066, "step": 259920 }, { "epoch": 8.034555232737837, "grad_norm": 0.16688813269138336, "learning_rate": 1.598411324720282e-05, "loss": 0.0066, "step": 259950 }, { "epoch": 8.035482475118997, "grad_norm": 0.15320375561714172, "learning_rate": 
1.598364962601224e-05, "loss": 0.0066, "step": 259980 }, { "epoch": 8.036409717500154, "grad_norm": 0.12409599870443344, "learning_rate": 1.5983186004821662e-05, "loss": 0.0061, "step": 260010 }, { "epoch": 8.037336959881314, "grad_norm": 0.12467770278453827, "learning_rate": 1.5982722383631083e-05, "loss": 0.0064, "step": 260040 }, { "epoch": 8.038264202262472, "grad_norm": 0.11776434630155563, "learning_rate": 1.5982258762440505e-05, "loss": 0.0063, "step": 260070 }, { "epoch": 8.03919144464363, "grad_norm": 0.12491792440414429, "learning_rate": 1.5981795141249926e-05, "loss": 0.0067, "step": 260100 }, { "epoch": 8.040118687024789, "grad_norm": 0.12122653424739838, "learning_rate": 1.5981331520059345e-05, "loss": 0.0059, "step": 260130 }, { "epoch": 8.041045929405946, "grad_norm": 0.1175227090716362, "learning_rate": 1.5980867898868766e-05, "loss": 0.0062, "step": 260160 }, { "epoch": 8.041973171787106, "grad_norm": 0.09937304258346558, "learning_rate": 1.5980404277678184e-05, "loss": 0.006, "step": 260190 }, { "epoch": 8.042900414168264, "grad_norm": 0.1402505338191986, "learning_rate": 1.5979940656487606e-05, "loss": 0.0066, "step": 260220 }, { "epoch": 8.043827656549421, "grad_norm": 0.09570645540952682, "learning_rate": 1.5979477035297027e-05, "loss": 0.0064, "step": 260250 }, { "epoch": 8.04475489893058, "grad_norm": 0.1286238431930542, "learning_rate": 1.597901341410645e-05, "loss": 0.006, "step": 260280 }, { "epoch": 8.045682141311739, "grad_norm": 0.12236706912517548, "learning_rate": 1.597854979291587e-05, "loss": 0.006, "step": 260310 }, { "epoch": 8.046609383692898, "grad_norm": 0.09904760122299194, "learning_rate": 1.5978086171725292e-05, "loss": 0.0055, "step": 260340 }, { "epoch": 8.047536626074056, "grad_norm": 0.08561387658119202, "learning_rate": 1.597762255053471e-05, "loss": 0.0067, "step": 260370 }, { "epoch": 8.048463868455213, "grad_norm": 0.07848475128412247, "learning_rate": 1.597715892934413e-05, "loss": 0.0064, "step": 260400 }, { 
"epoch": 8.049391110836373, "grad_norm": 0.13280360400676727, "learning_rate": 1.597671076219324e-05, "loss": 0.0065, "step": 260430 }, { "epoch": 8.05031835321753, "grad_norm": 0.07887663692235947, "learning_rate": 1.5976247141002657e-05, "loss": 0.0067, "step": 260460 }, { "epoch": 8.05124559559869, "grad_norm": 0.1437966674566269, "learning_rate": 1.597578351981208e-05, "loss": 0.0065, "step": 260490 }, { "epoch": 8.052172837979848, "grad_norm": 0.06771697849035263, "learning_rate": 1.59753198986215e-05, "loss": 0.0067, "step": 260520 }, { "epoch": 8.053100080361006, "grad_norm": 0.07154274731874466, "learning_rate": 1.5974856277430922e-05, "loss": 0.0055, "step": 260550 }, { "epoch": 8.054027322742165, "grad_norm": 0.16815757751464844, "learning_rate": 1.5974392656240343e-05, "loss": 0.0064, "step": 260580 }, { "epoch": 8.054954565123323, "grad_norm": 0.0853024274110794, "learning_rate": 1.597392903504976e-05, "loss": 0.0058, "step": 260610 }, { "epoch": 8.055881807504482, "grad_norm": 0.11568588018417358, "learning_rate": 1.5973465413859183e-05, "loss": 0.0066, "step": 260640 }, { "epoch": 8.05680904988564, "grad_norm": 0.08826812356710434, "learning_rate": 1.5973001792668605e-05, "loss": 0.0063, "step": 260670 }, { "epoch": 8.057736292266798, "grad_norm": 0.13427463173866272, "learning_rate": 1.5972538171478026e-05, "loss": 0.0058, "step": 260700 }, { "epoch": 8.058663534647957, "grad_norm": 0.1235925555229187, "learning_rate": 1.5972074550287448e-05, "loss": 0.0066, "step": 260730 }, { "epoch": 8.059590777029115, "grad_norm": 0.12503129243850708, "learning_rate": 1.597161092909687e-05, "loss": 0.006, "step": 260760 }, { "epoch": 8.060518019410274, "grad_norm": 0.10361365228891373, "learning_rate": 1.5971147307906287e-05, "loss": 0.006, "step": 260790 }, { "epoch": 8.061445261791432, "grad_norm": 0.1382398009300232, "learning_rate": 1.597068368671571e-05, "loss": 0.0063, "step": 260820 }, { "epoch": 8.062372504172592, "grad_norm": 0.184842050075531, 
"learning_rate": 1.597022006552513e-05, "loss": 0.0059, "step": 260850 }, { "epoch": 8.06329974655375, "grad_norm": 0.15871727466583252, "learning_rate": 1.596975644433455e-05, "loss": 0.0067, "step": 260880 }, { "epoch": 8.064226988934907, "grad_norm": 0.12138864398002625, "learning_rate": 1.596929282314397e-05, "loss": 0.0061, "step": 260910 }, { "epoch": 8.065154231316066, "grad_norm": 0.11050425469875336, "learning_rate": 1.596882920195339e-05, "loss": 0.0068, "step": 260940 }, { "epoch": 8.066081473697224, "grad_norm": 0.1341008096933365, "learning_rate": 1.5968365580762813e-05, "loss": 0.007, "step": 260970 }, { "epoch": 8.067008716078384, "grad_norm": 0.1489008665084839, "learning_rate": 1.5967901959572234e-05, "loss": 0.0065, "step": 261000 }, { "epoch": 8.067935958459541, "grad_norm": 0.0879857987165451, "learning_rate": 1.5967438338381652e-05, "loss": 0.0068, "step": 261030 }, { "epoch": 8.068863200840699, "grad_norm": 0.08951449394226074, "learning_rate": 1.5966974717191074e-05, "loss": 0.0068, "step": 261060 }, { "epoch": 8.069790443221859, "grad_norm": 0.07664874941110611, "learning_rate": 1.5966511096000495e-05, "loss": 0.0057, "step": 261090 }, { "epoch": 8.070717685603016, "grad_norm": 0.12240339070558548, "learning_rate": 1.5966047474809917e-05, "loss": 0.0068, "step": 261120 }, { "epoch": 8.071644927984176, "grad_norm": 0.1524536907672882, "learning_rate": 1.596558385361934e-05, "loss": 0.0067, "step": 261150 }, { "epoch": 8.072572170365333, "grad_norm": 0.09385211020708084, "learning_rate": 1.596512023242876e-05, "loss": 0.0064, "step": 261180 }, { "epoch": 8.073499412746491, "grad_norm": 0.08499732613563538, "learning_rate": 1.5964656611238178e-05, "loss": 0.006, "step": 261210 }, { "epoch": 8.07442665512765, "grad_norm": 0.16093285381793976, "learning_rate": 1.59641929900476e-05, "loss": 0.0069, "step": 261240 }, { "epoch": 8.075353897508808, "grad_norm": 0.12974955141544342, "learning_rate": 1.596372936885702e-05, "loss": 0.0067, "step": 
261270 }, { "epoch": 8.076281139889968, "grad_norm": 0.15606482326984406, "learning_rate": 1.5963265747666443e-05, "loss": 0.0066, "step": 261300 }, { "epoch": 8.077208382271126, "grad_norm": 0.13737985491752625, "learning_rate": 1.5962802126475864e-05, "loss": 0.0058, "step": 261330 }, { "epoch": 8.078135624652283, "grad_norm": 0.1140335202217102, "learning_rate": 1.5962338505285282e-05, "loss": 0.0059, "step": 261360 }, { "epoch": 8.079062867033443, "grad_norm": 0.13873623311519623, "learning_rate": 1.5961874884094704e-05, "loss": 0.0063, "step": 261390 }, { "epoch": 8.0799901094146, "grad_norm": 0.099676214158535, "learning_rate": 1.5961411262904125e-05, "loss": 0.0064, "step": 261420 }, { "epoch": 8.08091735179576, "grad_norm": 0.14529037475585938, "learning_rate": 1.5960947641713543e-05, "loss": 0.0063, "step": 261450 }, { "epoch": 8.081844594176918, "grad_norm": 0.1323261857032776, "learning_rate": 1.5960484020522965e-05, "loss": 0.006, "step": 261480 }, { "epoch": 8.082771836558075, "grad_norm": 0.11372833698987961, "learning_rate": 1.5960020399332386e-05, "loss": 0.0073, "step": 261510 }, { "epoch": 8.083699078939235, "grad_norm": 0.1013915091753006, "learning_rate": 1.5959556778141808e-05, "loss": 0.0065, "step": 261540 }, { "epoch": 8.084626321320393, "grad_norm": 0.10845968127250671, "learning_rate": 1.595909315695123e-05, "loss": 0.0067, "step": 261570 }, { "epoch": 8.085553563701552, "grad_norm": 0.10386290401220322, "learning_rate": 1.595862953576065e-05, "loss": 0.0063, "step": 261600 }, { "epoch": 8.08648080608271, "grad_norm": 0.07816269993782043, "learning_rate": 1.595816591457007e-05, "loss": 0.0061, "step": 261630 }, { "epoch": 8.08740804846387, "grad_norm": 0.14963114261627197, "learning_rate": 1.595770229337949e-05, "loss": 0.0062, "step": 261660 }, { "epoch": 8.088335290845027, "grad_norm": 0.11187205463647842, "learning_rate": 1.5957238672188912e-05, "loss": 0.006, "step": 261690 }, { "epoch": 8.089262533226185, "grad_norm": 
0.14616894721984863, "learning_rate": 1.5956775050998334e-05, "loss": 0.0067, "step": 261720 }, { "epoch": 8.090189775607344, "grad_norm": 0.0966181680560112, "learning_rate": 1.5956311429807755e-05, "loss": 0.006, "step": 261750 }, { "epoch": 8.091117017988502, "grad_norm": 0.10709524899721146, "learning_rate": 1.5955847808617173e-05, "loss": 0.0065, "step": 261780 }, { "epoch": 8.092044260369661, "grad_norm": 0.11242125928401947, "learning_rate": 1.5955384187426595e-05, "loss": 0.0064, "step": 261810 }, { "epoch": 8.09297150275082, "grad_norm": 0.1015285924077034, "learning_rate": 1.5954920566236016e-05, "loss": 0.0061, "step": 261840 }, { "epoch": 8.093898745131977, "grad_norm": 0.1594746857881546, "learning_rate": 1.5954456945045434e-05, "loss": 0.0064, "step": 261870 }, { "epoch": 8.094825987513136, "grad_norm": 0.13375921547412872, "learning_rate": 1.5953993323854856e-05, "loss": 0.0065, "step": 261900 }, { "epoch": 8.095753229894294, "grad_norm": 0.12390852719545364, "learning_rate": 1.5953529702664277e-05, "loss": 0.007, "step": 261930 }, { "epoch": 8.096680472275454, "grad_norm": 0.12692782282829285, "learning_rate": 1.59530660814737e-05, "loss": 0.0063, "step": 261960 }, { "epoch": 8.097607714656611, "grad_norm": 0.11727110296487808, "learning_rate": 1.595260246028312e-05, "loss": 0.0063, "step": 261990 }, { "epoch": 8.098534957037769, "grad_norm": 0.10063530504703522, "learning_rate": 1.595213883909254e-05, "loss": 0.0063, "step": 262020 }, { "epoch": 8.099462199418928, "grad_norm": 0.1545792818069458, "learning_rate": 1.595167521790196e-05, "loss": 0.0062, "step": 262050 }, { "epoch": 8.100389441800086, "grad_norm": 0.09667489677667618, "learning_rate": 1.595121159671138e-05, "loss": 0.006, "step": 262080 }, { "epoch": 8.101316684181246, "grad_norm": 0.13984256982803345, "learning_rate": 1.5950747975520803e-05, "loss": 0.0066, "step": 262110 }, { "epoch": 8.102243926562403, "grad_norm": 0.10033301264047623, "learning_rate": 1.5950284354330224e-05, 
"loss": 0.0064, "step": 262140 }, { "epoch": 8.103171168943561, "grad_norm": 0.09819619357585907, "learning_rate": 1.5949820733139646e-05, "loss": 0.0058, "step": 262170 }, { "epoch": 8.10409841132472, "grad_norm": 0.1422540545463562, "learning_rate": 1.5949357111949064e-05, "loss": 0.0063, "step": 262200 }, { "epoch": 8.105025653705878, "grad_norm": 0.09635604172945023, "learning_rate": 1.5948893490758486e-05, "loss": 0.0059, "step": 262230 }, { "epoch": 8.105952896087038, "grad_norm": 0.16456377506256104, "learning_rate": 1.5948429869567907e-05, "loss": 0.0067, "step": 262260 }, { "epoch": 8.106880138468195, "grad_norm": 0.1014232262969017, "learning_rate": 1.5947966248377325e-05, "loss": 0.0072, "step": 262290 }, { "epoch": 8.107807380849353, "grad_norm": 0.08905111998319626, "learning_rate": 1.5947502627186747e-05, "loss": 0.0066, "step": 262320 }, { "epoch": 8.108734623230513, "grad_norm": 0.12830154597759247, "learning_rate": 1.5947039005996168e-05, "loss": 0.0067, "step": 262350 }, { "epoch": 8.10966186561167, "grad_norm": 0.11521276831626892, "learning_rate": 1.594657538480559e-05, "loss": 0.0063, "step": 262380 }, { "epoch": 8.11058910799283, "grad_norm": 0.08443678915500641, "learning_rate": 1.594611176361501e-05, "loss": 0.0061, "step": 262410 }, { "epoch": 8.111516350373988, "grad_norm": 0.10336200892925262, "learning_rate": 1.594564814242443e-05, "loss": 0.0065, "step": 262440 }, { "epoch": 8.112443592755147, "grad_norm": 0.09837941825389862, "learning_rate": 1.594518452123385e-05, "loss": 0.0068, "step": 262470 }, { "epoch": 8.113370835136305, "grad_norm": 0.0878223106265068, "learning_rate": 1.5944720900043272e-05, "loss": 0.0056, "step": 262500 }, { "epoch": 8.114298077517462, "grad_norm": 0.08537878096103668, "learning_rate": 1.5944257278852694e-05, "loss": 0.0064, "step": 262530 }, { "epoch": 8.115225319898622, "grad_norm": 0.12330733984708786, "learning_rate": 1.5943793657662115e-05, "loss": 0.0065, "step": 262560 }, { "epoch": 8.11615256227978, 
"grad_norm": 0.17764148116111755, "learning_rate": 1.5943330036471537e-05, "loss": 0.0062, "step": 262590 }, { "epoch": 8.11707980466094, "grad_norm": 0.078191377222538, "learning_rate": 1.5942866415280955e-05, "loss": 0.0058, "step": 262620 }, { "epoch": 8.118007047042097, "grad_norm": 0.14713604748249054, "learning_rate": 1.5942402794090377e-05, "loss": 0.0067, "step": 262650 }, { "epoch": 8.118934289423255, "grad_norm": 0.14833968877792358, "learning_rate": 1.5941939172899795e-05, "loss": 0.0062, "step": 262680 }, { "epoch": 8.119861531804414, "grad_norm": 0.2185339331626892, "learning_rate": 1.594147555170922e-05, "loss": 0.0065, "step": 262710 }, { "epoch": 8.120788774185572, "grad_norm": 0.10620512813329697, "learning_rate": 1.5941027384558324e-05, "loss": 0.0066, "step": 262740 }, { "epoch": 8.121716016566731, "grad_norm": 0.08658047020435333, "learning_rate": 1.5940563763367746e-05, "loss": 0.007, "step": 262770 }, { "epoch": 8.122643258947889, "grad_norm": 0.06465417891740799, "learning_rate": 1.5940100142177167e-05, "loss": 0.0069, "step": 262800 }, { "epoch": 8.123570501329047, "grad_norm": 0.061356961727142334, "learning_rate": 1.593963652098659e-05, "loss": 0.0057, "step": 262830 }, { "epoch": 8.124497743710206, "grad_norm": 0.13238824903964996, "learning_rate": 1.5939172899796007e-05, "loss": 0.0066, "step": 262860 }, { "epoch": 8.125424986091364, "grad_norm": 0.05443170294165611, "learning_rate": 1.5938709278605428e-05, "loss": 0.0063, "step": 262890 }, { "epoch": 8.126352228472523, "grad_norm": 0.10283228754997253, "learning_rate": 1.593824565741485e-05, "loss": 0.0067, "step": 262920 }, { "epoch": 8.127279470853681, "grad_norm": 0.10219492018222809, "learning_rate": 1.593778203622427e-05, "loss": 0.0067, "step": 262950 }, { "epoch": 8.128206713234839, "grad_norm": 0.09602803736925125, "learning_rate": 1.5937318415033693e-05, "loss": 0.006, "step": 262980 }, { "epoch": 8.129133955615998, "grad_norm": 0.08907563984394073, "learning_rate": 
1.5936854793843114e-05, "loss": 0.0062, "step": 263010 }, { "epoch": 8.130061197997156, "grad_norm": 0.12758201360702515, "learning_rate": 1.5936391172652532e-05, "loss": 0.006, "step": 263040 }, { "epoch": 8.130988440378315, "grad_norm": 0.09463168680667877, "learning_rate": 1.5935927551461954e-05, "loss": 0.0059, "step": 263070 }, { "epoch": 8.131915682759473, "grad_norm": 0.10100224614143372, "learning_rate": 1.5935463930271372e-05, "loss": 0.0059, "step": 263100 }, { "epoch": 8.132842925140631, "grad_norm": 0.08103854209184647, "learning_rate": 1.5935000309080793e-05, "loss": 0.006, "step": 263130 }, { "epoch": 8.13377016752179, "grad_norm": 0.10171747952699661, "learning_rate": 1.5934536687890215e-05, "loss": 0.0073, "step": 263160 }, { "epoch": 8.134697409902948, "grad_norm": 0.0809464082121849, "learning_rate": 1.5934073066699636e-05, "loss": 0.0068, "step": 263190 }, { "epoch": 8.135624652284108, "grad_norm": 0.11677130311727524, "learning_rate": 1.5933609445509058e-05, "loss": 0.0072, "step": 263220 }, { "epoch": 8.136551894665265, "grad_norm": 0.1209045946598053, "learning_rate": 1.593314582431848e-05, "loss": 0.007, "step": 263250 }, { "epoch": 8.137479137046425, "grad_norm": 0.11695186048746109, "learning_rate": 1.5932682203127898e-05, "loss": 0.006, "step": 263280 }, { "epoch": 8.138406379427582, "grad_norm": 0.13179823756217957, "learning_rate": 1.593221858193732e-05, "loss": 0.0064, "step": 263310 }, { "epoch": 8.13933362180874, "grad_norm": 0.10625184327363968, "learning_rate": 1.593175496074674e-05, "loss": 0.0069, "step": 263340 }, { "epoch": 8.1402608641899, "grad_norm": 0.10720623284578323, "learning_rate": 1.5931291339556162e-05, "loss": 0.0064, "step": 263370 }, { "epoch": 8.141188106571057, "grad_norm": 0.1283322274684906, "learning_rate": 1.5930827718365584e-05, "loss": 0.0064, "step": 263400 }, { "epoch": 8.142115348952217, "grad_norm": 0.08916537463665009, "learning_rate": 1.5930364097175002e-05, "loss": 0.0059, "step": 263430 }, { 
"epoch": 8.143042591333375, "grad_norm": 0.1306314617395401, "learning_rate": 1.5929900475984423e-05, "loss": 0.0069, "step": 263460 }, { "epoch": 8.143969833714532, "grad_norm": 0.12681180238723755, "learning_rate": 1.5929436854793845e-05, "loss": 0.0059, "step": 263490 }, { "epoch": 8.144897076095692, "grad_norm": 0.13816149532794952, "learning_rate": 1.5928973233603263e-05, "loss": 0.0059, "step": 263520 }, { "epoch": 8.14582431847685, "grad_norm": 0.11398906260728836, "learning_rate": 1.5928509612412684e-05, "loss": 0.0069, "step": 263550 }, { "epoch": 8.146751560858009, "grad_norm": 0.10247930884361267, "learning_rate": 1.5928045991222106e-05, "loss": 0.0064, "step": 263580 }, { "epoch": 8.147678803239167, "grad_norm": 0.17003990709781647, "learning_rate": 1.5927582370031527e-05, "loss": 0.0069, "step": 263610 }, { "epoch": 8.148606045620324, "grad_norm": 0.12164702266454697, "learning_rate": 1.592711874884095e-05, "loss": 0.0064, "step": 263640 }, { "epoch": 8.149533288001484, "grad_norm": 0.16255763173103333, "learning_rate": 1.592665512765037e-05, "loss": 0.0065, "step": 263670 }, { "epoch": 8.150460530382642, "grad_norm": 0.12668587267398834, "learning_rate": 1.592619150645979e-05, "loss": 0.0058, "step": 263700 }, { "epoch": 8.151387772763801, "grad_norm": 0.12515906989574432, "learning_rate": 1.592572788526921e-05, "loss": 0.0065, "step": 263730 }, { "epoch": 8.152315015144959, "grad_norm": 0.13637444376945496, "learning_rate": 1.592526426407863e-05, "loss": 0.0058, "step": 263760 }, { "epoch": 8.153242257526117, "grad_norm": 0.12066524475812912, "learning_rate": 1.5924800642888053e-05, "loss": 0.0067, "step": 263790 }, { "epoch": 8.154169499907276, "grad_norm": 0.07986338436603546, "learning_rate": 1.5924337021697475e-05, "loss": 0.0065, "step": 263820 }, { "epoch": 8.155096742288434, "grad_norm": 0.10632944107055664, "learning_rate": 1.5923873400506893e-05, "loss": 0.0063, "step": 263850 }, { "epoch": 8.156023984669593, "grad_norm": 
0.18214555084705353, "learning_rate": 1.5923409779316314e-05, "loss": 0.0063, "step": 263880 }, { "epoch": 8.156951227050751, "grad_norm": 0.11485018581151962, "learning_rate": 1.5922946158125736e-05, "loss": 0.0072, "step": 263910 }, { "epoch": 8.157878469431909, "grad_norm": 0.1148841381072998, "learning_rate": 1.5922482536935154e-05, "loss": 0.006, "step": 263940 }, { "epoch": 8.158805711813068, "grad_norm": 0.1122511699795723, "learning_rate": 1.592201891574458e-05, "loss": 0.0067, "step": 263970 }, { "epoch": 8.159732954194226, "grad_norm": 0.11680427193641663, "learning_rate": 1.5921555294554e-05, "loss": 0.0064, "step": 264000 }, { "epoch": 8.160660196575385, "grad_norm": 0.1283273845911026, "learning_rate": 1.592109167336342e-05, "loss": 0.0066, "step": 264030 }, { "epoch": 8.161587438956543, "grad_norm": 0.14231304824352264, "learning_rate": 1.592062805217284e-05, "loss": 0.0064, "step": 264060 }, { "epoch": 8.162514681337703, "grad_norm": 0.16641847789287567, "learning_rate": 1.592016443098226e-05, "loss": 0.0069, "step": 264090 }, { "epoch": 8.16344192371886, "grad_norm": 0.1423911452293396, "learning_rate": 1.591970080979168e-05, "loss": 0.0066, "step": 264120 }, { "epoch": 8.164369166100018, "grad_norm": 0.09852650761604309, "learning_rate": 1.59192371886011e-05, "loss": 0.0063, "step": 264150 }, { "epoch": 8.165296408481177, "grad_norm": 0.12106600403785706, "learning_rate": 1.5918773567410522e-05, "loss": 0.007, "step": 264180 }, { "epoch": 8.166223650862335, "grad_norm": 0.1499922126531601, "learning_rate": 1.5918309946219944e-05, "loss": 0.0063, "step": 264210 }, { "epoch": 8.167150893243495, "grad_norm": 0.16608113050460815, "learning_rate": 1.5917846325029365e-05, "loss": 0.0063, "step": 264240 }, { "epoch": 8.168078135624652, "grad_norm": 0.0988350361585617, "learning_rate": 1.5917382703838784e-05, "loss": 0.0064, "step": 264270 }, { "epoch": 8.16900537800581, "grad_norm": 0.11572512984275818, "learning_rate": 1.5916919082648205e-05, "loss": 
0.0064, "step": 264300 }, { "epoch": 8.16993262038697, "grad_norm": 0.10289166122674942, "learning_rate": 1.5916455461457627e-05, "loss": 0.0064, "step": 264330 }, { "epoch": 8.170859862768127, "grad_norm": 0.17427968978881836, "learning_rate": 1.5915991840267048e-05, "loss": 0.006, "step": 264360 }, { "epoch": 8.171787105149287, "grad_norm": 0.15850239992141724, "learning_rate": 1.591552821907647e-05, "loss": 0.0059, "step": 264390 }, { "epoch": 8.172714347530444, "grad_norm": 0.14233745634555817, "learning_rate": 1.591506459788589e-05, "loss": 0.0061, "step": 264420 }, { "epoch": 8.173641589911602, "grad_norm": 0.07599412649869919, "learning_rate": 1.591460097669531e-05, "loss": 0.0064, "step": 264450 }, { "epoch": 8.174568832292762, "grad_norm": 0.14421184360980988, "learning_rate": 1.591413735550473e-05, "loss": 0.0059, "step": 264480 }, { "epoch": 8.17549607467392, "grad_norm": 0.14217618107795715, "learning_rate": 1.591367373431415e-05, "loss": 0.006, "step": 264510 }, { "epoch": 8.176423317055079, "grad_norm": 0.06365050375461578, "learning_rate": 1.591321011312357e-05, "loss": 0.0066, "step": 264540 }, { "epoch": 8.177350559436237, "grad_norm": 0.09523067623376846, "learning_rate": 1.5912746491932992e-05, "loss": 0.0063, "step": 264570 }, { "epoch": 8.178277801817394, "grad_norm": 0.1310502588748932, "learning_rate": 1.5912282870742413e-05, "loss": 0.0066, "step": 264600 }, { "epoch": 8.179205044198554, "grad_norm": 0.17022447288036346, "learning_rate": 1.5911819249551835e-05, "loss": 0.0066, "step": 264630 }, { "epoch": 8.180132286579711, "grad_norm": 0.10843299329280853, "learning_rate": 1.5911355628361256e-05, "loss": 0.006, "step": 264660 }, { "epoch": 8.181059528960871, "grad_norm": 0.09150389581918716, "learning_rate": 1.5910892007170675e-05, "loss": 0.0062, "step": 264690 }, { "epoch": 8.181986771342029, "grad_norm": 0.15898573398590088, "learning_rate": 1.5910428385980096e-05, "loss": 0.0062, "step": 264720 }, { "epoch": 8.182914013723186, 
"grad_norm": 0.08806435018777847, "learning_rate": 1.5909964764789518e-05, "loss": 0.0069, "step": 264750 }, { "epoch": 8.183841256104346, "grad_norm": 0.14244402945041656, "learning_rate": 1.590950114359894e-05, "loss": 0.0073, "step": 264780 }, { "epoch": 8.184768498485504, "grad_norm": 0.12247679382562637, "learning_rate": 1.590903752240836e-05, "loss": 0.0068, "step": 264810 }, { "epoch": 8.185695740866663, "grad_norm": 0.0998179167509079, "learning_rate": 1.590857390121778e-05, "loss": 0.0073, "step": 264840 }, { "epoch": 8.18662298324782, "grad_norm": 0.08683957904577255, "learning_rate": 1.59081102800272e-05, "loss": 0.0062, "step": 264870 }, { "epoch": 8.18755022562898, "grad_norm": 0.09814494848251343, "learning_rate": 1.590764665883662e-05, "loss": 0.0068, "step": 264900 }, { "epoch": 8.188477468010138, "grad_norm": 0.11479628086090088, "learning_rate": 1.590718303764604e-05, "loss": 0.0066, "step": 264930 }, { "epoch": 8.189404710391296, "grad_norm": 0.15315918624401093, "learning_rate": 1.590671941645546e-05, "loss": 0.0063, "step": 264960 }, { "epoch": 8.190331952772455, "grad_norm": 0.14349670708179474, "learning_rate": 1.5906255795264883e-05, "loss": 0.0063, "step": 264990 }, { "epoch": 8.191259195153613, "grad_norm": 0.12616457045078278, "learning_rate": 1.5905792174074304e-05, "loss": 0.0065, "step": 265020 }, { "epoch": 8.192186437534772, "grad_norm": 0.13243502378463745, "learning_rate": 1.5905328552883726e-05, "loss": 0.0058, "step": 265050 }, { "epoch": 8.19311367991593, "grad_norm": 0.14159643650054932, "learning_rate": 1.5904864931693147e-05, "loss": 0.0063, "step": 265080 }, { "epoch": 8.194040922297088, "grad_norm": 0.12314138561487198, "learning_rate": 1.5904401310502565e-05, "loss": 0.0065, "step": 265110 }, { "epoch": 8.194968164678247, "grad_norm": 0.12624838948249817, "learning_rate": 1.5903937689311987e-05, "loss": 0.0064, "step": 265140 }, { "epoch": 8.195895407059405, "grad_norm": 0.10098715126514435, "learning_rate": 
1.590347406812141e-05, "loss": 0.0061, "step": 265170 }, { "epoch": 8.196822649440564, "grad_norm": 0.17386603355407715, "learning_rate": 1.590301044693083e-05, "loss": 0.0056, "step": 265200 }, { "epoch": 8.197749891821722, "grad_norm": 0.1761353313922882, "learning_rate": 1.590254682574025e-05, "loss": 0.0064, "step": 265230 }, { "epoch": 8.19867713420288, "grad_norm": 0.11197467148303986, "learning_rate": 1.590208320454967e-05, "loss": 0.0064, "step": 265260 }, { "epoch": 8.19960437658404, "grad_norm": 0.06261316686868668, "learning_rate": 1.590161958335909e-05, "loss": 0.0058, "step": 265290 }, { "epoch": 8.200531618965197, "grad_norm": 0.0865790843963623, "learning_rate": 1.5901155962168513e-05, "loss": 0.0066, "step": 265320 }, { "epoch": 8.201458861346357, "grad_norm": 0.11697058379650116, "learning_rate": 1.590069234097793e-05, "loss": 0.0065, "step": 265350 }, { "epoch": 8.202386103727514, "grad_norm": 0.10293494164943695, "learning_rate": 1.5900228719787356e-05, "loss": 0.0055, "step": 265380 }, { "epoch": 8.203313346108672, "grad_norm": 0.11621993780136108, "learning_rate": 1.5899765098596777e-05, "loss": 0.0063, "step": 265410 }, { "epoch": 8.204240588489832, "grad_norm": 0.12391460686922073, "learning_rate": 1.5899301477406195e-05, "loss": 0.0067, "step": 265440 }, { "epoch": 8.20516783087099, "grad_norm": 0.11839043349027634, "learning_rate": 1.5898837856215617e-05, "loss": 0.0064, "step": 265470 }, { "epoch": 8.206095073252149, "grad_norm": 0.11918739229440689, "learning_rate": 1.5898374235025035e-05, "loss": 0.006, "step": 265500 }, { "epoch": 8.207022315633306, "grad_norm": 0.10555066168308258, "learning_rate": 1.5897910613834456e-05, "loss": 0.0067, "step": 265530 }, { "epoch": 8.207949558014464, "grad_norm": 0.12810026109218597, "learning_rate": 1.5897446992643878e-05, "loss": 0.0055, "step": 265560 }, { "epoch": 8.208876800395624, "grad_norm": 0.09459182620048523, "learning_rate": 1.58969833714533e-05, "loss": 0.0067, "step": 265590 }, { 
"epoch": 8.209804042776781, "grad_norm": 0.12471777200698853, "learning_rate": 1.589651975026272e-05, "loss": 0.0067, "step": 265620 }, { "epoch": 8.21073128515794, "grad_norm": 0.08613885194063187, "learning_rate": 1.5896056129072142e-05, "loss": 0.0058, "step": 265650 }, { "epoch": 8.211658527539099, "grad_norm": 0.11481504887342453, "learning_rate": 1.589559250788156e-05, "loss": 0.0063, "step": 265680 }, { "epoch": 8.212585769920258, "grad_norm": 0.09781567752361298, "learning_rate": 1.5895128886690982e-05, "loss": 0.006, "step": 265710 }, { "epoch": 8.213513012301416, "grad_norm": 0.130761981010437, "learning_rate": 1.5894665265500403e-05, "loss": 0.0068, "step": 265740 }, { "epoch": 8.214440254682573, "grad_norm": 0.11824283003807068, "learning_rate": 1.5894201644309825e-05, "loss": 0.0063, "step": 265770 }, { "epoch": 8.215367497063733, "grad_norm": 0.09071952849626541, "learning_rate": 1.5893738023119247e-05, "loss": 0.0061, "step": 265800 }, { "epoch": 8.21629473944489, "grad_norm": 0.125279501080513, "learning_rate": 1.5893274401928665e-05, "loss": 0.0062, "step": 265830 }, { "epoch": 8.21722198182605, "grad_norm": 0.11007511615753174, "learning_rate": 1.5892810780738086e-05, "loss": 0.006, "step": 265860 }, { "epoch": 8.218149224207208, "grad_norm": 0.1043807789683342, "learning_rate": 1.5892347159547508e-05, "loss": 0.0063, "step": 265890 }, { "epoch": 8.219076466588366, "grad_norm": 0.13580958545207977, "learning_rate": 1.5891883538356926e-05, "loss": 0.0058, "step": 265920 }, { "epoch": 8.220003708969525, "grad_norm": 0.10304252803325653, "learning_rate": 1.5891419917166347e-05, "loss": 0.0067, "step": 265950 }, { "epoch": 8.220930951350683, "grad_norm": 0.10739025473594666, "learning_rate": 1.589095629597577e-05, "loss": 0.0062, "step": 265980 }, { "epoch": 8.221858193731842, "grad_norm": 0.09967753291130066, "learning_rate": 1.589049267478519e-05, "loss": 0.0065, "step": 266010 }, { "epoch": 8.222785436113, "grad_norm": 0.08523210138082504, 
"learning_rate": 1.5890029053594612e-05, "loss": 0.0061, "step": 266040 }, { "epoch": 8.223712678494158, "grad_norm": 0.0965539813041687, "learning_rate": 1.5889565432404033e-05, "loss": 0.0063, "step": 266070 }, { "epoch": 8.224639920875317, "grad_norm": 0.08704649657011032, "learning_rate": 1.588910181121345e-05, "loss": 0.0058, "step": 266100 }, { "epoch": 8.225567163256475, "grad_norm": 0.08632051944732666, "learning_rate": 1.5888638190022873e-05, "loss": 0.0059, "step": 266130 }, { "epoch": 8.226494405637634, "grad_norm": 0.11683227121829987, "learning_rate": 1.5888174568832294e-05, "loss": 0.0063, "step": 266160 }, { "epoch": 8.227421648018792, "grad_norm": 0.0870780199766159, "learning_rate": 1.5887710947641716e-05, "loss": 0.0062, "step": 266190 }, { "epoch": 8.22834889039995, "grad_norm": 0.11172301322221756, "learning_rate": 1.5887247326451137e-05, "loss": 0.0062, "step": 266220 }, { "epoch": 8.22927613278111, "grad_norm": 0.0809800922870636, "learning_rate": 1.5886783705260556e-05, "loss": 0.007, "step": 266250 }, { "epoch": 8.230203375162267, "grad_norm": 0.0948539525270462, "learning_rate": 1.5886320084069977e-05, "loss": 0.0064, "step": 266280 }, { "epoch": 8.231130617543426, "grad_norm": 0.16432371735572815, "learning_rate": 1.58858564628794e-05, "loss": 0.0061, "step": 266310 }, { "epoch": 8.232057859924584, "grad_norm": 0.12191648036241531, "learning_rate": 1.5885392841688817e-05, "loss": 0.0065, "step": 266340 }, { "epoch": 8.232985102305742, "grad_norm": 0.082489512860775, "learning_rate": 1.5884929220498238e-05, "loss": 0.0063, "step": 266370 }, { "epoch": 8.233912344686901, "grad_norm": 0.14919926226139069, "learning_rate": 1.5884481053347346e-05, "loss": 0.0064, "step": 266400 }, { "epoch": 8.234839587068059, "grad_norm": 0.08785109966993332, "learning_rate": 1.5884017432156768e-05, "loss": 0.0066, "step": 266430 }, { "epoch": 8.235766829449219, "grad_norm": 0.13547812402248383, "learning_rate": 1.588355381096619e-05, "loss": 0.0063, "step": 
266460 }, { "epoch": 8.236694071830376, "grad_norm": 0.07414530962705612, "learning_rate": 1.588309018977561e-05, "loss": 0.0064, "step": 266490 }, { "epoch": 8.237621314211536, "grad_norm": 0.13154955208301544, "learning_rate": 1.588262656858503e-05, "loss": 0.0065, "step": 266520 }, { "epoch": 8.238548556592693, "grad_norm": 0.15156687796115875, "learning_rate": 1.588216294739445e-05, "loss": 0.006, "step": 266550 }, { "epoch": 8.239475798973851, "grad_norm": 0.05646660551428795, "learning_rate": 1.588169932620387e-05, "loss": 0.0064, "step": 266580 }, { "epoch": 8.24040304135501, "grad_norm": 0.11572501063346863, "learning_rate": 1.588123570501329e-05, "loss": 0.0063, "step": 266610 }, { "epoch": 8.241330283736168, "grad_norm": 0.12437744438648224, "learning_rate": 1.5880772083822715e-05, "loss": 0.0065, "step": 266640 }, { "epoch": 8.242257526117328, "grad_norm": 0.10224416106939316, "learning_rate": 1.5880308462632133e-05, "loss": 0.006, "step": 266670 }, { "epoch": 8.243184768498486, "grad_norm": 0.09926687180995941, "learning_rate": 1.5879844841441554e-05, "loss": 0.0061, "step": 266700 }, { "epoch": 8.244112010879643, "grad_norm": 0.12464900314807892, "learning_rate": 1.5879381220250976e-05, "loss": 0.0062, "step": 266730 }, { "epoch": 8.245039253260803, "grad_norm": 0.13825617730617523, "learning_rate": 1.5878917599060394e-05, "loss": 0.0063, "step": 266760 }, { "epoch": 8.24596649564196, "grad_norm": 0.12224047631025314, "learning_rate": 1.5878453977869816e-05, "loss": 0.0064, "step": 266790 }, { "epoch": 8.24689373802312, "grad_norm": 0.11830412596464157, "learning_rate": 1.5877990356679237e-05, "loss": 0.0061, "step": 266820 }, { "epoch": 8.247820980404278, "grad_norm": 0.09230925887823105, "learning_rate": 1.587752673548866e-05, "loss": 0.0069, "step": 266850 }, { "epoch": 8.248748222785435, "grad_norm": 0.14886474609375, "learning_rate": 1.587706311429808e-05, "loss": 0.0064, "step": 266880 }, { "epoch": 8.249675465166595, "grad_norm": 
0.11011743545532227, "learning_rate": 1.58765994931075e-05, "loss": 0.006, "step": 266910 }, { "epoch": 8.250602707547753, "grad_norm": 0.14347846806049347, "learning_rate": 1.587613587191692e-05, "loss": 0.0066, "step": 266940 }, { "epoch": 8.251529949928912, "grad_norm": 0.1136382520198822, "learning_rate": 1.587567225072634e-05, "loss": 0.0067, "step": 266970 }, { "epoch": 8.25245719231007, "grad_norm": 0.11576756834983826, "learning_rate": 1.5875208629535763e-05, "loss": 0.0063, "step": 267000 }, { "epoch": 8.253384434691228, "grad_norm": 0.11132458597421646, "learning_rate": 1.5874745008345184e-05, "loss": 0.0058, "step": 267030 }, { "epoch": 8.254311677072387, "grad_norm": 0.054799001663923264, "learning_rate": 1.5874281387154606e-05, "loss": 0.0063, "step": 267060 }, { "epoch": 8.255238919453545, "grad_norm": 0.0737808346748352, "learning_rate": 1.5873817765964024e-05, "loss": 0.006, "step": 267090 }, { "epoch": 8.256166161834704, "grad_norm": 0.1467932164669037, "learning_rate": 1.5873354144773445e-05, "loss": 0.0058, "step": 267120 }, { "epoch": 8.257093404215862, "grad_norm": 0.16766145825386047, "learning_rate": 1.5872890523582867e-05, "loss": 0.0064, "step": 267150 }, { "epoch": 8.25802064659702, "grad_norm": 0.12879227101802826, "learning_rate": 1.5872426902392285e-05, "loss": 0.0062, "step": 267180 }, { "epoch": 8.258947888978179, "grad_norm": 0.1146525889635086, "learning_rate": 1.5871963281201706e-05, "loss": 0.0063, "step": 267210 }, { "epoch": 8.259875131359337, "grad_norm": 0.13277076184749603, "learning_rate": 1.5871499660011128e-05, "loss": 0.0063, "step": 267240 }, { "epoch": 8.260802373740496, "grad_norm": 0.13148382306098938, "learning_rate": 1.587103603882055e-05, "loss": 0.0068, "step": 267270 }, { "epoch": 8.261729616121654, "grad_norm": 0.15450604259967804, "learning_rate": 1.587057241762997e-05, "loss": 0.0054, "step": 267300 }, { "epoch": 8.262656858502812, "grad_norm": 0.1496032327413559, "learning_rate": 1.587010879643939e-05, 
"loss": 0.007, "step": 267330 }, { "epoch": 8.263584100883971, "grad_norm": 0.0880904570221901, "learning_rate": 1.586964517524881e-05, "loss": 0.006, "step": 267360 }, { "epoch": 8.264511343265129, "grad_norm": 0.1023809015750885, "learning_rate": 1.5869181554058232e-05, "loss": 0.0065, "step": 267390 }, { "epoch": 8.265438585646288, "grad_norm": 0.12388278543949127, "learning_rate": 1.5868717932867654e-05, "loss": 0.0061, "step": 267420 }, { "epoch": 8.266365828027446, "grad_norm": 0.08344301581382751, "learning_rate": 1.5868254311677075e-05, "loss": 0.0063, "step": 267450 }, { "epoch": 8.267293070408606, "grad_norm": 0.09196151047945023, "learning_rate": 1.5867790690486497e-05, "loss": 0.0061, "step": 267480 }, { "epoch": 8.268220312789763, "grad_norm": 0.11675183475017548, "learning_rate": 1.5867327069295915e-05, "loss": 0.0063, "step": 267510 }, { "epoch": 8.269147555170921, "grad_norm": 0.1394253522157669, "learning_rate": 1.5866863448105336e-05, "loss": 0.0061, "step": 267540 }, { "epoch": 8.27007479755208, "grad_norm": 0.13196377456188202, "learning_rate": 1.5866399826914758e-05, "loss": 0.0062, "step": 267570 }, { "epoch": 8.271002039933238, "grad_norm": 0.11488600820302963, "learning_rate": 1.5865936205724176e-05, "loss": 0.0072, "step": 267600 }, { "epoch": 8.271929282314398, "grad_norm": 0.11961962282657623, "learning_rate": 1.5865472584533597e-05, "loss": 0.0063, "step": 267630 }, { "epoch": 8.272856524695555, "grad_norm": 0.09105018526315689, "learning_rate": 1.5865024417382705e-05, "loss": 0.0066, "step": 267660 }, { "epoch": 8.273783767076713, "grad_norm": 0.11688140779733658, "learning_rate": 1.5864560796192127e-05, "loss": 0.006, "step": 267690 }, { "epoch": 8.274711009457873, "grad_norm": 0.11345534026622772, "learning_rate": 1.5864097175001548e-05, "loss": 0.0061, "step": 267720 }, { "epoch": 8.27563825183903, "grad_norm": 0.3249344229698181, "learning_rate": 1.5863633553810966e-05, "loss": 0.0067, "step": 267750 }, { "epoch": 8.27656549422019, 
"grad_norm": 0.1072339415550232, "learning_rate": 1.5863169932620388e-05, "loss": 0.0061, "step": 267780 }, { "epoch": 8.277492736601348, "grad_norm": 0.16328836977481842, "learning_rate": 1.586270631142981e-05, "loss": 0.0056, "step": 267810 }, { "epoch": 8.278419978982505, "grad_norm": 0.09090255200862885, "learning_rate": 1.5862242690239228e-05, "loss": 0.0067, "step": 267840 }, { "epoch": 8.279347221363665, "grad_norm": 0.07903538644313812, "learning_rate": 1.586177906904865e-05, "loss": 0.0067, "step": 267870 }, { "epoch": 8.280274463744822, "grad_norm": 0.18468281626701355, "learning_rate": 1.586131544785807e-05, "loss": 0.0066, "step": 267900 }, { "epoch": 8.281201706125982, "grad_norm": 0.18207363784313202, "learning_rate": 1.5860851826667492e-05, "loss": 0.0062, "step": 267930 }, { "epoch": 8.28212894850714, "grad_norm": 0.13568565249443054, "learning_rate": 1.5860388205476914e-05, "loss": 0.0067, "step": 267960 }, { "epoch": 8.283056190888297, "grad_norm": 0.08487759530544281, "learning_rate": 1.5859924584286335e-05, "loss": 0.0055, "step": 267990 }, { "epoch": 8.283983433269457, "grad_norm": 0.11275547742843628, "learning_rate": 1.5859460963095753e-05, "loss": 0.0065, "step": 268020 }, { "epoch": 8.284910675650615, "grad_norm": 0.12941938638687134, "learning_rate": 1.5858997341905175e-05, "loss": 0.0062, "step": 268050 }, { "epoch": 8.285837918031774, "grad_norm": 0.12079945206642151, "learning_rate": 1.5858533720714596e-05, "loss": 0.0063, "step": 268080 }, { "epoch": 8.286765160412932, "grad_norm": 0.1037740558385849, "learning_rate": 1.5858070099524018e-05, "loss": 0.0064, "step": 268110 }, { "epoch": 8.28769240279409, "grad_norm": 0.11507614701986313, "learning_rate": 1.585760647833344e-05, "loss": 0.006, "step": 268140 }, { "epoch": 8.288619645175249, "grad_norm": 0.0880294144153595, "learning_rate": 1.5857142857142857e-05, "loss": 0.0065, "step": 268170 }, { "epoch": 8.289546887556407, "grad_norm": 0.12921422719955444, "learning_rate": 
1.585667923595228e-05, "loss": 0.0064, "step": 268200 }, { "epoch": 8.290474129937566, "grad_norm": 0.09939799457788467, "learning_rate": 1.58562156147617e-05, "loss": 0.0066, "step": 268230 }, { "epoch": 8.291401372318724, "grad_norm": 0.16818977892398834, "learning_rate": 1.5855751993571122e-05, "loss": 0.0065, "step": 268260 }, { "epoch": 8.292328614699883, "grad_norm": 0.11079001426696777, "learning_rate": 1.5855288372380543e-05, "loss": 0.0059, "step": 268290 }, { "epoch": 8.293255857081041, "grad_norm": 0.13892509043216705, "learning_rate": 1.5854824751189965e-05, "loss": 0.0061, "step": 268320 }, { "epoch": 8.294183099462199, "grad_norm": 0.15741002559661865, "learning_rate": 1.5854361129999383e-05, "loss": 0.0069, "step": 268350 }, { "epoch": 8.295110341843358, "grad_norm": 0.10168596357107162, "learning_rate": 1.5853897508808804e-05, "loss": 0.0063, "step": 268380 }, { "epoch": 8.296037584224516, "grad_norm": 0.10321073979139328, "learning_rate": 1.5853433887618223e-05, "loss": 0.0063, "step": 268410 }, { "epoch": 8.296964826605675, "grad_norm": 0.10567811876535416, "learning_rate": 1.5852970266427644e-05, "loss": 0.0065, "step": 268440 }, { "epoch": 8.297892068986833, "grad_norm": 0.11364858597517014, "learning_rate": 1.5852506645237066e-05, "loss": 0.0061, "step": 268470 }, { "epoch": 8.298819311367991, "grad_norm": 0.11283225566148758, "learning_rate": 1.5852043024046487e-05, "loss": 0.0058, "step": 268500 }, { "epoch": 8.29974655374915, "grad_norm": 0.0887301117181778, "learning_rate": 1.585157940285591e-05, "loss": 0.0064, "step": 268530 }, { "epoch": 8.300673796130308, "grad_norm": 0.13797613978385925, "learning_rate": 1.585111578166533e-05, "loss": 0.0069, "step": 268560 }, { "epoch": 8.301601038511468, "grad_norm": 0.08420757949352264, "learning_rate": 1.5850652160474748e-05, "loss": 0.0067, "step": 268590 }, { "epoch": 8.302528280892625, "grad_norm": 0.12038520723581314, "learning_rate": 1.585018853928417e-05, "loss": 0.0064, "step": 268620 }, { 
"epoch": 8.303455523273783, "grad_norm": 0.1123599112033844, "learning_rate": 1.584972491809359e-05, "loss": 0.0059, "step": 268650 }, { "epoch": 8.304382765654942, "grad_norm": 0.13139675557613373, "learning_rate": 1.5849261296903013e-05, "loss": 0.0064, "step": 268680 }, { "epoch": 8.3053100080361, "grad_norm": 0.10484274476766586, "learning_rate": 1.5848797675712434e-05, "loss": 0.006, "step": 268710 }, { "epoch": 8.30623725041726, "grad_norm": 0.14529329538345337, "learning_rate": 1.5848334054521852e-05, "loss": 0.0067, "step": 268740 }, { "epoch": 8.307164492798417, "grad_norm": 0.11664418876171112, "learning_rate": 1.5847870433331274e-05, "loss": 0.0067, "step": 268770 }, { "epoch": 8.308091735179575, "grad_norm": 0.10204381495714188, "learning_rate": 1.5847406812140695e-05, "loss": 0.006, "step": 268800 }, { "epoch": 8.309018977560735, "grad_norm": 0.09657824784517288, "learning_rate": 1.5846943190950113e-05, "loss": 0.0064, "step": 268830 }, { "epoch": 8.309946219941892, "grad_norm": 0.1446083039045334, "learning_rate": 1.5846479569759535e-05, "loss": 0.0065, "step": 268860 }, { "epoch": 8.310873462323052, "grad_norm": 0.10476686805486679, "learning_rate": 1.5846015948568957e-05, "loss": 0.0064, "step": 268890 }, { "epoch": 8.31180070470421, "grad_norm": 0.07973150163888931, "learning_rate": 1.5845552327378378e-05, "loss": 0.0067, "step": 268920 }, { "epoch": 8.312727947085367, "grad_norm": 0.10724207758903503, "learning_rate": 1.58450887061878e-05, "loss": 0.007, "step": 268950 }, { "epoch": 8.313655189466527, "grad_norm": 0.09494487941265106, "learning_rate": 1.584462508499722e-05, "loss": 0.0069, "step": 268980 }, { "epoch": 8.314582431847684, "grad_norm": 0.0934271365404129, "learning_rate": 1.584416146380664e-05, "loss": 0.0064, "step": 269010 }, { "epoch": 8.315509674228844, "grad_norm": 0.10957089066505432, "learning_rate": 1.584369784261606e-05, "loss": 0.0062, "step": 269040 }, { "epoch": 8.316436916610002, "grad_norm": 0.13315634429454803, 
"learning_rate": 1.5843234221425482e-05, "loss": 0.0075, "step": 269070 }, { "epoch": 8.317364158991161, "grad_norm": 0.17014990746974945, "learning_rate": 1.5842770600234904e-05, "loss": 0.0065, "step": 269100 }, { "epoch": 8.318291401372319, "grad_norm": 0.1046110987663269, "learning_rate": 1.5842306979044325e-05, "loss": 0.0063, "step": 269130 }, { "epoch": 8.319218643753477, "grad_norm": 0.09067387133836746, "learning_rate": 1.5841843357853743e-05, "loss": 0.006, "step": 269160 }, { "epoch": 8.320145886134636, "grad_norm": 0.12409986555576324, "learning_rate": 1.5841379736663165e-05, "loss": 0.0068, "step": 269190 }, { "epoch": 8.321073128515794, "grad_norm": 0.1647755652666092, "learning_rate": 1.5840916115472586e-05, "loss": 0.006, "step": 269220 }, { "epoch": 8.322000370896953, "grad_norm": 0.1359792947769165, "learning_rate": 1.5840452494282004e-05, "loss": 0.0063, "step": 269250 }, { "epoch": 8.322927613278111, "grad_norm": 0.15449972450733185, "learning_rate": 1.5839988873091426e-05, "loss": 0.0063, "step": 269280 }, { "epoch": 8.323854855659269, "grad_norm": 0.10938505083322525, "learning_rate": 1.5839525251900847e-05, "loss": 0.0061, "step": 269310 }, { "epoch": 8.324782098040428, "grad_norm": 0.10197658091783524, "learning_rate": 1.583906163071027e-05, "loss": 0.0061, "step": 269340 }, { "epoch": 8.325709340421586, "grad_norm": 0.06840084493160248, "learning_rate": 1.583859800951969e-05, "loss": 0.006, "step": 269370 }, { "epoch": 8.326636582802745, "grad_norm": 0.14324885606765747, "learning_rate": 1.583813438832911e-05, "loss": 0.0061, "step": 269400 }, { "epoch": 8.327563825183903, "grad_norm": 0.12139880657196045, "learning_rate": 1.583767076713853e-05, "loss": 0.0061, "step": 269430 }, { "epoch": 8.32849106756506, "grad_norm": 0.08760574460029602, "learning_rate": 1.583720714594795e-05, "loss": 0.0067, "step": 269460 }, { "epoch": 8.32941830994622, "grad_norm": 0.08115464448928833, "learning_rate": 1.5836743524757373e-05, "loss": 0.0063, "step": 
269490 }, { "epoch": 8.330345552327378, "grad_norm": 0.11283452063798904, "learning_rate": 1.5836279903566795e-05, "loss": 0.0072, "step": 269520 }, { "epoch": 8.331272794708537, "grad_norm": 0.11198188364505768, "learning_rate": 1.5835816282376216e-05, "loss": 0.0066, "step": 269550 }, { "epoch": 8.332200037089695, "grad_norm": 0.10929165035486221, "learning_rate": 1.5835352661185634e-05, "loss": 0.0067, "step": 269580 }, { "epoch": 8.333127279470853, "grad_norm": 0.07902511954307556, "learning_rate": 1.5834889039995056e-05, "loss": 0.0061, "step": 269610 }, { "epoch": 8.334054521852012, "grad_norm": 0.10792458802461624, "learning_rate": 1.5834425418804477e-05, "loss": 0.006, "step": 269640 }, { "epoch": 8.33498176423317, "grad_norm": 0.14994792640209198, "learning_rate": 1.58339617976139e-05, "loss": 0.0066, "step": 269670 }, { "epoch": 8.33590900661433, "grad_norm": 0.10528772324323654, "learning_rate": 1.5833513630463003e-05, "loss": 0.006, "step": 269700 }, { "epoch": 8.336836248995487, "grad_norm": 0.1113467738032341, "learning_rate": 1.5833050009272425e-05, "loss": 0.0065, "step": 269730 }, { "epoch": 8.337763491376645, "grad_norm": 0.16661040484905243, "learning_rate": 1.5832586388081846e-05, "loss": 0.0068, "step": 269760 }, { "epoch": 8.338690733757804, "grad_norm": 0.10060319304466248, "learning_rate": 1.5832122766891268e-05, "loss": 0.0059, "step": 269790 }, { "epoch": 8.339617976138962, "grad_norm": 0.08708736300468445, "learning_rate": 1.583165914570069e-05, "loss": 0.0063, "step": 269820 }, { "epoch": 8.340545218520122, "grad_norm": 0.13937893509864807, "learning_rate": 1.5831195524510107e-05, "loss": 0.0059, "step": 269850 }, { "epoch": 8.34147246090128, "grad_norm": 0.16827823221683502, "learning_rate": 1.583073190331953e-05, "loss": 0.0061, "step": 269880 }, { "epoch": 8.342399703282439, "grad_norm": 0.12398678809404373, "learning_rate": 1.583026828212895e-05, "loss": 0.006, "step": 269910 }, { "epoch": 8.343326945663597, "grad_norm": 
0.08701960742473602, "learning_rate": 1.5829804660938372e-05, "loss": 0.0063, "step": 269940 }, { "epoch": 8.344254188044754, "grad_norm": 0.12172604352235794, "learning_rate": 1.5829341039747793e-05, "loss": 0.0061, "step": 269970 }, { "epoch": 8.345181430425914, "grad_norm": 0.12721878290176392, "learning_rate": 1.582887741855721e-05, "loss": 0.0058, "step": 270000 }, { "epoch": 8.346108672807071, "grad_norm": 0.10071509331464767, "learning_rate": 1.5828413797366633e-05, "loss": 0.0063, "step": 270030 }, { "epoch": 8.347035915188231, "grad_norm": 0.07649637758731842, "learning_rate": 1.5827950176176055e-05, "loss": 0.0061, "step": 270060 }, { "epoch": 8.347963157569389, "grad_norm": 0.07465271651744843, "learning_rate": 1.5827486554985473e-05, "loss": 0.0061, "step": 270090 }, { "epoch": 8.348890399950546, "grad_norm": 0.12309384346008301, "learning_rate": 1.5827022933794894e-05, "loss": 0.0061, "step": 270120 }, { "epoch": 8.349817642331706, "grad_norm": 0.09398400038480759, "learning_rate": 1.5826559312604316e-05, "loss": 0.0063, "step": 270150 }, { "epoch": 8.350744884712864, "grad_norm": 0.13226892054080963, "learning_rate": 1.5826095691413737e-05, "loss": 0.0059, "step": 270180 }, { "epoch": 8.351672127094023, "grad_norm": 0.14492389559745789, "learning_rate": 1.582563207022316e-05, "loss": 0.0057, "step": 270210 }, { "epoch": 8.35259936947518, "grad_norm": 0.09446612000465393, "learning_rate": 1.5825168449032577e-05, "loss": 0.006, "step": 270240 }, { "epoch": 8.353526611856338, "grad_norm": 0.10123580694198608, "learning_rate": 1.5824704827841998e-05, "loss": 0.006, "step": 270270 }, { "epoch": 8.354453854237498, "grad_norm": 0.09047402441501617, "learning_rate": 1.582424120665142e-05, "loss": 0.0061, "step": 270300 }, { "epoch": 8.355381096618656, "grad_norm": 0.09253446757793427, "learning_rate": 1.582377758546084e-05, "loss": 0.006, "step": 270330 }, { "epoch": 8.356308338999815, "grad_norm": 0.11394275724887848, "learning_rate": 1.5823313964270263e-05, 
"loss": 0.0062, "step": 270360 }, { "epoch": 8.357235581380973, "grad_norm": 0.09196595847606659, "learning_rate": 1.5822850343079684e-05, "loss": 0.0063, "step": 270390 }, { "epoch": 8.35816282376213, "grad_norm": 0.09365099668502808, "learning_rate": 1.5822386721889102e-05, "loss": 0.0068, "step": 270420 }, { "epoch": 8.35909006614329, "grad_norm": 0.11377998441457748, "learning_rate": 1.5821923100698524e-05, "loss": 0.0069, "step": 270450 }, { "epoch": 8.360017308524448, "grad_norm": 0.07393713295459747, "learning_rate": 1.5821459479507945e-05, "loss": 0.0061, "step": 270480 }, { "epoch": 8.360944550905607, "grad_norm": 0.06825947761535645, "learning_rate": 1.5820995858317364e-05, "loss": 0.0062, "step": 270510 }, { "epoch": 8.361871793286765, "grad_norm": 0.1291811615228653, "learning_rate": 1.5820532237126785e-05, "loss": 0.0066, "step": 270540 }, { "epoch": 8.362799035667923, "grad_norm": 0.13106539845466614, "learning_rate": 1.5820068615936207e-05, "loss": 0.0063, "step": 270570 }, { "epoch": 8.363726278049082, "grad_norm": 0.12349250167608261, "learning_rate": 1.5819604994745628e-05, "loss": 0.0061, "step": 270600 }, { "epoch": 8.36465352043024, "grad_norm": 0.1158856675028801, "learning_rate": 1.581914137355505e-05, "loss": 0.006, "step": 270630 }, { "epoch": 8.3655807628114, "grad_norm": 0.11752476543188095, "learning_rate": 1.5818677752364468e-05, "loss": 0.0063, "step": 270660 }, { "epoch": 8.366508005192557, "grad_norm": 0.10879237949848175, "learning_rate": 1.581821413117389e-05, "loss": 0.0062, "step": 270690 }, { "epoch": 8.367435247573717, "grad_norm": 0.0870940089225769, "learning_rate": 1.581775050998331e-05, "loss": 0.0064, "step": 270720 }, { "epoch": 8.368362489954874, "grad_norm": 0.1328101009130478, "learning_rate": 1.5817286888792732e-05, "loss": 0.006, "step": 270750 }, { "epoch": 8.369289732336032, "grad_norm": 0.13720539212226868, "learning_rate": 1.5816823267602154e-05, "loss": 0.0061, "step": 270780 }, { "epoch": 8.370216974717192, 
"grad_norm": 0.07124093919992447, "learning_rate": 1.5816359646411575e-05, "loss": 0.0065, "step": 270810 }, { "epoch": 8.37114421709835, "grad_norm": 0.09247702360153198, "learning_rate": 1.5815896025220993e-05, "loss": 0.0061, "step": 270840 }, { "epoch": 8.372071459479509, "grad_norm": 0.1226210668683052, "learning_rate": 1.5815432404030415e-05, "loss": 0.0063, "step": 270870 }, { "epoch": 8.372998701860666, "grad_norm": 0.06321416050195694, "learning_rate": 1.5814968782839833e-05, "loss": 0.0059, "step": 270900 }, { "epoch": 8.373925944241824, "grad_norm": 0.1600029319524765, "learning_rate": 1.5814505161649258e-05, "loss": 0.0056, "step": 270930 }, { "epoch": 8.374853186622984, "grad_norm": 0.14070309698581696, "learning_rate": 1.581404154045868e-05, "loss": 0.0061, "step": 270960 }, { "epoch": 8.375780429004141, "grad_norm": 0.1094907745718956, "learning_rate": 1.5813577919268098e-05, "loss": 0.0061, "step": 270990 }, { "epoch": 8.3767076713853, "grad_norm": 0.16646607220172882, "learning_rate": 1.581311429807752e-05, "loss": 0.007, "step": 271020 }, { "epoch": 8.377634913766459, "grad_norm": 0.12671491503715515, "learning_rate": 1.581265067688694e-05, "loss": 0.007, "step": 271050 }, { "epoch": 8.378562156147616, "grad_norm": 0.09413792192935944, "learning_rate": 1.581218705569636e-05, "loss": 0.0062, "step": 271080 }, { "epoch": 8.379489398528776, "grad_norm": 0.11695613712072372, "learning_rate": 1.581172343450578e-05, "loss": 0.0063, "step": 271110 }, { "epoch": 8.380416640909933, "grad_norm": 0.095102459192276, "learning_rate": 1.58112598133152e-05, "loss": 0.0059, "step": 271140 }, { "epoch": 8.381343883291093, "grad_norm": 0.09650451689958572, "learning_rate": 1.5810796192124623e-05, "loss": 0.0064, "step": 271170 }, { "epoch": 8.38227112567225, "grad_norm": 0.12066720426082611, "learning_rate": 1.5810332570934045e-05, "loss": 0.0064, "step": 271200 }, { "epoch": 8.383198368053408, "grad_norm": 0.07864031195640564, "learning_rate": 
1.5809868949743463e-05, "loss": 0.0058, "step": 271230 }, { "epoch": 8.384125610434568, "grad_norm": 0.11479896306991577, "learning_rate": 1.5809405328552884e-05, "loss": 0.0058, "step": 271260 }, { "epoch": 8.385052852815726, "grad_norm": 0.1277703493833542, "learning_rate": 1.5808941707362306e-05, "loss": 0.0063, "step": 271290 }, { "epoch": 8.385980095196885, "grad_norm": 0.13386456668376923, "learning_rate": 1.5808478086171727e-05, "loss": 0.0068, "step": 271320 }, { "epoch": 8.386907337578043, "grad_norm": 0.099324069917202, "learning_rate": 1.580801446498115e-05, "loss": 0.0061, "step": 271350 }, { "epoch": 8.3878345799592, "grad_norm": 0.11438807100057602, "learning_rate": 1.580755084379057e-05, "loss": 0.0061, "step": 271380 }, { "epoch": 8.38876182234036, "grad_norm": 0.1418312042951584, "learning_rate": 1.580708722259999e-05, "loss": 0.0063, "step": 271410 }, { "epoch": 8.389689064721518, "grad_norm": 0.1048969104886055, "learning_rate": 1.580662360140941e-05, "loss": 0.0054, "step": 271440 }, { "epoch": 8.390616307102677, "grad_norm": 0.13324473798274994, "learning_rate": 1.580615998021883e-05, "loss": 0.0069, "step": 271470 }, { "epoch": 8.391543549483835, "grad_norm": 0.07235273718833923, "learning_rate": 1.580569635902825e-05, "loss": 0.0064, "step": 271500 }, { "epoch": 8.392470791864994, "grad_norm": 0.10164497047662735, "learning_rate": 1.580523273783767e-05, "loss": 0.0064, "step": 271530 }, { "epoch": 8.393398034246152, "grad_norm": 0.20303404331207275, "learning_rate": 1.5804769116647093e-05, "loss": 0.0061, "step": 271560 }, { "epoch": 8.39432527662731, "grad_norm": 0.16118888556957245, "learning_rate": 1.5804305495456514e-05, "loss": 0.0064, "step": 271590 }, { "epoch": 8.39525251900847, "grad_norm": 0.11878924816846848, "learning_rate": 1.5803841874265936e-05, "loss": 0.0071, "step": 271620 }, { "epoch": 8.396179761389627, "grad_norm": 0.1120775043964386, "learning_rate": 1.5803378253075354e-05, "loss": 0.0067, "step": 271650 }, { "epoch": 
8.397107003770786, "grad_norm": 0.08649899065494537, "learning_rate": 1.5802914631884775e-05, "loss": 0.0065, "step": 271680 }, { "epoch": 8.398034246151944, "grad_norm": 0.15840166807174683, "learning_rate": 1.5802451010694197e-05, "loss": 0.0065, "step": 271710 }, { "epoch": 8.398961488533102, "grad_norm": 0.09686357527971268, "learning_rate": 1.5801987389503618e-05, "loss": 0.0068, "step": 271740 }, { "epoch": 8.399888730914261, "grad_norm": 0.1296715885400772, "learning_rate": 1.580152376831304e-05, "loss": 0.0057, "step": 271770 }, { "epoch": 8.400815973295419, "grad_norm": 0.14088624715805054, "learning_rate": 1.580106014712246e-05, "loss": 0.0064, "step": 271800 }, { "epoch": 8.401743215676579, "grad_norm": 0.11717162281274796, "learning_rate": 1.580059652593188e-05, "loss": 0.0056, "step": 271830 }, { "epoch": 8.402670458057736, "grad_norm": 0.12601839005947113, "learning_rate": 1.58001329047413e-05, "loss": 0.0062, "step": 271860 }, { "epoch": 8.403597700438894, "grad_norm": 0.13908255100250244, "learning_rate": 1.579966928355072e-05, "loss": 0.0061, "step": 271890 }, { "epoch": 8.404524942820053, "grad_norm": 0.11000586301088333, "learning_rate": 1.579920566236014e-05, "loss": 0.0067, "step": 271920 }, { "epoch": 8.405452185201211, "grad_norm": 0.14947426319122314, "learning_rate": 1.5798742041169562e-05, "loss": 0.0062, "step": 271950 }, { "epoch": 8.40637942758237, "grad_norm": 0.16068899631500244, "learning_rate": 1.5798278419978983e-05, "loss": 0.0061, "step": 271980 }, { "epoch": 8.407306669963528, "grad_norm": 0.07792964577674866, "learning_rate": 1.5797814798788405e-05, "loss": 0.0058, "step": 272010 }, { "epoch": 8.408233912344686, "grad_norm": 0.10793939232826233, "learning_rate": 1.5797351177597827e-05, "loss": 0.0058, "step": 272040 }, { "epoch": 8.409161154725846, "grad_norm": 0.1105453297495842, "learning_rate": 1.5796887556407245e-05, "loss": 0.0063, "step": 272070 }, { "epoch": 8.410088397107003, "grad_norm": 0.10653462260961533, 
"learning_rate": 1.5796423935216666e-05, "loss": 0.0074, "step": 272100 }, { "epoch": 8.411015639488163, "grad_norm": 0.12126868218183517, "learning_rate": 1.5795960314026088e-05, "loss": 0.0062, "step": 272130 }, { "epoch": 8.41194288186932, "grad_norm": 0.07430072128772736, "learning_rate": 1.579549669283551e-05, "loss": 0.0063, "step": 272160 }, { "epoch": 8.412870124250478, "grad_norm": 0.09291719645261765, "learning_rate": 1.579503307164493e-05, "loss": 0.0065, "step": 272190 }, { "epoch": 8.413797366631638, "grad_norm": 0.13304246962070465, "learning_rate": 1.579456945045435e-05, "loss": 0.0067, "step": 272220 }, { "epoch": 8.414724609012795, "grad_norm": 0.13362309336662292, "learning_rate": 1.579410582926377e-05, "loss": 0.0061, "step": 272250 }, { "epoch": 8.415651851393955, "grad_norm": 0.15800543129444122, "learning_rate": 1.5793642208073192e-05, "loss": 0.0063, "step": 272280 }, { "epoch": 8.416579093775113, "grad_norm": 0.15076912939548492, "learning_rate": 1.57931940409223e-05, "loss": 0.006, "step": 272310 }, { "epoch": 8.417506336156272, "grad_norm": 0.1818714141845703, "learning_rate": 1.5792730419731718e-05, "loss": 0.0062, "step": 272340 }, { "epoch": 8.41843357853743, "grad_norm": 0.1458585560321808, "learning_rate": 1.579226679854114e-05, "loss": 0.0069, "step": 272370 }, { "epoch": 8.419360820918588, "grad_norm": 0.10401946306228638, "learning_rate": 1.579180317735056e-05, "loss": 0.0068, "step": 272400 }, { "epoch": 8.420288063299747, "grad_norm": 0.1180887520313263, "learning_rate": 1.5791339556159982e-05, "loss": 0.0066, "step": 272430 }, { "epoch": 8.421215305680905, "grad_norm": 0.11833532154560089, "learning_rate": 1.5790875934969404e-05, "loss": 0.0066, "step": 272460 }, { "epoch": 8.422142548062064, "grad_norm": 0.12831054627895355, "learning_rate": 1.5790412313778822e-05, "loss": 0.0057, "step": 272490 }, { "epoch": 8.423069790443222, "grad_norm": 0.09454689174890518, "learning_rate": 1.5789948692588243e-05, "loss": 0.006, "step": 
272520 }, { "epoch": 8.42399703282438, "grad_norm": 0.08986224234104156, "learning_rate": 1.5789485071397665e-05, "loss": 0.0062, "step": 272550 }, { "epoch": 8.424924275205539, "grad_norm": 0.07744020223617554, "learning_rate": 1.5789021450207086e-05, "loss": 0.0062, "step": 272580 }, { "epoch": 8.425851517586697, "grad_norm": 0.11906123161315918, "learning_rate": 1.5788557829016508e-05, "loss": 0.0061, "step": 272610 }, { "epoch": 8.426778759967856, "grad_norm": 0.10931501537561417, "learning_rate": 1.578809420782593e-05, "loss": 0.0069, "step": 272640 }, { "epoch": 8.427706002349014, "grad_norm": 0.06095968931913376, "learning_rate": 1.5787630586635348e-05, "loss": 0.0054, "step": 272670 }, { "epoch": 8.428633244730172, "grad_norm": 0.12222772091627121, "learning_rate": 1.578716696544477e-05, "loss": 0.0056, "step": 272700 }, { "epoch": 8.429560487111331, "grad_norm": 0.11740734428167343, "learning_rate": 1.5786703344254187e-05, "loss": 0.0058, "step": 272730 }, { "epoch": 8.430487729492489, "grad_norm": 0.09794355183839798, "learning_rate": 1.578623972306361e-05, "loss": 0.0066, "step": 272760 }, { "epoch": 8.431414971873648, "grad_norm": 0.1827082484960556, "learning_rate": 1.578577610187303e-05, "loss": 0.0067, "step": 272790 }, { "epoch": 8.432342214254806, "grad_norm": 0.09865743666887283, "learning_rate": 1.5785312480682452e-05, "loss": 0.0068, "step": 272820 }, { "epoch": 8.433269456635964, "grad_norm": 0.0910876989364624, "learning_rate": 1.5784848859491873e-05, "loss": 0.0068, "step": 272850 }, { "epoch": 8.434196699017123, "grad_norm": 0.08542466163635254, "learning_rate": 1.5784385238301295e-05, "loss": 0.0064, "step": 272880 }, { "epoch": 8.435123941398281, "grad_norm": 0.09476971626281738, "learning_rate": 1.5783921617110713e-05, "loss": 0.0063, "step": 272910 }, { "epoch": 8.43605118377944, "grad_norm": 0.10090233385562897, "learning_rate": 1.5783457995920134e-05, "loss": 0.0061, "step": 272940 }, { "epoch": 8.436978426160598, "grad_norm": 
0.08768678456544876, "learning_rate": 1.5782994374729556e-05, "loss": 0.0063, "step": 272970 }, { "epoch": 8.437905668541756, "grad_norm": 0.13159383833408356, "learning_rate": 1.5782530753538977e-05, "loss": 0.0065, "step": 273000 }, { "epoch": 8.438832910922915, "grad_norm": 0.14001059532165527, "learning_rate": 1.57820671323484e-05, "loss": 0.0064, "step": 273030 }, { "epoch": 8.439760153304073, "grad_norm": 0.07862523198127747, "learning_rate": 1.5781603511157817e-05, "loss": 0.0062, "step": 273060 }, { "epoch": 8.440687395685233, "grad_norm": 0.18035350739955902, "learning_rate": 1.578113988996724e-05, "loss": 0.0072, "step": 273090 }, { "epoch": 8.44161463806639, "grad_norm": 0.11531537026166916, "learning_rate": 1.578067626877666e-05, "loss": 0.0064, "step": 273120 }, { "epoch": 8.44254188044755, "grad_norm": 0.07686509191989899, "learning_rate": 1.5780212647586078e-05, "loss": 0.0059, "step": 273150 }, { "epoch": 8.443469122828708, "grad_norm": 0.0863264799118042, "learning_rate": 1.57797490263955e-05, "loss": 0.0063, "step": 273180 }, { "epoch": 8.444396365209865, "grad_norm": 0.14718835055828094, "learning_rate": 1.577928540520492e-05, "loss": 0.0059, "step": 273210 }, { "epoch": 8.445323607591025, "grad_norm": 0.07143284380435944, "learning_rate": 1.5778821784014343e-05, "loss": 0.0059, "step": 273240 }, { "epoch": 8.446250849972182, "grad_norm": 0.0841151624917984, "learning_rate": 1.5778358162823764e-05, "loss": 0.0063, "step": 273270 }, { "epoch": 8.447178092353342, "grad_norm": 0.060302846133708954, "learning_rate": 1.5777894541633186e-05, "loss": 0.0062, "step": 273300 }, { "epoch": 8.4481053347345, "grad_norm": 0.08992837369441986, "learning_rate": 1.5777430920442604e-05, "loss": 0.0063, "step": 273330 }, { "epoch": 8.449032577115657, "grad_norm": 0.1179899275302887, "learning_rate": 1.5776967299252025e-05, "loss": 0.0065, "step": 273360 }, { "epoch": 8.449959819496817, "grad_norm": 0.10775960981845856, "learning_rate": 1.5776503678061447e-05, 
"loss": 0.0057, "step": 273390 }, { "epoch": 8.450887061877975, "grad_norm": 0.0955892950296402, "learning_rate": 1.5776040056870868e-05, "loss": 0.0069, "step": 273420 }, { "epoch": 8.451814304259134, "grad_norm": 0.14185109734535217, "learning_rate": 1.577557643568029e-05, "loss": 0.0065, "step": 273450 }, { "epoch": 8.452741546640292, "grad_norm": 0.09795130789279938, "learning_rate": 1.5775112814489708e-05, "loss": 0.0066, "step": 273480 }, { "epoch": 8.45366878902145, "grad_norm": 0.07940042018890381, "learning_rate": 1.577464919329913e-05, "loss": 0.0069, "step": 273510 }, { "epoch": 8.454596031402609, "grad_norm": 0.11718742549419403, "learning_rate": 1.577418557210855e-05, "loss": 0.0066, "step": 273540 }, { "epoch": 8.455523273783767, "grad_norm": 0.145931214094162, "learning_rate": 1.577372195091797e-05, "loss": 0.0062, "step": 273570 }, { "epoch": 8.456450516164926, "grad_norm": 0.1164483055472374, "learning_rate": 1.577325832972739e-05, "loss": 0.006, "step": 273600 }, { "epoch": 8.457377758546084, "grad_norm": 0.12115244567394257, "learning_rate": 1.5772794708536815e-05, "loss": 0.0063, "step": 273630 }, { "epoch": 8.458305000927242, "grad_norm": 0.11673414707183838, "learning_rate": 1.5772331087346234e-05, "loss": 0.0063, "step": 273660 }, { "epoch": 8.459232243308401, "grad_norm": 0.11886224895715714, "learning_rate": 1.5771867466155655e-05, "loss": 0.0061, "step": 273690 }, { "epoch": 8.460159485689559, "grad_norm": 0.0949607565999031, "learning_rate": 1.5771403844965073e-05, "loss": 0.0066, "step": 273720 }, { "epoch": 8.461086728070718, "grad_norm": 0.27643492817878723, "learning_rate": 1.5770940223774495e-05, "loss": 0.0065, "step": 273750 }, { "epoch": 8.462013970451876, "grad_norm": 0.08866966515779495, "learning_rate": 1.5770476602583916e-05, "loss": 0.0071, "step": 273780 }, { "epoch": 8.462941212833034, "grad_norm": 0.1474103033542633, "learning_rate": 1.5770012981393338e-05, "loss": 0.0057, "step": 273810 }, { "epoch": 8.463868455214193, 
"grad_norm": 0.1361958235502243, "learning_rate": 1.576954936020276e-05, "loss": 0.0056, "step": 273840 }, { "epoch": 8.46479569759535, "grad_norm": 0.10216246545314789, "learning_rate": 1.576908573901218e-05, "loss": 0.0061, "step": 273870 }, { "epoch": 8.46572293997651, "grad_norm": 0.12506705522537231, "learning_rate": 1.57686221178216e-05, "loss": 0.0066, "step": 273900 }, { "epoch": 8.466650182357668, "grad_norm": 0.12779347598552704, "learning_rate": 1.576815849663102e-05, "loss": 0.0066, "step": 273930 }, { "epoch": 8.467577424738828, "grad_norm": 0.15565556287765503, "learning_rate": 1.5767694875440442e-05, "loss": 0.007, "step": 273960 }, { "epoch": 8.468504667119985, "grad_norm": 0.15128637850284576, "learning_rate": 1.5767231254249863e-05, "loss": 0.0059, "step": 273990 }, { "epoch": 8.469431909501143, "grad_norm": 0.16651444137096405, "learning_rate": 1.5766767633059285e-05, "loss": 0.0056, "step": 274020 }, { "epoch": 8.470359151882302, "grad_norm": 0.15091320872306824, "learning_rate": 1.5766304011868703e-05, "loss": 0.0061, "step": 274050 }, { "epoch": 8.47128639426346, "grad_norm": 0.11031719297170639, "learning_rate": 1.5765840390678124e-05, "loss": 0.0065, "step": 274080 }, { "epoch": 8.47221363664462, "grad_norm": 0.11276760697364807, "learning_rate": 1.5765376769487546e-05, "loss": 0.0062, "step": 274110 }, { "epoch": 8.473140879025777, "grad_norm": 0.0921366810798645, "learning_rate": 1.5764913148296964e-05, "loss": 0.006, "step": 274140 }, { "epoch": 8.474068121406935, "grad_norm": 0.12925556302070618, "learning_rate": 1.5764449527106386e-05, "loss": 0.0065, "step": 274170 }, { "epoch": 8.474995363788095, "grad_norm": 0.126114159822464, "learning_rate": 1.5763985905915807e-05, "loss": 0.0061, "step": 274200 }, { "epoch": 8.475922606169252, "grad_norm": 0.15657003223896027, "learning_rate": 1.576352228472523e-05, "loss": 0.0064, "step": 274230 }, { "epoch": 8.476849848550412, "grad_norm": 0.13102073967456818, "learning_rate": 
1.576305866353465e-05, "loss": 0.0066, "step": 274260 }, { "epoch": 8.47777709093157, "grad_norm": 0.13419267535209656, "learning_rate": 1.576259504234407e-05, "loss": 0.0071, "step": 274290 }, { "epoch": 8.478704333312727, "grad_norm": 0.08968710899353027, "learning_rate": 1.576213142115349e-05, "loss": 0.0064, "step": 274320 }, { "epoch": 8.479631575693887, "grad_norm": 0.1260797083377838, "learning_rate": 1.576166779996291e-05, "loss": 0.0064, "step": 274350 }, { "epoch": 8.480558818075044, "grad_norm": 0.1291249543428421, "learning_rate": 1.5761204178772333e-05, "loss": 0.0066, "step": 274380 }, { "epoch": 8.481486060456204, "grad_norm": 0.14644339680671692, "learning_rate": 1.5760740557581754e-05, "loss": 0.0066, "step": 274410 }, { "epoch": 8.482413302837362, "grad_norm": 0.14494864642620087, "learning_rate": 1.5760276936391176e-05, "loss": 0.0054, "step": 274440 }, { "epoch": 8.48334054521852, "grad_norm": 0.16544635593891144, "learning_rate": 1.5759813315200594e-05, "loss": 0.0059, "step": 274470 }, { "epoch": 8.484267787599679, "grad_norm": 0.10737427324056625, "learning_rate": 1.5759349694010015e-05, "loss": 0.0057, "step": 274500 }, { "epoch": 8.485195029980837, "grad_norm": 0.12936004996299744, "learning_rate": 1.5758886072819437e-05, "loss": 0.007, "step": 274530 }, { "epoch": 8.486122272361996, "grad_norm": 0.11897081881761551, "learning_rate": 1.5758422451628855e-05, "loss": 0.0064, "step": 274560 }, { "epoch": 8.487049514743154, "grad_norm": 0.13398081064224243, "learning_rate": 1.5757958830438277e-05, "loss": 0.0061, "step": 274590 }, { "epoch": 8.487976757124311, "grad_norm": 0.13009604811668396, "learning_rate": 1.5757495209247698e-05, "loss": 0.0068, "step": 274620 }, { "epoch": 8.488903999505471, "grad_norm": 0.08897293359041214, "learning_rate": 1.575703158805712e-05, "loss": 0.0066, "step": 274650 }, { "epoch": 8.489831241886629, "grad_norm": 0.09230709075927734, "learning_rate": 1.575656796686654e-05, "loss": 0.0066, "step": 274680 }, { 
"epoch": 8.490758484267788, "grad_norm": 0.12108007073402405, "learning_rate": 1.575610434567596e-05, "loss": 0.0061, "step": 274710 }, { "epoch": 8.491685726648946, "grad_norm": 0.07085516303777695, "learning_rate": 1.575564072448538e-05, "loss": 0.0063, "step": 274740 }, { "epoch": 8.492612969030105, "grad_norm": 0.09763732552528381, "learning_rate": 1.5755177103294802e-05, "loss": 0.0064, "step": 274770 }, { "epoch": 8.493540211411263, "grad_norm": 0.11451523005962372, "learning_rate": 1.5754713482104224e-05, "loss": 0.0065, "step": 274800 }, { "epoch": 8.49446745379242, "grad_norm": 0.09537868946790695, "learning_rate": 1.5754249860913645e-05, "loss": 0.0058, "step": 274830 }, { "epoch": 8.49539469617358, "grad_norm": 0.0890483409166336, "learning_rate": 1.5753786239723067e-05, "loss": 0.0063, "step": 274860 }, { "epoch": 8.496321938554738, "grad_norm": 0.07795315980911255, "learning_rate": 1.5753322618532485e-05, "loss": 0.0062, "step": 274890 }, { "epoch": 8.497249180935897, "grad_norm": 0.166154682636261, "learning_rate": 1.5752858997341906e-05, "loss": 0.0054, "step": 274920 }, { "epoch": 8.498176423317055, "grad_norm": 0.09594245254993439, "learning_rate": 1.5752395376151328e-05, "loss": 0.006, "step": 274950 }, { "epoch": 8.499103665698213, "grad_norm": 0.12492644041776657, "learning_rate": 1.5751931754960746e-05, "loss": 0.0061, "step": 274980 }, { "epoch": 8.500030908079372, "grad_norm": 0.09470783919095993, "learning_rate": 1.5751468133770167e-05, "loss": 0.0067, "step": 275010 }, { "epoch": 8.50095815046053, "grad_norm": 0.10381123423576355, "learning_rate": 1.575100451257959e-05, "loss": 0.0063, "step": 275040 }, { "epoch": 8.50188539284169, "grad_norm": 0.1026776060461998, "learning_rate": 1.575054089138901e-05, "loss": 0.0064, "step": 275070 }, { "epoch": 8.502812635222847, "grad_norm": 0.09367235004901886, "learning_rate": 1.5750077270198432e-05, "loss": 0.0064, "step": 275100 }, { "epoch": 8.503739877604005, "grad_norm": 0.1677752137184143, 
"learning_rate": 1.574961364900785e-05, "loss": 0.0057, "step": 275130 }, { "epoch": 8.504667119985164, "grad_norm": 0.10447489470243454, "learning_rate": 1.574915002781727e-05, "loss": 0.0063, "step": 275160 }, { "epoch": 8.505594362366322, "grad_norm": 0.47723090648651123, "learning_rate": 1.5748686406626693e-05, "loss": 0.0064, "step": 275190 }, { "epoch": 8.506521604747482, "grad_norm": 0.1600102186203003, "learning_rate": 1.5748222785436115e-05, "loss": 0.0059, "step": 275220 }, { "epoch": 8.50744884712864, "grad_norm": 0.0850202739238739, "learning_rate": 1.5747759164245536e-05, "loss": 0.0068, "step": 275250 }, { "epoch": 8.508376089509797, "grad_norm": 0.10967772454023361, "learning_rate": 1.5747295543054958e-05, "loss": 0.0057, "step": 275280 }, { "epoch": 8.509303331890957, "grad_norm": 0.1469334214925766, "learning_rate": 1.5746831921864376e-05, "loss": 0.0069, "step": 275310 }, { "epoch": 8.510230574272114, "grad_norm": 0.08654969930648804, "learning_rate": 1.5746368300673797e-05, "loss": 0.0064, "step": 275340 }, { "epoch": 8.511157816653274, "grad_norm": 0.11497566848993301, "learning_rate": 1.574590467948322e-05, "loss": 0.0059, "step": 275370 }, { "epoch": 8.512085059034431, "grad_norm": 0.1715206652879715, "learning_rate": 1.574544105829264e-05, "loss": 0.0068, "step": 275400 }, { "epoch": 8.51301230141559, "grad_norm": 0.08101295679807663, "learning_rate": 1.5744977437102062e-05, "loss": 0.0061, "step": 275430 }, { "epoch": 8.513939543796749, "grad_norm": 0.11365770548582077, "learning_rate": 1.574451381591148e-05, "loss": 0.006, "step": 275460 }, { "epoch": 8.514866786177906, "grad_norm": 0.09386084228754044, "learning_rate": 1.57440501947209e-05, "loss": 0.0065, "step": 275490 }, { "epoch": 8.515794028559066, "grad_norm": 0.12257953733205795, "learning_rate": 1.5743586573530323e-05, "loss": 0.0065, "step": 275520 }, { "epoch": 8.516721270940224, "grad_norm": 0.10776825994253159, "learning_rate": 1.574312295233974e-05, "loss": 0.0062, "step": 
275550 }, { "epoch": 8.517648513321383, "grad_norm": 0.10052712261676788, "learning_rate": 1.5742659331149163e-05, "loss": 0.006, "step": 275580 }, { "epoch": 8.51857575570254, "grad_norm": 0.14293639361858368, "learning_rate": 1.5742195709958584e-05, "loss": 0.0066, "step": 275610 }, { "epoch": 8.519502998083698, "grad_norm": 0.15764698386192322, "learning_rate": 1.5741732088768006e-05, "loss": 0.0064, "step": 275640 }, { "epoch": 8.520430240464858, "grad_norm": 0.07552752643823624, "learning_rate": 1.5741268467577427e-05, "loss": 0.0062, "step": 275670 }, { "epoch": 8.521357482846016, "grad_norm": 0.11511939764022827, "learning_rate": 1.574080484638685e-05, "loss": 0.0062, "step": 275700 }, { "epoch": 8.522284725227175, "grad_norm": 0.11127161234617233, "learning_rate": 1.5740341225196267e-05, "loss": 0.0062, "step": 275730 }, { "epoch": 8.523211967608333, "grad_norm": 0.11097081750631332, "learning_rate": 1.5739877604005688e-05, "loss": 0.0069, "step": 275760 }, { "epoch": 8.52413920998949, "grad_norm": 0.11071518063545227, "learning_rate": 1.573941398281511e-05, "loss": 0.0058, "step": 275790 }, { "epoch": 8.52506645237065, "grad_norm": 0.11248951405286789, "learning_rate": 1.573895036162453e-05, "loss": 0.0064, "step": 275820 }, { "epoch": 8.525993694751808, "grad_norm": 0.10003194212913513, "learning_rate": 1.5738486740433953e-05, "loss": 0.006, "step": 275850 }, { "epoch": 8.526920937132967, "grad_norm": 0.1204388290643692, "learning_rate": 1.573802311924337e-05, "loss": 0.0063, "step": 275880 }, { "epoch": 8.527848179514125, "grad_norm": 0.1342908889055252, "learning_rate": 1.5737559498052792e-05, "loss": 0.0063, "step": 275910 }, { "epoch": 8.528775421895283, "grad_norm": 0.1388852447271347, "learning_rate": 1.5737095876862214e-05, "loss": 0.0067, "step": 275940 }, { "epoch": 8.529702664276442, "grad_norm": 0.151114359498024, "learning_rate": 1.5736632255671632e-05, "loss": 0.006, "step": 275970 }, { "epoch": 8.5306299066576, "grad_norm": 
0.09163374453783035, "learning_rate": 1.5736168634481053e-05, "loss": 0.0067, "step": 276000 }, { "epoch": 8.53155714903876, "grad_norm": 0.11336281150579453, "learning_rate": 1.5735705013290475e-05, "loss": 0.0064, "step": 276030 }, { "epoch": 8.532484391419917, "grad_norm": 0.10017111897468567, "learning_rate": 1.5735241392099896e-05, "loss": 0.0067, "step": 276060 }, { "epoch": 8.533411633801075, "grad_norm": 0.11559668928384781, "learning_rate": 1.5734777770909318e-05, "loss": 0.0061, "step": 276090 }, { "epoch": 8.534338876182234, "grad_norm": 0.14469435811042786, "learning_rate": 1.5734314149718736e-05, "loss": 0.0063, "step": 276120 }, { "epoch": 8.535266118563392, "grad_norm": 0.1255582571029663, "learning_rate": 1.5733850528528158e-05, "loss": 0.006, "step": 276150 }, { "epoch": 8.536193360944552, "grad_norm": 0.1057855412364006, "learning_rate": 1.573338690733758e-05, "loss": 0.0059, "step": 276180 }, { "epoch": 8.53712060332571, "grad_norm": 0.1239301860332489, "learning_rate": 1.5732923286147e-05, "loss": 0.0062, "step": 276210 }, { "epoch": 8.538047845706867, "grad_norm": 0.1431579738855362, "learning_rate": 1.5732459664956422e-05, "loss": 0.0062, "step": 276240 }, { "epoch": 8.538975088088026, "grad_norm": 0.11725557595491409, "learning_rate": 1.5731996043765844e-05, "loss": 0.006, "step": 276270 }, { "epoch": 8.539902330469184, "grad_norm": 0.10825157165527344, "learning_rate": 1.5731532422575262e-05, "loss": 0.0068, "step": 276300 }, { "epoch": 8.540829572850344, "grad_norm": 0.1280461549758911, "learning_rate": 1.5731068801384683e-05, "loss": 0.0056, "step": 276330 }, { "epoch": 8.541756815231501, "grad_norm": 0.12831909954547882, "learning_rate": 1.5730605180194105e-05, "loss": 0.0063, "step": 276360 }, { "epoch": 8.54268405761266, "grad_norm": 0.1638469398021698, "learning_rate": 1.5730141559003523e-05, "loss": 0.0064, "step": 276390 }, { "epoch": 8.543611299993819, "grad_norm": 0.08734361827373505, "learning_rate": 1.5729677937812944e-05, 
"loss": 0.0061, "step": 276420 }, { "epoch": 8.544538542374976, "grad_norm": 0.11932078748941422, "learning_rate": 1.5729214316622366e-05, "loss": 0.0065, "step": 276450 }, { "epoch": 8.545465784756136, "grad_norm": 0.0890965387225151, "learning_rate": 1.5728750695431787e-05, "loss": 0.0067, "step": 276480 }, { "epoch": 8.546393027137293, "grad_norm": 0.07268160581588745, "learning_rate": 1.572828707424121e-05, "loss": 0.0059, "step": 276510 }, { "epoch": 8.547320269518453, "grad_norm": 0.10741150379180908, "learning_rate": 1.5727823453050627e-05, "loss": 0.0063, "step": 276540 }, { "epoch": 8.54824751189961, "grad_norm": 0.12374240905046463, "learning_rate": 1.572735983186005e-05, "loss": 0.0059, "step": 276570 }, { "epoch": 8.549174754280768, "grad_norm": 0.08193308115005493, "learning_rate": 1.572689621066947e-05, "loss": 0.0058, "step": 276600 }, { "epoch": 8.550101996661928, "grad_norm": 0.07414083182811737, "learning_rate": 1.572643258947889e-05, "loss": 0.0064, "step": 276630 }, { "epoch": 8.551029239043086, "grad_norm": 0.1181345209479332, "learning_rate": 1.5725968968288313e-05, "loss": 0.0064, "step": 276660 }, { "epoch": 8.551956481424245, "grad_norm": 0.07395754754543304, "learning_rate": 1.5725505347097735e-05, "loss": 0.006, "step": 276690 }, { "epoch": 8.552883723805403, "grad_norm": 0.11508386582136154, "learning_rate": 1.5725041725907153e-05, "loss": 0.0065, "step": 276720 }, { "epoch": 8.55381096618656, "grad_norm": 0.12891344726085663, "learning_rate": 1.5724578104716574e-05, "loss": 0.0062, "step": 276750 }, { "epoch": 8.55473820856772, "grad_norm": 0.04917636513710022, "learning_rate": 1.5724114483525996e-05, "loss": 0.0066, "step": 276780 }, { "epoch": 8.555665450948878, "grad_norm": 0.134683296084404, "learning_rate": 1.5723650862335417e-05, "loss": 0.0068, "step": 276810 }, { "epoch": 8.556592693330037, "grad_norm": 0.05612410604953766, "learning_rate": 1.572318724114484e-05, "loss": 0.0061, "step": 276840 }, { "epoch": 8.557519935711195, 
"grad_norm": 0.0972750261425972, "learning_rate": 1.5722723619954257e-05, "loss": 0.0059, "step": 276870 }, { "epoch": 8.558447178092353, "grad_norm": 0.07606451958417892, "learning_rate": 1.5722259998763678e-05, "loss": 0.006, "step": 276900 }, { "epoch": 8.559374420473512, "grad_norm": 0.11803264915943146, "learning_rate": 1.57217963775731e-05, "loss": 0.0066, "step": 276930 }, { "epoch": 8.56030166285467, "grad_norm": 0.1440427452325821, "learning_rate": 1.5721332756382518e-05, "loss": 0.006, "step": 276960 }, { "epoch": 8.56122890523583, "grad_norm": 0.16299022734165192, "learning_rate": 1.572086913519194e-05, "loss": 0.0063, "step": 276990 }, { "epoch": 8.562156147616987, "grad_norm": 0.14708907902240753, "learning_rate": 1.572040551400136e-05, "loss": 0.0062, "step": 277020 }, { "epoch": 8.563083389998145, "grad_norm": 0.12536047399044037, "learning_rate": 1.5719941892810782e-05, "loss": 0.006, "step": 277050 }, { "epoch": 8.564010632379304, "grad_norm": 0.14032232761383057, "learning_rate": 1.571949372565989e-05, "loss": 0.0066, "step": 277080 }, { "epoch": 8.564937874760462, "grad_norm": 0.06813095510005951, "learning_rate": 1.5719030104469312e-05, "loss": 0.0061, "step": 277110 }, { "epoch": 8.565865117141621, "grad_norm": 0.10227929055690765, "learning_rate": 1.571856648327873e-05, "loss": 0.0064, "step": 277140 }, { "epoch": 8.566792359522779, "grad_norm": 0.1318812519311905, "learning_rate": 1.571810286208815e-05, "loss": 0.0064, "step": 277170 }, { "epoch": 8.567719601903939, "grad_norm": 0.17522384226322174, "learning_rate": 1.571763924089757e-05, "loss": 0.0065, "step": 277200 }, { "epoch": 8.568646844285096, "grad_norm": 0.10704502463340759, "learning_rate": 1.571717561970699e-05, "loss": 0.0066, "step": 277230 }, { "epoch": 8.569574086666254, "grad_norm": 0.12394252419471741, "learning_rate": 1.5716711998516413e-05, "loss": 0.0067, "step": 277260 }, { "epoch": 8.570501329047413, "grad_norm": 0.11456389725208282, "learning_rate": 
1.5716248377325834e-05, "loss": 0.0063, "step": 277290 }, { "epoch": 8.571428571428571, "grad_norm": 0.08367089182138443, "learning_rate": 1.5715784756135256e-05, "loss": 0.0066, "step": 277320 }, { "epoch": 8.57235581380973, "grad_norm": 0.14867372810840607, "learning_rate": 1.5715321134944677e-05, "loss": 0.0057, "step": 277350 }, { "epoch": 8.573283056190888, "grad_norm": 0.09458665549755096, "learning_rate": 1.5714857513754095e-05, "loss": 0.006, "step": 277380 }, { "epoch": 8.574210298572046, "grad_norm": 0.08058452606201172, "learning_rate": 1.5714393892563517e-05, "loss": 0.0069, "step": 277410 }, { "epoch": 8.575137540953206, "grad_norm": 0.10216277092695236, "learning_rate": 1.5713930271372938e-05, "loss": 0.0059, "step": 277440 }, { "epoch": 8.576064783334363, "grad_norm": 0.12275426834821701, "learning_rate": 1.571346665018236e-05, "loss": 0.0064, "step": 277470 }, { "epoch": 8.576992025715523, "grad_norm": 0.13414989411830902, "learning_rate": 1.571300302899178e-05, "loss": 0.006, "step": 277500 }, { "epoch": 8.57791926809668, "grad_norm": 0.09577585011720657, "learning_rate": 1.57125394078012e-05, "loss": 0.006, "step": 277530 }, { "epoch": 8.578846510477838, "grad_norm": 0.09738130122423172, "learning_rate": 1.571207578661062e-05, "loss": 0.0061, "step": 277560 }, { "epoch": 8.579773752858998, "grad_norm": 0.10583069920539856, "learning_rate": 1.5711612165420042e-05, "loss": 0.0064, "step": 277590 }, { "epoch": 8.580700995240155, "grad_norm": 0.05583531782031059, "learning_rate": 1.5711163998269147e-05, "loss": 0.0066, "step": 277620 }, { "epoch": 8.581628237621315, "grad_norm": 0.11929042637348175, "learning_rate": 1.571070037707857e-05, "loss": 0.0067, "step": 277650 }, { "epoch": 8.582555480002473, "grad_norm": 0.08446977287530899, "learning_rate": 1.571023675588799e-05, "loss": 0.0063, "step": 277680 }, { "epoch": 8.58348272238363, "grad_norm": 0.10900907218456268, "learning_rate": 1.570977313469741e-05, "loss": 0.0062, "step": 277710 }, { 
"epoch": 8.58440996476479, "grad_norm": 0.11914321780204773, "learning_rate": 1.5709324967546516e-05, "loss": 0.006, "step": 277740 }, { "epoch": 8.585337207145948, "grad_norm": 0.15381115674972534, "learning_rate": 1.570886134635594e-05, "loss": 0.007, "step": 277770 }, { "epoch": 8.586264449527107, "grad_norm": 0.12410711497068405, "learning_rate": 1.570839772516536e-05, "loss": 0.0063, "step": 277800 }, { "epoch": 8.587191691908265, "grad_norm": 0.12580150365829468, "learning_rate": 1.570793410397478e-05, "loss": 0.007, "step": 277830 }, { "epoch": 8.588118934289422, "grad_norm": 0.1410926729440689, "learning_rate": 1.5707470482784202e-05, "loss": 0.0061, "step": 277860 }, { "epoch": 8.589046176670582, "grad_norm": 0.08036790788173676, "learning_rate": 1.570700686159362e-05, "loss": 0.0065, "step": 277890 }, { "epoch": 8.58997341905174, "grad_norm": 0.13892899453639984, "learning_rate": 1.570654324040304e-05, "loss": 0.0064, "step": 277920 }, { "epoch": 8.590900661432899, "grad_norm": 0.11874902248382568, "learning_rate": 1.5706079619212463e-05, "loss": 0.0067, "step": 277950 }, { "epoch": 8.591827903814057, "grad_norm": 0.07475731521844864, "learning_rate": 1.5705615998021885e-05, "loss": 0.0057, "step": 277980 }, { "epoch": 8.592755146195216, "grad_norm": 0.10230797529220581, "learning_rate": 1.5705152376831306e-05, "loss": 0.0062, "step": 278010 }, { "epoch": 8.593682388576374, "grad_norm": 0.10755419731140137, "learning_rate": 1.5704688755640728e-05, "loss": 0.0065, "step": 278040 }, { "epoch": 8.594609630957532, "grad_norm": 0.07185583561658859, "learning_rate": 1.5704225134450146e-05, "loss": 0.0067, "step": 278070 }, { "epoch": 8.595536873338691, "grad_norm": 0.10317522287368774, "learning_rate": 1.5703761513259567e-05, "loss": 0.0063, "step": 278100 }, { "epoch": 8.596464115719849, "grad_norm": 0.07995908707380295, "learning_rate": 1.570329789206899e-05, "loss": 0.0066, "step": 278130 }, { "epoch": 8.597391358101008, "grad_norm": 0.11608703434467316, 
"learning_rate": 1.570283427087841e-05, "loss": 0.0066, "step": 278160 }, { "epoch": 8.598318600482166, "grad_norm": 0.1174299567937851, "learning_rate": 1.5702370649687832e-05, "loss": 0.0062, "step": 278190 }, { "epoch": 8.599245842863324, "grad_norm": 0.11813141405582428, "learning_rate": 1.570190702849725e-05, "loss": 0.0061, "step": 278220 }, { "epoch": 8.600173085244483, "grad_norm": 0.17393508553504944, "learning_rate": 1.570144340730667e-05, "loss": 0.0062, "step": 278250 }, { "epoch": 8.601100327625641, "grad_norm": 0.17133456468582153, "learning_rate": 1.5700979786116093e-05, "loss": 0.0066, "step": 278280 }, { "epoch": 8.6020275700068, "grad_norm": 0.15029208362102509, "learning_rate": 1.5700531618965197e-05, "loss": 0.0067, "step": 278310 }, { "epoch": 8.602954812387958, "grad_norm": 0.11255563050508499, "learning_rate": 1.570006799777462e-05, "loss": 0.0064, "step": 278340 }, { "epoch": 8.603882054769116, "grad_norm": 0.11670166254043579, "learning_rate": 1.569960437658404e-05, "loss": 0.0057, "step": 278370 }, { "epoch": 8.604809297150275, "grad_norm": 0.12497584521770477, "learning_rate": 1.5699140755393462e-05, "loss": 0.0065, "step": 278400 }, { "epoch": 8.605736539531433, "grad_norm": 0.11782553791999817, "learning_rate": 1.5698677134202883e-05, "loss": 0.0062, "step": 278430 }, { "epoch": 8.606663781912593, "grad_norm": 0.16947484016418457, "learning_rate": 1.5698213513012305e-05, "loss": 0.0059, "step": 278460 }, { "epoch": 8.60759102429375, "grad_norm": 0.11245585978031158, "learning_rate": 1.5697749891821723e-05, "loss": 0.0064, "step": 278490 }, { "epoch": 8.608518266674908, "grad_norm": 0.10518653690814972, "learning_rate": 1.5697286270631145e-05, "loss": 0.0066, "step": 278520 }, { "epoch": 8.609445509056068, "grad_norm": 0.15360428392887115, "learning_rate": 1.5696822649440563e-05, "loss": 0.0055, "step": 278550 }, { "epoch": 8.610372751437225, "grad_norm": 0.09494733810424805, "learning_rate": 1.5696359028249984e-05, "loss": 0.0058, 
"step": 278580 }, { "epoch": 8.611299993818385, "grad_norm": 0.1090579405426979, "learning_rate": 1.5695895407059406e-05, "loss": 0.0059, "step": 278610 }, { "epoch": 8.612227236199542, "grad_norm": 0.09903322160243988, "learning_rate": 1.5695431785868827e-05, "loss": 0.0065, "step": 278640 }, { "epoch": 8.6131544785807, "grad_norm": 0.14059312641620636, "learning_rate": 1.5694983618717935e-05, "loss": 0.0071, "step": 278670 }, { "epoch": 8.61408172096186, "grad_norm": 0.08357676863670349, "learning_rate": 1.5694519997527357e-05, "loss": 0.0062, "step": 278700 }, { "epoch": 8.615008963343017, "grad_norm": 0.11644619703292847, "learning_rate": 1.5694056376336775e-05, "loss": 0.0063, "step": 278730 }, { "epoch": 8.615936205724177, "grad_norm": 0.13085460662841797, "learning_rate": 1.5693592755146196e-05, "loss": 0.0062, "step": 278760 }, { "epoch": 8.616863448105335, "grad_norm": 0.1611739844083786, "learning_rate": 1.5693129133955618e-05, "loss": 0.0065, "step": 278790 }, { "epoch": 8.617790690486494, "grad_norm": 0.11098770797252655, "learning_rate": 1.5692665512765036e-05, "loss": 0.0059, "step": 278820 }, { "epoch": 8.618717932867652, "grad_norm": 0.09294576197862625, "learning_rate": 1.5692201891574457e-05, "loss": 0.0057, "step": 278850 }, { "epoch": 8.61964517524881, "grad_norm": 0.14266912639141083, "learning_rate": 1.569173827038388e-05, "loss": 0.0059, "step": 278880 }, { "epoch": 8.620572417629969, "grad_norm": 0.14333784580230713, "learning_rate": 1.56912746491933e-05, "loss": 0.007, "step": 278910 }, { "epoch": 8.621499660011127, "grad_norm": 0.06605474650859833, "learning_rate": 1.5690811028002722e-05, "loss": 0.0065, "step": 278940 }, { "epoch": 8.622426902392286, "grad_norm": 0.1396818310022354, "learning_rate": 1.5690347406812143e-05, "loss": 0.0057, "step": 278970 }, { "epoch": 8.623354144773444, "grad_norm": 0.09795329719781876, "learning_rate": 1.568988378562156e-05, "loss": 0.0062, "step": 279000 }, { "epoch": 8.624281387154602, "grad_norm": 
0.0972876101732254, "learning_rate": 1.5689420164430983e-05, "loss": 0.0063, "step": 279030 }, { "epoch": 8.625208629535761, "grad_norm": 0.10931547731161118, "learning_rate": 1.5688956543240405e-05, "loss": 0.0058, "step": 279060 }, { "epoch": 8.626135871916919, "grad_norm": 0.14566394686698914, "learning_rate": 1.5688492922049826e-05, "loss": 0.0064, "step": 279090 }, { "epoch": 8.627063114298078, "grad_norm": 0.1459396481513977, "learning_rate": 1.5688029300859248e-05, "loss": 0.0059, "step": 279120 }, { "epoch": 8.627990356679236, "grad_norm": 0.07827354222536087, "learning_rate": 1.5687565679668666e-05, "loss": 0.0067, "step": 279150 }, { "epoch": 8.628917599060394, "grad_norm": 0.08644790202379227, "learning_rate": 1.5687102058478087e-05, "loss": 0.0061, "step": 279180 }, { "epoch": 8.629844841441553, "grad_norm": 0.14820219576358795, "learning_rate": 1.568663843728751e-05, "loss": 0.0059, "step": 279210 }, { "epoch": 8.63077208382271, "grad_norm": 0.3089251220226288, "learning_rate": 1.568617481609693e-05, "loss": 0.0065, "step": 279240 }, { "epoch": 8.63169932620387, "grad_norm": 0.07495931535959244, "learning_rate": 1.568571119490635e-05, "loss": 0.0058, "step": 279270 }, { "epoch": 8.632626568585028, "grad_norm": 0.14639854431152344, "learning_rate": 1.5685247573715773e-05, "loss": 0.0063, "step": 279300 }, { "epoch": 8.633553810966186, "grad_norm": 0.11843100190162659, "learning_rate": 1.568478395252519e-05, "loss": 0.0057, "step": 279330 }, { "epoch": 8.634481053347345, "grad_norm": 0.1287866234779358, "learning_rate": 1.5684320331334613e-05, "loss": 0.0058, "step": 279360 }, { "epoch": 8.635408295728503, "grad_norm": 0.1474936306476593, "learning_rate": 1.568385671014403e-05, "loss": 0.0062, "step": 279390 }, { "epoch": 8.636335538109662, "grad_norm": 0.07772532850503922, "learning_rate": 1.5683393088953452e-05, "loss": 0.0059, "step": 279420 }, { "epoch": 8.63726278049082, "grad_norm": 0.11577235162258148, "learning_rate": 1.5682929467762874e-05, 
"loss": 0.0061, "step": 279450 }, { "epoch": 8.638190022871978, "grad_norm": 0.1571439504623413, "learning_rate": 1.5682465846572295e-05, "loss": 0.0068, "step": 279480 }, { "epoch": 8.639117265253137, "grad_norm": 0.13748690485954285, "learning_rate": 1.5682002225381717e-05, "loss": 0.0057, "step": 279510 }, { "epoch": 8.640044507634295, "grad_norm": 0.1534804254770279, "learning_rate": 1.568153860419114e-05, "loss": 0.0066, "step": 279540 }, { "epoch": 8.640971750015455, "grad_norm": 0.18024161458015442, "learning_rate": 1.5681074983000557e-05, "loss": 0.0067, "step": 279570 }, { "epoch": 8.641898992396612, "grad_norm": 0.10741374641656876, "learning_rate": 1.5680611361809978e-05, "loss": 0.0064, "step": 279600 }, { "epoch": 8.642826234777772, "grad_norm": 0.11008093506097794, "learning_rate": 1.56801477406194e-05, "loss": 0.0061, "step": 279630 }, { "epoch": 8.64375347715893, "grad_norm": 0.13094478845596313, "learning_rate": 1.567968411942882e-05, "loss": 0.0065, "step": 279660 }, { "epoch": 8.644680719540087, "grad_norm": 0.09677580744028091, "learning_rate": 1.5679220498238243e-05, "loss": 0.0066, "step": 279690 }, { "epoch": 8.645607961921247, "grad_norm": 0.15896736085414886, "learning_rate": 1.567875687704766e-05, "loss": 0.0057, "step": 279720 }, { "epoch": 8.646535204302404, "grad_norm": 0.11641155928373337, "learning_rate": 1.5678293255857082e-05, "loss": 0.007, "step": 279750 }, { "epoch": 8.647462446683564, "grad_norm": 0.12486547976732254, "learning_rate": 1.5677829634666504e-05, "loss": 0.0058, "step": 279780 }, { "epoch": 8.648389689064722, "grad_norm": 0.11587054282426834, "learning_rate": 1.5677366013475922e-05, "loss": 0.0065, "step": 279810 }, { "epoch": 8.64931693144588, "grad_norm": 0.10801964998245239, "learning_rate": 1.5676902392285343e-05, "loss": 0.0057, "step": 279840 }, { "epoch": 8.650244173827039, "grad_norm": 0.14684702455997467, "learning_rate": 1.5676438771094765e-05, "loss": 0.0059, "step": 279870 }, { "epoch": 8.651171416208197, 
"grad_norm": 0.11543174833059311, "learning_rate": 1.5675975149904186e-05, "loss": 0.0066, "step": 279900 }, { "epoch": 8.652098658589356, "grad_norm": 0.08777501434087753, "learning_rate": 1.5675511528713608e-05, "loss": 0.0063, "step": 279930 }, { "epoch": 8.653025900970514, "grad_norm": 0.09090646356344223, "learning_rate": 1.567504790752303e-05, "loss": 0.0068, "step": 279960 }, { "epoch": 8.653953143351671, "grad_norm": 0.08094088733196259, "learning_rate": 1.5674584286332448e-05, "loss": 0.0062, "step": 279990 }, { "epoch": 8.654880385732831, "grad_norm": 0.07306468486785889, "learning_rate": 1.567412066514187e-05, "loss": 0.0062, "step": 280020 }, { "epoch": 8.655807628113989, "grad_norm": 0.10146187245845795, "learning_rate": 1.567365704395129e-05, "loss": 0.0071, "step": 280050 }, { "epoch": 8.656734870495148, "grad_norm": 0.09982029348611832, "learning_rate": 1.5673193422760712e-05, "loss": 0.0061, "step": 280080 }, { "epoch": 8.657662112876306, "grad_norm": 0.07766479253768921, "learning_rate": 1.5672729801570134e-05, "loss": 0.0062, "step": 280110 }, { "epoch": 8.658589355257464, "grad_norm": 0.1053607240319252, "learning_rate": 1.567226618037955e-05, "loss": 0.0061, "step": 280140 }, { "epoch": 8.659516597638623, "grad_norm": 0.11345262825489044, "learning_rate": 1.5671802559188973e-05, "loss": 0.0059, "step": 280170 }, { "epoch": 8.66044384001978, "grad_norm": 0.08995447307825089, "learning_rate": 1.5671338937998395e-05, "loss": 0.006, "step": 280200 }, { "epoch": 8.66137108240094, "grad_norm": 0.10531699657440186, "learning_rate": 1.5670875316807813e-05, "loss": 0.0068, "step": 280230 }, { "epoch": 8.662298324782098, "grad_norm": 0.14145466685295105, "learning_rate": 1.5670411695617234e-05, "loss": 0.0059, "step": 280260 }, { "epoch": 8.663225567163256, "grad_norm": 0.09410306066274643, "learning_rate": 1.5669948074426656e-05, "loss": 0.0063, "step": 280290 }, { "epoch": 8.664152809544415, "grad_norm": 0.13337092101573944, "learning_rate": 
1.5669484453236077e-05, "loss": 0.006, "step": 280320 }, { "epoch": 8.665080051925573, "grad_norm": 0.08646559715270996, "learning_rate": 1.56690208320455e-05, "loss": 0.006, "step": 280350 }, { "epoch": 8.666007294306732, "grad_norm": 0.11134910583496094, "learning_rate": 1.5668557210854917e-05, "loss": 0.0066, "step": 280380 }, { "epoch": 8.66693453668789, "grad_norm": 0.12203511595726013, "learning_rate": 1.566809358966434e-05, "loss": 0.0064, "step": 280410 }, { "epoch": 8.66786177906905, "grad_norm": 0.15194490551948547, "learning_rate": 1.566762996847376e-05, "loss": 0.0056, "step": 280440 }, { "epoch": 8.668789021450207, "grad_norm": 0.18552380800247192, "learning_rate": 1.566716634728318e-05, "loss": 0.0057, "step": 280470 }, { "epoch": 8.669716263831365, "grad_norm": 0.16712048649787903, "learning_rate": 1.5666702726092603e-05, "loss": 0.0064, "step": 280500 }, { "epoch": 8.670643506212524, "grad_norm": 0.12864075601100922, "learning_rate": 1.5666239104902024e-05, "loss": 0.0061, "step": 280530 }, { "epoch": 8.671570748593682, "grad_norm": 0.0788455456495285, "learning_rate": 1.5665775483711443e-05, "loss": 0.006, "step": 280560 }, { "epoch": 8.672497990974842, "grad_norm": 0.17726320028305054, "learning_rate": 1.5665311862520864e-05, "loss": 0.006, "step": 280590 }, { "epoch": 8.673425233356, "grad_norm": 0.1238025426864624, "learning_rate": 1.5664848241330286e-05, "loss": 0.0065, "step": 280620 }, { "epoch": 8.674352475737157, "grad_norm": 0.09334718436002731, "learning_rate": 1.5664384620139707e-05, "loss": 0.006, "step": 280650 }, { "epoch": 8.675279718118317, "grad_norm": 0.17102377116680145, "learning_rate": 1.566392099894913e-05, "loss": 0.0069, "step": 280680 }, { "epoch": 8.676206960499474, "grad_norm": 0.08316673338413239, "learning_rate": 1.5663457377758547e-05, "loss": 0.0057, "step": 280710 }, { "epoch": 8.677134202880634, "grad_norm": 0.1192973256111145, "learning_rate": 1.5662993756567968e-05, "loss": 0.0057, "step": 280740 }, { "epoch": 
8.678061445261791, "grad_norm": 0.15000921487808228, "learning_rate": 1.566253013537739e-05, "loss": 0.0063, "step": 280770 }, { "epoch": 8.67898868764295, "grad_norm": 0.13895457983016968, "learning_rate": 1.5662066514186808e-05, "loss": 0.0066, "step": 280800 }, { "epoch": 8.679915930024109, "grad_norm": 0.11433397978544235, "learning_rate": 1.566160289299623e-05, "loss": 0.0061, "step": 280830 }, { "epoch": 8.680843172405266, "grad_norm": 0.06768877059221268, "learning_rate": 1.566113927180565e-05, "loss": 0.0065, "step": 280860 }, { "epoch": 8.681770414786426, "grad_norm": 0.11233347654342651, "learning_rate": 1.5660675650615072e-05, "loss": 0.0058, "step": 280890 }, { "epoch": 8.682697657167584, "grad_norm": 0.09571659564971924, "learning_rate": 1.5660212029424494e-05, "loss": 0.0058, "step": 280920 }, { "epoch": 8.683624899548741, "grad_norm": 0.1250942349433899, "learning_rate": 1.5659748408233915e-05, "loss": 0.0063, "step": 280950 }, { "epoch": 8.6845521419299, "grad_norm": 0.10099367052316666, "learning_rate": 1.5659284787043333e-05, "loss": 0.0064, "step": 280980 }, { "epoch": 8.685479384311058, "grad_norm": 0.10189837962388992, "learning_rate": 1.5658821165852755e-05, "loss": 0.007, "step": 281010 }, { "epoch": 8.686406626692218, "grad_norm": 0.10387793928384781, "learning_rate": 1.5658357544662176e-05, "loss": 0.0061, "step": 281040 }, { "epoch": 8.687333869073376, "grad_norm": 0.13288697600364685, "learning_rate": 1.5657893923471598e-05, "loss": 0.0061, "step": 281070 }, { "epoch": 8.688261111454533, "grad_norm": 0.11083001643419266, "learning_rate": 1.565743030228102e-05, "loss": 0.0067, "step": 281100 }, { "epoch": 8.689188353835693, "grad_norm": 0.08948743343353271, "learning_rate": 1.5656966681090438e-05, "loss": 0.0063, "step": 281130 }, { "epoch": 8.69011559621685, "grad_norm": 0.12247352302074432, "learning_rate": 1.565650305989986e-05, "loss": 0.0067, "step": 281160 }, { "epoch": 8.69104283859801, "grad_norm": 0.10123012959957123, 
"learning_rate": 1.565603943870928e-05, "loss": 0.0063, "step": 281190 }, { "epoch": 8.691970080979168, "grad_norm": 0.15148553252220154, "learning_rate": 1.56555758175187e-05, "loss": 0.0053, "step": 281220 }, { "epoch": 8.692897323360327, "grad_norm": 0.1391536444425583, "learning_rate": 1.565511219632812e-05, "loss": 0.0062, "step": 281250 }, { "epoch": 8.693824565741485, "grad_norm": 0.09842754900455475, "learning_rate": 1.5654648575137542e-05, "loss": 0.0059, "step": 281280 }, { "epoch": 8.694751808122643, "grad_norm": 0.17379942536354065, "learning_rate": 1.5654184953946963e-05, "loss": 0.0059, "step": 281310 }, { "epoch": 8.695679050503802, "grad_norm": 0.12652193009853363, "learning_rate": 1.5653721332756385e-05, "loss": 0.006, "step": 281340 }, { "epoch": 8.69660629288496, "grad_norm": 0.09738149493932724, "learning_rate": 1.5653257711565803e-05, "loss": 0.006, "step": 281370 }, { "epoch": 8.69753353526612, "grad_norm": 0.09676806628704071, "learning_rate": 1.5652794090375224e-05, "loss": 0.0058, "step": 281400 }, { "epoch": 8.698460777647277, "grad_norm": 0.14041399955749512, "learning_rate": 1.5652330469184646e-05, "loss": 0.0062, "step": 281430 }, { "epoch": 8.699388020028435, "grad_norm": 0.1268707513809204, "learning_rate": 1.5651866847994067e-05, "loss": 0.0064, "step": 281460 }, { "epoch": 8.700315262409594, "grad_norm": 0.09624163061380386, "learning_rate": 1.565140322680349e-05, "loss": 0.0065, "step": 281490 }, { "epoch": 8.701242504790752, "grad_norm": 0.12123564630746841, "learning_rate": 1.565093960561291e-05, "loss": 0.006, "step": 281520 }, { "epoch": 8.702169747171911, "grad_norm": 0.13964243233203888, "learning_rate": 1.565047598442233e-05, "loss": 0.0065, "step": 281550 }, { "epoch": 8.70309698955307, "grad_norm": 0.10834562033414841, "learning_rate": 1.565001236323175e-05, "loss": 0.006, "step": 281580 }, { "epoch": 8.704024231934227, "grad_norm": 0.10420635342597961, "learning_rate": 1.564954874204117e-05, "loss": 0.0064, "step": 281610 
}, { "epoch": 8.704951474315386, "grad_norm": 0.08915404230356216, "learning_rate": 1.564908512085059e-05, "loss": 0.0059, "step": 281640 }, { "epoch": 8.705878716696544, "grad_norm": 0.11915791779756546, "learning_rate": 1.564862149966001e-05, "loss": 0.0065, "step": 281670 }, { "epoch": 8.706805959077704, "grad_norm": 0.07844915986061096, "learning_rate": 1.5648157878469433e-05, "loss": 0.006, "step": 281700 }, { "epoch": 8.707733201458861, "grad_norm": 0.10274390876293182, "learning_rate": 1.5647694257278854e-05, "loss": 0.0072, "step": 281730 }, { "epoch": 8.708660443840019, "grad_norm": 0.10794869065284729, "learning_rate": 1.5647230636088276e-05, "loss": 0.0061, "step": 281760 }, { "epoch": 8.709587686221179, "grad_norm": 0.08353573083877563, "learning_rate": 1.5646767014897694e-05, "loss": 0.0061, "step": 281790 }, { "epoch": 8.710514928602336, "grad_norm": 0.14798586070537567, "learning_rate": 1.5646303393707115e-05, "loss": 0.006, "step": 281820 }, { "epoch": 8.711442170983496, "grad_norm": 0.14611677825450897, "learning_rate": 1.5645839772516537e-05, "loss": 0.0061, "step": 281850 }, { "epoch": 8.712369413364653, "grad_norm": 0.0816497653722763, "learning_rate": 1.564537615132596e-05, "loss": 0.006, "step": 281880 }, { "epoch": 8.713296655745811, "grad_norm": 0.12666964530944824, "learning_rate": 1.564491253013538e-05, "loss": 0.0066, "step": 281910 }, { "epoch": 8.71422389812697, "grad_norm": 0.08704613149166107, "learning_rate": 1.56444489089448e-05, "loss": 0.0061, "step": 281940 }, { "epoch": 8.715151140508128, "grad_norm": 0.11301422864198685, "learning_rate": 1.564398528775422e-05, "loss": 0.0059, "step": 281970 }, { "epoch": 8.716078382889288, "grad_norm": 0.11225921660661697, "learning_rate": 1.564352166656364e-05, "loss": 0.0065, "step": 282000 }, { "epoch": 8.717005625270446, "grad_norm": 0.19803689420223236, "learning_rate": 1.564305804537306e-05, "loss": 0.0065, "step": 282030 }, { "epoch": 8.717932867651605, "grad_norm": 0.09548771381378174, 
"learning_rate": 1.5642594424182484e-05, "loss": 0.0064, "step": 282060 }, { "epoch": 8.718860110032763, "grad_norm": 0.17417272925376892, "learning_rate": 1.5642130802991905e-05, "loss": 0.0063, "step": 282090 }, { "epoch": 8.71978735241392, "grad_norm": 0.09655600041151047, "learning_rate": 1.5641667181801324e-05, "loss": 0.0059, "step": 282120 }, { "epoch": 8.72071459479508, "grad_norm": 0.11751247197389603, "learning_rate": 1.5641203560610745e-05, "loss": 0.0062, "step": 282150 }, { "epoch": 8.721641837176238, "grad_norm": 0.14304716885089874, "learning_rate": 1.5640739939420167e-05, "loss": 0.0067, "step": 282180 }, { "epoch": 8.722569079557397, "grad_norm": 0.12479252368211746, "learning_rate": 1.5640276318229585e-05, "loss": 0.0061, "step": 282210 }, { "epoch": 8.723496321938555, "grad_norm": 0.14528398215770721, "learning_rate": 1.5639812697039006e-05, "loss": 0.006, "step": 282240 }, { "epoch": 8.724423564319713, "grad_norm": 0.1121467649936676, "learning_rate": 1.5639349075848428e-05, "loss": 0.0064, "step": 282270 }, { "epoch": 8.725350806700872, "grad_norm": 0.10671243816614151, "learning_rate": 1.563888545465785e-05, "loss": 0.0061, "step": 282300 }, { "epoch": 8.72627804908203, "grad_norm": 0.11876792460680008, "learning_rate": 1.563842183346727e-05, "loss": 0.0071, "step": 282330 }, { "epoch": 8.72720529146319, "grad_norm": 0.10143280029296875, "learning_rate": 1.563795821227669e-05, "loss": 0.0064, "step": 282360 }, { "epoch": 8.728132533844347, "grad_norm": 0.14669965207576752, "learning_rate": 1.563749459108611e-05, "loss": 0.0061, "step": 282390 }, { "epoch": 8.729059776225505, "grad_norm": 0.09565751254558563, "learning_rate": 1.5637030969895532e-05, "loss": 0.0062, "step": 282420 }, { "epoch": 8.729987018606664, "grad_norm": 0.10282764583826065, "learning_rate": 1.5636567348704953e-05, "loss": 0.0059, "step": 282450 }, { "epoch": 8.730914260987822, "grad_norm": 0.12016064673662186, "learning_rate": 1.5636103727514375e-05, "loss": 0.0063, 
"step": 282480 }, { "epoch": 8.731841503368981, "grad_norm": 0.08634697645902634, "learning_rate": 1.5635640106323796e-05, "loss": 0.0067, "step": 282510 }, { "epoch": 8.732768745750139, "grad_norm": 0.07585601508617401, "learning_rate": 1.5635176485133215e-05, "loss": 0.0066, "step": 282540 }, { "epoch": 8.733695988131297, "grad_norm": 0.11635281145572662, "learning_rate": 1.5634712863942636e-05, "loss": 0.006, "step": 282570 }, { "epoch": 8.734623230512456, "grad_norm": 0.09506703168153763, "learning_rate": 1.5634249242752058e-05, "loss": 0.0062, "step": 282600 }, { "epoch": 8.735550472893614, "grad_norm": 0.10004232078790665, "learning_rate": 1.5633785621561476e-05, "loss": 0.0061, "step": 282630 }, { "epoch": 8.736477715274773, "grad_norm": 0.16331276297569275, "learning_rate": 1.5633322000370897e-05, "loss": 0.0058, "step": 282660 }, { "epoch": 8.737404957655931, "grad_norm": 0.12414882332086563, "learning_rate": 1.563285837918032e-05, "loss": 0.0066, "step": 282690 }, { "epoch": 8.738332200037089, "grad_norm": 0.14060786366462708, "learning_rate": 1.563239475798974e-05, "loss": 0.0056, "step": 282720 }, { "epoch": 8.739259442418248, "grad_norm": 0.0850054919719696, "learning_rate": 1.563193113679916e-05, "loss": 0.0056, "step": 282750 }, { "epoch": 8.740186684799406, "grad_norm": 0.12959003448486328, "learning_rate": 1.563146751560858e-05, "loss": 0.0058, "step": 282780 }, { "epoch": 8.741113927180566, "grad_norm": 0.0691242516040802, "learning_rate": 1.5631003894418e-05, "loss": 0.0061, "step": 282810 }, { "epoch": 8.742041169561723, "grad_norm": 0.09126889705657959, "learning_rate": 1.5630540273227423e-05, "loss": 0.006, "step": 282840 }, { "epoch": 8.742968411942883, "grad_norm": 0.1511082649230957, "learning_rate": 1.5630076652036844e-05, "loss": 0.0064, "step": 282870 }, { "epoch": 8.74389565432404, "grad_norm": 0.13258497416973114, "learning_rate": 1.5629613030846266e-05, "loss": 0.0062, "step": 282900 }, { "epoch": 8.744822896705198, "grad_norm": 
0.10138455033302307, "learning_rate": 1.5629149409655687e-05, "loss": 0.0069, "step": 282930 }, { "epoch": 8.745750139086358, "grad_norm": 0.10740882903337479, "learning_rate": 1.5628685788465105e-05, "loss": 0.0057, "step": 282960 }, { "epoch": 8.746677381467515, "grad_norm": 0.0836658701300621, "learning_rate": 1.5628222167274527e-05, "loss": 0.0062, "step": 282990 }, { "epoch": 8.747604623848673, "grad_norm": 0.11052761226892471, "learning_rate": 1.562775854608395e-05, "loss": 0.0067, "step": 283020 }, { "epoch": 8.748531866229833, "grad_norm": 0.12470018863677979, "learning_rate": 1.5627294924893367e-05, "loss": 0.0064, "step": 283050 }, { "epoch": 8.74945910861099, "grad_norm": 0.1153864786028862, "learning_rate": 1.5626831303702788e-05, "loss": 0.0063, "step": 283080 }, { "epoch": 8.75038635099215, "grad_norm": 0.11327345669269562, "learning_rate": 1.562636768251221e-05, "loss": 0.0068, "step": 283110 }, { "epoch": 8.751313593373307, "grad_norm": 0.09984742850065231, "learning_rate": 1.562590406132163e-05, "loss": 0.0063, "step": 283140 }, { "epoch": 8.752240835754467, "grad_norm": 0.06382758915424347, "learning_rate": 1.5625440440131053e-05, "loss": 0.006, "step": 283170 }, { "epoch": 8.753168078135625, "grad_norm": 0.14428368210792542, "learning_rate": 1.562497681894047e-05, "loss": 0.0065, "step": 283200 }, { "epoch": 8.754095320516782, "grad_norm": 0.12043283134698868, "learning_rate": 1.5624513197749892e-05, "loss": 0.0066, "step": 283230 }, { "epoch": 8.755022562897942, "grad_norm": 0.10053737461566925, "learning_rate": 1.5624049576559314e-05, "loss": 0.0057, "step": 283260 }, { "epoch": 8.7559498052791, "grad_norm": 0.08805570751428604, "learning_rate": 1.5623585955368735e-05, "loss": 0.0067, "step": 283290 }, { "epoch": 8.756877047660259, "grad_norm": 0.13962647318840027, "learning_rate": 1.5623122334178157e-05, "loss": 0.0061, "step": 283320 }, { "epoch": 8.757804290041417, "grad_norm": 0.100274458527565, "learning_rate": 1.5622658712987578e-05, 
"loss": 0.0061, "step": 283350 }, { "epoch": 8.758731532422575, "grad_norm": 0.09569943696260452, "learning_rate": 1.5622195091796996e-05, "loss": 0.0059, "step": 283380 }, { "epoch": 8.759658774803734, "grad_norm": 0.15355265140533447, "learning_rate": 1.5621731470606418e-05, "loss": 0.0062, "step": 283410 }, { "epoch": 8.760586017184892, "grad_norm": 0.1297045201063156, "learning_rate": 1.5621267849415836e-05, "loss": 0.0059, "step": 283440 }, { "epoch": 8.761513259566051, "grad_norm": 0.17044298350811005, "learning_rate": 1.562080422822526e-05, "loss": 0.0061, "step": 283470 }, { "epoch": 8.762440501947209, "grad_norm": 0.09162367135286331, "learning_rate": 1.5620340607034682e-05, "loss": 0.0063, "step": 283500 }, { "epoch": 8.763367744328367, "grad_norm": 0.08376022428274155, "learning_rate": 1.56198769858441e-05, "loss": 0.0062, "step": 283530 }, { "epoch": 8.764294986709526, "grad_norm": 0.114286407828331, "learning_rate": 1.5619413364653522e-05, "loss": 0.0066, "step": 283560 }, { "epoch": 8.765222229090684, "grad_norm": 0.1855841726064682, "learning_rate": 1.5618949743462944e-05, "loss": 0.0061, "step": 283590 }, { "epoch": 8.766149471471843, "grad_norm": 0.11055628955364227, "learning_rate": 1.561848612227236e-05, "loss": 0.0068, "step": 283620 }, { "epoch": 8.767076713853001, "grad_norm": 0.1458265781402588, "learning_rate": 1.5618022501081783e-05, "loss": 0.0057, "step": 283650 }, { "epoch": 8.76800395623416, "grad_norm": 0.1237092837691307, "learning_rate": 1.5617558879891205e-05, "loss": 0.0064, "step": 283680 }, { "epoch": 8.768931198615318, "grad_norm": 0.1270417422056198, "learning_rate": 1.5617095258700626e-05, "loss": 0.0061, "step": 283710 }, { "epoch": 8.769858440996476, "grad_norm": 0.17461811006069183, "learning_rate": 1.5616631637510048e-05, "loss": 0.0062, "step": 283740 }, { "epoch": 8.770785683377635, "grad_norm": 0.12058336287736893, "learning_rate": 1.5616168016319466e-05, "loss": 0.0065, "step": 283770 }, { "epoch": 8.771712925758793, 
"grad_norm": 0.0964001715183258, "learning_rate": 1.5615704395128887e-05, "loss": 0.0058, "step": 283800 }, { "epoch": 8.77264016813995, "grad_norm": 0.10350365191698074, "learning_rate": 1.561524077393831e-05, "loss": 0.0068, "step": 283830 }, { "epoch": 8.77356741052111, "grad_norm": 0.10784243792295456, "learning_rate": 1.561477715274773e-05, "loss": 0.0065, "step": 283860 }, { "epoch": 8.774494652902268, "grad_norm": 0.09677717089653015, "learning_rate": 1.5614313531557152e-05, "loss": 0.0057, "step": 283890 }, { "epoch": 8.775421895283428, "grad_norm": 0.09858093410730362, "learning_rate": 1.5613849910366573e-05, "loss": 0.0059, "step": 283920 }, { "epoch": 8.776349137664585, "grad_norm": 0.16651251912117004, "learning_rate": 1.561338628917599e-05, "loss": 0.0069, "step": 283950 }, { "epoch": 8.777276380045745, "grad_norm": 0.07497704029083252, "learning_rate": 1.5612922667985413e-05, "loss": 0.006, "step": 283980 }, { "epoch": 8.778203622426902, "grad_norm": 0.09583687037229538, "learning_rate": 1.5612459046794834e-05, "loss": 0.006, "step": 284010 }, { "epoch": 8.77913086480806, "grad_norm": 0.10835348069667816, "learning_rate": 1.5611995425604253e-05, "loss": 0.0064, "step": 284040 }, { "epoch": 8.78005810718922, "grad_norm": 0.10853971540927887, "learning_rate": 1.5611531804413674e-05, "loss": 0.0056, "step": 284070 }, { "epoch": 8.780985349570377, "grad_norm": 0.15045444667339325, "learning_rate": 1.5611068183223096e-05, "loss": 0.0065, "step": 284100 }, { "epoch": 8.781912591951537, "grad_norm": 0.06636693328619003, "learning_rate": 1.5610604562032517e-05, "loss": 0.0064, "step": 284130 }, { "epoch": 8.782839834332695, "grad_norm": 0.14883600175380707, "learning_rate": 1.561014094084194e-05, "loss": 0.0063, "step": 284160 }, { "epoch": 8.783767076713852, "grad_norm": 0.11751712113618851, "learning_rate": 1.5609677319651357e-05, "loss": 0.0059, "step": 284190 }, { "epoch": 8.784694319095012, "grad_norm": 0.08360660821199417, "learning_rate": 
1.5609213698460778e-05, "loss": 0.0068, "step": 284220 }, { "epoch": 8.78562156147617, "grad_norm": 0.07737286388874054, "learning_rate": 1.56087500772702e-05, "loss": 0.0064, "step": 284250 }, { "epoch": 8.786548803857329, "grad_norm": 0.19022846221923828, "learning_rate": 1.560828645607962e-05, "loss": 0.0062, "step": 284280 }, { "epoch": 8.787476046238487, "grad_norm": 0.10602439939975739, "learning_rate": 1.5607822834889043e-05, "loss": 0.0065, "step": 284310 }, { "epoch": 8.788403288619644, "grad_norm": 0.13565832376480103, "learning_rate": 1.5607359213698464e-05, "loss": 0.0063, "step": 284340 }, { "epoch": 8.789330531000804, "grad_norm": 0.13638675212860107, "learning_rate": 1.5606895592507882e-05, "loss": 0.0062, "step": 284370 }, { "epoch": 8.790257773381962, "grad_norm": 0.16183426976203918, "learning_rate": 1.5606431971317304e-05, "loss": 0.0056, "step": 284400 }, { "epoch": 8.791185015763121, "grad_norm": 0.16381876170635223, "learning_rate": 1.5605968350126722e-05, "loss": 0.0062, "step": 284430 }, { "epoch": 8.792112258144279, "grad_norm": 0.14008203148841858, "learning_rate": 1.5605504728936143e-05, "loss": 0.0065, "step": 284460 }, { "epoch": 8.793039500525438, "grad_norm": 0.0950058251619339, "learning_rate": 1.5605041107745565e-05, "loss": 0.0064, "step": 284490 }, { "epoch": 8.793966742906596, "grad_norm": 0.1269652098417282, "learning_rate": 1.5604577486554986e-05, "loss": 0.0063, "step": 284520 }, { "epoch": 8.794893985287754, "grad_norm": 0.10473976284265518, "learning_rate": 1.5604113865364408e-05, "loss": 0.0062, "step": 284550 }, { "epoch": 8.795821227668913, "grad_norm": 0.0742664784193039, "learning_rate": 1.560365024417383e-05, "loss": 0.0061, "step": 284580 }, { "epoch": 8.79674847005007, "grad_norm": 0.09290721267461777, "learning_rate": 1.5603186622983248e-05, "loss": 0.0055, "step": 284610 }, { "epoch": 8.797675712431229, "grad_norm": 0.1379142850637436, "learning_rate": 1.560272300179267e-05, "loss": 0.006, "step": 284640 }, { 
"epoch": 8.798602954812388, "grad_norm": 0.1567258983850479, "learning_rate": 1.560225938060209e-05, "loss": 0.006, "step": 284670 }, { "epoch": 8.799530197193546, "grad_norm": 0.1782144159078598, "learning_rate": 1.5601795759411512e-05, "loss": 0.0066, "step": 284700 }, { "epoch": 8.800457439574705, "grad_norm": 0.16133534908294678, "learning_rate": 1.5601332138220934e-05, "loss": 0.0062, "step": 284730 }, { "epoch": 8.801384681955863, "grad_norm": 0.10878104716539383, "learning_rate": 1.5600868517030352e-05, "loss": 0.0063, "step": 284760 }, { "epoch": 8.802311924337022, "grad_norm": 0.11763976514339447, "learning_rate": 1.5600404895839773e-05, "loss": 0.0062, "step": 284790 }, { "epoch": 8.80323916671818, "grad_norm": 0.13660405576229095, "learning_rate": 1.5599941274649195e-05, "loss": 0.0057, "step": 284820 }, { "epoch": 8.804166409099338, "grad_norm": 0.06370704621076584, "learning_rate": 1.5599477653458613e-05, "loss": 0.0065, "step": 284850 }, { "epoch": 8.805093651480497, "grad_norm": 0.12893131375312805, "learning_rate": 1.5599014032268038e-05, "loss": 0.0059, "step": 284880 }, { "epoch": 8.806020893861655, "grad_norm": 0.13643865287303925, "learning_rate": 1.5598565865117142e-05, "loss": 0.0057, "step": 284910 }, { "epoch": 8.806948136242815, "grad_norm": 0.1972021758556366, "learning_rate": 1.5598102243926564e-05, "loss": 0.0062, "step": 284940 }, { "epoch": 8.807875378623972, "grad_norm": 0.11606670171022415, "learning_rate": 1.5597638622735985e-05, "loss": 0.0059, "step": 284970 }, { "epoch": 8.80880262100513, "grad_norm": 0.1294039487838745, "learning_rate": 1.5597175001545407e-05, "loss": 0.0067, "step": 285000 }, { "epoch": 8.80972986338629, "grad_norm": 0.09175010025501251, "learning_rate": 1.5596711380354825e-05, "loss": 0.006, "step": 285030 }, { "epoch": 8.810657105767447, "grad_norm": 0.1339528113603592, "learning_rate": 1.5596247759164246e-05, "loss": 0.0059, "step": 285060 }, { "epoch": 8.811584348148607, "grad_norm": 0.08734731376171112, 
"learning_rate": 1.5595784137973668e-05, "loss": 0.006, "step": 285090 }, { "epoch": 8.812511590529764, "grad_norm": 0.1468389481306076, "learning_rate": 1.559532051678309e-05, "loss": 0.0067, "step": 285120 }, { "epoch": 8.813438832910922, "grad_norm": 0.11777443438768387, "learning_rate": 1.559485689559251e-05, "loss": 0.0062, "step": 285150 }, { "epoch": 8.814366075292082, "grad_norm": 0.09424161911010742, "learning_rate": 1.559439327440193e-05, "loss": 0.0058, "step": 285180 }, { "epoch": 8.81529331767324, "grad_norm": 0.12529288232326508, "learning_rate": 1.559392965321135e-05, "loss": 0.0067, "step": 285210 }, { "epoch": 8.816220560054399, "grad_norm": 0.056220892816782, "learning_rate": 1.5593466032020772e-05, "loss": 0.0062, "step": 285240 }, { "epoch": 8.817147802435557, "grad_norm": 0.11593470722436905, "learning_rate": 1.559300241083019e-05, "loss": 0.0063, "step": 285270 }, { "epoch": 8.818075044816716, "grad_norm": 0.10783573985099792, "learning_rate": 1.5592538789639612e-05, "loss": 0.006, "step": 285300 }, { "epoch": 8.819002287197874, "grad_norm": 0.10715007036924362, "learning_rate": 1.5592075168449033e-05, "loss": 0.0065, "step": 285330 }, { "epoch": 8.819929529579031, "grad_norm": 0.11120899766683578, "learning_rate": 1.5591611547258455e-05, "loss": 0.0066, "step": 285360 }, { "epoch": 8.820856771960191, "grad_norm": 0.09356949478387833, "learning_rate": 1.5591147926067876e-05, "loss": 0.0062, "step": 285390 }, { "epoch": 8.821784014341349, "grad_norm": 0.14942486584186554, "learning_rate": 1.5590684304877298e-05, "loss": 0.0067, "step": 285420 }, { "epoch": 8.822711256722506, "grad_norm": 0.09792817384004593, "learning_rate": 1.5590220683686716e-05, "loss": 0.0068, "step": 285450 }, { "epoch": 8.823638499103666, "grad_norm": 0.17714521288871765, "learning_rate": 1.5589757062496137e-05, "loss": 0.0066, "step": 285480 }, { "epoch": 8.824565741484824, "grad_norm": 0.15919427573680878, "learning_rate": 1.558929344130556e-05, "loss": 0.0061, "step": 
285510 }, { "epoch": 8.825492983865983, "grad_norm": 0.17091967165470123, "learning_rate": 1.558882982011498e-05, "loss": 0.0063, "step": 285540 }, { "epoch": 8.82642022624714, "grad_norm": 0.11602559685707092, "learning_rate": 1.5588366198924402e-05, "loss": 0.0062, "step": 285570 }, { "epoch": 8.8273474686283, "grad_norm": 0.11891255527734756, "learning_rate": 1.558790257773382e-05, "loss": 0.0071, "step": 285600 }, { "epoch": 8.828274711009458, "grad_norm": 0.13771992921829224, "learning_rate": 1.558743895654324e-05, "loss": 0.0061, "step": 285630 }, { "epoch": 8.829201953390616, "grad_norm": 0.08505664020776749, "learning_rate": 1.5586975335352663e-05, "loss": 0.0056, "step": 285660 }, { "epoch": 8.830129195771775, "grad_norm": 0.09372039884328842, "learning_rate": 1.558651171416208e-05, "loss": 0.0061, "step": 285690 }, { "epoch": 8.831056438152933, "grad_norm": 0.1625286042690277, "learning_rate": 1.5586048092971503e-05, "loss": 0.0064, "step": 285720 }, { "epoch": 8.831983680534092, "grad_norm": 0.1136443167924881, "learning_rate": 1.5585584471780924e-05, "loss": 0.0064, "step": 285750 }, { "epoch": 8.83291092291525, "grad_norm": 0.14158840477466583, "learning_rate": 1.5585120850590346e-05, "loss": 0.0069, "step": 285780 }, { "epoch": 8.833838165296408, "grad_norm": 0.07833535969257355, "learning_rate": 1.5584657229399767e-05, "loss": 0.0068, "step": 285810 }, { "epoch": 8.834765407677567, "grad_norm": 0.09756147861480713, "learning_rate": 1.558419360820919e-05, "loss": 0.0059, "step": 285840 }, { "epoch": 8.835692650058725, "grad_norm": 0.12492215633392334, "learning_rate": 1.5583729987018607e-05, "loss": 0.0062, "step": 285870 }, { "epoch": 8.836619892439884, "grad_norm": 0.12801213562488556, "learning_rate": 1.5583266365828028e-05, "loss": 0.0054, "step": 285900 }, { "epoch": 8.837547134821042, "grad_norm": 0.10473956167697906, "learning_rate": 1.558280274463745e-05, "loss": 0.0062, "step": 285930 }, { "epoch": 8.8384743772022, "grad_norm": 
0.15198872983455658, "learning_rate": 1.558233912344687e-05, "loss": 0.006, "step": 285960 }, { "epoch": 8.83940161958336, "grad_norm": 0.14590485394001007, "learning_rate": 1.5581875502256293e-05, "loss": 0.0066, "step": 285990 }, { "epoch": 8.840328861964517, "grad_norm": 0.14090175926685333, "learning_rate": 1.558141188106571e-05, "loss": 0.0063, "step": 286020 }, { "epoch": 8.841256104345677, "grad_norm": 0.10121358186006546, "learning_rate": 1.5580948259875132e-05, "loss": 0.0072, "step": 286050 }, { "epoch": 8.842183346726834, "grad_norm": 0.07943378388881683, "learning_rate": 1.5580484638684554e-05, "loss": 0.006, "step": 286080 }, { "epoch": 8.843110589107994, "grad_norm": 0.12018118798732758, "learning_rate": 1.5580021017493972e-05, "loss": 0.0062, "step": 286110 }, { "epoch": 8.844037831489151, "grad_norm": 0.13095682859420776, "learning_rate": 1.5579557396303394e-05, "loss": 0.0062, "step": 286140 }, { "epoch": 8.84496507387031, "grad_norm": 0.11932136863470078, "learning_rate": 1.557909377511282e-05, "loss": 0.0062, "step": 286170 }, { "epoch": 8.845892316251469, "grad_norm": 0.09812863171100616, "learning_rate": 1.5578630153922237e-05, "loss": 0.0057, "step": 286200 }, { "epoch": 8.846819558632626, "grad_norm": 0.1259717345237732, "learning_rate": 1.5578166532731658e-05, "loss": 0.0059, "step": 286230 }, { "epoch": 8.847746801013784, "grad_norm": 0.10154179483652115, "learning_rate": 1.5577702911541076e-05, "loss": 0.0066, "step": 286260 }, { "epoch": 8.848674043394944, "grad_norm": 0.1101849377155304, "learning_rate": 1.5577239290350498e-05, "loss": 0.0064, "step": 286290 }, { "epoch": 8.849601285776101, "grad_norm": 0.10342636704444885, "learning_rate": 1.557677566915992e-05, "loss": 0.006, "step": 286320 }, { "epoch": 8.85052852815726, "grad_norm": 0.11774524301290512, "learning_rate": 1.557631204796934e-05, "loss": 0.0059, "step": 286350 }, { "epoch": 8.851455770538418, "grad_norm": 0.09491246193647385, "learning_rate": 1.5575848426778762e-05, 
"loss": 0.0063, "step": 286380 }, { "epoch": 8.852383012919578, "grad_norm": 0.08843406289815903, "learning_rate": 1.5575384805588184e-05, "loss": 0.0063, "step": 286410 }, { "epoch": 8.853310255300736, "grad_norm": 0.27638477087020874, "learning_rate": 1.5574921184397602e-05, "loss": 0.0061, "step": 286440 }, { "epoch": 8.854237497681893, "grad_norm": 0.10420737415552139, "learning_rate": 1.5574457563207023e-05, "loss": 0.0065, "step": 286470 }, { "epoch": 8.855164740063053, "grad_norm": 0.09981472790241241, "learning_rate": 1.5573993942016445e-05, "loss": 0.006, "step": 286500 }, { "epoch": 8.85609198244421, "grad_norm": 0.08140762895345688, "learning_rate": 1.5573530320825866e-05, "loss": 0.0064, "step": 286530 }, { "epoch": 8.85701922482537, "grad_norm": 0.17825943231582642, "learning_rate": 1.5573066699635288e-05, "loss": 0.0067, "step": 286560 }, { "epoch": 8.857946467206528, "grad_norm": 0.10150505602359772, "learning_rate": 1.5572603078444706e-05, "loss": 0.006, "step": 286590 }, { "epoch": 8.858873709587685, "grad_norm": 0.13891112804412842, "learning_rate": 1.5572139457254127e-05, "loss": 0.0069, "step": 286620 }, { "epoch": 8.859800951968845, "grad_norm": 0.11243610084056854, "learning_rate": 1.557167583606355e-05, "loss": 0.0061, "step": 286650 }, { "epoch": 8.860728194350003, "grad_norm": 0.21708837151527405, "learning_rate": 1.5571212214872967e-05, "loss": 0.0068, "step": 286680 }, { "epoch": 8.861655436731162, "grad_norm": 0.1374814808368683, "learning_rate": 1.557074859368239e-05, "loss": 0.0065, "step": 286710 }, { "epoch": 8.86258267911232, "grad_norm": 0.08708729594945908, "learning_rate": 1.557028497249181e-05, "loss": 0.0064, "step": 286740 }, { "epoch": 8.863509921493478, "grad_norm": 0.11598283797502518, "learning_rate": 1.556982135130123e-05, "loss": 0.0059, "step": 286770 }, { "epoch": 8.864437163874637, "grad_norm": 0.10086583346128464, "learning_rate": 1.5569357730110653e-05, "loss": 0.0066, "step": 286800 }, { "epoch": 8.865364406255795, 
"grad_norm": 0.10879120975732803, "learning_rate": 1.5568894108920075e-05, "loss": 0.0068, "step": 286830 }, { "epoch": 8.866291648636954, "grad_norm": 0.17574231326580048, "learning_rate": 1.5568430487729493e-05, "loss": 0.0064, "step": 286860 }, { "epoch": 8.867218891018112, "grad_norm": 0.10923958569765091, "learning_rate": 1.5567966866538914e-05, "loss": 0.0064, "step": 286890 }, { "epoch": 8.868146133399271, "grad_norm": 0.15110331773757935, "learning_rate": 1.5567503245348336e-05, "loss": 0.0061, "step": 286920 }, { "epoch": 8.86907337578043, "grad_norm": 0.07242723554372787, "learning_rate": 1.5567039624157757e-05, "loss": 0.0063, "step": 286950 }, { "epoch": 8.870000618161587, "grad_norm": 0.09345294535160065, "learning_rate": 1.556657600296718e-05, "loss": 0.0068, "step": 286980 }, { "epoch": 8.870927860542746, "grad_norm": 0.09044199436903, "learning_rate": 1.5566112381776597e-05, "loss": 0.0069, "step": 287010 }, { "epoch": 8.871855102923904, "grad_norm": 0.1162504255771637, "learning_rate": 1.556564876058602e-05, "loss": 0.0059, "step": 287040 }, { "epoch": 8.872782345305062, "grad_norm": 0.12252101302146912, "learning_rate": 1.556518513939544e-05, "loss": 0.0063, "step": 287070 }, { "epoch": 8.873709587686221, "grad_norm": 0.13822180032730103, "learning_rate": 1.5564721518204858e-05, "loss": 0.0063, "step": 287100 }, { "epoch": 8.874636830067379, "grad_norm": 0.10473074018955231, "learning_rate": 1.556425789701428e-05, "loss": 0.0066, "step": 287130 }, { "epoch": 8.875564072448539, "grad_norm": 0.15213017165660858, "learning_rate": 1.55637942758237e-05, "loss": 0.0066, "step": 287160 }, { "epoch": 8.876491314829696, "grad_norm": 0.11769367009401321, "learning_rate": 1.5563330654633123e-05, "loss": 0.0058, "step": 287190 }, { "epoch": 8.877418557210856, "grad_norm": 0.09277546405792236, "learning_rate": 1.5562867033442544e-05, "loss": 0.0057, "step": 287220 }, { "epoch": 8.878345799592013, "grad_norm": 0.14494183659553528, "learning_rate": 
1.5562403412251962e-05, "loss": 0.0063, "step": 287250 }, { "epoch": 8.879273041973171, "grad_norm": 0.12221945822238922, "learning_rate": 1.5561939791061384e-05, "loss": 0.0066, "step": 287280 }, { "epoch": 8.88020028435433, "grad_norm": 0.12422904372215271, "learning_rate": 1.5561476169870805e-05, "loss": 0.0065, "step": 287310 }, { "epoch": 8.881127526735488, "grad_norm": 0.13019636273384094, "learning_rate": 1.5561012548680227e-05, "loss": 0.0065, "step": 287340 }, { "epoch": 8.882054769116648, "grad_norm": 0.10062792897224426, "learning_rate": 1.5560548927489648e-05, "loss": 0.0062, "step": 287370 }, { "epoch": 8.882982011497806, "grad_norm": 0.13253678381443024, "learning_rate": 1.556008530629907e-05, "loss": 0.0069, "step": 287400 }, { "epoch": 8.883909253878963, "grad_norm": 0.09834545850753784, "learning_rate": 1.5559621685108488e-05, "loss": 0.006, "step": 287430 }, { "epoch": 8.884836496260123, "grad_norm": 0.08973487466573715, "learning_rate": 1.555915806391791e-05, "loss": 0.0058, "step": 287460 }, { "epoch": 8.88576373864128, "grad_norm": 0.08130308240652084, "learning_rate": 1.555869444272733e-05, "loss": 0.0064, "step": 287490 }, { "epoch": 8.88669098102244, "grad_norm": 0.1255633682012558, "learning_rate": 1.555823082153675e-05, "loss": 0.0064, "step": 287520 }, { "epoch": 8.887618223403598, "grad_norm": 0.11965359002351761, "learning_rate": 1.555776720034617e-05, "loss": 0.0057, "step": 287550 }, { "epoch": 8.888545465784755, "grad_norm": 0.15059152245521545, "learning_rate": 1.5557303579155592e-05, "loss": 0.0067, "step": 287580 }, { "epoch": 8.889472708165915, "grad_norm": 0.09266885370016098, "learning_rate": 1.5556839957965013e-05, "loss": 0.0063, "step": 287610 }, { "epoch": 8.890399950547073, "grad_norm": 0.13136181235313416, "learning_rate": 1.5556376336774435e-05, "loss": 0.0062, "step": 287640 }, { "epoch": 8.891327192928232, "grad_norm": 0.0927463099360466, "learning_rate": 1.5555912715583853e-05, "loss": 0.0062, "step": 287670 }, { 
"epoch": 8.89225443530939, "grad_norm": 0.0673484280705452, "learning_rate": 1.5555449094393275e-05, "loss": 0.007, "step": 287700 }, { "epoch": 8.89318167769055, "grad_norm": 0.0986793264746666, "learning_rate": 1.5554985473202696e-05, "loss": 0.0072, "step": 287730 }, { "epoch": 8.894108920071707, "grad_norm": 0.059174101799726486, "learning_rate": 1.5554521852012118e-05, "loss": 0.0062, "step": 287760 }, { "epoch": 8.895036162452865, "grad_norm": 0.1078844889998436, "learning_rate": 1.555405823082154e-05, "loss": 0.006, "step": 287790 }, { "epoch": 8.895963404834024, "grad_norm": 0.13422657549381256, "learning_rate": 1.555359460963096e-05, "loss": 0.0061, "step": 287820 }, { "epoch": 8.896890647215182, "grad_norm": 0.14721311628818512, "learning_rate": 1.555313098844038e-05, "loss": 0.0066, "step": 287850 }, { "epoch": 8.89781788959634, "grad_norm": 0.13261756300926208, "learning_rate": 1.55526673672498e-05, "loss": 0.0056, "step": 287880 }, { "epoch": 8.898745131977499, "grad_norm": 0.08194131404161453, "learning_rate": 1.5552203746059222e-05, "loss": 0.006, "step": 287910 }, { "epoch": 8.899672374358657, "grad_norm": 0.1412711888551712, "learning_rate": 1.5551740124868643e-05, "loss": 0.0061, "step": 287940 }, { "epoch": 8.900599616739816, "grad_norm": 0.09039580821990967, "learning_rate": 1.5551276503678065e-05, "loss": 0.0063, "step": 287970 }, { "epoch": 8.901526859120974, "grad_norm": 0.1089789867401123, "learning_rate": 1.5550812882487483e-05, "loss": 0.0063, "step": 288000 }, { "epoch": 8.902454101502133, "grad_norm": 0.08430305123329163, "learning_rate": 1.5550349261296904e-05, "loss": 0.0064, "step": 288030 }, { "epoch": 8.903381343883291, "grad_norm": 0.11444628983736038, "learning_rate": 1.5549885640106326e-05, "loss": 0.0069, "step": 288060 }, { "epoch": 8.904308586264449, "grad_norm": 0.10026639699935913, "learning_rate": 1.5549422018915744e-05, "loss": 0.0066, "step": 288090 }, { "epoch": 8.905235828645608, "grad_norm": 0.10220953822135925, 
"learning_rate": 1.5548958397725166e-05, "loss": 0.0059, "step": 288120 }, { "epoch": 8.906163071026766, "grad_norm": 0.09950344264507294, "learning_rate": 1.5548494776534587e-05, "loss": 0.0059, "step": 288150 }, { "epoch": 8.907090313407926, "grad_norm": 0.09438863396644592, "learning_rate": 1.554803115534401e-05, "loss": 0.0059, "step": 288180 }, { "epoch": 8.908017555789083, "grad_norm": 0.12485511600971222, "learning_rate": 1.554756753415343e-05, "loss": 0.0072, "step": 288210 }, { "epoch": 8.908944798170241, "grad_norm": 0.15080031752586365, "learning_rate": 1.5547103912962848e-05, "loss": 0.0059, "step": 288240 }, { "epoch": 8.9098720405514, "grad_norm": 0.15899226069450378, "learning_rate": 1.554664029177227e-05, "loss": 0.0062, "step": 288270 }, { "epoch": 8.910799282932558, "grad_norm": 0.09529711306095123, "learning_rate": 1.554617667058169e-05, "loss": 0.0058, "step": 288300 }, { "epoch": 8.911726525313718, "grad_norm": 0.1505950391292572, "learning_rate": 1.5545713049391113e-05, "loss": 0.0068, "step": 288330 }, { "epoch": 8.912653767694875, "grad_norm": 0.08544103056192398, "learning_rate": 1.5545249428200534e-05, "loss": 0.0059, "step": 288360 }, { "epoch": 8.913581010076033, "grad_norm": 0.11068369448184967, "learning_rate": 1.5544785807009956e-05, "loss": 0.0062, "step": 288390 }, { "epoch": 8.914508252457193, "grad_norm": 0.12695568799972534, "learning_rate": 1.5544322185819374e-05, "loss": 0.0063, "step": 288420 }, { "epoch": 8.91543549483835, "grad_norm": 0.10976461321115494, "learning_rate": 1.5543858564628795e-05, "loss": 0.0055, "step": 288450 }, { "epoch": 8.91636273721951, "grad_norm": 0.14680634438991547, "learning_rate": 1.5543394943438217e-05, "loss": 0.0061, "step": 288480 }, { "epoch": 8.917289979600667, "grad_norm": 0.09818326681852341, "learning_rate": 1.5542931322247635e-05, "loss": 0.006, "step": 288510 }, { "epoch": 8.918217221981827, "grad_norm": 0.11475516855716705, "learning_rate": 1.5542467701057056e-05, "loss": 0.0066, 
"step": 288540 }, { "epoch": 8.919144464362985, "grad_norm": 0.09267094731330872, "learning_rate": 1.5542004079866478e-05, "loss": 0.0056, "step": 288570 }, { "epoch": 8.920071706744142, "grad_norm": 0.12915465235710144, "learning_rate": 1.55415404586759e-05, "loss": 0.0066, "step": 288600 }, { "epoch": 8.920998949125302, "grad_norm": 0.16881896555423737, "learning_rate": 1.554107683748532e-05, "loss": 0.0065, "step": 288630 }, { "epoch": 8.92192619150646, "grad_norm": 0.1321198046207428, "learning_rate": 1.554061321629474e-05, "loss": 0.0067, "step": 288660 }, { "epoch": 8.922853433887617, "grad_norm": 0.09793872386217117, "learning_rate": 1.554014959510416e-05, "loss": 0.0064, "step": 288690 }, { "epoch": 8.923780676268777, "grad_norm": 0.08633837103843689, "learning_rate": 1.5539685973913582e-05, "loss": 0.0063, "step": 288720 }, { "epoch": 8.924707918649935, "grad_norm": 0.1273118406534195, "learning_rate": 1.5539222352723004e-05, "loss": 0.0061, "step": 288750 }, { "epoch": 8.925635161031094, "grad_norm": 0.08878280222415924, "learning_rate": 1.5538758731532425e-05, "loss": 0.0065, "step": 288780 }, { "epoch": 8.926562403412252, "grad_norm": 0.08686165511608124, "learning_rate": 1.5538295110341847e-05, "loss": 0.0066, "step": 288810 }, { "epoch": 8.927489645793411, "grad_norm": 0.07598711550235748, "learning_rate": 1.5537831489151265e-05, "loss": 0.0056, "step": 288840 }, { "epoch": 8.928416888174569, "grad_norm": 0.08131109923124313, "learning_rate": 1.5537367867960686e-05, "loss": 0.0067, "step": 288870 }, { "epoch": 8.929344130555727, "grad_norm": 0.18526749312877655, "learning_rate": 1.5536904246770104e-05, "loss": 0.0059, "step": 288900 }, { "epoch": 8.930271372936886, "grad_norm": 0.10109613090753555, "learning_rate": 1.5536440625579526e-05, "loss": 0.0065, "step": 288930 }, { "epoch": 8.931198615318044, "grad_norm": 0.16258811950683594, "learning_rate": 1.553597700438895e-05, "loss": 0.0064, "step": 288960 }, { "epoch": 8.932125857699203, "grad_norm": 
0.1835441291332245, "learning_rate": 1.553551338319837e-05, "loss": 0.0062, "step": 288990 }, { "epoch": 8.933053100080361, "grad_norm": 0.09200942516326904, "learning_rate": 1.553504976200779e-05, "loss": 0.0055, "step": 289020 }, { "epoch": 8.933980342461519, "grad_norm": 0.12468863278627396, "learning_rate": 1.5534586140817212e-05, "loss": 0.0064, "step": 289050 }, { "epoch": 8.934907584842678, "grad_norm": 0.11510561406612396, "learning_rate": 1.553412251962663e-05, "loss": 0.0061, "step": 289080 }, { "epoch": 8.935834827223836, "grad_norm": 0.1240646094083786, "learning_rate": 1.553365889843605e-05, "loss": 0.0066, "step": 289110 }, { "epoch": 8.936762069604995, "grad_norm": 0.08184236288070679, "learning_rate": 1.5533195277245473e-05, "loss": 0.0063, "step": 289140 }, { "epoch": 8.937689311986153, "grad_norm": 0.12377715855836868, "learning_rate": 1.5532731656054895e-05, "loss": 0.0068, "step": 289170 }, { "epoch": 8.93861655436731, "grad_norm": 0.18544898927211761, "learning_rate": 1.5532268034864316e-05, "loss": 0.0066, "step": 289200 }, { "epoch": 8.93954379674847, "grad_norm": 0.1480867564678192, "learning_rate": 1.5531804413673734e-05, "loss": 0.0055, "step": 289230 }, { "epoch": 8.940471039129628, "grad_norm": 0.0905461460351944, "learning_rate": 1.5531340792483156e-05, "loss": 0.0057, "step": 289260 }, { "epoch": 8.941398281510788, "grad_norm": 0.0970173329114914, "learning_rate": 1.5530877171292577e-05, "loss": 0.0062, "step": 289290 }, { "epoch": 8.942325523891945, "grad_norm": 0.14874526858329773, "learning_rate": 1.5530413550102e-05, "loss": 0.0057, "step": 289320 }, { "epoch": 8.943252766273105, "grad_norm": 0.12596043944358826, "learning_rate": 1.552994992891142e-05, "loss": 0.0064, "step": 289350 }, { "epoch": 8.944180008654262, "grad_norm": 0.10618286579847336, "learning_rate": 1.552948630772084e-05, "loss": 0.0064, "step": 289380 }, { "epoch": 8.94510725103542, "grad_norm": 0.11541444063186646, "learning_rate": 1.552902268653026e-05, "loss": 
0.0062, "step": 289410 }, { "epoch": 8.94603449341658, "grad_norm": 0.10083068907260895, "learning_rate": 1.552855906533968e-05, "loss": 0.006, "step": 289440 }, { "epoch": 8.946961735797737, "grad_norm": 0.10190648585557938, "learning_rate": 1.5528095444149103e-05, "loss": 0.0065, "step": 289470 }, { "epoch": 8.947888978178895, "grad_norm": 0.14179378747940063, "learning_rate": 1.552763182295852e-05, "loss": 0.0063, "step": 289500 }, { "epoch": 8.948816220560055, "grad_norm": 0.1002063974738121, "learning_rate": 1.5527168201767942e-05, "loss": 0.006, "step": 289530 }, { "epoch": 8.949743462941212, "grad_norm": 0.08396207541227341, "learning_rate": 1.5526704580577364e-05, "loss": 0.0062, "step": 289560 }, { "epoch": 8.950670705322372, "grad_norm": 0.07600010186433792, "learning_rate": 1.5526240959386785e-05, "loss": 0.006, "step": 289590 }, { "epoch": 8.95159794770353, "grad_norm": 0.12301068753004074, "learning_rate": 1.5525777338196207e-05, "loss": 0.0067, "step": 289620 }, { "epoch": 8.952525190084689, "grad_norm": 0.0914861410856247, "learning_rate": 1.5525313717005625e-05, "loss": 0.0055, "step": 289650 }, { "epoch": 8.953452432465847, "grad_norm": 0.1394658386707306, "learning_rate": 1.5524850095815047e-05, "loss": 0.0062, "step": 289680 }, { "epoch": 8.954379674847004, "grad_norm": 0.14490343630313873, "learning_rate": 1.5524386474624468e-05, "loss": 0.007, "step": 289710 }, { "epoch": 8.955306917228164, "grad_norm": 0.0987246185541153, "learning_rate": 1.552392285343389e-05, "loss": 0.0058, "step": 289740 }, { "epoch": 8.956234159609322, "grad_norm": 0.1438964605331421, "learning_rate": 1.552345923224331e-05, "loss": 0.0059, "step": 289770 }, { "epoch": 8.957161401990481, "grad_norm": 0.12691771984100342, "learning_rate": 1.5522995611052733e-05, "loss": 0.0067, "step": 289800 }, { "epoch": 8.958088644371639, "grad_norm": 0.12225675582885742, "learning_rate": 1.552253198986215e-05, "loss": 0.0061, "step": 289830 }, { "epoch": 8.959015886752796, "grad_norm": 
0.10931549221277237, "learning_rate": 1.5522068368671572e-05, "loss": 0.0067, "step": 289860 }, { "epoch": 8.959943129133956, "grad_norm": 0.135189026594162, "learning_rate": 1.5521604747480994e-05, "loss": 0.0058, "step": 289890 }, { "epoch": 8.960870371515114, "grad_norm": 0.12543973326683044, "learning_rate": 1.5521141126290412e-05, "loss": 0.0063, "step": 289920 }, { "epoch": 8.961797613896273, "grad_norm": 0.09403204172849655, "learning_rate": 1.5520677505099833e-05, "loss": 0.0056, "step": 289950 }, { "epoch": 8.96272485627743, "grad_norm": 0.1245662122964859, "learning_rate": 1.5520213883909255e-05, "loss": 0.0064, "step": 289980 }, { "epoch": 8.963652098658589, "grad_norm": 0.1578400582075119, "learning_rate": 1.5519750262718676e-05, "loss": 0.0061, "step": 290010 }, { "epoch": 8.964579341039748, "grad_norm": 0.24210938811302185, "learning_rate": 1.5519286641528098e-05, "loss": 0.0054, "step": 290040 }, { "epoch": 8.965506583420906, "grad_norm": 0.11254126578569412, "learning_rate": 1.5518823020337516e-05, "loss": 0.0063, "step": 290070 }, { "epoch": 8.966433825802065, "grad_norm": 0.09606242179870605, "learning_rate": 1.5518359399146937e-05, "loss": 0.006, "step": 290100 }, { "epoch": 8.967361068183223, "grad_norm": 0.12463022768497467, "learning_rate": 1.551789577795636e-05, "loss": 0.006, "step": 290130 }, { "epoch": 8.968288310564382, "grad_norm": 0.11668103188276291, "learning_rate": 1.551743215676578e-05, "loss": 0.0062, "step": 290160 }, { "epoch": 8.96921555294554, "grad_norm": 0.11646215617656708, "learning_rate": 1.5516968535575202e-05, "loss": 0.0064, "step": 290190 }, { "epoch": 8.970142795326698, "grad_norm": 0.09314433485269547, "learning_rate": 1.5516504914384624e-05, "loss": 0.0067, "step": 290220 }, { "epoch": 8.971070037707857, "grad_norm": 0.07558880746364594, "learning_rate": 1.551604129319404e-05, "loss": 0.0064, "step": 290250 }, { "epoch": 8.971997280089015, "grad_norm": 0.09557899087667465, "learning_rate": 1.5515577672003463e-05, 
"loss": 0.0058, "step": 290280 }, { "epoch": 8.972924522470173, "grad_norm": 0.09908417612314224, "learning_rate": 1.551511405081288e-05, "loss": 0.0061, "step": 290310 }, { "epoch": 8.973851764851332, "grad_norm": 0.10577849298715591, "learning_rate": 1.5514650429622303e-05, "loss": 0.0055, "step": 290340 }, { "epoch": 8.97477900723249, "grad_norm": 0.09080086648464203, "learning_rate": 1.5514186808431728e-05, "loss": 0.0064, "step": 290370 }, { "epoch": 8.97570624961365, "grad_norm": 0.12841826677322388, "learning_rate": 1.5513723187241146e-05, "loss": 0.006, "step": 290400 }, { "epoch": 8.976633491994807, "grad_norm": 0.1344262659549713, "learning_rate": 1.5513259566050567e-05, "loss": 0.0055, "step": 290430 }, { "epoch": 8.977560734375967, "grad_norm": 0.07996799796819687, "learning_rate": 1.551279594485999e-05, "loss": 0.0064, "step": 290460 }, { "epoch": 8.978487976757124, "grad_norm": 0.12525929510593414, "learning_rate": 1.5512332323669407e-05, "loss": 0.0071, "step": 290490 }, { "epoch": 8.979415219138282, "grad_norm": 0.08057156205177307, "learning_rate": 1.551186870247883e-05, "loss": 0.006, "step": 290520 }, { "epoch": 8.980342461519442, "grad_norm": 0.0795876756310463, "learning_rate": 1.551140508128825e-05, "loss": 0.0064, "step": 290550 }, { "epoch": 8.9812697039006, "grad_norm": 0.09274984151124954, "learning_rate": 1.551094146009767e-05, "loss": 0.006, "step": 290580 }, { "epoch": 8.982196946281759, "grad_norm": 0.11315865814685822, "learning_rate": 1.5510477838907093e-05, "loss": 0.0067, "step": 290610 }, { "epoch": 8.983124188662917, "grad_norm": 0.1145918220281601, "learning_rate": 1.551001421771651e-05, "loss": 0.0066, "step": 290640 }, { "epoch": 8.984051431044074, "grad_norm": 0.14262783527374268, "learning_rate": 1.5509550596525933e-05, "loss": 0.0062, "step": 290670 }, { "epoch": 8.984978673425234, "grad_norm": 0.134611576795578, "learning_rate": 1.5509086975335354e-05, "loss": 0.0063, "step": 290700 }, { "epoch": 8.985905915806391, 
"grad_norm": 0.09128312021493912, "learning_rate": 1.5508623354144776e-05, "loss": 0.0066, "step": 290730 }, { "epoch": 8.986833158187551, "grad_norm": 0.11641298234462738, "learning_rate": 1.5508159732954197e-05, "loss": 0.0059, "step": 290760 }, { "epoch": 8.987760400568709, "grad_norm": 0.12805208563804626, "learning_rate": 1.550769611176362e-05, "loss": 0.0062, "step": 290790 }, { "epoch": 8.988687642949866, "grad_norm": 0.07305314391851425, "learning_rate": 1.5507232490573037e-05, "loss": 0.0059, "step": 290820 }, { "epoch": 8.989614885331026, "grad_norm": 0.13396596908569336, "learning_rate": 1.5506768869382458e-05, "loss": 0.0067, "step": 290850 }, { "epoch": 8.990542127712184, "grad_norm": 0.10077361762523651, "learning_rate": 1.550630524819188e-05, "loss": 0.0068, "step": 290880 }, { "epoch": 8.991469370093343, "grad_norm": 0.13070547580718994, "learning_rate": 1.5505841627001298e-05, "loss": 0.0063, "step": 290910 }, { "epoch": 8.9923966124745, "grad_norm": 0.13540028035640717, "learning_rate": 1.550537800581072e-05, "loss": 0.0059, "step": 290940 }, { "epoch": 8.993323854855658, "grad_norm": 0.08551055192947388, "learning_rate": 1.550491438462014e-05, "loss": 0.0063, "step": 290970 }, { "epoch": 8.994251097236818, "grad_norm": 0.10141775757074356, "learning_rate": 1.5504450763429562e-05, "loss": 0.0059, "step": 291000 }, { "epoch": 8.995178339617976, "grad_norm": 0.17678681015968323, "learning_rate": 1.5503987142238984e-05, "loss": 0.0067, "step": 291030 }, { "epoch": 8.996105581999135, "grad_norm": 0.1600467413663864, "learning_rate": 1.5503523521048402e-05, "loss": 0.0056, "step": 291060 }, { "epoch": 8.997032824380293, "grad_norm": 0.14854948222637177, "learning_rate": 1.5503059899857823e-05, "loss": 0.006, "step": 291090 }, { "epoch": 8.99796006676145, "grad_norm": 0.11975522339344025, "learning_rate": 1.5502596278667245e-05, "loss": 0.0064, "step": 291120 }, { "epoch": 8.99888730914261, "grad_norm": 0.10026994347572327, "learning_rate": 
1.5502132657476666e-05, "loss": 0.0064, "step": 291150 }, { "epoch": 8.999814551523768, "grad_norm": 0.09387923032045364, "learning_rate": 1.5501669036286088e-05, "loss": 0.0058, "step": 291180 }, { "epoch": 9.000741793904927, "grad_norm": 0.08841147273778915, "learning_rate": 1.550120541509551e-05, "loss": 0.0059, "step": 291210 }, { "epoch": 9.001669036286085, "grad_norm": 0.08082147687673569, "learning_rate": 1.5500741793904928e-05, "loss": 0.0065, "step": 291240 }, { "epoch": 9.002596278667244, "grad_norm": 0.16410472989082336, "learning_rate": 1.550027817271435e-05, "loss": 0.0064, "step": 291270 }, { "epoch": 9.003523521048402, "grad_norm": 0.12688586115837097, "learning_rate": 1.5499814551523767e-05, "loss": 0.0058, "step": 291300 }, { "epoch": 9.00445076342956, "grad_norm": 0.0892360582947731, "learning_rate": 1.549935093033319e-05, "loss": 0.0065, "step": 291330 }, { "epoch": 9.00537800581072, "grad_norm": 0.14608076214790344, "learning_rate": 1.549888730914261e-05, "loss": 0.0061, "step": 291360 }, { "epoch": 9.006305248191877, "grad_norm": 0.08718746900558472, "learning_rate": 1.5498423687952032e-05, "loss": 0.0062, "step": 291390 }, { "epoch": 9.007232490573037, "grad_norm": 0.17109103500843048, "learning_rate": 1.5497960066761453e-05, "loss": 0.0057, "step": 291420 }, { "epoch": 9.008159732954194, "grad_norm": 0.14738960564136505, "learning_rate": 1.5497496445570875e-05, "loss": 0.0059, "step": 291450 }, { "epoch": 9.009086975335352, "grad_norm": 0.21401001513004303, "learning_rate": 1.5497032824380293e-05, "loss": 0.0057, "step": 291480 }, { "epoch": 9.010014217716511, "grad_norm": 0.11969040334224701, "learning_rate": 1.5496569203189714e-05, "loss": 0.0058, "step": 291510 }, { "epoch": 9.01094146009767, "grad_norm": 0.06572320312261581, "learning_rate": 1.5496121036038822e-05, "loss": 0.0061, "step": 291540 }, { "epoch": 9.011868702478829, "grad_norm": 0.09435907751321793, "learning_rate": 1.549565741484824e-05, "loss": 0.0061, "step": 291570 }, { 
"epoch": 9.012795944859986, "grad_norm": 0.11644136160612106, "learning_rate": 1.5495193793657662e-05, "loss": 0.0062, "step": 291600 }, { "epoch": 9.013723187241144, "grad_norm": 0.12488660961389542, "learning_rate": 1.5494730172467083e-05, "loss": 0.0059, "step": 291630 }, { "epoch": 9.014650429622304, "grad_norm": 0.07899569720029831, "learning_rate": 1.5494266551276505e-05, "loss": 0.0062, "step": 291660 }, { "epoch": 9.015577672003461, "grad_norm": 0.15098467469215393, "learning_rate": 1.5493802930085926e-05, "loss": 0.0048, "step": 291690 }, { "epoch": 9.01650491438462, "grad_norm": 0.13860632479190826, "learning_rate": 1.5493339308895345e-05, "loss": 0.0063, "step": 291720 }, { "epoch": 9.017432156765778, "grad_norm": 0.14108926057815552, "learning_rate": 1.5492875687704766e-05, "loss": 0.0061, "step": 291750 }, { "epoch": 9.018359399146936, "grad_norm": 0.1422782689332962, "learning_rate": 1.5492412066514188e-05, "loss": 0.0061, "step": 291780 }, { "epoch": 9.019286641528096, "grad_norm": 0.1124684065580368, "learning_rate": 1.549194844532361e-05, "loss": 0.0065, "step": 291810 }, { "epoch": 9.020213883909253, "grad_norm": 0.1347583383321762, "learning_rate": 1.549148482413303e-05, "loss": 0.007, "step": 291840 }, { "epoch": 9.021141126290413, "grad_norm": 0.1483919620513916, "learning_rate": 1.5491021202942452e-05, "loss": 0.0058, "step": 291870 }, { "epoch": 9.02206836867157, "grad_norm": 0.09444795548915863, "learning_rate": 1.549055758175187e-05, "loss": 0.0056, "step": 291900 }, { "epoch": 9.02299561105273, "grad_norm": 0.11000185459852219, "learning_rate": 1.5490093960561292e-05, "loss": 0.0058, "step": 291930 }, { "epoch": 9.023922853433888, "grad_norm": 0.11377277225255966, "learning_rate": 1.5489630339370713e-05, "loss": 0.0056, "step": 291960 }, { "epoch": 9.024850095815045, "grad_norm": 0.1521521955728531, "learning_rate": 1.5489166718180135e-05, "loss": 0.0058, "step": 291990 }, { "epoch": 9.025777338196205, "grad_norm": 0.10289528220891953, 
"learning_rate": 1.5488703096989556e-05, "loss": 0.0057, "step": 292020 }, { "epoch": 9.026704580577363, "grad_norm": 0.10047652572393417, "learning_rate": 1.5488239475798974e-05, "loss": 0.0065, "step": 292050 }, { "epoch": 9.027631822958522, "grad_norm": 0.10317211598157883, "learning_rate": 1.5487775854608396e-05, "loss": 0.0059, "step": 292080 }, { "epoch": 9.02855906533968, "grad_norm": 0.11407309770584106, "learning_rate": 1.5487312233417817e-05, "loss": 0.0062, "step": 292110 }, { "epoch": 9.029486307720838, "grad_norm": 0.12016308307647705, "learning_rate": 1.5486848612227235e-05, "loss": 0.0063, "step": 292140 }, { "epoch": 9.030413550101997, "grad_norm": 0.09458771347999573, "learning_rate": 1.5486384991036657e-05, "loss": 0.0066, "step": 292170 }, { "epoch": 9.031340792483155, "grad_norm": 0.05736961588263512, "learning_rate": 1.548592136984608e-05, "loss": 0.007, "step": 292200 }, { "epoch": 9.032268034864314, "grad_norm": 0.18266373872756958, "learning_rate": 1.54854577486555e-05, "loss": 0.0057, "step": 292230 }, { "epoch": 9.033195277245472, "grad_norm": 0.10522014647722244, "learning_rate": 1.548499412746492e-05, "loss": 0.0051, "step": 292260 }, { "epoch": 9.03412251962663, "grad_norm": 0.09089767932891846, "learning_rate": 1.5484530506274343e-05, "loss": 0.0057, "step": 292290 }, { "epoch": 9.03504976200779, "grad_norm": 0.15172290802001953, "learning_rate": 1.548406688508376e-05, "loss": 0.0062, "step": 292320 }, { "epoch": 9.035977004388947, "grad_norm": 0.08971775323152542, "learning_rate": 1.5483603263893183e-05, "loss": 0.0067, "step": 292350 }, { "epoch": 9.036904246770106, "grad_norm": 0.0988665372133255, "learning_rate": 1.5483139642702604e-05, "loss": 0.006, "step": 292380 }, { "epoch": 9.037831489151264, "grad_norm": 0.09932728856801987, "learning_rate": 1.5482676021512026e-05, "loss": 0.0058, "step": 292410 }, { "epoch": 9.038758731532422, "grad_norm": 0.08022020757198334, "learning_rate": 1.5482212400321447e-05, "loss": 0.0057, "step": 
292440 }, { "epoch": 9.039685973913581, "grad_norm": 0.08280805498361588, "learning_rate": 1.5481748779130865e-05, "loss": 0.0062, "step": 292470 }, { "epoch": 9.040613216294739, "grad_norm": 0.11733341962099075, "learning_rate": 1.5481285157940287e-05, "loss": 0.0064, "step": 292500 }, { "epoch": 9.041540458675899, "grad_norm": 0.18084348738193512, "learning_rate": 1.5480821536749708e-05, "loss": 0.006, "step": 292530 }, { "epoch": 9.042467701057056, "grad_norm": 0.07240582257509232, "learning_rate": 1.5480357915559126e-05, "loss": 0.0064, "step": 292560 }, { "epoch": 9.043394943438214, "grad_norm": 0.09203918278217316, "learning_rate": 1.5479894294368548e-05, "loss": 0.0058, "step": 292590 }, { "epoch": 9.044322185819373, "grad_norm": 0.1452483981847763, "learning_rate": 1.547943067317797e-05, "loss": 0.0061, "step": 292620 }, { "epoch": 9.045249428200531, "grad_norm": 0.1463264673948288, "learning_rate": 1.547896705198739e-05, "loss": 0.0057, "step": 292650 }, { "epoch": 9.04617667058169, "grad_norm": 0.11052633076906204, "learning_rate": 1.5478503430796812e-05, "loss": 0.0059, "step": 292680 }, { "epoch": 9.047103912962848, "grad_norm": 0.10921036452054977, "learning_rate": 1.5478039809606234e-05, "loss": 0.0054, "step": 292710 }, { "epoch": 9.048031155344006, "grad_norm": 0.07002733647823334, "learning_rate": 1.5477576188415652e-05, "loss": 0.0056, "step": 292740 }, { "epoch": 9.048958397725166, "grad_norm": 0.13565143942832947, "learning_rate": 1.5477112567225074e-05, "loss": 0.006, "step": 292770 }, { "epoch": 9.049885640106323, "grad_norm": 0.09362872689962387, "learning_rate": 1.5476648946034495e-05, "loss": 0.006, "step": 292800 }, { "epoch": 9.050812882487483, "grad_norm": 0.11525501310825348, "learning_rate": 1.5476185324843917e-05, "loss": 0.0057, "step": 292830 }, { "epoch": 9.05174012486864, "grad_norm": 0.1182793527841568, "learning_rate": 1.5475721703653338e-05, "loss": 0.0062, "step": 292860 }, { "epoch": 9.0526673672498, "grad_norm": 
0.12344402074813843, "learning_rate": 1.5475258082462756e-05, "loss": 0.007, "step": 292890 }, { "epoch": 9.053594609630958, "grad_norm": 0.09750242531299591, "learning_rate": 1.5474794461272178e-05, "loss": 0.0059, "step": 292920 }, { "epoch": 9.054521852012115, "grad_norm": 0.124625064432621, "learning_rate": 1.54743308400816e-05, "loss": 0.0059, "step": 292950 }, { "epoch": 9.055449094393275, "grad_norm": 0.12346477806568146, "learning_rate": 1.5473867218891017e-05, "loss": 0.0068, "step": 292980 }, { "epoch": 9.056376336774433, "grad_norm": 0.09391327202320099, "learning_rate": 1.547340359770044e-05, "loss": 0.0063, "step": 293010 }, { "epoch": 9.057303579155592, "grad_norm": 0.10502514988183975, "learning_rate": 1.547293997650986e-05, "loss": 0.0058, "step": 293040 }, { "epoch": 9.05823082153675, "grad_norm": 0.19697603583335876, "learning_rate": 1.5472476355319282e-05, "loss": 0.0063, "step": 293070 }, { "epoch": 9.059158063917907, "grad_norm": 0.1242428570985794, "learning_rate": 1.5472012734128703e-05, "loss": 0.0056, "step": 293100 }, { "epoch": 9.060085306299067, "grad_norm": 0.09851614385843277, "learning_rate": 1.547154911293812e-05, "loss": 0.0064, "step": 293130 }, { "epoch": 9.061012548680225, "grad_norm": 0.08813434094190598, "learning_rate": 1.5471085491747543e-05, "loss": 0.0069, "step": 293160 }, { "epoch": 9.061939791061384, "grad_norm": 0.09427155554294586, "learning_rate": 1.5470621870556964e-05, "loss": 0.0061, "step": 293190 }, { "epoch": 9.062867033442542, "grad_norm": 0.12599919736385345, "learning_rate": 1.5470158249366386e-05, "loss": 0.0062, "step": 293220 }, { "epoch": 9.0637942758237, "grad_norm": 0.1154671385884285, "learning_rate": 1.5469694628175807e-05, "loss": 0.0064, "step": 293250 }, { "epoch": 9.064721518204859, "grad_norm": 0.08876772224903107, "learning_rate": 1.546923100698523e-05, "loss": 0.0063, "step": 293280 }, { "epoch": 9.065648760586017, "grad_norm": 0.1520872712135315, "learning_rate": 1.5468767385794647e-05, 
"loss": 0.0065, "step": 293310 }, { "epoch": 9.066576002967176, "grad_norm": 0.13936157524585724, "learning_rate": 1.546830376460407e-05, "loss": 0.0064, "step": 293340 }, { "epoch": 9.067503245348334, "grad_norm": 0.09192699193954468, "learning_rate": 1.546784014341349e-05, "loss": 0.0064, "step": 293370 }, { "epoch": 9.068430487729492, "grad_norm": 0.12011539191007614, "learning_rate": 1.546737652222291e-05, "loss": 0.0057, "step": 293400 }, { "epoch": 9.069357730110651, "grad_norm": 0.11862027645111084, "learning_rate": 1.5466912901032333e-05, "loss": 0.0065, "step": 293430 }, { "epoch": 9.070284972491809, "grad_norm": 0.108751080930233, "learning_rate": 1.546644927984175e-05, "loss": 0.0061, "step": 293460 }, { "epoch": 9.071212214872968, "grad_norm": 0.10160987079143524, "learning_rate": 1.5465985658651173e-05, "loss": 0.0062, "step": 293490 }, { "epoch": 9.072139457254126, "grad_norm": 0.18474335968494415, "learning_rate": 1.5465522037460594e-05, "loss": 0.0057, "step": 293520 }, { "epoch": 9.073066699635284, "grad_norm": 0.09902045130729675, "learning_rate": 1.5465058416270012e-05, "loss": 0.0059, "step": 293550 }, { "epoch": 9.073993942016443, "grad_norm": 0.09250567853450775, "learning_rate": 1.5464594795079434e-05, "loss": 0.0059, "step": 293580 }, { "epoch": 9.074921184397601, "grad_norm": 0.07458808273077011, "learning_rate": 1.5464131173888855e-05, "loss": 0.0062, "step": 293610 }, { "epoch": 9.07584842677876, "grad_norm": 0.12265541404485703, "learning_rate": 1.5463667552698277e-05, "loss": 0.0061, "step": 293640 }, { "epoch": 9.076775669159918, "grad_norm": 0.13256841897964478, "learning_rate": 1.54632039315077e-05, "loss": 0.0058, "step": 293670 }, { "epoch": 9.077702911541078, "grad_norm": 0.1070859432220459, "learning_rate": 1.546274031031712e-05, "loss": 0.0058, "step": 293700 }, { "epoch": 9.078630153922235, "grad_norm": 0.10940900444984436, "learning_rate": 1.5462276689126538e-05, "loss": 0.0059, "step": 293730 }, { "epoch": 9.079557396303393, 
"grad_norm": 0.09910455346107483, "learning_rate": 1.546181306793596e-05, "loss": 0.0062, "step": 293760 }, { "epoch": 9.080484638684553, "grad_norm": 0.14121882617473602, "learning_rate": 1.546134944674538e-05, "loss": 0.0066, "step": 293790 }, { "epoch": 9.08141188106571, "grad_norm": 0.15078343451023102, "learning_rate": 1.5460885825554803e-05, "loss": 0.0056, "step": 293820 }, { "epoch": 9.08233912344687, "grad_norm": 0.1018114909529686, "learning_rate": 1.5460422204364224e-05, "loss": 0.0058, "step": 293850 }, { "epoch": 9.083266365828027, "grad_norm": 0.13020384311676025, "learning_rate": 1.5459958583173642e-05, "loss": 0.0058, "step": 293880 }, { "epoch": 9.084193608209185, "grad_norm": 0.14101368188858032, "learning_rate": 1.5459494961983064e-05, "loss": 0.0061, "step": 293910 }, { "epoch": 9.085120850590345, "grad_norm": 0.13230179250240326, "learning_rate": 1.5459031340792485e-05, "loss": 0.0061, "step": 293940 }, { "epoch": 9.086048092971502, "grad_norm": 0.211495503783226, "learning_rate": 1.5458567719601903e-05, "loss": 0.0064, "step": 293970 }, { "epoch": 9.086975335352662, "grad_norm": 0.10486014187335968, "learning_rate": 1.5458104098411325e-05, "loss": 0.0062, "step": 294000 }, { "epoch": 9.08790257773382, "grad_norm": 0.11264451593160629, "learning_rate": 1.5457640477220746e-05, "loss": 0.006, "step": 294030 }, { "epoch": 9.088829820114977, "grad_norm": 0.11547110974788666, "learning_rate": 1.5457176856030168e-05, "loss": 0.006, "step": 294060 }, { "epoch": 9.089757062496137, "grad_norm": 0.11451176553964615, "learning_rate": 1.545671323483959e-05, "loss": 0.0057, "step": 294090 }, { "epoch": 9.090684304877295, "grad_norm": 0.1228947713971138, "learning_rate": 1.5456265067688697e-05, "loss": 0.0064, "step": 294120 }, { "epoch": 9.091611547258454, "grad_norm": 0.11411657184362411, "learning_rate": 1.5455801446498115e-05, "loss": 0.0067, "step": 294150 }, { "epoch": 9.092538789639612, "grad_norm": 0.0862262099981308, "learning_rate": 
1.5455337825307537e-05, "loss": 0.0057, "step": 294180 }, { "epoch": 9.09346603202077, "grad_norm": 0.12476775050163269, "learning_rate": 1.5454874204116955e-05, "loss": 0.0066, "step": 294210 }, { "epoch": 9.094393274401929, "grad_norm": 0.12045880407094955, "learning_rate": 1.5454410582926376e-05, "loss": 0.0064, "step": 294240 }, { "epoch": 9.095320516783087, "grad_norm": 0.1404879093170166, "learning_rate": 1.5453946961735798e-05, "loss": 0.0058, "step": 294270 }, { "epoch": 9.096247759164246, "grad_norm": 0.11132361739873886, "learning_rate": 1.545348334054522e-05, "loss": 0.0059, "step": 294300 }, { "epoch": 9.097175001545404, "grad_norm": 0.11264444142580032, "learning_rate": 1.545301971935464e-05, "loss": 0.0063, "step": 294330 }, { "epoch": 9.098102243926562, "grad_norm": 0.14515800774097443, "learning_rate": 1.5452556098164063e-05, "loss": 0.0062, "step": 294360 }, { "epoch": 9.099029486307721, "grad_norm": 0.17714190483093262, "learning_rate": 1.545209247697348e-05, "loss": 0.0059, "step": 294390 }, { "epoch": 9.099956728688879, "grad_norm": 0.1527474969625473, "learning_rate": 1.5451628855782902e-05, "loss": 0.0064, "step": 294420 }, { "epoch": 9.100883971070038, "grad_norm": 0.12676236033439636, "learning_rate": 1.5451165234592324e-05, "loss": 0.006, "step": 294450 }, { "epoch": 9.101811213451196, "grad_norm": 0.07360612601041794, "learning_rate": 1.5450701613401745e-05, "loss": 0.0061, "step": 294480 }, { "epoch": 9.102738455832355, "grad_norm": 0.07479894161224365, "learning_rate": 1.5450237992211167e-05, "loss": 0.0056, "step": 294510 }, { "epoch": 9.103665698213513, "grad_norm": 0.08019676059484482, "learning_rate": 1.5449774371020585e-05, "loss": 0.0064, "step": 294540 }, { "epoch": 9.10459294059467, "grad_norm": 0.12036319077014923, "learning_rate": 1.5449310749830006e-05, "loss": 0.0063, "step": 294570 }, { "epoch": 9.10552018297583, "grad_norm": 0.12142826616764069, "learning_rate": 1.5448847128639428e-05, "loss": 0.0061, "step": 294600 }, { 
"epoch": 9.106447425356988, "grad_norm": 0.10625799000263214, "learning_rate": 1.5448383507448846e-05, "loss": 0.0058, "step": 294630 }, { "epoch": 9.107374667738148, "grad_norm": 0.20054537057876587, "learning_rate": 1.544791988625827e-05, "loss": 0.006, "step": 294660 }, { "epoch": 9.108301910119305, "grad_norm": 0.1063414141535759, "learning_rate": 1.5447456265067692e-05, "loss": 0.0058, "step": 294690 }, { "epoch": 9.109229152500463, "grad_norm": 0.25583797693252563, "learning_rate": 1.5447008097916797e-05, "loss": 0.0065, "step": 294720 }, { "epoch": 9.110156394881622, "grad_norm": 0.0502471998333931, "learning_rate": 1.544654447672622e-05, "loss": 0.0057, "step": 294750 }, { "epoch": 9.11108363726278, "grad_norm": 0.15554919838905334, "learning_rate": 1.544608085553564e-05, "loss": 0.0057, "step": 294780 }, { "epoch": 9.11201087964394, "grad_norm": 0.08896591514348984, "learning_rate": 1.5445617234345058e-05, "loss": 0.0058, "step": 294810 }, { "epoch": 9.112938122025097, "grad_norm": 0.11581100523471832, "learning_rate": 1.544515361315448e-05, "loss": 0.0059, "step": 294840 }, { "epoch": 9.113865364406255, "grad_norm": 0.16966907680034637, "learning_rate": 1.54446899919639e-05, "loss": 0.0061, "step": 294870 }, { "epoch": 9.114792606787415, "grad_norm": 0.10643205791711807, "learning_rate": 1.5444226370773322e-05, "loss": 0.0061, "step": 294900 }, { "epoch": 9.115719849168572, "grad_norm": 0.12129902839660645, "learning_rate": 1.5443762749582744e-05, "loss": 0.0059, "step": 294930 }, { "epoch": 9.116647091549732, "grad_norm": 0.17735014855861664, "learning_rate": 1.5443299128392165e-05, "loss": 0.0067, "step": 294960 }, { "epoch": 9.11757433393089, "grad_norm": 0.1485195904970169, "learning_rate": 1.5442835507201584e-05, "loss": 0.0065, "step": 294990 }, { "epoch": 9.118501576312047, "grad_norm": 0.16956010460853577, "learning_rate": 1.5442371886011005e-05, "loss": 0.0061, "step": 295020 }, { "epoch": 9.119428818693207, "grad_norm": 0.06135108694434166, 
"learning_rate": 1.5441908264820423e-05, "loss": 0.0059, "step": 295050 }, { "epoch": 9.120356061074364, "grad_norm": 0.13292035460472107, "learning_rate": 1.5441444643629845e-05, "loss": 0.0062, "step": 295080 }, { "epoch": 9.121283303455524, "grad_norm": 0.11942166835069656, "learning_rate": 1.5440981022439266e-05, "loss": 0.0061, "step": 295110 }, { "epoch": 9.122210545836682, "grad_norm": 0.11654281616210938, "learning_rate": 1.5440517401248688e-05, "loss": 0.006, "step": 295140 }, { "epoch": 9.12313778821784, "grad_norm": 0.11631106585264206, "learning_rate": 1.544005378005811e-05, "loss": 0.0054, "step": 295170 }, { "epoch": 9.124065030598999, "grad_norm": 0.1511833816766739, "learning_rate": 1.543959015886753e-05, "loss": 0.0067, "step": 295200 }, { "epoch": 9.124992272980156, "grad_norm": 0.11435054242610931, "learning_rate": 1.543912653767695e-05, "loss": 0.0055, "step": 295230 }, { "epoch": 9.125919515361316, "grad_norm": 0.15291573107242584, "learning_rate": 1.543866291648637e-05, "loss": 0.0059, "step": 295260 }, { "epoch": 9.126846757742474, "grad_norm": 0.15681812167167664, "learning_rate": 1.5438199295295792e-05, "loss": 0.0058, "step": 295290 }, { "epoch": 9.127774000123633, "grad_norm": 0.12364568561315536, "learning_rate": 1.5437735674105213e-05, "loss": 0.0055, "step": 295320 }, { "epoch": 9.12870124250479, "grad_norm": 0.09148241579532623, "learning_rate": 1.5437272052914635e-05, "loss": 0.006, "step": 295350 }, { "epoch": 9.129628484885949, "grad_norm": 0.13491947948932648, "learning_rate": 1.5436808431724053e-05, "loss": 0.0054, "step": 295380 }, { "epoch": 9.130555727267108, "grad_norm": 0.11892551928758621, "learning_rate": 1.5436344810533475e-05, "loss": 0.0065, "step": 295410 }, { "epoch": 9.131482969648266, "grad_norm": 0.08887796103954315, "learning_rate": 1.5435881189342896e-05, "loss": 0.0063, "step": 295440 }, { "epoch": 9.132410212029425, "grad_norm": 0.17358282208442688, "learning_rate": 1.5435417568152314e-05, "loss": 0.0067, 
"step": 295470 }, { "epoch": 9.133337454410583, "grad_norm": 0.11650227755308151, "learning_rate": 1.5434953946961736e-05, "loss": 0.0065, "step": 295500 }, { "epoch": 9.13426469679174, "grad_norm": 0.11494279652833939, "learning_rate": 1.5434490325771157e-05, "loss": 0.0056, "step": 295530 }, { "epoch": 9.1351919391729, "grad_norm": 0.09584026038646698, "learning_rate": 1.543402670458058e-05, "loss": 0.0055, "step": 295560 }, { "epoch": 9.136119181554058, "grad_norm": 0.11840979754924774, "learning_rate": 1.543356308339e-05, "loss": 0.0059, "step": 295590 }, { "epoch": 9.137046423935217, "grad_norm": 0.07347510755062103, "learning_rate": 1.543309946219942e-05, "loss": 0.006, "step": 295620 }, { "epoch": 9.137973666316375, "grad_norm": 0.11045654863119125, "learning_rate": 1.543263584100884e-05, "loss": 0.0056, "step": 295650 }, { "epoch": 9.138900908697533, "grad_norm": 0.1909942626953125, "learning_rate": 1.543217221981826e-05, "loss": 0.0061, "step": 295680 }, { "epoch": 9.139828151078692, "grad_norm": 0.1357439160346985, "learning_rate": 1.5431708598627683e-05, "loss": 0.0065, "step": 295710 }, { "epoch": 9.14075539345985, "grad_norm": 0.1012362539768219, "learning_rate": 1.5431244977437104e-05, "loss": 0.0057, "step": 295740 }, { "epoch": 9.14168263584101, "grad_norm": 0.11546177417039871, "learning_rate": 1.5430781356246526e-05, "loss": 0.0062, "step": 295770 }, { "epoch": 9.142609878222167, "grad_norm": 0.12361226230859756, "learning_rate": 1.5430317735055944e-05, "loss": 0.0067, "step": 295800 }, { "epoch": 9.143537120603325, "grad_norm": 0.1295987218618393, "learning_rate": 1.5429854113865365e-05, "loss": 0.0061, "step": 295830 }, { "epoch": 9.144464362984484, "grad_norm": 0.13761240243911743, "learning_rate": 1.5429405946714473e-05, "loss": 0.0057, "step": 295860 }, { "epoch": 9.145391605365642, "grad_norm": 0.13053767383098602, "learning_rate": 1.542894232552389e-05, "loss": 0.0057, "step": 295890 }, { "epoch": 9.146318847746802, "grad_norm": 
0.14834339916706085, "learning_rate": 1.5428478704333313e-05, "loss": 0.0056, "step": 295920 }, { "epoch": 9.14724609012796, "grad_norm": 0.09724929928779602, "learning_rate": 1.5428015083142734e-05, "loss": 0.0055, "step": 295950 }, { "epoch": 9.148173332509117, "grad_norm": 0.10112281143665314, "learning_rate": 1.5427551461952156e-05, "loss": 0.0067, "step": 295980 }, { "epoch": 9.149100574890277, "grad_norm": 0.13209256529808044, "learning_rate": 1.5427087840761577e-05, "loss": 0.0058, "step": 296010 }, { "epoch": 9.150027817271434, "grad_norm": 0.137051522731781, "learning_rate": 1.5426624219571e-05, "loss": 0.0058, "step": 296040 }, { "epoch": 9.150955059652594, "grad_norm": 0.16167262196540833, "learning_rate": 1.5426160598380417e-05, "loss": 0.0063, "step": 296070 }, { "epoch": 9.151882302033751, "grad_norm": 0.10057179629802704, "learning_rate": 1.542569697718984e-05, "loss": 0.0062, "step": 296100 }, { "epoch": 9.152809544414911, "grad_norm": 0.10996692627668381, "learning_rate": 1.5425233355999257e-05, "loss": 0.0064, "step": 296130 }, { "epoch": 9.153736786796069, "grad_norm": 0.10315581411123276, "learning_rate": 1.542476973480868e-05, "loss": 0.0058, "step": 296160 }, { "epoch": 9.154664029177226, "grad_norm": 0.10031557083129883, "learning_rate": 1.5424306113618103e-05, "loss": 0.0057, "step": 296190 }, { "epoch": 9.155591271558386, "grad_norm": 0.06872205436229706, "learning_rate": 1.542384249242752e-05, "loss": 0.0061, "step": 296220 }, { "epoch": 9.156518513939544, "grad_norm": 0.11060623079538345, "learning_rate": 1.5423378871236943e-05, "loss": 0.0062, "step": 296250 }, { "epoch": 9.157445756320703, "grad_norm": 0.15445931255817413, "learning_rate": 1.5422915250046364e-05, "loss": 0.0058, "step": 296280 }, { "epoch": 9.15837299870186, "grad_norm": 0.10763714462518692, "learning_rate": 1.5422451628855782e-05, "loss": 0.0067, "step": 296310 }, { "epoch": 9.159300241083018, "grad_norm": 0.14639456570148468, "learning_rate": 1.5421988007665204e-05, 
"loss": 0.0059, "step": 296340 }, { "epoch": 9.160227483464178, "grad_norm": 0.08141915500164032, "learning_rate": 1.5421524386474625e-05, "loss": 0.0057, "step": 296370 }, { "epoch": 9.161154725845336, "grad_norm": 0.19005094468593597, "learning_rate": 1.5421060765284047e-05, "loss": 0.0063, "step": 296400 }, { "epoch": 9.162081968226495, "grad_norm": 0.10259535163640976, "learning_rate": 1.542059714409347e-05, "loss": 0.0059, "step": 296430 }, { "epoch": 9.163009210607653, "grad_norm": 0.10857126861810684, "learning_rate": 1.5420133522902887e-05, "loss": 0.0062, "step": 296460 }, { "epoch": 9.16393645298881, "grad_norm": 0.18297991156578064, "learning_rate": 1.5419669901712308e-05, "loss": 0.0055, "step": 296490 }, { "epoch": 9.16486369536997, "grad_norm": 0.11927549540996552, "learning_rate": 1.541920628052173e-05, "loss": 0.0052, "step": 296520 }, { "epoch": 9.165790937751128, "grad_norm": 0.12822313606739044, "learning_rate": 1.541874265933115e-05, "loss": 0.0064, "step": 296550 }, { "epoch": 9.166718180132287, "grad_norm": 0.09849090874195099, "learning_rate": 1.5418279038140573e-05, "loss": 0.0069, "step": 296580 }, { "epoch": 9.167645422513445, "grad_norm": 0.08076050877571106, "learning_rate": 1.5417815416949994e-05, "loss": 0.0058, "step": 296610 }, { "epoch": 9.168572664894603, "grad_norm": 0.09698060154914856, "learning_rate": 1.5417351795759412e-05, "loss": 0.0055, "step": 296640 }, { "epoch": 9.169499907275762, "grad_norm": 0.10694942623376846, "learning_rate": 1.5416888174568834e-05, "loss": 0.0061, "step": 296670 }, { "epoch": 9.17042714965692, "grad_norm": 0.1313968449831009, "learning_rate": 1.5416424553378255e-05, "loss": 0.0063, "step": 296700 }, { "epoch": 9.17135439203808, "grad_norm": 0.051507290452718735, "learning_rate": 1.5415960932187673e-05, "loss": 0.0058, "step": 296730 }, { "epoch": 9.172281634419237, "grad_norm": 0.10544388741254807, "learning_rate": 1.5415497310997095e-05, "loss": 0.0066, "step": 296760 }, { "epoch": 
9.173208876800395, "grad_norm": 0.16091972589492798, "learning_rate": 1.5415033689806516e-05, "loss": 0.0066, "step": 296790 }, { "epoch": 9.174136119181554, "grad_norm": 0.14978858828544617, "learning_rate": 1.5414570068615938e-05, "loss": 0.0067, "step": 296820 }, { "epoch": 9.175063361562712, "grad_norm": 0.13501162827014923, "learning_rate": 1.541410644742536e-05, "loss": 0.0063, "step": 296850 }, { "epoch": 9.175990603943871, "grad_norm": 0.14342881739139557, "learning_rate": 1.5413642826234777e-05, "loss": 0.0058, "step": 296880 }, { "epoch": 9.17691784632503, "grad_norm": 0.11556023359298706, "learning_rate": 1.54131792050442e-05, "loss": 0.0059, "step": 296910 }, { "epoch": 9.177845088706189, "grad_norm": 0.10286688059568405, "learning_rate": 1.541271558385362e-05, "loss": 0.0063, "step": 296940 }, { "epoch": 9.178772331087346, "grad_norm": 0.11380220204591751, "learning_rate": 1.5412251962663042e-05, "loss": 0.0057, "step": 296970 }, { "epoch": 9.179699573468504, "grad_norm": 0.13142335414886475, "learning_rate": 1.5411788341472463e-05, "loss": 0.0056, "step": 297000 }, { "epoch": 9.180626815849664, "grad_norm": 0.06623051315546036, "learning_rate": 1.5411324720281885e-05, "loss": 0.0061, "step": 297030 }, { "epoch": 9.181554058230821, "grad_norm": 0.094358429312706, "learning_rate": 1.5410861099091303e-05, "loss": 0.0062, "step": 297060 }, { "epoch": 9.18248130061198, "grad_norm": 0.12882791459560394, "learning_rate": 1.5410397477900725e-05, "loss": 0.0051, "step": 297090 }, { "epoch": 9.183408542993138, "grad_norm": 0.14170318841934204, "learning_rate": 1.5409933856710143e-05, "loss": 0.0063, "step": 297120 }, { "epoch": 9.184335785374296, "grad_norm": 0.1301010400056839, "learning_rate": 1.5409470235519564e-05, "loss": 0.0059, "step": 297150 }, { "epoch": 9.185263027755456, "grad_norm": 0.1381566971540451, "learning_rate": 1.5409006614328986e-05, "loss": 0.0061, "step": 297180 }, { "epoch": 9.186190270136613, "grad_norm": 0.11597999185323715, 
"learning_rate": 1.5408542993138407e-05, "loss": 0.0073, "step": 297210 }, { "epoch": 9.187117512517773, "grad_norm": 0.14244993031024933, "learning_rate": 1.540807937194783e-05, "loss": 0.0064, "step": 297240 }, { "epoch": 9.18804475489893, "grad_norm": 0.09238094091415405, "learning_rate": 1.540761575075725e-05, "loss": 0.0059, "step": 297270 }, { "epoch": 9.188971997280088, "grad_norm": 0.08437854796648026, "learning_rate": 1.540715212956667e-05, "loss": 0.0062, "step": 297300 }, { "epoch": 9.189899239661248, "grad_norm": 0.10939369350671768, "learning_rate": 1.540668850837609e-05, "loss": 0.0056, "step": 297330 }, { "epoch": 9.190826482042405, "grad_norm": 0.11448907107114792, "learning_rate": 1.540622488718551e-05, "loss": 0.0062, "step": 297360 }, { "epoch": 9.191753724423565, "grad_norm": 0.11493824422359467, "learning_rate": 1.5405761265994933e-05, "loss": 0.005, "step": 297390 }, { "epoch": 9.192680966804723, "grad_norm": 0.10975026339292526, "learning_rate": 1.5405297644804354e-05, "loss": 0.006, "step": 297420 }, { "epoch": 9.19360820918588, "grad_norm": 0.10471294820308685, "learning_rate": 1.5404834023613776e-05, "loss": 0.0064, "step": 297450 }, { "epoch": 9.19453545156704, "grad_norm": 0.12857326865196228, "learning_rate": 1.5404370402423194e-05, "loss": 0.0063, "step": 297480 }, { "epoch": 9.195462693948198, "grad_norm": 0.16953252255916595, "learning_rate": 1.5403906781232616e-05, "loss": 0.0061, "step": 297510 }, { "epoch": 9.196389936329357, "grad_norm": 0.13380944728851318, "learning_rate": 1.5403443160042034e-05, "loss": 0.0061, "step": 297540 }, { "epoch": 9.197317178710515, "grad_norm": 0.15280374884605408, "learning_rate": 1.540297953885146e-05, "loss": 0.0063, "step": 297570 }, { "epoch": 9.198244421091673, "grad_norm": 0.05522402375936508, "learning_rate": 1.540251591766088e-05, "loss": 0.0064, "step": 297600 }, { "epoch": 9.199171663472832, "grad_norm": 0.08003702014684677, "learning_rate": 1.5402052296470298e-05, "loss": 0.0059, "step": 
297630 }, { "epoch": 9.20009890585399, "grad_norm": 0.09080708026885986, "learning_rate": 1.540158867527972e-05, "loss": 0.0059, "step": 297660 }, { "epoch": 9.20102614823515, "grad_norm": 0.07236716151237488, "learning_rate": 1.540112505408914e-05, "loss": 0.006, "step": 297690 }, { "epoch": 9.201953390616307, "grad_norm": 0.19424034655094147, "learning_rate": 1.540066143289856e-05, "loss": 0.0058, "step": 297720 }, { "epoch": 9.202880632997466, "grad_norm": 0.11390651762485504, "learning_rate": 1.540019781170798e-05, "loss": 0.006, "step": 297750 }, { "epoch": 9.203807875378624, "grad_norm": 0.10473205894231796, "learning_rate": 1.5399734190517402e-05, "loss": 0.006, "step": 297780 }, { "epoch": 9.204735117759782, "grad_norm": 0.10681614279747009, "learning_rate": 1.5399270569326824e-05, "loss": 0.0058, "step": 297810 }, { "epoch": 9.205662360140941, "grad_norm": 0.14947862923145294, "learning_rate": 1.5398806948136245e-05, "loss": 0.0063, "step": 297840 }, { "epoch": 9.206589602522099, "grad_norm": 0.07303277403116226, "learning_rate": 1.5398343326945663e-05, "loss": 0.0065, "step": 297870 }, { "epoch": 9.207516844903259, "grad_norm": 0.09786392748355865, "learning_rate": 1.5397879705755085e-05, "loss": 0.0054, "step": 297900 }, { "epoch": 9.208444087284416, "grad_norm": 0.15135402977466583, "learning_rate": 1.5397416084564506e-05, "loss": 0.0058, "step": 297930 }, { "epoch": 9.209371329665574, "grad_norm": 0.10539697110652924, "learning_rate": 1.5396952463373928e-05, "loss": 0.0056, "step": 297960 }, { "epoch": 9.210298572046733, "grad_norm": 0.1250336468219757, "learning_rate": 1.539648884218335e-05, "loss": 0.0073, "step": 297990 }, { "epoch": 9.211225814427891, "grad_norm": 0.1346583217382431, "learning_rate": 1.539602522099277e-05, "loss": 0.0062, "step": 298020 }, { "epoch": 9.21215305680905, "grad_norm": 0.10492244362831116, "learning_rate": 1.539556159980219e-05, "loss": 0.0062, "step": 298050 }, { "epoch": 9.213080299190208, "grad_norm": 
0.11812805384397507, "learning_rate": 1.539509797861161e-05, "loss": 0.0063, "step": 298080 }, { "epoch": 9.214007541571366, "grad_norm": 0.141702800989151, "learning_rate": 1.5394634357421032e-05, "loss": 0.0064, "step": 298110 }, { "epoch": 9.214934783952526, "grad_norm": 0.13309799134731293, "learning_rate": 1.539417073623045e-05, "loss": 0.0055, "step": 298140 }, { "epoch": 9.215862026333683, "grad_norm": 0.09860936552286148, "learning_rate": 1.5393707115039872e-05, "loss": 0.0062, "step": 298170 }, { "epoch": 9.216789268714843, "grad_norm": 0.07587224990129471, "learning_rate": 1.5393243493849293e-05, "loss": 0.0064, "step": 298200 }, { "epoch": 9.217716511096, "grad_norm": 0.1951586902141571, "learning_rate": 1.5392779872658715e-05, "loss": 0.0064, "step": 298230 }, { "epoch": 9.218643753477158, "grad_norm": 0.11052794754505157, "learning_rate": 1.5392316251468136e-05, "loss": 0.0061, "step": 298260 }, { "epoch": 9.219570995858318, "grad_norm": 0.08899825066328049, "learning_rate": 1.5391852630277554e-05, "loss": 0.0062, "step": 298290 }, { "epoch": 9.220498238239475, "grad_norm": 0.13913409411907196, "learning_rate": 1.5391389009086976e-05, "loss": 0.0067, "step": 298320 }, { "epoch": 9.221425480620635, "grad_norm": 0.05991770699620247, "learning_rate": 1.5390925387896397e-05, "loss": 0.0061, "step": 298350 }, { "epoch": 9.222352723001793, "grad_norm": 0.1025322750210762, "learning_rate": 1.539046176670582e-05, "loss": 0.0059, "step": 298380 }, { "epoch": 9.22327996538295, "grad_norm": 0.11645803600549698, "learning_rate": 1.538999814551524e-05, "loss": 0.006, "step": 298410 }, { "epoch": 9.22420720776411, "grad_norm": 0.09223300218582153, "learning_rate": 1.5389534524324662e-05, "loss": 0.0063, "step": 298440 }, { "epoch": 9.225134450145267, "grad_norm": 0.12678712606430054, "learning_rate": 1.538907090313408e-05, "loss": 0.0055, "step": 298470 }, { "epoch": 9.226061692526427, "grad_norm": 0.15821851789951324, "learning_rate": 1.53886072819435e-05, "loss": 
0.0062, "step": 298500 }, { "epoch": 9.226988934907585, "grad_norm": 0.15746131539344788, "learning_rate": 1.538814366075292e-05, "loss": 0.0054, "step": 298530 }, { "epoch": 9.227916177288744, "grad_norm": 0.13118574023246765, "learning_rate": 1.538768003956234e-05, "loss": 0.0073, "step": 298560 }, { "epoch": 9.228843419669902, "grad_norm": 0.0844544768333435, "learning_rate": 1.5387216418371763e-05, "loss": 0.0064, "step": 298590 }, { "epoch": 9.22977066205106, "grad_norm": 0.11788839101791382, "learning_rate": 1.5386752797181184e-05, "loss": 0.0062, "step": 298620 }, { "epoch": 9.230697904432219, "grad_norm": 0.0926792100071907, "learning_rate": 1.5386289175990606e-05, "loss": 0.0063, "step": 298650 }, { "epoch": 9.231625146813377, "grad_norm": 0.1260143369436264, "learning_rate": 1.5385825554800027e-05, "loss": 0.0063, "step": 298680 }, { "epoch": 9.232552389194536, "grad_norm": 0.1060798317193985, "learning_rate": 1.5385361933609445e-05, "loss": 0.0063, "step": 298710 }, { "epoch": 9.233479631575694, "grad_norm": 0.09734801203012466, "learning_rate": 1.5384898312418867e-05, "loss": 0.0053, "step": 298740 }, { "epoch": 9.234406873956852, "grad_norm": 0.09075376391410828, "learning_rate": 1.5384434691228288e-05, "loss": 0.006, "step": 298770 }, { "epoch": 9.235334116338011, "grad_norm": 0.0818030908703804, "learning_rate": 1.538397107003771e-05, "loss": 0.0064, "step": 298800 }, { "epoch": 9.236261358719169, "grad_norm": 0.14471971988677979, "learning_rate": 1.538350744884713e-05, "loss": 0.0057, "step": 298830 }, { "epoch": 9.237188601100328, "grad_norm": 0.10351631790399551, "learning_rate": 1.538304382765655e-05, "loss": 0.0059, "step": 298860 }, { "epoch": 9.238115843481486, "grad_norm": 0.09489922970533371, "learning_rate": 1.538258020646597e-05, "loss": 0.0062, "step": 298890 }, { "epoch": 9.239043085862644, "grad_norm": 0.0671805590391159, "learning_rate": 1.5382116585275392e-05, "loss": 0.0059, "step": 298920 }, { "epoch": 9.239970328243803, 
"grad_norm": 0.08574904501438141, "learning_rate": 1.538165296408481e-05, "loss": 0.0067, "step": 298950 }, { "epoch": 9.240897570624961, "grad_norm": 0.11184097081422806, "learning_rate": 1.5381189342894235e-05, "loss": 0.0061, "step": 298980 }, { "epoch": 9.24182481300612, "grad_norm": 0.10659652203321457, "learning_rate": 1.5380725721703657e-05, "loss": 0.0059, "step": 299010 }, { "epoch": 9.242752055387278, "grad_norm": 0.08756809681653976, "learning_rate": 1.5380262100513075e-05, "loss": 0.006, "step": 299040 }, { "epoch": 9.243679297768436, "grad_norm": 0.08045218884944916, "learning_rate": 1.5379798479322497e-05, "loss": 0.0065, "step": 299070 }, { "epoch": 9.244606540149595, "grad_norm": 0.14090080559253693, "learning_rate": 1.5379334858131918e-05, "loss": 0.0066, "step": 299100 }, { "epoch": 9.245533782530753, "grad_norm": 0.0903361365199089, "learning_rate": 1.5378871236941336e-05, "loss": 0.0061, "step": 299130 }, { "epoch": 9.246461024911913, "grad_norm": 0.11363647878170013, "learning_rate": 1.5378407615750758e-05, "loss": 0.0057, "step": 299160 }, { "epoch": 9.24738826729307, "grad_norm": 0.09423573315143585, "learning_rate": 1.537794399456018e-05, "loss": 0.0061, "step": 299190 }, { "epoch": 9.248315509674228, "grad_norm": 0.07099711149930954, "learning_rate": 1.53774803733696e-05, "loss": 0.0064, "step": 299220 }, { "epoch": 9.249242752055387, "grad_norm": 0.10031239688396454, "learning_rate": 1.5377016752179022e-05, "loss": 0.0057, "step": 299250 }, { "epoch": 9.250169994436545, "grad_norm": 0.11828873306512833, "learning_rate": 1.537655313098844e-05, "loss": 0.0061, "step": 299280 }, { "epoch": 9.251097236817705, "grad_norm": 0.10696601867675781, "learning_rate": 1.5376089509797862e-05, "loss": 0.006, "step": 299310 }, { "epoch": 9.252024479198862, "grad_norm": 0.10686469078063965, "learning_rate": 1.5375625888607283e-05, "loss": 0.006, "step": 299340 }, { "epoch": 9.252951721580022, "grad_norm": 0.1396549791097641, "learning_rate": 
1.5375162267416705e-05, "loss": 0.0067, "step": 299370 }, { "epoch": 9.25387896396118, "grad_norm": 0.14879554510116577, "learning_rate": 1.5374698646226126e-05, "loss": 0.0061, "step": 299400 }, { "epoch": 9.254806206342337, "grad_norm": 0.10424309223890305, "learning_rate": 1.5374235025035548e-05, "loss": 0.006, "step": 299430 }, { "epoch": 9.255733448723497, "grad_norm": 0.107217937707901, "learning_rate": 1.5373771403844966e-05, "loss": 0.0057, "step": 299460 }, { "epoch": 9.256660691104655, "grad_norm": 0.0889824628829956, "learning_rate": 1.5373307782654387e-05, "loss": 0.0063, "step": 299490 }, { "epoch": 9.257587933485814, "grad_norm": 0.09495577961206436, "learning_rate": 1.5372844161463806e-05, "loss": 0.006, "step": 299520 }, { "epoch": 9.258515175866972, "grad_norm": 0.15221545100212097, "learning_rate": 1.5372380540273227e-05, "loss": 0.0068, "step": 299550 }, { "epoch": 9.25944241824813, "grad_norm": 0.13038183748722076, "learning_rate": 1.537191691908265e-05, "loss": 0.0063, "step": 299580 }, { "epoch": 9.260369660629289, "grad_norm": 0.12366678565740585, "learning_rate": 1.537145329789207e-05, "loss": 0.0061, "step": 299610 }, { "epoch": 9.261296903010447, "grad_norm": 0.12227680534124374, "learning_rate": 1.537098967670149e-05, "loss": 0.006, "step": 299640 }, { "epoch": 9.262224145391606, "grad_norm": 0.15156638622283936, "learning_rate": 1.5370526055510913e-05, "loss": 0.006, "step": 299670 }, { "epoch": 9.263151387772764, "grad_norm": 0.1567951887845993, "learning_rate": 1.537006243432033e-05, "loss": 0.0061, "step": 299700 }, { "epoch": 9.264078630153922, "grad_norm": 0.1022341176867485, "learning_rate": 1.5369598813129753e-05, "loss": 0.0065, "step": 299730 }, { "epoch": 9.265005872535081, "grad_norm": 0.09242624044418335, "learning_rate": 1.5369135191939174e-05, "loss": 0.0061, "step": 299760 }, { "epoch": 9.265933114916239, "grad_norm": 0.060991764068603516, "learning_rate": 1.5368671570748596e-05, "loss": 0.0057, "step": 299790 }, { 
"epoch": 9.266860357297398, "grad_norm": 0.09747239202260971, "learning_rate": 1.5368207949558017e-05, "loss": 0.0059, "step": 299820 }, { "epoch": 9.267787599678556, "grad_norm": 0.09972060471773148, "learning_rate": 1.5367744328367435e-05, "loss": 0.0063, "step": 299850 }, { "epoch": 9.268714842059714, "grad_norm": 0.15710632503032684, "learning_rate": 1.5367280707176857e-05, "loss": 0.0067, "step": 299880 }, { "epoch": 9.269642084440873, "grad_norm": 0.10592544823884964, "learning_rate": 1.536681708598628e-05, "loss": 0.0059, "step": 299910 }, { "epoch": 9.27056932682203, "grad_norm": 0.1054435521364212, "learning_rate": 1.5366353464795697e-05, "loss": 0.0063, "step": 299940 }, { "epoch": 9.27149656920319, "grad_norm": 0.07664375007152557, "learning_rate": 1.5365889843605118e-05, "loss": 0.0064, "step": 299970 }, { "epoch": 9.272423811584348, "grad_norm": 0.1509827822446823, "learning_rate": 1.536542622241454e-05, "loss": 0.0056, "step": 300000 }, { "epoch": 9.272423811584348, "eval_f1": 0.9979960102832819, "eval_loss": 0.006878287997096777, "eval_precision": 0.9979947881144333, "eval_recall": 0.9979980892301712, "eval_runtime": 4186.0007, "eval_samples_per_second": 282.179, "eval_steps_per_second": 8.818, "step": 300000 }, { "epoch": 9.273351053965506, "grad_norm": 0.0854186862707138, "learning_rate": 1.536496260122396e-05, "loss": 0.0062, "step": 300030 }, { "epoch": 9.274278296346665, "grad_norm": 0.10940191894769669, "learning_rate": 1.5364498980033383e-05, "loss": 0.0059, "step": 300060 }, { "epoch": 9.275205538727823, "grad_norm": 0.13009163737297058, "learning_rate": 1.5364035358842804e-05, "loss": 0.0054, "step": 300090 }, { "epoch": 9.276132781108982, "grad_norm": 0.0914069190621376, "learning_rate": 1.536358719169191e-05, "loss": 0.0056, "step": 300120 }, { "epoch": 9.27706002349014, "grad_norm": 0.04571763798594475, "learning_rate": 1.536312357050133e-05, "loss": 0.0061, "step": 300150 }, { "epoch": 9.2779872658713, "grad_norm": 0.10320702195167542, 
"learning_rate": 1.536265994931075e-05, "loss": 0.006, "step": 300180 }, { "epoch": 9.278914508252457, "grad_norm": 0.14881999790668488, "learning_rate": 1.536219632812017e-05, "loss": 0.0056, "step": 300210 }, { "epoch": 9.279841750633615, "grad_norm": 0.21274302899837494, "learning_rate": 1.5361732706929595e-05, "loss": 0.006, "step": 300240 }, { "epoch": 9.280768993014775, "grad_norm": 0.09231113642454147, "learning_rate": 1.5361269085739016e-05, "loss": 0.0061, "step": 300270 }, { "epoch": 9.281696235395932, "grad_norm": 0.05713800713419914, "learning_rate": 1.5360805464548434e-05, "loss": 0.0059, "step": 300300 }, { "epoch": 9.282623477777092, "grad_norm": 0.11016491055488586, "learning_rate": 1.5360341843357856e-05, "loss": 0.0062, "step": 300330 }, { "epoch": 9.28355072015825, "grad_norm": 0.17899170517921448, "learning_rate": 1.5359878222167274e-05, "loss": 0.0065, "step": 300360 }, { "epoch": 9.284477962539407, "grad_norm": 0.14548861980438232, "learning_rate": 1.5359414600976695e-05, "loss": 0.0066, "step": 300390 }, { "epoch": 9.285405204920567, "grad_norm": 0.06443315744400024, "learning_rate": 1.5358950979786117e-05, "loss": 0.0059, "step": 300420 }, { "epoch": 9.286332447301724, "grad_norm": 0.1177508607506752, "learning_rate": 1.535848735859554e-05, "loss": 0.006, "step": 300450 }, { "epoch": 9.287259689682884, "grad_norm": 0.16635537147521973, "learning_rate": 1.535802373740496e-05, "loss": 0.0066, "step": 300480 }, { "epoch": 9.288186932064042, "grad_norm": 0.09485536813735962, "learning_rate": 1.535756011621438e-05, "loss": 0.0058, "step": 300510 }, { "epoch": 9.2891141744452, "grad_norm": 0.11659255623817444, "learning_rate": 1.53570964950238e-05, "loss": 0.0052, "step": 300540 }, { "epoch": 9.290041416826359, "grad_norm": 0.09950820356607437, "learning_rate": 1.535663287383322e-05, "loss": 0.0056, "step": 300570 }, { "epoch": 9.290968659207516, "grad_norm": 0.12264693528413773, "learning_rate": 1.5356169252642643e-05, "loss": 0.0061, "step": 
300600 }, { "epoch": 9.291895901588676, "grad_norm": 0.41424521803855896, "learning_rate": 1.5355705631452064e-05, "loss": 0.0058, "step": 300630 }, { "epoch": 9.292823143969834, "grad_norm": 0.10766413062810898, "learning_rate": 1.5355242010261486e-05, "loss": 0.0059, "step": 300660 }, { "epoch": 9.293750386350991, "grad_norm": 0.0763009563088417, "learning_rate": 1.5354778389070904e-05, "loss": 0.0058, "step": 300690 }, { "epoch": 9.29467762873215, "grad_norm": 0.10647911578416824, "learning_rate": 1.5354314767880325e-05, "loss": 0.0059, "step": 300720 }, { "epoch": 9.295604871113309, "grad_norm": 0.11361835151910782, "learning_rate": 1.5353851146689747e-05, "loss": 0.0057, "step": 300750 }, { "epoch": 9.296532113494468, "grad_norm": 0.09385696053504944, "learning_rate": 1.5353387525499165e-05, "loss": 0.0059, "step": 300780 }, { "epoch": 9.297459355875626, "grad_norm": 0.11988034844398499, "learning_rate": 1.5352923904308586e-05, "loss": 0.0067, "step": 300810 }, { "epoch": 9.298386598256783, "grad_norm": 0.1284007579088211, "learning_rate": 1.5352460283118008e-05, "loss": 0.0057, "step": 300840 }, { "epoch": 9.299313840637943, "grad_norm": 0.11946114897727966, "learning_rate": 1.535199666192743e-05, "loss": 0.0051, "step": 300870 }, { "epoch": 9.3002410830191, "grad_norm": 0.09951497614383698, "learning_rate": 1.535153304073685e-05, "loss": 0.0058, "step": 300900 }, { "epoch": 9.30116832540026, "grad_norm": 0.1110803633928299, "learning_rate": 1.5351069419546272e-05, "loss": 0.0064, "step": 300930 }, { "epoch": 9.302095567781418, "grad_norm": 0.15067818760871887, "learning_rate": 1.535060579835569e-05, "loss": 0.006, "step": 300960 }, { "epoch": 9.303022810162577, "grad_norm": 0.1538969725370407, "learning_rate": 1.5350142177165112e-05, "loss": 0.006, "step": 300990 }, { "epoch": 9.303950052543735, "grad_norm": 0.17716284096240997, "learning_rate": 1.5349678555974533e-05, "loss": 0.0062, "step": 301020 }, { "epoch": 9.304877294924893, "grad_norm": 
0.10600054264068604, "learning_rate": 1.5349214934783955e-05, "loss": 0.0061, "step": 301050 }, { "epoch": 9.305804537306052, "grad_norm": 0.119184210896492, "learning_rate": 1.5348751313593376e-05, "loss": 0.0054, "step": 301080 }, { "epoch": 9.30673177968721, "grad_norm": 0.08628393709659576, "learning_rate": 1.5348287692402795e-05, "loss": 0.0062, "step": 301110 }, { "epoch": 9.30765902206837, "grad_norm": 0.08966154605150223, "learning_rate": 1.5347824071212216e-05, "loss": 0.0067, "step": 301140 }, { "epoch": 9.308586264449527, "grad_norm": 0.12164922803640366, "learning_rate": 1.5347360450021638e-05, "loss": 0.0065, "step": 301170 }, { "epoch": 9.309513506830685, "grad_norm": 0.10566306859254837, "learning_rate": 1.5346896828831056e-05, "loss": 0.0058, "step": 301200 }, { "epoch": 9.310440749211844, "grad_norm": 0.16261160373687744, "learning_rate": 1.5346433207640477e-05, "loss": 0.0057, "step": 301230 }, { "epoch": 9.311367991593002, "grad_norm": 0.11700471490621567, "learning_rate": 1.53459695864499e-05, "loss": 0.006, "step": 301260 }, { "epoch": 9.312295233974162, "grad_norm": 0.1065557599067688, "learning_rate": 1.534550596525932e-05, "loss": 0.0059, "step": 301290 }, { "epoch": 9.31322247635532, "grad_norm": 0.12308739870786667, "learning_rate": 1.5345042344068742e-05, "loss": 0.0055, "step": 301320 }, { "epoch": 9.314149718736477, "grad_norm": 0.10025575011968613, "learning_rate": 1.534457872287816e-05, "loss": 0.0062, "step": 301350 }, { "epoch": 9.315076961117637, "grad_norm": 0.15744994580745697, "learning_rate": 1.534411510168758e-05, "loss": 0.0064, "step": 301380 }, { "epoch": 9.316004203498794, "grad_norm": 0.10289555042982101, "learning_rate": 1.5343651480497003e-05, "loss": 0.0061, "step": 301410 }, { "epoch": 9.316931445879954, "grad_norm": 0.10088613629341125, "learning_rate": 1.5343187859306424e-05, "loss": 0.006, "step": 301440 }, { "epoch": 9.317858688261111, "grad_norm": 0.11357108503580093, "learning_rate": 1.5342724238115846e-05, 
"loss": 0.0059, "step": 301470 }, { "epoch": 9.31878593064227, "grad_norm": 0.08496373891830444, "learning_rate": 1.5342260616925267e-05, "loss": 0.006, "step": 301500 }, { "epoch": 9.319713173023429, "grad_norm": 0.09405292570590973, "learning_rate": 1.5341796995734685e-05, "loss": 0.006, "step": 301530 }, { "epoch": 9.320640415404586, "grad_norm": 0.1302650421857834, "learning_rate": 1.5341333374544107e-05, "loss": 0.006, "step": 301560 }, { "epoch": 9.321567657785746, "grad_norm": 0.11927737295627594, "learning_rate": 1.534086975335353e-05, "loss": 0.0059, "step": 301590 }, { "epoch": 9.322494900166904, "grad_norm": 0.1618068367242813, "learning_rate": 1.5340406132162947e-05, "loss": 0.0068, "step": 301620 }, { "epoch": 9.323422142548061, "grad_norm": 0.14070644974708557, "learning_rate": 1.533994251097237e-05, "loss": 0.006, "step": 301650 }, { "epoch": 9.32434938492922, "grad_norm": 0.08176565915346146, "learning_rate": 1.533947888978179e-05, "loss": 0.0059, "step": 301680 }, { "epoch": 9.325276627310378, "grad_norm": 0.1310519576072693, "learning_rate": 1.533901526859121e-05, "loss": 0.006, "step": 301710 }, { "epoch": 9.326203869691538, "grad_norm": 0.09120509773492813, "learning_rate": 1.5338551647400633e-05, "loss": 0.0056, "step": 301740 }, { "epoch": 9.327131112072696, "grad_norm": 0.11571650952100754, "learning_rate": 1.533808802621005e-05, "loss": 0.0059, "step": 301770 }, { "epoch": 9.328058354453855, "grad_norm": 0.16453933715820312, "learning_rate": 1.5337624405019472e-05, "loss": 0.0063, "step": 301800 }, { "epoch": 9.328985596835013, "grad_norm": 0.09842094779014587, "learning_rate": 1.5337160783828894e-05, "loss": 0.006, "step": 301830 }, { "epoch": 9.32991283921617, "grad_norm": 0.1970227211713791, "learning_rate": 1.5336697162638315e-05, "loss": 0.0056, "step": 301860 }, { "epoch": 9.33084008159733, "grad_norm": 0.1482190489768982, "learning_rate": 1.5336233541447737e-05, "loss": 0.0064, "step": 301890 }, { "epoch": 9.331767323978488, 
"grad_norm": 0.07619030028581619, "learning_rate": 1.5335769920257158e-05, "loss": 0.0066, "step": 301920 }, { "epoch": 9.332694566359647, "grad_norm": 0.09550665318965912, "learning_rate": 1.5335306299066576e-05, "loss": 0.0058, "step": 301950 }, { "epoch": 9.333621808740805, "grad_norm": 0.10911640524864197, "learning_rate": 1.5334842677875998e-05, "loss": 0.0061, "step": 301980 }, { "epoch": 9.334549051121963, "grad_norm": 0.08213949203491211, "learning_rate": 1.533437905668542e-05, "loss": 0.0061, "step": 302010 }, { "epoch": 9.335476293503122, "grad_norm": 0.054030682891607285, "learning_rate": 1.533391543549484e-05, "loss": 0.0062, "step": 302040 }, { "epoch": 9.33640353588428, "grad_norm": 0.11232894659042358, "learning_rate": 1.5333451814304262e-05, "loss": 0.0062, "step": 302070 }, { "epoch": 9.33733077826544, "grad_norm": 0.1606091409921646, "learning_rate": 1.533298819311368e-05, "loss": 0.006, "step": 302100 }, { "epoch": 9.338258020646597, "grad_norm": 0.15762746334075928, "learning_rate": 1.5332524571923102e-05, "loss": 0.006, "step": 302130 }, { "epoch": 9.339185263027755, "grad_norm": 0.09165584295988083, "learning_rate": 1.5332060950732524e-05, "loss": 0.0063, "step": 302160 }, { "epoch": 9.340112505408914, "grad_norm": 0.08470112830400467, "learning_rate": 1.533159732954194e-05, "loss": 0.0058, "step": 302190 }, { "epoch": 9.341039747790072, "grad_norm": 0.19674882292747498, "learning_rate": 1.5331133708351363e-05, "loss": 0.0061, "step": 302220 }, { "epoch": 9.341966990171231, "grad_norm": 0.11805456131696701, "learning_rate": 1.5330670087160785e-05, "loss": 0.0058, "step": 302250 }, { "epoch": 9.34289423255239, "grad_norm": 0.1810476928949356, "learning_rate": 1.5330206465970206e-05, "loss": 0.0062, "step": 302280 }, { "epoch": 9.343821474933547, "grad_norm": 0.0876726433634758, "learning_rate": 1.5329742844779628e-05, "loss": 0.0066, "step": 302310 }, { "epoch": 9.344748717314706, "grad_norm": 0.08821506798267365, "learning_rate": 
1.5329279223589046e-05, "loss": 0.0061, "step": 302340 }, { "epoch": 9.345675959695864, "grad_norm": 0.1340348720550537, "learning_rate": 1.5328815602398467e-05, "loss": 0.0058, "step": 302370 }, { "epoch": 9.346603202077024, "grad_norm": 0.10944735258817673, "learning_rate": 1.532835198120789e-05, "loss": 0.0061, "step": 302400 }, { "epoch": 9.347530444458181, "grad_norm": 0.1430235654115677, "learning_rate": 1.532788836001731e-05, "loss": 0.0062, "step": 302430 }, { "epoch": 9.348457686839339, "grad_norm": 0.1406235247850418, "learning_rate": 1.5327424738826732e-05, "loss": 0.0063, "step": 302460 }, { "epoch": 9.349384929220498, "grad_norm": 0.12336363643407822, "learning_rate": 1.5326961117636153e-05, "loss": 0.0058, "step": 302490 }, { "epoch": 9.350312171601656, "grad_norm": 0.13594022393226624, "learning_rate": 1.532649749644557e-05, "loss": 0.006, "step": 302520 }, { "epoch": 9.351239413982816, "grad_norm": 0.09389537572860718, "learning_rate": 1.5326033875254993e-05, "loss": 0.0065, "step": 302550 }, { "epoch": 9.352166656363973, "grad_norm": 0.11452279984951019, "learning_rate": 1.5325570254064414e-05, "loss": 0.006, "step": 302580 }, { "epoch": 9.353093898745133, "grad_norm": 0.08086840063333511, "learning_rate": 1.5325106632873833e-05, "loss": 0.0059, "step": 302610 }, { "epoch": 9.35402114112629, "grad_norm": 0.09166324883699417, "learning_rate": 1.5324643011683254e-05, "loss": 0.0061, "step": 302640 }, { "epoch": 9.354948383507448, "grad_norm": 0.16944870352745056, "learning_rate": 1.5324179390492676e-05, "loss": 0.0062, "step": 302670 }, { "epoch": 9.355875625888608, "grad_norm": 0.12030380219221115, "learning_rate": 1.5323715769302097e-05, "loss": 0.0057, "step": 302700 }, { "epoch": 9.356802868269765, "grad_norm": 0.0864255353808403, "learning_rate": 1.532325214811152e-05, "loss": 0.0069, "step": 302730 }, { "epoch": 9.357730110650925, "grad_norm": 0.12095531821250916, "learning_rate": 1.5322788526920937e-05, "loss": 0.0057, "step": 302760 }, { 
"epoch": 9.358657353032083, "grad_norm": 0.10493116080760956, "learning_rate": 1.5322324905730358e-05, "loss": 0.0056, "step": 302790 }, { "epoch": 9.35958459541324, "grad_norm": 0.13023881614208221, "learning_rate": 1.532186128453978e-05, "loss": 0.0059, "step": 302820 }, { "epoch": 9.3605118377944, "grad_norm": 0.09539753198623657, "learning_rate": 1.53213976633492e-05, "loss": 0.0059, "step": 302850 }, { "epoch": 9.361439080175558, "grad_norm": 0.1053990051150322, "learning_rate": 1.5320934042158623e-05, "loss": 0.0064, "step": 302880 }, { "epoch": 9.362366322556717, "grad_norm": 0.11014017462730408, "learning_rate": 1.5320470420968044e-05, "loss": 0.0063, "step": 302910 }, { "epoch": 9.363293564937875, "grad_norm": 0.0810546949505806, "learning_rate": 1.5320006799777462e-05, "loss": 0.0061, "step": 302940 }, { "epoch": 9.364220807319033, "grad_norm": 0.16058145463466644, "learning_rate": 1.5319543178586884e-05, "loss": 0.0066, "step": 302970 }, { "epoch": 9.365148049700192, "grad_norm": 0.12576358020305634, "learning_rate": 1.5319079557396302e-05, "loss": 0.0058, "step": 303000 }, { "epoch": 9.36607529208135, "grad_norm": 0.1037413626909256, "learning_rate": 1.5318615936205724e-05, "loss": 0.0065, "step": 303030 }, { "epoch": 9.36700253446251, "grad_norm": 0.08008277416229248, "learning_rate": 1.531815231501515e-05, "loss": 0.0058, "step": 303060 }, { "epoch": 9.367929776843667, "grad_norm": 0.10879471153020859, "learning_rate": 1.5317688693824567e-05, "loss": 0.0061, "step": 303090 }, { "epoch": 9.368857019224825, "grad_norm": 0.13978442549705505, "learning_rate": 1.5317225072633988e-05, "loss": 0.0057, "step": 303120 }, { "epoch": 9.369784261605984, "grad_norm": 0.0952032208442688, "learning_rate": 1.531676145144341e-05, "loss": 0.0064, "step": 303150 }, { "epoch": 9.370711503987142, "grad_norm": 0.10563019663095474, "learning_rate": 1.5316297830252828e-05, "loss": 0.0057, "step": 303180 }, { "epoch": 9.371638746368301, "grad_norm": 0.10741473734378815, 
"learning_rate": 1.531583420906225e-05, "loss": 0.0058, "step": 303210 }, { "epoch": 9.372565988749459, "grad_norm": 0.1359662562608719, "learning_rate": 1.531537058787167e-05, "loss": 0.0062, "step": 303240 }, { "epoch": 9.373493231130617, "grad_norm": 0.12238916009664536, "learning_rate": 1.5314906966681092e-05, "loss": 0.0058, "step": 303270 }, { "epoch": 9.374420473511776, "grad_norm": 0.11155089735984802, "learning_rate": 1.5314443345490514e-05, "loss": 0.0062, "step": 303300 }, { "epoch": 9.375347715892934, "grad_norm": 0.10894887149333954, "learning_rate": 1.5313979724299932e-05, "loss": 0.0062, "step": 303330 }, { "epoch": 9.376274958274093, "grad_norm": 0.12461894750595093, "learning_rate": 1.5313516103109353e-05, "loss": 0.0057, "step": 303360 }, { "epoch": 9.377202200655251, "grad_norm": 0.08512526750564575, "learning_rate": 1.5313052481918775e-05, "loss": 0.0056, "step": 303390 }, { "epoch": 9.37812944303641, "grad_norm": 0.06351281702518463, "learning_rate": 1.5312588860728196e-05, "loss": 0.0056, "step": 303420 }, { "epoch": 9.379056685417568, "grad_norm": 0.13499566912651062, "learning_rate": 1.5312125239537618e-05, "loss": 0.0057, "step": 303450 }, { "epoch": 9.379983927798726, "grad_norm": 0.0811600461602211, "learning_rate": 1.531166161834704e-05, "loss": 0.0055, "step": 303480 }, { "epoch": 9.380911170179886, "grad_norm": 0.07002618908882141, "learning_rate": 1.5311197997156457e-05, "loss": 0.0061, "step": 303510 }, { "epoch": 9.381838412561043, "grad_norm": 0.12630067765712738, "learning_rate": 1.531073437596588e-05, "loss": 0.0061, "step": 303540 }, { "epoch": 9.382765654942203, "grad_norm": 0.12783728539943695, "learning_rate": 1.53102707547753e-05, "loss": 0.0059, "step": 303570 }, { "epoch": 9.38369289732336, "grad_norm": 0.1354021579027176, "learning_rate": 1.530980713358472e-05, "loss": 0.0064, "step": 303600 }, { "epoch": 9.384620139704518, "grad_norm": 0.07049234211444855, "learning_rate": 1.530934351239414e-05, "loss": 0.0063, "step": 
303630 }, { "epoch": 9.385547382085678, "grad_norm": 0.12336163222789764, "learning_rate": 1.530887989120356e-05, "loss": 0.0061, "step": 303660 }, { "epoch": 9.386474624466835, "grad_norm": 0.10205450654029846, "learning_rate": 1.5308416270012983e-05, "loss": 0.0054, "step": 303690 }, { "epoch": 9.387401866847995, "grad_norm": 0.11540906131267548, "learning_rate": 1.5307952648822405e-05, "loss": 0.006, "step": 303720 }, { "epoch": 9.388329109229153, "grad_norm": 0.12885431945323944, "learning_rate": 1.5307489027631823e-05, "loss": 0.0064, "step": 303750 }, { "epoch": 9.38925635161031, "grad_norm": 0.07220549136400223, "learning_rate": 1.5307025406441244e-05, "loss": 0.0061, "step": 303780 }, { "epoch": 9.39018359399147, "grad_norm": 0.09787360578775406, "learning_rate": 1.5306561785250666e-05, "loss": 0.0064, "step": 303810 }, { "epoch": 9.391110836372627, "grad_norm": 0.11273433268070221, "learning_rate": 1.5306098164060087e-05, "loss": 0.0062, "step": 303840 }, { "epoch": 9.392038078753787, "grad_norm": 0.1127682700753212, "learning_rate": 1.530563454286951e-05, "loss": 0.0066, "step": 303870 }, { "epoch": 9.392965321134945, "grad_norm": 0.09682077914476395, "learning_rate": 1.530517092167893e-05, "loss": 0.0066, "step": 303900 }, { "epoch": 9.393892563516102, "grad_norm": 0.0876753106713295, "learning_rate": 1.530470730048835e-05, "loss": 0.0064, "step": 303930 }, { "epoch": 9.394819805897262, "grad_norm": 0.16458144783973694, "learning_rate": 1.530424367929777e-05, "loss": 0.0063, "step": 303960 }, { "epoch": 9.39574704827842, "grad_norm": 0.11954902112483978, "learning_rate": 1.5303780058107188e-05, "loss": 0.0062, "step": 303990 }, { "epoch": 9.396674290659579, "grad_norm": 0.12644287943840027, "learning_rate": 1.530331643691661e-05, "loss": 0.0058, "step": 304020 }, { "epoch": 9.397601533040737, "grad_norm": 0.10305780172348022, "learning_rate": 1.530285281572603e-05, "loss": 0.0055, "step": 304050 }, { "epoch": 9.398528775421894, "grad_norm": 
0.14570973813533783, "learning_rate": 1.5302389194535453e-05, "loss": 0.0057, "step": 304080 }, { "epoch": 9.399456017803054, "grad_norm": 0.09470919519662857, "learning_rate": 1.5301925573344874e-05, "loss": 0.0058, "step": 304110 }, { "epoch": 9.400383260184212, "grad_norm": 0.12367651611566544, "learning_rate": 1.5301461952154296e-05, "loss": 0.007, "step": 304140 }, { "epoch": 9.401310502565371, "grad_norm": 0.17126893997192383, "learning_rate": 1.5300998330963714e-05, "loss": 0.006, "step": 304170 }, { "epoch": 9.402237744946529, "grad_norm": 0.05799974128603935, "learning_rate": 1.5300534709773135e-05, "loss": 0.0057, "step": 304200 }, { "epoch": 9.403164987327688, "grad_norm": 0.10670474171638489, "learning_rate": 1.5300071088582557e-05, "loss": 0.0066, "step": 304230 }, { "epoch": 9.404092229708846, "grad_norm": 0.11570437252521515, "learning_rate": 1.5299607467391978e-05, "loss": 0.0058, "step": 304260 }, { "epoch": 9.405019472090004, "grad_norm": 0.10675627738237381, "learning_rate": 1.52991438462014e-05, "loss": 0.0057, "step": 304290 }, { "epoch": 9.405946714471163, "grad_norm": 0.09887310117483139, "learning_rate": 1.5298695679050508e-05, "loss": 0.0058, "step": 304320 }, { "epoch": 9.406873956852321, "grad_norm": 0.11807530373334885, "learning_rate": 1.5298232057859926e-05, "loss": 0.006, "step": 304350 }, { "epoch": 9.40780119923348, "grad_norm": 0.1269858181476593, "learning_rate": 1.5297768436669347e-05, "loss": 0.0062, "step": 304380 }, { "epoch": 9.408728441614638, "grad_norm": 0.11411190778017044, "learning_rate": 1.529730481547877e-05, "loss": 0.0057, "step": 304410 }, { "epoch": 9.409655683995796, "grad_norm": 0.1456214040517807, "learning_rate": 1.5296841194288187e-05, "loss": 0.0058, "step": 304440 }, { "epoch": 9.410582926376955, "grad_norm": 0.09621565043926239, "learning_rate": 1.529637757309761e-05, "loss": 0.0053, "step": 304470 }, { "epoch": 9.411510168758113, "grad_norm": 0.1628267765045166, "learning_rate": 1.529591395190703e-05, 
"loss": 0.006, "step": 304500 }, { "epoch": 9.412437411139273, "grad_norm": 0.17157022655010223, "learning_rate": 1.529545033071645e-05, "loss": 0.0064, "step": 304530 }, { "epoch": 9.41336465352043, "grad_norm": 0.13739962875843048, "learning_rate": 1.5294986709525873e-05, "loss": 0.0065, "step": 304560 }, { "epoch": 9.414291895901588, "grad_norm": 0.11581256985664368, "learning_rate": 1.529452308833529e-05, "loss": 0.0057, "step": 304590 }, { "epoch": 9.415219138282747, "grad_norm": 0.1077900230884552, "learning_rate": 1.5294059467144712e-05, "loss": 0.0065, "step": 304620 }, { "epoch": 9.416146380663905, "grad_norm": 0.15855935215950012, "learning_rate": 1.5293595845954134e-05, "loss": 0.0063, "step": 304650 }, { "epoch": 9.417073623045065, "grad_norm": 0.12518611550331116, "learning_rate": 1.5293132224763555e-05, "loss": 0.0054, "step": 304680 }, { "epoch": 9.418000865426222, "grad_norm": 0.12290811538696289, "learning_rate": 1.5292668603572977e-05, "loss": 0.0064, "step": 304710 }, { "epoch": 9.41892810780738, "grad_norm": 0.1376396268606186, "learning_rate": 1.52922049823824e-05, "loss": 0.0065, "step": 304740 }, { "epoch": 9.41985535018854, "grad_norm": 0.0851658433675766, "learning_rate": 1.5291741361191817e-05, "loss": 0.0064, "step": 304770 }, { "epoch": 9.420782592569697, "grad_norm": 0.0746946856379509, "learning_rate": 1.5291277740001238e-05, "loss": 0.0061, "step": 304800 }, { "epoch": 9.421709834950857, "grad_norm": 0.10252294689416885, "learning_rate": 1.5290814118810656e-05, "loss": 0.0068, "step": 304830 }, { "epoch": 9.422637077332014, "grad_norm": 0.09583429992198944, "learning_rate": 1.5290350497620078e-05, "loss": 0.0065, "step": 304860 }, { "epoch": 9.423564319713172, "grad_norm": 0.07931631803512573, "learning_rate": 1.52898868764295e-05, "loss": 0.0059, "step": 304890 }, { "epoch": 9.424491562094332, "grad_norm": 0.1180155873298645, "learning_rate": 1.528942325523892e-05, "loss": 0.0065, "step": 304920 }, { "epoch": 9.42541880447549, 
"grad_norm": 0.08886409550905228, "learning_rate": 1.5288959634048342e-05, "loss": 0.0058, "step": 304950 }, { "epoch": 9.426346046856649, "grad_norm": 0.1449325978755951, "learning_rate": 1.5288496012857764e-05, "loss": 0.006, "step": 304980 }, { "epoch": 9.427273289237807, "grad_norm": 0.08656129240989685, "learning_rate": 1.5288032391667182e-05, "loss": 0.006, "step": 305010 }, { "epoch": 9.428200531618966, "grad_norm": 0.08295878767967224, "learning_rate": 1.5287568770476603e-05, "loss": 0.0064, "step": 305040 }, { "epoch": 9.429127774000124, "grad_norm": 0.10850774496793747, "learning_rate": 1.5287105149286025e-05, "loss": 0.0066, "step": 305070 }, { "epoch": 9.430055016381282, "grad_norm": 0.13069678843021393, "learning_rate": 1.5286641528095446e-05, "loss": 0.0068, "step": 305100 }, { "epoch": 9.430982258762441, "grad_norm": 0.15271489322185516, "learning_rate": 1.5286177906904868e-05, "loss": 0.0064, "step": 305130 }, { "epoch": 9.431909501143599, "grad_norm": 0.12273500859737396, "learning_rate": 1.5285714285714286e-05, "loss": 0.0064, "step": 305160 }, { "epoch": 9.432836743524758, "grad_norm": 0.11060157418251038, "learning_rate": 1.5285250664523708e-05, "loss": 0.006, "step": 305190 }, { "epoch": 9.433763985905916, "grad_norm": 0.07648293673992157, "learning_rate": 1.528478704333313e-05, "loss": 0.0061, "step": 305220 }, { "epoch": 9.434691228287074, "grad_norm": 0.12867200374603271, "learning_rate": 1.5284323422142547e-05, "loss": 0.0057, "step": 305250 }, { "epoch": 9.435618470668233, "grad_norm": 0.0960504412651062, "learning_rate": 1.528385980095197e-05, "loss": 0.0065, "step": 305280 }, { "epoch": 9.43654571304939, "grad_norm": 0.10733126103878021, "learning_rate": 1.528339617976139e-05, "loss": 0.0063, "step": 305310 }, { "epoch": 9.43747295543055, "grad_norm": 0.11155758053064346, "learning_rate": 1.528293255857081e-05, "loss": 0.0059, "step": 305340 }, { "epoch": 9.438400197811708, "grad_norm": 0.12579850852489471, "learning_rate": 
1.5282468937380233e-05, "loss": 0.0061, "step": 305370 }, { "epoch": 9.439327440192866, "grad_norm": 0.09412582218647003, "learning_rate": 1.5282005316189655e-05, "loss": 0.006, "step": 305400 }, { "epoch": 9.440254682574025, "grad_norm": 0.06852924078702927, "learning_rate": 1.5281541694999073e-05, "loss": 0.0065, "step": 305430 }, { "epoch": 9.441181924955183, "grad_norm": 0.12989002466201782, "learning_rate": 1.5281078073808494e-05, "loss": 0.0062, "step": 305460 }, { "epoch": 9.442109167336342, "grad_norm": 0.10526281595230103, "learning_rate": 1.5280614452617916e-05, "loss": 0.006, "step": 305490 }, { "epoch": 9.4430364097175, "grad_norm": 0.11111287027597427, "learning_rate": 1.5280150831427337e-05, "loss": 0.0068, "step": 305520 }, { "epoch": 9.443963652098658, "grad_norm": 0.11918551474809647, "learning_rate": 1.527968721023676e-05, "loss": 0.006, "step": 305550 }, { "epoch": 9.444890894479817, "grad_norm": 0.0962652787566185, "learning_rate": 1.5279223589046177e-05, "loss": 0.0057, "step": 305580 }, { "epoch": 9.445818136860975, "grad_norm": 0.08867955207824707, "learning_rate": 1.52787599678556e-05, "loss": 0.0065, "step": 305610 }, { "epoch": 9.446745379242135, "grad_norm": 0.1322256624698639, "learning_rate": 1.527829634666502e-05, "loss": 0.0061, "step": 305640 }, { "epoch": 9.447672621623292, "grad_norm": 0.1180880218744278, "learning_rate": 1.5277832725474438e-05, "loss": 0.0062, "step": 305670 }, { "epoch": 9.44859986400445, "grad_norm": 0.10341274738311768, "learning_rate": 1.527736910428386e-05, "loss": 0.0059, "step": 305700 }, { "epoch": 9.44952710638561, "grad_norm": 0.14423206448554993, "learning_rate": 1.5276905483093284e-05, "loss": 0.0061, "step": 305730 }, { "epoch": 9.450454348766767, "grad_norm": 0.11765630543231964, "learning_rate": 1.5276441861902703e-05, "loss": 0.0058, "step": 305760 }, { "epoch": 9.451381591147927, "grad_norm": 0.11475151032209396, "learning_rate": 1.5275978240712124e-05, "loss": 0.0058, "step": 305790 }, { "epoch": 
9.452308833529084, "grad_norm": 0.11604276299476624, "learning_rate": 1.5275514619521542e-05, "loss": 0.0061, "step": 305820 }, { "epoch": 9.453236075910244, "grad_norm": 0.10570337623357773, "learning_rate": 1.5275050998330964e-05, "loss": 0.0057, "step": 305850 }, { "epoch": 9.454163318291402, "grad_norm": 0.20241759717464447, "learning_rate": 1.5274587377140385e-05, "loss": 0.0059, "step": 305880 }, { "epoch": 9.45509056067256, "grad_norm": 0.16985982656478882, "learning_rate": 1.5274123755949807e-05, "loss": 0.0066, "step": 305910 }, { "epoch": 9.456017803053719, "grad_norm": 0.11214303225278854, "learning_rate": 1.5273660134759228e-05, "loss": 0.005, "step": 305940 }, { "epoch": 9.456945045434876, "grad_norm": 0.09115246683359146, "learning_rate": 1.527319651356865e-05, "loss": 0.0058, "step": 305970 }, { "epoch": 9.457872287816036, "grad_norm": 0.14349329471588135, "learning_rate": 1.5272732892378068e-05, "loss": 0.0061, "step": 306000 }, { "epoch": 9.458799530197194, "grad_norm": 0.1472080498933792, "learning_rate": 1.527226927118749e-05, "loss": 0.0066, "step": 306030 }, { "epoch": 9.459726772578351, "grad_norm": 0.11408492922782898, "learning_rate": 1.527180564999691e-05, "loss": 0.0064, "step": 306060 }, { "epoch": 9.46065401495951, "grad_norm": 0.07084556668996811, "learning_rate": 1.5271342028806332e-05, "loss": 0.0055, "step": 306090 }, { "epoch": 9.461581257340669, "grad_norm": 0.07548350095748901, "learning_rate": 1.5270878407615754e-05, "loss": 0.0059, "step": 306120 }, { "epoch": 9.462508499721828, "grad_norm": 0.1149226501584053, "learning_rate": 1.5270414786425172e-05, "loss": 0.0062, "step": 306150 }, { "epoch": 9.463435742102986, "grad_norm": 0.0723424032330513, "learning_rate": 1.5269951165234594e-05, "loss": 0.0061, "step": 306180 }, { "epoch": 9.464362984484143, "grad_norm": 0.11786601692438126, "learning_rate": 1.5269487544044015e-05, "loss": 0.0055, "step": 306210 }, { "epoch": 9.465290226865303, "grad_norm": 0.11468443274497986, 
"learning_rate": 1.5269023922853433e-05, "loss": 0.0058, "step": 306240 }, { "epoch": 9.46621746924646, "grad_norm": 0.07699503004550934, "learning_rate": 1.5268560301662855e-05, "loss": 0.0056, "step": 306270 }, { "epoch": 9.46714471162762, "grad_norm": 0.13513325154781342, "learning_rate": 1.5268096680472276e-05, "loss": 0.0058, "step": 306300 }, { "epoch": 9.468071954008778, "grad_norm": 0.13736790418624878, "learning_rate": 1.5267633059281698e-05, "loss": 0.0063, "step": 306330 }, { "epoch": 9.468999196389936, "grad_norm": 0.10933776944875717, "learning_rate": 1.526716943809112e-05, "loss": 0.0059, "step": 306360 }, { "epoch": 9.469926438771095, "grad_norm": 0.08639216423034668, "learning_rate": 1.526670581690054e-05, "loss": 0.006, "step": 306390 }, { "epoch": 9.470853681152253, "grad_norm": 0.12300921976566315, "learning_rate": 1.526624219570996e-05, "loss": 0.0058, "step": 306420 }, { "epoch": 9.471780923533412, "grad_norm": 0.07915909588336945, "learning_rate": 1.526577857451938e-05, "loss": 0.0063, "step": 306450 }, { "epoch": 9.47270816591457, "grad_norm": 0.13523319363594055, "learning_rate": 1.5265314953328802e-05, "loss": 0.0059, "step": 306480 }, { "epoch": 9.473635408295728, "grad_norm": 0.1002853512763977, "learning_rate": 1.5264851332138223e-05, "loss": 0.0069, "step": 306510 }, { "epoch": 9.474562650676887, "grad_norm": 0.14633484184741974, "learning_rate": 1.5264387710947645e-05, "loss": 0.0064, "step": 306540 }, { "epoch": 9.475489893058045, "grad_norm": 0.09236439317464828, "learning_rate": 1.5263924089757063e-05, "loss": 0.0061, "step": 306570 }, { "epoch": 9.476417135439204, "grad_norm": 0.08923119306564331, "learning_rate": 1.5263460468566484e-05, "loss": 0.0059, "step": 306600 }, { "epoch": 9.477344377820362, "grad_norm": 0.11963538080453873, "learning_rate": 1.5262996847375906e-05, "loss": 0.0059, "step": 306630 }, { "epoch": 9.478271620201522, "grad_norm": 0.10122881829738617, "learning_rate": 1.5262533226185324e-05, "loss": 0.0053, 
"step": 306660 }, { "epoch": 9.47919886258268, "grad_norm": 0.1508047878742218, "learning_rate": 1.5262069604994746e-05, "loss": 0.0057, "step": 306690 }, { "epoch": 9.480126104963837, "grad_norm": 0.13299618661403656, "learning_rate": 1.5261605983804167e-05, "loss": 0.0068, "step": 306720 }, { "epoch": 9.481053347344996, "grad_norm": 0.09370949864387512, "learning_rate": 1.526114236261359e-05, "loss": 0.0056, "step": 306750 }, { "epoch": 9.481980589726154, "grad_norm": 0.1421852856874466, "learning_rate": 1.526067874142301e-05, "loss": 0.0063, "step": 306780 }, { "epoch": 9.482907832107314, "grad_norm": 0.09468245506286621, "learning_rate": 1.5260215120232428e-05, "loss": 0.0058, "step": 306810 }, { "epoch": 9.483835074488471, "grad_norm": 0.1018025130033493, "learning_rate": 1.525975149904185e-05, "loss": 0.0058, "step": 306840 }, { "epoch": 9.48476231686963, "grad_norm": 0.1289089024066925, "learning_rate": 1.525928787785127e-05, "loss": 0.0061, "step": 306870 }, { "epoch": 9.485689559250789, "grad_norm": 0.11584927886724472, "learning_rate": 1.5258824256660693e-05, "loss": 0.0058, "step": 306900 }, { "epoch": 9.486616801631946, "grad_norm": 0.09317903220653534, "learning_rate": 1.5258360635470113e-05, "loss": 0.0059, "step": 306930 }, { "epoch": 9.487544044013106, "grad_norm": 0.052928678691387177, "learning_rate": 1.5257897014279534e-05, "loss": 0.0057, "step": 306960 }, { "epoch": 9.488471286394264, "grad_norm": 0.10616781562566757, "learning_rate": 1.5257433393088954e-05, "loss": 0.006, "step": 306990 }, { "epoch": 9.489398528775421, "grad_norm": 0.10858238488435745, "learning_rate": 1.5256969771898375e-05, "loss": 0.0066, "step": 307020 }, { "epoch": 9.49032577115658, "grad_norm": Infinity, "learning_rate": 1.5256521604747483e-05, "loss": 0.0056, "step": 307050 }, { "epoch": 9.491253013537738, "grad_norm": 0.11375583708286285, "learning_rate": 1.5256057983556903e-05, "loss": 0.0066, "step": 307080 }, { "epoch": 9.492180255918898, "grad_norm": 
0.14788056910037994, "learning_rate": 1.5255594362366325e-05, "loss": 0.0057, "step": 307110 }, { "epoch": 9.493107498300056, "grad_norm": 0.11774659901857376, "learning_rate": 1.5255130741175746e-05, "loss": 0.0061, "step": 307140 }, { "epoch": 9.494034740681213, "grad_norm": 0.07527671754360199, "learning_rate": 1.5254667119985164e-05, "loss": 0.006, "step": 307170 }, { "epoch": 9.494961983062373, "grad_norm": 0.10269978642463684, "learning_rate": 1.5254203498794586e-05, "loss": 0.0057, "step": 307200 }, { "epoch": 9.49588922544353, "grad_norm": 0.14408980309963226, "learning_rate": 1.5253739877604007e-05, "loss": 0.0061, "step": 307230 }, { "epoch": 9.49681646782469, "grad_norm": 0.08660273253917694, "learning_rate": 1.5253276256413427e-05, "loss": 0.0065, "step": 307260 }, { "epoch": 9.497743710205848, "grad_norm": 0.07378256320953369, "learning_rate": 1.5252812635222849e-05, "loss": 0.0061, "step": 307290 }, { "epoch": 9.498670952587005, "grad_norm": 0.11876396089792252, "learning_rate": 1.5252349014032268e-05, "loss": 0.0059, "step": 307320 }, { "epoch": 9.499598194968165, "grad_norm": 0.1298363357782364, "learning_rate": 1.525188539284169e-05, "loss": 0.0069, "step": 307350 }, { "epoch": 9.500525437349323, "grad_norm": 0.08192965388298035, "learning_rate": 1.5251421771651111e-05, "loss": 0.0063, "step": 307380 }, { "epoch": 9.501452679730482, "grad_norm": 0.1273990273475647, "learning_rate": 1.5250958150460531e-05, "loss": 0.0058, "step": 307410 }, { "epoch": 9.50237992211164, "grad_norm": 0.09179830551147461, "learning_rate": 1.5250494529269953e-05, "loss": 0.0057, "step": 307440 }, { "epoch": 9.5033071644928, "grad_norm": 0.15005239844322205, "learning_rate": 1.5250030908079374e-05, "loss": 0.0063, "step": 307470 }, { "epoch": 9.504234406873957, "grad_norm": 0.10943938046693802, "learning_rate": 1.5249567286888794e-05, "loss": 0.0057, "step": 307500 }, { "epoch": 9.505161649255115, "grad_norm": 0.10862715542316437, "learning_rate": 1.5249103665698215e-05, 
"loss": 0.0057, "step": 307530 }, { "epoch": 9.506088891636274, "grad_norm": 0.0956912711262703, "learning_rate": 1.5248640044507637e-05, "loss": 0.0061, "step": 307560 }, { "epoch": 9.507016134017432, "grad_norm": 0.1540108323097229, "learning_rate": 1.5248176423317055e-05, "loss": 0.0057, "step": 307590 }, { "epoch": 9.50794337639859, "grad_norm": 0.10249275714159012, "learning_rate": 1.5247712802126478e-05, "loss": 0.0063, "step": 307620 }, { "epoch": 9.50887061877975, "grad_norm": 0.09009787440299988, "learning_rate": 1.5247249180935896e-05, "loss": 0.0062, "step": 307650 }, { "epoch": 9.509797861160907, "grad_norm": 0.12773650884628296, "learning_rate": 1.5246785559745318e-05, "loss": 0.0064, "step": 307680 }, { "epoch": 9.510725103542066, "grad_norm": 0.10019490867853165, "learning_rate": 1.524632193855474e-05, "loss": 0.0061, "step": 307710 }, { "epoch": 9.511652345923224, "grad_norm": 0.10398264229297638, "learning_rate": 1.524585831736416e-05, "loss": 0.0063, "step": 307740 }, { "epoch": 9.512579588304384, "grad_norm": 0.10011646896600723, "learning_rate": 1.524539469617358e-05, "loss": 0.0059, "step": 307770 }, { "epoch": 9.513506830685541, "grad_norm": 0.10634039342403412, "learning_rate": 1.5244931074983002e-05, "loss": 0.0059, "step": 307800 }, { "epoch": 9.514434073066699, "grad_norm": 0.11248156428337097, "learning_rate": 1.5244467453792422e-05, "loss": 0.0066, "step": 307830 }, { "epoch": 9.515361315447858, "grad_norm": 0.14767330884933472, "learning_rate": 1.5244003832601844e-05, "loss": 0.0061, "step": 307860 }, { "epoch": 9.516288557829016, "grad_norm": 0.11304140090942383, "learning_rate": 1.5243540211411265e-05, "loss": 0.0057, "step": 307890 }, { "epoch": 9.517215800210176, "grad_norm": 0.10054360330104828, "learning_rate": 1.5243076590220685e-05, "loss": 0.0058, "step": 307920 }, { "epoch": 9.518143042591333, "grad_norm": 0.14882847666740417, "learning_rate": 1.5242612969030106e-05, "loss": 0.0065, "step": 307950 }, { "epoch": 
9.519070284972491, "grad_norm": 0.14818143844604492, "learning_rate": 1.5242149347839526e-05, "loss": 0.0064, "step": 307980 }, { "epoch": 9.51999752735365, "grad_norm": 0.10598565638065338, "learning_rate": 1.5241685726648948e-05, "loss": 0.0067, "step": 308010 }, { "epoch": 9.520924769734808, "grad_norm": 0.06433423608541489, "learning_rate": 1.524122210545837e-05, "loss": 0.0057, "step": 308040 }, { "epoch": 9.521852012115968, "grad_norm": 0.09449511766433716, "learning_rate": 1.5240758484267787e-05, "loss": 0.0057, "step": 308070 }, { "epoch": 9.522779254497125, "grad_norm": 0.10927166789770126, "learning_rate": 1.5240294863077209e-05, "loss": 0.0056, "step": 308100 }, { "epoch": 9.523706496878283, "grad_norm": 0.08945855498313904, "learning_rate": 1.523983124188663e-05, "loss": 0.006, "step": 308130 }, { "epoch": 9.524633739259443, "grad_norm": 0.1218419000506401, "learning_rate": 1.523936762069605e-05, "loss": 0.0062, "step": 308160 }, { "epoch": 9.5255609816406, "grad_norm": 0.1059446781873703, "learning_rate": 1.5238903999505472e-05, "loss": 0.006, "step": 308190 }, { "epoch": 9.52648822402176, "grad_norm": 0.13116149604320526, "learning_rate": 1.5238440378314893e-05, "loss": 0.0067, "step": 308220 }, { "epoch": 9.527415466402918, "grad_norm": 0.13285109400749207, "learning_rate": 1.5237976757124313e-05, "loss": 0.0064, "step": 308250 }, { "epoch": 9.528342708784077, "grad_norm": 0.10028303414583206, "learning_rate": 1.5237513135933735e-05, "loss": 0.0059, "step": 308280 }, { "epoch": 9.529269951165235, "grad_norm": 0.13700221478939056, "learning_rate": 1.5237049514743154e-05, "loss": 0.0061, "step": 308310 }, { "epoch": 9.530197193546392, "grad_norm": 0.12452223896980286, "learning_rate": 1.5236585893552576e-05, "loss": 0.0063, "step": 308340 }, { "epoch": 9.531124435927552, "grad_norm": 0.065433070063591, "learning_rate": 1.5236122272361997e-05, "loss": 0.0056, "step": 308370 }, { "epoch": 9.53205167830871, "grad_norm": 0.13512520492076874, 
"learning_rate": 1.5235658651171417e-05, "loss": 0.006, "step": 308400 }, { "epoch": 9.532978920689867, "grad_norm": 0.1179598867893219, "learning_rate": 1.5235195029980839e-05, "loss": 0.0061, "step": 308430 }, { "epoch": 9.533906163071027, "grad_norm": 0.09615755081176758, "learning_rate": 1.523473140879026e-05, "loss": 0.0066, "step": 308460 }, { "epoch": 9.534833405452185, "grad_norm": 0.11953986436128616, "learning_rate": 1.523426778759968e-05, "loss": 0.0053, "step": 308490 }, { "epoch": 9.535760647833344, "grad_norm": 0.1304575502872467, "learning_rate": 1.5233804166409101e-05, "loss": 0.0056, "step": 308520 }, { "epoch": 9.536687890214502, "grad_norm": 0.11735117435455322, "learning_rate": 1.5233340545218523e-05, "loss": 0.0056, "step": 308550 }, { "epoch": 9.537615132595661, "grad_norm": 0.1286226511001587, "learning_rate": 1.5232876924027941e-05, "loss": 0.0061, "step": 308580 }, { "epoch": 9.538542374976819, "grad_norm": 0.15749019384384155, "learning_rate": 1.5232413302837363e-05, "loss": 0.0055, "step": 308610 }, { "epoch": 9.539469617357977, "grad_norm": 0.1321638971567154, "learning_rate": 1.5231949681646782e-05, "loss": 0.006, "step": 308640 }, { "epoch": 9.540396859739136, "grad_norm": 0.12593457102775574, "learning_rate": 1.5231486060456204e-05, "loss": 0.0068, "step": 308670 }, { "epoch": 9.541324102120294, "grad_norm": 0.0919443815946579, "learning_rate": 1.5231022439265625e-05, "loss": 0.0052, "step": 308700 }, { "epoch": 9.542251344501453, "grad_norm": 0.09429806470870972, "learning_rate": 1.5230558818075045e-05, "loss": 0.0061, "step": 308730 }, { "epoch": 9.543178586882611, "grad_norm": 0.10412368923425674, "learning_rate": 1.5230095196884467e-05, "loss": 0.0066, "step": 308760 }, { "epoch": 9.544105829263769, "grad_norm": 0.08513457328081131, "learning_rate": 1.5229631575693888e-05, "loss": 0.0064, "step": 308790 }, { "epoch": 9.545033071644928, "grad_norm": 0.11217620968818665, "learning_rate": 1.5229167954503308e-05, "loss": 0.0064, 
"step": 308820 }, { "epoch": 9.545960314026086, "grad_norm": 0.10563033074140549, "learning_rate": 1.522870433331273e-05, "loss": 0.0059, "step": 308850 }, { "epoch": 9.546887556407246, "grad_norm": 0.13360603153705597, "learning_rate": 1.5228240712122151e-05, "loss": 0.0063, "step": 308880 }, { "epoch": 9.547814798788403, "grad_norm": 0.13669200241565704, "learning_rate": 1.5227777090931571e-05, "loss": 0.0055, "step": 308910 }, { "epoch": 9.548742041169561, "grad_norm": 0.2653726637363434, "learning_rate": 1.5227313469740992e-05, "loss": 0.0058, "step": 308940 }, { "epoch": 9.54966928355072, "grad_norm": 0.1602020561695099, "learning_rate": 1.522684984855041e-05, "loss": 0.0062, "step": 308970 }, { "epoch": 9.550596525931878, "grad_norm": 0.12642720341682434, "learning_rate": 1.5226386227359832e-05, "loss": 0.0066, "step": 309000 }, { "epoch": 9.551523768313038, "grad_norm": 0.12391714006662369, "learning_rate": 1.5225922606169255e-05, "loss": 0.0066, "step": 309030 }, { "epoch": 9.552451010694195, "grad_norm": 0.12344395369291306, "learning_rate": 1.5225458984978673e-05, "loss": 0.0059, "step": 309060 }, { "epoch": 9.553378253075355, "grad_norm": 0.0931440219283104, "learning_rate": 1.5224995363788095e-05, "loss": 0.0057, "step": 309090 }, { "epoch": 9.554305495456513, "grad_norm": 0.07422301918268204, "learning_rate": 1.5224531742597516e-05, "loss": 0.0061, "step": 309120 }, { "epoch": 9.55523273783767, "grad_norm": 0.09938361495733261, "learning_rate": 1.5224068121406936e-05, "loss": 0.0067, "step": 309150 }, { "epoch": 9.55615998021883, "grad_norm": 0.07760272175073624, "learning_rate": 1.5223604500216358e-05, "loss": 0.0063, "step": 309180 }, { "epoch": 9.557087222599987, "grad_norm": 0.11153963208198547, "learning_rate": 1.522314087902578e-05, "loss": 0.007, "step": 309210 }, { "epoch": 9.558014464981145, "grad_norm": 0.1174248605966568, "learning_rate": 1.5222677257835199e-05, "loss": 0.0067, "step": 309240 }, { "epoch": 9.558941707362305, "grad_norm": 
0.09931284189224243, "learning_rate": 1.522221363664462e-05, "loss": 0.0064, "step": 309270 }, { "epoch": 9.559868949743462, "grad_norm": 0.07063984125852585, "learning_rate": 1.522175001545404e-05, "loss": 0.0065, "step": 309300 }, { "epoch": 9.560796192124622, "grad_norm": 0.11738785356283188, "learning_rate": 1.5221286394263462e-05, "loss": 0.0055, "step": 309330 }, { "epoch": 9.56172343450578, "grad_norm": 0.0924055352807045, "learning_rate": 1.5220822773072883e-05, "loss": 0.0063, "step": 309360 }, { "epoch": 9.562650676886939, "grad_norm": 0.15301649272441864, "learning_rate": 1.5220359151882303e-05, "loss": 0.0061, "step": 309390 }, { "epoch": 9.563577919268097, "grad_norm": 0.14105622470378876, "learning_rate": 1.5219895530691725e-05, "loss": 0.0065, "step": 309420 }, { "epoch": 9.564505161649254, "grad_norm": 0.12496459484100342, "learning_rate": 1.5219431909501146e-05, "loss": 0.0063, "step": 309450 }, { "epoch": 9.565432404030414, "grad_norm": 0.08963114023208618, "learning_rate": 1.5218968288310564e-05, "loss": 0.0062, "step": 309480 }, { "epoch": 9.566359646411572, "grad_norm": 0.07932480424642563, "learning_rate": 1.5218504667119986e-05, "loss": 0.006, "step": 309510 }, { "epoch": 9.567286888792731, "grad_norm": 0.09446163475513458, "learning_rate": 1.5218041045929407e-05, "loss": 0.0064, "step": 309540 }, { "epoch": 9.568214131173889, "grad_norm": 0.16398558020591736, "learning_rate": 1.5217577424738827e-05, "loss": 0.0063, "step": 309570 }, { "epoch": 9.569141373555047, "grad_norm": 0.15798614919185638, "learning_rate": 1.5217113803548249e-05, "loss": 0.0058, "step": 309600 }, { "epoch": 9.570068615936206, "grad_norm": 0.1439100056886673, "learning_rate": 1.5216650182357668e-05, "loss": 0.0059, "step": 309630 }, { "epoch": 9.570995858317364, "grad_norm": 0.10321784019470215, "learning_rate": 1.521618656116709e-05, "loss": 0.0068, "step": 309660 }, { "epoch": 9.571923100698523, "grad_norm": 0.09446481615304947, "learning_rate": 
1.5215722939976511e-05, "loss": 0.0063, "step": 309690 }, { "epoch": 9.572850343079681, "grad_norm": 0.10897740721702576, "learning_rate": 1.5215259318785931e-05, "loss": 0.0059, "step": 309720 }, { "epoch": 9.573777585460839, "grad_norm": 0.09815037995576859, "learning_rate": 1.5214795697595353e-05, "loss": 0.0057, "step": 309750 }, { "epoch": 9.574704827841998, "grad_norm": 0.11788427084684372, "learning_rate": 1.5214332076404774e-05, "loss": 0.0058, "step": 309780 }, { "epoch": 9.575632070223156, "grad_norm": 0.15155303478240967, "learning_rate": 1.5213868455214194e-05, "loss": 0.0061, "step": 309810 }, { "epoch": 9.576559312604315, "grad_norm": 0.05747808516025543, "learning_rate": 1.5213404834023616e-05, "loss": 0.0064, "step": 309840 }, { "epoch": 9.577486554985473, "grad_norm": 0.1265496462583542, "learning_rate": 1.5212941212833037e-05, "loss": 0.0054, "step": 309870 }, { "epoch": 9.578413797366633, "grad_norm": 0.1151491031050682, "learning_rate": 1.5212477591642457e-05, "loss": 0.0054, "step": 309900 }, { "epoch": 9.57934103974779, "grad_norm": 0.14443108439445496, "learning_rate": 1.5212013970451878e-05, "loss": 0.0053, "step": 309930 }, { "epoch": 9.580268282128948, "grad_norm": 0.11559716612100601, "learning_rate": 1.52115503492613e-05, "loss": 0.0063, "step": 309960 }, { "epoch": 9.581195524510107, "grad_norm": 0.12829482555389404, "learning_rate": 1.5211086728070718e-05, "loss": 0.0051, "step": 309990 }, { "epoch": 9.582122766891265, "grad_norm": 0.1261335015296936, "learning_rate": 1.521062310688014e-05, "loss": 0.0062, "step": 310020 }, { "epoch": 9.583050009272423, "grad_norm": 0.07701975852251053, "learning_rate": 1.521015948568956e-05, "loss": 0.006, "step": 310050 }, { "epoch": 9.583977251653582, "grad_norm": 0.1385146826505661, "learning_rate": 1.520969586449898e-05, "loss": 0.0061, "step": 310080 }, { "epoch": 9.58490449403474, "grad_norm": 0.10976818948984146, "learning_rate": 1.5209232243308402e-05, "loss": 0.0064, "step": 310110 }, { 
"epoch": 9.5858317364159, "grad_norm": 0.137441948056221, "learning_rate": 1.5208768622117822e-05, "loss": 0.006, "step": 310140 }, { "epoch": 9.586758978797057, "grad_norm": 0.09798911213874817, "learning_rate": 1.520832045496693e-05, "loss": 0.0064, "step": 310170 }, { "epoch": 9.587686221178217, "grad_norm": 0.08582442998886108, "learning_rate": 1.5207856833776352e-05, "loss": 0.0062, "step": 310200 }, { "epoch": 9.588613463559374, "grad_norm": 0.14038223028182983, "learning_rate": 1.520739321258577e-05, "loss": 0.006, "step": 310230 }, { "epoch": 9.589540705940532, "grad_norm": 0.10158046334981918, "learning_rate": 1.5206929591395191e-05, "loss": 0.0057, "step": 310260 }, { "epoch": 9.590467948321692, "grad_norm": 0.08237351477146149, "learning_rate": 1.5206465970204614e-05, "loss": 0.0058, "step": 310290 }, { "epoch": 9.59139519070285, "grad_norm": 0.1810789704322815, "learning_rate": 1.5206002349014033e-05, "loss": 0.0062, "step": 310320 }, { "epoch": 9.592322433084009, "grad_norm": 0.15170888602733612, "learning_rate": 1.5205538727823454e-05, "loss": 0.0058, "step": 310350 }, { "epoch": 9.593249675465167, "grad_norm": 0.1678021252155304, "learning_rate": 1.5205075106632876e-05, "loss": 0.0058, "step": 310380 }, { "epoch": 9.594176917846324, "grad_norm": 0.09362799674272537, "learning_rate": 1.5204611485442295e-05, "loss": 0.0064, "step": 310410 }, { "epoch": 9.595104160227484, "grad_norm": 0.08050220459699631, "learning_rate": 1.5204147864251717e-05, "loss": 0.0059, "step": 310440 }, { "epoch": 9.596031402608642, "grad_norm": 0.12297743558883667, "learning_rate": 1.5203684243061137e-05, "loss": 0.0061, "step": 310470 }, { "epoch": 9.596958644989801, "grad_norm": 0.15994328260421753, "learning_rate": 1.5203220621870558e-05, "loss": 0.0067, "step": 310500 }, { "epoch": 9.597885887370959, "grad_norm": 0.12597934901714325, "learning_rate": 1.520275700067998e-05, "loss": 0.0058, "step": 310530 }, { "epoch": 9.598813129752116, "grad_norm": 0.10278474539518356, 
"learning_rate": 1.52022933794894e-05, "loss": 0.0062, "step": 310560 }, { "epoch": 9.599740372133276, "grad_norm": 0.17182131111621857, "learning_rate": 1.5201829758298821e-05, "loss": 0.0057, "step": 310590 }, { "epoch": 9.600667614514434, "grad_norm": 0.10928566753864288, "learning_rate": 1.5201366137108242e-05, "loss": 0.0054, "step": 310620 }, { "epoch": 9.601594856895593, "grad_norm": 0.16386789083480835, "learning_rate": 1.5200902515917662e-05, "loss": 0.0058, "step": 310650 }, { "epoch": 9.60252209927675, "grad_norm": 0.09529836475849152, "learning_rate": 1.5200438894727084e-05, "loss": 0.0065, "step": 310680 }, { "epoch": 9.60344934165791, "grad_norm": 0.1060943752527237, "learning_rate": 1.5199975273536505e-05, "loss": 0.0064, "step": 310710 }, { "epoch": 9.604376584039068, "grad_norm": 0.10428091138601303, "learning_rate": 1.5199511652345923e-05, "loss": 0.0062, "step": 310740 }, { "epoch": 9.605303826420226, "grad_norm": 0.10474684089422226, "learning_rate": 1.5199048031155345e-05, "loss": 0.0062, "step": 310770 }, { "epoch": 9.606231068801385, "grad_norm": 0.09170478582382202, "learning_rate": 1.5198584409964765e-05, "loss": 0.006, "step": 310800 }, { "epoch": 9.607158311182543, "grad_norm": 0.08410237729549408, "learning_rate": 1.5198120788774186e-05, "loss": 0.0059, "step": 310830 }, { "epoch": 9.6080855535637, "grad_norm": 0.13999241590499878, "learning_rate": 1.5197657167583608e-05, "loss": 0.0063, "step": 310860 }, { "epoch": 9.60901279594486, "grad_norm": 0.12822303175926208, "learning_rate": 1.5197193546393028e-05, "loss": 0.0055, "step": 310890 }, { "epoch": 9.609940038326018, "grad_norm": 0.11396519094705582, "learning_rate": 1.5196729925202449e-05, "loss": 0.0057, "step": 310920 }, { "epoch": 9.610867280707177, "grad_norm": 0.11653419584035873, "learning_rate": 1.519626630401187e-05, "loss": 0.0058, "step": 310950 }, { "epoch": 9.611794523088335, "grad_norm": 0.11332632601261139, "learning_rate": 1.519580268282129e-05, "loss": 0.0059, "step": 
310980 }, { "epoch": 9.612721765469495, "grad_norm": 0.1372889280319214, "learning_rate": 1.5195339061630712e-05, "loss": 0.0062, "step": 311010 }, { "epoch": 9.613649007850652, "grad_norm": 0.145954892039299, "learning_rate": 1.5194875440440133e-05, "loss": 0.0052, "step": 311040 }, { "epoch": 9.61457625023181, "grad_norm": 0.37236589193344116, "learning_rate": 1.5194411819249553e-05, "loss": 0.0067, "step": 311070 }, { "epoch": 9.61550349261297, "grad_norm": 0.11856581270694733, "learning_rate": 1.5193948198058975e-05, "loss": 0.0057, "step": 311100 }, { "epoch": 9.616430734994127, "grad_norm": 0.12157116085290909, "learning_rate": 1.5193484576868393e-05, "loss": 0.0059, "step": 311130 }, { "epoch": 9.617357977375287, "grad_norm": 0.12547355890274048, "learning_rate": 1.5193020955677816e-05, "loss": 0.0064, "step": 311160 }, { "epoch": 9.618285219756444, "grad_norm": 0.04671641066670418, "learning_rate": 1.5192557334487238e-05, "loss": 0.0062, "step": 311190 }, { "epoch": 9.619212462137602, "grad_norm": 0.17178525030612946, "learning_rate": 1.5192093713296656e-05, "loss": 0.0064, "step": 311220 }, { "epoch": 9.620139704518762, "grad_norm": 0.13000299036502838, "learning_rate": 1.5191630092106077e-05, "loss": 0.0057, "step": 311250 }, { "epoch": 9.62106694689992, "grad_norm": 0.13427942991256714, "learning_rate": 1.5191166470915499e-05, "loss": 0.0059, "step": 311280 }, { "epoch": 9.621994189281079, "grad_norm": 0.1284918338060379, "learning_rate": 1.5190702849724918e-05, "loss": 0.0069, "step": 311310 }, { "epoch": 9.622921431662236, "grad_norm": 0.12138655036687851, "learning_rate": 1.519023922853434e-05, "loss": 0.0059, "step": 311340 }, { "epoch": 9.623848674043394, "grad_norm": 0.1069171354174614, "learning_rate": 1.5189775607343761e-05, "loss": 0.0062, "step": 311370 }, { "epoch": 9.624775916424554, "grad_norm": 0.1076546162366867, "learning_rate": 1.5189311986153181e-05, "loss": 0.0062, "step": 311400 }, { "epoch": 9.625703158805711, "grad_norm": 
0.10854592174291611, "learning_rate": 1.5188848364962603e-05, "loss": 0.006, "step": 311430 }, { "epoch": 9.62663040118687, "grad_norm": 0.16387565433979034, "learning_rate": 1.5188384743772023e-05, "loss": 0.006, "step": 311460 }, { "epoch": 9.627557643568029, "grad_norm": 0.15485069155693054, "learning_rate": 1.5187921122581444e-05, "loss": 0.0056, "step": 311490 }, { "epoch": 9.628484885949188, "grad_norm": 0.14014282822608948, "learning_rate": 1.5187457501390866e-05, "loss": 0.0062, "step": 311520 }, { "epoch": 9.629412128330346, "grad_norm": 0.10689600557088852, "learning_rate": 1.5186993880200285e-05, "loss": 0.0065, "step": 311550 }, { "epoch": 9.630339370711503, "grad_norm": 0.06708236783742905, "learning_rate": 1.5186530259009707e-05, "loss": 0.0055, "step": 311580 }, { "epoch": 9.631266613092663, "grad_norm": 0.10440129786729813, "learning_rate": 1.5186066637819128e-05, "loss": 0.0061, "step": 311610 }, { "epoch": 9.63219385547382, "grad_norm": 0.12190282344818115, "learning_rate": 1.5185603016628547e-05, "loss": 0.0058, "step": 311640 }, { "epoch": 9.633121097854978, "grad_norm": 0.15682590007781982, "learning_rate": 1.5185139395437968e-05, "loss": 0.0064, "step": 311670 }, { "epoch": 9.634048340236138, "grad_norm": 0.13029100000858307, "learning_rate": 1.5184675774247391e-05, "loss": 0.006, "step": 311700 }, { "epoch": 9.634975582617296, "grad_norm": 0.11496169865131378, "learning_rate": 1.5184227607096496e-05, "loss": 0.0064, "step": 311730 }, { "epoch": 9.635902824998455, "grad_norm": 0.14196404814720154, "learning_rate": 1.5183763985905917e-05, "loss": 0.006, "step": 311760 }, { "epoch": 9.636830067379613, "grad_norm": 0.14795589447021484, "learning_rate": 1.5183300364715339e-05, "loss": 0.0062, "step": 311790 }, { "epoch": 9.637757309760772, "grad_norm": 0.1128329262137413, "learning_rate": 1.5182836743524759e-05, "loss": 0.006, "step": 311820 }, { "epoch": 9.63868455214193, "grad_norm": 0.09479493647813797, "learning_rate": 1.518237312233418e-05, 
"loss": 0.005, "step": 311850 }, { "epoch": 9.639611794523088, "grad_norm": 0.1114429384469986, "learning_rate": 1.5181909501143598e-05, "loss": 0.0063, "step": 311880 }, { "epoch": 9.640539036904247, "grad_norm": 0.14521372318267822, "learning_rate": 1.5181445879953021e-05, "loss": 0.0056, "step": 311910 }, { "epoch": 9.641466279285405, "grad_norm": 0.14035439491271973, "learning_rate": 1.5180982258762443e-05, "loss": 0.0058, "step": 311940 }, { "epoch": 9.642393521666564, "grad_norm": 0.13110513985157013, "learning_rate": 1.5180518637571861e-05, "loss": 0.0062, "step": 311970 }, { "epoch": 9.643320764047722, "grad_norm": 0.11052756011486053, "learning_rate": 1.5180055016381283e-05, "loss": 0.0068, "step": 312000 }, { "epoch": 9.64424800642888, "grad_norm": 0.12405547499656677, "learning_rate": 1.5179591395190704e-05, "loss": 0.006, "step": 312030 }, { "epoch": 9.64517524881004, "grad_norm": 0.11373548209667206, "learning_rate": 1.5179127774000124e-05, "loss": 0.0065, "step": 312060 }, { "epoch": 9.646102491191197, "grad_norm": 0.14616067707538605, "learning_rate": 1.5178664152809545e-05, "loss": 0.0062, "step": 312090 }, { "epoch": 9.647029733572356, "grad_norm": 0.08222334086894989, "learning_rate": 1.5178200531618967e-05, "loss": 0.0062, "step": 312120 }, { "epoch": 9.647956975953514, "grad_norm": 0.12783639132976532, "learning_rate": 1.5177736910428387e-05, "loss": 0.0065, "step": 312150 }, { "epoch": 9.648884218334672, "grad_norm": 0.09754849970340729, "learning_rate": 1.5177273289237808e-05, "loss": 0.0057, "step": 312180 }, { "epoch": 9.649811460715831, "grad_norm": 0.10608168691396713, "learning_rate": 1.517680966804723e-05, "loss": 0.0061, "step": 312210 }, { "epoch": 9.65073870309699, "grad_norm": 0.08057642728090286, "learning_rate": 1.517634604685665e-05, "loss": 0.0058, "step": 312240 }, { "epoch": 9.651665945478149, "grad_norm": 0.09230859577655792, "learning_rate": 1.5175882425666071e-05, "loss": 0.0062, "step": 312270 }, { "epoch": 
9.652593187859306, "grad_norm": 0.1113184317946434, "learning_rate": 1.5175418804475491e-05, "loss": 0.0058, "step": 312300 }, { "epoch": 9.653520430240466, "grad_norm": 0.1308668851852417, "learning_rate": 1.5174955183284912e-05, "loss": 0.0058, "step": 312330 }, { "epoch": 9.654447672621624, "grad_norm": 0.10087268799543381, "learning_rate": 1.5174491562094334e-05, "loss": 0.0068, "step": 312360 }, { "epoch": 9.655374915002781, "grad_norm": 0.09958534687757492, "learning_rate": 1.5174027940903752e-05, "loss": 0.0058, "step": 312390 }, { "epoch": 9.65630215738394, "grad_norm": 0.09941672533750534, "learning_rate": 1.5173564319713174e-05, "loss": 0.0063, "step": 312420 }, { "epoch": 9.657229399765098, "grad_norm": 0.09055052697658539, "learning_rate": 1.5173100698522597e-05, "loss": 0.006, "step": 312450 }, { "epoch": 9.658156642146256, "grad_norm": 0.1545415222644806, "learning_rate": 1.5172637077332015e-05, "loss": 0.0058, "step": 312480 }, { "epoch": 9.659083884527416, "grad_norm": 0.0631839707493782, "learning_rate": 1.5172173456141436e-05, "loss": 0.0056, "step": 312510 }, { "epoch": 9.660011126908573, "grad_norm": 0.08541549742221832, "learning_rate": 1.5171709834950858e-05, "loss": 0.0064, "step": 312540 }, { "epoch": 9.660938369289733, "grad_norm": 0.15187767148017883, "learning_rate": 1.5171246213760278e-05, "loss": 0.0069, "step": 312570 }, { "epoch": 9.66186561167089, "grad_norm": 0.11162231862545013, "learning_rate": 1.51707825925697e-05, "loss": 0.0055, "step": 312600 }, { "epoch": 9.66279285405205, "grad_norm": 0.06664814800024033, "learning_rate": 1.5170318971379119e-05, "loss": 0.006, "step": 312630 }, { "epoch": 9.663720096433208, "grad_norm": 0.1505432277917862, "learning_rate": 1.516985535018854e-05, "loss": 0.0061, "step": 312660 }, { "epoch": 9.664647338814365, "grad_norm": 0.14855992794036865, "learning_rate": 1.5169391728997962e-05, "loss": 0.0057, "step": 312690 }, { "epoch": 9.665574581195525, "grad_norm": 0.09127769619226456, 
"learning_rate": 1.5168928107807382e-05, "loss": 0.0064, "step": 312720 }, { "epoch": 9.666501823576683, "grad_norm": 0.10378411412239075, "learning_rate": 1.5168464486616803e-05, "loss": 0.0058, "step": 312750 }, { "epoch": 9.667429065957842, "grad_norm": 0.15420202910900116, "learning_rate": 1.5168000865426225e-05, "loss": 0.0057, "step": 312780 }, { "epoch": 9.668356308339, "grad_norm": 0.14704842865467072, "learning_rate": 1.5167537244235645e-05, "loss": 0.0063, "step": 312810 }, { "epoch": 9.669283550720158, "grad_norm": 0.11605754494667053, "learning_rate": 1.5167073623045066e-05, "loss": 0.0059, "step": 312840 }, { "epoch": 9.670210793101317, "grad_norm": 0.15281997621059418, "learning_rate": 1.5166610001854488e-05, "loss": 0.0057, "step": 312870 }, { "epoch": 9.671138035482475, "grad_norm": 0.11931776255369186, "learning_rate": 1.5166146380663906e-05, "loss": 0.006, "step": 312900 }, { "epoch": 9.672065277863634, "grad_norm": 0.08288135379552841, "learning_rate": 1.5165682759473327e-05, "loss": 0.0069, "step": 312930 }, { "epoch": 9.672992520244792, "grad_norm": 0.1085585430264473, "learning_rate": 1.5165219138282747e-05, "loss": 0.006, "step": 312960 }, { "epoch": 9.67391976262595, "grad_norm": 0.11169180274009705, "learning_rate": 1.5164755517092169e-05, "loss": 0.0064, "step": 312990 }, { "epoch": 9.67484700500711, "grad_norm": 0.12138085812330246, "learning_rate": 1.516429189590159e-05, "loss": 0.0065, "step": 313020 }, { "epoch": 9.675774247388267, "grad_norm": 0.1326971799135208, "learning_rate": 1.516382827471101e-05, "loss": 0.0059, "step": 313050 }, { "epoch": 9.676701489769426, "grad_norm": 0.122179314494133, "learning_rate": 1.5163364653520431e-05, "loss": 0.0063, "step": 313080 }, { "epoch": 9.677628732150584, "grad_norm": 0.05379430949687958, "learning_rate": 1.5162901032329853e-05, "loss": 0.006, "step": 313110 }, { "epoch": 9.678555974531742, "grad_norm": 0.12076736986637115, "learning_rate": 1.5162437411139273e-05, "loss": 0.006, "step": 
313140 }, { "epoch": 9.679483216912901, "grad_norm": 0.07261659950017929, "learning_rate": 1.5161973789948694e-05, "loss": 0.0059, "step": 313170 }, { "epoch": 9.680410459294059, "grad_norm": 0.10698443651199341, "learning_rate": 1.5161510168758116e-05, "loss": 0.0059, "step": 313200 }, { "epoch": 9.681337701675218, "grad_norm": 0.11649302393198013, "learning_rate": 1.5161046547567536e-05, "loss": 0.0057, "step": 313230 }, { "epoch": 9.682264944056376, "grad_norm": 0.12209422141313553, "learning_rate": 1.5160582926376957e-05, "loss": 0.0064, "step": 313260 }, { "epoch": 9.683192186437534, "grad_norm": 0.08441309630870819, "learning_rate": 1.5160119305186375e-05, "loss": 0.006, "step": 313290 }, { "epoch": 9.684119428818693, "grad_norm": 0.15290968120098114, "learning_rate": 1.5159655683995798e-05, "loss": 0.0057, "step": 313320 }, { "epoch": 9.685046671199851, "grad_norm": 0.17069990932941437, "learning_rate": 1.515919206280522e-05, "loss": 0.0065, "step": 313350 }, { "epoch": 9.68597391358101, "grad_norm": 0.08361820131540298, "learning_rate": 1.5158728441614638e-05, "loss": 0.0059, "step": 313380 }, { "epoch": 9.686901155962168, "grad_norm": 0.14780879020690918, "learning_rate": 1.515826482042406e-05, "loss": 0.0061, "step": 313410 }, { "epoch": 9.687828398343328, "grad_norm": 0.17937754094600677, "learning_rate": 1.5157801199233481e-05, "loss": 0.0061, "step": 313440 }, { "epoch": 9.688755640724485, "grad_norm": 0.14947420358657837, "learning_rate": 1.51573375780429e-05, "loss": 0.0068, "step": 313470 }, { "epoch": 9.689682883105643, "grad_norm": 0.151933491230011, "learning_rate": 1.5156873956852322e-05, "loss": 0.0061, "step": 313500 }, { "epoch": 9.690610125486803, "grad_norm": 0.10995389521121979, "learning_rate": 1.5156410335661744e-05, "loss": 0.0063, "step": 313530 }, { "epoch": 9.69153736786796, "grad_norm": 0.10560653358697891, "learning_rate": 1.5155946714471164e-05, "loss": 0.006, "step": 313560 }, { "epoch": 9.69246461024912, "grad_norm": 
0.1312355399131775, "learning_rate": 1.5155483093280585e-05, "loss": 0.0065, "step": 313590 }, { "epoch": 9.693391852630278, "grad_norm": 0.13163812458515167, "learning_rate": 1.5155019472090005e-05, "loss": 0.0065, "step": 313620 }, { "epoch": 9.694319095011435, "grad_norm": 0.11861896514892578, "learning_rate": 1.5154555850899426e-05, "loss": 0.006, "step": 313650 }, { "epoch": 9.695246337392595, "grad_norm": 0.09659628570079803, "learning_rate": 1.5154092229708848e-05, "loss": 0.0056, "step": 313680 }, { "epoch": 9.696173579773752, "grad_norm": 0.07136379927396774, "learning_rate": 1.5153628608518268e-05, "loss": 0.006, "step": 313710 }, { "epoch": 9.697100822154912, "grad_norm": 0.08569822460412979, "learning_rate": 1.515316498732769e-05, "loss": 0.0057, "step": 313740 }, { "epoch": 9.69802806453607, "grad_norm": 0.11937401443719864, "learning_rate": 1.515270136613711e-05, "loss": 0.006, "step": 313770 }, { "epoch": 9.698955306917227, "grad_norm": 0.08500488847494125, "learning_rate": 1.5152237744946529e-05, "loss": 0.0054, "step": 313800 }, { "epoch": 9.699882549298387, "grad_norm": 0.09387405216693878, "learning_rate": 1.515177412375595e-05, "loss": 0.0063, "step": 313830 }, { "epoch": 9.700809791679545, "grad_norm": 0.1332031786441803, "learning_rate": 1.5151310502565374e-05, "loss": 0.0061, "step": 313860 }, { "epoch": 9.701737034060704, "grad_norm": 0.1331212818622589, "learning_rate": 1.5150846881374792e-05, "loss": 0.0061, "step": 313890 }, { "epoch": 9.702664276441862, "grad_norm": 0.10151368379592896, "learning_rate": 1.5150383260184213e-05, "loss": 0.0065, "step": 313920 }, { "epoch": 9.70359151882302, "grad_norm": 0.13100211322307587, "learning_rate": 1.5149919638993633e-05, "loss": 0.0061, "step": 313950 }, { "epoch": 9.704518761204179, "grad_norm": 0.11509975045919418, "learning_rate": 1.5149456017803055e-05, "loss": 0.0069, "step": 313980 }, { "epoch": 9.705446003585337, "grad_norm": 0.08024074137210846, "learning_rate": 1.5148992396612476e-05, 
"loss": 0.006, "step": 314010 }, { "epoch": 9.706373245966496, "grad_norm": 0.135417640209198, "learning_rate": 1.5148528775421896e-05, "loss": 0.0056, "step": 314040 }, { "epoch": 9.707300488347654, "grad_norm": 0.15612836182117462, "learning_rate": 1.5148065154231317e-05, "loss": 0.006, "step": 314070 }, { "epoch": 9.708227730728812, "grad_norm": 0.13019603490829468, "learning_rate": 1.5147601533040739e-05, "loss": 0.0062, "step": 314100 }, { "epoch": 9.709154973109971, "grad_norm": 0.10017584264278412, "learning_rate": 1.5147137911850159e-05, "loss": 0.0057, "step": 314130 }, { "epoch": 9.710082215491129, "grad_norm": 0.10927142947912216, "learning_rate": 1.514667429065958e-05, "loss": 0.0069, "step": 314160 }, { "epoch": 9.711009457872288, "grad_norm": 0.09750872850418091, "learning_rate": 1.5146210669469002e-05, "loss": 0.006, "step": 314190 }, { "epoch": 9.711936700253446, "grad_norm": 0.09081494808197021, "learning_rate": 1.5145747048278422e-05, "loss": 0.0061, "step": 314220 }, { "epoch": 9.712863942634606, "grad_norm": 0.11318328976631165, "learning_rate": 1.5145283427087843e-05, "loss": 0.0069, "step": 314250 }, { "epoch": 9.713791185015763, "grad_norm": 0.12430974841117859, "learning_rate": 1.5144819805897261e-05, "loss": 0.0061, "step": 314280 }, { "epoch": 9.714718427396921, "grad_norm": 0.1413227766752243, "learning_rate": 1.5144356184706683e-05, "loss": 0.0058, "step": 314310 }, { "epoch": 9.71564566977808, "grad_norm": 0.11943066120147705, "learning_rate": 1.5143892563516104e-05, "loss": 0.0056, "step": 314340 }, { "epoch": 9.716572912159238, "grad_norm": 0.0998348519206047, "learning_rate": 1.5143428942325524e-05, "loss": 0.0057, "step": 314370 }, { "epoch": 9.717500154540398, "grad_norm": 0.11979979276657104, "learning_rate": 1.5142965321134945e-05, "loss": 0.0061, "step": 314400 }, { "epoch": 9.718427396921555, "grad_norm": 0.09462720900774002, "learning_rate": 1.5142501699944367e-05, "loss": 0.0054, "step": 314430 }, { "epoch": 
9.719354639302713, "grad_norm": 0.1115477904677391, "learning_rate": 1.5142038078753787e-05, "loss": 0.0062, "step": 314460 }, { "epoch": 9.720281881683873, "grad_norm": 0.07923615723848343, "learning_rate": 1.5141574457563208e-05, "loss": 0.0064, "step": 314490 }, { "epoch": 9.72120912406503, "grad_norm": 0.08562598377466202, "learning_rate": 1.514111083637263e-05, "loss": 0.0063, "step": 314520 }, { "epoch": 9.72213636644619, "grad_norm": 0.13386228680610657, "learning_rate": 1.514064721518205e-05, "loss": 0.0066, "step": 314550 }, { "epoch": 9.723063608827347, "grad_norm": 0.11259501427412033, "learning_rate": 1.5140183593991471e-05, "loss": 0.0059, "step": 314580 }, { "epoch": 9.723990851208505, "grad_norm": 0.07725860178470612, "learning_rate": 1.5139719972800891e-05, "loss": 0.0056, "step": 314610 }, { "epoch": 9.724918093589665, "grad_norm": 0.08966121077537537, "learning_rate": 1.5139256351610312e-05, "loss": 0.0062, "step": 314640 }, { "epoch": 9.725845335970822, "grad_norm": 0.09726230055093765, "learning_rate": 1.5138792730419734e-05, "loss": 0.0066, "step": 314670 }, { "epoch": 9.726772578351982, "grad_norm": 0.17187108099460602, "learning_rate": 1.5138329109229152e-05, "loss": 0.0063, "step": 314700 }, { "epoch": 9.72769982073314, "grad_norm": 0.08638983219861984, "learning_rate": 1.5137865488038575e-05, "loss": 0.0057, "step": 314730 }, { "epoch": 9.728627063114297, "grad_norm": 0.13122430443763733, "learning_rate": 1.5137401866847997e-05, "loss": 0.0059, "step": 314760 }, { "epoch": 9.729554305495457, "grad_norm": 0.09445519000291824, "learning_rate": 1.5136938245657415e-05, "loss": 0.0056, "step": 314790 }, { "epoch": 9.730481547876614, "grad_norm": 0.17720578610897064, "learning_rate": 1.5136474624466836e-05, "loss": 0.0064, "step": 314820 }, { "epoch": 9.731408790257774, "grad_norm": 0.1084139347076416, "learning_rate": 1.5136011003276258e-05, "loss": 0.0061, "step": 314850 }, { "epoch": 9.732336032638932, "grad_norm": 0.12822501361370087, 
"learning_rate": 1.5135547382085678e-05, "loss": 0.0068, "step": 314880 }, { "epoch": 9.73326327502009, "grad_norm": 0.09788279235363007, "learning_rate": 1.51350837608951e-05, "loss": 0.0068, "step": 314910 }, { "epoch": 9.734190517401249, "grad_norm": 0.1059974879026413, "learning_rate": 1.5134620139704519e-05, "loss": 0.0058, "step": 314940 }, { "epoch": 9.735117759782407, "grad_norm": 0.09805580973625183, "learning_rate": 1.513415651851394e-05, "loss": 0.0053, "step": 314970 }, { "epoch": 9.736045002163566, "grad_norm": 0.11181601881980896, "learning_rate": 1.5133692897323362e-05, "loss": 0.0056, "step": 315000 }, { "epoch": 9.736972244544724, "grad_norm": 0.08659783750772476, "learning_rate": 1.5133229276132782e-05, "loss": 0.0063, "step": 315030 }, { "epoch": 9.737899486925883, "grad_norm": 0.10110428184270859, "learning_rate": 1.5132765654942203e-05, "loss": 0.0058, "step": 315060 }, { "epoch": 9.738826729307041, "grad_norm": 0.1259593367576599, "learning_rate": 1.5132302033751625e-05, "loss": 0.0057, "step": 315090 }, { "epoch": 9.739753971688199, "grad_norm": 0.13567742705345154, "learning_rate": 1.5131838412561045e-05, "loss": 0.0059, "step": 315120 }, { "epoch": 9.740681214069358, "grad_norm": 0.14796759188175201, "learning_rate": 1.5131374791370466e-05, "loss": 0.0066, "step": 315150 }, { "epoch": 9.741608456450516, "grad_norm": 0.14196652173995972, "learning_rate": 1.5130911170179888e-05, "loss": 0.0068, "step": 315180 }, { "epoch": 9.742535698831675, "grad_norm": 0.08276572078466415, "learning_rate": 1.5130447548989306e-05, "loss": 0.0063, "step": 315210 }, { "epoch": 9.743462941212833, "grad_norm": 0.10815029591321945, "learning_rate": 1.5129983927798727e-05, "loss": 0.0058, "step": 315240 }, { "epoch": 9.74439018359399, "grad_norm": 0.10356315225362778, "learning_rate": 1.5129520306608147e-05, "loss": 0.0062, "step": 315270 }, { "epoch": 9.74531742597515, "grad_norm": 0.12435414642095566, "learning_rate": 1.5129056685417569e-05, "loss": 0.0061, 
"step": 315300 }, { "epoch": 9.746244668356308, "grad_norm": 0.12417902797460556, "learning_rate": 1.512859306422699e-05, "loss": 0.0063, "step": 315330 }, { "epoch": 9.747171910737467, "grad_norm": 0.0837123915553093, "learning_rate": 1.512812944303641e-05, "loss": 0.0055, "step": 315360 }, { "epoch": 9.748099153118625, "grad_norm": 0.09945385158061981, "learning_rate": 1.5127665821845831e-05, "loss": 0.0059, "step": 315390 }, { "epoch": 9.749026395499783, "grad_norm": 0.06922829896211624, "learning_rate": 1.5127202200655253e-05, "loss": 0.006, "step": 315420 }, { "epoch": 9.749953637880942, "grad_norm": 0.09915758669376373, "learning_rate": 1.5126738579464673e-05, "loss": 0.0057, "step": 315450 }, { "epoch": 9.7508808802621, "grad_norm": 0.126724511384964, "learning_rate": 1.5126274958274094e-05, "loss": 0.0062, "step": 315480 }, { "epoch": 9.75180812264326, "grad_norm": 0.1532500982284546, "learning_rate": 1.5125811337083516e-05, "loss": 0.0061, "step": 315510 }, { "epoch": 9.752735365024417, "grad_norm": 0.18343810737133026, "learning_rate": 1.5125347715892936e-05, "loss": 0.0071, "step": 315540 }, { "epoch": 9.753662607405575, "grad_norm": 0.12160594016313553, "learning_rate": 1.5124884094702357e-05, "loss": 0.0062, "step": 315570 }, { "epoch": 9.754589849786734, "grad_norm": 0.09664004296064377, "learning_rate": 1.5124420473511777e-05, "loss": 0.0065, "step": 315600 }, { "epoch": 9.755517092167892, "grad_norm": 0.1531485617160797, "learning_rate": 1.5123956852321198e-05, "loss": 0.0067, "step": 315630 }, { "epoch": 9.756444334549052, "grad_norm": 0.16733646392822266, "learning_rate": 1.512349323113062e-05, "loss": 0.0054, "step": 315660 }, { "epoch": 9.75737157693021, "grad_norm": 0.13664454221725464, "learning_rate": 1.5123029609940038e-05, "loss": 0.0065, "step": 315690 }, { "epoch": 9.758298819311367, "grad_norm": 0.09860696643590927, "learning_rate": 1.512256598874946e-05, "loss": 0.0055, "step": 315720 }, { "epoch": 9.759226061692527, "grad_norm": 
0.11950619518756866, "learning_rate": 1.5122102367558881e-05, "loss": 0.0059, "step": 315750 }, { "epoch": 9.760153304073684, "grad_norm": 0.09743477404117584, "learning_rate": 1.5121638746368301e-05, "loss": 0.0067, "step": 315780 }, { "epoch": 9.761080546454844, "grad_norm": 0.10388058423995972, "learning_rate": 1.5121175125177722e-05, "loss": 0.0058, "step": 315810 }, { "epoch": 9.762007788836002, "grad_norm": 0.0929998978972435, "learning_rate": 1.5120711503987144e-05, "loss": 0.0061, "step": 315840 }, { "epoch": 9.762935031217161, "grad_norm": 0.0608314611017704, "learning_rate": 1.5120247882796564e-05, "loss": 0.0059, "step": 315870 }, { "epoch": 9.763862273598319, "grad_norm": 0.13925933837890625, "learning_rate": 1.5119784261605985e-05, "loss": 0.0062, "step": 315900 }, { "epoch": 9.764789515979476, "grad_norm": 0.09411896020174026, "learning_rate": 1.5119320640415407e-05, "loss": 0.0062, "step": 315930 }, { "epoch": 9.765716758360636, "grad_norm": 0.10040856152772903, "learning_rate": 1.5118857019224827e-05, "loss": 0.0058, "step": 315960 }, { "epoch": 9.766644000741794, "grad_norm": 0.09163586795330048, "learning_rate": 1.5118393398034248e-05, "loss": 0.0059, "step": 315990 }, { "epoch": 9.767571243122953, "grad_norm": 0.14369434118270874, "learning_rate": 1.5117929776843668e-05, "loss": 0.0066, "step": 316020 }, { "epoch": 9.76849848550411, "grad_norm": 0.1522584855556488, "learning_rate": 1.511746615565309e-05, "loss": 0.0059, "step": 316050 }, { "epoch": 9.769425727885269, "grad_norm": 0.08667725324630737, "learning_rate": 1.511700253446251e-05, "loss": 0.0059, "step": 316080 }, { "epoch": 9.770352970266428, "grad_norm": 0.10866012424230576, "learning_rate": 1.5116538913271929e-05, "loss": 0.0052, "step": 316110 }, { "epoch": 9.771280212647586, "grad_norm": 0.1137739047408104, "learning_rate": 1.5116075292081352e-05, "loss": 0.0063, "step": 316140 }, { "epoch": 9.772207455028745, "grad_norm": 0.09127248823642731, "learning_rate": 
1.5115611670890774e-05, "loss": 0.0063, "step": 316170 }, { "epoch": 9.773134697409903, "grad_norm": 0.11205786466598511, "learning_rate": 1.5115148049700192e-05, "loss": 0.007, "step": 316200 }, { "epoch": 9.77406193979106, "grad_norm": 0.1348501294851303, "learning_rate": 1.51146998825493e-05, "loss": 0.0055, "step": 316230 }, { "epoch": 9.77498918217222, "grad_norm": 0.09986244887113571, "learning_rate": 1.5114236261358721e-05, "loss": 0.0065, "step": 316260 }, { "epoch": 9.775916424553378, "grad_norm": 0.15375804901123047, "learning_rate": 1.5113772640168141e-05, "loss": 0.0056, "step": 316290 }, { "epoch": 9.776843666934537, "grad_norm": 0.14334887266159058, "learning_rate": 1.5113309018977563e-05, "loss": 0.0067, "step": 316320 }, { "epoch": 9.777770909315695, "grad_norm": 0.11147589981555939, "learning_rate": 1.5112845397786984e-05, "loss": 0.005, "step": 316350 }, { "epoch": 9.778698151696853, "grad_norm": 0.12335172295570374, "learning_rate": 1.5112381776596404e-05, "loss": 0.0072, "step": 316380 }, { "epoch": 9.779625394078012, "grad_norm": 0.11411090195178986, "learning_rate": 1.5111918155405825e-05, "loss": 0.0059, "step": 316410 }, { "epoch": 9.78055263645917, "grad_norm": 0.10811065882444382, "learning_rate": 1.5111454534215243e-05, "loss": 0.0065, "step": 316440 }, { "epoch": 9.78147987884033, "grad_norm": 0.12187641859054565, "learning_rate": 1.5110990913024665e-05, "loss": 0.0067, "step": 316470 }, { "epoch": 9.782407121221487, "grad_norm": 0.1068946048617363, "learning_rate": 1.5110527291834086e-05, "loss": 0.0055, "step": 316500 }, { "epoch": 9.783334363602645, "grad_norm": 0.08001867681741714, "learning_rate": 1.5110063670643506e-05, "loss": 0.0056, "step": 316530 }, { "epoch": 9.784261605983804, "grad_norm": 0.0950869619846344, "learning_rate": 1.5109600049452928e-05, "loss": 0.0061, "step": 316560 }, { "epoch": 9.785188848364962, "grad_norm": 0.16420747339725494, "learning_rate": 1.510913642826235e-05, "loss": 0.006, "step": 316590 }, { 
"epoch": 9.786116090746122, "grad_norm": 0.08947142958641052, "learning_rate": 1.5108672807071769e-05, "loss": 0.0063, "step": 316620 }, { "epoch": 9.78704333312728, "grad_norm": 0.09271814674139023, "learning_rate": 1.510820918588119e-05, "loss": 0.0058, "step": 316650 }, { "epoch": 9.787970575508439, "grad_norm": 0.13809069991111755, "learning_rate": 1.5107745564690612e-05, "loss": 0.0056, "step": 316680 }, { "epoch": 9.788897817889596, "grad_norm": 0.1604005992412567, "learning_rate": 1.5107281943500032e-05, "loss": 0.0057, "step": 316710 }, { "epoch": 9.789825060270754, "grad_norm": 0.13501858711242676, "learning_rate": 1.5106818322309453e-05, "loss": 0.0061, "step": 316740 }, { "epoch": 9.790752302651914, "grad_norm": 0.09424781799316406, "learning_rate": 1.5106354701118873e-05, "loss": 0.0061, "step": 316770 }, { "epoch": 9.791679545033071, "grad_norm": 0.08448690176010132, "learning_rate": 1.5105891079928295e-05, "loss": 0.0061, "step": 316800 }, { "epoch": 9.79260678741423, "grad_norm": 0.10350707918405533, "learning_rate": 1.5105427458737716e-05, "loss": 0.0062, "step": 316830 }, { "epoch": 9.793534029795389, "grad_norm": 0.089068703353405, "learning_rate": 1.5104963837547136e-05, "loss": 0.0062, "step": 316860 }, { "epoch": 9.794461272176546, "grad_norm": 0.08261534571647644, "learning_rate": 1.5104500216356558e-05, "loss": 0.0061, "step": 316890 }, { "epoch": 9.795388514557706, "grad_norm": 0.11612237989902496, "learning_rate": 1.5104036595165979e-05, "loss": 0.0055, "step": 316920 }, { "epoch": 9.796315756938863, "grad_norm": 0.11735768616199493, "learning_rate": 1.5103572973975397e-05, "loss": 0.0058, "step": 316950 }, { "epoch": 9.797242999320023, "grad_norm": 0.10617869347333908, "learning_rate": 1.5103109352784819e-05, "loss": 0.0065, "step": 316980 }, { "epoch": 9.79817024170118, "grad_norm": 0.08425869792699814, "learning_rate": 1.510264573159424e-05, "loss": 0.006, "step": 317010 }, { "epoch": 9.799097484082338, "grad_norm": 0.07659745216369629, 
"learning_rate": 1.510218211040366e-05, "loss": 0.0058, "step": 317040 }, { "epoch": 9.800024726463498, "grad_norm": 0.14939960837364197, "learning_rate": 1.5101718489213082e-05, "loss": 0.0058, "step": 317070 }, { "epoch": 9.800951968844656, "grad_norm": 0.12981656193733215, "learning_rate": 1.5101254868022501e-05, "loss": 0.0057, "step": 317100 }, { "epoch": 9.801879211225815, "grad_norm": 0.11345432698726654, "learning_rate": 1.5100791246831923e-05, "loss": 0.0059, "step": 317130 }, { "epoch": 9.802806453606973, "grad_norm": 0.1752285361289978, "learning_rate": 1.5100327625641344e-05, "loss": 0.0065, "step": 317160 }, { "epoch": 9.80373369598813, "grad_norm": 0.11070829629898071, "learning_rate": 1.5099864004450764e-05, "loss": 0.0055, "step": 317190 }, { "epoch": 9.80466093836929, "grad_norm": 0.10138269513845444, "learning_rate": 1.5099400383260186e-05, "loss": 0.0059, "step": 317220 }, { "epoch": 9.805588180750448, "grad_norm": 0.11310181766748428, "learning_rate": 1.5098936762069607e-05, "loss": 0.0058, "step": 317250 }, { "epoch": 9.806515423131607, "grad_norm": 0.09336629509925842, "learning_rate": 1.5098473140879027e-05, "loss": 0.0059, "step": 317280 }, { "epoch": 9.807442665512765, "grad_norm": 0.08890463411808014, "learning_rate": 1.5098009519688449e-05, "loss": 0.0063, "step": 317310 }, { "epoch": 9.808369907893923, "grad_norm": 0.14012132585048676, "learning_rate": 1.509754589849787e-05, "loss": 0.0068, "step": 317340 }, { "epoch": 9.809297150275082, "grad_norm": 0.09886365383863449, "learning_rate": 1.5097082277307288e-05, "loss": 0.0061, "step": 317370 }, { "epoch": 9.81022439265624, "grad_norm": 0.06386232376098633, "learning_rate": 1.5096618656116711e-05, "loss": 0.0058, "step": 317400 }, { "epoch": 9.8111516350374, "grad_norm": 0.10951948910951614, "learning_rate": 1.509615503492613e-05, "loss": 0.0056, "step": 317430 }, { "epoch": 9.812078877418557, "grad_norm": 0.07583832740783691, "learning_rate": 1.5095691413735551e-05, "loss": 0.0067, 
"step": 317460 }, { "epoch": 9.813006119799716, "grad_norm": 0.08340156823396683, "learning_rate": 1.5095227792544972e-05, "loss": 0.0061, "step": 317490 }, { "epoch": 9.813933362180874, "grad_norm": 0.09409525990486145, "learning_rate": 1.5094764171354392e-05, "loss": 0.0057, "step": 317520 }, { "epoch": 9.814860604562032, "grad_norm": 0.1366940587759018, "learning_rate": 1.5094300550163814e-05, "loss": 0.0063, "step": 317550 }, { "epoch": 9.815787846943191, "grad_norm": 0.13440844416618347, "learning_rate": 1.5093836928973235e-05, "loss": 0.0065, "step": 317580 }, { "epoch": 9.816715089324349, "grad_norm": 0.1024194210767746, "learning_rate": 1.5093373307782655e-05, "loss": 0.0061, "step": 317610 }, { "epoch": 9.817642331705509, "grad_norm": 0.08837081491947174, "learning_rate": 1.5092909686592077e-05, "loss": 0.0057, "step": 317640 }, { "epoch": 9.818569574086666, "grad_norm": 0.0966462716460228, "learning_rate": 1.5092446065401498e-05, "loss": 0.0059, "step": 317670 }, { "epoch": 9.819496816467824, "grad_norm": 0.19477221369743347, "learning_rate": 1.5091982444210918e-05, "loss": 0.0058, "step": 317700 }, { "epoch": 9.820424058848984, "grad_norm": 0.09889879822731018, "learning_rate": 1.509151882302034e-05, "loss": 0.0057, "step": 317730 }, { "epoch": 9.821351301230141, "grad_norm": 0.0975218340754509, "learning_rate": 1.509105520182976e-05, "loss": 0.0061, "step": 317760 }, { "epoch": 9.8222785436113, "grad_norm": 0.10334480553865433, "learning_rate": 1.509059158063918e-05, "loss": 0.0063, "step": 317790 }, { "epoch": 9.823205785992458, "grad_norm": 0.11163540929555893, "learning_rate": 1.5090127959448602e-05, "loss": 0.0066, "step": 317820 }, { "epoch": 9.824133028373616, "grad_norm": 0.08152836561203003, "learning_rate": 1.508966433825802e-05, "loss": 0.0057, "step": 317850 }, { "epoch": 9.825060270754776, "grad_norm": 0.09197431057691574, "learning_rate": 1.5089200717067442e-05, "loss": 0.006, "step": 317880 }, { "epoch": 9.825987513135933, "grad_norm": 
0.06458171457052231, "learning_rate": 1.5088737095876863e-05, "loss": 0.0062, "step": 317910 }, { "epoch": 9.826914755517093, "grad_norm": 0.10107140988111496, "learning_rate": 1.5088273474686283e-05, "loss": 0.0065, "step": 317940 }, { "epoch": 9.82784199789825, "grad_norm": 0.1195129007101059, "learning_rate": 1.5087809853495705e-05, "loss": 0.0059, "step": 317970 }, { "epoch": 9.828769240279408, "grad_norm": 0.07252320647239685, "learning_rate": 1.5087346232305126e-05, "loss": 0.0062, "step": 318000 }, { "epoch": 9.829696482660568, "grad_norm": 0.09618707746267319, "learning_rate": 1.5086882611114546e-05, "loss": 0.0059, "step": 318030 }, { "epoch": 9.830623725041725, "grad_norm": 0.10749475657939911, "learning_rate": 1.5086418989923968e-05, "loss": 0.007, "step": 318060 }, { "epoch": 9.831550967422885, "grad_norm": 0.13206976652145386, "learning_rate": 1.5085955368733387e-05, "loss": 0.0055, "step": 318090 }, { "epoch": 9.832478209804043, "grad_norm": 0.1379164159297943, "learning_rate": 1.5085491747542809e-05, "loss": 0.0065, "step": 318120 }, { "epoch": 9.8334054521852, "grad_norm": 0.11469153314828873, "learning_rate": 1.508502812635223e-05, "loss": 0.0066, "step": 318150 }, { "epoch": 9.83433269456636, "grad_norm": 0.11915002018213272, "learning_rate": 1.508456450516165e-05, "loss": 0.0063, "step": 318180 }, { "epoch": 9.835259936947518, "grad_norm": 0.07678089290857315, "learning_rate": 1.5084100883971072e-05, "loss": 0.0062, "step": 318210 }, { "epoch": 9.836187179328677, "grad_norm": 0.1251882016658783, "learning_rate": 1.5083637262780493e-05, "loss": 0.0068, "step": 318240 }, { "epoch": 9.837114421709835, "grad_norm": 0.14021839201450348, "learning_rate": 1.5083173641589913e-05, "loss": 0.0063, "step": 318270 }, { "epoch": 9.838041664090994, "grad_norm": 0.08776110410690308, "learning_rate": 1.5082710020399334e-05, "loss": 0.0056, "step": 318300 }, { "epoch": 9.838968906472152, "grad_norm": 0.1029151901602745, "learning_rate": 1.5082246399208756e-05, 
"loss": 0.0056, "step": 318330 }, { "epoch": 9.83989614885331, "grad_norm": 0.11043431609869003, "learning_rate": 1.5081782778018174e-05, "loss": 0.0056, "step": 318360 }, { "epoch": 9.84082339123447, "grad_norm": 0.10830655694007874, "learning_rate": 1.5081319156827596e-05, "loss": 0.0055, "step": 318390 }, { "epoch": 9.841750633615627, "grad_norm": 0.09592196345329285, "learning_rate": 1.5080855535637015e-05, "loss": 0.0059, "step": 318420 }, { "epoch": 9.842677875996786, "grad_norm": 0.14675530791282654, "learning_rate": 1.5080391914446437e-05, "loss": 0.0064, "step": 318450 }, { "epoch": 9.843605118377944, "grad_norm": 0.11465092748403549, "learning_rate": 1.5079928293255858e-05, "loss": 0.0057, "step": 318480 }, { "epoch": 9.844532360759102, "grad_norm": 0.08454074710607529, "learning_rate": 1.5079464672065278e-05, "loss": 0.0063, "step": 318510 }, { "epoch": 9.845459603140261, "grad_norm": 0.13300339877605438, "learning_rate": 1.50790010508747e-05, "loss": 0.0062, "step": 318540 }, { "epoch": 9.846386845521419, "grad_norm": 0.06220753863453865, "learning_rate": 1.5078537429684121e-05, "loss": 0.0061, "step": 318570 }, { "epoch": 9.847314087902578, "grad_norm": 0.12122101336717606, "learning_rate": 1.5078073808493541e-05, "loss": 0.006, "step": 318600 }, { "epoch": 9.848241330283736, "grad_norm": 0.08969082683324814, "learning_rate": 1.5077610187302963e-05, "loss": 0.0063, "step": 318630 }, { "epoch": 9.849168572664894, "grad_norm": 0.09509452432394028, "learning_rate": 1.5077146566112384e-05, "loss": 0.0057, "step": 318660 }, { "epoch": 9.850095815046053, "grad_norm": 0.09841670840978622, "learning_rate": 1.5076682944921804e-05, "loss": 0.0058, "step": 318690 }, { "epoch": 9.851023057427211, "grad_norm": 0.12058870494365692, "learning_rate": 1.5076219323731225e-05, "loss": 0.0064, "step": 318720 }, { "epoch": 9.85195029980837, "grad_norm": 0.07884115725755692, "learning_rate": 1.5075755702540647e-05, "loss": 0.006, "step": 318750 }, { "epoch": 
9.852877542189528, "grad_norm": 0.15040509402751923, "learning_rate": 1.5075292081350065e-05, "loss": 0.0063, "step": 318780 }, { "epoch": 9.853804784570686, "grad_norm": 0.09073489904403687, "learning_rate": 1.5074828460159488e-05, "loss": 0.0062, "step": 318810 }, { "epoch": 9.854732026951845, "grad_norm": 0.1678367257118225, "learning_rate": 1.5074364838968906e-05, "loss": 0.0059, "step": 318840 }, { "epoch": 9.855659269333003, "grad_norm": 0.1000719889998436, "learning_rate": 1.5073901217778328e-05, "loss": 0.0065, "step": 318870 }, { "epoch": 9.856586511714163, "grad_norm": 0.10073038190603256, "learning_rate": 1.507343759658775e-05, "loss": 0.006, "step": 318900 }, { "epoch": 9.85751375409532, "grad_norm": 0.16203083097934723, "learning_rate": 1.507297397539717e-05, "loss": 0.006, "step": 318930 }, { "epoch": 9.858440996476478, "grad_norm": 0.11273350566625595, "learning_rate": 1.507251035420659e-05, "loss": 0.0064, "step": 318960 }, { "epoch": 9.859368238857638, "grad_norm": 0.11938732117414474, "learning_rate": 1.5072046733016012e-05, "loss": 0.0057, "step": 318990 }, { "epoch": 9.860295481238795, "grad_norm": 0.0777021199464798, "learning_rate": 1.5071583111825432e-05, "loss": 0.0063, "step": 319020 }, { "epoch": 9.861222723619955, "grad_norm": 0.12659133970737457, "learning_rate": 1.5071119490634854e-05, "loss": 0.0059, "step": 319050 }, { "epoch": 9.862149966001112, "grad_norm": 0.09434442222118378, "learning_rate": 1.5070655869444275e-05, "loss": 0.0058, "step": 319080 }, { "epoch": 9.863077208382272, "grad_norm": 0.09804032742977142, "learning_rate": 1.5070192248253695e-05, "loss": 0.0057, "step": 319110 }, { "epoch": 9.86400445076343, "grad_norm": 0.13658897578716278, "learning_rate": 1.5069728627063116e-05, "loss": 0.0057, "step": 319140 }, { "epoch": 9.864931693144587, "grad_norm": 0.12098246812820435, "learning_rate": 1.5069265005872536e-05, "loss": 0.0065, "step": 319170 }, { "epoch": 9.865858935525747, "grad_norm": 0.16004469990730286, 
"learning_rate": 1.5068801384681958e-05, "loss": 0.0061, "step": 319200 }, { "epoch": 9.866786177906905, "grad_norm": 0.12390220165252686, "learning_rate": 1.5068337763491379e-05, "loss": 0.0063, "step": 319230 }, { "epoch": 9.867713420288064, "grad_norm": 0.1348385065793991, "learning_rate": 1.5067874142300797e-05, "loss": 0.0069, "step": 319260 }, { "epoch": 9.868640662669222, "grad_norm": 0.08573401719331741, "learning_rate": 1.5067410521110219e-05, "loss": 0.0057, "step": 319290 }, { "epoch": 9.86956790505038, "grad_norm": 0.09771496802568436, "learning_rate": 1.506694689991964e-05, "loss": 0.0056, "step": 319320 }, { "epoch": 9.870495147431539, "grad_norm": 0.08623979240655899, "learning_rate": 1.506648327872906e-05, "loss": 0.0062, "step": 319350 }, { "epoch": 9.871422389812697, "grad_norm": 0.10265912860631943, "learning_rate": 1.5066019657538482e-05, "loss": 0.0064, "step": 319380 }, { "epoch": 9.872349632193856, "grad_norm": 0.1319618672132492, "learning_rate": 1.5065556036347903e-05, "loss": 0.0059, "step": 319410 }, { "epoch": 9.873276874575014, "grad_norm": 0.26837918162345886, "learning_rate": 1.5065092415157323e-05, "loss": 0.0058, "step": 319440 }, { "epoch": 9.874204116956172, "grad_norm": 0.14175021648406982, "learning_rate": 1.5064628793966744e-05, "loss": 0.0059, "step": 319470 }, { "epoch": 9.875131359337331, "grad_norm": 0.12529724836349487, "learning_rate": 1.5064165172776164e-05, "loss": 0.0065, "step": 319500 }, { "epoch": 9.876058601718489, "grad_norm": 0.13343225419521332, "learning_rate": 1.5063701551585586e-05, "loss": 0.0061, "step": 319530 }, { "epoch": 9.876985844099648, "grad_norm": 0.16720089316368103, "learning_rate": 1.5063237930395007e-05, "loss": 0.0061, "step": 319560 }, { "epoch": 9.877913086480806, "grad_norm": 0.13816523551940918, "learning_rate": 1.5062774309204427e-05, "loss": 0.0062, "step": 319590 }, { "epoch": 9.878840328861964, "grad_norm": 0.08951546251773834, "learning_rate": 1.5062310688013849e-05, "loss": 0.006, 
"step": 319620 }, { "epoch": 9.879767571243123, "grad_norm": 0.088945671916008, "learning_rate": 1.506184706682327e-05, "loss": 0.0064, "step": 319650 }, { "epoch": 9.880694813624281, "grad_norm": 0.10398919135332108, "learning_rate": 1.506138344563269e-05, "loss": 0.0059, "step": 319680 }, { "epoch": 9.88162205600544, "grad_norm": 0.10652846097946167, "learning_rate": 1.5060919824442111e-05, "loss": 0.0065, "step": 319710 }, { "epoch": 9.882549298386598, "grad_norm": 0.09325184673070908, "learning_rate": 1.5060456203251533e-05, "loss": 0.0066, "step": 319740 }, { "epoch": 9.883476540767756, "grad_norm": 0.10388506948947906, "learning_rate": 1.5059992582060951e-05, "loss": 0.0058, "step": 319770 }, { "epoch": 9.884403783148915, "grad_norm": 0.11142915487289429, "learning_rate": 1.5059528960870373e-05, "loss": 0.0054, "step": 319800 }, { "epoch": 9.885331025530073, "grad_norm": 0.14047785103321075, "learning_rate": 1.5059065339679792e-05, "loss": 0.0058, "step": 319830 }, { "epoch": 9.886258267911233, "grad_norm": 0.11590473353862762, "learning_rate": 1.5058601718489214e-05, "loss": 0.0055, "step": 319860 }, { "epoch": 9.88718551029239, "grad_norm": 0.1491651087999344, "learning_rate": 1.5058138097298635e-05, "loss": 0.0059, "step": 319890 }, { "epoch": 9.88811275267355, "grad_norm": 0.11514277011156082, "learning_rate": 1.5057674476108055e-05, "loss": 0.0062, "step": 319920 }, { "epoch": 9.889039995054707, "grad_norm": 0.15485163033008575, "learning_rate": 1.5057210854917477e-05, "loss": 0.0063, "step": 319950 }, { "epoch": 9.889967237435865, "grad_norm": 0.08837844431400299, "learning_rate": 1.5056747233726898e-05, "loss": 0.0063, "step": 319980 }, { "epoch": 9.890894479817025, "grad_norm": 0.09469973295927048, "learning_rate": 1.5056283612536318e-05, "loss": 0.0064, "step": 320010 }, { "epoch": 9.891821722198182, "grad_norm": 0.10054422914981842, "learning_rate": 1.505581999134574e-05, "loss": 0.0059, "step": 320040 }, { "epoch": 9.892748964579342, "grad_norm": 
0.1245591938495636, "learning_rate": 1.5055356370155161e-05, "loss": 0.0057, "step": 320070 }, { "epoch": 9.8936762069605, "grad_norm": 0.12666288018226624, "learning_rate": 1.505489274896458e-05, "loss": 0.0057, "step": 320100 }, { "epoch": 9.894603449341657, "grad_norm": 0.0799613744020462, "learning_rate": 1.5054429127774002e-05, "loss": 0.0065, "step": 320130 }, { "epoch": 9.895530691722817, "grad_norm": 0.13882464170455933, "learning_rate": 1.505396550658342e-05, "loss": 0.006, "step": 320160 }, { "epoch": 9.896457934103974, "grad_norm": 0.0925305113196373, "learning_rate": 1.5053501885392842e-05, "loss": 0.0065, "step": 320190 }, { "epoch": 9.897385176485134, "grad_norm": 0.13681207597255707, "learning_rate": 1.5053038264202265e-05, "loss": 0.0061, "step": 320220 }, { "epoch": 9.898312418866292, "grad_norm": 0.0826321467757225, "learning_rate": 1.5052574643011683e-05, "loss": 0.0063, "step": 320250 }, { "epoch": 9.89923966124745, "grad_norm": 0.15306977927684784, "learning_rate": 1.5052111021821105e-05, "loss": 0.0059, "step": 320280 }, { "epoch": 9.900166903628609, "grad_norm": 0.12783589959144592, "learning_rate": 1.5051662854670213e-05, "loss": 0.0062, "step": 320310 }, { "epoch": 9.901094146009767, "grad_norm": 0.20997318625450134, "learning_rate": 1.5051199233479632e-05, "loss": 0.0054, "step": 320340 }, { "epoch": 9.902021388390926, "grad_norm": 0.11315450817346573, "learning_rate": 1.5050751066328739e-05, "loss": 0.0061, "step": 320370 }, { "epoch": 9.902948630772084, "grad_norm": 0.09534748643636703, "learning_rate": 1.505028744513816e-05, "loss": 0.0056, "step": 320400 }, { "epoch": 9.903875873153241, "grad_norm": 0.1256696581840515, "learning_rate": 1.504982382394758e-05, "loss": 0.0063, "step": 320430 }, { "epoch": 9.904803115534401, "grad_norm": 0.11800957471132278, "learning_rate": 1.5049360202757002e-05, "loss": 0.0061, "step": 320460 }, { "epoch": 9.905730357915559, "grad_norm": 0.14800797402858734, "learning_rate": 1.5048896581566423e-05, 
"loss": 0.0059, "step": 320490 }, { "epoch": 9.906657600296718, "grad_norm": 0.10343994945287704, "learning_rate": 1.5048432960375843e-05, "loss": 0.0062, "step": 320520 }, { "epoch": 9.907584842677876, "grad_norm": 0.13012468814849854, "learning_rate": 1.5047969339185264e-05, "loss": 0.0063, "step": 320550 }, { "epoch": 9.908512085059034, "grad_norm": 0.14701728522777557, "learning_rate": 1.5047505717994686e-05, "loss": 0.0069, "step": 320580 }, { "epoch": 9.909439327440193, "grad_norm": 0.10726305842399597, "learning_rate": 1.5047042096804106e-05, "loss": 0.0059, "step": 320610 }, { "epoch": 9.91036656982135, "grad_norm": 0.1393011510372162, "learning_rate": 1.5046578475613527e-05, "loss": 0.0065, "step": 320640 }, { "epoch": 9.91129381220251, "grad_norm": 0.1265822947025299, "learning_rate": 1.5046114854422947e-05, "loss": 0.0065, "step": 320670 }, { "epoch": 9.912221054583668, "grad_norm": 0.08594319969415665, "learning_rate": 1.5045651233232368e-05, "loss": 0.0054, "step": 320700 }, { "epoch": 9.913148296964827, "grad_norm": 0.11720521003007889, "learning_rate": 1.504518761204179e-05, "loss": 0.0062, "step": 320730 }, { "epoch": 9.914075539345985, "grad_norm": 0.20006325840950012, "learning_rate": 1.5044723990851208e-05, "loss": 0.0053, "step": 320760 }, { "epoch": 9.915002781727143, "grad_norm": 0.06865451484918594, "learning_rate": 1.504426036966063e-05, "loss": 0.0062, "step": 320790 }, { "epoch": 9.915930024108302, "grad_norm": 0.08288490772247314, "learning_rate": 1.5043796748470053e-05, "loss": 0.0059, "step": 320820 }, { "epoch": 9.91685726648946, "grad_norm": 0.13007718324661255, "learning_rate": 1.5043333127279471e-05, "loss": 0.0062, "step": 320850 }, { "epoch": 9.91778450887062, "grad_norm": 0.0973593071103096, "learning_rate": 1.5042869506088892e-05, "loss": 0.0055, "step": 320880 }, { "epoch": 9.918711751251777, "grad_norm": 0.07484354078769684, "learning_rate": 1.5042405884898314e-05, "loss": 0.0057, "step": 320910 }, { "epoch": 
9.919638993632935, "grad_norm": 0.08765488117933273, "learning_rate": 1.5041942263707734e-05, "loss": 0.0054, "step": 320940 }, { "epoch": 9.920566236014094, "grad_norm": 0.06836400926113129, "learning_rate": 1.5041478642517155e-05, "loss": 0.0066, "step": 320970 }, { "epoch": 9.921493478395252, "grad_norm": 0.11227735877037048, "learning_rate": 1.5041015021326577e-05, "loss": 0.0059, "step": 321000 }, { "epoch": 9.922420720776412, "grad_norm": 0.1403886079788208, "learning_rate": 1.5040551400135997e-05, "loss": 0.0054, "step": 321030 }, { "epoch": 9.92334796315757, "grad_norm": 0.12084674090147018, "learning_rate": 1.5040087778945418e-05, "loss": 0.0059, "step": 321060 }, { "epoch": 9.924275205538727, "grad_norm": 0.1358322650194168, "learning_rate": 1.5039624157754838e-05, "loss": 0.0062, "step": 321090 }, { "epoch": 9.925202447919887, "grad_norm": 0.12579022347927094, "learning_rate": 1.503916053656426e-05, "loss": 0.0066, "step": 321120 }, { "epoch": 9.926129690301044, "grad_norm": 0.1775711327791214, "learning_rate": 1.5038696915373681e-05, "loss": 0.0052, "step": 321150 }, { "epoch": 9.927056932682204, "grad_norm": 0.11851488053798676, "learning_rate": 1.50382332941831e-05, "loss": 0.0064, "step": 321180 }, { "epoch": 9.927984175063362, "grad_norm": 0.13078001141548157, "learning_rate": 1.5037769672992522e-05, "loss": 0.0059, "step": 321210 }, { "epoch": 9.92891141744452, "grad_norm": 0.11131596565246582, "learning_rate": 1.5037306051801944e-05, "loss": 0.0059, "step": 321240 }, { "epoch": 9.929838659825679, "grad_norm": 0.09130646288394928, "learning_rate": 1.5036842430611362e-05, "loss": 0.0061, "step": 321270 }, { "epoch": 9.930765902206836, "grad_norm": 0.1298495978116989, "learning_rate": 1.5036378809420783e-05, "loss": 0.0058, "step": 321300 }, { "epoch": 9.931693144587996, "grad_norm": 0.07841183990240097, "learning_rate": 1.5035915188230205e-05, "loss": 0.0055, "step": 321330 }, { "epoch": 9.932620386969154, "grad_norm": 0.06373319774866104, 
"learning_rate": 1.5035451567039625e-05, "loss": 0.0053, "step": 321360 }, { "epoch": 9.933547629350311, "grad_norm": 0.14187981188297272, "learning_rate": 1.5034987945849046e-05, "loss": 0.0063, "step": 321390 }, { "epoch": 9.93447487173147, "grad_norm": 0.08232537657022476, "learning_rate": 1.5034524324658466e-05, "loss": 0.0053, "step": 321420 }, { "epoch": 9.935402114112629, "grad_norm": 0.11314620077610016, "learning_rate": 1.5034060703467887e-05, "loss": 0.0065, "step": 321450 }, { "epoch": 9.936329356493788, "grad_norm": 0.08881241083145142, "learning_rate": 1.5033597082277309e-05, "loss": 0.0059, "step": 321480 }, { "epoch": 9.937256598874946, "grad_norm": 0.10868684202432632, "learning_rate": 1.5033133461086729e-05, "loss": 0.006, "step": 321510 }, { "epoch": 9.938183841256105, "grad_norm": 0.14234164357185364, "learning_rate": 1.503266983989615e-05, "loss": 0.0058, "step": 321540 }, { "epoch": 9.939111083637263, "grad_norm": 0.08943195641040802, "learning_rate": 1.5032206218705572e-05, "loss": 0.0055, "step": 321570 }, { "epoch": 9.94003832601842, "grad_norm": 0.12710341811180115, "learning_rate": 1.5031742597514992e-05, "loss": 0.0062, "step": 321600 }, { "epoch": 9.94096556839958, "grad_norm": 0.11199741065502167, "learning_rate": 1.5031278976324413e-05, "loss": 0.0064, "step": 321630 }, { "epoch": 9.941892810780738, "grad_norm": 0.07457954436540604, "learning_rate": 1.5030815355133835e-05, "loss": 0.0059, "step": 321660 }, { "epoch": 9.942820053161897, "grad_norm": 0.10276412963867188, "learning_rate": 1.5030351733943254e-05, "loss": 0.0057, "step": 321690 }, { "epoch": 9.943747295543055, "grad_norm": 0.11590363085269928, "learning_rate": 1.5029888112752676e-05, "loss": 0.0064, "step": 321720 }, { "epoch": 9.944674537924213, "grad_norm": 0.11242666095495224, "learning_rate": 1.5029424491562094e-05, "loss": 0.0062, "step": 321750 }, { "epoch": 9.945601780305372, "grad_norm": 0.12462806701660156, "learning_rate": 1.5028960870371516e-05, "loss": 0.0058, 
"step": 321780 }, { "epoch": 9.94652902268653, "grad_norm": 0.11223903298377991, "learning_rate": 1.5028497249180937e-05, "loss": 0.0061, "step": 321810 }, { "epoch": 9.94745626506769, "grad_norm": 0.08898115903139114, "learning_rate": 1.5028033627990357e-05, "loss": 0.0062, "step": 321840 }, { "epoch": 9.948383507448847, "grad_norm": 0.12355893105268478, "learning_rate": 1.5027570006799778e-05, "loss": 0.0056, "step": 321870 }, { "epoch": 9.949310749830005, "grad_norm": 0.08397660404443741, "learning_rate": 1.50271063856092e-05, "loss": 0.0061, "step": 321900 }, { "epoch": 9.950237992211164, "grad_norm": 0.10432327538728714, "learning_rate": 1.502664276441862e-05, "loss": 0.0057, "step": 321930 }, { "epoch": 9.951165234592322, "grad_norm": 0.15008214116096497, "learning_rate": 1.5026179143228041e-05, "loss": 0.0058, "step": 321960 }, { "epoch": 9.952092476973482, "grad_norm": 0.12969957292079926, "learning_rate": 1.5025715522037463e-05, "loss": 0.0062, "step": 321990 }, { "epoch": 9.95301971935464, "grad_norm": 0.10344791412353516, "learning_rate": 1.5025251900846883e-05, "loss": 0.0066, "step": 322020 }, { "epoch": 9.953946961735797, "grad_norm": 0.11084671318531036, "learning_rate": 1.5024788279656304e-05, "loss": 0.0058, "step": 322050 }, { "epoch": 9.954874204116956, "grad_norm": 0.12630969285964966, "learning_rate": 1.5024324658465724e-05, "loss": 0.0067, "step": 322080 }, { "epoch": 9.955801446498114, "grad_norm": 0.1348610818386078, "learning_rate": 1.5023861037275145e-05, "loss": 0.0064, "step": 322110 }, { "epoch": 9.956728688879274, "grad_norm": 0.07418395578861237, "learning_rate": 1.5023397416084567e-05, "loss": 0.0059, "step": 322140 }, { "epoch": 9.957655931260431, "grad_norm": 0.07011028379201889, "learning_rate": 1.5022933794893985e-05, "loss": 0.0059, "step": 322170 }, { "epoch": 9.958583173641589, "grad_norm": 0.1122991219162941, "learning_rate": 1.5022470173703407e-05, "loss": 0.0062, "step": 322200 }, { "epoch": 9.959510416022749, "grad_norm": 
0.16418085992336273, "learning_rate": 1.502200655251283e-05, "loss": 0.0056, "step": 322230 }, { "epoch": 9.960437658403906, "grad_norm": 0.11229187995195389, "learning_rate": 1.5021558385361934e-05, "loss": 0.0062, "step": 322260 }, { "epoch": 9.961364900785066, "grad_norm": 0.20333565771579742, "learning_rate": 1.5021094764171356e-05, "loss": 0.0066, "step": 322290 }, { "epoch": 9.962292143166223, "grad_norm": 0.1649009734392166, "learning_rate": 1.5020631142980777e-05, "loss": 0.0066, "step": 322320 }, { "epoch": 9.963219385547383, "grad_norm": 0.1367959827184677, "learning_rate": 1.5020167521790197e-05, "loss": 0.0065, "step": 322350 }, { "epoch": 9.96414662792854, "grad_norm": 0.10756378620862961, "learning_rate": 1.5019703900599619e-05, "loss": 0.0053, "step": 322380 }, { "epoch": 9.965073870309698, "grad_norm": 0.11075820028781891, "learning_rate": 1.501924027940904e-05, "loss": 0.0062, "step": 322410 }, { "epoch": 9.966001112690858, "grad_norm": 0.08837993443012238, "learning_rate": 1.501877665821846e-05, "loss": 0.0058, "step": 322440 }, { "epoch": 9.966928355072016, "grad_norm": 0.10204226523637772, "learning_rate": 1.5018313037027881e-05, "loss": 0.0057, "step": 322470 }, { "epoch": 9.967855597453175, "grad_norm": 0.16681276261806488, "learning_rate": 1.50178494158373e-05, "loss": 0.0063, "step": 322500 }, { "epoch": 9.968782839834333, "grad_norm": 0.11252148449420929, "learning_rate": 1.5017385794646721e-05, "loss": 0.0062, "step": 322530 }, { "epoch": 9.96971008221549, "grad_norm": 0.1153394803404808, "learning_rate": 1.5016922173456143e-05, "loss": 0.0063, "step": 322560 }, { "epoch": 9.97063732459665, "grad_norm": 0.0651588886976242, "learning_rate": 1.5016458552265562e-05, "loss": 0.0062, "step": 322590 }, { "epoch": 9.971564566977808, "grad_norm": 0.11428435891866684, "learning_rate": 1.5015994931074984e-05, "loss": 0.0053, "step": 322620 }, { "epoch": 9.972491809358967, "grad_norm": 0.11540233343839645, "learning_rate": 1.5015531309884405e-05, 
"loss": 0.0059, "step": 322650 }, { "epoch": 9.973419051740125, "grad_norm": 0.1302109956741333, "learning_rate": 1.5015067688693825e-05, "loss": 0.0071, "step": 322680 }, { "epoch": 9.974346294121283, "grad_norm": 0.1923280954360962, "learning_rate": 1.5014604067503247e-05, "loss": 0.0057, "step": 322710 }, { "epoch": 9.975273536502442, "grad_norm": 0.09563338756561279, "learning_rate": 1.5014140446312668e-05, "loss": 0.006, "step": 322740 }, { "epoch": 9.9762007788836, "grad_norm": 0.10984257608652115, "learning_rate": 1.5013676825122088e-05, "loss": 0.0063, "step": 322770 }, { "epoch": 9.97712802126476, "grad_norm": 0.10559017956256866, "learning_rate": 1.501321320393151e-05, "loss": 0.0056, "step": 322800 }, { "epoch": 9.978055263645917, "grad_norm": 0.11153580993413925, "learning_rate": 1.501274958274093e-05, "loss": 0.0053, "step": 322830 }, { "epoch": 9.978982506027075, "grad_norm": 0.12885738909244537, "learning_rate": 1.501228596155035e-05, "loss": 0.0058, "step": 322860 }, { "epoch": 9.979909748408234, "grad_norm": 0.06031263619661331, "learning_rate": 1.5011822340359772e-05, "loss": 0.0064, "step": 322890 }, { "epoch": 9.980836990789392, "grad_norm": 0.16097216308116913, "learning_rate": 1.501135871916919e-05, "loss": 0.0069, "step": 322920 }, { "epoch": 9.981764233170551, "grad_norm": 0.10391437262296677, "learning_rate": 1.5010895097978612e-05, "loss": 0.0061, "step": 322950 }, { "epoch": 9.982691475551709, "grad_norm": 0.13597074151039124, "learning_rate": 1.5010431476788035e-05, "loss": 0.0057, "step": 322980 }, { "epoch": 9.983618717932867, "grad_norm": 0.1310727447271347, "learning_rate": 1.5009967855597453e-05, "loss": 0.0059, "step": 323010 }, { "epoch": 9.984545960314026, "grad_norm": 0.08605034649372101, "learning_rate": 1.5009504234406875e-05, "loss": 0.006, "step": 323040 }, { "epoch": 9.985473202695184, "grad_norm": 0.1353648453950882, "learning_rate": 1.5009040613216296e-05, "loss": 0.0061, "step": 323070 }, { "epoch": 9.986400445076344, 
"grad_norm": 0.12443406879901886, "learning_rate": 1.5008576992025716e-05, "loss": 0.0058, "step": 323100 }, { "epoch": 9.987327687457501, "grad_norm": 0.1293812096118927, "learning_rate": 1.5008113370835138e-05, "loss": 0.006, "step": 323130 }, { "epoch": 9.98825492983866, "grad_norm": 0.0884609967470169, "learning_rate": 1.5007649749644557e-05, "loss": 0.0062, "step": 323160 }, { "epoch": 9.989182172219818, "grad_norm": 0.09117662906646729, "learning_rate": 1.5007186128453979e-05, "loss": 0.0054, "step": 323190 }, { "epoch": 9.990109414600976, "grad_norm": 0.11806546151638031, "learning_rate": 1.50067225072634e-05, "loss": 0.0066, "step": 323220 }, { "epoch": 9.991036656982136, "grad_norm": 0.14558658003807068, "learning_rate": 1.500625888607282e-05, "loss": 0.006, "step": 323250 }, { "epoch": 9.991963899363293, "grad_norm": 0.10874494910240173, "learning_rate": 1.5005795264882242e-05, "loss": 0.0055, "step": 323280 }, { "epoch": 9.992891141744453, "grad_norm": 0.12398925423622131, "learning_rate": 1.5005331643691663e-05, "loss": 0.0063, "step": 323310 }, { "epoch": 9.99381838412561, "grad_norm": 0.12106841057538986, "learning_rate": 1.5004868022501083e-05, "loss": 0.0066, "step": 323340 }, { "epoch": 9.994745626506768, "grad_norm": 0.11139847338199615, "learning_rate": 1.5004404401310505e-05, "loss": 0.0057, "step": 323370 }, { "epoch": 9.995672868887928, "grad_norm": 0.13303229212760925, "learning_rate": 1.5003940780119926e-05, "loss": 0.0057, "step": 323400 }, { "epoch": 9.996600111269085, "grad_norm": 0.06982071697711945, "learning_rate": 1.5003477158929344e-05, "loss": 0.0051, "step": 323430 }, { "epoch": 9.997527353650245, "grad_norm": 0.155878946185112, "learning_rate": 1.5003013537738766e-05, "loss": 0.0063, "step": 323460 }, { "epoch": 9.998454596031403, "grad_norm": 0.07831333577632904, "learning_rate": 1.5002549916548185e-05, "loss": 0.0055, "step": 323490 }, { "epoch": 9.99938183841256, "grad_norm": 0.09106625616550446, "learning_rate": 
1.5002086295357607e-05, "loss": 0.0061, "step": 323520 }, { "epoch": 10.00030908079372, "grad_norm": 0.1756489872932434, "learning_rate": 1.5001622674167028e-05, "loss": 0.0061, "step": 323550 }, { "epoch": 10.001236323174878, "grad_norm": 0.10828706622123718, "learning_rate": 1.5001159052976448e-05, "loss": 0.0052, "step": 323580 }, { "epoch": 10.002163565556037, "grad_norm": 0.21569174528121948, "learning_rate": 1.500069543178587e-05, "loss": 0.0066, "step": 323610 }, { "epoch": 10.003090807937195, "grad_norm": 0.07417625188827515, "learning_rate": 1.5000231810595291e-05, "loss": 0.0062, "step": 323640 }, { "epoch": 10.004018050318352, "grad_norm": 0.11070159077644348, "learning_rate": 1.4999768189404711e-05, "loss": 0.0058, "step": 323670 }, { "epoch": 10.004945292699512, "grad_norm": 0.10241172462701797, "learning_rate": 1.4999304568214133e-05, "loss": 0.0063, "step": 323700 }, { "epoch": 10.00587253508067, "grad_norm": 0.09761690348386765, "learning_rate": 1.4998840947023554e-05, "loss": 0.0057, "step": 323730 }, { "epoch": 10.00679977746183, "grad_norm": 0.07408269494771957, "learning_rate": 1.4998377325832974e-05, "loss": 0.0055, "step": 323760 }, { "epoch": 10.007727019842987, "grad_norm": 0.11572951078414917, "learning_rate": 1.4997913704642395e-05, "loss": 0.0055, "step": 323790 }, { "epoch": 10.008654262224145, "grad_norm": 0.10107588022947311, "learning_rate": 1.4997450083451817e-05, "loss": 0.0062, "step": 323820 }, { "epoch": 10.009581504605304, "grad_norm": 0.09630833566188812, "learning_rate": 1.4996986462261237e-05, "loss": 0.0061, "step": 323850 }, { "epoch": 10.010508746986462, "grad_norm": 0.1556897610425949, "learning_rate": 1.4996522841070658e-05, "loss": 0.0059, "step": 323880 }, { "epoch": 10.011435989367621, "grad_norm": 0.1793316900730133, "learning_rate": 1.4996059219880076e-05, "loss": 0.0055, "step": 323910 }, { "epoch": 10.012363231748779, "grad_norm": 0.13609656691551208, "learning_rate": 1.4995595598689498e-05, "loss": 0.0059, 
"step": 323940 }, { "epoch": 10.013290474129938, "grad_norm": 0.10651601105928421, "learning_rate": 1.499513197749892e-05, "loss": 0.006, "step": 323970 }, { "epoch": 10.014217716511096, "grad_norm": 0.09493207186460495, "learning_rate": 1.499466835630834e-05, "loss": 0.006, "step": 324000 }, { "epoch": 10.015144958892254, "grad_norm": 0.16187800467014313, "learning_rate": 1.499420473511776e-05, "loss": 0.0058, "step": 324030 }, { "epoch": 10.016072201273413, "grad_norm": 0.14845257997512817, "learning_rate": 1.4993741113927182e-05, "loss": 0.0053, "step": 324060 }, { "epoch": 10.016999443654571, "grad_norm": 0.11587953567504883, "learning_rate": 1.4993277492736602e-05, "loss": 0.0059, "step": 324090 }, { "epoch": 10.01792668603573, "grad_norm": 0.09790036082267761, "learning_rate": 1.4992813871546024e-05, "loss": 0.0055, "step": 324120 }, { "epoch": 10.018853928416888, "grad_norm": 0.11655279994010925, "learning_rate": 1.4992350250355445e-05, "loss": 0.0058, "step": 324150 }, { "epoch": 10.019781170798046, "grad_norm": 0.09128677099943161, "learning_rate": 1.4991886629164865e-05, "loss": 0.0059, "step": 324180 }, { "epoch": 10.020708413179205, "grad_norm": 0.1068926751613617, "learning_rate": 1.4991423007974286e-05, "loss": 0.0056, "step": 324210 }, { "epoch": 10.021635655560363, "grad_norm": 0.1410687416791916, "learning_rate": 1.4990959386783706e-05, "loss": 0.0051, "step": 324240 }, { "epoch": 10.022562897941523, "grad_norm": 0.14446115493774414, "learning_rate": 1.4990495765593128e-05, "loss": 0.0059, "step": 324270 }, { "epoch": 10.02349014032268, "grad_norm": 0.0976860523223877, "learning_rate": 1.499003214440255e-05, "loss": 0.0064, "step": 324300 }, { "epoch": 10.024417382703838, "grad_norm": 0.09912122786045074, "learning_rate": 1.4989568523211967e-05, "loss": 0.0066, "step": 324330 }, { "epoch": 10.025344625084998, "grad_norm": 0.1524103432893753, "learning_rate": 1.498910490202139e-05, "loss": 0.0057, "step": 324360 }, { "epoch": 10.026271867466155, 
"grad_norm": 0.10394369810819626, "learning_rate": 1.4988641280830812e-05, "loss": 0.0058, "step": 324390 }, { "epoch": 10.027199109847315, "grad_norm": 0.13439835608005524, "learning_rate": 1.498817765964023e-05, "loss": 0.0062, "step": 324420 }, { "epoch": 10.028126352228472, "grad_norm": 0.09536084532737732, "learning_rate": 1.4987714038449652e-05, "loss": 0.0061, "step": 324450 }, { "epoch": 10.02905359460963, "grad_norm": 0.0728808343410492, "learning_rate": 1.4987250417259073e-05, "loss": 0.0054, "step": 324480 }, { "epoch": 10.02998083699079, "grad_norm": 0.2001650184392929, "learning_rate": 1.4986786796068493e-05, "loss": 0.0064, "step": 324510 }, { "epoch": 10.030908079371947, "grad_norm": 0.10904651880264282, "learning_rate": 1.4986323174877914e-05, "loss": 0.0063, "step": 324540 }, { "epoch": 10.031835321753107, "grad_norm": 0.09079188853502274, "learning_rate": 1.4985859553687334e-05, "loss": 0.0058, "step": 324570 }, { "epoch": 10.032762564134265, "grad_norm": 0.13034917414188385, "learning_rate": 1.4985395932496756e-05, "loss": 0.0055, "step": 324600 }, { "epoch": 10.033689806515422, "grad_norm": 0.14561094343662262, "learning_rate": 1.4984932311306177e-05, "loss": 0.0065, "step": 324630 }, { "epoch": 10.034617048896582, "grad_norm": 0.14122983813285828, "learning_rate": 1.4984468690115597e-05, "loss": 0.006, "step": 324660 }, { "epoch": 10.03554429127774, "grad_norm": 0.13202987611293793, "learning_rate": 1.4984005068925019e-05, "loss": 0.0053, "step": 324690 }, { "epoch": 10.036471533658899, "grad_norm": 0.162480428814888, "learning_rate": 1.498354144773444e-05, "loss": 0.0059, "step": 324720 }, { "epoch": 10.037398776040057, "grad_norm": 0.07695914804935455, "learning_rate": 1.498307782654386e-05, "loss": 0.0061, "step": 324750 }, { "epoch": 10.038326018421216, "grad_norm": 0.10055743902921677, "learning_rate": 1.4982614205353281e-05, "loss": 0.0059, "step": 324780 }, { "epoch": 10.039253260802374, "grad_norm": 0.14430662989616394, "learning_rate": 
1.4982150584162703e-05, "loss": 0.0059, "step": 324810 }, { "epoch": 10.040180503183532, "grad_norm": 0.11591289192438126, "learning_rate": 1.4981686962972121e-05, "loss": 0.0058, "step": 324840 }, { "epoch": 10.041107745564691, "grad_norm": 0.17162534594535828, "learning_rate": 1.4981223341781543e-05, "loss": 0.0057, "step": 324870 }, { "epoch": 10.042034987945849, "grad_norm": 0.16336704790592194, "learning_rate": 1.4980759720590962e-05, "loss": 0.0059, "step": 324900 }, { "epoch": 10.042962230327008, "grad_norm": 0.10950300097465515, "learning_rate": 1.4980296099400384e-05, "loss": 0.006, "step": 324930 }, { "epoch": 10.043889472708166, "grad_norm": 0.09489747881889343, "learning_rate": 1.4979832478209805e-05, "loss": 0.0062, "step": 324960 }, { "epoch": 10.044816715089324, "grad_norm": 0.09357738494873047, "learning_rate": 1.4979368857019225e-05, "loss": 0.0057, "step": 324990 }, { "epoch": 10.045743957470483, "grad_norm": 0.11188225448131561, "learning_rate": 1.4978905235828647e-05, "loss": 0.0064, "step": 325020 }, { "epoch": 10.046671199851641, "grad_norm": 0.11071351915597916, "learning_rate": 1.4978441614638068e-05, "loss": 0.0057, "step": 325050 }, { "epoch": 10.0475984422328, "grad_norm": 0.11751417070627213, "learning_rate": 1.4977977993447488e-05, "loss": 0.0059, "step": 325080 }, { "epoch": 10.048525684613958, "grad_norm": 0.07898599654436111, "learning_rate": 1.497751437225691e-05, "loss": 0.006, "step": 325110 }, { "epoch": 10.049452926995116, "grad_norm": 0.11862761527299881, "learning_rate": 1.4977050751066331e-05, "loss": 0.0061, "step": 325140 }, { "epoch": 10.050380169376275, "grad_norm": 0.17343571782112122, "learning_rate": 1.4976587129875751e-05, "loss": 0.0059, "step": 325170 }, { "epoch": 10.051307411757433, "grad_norm": 0.20796620845794678, "learning_rate": 1.4976123508685172e-05, "loss": 0.0062, "step": 325200 }, { "epoch": 10.052234654138593, "grad_norm": 0.1033279076218605, "learning_rate": 1.497565988749459e-05, "loss": 0.0058, 
"step": 325230 }, { "epoch": 10.05316189651975, "grad_norm": 0.09769595414400101, "learning_rate": 1.4975196266304014e-05, "loss": 0.0061, "step": 325260 }, { "epoch": 10.054089138900908, "grad_norm": 0.10685756802558899, "learning_rate": 1.4974732645113435e-05, "loss": 0.0057, "step": 325290 }, { "epoch": 10.055016381282067, "grad_norm": 0.07636726647615433, "learning_rate": 1.4974269023922853e-05, "loss": 0.0053, "step": 325320 }, { "epoch": 10.055943623663225, "grad_norm": 0.15550832450389862, "learning_rate": 1.4973805402732275e-05, "loss": 0.0059, "step": 325350 }, { "epoch": 10.056870866044385, "grad_norm": 0.07264907658100128, "learning_rate": 1.4973341781541696e-05, "loss": 0.0059, "step": 325380 }, { "epoch": 10.057798108425542, "grad_norm": 0.143578439950943, "learning_rate": 1.4972878160351116e-05, "loss": 0.0055, "step": 325410 }, { "epoch": 10.0587253508067, "grad_norm": 0.10372878611087799, "learning_rate": 1.4972414539160538e-05, "loss": 0.006, "step": 325440 }, { "epoch": 10.05965259318786, "grad_norm": 0.0822509229183197, "learning_rate": 1.4971950917969959e-05, "loss": 0.0057, "step": 325470 }, { "epoch": 10.060579835569017, "grad_norm": 0.07801365107297897, "learning_rate": 1.4971487296779379e-05, "loss": 0.0057, "step": 325500 }, { "epoch": 10.061507077950177, "grad_norm": 0.08498869091272354, "learning_rate": 1.49710236755888e-05, "loss": 0.0056, "step": 325530 }, { "epoch": 10.062434320331334, "grad_norm": 0.14112205803394318, "learning_rate": 1.497056005439822e-05, "loss": 0.007, "step": 325560 }, { "epoch": 10.063361562712494, "grad_norm": 0.16910980641841888, "learning_rate": 1.4970096433207642e-05, "loss": 0.0063, "step": 325590 }, { "epoch": 10.064288805093652, "grad_norm": 0.2677295506000519, "learning_rate": 1.4969632812017063e-05, "loss": 0.0059, "step": 325620 }, { "epoch": 10.06521604747481, "grad_norm": 0.10416620969772339, "learning_rate": 1.4969169190826483e-05, "loss": 0.0057, "step": 325650 }, { "epoch": 10.066143289855969, 
"grad_norm": 0.10908065736293793, "learning_rate": 1.4968705569635905e-05, "loss": 0.0054, "step": 325680 }, { "epoch": 10.067070532237127, "grad_norm": 0.09977938234806061, "learning_rate": 1.4968241948445326e-05, "loss": 0.0052, "step": 325710 }, { "epoch": 10.067997774618286, "grad_norm": 0.11635325849056244, "learning_rate": 1.4967778327254744e-05, "loss": 0.0059, "step": 325740 }, { "epoch": 10.068925016999444, "grad_norm": 0.17294390499591827, "learning_rate": 1.4967314706064167e-05, "loss": 0.0058, "step": 325770 }, { "epoch": 10.069852259380601, "grad_norm": 0.11334792524576187, "learning_rate": 1.4966851084873589e-05, "loss": 0.0059, "step": 325800 }, { "epoch": 10.070779501761761, "grad_norm": 0.10411594063043594, "learning_rate": 1.4966387463683007e-05, "loss": 0.0066, "step": 325830 }, { "epoch": 10.071706744142919, "grad_norm": 0.08457072824239731, "learning_rate": 1.4965923842492429e-05, "loss": 0.0056, "step": 325860 }, { "epoch": 10.072633986524078, "grad_norm": 0.12600891292095184, "learning_rate": 1.4965460221301848e-05, "loss": 0.0064, "step": 325890 }, { "epoch": 10.073561228905236, "grad_norm": 0.08924592286348343, "learning_rate": 1.496499660011127e-05, "loss": 0.0055, "step": 325920 }, { "epoch": 10.074488471286394, "grad_norm": 0.11830953508615494, "learning_rate": 1.4964532978920691e-05, "loss": 0.0058, "step": 325950 }, { "epoch": 10.075415713667553, "grad_norm": 0.1417459398508072, "learning_rate": 1.4964069357730111e-05, "loss": 0.0058, "step": 325980 }, { "epoch": 10.07634295604871, "grad_norm": 0.0799800232052803, "learning_rate": 1.4963605736539533e-05, "loss": 0.0061, "step": 326010 }, { "epoch": 10.07727019842987, "grad_norm": 0.048731088638305664, "learning_rate": 1.4963142115348954e-05, "loss": 0.0052, "step": 326040 }, { "epoch": 10.078197440811028, "grad_norm": 0.0805182084441185, "learning_rate": 1.4962678494158374e-05, "loss": 0.0057, "step": 326070 }, { "epoch": 10.079124683192186, "grad_norm": 0.09718511253595352, 
"learning_rate": 1.4962214872967796e-05, "loss": 0.0061, "step": 326100 }, { "epoch": 10.080051925573345, "grad_norm": 0.16268877685070038, "learning_rate": 1.4961751251777217e-05, "loss": 0.0052, "step": 326130 }, { "epoch": 10.080979167954503, "grad_norm": 0.09713273495435715, "learning_rate": 1.4961287630586637e-05, "loss": 0.0057, "step": 326160 }, { "epoch": 10.081906410335662, "grad_norm": 0.16894154250621796, "learning_rate": 1.4960824009396058e-05, "loss": 0.0054, "step": 326190 }, { "epoch": 10.08283365271682, "grad_norm": 0.09126897901296616, "learning_rate": 1.4960360388205476e-05, "loss": 0.006, "step": 326220 }, { "epoch": 10.083760895097978, "grad_norm": 0.08582720160484314, "learning_rate": 1.4959896767014898e-05, "loss": 0.0058, "step": 326250 }, { "epoch": 10.084688137479137, "grad_norm": 0.07471319288015366, "learning_rate": 1.495943314582432e-05, "loss": 0.0056, "step": 326280 }, { "epoch": 10.085615379860295, "grad_norm": 0.0930127426981926, "learning_rate": 1.495896952463374e-05, "loss": 0.0057, "step": 326310 }, { "epoch": 10.086542622241454, "grad_norm": 0.10936567932367325, "learning_rate": 1.4958521357482847e-05, "loss": 0.0062, "step": 326340 }, { "epoch": 10.087469864622612, "grad_norm": 0.0971183106303215, "learning_rate": 1.4958057736292269e-05, "loss": 0.0063, "step": 326370 }, { "epoch": 10.088397107003772, "grad_norm": 0.11642250418663025, "learning_rate": 1.4957594115101689e-05, "loss": 0.0055, "step": 326400 }, { "epoch": 10.08932434938493, "grad_norm": 0.09655039012432098, "learning_rate": 1.495713049391111e-05, "loss": 0.0058, "step": 326430 }, { "epoch": 10.090251591766087, "grad_norm": 0.09756240993738174, "learning_rate": 1.4956666872720532e-05, "loss": 0.0059, "step": 326460 }, { "epoch": 10.091178834147247, "grad_norm": 0.1273285150527954, "learning_rate": 1.495620325152995e-05, "loss": 0.0061, "step": 326490 }, { "epoch": 10.092106076528404, "grad_norm": 0.10375582426786423, "learning_rate": 1.4955739630339373e-05, "loss": 
0.0064, "step": 326520 }, { "epoch": 10.093033318909564, "grad_norm": 0.07766446471214294, "learning_rate": 1.4955276009148794e-05, "loss": 0.0061, "step": 326550 }, { "epoch": 10.093960561290722, "grad_norm": 0.08249115198850632, "learning_rate": 1.4954812387958212e-05, "loss": 0.0067, "step": 326580 }, { "epoch": 10.09488780367188, "grad_norm": 0.11695452779531479, "learning_rate": 1.4954348766767634e-05, "loss": 0.0062, "step": 326610 }, { "epoch": 10.095815046053039, "grad_norm": 0.07305454462766647, "learning_rate": 1.4953885145577055e-05, "loss": 0.0056, "step": 326640 }, { "epoch": 10.096742288434196, "grad_norm": 0.08264212310314178, "learning_rate": 1.4953436978426162e-05, "loss": 0.0057, "step": 326670 }, { "epoch": 10.097669530815356, "grad_norm": 0.13380758464336395, "learning_rate": 1.4952973357235583e-05, "loss": 0.0064, "step": 326700 }, { "epoch": 10.098596773196514, "grad_norm": 0.08281415700912476, "learning_rate": 1.4952509736045005e-05, "loss": 0.0057, "step": 326730 }, { "epoch": 10.099524015577671, "grad_norm": 0.16322386264801025, "learning_rate": 1.4952046114854425e-05, "loss": 0.0056, "step": 326760 }, { "epoch": 10.10045125795883, "grad_norm": 0.08712697774171829, "learning_rate": 1.4951582493663846e-05, "loss": 0.0059, "step": 326790 }, { "epoch": 10.101378500339989, "grad_norm": 0.118655264377594, "learning_rate": 1.4951118872473264e-05, "loss": 0.0053, "step": 326820 }, { "epoch": 10.102305742721148, "grad_norm": 0.1149759292602539, "learning_rate": 1.4950655251282686e-05, "loss": 0.006, "step": 326850 }, { "epoch": 10.103232985102306, "grad_norm": 0.09884124249219894, "learning_rate": 1.4950191630092107e-05, "loss": 0.005, "step": 326880 }, { "epoch": 10.104160227483463, "grad_norm": 0.13797035813331604, "learning_rate": 1.4949728008901527e-05, "loss": 0.0058, "step": 326910 }, { "epoch": 10.105087469864623, "grad_norm": 0.10739625990390778, "learning_rate": 1.4949264387710948e-05, "loss": 0.0062, "step": 326940 }, { "epoch": 
10.10601471224578, "grad_norm": 0.1306006759405136, "learning_rate": 1.494880076652037e-05, "loss": 0.0061, "step": 326970 }, { "epoch": 10.10694195462694, "grad_norm": 0.13602504134178162, "learning_rate": 1.494833714532979e-05, "loss": 0.0058, "step": 327000 }, { "epoch": 10.107869197008098, "grad_norm": 0.11323308199644089, "learning_rate": 1.4947873524139211e-05, "loss": 0.0064, "step": 327030 }, { "epoch": 10.108796439389256, "grad_norm": 0.09456312656402588, "learning_rate": 1.4947409902948633e-05, "loss": 0.0055, "step": 327060 }, { "epoch": 10.109723681770415, "grad_norm": 0.06168251484632492, "learning_rate": 1.4946946281758053e-05, "loss": 0.0063, "step": 327090 }, { "epoch": 10.110650924151573, "grad_norm": 0.1466607302427292, "learning_rate": 1.4946482660567474e-05, "loss": 0.0062, "step": 327120 }, { "epoch": 10.111578166532732, "grad_norm": 0.13657192885875702, "learning_rate": 1.4946019039376894e-05, "loss": 0.006, "step": 327150 }, { "epoch": 10.11250540891389, "grad_norm": 0.13960357010364532, "learning_rate": 1.4945555418186315e-05, "loss": 0.0058, "step": 327180 }, { "epoch": 10.113432651295048, "grad_norm": 0.1651189774274826, "learning_rate": 1.4945091796995737e-05, "loss": 0.0065, "step": 327210 }, { "epoch": 10.114359893676207, "grad_norm": 0.10059471428394318, "learning_rate": 1.4944628175805155e-05, "loss": 0.0055, "step": 327240 }, { "epoch": 10.115287136057365, "grad_norm": 0.08143117278814316, "learning_rate": 1.4944164554614578e-05, "loss": 0.0063, "step": 327270 }, { "epoch": 10.116214378438524, "grad_norm": 0.10429171472787857, "learning_rate": 1.4943700933424e-05, "loss": 0.0064, "step": 327300 }, { "epoch": 10.117141620819682, "grad_norm": 0.09405650943517685, "learning_rate": 1.4943237312233418e-05, "loss": 0.0056, "step": 327330 }, { "epoch": 10.118068863200842, "grad_norm": 0.08432289958000183, "learning_rate": 1.494277369104284e-05, "loss": 0.0058, "step": 327360 }, { "epoch": 10.118996105582, "grad_norm": 0.10438218712806702, 
"learning_rate": 1.4942310069852261e-05, "loss": 0.0059, "step": 327390 }, { "epoch": 10.119923347963157, "grad_norm": 0.12258677184581757, "learning_rate": 1.494184644866168e-05, "loss": 0.0068, "step": 327420 }, { "epoch": 10.120850590344316, "grad_norm": 0.08943381905555725, "learning_rate": 1.4941382827471102e-05, "loss": 0.0062, "step": 327450 }, { "epoch": 10.121777832725474, "grad_norm": 0.08812051266431808, "learning_rate": 1.4940919206280522e-05, "loss": 0.0054, "step": 327480 }, { "epoch": 10.122705075106634, "grad_norm": 0.09509933739900589, "learning_rate": 1.4940455585089944e-05, "loss": 0.0066, "step": 327510 }, { "epoch": 10.123632317487791, "grad_norm": 0.10122185200452805, "learning_rate": 1.4939991963899365e-05, "loss": 0.0053, "step": 327540 }, { "epoch": 10.124559559868949, "grad_norm": 0.1749468296766281, "learning_rate": 1.4939528342708785e-05, "loss": 0.0063, "step": 327570 }, { "epoch": 10.125486802250109, "grad_norm": 0.11634255945682526, "learning_rate": 1.4939064721518206e-05, "loss": 0.006, "step": 327600 }, { "epoch": 10.126414044631266, "grad_norm": 0.08748508989810944, "learning_rate": 1.4938601100327628e-05, "loss": 0.0058, "step": 327630 }, { "epoch": 10.127341287012426, "grad_norm": 0.10054431110620499, "learning_rate": 1.4938137479137048e-05, "loss": 0.0061, "step": 327660 }, { "epoch": 10.128268529393583, "grad_norm": 0.21698129177093506, "learning_rate": 1.493767385794647e-05, "loss": 0.0059, "step": 327690 }, { "epoch": 10.129195771774741, "grad_norm": 0.10274964570999146, "learning_rate": 1.493721023675589e-05, "loss": 0.0059, "step": 327720 }, { "epoch": 10.1301230141559, "grad_norm": 0.15664559602737427, "learning_rate": 1.4936746615565309e-05, "loss": 0.006, "step": 327750 }, { "epoch": 10.131050256537058, "grad_norm": 0.1552528291940689, "learning_rate": 1.4936282994374732e-05, "loss": 0.0061, "step": 327780 }, { "epoch": 10.131977498918218, "grad_norm": 0.09527264535427094, "learning_rate": 1.493581937318415e-05, "loss": 
0.0051, "step": 327810 }, { "epoch": 10.132904741299376, "grad_norm": 0.1464754194021225, "learning_rate": 1.4935355751993572e-05, "loss": 0.0052, "step": 327840 }, { "epoch": 10.133831983680533, "grad_norm": 0.12157640606164932, "learning_rate": 1.4934892130802993e-05, "loss": 0.0062, "step": 327870 }, { "epoch": 10.134759226061693, "grad_norm": 0.07686987519264221, "learning_rate": 1.4934428509612413e-05, "loss": 0.0062, "step": 327900 }, { "epoch": 10.13568646844285, "grad_norm": 0.09452389180660248, "learning_rate": 1.4933964888421834e-05, "loss": 0.0056, "step": 327930 }, { "epoch": 10.13661371082401, "grad_norm": 0.0921836569905281, "learning_rate": 1.4933501267231256e-05, "loss": 0.0059, "step": 327960 }, { "epoch": 10.137540953205168, "grad_norm": 0.07059371471405029, "learning_rate": 1.4933037646040676e-05, "loss": 0.0052, "step": 327990 }, { "epoch": 10.138468195586325, "grad_norm": 0.19405654072761536, "learning_rate": 1.4932574024850097e-05, "loss": 0.0053, "step": 328020 }, { "epoch": 10.139395437967485, "grad_norm": 0.1447947919368744, "learning_rate": 1.4932110403659519e-05, "loss": 0.0057, "step": 328050 }, { "epoch": 10.140322680348643, "grad_norm": 0.09495845437049866, "learning_rate": 1.4931646782468939e-05, "loss": 0.0056, "step": 328080 }, { "epoch": 10.141249922729802, "grad_norm": 0.12079817056655884, "learning_rate": 1.493118316127836e-05, "loss": 0.0062, "step": 328110 }, { "epoch": 10.14217716511096, "grad_norm": 0.12622873485088348, "learning_rate": 1.493071954008778e-05, "loss": 0.005, "step": 328140 }, { "epoch": 10.14310440749212, "grad_norm": 0.130787193775177, "learning_rate": 1.4930255918897201e-05, "loss": 0.0061, "step": 328170 }, { "epoch": 10.144031649873277, "grad_norm": 0.10871583968400955, "learning_rate": 1.4929792297706623e-05, "loss": 0.0054, "step": 328200 }, { "epoch": 10.144958892254435, "grad_norm": 0.09397293627262115, "learning_rate": 1.4929328676516041e-05, "loss": 0.006, "step": 328230 }, { "epoch": 
10.145886134635594, "grad_norm": 0.11522816121578217, "learning_rate": 1.4928865055325463e-05, "loss": 0.0056, "step": 328260 }, { "epoch": 10.146813377016752, "grad_norm": 0.1808594912290573, "learning_rate": 1.4928401434134884e-05, "loss": 0.0055, "step": 328290 }, { "epoch": 10.147740619397911, "grad_norm": 0.09562511742115021, "learning_rate": 1.4927937812944304e-05, "loss": 0.0059, "step": 328320 }, { "epoch": 10.148667861779069, "grad_norm": 0.12489814311265945, "learning_rate": 1.4927474191753725e-05, "loss": 0.0061, "step": 328350 }, { "epoch": 10.149595104160227, "grad_norm": 0.053150724619627, "learning_rate": 1.4927010570563147e-05, "loss": 0.0056, "step": 328380 }, { "epoch": 10.150522346541386, "grad_norm": 0.14305134117603302, "learning_rate": 1.4926546949372567e-05, "loss": 0.0058, "step": 328410 }, { "epoch": 10.151449588922544, "grad_norm": 0.11070796847343445, "learning_rate": 1.4926083328181988e-05, "loss": 0.0067, "step": 328440 }, { "epoch": 10.152376831303703, "grad_norm": 0.16306345164775848, "learning_rate": 1.4925619706991408e-05, "loss": 0.0064, "step": 328470 }, { "epoch": 10.153304073684861, "grad_norm": 0.12376359105110168, "learning_rate": 1.492515608580083e-05, "loss": 0.0058, "step": 328500 }, { "epoch": 10.154231316066019, "grad_norm": 0.06729422509670258, "learning_rate": 1.4924692464610251e-05, "loss": 0.0054, "step": 328530 }, { "epoch": 10.155158558447178, "grad_norm": 0.1177893802523613, "learning_rate": 1.492422884341967e-05, "loss": 0.0056, "step": 328560 }, { "epoch": 10.156085800828336, "grad_norm": 0.07125706225633621, "learning_rate": 1.4923765222229092e-05, "loss": 0.0059, "step": 328590 }, { "epoch": 10.157013043209496, "grad_norm": 0.1266871988773346, "learning_rate": 1.4923301601038514e-05, "loss": 0.0059, "step": 328620 }, { "epoch": 10.157940285590653, "grad_norm": 0.16843780875205994, "learning_rate": 1.4922837979847932e-05, "loss": 0.0053, "step": 328650 }, { "epoch": 10.158867527971811, "grad_norm": 
0.09003030508756638, "learning_rate": 1.4922374358657355e-05, "loss": 0.0056, "step": 328680 }, { "epoch": 10.15979477035297, "grad_norm": 0.11020172387361526, "learning_rate": 1.4921910737466777e-05, "loss": 0.0055, "step": 328710 }, { "epoch": 10.160722012734128, "grad_norm": 0.08213711529970169, "learning_rate": 1.4921447116276195e-05, "loss": 0.0064, "step": 328740 }, { "epoch": 10.161649255115288, "grad_norm": 0.11575454473495483, "learning_rate": 1.4920983495085616e-05, "loss": 0.0062, "step": 328770 }, { "epoch": 10.162576497496445, "grad_norm": 0.09483175724744797, "learning_rate": 1.4920519873895036e-05, "loss": 0.0059, "step": 328800 }, { "epoch": 10.163503739877603, "grad_norm": 0.1268245279788971, "learning_rate": 1.4920056252704458e-05, "loss": 0.006, "step": 328830 }, { "epoch": 10.164430982258763, "grad_norm": 0.11670782417058945, "learning_rate": 1.4919592631513879e-05, "loss": 0.0058, "step": 328860 }, { "epoch": 10.16535822463992, "grad_norm": 0.12648364901542664, "learning_rate": 1.4919129010323299e-05, "loss": 0.0066, "step": 328890 }, { "epoch": 10.16628546702108, "grad_norm": 0.11084724217653275, "learning_rate": 1.491866538913272e-05, "loss": 0.0057, "step": 328920 }, { "epoch": 10.167212709402238, "grad_norm": 0.13574393093585968, "learning_rate": 1.4918201767942142e-05, "loss": 0.0061, "step": 328950 }, { "epoch": 10.168139951783397, "grad_norm": 0.10146705061197281, "learning_rate": 1.4917738146751562e-05, "loss": 0.0057, "step": 328980 }, { "epoch": 10.169067194164555, "grad_norm": 0.1752699464559555, "learning_rate": 1.4917274525560983e-05, "loss": 0.0057, "step": 329010 }, { "epoch": 10.169994436545712, "grad_norm": 0.09708791226148605, "learning_rate": 1.4916810904370405e-05, "loss": 0.006, "step": 329040 }, { "epoch": 10.170921678926872, "grad_norm": 0.14430008828639984, "learning_rate": 1.4916347283179825e-05, "loss": 0.0049, "step": 329070 }, { "epoch": 10.17184892130803, "grad_norm": 0.10099507123231888, "learning_rate": 
1.4915883661989246e-05, "loss": 0.0057, "step": 329100 }, { "epoch": 10.17277616368919, "grad_norm": 0.14680762588977814, "learning_rate": 1.4915420040798664e-05, "loss": 0.0056, "step": 329130 }, { "epoch": 10.173703406070347, "grad_norm": 0.11198106408119202, "learning_rate": 1.4914956419608086e-05, "loss": 0.0067, "step": 329160 }, { "epoch": 10.174630648451505, "grad_norm": 0.08816001564264297, "learning_rate": 1.4914492798417509e-05, "loss": 0.0061, "step": 329190 }, { "epoch": 10.175557890832664, "grad_norm": 0.09992045909166336, "learning_rate": 1.4914029177226927e-05, "loss": 0.0058, "step": 329220 }, { "epoch": 10.176485133213822, "grad_norm": 0.10775204002857208, "learning_rate": 1.4913565556036349e-05, "loss": 0.0059, "step": 329250 }, { "epoch": 10.177412375594981, "grad_norm": 0.13441988825798035, "learning_rate": 1.491310193484577e-05, "loss": 0.0059, "step": 329280 }, { "epoch": 10.178339617976139, "grad_norm": 0.10649998486042023, "learning_rate": 1.491263831365519e-05, "loss": 0.0061, "step": 329310 }, { "epoch": 10.179266860357297, "grad_norm": 0.08393467217683792, "learning_rate": 1.4912174692464611e-05, "loss": 0.0055, "step": 329340 }, { "epoch": 10.180194102738456, "grad_norm": 0.10519737005233765, "learning_rate": 1.4911711071274033e-05, "loss": 0.0056, "step": 329370 }, { "epoch": 10.181121345119614, "grad_norm": 0.08947809785604477, "learning_rate": 1.4911247450083453e-05, "loss": 0.0058, "step": 329400 }, { "epoch": 10.182048587500773, "grad_norm": 0.12666556239128113, "learning_rate": 1.4910783828892874e-05, "loss": 0.006, "step": 329430 }, { "epoch": 10.182975829881931, "grad_norm": 0.25150641798973083, "learning_rate": 1.4910320207702294e-05, "loss": 0.0061, "step": 329460 }, { "epoch": 10.183903072263089, "grad_norm": 0.09427845478057861, "learning_rate": 1.4909856586511716e-05, "loss": 0.0058, "step": 329490 }, { "epoch": 10.184830314644248, "grad_norm": 0.09193207323551178, "learning_rate": 1.4909392965321137e-05, "loss": 0.0056, 
"step": 329520 }, { "epoch": 10.185757557025406, "grad_norm": 0.08263946324586868, "learning_rate": 1.4908929344130557e-05, "loss": 0.0061, "step": 329550 }, { "epoch": 10.186684799406565, "grad_norm": 0.15615355968475342, "learning_rate": 1.4908465722939978e-05, "loss": 0.0055, "step": 329580 }, { "epoch": 10.187612041787723, "grad_norm": 0.0801093727350235, "learning_rate": 1.49080021017494e-05, "loss": 0.0057, "step": 329610 }, { "epoch": 10.18853928416888, "grad_norm": 0.10009647160768509, "learning_rate": 1.4907538480558818e-05, "loss": 0.0055, "step": 329640 }, { "epoch": 10.18946652655004, "grad_norm": 0.10498794913291931, "learning_rate": 1.490707485936824e-05, "loss": 0.0057, "step": 329670 }, { "epoch": 10.190393768931198, "grad_norm": 0.05101839452981949, "learning_rate": 1.4906611238177661e-05, "loss": 0.0058, "step": 329700 }, { "epoch": 10.191321011312358, "grad_norm": 0.18341867625713348, "learning_rate": 1.490614761698708e-05, "loss": 0.0063, "step": 329730 }, { "epoch": 10.192248253693515, "grad_norm": 0.0810600146651268, "learning_rate": 1.4905683995796502e-05, "loss": 0.0057, "step": 329760 }, { "epoch": 10.193175496074675, "grad_norm": 0.09171345084905624, "learning_rate": 1.4905220374605924e-05, "loss": 0.0063, "step": 329790 }, { "epoch": 10.194102738455832, "grad_norm": 0.12075425684452057, "learning_rate": 1.4904756753415344e-05, "loss": 0.0058, "step": 329820 }, { "epoch": 10.19502998083699, "grad_norm": 0.08256657421588898, "learning_rate": 1.4904293132224765e-05, "loss": 0.006, "step": 329850 }, { "epoch": 10.19595722321815, "grad_norm": 0.10415039956569672, "learning_rate": 1.4903829511034185e-05, "loss": 0.0061, "step": 329880 }, { "epoch": 10.196884465599307, "grad_norm": 0.0827428326010704, "learning_rate": 1.4903365889843606e-05, "loss": 0.0059, "step": 329910 }, { "epoch": 10.197811707980467, "grad_norm": 0.1100802943110466, "learning_rate": 1.4902902268653028e-05, "loss": 0.0058, "step": 329940 }, { "epoch": 10.198738950361625, 
"grad_norm": 0.1124143898487091, "learning_rate": 1.4902438647462448e-05, "loss": 0.0061, "step": 329970 }, { "epoch": 10.199666192742782, "grad_norm": 0.1081220954656601, "learning_rate": 1.490197502627187e-05, "loss": 0.006, "step": 330000 }, { "epoch": 10.200593435123942, "grad_norm": 0.09000273048877716, "learning_rate": 1.490151140508129e-05, "loss": 0.006, "step": 330030 }, { "epoch": 10.2015206775051, "grad_norm": 0.10313765704631805, "learning_rate": 1.490104778389071e-05, "loss": 0.0055, "step": 330060 }, { "epoch": 10.202447919886259, "grad_norm": 0.08836860954761505, "learning_rate": 1.4900584162700132e-05, "loss": 0.0057, "step": 330090 }, { "epoch": 10.203375162267417, "grad_norm": 0.1118444874882698, "learning_rate": 1.4900120541509554e-05, "loss": 0.0055, "step": 330120 }, { "epoch": 10.204302404648574, "grad_norm": 0.07519480586051941, "learning_rate": 1.4899656920318972e-05, "loss": 0.0057, "step": 330150 }, { "epoch": 10.205229647029734, "grad_norm": 0.10847198218107224, "learning_rate": 1.4899193299128393e-05, "loss": 0.0056, "step": 330180 }, { "epoch": 10.206156889410892, "grad_norm": 0.13933148980140686, "learning_rate": 1.4898729677937813e-05, "loss": 0.0055, "step": 330210 }, { "epoch": 10.207084131792051, "grad_norm": 0.1000990942120552, "learning_rate": 1.4898266056747235e-05, "loss": 0.0055, "step": 330240 }, { "epoch": 10.208011374173209, "grad_norm": 0.13856032490730286, "learning_rate": 1.4897802435556656e-05, "loss": 0.0065, "step": 330270 }, { "epoch": 10.208938616554367, "grad_norm": 0.13900019228458405, "learning_rate": 1.4897338814366076e-05, "loss": 0.0059, "step": 330300 }, { "epoch": 10.209865858935526, "grad_norm": 0.09965118765830994, "learning_rate": 1.4896875193175497e-05, "loss": 0.0058, "step": 330330 }, { "epoch": 10.210793101316684, "grad_norm": 0.15764707326889038, "learning_rate": 1.4896411571984919e-05, "loss": 0.0056, "step": 330360 }, { "epoch": 10.211720343697843, "grad_norm": 0.1692892462015152, "learning_rate": 
1.4895947950794339e-05, "loss": 0.0059, "step": 330390 }, { "epoch": 10.212647586079001, "grad_norm": 0.18646147847175598, "learning_rate": 1.489548432960376e-05, "loss": 0.0058, "step": 330420 }, { "epoch": 10.213574828460159, "grad_norm": 0.07070749998092651, "learning_rate": 1.4895020708413182e-05, "loss": 0.0055, "step": 330450 }, { "epoch": 10.214502070841318, "grad_norm": 0.0823315754532814, "learning_rate": 1.4894557087222601e-05, "loss": 0.0059, "step": 330480 }, { "epoch": 10.215429313222476, "grad_norm": 0.1150406002998352, "learning_rate": 1.4894093466032023e-05, "loss": 0.006, "step": 330510 }, { "epoch": 10.216356555603635, "grad_norm": 0.08133707195520401, "learning_rate": 1.4893629844841441e-05, "loss": 0.0058, "step": 330540 }, { "epoch": 10.217283797984793, "grad_norm": 0.15077437460422516, "learning_rate": 1.4893166223650863e-05, "loss": 0.0054, "step": 330570 }, { "epoch": 10.218211040365953, "grad_norm": 0.14398573338985443, "learning_rate": 1.4892702602460286e-05, "loss": 0.006, "step": 330600 }, { "epoch": 10.21913828274711, "grad_norm": 0.12986956536769867, "learning_rate": 1.4892238981269704e-05, "loss": 0.0057, "step": 330630 }, { "epoch": 10.220065525128268, "grad_norm": 0.12861691415309906, "learning_rate": 1.4891775360079125e-05, "loss": 0.0056, "step": 330660 }, { "epoch": 10.220992767509427, "grad_norm": 0.12310519069433212, "learning_rate": 1.4891311738888547e-05, "loss": 0.0054, "step": 330690 }, { "epoch": 10.221920009890585, "grad_norm": 0.19096891582012177, "learning_rate": 1.4890848117697967e-05, "loss": 0.0062, "step": 330720 }, { "epoch": 10.222847252271745, "grad_norm": 0.10668204724788666, "learning_rate": 1.4890384496507388e-05, "loss": 0.0063, "step": 330750 }, { "epoch": 10.223774494652902, "grad_norm": 0.09001872688531876, "learning_rate": 1.488992087531681e-05, "loss": 0.0057, "step": 330780 }, { "epoch": 10.22470173703406, "grad_norm": 0.042570214718580246, "learning_rate": 1.488945725412623e-05, "loss": 0.0056, "step": 
330810 }, { "epoch": 10.22562897941522, "grad_norm": 0.14823904633522034, "learning_rate": 1.4888993632935651e-05, "loss": 0.0062, "step": 330840 }, { "epoch": 10.226556221796377, "grad_norm": 0.07016802579164505, "learning_rate": 1.4888530011745071e-05, "loss": 0.006, "step": 330870 }, { "epoch": 10.227483464177537, "grad_norm": 0.08949742466211319, "learning_rate": 1.4888066390554492e-05, "loss": 0.0056, "step": 330900 }, { "epoch": 10.228410706558694, "grad_norm": 0.13540197908878326, "learning_rate": 1.4887602769363914e-05, "loss": 0.0058, "step": 330930 }, { "epoch": 10.229337948939852, "grad_norm": 0.15066121518611908, "learning_rate": 1.4887139148173334e-05, "loss": 0.0052, "step": 330960 }, { "epoch": 10.230265191321012, "grad_norm": 0.10322137176990509, "learning_rate": 1.4886675526982755e-05, "loss": 0.0059, "step": 330990 }, { "epoch": 10.23119243370217, "grad_norm": 0.09221568703651428, "learning_rate": 1.4886211905792177e-05, "loss": 0.0062, "step": 331020 }, { "epoch": 10.232119676083329, "grad_norm": 0.08923430740833282, "learning_rate": 1.4885748284601595e-05, "loss": 0.0052, "step": 331050 }, { "epoch": 10.233046918464487, "grad_norm": 0.14461582899093628, "learning_rate": 1.4885284663411016e-05, "loss": 0.0056, "step": 331080 }, { "epoch": 10.233974160845644, "grad_norm": 0.12476811558008194, "learning_rate": 1.4884821042220438e-05, "loss": 0.0064, "step": 331110 }, { "epoch": 10.234901403226804, "grad_norm": 0.10245249420404434, "learning_rate": 1.4884357421029858e-05, "loss": 0.0064, "step": 331140 }, { "epoch": 10.235828645607961, "grad_norm": 0.15326577425003052, "learning_rate": 1.488389379983928e-05, "loss": 0.0057, "step": 331170 }, { "epoch": 10.236755887989121, "grad_norm": 0.110161691904068, "learning_rate": 1.4883430178648699e-05, "loss": 0.0056, "step": 331200 }, { "epoch": 10.237683130370279, "grad_norm": 0.13376684486865997, "learning_rate": 1.488296655745812e-05, "loss": 0.0059, "step": 331230 }, { "epoch": 10.238610372751436, 
"grad_norm": 0.10195092111825943, "learning_rate": 1.4882502936267542e-05, "loss": 0.0058, "step": 331260 }, { "epoch": 10.239537615132596, "grad_norm": 0.091284841299057, "learning_rate": 1.4882039315076962e-05, "loss": 0.0061, "step": 331290 }, { "epoch": 10.240464857513754, "grad_norm": 0.09329145401716232, "learning_rate": 1.4881575693886383e-05, "loss": 0.0065, "step": 331320 }, { "epoch": 10.241392099894913, "grad_norm": 0.07200286537408829, "learning_rate": 1.4881112072695805e-05, "loss": 0.0056, "step": 331350 }, { "epoch": 10.24231934227607, "grad_norm": 0.08702981472015381, "learning_rate": 1.4880648451505225e-05, "loss": 0.0058, "step": 331380 }, { "epoch": 10.24324658465723, "grad_norm": 0.09969955682754517, "learning_rate": 1.4880184830314646e-05, "loss": 0.0067, "step": 331410 }, { "epoch": 10.244173827038388, "grad_norm": 0.12053363025188446, "learning_rate": 1.4879721209124068e-05, "loss": 0.0059, "step": 331440 }, { "epoch": 10.245101069419546, "grad_norm": 0.09111768007278442, "learning_rate": 1.4879257587933487e-05, "loss": 0.0061, "step": 331470 }, { "epoch": 10.246028311800705, "grad_norm": 0.1473090648651123, "learning_rate": 1.4878793966742909e-05, "loss": 0.0057, "step": 331500 }, { "epoch": 10.246955554181863, "grad_norm": 0.13575975596904755, "learning_rate": 1.4878330345552327e-05, "loss": 0.0063, "step": 331530 }, { "epoch": 10.247882796563022, "grad_norm": 0.08900360763072968, "learning_rate": 1.4877866724361749e-05, "loss": 0.0056, "step": 331560 }, { "epoch": 10.24881003894418, "grad_norm": 0.1577635556459427, "learning_rate": 1.487740310317117e-05, "loss": 0.0063, "step": 331590 }, { "epoch": 10.249737281325338, "grad_norm": 0.09476929903030396, "learning_rate": 1.487693948198059e-05, "loss": 0.0061, "step": 331620 }, { "epoch": 10.250664523706497, "grad_norm": 0.06402164697647095, "learning_rate": 1.4876475860790011e-05, "loss": 0.0053, "step": 331650 }, { "epoch": 10.251591766087655, "grad_norm": 0.10177943855524063, 
"learning_rate": 1.4876012239599433e-05, "loss": 0.0054, "step": 331680 }, { "epoch": 10.252519008468814, "grad_norm": 0.13757124543190002, "learning_rate": 1.4875548618408853e-05, "loss": 0.0059, "step": 331710 }, { "epoch": 10.253446250849972, "grad_norm": 0.11631684005260468, "learning_rate": 1.4875084997218274e-05, "loss": 0.0063, "step": 331740 }, { "epoch": 10.25437349323113, "grad_norm": 0.11512842029333115, "learning_rate": 1.4874621376027696e-05, "loss": 0.006, "step": 331770 }, { "epoch": 10.25530073561229, "grad_norm": 0.08187733590602875, "learning_rate": 1.4874157754837116e-05, "loss": 0.0057, "step": 331800 }, { "epoch": 10.256227977993447, "grad_norm": 0.12710776925086975, "learning_rate": 1.4873694133646537e-05, "loss": 0.0057, "step": 331830 }, { "epoch": 10.257155220374607, "grad_norm": 0.13585832715034485, "learning_rate": 1.4873230512455957e-05, "loss": 0.006, "step": 331860 }, { "epoch": 10.258082462755764, "grad_norm": 0.09369703382253647, "learning_rate": 1.4872766891265378e-05, "loss": 0.0053, "step": 331890 }, { "epoch": 10.259009705136922, "grad_norm": 0.15133912861347198, "learning_rate": 1.48723032700748e-05, "loss": 0.0061, "step": 331920 }, { "epoch": 10.259936947518081, "grad_norm": 0.14975713193416595, "learning_rate": 1.4871839648884218e-05, "loss": 0.0059, "step": 331950 }, { "epoch": 10.26086418989924, "grad_norm": 0.10679326951503754, "learning_rate": 1.487137602769364e-05, "loss": 0.0049, "step": 331980 }, { "epoch": 10.261791432280399, "grad_norm": 0.10168779641389847, "learning_rate": 1.4870912406503063e-05, "loss": 0.0058, "step": 332010 }, { "epoch": 10.262718674661556, "grad_norm": 0.10676193982362747, "learning_rate": 1.487044878531248e-05, "loss": 0.006, "step": 332040 }, { "epoch": 10.263645917042714, "grad_norm": 0.11338899284601212, "learning_rate": 1.4869985164121902e-05, "loss": 0.0061, "step": 332070 }, { "epoch": 10.264573159423874, "grad_norm": 0.09191329032182693, "learning_rate": 1.4869521542931324e-05, "loss": 
0.006, "step": 332100 }, { "epoch": 10.265500401805031, "grad_norm": 0.14367665350437164, "learning_rate": 1.4869057921740744e-05, "loss": 0.0056, "step": 332130 }, { "epoch": 10.26642764418619, "grad_norm": 0.08435381203889847, "learning_rate": 1.4868594300550165e-05, "loss": 0.0058, "step": 332160 }, { "epoch": 10.267354886567349, "grad_norm": 0.1333811730146408, "learning_rate": 1.4868130679359585e-05, "loss": 0.0051, "step": 332190 }, { "epoch": 10.268282128948506, "grad_norm": 0.1234104335308075, "learning_rate": 1.4867667058169006e-05, "loss": 0.0064, "step": 332220 }, { "epoch": 10.269209371329666, "grad_norm": 0.08450733125209808, "learning_rate": 1.4867203436978428e-05, "loss": 0.006, "step": 332250 }, { "epoch": 10.270136613710823, "grad_norm": 0.15721476078033447, "learning_rate": 1.4866739815787848e-05, "loss": 0.0064, "step": 332280 }, { "epoch": 10.271063856091983, "grad_norm": 0.21456970274448395, "learning_rate": 1.486627619459727e-05, "loss": 0.0055, "step": 332310 }, { "epoch": 10.27199109847314, "grad_norm": 0.04217292740941048, "learning_rate": 1.486581257340669e-05, "loss": 0.005, "step": 332340 }, { "epoch": 10.2729183408543, "grad_norm": 0.1407923549413681, "learning_rate": 1.486534895221611e-05, "loss": 0.0054, "step": 332370 }, { "epoch": 10.273845583235458, "grad_norm": 0.08160172402858734, "learning_rate": 1.4864885331025532e-05, "loss": 0.0054, "step": 332400 }, { "epoch": 10.274772825616616, "grad_norm": 0.1375701129436493, "learning_rate": 1.4864437163874638e-05, "loss": 0.0054, "step": 332430 }, { "epoch": 10.275700067997775, "grad_norm": 0.09586600959300995, "learning_rate": 1.4863973542684058e-05, "loss": 0.0058, "step": 332460 }, { "epoch": 10.276627310378933, "grad_norm": 0.14783349633216858, "learning_rate": 1.486350992149348e-05, "loss": 0.0062, "step": 332490 }, { "epoch": 10.277554552760092, "grad_norm": 0.11278371512889862, "learning_rate": 1.4863046300302901e-05, "loss": 0.0059, "step": 332520 }, { "epoch": 
10.27848179514125, "grad_norm": 0.12350430339574814, "learning_rate": 1.4862582679112321e-05, "loss": 0.0052, "step": 332550 }, { "epoch": 10.279409037522408, "grad_norm": 0.14170563220977783, "learning_rate": 1.4862119057921742e-05, "loss": 0.0057, "step": 332580 }, { "epoch": 10.280336279903567, "grad_norm": 0.10125120729207993, "learning_rate": 1.4861655436731164e-05, "loss": 0.006, "step": 332610 }, { "epoch": 10.281263522284725, "grad_norm": 0.10133837163448334, "learning_rate": 1.4861191815540584e-05, "loss": 0.0061, "step": 332640 }, { "epoch": 10.282190764665884, "grad_norm": 0.10120386630296707, "learning_rate": 1.4860728194350005e-05, "loss": 0.0056, "step": 332670 }, { "epoch": 10.283118007047042, "grad_norm": 0.07938118278980255, "learning_rate": 1.4860264573159423e-05, "loss": 0.0061, "step": 332700 }, { "epoch": 10.2840452494282, "grad_norm": 0.1441390961408615, "learning_rate": 1.4859800951968845e-05, "loss": 0.0057, "step": 332730 }, { "epoch": 10.28497249180936, "grad_norm": 0.0841561108827591, "learning_rate": 1.4859337330778268e-05, "loss": 0.0059, "step": 332760 }, { "epoch": 10.285899734190517, "grad_norm": 0.076248899102211, "learning_rate": 1.4858873709587686e-05, "loss": 0.0055, "step": 332790 }, { "epoch": 10.286826976571676, "grad_norm": 0.06648476421833038, "learning_rate": 1.4858410088397108e-05, "loss": 0.0054, "step": 332820 }, { "epoch": 10.287754218952834, "grad_norm": 0.09026723355054855, "learning_rate": 1.485794646720653e-05, "loss": 0.006, "step": 332850 }, { "epoch": 10.288681461333992, "grad_norm": 0.11775702238082886, "learning_rate": 1.4857482846015949e-05, "loss": 0.0059, "step": 332880 }, { "epoch": 10.289608703715151, "grad_norm": 0.1260657012462616, "learning_rate": 1.485701922482537e-05, "loss": 0.006, "step": 332910 }, { "epoch": 10.290535946096309, "grad_norm": 0.10195168107748032, "learning_rate": 1.4856555603634792e-05, "loss": 0.0063, "step": 332940 }, { "epoch": 10.291463188477469, "grad_norm": 0.13004106283187866, 
"learning_rate": 1.4856091982444212e-05, "loss": 0.0059, "step": 332970 }, { "epoch": 10.292390430858626, "grad_norm": 0.0816667377948761, "learning_rate": 1.4855628361253633e-05, "loss": 0.0058, "step": 333000 }, { "epoch": 10.293317673239784, "grad_norm": 0.20114180445671082, "learning_rate": 1.4855164740063053e-05, "loss": 0.0053, "step": 333030 }, { "epoch": 10.294244915620943, "grad_norm": 0.11353465914726257, "learning_rate": 1.4854701118872475e-05, "loss": 0.0065, "step": 333060 }, { "epoch": 10.295172158002101, "grad_norm": 0.11125514656305313, "learning_rate": 1.4854237497681896e-05, "loss": 0.0063, "step": 333090 }, { "epoch": 10.29609940038326, "grad_norm": 0.08572777360677719, "learning_rate": 1.4853789330531e-05, "loss": 0.0069, "step": 333120 }, { "epoch": 10.297026642764418, "grad_norm": 0.10343874990940094, "learning_rate": 1.4853325709340422e-05, "loss": 0.0057, "step": 333150 }, { "epoch": 10.297953885145578, "grad_norm": 0.14818672835826874, "learning_rate": 1.4852862088149844e-05, "loss": 0.0059, "step": 333180 }, { "epoch": 10.298881127526736, "grad_norm": 0.10952107608318329, "learning_rate": 1.4852398466959264e-05, "loss": 0.0058, "step": 333210 }, { "epoch": 10.299808369907893, "grad_norm": 0.11092336475849152, "learning_rate": 1.4851934845768685e-05, "loss": 0.0061, "step": 333240 }, { "epoch": 10.300735612289053, "grad_norm": 0.07053566724061966, "learning_rate": 1.4851471224578107e-05, "loss": 0.0057, "step": 333270 }, { "epoch": 10.30166285467021, "grad_norm": 0.06396627426147461, "learning_rate": 1.4851007603387526e-05, "loss": 0.0058, "step": 333300 }, { "epoch": 10.30259009705137, "grad_norm": 0.1347496658563614, "learning_rate": 1.4850543982196948e-05, "loss": 0.0058, "step": 333330 }, { "epoch": 10.303517339432528, "grad_norm": 0.11149008572101593, "learning_rate": 1.485008036100637e-05, "loss": 0.0057, "step": 333360 }, { "epoch": 10.304444581813685, "grad_norm": 0.07110236585140228, "learning_rate": 1.484961673981579e-05, "loss": 
0.0063, "step": 333390 }, { "epoch": 10.305371824194845, "grad_norm": 0.1417757272720337, "learning_rate": 1.484915311862521e-05, "loss": 0.006, "step": 333420 }, { "epoch": 10.306299066576003, "grad_norm": 0.1432322859764099, "learning_rate": 1.4848689497434629e-05, "loss": 0.006, "step": 333450 }, { "epoch": 10.307226308957162, "grad_norm": 0.10530026257038116, "learning_rate": 1.4848225876244052e-05, "loss": 0.0055, "step": 333480 }, { "epoch": 10.30815355133832, "grad_norm": 0.0986294373869896, "learning_rate": 1.4847762255053474e-05, "loss": 0.0058, "step": 333510 }, { "epoch": 10.309080793719477, "grad_norm": 0.1178860291838646, "learning_rate": 1.4847298633862892e-05, "loss": 0.0052, "step": 333540 }, { "epoch": 10.310008036100637, "grad_norm": 0.1580386757850647, "learning_rate": 1.4846835012672313e-05, "loss": 0.0056, "step": 333570 }, { "epoch": 10.310935278481795, "grad_norm": 0.1050214096903801, "learning_rate": 1.4846371391481735e-05, "loss": 0.006, "step": 333600 }, { "epoch": 10.311862520862954, "grad_norm": 0.10902804136276245, "learning_rate": 1.4845907770291154e-05, "loss": 0.005, "step": 333630 }, { "epoch": 10.312789763244112, "grad_norm": 0.12714329361915588, "learning_rate": 1.4845444149100576e-05, "loss": 0.0057, "step": 333660 }, { "epoch": 10.31371700562527, "grad_norm": 0.14622469246387482, "learning_rate": 1.4844980527909998e-05, "loss": 0.0057, "step": 333690 }, { "epoch": 10.314644248006429, "grad_norm": 0.07358163595199585, "learning_rate": 1.4844516906719417e-05, "loss": 0.0052, "step": 333720 }, { "epoch": 10.315571490387587, "grad_norm": 0.10269606858491898, "learning_rate": 1.4844053285528839e-05, "loss": 0.006, "step": 333750 }, { "epoch": 10.316498732768746, "grad_norm": 0.08008749783039093, "learning_rate": 1.4843589664338259e-05, "loss": 0.0051, "step": 333780 }, { "epoch": 10.317425975149904, "grad_norm": 0.16211389005184174, "learning_rate": 1.484312604314768e-05, "loss": 0.0057, "step": 333810 }, { "epoch": 
10.318353217531062, "grad_norm": 0.10397222638130188, "learning_rate": 1.4842662421957102e-05, "loss": 0.0061, "step": 333840 }, { "epoch": 10.319280459912221, "grad_norm": 0.10114403069019318, "learning_rate": 1.4842198800766521e-05, "loss": 0.0059, "step": 333870 }, { "epoch": 10.320207702293379, "grad_norm": 0.10721360146999359, "learning_rate": 1.4841735179575943e-05, "loss": 0.0066, "step": 333900 }, { "epoch": 10.321134944674538, "grad_norm": 0.09718959033489227, "learning_rate": 1.4841271558385364e-05, "loss": 0.0056, "step": 333930 }, { "epoch": 10.322062187055696, "grad_norm": 0.048942070454359055, "learning_rate": 1.4840807937194783e-05, "loss": 0.0055, "step": 333960 }, { "epoch": 10.322989429436856, "grad_norm": 0.10803002864122391, "learning_rate": 1.4840344316004204e-05, "loss": 0.0064, "step": 333990 }, { "epoch": 10.323916671818013, "grad_norm": 0.12646670639514923, "learning_rate": 1.4839880694813627e-05, "loss": 0.0061, "step": 334020 }, { "epoch": 10.324843914199171, "grad_norm": 0.06128557026386261, "learning_rate": 1.4839417073623045e-05, "loss": 0.0055, "step": 334050 }, { "epoch": 10.32577115658033, "grad_norm": 0.06413404643535614, "learning_rate": 1.4838953452432467e-05, "loss": 0.006, "step": 334080 }, { "epoch": 10.326698398961488, "grad_norm": 0.12038882076740265, "learning_rate": 1.4838489831241887e-05, "loss": 0.0055, "step": 334110 }, { "epoch": 10.327625641342648, "grad_norm": 0.14180177450180054, "learning_rate": 1.4838026210051308e-05, "loss": 0.0059, "step": 334140 }, { "epoch": 10.328552883723805, "grad_norm": 0.13109461963176727, "learning_rate": 1.483756258886073e-05, "loss": 0.0063, "step": 334170 }, { "epoch": 10.329480126104963, "grad_norm": 0.09203170239925385, "learning_rate": 1.483709896767015e-05, "loss": 0.0063, "step": 334200 }, { "epoch": 10.330407368486123, "grad_norm": 0.10414082556962967, "learning_rate": 1.4836635346479571e-05, "loss": 0.006, "step": 334230 }, { "epoch": 10.33133461086728, "grad_norm": 
0.0840519443154335, "learning_rate": 1.4836171725288993e-05, "loss": 0.0059, "step": 334260 }, { "epoch": 10.33226185324844, "grad_norm": 0.13210256397724152, "learning_rate": 1.4835708104098412e-05, "loss": 0.0053, "step": 334290 }, { "epoch": 10.333189095629598, "grad_norm": 0.10874934494495392, "learning_rate": 1.4835244482907834e-05, "loss": 0.0064, "step": 334320 }, { "epoch": 10.334116338010755, "grad_norm": 0.10183868557214737, "learning_rate": 1.4834780861717255e-05, "loss": 0.0065, "step": 334350 }, { "epoch": 10.335043580391915, "grad_norm": 0.10437451303005219, "learning_rate": 1.4834317240526675e-05, "loss": 0.0057, "step": 334380 }, { "epoch": 10.335970822773072, "grad_norm": 0.15302711725234985, "learning_rate": 1.4833853619336097e-05, "loss": 0.0068, "step": 334410 }, { "epoch": 10.336898065154232, "grad_norm": 0.07604050636291504, "learning_rate": 1.4833389998145515e-05, "loss": 0.006, "step": 334440 }, { "epoch": 10.33782530753539, "grad_norm": 0.17280514538288116, "learning_rate": 1.4832926376954936e-05, "loss": 0.0064, "step": 334470 }, { "epoch": 10.338752549916547, "grad_norm": 0.08697998523712158, "learning_rate": 1.4832462755764358e-05, "loss": 0.0058, "step": 334500 }, { "epoch": 10.339679792297707, "grad_norm": 0.13755056262016296, "learning_rate": 1.4831999134573778e-05, "loss": 0.0062, "step": 334530 }, { "epoch": 10.340607034678865, "grad_norm": 0.09709110856056213, "learning_rate": 1.48315355133832e-05, "loss": 0.0064, "step": 334560 }, { "epoch": 10.341534277060024, "grad_norm": 0.1359110176563263, "learning_rate": 1.483107189219262e-05, "loss": 0.0058, "step": 334590 }, { "epoch": 10.342461519441182, "grad_norm": 0.1274619698524475, "learning_rate": 1.483060827100204e-05, "loss": 0.0063, "step": 334620 }, { "epoch": 10.34338876182234, "grad_norm": 0.10827012360095978, "learning_rate": 1.4830144649811462e-05, "loss": 0.0055, "step": 334650 }, { "epoch": 10.344316004203499, "grad_norm": 0.13399803638458252, "learning_rate": 
1.4829681028620883e-05, "loss": 0.0062, "step": 334680 }, { "epoch": 10.345243246584657, "grad_norm": 0.17450633645057678, "learning_rate": 1.4829217407430303e-05, "loss": 0.0063, "step": 334710 }, { "epoch": 10.346170488965816, "grad_norm": 0.15704727172851562, "learning_rate": 1.4828753786239725e-05, "loss": 0.0059, "step": 334740 }, { "epoch": 10.347097731346974, "grad_norm": 0.11823378503322601, "learning_rate": 1.4828290165049145e-05, "loss": 0.0055, "step": 334770 }, { "epoch": 10.348024973728133, "grad_norm": 0.17545832693576813, "learning_rate": 1.4827826543858566e-05, "loss": 0.0054, "step": 334800 }, { "epoch": 10.348952216109291, "grad_norm": 0.11341787874698639, "learning_rate": 1.4827362922667988e-05, "loss": 0.0058, "step": 334830 }, { "epoch": 10.349879458490449, "grad_norm": 0.10303802043199539, "learning_rate": 1.4826899301477406e-05, "loss": 0.0061, "step": 334860 }, { "epoch": 10.350806700871608, "grad_norm": 0.09880268573760986, "learning_rate": 1.4826435680286829e-05, "loss": 0.0056, "step": 334890 }, { "epoch": 10.351733943252766, "grad_norm": 0.07856739312410355, "learning_rate": 1.482597205909625e-05, "loss": 0.0059, "step": 334920 }, { "epoch": 10.352661185633925, "grad_norm": 0.1339263767004013, "learning_rate": 1.4825508437905669e-05, "loss": 0.0057, "step": 334950 }, { "epoch": 10.353588428015083, "grad_norm": 0.08583686500787735, "learning_rate": 1.482504481671509e-05, "loss": 0.0057, "step": 334980 }, { "epoch": 10.35451567039624, "grad_norm": 0.12505848705768585, "learning_rate": 1.4824581195524512e-05, "loss": 0.006, "step": 335010 }, { "epoch": 10.3554429127774, "grad_norm": 0.14027775824069977, "learning_rate": 1.4824117574333931e-05, "loss": 0.006, "step": 335040 }, { "epoch": 10.356370155158558, "grad_norm": 0.11115894466638565, "learning_rate": 1.4823653953143353e-05, "loss": 0.006, "step": 335070 }, { "epoch": 10.357297397539718, "grad_norm": 0.10361188650131226, "learning_rate": 1.4823190331952773e-05, "loss": 0.0058, "step": 
335100 }, { "epoch": 10.358224639920875, "grad_norm": 0.07651394605636597, "learning_rate": 1.4822726710762194e-05, "loss": 0.0059, "step": 335130 }, { "epoch": 10.359151882302033, "grad_norm": 0.11040233075618744, "learning_rate": 1.4822263089571616e-05, "loss": 0.006, "step": 335160 }, { "epoch": 10.360079124683192, "grad_norm": 0.11825746297836304, "learning_rate": 1.4821799468381036e-05, "loss": 0.006, "step": 335190 }, { "epoch": 10.36100636706435, "grad_norm": 0.11853298544883728, "learning_rate": 1.4821335847190457e-05, "loss": 0.0065, "step": 335220 }, { "epoch": 10.36193360944551, "grad_norm": 0.11547596007585526, "learning_rate": 1.4820872225999879e-05, "loss": 0.0056, "step": 335250 }, { "epoch": 10.362860851826667, "grad_norm": 0.08045581728219986, "learning_rate": 1.4820408604809298e-05, "loss": 0.0067, "step": 335280 }, { "epoch": 10.363788094207825, "grad_norm": 0.10233037918806076, "learning_rate": 1.481994498361872e-05, "loss": 0.0058, "step": 335310 }, { "epoch": 10.364715336588985, "grad_norm": 0.10600295662879944, "learning_rate": 1.4819481362428141e-05, "loss": 0.006, "step": 335340 }, { "epoch": 10.365642578970142, "grad_norm": 0.14120155572891235, "learning_rate": 1.481901774123756e-05, "loss": 0.0064, "step": 335370 }, { "epoch": 10.366569821351302, "grad_norm": 0.09022247791290283, "learning_rate": 1.4818554120046981e-05, "loss": 0.0057, "step": 335400 }, { "epoch": 10.36749706373246, "grad_norm": 0.10772662609815598, "learning_rate": 1.4818090498856404e-05, "loss": 0.0061, "step": 335430 }, { "epoch": 10.368424306113617, "grad_norm": 0.09402233362197876, "learning_rate": 1.4817626877665822e-05, "loss": 0.0056, "step": 335460 }, { "epoch": 10.369351548494777, "grad_norm": 0.1072973757982254, "learning_rate": 1.4817163256475244e-05, "loss": 0.0055, "step": 335490 }, { "epoch": 10.370278790875934, "grad_norm": 0.119561105966568, "learning_rate": 1.4816699635284664e-05, "loss": 0.0057, "step": 335520 }, { "epoch": 10.371206033257094, 
"grad_norm": 0.15803158283233643, "learning_rate": 1.4816236014094085e-05, "loss": 0.0057, "step": 335550 }, { "epoch": 10.372133275638252, "grad_norm": 0.12107343971729279, "learning_rate": 1.4815772392903507e-05, "loss": 0.0065, "step": 335580 }, { "epoch": 10.373060518019411, "grad_norm": 0.1084141656756401, "learning_rate": 1.4815308771712926e-05, "loss": 0.0064, "step": 335610 }, { "epoch": 10.373987760400569, "grad_norm": 0.12067589163780212, "learning_rate": 1.4814845150522348e-05, "loss": 0.0062, "step": 335640 }, { "epoch": 10.374915002781727, "grad_norm": 0.08020944148302078, "learning_rate": 1.481438152933177e-05, "loss": 0.0062, "step": 335670 }, { "epoch": 10.375842245162886, "grad_norm": 0.09844204783439636, "learning_rate": 1.481391790814119e-05, "loss": 0.0061, "step": 335700 }, { "epoch": 10.376769487544044, "grad_norm": 0.10079088807106018, "learning_rate": 1.481345428695061e-05, "loss": 0.0062, "step": 335730 }, { "epoch": 10.377696729925203, "grad_norm": 0.06331286579370499, "learning_rate": 1.4812990665760032e-05, "loss": 0.0052, "step": 335760 }, { "epoch": 10.378623972306361, "grad_norm": 0.08064138889312744, "learning_rate": 1.4812527044569452e-05, "loss": 0.006, "step": 335790 }, { "epoch": 10.379551214687519, "grad_norm": 0.07877814769744873, "learning_rate": 1.4812063423378874e-05, "loss": 0.0059, "step": 335820 }, { "epoch": 10.380478457068678, "grad_norm": 0.09224538505077362, "learning_rate": 1.4811599802188292e-05, "loss": 0.0052, "step": 335850 }, { "epoch": 10.381405699449836, "grad_norm": 0.09432051330804825, "learning_rate": 1.4811136180997713e-05, "loss": 0.0057, "step": 335880 }, { "epoch": 10.382332941830995, "grad_norm": 0.0937696248292923, "learning_rate": 1.4810672559807135e-05, "loss": 0.0059, "step": 335910 }, { "epoch": 10.383260184212153, "grad_norm": 0.10144583135843277, "learning_rate": 1.4810208938616555e-05, "loss": 0.0063, "step": 335940 }, { "epoch": 10.38418742659331, "grad_norm": 0.1140723004937172, 
"learning_rate": 1.4809745317425976e-05, "loss": 0.0056, "step": 335970 }, { "epoch": 10.38511466897447, "grad_norm": 0.11623179912567139, "learning_rate": 1.4809281696235398e-05, "loss": 0.006, "step": 336000 }, { "epoch": 10.386041911355628, "grad_norm": 0.11926453560590744, "learning_rate": 1.4808818075044817e-05, "loss": 0.0054, "step": 336030 }, { "epoch": 10.386969153736787, "grad_norm": 0.1336412876844406, "learning_rate": 1.4808354453854239e-05, "loss": 0.0057, "step": 336060 }, { "epoch": 10.387896396117945, "grad_norm": 0.08865708857774734, "learning_rate": 1.480789083266366e-05, "loss": 0.0063, "step": 336090 }, { "epoch": 10.388823638499103, "grad_norm": 0.08224453777074814, "learning_rate": 1.480742721147308e-05, "loss": 0.0055, "step": 336120 }, { "epoch": 10.389750880880262, "grad_norm": 0.11406426131725311, "learning_rate": 1.4806963590282502e-05, "loss": 0.0057, "step": 336150 }, { "epoch": 10.39067812326142, "grad_norm": 0.13242650032043457, "learning_rate": 1.4806499969091922e-05, "loss": 0.0063, "step": 336180 }, { "epoch": 10.39160536564258, "grad_norm": 0.10675380378961563, "learning_rate": 1.4806036347901343e-05, "loss": 0.0056, "step": 336210 }, { "epoch": 10.392532608023737, "grad_norm": 0.11954480409622192, "learning_rate": 1.4805572726710765e-05, "loss": 0.0062, "step": 336240 }, { "epoch": 10.393459850404895, "grad_norm": 0.13395579159259796, "learning_rate": 1.4805109105520183e-05, "loss": 0.0062, "step": 336270 }, { "epoch": 10.394387092786054, "grad_norm": 0.10222529619932175, "learning_rate": 1.4804645484329606e-05, "loss": 0.0061, "step": 336300 }, { "epoch": 10.395314335167212, "grad_norm": 0.09054212272167206, "learning_rate": 1.4804181863139027e-05, "loss": 0.0057, "step": 336330 }, { "epoch": 10.396241577548372, "grad_norm": 0.12210255861282349, "learning_rate": 1.4803718241948445e-05, "loss": 0.0059, "step": 336360 }, { "epoch": 10.39716881992953, "grad_norm": 0.1335752010345459, "learning_rate": 1.4803254620757867e-05, "loss": 
0.0064, "step": 336390 }, { "epoch": 10.398096062310689, "grad_norm": 0.1205308809876442, "learning_rate": 1.4802790999567288e-05, "loss": 0.0059, "step": 336420 }, { "epoch": 10.399023304691847, "grad_norm": 0.1203135997056961, "learning_rate": 1.4802327378376708e-05, "loss": 0.006, "step": 336450 }, { "epoch": 10.399950547073004, "grad_norm": 0.13508203625679016, "learning_rate": 1.480186375718613e-05, "loss": 0.0057, "step": 336480 }, { "epoch": 10.400877789454164, "grad_norm": 0.11142463982105255, "learning_rate": 1.480140013599555e-05, "loss": 0.0062, "step": 336510 }, { "epoch": 10.401805031835321, "grad_norm": 0.08890826255083084, "learning_rate": 1.4800936514804971e-05, "loss": 0.0058, "step": 336540 }, { "epoch": 10.402732274216481, "grad_norm": 0.10001295059919357, "learning_rate": 1.4800472893614393e-05, "loss": 0.0053, "step": 336570 }, { "epoch": 10.403659516597639, "grad_norm": 0.10140860080718994, "learning_rate": 1.4800009272423812e-05, "loss": 0.006, "step": 336600 }, { "epoch": 10.404586758978796, "grad_norm": 0.12738968431949615, "learning_rate": 1.4799561105272919e-05, "loss": 0.0058, "step": 336630 }, { "epoch": 10.405514001359956, "grad_norm": 0.1167810708284378, "learning_rate": 1.479909748408234e-05, "loss": 0.0059, "step": 336660 }, { "epoch": 10.406441243741114, "grad_norm": 0.07738529145717621, "learning_rate": 1.479863386289176e-05, "loss": 0.0062, "step": 336690 }, { "epoch": 10.407368486122273, "grad_norm": 0.08426977694034576, "learning_rate": 1.4798170241701181e-05, "loss": 0.006, "step": 336720 }, { "epoch": 10.40829572850343, "grad_norm": 0.08960714191198349, "learning_rate": 1.4797706620510603e-05, "loss": 0.0052, "step": 336750 }, { "epoch": 10.409222970884588, "grad_norm": 0.1285729706287384, "learning_rate": 1.4797242999320023e-05, "loss": 0.0056, "step": 336780 }, { "epoch": 10.410150213265748, "grad_norm": 0.09468922019004822, "learning_rate": 1.4796779378129444e-05, "loss": 0.0052, "step": 336810 }, { "epoch": 
10.411077455646906, "grad_norm": 0.14227737486362457, "learning_rate": 1.4796315756938866e-05, "loss": 0.0062, "step": 336840 }, { "epoch": 10.412004698028065, "grad_norm": 0.09013182669878006, "learning_rate": 1.4795852135748286e-05, "loss": 0.0057, "step": 336870 }, { "epoch": 10.412931940409223, "grad_norm": 0.0963730737566948, "learning_rate": 1.4795388514557707e-05, "loss": 0.006, "step": 336900 }, { "epoch": 10.41385918279038, "grad_norm": 0.13302351534366608, "learning_rate": 1.4794924893367127e-05, "loss": 0.0056, "step": 336930 }, { "epoch": 10.41478642517154, "grad_norm": 0.10144738107919693, "learning_rate": 1.4794461272176548e-05, "loss": 0.0058, "step": 336960 }, { "epoch": 10.415713667552698, "grad_norm": 0.10984387248754501, "learning_rate": 1.479399765098597e-05, "loss": 0.0064, "step": 336990 }, { "epoch": 10.416640909933857, "grad_norm": 0.1434950977563858, "learning_rate": 1.4793534029795388e-05, "loss": 0.006, "step": 337020 }, { "epoch": 10.417568152315015, "grad_norm": 0.09366634488105774, "learning_rate": 1.4793070408604811e-05, "loss": 0.0065, "step": 337050 }, { "epoch": 10.418495394696173, "grad_norm": 0.07933468371629715, "learning_rate": 1.4792606787414233e-05, "loss": 0.006, "step": 337080 }, { "epoch": 10.419422637077332, "grad_norm": 0.07095826417207718, "learning_rate": 1.4792143166223651e-05, "loss": 0.0061, "step": 337110 }, { "epoch": 10.42034987945849, "grad_norm": 0.13686871528625488, "learning_rate": 1.4791679545033072e-05, "loss": 0.0062, "step": 337140 }, { "epoch": 10.42127712183965, "grad_norm": 0.0993095338344574, "learning_rate": 1.4791215923842494e-05, "loss": 0.0054, "step": 337170 }, { "epoch": 10.422204364220807, "grad_norm": 0.11806042492389679, "learning_rate": 1.4790752302651914e-05, "loss": 0.0054, "step": 337200 }, { "epoch": 10.423131606601967, "grad_norm": 0.0796746164560318, "learning_rate": 1.4790288681461335e-05, "loss": 0.0059, "step": 337230 }, { "epoch": 10.424058848983124, "grad_norm": 
0.0985175147652626, "learning_rate": 1.4789825060270755e-05, "loss": 0.006, "step": 337260 }, { "epoch": 10.424986091364282, "grad_norm": 0.10586050152778625, "learning_rate": 1.4789361439080177e-05, "loss": 0.0059, "step": 337290 }, { "epoch": 10.425913333745441, "grad_norm": 0.13412849605083466, "learning_rate": 1.4788897817889598e-05, "loss": 0.0058, "step": 337320 }, { "epoch": 10.4268405761266, "grad_norm": 0.12215336412191391, "learning_rate": 1.4788434196699018e-05, "loss": 0.0059, "step": 337350 }, { "epoch": 10.427767818507759, "grad_norm": 0.051657285541296005, "learning_rate": 1.478797057550844e-05, "loss": 0.0056, "step": 337380 }, { "epoch": 10.428695060888916, "grad_norm": 0.12082667648792267, "learning_rate": 1.4787506954317861e-05, "loss": 0.0062, "step": 337410 }, { "epoch": 10.429622303270074, "grad_norm": 0.11850783973932266, "learning_rate": 1.478704333312728e-05, "loss": 0.0059, "step": 337440 }, { "epoch": 10.430549545651234, "grad_norm": 0.11951916664838791, "learning_rate": 1.4786579711936702e-05, "loss": 0.0064, "step": 337470 }, { "epoch": 10.431476788032391, "grad_norm": 0.09933902323246002, "learning_rate": 1.4786116090746124e-05, "loss": 0.0059, "step": 337500 }, { "epoch": 10.43240403041355, "grad_norm": 0.12347648292779922, "learning_rate": 1.4785652469555542e-05, "loss": 0.0059, "step": 337530 }, { "epoch": 10.433331272794709, "grad_norm": 0.14623519778251648, "learning_rate": 1.4785188848364963e-05, "loss": 0.0062, "step": 337560 }, { "epoch": 10.434258515175866, "grad_norm": 0.11449797451496124, "learning_rate": 1.4784725227174383e-05, "loss": 0.0059, "step": 337590 }, { "epoch": 10.435185757557026, "grad_norm": 0.1411551684141159, "learning_rate": 1.4784261605983805e-05, "loss": 0.0054, "step": 337620 }, { "epoch": 10.436112999938183, "grad_norm": 0.10476770997047424, "learning_rate": 1.4783797984793226e-05, "loss": 0.0054, "step": 337650 }, { "epoch": 10.437040242319343, "grad_norm": 0.12917307019233704, "learning_rate": 
1.4783334363602646e-05, "loss": 0.0058, "step": 337680 }, { "epoch": 10.4379674847005, "grad_norm": 0.1325032263994217, "learning_rate": 1.4782870742412067e-05, "loss": 0.005, "step": 337710 }, { "epoch": 10.438894727081658, "grad_norm": 0.06389416754245758, "learning_rate": 1.4782407121221489e-05, "loss": 0.006, "step": 337740 }, { "epoch": 10.439821969462818, "grad_norm": 0.11202951520681381, "learning_rate": 1.4781943500030909e-05, "loss": 0.0063, "step": 337770 }, { "epoch": 10.440749211843976, "grad_norm": 0.10175728797912598, "learning_rate": 1.478147987884033e-05, "loss": 0.0065, "step": 337800 }, { "epoch": 10.441676454225135, "grad_norm": 0.12424095720052719, "learning_rate": 1.4781016257649752e-05, "loss": 0.0059, "step": 337830 }, { "epoch": 10.442603696606293, "grad_norm": 0.12307877093553543, "learning_rate": 1.4780552636459172e-05, "loss": 0.0054, "step": 337860 }, { "epoch": 10.44353093898745, "grad_norm": 0.15977782011032104, "learning_rate": 1.4780089015268593e-05, "loss": 0.005, "step": 337890 }, { "epoch": 10.44445818136861, "grad_norm": 0.12700282037258148, "learning_rate": 1.4779625394078013e-05, "loss": 0.0055, "step": 337920 }, { "epoch": 10.445385423749768, "grad_norm": 0.07499177753925323, "learning_rate": 1.4779161772887434e-05, "loss": 0.0057, "step": 337950 }, { "epoch": 10.446312666130927, "grad_norm": 0.08990725874900818, "learning_rate": 1.4778698151696856e-05, "loss": 0.0058, "step": 337980 }, { "epoch": 10.447239908512085, "grad_norm": 0.12552601099014282, "learning_rate": 1.4778234530506274e-05, "loss": 0.0062, "step": 338010 }, { "epoch": 10.448167150893244, "grad_norm": 0.1885773241519928, "learning_rate": 1.4777770909315696e-05, "loss": 0.0055, "step": 338040 }, { "epoch": 10.449094393274402, "grad_norm": 0.0820608139038086, "learning_rate": 1.4777307288125117e-05, "loss": 0.0053, "step": 338070 }, { "epoch": 10.45002163565556, "grad_norm": 0.10903146117925644, "learning_rate": 1.4776843666934537e-05, "loss": 0.0061, "step": 
338100 }, { "epoch": 10.45094887803672, "grad_norm": 0.11188001930713654, "learning_rate": 1.4776380045743958e-05, "loss": 0.0057, "step": 338130 }, { "epoch": 10.451876120417877, "grad_norm": 0.16951867938041687, "learning_rate": 1.477591642455338e-05, "loss": 0.0059, "step": 338160 }, { "epoch": 10.452803362799036, "grad_norm": 0.07860106974840164, "learning_rate": 1.47754528033628e-05, "loss": 0.0064, "step": 338190 }, { "epoch": 10.453730605180194, "grad_norm": 0.13162371516227722, "learning_rate": 1.4774989182172221e-05, "loss": 0.0058, "step": 338220 }, { "epoch": 10.454657847561352, "grad_norm": 0.13397009670734406, "learning_rate": 1.4774525560981641e-05, "loss": 0.0056, "step": 338250 }, { "epoch": 10.455585089942511, "grad_norm": 0.12854865193367004, "learning_rate": 1.4774061939791063e-05, "loss": 0.0054, "step": 338280 }, { "epoch": 10.456512332323669, "grad_norm": 0.13447590172290802, "learning_rate": 1.4773598318600484e-05, "loss": 0.0062, "step": 338310 }, { "epoch": 10.457439574704829, "grad_norm": 0.10549536347389221, "learning_rate": 1.4773134697409904e-05, "loss": 0.005, "step": 338340 }, { "epoch": 10.458366817085986, "grad_norm": 0.11948245763778687, "learning_rate": 1.4772671076219325e-05, "loss": 0.0058, "step": 338370 }, { "epoch": 10.459294059467144, "grad_norm": 0.0822116807103157, "learning_rate": 1.4772207455028747e-05, "loss": 0.0061, "step": 338400 }, { "epoch": 10.460221301848303, "grad_norm": 0.13666415214538574, "learning_rate": 1.4771743833838165e-05, "loss": 0.0057, "step": 338430 }, { "epoch": 10.461148544229461, "grad_norm": 0.0978577733039856, "learning_rate": 1.4771280212647588e-05, "loss": 0.0062, "step": 338460 }, { "epoch": 10.46207578661062, "grad_norm": 0.1287824660539627, "learning_rate": 1.477081659145701e-05, "loss": 0.0061, "step": 338490 }, { "epoch": 10.463003028991778, "grad_norm": 0.15325893461704254, "learning_rate": 1.4770352970266428e-05, "loss": 0.0066, "step": 338520 }, { "epoch": 10.463930271372936, 
"grad_norm": 0.1275845319032669, "learning_rate": 1.476988934907585e-05, "loss": 0.0058, "step": 338550 }, { "epoch": 10.464857513754096, "grad_norm": 0.12906675040721893, "learning_rate": 1.476942572788527e-05, "loss": 0.0061, "step": 338580 }, { "epoch": 10.465784756135253, "grad_norm": 0.10155259817838669, "learning_rate": 1.476896210669469e-05, "loss": 0.006, "step": 338610 }, { "epoch": 10.466711998516413, "grad_norm": 0.08189208805561066, "learning_rate": 1.4768498485504112e-05, "loss": 0.0058, "step": 338640 }, { "epoch": 10.46763924089757, "grad_norm": 0.11422480642795563, "learning_rate": 1.4768034864313532e-05, "loss": 0.0066, "step": 338670 }, { "epoch": 10.468566483278728, "grad_norm": 0.10893938690423965, "learning_rate": 1.4767571243122953e-05, "loss": 0.0053, "step": 338700 }, { "epoch": 10.469493725659888, "grad_norm": 0.08552782237529755, "learning_rate": 1.4767107621932375e-05, "loss": 0.0057, "step": 338730 }, { "epoch": 10.470420968041045, "grad_norm": 0.11018998920917511, "learning_rate": 1.4766644000741795e-05, "loss": 0.0057, "step": 338760 }, { "epoch": 10.471348210422205, "grad_norm": 0.07314931601285934, "learning_rate": 1.4766180379551216e-05, "loss": 0.0055, "step": 338790 }, { "epoch": 10.472275452803363, "grad_norm": 0.1390286237001419, "learning_rate": 1.4765716758360638e-05, "loss": 0.0054, "step": 338820 }, { "epoch": 10.473202695184522, "grad_norm": 0.09714877605438232, "learning_rate": 1.4765253137170058e-05, "loss": 0.0059, "step": 338850 }, { "epoch": 10.47412993756568, "grad_norm": 0.11025232076644897, "learning_rate": 1.4764789515979479e-05, "loss": 0.0057, "step": 338880 }, { "epoch": 10.475057179946837, "grad_norm": 0.08516675978899002, "learning_rate": 1.47643258947889e-05, "loss": 0.006, "step": 338910 }, { "epoch": 10.475984422327997, "grad_norm": 0.10687036067247391, "learning_rate": 1.4763862273598319e-05, "loss": 0.0061, "step": 338940 }, { "epoch": 10.476911664709155, "grad_norm": 0.0972999781370163, "learning_rate": 
1.476339865240774e-05, "loss": 0.0056, "step": 338970 }, { "epoch": 10.477838907090314, "grad_norm": 0.15205718576908112, "learning_rate": 1.476293503121716e-05, "loss": 0.0059, "step": 339000 }, { "epoch": 10.478766149471472, "grad_norm": 0.1373414397239685, "learning_rate": 1.4762471410026582e-05, "loss": 0.0056, "step": 339030 }, { "epoch": 10.47969339185263, "grad_norm": 0.1286739856004715, "learning_rate": 1.4762007788836003e-05, "loss": 0.0057, "step": 339060 }, { "epoch": 10.480620634233789, "grad_norm": 0.08608133345842361, "learning_rate": 1.4761544167645423e-05, "loss": 0.0056, "step": 339090 }, { "epoch": 10.481547876614947, "grad_norm": 0.1688038408756256, "learning_rate": 1.4761080546454844e-05, "loss": 0.0067, "step": 339120 }, { "epoch": 10.482475118996106, "grad_norm": 0.11580642312765121, "learning_rate": 1.4760616925264266e-05, "loss": 0.0056, "step": 339150 }, { "epoch": 10.483402361377264, "grad_norm": 0.20252704620361328, "learning_rate": 1.4760153304073686e-05, "loss": 0.0053, "step": 339180 }, { "epoch": 10.484329603758422, "grad_norm": 0.12236317247152328, "learning_rate": 1.4759689682883107e-05, "loss": 0.0056, "step": 339210 }, { "epoch": 10.485256846139581, "grad_norm": 0.13559208810329437, "learning_rate": 1.4759226061692529e-05, "loss": 0.0053, "step": 339240 }, { "epoch": 10.486184088520739, "grad_norm": 0.09489675611257553, "learning_rate": 1.4758762440501949e-05, "loss": 0.006, "step": 339270 }, { "epoch": 10.487111330901898, "grad_norm": 0.03836514428257942, "learning_rate": 1.475829881931137e-05, "loss": 0.005, "step": 339300 }, { "epoch": 10.488038573283056, "grad_norm": 0.1402183324098587, "learning_rate": 1.475783519812079e-05, "loss": 0.0058, "step": 339330 }, { "epoch": 10.488965815664214, "grad_norm": 0.1363985687494278, "learning_rate": 1.4757371576930211e-05, "loss": 0.0055, "step": 339360 }, { "epoch": 10.489893058045373, "grad_norm": 0.10804799199104309, "learning_rate": 1.4756907955739633e-05, "loss": 0.0059, "step": 
339390 }, { "epoch": 10.490820300426531, "grad_norm": 0.14311915636062622, "learning_rate": 1.4756444334549051e-05, "loss": 0.0061, "step": 339420 }, { "epoch": 10.49174754280769, "grad_norm": 0.09381264448165894, "learning_rate": 1.4755980713358472e-05, "loss": 0.0055, "step": 339450 }, { "epoch": 10.492674785188848, "grad_norm": 0.0895741656422615, "learning_rate": 1.4755517092167894e-05, "loss": 0.0057, "step": 339480 }, { "epoch": 10.493602027570006, "grad_norm": 0.1876780092716217, "learning_rate": 1.4755053470977314e-05, "loss": 0.0058, "step": 339510 }, { "epoch": 10.494529269951165, "grad_norm": 0.14882324635982513, "learning_rate": 1.4754589849786735e-05, "loss": 0.0061, "step": 339540 }, { "epoch": 10.495456512332323, "grad_norm": 0.12878063321113586, "learning_rate": 1.4754126228596157e-05, "loss": 0.0057, "step": 339570 }, { "epoch": 10.496383754713483, "grad_norm": 0.10931199789047241, "learning_rate": 1.4753662607405577e-05, "loss": 0.0053, "step": 339600 }, { "epoch": 10.49731099709464, "grad_norm": 0.09796105325222015, "learning_rate": 1.4753198986214998e-05, "loss": 0.0058, "step": 339630 }, { "epoch": 10.4982382394758, "grad_norm": 0.09492971748113632, "learning_rate": 1.4752735365024418e-05, "loss": 0.0058, "step": 339660 }, { "epoch": 10.499165481856958, "grad_norm": 0.1316155195236206, "learning_rate": 1.475227174383384e-05, "loss": 0.0059, "step": 339690 }, { "epoch": 10.500092724238115, "grad_norm": 0.11684073507785797, "learning_rate": 1.4751808122643261e-05, "loss": 0.0055, "step": 339720 }, { "epoch": 10.501019966619275, "grad_norm": 0.10762128233909607, "learning_rate": 1.475134450145268e-05, "loss": 0.0058, "step": 339750 }, { "epoch": 10.501947209000432, "grad_norm": 0.1149926409125328, "learning_rate": 1.4750880880262102e-05, "loss": 0.0056, "step": 339780 }, { "epoch": 10.502874451381592, "grad_norm": 0.11200783401727676, "learning_rate": 1.4750417259071524e-05, "loss": 0.0058, "step": 339810 }, { "epoch": 10.50380169376275, 
"grad_norm": 0.10936075448989868, "learning_rate": 1.4749953637880942e-05, "loss": 0.0057, "step": 339840 }, { "epoch": 10.504728936143907, "grad_norm": 0.16803449392318726, "learning_rate": 1.4749490016690365e-05, "loss": 0.0066, "step": 339870 }, { "epoch": 10.505656178525067, "grad_norm": 0.15800312161445618, "learning_rate": 1.4749026395499787e-05, "loss": 0.0062, "step": 339900 }, { "epoch": 10.506583420906225, "grad_norm": 0.09770314395427704, "learning_rate": 1.4748562774309205e-05, "loss": 0.0062, "step": 339930 }, { "epoch": 10.507510663287384, "grad_norm": 0.11091423034667969, "learning_rate": 1.4748099153118626e-05, "loss": 0.0056, "step": 339960 }, { "epoch": 10.508437905668542, "grad_norm": 0.0824192687869072, "learning_rate": 1.4747635531928046e-05, "loss": 0.0063, "step": 339990 }, { "epoch": 10.5093651480497, "grad_norm": 0.06382495164871216, "learning_rate": 1.4747171910737468e-05, "loss": 0.0061, "step": 340020 }, { "epoch": 10.510292390430859, "grad_norm": 0.0789131373167038, "learning_rate": 1.4746708289546889e-05, "loss": 0.0057, "step": 340050 }, { "epoch": 10.511219632812017, "grad_norm": 0.14500296115875244, "learning_rate": 1.4746244668356309e-05, "loss": 0.0059, "step": 340080 }, { "epoch": 10.512146875193176, "grad_norm": 0.12768973410129547, "learning_rate": 1.474578104716573e-05, "loss": 0.0052, "step": 340110 }, { "epoch": 10.513074117574334, "grad_norm": 0.12648549675941467, "learning_rate": 1.4745317425975152e-05, "loss": 0.0057, "step": 340140 }, { "epoch": 10.514001359955492, "grad_norm": 0.1736985743045807, "learning_rate": 1.4744853804784572e-05, "loss": 0.0064, "step": 340170 }, { "epoch": 10.514928602336651, "grad_norm": 0.11388292163610458, "learning_rate": 1.4744390183593993e-05, "loss": 0.0053, "step": 340200 }, { "epoch": 10.515855844717809, "grad_norm": 0.08887332677841187, "learning_rate": 1.4743926562403415e-05, "loss": 0.0057, "step": 340230 }, { "epoch": 10.516783087098968, "grad_norm": 0.12426315993070602, 
"learning_rate": 1.4743462941212834e-05, "loss": 0.0065, "step": 340260 }, { "epoch": 10.517710329480126, "grad_norm": 0.06510350853204727, "learning_rate": 1.4742999320022256e-05, "loss": 0.0062, "step": 340290 }, { "epoch": 10.518637571861284, "grad_norm": 0.13250118494033813, "learning_rate": 1.4742535698831674e-05, "loss": 0.005, "step": 340320 }, { "epoch": 10.519564814242443, "grad_norm": 0.15973514318466187, "learning_rate": 1.4742072077641096e-05, "loss": 0.0056, "step": 340350 }, { "epoch": 10.5204920566236, "grad_norm": 0.11524398624897003, "learning_rate": 1.4741608456450517e-05, "loss": 0.0061, "step": 340380 }, { "epoch": 10.52141929900476, "grad_norm": 0.11849658191204071, "learning_rate": 1.4741144835259937e-05, "loss": 0.0054, "step": 340410 }, { "epoch": 10.522346541385918, "grad_norm": 0.09230751544237137, "learning_rate": 1.4740681214069358e-05, "loss": 0.006, "step": 340440 }, { "epoch": 10.523273783767078, "grad_norm": 0.10474354773759842, "learning_rate": 1.474021759287878e-05, "loss": 0.0056, "step": 340470 }, { "epoch": 10.524201026148235, "grad_norm": 0.12824735045433044, "learning_rate": 1.47397539716882e-05, "loss": 0.0056, "step": 340500 }, { "epoch": 10.525128268529393, "grad_norm": 0.1514195203781128, "learning_rate": 1.4739290350497621e-05, "loss": 0.006, "step": 340530 }, { "epoch": 10.526055510910552, "grad_norm": 0.09266337007284164, "learning_rate": 1.4738826729307043e-05, "loss": 0.0058, "step": 340560 }, { "epoch": 10.52698275329171, "grad_norm": 0.14333601295948029, "learning_rate": 1.4738363108116463e-05, "loss": 0.0064, "step": 340590 }, { "epoch": 10.52790999567287, "grad_norm": 0.11130354553461075, "learning_rate": 1.4737899486925884e-05, "loss": 0.0063, "step": 340620 }, { "epoch": 10.528837238054027, "grad_norm": 0.18365556001663208, "learning_rate": 1.4737435865735304e-05, "loss": 0.0056, "step": 340650 }, { "epoch": 10.529764480435185, "grad_norm": 0.1516563594341278, "learning_rate": 1.4736972244544725e-05, "loss": 
0.0066, "step": 340680 }, { "epoch": 10.530691722816345, "grad_norm": 0.12427947670221329, "learning_rate": 1.4736508623354147e-05, "loss": 0.006, "step": 340710 }, { "epoch": 10.531618965197502, "grad_norm": 0.07238118350505829, "learning_rate": 1.4736045002163567e-05, "loss": 0.0056, "step": 340740 }, { "epoch": 10.532546207578662, "grad_norm": 0.09781108051538467, "learning_rate": 1.4735581380972988e-05, "loss": 0.0056, "step": 340770 }, { "epoch": 10.53347344995982, "grad_norm": 0.15555959939956665, "learning_rate": 1.473511775978241e-05, "loss": 0.0056, "step": 340800 }, { "epoch": 10.534400692340977, "grad_norm": 0.11555993556976318, "learning_rate": 1.4734654138591828e-05, "loss": 0.0064, "step": 340830 }, { "epoch": 10.535327934722137, "grad_norm": 0.10514303296804428, "learning_rate": 1.473419051740125e-05, "loss": 0.0057, "step": 340860 }, { "epoch": 10.536255177103294, "grad_norm": 0.1277167648077011, "learning_rate": 1.4733726896210671e-05, "loss": 0.0052, "step": 340890 }, { "epoch": 10.537182419484454, "grad_norm": 0.12843163311481476, "learning_rate": 1.4733278729059777e-05, "loss": 0.0053, "step": 340920 }, { "epoch": 10.538109661865612, "grad_norm": 0.08041873574256897, "learning_rate": 1.4732815107869199e-05, "loss": 0.0058, "step": 340950 }, { "epoch": 10.53903690424677, "grad_norm": 0.07444862276315689, "learning_rate": 1.473235148667862e-05, "loss": 0.0058, "step": 340980 }, { "epoch": 10.539964146627929, "grad_norm": 0.15029142796993256, "learning_rate": 1.473188786548804e-05, "loss": 0.0061, "step": 341010 }, { "epoch": 10.540891389009087, "grad_norm": 0.16477786004543304, "learning_rate": 1.4731424244297461e-05, "loss": 0.0063, "step": 341040 }, { "epoch": 10.541818631390246, "grad_norm": 0.11846582591533661, "learning_rate": 1.473096062310688e-05, "loss": 0.0065, "step": 341070 }, { "epoch": 10.542745873771404, "grad_norm": 0.14084960520267487, "learning_rate": 1.4730497001916301e-05, "loss": 0.0054, "step": 341100 }, { "epoch": 
10.543673116152561, "grad_norm": 0.1137123852968216, "learning_rate": 1.4730033380725724e-05, "loss": 0.0054, "step": 341130 }, { "epoch": 10.544600358533721, "grad_norm": 0.10247063636779785, "learning_rate": 1.4729569759535142e-05, "loss": 0.006, "step": 341160 }, { "epoch": 10.545527600914879, "grad_norm": 0.09029984474182129, "learning_rate": 1.4729106138344564e-05, "loss": 0.0062, "step": 341190 }, { "epoch": 10.546454843296038, "grad_norm": 0.0779469832777977, "learning_rate": 1.4728642517153985e-05, "loss": 0.0058, "step": 341220 }, { "epoch": 10.547382085677196, "grad_norm": 0.10993948578834534, "learning_rate": 1.4728178895963405e-05, "loss": 0.0051, "step": 341250 }, { "epoch": 10.548309328058355, "grad_norm": 0.13446511328220367, "learning_rate": 1.4727715274772827e-05, "loss": 0.0058, "step": 341280 }, { "epoch": 10.549236570439513, "grad_norm": 0.13854780793190002, "learning_rate": 1.4727251653582248e-05, "loss": 0.006, "step": 341310 }, { "epoch": 10.55016381282067, "grad_norm": 0.08693471550941467, "learning_rate": 1.4726788032391668e-05, "loss": 0.0055, "step": 341340 }, { "epoch": 10.55109105520183, "grad_norm": 0.08144950121641159, "learning_rate": 1.472632441120109e-05, "loss": 0.006, "step": 341370 }, { "epoch": 10.552018297582988, "grad_norm": 0.12039757519960403, "learning_rate": 1.4725860790010511e-05, "loss": 0.0055, "step": 341400 }, { "epoch": 10.552945539964147, "grad_norm": 0.10319346189498901, "learning_rate": 1.472539716881993e-05, "loss": 0.0055, "step": 341430 }, { "epoch": 10.553872782345305, "grad_norm": 0.11410972476005554, "learning_rate": 1.4724933547629352e-05, "loss": 0.006, "step": 341460 }, { "epoch": 10.554800024726463, "grad_norm": 0.11903753876686096, "learning_rate": 1.4724469926438772e-05, "loss": 0.0056, "step": 341490 }, { "epoch": 10.555727267107622, "grad_norm": 0.1199641078710556, "learning_rate": 1.4724006305248194e-05, "loss": 0.0053, "step": 341520 }, { "epoch": 10.55665450948878, "grad_norm": 
0.12974336743354797, "learning_rate": 1.4723542684057615e-05, "loss": 0.0057, "step": 341550 }, { "epoch": 10.55758175186994, "grad_norm": 0.12843191623687744, "learning_rate": 1.4723079062867033e-05, "loss": 0.0058, "step": 341580 }, { "epoch": 10.558508994251097, "grad_norm": 0.12359471619129181, "learning_rate": 1.4722615441676455e-05, "loss": 0.0061, "step": 341610 }, { "epoch": 10.559436236632255, "grad_norm": 0.08571089804172516, "learning_rate": 1.4722151820485876e-05, "loss": 0.0059, "step": 341640 }, { "epoch": 10.560363479013414, "grad_norm": 0.1461394727230072, "learning_rate": 1.4721688199295296e-05, "loss": 0.0058, "step": 341670 }, { "epoch": 10.561290721394572, "grad_norm": 0.06247967481613159, "learning_rate": 1.4721224578104718e-05, "loss": 0.0051, "step": 341700 }, { "epoch": 10.562217963775732, "grad_norm": 0.11091308295726776, "learning_rate": 1.4720760956914139e-05, "loss": 0.0062, "step": 341730 }, { "epoch": 10.56314520615689, "grad_norm": 0.1126294732093811, "learning_rate": 1.4720297335723559e-05, "loss": 0.0051, "step": 341760 }, { "epoch": 10.564072448538047, "grad_norm": 0.056262530386447906, "learning_rate": 1.471983371453298e-05, "loss": 0.0059, "step": 341790 }, { "epoch": 10.564999690919207, "grad_norm": 0.10414574295282364, "learning_rate": 1.47193700933424e-05, "loss": 0.0053, "step": 341820 }, { "epoch": 10.565926933300364, "grad_norm": 0.09794409573078156, "learning_rate": 1.4718906472151822e-05, "loss": 0.0062, "step": 341850 }, { "epoch": 10.566854175681524, "grad_norm": 0.08603613078594208, "learning_rate": 1.4718442850961243e-05, "loss": 0.0061, "step": 341880 }, { "epoch": 10.567781418062681, "grad_norm": 0.11582239717245102, "learning_rate": 1.4717979229770663e-05, "loss": 0.005, "step": 341910 }, { "epoch": 10.56870866044384, "grad_norm": 0.20898443460464478, "learning_rate": 1.4717515608580085e-05, "loss": 0.0057, "step": 341940 }, { "epoch": 10.569635902824999, "grad_norm": 0.17700634896755219, "learning_rate": 
1.4717051987389506e-05, "loss": 0.0058, "step": 341970 }, { "epoch": 10.570563145206156, "grad_norm": 0.11607358604669571, "learning_rate": 1.4716588366198926e-05, "loss": 0.0063, "step": 342000 }, { "epoch": 10.571490387587316, "grad_norm": 0.11007378995418549, "learning_rate": 1.4716124745008347e-05, "loss": 0.0065, "step": 342030 }, { "epoch": 10.572417629968474, "grad_norm": 0.12640348076820374, "learning_rate": 1.4715661123817769e-05, "loss": 0.0066, "step": 342060 }, { "epoch": 10.573344872349633, "grad_norm": 0.07968247681856155, "learning_rate": 1.4715197502627187e-05, "loss": 0.0056, "step": 342090 }, { "epoch": 10.57427211473079, "grad_norm": 0.10927943885326385, "learning_rate": 1.4714733881436609e-05, "loss": 0.0058, "step": 342120 }, { "epoch": 10.575199357111948, "grad_norm": 0.10239008814096451, "learning_rate": 1.4714270260246028e-05, "loss": 0.006, "step": 342150 }, { "epoch": 10.576126599493108, "grad_norm": 0.13242360949516296, "learning_rate": 1.471380663905545e-05, "loss": 0.006, "step": 342180 }, { "epoch": 10.577053841874266, "grad_norm": 0.09845149517059326, "learning_rate": 1.4713343017864871e-05, "loss": 0.0061, "step": 342210 }, { "epoch": 10.577981084255425, "grad_norm": 0.08763343840837479, "learning_rate": 1.4712879396674291e-05, "loss": 0.0058, "step": 342240 }, { "epoch": 10.578908326636583, "grad_norm": 0.14153431355953217, "learning_rate": 1.4712415775483713e-05, "loss": 0.0053, "step": 342270 }, { "epoch": 10.57983556901774, "grad_norm": 0.10045425593852997, "learning_rate": 1.4711952154293134e-05, "loss": 0.0065, "step": 342300 }, { "epoch": 10.5807628113989, "grad_norm": 0.09920358657836914, "learning_rate": 1.4711488533102554e-05, "loss": 0.0054, "step": 342330 }, { "epoch": 10.581690053780058, "grad_norm": 0.09740173816680908, "learning_rate": 1.4711024911911975e-05, "loss": 0.006, "step": 342360 }, { "epoch": 10.582617296161217, "grad_norm": 0.13385117053985596, "learning_rate": 1.4710561290721397e-05, "loss": 0.0056, "step": 
342390 }, { "epoch": 10.583544538542375, "grad_norm": 0.07745515555143356, "learning_rate": 1.4710097669530817e-05, "loss": 0.0056, "step": 342420 }, { "epoch": 10.584471780923533, "grad_norm": 0.08022769540548325, "learning_rate": 1.4709634048340238e-05, "loss": 0.0059, "step": 342450 }, { "epoch": 10.585399023304692, "grad_norm": 0.09396403282880783, "learning_rate": 1.4709170427149656e-05, "loss": 0.006, "step": 342480 }, { "epoch": 10.58632626568585, "grad_norm": 0.1450398713350296, "learning_rate": 1.4708706805959078e-05, "loss": 0.006, "step": 342510 }, { "epoch": 10.58725350806701, "grad_norm": 0.10612403601408005, "learning_rate": 1.4708243184768501e-05, "loss": 0.0049, "step": 342540 }, { "epoch": 10.588180750448167, "grad_norm": 0.11486812680959702, "learning_rate": 1.470777956357792e-05, "loss": 0.0056, "step": 342570 }, { "epoch": 10.589107992829325, "grad_norm": 0.17441651225090027, "learning_rate": 1.470731594238734e-05, "loss": 0.0064, "step": 342600 }, { "epoch": 10.590035235210484, "grad_norm": 0.11624659597873688, "learning_rate": 1.4706852321196762e-05, "loss": 0.0063, "step": 342630 }, { "epoch": 10.590962477591642, "grad_norm": 0.12595388293266296, "learning_rate": 1.4706388700006182e-05, "loss": 0.0065, "step": 342660 }, { "epoch": 10.591889719972801, "grad_norm": 0.10362957417964935, "learning_rate": 1.4705925078815604e-05, "loss": 0.0055, "step": 342690 }, { "epoch": 10.59281696235396, "grad_norm": 0.12401508539915085, "learning_rate": 1.4705461457625025e-05, "loss": 0.0055, "step": 342720 }, { "epoch": 10.593744204735117, "grad_norm": 0.21870025992393494, "learning_rate": 1.4704997836434445e-05, "loss": 0.0058, "step": 342750 }, { "epoch": 10.594671447116276, "grad_norm": 0.1264001876115799, "learning_rate": 1.4704534215243866e-05, "loss": 0.0059, "step": 342780 }, { "epoch": 10.595598689497434, "grad_norm": 0.09397173672914505, "learning_rate": 1.4704070594053286e-05, "loss": 0.0062, "step": 342810 }, { "epoch": 10.596525931878594, 
"grad_norm": 0.14413011074066162, "learning_rate": 1.4703606972862708e-05, "loss": 0.0061, "step": 342840 }, { "epoch": 10.597453174259751, "grad_norm": 0.15105746686458588, "learning_rate": 1.470314335167213e-05, "loss": 0.0058, "step": 342870 }, { "epoch": 10.59838041664091, "grad_norm": 0.14805622398853302, "learning_rate": 1.4702679730481549e-05, "loss": 0.0054, "step": 342900 }, { "epoch": 10.599307659022069, "grad_norm": 0.09210597723722458, "learning_rate": 1.470221610929097e-05, "loss": 0.0053, "step": 342930 }, { "epoch": 10.600234901403226, "grad_norm": 0.12221177667379379, "learning_rate": 1.4701752488100392e-05, "loss": 0.0062, "step": 342960 }, { "epoch": 10.601162143784386, "grad_norm": 0.1084916740655899, "learning_rate": 1.4701304320949497e-05, "loss": 0.006, "step": 342990 }, { "epoch": 10.602089386165543, "grad_norm": 0.11104819923639297, "learning_rate": 1.4700840699758918e-05, "loss": 0.0058, "step": 343020 }, { "epoch": 10.603016628546703, "grad_norm": 0.10433904081583023, "learning_rate": 1.470037707856834e-05, "loss": 0.0059, "step": 343050 }, { "epoch": 10.60394387092786, "grad_norm": 0.08268453180789948, "learning_rate": 1.469991345737776e-05, "loss": 0.0056, "step": 343080 }, { "epoch": 10.604871113309018, "grad_norm": 0.09599269181489944, "learning_rate": 1.4699449836187181e-05, "loss": 0.0053, "step": 343110 }, { "epoch": 10.605798355690178, "grad_norm": 0.10123185068368912, "learning_rate": 1.4698986214996602e-05, "loss": 0.0052, "step": 343140 }, { "epoch": 10.606725598071336, "grad_norm": 0.12426599115133286, "learning_rate": 1.4698522593806022e-05, "loss": 0.006, "step": 343170 }, { "epoch": 10.607652840452495, "grad_norm": 0.14419817924499512, "learning_rate": 1.4698058972615444e-05, "loss": 0.0057, "step": 343200 }, { "epoch": 10.608580082833653, "grad_norm": 0.12193586677312851, "learning_rate": 1.4697595351424862e-05, "loss": 0.0056, "step": 343230 }, { "epoch": 10.60950732521481, "grad_norm": 0.15624916553497314, 
"learning_rate": 1.4697131730234283e-05, "loss": 0.0054, "step": 343260 }, { "epoch": 10.61043456759597, "grad_norm": 0.15415167808532715, "learning_rate": 1.4696668109043707e-05, "loss": 0.006, "step": 343290 }, { "epoch": 10.611361809977128, "grad_norm": 0.1487196981906891, "learning_rate": 1.4696204487853125e-05, "loss": 0.0057, "step": 343320 }, { "epoch": 10.612289052358287, "grad_norm": 0.17417757213115692, "learning_rate": 1.4695740866662546e-05, "loss": 0.0062, "step": 343350 }, { "epoch": 10.613216294739445, "grad_norm": 0.10165008157491684, "learning_rate": 1.4695277245471968e-05, "loss": 0.0062, "step": 343380 }, { "epoch": 10.614143537120603, "grad_norm": 0.06982619315385818, "learning_rate": 1.4694813624281387e-05, "loss": 0.0061, "step": 343410 }, { "epoch": 10.615070779501762, "grad_norm": 0.13722161948680878, "learning_rate": 1.4694350003090809e-05, "loss": 0.0058, "step": 343440 }, { "epoch": 10.61599802188292, "grad_norm": 0.10203608870506287, "learning_rate": 1.469388638190023e-05, "loss": 0.0058, "step": 343470 }, { "epoch": 10.61692526426408, "grad_norm": 0.11823184788227081, "learning_rate": 1.469342276070965e-05, "loss": 0.0062, "step": 343500 }, { "epoch": 10.617852506645237, "grad_norm": 0.09364694356918335, "learning_rate": 1.4692959139519072e-05, "loss": 0.0061, "step": 343530 }, { "epoch": 10.618779749026395, "grad_norm": 0.0864882618188858, "learning_rate": 1.4692495518328492e-05, "loss": 0.0063, "step": 343560 }, { "epoch": 10.619706991407554, "grad_norm": 0.13936607539653778, "learning_rate": 1.4692031897137913e-05, "loss": 0.0055, "step": 343590 }, { "epoch": 10.620634233788712, "grad_norm": 0.12704284489154816, "learning_rate": 1.4691568275947335e-05, "loss": 0.0059, "step": 343620 }, { "epoch": 10.621561476169871, "grad_norm": 0.18364274501800537, "learning_rate": 1.4691104654756754e-05, "loss": 0.0062, "step": 343650 }, { "epoch": 10.622488718551029, "grad_norm": 0.10856818407773972, "learning_rate": 1.4690641033566176e-05, 
"loss": 0.0063, "step": 343680 }, { "epoch": 10.623415960932189, "grad_norm": 0.47732895612716675, "learning_rate": 1.4690177412375597e-05, "loss": 0.0061, "step": 343710 }, { "epoch": 10.624343203313346, "grad_norm": 0.06271529942750931, "learning_rate": 1.4689713791185016e-05, "loss": 0.0056, "step": 343740 }, { "epoch": 10.625270445694504, "grad_norm": 0.09500732272863388, "learning_rate": 1.4689250169994437e-05, "loss": 0.0064, "step": 343770 }, { "epoch": 10.626197688075663, "grad_norm": 0.09622237831354141, "learning_rate": 1.468878654880386e-05, "loss": 0.0056, "step": 343800 }, { "epoch": 10.627124930456821, "grad_norm": 0.1030142530798912, "learning_rate": 1.4688322927613278e-05, "loss": 0.0057, "step": 343830 }, { "epoch": 10.62805217283798, "grad_norm": 0.12350418418645859, "learning_rate": 1.46878593064227e-05, "loss": 0.0055, "step": 343860 }, { "epoch": 10.628979415219138, "grad_norm": 0.11919113248586655, "learning_rate": 1.468739568523212e-05, "loss": 0.0055, "step": 343890 }, { "epoch": 10.629906657600296, "grad_norm": 0.108027383685112, "learning_rate": 1.4686932064041541e-05, "loss": 0.0059, "step": 343920 }, { "epoch": 10.630833899981456, "grad_norm": 0.10022303462028503, "learning_rate": 1.4686468442850963e-05, "loss": 0.0063, "step": 343950 }, { "epoch": 10.631761142362613, "grad_norm": 0.15779618918895721, "learning_rate": 1.4686004821660383e-05, "loss": 0.0059, "step": 343980 }, { "epoch": 10.632688384743773, "grad_norm": 0.08609195798635483, "learning_rate": 1.4685541200469804e-05, "loss": 0.0055, "step": 344010 }, { "epoch": 10.63361562712493, "grad_norm": 0.12083634734153748, "learning_rate": 1.4685077579279226e-05, "loss": 0.0066, "step": 344040 }, { "epoch": 10.634542869506088, "grad_norm": 0.12514916062355042, "learning_rate": 1.4684613958088645e-05, "loss": 0.006, "step": 344070 }, { "epoch": 10.635470111887248, "grad_norm": 0.1289094090461731, "learning_rate": 1.4684150336898067e-05, "loss": 0.0054, "step": 344100 }, { "epoch": 
10.636397354268405, "grad_norm": 0.08919654041528702, "learning_rate": 1.4683686715707488e-05, "loss": 0.0058, "step": 344130 }, { "epoch": 10.637324596649565, "grad_norm": 0.11565250158309937, "learning_rate": 1.4683238548556593e-05, "loss": 0.0058, "step": 344160 }, { "epoch": 10.638251839030723, "grad_norm": 0.11546353995800018, "learning_rate": 1.4682774927366014e-05, "loss": 0.0052, "step": 344190 }, { "epoch": 10.63917908141188, "grad_norm": 0.1246369257569313, "learning_rate": 1.4682311306175436e-05, "loss": 0.0058, "step": 344220 }, { "epoch": 10.64010632379304, "grad_norm": 0.0868244618177414, "learning_rate": 1.4681847684984856e-05, "loss": 0.0058, "step": 344250 }, { "epoch": 10.641033566174197, "grad_norm": 0.16414326429367065, "learning_rate": 1.4681384063794277e-05, "loss": 0.0055, "step": 344280 }, { "epoch": 10.641960808555357, "grad_norm": 0.14597830176353455, "learning_rate": 1.4680920442603699e-05, "loss": 0.0061, "step": 344310 }, { "epoch": 10.642888050936515, "grad_norm": 0.13734200596809387, "learning_rate": 1.4680456821413119e-05, "loss": 0.0062, "step": 344340 }, { "epoch": 10.643815293317672, "grad_norm": 0.11190403252840042, "learning_rate": 1.467999320022254e-05, "loss": 0.0065, "step": 344370 }, { "epoch": 10.644742535698832, "grad_norm": 0.09616868197917938, "learning_rate": 1.467952957903196e-05, "loss": 0.0058, "step": 344400 }, { "epoch": 10.64566977807999, "grad_norm": 0.11003032326698303, "learning_rate": 1.4679065957841381e-05, "loss": 0.006, "step": 344430 }, { "epoch": 10.646597020461149, "grad_norm": 0.11189711838960648, "learning_rate": 1.4678602336650803e-05, "loss": 0.0056, "step": 344460 }, { "epoch": 10.647524262842307, "grad_norm": 0.10590073466300964, "learning_rate": 1.4678138715460221e-05, "loss": 0.0061, "step": 344490 }, { "epoch": 10.648451505223466, "grad_norm": 0.14649812877178192, "learning_rate": 1.4677675094269643e-05, "loss": 0.0058, "step": 344520 }, { "epoch": 10.649378747604624, "grad_norm": 
0.11457198858261108, "learning_rate": 1.4677211473079066e-05, "loss": 0.0063, "step": 344550 }, { "epoch": 10.650305989985782, "grad_norm": 0.1360868513584137, "learning_rate": 1.4676747851888484e-05, "loss": 0.0058, "step": 344580 }, { "epoch": 10.651233232366941, "grad_norm": 0.10195332020521164, "learning_rate": 1.4676284230697905e-05, "loss": 0.0057, "step": 344610 }, { "epoch": 10.652160474748099, "grad_norm": 0.11361844092607498, "learning_rate": 1.4675820609507327e-05, "loss": 0.0066, "step": 344640 }, { "epoch": 10.653087717129258, "grad_norm": 0.09548476338386536, "learning_rate": 1.4675356988316747e-05, "loss": 0.0058, "step": 344670 }, { "epoch": 10.654014959510416, "grad_norm": 0.11515991389751434, "learning_rate": 1.4674893367126168e-05, "loss": 0.006, "step": 344700 }, { "epoch": 10.654942201891574, "grad_norm": 0.10939104855060577, "learning_rate": 1.4674429745935588e-05, "loss": 0.0055, "step": 344730 }, { "epoch": 10.655869444272733, "grad_norm": 0.1146795004606247, "learning_rate": 1.467396612474501e-05, "loss": 0.0054, "step": 344760 }, { "epoch": 10.656796686653891, "grad_norm": 0.12527722120285034, "learning_rate": 1.4673502503554431e-05, "loss": 0.0056, "step": 344790 }, { "epoch": 10.65772392903505, "grad_norm": 0.2078961730003357, "learning_rate": 1.467303888236385e-05, "loss": 0.006, "step": 344820 }, { "epoch": 10.658651171416208, "grad_norm": 0.07405529171228409, "learning_rate": 1.4672575261173272e-05, "loss": 0.0053, "step": 344850 }, { "epoch": 10.659578413797366, "grad_norm": 0.12997183203697205, "learning_rate": 1.4672111639982694e-05, "loss": 0.0055, "step": 344880 }, { "epoch": 10.660505656178525, "grad_norm": 0.11776577681303024, "learning_rate": 1.4671648018792114e-05, "loss": 0.0054, "step": 344910 }, { "epoch": 10.661432898559683, "grad_norm": 0.0818462073802948, "learning_rate": 1.4671184397601535e-05, "loss": 0.006, "step": 344940 }, { "epoch": 10.662360140940843, "grad_norm": 0.10530942678451538, "learning_rate": 
1.4670720776410957e-05, "loss": 0.0058, "step": 344970 }, { "epoch": 10.663287383322, "grad_norm": 0.05568189546465874, "learning_rate": 1.4670257155220375e-05, "loss": 0.0054, "step": 345000 }, { "epoch": 10.664214625703158, "grad_norm": 0.10345623642206192, "learning_rate": 1.4669793534029796e-05, "loss": 0.0064, "step": 345030 }, { "epoch": 10.665141868084318, "grad_norm": 0.1016494482755661, "learning_rate": 1.4669329912839216e-05, "loss": 0.0062, "step": 345060 }, { "epoch": 10.666069110465475, "grad_norm": 0.09046439826488495, "learning_rate": 1.4668866291648638e-05, "loss": 0.005, "step": 345090 }, { "epoch": 10.666996352846635, "grad_norm": 0.13195526599884033, "learning_rate": 1.4668402670458059e-05, "loss": 0.0061, "step": 345120 }, { "epoch": 10.667923595227792, "grad_norm": 0.08867175877094269, "learning_rate": 1.4667939049267479e-05, "loss": 0.0056, "step": 345150 }, { "epoch": 10.66885083760895, "grad_norm": 0.1397208869457245, "learning_rate": 1.46674754280769e-05, "loss": 0.0053, "step": 345180 }, { "epoch": 10.66977807999011, "grad_norm": 0.11836244910955429, "learning_rate": 1.4667011806886322e-05, "loss": 0.006, "step": 345210 }, { "epoch": 10.670705322371267, "grad_norm": 0.05759408324956894, "learning_rate": 1.4666548185695742e-05, "loss": 0.0055, "step": 345240 }, { "epoch": 10.671632564752427, "grad_norm": 0.11265701800584793, "learning_rate": 1.4666084564505163e-05, "loss": 0.006, "step": 345270 }, { "epoch": 10.672559807133585, "grad_norm": 0.12534929811954498, "learning_rate": 1.4665620943314585e-05, "loss": 0.0056, "step": 345300 }, { "epoch": 10.673487049514744, "grad_norm": 0.15184226632118225, "learning_rate": 1.4665157322124005e-05, "loss": 0.006, "step": 345330 }, { "epoch": 10.674414291895902, "grad_norm": 0.20108583569526672, "learning_rate": 1.4664693700933426e-05, "loss": 0.006, "step": 345360 }, { "epoch": 10.67534153427706, "grad_norm": 0.10033752024173737, "learning_rate": 1.4664230079742844e-05, "loss": 0.0057, "step": 345390 
}, { "epoch": 10.676268776658219, "grad_norm": 0.14716213941574097, "learning_rate": 1.4663766458552267e-05, "loss": 0.0058, "step": 345420 }, { "epoch": 10.677196019039377, "grad_norm": 0.20911507308483124, "learning_rate": 1.4663302837361689e-05, "loss": 0.0058, "step": 345450 }, { "epoch": 10.678123261420536, "grad_norm": 0.1620432287454605, "learning_rate": 1.4662839216171107e-05, "loss": 0.0063, "step": 345480 }, { "epoch": 10.679050503801694, "grad_norm": 0.14564839005470276, "learning_rate": 1.4662375594980529e-05, "loss": 0.0053, "step": 345510 }, { "epoch": 10.679977746182852, "grad_norm": 0.08878619223833084, "learning_rate": 1.466191197378995e-05, "loss": 0.0057, "step": 345540 }, { "epoch": 10.680904988564011, "grad_norm": 0.13982808589935303, "learning_rate": 1.466144835259937e-05, "loss": 0.006, "step": 345570 }, { "epoch": 10.681832230945169, "grad_norm": 0.11408210545778275, "learning_rate": 1.4660984731408791e-05, "loss": 0.0053, "step": 345600 }, { "epoch": 10.682759473326328, "grad_norm": 0.12054196745157242, "learning_rate": 1.4660521110218213e-05, "loss": 0.0055, "step": 345630 }, { "epoch": 10.683686715707486, "grad_norm": 0.07885222136974335, "learning_rate": 1.4660057489027633e-05, "loss": 0.0053, "step": 345660 }, { "epoch": 10.684613958088644, "grad_norm": 0.13483406603336334, "learning_rate": 1.4659593867837054e-05, "loss": 0.0054, "step": 345690 }, { "epoch": 10.685541200469803, "grad_norm": 0.059336550533771515, "learning_rate": 1.4659130246646474e-05, "loss": 0.0053, "step": 345720 }, { "epoch": 10.68646844285096, "grad_norm": 0.09915922582149506, "learning_rate": 1.4658666625455895e-05, "loss": 0.006, "step": 345750 }, { "epoch": 10.68739568523212, "grad_norm": 0.11860792338848114, "learning_rate": 1.4658203004265317e-05, "loss": 0.0054, "step": 345780 }, { "epoch": 10.688322927613278, "grad_norm": 0.13108734786510468, "learning_rate": 1.4657739383074737e-05, "loss": 0.006, "step": 345810 }, { "epoch": 10.689250169994436, "grad_norm": 
0.14525176584720612, "learning_rate": 1.4657275761884158e-05, "loss": 0.0055, "step": 345840 }, { "epoch": 10.690177412375595, "grad_norm": 0.09992209076881409, "learning_rate": 1.465681214069358e-05, "loss": 0.0057, "step": 345870 }, { "epoch": 10.691104654756753, "grad_norm": 0.08237564563751221, "learning_rate": 1.4656348519502998e-05, "loss": 0.0061, "step": 345900 }, { "epoch": 10.692031897137912, "grad_norm": 0.11841725558042526, "learning_rate": 1.465588489831242e-05, "loss": 0.0061, "step": 345930 }, { "epoch": 10.69295913951907, "grad_norm": 0.10490068793296814, "learning_rate": 1.4655421277121843e-05, "loss": 0.0059, "step": 345960 }, { "epoch": 10.693886381900228, "grad_norm": 0.1126970425248146, "learning_rate": 1.465495765593126e-05, "loss": 0.005, "step": 345990 }, { "epoch": 10.694813624281387, "grad_norm": 0.10002221167087555, "learning_rate": 1.4654494034740682e-05, "loss": 0.0063, "step": 346020 }, { "epoch": 10.695740866662545, "grad_norm": 0.16236434876918793, "learning_rate": 1.4654030413550102e-05, "loss": 0.006, "step": 346050 }, { "epoch": 10.696668109043705, "grad_norm": 0.07004275172948837, "learning_rate": 1.4653566792359524e-05, "loss": 0.0062, "step": 346080 }, { "epoch": 10.697595351424862, "grad_norm": 0.10203568637371063, "learning_rate": 1.4653103171168945e-05, "loss": 0.0066, "step": 346110 }, { "epoch": 10.698522593806022, "grad_norm": 0.12894317507743835, "learning_rate": 1.4652639549978365e-05, "loss": 0.0054, "step": 346140 }, { "epoch": 10.69944983618718, "grad_norm": 0.08375371247529984, "learning_rate": 1.4652175928787786e-05, "loss": 0.0063, "step": 346170 }, { "epoch": 10.700377078568337, "grad_norm": 0.100730761885643, "learning_rate": 1.4651712307597208e-05, "loss": 0.0068, "step": 346200 }, { "epoch": 10.701304320949497, "grad_norm": 0.12207789719104767, "learning_rate": 1.4651248686406628e-05, "loss": 0.0063, "step": 346230 }, { "epoch": 10.702231563330654, "grad_norm": 0.13364163041114807, "learning_rate": 
1.465078506521605e-05, "loss": 0.006, "step": 346260 }, { "epoch": 10.703158805711814, "grad_norm": 0.12492498010396957, "learning_rate": 1.465032144402547e-05, "loss": 0.0058, "step": 346290 }, { "epoch": 10.704086048092972, "grad_norm": 0.09715120494365692, "learning_rate": 1.464985782283489e-05, "loss": 0.0056, "step": 346320 }, { "epoch": 10.70501329047413, "grad_norm": 0.10505005717277527, "learning_rate": 1.4649394201644312e-05, "loss": 0.0063, "step": 346350 }, { "epoch": 10.705940532855289, "grad_norm": 0.09334531426429749, "learning_rate": 1.464893058045373e-05, "loss": 0.0058, "step": 346380 }, { "epoch": 10.706867775236447, "grad_norm": 0.11643274873495102, "learning_rate": 1.4648466959263152e-05, "loss": 0.0057, "step": 346410 }, { "epoch": 10.707795017617606, "grad_norm": 0.13036151230335236, "learning_rate": 1.4648003338072573e-05, "loss": 0.0062, "step": 346440 }, { "epoch": 10.708722259998764, "grad_norm": 0.1344020515680313, "learning_rate": 1.4647539716881993e-05, "loss": 0.0055, "step": 346470 }, { "epoch": 10.709649502379921, "grad_norm": 0.12662693858146667, "learning_rate": 1.4647076095691414e-05, "loss": 0.0063, "step": 346500 }, { "epoch": 10.710576744761081, "grad_norm": 0.10187558829784393, "learning_rate": 1.4646612474500836e-05, "loss": 0.0061, "step": 346530 }, { "epoch": 10.711503987142239, "grad_norm": 0.09041651338338852, "learning_rate": 1.4646148853310256e-05, "loss": 0.006, "step": 346560 }, { "epoch": 10.712431229523398, "grad_norm": 0.13439713418483734, "learning_rate": 1.4645685232119677e-05, "loss": 0.0057, "step": 346590 }, { "epoch": 10.713358471904556, "grad_norm": 0.08743676543235779, "learning_rate": 1.4645221610929099e-05, "loss": 0.0055, "step": 346620 }, { "epoch": 10.714285714285714, "grad_norm": 0.09373095631599426, "learning_rate": 1.4644757989738519e-05, "loss": 0.0064, "step": 346650 }, { "epoch": 10.715212956666873, "grad_norm": 0.09339682757854462, "learning_rate": 1.464429436854794e-05, "loss": 0.0062, "step": 
346680 }, { "epoch": 10.71614019904803, "grad_norm": 0.12827998399734497, "learning_rate": 1.464383074735736e-05, "loss": 0.0051, "step": 346710 }, { "epoch": 10.71706744142919, "grad_norm": 0.09590603411197662, "learning_rate": 1.4643367126166781e-05, "loss": 0.0058, "step": 346740 }, { "epoch": 10.717994683810348, "grad_norm": 0.07975687831640244, "learning_rate": 1.4642903504976203e-05, "loss": 0.0058, "step": 346770 }, { "epoch": 10.718921926191506, "grad_norm": 0.16024987399578094, "learning_rate": 1.4642439883785621e-05, "loss": 0.006, "step": 346800 }, { "epoch": 10.719849168572665, "grad_norm": 0.17582295835018158, "learning_rate": 1.4641976262595044e-05, "loss": 0.0058, "step": 346830 }, { "epoch": 10.720776410953823, "grad_norm": 0.11712124198675156, "learning_rate": 1.4641512641404466e-05, "loss": 0.0064, "step": 346860 }, { "epoch": 10.721703653334982, "grad_norm": 0.09335067123174667, "learning_rate": 1.4641049020213884e-05, "loss": 0.0056, "step": 346890 }, { "epoch": 10.72263089571614, "grad_norm": 0.08575096726417542, "learning_rate": 1.4640585399023305e-05, "loss": 0.006, "step": 346920 }, { "epoch": 10.7235581380973, "grad_norm": 0.09339404106140137, "learning_rate": 1.4640121777832727e-05, "loss": 0.0061, "step": 346950 }, { "epoch": 10.724485380478457, "grad_norm": 0.16180117428302765, "learning_rate": 1.4639658156642147e-05, "loss": 0.0065, "step": 346980 }, { "epoch": 10.725412622859615, "grad_norm": 0.10972893983125687, "learning_rate": 1.4639194535451568e-05, "loss": 0.0059, "step": 347010 }, { "epoch": 10.726339865240774, "grad_norm": 0.08515827357769012, "learning_rate": 1.4638730914260988e-05, "loss": 0.0064, "step": 347040 }, { "epoch": 10.727267107621932, "grad_norm": 0.1115778312087059, "learning_rate": 1.463826729307041e-05, "loss": 0.0046, "step": 347070 }, { "epoch": 10.728194350003092, "grad_norm": 0.14194916188716888, "learning_rate": 1.4637803671879831e-05, "loss": 0.0066, "step": 347100 }, { "epoch": 10.72912159238425, 
"grad_norm": 0.08165298402309418, "learning_rate": 1.4637340050689251e-05, "loss": 0.0057, "step": 347130 }, { "epoch": 10.730048834765407, "grad_norm": 0.1252422332763672, "learning_rate": 1.4636876429498672e-05, "loss": 0.0064, "step": 347160 }, { "epoch": 10.730976077146567, "grad_norm": 0.1038009449839592, "learning_rate": 1.4636412808308094e-05, "loss": 0.0062, "step": 347190 }, { "epoch": 10.731903319527724, "grad_norm": 0.10918937623500824, "learning_rate": 1.4635949187117514e-05, "loss": 0.0056, "step": 347220 }, { "epoch": 10.732830561908884, "grad_norm": 0.16482585668563843, "learning_rate": 1.4635485565926935e-05, "loss": 0.006, "step": 347250 }, { "epoch": 10.733757804290041, "grad_norm": 0.17375101149082184, "learning_rate": 1.4635021944736357e-05, "loss": 0.0064, "step": 347280 }, { "epoch": 10.7346850466712, "grad_norm": 0.053776923567056656, "learning_rate": 1.4634558323545775e-05, "loss": 0.0061, "step": 347310 }, { "epoch": 10.735612289052359, "grad_norm": 0.06211100518703461, "learning_rate": 1.4634094702355196e-05, "loss": 0.0062, "step": 347340 }, { "epoch": 10.736539531433516, "grad_norm": 0.09938590973615646, "learning_rate": 1.463363108116462e-05, "loss": 0.0064, "step": 347370 }, { "epoch": 10.737466773814676, "grad_norm": 0.11543615162372589, "learning_rate": 1.4633167459974038e-05, "loss": 0.0057, "step": 347400 }, { "epoch": 10.738394016195834, "grad_norm": 0.11234132945537567, "learning_rate": 1.4632703838783459e-05, "loss": 0.0054, "step": 347430 }, { "epoch": 10.739321258576991, "grad_norm": 0.13507944345474243, "learning_rate": 1.4632240217592879e-05, "loss": 0.0063, "step": 347460 }, { "epoch": 10.74024850095815, "grad_norm": 0.10943221300840378, "learning_rate": 1.46317765964023e-05, "loss": 0.0055, "step": 347490 }, { "epoch": 10.741175743339308, "grad_norm": 0.11480111628770828, "learning_rate": 1.4631312975211722e-05, "loss": 0.0056, "step": 347520 }, { "epoch": 10.742102985720468, "grad_norm": 0.12715817987918854, 
"learning_rate": 1.4630849354021142e-05, "loss": 0.0068, "step": 347550 }, { "epoch": 10.743030228101626, "grad_norm": 0.06530588865280151, "learning_rate": 1.4630385732830563e-05, "loss": 0.0055, "step": 347580 }, { "epoch": 10.743957470482783, "grad_norm": 0.11643441021442413, "learning_rate": 1.4629922111639985e-05, "loss": 0.0058, "step": 347610 }, { "epoch": 10.744884712863943, "grad_norm": 0.10871893912553787, "learning_rate": 1.4629458490449405e-05, "loss": 0.0059, "step": 347640 }, { "epoch": 10.7458119552451, "grad_norm": 0.18953654170036316, "learning_rate": 1.4628994869258826e-05, "loss": 0.0064, "step": 347670 }, { "epoch": 10.74673919762626, "grad_norm": 0.08513487875461578, "learning_rate": 1.4628531248068248e-05, "loss": 0.0059, "step": 347700 }, { "epoch": 10.747666440007418, "grad_norm": 0.09767083078622818, "learning_rate": 1.4628067626877667e-05, "loss": 0.0054, "step": 347730 }, { "epoch": 10.748593682388577, "grad_norm": 0.0989520251750946, "learning_rate": 1.4627604005687089e-05, "loss": 0.006, "step": 347760 }, { "epoch": 10.749520924769735, "grad_norm": 0.09019187837839127, "learning_rate": 1.4627140384496507e-05, "loss": 0.006, "step": 347790 }, { "epoch": 10.750448167150893, "grad_norm": 0.1364995241165161, "learning_rate": 1.4626676763305929e-05, "loss": 0.0065, "step": 347820 }, { "epoch": 10.751375409532052, "grad_norm": 0.15480203926563263, "learning_rate": 1.462621314211535e-05, "loss": 0.0055, "step": 347850 }, { "epoch": 10.75230265191321, "grad_norm": 0.056555453687906265, "learning_rate": 1.462574952092477e-05, "loss": 0.0064, "step": 347880 }, { "epoch": 10.75322989429437, "grad_norm": 0.07904575020074844, "learning_rate": 1.4625285899734191e-05, "loss": 0.0057, "step": 347910 }, { "epoch": 10.754157136675527, "grad_norm": 0.12468887120485306, "learning_rate": 1.4624822278543613e-05, "loss": 0.0057, "step": 347940 }, { "epoch": 10.755084379056685, "grad_norm": 0.08949054032564163, "learning_rate": 1.4624358657353033e-05, "loss": 
0.0059, "step": 347970 }, { "epoch": 10.756011621437844, "grad_norm": 0.11832012981176376, "learning_rate": 1.4623895036162454e-05, "loss": 0.0055, "step": 348000 }, { "epoch": 10.756938863819002, "grad_norm": 0.11696933954954147, "learning_rate": 1.4623431414971876e-05, "loss": 0.0065, "step": 348030 }, { "epoch": 10.757866106200161, "grad_norm": 0.0905463919043541, "learning_rate": 1.4622967793781296e-05, "loss": 0.0053, "step": 348060 }, { "epoch": 10.75879334858132, "grad_norm": 0.13370922207832336, "learning_rate": 1.4622504172590717e-05, "loss": 0.006, "step": 348090 }, { "epoch": 10.759720590962477, "grad_norm": 0.12744221091270447, "learning_rate": 1.4622040551400137e-05, "loss": 0.0057, "step": 348120 }, { "epoch": 10.760647833343636, "grad_norm": 0.14287903904914856, "learning_rate": 1.4621576930209558e-05, "loss": 0.0061, "step": 348150 }, { "epoch": 10.761575075724794, "grad_norm": 0.10012584179639816, "learning_rate": 1.462111330901898e-05, "loss": 0.006, "step": 348180 }, { "epoch": 10.762502318105954, "grad_norm": 0.11611252278089523, "learning_rate": 1.4620649687828398e-05, "loss": 0.0054, "step": 348210 }, { "epoch": 10.763429560487111, "grad_norm": 0.1140558198094368, "learning_rate": 1.4620186066637821e-05, "loss": 0.0058, "step": 348240 }, { "epoch": 10.764356802868269, "grad_norm": 0.08695538341999054, "learning_rate": 1.4619722445447243e-05, "loss": 0.0056, "step": 348270 }, { "epoch": 10.765284045249429, "grad_norm": 0.13033230602741241, "learning_rate": 1.461925882425666e-05, "loss": 0.0059, "step": 348300 }, { "epoch": 10.766211287630586, "grad_norm": 0.049845147877931595, "learning_rate": 1.4618795203066082e-05, "loss": 0.0055, "step": 348330 }, { "epoch": 10.767138530011746, "grad_norm": 0.14094963669776917, "learning_rate": 1.4618331581875504e-05, "loss": 0.0057, "step": 348360 }, { "epoch": 10.768065772392903, "grad_norm": 0.12428007274866104, "learning_rate": 1.4617867960684924e-05, "loss": 0.0057, "step": 348390 }, { "epoch": 
10.768993014774061, "grad_norm": 0.14449474215507507, "learning_rate": 1.4617404339494345e-05, "loss": 0.0056, "step": 348420 }, { "epoch": 10.76992025715522, "grad_norm": 0.13020873069763184, "learning_rate": 1.4616940718303765e-05, "loss": 0.0059, "step": 348450 }, { "epoch": 10.770847499536378, "grad_norm": 0.09372727572917938, "learning_rate": 1.4616477097113186e-05, "loss": 0.0057, "step": 348480 }, { "epoch": 10.771774741917538, "grad_norm": 0.12254839390516281, "learning_rate": 1.4616013475922608e-05, "loss": 0.0062, "step": 348510 }, { "epoch": 10.772701984298696, "grad_norm": 0.07504083216190338, "learning_rate": 1.4615549854732028e-05, "loss": 0.006, "step": 348540 }, { "epoch": 10.773629226679855, "grad_norm": 0.13758452236652374, "learning_rate": 1.461508623354145e-05, "loss": 0.0053, "step": 348570 }, { "epoch": 10.774556469061013, "grad_norm": 0.10469920933246613, "learning_rate": 1.461462261235087e-05, "loss": 0.0057, "step": 348600 }, { "epoch": 10.77548371144217, "grad_norm": null, "learning_rate": 1.4614174445199975e-05, "loss": 0.006, "step": 348630 }, { "epoch": 10.77641095382333, "grad_norm": 0.1278279572725296, "learning_rate": 1.4613710824009397e-05, "loss": 0.0055, "step": 348660 }, { "epoch": 10.777338196204488, "grad_norm": 0.12814421951770782, "learning_rate": 1.4613247202818818e-05, "loss": 0.0061, "step": 348690 }, { "epoch": 10.778265438585647, "grad_norm": 0.13882622122764587, "learning_rate": 1.4612783581628238e-05, "loss": 0.0057, "step": 348720 }, { "epoch": 10.779192680966805, "grad_norm": 0.1466182917356491, "learning_rate": 1.461231996043766e-05, "loss": 0.0066, "step": 348750 }, { "epoch": 10.780119923347963, "grad_norm": 0.13138560950756073, "learning_rate": 1.4611856339247081e-05, "loss": 0.0068, "step": 348780 }, { "epoch": 10.781047165729122, "grad_norm": 0.13225431740283966, "learning_rate": 1.4611392718056501e-05, "loss": 0.0055, "step": 348810 }, { "epoch": 10.78197440811028, "grad_norm": 0.11060165613889694,
"learning_rate": 1.4610929096865922e-05, "loss": 0.0055, "step": 348840 }, { "epoch": 10.78290165049144, "grad_norm": 0.17861849069595337, "learning_rate": 1.4610465475675342e-05, "loss": 0.0055, "step": 348870 }, { "epoch": 10.783828892872597, "grad_norm": 0.09881556034088135, "learning_rate": 1.4610001854484764e-05, "loss": 0.0057, "step": 348900 }, { "epoch": 10.784756135253755, "grad_norm": 0.10771318525075912, "learning_rate": 1.4609538233294185e-05, "loss": 0.0055, "step": 348930 }, { "epoch": 10.785683377634914, "grad_norm": 0.09838307648897171, "learning_rate": 1.4609074612103603e-05, "loss": 0.0055, "step": 348960 }, { "epoch": 10.786610620016072, "grad_norm": 0.10509505122900009, "learning_rate": 1.4608610990913027e-05, "loss": 0.0059, "step": 348990 }, { "epoch": 10.787537862397231, "grad_norm": 0.08933589607477188, "learning_rate": 1.4608147369722448e-05, "loss": 0.0057, "step": 349020 }, { "epoch": 10.788465104778389, "grad_norm": 0.13767851889133453, "learning_rate": 1.4607683748531866e-05, "loss": 0.0059, "step": 349050 }, { "epoch": 10.789392347159547, "grad_norm": 0.1437893509864807, "learning_rate": 1.4607220127341288e-05, "loss": 0.006, "step": 349080 }, { "epoch": 10.790319589540706, "grad_norm": 0.1106128916144371, "learning_rate": 1.460675650615071e-05, "loss": 0.0057, "step": 349110 }, { "epoch": 10.791246831921864, "grad_norm": 0.10199877619743347, "learning_rate": 1.4606292884960129e-05, "loss": 0.0061, "step": 349140 }, { "epoch": 10.792174074303023, "grad_norm": 0.1499754637479782, "learning_rate": 1.460582926376955e-05, "loss": 0.0057, "step": 349170 }, { "epoch": 10.793101316684181, "grad_norm": 0.10746100544929504, "learning_rate": 1.460536564257897e-05, "loss": 0.006, "step": 349200 }, { "epoch": 10.794028559065339, "grad_norm": 0.14645981788635254, "learning_rate": 1.4604902021388392e-05, "loss": 0.0055, "step": 349230 }, { "epoch": 10.794955801446498, "grad_norm": 0.14411067962646484, "learning_rate": 1.4604438400197813e-05, "loss": 
0.0055, "step": 349260 }, { "epoch": 10.795883043827656, "grad_norm": 0.08487439900636673, "learning_rate": 1.4603974779007233e-05, "loss": 0.0058, "step": 349290 }, { "epoch": 10.796810286208816, "grad_norm": 0.08231471478939056, "learning_rate": 1.4603511157816655e-05, "loss": 0.006, "step": 349320 }, { "epoch": 10.797737528589973, "grad_norm": 0.12399114668369293, "learning_rate": 1.4603047536626076e-05, "loss": 0.0058, "step": 349350 }, { "epoch": 10.798664770971133, "grad_norm": 0.10918474197387695, "learning_rate": 1.4602583915435496e-05, "loss": 0.006, "step": 349380 }, { "epoch": 10.79959201335229, "grad_norm": 0.13840611279010773, "learning_rate": 1.4602120294244918e-05, "loss": 0.006, "step": 349410 }, { "epoch": 10.800519255733448, "grad_norm": 0.1250373125076294, "learning_rate": 1.4601656673054339e-05, "loss": 0.0061, "step": 349440 }, { "epoch": 10.801446498114608, "grad_norm": 0.1283210664987564, "learning_rate": 1.4601193051863757e-05, "loss": 0.0053, "step": 349470 }, { "epoch": 10.802373740495765, "grad_norm": 0.15069685876369476, "learning_rate": 1.460072943067318e-05, "loss": 0.0053, "step": 349500 }, { "epoch": 10.803300982876923, "grad_norm": 0.10139212012290955, "learning_rate": 1.4600265809482598e-05, "loss": 0.0058, "step": 349530 }, { "epoch": 10.804228225258083, "grad_norm": 0.1053353026509285, "learning_rate": 1.459980218829202e-05, "loss": 0.0064, "step": 349560 }, { "epoch": 10.80515546763924, "grad_norm": 0.08137219399213791, "learning_rate": 1.4599338567101441e-05, "loss": 0.0051, "step": 349590 }, { "epoch": 10.8060827100204, "grad_norm": 0.10421590507030487, "learning_rate": 1.4598874945910861e-05, "loss": 0.0054, "step": 349620 }, { "epoch": 10.807009952401557, "grad_norm": 0.09363899379968643, "learning_rate": 1.4598411324720283e-05, "loss": 0.0062, "step": 349650 }, { "epoch": 10.807937194782717, "grad_norm": 0.12327735871076584, "learning_rate": 1.4597947703529704e-05, "loss": 0.006, "step": 349680 }, { "epoch": 
10.808864437163875, "grad_norm": 0.0894947424530983, "learning_rate": 1.4597484082339124e-05, "loss": 0.0062, "step": 349710 }, { "epoch": 10.809791679545032, "grad_norm": 0.08948751538991928, "learning_rate": 1.4597020461148546e-05, "loss": 0.0063, "step": 349740 }, { "epoch": 10.810718921926192, "grad_norm": 0.08530933409929276, "learning_rate": 1.4596556839957967e-05, "loss": 0.0061, "step": 349770 }, { "epoch": 10.81164616430735, "grad_norm": 0.1309766322374344, "learning_rate": 1.4596093218767387e-05, "loss": 0.0066, "step": 349800 }, { "epoch": 10.812573406688509, "grad_norm": 0.09984597563743591, "learning_rate": 1.4595629597576808e-05, "loss": 0.0065, "step": 349830 }, { "epoch": 10.813500649069667, "grad_norm": 0.09525173902511597, "learning_rate": 1.4595165976386228e-05, "loss": 0.006, "step": 349860 }, { "epoch": 10.814427891450825, "grad_norm": 0.13200540840625763, "learning_rate": 1.459470235519565e-05, "loss": 0.0064, "step": 349890 }, { "epoch": 10.815355133831984, "grad_norm": 0.09592419862747192, "learning_rate": 1.4594238734005071e-05, "loss": 0.0052, "step": 349920 }, { "epoch": 10.816282376213142, "grad_norm": 0.11084771901369095, "learning_rate": 1.459377511281449e-05, "loss": 0.0056, "step": 349950 }, { "epoch": 10.817209618594301, "grad_norm": 0.11670900881290436, "learning_rate": 1.4593311491623911e-05, "loss": 0.006, "step": 349980 }, { "epoch": 10.817827780181739, "eval_f1": 0.9980881605501312, "eval_loss": 0.00658430578187108, "eval_precision": 0.9980870468468599, "eval_recall": 0.9980900723728231, "eval_runtime": 4179.3935, "eval_samples_per_second": 282.625, "eval_steps_per_second": 8.832, "step": 350000 }, { "epoch": 10.818136860975459, "grad_norm": 0.07266703248023987, "learning_rate": 1.4592863324473019e-05, "loss": 0.0063, "step": 350010 }, { "epoch": 10.819064103356617, "grad_norm": 0.08281983435153961, "learning_rate": 1.4592399703282439e-05, "loss": 0.0066, "step": 350040 }, { "epoch": 10.819991345737776, "grad_norm": 
0.0993109866976738, "learning_rate": 1.459193608209186e-05, "loss": 0.0061, "step": 350070 }, { "epoch": 10.820918588118934, "grad_norm": 0.09855248034000397, "learning_rate": 1.4591472460901282e-05, "loss": 0.0056, "step": 350100 }, { "epoch": 10.821845830500093, "grad_norm": 0.15694528818130493, "learning_rate": 1.4591008839710701e-05, "loss": 0.0059, "step": 350130 }, { "epoch": 10.822773072881251, "grad_norm": 0.11238108575344086, "learning_rate": 1.4590545218520123e-05, "loss": 0.0059, "step": 350160 }, { "epoch": 10.82370031526241, "grad_norm": 0.15517720580101013, "learning_rate": 1.4590081597329544e-05, "loss": 0.0056, "step": 350190 }, { "epoch": 10.824627557643568, "grad_norm": 0.13125139474868774, "learning_rate": 1.4589617976138963e-05, "loss": 0.0062, "step": 350220 }, { "epoch": 10.825554800024726, "grad_norm": 0.10437696427106857, "learning_rate": 1.4589154354948386e-05, "loss": 0.0053, "step": 350250 }, { "epoch": 10.826482042405885, "grad_norm": 0.08705496042966843, "learning_rate": 1.4588690733757807e-05, "loss": 0.0062, "step": 350280 }, { "epoch": 10.827409284787043, "grad_norm": 0.11286725103855133, "learning_rate": 1.4588227112567225e-05, "loss": 0.0063, "step": 350310 }, { "epoch": 10.8283365271682, "grad_norm": 0.13300259411334991, "learning_rate": 1.4587763491376647e-05, "loss": 0.0052, "step": 350340 }, { "epoch": 10.82926376954936, "grad_norm": 0.06423031538724899, "learning_rate": 1.4587299870186067e-05, "loss": 0.0056, "step": 350370 }, { "epoch": 10.830191011930518, "grad_norm": 0.1345626562833786, "learning_rate": 1.4586836248995488e-05, "loss": 0.006, "step": 350400 }, { "epoch": 10.831118254311678, "grad_norm": 0.08470282703638077, "learning_rate": 1.458637262780491e-05, "loss": 0.0054, "step": 350430 }, { "epoch": 10.832045496692835, "grad_norm": 0.113343745470047, "learning_rate": 1.458590900661433e-05, "loss": 0.0056, "step": 350460 }, { "epoch": 10.832972739073995, "grad_norm": 0.10613878816366196, "learning_rate": 
1.4585445385423751e-05, "loss": 0.0053, "step": 350490 }, { "epoch": 10.833899981455152, "grad_norm": 0.07023218274116516, "learning_rate": 1.4584981764233173e-05, "loss": 0.0064, "step": 350520 }, { "epoch": 10.83482722383631, "grad_norm": 0.08064621686935425, "learning_rate": 1.4584518143042592e-05, "loss": 0.0054, "step": 350550 }, { "epoch": 10.83575446621747, "grad_norm": 0.10612326860427856, "learning_rate": 1.4584054521852014e-05, "loss": 0.0057, "step": 350580 }, { "epoch": 10.836681708598627, "grad_norm": 0.09398150444030762, "learning_rate": 1.4583590900661435e-05, "loss": 0.0065, "step": 350610 }, { "epoch": 10.837608950979787, "grad_norm": 0.09317609667778015, "learning_rate": 1.4583127279470855e-05, "loss": 0.0053, "step": 350640 }, { "epoch": 10.838536193360945, "grad_norm": 0.095124751329422, "learning_rate": 1.4582663658280277e-05, "loss": 0.006, "step": 350670 }, { "epoch": 10.839463435742102, "grad_norm": 0.1002223789691925, "learning_rate": 1.4582200037089695e-05, "loss": 0.006, "step": 350700 }, { "epoch": 10.840390678123262, "grad_norm": 0.15122276544570923, "learning_rate": 1.4581736415899116e-05, "loss": 0.0057, "step": 350730 }, { "epoch": 10.84131792050442, "grad_norm": 0.0871715098619461, "learning_rate": 1.4581272794708538e-05, "loss": 0.0051, "step": 350760 }, { "epoch": 10.842245162885579, "grad_norm": 0.07532969117164612, "learning_rate": 1.4580809173517958e-05, "loss": 0.0061, "step": 350790 }, { "epoch": 10.843172405266737, "grad_norm": 0.12835824489593506, "learning_rate": 1.4580345552327379e-05, "loss": 0.0058, "step": 350820 }, { "epoch": 10.844099647647894, "grad_norm": 0.06497576832771301, "learning_rate": 1.45798819311368e-05, "loss": 0.0055, "step": 350850 }, { "epoch": 10.845026890029054, "grad_norm": 0.10660349577665329, "learning_rate": 1.457941830994622e-05, "loss": 0.0063, "step": 350880 }, { "epoch": 10.845954132410212, "grad_norm": 0.10776323080062866, "learning_rate": 1.4578954688755642e-05, "loss": 0.0055, "step": 
350910 }, { "epoch": 10.846881374791371, "grad_norm": 0.25458037853240967, "learning_rate": 1.4578491067565063e-05, "loss": 0.006, "step": 350940 }, { "epoch": 10.847808617172529, "grad_norm": 0.13181912899017334, "learning_rate": 1.4578027446374483e-05, "loss": 0.0059, "step": 350970 }, { "epoch": 10.848735859553688, "grad_norm": 0.08448892831802368, "learning_rate": 1.4577563825183905e-05, "loss": 0.0053, "step": 351000 }, { "epoch": 10.849663101934846, "grad_norm": 0.0751737430691719, "learning_rate": 1.4577100203993325e-05, "loss": 0.0056, "step": 351030 }, { "epoch": 10.850590344316004, "grad_norm": 0.07099779695272446, "learning_rate": 1.4576636582802746e-05, "loss": 0.0059, "step": 351060 }, { "epoch": 10.851517586697163, "grad_norm": 0.11468223482370377, "learning_rate": 1.4576172961612168e-05, "loss": 0.0061, "step": 351090 }, { "epoch": 10.85244482907832, "grad_norm": 0.10116111487150192, "learning_rate": 1.4575709340421587e-05, "loss": 0.0059, "step": 351120 }, { "epoch": 10.853372071459479, "grad_norm": 0.07355953007936478, "learning_rate": 1.4575245719231009e-05, "loss": 0.0053, "step": 351150 }, { "epoch": 10.854299313840638, "grad_norm": 0.1746218055486679, "learning_rate": 1.457478209804043e-05, "loss": 0.0055, "step": 351180 }, { "epoch": 10.855226556221796, "grad_norm": 0.13160467147827148, "learning_rate": 1.4574318476849849e-05, "loss": 0.0055, "step": 351210 }, { "epoch": 10.856153798602955, "grad_norm": 0.22061708569526672, "learning_rate": 1.457385485565927e-05, "loss": 0.0054, "step": 351240 }, { "epoch": 10.857081040984113, "grad_norm": 0.11293817311525345, "learning_rate": 1.4573391234468692e-05, "loss": 0.0057, "step": 351270 }, { "epoch": 10.858008283365272, "grad_norm": 0.13459385931491852, "learning_rate": 1.4572927613278111e-05, "loss": 0.0063, "step": 351300 }, { "epoch": 10.85893552574643, "grad_norm": 0.15650074183940887, "learning_rate": 1.4572463992087533e-05, "loss": 0.0051, "step": 351330 }, { "epoch": 10.859862768127588, 
"grad_norm": 0.0886085256934166, "learning_rate": 1.4572000370896953e-05, "loss": 0.0063, "step": 351360 }, { "epoch": 10.860790010508747, "grad_norm": 0.1504759043455124, "learning_rate": 1.4571536749706374e-05, "loss": 0.0057, "step": 351390 }, { "epoch": 10.861717252889905, "grad_norm": 0.15153169631958008, "learning_rate": 1.4571073128515796e-05, "loss": 0.007, "step": 351420 }, { "epoch": 10.862644495271065, "grad_norm": 0.12047873437404633, "learning_rate": 1.4570609507325216e-05, "loss": 0.0054, "step": 351450 }, { "epoch": 10.863571737652222, "grad_norm": 0.07641997933387756, "learning_rate": 1.4570145886134637e-05, "loss": 0.0059, "step": 351480 }, { "epoch": 10.86449898003338, "grad_norm": 0.07633377611637115, "learning_rate": 1.4569682264944059e-05, "loss": 0.0061, "step": 351510 }, { "epoch": 10.86542622241454, "grad_norm": 0.13768750429153442, "learning_rate": 1.4569218643753478e-05, "loss": 0.0057, "step": 351540 }, { "epoch": 10.866353464795697, "grad_norm": 0.13292716443538666, "learning_rate": 1.45687550225629e-05, "loss": 0.006, "step": 351570 }, { "epoch": 10.867280707176857, "grad_norm": 0.08026513457298279, "learning_rate": 1.4568291401372321e-05, "loss": 0.0053, "step": 351600 }, { "epoch": 10.868207949558014, "grad_norm": 0.12783457338809967, "learning_rate": 1.456782778018174e-05, "loss": 0.0064, "step": 351630 }, { "epoch": 10.869135191939172, "grad_norm": 0.11125443875789642, "learning_rate": 1.4567364158991163e-05, "loss": 0.0056, "step": 351660 }, { "epoch": 10.870062434320332, "grad_norm": 0.20571869611740112, "learning_rate": 1.456690053780058e-05, "loss": 0.0062, "step": 351690 }, { "epoch": 10.87098967670149, "grad_norm": 0.10295373201370239, "learning_rate": 1.4566436916610002e-05, "loss": 0.0064, "step": 351720 }, { "epoch": 10.871916919082649, "grad_norm": 0.11905106902122498, "learning_rate": 1.4565973295419424e-05, "loss": 0.0058, "step": 351750 }, { "epoch": 10.872844161463807, "grad_norm": 0.0950203612446785, "learning_rate": 
1.4565509674228844e-05, "loss": 0.0055, "step": 351780 }, { "epoch": 10.873771403844966, "grad_norm": 0.10574020445346832, "learning_rate": 1.4565046053038265e-05, "loss": 0.0062, "step": 351810 }, { "epoch": 10.874698646226124, "grad_norm": 0.1557956486940384, "learning_rate": 1.4564582431847687e-05, "loss": 0.0061, "step": 351840 }, { "epoch": 10.875625888607281, "grad_norm": 0.14880585670471191, "learning_rate": 1.4564118810657106e-05, "loss": 0.0052, "step": 351870 }, { "epoch": 10.876553130988441, "grad_norm": 0.09131638705730438, "learning_rate": 1.4563655189466528e-05, "loss": 0.0058, "step": 351900 }, { "epoch": 10.877480373369599, "grad_norm": 0.07679475843906403, "learning_rate": 1.456319156827595e-05, "loss": 0.006, "step": 351930 }, { "epoch": 10.878407615750756, "grad_norm": 0.09551321715116501, "learning_rate": 1.456272794708537e-05, "loss": 0.0063, "step": 351960 }, { "epoch": 10.879334858131916, "grad_norm": 0.18033015727996826, "learning_rate": 1.456226432589479e-05, "loss": 0.006, "step": 351990 }, { "epoch": 10.880262100513074, "grad_norm": 0.08118066936731339, "learning_rate": 1.456180070470421e-05, "loss": 0.0054, "step": 352020 }, { "epoch": 10.881189342894233, "grad_norm": 0.13216114044189453, "learning_rate": 1.4561337083513632e-05, "loss": 0.0051, "step": 352050 }, { "epoch": 10.88211658527539, "grad_norm": 0.1738165318965912, "learning_rate": 1.4560873462323054e-05, "loss": 0.0064, "step": 352080 }, { "epoch": 10.88304382765655, "grad_norm": 0.11411089450120926, "learning_rate": 1.4560409841132472e-05, "loss": 0.0057, "step": 352110 }, { "epoch": 10.883971070037708, "grad_norm": 0.08953256905078888, "learning_rate": 1.4559946219941893e-05, "loss": 0.0064, "step": 352140 }, { "epoch": 10.884898312418866, "grad_norm": 0.08922409266233444, "learning_rate": 1.4559482598751315e-05, "loss": 0.0057, "step": 352170 }, { "epoch": 10.885825554800025, "grad_norm": 0.10512620955705643, "learning_rate": 1.4559018977560735e-05, "loss": 0.0056, "step": 
352200 }, { "epoch": 10.886752797181183, "grad_norm": 0.08207251876592636, "learning_rate": 1.4558555356370156e-05, "loss": 0.0061, "step": 352230 }, { "epoch": 10.887680039562342, "grad_norm": 0.08509353548288345, "learning_rate": 1.4558091735179578e-05, "loss": 0.006, "step": 352260 }, { "epoch": 10.8886072819435, "grad_norm": 0.10381447523832321, "learning_rate": 1.4557628113988997e-05, "loss": 0.006, "step": 352290 }, { "epoch": 10.889534524324658, "grad_norm": 0.15279556810855865, "learning_rate": 1.4557164492798419e-05, "loss": 0.0061, "step": 352320 }, { "epoch": 10.890461766705817, "grad_norm": 0.057332709431648254, "learning_rate": 1.4556700871607839e-05, "loss": 0.0064, "step": 352350 }, { "epoch": 10.891389009086975, "grad_norm": 0.15636278688907623, "learning_rate": 1.455623725041726e-05, "loss": 0.0054, "step": 352380 }, { "epoch": 10.892316251468134, "grad_norm": 0.0893678069114685, "learning_rate": 1.4555773629226682e-05, "loss": 0.0061, "step": 352410 }, { "epoch": 10.893243493849292, "grad_norm": 0.13026434183120728, "learning_rate": 1.4555310008036101e-05, "loss": 0.0056, "step": 352440 }, { "epoch": 10.89417073623045, "grad_norm": 0.145889014005661, "learning_rate": 1.4554846386845523e-05, "loss": 0.0056, "step": 352470 }, { "epoch": 10.89509797861161, "grad_norm": 0.08744151890277863, "learning_rate": 1.4554382765654945e-05, "loss": 0.0061, "step": 352500 }, { "epoch": 10.896025220992767, "grad_norm": 0.06662679463624954, "learning_rate": 1.4553919144464364e-05, "loss": 0.006, "step": 352530 }, { "epoch": 10.896952463373927, "grad_norm": 0.08850163221359253, "learning_rate": 1.4553455523273786e-05, "loss": 0.0057, "step": 352560 }, { "epoch": 10.897879705755084, "grad_norm": 0.1473352015018463, "learning_rate": 1.4552991902083207e-05, "loss": 0.0055, "step": 352590 }, { "epoch": 10.898806948136244, "grad_norm": 0.09198912233114243, "learning_rate": 1.4552528280892625e-05, "loss": 0.0062, "step": 352620 }, { "epoch": 10.899734190517401, 
"grad_norm": 0.13334278762340546, "learning_rate": 1.4552064659702047e-05, "loss": 0.0055, "step": 352650 }, { "epoch": 10.90066143289856, "grad_norm": 0.09347257018089294, "learning_rate": 1.4551601038511467e-05, "loss": 0.0059, "step": 352680 }, { "epoch": 10.901588675279719, "grad_norm": 0.13415494561195374, "learning_rate": 1.4551137417320888e-05, "loss": 0.006, "step": 352710 }, { "epoch": 10.902515917660876, "grad_norm": 0.13588853180408478, "learning_rate": 1.455067379613031e-05, "loss": 0.0061, "step": 352740 }, { "epoch": 10.903443160042034, "grad_norm": 0.148152694106102, "learning_rate": 1.455021017493973e-05, "loss": 0.0054, "step": 352770 }, { "epoch": 10.904370402423194, "grad_norm": 0.12440118938684464, "learning_rate": 1.4549746553749151e-05, "loss": 0.0057, "step": 352800 }, { "epoch": 10.905297644804351, "grad_norm": 0.17623519897460938, "learning_rate": 1.4549282932558573e-05, "loss": 0.0057, "step": 352830 }, { "epoch": 10.90622488718551, "grad_norm": 0.11869896203279495, "learning_rate": 1.4548819311367992e-05, "loss": 0.0058, "step": 352860 }, { "epoch": 10.907152129566668, "grad_norm": 0.09907718002796173, "learning_rate": 1.4548355690177414e-05, "loss": 0.0058, "step": 352890 }, { "epoch": 10.908079371947828, "grad_norm": 0.10867348313331604, "learning_rate": 1.4547892068986835e-05, "loss": 0.0059, "step": 352920 }, { "epoch": 10.909006614328986, "grad_norm": 0.12568646669387817, "learning_rate": 1.4547428447796255e-05, "loss": 0.0053, "step": 352950 }, { "epoch": 10.909933856710143, "grad_norm": 0.0749095156788826, "learning_rate": 1.4546964826605677e-05, "loss": 0.0055, "step": 352980 }, { "epoch": 10.910861099091303, "grad_norm": 0.12625934183597565, "learning_rate": 1.4546501205415098e-05, "loss": 0.0058, "step": 353010 }, { "epoch": 10.91178834147246, "grad_norm": 0.06922677904367447, "learning_rate": 1.4546037584224516e-05, "loss": 0.0058, "step": 353040 }, { "epoch": 10.91271558385362, "grad_norm": 0.12555648386478424, 
"learning_rate": 1.454557396303394e-05, "loss": 0.0055, "step": 353070 }, { "epoch": 10.913642826234778, "grad_norm": 0.08277145028114319, "learning_rate": 1.4545110341843358e-05, "loss": 0.0057, "step": 353100 }, { "epoch": 10.914570068615935, "grad_norm": 0.09020286053419113, "learning_rate": 1.454464672065278e-05, "loss": 0.005, "step": 353130 }, { "epoch": 10.915497310997095, "grad_norm": 0.0908655896782875, "learning_rate": 1.45441830994622e-05, "loss": 0.0064, "step": 353160 }, { "epoch": 10.916424553378253, "grad_norm": 0.1403597742319107, "learning_rate": 1.454371947827162e-05, "loss": 0.0051, "step": 353190 }, { "epoch": 10.917351795759412, "grad_norm": 0.08972632884979248, "learning_rate": 1.4543255857081042e-05, "loss": 0.0056, "step": 353220 }, { "epoch": 10.91827903814057, "grad_norm": 0.07014885544776917, "learning_rate": 1.4542792235890464e-05, "loss": 0.0057, "step": 353250 }, { "epoch": 10.919206280521728, "grad_norm": 0.057864297181367874, "learning_rate": 1.4542328614699883e-05, "loss": 0.0059, "step": 353280 }, { "epoch": 10.920133522902887, "grad_norm": 0.08486784994602203, "learning_rate": 1.4541864993509305e-05, "loss": 0.0056, "step": 353310 }, { "epoch": 10.921060765284045, "grad_norm": 0.16073192656040192, "learning_rate": 1.4541401372318726e-05, "loss": 0.0064, "step": 353340 }, { "epoch": 10.921988007665204, "grad_norm": 0.0892469584941864, "learning_rate": 1.4540937751128146e-05, "loss": 0.0056, "step": 353370 }, { "epoch": 10.922915250046362, "grad_norm": 0.17568299174308777, "learning_rate": 1.4540474129937568e-05, "loss": 0.006, "step": 353400 }, { "epoch": 10.923842492427521, "grad_norm": 0.07625900954008102, "learning_rate": 1.4540010508746987e-05, "loss": 0.0057, "step": 353430 }, { "epoch": 10.92476973480868, "grad_norm": 0.08084189891815186, "learning_rate": 1.4539546887556409e-05, "loss": 0.0051, "step": 353460 }, { "epoch": 10.925696977189837, "grad_norm": 0.11040126532316208, "learning_rate": 1.453908326636583e-05, "loss": 
0.0056, "step": 353490 }, { "epoch": 10.926624219570996, "grad_norm": 0.07449222356081009, "learning_rate": 1.4538619645175249e-05, "loss": 0.0061, "step": 353520 }, { "epoch": 10.927551461952154, "grad_norm": 0.14551272988319397, "learning_rate": 1.453815602398467e-05, "loss": 0.0062, "step": 353550 }, { "epoch": 10.928478704333312, "grad_norm": 0.12448850274085999, "learning_rate": 1.4537692402794092e-05, "loss": 0.0056, "step": 353580 }, { "epoch": 10.929405946714471, "grad_norm": 0.13071583211421967, "learning_rate": 1.4537228781603511e-05, "loss": 0.0059, "step": 353610 }, { "epoch": 10.930333189095629, "grad_norm": 0.0946689322590828, "learning_rate": 1.4536765160412933e-05, "loss": 0.006, "step": 353640 }, { "epoch": 10.931260431476788, "grad_norm": 0.10840942710638046, "learning_rate": 1.4536301539222354e-05, "loss": 0.006, "step": 353670 }, { "epoch": 10.932187673857946, "grad_norm": 0.08792218565940857, "learning_rate": 1.4535837918031774e-05, "loss": 0.0055, "step": 353700 }, { "epoch": 10.933114916239106, "grad_norm": 0.1687113344669342, "learning_rate": 1.4535374296841196e-05, "loss": 0.0058, "step": 353730 }, { "epoch": 10.934042158620263, "grad_norm": 0.1506793051958084, "learning_rate": 1.4534910675650616e-05, "loss": 0.0054, "step": 353760 }, { "epoch": 10.934969401001421, "grad_norm": 0.059095028787851334, "learning_rate": 1.4534447054460037e-05, "loss": 0.0064, "step": 353790 }, { "epoch": 10.93589664338258, "grad_norm": 0.09597644954919815, "learning_rate": 1.4533983433269459e-05, "loss": 0.0057, "step": 353820 }, { "epoch": 10.936823885763738, "grad_norm": 0.10523030161857605, "learning_rate": 1.4533519812078878e-05, "loss": 0.0061, "step": 353850 }, { "epoch": 10.937751128144898, "grad_norm": 0.10998250544071198, "learning_rate": 1.45330561908883e-05, "loss": 0.005, "step": 353880 }, { "epoch": 10.938678370526056, "grad_norm": 0.09264729917049408, "learning_rate": 1.4532592569697721e-05, "loss": 0.0051, "step": 353910 }, { "epoch": 
10.939605612907213, "grad_norm": 0.0802173763513565, "learning_rate": 1.4532128948507141e-05, "loss": 0.0055, "step": 353940 }, { "epoch": 10.940532855288373, "grad_norm": 0.10878768563270569, "learning_rate": 1.4531665327316563e-05, "loss": 0.0062, "step": 353970 }, { "epoch": 10.94146009766953, "grad_norm": 0.12574481964111328, "learning_rate": 1.4531201706125984e-05, "loss": 0.0059, "step": 354000 }, { "epoch": 10.94238734005069, "grad_norm": 0.12301818281412125, "learning_rate": 1.4530738084935402e-05, "loss": 0.0059, "step": 354030 }, { "epoch": 10.943314582431848, "grad_norm": 0.14636914432048798, "learning_rate": 1.4530274463744824e-05, "loss": 0.0058, "step": 354060 }, { "epoch": 10.944241824813005, "grad_norm": 0.10960905253887177, "learning_rate": 1.4529810842554244e-05, "loss": 0.0059, "step": 354090 }, { "epoch": 10.945169067194165, "grad_norm": 0.11068854480981827, "learning_rate": 1.4529347221363665e-05, "loss": 0.0061, "step": 354120 }, { "epoch": 10.946096309575323, "grad_norm": 0.08231282979249954, "learning_rate": 1.4528883600173087e-05, "loss": 0.0055, "step": 354150 }, { "epoch": 10.947023551956482, "grad_norm": 0.0829789862036705, "learning_rate": 1.4528419978982506e-05, "loss": 0.0064, "step": 354180 }, { "epoch": 10.94795079433764, "grad_norm": 0.09199004620313644, "learning_rate": 1.4527956357791928e-05, "loss": 0.0056, "step": 354210 }, { "epoch": 10.9488780367188, "grad_norm": 0.11587973684072495, "learning_rate": 1.452749273660135e-05, "loss": 0.0057, "step": 354240 }, { "epoch": 10.949805279099957, "grad_norm": 0.13515955209732056, "learning_rate": 1.452702911541077e-05, "loss": 0.006, "step": 354270 }, { "epoch": 10.950732521481115, "grad_norm": 0.0846065804362297, "learning_rate": 1.452656549422019e-05, "loss": 0.0055, "step": 354300 }, { "epoch": 10.951659763862274, "grad_norm": 0.11606954038143158, "learning_rate": 1.4526101873029612e-05, "loss": 0.0053, "step": 354330 }, { "epoch": 10.952587006243432, "grad_norm": 
0.10303125530481339, "learning_rate": 1.4525638251839032e-05, "loss": 0.0056, "step": 354360 }, { "epoch": 10.95351424862459, "grad_norm": 0.11799956858158112, "learning_rate": 1.4525174630648454e-05, "loss": 0.0061, "step": 354390 }, { "epoch": 10.954441491005749, "grad_norm": 0.10249587148427963, "learning_rate": 1.4524711009457872e-05, "loss": 0.006, "step": 354420 }, { "epoch": 10.955368733386907, "grad_norm": 0.09086412936449051, "learning_rate": 1.4524247388267293e-05, "loss": 0.0059, "step": 354450 }, { "epoch": 10.956295975768066, "grad_norm": 0.07613720744848251, "learning_rate": 1.4523783767076716e-05, "loss": 0.0063, "step": 354480 }, { "epoch": 10.957223218149224, "grad_norm": 0.13967590034008026, "learning_rate": 1.4523320145886135e-05, "loss": 0.006, "step": 354510 }, { "epoch": 10.958150460530383, "grad_norm": 0.08122322708368301, "learning_rate": 1.4522856524695556e-05, "loss": 0.006, "step": 354540 }, { "epoch": 10.959077702911541, "grad_norm": 0.18120789527893066, "learning_rate": 1.4522408357544664e-05, "loss": 0.0057, "step": 354570 }, { "epoch": 10.960004945292699, "grad_norm": 0.08826358616352081, "learning_rate": 1.4521944736354084e-05, "loss": 0.0058, "step": 354600 }, { "epoch": 10.960932187673858, "grad_norm": 0.12875555455684662, "learning_rate": 1.4521481115163505e-05, "loss": 0.0057, "step": 354630 }, { "epoch": 10.961859430055016, "grad_norm": 0.08940675854682922, "learning_rate": 1.4521017493972927e-05, "loss": 0.0054, "step": 354660 }, { "epoch": 10.962786672436176, "grad_norm": 0.1937655657529831, "learning_rate": 1.4520553872782347e-05, "loss": 0.0065, "step": 354690 }, { "epoch": 10.963713914817333, "grad_norm": 0.050079576671123505, "learning_rate": 1.4520090251591768e-05, "loss": 0.0056, "step": 354720 }, { "epoch": 10.964641157198491, "grad_norm": 0.11701998859643936, "learning_rate": 1.451962663040119e-05, "loss": 0.006, "step": 354750 }, { "epoch": 10.96556839957965, "grad_norm": 0.08159836381673813, "learning_rate": 
1.4519163009210608e-05, "loss": 0.0061, "step": 354780 }, { "epoch": 10.966495641960808, "grad_norm": 0.06164802610874176, "learning_rate": 1.451869938802003e-05, "loss": 0.006, "step": 354810 }, { "epoch": 10.967422884341968, "grad_norm": 0.097101129591465, "learning_rate": 1.4518235766829449e-05, "loss": 0.0065, "step": 354840 }, { "epoch": 10.968350126723125, "grad_norm": 0.09503050893545151, "learning_rate": 1.451777214563887e-05, "loss": 0.0059, "step": 354870 }, { "epoch": 10.969277369104283, "grad_norm": 0.13053078949451447, "learning_rate": 1.4517308524448292e-05, "loss": 0.006, "step": 354900 }, { "epoch": 10.970204611485443, "grad_norm": 0.09199227392673492, "learning_rate": 1.4516844903257712e-05, "loss": 0.0061, "step": 354930 }, { "epoch": 10.9711318538666, "grad_norm": 0.11238503456115723, "learning_rate": 1.4516381282067133e-05, "loss": 0.006, "step": 354960 }, { "epoch": 10.97205909624776, "grad_norm": 0.10768607258796692, "learning_rate": 1.4515917660876555e-05, "loss": 0.0057, "step": 354990 }, { "epoch": 10.972986338628917, "grad_norm": 0.09423354268074036, "learning_rate": 1.4515454039685975e-05, "loss": 0.0058, "step": 355020 }, { "epoch": 10.973913581010077, "grad_norm": 0.06229470297694206, "learning_rate": 1.4514990418495396e-05, "loss": 0.0062, "step": 355050 }, { "epoch": 10.974840823391235, "grad_norm": 0.0735345333814621, "learning_rate": 1.4514526797304818e-05, "loss": 0.0064, "step": 355080 }, { "epoch": 10.975768065772392, "grad_norm": 0.12404385209083557, "learning_rate": 1.4514063176114238e-05, "loss": 0.0057, "step": 355110 }, { "epoch": 10.976695308153552, "grad_norm": 0.16322103142738342, "learning_rate": 1.4513599554923659e-05, "loss": 0.0053, "step": 355140 }, { "epoch": 10.97762255053471, "grad_norm": 0.10305432975292206, "learning_rate": 1.4513135933733077e-05, "loss": 0.0056, "step": 355170 }, { "epoch": 10.978549792915867, "grad_norm": 0.1471758335828781, "learning_rate": 1.4512672312542499e-05, "loss": 0.006, "step": 
355200 }, { "epoch": 10.979477035297027, "grad_norm": 0.13352690637111664, "learning_rate": 1.4512208691351922e-05, "loss": 0.0062, "step": 355230 }, { "epoch": 10.980404277678184, "grad_norm": 0.07501184195280075, "learning_rate": 1.451174507016134e-05, "loss": 0.0054, "step": 355260 }, { "epoch": 10.981331520059344, "grad_norm": 0.09535759687423706, "learning_rate": 1.4511281448970762e-05, "loss": 0.0064, "step": 355290 }, { "epoch": 10.982258762440502, "grad_norm": 0.09054501354694366, "learning_rate": 1.4510817827780183e-05, "loss": 0.0056, "step": 355320 }, { "epoch": 10.983186004821661, "grad_norm": 0.14352567493915558, "learning_rate": 1.4510354206589603e-05, "loss": 0.0058, "step": 355350 }, { "epoch": 10.984113247202819, "grad_norm": 0.12210534512996674, "learning_rate": 1.4509890585399024e-05, "loss": 0.0058, "step": 355380 }, { "epoch": 10.985040489583977, "grad_norm": 0.12121164798736572, "learning_rate": 1.4509426964208446e-05, "loss": 0.0059, "step": 355410 }, { "epoch": 10.985967731965136, "grad_norm": 0.12371654808521271, "learning_rate": 1.4508963343017866e-05, "loss": 0.0053, "step": 355440 }, { "epoch": 10.986894974346294, "grad_norm": 0.0806986466050148, "learning_rate": 1.4508499721827287e-05, "loss": 0.0057, "step": 355470 }, { "epoch": 10.987822216727453, "grad_norm": 0.11743951588869095, "learning_rate": 1.4508036100636707e-05, "loss": 0.0055, "step": 355500 }, { "epoch": 10.988749459108611, "grad_norm": 0.14457698166370392, "learning_rate": 1.4507572479446128e-05, "loss": 0.0059, "step": 355530 }, { "epoch": 10.989676701489769, "grad_norm": 0.08668765425682068, "learning_rate": 1.450710885825555e-05, "loss": 0.0055, "step": 355560 }, { "epoch": 10.990603943870928, "grad_norm": 0.05021408200263977, "learning_rate": 1.450664523706497e-05, "loss": 0.0046, "step": 355590 }, { "epoch": 10.991531186252086, "grad_norm": 0.10856585204601288, "learning_rate": 1.4506181615874391e-05, "loss": 0.0064, "step": 355620 }, { "epoch": 10.992458428633245, 
"grad_norm": 0.14580483734607697, "learning_rate": 1.4505717994683813e-05, "loss": 0.0061, "step": 355650 }, { "epoch": 10.993385671014403, "grad_norm": 0.11768047511577606, "learning_rate": 1.4505254373493231e-05, "loss": 0.0061, "step": 355680 }, { "epoch": 10.99431291339556, "grad_norm": 0.11388706415891647, "learning_rate": 1.4504790752302652e-05, "loss": 0.0064, "step": 355710 }, { "epoch": 10.99524015577672, "grad_norm": 0.14270992577075958, "learning_rate": 1.4504327131112076e-05, "loss": 0.0062, "step": 355740 }, { "epoch": 10.996167398157878, "grad_norm": 0.12251786887645721, "learning_rate": 1.4503863509921494e-05, "loss": 0.0053, "step": 355770 }, { "epoch": 10.997094640539038, "grad_norm": 0.11335193365812302, "learning_rate": 1.4503399888730915e-05, "loss": 0.006, "step": 355800 }, { "epoch": 10.998021882920195, "grad_norm": 0.12428199499845505, "learning_rate": 1.4502936267540335e-05, "loss": 0.0059, "step": 355830 }, { "epoch": 10.998949125301355, "grad_norm": 0.09668534249067307, "learning_rate": 1.4502472646349757e-05, "loss": 0.006, "step": 355860 }, { "epoch": 10.999876367682512, "grad_norm": 0.08192130923271179, "learning_rate": 1.4502009025159178e-05, "loss": 0.0063, "step": 355890 }, { "epoch": 11.00080361006367, "grad_norm": 0.08336547017097473, "learning_rate": 1.4501545403968598e-05, "loss": 0.0054, "step": 355920 }, { "epoch": 11.00173085244483, "grad_norm": 0.12696228921413422, "learning_rate": 1.450108178277802e-05, "loss": 0.0058, "step": 355950 }, { "epoch": 11.002658094825987, "grad_norm": 0.08822599053382874, "learning_rate": 1.4500618161587441e-05, "loss": 0.0054, "step": 355980 }, { "epoch": 11.003585337207147, "grad_norm": 0.12658365070819855, "learning_rate": 1.450015454039686e-05, "loss": 0.005, "step": 356010 }, { "epoch": 11.004512579588305, "grad_norm": 0.13567261397838593, "learning_rate": 1.4499690919206282e-05, "loss": 0.0044, "step": 356040 }, { "epoch": 11.005439821969462, "grad_norm": 0.08793080598115921, 
"learning_rate": 1.4499227298015704e-05, "loss": 0.0065, "step": 356070 }, { "epoch": 11.006367064350622, "grad_norm": 0.14632639288902283, "learning_rate": 1.4498763676825124e-05, "loss": 0.0056, "step": 356100 }, { "epoch": 11.00729430673178, "grad_norm": 0.1395646631717682, "learning_rate": 1.4498300055634545e-05, "loss": 0.0057, "step": 356130 }, { "epoch": 11.008221549112939, "grad_norm": 0.09936509281396866, "learning_rate": 1.4497836434443967e-05, "loss": 0.0054, "step": 356160 }, { "epoch": 11.009148791494097, "grad_norm": 0.06963544338941574, "learning_rate": 1.4497372813253385e-05, "loss": 0.0054, "step": 356190 }, { "epoch": 11.010076033875254, "grad_norm": 0.12496954947710037, "learning_rate": 1.4496909192062806e-05, "loss": 0.0063, "step": 356220 }, { "epoch": 11.011003276256414, "grad_norm": 0.23420965671539307, "learning_rate": 1.4496445570872226e-05, "loss": 0.0057, "step": 356250 }, { "epoch": 11.011930518637572, "grad_norm": 0.10599618405103683, "learning_rate": 1.4495981949681647e-05, "loss": 0.0056, "step": 356280 }, { "epoch": 11.012857761018731, "grad_norm": 0.14888978004455566, "learning_rate": 1.4495518328491069e-05, "loss": 0.0058, "step": 356310 }, { "epoch": 11.013785003399889, "grad_norm": 0.1067880168557167, "learning_rate": 1.4495054707300489e-05, "loss": 0.0059, "step": 356340 }, { "epoch": 11.014712245781046, "grad_norm": 0.1036602333188057, "learning_rate": 1.449459108610991e-05, "loss": 0.0049, "step": 356370 }, { "epoch": 11.015639488162206, "grad_norm": 0.1436978429555893, "learning_rate": 1.4494127464919332e-05, "loss": 0.0058, "step": 356400 }, { "epoch": 11.016566730543364, "grad_norm": 0.11120709776878357, "learning_rate": 1.4493663843728752e-05, "loss": 0.0057, "step": 356430 }, { "epoch": 11.017493972924523, "grad_norm": 0.099333755671978, "learning_rate": 1.4493200222538173e-05, "loss": 0.0059, "step": 356460 }, { "epoch": 11.01842121530568, "grad_norm": 0.08169715851545334, "learning_rate": 1.4492736601347595e-05, "loss": 
0.005, "step": 356490 }, { "epoch": 11.019348457686839, "grad_norm": 0.11106928437948227, "learning_rate": 1.4492272980157014e-05, "loss": 0.0054, "step": 356520 }, { "epoch": 11.020275700067998, "grad_norm": 0.13979285955429077, "learning_rate": 1.4491809358966436e-05, "loss": 0.0059, "step": 356550 }, { "epoch": 11.021202942449156, "grad_norm": 0.09494137018918991, "learning_rate": 1.4491345737775854e-05, "loss": 0.0059, "step": 356580 }, { "epoch": 11.022130184830315, "grad_norm": 0.10304688662290573, "learning_rate": 1.4490882116585277e-05, "loss": 0.0055, "step": 356610 }, { "epoch": 11.023057427211473, "grad_norm": 0.12306475639343262, "learning_rate": 1.4490418495394699e-05, "loss": 0.0058, "step": 356640 }, { "epoch": 11.02398466959263, "grad_norm": 0.08722596615552902, "learning_rate": 1.4489954874204117e-05, "loss": 0.0057, "step": 356670 }, { "epoch": 11.02491191197379, "grad_norm": 0.059546057134866714, "learning_rate": 1.4489491253013538e-05, "loss": 0.0055, "step": 356700 }, { "epoch": 11.025839154354948, "grad_norm": 0.10365262627601624, "learning_rate": 1.448902763182296e-05, "loss": 0.0059, "step": 356730 }, { "epoch": 11.026766396736107, "grad_norm": 0.1256224811077118, "learning_rate": 1.448856401063238e-05, "loss": 0.0057, "step": 356760 }, { "epoch": 11.027693639117265, "grad_norm": 0.07415167987346649, "learning_rate": 1.4488100389441801e-05, "loss": 0.0054, "step": 356790 }, { "epoch": 11.028620881498425, "grad_norm": 0.1420895755290985, "learning_rate": 1.4487636768251223e-05, "loss": 0.005, "step": 356820 }, { "epoch": 11.029548123879582, "grad_norm": 0.07550894469022751, "learning_rate": 1.4487173147060643e-05, "loss": 0.0057, "step": 356850 }, { "epoch": 11.03047536626074, "grad_norm": 0.09152273833751678, "learning_rate": 1.4486709525870064e-05, "loss": 0.0066, "step": 356880 }, { "epoch": 11.0314026086419, "grad_norm": 0.11434576660394669, "learning_rate": 1.4486245904679484e-05, "loss": 0.0053, "step": 356910 }, { "epoch": 
11.032329851023057, "grad_norm": 0.1273515522480011, "learning_rate": 1.4485782283488905e-05, "loss": 0.0058, "step": 356940 }, { "epoch": 11.033257093404217, "grad_norm": 0.13374121487140656, "learning_rate": 1.4485318662298327e-05, "loss": 0.0055, "step": 356970 }, { "epoch": 11.034184335785374, "grad_norm": 0.0886123925447464, "learning_rate": 1.4484855041107747e-05, "loss": 0.0061, "step": 357000 }, { "epoch": 11.035111578166532, "grad_norm": 0.14178846776485443, "learning_rate": 1.4484391419917168e-05, "loss": 0.0062, "step": 357030 }, { "epoch": 11.036038820547692, "grad_norm": 0.11169441044330597, "learning_rate": 1.448392779872659e-05, "loss": 0.006, "step": 357060 }, { "epoch": 11.03696606292885, "grad_norm": 0.0763213261961937, "learning_rate": 1.4483464177536008e-05, "loss": 0.0058, "step": 357090 }, { "epoch": 11.037893305310009, "grad_norm": 0.08157742768526077, "learning_rate": 1.448300055634543e-05, "loss": 0.0055, "step": 357120 }, { "epoch": 11.038820547691166, "grad_norm": 0.1145334392786026, "learning_rate": 1.4482536935154853e-05, "loss": 0.0056, "step": 357150 }, { "epoch": 11.039747790072324, "grad_norm": 0.05860274285078049, "learning_rate": 1.448207331396427e-05, "loss": 0.0057, "step": 357180 }, { "epoch": 11.040675032453484, "grad_norm": 0.1131146177649498, "learning_rate": 1.4481609692773692e-05, "loss": 0.0053, "step": 357210 }, { "epoch": 11.041602274834641, "grad_norm": 0.12500672042369843, "learning_rate": 1.4481146071583112e-05, "loss": 0.0054, "step": 357240 }, { "epoch": 11.042529517215801, "grad_norm": 0.03915470093488693, "learning_rate": 1.4480682450392533e-05, "loss": 0.0057, "step": 357270 }, { "epoch": 11.043456759596959, "grad_norm": 0.14248299598693848, "learning_rate": 1.4480218829201955e-05, "loss": 0.0057, "step": 357300 }, { "epoch": 11.044384001978116, "grad_norm": 0.10604985058307648, "learning_rate": 1.4479755208011375e-05, "loss": 0.0061, "step": 357330 }, { "epoch": 11.045311244359276, "grad_norm": 
0.1256852000951767, "learning_rate": 1.4479291586820796e-05, "loss": 0.0059, "step": 357360 }, { "epoch": 11.046238486740434, "grad_norm": 0.1079665943980217, "learning_rate": 1.4478827965630218e-05, "loss": 0.0056, "step": 357390 }, { "epoch": 11.047165729121593, "grad_norm": 0.11825352162122726, "learning_rate": 1.4478364344439638e-05, "loss": 0.006, "step": 357420 }, { "epoch": 11.04809297150275, "grad_norm": 0.08178645372390747, "learning_rate": 1.4477900723249059e-05, "loss": 0.0056, "step": 357450 }, { "epoch": 11.049020213883908, "grad_norm": 0.12317213416099548, "learning_rate": 1.447743710205848e-05, "loss": 0.0059, "step": 357480 }, { "epoch": 11.049947456265068, "grad_norm": 0.09078598767518997, "learning_rate": 1.44769734808679e-05, "loss": 0.005, "step": 357510 }, { "epoch": 11.050874698646226, "grad_norm": 0.12540210783481598, "learning_rate": 1.4476509859677322e-05, "loss": 0.0056, "step": 357540 }, { "epoch": 11.051801941027385, "grad_norm": 0.09209748357534409, "learning_rate": 1.447604623848674e-05, "loss": 0.0051, "step": 357570 }, { "epoch": 11.052729183408543, "grad_norm": 0.07051052153110504, "learning_rate": 1.4475582617296162e-05, "loss": 0.0059, "step": 357600 }, { "epoch": 11.053656425789702, "grad_norm": 0.14726339280605316, "learning_rate": 1.4475118996105583e-05, "loss": 0.0058, "step": 357630 }, { "epoch": 11.05458366817086, "grad_norm": 0.12670329213142395, "learning_rate": 1.4474655374915003e-05, "loss": 0.0055, "step": 357660 }, { "epoch": 11.055510910552018, "grad_norm": 0.10802768915891647, "learning_rate": 1.4474191753724424e-05, "loss": 0.006, "step": 357690 }, { "epoch": 11.056438152933177, "grad_norm": 0.06651054322719574, "learning_rate": 1.4473728132533846e-05, "loss": 0.0061, "step": 357720 }, { "epoch": 11.057365395314335, "grad_norm": 0.1539018601179123, "learning_rate": 1.4473264511343266e-05, "loss": 0.0057, "step": 357750 }, { "epoch": 11.058292637695494, "grad_norm": 0.12473249435424805, "learning_rate": 
1.4472800890152687e-05, "loss": 0.0058, "step": 357780 }, { "epoch": 11.059219880076652, "grad_norm": 0.1013927310705185, "learning_rate": 1.4472337268962109e-05, "loss": 0.0061, "step": 357810 }, { "epoch": 11.06014712245781, "grad_norm": 0.0885002538561821, "learning_rate": 1.4471873647771529e-05, "loss": 0.0054, "step": 357840 }, { "epoch": 11.06107436483897, "grad_norm": 0.12087825685739517, "learning_rate": 1.447141002658095e-05, "loss": 0.006, "step": 357870 }, { "epoch": 11.062001607220127, "grad_norm": 0.11964467167854309, "learning_rate": 1.447094640539037e-05, "loss": 0.0053, "step": 357900 }, { "epoch": 11.062928849601287, "grad_norm": 0.1413150429725647, "learning_rate": 1.4470482784199791e-05, "loss": 0.0057, "step": 357930 }, { "epoch": 11.063856091982444, "grad_norm": 0.11646905541419983, "learning_rate": 1.4470019163009213e-05, "loss": 0.0061, "step": 357960 }, { "epoch": 11.064783334363602, "grad_norm": 0.11440666764974594, "learning_rate": 1.4469555541818631e-05, "loss": 0.0062, "step": 357990 }, { "epoch": 11.065710576744761, "grad_norm": 0.1225864440202713, "learning_rate": 1.4469091920628054e-05, "loss": 0.0059, "step": 358020 }, { "epoch": 11.06663781912592, "grad_norm": 0.1628054976463318, "learning_rate": 1.4468628299437476e-05, "loss": 0.0054, "step": 358050 }, { "epoch": 11.067565061507079, "grad_norm": 0.08696436882019043, "learning_rate": 1.4468164678246894e-05, "loss": 0.0055, "step": 358080 }, { "epoch": 11.068492303888236, "grad_norm": 0.17345914244651794, "learning_rate": 1.4467701057056315e-05, "loss": 0.0056, "step": 358110 }, { "epoch": 11.069419546269394, "grad_norm": 0.06917047500610352, "learning_rate": 1.4467237435865737e-05, "loss": 0.0053, "step": 358140 }, { "epoch": 11.070346788650554, "grad_norm": 0.11595606058835983, "learning_rate": 1.4466773814675157e-05, "loss": 0.0061, "step": 358170 }, { "epoch": 11.071274031031711, "grad_norm": 0.07927954941987991, "learning_rate": 1.4466310193484578e-05, "loss": 0.0054, "step": 
358200 }, { "epoch": 11.07220127341287, "grad_norm": 0.09149271249771118, "learning_rate": 1.4465846572293998e-05, "loss": 0.0057, "step": 358230 }, { "epoch": 11.073128515794028, "grad_norm": 0.07799574732780457, "learning_rate": 1.446538295110342e-05, "loss": 0.0062, "step": 358260 }, { "epoch": 11.074055758175186, "grad_norm": 0.11611583083868027, "learning_rate": 1.4464919329912841e-05, "loss": 0.006, "step": 358290 }, { "epoch": 11.074983000556346, "grad_norm": 0.07678649574518204, "learning_rate": 1.446445570872226e-05, "loss": 0.0064, "step": 358320 }, { "epoch": 11.075910242937503, "grad_norm": 0.06585008651018143, "learning_rate": 1.4463992087531682e-05, "loss": 0.0058, "step": 358350 }, { "epoch": 11.076837485318663, "grad_norm": 0.07967641204595566, "learning_rate": 1.4463528466341104e-05, "loss": 0.0058, "step": 358380 }, { "epoch": 11.07776472769982, "grad_norm": 0.10968825966119766, "learning_rate": 1.4463064845150524e-05, "loss": 0.0064, "step": 358410 }, { "epoch": 11.078691970080978, "grad_norm": 0.13896989822387695, "learning_rate": 1.4462601223959945e-05, "loss": 0.0053, "step": 358440 }, { "epoch": 11.079619212462138, "grad_norm": 0.13713037967681885, "learning_rate": 1.4462137602769367e-05, "loss": 0.006, "step": 358470 }, { "epoch": 11.080546454843295, "grad_norm": 0.11385286599397659, "learning_rate": 1.4461673981578785e-05, "loss": 0.0063, "step": 358500 }, { "epoch": 11.081473697224455, "grad_norm": 0.0910702720284462, "learning_rate": 1.4461210360388206e-05, "loss": 0.0056, "step": 358530 }, { "epoch": 11.082400939605613, "grad_norm": 0.1266879439353943, "learning_rate": 1.4460746739197626e-05, "loss": 0.0062, "step": 358560 }, { "epoch": 11.083328181986772, "grad_norm": 0.13135087490081787, "learning_rate": 1.4460283118007048e-05, "loss": 0.0054, "step": 358590 }, { "epoch": 11.08425542436793, "grad_norm": 0.11349406838417053, "learning_rate": 1.4459819496816469e-05, "loss": 0.0056, "step": 358620 }, { "epoch": 11.085182666749088, 
"grad_norm": 0.13356199860572815, "learning_rate": 1.4459355875625889e-05, "loss": 0.0058, "step": 358650 }, { "epoch": 11.086109909130247, "grad_norm": 0.11913179606199265, "learning_rate": 1.445889225443531e-05, "loss": 0.0057, "step": 358680 }, { "epoch": 11.087037151511405, "grad_norm": 0.10772319883108139, "learning_rate": 1.4458428633244732e-05, "loss": 0.0055, "step": 358710 }, { "epoch": 11.087964393892564, "grad_norm": 0.13748575747013092, "learning_rate": 1.4457965012054152e-05, "loss": 0.0063, "step": 358740 }, { "epoch": 11.088891636273722, "grad_norm": 0.07247834652662277, "learning_rate": 1.4457501390863573e-05, "loss": 0.0049, "step": 358770 }, { "epoch": 11.08981887865488, "grad_norm": 0.13623374700546265, "learning_rate": 1.4457053223712681e-05, "loss": 0.0066, "step": 358800 }, { "epoch": 11.09074612103604, "grad_norm": 0.10270147770643234, "learning_rate": 1.44565896025221e-05, "loss": 0.0063, "step": 358830 }, { "epoch": 11.091673363417197, "grad_norm": 0.12516523897647858, "learning_rate": 1.445612598133152e-05, "loss": 0.0055, "step": 358860 }, { "epoch": 11.092600605798356, "grad_norm": Infinity, "learning_rate": 1.4455677814180629e-05, "loss": 0.0056, "step": 358890 }, { "epoch": 11.093527848179514, "grad_norm": 0.1115487664937973, "learning_rate": 1.4455214192990048e-05, "loss": 0.0053, "step": 358920 }, { "epoch": 11.094455090560672, "grad_norm": 0.13004463911056519, "learning_rate": 1.445475057179947e-05, "loss": 0.0057, "step": 358950 }, { "epoch": 11.095382332941831, "grad_norm": 0.10105010867118835, "learning_rate": 1.4454286950608891e-05, "loss": 0.0054, "step": 358980 }, { "epoch": 11.096309575322989, "grad_norm": 0.13006477057933807, "learning_rate": 1.4453823329418311e-05, "loss": 0.0056, "step": 359010 }, { "epoch": 11.097236817704148, "grad_norm": 0.10497987270355225, "learning_rate": 1.4453359708227733e-05, "loss": 0.0058, "step": 359040 }, { "epoch": 11.098164060085306, "grad_norm": 0.09548797458410263, "learning_rate": 
1.4452896087037154e-05, "loss": 0.0061, "step": 359070 }, { "epoch": 11.099091302466464, "grad_norm": 0.13066533207893372, "learning_rate": 1.4452432465846572e-05, "loss": 0.0058, "step": 359100 }, { "epoch": 11.100018544847623, "grad_norm": 0.11825308948755264, "learning_rate": 1.4451968844655994e-05, "loss": 0.0053, "step": 359130 }, { "epoch": 11.100945787228781, "grad_norm": 0.16514751315116882, "learning_rate": 1.4451505223465414e-05, "loss": 0.0055, "step": 359160 }, { "epoch": 11.10187302960994, "grad_norm": 0.11328119784593582, "learning_rate": 1.4451041602274835e-05, "loss": 0.0058, "step": 359190 }, { "epoch": 11.102800271991098, "grad_norm": 0.11103639751672745, "learning_rate": 1.4450577981084257e-05, "loss": 0.0053, "step": 359220 }, { "epoch": 11.103727514372256, "grad_norm": 0.11025114357471466, "learning_rate": 1.4450114359893677e-05, "loss": 0.0062, "step": 359250 }, { "epoch": 11.104654756753416, "grad_norm": 0.12427323311567307, "learning_rate": 1.4449650738703098e-05, "loss": 0.0054, "step": 359280 }, { "epoch": 11.105581999134573, "grad_norm": 0.1017773374915123, "learning_rate": 1.444918711751252e-05, "loss": 0.0056, "step": 359310 }, { "epoch": 11.106509241515733, "grad_norm": 0.1671295166015625, "learning_rate": 1.444872349632194e-05, "loss": 0.0056, "step": 359340 }, { "epoch": 11.10743648389689, "grad_norm": 0.11137412488460541, "learning_rate": 1.4448259875131361e-05, "loss": 0.0051, "step": 359370 }, { "epoch": 11.10836372627805, "grad_norm": 0.11306821554899216, "learning_rate": 1.4447796253940782e-05, "loss": 0.0061, "step": 359400 }, { "epoch": 11.109290968659208, "grad_norm": 0.1064894050359726, "learning_rate": 1.4447332632750202e-05, "loss": 0.006, "step": 359430 }, { "epoch": 11.110218211040365, "grad_norm": 0.13409559428691864, "learning_rate": 1.4446869011559624e-05, "loss": 0.0063, "step": 359460 }, { "epoch": 11.111145453421525, "grad_norm": 0.09072709828615189, "learning_rate": 1.4446405390369042e-05, "loss": 0.0055, "step": 
359490 }, { "epoch": 11.112072695802683, "grad_norm": 0.1544792652130127, "learning_rate": 1.4445941769178465e-05, "loss": 0.0057, "step": 359520 }, { "epoch": 11.112999938183842, "grad_norm": 0.11080439388751984, "learning_rate": 1.4445478147987887e-05, "loss": 0.0051, "step": 359550 }, { "epoch": 11.113927180565, "grad_norm": 0.06625624001026154, "learning_rate": 1.4445014526797305e-05, "loss": 0.0054, "step": 359580 }, { "epoch": 11.114854422946157, "grad_norm": 0.07029537856578827, "learning_rate": 1.4444550905606726e-05, "loss": 0.0064, "step": 359610 }, { "epoch": 11.115781665327317, "grad_norm": 0.083712138235569, "learning_rate": 1.4444087284416148e-05, "loss": 0.0059, "step": 359640 }, { "epoch": 11.116708907708475, "grad_norm": 0.5876331329345703, "learning_rate": 1.4443623663225567e-05, "loss": 0.0061, "step": 359670 }, { "epoch": 11.117636150089634, "grad_norm": 0.1349412053823471, "learning_rate": 1.4443160042034989e-05, "loss": 0.0052, "step": 359700 }, { "epoch": 11.118563392470792, "grad_norm": 0.12815304100513458, "learning_rate": 1.444269642084441e-05, "loss": 0.0052, "step": 359730 }, { "epoch": 11.11949063485195, "grad_norm": 0.13907624781131744, "learning_rate": 1.444223279965383e-05, "loss": 0.0059, "step": 359760 }, { "epoch": 11.120417877233109, "grad_norm": 0.06505465507507324, "learning_rate": 1.4441769178463252e-05, "loss": 0.006, "step": 359790 }, { "epoch": 11.121345119614267, "grad_norm": 0.1281256377696991, "learning_rate": 1.4441305557272672e-05, "loss": 0.0051, "step": 359820 }, { "epoch": 11.122272361995426, "grad_norm": 0.09723541140556335, "learning_rate": 1.4440841936082093e-05, "loss": 0.0061, "step": 359850 }, { "epoch": 11.123199604376584, "grad_norm": 0.10009215027093887, "learning_rate": 1.4440378314891515e-05, "loss": 0.0062, "step": 359880 }, { "epoch": 11.124126846757742, "grad_norm": 0.10350629687309265, "learning_rate": 1.4439914693700934e-05, "loss": 0.0062, "step": 359910 }, { "epoch": 11.125054089138901, 
"grad_norm": 0.08063945919275284, "learning_rate": 1.4439451072510356e-05, "loss": 0.0058, "step": 359940 }, { "epoch": 11.125981331520059, "grad_norm": 0.12424749881029129, "learning_rate": 1.4438987451319777e-05, "loss": 0.0058, "step": 359970 }, { "epoch": 11.126908573901218, "grad_norm": 0.1285274177789688, "learning_rate": 1.4438523830129196e-05, "loss": 0.006, "step": 360000 }, { "epoch": 11.127835816282376, "grad_norm": 0.08307076245546341, "learning_rate": 1.4438060208938619e-05, "loss": 0.0053, "step": 360030 }, { "epoch": 11.128763058663534, "grad_norm": 0.11414144933223724, "learning_rate": 1.443759658774804e-05, "loss": 0.0059, "step": 360060 }, { "epoch": 11.129690301044693, "grad_norm": 0.0925496518611908, "learning_rate": 1.4437132966557458e-05, "loss": 0.0059, "step": 360090 }, { "epoch": 11.130617543425851, "grad_norm": 0.20431587100028992, "learning_rate": 1.443666934536688e-05, "loss": 0.0054, "step": 360120 }, { "epoch": 11.13154478580701, "grad_norm": 0.1323956996202469, "learning_rate": 1.44362057241763e-05, "loss": 0.0062, "step": 360150 }, { "epoch": 11.132472028188168, "grad_norm": 0.1249520406126976, "learning_rate": 1.4435742102985721e-05, "loss": 0.006, "step": 360180 }, { "epoch": 11.133399270569328, "grad_norm": 0.0803162083029747, "learning_rate": 1.4435278481795143e-05, "loss": 0.0052, "step": 360210 }, { "epoch": 11.134326512950485, "grad_norm": 0.08702103048563004, "learning_rate": 1.4434814860604563e-05, "loss": 0.0055, "step": 360240 }, { "epoch": 11.135253755331643, "grad_norm": 0.0661626085639, "learning_rate": 1.4434351239413984e-05, "loss": 0.0049, "step": 360270 }, { "epoch": 11.136180997712803, "grad_norm": 0.1621781587600708, "learning_rate": 1.4433887618223406e-05, "loss": 0.0064, "step": 360300 }, { "epoch": 11.13710824009396, "grad_norm": 0.08583945780992508, "learning_rate": 1.4433423997032825e-05, "loss": 0.0062, "step": 360330 }, { "epoch": 11.13803548247512, "grad_norm": 0.09440871328115463, "learning_rate": 
1.4432960375842247e-05, "loss": 0.0051, "step": 360360 }, { "epoch": 11.138962724856277, "grad_norm": 0.12021759152412415, "learning_rate": 1.4432496754651668e-05, "loss": 0.0065, "step": 360390 }, { "epoch": 11.139889967237435, "grad_norm": 0.09713789075613022, "learning_rate": 1.4432033133461088e-05, "loss": 0.0057, "step": 360420 }, { "epoch": 11.140817209618595, "grad_norm": 0.08776593953371048, "learning_rate": 1.443156951227051e-05, "loss": 0.0053, "step": 360450 }, { "epoch": 11.141744451999752, "grad_norm": 0.08194795250892639, "learning_rate": 1.4431105891079928e-05, "loss": 0.0061, "step": 360480 }, { "epoch": 11.142671694380912, "grad_norm": 0.12571090459823608, "learning_rate": 1.4430657723929036e-05, "loss": 0.0054, "step": 360510 }, { "epoch": 11.14359893676207, "grad_norm": 0.20218485593795776, "learning_rate": 1.4430194102738457e-05, "loss": 0.0053, "step": 360540 }, { "epoch": 11.144526179143227, "grad_norm": 0.08021437376737595, "learning_rate": 1.4429730481547877e-05, "loss": 0.0056, "step": 360570 }, { "epoch": 11.145453421524387, "grad_norm": 0.09485387802124023, "learning_rate": 1.4429266860357299e-05, "loss": 0.0062, "step": 360600 }, { "epoch": 11.146380663905544, "grad_norm": 0.09888700395822525, "learning_rate": 1.442880323916672e-05, "loss": 0.0055, "step": 360630 }, { "epoch": 11.147307906286704, "grad_norm": 0.14998605847358704, "learning_rate": 1.442833961797614e-05, "loss": 0.0053, "step": 360660 }, { "epoch": 11.148235148667862, "grad_norm": 0.16585825383663177, "learning_rate": 1.4427875996785561e-05, "loss": 0.0055, "step": 360690 }, { "epoch": 11.14916239104902, "grad_norm": 0.08413979411125183, "learning_rate": 1.4427412375594983e-05, "loss": 0.0053, "step": 360720 }, { "epoch": 11.150089633430179, "grad_norm": 0.13060630857944489, "learning_rate": 1.4426948754404401e-05, "loss": 0.0059, "step": 360750 }, { "epoch": 11.151016875811337, "grad_norm": 0.08410421013832092, "learning_rate": 1.4426485133213824e-05, "loss": 0.0056, 
"step": 360780 }, { "epoch": 11.151944118192496, "grad_norm": 0.103472039103508, "learning_rate": 1.4426021512023246e-05, "loss": 0.0053, "step": 360810 }, { "epoch": 11.152871360573654, "grad_norm": 0.11531426757574081, "learning_rate": 1.4425557890832664e-05, "loss": 0.0064, "step": 360840 }, { "epoch": 11.153798602954812, "grad_norm": 0.1001739352941513, "learning_rate": 1.4425094269642085e-05, "loss": 0.0058, "step": 360870 }, { "epoch": 11.154725845335971, "grad_norm": 0.1442134529352188, "learning_rate": 1.4424630648451507e-05, "loss": 0.0062, "step": 360900 }, { "epoch": 11.155653087717129, "grad_norm": 0.09080793708562851, "learning_rate": 1.4424167027260927e-05, "loss": 0.006, "step": 360930 }, { "epoch": 11.156580330098288, "grad_norm": 0.14332380890846252, "learning_rate": 1.4423703406070348e-05, "loss": 0.0061, "step": 360960 }, { "epoch": 11.157507572479446, "grad_norm": 0.1542634814977646, "learning_rate": 1.4423239784879768e-05, "loss": 0.0064, "step": 360990 }, { "epoch": 11.158434814860605, "grad_norm": 0.07948320358991623, "learning_rate": 1.442277616368919e-05, "loss": 0.0053, "step": 361020 }, { "epoch": 11.159362057241763, "grad_norm": 0.09316620975732803, "learning_rate": 1.4422312542498611e-05, "loss": 0.005, "step": 361050 }, { "epoch": 11.16028929962292, "grad_norm": 0.10576778650283813, "learning_rate": 1.442184892130803e-05, "loss": 0.0058, "step": 361080 }, { "epoch": 11.16121654200408, "grad_norm": 0.09462788701057434, "learning_rate": 1.4421385300117452e-05, "loss": 0.0061, "step": 361110 }, { "epoch": 11.162143784385238, "grad_norm": 0.10037603974342346, "learning_rate": 1.4420921678926874e-05, "loss": 0.0058, "step": 361140 }, { "epoch": 11.163071026766398, "grad_norm": 0.09836374968290329, "learning_rate": 1.4420458057736294e-05, "loss": 0.0059, "step": 361170 }, { "epoch": 11.163998269147555, "grad_norm": 0.11059190332889557, "learning_rate": 1.4419994436545715e-05, "loss": 0.0053, "step": 361200 }, { "epoch": 11.164925511528713, 
"grad_norm": 0.061722058802843094, "learning_rate": 1.4419530815355137e-05, "loss": 0.0056, "step": 361230 }, { "epoch": 11.165852753909872, "grad_norm": 0.10028361529111862, "learning_rate": 1.4419067194164555e-05, "loss": 0.0059, "step": 361260 }, { "epoch": 11.16677999629103, "grad_norm": 0.16852900385856628, "learning_rate": 1.4418603572973976e-05, "loss": 0.0057, "step": 361290 }, { "epoch": 11.16770723867219, "grad_norm": 0.1476309448480606, "learning_rate": 1.4418139951783396e-05, "loss": 0.0062, "step": 361320 }, { "epoch": 11.168634481053347, "grad_norm": 0.13997970521450043, "learning_rate": 1.4417676330592818e-05, "loss": 0.0052, "step": 361350 }, { "epoch": 11.169561723434505, "grad_norm": 0.11055486649274826, "learning_rate": 1.4417212709402239e-05, "loss": 0.006, "step": 361380 }, { "epoch": 11.170488965815665, "grad_norm": 0.18865597248077393, "learning_rate": 1.4416749088211659e-05, "loss": 0.0057, "step": 361410 }, { "epoch": 11.171416208196822, "grad_norm": 0.07848392426967621, "learning_rate": 1.441628546702108e-05, "loss": 0.0058, "step": 361440 }, { "epoch": 11.172343450577982, "grad_norm": 0.12219765037298203, "learning_rate": 1.4415821845830502e-05, "loss": 0.0055, "step": 361470 }, { "epoch": 11.17327069295914, "grad_norm": 0.10376948863267899, "learning_rate": 1.4415358224639922e-05, "loss": 0.0057, "step": 361500 }, { "epoch": 11.174197935340297, "grad_norm": 0.07487918436527252, "learning_rate": 1.4414894603449343e-05, "loss": 0.0059, "step": 361530 }, { "epoch": 11.175125177721457, "grad_norm": 0.06274791806936264, "learning_rate": 1.4414430982258765e-05, "loss": 0.0053, "step": 361560 }, { "epoch": 11.176052420102614, "grad_norm": 0.07879682630300522, "learning_rate": 1.4413967361068185e-05, "loss": 0.0057, "step": 361590 }, { "epoch": 11.176979662483774, "grad_norm": 0.17715515196323395, "learning_rate": 1.4413503739877606e-05, "loss": 0.0061, "step": 361620 }, { "epoch": 11.177906904864932, "grad_norm": 0.10190079361200333, 
"learning_rate": 1.4413040118687026e-05, "loss": 0.0057, "step": 361650 }, { "epoch": 11.17883414724609, "grad_norm": 0.08065633475780487, "learning_rate": 1.4412576497496447e-05, "loss": 0.0061, "step": 361680 }, { "epoch": 11.179761389627249, "grad_norm": 0.1420493870973587, "learning_rate": 1.4412112876305869e-05, "loss": 0.0057, "step": 361710 }, { "epoch": 11.180688632008406, "grad_norm": 0.09458056092262268, "learning_rate": 1.4411649255115287e-05, "loss": 0.0049, "step": 361740 }, { "epoch": 11.181615874389566, "grad_norm": 0.05288723483681679, "learning_rate": 1.4411185633924708e-05, "loss": 0.0056, "step": 361770 }, { "epoch": 11.182543116770724, "grad_norm": 0.20029951632022858, "learning_rate": 1.441072201273413e-05, "loss": 0.0053, "step": 361800 }, { "epoch": 11.183470359151883, "grad_norm": 0.14248394966125488, "learning_rate": 1.441025839154355e-05, "loss": 0.0057, "step": 361830 }, { "epoch": 11.18439760153304, "grad_norm": 0.06131601333618164, "learning_rate": 1.4409794770352971e-05, "loss": 0.0057, "step": 361860 }, { "epoch": 11.185324843914199, "grad_norm": 0.1389792412519455, "learning_rate": 1.4409331149162393e-05, "loss": 0.005, "step": 361890 }, { "epoch": 11.186252086295358, "grad_norm": 0.10641239583492279, "learning_rate": 1.4408867527971813e-05, "loss": 0.0063, "step": 361920 }, { "epoch": 11.187179328676516, "grad_norm": 0.137344092130661, "learning_rate": 1.4408403906781234e-05, "loss": 0.0055, "step": 361950 }, { "epoch": 11.188106571057675, "grad_norm": 0.11892155557870865, "learning_rate": 1.4407940285590654e-05, "loss": 0.0056, "step": 361980 }, { "epoch": 11.189033813438833, "grad_norm": 0.1117018610239029, "learning_rate": 1.4407476664400075e-05, "loss": 0.0057, "step": 362010 }, { "epoch": 11.18996105581999, "grad_norm": 0.1610027551651001, "learning_rate": 1.4407013043209497e-05, "loss": 0.0053, "step": 362040 }, { "epoch": 11.19088829820115, "grad_norm": 0.12538015842437744, "learning_rate": 1.4406549422018917e-05, "loss": 
0.006, "step": 362070 }, { "epoch": 11.191815540582308, "grad_norm": 0.08291540294885635, "learning_rate": 1.4406085800828338e-05, "loss": 0.0061, "step": 362100 }, { "epoch": 11.192742782963467, "grad_norm": 0.12862998247146606, "learning_rate": 1.440562217963776e-05, "loss": 0.0056, "step": 362130 }, { "epoch": 11.193670025344625, "grad_norm": 0.16118277609348297, "learning_rate": 1.4405158558447178e-05, "loss": 0.0054, "step": 362160 }, { "epoch": 11.194597267725783, "grad_norm": 0.11060936748981476, "learning_rate": 1.4404694937256601e-05, "loss": 0.0058, "step": 362190 }, { "epoch": 11.195524510106942, "grad_norm": 0.12422125786542892, "learning_rate": 1.4404231316066023e-05, "loss": 0.0053, "step": 362220 }, { "epoch": 11.1964517524881, "grad_norm": 0.1345101147890091, "learning_rate": 1.440376769487544e-05, "loss": 0.0053, "step": 362250 }, { "epoch": 11.19737899486926, "grad_norm": 0.12794673442840576, "learning_rate": 1.4403304073684862e-05, "loss": 0.0061, "step": 362280 }, { "epoch": 11.198306237250417, "grad_norm": 0.11529242992401123, "learning_rate": 1.4402840452494282e-05, "loss": 0.006, "step": 362310 }, { "epoch": 11.199233479631575, "grad_norm": 0.13968141376972198, "learning_rate": 1.4402376831303704e-05, "loss": 0.0057, "step": 362340 }, { "epoch": 11.200160722012734, "grad_norm": 0.11099324375391006, "learning_rate": 1.4401913210113125e-05, "loss": 0.0058, "step": 362370 }, { "epoch": 11.201087964393892, "grad_norm": 0.13143260776996613, "learning_rate": 1.4401449588922545e-05, "loss": 0.0048, "step": 362400 }, { "epoch": 11.202015206775052, "grad_norm": 0.25300824642181396, "learning_rate": 1.4400985967731966e-05, "loss": 0.0061, "step": 362430 }, { "epoch": 11.20294244915621, "grad_norm": 0.13497145473957062, "learning_rate": 1.4400522346541388e-05, "loss": 0.0058, "step": 362460 }, { "epoch": 11.203869691537367, "grad_norm": 0.08795695751905441, "learning_rate": 1.4400058725350808e-05, "loss": 0.0055, "step": 362490 }, { "epoch": 
11.204796933918526, "grad_norm": 0.1040627583861351, "learning_rate": 1.439959510416023e-05, "loss": 0.0051, "step": 362520 }, { "epoch": 11.205724176299684, "grad_norm": 0.0987425222992897, "learning_rate": 1.439913148296965e-05, "loss": 0.0062, "step": 362550 }, { "epoch": 11.206651418680844, "grad_norm": 0.08500958979129791, "learning_rate": 1.439866786177907e-05, "loss": 0.0058, "step": 362580 }, { "epoch": 11.207578661062001, "grad_norm": 0.10175954550504684, "learning_rate": 1.4398204240588492e-05, "loss": 0.0055, "step": 362610 }, { "epoch": 11.208505903443161, "grad_norm": 0.1470656543970108, "learning_rate": 1.439774061939791e-05, "loss": 0.0057, "step": 362640 }, { "epoch": 11.209433145824319, "grad_norm": 0.3623496890068054, "learning_rate": 1.4397276998207332e-05, "loss": 0.0063, "step": 362670 }, { "epoch": 11.210360388205476, "grad_norm": 0.09781212359666824, "learning_rate": 1.4396813377016753e-05, "loss": 0.0061, "step": 362700 }, { "epoch": 11.211287630586636, "grad_norm": 0.15968000888824463, "learning_rate": 1.4396349755826173e-05, "loss": 0.0051, "step": 362730 }, { "epoch": 11.212214872967794, "grad_norm": 0.10665890574455261, "learning_rate": 1.4395886134635594e-05, "loss": 0.0057, "step": 362760 }, { "epoch": 11.213142115348953, "grad_norm": 0.09995010495185852, "learning_rate": 1.4395422513445016e-05, "loss": 0.0055, "step": 362790 }, { "epoch": 11.21406935773011, "grad_norm": 0.07633348554372787, "learning_rate": 1.4394958892254436e-05, "loss": 0.0057, "step": 362820 }, { "epoch": 11.214996600111268, "grad_norm": 0.08539199829101562, "learning_rate": 1.4394495271063857e-05, "loss": 0.0053, "step": 362850 }, { "epoch": 11.215923842492428, "grad_norm": 0.14448848366737366, "learning_rate": 1.4394031649873279e-05, "loss": 0.0061, "step": 362880 }, { "epoch": 11.216851084873586, "grad_norm": 0.09966439008712769, "learning_rate": 1.4393568028682699e-05, "loss": 0.0056, "step": 362910 }, { "epoch": 11.217778327254745, "grad_norm": 
0.11756695806980133, "learning_rate": 1.439310440749212e-05, "loss": 0.0054, "step": 362940 }, { "epoch": 11.218705569635903, "grad_norm": 0.13345041871070862, "learning_rate": 1.439264078630154e-05, "loss": 0.0053, "step": 362970 }, { "epoch": 11.21963281201706, "grad_norm": 0.10501585900783539, "learning_rate": 1.4392177165110961e-05, "loss": 0.0066, "step": 363000 }, { "epoch": 11.22056005439822, "grad_norm": 0.10020504146814346, "learning_rate": 1.4391713543920383e-05, "loss": 0.0054, "step": 363030 }, { "epoch": 11.221487296779378, "grad_norm": 0.14284078776836395, "learning_rate": 1.4391249922729803e-05, "loss": 0.0059, "step": 363060 }, { "epoch": 11.222414539160537, "grad_norm": 0.17073649168014526, "learning_rate": 1.4390786301539224e-05, "loss": 0.0057, "step": 363090 }, { "epoch": 11.223341781541695, "grad_norm": 0.09695462882518768, "learning_rate": 1.4390322680348646e-05, "loss": 0.0051, "step": 363120 }, { "epoch": 11.224269023922853, "grad_norm": 0.14806006848812103, "learning_rate": 1.4389859059158064e-05, "loss": 0.0054, "step": 363150 }, { "epoch": 11.225196266304012, "grad_norm": 0.12431363761425018, "learning_rate": 1.4389395437967485e-05, "loss": 0.0051, "step": 363180 }, { "epoch": 11.22612350868517, "grad_norm": 0.12740953266620636, "learning_rate": 1.4388931816776907e-05, "loss": 0.0059, "step": 363210 }, { "epoch": 11.22705075106633, "grad_norm": 0.11601702868938446, "learning_rate": 1.4388468195586327e-05, "loss": 0.0058, "step": 363240 }, { "epoch": 11.227977993447487, "grad_norm": 0.0637834370136261, "learning_rate": 1.4388004574395748e-05, "loss": 0.0052, "step": 363270 }, { "epoch": 11.228905235828645, "grad_norm": 0.13230077922344208, "learning_rate": 1.4387540953205168e-05, "loss": 0.0049, "step": 363300 }, { "epoch": 11.229832478209804, "grad_norm": 0.11919475346803665, "learning_rate": 1.438707733201459e-05, "loss": 0.0059, "step": 363330 }, { "epoch": 11.230759720590962, "grad_norm": 0.08723762631416321, "learning_rate": 
1.4386613710824011e-05, "loss": 0.0054, "step": 363360 }, { "epoch": 11.231686962972121, "grad_norm": 0.14133940637111664, "learning_rate": 1.438615008963343e-05, "loss": 0.0067, "step": 363390 }, { "epoch": 11.23261420535328, "grad_norm": 0.15183348953723907, "learning_rate": 1.4385686468442852e-05, "loss": 0.0062, "step": 363420 }, { "epoch": 11.233541447734439, "grad_norm": 0.11158976703882217, "learning_rate": 1.4385222847252274e-05, "loss": 0.0057, "step": 363450 }, { "epoch": 11.234468690115596, "grad_norm": 0.14355586469173431, "learning_rate": 1.4384759226061694e-05, "loss": 0.0055, "step": 363480 }, { "epoch": 11.235395932496754, "grad_norm": 0.14936576783657074, "learning_rate": 1.4384295604871115e-05, "loss": 0.0058, "step": 363510 }, { "epoch": 11.236323174877914, "grad_norm": 0.10985404253005981, "learning_rate": 1.4383831983680537e-05, "loss": 0.0054, "step": 363540 }, { "epoch": 11.237250417259071, "grad_norm": 0.07825934886932373, "learning_rate": 1.4383368362489955e-05, "loss": 0.0063, "step": 363570 }, { "epoch": 11.23817765964023, "grad_norm": 0.09161961078643799, "learning_rate": 1.4382904741299378e-05, "loss": 0.006, "step": 363600 }, { "epoch": 11.239104902021388, "grad_norm": 0.11938930302858353, "learning_rate": 1.4382441120108796e-05, "loss": 0.006, "step": 363630 }, { "epoch": 11.240032144402546, "grad_norm": 0.15082338452339172, "learning_rate": 1.4381977498918218e-05, "loss": 0.0051, "step": 363660 }, { "epoch": 11.240959386783706, "grad_norm": 0.09013812243938446, "learning_rate": 1.4381513877727639e-05, "loss": 0.0058, "step": 363690 }, { "epoch": 11.241886629164863, "grad_norm": 0.11903748661279678, "learning_rate": 1.4381050256537059e-05, "loss": 0.0055, "step": 363720 }, { "epoch": 11.242813871546023, "grad_norm": 0.10583433508872986, "learning_rate": 1.438058663534648e-05, "loss": 0.0059, "step": 363750 }, { "epoch": 11.24374111392718, "grad_norm": 0.11837399750947952, "learning_rate": 1.4380123014155902e-05, "loss": 0.0059, 
"step": 363780 }, { "epoch": 11.244668356308338, "grad_norm": 0.14035221934318542, "learning_rate": 1.4379659392965322e-05, "loss": 0.006, "step": 363810 }, { "epoch": 11.245595598689498, "grad_norm": 0.10276748985052109, "learning_rate": 1.4379195771774743e-05, "loss": 0.0052, "step": 363840 }, { "epoch": 11.246522841070655, "grad_norm": 0.09879089146852493, "learning_rate": 1.4378732150584165e-05, "loss": 0.0059, "step": 363870 }, { "epoch": 11.247450083451815, "grad_norm": 0.10632266104221344, "learning_rate": 1.4378268529393585e-05, "loss": 0.0058, "step": 363900 }, { "epoch": 11.248377325832973, "grad_norm": 0.2372383028268814, "learning_rate": 1.4377804908203006e-05, "loss": 0.0058, "step": 363930 }, { "epoch": 11.24930456821413, "grad_norm": 0.1256473809480667, "learning_rate": 1.4377341287012426e-05, "loss": 0.0054, "step": 363960 }, { "epoch": 11.25023181059529, "grad_norm": 0.09117105603218079, "learning_rate": 1.4376877665821847e-05, "loss": 0.0056, "step": 363990 }, { "epoch": 11.251159052976448, "grad_norm": 0.11576028913259506, "learning_rate": 1.4376414044631269e-05, "loss": 0.0055, "step": 364020 }, { "epoch": 11.252086295357607, "grad_norm": 0.14571307599544525, "learning_rate": 1.4375950423440687e-05, "loss": 0.0063, "step": 364050 }, { "epoch": 11.253013537738765, "grad_norm": 0.07855435460805893, "learning_rate": 1.4375486802250109e-05, "loss": 0.0057, "step": 364080 }, { "epoch": 11.253940780119922, "grad_norm": 0.08221746236085892, "learning_rate": 1.437502318105953e-05, "loss": 0.0049, "step": 364110 }, { "epoch": 11.254868022501082, "grad_norm": 0.11714848130941391, "learning_rate": 1.437455955986895e-05, "loss": 0.0055, "step": 364140 }, { "epoch": 11.25579526488224, "grad_norm": 0.13754208385944366, "learning_rate": 1.4374095938678371e-05, "loss": 0.0059, "step": 364170 }, { "epoch": 11.2567225072634, "grad_norm": 0.0891517624258995, "learning_rate": 1.4373632317487793e-05, "loss": 0.0059, "step": 364200 }, { "epoch": 11.257649749644557, 
"grad_norm": 0.08746138215065002, "learning_rate": 1.4373168696297213e-05, "loss": 0.0056, "step": 364230 }, { "epoch": 11.258576992025716, "grad_norm": 0.09491046518087387, "learning_rate": 1.4372705075106634e-05, "loss": 0.0058, "step": 364260 }, { "epoch": 11.259504234406874, "grad_norm": 0.07948461920022964, "learning_rate": 1.4372241453916054e-05, "loss": 0.0055, "step": 364290 }, { "epoch": 11.260431476788032, "grad_norm": 0.10928118973970413, "learning_rate": 1.4371777832725475e-05, "loss": 0.0059, "step": 364320 }, { "epoch": 11.261358719169191, "grad_norm": 0.09371007978916168, "learning_rate": 1.4371314211534897e-05, "loss": 0.0055, "step": 364350 }, { "epoch": 11.262285961550349, "grad_norm": 0.1364481896162033, "learning_rate": 1.4370850590344317e-05, "loss": 0.0061, "step": 364380 }, { "epoch": 11.263213203931508, "grad_norm": 0.09423842281103134, "learning_rate": 1.4370386969153738e-05, "loss": 0.0051, "step": 364410 }, { "epoch": 11.264140446312666, "grad_norm": 0.14452920854091644, "learning_rate": 1.436992334796316e-05, "loss": 0.0054, "step": 364440 }, { "epoch": 11.265067688693824, "grad_norm": 0.1565174013376236, "learning_rate": 1.436945972677258e-05, "loss": 0.0058, "step": 364470 }, { "epoch": 11.265994931074983, "grad_norm": 0.10921034216880798, "learning_rate": 1.4368996105582001e-05, "loss": 0.006, "step": 364500 }, { "epoch": 11.266922173456141, "grad_norm": 0.12427497655153275, "learning_rate": 1.4368532484391423e-05, "loss": 0.0063, "step": 364530 }, { "epoch": 11.2678494158373, "grad_norm": 0.10285371541976929, "learning_rate": 1.436806886320084e-05, "loss": 0.0057, "step": 364560 }, { "epoch": 11.268776658218458, "grad_norm": 0.11865274608135223, "learning_rate": 1.4367605242010262e-05, "loss": 0.0057, "step": 364590 }, { "epoch": 11.269703900599616, "grad_norm": 0.0905369371175766, "learning_rate": 1.436715707485937e-05, "loss": 0.0064, "step": 364620 }, { "epoch": 11.270631142980776, "grad_norm": 0.12649376690387726, 
"learning_rate": 1.436669345366879e-05, "loss": 0.0062, "step": 364650 }, { "epoch": 11.271558385361933, "grad_norm": 0.15350060164928436, "learning_rate": 1.4366229832478212e-05, "loss": 0.0057, "step": 364680 }, { "epoch": 11.272485627743093, "grad_norm": 0.12579001486301422, "learning_rate": 1.4365766211287633e-05, "loss": 0.0061, "step": 364710 }, { "epoch": 11.27341287012425, "grad_norm": 0.136626198887825, "learning_rate": 1.4365302590097053e-05, "loss": 0.0056, "step": 364740 }, { "epoch": 11.274340112505408, "grad_norm": 0.12467681616544724, "learning_rate": 1.4364838968906474e-05, "loss": 0.0053, "step": 364770 }, { "epoch": 11.275267354886568, "grad_norm": 0.11907083541154861, "learning_rate": 1.4364375347715892e-05, "loss": 0.0062, "step": 364800 }, { "epoch": 11.276194597267725, "grad_norm": 0.12080457806587219, "learning_rate": 1.4363911726525314e-05, "loss": 0.0058, "step": 364830 }, { "epoch": 11.277121839648885, "grad_norm": 0.13229575753211975, "learning_rate": 1.4363448105334737e-05, "loss": 0.0054, "step": 364860 }, { "epoch": 11.278049082030043, "grad_norm": 0.09722647815942764, "learning_rate": 1.4362984484144155e-05, "loss": 0.0054, "step": 364890 }, { "epoch": 11.2789763244112, "grad_norm": 0.0998113825917244, "learning_rate": 1.4362520862953577e-05, "loss": 0.0055, "step": 364920 }, { "epoch": 11.27990356679236, "grad_norm": 0.15979839861392975, "learning_rate": 1.4362057241762998e-05, "loss": 0.0055, "step": 364950 }, { "epoch": 11.280830809173517, "grad_norm": 0.12735123932361603, "learning_rate": 1.4361593620572418e-05, "loss": 0.0053, "step": 364980 }, { "epoch": 11.281758051554677, "grad_norm": 0.09624293446540833, "learning_rate": 1.436112999938184e-05, "loss": 0.0058, "step": 365010 }, { "epoch": 11.282685293935835, "grad_norm": 0.07107161730527878, "learning_rate": 1.4360666378191261e-05, "loss": 0.006, "step": 365040 }, { "epoch": 11.283612536316994, "grad_norm": 0.05853278189897537, "learning_rate": 1.4360202757000681e-05, "loss": 
0.0055, "step": 365070 }, { "epoch": 11.284539778698152, "grad_norm": 0.07529778778553009, "learning_rate": 1.4359739135810102e-05, "loss": 0.0054, "step": 365100 }, { "epoch": 11.28546702107931, "grad_norm": 0.17424127459526062, "learning_rate": 1.4359275514619522e-05, "loss": 0.0057, "step": 365130 }, { "epoch": 11.286394263460469, "grad_norm": 0.1057325005531311, "learning_rate": 1.4358811893428944e-05, "loss": 0.0057, "step": 365160 }, { "epoch": 11.287321505841627, "grad_norm": 0.08590853214263916, "learning_rate": 1.4358348272238365e-05, "loss": 0.0058, "step": 365190 }, { "epoch": 11.288248748222786, "grad_norm": 0.11362161487340927, "learning_rate": 1.4357884651047785e-05, "loss": 0.0058, "step": 365220 }, { "epoch": 11.289175990603944, "grad_norm": 0.11775295436382294, "learning_rate": 1.4357421029857207e-05, "loss": 0.0053, "step": 365250 }, { "epoch": 11.290103232985102, "grad_norm": 0.11238348484039307, "learning_rate": 1.4356957408666628e-05, "loss": 0.0056, "step": 365280 }, { "epoch": 11.291030475366261, "grad_norm": 0.12953059375286102, "learning_rate": 1.4356493787476046e-05, "loss": 0.0065, "step": 365310 }, { "epoch": 11.291957717747419, "grad_norm": 0.12211544066667557, "learning_rate": 1.4356030166285468e-05, "loss": 0.0053, "step": 365340 }, { "epoch": 11.292884960128578, "grad_norm": 0.12489594519138336, "learning_rate": 1.435556654509489e-05, "loss": 0.0058, "step": 365370 }, { "epoch": 11.293812202509736, "grad_norm": 0.13776475191116333, "learning_rate": 1.4355102923904309e-05, "loss": 0.0056, "step": 365400 }, { "epoch": 11.294739444890894, "grad_norm": 0.1171780601143837, "learning_rate": 1.435463930271373e-05, "loss": 0.0058, "step": 365430 }, { "epoch": 11.295666687272053, "grad_norm": 0.15046623349189758, "learning_rate": 1.435417568152315e-05, "loss": 0.0057, "step": 365460 }, { "epoch": 11.296593929653211, "grad_norm": 0.1453486979007721, "learning_rate": 1.4353712060332572e-05, "loss": 0.0055, "step": 365490 }, { "epoch": 
11.29752117203437, "grad_norm": 0.0953650251030922, "learning_rate": 1.4353248439141993e-05, "loss": 0.0054, "step": 365520 }, { "epoch": 11.298448414415528, "grad_norm": 0.130404993891716, "learning_rate": 1.4352784817951413e-05, "loss": 0.0058, "step": 365550 }, { "epoch": 11.299375656796686, "grad_norm": 0.10190970450639725, "learning_rate": 1.4352321196760835e-05, "loss": 0.0053, "step": 365580 }, { "epoch": 11.300302899177845, "grad_norm": 0.11677408963441849, "learning_rate": 1.4351857575570256e-05, "loss": 0.0057, "step": 365610 }, { "epoch": 11.301230141559003, "grad_norm": 0.11214005947113037, "learning_rate": 1.4351393954379676e-05, "loss": 0.0055, "step": 365640 }, { "epoch": 11.302157383940163, "grad_norm": 0.10566944628953934, "learning_rate": 1.4350930333189097e-05, "loss": 0.0055, "step": 365670 }, { "epoch": 11.30308462632132, "grad_norm": 0.07156285643577576, "learning_rate": 1.4350466711998519e-05, "loss": 0.0049, "step": 365700 }, { "epoch": 11.304011868702478, "grad_norm": 0.12123862653970718, "learning_rate": 1.4350003090807939e-05, "loss": 0.0059, "step": 365730 }, { "epoch": 11.304939111083637, "grad_norm": 0.1384132355451584, "learning_rate": 1.434953946961736e-05, "loss": 0.0053, "step": 365760 }, { "epoch": 11.305866353464795, "grad_norm": 0.12757229804992676, "learning_rate": 1.4349075848426778e-05, "loss": 0.0063, "step": 365790 }, { "epoch": 11.306793595845955, "grad_norm": 0.1350823938846588, "learning_rate": 1.43486122272362e-05, "loss": 0.0052, "step": 365820 }, { "epoch": 11.307720838227112, "grad_norm": 0.12279292941093445, "learning_rate": 1.4348148606045621e-05, "loss": 0.0052, "step": 365850 }, { "epoch": 11.308648080608272, "grad_norm": 0.08246846497058868, "learning_rate": 1.4347684984855041e-05, "loss": 0.006, "step": 365880 }, { "epoch": 11.30957532298943, "grad_norm": 0.11494158953428268, "learning_rate": 1.4347221363664463e-05, "loss": 0.0053, "step": 365910 }, { "epoch": 11.310502565370587, "grad_norm": 
0.07826370745897293, "learning_rate": 1.4346757742473884e-05, "loss": 0.0062, "step": 365940 }, { "epoch": 11.311429807751747, "grad_norm": 0.08961819112300873, "learning_rate": 1.434630957532299e-05, "loss": 0.0058, "step": 365970 }, { "epoch": 11.312357050132904, "grad_norm": 0.0979643240571022, "learning_rate": 1.4345845954132412e-05, "loss": 0.0051, "step": 366000 }, { "epoch": 11.313284292514064, "grad_norm": 0.1309763789176941, "learning_rate": 1.4345382332941833e-05, "loss": 0.0056, "step": 366030 }, { "epoch": 11.314211534895222, "grad_norm": 0.10440628230571747, "learning_rate": 1.4344918711751252e-05, "loss": 0.0053, "step": 366060 }, { "epoch": 11.31513877727638, "grad_norm": 0.11562480032444, "learning_rate": 1.4344455090560673e-05, "loss": 0.0054, "step": 366090 }, { "epoch": 11.316066019657539, "grad_norm": 0.1462906152009964, "learning_rate": 1.4343991469370095e-05, "loss": 0.0057, "step": 366120 }, { "epoch": 11.316993262038697, "grad_norm": 0.09579966217279434, "learning_rate": 1.4343527848179514e-05, "loss": 0.0054, "step": 366150 }, { "epoch": 11.317920504419856, "grad_norm": 0.05245121195912361, "learning_rate": 1.4343064226988936e-05, "loss": 0.0055, "step": 366180 }, { "epoch": 11.318847746801014, "grad_norm": 0.11723967641592026, "learning_rate": 1.4342600605798356e-05, "loss": 0.0053, "step": 366210 }, { "epoch": 11.319774989182172, "grad_norm": 0.11858827620744705, "learning_rate": 1.4342136984607777e-05, "loss": 0.0056, "step": 366240 }, { "epoch": 11.320702231563331, "grad_norm": 0.08395927399396896, "learning_rate": 1.4341673363417199e-05, "loss": 0.0057, "step": 366270 }, { "epoch": 11.321629473944489, "grad_norm": 0.0896926149725914, "learning_rate": 1.4341209742226619e-05, "loss": 0.0058, "step": 366300 }, { "epoch": 11.322556716325648, "grad_norm": 0.1093916967511177, "learning_rate": 1.434074612103604e-05, "loss": 0.0058, "step": 366330 }, { "epoch": 11.323483958706806, "grad_norm": 0.16794146597385406, "learning_rate": 
1.4340282499845462e-05, "loss": 0.0055, "step": 366360 }, { "epoch": 11.324411201087964, "grad_norm": 0.07578624039888382, "learning_rate": 1.4339818878654881e-05, "loss": 0.0055, "step": 366390 }, { "epoch": 11.325338443469123, "grad_norm": 0.07012174278497696, "learning_rate": 1.4339355257464303e-05, "loss": 0.0057, "step": 366420 }, { "epoch": 11.32626568585028, "grad_norm": 0.12348025292158127, "learning_rate": 1.4338891636273724e-05, "loss": 0.0047, "step": 366450 }, { "epoch": 11.32719292823144, "grad_norm": 0.14467856287956238, "learning_rate": 1.4338428015083144e-05, "loss": 0.0057, "step": 366480 }, { "epoch": 11.328120170612598, "grad_norm": 0.10538982599973679, "learning_rate": 1.4337964393892566e-05, "loss": 0.0054, "step": 366510 }, { "epoch": 11.329047412993756, "grad_norm": 0.09871221333742142, "learning_rate": 1.4337500772701984e-05, "loss": 0.0052, "step": 366540 }, { "epoch": 11.329974655374915, "grad_norm": 0.08487965911626816, "learning_rate": 1.4337037151511405e-05, "loss": 0.0056, "step": 366570 }, { "epoch": 11.330901897756073, "grad_norm": 0.24061229825019836, "learning_rate": 1.4336573530320827e-05, "loss": 0.0056, "step": 366600 }, { "epoch": 11.331829140137232, "grad_norm": 0.16695618629455566, "learning_rate": 1.4336109909130247e-05, "loss": 0.0059, "step": 366630 }, { "epoch": 11.33275638251839, "grad_norm": 0.06830630451440811, "learning_rate": 1.4335646287939668e-05, "loss": 0.0062, "step": 366660 }, { "epoch": 11.33368362489955, "grad_norm": 0.08281981199979782, "learning_rate": 1.433518266674909e-05, "loss": 0.0054, "step": 366690 }, { "epoch": 11.334610867280707, "grad_norm": 0.07146870344877243, "learning_rate": 1.433471904555851e-05, "loss": 0.0055, "step": 366720 }, { "epoch": 11.335538109661865, "grad_norm": 0.10506287962198257, "learning_rate": 1.4334255424367931e-05, "loss": 0.0054, "step": 366750 }, { "epoch": 11.336465352043025, "grad_norm": 0.12377911061048508, "learning_rate": 1.4333791803177353e-05, "loss": 0.0056, 
"step": 366780 }, { "epoch": 11.337392594424182, "grad_norm": 0.10813020169734955, "learning_rate": 1.4333328181986772e-05, "loss": 0.0058, "step": 366810 }, { "epoch": 11.338319836805342, "grad_norm": 0.10611068457365036, "learning_rate": 1.4332864560796194e-05, "loss": 0.0057, "step": 366840 }, { "epoch": 11.3392470791865, "grad_norm": 0.10915423184633255, "learning_rate": 1.4332400939605615e-05, "loss": 0.0056, "step": 366870 }, { "epoch": 11.340174321567657, "grad_norm": 0.10690557211637497, "learning_rate": 1.4331937318415035e-05, "loss": 0.0059, "step": 366900 }, { "epoch": 11.341101563948817, "grad_norm": 0.08094745129346848, "learning_rate": 1.4331473697224457e-05, "loss": 0.0057, "step": 366930 }, { "epoch": 11.342028806329974, "grad_norm": 0.08262640982866287, "learning_rate": 1.4331010076033875e-05, "loss": 0.0055, "step": 366960 }, { "epoch": 11.342956048711134, "grad_norm": 0.07127685099840164, "learning_rate": 1.4330546454843296e-05, "loss": 0.0056, "step": 366990 }, { "epoch": 11.343883291092292, "grad_norm": 0.11759974807500839, "learning_rate": 1.433008283365272e-05, "loss": 0.0062, "step": 367020 }, { "epoch": 11.34481053347345, "grad_norm": 0.12432954460382462, "learning_rate": 1.4329619212462138e-05, "loss": 0.006, "step": 367050 }, { "epoch": 11.345737775854609, "grad_norm": 0.1464931219816208, "learning_rate": 1.4329155591271559e-05, "loss": 0.0056, "step": 367080 }, { "epoch": 11.346665018235766, "grad_norm": 0.1171749010682106, "learning_rate": 1.432869197008098e-05, "loss": 0.0055, "step": 367110 }, { "epoch": 11.347592260616926, "grad_norm": 0.1595531702041626, "learning_rate": 1.43282283488904e-05, "loss": 0.006, "step": 367140 }, { "epoch": 11.348519502998084, "grad_norm": 0.09123494476079941, "learning_rate": 1.4327764727699822e-05, "loss": 0.0049, "step": 367170 }, { "epoch": 11.349446745379241, "grad_norm": 0.14962328970432281, "learning_rate": 1.4327301106509243e-05, "loss": 0.006, "step": 367200 }, { "epoch": 11.3503739877604, 
"grad_norm": 0.14258615672588348, "learning_rate": 1.4326837485318663e-05, "loss": 0.006, "step": 367230 }, { "epoch": 11.351301230141559, "grad_norm": 0.20171427726745605, "learning_rate": 1.4326373864128085e-05, "loss": 0.0059, "step": 367260 }, { "epoch": 11.352228472522718, "grad_norm": 0.1588183492422104, "learning_rate": 1.4325910242937505e-05, "loss": 0.0061, "step": 367290 }, { "epoch": 11.353155714903876, "grad_norm": 0.14933975040912628, "learning_rate": 1.4325446621746926e-05, "loss": 0.0054, "step": 367320 }, { "epoch": 11.354082957285033, "grad_norm": 0.1457844227552414, "learning_rate": 1.4324983000556348e-05, "loss": 0.0054, "step": 367350 }, { "epoch": 11.355010199666193, "grad_norm": 0.11177834123373032, "learning_rate": 1.4324519379365767e-05, "loss": 0.0052, "step": 367380 }, { "epoch": 11.35593744204735, "grad_norm": 0.10341953486204147, "learning_rate": 1.4324055758175189e-05, "loss": 0.0055, "step": 367410 }, { "epoch": 11.35686468442851, "grad_norm": 0.11502236872911453, "learning_rate": 1.432359213698461e-05, "loss": 0.0056, "step": 367440 }, { "epoch": 11.357791926809668, "grad_norm": 0.11471728980541229, "learning_rate": 1.4323128515794029e-05, "loss": 0.0057, "step": 367470 }, { "epoch": 11.358719169190827, "grad_norm": 0.09357759356498718, "learning_rate": 1.432266489460345e-05, "loss": 0.0053, "step": 367500 }, { "epoch": 11.359646411571985, "grad_norm": 0.09102024883031845, "learning_rate": 1.4322201273412872e-05, "loss": 0.0049, "step": 367530 }, { "epoch": 11.360573653953143, "grad_norm": 0.1356906294822693, "learning_rate": 1.4321737652222291e-05, "loss": 0.0056, "step": 367560 }, { "epoch": 11.361500896334302, "grad_norm": 0.09121290594339371, "learning_rate": 1.4321274031031713e-05, "loss": 0.0055, "step": 367590 }, { "epoch": 11.36242813871546, "grad_norm": 0.11685469001531601, "learning_rate": 1.4320810409841133e-05, "loss": 0.006, "step": 367620 }, { "epoch": 11.36335538109662, "grad_norm": 0.17610058188438416, "learning_rate": 
1.4320346788650554e-05, "loss": 0.0061, "step": 367650 }, { "epoch": 11.364282623477777, "grad_norm": 0.10583153367042542, "learning_rate": 1.4319883167459976e-05, "loss": 0.0061, "step": 367680 }, { "epoch": 11.365209865858935, "grad_norm": 0.09996207803487778, "learning_rate": 1.4319419546269395e-05, "loss": 0.0052, "step": 367710 }, { "epoch": 11.366137108240094, "grad_norm": 0.07510537654161453, "learning_rate": 1.4318955925078817e-05, "loss": 0.0065, "step": 367740 }, { "epoch": 11.367064350621252, "grad_norm": 0.12342286854982376, "learning_rate": 1.4318492303888238e-05, "loss": 0.0056, "step": 367770 }, { "epoch": 11.367991593002412, "grad_norm": 0.14508284628391266, "learning_rate": 1.4318028682697658e-05, "loss": 0.0066, "step": 367800 }, { "epoch": 11.36891883538357, "grad_norm": 0.09910520166158676, "learning_rate": 1.431756506150708e-05, "loss": 0.0052, "step": 367830 }, { "epoch": 11.369846077764727, "grad_norm": 0.10465674847364426, "learning_rate": 1.4317101440316501e-05, "loss": 0.006, "step": 367860 }, { "epoch": 11.370773320145886, "grad_norm": 0.1108761876821518, "learning_rate": 1.4316637819125921e-05, "loss": 0.0057, "step": 367890 }, { "epoch": 11.371700562527044, "grad_norm": 0.1329624503850937, "learning_rate": 1.4316174197935343e-05, "loss": 0.0057, "step": 367920 }, { "epoch": 11.372627804908204, "grad_norm": 0.12778809666633606, "learning_rate": 1.431571057674476e-05, "loss": 0.0056, "step": 367950 }, { "epoch": 11.373555047289361, "grad_norm": 0.05485386773943901, "learning_rate": 1.4315246955554182e-05, "loss": 0.0057, "step": 367980 }, { "epoch": 11.374482289670519, "grad_norm": 0.15751467645168304, "learning_rate": 1.4314783334363604e-05, "loss": 0.0058, "step": 368010 }, { "epoch": 11.375409532051679, "grad_norm": 0.1322251409292221, "learning_rate": 1.4314319713173024e-05, "loss": 0.0057, "step": 368040 }, { "epoch": 11.376336774432836, "grad_norm": 0.15278343856334686, "learning_rate": 1.4313856091982445e-05, "loss": 0.0062, 
"step": 368070 }, { "epoch": 11.377264016813996, "grad_norm": 0.08323072642087936, "learning_rate": 1.4313392470791867e-05, "loss": 0.0054, "step": 368100 }, { "epoch": 11.378191259195154, "grad_norm": 0.1283864676952362, "learning_rate": 1.4312928849601286e-05, "loss": 0.0063, "step": 368130 }, { "epoch": 11.379118501576311, "grad_norm": 0.09740548580884933, "learning_rate": 1.4312465228410708e-05, "loss": 0.0061, "step": 368160 }, { "epoch": 11.38004574395747, "grad_norm": 0.1171174943447113, "learning_rate": 1.431200160722013e-05, "loss": 0.0064, "step": 368190 }, { "epoch": 11.380972986338628, "grad_norm": 0.11185555905103683, "learning_rate": 1.431153798602955e-05, "loss": 0.0058, "step": 368220 }, { "epoch": 11.381900228719788, "grad_norm": 0.14054043591022491, "learning_rate": 1.431107436483897e-05, "loss": 0.0063, "step": 368250 }, { "epoch": 11.382827471100946, "grad_norm": 0.09689129889011383, "learning_rate": 1.431061074364839e-05, "loss": 0.0057, "step": 368280 }, { "epoch": 11.383754713482105, "grad_norm": 0.08445174992084503, "learning_rate": 1.4310147122457812e-05, "loss": 0.0059, "step": 368310 }, { "epoch": 11.384681955863263, "grad_norm": 0.0932711809873581, "learning_rate": 1.4309683501267234e-05, "loss": 0.0063, "step": 368340 }, { "epoch": 11.38560919824442, "grad_norm": 0.05975191667675972, "learning_rate": 1.4309219880076652e-05, "loss": 0.0054, "step": 368370 }, { "epoch": 11.38653644062558, "grad_norm": 0.10202141106128693, "learning_rate": 1.4308756258886073e-05, "loss": 0.0059, "step": 368400 }, { "epoch": 11.387463683006738, "grad_norm": 0.08749505132436752, "learning_rate": 1.4308292637695496e-05, "loss": 0.0062, "step": 368430 }, { "epoch": 11.388390925387897, "grad_norm": 0.10223960131406784, "learning_rate": 1.4307829016504914e-05, "loss": 0.0052, "step": 368460 }, { "epoch": 11.389318167769055, "grad_norm": 0.09810525178909302, "learning_rate": 1.4307365395314336e-05, "loss": 0.0059, "step": 368490 }, { "epoch": 11.390245410150213, 
"grad_norm": 0.10520298033952713, "learning_rate": 1.4306901774123758e-05, "loss": 0.0059, "step": 368520 }, { "epoch": 11.391172652531372, "grad_norm": 0.11459711939096451, "learning_rate": 1.4306438152933177e-05, "loss": 0.0059, "step": 368550 }, { "epoch": 11.39209989491253, "grad_norm": 0.12054155766963959, "learning_rate": 1.4305974531742599e-05, "loss": 0.0061, "step": 368580 }, { "epoch": 11.39302713729369, "grad_norm": 0.1305703967809677, "learning_rate": 1.4305510910552019e-05, "loss": 0.0059, "step": 368610 }, { "epoch": 11.393954379674847, "grad_norm": 0.1059882715344429, "learning_rate": 1.430504728936144e-05, "loss": 0.0053, "step": 368640 }, { "epoch": 11.394881622056005, "grad_norm": 0.13224296271800995, "learning_rate": 1.4304583668170862e-05, "loss": 0.0055, "step": 368670 }, { "epoch": 11.395808864437164, "grad_norm": 0.1541466861963272, "learning_rate": 1.4304120046980281e-05, "loss": 0.0059, "step": 368700 }, { "epoch": 11.396736106818322, "grad_norm": 0.09994561225175858, "learning_rate": 1.4303656425789703e-05, "loss": 0.0056, "step": 368730 }, { "epoch": 11.397663349199481, "grad_norm": 0.11079783737659454, "learning_rate": 1.4303192804599124e-05, "loss": 0.0056, "step": 368760 }, { "epoch": 11.39859059158064, "grad_norm": 0.1288035362958908, "learning_rate": 1.4302729183408544e-05, "loss": 0.0056, "step": 368790 }, { "epoch": 11.399517833961797, "grad_norm": 0.09020300209522247, "learning_rate": 1.4302265562217966e-05, "loss": 0.0052, "step": 368820 }, { "epoch": 11.400445076342956, "grad_norm": 0.1009257435798645, "learning_rate": 1.4301801941027387e-05, "loss": 0.0059, "step": 368850 }, { "epoch": 11.401372318724114, "grad_norm": 0.1304086148738861, "learning_rate": 1.4301338319836805e-05, "loss": 0.0051, "step": 368880 }, { "epoch": 11.402299561105274, "grad_norm": 0.12653648853302002, "learning_rate": 1.4300874698646227e-05, "loss": 0.0053, "step": 368910 }, { "epoch": 11.403226803486431, "grad_norm": 0.05449537932872772, 
"learning_rate": 1.4300411077455647e-05, "loss": 0.0058, "step": 368940 }, { "epoch": 11.404154045867589, "grad_norm": 0.10676924139261246, "learning_rate": 1.4299947456265068e-05, "loss": 0.0056, "step": 368970 }, { "epoch": 11.405081288248748, "grad_norm": 0.08279310911893845, "learning_rate": 1.429948383507449e-05, "loss": 0.0055, "step": 369000 }, { "epoch": 11.406008530629906, "grad_norm": 0.11447671055793762, "learning_rate": 1.429902021388391e-05, "loss": 0.0057, "step": 369030 }, { "epoch": 11.406935773011066, "grad_norm": 0.06881994754076004, "learning_rate": 1.4298556592693331e-05, "loss": 0.0052, "step": 369060 }, { "epoch": 11.407863015392223, "grad_norm": 0.1875329315662384, "learning_rate": 1.4298092971502753e-05, "loss": 0.0057, "step": 369090 }, { "epoch": 11.408790257773383, "grad_norm": 0.10601233690977097, "learning_rate": 1.4297629350312172e-05, "loss": 0.0062, "step": 369120 }, { "epoch": 11.40971750015454, "grad_norm": 0.15530015528202057, "learning_rate": 1.4297165729121594e-05, "loss": 0.0061, "step": 369150 }, { "epoch": 11.410644742535698, "grad_norm": 0.0957862064242363, "learning_rate": 1.4296702107931015e-05, "loss": 0.0059, "step": 369180 }, { "epoch": 11.411571984916858, "grad_norm": 0.08145465701818466, "learning_rate": 1.4296238486740435e-05, "loss": 0.0057, "step": 369210 }, { "epoch": 11.412499227298015, "grad_norm": 0.07942160964012146, "learning_rate": 1.4295774865549857e-05, "loss": 0.0055, "step": 369240 }, { "epoch": 11.413426469679175, "grad_norm": 0.1518183946609497, "learning_rate": 1.4295311244359275e-05, "loss": 0.0062, "step": 369270 }, { "epoch": 11.414353712060333, "grad_norm": 0.07788417488336563, "learning_rate": 1.4294847623168698e-05, "loss": 0.0058, "step": 369300 }, { "epoch": 11.41528095444149, "grad_norm": 0.12226995825767517, "learning_rate": 1.429438400197812e-05, "loss": 0.0059, "step": 369330 }, { "epoch": 11.41620819682265, "grad_norm": 0.11330708861351013, "learning_rate": 1.4293920380787538e-05, "loss": 
0.0057, "step": 369360 }, { "epoch": 11.417135439203808, "grad_norm": 0.10273674130439758, "learning_rate": 1.429345675959696e-05, "loss": 0.0057, "step": 369390 }, { "epoch": 11.418062681584967, "grad_norm": 0.1710992306470871, "learning_rate": 1.429299313840638e-05, "loss": 0.0059, "step": 369420 }, { "epoch": 11.418989923966125, "grad_norm": 0.12321937829256058, "learning_rate": 1.42925295172158e-05, "loss": 0.006, "step": 369450 }, { "epoch": 11.419917166347282, "grad_norm": 0.08810830861330032, "learning_rate": 1.4292065896025222e-05, "loss": 0.0061, "step": 369480 }, { "epoch": 11.420844408728442, "grad_norm": 0.08162591606378555, "learning_rate": 1.4291602274834643e-05, "loss": 0.0046, "step": 369510 }, { "epoch": 11.4217716511096, "grad_norm": 0.08396165817975998, "learning_rate": 1.4291138653644063e-05, "loss": 0.0052, "step": 369540 }, { "epoch": 11.42269889349076, "grad_norm": 0.09788626432418823, "learning_rate": 1.4290675032453485e-05, "loss": 0.0057, "step": 369570 }, { "epoch": 11.423626135871917, "grad_norm": 0.15091082453727722, "learning_rate": 1.4290211411262905e-05, "loss": 0.0058, "step": 369600 }, { "epoch": 11.424553378253075, "grad_norm": 0.11953682452440262, "learning_rate": 1.4289747790072326e-05, "loss": 0.0053, "step": 369630 }, { "epoch": 11.425480620634234, "grad_norm": 0.10165463387966156, "learning_rate": 1.4289284168881748e-05, "loss": 0.0057, "step": 369660 }, { "epoch": 11.426407863015392, "grad_norm": 0.11820382624864578, "learning_rate": 1.4288820547691167e-05, "loss": 0.0054, "step": 369690 }, { "epoch": 11.427335105396551, "grad_norm": 0.11561120301485062, "learning_rate": 1.4288356926500589e-05, "loss": 0.0051, "step": 369720 }, { "epoch": 11.428262347777709, "grad_norm": 0.12937751412391663, "learning_rate": 1.428789330531001e-05, "loss": 0.0063, "step": 369750 }, { "epoch": 11.429189590158867, "grad_norm": 0.12548136711120605, "learning_rate": 1.4287429684119429e-05, "loss": 0.0054, "step": 369780 }, { "epoch": 
11.430116832540026, "grad_norm": 0.11581084877252579, "learning_rate": 1.428696606292885e-05, "loss": 0.0049, "step": 369810 }, { "epoch": 11.431044074921184, "grad_norm": 0.1517379879951477, "learning_rate": 1.4286502441738273e-05, "loss": 0.0056, "step": 369840 }, { "epoch": 11.431971317302343, "grad_norm": 0.10803544521331787, "learning_rate": 1.4286038820547691e-05, "loss": 0.0062, "step": 369870 }, { "epoch": 11.432898559683501, "grad_norm": 0.1346672773361206, "learning_rate": 1.4285575199357113e-05, "loss": 0.0061, "step": 369900 }, { "epoch": 11.43382580206466, "grad_norm": 0.09743785858154297, "learning_rate": 1.4285111578166533e-05, "loss": 0.0048, "step": 369930 }, { "epoch": 11.434753044445818, "grad_norm": 0.13000962138175964, "learning_rate": 1.4284647956975954e-05, "loss": 0.0059, "step": 369960 }, { "epoch": 11.435680286826976, "grad_norm": 0.1074233278632164, "learning_rate": 1.4284184335785376e-05, "loss": 0.005, "step": 369990 }, { "epoch": 11.436607529208136, "grad_norm": 0.0992312952876091, "learning_rate": 1.4283720714594796e-05, "loss": 0.0062, "step": 370020 }, { "epoch": 11.437534771589293, "grad_norm": 0.07822979986667633, "learning_rate": 1.4283257093404217e-05, "loss": 0.0061, "step": 370050 }, { "epoch": 11.438462013970453, "grad_norm": 0.07771403342485428, "learning_rate": 1.4282793472213639e-05, "loss": 0.005, "step": 370080 }, { "epoch": 11.43938925635161, "grad_norm": 0.14156632125377655, "learning_rate": 1.4282329851023058e-05, "loss": 0.0057, "step": 370110 }, { "epoch": 11.440316498732768, "grad_norm": 0.11546476930379868, "learning_rate": 1.428186622983248e-05, "loss": 0.0059, "step": 370140 }, { "epoch": 11.441243741113928, "grad_norm": 0.0981079563498497, "learning_rate": 1.4281402608641901e-05, "loss": 0.0054, "step": 370170 }, { "epoch": 11.442170983495085, "grad_norm": 0.12993866205215454, "learning_rate": 1.4280938987451321e-05, "loss": 0.0055, "step": 370200 }, { "epoch": 11.443098225876245, "grad_norm": 
0.11015231907367706, "learning_rate": 1.4280475366260743e-05, "loss": 0.0058, "step": 370230 }, { "epoch": 11.444025468257403, "grad_norm": 0.13954252004623413, "learning_rate": 1.428001174507016e-05, "loss": 0.0059, "step": 370260 }, { "epoch": 11.44495271063856, "grad_norm": 0.10716870427131653, "learning_rate": 1.4279548123879582e-05, "loss": 0.0058, "step": 370290 }, { "epoch": 11.44587995301972, "grad_norm": 0.07616405934095383, "learning_rate": 1.4279084502689004e-05, "loss": 0.0056, "step": 370320 }, { "epoch": 11.446807195400877, "grad_norm": 0.07898858189582825, "learning_rate": 1.4278620881498424e-05, "loss": 0.0056, "step": 370350 }, { "epoch": 11.447734437782037, "grad_norm": 0.07631157338619232, "learning_rate": 1.4278157260307845e-05, "loss": 0.0055, "step": 370380 }, { "epoch": 11.448661680163195, "grad_norm": 0.10229376703500748, "learning_rate": 1.4277693639117267e-05, "loss": 0.0054, "step": 370410 }, { "epoch": 11.449588922544352, "grad_norm": 0.11793701350688934, "learning_rate": 1.4277230017926686e-05, "loss": 0.0062, "step": 370440 }, { "epoch": 11.450516164925512, "grad_norm": 0.09923728555440903, "learning_rate": 1.4276766396736108e-05, "loss": 0.0057, "step": 370470 }, { "epoch": 11.45144340730667, "grad_norm": 0.0967925563454628, "learning_rate": 1.427630277554553e-05, "loss": 0.0052, "step": 370500 }, { "epoch": 11.452370649687829, "grad_norm": 0.13529899716377258, "learning_rate": 1.427583915435495e-05, "loss": 0.0055, "step": 370530 }, { "epoch": 11.453297892068987, "grad_norm": 0.21106180548667908, "learning_rate": 1.427537553316437e-05, "loss": 0.0059, "step": 370560 }, { "epoch": 11.454225134450144, "grad_norm": 0.1154584214091301, "learning_rate": 1.4274911911973792e-05, "loss": 0.0056, "step": 370590 }, { "epoch": 11.455152376831304, "grad_norm": 0.07452113181352615, "learning_rate": 1.4274448290783212e-05, "loss": 0.0053, "step": 370620 }, { "epoch": 11.456079619212462, "grad_norm": 0.14573493599891663, "learning_rate": 
1.4273984669592634e-05, "loss": 0.0059, "step": 370650 }, { "epoch": 11.457006861593621, "grad_norm": 0.09037388861179352, "learning_rate": 1.4273521048402052e-05, "loss": 0.0062, "step": 370680 }, { "epoch": 11.457934103974779, "grad_norm": 0.09027783572673798, "learning_rate": 1.4273057427211475e-05, "loss": 0.0058, "step": 370710 }, { "epoch": 11.458861346355938, "grad_norm": 0.06420838832855225, "learning_rate": 1.4272593806020896e-05, "loss": 0.0063, "step": 370740 }, { "epoch": 11.459788588737096, "grad_norm": 0.13870501518249512, "learning_rate": 1.4272130184830315e-05, "loss": 0.0054, "step": 370770 }, { "epoch": 11.460715831118254, "grad_norm": 0.09634163230657578, "learning_rate": 1.4271666563639736e-05, "loss": 0.0058, "step": 370800 }, { "epoch": 11.461643073499413, "grad_norm": 0.0866636335849762, "learning_rate": 1.4271202942449158e-05, "loss": 0.0054, "step": 370830 }, { "epoch": 11.462570315880571, "grad_norm": 0.08467864245176315, "learning_rate": 1.4270739321258577e-05, "loss": 0.0065, "step": 370860 }, { "epoch": 11.46349755826173, "grad_norm": 0.0691663846373558, "learning_rate": 1.4270275700067999e-05, "loss": 0.0056, "step": 370890 }, { "epoch": 11.464424800642888, "grad_norm": 0.09809789806604385, "learning_rate": 1.426981207887742e-05, "loss": 0.0056, "step": 370920 }, { "epoch": 11.465352043024046, "grad_norm": 0.06991209089756012, "learning_rate": 1.426934845768684e-05, "loss": 0.0052, "step": 370950 }, { "epoch": 11.466279285405205, "grad_norm": 0.13394078612327576, "learning_rate": 1.4268884836496262e-05, "loss": 0.0061, "step": 370980 }, { "epoch": 11.467206527786363, "grad_norm": 0.08628825098276138, "learning_rate": 1.4268421215305682e-05, "loss": 0.0049, "step": 371010 }, { "epoch": 11.468133770167523, "grad_norm": 0.10390815883874893, "learning_rate": 1.4267957594115103e-05, "loss": 0.0056, "step": 371040 }, { "epoch": 11.46906101254868, "grad_norm": 0.12729500234127045, "learning_rate": 1.4267493972924525e-05, "loss": 0.0056, 
"step": 371070 }, { "epoch": 11.469988254929838, "grad_norm": 0.2165110856294632, "learning_rate": 1.4267030351733944e-05, "loss": 0.006, "step": 371100 }, { "epoch": 11.470915497310997, "grad_norm": 0.1255592554807663, "learning_rate": 1.4266566730543366e-05, "loss": 0.0053, "step": 371130 }, { "epoch": 11.471842739692155, "grad_norm": 0.09985517710447311, "learning_rate": 1.4266103109352787e-05, "loss": 0.0063, "step": 371160 }, { "epoch": 11.472769982073315, "grad_norm": 0.08790472149848938, "learning_rate": 1.4265639488162205e-05, "loss": 0.0057, "step": 371190 }, { "epoch": 11.473697224454472, "grad_norm": 0.06928738206624985, "learning_rate": 1.4265175866971627e-05, "loss": 0.0058, "step": 371220 }, { "epoch": 11.47462446683563, "grad_norm": 0.08209454268217087, "learning_rate": 1.426471224578105e-05, "loss": 0.0061, "step": 371250 }, { "epoch": 11.47555170921679, "grad_norm": 0.08935268223285675, "learning_rate": 1.4264248624590468e-05, "loss": 0.0061, "step": 371280 }, { "epoch": 11.476478951597947, "grad_norm": 0.113338403403759, "learning_rate": 1.426378500339989e-05, "loss": 0.006, "step": 371310 }, { "epoch": 11.477406193979107, "grad_norm": 0.07821305841207504, "learning_rate": 1.426332138220931e-05, "loss": 0.0063, "step": 371340 }, { "epoch": 11.478333436360264, "grad_norm": 0.08690804988145828, "learning_rate": 1.4262857761018731e-05, "loss": 0.0058, "step": 371370 }, { "epoch": 11.479260678741422, "grad_norm": 0.1120879203081131, "learning_rate": 1.4262394139828153e-05, "loss": 0.0055, "step": 371400 }, { "epoch": 11.480187921122582, "grad_norm": 0.11386260390281677, "learning_rate": 1.4261930518637572e-05, "loss": 0.0061, "step": 371430 }, { "epoch": 11.48111516350374, "grad_norm": 0.09918878227472305, "learning_rate": 1.4261466897446994e-05, "loss": 0.0053, "step": 371460 }, { "epoch": 11.482042405884899, "grad_norm": 0.09683956205844879, "learning_rate": 1.4261003276256415e-05, "loss": 0.0056, "step": 371490 }, { "epoch": 11.482969648266057, 
"grad_norm": 0.12211155891418457, "learning_rate": 1.4260539655065835e-05, "loss": 0.0052, "step": 371520 }, { "epoch": 11.483896890647216, "grad_norm": 0.09251922369003296, "learning_rate": 1.4260076033875257e-05, "loss": 0.006, "step": 371550 }, { "epoch": 11.484824133028374, "grad_norm": 0.11937860399484634, "learning_rate": 1.4259612412684678e-05, "loss": 0.0055, "step": 371580 }, { "epoch": 11.485751375409532, "grad_norm": 0.09792530536651611, "learning_rate": 1.4259148791494098e-05, "loss": 0.0059, "step": 371610 }, { "epoch": 11.486678617790691, "grad_norm": 0.10587237030267715, "learning_rate": 1.425868517030352e-05, "loss": 0.0056, "step": 371640 }, { "epoch": 11.487605860171849, "grad_norm": 0.052107762545347214, "learning_rate": 1.4258221549112938e-05, "loss": 0.0054, "step": 371670 }, { "epoch": 11.488533102553008, "grad_norm": 0.1116822138428688, "learning_rate": 1.425775792792236e-05, "loss": 0.0056, "step": 371700 }, { "epoch": 11.489460344934166, "grad_norm": 0.1159818023443222, "learning_rate": 1.425729430673178e-05, "loss": 0.0054, "step": 371730 }, { "epoch": 11.490387587315324, "grad_norm": 0.08926115930080414, "learning_rate": 1.42568306855412e-05, "loss": 0.0056, "step": 371760 }, { "epoch": 11.491314829696483, "grad_norm": 0.124745674431324, "learning_rate": 1.4256367064350622e-05, "loss": 0.0065, "step": 371790 }, { "epoch": 11.49224207207764, "grad_norm": 0.0992434173822403, "learning_rate": 1.4255903443160044e-05, "loss": 0.0058, "step": 371820 }, { "epoch": 11.4931693144588, "grad_norm": 0.08128231763839722, "learning_rate": 1.4255439821969463e-05, "loss": 0.0054, "step": 371850 }, { "epoch": 11.494096556839958, "grad_norm": 0.08822512626647949, "learning_rate": 1.4254976200778885e-05, "loss": 0.0057, "step": 371880 }, { "epoch": 11.495023799221116, "grad_norm": 0.09627661108970642, "learning_rate": 1.4254512579588306e-05, "loss": 0.0054, "step": 371910 }, { "epoch": 11.495951041602275, "grad_norm": 0.10109639167785645, "learning_rate": 
1.4254048958397726e-05, "loss": 0.0062, "step": 371940 }, { "epoch": 11.496878283983433, "grad_norm": 0.1398572027683258, "learning_rate": 1.4253585337207148e-05, "loss": 0.0051, "step": 371970 }, { "epoch": 11.497805526364592, "grad_norm": 0.11947350203990936, "learning_rate": 1.4253137170056256e-05, "loss": 0.0058, "step": 372000 }, { "epoch": 11.49873276874575, "grad_norm": 0.08814572542905807, "learning_rate": 1.4252673548865674e-05, "loss": 0.0054, "step": 372030 }, { "epoch": 11.499660011126908, "grad_norm": 0.1312166452407837, "learning_rate": 1.4252209927675095e-05, "loss": 0.0058, "step": 372060 }, { "epoch": 11.500587253508067, "grad_norm": 0.09028532356023788, "learning_rate": 1.4251746306484515e-05, "loss": 0.0051, "step": 372090 }, { "epoch": 11.501514495889225, "grad_norm": 0.1325056254863739, "learning_rate": 1.4251282685293937e-05, "loss": 0.0058, "step": 372120 }, { "epoch": 11.502441738270385, "grad_norm": 0.16849607229232788, "learning_rate": 1.4250819064103358e-05, "loss": 0.0062, "step": 372150 }, { "epoch": 11.503368980651542, "grad_norm": 0.13683657348155975, "learning_rate": 1.4250355442912778e-05, "loss": 0.0057, "step": 372180 }, { "epoch": 11.5042962230327, "grad_norm": 0.11471086740493774, "learning_rate": 1.42498918217222e-05, "loss": 0.0056, "step": 372210 }, { "epoch": 11.50522346541386, "grad_norm": 0.06716495752334595, "learning_rate": 1.4249428200531621e-05, "loss": 0.0064, "step": 372240 }, { "epoch": 11.506150707795017, "grad_norm": 0.08228658139705658, "learning_rate": 1.424896457934104e-05, "loss": 0.0052, "step": 372270 }, { "epoch": 11.507077950176177, "grad_norm": 0.08794870972633362, "learning_rate": 1.4248500958150462e-05, "loss": 0.0056, "step": 372300 }, { "epoch": 11.508005192557334, "grad_norm": 0.0906624123454094, "learning_rate": 1.4248037336959884e-05, "loss": 0.0059, "step": 372330 }, { "epoch": 11.508932434938494, "grad_norm": 0.08137767016887665, "learning_rate": 1.4247573715769304e-05, "loss": 0.0059, "step": 
372360 }, { "epoch": 11.509859677319652, "grad_norm": 0.09806997328996658, "learning_rate": 1.4247110094578725e-05, "loss": 0.0057, "step": 372390 }, { "epoch": 11.51078691970081, "grad_norm": 0.10446139425039291, "learning_rate": 1.4246646473388143e-05, "loss": 0.0053, "step": 372420 }, { "epoch": 11.511714162081969, "grad_norm": 0.07047705352306366, "learning_rate": 1.4246182852197565e-05, "loss": 0.0055, "step": 372450 }, { "epoch": 11.512641404463126, "grad_norm": 0.12470180541276932, "learning_rate": 1.4245719231006986e-05, "loss": 0.0061, "step": 372480 }, { "epoch": 11.513568646844284, "grad_norm": 0.13470152020454407, "learning_rate": 1.4245255609816406e-05, "loss": 0.0051, "step": 372510 }, { "epoch": 11.514495889225444, "grad_norm": 0.09170109033584595, "learning_rate": 1.4244791988625827e-05, "loss": 0.0055, "step": 372540 }, { "epoch": 11.515423131606601, "grad_norm": 0.19573023915290833, "learning_rate": 1.4244328367435249e-05, "loss": 0.0052, "step": 372570 }, { "epoch": 11.51635037398776, "grad_norm": 0.1256551444530487, "learning_rate": 1.4243864746244669e-05, "loss": 0.006, "step": 372600 }, { "epoch": 11.517277616368919, "grad_norm": 0.11903408169746399, "learning_rate": 1.424340112505409e-05, "loss": 0.0064, "step": 372630 }, { "epoch": 11.518204858750078, "grad_norm": 0.12028694897890091, "learning_rate": 1.4242937503863512e-05, "loss": 0.0054, "step": 372660 }, { "epoch": 11.519132101131236, "grad_norm": 0.10691056400537491, "learning_rate": 1.4242473882672932e-05, "loss": 0.0056, "step": 372690 }, { "epoch": 11.520059343512393, "grad_norm": 0.05607084184885025, "learning_rate": 1.4242010261482353e-05, "loss": 0.0062, "step": 372720 }, { "epoch": 11.520986585893553, "grad_norm": 0.12236332893371582, "learning_rate": 1.4241546640291773e-05, "loss": 0.0053, "step": 372750 }, { "epoch": 11.52191382827471, "grad_norm": 0.07082834839820862, "learning_rate": 1.4241083019101194e-05, "loss": 0.0056, "step": 372780 }, { "epoch": 11.52284107065587, 
"grad_norm": 0.13129538297653198, "learning_rate": 1.4240619397910616e-05, "loss": 0.006, "step": 372810 }, { "epoch": 11.523768313037028, "grad_norm": 0.13063067197799683, "learning_rate": 1.4240155776720036e-05, "loss": 0.0059, "step": 372840 }, { "epoch": 11.524695555418186, "grad_norm": 0.1928829848766327, "learning_rate": 1.4239692155529457e-05, "loss": 0.0058, "step": 372870 }, { "epoch": 11.525622797799345, "grad_norm": 0.15743078291416168, "learning_rate": 1.4239228534338879e-05, "loss": 0.0055, "step": 372900 }, { "epoch": 11.526550040180503, "grad_norm": 0.1260341852903366, "learning_rate": 1.4238764913148297e-05, "loss": 0.0057, "step": 372930 }, { "epoch": 11.527477282561662, "grad_norm": 0.10968752950429916, "learning_rate": 1.4238301291957718e-05, "loss": 0.0056, "step": 372960 }, { "epoch": 11.52840452494282, "grad_norm": 0.11763468384742737, "learning_rate": 1.4237853124806826e-05, "loss": 0.0061, "step": 372990 }, { "epoch": 11.529331767323978, "grad_norm": 0.16014587879180908, "learning_rate": 1.4237389503616246e-05, "loss": 0.0058, "step": 373020 }, { "epoch": 11.530259009705137, "grad_norm": 0.14190863072872162, "learning_rate": 1.4236925882425668e-05, "loss": 0.0053, "step": 373050 }, { "epoch": 11.531186252086295, "grad_norm": 0.13391605019569397, "learning_rate": 1.4236462261235089e-05, "loss": 0.0059, "step": 373080 }, { "epoch": 11.532113494467454, "grad_norm": 0.07691971957683563, "learning_rate": 1.4235998640044509e-05, "loss": 0.0049, "step": 373110 }, { "epoch": 11.533040736848612, "grad_norm": 0.11468544602394104, "learning_rate": 1.423553501885393e-05, "loss": 0.0051, "step": 373140 }, { "epoch": 11.533967979229772, "grad_norm": 0.1190037876367569, "learning_rate": 1.4235071397663352e-05, "loss": 0.0057, "step": 373170 }, { "epoch": 11.53489522161093, "grad_norm": 0.13368014991283417, "learning_rate": 1.423460777647277e-05, "loss": 0.0053, "step": 373200 }, { "epoch": 11.535822463992087, "grad_norm": 0.1335156410932541, 
"learning_rate": 1.4234144155282192e-05, "loss": 0.0058, "step": 373230 }, { "epoch": 11.536749706373246, "grad_norm": 0.10978005826473236, "learning_rate": 1.4233680534091611e-05, "loss": 0.0055, "step": 373260 }, { "epoch": 11.537676948754404, "grad_norm": 0.15311697125434875, "learning_rate": 1.4233216912901033e-05, "loss": 0.0056, "step": 373290 }, { "epoch": 11.538604191135562, "grad_norm": 0.09377390146255493, "learning_rate": 1.4232753291710454e-05, "loss": 0.0048, "step": 373320 }, { "epoch": 11.539531433516721, "grad_norm": 0.07458340376615524, "learning_rate": 1.4232289670519874e-05, "loss": 0.0057, "step": 373350 }, { "epoch": 11.540458675897879, "grad_norm": 0.059634383767843246, "learning_rate": 1.4231826049329296e-05, "loss": 0.0052, "step": 373380 }, { "epoch": 11.541385918279039, "grad_norm": 0.10139339417219162, "learning_rate": 1.4231362428138717e-05, "loss": 0.0058, "step": 373410 }, { "epoch": 11.542313160660196, "grad_norm": 0.12064005434513092, "learning_rate": 1.4230898806948137e-05, "loss": 0.0058, "step": 373440 }, { "epoch": 11.543240403041356, "grad_norm": 0.10570535063743591, "learning_rate": 1.4230435185757559e-05, "loss": 0.0055, "step": 373470 }, { "epoch": 11.544167645422514, "grad_norm": 0.051454778760671616, "learning_rate": 1.422997156456698e-05, "loss": 0.0053, "step": 373500 }, { "epoch": 11.545094887803671, "grad_norm": 0.0905061662197113, "learning_rate": 1.42295079433764e-05, "loss": 0.0056, "step": 373530 }, { "epoch": 11.54602213018483, "grad_norm": 0.10135696083307266, "learning_rate": 1.4229044322185821e-05, "loss": 0.006, "step": 373560 }, { "epoch": 11.546949372565988, "grad_norm": 0.1062491312623024, "learning_rate": 1.4228580700995241e-05, "loss": 0.0051, "step": 373590 }, { "epoch": 11.547876614947148, "grad_norm": 0.15899603068828583, "learning_rate": 1.4228117079804663e-05, "loss": 0.0052, "step": 373620 }, { "epoch": 11.548803857328306, "grad_norm": 0.17945832014083862, "learning_rate": 1.4227653458614084e-05, 
"loss": 0.0052, "step": 373650 }, { "epoch": 11.549731099709463, "grad_norm": 0.13511021435260773, "learning_rate": 1.4227189837423502e-05, "loss": 0.0062, "step": 373680 }, { "epoch": 11.550658342090623, "grad_norm": 0.1144903302192688, "learning_rate": 1.4226726216232924e-05, "loss": 0.0052, "step": 373710 }, { "epoch": 11.55158558447178, "grad_norm": 0.10050930827856064, "learning_rate": 1.4226262595042345e-05, "loss": 0.0061, "step": 373740 }, { "epoch": 11.55251282685294, "grad_norm": 0.10827919095754623, "learning_rate": 1.4225798973851765e-05, "loss": 0.0053, "step": 373770 }, { "epoch": 11.553440069234098, "grad_norm": 0.09719626605510712, "learning_rate": 1.4225335352661187e-05, "loss": 0.0059, "step": 373800 }, { "epoch": 11.554367311615255, "grad_norm": 0.08679354190826416, "learning_rate": 1.4224871731470608e-05, "loss": 0.0053, "step": 373830 }, { "epoch": 11.555294553996415, "grad_norm": 0.10673380643129349, "learning_rate": 1.4224408110280028e-05, "loss": 0.0059, "step": 373860 }, { "epoch": 11.556221796377573, "grad_norm": 0.09487158060073853, "learning_rate": 1.422394448908945e-05, "loss": 0.0055, "step": 373890 }, { "epoch": 11.557149038758732, "grad_norm": 0.21685488522052765, "learning_rate": 1.422348086789887e-05, "loss": 0.0058, "step": 373920 }, { "epoch": 11.55807628113989, "grad_norm": 0.10030176490545273, "learning_rate": 1.422301724670829e-05, "loss": 0.0051, "step": 373950 }, { "epoch": 11.55900352352105, "grad_norm": 0.08604635298252106, "learning_rate": 1.4222553625517712e-05, "loss": 0.0053, "step": 373980 }, { "epoch": 11.559930765902207, "grad_norm": 0.15779943764209747, "learning_rate": 1.4222090004327132e-05, "loss": 0.0063, "step": 374010 }, { "epoch": 11.560858008283365, "grad_norm": 0.08514445275068283, "learning_rate": 1.4221626383136554e-05, "loss": 0.0056, "step": 374040 }, { "epoch": 11.561785250664524, "grad_norm": 0.08135484158992767, "learning_rate": 1.4221162761945975e-05, "loss": 0.0051, "step": 374070 }, { "epoch": 
11.562712493045682, "grad_norm": 0.11673960834741592, "learning_rate": 1.4220699140755393e-05, "loss": 0.0057, "step": 374100 }, { "epoch": 11.56363973542684, "grad_norm": 0.10610082000494003, "learning_rate": 1.4220235519564816e-05, "loss": 0.0059, "step": 374130 }, { "epoch": 11.564566977808, "grad_norm": 0.1431397944688797, "learning_rate": 1.4219771898374238e-05, "loss": 0.006, "step": 374160 }, { "epoch": 11.565494220189157, "grad_norm": 0.11921319365501404, "learning_rate": 1.4219308277183656e-05, "loss": 0.0059, "step": 374190 }, { "epoch": 11.566421462570316, "grad_norm": 0.13501212000846863, "learning_rate": 1.4218844655993078e-05, "loss": 0.006, "step": 374220 }, { "epoch": 11.567348704951474, "grad_norm": 0.059305500239133835, "learning_rate": 1.4218381034802497e-05, "loss": 0.0054, "step": 374250 }, { "epoch": 11.568275947332634, "grad_norm": 0.2844298481941223, "learning_rate": 1.4217917413611919e-05, "loss": 0.0055, "step": 374280 }, { "epoch": 11.569203189713791, "grad_norm": 0.13862353563308716, "learning_rate": 1.421745379242134e-05, "loss": 0.006, "step": 374310 }, { "epoch": 11.570130432094949, "grad_norm": 0.12921446561813354, "learning_rate": 1.421699017123076e-05, "loss": 0.0061, "step": 374340 }, { "epoch": 11.571057674476108, "grad_norm": 0.08619315177202225, "learning_rate": 1.4216526550040182e-05, "loss": 0.0053, "step": 374370 }, { "epoch": 11.571984916857266, "grad_norm": 0.0882311761379242, "learning_rate": 1.4216062928849603e-05, "loss": 0.0056, "step": 374400 }, { "epoch": 11.572912159238426, "grad_norm": 0.08578077703714371, "learning_rate": 1.4215599307659023e-05, "loss": 0.006, "step": 374430 }, { "epoch": 11.573839401619583, "grad_norm": 0.08901868760585785, "learning_rate": 1.4215135686468445e-05, "loss": 0.0062, "step": 374460 }, { "epoch": 11.574766644000741, "grad_norm": 0.09380131214857101, "learning_rate": 1.4214672065277866e-05, "loss": 0.0063, "step": 374490 }, { "epoch": 11.5756938863819, "grad_norm": 0.076675646007061, 
"learning_rate": 1.4214208444087286e-05, "loss": 0.0055, "step": 374520 }, { "epoch": 11.576621128763058, "grad_norm": 0.08900372684001923, "learning_rate": 1.4213744822896707e-05, "loss": 0.0058, "step": 374550 }, { "epoch": 11.577548371144218, "grad_norm": 0.09150868654251099, "learning_rate": 1.4213281201706125e-05, "loss": 0.0057, "step": 374580 }, { "epoch": 11.578475613525375, "grad_norm": 0.1492944359779358, "learning_rate": 1.4212817580515547e-05, "loss": 0.0061, "step": 374610 }, { "epoch": 11.579402855906533, "grad_norm": 0.08912499994039536, "learning_rate": 1.4212353959324968e-05, "loss": 0.0056, "step": 374640 }, { "epoch": 11.580330098287693, "grad_norm": 0.0960346907377243, "learning_rate": 1.4211890338134388e-05, "loss": 0.0058, "step": 374670 }, { "epoch": 11.58125734066885, "grad_norm": 0.08627390116453171, "learning_rate": 1.421142671694381e-05, "loss": 0.0052, "step": 374700 }, { "epoch": 11.58218458305001, "grad_norm": 0.13426825404167175, "learning_rate": 1.4210963095753231e-05, "loss": 0.0056, "step": 374730 }, { "epoch": 11.583111825431168, "grad_norm": 0.07923281192779541, "learning_rate": 1.4210499474562651e-05, "loss": 0.0054, "step": 374760 }, { "epoch": 11.584039067812327, "grad_norm": 0.10772823542356491, "learning_rate": 1.4210035853372073e-05, "loss": 0.0071, "step": 374790 }, { "epoch": 11.584966310193485, "grad_norm": 0.11660019308328629, "learning_rate": 1.4209572232181494e-05, "loss": 0.0052, "step": 374820 }, { "epoch": 11.585893552574642, "grad_norm": 0.08173209428787231, "learning_rate": 1.4209108610990914e-05, "loss": 0.0056, "step": 374850 }, { "epoch": 11.586820794955802, "grad_norm": 0.07919751852750778, "learning_rate": 1.4208644989800335e-05, "loss": 0.0054, "step": 374880 }, { "epoch": 11.58774803733696, "grad_norm": 0.07610032707452774, "learning_rate": 1.4208181368609755e-05, "loss": 0.0051, "step": 374910 }, { "epoch": 11.588675279718117, "grad_norm": 0.0716865137219429, "learning_rate": 1.4207717747419177e-05, 
"loss": 0.0058, "step": 374940 }, { "epoch": 11.589602522099277, "grad_norm": 0.09656353294849396, "learning_rate": 1.4207254126228598e-05, "loss": 0.0058, "step": 374970 }, { "epoch": 11.590529764480435, "grad_norm": 0.1082686260342598, "learning_rate": 1.4206790505038018e-05, "loss": 0.0056, "step": 375000 }, { "epoch": 11.591457006861594, "grad_norm": 0.12098943442106247, "learning_rate": 1.420632688384744e-05, "loss": 0.0053, "step": 375030 }, { "epoch": 11.592384249242752, "grad_norm": 0.11086868494749069, "learning_rate": 1.4205863262656861e-05, "loss": 0.0055, "step": 375060 }, { "epoch": 11.593311491623911, "grad_norm": 0.1289990395307541, "learning_rate": 1.420539964146628e-05, "loss": 0.006, "step": 375090 }, { "epoch": 11.594238734005069, "grad_norm": 0.17750099301338196, "learning_rate": 1.42049360202757e-05, "loss": 0.0055, "step": 375120 }, { "epoch": 11.595165976386227, "grad_norm": 0.150937020778656, "learning_rate": 1.4204472399085122e-05, "loss": 0.0054, "step": 375150 }, { "epoch": 11.596093218767386, "grad_norm": 0.1533118635416031, "learning_rate": 1.4204008777894542e-05, "loss": 0.0057, "step": 375180 }, { "epoch": 11.597020461148544, "grad_norm": 0.09940887987613678, "learning_rate": 1.4203545156703964e-05, "loss": 0.0054, "step": 375210 }, { "epoch": 11.597947703529703, "grad_norm": 0.08044744282960892, "learning_rate": 1.4203081535513383e-05, "loss": 0.0058, "step": 375240 }, { "epoch": 11.598874945910861, "grad_norm": 0.08557026833295822, "learning_rate": 1.4202617914322805e-05, "loss": 0.0052, "step": 375270 }, { "epoch": 11.599802188292019, "grad_norm": 0.08867400884628296, "learning_rate": 1.4202154293132226e-05, "loss": 0.0052, "step": 375300 }, { "epoch": 11.600729430673178, "grad_norm": 0.1291201263666153, "learning_rate": 1.4201690671941646e-05, "loss": 0.0057, "step": 375330 }, { "epoch": 11.601656673054336, "grad_norm": 0.11445067822933197, "learning_rate": 1.4201227050751068e-05, "loss": 0.0058, "step": 375360 }, { "epoch": 
11.602583915435495, "grad_norm": 0.062142349779605865, "learning_rate": 1.420076342956049e-05, "loss": 0.0058, "step": 375390 }, { "epoch": 11.603511157816653, "grad_norm": 0.11154430359601974, "learning_rate": 1.4200299808369909e-05, "loss": 0.0056, "step": 375420 }, { "epoch": 11.604438400197811, "grad_norm": 0.10545762628316879, "learning_rate": 1.419983618717933e-05, "loss": 0.0056, "step": 375450 }, { "epoch": 11.60536564257897, "grad_norm": 0.14847077429294586, "learning_rate": 1.4199372565988752e-05, "loss": 0.0056, "step": 375480 }, { "epoch": 11.606292884960128, "grad_norm": 0.09310990571975708, "learning_rate": 1.419890894479817e-05, "loss": 0.0058, "step": 375510 }, { "epoch": 11.607220127341288, "grad_norm": 0.11370307952165604, "learning_rate": 1.4198445323607593e-05, "loss": 0.0053, "step": 375540 }, { "epoch": 11.608147369722445, "grad_norm": 0.09016923606395721, "learning_rate": 1.4197981702417011e-05, "loss": 0.0052, "step": 375570 }, { "epoch": 11.609074612103605, "grad_norm": 0.1386706829071045, "learning_rate": 1.4197518081226433e-05, "loss": 0.006, "step": 375600 }, { "epoch": 11.610001854484763, "grad_norm": 0.10988473147153854, "learning_rate": 1.4197054460035854e-05, "loss": 0.0053, "step": 375630 }, { "epoch": 11.61092909686592, "grad_norm": 0.08078795671463013, "learning_rate": 1.4196590838845274e-05, "loss": 0.0056, "step": 375660 }, { "epoch": 11.61185633924708, "grad_norm": 0.12771226465702057, "learning_rate": 1.4196127217654696e-05, "loss": 0.0058, "step": 375690 }, { "epoch": 11.612783581628237, "grad_norm": 0.18269862234592438, "learning_rate": 1.4195663596464117e-05, "loss": 0.0053, "step": 375720 }, { "epoch": 11.613710824009395, "grad_norm": 0.10995811969041824, "learning_rate": 1.4195199975273537e-05, "loss": 0.0055, "step": 375750 }, { "epoch": 11.614638066390555, "grad_norm": 0.15149709582328796, "learning_rate": 1.4194736354082959e-05, "loss": 0.0062, "step": 375780 }, { "epoch": 11.615565308771712, "grad_norm": 
0.08974294364452362, "learning_rate": 1.419427273289238e-05, "loss": 0.006, "step": 375810 }, { "epoch": 11.616492551152872, "grad_norm": 0.10315153747797012, "learning_rate": 1.41938091117018e-05, "loss": 0.0057, "step": 375840 }, { "epoch": 11.61741979353403, "grad_norm": 0.06907468289136887, "learning_rate": 1.4193345490511221e-05, "loss": 0.0052, "step": 375870 }, { "epoch": 11.618347035915189, "grad_norm": 0.155008926987648, "learning_rate": 1.4192881869320641e-05, "loss": 0.0062, "step": 375900 }, { "epoch": 11.619274278296347, "grad_norm": 0.08711405843496323, "learning_rate": 1.4192418248130063e-05, "loss": 0.0053, "step": 375930 }, { "epoch": 11.620201520677504, "grad_norm": 0.13220767676830292, "learning_rate": 1.4191954626939484e-05, "loss": 0.0054, "step": 375960 }, { "epoch": 11.621128763058664, "grad_norm": 0.09384235739707947, "learning_rate": 1.4191491005748902e-05, "loss": 0.0052, "step": 375990 }, { "epoch": 11.622056005439822, "grad_norm": 0.1318451315164566, "learning_rate": 1.4191027384558324e-05, "loss": 0.0052, "step": 376020 }, { "epoch": 11.622983247820981, "grad_norm": 0.08955462276935577, "learning_rate": 1.4190563763367747e-05, "loss": 0.0058, "step": 376050 }, { "epoch": 11.623910490202139, "grad_norm": 0.1331125944852829, "learning_rate": 1.4190100142177165e-05, "loss": 0.0055, "step": 376080 }, { "epoch": 11.624837732583297, "grad_norm": 0.130271315574646, "learning_rate": 1.4189636520986587e-05, "loss": 0.0059, "step": 376110 }, { "epoch": 11.625764974964456, "grad_norm": 0.11457224190235138, "learning_rate": 1.4189172899796008e-05, "loss": 0.0058, "step": 376140 }, { "epoch": 11.626692217345614, "grad_norm": 0.09838300943374634, "learning_rate": 1.4188709278605428e-05, "loss": 0.0056, "step": 376170 }, { "epoch": 11.627619459726773, "grad_norm": 0.07930540293455124, "learning_rate": 1.418824565741485e-05, "loss": 0.0052, "step": 376200 }, { "epoch": 11.628546702107931, "grad_norm": 0.07655564695596695, "learning_rate": 
1.418778203622427e-05, "loss": 0.0057, "step": 376230 }, { "epoch": 11.629473944489089, "grad_norm": 0.08538440614938736, "learning_rate": 1.418731841503369e-05, "loss": 0.0051, "step": 376260 }, { "epoch": 11.630401186870248, "grad_norm": 0.21134625375270844, "learning_rate": 1.4186854793843112e-05, "loss": 0.0055, "step": 376290 }, { "epoch": 11.631328429251406, "grad_norm": 0.11918517202138901, "learning_rate": 1.4186391172652532e-05, "loss": 0.0052, "step": 376320 }, { "epoch": 11.632255671632565, "grad_norm": 0.1198858916759491, "learning_rate": 1.4185927551461954e-05, "loss": 0.0062, "step": 376350 }, { "epoch": 11.633182914013723, "grad_norm": 0.17828048765659332, "learning_rate": 1.4185463930271375e-05, "loss": 0.0057, "step": 376380 }, { "epoch": 11.634110156394883, "grad_norm": 0.127994105219841, "learning_rate": 1.4185000309080795e-05, "loss": 0.0054, "step": 376410 }, { "epoch": 11.63503739877604, "grad_norm": 0.0466180145740509, "learning_rate": 1.4184536687890216e-05, "loss": 0.006, "step": 376440 }, { "epoch": 11.635964641157198, "grad_norm": 0.06346607953310013, "learning_rate": 1.4184073066699638e-05, "loss": 0.0053, "step": 376470 }, { "epoch": 11.636891883538357, "grad_norm": 0.08749274164438248, "learning_rate": 1.4183609445509056e-05, "loss": 0.0061, "step": 376500 }, { "epoch": 11.637819125919515, "grad_norm": 0.08824551850557327, "learning_rate": 1.4183145824318478e-05, "loss": 0.0057, "step": 376530 }, { "epoch": 11.638746368300673, "grad_norm": 0.09907251596450806, "learning_rate": 1.4182682203127899e-05, "loss": 0.0057, "step": 376560 }, { "epoch": 11.639673610681832, "grad_norm": 0.039122775197029114, "learning_rate": 1.4182218581937319e-05, "loss": 0.0061, "step": 376590 }, { "epoch": 11.64060085306299, "grad_norm": 0.0935928151011467, "learning_rate": 1.418175496074674e-05, "loss": 0.006, "step": 376620 }, { "epoch": 11.64152809544415, "grad_norm": 0.10740114003419876, "learning_rate": 1.418129133955616e-05, "loss": 0.005, "step": 
376650 }, { "epoch": 11.642455337825307, "grad_norm": 0.14364948868751526, "learning_rate": 1.4180827718365582e-05, "loss": 0.0058, "step": 376680 }, { "epoch": 11.643382580206467, "grad_norm": 0.09410589188337326, "learning_rate": 1.4180364097175003e-05, "loss": 0.0056, "step": 376710 }, { "epoch": 11.644309822587624, "grad_norm": 0.14145393669605255, "learning_rate": 1.4179900475984423e-05, "loss": 0.0056, "step": 376740 }, { "epoch": 11.645237064968782, "grad_norm": 0.1532861888408661, "learning_rate": 1.4179436854793845e-05, "loss": 0.0046, "step": 376770 }, { "epoch": 11.646164307349942, "grad_norm": 0.07200169563293457, "learning_rate": 1.4178973233603266e-05, "loss": 0.0056, "step": 376800 }, { "epoch": 11.6470915497311, "grad_norm": 0.09602881968021393, "learning_rate": 1.4178509612412686e-05, "loss": 0.0056, "step": 376830 }, { "epoch": 11.648018792112259, "grad_norm": 0.07913091033697128, "learning_rate": 1.4178045991222107e-05, "loss": 0.0057, "step": 376860 }, { "epoch": 11.648946034493417, "grad_norm": 0.11038760840892792, "learning_rate": 1.4177582370031529e-05, "loss": 0.0055, "step": 376890 }, { "epoch": 11.649873276874574, "grad_norm": 0.09716876596212387, "learning_rate": 1.4177118748840947e-05, "loss": 0.0062, "step": 376920 }, { "epoch": 11.650800519255734, "grad_norm": 0.15999667346477509, "learning_rate": 1.417665512765037e-05, "loss": 0.0053, "step": 376950 }, { "epoch": 11.651727761636892, "grad_norm": 0.17674730718135834, "learning_rate": 1.4176191506459788e-05, "loss": 0.0057, "step": 376980 }, { "epoch": 11.652655004018051, "grad_norm": 0.14103741943836212, "learning_rate": 1.417572788526921e-05, "loss": 0.006, "step": 377010 }, { "epoch": 11.653582246399209, "grad_norm": 0.11253146827220917, "learning_rate": 1.4175264264078631e-05, "loss": 0.0062, "step": 377040 }, { "epoch": 11.654509488780366, "grad_norm": 0.12228362262248993, "learning_rate": 1.4174800642888051e-05, "loss": 0.0059, "step": 377070 }, { "epoch": 11.655436731161526, 
"grad_norm": 0.1679084450006485, "learning_rate": 1.4174337021697473e-05, "loss": 0.0063, "step": 377100 }, { "epoch": 11.656363973542684, "grad_norm": 0.10799704492092133, "learning_rate": 1.4173873400506894e-05, "loss": 0.0059, "step": 377130 }, { "epoch": 11.657291215923843, "grad_norm": 0.17852802574634552, "learning_rate": 1.4173409779316314e-05, "loss": 0.0061, "step": 377160 }, { "epoch": 11.658218458305, "grad_norm": 0.09217645972967148, "learning_rate": 1.4172961612165422e-05, "loss": 0.0054, "step": 377190 }, { "epoch": 11.65914570068616, "grad_norm": 0.10728595405817032, "learning_rate": 1.4172497990974843e-05, "loss": 0.0059, "step": 377220 }, { "epoch": 11.660072943067318, "grad_norm": 0.13900834321975708, "learning_rate": 1.4172034369784262e-05, "loss": 0.0058, "step": 377250 }, { "epoch": 11.661000185448476, "grad_norm": 0.11127939075231552, "learning_rate": 1.4171570748593683e-05, "loss": 0.0056, "step": 377280 }, { "epoch": 11.661927427829635, "grad_norm": 0.1228487640619278, "learning_rate": 1.4171107127403105e-05, "loss": 0.005, "step": 377310 }, { "epoch": 11.662854670210793, "grad_norm": 0.17095130681991577, "learning_rate": 1.4170643506212524e-05, "loss": 0.0062, "step": 377340 }, { "epoch": 11.66378191259195, "grad_norm": 0.10587142407894135, "learning_rate": 1.4170179885021946e-05, "loss": 0.0052, "step": 377370 }, { "epoch": 11.66470915497311, "grad_norm": 0.1380917876958847, "learning_rate": 1.4169716263831366e-05, "loss": 0.0054, "step": 377400 }, { "epoch": 11.665636397354268, "grad_norm": 0.07762186974287033, "learning_rate": 1.4169252642640787e-05, "loss": 0.0053, "step": 377430 }, { "epoch": 11.666563639735427, "grad_norm": 0.09527099877595901, "learning_rate": 1.4168789021450209e-05, "loss": 0.0057, "step": 377460 }, { "epoch": 11.667490882116585, "grad_norm": 0.09761945903301239, "learning_rate": 1.4168325400259628e-05, "loss": 0.0055, "step": 377490 }, { "epoch": 11.668418124497745, "grad_norm": 0.14096879959106445, 
"learning_rate": 1.416786177906905e-05, "loss": 0.0056, "step": 377520 }, { "epoch": 11.669345366878902, "grad_norm": 0.12665370106697083, "learning_rate": 1.4167398157878471e-05, "loss": 0.0058, "step": 377550 }, { "epoch": 11.67027260926006, "grad_norm": 0.1485619992017746, "learning_rate": 1.4166934536687891e-05, "loss": 0.0051, "step": 377580 }, { "epoch": 11.67119985164122, "grad_norm": 0.12661677598953247, "learning_rate": 1.4166470915497313e-05, "loss": 0.0057, "step": 377610 }, { "epoch": 11.672127094022377, "grad_norm": 0.08797965943813324, "learning_rate": 1.4166007294306734e-05, "loss": 0.0054, "step": 377640 }, { "epoch": 11.673054336403537, "grad_norm": 0.17141947150230408, "learning_rate": 1.4165543673116154e-05, "loss": 0.006, "step": 377670 }, { "epoch": 11.673981578784694, "grad_norm": 0.14773571491241455, "learning_rate": 1.4165080051925576e-05, "loss": 0.0057, "step": 377700 }, { "epoch": 11.674908821165852, "grad_norm": 0.05616004019975662, "learning_rate": 1.4164616430734994e-05, "loss": 0.0056, "step": 377730 }, { "epoch": 11.675836063547012, "grad_norm": 0.2045546919107437, "learning_rate": 1.4164152809544415e-05, "loss": 0.0054, "step": 377760 }, { "epoch": 11.67676330592817, "grad_norm": 0.1955869346857071, "learning_rate": 1.4163689188353837e-05, "loss": 0.0057, "step": 377790 }, { "epoch": 11.677690548309329, "grad_norm": 0.1492842733860016, "learning_rate": 1.4163241021202943e-05, "loss": 0.0057, "step": 377820 }, { "epoch": 11.678617790690486, "grad_norm": 0.09239551424980164, "learning_rate": 1.4162777400012364e-05, "loss": 0.0059, "step": 377850 }, { "epoch": 11.679545033071644, "grad_norm": 0.08093857765197754, "learning_rate": 1.4162313778821786e-05, "loss": 0.0055, "step": 377880 }, { "epoch": 11.680472275452804, "grad_norm": 0.11644205451011658, "learning_rate": 1.4161850157631206e-05, "loss": 0.0059, "step": 377910 }, { "epoch": 11.681399517833961, "grad_norm": 0.07362201809883118, "learning_rate": 1.4161386536440627e-05, "loss": 
0.0057, "step": 377940 }, { "epoch": 11.68232676021512, "grad_norm": 0.15066038072109222, "learning_rate": 1.4160922915250049e-05, "loss": 0.0057, "step": 377970 }, { "epoch": 11.683254002596279, "grad_norm": 0.1408926546573639, "learning_rate": 1.4160459294059467e-05, "loss": 0.0065, "step": 378000 }, { "epoch": 11.684181244977438, "grad_norm": 0.08956407755613327, "learning_rate": 1.4159995672868888e-05, "loss": 0.0053, "step": 378030 }, { "epoch": 11.685108487358596, "grad_norm": 0.08779740333557129, "learning_rate": 1.4159532051678312e-05, "loss": 0.0057, "step": 378060 }, { "epoch": 11.686035729739753, "grad_norm": 0.0950620174407959, "learning_rate": 1.415906843048773e-05, "loss": 0.006, "step": 378090 }, { "epoch": 11.686962972120913, "grad_norm": 0.07792966812849045, "learning_rate": 1.4158604809297151e-05, "loss": 0.0055, "step": 378120 }, { "epoch": 11.68789021450207, "grad_norm": 0.08694098144769669, "learning_rate": 1.4158141188106571e-05, "loss": 0.0054, "step": 378150 }, { "epoch": 11.688817456883228, "grad_norm": 0.11521141976118088, "learning_rate": 1.4157677566915993e-05, "loss": 0.0067, "step": 378180 }, { "epoch": 11.689744699264388, "grad_norm": 0.17233087122440338, "learning_rate": 1.4157213945725414e-05, "loss": 0.0058, "step": 378210 }, { "epoch": 11.690671941645546, "grad_norm": 0.07948887348175049, "learning_rate": 1.4156750324534834e-05, "loss": 0.0054, "step": 378240 }, { "epoch": 11.691599184026705, "grad_norm": 0.14795000851154327, "learning_rate": 1.4156286703344255e-05, "loss": 0.0056, "step": 378270 }, { "epoch": 11.692526426407863, "grad_norm": 0.1020493358373642, "learning_rate": 1.4155823082153677e-05, "loss": 0.0062, "step": 378300 }, { "epoch": 11.693453668789022, "grad_norm": 0.13777586817741394, "learning_rate": 1.4155359460963097e-05, "loss": 0.0053, "step": 378330 }, { "epoch": 11.69438091117018, "grad_norm": 0.08351776003837585, "learning_rate": 1.4154895839772518e-05, "loss": 0.006, "step": 378360 }, { "epoch": 
11.695308153551338, "grad_norm": 0.18451330065727234, "learning_rate": 1.415443221858194e-05, "loss": 0.0053, "step": 378390 }, { "epoch": 11.696235395932497, "grad_norm": 0.1387936919927597, "learning_rate": 1.415396859739136e-05, "loss": 0.0048, "step": 378420 }, { "epoch": 11.697162638313655, "grad_norm": 0.09235331416130066, "learning_rate": 1.4153504976200781e-05, "loss": 0.0058, "step": 378450 }, { "epoch": 11.698089880694814, "grad_norm": 0.09130027890205383, "learning_rate": 1.4153041355010203e-05, "loss": 0.0055, "step": 378480 }, { "epoch": 11.699017123075972, "grad_norm": 0.09721367806196213, "learning_rate": 1.415257773381962e-05, "loss": 0.0055, "step": 378510 }, { "epoch": 11.69994436545713, "grad_norm": 0.076930470764637, "learning_rate": 1.4152114112629042e-05, "loss": 0.0048, "step": 378540 }, { "epoch": 11.70087160783829, "grad_norm": 0.12765619158744812, "learning_rate": 1.4151650491438462e-05, "loss": 0.0054, "step": 378570 }, { "epoch": 11.701798850219447, "grad_norm": 0.10266993939876556, "learning_rate": 1.4151186870247883e-05, "loss": 0.0056, "step": 378600 }, { "epoch": 11.702726092600606, "grad_norm": 0.13654442131519318, "learning_rate": 1.4150723249057305e-05, "loss": 0.0056, "step": 378630 }, { "epoch": 11.703653334981764, "grad_norm": 0.10805772989988327, "learning_rate": 1.4150259627866725e-05, "loss": 0.0064, "step": 378660 }, { "epoch": 11.704580577362922, "grad_norm": 0.08022051304578781, "learning_rate": 1.4149796006676146e-05, "loss": 0.0058, "step": 378690 }, { "epoch": 11.705507819744081, "grad_norm": 0.06404265016317368, "learning_rate": 1.4149332385485568e-05, "loss": 0.0052, "step": 378720 }, { "epoch": 11.706435062125239, "grad_norm": 0.1108236238360405, "learning_rate": 1.4148868764294988e-05, "loss": 0.0058, "step": 378750 }, { "epoch": 11.707362304506399, "grad_norm": 0.11846326291561127, "learning_rate": 1.4148405143104409e-05, "loss": 0.0059, "step": 378780 }, { "epoch": 11.708289546887556, "grad_norm": 
0.10226475447416306, "learning_rate": 1.414794152191383e-05, "loss": 0.0056, "step": 378810 }, { "epoch": 11.709216789268714, "grad_norm": 0.10661688446998596, "learning_rate": 1.414747790072325e-05, "loss": 0.0056, "step": 378840 }, { "epoch": 11.710144031649873, "grad_norm": 0.18232133984565735, "learning_rate": 1.4147014279532672e-05, "loss": 0.0057, "step": 378870 }, { "epoch": 11.711071274031031, "grad_norm": 0.07711376249790192, "learning_rate": 1.414655065834209e-05, "loss": 0.0053, "step": 378900 }, { "epoch": 11.71199851641219, "grad_norm": 0.11282522231340408, "learning_rate": 1.4146087037151512e-05, "loss": 0.0054, "step": 378930 }, { "epoch": 11.712925758793348, "grad_norm": 0.1072622537612915, "learning_rate": 1.4145623415960935e-05, "loss": 0.0058, "step": 378960 }, { "epoch": 11.713853001174506, "grad_norm": 0.08862931281328201, "learning_rate": 1.4145159794770353e-05, "loss": 0.006, "step": 378990 }, { "epoch": 11.714780243555666, "grad_norm": 0.13182324171066284, "learning_rate": 1.4144696173579774e-05, "loss": 0.0059, "step": 379020 }, { "epoch": 11.715707485936823, "grad_norm": 0.08192074298858643, "learning_rate": 1.4144232552389196e-05, "loss": 0.0056, "step": 379050 }, { "epoch": 11.716634728317983, "grad_norm": 0.11232993006706238, "learning_rate": 1.4143768931198616e-05, "loss": 0.0056, "step": 379080 }, { "epoch": 11.71756197069914, "grad_norm": 0.13100898265838623, "learning_rate": 1.4143305310008037e-05, "loss": 0.0056, "step": 379110 }, { "epoch": 11.7184892130803, "grad_norm": 0.1539958119392395, "learning_rate": 1.4142841688817459e-05, "loss": 0.0057, "step": 379140 }, { "epoch": 11.719416455461458, "grad_norm": 0.07153771072626114, "learning_rate": 1.4142378067626879e-05, "loss": 0.0059, "step": 379170 }, { "epoch": 11.720343697842615, "grad_norm": 0.12840673327445984, "learning_rate": 1.41419144464363e-05, "loss": 0.006, "step": 379200 }, { "epoch": 11.721270940223775, "grad_norm": 0.11986123025417328, "learning_rate": 
1.414145082524572e-05, "loss": 0.0061, "step": 379230 }, { "epoch": 11.722198182604933, "grad_norm": 0.09855207055807114, "learning_rate": 1.4140987204055141e-05, "loss": 0.0056, "step": 379260 }, { "epoch": 11.723125424986092, "grad_norm": 0.11196354031562805, "learning_rate": 1.4140523582864563e-05, "loss": 0.0055, "step": 379290 }, { "epoch": 11.72405266736725, "grad_norm": 0.09496085345745087, "learning_rate": 1.4140059961673983e-05, "loss": 0.0054, "step": 379320 }, { "epoch": 11.724979909748408, "grad_norm": 0.12931016087532043, "learning_rate": 1.4139596340483404e-05, "loss": 0.0058, "step": 379350 }, { "epoch": 11.725907152129567, "grad_norm": 0.07270083576440811, "learning_rate": 1.4139132719292826e-05, "loss": 0.0064, "step": 379380 }, { "epoch": 11.726834394510725, "grad_norm": 0.11217987537384033, "learning_rate": 1.4138669098102244e-05, "loss": 0.0059, "step": 379410 }, { "epoch": 11.727761636891884, "grad_norm": 0.10489007085561752, "learning_rate": 1.4138205476911665e-05, "loss": 0.0056, "step": 379440 }, { "epoch": 11.728688879273042, "grad_norm": 0.0979151502251625, "learning_rate": 1.4137741855721089e-05, "loss": 0.0056, "step": 379470 }, { "epoch": 11.7296161216542, "grad_norm": 0.10468005388975143, "learning_rate": 1.4137278234530507e-05, "loss": 0.0057, "step": 379500 }, { "epoch": 11.73054336403536, "grad_norm": 0.10388467460870743, "learning_rate": 1.4136814613339928e-05, "loss": 0.0056, "step": 379530 }, { "epoch": 11.731470606416517, "grad_norm": 0.10633634775876999, "learning_rate": 1.4136350992149348e-05, "loss": 0.0058, "step": 379560 }, { "epoch": 11.732397848797676, "grad_norm": 0.10003688931465149, "learning_rate": 1.413588737095877e-05, "loss": 0.0054, "step": 379590 }, { "epoch": 11.733325091178834, "grad_norm": 0.111791230738163, "learning_rate": 1.4135423749768191e-05, "loss": 0.0056, "step": 379620 }, { "epoch": 11.734252333559992, "grad_norm": 0.08500837534666061, "learning_rate": 1.413496012857761e-05, "loss": 0.0055, "step": 
379650 }, { "epoch": 11.735179575941151, "grad_norm": 0.06272099912166595, "learning_rate": 1.4134496507387032e-05, "loss": 0.0055, "step": 379680 }, { "epoch": 11.736106818322309, "grad_norm": 0.06178085505962372, "learning_rate": 1.4134032886196454e-05, "loss": 0.0053, "step": 379710 }, { "epoch": 11.737034060703468, "grad_norm": 0.11929826438426971, "learning_rate": 1.4133569265005874e-05, "loss": 0.006, "step": 379740 }, { "epoch": 11.737961303084626, "grad_norm": 0.15431182086467743, "learning_rate": 1.4133105643815295e-05, "loss": 0.0046, "step": 379770 }, { "epoch": 11.738888545465784, "grad_norm": 0.08098705857992172, "learning_rate": 1.4132642022624717e-05, "loss": 0.0053, "step": 379800 }, { "epoch": 11.739815787846943, "grad_norm": 0.13887308537960052, "learning_rate": 1.4132178401434136e-05, "loss": 0.0049, "step": 379830 }, { "epoch": 11.740743030228101, "grad_norm": 0.14121802151203156, "learning_rate": 1.4131714780243558e-05, "loss": 0.0062, "step": 379860 }, { "epoch": 11.74167027260926, "grad_norm": 0.1496746987104416, "learning_rate": 1.4131251159052976e-05, "loss": 0.005, "step": 379890 }, { "epoch": 11.742597514990418, "grad_norm": 0.06234874948859215, "learning_rate": 1.4130787537862398e-05, "loss": 0.0053, "step": 379920 }, { "epoch": 11.743524757371578, "grad_norm": 0.09371277689933777, "learning_rate": 1.4130323916671819e-05, "loss": 0.0051, "step": 379950 }, { "epoch": 11.744451999752735, "grad_norm": 0.12847034633159637, "learning_rate": 1.4129860295481239e-05, "loss": 0.0057, "step": 379980 }, { "epoch": 11.745379242133893, "grad_norm": 0.05977857857942581, "learning_rate": 1.412939667429066e-05, "loss": 0.0053, "step": 380010 }, { "epoch": 11.746306484515053, "grad_norm": 0.10184206068515778, "learning_rate": 1.4128933053100082e-05, "loss": 0.0055, "step": 380040 }, { "epoch": 11.74723372689621, "grad_norm": 0.11387477815151215, "learning_rate": 1.4128469431909502e-05, "loss": 0.0056, "step": 380070 }, { "epoch": 11.74816096927737, 
"grad_norm": 0.09653734415769577, "learning_rate": 1.4128005810718923e-05, "loss": 0.0054, "step": 380100 }, { "epoch": 11.749088211658528, "grad_norm": 0.09623033553361893, "learning_rate": 1.4127542189528345e-05, "loss": 0.0053, "step": 380130 }, { "epoch": 11.750015454039685, "grad_norm": 0.11970094591379166, "learning_rate": 1.4127078568337765e-05, "loss": 0.0047, "step": 380160 }, { "epoch": 11.750942696420845, "grad_norm": 0.10224194824695587, "learning_rate": 1.4126614947147186e-05, "loss": 0.0066, "step": 380190 }, { "epoch": 11.751869938802002, "grad_norm": 0.13150785863399506, "learning_rate": 1.4126151325956606e-05, "loss": 0.0056, "step": 380220 }, { "epoch": 11.752797181183162, "grad_norm": 0.1283908635377884, "learning_rate": 1.4125687704766027e-05, "loss": 0.0058, "step": 380250 }, { "epoch": 11.75372442356432, "grad_norm": 0.08781441301107407, "learning_rate": 1.4125224083575449e-05, "loss": 0.0055, "step": 380280 }, { "epoch": 11.754651665945477, "grad_norm": 0.1056985929608345, "learning_rate": 1.4124760462384867e-05, "loss": 0.0059, "step": 380310 }, { "epoch": 11.755578908326637, "grad_norm": 0.14722102880477905, "learning_rate": 1.4124296841194288e-05, "loss": 0.0056, "step": 380340 }, { "epoch": 11.756506150707795, "grad_norm": 0.1005394384264946, "learning_rate": 1.4123833220003712e-05, "loss": 0.0057, "step": 380370 }, { "epoch": 11.757433393088954, "grad_norm": 0.08682873845100403, "learning_rate": 1.412336959881313e-05, "loss": 0.006, "step": 380400 }, { "epoch": 11.758360635470112, "grad_norm": 0.12593649327754974, "learning_rate": 1.4122905977622551e-05, "loss": 0.0057, "step": 380430 }, { "epoch": 11.75928787785127, "grad_norm": 0.10401421785354614, "learning_rate": 1.4122442356431973e-05, "loss": 0.0054, "step": 380460 }, { "epoch": 11.760215120232429, "grad_norm": 0.10067053139209747, "learning_rate": 1.4121978735241393e-05, "loss": 0.0057, "step": 380490 }, { "epoch": 11.761142362613587, "grad_norm": 0.08454044163227081, 
"learning_rate": 1.4121515114050814e-05, "loss": 0.0059, "step": 380520 }, { "epoch": 11.762069604994746, "grad_norm": 0.09930077195167542, "learning_rate": 1.4121051492860234e-05, "loss": 0.0047, "step": 380550 }, { "epoch": 11.762996847375904, "grad_norm": 0.13765256106853485, "learning_rate": 1.4120587871669655e-05, "loss": 0.0058, "step": 380580 }, { "epoch": 11.763924089757062, "grad_norm": 0.12755419313907623, "learning_rate": 1.4120124250479077e-05, "loss": 0.0059, "step": 380610 }, { "epoch": 11.764851332138221, "grad_norm": 0.11425996571779251, "learning_rate": 1.4119660629288497e-05, "loss": 0.006, "step": 380640 }, { "epoch": 11.765778574519379, "grad_norm": 0.08559095114469528, "learning_rate": 1.4119197008097918e-05, "loss": 0.0059, "step": 380670 }, { "epoch": 11.766705816900538, "grad_norm": 0.10788147151470184, "learning_rate": 1.411873338690734e-05, "loss": 0.0057, "step": 380700 }, { "epoch": 11.767633059281696, "grad_norm": 0.09511660039424896, "learning_rate": 1.411826976571676e-05, "loss": 0.0051, "step": 380730 }, { "epoch": 11.768560301662855, "grad_norm": 0.11353901773691177, "learning_rate": 1.4117806144526181e-05, "loss": 0.0056, "step": 380760 }, { "epoch": 11.769487544044013, "grad_norm": 0.10984037071466446, "learning_rate": 1.4117342523335603e-05, "loss": 0.0052, "step": 380790 }, { "epoch": 11.770414786425171, "grad_norm": 0.14791510999202728, "learning_rate": 1.411687890214502e-05, "loss": 0.0052, "step": 380820 }, { "epoch": 11.77134202880633, "grad_norm": 0.0993628278374672, "learning_rate": 1.4116415280954442e-05, "loss": 0.0055, "step": 380850 }, { "epoch": 11.772269271187488, "grad_norm": 0.10585606098175049, "learning_rate": 1.4115951659763862e-05, "loss": 0.006, "step": 380880 }, { "epoch": 11.773196513568648, "grad_norm": 0.09458045661449432, "learning_rate": 1.4115488038573284e-05, "loss": 0.0056, "step": 380910 }, { "epoch": 11.774123755949805, "grad_norm": 0.10807182639837265, "learning_rate": 1.4115024417382705e-05, 
"loss": 0.0055, "step": 380940 }, { "epoch": 11.775050998330963, "grad_norm": 0.1318906992673874, "learning_rate": 1.4114560796192125e-05, "loss": 0.005, "step": 380970 }, { "epoch": 11.775978240712123, "grad_norm": 0.15397000312805176, "learning_rate": 1.4114097175001546e-05, "loss": 0.0055, "step": 381000 }, { "epoch": 11.77690548309328, "grad_norm": 0.1200266033411026, "learning_rate": 1.4113633553810968e-05, "loss": 0.0057, "step": 381030 }, { "epoch": 11.77783272547444, "grad_norm": 0.09333641827106476, "learning_rate": 1.4113169932620388e-05, "loss": 0.0054, "step": 381060 }, { "epoch": 11.778759967855597, "grad_norm": 0.07939738780260086, "learning_rate": 1.411270631142981e-05, "loss": 0.0053, "step": 381090 }, { "epoch": 11.779687210236755, "grad_norm": 0.10112788528203964, "learning_rate": 1.411224269023923e-05, "loss": 0.0065, "step": 381120 }, { "epoch": 11.780614452617915, "grad_norm": 0.06958779692649841, "learning_rate": 1.411177906904865e-05, "loss": 0.0057, "step": 381150 }, { "epoch": 11.781541694999072, "grad_norm": 0.08311272412538528, "learning_rate": 1.4111315447858072e-05, "loss": 0.0062, "step": 381180 }, { "epoch": 11.782468937380232, "grad_norm": 0.1287960708141327, "learning_rate": 1.411085182666749e-05, "loss": 0.0057, "step": 381210 }, { "epoch": 11.78339617976139, "grad_norm": 0.13830365240573883, "learning_rate": 1.4110388205476913e-05, "loss": 0.0064, "step": 381240 }, { "epoch": 11.784323422142547, "grad_norm": 0.12276412546634674, "learning_rate": 1.4109924584286335e-05, "loss": 0.0058, "step": 381270 }, { "epoch": 11.785250664523707, "grad_norm": 0.13038720190525055, "learning_rate": 1.4109460963095753e-05, "loss": 0.0061, "step": 381300 }, { "epoch": 11.786177906904864, "grad_norm": 0.18875986337661743, "learning_rate": 1.4108997341905174e-05, "loss": 0.0054, "step": 381330 }, { "epoch": 11.787105149286024, "grad_norm": 0.07603555172681808, "learning_rate": 1.4108533720714596e-05, "loss": 0.0059, "step": 381360 }, { "epoch": 
11.788032391667182, "grad_norm": 0.11221083253622055, "learning_rate": 1.4108070099524016e-05, "loss": 0.0059, "step": 381390 }, { "epoch": 11.78895963404834, "grad_norm": 0.11328116804361343, "learning_rate": 1.4107606478333437e-05, "loss": 0.0057, "step": 381420 }, { "epoch": 11.789886876429499, "grad_norm": 0.10711986571550369, "learning_rate": 1.4107142857142859e-05, "loss": 0.0054, "step": 381450 }, { "epoch": 11.790814118810657, "grad_norm": 0.05796387046575546, "learning_rate": 1.4106679235952279e-05, "loss": 0.0062, "step": 381480 }, { "epoch": 11.791741361191816, "grad_norm": 0.08774761855602264, "learning_rate": 1.41062156147617e-05, "loss": 0.0058, "step": 381510 }, { "epoch": 11.792668603572974, "grad_norm": 0.07913994789123535, "learning_rate": 1.410575199357112e-05, "loss": 0.0058, "step": 381540 }, { "epoch": 11.793595845954133, "grad_norm": 0.10996139049530029, "learning_rate": 1.4105288372380541e-05, "loss": 0.0056, "step": 381570 }, { "epoch": 11.794523088335291, "grad_norm": 0.11384186148643494, "learning_rate": 1.4104824751189963e-05, "loss": 0.0055, "step": 381600 }, { "epoch": 11.795450330716449, "grad_norm": 0.11484132707118988, "learning_rate": 1.4104361129999383e-05, "loss": 0.0057, "step": 381630 }, { "epoch": 11.796377573097608, "grad_norm": 0.0948021188378334, "learning_rate": 1.4103897508808804e-05, "loss": 0.0051, "step": 381660 }, { "epoch": 11.797304815478766, "grad_norm": 0.14146873354911804, "learning_rate": 1.4103433887618226e-05, "loss": 0.0054, "step": 381690 }, { "epoch": 11.798232057859925, "grad_norm": 0.1102428138256073, "learning_rate": 1.4102970266427644e-05, "loss": 0.0057, "step": 381720 }, { "epoch": 11.799159300241083, "grad_norm": 0.09943285584449768, "learning_rate": 1.4102506645237067e-05, "loss": 0.0056, "step": 381750 }, { "epoch": 11.80008654262224, "grad_norm": 0.14651213586330414, "learning_rate": 1.4102043024046489e-05, "loss": 0.0061, "step": 381780 }, { "epoch": 11.8010137850034, "grad_norm": 
0.12279284000396729, "learning_rate": 1.4101579402855907e-05, "loss": 0.0063, "step": 381810 }, { "epoch": 11.801941027384558, "grad_norm": 0.10301007330417633, "learning_rate": 1.4101115781665328e-05, "loss": 0.0049, "step": 381840 }, { "epoch": 11.802868269765717, "grad_norm": 0.15972185134887695, "learning_rate": 1.4100652160474748e-05, "loss": 0.0057, "step": 381870 }, { "epoch": 11.803795512146875, "grad_norm": 0.15494832396507263, "learning_rate": 1.410018853928417e-05, "loss": 0.005, "step": 381900 }, { "epoch": 11.804722754528033, "grad_norm": 0.2217458188533783, "learning_rate": 1.4099724918093591e-05, "loss": 0.0056, "step": 381930 }, { "epoch": 11.805649996909192, "grad_norm": 0.1360442340373993, "learning_rate": 1.4099261296903011e-05, "loss": 0.0054, "step": 381960 }, { "epoch": 11.80657723929035, "grad_norm": 0.09522876888513565, "learning_rate": 1.4098797675712432e-05, "loss": 0.0055, "step": 381990 }, { "epoch": 11.80750448167151, "grad_norm": 0.1130189597606659, "learning_rate": 1.4098334054521854e-05, "loss": 0.0057, "step": 382020 }, { "epoch": 11.808431724052667, "grad_norm": 0.07806165516376495, "learning_rate": 1.4097870433331274e-05, "loss": 0.0055, "step": 382050 }, { "epoch": 11.809358966433825, "grad_norm": 0.09761296957731247, "learning_rate": 1.4097406812140695e-05, "loss": 0.0057, "step": 382080 }, { "epoch": 11.810286208814984, "grad_norm": 0.10060294717550278, "learning_rate": 1.4096943190950117e-05, "loss": 0.0055, "step": 382110 }, { "epoch": 11.811213451196142, "grad_norm": 0.15331068634986877, "learning_rate": 1.4096479569759537e-05, "loss": 0.0059, "step": 382140 }, { "epoch": 11.812140693577302, "grad_norm": 0.13658124208450317, "learning_rate": 1.4096015948568958e-05, "loss": 0.006, "step": 382170 }, { "epoch": 11.81306793595846, "grad_norm": 0.10183441638946533, "learning_rate": 1.4095552327378376e-05, "loss": 0.0053, "step": 382200 }, { "epoch": 11.813995178339617, "grad_norm": 0.11062982678413391, "learning_rate": 
1.4095088706187798e-05, "loss": 0.0056, "step": 382230 }, { "epoch": 11.814922420720777, "grad_norm": 0.06755387783050537, "learning_rate": 1.4094625084997219e-05, "loss": 0.0057, "step": 382260 }, { "epoch": 11.815849663101934, "grad_norm": 0.11204748600721359, "learning_rate": 1.4094161463806639e-05, "loss": 0.0054, "step": 382290 }, { "epoch": 11.816776905483094, "grad_norm": 0.10756755620241165, "learning_rate": 1.409369784261606e-05, "loss": 0.0053, "step": 382320 }, { "epoch": 11.817704147864251, "grad_norm": 0.11690258234739304, "learning_rate": 1.4093234221425482e-05, "loss": 0.0056, "step": 382350 }, { "epoch": 11.818631390245411, "grad_norm": 0.08996739983558655, "learning_rate": 1.4092770600234902e-05, "loss": 0.0055, "step": 382380 }, { "epoch": 11.819558632626569, "grad_norm": 0.07719793170690536, "learning_rate": 1.4092306979044323e-05, "loss": 0.0067, "step": 382410 }, { "epoch": 11.820485875007726, "grad_norm": 0.11007629334926605, "learning_rate": 1.4091843357853745e-05, "loss": 0.0052, "step": 382440 }, { "epoch": 11.821413117388886, "grad_norm": 0.16726450622081757, "learning_rate": 1.4091379736663165e-05, "loss": 0.006, "step": 382470 }, { "epoch": 11.822340359770044, "grad_norm": 0.09201108664274216, "learning_rate": 1.4090916115472586e-05, "loss": 0.0059, "step": 382500 }, { "epoch": 11.823267602151203, "grad_norm": 0.11653107404708862, "learning_rate": 1.4090452494282008e-05, "loss": 0.0057, "step": 382530 }, { "epoch": 11.82419484453236, "grad_norm": 0.15125912427902222, "learning_rate": 1.4089988873091427e-05, "loss": 0.0056, "step": 382560 }, { "epoch": 11.825122086913519, "grad_norm": 0.13506872951984406, "learning_rate": 1.4089525251900849e-05, "loss": 0.0061, "step": 382590 }, { "epoch": 11.826049329294678, "grad_norm": 0.16392329335212708, "learning_rate": 1.4089061630710267e-05, "loss": 0.0057, "step": 382620 }, { "epoch": 11.826976571675836, "grad_norm": 0.1217195987701416, "learning_rate": 1.408859800951969e-05, "loss": 0.0062, 
"step": 382650 }, { "epoch": 11.827903814056995, "grad_norm": 0.0784456729888916, "learning_rate": 1.4088134388329112e-05, "loss": 0.0054, "step": 382680 }, { "epoch": 11.828831056438153, "grad_norm": 0.1108480840921402, "learning_rate": 1.408767076713853e-05, "loss": 0.0057, "step": 382710 }, { "epoch": 11.82975829881931, "grad_norm": 0.11373072862625122, "learning_rate": 1.4087207145947951e-05, "loss": 0.0059, "step": 382740 }, { "epoch": 11.83068554120047, "grad_norm": 0.10781766474246979, "learning_rate": 1.408675897879706e-05, "loss": 0.0055, "step": 382770 }, { "epoch": 11.831612783581628, "grad_norm": 0.11372823268175125, "learning_rate": 1.4086295357606479e-05, "loss": 0.0059, "step": 382800 }, { "epoch": 11.832540025962787, "grad_norm": 0.07992634922266006, "learning_rate": 1.40858317364159e-05, "loss": 0.006, "step": 382830 }, { "epoch": 11.833467268343945, "grad_norm": 0.09178446978330612, "learning_rate": 1.4085368115225322e-05, "loss": 0.0056, "step": 382860 }, { "epoch": 11.834394510725103, "grad_norm": 0.10912821441888809, "learning_rate": 1.4084904494034742e-05, "loss": 0.005, "step": 382890 }, { "epoch": 11.835321753106262, "grad_norm": 0.11597134172916412, "learning_rate": 1.4084440872844163e-05, "loss": 0.0059, "step": 382920 }, { "epoch": 11.83624899548742, "grad_norm": 0.14371968805789948, "learning_rate": 1.4083977251653585e-05, "loss": 0.0061, "step": 382950 }, { "epoch": 11.83717623786858, "grad_norm": 0.17268572747707367, "learning_rate": 1.4083513630463003e-05, "loss": 0.0049, "step": 382980 }, { "epoch": 11.838103480249737, "grad_norm": 0.12374965101480484, "learning_rate": 1.4083050009272425e-05, "loss": 0.0057, "step": 383010 }, { "epoch": 11.839030722630895, "grad_norm": 0.10812786221504211, "learning_rate": 1.4082586388081844e-05, "loss": 0.0054, "step": 383040 }, { "epoch": 11.839957965012054, "grad_norm": 0.08467500656843185, "learning_rate": 1.4082122766891266e-05, "loss": 0.0061, "step": 383070 }, { "epoch": 11.840885207393212, 
"grad_norm": 0.0994310975074768, "learning_rate": 1.4081659145700687e-05, "loss": 0.0064, "step": 383100 }, { "epoch": 11.841812449774372, "grad_norm": 0.09185684472322464, "learning_rate": 1.4081195524510107e-05, "loss": 0.0062, "step": 383130 }, { "epoch": 11.84273969215553, "grad_norm": 0.09045284241437912, "learning_rate": 1.4080731903319529e-05, "loss": 0.0056, "step": 383160 }, { "epoch": 11.843666934536689, "grad_norm": 0.13181565701961517, "learning_rate": 1.408026828212895e-05, "loss": 0.0057, "step": 383190 }, { "epoch": 11.844594176917846, "grad_norm": 0.09970900416374207, "learning_rate": 1.407980466093837e-05, "loss": 0.0053, "step": 383220 }, { "epoch": 11.845521419299004, "grad_norm": 0.1311030238866806, "learning_rate": 1.4079341039747792e-05, "loss": 0.0058, "step": 383250 }, { "epoch": 11.846448661680164, "grad_norm": 0.08991049975156784, "learning_rate": 1.4078877418557213e-05, "loss": 0.0051, "step": 383280 }, { "epoch": 11.847375904061321, "grad_norm": 0.16970054805278778, "learning_rate": 1.4078413797366633e-05, "loss": 0.0064, "step": 383310 }, { "epoch": 11.84830314644248, "grad_norm": 0.1329086571931839, "learning_rate": 1.4077950176176054e-05, "loss": 0.0055, "step": 383340 }, { "epoch": 11.849230388823639, "grad_norm": 0.13119401037693024, "learning_rate": 1.4077486554985474e-05, "loss": 0.005, "step": 383370 }, { "epoch": 11.850157631204796, "grad_norm": 0.1155015379190445, "learning_rate": 1.4077022933794896e-05, "loss": 0.0056, "step": 383400 }, { "epoch": 11.851084873585956, "grad_norm": 0.1407642960548401, "learning_rate": 1.4076559312604317e-05, "loss": 0.0054, "step": 383430 }, { "epoch": 11.852012115967113, "grad_norm": 0.10147867351770401, "learning_rate": 1.4076095691413735e-05, "loss": 0.0056, "step": 383460 }, { "epoch": 11.852939358348273, "grad_norm": 0.16859175264835358, "learning_rate": 1.4075632070223157e-05, "loss": 0.0052, "step": 383490 }, { "epoch": 11.85386660072943, "grad_norm": 0.10075017064809799, "learning_rate": 
1.4075168449032578e-05, "loss": 0.0058, "step": 383520 }, { "epoch": 11.854793843110588, "grad_norm": 0.13514022529125214, "learning_rate": 1.4074704827841998e-05, "loss": 0.0054, "step": 383550 }, { "epoch": 11.855721085491748, "grad_norm": 0.09114893525838852, "learning_rate": 1.407424120665142e-05, "loss": 0.0054, "step": 383580 }, { "epoch": 11.856648327872906, "grad_norm": 0.16266348958015442, "learning_rate": 1.4073777585460841e-05, "loss": 0.0064, "step": 383610 }, { "epoch": 11.857575570254065, "grad_norm": 0.10271817445755005, "learning_rate": 1.4073313964270261e-05, "loss": 0.0056, "step": 383640 }, { "epoch": 11.858502812635223, "grad_norm": 0.05994938313961029, "learning_rate": 1.4072850343079682e-05, "loss": 0.0058, "step": 383670 }, { "epoch": 11.85943005501638, "grad_norm": 0.06861121952533722, "learning_rate": 1.4072386721889102e-05, "loss": 0.0055, "step": 383700 }, { "epoch": 11.86035729739754, "grad_norm": 0.12368854880332947, "learning_rate": 1.4071923100698524e-05, "loss": 0.0057, "step": 383730 }, { "epoch": 11.861284539778698, "grad_norm": 0.10710018128156662, "learning_rate": 1.4071459479507945e-05, "loss": 0.0059, "step": 383760 }, { "epoch": 11.862211782159857, "grad_norm": 0.12485066056251526, "learning_rate": 1.4070995858317365e-05, "loss": 0.0059, "step": 383790 }, { "epoch": 11.863139024541015, "grad_norm": 0.12411360442638397, "learning_rate": 1.4070532237126787e-05, "loss": 0.0052, "step": 383820 }, { "epoch": 11.864066266922173, "grad_norm": 0.10547789931297302, "learning_rate": 1.4070068615936208e-05, "loss": 0.0055, "step": 383850 }, { "epoch": 11.864993509303332, "grad_norm": 0.12068907916545868, "learning_rate": 1.4069604994745626e-05, "loss": 0.0054, "step": 383880 }, { "epoch": 11.86592075168449, "grad_norm": 0.08616146445274353, "learning_rate": 1.406914137355505e-05, "loss": 0.0053, "step": 383910 }, { "epoch": 11.86684799406565, "grad_norm": 0.13536518812179565, "learning_rate": 1.4068677752364471e-05, "loss": 0.0057, 
"step": 383940 }, { "epoch": 11.867775236446807, "grad_norm": 0.12087101489305496, "learning_rate": 1.4068214131173889e-05, "loss": 0.0059, "step": 383970 }, { "epoch": 11.868702478827966, "grad_norm": 0.11682261526584625, "learning_rate": 1.406775050998331e-05, "loss": 0.0053, "step": 384000 }, { "epoch": 11.869629721209124, "grad_norm": 0.13918456435203552, "learning_rate": 1.406728688879273e-05, "loss": 0.0058, "step": 384030 }, { "epoch": 11.870556963590282, "grad_norm": 0.09665181487798691, "learning_rate": 1.4066823267602152e-05, "loss": 0.0057, "step": 384060 }, { "epoch": 11.871484205971441, "grad_norm": 0.08149239420890808, "learning_rate": 1.4066359646411573e-05, "loss": 0.006, "step": 384090 }, { "epoch": 11.872411448352599, "grad_norm": 0.10934385657310486, "learning_rate": 1.4065896025220993e-05, "loss": 0.0052, "step": 384120 }, { "epoch": 11.873338690733759, "grad_norm": 0.0963323563337326, "learning_rate": 1.4065432404030415e-05, "loss": 0.0063, "step": 384150 }, { "epoch": 11.874265933114916, "grad_norm": 0.1421104222536087, "learning_rate": 1.4064968782839836e-05, "loss": 0.0057, "step": 384180 }, { "epoch": 11.875193175496074, "grad_norm": 0.14890074729919434, "learning_rate": 1.4064505161649256e-05, "loss": 0.0056, "step": 384210 }, { "epoch": 11.876120417877233, "grad_norm": 0.1424289047718048, "learning_rate": 1.4064041540458678e-05, "loss": 0.0056, "step": 384240 }, { "epoch": 11.877047660258391, "grad_norm": 0.12624087929725647, "learning_rate": 1.4063577919268099e-05, "loss": 0.0053, "step": 384270 }, { "epoch": 11.87797490263955, "grad_norm": 0.08593505620956421, "learning_rate": 1.4063114298077519e-05, "loss": 0.0054, "step": 384300 }, { "epoch": 11.878902145020708, "grad_norm": 0.06845678389072418, "learning_rate": 1.406265067688694e-05, "loss": 0.0061, "step": 384330 }, { "epoch": 11.879829387401866, "grad_norm": 0.1045323982834816, "learning_rate": 1.4062187055696358e-05, "loss": 0.0056, "step": 384360 }, { "epoch": 11.880756629783026, 
"grad_norm": 0.09399528801441193, "learning_rate": 1.406172343450578e-05, "loss": 0.0051, "step": 384390 }, { "epoch": 11.881683872164183, "grad_norm": 0.10641579329967499, "learning_rate": 1.4061259813315201e-05, "loss": 0.006, "step": 384420 }, { "epoch": 11.882611114545343, "grad_norm": 0.1219811961054802, "learning_rate": 1.4060796192124621e-05, "loss": 0.0056, "step": 384450 }, { "epoch": 11.8835383569265, "grad_norm": 0.09926873445510864, "learning_rate": 1.4060332570934043e-05, "loss": 0.0053, "step": 384480 }, { "epoch": 11.884465599307658, "grad_norm": 0.1328705996274948, "learning_rate": 1.405988440378315e-05, "loss": 0.0059, "step": 384510 }, { "epoch": 11.885392841688818, "grad_norm": 0.102794349193573, "learning_rate": 1.405942078259257e-05, "loss": 0.0056, "step": 384540 }, { "epoch": 11.886320084069975, "grad_norm": 0.08835051953792572, "learning_rate": 1.4058957161401992e-05, "loss": 0.0061, "step": 384570 }, { "epoch": 11.887247326451135, "grad_norm": 0.08661951124668121, "learning_rate": 1.4058493540211414e-05, "loss": 0.006, "step": 384600 }, { "epoch": 11.888174568832293, "grad_norm": 0.10470811277627945, "learning_rate": 1.4058029919020832e-05, "loss": 0.0062, "step": 384630 }, { "epoch": 11.88910181121345, "grad_norm": 0.04698314890265465, "learning_rate": 1.4057566297830255e-05, "loss": 0.006, "step": 384660 }, { "epoch": 11.89002905359461, "grad_norm": 0.13706883788108826, "learning_rate": 1.4057102676639676e-05, "loss": 0.0053, "step": 384690 }, { "epoch": 11.890956295975768, "grad_norm": 0.09363137185573578, "learning_rate": 1.4056639055449094e-05, "loss": 0.0056, "step": 384720 }, { "epoch": 11.891883538356927, "grad_norm": 0.14111539721488953, "learning_rate": 1.4056175434258516e-05, "loss": 0.0061, "step": 384750 }, { "epoch": 11.892810780738085, "grad_norm": 0.08250505477190018, "learning_rate": 1.4055711813067937e-05, "loss": 0.0054, "step": 384780 }, { "epoch": 11.893738023119244, "grad_norm": 0.06767996400594711, "learning_rate": 
1.4055248191877357e-05, "loss": 0.0053, "step": 384810 }, { "epoch": 11.894665265500402, "grad_norm": 0.09123557060956955, "learning_rate": 1.4054784570686779e-05, "loss": 0.0057, "step": 384840 }, { "epoch": 11.89559250788156, "grad_norm": 0.12684917449951172, "learning_rate": 1.4054320949496199e-05, "loss": 0.006, "step": 384870 }, { "epoch": 11.89651975026272, "grad_norm": 0.12551496922969818, "learning_rate": 1.405385732830562e-05, "loss": 0.0054, "step": 384900 }, { "epoch": 11.897446992643877, "grad_norm": 0.12655425071716309, "learning_rate": 1.4053393707115042e-05, "loss": 0.0059, "step": 384930 }, { "epoch": 11.898374235025036, "grad_norm": 0.10179232060909271, "learning_rate": 1.4052930085924461e-05, "loss": 0.0052, "step": 384960 }, { "epoch": 11.899301477406194, "grad_norm": 0.14230650663375854, "learning_rate": 1.4052466464733883e-05, "loss": 0.006, "step": 384990 }, { "epoch": 11.900228719787352, "grad_norm": 0.15297982096672058, "learning_rate": 1.4052002843543304e-05, "loss": 0.0059, "step": 385020 }, { "epoch": 11.901155962168511, "grad_norm": 0.08477814495563507, "learning_rate": 1.4051539222352724e-05, "loss": 0.0054, "step": 385050 }, { "epoch": 11.902083204549669, "grad_norm": 0.10682358592748642, "learning_rate": 1.4051075601162146e-05, "loss": 0.0057, "step": 385080 }, { "epoch": 11.903010446930828, "grad_norm": 0.1162799596786499, "learning_rate": 1.4050611979971567e-05, "loss": 0.0054, "step": 385110 }, { "epoch": 11.903937689311986, "grad_norm": 0.1129208654165268, "learning_rate": 1.4050148358780985e-05, "loss": 0.0055, "step": 385140 }, { "epoch": 11.904864931693144, "grad_norm": 0.12315484136343002, "learning_rate": 1.4049684737590409e-05, "loss": 0.0056, "step": 385170 }, { "epoch": 11.905792174074303, "grad_norm": 0.16559970378875732, "learning_rate": 1.4049221116399827e-05, "loss": 0.0053, "step": 385200 }, { "epoch": 11.906719416455461, "grad_norm": 0.09269284456968307, "learning_rate": 1.4048757495209248e-05, "loss": 0.0058, 
"step": 385230 }, { "epoch": 11.90764665883662, "grad_norm": 0.11579227447509766, "learning_rate": 1.404829387401867e-05, "loss": 0.0058, "step": 385260 }, { "epoch": 11.908573901217778, "grad_norm": 0.10335305333137512, "learning_rate": 1.4047845706867776e-05, "loss": 0.0058, "step": 385290 }, { "epoch": 11.909501143598936, "grad_norm": 0.07502667605876923, "learning_rate": 1.4047382085677197e-05, "loss": 0.0057, "step": 385320 }, { "epoch": 11.910428385980095, "grad_norm": 0.1629319041967392, "learning_rate": 1.4046918464486619e-05, "loss": 0.0062, "step": 385350 }, { "epoch": 11.911355628361253, "grad_norm": 0.06838128715753555, "learning_rate": 1.4046454843296039e-05, "loss": 0.0055, "step": 385380 }, { "epoch": 11.912282870742413, "grad_norm": 0.09516779333353043, "learning_rate": 1.404599122210546e-05, "loss": 0.0054, "step": 385410 }, { "epoch": 11.91321011312357, "grad_norm": 0.06655703485012054, "learning_rate": 1.4045527600914882e-05, "loss": 0.0056, "step": 385440 }, { "epoch": 11.914137355504728, "grad_norm": 0.1341075301170349, "learning_rate": 1.40450639797243e-05, "loss": 0.0055, "step": 385470 }, { "epoch": 11.915064597885888, "grad_norm": 0.11093645542860031, "learning_rate": 1.4044600358533721e-05, "loss": 0.0057, "step": 385500 }, { "epoch": 11.915991840267045, "grad_norm": 0.14807407557964325, "learning_rate": 1.4044136737343143e-05, "loss": 0.0057, "step": 385530 }, { "epoch": 11.916919082648205, "grad_norm": 0.12129111588001251, "learning_rate": 1.4043673116152563e-05, "loss": 0.0056, "step": 385560 }, { "epoch": 11.917846325029362, "grad_norm": 0.13661934435367584, "learning_rate": 1.4043209494961984e-05, "loss": 0.0051, "step": 385590 }, { "epoch": 11.918773567410522, "grad_norm": 0.14040979743003845, "learning_rate": 1.4042745873771404e-05, "loss": 0.006, "step": 385620 }, { "epoch": 11.91970080979168, "grad_norm": 0.12333056330680847, "learning_rate": 1.4042282252580826e-05, "loss": 0.0056, "step": 385650 }, { "epoch": 11.920628052172837, 
"grad_norm": 0.07848843187093735, "learning_rate": 1.4041818631390247e-05, "loss": 0.0061, "step": 385680 }, { "epoch": 11.921555294553997, "grad_norm": 0.0832505151629448, "learning_rate": 1.4041355010199667e-05, "loss": 0.006, "step": 385710 }, { "epoch": 11.922482536935155, "grad_norm": 0.06479743123054504, "learning_rate": 1.4040891389009088e-05, "loss": 0.0054, "step": 385740 }, { "epoch": 11.923409779316314, "grad_norm": 0.07392419874668121, "learning_rate": 1.404042776781851e-05, "loss": 0.0058, "step": 385770 }, { "epoch": 11.924337021697472, "grad_norm": 0.09264275431632996, "learning_rate": 1.403996414662793e-05, "loss": 0.0052, "step": 385800 }, { "epoch": 11.92526426407863, "grad_norm": 0.16360218822956085, "learning_rate": 1.4039500525437351e-05, "loss": 0.0062, "step": 385830 }, { "epoch": 11.926191506459789, "grad_norm": 0.09570816904306412, "learning_rate": 1.4039036904246773e-05, "loss": 0.0054, "step": 385860 }, { "epoch": 11.927118748840947, "grad_norm": 0.06911145895719528, "learning_rate": 1.403857328305619e-05, "loss": 0.0054, "step": 385890 }, { "epoch": 11.928045991222106, "grad_norm": 0.1613636016845703, "learning_rate": 1.4038109661865614e-05, "loss": 0.0059, "step": 385920 }, { "epoch": 11.928973233603264, "grad_norm": 0.07283303886651993, "learning_rate": 1.4037646040675032e-05, "loss": 0.0054, "step": 385950 }, { "epoch": 11.929900475984422, "grad_norm": 0.07048195600509644, "learning_rate": 1.4037182419484454e-05, "loss": 0.0055, "step": 385980 }, { "epoch": 11.930827718365581, "grad_norm": 0.06756680458784103, "learning_rate": 1.4036718798293875e-05, "loss": 0.0055, "step": 386010 }, { "epoch": 11.931754960746739, "grad_norm": 0.1292847990989685, "learning_rate": 1.4036255177103295e-05, "loss": 0.0058, "step": 386040 }, { "epoch": 11.932682203127898, "grad_norm": 0.1283058226108551, "learning_rate": 1.4035791555912716e-05, "loss": 0.0051, "step": 386070 }, { "epoch": 11.933609445509056, "grad_norm": 0.12327195703983307, 
"learning_rate": 1.4035327934722138e-05, "loss": 0.0065, "step": 386100 }, { "epoch": 11.934536687890214, "grad_norm": 0.0788591057062149, "learning_rate": 1.4034864313531558e-05, "loss": 0.0053, "step": 386130 }, { "epoch": 11.935463930271373, "grad_norm": 0.06354059278964996, "learning_rate": 1.403440069234098e-05, "loss": 0.006, "step": 386160 }, { "epoch": 11.936391172652531, "grad_norm": 0.10172752290964127, "learning_rate": 1.40339370711504e-05, "loss": 0.006, "step": 386190 }, { "epoch": 11.93731841503369, "grad_norm": 0.07441186159849167, "learning_rate": 1.403347344995982e-05, "loss": 0.0051, "step": 386220 }, { "epoch": 11.938245657414848, "grad_norm": 0.12414521723985672, "learning_rate": 1.4033009828769242e-05, "loss": 0.0052, "step": 386250 }, { "epoch": 11.939172899796006, "grad_norm": 0.12555009126663208, "learning_rate": 1.4032546207578662e-05, "loss": 0.0053, "step": 386280 }, { "epoch": 11.940100142177165, "grad_norm": 0.0883185863494873, "learning_rate": 1.4032082586388083e-05, "loss": 0.0066, "step": 386310 }, { "epoch": 11.941027384558323, "grad_norm": 0.08155027031898499, "learning_rate": 1.4031618965197505e-05, "loss": 0.0056, "step": 386340 }, { "epoch": 11.941954626939483, "grad_norm": 0.06855302304029465, "learning_rate": 1.4031155344006923e-05, "loss": 0.0051, "step": 386370 }, { "epoch": 11.94288186932064, "grad_norm": 0.13507075607776642, "learning_rate": 1.4030691722816345e-05, "loss": 0.0053, "step": 386400 }, { "epoch": 11.9438091117018, "grad_norm": 0.11005150526762009, "learning_rate": 1.4030228101625766e-05, "loss": 0.0055, "step": 386430 }, { "epoch": 11.944736354082957, "grad_norm": 0.13907112181186676, "learning_rate": 1.4029764480435186e-05, "loss": 0.0055, "step": 386460 }, { "epoch": 11.945663596464115, "grad_norm": 0.1410118192434311, "learning_rate": 1.4029300859244607e-05, "loss": 0.0056, "step": 386490 }, { "epoch": 11.946590838845275, "grad_norm": 0.09472136199474335, "learning_rate": 1.4028837238054029e-05, "loss": 
0.0063, "step": 386520 }, { "epoch": 11.947518081226432, "grad_norm": 0.15037283301353455, "learning_rate": 1.4028373616863449e-05, "loss": 0.0056, "step": 386550 }, { "epoch": 11.948445323607592, "grad_norm": 0.10552085191011429, "learning_rate": 1.402790999567287e-05, "loss": 0.0059, "step": 386580 }, { "epoch": 11.94937256598875, "grad_norm": 0.11669134348630905, "learning_rate": 1.402744637448229e-05, "loss": 0.0059, "step": 386610 }, { "epoch": 11.950299808369907, "grad_norm": 0.11670329421758652, "learning_rate": 1.4026982753291712e-05, "loss": 0.0056, "step": 386640 }, { "epoch": 11.951227050751067, "grad_norm": 0.24259881675243378, "learning_rate": 1.4026519132101133e-05, "loss": 0.0049, "step": 386670 }, { "epoch": 11.952154293132224, "grad_norm": 0.12424164265394211, "learning_rate": 1.4026055510910553e-05, "loss": 0.0055, "step": 386700 }, { "epoch": 11.953081535513384, "grad_norm": 0.12057314813137054, "learning_rate": 1.4025591889719974e-05, "loss": 0.0062, "step": 386730 }, { "epoch": 11.954008777894542, "grad_norm": 0.08123764395713806, "learning_rate": 1.4025128268529396e-05, "loss": 0.0053, "step": 386760 }, { "epoch": 11.9549360202757, "grad_norm": 0.11221112310886383, "learning_rate": 1.4024664647338816e-05, "loss": 0.0058, "step": 386790 }, { "epoch": 11.955863262656859, "grad_norm": 0.0967506542801857, "learning_rate": 1.4024201026148237e-05, "loss": 0.0057, "step": 386820 }, { "epoch": 11.956790505038017, "grad_norm": 0.07365158945322037, "learning_rate": 1.4023737404957659e-05, "loss": 0.0059, "step": 386850 }, { "epoch": 11.957717747419176, "grad_norm": 0.12824705243110657, "learning_rate": 1.4023273783767077e-05, "loss": 0.0051, "step": 386880 }, { "epoch": 11.958644989800334, "grad_norm": 0.06361404061317444, "learning_rate": 1.4022810162576498e-05, "loss": 0.0052, "step": 386910 }, { "epoch": 11.959572232181491, "grad_norm": 0.15325769782066345, "learning_rate": 1.4022346541385918e-05, "loss": 0.0058, "step": 386940 }, { "epoch": 
11.960499474562651, "grad_norm": 0.0794011726975441, "learning_rate": 1.402188292019534e-05, "loss": 0.0054, "step": 386970 }, { "epoch": 11.961426716943809, "grad_norm": 0.11026086658239365, "learning_rate": 1.4021419299004761e-05, "loss": 0.0058, "step": 387000 }, { "epoch": 11.962353959324968, "grad_norm": 0.10595061630010605, "learning_rate": 1.4020955677814181e-05, "loss": 0.0058, "step": 387030 }, { "epoch": 11.963281201706126, "grad_norm": 0.15428423881530762, "learning_rate": 1.4020492056623602e-05, "loss": 0.0048, "step": 387060 }, { "epoch": 11.964208444087284, "grad_norm": 0.08029546588659286, "learning_rate": 1.4020028435433024e-05, "loss": 0.0055, "step": 387090 }, { "epoch": 11.965135686468443, "grad_norm": 0.11123492568731308, "learning_rate": 1.4019564814242444e-05, "loss": 0.0059, "step": 387120 }, { "epoch": 11.9660629288496, "grad_norm": 0.10604915767908096, "learning_rate": 1.4019101193051865e-05, "loss": 0.0053, "step": 387150 }, { "epoch": 11.96699017123076, "grad_norm": 0.1029781773686409, "learning_rate": 1.4018637571861287e-05, "loss": 0.0057, "step": 387180 }, { "epoch": 11.967917413611918, "grad_norm": 0.14782820641994476, "learning_rate": 1.4018173950670707e-05, "loss": 0.0056, "step": 387210 }, { "epoch": 11.968844655993077, "grad_norm": 0.05424267053604126, "learning_rate": 1.4017710329480128e-05, "loss": 0.006, "step": 387240 }, { "epoch": 11.969771898374235, "grad_norm": 0.08261571079492569, "learning_rate": 1.401724670828955e-05, "loss": 0.0054, "step": 387270 }, { "epoch": 11.970699140755393, "grad_norm": 0.1461348682641983, "learning_rate": 1.4016783087098968e-05, "loss": 0.0055, "step": 387300 }, { "epoch": 11.971626383136552, "grad_norm": 0.12601928412914276, "learning_rate": 1.4016319465908391e-05, "loss": 0.0046, "step": 387330 }, { "epoch": 11.97255362551771, "grad_norm": 0.06223283335566521, "learning_rate": 1.4015855844717809e-05, "loss": 0.0062, "step": 387360 }, { "epoch": 11.97348086789887, "grad_norm": 
0.08737687021493912, "learning_rate": 1.401539222352723e-05, "loss": 0.0055, "step": 387390 }, { "epoch": 11.974408110280027, "grad_norm": 0.09502197057008743, "learning_rate": 1.4014928602336652e-05, "loss": 0.006, "step": 387420 }, { "epoch": 11.975335352661185, "grad_norm": 0.071302130818367, "learning_rate": 1.4014464981146072e-05, "loss": 0.0057, "step": 387450 }, { "epoch": 11.976262595042344, "grad_norm": 0.08863605558872223, "learning_rate": 1.4014001359955493e-05, "loss": 0.006, "step": 387480 }, { "epoch": 11.977189837423502, "grad_norm": 0.07857650518417358, "learning_rate": 1.4013537738764915e-05, "loss": 0.005, "step": 387510 }, { "epoch": 11.978117079804662, "grad_norm": 0.08149658888578415, "learning_rate": 1.4013074117574335e-05, "loss": 0.0053, "step": 387540 }, { "epoch": 11.97904432218582, "grad_norm": 0.06418659538030624, "learning_rate": 1.4012610496383756e-05, "loss": 0.0057, "step": 387570 }, { "epoch": 11.979971564566977, "grad_norm": 0.10385874658823013, "learning_rate": 1.4012146875193178e-05, "loss": 0.0053, "step": 387600 }, { "epoch": 11.980898806948137, "grad_norm": 0.07684887945652008, "learning_rate": 1.4011683254002597e-05, "loss": 0.006, "step": 387630 }, { "epoch": 11.981826049329294, "grad_norm": 0.10332359373569489, "learning_rate": 1.4011219632812019e-05, "loss": 0.0059, "step": 387660 }, { "epoch": 11.982753291710454, "grad_norm": 0.058143250644207, "learning_rate": 1.4010756011621439e-05, "loss": 0.0063, "step": 387690 }, { "epoch": 11.983680534091611, "grad_norm": 0.08681383728981018, "learning_rate": 1.401029239043086e-05, "loss": 0.0053, "step": 387720 }, { "epoch": 11.98460777647277, "grad_norm": 0.08625872433185577, "learning_rate": 1.4009828769240282e-05, "loss": 0.0055, "step": 387750 }, { "epoch": 11.985535018853929, "grad_norm": 0.09011020511388779, "learning_rate": 1.40093651480497e-05, "loss": 0.0056, "step": 387780 }, { "epoch": 11.986462261235086, "grad_norm": 0.11285215616226196, "learning_rate": 
1.4008901526859121e-05, "loss": 0.0061, "step": 387810 }, { "epoch": 11.987389503616246, "grad_norm": 0.1020590290427208, "learning_rate": 1.4008437905668543e-05, "loss": 0.0055, "step": 387840 }, { "epoch": 11.988316745997404, "grad_norm": 0.12546968460083008, "learning_rate": 1.4007974284477963e-05, "loss": 0.006, "step": 387870 }, { "epoch": 11.989243988378561, "grad_norm": 0.09540167450904846, "learning_rate": 1.4007510663287384e-05, "loss": 0.0055, "step": 387900 }, { "epoch": 11.99017123075972, "grad_norm": 0.13336913287639618, "learning_rate": 1.4007047042096806e-05, "loss": 0.0056, "step": 387930 }, { "epoch": 11.991098473140879, "grad_norm": 0.12038327753543854, "learning_rate": 1.4006583420906226e-05, "loss": 0.0055, "step": 387960 }, { "epoch": 11.992025715522038, "grad_norm": 0.08384158462285995, "learning_rate": 1.4006119799715647e-05, "loss": 0.0055, "step": 387990 }, { "epoch": 11.992952957903196, "grad_norm": 0.10875896364450455, "learning_rate": 1.4005656178525067e-05, "loss": 0.0055, "step": 388020 }, { "epoch": 11.993880200284355, "grad_norm": 0.13662919402122498, "learning_rate": 1.4005192557334488e-05, "loss": 0.0054, "step": 388050 }, { "epoch": 11.994807442665513, "grad_norm": 0.12718752026557922, "learning_rate": 1.400472893614391e-05, "loss": 0.0051, "step": 388080 }, { "epoch": 11.99573468504667, "grad_norm": 0.15657491981983185, "learning_rate": 1.400426531495333e-05, "loss": 0.0054, "step": 388110 }, { "epoch": 11.99666192742783, "grad_norm": 0.07371685653924942, "learning_rate": 1.4003801693762751e-05, "loss": 0.0058, "step": 388140 }, { "epoch": 11.997589169808988, "grad_norm": 0.06941268593072891, "learning_rate": 1.4003338072572173e-05, "loss": 0.0055, "step": 388170 }, { "epoch": 11.998516412190147, "grad_norm": 0.176579087972641, "learning_rate": 1.4002874451381593e-05, "loss": 0.0051, "step": 388200 }, { "epoch": 11.999443654571305, "grad_norm": 0.11106973141431808, "learning_rate": 1.4002410830191014e-05, "loss": 0.0055, "step": 
388230 }, { "epoch": 12.000370896952463, "grad_norm": 0.07754279673099518, "learning_rate": 1.4001947209000436e-05, "loss": 0.0052, "step": 388260 }, { "epoch": 12.001298139333622, "grad_norm": 0.09399484097957611, "learning_rate": 1.4001483587809854e-05, "loss": 0.0059, "step": 388290 }, { "epoch": 12.00222538171478, "grad_norm": 0.11498072743415833, "learning_rate": 1.4001019966619275e-05, "loss": 0.0063, "step": 388320 }, { "epoch": 12.00315262409594, "grad_norm": 0.06470617651939392, "learning_rate": 1.4000556345428695e-05, "loss": 0.005, "step": 388350 }, { "epoch": 12.004079866477097, "grad_norm": 0.10822237282991409, "learning_rate": 1.4000092724238117e-05, "loss": 0.0051, "step": 388380 }, { "epoch": 12.005007108858255, "grad_norm": 0.10890718549489975, "learning_rate": 1.3999629103047538e-05, "loss": 0.0052, "step": 388410 }, { "epoch": 12.005934351239414, "grad_norm": 0.11917955428361893, "learning_rate": 1.3999165481856958e-05, "loss": 0.0054, "step": 388440 }, { "epoch": 12.006861593620572, "grad_norm": 0.09762046486139297, "learning_rate": 1.399870186066638e-05, "loss": 0.0053, "step": 388470 }, { "epoch": 12.007788836001732, "grad_norm": 0.1432943344116211, "learning_rate": 1.39982382394758e-05, "loss": 0.0051, "step": 388500 }, { "epoch": 12.00871607838289, "grad_norm": 0.13448140025138855, "learning_rate": 1.399777461828522e-05, "loss": 0.0055, "step": 388530 }, { "epoch": 12.009643320764047, "grad_norm": 0.0813194140791893, "learning_rate": 1.3997310997094642e-05, "loss": 0.0056, "step": 388560 }, { "epoch": 12.010570563145206, "grad_norm": 0.06651133298873901, "learning_rate": 1.3996847375904064e-05, "loss": 0.006, "step": 388590 }, { "epoch": 12.011497805526364, "grad_norm": 0.08280938863754272, "learning_rate": 1.3996383754713483e-05, "loss": 0.005, "step": 388620 }, { "epoch": 12.012425047907524, "grad_norm": 0.1143963560461998, "learning_rate": 1.3995920133522905e-05, "loss": 0.0063, "step": 388650 }, { "epoch": 12.013352290288681, 
"grad_norm": 0.12041399627923965, "learning_rate": 1.3995456512332323e-05, "loss": 0.0058, "step": 388680 }, { "epoch": 12.014279532669839, "grad_norm": 0.14965684711933136, "learning_rate": 1.3994992891141745e-05, "loss": 0.0056, "step": 388710 }, { "epoch": 12.015206775050999, "grad_norm": 0.11314956098794937, "learning_rate": 1.3994529269951168e-05, "loss": 0.006, "step": 388740 }, { "epoch": 12.016134017432156, "grad_norm": 0.10841868072748184, "learning_rate": 1.3994065648760586e-05, "loss": 0.0055, "step": 388770 }, { "epoch": 12.017061259813316, "grad_norm": 0.13598130643367767, "learning_rate": 1.3993602027570007e-05, "loss": 0.0061, "step": 388800 }, { "epoch": 12.017988502194473, "grad_norm": 0.12451504170894623, "learning_rate": 1.3993138406379429e-05, "loss": 0.0055, "step": 388830 }, { "epoch": 12.018915744575633, "grad_norm": 0.08257102966308594, "learning_rate": 1.3992674785188849e-05, "loss": 0.0051, "step": 388860 }, { "epoch": 12.01984298695679, "grad_norm": 0.09606457501649857, "learning_rate": 1.399221116399827e-05, "loss": 0.0059, "step": 388890 }, { "epoch": 12.020770229337948, "grad_norm": 0.1131548136472702, "learning_rate": 1.3991747542807692e-05, "loss": 0.0059, "step": 388920 }, { "epoch": 12.021697471719108, "grad_norm": 0.09556187689304352, "learning_rate": 1.3991283921617112e-05, "loss": 0.0052, "step": 388950 }, { "epoch": 12.022624714100266, "grad_norm": 0.1470492035150528, "learning_rate": 1.3990820300426533e-05, "loss": 0.0054, "step": 388980 }, { "epoch": 12.023551956481425, "grad_norm": 0.08511443436145782, "learning_rate": 1.3990356679235953e-05, "loss": 0.0065, "step": 389010 }, { "epoch": 12.024479198862583, "grad_norm": 0.0802522748708725, "learning_rate": 1.3989893058045374e-05, "loss": 0.0052, "step": 389040 }, { "epoch": 12.02540644124374, "grad_norm": 0.0832216739654541, "learning_rate": 1.3989429436854796e-05, "loss": 0.0059, "step": 389070 }, { "epoch": 12.0263336836249, "grad_norm": 0.09564551711082458, 
"learning_rate": 1.3988965815664216e-05, "loss": 0.0061, "step": 389100 }, { "epoch": 12.027260926006058, "grad_norm": 0.12098145484924316, "learning_rate": 1.3988502194473637e-05, "loss": 0.0056, "step": 389130 }, { "epoch": 12.028188168387217, "grad_norm": 0.04831937700510025, "learning_rate": 1.3988038573283059e-05, "loss": 0.0056, "step": 389160 }, { "epoch": 12.029115410768375, "grad_norm": 0.15064822137355804, "learning_rate": 1.3987574952092477e-05, "loss": 0.0054, "step": 389190 }, { "epoch": 12.030042653149533, "grad_norm": 0.18366673588752747, "learning_rate": 1.3987111330901898e-05, "loss": 0.0053, "step": 389220 }, { "epoch": 12.030969895530692, "grad_norm": 0.1068800836801529, "learning_rate": 1.398664770971132e-05, "loss": 0.0054, "step": 389250 }, { "epoch": 12.03189713791185, "grad_norm": 0.07339025288820267, "learning_rate": 1.398618408852074e-05, "loss": 0.0053, "step": 389280 }, { "epoch": 12.03282438029301, "grad_norm": 0.12302940338850021, "learning_rate": 1.3985720467330161e-05, "loss": 0.0057, "step": 389310 }, { "epoch": 12.033751622674167, "grad_norm": 0.10186734050512314, "learning_rate": 1.3985256846139581e-05, "loss": 0.006, "step": 389340 }, { "epoch": 12.034678865055325, "grad_norm": 0.11657832562923431, "learning_rate": 1.3984793224949002e-05, "loss": 0.0051, "step": 389370 }, { "epoch": 12.035606107436484, "grad_norm": 0.143922358751297, "learning_rate": 1.3984329603758424e-05, "loss": 0.0058, "step": 389400 }, { "epoch": 12.036533349817642, "grad_norm": 0.14541645348072052, "learning_rate": 1.3983865982567844e-05, "loss": 0.0054, "step": 389430 }, { "epoch": 12.037460592198801, "grad_norm": 0.1126520186662674, "learning_rate": 1.3983402361377265e-05, "loss": 0.0059, "step": 389460 }, { "epoch": 12.038387834579959, "grad_norm": 0.08552202582359314, "learning_rate": 1.3982938740186687e-05, "loss": 0.0053, "step": 389490 }, { "epoch": 12.039315076961117, "grad_norm": 0.11328699439764023, "learning_rate": 1.3982475118996107e-05, "loss": 
0.0056, "step": 389520 }, { "epoch": 12.040242319342276, "grad_norm": 0.10778345167636871, "learning_rate": 1.3982011497805528e-05, "loss": 0.0054, "step": 389550 }, { "epoch": 12.041169561723434, "grad_norm": 0.09241658449172974, "learning_rate": 1.398154787661495e-05, "loss": 0.0053, "step": 389580 }, { "epoch": 12.042096804104593, "grad_norm": 0.15229475498199463, "learning_rate": 1.398108425542437e-05, "loss": 0.0055, "step": 389610 }, { "epoch": 12.043024046485751, "grad_norm": 0.07230708748102188, "learning_rate": 1.3980620634233791e-05, "loss": 0.0057, "step": 389640 }, { "epoch": 12.04395128886691, "grad_norm": 0.1519351750612259, "learning_rate": 1.3980157013043209e-05, "loss": 0.005, "step": 389670 }, { "epoch": 12.044878531248068, "grad_norm": 0.07611246407032013, "learning_rate": 1.397969339185263e-05, "loss": 0.0059, "step": 389700 }, { "epoch": 12.045805773629226, "grad_norm": 0.1694054901599884, "learning_rate": 1.3979229770662052e-05, "loss": 0.0048, "step": 389730 }, { "epoch": 12.046733016010386, "grad_norm": 0.12332409620285034, "learning_rate": 1.3978766149471472e-05, "loss": 0.0059, "step": 389760 }, { "epoch": 12.047660258391543, "grad_norm": 0.11007421463727951, "learning_rate": 1.3978302528280893e-05, "loss": 0.0058, "step": 389790 }, { "epoch": 12.048587500772703, "grad_norm": 0.12298732250928879, "learning_rate": 1.3977838907090315e-05, "loss": 0.0052, "step": 389820 }, { "epoch": 12.04951474315386, "grad_norm": 0.09527949243783951, "learning_rate": 1.3977375285899735e-05, "loss": 0.0056, "step": 389850 }, { "epoch": 12.050441985535018, "grad_norm": 0.09611884504556656, "learning_rate": 1.3976911664709156e-05, "loss": 0.0058, "step": 389880 }, { "epoch": 12.051369227916178, "grad_norm": 0.08589194715023041, "learning_rate": 1.3976448043518578e-05, "loss": 0.0051, "step": 389910 }, { "epoch": 12.052296470297335, "grad_norm": 0.09645391255617142, "learning_rate": 1.3975999876367682e-05, "loss": 0.0055, "step": 389940 }, { "epoch": 
12.053223712678495, "grad_norm": 0.1544317603111267, "learning_rate": 1.3975536255177104e-05, "loss": 0.0053, "step": 389970 }, { "epoch": 12.054150955059653, "grad_norm": 0.12120561301708221, "learning_rate": 1.3975072633986527e-05, "loss": 0.0056, "step": 390000 } ], "logging_steps": 30, "max_steps": 1294160, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 10000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6.521993873044192e+18, "train_batch_size": 64, "trial_name": null, "trial_params": null }