{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 18640, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 5.3648068669527894e-05, "grad_norm": 0.39453125, "learning_rate": 1.78826895565093e-09, "loss": 2.4083, "step": 1 }, { "epoch": 0.00010729613733905579, "grad_norm": 0.326171875, "learning_rate": 3.57653791130186e-09, "loss": 2.498, "step": 2 }, { "epoch": 0.0001609442060085837, "grad_norm": 0.390625, "learning_rate": 5.36480686695279e-09, "loss": 2.5823, "step": 3 }, { "epoch": 0.00021459227467811158, "grad_norm": 1.1953125, "learning_rate": 7.15307582260372e-09, "loss": 2.5493, "step": 4 }, { "epoch": 0.0002682403433476395, "grad_norm": 0.419921875, "learning_rate": 8.94134477825465e-09, "loss": 2.5265, "step": 5 }, { "epoch": 0.0003218884120171674, "grad_norm": 0.443359375, "learning_rate": 1.072961373390558e-08, "loss": 2.8562, "step": 6 }, { "epoch": 0.0003755364806866953, "grad_norm": 0.408203125, "learning_rate": 1.251788268955651e-08, "loss": 2.6135, "step": 7 }, { "epoch": 0.00042918454935622315, "grad_norm": 0.35546875, "learning_rate": 1.430615164520744e-08, "loss": 2.3897, "step": 8 }, { "epoch": 0.00048283261802575106, "grad_norm": 0.3828125, "learning_rate": 1.609442060085837e-08, "loss": 2.4573, "step": 9 }, { "epoch": 0.000536480686695279, "grad_norm": 0.291015625, "learning_rate": 1.78826895565093e-08, "loss": 2.036, "step": 10 }, { "epoch": 0.0005901287553648068, "grad_norm": 0.98828125, "learning_rate": 1.967095851216023e-08, "loss": 2.801, "step": 11 }, { "epoch": 0.0006437768240343348, "grad_norm": 0.412109375, "learning_rate": 2.145922746781116e-08, "loss": 2.1978, "step": 12 }, { "epoch": 0.0006974248927038626, "grad_norm": 0.431640625, "learning_rate": 2.324749642346209e-08, "loss": 2.5903, "step": 13 }, { "epoch": 0.0007510729613733906, "grad_norm": 1.3671875, "learning_rate": 2.503576537911302e-08, "loss": 2.687, "step": 14 }, { "epoch": 0.0008047210300429185, "grad_norm": 0.333984375, "learning_rate": 2.682403433476395e-08, "loss": 2.4478, "step": 15 }, { "epoch": 0.0008583690987124463, "grad_norm": 0.302734375, "learning_rate": 2.861230329041488e-08, "loss": 2.2632, "step": 16 }, { "epoch": 0.0009120171673819743, "grad_norm": 0.4453125, "learning_rate": 3.040057224606581e-08, "loss": 1.7356, "step": 17 }, { "epoch": 0.0009656652360515021, "grad_norm": 0.306640625, "learning_rate": 3.218884120171674e-08, "loss": 2.4523, "step": 18 }, { "epoch": 0.00101931330472103, "grad_norm": 0.478515625, "learning_rate": 3.397711015736767e-08, "loss": 2.4249, "step": 19 }, { "epoch": 0.001072961373390558, "grad_norm": 0.390625, "learning_rate": 3.57653791130186e-08, "loss": 2.5147, "step": 20 }, { "epoch": 0.0011266094420600858, "grad_norm": 0.44140625, "learning_rate": 3.755364806866953e-08, "loss": 2.5997, "step": 21 }, { "epoch": 0.0011802575107296136, "grad_norm": 0.32421875, "learning_rate": 3.934191702432046e-08, "loss": 2.5245, "step": 22 }, { "epoch": 0.0012339055793991417, "grad_norm": 0.34375, "learning_rate": 4.1130185979971395e-08, "loss": 2.3243, "step": 23 }, { "epoch": 0.0012875536480686696, "grad_norm": 0.306640625, "learning_rate": 4.291845493562232e-08, "loss": 2.5863, "step": 24 }, { "epoch": 0.0013412017167381974, "grad_norm": 0.2373046875, "learning_rate": 4.470672389127325e-08, "loss": 2.1751, "step": 25 }, { "epoch": 0.0013948497854077253, "grad_norm": 0.41015625, "learning_rate": 4.649499284692418e-08, "loss": 2.2426, "step": 26 }, { "epoch": 0.0014484978540772531, "grad_norm": 0.3515625, "learning_rate": 4.8283261802575116e-08, "loss": 2.4159, "step": 27 }, { "epoch": 0.0015021459227467812, "grad_norm": 0.51953125, "learning_rate": 5.007153075822604e-08, "loss": 2.4058, "step": 28 }, { "epoch": 0.001555793991416309, "grad_norm": 0.396484375, "learning_rate": 5.1859799713876974e-08, "loss": 2.6801, "step": 29 }, { "epoch": 0.001609442060085837, "grad_norm": 0.578125, "learning_rate": 5.36480686695279e-08, "loss": 2.4075, "step": 30 }, { "epoch": 0.0016630901287553648, "grad_norm": 0.404296875, "learning_rate": 5.543633762517884e-08, "loss": 2.6774, "step": 31 }, { "epoch": 0.0017167381974248926, "grad_norm": 0.330078125, "learning_rate": 5.722460658082976e-08, "loss": 2.5303, "step": 32 }, { "epoch": 0.0017703862660944207, "grad_norm": 0.328125, "learning_rate": 5.9012875536480695e-08, "loss": 2.596, "step": 33 }, { "epoch": 0.0018240343347639485, "grad_norm": 0.4609375, "learning_rate": 6.080114449213162e-08, "loss": 2.8418, "step": 34 }, { "epoch": 0.0018776824034334764, "grad_norm": 0.34375, "learning_rate": 6.258941344778256e-08, "loss": 2.5661, "step": 35 }, { "epoch": 0.0019313304721030042, "grad_norm": 0.353515625, "learning_rate": 6.437768240343348e-08, "loss": 2.561, "step": 36 }, { "epoch": 0.0019849785407725323, "grad_norm": 0.27734375, "learning_rate": 6.616595135908442e-08, "loss": 2.2062, "step": 37 }, { "epoch": 0.00203862660944206, "grad_norm": 0.359375, "learning_rate": 6.795422031473535e-08, "loss": 2.4371, "step": 38 }, { "epoch": 0.002092274678111588, "grad_norm": 0.37109375, "learning_rate": 6.974248927038627e-08, "loss": 2.4343, "step": 39 }, { "epoch": 0.002145922746781116, "grad_norm": 0.36328125, "learning_rate": 7.15307582260372e-08, "loss": 2.457, "step": 40 }, { "epoch": 0.0021995708154506437, "grad_norm": 0.37109375, "learning_rate": 7.331902718168813e-08, "loss": 2.4671, "step": 41 }, { "epoch": 0.0022532188841201716, "grad_norm": 0.359375, "learning_rate": 7.510729613733906e-08, "loss": 2.6244, "step": 42 }, { "epoch": 0.0023068669527896994, "grad_norm": 0.404296875, "learning_rate": 7.689556509298999e-08, "loss": 2.3905, "step": 43 }, { "epoch": 0.0023605150214592273, "grad_norm": 0.318359375, "learning_rate": 7.868383404864092e-08, "loss": 2.4561, "step": 44 }, { "epoch": 0.0024141630901287556, "grad_norm": 0.51953125, "learning_rate": 8.047210300429185e-08, "loss": 2.5588, "step": 45 }, { "epoch": 0.0024678111587982834, "grad_norm": 0.404296875, "learning_rate": 8.226037195994279e-08, "loss": 2.2913, "step": 46 }, { "epoch": 0.0025214592274678113, "grad_norm": 0.52734375, "learning_rate": 8.404864091559372e-08, "loss": 2.3237, "step": 47 }, { "epoch": 0.002575107296137339, "grad_norm": 0.5078125, "learning_rate": 8.583690987124465e-08, "loss": 2.2655, "step": 48 }, { "epoch": 0.002628755364806867, "grad_norm": 0.640625, "learning_rate": 8.762517882689556e-08, "loss": 2.682, "step": 49 }, { "epoch": 0.002682403433476395, "grad_norm": 0.490234375, "learning_rate": 8.94134477825465e-08, "loss": 2.2509, "step": 50 }, { "epoch": 0.0027360515021459227, "grad_norm": 0.328125, "learning_rate": 9.120171673819743e-08, "loss": 2.5278, "step": 51 }, { "epoch": 0.0027896995708154505, "grad_norm": 0.447265625, "learning_rate": 9.298998569384836e-08, "loss": 2.4237, "step": 52 }, { "epoch": 0.0028433476394849784, "grad_norm": 0.29296875, "learning_rate": 9.477825464949929e-08, "loss": 2.2723, "step": 53 }, { "epoch": 0.0028969957081545063, "grad_norm": 0.8203125, "learning_rate": 9.656652360515023e-08, "loss": 1.4755, "step": 54 }, { "epoch": 0.0029506437768240345, "grad_norm": 0.78125, "learning_rate": 9.835479256080116e-08, "loss": 2.4364, "step": 55 }, { "epoch": 0.0030042918454935624, "grad_norm": 0.291015625, "learning_rate": 1.0014306151645208e-07, "loss": 2.7337, "step": 56 }, { "epoch": 0.0030579399141630902, "grad_norm": 0.427734375, "learning_rate": 1.01931330472103e-07, "loss": 2.5014, "step": 57 }, { "epoch": 0.003111587982832618, "grad_norm": 0.337890625, "learning_rate": 1.0371959942775395e-07, "loss": 2.0146, "step": 58 }, { "epoch": 0.003165236051502146, "grad_norm": 1.40625, "learning_rate": 1.0550786838340488e-07, "loss": 2.4555, "step": 59 }, { "epoch": 0.003218884120171674, "grad_norm": 0.470703125, "learning_rate": 1.072961373390558e-07, "loss": 2.553, "step": 60 }, { "epoch": 0.0032725321888412017, "grad_norm": 0.4140625, "learning_rate": 1.0908440629470673e-07, "loss": 2.639, "step": 61 }, { "epoch": 0.0033261802575107295, "grad_norm": 0.35546875, "learning_rate": 1.1087267525035768e-07, "loss": 2.212, "step": 62 }, { "epoch": 0.0033798283261802574, "grad_norm": 0.306640625, "learning_rate": 1.1266094420600859e-07, "loss": 2.4842, "step": 63 }, { "epoch": 0.0034334763948497852, "grad_norm": 0.357421875, "learning_rate": 1.1444921316165952e-07, "loss": 2.7548, "step": 64 }, { "epoch": 0.0034871244635193135, "grad_norm": 0.400390625, "learning_rate": 1.1623748211731045e-07, "loss": 2.8246, "step": 65 }, { "epoch": 0.0035407725321888414, "grad_norm": 0.341796875, "learning_rate": 1.1802575107296139e-07, "loss": 2.4038, "step": 66 }, { "epoch": 0.003594420600858369, "grad_norm": 0.2890625, "learning_rate": 1.198140200286123e-07, "loss": 2.2669, "step": 67 }, { "epoch": 0.003648068669527897, "grad_norm": 0.34375, "learning_rate": 1.2160228898426323e-07, "loss": 2.4509, "step": 68 }, { "epoch": 0.003701716738197425, "grad_norm": 0.37109375, "learning_rate": 1.2339055793991416e-07, "loss": 2.6045, "step": 69 }, { "epoch": 0.0037553648068669528, "grad_norm": 0.51953125, "learning_rate": 1.2517882689556512e-07, "loss": 2.3488, "step": 70 }, { "epoch": 0.0038090128755364806, "grad_norm": 0.35546875, "learning_rate": 1.2696709585121605e-07, "loss": 2.514, "step": 71 }, { "epoch": 0.0038626609442060085, "grad_norm": 0.32421875, "learning_rate": 1.2875536480686695e-07, "loss": 2.5386, "step": 72 }, { "epoch": 0.003916309012875536, "grad_norm": 0.291015625, "learning_rate": 1.3054363376251788e-07, "loss": 2.5442, "step": 73 }, { "epoch": 0.003969957081545065, "grad_norm": 0.330078125, "learning_rate": 1.3233190271816883e-07, "loss": 2.5375, "step": 74 }, { "epoch": 0.004023605150214592, "grad_norm": 0.291015625, "learning_rate": 1.3412017167381976e-07, "loss": 2.2197, "step": 75 }, { "epoch": 0.00407725321888412, "grad_norm": 0.3203125, "learning_rate": 1.359084406294707e-07, "loss": 2.4232, "step": 76 }, { "epoch": 0.004130901287553648, "grad_norm": 0.5625, "learning_rate": 1.3769670958512162e-07, "loss": 2.2159, "step": 77 }, { "epoch": 0.004184549356223176, "grad_norm": 0.361328125, "learning_rate": 1.3948497854077255e-07, "loss": 2.6874, "step": 78 }, { "epoch": 0.0042381974248927035, "grad_norm": 1.0078125, "learning_rate": 1.4127324749642348e-07, "loss": 2.5792, "step": 79 }, { "epoch": 0.004291845493562232, "grad_norm": 0.451171875, "learning_rate": 1.430615164520744e-07, "loss": 2.6089, "step": 80 }, { "epoch": 0.00434549356223176, "grad_norm": 0.287109375, "learning_rate": 1.4484978540772534e-07, "loss": 2.4192, "step": 81 }, { "epoch": 0.0043991416309012875, "grad_norm": 1.015625, "learning_rate": 1.4663805436337626e-07, "loss": 1.806, "step": 82 }, { "epoch": 0.004452789699570816, "grad_norm": 0.33984375, "learning_rate": 1.484263233190272e-07, "loss": 2.5854, "step": 83 }, { "epoch": 0.004506437768240343, "grad_norm": 0.3359375, "learning_rate": 1.5021459227467812e-07, "loss": 2.5636, "step": 84 }, { "epoch": 0.0045600858369098714, "grad_norm": 0.353515625, "learning_rate": 1.5200286123032905e-07, "loss": 2.5148, "step": 85 }, { "epoch": 0.004613733905579399, "grad_norm": 0.333984375, "learning_rate": 1.5379113018597998e-07, "loss": 2.5357, "step": 86 }, { "epoch": 0.004667381974248927, "grad_norm": 0.419921875, "learning_rate": 1.555793991416309e-07, "loss": 2.6014, "step": 87 }, { "epoch": 0.004721030042918455, "grad_norm": 0.5546875, "learning_rate": 1.5736766809728184e-07, "loss": 2.6102, "step": 88 }, { "epoch": 0.004774678111587983, "grad_norm": 0.62890625, "learning_rate": 1.591559370529328e-07, "loss": 2.6584, "step": 89 }, { "epoch": 0.004828326180257511, "grad_norm": 0.40625, "learning_rate": 1.609442060085837e-07, "loss": 2.5585, "step": 90 }, { "epoch": 0.0048819742489270386, "grad_norm": 0.310546875, "learning_rate": 1.6273247496423465e-07, "loss": 2.5021, "step": 91 }, { "epoch": 0.004935622317596567, "grad_norm": 0.462890625, "learning_rate": 1.6452074391988558e-07, "loss": 2.3809, "step": 92 }, { "epoch": 0.004989270386266094, "grad_norm": 0.3671875, "learning_rate": 1.6630901287553648e-07, "loss": 2.6141, "step": 93 }, { "epoch": 0.0050429184549356226, "grad_norm": 0.3046875, "learning_rate": 1.6809728183118744e-07, "loss": 2.561, "step": 94 }, { "epoch": 0.00509656652360515, "grad_norm": 0.4296875, "learning_rate": 1.6988555078683834e-07, "loss": 2.2725, "step": 95 }, { "epoch": 0.005150214592274678, "grad_norm": 0.4453125, "learning_rate": 1.716738197424893e-07, "loss": 2.2078, "step": 96 }, { "epoch": 0.005203862660944206, "grad_norm": 0.30078125, "learning_rate": 1.7346208869814022e-07, "loss": 2.7496, "step": 97 }, { "epoch": 0.005257510729613734, "grad_norm": 0.412109375, "learning_rate": 1.7525035765379112e-07, "loss": 1.7421, "step": 98 }, { "epoch": 0.005311158798283261, "grad_norm": 0.3203125, "learning_rate": 1.7703862660944208e-07, "loss": 2.4606, "step": 99 }, { "epoch": 0.00536480686695279, "grad_norm": 0.32421875, "learning_rate": 1.78826895565093e-07, "loss": 2.5337, "step": 100 }, { "epoch": 0.005418454935622318, "grad_norm": 0.4453125, "learning_rate": 1.8061516452074394e-07, "loss": 2.4455, "step": 101 }, { "epoch": 0.005472103004291845, "grad_norm": 1.765625, "learning_rate": 1.8240343347639487e-07, "loss": 2.3098, "step": 102 }, { "epoch": 0.005525751072961374, "grad_norm": 0.6953125, "learning_rate": 1.8419170243204577e-07, "loss": 2.692, "step": 103 }, { "epoch": 0.005579399141630901, "grad_norm": 0.98828125, "learning_rate": 1.8597997138769672e-07, "loss": 2.61, "step": 104 }, { "epoch": 0.005633047210300429, "grad_norm": 0.640625, "learning_rate": 1.8776824034334768e-07, "loss": 2.6198, "step": 105 }, { "epoch": 0.005686695278969957, "grad_norm": 0.39453125, "learning_rate": 1.8955650929899858e-07, "loss": 2.9015, "step": 106 }, { "epoch": 0.005740343347639485, "grad_norm": 0.421875, "learning_rate": 1.913447782546495e-07, "loss": 2.5938, "step": 107 }, { "epoch": 0.0057939914163090125, "grad_norm": 0.43359375, "learning_rate": 1.9313304721030046e-07, "loss": 2.2852, "step": 108 }, { "epoch": 0.005847639484978541, "grad_norm": 0.310546875, "learning_rate": 1.9492131616595137e-07, "loss": 2.4845, "step": 109 }, { "epoch": 0.005901287553648069, "grad_norm": 0.93359375, "learning_rate": 1.9670958512160232e-07, "loss": 2.4035, "step": 110 }, { "epoch": 0.0059549356223175965, "grad_norm": 0.5546875, "learning_rate": 1.9849785407725322e-07, "loss": 2.2722, "step": 111 }, { "epoch": 0.006008583690987125, "grad_norm": 0.375, "learning_rate": 2.0028612303290415e-07, "loss": 2.5463, "step": 112 }, { "epoch": 0.006062231759656652, "grad_norm": 0.62109375, "learning_rate": 2.020743919885551e-07, "loss": 2.5563, "step": 113 }, { "epoch": 0.0061158798283261805, "grad_norm": 0.546875, "learning_rate": 2.03862660944206e-07, "loss": 2.3814, "step": 114 }, { "epoch": 0.006169527896995708, "grad_norm": 0.35546875, "learning_rate": 2.0565092989985697e-07, "loss": 2.4245, "step": 115 }, { "epoch": 0.006223175965665236, "grad_norm": 0.52734375, "learning_rate": 2.074391988555079e-07, "loss": 1.6676, "step": 116 }, { "epoch": 0.006276824034334764, "grad_norm": 0.462890625, "learning_rate": 2.092274678111588e-07, "loss": 2.6345, "step": 117 }, { "epoch": 0.006330472103004292, "grad_norm": 0.302734375, "learning_rate": 2.1101573676680975e-07, "loss": 2.5535, "step": 118 }, { "epoch": 0.006384120171673819, "grad_norm": 0.32421875, "learning_rate": 2.1280400572246065e-07, "loss": 2.5378, "step": 119 }, { "epoch": 0.006437768240343348, "grad_norm": 0.3203125, "learning_rate": 2.145922746781116e-07, "loss": 2.6598, "step": 120 }, { "epoch": 0.006491416309012876, "grad_norm": 0.310546875, "learning_rate": 2.1638054363376254e-07, "loss": 2.5333, "step": 121 }, { "epoch": 0.006545064377682403, "grad_norm": 0.39453125, "learning_rate": 2.1816881258941347e-07, "loss": 2.5408, "step": 122 }, { "epoch": 0.006598712446351932, "grad_norm": 0.4921875, "learning_rate": 2.199570815450644e-07, "loss": 2.4658, "step": 123 }, { "epoch": 0.006652360515021459, "grad_norm": 0.326171875, "learning_rate": 2.2174535050071535e-07, "loss": 2.4675, "step": 124 }, { "epoch": 0.006706008583690987, "grad_norm": 0.421875, "learning_rate": 2.2353361945636625e-07, "loss": 2.6432, "step": 125 }, { "epoch": 0.006759656652360515, "grad_norm": 0.365234375, "learning_rate": 2.2532188841201718e-07, "loss": 2.5494, "step": 126 }, { "epoch": 0.006813304721030043, "grad_norm": 0.423828125, "learning_rate": 2.271101573676681e-07, "loss": 2.7104, "step": 127 }, { "epoch": 0.0068669527896995704, "grad_norm": 0.349609375, "learning_rate": 2.2889842632331904e-07, "loss": 2.3482, "step": 128 }, { "epoch": 0.006920600858369099, "grad_norm": 0.35546875, "learning_rate": 2.3068669527897e-07, "loss": 2.4934, "step": 129 }, { "epoch": 0.006974248927038627, "grad_norm": 0.71484375, "learning_rate": 2.324749642346209e-07, "loss": 2.5158, "step": 130 }, { "epoch": 0.0070278969957081544, "grad_norm": 0.453125, "learning_rate": 2.3426323319027185e-07, "loss": 2.2247, "step": 131 }, { "epoch": 0.007081545064377683, "grad_norm": 0.396484375, "learning_rate": 2.3605150214592278e-07, "loss": 1.7295, "step": 132 }, { "epoch": 0.00713519313304721, "grad_norm": 0.31640625, "learning_rate": 2.3783977110157368e-07, "loss": 2.0676, "step": 133 }, { "epoch": 0.007188841201716738, "grad_norm": 0.380859375, "learning_rate": 2.396280400572246e-07, "loss": 2.3562, "step": 134 }, { "epoch": 0.007242489270386266, "grad_norm": 0.365234375, "learning_rate": 2.4141630901287554e-07, "loss": 2.5425, "step": 135 }, { "epoch": 0.007296137339055794, "grad_norm": 0.486328125, "learning_rate": 2.4320457796852647e-07, "loss": 2.4493, "step": 136 }, { "epoch": 0.0073497854077253216, "grad_norm": 0.3359375, "learning_rate": 2.4499284692417745e-07, "loss": 2.3006, "step": 137 }, { "epoch": 0.00740343347639485, "grad_norm": 0.326171875, "learning_rate": 2.4678111587982833e-07, "loss": 2.6299, "step": 138 }, { "epoch": 0.007457081545064377, "grad_norm": 0.328125, "learning_rate": 2.4856938483547926e-07, "loss": 2.3178, "step": 139 }, { "epoch": 0.0075107296137339056, "grad_norm": 0.3515625, "learning_rate": 2.5035765379113024e-07, "loss": 2.4958, "step": 140 }, { "epoch": 0.007564377682403434, "grad_norm": 0.34375, "learning_rate": 2.521459227467811e-07, "loss": 2.4116, "step": 141 }, { "epoch": 0.007618025751072961, "grad_norm": 2.890625, "learning_rate": 2.539341917024321e-07, "loss": 2.6777, "step": 142 }, { "epoch": 0.0076716738197424895, "grad_norm": 0.3515625, "learning_rate": 2.5572246065808297e-07, "loss": 2.3124, "step": 143 }, { "epoch": 0.007725321888412017, "grad_norm": 0.640625, "learning_rate": 2.575107296137339e-07, "loss": 2.6318, "step": 144 }, { "epoch": 0.007778969957081545, "grad_norm": 0.3125, "learning_rate": 2.592989985693849e-07, "loss": 2.6204, "step": 145 }, { "epoch": 0.007832618025751073, "grad_norm": 0.322265625, "learning_rate": 2.6108726752503576e-07, "loss": 2.5832, "step": 146 }, { "epoch": 0.007886266094420601, "grad_norm": 0.31640625, "learning_rate": 2.6287553648068674e-07, "loss": 2.4318, "step": 147 }, { "epoch": 0.00793991416309013, "grad_norm": 0.298828125, "learning_rate": 2.6466380543633767e-07, "loss": 2.4571, "step": 148 }, { "epoch": 0.007993562231759656, "grad_norm": 0.314453125, "learning_rate": 2.664520743919886e-07, "loss": 2.6741, "step": 149 }, { "epoch": 0.008047210300429184, "grad_norm": 0.5078125, "learning_rate": 2.682403433476395e-07, "loss": 2.631, "step": 150 }, { "epoch": 0.008100858369098712, "grad_norm": 0.3046875, "learning_rate": 2.700286123032904e-07, "loss": 2.5523, "step": 151 }, { "epoch": 0.00815450643776824, "grad_norm": 0.326171875, "learning_rate": 2.718168812589414e-07, "loss": 2.377, "step": 152 }, { "epoch": 0.008208154506437769, "grad_norm": 0.412109375, "learning_rate": 2.736051502145923e-07, "loss": 2.5577, "step": 153 }, { "epoch": 0.008261802575107296, "grad_norm": 0.326171875, "learning_rate": 2.7539341917024324e-07, "loss": 2.6477, "step": 154 }, { "epoch": 0.008315450643776824, "grad_norm": 0.384765625, "learning_rate": 2.7718168812589417e-07, "loss": 2.4416, "step": 155 }, { "epoch": 0.008369098712446352, "grad_norm": 0.49609375, "learning_rate": 2.789699570815451e-07, "loss": 2.2736, "step": 156 }, { "epoch": 0.00842274678111588, "grad_norm": 0.373046875, "learning_rate": 2.8075822603719603e-07, "loss": 2.2339, "step": 157 }, { "epoch": 0.008476394849785407, "grad_norm": 0.462890625, "learning_rate": 2.8254649499284696e-07, "loss": 2.591, "step": 158 }, { "epoch": 0.008530042918454935, "grad_norm": 0.337890625, "learning_rate": 2.843347639484979e-07, "loss": 2.188, "step": 159 }, { "epoch": 0.008583690987124463, "grad_norm": 0.380859375, "learning_rate": 2.861230329041488e-07, "loss": 2.4335, "step": 160 }, { "epoch": 0.008637339055793992, "grad_norm": 0.29296875, "learning_rate": 2.8791130185979974e-07, "loss": 2.4525, "step": 161 }, { "epoch": 0.00869098712446352, "grad_norm": 0.3359375, "learning_rate": 2.8969957081545067e-07, "loss": 2.3558, "step": 162 }, { "epoch": 0.008744635193133047, "grad_norm": 0.61328125, "learning_rate": 2.914878397711016e-07, "loss": 2.1916, "step": 163 }, { "epoch": 0.008798283261802575, "grad_norm": 0.42578125, "learning_rate": 2.9327610872675253e-07, "loss": 2.4573, "step": 164 }, { "epoch": 0.008851931330472103, "grad_norm": 0.42578125, "learning_rate": 2.9506437768240346e-07, "loss": 2.3892, "step": 165 }, { "epoch": 0.008905579399141631, "grad_norm": 0.3046875, "learning_rate": 2.968526466380544e-07, "loss": 2.5658, "step": 166 }, { "epoch": 0.008959227467811158, "grad_norm": 0.29296875, "learning_rate": 2.986409155937053e-07, "loss": 2.6882, "step": 167 }, { "epoch": 0.009012875536480686, "grad_norm": 0.453125, "learning_rate": 3.0042918454935624e-07, "loss": 2.4261, "step": 168 }, { "epoch": 0.009066523605150215, "grad_norm": 0.396484375, "learning_rate": 3.0221745350500717e-07, "loss": 2.564, "step": 169 }, { "epoch": 0.009120171673819743, "grad_norm": 0.400390625, "learning_rate": 3.040057224606581e-07, "loss": 2.5462, "step": 170 }, { "epoch": 0.009173819742489271, "grad_norm": 0.384765625, "learning_rate": 3.0579399141630903e-07, "loss": 2.6185, "step": 171 }, { "epoch": 0.009227467811158798, "grad_norm": 0.388671875, "learning_rate": 3.0758226037195996e-07, "loss": 2.5382, "step": 172 }, { "epoch": 0.009281115879828326, "grad_norm": 0.5703125, "learning_rate": 3.093705293276109e-07, "loss": 2.6381, "step": 173 }, { "epoch": 0.009334763948497854, "grad_norm": 0.34765625, "learning_rate": 3.111587982832618e-07, "loss": 2.4119, "step": 174 }, { "epoch": 0.009388412017167383, "grad_norm": 0.3203125, "learning_rate": 3.1294706723891274e-07, "loss": 2.4732, "step": 175 }, { "epoch": 0.00944206008583691, "grad_norm": 0.38671875, "learning_rate": 3.1473533619456367e-07, "loss": 2.9193, "step": 176 }, { "epoch": 0.009495708154506437, "grad_norm": 11.6875, "learning_rate": 3.1652360515021465e-07, "loss": 2.406, "step": 177 }, { "epoch": 0.009549356223175966, "grad_norm": 0.38671875, "learning_rate": 3.183118741058656e-07, "loss": 2.5553, "step": 178 }, { "epoch": 0.009603004291845494, "grad_norm": 0.4921875, "learning_rate": 3.2010014306151646e-07, "loss": 2.4548, "step": 179 }, { "epoch": 0.009656652360515022, "grad_norm": 0.41015625, "learning_rate": 3.218884120171674e-07, "loss": 2.3571, "step": 180 }, { "epoch": 0.009710300429184549, "grad_norm": 0.375, "learning_rate": 3.236766809728183e-07, "loss": 2.7181, "step": 181 }, { "epoch": 0.009763948497854077, "grad_norm": 0.35546875, "learning_rate": 3.254649499284693e-07, "loss": 2.4217, "step": 182 }, { "epoch": 0.009817596566523605, "grad_norm": 0.357421875, "learning_rate": 3.2725321888412023e-07, "loss": 2.8628, "step": 183 }, { "epoch": 0.009871244635193134, "grad_norm": 0.33984375, "learning_rate": 3.2904148783977116e-07, "loss": 2.3178, "step": 184 }, { "epoch": 0.00992489270386266, "grad_norm": 0.287109375, "learning_rate": 3.3082975679542203e-07, "loss": 2.4766, "step": 185 }, { "epoch": 0.009978540772532189, "grad_norm": 0.38671875, "learning_rate": 3.3261802575107296e-07, "loss": 2.5345, "step": 186 }, { "epoch": 0.010032188841201717, "grad_norm": 0.42578125, "learning_rate": 3.3440629470672394e-07, "loss": 2.5146, "step": 187 }, { "epoch": 0.010085836909871245, "grad_norm": 0.40625, "learning_rate": 3.3619456366237487e-07, "loss": 2.2619, "step": 188 }, { "epoch": 0.010139484978540772, "grad_norm": 0.333984375, "learning_rate": 3.379828326180258e-07, "loss": 2.5021, "step": 189 }, { "epoch": 0.0101931330472103, "grad_norm": 0.5234375, "learning_rate": 3.397711015736767e-07, "loss": 2.5583, "step": 190 }, { "epoch": 0.010246781115879828, "grad_norm": 0.349609375, "learning_rate": 3.415593705293276e-07, "loss": 2.5187, "step": 191 }, { "epoch": 0.010300429184549357, "grad_norm": 0.486328125, "learning_rate": 3.433476394849786e-07, "loss": 2.8518, "step": 192 }, { "epoch": 0.010354077253218885, "grad_norm": 0.4453125, "learning_rate": 3.451359084406295e-07, "loss": 1.8132, "step": 193 }, { "epoch": 0.010407725321888411, "grad_norm": 0.33984375, "learning_rate": 3.4692417739628044e-07, "loss": 2.6649, "step": 194 }, { "epoch": 0.01046137339055794, "grad_norm": 0.30859375, "learning_rate": 3.487124463519313e-07, "loss": 2.5046, "step": 195 }, { "epoch": 0.010515021459227468, "grad_norm": 0.38671875, "learning_rate": 3.5050071530758225e-07, "loss": 2.5836, "step": 196 }, { "epoch": 0.010568669527896996, "grad_norm": 0.322265625, "learning_rate": 3.5228898426323323e-07, "loss": 2.5395, "step": 197 }, { "epoch": 0.010622317596566523, "grad_norm": 0.37109375, "learning_rate": 3.5407725321888416e-07, "loss": 2.6288, "step": 198 }, { "epoch": 0.010675965665236051, "grad_norm": 0.66796875, "learning_rate": 3.558655221745351e-07, "loss": 1.7938, "step": 199 }, { "epoch": 0.01072961373390558, "grad_norm": 0.33203125, "learning_rate": 3.57653791130186e-07, "loss": 2.5429, "step": 200 }, { "epoch": 0.010783261802575108, "grad_norm": 0.349609375, "learning_rate": 3.594420600858369e-07, "loss": 2.734, "step": 201 }, { "epoch": 0.010836909871244636, "grad_norm": 0.447265625, "learning_rate": 3.612303290414879e-07, "loss": 2.4992, "step": 202 }, { "epoch": 0.010890557939914162, "grad_norm": 0.421875, "learning_rate": 3.630185979971388e-07, "loss": 2.7004, "step": 203 }, { "epoch": 0.01094420600858369, "grad_norm": 0.34375, "learning_rate": 3.6480686695278973e-07, "loss": 2.1132, "step": 204 }, { "epoch": 0.010997854077253219, "grad_norm": 1.171875, "learning_rate": 3.665951359084407e-07, "loss": 2.4594, "step": 205 }, { "epoch": 0.011051502145922747, "grad_norm": 0.53125, "learning_rate": 3.6838340486409154e-07, "loss": 2.5401, "step": 206 }, { "epoch": 0.011105150214592274, "grad_norm": 0.40625, "learning_rate": 3.701716738197425e-07, "loss": 2.3527, "step": 207 }, { "epoch": 0.011158798283261802, "grad_norm": 0.337890625, "learning_rate": 3.7195994277539345e-07, "loss": 2.4697, "step": 208 }, { "epoch": 0.01121244635193133, "grad_norm": 0.34765625, "learning_rate": 3.737482117310444e-07, "loss": 2.5975, "step": 209 }, { "epoch": 0.011266094420600859, "grad_norm": 0.279296875, "learning_rate": 3.7553648068669536e-07, "loss": 2.4245, "step": 210 }, { "epoch": 0.011319742489270387, "grad_norm": 0.466796875, "learning_rate": 3.7732474964234623e-07, "loss": 2.5424, "step": 211 }, { "epoch": 0.011373390557939914, "grad_norm": 0.3671875, "learning_rate": 3.7911301859799716e-07, "loss": 2.3144, "step": 212 }, { "epoch": 0.011427038626609442, "grad_norm": 0.302734375, "learning_rate": 3.809012875536481e-07, "loss": 2.174, "step": 213 }, { "epoch": 0.01148068669527897, "grad_norm": 0.357421875, "learning_rate": 3.82689556509299e-07, "loss": 2.5862, "step": 214 }, { "epoch": 0.011534334763948498, "grad_norm": 1.1171875, "learning_rate": 3.8447782546495e-07, "loss": 2.4637, "step": 215 }, { "epoch": 0.011587982832618025, "grad_norm": 0.498046875, "learning_rate": 3.8626609442060093e-07, "loss": 2.6565, "step": 216 }, { "epoch": 0.011641630901287553, "grad_norm": 0.4921875, "learning_rate": 3.880543633762518e-07, "loss": 2.7114, "step": 217 }, { "epoch": 0.011695278969957082, "grad_norm": 0.341796875, "learning_rate": 3.8984263233190273e-07, "loss": 2.6905, "step": 218 }, { "epoch": 0.01174892703862661, "grad_norm": 0.451171875, "learning_rate": 3.9163090128755366e-07, "loss": 2.6216, "step": 219 }, { "epoch": 0.011802575107296138, "grad_norm": 0.373046875, "learning_rate": 3.9341917024320464e-07, "loss": 2.4426, "step": 220 }, { "epoch": 0.011856223175965665, "grad_norm": 0.453125, "learning_rate": 3.9520743919885557e-07, "loss": 2.42, "step": 221 }, { "epoch": 0.011909871244635193, "grad_norm": 0.306640625, "learning_rate": 3.9699570815450645e-07, "loss": 2.4862, "step": 222 }, { "epoch": 0.011963519313304721, "grad_norm": 0.341796875, "learning_rate": 3.987839771101574e-07, "loss": 2.5665, "step": 223 }, { "epoch": 0.01201716738197425, "grad_norm": 0.296875, "learning_rate": 4.005722460658083e-07, "loss": 2.2103, "step": 224 }, { "epoch": 0.012070815450643776, "grad_norm": 0.341796875, "learning_rate": 4.023605150214593e-07, "loss": 2.5508, "step": 225 }, { "epoch": 0.012124463519313304, "grad_norm": 0.37109375, "learning_rate": 4.041487839771102e-07, "loss": 2.4504, "step": 226 }, { "epoch": 0.012178111587982833, "grad_norm": 0.52734375, "learning_rate": 4.059370529327611e-07, "loss": 2.5348, "step": 227 }, { "epoch": 0.012231759656652361, "grad_norm": 0.349609375, "learning_rate": 4.07725321888412e-07, "loss": 2.4402, "step": 228 }, { "epoch": 0.012285407725321888, "grad_norm": 0.359375, "learning_rate": 4.0951359084406295e-07, "loss": 2.6193, "step": 229 }, { "epoch": 0.012339055793991416, "grad_norm": 0.6171875, "learning_rate": 4.1130185979971393e-07, "loss": 2.4065, "step": 230 }, { "epoch": 0.012392703862660944, "grad_norm": 0.36328125, "learning_rate": 4.1309012875536486e-07, "loss": 2.3769, "step": 231 }, { "epoch": 0.012446351931330472, "grad_norm": 0.462890625, "learning_rate": 4.148783977110158e-07, "loss": 2.4518, "step": 232 }, { "epoch": 0.0125, "grad_norm": 0.66015625, "learning_rate": 4.1666666666666667e-07, "loss": 2.3601, "step": 233 }, { "epoch": 0.012553648068669527, "grad_norm": 0.328125, "learning_rate": 4.184549356223176e-07, "loss": 2.4887, "step": 234 }, { "epoch": 0.012607296137339056, "grad_norm": 0.404296875, "learning_rate": 4.202432045779686e-07, "loss": 2.5771, "step": 235 }, { "epoch": 0.012660944206008584, "grad_norm": 0.337890625, "learning_rate": 4.220314735336195e-07, "loss": 2.3046, "step": 236 }, { "epoch": 0.012714592274678112, "grad_norm": 0.333984375, "learning_rate": 4.2381974248927043e-07, "loss": 2.5424, "step": 237 }, { "epoch": 0.012768240343347639, "grad_norm": 0.30078125, "learning_rate": 4.256080114449213e-07, "loss": 2.3938, "step": 238 }, { "epoch": 0.012821888412017167, "grad_norm": 0.427734375, "learning_rate": 4.273962804005723e-07, "loss": 2.4174, "step": 239 }, { "epoch": 0.012875536480686695, "grad_norm": 0.435546875, "learning_rate": 4.291845493562232e-07, "loss": 2.646, "step": 240 }, { "epoch": 0.012929184549356224, "grad_norm": 0.322265625, "learning_rate": 4.3097281831187415e-07, "loss": 2.2584, "step": 241 }, { "epoch": 0.012982832618025752, "grad_norm": 0.3125, "learning_rate": 4.327610872675251e-07, "loss": 2.1425, "step": 242 }, { "epoch": 0.013036480686695278, "grad_norm": 0.298828125, "learning_rate": 4.3454935622317595e-07, "loss": 2.5278, "step": 243 }, { "epoch": 0.013090128755364807, "grad_norm": 0.361328125, "learning_rate": 4.3633762517882693e-07, "loss": 2.6812, "step": 244 }, { "epoch": 0.013143776824034335, "grad_norm": 0.8359375, "learning_rate": 4.3812589413447786e-07, "loss": 2.5466, "step": 245 }, { "epoch": 0.013197424892703863, "grad_norm": 0.455078125, "learning_rate": 4.399141630901288e-07, "loss": 2.4388, "step": 246 }, { "epoch": 0.01325107296137339, "grad_norm": 0.60546875, "learning_rate": 4.417024320457797e-07, "loss": 1.9663, "step": 247 }, { "epoch": 0.013304721030042918, "grad_norm": 0.7890625, "learning_rate": 4.434907010014307e-07, "loss": 2.0127, "step": 248 }, { "epoch": 0.013358369098712446, "grad_norm": 0.337890625, "learning_rate": 4.452789699570816e-07, "loss": 2.2317, "step": 249 }, { "epoch": 0.013412017167381975, "grad_norm": 0.345703125, "learning_rate": 4.470672389127325e-07, "loss": 2.285, "step": 250 }, { "epoch": 0.013465665236051503, "grad_norm": 0.58984375, "learning_rate": 4.4885550786838344e-07, "loss": 2.5223, "step": 251 }, { "epoch": 0.01351931330472103, "grad_norm": 0.328125, "learning_rate": 4.5064377682403436e-07, "loss": 2.5103, "step": 252 }, { "epoch": 0.013572961373390558, "grad_norm": 0.65234375, "learning_rate": 4.5243204577968535e-07, "loss": 2.5283, "step": 253 }, { "epoch": 0.013626609442060086, "grad_norm": 0.353515625, "learning_rate": 4.542203147353362e-07, "loss": 2.5737, "step": 254 }, { "epoch": 0.013680257510729614, "grad_norm": 0.357421875, "learning_rate": 4.5600858369098715e-07, "loss": 2.612, "step": 255 }, { "epoch": 0.013733905579399141, "grad_norm": 0.53515625, "learning_rate": 4.577968526466381e-07, "loss": 2.5613, "step": 256 }, { "epoch": 0.01378755364806867, "grad_norm": 0.50390625, "learning_rate": 4.59585121602289e-07, "loss": 2.6785, "step": 257 }, { "epoch": 0.013841201716738197, "grad_norm": 0.5625, "learning_rate": 4.6137339055794e-07, "loss": 2.3022, "step": 258 }, { "epoch": 0.013894849785407726, "grad_norm": 0.412109375, "learning_rate": 4.6316165951359087e-07, "loss": 2.6421, "step": 259 }, { "epoch": 0.013948497854077254, "grad_norm": 0.349609375, "learning_rate": 4.649499284692418e-07, "loss": 2.2318, "step": 260 }, { "epoch": 0.01400214592274678, "grad_norm": 0.29296875, "learning_rate": 4.667381974248927e-07, "loss": 2.3664, "step": 261 }, { "epoch": 0.014055793991416309, "grad_norm": 0.396484375, "learning_rate": 4.685264663805437e-07, "loss": 2.5308, "step": 262 }, { "epoch": 0.014109442060085837, "grad_norm": 0.271484375, "learning_rate": 4.7031473533619463e-07, "loss": 2.287, "step": 263 }, { "epoch": 0.014163090128755365, "grad_norm": 0.419921875, "learning_rate": 4.7210300429184556e-07, "loss": 2.4583, "step": 264 }, { "epoch": 0.014216738197424892, "grad_norm": 0.384765625, "learning_rate": 4.7389127324749644e-07, "loss": 2.3593, "step": 265 }, { "epoch": 0.01427038626609442, "grad_norm": 1.984375, "learning_rate": 4.7567954220314737e-07, "loss": 2.7945, "step": 266 }, { "epoch": 0.014324034334763949, "grad_norm": 0.51953125, "learning_rate": 4.774678111587983e-07, "loss": 2.7293, "step": 267 }, { "epoch": 0.014377682403433477, "grad_norm": 0.31640625, "learning_rate": 4.792560801144492e-07, "loss": 2.558, "step": 268 }, { "epoch": 0.014431330472103005, "grad_norm": 0.341796875, "learning_rate": 4.810443490701002e-07, "loss": 2.5879, "step": 269 }, { "epoch": 0.014484978540772532, "grad_norm": 0.283203125, "learning_rate": 4.828326180257511e-07, "loss": 2.2996, "step": 270 }, { "epoch": 0.01453862660944206, "grad_norm": 0.400390625, "learning_rate": 4.846208869814021e-07, "loss": 2.1767, "step": 271 }, { "epoch": 0.014592274678111588, "grad_norm": 0.3515625, "learning_rate": 4.864091559370529e-07, "loss": 2.4629, "step": 272 }, { "epoch": 0.014645922746781117, "grad_norm": 0.337890625, "learning_rate": 4.881974248927039e-07, "loss": 2.5235, "step": 273 }, { "epoch": 0.014699570815450643, "grad_norm": 0.296875, "learning_rate": 4.899856938483549e-07, "loss": 2.3942, "step": 274 }, { "epoch": 0.014753218884120171, "grad_norm": 0.38671875, "learning_rate": 4.917739628040058e-07, "loss": 2.4046, "step": 275 }, { "epoch": 0.0148068669527897, "grad_norm": 0.31640625, "learning_rate": 4.935622317596567e-07, "loss": 2.1799, "step": 276 }, { "epoch": 0.014860515021459228, "grad_norm": 0.35546875, "learning_rate": 4.953505007153076e-07, "loss": 2.7221, "step": 277 }, { "epoch": 0.014914163090128755, "grad_norm": 0.58203125, "learning_rate": 4.971387696709585e-07, "loss": 2.7621, "step": 278 }, { "epoch": 0.014967811158798283, "grad_norm": 0.34765625, "learning_rate": 4.989270386266095e-07, "loss": 1.3594, "step": 279 }, { "epoch": 0.015021459227467811, "grad_norm": 0.318359375, "learning_rate": 5.007153075822605e-07, "loss": 2.6645, "step": 280 }, { "epoch": 0.01507510729613734, "grad_norm": 0.33984375, "learning_rate": 5.025035765379114e-07, "loss": 2.5015, "step": 281 }, { "epoch": 0.015128755364806868, "grad_norm": 0.341796875, "learning_rate": 5.042918454935622e-07, "loss": 2.5557, "step": 282 }, { "epoch": 0.015182403433476394, "grad_norm": 0.46484375, "learning_rate": 5.060801144492132e-07, "loss": 2.6156, "step": 283 }, { "epoch": 0.015236051502145923, "grad_norm": 0.357421875, "learning_rate": 5.078683834048642e-07, "loss": 2.3768, "step": 284 }, { "epoch": 0.01528969957081545, "grad_norm": 0.259765625, "learning_rate": 5.096566523605151e-07, "loss": 2.6167, "step": 285 }, { "epoch": 0.015343347639484979, "grad_norm": 0.83984375, "learning_rate": 5.114449213161659e-07, "loss": 2.3451, "step": 286 }, { "epoch": 0.015396995708154506, "grad_norm": 0.310546875, "learning_rate": 5.132331902718169e-07, "loss": 2.454, "step": 287 }, { "epoch": 0.015450643776824034, "grad_norm": 0.376953125, "learning_rate": 5.150214592274678e-07, "loss": 2.5051, "step": 288 }, { "epoch": 0.015504291845493562, "grad_norm": 0.3203125, "learning_rate": 5.168097281831188e-07, "loss": 2.411, "step": 289 }, { "epoch": 0.01555793991416309, "grad_norm": 0.4296875, "learning_rate": 5.185979971387698e-07, "loss": 2.4192, "step": 290 }, { "epoch": 0.015611587982832619, "grad_norm": 0.3984375, "learning_rate": 5.203862660944206e-07, "loss": 2.6243, "step": 291 }, { "epoch": 0.015665236051502145, "grad_norm": 0.390625, "learning_rate": 5.221745350500715e-07, "loss": 2.4333, "step": 292 }, { "epoch": 0.015718884120171674, "grad_norm": 0.392578125, "learning_rate": 5.239628040057225e-07, "loss": 2.5379, "step": 293 }, { "epoch": 0.015772532188841202, "grad_norm": 0.349609375, "learning_rate": 5.257510729613735e-07, "loss": 2.5397, "step": 294 }, { "epoch": 0.01582618025751073, "grad_norm": 0.37109375, "learning_rate": 5.275393419170244e-07, "loss": 2.7619, "step": 295 }, { "epoch": 0.01587982832618026, "grad_norm": 0.326171875, "learning_rate": 5.293276108726753e-07, "loss": 2.4961, "step": 296 }, { "epoch": 0.015933476394849787, "grad_norm": 0.4609375, "learning_rate": 5.311158798283262e-07, "loss": 2.3486, "step": 297 }, { "epoch": 0.01598712446351931, "grad_norm": 0.318359375, "learning_rate": 5.329041487839772e-07, "loss": 2.419, "step": 298 }, { "epoch": 0.01604077253218884, "grad_norm": 0.49609375, "learning_rate": 5.346924177396281e-07, "loss": 1.5741, "step": 299 }, { "epoch": 0.016094420600858368, "grad_norm": 0.337890625, "learning_rate": 5.36480686695279e-07, "loss": 2.4041, "step": 300 }, { "epoch": 0.016148068669527896, "grad_norm": 0.263671875, "learning_rate": 5.382689556509299e-07, "loss": 2.0668, "step": 301 }, { "epoch": 0.016201716738197425, "grad_norm": 0.349609375, "learning_rate": 5.400572246065808e-07, "loss": 2.4519, "step": 302 }, { "epoch": 0.016255364806866953, "grad_norm": 0.380859375, "learning_rate": 5.418454935622318e-07, "loss": 2.4274, "step": 303 }, { "epoch": 0.01630901287553648, "grad_norm": 0.373046875, "learning_rate": 5.436337625178828e-07, "loss": 2.4423, "step": 304 }, { "epoch": 0.01636266094420601, "grad_norm": 0.353515625, "learning_rate": 5.454220314735336e-07, "loss": 2.4688, "step": 305 }, { "epoch": 0.016416309012875538, "grad_norm": 0.271484375, "learning_rate": 5.472103004291846e-07, "loss": 2.4001, "step": 306 }, { "epoch": 0.016469957081545063, "grad_norm": 0.41015625, "learning_rate": 5.489985693848355e-07, "loss": 2.5308, "step": 307 }, { "epoch": 0.01652360515021459, "grad_norm": 0.3671875, "learning_rate": 5.507868383404865e-07, "loss": 2.442, "step": 308 }, { "epoch": 0.01657725321888412, "grad_norm": 0.373046875, "learning_rate": 5.525751072961374e-07, "loss": 2.654, "step": 309 }, { "epoch": 0.016630901287553648, "grad_norm": 0.349609375, "learning_rate": 5.543633762517883e-07, "loss": 2.5613, "step": 310 }, { "epoch": 0.016684549356223176, "grad_norm": 0.39453125, "learning_rate": 5.561516452074392e-07, "loss": 2.2215, "step": 311 }, { "epoch": 0.016738197424892704, "grad_norm": 0.314453125, "learning_rate": 5.579399141630902e-07, "loss": 2.4609, "step": 312 }, { "epoch": 0.016791845493562232, "grad_norm": 0.310546875, "learning_rate": 5.597281831187411e-07, "loss": 2.3655, "step": 313 }, { "epoch": 0.01684549356223176, "grad_norm": 0.458984375, "learning_rate": 5.615164520743921e-07, "loss": 2.6857, "step": 314 }, { "epoch": 0.01689914163090129, "grad_norm": 0.51171875, "learning_rate": 5.633047210300429e-07, "loss": 2.7834, "step": 315 }, { "epoch": 0.016952789699570814, "grad_norm": 0.3046875, "learning_rate": 5.650929899856939e-07, "loss": 2.479, "step": 316 }, { "epoch": 0.017006437768240342, "grad_norm": 0.318359375, "learning_rate": 5.668812589413449e-07, "loss": 2.0574, "step": 317 }, { "epoch": 0.01706008583690987, "grad_norm": 0.390625, "learning_rate": 5.686695278969958e-07, "loss": 2.4947, "step": 318 }, { "epoch": 0.0171137339055794, "grad_norm": 7.09375, "learning_rate": 5.704577968526466e-07, "loss": 2.4121, "step": 319 }, { "epoch": 0.017167381974248927, "grad_norm": 0.361328125, "learning_rate": 5.722460658082976e-07, "loss": 2.1336, "step": 320 }, { "epoch": 0.017221030042918455, "grad_norm": 0.3671875, "learning_rate": 5.740343347639486e-07, "loss": 2.4242, "step": 321 }, { "epoch": 0.017274678111587984, "grad_norm": 0.326171875, "learning_rate": 5.758226037195995e-07, "loss": 2.5378, "step": 322 }, { "epoch": 0.017328326180257512, "grad_norm": 0.376953125, "learning_rate": 5.776108726752504e-07, "loss": 1.8996, "step": 323 }, { "epoch": 0.01738197424892704, "grad_norm": 1.0078125, "learning_rate": 5.793991416309013e-07, "loss": 2.0291, "step": 324 }, { "epoch": 0.017435622317596565, "grad_norm": 0.404296875, "learning_rate": 5.811874105865522e-07, "loss": 2.4706, "step": 325 }, { "epoch": 0.017489270386266093, "grad_norm": 1.1328125, "learning_rate": 5.829756795422032e-07, "loss": 2.4258, "step": 326 }, { "epoch": 0.01754291845493562, "grad_norm": 0.51953125, "learning_rate": 5.847639484978542e-07, "loss": 2.1966, "step": 327 }, { "epoch": 0.01759656652360515, "grad_norm": 0.40234375, "learning_rate": 5.865522174535051e-07, "loss": 2.281, "step": 328 }, { "epoch": 0.017650214592274678, "grad_norm": 0.3046875, "learning_rate": 5.883404864091559e-07, "loss": 2.3281, "step": 329 }, { "epoch": 0.017703862660944206, "grad_norm": 0.27734375, "learning_rate": 5.901287553648069e-07, "loss": 2.729, "step": 330 }, { "epoch": 0.017757510729613735, "grad_norm": 4.0, "learning_rate": 5.919170243204579e-07, "loss": 2.273, "step": 331 }, { "epoch": 0.017811158798283263, "grad_norm": 0.314453125, "learning_rate": 5.937052932761088e-07, "loss": 2.4466, "step": 332 }, { "epoch": 0.01786480686695279, "grad_norm": 0.29296875, "learning_rate": 5.954935622317598e-07, "loss": 2.2985, "step": 333 }, { "epoch": 0.017918454935622316, "grad_norm": 0.439453125, "learning_rate": 5.972818311874106e-07, "loss": 2.7088, "step": 334 }, { "epoch": 0.017972103004291844, "grad_norm": 0.470703125, "learning_rate": 5.990701001430615e-07, "loss": 2.4683, "step": 335 }, { "epoch": 0.018025751072961373, "grad_norm": 0.46484375, "learning_rate": 6.008583690987125e-07, "loss": 2.7179, "step": 336 }, { "epoch": 0.0180793991416309, "grad_norm": 0.47265625, "learning_rate": 6.026466380543635e-07, "loss": 2.5419, "step": 337 }, { "epoch": 0.01813304721030043, "grad_norm": 0.41796875, "learning_rate": 6.044349070100143e-07, "loss": 2.4026, "step": 338 }, { "epoch": 0.018186695278969957, "grad_norm": 0.37890625, "learning_rate": 6.062231759656652e-07, "loss": 2.7435, "step": 339 }, { "epoch": 0.018240343347639486, "grad_norm": 6.0625, "learning_rate": 6.080114449213162e-07, "loss": 2.349, "step": 340 }, { "epoch": 0.018293991416309014, "grad_norm": 0.318359375, "learning_rate": 6.097997138769672e-07, "loss": 2.4787, "step": 341 }, { "epoch": 0.018347639484978542, "grad_norm": 0.310546875, "learning_rate": 6.115879828326181e-07, "loss": 2.2879, "step": 342 }, { "epoch": 0.018401287553648067, "grad_norm": 0.67578125, "learning_rate": 6.13376251788269e-07, "loss": 2.5408, "step": 343 }, { "epoch": 0.018454935622317595, "grad_norm": 0.416015625, "learning_rate": 6.151645207439199e-07, "loss": 2.3358, "step": 344 }, { "epoch": 0.018508583690987124, "grad_norm": 0.36328125, "learning_rate": 6.169527896995708e-07, "loss": 2.2695, "step": 345 }, { "epoch": 0.018562231759656652, "grad_norm": 0.44921875, "learning_rate": 6.187410586552218e-07, "loss": 2.1789, "step": 346 }, { "epoch": 0.01861587982832618, "grad_norm": 0.39453125, "learning_rate": 6.205293276108728e-07, "loss": 2.6383, "step": 347 }, { "epoch": 0.01866952789699571, "grad_norm": 0.291015625, "learning_rate": 6.223175965665236e-07, "loss": 2.2311, "step": 348 }, { "epoch": 0.018723175965665237, "grad_norm": 0.35546875, "learning_rate": 6.241058655221746e-07, "loss": 2.4769, "step": 349 }, { "epoch": 0.018776824034334765, "grad_norm": 0.259765625, "learning_rate": 6.258941344778255e-07, "loss": 2.528, "step": 350 }, { "epoch": 0.018830472103004293, "grad_norm": 0.59375, "learning_rate": 6.276824034334765e-07, "loss": 2.2412, "step": 351 }, { "epoch": 0.01888412017167382, "grad_norm": 0.373046875, "learning_rate": 6.294706723891273e-07, "loss": 2.5979, "step": 352 }, { "epoch": 0.018937768240343347, "grad_norm": 0.2734375, "learning_rate": 6.312589413447782e-07, "loss": 2.312, "step": 353 }, { "epoch": 0.018991416309012875, "grad_norm": 0.50390625, "learning_rate": 6.330472103004293e-07, "loss": 2.6484, "step": 354 }, { "epoch": 0.019045064377682403, "grad_norm": 0.27734375, "learning_rate": 6.348354792560802e-07, "loss": 2.5796, "step": 355 }, { "epoch": 0.01909871244635193, "grad_norm": 0.365234375, "learning_rate": 6.366237482117312e-07, "loss": 2.5359, "step": 356 }, { "epoch": 0.01915236051502146, "grad_norm": 0.3359375, "learning_rate": 6.38412017167382e-07, "loss": 2.4539, "step": 357 }, { "epoch": 0.019206008583690988, "grad_norm": 0.3828125, "learning_rate": 6.402002861230329e-07, "loss": 2.6497, "step": 358 }, { "epoch": 0.019259656652360516, "grad_norm": 0.365234375, "learning_rate": 6.419885550786839e-07, "loss": 2.5039, "step": 359 }, { "epoch": 0.019313304721030045, "grad_norm": 0.40625, "learning_rate": 6.437768240343348e-07, "loss": 2.562, "step": 360 }, { "epoch": 0.01936695278969957, "grad_norm": 0.41015625, "learning_rate": 6.455650929899858e-07, "loss": 2.432, "step": 361 }, { "epoch": 0.019420600858369098, "grad_norm": 0.88671875, "learning_rate": 6.473533619456366e-07, "loss": 2.4255, "step": 362 }, { "epoch": 0.019474248927038626, "grad_norm": 0.5078125, "learning_rate": 6.491416309012875e-07, "loss": 2.5831, "step": 363 }, { "epoch": 0.019527896995708154, "grad_norm": 0.40625, "learning_rate": 6.509298998569386e-07, "loss": 2.524, "step": 364 }, { "epoch": 0.019581545064377683, "grad_norm": 0.43359375, "learning_rate": 6.527181688125895e-07, "loss": 2.6172, "step": 365 }, { "epoch": 0.01963519313304721, "grad_norm": 0.53515625, "learning_rate": 6.545064377682405e-07, "loss": 2.3431, "step": 366 }, { "epoch": 0.01968884120171674, "grad_norm": 0.3984375, "learning_rate": 6.562947067238913e-07, "loss": 2.3687, "step": 367 }, { "epoch": 0.019742489270386267, "grad_norm": 1.1796875, "learning_rate": 6.580829756795423e-07, "loss": 2.2573, "step": 368 }, { "epoch": 0.019796137339055796, "grad_norm": 0.4765625, "learning_rate": 6.598712446351932e-07, "loss": 2.4733, "step": 369 }, { "epoch": 0.01984978540772532, "grad_norm": 0.310546875, "learning_rate": 6.616595135908441e-07, "loss": 2.6257, "step": 370 }, { "epoch": 0.01990343347639485, "grad_norm": 0.46875, "learning_rate": 6.63447782546495e-07, "loss": 2.3036, "step": 371 }, { "epoch": 0.019957081545064377, "grad_norm": 0.400390625, "learning_rate": 6.652360515021459e-07, "loss": 2.6102, "step": 372 }, { "epoch": 0.020010729613733905, "grad_norm": 0.3125, "learning_rate": 6.67024320457797e-07, "loss": 2.5883, "step": 373 }, { "epoch": 0.020064377682403434, "grad_norm": 0.91015625, "learning_rate": 6.688125894134479e-07, "loss": 2.6761, "step": 374 }, { "epoch": 0.020118025751072962, "grad_norm": 0.3046875, "learning_rate": 6.706008583690988e-07, "loss": 2.1163, "step": 375 }, { "epoch": 0.02017167381974249, "grad_norm": 0.333984375, "learning_rate": 6.723891273247497e-07, "loss": 2.4389, "step": 376 }, { "epoch": 0.02022532188841202, "grad_norm": 0.34375, "learning_rate": 6.741773962804006e-07, "loss": 2.6518, "step": 377 }, { "epoch": 0.020278969957081543, "grad_norm": 0.40234375, "learning_rate": 6.759656652360516e-07, "loss": 2.5494, "step": 378 }, { "epoch": 0.02033261802575107, "grad_norm": 0.291015625, "learning_rate": 6.777539341917025e-07, "loss": 2.3615, "step": 379 }, { "epoch": 0.0203862660944206, "grad_norm": 0.349609375, "learning_rate": 6.795422031473534e-07, "loss": 2.3903, "step": 380 }, { "epoch": 0.020439914163090128, "grad_norm": 0.390625, "learning_rate": 6.813304721030043e-07, "loss": 2.2546, "step": 381 }, { "epoch": 0.020493562231759656, "grad_norm": 0.34375, "learning_rate": 6.831187410586552e-07, "loss": 2.762, "step": 382 }, { "epoch": 0.020547210300429185, "grad_norm": 0.2890625, "learning_rate": 6.849070100143063e-07, "loss": 2.5368, "step": 383 }, { "epoch": 0.020600858369098713, "grad_norm": 0.318359375, "learning_rate": 6.866952789699572e-07, "loss": 2.6037, "step": 384 }, { "epoch": 0.02065450643776824, "grad_norm": 0.404296875, "learning_rate": 6.88483547925608e-07, "loss": 2.2548, "step": 385 }, { "epoch": 0.02070815450643777, "grad_norm": 0.43359375, "learning_rate": 6.90271816881259e-07, "loss": 2.4479, "step": 386 }, { "epoch": 0.020761802575107294, "grad_norm": 0.32421875, "learning_rate": 6.920600858369099e-07, "loss": 2.3025, "step": 387 }, { "epoch": 0.020815450643776823, "grad_norm": 0.54296875, "learning_rate": 6.938483547925609e-07, "loss": 2.5665, "step": 388 }, { "epoch": 0.02086909871244635, "grad_norm": 0.384765625, "learning_rate": 6.956366237482118e-07, "loss": 2.4038, "step": 389 }, { "epoch": 0.02092274678111588, "grad_norm": 0.275390625, "learning_rate": 6.974248927038626e-07, "loss": 2.4645, "step": 390 }, { "epoch": 0.020976394849785408, "grad_norm": 0.7109375, "learning_rate": 6.992131616595136e-07, "loss": 2.4951, "step": 391 }, { "epoch": 0.021030042918454936, "grad_norm": 0.61328125, "learning_rate": 7.010014306151645e-07, "loss": 2.6755, "step": 392 }, { "epoch": 0.021083690987124464, "grad_norm": 0.6953125, "learning_rate": 7.027896995708156e-07, "loss": 2.8145, "step": 393 }, { "epoch": 0.021137339055793992, "grad_norm": 1.1015625, "learning_rate": 7.045779685264665e-07, "loss": 2.4966, "step": 394 }, { "epoch": 0.02119098712446352, "grad_norm": 0.490234375, "learning_rate": 7.063662374821173e-07, "loss": 2.6325, "step": 395 }, { "epoch": 0.021244635193133046, "grad_norm": 4.625, "learning_rate": 7.081545064377683e-07, "loss": 2.4619, "step": 396 }, { "epoch": 0.021298283261802574, "grad_norm": 0.4453125, "learning_rate": 7.099427753934192e-07, "loss": 2.2704, "step": 397 }, { "epoch": 0.021351931330472102, "grad_norm": 0.482421875, "learning_rate": 7.117310443490702e-07, "loss": 2.3632, "step": 398 }, { "epoch": 0.02140557939914163, "grad_norm": 0.318359375, "learning_rate": 7.13519313304721e-07, "loss": 2.0946, "step": 399 }, { "epoch": 0.02145922746781116, "grad_norm": 0.35546875, "learning_rate": 7.15307582260372e-07, "loss": 2.6352, "step": 400 }, { "epoch": 0.021512875536480687, "grad_norm": 0.328125, "learning_rate": 7.170958512160229e-07, "loss": 2.5781, "step": 401 }, { "epoch": 0.021566523605150215, "grad_norm": 0.330078125, "learning_rate": 7.188841201716738e-07, "loss": 2.3473, "step": 402 }, { "epoch": 0.021620171673819744, "grad_norm": 0.314453125, "learning_rate": 7.206723891273249e-07, "loss": 2.575, "step": 403 }, { "epoch": 0.021673819742489272, "grad_norm": 0.54296875, "learning_rate": 7.224606580829757e-07, "loss": 2.1073, "step": 404 }, { "epoch": 0.021727467811158797, "grad_norm": 0.326171875, "learning_rate": 7.242489270386267e-07, "loss": 2.5382, "step": 405 }, { "epoch": 0.021781115879828325, "grad_norm": 0.357421875, "learning_rate": 7.260371959942776e-07, "loss": 2.3385, "step": 406 }, { "epoch": 0.021834763948497853, "grad_norm": 0.388671875, "learning_rate": 7.278254649499285e-07, "loss": 2.1501, "step": 407 }, { "epoch": 0.02188841201716738, "grad_norm": 0.26171875, "learning_rate": 7.296137339055795e-07, "loss": 2.3243, "step": 408 }, { "epoch": 0.02194206008583691, "grad_norm": 0.640625, "learning_rate": 7.314020028612303e-07, "loss": 2.1815, "step": 409 }, { "epoch": 0.021995708154506438, "grad_norm": 0.52734375, "learning_rate": 7.331902718168814e-07, "loss": 2.6204, "step": 410 }, { "epoch": 0.022049356223175966, "grad_norm": 0.34375, "learning_rate": 7.349785407725323e-07, "loss": 2.4681, "step": 411 }, { "epoch": 0.022103004291845495, "grad_norm": 0.375, "learning_rate": 7.367668097281831e-07, "loss": 2.7185, "step": 412 }, { "epoch": 0.022156652360515023, "grad_norm": 0.484375, "learning_rate": 7.385550786838342e-07, "loss": 2.4633, "step": 413 }, { "epoch": 0.022210300429184548, "grad_norm": 0.443359375, "learning_rate": 7.40343347639485e-07, "loss": 2.9364, "step": 414 }, { "epoch": 0.022263948497854076, "grad_norm": 1.546875, "learning_rate": 7.42131616595136e-07, "loss": 2.5495, "step": 415 }, { "epoch": 0.022317596566523604, "grad_norm": 0.55078125, "learning_rate": 7.439198855507869e-07, "loss": 2.6852, "step": 416 }, { "epoch": 0.022371244635193133, "grad_norm": 0.50390625, "learning_rate": 7.457081545064378e-07, "loss": 1.7639, "step": 417 }, { "epoch": 0.02242489270386266, "grad_norm": 0.279296875, "learning_rate": 7.474964234620888e-07, "loss": 2.199, "step": 418 }, { "epoch": 0.02247854077253219, "grad_norm": 0.337890625, "learning_rate": 7.492846924177396e-07, "loss": 2.4641, "step": 419 }, { "epoch": 0.022532188841201718, "grad_norm": 0.458984375, "learning_rate": 7.510729613733907e-07, "loss": 1.6108, "step": 420 }, { "epoch": 0.022585836909871246, "grad_norm": 0.30078125, "learning_rate": 7.528612303290416e-07, "loss": 2.4512, "step": 421 }, { "epoch": 0.022639484978540774, "grad_norm": 0.35546875, "learning_rate": 7.546494992846925e-07, "loss": 2.3345, "step": 422 }, { "epoch": 0.0226931330472103, "grad_norm": 0.439453125, "learning_rate": 7.564377682403434e-07, "loss": 2.576, "step": 423 }, { "epoch": 0.022746781115879827, "grad_norm": 0.55078125, "learning_rate": 7.582260371959943e-07, "loss": 2.4091, "step": 424 }, { "epoch": 0.022800429184549355, "grad_norm": 0.375, "learning_rate": 7.600143061516453e-07, "loss": 2.2566, "step": 425 }, { "epoch": 0.022854077253218884, "grad_norm": 0.3828125, "learning_rate": 7.618025751072962e-07, "loss": 2.4053, "step": 426 }, { "epoch": 0.022907725321888412, "grad_norm": 0.6484375, "learning_rate": 7.635908440629471e-07, "loss": 2.7371, "step": 427 }, { "epoch": 0.02296137339055794, "grad_norm": 0.328125, "learning_rate": 7.65379113018598e-07, "loss": 2.6196, "step": 428 }, { "epoch": 0.02301502145922747, "grad_norm": 0.43359375, "learning_rate": 7.671673819742489e-07, "loss": 2.4226, "step": 429 }, { "epoch": 0.023068669527896997, "grad_norm": 0.412109375, "learning_rate": 7.689556509299e-07, "loss": 2.5252, "step": 430 }, { "epoch": 0.023122317596566525, "grad_norm": 2.546875, "learning_rate": 7.707439198855509e-07, "loss": 2.2297, "step": 431 }, { "epoch": 0.02317596566523605, "grad_norm": 0.4453125, "learning_rate": 7.725321888412019e-07, "loss": 2.4877, "step": 432 }, { "epoch": 0.02322961373390558, "grad_norm": 0.408203125, "learning_rate": 7.743204577968527e-07, "loss": 2.5749, "step": 433 }, { "epoch": 0.023283261802575107, "grad_norm": 0.32421875, "learning_rate": 7.761087267525036e-07, "loss": 2.3846, "step": 434 }, { "epoch": 0.023336909871244635, "grad_norm": 1.15625, "learning_rate": 7.778969957081546e-07, "loss": 2.5232, "step": 435 }, { "epoch": 0.023390557939914163, "grad_norm": 0.271484375, "learning_rate": 7.796852646638055e-07, "loss": 2.4935, "step": 436 }, { "epoch": 0.02344420600858369, "grad_norm": 0.3359375, "learning_rate": 7.814735336194564e-07, "loss": 2.4845, "step": 437 }, { "epoch": 0.02349785407725322, "grad_norm": 1.1171875, "learning_rate": 7.832618025751073e-07, "loss": 2.6366, "step": 438 }, { "epoch": 0.023551502145922748, "grad_norm": 0.306640625, "learning_rate": 7.850500715307582e-07, "loss": 2.3498, "step": 439 }, { "epoch": 0.023605150214592276, "grad_norm": 0.416015625, "learning_rate": 7.868383404864093e-07, "loss": 2.5227, "step": 440 }, { "epoch": 0.0236587982832618, "grad_norm": 0.333984375, "learning_rate": 7.886266094420602e-07, "loss": 2.5374, "step": 441 }, { "epoch": 0.02371244635193133, "grad_norm": 0.322265625, "learning_rate": 7.904148783977111e-07, "loss": 2.3822, "step": 442 }, { "epoch": 0.023766094420600858, "grad_norm": 0.400390625, "learning_rate": 7.92203147353362e-07, "loss": 2.4555, "step": 443 }, { "epoch": 0.023819742489270386, "grad_norm": 0.4375, "learning_rate": 7.939914163090129e-07, "loss": 2.7176, "step": 444 }, { "epoch": 0.023873390557939914, "grad_norm": 0.498046875, "learning_rate": 7.957796852646639e-07, "loss": 1.7076, "step": 445 }, { "epoch": 0.023927038626609443, "grad_norm": 0.306640625, "learning_rate": 7.975679542203148e-07, "loss": 2.5388, "step": 446 }, { "epoch": 0.02398068669527897, "grad_norm": 0.34765625, "learning_rate": 7.993562231759657e-07, "loss": 2.4425, "step": 447 }, { "epoch": 0.0240343347639485, "grad_norm": 0.306640625, "learning_rate": 8.011444921316166e-07, "loss": 2.4235, "step": 448 }, { "epoch": 0.024087982832618027, "grad_norm": 0.376953125, "learning_rate": 8.029327610872675e-07, "loss": 2.1424, "step": 449 }, { "epoch": 0.024141630901287552, "grad_norm": 0.478515625, "learning_rate": 8.047210300429186e-07, "loss": 2.6018, "step": 450 }, { "epoch": 0.02419527896995708, "grad_norm": 0.439453125, "learning_rate": 8.065092989985695e-07, "loss": 2.6914, "step": 451 }, { "epoch": 0.02424892703862661, "grad_norm": 0.34375, "learning_rate": 8.082975679542204e-07, "loss": 2.7616, "step": 452 }, { "epoch": 0.024302575107296137, "grad_norm": 0.294921875, "learning_rate": 8.100858369098713e-07, "loss": 2.4871, "step": 453 }, { "epoch": 0.024356223175965665, "grad_norm": 0.796875, "learning_rate": 8.118741058655222e-07, "loss": 2.4732, "step": 454 }, { "epoch": 0.024409871244635194, "grad_norm": 0.3671875, "learning_rate": 8.136623748211732e-07, "loss": 2.5515, "step": 455 }, { "epoch": 0.024463519313304722, "grad_norm": 0.4375, "learning_rate": 8.15450643776824e-07, "loss": 2.2687, "step": 456 }, { "epoch": 0.02451716738197425, "grad_norm": 0.46484375, "learning_rate": 8.17238912732475e-07, "loss": 2.6547, "step": 457 }, { "epoch": 0.024570815450643775, "grad_norm": 1.0703125, "learning_rate": 8.190271816881259e-07, "loss": 2.6164, "step": 458 }, { "epoch": 0.024624463519313303, "grad_norm": 0.33203125, "learning_rate": 8.208154506437768e-07, "loss": 2.5067, "step": 459 }, { "epoch": 0.02467811158798283, "grad_norm": 0.353515625, "learning_rate": 8.226037195994279e-07, "loss": 2.486, "step": 460 }, { "epoch": 0.02473175965665236, "grad_norm": 0.5078125, "learning_rate": 8.243919885550787e-07, "loss": 2.3203, "step": 461 }, { "epoch": 0.024785407725321888, "grad_norm": 0.470703125, "learning_rate": 8.261802575107297e-07, "loss": 2.5493, "step": 462 }, { "epoch": 0.024839055793991417, "grad_norm": 0.37109375, "learning_rate": 8.279685264663806e-07, "loss": 2.4726, "step": 463 }, { "epoch": 0.024892703862660945, "grad_norm": 0.341796875, "learning_rate": 8.297567954220316e-07, "loss": 2.6852, "step": 464 }, { "epoch": 0.024946351931330473, "grad_norm": 0.380859375, "learning_rate": 8.315450643776825e-07, "loss": 2.1376, "step": 465 }, { "epoch": 0.025, "grad_norm": 0.546875, "learning_rate": 8.333333333333333e-07, "loss": 2.3662, "step": 466 }, { "epoch": 0.025053648068669526, "grad_norm": 0.421875, "learning_rate": 8.351216022889844e-07, "loss": 2.5358, "step": 467 }, { "epoch": 0.025107296137339054, "grad_norm": 0.34765625, "learning_rate": 8.369098712446352e-07, "loss": 2.5964, "step": 468 }, { "epoch": 0.025160944206008583, "grad_norm": 6.46875, "learning_rate": 8.386981402002863e-07, "loss": 2.3873, "step": 469 }, { "epoch": 0.02521459227467811, "grad_norm": 0.34375, "learning_rate": 8.404864091559372e-07, "loss": 2.4337, "step": 470 }, { "epoch": 0.02526824034334764, "grad_norm": 0.271484375, "learning_rate": 8.42274678111588e-07, "loss": 2.1511, "step": 471 }, { "epoch": 0.025321888412017168, "grad_norm": 0.376953125, "learning_rate": 8.44062947067239e-07, "loss": 2.2533, "step": 472 }, { "epoch": 0.025375536480686696, "grad_norm": 0.396484375, "learning_rate": 8.458512160228899e-07, "loss": 2.6373, "step": 473 }, { "epoch": 0.025429184549356224, "grad_norm": 0.51171875, "learning_rate": 8.476394849785409e-07, "loss": 2.3247, "step": 474 }, { "epoch": 0.025482832618025753, "grad_norm": 0.318359375, "learning_rate": 8.494277539341917e-07, "loss": 2.7036, "step": 475 }, { "epoch": 0.025536480686695277, "grad_norm": 0.443359375, "learning_rate": 8.512160228898426e-07, "loss": 2.5895, "step": 476 }, { "epoch": 0.025590128755364806, "grad_norm": 0.37890625, "learning_rate": 8.530042918454937e-07, "loss": 2.4512, "step": 477 }, { "epoch": 0.025643776824034334, "grad_norm": 4.34375, "learning_rate": 8.547925608011446e-07, "loss": 1.327, "step": 478 }, { "epoch": 0.025697424892703862, "grad_norm": 0.3359375, "learning_rate": 8.565808297567956e-07, "loss": 2.5544, "step": 479 }, { "epoch": 0.02575107296137339, "grad_norm": 0.392578125, "learning_rate": 8.583690987124464e-07, "loss": 2.4943, "step": 480 }, { "epoch": 0.02580472103004292, "grad_norm": 0.365234375, "learning_rate": 8.601573676680973e-07, "loss": 2.6054, "step": 481 }, { "epoch": 0.025858369098712447, "grad_norm": 0.3125, "learning_rate": 8.619456366237483e-07, "loss": 2.4367, "step": 482 }, { "epoch": 0.025912017167381975, "grad_norm": 0.478515625, "learning_rate": 8.637339055793992e-07, "loss": 2.4768, "step": 483 }, { "epoch": 0.025965665236051504, "grad_norm": 0.42578125, "learning_rate": 8.655221745350502e-07, "loss": 1.9497, "step": 484 }, { "epoch": 0.02601931330472103, "grad_norm": 0.33203125, "learning_rate": 8.67310443490701e-07, "loss": 2.6494, "step": 485 }, { "epoch": 0.026072961373390557, "grad_norm": 0.330078125, "learning_rate": 8.690987124463519e-07, "loss": 2.4921, "step": 486 }, { "epoch": 0.026126609442060085, "grad_norm": 0.3671875, "learning_rate": 8.70886981402003e-07, "loss": 2.4631, "step": 487 }, { "epoch": 0.026180257510729613, "grad_norm": 0.36328125, "learning_rate": 8.726752503576539e-07, "loss": 1.8175, "step": 488 }, { "epoch": 0.02623390557939914, "grad_norm": 0.515625, "learning_rate": 8.744635193133049e-07, "loss": 2.4221, "step": 489 }, { "epoch": 0.02628755364806867, "grad_norm": 0.3125, "learning_rate": 8.762517882689557e-07, "loss": 2.165, "step": 490 }, { "epoch": 0.026341201716738198, "grad_norm": 0.3359375, "learning_rate": 8.780400572246066e-07, "loss": 2.507, "step": 491 }, { "epoch": 0.026394849785407726, "grad_norm": 0.455078125, "learning_rate": 8.798283261802576e-07, "loss": 2.4094, "step": 492 }, { "epoch": 0.026448497854077255, "grad_norm": 0.322265625, "learning_rate": 8.816165951359085e-07, "loss": 2.543, "step": 493 }, { "epoch": 0.02650214592274678, "grad_norm": 0.37109375, "learning_rate": 8.834048640915594e-07, "loss": 2.3122, "step": 494 }, { "epoch": 0.026555793991416308, "grad_norm": 0.498046875, "learning_rate": 8.851931330472103e-07, "loss": 2.7045, "step": 495 }, { "epoch": 0.026609442060085836, "grad_norm": 0.3046875, "learning_rate": 8.869814020028614e-07, "loss": 2.4252, "step": 496 }, { "epoch": 0.026663090128755364, "grad_norm": 0.28125, "learning_rate": 8.887696709585123e-07, "loss": 2.7205, "step": 497 }, { "epoch": 0.026716738197424893, "grad_norm": 0.341796875, "learning_rate": 8.905579399141632e-07, "loss": 2.398, "step": 498 }, { "epoch": 0.02677038626609442, "grad_norm": 0.353515625, "learning_rate": 8.923462088698141e-07, "loss": 2.3684, "step": 499 }, { "epoch": 0.02682403433476395, "grad_norm": 0.341796875, "learning_rate": 8.94134477825465e-07, "loss": 2.509, "step": 500 }, { "epoch": 0.026877682403433478, "grad_norm": 0.328125, "learning_rate": 8.95922746781116e-07, "loss": 2.6755, "step": 501 }, { "epoch": 0.026931330472103006, "grad_norm": 0.283203125, "learning_rate": 8.977110157367669e-07, "loss": 2.4974, "step": 502 }, { "epoch": 0.02698497854077253, "grad_norm": 1.1484375, "learning_rate": 8.994992846924177e-07, "loss": 2.8117, "step": 503 }, { "epoch": 0.02703862660944206, "grad_norm": 0.38671875, "learning_rate": 9.012875536480687e-07, "loss": 2.3854, "step": 504 }, { "epoch": 0.027092274678111587, "grad_norm": 0.359375, "learning_rate": 9.030758226037196e-07, "loss": 2.3714, "step": 505 }, { "epoch": 0.027145922746781116, "grad_norm": 0.306640625, "learning_rate": 9.048640915593707e-07, "loss": 2.6873, "step": 506 }, { "epoch": 0.027199570815450644, "grad_norm": 0.33984375, "learning_rate": 9.066523605150216e-07, "loss": 2.3912, "step": 507 }, { "epoch": 0.027253218884120172, "grad_norm": 0.29296875, "learning_rate": 9.084406294706724e-07, "loss": 2.3976, "step": 508 }, { "epoch": 0.0273068669527897, "grad_norm": 0.298828125, "learning_rate": 9.102288984263234e-07, "loss": 2.4203, "step": 509 }, { "epoch": 0.02736051502145923, "grad_norm": 1.015625, "learning_rate": 9.120171673819743e-07, "loss": 2.5275, "step": 510 }, { "epoch": 0.027414163090128757, "grad_norm": 0.5078125, "learning_rate": 9.138054363376253e-07, "loss": 2.5554, "step": 511 }, { "epoch": 0.027467811158798282, "grad_norm": 0.59765625, "learning_rate": 9.155937052932762e-07, "loss": 1.7675, "step": 512 }, { "epoch": 0.02752145922746781, "grad_norm": 0.357421875, "learning_rate": 9.17381974248927e-07, "loss": 2.3573, "step": 513 }, { "epoch": 0.02757510729613734, "grad_norm": 0.33984375, "learning_rate": 9.19170243204578e-07, "loss": 2.3346, "step": 514 }, { "epoch": 0.027628755364806867, "grad_norm": 0.34765625, "learning_rate": 9.209585121602289e-07, "loss": 2.3726, "step": 515 }, { "epoch": 0.027682403433476395, "grad_norm": 0.42578125, "learning_rate": 9.2274678111588e-07, "loss": 2.3747, "step": 516 }, { "epoch": 0.027736051502145923, "grad_norm": 1.2421875, "learning_rate": 9.245350500715309e-07, "loss": 2.5336, "step": 517 }, { "epoch": 0.02778969957081545, "grad_norm": 0.435546875, "learning_rate": 9.263233190271817e-07, "loss": 2.3231, "step": 518 }, { "epoch": 0.02784334763948498, "grad_norm": 0.375, "learning_rate": 9.281115879828327e-07, "loss": 2.4974, "step": 519 }, { "epoch": 0.027896995708154508, "grad_norm": 0.36328125, "learning_rate": 9.298998569384836e-07, "loss": 2.4507, "step": 520 }, { "epoch": 0.027950643776824033, "grad_norm": 0.51953125, "learning_rate": 9.316881258941346e-07, "loss": 2.1741, "step": 521 }, { "epoch": 0.02800429184549356, "grad_norm": 0.337890625, "learning_rate": 9.334763948497854e-07, "loss": 2.6607, "step": 522 }, { "epoch": 0.02805793991416309, "grad_norm": 0.3671875, "learning_rate": 9.352646638054363e-07, "loss": 2.2669, "step": 523 }, { "epoch": 0.028111587982832618, "grad_norm": 0.4921875, "learning_rate": 9.370529327610874e-07, "loss": 2.4106, "step": 524 }, { "epoch": 0.028165236051502146, "grad_norm": 0.443359375, "learning_rate": 9.388412017167382e-07, "loss": 2.3468, "step": 525 }, { "epoch": 0.028218884120171674, "grad_norm": 0.462890625, "learning_rate": 9.406294706723893e-07, "loss": 2.2069, "step": 526 }, { "epoch": 0.028272532188841203, "grad_norm": 0.5625, "learning_rate": 9.424177396280401e-07, "loss": 1.9182, "step": 527 }, { "epoch": 0.02832618025751073, "grad_norm": 0.40625, "learning_rate": 9.442060085836911e-07, "loss": 2.412, "step": 528 }, { "epoch": 0.02837982832618026, "grad_norm": 0.5625, "learning_rate": 9.45994277539342e-07, "loss": 2.2071, "step": 529 }, { "epoch": 0.028433476394849784, "grad_norm": 0.357421875, "learning_rate": 9.477825464949929e-07, "loss": 1.9348, "step": 530 }, { "epoch": 0.028487124463519312, "grad_norm": 0.404296875, "learning_rate": 9.495708154506439e-07, "loss": 2.3738, "step": 531 }, { "epoch": 0.02854077253218884, "grad_norm": 0.41796875, "learning_rate": 9.513590844062947e-07, "loss": 2.5984, "step": 532 }, { "epoch": 0.02859442060085837, "grad_norm": 0.310546875, "learning_rate": 9.531473533619458e-07, "loss": 2.3606, "step": 533 }, { "epoch": 0.028648068669527897, "grad_norm": 0.4375, "learning_rate": 9.549356223175967e-07, "loss": 2.1681, "step": 534 }, { "epoch": 0.028701716738197425, "grad_norm": 0.33203125, "learning_rate": 9.567238912732476e-07, "loss": 1.9664, "step": 535 }, { "epoch": 0.028755364806866954, "grad_norm": 0.439453125, "learning_rate": 9.585121602288984e-07, "loss": 2.3031, "step": 536 }, { "epoch": 0.028809012875536482, "grad_norm": 0.318359375, "learning_rate": 9.603004291845493e-07, "loss": 2.6263, "step": 537 }, { "epoch": 0.02886266094420601, "grad_norm": 0.41796875, "learning_rate": 9.620886981402004e-07, "loss": 2.5548, "step": 538 }, { "epoch": 0.028916309012875535, "grad_norm": 0.310546875, "learning_rate": 9.638769670958513e-07, "loss": 2.4333, "step": 539 }, { "epoch": 0.028969957081545063, "grad_norm": 3.0625, "learning_rate": 9.656652360515022e-07, "loss": 2.4949, "step": 540 }, { "epoch": 0.02902360515021459, "grad_norm": 0.412109375, "learning_rate": 9.674535050071533e-07, "loss": 2.4585, "step": 541 }, { "epoch": 0.02907725321888412, "grad_norm": 0.306640625, "learning_rate": 9.692417739628041e-07, "loss": 2.4813, "step": 542 }, { "epoch": 0.02913090128755365, "grad_norm": 0.9921875, "learning_rate": 9.71030042918455e-07, "loss": 1.7287, "step": 543 }, { "epoch": 0.029184549356223177, "grad_norm": 0.474609375, "learning_rate": 9.728183118741059e-07, "loss": 2.4169, "step": 544 }, { "epoch": 0.029238197424892705, "grad_norm": 0.283203125, "learning_rate": 9.746065808297568e-07, "loss": 2.5388, "step": 545 }, { "epoch": 0.029291845493562233, "grad_norm": 0.373046875, "learning_rate": 9.763948497854078e-07, "loss": 2.5995, "step": 546 }, { "epoch": 0.029345493562231758, "grad_norm": 0.3359375, "learning_rate": 9.781831187410587e-07, "loss": 2.4117, "step": 547 }, { "epoch": 0.029399141630901286, "grad_norm": 0.5390625, "learning_rate": 9.799713876967098e-07, "loss": 2.4271, "step": 548 }, { "epoch": 0.029452789699570815, "grad_norm": 0.408203125, "learning_rate": 9.817596566523607e-07, "loss": 2.4392, "step": 549 }, { "epoch": 0.029506437768240343, "grad_norm": 0.369140625, "learning_rate": 9.835479256080116e-07, "loss": 2.1713, "step": 550 }, { "epoch": 0.02956008583690987, "grad_norm": 0.421875, "learning_rate": 9.853361945636624e-07, "loss": 2.2869, "step": 551 }, { "epoch": 0.0296137339055794, "grad_norm": 0.59765625, "learning_rate": 9.871244635193133e-07, "loss": 2.4478, "step": 552 }, { "epoch": 0.029667381974248928, "grad_norm": 0.337890625, "learning_rate": 9.889127324749644e-07, "loss": 2.3429, "step": 553 }, { "epoch": 0.029721030042918456, "grad_norm": 0.59375, "learning_rate": 9.907010014306153e-07, "loss": 2.4669, "step": 554 }, { "epoch": 0.029774678111587984, "grad_norm": 0.302734375, "learning_rate": 9.924892703862661e-07, "loss": 2.3334, "step": 555 }, { "epoch": 0.02982832618025751, "grad_norm": 0.318359375, "learning_rate": 9.94277539341917e-07, "loss": 2.376, "step": 556 }, { "epoch": 0.029881974248927037, "grad_norm": 0.330078125, "learning_rate": 9.96065808297568e-07, "loss": 2.5067, "step": 557 }, { "epoch": 0.029935622317596566, "grad_norm": 0.5390625, "learning_rate": 9.97854077253219e-07, "loss": 2.5734, "step": 558 }, { "epoch": 0.029989270386266094, "grad_norm": 0.345703125, "learning_rate": 9.996423462088699e-07, "loss": 2.5279, "step": 559 }, { "epoch": 0.030042918454935622, "grad_norm": 0.439453125, "learning_rate": 1.001430615164521e-06, "loss": 2.3779, "step": 560 }, { "epoch": 0.03009656652360515, "grad_norm": 0.373046875, "learning_rate": 1.0032188841201718e-06, "loss": 1.9072, "step": 561 }, { "epoch": 0.03015021459227468, "grad_norm": 0.7734375, "learning_rate": 1.0050071530758227e-06, "loss": 2.42, "step": 562 }, { "epoch": 0.030203862660944207, "grad_norm": 0.470703125, "learning_rate": 1.0067954220314736e-06, "loss": 2.6524, "step": 563 }, { "epoch": 0.030257510729613735, "grad_norm": 0.458984375, "learning_rate": 1.0085836909871245e-06, "loss": 2.6576, "step": 564 }, { "epoch": 0.03031115879828326, "grad_norm": 0.53125, "learning_rate": 1.0103719599427755e-06, "loss": 2.4042, "step": 565 }, { "epoch": 0.03036480686695279, "grad_norm": 0.337890625, "learning_rate": 1.0121602288984264e-06, "loss": 2.5033, "step": 566 }, { "epoch": 0.030418454935622317, "grad_norm": 0.326171875, "learning_rate": 1.0139484978540773e-06, "loss": 2.417, "step": 567 }, { "epoch": 0.030472103004291845, "grad_norm": 0.337890625, "learning_rate": 1.0157367668097284e-06, "loss": 2.2659, "step": 568 }, { "epoch": 0.030525751072961373, "grad_norm": 0.3046875, "learning_rate": 1.0175250357653793e-06, "loss": 2.525, "step": 569 }, { "epoch": 0.0305793991416309, "grad_norm": 0.396484375, "learning_rate": 1.0193133047210301e-06, "loss": 2.4183, "step": 570 }, { "epoch": 0.03063304721030043, "grad_norm": 0.408203125, "learning_rate": 1.021101573676681e-06, "loss": 2.2936, "step": 571 }, { "epoch": 0.030686695278969958, "grad_norm": 0.2890625, "learning_rate": 1.0228898426323319e-06, "loss": 2.3341, "step": 572 }, { "epoch": 0.030740343347639486, "grad_norm": 0.333984375, "learning_rate": 1.024678111587983e-06, "loss": 1.9359, "step": 573 }, { "epoch": 0.03079399141630901, "grad_norm": 0.51953125, "learning_rate": 1.0264663805436338e-06, "loss": 2.3845, "step": 574 }, { "epoch": 0.03084763948497854, "grad_norm": 0.369140625, "learning_rate": 1.0282546494992847e-06, "loss": 2.6345, "step": 575 }, { "epoch": 0.030901287553648068, "grad_norm": 0.3125, "learning_rate": 1.0300429184549356e-06, "loss": 2.3578, "step": 576 }, { "epoch": 0.030954935622317596, "grad_norm": 0.41796875, "learning_rate": 1.0318311874105865e-06, "loss": 2.3443, "step": 577 }, { "epoch": 0.031008583690987124, "grad_norm": 0.369140625, "learning_rate": 1.0336194563662376e-06, "loss": 2.5009, "step": 578 }, { "epoch": 0.031062231759656653, "grad_norm": 0.4140625, "learning_rate": 1.0354077253218884e-06, "loss": 2.5208, "step": 579 }, { "epoch": 0.03111587982832618, "grad_norm": 0.271484375, "learning_rate": 1.0371959942775395e-06, "loss": 2.4333, "step": 580 }, { "epoch": 0.03116952789699571, "grad_norm": 0.515625, "learning_rate": 1.0389842632331904e-06, "loss": 2.5238, "step": 581 }, { "epoch": 0.031223175965665238, "grad_norm": 0.30078125, "learning_rate": 1.0407725321888413e-06, "loss": 2.4288, "step": 582 }, { "epoch": 0.031276824034334766, "grad_norm": 0.57421875, "learning_rate": 1.0425608011444922e-06, "loss": 2.6612, "step": 583 }, { "epoch": 0.03133047210300429, "grad_norm": 0.392578125, "learning_rate": 1.044349070100143e-06, "loss": 2.5919, "step": 584 }, { "epoch": 0.03138412017167382, "grad_norm": 0.369140625, "learning_rate": 1.0461373390557941e-06, "loss": 2.377, "step": 585 }, { "epoch": 0.03143776824034335, "grad_norm": 0.41015625, "learning_rate": 1.047925608011445e-06, "loss": 2.3663, "step": 586 }, { "epoch": 0.03149141630901287, "grad_norm": 0.359375, "learning_rate": 1.0497138769670959e-06, "loss": 2.6805, "step": 587 }, { "epoch": 0.031545064377682404, "grad_norm": 2.296875, "learning_rate": 1.051502145922747e-06, "loss": 2.5538, "step": 588 }, { "epoch": 0.03159871244635193, "grad_norm": 0.4140625, "learning_rate": 1.0532904148783978e-06, "loss": 2.3455, "step": 589 }, { "epoch": 0.03165236051502146, "grad_norm": 0.322265625, "learning_rate": 1.0550786838340487e-06, "loss": 2.3263, "step": 590 }, { "epoch": 0.031706008583690985, "grad_norm": 0.47265625, "learning_rate": 1.0568669527896996e-06, "loss": 1.4262, "step": 591 }, { "epoch": 0.03175965665236052, "grad_norm": 0.333984375, "learning_rate": 1.0586552217453507e-06, "loss": 2.0717, "step": 592 }, { "epoch": 0.03181330472103004, "grad_norm": 0.359375, "learning_rate": 1.0604434907010015e-06, "loss": 2.4091, "step": 593 }, { "epoch": 0.031866952789699574, "grad_norm": 0.365234375, "learning_rate": 1.0622317596566524e-06, "loss": 2.1548, "step": 594 }, { "epoch": 0.0319206008583691, "grad_norm": 0.41796875, "learning_rate": 1.0640200286123035e-06, "loss": 2.3398, "step": 595 }, { "epoch": 0.03197424892703862, "grad_norm": 0.5078125, "learning_rate": 1.0658082975679544e-06, "loss": 2.4948, "step": 596 }, { "epoch": 0.032027896995708155, "grad_norm": 0.376953125, "learning_rate": 1.0675965665236053e-06, "loss": 2.4331, "step": 597 }, { "epoch": 0.03208154506437768, "grad_norm": 0.396484375, "learning_rate": 1.0693848354792561e-06, "loss": 2.4488, "step": 598 }, { "epoch": 0.03213519313304721, "grad_norm": 0.42578125, "learning_rate": 1.071173104434907e-06, "loss": 2.4702, "step": 599 }, { "epoch": 0.032188841201716736, "grad_norm": 0.333984375, "learning_rate": 1.072961373390558e-06, "loss": 1.7696, "step": 600 }, { "epoch": 0.03224248927038627, "grad_norm": 0.365234375, "learning_rate": 1.074749642346209e-06, "loss": 2.4332, "step": 601 }, { "epoch": 0.03229613733905579, "grad_norm": 0.451171875, "learning_rate": 1.0765379113018599e-06, "loss": 2.4186, "step": 602 }, { "epoch": 0.032349785407725325, "grad_norm": 0.5234375, "learning_rate": 1.0783261802575107e-06, "loss": 2.5228, "step": 603 }, { "epoch": 0.03240343347639485, "grad_norm": 0.5546875, "learning_rate": 1.0801144492131616e-06, "loss": 2.6045, "step": 604 }, { "epoch": 0.032457081545064374, "grad_norm": 0.302734375, "learning_rate": 1.0819027181688127e-06, "loss": 2.452, "step": 605 }, { "epoch": 0.032510729613733906, "grad_norm": 0.6015625, "learning_rate": 1.0836909871244636e-06, "loss": 2.4305, "step": 606 }, { "epoch": 0.03256437768240343, "grad_norm": 0.2734375, "learning_rate": 1.0854792560801147e-06, "loss": 2.3182, "step": 607 }, { "epoch": 0.03261802575107296, "grad_norm": 0.470703125, "learning_rate": 1.0872675250357655e-06, "loss": 1.7148, "step": 608 }, { "epoch": 0.03267167381974249, "grad_norm": 0.41796875, "learning_rate": 1.0890557939914164e-06, "loss": 2.6712, "step": 609 }, { "epoch": 0.03272532188841202, "grad_norm": 0.310546875, "learning_rate": 1.0908440629470673e-06, "loss": 2.4819, "step": 610 }, { "epoch": 0.032778969957081544, "grad_norm": 0.322265625, "learning_rate": 1.0926323319027182e-06, "loss": 2.2184, "step": 611 }, { "epoch": 0.032832618025751076, "grad_norm": 0.671875, "learning_rate": 1.0944206008583692e-06, "loss": 2.3071, "step": 612 }, { "epoch": 0.0328862660944206, "grad_norm": 0.455078125, "learning_rate": 1.0962088698140201e-06, "loss": 2.4986, "step": 613 }, { "epoch": 0.032939914163090125, "grad_norm": 0.419921875, "learning_rate": 1.097997138769671e-06, "loss": 2.5145, "step": 614 }, { "epoch": 0.03299356223175966, "grad_norm": 0.298828125, "learning_rate": 1.099785407725322e-06, "loss": 2.6306, "step": 615 }, { "epoch": 0.03304721030042918, "grad_norm": 0.330078125, "learning_rate": 1.101573676680973e-06, "loss": 2.5035, "step": 616 }, { "epoch": 0.033100858369098714, "grad_norm": 0.359375, "learning_rate": 1.1033619456366238e-06, "loss": 2.2657, "step": 617 }, { "epoch": 0.03315450643776824, "grad_norm": 0.50390625, "learning_rate": 1.1051502145922747e-06, "loss": 2.0746, "step": 618 }, { "epoch": 0.03320815450643777, "grad_norm": 0.25, "learning_rate": 1.1069384835479256e-06, "loss": 2.3618, "step": 619 }, { "epoch": 0.033261802575107295, "grad_norm": 0.408203125, "learning_rate": 1.1087267525035767e-06, "loss": 2.5551, "step": 620 }, { "epoch": 0.03331545064377683, "grad_norm": 0.373046875, "learning_rate": 1.1105150214592276e-06, "loss": 2.5902, "step": 621 }, { "epoch": 0.03336909871244635, "grad_norm": 0.53515625, "learning_rate": 1.1123032904148784e-06, "loss": 2.5069, "step": 622 }, { "epoch": 0.03342274678111588, "grad_norm": 0.353515625, "learning_rate": 1.1140915593705293e-06, "loss": 2.4267, "step": 623 }, { "epoch": 0.03347639484978541, "grad_norm": 0.55859375, "learning_rate": 1.1158798283261804e-06, "loss": 2.4193, "step": 624 }, { "epoch": 0.03353004291845493, "grad_norm": 0.337890625, "learning_rate": 1.1176680972818313e-06, "loss": 2.5159, "step": 625 }, { "epoch": 0.033583690987124465, "grad_norm": 0.431640625, "learning_rate": 1.1194563662374821e-06, "loss": 2.5467, "step": 626 }, { "epoch": 0.03363733905579399, "grad_norm": 0.439453125, "learning_rate": 1.1212446351931332e-06, "loss": 2.4166, "step": 627 }, { "epoch": 0.03369098712446352, "grad_norm": 0.75390625, "learning_rate": 1.1230329041487841e-06, "loss": 2.6872, "step": 628 }, { "epoch": 0.033744635193133046, "grad_norm": 0.5, "learning_rate": 1.124821173104435e-06, "loss": 2.4373, "step": 629 }, { "epoch": 0.03379828326180258, "grad_norm": 1.6953125, "learning_rate": 1.1266094420600859e-06, "loss": 2.338, "step": 630 }, { "epoch": 0.0338519313304721, "grad_norm": 0.35546875, "learning_rate": 1.1283977110157367e-06, "loss": 2.4845, "step": 631 }, { "epoch": 0.03390557939914163, "grad_norm": 0.5546875, "learning_rate": 1.1301859799713878e-06, "loss": 2.4222, "step": 632 }, { "epoch": 0.03395922746781116, "grad_norm": 0.431640625, "learning_rate": 1.1319742489270387e-06, "loss": 2.5177, "step": 633 }, { "epoch": 0.034012875536480684, "grad_norm": 0.3046875, "learning_rate": 1.1337625178826898e-06, "loss": 2.2976, "step": 634 }, { "epoch": 0.034066523605150216, "grad_norm": 0.984375, "learning_rate": 1.1355507868383407e-06, "loss": 2.3124, "step": 635 }, { "epoch": 0.03412017167381974, "grad_norm": 0.36328125, "learning_rate": 1.1373390557939915e-06, "loss": 2.3869, "step": 636 }, { "epoch": 0.03417381974248927, "grad_norm": 0.388671875, "learning_rate": 1.1391273247496424e-06, "loss": 2.7494, "step": 637 }, { "epoch": 0.0342274678111588, "grad_norm": 0.4140625, "learning_rate": 1.1409155937052933e-06, "loss": 2.3011, "step": 638 }, { "epoch": 0.03428111587982833, "grad_norm": 0.408203125, "learning_rate": 1.1427038626609444e-06, "loss": 2.4173, "step": 639 }, { "epoch": 0.034334763948497854, "grad_norm": 0.44921875, "learning_rate": 1.1444921316165953e-06, "loss": 2.4018, "step": 640 }, { "epoch": 0.03438841201716738, "grad_norm": 0.484375, "learning_rate": 1.1462804005722461e-06, "loss": 2.4956, "step": 641 }, { "epoch": 0.03444206008583691, "grad_norm": 0.5078125, "learning_rate": 1.1480686695278972e-06, "loss": 2.1901, "step": 642 }, { "epoch": 0.034495708154506435, "grad_norm": 0.359375, "learning_rate": 1.1498569384835479e-06, "loss": 2.3271, "step": 643 }, { "epoch": 0.03454935622317597, "grad_norm": 0.40625, "learning_rate": 1.151645207439199e-06, "loss": 2.6526, "step": 644 }, { "epoch": 0.03460300429184549, "grad_norm": 0.341796875, "learning_rate": 1.1534334763948498e-06, "loss": 2.5722, "step": 645 }, { "epoch": 0.034656652360515024, "grad_norm": 0.3203125, "learning_rate": 1.1552217453505007e-06, "loss": 2.3245, "step": 646 }, { "epoch": 0.03471030042918455, "grad_norm": 0.447265625, "learning_rate": 1.1570100143061518e-06, "loss": 2.3918, "step": 647 }, { "epoch": 0.03476394849785408, "grad_norm": 8.125, "learning_rate": 1.1587982832618027e-06, "loss": 2.543, "step": 648 }, { "epoch": 0.034817596566523605, "grad_norm": 0.4140625, "learning_rate": 1.1605865522174536e-06, "loss": 2.5161, "step": 649 }, { "epoch": 0.03487124463519313, "grad_norm": 0.55859375, "learning_rate": 1.1623748211731044e-06, "loss": 2.4413, "step": 650 }, { "epoch": 0.03492489270386266, "grad_norm": 0.494140625, "learning_rate": 1.1641630901287553e-06, "loss": 1.8642, "step": 651 }, { "epoch": 0.034978540772532186, "grad_norm": 0.35546875, "learning_rate": 1.1659513590844064e-06, "loss": 2.2716, "step": 652 }, { "epoch": 0.03503218884120172, "grad_norm": 0.72265625, "learning_rate": 1.1677396280400573e-06, "loss": 2.6355, "step": 653 }, { "epoch": 0.03508583690987124, "grad_norm": 0.462890625, "learning_rate": 1.1695278969957084e-06, "loss": 2.5791, "step": 654 }, { "epoch": 0.035139484978540775, "grad_norm": 0.361328125, "learning_rate": 1.1713161659513592e-06, "loss": 2.2059, "step": 655 }, { "epoch": 0.0351931330472103, "grad_norm": 0.35546875, "learning_rate": 1.1731044349070101e-06, "loss": 2.5133, "step": 656 }, { "epoch": 0.03524678111587983, "grad_norm": 0.30859375, "learning_rate": 1.174892703862661e-06, "loss": 2.5332, "step": 657 }, { "epoch": 0.035300429184549356, "grad_norm": 0.49609375, "learning_rate": 1.1766809728183119e-06, "loss": 2.5722, "step": 658 }, { "epoch": 0.03535407725321888, "grad_norm": 0.408203125, "learning_rate": 1.178469241773963e-06, "loss": 2.9663, "step": 659 }, { "epoch": 0.03540772532188841, "grad_norm": 0.30859375, "learning_rate": 1.1802575107296138e-06, "loss": 2.3916, "step": 660 }, { "epoch": 0.03546137339055794, "grad_norm": 0.78125, "learning_rate": 1.182045779685265e-06, "loss": 1.5143, "step": 661 }, { "epoch": 0.03551502145922747, "grad_norm": 0.400390625, "learning_rate": 1.1838340486409158e-06, "loss": 2.5112, "step": 662 }, { "epoch": 0.035568669527896994, "grad_norm": 0.388671875, "learning_rate": 1.1856223175965667e-06, "loss": 2.3137, "step": 663 }, { "epoch": 0.035622317596566526, "grad_norm": 0.4609375, "learning_rate": 1.1874105865522175e-06, "loss": 2.6819, "step": 664 }, { "epoch": 0.03567596566523605, "grad_norm": 0.3359375, "learning_rate": 1.1891988555078684e-06, "loss": 2.3629, "step": 665 }, { "epoch": 0.03572961373390558, "grad_norm": 0.3359375, "learning_rate": 1.1909871244635195e-06, "loss": 2.3997, "step": 666 }, { "epoch": 0.03578326180257511, "grad_norm": 1.90625, "learning_rate": 1.1927753934191704e-06, "loss": 2.6001, "step": 667 }, { "epoch": 0.03583690987124463, "grad_norm": 0.392578125, "learning_rate": 1.1945636623748213e-06, "loss": 2.6031, "step": 668 }, { "epoch": 0.035890557939914164, "grad_norm": 0.57421875, "learning_rate": 1.1963519313304721e-06, "loss": 2.6134, "step": 669 }, { "epoch": 0.03594420600858369, "grad_norm": 0.388671875, "learning_rate": 1.198140200286123e-06, "loss": 2.3309, "step": 670 }, { "epoch": 0.03599785407725322, "grad_norm": 0.333984375, "learning_rate": 1.199928469241774e-06, "loss": 2.4558, "step": 671 }, { "epoch": 0.036051502145922745, "grad_norm": 0.412109375, "learning_rate": 1.201716738197425e-06, "loss": 2.2724, "step": 672 }, { "epoch": 0.03610515021459228, "grad_norm": 0.34375, "learning_rate": 1.2035050071530758e-06, "loss": 2.4542, "step": 673 }, { "epoch": 0.0361587982832618, "grad_norm": 0.453125, "learning_rate": 1.205293276108727e-06, "loss": 2.6605, "step": 674 }, { "epoch": 0.036212446351931334, "grad_norm": 0.36328125, "learning_rate": 1.2070815450643778e-06, "loss": 2.3726, "step": 675 }, { "epoch": 0.03626609442060086, "grad_norm": 0.373046875, "learning_rate": 1.2088698140200287e-06, "loss": 2.3764, "step": 676 }, { "epoch": 0.03631974248927038, "grad_norm": 0.33984375, "learning_rate": 1.2106580829756796e-06, "loss": 2.5479, "step": 677 }, { "epoch": 0.036373390557939915, "grad_norm": 0.27734375, "learning_rate": 1.2124463519313304e-06, "loss": 2.7122, "step": 678 }, { "epoch": 0.03642703862660944, "grad_norm": 0.32421875, "learning_rate": 1.2142346208869815e-06, "loss": 2.6049, "step": 679 }, { "epoch": 0.03648068669527897, "grad_norm": 0.40234375, "learning_rate": 1.2160228898426324e-06, "loss": 2.3247, "step": 680 }, { "epoch": 0.036534334763948496, "grad_norm": 0.6171875, "learning_rate": 1.2178111587982835e-06, "loss": 2.465, "step": 681 }, { "epoch": 0.03658798283261803, "grad_norm": 0.388671875, "learning_rate": 1.2195994277539344e-06, "loss": 2.3235, "step": 682 }, { "epoch": 0.03664163090128755, "grad_norm": 0.337890625, "learning_rate": 1.2213876967095852e-06, "loss": 2.4169, "step": 683 }, { "epoch": 0.036695278969957085, "grad_norm": 0.279296875, "learning_rate": 1.2231759656652361e-06, "loss": 2.4014, "step": 684 }, { "epoch": 0.03674892703862661, "grad_norm": 0.55078125, "learning_rate": 1.224964234620887e-06, "loss": 2.5669, "step": 685 }, { "epoch": 0.036802575107296134, "grad_norm": 0.361328125, "learning_rate": 1.226752503576538e-06, "loss": 2.2072, "step": 686 }, { "epoch": 0.036856223175965666, "grad_norm": 0.3671875, "learning_rate": 1.228540772532189e-06, "loss": 2.4111, "step": 687 }, { "epoch": 0.03690987124463519, "grad_norm": 0.318359375, "learning_rate": 1.2303290414878398e-06, "loss": 2.1474, "step": 688 }, { "epoch": 0.03696351931330472, "grad_norm": 0.341796875, "learning_rate": 1.2321173104434907e-06, "loss": 2.4994, "step": 689 }, { "epoch": 0.03701716738197425, "grad_norm": 0.263671875, "learning_rate": 1.2339055793991416e-06, "loss": 2.1547, "step": 690 }, { "epoch": 0.03707081545064378, "grad_norm": 0.353515625, "learning_rate": 1.2356938483547927e-06, "loss": 2.5118, "step": 691 }, { "epoch": 0.037124463519313304, "grad_norm": 0.5546875, "learning_rate": 1.2374821173104435e-06, "loss": 2.5398, "step": 692 }, { "epoch": 0.037178111587982836, "grad_norm": 0.40234375, "learning_rate": 1.2392703862660946e-06, "loss": 2.3984, "step": 693 }, { "epoch": 0.03723175965665236, "grad_norm": 0.5703125, "learning_rate": 1.2410586552217455e-06, "loss": 2.6697, "step": 694 }, { "epoch": 0.037285407725321885, "grad_norm": 0.37109375, "learning_rate": 1.2428469241773964e-06, "loss": 2.4184, "step": 695 }, { "epoch": 0.03733905579399142, "grad_norm": 0.291015625, "learning_rate": 1.2446351931330473e-06, "loss": 2.317, "step": 696 }, { "epoch": 0.03739270386266094, "grad_norm": 0.56640625, "learning_rate": 1.2464234620886981e-06, "loss": 2.7187, "step": 697 }, { "epoch": 0.037446351931330474, "grad_norm": 0.400390625, "learning_rate": 1.2482117310443492e-06, "loss": 2.334, "step": 698 }, { "epoch": 0.0375, "grad_norm": 0.337890625, "learning_rate": 1.25e-06, "loss": 2.26, "step": 699 }, { "epoch": 0.03755364806866953, "grad_norm": 0.443359375, "learning_rate": 1.251788268955651e-06, "loss": 2.4786, "step": 700 }, { "epoch": 0.037607296137339055, "grad_norm": 0.72265625, "learning_rate": 1.2535765379113019e-06, "loss": 2.5206, "step": 701 }, { "epoch": 0.03766094420600859, "grad_norm": 0.326171875, "learning_rate": 1.255364806866953e-06, "loss": 2.6237, "step": 702 }, { "epoch": 0.03771459227467811, "grad_norm": 0.302734375, "learning_rate": 1.2571530758226038e-06, "loss": 2.6274, "step": 703 }, { "epoch": 0.03776824034334764, "grad_norm": 0.4140625, "learning_rate": 1.2589413447782547e-06, "loss": 2.5546, "step": 704 }, { "epoch": 0.03782188841201717, "grad_norm": 0.318359375, "learning_rate": 1.2607296137339056e-06, "loss": 2.6304, "step": 705 }, { "epoch": 0.03787553648068669, "grad_norm": 0.404296875, "learning_rate": 1.2625178826895564e-06, "loss": 2.7341, "step": 706 }, { "epoch": 0.037929184549356225, "grad_norm": 0.578125, "learning_rate": 1.2643061516452077e-06, "loss": 2.4848, "step": 707 }, { "epoch": 0.03798283261802575, "grad_norm": 1.8515625, "learning_rate": 1.2660944206008586e-06, "loss": 2.6051, "step": 708 }, { "epoch": 0.03803648068669528, "grad_norm": 0.359375, "learning_rate": 1.2678826895565095e-06, "loss": 2.5885, "step": 709 }, { "epoch": 0.038090128755364806, "grad_norm": 0.3828125, "learning_rate": 1.2696709585121604e-06, "loss": 2.443, "step": 710 }, { "epoch": 0.03814377682403434, "grad_norm": 1.1015625, "learning_rate": 1.271459227467811e-06, "loss": 2.2928, "step": 711 }, { "epoch": 0.03819742489270386, "grad_norm": 0.515625, "learning_rate": 1.2732474964234623e-06, "loss": 2.4642, "step": 712 }, { "epoch": 0.03825107296137339, "grad_norm": 0.3046875, "learning_rate": 1.2750357653791132e-06, "loss": 2.1973, "step": 713 }, { "epoch": 0.03830472103004292, "grad_norm": 0.376953125, "learning_rate": 1.276824034334764e-06, "loss": 2.5249, "step": 714 }, { "epoch": 0.038358369098712444, "grad_norm": 0.462890625, "learning_rate": 1.278612303290415e-06, "loss": 2.4157, "step": 715 }, { "epoch": 0.038412017167381976, "grad_norm": 0.4609375, "learning_rate": 1.2804005722460658e-06, "loss": 2.0761, "step": 716 }, { "epoch": 0.0384656652360515, "grad_norm": 2.65625, "learning_rate": 1.282188841201717e-06, "loss": 2.0861, "step": 717 }, { "epoch": 0.03851931330472103, "grad_norm": 0.40625, "learning_rate": 1.2839771101573678e-06, "loss": 2.3872, "step": 718 }, { "epoch": 0.03857296137339056, "grad_norm": 0.404296875, "learning_rate": 1.2857653791130187e-06, "loss": 2.3345, "step": 719 }, { "epoch": 0.03862660944206009, "grad_norm": 0.33984375, "learning_rate": 1.2875536480686696e-06, "loss": 2.2226, "step": 720 }, { "epoch": 0.038680257510729614, "grad_norm": 0.369140625, "learning_rate": 1.2893419170243204e-06, "loss": 2.4944, "step": 721 }, { "epoch": 0.03873390557939914, "grad_norm": 0.326171875, "learning_rate": 1.2911301859799715e-06, "loss": 2.6303, "step": 722 }, { "epoch": 0.03878755364806867, "grad_norm": 2.3125, "learning_rate": 1.2929184549356224e-06, "loss": 2.69, "step": 723 }, { "epoch": 0.038841201716738195, "grad_norm": 0.359375, "learning_rate": 1.2947067238912733e-06, "loss": 2.4801, "step": 724 }, { "epoch": 0.03889484978540773, "grad_norm": 0.57421875, "learning_rate": 1.2964949928469241e-06, "loss": 2.3558, "step": 725 }, { "epoch": 0.03894849785407725, "grad_norm": 0.287109375, "learning_rate": 1.298283261802575e-06, "loss": 2.4959, "step": 726 }, { "epoch": 0.039002145922746784, "grad_norm": 0.59765625, "learning_rate": 1.3000715307582263e-06, "loss": 2.482, "step": 727 }, { "epoch": 0.03905579399141631, "grad_norm": 0.322265625, "learning_rate": 1.3018597997138772e-06, "loss": 2.4073, "step": 728 }, { "epoch": 0.03910944206008584, "grad_norm": 0.412109375, "learning_rate": 1.303648068669528e-06, "loss": 2.225, "step": 729 }, { "epoch": 0.039163090128755365, "grad_norm": 0.3984375, "learning_rate": 1.305436337625179e-06, "loss": 2.3287, "step": 730 }, { "epoch": 0.03921673819742489, "grad_norm": 0.419921875, "learning_rate": 1.30722460658083e-06, "loss": 2.6098, "step": 731 }, { "epoch": 0.03927038626609442, "grad_norm": 0.322265625, "learning_rate": 1.309012875536481e-06, "loss": 2.4133, "step": 732 }, { "epoch": 0.039324034334763946, "grad_norm": 0.38671875, "learning_rate": 1.3108011444921318e-06, "loss": 2.3345, "step": 733 }, { "epoch": 0.03937768240343348, "grad_norm": 0.390625, "learning_rate": 1.3125894134477827e-06, "loss": 2.3745, "step": 734 }, { "epoch": 0.039431330472103, "grad_norm": 0.337890625, "learning_rate": 1.3143776824034335e-06, "loss": 2.4408, "step": 735 }, { "epoch": 0.039484978540772535, "grad_norm": 0.400390625, "learning_rate": 1.3161659513590846e-06, "loss": 2.5597, "step": 736 }, { "epoch": 0.03953862660944206, "grad_norm": 0.462890625, "learning_rate": 1.3179542203147355e-06, "loss": 2.6759, "step": 737 }, { "epoch": 0.03959227467811159, "grad_norm": 0.439453125, "learning_rate": 1.3197424892703864e-06, "loss": 1.5004, "step": 738 }, { "epoch": 0.039645922746781116, "grad_norm": 0.5078125, "learning_rate": 1.3215307582260373e-06, "loss": 1.9843, "step": 739 }, { "epoch": 0.03969957081545064, "grad_norm": 0.69921875, "learning_rate": 1.3233190271816881e-06, "loss": 2.5284, "step": 740 }, { "epoch": 0.03975321888412017, "grad_norm": 0.796875, "learning_rate": 1.3251072961373392e-06, "loss": 1.4086, "step": 741 }, { "epoch": 0.0398068669527897, "grad_norm": 0.3046875, "learning_rate": 1.32689556509299e-06, "loss": 2.4115, "step": 742 }, { "epoch": 0.03986051502145923, "grad_norm": 0.39453125, "learning_rate": 1.328683834048641e-06, "loss": 2.7345, "step": 743 }, { "epoch": 0.039914163090128754, "grad_norm": 0.35546875, "learning_rate": 1.3304721030042918e-06, "loss": 2.2906, "step": 744 }, { "epoch": 0.039967811158798286, "grad_norm": 0.328125, "learning_rate": 1.3322603719599427e-06, "loss": 2.2802, "step": 745 }, { "epoch": 0.04002145922746781, "grad_norm": 0.291015625, "learning_rate": 1.334048640915594e-06, "loss": 2.1479, "step": 746 }, { "epoch": 0.04007510729613734, "grad_norm": 0.390625, "learning_rate": 1.335836909871245e-06, "loss": 2.2092, "step": 747 }, { "epoch": 0.04012875536480687, "grad_norm": 0.39453125, "learning_rate": 1.3376251788268958e-06, "loss": 2.6033, "step": 748 }, { "epoch": 0.04018240343347639, "grad_norm": 0.38671875, "learning_rate": 1.3394134477825466e-06, "loss": 2.3872, "step": 749 }, { "epoch": 0.040236051502145924, "grad_norm": 0.3125, "learning_rate": 1.3412017167381975e-06, "loss": 2.3537, "step": 750 }, { "epoch": 0.04028969957081545, "grad_norm": 0.4296875, "learning_rate": 1.3429899856938486e-06, "loss": 2.5458, "step": 751 }, { "epoch": 0.04034334763948498, "grad_norm": 0.408203125, "learning_rate": 1.3447782546494995e-06, "loss": 2.1961, "step": 752 }, { "epoch": 0.040396995708154505, "grad_norm": 0.3671875, "learning_rate": 1.3465665236051504e-06, "loss": 2.4071, "step": 753 }, { "epoch": 0.04045064377682404, "grad_norm": 0.376953125, "learning_rate": 1.3483547925608012e-06, "loss": 2.5349, "step": 754 }, { "epoch": 0.04050429184549356, "grad_norm": 0.40234375, "learning_rate": 1.3501430615164521e-06, "loss": 2.155, "step": 755 }, { "epoch": 0.04055793991416309, "grad_norm": 0.28125, "learning_rate": 1.3519313304721032e-06, "loss": 2.5522, "step": 756 }, { "epoch": 0.04061158798283262, "grad_norm": 0.328125, "learning_rate": 1.353719599427754e-06, "loss": 2.3316, "step": 757 }, { "epoch": 0.04066523605150214, "grad_norm": 0.462890625, "learning_rate": 1.355507868383405e-06, "loss": 2.5888, "step": 758 }, { "epoch": 0.040718884120171675, "grad_norm": 0.349609375, "learning_rate": 1.3572961373390558e-06, "loss": 2.4959, "step": 759 }, { "epoch": 0.0407725321888412, "grad_norm": 0.353515625, "learning_rate": 1.3590844062947067e-06, "loss": 2.259, "step": 760 }, { "epoch": 0.04082618025751073, "grad_norm": 0.361328125, "learning_rate": 1.3608726752503578e-06, "loss": 2.2827, "step": 761 }, { "epoch": 0.040879828326180256, "grad_norm": 1.3671875, "learning_rate": 1.3626609442060087e-06, "loss": 2.4127, "step": 762 }, { "epoch": 0.04093347639484979, "grad_norm": 0.359375, "learning_rate": 1.3644492131616595e-06, "loss": 2.3973, "step": 763 }, { "epoch": 0.04098712446351931, "grad_norm": 0.396484375, "learning_rate": 1.3662374821173104e-06, "loss": 2.5341, "step": 764 }, { "epoch": 0.04104077253218884, "grad_norm": 0.77734375, "learning_rate": 1.3680257510729613e-06, "loss": 2.1937, "step": 765 }, { "epoch": 0.04109442060085837, "grad_norm": 0.443359375, "learning_rate": 1.3698140200286126e-06, "loss": 2.3396, "step": 766 }, { "epoch": 0.041148068669527894, "grad_norm": 0.365234375, "learning_rate": 1.3716022889842635e-06, "loss": 1.9297, "step": 767 }, { "epoch": 0.041201716738197426, "grad_norm": 0.408203125, "learning_rate": 1.3733905579399143e-06, "loss": 2.4015, "step": 768 }, { "epoch": 0.04125536480686695, "grad_norm": 0.40234375, "learning_rate": 1.3751788268955652e-06, "loss": 2.3264, "step": 769 }, { "epoch": 0.04130901287553648, "grad_norm": 0.51953125, "learning_rate": 1.376967095851216e-06, "loss": 2.3259, "step": 770 }, { "epoch": 0.04136266094420601, "grad_norm": 0.349609375, "learning_rate": 1.3787553648068672e-06, "loss": 2.6022, "step": 771 }, { "epoch": 0.04141630901287554, "grad_norm": 0.3359375, "learning_rate": 1.380543633762518e-06, "loss": 2.4966, "step": 772 }, { "epoch": 0.041469957081545064, "grad_norm": 0.54296875, "learning_rate": 1.382331902718169e-06, "loss": 2.3775, "step": 773 }, { "epoch": 0.04152360515021459, "grad_norm": 0.3515625, "learning_rate": 1.3841201716738198e-06, "loss": 2.5992, "step": 774 }, { "epoch": 0.04157725321888412, "grad_norm": 0.435546875, "learning_rate": 1.3859084406294707e-06, "loss": 2.688, "step": 775 }, { "epoch": 0.041630901287553645, "grad_norm": 0.369140625, "learning_rate": 1.3876967095851218e-06, "loss": 2.5141, "step": 776 }, { "epoch": 0.04168454935622318, "grad_norm": 0.32421875, "learning_rate": 1.3894849785407727e-06, "loss": 2.4136, "step": 777 }, { "epoch": 0.0417381974248927, "grad_norm": 1.3359375, "learning_rate": 1.3912732474964235e-06, "loss": 2.3696, "step": 778 }, { "epoch": 0.041791845493562234, "grad_norm": 0.419921875, "learning_rate": 1.3930615164520744e-06, "loss": 2.5719, "step": 779 }, { "epoch": 0.04184549356223176, "grad_norm": 0.28515625, "learning_rate": 1.3948497854077253e-06, "loss": 2.5019, "step": 780 }, { "epoch": 0.04189914163090129, "grad_norm": 0.388671875, "learning_rate": 1.3966380543633764e-06, "loss": 2.4958, "step": 781 }, { "epoch": 0.041952789699570815, "grad_norm": 0.318359375, "learning_rate": 1.3984263233190272e-06, "loss": 2.5139, "step": 782 }, { "epoch": 0.04200643776824034, "grad_norm": 0.490234375, "learning_rate": 1.4002145922746781e-06, "loss": 2.2762, "step": 783 }, { "epoch": 0.04206008583690987, "grad_norm": 0.431640625, "learning_rate": 1.402002861230329e-06, "loss": 2.0563, "step": 784 }, { "epoch": 0.0421137339055794, "grad_norm": 0.244140625, "learning_rate": 1.4037911301859799e-06, "loss": 2.9708, "step": 785 }, { "epoch": 0.04216738197424893, "grad_norm": 0.375, "learning_rate": 1.4055793991416312e-06, "loss": 2.4161, "step": 786 }, { "epoch": 0.04222103004291845, "grad_norm": 0.36328125, "learning_rate": 1.407367668097282e-06, "loss": 2.2735, "step": 787 }, { "epoch": 0.042274678111587985, "grad_norm": 0.40625, "learning_rate": 1.409155937052933e-06, "loss": 2.6733, "step": 788 }, { "epoch": 0.04232832618025751, "grad_norm": 0.5625, "learning_rate": 1.4109442060085838e-06, "loss": 2.3567, "step": 789 }, { "epoch": 0.04238197424892704, "grad_norm": 0.447265625, "learning_rate": 1.4127324749642347e-06, "loss": 2.7518, "step": 790 }, { "epoch": 0.042435622317596566, "grad_norm": 0.4453125, "learning_rate": 1.4145207439198858e-06, "loss": 2.4696, "step": 791 }, { "epoch": 0.04248927038626609, "grad_norm": 0.62890625, "learning_rate": 1.4163090128755366e-06, "loss": 2.4709, "step": 792 }, { "epoch": 0.04254291845493562, "grad_norm": 0.474609375, "learning_rate": 1.4180972818311875e-06, "loss": 2.2495, "step": 793 }, { "epoch": 0.04259656652360515, "grad_norm": 0.423828125, "learning_rate": 1.4198855507868384e-06, "loss": 2.5038, "step": 794 }, { "epoch": 0.04265021459227468, "grad_norm": 0.318359375, "learning_rate": 1.4216738197424895e-06, "loss": 2.377, "step": 795 }, { "epoch": 0.042703862660944204, "grad_norm": 0.765625, "learning_rate": 1.4234620886981404e-06, "loss": 2.5334, "step": 796 }, { "epoch": 0.042757510729613736, "grad_norm": 0.291015625, "learning_rate": 1.4252503576537912e-06, "loss": 2.7178, "step": 797 }, { "epoch": 0.04281115879828326, "grad_norm": 0.41015625, "learning_rate": 1.427038626609442e-06, "loss": 2.5246, "step": 798 }, { "epoch": 0.04286480686695279, "grad_norm": 0.35546875, "learning_rate": 1.428826895565093e-06, "loss": 2.4296, "step": 799 }, { "epoch": 0.04291845493562232, "grad_norm": 0.3125, "learning_rate": 1.430615164520744e-06, "loss": 2.3586, "step": 800 }, { "epoch": 0.04297210300429184, "grad_norm": 0.32421875, "learning_rate": 1.432403433476395e-06, "loss": 2.4828, "step": 801 }, { "epoch": 0.043025751072961374, "grad_norm": 0.451171875, "learning_rate": 1.4341917024320458e-06, "loss": 2.53, "step": 802 }, { "epoch": 0.0430793991416309, "grad_norm": 0.35546875, "learning_rate": 1.4359799713876967e-06, "loss": 2.502, "step": 803 }, { "epoch": 0.04313304721030043, "grad_norm": 0.3203125, "learning_rate": 1.4377682403433476e-06, "loss": 2.3363, "step": 804 }, { "epoch": 0.043186695278969955, "grad_norm": 0.431640625, "learning_rate": 1.4395565092989989e-06, "loss": 2.6009, "step": 805 }, { "epoch": 0.04324034334763949, "grad_norm": 0.63671875, "learning_rate": 1.4413447782546497e-06, "loss": 2.6981, "step": 806 }, { "epoch": 0.04329399141630901, "grad_norm": 0.64453125, "learning_rate": 1.4431330472103006e-06, "loss": 2.6242, "step": 807 }, { "epoch": 0.043347639484978544, "grad_norm": 0.330078125, "learning_rate": 1.4449213161659515e-06, "loss": 2.4446, "step": 808 }, { "epoch": 0.04340128755364807, "grad_norm": 0.33203125, "learning_rate": 1.4467095851216024e-06, "loss": 2.4934, "step": 809 }, { "epoch": 0.04345493562231759, "grad_norm": 0.34375, "learning_rate": 1.4484978540772535e-06, "loss": 2.3758, "step": 810 }, { "epoch": 0.043508583690987125, "grad_norm": 0.43359375, "learning_rate": 1.4502861230329043e-06, "loss": 2.493, "step": 811 }, { "epoch": 0.04356223175965665, "grad_norm": 0.390625, "learning_rate": 1.4520743919885552e-06, "loss": 2.5109, "step": 812 }, { "epoch": 0.04361587982832618, "grad_norm": 0.314453125, "learning_rate": 1.453862660944206e-06, "loss": 2.4121, "step": 813 }, { "epoch": 0.043669527896995707, "grad_norm": 0.3671875, "learning_rate": 1.455650929899857e-06, "loss": 1.6285, "step": 814 }, { "epoch": 0.04372317596566524, "grad_norm": 0.30859375, "learning_rate": 1.457439198855508e-06, "loss": 2.5189, "step": 815 }, { "epoch": 0.04377682403433476, "grad_norm": 0.359375, "learning_rate": 1.459227467811159e-06, "loss": 2.416, "step": 816 }, { "epoch": 0.043830472103004295, "grad_norm": 0.3671875, "learning_rate": 1.4610157367668098e-06, "loss": 2.4916, "step": 817 }, { "epoch": 0.04388412017167382, "grad_norm": 0.361328125, "learning_rate": 1.4628040057224607e-06, "loss": 2.301, "step": 818 }, { "epoch": 0.043937768240343344, "grad_norm": 0.47265625, "learning_rate": 1.4645922746781116e-06, "loss": 2.2703, "step": 819 }, { "epoch": 0.043991416309012876, "grad_norm": 1.40625, "learning_rate": 1.4663805436337629e-06, "loss": 2.6941, "step": 820 }, { "epoch": 0.0440450643776824, "grad_norm": 0.421875, "learning_rate": 1.4681688125894137e-06, "loss": 2.6363, "step": 821 }, { "epoch": 0.04409871244635193, "grad_norm": 0.396484375, "learning_rate": 1.4699570815450646e-06, "loss": 2.6261, "step": 822 }, { "epoch": 0.04415236051502146, "grad_norm": 0.640625, "learning_rate": 1.4717453505007155e-06, "loss": 2.3352, "step": 823 }, { "epoch": 0.04420600858369099, "grad_norm": 0.3359375, "learning_rate": 1.4735336194563661e-06, "loss": 2.479, "step": 824 }, { "epoch": 0.044259656652360514, "grad_norm": 0.35546875, "learning_rate": 1.4753218884120174e-06, "loss": 2.5648, "step": 825 }, { "epoch": 0.044313304721030046, "grad_norm": 0.470703125, "learning_rate": 1.4771101573676683e-06, "loss": 2.411, "step": 826 }, { "epoch": 0.04436695278969957, "grad_norm": 0.275390625, "learning_rate": 1.4788984263233192e-06, "loss": 2.8137, "step": 827 }, { "epoch": 0.044420600858369096, "grad_norm": 0.482421875, "learning_rate": 1.48068669527897e-06, "loss": 2.2456, "step": 828 }, { "epoch": 0.04447424892703863, "grad_norm": 0.400390625, "learning_rate": 1.482474964234621e-06, "loss": 2.4765, "step": 829 }, { "epoch": 0.04452789699570815, "grad_norm": 0.447265625, "learning_rate": 1.484263233190272e-06, "loss": 2.3816, "step": 830 }, { "epoch": 0.044581545064377684, "grad_norm": 0.32421875, "learning_rate": 1.486051502145923e-06, "loss": 2.3729, "step": 831 }, { "epoch": 0.04463519313304721, "grad_norm": 0.3125, "learning_rate": 1.4878397711015738e-06, "loss": 2.3915, "step": 832 }, { "epoch": 0.04468884120171674, "grad_norm": 0.400390625, "learning_rate": 1.4896280400572247e-06, "loss": 2.3064, "step": 833 }, { "epoch": 0.044742489270386265, "grad_norm": 0.5703125, "learning_rate": 1.4914163090128755e-06, "loss": 2.6276, "step": 834 }, { "epoch": 0.0447961373390558, "grad_norm": 0.5625, "learning_rate": 1.4932045779685266e-06, "loss": 2.6878, "step": 835 }, { "epoch": 0.04484978540772532, "grad_norm": 0.470703125, "learning_rate": 1.4949928469241775e-06, "loss": 2.4363, "step": 836 }, { "epoch": 0.04490343347639485, "grad_norm": 0.408203125, "learning_rate": 1.4967811158798284e-06, "loss": 2.3689, "step": 837 }, { "epoch": 0.04495708154506438, "grad_norm": 0.451171875, "learning_rate": 1.4985693848354793e-06, "loss": 2.2237, "step": 838 }, { "epoch": 0.0450107296137339, "grad_norm": 0.48046875, "learning_rate": 1.5003576537911301e-06, "loss": 2.4521, "step": 839 }, { "epoch": 0.045064377682403435, "grad_norm": 0.76171875, "learning_rate": 1.5021459227467814e-06, "loss": 2.3089, "step": 840 }, { "epoch": 0.04511802575107296, "grad_norm": 0.3671875, "learning_rate": 1.5039341917024323e-06, "loss": 2.4181, "step": 841 }, { "epoch": 0.04517167381974249, "grad_norm": 0.375, "learning_rate": 1.5057224606580832e-06, "loss": 2.5944, "step": 842 }, { "epoch": 0.045225321888412016, "grad_norm": 0.486328125, "learning_rate": 1.507510729613734e-06, "loss": 2.4615, "step": 843 }, { "epoch": 0.04527896995708155, "grad_norm": 0.421875, "learning_rate": 1.509298998569385e-06, "loss": 2.2305, "step": 844 }, { "epoch": 0.04533261802575107, "grad_norm": 0.3671875, "learning_rate": 1.511087267525036e-06, "loss": 2.54, "step": 845 }, { "epoch": 0.0453862660944206, "grad_norm": 0.353515625, "learning_rate": 1.512875536480687e-06, "loss": 2.7056, "step": 846 }, { "epoch": 0.04543991416309013, "grad_norm": 0.328125, "learning_rate": 1.5146638054363378e-06, "loss": 2.3568, "step": 847 }, { "epoch": 0.045493562231759654, "grad_norm": 0.421875, "learning_rate": 1.5164520743919886e-06, "loss": 2.4975, "step": 848 }, { "epoch": 0.045547210300429186, "grad_norm": 0.357421875, "learning_rate": 1.5182403433476395e-06, "loss": 2.5089, "step": 849 }, { "epoch": 0.04560085836909871, "grad_norm": 0.5234375, "learning_rate": 1.5200286123032906e-06, "loss": 2.3455, "step": 850 }, { "epoch": 0.04565450643776824, "grad_norm": 0.375, "learning_rate": 1.5218168812589415e-06, "loss": 2.4103, "step": 851 }, { "epoch": 0.04570815450643777, "grad_norm": 0.427734375, "learning_rate": 1.5236051502145924e-06, "loss": 2.4878, "step": 852 }, { "epoch": 0.0457618025751073, "grad_norm": 0.69921875, "learning_rate": 1.5253934191702432e-06, "loss": 1.5303, "step": 853 }, { "epoch": 0.045815450643776824, "grad_norm": 0.486328125, "learning_rate": 1.5271816881258941e-06, "loss": 2.4561, "step": 854 }, { "epoch": 0.04586909871244635, "grad_norm": 0.578125, "learning_rate": 1.5289699570815452e-06, "loss": 2.5645, "step": 855 }, { "epoch": 0.04592274678111588, "grad_norm": 0.4453125, "learning_rate": 1.530758226037196e-06, "loss": 2.4688, "step": 856 }, { "epoch": 0.045976394849785406, "grad_norm": 0.447265625, "learning_rate": 1.532546494992847e-06, "loss": 2.4702, "step": 857 }, { "epoch": 0.04603004291845494, "grad_norm": 0.310546875, "learning_rate": 1.5343347639484978e-06, "loss": 2.3451, "step": 858 }, { "epoch": 0.04608369098712446, "grad_norm": 0.314453125, "learning_rate": 1.5361230329041491e-06, "loss": 2.3625, "step": 859 }, { "epoch": 0.046137339055793994, "grad_norm": 0.33203125, "learning_rate": 1.5379113018598e-06, "loss": 2.6092, "step": 860 }, { "epoch": 0.04619098712446352, "grad_norm": 0.3203125, "learning_rate": 1.5396995708154509e-06, "loss": 2.4964, "step": 861 }, { "epoch": 0.04624463519313305, "grad_norm": 0.36328125, "learning_rate": 1.5414878397711018e-06, "loss": 2.5576, "step": 862 }, { "epoch": 0.046298283261802575, "grad_norm": 0.423828125, "learning_rate": 1.5432761087267526e-06, "loss": 2.1949, "step": 863 }, { "epoch": 0.0463519313304721, "grad_norm": 0.349609375, "learning_rate": 1.5450643776824037e-06, "loss": 2.4155, "step": 864 }, { "epoch": 0.04640557939914163, "grad_norm": 0.4296875, "learning_rate": 1.5468526466380546e-06, "loss": 2.5361, "step": 865 }, { "epoch": 0.04645922746781116, "grad_norm": 0.2890625, "learning_rate": 1.5486409155937055e-06, "loss": 2.3784, "step": 866 }, { "epoch": 0.04651287553648069, "grad_norm": 0.333984375, "learning_rate": 1.5504291845493563e-06, "loss": 2.4383, "step": 867 }, { "epoch": 0.04656652360515021, "grad_norm": 0.322265625, "learning_rate": 1.5522174535050072e-06, "loss": 2.2894, "step": 868 }, { "epoch": 0.046620171673819745, "grad_norm": 0.44921875, "learning_rate": 1.5540057224606583e-06, "loss": 2.534, "step": 869 }, { "epoch": 0.04667381974248927, "grad_norm": 0.359375, "learning_rate": 1.5557939914163092e-06, "loss": 2.1384, "step": 870 }, { "epoch": 0.0467274678111588, "grad_norm": 0.345703125, "learning_rate": 1.55758226037196e-06, "loss": 2.3173, "step": 871 }, { "epoch": 0.046781115879828326, "grad_norm": 0.296875, "learning_rate": 1.559370529327611e-06, "loss": 2.4922, "step": 872 }, { "epoch": 0.04683476394849785, "grad_norm": 0.419921875, "learning_rate": 1.5611587982832618e-06, "loss": 2.47, "step": 873 }, { "epoch": 0.04688841201716738, "grad_norm": 0.42578125, "learning_rate": 1.562947067238913e-06, "loss": 2.1809, "step": 874 }, { "epoch": 0.04694206008583691, "grad_norm": 0.365234375, "learning_rate": 1.5647353361945638e-06, "loss": 2.5304, "step": 875 }, { "epoch": 0.04699570815450644, "grad_norm": 0.345703125, "learning_rate": 1.5665236051502147e-06, "loss": 2.4909, "step": 876 }, { "epoch": 0.047049356223175964, "grad_norm": 0.61328125, "learning_rate": 1.5683118741058655e-06, "loss": 2.4724, "step": 877 }, { "epoch": 0.047103004291845496, "grad_norm": 0.38671875, "learning_rate": 1.5701001430615164e-06, "loss": 2.4594, "step": 878 }, { "epoch": 0.04715665236051502, "grad_norm": 0.39453125, "learning_rate": 1.5718884120171677e-06, "loss": 2.3539, "step": 879 }, { "epoch": 0.04721030042918455, "grad_norm": 0.439453125, "learning_rate": 1.5736766809728186e-06, "loss": 2.1959, "step": 880 }, { "epoch": 0.04726394849785408, "grad_norm": 0.2734375, "learning_rate": 1.5754649499284695e-06, "loss": 2.2715, "step": 881 }, { "epoch": 0.0473175965665236, "grad_norm": 0.84375, "learning_rate": 1.5772532188841203e-06, "loss": 2.4584, "step": 882 }, { "epoch": 0.047371244635193134, "grad_norm": 0.3984375, "learning_rate": 1.5790414878397712e-06, "loss": 2.4965, "step": 883 }, { "epoch": 0.04742489270386266, "grad_norm": 0.447265625, "learning_rate": 1.5808297567954223e-06, "loss": 2.4237, "step": 884 }, { "epoch": 0.04747854077253219, "grad_norm": 0.482421875, "learning_rate": 1.5826180257510732e-06, "loss": 2.5613, "step": 885 }, { "epoch": 0.047532188841201715, "grad_norm": 0.419921875, "learning_rate": 1.584406294706724e-06, "loss": 2.383, "step": 886 }, { "epoch": 0.04758583690987125, "grad_norm": 0.3671875, "learning_rate": 1.586194563662375e-06, "loss": 2.474, "step": 887 }, { "epoch": 0.04763948497854077, "grad_norm": 0.33984375, "learning_rate": 1.5879828326180258e-06, "loss": 2.6701, "step": 888 }, { "epoch": 0.047693133047210304, "grad_norm": 0.7265625, "learning_rate": 1.5897711015736769e-06, "loss": 2.6443, "step": 889 }, { "epoch": 0.04774678111587983, "grad_norm": 0.470703125, "learning_rate": 1.5915593705293278e-06, "loss": 2.6659, "step": 890 }, { "epoch": 0.04780042918454935, "grad_norm": 0.310546875, "learning_rate": 1.5933476394849786e-06, "loss": 2.3637, "step": 891 }, { "epoch": 0.047854077253218885, "grad_norm": 0.515625, "learning_rate": 1.5951359084406295e-06, "loss": 2.5664, "step": 892 }, { "epoch": 0.04790772532188841, "grad_norm": 0.26953125, "learning_rate": 1.5969241773962804e-06, "loss": 2.533, "step": 893 }, { "epoch": 0.04796137339055794, "grad_norm": 0.357421875, "learning_rate": 1.5987124463519315e-06, "loss": 2.4087, "step": 894 }, { "epoch": 0.04801502145922747, "grad_norm": 0.326171875, "learning_rate": 1.6005007153075824e-06, "loss": 2.5388, "step": 895 }, { "epoch": 0.048068669527897, "grad_norm": 0.302734375, "learning_rate": 1.6022889842632332e-06, "loss": 2.4701, "step": 896 }, { "epoch": 0.04812231759656652, "grad_norm": 0.376953125, "learning_rate": 1.604077253218884e-06, "loss": 2.5579, "step": 897 }, { "epoch": 0.048175965665236055, "grad_norm": 0.25, "learning_rate": 1.605865522174535e-06, "loss": 2.3166, "step": 898 }, { "epoch": 0.04822961373390558, "grad_norm": 0.58203125, "learning_rate": 1.6076537911301863e-06, "loss": 2.5638, "step": 899 }, { "epoch": 0.048283261802575105, "grad_norm": 0.26953125, "learning_rate": 1.6094420600858372e-06, "loss": 2.3247, "step": 900 }, { "epoch": 0.048336909871244636, "grad_norm": 0.443359375, "learning_rate": 1.611230329041488e-06, "loss": 2.5444, "step": 901 }, { "epoch": 0.04839055793991416, "grad_norm": 0.41015625, "learning_rate": 1.613018597997139e-06, "loss": 2.4365, "step": 902 }, { "epoch": 0.04844420600858369, "grad_norm": 0.298828125, "learning_rate": 1.6148068669527898e-06, "loss": 2.3902, "step": 903 }, { "epoch": 0.04849785407725322, "grad_norm": 0.37109375, "learning_rate": 1.6165951359084409e-06, "loss": 2.3938, "step": 904 }, { "epoch": 0.04855150214592275, "grad_norm": 0.396484375, "learning_rate": 1.6183834048640917e-06, "loss": 2.383, "step": 905 }, { "epoch": 0.048605150214592274, "grad_norm": 0.306640625, "learning_rate": 1.6201716738197426e-06, "loss": 2.2779, "step": 906 }, { "epoch": 0.048658798283261806, "grad_norm": 0.357421875, "learning_rate": 1.6219599427753935e-06, "loss": 2.3873, "step": 907 }, { "epoch": 0.04871244635193133, "grad_norm": 0.302734375, "learning_rate": 1.6237482117310444e-06, "loss": 2.5644, "step": 908 }, { "epoch": 0.048766094420600856, "grad_norm": 0.451171875, "learning_rate": 1.6255364806866955e-06, "loss": 2.3583, "step": 909 }, { "epoch": 0.04881974248927039, "grad_norm": 0.55078125, "learning_rate": 1.6273247496423463e-06, "loss": 2.094, "step": 910 }, { "epoch": 0.04887339055793991, "grad_norm": 0.337890625, "learning_rate": 1.6291130185979972e-06, "loss": 2.0487, "step": 911 }, { "epoch": 0.048927038626609444, "grad_norm": 0.28515625, "learning_rate": 1.630901287553648e-06, "loss": 2.4457, "step": 912 }, { "epoch": 0.04898068669527897, "grad_norm": 0.51171875, "learning_rate": 1.632689556509299e-06, "loss": 2.4113, "step": 913 }, { "epoch": 0.0490343347639485, "grad_norm": 0.65234375, "learning_rate": 1.63447782546495e-06, "loss": 2.4289, "step": 914 }, { "epoch": 0.049087982832618025, "grad_norm": 1.6640625, "learning_rate": 1.636266094420601e-06, "loss": 2.414, "step": 915 }, { "epoch": 0.04914163090128755, "grad_norm": 0.333984375, "learning_rate": 1.6380543633762518e-06, "loss": 2.4622, "step": 916 }, { "epoch": 0.04919527896995708, "grad_norm": 0.369140625, "learning_rate": 1.6398426323319027e-06, "loss": 2.5566, "step": 917 }, { "epoch": 0.04924892703862661, "grad_norm": 0.361328125, "learning_rate": 1.6416309012875536e-06, "loss": 2.6255, "step": 918 }, { "epoch": 0.04930257510729614, "grad_norm": 0.341796875, "learning_rate": 1.6434191702432049e-06, "loss": 2.3611, "step": 919 }, { "epoch": 0.04935622317596566, "grad_norm": 0.392578125, "learning_rate": 1.6452074391988557e-06, "loss": 2.1584, "step": 920 }, { "epoch": 0.049409871244635195, "grad_norm": 0.470703125, "learning_rate": 1.6469957081545066e-06, "loss": 2.3039, "step": 921 }, { "epoch": 0.04946351931330472, "grad_norm": 0.404296875, "learning_rate": 1.6487839771101575e-06, "loss": 2.0669, "step": 922 }, { "epoch": 0.04951716738197425, "grad_norm": 0.279296875, "learning_rate": 1.6505722460658086e-06, "loss": 2.3928, "step": 923 }, { "epoch": 0.049570815450643776, "grad_norm": 0.43359375, "learning_rate": 1.6523605150214594e-06, "loss": 2.4045, "step": 924 }, { "epoch": 0.0496244635193133, "grad_norm": 0.392578125, "learning_rate": 1.6541487839771103e-06, "loss": 1.9758, "step": 925 }, { "epoch": 0.04967811158798283, "grad_norm": 0.33984375, "learning_rate": 1.6559370529327612e-06, "loss": 2.2327, "step": 926 }, { "epoch": 0.04973175965665236, "grad_norm": 0.37890625, "learning_rate": 1.657725321888412e-06, "loss": 2.3561, "step": 927 }, { "epoch": 0.04978540772532189, "grad_norm": 0.33203125, "learning_rate": 1.6595135908440632e-06, "loss": 2.4657, "step": 928 }, { "epoch": 0.049839055793991414, "grad_norm": 0.349609375, "learning_rate": 1.661301859799714e-06, "loss": 2.3788, "step": 929 }, { "epoch": 0.049892703862660946, "grad_norm": 0.41796875, "learning_rate": 1.663090128755365e-06, "loss": 2.604, "step": 930 }, { "epoch": 0.04994635193133047, "grad_norm": 0.6640625, "learning_rate": 1.6648783977110158e-06, "loss": 2.4638, "step": 931 }, { "epoch": 0.05, "grad_norm": 0.453125, "learning_rate": 1.6666666666666667e-06, "loss": 2.456, "step": 932 }, { "epoch": 0.05005364806866953, "grad_norm": 0.57421875, "learning_rate": 1.668454935622318e-06, "loss": 2.3991, "step": 933 }, { "epoch": 0.05010729613733905, "grad_norm": 0.4140625, "learning_rate": 1.6702432045779688e-06, "loss": 1.7884, "step": 934 }, { "epoch": 0.050160944206008584, "grad_norm": 0.88671875, "learning_rate": 1.6720314735336197e-06, "loss": 2.6017, "step": 935 }, { "epoch": 0.05021459227467811, "grad_norm": 0.322265625, "learning_rate": 1.6738197424892704e-06, "loss": 2.4886, "step": 936 }, { "epoch": 0.05026824034334764, "grad_norm": 0.484375, "learning_rate": 1.6756080114449213e-06, "loss": 2.6471, "step": 937 }, { "epoch": 0.050321888412017166, "grad_norm": 0.80859375, "learning_rate": 1.6773962804005726e-06, "loss": 2.4132, "step": 938 }, { "epoch": 0.0503755364806867, "grad_norm": 0.51953125, "learning_rate": 1.6791845493562234e-06, "loss": 1.9285, "step": 939 }, { "epoch": 0.05042918454935622, "grad_norm": 0.376953125, "learning_rate": 1.6809728183118743e-06, "loss": 2.2931, "step": 940 }, { "epoch": 0.050482832618025754, "grad_norm": 0.58203125, "learning_rate": 1.6827610872675252e-06, "loss": 2.5463, "step": 941 }, { "epoch": 0.05053648068669528, "grad_norm": 0.40234375, "learning_rate": 1.684549356223176e-06, "loss": 2.621, "step": 942 }, { "epoch": 0.050590128755364804, "grad_norm": 0.28125, "learning_rate": 1.6863376251788271e-06, "loss": 2.2759, "step": 943 }, { "epoch": 0.050643776824034335, "grad_norm": 0.380859375, "learning_rate": 1.688125894134478e-06, "loss": 2.5368, "step": 944 }, { "epoch": 0.05069742489270386, "grad_norm": 0.37109375, "learning_rate": 1.689914163090129e-06, "loss": 2.4487, "step": 945 }, { "epoch": 0.05075107296137339, "grad_norm": 0.296875, "learning_rate": 1.6917024320457798e-06, "loss": 2.407, "step": 946 }, { "epoch": 0.05080472103004292, "grad_norm": 0.546875, "learning_rate": 1.6934907010014306e-06, "loss": 2.1775, "step": 947 }, { "epoch": 0.05085836909871245, "grad_norm": 0.578125, "learning_rate": 1.6952789699570817e-06, "loss": 2.2645, "step": 948 }, { "epoch": 0.05091201716738197, "grad_norm": 0.2734375, "learning_rate": 1.6970672389127326e-06, "loss": 2.3912, "step": 949 }, { "epoch": 0.050965665236051505, "grad_norm": 0.3359375, "learning_rate": 1.6988555078683835e-06, "loss": 2.4095, "step": 950 }, { "epoch": 0.05101931330472103, "grad_norm": 0.41015625, "learning_rate": 1.7006437768240344e-06, "loss": 2.4562, "step": 951 }, { "epoch": 0.051072961373390555, "grad_norm": 0.6015625, "learning_rate": 1.7024320457796852e-06, "loss": 2.4764, "step": 952 }, { "epoch": 0.051126609442060086, "grad_norm": 0.54296875, "learning_rate": 1.7042203147353365e-06, "loss": 2.3143, "step": 953 }, { "epoch": 0.05118025751072961, "grad_norm": 0.376953125, "learning_rate": 1.7060085836909874e-06, "loss": 2.4838, "step": 954 }, { "epoch": 0.05123390557939914, "grad_norm": 0.86328125, "learning_rate": 1.7077968526466383e-06, "loss": 2.3901, "step": 955 }, { "epoch": 0.05128755364806867, "grad_norm": 0.23828125, "learning_rate": 1.7095851216022892e-06, "loss": 2.4918, "step": 956 }, { "epoch": 0.0513412017167382, "grad_norm": 0.26953125, "learning_rate": 1.71137339055794e-06, "loss": 2.4895, "step": 957 }, { "epoch": 0.051394849785407724, "grad_norm": 0.54296875, "learning_rate": 1.7131616595135911e-06, "loss": 2.5146, "step": 958 }, { "epoch": 0.051448497854077256, "grad_norm": 0.369140625, "learning_rate": 1.714949928469242e-06, "loss": 2.472, "step": 959 }, { "epoch": 0.05150214592274678, "grad_norm": 0.48046875, "learning_rate": 1.7167381974248929e-06, "loss": 2.4624, "step": 960 }, { "epoch": 0.051555793991416306, "grad_norm": 0.38671875, "learning_rate": 1.7185264663805438e-06, "loss": 2.4335, "step": 961 }, { "epoch": 0.05160944206008584, "grad_norm": 0.3125, "learning_rate": 1.7203147353361946e-06, "loss": 2.059, "step": 962 }, { "epoch": 0.05166309012875536, "grad_norm": 0.271484375, "learning_rate": 1.7221030042918457e-06, "loss": 2.4688, "step": 963 }, { "epoch": 0.051716738197424894, "grad_norm": 0.255859375, "learning_rate": 1.7238912732474966e-06, "loss": 2.1237, "step": 964 }, { "epoch": 0.05177038626609442, "grad_norm": 0.27734375, "learning_rate": 1.7256795422031475e-06, "loss": 2.3123, "step": 965 }, { "epoch": 0.05182403433476395, "grad_norm": 0.32421875, "learning_rate": 1.7274678111587983e-06, "loss": 2.1579, "step": 966 }, { "epoch": 0.051877682403433475, "grad_norm": 0.3125, "learning_rate": 1.7292560801144492e-06, "loss": 2.2758, "step": 967 }, { "epoch": 0.05193133047210301, "grad_norm": 0.419921875, "learning_rate": 1.7310443490701003e-06, "loss": 2.4724, "step": 968 }, { "epoch": 0.05198497854077253, "grad_norm": 0.443359375, "learning_rate": 1.7328326180257512e-06, "loss": 2.5202, "step": 969 }, { "epoch": 0.05203862660944206, "grad_norm": 0.40234375, "learning_rate": 1.734620886981402e-06, "loss": 1.9985, "step": 970 }, { "epoch": 0.05209227467811159, "grad_norm": 0.3046875, "learning_rate": 1.736409155937053e-06, "loss": 2.7304, "step": 971 }, { "epoch": 0.05214592274678111, "grad_norm": 0.287109375, "learning_rate": 1.7381974248927038e-06, "loss": 2.0823, "step": 972 }, { "epoch": 0.052199570815450645, "grad_norm": 0.33203125, "learning_rate": 1.7399856938483551e-06, "loss": 2.6022, "step": 973 }, { "epoch": 0.05225321888412017, "grad_norm": 0.34765625, "learning_rate": 1.741773962804006e-06, "loss": 2.1868, "step": 974 }, { "epoch": 0.0523068669527897, "grad_norm": 0.50390625, "learning_rate": 1.7435622317596569e-06, "loss": 2.7772, "step": 975 }, { "epoch": 0.05236051502145923, "grad_norm": 2.59375, "learning_rate": 1.7453505007153077e-06, "loss": 2.3234, "step": 976 }, { "epoch": 0.05241416309012876, "grad_norm": 0.365234375, "learning_rate": 1.7471387696709586e-06, "loss": 2.3316, "step": 977 }, { "epoch": 0.05246781115879828, "grad_norm": 0.306640625, "learning_rate": 1.7489270386266097e-06, "loss": 2.5826, "step": 978 }, { "epoch": 0.05252145922746781, "grad_norm": 0.310546875, "learning_rate": 1.7507153075822606e-06, "loss": 2.4483, "step": 979 }, { "epoch": 0.05257510729613734, "grad_norm": 0.369140625, "learning_rate": 1.7525035765379115e-06, "loss": 2.4893, "step": 980 }, { "epoch": 0.052628755364806865, "grad_norm": 0.33984375, "learning_rate": 1.7542918454935623e-06, "loss": 2.5281, "step": 981 }, { "epoch": 0.052682403433476396, "grad_norm": 0.310546875, "learning_rate": 1.7560801144492132e-06, "loss": 2.3841, "step": 982 }, { "epoch": 0.05273605150214592, "grad_norm": 0.384765625, "learning_rate": 1.7578683834048643e-06, "loss": 2.3863, "step": 983 }, { "epoch": 0.05278969957081545, "grad_norm": 0.28515625, "learning_rate": 1.7596566523605152e-06, "loss": 2.4567, "step": 984 }, { "epoch": 0.05284334763948498, "grad_norm": 0.484375, "learning_rate": 1.761444921316166e-06, "loss": 2.6365, "step": 985 }, { "epoch": 0.05289699570815451, "grad_norm": 0.474609375, "learning_rate": 1.763233190271817e-06, "loss": 2.6337, "step": 986 }, { "epoch": 0.052950643776824034, "grad_norm": 0.396484375, "learning_rate": 1.765021459227468e-06, "loss": 2.5241, "step": 987 }, { "epoch": 0.05300429184549356, "grad_norm": 0.455078125, "learning_rate": 1.7668097281831189e-06, "loss": 2.0958, "step": 988 }, { "epoch": 0.05305793991416309, "grad_norm": 0.333984375, "learning_rate": 1.7685979971387698e-06, "loss": 2.6487, "step": 989 }, { "epoch": 0.053111587982832616, "grad_norm": 0.77734375, "learning_rate": 1.7703862660944206e-06, "loss": 2.4158, "step": 990 }, { "epoch": 0.05316523605150215, "grad_norm": 0.396484375, "learning_rate": 1.7721745350500715e-06, "loss": 2.5518, "step": 991 }, { "epoch": 0.05321888412017167, "grad_norm": 0.3515625, "learning_rate": 1.7739628040057228e-06, "loss": 2.4129, "step": 992 }, { "epoch": 0.053272532188841204, "grad_norm": 0.43359375, "learning_rate": 1.7757510729613737e-06, "loss": 2.3807, "step": 993 }, { "epoch": 0.05332618025751073, "grad_norm": 0.490234375, "learning_rate": 1.7775393419170246e-06, "loss": 2.1954, "step": 994 }, { "epoch": 0.05337982832618026, "grad_norm": 0.369140625, "learning_rate": 1.7793276108726754e-06, "loss": 2.3352, "step": 995 }, { "epoch": 0.053433476394849785, "grad_norm": 0.337890625, "learning_rate": 1.7811158798283263e-06, "loss": 2.049, "step": 996 }, { "epoch": 0.05348712446351931, "grad_norm": 0.26171875, "learning_rate": 1.7829041487839774e-06, "loss": 2.2232, "step": 997 }, { "epoch": 0.05354077253218884, "grad_norm": 0.55859375, "learning_rate": 1.7846924177396283e-06, "loss": 2.9451, "step": 998 }, { "epoch": 0.05359442060085837, "grad_norm": 0.451171875, "learning_rate": 1.7864806866952792e-06, "loss": 2.3928, "step": 999 }, { "epoch": 0.0536480686695279, "grad_norm": 0.384765625, "learning_rate": 1.78826895565093e-06, "loss": 2.4644, "step": 1000 }, { "epoch": 0.05370171673819742, "grad_norm": 0.287109375, "learning_rate": 1.790057224606581e-06, "loss": 2.571, "step": 1001 }, { "epoch": 0.053755364806866955, "grad_norm": 0.28515625, "learning_rate": 1.791845493562232e-06, "loss": 2.3715, "step": 1002 }, { "epoch": 0.05380901287553648, "grad_norm": 0.337890625, "learning_rate": 1.7936337625178829e-06, "loss": 2.13, "step": 1003 }, { "epoch": 0.05386266094420601, "grad_norm": 0.59375, "learning_rate": 1.7954220314735337e-06, "loss": 2.2157, "step": 1004 }, { "epoch": 0.053916309012875537, "grad_norm": 0.50390625, "learning_rate": 1.7972103004291846e-06, "loss": 2.311, "step": 1005 }, { "epoch": 0.05396995708154506, "grad_norm": 0.28515625, "learning_rate": 1.7989985693848355e-06, "loss": 2.456, "step": 1006 }, { "epoch": 0.05402360515021459, "grad_norm": 1.3046875, "learning_rate": 1.8007868383404866e-06, "loss": 2.3761, "step": 1007 }, { "epoch": 0.05407725321888412, "grad_norm": 0.333984375, "learning_rate": 1.8025751072961375e-06, "loss": 1.6944, "step": 1008 }, { "epoch": 0.05413090128755365, "grad_norm": 2.09375, "learning_rate": 1.8043633762517883e-06, "loss": 2.435, "step": 1009 }, { "epoch": 0.054184549356223174, "grad_norm": 0.984375, "learning_rate": 1.8061516452074392e-06, "loss": 2.4782, "step": 1010 }, { "epoch": 0.054238197424892706, "grad_norm": 0.451171875, "learning_rate": 1.80793991416309e-06, "loss": 2.5311, "step": 1011 }, { "epoch": 0.05429184549356223, "grad_norm": 0.333984375, "learning_rate": 1.8097281831187414e-06, "loss": 2.3233, "step": 1012 }, { "epoch": 0.05434549356223176, "grad_norm": 0.80859375, "learning_rate": 1.8115164520743923e-06, "loss": 2.5227, "step": 1013 }, { "epoch": 0.05439914163090129, "grad_norm": 0.33984375, "learning_rate": 1.8133047210300431e-06, "loss": 2.1428, "step": 1014 }, { "epoch": 0.05445278969957081, "grad_norm": 0.6328125, "learning_rate": 1.815092989985694e-06, "loss": 2.563, "step": 1015 }, { "epoch": 0.054506437768240344, "grad_norm": 0.306640625, "learning_rate": 1.8168812589413449e-06, "loss": 2.4096, "step": 1016 }, { "epoch": 0.05456008583690987, "grad_norm": 0.353515625, "learning_rate": 1.818669527896996e-06, "loss": 2.5096, "step": 1017 }, { "epoch": 0.0546137339055794, "grad_norm": 0.3671875, "learning_rate": 1.8204577968526469e-06, "loss": 2.3854, "step": 1018 }, { "epoch": 0.054667381974248926, "grad_norm": 0.49609375, "learning_rate": 1.8222460658082977e-06, "loss": 2.4078, "step": 1019 }, { "epoch": 0.05472103004291846, "grad_norm": 0.306640625, "learning_rate": 1.8240343347639486e-06, "loss": 2.3344, "step": 1020 }, { "epoch": 0.05477467811158798, "grad_norm": 0.451171875, "learning_rate": 1.8258226037195995e-06, "loss": 2.1087, "step": 1021 }, { "epoch": 0.054828326180257514, "grad_norm": 0.318359375, "learning_rate": 1.8276108726752506e-06, "loss": 2.0824, "step": 1022 }, { "epoch": 0.05488197424892704, "grad_norm": 0.318359375, "learning_rate": 1.8293991416309014e-06, "loss": 2.3063, "step": 1023 }, { "epoch": 0.054935622317596564, "grad_norm": 0.53125, "learning_rate": 1.8311874105865523e-06, "loss": 2.5562, "step": 1024 }, { "epoch": 0.054989270386266095, "grad_norm": 0.59375, "learning_rate": 1.8329756795422032e-06, "loss": 2.5037, "step": 1025 }, { "epoch": 0.05504291845493562, "grad_norm": 0.3046875, "learning_rate": 1.834763948497854e-06, "loss": 2.2668, "step": 1026 }, { "epoch": 0.05509656652360515, "grad_norm": 0.66015625, "learning_rate": 1.8365522174535052e-06, "loss": 2.4268, "step": 1027 }, { "epoch": 0.05515021459227468, "grad_norm": 0.30859375, "learning_rate": 1.838340486409156e-06, "loss": 2.1841, "step": 1028 }, { "epoch": 0.05520386266094421, "grad_norm": 0.37109375, "learning_rate": 1.840128755364807e-06, "loss": 2.277, "step": 1029 }, { "epoch": 0.05525751072961373, "grad_norm": 0.318359375, "learning_rate": 1.8419170243204578e-06, "loss": 2.4837, "step": 1030 }, { "epoch": 0.055311158798283265, "grad_norm": 0.36328125, "learning_rate": 1.8437052932761087e-06, "loss": 2.485, "step": 1031 }, { "epoch": 0.05536480686695279, "grad_norm": 0.31640625, "learning_rate": 1.84549356223176e-06, "loss": 2.4371, "step": 1032 }, { "epoch": 0.055418454935622315, "grad_norm": 0.341796875, "learning_rate": 1.8472818311874108e-06, "loss": 2.402, "step": 1033 }, { "epoch": 0.055472103004291846, "grad_norm": 0.330078125, "learning_rate": 1.8490701001430617e-06, "loss": 2.1684, "step": 1034 }, { "epoch": 0.05552575107296137, "grad_norm": 0.33984375, "learning_rate": 1.8508583690987126e-06, "loss": 2.5576, "step": 1035 }, { "epoch": 0.0555793991416309, "grad_norm": 0.35546875, "learning_rate": 1.8526466380543635e-06, "loss": 2.4097, "step": 1036 }, { "epoch": 0.05563304721030043, "grad_norm": 0.3671875, "learning_rate": 1.8544349070100146e-06, "loss": 2.4593, "step": 1037 }, { "epoch": 0.05568669527896996, "grad_norm": 0.267578125, "learning_rate": 1.8562231759656654e-06, "loss": 2.4015, "step": 1038 }, { "epoch": 0.055740343347639484, "grad_norm": 0.30859375, "learning_rate": 1.8580114449213163e-06, "loss": 2.549, "step": 1039 }, { "epoch": 0.055793991416309016, "grad_norm": 0.435546875, "learning_rate": 1.8597997138769672e-06, "loss": 2.3902, "step": 1040 }, { "epoch": 0.05584763948497854, "grad_norm": 0.474609375, "learning_rate": 1.861587982832618e-06, "loss": 2.2929, "step": 1041 }, { "epoch": 0.055901287553648066, "grad_norm": 0.45703125, "learning_rate": 1.8633762517882691e-06, "loss": 2.2991, "step": 1042 }, { "epoch": 0.0559549356223176, "grad_norm": 0.275390625, "learning_rate": 1.86516452074392e-06, "loss": 2.4877, "step": 1043 }, { "epoch": 0.05600858369098712, "grad_norm": 0.35546875, "learning_rate": 1.866952789699571e-06, "loss": 2.427, "step": 1044 }, { "epoch": 0.056062231759656654, "grad_norm": 0.248046875, "learning_rate": 1.8687410586552218e-06, "loss": 2.2265, "step": 1045 }, { "epoch": 0.05611587982832618, "grad_norm": 0.66796875, "learning_rate": 1.8705293276108726e-06, "loss": 2.4849, "step": 1046 }, { "epoch": 0.05616952789699571, "grad_norm": 0.333984375, "learning_rate": 1.872317596566524e-06, "loss": 2.4121, "step": 1047 }, { "epoch": 0.056223175965665236, "grad_norm": 3.375, "learning_rate": 1.8741058655221748e-06, "loss": 2.4454, "step": 1048 }, { "epoch": 0.05627682403433477, "grad_norm": 0.61328125, "learning_rate": 1.8758941344778255e-06, "loss": 2.4874, "step": 1049 }, { "epoch": 0.05633047210300429, "grad_norm": 0.310546875, "learning_rate": 1.8776824034334764e-06, "loss": 2.2602, "step": 1050 }, { "epoch": 0.05638412017167382, "grad_norm": 0.453125, "learning_rate": 1.8794706723891277e-06, "loss": 2.7601, "step": 1051 }, { "epoch": 0.05643776824034335, "grad_norm": 0.388671875, "learning_rate": 1.8812589413447785e-06, "loss": 2.5427, "step": 1052 }, { "epoch": 0.056491416309012873, "grad_norm": 0.427734375, "learning_rate": 1.8830472103004294e-06, "loss": 2.3579, "step": 1053 }, { "epoch": 0.056545064377682405, "grad_norm": 0.30859375, "learning_rate": 1.8848354792560803e-06, "loss": 2.3036, "step": 1054 }, { "epoch": 0.05659871244635193, "grad_norm": 0.283203125, "learning_rate": 1.8866237482117312e-06, "loss": 2.2291, "step": 1055 }, { "epoch": 0.05665236051502146, "grad_norm": 0.3828125, "learning_rate": 1.8884120171673823e-06, "loss": 2.3804, "step": 1056 }, { "epoch": 0.05670600858369099, "grad_norm": 0.29296875, "learning_rate": 1.8902002861230331e-06, "loss": 2.2085, "step": 1057 }, { "epoch": 0.05675965665236052, "grad_norm": 0.4453125, "learning_rate": 1.891988555078684e-06, "loss": 2.4983, "step": 1058 }, { "epoch": 0.05681330472103004, "grad_norm": 0.3984375, "learning_rate": 1.8937768240343349e-06, "loss": 2.3611, "step": 1059 }, { "epoch": 0.05686695278969957, "grad_norm": 0.376953125, "learning_rate": 1.8955650929899858e-06, "loss": 2.4902, "step": 1060 }, { "epoch": 0.0569206008583691, "grad_norm": 0.427734375, "learning_rate": 1.8973533619456368e-06, "loss": 2.3957, "step": 1061 }, { "epoch": 0.056974248927038625, "grad_norm": 0.357421875, "learning_rate": 1.8991416309012877e-06, "loss": 2.2801, "step": 1062 }, { "epoch": 0.057027896995708156, "grad_norm": 0.431640625, "learning_rate": 1.9009298998569386e-06, "loss": 2.4692, "step": 1063 }, { "epoch": 0.05708154506437768, "grad_norm": 0.482421875, "learning_rate": 1.9027181688125895e-06, "loss": 1.8775, "step": 1064 }, { "epoch": 0.05713519313304721, "grad_norm": 0.27734375, "learning_rate": 1.9045064377682403e-06, "loss": 2.3998, "step": 1065 }, { "epoch": 0.05718884120171674, "grad_norm": 0.4375, "learning_rate": 1.9062947067238916e-06, "loss": 2.1461, "step": 1066 }, { "epoch": 0.05724248927038627, "grad_norm": 0.283203125, "learning_rate": 1.9080829756795425e-06, "loss": 2.1151, "step": 1067 }, { "epoch": 0.057296137339055794, "grad_norm": 0.44140625, "learning_rate": 1.9098712446351934e-06, "loss": 2.4771, "step": 1068 }, { "epoch": 0.05734978540772532, "grad_norm": 0.5, "learning_rate": 1.9116595135908443e-06, "loss": 2.6075, "step": 1069 }, { "epoch": 0.05740343347639485, "grad_norm": 0.4296875, "learning_rate": 1.913447782546495e-06, "loss": 2.2719, "step": 1070 }, { "epoch": 0.057457081545064376, "grad_norm": 0.341796875, "learning_rate": 1.915236051502146e-06, "loss": 2.1579, "step": 1071 }, { "epoch": 0.05751072961373391, "grad_norm": 0.2578125, "learning_rate": 1.917024320457797e-06, "loss": 2.2327, "step": 1072 }, { "epoch": 0.05756437768240343, "grad_norm": 0.30078125, "learning_rate": 1.9188125894134478e-06, "loss": 2.3832, "step": 1073 }, { "epoch": 0.057618025751072964, "grad_norm": 0.59765625, "learning_rate": 1.9206008583690987e-06, "loss": 1.6711, "step": 1074 }, { "epoch": 0.05767167381974249, "grad_norm": 0.37890625, "learning_rate": 1.9223891273247495e-06, "loss": 2.2593, "step": 1075 }, { "epoch": 0.05772532188841202, "grad_norm": 0.326171875, "learning_rate": 1.924177396280401e-06, "loss": 2.617, "step": 1076 }, { "epoch": 0.057778969957081545, "grad_norm": 0.3046875, "learning_rate": 1.9259656652360517e-06, "loss": 2.4137, "step": 1077 }, { "epoch": 0.05783261802575107, "grad_norm": 0.416015625, "learning_rate": 1.9277539341917026e-06, "loss": 2.3979, "step": 1078 }, { "epoch": 0.0578862660944206, "grad_norm": 0.5078125, "learning_rate": 1.9295422031473535e-06, "loss": 2.566, "step": 1079 }, { "epoch": 0.05793991416309013, "grad_norm": 0.384765625, "learning_rate": 1.9313304721030043e-06, "loss": 2.4632, "step": 1080 }, { "epoch": 0.05799356223175966, "grad_norm": 0.2890625, "learning_rate": 1.9331187410586556e-06, "loss": 2.407, "step": 1081 }, { "epoch": 0.05804721030042918, "grad_norm": 0.73046875, "learning_rate": 1.9349070100143065e-06, "loss": 1.8138, "step": 1082 }, { "epoch": 0.058100858369098715, "grad_norm": 0.31640625, "learning_rate": 1.9366952789699574e-06, "loss": 2.3145, "step": 1083 }, { "epoch": 0.05815450643776824, "grad_norm": 0.3359375, "learning_rate": 1.9384835479256083e-06, "loss": 2.3573, "step": 1084 }, { "epoch": 0.058208154506437765, "grad_norm": 0.34375, "learning_rate": 1.940271816881259e-06, "loss": 2.4484, "step": 1085 }, { "epoch": 0.0582618025751073, "grad_norm": 0.337890625, "learning_rate": 1.94206008583691e-06, "loss": 2.6359, "step": 1086 }, { "epoch": 0.05831545064377682, "grad_norm": 0.302734375, "learning_rate": 1.943848354792561e-06, "loss": 2.3065, "step": 1087 }, { "epoch": 0.05836909871244635, "grad_norm": 0.515625, "learning_rate": 1.9456366237482118e-06, "loss": 2.5201, "step": 1088 }, { "epoch": 0.05842274678111588, "grad_norm": 0.3125, "learning_rate": 1.9474248927038626e-06, "loss": 2.5494, "step": 1089 }, { "epoch": 0.05847639484978541, "grad_norm": 0.25390625, "learning_rate": 1.9492131616595135e-06, "loss": 2.3559, "step": 1090 }, { "epoch": 0.058530042918454935, "grad_norm": 0.26171875, "learning_rate": 1.951001430615165e-06, "loss": 2.3355, "step": 1091 }, { "epoch": 0.058583690987124466, "grad_norm": 0.453125, "learning_rate": 1.9527896995708157e-06, "loss": 2.2991, "step": 1092 }, { "epoch": 0.05863733905579399, "grad_norm": 0.330078125, "learning_rate": 1.9545779685264666e-06, "loss": 2.4964, "step": 1093 }, { "epoch": 0.058690987124463516, "grad_norm": 0.27734375, "learning_rate": 1.9563662374821174e-06, "loss": 2.376, "step": 1094 }, { "epoch": 0.05874463519313305, "grad_norm": 0.369140625, "learning_rate": 1.9581545064377683e-06, "loss": 2.3024, "step": 1095 }, { "epoch": 0.05879828326180257, "grad_norm": 0.390625, "learning_rate": 1.9599427753934196e-06, "loss": 2.3209, "step": 1096 }, { "epoch": 0.058851931330472104, "grad_norm": 0.515625, "learning_rate": 1.9617310443490705e-06, "loss": 2.1563, "step": 1097 }, { "epoch": 0.05890557939914163, "grad_norm": 0.333984375, "learning_rate": 1.9635193133047214e-06, "loss": 2.5832, "step": 1098 }, { "epoch": 0.05895922746781116, "grad_norm": 0.294921875, "learning_rate": 1.9653075822603722e-06, "loss": 2.2935, "step": 1099 }, { "epoch": 0.059012875536480686, "grad_norm": 0.349609375, "learning_rate": 1.967095851216023e-06, "loss": 2.4748, "step": 1100 }, { "epoch": 0.05906652360515022, "grad_norm": 0.51171875, "learning_rate": 1.968884120171674e-06, "loss": 2.4425, "step": 1101 }, { "epoch": 0.05912017167381974, "grad_norm": 0.306640625, "learning_rate": 1.970672389127325e-06, "loss": 2.5559, "step": 1102 }, { "epoch": 0.05917381974248927, "grad_norm": 0.64453125, "learning_rate": 1.9724606580829757e-06, "loss": 1.3932, "step": 1103 }, { "epoch": 0.0592274678111588, "grad_norm": 0.361328125, "learning_rate": 1.9742489270386266e-06, "loss": 2.4042, "step": 1104 }, { "epoch": 0.059281115879828324, "grad_norm": 0.33984375, "learning_rate": 1.9760371959942775e-06, "loss": 2.6409, "step": 1105 }, { "epoch": 0.059334763948497855, "grad_norm": 0.498046875, "learning_rate": 1.977825464949929e-06, "loss": 2.436, "step": 1106 }, { "epoch": 0.05938841201716738, "grad_norm": 0.3671875, "learning_rate": 1.9796137339055797e-06, "loss": 2.2128, "step": 1107 }, { "epoch": 0.05944206008583691, "grad_norm": 0.59375, "learning_rate": 1.9814020028612305e-06, "loss": 1.4469, "step": 1108 }, { "epoch": 0.05949570815450644, "grad_norm": 0.609375, "learning_rate": 1.9831902718168814e-06, "loss": 2.41, "step": 1109 }, { "epoch": 0.05954935622317597, "grad_norm": 0.3828125, "learning_rate": 1.9849785407725323e-06, "loss": 2.3678, "step": 1110 }, { "epoch": 0.05960300429184549, "grad_norm": 0.451171875, "learning_rate": 1.986766809728183e-06, "loss": 2.4742, "step": 1111 }, { "epoch": 0.05965665236051502, "grad_norm": 0.33984375, "learning_rate": 1.988555078683834e-06, "loss": 2.4125, "step": 1112 }, { "epoch": 0.05971030042918455, "grad_norm": 0.478515625, "learning_rate": 1.990343347639485e-06, "loss": 2.4311, "step": 1113 }, { "epoch": 0.059763948497854075, "grad_norm": 0.30078125, "learning_rate": 1.992131616595136e-06, "loss": 2.3403, "step": 1114 }, { "epoch": 0.059817596566523606, "grad_norm": 0.36328125, "learning_rate": 1.993919885550787e-06, "loss": 2.5221, "step": 1115 }, { "epoch": 0.05987124463519313, "grad_norm": 0.337890625, "learning_rate": 1.995708154506438e-06, "loss": 2.3283, "step": 1116 }, { "epoch": 0.05992489270386266, "grad_norm": 0.365234375, "learning_rate": 1.997496423462089e-06, "loss": 2.6577, "step": 1117 }, { "epoch": 0.05997854077253219, "grad_norm": 0.46875, "learning_rate": 1.9992846924177397e-06, "loss": 2.4224, "step": 1118 }, { "epoch": 0.06003218884120172, "grad_norm": 0.35546875, "learning_rate": 2.0010729613733906e-06, "loss": 2.4903, "step": 1119 }, { "epoch": 0.060085836909871244, "grad_norm": 0.443359375, "learning_rate": 2.002861230329042e-06, "loss": 2.3487, "step": 1120 }, { "epoch": 0.06013948497854077, "grad_norm": 0.29296875, "learning_rate": 2.0046494992846928e-06, "loss": 2.3983, "step": 1121 }, { "epoch": 0.0601931330472103, "grad_norm": 0.27734375, "learning_rate": 2.0064377682403437e-06, "loss": 2.3932, "step": 1122 }, { "epoch": 0.060246781115879826, "grad_norm": 0.71484375, "learning_rate": 2.0082260371959945e-06, "loss": 2.48, "step": 1123 }, { "epoch": 0.06030042918454936, "grad_norm": 0.373046875, "learning_rate": 2.0100143061516454e-06, "loss": 2.4728, "step": 1124 }, { "epoch": 0.06035407725321888, "grad_norm": 0.310546875, "learning_rate": 2.0118025751072963e-06, "loss": 2.4378, "step": 1125 }, { "epoch": 0.060407725321888414, "grad_norm": 0.27734375, "learning_rate": 2.013590844062947e-06, "loss": 2.435, "step": 1126 }, { "epoch": 0.06046137339055794, "grad_norm": 0.30859375, "learning_rate": 2.015379113018598e-06, "loss": 2.2777, "step": 1127 }, { "epoch": 0.06051502145922747, "grad_norm": 0.416015625, "learning_rate": 2.017167381974249e-06, "loss": 2.3112, "step": 1128 }, { "epoch": 0.060568669527896996, "grad_norm": 0.4140625, "learning_rate": 2.0189556509298998e-06, "loss": 2.0091, "step": 1129 }, { "epoch": 0.06062231759656652, "grad_norm": 0.27734375, "learning_rate": 2.020743919885551e-06, "loss": 2.4632, "step": 1130 }, { "epoch": 0.06067596566523605, "grad_norm": 0.3828125, "learning_rate": 2.022532188841202e-06, "loss": 1.7995, "step": 1131 }, { "epoch": 0.06072961373390558, "grad_norm": 0.298828125, "learning_rate": 2.024320457796853e-06, "loss": 2.605, "step": 1132 }, { "epoch": 0.06078326180257511, "grad_norm": 0.4375, "learning_rate": 2.0261087267525037e-06, "loss": 2.4429, "step": 1133 }, { "epoch": 0.060836909871244634, "grad_norm": 0.326171875, "learning_rate": 2.0278969957081546e-06, "loss": 2.3473, "step": 1134 }, { "epoch": 0.060890557939914165, "grad_norm": 0.34375, "learning_rate": 2.029685264663806e-06, "loss": 2.2276, "step": 1135 }, { "epoch": 0.06094420600858369, "grad_norm": 0.578125, "learning_rate": 2.0314735336194568e-06, "loss": 2.0804, "step": 1136 }, { "epoch": 0.06099785407725322, "grad_norm": 0.337890625, "learning_rate": 2.0332618025751076e-06, "loss": 2.2593, "step": 1137 }, { "epoch": 0.06105150214592275, "grad_norm": 0.328125, "learning_rate": 2.0350500715307585e-06, "loss": 2.4027, "step": 1138 }, { "epoch": 0.06110515021459227, "grad_norm": 0.330078125, "learning_rate": 2.0368383404864094e-06, "loss": 2.4081, "step": 1139 }, { "epoch": 0.0611587982832618, "grad_norm": 0.32421875, "learning_rate": 2.0386266094420603e-06, "loss": 2.6636, "step": 1140 }, { "epoch": 0.06121244635193133, "grad_norm": 2.96875, "learning_rate": 2.040414878397711e-06, "loss": 2.3214, "step": 1141 }, { "epoch": 0.06126609442060086, "grad_norm": 0.375, "learning_rate": 2.042203147353362e-06, "loss": 2.5356, "step": 1142 }, { "epoch": 0.061319742489270385, "grad_norm": 0.302734375, "learning_rate": 2.043991416309013e-06, "loss": 2.385, "step": 1143 }, { "epoch": 0.061373390557939916, "grad_norm": 0.453125, "learning_rate": 2.0457796852646638e-06, "loss": 2.5885, "step": 1144 }, { "epoch": 0.06142703862660944, "grad_norm": 0.333984375, "learning_rate": 2.047567954220315e-06, "loss": 2.3704, "step": 1145 }, { "epoch": 0.06148068669527897, "grad_norm": 0.4296875, "learning_rate": 2.049356223175966e-06, "loss": 2.3786, "step": 1146 }, { "epoch": 0.0615343347639485, "grad_norm": 0.337890625, "learning_rate": 2.051144492131617e-06, "loss": 2.4855, "step": 1147 }, { "epoch": 0.06158798283261802, "grad_norm": 0.376953125, "learning_rate": 2.0529327610872677e-06, "loss": 2.3171, "step": 1148 }, { "epoch": 0.061641630901287554, "grad_norm": 0.412109375, "learning_rate": 2.0547210300429186e-06, "loss": 2.142, "step": 1149 }, { "epoch": 0.06169527896995708, "grad_norm": 0.28125, "learning_rate": 2.0565092989985694e-06, "loss": 2.5542, "step": 1150 }, { "epoch": 0.06174892703862661, "grad_norm": 0.330078125, "learning_rate": 2.0582975679542203e-06, "loss": 2.0082, "step": 1151 }, { "epoch": 0.061802575107296136, "grad_norm": 0.4140625, "learning_rate": 2.060085836909871e-06, "loss": 2.4426, "step": 1152 }, { "epoch": 0.06185622317596567, "grad_norm": 0.298828125, "learning_rate": 2.061874105865522e-06, "loss": 2.3753, "step": 1153 }, { "epoch": 0.06190987124463519, "grad_norm": 0.322265625, "learning_rate": 2.063662374821173e-06, "loss": 2.6331, "step": 1154 }, { "epoch": 0.061963519313304724, "grad_norm": 0.421875, "learning_rate": 2.0654506437768243e-06, "loss": 2.3861, "step": 1155 }, { "epoch": 0.06201716738197425, "grad_norm": 0.42578125, "learning_rate": 2.067238912732475e-06, "loss": 2.5383, "step": 1156 }, { "epoch": 0.062070815450643774, "grad_norm": 0.2734375, "learning_rate": 2.069027181688126e-06, "loss": 2.4629, "step": 1157 }, { "epoch": 0.062124463519313305, "grad_norm": 0.392578125, "learning_rate": 2.070815450643777e-06, "loss": 2.2688, "step": 1158 }, { "epoch": 0.06217811158798283, "grad_norm": 0.3515625, "learning_rate": 2.0726037195994278e-06, "loss": 2.3472, "step": 1159 }, { "epoch": 0.06223175965665236, "grad_norm": 0.56640625, "learning_rate": 2.074391988555079e-06, "loss": 1.9622, "step": 1160 }, { "epoch": 0.06228540772532189, "grad_norm": 0.341796875, "learning_rate": 2.07618025751073e-06, "loss": 2.5177, "step": 1161 }, { "epoch": 0.06233905579399142, "grad_norm": 0.51171875, "learning_rate": 2.077968526466381e-06, "loss": 2.4929, "step": 1162 }, { "epoch": 0.06239270386266094, "grad_norm": 0.322265625, "learning_rate": 2.0797567954220317e-06, "loss": 2.0277, "step": 1163 }, { "epoch": 0.062446351931330475, "grad_norm": 0.271484375, "learning_rate": 2.0815450643776826e-06, "loss": 2.2877, "step": 1164 }, { "epoch": 0.0625, "grad_norm": 0.322265625, "learning_rate": 2.0833333333333334e-06, "loss": 2.5657, "step": 1165 }, { "epoch": 0.06255364806866953, "grad_norm": 0.4453125, "learning_rate": 2.0851216022889843e-06, "loss": 2.4232, "step": 1166 }, { "epoch": 0.06260729613733905, "grad_norm": 0.369140625, "learning_rate": 2.086909871244635e-06, "loss": 2.5732, "step": 1167 }, { "epoch": 0.06266094420600858, "grad_norm": 0.314453125, "learning_rate": 2.088698140200286e-06, "loss": 2.3661, "step": 1168 }, { "epoch": 0.06271459227467811, "grad_norm": 0.267578125, "learning_rate": 2.090486409155937e-06, "loss": 2.3541, "step": 1169 }, { "epoch": 0.06276824034334764, "grad_norm": 0.298828125, "learning_rate": 2.0922746781115882e-06, "loss": 2.5543, "step": 1170 }, { "epoch": 0.06282188841201716, "grad_norm": 0.296875, "learning_rate": 2.094062947067239e-06, "loss": 2.2346, "step": 1171 }, { "epoch": 0.0628755364806867, "grad_norm": 0.330078125, "learning_rate": 2.09585121602289e-06, "loss": 2.3261, "step": 1172 }, { "epoch": 0.06292918454935623, "grad_norm": 0.359375, "learning_rate": 2.097639484978541e-06, "loss": 2.4965, "step": 1173 }, { "epoch": 0.06298283261802574, "grad_norm": 0.373046875, "learning_rate": 2.0994277539341917e-06, "loss": 2.2068, "step": 1174 }, { "epoch": 0.06303648068669528, "grad_norm": 0.287109375, "learning_rate": 2.101216022889843e-06, "loss": 2.0726, "step": 1175 }, { "epoch": 0.06309012875536481, "grad_norm": 0.271484375, "learning_rate": 2.103004291845494e-06, "loss": 2.5033, "step": 1176 }, { "epoch": 0.06314377682403434, "grad_norm": 0.330078125, "learning_rate": 2.104792560801145e-06, "loss": 2.4512, "step": 1177 }, { "epoch": 0.06319742489270386, "grad_norm": 0.333984375, "learning_rate": 2.1065808297567957e-06, "loss": 2.4084, "step": 1178 }, { "epoch": 0.06325107296137339, "grad_norm": 0.390625, "learning_rate": 2.1083690987124465e-06, "loss": 2.4135, "step": 1179 }, { "epoch": 0.06330472103004292, "grad_norm": 0.298828125, "learning_rate": 2.1101573676680974e-06, "loss": 2.4083, "step": 1180 }, { "epoch": 0.06335836909871245, "grad_norm": 0.390625, "learning_rate": 2.1119456366237483e-06, "loss": 2.1365, "step": 1181 }, { "epoch": 0.06341201716738197, "grad_norm": 0.33203125, "learning_rate": 2.113733905579399e-06, "loss": 2.3492, "step": 1182 }, { "epoch": 0.0634656652360515, "grad_norm": 1.953125, "learning_rate": 2.11552217453505e-06, "loss": 2.4888, "step": 1183 }, { "epoch": 0.06351931330472103, "grad_norm": 0.357421875, "learning_rate": 2.1173104434907013e-06, "loss": 2.3181, "step": 1184 }, { "epoch": 0.06357296137339055, "grad_norm": 0.466796875, "learning_rate": 2.1190987124463522e-06, "loss": 2.4765, "step": 1185 }, { "epoch": 0.06362660944206008, "grad_norm": 0.39453125, "learning_rate": 2.120886981402003e-06, "loss": 2.355, "step": 1186 }, { "epoch": 0.06368025751072962, "grad_norm": 0.326171875, "learning_rate": 2.122675250357654e-06, "loss": 2.5157, "step": 1187 }, { "epoch": 0.06373390557939915, "grad_norm": 0.373046875, "learning_rate": 2.124463519313305e-06, "loss": 2.4965, "step": 1188 }, { "epoch": 0.06378755364806867, "grad_norm": 0.890625, "learning_rate": 2.126251788268956e-06, "loss": 2.5433, "step": 1189 }, { "epoch": 0.0638412017167382, "grad_norm": 2.9375, "learning_rate": 2.128040057224607e-06, "loss": 2.4044, "step": 1190 }, { "epoch": 0.06389484978540773, "grad_norm": 0.279296875, "learning_rate": 2.129828326180258e-06, "loss": 2.2361, "step": 1191 }, { "epoch": 0.06394849785407725, "grad_norm": 0.298828125, "learning_rate": 2.1316165951359088e-06, "loss": 2.4295, "step": 1192 }, { "epoch": 0.06400214592274678, "grad_norm": 0.32421875, "learning_rate": 2.1334048640915597e-06, "loss": 2.5419, "step": 1193 }, { "epoch": 0.06405579399141631, "grad_norm": 0.32421875, "learning_rate": 2.1351931330472105e-06, "loss": 2.4801, "step": 1194 }, { "epoch": 0.06410944206008584, "grad_norm": 0.40234375, "learning_rate": 2.1369814020028614e-06, "loss": 2.5202, "step": 1195 }, { "epoch": 0.06416309012875536, "grad_norm": 0.3046875, "learning_rate": 2.1387696709585123e-06, "loss": 2.5454, "step": 1196 }, { "epoch": 0.06421673819742489, "grad_norm": 0.287109375, "learning_rate": 2.140557939914163e-06, "loss": 2.526, "step": 1197 }, { "epoch": 0.06427038626609442, "grad_norm": 0.435546875, "learning_rate": 2.142346208869814e-06, "loss": 2.5152, "step": 1198 }, { "epoch": 0.06432403433476395, "grad_norm": 0.287109375, "learning_rate": 2.1441344778254653e-06, "loss": 2.2807, "step": 1199 }, { "epoch": 0.06437768240343347, "grad_norm": 0.390625, "learning_rate": 2.145922746781116e-06, "loss": 2.2159, "step": 1200 }, { "epoch": 0.064431330472103, "grad_norm": 0.32421875, "learning_rate": 2.147711015736767e-06, "loss": 2.5431, "step": 1201 }, { "epoch": 0.06448497854077254, "grad_norm": 0.349609375, "learning_rate": 2.149499284692418e-06, "loss": 2.7205, "step": 1202 }, { "epoch": 0.06453862660944205, "grad_norm": 0.35546875, "learning_rate": 2.151287553648069e-06, "loss": 2.2434, "step": 1203 }, { "epoch": 0.06459227467811159, "grad_norm": 0.357421875, "learning_rate": 2.1530758226037197e-06, "loss": 2.5699, "step": 1204 }, { "epoch": 0.06464592274678112, "grad_norm": 0.28125, "learning_rate": 2.1548640915593706e-06, "loss": 2.4834, "step": 1205 }, { "epoch": 0.06469957081545065, "grad_norm": 0.29296875, "learning_rate": 2.1566523605150215e-06, "loss": 2.2271, "step": 1206 }, { "epoch": 0.06475321888412017, "grad_norm": 0.283203125, "learning_rate": 2.1584406294706723e-06, "loss": 2.396, "step": 1207 }, { "epoch": 0.0648068669527897, "grad_norm": 0.400390625, "learning_rate": 2.160228898426323e-06, "loss": 2.2544, "step": 1208 }, { "epoch": 0.06486051502145923, "grad_norm": 0.310546875, "learning_rate": 2.1620171673819745e-06, "loss": 2.4526, "step": 1209 }, { "epoch": 0.06491416309012875, "grad_norm": 0.27734375, "learning_rate": 2.1638054363376254e-06, "loss": 2.3646, "step": 1210 }, { "epoch": 0.06496781115879828, "grad_norm": 0.51953125, "learning_rate": 2.1655937052932763e-06, "loss": 2.5277, "step": 1211 }, { "epoch": 0.06502145922746781, "grad_norm": 0.310546875, "learning_rate": 2.167381974248927e-06, "loss": 2.2434, "step": 1212 }, { "epoch": 0.06507510729613734, "grad_norm": 0.4375, "learning_rate": 2.169170243204578e-06, "loss": 2.3638, "step": 1213 }, { "epoch": 0.06512875536480686, "grad_norm": 0.271484375, "learning_rate": 2.1709585121602293e-06, "loss": 2.3775, "step": 1214 }, { "epoch": 0.0651824034334764, "grad_norm": 0.326171875, "learning_rate": 2.17274678111588e-06, "loss": 2.2663, "step": 1215 }, { "epoch": 0.06523605150214593, "grad_norm": 0.28515625, "learning_rate": 2.174535050071531e-06, "loss": 2.4125, "step": 1216 }, { "epoch": 0.06528969957081546, "grad_norm": 0.369140625, "learning_rate": 2.176323319027182e-06, "loss": 2.2322, "step": 1217 }, { "epoch": 0.06534334763948497, "grad_norm": 0.482421875, "learning_rate": 2.178111587982833e-06, "loss": 2.4743, "step": 1218 }, { "epoch": 0.0653969957081545, "grad_norm": 0.44921875, "learning_rate": 2.1798998569384837e-06, "loss": 1.6516, "step": 1219 }, { "epoch": 0.06545064377682404, "grad_norm": 0.46875, "learning_rate": 2.1816881258941346e-06, "loss": 1.718, "step": 1220 }, { "epoch": 0.06550429184549356, "grad_norm": 0.4765625, "learning_rate": 2.1834763948497854e-06, "loss": 2.493, "step": 1221 }, { "epoch": 0.06555793991416309, "grad_norm": 0.4765625, "learning_rate": 2.1852646638054363e-06, "loss": 1.6151, "step": 1222 }, { "epoch": 0.06561158798283262, "grad_norm": 0.490234375, "learning_rate": 2.187052932761087e-06, "loss": 2.2422, "step": 1223 }, { "epoch": 0.06566523605150215, "grad_norm": 0.283203125, "learning_rate": 2.1888412017167385e-06, "loss": 2.3243, "step": 1224 }, { "epoch": 0.06571888412017167, "grad_norm": 0.341796875, "learning_rate": 2.1906294706723894e-06, "loss": 2.369, "step": 1225 }, { "epoch": 0.0657725321888412, "grad_norm": 0.44140625, "learning_rate": 2.1924177396280402e-06, "loss": 2.2829, "step": 1226 }, { "epoch": 0.06582618025751073, "grad_norm": 0.2890625, "learning_rate": 2.194206008583691e-06, "loss": 2.3281, "step": 1227 }, { "epoch": 0.06587982832618025, "grad_norm": 0.8203125, "learning_rate": 2.195994277539342e-06, "loss": 2.7643, "step": 1228 }, { "epoch": 0.06593347639484978, "grad_norm": 0.330078125, "learning_rate": 2.1977825464949933e-06, "loss": 2.3876, "step": 1229 }, { "epoch": 0.06598712446351931, "grad_norm": 0.341796875, "learning_rate": 2.199570815450644e-06, "loss": 2.2415, "step": 1230 }, { "epoch": 0.06604077253218885, "grad_norm": 0.94921875, "learning_rate": 2.201359084406295e-06, "loss": 2.4427, "step": 1231 }, { "epoch": 0.06609442060085836, "grad_norm": 0.8203125, "learning_rate": 2.203147353361946e-06, "loss": 2.5501, "step": 1232 }, { "epoch": 0.0661480686695279, "grad_norm": 0.337890625, "learning_rate": 2.204935622317597e-06, "loss": 2.2966, "step": 1233 }, { "epoch": 0.06620171673819743, "grad_norm": 1.5625, "learning_rate": 2.2067238912732477e-06, "loss": 2.5513, "step": 1234 }, { "epoch": 0.06625536480686696, "grad_norm": 0.3203125, "learning_rate": 2.2085121602288986e-06, "loss": 2.294, "step": 1235 }, { "epoch": 0.06630901287553648, "grad_norm": 0.4921875, "learning_rate": 2.2103004291845494e-06, "loss": 2.2894, "step": 1236 }, { "epoch": 0.06636266094420601, "grad_norm": 0.265625, "learning_rate": 2.2120886981402003e-06, "loss": 2.4269, "step": 1237 }, { "epoch": 0.06641630901287554, "grad_norm": 0.263671875, "learning_rate": 2.213876967095851e-06, "loss": 2.2876, "step": 1238 }, { "epoch": 0.06646995708154506, "grad_norm": 0.55859375, "learning_rate": 2.2156652360515025e-06, "loss": 2.6765, "step": 1239 }, { "epoch": 0.06652360515021459, "grad_norm": 0.271484375, "learning_rate": 2.2174535050071534e-06, "loss": 2.3873, "step": 1240 }, { "epoch": 0.06657725321888412, "grad_norm": 0.59375, "learning_rate": 2.2192417739628042e-06, "loss": 2.5207, "step": 1241 }, { "epoch": 0.06663090128755365, "grad_norm": 0.60546875, "learning_rate": 2.221030042918455e-06, "loss": 1.6185, "step": 1242 }, { "epoch": 0.06668454935622317, "grad_norm": 0.470703125, "learning_rate": 2.222818311874106e-06, "loss": 2.3341, "step": 1243 }, { "epoch": 0.0667381974248927, "grad_norm": 0.53125, "learning_rate": 2.224606580829757e-06, "loss": 2.3384, "step": 1244 }, { "epoch": 0.06679184549356224, "grad_norm": 0.330078125, "learning_rate": 2.2263948497854077e-06, "loss": 2.393, "step": 1245 }, { "epoch": 0.06684549356223175, "grad_norm": 0.2890625, "learning_rate": 2.2281831187410586e-06, "loss": 2.3016, "step": 1246 }, { "epoch": 0.06689914163090128, "grad_norm": 0.53515625, "learning_rate": 2.2299713876967095e-06, "loss": 2.3031, "step": 1247 }, { "epoch": 0.06695278969957082, "grad_norm": 0.310546875, "learning_rate": 2.2317596566523608e-06, "loss": 2.5499, "step": 1248 }, { "epoch": 0.06700643776824035, "grad_norm": 0.384765625, "learning_rate": 2.2335479256080117e-06, "loss": 2.3637, "step": 1249 }, { "epoch": 0.06706008583690987, "grad_norm": 0.28515625, "learning_rate": 2.2353361945636625e-06, "loss": 2.3558, "step": 1250 }, { "epoch": 0.0671137339055794, "grad_norm": 0.310546875, "learning_rate": 2.2371244635193134e-06, "loss": 2.3235, "step": 1251 }, { "epoch": 0.06716738197424893, "grad_norm": 0.2734375, "learning_rate": 2.2389127324749643e-06, "loss": 2.4403, "step": 1252 }, { "epoch": 0.06722103004291846, "grad_norm": 0.3046875, "learning_rate": 2.2407010014306156e-06, "loss": 2.4165, "step": 1253 }, { "epoch": 0.06727467811158798, "grad_norm": 0.34765625, "learning_rate": 2.2424892703862665e-06, "loss": 2.3628, "step": 1254 }, { "epoch": 0.06732832618025751, "grad_norm": 5.71875, "learning_rate": 2.2442775393419173e-06, "loss": 2.3425, "step": 1255 }, { "epoch": 0.06738197424892704, "grad_norm": 0.3828125, "learning_rate": 2.2460658082975682e-06, "loss": 2.4959, "step": 1256 }, { "epoch": 0.06743562231759656, "grad_norm": 0.4296875, "learning_rate": 2.247854077253219e-06, "loss": 2.3719, "step": 1257 }, { "epoch": 0.06748927038626609, "grad_norm": 0.27734375, "learning_rate": 2.24964234620887e-06, "loss": 2.4724, "step": 1258 }, { "epoch": 0.06754291845493562, "grad_norm": 0.310546875, "learning_rate": 2.251430615164521e-06, "loss": 2.07, "step": 1259 }, { "epoch": 0.06759656652360516, "grad_norm": 0.412109375, "learning_rate": 2.2532188841201717e-06, "loss": 2.4505, "step": 1260 }, { "epoch": 0.06765021459227467, "grad_norm": 0.41015625, "learning_rate": 2.2550071530758226e-06, "loss": 2.3488, "step": 1261 }, { "epoch": 0.0677038626609442, "grad_norm": 0.4921875, "learning_rate": 2.2567954220314735e-06, "loss": 1.9886, "step": 1262 }, { "epoch": 0.06775751072961374, "grad_norm": 0.408203125, "learning_rate": 2.2585836909871248e-06, "loss": 2.4132, "step": 1263 }, { "epoch": 0.06781115879828326, "grad_norm": 0.87890625, "learning_rate": 2.2603719599427756e-06, "loss": 2.586, "step": 1264 }, { "epoch": 0.06786480686695279, "grad_norm": 0.28515625, "learning_rate": 2.2621602288984265e-06, "loss": 2.4215, "step": 1265 }, { "epoch": 0.06791845493562232, "grad_norm": 0.62109375, "learning_rate": 2.2639484978540774e-06, "loss": 2.8144, "step": 1266 }, { "epoch": 0.06797210300429185, "grad_norm": 0.26171875, "learning_rate": 2.2657367668097283e-06, "loss": 2.2076, "step": 1267 }, { "epoch": 0.06802575107296137, "grad_norm": 0.34375, "learning_rate": 2.2675250357653796e-06, "loss": 2.1318, "step": 1268 }, { "epoch": 0.0680793991416309, "grad_norm": 0.494140625, "learning_rate": 2.2693133047210304e-06, "loss": 2.4568, "step": 1269 }, { "epoch": 0.06813304721030043, "grad_norm": 0.296875, "learning_rate": 2.2711015736766813e-06, "loss": 2.5732, "step": 1270 }, { "epoch": 0.06818669527896996, "grad_norm": 0.29296875, "learning_rate": 2.272889842632332e-06, "loss": 2.4195, "step": 1271 }, { "epoch": 0.06824034334763948, "grad_norm": 0.33984375, "learning_rate": 2.274678111587983e-06, "loss": 2.2619, "step": 1272 }, { "epoch": 0.06829399141630901, "grad_norm": 0.275390625, "learning_rate": 2.276466380543634e-06, "loss": 2.353, "step": 1273 }, { "epoch": 0.06834763948497855, "grad_norm": 0.302734375, "learning_rate": 2.278254649499285e-06, "loss": 2.4534, "step": 1274 }, { "epoch": 0.06840128755364806, "grad_norm": 0.3671875, "learning_rate": 2.2800429184549357e-06, "loss": 1.7626, "step": 1275 }, { "epoch": 0.0684549356223176, "grad_norm": 0.3046875, "learning_rate": 2.2818311874105866e-06, "loss": 2.6983, "step": 1276 }, { "epoch": 0.06850858369098713, "grad_norm": 0.439453125, "learning_rate": 2.2836194563662375e-06, "loss": 2.5449, "step": 1277 }, { "epoch": 0.06856223175965666, "grad_norm": 0.37890625, "learning_rate": 2.2854077253218888e-06, "loss": 1.8378, "step": 1278 }, { "epoch": 0.06861587982832618, "grad_norm": 0.6875, "learning_rate": 2.2871959942775396e-06, "loss": 2.1457, "step": 1279 }, { "epoch": 0.06866952789699571, "grad_norm": 0.3515625, "learning_rate": 2.2889842632331905e-06, "loss": 2.4122, "step": 1280 }, { "epoch": 0.06872317596566524, "grad_norm": 0.29296875, "learning_rate": 2.2907725321888414e-06, "loss": 2.2969, "step": 1281 }, { "epoch": 0.06877682403433476, "grad_norm": 0.50390625, "learning_rate": 2.2925608011444923e-06, "loss": 2.4978, "step": 1282 }, { "epoch": 0.06883047210300429, "grad_norm": 0.369140625, "learning_rate": 2.2943490701001436e-06, "loss": 2.4457, "step": 1283 }, { "epoch": 0.06888412017167382, "grad_norm": 0.25, "learning_rate": 2.2961373390557944e-06, "loss": 2.2512, "step": 1284 }, { "epoch": 0.06893776824034335, "grad_norm": 0.435546875, "learning_rate": 2.2979256080114453e-06, "loss": 2.4441, "step": 1285 }, { "epoch": 0.06899141630901287, "grad_norm": 0.3984375, "learning_rate": 2.2997138769670958e-06, "loss": 2.4678, "step": 1286 }, { "epoch": 0.0690450643776824, "grad_norm": 0.291015625, "learning_rate": 2.3015021459227466e-06, "loss": 2.6685, "step": 1287 }, { "epoch": 0.06909871244635193, "grad_norm": 0.40625, "learning_rate": 2.303290414878398e-06, "loss": 2.5109, "step": 1288 }, { "epoch": 0.06915236051502147, "grad_norm": 0.65234375, "learning_rate": 2.305078683834049e-06, "loss": 2.2062, "step": 1289 }, { "epoch": 0.06920600858369098, "grad_norm": 0.97265625, "learning_rate": 2.3068669527896997e-06, "loss": 2.5004, "step": 1290 }, { "epoch": 0.06925965665236052, "grad_norm": 0.3828125, "learning_rate": 2.3086552217453506e-06, "loss": 2.2881, "step": 1291 }, { "epoch": 0.06931330472103005, "grad_norm": 0.37890625, "learning_rate": 2.3104434907010014e-06, "loss": 1.8552, "step": 1292 }, { "epoch": 0.06936695278969957, "grad_norm": 0.408203125, "learning_rate": 2.3122317596566527e-06, "loss": 1.5957, "step": 1293 }, { "epoch": 0.0694206008583691, "grad_norm": 1.078125, "learning_rate": 2.3140200286123036e-06, "loss": 1.8955, "step": 1294 }, { "epoch": 0.06947424892703863, "grad_norm": 0.388671875, "learning_rate": 2.3158082975679545e-06, "loss": 2.5858, "step": 1295 }, { "epoch": 0.06952789699570816, "grad_norm": 0.48046875, "learning_rate": 2.3175965665236054e-06, "loss": 1.6048, "step": 1296 }, { "epoch": 0.06958154506437768, "grad_norm": 0.30078125, "learning_rate": 2.3193848354792562e-06, "loss": 2.3952, "step": 1297 }, { "epoch": 0.06963519313304721, "grad_norm": 0.392578125, "learning_rate": 2.321173104434907e-06, "loss": 2.1566, "step": 1298 }, { "epoch": 0.06968884120171674, "grad_norm": 0.609375, "learning_rate": 2.322961373390558e-06, "loss": 2.546, "step": 1299 }, { "epoch": 0.06974248927038626, "grad_norm": 0.3046875, "learning_rate": 2.324749642346209e-06, "loss": 2.5943, "step": 1300 }, { "epoch": 0.06979613733905579, "grad_norm": 0.33984375, "learning_rate": 2.3265379113018597e-06, "loss": 2.2548, "step": 1301 }, { "epoch": 0.06984978540772532, "grad_norm": 0.453125, "learning_rate": 2.3283261802575106e-06, "loss": 2.5333, "step": 1302 }, { "epoch": 0.06990343347639486, "grad_norm": 0.23828125, "learning_rate": 2.330114449213162e-06, "loss": 2.1772, "step": 1303 }, { "epoch": 0.06995708154506437, "grad_norm": 0.48828125, "learning_rate": 2.331902718168813e-06, "loss": 2.5498, "step": 1304 }, { "epoch": 0.0700107296137339, "grad_norm": 0.28125, "learning_rate": 2.3336909871244637e-06, "loss": 2.3038, "step": 1305 }, { "epoch": 0.07006437768240344, "grad_norm": 0.349609375, "learning_rate": 2.3354792560801145e-06, "loss": 2.48, "step": 1306 }, { "epoch": 0.07011802575107297, "grad_norm": 0.33203125, "learning_rate": 2.337267525035766e-06, "loss": 2.5535, "step": 1307 }, { "epoch": 0.07017167381974249, "grad_norm": 0.2890625, "learning_rate": 2.3390557939914167e-06, "loss": 2.2639, "step": 1308 }, { "epoch": 0.07022532188841202, "grad_norm": 0.400390625, "learning_rate": 2.3408440629470676e-06, "loss": 2.3274, "step": 1309 }, { "epoch": 0.07027896995708155, "grad_norm": 0.298828125, "learning_rate": 2.3426323319027185e-06, "loss": 2.4543, "step": 1310 }, { "epoch": 0.07033261802575107, "grad_norm": 0.341796875, "learning_rate": 2.3444206008583693e-06, "loss": 2.2669, "step": 1311 }, { "epoch": 0.0703862660944206, "grad_norm": 0.41796875, "learning_rate": 2.3462088698140202e-06, "loss": 2.3353, "step": 1312 }, { "epoch": 0.07043991416309013, "grad_norm": 0.6171875, "learning_rate": 2.347997138769671e-06, "loss": 2.3281, "step": 1313 }, { "epoch": 0.07049356223175966, "grad_norm": 0.47265625, "learning_rate": 2.349785407725322e-06, "loss": 2.4948, "step": 1314 }, { "epoch": 0.07054721030042918, "grad_norm": 0.353515625, "learning_rate": 2.351573676680973e-06, "loss": 2.2631, "step": 1315 }, { "epoch": 0.07060085836909871, "grad_norm": 0.283203125, "learning_rate": 2.3533619456366237e-06, "loss": 2.3839, "step": 1316 }, { "epoch": 0.07065450643776824, "grad_norm": 0.359375, "learning_rate": 2.355150214592275e-06, "loss": 2.0054, "step": 1317 }, { "epoch": 0.07070815450643776, "grad_norm": 0.390625, "learning_rate": 2.356938483547926e-06, "loss": 2.5686, "step": 1318 }, { "epoch": 0.0707618025751073, "grad_norm": 0.578125, "learning_rate": 2.3587267525035768e-06, "loss": 2.5402, "step": 1319 }, { "epoch": 0.07081545064377683, "grad_norm": 0.3515625, "learning_rate": 2.3605150214592277e-06, "loss": 2.5343, "step": 1320 }, { "epoch": 0.07086909871244636, "grad_norm": 0.330078125, "learning_rate": 2.3623032904148785e-06, "loss": 2.1977, "step": 1321 }, { "epoch": 0.07092274678111588, "grad_norm": 0.271484375, "learning_rate": 2.36409155937053e-06, "loss": 2.5157, "step": 1322 }, { "epoch": 0.0709763948497854, "grad_norm": 0.30859375, "learning_rate": 2.3658798283261807e-06, "loss": 2.3208, "step": 1323 }, { "epoch": 0.07103004291845494, "grad_norm": 0.40234375, "learning_rate": 2.3676680972818316e-06, "loss": 2.5653, "step": 1324 }, { "epoch": 0.07108369098712447, "grad_norm": 0.439453125, "learning_rate": 2.3694563662374825e-06, "loss": 2.4658, "step": 1325 }, { "epoch": 0.07113733905579399, "grad_norm": 0.5390625, "learning_rate": 2.3712446351931333e-06, "loss": 2.4376, "step": 1326 }, { "epoch": 0.07119098712446352, "grad_norm": 0.26953125, "learning_rate": 2.373032904148784e-06, "loss": 2.3002, "step": 1327 }, { "epoch": 0.07124463519313305, "grad_norm": 0.30859375, "learning_rate": 2.374821173104435e-06, "loss": 2.6897, "step": 1328 }, { "epoch": 0.07129828326180257, "grad_norm": 0.318359375, "learning_rate": 2.376609442060086e-06, "loss": 2.3498, "step": 1329 }, { "epoch": 0.0713519313304721, "grad_norm": 0.3671875, "learning_rate": 2.378397711015737e-06, "loss": 2.2547, "step": 1330 }, { "epoch": 0.07140557939914163, "grad_norm": 0.38671875, "learning_rate": 2.3801859799713877e-06, "loss": 2.2929, "step": 1331 }, { "epoch": 0.07145922746781116, "grad_norm": 0.294921875, "learning_rate": 2.381974248927039e-06, "loss": 2.3957, "step": 1332 }, { "epoch": 0.07151287553648068, "grad_norm": 0.375, "learning_rate": 2.38376251788269e-06, "loss": 2.5716, "step": 1333 }, { "epoch": 0.07156652360515021, "grad_norm": 0.310546875, "learning_rate": 2.3855507868383408e-06, "loss": 2.4233, "step": 1334 }, { "epoch": 0.07162017167381975, "grad_norm": 0.294921875, "learning_rate": 2.3873390557939916e-06, "loss": 2.2534, "step": 1335 }, { "epoch": 0.07167381974248926, "grad_norm": 0.44140625, "learning_rate": 2.3891273247496425e-06, "loss": 2.4689, "step": 1336 }, { "epoch": 0.0717274678111588, "grad_norm": 0.5234375, "learning_rate": 2.3909155937052934e-06, "loss": 2.3941, "step": 1337 }, { "epoch": 0.07178111587982833, "grad_norm": 0.29296875, "learning_rate": 2.3927038626609443e-06, "loss": 2.3894, "step": 1338 }, { "epoch": 0.07183476394849786, "grad_norm": 0.43359375, "learning_rate": 2.394492131616595e-06, "loss": 2.3029, "step": 1339 }, { "epoch": 0.07188841201716738, "grad_norm": 0.306640625, "learning_rate": 2.396280400572246e-06, "loss": 2.4447, "step": 1340 }, { "epoch": 0.07194206008583691, "grad_norm": 0.34375, "learning_rate": 2.398068669527897e-06, "loss": 2.4873, "step": 1341 }, { "epoch": 0.07199570815450644, "grad_norm": 0.443359375, "learning_rate": 2.399856938483548e-06, "loss": 2.2794, "step": 1342 }, { "epoch": 0.07204935622317596, "grad_norm": 0.306640625, "learning_rate": 2.401645207439199e-06, "loss": 2.126, "step": 1343 }, { "epoch": 0.07210300429184549, "grad_norm": 0.31640625, "learning_rate": 2.40343347639485e-06, "loss": 2.2857, "step": 1344 }, { "epoch": 0.07215665236051502, "grad_norm": 0.400390625, "learning_rate": 2.405221745350501e-06, "loss": 2.4254, "step": 1345 }, { "epoch": 0.07221030042918455, "grad_norm": 0.2734375, "learning_rate": 2.4070100143061517e-06, "loss": 2.2774, "step": 1346 }, { "epoch": 0.07226394849785407, "grad_norm": 0.35546875, "learning_rate": 2.408798283261803e-06, "loss": 2.4125, "step": 1347 }, { "epoch": 0.0723175965665236, "grad_norm": 0.3203125, "learning_rate": 2.410586552217454e-06, "loss": 2.3733, "step": 1348 }, { "epoch": 0.07237124463519314, "grad_norm": 0.474609375, "learning_rate": 2.4123748211731047e-06, "loss": 1.6031, "step": 1349 }, { "epoch": 0.07242489270386267, "grad_norm": 0.52734375, "learning_rate": 2.4141630901287556e-06, "loss": 2.4995, "step": 1350 }, { "epoch": 0.07247854077253219, "grad_norm": 1.3359375, "learning_rate": 2.4159513590844065e-06, "loss": 2.3552, "step": 1351 }, { "epoch": 0.07253218884120172, "grad_norm": 0.2451171875, "learning_rate": 2.4177396280400574e-06, "loss": 2.4324, "step": 1352 }, { "epoch": 0.07258583690987125, "grad_norm": 0.283203125, "learning_rate": 2.4195278969957083e-06, "loss": 2.4589, "step": 1353 }, { "epoch": 0.07263948497854077, "grad_norm": 0.53515625, "learning_rate": 2.421316165951359e-06, "loss": 2.5053, "step": 1354 }, { "epoch": 0.0726931330472103, "grad_norm": 0.388671875, "learning_rate": 2.42310443490701e-06, "loss": 2.3562, "step": 1355 }, { "epoch": 0.07274678111587983, "grad_norm": 0.33203125, "learning_rate": 2.424892703862661e-06, "loss": 2.2064, "step": 1356 }, { "epoch": 0.07280042918454936, "grad_norm": 0.306640625, "learning_rate": 2.426680972818312e-06, "loss": 2.2803, "step": 1357 }, { "epoch": 0.07285407725321888, "grad_norm": 0.396484375, "learning_rate": 2.428469241773963e-06, "loss": 2.4075, "step": 1358 }, { "epoch": 0.07290772532188841, "grad_norm": 0.416015625, "learning_rate": 2.430257510729614e-06, "loss": 2.5274, "step": 1359 }, { "epoch": 0.07296137339055794, "grad_norm": 0.33984375, "learning_rate": 2.432045779685265e-06, "loss": 2.3402, "step": 1360 }, { "epoch": 0.07301502145922746, "grad_norm": 0.302734375, "learning_rate": 2.4338340486409157e-06, "loss": 2.5289, "step": 1361 }, { "epoch": 0.07306866952789699, "grad_norm": 0.408203125, "learning_rate": 2.435622317596567e-06, "loss": 2.535, "step": 1362 }, { "epoch": 0.07312231759656652, "grad_norm": 0.28125, "learning_rate": 2.437410586552218e-06, "loss": 2.3481, "step": 1363 }, { "epoch": 0.07317596566523606, "grad_norm": 0.33203125, "learning_rate": 2.4391988555078687e-06, "loss": 2.2138, "step": 1364 }, { "epoch": 0.07322961373390557, "grad_norm": 0.37890625, "learning_rate": 2.4409871244635196e-06, "loss": 2.1971, "step": 1365 }, { "epoch": 0.0732832618025751, "grad_norm": 0.28125, "learning_rate": 2.4427753934191705e-06, "loss": 2.6494, "step": 1366 }, { "epoch": 0.07333690987124464, "grad_norm": 0.416015625, "learning_rate": 2.4445636623748214e-06, "loss": 1.6314, "step": 1367 }, { "epoch": 0.07339055793991417, "grad_norm": 0.376953125, "learning_rate": 2.4463519313304722e-06, "loss": 1.6725, "step": 1368 }, { "epoch": 0.07344420600858369, "grad_norm": 0.287109375, "learning_rate": 2.448140200286123e-06, "loss": 2.4225, "step": 1369 }, { "epoch": 0.07349785407725322, "grad_norm": 0.64453125, "learning_rate": 2.449928469241774e-06, "loss": 2.4131, "step": 1370 }, { "epoch": 0.07355150214592275, "grad_norm": 0.42578125, "learning_rate": 2.4517167381974253e-06, "loss": 2.2076, "step": 1371 }, { "epoch": 0.07360515021459227, "grad_norm": 0.28125, "learning_rate": 2.453505007153076e-06, "loss": 2.3243, "step": 1372 }, { "epoch": 0.0736587982832618, "grad_norm": 0.32421875, "learning_rate": 2.455293276108727e-06, "loss": 2.3296, "step": 1373 }, { "epoch": 0.07371244635193133, "grad_norm": 0.46875, "learning_rate": 2.457081545064378e-06, "loss": 2.4342, "step": 1374 }, { "epoch": 0.07376609442060086, "grad_norm": 0.4296875, "learning_rate": 2.458869814020029e-06, "loss": 2.2965, "step": 1375 }, { "epoch": 0.07381974248927038, "grad_norm": 0.34375, "learning_rate": 2.4606580829756797e-06, "loss": 1.9686, "step": 1376 }, { "epoch": 0.07387339055793991, "grad_norm": 0.482421875, "learning_rate": 2.4624463519313305e-06, "loss": 2.622, "step": 1377 }, { "epoch": 0.07392703862660945, "grad_norm": 0.271484375, "learning_rate": 2.4642346208869814e-06, "loss": 2.4454, "step": 1378 }, { "epoch": 0.07398068669527896, "grad_norm": 0.353515625, "learning_rate": 2.4660228898426323e-06, "loss": 2.3148, "step": 1379 }, { "epoch": 0.0740343347639485, "grad_norm": 0.337890625, "learning_rate": 2.467811158798283e-06, "loss": 2.4646, "step": 1380 }, { "epoch": 0.07408798283261803, "grad_norm": 0.26953125, "learning_rate": 2.4695994277539345e-06, "loss": 2.4344, "step": 1381 }, { "epoch": 0.07414163090128756, "grad_norm": 0.306640625, "learning_rate": 2.4713876967095853e-06, "loss": 2.2819, "step": 1382 }, { "epoch": 0.07419527896995708, "grad_norm": 0.3359375, "learning_rate": 2.4731759656652362e-06, "loss": 2.4617, "step": 1383 }, { "epoch": 0.07424892703862661, "grad_norm": 0.416015625, "learning_rate": 2.474964234620887e-06, "loss": 2.3197, "step": 1384 }, { "epoch": 0.07430257510729614, "grad_norm": 0.66015625, "learning_rate": 2.476752503576538e-06, "loss": 2.7264, "step": 1385 }, { "epoch": 0.07435622317596567, "grad_norm": 0.515625, "learning_rate": 2.4785407725321893e-06, "loss": 2.6231, "step": 1386 }, { "epoch": 0.07440987124463519, "grad_norm": 0.4140625, "learning_rate": 2.48032904148784e-06, "loss": 2.458, "step": 1387 }, { "epoch": 0.07446351931330472, "grad_norm": 0.3359375, "learning_rate": 2.482117310443491e-06, "loss": 2.3704, "step": 1388 }, { "epoch": 0.07451716738197425, "grad_norm": 0.3046875, "learning_rate": 2.483905579399142e-06, "loss": 2.1709, "step": 1389 }, { "epoch": 0.07457081545064377, "grad_norm": 0.365234375, "learning_rate": 2.4856938483547928e-06, "loss": 2.2664, "step": 1390 }, { "epoch": 0.0746244635193133, "grad_norm": 0.384765625, "learning_rate": 2.4874821173104437e-06, "loss": 2.3177, "step": 1391 }, { "epoch": 0.07467811158798283, "grad_norm": 0.35546875, "learning_rate": 2.4892703862660945e-06, "loss": 2.3361, "step": 1392 }, { "epoch": 0.07473175965665237, "grad_norm": 0.265625, "learning_rate": 2.4910586552217454e-06, "loss": 2.3965, "step": 1393 }, { "epoch": 0.07478540772532188, "grad_norm": 0.353515625, "learning_rate": 2.4928469241773963e-06, "loss": 2.5248, "step": 1394 }, { "epoch": 0.07483905579399142, "grad_norm": 0.466796875, "learning_rate": 2.494635193133047e-06, "loss": 2.479, "step": 1395 }, { "epoch": 0.07489270386266095, "grad_norm": 0.275390625, "learning_rate": 2.4964234620886985e-06, "loss": 2.5775, "step": 1396 }, { "epoch": 0.07494635193133047, "grad_norm": 0.83203125, "learning_rate": 2.4982117310443493e-06, "loss": 2.3551, "step": 1397 }, { "epoch": 0.075, "grad_norm": 0.32421875, "learning_rate": 2.5e-06, "loss": 2.2707, "step": 1398 }, { "epoch": 0.07505364806866953, "grad_norm": 0.35546875, "learning_rate": 2.501788268955651e-06, "loss": 2.4277, "step": 1399 }, { "epoch": 0.07510729613733906, "grad_norm": 0.341796875, "learning_rate": 2.503576537911302e-06, "loss": 2.4192, "step": 1400 }, { "epoch": 0.07516094420600858, "grad_norm": 0.2578125, "learning_rate": 2.505364806866953e-06, "loss": 2.2467, "step": 1401 }, { "epoch": 0.07521459227467811, "grad_norm": 0.443359375, "learning_rate": 2.5071530758226037e-06, "loss": 2.4671, "step": 1402 }, { "epoch": 0.07526824034334764, "grad_norm": 0.25390625, "learning_rate": 2.508941344778255e-06, "loss": 2.1597, "step": 1403 }, { "epoch": 0.07532188841201717, "grad_norm": 0.310546875, "learning_rate": 2.510729613733906e-06, "loss": 2.339, "step": 1404 }, { "epoch": 0.07537553648068669, "grad_norm": 0.3671875, "learning_rate": 2.5125178826895568e-06, "loss": 2.6584, "step": 1405 }, { "epoch": 0.07542918454935622, "grad_norm": 0.546875, "learning_rate": 2.5143061516452076e-06, "loss": 2.4126, "step": 1406 }, { "epoch": 0.07548283261802576, "grad_norm": 0.32421875, "learning_rate": 2.5160944206008585e-06, "loss": 2.3315, "step": 1407 }, { "epoch": 0.07553648068669527, "grad_norm": 0.279296875, "learning_rate": 2.5178826895565094e-06, "loss": 1.815, "step": 1408 }, { "epoch": 0.0755901287553648, "grad_norm": 0.353515625, "learning_rate": 2.5196709585121603e-06, "loss": 2.3813, "step": 1409 }, { "epoch": 0.07564377682403434, "grad_norm": 0.41796875, "learning_rate": 2.521459227467811e-06, "loss": 2.3991, "step": 1410 }, { "epoch": 0.07569742489270387, "grad_norm": 0.353515625, "learning_rate": 2.523247496423462e-06, "loss": 2.45, "step": 1411 }, { "epoch": 0.07575107296137339, "grad_norm": 0.265625, "learning_rate": 2.525035765379113e-06, "loss": 2.6294, "step": 1412 }, { "epoch": 0.07580472103004292, "grad_norm": 0.4609375, "learning_rate": 2.5268240343347646e-06, "loss": 2.1842, "step": 1413 }, { "epoch": 0.07585836909871245, "grad_norm": 0.32421875, "learning_rate": 2.5286123032904155e-06, "loss": 2.5732, "step": 1414 }, { "epoch": 0.07591201716738197, "grad_norm": 0.349609375, "learning_rate": 2.5304005722460664e-06, "loss": 2.4838, "step": 1415 }, { "epoch": 0.0759656652360515, "grad_norm": 0.55859375, "learning_rate": 2.5321888412017172e-06, "loss": 2.3502, "step": 1416 }, { "epoch": 0.07601931330472103, "grad_norm": 0.39453125, "learning_rate": 2.533977110157368e-06, "loss": 2.3725, "step": 1417 }, { "epoch": 0.07607296137339056, "grad_norm": 0.40625, "learning_rate": 2.535765379113019e-06, "loss": 2.3296, "step": 1418 }, { "epoch": 0.07612660944206008, "grad_norm": 0.298828125, "learning_rate": 2.53755364806867e-06, "loss": 2.3893, "step": 1419 }, { "epoch": 0.07618025751072961, "grad_norm": 0.306640625, "learning_rate": 2.5393419170243207e-06, "loss": 2.081, "step": 1420 }, { "epoch": 0.07623390557939914, "grad_norm": 2.1875, "learning_rate": 2.5411301859799716e-06, "loss": 2.5035, "step": 1421 }, { "epoch": 0.07628755364806868, "grad_norm": 0.3359375, "learning_rate": 2.542918454935622e-06, "loss": 2.4706, "step": 1422 }, { "epoch": 0.0763412017167382, "grad_norm": 0.404296875, "learning_rate": 2.544706723891274e-06, "loss": 2.0578, "step": 1423 }, { "epoch": 0.07639484978540773, "grad_norm": 0.326171875, "learning_rate": 2.5464949928469247e-06, "loss": 2.5513, "step": 1424 }, { "epoch": 0.07644849785407726, "grad_norm": 0.34765625, "learning_rate": 2.5482832618025755e-06, "loss": 2.2776, "step": 1425 }, { "epoch": 0.07650214592274678, "grad_norm": 0.294921875, "learning_rate": 2.5500715307582264e-06, "loss": 2.3373, "step": 1426 }, { "epoch": 0.07655579399141631, "grad_norm": 0.28515625, "learning_rate": 2.5518597997138773e-06, "loss": 2.3209, "step": 1427 }, { "epoch": 0.07660944206008584, "grad_norm": 0.35546875, "learning_rate": 2.553648068669528e-06, "loss": 2.237, "step": 1428 }, { "epoch": 0.07666309012875537, "grad_norm": 0.404296875, "learning_rate": 2.555436337625179e-06, "loss": 2.4875, "step": 1429 }, { "epoch": 0.07671673819742489, "grad_norm": 0.318359375, "learning_rate": 2.55722460658083e-06, "loss": 2.1972, "step": 1430 }, { "epoch": 0.07677038626609442, "grad_norm": 0.30078125, "learning_rate": 2.559012875536481e-06, "loss": 2.5334, "step": 1431 }, { "epoch": 0.07682403433476395, "grad_norm": 0.30078125, "learning_rate": 2.5608011444921317e-06, "loss": 1.9442, "step": 1432 }, { "epoch": 0.07687768240343347, "grad_norm": 0.337890625, "learning_rate": 2.562589413447783e-06, "loss": 2.1832, "step": 1433 }, { "epoch": 0.076931330472103, "grad_norm": 0.359375, "learning_rate": 2.564377682403434e-06, "loss": 2.6028, "step": 1434 }, { "epoch": 0.07698497854077253, "grad_norm": 0.60546875, "learning_rate": 2.5661659513590847e-06, "loss": 2.4501, "step": 1435 }, { "epoch": 0.07703862660944207, "grad_norm": 0.341796875, "learning_rate": 2.5679542203147356e-06, "loss": 2.6473, "step": 1436 }, { "epoch": 0.07709227467811158, "grad_norm": 0.361328125, "learning_rate": 2.5697424892703865e-06, "loss": 2.1789, "step": 1437 }, { "epoch": 0.07714592274678111, "grad_norm": 0.41796875, "learning_rate": 2.5715307582260374e-06, "loss": 1.6642, "step": 1438 }, { "epoch": 0.07719957081545065, "grad_norm": 0.40234375, "learning_rate": 2.5733190271816882e-06, "loss": 2.4271, "step": 1439 }, { "epoch": 0.07725321888412018, "grad_norm": 0.44140625, "learning_rate": 2.575107296137339e-06, "loss": 2.4993, "step": 1440 }, { "epoch": 0.0773068669527897, "grad_norm": 0.30859375, "learning_rate": 2.57689556509299e-06, "loss": 2.2405, "step": 1441 }, { "epoch": 0.07736051502145923, "grad_norm": 0.53125, "learning_rate": 2.578683834048641e-06, "loss": 2.4233, "step": 1442 }, { "epoch": 0.07741416309012876, "grad_norm": 0.416015625, "learning_rate": 2.580472103004292e-06, "loss": 2.3012, "step": 1443 }, { "epoch": 0.07746781115879828, "grad_norm": 0.390625, "learning_rate": 2.582260371959943e-06, "loss": 2.3787, "step": 1444 }, { "epoch": 0.07752145922746781, "grad_norm": 1.78125, "learning_rate": 2.584048640915594e-06, "loss": 2.6991, "step": 1445 }, { "epoch": 0.07757510729613734, "grad_norm": 0.34375, "learning_rate": 2.5858369098712448e-06, "loss": 2.1993, "step": 1446 }, { "epoch": 0.07762875536480687, "grad_norm": 0.41015625, "learning_rate": 2.5876251788268957e-06, "loss": 2.0444, "step": 1447 }, { "epoch": 0.07768240343347639, "grad_norm": 0.28125, "learning_rate": 2.5894134477825465e-06, "loss": 2.3355, "step": 1448 }, { "epoch": 0.07773605150214592, "grad_norm": 0.32421875, "learning_rate": 2.5912017167381974e-06, "loss": 1.9716, "step": 1449 }, { "epoch": 0.07778969957081545, "grad_norm": 0.388671875, "learning_rate": 2.5929899856938483e-06, "loss": 2.6277, "step": 1450 }, { "epoch": 0.07784334763948497, "grad_norm": 0.265625, "learning_rate": 2.594778254649499e-06, "loss": 2.413, "step": 1451 }, { "epoch": 0.0778969957081545, "grad_norm": 0.50390625, "learning_rate": 2.59656652360515e-06, "loss": 2.2826, "step": 1452 }, { "epoch": 0.07795064377682404, "grad_norm": 0.361328125, "learning_rate": 2.5983547925608018e-06, "loss": 2.2317, "step": 1453 }, { "epoch": 0.07800429184549357, "grad_norm": 0.333984375, "learning_rate": 2.6001430615164526e-06, "loss": 2.289, "step": 1454 }, { "epoch": 0.07805793991416309, "grad_norm": 0.275390625, "learning_rate": 2.6019313304721035e-06, "loss": 1.9588, "step": 1455 }, { "epoch": 0.07811158798283262, "grad_norm": 0.34375, "learning_rate": 2.6037195994277544e-06, "loss": 2.3168, "step": 1456 }, { "epoch": 0.07816523605150215, "grad_norm": 0.322265625, "learning_rate": 2.6055078683834053e-06, "loss": 2.4448, "step": 1457 }, { "epoch": 0.07821888412017168, "grad_norm": 0.482421875, "learning_rate": 2.607296137339056e-06, "loss": 1.7533, "step": 1458 }, { "epoch": 0.0782725321888412, "grad_norm": 0.443359375, "learning_rate": 2.609084406294707e-06, "loss": 1.5643, "step": 1459 }, { "epoch": 0.07832618025751073, "grad_norm": 0.419921875, "learning_rate": 2.610872675250358e-06, "loss": 2.4983, "step": 1460 }, { "epoch": 0.07837982832618026, "grad_norm": 0.462890625, "learning_rate": 2.6126609442060088e-06, "loss": 2.6497, "step": 1461 }, { "epoch": 0.07843347639484978, "grad_norm": 0.3515625, "learning_rate": 2.61444921316166e-06, "loss": 2.4169, "step": 1462 }, { "epoch": 0.07848712446351931, "grad_norm": 0.330078125, "learning_rate": 2.616237482117311e-06, "loss": 2.2239, "step": 1463 }, { "epoch": 0.07854077253218884, "grad_norm": 0.330078125, "learning_rate": 2.618025751072962e-06, "loss": 2.7294, "step": 1464 }, { "epoch": 0.07859442060085838, "grad_norm": 0.271484375, "learning_rate": 2.6198140200286127e-06, "loss": 2.5084, "step": 1465 }, { "epoch": 0.07864806866952789, "grad_norm": 0.380859375, "learning_rate": 2.6216022889842636e-06, "loss": 2.4401, "step": 1466 }, { "epoch": 0.07870171673819742, "grad_norm": 0.33203125, "learning_rate": 2.6233905579399144e-06, "loss": 2.5095, "step": 1467 }, { "epoch": 0.07875536480686696, "grad_norm": 0.34765625, "learning_rate": 2.6251788268955653e-06, "loss": 2.3893, "step": 1468 }, { "epoch": 0.07880901287553647, "grad_norm": 0.40234375, "learning_rate": 2.626967095851216e-06, "loss": 2.5088, "step": 1469 }, { "epoch": 0.078862660944206, "grad_norm": 0.248046875, "learning_rate": 2.628755364806867e-06, "loss": 2.4513, "step": 1470 }, { "epoch": 0.07891630901287554, "grad_norm": 0.2314453125, "learning_rate": 2.630543633762518e-06, "loss": 2.3734, "step": 1471 }, { "epoch": 0.07896995708154507, "grad_norm": 0.44921875, "learning_rate": 2.6323319027181693e-06, "loss": 2.7865, "step": 1472 }, { "epoch": 0.07902360515021459, "grad_norm": 0.279296875, "learning_rate": 2.63412017167382e-06, "loss": 2.2712, "step": 1473 }, { "epoch": 0.07907725321888412, "grad_norm": 0.52734375, "learning_rate": 2.635908440629471e-06, "loss": 2.4487, "step": 1474 }, { "epoch": 0.07913090128755365, "grad_norm": 0.306640625, "learning_rate": 2.637696709585122e-06, "loss": 2.4802, "step": 1475 }, { "epoch": 0.07918454935622318, "grad_norm": 0.2890625, "learning_rate": 2.6394849785407728e-06, "loss": 2.3237, "step": 1476 }, { "epoch": 0.0792381974248927, "grad_norm": 0.453125, "learning_rate": 2.6412732474964236e-06, "loss": 2.3525, "step": 1477 }, { "epoch": 0.07929184549356223, "grad_norm": 0.66015625, "learning_rate": 2.6430615164520745e-06, "loss": 2.3678, "step": 1478 }, { "epoch": 0.07934549356223176, "grad_norm": 1.015625, "learning_rate": 2.6448497854077254e-06, "loss": 2.2862, "step": 1479 }, { "epoch": 0.07939914163090128, "grad_norm": 0.275390625, "learning_rate": 2.6466380543633763e-06, "loss": 2.4603, "step": 1480 }, { "epoch": 0.07945278969957081, "grad_norm": 0.3515625, "learning_rate": 2.648426323319027e-06, "loss": 2.1415, "step": 1481 }, { "epoch": 0.07950643776824035, "grad_norm": 0.275390625, "learning_rate": 2.6502145922746784e-06, "loss": 2.406, "step": 1482 }, { "epoch": 0.07956008583690988, "grad_norm": 0.37109375, "learning_rate": 2.6520028612303293e-06, "loss": 2.2256, "step": 1483 }, { "epoch": 0.0796137339055794, "grad_norm": 0.255859375, "learning_rate": 2.65379113018598e-06, "loss": 2.1231, "step": 1484 }, { "epoch": 0.07966738197424893, "grad_norm": 0.34375, "learning_rate": 2.655579399141631e-06, "loss": 2.5046, "step": 1485 }, { "epoch": 0.07972103004291846, "grad_norm": 0.279296875, "learning_rate": 2.657367668097282e-06, "loss": 2.3656, "step": 1486 }, { "epoch": 0.07977467811158798, "grad_norm": 0.46875, "learning_rate": 2.659155937052933e-06, "loss": 2.2715, "step": 1487 }, { "epoch": 0.07982832618025751, "grad_norm": 0.56640625, "learning_rate": 2.6609442060085837e-06, "loss": 2.3546, "step": 1488 }, { "epoch": 0.07988197424892704, "grad_norm": 0.345703125, "learning_rate": 2.6627324749642346e-06, "loss": 2.1245, "step": 1489 }, { "epoch": 0.07993562231759657, "grad_norm": 0.302734375, "learning_rate": 2.6645207439198854e-06, "loss": 2.5736, "step": 1490 }, { "epoch": 0.07998927038626609, "grad_norm": 0.302734375, "learning_rate": 2.6663090128755363e-06, "loss": 2.4807, "step": 1491 }, { "epoch": 0.08004291845493562, "grad_norm": 0.359375, "learning_rate": 2.668097281831188e-06, "loss": 2.332, "step": 1492 }, { "epoch": 0.08009656652360515, "grad_norm": 0.287109375, "learning_rate": 2.669885550786839e-06, "loss": 2.4803, "step": 1493 }, { "epoch": 0.08015021459227469, "grad_norm": 0.255859375, "learning_rate": 2.67167381974249e-06, "loss": 2.2893, "step": 1494 }, { "epoch": 0.0802038626609442, "grad_norm": 0.3046875, "learning_rate": 2.6734620886981407e-06, "loss": 2.4047, "step": 1495 }, { "epoch": 0.08025751072961373, "grad_norm": 0.4765625, "learning_rate": 2.6752503576537915e-06, "loss": 2.3819, "step": 1496 }, { "epoch": 0.08031115879828327, "grad_norm": 0.53125, "learning_rate": 2.6770386266094424e-06, "loss": 2.1909, "step": 1497 }, { "epoch": 0.08036480686695278, "grad_norm": 0.3984375, "learning_rate": 2.6788268955650933e-06, "loss": 2.3897, "step": 1498 }, { "epoch": 0.08041845493562232, "grad_norm": 0.435546875, "learning_rate": 2.680615164520744e-06, "loss": 2.3626, "step": 1499 }, { "epoch": 0.08047210300429185, "grad_norm": 0.236328125, "learning_rate": 2.682403433476395e-06, "loss": 2.489, "step": 1500 }, { "epoch": 0.08052575107296138, "grad_norm": 0.453125, "learning_rate": 2.684191702432046e-06, "loss": 2.3638, "step": 1501 }, { "epoch": 0.0805793991416309, "grad_norm": 0.318359375, "learning_rate": 2.6859799713876972e-06, "loss": 2.2639, "step": 1502 }, { "epoch": 0.08063304721030043, "grad_norm": 0.390625, "learning_rate": 2.687768240343348e-06, "loss": 2.3295, "step": 1503 }, { "epoch": 0.08068669527896996, "grad_norm": 0.26953125, "learning_rate": 2.689556509298999e-06, "loss": 2.3914, "step": 1504 }, { "epoch": 0.08074034334763948, "grad_norm": 0.28125, "learning_rate": 2.69134477825465e-06, "loss": 2.1996, "step": 1505 }, { "epoch": 0.08079399141630901, "grad_norm": 0.318359375, "learning_rate": 2.6931330472103007e-06, "loss": 2.3858, "step": 1506 }, { "epoch": 0.08084763948497854, "grad_norm": 0.71875, "learning_rate": 2.6949213161659516e-06, "loss": 2.3786, "step": 1507 }, { "epoch": 0.08090128755364807, "grad_norm": 0.302734375, "learning_rate": 2.6967095851216025e-06, "loss": 2.6032, "step": 1508 }, { "epoch": 0.08095493562231759, "grad_norm": 0.28515625, "learning_rate": 2.6984978540772533e-06, "loss": 2.4609, "step": 1509 }, { "epoch": 0.08100858369098712, "grad_norm": 0.341796875, "learning_rate": 2.7002861230329042e-06, "loss": 2.5199, "step": 1510 }, { "epoch": 0.08106223175965666, "grad_norm": 0.298828125, "learning_rate": 2.702074391988555e-06, "loss": 2.2739, "step": 1511 }, { "epoch": 0.08111587982832617, "grad_norm": 0.328125, "learning_rate": 2.7038626609442064e-06, "loss": 2.3826, "step": 1512 }, { "epoch": 0.0811695278969957, "grad_norm": 0.73046875, "learning_rate": 2.7056509298998573e-06, "loss": 2.5041, "step": 1513 }, { "epoch": 0.08122317596566524, "grad_norm": 0.2490234375, "learning_rate": 2.707439198855508e-06, "loss": 2.5956, "step": 1514 }, { "epoch": 0.08127682403433477, "grad_norm": 0.26953125, "learning_rate": 2.709227467811159e-06, "loss": 2.6355, "step": 1515 }, { "epoch": 0.08133047210300429, "grad_norm": 0.283203125, "learning_rate": 2.71101573676681e-06, "loss": 2.513, "step": 1516 }, { "epoch": 0.08138412017167382, "grad_norm": 0.275390625, "learning_rate": 2.7128040057224608e-06, "loss": 2.3027, "step": 1517 }, { "epoch": 0.08143776824034335, "grad_norm": 0.3046875, "learning_rate": 2.7145922746781117e-06, "loss": 2.5787, "step": 1518 }, { "epoch": 0.08149141630901288, "grad_norm": 0.34765625, "learning_rate": 2.7163805436337625e-06, "loss": 2.4882, "step": 1519 }, { "epoch": 0.0815450643776824, "grad_norm": 0.400390625, "learning_rate": 2.7181688125894134e-06, "loss": 2.5983, "step": 1520 }, { "epoch": 0.08159871244635193, "grad_norm": 0.447265625, "learning_rate": 2.7199570815450643e-06, "loss": 2.4839, "step": 1521 }, { "epoch": 0.08165236051502146, "grad_norm": 0.5625, "learning_rate": 2.7217453505007156e-06, "loss": 2.4553, "step": 1522 }, { "epoch": 0.08170600858369098, "grad_norm": 0.328125, "learning_rate": 2.7235336194563665e-06, "loss": 2.5919, "step": 1523 }, { "epoch": 0.08175965665236051, "grad_norm": 0.3125, "learning_rate": 2.7253218884120173e-06, "loss": 2.46, "step": 1524 }, { "epoch": 0.08181330472103004, "grad_norm": 0.275390625, "learning_rate": 2.727110157367668e-06, "loss": 2.5501, "step": 1525 }, { "epoch": 0.08186695278969958, "grad_norm": 0.36328125, "learning_rate": 2.728898426323319e-06, "loss": 2.2296, "step": 1526 }, { "epoch": 0.0819206008583691, "grad_norm": 0.56640625, "learning_rate": 2.73068669527897e-06, "loss": 2.0532, "step": 1527 }, { "epoch": 0.08197424892703863, "grad_norm": 0.46875, "learning_rate": 2.732474964234621e-06, "loss": 2.6117, "step": 1528 }, { "epoch": 0.08202789699570816, "grad_norm": 0.302734375, "learning_rate": 2.7342632331902717e-06, "loss": 2.4008, "step": 1529 }, { "epoch": 0.08208154506437768, "grad_norm": 0.30859375, "learning_rate": 2.7360515021459226e-06, "loss": 1.9459, "step": 1530 }, { "epoch": 0.08213519313304721, "grad_norm": 0.2734375, "learning_rate": 2.7378397711015743e-06, "loss": 2.4995, "step": 1531 }, { "epoch": 0.08218884120171674, "grad_norm": 0.3046875, "learning_rate": 2.739628040057225e-06, "loss": 2.544, "step": 1532 }, { "epoch": 0.08224248927038627, "grad_norm": 0.33203125, "learning_rate": 2.741416309012876e-06, "loss": 2.3106, "step": 1533 }, { "epoch": 0.08229613733905579, "grad_norm": 0.34375, "learning_rate": 2.743204577968527e-06, "loss": 2.2351, "step": 1534 }, { "epoch": 0.08234978540772532, "grad_norm": 0.32421875, "learning_rate": 2.744992846924178e-06, "loss": 2.3834, "step": 1535 }, { "epoch": 0.08240343347639485, "grad_norm": 0.353515625, "learning_rate": 2.7467811158798287e-06, "loss": 2.1329, "step": 1536 }, { "epoch": 0.08245708154506438, "grad_norm": 0.341796875, "learning_rate": 2.7485693848354796e-06, "loss": 2.354, "step": 1537 }, { "epoch": 0.0825107296137339, "grad_norm": 0.3125, "learning_rate": 2.7503576537911304e-06, "loss": 2.3315, "step": 1538 }, { "epoch": 0.08256437768240343, "grad_norm": 0.2412109375, "learning_rate": 2.7521459227467813e-06, "loss": 2.18, "step": 1539 }, { "epoch": 0.08261802575107297, "grad_norm": 0.359375, "learning_rate": 2.753934191702432e-06, "loss": 2.3234, "step": 1540 }, { "epoch": 0.08267167381974248, "grad_norm": 0.36328125, "learning_rate": 2.7557224606580835e-06, "loss": 2.5048, "step": 1541 }, { "epoch": 0.08272532188841202, "grad_norm": 0.345703125, "learning_rate": 2.7575107296137344e-06, "loss": 2.6175, "step": 1542 }, { "epoch": 0.08277896995708155, "grad_norm": 0.5234375, "learning_rate": 2.7592989985693852e-06, "loss": 2.3178, "step": 1543 }, { "epoch": 0.08283261802575108, "grad_norm": 0.314453125, "learning_rate": 2.761087267525036e-06, "loss": 2.2476, "step": 1544 }, { "epoch": 0.0828862660944206, "grad_norm": 0.41796875, "learning_rate": 2.762875536480687e-06, "loss": 2.219, "step": 1545 }, { "epoch": 0.08293991416309013, "grad_norm": 0.248046875, "learning_rate": 2.764663805436338e-06, "loss": 2.3697, "step": 1546 }, { "epoch": 0.08299356223175966, "grad_norm": 0.5078125, "learning_rate": 2.7664520743919887e-06, "loss": 2.3612, "step": 1547 }, { "epoch": 0.08304721030042918, "grad_norm": 0.3671875, "learning_rate": 2.7682403433476396e-06, "loss": 2.4247, "step": 1548 }, { "epoch": 0.08310085836909871, "grad_norm": 0.271484375, "learning_rate": 2.7700286123032905e-06, "loss": 2.3883, "step": 1549 }, { "epoch": 0.08315450643776824, "grad_norm": 0.275390625, "learning_rate": 2.7718168812589414e-06, "loss": 2.4006, "step": 1550 }, { "epoch": 0.08320815450643777, "grad_norm": 1.171875, "learning_rate": 2.7736051502145927e-06, "loss": 2.6842, "step": 1551 }, { "epoch": 0.08326180257510729, "grad_norm": 0.3828125, "learning_rate": 2.7753934191702436e-06, "loss": 2.0033, "step": 1552 }, { "epoch": 0.08331545064377682, "grad_norm": 0.294921875, "learning_rate": 2.7771816881258944e-06, "loss": 2.4223, "step": 1553 }, { "epoch": 0.08336909871244635, "grad_norm": 0.294921875, "learning_rate": 2.7789699570815453e-06, "loss": 2.5069, "step": 1554 }, { "epoch": 0.08342274678111589, "grad_norm": 0.59375, "learning_rate": 2.780758226037196e-06, "loss": 2.3374, "step": 1555 }, { "epoch": 0.0834763948497854, "grad_norm": 0.291015625, "learning_rate": 2.782546494992847e-06, "loss": 2.2345, "step": 1556 }, { "epoch": 0.08353004291845494, "grad_norm": 0.703125, "learning_rate": 2.784334763948498e-06, "loss": 2.5835, "step": 1557 }, { "epoch": 0.08358369098712447, "grad_norm": 0.412109375, "learning_rate": 2.786123032904149e-06, "loss": 2.6222, "step": 1558 }, { "epoch": 0.08363733905579399, "grad_norm": 0.345703125, "learning_rate": 2.7879113018597997e-06, "loss": 2.4265, "step": 1559 }, { "epoch": 0.08369098712446352, "grad_norm": 0.34765625, "learning_rate": 2.7896995708154506e-06, "loss": 2.207, "step": 1560 }, { "epoch": 0.08374463519313305, "grad_norm": 0.31640625, "learning_rate": 2.791487839771102e-06, "loss": 2.4686, "step": 1561 }, { "epoch": 0.08379828326180258, "grad_norm": 0.26171875, "learning_rate": 2.7932761087267527e-06, "loss": 2.1425, "step": 1562 }, { "epoch": 0.0838519313304721, "grad_norm": 0.283203125, "learning_rate": 2.7950643776824036e-06, "loss": 2.5028, "step": 1563 }, { "epoch": 0.08390557939914163, "grad_norm": 0.330078125, "learning_rate": 2.7968526466380545e-06, "loss": 2.1965, "step": 1564 }, { "epoch": 0.08395922746781116, "grad_norm": 2.078125, "learning_rate": 2.7986409155937054e-06, "loss": 2.3909, "step": 1565 }, { "epoch": 0.08401287553648068, "grad_norm": 0.33984375, "learning_rate": 2.8004291845493562e-06, "loss": 2.1402, "step": 1566 }, { "epoch": 0.08406652360515021, "grad_norm": 0.2578125, "learning_rate": 2.802217453505007e-06, "loss": 2.2027, "step": 1567 }, { "epoch": 0.08412017167381974, "grad_norm": 0.4453125, "learning_rate": 2.804005722460658e-06, "loss": 2.4546, "step": 1568 }, { "epoch": 0.08417381974248928, "grad_norm": 0.365234375, "learning_rate": 2.805793991416309e-06, "loss": 2.2559, "step": 1569 }, { "epoch": 0.0842274678111588, "grad_norm": 0.326171875, "learning_rate": 2.8075822603719597e-06, "loss": 2.602, "step": 1570 }, { "epoch": 0.08428111587982832, "grad_norm": 0.2890625, "learning_rate": 2.8093705293276115e-06, "loss": 2.2797, "step": 1571 }, { "epoch": 0.08433476394849786, "grad_norm": 0.375, "learning_rate": 2.8111587982832623e-06, "loss": 2.1914, "step": 1572 }, { "epoch": 0.08438841201716739, "grad_norm": 0.478515625, "learning_rate": 2.8129470672389132e-06, "loss": 2.4491, "step": 1573 }, { "epoch": 0.0844420600858369, "grad_norm": 0.267578125, "learning_rate": 2.814735336194564e-06, "loss": 2.2267, "step": 1574 }, { "epoch": 0.08449570815450644, "grad_norm": 0.478515625, "learning_rate": 2.816523605150215e-06, "loss": 2.4832, "step": 1575 }, { "epoch": 0.08454935622317597, "grad_norm": 0.5859375, "learning_rate": 2.818311874105866e-06, "loss": 2.4369, "step": 1576 }, { "epoch": 0.08460300429184549, "grad_norm": 0.6796875, "learning_rate": 2.8201001430615167e-06, "loss": 2.3372, "step": 1577 }, { "epoch": 0.08465665236051502, "grad_norm": 0.412109375, "learning_rate": 2.8218884120171676e-06, "loss": 2.5336, "step": 1578 }, { "epoch": 0.08471030042918455, "grad_norm": 0.37890625, "learning_rate": 2.8236766809728185e-06, "loss": 2.3271, "step": 1579 }, { "epoch": 0.08476394849785408, "grad_norm": 0.2890625, "learning_rate": 2.8254649499284693e-06, "loss": 2.3997, "step": 1580 }, { "epoch": 0.0848175965665236, "grad_norm": 0.25, "learning_rate": 2.8272532188841206e-06, "loss": 2.2547, "step": 1581 }, { "epoch": 0.08487124463519313, "grad_norm": 0.283203125, "learning_rate": 2.8290414878397715e-06, "loss": 2.3256, "step": 1582 }, { "epoch": 0.08492489270386266, "grad_norm": 0.4453125, "learning_rate": 2.8308297567954224e-06, "loss": 2.2915, "step": 1583 }, { "epoch": 0.08497854077253218, "grad_norm": 0.353515625, "learning_rate": 2.8326180257510733e-06, "loss": 2.2529, "step": 1584 }, { "epoch": 0.08503218884120171, "grad_norm": 0.34375, "learning_rate": 2.834406294706724e-06, "loss": 2.3849, "step": 1585 }, { "epoch": 0.08508583690987125, "grad_norm": 0.59765625, "learning_rate": 2.836194563662375e-06, "loss": 2.4409, "step": 1586 }, { "epoch": 0.08513948497854078, "grad_norm": 0.46875, "learning_rate": 2.837982832618026e-06, "loss": 2.6106, "step": 1587 }, { "epoch": 0.0851931330472103, "grad_norm": 0.423828125, "learning_rate": 2.8397711015736768e-06, "loss": 2.6496, "step": 1588 }, { "epoch": 0.08524678111587983, "grad_norm": 0.6796875, "learning_rate": 2.8415593705293277e-06, "loss": 2.503, "step": 1589 }, { "epoch": 0.08530042918454936, "grad_norm": 0.33203125, "learning_rate": 2.843347639484979e-06, "loss": 2.2833, "step": 1590 }, { "epoch": 0.08535407725321889, "grad_norm": 0.5703125, "learning_rate": 2.84513590844063e-06, "loss": 2.5118, "step": 1591 }, { "epoch": 0.08540772532188841, "grad_norm": 0.416015625, "learning_rate": 2.8469241773962807e-06, "loss": 2.3155, "step": 1592 }, { "epoch": 0.08546137339055794, "grad_norm": 0.294921875, "learning_rate": 2.8487124463519316e-06, "loss": 2.4373, "step": 1593 }, { "epoch": 0.08551502145922747, "grad_norm": 0.283203125, "learning_rate": 2.8505007153075825e-06, "loss": 2.3812, "step": 1594 }, { "epoch": 0.08556866952789699, "grad_norm": 0.337890625, "learning_rate": 2.8522889842632333e-06, "loss": 2.3048, "step": 1595 }, { "epoch": 0.08562231759656652, "grad_norm": 0.328125, "learning_rate": 2.854077253218884e-06, "loss": 2.4165, "step": 1596 }, { "epoch": 0.08567596566523605, "grad_norm": 0.306640625, "learning_rate": 2.855865522174535e-06, "loss": 2.5981, "step": 1597 }, { "epoch": 0.08572961373390559, "grad_norm": 0.267578125, "learning_rate": 2.857653791130186e-06, "loss": 2.282, "step": 1598 }, { "epoch": 0.0857832618025751, "grad_norm": 0.3203125, "learning_rate": 2.859442060085837e-06, "loss": 2.2179, "step": 1599 }, { "epoch": 0.08583690987124463, "grad_norm": 0.296875, "learning_rate": 2.861230329041488e-06, "loss": 2.6469, "step": 1600 }, { "epoch": 0.08589055793991417, "grad_norm": 0.4140625, "learning_rate": 2.863018597997139e-06, "loss": 1.8423, "step": 1601 }, { "epoch": 0.08594420600858368, "grad_norm": 0.578125, "learning_rate": 2.86480686695279e-06, "loss": 2.5013, "step": 1602 }, { "epoch": 0.08599785407725322, "grad_norm": 0.390625, "learning_rate": 2.8665951359084408e-06, "loss": 2.5707, "step": 1603 }, { "epoch": 0.08605150214592275, "grad_norm": 0.482421875, "learning_rate": 2.8683834048640916e-06, "loss": 2.5692, "step": 1604 }, { "epoch": 0.08610515021459228, "grad_norm": 0.5078125, "learning_rate": 2.8701716738197425e-06, "loss": 2.49, "step": 1605 }, { "epoch": 0.0861587982832618, "grad_norm": 0.578125, "learning_rate": 2.8719599427753934e-06, "loss": 1.6159, "step": 1606 }, { "epoch": 0.08621244635193133, "grad_norm": 0.333984375, "learning_rate": 2.8737482117310443e-06, "loss": 2.4376, "step": 1607 }, { "epoch": 0.08626609442060086, "grad_norm": 0.2431640625, "learning_rate": 2.875536480686695e-06, "loss": 2.1662, "step": 1608 }, { "epoch": 0.08631974248927039, "grad_norm": 0.34375, "learning_rate": 2.877324749642346e-06, "loss": 2.355, "step": 1609 }, { "epoch": 0.08637339055793991, "grad_norm": 0.298828125, "learning_rate": 2.8791130185979977e-06, "loss": 2.4748, "step": 1610 }, { "epoch": 0.08642703862660944, "grad_norm": 0.328125, "learning_rate": 2.8809012875536486e-06, "loss": 2.4992, "step": 1611 }, { "epoch": 0.08648068669527897, "grad_norm": 0.29296875, "learning_rate": 2.8826895565092995e-06, "loss": 2.2948, "step": 1612 }, { "epoch": 0.08653433476394849, "grad_norm": 0.328125, "learning_rate": 2.8844778254649504e-06, "loss": 2.3368, "step": 1613 }, { "epoch": 0.08658798283261802, "grad_norm": 0.443359375, "learning_rate": 2.8862660944206012e-06, "loss": 2.511, "step": 1614 }, { "epoch": 0.08664163090128756, "grad_norm": 0.322265625, "learning_rate": 2.888054363376252e-06, "loss": 2.307, "step": 1615 }, { "epoch": 0.08669527896995709, "grad_norm": 0.8203125, "learning_rate": 2.889842632331903e-06, "loss": 2.4359, "step": 1616 }, { "epoch": 0.0867489270386266, "grad_norm": 0.322265625, "learning_rate": 2.891630901287554e-06, "loss": 2.6047, "step": 1617 }, { "epoch": 0.08680257510729614, "grad_norm": 0.5625, "learning_rate": 2.8934191702432047e-06, "loss": 2.3722, "step": 1618 }, { "epoch": 0.08685622317596567, "grad_norm": 0.294921875, "learning_rate": 2.8952074391988556e-06, "loss": 2.3401, "step": 1619 }, { "epoch": 0.08690987124463519, "grad_norm": 0.494140625, "learning_rate": 2.896995708154507e-06, "loss": 2.3208, "step": 1620 }, { "epoch": 0.08696351931330472, "grad_norm": 0.27734375, "learning_rate": 2.898783977110158e-06, "loss": 1.8771, "step": 1621 }, { "epoch": 0.08701716738197425, "grad_norm": 0.7265625, "learning_rate": 2.9005722460658087e-06, "loss": 2.3384, "step": 1622 }, { "epoch": 0.08707081545064378, "grad_norm": 0.365234375, "learning_rate": 2.9023605150214595e-06, "loss": 2.347, "step": 1623 }, { "epoch": 0.0871244635193133, "grad_norm": 0.31640625, "learning_rate": 2.9041487839771104e-06, "loss": 2.3725, "step": 1624 }, { "epoch": 0.08717811158798283, "grad_norm": 0.66796875, "learning_rate": 2.9059370529327613e-06, "loss": 2.3976, "step": 1625 }, { "epoch": 0.08723175965665236, "grad_norm": 0.30078125, "learning_rate": 2.907725321888412e-06, "loss": 2.302, "step": 1626 }, { "epoch": 0.0872854077253219, "grad_norm": 0.30859375, "learning_rate": 2.909513590844063e-06, "loss": 2.0829, "step": 1627 }, { "epoch": 0.08733905579399141, "grad_norm": 0.330078125, "learning_rate": 2.911301859799714e-06, "loss": 2.3819, "step": 1628 }, { "epoch": 0.08739270386266094, "grad_norm": 0.34375, "learning_rate": 2.913090128755365e-06, "loss": 2.1782, "step": 1629 }, { "epoch": 0.08744635193133048, "grad_norm": 0.5234375, "learning_rate": 2.914878397711016e-06, "loss": 2.2776, "step": 1630 }, { "epoch": 0.0875, "grad_norm": 0.275390625, "learning_rate": 2.916666666666667e-06, "loss": 2.3167, "step": 1631 }, { "epoch": 0.08755364806866953, "grad_norm": 0.337890625, "learning_rate": 2.918454935622318e-06, "loss": 2.3628, "step": 1632 }, { "epoch": 0.08760729613733906, "grad_norm": 0.7734375, "learning_rate": 2.9202432045779687e-06, "loss": 2.583, "step": 1633 }, { "epoch": 0.08766094420600859, "grad_norm": 0.2890625, "learning_rate": 2.9220314735336196e-06, "loss": 2.6267, "step": 1634 }, { "epoch": 0.08771459227467811, "grad_norm": 0.302734375, "learning_rate": 2.9238197424892705e-06, "loss": 2.3221, "step": 1635 }, { "epoch": 0.08776824034334764, "grad_norm": 0.3125, "learning_rate": 2.9256080114449214e-06, "loss": 2.3199, "step": 1636 }, { "epoch": 0.08782188841201717, "grad_norm": 0.353515625, "learning_rate": 2.9273962804005722e-06, "loss": 2.5893, "step": 1637 }, { "epoch": 0.08787553648068669, "grad_norm": 0.27734375, "learning_rate": 2.929184549356223e-06, "loss": 2.4188, "step": 1638 }, { "epoch": 0.08792918454935622, "grad_norm": 0.30859375, "learning_rate": 2.930972818311874e-06, "loss": 2.5456, "step": 1639 }, { "epoch": 0.08798283261802575, "grad_norm": 1.546875, "learning_rate": 2.9327610872675257e-06, "loss": 2.2658, "step": 1640 }, { "epoch": 0.08803648068669528, "grad_norm": 0.41796875, "learning_rate": 2.9345493562231766e-06, "loss": 2.4275, "step": 1641 }, { "epoch": 0.0880901287553648, "grad_norm": 0.28515625, "learning_rate": 2.9363376251788275e-06, "loss": 2.2089, "step": 1642 }, { "epoch": 0.08814377682403433, "grad_norm": 0.287109375, "learning_rate": 2.9381258941344783e-06, "loss": 2.2894, "step": 1643 }, { "epoch": 0.08819742489270387, "grad_norm": 0.328125, "learning_rate": 2.939914163090129e-06, "loss": 2.4265, "step": 1644 }, { "epoch": 0.0882510729613734, "grad_norm": 0.330078125, "learning_rate": 2.94170243204578e-06, "loss": 2.4809, "step": 1645 }, { "epoch": 0.08830472103004292, "grad_norm": 0.310546875, "learning_rate": 2.943490701001431e-06, "loss": 2.3953, "step": 1646 }, { "epoch": 0.08835836909871245, "grad_norm": 0.46484375, "learning_rate": 2.9452789699570814e-06, "loss": 1.7697, "step": 1647 }, { "epoch": 0.08841201716738198, "grad_norm": 0.2890625, "learning_rate": 2.9470672389127323e-06, "loss": 2.4435, "step": 1648 }, { "epoch": 0.0884656652360515, "grad_norm": 0.2734375, "learning_rate": 2.948855507868383e-06, "loss": 2.4252, "step": 1649 }, { "epoch": 0.08851931330472103, "grad_norm": 0.373046875, "learning_rate": 2.950643776824035e-06, "loss": 2.4754, "step": 1650 }, { "epoch": 0.08857296137339056, "grad_norm": 0.4609375, "learning_rate": 2.9524320457796858e-06, "loss": 2.6361, "step": 1651 }, { "epoch": 0.08862660944206009, "grad_norm": 0.287109375, "learning_rate": 2.9542203147353366e-06, "loss": 2.4172, "step": 1652 }, { "epoch": 0.08868025751072961, "grad_norm": 0.255859375, "learning_rate": 2.9560085836909875e-06, "loss": 2.2565, "step": 1653 }, { "epoch": 0.08873390557939914, "grad_norm": 0.365234375, "learning_rate": 2.9577968526466384e-06, "loss": 2.3482, "step": 1654 }, { "epoch": 0.08878755364806867, "grad_norm": 0.33203125, "learning_rate": 2.9595851216022893e-06, "loss": 2.3525, "step": 1655 }, { "epoch": 0.08884120171673819, "grad_norm": 0.341796875, "learning_rate": 2.96137339055794e-06, "loss": 2.6015, "step": 1656 }, { "epoch": 0.08889484978540772, "grad_norm": 0.30859375, "learning_rate": 2.963161659513591e-06, "loss": 2.2409, "step": 1657 }, { "epoch": 0.08894849785407725, "grad_norm": 0.296875, "learning_rate": 2.964949928469242e-06, "loss": 2.3734, "step": 1658 }, { "epoch": 0.08900214592274679, "grad_norm": 0.3984375, "learning_rate": 2.966738197424893e-06, "loss": 2.6253, "step": 1659 }, { "epoch": 0.0890557939914163, "grad_norm": 0.375, "learning_rate": 2.968526466380544e-06, "loss": 2.0491, "step": 1660 }, { "epoch": 0.08910944206008584, "grad_norm": 0.373046875, "learning_rate": 2.970314735336195e-06, "loss": 2.3519, "step": 1661 }, { "epoch": 0.08916309012875537, "grad_norm": 0.40625, "learning_rate": 2.972103004291846e-06, "loss": 2.4189, "step": 1662 }, { "epoch": 0.0892167381974249, "grad_norm": 0.447265625, "learning_rate": 2.9738912732474967e-06, "loss": 2.6889, "step": 1663 }, { "epoch": 0.08927038626609442, "grad_norm": 0.37890625, "learning_rate": 2.9756795422031476e-06, "loss": 2.4061, "step": 1664 }, { "epoch": 0.08932403433476395, "grad_norm": 0.29296875, "learning_rate": 2.9774678111587984e-06, "loss": 2.2944, "step": 1665 }, { "epoch": 0.08937768240343348, "grad_norm": 0.341796875, "learning_rate": 2.9792560801144493e-06, "loss": 2.4057, "step": 1666 }, { "epoch": 0.089431330472103, "grad_norm": 0.5703125, "learning_rate": 2.9810443490701e-06, "loss": 2.5046, "step": 1667 }, { "epoch": 0.08948497854077253, "grad_norm": 0.306640625, "learning_rate": 2.982832618025751e-06, "loss": 2.4419, "step": 1668 }, { "epoch": 0.08953862660944206, "grad_norm": 0.44921875, "learning_rate": 2.9846208869814024e-06, "loss": 2.1259, "step": 1669 }, { "epoch": 0.0895922746781116, "grad_norm": 0.50390625, "learning_rate": 2.9864091559370533e-06, "loss": 2.5487, "step": 1670 }, { "epoch": 0.08964592274678111, "grad_norm": 0.298828125, "learning_rate": 2.988197424892704e-06, "loss": 2.3095, "step": 1671 }, { "epoch": 0.08969957081545064, "grad_norm": 1.8203125, "learning_rate": 2.989985693848355e-06, "loss": 2.2443, "step": 1672 }, { "epoch": 0.08975321888412018, "grad_norm": 0.419921875, "learning_rate": 2.991773962804006e-06, "loss": 2.4987, "step": 1673 }, { "epoch": 0.0898068669527897, "grad_norm": 0.294921875, "learning_rate": 2.9935622317596568e-06, "loss": 2.513, "step": 1674 }, { "epoch": 0.08986051502145923, "grad_norm": 0.27734375, "learning_rate": 2.9953505007153076e-06, "loss": 2.574, "step": 1675 }, { "epoch": 0.08991416309012876, "grad_norm": 0.291015625, "learning_rate": 2.9971387696709585e-06, "loss": 2.2824, "step": 1676 }, { "epoch": 0.08996781115879829, "grad_norm": 0.53125, "learning_rate": 2.9989270386266094e-06, "loss": 2.5166, "step": 1677 }, { "epoch": 0.0900214592274678, "grad_norm": 0.46875, "learning_rate": 3.0007153075822603e-06, "loss": 2.4907, "step": 1678 }, { "epoch": 0.09007510729613734, "grad_norm": 0.375, "learning_rate": 3.002503576537912e-06, "loss": 2.2257, "step": 1679 }, { "epoch": 0.09012875536480687, "grad_norm": 0.349609375, "learning_rate": 3.004291845493563e-06, "loss": 2.1002, "step": 1680 }, { "epoch": 0.09018240343347639, "grad_norm": 0.322265625, "learning_rate": 3.0060801144492137e-06, "loss": 2.5217, "step": 1681 }, { "epoch": 0.09023605150214592, "grad_norm": 0.341796875, "learning_rate": 3.0078683834048646e-06, "loss": 2.471, "step": 1682 }, { "epoch": 0.09028969957081545, "grad_norm": 0.46484375, "learning_rate": 3.0096566523605155e-06, "loss": 2.5779, "step": 1683 }, { "epoch": 0.09034334763948498, "grad_norm": 0.306640625, "learning_rate": 3.0114449213161664e-06, "loss": 2.4124, "step": 1684 }, { "epoch": 0.0903969957081545, "grad_norm": 0.306640625, "learning_rate": 3.0132331902718172e-06, "loss": 2.1771, "step": 1685 }, { "epoch": 0.09045064377682403, "grad_norm": 0.443359375, "learning_rate": 3.015021459227468e-06, "loss": 1.8479, "step": 1686 }, { "epoch": 0.09050429184549356, "grad_norm": 0.765625, "learning_rate": 3.016809728183119e-06, "loss": 2.3868, "step": 1687 }, { "epoch": 0.0905579399141631, "grad_norm": 0.361328125, "learning_rate": 3.01859799713877e-06, "loss": 2.4749, "step": 1688 }, { "epoch": 0.09061158798283261, "grad_norm": 0.275390625, "learning_rate": 3.020386266094421e-06, "loss": 2.2583, "step": 1689 }, { "epoch": 0.09066523605150215, "grad_norm": 0.31640625, "learning_rate": 3.022174535050072e-06, "loss": 2.5223, "step": 1690 }, { "epoch": 0.09071888412017168, "grad_norm": 0.3359375, "learning_rate": 3.023962804005723e-06, "loss": 2.2676, "step": 1691 }, { "epoch": 0.0907725321888412, "grad_norm": 0.390625, "learning_rate": 3.025751072961374e-06, "loss": 2.4467, "step": 1692 }, { "epoch": 0.09082618025751073, "grad_norm": 0.365234375, "learning_rate": 3.0275393419170247e-06, "loss": 2.4149, "step": 1693 }, { "epoch": 0.09087982832618026, "grad_norm": 0.2734375, "learning_rate": 3.0293276108726755e-06, "loss": 2.2264, "step": 1694 }, { "epoch": 0.09093347639484979, "grad_norm": 0.30859375, "learning_rate": 3.0311158798283264e-06, "loss": 2.2556, "step": 1695 }, { "epoch": 0.09098712446351931, "grad_norm": 0.59765625, "learning_rate": 3.0329041487839773e-06, "loss": 1.8309, "step": 1696 }, { "epoch": 0.09104077253218884, "grad_norm": 0.330078125, "learning_rate": 3.034692417739628e-06, "loss": 2.3766, "step": 1697 }, { "epoch": 0.09109442060085837, "grad_norm": 0.31640625, "learning_rate": 3.036480686695279e-06, "loss": 2.3454, "step": 1698 }, { "epoch": 0.09114806866952789, "grad_norm": 0.416015625, "learning_rate": 3.0382689556509303e-06, "loss": 2.6418, "step": 1699 }, { "epoch": 0.09120171673819742, "grad_norm": 0.416015625, "learning_rate": 3.0400572246065812e-06, "loss": 2.3859, "step": 1700 }, { "epoch": 0.09125536480686695, "grad_norm": 0.275390625, "learning_rate": 3.041845493562232e-06, "loss": 2.5065, "step": 1701 }, { "epoch": 0.09130901287553649, "grad_norm": 0.2490234375, "learning_rate": 3.043633762517883e-06, "loss": 2.0341, "step": 1702 }, { "epoch": 0.091362660944206, "grad_norm": 0.328125, "learning_rate": 3.045422031473534e-06, "loss": 2.3479, "step": 1703 }, { "epoch": 0.09141630901287554, "grad_norm": 0.314453125, "learning_rate": 3.0472103004291847e-06, "loss": 2.178, "step": 1704 }, { "epoch": 0.09146995708154507, "grad_norm": 0.326171875, "learning_rate": 3.0489985693848356e-06, "loss": 2.2725, "step": 1705 }, { "epoch": 0.0915236051502146, "grad_norm": 0.267578125, "learning_rate": 3.0507868383404865e-06, "loss": 2.1962, "step": 1706 }, { "epoch": 0.09157725321888412, "grad_norm": 0.322265625, "learning_rate": 3.0525751072961373e-06, "loss": 2.4466, "step": 1707 }, { "epoch": 0.09163090128755365, "grad_norm": 0.287109375, "learning_rate": 3.0543633762517882e-06, "loss": 2.4715, "step": 1708 }, { "epoch": 0.09168454935622318, "grad_norm": 0.365234375, "learning_rate": 3.0561516452074395e-06, "loss": 2.428, "step": 1709 }, { "epoch": 0.0917381974248927, "grad_norm": 0.33203125, "learning_rate": 3.0579399141630904e-06, "loss": 2.4215, "step": 1710 }, { "epoch": 0.09179184549356223, "grad_norm": 0.3125, "learning_rate": 3.0597281831187413e-06, "loss": 2.3881, "step": 1711 }, { "epoch": 0.09184549356223176, "grad_norm": 0.326171875, "learning_rate": 3.061516452074392e-06, "loss": 2.3477, "step": 1712 }, { "epoch": 0.0918991416309013, "grad_norm": 0.490234375, "learning_rate": 3.063304721030043e-06, "loss": 2.3202, "step": 1713 }, { "epoch": 0.09195278969957081, "grad_norm": 0.34375, "learning_rate": 3.065092989985694e-06, "loss": 2.2663, "step": 1714 }, { "epoch": 0.09200643776824034, "grad_norm": 0.55859375, "learning_rate": 3.0668812589413448e-06, "loss": 1.7334, "step": 1715 }, { "epoch": 0.09206008583690987, "grad_norm": 0.4765625, "learning_rate": 3.0686695278969957e-06, "loss": 2.5669, "step": 1716 }, { "epoch": 0.09211373390557939, "grad_norm": 0.361328125, "learning_rate": 3.0704577968526465e-06, "loss": 2.0605, "step": 1717 }, { "epoch": 0.09216738197424892, "grad_norm": 0.6328125, "learning_rate": 3.0722460658082983e-06, "loss": 2.5894, "step": 1718 }, { "epoch": 0.09222103004291846, "grad_norm": 0.3515625, "learning_rate": 3.074034334763949e-06, "loss": 2.2887, "step": 1719 }, { "epoch": 0.09227467811158799, "grad_norm": 0.419921875, "learning_rate": 3.0758226037196e-06, "loss": 2.4736, "step": 1720 }, { "epoch": 0.0923283261802575, "grad_norm": 0.287109375, "learning_rate": 3.077610872675251e-06, "loss": 2.5362, "step": 1721 }, { "epoch": 0.09238197424892704, "grad_norm": 0.287109375, "learning_rate": 3.0793991416309018e-06, "loss": 2.4599, "step": 1722 }, { "epoch": 0.09243562231759657, "grad_norm": 0.37890625, "learning_rate": 3.0811874105865526e-06, "loss": 2.4554, "step": 1723 }, { "epoch": 0.0924892703862661, "grad_norm": 0.3828125, "learning_rate": 3.0829756795422035e-06, "loss": 2.3486, "step": 1724 }, { "epoch": 0.09254291845493562, "grad_norm": 0.306640625, "learning_rate": 3.0847639484978544e-06, "loss": 2.5251, "step": 1725 }, { "epoch": 0.09259656652360515, "grad_norm": 0.2451171875, "learning_rate": 3.0865522174535053e-06, "loss": 2.2678, "step": 1726 }, { "epoch": 0.09265021459227468, "grad_norm": 0.396484375, "learning_rate": 3.088340486409156e-06, "loss": 2.5122, "step": 1727 }, { "epoch": 0.0927038626609442, "grad_norm": 0.283203125, "learning_rate": 3.0901287553648074e-06, "loss": 2.2722, "step": 1728 }, { "epoch": 0.09275751072961373, "grad_norm": 0.53125, "learning_rate": 3.0919170243204583e-06, "loss": 2.2476, "step": 1729 }, { "epoch": 0.09281115879828326, "grad_norm": 0.333984375, "learning_rate": 3.093705293276109e-06, "loss": 2.6554, "step": 1730 }, { "epoch": 0.0928648068669528, "grad_norm": 0.3515625, "learning_rate": 3.09549356223176e-06, "loss": 2.0587, "step": 1731 }, { "epoch": 0.09291845493562231, "grad_norm": 0.3125, "learning_rate": 3.097281831187411e-06, "loss": 2.4185, "step": 1732 }, { "epoch": 0.09297210300429185, "grad_norm": 0.92578125, "learning_rate": 3.099070100143062e-06, "loss": 2.2311, "step": 1733 }, { "epoch": 0.09302575107296138, "grad_norm": 0.283203125, "learning_rate": 3.1008583690987127e-06, "loss": 2.449, "step": 1734 }, { "epoch": 0.0930793991416309, "grad_norm": 0.388671875, "learning_rate": 3.1026466380543636e-06, "loss": 2.2951, "step": 1735 }, { "epoch": 0.09313304721030043, "grad_norm": 0.435546875, "learning_rate": 3.1044349070100144e-06, "loss": 1.8554, "step": 1736 }, { "epoch": 0.09318669527896996, "grad_norm": 0.302734375, "learning_rate": 3.1062231759656653e-06, "loss": 2.2981, "step": 1737 }, { "epoch": 0.09324034334763949, "grad_norm": 0.3515625, "learning_rate": 3.1080114449213166e-06, "loss": 2.4538, "step": 1738 }, { "epoch": 0.09329399141630901, "grad_norm": 0.384765625, "learning_rate": 3.1097997138769675e-06, "loss": 2.2129, "step": 1739 }, { "epoch": 0.09334763948497854, "grad_norm": 0.375, "learning_rate": 3.1115879828326184e-06, "loss": 2.2269, "step": 1740 }, { "epoch": 0.09340128755364807, "grad_norm": 0.361328125, "learning_rate": 3.1133762517882692e-06, "loss": 2.0675, "step": 1741 }, { "epoch": 0.0934549356223176, "grad_norm": 0.45703125, "learning_rate": 3.11516452074392e-06, "loss": 2.0077, "step": 1742 }, { "epoch": 0.09350858369098712, "grad_norm": 0.298828125, "learning_rate": 3.116952789699571e-06, "loss": 2.5025, "step": 1743 }, { "epoch": 0.09356223175965665, "grad_norm": 0.353515625, "learning_rate": 3.118741058655222e-06, "loss": 2.1936, "step": 1744 }, { "epoch": 0.09361587982832618, "grad_norm": 0.306640625, "learning_rate": 3.1205293276108727e-06, "loss": 2.4738, "step": 1745 }, { "epoch": 0.0936695278969957, "grad_norm": 0.2578125, "learning_rate": 3.1223175965665236e-06, "loss": 2.3554, "step": 1746 }, { "epoch": 0.09372317596566523, "grad_norm": 0.283203125, "learning_rate": 3.1241058655221745e-06, "loss": 2.4081, "step": 1747 }, { "epoch": 0.09377682403433477, "grad_norm": 0.375, "learning_rate": 3.125894134477826e-06, "loss": 2.671, "step": 1748 }, { "epoch": 0.0938304721030043, "grad_norm": 0.375, "learning_rate": 3.1276824034334767e-06, "loss": 2.3079, "step": 1749 }, { "epoch": 0.09388412017167382, "grad_norm": 0.30078125, "learning_rate": 3.1294706723891276e-06, "loss": 2.2735, "step": 1750 }, { "epoch": 0.09393776824034335, "grad_norm": 0.291015625, "learning_rate": 3.1312589413447784e-06, "loss": 2.4347, "step": 1751 }, { "epoch": 0.09399141630901288, "grad_norm": 0.294921875, "learning_rate": 3.1330472103004293e-06, "loss": 2.4219, "step": 1752 }, { "epoch": 0.0940450643776824, "grad_norm": 0.30078125, "learning_rate": 3.13483547925608e-06, "loss": 2.2839, "step": 1753 }, { "epoch": 0.09409871244635193, "grad_norm": 0.3203125, "learning_rate": 3.136623748211731e-06, "loss": 2.3425, "step": 1754 }, { "epoch": 0.09415236051502146, "grad_norm": 0.28125, "learning_rate": 3.138412017167382e-06, "loss": 2.4336, "step": 1755 }, { "epoch": 0.09420600858369099, "grad_norm": 0.345703125, "learning_rate": 3.140200286123033e-06, "loss": 2.583, "step": 1756 }, { "epoch": 0.09425965665236051, "grad_norm": 0.44140625, "learning_rate": 3.1419885550786837e-06, "loss": 2.4741, "step": 1757 }, { "epoch": 0.09431330472103004, "grad_norm": 0.384765625, "learning_rate": 3.1437768240343354e-06, "loss": 2.7355, "step": 1758 }, { "epoch": 0.09436695278969957, "grad_norm": 0.439453125, "learning_rate": 3.1455650929899863e-06, "loss": 2.3669, "step": 1759 }, { "epoch": 0.0944206008583691, "grad_norm": 0.35546875, "learning_rate": 3.147353361945637e-06, "loss": 2.1677, "step": 1760 }, { "epoch": 0.09447424892703862, "grad_norm": 0.267578125, "learning_rate": 3.149141630901288e-06, "loss": 2.4743, "step": 1761 }, { "epoch": 0.09452789699570815, "grad_norm": 0.365234375, "learning_rate": 3.150929899856939e-06, "loss": 2.588, "step": 1762 }, { "epoch": 0.09458154506437769, "grad_norm": 0.283203125, "learning_rate": 3.1527181688125898e-06, "loss": 2.2236, "step": 1763 }, { "epoch": 0.0946351931330472, "grad_norm": 0.3046875, "learning_rate": 3.1545064377682407e-06, "loss": 2.251, "step": 1764 }, { "epoch": 0.09468884120171674, "grad_norm": 0.3046875, "learning_rate": 3.1562947067238915e-06, "loss": 2.2167, "step": 1765 }, { "epoch": 0.09474248927038627, "grad_norm": 0.298828125, "learning_rate": 3.1580829756795424e-06, "loss": 2.3791, "step": 1766 }, { "epoch": 0.0947961373390558, "grad_norm": 0.337890625, "learning_rate": 3.1598712446351933e-06, "loss": 1.9216, "step": 1767 }, { "epoch": 0.09484978540772532, "grad_norm": 1.21875, "learning_rate": 3.1616595135908446e-06, "loss": 2.3138, "step": 1768 }, { "epoch": 0.09490343347639485, "grad_norm": 0.283203125, "learning_rate": 3.1634477825464955e-06, "loss": 2.3739, "step": 1769 }, { "epoch": 0.09495708154506438, "grad_norm": 0.296875, "learning_rate": 3.1652360515021463e-06, "loss": 2.6038, "step": 1770 }, { "epoch": 0.0950107296137339, "grad_norm": 0.30859375, "learning_rate": 3.1670243204577972e-06, "loss": 2.5266, "step": 1771 }, { "epoch": 0.09506437768240343, "grad_norm": 0.283203125, "learning_rate": 3.168812589413448e-06, "loss": 2.4506, "step": 1772 }, { "epoch": 0.09511802575107296, "grad_norm": 0.28125, "learning_rate": 3.170600858369099e-06, "loss": 2.4585, "step": 1773 }, { "epoch": 0.0951716738197425, "grad_norm": 0.455078125, "learning_rate": 3.17238912732475e-06, "loss": 2.1654, "step": 1774 }, { "epoch": 0.09522532188841201, "grad_norm": 0.412109375, "learning_rate": 3.1741773962804007e-06, "loss": 2.4272, "step": 1775 }, { "epoch": 0.09527896995708154, "grad_norm": 0.310546875, "learning_rate": 3.1759656652360516e-06, "loss": 2.5441, "step": 1776 }, { "epoch": 0.09533261802575108, "grad_norm": 0.302734375, "learning_rate": 3.1777539341917025e-06, "loss": 2.5528, "step": 1777 }, { "epoch": 0.09538626609442061, "grad_norm": 0.328125, "learning_rate": 3.1795422031473538e-06, "loss": 2.1644, "step": 1778 }, { "epoch": 0.09543991416309013, "grad_norm": 0.2890625, "learning_rate": 3.1813304721030046e-06, "loss": 2.2783, "step": 1779 }, { "epoch": 0.09549356223175966, "grad_norm": 0.29296875, "learning_rate": 3.1831187410586555e-06, "loss": 2.3409, "step": 1780 }, { "epoch": 0.09554721030042919, "grad_norm": 0.328125, "learning_rate": 3.1849070100143064e-06, "loss": 2.3533, "step": 1781 }, { "epoch": 0.0956008583690987, "grad_norm": 0.265625, "learning_rate": 3.1866952789699573e-06, "loss": 2.2971, "step": 1782 }, { "epoch": 0.09565450643776824, "grad_norm": 0.404296875, "learning_rate": 3.188483547925608e-06, "loss": 2.3262, "step": 1783 }, { "epoch": 0.09570815450643777, "grad_norm": 0.78125, "learning_rate": 3.190271816881259e-06, "loss": 2.4741, "step": 1784 }, { "epoch": 0.0957618025751073, "grad_norm": 0.3359375, "learning_rate": 3.19206008583691e-06, "loss": 2.3702, "step": 1785 }, { "epoch": 0.09581545064377682, "grad_norm": 0.62890625, "learning_rate": 3.1938483547925608e-06, "loss": 1.6672, "step": 1786 }, { "epoch": 0.09586909871244635, "grad_norm": 0.4140625, "learning_rate": 3.195636623748212e-06, "loss": 2.1808, "step": 1787 }, { "epoch": 0.09592274678111588, "grad_norm": 1.0546875, "learning_rate": 3.197424892703863e-06, "loss": 2.1714, "step": 1788 }, { "epoch": 0.0959763948497854, "grad_norm": 0.349609375, "learning_rate": 3.199213161659514e-06, "loss": 2.555, "step": 1789 }, { "epoch": 0.09603004291845493, "grad_norm": 0.34375, "learning_rate": 3.2010014306151647e-06, "loss": 2.4018, "step": 1790 }, { "epoch": 0.09608369098712446, "grad_norm": 0.3046875, "learning_rate": 3.2027896995708156e-06, "loss": 2.2365, "step": 1791 }, { "epoch": 0.096137339055794, "grad_norm": 0.58984375, "learning_rate": 3.2045779685264665e-06, "loss": 2.2972, "step": 1792 }, { "epoch": 0.09619098712446351, "grad_norm": 0.3203125, "learning_rate": 3.2063662374821173e-06, "loss": 2.3763, "step": 1793 }, { "epoch": 0.09624463519313305, "grad_norm": 0.408203125, "learning_rate": 3.208154506437768e-06, "loss": 1.6084, "step": 1794 }, { "epoch": 0.09629828326180258, "grad_norm": 0.29296875, "learning_rate": 3.209942775393419e-06, "loss": 2.316, "step": 1795 }, { "epoch": 0.09635193133047211, "grad_norm": 0.3203125, "learning_rate": 3.21173104434907e-06, "loss": 2.3623, "step": 1796 }, { "epoch": 0.09640557939914163, "grad_norm": 0.361328125, "learning_rate": 3.2135193133047217e-06, "loss": 2.4884, "step": 1797 }, { "epoch": 0.09645922746781116, "grad_norm": 0.419921875, "learning_rate": 3.2153075822603726e-06, "loss": 2.3853, "step": 1798 }, { "epoch": 0.09651287553648069, "grad_norm": 0.326171875, "learning_rate": 3.2170958512160234e-06, "loss": 2.3767, "step": 1799 }, { "epoch": 0.09656652360515021, "grad_norm": 0.3125, "learning_rate": 3.2188841201716743e-06, "loss": 2.3763, "step": 1800 }, { "epoch": 0.09662017167381974, "grad_norm": 0.416015625, "learning_rate": 3.220672389127325e-06, "loss": 2.2875, "step": 1801 }, { "epoch": 0.09667381974248927, "grad_norm": 0.453125, "learning_rate": 3.222460658082976e-06, "loss": 2.1212, "step": 1802 }, { "epoch": 0.0967274678111588, "grad_norm": 0.30859375, "learning_rate": 3.224248927038627e-06, "loss": 2.3382, "step": 1803 }, { "epoch": 0.09678111587982832, "grad_norm": 0.37109375, "learning_rate": 3.226037195994278e-06, "loss": 2.2166, "step": 1804 }, { "epoch": 0.09683476394849785, "grad_norm": 0.341796875, "learning_rate": 3.2278254649499287e-06, "loss": 2.3092, "step": 1805 }, { "epoch": 0.09688841201716739, "grad_norm": 1.125, "learning_rate": 3.2296137339055796e-06, "loss": 1.9457, "step": 1806 }, { "epoch": 0.0969420600858369, "grad_norm": 0.314453125, "learning_rate": 3.231402002861231e-06, "loss": 2.4845, "step": 1807 }, { "epoch": 0.09699570815450644, "grad_norm": 0.3125, "learning_rate": 3.2331902718168817e-06, "loss": 2.4129, "step": 1808 }, { "epoch": 0.09704935622317597, "grad_norm": 0.314453125, "learning_rate": 3.2349785407725326e-06, "loss": 2.3114, "step": 1809 }, { "epoch": 0.0971030042918455, "grad_norm": 0.380859375, "learning_rate": 3.2367668097281835e-06, "loss": 2.3205, "step": 1810 }, { "epoch": 0.09715665236051502, "grad_norm": 0.310546875, "learning_rate": 3.2385550786838344e-06, "loss": 2.2329, "step": 1811 }, { "epoch": 0.09721030042918455, "grad_norm": 0.34765625, "learning_rate": 3.2403433476394852e-06, "loss": 2.4674, "step": 1812 }, { "epoch": 0.09726394849785408, "grad_norm": 0.294921875, "learning_rate": 3.242131616595136e-06, "loss": 2.433, "step": 1813 }, { "epoch": 0.09731759656652361, "grad_norm": 0.287109375, "learning_rate": 3.243919885550787e-06, "loss": 2.3941, "step": 1814 }, { "epoch": 0.09737124463519313, "grad_norm": 0.453125, "learning_rate": 3.245708154506438e-06, "loss": 2.3346, "step": 1815 }, { "epoch": 0.09742489270386266, "grad_norm": 0.36328125, "learning_rate": 3.2474964234620887e-06, "loss": 2.3223, "step": 1816 }, { "epoch": 0.0974785407725322, "grad_norm": 0.68359375, "learning_rate": 3.24928469241774e-06, "loss": 2.1036, "step": 1817 }, { "epoch": 0.09753218884120171, "grad_norm": 0.380859375, "learning_rate": 3.251072961373391e-06, "loss": 2.442, "step": 1818 }, { "epoch": 0.09758583690987124, "grad_norm": 0.330078125, "learning_rate": 3.252861230329042e-06, "loss": 2.4005, "step": 1819 }, { "epoch": 0.09763948497854077, "grad_norm": 0.44140625, "learning_rate": 3.2546494992846927e-06, "loss": 2.4433, "step": 1820 }, { "epoch": 0.0976931330472103, "grad_norm": 0.47265625, "learning_rate": 3.2564377682403435e-06, "loss": 2.0415, "step": 1821 }, { "epoch": 0.09774678111587982, "grad_norm": 0.390625, "learning_rate": 3.2582260371959944e-06, "loss": 2.6357, "step": 1822 }, { "epoch": 0.09780042918454936, "grad_norm": 0.28515625, "learning_rate": 3.2600143061516453e-06, "loss": 2.4659, "step": 1823 }, { "epoch": 0.09785407725321889, "grad_norm": 0.65234375, "learning_rate": 3.261802575107296e-06, "loss": 2.2302, "step": 1824 }, { "epoch": 0.0979077253218884, "grad_norm": 0.2490234375, "learning_rate": 3.263590844062947e-06, "loss": 2.1841, "step": 1825 }, { "epoch": 0.09796137339055794, "grad_norm": 0.36328125, "learning_rate": 3.265379113018598e-06, "loss": 2.4522, "step": 1826 }, { "epoch": 0.09801502145922747, "grad_norm": 0.275390625, "learning_rate": 3.2671673819742492e-06, "loss": 2.3899, "step": 1827 }, { "epoch": 0.098068669527897, "grad_norm": 1.4765625, "learning_rate": 3.2689556509299e-06, "loss": 2.5442, "step": 1828 }, { "epoch": 0.09812231759656652, "grad_norm": 0.291015625, "learning_rate": 3.270743919885551e-06, "loss": 2.4684, "step": 1829 }, { "epoch": 0.09817596566523605, "grad_norm": 0.5625, "learning_rate": 3.272532188841202e-06, "loss": 2.4278, "step": 1830 }, { "epoch": 0.09822961373390558, "grad_norm": 0.275390625, "learning_rate": 3.2743204577968527e-06, "loss": 2.2508, "step": 1831 }, { "epoch": 0.0982832618025751, "grad_norm": 0.353515625, "learning_rate": 3.2761087267525036e-06, "loss": 2.3228, "step": 1832 }, { "epoch": 0.09833690987124463, "grad_norm": 0.296875, "learning_rate": 3.2778969957081545e-06, "loss": 2.2496, "step": 1833 }, { "epoch": 0.09839055793991416, "grad_norm": 0.345703125, "learning_rate": 3.2796852646638054e-06, "loss": 2.5139, "step": 1834 }, { "epoch": 0.0984442060085837, "grad_norm": 0.318359375, "learning_rate": 3.2814735336194562e-06, "loss": 2.0646, "step": 1835 }, { "epoch": 0.09849785407725321, "grad_norm": 0.271484375, "learning_rate": 3.283261802575107e-06, "loss": 2.469, "step": 1836 }, { "epoch": 0.09855150214592275, "grad_norm": 0.337890625, "learning_rate": 3.285050071530759e-06, "loss": 2.4398, "step": 1837 }, { "epoch": 0.09860515021459228, "grad_norm": 0.306640625, "learning_rate": 3.2868383404864097e-06, "loss": 2.172, "step": 1838 }, { "epoch": 0.09865879828326181, "grad_norm": 0.326171875, "learning_rate": 3.2886266094420606e-06, "loss": 2.5456, "step": 1839 }, { "epoch": 0.09871244635193133, "grad_norm": 0.365234375, "learning_rate": 3.2904148783977115e-06, "loss": 2.3088, "step": 1840 }, { "epoch": 0.09876609442060086, "grad_norm": 0.302734375, "learning_rate": 3.2922031473533623e-06, "loss": 2.3013, "step": 1841 }, { "epoch": 0.09881974248927039, "grad_norm": 0.384765625, "learning_rate": 3.293991416309013e-06, "loss": 2.5251, "step": 1842 }, { "epoch": 0.09887339055793991, "grad_norm": 0.4140625, "learning_rate": 3.295779685264664e-06, "loss": 2.2745, "step": 1843 }, { "epoch": 0.09892703862660944, "grad_norm": 0.310546875, "learning_rate": 3.297567954220315e-06, "loss": 1.8906, "step": 1844 }, { "epoch": 0.09898068669527897, "grad_norm": 0.30859375, "learning_rate": 3.299356223175966e-06, "loss": 2.4155, "step": 1845 }, { "epoch": 0.0990343347639485, "grad_norm": 0.271484375, "learning_rate": 3.301144492131617e-06, "loss": 2.4911, "step": 1846 }, { "epoch": 0.09908798283261802, "grad_norm": 0.3984375, "learning_rate": 3.302932761087268e-06, "loss": 2.2057, "step": 1847 }, { "epoch": 0.09914163090128755, "grad_norm": 0.48828125, "learning_rate": 3.304721030042919e-06, "loss": 2.2291, "step": 1848 }, { "epoch": 0.09919527896995708, "grad_norm": 0.3046875, "learning_rate": 3.3065092989985698e-06, "loss": 2.1683, "step": 1849 }, { "epoch": 0.0992489270386266, "grad_norm": 0.392578125, "learning_rate": 3.3082975679542206e-06, "loss": 2.0233, "step": 1850 }, { "epoch": 0.09930257510729613, "grad_norm": 0.33203125, "learning_rate": 3.3100858369098715e-06, "loss": 2.6861, "step": 1851 }, { "epoch": 0.09935622317596567, "grad_norm": 1.15625, "learning_rate": 3.3118741058655224e-06, "loss": 2.5563, "step": 1852 }, { "epoch": 0.0994098712446352, "grad_norm": 0.458984375, "learning_rate": 3.3136623748211733e-06, "loss": 2.322, "step": 1853 }, { "epoch": 0.09946351931330472, "grad_norm": 0.87890625, "learning_rate": 3.315450643776824e-06, "loss": 2.6716, "step": 1854 }, { "epoch": 0.09951716738197425, "grad_norm": 0.57421875, "learning_rate": 3.317238912732475e-06, "loss": 2.5271, "step": 1855 }, { "epoch": 0.09957081545064378, "grad_norm": 0.3359375, "learning_rate": 3.3190271816881263e-06, "loss": 2.2757, "step": 1856 }, { "epoch": 0.09962446351931331, "grad_norm": 0.29296875, "learning_rate": 3.320815450643777e-06, "loss": 2.4664, "step": 1857 }, { "epoch": 0.09967811158798283, "grad_norm": 0.423828125, "learning_rate": 3.322603719599428e-06, "loss": 2.2297, "step": 1858 }, { "epoch": 0.09973175965665236, "grad_norm": 0.337890625, "learning_rate": 3.324391988555079e-06, "loss": 2.2805, "step": 1859 }, { "epoch": 0.09978540772532189, "grad_norm": 0.396484375, "learning_rate": 3.32618025751073e-06, "loss": 2.3565, "step": 1860 }, { "epoch": 0.09983905579399141, "grad_norm": 0.2734375, "learning_rate": 3.3279685264663807e-06, "loss": 2.5298, "step": 1861 }, { "epoch": 0.09989270386266094, "grad_norm": 0.38671875, "learning_rate": 3.3297567954220316e-06, "loss": 2.1956, "step": 1862 }, { "epoch": 0.09994635193133047, "grad_norm": 2.515625, "learning_rate": 3.3315450643776824e-06, "loss": 2.5034, "step": 1863 }, { "epoch": 0.1, "grad_norm": 0.36328125, "learning_rate": 3.3333333333333333e-06, "loss": 2.4421, "step": 1864 }, { "epoch": 0.10005364806866952, "grad_norm": 0.28515625, "learning_rate": 3.335121602288984e-06, "loss": 2.2629, "step": 1865 }, { "epoch": 0.10010729613733906, "grad_norm": 0.310546875, "learning_rate": 3.336909871244636e-06, "loss": 2.3191, "step": 1866 }, { "epoch": 0.10016094420600859, "grad_norm": 0.32421875, "learning_rate": 3.338698140200287e-06, "loss": 2.3113, "step": 1867 }, { "epoch": 0.1002145922746781, "grad_norm": 0.302734375, "learning_rate": 3.3404864091559377e-06, "loss": 2.5534, "step": 1868 }, { "epoch": 0.10026824034334764, "grad_norm": 0.275390625, "learning_rate": 3.3422746781115885e-06, "loss": 2.3333, "step": 1869 }, { "epoch": 0.10032188841201717, "grad_norm": 0.41015625, "learning_rate": 3.3440629470672394e-06, "loss": 2.4726, "step": 1870 }, { "epoch": 0.1003755364806867, "grad_norm": 0.30078125, "learning_rate": 3.3458512160228903e-06, "loss": 2.3039, "step": 1871 }, { "epoch": 0.10042918454935622, "grad_norm": 0.73046875, "learning_rate": 3.3476394849785408e-06, "loss": 2.2414, "step": 1872 }, { "epoch": 0.10048283261802575, "grad_norm": 0.37890625, "learning_rate": 3.3494277539341916e-06, "loss": 2.3642, "step": 1873 }, { "epoch": 0.10053648068669528, "grad_norm": 0.41015625, "learning_rate": 3.3512160228898425e-06, "loss": 2.4431, "step": 1874 }, { "epoch": 0.10059012875536481, "grad_norm": 0.40625, "learning_rate": 3.3530042918454934e-06, "loss": 2.1551, "step": 1875 }, { "epoch": 0.10064377682403433, "grad_norm": 0.28125, "learning_rate": 3.354792560801145e-06, "loss": 2.6725, "step": 1876 }, { "epoch": 0.10069742489270386, "grad_norm": 0.294921875, "learning_rate": 3.356580829756796e-06, "loss": 2.6802, "step": 1877 }, { "epoch": 0.1007510729613734, "grad_norm": 0.314453125, "learning_rate": 3.358369098712447e-06, "loss": 2.1911, "step": 1878 }, { "epoch": 0.10080472103004291, "grad_norm": 0.2294921875, "learning_rate": 3.3601573676680977e-06, "loss": 2.0293, "step": 1879 }, { "epoch": 0.10085836909871244, "grad_norm": 0.2578125, "learning_rate": 3.3619456366237486e-06, "loss": 2.2231, "step": 1880 }, { "epoch": 0.10091201716738198, "grad_norm": 0.283203125, "learning_rate": 3.3637339055793995e-06, "loss": 2.3435, "step": 1881 }, { "epoch": 0.10096566523605151, "grad_norm": 0.33203125, "learning_rate": 3.3655221745350504e-06, "loss": 2.1474, "step": 1882 }, { "epoch": 0.10101931330472103, "grad_norm": 0.32421875, "learning_rate": 3.3673104434907012e-06, "loss": 2.338, "step": 1883 }, { "epoch": 0.10107296137339056, "grad_norm": 0.2890625, "learning_rate": 3.369098712446352e-06, "loss": 2.1223, "step": 1884 }, { "epoch": 0.10112660944206009, "grad_norm": 0.296875, "learning_rate": 3.370886981402003e-06, "loss": 2.4205, "step": 1885 }, { "epoch": 0.10118025751072961, "grad_norm": 0.35546875, "learning_rate": 3.3726752503576543e-06, "loss": 2.1381, "step": 1886 }, { "epoch": 0.10123390557939914, "grad_norm": 0.380859375, "learning_rate": 3.374463519313305e-06, "loss": 2.2856, "step": 1887 }, { "epoch": 0.10128755364806867, "grad_norm": 0.47265625, "learning_rate": 3.376251788268956e-06, "loss": 2.4862, "step": 1888 }, { "epoch": 0.1013412017167382, "grad_norm": 0.314453125, "learning_rate": 3.378040057224607e-06, "loss": 2.3237, "step": 1889 }, { "epoch": 0.10139484978540772, "grad_norm": 0.38671875, "learning_rate": 3.379828326180258e-06, "loss": 2.6489, "step": 1890 }, { "epoch": 0.10144849785407725, "grad_norm": 0.353515625, "learning_rate": 3.3816165951359087e-06, "loss": 2.3974, "step": 1891 }, { "epoch": 0.10150214592274678, "grad_norm": 0.359375, "learning_rate": 3.3834048640915595e-06, "loss": 2.7624, "step": 1892 }, { "epoch": 0.10155579399141632, "grad_norm": 0.314453125, "learning_rate": 3.3851931330472104e-06, "loss": 2.5371, "step": 1893 }, { "epoch": 0.10160944206008583, "grad_norm": 0.46875, "learning_rate": 3.3869814020028613e-06, "loss": 1.8085, "step": 1894 }, { "epoch": 0.10166309012875537, "grad_norm": 0.330078125, "learning_rate": 3.388769670958512e-06, "loss": 2.23, "step": 1895 }, { "epoch": 0.1017167381974249, "grad_norm": 0.37890625, "learning_rate": 3.3905579399141635e-06, "loss": 1.7654, "step": 1896 }, { "epoch": 0.10177038626609441, "grad_norm": 0.322265625, "learning_rate": 3.3923462088698143e-06, "loss": 2.4983, "step": 1897 }, { "epoch": 0.10182403433476395, "grad_norm": 0.423828125, "learning_rate": 3.3941344778254652e-06, "loss": 2.3797, "step": 1898 }, { "epoch": 0.10187768240343348, "grad_norm": 0.275390625, "learning_rate": 3.395922746781116e-06, "loss": 2.548, "step": 1899 }, { "epoch": 0.10193133047210301, "grad_norm": 0.353515625, "learning_rate": 3.397711015736767e-06, "loss": 2.0337, "step": 1900 }, { "epoch": 0.10198497854077253, "grad_norm": 0.380859375, "learning_rate": 3.399499284692418e-06, "loss": 2.1763, "step": 1901 }, { "epoch": 0.10203862660944206, "grad_norm": 0.82421875, "learning_rate": 3.4012875536480687e-06, "loss": 2.5312, "step": 1902 }, { "epoch": 0.10209227467811159, "grad_norm": 0.31640625, "learning_rate": 3.4030758226037196e-06, "loss": 2.2981, "step": 1903 }, { "epoch": 0.10214592274678111, "grad_norm": 0.302734375, "learning_rate": 3.4048640915593705e-06, "loss": 1.9508, "step": 1904 }, { "epoch": 0.10219957081545064, "grad_norm": 0.375, "learning_rate": 3.4066523605150213e-06, "loss": 2.2551, "step": 1905 }, { "epoch": 0.10225321888412017, "grad_norm": 0.365234375, "learning_rate": 3.408440629470673e-06, "loss": 2.5305, "step": 1906 }, { "epoch": 0.1023068669527897, "grad_norm": 0.2890625, "learning_rate": 3.410228898426324e-06, "loss": 2.3295, "step": 1907 }, { "epoch": 0.10236051502145922, "grad_norm": 0.37890625, "learning_rate": 3.412017167381975e-06, "loss": 2.5087, "step": 1908 }, { "epoch": 0.10241416309012875, "grad_norm": 0.27734375, "learning_rate": 3.4138054363376257e-06, "loss": 1.9473, "step": 1909 }, { "epoch": 0.10246781115879829, "grad_norm": 0.310546875, "learning_rate": 3.4155937052932766e-06, "loss": 2.1492, "step": 1910 }, { "epoch": 0.10252145922746782, "grad_norm": 0.2734375, "learning_rate": 3.4173819742489275e-06, "loss": 2.5761, "step": 1911 }, { "epoch": 0.10257510729613734, "grad_norm": 0.29296875, "learning_rate": 3.4191702432045783e-06, "loss": 2.5124, "step": 1912 }, { "epoch": 0.10262875536480687, "grad_norm": 0.302734375, "learning_rate": 3.420958512160229e-06, "loss": 2.4523, "step": 1913 }, { "epoch": 0.1026824034334764, "grad_norm": 0.298828125, "learning_rate": 3.42274678111588e-06, "loss": 2.4263, "step": 1914 }, { "epoch": 0.10273605150214592, "grad_norm": 0.349609375, "learning_rate": 3.4245350500715314e-06, "loss": 2.1202, "step": 1915 }, { "epoch": 0.10278969957081545, "grad_norm": 0.3984375, "learning_rate": 3.4263233190271823e-06, "loss": 2.1661, "step": 1916 }, { "epoch": 0.10284334763948498, "grad_norm": 0.279296875, "learning_rate": 3.428111587982833e-06, "loss": 2.1987, "step": 1917 }, { "epoch": 0.10289699570815451, "grad_norm": 0.32421875, "learning_rate": 3.429899856938484e-06, "loss": 2.4675, "step": 1918 }, { "epoch": 0.10295064377682403, "grad_norm": 0.306640625, "learning_rate": 3.431688125894135e-06, "loss": 2.3728, "step": 1919 }, { "epoch": 0.10300429184549356, "grad_norm": 0.328125, "learning_rate": 3.4334763948497858e-06, "loss": 2.2861, "step": 1920 }, { "epoch": 0.1030579399141631, "grad_norm": 0.322265625, "learning_rate": 3.4352646638054366e-06, "loss": 2.1771, "step": 1921 }, { "epoch": 0.10311158798283261, "grad_norm": 0.296875, "learning_rate": 3.4370529327610875e-06, "loss": 2.4603, "step": 1922 }, { "epoch": 0.10316523605150214, "grad_norm": 0.36328125, "learning_rate": 3.4388412017167384e-06, "loss": 1.7366, "step": 1923 }, { "epoch": 0.10321888412017168, "grad_norm": 0.306640625, "learning_rate": 3.4406294706723893e-06, "loss": 2.2869, "step": 1924 }, { "epoch": 0.1032725321888412, "grad_norm": 0.33203125, "learning_rate": 3.4424177396280406e-06, "loss": 2.6591, "step": 1925 }, { "epoch": 0.10332618025751072, "grad_norm": 0.384765625, "learning_rate": 3.4442060085836914e-06, "loss": 2.212, "step": 1926 }, { "epoch": 0.10337982832618026, "grad_norm": 0.330078125, "learning_rate": 3.4459942775393423e-06, "loss": 2.4467, "step": 1927 }, { "epoch": 0.10343347639484979, "grad_norm": 0.490234375, "learning_rate": 3.447782546494993e-06, "loss": 2.4213, "step": 1928 }, { "epoch": 0.10348712446351932, "grad_norm": 0.369140625, "learning_rate": 3.449570815450644e-06, "loss": 2.4396, "step": 1929 }, { "epoch": 0.10354077253218884, "grad_norm": 0.37890625, "learning_rate": 3.451359084406295e-06, "loss": 2.2573, "step": 1930 }, { "epoch": 0.10359442060085837, "grad_norm": 0.2890625, "learning_rate": 3.453147353361946e-06, "loss": 2.3364, "step": 1931 }, { "epoch": 0.1036480686695279, "grad_norm": 0.357421875, "learning_rate": 3.4549356223175967e-06, "loss": 2.1312, "step": 1932 }, { "epoch": 0.10370171673819742, "grad_norm": 0.3125, "learning_rate": 3.4567238912732476e-06, "loss": 2.4162, "step": 1933 }, { "epoch": 0.10375536480686695, "grad_norm": 0.44921875, "learning_rate": 3.4585121602288984e-06, "loss": 2.5472, "step": 1934 }, { "epoch": 0.10380901287553648, "grad_norm": 0.291015625, "learning_rate": 3.4603004291845497e-06, "loss": 2.4274, "step": 1935 }, { "epoch": 0.10386266094420601, "grad_norm": 0.255859375, "learning_rate": 3.4620886981402006e-06, "loss": 2.2202, "step": 1936 }, { "epoch": 0.10391630901287553, "grad_norm": 0.28515625, "learning_rate": 3.4638769670958515e-06, "loss": 2.2816, "step": 1937 }, { "epoch": 0.10396995708154506, "grad_norm": 0.326171875, "learning_rate": 3.4656652360515024e-06, "loss": 2.227, "step": 1938 }, { "epoch": 0.1040236051502146, "grad_norm": 0.369140625, "learning_rate": 3.4674535050071532e-06, "loss": 2.4261, "step": 1939 }, { "epoch": 0.10407725321888411, "grad_norm": 0.328125, "learning_rate": 3.469241773962804e-06, "loss": 2.361, "step": 1940 }, { "epoch": 0.10413090128755365, "grad_norm": 0.69140625, "learning_rate": 3.471030042918455e-06, "loss": 2.4049, "step": 1941 }, { "epoch": 0.10418454935622318, "grad_norm": 0.30859375, "learning_rate": 3.472818311874106e-06, "loss": 2.3647, "step": 1942 }, { "epoch": 0.10423819742489271, "grad_norm": 0.31640625, "learning_rate": 3.4746065808297567e-06, "loss": 2.4357, "step": 1943 }, { "epoch": 0.10429184549356223, "grad_norm": 0.53515625, "learning_rate": 3.4763948497854076e-06, "loss": 2.227, "step": 1944 }, { "epoch": 0.10434549356223176, "grad_norm": 0.29296875, "learning_rate": 3.4781831187410593e-06, "loss": 2.3218, "step": 1945 }, { "epoch": 0.10439914163090129, "grad_norm": 1.234375, "learning_rate": 3.4799713876967102e-06, "loss": 2.3803, "step": 1946 }, { "epoch": 0.10445278969957082, "grad_norm": 0.412109375, "learning_rate": 3.481759656652361e-06, "loss": 2.5235, "step": 1947 }, { "epoch": 0.10450643776824034, "grad_norm": 0.271484375, "learning_rate": 3.483547925608012e-06, "loss": 2.0958, "step": 1948 }, { "epoch": 0.10456008583690987, "grad_norm": 0.33203125, "learning_rate": 3.485336194563663e-06, "loss": 2.2272, "step": 1949 }, { "epoch": 0.1046137339055794, "grad_norm": 0.318359375, "learning_rate": 3.4871244635193137e-06, "loss": 2.4878, "step": 1950 }, { "epoch": 0.10466738197424892, "grad_norm": 0.30078125, "learning_rate": 3.4889127324749646e-06, "loss": 2.4345, "step": 1951 }, { "epoch": 0.10472103004291845, "grad_norm": 0.392578125, "learning_rate": 3.4907010014306155e-06, "loss": 2.5248, "step": 1952 }, { "epoch": 0.10477467811158798, "grad_norm": 0.68359375, "learning_rate": 3.4924892703862664e-06, "loss": 2.4817, "step": 1953 }, { "epoch": 0.10482832618025752, "grad_norm": 0.38671875, "learning_rate": 3.4942775393419172e-06, "loss": 2.4703, "step": 1954 }, { "epoch": 0.10488197424892703, "grad_norm": 0.2578125, "learning_rate": 3.4960658082975685e-06, "loss": 2.1155, "step": 1955 }, { "epoch": 0.10493562231759657, "grad_norm": 0.26953125, "learning_rate": 3.4978540772532194e-06, "loss": 2.3372, "step": 1956 }, { "epoch": 0.1049892703862661, "grad_norm": 0.30078125, "learning_rate": 3.4996423462088703e-06, "loss": 2.4501, "step": 1957 }, { "epoch": 0.10504291845493562, "grad_norm": 0.283203125, "learning_rate": 3.501430615164521e-06, "loss": 2.4934, "step": 1958 }, { "epoch": 0.10509656652360515, "grad_norm": 0.353515625, "learning_rate": 3.503218884120172e-06, "loss": 2.0084, "step": 1959 }, { "epoch": 0.10515021459227468, "grad_norm": 0.349609375, "learning_rate": 3.505007153075823e-06, "loss": 2.1249, "step": 1960 }, { "epoch": 0.10520386266094421, "grad_norm": 0.369140625, "learning_rate": 3.5067954220314738e-06, "loss": 2.0833, "step": 1961 }, { "epoch": 0.10525751072961373, "grad_norm": 0.34375, "learning_rate": 3.5085836909871247e-06, "loss": 2.4178, "step": 1962 }, { "epoch": 0.10531115879828326, "grad_norm": 0.333984375, "learning_rate": 3.5103719599427755e-06, "loss": 2.4738, "step": 1963 }, { "epoch": 0.10536480686695279, "grad_norm": 0.291015625, "learning_rate": 3.5121602288984264e-06, "loss": 2.2389, "step": 1964 }, { "epoch": 0.10541845493562232, "grad_norm": 0.3359375, "learning_rate": 3.5139484978540777e-06, "loss": 2.3417, "step": 1965 }, { "epoch": 0.10547210300429184, "grad_norm": 0.306640625, "learning_rate": 3.5157367668097286e-06, "loss": 2.3153, "step": 1966 }, { "epoch": 0.10552575107296137, "grad_norm": 0.3125, "learning_rate": 3.5175250357653795e-06, "loss": 2.4313, "step": 1967 }, { "epoch": 0.1055793991416309, "grad_norm": 0.43359375, "learning_rate": 3.5193133047210303e-06, "loss": 2.4879, "step": 1968 }, { "epoch": 0.10563304721030042, "grad_norm": 0.328125, "learning_rate": 3.5211015736766812e-06, "loss": 2.4743, "step": 1969 }, { "epoch": 0.10568669527896996, "grad_norm": 0.3359375, "learning_rate": 3.522889842632332e-06, "loss": 2.3775, "step": 1970 }, { "epoch": 0.10574034334763949, "grad_norm": 0.263671875, "learning_rate": 3.524678111587983e-06, "loss": 2.143, "step": 1971 }, { "epoch": 0.10579399141630902, "grad_norm": 0.349609375, "learning_rate": 3.526466380543634e-06, "loss": 2.2427, "step": 1972 }, { "epoch": 0.10584763948497854, "grad_norm": 0.330078125, "learning_rate": 3.5282546494992847e-06, "loss": 2.4789, "step": 1973 }, { "epoch": 0.10590128755364807, "grad_norm": 0.2734375, "learning_rate": 3.530042918454936e-06, "loss": 2.4512, "step": 1974 }, { "epoch": 0.1059549356223176, "grad_norm": 0.318359375, "learning_rate": 3.531831187410587e-06, "loss": 2.3281, "step": 1975 }, { "epoch": 0.10600858369098712, "grad_norm": 0.337890625, "learning_rate": 3.5336194563662378e-06, "loss": 2.2938, "step": 1976 }, { "epoch": 0.10606223175965665, "grad_norm": 0.34765625, "learning_rate": 3.5354077253218886e-06, "loss": 2.2877, "step": 1977 }, { "epoch": 0.10611587982832618, "grad_norm": 0.3359375, "learning_rate": 3.5371959942775395e-06, "loss": 2.3506, "step": 1978 }, { "epoch": 0.10616952789699571, "grad_norm": 0.28515625, "learning_rate": 3.5389842632331904e-06, "loss": 2.3387, "step": 1979 }, { "epoch": 0.10622317596566523, "grad_norm": 0.3125, "learning_rate": 3.5407725321888413e-06, "loss": 2.3663, "step": 1980 }, { "epoch": 0.10627682403433476, "grad_norm": 0.345703125, "learning_rate": 3.542560801144492e-06, "loss": 2.4255, "step": 1981 }, { "epoch": 0.1063304721030043, "grad_norm": 0.44140625, "learning_rate": 3.544349070100143e-06, "loss": 2.3015, "step": 1982 }, { "epoch": 0.10638412017167383, "grad_norm": 0.28125, "learning_rate": 3.546137339055794e-06, "loss": 2.4187, "step": 1983 }, { "epoch": 0.10643776824034334, "grad_norm": 0.2578125, "learning_rate": 3.5479256080114456e-06, "loss": 2.2769, "step": 1984 }, { "epoch": 0.10649141630901288, "grad_norm": 0.314453125, "learning_rate": 3.5497138769670965e-06, "loss": 2.2324, "step": 1985 }, { "epoch": 0.10654506437768241, "grad_norm": 0.490234375, "learning_rate": 3.5515021459227474e-06, "loss": 1.9225, "step": 1986 }, { "epoch": 0.10659871244635193, "grad_norm": 0.259765625, "learning_rate": 3.5532904148783982e-06, "loss": 2.0301, "step": 1987 }, { "epoch": 0.10665236051502146, "grad_norm": 0.275390625, "learning_rate": 3.555078683834049e-06, "loss": 2.3181, "step": 1988 }, { "epoch": 0.10670600858369099, "grad_norm": 0.333984375, "learning_rate": 3.5568669527897e-06, "loss": 2.3787, "step": 1989 }, { "epoch": 0.10675965665236052, "grad_norm": 0.359375, "learning_rate": 3.558655221745351e-06, "loss": 2.4299, "step": 1990 }, { "epoch": 0.10681330472103004, "grad_norm": 0.296875, "learning_rate": 3.5604434907010018e-06, "loss": 2.2037, "step": 1991 }, { "epoch": 0.10686695278969957, "grad_norm": 0.318359375, "learning_rate": 3.5622317596566526e-06, "loss": 2.3649, "step": 1992 }, { "epoch": 0.1069206008583691, "grad_norm": 0.30859375, "learning_rate": 3.5640200286123035e-06, "loss": 2.1486, "step": 1993 }, { "epoch": 0.10697424892703862, "grad_norm": 0.296875, "learning_rate": 3.565808297567955e-06, "loss": 2.4181, "step": 1994 }, { "epoch": 0.10702789699570815, "grad_norm": 0.26953125, "learning_rate": 3.5675965665236057e-06, "loss": 2.418, "step": 1995 }, { "epoch": 0.10708154506437768, "grad_norm": 1.1875, "learning_rate": 3.5693848354792566e-06, "loss": 2.1563, "step": 1996 }, { "epoch": 0.10713519313304722, "grad_norm": 0.3046875, "learning_rate": 3.5711731044349074e-06, "loss": 2.2594, "step": 1997 }, { "epoch": 0.10718884120171673, "grad_norm": 0.482421875, "learning_rate": 3.5729613733905583e-06, "loss": 2.3455, "step": 1998 }, { "epoch": 0.10724248927038627, "grad_norm": 0.28515625, "learning_rate": 3.574749642346209e-06, "loss": 2.22, "step": 1999 }, { "epoch": 0.1072961373390558, "grad_norm": 0.32421875, "learning_rate": 3.57653791130186e-06, "loss": 2.5418, "step": 2000 }, { "epoch": 0.10734978540772531, "grad_norm": 0.2734375, "learning_rate": 3.578326180257511e-06, "loss": 2.3393, "step": 2001 }, { "epoch": 0.10740343347639485, "grad_norm": 0.296875, "learning_rate": 3.580114449213162e-06, "loss": 2.1836, "step": 2002 }, { "epoch": 0.10745708154506438, "grad_norm": 0.515625, "learning_rate": 3.5819027181688127e-06, "loss": 2.3163, "step": 2003 }, { "epoch": 0.10751072961373391, "grad_norm": 0.33984375, "learning_rate": 3.583690987124464e-06, "loss": 2.2571, "step": 2004 }, { "epoch": 0.10756437768240343, "grad_norm": 0.384765625, "learning_rate": 3.585479256080115e-06, "loss": 2.1186, "step": 2005 }, { "epoch": 0.10761802575107296, "grad_norm": 0.345703125, "learning_rate": 3.5872675250357657e-06, "loss": 2.5612, "step": 2006 }, { "epoch": 0.10767167381974249, "grad_norm": 0.36328125, "learning_rate": 3.5890557939914166e-06, "loss": 2.2638, "step": 2007 }, { "epoch": 0.10772532188841202, "grad_norm": 0.44140625, "learning_rate": 3.5908440629470675e-06, "loss": 2.5159, "step": 2008 }, { "epoch": 0.10777896995708154, "grad_norm": 0.296875, "learning_rate": 3.5926323319027184e-06, "loss": 2.4112, "step": 2009 }, { "epoch": 0.10783261802575107, "grad_norm": 0.52734375, "learning_rate": 3.5944206008583692e-06, "loss": 2.5405, "step": 2010 }, { "epoch": 0.1078862660944206, "grad_norm": 0.296875, "learning_rate": 3.59620886981402e-06, "loss": 2.4297, "step": 2011 }, { "epoch": 0.10793991416309012, "grad_norm": 0.2734375, "learning_rate": 3.597997138769671e-06, "loss": 2.2347, "step": 2012 }, { "epoch": 0.10799356223175965, "grad_norm": 0.326171875, "learning_rate": 3.599785407725322e-06, "loss": 2.4735, "step": 2013 }, { "epoch": 0.10804721030042919, "grad_norm": 0.33984375, "learning_rate": 3.601573676680973e-06, "loss": 2.5507, "step": 2014 }, { "epoch": 0.10810085836909872, "grad_norm": 0.28515625, "learning_rate": 3.603361945636624e-06, "loss": 2.2149, "step": 2015 }, { "epoch": 0.10815450643776824, "grad_norm": 1.203125, "learning_rate": 3.605150214592275e-06, "loss": 2.59, "step": 2016 }, { "epoch": 0.10820815450643777, "grad_norm": 0.25, "learning_rate": 3.606938483547926e-06, "loss": 2.2556, "step": 2017 }, { "epoch": 0.1082618025751073, "grad_norm": 0.390625, "learning_rate": 3.6087267525035767e-06, "loss": 2.4696, "step": 2018 }, { "epoch": 0.10831545064377682, "grad_norm": 0.453125, "learning_rate": 3.6105150214592275e-06, "loss": 2.2095, "step": 2019 }, { "epoch": 0.10836909871244635, "grad_norm": 0.357421875, "learning_rate": 3.6123032904148784e-06, "loss": 2.3194, "step": 2020 }, { "epoch": 0.10842274678111588, "grad_norm": 0.326171875, "learning_rate": 3.6140915593705293e-06, "loss": 2.4833, "step": 2021 }, { "epoch": 0.10847639484978541, "grad_norm": 0.384765625, "learning_rate": 3.61587982832618e-06, "loss": 2.4363, "step": 2022 }, { "epoch": 0.10853004291845493, "grad_norm": 0.283203125, "learning_rate": 3.617668097281831e-06, "loss": 2.2152, "step": 2023 }, { "epoch": 0.10858369098712446, "grad_norm": 0.255859375, "learning_rate": 3.6194563662374828e-06, "loss": 2.0057, "step": 2024 }, { "epoch": 0.108637339055794, "grad_norm": 0.283203125, "learning_rate": 3.6212446351931336e-06, "loss": 2.3656, "step": 2025 }, { "epoch": 0.10869098712446353, "grad_norm": 0.30859375, "learning_rate": 3.6230329041487845e-06, "loss": 2.5074, "step": 2026 }, { "epoch": 0.10874463519313304, "grad_norm": 0.52734375, "learning_rate": 3.6248211731044354e-06, "loss": 2.4202, "step": 2027 }, { "epoch": 0.10879828326180258, "grad_norm": 0.271484375, "learning_rate": 3.6266094420600863e-06, "loss": 2.3536, "step": 2028 }, { "epoch": 0.10885193133047211, "grad_norm": 0.283203125, "learning_rate": 3.628397711015737e-06, "loss": 2.4317, "step": 2029 }, { "epoch": 0.10890557939914162, "grad_norm": 0.396484375, "learning_rate": 3.630185979971388e-06, "loss": 2.4109, "step": 2030 }, { "epoch": 0.10895922746781116, "grad_norm": 0.376953125, "learning_rate": 3.631974248927039e-06, "loss": 2.1464, "step": 2031 }, { "epoch": 0.10901287553648069, "grad_norm": 0.474609375, "learning_rate": 3.6337625178826898e-06, "loss": 2.3748, "step": 2032 }, { "epoch": 0.10906652360515022, "grad_norm": 0.37109375, "learning_rate": 3.6355507868383407e-06, "loss": 2.3743, "step": 2033 }, { "epoch": 0.10912017167381974, "grad_norm": 0.365234375, "learning_rate": 3.637339055793992e-06, "loss": 2.1516, "step": 2034 }, { "epoch": 0.10917381974248927, "grad_norm": 0.294921875, "learning_rate": 3.639127324749643e-06, "loss": 2.4735, "step": 2035 }, { "epoch": 0.1092274678111588, "grad_norm": 0.2578125, "learning_rate": 3.6409155937052937e-06, "loss": 2.1733, "step": 2036 }, { "epoch": 0.10928111587982832, "grad_norm": 0.2294921875, "learning_rate": 3.6427038626609446e-06, "loss": 2.1797, "step": 2037 }, { "epoch": 0.10933476394849785, "grad_norm": 0.28125, "learning_rate": 3.6444921316165955e-06, "loss": 2.442, "step": 2038 }, { "epoch": 0.10938841201716738, "grad_norm": 0.470703125, "learning_rate": 3.6462804005722463e-06, "loss": 1.5494, "step": 2039 }, { "epoch": 0.10944206008583691, "grad_norm": 0.48828125, "learning_rate": 3.648068669527897e-06, "loss": 2.2333, "step": 2040 }, { "epoch": 0.10949570815450643, "grad_norm": 0.267578125, "learning_rate": 3.649856938483548e-06, "loss": 2.2534, "step": 2041 }, { "epoch": 0.10954935622317596, "grad_norm": 0.4375, "learning_rate": 3.651645207439199e-06, "loss": 2.3841, "step": 2042 }, { "epoch": 0.1096030042918455, "grad_norm": 0.35546875, "learning_rate": 3.6534334763948503e-06, "loss": 2.3175, "step": 2043 }, { "epoch": 0.10965665236051503, "grad_norm": 0.32421875, "learning_rate": 3.655221745350501e-06, "loss": 2.5703, "step": 2044 }, { "epoch": 0.10971030042918455, "grad_norm": 0.361328125, "learning_rate": 3.657010014306152e-06, "loss": 2.3797, "step": 2045 }, { "epoch": 0.10976394849785408, "grad_norm": 0.263671875, "learning_rate": 3.658798283261803e-06, "loss": 2.0358, "step": 2046 }, { "epoch": 0.10981759656652361, "grad_norm": 0.287109375, "learning_rate": 3.6605865522174538e-06, "loss": 2.2185, "step": 2047 }, { "epoch": 0.10987124463519313, "grad_norm": 0.33984375, "learning_rate": 3.6623748211731046e-06, "loss": 2.0697, "step": 2048 }, { "epoch": 0.10992489270386266, "grad_norm": 0.318359375, "learning_rate": 3.6641630901287555e-06, "loss": 2.4192, "step": 2049 }, { "epoch": 0.10997854077253219, "grad_norm": 0.384765625, "learning_rate": 3.6659513590844064e-06, "loss": 2.2785, "step": 2050 }, { "epoch": 0.11003218884120172, "grad_norm": 0.279296875, "learning_rate": 3.6677396280400573e-06, "loss": 2.2398, "step": 2051 }, { "epoch": 0.11008583690987124, "grad_norm": 0.3203125, "learning_rate": 3.669527896995708e-06, "loss": 2.3269, "step": 2052 }, { "epoch": 0.11013948497854077, "grad_norm": 0.26953125, "learning_rate": 3.6713161659513594e-06, "loss": 2.3572, "step": 2053 }, { "epoch": 0.1101931330472103, "grad_norm": 0.46484375, "learning_rate": 3.6731044349070103e-06, "loss": 2.5661, "step": 2054 }, { "epoch": 0.11024678111587982, "grad_norm": 0.408203125, "learning_rate": 3.674892703862661e-06, "loss": 2.4468, "step": 2055 }, { "epoch": 0.11030042918454935, "grad_norm": 0.314453125, "learning_rate": 3.676680972818312e-06, "loss": 2.1503, "step": 2056 }, { "epoch": 0.11035407725321889, "grad_norm": 0.384765625, "learning_rate": 3.678469241773963e-06, "loss": 2.3768, "step": 2057 }, { "epoch": 0.11040772532188842, "grad_norm": 0.357421875, "learning_rate": 3.680257510729614e-06, "loss": 2.3515, "step": 2058 }, { "epoch": 0.11046137339055793, "grad_norm": 0.328125, "learning_rate": 3.6820457796852647e-06, "loss": 2.4019, "step": 2059 }, { "epoch": 0.11051502145922747, "grad_norm": 0.341796875, "learning_rate": 3.6838340486409156e-06, "loss": 2.1538, "step": 2060 }, { "epoch": 0.110568669527897, "grad_norm": 0.369140625, "learning_rate": 3.6856223175965664e-06, "loss": 2.6055, "step": 2061 }, { "epoch": 0.11062231759656653, "grad_norm": 0.26171875, "learning_rate": 3.6874105865522173e-06, "loss": 2.5345, "step": 2062 }, { "epoch": 0.11067596566523605, "grad_norm": 0.361328125, "learning_rate": 3.689198855507869e-06, "loss": 2.3935, "step": 2063 }, { "epoch": 0.11072961373390558, "grad_norm": 0.271484375, "learning_rate": 3.69098712446352e-06, "loss": 2.4324, "step": 2064 }, { "epoch": 0.11078326180257511, "grad_norm": 0.310546875, "learning_rate": 3.692775393419171e-06, "loss": 2.3122, "step": 2065 }, { "epoch": 0.11083690987124463, "grad_norm": 0.294921875, "learning_rate": 3.6945636623748217e-06, "loss": 2.3617, "step": 2066 }, { "epoch": 0.11089055793991416, "grad_norm": 0.337890625, "learning_rate": 3.6963519313304725e-06, "loss": 2.4604, "step": 2067 }, { "epoch": 0.11094420600858369, "grad_norm": 0.37109375, "learning_rate": 3.6981402002861234e-06, "loss": 2.4305, "step": 2068 }, { "epoch": 0.11099785407725322, "grad_norm": 0.5234375, "learning_rate": 3.6999284692417743e-06, "loss": 2.3713, "step": 2069 }, { "epoch": 0.11105150214592274, "grad_norm": 0.29296875, "learning_rate": 3.701716738197425e-06, "loss": 2.0554, "step": 2070 }, { "epoch": 0.11110515021459227, "grad_norm": 0.306640625, "learning_rate": 3.703505007153076e-06, "loss": 2.5994, "step": 2071 }, { "epoch": 0.1111587982832618, "grad_norm": 0.32421875, "learning_rate": 3.705293276108727e-06, "loss": 2.4616, "step": 2072 }, { "epoch": 0.11121244635193132, "grad_norm": 0.330078125, "learning_rate": 3.7070815450643782e-06, "loss": 2.2356, "step": 2073 }, { "epoch": 0.11126609442060086, "grad_norm": 0.50390625, "learning_rate": 3.708869814020029e-06, "loss": 1.6502, "step": 2074 }, { "epoch": 0.11131974248927039, "grad_norm": 0.384765625, "learning_rate": 3.71065808297568e-06, "loss": 1.4668, "step": 2075 }, { "epoch": 0.11137339055793992, "grad_norm": 0.251953125, "learning_rate": 3.712446351931331e-06, "loss": 2.4413, "step": 2076 }, { "epoch": 0.11142703862660944, "grad_norm": 0.26953125, "learning_rate": 3.7142346208869817e-06, "loss": 2.3826, "step": 2077 }, { "epoch": 0.11148068669527897, "grad_norm": 0.55078125, "learning_rate": 3.7160228898426326e-06, "loss": 2.3511, "step": 2078 }, { "epoch": 0.1115343347639485, "grad_norm": 0.306640625, "learning_rate": 3.7178111587982835e-06, "loss": 2.2094, "step": 2079 }, { "epoch": 0.11158798283261803, "grad_norm": 0.318359375, "learning_rate": 3.7195994277539344e-06, "loss": 2.2363, "step": 2080 }, { "epoch": 0.11164163090128755, "grad_norm": 0.49609375, "learning_rate": 3.7213876967095852e-06, "loss": 2.3397, "step": 2081 }, { "epoch": 0.11169527896995708, "grad_norm": 0.337890625, "learning_rate": 3.723175965665236e-06, "loss": 2.4028, "step": 2082 }, { "epoch": 0.11174892703862661, "grad_norm": 0.294921875, "learning_rate": 3.7249642346208874e-06, "loss": 2.4442, "step": 2083 }, { "epoch": 0.11180257510729613, "grad_norm": 0.94140625, "learning_rate": 3.7267525035765383e-06, "loss": 2.3734, "step": 2084 }, { "epoch": 0.11185622317596566, "grad_norm": 0.25390625, "learning_rate": 3.728540772532189e-06, "loss": 2.0349, "step": 2085 }, { "epoch": 0.1119098712446352, "grad_norm": 0.3203125, "learning_rate": 3.73032904148784e-06, "loss": 2.12, "step": 2086 }, { "epoch": 0.11196351931330473, "grad_norm": 0.30859375, "learning_rate": 3.732117310443491e-06, "loss": 2.4551, "step": 2087 }, { "epoch": 0.11201716738197424, "grad_norm": 0.427734375, "learning_rate": 3.733905579399142e-06, "loss": 2.2976, "step": 2088 }, { "epoch": 0.11207081545064378, "grad_norm": 0.314453125, "learning_rate": 3.7356938483547927e-06, "loss": 2.5275, "step": 2089 }, { "epoch": 0.11212446351931331, "grad_norm": 1.734375, "learning_rate": 3.7374821173104435e-06, "loss": 2.2941, "step": 2090 }, { "epoch": 0.11217811158798283, "grad_norm": 0.337890625, "learning_rate": 3.7392703862660944e-06, "loss": 2.1755, "step": 2091 }, { "epoch": 0.11223175965665236, "grad_norm": 0.353515625, "learning_rate": 3.7410586552217453e-06, "loss": 2.4996, "step": 2092 }, { "epoch": 0.11228540772532189, "grad_norm": 0.310546875, "learning_rate": 3.742846924177397e-06, "loss": 2.2313, "step": 2093 }, { "epoch": 0.11233905579399142, "grad_norm": 1.921875, "learning_rate": 3.744635193133048e-06, "loss": 2.3327, "step": 2094 }, { "epoch": 0.11239270386266094, "grad_norm": 0.48828125, "learning_rate": 3.7464234620886988e-06, "loss": 2.0455, "step": 2095 }, { "epoch": 0.11244635193133047, "grad_norm": 0.30078125, "learning_rate": 3.7482117310443496e-06, "loss": 2.3301, "step": 2096 }, { "epoch": 0.1125, "grad_norm": 0.376953125, "learning_rate": 3.7500000000000005e-06, "loss": 2.2528, "step": 2097 }, { "epoch": 0.11255364806866953, "grad_norm": 0.451171875, "learning_rate": 3.751788268955651e-06, "loss": 2.5271, "step": 2098 }, { "epoch": 0.11260729613733905, "grad_norm": 0.62890625, "learning_rate": 3.753576537911302e-06, "loss": 2.1511, "step": 2099 }, { "epoch": 0.11266094420600858, "grad_norm": 0.3203125, "learning_rate": 3.7553648068669527e-06, "loss": 2.2892, "step": 2100 }, { "epoch": 0.11271459227467812, "grad_norm": 0.271484375, "learning_rate": 3.7571530758226036e-06, "loss": 2.1526, "step": 2101 }, { "epoch": 0.11276824034334763, "grad_norm": 0.333984375, "learning_rate": 3.7589413447782553e-06, "loss": 1.9284, "step": 2102 }, { "epoch": 0.11282188841201717, "grad_norm": 0.33203125, "learning_rate": 3.760729613733906e-06, "loss": 2.2432, "step": 2103 }, { "epoch": 0.1128755364806867, "grad_norm": 0.625, "learning_rate": 3.762517882689557e-06, "loss": 2.5191, "step": 2104 }, { "epoch": 0.11292918454935623, "grad_norm": 0.375, "learning_rate": 3.764306151645208e-06, "loss": 2.4555, "step": 2105 }, { "epoch": 0.11298283261802575, "grad_norm": 0.265625, "learning_rate": 3.766094420600859e-06, "loss": 2.3928, "step": 2106 }, { "epoch": 0.11303648068669528, "grad_norm": 0.58203125, "learning_rate": 3.7678826895565097e-06, "loss": 2.5085, "step": 2107 }, { "epoch": 0.11309012875536481, "grad_norm": 0.279296875, "learning_rate": 3.7696709585121606e-06, "loss": 2.4378, "step": 2108 }, { "epoch": 0.11314377682403433, "grad_norm": 0.287109375, "learning_rate": 3.7714592274678115e-06, "loss": 2.2744, "step": 2109 }, { "epoch": 0.11319742489270386, "grad_norm": 0.333984375, "learning_rate": 3.7732474964234623e-06, "loss": 2.3983, "step": 2110 }, { "epoch": 0.11325107296137339, "grad_norm": 0.255859375, "learning_rate": 3.775035765379113e-06, "loss": 2.2542, "step": 2111 }, { "epoch": 0.11330472103004292, "grad_norm": 0.287109375, "learning_rate": 3.7768240343347645e-06, "loss": 2.2811, "step": 2112 }, { "epoch": 0.11335836909871244, "grad_norm": 0.4140625, "learning_rate": 3.7786123032904154e-06, "loss": 2.5827, "step": 2113 }, { "epoch": 0.11341201716738197, "grad_norm": 0.5078125, "learning_rate": 3.7804005722460663e-06, "loss": 2.3535, "step": 2114 }, { "epoch": 0.1134656652360515, "grad_norm": 0.36328125, "learning_rate": 3.782188841201717e-06, "loss": 2.4892, "step": 2115 }, { "epoch": 0.11351931330472104, "grad_norm": 0.326171875, "learning_rate": 3.783977110157368e-06, "loss": 2.5538, "step": 2116 }, { "epoch": 0.11357296137339055, "grad_norm": 0.361328125, "learning_rate": 3.785765379113019e-06, "loss": 2.2038, "step": 2117 }, { "epoch": 0.11362660944206009, "grad_norm": 0.38671875, "learning_rate": 3.7875536480686698e-06, "loss": 2.3133, "step": 2118 }, { "epoch": 0.11368025751072962, "grad_norm": 0.35546875, "learning_rate": 3.7893419170243206e-06, "loss": 2.723, "step": 2119 }, { "epoch": 0.11373390557939914, "grad_norm": 0.373046875, "learning_rate": 3.7911301859799715e-06, "loss": 2.372, "step": 2120 }, { "epoch": 0.11378755364806867, "grad_norm": 0.388671875, "learning_rate": 3.7929184549356224e-06, "loss": 2.3645, "step": 2121 }, { "epoch": 0.1138412017167382, "grad_norm": 0.287109375, "learning_rate": 3.7947067238912737e-06, "loss": 2.3282, "step": 2122 }, { "epoch": 0.11389484978540773, "grad_norm": 0.283203125, "learning_rate": 3.7964949928469246e-06, "loss": 2.2937, "step": 2123 }, { "epoch": 0.11394849785407725, "grad_norm": 0.625, "learning_rate": 3.7982832618025754e-06, "loss": 1.9435, "step": 2124 }, { "epoch": 0.11400214592274678, "grad_norm": 0.271484375, "learning_rate": 3.8000715307582263e-06, "loss": 2.0551, "step": 2125 }, { "epoch": 0.11405579399141631, "grad_norm": 0.2734375, "learning_rate": 3.801859799713877e-06, "loss": 2.2766, "step": 2126 }, { "epoch": 0.11410944206008583, "grad_norm": 0.271484375, "learning_rate": 3.803648068669528e-06, "loss": 2.4182, "step": 2127 }, { "epoch": 0.11416309012875536, "grad_norm": 0.29296875, "learning_rate": 3.805436337625179e-06, "loss": 2.2064, "step": 2128 }, { "epoch": 0.1142167381974249, "grad_norm": 0.310546875, "learning_rate": 3.80722460658083e-06, "loss": 2.2537, "step": 2129 }, { "epoch": 0.11427038626609443, "grad_norm": 0.359375, "learning_rate": 3.8090128755364807e-06, "loss": 2.3787, "step": 2130 }, { "epoch": 0.11432403433476394, "grad_norm": 0.25390625, "learning_rate": 3.8108011444921316e-06, "loss": 2.4839, "step": 2131 }, { "epoch": 0.11437768240343348, "grad_norm": 0.314453125, "learning_rate": 3.8125894134477833e-06, "loss": 2.3926, "step": 2132 }, { "epoch": 0.11443133047210301, "grad_norm": 0.28125, "learning_rate": 3.814377682403434e-06, "loss": 2.3715, "step": 2133 }, { "epoch": 0.11448497854077254, "grad_norm": 0.4921875, "learning_rate": 3.816165951359085e-06, "loss": 2.4406, "step": 2134 }, { "epoch": 0.11453862660944206, "grad_norm": 0.78125, "learning_rate": 3.8179542203147355e-06, "loss": 2.519, "step": 2135 }, { "epoch": 0.11459227467811159, "grad_norm": 0.423828125, "learning_rate": 3.819742489270387e-06, "loss": 2.3861, "step": 2136 }, { "epoch": 0.11464592274678112, "grad_norm": 0.5, "learning_rate": 3.821530758226037e-06, "loss": 2.3669, "step": 2137 }, { "epoch": 0.11469957081545064, "grad_norm": 0.34375, "learning_rate": 3.8233190271816885e-06, "loss": 2.51, "step": 2138 }, { "epoch": 0.11475321888412017, "grad_norm": 0.55859375, "learning_rate": 3.825107296137339e-06, "loss": 2.1557, "step": 2139 }, { "epoch": 0.1148068669527897, "grad_norm": 0.271484375, "learning_rate": 3.82689556509299e-06, "loss": 1.9156, "step": 2140 }, { "epoch": 0.11486051502145923, "grad_norm": 0.291015625, "learning_rate": 3.828683834048641e-06, "loss": 2.2557, "step": 2141 }, { "epoch": 0.11491416309012875, "grad_norm": 0.390625, "learning_rate": 3.830472103004292e-06, "loss": 2.4983, "step": 2142 }, { "epoch": 0.11496781115879828, "grad_norm": 0.34765625, "learning_rate": 3.832260371959943e-06, "loss": 2.3592, "step": 2143 }, { "epoch": 0.11502145922746781, "grad_norm": 0.326171875, "learning_rate": 3.834048640915594e-06, "loss": 2.425, "step": 2144 }, { "epoch": 0.11507510729613733, "grad_norm": 0.400390625, "learning_rate": 3.835836909871245e-06, "loss": 2.3276, "step": 2145 }, { "epoch": 0.11512875536480686, "grad_norm": 0.302734375, "learning_rate": 3.8376251788268956e-06, "loss": 2.3912, "step": 2146 }, { "epoch": 0.1151824034334764, "grad_norm": 0.34375, "learning_rate": 3.839413447782547e-06, "loss": 2.6002, "step": 2147 }, { "epoch": 0.11523605150214593, "grad_norm": 0.302734375, "learning_rate": 3.841201716738197e-06, "loss": 2.1401, "step": 2148 }, { "epoch": 0.11528969957081545, "grad_norm": 0.3359375, "learning_rate": 3.842989985693849e-06, "loss": 1.7984, "step": 2149 }, { "epoch": 0.11534334763948498, "grad_norm": 0.24609375, "learning_rate": 3.844778254649499e-06, "loss": 2.2028, "step": 2150 }, { "epoch": 0.11539699570815451, "grad_norm": 0.2890625, "learning_rate": 3.84656652360515e-06, "loss": 2.25, "step": 2151 }, { "epoch": 0.11545064377682404, "grad_norm": 0.36328125, "learning_rate": 3.848354792560802e-06, "loss": 2.2792, "step": 2152 }, { "epoch": 0.11550429184549356, "grad_norm": 0.3359375, "learning_rate": 3.850143061516453e-06, "loss": 2.5209, "step": 2153 }, { "epoch": 0.11555793991416309, "grad_norm": 0.296875, "learning_rate": 3.851931330472103e-06, "loss": 2.3123, "step": 2154 }, { "epoch": 0.11561158798283262, "grad_norm": 0.271484375, "learning_rate": 3.853719599427755e-06, "loss": 2.4092, "step": 2155 }, { "epoch": 0.11566523605150214, "grad_norm": 0.375, "learning_rate": 3.855507868383405e-06, "loss": 2.4056, "step": 2156 }, { "epoch": 0.11571888412017167, "grad_norm": 0.275390625, "learning_rate": 3.8572961373390565e-06, "loss": 2.4188, "step": 2157 }, { "epoch": 0.1157725321888412, "grad_norm": 0.310546875, "learning_rate": 3.859084406294707e-06, "loss": 2.4943, "step": 2158 }, { "epoch": 0.11582618025751074, "grad_norm": 0.291015625, "learning_rate": 3.860872675250358e-06, "loss": 2.2302, "step": 2159 }, { "epoch": 0.11587982832618025, "grad_norm": 0.25, "learning_rate": 3.862660944206009e-06, "loss": 1.9849, "step": 2160 }, { "epoch": 0.11593347639484979, "grad_norm": 0.32421875, "learning_rate": 3.86444921316166e-06, "loss": 2.3456, "step": 2161 }, { "epoch": 0.11598712446351932, "grad_norm": 0.318359375, "learning_rate": 3.866237482117311e-06, "loss": 2.3134, "step": 2162 }, { "epoch": 0.11604077253218884, "grad_norm": 0.30859375, "learning_rate": 3.868025751072962e-06, "loss": 2.2202, "step": 2163 }, { "epoch": 0.11609442060085837, "grad_norm": 0.294921875, "learning_rate": 3.869814020028613e-06, "loss": 2.4422, "step": 2164 }, { "epoch": 0.1161480686695279, "grad_norm": 0.4375, "learning_rate": 3.8716022889842635e-06, "loss": 2.3571, "step": 2165 }, { "epoch": 0.11620171673819743, "grad_norm": 0.306640625, "learning_rate": 3.873390557939915e-06, "loss": 2.5081, "step": 2166 }, { "epoch": 0.11625536480686695, "grad_norm": 0.3203125, "learning_rate": 3.875178826895565e-06, "loss": 2.4022, "step": 2167 }, { "epoch": 0.11630901287553648, "grad_norm": 0.25, "learning_rate": 3.8769670958512165e-06, "loss": 2.2048, "step": 2168 }, { "epoch": 0.11636266094420601, "grad_norm": 0.318359375, "learning_rate": 3.878755364806867e-06, "loss": 2.449, "step": 2169 }, { "epoch": 0.11641630901287553, "grad_norm": 0.4375, "learning_rate": 3.880543633762518e-06, "loss": 2.4724, "step": 2170 }, { "epoch": 0.11646995708154506, "grad_norm": 1.046875, "learning_rate": 3.8823319027181696e-06, "loss": 2.3919, "step": 2171 }, { "epoch": 0.1165236051502146, "grad_norm": 0.330078125, "learning_rate": 3.88412017167382e-06, "loss": 2.1569, "step": 2172 }, { "epoch": 0.11657725321888412, "grad_norm": 0.326171875, "learning_rate": 3.885908440629471e-06, "loss": 2.0496, "step": 2173 }, { "epoch": 0.11663090128755364, "grad_norm": 0.3671875, "learning_rate": 3.887696709585122e-06, "loss": 2.6973, "step": 2174 }, { "epoch": 0.11668454935622317, "grad_norm": 0.3203125, "learning_rate": 3.889484978540773e-06, "loss": 1.9998, "step": 2175 }, { "epoch": 0.1167381974248927, "grad_norm": 0.267578125, "learning_rate": 3.8912732474964235e-06, "loss": 2.3962, "step": 2176 }, { "epoch": 0.11679184549356224, "grad_norm": 0.30859375, "learning_rate": 3.893061516452075e-06, "loss": 2.395, "step": 2177 }, { "epoch": 0.11684549356223176, "grad_norm": 0.33984375, "learning_rate": 3.894849785407725e-06, "loss": 2.2053, "step": 2178 }, { "epoch": 0.11689914163090129, "grad_norm": 0.37109375, "learning_rate": 3.8966380543633766e-06, "loss": 2.3221, "step": 2179 }, { "epoch": 0.11695278969957082, "grad_norm": 0.470703125, "learning_rate": 3.898426323319027e-06, "loss": 2.4779, "step": 2180 }, { "epoch": 0.11700643776824034, "grad_norm": 0.32421875, "learning_rate": 3.900214592274678e-06, "loss": 2.3247, "step": 2181 }, { "epoch": 0.11706008583690987, "grad_norm": 3.96875, "learning_rate": 3.90200286123033e-06, "loss": 2.5246, "step": 2182 }, { "epoch": 0.1171137339055794, "grad_norm": 0.47265625, "learning_rate": 3.90379113018598e-06, "loss": 2.2556, "step": 2183 }, { "epoch": 0.11716738197424893, "grad_norm": 0.326171875, "learning_rate": 3.905579399141631e-06, "loss": 2.6245, "step": 2184 }, { "epoch": 0.11722103004291845, "grad_norm": 0.310546875, "learning_rate": 3.907367668097282e-06, "loss": 2.3562, "step": 2185 }, { "epoch": 0.11727467811158798, "grad_norm": 0.4296875, "learning_rate": 3.909155937052933e-06, "loss": 2.4487, "step": 2186 }, { "epoch": 0.11732832618025751, "grad_norm": 0.341796875, "learning_rate": 3.910944206008584e-06, "loss": 2.1309, "step": 2187 }, { "epoch": 0.11738197424892703, "grad_norm": 0.28515625, "learning_rate": 3.912732474964235e-06, "loss": 2.2199, "step": 2188 }, { "epoch": 0.11743562231759656, "grad_norm": 0.3125, "learning_rate": 3.914520743919885e-06, "loss": 2.3453, "step": 2189 }, { "epoch": 0.1174892703862661, "grad_norm": 0.33203125, "learning_rate": 3.916309012875537e-06, "loss": 2.2479, "step": 2190 }, { "epoch": 0.11754291845493563, "grad_norm": 0.5625, "learning_rate": 3.918097281831188e-06, "loss": 2.7141, "step": 2191 }, { "epoch": 0.11759656652360514, "grad_norm": 0.326171875, "learning_rate": 3.919885550786839e-06, "loss": 2.263, "step": 2192 }, { "epoch": 0.11765021459227468, "grad_norm": 0.32421875, "learning_rate": 3.92167381974249e-06, "loss": 2.2928, "step": 2193 }, { "epoch": 0.11770386266094421, "grad_norm": 0.6953125, "learning_rate": 3.923462088698141e-06, "loss": 2.3435, "step": 2194 }, { "epoch": 0.11775751072961374, "grad_norm": 0.3203125, "learning_rate": 3.9252503576537914e-06, "loss": 2.5288, "step": 2195 }, { "epoch": 0.11781115879828326, "grad_norm": 0.294921875, "learning_rate": 3.927038626609443e-06, "loss": 2.2515, "step": 2196 }, { "epoch": 0.11786480686695279, "grad_norm": 0.388671875, "learning_rate": 3.928826895565093e-06, "loss": 2.3688, "step": 2197 }, { "epoch": 0.11791845493562232, "grad_norm": 0.31640625, "learning_rate": 3.9306151645207445e-06, "loss": 2.3643, "step": 2198 }, { "epoch": 0.11797210300429184, "grad_norm": 0.494140625, "learning_rate": 3.932403433476395e-06, "loss": 2.4439, "step": 2199 }, { "epoch": 0.11802575107296137, "grad_norm": 0.359375, "learning_rate": 3.934191702432046e-06, "loss": 2.5429, "step": 2200 }, { "epoch": 0.1180793991416309, "grad_norm": 0.486328125, "learning_rate": 3.9359799713876975e-06, "loss": 2.5077, "step": 2201 }, { "epoch": 0.11813304721030043, "grad_norm": 0.54296875, "learning_rate": 3.937768240343348e-06, "loss": 2.4334, "step": 2202 }, { "epoch": 0.11818669527896995, "grad_norm": 0.3203125, "learning_rate": 3.939556509298999e-06, "loss": 2.1058, "step": 2203 }, { "epoch": 0.11824034334763948, "grad_norm": 0.4375, "learning_rate": 3.94134477825465e-06, "loss": 2.4818, "step": 2204 }, { "epoch": 0.11829399141630902, "grad_norm": 0.291015625, "learning_rate": 3.943133047210301e-06, "loss": 2.4755, "step": 2205 }, { "epoch": 0.11834763948497853, "grad_norm": 0.291015625, "learning_rate": 3.9449213161659515e-06, "loss": 2.1629, "step": 2206 }, { "epoch": 0.11840128755364807, "grad_norm": 0.31640625, "learning_rate": 3.946709585121603e-06, "loss": 2.4157, "step": 2207 }, { "epoch": 0.1184549356223176, "grad_norm": 0.26171875, "learning_rate": 3.948497854077253e-06, "loss": 2.1229, "step": 2208 }, { "epoch": 0.11850858369098713, "grad_norm": 0.365234375, "learning_rate": 3.9502861230329045e-06, "loss": 2.0797, "step": 2209 }, { "epoch": 0.11856223175965665, "grad_norm": 1.6953125, "learning_rate": 3.952074391988555e-06, "loss": 2.4303, "step": 2210 }, { "epoch": 0.11861587982832618, "grad_norm": 0.359375, "learning_rate": 3.953862660944206e-06, "loss": 2.2895, "step": 2211 }, { "epoch": 0.11866952789699571, "grad_norm": 0.37109375, "learning_rate": 3.955650929899858e-06, "loss": 2.6209, "step": 2212 }, { "epoch": 0.11872317596566524, "grad_norm": 0.41796875, "learning_rate": 3.957439198855508e-06, "loss": 2.4826, "step": 2213 }, { "epoch": 0.11877682403433476, "grad_norm": 0.369140625, "learning_rate": 3.959227467811159e-06, "loss": 2.375, "step": 2214 }, { "epoch": 0.11883047210300429, "grad_norm": 0.3125, "learning_rate": 3.96101573676681e-06, "loss": 2.3093, "step": 2215 }, { "epoch": 0.11888412017167382, "grad_norm": 0.291015625, "learning_rate": 3.962804005722461e-06, "loss": 2.4422, "step": 2216 }, { "epoch": 0.11893776824034334, "grad_norm": 0.3046875, "learning_rate": 3.9645922746781115e-06, "loss": 2.2265, "step": 2217 }, { "epoch": 0.11899141630901287, "grad_norm": 0.32421875, "learning_rate": 3.966380543633763e-06, "loss": 2.4606, "step": 2218 }, { "epoch": 0.1190450643776824, "grad_norm": 0.33984375, "learning_rate": 3.968168812589413e-06, "loss": 2.2314, "step": 2219 }, { "epoch": 0.11909871244635194, "grad_norm": 0.359375, "learning_rate": 3.969957081545065e-06, "loss": 2.3227, "step": 2220 }, { "epoch": 0.11915236051502145, "grad_norm": 0.353515625, "learning_rate": 3.971745350500716e-06, "loss": 2.5058, "step": 2221 }, { "epoch": 0.11920600858369099, "grad_norm": 0.310546875, "learning_rate": 3.973533619456366e-06, "loss": 2.4485, "step": 2222 }, { "epoch": 0.11925965665236052, "grad_norm": 0.4921875, "learning_rate": 3.975321888412018e-06, "loss": 2.1844, "step": 2223 }, { "epoch": 0.11931330472103004, "grad_norm": 0.93359375, "learning_rate": 3.977110157367668e-06, "loss": 2.0816, "step": 2224 }, { "epoch": 0.11936695278969957, "grad_norm": 0.38671875, "learning_rate": 3.978898426323319e-06, "loss": 2.2378, "step": 2225 }, { "epoch": 0.1194206008583691, "grad_norm": 0.3359375, "learning_rate": 3.98068669527897e-06, "loss": 2.347, "step": 2226 }, { "epoch": 0.11947424892703863, "grad_norm": 0.37109375, "learning_rate": 3.982474964234621e-06, "loss": 2.4598, "step": 2227 }, { "epoch": 0.11952789699570815, "grad_norm": 0.326171875, "learning_rate": 3.984263233190272e-06, "loss": 2.2469, "step": 2228 }, { "epoch": 0.11958154506437768, "grad_norm": 1.0, "learning_rate": 3.986051502145923e-06, "loss": 2.3343, "step": 2229 }, { "epoch": 0.11963519313304721, "grad_norm": 0.2890625, "learning_rate": 3.987839771101574e-06, "loss": 2.1662, "step": 2230 }, { "epoch": 0.11968884120171674, "grad_norm": 0.326171875, "learning_rate": 3.9896280400572255e-06, "loss": 2.423, "step": 2231 }, { "epoch": 0.11974248927038626, "grad_norm": 0.265625, "learning_rate": 3.991416309012876e-06, "loss": 2.363, "step": 2232 }, { "epoch": 0.1197961373390558, "grad_norm": 0.34765625, "learning_rate": 3.993204577968527e-06, "loss": 2.3781, "step": 2233 }, { "epoch": 0.11984978540772533, "grad_norm": 0.296875, "learning_rate": 3.994992846924178e-06, "loss": 2.2162, "step": 2234 }, { "epoch": 0.11990343347639484, "grad_norm": 0.515625, "learning_rate": 3.996781115879829e-06, "loss": 2.2439, "step": 2235 }, { "epoch": 0.11995708154506438, "grad_norm": 0.44140625, "learning_rate": 3.9985693848354795e-06, "loss": 2.1383, "step": 2236 }, { "epoch": 0.12001072961373391, "grad_norm": 0.314453125, "learning_rate": 4.000357653791131e-06, "loss": 2.255, "step": 2237 }, { "epoch": 0.12006437768240344, "grad_norm": 0.291015625, "learning_rate": 4.002145922746781e-06, "loss": 2.3466, "step": 2238 }, { "epoch": 0.12011802575107296, "grad_norm": 0.5703125, "learning_rate": 4.0039341917024325e-06, "loss": 2.0544, "step": 2239 }, { "epoch": 0.12017167381974249, "grad_norm": 0.439453125, "learning_rate": 4.005722460658084e-06, "loss": 2.4871, "step": 2240 }, { "epoch": 0.12022532188841202, "grad_norm": 0.3125, "learning_rate": 4.007510729613734e-06, "loss": 2.6536, "step": 2241 }, { "epoch": 0.12027896995708154, "grad_norm": 0.2890625, "learning_rate": 4.0092989985693856e-06, "loss": 2.2285, "step": 2242 }, { "epoch": 0.12033261802575107, "grad_norm": 0.58984375, "learning_rate": 4.011087267525036e-06, "loss": 2.4802, "step": 2243 }, { "epoch": 0.1203862660944206, "grad_norm": 0.27734375, "learning_rate": 4.012875536480687e-06, "loss": 2.4202, "step": 2244 }, { "epoch": 0.12043991416309013, "grad_norm": 0.283203125, "learning_rate": 4.014663805436338e-06, "loss": 1.9998, "step": 2245 }, { "epoch": 0.12049356223175965, "grad_norm": 0.3515625, "learning_rate": 4.016452074391989e-06, "loss": 2.4737, "step": 2246 }, { "epoch": 0.12054721030042918, "grad_norm": 0.2890625, "learning_rate": 4.0182403433476395e-06, "loss": 2.3249, "step": 2247 }, { "epoch": 0.12060085836909872, "grad_norm": 0.546875, "learning_rate": 4.020028612303291e-06, "loss": 2.4467, "step": 2248 }, { "epoch": 0.12065450643776825, "grad_norm": 0.466796875, "learning_rate": 4.021816881258941e-06, "loss": 1.607, "step": 2249 }, { "epoch": 0.12070815450643776, "grad_norm": 0.365234375, "learning_rate": 4.0236051502145926e-06, "loss": 2.5862, "step": 2250 }, { "epoch": 0.1207618025751073, "grad_norm": 0.32421875, "learning_rate": 4.025393419170244e-06, "loss": 2.5477, "step": 2251 }, { "epoch": 0.12081545064377683, "grad_norm": 0.35546875, "learning_rate": 4.027181688125894e-06, "loss": 2.2915, "step": 2252 }, { "epoch": 0.12086909871244635, "grad_norm": 0.275390625, "learning_rate": 4.028969957081546e-06, "loss": 2.0082, "step": 2253 }, { "epoch": 0.12092274678111588, "grad_norm": 1.71875, "learning_rate": 4.030758226037196e-06, "loss": 2.2484, "step": 2254 }, { "epoch": 0.12097639484978541, "grad_norm": 0.31640625, "learning_rate": 4.032546494992847e-06, "loss": 2.339, "step": 2255 }, { "epoch": 0.12103004291845494, "grad_norm": 0.318359375, "learning_rate": 4.034334763948498e-06, "loss": 2.475, "step": 2256 }, { "epoch": 0.12108369098712446, "grad_norm": 0.271484375, "learning_rate": 4.036123032904149e-06, "loss": 2.2974, "step": 2257 }, { "epoch": 0.12113733905579399, "grad_norm": 0.34765625, "learning_rate": 4.0379113018597996e-06, "loss": 2.1981, "step": 2258 }, { "epoch": 0.12119098712446352, "grad_norm": 0.34375, "learning_rate": 4.039699570815451e-06, "loss": 2.528, "step": 2259 }, { "epoch": 0.12124463519313304, "grad_norm": 0.314453125, "learning_rate": 4.041487839771102e-06, "loss": 2.4562, "step": 2260 }, { "epoch": 0.12129828326180257, "grad_norm": 1.2578125, "learning_rate": 4.043276108726753e-06, "loss": 2.3322, "step": 2261 }, { "epoch": 0.1213519313304721, "grad_norm": 0.349609375, "learning_rate": 4.045064377682404e-06, "loss": 2.2515, "step": 2262 }, { "epoch": 0.12140557939914164, "grad_norm": 0.73046875, "learning_rate": 4.046852646638054e-06, "loss": 2.2698, "step": 2263 }, { "epoch": 0.12145922746781115, "grad_norm": 0.318359375, "learning_rate": 4.048640915593706e-06, "loss": 2.3795, "step": 2264 }, { "epoch": 0.12151287553648069, "grad_norm": 0.30859375, "learning_rate": 4.050429184549356e-06, "loss": 2.394, "step": 2265 }, { "epoch": 0.12156652360515022, "grad_norm": 0.30078125, "learning_rate": 4.0522174535050074e-06, "loss": 2.1762, "step": 2266 }, { "epoch": 0.12162017167381975, "grad_norm": 0.314453125, "learning_rate": 4.054005722460658e-06, "loss": 2.2588, "step": 2267 }, { "epoch": 0.12167381974248927, "grad_norm": 0.361328125, "learning_rate": 4.055793991416309e-06, "loss": 2.6372, "step": 2268 }, { "epoch": 0.1217274678111588, "grad_norm": 0.310546875, "learning_rate": 4.05758226037196e-06, "loss": 2.1577, "step": 2269 }, { "epoch": 0.12178111587982833, "grad_norm": 0.29296875, "learning_rate": 4.059370529327612e-06, "loss": 2.6252, "step": 2270 }, { "epoch": 0.12183476394849785, "grad_norm": 0.30859375, "learning_rate": 4.061158798283262e-06, "loss": 2.2482, "step": 2271 }, { "epoch": 0.12188841201716738, "grad_norm": 0.349609375, "learning_rate": 4.0629470672389135e-06, "loss": 2.2392, "step": 2272 }, { "epoch": 0.12194206008583691, "grad_norm": 0.3046875, "learning_rate": 4.064735336194564e-06, "loss": 2.3367, "step": 2273 }, { "epoch": 0.12199570815450644, "grad_norm": 0.3515625, "learning_rate": 4.066523605150215e-06, "loss": 2.3145, "step": 2274 }, { "epoch": 0.12204935622317596, "grad_norm": 0.384765625, "learning_rate": 4.068311874105866e-06, "loss": 2.1383, "step": 2275 }, { "epoch": 0.1221030042918455, "grad_norm": 0.306640625, "learning_rate": 4.070100143061517e-06, "loss": 2.4898, "step": 2276 }, { "epoch": 0.12215665236051503, "grad_norm": 0.33984375, "learning_rate": 4.0718884120171675e-06, "loss": 2.0382, "step": 2277 }, { "epoch": 0.12221030042918454, "grad_norm": 0.31640625, "learning_rate": 4.073676680972819e-06, "loss": 2.4287, "step": 2278 }, { "epoch": 0.12226394849785407, "grad_norm": 0.298828125, "learning_rate": 4.075464949928469e-06, "loss": 2.0626, "step": 2279 }, { "epoch": 0.1223175965665236, "grad_norm": 0.3359375, "learning_rate": 4.0772532188841205e-06, "loss": 2.1176, "step": 2280 }, { "epoch": 0.12237124463519314, "grad_norm": 0.34375, "learning_rate": 4.079041487839772e-06, "loss": 2.2022, "step": 2281 }, { "epoch": 0.12242489270386266, "grad_norm": 0.275390625, "learning_rate": 4.080829756795422e-06, "loss": 2.0632, "step": 2282 }, { "epoch": 0.12247854077253219, "grad_norm": 0.7734375, "learning_rate": 4.082618025751074e-06, "loss": 2.414, "step": 2283 }, { "epoch": 0.12253218884120172, "grad_norm": 0.3515625, "learning_rate": 4.084406294706724e-06, "loss": 2.3139, "step": 2284 }, { "epoch": 0.12258583690987125, "grad_norm": 0.337890625, "learning_rate": 4.086194563662375e-06, "loss": 2.4407, "step": 2285 }, { "epoch": 0.12263948497854077, "grad_norm": 0.9453125, "learning_rate": 4.087982832618026e-06, "loss": 2.3742, "step": 2286 }, { "epoch": 0.1226931330472103, "grad_norm": 0.3671875, "learning_rate": 4.089771101573677e-06, "loss": 2.2795, "step": 2287 }, { "epoch": 0.12274678111587983, "grad_norm": 0.490234375, "learning_rate": 4.0915593705293275e-06, "loss": 2.1934, "step": 2288 }, { "epoch": 0.12280042918454935, "grad_norm": 0.3984375, "learning_rate": 4.093347639484979e-06, "loss": 2.7744, "step": 2289 }, { "epoch": 0.12285407725321888, "grad_norm": 0.30078125, "learning_rate": 4.09513590844063e-06, "loss": 2.4153, "step": 2290 }, { "epoch": 0.12290772532188841, "grad_norm": 0.333984375, "learning_rate": 4.096924177396281e-06, "loss": 2.251, "step": 2291 }, { "epoch": 0.12296137339055795, "grad_norm": 0.248046875, "learning_rate": 4.098712446351932e-06, "loss": 2.2073, "step": 2292 }, { "epoch": 0.12301502145922746, "grad_norm": 0.306640625, "learning_rate": 4.100500715307582e-06, "loss": 2.1259, "step": 2293 }, { "epoch": 0.123068669527897, "grad_norm": 0.29296875, "learning_rate": 4.102288984263234e-06, "loss": 2.2025, "step": 2294 }, { "epoch": 0.12312231759656653, "grad_norm": 2.625, "learning_rate": 4.104077253218884e-06, "loss": 2.3774, "step": 2295 }, { "epoch": 0.12317596566523605, "grad_norm": 0.3046875, "learning_rate": 4.105865522174535e-06, "loss": 2.4393, "step": 2296 }, { "epoch": 0.12322961373390558, "grad_norm": 0.3125, "learning_rate": 4.107653791130186e-06, "loss": 2.3066, "step": 2297 }, { "epoch": 0.12328326180257511, "grad_norm": 0.326171875, "learning_rate": 4.109442060085837e-06, "loss": 2.3295, "step": 2298 }, { "epoch": 0.12333690987124464, "grad_norm": 0.390625, "learning_rate": 4.1112303290414884e-06, "loss": 2.5174, "step": 2299 }, { "epoch": 0.12339055793991416, "grad_norm": 0.3828125, "learning_rate": 4.113018597997139e-06, "loss": 2.4497, "step": 2300 }, { "epoch": 0.12344420600858369, "grad_norm": 0.294921875, "learning_rate": 4.11480686695279e-06, "loss": 2.4167, "step": 2301 }, { "epoch": 0.12349785407725322, "grad_norm": 0.326171875, "learning_rate": 4.116595135908441e-06, "loss": 2.2234, "step": 2302 }, { "epoch": 0.12355150214592275, "grad_norm": 0.275390625, "learning_rate": 4.118383404864092e-06, "loss": 2.348, "step": 2303 }, { "epoch": 0.12360515021459227, "grad_norm": 0.30078125, "learning_rate": 4.120171673819742e-06, "loss": 2.5239, "step": 2304 }, { "epoch": 0.1236587982832618, "grad_norm": 0.322265625, "learning_rate": 4.121959942775394e-06, "loss": 2.7057, "step": 2305 }, { "epoch": 0.12371244635193133, "grad_norm": 0.291015625, "learning_rate": 4.123748211731044e-06, "loss": 2.1187, "step": 2306 }, { "epoch": 0.12376609442060085, "grad_norm": 0.341796875, "learning_rate": 4.1255364806866955e-06, "loss": 2.1877, "step": 2307 }, { "epoch": 0.12381974248927038, "grad_norm": 0.330078125, "learning_rate": 4.127324749642346e-06, "loss": 2.481, "step": 2308 }, { "epoch": 0.12387339055793992, "grad_norm": 0.35546875, "learning_rate": 4.129113018597998e-06, "loss": 2.4386, "step": 2309 }, { "epoch": 0.12392703862660945, "grad_norm": 0.330078125, "learning_rate": 4.1309012875536485e-06, "loss": 1.9539, "step": 2310 }, { "epoch": 0.12398068669527897, "grad_norm": 1.21875, "learning_rate": 4.1326895565093e-06, "loss": 2.4367, "step": 2311 }, { "epoch": 0.1240343347639485, "grad_norm": 0.294921875, "learning_rate": 4.13447782546495e-06, "loss": 2.3158, "step": 2312 }, { "epoch": 0.12408798283261803, "grad_norm": 0.255859375, "learning_rate": 4.1362660944206016e-06, "loss": 2.0182, "step": 2313 }, { "epoch": 0.12414163090128755, "grad_norm": 0.318359375, "learning_rate": 4.138054363376252e-06, "loss": 2.6264, "step": 2314 }, { "epoch": 0.12419527896995708, "grad_norm": 0.3359375, "learning_rate": 4.139842632331903e-06, "loss": 2.2984, "step": 2315 }, { "epoch": 0.12424892703862661, "grad_norm": 0.310546875, "learning_rate": 4.141630901287554e-06, "loss": 2.4359, "step": 2316 }, { "epoch": 0.12430257510729614, "grad_norm": 0.3359375, "learning_rate": 4.143419170243205e-06, "loss": 2.1293, "step": 2317 }, { "epoch": 0.12435622317596566, "grad_norm": 0.3125, "learning_rate": 4.1452074391988555e-06, "loss": 2.2591, "step": 2318 }, { "epoch": 0.12440987124463519, "grad_norm": 0.294921875, "learning_rate": 4.146995708154507e-06, "loss": 2.2768, "step": 2319 }, { "epoch": 0.12446351931330472, "grad_norm": 0.357421875, "learning_rate": 4.148783977110158e-06, "loss": 2.1906, "step": 2320 }, { "epoch": 0.12451716738197426, "grad_norm": 0.30859375, "learning_rate": 4.1505722460658086e-06, "loss": 2.2269, "step": 2321 }, { "epoch": 0.12457081545064377, "grad_norm": 0.306640625, "learning_rate": 4.15236051502146e-06, "loss": 2.451, "step": 2322 }, { "epoch": 0.1246244635193133, "grad_norm": 0.314453125, "learning_rate": 4.15414878397711e-06, "loss": 2.1325, "step": 2323 }, { "epoch": 0.12467811158798284, "grad_norm": 0.34375, "learning_rate": 4.155937052932762e-06, "loss": 2.3826, "step": 2324 }, { "epoch": 0.12473175965665236, "grad_norm": 0.390625, "learning_rate": 4.157725321888412e-06, "loss": 2.205, "step": 2325 }, { "epoch": 0.12478540772532189, "grad_norm": 0.29296875, "learning_rate": 4.159513590844063e-06, "loss": 2.3052, "step": 2326 }, { "epoch": 0.12483905579399142, "grad_norm": 0.427734375, "learning_rate": 4.161301859799714e-06, "loss": 2.3904, "step": 2327 }, { "epoch": 0.12489270386266095, "grad_norm": 0.49609375, "learning_rate": 4.163090128755365e-06, "loss": 2.3911, "step": 2328 }, { "epoch": 0.12494635193133047, "grad_norm": 0.337890625, "learning_rate": 4.164878397711016e-06, "loss": 2.2883, "step": 2329 }, { "epoch": 0.125, "grad_norm": 0.35546875, "learning_rate": 4.166666666666667e-06, "loss": 2.2196, "step": 2330 }, { "epoch": 0.12505364806866953, "grad_norm": 0.392578125, "learning_rate": 4.168454935622318e-06, "loss": 2.2411, "step": 2331 }, { "epoch": 0.12510729613733906, "grad_norm": 0.40625, "learning_rate": 4.170243204577969e-06, "loss": 2.407, "step": 2332 }, { "epoch": 0.1251609442060086, "grad_norm": 0.53125, "learning_rate": 4.17203147353362e-06, "loss": 2.3722, "step": 2333 }, { "epoch": 0.1252145922746781, "grad_norm": 0.337890625, "learning_rate": 4.17381974248927e-06, "loss": 2.4601, "step": 2334 }, { "epoch": 0.12526824034334763, "grad_norm": 0.345703125, "learning_rate": 4.175608011444922e-06, "loss": 2.4901, "step": 2335 }, { "epoch": 0.12532188841201716, "grad_norm": 0.408203125, "learning_rate": 4.177396280400572e-06, "loss": 2.5503, "step": 2336 }, { "epoch": 0.1253755364806867, "grad_norm": 0.322265625, "learning_rate": 4.179184549356223e-06, "loss": 2.3725, "step": 2337 }, { "epoch": 0.12542918454935623, "grad_norm": 0.359375, "learning_rate": 4.180972818311874e-06, "loss": 2.5572, "step": 2338 }, { "epoch": 0.12548283261802576, "grad_norm": 0.3828125, "learning_rate": 4.182761087267526e-06, "loss": 2.3689, "step": 2339 }, { "epoch": 0.1255364806866953, "grad_norm": 0.38671875, "learning_rate": 4.1845493562231765e-06, "loss": 2.1153, "step": 2340 }, { "epoch": 0.1255901287553648, "grad_norm": 0.34375, "learning_rate": 4.186337625178828e-06, "loss": 2.1709, "step": 2341 }, { "epoch": 0.12564377682403433, "grad_norm": 0.56640625, "learning_rate": 4.188125894134478e-06, "loss": 1.6597, "step": 2342 }, { "epoch": 0.12569742489270386, "grad_norm": 0.328125, "learning_rate": 4.1899141630901295e-06, "loss": 2.1081, "step": 2343 }, { "epoch": 0.1257510729613734, "grad_norm": 0.36328125, "learning_rate": 4.19170243204578e-06, "loss": 2.3789, "step": 2344 }, { "epoch": 0.12580472103004292, "grad_norm": 0.302734375, "learning_rate": 4.193490701001431e-06, "loss": 2.455, "step": 2345 }, { "epoch": 0.12585836909871245, "grad_norm": 0.283203125, "learning_rate": 4.195278969957082e-06, "loss": 2.2965, "step": 2346 }, { "epoch": 0.12591201716738198, "grad_norm": 0.369140625, "learning_rate": 4.197067238912732e-06, "loss": 2.4752, "step": 2347 }, { "epoch": 0.1259656652360515, "grad_norm": 0.318359375, "learning_rate": 4.1988555078683835e-06, "loss": 2.4816, "step": 2348 }, { "epoch": 0.12601931330472102, "grad_norm": 0.326171875, "learning_rate": 4.200643776824035e-06, "loss": 2.3087, "step": 2349 }, { "epoch": 0.12607296137339055, "grad_norm": 0.3125, "learning_rate": 4.202432045779686e-06, "loss": 2.2867, "step": 2350 }, { "epoch": 0.12612660944206008, "grad_norm": 0.373046875, "learning_rate": 4.2042203147353365e-06, "loss": 2.4131, "step": 2351 }, { "epoch": 0.12618025751072962, "grad_norm": 0.3984375, "learning_rate": 4.206008583690988e-06, "loss": 2.1236, "step": 2352 }, { "epoch": 0.12623390557939915, "grad_norm": 0.30859375, "learning_rate": 4.207796852646638e-06, "loss": 2.3046, "step": 2353 }, { "epoch": 0.12628755364806868, "grad_norm": 0.310546875, "learning_rate": 4.20958512160229e-06, "loss": 2.5478, "step": 2354 }, { "epoch": 0.1263412017167382, "grad_norm": 0.27734375, "learning_rate": 4.21137339055794e-06, "loss": 2.3794, "step": 2355 }, { "epoch": 0.12639484978540771, "grad_norm": 0.27734375, "learning_rate": 4.213161659513591e-06, "loss": 2.4166, "step": 2356 }, { "epoch": 0.12644849785407725, "grad_norm": 0.322265625, "learning_rate": 4.214949928469242e-06, "loss": 2.4934, "step": 2357 }, { "epoch": 0.12650214592274678, "grad_norm": 0.359375, "learning_rate": 4.216738197424893e-06, "loss": 2.3715, "step": 2358 }, { "epoch": 0.1265557939914163, "grad_norm": 0.51953125, "learning_rate": 4.218526466380544e-06, "loss": 2.3627, "step": 2359 }, { "epoch": 0.12660944206008584, "grad_norm": 0.287109375, "learning_rate": 4.220314735336195e-06, "loss": 2.3747, "step": 2360 }, { "epoch": 0.12666309012875537, "grad_norm": 0.31640625, "learning_rate": 4.222103004291846e-06, "loss": 2.4197, "step": 2361 }, { "epoch": 0.1267167381974249, "grad_norm": 0.3046875, "learning_rate": 4.223891273247497e-06, "loss": 2.0905, "step": 2362 }, { "epoch": 0.1267703862660944, "grad_norm": 0.357421875, "learning_rate": 4.225679542203148e-06, "loss": 2.5155, "step": 2363 }, { "epoch": 0.12682403433476394, "grad_norm": 0.33203125, "learning_rate": 4.227467811158798e-06, "loss": 2.6983, "step": 2364 }, { "epoch": 0.12687768240343347, "grad_norm": 0.361328125, "learning_rate": 4.22925608011445e-06, "loss": 2.0489, "step": 2365 }, { "epoch": 0.126931330472103, "grad_norm": 0.296875, "learning_rate": 4.2310443490701e-06, "loss": 2.46, "step": 2366 }, { "epoch": 0.12698497854077254, "grad_norm": 0.39453125, "learning_rate": 4.232832618025751e-06, "loss": 2.1907, "step": 2367 }, { "epoch": 0.12703862660944207, "grad_norm": 0.27734375, "learning_rate": 4.234620886981403e-06, "loss": 2.3217, "step": 2368 }, { "epoch": 0.1270922746781116, "grad_norm": 0.359375, "learning_rate": 4.236409155937053e-06, "loss": 2.4051, "step": 2369 }, { "epoch": 0.1271459227467811, "grad_norm": 0.375, "learning_rate": 4.2381974248927044e-06, "loss": 2.2565, "step": 2370 }, { "epoch": 0.12719957081545064, "grad_norm": 0.515625, "learning_rate": 4.239985693848355e-06, "loss": 2.6269, "step": 2371 }, { "epoch": 0.12725321888412017, "grad_norm": 0.326171875, "learning_rate": 4.241773962804006e-06, "loss": 2.3833, "step": 2372 }, { "epoch": 0.1273068669527897, "grad_norm": 0.298828125, "learning_rate": 4.243562231759657e-06, "loss": 2.1214, "step": 2373 }, { "epoch": 0.12736051502145923, "grad_norm": 0.314453125, "learning_rate": 4.245350500715308e-06, "loss": 2.2173, "step": 2374 }, { "epoch": 0.12741416309012876, "grad_norm": 0.4765625, "learning_rate": 4.247138769670958e-06, "loss": 2.7094, "step": 2375 }, { "epoch": 0.1274678111587983, "grad_norm": 0.291015625, "learning_rate": 4.24892703862661e-06, "loss": 2.412, "step": 2376 }, { "epoch": 0.1275214592274678, "grad_norm": 0.3515625, "learning_rate": 4.25071530758226e-06, "loss": 2.1372, "step": 2377 }, { "epoch": 0.12757510729613733, "grad_norm": 0.337890625, "learning_rate": 4.252503576537912e-06, "loss": 2.3329, "step": 2378 }, { "epoch": 0.12762875536480686, "grad_norm": 0.3515625, "learning_rate": 4.254291845493563e-06, "loss": 2.4013, "step": 2379 }, { "epoch": 0.1276824034334764, "grad_norm": 0.84375, "learning_rate": 4.256080114449214e-06, "loss": 2.3499, "step": 2380 }, { "epoch": 0.12773605150214593, "grad_norm": 0.283203125, "learning_rate": 4.2578683834048645e-06, "loss": 2.3622, "step": 2381 }, { "epoch": 0.12778969957081546, "grad_norm": 0.267578125, "learning_rate": 4.259656652360516e-06, "loss": 2.3571, "step": 2382 }, { "epoch": 0.127843347639485, "grad_norm": 0.337890625, "learning_rate": 4.261444921316166e-06, "loss": 2.426, "step": 2383 }, { "epoch": 0.1278969957081545, "grad_norm": 0.32421875, "learning_rate": 4.2632331902718175e-06, "loss": 2.3873, "step": 2384 }, { "epoch": 0.12795064377682402, "grad_norm": 0.267578125, "learning_rate": 4.265021459227468e-06, "loss": 2.4855, "step": 2385 }, { "epoch": 0.12800429184549356, "grad_norm": 0.27734375, "learning_rate": 4.266809728183119e-06, "loss": 2.368, "step": 2386 }, { "epoch": 0.1280579399141631, "grad_norm": 0.3125, "learning_rate": 4.26859799713877e-06, "loss": 2.3733, "step": 2387 }, { "epoch": 0.12811158798283262, "grad_norm": 0.34765625, "learning_rate": 4.270386266094421e-06, "loss": 2.4825, "step": 2388 }, { "epoch": 0.12816523605150215, "grad_norm": 0.400390625, "learning_rate": 4.272174535050072e-06, "loss": 2.3239, "step": 2389 }, { "epoch": 0.12821888412017168, "grad_norm": 0.322265625, "learning_rate": 4.273962804005723e-06, "loss": 2.3973, "step": 2390 }, { "epoch": 0.12827253218884122, "grad_norm": 0.41015625, "learning_rate": 4.275751072961374e-06, "loss": 2.6471, "step": 2391 }, { "epoch": 0.12832618025751072, "grad_norm": 0.5625, "learning_rate": 4.2775393419170246e-06, "loss": 2.04, "step": 2392 }, { "epoch": 0.12837982832618025, "grad_norm": 0.296875, "learning_rate": 4.279327610872676e-06, "loss": 2.24, "step": 2393 }, { "epoch": 0.12843347639484978, "grad_norm": 0.32421875, "learning_rate": 4.281115879828326e-06, "loss": 2.2296, "step": 2394 }, { "epoch": 0.12848712446351931, "grad_norm": 0.33203125, "learning_rate": 4.282904148783978e-06, "loss": 2.0129, "step": 2395 }, { "epoch": 0.12854077253218885, "grad_norm": 0.302734375, "learning_rate": 4.284692417739628e-06, "loss": 2.2294, "step": 2396 }, { "epoch": 0.12859442060085838, "grad_norm": 0.3125, "learning_rate": 4.286480686695279e-06, "loss": 2.3946, "step": 2397 }, { "epoch": 0.1286480686695279, "grad_norm": 1.078125, "learning_rate": 4.288268955650931e-06, "loss": 2.4282, "step": 2398 }, { "epoch": 0.1287017167381974, "grad_norm": 0.46484375, "learning_rate": 4.290057224606581e-06, "loss": 2.3935, "step": 2399 }, { "epoch": 0.12875536480686695, "grad_norm": 0.3125, "learning_rate": 4.291845493562232e-06, "loss": 2.4373, "step": 2400 }, { "epoch": 0.12880901287553648, "grad_norm": 0.4375, "learning_rate": 4.293633762517883e-06, "loss": 2.2988, "step": 2401 }, { "epoch": 0.128862660944206, "grad_norm": 0.35546875, "learning_rate": 4.295422031473534e-06, "loss": 2.1422, "step": 2402 }, { "epoch": 0.12891630901287554, "grad_norm": 0.3203125, "learning_rate": 4.297210300429185e-06, "loss": 2.2053, "step": 2403 }, { "epoch": 0.12896995708154507, "grad_norm": 0.33203125, "learning_rate": 4.298998569384836e-06, "loss": 2.355, "step": 2404 }, { "epoch": 0.1290236051502146, "grad_norm": 0.365234375, "learning_rate": 4.300786838340486e-06, "loss": 2.3379, "step": 2405 }, { "epoch": 0.1290772532188841, "grad_norm": 0.361328125, "learning_rate": 4.302575107296138e-06, "loss": 2.7169, "step": 2406 }, { "epoch": 0.12913090128755364, "grad_norm": 0.291015625, "learning_rate": 4.304363376251788e-06, "loss": 2.2074, "step": 2407 }, { "epoch": 0.12918454935622317, "grad_norm": 0.3203125, "learning_rate": 4.306151645207439e-06, "loss": 2.1688, "step": 2408 }, { "epoch": 0.1292381974248927, "grad_norm": 0.3125, "learning_rate": 4.307939914163091e-06, "loss": 2.1795, "step": 2409 }, { "epoch": 0.12929184549356224, "grad_norm": 0.396484375, "learning_rate": 4.309728183118741e-06, "loss": 2.6022, "step": 2410 }, { "epoch": 0.12934549356223177, "grad_norm": 0.3125, "learning_rate": 4.3115164520743925e-06, "loss": 2.6432, "step": 2411 }, { "epoch": 0.1293991416309013, "grad_norm": 0.42578125, "learning_rate": 4.313304721030043e-06, "loss": 1.8675, "step": 2412 }, { "epoch": 0.1294527896995708, "grad_norm": 0.33203125, "learning_rate": 4.315092989985694e-06, "loss": 2.4212, "step": 2413 }, { "epoch": 0.12950643776824033, "grad_norm": 0.3359375, "learning_rate": 4.316881258941345e-06, "loss": 2.4522, "step": 2414 }, { "epoch": 0.12956008583690987, "grad_norm": 0.26953125, "learning_rate": 4.318669527896996e-06, "loss": 2.038, "step": 2415 }, { "epoch": 0.1296137339055794, "grad_norm": 0.353515625, "learning_rate": 4.320457796852646e-06, "loss": 2.3895, "step": 2416 }, { "epoch": 0.12966738197424893, "grad_norm": 0.296875, "learning_rate": 4.3222460658082986e-06, "loss": 2.3928, "step": 2417 }, { "epoch": 0.12972103004291846, "grad_norm": 0.318359375, "learning_rate": 4.324034334763949e-06, "loss": 2.3931, "step": 2418 }, { "epoch": 0.129774678111588, "grad_norm": 0.3359375, "learning_rate": 4.3258226037196e-06, "loss": 2.4203, "step": 2419 }, { "epoch": 0.1298283261802575, "grad_norm": 0.283203125, "learning_rate": 4.327610872675251e-06, "loss": 2.2639, "step": 2420 }, { "epoch": 0.12988197424892703, "grad_norm": 0.341796875, "learning_rate": 4.329399141630902e-06, "loss": 2.2074, "step": 2421 }, { "epoch": 0.12993562231759656, "grad_norm": 0.341796875, "learning_rate": 4.3311874105865525e-06, "loss": 2.2854, "step": 2422 }, { "epoch": 0.1299892703862661, "grad_norm": 0.494140625, "learning_rate": 4.332975679542204e-06, "loss": 2.1781, "step": 2423 }, { "epoch": 0.13004291845493562, "grad_norm": 0.61328125, "learning_rate": 4.334763948497854e-06, "loss": 1.6977, "step": 2424 }, { "epoch": 0.13009656652360516, "grad_norm": 0.330078125, "learning_rate": 4.3365522174535056e-06, "loss": 2.401, "step": 2425 }, { "epoch": 0.1301502145922747, "grad_norm": 0.3203125, "learning_rate": 4.338340486409156e-06, "loss": 2.4613, "step": 2426 }, { "epoch": 0.13020386266094422, "grad_norm": 0.314453125, "learning_rate": 4.340128755364807e-06, "loss": 2.3405, "step": 2427 }, { "epoch": 0.13025751072961372, "grad_norm": 0.287109375, "learning_rate": 4.341917024320459e-06, "loss": 2.3144, "step": 2428 }, { "epoch": 0.13031115879828326, "grad_norm": 0.423828125, "learning_rate": 4.343705293276109e-06, "loss": 1.2648, "step": 2429 }, { "epoch": 0.1303648068669528, "grad_norm": 0.33203125, "learning_rate": 4.34549356223176e-06, "loss": 2.4152, "step": 2430 }, { "epoch": 0.13041845493562232, "grad_norm": 0.31640625, "learning_rate": 4.347281831187411e-06, "loss": 2.3419, "step": 2431 }, { "epoch": 0.13047210300429185, "grad_norm": 0.416015625, "learning_rate": 4.349070100143062e-06, "loss": 2.2502, "step": 2432 }, { "epoch": 0.13052575107296138, "grad_norm": 0.31640625, "learning_rate": 4.350858369098713e-06, "loss": 2.5858, "step": 2433 }, { "epoch": 0.13057939914163091, "grad_norm": 0.28515625, "learning_rate": 4.352646638054364e-06, "loss": 2.371, "step": 2434 }, { "epoch": 0.13063304721030042, "grad_norm": 0.3515625, "learning_rate": 4.354434907010014e-06, "loss": 2.52, "step": 2435 }, { "epoch": 0.13068669527896995, "grad_norm": 0.3671875, "learning_rate": 4.356223175965666e-06, "loss": 2.5011, "step": 2436 }, { "epoch": 0.13074034334763948, "grad_norm": 0.4453125, "learning_rate": 4.358011444921317e-06, "loss": 2.3871, "step": 2437 }, { "epoch": 0.130793991416309, "grad_norm": 0.3203125, "learning_rate": 4.359799713876967e-06, "loss": 2.3866, "step": 2438 }, { "epoch": 0.13084763948497855, "grad_norm": 0.61328125, "learning_rate": 4.361587982832619e-06, "loss": 2.3212, "step": 2439 }, { "epoch": 0.13090128755364808, "grad_norm": 0.390625, "learning_rate": 4.363376251788269e-06, "loss": 2.4741, "step": 2440 }, { "epoch": 0.1309549356223176, "grad_norm": 0.32421875, "learning_rate": 4.3651645207439204e-06, "loss": 2.4285, "step": 2441 }, { "epoch": 0.1310085836909871, "grad_norm": 0.365234375, "learning_rate": 4.366952789699571e-06, "loss": 2.4339, "step": 2442 }, { "epoch": 0.13106223175965664, "grad_norm": 0.412109375, "learning_rate": 4.368741058655222e-06, "loss": 2.3463, "step": 2443 }, { "epoch": 0.13111587982832618, "grad_norm": 0.32421875, "learning_rate": 4.370529327610873e-06, "loss": 2.4214, "step": 2444 }, { "epoch": 0.1311695278969957, "grad_norm": 0.478515625, "learning_rate": 4.372317596566524e-06, "loss": 2.1768, "step": 2445 }, { "epoch": 0.13122317596566524, "grad_norm": 0.34765625, "learning_rate": 4.374105865522174e-06, "loss": 2.4294, "step": 2446 }, { "epoch": 0.13127682403433477, "grad_norm": 0.302734375, "learning_rate": 4.375894134477826e-06, "loss": 2.4205, "step": 2447 }, { "epoch": 0.1313304721030043, "grad_norm": 0.376953125, "learning_rate": 4.377682403433477e-06, "loss": 2.4307, "step": 2448 }, { "epoch": 0.1313841201716738, "grad_norm": 0.50390625, "learning_rate": 4.3794706723891274e-06, "loss": 2.2793, "step": 2449 }, { "epoch": 0.13143776824034334, "grad_norm": 0.62109375, "learning_rate": 4.381258941344779e-06, "loss": 1.4698, "step": 2450 }, { "epoch": 0.13149141630901287, "grad_norm": 0.314453125, "learning_rate": 4.383047210300429e-06, "loss": 2.5063, "step": 2451 }, { "epoch": 0.1315450643776824, "grad_norm": 0.357421875, "learning_rate": 4.3848354792560805e-06, "loss": 2.1089, "step": 2452 }, { "epoch": 0.13159871244635193, "grad_norm": 0.298828125, "learning_rate": 4.386623748211731e-06, "loss": 2.2249, "step": 2453 }, { "epoch": 0.13165236051502147, "grad_norm": 0.326171875, "learning_rate": 4.388412017167382e-06, "loss": 2.3682, "step": 2454 }, { "epoch": 0.131706008583691, "grad_norm": 0.390625, "learning_rate": 4.390200286123033e-06, "loss": 2.6533, "step": 2455 }, { "epoch": 0.1317596566523605, "grad_norm": 0.279296875, "learning_rate": 4.391988555078684e-06, "loss": 2.187, "step": 2456 }, { "epoch": 0.13181330472103003, "grad_norm": 0.33984375, "learning_rate": 4.393776824034335e-06, "loss": 2.4068, "step": 2457 }, { "epoch": 0.13186695278969957, "grad_norm": 0.419921875, "learning_rate": 4.395565092989987e-06, "loss": 2.3627, "step": 2458 }, { "epoch": 0.1319206008583691, "grad_norm": 0.27734375, "learning_rate": 4.397353361945637e-06, "loss": 2.3878, "step": 2459 }, { "epoch": 0.13197424892703863, "grad_norm": 0.42578125, "learning_rate": 4.399141630901288e-06, "loss": 2.319, "step": 2460 }, { "epoch": 0.13202789699570816, "grad_norm": 0.375, "learning_rate": 4.400929899856939e-06, "loss": 2.1403, "step": 2461 }, { "epoch": 0.1320815450643777, "grad_norm": 0.35546875, "learning_rate": 4.40271816881259e-06, "loss": 2.1959, "step": 2462 }, { "epoch": 0.13213519313304722, "grad_norm": 0.3359375, "learning_rate": 4.4045064377682406e-06, "loss": 1.9592, "step": 2463 }, { "epoch": 0.13218884120171673, "grad_norm": 0.34375, "learning_rate": 4.406294706723892e-06, "loss": 2.3746, "step": 2464 }, { "epoch": 0.13224248927038626, "grad_norm": 0.34375, "learning_rate": 4.408082975679542e-06, "loss": 2.421, "step": 2465 }, { "epoch": 0.1322961373390558, "grad_norm": 0.57421875, "learning_rate": 4.409871244635194e-06, "loss": 2.345, "step": 2466 }, { "epoch": 0.13234978540772532, "grad_norm": 0.34375, "learning_rate": 4.411659513590845e-06, "loss": 2.4756, "step": 2467 }, { "epoch": 0.13240343347639486, "grad_norm": 0.330078125, "learning_rate": 4.413447782546495e-06, "loss": 2.166, "step": 2468 }, { "epoch": 0.1324570815450644, "grad_norm": 0.474609375, "learning_rate": 4.415236051502147e-06, "loss": 2.3386, "step": 2469 }, { "epoch": 0.13251072961373392, "grad_norm": 0.38671875, "learning_rate": 4.417024320457797e-06, "loss": 2.3124, "step": 2470 }, { "epoch": 0.13256437768240342, "grad_norm": 0.52734375, "learning_rate": 4.418812589413448e-06, "loss": 2.3705, "step": 2471 }, { "epoch": 0.13261802575107295, "grad_norm": 0.33984375, "learning_rate": 4.420600858369099e-06, "loss": 2.4909, "step": 2472 }, { "epoch": 0.13267167381974249, "grad_norm": 0.3046875, "learning_rate": 4.42238912732475e-06, "loss": 2.3539, "step": 2473 }, { "epoch": 0.13272532188841202, "grad_norm": 0.3515625, "learning_rate": 4.424177396280401e-06, "loss": 2.444, "step": 2474 }, { "epoch": 0.13277896995708155, "grad_norm": 0.453125, "learning_rate": 4.425965665236052e-06, "loss": 2.2236, "step": 2475 }, { "epoch": 0.13283261802575108, "grad_norm": 0.58203125, "learning_rate": 4.427753934191702e-06, "loss": 2.1339, "step": 2476 }, { "epoch": 0.1328862660944206, "grad_norm": 0.330078125, "learning_rate": 4.429542203147354e-06, "loss": 2.3223, "step": 2477 }, { "epoch": 0.13293991416309012, "grad_norm": 0.34375, "learning_rate": 4.431330472103005e-06, "loss": 2.2508, "step": 2478 }, { "epoch": 0.13299356223175965, "grad_norm": 0.330078125, "learning_rate": 4.433118741058655e-06, "loss": 2.2068, "step": 2479 }, { "epoch": 0.13304721030042918, "grad_norm": 0.7265625, "learning_rate": 4.434907010014307e-06, "loss": 1.6193, "step": 2480 }, { "epoch": 0.1331008583690987, "grad_norm": 0.33203125, "learning_rate": 4.436695278969957e-06, "loss": 2.4532, "step": 2481 }, { "epoch": 0.13315450643776824, "grad_norm": 0.7109375, "learning_rate": 4.4384835479256085e-06, "loss": 2.3496, "step": 2482 }, { "epoch": 0.13320815450643778, "grad_norm": 0.66796875, "learning_rate": 4.440271816881259e-06, "loss": 2.4124, "step": 2483 }, { "epoch": 0.1332618025751073, "grad_norm": 0.33984375, "learning_rate": 4.44206008583691e-06, "loss": 2.2207, "step": 2484 }, { "epoch": 0.1333154506437768, "grad_norm": 0.33984375, "learning_rate": 4.443848354792561e-06, "loss": 2.5058, "step": 2485 }, { "epoch": 0.13336909871244634, "grad_norm": 0.259765625, "learning_rate": 4.445636623748212e-06, "loss": 2.2336, "step": 2486 }, { "epoch": 0.13342274678111588, "grad_norm": 0.333984375, "learning_rate": 4.447424892703863e-06, "loss": 1.9363, "step": 2487 }, { "epoch": 0.1334763948497854, "grad_norm": 0.3359375, "learning_rate": 4.449213161659514e-06, "loss": 2.3032, "step": 2488 }, { "epoch": 0.13353004291845494, "grad_norm": 0.328125, "learning_rate": 4.451001430615165e-06, "loss": 2.3392, "step": 2489 }, { "epoch": 0.13358369098712447, "grad_norm": 0.322265625, "learning_rate": 4.4527896995708155e-06, "loss": 2.1145, "step": 2490 }, { "epoch": 0.133637339055794, "grad_norm": 0.3359375, "learning_rate": 4.454577968526467e-06, "loss": 2.1897, "step": 2491 }, { "epoch": 0.1336909871244635, "grad_norm": 0.3515625, "learning_rate": 4.456366237482117e-06, "loss": 2.2062, "step": 2492 }, { "epoch": 0.13374463519313304, "grad_norm": 0.55078125, "learning_rate": 4.4581545064377685e-06, "loss": 1.5148, "step": 2493 }, { "epoch": 0.13379828326180257, "grad_norm": 0.6484375, "learning_rate": 4.459942775393419e-06, "loss": 2.3787, "step": 2494 }, { "epoch": 0.1338519313304721, "grad_norm": 0.287109375, "learning_rate": 4.46173104434907e-06, "loss": 2.1106, "step": 2495 }, { "epoch": 0.13390557939914163, "grad_norm": 0.333984375, "learning_rate": 4.4635193133047216e-06, "loss": 2.4752, "step": 2496 }, { "epoch": 0.13395922746781116, "grad_norm": 0.384765625, "learning_rate": 4.465307582260373e-06, "loss": 2.4259, "step": 2497 }, { "epoch": 0.1340128755364807, "grad_norm": 0.373046875, "learning_rate": 4.467095851216023e-06, "loss": 2.2329, "step": 2498 }, { "epoch": 0.1340665236051502, "grad_norm": 0.3359375, "learning_rate": 4.468884120171675e-06, "loss": 2.502, "step": 2499 }, { "epoch": 0.13412017167381973, "grad_norm": 0.34765625, "learning_rate": 4.470672389127325e-06, "loss": 2.4366, "step": 2500 }, { "epoch": 0.13417381974248926, "grad_norm": 0.353515625, "learning_rate": 4.472460658082976e-06, "loss": 2.42, "step": 2501 }, { "epoch": 0.1342274678111588, "grad_norm": 0.291015625, "learning_rate": 4.474248927038627e-06, "loss": 2.0264, "step": 2502 }, { "epoch": 0.13428111587982833, "grad_norm": 0.3671875, "learning_rate": 4.476037195994278e-06, "loss": 2.3941, "step": 2503 }, { "epoch": 0.13433476394849786, "grad_norm": 0.447265625, "learning_rate": 4.4778254649499286e-06, "loss": 1.7976, "step": 2504 }, { "epoch": 0.1343884120171674, "grad_norm": 0.40625, "learning_rate": 4.47961373390558e-06, "loss": 2.5317, "step": 2505 }, { "epoch": 0.13444206008583692, "grad_norm": 0.353515625, "learning_rate": 4.481402002861231e-06, "loss": 2.2196, "step": 2506 }, { "epoch": 0.13449570815450643, "grad_norm": 0.3984375, "learning_rate": 4.483190271816882e-06, "loss": 2.512, "step": 2507 }, { "epoch": 0.13454935622317596, "grad_norm": 0.306640625, "learning_rate": 4.484978540772533e-06, "loss": 2.1115, "step": 2508 }, { "epoch": 0.1346030042918455, "grad_norm": 0.65625, "learning_rate": 4.486766809728183e-06, "loss": 2.2687, "step": 2509 }, { "epoch": 0.13465665236051502, "grad_norm": 0.38671875, "learning_rate": 4.488555078683835e-06, "loss": 1.9127, "step": 2510 }, { "epoch": 0.13471030042918455, "grad_norm": 0.30078125, "learning_rate": 4.490343347639485e-06, "loss": 2.3341, "step": 2511 }, { "epoch": 0.13476394849785409, "grad_norm": 0.265625, "learning_rate": 4.4921316165951364e-06, "loss": 2.3171, "step": 2512 }, { "epoch": 0.13481759656652362, "grad_norm": 0.333984375, "learning_rate": 4.493919885550787e-06, "loss": 2.3731, "step": 2513 }, { "epoch": 0.13487124463519312, "grad_norm": 0.283203125, "learning_rate": 4.495708154506438e-06, "loss": 2.223, "step": 2514 }, { "epoch": 0.13492489270386265, "grad_norm": 0.330078125, "learning_rate": 4.497496423462089e-06, "loss": 1.994, "step": 2515 }, { "epoch": 0.13497854077253219, "grad_norm": 0.28125, "learning_rate": 4.49928469241774e-06, "loss": 2.1764, "step": 2516 }, { "epoch": 0.13503218884120172, "grad_norm": 1.0859375, "learning_rate": 4.501072961373391e-06, "loss": 2.3938, "step": 2517 }, { "epoch": 0.13508583690987125, "grad_norm": 0.337890625, "learning_rate": 4.502861230329042e-06, "loss": 2.2027, "step": 2518 }, { "epoch": 0.13513948497854078, "grad_norm": 0.33984375, "learning_rate": 4.504649499284693e-06, "loss": 2.2728, "step": 2519 }, { "epoch": 0.1351931330472103, "grad_norm": 1.2265625, "learning_rate": 4.5064377682403434e-06, "loss": 2.7375, "step": 2520 }, { "epoch": 0.13524678111587982, "grad_norm": 0.333984375, "learning_rate": 4.508226037195995e-06, "loss": 2.3979, "step": 2521 }, { "epoch": 0.13530042918454935, "grad_norm": 0.41015625, "learning_rate": 4.510014306151645e-06, "loss": 2.2997, "step": 2522 }, { "epoch": 0.13535407725321888, "grad_norm": 0.32421875, "learning_rate": 4.5118025751072965e-06, "loss": 2.5079, "step": 2523 }, { "epoch": 0.1354077253218884, "grad_norm": 0.490234375, "learning_rate": 4.513590844062947e-06, "loss": 2.3939, "step": 2524 }, { "epoch": 0.13546137339055794, "grad_norm": 0.33984375, "learning_rate": 4.515379113018598e-06, "loss": 2.3026, "step": 2525 }, { "epoch": 0.13551502145922747, "grad_norm": 0.296875, "learning_rate": 4.5171673819742495e-06, "loss": 2.637, "step": 2526 }, { "epoch": 0.135568669527897, "grad_norm": 0.484375, "learning_rate": 4.5189556509299e-06, "loss": 2.266, "step": 2527 }, { "epoch": 0.1356223175965665, "grad_norm": 0.349609375, "learning_rate": 4.520743919885551e-06, "loss": 2.3067, "step": 2528 }, { "epoch": 0.13567596566523604, "grad_norm": 0.5625, "learning_rate": 4.522532188841202e-06, "loss": 2.1779, "step": 2529 }, { "epoch": 0.13572961373390557, "grad_norm": 0.27734375, "learning_rate": 4.524320457796853e-06, "loss": 2.4284, "step": 2530 }, { "epoch": 0.1357832618025751, "grad_norm": 0.33203125, "learning_rate": 4.5261087267525035e-06, "loss": 2.1247, "step": 2531 }, { "epoch": 0.13583690987124464, "grad_norm": 0.33203125, "learning_rate": 4.527896995708155e-06, "loss": 2.3699, "step": 2532 }, { "epoch": 0.13589055793991417, "grad_norm": 0.439453125, "learning_rate": 4.529685264663805e-06, "loss": 2.4085, "step": 2533 }, { "epoch": 0.1359442060085837, "grad_norm": 0.2578125, "learning_rate": 4.5314735336194565e-06, "loss": 2.3652, "step": 2534 }, { "epoch": 0.1359978540772532, "grad_norm": 2.359375, "learning_rate": 4.533261802575107e-06, "loss": 2.346, "step": 2535 }, { "epoch": 0.13605150214592274, "grad_norm": 0.33203125, "learning_rate": 4.535050071530759e-06, "loss": 2.4387, "step": 2536 }, { "epoch": 0.13610515021459227, "grad_norm": 0.306640625, "learning_rate": 4.53683834048641e-06, "loss": 2.1356, "step": 2537 }, { "epoch": 0.1361587982832618, "grad_norm": 0.85546875, "learning_rate": 4.538626609442061e-06, "loss": 2.4683, "step": 2538 }, { "epoch": 0.13621244635193133, "grad_norm": 0.34765625, "learning_rate": 4.540414878397711e-06, "loss": 2.4848, "step": 2539 }, { "epoch": 0.13626609442060086, "grad_norm": 0.51171875, "learning_rate": 4.542203147353363e-06, "loss": 2.1465, "step": 2540 }, { "epoch": 0.1363197424892704, "grad_norm": 0.283203125, "learning_rate": 4.543991416309013e-06, "loss": 2.4244, "step": 2541 }, { "epoch": 0.13637339055793993, "grad_norm": 0.3671875, "learning_rate": 4.545779685264664e-06, "loss": 1.6193, "step": 2542 }, { "epoch": 0.13642703862660943, "grad_norm": 0.30078125, "learning_rate": 4.547567954220315e-06, "loss": 2.403, "step": 2543 }, { "epoch": 0.13648068669527896, "grad_norm": 0.306640625, "learning_rate": 4.549356223175966e-06, "loss": 2.3303, "step": 2544 }, { "epoch": 0.1365343347639485, "grad_norm": 0.3984375, "learning_rate": 4.5511444921316174e-06, "loss": 2.3047, "step": 2545 }, { "epoch": 0.13658798283261803, "grad_norm": 0.435546875, "learning_rate": 4.552932761087268e-06, "loss": 2.2599, "step": 2546 }, { "epoch": 0.13664163090128756, "grad_norm": 0.271484375, "learning_rate": 4.554721030042919e-06, "loss": 2.3043, "step": 2547 }, { "epoch": 0.1366952789699571, "grad_norm": 0.314453125, "learning_rate": 4.55650929899857e-06, "loss": 2.2332, "step": 2548 }, { "epoch": 0.13674892703862662, "grad_norm": 0.3046875, "learning_rate": 4.558297567954221e-06, "loss": 2.2255, "step": 2549 }, { "epoch": 0.13680257510729613, "grad_norm": 0.29296875, "learning_rate": 4.560085836909871e-06, "loss": 2.4245, "step": 2550 }, { "epoch": 0.13685622317596566, "grad_norm": 0.3515625, "learning_rate": 4.561874105865523e-06, "loss": 2.4481, "step": 2551 }, { "epoch": 0.1369098712446352, "grad_norm": 0.31640625, "learning_rate": 4.563662374821173e-06, "loss": 2.3068, "step": 2552 }, { "epoch": 0.13696351931330472, "grad_norm": 0.345703125, "learning_rate": 4.5654506437768245e-06, "loss": 2.2526, "step": 2553 }, { "epoch": 0.13701716738197425, "grad_norm": 0.296875, "learning_rate": 4.567238912732475e-06, "loss": 2.035, "step": 2554 }, { "epoch": 0.13707081545064378, "grad_norm": 0.314453125, "learning_rate": 4.569027181688126e-06, "loss": 2.2996, "step": 2555 }, { "epoch": 0.13712446351931332, "grad_norm": 0.5234375, "learning_rate": 4.5708154506437775e-06, "loss": 2.3824, "step": 2556 }, { "epoch": 0.13717811158798282, "grad_norm": 0.369140625, "learning_rate": 4.572603719599428e-06, "loss": 2.2531, "step": 2557 }, { "epoch": 0.13723175965665235, "grad_norm": 0.2890625, "learning_rate": 4.574391988555079e-06, "loss": 2.2902, "step": 2558 }, { "epoch": 0.13728540772532188, "grad_norm": 0.34765625, "learning_rate": 4.57618025751073e-06, "loss": 2.4555, "step": 2559 }, { "epoch": 0.13733905579399142, "grad_norm": 0.34765625, "learning_rate": 4.577968526466381e-06, "loss": 2.2617, "step": 2560 }, { "epoch": 0.13739270386266095, "grad_norm": 0.326171875, "learning_rate": 4.5797567954220315e-06, "loss": 2.418, "step": 2561 }, { "epoch": 0.13744635193133048, "grad_norm": 0.28125, "learning_rate": 4.581545064377683e-06, "loss": 2.2828, "step": 2562 }, { "epoch": 0.1375, "grad_norm": 0.330078125, "learning_rate": 4.583333333333333e-06, "loss": 2.2, "step": 2563 }, { "epoch": 0.13755364806866952, "grad_norm": 0.291015625, "learning_rate": 4.5851216022889845e-06, "loss": 2.332, "step": 2564 }, { "epoch": 0.13760729613733905, "grad_norm": 0.33203125, "learning_rate": 4.586909871244636e-06, "loss": 2.2765, "step": 2565 }, { "epoch": 0.13766094420600858, "grad_norm": 0.34765625, "learning_rate": 4.588698140200287e-06, "loss": 2.2534, "step": 2566 }, { "epoch": 0.1377145922746781, "grad_norm": 0.470703125, "learning_rate": 4.5904864091559376e-06, "loss": 2.5869, "step": 2567 }, { "epoch": 0.13776824034334764, "grad_norm": 0.376953125, "learning_rate": 4.592274678111589e-06, "loss": 2.5798, "step": 2568 }, { "epoch": 0.13782188841201717, "grad_norm": 0.6015625, "learning_rate": 4.594062947067239e-06, "loss": 2.4364, "step": 2569 }, { "epoch": 0.1378755364806867, "grad_norm": 0.359375, "learning_rate": 4.595851216022891e-06, "loss": 2.4366, "step": 2570 }, { "epoch": 0.1379291845493562, "grad_norm": 0.322265625, "learning_rate": 4.597639484978541e-06, "loss": 2.6136, "step": 2571 }, { "epoch": 0.13798283261802574, "grad_norm": 0.330078125, "learning_rate": 4.5994277539341915e-06, "loss": 2.463, "step": 2572 }, { "epoch": 0.13803648068669527, "grad_norm": 0.30859375, "learning_rate": 4.601216022889843e-06, "loss": 2.0181, "step": 2573 }, { "epoch": 0.1380901287553648, "grad_norm": 0.298828125, "learning_rate": 4.603004291845493e-06, "loss": 2.4837, "step": 2574 }, { "epoch": 0.13814377682403434, "grad_norm": 0.4609375, "learning_rate": 4.604792560801145e-06, "loss": 2.423, "step": 2575 }, { "epoch": 0.13819742489270387, "grad_norm": 0.494140625, "learning_rate": 4.606580829756796e-06, "loss": 2.565, "step": 2576 }, { "epoch": 0.1382510729613734, "grad_norm": 0.427734375, "learning_rate": 4.608369098712447e-06, "loss": 2.2916, "step": 2577 }, { "epoch": 0.13830472103004293, "grad_norm": 0.29296875, "learning_rate": 4.610157367668098e-06, "loss": 2.297, "step": 2578 }, { "epoch": 0.13835836909871244, "grad_norm": 0.390625, "learning_rate": 4.611945636623749e-06, "loss": 2.2495, "step": 2579 }, { "epoch": 0.13841201716738197, "grad_norm": 0.314453125, "learning_rate": 4.613733905579399e-06, "loss": 2.4079, "step": 2580 }, { "epoch": 0.1384656652360515, "grad_norm": 0.365234375, "learning_rate": 4.615522174535051e-06, "loss": 2.2718, "step": 2581 }, { "epoch": 0.13851931330472103, "grad_norm": 0.32421875, "learning_rate": 4.617310443490701e-06, "loss": 2.1141, "step": 2582 }, { "epoch": 0.13857296137339056, "grad_norm": 0.296875, "learning_rate": 4.6190987124463524e-06, "loss": 2.4308, "step": 2583 }, { "epoch": 0.1386266094420601, "grad_norm": 0.357421875, "learning_rate": 4.620886981402003e-06, "loss": 2.5445, "step": 2584 }, { "epoch": 0.13868025751072963, "grad_norm": 0.3046875, "learning_rate": 4.622675250357654e-06, "loss": 2.1886, "step": 2585 }, { "epoch": 0.13873390557939913, "grad_norm": 0.55078125, "learning_rate": 4.6244635193133055e-06, "loss": 2.5193, "step": 2586 }, { "epoch": 0.13878755364806866, "grad_norm": 0.294921875, "learning_rate": 4.626251788268956e-06, "loss": 2.121, "step": 2587 }, { "epoch": 0.1388412017167382, "grad_norm": 0.388671875, "learning_rate": 4.628040057224607e-06, "loss": 2.2893, "step": 2588 }, { "epoch": 0.13889484978540773, "grad_norm": 0.474609375, "learning_rate": 4.629828326180258e-06, "loss": 2.405, "step": 2589 }, { "epoch": 0.13894849785407726, "grad_norm": 1.71875, "learning_rate": 4.631616595135909e-06, "loss": 2.2152, "step": 2590 }, { "epoch": 0.1390021459227468, "grad_norm": 0.310546875, "learning_rate": 4.6334048640915594e-06, "loss": 2.1278, "step": 2591 }, { "epoch": 0.13905579399141632, "grad_norm": 0.357421875, "learning_rate": 4.635193133047211e-06, "loss": 2.1979, "step": 2592 }, { "epoch": 0.13910944206008583, "grad_norm": 0.625, "learning_rate": 4.636981402002861e-06, "loss": 2.4573, "step": 2593 }, { "epoch": 0.13916309012875536, "grad_norm": 0.345703125, "learning_rate": 4.6387696709585125e-06, "loss": 2.367, "step": 2594 }, { "epoch": 0.1392167381974249, "grad_norm": 0.310546875, "learning_rate": 4.640557939914164e-06, "loss": 2.2484, "step": 2595 }, { "epoch": 0.13927038626609442, "grad_norm": 0.330078125, "learning_rate": 4.642346208869814e-06, "loss": 2.5028, "step": 2596 }, { "epoch": 0.13932403433476395, "grad_norm": 0.451171875, "learning_rate": 4.6441344778254655e-06, "loss": 2.0904, "step": 2597 }, { "epoch": 0.13937768240343348, "grad_norm": 0.3671875, "learning_rate": 4.645922746781116e-06, "loss": 2.2378, "step": 2598 }, { "epoch": 0.13943133047210302, "grad_norm": 0.3515625, "learning_rate": 4.647711015736767e-06, "loss": 2.5893, "step": 2599 }, { "epoch": 0.13948497854077252, "grad_norm": 0.41796875, "learning_rate": 4.649499284692418e-06, "loss": 2.3466, "step": 2600 }, { "epoch": 0.13953862660944205, "grad_norm": 0.40625, "learning_rate": 4.651287553648069e-06, "loss": 2.2, "step": 2601 }, { "epoch": 0.13959227467811158, "grad_norm": 0.3984375, "learning_rate": 4.6530758226037195e-06, "loss": 1.4297, "step": 2602 }, { "epoch": 0.13964592274678111, "grad_norm": 0.33203125, "learning_rate": 4.654864091559371e-06, "loss": 2.4652, "step": 2603 }, { "epoch": 0.13969957081545065, "grad_norm": 0.408203125, "learning_rate": 4.656652360515021e-06, "loss": 2.4568, "step": 2604 }, { "epoch": 0.13975321888412018, "grad_norm": 0.33984375, "learning_rate": 4.658440629470673e-06, "loss": 2.3081, "step": 2605 }, { "epoch": 0.1398068669527897, "grad_norm": 0.34765625, "learning_rate": 4.660228898426324e-06, "loss": 2.3514, "step": 2606 }, { "epoch": 0.13986051502145921, "grad_norm": 0.5, "learning_rate": 4.662017167381975e-06, "loss": 2.1297, "step": 2607 }, { "epoch": 0.13991416309012875, "grad_norm": 0.28515625, "learning_rate": 4.663805436337626e-06, "loss": 2.2784, "step": 2608 }, { "epoch": 0.13996781115879828, "grad_norm": 0.5234375, "learning_rate": 4.665593705293277e-06, "loss": 2.2588, "step": 2609 }, { "epoch": 0.1400214592274678, "grad_norm": 0.419921875, "learning_rate": 4.667381974248927e-06, "loss": 2.2252, "step": 2610 }, { "epoch": 0.14007510729613734, "grad_norm": 0.349609375, "learning_rate": 4.669170243204579e-06, "loss": 2.3417, "step": 2611 }, { "epoch": 0.14012875536480687, "grad_norm": 0.322265625, "learning_rate": 4.670958512160229e-06, "loss": 2.2812, "step": 2612 }, { "epoch": 0.1401824034334764, "grad_norm": 0.310546875, "learning_rate": 4.67274678111588e-06, "loss": 2.3051, "step": 2613 }, { "epoch": 0.14023605150214594, "grad_norm": 0.287109375, "learning_rate": 4.674535050071532e-06, "loss": 2.1997, "step": 2614 }, { "epoch": 0.14028969957081544, "grad_norm": 0.28125, "learning_rate": 4.676323319027182e-06, "loss": 2.2611, "step": 2615 }, { "epoch": 0.14034334763948497, "grad_norm": 0.3046875, "learning_rate": 4.6781115879828334e-06, "loss": 2.3767, "step": 2616 }, { "epoch": 0.1403969957081545, "grad_norm": 0.287109375, "learning_rate": 4.679899856938484e-06, "loss": 2.2376, "step": 2617 }, { "epoch": 0.14045064377682404, "grad_norm": 0.498046875, "learning_rate": 4.681688125894135e-06, "loss": 1.58, "step": 2618 }, { "epoch": 0.14050429184549357, "grad_norm": 1.71875, "learning_rate": 4.683476394849786e-06, "loss": 2.2382, "step": 2619 }, { "epoch": 0.1405579399141631, "grad_norm": 0.3046875, "learning_rate": 4.685264663805437e-06, "loss": 2.254, "step": 2620 }, { "epoch": 0.14061158798283263, "grad_norm": 0.31640625, "learning_rate": 4.687052932761087e-06, "loss": 2.6836, "step": 2621 }, { "epoch": 0.14066523605150213, "grad_norm": 0.3984375, "learning_rate": 4.688841201716739e-06, "loss": 2.137, "step": 2622 }, { "epoch": 0.14071888412017167, "grad_norm": 0.30859375, "learning_rate": 4.690629470672389e-06, "loss": 2.2046, "step": 2623 }, { "epoch": 0.1407725321888412, "grad_norm": 0.357421875, "learning_rate": 4.6924177396280405e-06, "loss": 2.2915, "step": 2624 }, { "epoch": 0.14082618025751073, "grad_norm": 0.33203125, "learning_rate": 4.694206008583692e-06, "loss": 2.2197, "step": 2625 }, { "epoch": 0.14087982832618026, "grad_norm": 0.408203125, "learning_rate": 4.695994277539342e-06, "loss": 2.4271, "step": 2626 }, { "epoch": 0.1409334763948498, "grad_norm": 0.7734375, "learning_rate": 4.6977825464949935e-06, "loss": 1.5871, "step": 2627 }, { "epoch": 0.14098712446351933, "grad_norm": 0.32421875, "learning_rate": 4.699570815450644e-06, "loss": 2.1468, "step": 2628 }, { "epoch": 0.14104077253218883, "grad_norm": 0.326171875, "learning_rate": 4.701359084406295e-06, "loss": 2.2049, "step": 2629 }, { "epoch": 0.14109442060085836, "grad_norm": 0.283203125, "learning_rate": 4.703147353361946e-06, "loss": 2.0035, "step": 2630 }, { "epoch": 0.1411480686695279, "grad_norm": 0.294921875, "learning_rate": 4.704935622317597e-06, "loss": 2.4976, "step": 2631 }, { "epoch": 0.14120171673819742, "grad_norm": 0.294921875, "learning_rate": 4.7067238912732475e-06, "loss": 2.3313, "step": 2632 }, { "epoch": 0.14125536480686696, "grad_norm": 0.345703125, "learning_rate": 4.708512160228899e-06, "loss": 2.3224, "step": 2633 }, { "epoch": 0.1413090128755365, "grad_norm": 0.3046875, "learning_rate": 4.71030042918455e-06, "loss": 2.365, "step": 2634 }, { "epoch": 0.14136266094420602, "grad_norm": 0.349609375, "learning_rate": 4.7120886981402005e-06, "loss": 2.1, "step": 2635 }, { "epoch": 0.14141630901287552, "grad_norm": 0.400390625, "learning_rate": 4.713876967095852e-06, "loss": 2.746, "step": 2636 }, { "epoch": 0.14146995708154506, "grad_norm": 0.421875, "learning_rate": 4.715665236051502e-06, "loss": 1.5985, "step": 2637 }, { "epoch": 0.1415236051502146, "grad_norm": 0.287109375, "learning_rate": 4.7174535050071536e-06, "loss": 2.3276, "step": 2638 }, { "epoch": 0.14157725321888412, "grad_norm": 0.30078125, "learning_rate": 4.719241773962804e-06, "loss": 2.4061, "step": 2639 }, { "epoch": 0.14163090128755365, "grad_norm": 0.2890625, "learning_rate": 4.721030042918455e-06, "loss": 2.2149, "step": 2640 }, { "epoch": 0.14168454935622318, "grad_norm": 0.310546875, "learning_rate": 4.722818311874106e-06, "loss": 2.3146, "step": 2641 }, { "epoch": 0.14173819742489271, "grad_norm": 0.28125, "learning_rate": 4.724606580829757e-06, "loss": 2.1986, "step": 2642 }, { "epoch": 0.14179184549356222, "grad_norm": 0.423828125, "learning_rate": 4.7263948497854075e-06, "loss": 2.5118, "step": 2643 }, { "epoch": 0.14184549356223175, "grad_norm": 0.375, "learning_rate": 4.72818311874106e-06, "loss": 2.3738, "step": 2644 }, { "epoch": 0.14189914163090128, "grad_norm": 0.306640625, "learning_rate": 4.72997138769671e-06, "loss": 2.3896, "step": 2645 }, { "epoch": 0.1419527896995708, "grad_norm": 0.59765625, "learning_rate": 4.731759656652361e-06, "loss": 2.1322, "step": 2646 }, { "epoch": 0.14200643776824035, "grad_norm": 0.462890625, "learning_rate": 4.733547925608012e-06, "loss": 1.6399, "step": 2647 }, { "epoch": 0.14206008583690988, "grad_norm": 0.369140625, "learning_rate": 4.735336194563663e-06, "loss": 2.3551, "step": 2648 }, { "epoch": 0.1421137339055794, "grad_norm": 0.341796875, "learning_rate": 4.737124463519314e-06, "loss": 2.254, "step": 2649 }, { "epoch": 0.14216738197424894, "grad_norm": 0.30078125, "learning_rate": 4.738912732474965e-06, "loss": 2.3137, "step": 2650 }, { "epoch": 0.14222103004291844, "grad_norm": 0.59765625, "learning_rate": 4.740701001430615e-06, "loss": 2.1373, "step": 2651 }, { "epoch": 0.14227467811158798, "grad_norm": 0.3515625, "learning_rate": 4.742489270386267e-06, "loss": 2.1213, "step": 2652 }, { "epoch": 0.1423283261802575, "grad_norm": 0.310546875, "learning_rate": 4.744277539341917e-06, "loss": 2.1753, "step": 2653 }, { "epoch": 0.14238197424892704, "grad_norm": 0.640625, "learning_rate": 4.746065808297568e-06, "loss": 2.2534, "step": 2654 }, { "epoch": 0.14243562231759657, "grad_norm": 0.466796875, "learning_rate": 4.74785407725322e-06, "loss": 2.2368, "step": 2655 }, { "epoch": 0.1424892703862661, "grad_norm": 0.412109375, "learning_rate": 4.74964234620887e-06, "loss": 2.4112, "step": 2656 }, { "epoch": 0.14254291845493564, "grad_norm": 0.287109375, "learning_rate": 4.7514306151645215e-06, "loss": 2.1501, "step": 2657 }, { "epoch": 0.14259656652360514, "grad_norm": 0.330078125, "learning_rate": 4.753218884120172e-06, "loss": 2.2626, "step": 2658 }, { "epoch": 0.14265021459227467, "grad_norm": 0.259765625, "learning_rate": 4.755007153075823e-06, "loss": 2.3156, "step": 2659 }, { "epoch": 0.1427038626609442, "grad_norm": 0.36328125, "learning_rate": 4.756795422031474e-06, "loss": 2.2897, "step": 2660 }, { "epoch": 0.14275751072961373, "grad_norm": 0.294921875, "learning_rate": 4.758583690987125e-06, "loss": 2.3294, "step": 2661 }, { "epoch": 0.14281115879828327, "grad_norm": 0.30859375, "learning_rate": 4.7603719599427754e-06, "loss": 2.5314, "step": 2662 }, { "epoch": 0.1428648068669528, "grad_norm": 0.330078125, "learning_rate": 4.762160228898427e-06, "loss": 2.425, "step": 2663 }, { "epoch": 0.14291845493562233, "grad_norm": 0.3984375, "learning_rate": 4.763948497854078e-06, "loss": 2.5369, "step": 2664 }, { "epoch": 0.14297210300429183, "grad_norm": 0.6640625, "learning_rate": 4.7657367668097285e-06, "loss": 2.3594, "step": 2665 }, { "epoch": 0.14302575107296137, "grad_norm": 2.109375, "learning_rate": 4.76752503576538e-06, "loss": 2.2354, "step": 2666 }, { "epoch": 0.1430793991416309, "grad_norm": 0.29296875, "learning_rate": 4.76931330472103e-06, "loss": 2.3803, "step": 2667 }, { "epoch": 0.14313304721030043, "grad_norm": 0.357421875, "learning_rate": 4.7711015736766815e-06, "loss": 2.3382, "step": 2668 }, { "epoch": 0.14318669527896996, "grad_norm": 0.310546875, "learning_rate": 4.772889842632332e-06, "loss": 2.393, "step": 2669 }, { "epoch": 0.1432403433476395, "grad_norm": 0.3125, "learning_rate": 4.774678111587983e-06, "loss": 2.3854, "step": 2670 }, { "epoch": 0.14329399141630902, "grad_norm": 0.3359375, "learning_rate": 4.776466380543634e-06, "loss": 2.3227, "step": 2671 }, { "epoch": 0.14334763948497853, "grad_norm": 0.328125, "learning_rate": 4.778254649499285e-06, "loss": 2.2748, "step": 2672 }, { "epoch": 0.14340128755364806, "grad_norm": 0.318359375, "learning_rate": 4.780042918454936e-06, "loss": 2.2692, "step": 2673 }, { "epoch": 0.1434549356223176, "grad_norm": 0.296875, "learning_rate": 4.781831187410587e-06, "loss": 2.3911, "step": 2674 }, { "epoch": 0.14350858369098712, "grad_norm": 0.341796875, "learning_rate": 4.783619456366238e-06, "loss": 2.4411, "step": 2675 }, { "epoch": 0.14356223175965666, "grad_norm": 0.306640625, "learning_rate": 4.7854077253218885e-06, "loss": 2.4293, "step": 2676 }, { "epoch": 0.1436158798283262, "grad_norm": 0.498046875, "learning_rate": 4.78719599427754e-06, "loss": 2.3404, "step": 2677 }, { "epoch": 0.14366952789699572, "grad_norm": 0.291015625, "learning_rate": 4.78898426323319e-06, "loss": 2.4107, "step": 2678 }, { "epoch": 0.14372317596566522, "grad_norm": 0.375, "learning_rate": 4.790772532188842e-06, "loss": 2.3747, "step": 2679 }, { "epoch": 0.14377682403433475, "grad_norm": 0.3046875, "learning_rate": 4.792560801144492e-06, "loss": 2.3006, "step": 2680 }, { "epoch": 0.1438304721030043, "grad_norm": 0.33203125, "learning_rate": 4.794349070100143e-06, "loss": 2.4459, "step": 2681 }, { "epoch": 0.14388412017167382, "grad_norm": 0.353515625, "learning_rate": 4.796137339055794e-06, "loss": 2.3547, "step": 2682 }, { "epoch": 0.14393776824034335, "grad_norm": 0.3125, "learning_rate": 4.797925608011446e-06, "loss": 2.4161, "step": 2683 }, { "epoch": 0.14399141630901288, "grad_norm": 0.326171875, "learning_rate": 4.799713876967096e-06, "loss": 2.3509, "step": 2684 }, { "epoch": 0.1440450643776824, "grad_norm": 0.421875, "learning_rate": 4.801502145922748e-06, "loss": 2.3247, "step": 2685 }, { "epoch": 0.14409871244635192, "grad_norm": 0.326171875, "learning_rate": 4.803290414878398e-06, "loss": 2.2587, "step": 2686 }, { "epoch": 0.14415236051502145, "grad_norm": 0.341796875, "learning_rate": 4.8050786838340494e-06, "loss": 2.5253, "step": 2687 }, { "epoch": 0.14420600858369098, "grad_norm": 0.349609375, "learning_rate": 4.8068669527897e-06, "loss": 2.3822, "step": 2688 }, { "epoch": 0.1442596566523605, "grad_norm": 0.337890625, "learning_rate": 4.808655221745351e-06, "loss": 2.0962, "step": 2689 }, { "epoch": 0.14431330472103004, "grad_norm": 0.404296875, "learning_rate": 4.810443490701002e-06, "loss": 2.5429, "step": 2690 }, { "epoch": 0.14436695278969958, "grad_norm": 0.4375, "learning_rate": 4.812231759656653e-06, "loss": 2.4205, "step": 2691 }, { "epoch": 0.1444206008583691, "grad_norm": 0.322265625, "learning_rate": 4.814020028612303e-06, "loss": 2.3183, "step": 2692 }, { "epoch": 0.14447424892703864, "grad_norm": 0.5625, "learning_rate": 4.815808297567955e-06, "loss": 2.3242, "step": 2693 }, { "epoch": 0.14452789699570814, "grad_norm": 0.3046875, "learning_rate": 4.817596566523606e-06, "loss": 2.2377, "step": 2694 }, { "epoch": 0.14458154506437768, "grad_norm": 0.345703125, "learning_rate": 4.8193848354792564e-06, "loss": 2.4881, "step": 2695 }, { "epoch": 0.1446351931330472, "grad_norm": 0.404296875, "learning_rate": 4.821173104434908e-06, "loss": 2.4473, "step": 2696 }, { "epoch": 0.14468884120171674, "grad_norm": 0.328125, "learning_rate": 4.822961373390558e-06, "loss": 2.3599, "step": 2697 }, { "epoch": 0.14474248927038627, "grad_norm": 0.41015625, "learning_rate": 4.8247496423462095e-06, "loss": 2.3837, "step": 2698 }, { "epoch": 0.1447961373390558, "grad_norm": 0.345703125, "learning_rate": 4.82653791130186e-06, "loss": 1.5673, "step": 2699 }, { "epoch": 0.14484978540772533, "grad_norm": 0.345703125, "learning_rate": 4.828326180257511e-06, "loss": 1.7055, "step": 2700 }, { "epoch": 0.14490343347639484, "grad_norm": 0.3046875, "learning_rate": 4.830114449213162e-06, "loss": 2.5134, "step": 2701 }, { "epoch": 0.14495708154506437, "grad_norm": 1.1015625, "learning_rate": 4.831902718168813e-06, "loss": 2.5049, "step": 2702 }, { "epoch": 0.1450107296137339, "grad_norm": 0.314453125, "learning_rate": 4.833690987124464e-06, "loss": 2.5554, "step": 2703 }, { "epoch": 0.14506437768240343, "grad_norm": 0.353515625, "learning_rate": 4.835479256080115e-06, "loss": 2.3257, "step": 2704 }, { "epoch": 0.14511802575107297, "grad_norm": 0.2734375, "learning_rate": 4.837267525035766e-06, "loss": 2.2938, "step": 2705 }, { "epoch": 0.1451716738197425, "grad_norm": 0.314453125, "learning_rate": 4.8390557939914165e-06, "loss": 2.3984, "step": 2706 }, { "epoch": 0.14522532188841203, "grad_norm": 0.333984375, "learning_rate": 4.840844062947068e-06, "loss": 2.353, "step": 2707 }, { "epoch": 0.14527896995708153, "grad_norm": 0.318359375, "learning_rate": 4.842632331902718e-06, "loss": 2.5066, "step": 2708 }, { "epoch": 0.14533261802575106, "grad_norm": 0.298828125, "learning_rate": 4.8444206008583696e-06, "loss": 2.2283, "step": 2709 }, { "epoch": 0.1453862660944206, "grad_norm": 0.271484375, "learning_rate": 4.84620886981402e-06, "loss": 2.2358, "step": 2710 }, { "epoch": 0.14543991416309013, "grad_norm": 0.3125, "learning_rate": 4.847997138769671e-06, "loss": 2.3584, "step": 2711 }, { "epoch": 0.14549356223175966, "grad_norm": 0.287109375, "learning_rate": 4.849785407725322e-06, "loss": 2.4151, "step": 2712 }, { "epoch": 0.1455472103004292, "grad_norm": 0.376953125, "learning_rate": 4.851573676680973e-06, "loss": 2.3095, "step": 2713 }, { "epoch": 0.14560085836909872, "grad_norm": 0.28515625, "learning_rate": 4.853361945636624e-06, "loss": 2.233, "step": 2714 }, { "epoch": 0.14565450643776823, "grad_norm": 0.341796875, "learning_rate": 4.855150214592275e-06, "loss": 2.6101, "step": 2715 }, { "epoch": 0.14570815450643776, "grad_norm": 0.330078125, "learning_rate": 4.856938483547926e-06, "loss": 2.2529, "step": 2716 }, { "epoch": 0.1457618025751073, "grad_norm": 1.0859375, "learning_rate": 4.8587267525035766e-06, "loss": 2.5046, "step": 2717 }, { "epoch": 0.14581545064377682, "grad_norm": 0.33984375, "learning_rate": 4.860515021459228e-06, "loss": 2.3408, "step": 2718 }, { "epoch": 0.14586909871244635, "grad_norm": 0.34375, "learning_rate": 4.862303290414878e-06, "loss": 2.3857, "step": 2719 }, { "epoch": 0.1459227467811159, "grad_norm": 0.375, "learning_rate": 4.86409155937053e-06, "loss": 2.4297, "step": 2720 }, { "epoch": 0.14597639484978542, "grad_norm": 0.400390625, "learning_rate": 4.86587982832618e-06, "loss": 2.3854, "step": 2721 }, { "epoch": 0.14603004291845492, "grad_norm": 0.396484375, "learning_rate": 4.867668097281831e-06, "loss": 2.5682, "step": 2722 }, { "epoch": 0.14608369098712445, "grad_norm": 0.32421875, "learning_rate": 4.869456366237483e-06, "loss": 2.361, "step": 2723 }, { "epoch": 0.14613733905579399, "grad_norm": 0.400390625, "learning_rate": 4.871244635193134e-06, "loss": 2.3099, "step": 2724 }, { "epoch": 0.14619098712446352, "grad_norm": 0.26171875, "learning_rate": 4.873032904148784e-06, "loss": 2.2004, "step": 2725 }, { "epoch": 0.14624463519313305, "grad_norm": 0.314453125, "learning_rate": 4.874821173104436e-06, "loss": 2.1678, "step": 2726 }, { "epoch": 0.14629828326180258, "grad_norm": 0.310546875, "learning_rate": 4.876609442060086e-06, "loss": 2.1168, "step": 2727 }, { "epoch": 0.1463519313304721, "grad_norm": 0.44140625, "learning_rate": 4.8783977110157375e-06, "loss": 2.2041, "step": 2728 }, { "epoch": 0.14640557939914164, "grad_norm": 0.314453125, "learning_rate": 4.880185979971388e-06, "loss": 2.3005, "step": 2729 }, { "epoch": 0.14645922746781115, "grad_norm": 0.291015625, "learning_rate": 4.881974248927039e-06, "loss": 2.4552, "step": 2730 }, { "epoch": 0.14651287553648068, "grad_norm": 0.2890625, "learning_rate": 4.88376251788269e-06, "loss": 2.4796, "step": 2731 }, { "epoch": 0.1465665236051502, "grad_norm": 0.365234375, "learning_rate": 4.885550786838341e-06, "loss": 2.2024, "step": 2732 }, { "epoch": 0.14662017167381974, "grad_norm": 0.3203125, "learning_rate": 4.887339055793992e-06, "loss": 2.4015, "step": 2733 }, { "epoch": 0.14667381974248928, "grad_norm": 0.310546875, "learning_rate": 4.889127324749643e-06, "loss": 2.3284, "step": 2734 }, { "epoch": 0.1467274678111588, "grad_norm": 0.341796875, "learning_rate": 4.890915593705294e-06, "loss": 2.1558, "step": 2735 }, { "epoch": 0.14678111587982834, "grad_norm": 0.73046875, "learning_rate": 4.8927038626609445e-06, "loss": 2.5159, "step": 2736 }, { "epoch": 0.14683476394849784, "grad_norm": 0.337890625, "learning_rate": 4.894492131616596e-06, "loss": 2.2842, "step": 2737 }, { "epoch": 0.14688841201716737, "grad_norm": 0.328125, "learning_rate": 4.896280400572246e-06, "loss": 2.5236, "step": 2738 }, { "epoch": 0.1469420600858369, "grad_norm": 0.30859375, "learning_rate": 4.8980686695278975e-06, "loss": 2.2813, "step": 2739 }, { "epoch": 0.14699570815450644, "grad_norm": 0.25390625, "learning_rate": 4.899856938483548e-06, "loss": 2.1114, "step": 2740 }, { "epoch": 0.14704935622317597, "grad_norm": 0.365234375, "learning_rate": 4.901645207439199e-06, "loss": 2.1121, "step": 2741 }, { "epoch": 0.1471030042918455, "grad_norm": 0.3046875, "learning_rate": 4.9034334763948506e-06, "loss": 2.4642, "step": 2742 }, { "epoch": 0.14715665236051503, "grad_norm": 0.423828125, "learning_rate": 4.905221745350501e-06, "loss": 2.3189, "step": 2743 }, { "epoch": 0.14721030042918454, "grad_norm": 0.357421875, "learning_rate": 4.907010014306152e-06, "loss": 2.4125, "step": 2744 }, { "epoch": 0.14726394849785407, "grad_norm": 0.88671875, "learning_rate": 4.908798283261803e-06, "loss": 2.0357, "step": 2745 }, { "epoch": 0.1473175965665236, "grad_norm": 0.32421875, "learning_rate": 4.910586552217454e-06, "loss": 2.532, "step": 2746 }, { "epoch": 0.14737124463519313, "grad_norm": 0.296875, "learning_rate": 4.9123748211731045e-06, "loss": 2.2029, "step": 2747 }, { "epoch": 0.14742489270386266, "grad_norm": 0.384765625, "learning_rate": 4.914163090128756e-06, "loss": 2.601, "step": 2748 }, { "epoch": 0.1474785407725322, "grad_norm": 0.314453125, "learning_rate": 4.915951359084406e-06, "loss": 2.0479, "step": 2749 }, { "epoch": 0.14753218884120173, "grad_norm": 0.3203125, "learning_rate": 4.917739628040058e-06, "loss": 2.4917, "step": 2750 }, { "epoch": 0.14758583690987123, "grad_norm": 0.345703125, "learning_rate": 4.919527896995708e-06, "loss": 2.2418, "step": 2751 }, { "epoch": 0.14763948497854076, "grad_norm": 0.34375, "learning_rate": 4.921316165951359e-06, "loss": 2.196, "step": 2752 }, { "epoch": 0.1476931330472103, "grad_norm": 0.390625, "learning_rate": 4.923104434907011e-06, "loss": 2.4146, "step": 2753 }, { "epoch": 0.14774678111587983, "grad_norm": 0.33203125, "learning_rate": 4.924892703862661e-06, "loss": 2.2549, "step": 2754 }, { "epoch": 0.14780042918454936, "grad_norm": 0.251953125, "learning_rate": 4.926680972818312e-06, "loss": 2.1658, "step": 2755 }, { "epoch": 0.1478540772532189, "grad_norm": 0.435546875, "learning_rate": 4.928469241773963e-06, "loss": 2.4253, "step": 2756 }, { "epoch": 0.14790772532188842, "grad_norm": 0.3984375, "learning_rate": 4.930257510729614e-06, "loss": 2.4405, "step": 2757 }, { "epoch": 0.14796137339055793, "grad_norm": 0.5390625, "learning_rate": 4.932045779685265e-06, "loss": 2.3129, "step": 2758 }, { "epoch": 0.14801502145922746, "grad_norm": 0.322265625, "learning_rate": 4.933834048640916e-06, "loss": 2.3697, "step": 2759 }, { "epoch": 0.148068669527897, "grad_norm": 0.314453125, "learning_rate": 4.935622317596566e-06, "loss": 2.3762, "step": 2760 }, { "epoch": 0.14812231759656652, "grad_norm": 0.384765625, "learning_rate": 4.937410586552218e-06, "loss": 2.5149, "step": 2761 }, { "epoch": 0.14817596566523605, "grad_norm": 0.5390625, "learning_rate": 4.939198855507869e-06, "loss": 2.2073, "step": 2762 }, { "epoch": 0.14822961373390559, "grad_norm": 0.390625, "learning_rate": 4.94098712446352e-06, "loss": 2.5632, "step": 2763 }, { "epoch": 0.14828326180257512, "grad_norm": 0.314453125, "learning_rate": 4.942775393419171e-06, "loss": 2.2833, "step": 2764 }, { "epoch": 0.14833690987124465, "grad_norm": 0.294921875, "learning_rate": 4.944563662374822e-06, "loss": 2.4316, "step": 2765 }, { "epoch": 0.14839055793991415, "grad_norm": 0.455078125, "learning_rate": 4.9463519313304724e-06, "loss": 2.4265, "step": 2766 }, { "epoch": 0.14844420600858368, "grad_norm": 0.306640625, "learning_rate": 4.948140200286124e-06, "loss": 2.2723, "step": 2767 }, { "epoch": 0.14849785407725322, "grad_norm": 0.283203125, "learning_rate": 4.949928469241774e-06, "loss": 2.3607, "step": 2768 }, { "epoch": 0.14855150214592275, "grad_norm": 0.361328125, "learning_rate": 4.9517167381974255e-06, "loss": 2.5423, "step": 2769 }, { "epoch": 0.14860515021459228, "grad_norm": 0.318359375, "learning_rate": 4.953505007153076e-06, "loss": 2.6443, "step": 2770 }, { "epoch": 0.1486587982832618, "grad_norm": 0.306640625, "learning_rate": 4.955293276108727e-06, "loss": 2.3618, "step": 2771 }, { "epoch": 0.14871244635193134, "grad_norm": 0.306640625, "learning_rate": 4.9570815450643785e-06, "loss": 2.1007, "step": 2772 }, { "epoch": 0.14876609442060085, "grad_norm": 0.3203125, "learning_rate": 4.958869814020029e-06, "loss": 2.4026, "step": 2773 }, { "epoch": 0.14881974248927038, "grad_norm": 0.53125, "learning_rate": 4.96065808297568e-06, "loss": 2.4717, "step": 2774 }, { "epoch": 0.1488733905579399, "grad_norm": 0.765625, "learning_rate": 4.962446351931331e-06, "loss": 2.331, "step": 2775 }, { "epoch": 0.14892703862660944, "grad_norm": 0.333984375, "learning_rate": 4.964234620886982e-06, "loss": 2.4633, "step": 2776 }, { "epoch": 0.14898068669527897, "grad_norm": 0.412109375, "learning_rate": 4.9660228898426325e-06, "loss": 2.3683, "step": 2777 }, { "epoch": 0.1490343347639485, "grad_norm": 0.50390625, "learning_rate": 4.967811158798284e-06, "loss": 2.324, "step": 2778 }, { "epoch": 0.14908798283261804, "grad_norm": 0.275390625, "learning_rate": 4.969599427753934e-06, "loss": 2.0219, "step": 2779 }, { "epoch": 0.14914163090128754, "grad_norm": 0.4296875, "learning_rate": 4.9713876967095855e-06, "loss": 2.6862, "step": 2780 }, { "epoch": 0.14919527896995707, "grad_norm": 0.431640625, "learning_rate": 4.973175965665236e-06, "loss": 2.3303, "step": 2781 }, { "epoch": 0.1492489270386266, "grad_norm": 0.298828125, "learning_rate": 4.974964234620887e-06, "loss": 2.3672, "step": 2782 }, { "epoch": 0.14930257510729614, "grad_norm": 0.322265625, "learning_rate": 4.976752503576539e-06, "loss": 2.3249, "step": 2783 }, { "epoch": 0.14935622317596567, "grad_norm": 0.36328125, "learning_rate": 4.978540772532189e-06, "loss": 2.5729, "step": 2784 }, { "epoch": 0.1494098712446352, "grad_norm": 0.353515625, "learning_rate": 4.98032904148784e-06, "loss": 1.9681, "step": 2785 }, { "epoch": 0.14946351931330473, "grad_norm": 0.359375, "learning_rate": 4.982117310443491e-06, "loss": 2.4205, "step": 2786 }, { "epoch": 0.14951716738197424, "grad_norm": 0.421875, "learning_rate": 4.983905579399142e-06, "loss": 2.3927, "step": 2787 }, { "epoch": 0.14957081545064377, "grad_norm": 0.322265625, "learning_rate": 4.9856938483547926e-06, "loss": 2.3645, "step": 2788 }, { "epoch": 0.1496244635193133, "grad_norm": 0.7578125, "learning_rate": 4.987482117310444e-06, "loss": 2.4078, "step": 2789 }, { "epoch": 0.14967811158798283, "grad_norm": 0.36328125, "learning_rate": 4.989270386266094e-06, "loss": 2.5312, "step": 2790 }, { "epoch": 0.14973175965665236, "grad_norm": 0.400390625, "learning_rate": 4.991058655221746e-06, "loss": 2.4083, "step": 2791 }, { "epoch": 0.1497854077253219, "grad_norm": 0.375, "learning_rate": 4.992846924177397e-06, "loss": 2.3295, "step": 2792 }, { "epoch": 0.14983905579399143, "grad_norm": 2.703125, "learning_rate": 4.994635193133048e-06, "loss": 2.3189, "step": 2793 }, { "epoch": 0.14989270386266093, "grad_norm": 0.30078125, "learning_rate": 4.996423462088699e-06, "loss": 2.552, "step": 2794 }, { "epoch": 0.14994635193133046, "grad_norm": 0.357421875, "learning_rate": 4.99821173104435e-06, "loss": 2.4193, "step": 2795 }, { "epoch": 0.15, "grad_norm": 0.43359375, "learning_rate": 5e-06, "loss": 2.5891, "step": 2796 }, { "epoch": 0.15005364806866953, "grad_norm": 0.51171875, "learning_rate": 4.999999998490495e-06, "loss": 2.5787, "step": 2797 }, { "epoch": 0.15010729613733906, "grad_norm": 0.3125, "learning_rate": 4.999999993961981e-06, "loss": 2.2645, "step": 2798 }, { "epoch": 0.1501609442060086, "grad_norm": 0.314453125, "learning_rate": 4.9999999864144584e-06, "loss": 2.628, "step": 2799 }, { "epoch": 0.15021459227467812, "grad_norm": 0.369140625, "learning_rate": 4.999999975847926e-06, "loss": 2.4222, "step": 2800 }, { "epoch": 0.15026824034334765, "grad_norm": 0.392578125, "learning_rate": 4.999999962262384e-06, "loss": 1.6503, "step": 2801 }, { "epoch": 0.15032188841201716, "grad_norm": 0.42578125, "learning_rate": 4.999999945657832e-06, "loss": 2.2628, "step": 2802 }, { "epoch": 0.1503755364806867, "grad_norm": 0.373046875, "learning_rate": 4.999999926034271e-06, "loss": 2.2423, "step": 2803 }, { "epoch": 0.15042918454935622, "grad_norm": 0.34765625, "learning_rate": 4.999999903391702e-06, "loss": 2.2874, "step": 2804 }, { "epoch": 0.15048283261802575, "grad_norm": 0.318359375, "learning_rate": 4.999999877730122e-06, "loss": 2.0837, "step": 2805 }, { "epoch": 0.15053648068669528, "grad_norm": 0.333984375, "learning_rate": 4.999999849049534e-06, "loss": 2.3925, "step": 2806 }, { "epoch": 0.15059012875536482, "grad_norm": 0.279296875, "learning_rate": 4.999999817349936e-06, "loss": 2.3559, "step": 2807 }, { "epoch": 0.15064377682403435, "grad_norm": 0.298828125, "learning_rate": 4.999999782631329e-06, "loss": 2.3486, "step": 2808 }, { "epoch": 0.15069742489270385, "grad_norm": 0.30859375, "learning_rate": 4.999999744893713e-06, "loss": 2.22, "step": 2809 }, { "epoch": 0.15075107296137338, "grad_norm": 0.3984375, "learning_rate": 4.999999704137088e-06, "loss": 2.1162, "step": 2810 }, { "epoch": 0.15080472103004292, "grad_norm": 0.275390625, "learning_rate": 4.999999660361454e-06, "loss": 2.3627, "step": 2811 }, { "epoch": 0.15085836909871245, "grad_norm": 0.57421875, "learning_rate": 4.999999613566811e-06, "loss": 2.297, "step": 2812 }, { "epoch": 0.15091201716738198, "grad_norm": 0.328125, "learning_rate": 4.9999995637531595e-06, "loss": 2.4804, "step": 2813 }, { "epoch": 0.1509656652360515, "grad_norm": 0.30078125, "learning_rate": 4.999999510920498e-06, "loss": 2.1224, "step": 2814 }, { "epoch": 0.15101931330472104, "grad_norm": 0.28515625, "learning_rate": 4.999999455068829e-06, "loss": 2.4671, "step": 2815 }, { "epoch": 0.15107296137339055, "grad_norm": 0.32421875, "learning_rate": 4.999999396198151e-06, "loss": 2.365, "step": 2816 }, { "epoch": 0.15112660944206008, "grad_norm": 0.462890625, "learning_rate": 4.999999334308464e-06, "loss": 2.3896, "step": 2817 }, { "epoch": 0.1511802575107296, "grad_norm": 0.287109375, "learning_rate": 4.999999269399769e-06, "loss": 2.3185, "step": 2818 }, { "epoch": 0.15123390557939914, "grad_norm": 0.7421875, "learning_rate": 4.9999992014720645e-06, "loss": 2.264, "step": 2819 }, { "epoch": 0.15128755364806867, "grad_norm": 0.388671875, "learning_rate": 4.999999130525352e-06, "loss": 2.521, "step": 2820 }, { "epoch": 0.1513412017167382, "grad_norm": 0.306640625, "learning_rate": 4.999999056559632e-06, "loss": 2.274, "step": 2821 }, { "epoch": 0.15139484978540774, "grad_norm": 0.34375, "learning_rate": 4.999998979574903e-06, "loss": 2.2192, "step": 2822 }, { "epoch": 0.15144849785407724, "grad_norm": 0.34765625, "learning_rate": 4.999998899571166e-06, "loss": 2.3761, "step": 2823 }, { "epoch": 0.15150214592274677, "grad_norm": 0.34765625, "learning_rate": 4.999998816548421e-06, "loss": 2.2751, "step": 2824 }, { "epoch": 0.1515557939914163, "grad_norm": 0.318359375, "learning_rate": 4.999998730506668e-06, "loss": 2.3395, "step": 2825 }, { "epoch": 0.15160944206008584, "grad_norm": 0.322265625, "learning_rate": 4.999998641445907e-06, "loss": 2.1313, "step": 2826 }, { "epoch": 0.15166309012875537, "grad_norm": 0.333984375, "learning_rate": 4.999998549366139e-06, "loss": 2.3032, "step": 2827 }, { "epoch": 0.1517167381974249, "grad_norm": 0.42578125, "learning_rate": 4.999998454267363e-06, "loss": 2.3531, "step": 2828 }, { "epoch": 0.15177038626609443, "grad_norm": 0.3359375, "learning_rate": 4.999998356149579e-06, "loss": 2.2538, "step": 2829 }, { "epoch": 0.15182403433476394, "grad_norm": 0.353515625, "learning_rate": 4.999998255012788e-06, "loss": 2.3843, "step": 2830 }, { "epoch": 0.15187768240343347, "grad_norm": 0.3828125, "learning_rate": 4.99999815085699e-06, "loss": 2.5385, "step": 2831 }, { "epoch": 0.151931330472103, "grad_norm": 0.33203125, "learning_rate": 4.9999980436821844e-06, "loss": 2.0365, "step": 2832 }, { "epoch": 0.15198497854077253, "grad_norm": 0.353515625, "learning_rate": 4.999997933488372e-06, "loss": 2.2918, "step": 2833 }, { "epoch": 0.15203862660944206, "grad_norm": 0.423828125, "learning_rate": 4.999997820275553e-06, "loss": 2.298, "step": 2834 }, { "epoch": 0.1520922746781116, "grad_norm": 0.283203125, "learning_rate": 4.999997704043726e-06, "loss": 2.138, "step": 2835 }, { "epoch": 0.15214592274678113, "grad_norm": 0.333984375, "learning_rate": 4.999997584792894e-06, "loss": 2.3109, "step": 2836 }, { "epoch": 0.15219957081545063, "grad_norm": 0.421875, "learning_rate": 4.9999974625230554e-06, "loss": 2.1306, "step": 2837 }, { "epoch": 0.15225321888412016, "grad_norm": 0.419921875, "learning_rate": 4.999997337234209e-06, "loss": 2.3245, "step": 2838 }, { "epoch": 0.1523068669527897, "grad_norm": 0.326171875, "learning_rate": 4.999997208926358e-06, "loss": 2.445, "step": 2839 }, { "epoch": 0.15236051502145923, "grad_norm": 0.294921875, "learning_rate": 4.9999970775995e-06, "loss": 2.4067, "step": 2840 }, { "epoch": 0.15241416309012876, "grad_norm": 0.32421875, "learning_rate": 4.999996943253636e-06, "loss": 2.4876, "step": 2841 }, { "epoch": 0.1524678111587983, "grad_norm": 0.35546875, "learning_rate": 4.9999968058887685e-06, "loss": 2.2376, "step": 2842 }, { "epoch": 0.15252145922746782, "grad_norm": 0.35546875, "learning_rate": 4.999996665504894e-06, "loss": 2.5607, "step": 2843 }, { "epoch": 0.15257510729613735, "grad_norm": 0.375, "learning_rate": 4.999996522102013e-06, "loss": 2.3409, "step": 2844 }, { "epoch": 0.15262875536480686, "grad_norm": 0.267578125, "learning_rate": 4.999996375680128e-06, "loss": 2.0361, "step": 2845 }, { "epoch": 0.1526824034334764, "grad_norm": 0.3125, "learning_rate": 4.999996226239239e-06, "loss": 2.2849, "step": 2846 }, { "epoch": 0.15273605150214592, "grad_norm": 0.79296875, "learning_rate": 4.999996073779343e-06, "loss": 2.3811, "step": 2847 }, { "epoch": 0.15278969957081545, "grad_norm": 0.359375, "learning_rate": 4.9999959183004445e-06, "loss": 2.5932, "step": 2848 }, { "epoch": 0.15284334763948498, "grad_norm": 0.28125, "learning_rate": 4.99999575980254e-06, "loss": 1.9496, "step": 2849 }, { "epoch": 0.15289699570815452, "grad_norm": 0.322265625, "learning_rate": 4.999995598285632e-06, "loss": 2.4133, "step": 2850 }, { "epoch": 0.15295064377682405, "grad_norm": 0.333984375, "learning_rate": 4.999995433749719e-06, "loss": 2.3756, "step": 2851 }, { "epoch": 0.15300429184549355, "grad_norm": 0.302734375, "learning_rate": 4.999995266194804e-06, "loss": 2.4075, "step": 2852 }, { "epoch": 0.15305793991416308, "grad_norm": 0.484375, "learning_rate": 4.999995095620884e-06, "loss": 2.4858, "step": 2853 }, { "epoch": 0.15311158798283261, "grad_norm": 0.318359375, "learning_rate": 4.99999492202796e-06, "loss": 2.4783, "step": 2854 }, { "epoch": 0.15316523605150215, "grad_norm": 0.7890625, "learning_rate": 4.999994745416033e-06, "loss": 2.1558, "step": 2855 }, { "epoch": 0.15321888412017168, "grad_norm": 0.30859375, "learning_rate": 4.999994565785105e-06, "loss": 2.2787, "step": 2856 }, { "epoch": 0.1532725321888412, "grad_norm": 0.318359375, "learning_rate": 4.999994383135172e-06, "loss": 2.2524, "step": 2857 }, { "epoch": 0.15332618025751074, "grad_norm": 0.345703125, "learning_rate": 4.999994197466238e-06, "loss": 2.2235, "step": 2858 }, { "epoch": 0.15337982832618025, "grad_norm": 0.388671875, "learning_rate": 4.9999940087783e-06, "loss": 2.1485, "step": 2859 }, { "epoch": 0.15343347639484978, "grad_norm": 0.4609375, "learning_rate": 4.999993817071361e-06, "loss": 2.3396, "step": 2860 }, { "epoch": 0.1534871244635193, "grad_norm": 2.984375, "learning_rate": 4.9999936223454195e-06, "loss": 2.4769, "step": 2861 }, { "epoch": 0.15354077253218884, "grad_norm": 0.6015625, "learning_rate": 4.999993424600476e-06, "loss": 2.4733, "step": 2862 }, { "epoch": 0.15359442060085837, "grad_norm": 0.431640625, "learning_rate": 4.999993223836532e-06, "loss": 2.0257, "step": 2863 }, { "epoch": 0.1536480686695279, "grad_norm": 0.55078125, "learning_rate": 4.999993020053587e-06, "loss": 2.4673, "step": 2864 }, { "epoch": 0.15370171673819744, "grad_norm": 0.3984375, "learning_rate": 4.9999928132516404e-06, "loss": 2.5639, "step": 2865 }, { "epoch": 0.15375536480686694, "grad_norm": 0.28515625, "learning_rate": 4.999992603430693e-06, "loss": 2.1499, "step": 2866 }, { "epoch": 0.15380901287553647, "grad_norm": 0.35546875, "learning_rate": 4.9999923905907455e-06, "loss": 2.4858, "step": 2867 }, { "epoch": 0.153862660944206, "grad_norm": 0.333984375, "learning_rate": 4.999992174731798e-06, "loss": 2.4142, "step": 2868 }, { "epoch": 0.15391630901287554, "grad_norm": 0.353515625, "learning_rate": 4.99999195585385e-06, "loss": 2.054, "step": 2869 }, { "epoch": 0.15396995708154507, "grad_norm": 0.345703125, "learning_rate": 4.999991733956902e-06, "loss": 2.0872, "step": 2870 }, { "epoch": 0.1540236051502146, "grad_norm": 0.359375, "learning_rate": 4.999991509040956e-06, "loss": 2.323, "step": 2871 }, { "epoch": 0.15407725321888413, "grad_norm": 0.333984375, "learning_rate": 4.99999128110601e-06, "loss": 2.2234, "step": 2872 }, { "epoch": 0.15413090128755363, "grad_norm": 0.341796875, "learning_rate": 4.999991050152065e-06, "loss": 2.2082, "step": 2873 }, { "epoch": 0.15418454935622317, "grad_norm": 0.4296875, "learning_rate": 4.999990816179122e-06, "loss": 2.1567, "step": 2874 }, { "epoch": 0.1542381974248927, "grad_norm": 0.4296875, "learning_rate": 4.999990579187181e-06, "loss": 2.1189, "step": 2875 }, { "epoch": 0.15429184549356223, "grad_norm": 0.310546875, "learning_rate": 4.9999903391762415e-06, "loss": 2.105, "step": 2876 }, { "epoch": 0.15434549356223176, "grad_norm": 0.326171875, "learning_rate": 4.999990096146304e-06, "loss": 2.3419, "step": 2877 }, { "epoch": 0.1543991416309013, "grad_norm": 0.302734375, "learning_rate": 4.999989850097369e-06, "loss": 2.5081, "step": 2878 }, { "epoch": 0.15445278969957082, "grad_norm": 1.7734375, "learning_rate": 4.999989601029437e-06, "loss": 2.4285, "step": 2879 }, { "epoch": 0.15450643776824036, "grad_norm": 0.8203125, "learning_rate": 4.999989348942509e-06, "loss": 2.2725, "step": 2880 }, { "epoch": 0.15456008583690986, "grad_norm": 0.33984375, "learning_rate": 4.999989093836584e-06, "loss": 2.2848, "step": 2881 }, { "epoch": 0.1546137339055794, "grad_norm": 0.32421875, "learning_rate": 4.9999888357116625e-06, "loss": 2.1026, "step": 2882 }, { "epoch": 0.15466738197424892, "grad_norm": 0.310546875, "learning_rate": 4.999988574567746e-06, "loss": 2.2956, "step": 2883 }, { "epoch": 0.15472103004291846, "grad_norm": 0.318359375, "learning_rate": 4.999988310404832e-06, "loss": 2.2701, "step": 2884 }, { "epoch": 0.154774678111588, "grad_norm": 0.349609375, "learning_rate": 4.999988043222924e-06, "loss": 2.4013, "step": 2885 }, { "epoch": 0.15482832618025752, "grad_norm": 1.2734375, "learning_rate": 4.999987773022021e-06, "loss": 2.6425, "step": 2886 }, { "epoch": 0.15488197424892705, "grad_norm": 0.34375, "learning_rate": 4.999987499802124e-06, "loss": 2.1413, "step": 2887 }, { "epoch": 0.15493562231759656, "grad_norm": 0.60546875, "learning_rate": 4.999987223563232e-06, "loss": 2.3177, "step": 2888 }, { "epoch": 0.1549892703862661, "grad_norm": 0.3984375, "learning_rate": 4.9999869443053465e-06, "loss": 2.1482, "step": 2889 }, { "epoch": 0.15504291845493562, "grad_norm": 0.419921875, "learning_rate": 4.999986662028468e-06, "loss": 2.2753, "step": 2890 }, { "epoch": 0.15509656652360515, "grad_norm": 0.318359375, "learning_rate": 4.999986376732595e-06, "loss": 2.3597, "step": 2891 }, { "epoch": 0.15515021459227468, "grad_norm": 0.515625, "learning_rate": 4.9999860884177295e-06, "loss": 2.0585, "step": 2892 }, { "epoch": 0.15520386266094421, "grad_norm": 0.361328125, "learning_rate": 4.999985797083871e-06, "loss": 2.3373, "step": 2893 }, { "epoch": 0.15525751072961375, "grad_norm": 0.33203125, "learning_rate": 4.9999855027310205e-06, "loss": 2.3513, "step": 2894 }, { "epoch": 0.15531115879828325, "grad_norm": 0.8984375, "learning_rate": 4.999985205359178e-06, "loss": 2.5046, "step": 2895 }, { "epoch": 0.15536480686695278, "grad_norm": 0.375, "learning_rate": 4.999984904968345e-06, "loss": 1.2947, "step": 2896 }, { "epoch": 0.1554184549356223, "grad_norm": 0.27734375, "learning_rate": 4.99998460155852e-06, "loss": 2.3581, "step": 2897 }, { "epoch": 0.15547210300429185, "grad_norm": 0.435546875, "learning_rate": 4.999984295129704e-06, "loss": 2.2245, "step": 2898 }, { "epoch": 0.15552575107296138, "grad_norm": 0.455078125, "learning_rate": 4.999983985681899e-06, "loss": 1.9621, "step": 2899 }, { "epoch": 0.1555793991416309, "grad_norm": 0.361328125, "learning_rate": 4.999983673215102e-06, "loss": 2.2669, "step": 2900 }, { "epoch": 0.15563304721030044, "grad_norm": 0.453125, "learning_rate": 4.999983357729317e-06, "loss": 1.9368, "step": 2901 }, { "epoch": 0.15568669527896994, "grad_norm": 0.3203125, "learning_rate": 4.999983039224542e-06, "loss": 2.4738, "step": 2902 }, { "epoch": 0.15574034334763948, "grad_norm": 0.30859375, "learning_rate": 4.999982717700778e-06, "loss": 2.6183, "step": 2903 }, { "epoch": 0.155793991416309, "grad_norm": 0.3828125, "learning_rate": 4.999982393158026e-06, "loss": 2.2388, "step": 2904 }, { "epoch": 0.15584763948497854, "grad_norm": 0.412109375, "learning_rate": 4.999982065596286e-06, "loss": 2.4618, "step": 2905 }, { "epoch": 0.15590128755364807, "grad_norm": 0.296875, "learning_rate": 4.999981735015557e-06, "loss": 2.3603, "step": 2906 }, { "epoch": 0.1559549356223176, "grad_norm": 0.326171875, "learning_rate": 4.999981401415842e-06, "loss": 2.4621, "step": 2907 }, { "epoch": 0.15600858369098713, "grad_norm": 0.341796875, "learning_rate": 4.999981064797139e-06, "loss": 2.1829, "step": 2908 }, { "epoch": 0.15606223175965664, "grad_norm": 0.365234375, "learning_rate": 4.99998072515945e-06, "loss": 1.814, "step": 2909 }, { "epoch": 0.15611587982832617, "grad_norm": 0.267578125, "learning_rate": 4.999980382502776e-06, "loss": 2.3075, "step": 2910 }, { "epoch": 0.1561695278969957, "grad_norm": 0.359375, "learning_rate": 4.999980036827115e-06, "loss": 2.1264, "step": 2911 }, { "epoch": 0.15622317596566523, "grad_norm": 0.31640625, "learning_rate": 4.999979688132469e-06, "loss": 2.3834, "step": 2912 }, { "epoch": 0.15627682403433477, "grad_norm": 0.328125, "learning_rate": 4.999979336418838e-06, "loss": 2.3468, "step": 2913 }, { "epoch": 0.1563304721030043, "grad_norm": 0.29296875, "learning_rate": 4.999978981686223e-06, "loss": 2.4374, "step": 2914 }, { "epoch": 0.15638412017167383, "grad_norm": 0.27734375, "learning_rate": 4.999978623934623e-06, "loss": 2.1806, "step": 2915 }, { "epoch": 0.15643776824034336, "grad_norm": 0.3125, "learning_rate": 4.9999782631640415e-06, "loss": 2.442, "step": 2916 }, { "epoch": 0.15649141630901287, "grad_norm": 0.4453125, "learning_rate": 4.999977899374475e-06, "loss": 2.4856, "step": 2917 }, { "epoch": 0.1565450643776824, "grad_norm": 0.322265625, "learning_rate": 4.999977532565927e-06, "loss": 2.0513, "step": 2918 }, { "epoch": 0.15659871244635193, "grad_norm": 0.5859375, "learning_rate": 4.999977162738396e-06, "loss": 2.3431, "step": 2919 }, { "epoch": 0.15665236051502146, "grad_norm": 0.26171875, "learning_rate": 4.999976789891884e-06, "loss": 2.2557, "step": 2920 }, { "epoch": 0.156706008583691, "grad_norm": 0.2890625, "learning_rate": 4.99997641402639e-06, "loss": 2.2875, "step": 2921 }, { "epoch": 0.15675965665236052, "grad_norm": 0.314453125, "learning_rate": 4.999976035141915e-06, "loss": 2.2785, "step": 2922 }, { "epoch": 0.15681330472103006, "grad_norm": 0.33203125, "learning_rate": 4.99997565323846e-06, "loss": 2.3567, "step": 2923 }, { "epoch": 0.15686695278969956, "grad_norm": 0.36328125, "learning_rate": 4.9999752683160255e-06, "loss": 2.2314, "step": 2924 }, { "epoch": 0.1569206008583691, "grad_norm": 0.392578125, "learning_rate": 4.99997488037461e-06, "loss": 2.3358, "step": 2925 }, { "epoch": 0.15697424892703862, "grad_norm": 0.32421875, "learning_rate": 4.9999744894142166e-06, "loss": 2.4157, "step": 2926 }, { "epoch": 0.15702789699570815, "grad_norm": 0.296875, "learning_rate": 4.999974095434844e-06, "loss": 2.3627, "step": 2927 }, { "epoch": 0.1570815450643777, "grad_norm": 0.376953125, "learning_rate": 4.9999736984364935e-06, "loss": 2.711, "step": 2928 }, { "epoch": 0.15713519313304722, "grad_norm": 0.3125, "learning_rate": 4.999973298419166e-06, "loss": 2.2503, "step": 2929 }, { "epoch": 0.15718884120171675, "grad_norm": 0.302734375, "learning_rate": 4.999972895382861e-06, "loss": 2.3112, "step": 2930 }, { "epoch": 0.15724248927038625, "grad_norm": 0.37890625, "learning_rate": 4.999972489327579e-06, "loss": 2.2113, "step": 2931 }, { "epoch": 0.15729613733905579, "grad_norm": 0.298828125, "learning_rate": 4.999972080253321e-06, "loss": 2.4061, "step": 2932 }, { "epoch": 0.15734978540772532, "grad_norm": 0.3203125, "learning_rate": 4.999971668160088e-06, "loss": 2.2865, "step": 2933 }, { "epoch": 0.15740343347639485, "grad_norm": 0.333984375, "learning_rate": 4.99997125304788e-06, "loss": 2.6153, "step": 2934 }, { "epoch": 0.15745708154506438, "grad_norm": 0.29296875, "learning_rate": 4.999970834916696e-06, "loss": 2.364, "step": 2935 }, { "epoch": 0.1575107296137339, "grad_norm": 0.35546875, "learning_rate": 4.999970413766538e-06, "loss": 2.3354, "step": 2936 }, { "epoch": 0.15756437768240344, "grad_norm": 0.33203125, "learning_rate": 4.999969989597407e-06, "loss": 2.5057, "step": 2937 }, { "epoch": 0.15761802575107295, "grad_norm": 0.431640625, "learning_rate": 4.999969562409303e-06, "loss": 2.2805, "step": 2938 }, { "epoch": 0.15767167381974248, "grad_norm": 0.302734375, "learning_rate": 4.999969132202226e-06, "loss": 2.2487, "step": 2939 }, { "epoch": 0.157725321888412, "grad_norm": 0.306640625, "learning_rate": 4.999968698976178e-06, "loss": 2.063, "step": 2940 }, { "epoch": 0.15777896995708154, "grad_norm": 0.296875, "learning_rate": 4.999968262731157e-06, "loss": 2.3068, "step": 2941 }, { "epoch": 0.15783261802575108, "grad_norm": 0.357421875, "learning_rate": 4.999967823467165e-06, "loss": 2.5461, "step": 2942 }, { "epoch": 0.1578862660944206, "grad_norm": 0.404296875, "learning_rate": 4.999967381184203e-06, "loss": 2.5751, "step": 2943 }, { "epoch": 0.15793991416309014, "grad_norm": 0.427734375, "learning_rate": 4.999966935882271e-06, "loss": 2.3922, "step": 2944 }, { "epoch": 0.15799356223175964, "grad_norm": 0.33984375, "learning_rate": 4.99996648756137e-06, "loss": 2.4071, "step": 2945 }, { "epoch": 0.15804721030042918, "grad_norm": 0.349609375, "learning_rate": 4.9999660362214996e-06, "loss": 2.31, "step": 2946 }, { "epoch": 0.1581008583690987, "grad_norm": 0.322265625, "learning_rate": 4.999965581862661e-06, "loss": 2.2755, "step": 2947 }, { "epoch": 0.15815450643776824, "grad_norm": 0.37890625, "learning_rate": 4.999965124484854e-06, "loss": 2.5833, "step": 2948 }, { "epoch": 0.15820815450643777, "grad_norm": 0.3515625, "learning_rate": 4.9999646640880805e-06, "loss": 2.4116, "step": 2949 }, { "epoch": 0.1582618025751073, "grad_norm": 0.3203125, "learning_rate": 4.99996420067234e-06, "loss": 2.4404, "step": 2950 }, { "epoch": 0.15831545064377683, "grad_norm": 0.353515625, "learning_rate": 4.999963734237634e-06, "loss": 2.282, "step": 2951 }, { "epoch": 0.15836909871244637, "grad_norm": 0.3359375, "learning_rate": 4.999963264783961e-06, "loss": 2.265, "step": 2952 }, { "epoch": 0.15842274678111587, "grad_norm": 0.41015625, "learning_rate": 4.999962792311324e-06, "loss": 2.3552, "step": 2953 }, { "epoch": 0.1584763948497854, "grad_norm": 0.3203125, "learning_rate": 4.999962316819722e-06, "loss": 2.463, "step": 2954 }, { "epoch": 0.15853004291845493, "grad_norm": 0.333984375, "learning_rate": 4.999961838309156e-06, "loss": 2.2893, "step": 2955 }, { "epoch": 0.15858369098712446, "grad_norm": 0.373046875, "learning_rate": 4.999961356779628e-06, "loss": 2.3824, "step": 2956 }, { "epoch": 0.158637339055794, "grad_norm": 0.33984375, "learning_rate": 4.999960872231137e-06, "loss": 2.5831, "step": 2957 }, { "epoch": 0.15869098712446353, "grad_norm": 0.328125, "learning_rate": 4.999960384663683e-06, "loss": 2.289, "step": 2958 }, { "epoch": 0.15874463519313306, "grad_norm": 0.34765625, "learning_rate": 4.999959894077267e-06, "loss": 2.1898, "step": 2959 }, { "epoch": 0.15879828326180256, "grad_norm": 1.484375, "learning_rate": 4.999959400471892e-06, "loss": 2.5676, "step": 2960 }, { "epoch": 0.1588519313304721, "grad_norm": 0.322265625, "learning_rate": 4.999958903847555e-06, "loss": 2.4441, "step": 2961 }, { "epoch": 0.15890557939914163, "grad_norm": 0.3203125, "learning_rate": 4.999958404204259e-06, "loss": 2.4836, "step": 2962 }, { "epoch": 0.15895922746781116, "grad_norm": 0.306640625, "learning_rate": 4.999957901542004e-06, "loss": 2.2034, "step": 2963 }, { "epoch": 0.1590128755364807, "grad_norm": 0.310546875, "learning_rate": 4.99995739586079e-06, "loss": 2.205, "step": 2964 }, { "epoch": 0.15906652360515022, "grad_norm": 0.357421875, "learning_rate": 4.999956887160618e-06, "loss": 2.3255, "step": 2965 }, { "epoch": 0.15912017167381975, "grad_norm": 0.345703125, "learning_rate": 4.999956375441489e-06, "loss": 2.5908, "step": 2966 }, { "epoch": 0.15917381974248926, "grad_norm": 0.322265625, "learning_rate": 4.999955860703404e-06, "loss": 2.2493, "step": 2967 }, { "epoch": 0.1592274678111588, "grad_norm": 0.384765625, "learning_rate": 4.9999553429463615e-06, "loss": 2.4873, "step": 2968 }, { "epoch": 0.15928111587982832, "grad_norm": 0.3359375, "learning_rate": 4.999954822170364e-06, "loss": 2.3738, "step": 2969 }, { "epoch": 0.15933476394849785, "grad_norm": 0.349609375, "learning_rate": 4.999954298375412e-06, "loss": 2.2892, "step": 2970 }, { "epoch": 0.15938841201716739, "grad_norm": 0.44921875, "learning_rate": 4.999953771561506e-06, "loss": 1.9496, "step": 2971 }, { "epoch": 0.15944206008583692, "grad_norm": 0.3828125, "learning_rate": 4.999953241728646e-06, "loss": 2.3711, "step": 2972 }, { "epoch": 0.15949570815450645, "grad_norm": 0.353515625, "learning_rate": 4.999952708876834e-06, "loss": 2.2203, "step": 2973 }, { "epoch": 0.15954935622317595, "grad_norm": 0.458984375, "learning_rate": 4.999952173006069e-06, "loss": 2.4476, "step": 2974 }, { "epoch": 0.15960300429184548, "grad_norm": 0.859375, "learning_rate": 4.999951634116352e-06, "loss": 2.3961, "step": 2975 }, { "epoch": 0.15965665236051502, "grad_norm": 0.365234375, "learning_rate": 4.9999510922076845e-06, "loss": 2.3185, "step": 2976 }, { "epoch": 0.15971030042918455, "grad_norm": 0.3203125, "learning_rate": 4.999950547280067e-06, "loss": 2.4042, "step": 2977 }, { "epoch": 0.15976394849785408, "grad_norm": 0.39453125, "learning_rate": 4.9999499993335e-06, "loss": 2.2289, "step": 2978 }, { "epoch": 0.1598175965665236, "grad_norm": 0.34375, "learning_rate": 4.9999494483679824e-06, "loss": 2.3645, "step": 2979 }, { "epoch": 0.15987124463519314, "grad_norm": 0.38671875, "learning_rate": 4.999948894383519e-06, "loss": 2.1757, "step": 2980 }, { "epoch": 0.15992489270386265, "grad_norm": 0.5234375, "learning_rate": 4.999948337380106e-06, "loss": 2.2433, "step": 2981 }, { "epoch": 0.15997854077253218, "grad_norm": 0.26953125, "learning_rate": 4.999947777357746e-06, "loss": 2.2636, "step": 2982 }, { "epoch": 0.1600321888412017, "grad_norm": 0.41015625, "learning_rate": 4.999947214316441e-06, "loss": 2.299, "step": 2983 }, { "epoch": 0.16008583690987124, "grad_norm": 0.31640625, "learning_rate": 4.9999466482561885e-06, "loss": 2.3283, "step": 2984 }, { "epoch": 0.16013948497854077, "grad_norm": 0.345703125, "learning_rate": 4.999946079176993e-06, "loss": 2.4906, "step": 2985 }, { "epoch": 0.1601931330472103, "grad_norm": 0.375, "learning_rate": 4.999945507078852e-06, "loss": 1.9057, "step": 2986 }, { "epoch": 0.16024678111587984, "grad_norm": 0.298828125, "learning_rate": 4.9999449319617684e-06, "loss": 2.2467, "step": 2987 }, { "epoch": 0.16030042918454937, "grad_norm": 0.412109375, "learning_rate": 4.999944353825741e-06, "loss": 2.3052, "step": 2988 }, { "epoch": 0.16035407725321887, "grad_norm": 0.33203125, "learning_rate": 4.999943772670772e-06, "loss": 2.182, "step": 2989 }, { "epoch": 0.1604077253218884, "grad_norm": 0.337890625, "learning_rate": 4.999943188496862e-06, "loss": 2.4651, "step": 2990 }, { "epoch": 0.16046137339055794, "grad_norm": 1.53125, "learning_rate": 4.9999426013040095e-06, "loss": 2.2734, "step": 2991 }, { "epoch": 0.16051502145922747, "grad_norm": 0.310546875, "learning_rate": 4.999942011092218e-06, "loss": 2.4753, "step": 2992 }, { "epoch": 0.160568669527897, "grad_norm": 0.76953125, "learning_rate": 4.999941417861488e-06, "loss": 1.1445, "step": 2993 }, { "epoch": 0.16062231759656653, "grad_norm": 0.435546875, "learning_rate": 4.999940821611819e-06, "loss": 2.3508, "step": 2994 }, { "epoch": 0.16067596566523606, "grad_norm": 0.298828125, "learning_rate": 4.999940222343212e-06, "loss": 2.2533, "step": 2995 }, { "epoch": 0.16072961373390557, "grad_norm": 0.341796875, "learning_rate": 4.9999396200556665e-06, "loss": 2.1818, "step": 2996 }, { "epoch": 0.1607832618025751, "grad_norm": 0.294921875, "learning_rate": 4.999939014749186e-06, "loss": 2.1984, "step": 2997 }, { "epoch": 0.16083690987124463, "grad_norm": 0.322265625, "learning_rate": 4.999938406423769e-06, "loss": 2.1887, "step": 2998 }, { "epoch": 0.16089055793991416, "grad_norm": 0.3125, "learning_rate": 4.999937795079417e-06, "loss": 2.448, "step": 2999 }, { "epoch": 0.1609442060085837, "grad_norm": 0.3125, "learning_rate": 4.999937180716132e-06, "loss": 2.1849, "step": 3000 }, { "epoch": 0.16099785407725323, "grad_norm": 0.56640625, "learning_rate": 4.999936563333912e-06, "loss": 2.3685, "step": 3001 }, { "epoch": 0.16105150214592276, "grad_norm": 0.390625, "learning_rate": 4.99993594293276e-06, "loss": 2.2872, "step": 3002 }, { "epoch": 0.16110515021459226, "grad_norm": 0.333984375, "learning_rate": 4.999935319512675e-06, "loss": 2.2782, "step": 3003 }, { "epoch": 0.1611587982832618, "grad_norm": 0.30078125, "learning_rate": 4.99993469307366e-06, "loss": 2.0301, "step": 3004 }, { "epoch": 0.16121244635193133, "grad_norm": 0.298828125, "learning_rate": 4.999934063615715e-06, "loss": 2.313, "step": 3005 }, { "epoch": 0.16126609442060086, "grad_norm": 0.33203125, "learning_rate": 4.999933431138839e-06, "loss": 2.2765, "step": 3006 }, { "epoch": 0.1613197424892704, "grad_norm": 0.328125, "learning_rate": 4.999932795643034e-06, "loss": 2.4604, "step": 3007 }, { "epoch": 0.16137339055793992, "grad_norm": 0.44140625, "learning_rate": 4.999932157128301e-06, "loss": 1.8302, "step": 3008 }, { "epoch": 0.16142703862660945, "grad_norm": 0.375, "learning_rate": 4.99993151559464e-06, "loss": 2.1337, "step": 3009 }, { "epoch": 0.16148068669527896, "grad_norm": 0.380859375, "learning_rate": 4.9999308710420535e-06, "loss": 2.3498, "step": 3010 }, { "epoch": 0.1615343347639485, "grad_norm": 0.96484375, "learning_rate": 4.999930223470541e-06, "loss": 2.1496, "step": 3011 }, { "epoch": 0.16158798283261802, "grad_norm": 0.333984375, "learning_rate": 4.999929572880103e-06, "loss": 2.5164, "step": 3012 }, { "epoch": 0.16164163090128755, "grad_norm": 0.341796875, "learning_rate": 4.99992891927074e-06, "loss": 1.9714, "step": 3013 }, { "epoch": 0.16169527896995708, "grad_norm": 0.359375, "learning_rate": 4.999928262642455e-06, "loss": 2.3774, "step": 3014 }, { "epoch": 0.16174892703862662, "grad_norm": 0.341796875, "learning_rate": 4.999927602995246e-06, "loss": 2.3247, "step": 3015 }, { "epoch": 0.16180257510729615, "grad_norm": 0.3828125, "learning_rate": 4.999926940329116e-06, "loss": 2.3886, "step": 3016 }, { "epoch": 0.16185622317596565, "grad_norm": 0.515625, "learning_rate": 4.999926274644064e-06, "loss": 2.2071, "step": 3017 }, { "epoch": 0.16190987124463518, "grad_norm": 0.3828125, "learning_rate": 4.999925605940092e-06, "loss": 2.4894, "step": 3018 }, { "epoch": 0.16196351931330472, "grad_norm": 0.3984375, "learning_rate": 4.999924934217201e-06, "loss": 1.9559, "step": 3019 }, { "epoch": 0.16201716738197425, "grad_norm": 0.333984375, "learning_rate": 4.99992425947539e-06, "loss": 2.2218, "step": 3020 }, { "epoch": 0.16207081545064378, "grad_norm": 0.34375, "learning_rate": 4.999923581714662e-06, "loss": 2.432, "step": 3021 }, { "epoch": 0.1621244635193133, "grad_norm": 0.322265625, "learning_rate": 4.999922900935017e-06, "loss": 2.1085, "step": 3022 }, { "epoch": 0.16217811158798284, "grad_norm": 0.44921875, "learning_rate": 4.999922217136455e-06, "loss": 2.0134, "step": 3023 }, { "epoch": 0.16223175965665235, "grad_norm": 1.4140625, "learning_rate": 4.999921530318978e-06, "loss": 2.4152, "step": 3024 }, { "epoch": 0.16228540772532188, "grad_norm": 0.27734375, "learning_rate": 4.999920840482586e-06, "loss": 2.1376, "step": 3025 }, { "epoch": 0.1623390557939914, "grad_norm": 0.3828125, "learning_rate": 4.999920147627281e-06, "loss": 2.5019, "step": 3026 }, { "epoch": 0.16239270386266094, "grad_norm": 0.306640625, "learning_rate": 4.999919451753063e-06, "loss": 2.3584, "step": 3027 }, { "epoch": 0.16244635193133047, "grad_norm": 0.35546875, "learning_rate": 4.999918752859933e-06, "loss": 2.4754, "step": 3028 }, { "epoch": 0.1625, "grad_norm": 0.337890625, "learning_rate": 4.999918050947891e-06, "loss": 2.1221, "step": 3029 }, { "epoch": 0.16255364806866954, "grad_norm": 0.361328125, "learning_rate": 4.999917346016939e-06, "loss": 2.533, "step": 3030 }, { "epoch": 0.16260729613733907, "grad_norm": 0.453125, "learning_rate": 4.999916638067077e-06, "loss": 2.2593, "step": 3031 }, { "epoch": 0.16266094420600857, "grad_norm": 0.33984375, "learning_rate": 4.9999159270983075e-06, "loss": 2.363, "step": 3032 }, { "epoch": 0.1627145922746781, "grad_norm": 0.328125, "learning_rate": 4.999915213110629e-06, "loss": 2.3398, "step": 3033 }, { "epoch": 0.16276824034334764, "grad_norm": 0.3046875, "learning_rate": 4.999914496104044e-06, "loss": 2.1531, "step": 3034 }, { "epoch": 0.16282188841201717, "grad_norm": 0.39453125, "learning_rate": 4.999913776078552e-06, "loss": 2.4777, "step": 3035 }, { "epoch": 0.1628755364806867, "grad_norm": 0.3125, "learning_rate": 4.999913053034157e-06, "loss": 2.2968, "step": 3036 }, { "epoch": 0.16292918454935623, "grad_norm": 0.765625, "learning_rate": 4.999912326970856e-06, "loss": 2.4817, "step": 3037 }, { "epoch": 0.16298283261802576, "grad_norm": 0.36328125, "learning_rate": 4.999911597888652e-06, "loss": 2.3244, "step": 3038 }, { "epoch": 0.16303648068669527, "grad_norm": 0.330078125, "learning_rate": 4.999910865787544e-06, "loss": 2.1467, "step": 3039 }, { "epoch": 0.1630901287553648, "grad_norm": 0.267578125, "learning_rate": 4.999910130667536e-06, "loss": 2.1953, "step": 3040 }, { "epoch": 0.16314377682403433, "grad_norm": 0.55859375, "learning_rate": 4.999909392528626e-06, "loss": 2.1382, "step": 3041 }, { "epoch": 0.16319742489270386, "grad_norm": 0.384765625, "learning_rate": 4.999908651370816e-06, "loss": 2.195, "step": 3042 }, { "epoch": 0.1632510729613734, "grad_norm": 0.36328125, "learning_rate": 4.999907907194108e-06, "loss": 2.5342, "step": 3043 }, { "epoch": 0.16330472103004293, "grad_norm": 0.318359375, "learning_rate": 4.9999071599985015e-06, "loss": 2.3359, "step": 3044 }, { "epoch": 0.16335836909871246, "grad_norm": 0.5234375, "learning_rate": 4.999906409783997e-06, "loss": 2.4478, "step": 3045 }, { "epoch": 0.16341201716738196, "grad_norm": 0.345703125, "learning_rate": 4.999905656550597e-06, "loss": 2.4441, "step": 3046 }, { "epoch": 0.1634656652360515, "grad_norm": 0.33984375, "learning_rate": 4.999904900298301e-06, "loss": 2.3957, "step": 3047 }, { "epoch": 0.16351931330472103, "grad_norm": 0.359375, "learning_rate": 4.999904141027111e-06, "loss": 2.582, "step": 3048 }, { "epoch": 0.16357296137339056, "grad_norm": 0.2890625, "learning_rate": 4.9999033787370275e-06, "loss": 2.0739, "step": 3049 }, { "epoch": 0.1636266094420601, "grad_norm": 0.34765625, "learning_rate": 4.99990261342805e-06, "loss": 2.1627, "step": 3050 }, { "epoch": 0.16368025751072962, "grad_norm": 0.34375, "learning_rate": 4.999901845100182e-06, "loss": 2.1991, "step": 3051 }, { "epoch": 0.16373390557939915, "grad_norm": 0.296875, "learning_rate": 4.999901073753423e-06, "loss": 2.2931, "step": 3052 }, { "epoch": 0.16378755364806866, "grad_norm": 0.32421875, "learning_rate": 4.999900299387774e-06, "loss": 2.2556, "step": 3053 }, { "epoch": 0.1638412017167382, "grad_norm": 0.32421875, "learning_rate": 4.9998995220032365e-06, "loss": 2.2973, "step": 3054 }, { "epoch": 0.16389484978540772, "grad_norm": 0.375, "learning_rate": 4.99989874159981e-06, "loss": 2.3728, "step": 3055 }, { "epoch": 0.16394849785407725, "grad_norm": 0.287109375, "learning_rate": 4.999897958177497e-06, "loss": 2.024, "step": 3056 }, { "epoch": 0.16400214592274678, "grad_norm": 0.322265625, "learning_rate": 4.999897171736298e-06, "loss": 2.1755, "step": 3057 }, { "epoch": 0.16405579399141632, "grad_norm": 0.28515625, "learning_rate": 4.999896382276213e-06, "loss": 1.9975, "step": 3058 }, { "epoch": 0.16410944206008585, "grad_norm": 0.33984375, "learning_rate": 4.9998955897972445e-06, "loss": 2.4196, "step": 3059 }, { "epoch": 0.16416309012875535, "grad_norm": 0.3515625, "learning_rate": 4.999894794299393e-06, "loss": 2.508, "step": 3060 }, { "epoch": 0.16421673819742488, "grad_norm": 0.3203125, "learning_rate": 4.999893995782658e-06, "loss": 2.2658, "step": 3061 }, { "epoch": 0.16427038626609441, "grad_norm": 0.314453125, "learning_rate": 4.999893194247042e-06, "loss": 2.2202, "step": 3062 }, { "epoch": 0.16432403433476395, "grad_norm": 0.34375, "learning_rate": 4.999892389692546e-06, "loss": 2.4654, "step": 3063 }, { "epoch": 0.16437768240343348, "grad_norm": 0.29296875, "learning_rate": 4.999891582119171e-06, "loss": 2.2596, "step": 3064 }, { "epoch": 0.164431330472103, "grad_norm": 0.314453125, "learning_rate": 4.999890771526917e-06, "loss": 2.2797, "step": 3065 }, { "epoch": 0.16448497854077254, "grad_norm": 0.302734375, "learning_rate": 4.999889957915786e-06, "loss": 2.5245, "step": 3066 }, { "epoch": 0.16453862660944207, "grad_norm": 0.322265625, "learning_rate": 4.9998891412857776e-06, "loss": 2.1606, "step": 3067 }, { "epoch": 0.16459227467811158, "grad_norm": 0.396484375, "learning_rate": 4.999888321636894e-06, "loss": 2.2127, "step": 3068 }, { "epoch": 0.1646459227467811, "grad_norm": 0.388671875, "learning_rate": 4.999887498969136e-06, "loss": 2.3409, "step": 3069 }, { "epoch": 0.16469957081545064, "grad_norm": 0.34765625, "learning_rate": 4.9998866732825046e-06, "loss": 2.3322, "step": 3070 }, { "epoch": 0.16475321888412017, "grad_norm": 0.388671875, "learning_rate": 4.999885844577001e-06, "loss": 2.6339, "step": 3071 }, { "epoch": 0.1648068669527897, "grad_norm": 0.3984375, "learning_rate": 4.9998850128526255e-06, "loss": 2.3737, "step": 3072 }, { "epoch": 0.16486051502145924, "grad_norm": 0.33984375, "learning_rate": 4.99988417810938e-06, "loss": 2.3553, "step": 3073 }, { "epoch": 0.16491416309012877, "grad_norm": 0.361328125, "learning_rate": 4.999883340347264e-06, "loss": 2.1704, "step": 3074 }, { "epoch": 0.16496781115879827, "grad_norm": 0.5546875, "learning_rate": 4.99988249956628e-06, "loss": 2.3842, "step": 3075 }, { "epoch": 0.1650214592274678, "grad_norm": 1.109375, "learning_rate": 4.999881655766429e-06, "loss": 2.4809, "step": 3076 }, { "epoch": 0.16507510729613734, "grad_norm": 0.29296875, "learning_rate": 4.999880808947711e-06, "loss": 1.9871, "step": 3077 }, { "epoch": 0.16512875536480687, "grad_norm": 0.341796875, "learning_rate": 4.999879959110128e-06, "loss": 1.7171, "step": 3078 }, { "epoch": 0.1651824034334764, "grad_norm": 0.33203125, "learning_rate": 4.99987910625368e-06, "loss": 2.4618, "step": 3079 }, { "epoch": 0.16523605150214593, "grad_norm": 0.400390625, "learning_rate": 4.9998782503783695e-06, "loss": 2.3074, "step": 3080 }, { "epoch": 0.16528969957081546, "grad_norm": 0.34375, "learning_rate": 4.999877391484196e-06, "loss": 2.4151, "step": 3081 }, { "epoch": 0.16534334763948497, "grad_norm": 0.38671875, "learning_rate": 4.99987652957116e-06, "loss": 2.4674, "step": 3082 }, { "epoch": 0.1653969957081545, "grad_norm": 0.431640625, "learning_rate": 4.999875664639265e-06, "loss": 2.2466, "step": 3083 }, { "epoch": 0.16545064377682403, "grad_norm": 0.33203125, "learning_rate": 4.999874796688512e-06, "loss": 2.2667, "step": 3084 }, { "epoch": 0.16550429184549356, "grad_norm": 0.349609375, "learning_rate": 4.999873925718899e-06, "loss": 2.3136, "step": 3085 }, { "epoch": 0.1655579399141631, "grad_norm": 0.35546875, "learning_rate": 4.9998730517304295e-06, "loss": 2.1719, "step": 3086 }, { "epoch": 0.16561158798283263, "grad_norm": 0.44140625, "learning_rate": 4.999872174723104e-06, "loss": 2.3216, "step": 3087 }, { "epoch": 0.16566523605150216, "grad_norm": 0.314453125, "learning_rate": 4.999871294696924e-06, "loss": 2.5928, "step": 3088 }, { "epoch": 0.16571888412017166, "grad_norm": 0.330078125, "learning_rate": 4.999870411651889e-06, "loss": 2.1592, "step": 3089 }, { "epoch": 0.1657725321888412, "grad_norm": 0.349609375, "learning_rate": 4.9998695255880015e-06, "loss": 2.3946, "step": 3090 }, { "epoch": 0.16582618025751072, "grad_norm": 0.314453125, "learning_rate": 4.999868636505262e-06, "loss": 2.3731, "step": 3091 }, { "epoch": 0.16587982832618026, "grad_norm": 0.314453125, "learning_rate": 4.999867744403672e-06, "loss": 2.3471, "step": 3092 }, { "epoch": 0.1659334763948498, "grad_norm": 0.306640625, "learning_rate": 4.999866849283232e-06, "loss": 2.4329, "step": 3093 }, { "epoch": 0.16598712446351932, "grad_norm": 0.435546875, "learning_rate": 4.999865951143945e-06, "loss": 2.2495, "step": 3094 }, { "epoch": 0.16604077253218885, "grad_norm": 0.373046875, "learning_rate": 4.999865049985809e-06, "loss": 2.351, "step": 3095 }, { "epoch": 0.16609442060085836, "grad_norm": 0.33984375, "learning_rate": 4.999864145808827e-06, "loss": 2.3577, "step": 3096 }, { "epoch": 0.1661480686695279, "grad_norm": 0.34765625, "learning_rate": 4.9998632386129995e-06, "loss": 2.3916, "step": 3097 }, { "epoch": 0.16620171673819742, "grad_norm": 0.369140625, "learning_rate": 4.999862328398328e-06, "loss": 2.198, "step": 3098 }, { "epoch": 0.16625536480686695, "grad_norm": 0.349609375, "learning_rate": 4.999861415164814e-06, "loss": 2.3444, "step": 3099 }, { "epoch": 0.16630901287553648, "grad_norm": 0.3203125, "learning_rate": 4.999860498912457e-06, "loss": 2.3039, "step": 3100 }, { "epoch": 0.16636266094420601, "grad_norm": 1.03125, "learning_rate": 4.999859579641258e-06, "loss": 2.2005, "step": 3101 }, { "epoch": 0.16641630901287555, "grad_norm": 0.30859375, "learning_rate": 4.999858657351222e-06, "loss": 2.4575, "step": 3102 }, { "epoch": 0.16646995708154508, "grad_norm": 0.396484375, "learning_rate": 4.9998577320423455e-06, "loss": 2.2672, "step": 3103 }, { "epoch": 0.16652360515021458, "grad_norm": 0.49609375, "learning_rate": 4.999856803714632e-06, "loss": 2.4381, "step": 3104 }, { "epoch": 0.1665772532188841, "grad_norm": 0.3046875, "learning_rate": 4.999855872368081e-06, "loss": 2.6286, "step": 3105 }, { "epoch": 0.16663090128755365, "grad_norm": 0.466796875, "learning_rate": 4.999854938002696e-06, "loss": 2.3551, "step": 3106 }, { "epoch": 0.16668454935622318, "grad_norm": 0.47265625, "learning_rate": 4.9998540006184765e-06, "loss": 2.291, "step": 3107 }, { "epoch": 0.1667381974248927, "grad_norm": 0.302734375, "learning_rate": 4.999853060215424e-06, "loss": 1.9017, "step": 3108 }, { "epoch": 0.16679184549356224, "grad_norm": 0.365234375, "learning_rate": 4.999852116793539e-06, "loss": 2.3767, "step": 3109 }, { "epoch": 0.16684549356223177, "grad_norm": 0.3203125, "learning_rate": 4.999851170352824e-06, "loss": 2.1544, "step": 3110 }, { "epoch": 0.16689914163090128, "grad_norm": 0.322265625, "learning_rate": 4.999850220893279e-06, "loss": 2.3262, "step": 3111 }, { "epoch": 0.1669527896995708, "grad_norm": 0.3125, "learning_rate": 4.999849268414905e-06, "loss": 2.1154, "step": 3112 }, { "epoch": 0.16700643776824034, "grad_norm": 0.30859375, "learning_rate": 4.999848312917704e-06, "loss": 2.1047, "step": 3113 }, { "epoch": 0.16706008583690987, "grad_norm": 0.298828125, "learning_rate": 4.999847354401677e-06, "loss": 2.2553, "step": 3114 }, { "epoch": 0.1671137339055794, "grad_norm": 0.341796875, "learning_rate": 4.999846392866825e-06, "loss": 2.3133, "step": 3115 }, { "epoch": 0.16716738197424894, "grad_norm": 0.318359375, "learning_rate": 4.999845428313149e-06, "loss": 2.1893, "step": 3116 }, { "epoch": 0.16722103004291847, "grad_norm": 0.3359375, "learning_rate": 4.999844460740651e-06, "loss": 2.3183, "step": 3117 }, { "epoch": 0.16727467811158797, "grad_norm": 0.40625, "learning_rate": 4.99984349014933e-06, "loss": 2.3593, "step": 3118 }, { "epoch": 0.1673283261802575, "grad_norm": 0.365234375, "learning_rate": 4.9998425165391894e-06, "loss": 2.2579, "step": 3119 }, { "epoch": 0.16738197424892703, "grad_norm": 0.6484375, "learning_rate": 4.999841539910229e-06, "loss": 1.241, "step": 3120 }, { "epoch": 0.16743562231759657, "grad_norm": 0.482421875, "learning_rate": 4.999840560262452e-06, "loss": 2.3806, "step": 3121 }, { "epoch": 0.1674892703862661, "grad_norm": 0.4140625, "learning_rate": 4.999839577595858e-06, "loss": 2.258, "step": 3122 }, { "epoch": 0.16754291845493563, "grad_norm": 0.30078125, "learning_rate": 4.999838591910448e-06, "loss": 2.2611, "step": 3123 }, { "epoch": 0.16759656652360516, "grad_norm": 0.412109375, "learning_rate": 4.999837603206222e-06, "loss": 2.127, "step": 3124 }, { "epoch": 0.16765021459227467, "grad_norm": 0.58203125, "learning_rate": 4.999836611483184e-06, "loss": 2.3895, "step": 3125 }, { "epoch": 0.1677038626609442, "grad_norm": 0.408203125, "learning_rate": 4.999835616741334e-06, "loss": 2.4691, "step": 3126 }, { "epoch": 0.16775751072961373, "grad_norm": 0.318359375, "learning_rate": 4.9998346189806735e-06, "loss": 2.2573, "step": 3127 }, { "epoch": 0.16781115879828326, "grad_norm": 0.328125, "learning_rate": 4.999833618201203e-06, "loss": 2.2333, "step": 3128 }, { "epoch": 0.1678648068669528, "grad_norm": 0.291015625, "learning_rate": 4.999832614402924e-06, "loss": 2.3284, "step": 3129 }, { "epoch": 0.16791845493562232, "grad_norm": 0.353515625, "learning_rate": 4.999831607585838e-06, "loss": 2.3956, "step": 3130 }, { "epoch": 0.16797210300429186, "grad_norm": 0.361328125, "learning_rate": 4.999830597749946e-06, "loss": 1.5601, "step": 3131 }, { "epoch": 0.16802575107296136, "grad_norm": 0.30859375, "learning_rate": 4.999829584895248e-06, "loss": 2.3643, "step": 3132 }, { "epoch": 0.1680793991416309, "grad_norm": 0.29296875, "learning_rate": 4.999828569021748e-06, "loss": 2.2138, "step": 3133 }, { "epoch": 0.16813304721030042, "grad_norm": 0.34375, "learning_rate": 4.999827550129445e-06, "loss": 2.147, "step": 3134 }, { "epoch": 0.16818669527896996, "grad_norm": 0.326171875, "learning_rate": 4.999826528218341e-06, "loss": 2.1964, "step": 3135 }, { "epoch": 0.1682403433476395, "grad_norm": 0.34375, "learning_rate": 4.999825503288437e-06, "loss": 2.5229, "step": 3136 }, { "epoch": 0.16829399141630902, "grad_norm": 0.453125, "learning_rate": 4.999824475339734e-06, "loss": 2.3371, "step": 3137 }, { "epoch": 0.16834763948497855, "grad_norm": 0.3671875, "learning_rate": 4.9998234443722344e-06, "loss": 2.1122, "step": 3138 }, { "epoch": 0.16840128755364808, "grad_norm": 0.625, "learning_rate": 4.9998224103859384e-06, "loss": 2.255, "step": 3139 }, { "epoch": 0.1684549356223176, "grad_norm": 0.333984375, "learning_rate": 4.999821373380847e-06, "loss": 2.2843, "step": 3140 }, { "epoch": 0.16850858369098712, "grad_norm": 0.2734375, "learning_rate": 4.9998203333569625e-06, "loss": 2.0095, "step": 3141 }, { "epoch": 0.16856223175965665, "grad_norm": 0.291015625, "learning_rate": 4.999819290314285e-06, "loss": 2.4354, "step": 3142 }, { "epoch": 0.16861587982832618, "grad_norm": 0.3046875, "learning_rate": 4.999818244252816e-06, "loss": 2.1551, "step": 3143 }, { "epoch": 0.1686695278969957, "grad_norm": 0.376953125, "learning_rate": 4.999817195172558e-06, "loss": 2.3181, "step": 3144 }, { "epoch": 0.16872317596566525, "grad_norm": 0.33984375, "learning_rate": 4.999816143073511e-06, "loss": 2.4014, "step": 3145 }, { "epoch": 0.16877682403433478, "grad_norm": 0.34765625, "learning_rate": 4.999815087955677e-06, "loss": 2.2641, "step": 3146 }, { "epoch": 0.16883047210300428, "grad_norm": 0.48046875, "learning_rate": 4.999814029819058e-06, "loss": 2.3694, "step": 3147 }, { "epoch": 0.1688841201716738, "grad_norm": 0.302734375, "learning_rate": 4.9998129686636524e-06, "loss": 2.2717, "step": 3148 }, { "epoch": 0.16893776824034334, "grad_norm": 0.35546875, "learning_rate": 4.999811904489464e-06, "loss": 2.3496, "step": 3149 }, { "epoch": 0.16899141630901288, "grad_norm": 0.45703125, "learning_rate": 4.999810837296493e-06, "loss": 2.505, "step": 3150 }, { "epoch": 0.1690450643776824, "grad_norm": 0.341796875, "learning_rate": 4.999809767084741e-06, "loss": 2.131, "step": 3151 }, { "epoch": 0.16909871244635194, "grad_norm": 0.32421875, "learning_rate": 4.99980869385421e-06, "loss": 2.3331, "step": 3152 }, { "epoch": 0.16915236051502147, "grad_norm": 0.41796875, "learning_rate": 4.9998076176049e-06, "loss": 2.2497, "step": 3153 }, { "epoch": 0.16920600858369098, "grad_norm": 0.341796875, "learning_rate": 4.999806538336814e-06, "loss": 2.2078, "step": 3154 }, { "epoch": 0.1692596566523605, "grad_norm": 0.451171875, "learning_rate": 4.9998054560499515e-06, "loss": 2.5175, "step": 3155 }, { "epoch": 0.16931330472103004, "grad_norm": 0.37109375, "learning_rate": 4.9998043707443145e-06, "loss": 2.3045, "step": 3156 }, { "epoch": 0.16936695278969957, "grad_norm": 0.4140625, "learning_rate": 4.999803282419905e-06, "loss": 2.1089, "step": 3157 }, { "epoch": 0.1694206008583691, "grad_norm": 0.375, "learning_rate": 4.9998021910767225e-06, "loss": 2.3731, "step": 3158 }, { "epoch": 0.16947424892703863, "grad_norm": 0.32421875, "learning_rate": 4.99980109671477e-06, "loss": 2.4786, "step": 3159 }, { "epoch": 0.16952789699570817, "grad_norm": 0.337890625, "learning_rate": 4.999799999334049e-06, "loss": 2.2357, "step": 3160 }, { "epoch": 0.16958154506437767, "grad_norm": 0.63671875, "learning_rate": 4.999798898934559e-06, "loss": 2.1435, "step": 3161 }, { "epoch": 0.1696351931330472, "grad_norm": 0.3046875, "learning_rate": 4.9997977955163036e-06, "loss": 2.2298, "step": 3162 }, { "epoch": 0.16968884120171673, "grad_norm": 0.2890625, "learning_rate": 4.999796689079282e-06, "loss": 1.968, "step": 3163 }, { "epoch": 0.16974248927038627, "grad_norm": 0.34375, "learning_rate": 4.999795579623498e-06, "loss": 2.2578, "step": 3164 }, { "epoch": 0.1697961373390558, "grad_norm": 0.314453125, "learning_rate": 4.999794467148951e-06, "loss": 2.2324, "step": 3165 }, { "epoch": 0.16984978540772533, "grad_norm": 0.427734375, "learning_rate": 4.999793351655642e-06, "loss": 2.4872, "step": 3166 }, { "epoch": 0.16990343347639486, "grad_norm": 0.34765625, "learning_rate": 4.999792233143573e-06, "loss": 2.3728, "step": 3167 }, { "epoch": 0.16995708154506436, "grad_norm": 0.33984375, "learning_rate": 4.999791111612747e-06, "loss": 2.0667, "step": 3168 }, { "epoch": 0.1700107296137339, "grad_norm": 0.41796875, "learning_rate": 4.999789987063163e-06, "loss": 2.278, "step": 3169 }, { "epoch": 0.17006437768240343, "grad_norm": 0.380859375, "learning_rate": 4.9997888594948226e-06, "loss": 2.3148, "step": 3170 }, { "epoch": 0.17011802575107296, "grad_norm": 0.345703125, "learning_rate": 4.999787728907729e-06, "loss": 1.9347, "step": 3171 }, { "epoch": 0.1701716738197425, "grad_norm": 0.28515625, "learning_rate": 4.999786595301882e-06, "loss": 2.236, "step": 3172 }, { "epoch": 0.17022532188841202, "grad_norm": 0.361328125, "learning_rate": 4.999785458677283e-06, "loss": 2.3851, "step": 3173 }, { "epoch": 0.17027896995708156, "grad_norm": 0.8125, "learning_rate": 4.999784319033933e-06, "loss": 2.6344, "step": 3174 }, { "epoch": 0.17033261802575106, "grad_norm": 0.296875, "learning_rate": 4.9997831763718355e-06, "loss": 1.8797, "step": 3175 }, { "epoch": 0.1703862660944206, "grad_norm": 0.341796875, "learning_rate": 4.99978203069099e-06, "loss": 2.475, "step": 3176 }, { "epoch": 0.17043991416309012, "grad_norm": 0.373046875, "learning_rate": 4.999780881991398e-06, "loss": 2.2721, "step": 3177 }, { "epoch": 0.17049356223175965, "grad_norm": 0.314453125, "learning_rate": 4.999779730273062e-06, "loss": 2.251, "step": 3178 }, { "epoch": 0.1705472103004292, "grad_norm": 0.328125, "learning_rate": 4.999778575535982e-06, "loss": 2.332, "step": 3179 }, { "epoch": 0.17060085836909872, "grad_norm": 0.3125, "learning_rate": 4.99977741778016e-06, "loss": 2.1134, "step": 3180 }, { "epoch": 0.17065450643776825, "grad_norm": 0.349609375, "learning_rate": 4.999776257005599e-06, "loss": 2.3073, "step": 3181 }, { "epoch": 0.17070815450643778, "grad_norm": 0.353515625, "learning_rate": 4.999775093212297e-06, "loss": 2.2242, "step": 3182 }, { "epoch": 0.17076180257510729, "grad_norm": 0.294921875, "learning_rate": 4.999773926400257e-06, "loss": 2.2581, "step": 3183 }, { "epoch": 0.17081545064377682, "grad_norm": 0.357421875, "learning_rate": 4.999772756569482e-06, "loss": 2.3005, "step": 3184 }, { "epoch": 0.17086909871244635, "grad_norm": 0.3984375, "learning_rate": 4.999771583719971e-06, "loss": 2.5681, "step": 3185 }, { "epoch": 0.17092274678111588, "grad_norm": 2.6875, "learning_rate": 4.999770407851727e-06, "loss": 1.5555, "step": 3186 }, { "epoch": 0.1709763948497854, "grad_norm": 0.298828125, "learning_rate": 4.999769228964751e-06, "loss": 2.3159, "step": 3187 }, { "epoch": 0.17103004291845494, "grad_norm": 0.328125, "learning_rate": 4.999768047059044e-06, "loss": 2.5875, "step": 3188 }, { "epoch": 0.17108369098712448, "grad_norm": 0.33984375, "learning_rate": 4.999766862134607e-06, "loss": 1.9357, "step": 3189 }, { "epoch": 0.17113733905579398, "grad_norm": 0.380859375, "learning_rate": 4.999765674191444e-06, "loss": 2.3963, "step": 3190 }, { "epoch": 0.1711909871244635, "grad_norm": 0.3046875, "learning_rate": 4.999764483229553e-06, "loss": 2.3, "step": 3191 }, { "epoch": 0.17124463519313304, "grad_norm": 0.306640625, "learning_rate": 4.999763289248938e-06, "loss": 2.2408, "step": 3192 }, { "epoch": 0.17129828326180258, "grad_norm": 0.39453125, "learning_rate": 4.999762092249598e-06, "loss": 2.1983, "step": 3193 }, { "epoch": 0.1713519313304721, "grad_norm": 0.6953125, "learning_rate": 4.999760892231537e-06, "loss": 2.2777, "step": 3194 }, { "epoch": 0.17140557939914164, "grad_norm": 0.28515625, "learning_rate": 4.9997596891947555e-06, "loss": 2.2594, "step": 3195 }, { "epoch": 0.17145922746781117, "grad_norm": 0.8203125, "learning_rate": 4.999758483139255e-06, "loss": 2.3641, "step": 3196 }, { "epoch": 0.17151287553648067, "grad_norm": 0.380859375, "learning_rate": 4.999757274065037e-06, "loss": 2.1683, "step": 3197 }, { "epoch": 0.1715665236051502, "grad_norm": 0.376953125, "learning_rate": 4.999756061972102e-06, "loss": 2.5835, "step": 3198 }, { "epoch": 0.17162017167381974, "grad_norm": 0.42578125, "learning_rate": 4.999754846860452e-06, "loss": 2.3336, "step": 3199 }, { "epoch": 0.17167381974248927, "grad_norm": 0.380859375, "learning_rate": 4.99975362873009e-06, "loss": 2.1053, "step": 3200 }, { "epoch": 0.1717274678111588, "grad_norm": 0.37109375, "learning_rate": 4.9997524075810144e-06, "loss": 2.0307, "step": 3201 }, { "epoch": 0.17178111587982833, "grad_norm": 0.353515625, "learning_rate": 4.99975118341323e-06, "loss": 2.453, "step": 3202 }, { "epoch": 0.17183476394849787, "grad_norm": 0.369140625, "learning_rate": 4.999749956226736e-06, "loss": 2.2887, "step": 3203 }, { "epoch": 0.17188841201716737, "grad_norm": 0.390625, "learning_rate": 4.999748726021535e-06, "loss": 2.2973, "step": 3204 }, { "epoch": 0.1719420600858369, "grad_norm": 0.32421875, "learning_rate": 4.999747492797627e-06, "loss": 2.4164, "step": 3205 }, { "epoch": 0.17199570815450643, "grad_norm": 0.55078125, "learning_rate": 4.999746256555016e-06, "loss": 2.1882, "step": 3206 }, { "epoch": 0.17204935622317596, "grad_norm": 0.33203125, "learning_rate": 4.999745017293701e-06, "loss": 2.5538, "step": 3207 }, { "epoch": 0.1721030042918455, "grad_norm": 0.9296875, "learning_rate": 4.9997437750136845e-06, "loss": 2.3498, "step": 3208 }, { "epoch": 0.17215665236051503, "grad_norm": 0.357421875, "learning_rate": 4.999742529714968e-06, "loss": 2.3734, "step": 3209 }, { "epoch": 0.17221030042918456, "grad_norm": 0.337890625, "learning_rate": 4.999741281397554e-06, "loss": 2.1609, "step": 3210 }, { "epoch": 0.17226394849785406, "grad_norm": 0.40234375, "learning_rate": 4.9997400300614416e-06, "loss": 2.2774, "step": 3211 }, { "epoch": 0.1723175965665236, "grad_norm": 1.25, "learning_rate": 4.999738775706635e-06, "loss": 2.423, "step": 3212 }, { "epoch": 0.17237124463519313, "grad_norm": 0.310546875, "learning_rate": 4.999737518333134e-06, "loss": 2.2135, "step": 3213 }, { "epoch": 0.17242489270386266, "grad_norm": 0.373046875, "learning_rate": 4.999736257940941e-06, "loss": 2.2934, "step": 3214 }, { "epoch": 0.1724785407725322, "grad_norm": 0.34765625, "learning_rate": 4.999734994530057e-06, "loss": 2.3543, "step": 3215 }, { "epoch": 0.17253218884120172, "grad_norm": 0.34375, "learning_rate": 4.999733728100483e-06, "loss": 2.0632, "step": 3216 }, { "epoch": 0.17258583690987125, "grad_norm": 0.318359375, "learning_rate": 4.9997324586522225e-06, "loss": 2.4013, "step": 3217 }, { "epoch": 0.17263948497854079, "grad_norm": 0.330078125, "learning_rate": 4.999731186185275e-06, "loss": 2.4675, "step": 3218 }, { "epoch": 0.1726931330472103, "grad_norm": 0.34375, "learning_rate": 4.9997299106996425e-06, "loss": 2.2307, "step": 3219 }, { "epoch": 0.17274678111587982, "grad_norm": 0.41015625, "learning_rate": 4.999728632195327e-06, "loss": 2.2844, "step": 3220 }, { "epoch": 0.17280042918454935, "grad_norm": 0.53515625, "learning_rate": 4.99972735067233e-06, "loss": 2.5044, "step": 3221 }, { "epoch": 0.17285407725321889, "grad_norm": 0.28125, "learning_rate": 4.999726066130652e-06, "loss": 1.9436, "step": 3222 }, { "epoch": 0.17290772532188842, "grad_norm": 0.3125, "learning_rate": 4.999724778570296e-06, "loss": 2.2543, "step": 3223 }, { "epoch": 0.17296137339055795, "grad_norm": 0.3671875, "learning_rate": 4.999723487991264e-06, "loss": 2.2868, "step": 3224 }, { "epoch": 0.17301502145922748, "grad_norm": 0.33984375, "learning_rate": 4.999722194393556e-06, "loss": 2.2675, "step": 3225 }, { "epoch": 0.17306866952789698, "grad_norm": 0.400390625, "learning_rate": 4.999720897777173e-06, "loss": 2.4373, "step": 3226 }, { "epoch": 0.17312231759656652, "grad_norm": 0.37890625, "learning_rate": 4.9997195981421185e-06, "loss": 2.1368, "step": 3227 }, { "epoch": 0.17317596566523605, "grad_norm": 0.43359375, "learning_rate": 4.999718295488393e-06, "loss": 2.5211, "step": 3228 }, { "epoch": 0.17322961373390558, "grad_norm": 0.435546875, "learning_rate": 4.999716989815999e-06, "loss": 1.2818, "step": 3229 }, { "epoch": 0.1732832618025751, "grad_norm": 0.3125, "learning_rate": 4.999715681124937e-06, "loss": 2.287, "step": 3230 }, { "epoch": 0.17333690987124464, "grad_norm": 0.283203125, "learning_rate": 4.9997143694152085e-06, "loss": 2.3857, "step": 3231 }, { "epoch": 0.17339055793991417, "grad_norm": 0.337890625, "learning_rate": 4.999713054686816e-06, "loss": 2.4663, "step": 3232 }, { "epoch": 0.17344420600858368, "grad_norm": 0.3515625, "learning_rate": 4.999711736939761e-06, "loss": 2.3371, "step": 3233 }, { "epoch": 0.1734978540772532, "grad_norm": 0.38671875, "learning_rate": 4.999710416174044e-06, "loss": 2.1901, "step": 3234 }, { "epoch": 0.17355150214592274, "grad_norm": 0.3125, "learning_rate": 4.9997090923896675e-06, "loss": 2.308, "step": 3235 }, { "epoch": 0.17360515021459227, "grad_norm": 0.369140625, "learning_rate": 4.999707765586634e-06, "loss": 2.4501, "step": 3236 }, { "epoch": 0.1736587982832618, "grad_norm": 0.421875, "learning_rate": 4.999706435764943e-06, "loss": 2.2521, "step": 3237 }, { "epoch": 0.17371244635193134, "grad_norm": 0.80078125, "learning_rate": 4.999705102924597e-06, "loss": 2.1943, "step": 3238 }, { "epoch": 0.17376609442060087, "grad_norm": 0.5625, "learning_rate": 4.999703767065598e-06, "loss": 2.4079, "step": 3239 }, { "epoch": 0.17381974248927037, "grad_norm": 0.3515625, "learning_rate": 4.999702428187947e-06, "loss": 1.9698, "step": 3240 }, { "epoch": 0.1738733905579399, "grad_norm": 0.3515625, "learning_rate": 4.999701086291646e-06, "loss": 1.9105, "step": 3241 }, { "epoch": 0.17392703862660944, "grad_norm": 0.34375, "learning_rate": 4.999699741376697e-06, "loss": 1.9771, "step": 3242 }, { "epoch": 0.17398068669527897, "grad_norm": 0.41796875, "learning_rate": 4.999698393443102e-06, "loss": 2.4524, "step": 3243 }, { "epoch": 0.1740343347639485, "grad_norm": 0.322265625, "learning_rate": 4.9996970424908605e-06, "loss": 2.4005, "step": 3244 }, { "epoch": 0.17408798283261803, "grad_norm": 0.294921875, "learning_rate": 4.999695688519977e-06, "loss": 2.1979, "step": 3245 }, { "epoch": 0.17414163090128756, "grad_norm": 0.34765625, "learning_rate": 4.999694331530451e-06, "loss": 2.2744, "step": 3246 }, { "epoch": 0.17419527896995707, "grad_norm": 0.400390625, "learning_rate": 4.999692971522283e-06, "loss": 2.3643, "step": 3247 }, { "epoch": 0.1742489270386266, "grad_norm": 0.3203125, "learning_rate": 4.9996916084954785e-06, "loss": 2.5828, "step": 3248 }, { "epoch": 0.17430257510729613, "grad_norm": 0.31640625, "learning_rate": 4.999690242450036e-06, "loss": 2.4592, "step": 3249 }, { "epoch": 0.17435622317596566, "grad_norm": 0.306640625, "learning_rate": 4.9996888733859585e-06, "loss": 2.3639, "step": 3250 }, { "epoch": 0.1744098712446352, "grad_norm": 0.384765625, "learning_rate": 4.999687501303247e-06, "loss": 2.5285, "step": 3251 }, { "epoch": 0.17446351931330473, "grad_norm": 0.357421875, "learning_rate": 4.999686126201904e-06, "loss": 2.3332, "step": 3252 }, { "epoch": 0.17451716738197426, "grad_norm": 1.15625, "learning_rate": 4.999684748081931e-06, "loss": 1.1773, "step": 3253 }, { "epoch": 0.1745708154506438, "grad_norm": 0.3046875, "learning_rate": 4.9996833669433285e-06, "loss": 2.3244, "step": 3254 }, { "epoch": 0.1746244635193133, "grad_norm": 0.34375, "learning_rate": 4.9996819827861e-06, "loss": 2.5577, "step": 3255 }, { "epoch": 0.17467811158798283, "grad_norm": 0.3671875, "learning_rate": 4.9996805956102455e-06, "loss": 2.164, "step": 3256 }, { "epoch": 0.17473175965665236, "grad_norm": 0.447265625, "learning_rate": 4.999679205415767e-06, "loss": 2.2861, "step": 3257 }, { "epoch": 0.1747854077253219, "grad_norm": 0.326171875, "learning_rate": 4.999677812202667e-06, "loss": 2.2747, "step": 3258 }, { "epoch": 0.17483905579399142, "grad_norm": 0.36328125, "learning_rate": 4.9996764159709465e-06, "loss": 2.3115, "step": 3259 }, { "epoch": 0.17489270386266095, "grad_norm": 0.3125, "learning_rate": 4.999675016720608e-06, "loss": 2.3404, "step": 3260 }, { "epoch": 0.17494635193133048, "grad_norm": 0.333984375, "learning_rate": 4.999673614451652e-06, "loss": 2.2792, "step": 3261 }, { "epoch": 0.175, "grad_norm": 0.37109375, "learning_rate": 4.9996722091640805e-06, "loss": 2.2425, "step": 3262 }, { "epoch": 0.17505364806866952, "grad_norm": 0.326171875, "learning_rate": 4.999670800857897e-06, "loss": 2.1457, "step": 3263 }, { "epoch": 0.17510729613733905, "grad_norm": 0.318359375, "learning_rate": 4.999669389533099e-06, "loss": 2.3595, "step": 3264 }, { "epoch": 0.17516094420600858, "grad_norm": 0.38671875, "learning_rate": 4.999667975189693e-06, "loss": 2.4525, "step": 3265 }, { "epoch": 0.17521459227467812, "grad_norm": 0.28515625, "learning_rate": 4.9996665578276785e-06, "loss": 1.885, "step": 3266 }, { "epoch": 0.17526824034334765, "grad_norm": 0.400390625, "learning_rate": 4.999665137447056e-06, "loss": 2.3516, "step": 3267 }, { "epoch": 0.17532188841201718, "grad_norm": 0.3046875, "learning_rate": 4.99966371404783e-06, "loss": 2.2103, "step": 3268 }, { "epoch": 0.17537553648068668, "grad_norm": 0.333984375, "learning_rate": 4.999662287629999e-06, "loss": 2.3393, "step": 3269 }, { "epoch": 0.17542918454935622, "grad_norm": 0.333984375, "learning_rate": 4.999660858193568e-06, "loss": 2.2829, "step": 3270 }, { "epoch": 0.17548283261802575, "grad_norm": 0.365234375, "learning_rate": 4.999659425738537e-06, "loss": 2.4836, "step": 3271 }, { "epoch": 0.17553648068669528, "grad_norm": 0.298828125, "learning_rate": 4.999657990264908e-06, "loss": 2.3352, "step": 3272 }, { "epoch": 0.1755901287553648, "grad_norm": 0.625, "learning_rate": 4.9996565517726815e-06, "loss": 2.1433, "step": 3273 }, { "epoch": 0.17564377682403434, "grad_norm": 0.322265625, "learning_rate": 4.999655110261862e-06, "loss": 2.2799, "step": 3274 }, { "epoch": 0.17569742489270387, "grad_norm": 0.34375, "learning_rate": 4.999653665732448e-06, "loss": 1.9374, "step": 3275 }, { "epoch": 0.17575107296137338, "grad_norm": 0.275390625, "learning_rate": 4.999652218184444e-06, "loss": 2.4497, "step": 3276 }, { "epoch": 0.1758047210300429, "grad_norm": 0.625, "learning_rate": 4.99965076761785e-06, "loss": 2.3395, "step": 3277 }, { "epoch": 0.17585836909871244, "grad_norm": 0.349609375, "learning_rate": 4.999649314032669e-06, "loss": 2.1741, "step": 3278 }, { "epoch": 0.17591201716738197, "grad_norm": 0.484375, "learning_rate": 4.999647857428901e-06, "loss": 2.2208, "step": 3279 }, { "epoch": 0.1759656652360515, "grad_norm": 0.32421875, "learning_rate": 4.99964639780655e-06, "loss": 2.2799, "step": 3280 }, { "epoch": 0.17601931330472104, "grad_norm": 0.291015625, "learning_rate": 4.9996449351656165e-06, "loss": 1.9881, "step": 3281 }, { "epoch": 0.17607296137339057, "grad_norm": 0.455078125, "learning_rate": 4.999643469506102e-06, "loss": 2.3862, "step": 3282 }, { "epoch": 0.17612660944206007, "grad_norm": 0.3203125, "learning_rate": 4.999642000828008e-06, "loss": 2.1831, "step": 3283 }, { "epoch": 0.1761802575107296, "grad_norm": 0.333984375, "learning_rate": 4.999640529131338e-06, "loss": 2.6248, "step": 3284 }, { "epoch": 0.17623390557939914, "grad_norm": 0.447265625, "learning_rate": 4.999639054416093e-06, "loss": 2.4783, "step": 3285 }, { "epoch": 0.17628755364806867, "grad_norm": 0.345703125, "learning_rate": 4.9996375766822735e-06, "loss": 2.3509, "step": 3286 }, { "epoch": 0.1763412017167382, "grad_norm": 0.408203125, "learning_rate": 4.999636095929883e-06, "loss": 2.5601, "step": 3287 }, { "epoch": 0.17639484978540773, "grad_norm": 0.326171875, "learning_rate": 4.999634612158922e-06, "loss": 2.369, "step": 3288 }, { "epoch": 0.17644849785407726, "grad_norm": 0.34765625, "learning_rate": 4.999633125369394e-06, "loss": 2.3526, "step": 3289 }, { "epoch": 0.1765021459227468, "grad_norm": 0.42578125, "learning_rate": 4.999631635561298e-06, "loss": 2.2429, "step": 3290 }, { "epoch": 0.1765557939914163, "grad_norm": 0.314453125, "learning_rate": 4.999630142734639e-06, "loss": 2.2739, "step": 3291 }, { "epoch": 0.17660944206008583, "grad_norm": 0.3125, "learning_rate": 4.999628646889416e-06, "loss": 1.9876, "step": 3292 }, { "epoch": 0.17666309012875536, "grad_norm": 0.359375, "learning_rate": 4.999627148025633e-06, "loss": 2.634, "step": 3293 }, { "epoch": 0.1767167381974249, "grad_norm": 0.326171875, "learning_rate": 4.999625646143291e-06, "loss": 2.45, "step": 3294 }, { "epoch": 0.17677038626609443, "grad_norm": 0.28515625, "learning_rate": 4.999624141242391e-06, "loss": 2.188, "step": 3295 }, { "epoch": 0.17682403433476396, "grad_norm": 0.3828125, "learning_rate": 4.999622633322936e-06, "loss": 2.1063, "step": 3296 }, { "epoch": 0.1768776824034335, "grad_norm": 0.51171875, "learning_rate": 4.999621122384927e-06, "loss": 1.9712, "step": 3297 }, { "epoch": 0.176931330472103, "grad_norm": 0.44140625, "learning_rate": 4.999619608428367e-06, "loss": 2.2042, "step": 3298 }, { "epoch": 0.17698497854077253, "grad_norm": 5.28125, "learning_rate": 4.999618091453256e-06, "loss": 2.1749, "step": 3299 }, { "epoch": 0.17703862660944206, "grad_norm": 0.345703125, "learning_rate": 4.999616571459597e-06, "loss": 2.142, "step": 3300 }, { "epoch": 0.1770922746781116, "grad_norm": 0.32421875, "learning_rate": 4.999615048447392e-06, "loss": 2.2357, "step": 3301 }, { "epoch": 0.17714592274678112, "grad_norm": 0.3125, "learning_rate": 4.999613522416642e-06, "loss": 2.3569, "step": 3302 }, { "epoch": 0.17719957081545065, "grad_norm": 1.2421875, "learning_rate": 4.999611993367351e-06, "loss": 2.3319, "step": 3303 }, { "epoch": 0.17725321888412018, "grad_norm": 0.376953125, "learning_rate": 4.999610461299517e-06, "loss": 2.3566, "step": 3304 }, { "epoch": 0.1773068669527897, "grad_norm": 0.37109375, "learning_rate": 4.999608926213145e-06, "loss": 2.4683, "step": 3305 }, { "epoch": 0.17736051502145922, "grad_norm": 0.326171875, "learning_rate": 4.999607388108236e-06, "loss": 1.5518, "step": 3306 }, { "epoch": 0.17741416309012875, "grad_norm": 0.34765625, "learning_rate": 4.999605846984791e-06, "loss": 2.3326, "step": 3307 }, { "epoch": 0.17746781115879828, "grad_norm": 0.34765625, "learning_rate": 4.999604302842813e-06, "loss": 2.4353, "step": 3308 }, { "epoch": 0.17752145922746781, "grad_norm": 0.7265625, "learning_rate": 4.999602755682304e-06, "loss": 2.0704, "step": 3309 }, { "epoch": 0.17757510729613735, "grad_norm": 0.390625, "learning_rate": 4.999601205503265e-06, "loss": 2.64, "step": 3310 }, { "epoch": 0.17762875536480688, "grad_norm": 0.375, "learning_rate": 4.999599652305698e-06, "loss": 2.433, "step": 3311 }, { "epoch": 0.17768240343347638, "grad_norm": 0.408203125, "learning_rate": 4.999598096089605e-06, "loss": 2.0271, "step": 3312 }, { "epoch": 0.17773605150214591, "grad_norm": 0.55078125, "learning_rate": 4.999596536854989e-06, "loss": 2.2744, "step": 3313 }, { "epoch": 0.17778969957081545, "grad_norm": 0.330078125, "learning_rate": 4.99959497460185e-06, "loss": 2.4125, "step": 3314 }, { "epoch": 0.17784334763948498, "grad_norm": 0.37109375, "learning_rate": 4.999593409330191e-06, "loss": 2.4591, "step": 3315 }, { "epoch": 0.1778969957081545, "grad_norm": 0.353515625, "learning_rate": 4.999591841040014e-06, "loss": 2.4206, "step": 3316 }, { "epoch": 0.17795064377682404, "grad_norm": 0.498046875, "learning_rate": 4.9995902697313195e-06, "loss": 2.4566, "step": 3317 }, { "epoch": 0.17800429184549357, "grad_norm": 0.326171875, "learning_rate": 4.999588695404111e-06, "loss": 2.4033, "step": 3318 }, { "epoch": 0.17805793991416308, "grad_norm": 0.34375, "learning_rate": 4.999587118058389e-06, "loss": 2.4679, "step": 3319 }, { "epoch": 0.1781115879828326, "grad_norm": 0.314453125, "learning_rate": 4.999585537694157e-06, "loss": 2.208, "step": 3320 }, { "epoch": 0.17816523605150214, "grad_norm": 0.388671875, "learning_rate": 4.999583954311416e-06, "loss": 1.8692, "step": 3321 }, { "epoch": 0.17821888412017167, "grad_norm": 0.30859375, "learning_rate": 4.999582367910169e-06, "loss": 2.3098, "step": 3322 }, { "epoch": 0.1782725321888412, "grad_norm": 0.33203125, "learning_rate": 4.999580778490416e-06, "loss": 2.6037, "step": 3323 }, { "epoch": 0.17832618025751074, "grad_norm": 0.345703125, "learning_rate": 4.99957918605216e-06, "loss": 2.267, "step": 3324 }, { "epoch": 0.17837982832618027, "grad_norm": 0.439453125, "learning_rate": 4.999577590595402e-06, "loss": 2.213, "step": 3325 }, { "epoch": 0.1784334763948498, "grad_norm": 0.3828125, "learning_rate": 4.999575992120146e-06, "loss": 2.3533, "step": 3326 }, { "epoch": 0.1784871244635193, "grad_norm": 0.390625, "learning_rate": 4.999574390626393e-06, "loss": 2.2569, "step": 3327 }, { "epoch": 0.17854077253218884, "grad_norm": 0.396484375, "learning_rate": 4.999572786114143e-06, "loss": 2.3025, "step": 3328 }, { "epoch": 0.17859442060085837, "grad_norm": 0.5, "learning_rate": 4.999571178583401e-06, "loss": 2.3264, "step": 3329 }, { "epoch": 0.1786480686695279, "grad_norm": 0.33203125, "learning_rate": 4.999569568034167e-06, "loss": 2.2307, "step": 3330 }, { "epoch": 0.17870171673819743, "grad_norm": 0.390625, "learning_rate": 4.999567954466443e-06, "loss": 2.4987, "step": 3331 }, { "epoch": 0.17875536480686696, "grad_norm": 0.462890625, "learning_rate": 4.999566337880232e-06, "loss": 2.4274, "step": 3332 }, { "epoch": 0.1788090128755365, "grad_norm": 0.4140625, "learning_rate": 4.999564718275534e-06, "loss": 2.5596, "step": 3333 }, { "epoch": 0.178862660944206, "grad_norm": 0.421875, "learning_rate": 4.9995630956523535e-06, "loss": 2.3608, "step": 3334 }, { "epoch": 0.17891630901287553, "grad_norm": 0.3828125, "learning_rate": 4.999561470010691e-06, "loss": 2.3548, "step": 3335 }, { "epoch": 0.17896995708154506, "grad_norm": 0.494140625, "learning_rate": 4.999559841350549e-06, "loss": 2.2794, "step": 3336 }, { "epoch": 0.1790236051502146, "grad_norm": 0.345703125, "learning_rate": 4.999558209671928e-06, "loss": 2.6123, "step": 3337 }, { "epoch": 0.17907725321888412, "grad_norm": 0.56640625, "learning_rate": 4.999556574974833e-06, "loss": 2.3173, "step": 3338 }, { "epoch": 0.17913090128755366, "grad_norm": 0.3515625, "learning_rate": 4.999554937259263e-06, "loss": 2.1194, "step": 3339 }, { "epoch": 0.1791845493562232, "grad_norm": 0.361328125, "learning_rate": 4.999553296525222e-06, "loss": 2.3731, "step": 3340 }, { "epoch": 0.1792381974248927, "grad_norm": 0.3828125, "learning_rate": 4.999551652772709e-06, "loss": 2.4907, "step": 3341 }, { "epoch": 0.17929184549356222, "grad_norm": 0.380859375, "learning_rate": 4.99955000600173e-06, "loss": 2.4025, "step": 3342 }, { "epoch": 0.17934549356223176, "grad_norm": 0.455078125, "learning_rate": 4.999548356212284e-06, "loss": 2.2577, "step": 3343 }, { "epoch": 0.1793991416309013, "grad_norm": 0.37890625, "learning_rate": 4.999546703404374e-06, "loss": 2.3747, "step": 3344 }, { "epoch": 0.17945278969957082, "grad_norm": 0.373046875, "learning_rate": 4.999545047578002e-06, "loss": 2.5943, "step": 3345 }, { "epoch": 0.17950643776824035, "grad_norm": 0.361328125, "learning_rate": 4.999543388733171e-06, "loss": 2.3229, "step": 3346 }, { "epoch": 0.17956008583690988, "grad_norm": 0.32421875, "learning_rate": 4.999541726869882e-06, "loss": 2.2618, "step": 3347 }, { "epoch": 0.1796137339055794, "grad_norm": 0.392578125, "learning_rate": 4.9995400619881365e-06, "loss": 2.1445, "step": 3348 }, { "epoch": 0.17966738197424892, "grad_norm": 0.466796875, "learning_rate": 4.9995383940879376e-06, "loss": 2.1275, "step": 3349 }, { "epoch": 0.17972103004291845, "grad_norm": 0.333984375, "learning_rate": 4.999536723169286e-06, "loss": 2.3194, "step": 3350 }, { "epoch": 0.17977467811158798, "grad_norm": 0.392578125, "learning_rate": 4.999535049232185e-06, "loss": 2.3622, "step": 3351 }, { "epoch": 0.17982832618025751, "grad_norm": 0.31640625, "learning_rate": 4.9995333722766355e-06, "loss": 2.2805, "step": 3352 }, { "epoch": 0.17988197424892705, "grad_norm": 0.37890625, "learning_rate": 4.99953169230264e-06, "loss": 2.4102, "step": 3353 }, { "epoch": 0.17993562231759658, "grad_norm": 0.345703125, "learning_rate": 4.999530009310201e-06, "loss": 2.6786, "step": 3354 }, { "epoch": 0.17998927038626608, "grad_norm": 0.63671875, "learning_rate": 4.999528323299321e-06, "loss": 2.0473, "step": 3355 }, { "epoch": 0.1800429184549356, "grad_norm": 0.3046875, "learning_rate": 4.99952663427e-06, "loss": 2.1816, "step": 3356 }, { "epoch": 0.18009656652360514, "grad_norm": 0.3828125, "learning_rate": 4.999524942222242e-06, "loss": 2.2232, "step": 3357 }, { "epoch": 0.18015021459227468, "grad_norm": 0.3125, "learning_rate": 4.999523247156048e-06, "loss": 2.0789, "step": 3358 }, { "epoch": 0.1802038626609442, "grad_norm": 1.0, "learning_rate": 4.999521549071421e-06, "loss": 2.4047, "step": 3359 }, { "epoch": 0.18025751072961374, "grad_norm": 0.375, "learning_rate": 4.999519847968362e-06, "loss": 2.4037, "step": 3360 }, { "epoch": 0.18031115879828327, "grad_norm": 0.34375, "learning_rate": 4.999518143846873e-06, "loss": 2.2792, "step": 3361 }, { "epoch": 0.18036480686695278, "grad_norm": 0.34765625, "learning_rate": 4.9995164367069575e-06, "loss": 2.304, "step": 3362 }, { "epoch": 0.1804184549356223, "grad_norm": 0.349609375, "learning_rate": 4.999514726548615e-06, "loss": 2.2679, "step": 3363 }, { "epoch": 0.18047210300429184, "grad_norm": 0.357421875, "learning_rate": 4.999513013371851e-06, "loss": 2.4485, "step": 3364 }, { "epoch": 0.18052575107296137, "grad_norm": 0.359375, "learning_rate": 4.999511297176665e-06, "loss": 2.1135, "step": 3365 }, { "epoch": 0.1805793991416309, "grad_norm": 0.412109375, "learning_rate": 4.99950957796306e-06, "loss": 2.4306, "step": 3366 }, { "epoch": 0.18063304721030043, "grad_norm": 0.322265625, "learning_rate": 4.999507855731037e-06, "loss": 2.2778, "step": 3367 }, { "epoch": 0.18068669527896997, "grad_norm": 0.3203125, "learning_rate": 4.999506130480599e-06, "loss": 2.292, "step": 3368 }, { "epoch": 0.1807403433476395, "grad_norm": 0.38671875, "learning_rate": 4.999504402211749e-06, "loss": 2.3065, "step": 3369 }, { "epoch": 0.180793991416309, "grad_norm": 0.439453125, "learning_rate": 4.999502670924487e-06, "loss": 2.4243, "step": 3370 }, { "epoch": 0.18084763948497853, "grad_norm": 0.44921875, "learning_rate": 4.999500936618817e-06, "loss": 2.2116, "step": 3371 }, { "epoch": 0.18090128755364807, "grad_norm": 0.369140625, "learning_rate": 4.999499199294741e-06, "loss": 2.5735, "step": 3372 }, { "epoch": 0.1809549356223176, "grad_norm": 0.40625, "learning_rate": 4.999497458952259e-06, "loss": 2.3608, "step": 3373 }, { "epoch": 0.18100858369098713, "grad_norm": 0.5703125, "learning_rate": 4.999495715591375e-06, "loss": 2.2326, "step": 3374 }, { "epoch": 0.18106223175965666, "grad_norm": 0.283203125, "learning_rate": 4.999493969212091e-06, "loss": 2.0893, "step": 3375 }, { "epoch": 0.1811158798283262, "grad_norm": 1.3203125, "learning_rate": 4.999492219814408e-06, "loss": 2.482, "step": 3376 }, { "epoch": 0.1811695278969957, "grad_norm": 0.328125, "learning_rate": 4.9994904673983295e-06, "loss": 2.4992, "step": 3377 }, { "epoch": 0.18122317596566523, "grad_norm": 0.341796875, "learning_rate": 4.999488711963857e-06, "loss": 2.372, "step": 3378 }, { "epoch": 0.18127682403433476, "grad_norm": 0.341796875, "learning_rate": 4.999486953510991e-06, "loss": 2.4141, "step": 3379 }, { "epoch": 0.1813304721030043, "grad_norm": 0.447265625, "learning_rate": 4.999485192039737e-06, "loss": 2.26, "step": 3380 }, { "epoch": 0.18138412017167382, "grad_norm": 0.2890625, "learning_rate": 4.999483427550095e-06, "loss": 2.238, "step": 3381 }, { "epoch": 0.18143776824034336, "grad_norm": 0.5625, "learning_rate": 4.999481660042067e-06, "loss": 1.9186, "step": 3382 }, { "epoch": 0.1814914163090129, "grad_norm": 0.451171875, "learning_rate": 4.999479889515656e-06, "loss": 2.2742, "step": 3383 }, { "epoch": 0.1815450643776824, "grad_norm": 0.349609375, "learning_rate": 4.999478115970863e-06, "loss": 2.2801, "step": 3384 }, { "epoch": 0.18159871244635192, "grad_norm": 0.359375, "learning_rate": 4.999476339407691e-06, "loss": 2.3801, "step": 3385 }, { "epoch": 0.18165236051502145, "grad_norm": 0.3203125, "learning_rate": 4.999474559826143e-06, "loss": 2.1989, "step": 3386 }, { "epoch": 0.181706008583691, "grad_norm": 0.421875, "learning_rate": 4.999472777226219e-06, "loss": 2.1665, "step": 3387 }, { "epoch": 0.18175965665236052, "grad_norm": 0.7890625, "learning_rate": 4.999470991607923e-06, "loss": 2.3768, "step": 3388 }, { "epoch": 0.18181330472103005, "grad_norm": 0.390625, "learning_rate": 4.999469202971256e-06, "loss": 2.4013, "step": 3389 }, { "epoch": 0.18186695278969958, "grad_norm": 0.328125, "learning_rate": 4.999467411316221e-06, "loss": 2.3772, "step": 3390 }, { "epoch": 0.18192060085836909, "grad_norm": 0.51171875, "learning_rate": 4.999465616642819e-06, "loss": 1.7803, "step": 3391 }, { "epoch": 0.18197424892703862, "grad_norm": 0.431640625, "learning_rate": 4.9994638189510545e-06, "loss": 2.4562, "step": 3392 }, { "epoch": 0.18202789699570815, "grad_norm": 0.349609375, "learning_rate": 4.999462018240927e-06, "loss": 2.2297, "step": 3393 }, { "epoch": 0.18208154506437768, "grad_norm": 0.3359375, "learning_rate": 4.999460214512441e-06, "loss": 2.4821, "step": 3394 }, { "epoch": 0.1821351931330472, "grad_norm": 0.369140625, "learning_rate": 4.9994584077655955e-06, "loss": 1.8082, "step": 3395 }, { "epoch": 0.18218884120171674, "grad_norm": 0.302734375, "learning_rate": 4.999456598000396e-06, "loss": 2.3186, "step": 3396 }, { "epoch": 0.18224248927038628, "grad_norm": 0.369140625, "learning_rate": 4.999454785216843e-06, "loss": 2.5002, "step": 3397 }, { "epoch": 0.18229613733905578, "grad_norm": 0.365234375, "learning_rate": 4.999452969414939e-06, "loss": 2.3104, "step": 3398 }, { "epoch": 0.1823497854077253, "grad_norm": 0.34765625, "learning_rate": 4.999451150594686e-06, "loss": 2.0113, "step": 3399 }, { "epoch": 0.18240343347639484, "grad_norm": 0.30078125, "learning_rate": 4.999449328756086e-06, "loss": 2.3823, "step": 3400 }, { "epoch": 0.18245708154506438, "grad_norm": 0.283203125, "learning_rate": 4.999447503899143e-06, "loss": 2.1941, "step": 3401 }, { "epoch": 0.1825107296137339, "grad_norm": 0.357421875, "learning_rate": 4.9994456760238576e-06, "loss": 2.1923, "step": 3402 }, { "epoch": 0.18256437768240344, "grad_norm": 0.34765625, "learning_rate": 4.999443845130231e-06, "loss": 2.4688, "step": 3403 }, { "epoch": 0.18261802575107297, "grad_norm": 0.326171875, "learning_rate": 4.999442011218268e-06, "loss": 2.2298, "step": 3404 }, { "epoch": 0.1826716738197425, "grad_norm": 0.349609375, "learning_rate": 4.9994401742879685e-06, "loss": 2.2121, "step": 3405 }, { "epoch": 0.182725321888412, "grad_norm": 0.453125, "learning_rate": 4.999438334339336e-06, "loss": 2.335, "step": 3406 }, { "epoch": 0.18277896995708154, "grad_norm": 0.345703125, "learning_rate": 4.999436491372373e-06, "loss": 2.3449, "step": 3407 }, { "epoch": 0.18283261802575107, "grad_norm": 0.373046875, "learning_rate": 4.99943464538708e-06, "loss": 2.6075, "step": 3408 }, { "epoch": 0.1828862660944206, "grad_norm": 0.51171875, "learning_rate": 4.999432796383461e-06, "loss": 2.445, "step": 3409 }, { "epoch": 0.18293991416309013, "grad_norm": 0.296875, "learning_rate": 4.9994309443615174e-06, "loss": 2.3144, "step": 3410 }, { "epoch": 0.18299356223175967, "grad_norm": 0.42578125, "learning_rate": 4.999429089321252e-06, "loss": 2.3242, "step": 3411 }, { "epoch": 0.1830472103004292, "grad_norm": 0.345703125, "learning_rate": 4.999427231262667e-06, "loss": 2.3671, "step": 3412 }, { "epoch": 0.1831008583690987, "grad_norm": 0.3359375, "learning_rate": 4.999425370185763e-06, "loss": 2.2541, "step": 3413 }, { "epoch": 0.18315450643776823, "grad_norm": 0.3046875, "learning_rate": 4.999423506090545e-06, "loss": 2.2874, "step": 3414 }, { "epoch": 0.18320815450643776, "grad_norm": 0.400390625, "learning_rate": 4.9994216389770126e-06, "loss": 2.1851, "step": 3415 }, { "epoch": 0.1832618025751073, "grad_norm": 0.37109375, "learning_rate": 4.99941976884517e-06, "loss": 2.5008, "step": 3416 }, { "epoch": 0.18331545064377683, "grad_norm": 0.318359375, "learning_rate": 4.999417895695019e-06, "loss": 2.379, "step": 3417 }, { "epoch": 0.18336909871244636, "grad_norm": 0.3671875, "learning_rate": 4.99941601952656e-06, "loss": 2.3804, "step": 3418 }, { "epoch": 0.1834227467811159, "grad_norm": 3.15625, "learning_rate": 4.999414140339798e-06, "loss": 2.4586, "step": 3419 }, { "epoch": 0.1834763948497854, "grad_norm": 0.66796875, "learning_rate": 4.999412258134734e-06, "loss": 2.3534, "step": 3420 }, { "epoch": 0.18353004291845493, "grad_norm": 0.376953125, "learning_rate": 4.999410372911371e-06, "loss": 2.2021, "step": 3421 }, { "epoch": 0.18358369098712446, "grad_norm": 0.361328125, "learning_rate": 4.99940848466971e-06, "loss": 2.3689, "step": 3422 }, { "epoch": 0.183637339055794, "grad_norm": 0.375, "learning_rate": 4.9994065934097535e-06, "loss": 2.2204, "step": 3423 }, { "epoch": 0.18369098712446352, "grad_norm": 0.341796875, "learning_rate": 4.9994046991315046e-06, "loss": 2.3684, "step": 3424 }, { "epoch": 0.18374463519313305, "grad_norm": 0.3125, "learning_rate": 4.999402801834966e-06, "loss": 2.2298, "step": 3425 }, { "epoch": 0.1837982832618026, "grad_norm": 0.4296875, "learning_rate": 4.999400901520138e-06, "loss": 2.257, "step": 3426 }, { "epoch": 0.1838519313304721, "grad_norm": 0.34375, "learning_rate": 4.999398998187025e-06, "loss": 2.3133, "step": 3427 }, { "epoch": 0.18390557939914162, "grad_norm": 0.703125, "learning_rate": 4.999397091835628e-06, "loss": 2.4289, "step": 3428 }, { "epoch": 0.18395922746781115, "grad_norm": 0.31640625, "learning_rate": 4.999395182465951e-06, "loss": 2.188, "step": 3429 }, { "epoch": 0.18401287553648069, "grad_norm": 0.322265625, "learning_rate": 4.9993932700779926e-06, "loss": 2.3185, "step": 3430 }, { "epoch": 0.18406652360515022, "grad_norm": 0.4921875, "learning_rate": 4.999391354671759e-06, "loss": 2.3274, "step": 3431 }, { "epoch": 0.18412017167381975, "grad_norm": 0.4296875, "learning_rate": 4.999389436247251e-06, "loss": 2.2183, "step": 3432 }, { "epoch": 0.18417381974248928, "grad_norm": 0.31640625, "learning_rate": 4.9993875148044706e-06, "loss": 2.2863, "step": 3433 }, { "epoch": 0.18422746781115878, "grad_norm": 0.310546875, "learning_rate": 4.999385590343421e-06, "loss": 1.8773, "step": 3434 }, { "epoch": 0.18428111587982832, "grad_norm": 0.3828125, "learning_rate": 4.999383662864103e-06, "loss": 2.3493, "step": 3435 }, { "epoch": 0.18433476394849785, "grad_norm": 0.96484375, "learning_rate": 4.999381732366521e-06, "loss": 2.2495, "step": 3436 }, { "epoch": 0.18438841201716738, "grad_norm": 0.34765625, "learning_rate": 4.999379798850676e-06, "loss": 2.3449, "step": 3437 }, { "epoch": 0.1844420600858369, "grad_norm": 0.80859375, "learning_rate": 4.99937786231657e-06, "loss": 2.3799, "step": 3438 }, { "epoch": 0.18449570815450644, "grad_norm": 0.3359375, "learning_rate": 4.9993759227642055e-06, "loss": 2.461, "step": 3439 }, { "epoch": 0.18454935622317598, "grad_norm": 0.44140625, "learning_rate": 4.999373980193587e-06, "loss": 2.6467, "step": 3440 }, { "epoch": 0.1846030042918455, "grad_norm": 0.314453125, "learning_rate": 4.999372034604714e-06, "loss": 2.3603, "step": 3441 }, { "epoch": 0.184656652360515, "grad_norm": 0.341796875, "learning_rate": 4.99937008599759e-06, "loss": 2.2857, "step": 3442 }, { "epoch": 0.18471030042918454, "grad_norm": 0.43359375, "learning_rate": 4.999368134372217e-06, "loss": 2.4246, "step": 3443 }, { "epoch": 0.18476394849785407, "grad_norm": 0.302734375, "learning_rate": 4.9993661797285984e-06, "loss": 2.2866, "step": 3444 }, { "epoch": 0.1848175965665236, "grad_norm": 0.314453125, "learning_rate": 4.999364222066736e-06, "loss": 2.0998, "step": 3445 }, { "epoch": 0.18487124463519314, "grad_norm": 0.369140625, "learning_rate": 4.999362261386631e-06, "loss": 2.3095, "step": 3446 }, { "epoch": 0.18492489270386267, "grad_norm": 0.359375, "learning_rate": 4.999360297688287e-06, "loss": 2.2902, "step": 3447 }, { "epoch": 0.1849785407725322, "grad_norm": 0.388671875, "learning_rate": 4.999358330971707e-06, "loss": 2.2763, "step": 3448 }, { "epoch": 0.1850321888412017, "grad_norm": 0.34375, "learning_rate": 4.999356361236893e-06, "loss": 2.2048, "step": 3449 }, { "epoch": 0.18508583690987124, "grad_norm": 0.375, "learning_rate": 4.999354388483845e-06, "loss": 1.6719, "step": 3450 }, { "epoch": 0.18513948497854077, "grad_norm": 0.287109375, "learning_rate": 4.999352412712568e-06, "loss": 2.1735, "step": 3451 }, { "epoch": 0.1851931330472103, "grad_norm": 0.3515625, "learning_rate": 4.9993504339230635e-06, "loss": 2.3448, "step": 3452 }, { "epoch": 0.18524678111587983, "grad_norm": 0.404296875, "learning_rate": 4.999348452115334e-06, "loss": 1.4639, "step": 3453 }, { "epoch": 0.18530042918454936, "grad_norm": 0.365234375, "learning_rate": 4.9993464672893836e-06, "loss": 2.2797, "step": 3454 }, { "epoch": 0.1853540772532189, "grad_norm": 0.5, "learning_rate": 4.9993444794452105e-06, "loss": 2.0837, "step": 3455 }, { "epoch": 0.1854077253218884, "grad_norm": 0.3515625, "learning_rate": 4.999342488582821e-06, "loss": 2.3752, "step": 3456 }, { "epoch": 0.18546137339055793, "grad_norm": 0.33203125, "learning_rate": 4.999340494702216e-06, "loss": 2.2828, "step": 3457 }, { "epoch": 0.18551502145922746, "grad_norm": 0.3359375, "learning_rate": 4.999338497803399e-06, "loss": 2.5626, "step": 3458 }, { "epoch": 0.185568669527897, "grad_norm": 0.388671875, "learning_rate": 4.99933649788637e-06, "loss": 2.6549, "step": 3459 }, { "epoch": 0.18562231759656653, "grad_norm": 0.408203125, "learning_rate": 4.999334494951134e-06, "loss": 2.5398, "step": 3460 }, { "epoch": 0.18567596566523606, "grad_norm": 0.28125, "learning_rate": 4.999332488997691e-06, "loss": 2.1847, "step": 3461 }, { "epoch": 0.1857296137339056, "grad_norm": 0.3984375, "learning_rate": 4.999330480026046e-06, "loss": 2.416, "step": 3462 }, { "epoch": 0.1857832618025751, "grad_norm": 0.26171875, "learning_rate": 4.999328468036199e-06, "loss": 1.9335, "step": 3463 }, { "epoch": 0.18583690987124463, "grad_norm": 0.52734375, "learning_rate": 4.9993264530281535e-06, "loss": 2.2439, "step": 3464 }, { "epoch": 0.18589055793991416, "grad_norm": 0.34375, "learning_rate": 4.999324435001913e-06, "loss": 2.2019, "step": 3465 }, { "epoch": 0.1859442060085837, "grad_norm": 0.40234375, "learning_rate": 4.999322413957479e-06, "loss": 2.3207, "step": 3466 }, { "epoch": 0.18599785407725322, "grad_norm": 0.255859375, "learning_rate": 4.999320389894854e-06, "loss": 2.0921, "step": 3467 }, { "epoch": 0.18605150214592275, "grad_norm": 0.34765625, "learning_rate": 4.999318362814039e-06, "loss": 2.4678, "step": 3468 }, { "epoch": 0.18610515021459229, "grad_norm": 0.33203125, "learning_rate": 4.999316332715038e-06, "loss": 2.2175, "step": 3469 }, { "epoch": 0.1861587982832618, "grad_norm": 1.0234375, "learning_rate": 4.999314299597855e-06, "loss": 2.3363, "step": 3470 }, { "epoch": 0.18621244635193132, "grad_norm": 0.330078125, "learning_rate": 4.999312263462489e-06, "loss": 2.3586, "step": 3471 }, { "epoch": 0.18626609442060085, "grad_norm": 0.33203125, "learning_rate": 4.999310224308945e-06, "loss": 2.2399, "step": 3472 }, { "epoch": 0.18631974248927038, "grad_norm": 0.455078125, "learning_rate": 4.999308182137224e-06, "loss": 1.4328, "step": 3473 }, { "epoch": 0.18637339055793992, "grad_norm": 0.369140625, "learning_rate": 4.999306136947329e-06, "loss": 2.3049, "step": 3474 }, { "epoch": 0.18642703862660945, "grad_norm": 0.35546875, "learning_rate": 4.999304088739263e-06, "loss": 2.3299, "step": 3475 }, { "epoch": 0.18648068669527898, "grad_norm": 0.359375, "learning_rate": 4.999302037513029e-06, "loss": 2.5396, "step": 3476 }, { "epoch": 0.1865343347639485, "grad_norm": 0.359375, "learning_rate": 4.9992999832686265e-06, "loss": 2.4046, "step": 3477 }, { "epoch": 0.18658798283261802, "grad_norm": 0.31640625, "learning_rate": 4.9992979260060606e-06, "loss": 2.1985, "step": 3478 }, { "epoch": 0.18664163090128755, "grad_norm": 0.361328125, "learning_rate": 4.999295865725334e-06, "loss": 2.1355, "step": 3479 }, { "epoch": 0.18669527896995708, "grad_norm": 0.365234375, "learning_rate": 4.999293802426448e-06, "loss": 2.3734, "step": 3480 }, { "epoch": 0.1867489270386266, "grad_norm": 0.314453125, "learning_rate": 4.999291736109405e-06, "loss": 2.2939, "step": 3481 }, { "epoch": 0.18680257510729614, "grad_norm": 0.369140625, "learning_rate": 4.999289666774208e-06, "loss": 2.2704, "step": 3482 }, { "epoch": 0.18685622317596567, "grad_norm": 0.291015625, "learning_rate": 4.99928759442086e-06, "loss": 2.2917, "step": 3483 }, { "epoch": 0.1869098712446352, "grad_norm": 0.3359375, "learning_rate": 4.999285519049362e-06, "loss": 2.5451, "step": 3484 }, { "epoch": 0.1869635193133047, "grad_norm": 0.5078125, "learning_rate": 4.999283440659718e-06, "loss": 2.3403, "step": 3485 }, { "epoch": 0.18701716738197424, "grad_norm": 0.330078125, "learning_rate": 4.999281359251929e-06, "loss": 2.3792, "step": 3486 }, { "epoch": 0.18707081545064377, "grad_norm": 0.4140625, "learning_rate": 4.999279274826e-06, "loss": 2.5092, "step": 3487 }, { "epoch": 0.1871244635193133, "grad_norm": 0.365234375, "learning_rate": 4.9992771873819315e-06, "loss": 2.1716, "step": 3488 }, { "epoch": 0.18717811158798284, "grad_norm": 0.33984375, "learning_rate": 4.999275096919726e-06, "loss": 2.3364, "step": 3489 }, { "epoch": 0.18723175965665237, "grad_norm": 1.0625, "learning_rate": 4.9992730034393875e-06, "loss": 2.2777, "step": 3490 }, { "epoch": 0.1872854077253219, "grad_norm": 0.44921875, "learning_rate": 4.999270906940916e-06, "loss": 2.2686, "step": 3491 }, { "epoch": 0.1873390557939914, "grad_norm": 0.423828125, "learning_rate": 4.999268807424317e-06, "loss": 1.5979, "step": 3492 }, { "epoch": 0.18739270386266094, "grad_norm": 0.62890625, "learning_rate": 4.999266704889591e-06, "loss": 2.549, "step": 3493 }, { "epoch": 0.18744635193133047, "grad_norm": 0.326171875, "learning_rate": 4.999264599336742e-06, "loss": 2.498, "step": 3494 }, { "epoch": 0.1875, "grad_norm": 0.375, "learning_rate": 4.99926249076577e-06, "loss": 2.3884, "step": 3495 }, { "epoch": 0.18755364806866953, "grad_norm": 0.431640625, "learning_rate": 4.999260379176681e-06, "loss": 2.2608, "step": 3496 }, { "epoch": 0.18760729613733906, "grad_norm": 0.392578125, "learning_rate": 4.999258264569475e-06, "loss": 2.4884, "step": 3497 }, { "epoch": 0.1876609442060086, "grad_norm": 0.326171875, "learning_rate": 4.999256146944155e-06, "loss": 2.3062, "step": 3498 }, { "epoch": 0.1877145922746781, "grad_norm": 0.361328125, "learning_rate": 4.999254026300724e-06, "loss": 2.2905, "step": 3499 }, { "epoch": 0.18776824034334763, "grad_norm": 0.330078125, "learning_rate": 4.999251902639185e-06, "loss": 2.4094, "step": 3500 }, { "epoch": 0.18782188841201716, "grad_norm": 0.326171875, "learning_rate": 4.99924977595954e-06, "loss": 2.2175, "step": 3501 }, { "epoch": 0.1878755364806867, "grad_norm": 0.310546875, "learning_rate": 4.999247646261791e-06, "loss": 2.101, "step": 3502 }, { "epoch": 0.18792918454935623, "grad_norm": 0.328125, "learning_rate": 4.999245513545942e-06, "loss": 2.4006, "step": 3503 }, { "epoch": 0.18798283261802576, "grad_norm": 0.322265625, "learning_rate": 4.999243377811994e-06, "loss": 2.1634, "step": 3504 }, { "epoch": 0.1880364806866953, "grad_norm": 0.390625, "learning_rate": 4.999241239059951e-06, "loss": 2.3065, "step": 3505 }, { "epoch": 0.1880901287553648, "grad_norm": 0.322265625, "learning_rate": 4.999239097289815e-06, "loss": 2.4631, "step": 3506 }, { "epoch": 0.18814377682403433, "grad_norm": 0.294921875, "learning_rate": 4.999236952501588e-06, "loss": 2.256, "step": 3507 }, { "epoch": 0.18819742489270386, "grad_norm": 0.330078125, "learning_rate": 4.999234804695273e-06, "loss": 2.2992, "step": 3508 }, { "epoch": 0.1882510729613734, "grad_norm": 0.400390625, "learning_rate": 4.999232653870873e-06, "loss": 2.2621, "step": 3509 }, { "epoch": 0.18830472103004292, "grad_norm": 0.423828125, "learning_rate": 4.9992305000283905e-06, "loss": 2.3287, "step": 3510 }, { "epoch": 0.18835836909871245, "grad_norm": 0.3359375, "learning_rate": 4.999228343167826e-06, "loss": 2.3188, "step": 3511 }, { "epoch": 0.18841201716738198, "grad_norm": 0.30078125, "learning_rate": 4.9992261832891865e-06, "loss": 2.4167, "step": 3512 }, { "epoch": 0.1884656652360515, "grad_norm": 0.27734375, "learning_rate": 4.999224020392472e-06, "loss": 1.9209, "step": 3513 }, { "epoch": 0.18851931330472102, "grad_norm": 0.357421875, "learning_rate": 4.999221854477684e-06, "loss": 2.3907, "step": 3514 }, { "epoch": 0.18857296137339055, "grad_norm": 0.384765625, "learning_rate": 4.999219685544826e-06, "loss": 2.3742, "step": 3515 }, { "epoch": 0.18862660944206008, "grad_norm": 0.3203125, "learning_rate": 4.999217513593901e-06, "loss": 2.0246, "step": 3516 }, { "epoch": 0.18868025751072962, "grad_norm": 0.5234375, "learning_rate": 4.999215338624913e-06, "loss": 2.4549, "step": 3517 }, { "epoch": 0.18873390557939915, "grad_norm": 0.33203125, "learning_rate": 4.999213160637862e-06, "loss": 2.4412, "step": 3518 }, { "epoch": 0.18878755364806868, "grad_norm": 1.015625, "learning_rate": 4.999210979632753e-06, "loss": 2.0107, "step": 3519 }, { "epoch": 0.1888412017167382, "grad_norm": 0.318359375, "learning_rate": 4.999208795609586e-06, "loss": 2.3458, "step": 3520 }, { "epoch": 0.18889484978540771, "grad_norm": 0.32421875, "learning_rate": 4.999206608568366e-06, "loss": 2.2923, "step": 3521 }, { "epoch": 0.18894849785407725, "grad_norm": 0.38671875, "learning_rate": 4.999204418509094e-06, "loss": 2.3239, "step": 3522 }, { "epoch": 0.18900214592274678, "grad_norm": 0.33203125, "learning_rate": 4.999202225431774e-06, "loss": 2.3252, "step": 3523 }, { "epoch": 0.1890557939914163, "grad_norm": 0.59375, "learning_rate": 4.999200029336408e-06, "loss": 2.1715, "step": 3524 }, { "epoch": 0.18910944206008584, "grad_norm": 10.3125, "learning_rate": 4.999197830222998e-06, "loss": 2.5428, "step": 3525 }, { "epoch": 0.18916309012875537, "grad_norm": 0.74609375, "learning_rate": 4.999195628091548e-06, "loss": 2.5996, "step": 3526 }, { "epoch": 0.1892167381974249, "grad_norm": 0.54296875, "learning_rate": 4.99919342294206e-06, "loss": 2.2091, "step": 3527 }, { "epoch": 0.1892703862660944, "grad_norm": 0.283203125, "learning_rate": 4.999191214774537e-06, "loss": 2.3285, "step": 3528 }, { "epoch": 0.18932403433476394, "grad_norm": 0.62109375, "learning_rate": 4.99918900358898e-06, "loss": 2.3978, "step": 3529 }, { "epoch": 0.18937768240343347, "grad_norm": 0.37109375, "learning_rate": 4.999186789385394e-06, "loss": 2.5225, "step": 3530 }, { "epoch": 0.189431330472103, "grad_norm": 0.416015625, "learning_rate": 4.999184572163781e-06, "loss": 2.2393, "step": 3531 }, { "epoch": 0.18948497854077254, "grad_norm": 0.53125, "learning_rate": 4.999182351924142e-06, "loss": 2.3109, "step": 3532 }, { "epoch": 0.18953862660944207, "grad_norm": 0.369140625, "learning_rate": 4.999180128666482e-06, "loss": 2.1772, "step": 3533 }, { "epoch": 0.1895922746781116, "grad_norm": 0.349609375, "learning_rate": 4.999177902390802e-06, "loss": 2.3418, "step": 3534 }, { "epoch": 0.1896459227467811, "grad_norm": 0.34765625, "learning_rate": 4.999175673097107e-06, "loss": 2.3384, "step": 3535 }, { "epoch": 0.18969957081545064, "grad_norm": 0.322265625, "learning_rate": 4.999173440785397e-06, "loss": 2.2618, "step": 3536 }, { "epoch": 0.18975321888412017, "grad_norm": 0.341796875, "learning_rate": 4.999171205455677e-06, "loss": 2.3976, "step": 3537 }, { "epoch": 0.1898068669527897, "grad_norm": 0.431640625, "learning_rate": 4.999168967107946e-06, "loss": 2.3531, "step": 3538 }, { "epoch": 0.18986051502145923, "grad_norm": 0.2890625, "learning_rate": 4.999166725742211e-06, "loss": 2.1376, "step": 3539 }, { "epoch": 0.18991416309012876, "grad_norm": 0.341796875, "learning_rate": 4.9991644813584725e-06, "loss": 2.1202, "step": 3540 }, { "epoch": 0.1899678111587983, "grad_norm": 0.419921875, "learning_rate": 4.999162233956734e-06, "loss": 2.3545, "step": 3541 }, { "epoch": 0.1900214592274678, "grad_norm": 0.3125, "learning_rate": 4.999159983536997e-06, "loss": 2.3493, "step": 3542 }, { "epoch": 0.19007510729613733, "grad_norm": 0.349609375, "learning_rate": 4.999157730099265e-06, "loss": 2.3253, "step": 3543 }, { "epoch": 0.19012875536480686, "grad_norm": 0.40625, "learning_rate": 4.999155473643541e-06, "loss": 2.5497, "step": 3544 }, { "epoch": 0.1901824034334764, "grad_norm": 0.33984375, "learning_rate": 4.999153214169828e-06, "loss": 2.2844, "step": 3545 }, { "epoch": 0.19023605150214593, "grad_norm": 0.482421875, "learning_rate": 4.999150951678128e-06, "loss": 2.371, "step": 3546 }, { "epoch": 0.19028969957081546, "grad_norm": 0.63671875, "learning_rate": 4.9991486861684434e-06, "loss": 2.2654, "step": 3547 }, { "epoch": 0.190343347639485, "grad_norm": 0.375, "learning_rate": 4.999146417640778e-06, "loss": 2.2754, "step": 3548 }, { "epoch": 0.1903969957081545, "grad_norm": 0.41796875, "learning_rate": 4.999144146095134e-06, "loss": 2.2407, "step": 3549 }, { "epoch": 0.19045064377682402, "grad_norm": 0.482421875, "learning_rate": 4.999141871531514e-06, "loss": 2.0627, "step": 3550 }, { "epoch": 0.19050429184549356, "grad_norm": 0.400390625, "learning_rate": 4.999139593949921e-06, "loss": 2.0629, "step": 3551 }, { "epoch": 0.1905579399141631, "grad_norm": 0.73046875, "learning_rate": 4.999137313350358e-06, "loss": 2.3311, "step": 3552 }, { "epoch": 0.19061158798283262, "grad_norm": 0.404296875, "learning_rate": 4.999135029732827e-06, "loss": 2.2931, "step": 3553 }, { "epoch": 0.19066523605150215, "grad_norm": 1.8984375, "learning_rate": 4.999132743097331e-06, "loss": 2.76, "step": 3554 }, { "epoch": 0.19071888412017168, "grad_norm": 0.345703125, "learning_rate": 4.999130453443873e-06, "loss": 2.5255, "step": 3555 }, { "epoch": 0.19077253218884122, "grad_norm": 0.390625, "learning_rate": 4.999128160772456e-06, "loss": 2.3594, "step": 3556 }, { "epoch": 0.19082618025751072, "grad_norm": 0.30859375, "learning_rate": 4.999125865083082e-06, "loss": 2.3338, "step": 3557 }, { "epoch": 0.19087982832618025, "grad_norm": 0.333984375, "learning_rate": 4.999123566375755e-06, "loss": 2.1156, "step": 3558 }, { "epoch": 0.19093347639484978, "grad_norm": 0.34765625, "learning_rate": 4.999121264650476e-06, "loss": 2.3692, "step": 3559 }, { "epoch": 0.19098712446351931, "grad_norm": 0.4765625, "learning_rate": 4.99911895990725e-06, "loss": 2.5257, "step": 3560 }, { "epoch": 0.19104077253218885, "grad_norm": 0.671875, "learning_rate": 4.999116652146078e-06, "loss": 2.3114, "step": 3561 }, { "epoch": 0.19109442060085838, "grad_norm": 0.498046875, "learning_rate": 4.999114341366963e-06, "loss": 2.0707, "step": 3562 }, { "epoch": 0.1911480686695279, "grad_norm": 0.349609375, "learning_rate": 4.999112027569908e-06, "loss": 2.1959, "step": 3563 }, { "epoch": 0.1912017167381974, "grad_norm": 0.330078125, "learning_rate": 4.999109710754917e-06, "loss": 2.4688, "step": 3564 }, { "epoch": 0.19125536480686695, "grad_norm": 0.34765625, "learning_rate": 4.999107390921991e-06, "loss": 2.0701, "step": 3565 }, { "epoch": 0.19130901287553648, "grad_norm": 0.345703125, "learning_rate": 4.999105068071134e-06, "loss": 2.4694, "step": 3566 }, { "epoch": 0.191362660944206, "grad_norm": 0.30078125, "learning_rate": 4.999102742202348e-06, "loss": 2.0513, "step": 3567 }, { "epoch": 0.19141630901287554, "grad_norm": 8.5, "learning_rate": 4.9991004133156365e-06, "loss": 2.1997, "step": 3568 }, { "epoch": 0.19146995708154507, "grad_norm": 0.3359375, "learning_rate": 4.999098081411001e-06, "loss": 2.388, "step": 3569 }, { "epoch": 0.1915236051502146, "grad_norm": 0.53125, "learning_rate": 4.999095746488446e-06, "loss": 2.2121, "step": 3570 }, { "epoch": 0.1915772532188841, "grad_norm": 0.384765625, "learning_rate": 4.9990934085479735e-06, "loss": 2.3827, "step": 3571 }, { "epoch": 0.19163090128755364, "grad_norm": 0.4140625, "learning_rate": 4.999091067589587e-06, "loss": 2.1896, "step": 3572 }, { "epoch": 0.19168454935622317, "grad_norm": 0.39453125, "learning_rate": 4.999088723613288e-06, "loss": 2.0773, "step": 3573 }, { "epoch": 0.1917381974248927, "grad_norm": 0.310546875, "learning_rate": 4.999086376619081e-06, "loss": 2.4285, "step": 3574 }, { "epoch": 0.19179184549356224, "grad_norm": 0.37109375, "learning_rate": 4.9990840266069664e-06, "loss": 2.2971, "step": 3575 }, { "epoch": 0.19184549356223177, "grad_norm": 0.3984375, "learning_rate": 4.99908167357695e-06, "loss": 2.4197, "step": 3576 }, { "epoch": 0.1918991416309013, "grad_norm": 0.314453125, "learning_rate": 4.999079317529033e-06, "loss": 2.3048, "step": 3577 }, { "epoch": 0.1919527896995708, "grad_norm": 2.8125, "learning_rate": 4.9990769584632175e-06, "loss": 2.3682, "step": 3578 }, { "epoch": 0.19200643776824033, "grad_norm": 0.359375, "learning_rate": 4.999074596379509e-06, "loss": 2.2912, "step": 3579 }, { "epoch": 0.19206008583690987, "grad_norm": 0.353515625, "learning_rate": 4.999072231277906e-06, "loss": 2.2548, "step": 3580 }, { "epoch": 0.1921137339055794, "grad_norm": 0.37109375, "learning_rate": 4.999069863158417e-06, "loss": 2.7071, "step": 3581 }, { "epoch": 0.19216738197424893, "grad_norm": 0.326171875, "learning_rate": 4.99906749202104e-06, "loss": 2.2968, "step": 3582 }, { "epoch": 0.19222103004291846, "grad_norm": 0.32421875, "learning_rate": 4.99906511786578e-06, "loss": 2.1637, "step": 3583 }, { "epoch": 0.192274678111588, "grad_norm": 0.421875, "learning_rate": 4.999062740692639e-06, "loss": 2.0415, "step": 3584 }, { "epoch": 0.1923283261802575, "grad_norm": 0.40234375, "learning_rate": 4.999060360501622e-06, "loss": 2.5261, "step": 3585 }, { "epoch": 0.19238197424892703, "grad_norm": 0.357421875, "learning_rate": 4.999057977292729e-06, "loss": 2.1958, "step": 3586 }, { "epoch": 0.19243562231759656, "grad_norm": 0.74609375, "learning_rate": 4.999055591065966e-06, "loss": 2.5282, "step": 3587 }, { "epoch": 0.1924892703862661, "grad_norm": 0.333984375, "learning_rate": 4.999053201821332e-06, "loss": 2.351, "step": 3588 }, { "epoch": 0.19254291845493562, "grad_norm": 0.39453125, "learning_rate": 4.999050809558833e-06, "loss": 2.4838, "step": 3589 }, { "epoch": 0.19259656652360516, "grad_norm": 0.2734375, "learning_rate": 4.999048414278471e-06, "loss": 1.9929, "step": 3590 }, { "epoch": 0.1926502145922747, "grad_norm": 0.328125, "learning_rate": 4.999046015980249e-06, "loss": 2.1449, "step": 3591 }, { "epoch": 0.19270386266094422, "grad_norm": 0.42578125, "learning_rate": 4.9990436146641695e-06, "loss": 2.2974, "step": 3592 }, { "epoch": 0.19275751072961372, "grad_norm": 0.31640625, "learning_rate": 4.9990412103302345e-06, "loss": 2.3795, "step": 3593 }, { "epoch": 0.19281115879828326, "grad_norm": 0.353515625, "learning_rate": 4.999038802978448e-06, "loss": 2.2729, "step": 3594 }, { "epoch": 0.1928648068669528, "grad_norm": 0.333984375, "learning_rate": 4.999036392608815e-06, "loss": 1.8608, "step": 3595 }, { "epoch": 0.19291845493562232, "grad_norm": 0.3515625, "learning_rate": 4.999033979221335e-06, "loss": 2.2807, "step": 3596 }, { "epoch": 0.19297210300429185, "grad_norm": 0.412109375, "learning_rate": 4.999031562816012e-06, "loss": 2.4529, "step": 3597 }, { "epoch": 0.19302575107296138, "grad_norm": 0.55078125, "learning_rate": 4.999029143392849e-06, "loss": 2.4017, "step": 3598 }, { "epoch": 0.19307939914163091, "grad_norm": 0.419921875, "learning_rate": 4.999026720951849e-06, "loss": 2.2852, "step": 3599 }, { "epoch": 0.19313304721030042, "grad_norm": 0.380859375, "learning_rate": 4.999024295493016e-06, "loss": 2.3689, "step": 3600 }, { "epoch": 0.19318669527896995, "grad_norm": 0.435546875, "learning_rate": 4.99902186701635e-06, "loss": 2.4554, "step": 3601 }, { "epoch": 0.19324034334763948, "grad_norm": 0.35546875, "learning_rate": 4.999019435521858e-06, "loss": 2.351, "step": 3602 }, { "epoch": 0.193293991416309, "grad_norm": 0.435546875, "learning_rate": 4.999017001009539e-06, "loss": 2.4714, "step": 3603 }, { "epoch": 0.19334763948497855, "grad_norm": 0.4140625, "learning_rate": 4.999014563479398e-06, "loss": 2.5559, "step": 3604 }, { "epoch": 0.19340128755364808, "grad_norm": 0.322265625, "learning_rate": 4.999012122931439e-06, "loss": 2.3615, "step": 3605 }, { "epoch": 0.1934549356223176, "grad_norm": 1.390625, "learning_rate": 4.999009679365663e-06, "loss": 1.8281, "step": 3606 }, { "epoch": 0.1935085836909871, "grad_norm": 0.45703125, "learning_rate": 4.999007232782073e-06, "loss": 2.4625, "step": 3607 }, { "epoch": 0.19356223175965664, "grad_norm": 0.32421875, "learning_rate": 4.999004783180673e-06, "loss": 2.2341, "step": 3608 }, { "epoch": 0.19361587982832618, "grad_norm": 0.306640625, "learning_rate": 4.999002330561465e-06, "loss": 2.2692, "step": 3609 }, { "epoch": 0.1936695278969957, "grad_norm": 0.408203125, "learning_rate": 4.998999874924453e-06, "loss": 2.4344, "step": 3610 }, { "epoch": 0.19372317596566524, "grad_norm": 0.47265625, "learning_rate": 4.998997416269638e-06, "loss": 2.1732, "step": 3611 }, { "epoch": 0.19377682403433477, "grad_norm": 0.345703125, "learning_rate": 4.998994954597026e-06, "loss": 2.0586, "step": 3612 }, { "epoch": 0.1938304721030043, "grad_norm": 0.34765625, "learning_rate": 4.9989924899066176e-06, "loss": 2.3098, "step": 3613 }, { "epoch": 0.1938841201716738, "grad_norm": 0.3515625, "learning_rate": 4.998990022198417e-06, "loss": 2.131, "step": 3614 }, { "epoch": 0.19393776824034334, "grad_norm": 0.328125, "learning_rate": 4.998987551472426e-06, "loss": 2.3397, "step": 3615 }, { "epoch": 0.19399141630901287, "grad_norm": 0.443359375, "learning_rate": 4.998985077728649e-06, "loss": 2.5663, "step": 3616 }, { "epoch": 0.1940450643776824, "grad_norm": 0.458984375, "learning_rate": 4.9989826009670875e-06, "loss": 2.2089, "step": 3617 }, { "epoch": 0.19409871244635193, "grad_norm": 0.361328125, "learning_rate": 4.998980121187745e-06, "loss": 2.3569, "step": 3618 }, { "epoch": 0.19415236051502147, "grad_norm": 0.31640625, "learning_rate": 4.998977638390625e-06, "loss": 2.2526, "step": 3619 }, { "epoch": 0.194206008583691, "grad_norm": 1.1796875, "learning_rate": 4.998975152575731e-06, "loss": 2.3339, "step": 3620 }, { "epoch": 0.1942596566523605, "grad_norm": 0.373046875, "learning_rate": 4.998972663743064e-06, "loss": 1.9098, "step": 3621 }, { "epoch": 0.19431330472103003, "grad_norm": 0.330078125, "learning_rate": 4.99897017189263e-06, "loss": 2.5726, "step": 3622 }, { "epoch": 0.19436695278969957, "grad_norm": 0.326171875, "learning_rate": 4.998967677024429e-06, "loss": 2.332, "step": 3623 }, { "epoch": 0.1944206008583691, "grad_norm": 0.333984375, "learning_rate": 4.998965179138465e-06, "loss": 2.1371, "step": 3624 }, { "epoch": 0.19447424892703863, "grad_norm": 0.345703125, "learning_rate": 4.998962678234742e-06, "loss": 2.5313, "step": 3625 }, { "epoch": 0.19452789699570816, "grad_norm": 0.337890625, "learning_rate": 4.998960174313263e-06, "loss": 2.4092, "step": 3626 }, { "epoch": 0.1945815450643777, "grad_norm": 0.49609375, "learning_rate": 4.998957667374029e-06, "loss": 2.5753, "step": 3627 }, { "epoch": 0.19463519313304722, "grad_norm": 0.349609375, "learning_rate": 4.998955157417045e-06, "loss": 2.4157, "step": 3628 }, { "epoch": 0.19468884120171673, "grad_norm": 0.392578125, "learning_rate": 4.9989526444423135e-06, "loss": 2.4477, "step": 3629 }, { "epoch": 0.19474248927038626, "grad_norm": 0.34765625, "learning_rate": 4.998950128449837e-06, "loss": 2.02, "step": 3630 }, { "epoch": 0.1947961373390558, "grad_norm": 0.3359375, "learning_rate": 4.9989476094396185e-06, "loss": 2.3539, "step": 3631 }, { "epoch": 0.19484978540772532, "grad_norm": 0.279296875, "learning_rate": 4.9989450874116625e-06, "loss": 2.3531, "step": 3632 }, { "epoch": 0.19490343347639486, "grad_norm": 0.328125, "learning_rate": 4.9989425623659705e-06, "loss": 2.3169, "step": 3633 }, { "epoch": 0.1949570815450644, "grad_norm": 0.373046875, "learning_rate": 4.9989400343025465e-06, "loss": 2.2393, "step": 3634 }, { "epoch": 0.19501072961373392, "grad_norm": 0.333984375, "learning_rate": 4.998937503221393e-06, "loss": 2.4262, "step": 3635 }, { "epoch": 0.19506437768240342, "grad_norm": 0.40234375, "learning_rate": 4.998934969122512e-06, "loss": 2.4457, "step": 3636 }, { "epoch": 0.19511802575107295, "grad_norm": 0.279296875, "learning_rate": 4.998932432005909e-06, "loss": 2.197, "step": 3637 }, { "epoch": 0.19517167381974249, "grad_norm": 0.3515625, "learning_rate": 4.9989298918715865e-06, "loss": 2.3284, "step": 3638 }, { "epoch": 0.19522532188841202, "grad_norm": 0.455078125, "learning_rate": 4.998927348719545e-06, "loss": 2.3696, "step": 3639 }, { "epoch": 0.19527896995708155, "grad_norm": 0.384765625, "learning_rate": 4.998924802549792e-06, "loss": 2.2769, "step": 3640 }, { "epoch": 0.19533261802575108, "grad_norm": 0.341796875, "learning_rate": 4.998922253362326e-06, "loss": 2.5742, "step": 3641 }, { "epoch": 0.1953862660944206, "grad_norm": 0.353515625, "learning_rate": 4.9989197011571525e-06, "loss": 2.2631, "step": 3642 }, { "epoch": 0.19543991416309012, "grad_norm": 0.41015625, "learning_rate": 4.998917145934274e-06, "loss": 2.2535, "step": 3643 }, { "epoch": 0.19549356223175965, "grad_norm": 0.66015625, "learning_rate": 4.998914587693695e-06, "loss": 2.3326, "step": 3644 }, { "epoch": 0.19554721030042918, "grad_norm": 0.388671875, "learning_rate": 4.998912026435416e-06, "loss": 2.0941, "step": 3645 }, { "epoch": 0.1956008583690987, "grad_norm": 0.291015625, "learning_rate": 4.9989094621594424e-06, "loss": 2.161, "step": 3646 }, { "epoch": 0.19565450643776824, "grad_norm": 0.328125, "learning_rate": 4.998906894865776e-06, "loss": 2.4242, "step": 3647 }, { "epoch": 0.19570815450643778, "grad_norm": 0.328125, "learning_rate": 4.99890432455442e-06, "loss": 2.0964, "step": 3648 }, { "epoch": 0.1957618025751073, "grad_norm": 0.34375, "learning_rate": 4.998901751225378e-06, "loss": 2.3705, "step": 3649 }, { "epoch": 0.1958154506437768, "grad_norm": 0.32421875, "learning_rate": 4.998899174878653e-06, "loss": 2.3342, "step": 3650 }, { "epoch": 0.19586909871244634, "grad_norm": 0.875, "learning_rate": 4.998896595514248e-06, "loss": 2.2027, "step": 3651 }, { "epoch": 0.19592274678111588, "grad_norm": 0.3828125, "learning_rate": 4.998894013132166e-06, "loss": 2.5297, "step": 3652 }, { "epoch": 0.1959763948497854, "grad_norm": 0.33203125, "learning_rate": 4.998891427732411e-06, "loss": 2.221, "step": 3653 }, { "epoch": 0.19603004291845494, "grad_norm": 0.365234375, "learning_rate": 4.998888839314984e-06, "loss": 2.407, "step": 3654 }, { "epoch": 0.19608369098712447, "grad_norm": 0.408203125, "learning_rate": 4.9988862478798905e-06, "loss": 2.5994, "step": 3655 }, { "epoch": 0.196137339055794, "grad_norm": 0.4140625, "learning_rate": 4.998883653427132e-06, "loss": 2.1585, "step": 3656 }, { "epoch": 0.1961909871244635, "grad_norm": 0.376953125, "learning_rate": 4.998881055956713e-06, "loss": 2.2046, "step": 3657 }, { "epoch": 0.19624463519313304, "grad_norm": 0.3125, "learning_rate": 4.9988784554686345e-06, "loss": 2.2883, "step": 3658 }, { "epoch": 0.19629828326180257, "grad_norm": 0.39453125, "learning_rate": 4.998875851962902e-06, "loss": 2.3967, "step": 3659 }, { "epoch": 0.1963519313304721, "grad_norm": 0.625, "learning_rate": 4.998873245439517e-06, "loss": 2.5802, "step": 3660 }, { "epoch": 0.19640557939914163, "grad_norm": 0.345703125, "learning_rate": 4.998870635898484e-06, "loss": 2.2706, "step": 3661 }, { "epoch": 0.19645922746781116, "grad_norm": 0.298828125, "learning_rate": 4.998868023339805e-06, "loss": 1.9519, "step": 3662 }, { "epoch": 0.1965128755364807, "grad_norm": 0.3671875, "learning_rate": 4.998865407763484e-06, "loss": 2.5355, "step": 3663 }, { "epoch": 0.1965665236051502, "grad_norm": 0.341796875, "learning_rate": 4.9988627891695245e-06, "loss": 2.3811, "step": 3664 }, { "epoch": 0.19662017167381973, "grad_norm": 0.416015625, "learning_rate": 4.998860167557928e-06, "loss": 2.3244, "step": 3665 }, { "epoch": 0.19667381974248926, "grad_norm": 0.310546875, "learning_rate": 4.998857542928698e-06, "loss": 2.4252, "step": 3666 }, { "epoch": 0.1967274678111588, "grad_norm": 0.494140625, "learning_rate": 4.998854915281839e-06, "loss": 2.2045, "step": 3667 }, { "epoch": 0.19678111587982833, "grad_norm": 0.38671875, "learning_rate": 4.998852284617353e-06, "loss": 2.1697, "step": 3668 }, { "epoch": 0.19683476394849786, "grad_norm": 0.333984375, "learning_rate": 4.9988496509352444e-06, "loss": 2.5588, "step": 3669 }, { "epoch": 0.1968884120171674, "grad_norm": 0.45703125, "learning_rate": 4.998847014235515e-06, "loss": 2.356, "step": 3670 }, { "epoch": 0.19694206008583692, "grad_norm": 0.4765625, "learning_rate": 4.998844374518169e-06, "loss": 1.7579, "step": 3671 }, { "epoch": 0.19699570815450643, "grad_norm": 0.330078125, "learning_rate": 4.998841731783208e-06, "loss": 2.2309, "step": 3672 }, { "epoch": 0.19704935622317596, "grad_norm": 0.369140625, "learning_rate": 4.998839086030638e-06, "loss": 2.3103, "step": 3673 }, { "epoch": 0.1971030042918455, "grad_norm": 0.337890625, "learning_rate": 4.9988364372604595e-06, "loss": 2.2833, "step": 3674 }, { "epoch": 0.19715665236051502, "grad_norm": 0.35546875, "learning_rate": 4.998833785472678e-06, "loss": 2.2847, "step": 3675 }, { "epoch": 0.19721030042918455, "grad_norm": 0.3984375, "learning_rate": 4.998831130667294e-06, "loss": 2.5506, "step": 3676 }, { "epoch": 0.19726394849785409, "grad_norm": 0.34765625, "learning_rate": 4.9988284728443125e-06, "loss": 2.1098, "step": 3677 }, { "epoch": 0.19731759656652362, "grad_norm": 0.361328125, "learning_rate": 4.998825812003737e-06, "loss": 2.3558, "step": 3678 }, { "epoch": 0.19737124463519312, "grad_norm": 0.326171875, "learning_rate": 4.998823148145569e-06, "loss": 2.5246, "step": 3679 }, { "epoch": 0.19742489270386265, "grad_norm": 0.427734375, "learning_rate": 4.998820481269813e-06, "loss": 2.6184, "step": 3680 }, { "epoch": 0.19747854077253219, "grad_norm": 0.283203125, "learning_rate": 4.998817811376473e-06, "loss": 1.9266, "step": 3681 }, { "epoch": 0.19753218884120172, "grad_norm": 0.30859375, "learning_rate": 4.998815138465551e-06, "loss": 2.1919, "step": 3682 }, { "epoch": 0.19758583690987125, "grad_norm": 0.41796875, "learning_rate": 4.99881246253705e-06, "loss": 2.4375, "step": 3683 }, { "epoch": 0.19763948497854078, "grad_norm": 0.408203125, "learning_rate": 4.998809783590974e-06, "loss": 2.2626, "step": 3684 }, { "epoch": 0.1976931330472103, "grad_norm": 0.337890625, "learning_rate": 4.998807101627325e-06, "loss": 2.3596, "step": 3685 }, { "epoch": 0.19774678111587982, "grad_norm": 0.8046875, "learning_rate": 4.998804416646109e-06, "loss": 2.4084, "step": 3686 }, { "epoch": 0.19780042918454935, "grad_norm": 0.36328125, "learning_rate": 4.998801728647326e-06, "loss": 2.462, "step": 3687 }, { "epoch": 0.19785407725321888, "grad_norm": 0.33984375, "learning_rate": 4.998799037630981e-06, "loss": 2.0718, "step": 3688 }, { "epoch": 0.1979077253218884, "grad_norm": 0.361328125, "learning_rate": 4.998796343597078e-06, "loss": 2.4215, "step": 3689 }, { "epoch": 0.19796137339055794, "grad_norm": 0.349609375, "learning_rate": 4.9987936465456175e-06, "loss": 2.1503, "step": 3690 }, { "epoch": 0.19801502145922747, "grad_norm": 0.34375, "learning_rate": 4.9987909464766046e-06, "loss": 2.1006, "step": 3691 }, { "epoch": 0.198068669527897, "grad_norm": 0.333984375, "learning_rate": 4.9987882433900436e-06, "loss": 2.3061, "step": 3692 }, { "epoch": 0.1981223175965665, "grad_norm": 0.53515625, "learning_rate": 4.998785537285935e-06, "loss": 2.2674, "step": 3693 }, { "epoch": 0.19817596566523604, "grad_norm": 0.453125, "learning_rate": 4.998782828164285e-06, "loss": 2.2897, "step": 3694 }, { "epoch": 0.19822961373390557, "grad_norm": 0.298828125, "learning_rate": 4.998780116025095e-06, "loss": 2.1001, "step": 3695 }, { "epoch": 0.1982832618025751, "grad_norm": 0.296875, "learning_rate": 4.9987774008683685e-06, "loss": 2.1871, "step": 3696 }, { "epoch": 0.19833690987124464, "grad_norm": 0.361328125, "learning_rate": 4.998774682694109e-06, "loss": 1.9129, "step": 3697 }, { "epoch": 0.19839055793991417, "grad_norm": 0.333984375, "learning_rate": 4.998771961502321e-06, "loss": 2.2642, "step": 3698 }, { "epoch": 0.1984442060085837, "grad_norm": 0.3203125, "learning_rate": 4.998769237293005e-06, "loss": 2.2632, "step": 3699 }, { "epoch": 0.1984978540772532, "grad_norm": 0.41015625, "learning_rate": 4.998766510066168e-06, "loss": 2.4069, "step": 3700 }, { "epoch": 0.19855150214592274, "grad_norm": 0.44921875, "learning_rate": 4.9987637798218095e-06, "loss": 2.3592, "step": 3701 }, { "epoch": 0.19860515021459227, "grad_norm": 0.322265625, "learning_rate": 4.998761046559934e-06, "loss": 2.2194, "step": 3702 }, { "epoch": 0.1986587982832618, "grad_norm": 0.357421875, "learning_rate": 4.998758310280547e-06, "loss": 2.4778, "step": 3703 }, { "epoch": 0.19871244635193133, "grad_norm": 0.345703125, "learning_rate": 4.998755570983649e-06, "loss": 2.3354, "step": 3704 }, { "epoch": 0.19876609442060086, "grad_norm": 0.61328125, "learning_rate": 4.998752828669244e-06, "loss": 1.5046, "step": 3705 }, { "epoch": 0.1988197424892704, "grad_norm": 0.3125, "learning_rate": 4.998750083337337e-06, "loss": 2.508, "step": 3706 }, { "epoch": 0.19887339055793993, "grad_norm": 0.32421875, "learning_rate": 4.998747334987929e-06, "loss": 2.264, "step": 3707 }, { "epoch": 0.19892703862660943, "grad_norm": 0.38671875, "learning_rate": 4.998744583621025e-06, "loss": 2.5261, "step": 3708 }, { "epoch": 0.19898068669527896, "grad_norm": 0.328125, "learning_rate": 4.998741829236627e-06, "loss": 2.2358, "step": 3709 }, { "epoch": 0.1990343347639485, "grad_norm": 0.32421875, "learning_rate": 4.998739071834739e-06, "loss": 2.2591, "step": 3710 }, { "epoch": 0.19908798283261803, "grad_norm": 0.30859375, "learning_rate": 4.998736311415366e-06, "loss": 2.4445, "step": 3711 }, { "epoch": 0.19914163090128756, "grad_norm": 0.328125, "learning_rate": 4.9987335479785085e-06, "loss": 2.2439, "step": 3712 }, { "epoch": 0.1991952789699571, "grad_norm": 0.333984375, "learning_rate": 4.99873078152417e-06, "loss": 2.4737, "step": 3713 }, { "epoch": 0.19924892703862662, "grad_norm": 0.359375, "learning_rate": 4.9987280120523564e-06, "loss": 1.7776, "step": 3714 }, { "epoch": 0.19930257510729613, "grad_norm": 0.33203125, "learning_rate": 4.998725239563068e-06, "loss": 2.3689, "step": 3715 }, { "epoch": 0.19935622317596566, "grad_norm": 0.408203125, "learning_rate": 4.998722464056312e-06, "loss": 2.3294, "step": 3716 }, { "epoch": 0.1994098712446352, "grad_norm": 0.384765625, "learning_rate": 4.998719685532088e-06, "loss": 2.37, "step": 3717 }, { "epoch": 0.19946351931330472, "grad_norm": 0.384765625, "learning_rate": 4.998716903990399e-06, "loss": 2.306, "step": 3718 }, { "epoch": 0.19951716738197425, "grad_norm": 0.62890625, "learning_rate": 4.998714119431253e-06, "loss": 1.5099, "step": 3719 }, { "epoch": 0.19957081545064378, "grad_norm": 0.4140625, "learning_rate": 4.9987113318546484e-06, "loss": 2.1718, "step": 3720 }, { "epoch": 0.19962446351931332, "grad_norm": 0.404296875, "learning_rate": 4.998708541260592e-06, "loss": 2.3903, "step": 3721 }, { "epoch": 0.19967811158798282, "grad_norm": 0.416015625, "learning_rate": 4.9987057476490855e-06, "loss": 2.509, "step": 3722 }, { "epoch": 0.19973175965665235, "grad_norm": 0.3984375, "learning_rate": 4.998702951020132e-06, "loss": 2.2881, "step": 3723 }, { "epoch": 0.19978540772532188, "grad_norm": 0.458984375, "learning_rate": 4.998700151373736e-06, "loss": 2.3485, "step": 3724 }, { "epoch": 0.19983905579399142, "grad_norm": 0.34375, "learning_rate": 4.998697348709899e-06, "loss": 2.3466, "step": 3725 }, { "epoch": 0.19989270386266095, "grad_norm": 0.392578125, "learning_rate": 4.9986945430286275e-06, "loss": 2.1849, "step": 3726 }, { "epoch": 0.19994635193133048, "grad_norm": 0.369140625, "learning_rate": 4.998691734329923e-06, "loss": 2.2618, "step": 3727 }, { "epoch": 0.2, "grad_norm": 0.3984375, "learning_rate": 4.998688922613788e-06, "loss": 2.4533, "step": 3728 }, { "epoch": 0.20005364806866952, "grad_norm": 0.40625, "learning_rate": 4.998686107880227e-06, "loss": 2.2267, "step": 3729 }, { "epoch": 0.20010729613733905, "grad_norm": 0.306640625, "learning_rate": 4.998683290129244e-06, "loss": 2.298, "step": 3730 }, { "epoch": 0.20016094420600858, "grad_norm": 0.53515625, "learning_rate": 4.9986804693608406e-06, "loss": 2.2704, "step": 3731 }, { "epoch": 0.2002145922746781, "grad_norm": 0.3359375, "learning_rate": 4.998677645575022e-06, "loss": 2.5978, "step": 3732 }, { "epoch": 0.20026824034334764, "grad_norm": 0.37890625, "learning_rate": 4.998674818771792e-06, "loss": 2.1734, "step": 3733 }, { "epoch": 0.20032188841201717, "grad_norm": 0.337890625, "learning_rate": 4.998671988951151e-06, "loss": 2.1405, "step": 3734 }, { "epoch": 0.2003755364806867, "grad_norm": 0.37109375, "learning_rate": 4.998669156113105e-06, "loss": 2.2729, "step": 3735 }, { "epoch": 0.2004291845493562, "grad_norm": 0.357421875, "learning_rate": 4.998666320257657e-06, "loss": 2.1119, "step": 3736 }, { "epoch": 0.20048283261802574, "grad_norm": 0.3671875, "learning_rate": 4.99866348138481e-06, "loss": 2.6904, "step": 3737 }, { "epoch": 0.20053648068669527, "grad_norm": 0.361328125, "learning_rate": 4.998660639494568e-06, "loss": 2.3007, "step": 3738 }, { "epoch": 0.2005901287553648, "grad_norm": 1.796875, "learning_rate": 4.998657794586934e-06, "loss": 2.2288, "step": 3739 }, { "epoch": 0.20064377682403434, "grad_norm": 0.373046875, "learning_rate": 4.998654946661911e-06, "loss": 2.4253, "step": 3740 }, { "epoch": 0.20069742489270387, "grad_norm": 0.29296875, "learning_rate": 4.9986520957195025e-06, "loss": 2.4607, "step": 3741 }, { "epoch": 0.2007510729613734, "grad_norm": 0.357421875, "learning_rate": 4.998649241759714e-06, "loss": 2.2238, "step": 3742 }, { "epoch": 0.20080472103004293, "grad_norm": 0.314453125, "learning_rate": 4.998646384782546e-06, "loss": 2.2562, "step": 3743 }, { "epoch": 0.20085836909871244, "grad_norm": 0.3359375, "learning_rate": 4.998643524788004e-06, "loss": 2.2903, "step": 3744 }, { "epoch": 0.20091201716738197, "grad_norm": 0.431640625, "learning_rate": 4.9986406617760904e-06, "loss": 2.3423, "step": 3745 }, { "epoch": 0.2009656652360515, "grad_norm": 0.345703125, "learning_rate": 4.998637795746809e-06, "loss": 2.4406, "step": 3746 }, { "epoch": 0.20101931330472103, "grad_norm": 0.326171875, "learning_rate": 4.998634926700163e-06, "loss": 2.4196, "step": 3747 }, { "epoch": 0.20107296137339056, "grad_norm": 0.353515625, "learning_rate": 4.998632054636156e-06, "loss": 2.4486, "step": 3748 }, { "epoch": 0.2011266094420601, "grad_norm": 1.03125, "learning_rate": 4.998629179554792e-06, "loss": 2.3189, "step": 3749 }, { "epoch": 0.20118025751072963, "grad_norm": 0.361328125, "learning_rate": 4.998626301456073e-06, "loss": 2.4865, "step": 3750 }, { "epoch": 0.20123390557939913, "grad_norm": 0.36328125, "learning_rate": 4.9986234203400055e-06, "loss": 2.4066, "step": 3751 }, { "epoch": 0.20128755364806866, "grad_norm": 0.3671875, "learning_rate": 4.9986205362065895e-06, "loss": 2.1745, "step": 3752 }, { "epoch": 0.2013412017167382, "grad_norm": 0.310546875, "learning_rate": 4.99861764905583e-06, "loss": 2.2674, "step": 3753 }, { "epoch": 0.20139484978540773, "grad_norm": 0.373046875, "learning_rate": 4.998614758887731e-06, "loss": 2.1586, "step": 3754 }, { "epoch": 0.20144849785407726, "grad_norm": 0.34765625, "learning_rate": 4.998611865702295e-06, "loss": 2.3709, "step": 3755 }, { "epoch": 0.2015021459227468, "grad_norm": 0.353515625, "learning_rate": 4.998608969499526e-06, "loss": 2.5199, "step": 3756 }, { "epoch": 0.20155579399141632, "grad_norm": 0.2890625, "learning_rate": 4.998606070279428e-06, "loss": 2.2897, "step": 3757 }, { "epoch": 0.20160944206008583, "grad_norm": 0.353515625, "learning_rate": 4.998603168042004e-06, "loss": 2.4381, "step": 3758 }, { "epoch": 0.20166309012875536, "grad_norm": 0.33984375, "learning_rate": 4.998600262787256e-06, "loss": 2.3445, "step": 3759 }, { "epoch": 0.2017167381974249, "grad_norm": 0.373046875, "learning_rate": 4.998597354515191e-06, "loss": 2.5062, "step": 3760 }, { "epoch": 0.20177038626609442, "grad_norm": 0.3671875, "learning_rate": 4.998594443225809e-06, "loss": 2.2543, "step": 3761 }, { "epoch": 0.20182403433476395, "grad_norm": 0.294921875, "learning_rate": 4.998591528919116e-06, "loss": 2.081, "step": 3762 }, { "epoch": 0.20187768240343348, "grad_norm": 0.4140625, "learning_rate": 4.998588611595114e-06, "loss": 2.414, "step": 3763 }, { "epoch": 0.20193133047210302, "grad_norm": 0.40234375, "learning_rate": 4.998585691253806e-06, "loss": 1.6603, "step": 3764 }, { "epoch": 0.20198497854077252, "grad_norm": 0.32421875, "learning_rate": 4.998582767895198e-06, "loss": 2.1271, "step": 3765 }, { "epoch": 0.20203862660944205, "grad_norm": 0.34765625, "learning_rate": 4.998579841519292e-06, "loss": 2.2006, "step": 3766 }, { "epoch": 0.20209227467811158, "grad_norm": 0.2890625, "learning_rate": 4.998576912126091e-06, "loss": 2.1217, "step": 3767 }, { "epoch": 0.20214592274678111, "grad_norm": 0.39453125, "learning_rate": 4.998573979715599e-06, "loss": 2.5188, "step": 3768 }, { "epoch": 0.20219957081545065, "grad_norm": 0.71875, "learning_rate": 4.998571044287821e-06, "loss": 2.6488, "step": 3769 }, { "epoch": 0.20225321888412018, "grad_norm": 0.4375, "learning_rate": 4.9985681058427585e-06, "loss": 2.3367, "step": 3770 }, { "epoch": 0.2023068669527897, "grad_norm": 0.353515625, "learning_rate": 4.998565164380415e-06, "loss": 2.5516, "step": 3771 }, { "epoch": 0.20236051502145921, "grad_norm": 0.384765625, "learning_rate": 4.998562219900795e-06, "loss": 2.1424, "step": 3772 }, { "epoch": 0.20241416309012875, "grad_norm": 1.375, "learning_rate": 4.998559272403904e-06, "loss": 2.2496, "step": 3773 }, { "epoch": 0.20246781115879828, "grad_norm": 0.353515625, "learning_rate": 4.998556321889741e-06, "loss": 2.2902, "step": 3774 }, { "epoch": 0.2025214592274678, "grad_norm": 0.412109375, "learning_rate": 4.998553368358313e-06, "loss": 2.1833, "step": 3775 }, { "epoch": 0.20257510729613734, "grad_norm": 0.3828125, "learning_rate": 4.9985504118096226e-06, "loss": 2.4305, "step": 3776 }, { "epoch": 0.20262875536480687, "grad_norm": 0.3125, "learning_rate": 4.998547452243673e-06, "loss": 2.2489, "step": 3777 }, { "epoch": 0.2026824034334764, "grad_norm": 0.375, "learning_rate": 4.9985444896604684e-06, "loss": 2.4892, "step": 3778 }, { "epoch": 0.20273605150214594, "grad_norm": 0.37109375, "learning_rate": 4.998541524060012e-06, "loss": 2.5133, "step": 3779 }, { "epoch": 0.20278969957081544, "grad_norm": 0.345703125, "learning_rate": 4.998538555442308e-06, "loss": 2.1878, "step": 3780 }, { "epoch": 0.20284334763948497, "grad_norm": 0.953125, "learning_rate": 4.998535583807359e-06, "loss": 2.0517, "step": 3781 }, { "epoch": 0.2028969957081545, "grad_norm": 0.39453125, "learning_rate": 4.9985326091551685e-06, "loss": 2.6349, "step": 3782 }, { "epoch": 0.20295064377682404, "grad_norm": 1.8515625, "learning_rate": 4.998529631485741e-06, "loss": 2.3338, "step": 3783 }, { "epoch": 0.20300429184549357, "grad_norm": 0.38671875, "learning_rate": 4.99852665079908e-06, "loss": 2.2319, "step": 3784 }, { "epoch": 0.2030579399141631, "grad_norm": 1.109375, "learning_rate": 4.99852366709519e-06, "loss": 2.4848, "step": 3785 }, { "epoch": 0.20311158798283263, "grad_norm": 0.326171875, "learning_rate": 4.998520680374072e-06, "loss": 2.3535, "step": 3786 }, { "epoch": 0.20316523605150213, "grad_norm": 0.37109375, "learning_rate": 4.998517690635731e-06, "loss": 2.1452, "step": 3787 }, { "epoch": 0.20321888412017167, "grad_norm": 0.326171875, "learning_rate": 4.998514697880171e-06, "loss": 2.1224, "step": 3788 }, { "epoch": 0.2032725321888412, "grad_norm": 0.32421875, "learning_rate": 4.998511702107396e-06, "loss": 2.0312, "step": 3789 }, { "epoch": 0.20332618025751073, "grad_norm": 0.322265625, "learning_rate": 4.998508703317408e-06, "loss": 2.0127, "step": 3790 }, { "epoch": 0.20337982832618026, "grad_norm": 0.37109375, "learning_rate": 4.998505701510212e-06, "loss": 2.3762, "step": 3791 }, { "epoch": 0.2034334763948498, "grad_norm": 0.37890625, "learning_rate": 4.9985026966858105e-06, "loss": 2.3511, "step": 3792 }, { "epoch": 0.20348712446351933, "grad_norm": 0.365234375, "learning_rate": 4.998499688844208e-06, "loss": 2.2865, "step": 3793 }, { "epoch": 0.20354077253218883, "grad_norm": 0.423828125, "learning_rate": 4.998496677985409e-06, "loss": 2.2841, "step": 3794 }, { "epoch": 0.20359442060085836, "grad_norm": 0.34765625, "learning_rate": 4.998493664109414e-06, "loss": 2.5196, "step": 3795 }, { "epoch": 0.2036480686695279, "grad_norm": 0.333984375, "learning_rate": 4.998490647216231e-06, "loss": 2.3083, "step": 3796 }, { "epoch": 0.20370171673819742, "grad_norm": 0.443359375, "learning_rate": 4.99848762730586e-06, "loss": 2.4107, "step": 3797 }, { "epoch": 0.20375536480686696, "grad_norm": 0.419921875, "learning_rate": 4.998484604378306e-06, "loss": 2.3881, "step": 3798 }, { "epoch": 0.2038090128755365, "grad_norm": 0.337890625, "learning_rate": 4.998481578433573e-06, "loss": 2.3179, "step": 3799 }, { "epoch": 0.20386266094420602, "grad_norm": 0.375, "learning_rate": 4.998478549471664e-06, "loss": 2.0142, "step": 3800 }, { "epoch": 0.20391630901287552, "grad_norm": 0.34375, "learning_rate": 4.998475517492582e-06, "loss": 2.2588, "step": 3801 }, { "epoch": 0.20396995708154506, "grad_norm": 0.345703125, "learning_rate": 4.998472482496334e-06, "loss": 2.3742, "step": 3802 }, { "epoch": 0.2040236051502146, "grad_norm": 0.3515625, "learning_rate": 4.99846944448292e-06, "loss": 2.1959, "step": 3803 }, { "epoch": 0.20407725321888412, "grad_norm": 0.3515625, "learning_rate": 4.998466403452344e-06, "loss": 2.3707, "step": 3804 }, { "epoch": 0.20413090128755365, "grad_norm": 0.84375, "learning_rate": 4.998463359404612e-06, "loss": 2.1335, "step": 3805 }, { "epoch": 0.20418454935622318, "grad_norm": 0.390625, "learning_rate": 4.998460312339725e-06, "loss": 2.1404, "step": 3806 }, { "epoch": 0.20423819742489271, "grad_norm": 0.375, "learning_rate": 4.9984572622576884e-06, "loss": 1.6164, "step": 3807 }, { "epoch": 0.20429184549356222, "grad_norm": 0.302734375, "learning_rate": 4.998454209158506e-06, "loss": 2.225, "step": 3808 }, { "epoch": 0.20434549356223175, "grad_norm": 0.341796875, "learning_rate": 4.998451153042181e-06, "loss": 2.3854, "step": 3809 }, { "epoch": 0.20439914163090128, "grad_norm": 0.3359375, "learning_rate": 4.998448093908717e-06, "loss": 2.2163, "step": 3810 }, { "epoch": 0.2044527896995708, "grad_norm": 0.400390625, "learning_rate": 4.998445031758117e-06, "loss": 2.2603, "step": 3811 }, { "epoch": 0.20450643776824035, "grad_norm": 0.357421875, "learning_rate": 4.9984419665903865e-06, "loss": 2.2016, "step": 3812 }, { "epoch": 0.20456008583690988, "grad_norm": 0.36328125, "learning_rate": 4.998438898405527e-06, "loss": 2.3492, "step": 3813 }, { "epoch": 0.2046137339055794, "grad_norm": 0.427734375, "learning_rate": 4.998435827203544e-06, "loss": 2.7806, "step": 3814 }, { "epoch": 0.20466738197424894, "grad_norm": 0.359375, "learning_rate": 4.99843275298444e-06, "loss": 2.5066, "step": 3815 }, { "epoch": 0.20472103004291844, "grad_norm": 0.3359375, "learning_rate": 4.998429675748219e-06, "loss": 1.9309, "step": 3816 }, { "epoch": 0.20477467811158798, "grad_norm": 0.4296875, "learning_rate": 4.998426595494886e-06, "loss": 2.4851, "step": 3817 }, { "epoch": 0.2048283261802575, "grad_norm": 0.427734375, "learning_rate": 4.998423512224443e-06, "loss": 1.9166, "step": 3818 }, { "epoch": 0.20488197424892704, "grad_norm": 0.365234375, "learning_rate": 4.998420425936895e-06, "loss": 2.2036, "step": 3819 }, { "epoch": 0.20493562231759657, "grad_norm": 0.32421875, "learning_rate": 4.9984173366322445e-06, "loss": 2.3051, "step": 3820 }, { "epoch": 0.2049892703862661, "grad_norm": 0.361328125, "learning_rate": 4.998414244310496e-06, "loss": 2.6282, "step": 3821 }, { "epoch": 0.20504291845493564, "grad_norm": 0.328125, "learning_rate": 4.998411148971653e-06, "loss": 2.0352, "step": 3822 }, { "epoch": 0.20509656652360514, "grad_norm": 0.482421875, "learning_rate": 4.99840805061572e-06, "loss": 1.9536, "step": 3823 }, { "epoch": 0.20515021459227467, "grad_norm": 0.3359375, "learning_rate": 4.998404949242699e-06, "loss": 2.3131, "step": 3824 }, { "epoch": 0.2052038626609442, "grad_norm": 0.384765625, "learning_rate": 4.998401844852595e-06, "loss": 2.2638, "step": 3825 }, { "epoch": 0.20525751072961373, "grad_norm": 0.373046875, "learning_rate": 4.9983987374454125e-06, "loss": 2.3642, "step": 3826 }, { "epoch": 0.20531115879828327, "grad_norm": 0.337890625, "learning_rate": 4.9983956270211544e-06, "loss": 2.3778, "step": 3827 }, { "epoch": 0.2053648068669528, "grad_norm": 0.333984375, "learning_rate": 4.998392513579823e-06, "loss": 2.1531, "step": 3828 }, { "epoch": 0.20541845493562233, "grad_norm": 4.5625, "learning_rate": 4.9983893971214246e-06, "loss": 2.4674, "step": 3829 }, { "epoch": 0.20547210300429183, "grad_norm": 0.326171875, "learning_rate": 4.998386277645961e-06, "loss": 2.3772, "step": 3830 }, { "epoch": 0.20552575107296137, "grad_norm": 0.3671875, "learning_rate": 4.998383155153438e-06, "loss": 2.5074, "step": 3831 }, { "epoch": 0.2055793991416309, "grad_norm": 0.3515625, "learning_rate": 4.998380029643857e-06, "loss": 2.2774, "step": 3832 }, { "epoch": 0.20563304721030043, "grad_norm": 0.3046875, "learning_rate": 4.998376901117224e-06, "loss": 2.2666, "step": 3833 }, { "epoch": 0.20568669527896996, "grad_norm": 0.349609375, "learning_rate": 4.99837376957354e-06, "loss": 2.3217, "step": 3834 }, { "epoch": 0.2057403433476395, "grad_norm": 0.388671875, "learning_rate": 4.998370635012811e-06, "loss": 2.3125, "step": 3835 }, { "epoch": 0.20579399141630902, "grad_norm": 0.3125, "learning_rate": 4.998367497435041e-06, "loss": 2.3681, "step": 3836 }, { "epoch": 0.20584763948497853, "grad_norm": 0.43359375, "learning_rate": 4.998364356840233e-06, "loss": 2.5208, "step": 3837 }, { "epoch": 0.20590128755364806, "grad_norm": 0.40625, "learning_rate": 4.99836121322839e-06, "loss": 2.4522, "step": 3838 }, { "epoch": 0.2059549356223176, "grad_norm": 0.5625, "learning_rate": 4.998358066599518e-06, "loss": 1.7315, "step": 3839 }, { "epoch": 0.20600858369098712, "grad_norm": 0.37890625, "learning_rate": 4.998354916953618e-06, "loss": 2.2592, "step": 3840 }, { "epoch": 0.20606223175965666, "grad_norm": 0.70703125, "learning_rate": 4.998351764290696e-06, "loss": 2.2865, "step": 3841 }, { "epoch": 0.2061158798283262, "grad_norm": 0.33984375, "learning_rate": 4.998348608610755e-06, "loss": 2.37, "step": 3842 }, { "epoch": 0.20616952789699572, "grad_norm": 0.310546875, "learning_rate": 4.998345449913799e-06, "loss": 2.3505, "step": 3843 }, { "epoch": 0.20622317596566522, "grad_norm": 0.353515625, "learning_rate": 4.998342288199831e-06, "loss": 2.036, "step": 3844 }, { "epoch": 0.20627682403433475, "grad_norm": 0.365234375, "learning_rate": 4.998339123468856e-06, "loss": 2.3174, "step": 3845 }, { "epoch": 0.2063304721030043, "grad_norm": 0.37109375, "learning_rate": 4.998335955720877e-06, "loss": 2.3928, "step": 3846 }, { "epoch": 0.20638412017167382, "grad_norm": 0.404296875, "learning_rate": 4.998332784955898e-06, "loss": 2.517, "step": 3847 }, { "epoch": 0.20643776824034335, "grad_norm": 0.326171875, "learning_rate": 4.998329611173924e-06, "loss": 2.3277, "step": 3848 }, { "epoch": 0.20649141630901288, "grad_norm": 0.515625, "learning_rate": 4.998326434374957e-06, "loss": 2.1569, "step": 3849 }, { "epoch": 0.2065450643776824, "grad_norm": 0.376953125, "learning_rate": 4.998323254559002e-06, "loss": 2.2477, "step": 3850 }, { "epoch": 0.20659871244635192, "grad_norm": 0.345703125, "learning_rate": 4.998320071726062e-06, "loss": 2.3162, "step": 3851 }, { "epoch": 0.20665236051502145, "grad_norm": 0.44140625, "learning_rate": 4.9983168858761415e-06, "loss": 2.1794, "step": 3852 }, { "epoch": 0.20670600858369098, "grad_norm": 0.46484375, "learning_rate": 4.998313697009244e-06, "loss": 2.2518, "step": 3853 }, { "epoch": 0.2067596566523605, "grad_norm": 0.42578125, "learning_rate": 4.998310505125374e-06, "loss": 2.3789, "step": 3854 }, { "epoch": 0.20681330472103004, "grad_norm": 0.447265625, "learning_rate": 4.9983073102245346e-06, "loss": 2.2064, "step": 3855 }, { "epoch": 0.20686695278969958, "grad_norm": 0.34375, "learning_rate": 4.99830411230673e-06, "loss": 2.364, "step": 3856 }, { "epoch": 0.2069206008583691, "grad_norm": 0.322265625, "learning_rate": 4.998300911371964e-06, "loss": 1.9633, "step": 3857 }, { "epoch": 0.20697424892703864, "grad_norm": 0.345703125, "learning_rate": 4.99829770742024e-06, "loss": 2.3715, "step": 3858 }, { "epoch": 0.20702789699570814, "grad_norm": 0.33203125, "learning_rate": 4.998294500451563e-06, "loss": 2.4128, "step": 3859 }, { "epoch": 0.20708154506437768, "grad_norm": 0.3828125, "learning_rate": 4.9982912904659355e-06, "loss": 2.4717, "step": 3860 }, { "epoch": 0.2071351931330472, "grad_norm": 0.31640625, "learning_rate": 4.998288077463363e-06, "loss": 2.172, "step": 3861 }, { "epoch": 0.20718884120171674, "grad_norm": 0.36328125, "learning_rate": 4.998284861443848e-06, "loss": 2.4427, "step": 3862 }, { "epoch": 0.20724248927038627, "grad_norm": 0.376953125, "learning_rate": 4.998281642407394e-06, "loss": 2.2613, "step": 3863 }, { "epoch": 0.2072961373390558, "grad_norm": 0.3203125, "learning_rate": 4.9982784203540065e-06, "loss": 2.2279, "step": 3864 }, { "epoch": 0.20734978540772533, "grad_norm": 0.40625, "learning_rate": 4.998275195283689e-06, "loss": 2.4857, "step": 3865 }, { "epoch": 0.20740343347639484, "grad_norm": 1.46875, "learning_rate": 4.998271967196445e-06, "loss": 2.2866, "step": 3866 }, { "epoch": 0.20745708154506437, "grad_norm": 0.3984375, "learning_rate": 4.998268736092278e-06, "loss": 2.2933, "step": 3867 }, { "epoch": 0.2075107296137339, "grad_norm": 0.3125, "learning_rate": 4.998265501971192e-06, "loss": 2.1317, "step": 3868 }, { "epoch": 0.20756437768240343, "grad_norm": 0.33984375, "learning_rate": 4.998262264833192e-06, "loss": 2.3518, "step": 3869 }, { "epoch": 0.20761802575107297, "grad_norm": 0.318359375, "learning_rate": 4.99825902467828e-06, "loss": 2.2185, "step": 3870 }, { "epoch": 0.2076716738197425, "grad_norm": 0.337890625, "learning_rate": 4.998255781506463e-06, "loss": 2.2066, "step": 3871 }, { "epoch": 0.20772532188841203, "grad_norm": 0.3828125, "learning_rate": 4.998252535317741e-06, "loss": 2.335, "step": 3872 }, { "epoch": 0.20777896995708153, "grad_norm": 0.318359375, "learning_rate": 4.998249286112121e-06, "loss": 2.2384, "step": 3873 }, { "epoch": 0.20783261802575106, "grad_norm": 0.486328125, "learning_rate": 4.998246033889605e-06, "loss": 2.354, "step": 3874 }, { "epoch": 0.2078862660944206, "grad_norm": 0.353515625, "learning_rate": 4.998242778650198e-06, "loss": 2.3831, "step": 3875 }, { "epoch": 0.20793991416309013, "grad_norm": 0.421875, "learning_rate": 4.998239520393903e-06, "loss": 2.5176, "step": 3876 }, { "epoch": 0.20799356223175966, "grad_norm": 0.3125, "learning_rate": 4.998236259120726e-06, "loss": 2.4492, "step": 3877 }, { "epoch": 0.2080472103004292, "grad_norm": 0.3671875, "learning_rate": 4.99823299483067e-06, "loss": 2.328, "step": 3878 }, { "epoch": 0.20810085836909872, "grad_norm": 0.3671875, "learning_rate": 4.998229727523737e-06, "loss": 2.2554, "step": 3879 }, { "epoch": 0.20815450643776823, "grad_norm": 0.3359375, "learning_rate": 4.998226457199932e-06, "loss": 2.2248, "step": 3880 }, { "epoch": 0.20820815450643776, "grad_norm": 247.0, "learning_rate": 4.9982231838592605e-06, "loss": 2.3378, "step": 3881 }, { "epoch": 0.2082618025751073, "grad_norm": 0.326171875, "learning_rate": 4.998219907501725e-06, "loss": 2.093, "step": 3882 }, { "epoch": 0.20831545064377682, "grad_norm": 0.416015625, "learning_rate": 4.99821662812733e-06, "loss": 2.4154, "step": 3883 }, { "epoch": 0.20836909871244635, "grad_norm": 0.365234375, "learning_rate": 4.998213345736079e-06, "loss": 2.2663, "step": 3884 }, { "epoch": 0.2084227467811159, "grad_norm": 0.62890625, "learning_rate": 4.998210060327976e-06, "loss": 2.1113, "step": 3885 }, { "epoch": 0.20847639484978542, "grad_norm": 0.298828125, "learning_rate": 4.998206771903025e-06, "loss": 2.5071, "step": 3886 }, { "epoch": 0.20853004291845492, "grad_norm": 0.640625, "learning_rate": 4.99820348046123e-06, "loss": 2.3559, "step": 3887 }, { "epoch": 0.20858369098712445, "grad_norm": 0.431640625, "learning_rate": 4.9982001860025965e-06, "loss": 2.3928, "step": 3888 }, { "epoch": 0.20863733905579399, "grad_norm": 0.33203125, "learning_rate": 4.998196888527126e-06, "loss": 2.3485, "step": 3889 }, { "epoch": 0.20869098712446352, "grad_norm": 0.5, "learning_rate": 4.998193588034824e-06, "loss": 2.5808, "step": 3890 }, { "epoch": 0.20874463519313305, "grad_norm": 0.46875, "learning_rate": 4.998190284525694e-06, "loss": 2.4403, "step": 3891 }, { "epoch": 0.20879828326180258, "grad_norm": 0.3828125, "learning_rate": 4.99818697799974e-06, "loss": 2.3935, "step": 3892 }, { "epoch": 0.2088519313304721, "grad_norm": 0.318359375, "learning_rate": 4.998183668456965e-06, "loss": 2.276, "step": 3893 }, { "epoch": 0.20890557939914164, "grad_norm": 0.341796875, "learning_rate": 4.998180355897375e-06, "loss": 2.4047, "step": 3894 }, { "epoch": 0.20895922746781115, "grad_norm": 0.318359375, "learning_rate": 4.998177040320973e-06, "loss": 2.4313, "step": 3895 }, { "epoch": 0.20901287553648068, "grad_norm": 0.365234375, "learning_rate": 4.998173721727764e-06, "loss": 2.1914, "step": 3896 }, { "epoch": 0.2090665236051502, "grad_norm": 0.392578125, "learning_rate": 4.99817040011775e-06, "loss": 2.2927, "step": 3897 }, { "epoch": 0.20912017167381974, "grad_norm": 0.33984375, "learning_rate": 4.998167075490936e-06, "loss": 2.2463, "step": 3898 }, { "epoch": 0.20917381974248928, "grad_norm": 0.453125, "learning_rate": 4.998163747847326e-06, "loss": 2.535, "step": 3899 }, { "epoch": 0.2092274678111588, "grad_norm": 0.328125, "learning_rate": 4.998160417186925e-06, "loss": 2.0962, "step": 3900 }, { "epoch": 0.20928111587982834, "grad_norm": 0.3515625, "learning_rate": 4.998157083509735e-06, "loss": 2.2723, "step": 3901 }, { "epoch": 0.20933476394849784, "grad_norm": 0.37109375, "learning_rate": 4.998153746815762e-06, "loss": 2.0851, "step": 3902 }, { "epoch": 0.20938841201716737, "grad_norm": 0.412109375, "learning_rate": 4.9981504071050076e-06, "loss": 2.0602, "step": 3903 }, { "epoch": 0.2094420600858369, "grad_norm": 0.33203125, "learning_rate": 4.9981470643774795e-06, "loss": 2.3225, "step": 3904 }, { "epoch": 0.20949570815450644, "grad_norm": 0.376953125, "learning_rate": 4.998143718633178e-06, "loss": 2.2877, "step": 3905 }, { "epoch": 0.20954935622317597, "grad_norm": 0.40234375, "learning_rate": 4.99814036987211e-06, "loss": 2.4064, "step": 3906 }, { "epoch": 0.2096030042918455, "grad_norm": 0.439453125, "learning_rate": 4.998137018094278e-06, "loss": 2.3032, "step": 3907 }, { "epoch": 0.20965665236051503, "grad_norm": 0.376953125, "learning_rate": 4.998133663299686e-06, "loss": 2.0184, "step": 3908 }, { "epoch": 0.20971030042918454, "grad_norm": 0.412109375, "learning_rate": 4.998130305488339e-06, "loss": 2.3746, "step": 3909 }, { "epoch": 0.20976394849785407, "grad_norm": 0.369140625, "learning_rate": 4.99812694466024e-06, "loss": 2.0001, "step": 3910 }, { "epoch": 0.2098175965665236, "grad_norm": 0.384765625, "learning_rate": 4.998123580815394e-06, "loss": 2.3673, "step": 3911 }, { "epoch": 0.20987124463519313, "grad_norm": 0.294921875, "learning_rate": 4.998120213953804e-06, "loss": 2.2298, "step": 3912 }, { "epoch": 0.20992489270386266, "grad_norm": 0.375, "learning_rate": 4.998116844075474e-06, "loss": 2.141, "step": 3913 }, { "epoch": 0.2099785407725322, "grad_norm": 0.365234375, "learning_rate": 4.99811347118041e-06, "loss": 2.2594, "step": 3914 }, { "epoch": 0.21003218884120173, "grad_norm": 0.32421875, "learning_rate": 4.998110095268615e-06, "loss": 2.2526, "step": 3915 }, { "epoch": 0.21008583690987123, "grad_norm": 0.384765625, "learning_rate": 4.998106716340092e-06, "loss": 2.4891, "step": 3916 }, { "epoch": 0.21013948497854076, "grad_norm": 0.35546875, "learning_rate": 4.998103334394846e-06, "loss": 1.9348, "step": 3917 }, { "epoch": 0.2101931330472103, "grad_norm": 0.33984375, "learning_rate": 4.998099949432882e-06, "loss": 2.2799, "step": 3918 }, { "epoch": 0.21024678111587983, "grad_norm": 0.359375, "learning_rate": 4.998096561454202e-06, "loss": 2.3053, "step": 3919 }, { "epoch": 0.21030042918454936, "grad_norm": 0.41796875, "learning_rate": 4.998093170458812e-06, "loss": 2.3952, "step": 3920 }, { "epoch": 0.2103540772532189, "grad_norm": 0.431640625, "learning_rate": 4.998089776446715e-06, "loss": 2.1114, "step": 3921 }, { "epoch": 0.21040772532188842, "grad_norm": 0.42578125, "learning_rate": 4.998086379417915e-06, "loss": 2.2787, "step": 3922 }, { "epoch": 0.21046137339055793, "grad_norm": 0.341796875, "learning_rate": 4.998082979372418e-06, "loss": 2.2043, "step": 3923 }, { "epoch": 0.21051502145922746, "grad_norm": 0.423828125, "learning_rate": 4.998079576310225e-06, "loss": 2.3041, "step": 3924 }, { "epoch": 0.210568669527897, "grad_norm": 0.390625, "learning_rate": 4.998076170231343e-06, "loss": 2.4187, "step": 3925 }, { "epoch": 0.21062231759656652, "grad_norm": 0.365234375, "learning_rate": 4.998072761135774e-06, "loss": 2.1564, "step": 3926 }, { "epoch": 0.21067596566523605, "grad_norm": 0.404296875, "learning_rate": 4.998069349023523e-06, "loss": 2.4847, "step": 3927 }, { "epoch": 0.21072961373390559, "grad_norm": 0.345703125, "learning_rate": 4.998065933894594e-06, "loss": 2.0966, "step": 3928 }, { "epoch": 0.21078326180257512, "grad_norm": 0.369140625, "learning_rate": 4.998062515748992e-06, "loss": 2.3758, "step": 3929 }, { "epoch": 0.21083690987124465, "grad_norm": 0.357421875, "learning_rate": 4.99805909458672e-06, "loss": 2.3133, "step": 3930 }, { "epoch": 0.21089055793991415, "grad_norm": 0.380859375, "learning_rate": 4.998055670407782e-06, "loss": 2.4737, "step": 3931 }, { "epoch": 0.21094420600858368, "grad_norm": 0.326171875, "learning_rate": 4.998052243212184e-06, "loss": 2.3436, "step": 3932 }, { "epoch": 0.21099785407725322, "grad_norm": 0.306640625, "learning_rate": 4.998048812999927e-06, "loss": 2.1672, "step": 3933 }, { "epoch": 0.21105150214592275, "grad_norm": 0.34375, "learning_rate": 4.9980453797710175e-06, "loss": 2.491, "step": 3934 }, { "epoch": 0.21110515021459228, "grad_norm": 0.412109375, "learning_rate": 4.99804194352546e-06, "loss": 2.2919, "step": 3935 }, { "epoch": 0.2111587982832618, "grad_norm": 0.318359375, "learning_rate": 4.998038504263256e-06, "loss": 2.1322, "step": 3936 }, { "epoch": 0.21121244635193134, "grad_norm": 0.32421875, "learning_rate": 4.998035061984413e-06, "loss": 2.3783, "step": 3937 }, { "epoch": 0.21126609442060085, "grad_norm": 0.5546875, "learning_rate": 4.998031616688932e-06, "loss": 1.4935, "step": 3938 }, { "epoch": 0.21131974248927038, "grad_norm": 0.33984375, "learning_rate": 4.9980281683768195e-06, "loss": 2.2284, "step": 3939 }, { "epoch": 0.2113733905579399, "grad_norm": 0.318359375, "learning_rate": 4.998024717048079e-06, "loss": 1.8369, "step": 3940 }, { "epoch": 0.21142703862660944, "grad_norm": 0.40234375, "learning_rate": 4.9980212627027135e-06, "loss": 2.181, "step": 3941 }, { "epoch": 0.21148068669527897, "grad_norm": 0.357421875, "learning_rate": 4.9980178053407295e-06, "loss": 2.4551, "step": 3942 }, { "epoch": 0.2115343347639485, "grad_norm": 0.375, "learning_rate": 4.998014344962128e-06, "loss": 2.1323, "step": 3943 }, { "epoch": 0.21158798283261804, "grad_norm": 0.365234375, "learning_rate": 4.998010881566917e-06, "loss": 2.3545, "step": 3944 }, { "epoch": 0.21164163090128754, "grad_norm": 0.32421875, "learning_rate": 4.998007415155097e-06, "loss": 2.1893, "step": 3945 }, { "epoch": 0.21169527896995707, "grad_norm": 0.333984375, "learning_rate": 4.998003945726675e-06, "loss": 2.5058, "step": 3946 }, { "epoch": 0.2117489270386266, "grad_norm": 0.373046875, "learning_rate": 4.998000473281654e-06, "loss": 2.3696, "step": 3947 }, { "epoch": 0.21180257510729614, "grad_norm": 0.4921875, "learning_rate": 4.997996997820038e-06, "loss": 1.3883, "step": 3948 }, { "epoch": 0.21185622317596567, "grad_norm": 0.345703125, "learning_rate": 4.997993519341831e-06, "loss": 2.171, "step": 3949 }, { "epoch": 0.2119098712446352, "grad_norm": 0.380859375, "learning_rate": 4.997990037847039e-06, "loss": 2.2555, "step": 3950 }, { "epoch": 0.21196351931330473, "grad_norm": 0.333984375, "learning_rate": 4.997986553335664e-06, "loss": 2.3815, "step": 3951 }, { "epoch": 0.21201716738197424, "grad_norm": 0.349609375, "learning_rate": 4.997983065807711e-06, "loss": 2.4007, "step": 3952 }, { "epoch": 0.21207081545064377, "grad_norm": 1.046875, "learning_rate": 4.9979795752631845e-06, "loss": 2.2918, "step": 3953 }, { "epoch": 0.2121244635193133, "grad_norm": 0.3125, "learning_rate": 4.997976081702089e-06, "loss": 2.2767, "step": 3954 }, { "epoch": 0.21217811158798283, "grad_norm": 0.353515625, "learning_rate": 4.997972585124428e-06, "loss": 2.3588, "step": 3955 }, { "epoch": 0.21223175965665236, "grad_norm": 0.33203125, "learning_rate": 4.997969085530205e-06, "loss": 2.3985, "step": 3956 }, { "epoch": 0.2122854077253219, "grad_norm": 0.439453125, "learning_rate": 4.9979655829194264e-06, "loss": 2.3488, "step": 3957 }, { "epoch": 0.21233905579399143, "grad_norm": 0.2890625, "learning_rate": 4.997962077292095e-06, "loss": 2.1719, "step": 3958 }, { "epoch": 0.21239270386266093, "grad_norm": 0.30078125, "learning_rate": 4.997958568648214e-06, "loss": 2.3404, "step": 3959 }, { "epoch": 0.21244635193133046, "grad_norm": 0.3828125, "learning_rate": 4.99795505698779e-06, "loss": 2.3092, "step": 3960 }, { "epoch": 0.2125, "grad_norm": 0.337890625, "learning_rate": 4.9979515423108255e-06, "loss": 2.3467, "step": 3961 }, { "epoch": 0.21255364806866953, "grad_norm": 0.361328125, "learning_rate": 4.997948024617326e-06, "loss": 2.3594, "step": 3962 }, { "epoch": 0.21260729613733906, "grad_norm": 0.34375, "learning_rate": 4.9979445039072945e-06, "loss": 2.2872, "step": 3963 }, { "epoch": 0.2126609442060086, "grad_norm": 0.42578125, "learning_rate": 4.997940980180736e-06, "loss": 2.1411, "step": 3964 }, { "epoch": 0.21271459227467812, "grad_norm": 0.33203125, "learning_rate": 4.9979374534376546e-06, "loss": 2.1414, "step": 3965 }, { "epoch": 0.21276824034334765, "grad_norm": 0.546875, "learning_rate": 4.9979339236780545e-06, "loss": 2.229, "step": 3966 }, { "epoch": 0.21282188841201716, "grad_norm": 0.388671875, "learning_rate": 4.9979303909019405e-06, "loss": 2.5274, "step": 3967 }, { "epoch": 0.2128755364806867, "grad_norm": 0.4140625, "learning_rate": 4.997926855109316e-06, "loss": 2.3803, "step": 3968 }, { "epoch": 0.21292918454935622, "grad_norm": 0.38671875, "learning_rate": 4.997923316300186e-06, "loss": 2.3826, "step": 3969 }, { "epoch": 0.21298283261802575, "grad_norm": 0.40625, "learning_rate": 4.9979197744745544e-06, "loss": 2.3743, "step": 3970 }, { "epoch": 0.21303648068669528, "grad_norm": 0.337890625, "learning_rate": 4.997916229632425e-06, "loss": 2.2563, "step": 3971 }, { "epoch": 0.21309012875536482, "grad_norm": 0.455078125, "learning_rate": 4.997912681773803e-06, "loss": 2.4757, "step": 3972 }, { "epoch": 0.21314377682403435, "grad_norm": 0.357421875, "learning_rate": 4.9979091308986926e-06, "loss": 2.2695, "step": 3973 }, { "epoch": 0.21319742489270385, "grad_norm": 0.439453125, "learning_rate": 4.9979055770070975e-06, "loss": 2.5158, "step": 3974 }, { "epoch": 0.21325107296137338, "grad_norm": 0.412109375, "learning_rate": 4.997902020099022e-06, "loss": 2.1662, "step": 3975 }, { "epoch": 0.21330472103004292, "grad_norm": 0.41796875, "learning_rate": 4.997898460174471e-06, "loss": 1.9903, "step": 3976 }, { "epoch": 0.21335836909871245, "grad_norm": 0.3125, "learning_rate": 4.997894897233449e-06, "loss": 2.1737, "step": 3977 }, { "epoch": 0.21341201716738198, "grad_norm": 0.6171875, "learning_rate": 4.997891331275958e-06, "loss": 2.2818, "step": 3978 }, { "epoch": 0.2134656652360515, "grad_norm": 0.33984375, "learning_rate": 4.997887762302005e-06, "loss": 2.3255, "step": 3979 }, { "epoch": 0.21351931330472104, "grad_norm": 0.37890625, "learning_rate": 4.9978841903115934e-06, "loss": 2.3157, "step": 3980 }, { "epoch": 0.21357296137339055, "grad_norm": 0.32421875, "learning_rate": 4.9978806153047274e-06, "loss": 2.364, "step": 3981 }, { "epoch": 0.21362660944206008, "grad_norm": 0.318359375, "learning_rate": 4.997877037281412e-06, "loss": 2.2239, "step": 3982 }, { "epoch": 0.2136802575107296, "grad_norm": 0.365234375, "learning_rate": 4.997873456241651e-06, "loss": 2.2862, "step": 3983 }, { "epoch": 0.21373390557939914, "grad_norm": 0.43359375, "learning_rate": 4.997869872185448e-06, "loss": 2.4245, "step": 3984 }, { "epoch": 0.21378755364806867, "grad_norm": 0.416015625, "learning_rate": 4.997866285112808e-06, "loss": 2.289, "step": 3985 }, { "epoch": 0.2138412017167382, "grad_norm": 0.5625, "learning_rate": 4.9978626950237355e-06, "loss": 2.2769, "step": 3986 }, { "epoch": 0.21389484978540774, "grad_norm": 0.326171875, "learning_rate": 4.997859101918235e-06, "loss": 2.2194, "step": 3987 }, { "epoch": 0.21394849785407724, "grad_norm": 0.39453125, "learning_rate": 4.99785550579631e-06, "loss": 2.4305, "step": 3988 }, { "epoch": 0.21400214592274677, "grad_norm": 0.29296875, "learning_rate": 4.997851906657966e-06, "loss": 2.1487, "step": 3989 }, { "epoch": 0.2140557939914163, "grad_norm": 0.44921875, "learning_rate": 4.997848304503206e-06, "loss": 2.4681, "step": 3990 }, { "epoch": 0.21410944206008584, "grad_norm": 0.38671875, "learning_rate": 4.997844699332035e-06, "loss": 2.4863, "step": 3991 }, { "epoch": 0.21416309012875537, "grad_norm": 0.376953125, "learning_rate": 4.997841091144457e-06, "loss": 2.4268, "step": 3992 }, { "epoch": 0.2142167381974249, "grad_norm": 0.337890625, "learning_rate": 4.9978374799404776e-06, "loss": 2.2287, "step": 3993 }, { "epoch": 0.21427038626609443, "grad_norm": 0.4140625, "learning_rate": 4.9978338657201e-06, "loss": 2.327, "step": 3994 }, { "epoch": 0.21432403433476394, "grad_norm": 0.373046875, "learning_rate": 4.997830248483328e-06, "loss": 2.2019, "step": 3995 }, { "epoch": 0.21437768240343347, "grad_norm": 0.4375, "learning_rate": 4.997826628230168e-06, "loss": 2.2521, "step": 3996 }, { "epoch": 0.214431330472103, "grad_norm": 0.318359375, "learning_rate": 4.997823004960623e-06, "loss": 2.3175, "step": 3997 }, { "epoch": 0.21448497854077253, "grad_norm": 0.3203125, "learning_rate": 4.997819378674698e-06, "loss": 2.3772, "step": 3998 }, { "epoch": 0.21453862660944206, "grad_norm": 0.3359375, "learning_rate": 4.9978157493723964e-06, "loss": 2.1896, "step": 3999 }, { "epoch": 0.2145922746781116, "grad_norm": 0.365234375, "learning_rate": 4.997812117053723e-06, "loss": 2.4738, "step": 4000 }, { "epoch": 0.21464592274678113, "grad_norm": 0.333984375, "learning_rate": 4.9978084817186825e-06, "loss": 2.0489, "step": 4001 }, { "epoch": 0.21469957081545063, "grad_norm": 0.310546875, "learning_rate": 4.997804843367279e-06, "loss": 2.4799, "step": 4002 }, { "epoch": 0.21475321888412016, "grad_norm": 0.41015625, "learning_rate": 4.997801201999517e-06, "loss": 2.145, "step": 4003 }, { "epoch": 0.2148068669527897, "grad_norm": 0.38671875, "learning_rate": 4.997797557615401e-06, "loss": 2.0311, "step": 4004 }, { "epoch": 0.21486051502145923, "grad_norm": 0.349609375, "learning_rate": 4.997793910214935e-06, "loss": 2.269, "step": 4005 }, { "epoch": 0.21491416309012876, "grad_norm": 0.50390625, "learning_rate": 4.997790259798125e-06, "loss": 2.242, "step": 4006 }, { "epoch": 0.2149678111587983, "grad_norm": 0.427734375, "learning_rate": 4.997786606364973e-06, "loss": 1.6027, "step": 4007 }, { "epoch": 0.21502145922746782, "grad_norm": 1.2265625, "learning_rate": 4.997782949915484e-06, "loss": 2.1967, "step": 4008 }, { "epoch": 0.21507510729613735, "grad_norm": 0.375, "learning_rate": 4.997779290449663e-06, "loss": 2.3899, "step": 4009 }, { "epoch": 0.21512875536480686, "grad_norm": 0.326171875, "learning_rate": 4.997775627967516e-06, "loss": 2.3062, "step": 4010 }, { "epoch": 0.2151824034334764, "grad_norm": 0.37109375, "learning_rate": 4.997771962469045e-06, "loss": 2.31, "step": 4011 }, { "epoch": 0.21523605150214592, "grad_norm": 0.345703125, "learning_rate": 4.9977682939542545e-06, "loss": 2.3, "step": 4012 }, { "epoch": 0.21528969957081545, "grad_norm": 0.62890625, "learning_rate": 4.99776462242315e-06, "loss": 2.2512, "step": 4013 }, { "epoch": 0.21534334763948498, "grad_norm": 0.74609375, "learning_rate": 4.997760947875736e-06, "loss": 1.4578, "step": 4014 }, { "epoch": 0.21539699570815452, "grad_norm": 0.32421875, "learning_rate": 4.997757270312016e-06, "loss": 2.1911, "step": 4015 }, { "epoch": 0.21545064377682405, "grad_norm": 0.3984375, "learning_rate": 4.997753589731995e-06, "loss": 2.2334, "step": 4016 }, { "epoch": 0.21550429184549355, "grad_norm": 0.353515625, "learning_rate": 4.997749906135679e-06, "loss": 2.3351, "step": 4017 }, { "epoch": 0.21555793991416308, "grad_norm": 0.408203125, "learning_rate": 4.997746219523069e-06, "loss": 2.102, "step": 4018 }, { "epoch": 0.21561158798283261, "grad_norm": 0.298828125, "learning_rate": 4.997742529894171e-06, "loss": 2.1458, "step": 4019 }, { "epoch": 0.21566523605150215, "grad_norm": 0.376953125, "learning_rate": 4.997738837248991e-06, "loss": 2.2441, "step": 4020 }, { "epoch": 0.21571888412017168, "grad_norm": 0.416015625, "learning_rate": 4.997735141587532e-06, "loss": 2.2863, "step": 4021 }, { "epoch": 0.2157725321888412, "grad_norm": 0.33984375, "learning_rate": 4.997731442909798e-06, "loss": 2.5768, "step": 4022 }, { "epoch": 0.21582618025751074, "grad_norm": 0.3515625, "learning_rate": 4.9977277412157945e-06, "loss": 2.4309, "step": 4023 }, { "epoch": 0.21587982832618025, "grad_norm": 0.4453125, "learning_rate": 4.997724036505525e-06, "loss": 2.2762, "step": 4024 }, { "epoch": 0.21593347639484978, "grad_norm": 0.322265625, "learning_rate": 4.997720328778996e-06, "loss": 2.1867, "step": 4025 }, { "epoch": 0.2159871244635193, "grad_norm": 0.435546875, "learning_rate": 4.99771661803621e-06, "loss": 2.3371, "step": 4026 }, { "epoch": 0.21604077253218884, "grad_norm": 0.37890625, "learning_rate": 4.997712904277172e-06, "loss": 2.1668, "step": 4027 }, { "epoch": 0.21609442060085837, "grad_norm": 0.416015625, "learning_rate": 4.9977091875018865e-06, "loss": 2.126, "step": 4028 }, { "epoch": 0.2161480686695279, "grad_norm": 0.390625, "learning_rate": 4.9977054677103575e-06, "loss": 2.3522, "step": 4029 }, { "epoch": 0.21620171673819744, "grad_norm": 0.3125, "learning_rate": 4.997701744902591e-06, "loss": 2.2453, "step": 4030 }, { "epoch": 0.21625536480686694, "grad_norm": 0.44140625, "learning_rate": 4.9976980190785896e-06, "loss": 2.3728, "step": 4031 }, { "epoch": 0.21630901287553647, "grad_norm": 0.34765625, "learning_rate": 4.99769429023836e-06, "loss": 2.468, "step": 4032 }, { "epoch": 0.216362660944206, "grad_norm": 0.34765625, "learning_rate": 4.997690558381904e-06, "loss": 2.1287, "step": 4033 }, { "epoch": 0.21641630901287554, "grad_norm": 0.326171875, "learning_rate": 4.997686823509228e-06, "loss": 2.1396, "step": 4034 }, { "epoch": 0.21646995708154507, "grad_norm": 0.5078125, "learning_rate": 4.997683085620336e-06, "loss": 2.2958, "step": 4035 }, { "epoch": 0.2165236051502146, "grad_norm": 0.388671875, "learning_rate": 4.997679344715233e-06, "loss": 2.6031, "step": 4036 }, { "epoch": 0.21657725321888413, "grad_norm": 0.40625, "learning_rate": 4.997675600793922e-06, "loss": 1.9906, "step": 4037 }, { "epoch": 0.21663090128755363, "grad_norm": 0.3671875, "learning_rate": 4.9976718538564095e-06, "loss": 2.3578, "step": 4038 }, { "epoch": 0.21668454935622317, "grad_norm": 0.431640625, "learning_rate": 4.9976681039026996e-06, "loss": 2.3561, "step": 4039 }, { "epoch": 0.2167381974248927, "grad_norm": 0.33984375, "learning_rate": 4.997664350932795e-06, "loss": 2.258, "step": 4040 }, { "epoch": 0.21679184549356223, "grad_norm": 0.50390625, "learning_rate": 4.997660594946703e-06, "loss": 2.3824, "step": 4041 }, { "epoch": 0.21684549356223176, "grad_norm": 0.46484375, "learning_rate": 4.9976568359444255e-06, "loss": 2.5739, "step": 4042 }, { "epoch": 0.2168991416309013, "grad_norm": 0.41796875, "learning_rate": 4.997653073925969e-06, "loss": 2.4491, "step": 4043 }, { "epoch": 0.21695278969957082, "grad_norm": 0.4140625, "learning_rate": 4.997649308891336e-06, "loss": 2.1466, "step": 4044 }, { "epoch": 0.21700643776824036, "grad_norm": 0.37109375, "learning_rate": 4.9976455408405335e-06, "loss": 2.2835, "step": 4045 }, { "epoch": 0.21706008583690986, "grad_norm": 0.298828125, "learning_rate": 4.997641769773564e-06, "loss": 2.3029, "step": 4046 }, { "epoch": 0.2171137339055794, "grad_norm": 0.34375, "learning_rate": 4.9976379956904344e-06, "loss": 2.1545, "step": 4047 }, { "epoch": 0.21716738197424892, "grad_norm": 0.361328125, "learning_rate": 4.997634218591147e-06, "loss": 2.2468, "step": 4048 }, { "epoch": 0.21722103004291846, "grad_norm": 0.609375, "learning_rate": 4.997630438475707e-06, "loss": 2.3443, "step": 4049 }, { "epoch": 0.217274678111588, "grad_norm": 0.365234375, "learning_rate": 4.99762665534412e-06, "loss": 2.2253, "step": 4050 }, { "epoch": 0.21732832618025752, "grad_norm": 0.369140625, "learning_rate": 4.997622869196389e-06, "loss": 2.3901, "step": 4051 }, { "epoch": 0.21738197424892705, "grad_norm": 0.330078125, "learning_rate": 4.9976190800325184e-06, "loss": 2.0571, "step": 4052 }, { "epoch": 0.21743562231759656, "grad_norm": 0.34765625, "learning_rate": 4.997615287852514e-06, "loss": 2.5846, "step": 4053 }, { "epoch": 0.2174892703862661, "grad_norm": 0.412109375, "learning_rate": 4.997611492656381e-06, "loss": 2.4826, "step": 4054 }, { "epoch": 0.21754291845493562, "grad_norm": 0.95703125, "learning_rate": 4.9976076944441235e-06, "loss": 2.2685, "step": 4055 }, { "epoch": 0.21759656652360515, "grad_norm": 0.390625, "learning_rate": 4.997603893215744e-06, "loss": 2.1656, "step": 4056 }, { "epoch": 0.21765021459227468, "grad_norm": 0.373046875, "learning_rate": 4.997600088971249e-06, "loss": 2.7254, "step": 4057 }, { "epoch": 0.21770386266094421, "grad_norm": 0.345703125, "learning_rate": 4.9975962817106435e-06, "loss": 2.4529, "step": 4058 }, { "epoch": 0.21775751072961375, "grad_norm": 0.43359375, "learning_rate": 4.9975924714339304e-06, "loss": 1.562, "step": 4059 }, { "epoch": 0.21781115879828325, "grad_norm": 0.328125, "learning_rate": 4.997588658141116e-06, "loss": 2.209, "step": 4060 }, { "epoch": 0.21786480686695278, "grad_norm": 0.4296875, "learning_rate": 4.997584841832204e-06, "loss": 2.5256, "step": 4061 }, { "epoch": 0.2179184549356223, "grad_norm": 0.345703125, "learning_rate": 4.997581022507199e-06, "loss": 2.2974, "step": 4062 }, { "epoch": 0.21797210300429185, "grad_norm": 0.345703125, "learning_rate": 4.997577200166106e-06, "loss": 2.1465, "step": 4063 }, { "epoch": 0.21802575107296138, "grad_norm": 0.337890625, "learning_rate": 4.99757337480893e-06, "loss": 1.8446, "step": 4064 }, { "epoch": 0.2180793991416309, "grad_norm": 0.3515625, "learning_rate": 4.997569546435674e-06, "loss": 2.2883, "step": 4065 }, { "epoch": 0.21813304721030044, "grad_norm": 0.474609375, "learning_rate": 4.997565715046344e-06, "loss": 2.3877, "step": 4066 }, { "epoch": 0.21818669527896994, "grad_norm": 0.3203125, "learning_rate": 4.997561880640944e-06, "loss": 2.4532, "step": 4067 }, { "epoch": 0.21824034334763948, "grad_norm": 0.388671875, "learning_rate": 4.9975580432194795e-06, "loss": 2.1314, "step": 4068 }, { "epoch": 0.218293991416309, "grad_norm": 0.8125, "learning_rate": 4.997554202781954e-06, "loss": 2.7232, "step": 4069 }, { "epoch": 0.21834763948497854, "grad_norm": 0.357421875, "learning_rate": 4.997550359328373e-06, "loss": 2.2616, "step": 4070 }, { "epoch": 0.21840128755364807, "grad_norm": 0.349609375, "learning_rate": 4.997546512858741e-06, "loss": 2.1862, "step": 4071 }, { "epoch": 0.2184549356223176, "grad_norm": 0.36328125, "learning_rate": 4.9975426633730616e-06, "loss": 2.2073, "step": 4072 }, { "epoch": 0.21850858369098713, "grad_norm": 0.3359375, "learning_rate": 4.997538810871342e-06, "loss": 2.0765, "step": 4073 }, { "epoch": 0.21856223175965664, "grad_norm": 0.314453125, "learning_rate": 4.997534955353584e-06, "loss": 2.2481, "step": 4074 }, { "epoch": 0.21861587982832617, "grad_norm": 0.318359375, "learning_rate": 4.997531096819793e-06, "loss": 2.4375, "step": 4075 }, { "epoch": 0.2186695278969957, "grad_norm": 0.28515625, "learning_rate": 4.997527235269975e-06, "loss": 1.93, "step": 4076 }, { "epoch": 0.21872317596566523, "grad_norm": 0.33984375, "learning_rate": 4.997523370704133e-06, "loss": 2.3142, "step": 4077 }, { "epoch": 0.21877682403433477, "grad_norm": 0.294921875, "learning_rate": 4.9975195031222736e-06, "loss": 1.9226, "step": 4078 }, { "epoch": 0.2188304721030043, "grad_norm": 0.302734375, "learning_rate": 4.997515632524399e-06, "loss": 2.2361, "step": 4079 }, { "epoch": 0.21888412017167383, "grad_norm": 0.322265625, "learning_rate": 4.997511758910516e-06, "loss": 2.4612, "step": 4080 }, { "epoch": 0.21893776824034336, "grad_norm": 0.392578125, "learning_rate": 4.9975078822806275e-06, "loss": 2.4059, "step": 4081 }, { "epoch": 0.21899141630901287, "grad_norm": 0.359375, "learning_rate": 4.99750400263474e-06, "loss": 2.4502, "step": 4082 }, { "epoch": 0.2190450643776824, "grad_norm": 0.484375, "learning_rate": 4.9975001199728564e-06, "loss": 2.518, "step": 4083 }, { "epoch": 0.21909871244635193, "grad_norm": 0.3359375, "learning_rate": 4.997496234294984e-06, "loss": 2.3763, "step": 4084 }, { "epoch": 0.21915236051502146, "grad_norm": 0.45703125, "learning_rate": 4.9974923456011245e-06, "loss": 2.4759, "step": 4085 }, { "epoch": 0.219206008583691, "grad_norm": 0.62109375, "learning_rate": 4.997488453891284e-06, "loss": 2.4035, "step": 4086 }, { "epoch": 0.21925965665236052, "grad_norm": 0.3125, "learning_rate": 4.9974845591654675e-06, "loss": 2.2103, "step": 4087 }, { "epoch": 0.21931330472103006, "grad_norm": 0.412109375, "learning_rate": 4.997480661423678e-06, "loss": 2.2447, "step": 4088 }, { "epoch": 0.21936695278969956, "grad_norm": 0.291015625, "learning_rate": 4.997476760665923e-06, "loss": 1.8772, "step": 4089 }, { "epoch": 0.2194206008583691, "grad_norm": 0.48046875, "learning_rate": 4.997472856892205e-06, "loss": 2.334, "step": 4090 }, { "epoch": 0.21947424892703862, "grad_norm": 0.40234375, "learning_rate": 4.997468950102529e-06, "loss": 2.5466, "step": 4091 }, { "epoch": 0.21952789699570815, "grad_norm": 0.37109375, "learning_rate": 4.997465040296901e-06, "loss": 2.4838, "step": 4092 }, { "epoch": 0.2195815450643777, "grad_norm": 0.376953125, "learning_rate": 4.997461127475325e-06, "loss": 2.4476, "step": 4093 }, { "epoch": 0.21963519313304722, "grad_norm": 0.515625, "learning_rate": 4.9974572116378056e-06, "loss": 2.2645, "step": 4094 }, { "epoch": 0.21968884120171675, "grad_norm": 0.310546875, "learning_rate": 4.997453292784347e-06, "loss": 2.4131, "step": 4095 }, { "epoch": 0.21974248927038625, "grad_norm": 0.38671875, "learning_rate": 4.997449370914955e-06, "loss": 2.4566, "step": 4096 }, { "epoch": 0.21979613733905579, "grad_norm": 0.37109375, "learning_rate": 4.997445446029633e-06, "loss": 2.037, "step": 4097 }, { "epoch": 0.21984978540772532, "grad_norm": 0.427734375, "learning_rate": 4.997441518128387e-06, "loss": 2.3066, "step": 4098 }, { "epoch": 0.21990343347639485, "grad_norm": 0.400390625, "learning_rate": 4.997437587211221e-06, "loss": 2.4734, "step": 4099 }, { "epoch": 0.21995708154506438, "grad_norm": 0.3125, "learning_rate": 4.99743365327814e-06, "loss": 2.2844, "step": 4100 }, { "epoch": 0.2200107296137339, "grad_norm": 0.306640625, "learning_rate": 4.997429716329149e-06, "loss": 2.3598, "step": 4101 }, { "epoch": 0.22006437768240344, "grad_norm": 0.380859375, "learning_rate": 4.997425776364252e-06, "loss": 2.3107, "step": 4102 }, { "epoch": 0.22011802575107295, "grad_norm": 0.3515625, "learning_rate": 4.997421833383455e-06, "loss": 2.2064, "step": 4103 }, { "epoch": 0.22017167381974248, "grad_norm": 0.32421875, "learning_rate": 4.997417887386762e-06, "loss": 2.2864, "step": 4104 }, { "epoch": 0.220225321888412, "grad_norm": 0.3671875, "learning_rate": 4.997413938374177e-06, "loss": 2.2828, "step": 4105 }, { "epoch": 0.22027896995708154, "grad_norm": 0.34375, "learning_rate": 4.997409986345706e-06, "loss": 2.3734, "step": 4106 }, { "epoch": 0.22033261802575108, "grad_norm": 0.359375, "learning_rate": 4.997406031301354e-06, "loss": 1.9961, "step": 4107 }, { "epoch": 0.2203862660944206, "grad_norm": 0.3203125, "learning_rate": 4.997402073241124e-06, "loss": 2.2787, "step": 4108 }, { "epoch": 0.22043991416309014, "grad_norm": 0.466796875, "learning_rate": 4.997398112165023e-06, "loss": 2.4275, "step": 4109 }, { "epoch": 0.22049356223175964, "grad_norm": 0.30859375, "learning_rate": 4.997394148073053e-06, "loss": 2.1917, "step": 4110 }, { "epoch": 0.22054721030042918, "grad_norm": 0.86328125, "learning_rate": 4.997390180965222e-06, "loss": 2.3926, "step": 4111 }, { "epoch": 0.2206008583690987, "grad_norm": 0.34375, "learning_rate": 4.9973862108415335e-06, "loss": 2.445, "step": 4112 }, { "epoch": 0.22065450643776824, "grad_norm": 0.37890625, "learning_rate": 4.997382237701991e-06, "loss": 2.25, "step": 4113 }, { "epoch": 0.22070815450643777, "grad_norm": 0.3203125, "learning_rate": 4.9973782615466e-06, "loss": 2.0381, "step": 4114 }, { "epoch": 0.2207618025751073, "grad_norm": 0.39453125, "learning_rate": 4.997374282375367e-06, "loss": 2.389, "step": 4115 }, { "epoch": 0.22081545064377683, "grad_norm": 0.361328125, "learning_rate": 4.9973703001882955e-06, "loss": 2.2784, "step": 4116 }, { "epoch": 0.22086909871244637, "grad_norm": 0.3515625, "learning_rate": 4.997366314985389e-06, "loss": 2.166, "step": 4117 }, { "epoch": 0.22092274678111587, "grad_norm": 0.447265625, "learning_rate": 4.997362326766654e-06, "loss": 1.4802, "step": 4118 }, { "epoch": 0.2209763948497854, "grad_norm": 0.392578125, "learning_rate": 4.997358335532095e-06, "loss": 2.4287, "step": 4119 }, { "epoch": 0.22103004291845493, "grad_norm": 0.380859375, "learning_rate": 4.997354341281717e-06, "loss": 2.1453, "step": 4120 }, { "epoch": 0.22108369098712446, "grad_norm": 0.423828125, "learning_rate": 4.9973503440155245e-06, "loss": 2.2321, "step": 4121 }, { "epoch": 0.221137339055794, "grad_norm": 0.380859375, "learning_rate": 4.9973463437335226e-06, "loss": 2.4141, "step": 4122 }, { "epoch": 0.22119098712446353, "grad_norm": 0.357421875, "learning_rate": 4.997342340435715e-06, "loss": 2.3735, "step": 4123 }, { "epoch": 0.22124463519313306, "grad_norm": 0.40625, "learning_rate": 4.997338334122108e-06, "loss": 2.4722, "step": 4124 }, { "epoch": 0.22129828326180256, "grad_norm": 0.37109375, "learning_rate": 4.997334324792706e-06, "loss": 2.1414, "step": 4125 }, { "epoch": 0.2213519313304721, "grad_norm": 0.455078125, "learning_rate": 4.997330312447514e-06, "loss": 2.3678, "step": 4126 }, { "epoch": 0.22140557939914163, "grad_norm": 0.359375, "learning_rate": 4.997326297086536e-06, "loss": 2.4745, "step": 4127 }, { "epoch": 0.22145922746781116, "grad_norm": 0.2890625, "learning_rate": 4.997322278709777e-06, "loss": 2.2053, "step": 4128 }, { "epoch": 0.2215128755364807, "grad_norm": 0.4296875, "learning_rate": 4.997318257317243e-06, "loss": 2.3606, "step": 4129 }, { "epoch": 0.22156652360515022, "grad_norm": 0.5, "learning_rate": 4.997314232908938e-06, "loss": 2.3505, "step": 4130 }, { "epoch": 0.22162017167381975, "grad_norm": 0.37109375, "learning_rate": 4.997310205484866e-06, "loss": 2.3237, "step": 4131 }, { "epoch": 0.22167381974248926, "grad_norm": 0.341796875, "learning_rate": 4.997306175045034e-06, "loss": 2.1099, "step": 4132 }, { "epoch": 0.2217274678111588, "grad_norm": 0.3359375, "learning_rate": 4.997302141589445e-06, "loss": 2.2394, "step": 4133 }, { "epoch": 0.22178111587982832, "grad_norm": 0.330078125, "learning_rate": 4.997298105118106e-06, "loss": 2.2243, "step": 4134 }, { "epoch": 0.22183476394849785, "grad_norm": 0.47265625, "learning_rate": 4.997294065631019e-06, "loss": 2.2589, "step": 4135 }, { "epoch": 0.22188841201716739, "grad_norm": 0.5234375, "learning_rate": 4.99729002312819e-06, "loss": 2.3076, "step": 4136 }, { "epoch": 0.22194206008583692, "grad_norm": 0.333984375, "learning_rate": 4.997285977609625e-06, "loss": 2.4285, "step": 4137 }, { "epoch": 0.22199570815450645, "grad_norm": 0.357421875, "learning_rate": 4.997281929075328e-06, "loss": 2.4437, "step": 4138 }, { "epoch": 0.22204935622317595, "grad_norm": 1.203125, "learning_rate": 4.997277877525304e-06, "loss": 2.49, "step": 4139 }, { "epoch": 0.22210300429184548, "grad_norm": 0.375, "learning_rate": 4.997273822959558e-06, "loss": 2.196, "step": 4140 }, { "epoch": 0.22215665236051502, "grad_norm": 0.3359375, "learning_rate": 4.997269765378095e-06, "loss": 2.1052, "step": 4141 }, { "epoch": 0.22221030042918455, "grad_norm": 0.37890625, "learning_rate": 4.997265704780919e-06, "loss": 2.2092, "step": 4142 }, { "epoch": 0.22226394849785408, "grad_norm": 0.33203125, "learning_rate": 4.997261641168037e-06, "loss": 2.3784, "step": 4143 }, { "epoch": 0.2223175965665236, "grad_norm": 0.50390625, "learning_rate": 4.99725757453945e-06, "loss": 2.4013, "step": 4144 }, { "epoch": 0.22237124463519314, "grad_norm": 0.337890625, "learning_rate": 4.997253504895168e-06, "loss": 2.1885, "step": 4145 }, { "epoch": 0.22242489270386265, "grad_norm": 0.3828125, "learning_rate": 4.997249432235191e-06, "loss": 2.4027, "step": 4146 }, { "epoch": 0.22247854077253218, "grad_norm": 0.29296875, "learning_rate": 4.997245356559528e-06, "loss": 2.4625, "step": 4147 }, { "epoch": 0.2225321888412017, "grad_norm": 0.49609375, "learning_rate": 4.997241277868181e-06, "loss": 2.4189, "step": 4148 }, { "epoch": 0.22258583690987124, "grad_norm": 0.33984375, "learning_rate": 4.9972371961611565e-06, "loss": 2.1018, "step": 4149 }, { "epoch": 0.22263948497854077, "grad_norm": 0.46875, "learning_rate": 4.9972331114384595e-06, "loss": 2.3631, "step": 4150 }, { "epoch": 0.2226931330472103, "grad_norm": 0.35546875, "learning_rate": 4.997229023700094e-06, "loss": 2.1902, "step": 4151 }, { "epoch": 0.22274678111587984, "grad_norm": 0.3515625, "learning_rate": 4.997224932946065e-06, "loss": 2.4781, "step": 4152 }, { "epoch": 0.22280042918454937, "grad_norm": 0.3671875, "learning_rate": 4.997220839176379e-06, "loss": 2.25, "step": 4153 }, { "epoch": 0.22285407725321887, "grad_norm": 0.4140625, "learning_rate": 4.997216742391038e-06, "loss": 2.0111, "step": 4154 }, { "epoch": 0.2229077253218884, "grad_norm": 0.423828125, "learning_rate": 4.99721264259005e-06, "loss": 2.5159, "step": 4155 }, { "epoch": 0.22296137339055794, "grad_norm": 0.49609375, "learning_rate": 4.997208539773418e-06, "loss": 2.3582, "step": 4156 }, { "epoch": 0.22301502145922747, "grad_norm": 0.375, "learning_rate": 4.997204433941148e-06, "loss": 2.509, "step": 4157 }, { "epoch": 0.223068669527897, "grad_norm": 0.396484375, "learning_rate": 4.997200325093244e-06, "loss": 2.416, "step": 4158 }, { "epoch": 0.22312231759656653, "grad_norm": 0.412109375, "learning_rate": 4.997196213229713e-06, "loss": 1.8608, "step": 4159 }, { "epoch": 0.22317596566523606, "grad_norm": 0.306640625, "learning_rate": 4.9971920983505566e-06, "loss": 2.1949, "step": 4160 }, { "epoch": 0.22322961373390557, "grad_norm": 1.5703125, "learning_rate": 4.997187980455783e-06, "loss": 2.255, "step": 4161 }, { "epoch": 0.2232832618025751, "grad_norm": 0.50390625, "learning_rate": 4.9971838595453945e-06, "loss": 2.293, "step": 4162 }, { "epoch": 0.22333690987124463, "grad_norm": 0.451171875, "learning_rate": 4.997179735619399e-06, "loss": 2.4587, "step": 4163 }, { "epoch": 0.22339055793991416, "grad_norm": 0.353515625, "learning_rate": 4.997175608677799e-06, "loss": 2.5435, "step": 4164 }, { "epoch": 0.2234442060085837, "grad_norm": 0.37109375, "learning_rate": 4.997171478720601e-06, "loss": 2.3422, "step": 4165 }, { "epoch": 0.22349785407725323, "grad_norm": 0.421875, "learning_rate": 4.997167345747809e-06, "loss": 2.1909, "step": 4166 }, { "epoch": 0.22355150214592276, "grad_norm": 0.30859375, "learning_rate": 4.997163209759428e-06, "loss": 2.2839, "step": 4167 }, { "epoch": 0.22360515021459226, "grad_norm": 0.384765625, "learning_rate": 4.997159070755464e-06, "loss": 2.201, "step": 4168 }, { "epoch": 0.2236587982832618, "grad_norm": 4.53125, "learning_rate": 4.99715492873592e-06, "loss": 2.4416, "step": 4169 }, { "epoch": 0.22371244635193133, "grad_norm": 0.35546875, "learning_rate": 4.997150783700804e-06, "loss": 2.2467, "step": 4170 }, { "epoch": 0.22376609442060086, "grad_norm": 1.0703125, "learning_rate": 4.997146635650119e-06, "loss": 2.3992, "step": 4171 }, { "epoch": 0.2238197424892704, "grad_norm": 0.419921875, "learning_rate": 4.9971424845838695e-06, "loss": 2.1445, "step": 4172 }, { "epoch": 0.22387339055793992, "grad_norm": 0.404296875, "learning_rate": 4.997138330502062e-06, "loss": 2.114, "step": 4173 }, { "epoch": 0.22392703862660945, "grad_norm": 0.55859375, "learning_rate": 4.997134173404701e-06, "loss": 2.3427, "step": 4174 }, { "epoch": 0.22398068669527896, "grad_norm": 0.328125, "learning_rate": 4.997130013291791e-06, "loss": 2.0437, "step": 4175 }, { "epoch": 0.2240343347639485, "grad_norm": 0.443359375, "learning_rate": 4.997125850163338e-06, "loss": 2.4732, "step": 4176 }, { "epoch": 0.22408798283261802, "grad_norm": 0.494140625, "learning_rate": 4.997121684019346e-06, "loss": 2.1482, "step": 4177 }, { "epoch": 0.22414163090128755, "grad_norm": 0.34765625, "learning_rate": 4.99711751485982e-06, "loss": 2.3567, "step": 4178 }, { "epoch": 0.22419527896995708, "grad_norm": 0.294921875, "learning_rate": 4.997113342684766e-06, "loss": 2.1047, "step": 4179 }, { "epoch": 0.22424892703862662, "grad_norm": 0.373046875, "learning_rate": 4.997109167494189e-06, "loss": 2.2717, "step": 4180 }, { "epoch": 0.22430257510729615, "grad_norm": 0.3359375, "learning_rate": 4.997104989288094e-06, "loss": 2.3135, "step": 4181 }, { "epoch": 0.22435622317596565, "grad_norm": 0.421875, "learning_rate": 4.997100808066485e-06, "loss": 2.3569, "step": 4182 }, { "epoch": 0.22440987124463518, "grad_norm": 0.3359375, "learning_rate": 4.997096623829367e-06, "loss": 2.2812, "step": 4183 }, { "epoch": 0.22446351931330472, "grad_norm": 0.326171875, "learning_rate": 4.997092436576747e-06, "loss": 2.2852, "step": 4184 }, { "epoch": 0.22451716738197425, "grad_norm": 1.1171875, "learning_rate": 4.997088246308628e-06, "loss": 2.2118, "step": 4185 }, { "epoch": 0.22457081545064378, "grad_norm": 0.376953125, "learning_rate": 4.997084053025017e-06, "loss": 2.3466, "step": 4186 }, { "epoch": 0.2246244635193133, "grad_norm": 0.310546875, "learning_rate": 4.997079856725916e-06, "loss": 2.2519, "step": 4187 }, { "epoch": 0.22467811158798284, "grad_norm": 0.6640625, "learning_rate": 4.997075657411334e-06, "loss": 2.2376, "step": 4188 }, { "epoch": 0.22473175965665235, "grad_norm": 0.482421875, "learning_rate": 4.997071455081272e-06, "loss": 2.3722, "step": 4189 }, { "epoch": 0.22478540772532188, "grad_norm": 0.306640625, "learning_rate": 4.997067249735739e-06, "loss": 2.3682, "step": 4190 }, { "epoch": 0.2248390557939914, "grad_norm": 0.3203125, "learning_rate": 4.997063041374738e-06, "loss": 2.3158, "step": 4191 }, { "epoch": 0.22489270386266094, "grad_norm": 0.3046875, "learning_rate": 4.997058829998273e-06, "loss": 2.2059, "step": 4192 }, { "epoch": 0.22494635193133047, "grad_norm": 0.39453125, "learning_rate": 4.997054615606351e-06, "loss": 2.2753, "step": 4193 }, { "epoch": 0.225, "grad_norm": 0.33203125, "learning_rate": 4.997050398198977e-06, "loss": 2.3583, "step": 4194 }, { "epoch": 0.22505364806866954, "grad_norm": 0.384765625, "learning_rate": 4.997046177776156e-06, "loss": 1.6972, "step": 4195 }, { "epoch": 0.22510729613733907, "grad_norm": 0.349609375, "learning_rate": 4.997041954337891e-06, "loss": 2.3159, "step": 4196 }, { "epoch": 0.22516094420600857, "grad_norm": 0.400390625, "learning_rate": 4.9970377278841905e-06, "loss": 2.3623, "step": 4197 }, { "epoch": 0.2252145922746781, "grad_norm": 0.3828125, "learning_rate": 4.997033498415057e-06, "loss": 2.429, "step": 4198 }, { "epoch": 0.22526824034334764, "grad_norm": 0.796875, "learning_rate": 4.9970292659304965e-06, "loss": 2.2387, "step": 4199 }, { "epoch": 0.22532188841201717, "grad_norm": 0.3046875, "learning_rate": 4.997025030430515e-06, "loss": 2.0988, "step": 4200 }, { "epoch": 0.2253755364806867, "grad_norm": 0.341796875, "learning_rate": 4.997020791915115e-06, "loss": 2.1337, "step": 4201 }, { "epoch": 0.22542918454935623, "grad_norm": 0.3671875, "learning_rate": 4.997016550384305e-06, "loss": 2.2575, "step": 4202 }, { "epoch": 0.22548283261802576, "grad_norm": 0.369140625, "learning_rate": 4.997012305838087e-06, "loss": 2.342, "step": 4203 }, { "epoch": 0.22553648068669527, "grad_norm": 0.365234375, "learning_rate": 4.997008058276469e-06, "loss": 2.5416, "step": 4204 }, { "epoch": 0.2255901287553648, "grad_norm": 0.359375, "learning_rate": 4.997003807699454e-06, "loss": 2.1543, "step": 4205 }, { "epoch": 0.22564377682403433, "grad_norm": 0.373046875, "learning_rate": 4.9969995541070485e-06, "loss": 2.4598, "step": 4206 }, { "epoch": 0.22569742489270386, "grad_norm": 0.353515625, "learning_rate": 4.996995297499256e-06, "loss": 2.1357, "step": 4207 }, { "epoch": 0.2257510729613734, "grad_norm": 0.314453125, "learning_rate": 4.996991037876083e-06, "loss": 1.5575, "step": 4208 }, { "epoch": 0.22580472103004293, "grad_norm": 0.283203125, "learning_rate": 4.996986775237535e-06, "loss": 2.352, "step": 4209 }, { "epoch": 0.22585836909871246, "grad_norm": 0.408203125, "learning_rate": 4.996982509583615e-06, "loss": 2.2336, "step": 4210 }, { "epoch": 0.22591201716738196, "grad_norm": 0.37109375, "learning_rate": 4.996978240914329e-06, "loss": 2.4904, "step": 4211 }, { "epoch": 0.2259656652360515, "grad_norm": 0.35546875, "learning_rate": 4.9969739692296845e-06, "loss": 1.9298, "step": 4212 }, { "epoch": 0.22601931330472103, "grad_norm": 0.447265625, "learning_rate": 4.996969694529685e-06, "loss": 2.0481, "step": 4213 }, { "epoch": 0.22607296137339056, "grad_norm": 0.357421875, "learning_rate": 4.996965416814335e-06, "loss": 2.2185, "step": 4214 }, { "epoch": 0.2261266094420601, "grad_norm": 0.482421875, "learning_rate": 4.9969611360836405e-06, "loss": 2.3952, "step": 4215 }, { "epoch": 0.22618025751072962, "grad_norm": 0.314453125, "learning_rate": 4.996956852337606e-06, "loss": 2.407, "step": 4216 }, { "epoch": 0.22623390557939915, "grad_norm": 0.384765625, "learning_rate": 4.996952565576236e-06, "loss": 2.3033, "step": 4217 }, { "epoch": 0.22628755364806866, "grad_norm": 0.34765625, "learning_rate": 4.996948275799538e-06, "loss": 1.9608, "step": 4218 }, { "epoch": 0.2263412017167382, "grad_norm": 24.375, "learning_rate": 4.996943983007515e-06, "loss": 2.5074, "step": 4219 }, { "epoch": 0.22639484978540772, "grad_norm": 0.380859375, "learning_rate": 4.996939687200174e-06, "loss": 2.3027, "step": 4220 }, { "epoch": 0.22644849785407725, "grad_norm": 0.353515625, "learning_rate": 4.99693538837752e-06, "loss": 2.1501, "step": 4221 }, { "epoch": 0.22650214592274678, "grad_norm": 0.30078125, "learning_rate": 4.996931086539556e-06, "loss": 2.233, "step": 4222 }, { "epoch": 0.22655579399141632, "grad_norm": 0.421875, "learning_rate": 4.996926781686289e-06, "loss": 2.155, "step": 4223 }, { "epoch": 0.22660944206008585, "grad_norm": 0.34375, "learning_rate": 4.996922473817724e-06, "loss": 2.2284, "step": 4224 }, { "epoch": 0.22666309012875535, "grad_norm": 0.353515625, "learning_rate": 4.996918162933866e-06, "loss": 2.1372, "step": 4225 }, { "epoch": 0.22671673819742488, "grad_norm": 0.45703125, "learning_rate": 4.99691384903472e-06, "loss": 1.8424, "step": 4226 }, { "epoch": 0.22677038626609441, "grad_norm": 0.388671875, "learning_rate": 4.996909532120292e-06, "loss": 1.9846, "step": 4227 }, { "epoch": 0.22682403433476395, "grad_norm": 0.423828125, "learning_rate": 4.996905212190587e-06, "loss": 1.6471, "step": 4228 }, { "epoch": 0.22687768240343348, "grad_norm": 0.373046875, "learning_rate": 4.99690088924561e-06, "loss": 2.2717, "step": 4229 }, { "epoch": 0.226931330472103, "grad_norm": 0.357421875, "learning_rate": 4.996896563285365e-06, "loss": 2.4051, "step": 4230 }, { "epoch": 0.22698497854077254, "grad_norm": 0.37109375, "learning_rate": 4.996892234309859e-06, "loss": 2.343, "step": 4231 }, { "epoch": 0.22703862660944207, "grad_norm": 0.345703125, "learning_rate": 4.996887902319097e-06, "loss": 1.8322, "step": 4232 }, { "epoch": 0.22709227467811158, "grad_norm": 0.87890625, "learning_rate": 4.996883567313083e-06, "loss": 2.1477, "step": 4233 }, { "epoch": 0.2271459227467811, "grad_norm": 0.34375, "learning_rate": 4.996879229291823e-06, "loss": 2.3502, "step": 4234 }, { "epoch": 0.22719957081545064, "grad_norm": 0.46484375, "learning_rate": 4.996874888255322e-06, "loss": 2.3956, "step": 4235 }, { "epoch": 0.22725321888412017, "grad_norm": 0.431640625, "learning_rate": 4.996870544203586e-06, "loss": 2.4711, "step": 4236 }, { "epoch": 0.2273068669527897, "grad_norm": 0.62109375, "learning_rate": 4.996866197136621e-06, "loss": 2.2162, "step": 4237 }, { "epoch": 0.22736051502145924, "grad_norm": 0.55078125, "learning_rate": 4.996861847054429e-06, "loss": 2.305, "step": 4238 }, { "epoch": 0.22741416309012877, "grad_norm": 0.376953125, "learning_rate": 4.996857493957019e-06, "loss": 2.3068, "step": 4239 }, { "epoch": 0.22746781115879827, "grad_norm": 0.484375, "learning_rate": 4.996853137844393e-06, "loss": 2.3764, "step": 4240 }, { "epoch": 0.2275214592274678, "grad_norm": 0.47265625, "learning_rate": 4.996848778716558e-06, "loss": 2.2387, "step": 4241 }, { "epoch": 0.22757510729613734, "grad_norm": 0.40234375, "learning_rate": 4.9968444165735204e-06, "loss": 2.3558, "step": 4242 }, { "epoch": 0.22762875536480687, "grad_norm": 0.375, "learning_rate": 4.996840051415284e-06, "loss": 2.2759, "step": 4243 }, { "epoch": 0.2276824034334764, "grad_norm": 0.59375, "learning_rate": 4.996835683241853e-06, "loss": 2.1816, "step": 4244 }, { "epoch": 0.22773605150214593, "grad_norm": 0.470703125, "learning_rate": 4.996831312053234e-06, "loss": 1.4201, "step": 4245 }, { "epoch": 0.22778969957081546, "grad_norm": 0.3671875, "learning_rate": 4.9968269378494325e-06, "loss": 2.026, "step": 4246 }, { "epoch": 0.22784334763948497, "grad_norm": 0.466796875, "learning_rate": 4.996822560630454e-06, "loss": 2.2255, "step": 4247 }, { "epoch": 0.2278969957081545, "grad_norm": 0.341796875, "learning_rate": 4.996818180396303e-06, "loss": 2.3907, "step": 4248 }, { "epoch": 0.22795064377682403, "grad_norm": 0.365234375, "learning_rate": 4.996813797146984e-06, "loss": 2.4678, "step": 4249 }, { "epoch": 0.22800429184549356, "grad_norm": 0.439453125, "learning_rate": 4.996809410882504e-06, "loss": 2.1182, "step": 4250 }, { "epoch": 0.2280579399141631, "grad_norm": 0.396484375, "learning_rate": 4.996805021602868e-06, "loss": 2.4954, "step": 4251 }, { "epoch": 0.22811158798283263, "grad_norm": 0.34375, "learning_rate": 4.996800629308081e-06, "loss": 2.3289, "step": 4252 }, { "epoch": 0.22816523605150216, "grad_norm": 0.484375, "learning_rate": 4.9967962339981465e-06, "loss": 2.0933, "step": 4253 }, { "epoch": 0.22821888412017166, "grad_norm": 0.423828125, "learning_rate": 4.996791835673073e-06, "loss": 2.5717, "step": 4254 }, { "epoch": 0.2282725321888412, "grad_norm": 0.361328125, "learning_rate": 4.996787434332865e-06, "loss": 2.3071, "step": 4255 }, { "epoch": 0.22832618025751072, "grad_norm": 0.41015625, "learning_rate": 4.9967830299775255e-06, "loss": 2.1496, "step": 4256 }, { "epoch": 0.22837982832618026, "grad_norm": 0.421875, "learning_rate": 4.9967786226070625e-06, "loss": 2.4181, "step": 4257 }, { "epoch": 0.2284334763948498, "grad_norm": 0.6015625, "learning_rate": 4.99677421222148e-06, "loss": 2.0865, "step": 4258 }, { "epoch": 0.22848712446351932, "grad_norm": 0.357421875, "learning_rate": 4.996769798820783e-06, "loss": 2.1846, "step": 4259 }, { "epoch": 0.22854077253218885, "grad_norm": 0.4609375, "learning_rate": 4.996765382404978e-06, "loss": 2.5013, "step": 4260 }, { "epoch": 0.22859442060085836, "grad_norm": 0.361328125, "learning_rate": 4.99676096297407e-06, "loss": 2.5358, "step": 4261 }, { "epoch": 0.2286480686695279, "grad_norm": 0.36328125, "learning_rate": 4.996756540528064e-06, "loss": 2.4987, "step": 4262 }, { "epoch": 0.22870171673819742, "grad_norm": 0.38671875, "learning_rate": 4.996752115066965e-06, "loss": 2.3572, "step": 4263 }, { "epoch": 0.22875536480686695, "grad_norm": 0.37890625, "learning_rate": 4.9967476865907796e-06, "loss": 2.1733, "step": 4264 }, { "epoch": 0.22880901287553648, "grad_norm": 0.38671875, "learning_rate": 4.996743255099511e-06, "loss": 2.1379, "step": 4265 }, { "epoch": 0.22886266094420601, "grad_norm": 0.359375, "learning_rate": 4.996738820593167e-06, "loss": 2.376, "step": 4266 }, { "epoch": 0.22891630901287555, "grad_norm": 0.8125, "learning_rate": 4.996734383071751e-06, "loss": 2.3207, "step": 4267 }, { "epoch": 0.22896995708154508, "grad_norm": 9.6875, "learning_rate": 4.99672994253527e-06, "loss": 2.5585, "step": 4268 }, { "epoch": 0.22902360515021458, "grad_norm": 0.392578125, "learning_rate": 4.996725498983729e-06, "loss": 2.2412, "step": 4269 }, { "epoch": 0.2290772532188841, "grad_norm": 0.322265625, "learning_rate": 4.996721052417132e-06, "loss": 2.3119, "step": 4270 }, { "epoch": 0.22913090128755365, "grad_norm": 0.443359375, "learning_rate": 4.996716602835485e-06, "loss": 2.6014, "step": 4271 }, { "epoch": 0.22918454935622318, "grad_norm": 0.3828125, "learning_rate": 4.996712150238794e-06, "loss": 2.4704, "step": 4272 }, { "epoch": 0.2292381974248927, "grad_norm": 0.3984375, "learning_rate": 4.9967076946270645e-06, "loss": 2.4205, "step": 4273 }, { "epoch": 0.22929184549356224, "grad_norm": 0.58984375, "learning_rate": 4.996703236000301e-06, "loss": 1.6981, "step": 4274 }, { "epoch": 0.22934549356223177, "grad_norm": 0.353515625, "learning_rate": 4.99669877435851e-06, "loss": 2.2291, "step": 4275 }, { "epoch": 0.22939914163090128, "grad_norm": 0.345703125, "learning_rate": 4.996694309701695e-06, "loss": 2.2809, "step": 4276 }, { "epoch": 0.2294527896995708, "grad_norm": 0.6171875, "learning_rate": 4.996689842029864e-06, "loss": 2.3663, "step": 4277 }, { "epoch": 0.22950643776824034, "grad_norm": 0.275390625, "learning_rate": 4.99668537134302e-06, "loss": 1.8146, "step": 4278 }, { "epoch": 0.22956008583690987, "grad_norm": 0.353515625, "learning_rate": 4.99668089764117e-06, "loss": 2.4173, "step": 4279 }, { "epoch": 0.2296137339055794, "grad_norm": 0.3359375, "learning_rate": 4.996676420924318e-06, "loss": 2.1545, "step": 4280 }, { "epoch": 0.22966738197424894, "grad_norm": 0.380859375, "learning_rate": 4.996671941192471e-06, "loss": 2.2793, "step": 4281 }, { "epoch": 0.22972103004291847, "grad_norm": 0.421875, "learning_rate": 4.996667458445633e-06, "loss": 2.4141, "step": 4282 }, { "epoch": 0.22977467811158797, "grad_norm": 0.54296875, "learning_rate": 4.9966629726838104e-06, "loss": 2.2975, "step": 4283 }, { "epoch": 0.2298283261802575, "grad_norm": 0.294921875, "learning_rate": 4.996658483907008e-06, "loss": 2.0498, "step": 4284 }, { "epoch": 0.22988197424892703, "grad_norm": 0.359375, "learning_rate": 4.996653992115232e-06, "loss": 2.2445, "step": 4285 }, { "epoch": 0.22993562231759657, "grad_norm": 0.392578125, "learning_rate": 4.996649497308487e-06, "loss": 2.3789, "step": 4286 }, { "epoch": 0.2299892703862661, "grad_norm": 0.33203125, "learning_rate": 4.996644999486778e-06, "loss": 2.262, "step": 4287 }, { "epoch": 0.23004291845493563, "grad_norm": 0.353515625, "learning_rate": 4.996640498650111e-06, "loss": 2.3432, "step": 4288 }, { "epoch": 0.23009656652360516, "grad_norm": 0.40234375, "learning_rate": 4.996635994798493e-06, "loss": 2.2205, "step": 4289 }, { "epoch": 0.23015021459227467, "grad_norm": 0.390625, "learning_rate": 4.996631487931928e-06, "loss": 1.9318, "step": 4290 }, { "epoch": 0.2302038626609442, "grad_norm": 0.357421875, "learning_rate": 4.9966269780504205e-06, "loss": 2.3816, "step": 4291 }, { "epoch": 0.23025751072961373, "grad_norm": 0.34765625, "learning_rate": 4.996622465153977e-06, "loss": 2.3693, "step": 4292 }, { "epoch": 0.23031115879828326, "grad_norm": 0.6328125, "learning_rate": 4.996617949242603e-06, "loss": 2.3436, "step": 4293 }, { "epoch": 0.2303648068669528, "grad_norm": 0.369140625, "learning_rate": 4.996613430316304e-06, "loss": 2.4592, "step": 4294 }, { "epoch": 0.23041845493562232, "grad_norm": 0.369140625, "learning_rate": 4.996608908375085e-06, "loss": 2.6785, "step": 4295 }, { "epoch": 0.23047210300429186, "grad_norm": 0.53515625, "learning_rate": 4.996604383418952e-06, "loss": 2.2458, "step": 4296 }, { "epoch": 0.23052575107296136, "grad_norm": 0.34375, "learning_rate": 4.99659985544791e-06, "loss": 2.315, "step": 4297 }, { "epoch": 0.2305793991416309, "grad_norm": 0.3515625, "learning_rate": 4.996595324461965e-06, "loss": 2.408, "step": 4298 }, { "epoch": 0.23063304721030042, "grad_norm": 0.3359375, "learning_rate": 4.996590790461121e-06, "loss": 2.4442, "step": 4299 }, { "epoch": 0.23068669527896996, "grad_norm": 0.345703125, "learning_rate": 4.996586253445386e-06, "loss": 2.3645, "step": 4300 }, { "epoch": 0.2307403433476395, "grad_norm": 0.396484375, "learning_rate": 4.996581713414763e-06, "loss": 2.0742, "step": 4301 }, { "epoch": 0.23079399141630902, "grad_norm": 0.333984375, "learning_rate": 4.9965771703692595e-06, "loss": 2.3076, "step": 4302 }, { "epoch": 0.23084763948497855, "grad_norm": 0.359375, "learning_rate": 4.996572624308879e-06, "loss": 2.3194, "step": 4303 }, { "epoch": 0.23090128755364808, "grad_norm": 0.388671875, "learning_rate": 4.9965680752336295e-06, "loss": 2.4004, "step": 4304 }, { "epoch": 0.2309549356223176, "grad_norm": 0.3515625, "learning_rate": 4.996563523143514e-06, "loss": 2.1587, "step": 4305 }, { "epoch": 0.23100858369098712, "grad_norm": 0.3125, "learning_rate": 4.99655896803854e-06, "loss": 2.2278, "step": 4306 }, { "epoch": 0.23106223175965665, "grad_norm": 0.30078125, "learning_rate": 4.99655440991871e-06, "loss": 2.0133, "step": 4307 }, { "epoch": 0.23111587982832618, "grad_norm": 0.609375, "learning_rate": 4.996549848784034e-06, "loss": 1.75, "step": 4308 }, { "epoch": 0.2311695278969957, "grad_norm": 0.349609375, "learning_rate": 4.996545284634513e-06, "loss": 2.202, "step": 4309 }, { "epoch": 0.23122317596566525, "grad_norm": 0.37109375, "learning_rate": 4.9965407174701555e-06, "loss": 1.9887, "step": 4310 }, { "epoch": 0.23127682403433478, "grad_norm": 0.380859375, "learning_rate": 4.9965361472909654e-06, "loss": 2.2903, "step": 4311 }, { "epoch": 0.23133047210300428, "grad_norm": 0.357421875, "learning_rate": 4.99653157409695e-06, "loss": 2.4258, "step": 4312 }, { "epoch": 0.2313841201716738, "grad_norm": 0.361328125, "learning_rate": 4.996526997888114e-06, "loss": 2.1743, "step": 4313 }, { "epoch": 0.23143776824034334, "grad_norm": 0.33984375, "learning_rate": 4.996522418664461e-06, "loss": 2.1457, "step": 4314 }, { "epoch": 0.23149141630901288, "grad_norm": 0.37890625, "learning_rate": 4.996517836425999e-06, "loss": 2.3442, "step": 4315 }, { "epoch": 0.2315450643776824, "grad_norm": 0.4296875, "learning_rate": 4.996513251172733e-06, "loss": 2.2321, "step": 4316 }, { "epoch": 0.23159871244635194, "grad_norm": 0.419921875, "learning_rate": 4.996508662904667e-06, "loss": 2.241, "step": 4317 }, { "epoch": 0.23165236051502147, "grad_norm": 0.796875, "learning_rate": 4.996504071621809e-06, "loss": 2.4286, "step": 4318 }, { "epoch": 0.23170600858369098, "grad_norm": 0.5234375, "learning_rate": 4.996499477324164e-06, "loss": 2.53, "step": 4319 }, { "epoch": 0.2317596566523605, "grad_norm": 0.330078125, "learning_rate": 4.996494880011735e-06, "loss": 2.1664, "step": 4320 }, { "epoch": 0.23181330472103004, "grad_norm": 0.345703125, "learning_rate": 4.996490279684531e-06, "loss": 2.332, "step": 4321 }, { "epoch": 0.23186695278969957, "grad_norm": 0.427734375, "learning_rate": 4.996485676342555e-06, "loss": 2.4197, "step": 4322 }, { "epoch": 0.2319206008583691, "grad_norm": 0.310546875, "learning_rate": 4.996481069985814e-06, "loss": 2.3553, "step": 4323 }, { "epoch": 0.23197424892703863, "grad_norm": 0.359375, "learning_rate": 4.996476460614313e-06, "loss": 2.4276, "step": 4324 }, { "epoch": 0.23202789699570817, "grad_norm": 0.55078125, "learning_rate": 4.9964718482280576e-06, "loss": 2.3264, "step": 4325 }, { "epoch": 0.23208154506437767, "grad_norm": 0.52734375, "learning_rate": 4.996467232827053e-06, "loss": 2.1602, "step": 4326 }, { "epoch": 0.2321351931330472, "grad_norm": 0.6484375, "learning_rate": 4.996462614411306e-06, "loss": 2.6062, "step": 4327 }, { "epoch": 0.23218884120171673, "grad_norm": 0.310546875, "learning_rate": 4.996457992980821e-06, "loss": 2.1464, "step": 4328 }, { "epoch": 0.23224248927038627, "grad_norm": 0.37109375, "learning_rate": 4.996453368535605e-06, "loss": 2.3305, "step": 4329 }, { "epoch": 0.2322961373390558, "grad_norm": 0.396484375, "learning_rate": 4.996448741075661e-06, "loss": 2.1118, "step": 4330 }, { "epoch": 0.23234978540772533, "grad_norm": 0.3359375, "learning_rate": 4.996444110600997e-06, "loss": 2.2976, "step": 4331 }, { "epoch": 0.23240343347639486, "grad_norm": 0.5234375, "learning_rate": 4.996439477111618e-06, "loss": 2.485, "step": 4332 }, { "epoch": 0.23245708154506436, "grad_norm": 0.33984375, "learning_rate": 4.996434840607529e-06, "loss": 2.5021, "step": 4333 }, { "epoch": 0.2325107296137339, "grad_norm": 0.333984375, "learning_rate": 4.996430201088736e-06, "loss": 2.069, "step": 4334 }, { "epoch": 0.23256437768240343, "grad_norm": 0.34765625, "learning_rate": 4.9964255585552435e-06, "loss": 2.3059, "step": 4335 }, { "epoch": 0.23261802575107296, "grad_norm": 0.36328125, "learning_rate": 4.996420913007059e-06, "loss": 2.466, "step": 4336 }, { "epoch": 0.2326716738197425, "grad_norm": 0.388671875, "learning_rate": 4.996416264444187e-06, "loss": 2.2537, "step": 4337 }, { "epoch": 0.23272532188841202, "grad_norm": 0.384765625, "learning_rate": 4.996411612866634e-06, "loss": 2.4731, "step": 4338 }, { "epoch": 0.23277896995708156, "grad_norm": 0.373046875, "learning_rate": 4.996406958274405e-06, "loss": 2.2731, "step": 4339 }, { "epoch": 0.23283261802575106, "grad_norm": 0.419921875, "learning_rate": 4.996402300667505e-06, "loss": 2.1868, "step": 4340 }, { "epoch": 0.2328862660944206, "grad_norm": 0.384765625, "learning_rate": 4.9963976400459406e-06, "loss": 2.496, "step": 4341 }, { "epoch": 0.23293991416309012, "grad_norm": 0.3828125, "learning_rate": 4.996392976409717e-06, "loss": 2.2628, "step": 4342 }, { "epoch": 0.23299356223175965, "grad_norm": 0.361328125, "learning_rate": 4.996388309758839e-06, "loss": 2.4299, "step": 4343 }, { "epoch": 0.2330472103004292, "grad_norm": 0.390625, "learning_rate": 4.996383640093314e-06, "loss": 2.0676, "step": 4344 }, { "epoch": 0.23310085836909872, "grad_norm": 0.396484375, "learning_rate": 4.996378967413146e-06, "loss": 1.9596, "step": 4345 }, { "epoch": 0.23315450643776825, "grad_norm": 0.3671875, "learning_rate": 4.9963742917183425e-06, "loss": 2.1436, "step": 4346 }, { "epoch": 0.23320815450643778, "grad_norm": 0.384765625, "learning_rate": 4.996369613008907e-06, "loss": 2.3112, "step": 4347 }, { "epoch": 0.23326180257510729, "grad_norm": 0.4140625, "learning_rate": 4.996364931284847e-06, "loss": 2.1911, "step": 4348 }, { "epoch": 0.23331545064377682, "grad_norm": 0.326171875, "learning_rate": 4.996360246546167e-06, "loss": 2.3145, "step": 4349 }, { "epoch": 0.23336909871244635, "grad_norm": 0.40234375, "learning_rate": 4.996355558792874e-06, "loss": 2.0818, "step": 4350 }, { "epoch": 0.23342274678111588, "grad_norm": 0.77734375, "learning_rate": 4.996350868024971e-06, "loss": 2.3827, "step": 4351 }, { "epoch": 0.2334763948497854, "grad_norm": 0.455078125, "learning_rate": 4.9963461742424665e-06, "loss": 2.5579, "step": 4352 }, { "epoch": 0.23353004291845494, "grad_norm": 0.427734375, "learning_rate": 4.996341477445364e-06, "loss": 2.2483, "step": 4353 }, { "epoch": 0.23358369098712448, "grad_norm": 0.341796875, "learning_rate": 4.996336777633671e-06, "loss": 2.2574, "step": 4354 }, { "epoch": 0.23363733905579398, "grad_norm": 0.4296875, "learning_rate": 4.996332074807393e-06, "loss": 2.3737, "step": 4355 }, { "epoch": 0.2336909871244635, "grad_norm": 0.40625, "learning_rate": 4.996327368966533e-06, "loss": 2.304, "step": 4356 }, { "epoch": 0.23374463519313304, "grad_norm": 0.322265625, "learning_rate": 4.996322660111101e-06, "loss": 2.1667, "step": 4357 }, { "epoch": 0.23379828326180258, "grad_norm": 0.388671875, "learning_rate": 4.996317948241099e-06, "loss": 2.4131, "step": 4358 }, { "epoch": 0.2338519313304721, "grad_norm": 0.37109375, "learning_rate": 4.996313233356535e-06, "loss": 2.2624, "step": 4359 }, { "epoch": 0.23390557939914164, "grad_norm": 0.38671875, "learning_rate": 4.996308515457413e-06, "loss": 2.5098, "step": 4360 }, { "epoch": 0.23395922746781117, "grad_norm": 0.6875, "learning_rate": 4.996303794543739e-06, "loss": 2.2161, "step": 4361 }, { "epoch": 0.23401287553648067, "grad_norm": 0.3828125, "learning_rate": 4.99629907061552e-06, "loss": 2.3879, "step": 4362 }, { "epoch": 0.2340665236051502, "grad_norm": 0.41015625, "learning_rate": 4.99629434367276e-06, "loss": 2.4089, "step": 4363 }, { "epoch": 0.23412017167381974, "grad_norm": 0.375, "learning_rate": 4.996289613715467e-06, "loss": 2.3358, "step": 4364 }, { "epoch": 0.23417381974248927, "grad_norm": 0.30078125, "learning_rate": 4.996284880743645e-06, "loss": 2.3747, "step": 4365 }, { "epoch": 0.2342274678111588, "grad_norm": 0.3671875, "learning_rate": 4.9962801447573e-06, "loss": 2.1104, "step": 4366 }, { "epoch": 0.23428111587982833, "grad_norm": 0.63671875, "learning_rate": 4.996275405756436e-06, "loss": 2.5994, "step": 4367 }, { "epoch": 0.23433476394849787, "grad_norm": 0.37109375, "learning_rate": 4.996270663741063e-06, "loss": 2.2762, "step": 4368 }, { "epoch": 0.23438841201716737, "grad_norm": 0.466796875, "learning_rate": 4.996265918711183e-06, "loss": 2.5074, "step": 4369 }, { "epoch": 0.2344420600858369, "grad_norm": 0.337890625, "learning_rate": 4.996261170666803e-06, "loss": 2.1983, "step": 4370 }, { "epoch": 0.23449570815450643, "grad_norm": 0.328125, "learning_rate": 4.996256419607929e-06, "loss": 2.2616, "step": 4371 }, { "epoch": 0.23454935622317596, "grad_norm": 0.33203125, "learning_rate": 4.996251665534565e-06, "loss": 2.3466, "step": 4372 }, { "epoch": 0.2346030042918455, "grad_norm": 0.4140625, "learning_rate": 4.996246908446719e-06, "loss": 1.7617, "step": 4373 }, { "epoch": 0.23465665236051503, "grad_norm": 0.3359375, "learning_rate": 4.9962421483443965e-06, "loss": 2.3169, "step": 4374 }, { "epoch": 0.23471030042918456, "grad_norm": 4.65625, "learning_rate": 4.996237385227602e-06, "loss": 1.3484, "step": 4375 }, { "epoch": 0.23476394849785406, "grad_norm": 0.34765625, "learning_rate": 4.996232619096342e-06, "loss": 2.2149, "step": 4376 }, { "epoch": 0.2348175965665236, "grad_norm": 0.484375, "learning_rate": 4.996227849950622e-06, "loss": 2.3391, "step": 4377 }, { "epoch": 0.23487124463519313, "grad_norm": 0.345703125, "learning_rate": 4.996223077790449e-06, "loss": 2.476, "step": 4378 }, { "epoch": 0.23492489270386266, "grad_norm": 0.421875, "learning_rate": 4.996218302615826e-06, "loss": 2.1686, "step": 4379 }, { "epoch": 0.2349785407725322, "grad_norm": 0.33203125, "learning_rate": 4.9962135244267615e-06, "loss": 2.2898, "step": 4380 }, { "epoch": 0.23503218884120172, "grad_norm": 0.369140625, "learning_rate": 4.99620874322326e-06, "loss": 2.3067, "step": 4381 }, { "epoch": 0.23508583690987125, "grad_norm": 0.90234375, "learning_rate": 4.996203959005327e-06, "loss": 2.3529, "step": 4382 }, { "epoch": 0.23513948497854079, "grad_norm": 0.53125, "learning_rate": 4.996199171772969e-06, "loss": 2.1043, "step": 4383 }, { "epoch": 0.2351931330472103, "grad_norm": 0.56640625, "learning_rate": 4.996194381526192e-06, "loss": 2.2897, "step": 4384 }, { "epoch": 0.23524678111587982, "grad_norm": 0.37109375, "learning_rate": 4.996189588265001e-06, "loss": 2.5071, "step": 4385 }, { "epoch": 0.23530042918454935, "grad_norm": 0.421875, "learning_rate": 4.9961847919894015e-06, "loss": 2.6161, "step": 4386 }, { "epoch": 0.23535407725321889, "grad_norm": 0.40625, "learning_rate": 4.9961799926994e-06, "loss": 2.6021, "step": 4387 }, { "epoch": 0.23540772532188842, "grad_norm": 0.3828125, "learning_rate": 4.9961751903950025e-06, "loss": 2.3575, "step": 4388 }, { "epoch": 0.23546137339055795, "grad_norm": 0.55078125, "learning_rate": 4.996170385076215e-06, "loss": 2.4488, "step": 4389 }, { "epoch": 0.23551502145922748, "grad_norm": 0.341796875, "learning_rate": 4.996165576743043e-06, "loss": 2.2093, "step": 4390 }, { "epoch": 0.23556866952789698, "grad_norm": 0.421875, "learning_rate": 4.996160765395491e-06, "loss": 2.4794, "step": 4391 }, { "epoch": 0.23562231759656652, "grad_norm": 2.125, "learning_rate": 4.996155951033567e-06, "loss": 2.4225, "step": 4392 }, { "epoch": 0.23567596566523605, "grad_norm": 0.3671875, "learning_rate": 4.996151133657274e-06, "loss": 2.4799, "step": 4393 }, { "epoch": 0.23572961373390558, "grad_norm": 0.345703125, "learning_rate": 4.996146313266621e-06, "loss": 2.6127, "step": 4394 }, { "epoch": 0.2357832618025751, "grad_norm": 0.359375, "learning_rate": 4.9961414898616125e-06, "loss": 2.2001, "step": 4395 }, { "epoch": 0.23583690987124464, "grad_norm": 0.38671875, "learning_rate": 4.996136663442253e-06, "loss": 2.5978, "step": 4396 }, { "epoch": 0.23589055793991417, "grad_norm": 0.380859375, "learning_rate": 4.99613183400855e-06, "loss": 2.2504, "step": 4397 }, { "epoch": 0.23594420600858368, "grad_norm": 0.408203125, "learning_rate": 4.996127001560509e-06, "loss": 2.2614, "step": 4398 }, { "epoch": 0.2359978540772532, "grad_norm": 0.3671875, "learning_rate": 4.996122166098136e-06, "loss": 2.3109, "step": 4399 }, { "epoch": 0.23605150214592274, "grad_norm": 0.478515625, "learning_rate": 4.996117327621436e-06, "loss": 2.381, "step": 4400 }, { "epoch": 0.23610515021459227, "grad_norm": 0.3203125, "learning_rate": 4.996112486130415e-06, "loss": 2.1477, "step": 4401 }, { "epoch": 0.2361587982832618, "grad_norm": 0.35546875, "learning_rate": 4.99610764162508e-06, "loss": 2.2478, "step": 4402 }, { "epoch": 0.23621244635193134, "grad_norm": 0.390625, "learning_rate": 4.996102794105436e-06, "loss": 2.2177, "step": 4403 }, { "epoch": 0.23626609442060087, "grad_norm": 0.349609375, "learning_rate": 4.996097943571489e-06, "loss": 2.4646, "step": 4404 }, { "epoch": 0.23631974248927037, "grad_norm": 0.359375, "learning_rate": 4.996093090023245e-06, "loss": 1.8481, "step": 4405 }, { "epoch": 0.2363733905579399, "grad_norm": 0.337890625, "learning_rate": 4.996088233460708e-06, "loss": 2.2515, "step": 4406 }, { "epoch": 0.23642703862660944, "grad_norm": 0.357421875, "learning_rate": 4.996083373883887e-06, "loss": 2.4977, "step": 4407 }, { "epoch": 0.23648068669527897, "grad_norm": 0.47265625, "learning_rate": 4.996078511292786e-06, "loss": 2.2966, "step": 4408 }, { "epoch": 0.2365343347639485, "grad_norm": 0.359375, "learning_rate": 4.996073645687412e-06, "loss": 2.2711, "step": 4409 }, { "epoch": 0.23658798283261803, "grad_norm": 0.40625, "learning_rate": 4.996068777067769e-06, "loss": 2.2396, "step": 4410 }, { "epoch": 0.23664163090128756, "grad_norm": 0.58984375, "learning_rate": 4.996063905433865e-06, "loss": 2.3766, "step": 4411 }, { "epoch": 0.23669527896995707, "grad_norm": 0.349609375, "learning_rate": 4.996059030785703e-06, "loss": 2.4784, "step": 4412 }, { "epoch": 0.2367489270386266, "grad_norm": 0.322265625, "learning_rate": 4.996054153123292e-06, "loss": 2.299, "step": 4413 }, { "epoch": 0.23680257510729613, "grad_norm": 0.353515625, "learning_rate": 4.996049272446637e-06, "loss": 1.838, "step": 4414 }, { "epoch": 0.23685622317596566, "grad_norm": 0.38671875, "learning_rate": 4.996044388755743e-06, "loss": 2.1311, "step": 4415 }, { "epoch": 0.2369098712446352, "grad_norm": 0.3515625, "learning_rate": 4.996039502050617e-06, "loss": 2.3122, "step": 4416 }, { "epoch": 0.23696351931330473, "grad_norm": 0.298828125, "learning_rate": 4.9960346123312635e-06, "loss": 1.9074, "step": 4417 }, { "epoch": 0.23701716738197426, "grad_norm": 0.3828125, "learning_rate": 4.99602971959769e-06, "loss": 2.3038, "step": 4418 }, { "epoch": 0.2370708154506438, "grad_norm": 0.3125, "learning_rate": 4.996024823849901e-06, "loss": 2.3365, "step": 4419 }, { "epoch": 0.2371244635193133, "grad_norm": 1.4296875, "learning_rate": 4.996019925087904e-06, "loss": 1.3779, "step": 4420 }, { "epoch": 0.23717811158798283, "grad_norm": 0.380859375, "learning_rate": 4.996015023311703e-06, "loss": 2.2404, "step": 4421 }, { "epoch": 0.23723175965665236, "grad_norm": 0.318359375, "learning_rate": 4.996010118521305e-06, "loss": 2.4014, "step": 4422 }, { "epoch": 0.2372854077253219, "grad_norm": 0.359375, "learning_rate": 4.996005210716717e-06, "loss": 2.307, "step": 4423 }, { "epoch": 0.23733905579399142, "grad_norm": 0.421875, "learning_rate": 4.996000299897943e-06, "loss": 2.2976, "step": 4424 }, { "epoch": 0.23739270386266095, "grad_norm": 0.427734375, "learning_rate": 4.995995386064989e-06, "loss": 2.1347, "step": 4425 }, { "epoch": 0.23744635193133048, "grad_norm": 0.3125, "learning_rate": 4.995990469217862e-06, "loss": 2.403, "step": 4426 }, { "epoch": 0.2375, "grad_norm": 0.38671875, "learning_rate": 4.995985549356568e-06, "loss": 2.3395, "step": 4427 }, { "epoch": 0.23755364806866952, "grad_norm": 0.37109375, "learning_rate": 4.995980626481112e-06, "loss": 2.2455, "step": 4428 }, { "epoch": 0.23760729613733905, "grad_norm": 0.34765625, "learning_rate": 4.995975700591501e-06, "loss": 2.2237, "step": 4429 }, { "epoch": 0.23766094420600858, "grad_norm": 0.375, "learning_rate": 4.99597077168774e-06, "loss": 2.3662, "step": 4430 }, { "epoch": 0.23771459227467812, "grad_norm": 0.52734375, "learning_rate": 4.995965839769836e-06, "loss": 1.7465, "step": 4431 }, { "epoch": 0.23776824034334765, "grad_norm": 0.7265625, "learning_rate": 4.995960904837792e-06, "loss": 2.4668, "step": 4432 }, { "epoch": 0.23782188841201718, "grad_norm": 0.47265625, "learning_rate": 4.995955966891619e-06, "loss": 2.5541, "step": 4433 }, { "epoch": 0.23787553648068668, "grad_norm": 0.37109375, "learning_rate": 4.995951025931319e-06, "loss": 2.239, "step": 4434 }, { "epoch": 0.23792918454935622, "grad_norm": 0.353515625, "learning_rate": 4.9959460819568995e-06, "loss": 2.0969, "step": 4435 }, { "epoch": 0.23798283261802575, "grad_norm": 0.330078125, "learning_rate": 4.995941134968366e-06, "loss": 2.0611, "step": 4436 }, { "epoch": 0.23803648068669528, "grad_norm": 0.357421875, "learning_rate": 4.995936184965724e-06, "loss": 2.0201, "step": 4437 }, { "epoch": 0.2380901287553648, "grad_norm": 0.42578125, "learning_rate": 4.995931231948982e-06, "loss": 2.3877, "step": 4438 }, { "epoch": 0.23814377682403434, "grad_norm": 0.34765625, "learning_rate": 4.995926275918143e-06, "loss": 2.5047, "step": 4439 }, { "epoch": 0.23819742489270387, "grad_norm": 0.35546875, "learning_rate": 4.995921316873214e-06, "loss": 2.1056, "step": 4440 }, { "epoch": 0.23825107296137338, "grad_norm": 0.341796875, "learning_rate": 4.9959163548142e-06, "loss": 2.3089, "step": 4441 }, { "epoch": 0.2383047210300429, "grad_norm": 0.345703125, "learning_rate": 4.995911389741109e-06, "loss": 2.3981, "step": 4442 }, { "epoch": 0.23835836909871244, "grad_norm": 0.408203125, "learning_rate": 4.995906421653947e-06, "loss": 2.5148, "step": 4443 }, { "epoch": 0.23841201716738197, "grad_norm": 0.3515625, "learning_rate": 4.995901450552718e-06, "loss": 2.0109, "step": 4444 }, { "epoch": 0.2384656652360515, "grad_norm": 0.396484375, "learning_rate": 4.995896476437428e-06, "loss": 2.234, "step": 4445 }, { "epoch": 0.23851931330472104, "grad_norm": 0.3203125, "learning_rate": 4.995891499308086e-06, "loss": 2.1744, "step": 4446 }, { "epoch": 0.23857296137339057, "grad_norm": 0.3515625, "learning_rate": 4.995886519164695e-06, "loss": 2.3024, "step": 4447 }, { "epoch": 0.23862660944206007, "grad_norm": 0.341796875, "learning_rate": 4.995881536007262e-06, "loss": 2.2192, "step": 4448 }, { "epoch": 0.2386802575107296, "grad_norm": 0.4296875, "learning_rate": 4.995876549835794e-06, "loss": 1.5131, "step": 4449 }, { "epoch": 0.23873390557939914, "grad_norm": 0.453125, "learning_rate": 4.995871560650296e-06, "loss": 2.3098, "step": 4450 }, { "epoch": 0.23878755364806867, "grad_norm": 0.98046875, "learning_rate": 4.995866568450774e-06, "loss": 2.2933, "step": 4451 }, { "epoch": 0.2388412017167382, "grad_norm": 0.357421875, "learning_rate": 4.995861573237234e-06, "loss": 2.2712, "step": 4452 }, { "epoch": 0.23889484978540773, "grad_norm": 0.419921875, "learning_rate": 4.995856575009682e-06, "loss": 2.4613, "step": 4453 }, { "epoch": 0.23894849785407726, "grad_norm": 0.330078125, "learning_rate": 4.9958515737681245e-06, "loss": 2.1801, "step": 4454 }, { "epoch": 0.2390021459227468, "grad_norm": 0.345703125, "learning_rate": 4.995846569512567e-06, "loss": 2.3378, "step": 4455 }, { "epoch": 0.2390557939914163, "grad_norm": 0.34765625, "learning_rate": 4.995841562243017e-06, "loss": 2.1578, "step": 4456 }, { "epoch": 0.23910944206008583, "grad_norm": 0.62890625, "learning_rate": 4.995836551959477e-06, "loss": 2.1712, "step": 4457 }, { "epoch": 0.23916309012875536, "grad_norm": 0.38671875, "learning_rate": 4.9958315386619575e-06, "loss": 1.8003, "step": 4458 }, { "epoch": 0.2392167381974249, "grad_norm": 0.3984375, "learning_rate": 4.995826522350462e-06, "loss": 2.4277, "step": 4459 }, { "epoch": 0.23927038626609443, "grad_norm": 0.318359375, "learning_rate": 4.995821503024997e-06, "loss": 2.2494, "step": 4460 }, { "epoch": 0.23932403433476396, "grad_norm": 0.421875, "learning_rate": 4.995816480685568e-06, "loss": 2.1969, "step": 4461 }, { "epoch": 0.2393776824034335, "grad_norm": 0.78515625, "learning_rate": 4.995811455332182e-06, "loss": 2.1063, "step": 4462 }, { "epoch": 0.239431330472103, "grad_norm": 0.37109375, "learning_rate": 4.995806426964846e-06, "loss": 2.3503, "step": 4463 }, { "epoch": 0.23948497854077253, "grad_norm": 0.33203125, "learning_rate": 4.995801395583563e-06, "loss": 1.9303, "step": 4464 }, { "epoch": 0.23953862660944206, "grad_norm": 0.392578125, "learning_rate": 4.9957963611883415e-06, "loss": 2.2407, "step": 4465 }, { "epoch": 0.2395922746781116, "grad_norm": 0.361328125, "learning_rate": 4.995791323779187e-06, "loss": 2.2884, "step": 4466 }, { "epoch": 0.23964592274678112, "grad_norm": 0.349609375, "learning_rate": 4.995786283356105e-06, "loss": 2.3338, "step": 4467 }, { "epoch": 0.23969957081545065, "grad_norm": 0.392578125, "learning_rate": 4.995781239919103e-06, "loss": 2.2185, "step": 4468 }, { "epoch": 0.23975321888412018, "grad_norm": 0.33203125, "learning_rate": 4.9957761934681865e-06, "loss": 1.8877, "step": 4469 }, { "epoch": 0.2398068669527897, "grad_norm": 0.427734375, "learning_rate": 4.995771144003361e-06, "loss": 2.3482, "step": 4470 }, { "epoch": 0.23986051502145922, "grad_norm": 0.357421875, "learning_rate": 4.995766091524632e-06, "loss": 2.1798, "step": 4471 }, { "epoch": 0.23991416309012875, "grad_norm": 0.5078125, "learning_rate": 4.995761036032007e-06, "loss": 2.3819, "step": 4472 }, { "epoch": 0.23996781115879828, "grad_norm": 0.470703125, "learning_rate": 4.995755977525492e-06, "loss": 2.2641, "step": 4473 }, { "epoch": 0.24002145922746781, "grad_norm": 0.314453125, "learning_rate": 4.995750916005092e-06, "loss": 2.1664, "step": 4474 }, { "epoch": 0.24007510729613735, "grad_norm": 0.306640625, "learning_rate": 4.9957458514708155e-06, "loss": 1.9657, "step": 4475 }, { "epoch": 0.24012875536480688, "grad_norm": 0.328125, "learning_rate": 4.995740783922666e-06, "loss": 2.0926, "step": 4476 }, { "epoch": 0.24018240343347638, "grad_norm": 0.546875, "learning_rate": 4.99573571336065e-06, "loss": 1.8623, "step": 4477 }, { "epoch": 0.24023605150214591, "grad_norm": 0.388671875, "learning_rate": 4.9957306397847745e-06, "loss": 2.2556, "step": 4478 }, { "epoch": 0.24028969957081545, "grad_norm": 0.44921875, "learning_rate": 4.995725563195045e-06, "loss": 2.2786, "step": 4479 }, { "epoch": 0.24034334763948498, "grad_norm": 0.408203125, "learning_rate": 4.995720483591469e-06, "loss": 2.3825, "step": 4480 }, { "epoch": 0.2403969957081545, "grad_norm": 0.380859375, "learning_rate": 4.995715400974051e-06, "loss": 2.3931, "step": 4481 }, { "epoch": 0.24045064377682404, "grad_norm": 0.37890625, "learning_rate": 4.995710315342797e-06, "loss": 2.1958, "step": 4482 }, { "epoch": 0.24050429184549357, "grad_norm": 0.353515625, "learning_rate": 4.995705226697715e-06, "loss": 2.4638, "step": 4483 }, { "epoch": 0.24055793991416308, "grad_norm": 0.408203125, "learning_rate": 4.99570013503881e-06, "loss": 2.2074, "step": 4484 }, { "epoch": 0.2406115879828326, "grad_norm": 0.37890625, "learning_rate": 4.995695040366088e-06, "loss": 2.4277, "step": 4485 }, { "epoch": 0.24066523605150214, "grad_norm": 0.314453125, "learning_rate": 4.995689942679555e-06, "loss": 2.0864, "step": 4486 }, { "epoch": 0.24071888412017167, "grad_norm": 0.36328125, "learning_rate": 4.995684841979217e-06, "loss": 2.4642, "step": 4487 }, { "epoch": 0.2407725321888412, "grad_norm": 0.380859375, "learning_rate": 4.995679738265081e-06, "loss": 2.3718, "step": 4488 }, { "epoch": 0.24082618025751074, "grad_norm": 0.8515625, "learning_rate": 4.995674631537153e-06, "loss": 2.5446, "step": 4489 }, { "epoch": 0.24087982832618027, "grad_norm": 0.3515625, "learning_rate": 4.995669521795439e-06, "loss": 2.1314, "step": 4490 }, { "epoch": 0.2409334763948498, "grad_norm": 0.375, "learning_rate": 4.9956644090399455e-06, "loss": 2.6526, "step": 4491 }, { "epoch": 0.2409871244635193, "grad_norm": 0.384765625, "learning_rate": 4.995659293270677e-06, "loss": 2.4692, "step": 4492 }, { "epoch": 0.24104077253218884, "grad_norm": 0.37109375, "learning_rate": 4.995654174487642e-06, "loss": 2.3226, "step": 4493 }, { "epoch": 0.24109442060085837, "grad_norm": 0.341796875, "learning_rate": 4.995649052690846e-06, "loss": 2.4409, "step": 4494 }, { "epoch": 0.2411480686695279, "grad_norm": 0.369140625, "learning_rate": 4.995643927880294e-06, "loss": 2.3634, "step": 4495 }, { "epoch": 0.24120171673819743, "grad_norm": 0.359375, "learning_rate": 4.995638800055993e-06, "loss": 2.327, "step": 4496 }, { "epoch": 0.24125536480686696, "grad_norm": 0.376953125, "learning_rate": 4.99563366921795e-06, "loss": 2.1031, "step": 4497 }, { "epoch": 0.2413090128755365, "grad_norm": 0.3984375, "learning_rate": 4.995628535366171e-06, "loss": 2.2666, "step": 4498 }, { "epoch": 0.241362660944206, "grad_norm": 0.490234375, "learning_rate": 4.99562339850066e-06, "loss": 2.3488, "step": 4499 }, { "epoch": 0.24141630901287553, "grad_norm": 0.322265625, "learning_rate": 4.995618258621425e-06, "loss": 2.4031, "step": 4500 }, { "epoch": 0.24146995708154506, "grad_norm": 0.373046875, "learning_rate": 4.995613115728473e-06, "loss": 2.2496, "step": 4501 }, { "epoch": 0.2415236051502146, "grad_norm": 0.349609375, "learning_rate": 4.995607969821809e-06, "loss": 2.3826, "step": 4502 }, { "epoch": 0.24157725321888412, "grad_norm": 0.296875, "learning_rate": 4.995602820901439e-06, "loss": 2.1686, "step": 4503 }, { "epoch": 0.24163090128755366, "grad_norm": 0.59765625, "learning_rate": 4.995597668967371e-06, "loss": 2.6787, "step": 4504 }, { "epoch": 0.2416845493562232, "grad_norm": 0.416015625, "learning_rate": 4.995592514019609e-06, "loss": 2.7441, "step": 4505 }, { "epoch": 0.2417381974248927, "grad_norm": 0.4140625, "learning_rate": 4.995587356058159e-06, "loss": 2.2946, "step": 4506 }, { "epoch": 0.24179184549356222, "grad_norm": 0.39453125, "learning_rate": 4.9955821950830295e-06, "loss": 2.3741, "step": 4507 }, { "epoch": 0.24184549356223176, "grad_norm": 0.373046875, "learning_rate": 4.995577031094225e-06, "loss": 2.5099, "step": 4508 }, { "epoch": 0.2418991416309013, "grad_norm": 0.408203125, "learning_rate": 4.995571864091753e-06, "loss": 2.2865, "step": 4509 }, { "epoch": 0.24195278969957082, "grad_norm": 0.427734375, "learning_rate": 4.995566694075619e-06, "loss": 2.38, "step": 4510 }, { "epoch": 0.24200643776824035, "grad_norm": 0.58984375, "learning_rate": 4.9955615210458284e-06, "loss": 2.4925, "step": 4511 }, { "epoch": 0.24206008583690988, "grad_norm": 0.439453125, "learning_rate": 4.99555634500239e-06, "loss": 2.4641, "step": 4512 }, { "epoch": 0.2421137339055794, "grad_norm": 0.333984375, "learning_rate": 4.995551165945307e-06, "loss": 2.1134, "step": 4513 }, { "epoch": 0.24216738197424892, "grad_norm": 0.36328125, "learning_rate": 4.995545983874588e-06, "loss": 1.9623, "step": 4514 }, { "epoch": 0.24222103004291845, "grad_norm": 0.85546875, "learning_rate": 4.9955407987902375e-06, "loss": 2.3321, "step": 4515 }, { "epoch": 0.24227467811158798, "grad_norm": 0.37890625, "learning_rate": 4.995535610692263e-06, "loss": 2.1506, "step": 4516 }, { "epoch": 0.24232832618025751, "grad_norm": 0.31640625, "learning_rate": 4.99553041958067e-06, "loss": 2.1842, "step": 4517 }, { "epoch": 0.24238197424892705, "grad_norm": 0.455078125, "learning_rate": 4.995525225455465e-06, "loss": 2.2843, "step": 4518 }, { "epoch": 0.24243562231759658, "grad_norm": 0.365234375, "learning_rate": 4.995520028316656e-06, "loss": 2.3844, "step": 4519 }, { "epoch": 0.24248927038626608, "grad_norm": 0.412109375, "learning_rate": 4.995514828164246e-06, "loss": 2.4938, "step": 4520 }, { "epoch": 0.2425429184549356, "grad_norm": 1.796875, "learning_rate": 4.995509624998244e-06, "loss": 2.4699, "step": 4521 }, { "epoch": 0.24259656652360514, "grad_norm": 0.34765625, "learning_rate": 4.995504418818654e-06, "loss": 2.4737, "step": 4522 }, { "epoch": 0.24265021459227468, "grad_norm": 0.34375, "learning_rate": 4.995499209625484e-06, "loss": 2.2537, "step": 4523 }, { "epoch": 0.2427038626609442, "grad_norm": 0.35546875, "learning_rate": 4.99549399741874e-06, "loss": 2.3068, "step": 4524 }, { "epoch": 0.24275751072961374, "grad_norm": 0.35546875, "learning_rate": 4.9954887821984285e-06, "loss": 2.4009, "step": 4525 }, { "epoch": 0.24281115879828327, "grad_norm": 0.345703125, "learning_rate": 4.995483563964555e-06, "loss": 2.3111, "step": 4526 }, { "epoch": 0.24286480686695278, "grad_norm": 0.31640625, "learning_rate": 4.995478342717126e-06, "loss": 1.9589, "step": 4527 }, { "epoch": 0.2429184549356223, "grad_norm": 0.373046875, "learning_rate": 4.9954731184561475e-06, "loss": 2.2969, "step": 4528 }, { "epoch": 0.24297210300429184, "grad_norm": 0.310546875, "learning_rate": 4.995467891181627e-06, "loss": 2.5011, "step": 4529 }, { "epoch": 0.24302575107296137, "grad_norm": 0.3671875, "learning_rate": 4.99546266089357e-06, "loss": 2.6085, "step": 4530 }, { "epoch": 0.2430793991416309, "grad_norm": 0.474609375, "learning_rate": 4.995457427591983e-06, "loss": 2.4931, "step": 4531 }, { "epoch": 0.24313304721030043, "grad_norm": 0.337890625, "learning_rate": 4.995452191276873e-06, "loss": 2.1967, "step": 4532 }, { "epoch": 0.24318669527896997, "grad_norm": 0.396484375, "learning_rate": 4.995446951948244e-06, "loss": 2.4224, "step": 4533 }, { "epoch": 0.2432403433476395, "grad_norm": 0.341796875, "learning_rate": 4.9954417096061055e-06, "loss": 2.3547, "step": 4534 }, { "epoch": 0.243293991416309, "grad_norm": 0.5, "learning_rate": 4.995436464250461e-06, "loss": 2.3363, "step": 4535 }, { "epoch": 0.24334763948497853, "grad_norm": 0.361328125, "learning_rate": 4.995431215881319e-06, "loss": 2.1497, "step": 4536 }, { "epoch": 0.24340128755364807, "grad_norm": 0.46875, "learning_rate": 4.995425964498684e-06, "loss": 2.4131, "step": 4537 }, { "epoch": 0.2434549356223176, "grad_norm": 0.4140625, "learning_rate": 4.995420710102564e-06, "loss": 2.1842, "step": 4538 }, { "epoch": 0.24350858369098713, "grad_norm": 0.357421875, "learning_rate": 4.995415452692965e-06, "loss": 2.465, "step": 4539 }, { "epoch": 0.24356223175965666, "grad_norm": 0.953125, "learning_rate": 4.995410192269892e-06, "loss": 2.6375, "step": 4540 }, { "epoch": 0.2436158798283262, "grad_norm": 0.392578125, "learning_rate": 4.995404928833351e-06, "loss": 2.1969, "step": 4541 }, { "epoch": 0.2436695278969957, "grad_norm": 0.337890625, "learning_rate": 4.995399662383352e-06, "loss": 2.2847, "step": 4542 }, { "epoch": 0.24372317596566523, "grad_norm": 0.384765625, "learning_rate": 4.995394392919899e-06, "loss": 2.3106, "step": 4543 }, { "epoch": 0.24377682403433476, "grad_norm": 0.478515625, "learning_rate": 4.995389120442997e-06, "loss": 2.3467, "step": 4544 }, { "epoch": 0.2438304721030043, "grad_norm": 0.46484375, "learning_rate": 4.9953838449526545e-06, "loss": 2.0159, "step": 4545 }, { "epoch": 0.24388412017167382, "grad_norm": 0.455078125, "learning_rate": 4.9953785664488775e-06, "loss": 2.2645, "step": 4546 }, { "epoch": 0.24393776824034336, "grad_norm": 0.390625, "learning_rate": 4.995373284931671e-06, "loss": 2.4472, "step": 4547 }, { "epoch": 0.2439914163090129, "grad_norm": 0.310546875, "learning_rate": 4.995368000401043e-06, "loss": 2.0745, "step": 4548 }, { "epoch": 0.2440450643776824, "grad_norm": 0.388671875, "learning_rate": 4.995362712856999e-06, "loss": 2.3355, "step": 4549 }, { "epoch": 0.24409871244635192, "grad_norm": 0.474609375, "learning_rate": 4.995357422299546e-06, "loss": 2.4018, "step": 4550 }, { "epoch": 0.24415236051502145, "grad_norm": 0.46875, "learning_rate": 4.995352128728689e-06, "loss": 2.4894, "step": 4551 }, { "epoch": 0.244206008583691, "grad_norm": 0.333984375, "learning_rate": 4.995346832144437e-06, "loss": 2.3183, "step": 4552 }, { "epoch": 0.24425965665236052, "grad_norm": 0.36328125, "learning_rate": 4.995341532546793e-06, "loss": 2.3756, "step": 4553 }, { "epoch": 0.24431330472103005, "grad_norm": 0.30859375, "learning_rate": 4.995336229935767e-06, "loss": 2.2167, "step": 4554 }, { "epoch": 0.24436695278969958, "grad_norm": 0.44140625, "learning_rate": 4.995330924311363e-06, "loss": 2.5672, "step": 4555 }, { "epoch": 0.24442060085836909, "grad_norm": 0.3515625, "learning_rate": 4.995325615673587e-06, "loss": 2.1594, "step": 4556 }, { "epoch": 0.24447424892703862, "grad_norm": 0.34765625, "learning_rate": 4.995320304022448e-06, "loss": 2.3334, "step": 4557 }, { "epoch": 0.24452789699570815, "grad_norm": 0.38671875, "learning_rate": 4.99531498935795e-06, "loss": 2.5201, "step": 4558 }, { "epoch": 0.24458154506437768, "grad_norm": 0.451171875, "learning_rate": 4.9953096716801e-06, "loss": 2.2207, "step": 4559 }, { "epoch": 0.2446351931330472, "grad_norm": 0.357421875, "learning_rate": 4.995304350988905e-06, "loss": 2.0295, "step": 4560 }, { "epoch": 0.24468884120171674, "grad_norm": 0.36328125, "learning_rate": 4.995299027284371e-06, "loss": 2.2167, "step": 4561 }, { "epoch": 0.24474248927038628, "grad_norm": 0.419921875, "learning_rate": 4.995293700566506e-06, "loss": 2.3585, "step": 4562 }, { "epoch": 0.24479613733905578, "grad_norm": 0.55078125, "learning_rate": 4.995288370835313e-06, "loss": 2.246, "step": 4563 }, { "epoch": 0.2448497854077253, "grad_norm": 0.310546875, "learning_rate": 4.995283038090801e-06, "loss": 2.4302, "step": 4564 }, { "epoch": 0.24490343347639484, "grad_norm": 0.515625, "learning_rate": 4.995277702332976e-06, "loss": 2.3841, "step": 4565 }, { "epoch": 0.24495708154506438, "grad_norm": 0.349609375, "learning_rate": 4.995272363561845e-06, "loss": 2.3378, "step": 4566 }, { "epoch": 0.2450107296137339, "grad_norm": 0.369140625, "learning_rate": 4.995267021777413e-06, "loss": 2.2145, "step": 4567 }, { "epoch": 0.24506437768240344, "grad_norm": 0.3515625, "learning_rate": 4.9952616769796865e-06, "loss": 2.2366, "step": 4568 }, { "epoch": 0.24511802575107297, "grad_norm": 0.396484375, "learning_rate": 4.995256329168674e-06, "loss": 2.4716, "step": 4569 }, { "epoch": 0.2451716738197425, "grad_norm": 0.58984375, "learning_rate": 4.995250978344379e-06, "loss": 2.3167, "step": 4570 }, { "epoch": 0.245225321888412, "grad_norm": 0.3515625, "learning_rate": 4.9952456245068115e-06, "loss": 2.3981, "step": 4571 }, { "epoch": 0.24527896995708154, "grad_norm": 0.421875, "learning_rate": 4.995240267655975e-06, "loss": 2.5937, "step": 4572 }, { "epoch": 0.24533261802575107, "grad_norm": 0.47265625, "learning_rate": 4.995234907791877e-06, "loss": 2.1828, "step": 4573 }, { "epoch": 0.2453862660944206, "grad_norm": 1.015625, "learning_rate": 4.995229544914524e-06, "loss": 2.2706, "step": 4574 }, { "epoch": 0.24543991416309013, "grad_norm": 2.640625, "learning_rate": 4.995224179023922e-06, "loss": 2.1034, "step": 4575 }, { "epoch": 0.24549356223175967, "grad_norm": 4.0, "learning_rate": 4.995218810120079e-06, "loss": 2.5137, "step": 4576 }, { "epoch": 0.2455472103004292, "grad_norm": 0.357421875, "learning_rate": 4.995213438202999e-06, "loss": 2.2122, "step": 4577 }, { "epoch": 0.2456008583690987, "grad_norm": 0.37890625, "learning_rate": 4.9952080632726914e-06, "loss": 2.3585, "step": 4578 }, { "epoch": 0.24565450643776823, "grad_norm": 0.48046875, "learning_rate": 4.99520268532916e-06, "loss": 2.576, "step": 4579 }, { "epoch": 0.24570815450643776, "grad_norm": 0.40234375, "learning_rate": 4.995197304372414e-06, "loss": 2.4793, "step": 4580 }, { "epoch": 0.2457618025751073, "grad_norm": 0.796875, "learning_rate": 4.995191920402457e-06, "loss": 2.2395, "step": 4581 }, { "epoch": 0.24581545064377683, "grad_norm": 0.3671875, "learning_rate": 4.995186533419298e-06, "loss": 2.2984, "step": 4582 }, { "epoch": 0.24586909871244636, "grad_norm": 0.35546875, "learning_rate": 4.9951811434229404e-06, "loss": 2.2648, "step": 4583 }, { "epoch": 0.2459227467811159, "grad_norm": 0.419921875, "learning_rate": 4.995175750413395e-06, "loss": 2.4453, "step": 4584 }, { "epoch": 0.2459763948497854, "grad_norm": 0.32421875, "learning_rate": 4.995170354390665e-06, "loss": 2.2907, "step": 4585 }, { "epoch": 0.24603004291845493, "grad_norm": 0.314453125, "learning_rate": 4.995164955354758e-06, "loss": 2.004, "step": 4586 }, { "epoch": 0.24608369098712446, "grad_norm": 0.6796875, "learning_rate": 4.99515955330568e-06, "loss": 2.1667, "step": 4587 }, { "epoch": 0.246137339055794, "grad_norm": 0.4296875, "learning_rate": 4.995154148243438e-06, "loss": 2.2074, "step": 4588 }, { "epoch": 0.24619098712446352, "grad_norm": 0.390625, "learning_rate": 4.995148740168039e-06, "loss": 2.2952, "step": 4589 }, { "epoch": 0.24624463519313305, "grad_norm": 0.390625, "learning_rate": 4.99514332907949e-06, "loss": 2.3976, "step": 4590 }, { "epoch": 0.2462982832618026, "grad_norm": 0.3203125, "learning_rate": 4.9951379149777944e-06, "loss": 2.2176, "step": 4591 }, { "epoch": 0.2463519313304721, "grad_norm": 0.306640625, "learning_rate": 4.9951324978629625e-06, "loss": 1.8919, "step": 4592 }, { "epoch": 0.24640557939914162, "grad_norm": 0.373046875, "learning_rate": 4.995127077734998e-06, "loss": 2.2717, "step": 4593 }, { "epoch": 0.24645922746781115, "grad_norm": 0.419921875, "learning_rate": 4.99512165459391e-06, "loss": 2.2607, "step": 4594 }, { "epoch": 0.24651287553648069, "grad_norm": 0.345703125, "learning_rate": 4.995116228439702e-06, "loss": 2.3468, "step": 4595 }, { "epoch": 0.24656652360515022, "grad_norm": 0.388671875, "learning_rate": 4.995110799272384e-06, "loss": 2.3856, "step": 4596 }, { "epoch": 0.24662017167381975, "grad_norm": 0.44921875, "learning_rate": 4.99510536709196e-06, "loss": 2.1204, "step": 4597 }, { "epoch": 0.24667381974248928, "grad_norm": 0.365234375, "learning_rate": 4.995099931898438e-06, "loss": 2.376, "step": 4598 }, { "epoch": 0.24672746781115878, "grad_norm": 0.6328125, "learning_rate": 4.9950944936918236e-06, "loss": 2.5111, "step": 4599 }, { "epoch": 0.24678111587982832, "grad_norm": 0.369140625, "learning_rate": 4.995089052472124e-06, "loss": 2.2934, "step": 4600 }, { "epoch": 0.24683476394849785, "grad_norm": 0.341796875, "learning_rate": 4.995083608239345e-06, "loss": 2.2407, "step": 4601 }, { "epoch": 0.24688841201716738, "grad_norm": 0.345703125, "learning_rate": 4.995078160993494e-06, "loss": 2.2023, "step": 4602 }, { "epoch": 0.2469420600858369, "grad_norm": 0.3359375, "learning_rate": 4.995072710734578e-06, "loss": 2.1156, "step": 4603 }, { "epoch": 0.24699570815450644, "grad_norm": 0.4453125, "learning_rate": 4.995067257462601e-06, "loss": 1.9518, "step": 4604 }, { "epoch": 0.24704935622317598, "grad_norm": 0.421875, "learning_rate": 4.995061801177573e-06, "loss": 2.1171, "step": 4605 }, { "epoch": 0.2471030042918455, "grad_norm": 0.55078125, "learning_rate": 4.995056341879499e-06, "loss": 2.5426, "step": 4606 }, { "epoch": 0.247156652360515, "grad_norm": 0.39453125, "learning_rate": 4.9950508795683846e-06, "loss": 2.4797, "step": 4607 }, { "epoch": 0.24721030042918454, "grad_norm": 0.4296875, "learning_rate": 4.995045414244238e-06, "loss": 2.1743, "step": 4608 }, { "epoch": 0.24726394849785407, "grad_norm": 0.34375, "learning_rate": 4.9950399459070655e-06, "loss": 2.2923, "step": 4609 }, { "epoch": 0.2473175965665236, "grad_norm": 0.458984375, "learning_rate": 4.9950344745568724e-06, "loss": 1.4513, "step": 4610 }, { "epoch": 0.24737124463519314, "grad_norm": 1.1640625, "learning_rate": 4.995029000193667e-06, "loss": 2.5055, "step": 4611 }, { "epoch": 0.24742489270386267, "grad_norm": 0.369140625, "learning_rate": 4.9950235228174554e-06, "loss": 2.372, "step": 4612 }, { "epoch": 0.2474785407725322, "grad_norm": 0.310546875, "learning_rate": 4.995018042428243e-06, "loss": 2.2747, "step": 4613 }, { "epoch": 0.2475321888412017, "grad_norm": 0.3125, "learning_rate": 4.995012559026039e-06, "loss": 2.0832, "step": 4614 }, { "epoch": 0.24758583690987124, "grad_norm": 0.34765625, "learning_rate": 4.995007072610848e-06, "loss": 2.1322, "step": 4615 }, { "epoch": 0.24763948497854077, "grad_norm": 0.42578125, "learning_rate": 4.995001583182677e-06, "loss": 2.5339, "step": 4616 }, { "epoch": 0.2476931330472103, "grad_norm": 0.32421875, "learning_rate": 4.994996090741533e-06, "loss": 2.282, "step": 4617 }, { "epoch": 0.24774678111587983, "grad_norm": 0.341796875, "learning_rate": 4.9949905952874215e-06, "loss": 2.1521, "step": 4618 }, { "epoch": 0.24780042918454936, "grad_norm": 0.4453125, "learning_rate": 4.99498509682035e-06, "loss": 2.384, "step": 4619 }, { "epoch": 0.2478540772532189, "grad_norm": 0.330078125, "learning_rate": 4.994979595340326e-06, "loss": 1.8678, "step": 4620 }, { "epoch": 0.2479077253218884, "grad_norm": 0.400390625, "learning_rate": 4.994974090847355e-06, "loss": 2.4976, "step": 4621 }, { "epoch": 0.24796137339055793, "grad_norm": 0.4375, "learning_rate": 4.994968583341444e-06, "loss": 1.8876, "step": 4622 }, { "epoch": 0.24801502145922746, "grad_norm": 0.333984375, "learning_rate": 4.9949630728226e-06, "loss": 2.285, "step": 4623 }, { "epoch": 0.248068669527897, "grad_norm": 1.3125, "learning_rate": 4.994957559290828e-06, "loss": 2.4091, "step": 4624 }, { "epoch": 0.24812231759656653, "grad_norm": 0.419921875, "learning_rate": 4.994952042746138e-06, "loss": 2.0148, "step": 4625 }, { "epoch": 0.24817596566523606, "grad_norm": 0.30859375, "learning_rate": 4.994946523188532e-06, "loss": 1.976, "step": 4626 }, { "epoch": 0.2482296137339056, "grad_norm": 0.40234375, "learning_rate": 4.994941000618021e-06, "loss": 2.299, "step": 4627 }, { "epoch": 0.2482832618025751, "grad_norm": 0.341796875, "learning_rate": 4.994935475034609e-06, "loss": 2.2321, "step": 4628 }, { "epoch": 0.24833690987124463, "grad_norm": 0.4296875, "learning_rate": 4.994929946438304e-06, "loss": 1.7756, "step": 4629 }, { "epoch": 0.24839055793991416, "grad_norm": 0.431640625, "learning_rate": 4.994924414829112e-06, "loss": 2.2981, "step": 4630 }, { "epoch": 0.2484442060085837, "grad_norm": 0.333984375, "learning_rate": 4.99491888020704e-06, "loss": 2.0803, "step": 4631 }, { "epoch": 0.24849785407725322, "grad_norm": 0.4296875, "learning_rate": 4.994913342572095e-06, "loss": 2.3724, "step": 4632 }, { "epoch": 0.24855150214592275, "grad_norm": 4.125, "learning_rate": 4.994907801924282e-06, "loss": 2.1584, "step": 4633 }, { "epoch": 0.24860515021459229, "grad_norm": 0.435546875, "learning_rate": 4.994902258263611e-06, "loss": 2.3204, "step": 4634 }, { "epoch": 0.2486587982832618, "grad_norm": 0.388671875, "learning_rate": 4.994896711590085e-06, "loss": 2.2331, "step": 4635 }, { "epoch": 0.24871244635193132, "grad_norm": 0.765625, "learning_rate": 4.994891161903713e-06, "loss": 2.3358, "step": 4636 }, { "epoch": 0.24876609442060085, "grad_norm": 0.32421875, "learning_rate": 4.994885609204502e-06, "loss": 2.208, "step": 4637 }, { "epoch": 0.24881974248927038, "grad_norm": 0.4296875, "learning_rate": 4.994880053492456e-06, "loss": 2.2493, "step": 4638 }, { "epoch": 0.24887339055793992, "grad_norm": 0.369140625, "learning_rate": 4.994874494767585e-06, "loss": 2.6134, "step": 4639 }, { "epoch": 0.24892703862660945, "grad_norm": 0.373046875, "learning_rate": 4.994868933029894e-06, "loss": 2.4944, "step": 4640 }, { "epoch": 0.24898068669527898, "grad_norm": 0.294921875, "learning_rate": 4.994863368279389e-06, "loss": 2.2595, "step": 4641 }, { "epoch": 0.2490343347639485, "grad_norm": 0.38671875, "learning_rate": 4.994857800516078e-06, "loss": 1.6902, "step": 4642 }, { "epoch": 0.24908798283261802, "grad_norm": 0.38671875, "learning_rate": 4.994852229739968e-06, "loss": 2.2194, "step": 4643 }, { "epoch": 0.24914163090128755, "grad_norm": 0.3359375, "learning_rate": 4.994846655951064e-06, "loss": 2.2993, "step": 4644 }, { "epoch": 0.24919527896995708, "grad_norm": 0.33203125, "learning_rate": 4.994841079149375e-06, "loss": 2.1118, "step": 4645 }, { "epoch": 0.2492489270386266, "grad_norm": 0.31640625, "learning_rate": 4.994835499334906e-06, "loss": 2.1407, "step": 4646 }, { "epoch": 0.24930257510729614, "grad_norm": 0.384765625, "learning_rate": 4.994829916507664e-06, "loss": 2.3995, "step": 4647 }, { "epoch": 0.24935622317596567, "grad_norm": 0.341796875, "learning_rate": 4.994824330667657e-06, "loss": 2.1733, "step": 4648 }, { "epoch": 0.2494098712446352, "grad_norm": 0.34375, "learning_rate": 4.99481874181489e-06, "loss": 2.4756, "step": 4649 }, { "epoch": 0.2494635193133047, "grad_norm": 0.376953125, "learning_rate": 4.99481314994937e-06, "loss": 1.8024, "step": 4650 }, { "epoch": 0.24951716738197424, "grad_norm": 0.37890625, "learning_rate": 4.9948075550711055e-06, "loss": 2.2234, "step": 4651 }, { "epoch": 0.24957081545064377, "grad_norm": 1.3125, "learning_rate": 4.9948019571801015e-06, "loss": 2.2774, "step": 4652 }, { "epoch": 0.2496244635193133, "grad_norm": 0.40625, "learning_rate": 4.994796356276366e-06, "loss": 2.5687, "step": 4653 }, { "epoch": 0.24967811158798284, "grad_norm": 0.416015625, "learning_rate": 4.994790752359904e-06, "loss": 2.2956, "step": 4654 }, { "epoch": 0.24973175965665237, "grad_norm": 0.404296875, "learning_rate": 4.994785145430724e-06, "loss": 2.3831, "step": 4655 }, { "epoch": 0.2497854077253219, "grad_norm": 0.392578125, "learning_rate": 4.994779535488832e-06, "loss": 2.2913, "step": 4656 }, { "epoch": 0.2498390557939914, "grad_norm": 0.3984375, "learning_rate": 4.994773922534234e-06, "loss": 2.2935, "step": 4657 }, { "epoch": 0.24989270386266094, "grad_norm": 0.375, "learning_rate": 4.994768306566939e-06, "loss": 2.3534, "step": 4658 }, { "epoch": 0.24994635193133047, "grad_norm": 0.435546875, "learning_rate": 4.994762687586951e-06, "loss": 2.088, "step": 4659 }, { "epoch": 0.25, "grad_norm": 0.318359375, "learning_rate": 4.99475706559428e-06, "loss": 2.2418, "step": 4660 }, { "epoch": 0.25005364806866953, "grad_norm": 0.328125, "learning_rate": 4.994751440588929e-06, "loss": 2.0466, "step": 4661 }, { "epoch": 0.25010729613733906, "grad_norm": 0.43359375, "learning_rate": 4.994745812570908e-06, "loss": 2.4355, "step": 4662 }, { "epoch": 0.2501609442060086, "grad_norm": 0.3125, "learning_rate": 4.994740181540223e-06, "loss": 2.3079, "step": 4663 }, { "epoch": 0.2502145922746781, "grad_norm": 0.6015625, "learning_rate": 4.99473454749688e-06, "loss": 2.1142, "step": 4664 }, { "epoch": 0.25026824034334766, "grad_norm": 0.39453125, "learning_rate": 4.9947289104408864e-06, "loss": 2.5022, "step": 4665 }, { "epoch": 0.2503218884120172, "grad_norm": 0.38671875, "learning_rate": 4.994723270372248e-06, "loss": 2.4065, "step": 4666 }, { "epoch": 0.2503755364806867, "grad_norm": 0.435546875, "learning_rate": 4.994717627290974e-06, "loss": 2.1518, "step": 4667 }, { "epoch": 0.2504291845493562, "grad_norm": 0.33984375, "learning_rate": 4.994711981197068e-06, "loss": 2.3016, "step": 4668 }, { "epoch": 0.25048283261802573, "grad_norm": 0.37890625, "learning_rate": 4.994706332090539e-06, "loss": 2.2433, "step": 4669 }, { "epoch": 0.25053648068669526, "grad_norm": 0.353515625, "learning_rate": 4.994700679971394e-06, "loss": 2.3308, "step": 4670 }, { "epoch": 0.2505901287553648, "grad_norm": 0.36328125, "learning_rate": 4.994695024839638e-06, "loss": 2.3543, "step": 4671 }, { "epoch": 0.2506437768240343, "grad_norm": 0.41796875, "learning_rate": 4.99468936669528e-06, "loss": 2.3587, "step": 4672 }, { "epoch": 0.25069742489270386, "grad_norm": 0.435546875, "learning_rate": 4.9946837055383245e-06, "loss": 2.2651, "step": 4673 }, { "epoch": 0.2507510729613734, "grad_norm": 0.267578125, "learning_rate": 4.9946780413687804e-06, "loss": 2.2036, "step": 4674 }, { "epoch": 0.2508047210300429, "grad_norm": 0.44140625, "learning_rate": 4.994672374186654e-06, "loss": 2.2135, "step": 4675 }, { "epoch": 0.25085836909871245, "grad_norm": 0.376953125, "learning_rate": 4.994666703991952e-06, "loss": 1.9915, "step": 4676 }, { "epoch": 0.250912017167382, "grad_norm": 0.365234375, "learning_rate": 4.9946610307846796e-06, "loss": 2.2106, "step": 4677 }, { "epoch": 0.2509656652360515, "grad_norm": 0.4296875, "learning_rate": 4.994655354564847e-06, "loss": 2.0647, "step": 4678 }, { "epoch": 0.25101931330472105, "grad_norm": 0.34375, "learning_rate": 4.994649675332458e-06, "loss": 2.2666, "step": 4679 }, { "epoch": 0.2510729613733906, "grad_norm": 0.421875, "learning_rate": 4.994643993087522e-06, "loss": 2.3923, "step": 4680 }, { "epoch": 0.2511266094420601, "grad_norm": 0.4765625, "learning_rate": 4.994638307830043e-06, "loss": 2.4032, "step": 4681 }, { "epoch": 0.2511802575107296, "grad_norm": 0.625, "learning_rate": 4.9946326195600305e-06, "loss": 2.2781, "step": 4682 }, { "epoch": 0.2512339055793991, "grad_norm": 0.416015625, "learning_rate": 4.99462692827749e-06, "loss": 2.4921, "step": 4683 }, { "epoch": 0.25128755364806865, "grad_norm": 0.47265625, "learning_rate": 4.9946212339824285e-06, "loss": 2.3144, "step": 4684 }, { "epoch": 0.2513412017167382, "grad_norm": 0.359375, "learning_rate": 4.994615536674854e-06, "loss": 2.3492, "step": 4685 }, { "epoch": 0.2513948497854077, "grad_norm": 0.318359375, "learning_rate": 4.994609836354771e-06, "loss": 2.0215, "step": 4686 }, { "epoch": 0.25144849785407725, "grad_norm": 0.388671875, "learning_rate": 4.994604133022188e-06, "loss": 2.2617, "step": 4687 }, { "epoch": 0.2515021459227468, "grad_norm": 0.359375, "learning_rate": 4.994598426677113e-06, "loss": 2.2914, "step": 4688 }, { "epoch": 0.2515557939914163, "grad_norm": 0.63671875, "learning_rate": 4.994592717319551e-06, "loss": 2.2392, "step": 4689 }, { "epoch": 0.25160944206008584, "grad_norm": 0.408203125, "learning_rate": 4.994587004949509e-06, "loss": 2.3916, "step": 4690 }, { "epoch": 0.2516630901287554, "grad_norm": 0.361328125, "learning_rate": 4.994581289566994e-06, "loss": 2.1581, "step": 4691 }, { "epoch": 0.2517167381974249, "grad_norm": 0.455078125, "learning_rate": 4.994575571172015e-06, "loss": 2.4644, "step": 4692 }, { "epoch": 0.25177038626609444, "grad_norm": 0.466796875, "learning_rate": 4.994569849764576e-06, "loss": 1.7294, "step": 4693 }, { "epoch": 0.25182403433476397, "grad_norm": 0.392578125, "learning_rate": 4.9945641253446854e-06, "loss": 2.4405, "step": 4694 }, { "epoch": 0.2518776824034335, "grad_norm": 0.384765625, "learning_rate": 4.994558397912349e-06, "loss": 2.6195, "step": 4695 }, { "epoch": 0.251931330472103, "grad_norm": 0.341796875, "learning_rate": 4.994552667467576e-06, "loss": 2.1627, "step": 4696 }, { "epoch": 0.2519849785407725, "grad_norm": 0.361328125, "learning_rate": 4.994546934010371e-06, "loss": 2.3353, "step": 4697 }, { "epoch": 0.25203862660944204, "grad_norm": 0.365234375, "learning_rate": 4.994541197540741e-06, "loss": 2.2663, "step": 4698 }, { "epoch": 0.25209227467811157, "grad_norm": 0.33984375, "learning_rate": 4.994535458058695e-06, "loss": 2.289, "step": 4699 }, { "epoch": 0.2521459227467811, "grad_norm": 0.404296875, "learning_rate": 4.994529715564238e-06, "loss": 2.3735, "step": 4700 }, { "epoch": 0.25219957081545064, "grad_norm": 0.36328125, "learning_rate": 4.994523970057378e-06, "loss": 2.244, "step": 4701 }, { "epoch": 0.25225321888412017, "grad_norm": 0.3359375, "learning_rate": 4.99451822153812e-06, "loss": 2.1531, "step": 4702 }, { "epoch": 0.2523068669527897, "grad_norm": 0.439453125, "learning_rate": 4.994512470006474e-06, "loss": 2.3836, "step": 4703 }, { "epoch": 0.25236051502145923, "grad_norm": 0.4609375, "learning_rate": 4.994506715462445e-06, "loss": 1.7941, "step": 4704 }, { "epoch": 0.25241416309012876, "grad_norm": 0.337890625, "learning_rate": 4.994500957906041e-06, "loss": 2.2046, "step": 4705 }, { "epoch": 0.2524678111587983, "grad_norm": 0.361328125, "learning_rate": 4.994495197337268e-06, "loss": 2.3168, "step": 4706 }, { "epoch": 0.2525214592274678, "grad_norm": 0.359375, "learning_rate": 4.994489433756132e-06, "loss": 2.4058, "step": 4707 }, { "epoch": 0.25257510729613736, "grad_norm": 0.37890625, "learning_rate": 4.9944836671626425e-06, "loss": 2.3135, "step": 4708 }, { "epoch": 0.2526287553648069, "grad_norm": 0.33203125, "learning_rate": 4.9944778975568055e-06, "loss": 2.1412, "step": 4709 }, { "epoch": 0.2526824034334764, "grad_norm": 0.32421875, "learning_rate": 4.994472124938626e-06, "loss": 2.3329, "step": 4710 }, { "epoch": 0.2527360515021459, "grad_norm": 0.55078125, "learning_rate": 4.994466349308114e-06, "loss": 2.3757, "step": 4711 }, { "epoch": 0.25278969957081543, "grad_norm": 0.341796875, "learning_rate": 4.994460570665275e-06, "loss": 2.1203, "step": 4712 }, { "epoch": 0.25284334763948496, "grad_norm": 0.326171875, "learning_rate": 4.994454789010116e-06, "loss": 1.9971, "step": 4713 }, { "epoch": 0.2528969957081545, "grad_norm": 0.33984375, "learning_rate": 4.994449004342643e-06, "loss": 2.4338, "step": 4714 }, { "epoch": 0.252950643776824, "grad_norm": 0.37890625, "learning_rate": 4.994443216662865e-06, "loss": 2.4304, "step": 4715 }, { "epoch": 0.25300429184549356, "grad_norm": 0.42578125, "learning_rate": 4.9944374259707875e-06, "loss": 2.1622, "step": 4716 }, { "epoch": 0.2530579399141631, "grad_norm": 0.462890625, "learning_rate": 4.9944316322664185e-06, "loss": 2.3459, "step": 4717 }, { "epoch": 0.2531115879828326, "grad_norm": 0.51953125, "learning_rate": 4.994425835549764e-06, "loss": 1.8399, "step": 4718 }, { "epoch": 0.25316523605150215, "grad_norm": 0.353515625, "learning_rate": 4.9944200358208325e-06, "loss": 2.1955, "step": 4719 }, { "epoch": 0.2532188841201717, "grad_norm": 0.388671875, "learning_rate": 4.99441423307963e-06, "loss": 2.1003, "step": 4720 }, { "epoch": 0.2532725321888412, "grad_norm": 0.33203125, "learning_rate": 4.994408427326162e-06, "loss": 2.2772, "step": 4721 }, { "epoch": 0.25332618025751075, "grad_norm": 0.3359375, "learning_rate": 4.9944026185604385e-06, "loss": 2.1449, "step": 4722 }, { "epoch": 0.2533798283261803, "grad_norm": 0.46875, "learning_rate": 4.994396806782464e-06, "loss": 1.8377, "step": 4723 }, { "epoch": 0.2534334763948498, "grad_norm": 0.466796875, "learning_rate": 4.9943909919922475e-06, "loss": 2.2603, "step": 4724 }, { "epoch": 0.2534871244635193, "grad_norm": 0.298828125, "learning_rate": 4.9943851741897945e-06, "loss": 2.3176, "step": 4725 }, { "epoch": 0.2535407725321888, "grad_norm": 0.412109375, "learning_rate": 4.994379353375113e-06, "loss": 2.4992, "step": 4726 }, { "epoch": 0.25359442060085835, "grad_norm": 0.328125, "learning_rate": 4.994373529548209e-06, "loss": 2.1385, "step": 4727 }, { "epoch": 0.2536480686695279, "grad_norm": 0.66015625, "learning_rate": 4.994367702709091e-06, "loss": 2.4833, "step": 4728 }, { "epoch": 0.2537017167381974, "grad_norm": 0.384765625, "learning_rate": 4.994361872857765e-06, "loss": 2.444, "step": 4729 }, { "epoch": 0.25375536480686695, "grad_norm": 0.478515625, "learning_rate": 4.994356039994238e-06, "loss": 2.3972, "step": 4730 }, { "epoch": 0.2538090128755365, "grad_norm": 0.3671875, "learning_rate": 4.994350204118517e-06, "loss": 2.2105, "step": 4731 }, { "epoch": 0.253862660944206, "grad_norm": 0.392578125, "learning_rate": 4.99434436523061e-06, "loss": 2.2215, "step": 4732 }, { "epoch": 0.25391630901287554, "grad_norm": 0.345703125, "learning_rate": 4.994338523330523e-06, "loss": 2.2485, "step": 4733 }, { "epoch": 0.2539699570815451, "grad_norm": 0.43359375, "learning_rate": 4.994332678418263e-06, "loss": 2.3325, "step": 4734 }, { "epoch": 0.2540236051502146, "grad_norm": 0.36328125, "learning_rate": 4.994326830493839e-06, "loss": 2.2791, "step": 4735 }, { "epoch": 0.25407725321888414, "grad_norm": 0.421875, "learning_rate": 4.994320979557256e-06, "loss": 2.5119, "step": 4736 }, { "epoch": 0.25413090128755367, "grad_norm": 0.31640625, "learning_rate": 4.994315125608521e-06, "loss": 2.3531, "step": 4737 }, { "epoch": 0.2541845493562232, "grad_norm": 0.72265625, "learning_rate": 4.994309268647641e-06, "loss": 2.3866, "step": 4738 }, { "epoch": 0.25423819742489273, "grad_norm": 0.333984375, "learning_rate": 4.994303408674626e-06, "loss": 2.395, "step": 4739 }, { "epoch": 0.2542918454935622, "grad_norm": 0.3515625, "learning_rate": 4.994297545689479e-06, "loss": 2.0571, "step": 4740 }, { "epoch": 0.25434549356223174, "grad_norm": 0.3515625, "learning_rate": 4.9942916796922095e-06, "loss": 2.0461, "step": 4741 }, { "epoch": 0.25439914163090127, "grad_norm": 0.361328125, "learning_rate": 4.994285810682824e-06, "loss": 2.373, "step": 4742 }, { "epoch": 0.2544527896995708, "grad_norm": 0.35546875, "learning_rate": 4.994279938661329e-06, "loss": 2.1919, "step": 4743 }, { "epoch": 0.25450643776824033, "grad_norm": 0.375, "learning_rate": 4.994274063627734e-06, "loss": 2.4155, "step": 4744 }, { "epoch": 0.25456008583690987, "grad_norm": 0.408203125, "learning_rate": 4.994268185582043e-06, "loss": 2.4552, "step": 4745 }, { "epoch": 0.2546137339055794, "grad_norm": 0.416015625, "learning_rate": 4.9942623045242645e-06, "loss": 2.2609, "step": 4746 }, { "epoch": 0.25466738197424893, "grad_norm": 0.27734375, "learning_rate": 4.994256420454405e-06, "loss": 2.1475, "step": 4747 }, { "epoch": 0.25472103004291846, "grad_norm": 0.37109375, "learning_rate": 4.994250533372473e-06, "loss": 2.2549, "step": 4748 }, { "epoch": 0.254774678111588, "grad_norm": 0.34765625, "learning_rate": 4.994244643278475e-06, "loss": 2.5032, "step": 4749 }, { "epoch": 0.2548283261802575, "grad_norm": 0.357421875, "learning_rate": 4.994238750172417e-06, "loss": 2.4927, "step": 4750 }, { "epoch": 0.25488197424892706, "grad_norm": 0.421875, "learning_rate": 4.994232854054307e-06, "loss": 2.2787, "step": 4751 }, { "epoch": 0.2549356223175966, "grad_norm": 0.423828125, "learning_rate": 4.994226954924152e-06, "loss": 2.6359, "step": 4752 }, { "epoch": 0.2549892703862661, "grad_norm": 0.3671875, "learning_rate": 4.99422105278196e-06, "loss": 2.536, "step": 4753 }, { "epoch": 0.2550429184549356, "grad_norm": 0.345703125, "learning_rate": 4.9942151476277365e-06, "loss": 2.4047, "step": 4754 }, { "epoch": 0.25509656652360513, "grad_norm": 0.404296875, "learning_rate": 4.9942092394614895e-06, "loss": 2.1332, "step": 4755 }, { "epoch": 0.25515021459227466, "grad_norm": 0.353515625, "learning_rate": 4.9942033282832256e-06, "loss": 2.0293, "step": 4756 }, { "epoch": 0.2552038626609442, "grad_norm": 0.515625, "learning_rate": 4.994197414092953e-06, "loss": 2.2305, "step": 4757 }, { "epoch": 0.2552575107296137, "grad_norm": 0.392578125, "learning_rate": 4.994191496890679e-06, "loss": 2.0326, "step": 4758 }, { "epoch": 0.25531115879828326, "grad_norm": 0.392578125, "learning_rate": 4.994185576676409e-06, "loss": 2.2503, "step": 4759 }, { "epoch": 0.2553648068669528, "grad_norm": 0.427734375, "learning_rate": 4.994179653450152e-06, "loss": 2.3053, "step": 4760 }, { "epoch": 0.2554184549356223, "grad_norm": 0.4453125, "learning_rate": 4.994173727211913e-06, "loss": 2.4834, "step": 4761 }, { "epoch": 0.25547210300429185, "grad_norm": 0.5703125, "learning_rate": 4.994167797961701e-06, "loss": 2.5486, "step": 4762 }, { "epoch": 0.2555257510729614, "grad_norm": 0.380859375, "learning_rate": 4.994161865699523e-06, "loss": 2.2932, "step": 4763 }, { "epoch": 0.2555793991416309, "grad_norm": 0.359375, "learning_rate": 4.994155930425386e-06, "loss": 2.4229, "step": 4764 }, { "epoch": 0.25563304721030045, "grad_norm": 0.416015625, "learning_rate": 4.9941499921392965e-06, "loss": 2.1477, "step": 4765 }, { "epoch": 0.25568669527897, "grad_norm": 0.330078125, "learning_rate": 4.994144050841262e-06, "loss": 2.2445, "step": 4766 }, { "epoch": 0.2557403433476395, "grad_norm": 0.30859375, "learning_rate": 4.99413810653129e-06, "loss": 2.1012, "step": 4767 }, { "epoch": 0.255793991416309, "grad_norm": 0.39453125, "learning_rate": 4.994132159209387e-06, "loss": 2.3823, "step": 4768 }, { "epoch": 0.2558476394849785, "grad_norm": 0.40234375, "learning_rate": 4.994126208875561e-06, "loss": 2.4231, "step": 4769 }, { "epoch": 0.25590128755364805, "grad_norm": 0.4140625, "learning_rate": 4.99412025552982e-06, "loss": 2.3507, "step": 4770 }, { "epoch": 0.2559549356223176, "grad_norm": 0.35546875, "learning_rate": 4.994114299172168e-06, "loss": 2.3896, "step": 4771 }, { "epoch": 0.2560085836909871, "grad_norm": 0.39453125, "learning_rate": 4.9941083398026154e-06, "loss": 2.0742, "step": 4772 }, { "epoch": 0.25606223175965664, "grad_norm": 0.35546875, "learning_rate": 4.994102377421168e-06, "loss": 2.3616, "step": 4773 }, { "epoch": 0.2561158798283262, "grad_norm": 223.0, "learning_rate": 4.994096412027834e-06, "loss": 2.4052, "step": 4774 }, { "epoch": 0.2561695278969957, "grad_norm": 0.34375, "learning_rate": 4.994090443622619e-06, "loss": 2.4093, "step": 4775 }, { "epoch": 0.25622317596566524, "grad_norm": 0.400390625, "learning_rate": 4.994084472205531e-06, "loss": 2.3542, "step": 4776 }, { "epoch": 0.25627682403433477, "grad_norm": 0.3671875, "learning_rate": 4.994078497776576e-06, "loss": 2.2216, "step": 4777 }, { "epoch": 0.2563304721030043, "grad_norm": 0.5625, "learning_rate": 4.994072520335765e-06, "loss": 2.3032, "step": 4778 }, { "epoch": 0.25638412017167383, "grad_norm": 0.40234375, "learning_rate": 4.994066539883101e-06, "loss": 2.3282, "step": 4779 }, { "epoch": 0.25643776824034337, "grad_norm": 0.3671875, "learning_rate": 4.994060556418594e-06, "loss": 2.1603, "step": 4780 }, { "epoch": 0.2564914163090129, "grad_norm": 0.375, "learning_rate": 4.99405456994225e-06, "loss": 2.4368, "step": 4781 }, { "epoch": 0.25654506437768243, "grad_norm": 0.478515625, "learning_rate": 4.994048580454075e-06, "loss": 2.3092, "step": 4782 }, { "epoch": 0.2565987124463519, "grad_norm": 0.53515625, "learning_rate": 4.994042587954079e-06, "loss": 2.4096, "step": 4783 }, { "epoch": 0.25665236051502144, "grad_norm": 0.34765625, "learning_rate": 4.994036592442267e-06, "loss": 2.2702, "step": 4784 }, { "epoch": 0.25670600858369097, "grad_norm": 0.451171875, "learning_rate": 4.994030593918647e-06, "loss": 1.3757, "step": 4785 }, { "epoch": 0.2567596566523605, "grad_norm": 0.3828125, "learning_rate": 4.994024592383227e-06, "loss": 2.4489, "step": 4786 }, { "epoch": 0.25681330472103003, "grad_norm": 0.3671875, "learning_rate": 4.994018587836013e-06, "loss": 2.5165, "step": 4787 }, { "epoch": 0.25686695278969957, "grad_norm": 0.400390625, "learning_rate": 4.994012580277013e-06, "loss": 2.2621, "step": 4788 }, { "epoch": 0.2569206008583691, "grad_norm": 0.400390625, "learning_rate": 4.994006569706234e-06, "loss": 2.3618, "step": 4789 }, { "epoch": 0.25697424892703863, "grad_norm": 0.306640625, "learning_rate": 4.994000556123684e-06, "loss": 2.0198, "step": 4790 }, { "epoch": 0.25702789699570816, "grad_norm": 0.330078125, "learning_rate": 4.993994539529369e-06, "loss": 2.2585, "step": 4791 }, { "epoch": 0.2570815450643777, "grad_norm": 0.3984375, "learning_rate": 4.993988519923296e-06, "loss": 2.4709, "step": 4792 }, { "epoch": 0.2571351931330472, "grad_norm": 0.48046875, "learning_rate": 4.993982497305474e-06, "loss": 2.5599, "step": 4793 }, { "epoch": 0.25718884120171676, "grad_norm": 0.37890625, "learning_rate": 4.99397647167591e-06, "loss": 2.3292, "step": 4794 }, { "epoch": 0.2572424892703863, "grad_norm": 0.58203125, "learning_rate": 4.993970443034609e-06, "loss": 2.4437, "step": 4795 }, { "epoch": 0.2572961373390558, "grad_norm": 0.353515625, "learning_rate": 4.993964411381581e-06, "loss": 2.3087, "step": 4796 }, { "epoch": 0.2573497854077253, "grad_norm": 0.388671875, "learning_rate": 4.993958376716833e-06, "loss": 2.1554, "step": 4797 }, { "epoch": 0.2574034334763948, "grad_norm": 0.34765625, "learning_rate": 4.9939523390403696e-06, "loss": 2.1885, "step": 4798 }, { "epoch": 0.25745708154506436, "grad_norm": 0.3515625, "learning_rate": 4.993946298352201e-06, "loss": 2.1787, "step": 4799 }, { "epoch": 0.2575107296137339, "grad_norm": 0.4296875, "learning_rate": 4.993940254652334e-06, "loss": 2.259, "step": 4800 }, { "epoch": 0.2575643776824034, "grad_norm": 0.37890625, "learning_rate": 4.993934207940776e-06, "loss": 2.4025, "step": 4801 }, { "epoch": 0.25761802575107295, "grad_norm": 0.404296875, "learning_rate": 4.993928158217532e-06, "loss": 2.4618, "step": 4802 }, { "epoch": 0.2576716738197425, "grad_norm": 0.390625, "learning_rate": 4.993922105482611e-06, "loss": 2.133, "step": 4803 }, { "epoch": 0.257725321888412, "grad_norm": 0.365234375, "learning_rate": 4.9939160497360215e-06, "loss": 2.0485, "step": 4804 }, { "epoch": 0.25777896995708155, "grad_norm": 0.80859375, "learning_rate": 4.99390999097777e-06, "loss": 1.4253, "step": 4805 }, { "epoch": 0.2578326180257511, "grad_norm": 1.1875, "learning_rate": 4.993903929207863e-06, "loss": 2.2591, "step": 4806 }, { "epoch": 0.2578862660944206, "grad_norm": 0.427734375, "learning_rate": 4.993897864426307e-06, "loss": 2.3127, "step": 4807 }, { "epoch": 0.25793991416309014, "grad_norm": 0.37890625, "learning_rate": 4.9938917966331114e-06, "loss": 2.4395, "step": 4808 }, { "epoch": 0.2579935622317597, "grad_norm": 0.30859375, "learning_rate": 4.993885725828283e-06, "loss": 2.3213, "step": 4809 }, { "epoch": 0.2580472103004292, "grad_norm": 0.8125, "learning_rate": 4.993879652011828e-06, "loss": 1.3861, "step": 4810 }, { "epoch": 0.25810085836909874, "grad_norm": 0.3828125, "learning_rate": 4.9938735751837555e-06, "loss": 2.2093, "step": 4811 }, { "epoch": 0.2581545064377682, "grad_norm": 0.326171875, "learning_rate": 4.993867495344072e-06, "loss": 2.3305, "step": 4812 }, { "epoch": 0.25820815450643775, "grad_norm": 0.359375, "learning_rate": 4.993861412492784e-06, "loss": 2.3882, "step": 4813 }, { "epoch": 0.2582618025751073, "grad_norm": 0.337890625, "learning_rate": 4.993855326629901e-06, "loss": 2.4507, "step": 4814 }, { "epoch": 0.2583154506437768, "grad_norm": 0.39453125, "learning_rate": 4.993849237755427e-06, "loss": 2.4912, "step": 4815 }, { "epoch": 0.25836909871244634, "grad_norm": 0.3125, "learning_rate": 4.993843145869372e-06, "loss": 2.1408, "step": 4816 }, { "epoch": 0.2584227467811159, "grad_norm": 0.59375, "learning_rate": 4.993837050971744e-06, "loss": 2.2315, "step": 4817 }, { "epoch": 0.2584763948497854, "grad_norm": 0.375, "learning_rate": 4.993830953062548e-06, "loss": 2.1228, "step": 4818 }, { "epoch": 0.25853004291845494, "grad_norm": 0.3359375, "learning_rate": 4.993824852141792e-06, "loss": 2.4653, "step": 4819 }, { "epoch": 0.25858369098712447, "grad_norm": 0.390625, "learning_rate": 4.993818748209485e-06, "loss": 2.3596, "step": 4820 }, { "epoch": 0.258637339055794, "grad_norm": 0.337890625, "learning_rate": 4.993812641265632e-06, "loss": 2.1944, "step": 4821 }, { "epoch": 0.25869098712446353, "grad_norm": 2.015625, "learning_rate": 4.9938065313102415e-06, "loss": 2.2679, "step": 4822 }, { "epoch": 0.25874463519313307, "grad_norm": 0.40234375, "learning_rate": 4.993800418343321e-06, "loss": 2.2612, "step": 4823 }, { "epoch": 0.2587982832618026, "grad_norm": 0.357421875, "learning_rate": 4.993794302364878e-06, "loss": 2.1651, "step": 4824 }, { "epoch": 0.25885193133047213, "grad_norm": 0.38671875, "learning_rate": 4.99378818337492e-06, "loss": 2.5119, "step": 4825 }, { "epoch": 0.2589055793991416, "grad_norm": 0.439453125, "learning_rate": 4.993782061373453e-06, "loss": 2.315, "step": 4826 }, { "epoch": 0.25895922746781114, "grad_norm": 0.373046875, "learning_rate": 4.9937759363604865e-06, "loss": 2.2105, "step": 4827 }, { "epoch": 0.25901287553648067, "grad_norm": 0.34765625, "learning_rate": 4.9937698083360266e-06, "loss": 2.2735, "step": 4828 }, { "epoch": 0.2590665236051502, "grad_norm": 0.46484375, "learning_rate": 4.993763677300081e-06, "loss": 1.4746, "step": 4829 }, { "epoch": 0.25912017167381973, "grad_norm": 0.75390625, "learning_rate": 4.9937575432526566e-06, "loss": 2.3253, "step": 4830 }, { "epoch": 0.25917381974248926, "grad_norm": 0.3515625, "learning_rate": 4.993751406193762e-06, "loss": 2.1768, "step": 4831 }, { "epoch": 0.2592274678111588, "grad_norm": 1.21875, "learning_rate": 4.993745266123403e-06, "loss": 2.494, "step": 4832 }, { "epoch": 0.2592811158798283, "grad_norm": 0.44921875, "learning_rate": 4.9937391230415875e-06, "loss": 2.2641, "step": 4833 }, { "epoch": 0.25933476394849786, "grad_norm": 0.330078125, "learning_rate": 4.993732976948325e-06, "loss": 2.4023, "step": 4834 }, { "epoch": 0.2593884120171674, "grad_norm": 0.37890625, "learning_rate": 4.9937268278436196e-06, "loss": 2.2027, "step": 4835 }, { "epoch": 0.2594420600858369, "grad_norm": 0.3828125, "learning_rate": 4.993720675727481e-06, "loss": 2.3745, "step": 4836 }, { "epoch": 0.25949570815450645, "grad_norm": 0.359375, "learning_rate": 4.993714520599916e-06, "loss": 2.2951, "step": 4837 }, { "epoch": 0.259549356223176, "grad_norm": 0.359375, "learning_rate": 4.993708362460931e-06, "loss": 2.2517, "step": 4838 }, { "epoch": 0.2596030042918455, "grad_norm": 0.734375, "learning_rate": 4.993702201310537e-06, "loss": 2.5814, "step": 4839 }, { "epoch": 0.259656652360515, "grad_norm": 0.396484375, "learning_rate": 4.9936960371487365e-06, "loss": 2.4735, "step": 4840 }, { "epoch": 0.2597103004291845, "grad_norm": 0.44921875, "learning_rate": 4.993689869975539e-06, "loss": 2.2918, "step": 4841 }, { "epoch": 0.25976394849785406, "grad_norm": 0.357421875, "learning_rate": 4.993683699790955e-06, "loss": 2.0498, "step": 4842 }, { "epoch": 0.2598175965665236, "grad_norm": 0.38671875, "learning_rate": 4.993677526594987e-06, "loss": 1.9634, "step": 4843 }, { "epoch": 0.2598712446351931, "grad_norm": 0.470703125, "learning_rate": 4.993671350387645e-06, "loss": 2.2466, "step": 4844 }, { "epoch": 0.25992489270386265, "grad_norm": 0.353515625, "learning_rate": 4.993665171168937e-06, "loss": 2.5521, "step": 4845 }, { "epoch": 0.2599785407725322, "grad_norm": 0.365234375, "learning_rate": 4.993658988938868e-06, "loss": 2.361, "step": 4846 }, { "epoch": 0.2600321888412017, "grad_norm": 0.34375, "learning_rate": 4.993652803697448e-06, "loss": 2.4399, "step": 4847 }, { "epoch": 0.26008583690987125, "grad_norm": 0.3515625, "learning_rate": 4.993646615444684e-06, "loss": 2.4403, "step": 4848 }, { "epoch": 0.2601394849785408, "grad_norm": 0.35546875, "learning_rate": 4.993640424180582e-06, "loss": 2.3884, "step": 4849 }, { "epoch": 0.2601931330472103, "grad_norm": 0.376953125, "learning_rate": 4.993634229905151e-06, "loss": 2.1414, "step": 4850 }, { "epoch": 0.26024678111587984, "grad_norm": 0.365234375, "learning_rate": 4.993628032618398e-06, "loss": 2.4007, "step": 4851 }, { "epoch": 0.2603004291845494, "grad_norm": 0.388671875, "learning_rate": 4.99362183232033e-06, "loss": 2.3363, "step": 4852 }, { "epoch": 0.2603540772532189, "grad_norm": 0.484375, "learning_rate": 4.993615629010956e-06, "loss": 2.3563, "step": 4853 }, { "epoch": 0.26040772532188844, "grad_norm": 0.73828125, "learning_rate": 4.9936094226902815e-06, "loss": 2.309, "step": 4854 }, { "epoch": 0.2604613733905579, "grad_norm": 0.34375, "learning_rate": 4.993603213358315e-06, "loss": 2.2151, "step": 4855 }, { "epoch": 0.26051502145922745, "grad_norm": 0.39453125, "learning_rate": 4.993597001015064e-06, "loss": 1.6039, "step": 4856 }, { "epoch": 0.260568669527897, "grad_norm": 0.40234375, "learning_rate": 4.993590785660536e-06, "loss": 2.0236, "step": 4857 }, { "epoch": 0.2606223175965665, "grad_norm": 0.400390625, "learning_rate": 4.993584567294738e-06, "loss": 2.5387, "step": 4858 }, { "epoch": 0.26067596566523604, "grad_norm": 0.359375, "learning_rate": 4.993578345917679e-06, "loss": 2.2011, "step": 4859 }, { "epoch": 0.2607296137339056, "grad_norm": 0.341796875, "learning_rate": 4.993572121529365e-06, "loss": 2.325, "step": 4860 }, { "epoch": 0.2607832618025751, "grad_norm": 0.5, "learning_rate": 4.993565894129804e-06, "loss": 2.0354, "step": 4861 }, { "epoch": 0.26083690987124464, "grad_norm": 0.33984375, "learning_rate": 4.993559663719003e-06, "loss": 2.453, "step": 4862 }, { "epoch": 0.26089055793991417, "grad_norm": 0.3828125, "learning_rate": 4.99355343029697e-06, "loss": 2.2747, "step": 4863 }, { "epoch": 0.2609442060085837, "grad_norm": 0.462890625, "learning_rate": 4.993547193863713e-06, "loss": 2.3946, "step": 4864 }, { "epoch": 0.26099785407725323, "grad_norm": 0.5390625, "learning_rate": 4.993540954419238e-06, "loss": 2.3723, "step": 4865 }, { "epoch": 0.26105150214592276, "grad_norm": 0.349609375, "learning_rate": 4.993534711963555e-06, "loss": 2.3052, "step": 4866 }, { "epoch": 0.2611051502145923, "grad_norm": 0.361328125, "learning_rate": 4.99352846649667e-06, "loss": 2.2389, "step": 4867 }, { "epoch": 0.26115879828326183, "grad_norm": 0.33984375, "learning_rate": 4.9935222180185895e-06, "loss": 2.2687, "step": 4868 }, { "epoch": 0.2612124463519313, "grad_norm": 0.33984375, "learning_rate": 4.993515966529323e-06, "loss": 1.5772, "step": 4869 }, { "epoch": 0.26126609442060084, "grad_norm": 0.388671875, "learning_rate": 4.993509712028878e-06, "loss": 2.1788, "step": 4870 }, { "epoch": 0.26131974248927037, "grad_norm": 0.369140625, "learning_rate": 4.99350345451726e-06, "loss": 2.3296, "step": 4871 }, { "epoch": 0.2613733905579399, "grad_norm": 0.3984375, "learning_rate": 4.9934971939944785e-06, "loss": 2.3358, "step": 4872 }, { "epoch": 0.26142703862660943, "grad_norm": 0.416015625, "learning_rate": 4.993490930460541e-06, "loss": 2.2922, "step": 4873 }, { "epoch": 0.26148068669527896, "grad_norm": 0.447265625, "learning_rate": 4.993484663915453e-06, "loss": 2.1636, "step": 4874 }, { "epoch": 0.2615343347639485, "grad_norm": 0.3515625, "learning_rate": 4.993478394359225e-06, "loss": 1.9936, "step": 4875 }, { "epoch": 0.261587982832618, "grad_norm": 0.369140625, "learning_rate": 4.993472121791863e-06, "loss": 2.4369, "step": 4876 }, { "epoch": 0.26164163090128756, "grad_norm": 0.36328125, "learning_rate": 4.993465846213373e-06, "loss": 2.2366, "step": 4877 }, { "epoch": 0.2616952789699571, "grad_norm": 0.375, "learning_rate": 4.993459567623766e-06, "loss": 2.056, "step": 4878 }, { "epoch": 0.2617489270386266, "grad_norm": 0.322265625, "learning_rate": 4.993453286023048e-06, "loss": 2.1045, "step": 4879 }, { "epoch": 0.26180257510729615, "grad_norm": 0.4296875, "learning_rate": 4.993447001411226e-06, "loss": 2.4129, "step": 4880 }, { "epoch": 0.2618562231759657, "grad_norm": 0.396484375, "learning_rate": 4.993440713788308e-06, "loss": 2.3454, "step": 4881 }, { "epoch": 0.2619098712446352, "grad_norm": 0.38671875, "learning_rate": 4.993434423154302e-06, "loss": 2.1568, "step": 4882 }, { "epoch": 0.2619635193133047, "grad_norm": 0.32421875, "learning_rate": 4.993428129509214e-06, "loss": 2.2348, "step": 4883 }, { "epoch": 0.2620171673819742, "grad_norm": 0.361328125, "learning_rate": 4.993421832853054e-06, "loss": 1.8516, "step": 4884 }, { "epoch": 0.26207081545064376, "grad_norm": 0.37890625, "learning_rate": 4.993415533185829e-06, "loss": 2.3146, "step": 4885 }, { "epoch": 0.2621244635193133, "grad_norm": 0.4765625, "learning_rate": 4.993409230507546e-06, "loss": 2.3603, "step": 4886 }, { "epoch": 0.2621781115879828, "grad_norm": 0.5390625, "learning_rate": 4.9934029248182105e-06, "loss": 2.3307, "step": 4887 }, { "epoch": 0.26223175965665235, "grad_norm": 0.5, "learning_rate": 4.993396616117834e-06, "loss": 2.4954, "step": 4888 }, { "epoch": 0.2622854077253219, "grad_norm": 0.435546875, "learning_rate": 4.993390304406422e-06, "loss": 2.3809, "step": 4889 }, { "epoch": 0.2623390557939914, "grad_norm": 0.45703125, "learning_rate": 4.993383989683983e-06, "loss": 2.4799, "step": 4890 }, { "epoch": 0.26239270386266095, "grad_norm": 0.33984375, "learning_rate": 4.993377671950524e-06, "loss": 2.2439, "step": 4891 }, { "epoch": 0.2624463519313305, "grad_norm": 0.287109375, "learning_rate": 4.993371351206052e-06, "loss": 2.328, "step": 4892 }, { "epoch": 0.2625, "grad_norm": 0.345703125, "learning_rate": 4.993365027450576e-06, "loss": 2.4062, "step": 4893 }, { "epoch": 0.26255364806866954, "grad_norm": 0.484375, "learning_rate": 4.993358700684104e-06, "loss": 2.2748, "step": 4894 }, { "epoch": 0.2626072961373391, "grad_norm": 0.765625, "learning_rate": 4.993352370906641e-06, "loss": 2.1193, "step": 4895 }, { "epoch": 0.2626609442060086, "grad_norm": 0.365234375, "learning_rate": 4.993346038118197e-06, "loss": 2.2843, "step": 4896 }, { "epoch": 0.26271459227467814, "grad_norm": 0.56640625, "learning_rate": 4.9933397023187795e-06, "loss": 2.4005, "step": 4897 }, { "epoch": 0.2627682403433476, "grad_norm": 0.376953125, "learning_rate": 4.993333363508395e-06, "loss": 2.387, "step": 4898 }, { "epoch": 0.26282188841201715, "grad_norm": 0.3515625, "learning_rate": 4.993327021687052e-06, "loss": 2.1467, "step": 4899 }, { "epoch": 0.2628755364806867, "grad_norm": 0.369140625, "learning_rate": 4.993320676854758e-06, "loss": 2.4309, "step": 4900 }, { "epoch": 0.2629291845493562, "grad_norm": 0.375, "learning_rate": 4.99331432901152e-06, "loss": 2.3343, "step": 4901 }, { "epoch": 0.26298283261802574, "grad_norm": 0.3515625, "learning_rate": 4.993307978157348e-06, "loss": 2.4626, "step": 4902 }, { "epoch": 0.2630364806866953, "grad_norm": 0.39453125, "learning_rate": 4.993301624292246e-06, "loss": 2.507, "step": 4903 }, { "epoch": 0.2630901287553648, "grad_norm": 0.40625, "learning_rate": 4.993295267416225e-06, "loss": 2.2492, "step": 4904 }, { "epoch": 0.26314377682403434, "grad_norm": 0.404296875, "learning_rate": 4.99328890752929e-06, "loss": 2.2859, "step": 4905 }, { "epoch": 0.26319742489270387, "grad_norm": 0.345703125, "learning_rate": 4.9932825446314515e-06, "loss": 2.035, "step": 4906 }, { "epoch": 0.2632510729613734, "grad_norm": 0.412109375, "learning_rate": 4.993276178722715e-06, "loss": 2.6707, "step": 4907 }, { "epoch": 0.26330472103004293, "grad_norm": 0.34765625, "learning_rate": 4.993269809803088e-06, "loss": 2.185, "step": 4908 }, { "epoch": 0.26335836909871246, "grad_norm": 0.333984375, "learning_rate": 4.99326343787258e-06, "loss": 2.4817, "step": 4909 }, { "epoch": 0.263412017167382, "grad_norm": 0.68359375, "learning_rate": 4.993257062931197e-06, "loss": 2.5014, "step": 4910 }, { "epoch": 0.2634656652360515, "grad_norm": 0.462890625, "learning_rate": 4.993250684978948e-06, "loss": 2.251, "step": 4911 }, { "epoch": 0.263519313304721, "grad_norm": 0.349609375, "learning_rate": 4.993244304015839e-06, "loss": 2.0167, "step": 4912 }, { "epoch": 0.26357296137339054, "grad_norm": 0.47265625, "learning_rate": 4.993237920041881e-06, "loss": 2.3506, "step": 4913 }, { "epoch": 0.26362660944206007, "grad_norm": 0.33203125, "learning_rate": 4.993231533057078e-06, "loss": 2.2546, "step": 4914 }, { "epoch": 0.2636802575107296, "grad_norm": 0.396484375, "learning_rate": 4.99322514306144e-06, "loss": 2.3556, "step": 4915 }, { "epoch": 0.26373390557939913, "grad_norm": 0.3984375, "learning_rate": 4.993218750054974e-06, "loss": 2.3459, "step": 4916 }, { "epoch": 0.26378755364806866, "grad_norm": 0.35546875, "learning_rate": 4.993212354037687e-06, "loss": 2.5977, "step": 4917 }, { "epoch": 0.2638412017167382, "grad_norm": 0.322265625, "learning_rate": 4.993205955009587e-06, "loss": 2.3182, "step": 4918 }, { "epoch": 0.2638948497854077, "grad_norm": 0.3515625, "learning_rate": 4.993199552970683e-06, "loss": 2.3904, "step": 4919 }, { "epoch": 0.26394849785407726, "grad_norm": 0.45703125, "learning_rate": 4.993193147920982e-06, "loss": 2.7417, "step": 4920 }, { "epoch": 0.2640021459227468, "grad_norm": 0.404296875, "learning_rate": 4.993186739860491e-06, "loss": 2.2467, "step": 4921 }, { "epoch": 0.2640557939914163, "grad_norm": 0.349609375, "learning_rate": 4.993180328789219e-06, "loss": 2.2097, "step": 4922 }, { "epoch": 0.26410944206008585, "grad_norm": 0.36328125, "learning_rate": 4.9931739147071725e-06, "loss": 2.3388, "step": 4923 }, { "epoch": 0.2641630901287554, "grad_norm": 0.349609375, "learning_rate": 4.99316749761436e-06, "loss": 2.1425, "step": 4924 }, { "epoch": 0.2642167381974249, "grad_norm": 0.470703125, "learning_rate": 4.993161077510789e-06, "loss": 2.3414, "step": 4925 }, { "epoch": 0.26427038626609445, "grad_norm": 0.373046875, "learning_rate": 4.993154654396467e-06, "loss": 2.3111, "step": 4926 }, { "epoch": 0.2643240343347639, "grad_norm": 2.265625, "learning_rate": 4.9931482282714035e-06, "loss": 2.3306, "step": 4927 }, { "epoch": 0.26437768240343346, "grad_norm": 0.306640625, "learning_rate": 4.993141799135603e-06, "loss": 2.1948, "step": 4928 }, { "epoch": 0.264431330472103, "grad_norm": 0.455078125, "learning_rate": 4.993135366989077e-06, "loss": 2.3245, "step": 4929 }, { "epoch": 0.2644849785407725, "grad_norm": 0.400390625, "learning_rate": 4.99312893183183e-06, "loss": 2.4166, "step": 4930 }, { "epoch": 0.26453862660944205, "grad_norm": 0.3671875, "learning_rate": 4.993122493663872e-06, "loss": 2.3027, "step": 4931 }, { "epoch": 0.2645922746781116, "grad_norm": 0.408203125, "learning_rate": 4.99311605248521e-06, "loss": 2.2934, "step": 4932 }, { "epoch": 0.2646459227467811, "grad_norm": 0.4375, "learning_rate": 4.993109608295851e-06, "loss": 2.2045, "step": 4933 }, { "epoch": 0.26469957081545065, "grad_norm": 1.4765625, "learning_rate": 4.993103161095803e-06, "loss": 2.3317, "step": 4934 }, { "epoch": 0.2647532188841202, "grad_norm": 0.380859375, "learning_rate": 4.9930967108850756e-06, "loss": 2.1631, "step": 4935 }, { "epoch": 0.2648068669527897, "grad_norm": 0.44140625, "learning_rate": 4.993090257663675e-06, "loss": 2.1547, "step": 4936 }, { "epoch": 0.26486051502145924, "grad_norm": 0.326171875, "learning_rate": 4.993083801431609e-06, "loss": 2.1155, "step": 4937 }, { "epoch": 0.2649141630901288, "grad_norm": 0.36328125, "learning_rate": 4.993077342188886e-06, "loss": 2.2934, "step": 4938 }, { "epoch": 0.2649678111587983, "grad_norm": 0.36328125, "learning_rate": 4.993070879935512e-06, "loss": 2.1962, "step": 4939 }, { "epoch": 0.26502145922746784, "grad_norm": 0.376953125, "learning_rate": 4.993064414671498e-06, "loss": 2.2259, "step": 4940 }, { "epoch": 0.2650751072961373, "grad_norm": 1.1796875, "learning_rate": 4.9930579463968496e-06, "loss": 2.0017, "step": 4941 }, { "epoch": 0.26512875536480685, "grad_norm": 0.337890625, "learning_rate": 4.9930514751115745e-06, "loss": 1.8861, "step": 4942 }, { "epoch": 0.2651824034334764, "grad_norm": 0.60546875, "learning_rate": 4.993045000815682e-06, "loss": 2.5265, "step": 4943 }, { "epoch": 0.2652360515021459, "grad_norm": 0.5234375, "learning_rate": 4.993038523509178e-06, "loss": 2.2927, "step": 4944 }, { "epoch": 0.26528969957081544, "grad_norm": 0.43359375, "learning_rate": 4.993032043192072e-06, "loss": 2.1994, "step": 4945 }, { "epoch": 0.26534334763948497, "grad_norm": 0.48046875, "learning_rate": 4.99302555986437e-06, "loss": 2.5078, "step": 4946 }, { "epoch": 0.2653969957081545, "grad_norm": 0.32421875, "learning_rate": 4.993019073526083e-06, "loss": 2.0539, "step": 4947 }, { "epoch": 0.26545064377682404, "grad_norm": 0.361328125, "learning_rate": 4.993012584177216e-06, "loss": 2.4093, "step": 4948 }, { "epoch": 0.26550429184549357, "grad_norm": 0.48828125, "learning_rate": 4.993006091817777e-06, "loss": 2.2649, "step": 4949 }, { "epoch": 0.2655579399141631, "grad_norm": 0.38671875, "learning_rate": 4.992999596447775e-06, "loss": 2.5054, "step": 4950 }, { "epoch": 0.26561158798283263, "grad_norm": 0.40234375, "learning_rate": 4.992993098067218e-06, "loss": 2.3637, "step": 4951 }, { "epoch": 0.26566523605150216, "grad_norm": 0.416015625, "learning_rate": 4.992986596676112e-06, "loss": 1.7651, "step": 4952 }, { "epoch": 0.2657188841201717, "grad_norm": 0.38671875, "learning_rate": 4.992980092274466e-06, "loss": 2.454, "step": 4953 }, { "epoch": 0.2657725321888412, "grad_norm": 0.451171875, "learning_rate": 4.99297358486229e-06, "loss": 2.4954, "step": 4954 }, { "epoch": 0.2658261802575107, "grad_norm": 0.390625, "learning_rate": 4.992967074439587e-06, "loss": 2.5539, "step": 4955 }, { "epoch": 0.26587982832618023, "grad_norm": 0.337890625, "learning_rate": 4.9929605610063695e-06, "loss": 2.3086, "step": 4956 }, { "epoch": 0.26593347639484977, "grad_norm": 0.357421875, "learning_rate": 4.9929540445626425e-06, "loss": 2.0953, "step": 4957 }, { "epoch": 0.2659871244635193, "grad_norm": 0.40234375, "learning_rate": 4.9929475251084156e-06, "loss": 2.1411, "step": 4958 }, { "epoch": 0.26604077253218883, "grad_norm": 0.376953125, "learning_rate": 4.9929410026436954e-06, "loss": 2.2366, "step": 4959 }, { "epoch": 0.26609442060085836, "grad_norm": 0.41015625, "learning_rate": 4.992934477168491e-06, "loss": 2.4032, "step": 4960 }, { "epoch": 0.2661480686695279, "grad_norm": 0.6171875, "learning_rate": 4.992927948682809e-06, "loss": 1.9532, "step": 4961 }, { "epoch": 0.2662017167381974, "grad_norm": 0.40234375, "learning_rate": 4.992921417186657e-06, "loss": 2.0715, "step": 4962 }, { "epoch": 0.26625536480686696, "grad_norm": 0.408203125, "learning_rate": 4.992914882680044e-06, "loss": 2.1604, "step": 4963 }, { "epoch": 0.2663090128755365, "grad_norm": 0.359375, "learning_rate": 4.992908345162979e-06, "loss": 2.3723, "step": 4964 }, { "epoch": 0.266362660944206, "grad_norm": 0.380859375, "learning_rate": 4.992901804635468e-06, "loss": 2.2722, "step": 4965 }, { "epoch": 0.26641630901287555, "grad_norm": 0.349609375, "learning_rate": 4.992895261097519e-06, "loss": 2.2887, "step": 4966 }, { "epoch": 0.2664699570815451, "grad_norm": 1.078125, "learning_rate": 4.99288871454914e-06, "loss": 2.2784, "step": 4967 }, { "epoch": 0.2665236051502146, "grad_norm": 0.515625, "learning_rate": 4.99288216499034e-06, "loss": 2.3544, "step": 4968 }, { "epoch": 0.26657725321888415, "grad_norm": 0.396484375, "learning_rate": 4.992875612421126e-06, "loss": 2.3615, "step": 4969 }, { "epoch": 0.2666309012875536, "grad_norm": 0.373046875, "learning_rate": 4.9928690568415066e-06, "loss": 2.4178, "step": 4970 }, { "epoch": 0.26668454935622316, "grad_norm": 0.54296875, "learning_rate": 4.9928624982514885e-06, "loss": 2.1519, "step": 4971 }, { "epoch": 0.2667381974248927, "grad_norm": 0.380859375, "learning_rate": 4.992855936651081e-06, "loss": 2.1413, "step": 4972 }, { "epoch": 0.2667918454935622, "grad_norm": 0.373046875, "learning_rate": 4.992849372040291e-06, "loss": 2.3162, "step": 4973 }, { "epoch": 0.26684549356223175, "grad_norm": 0.302734375, "learning_rate": 4.992842804419126e-06, "loss": 1.986, "step": 4974 }, { "epoch": 0.2668991416309013, "grad_norm": 0.333984375, "learning_rate": 4.9928362337875944e-06, "loss": 2.1559, "step": 4975 }, { "epoch": 0.2669527896995708, "grad_norm": 0.37109375, "learning_rate": 4.992829660145706e-06, "loss": 2.2206, "step": 4976 }, { "epoch": 0.26700643776824035, "grad_norm": 3.15625, "learning_rate": 4.992823083493467e-06, "loss": 2.1032, "step": 4977 }, { "epoch": 0.2670600858369099, "grad_norm": 0.421875, "learning_rate": 4.992816503830884e-06, "loss": 2.162, "step": 4978 }, { "epoch": 0.2671137339055794, "grad_norm": 0.419921875, "learning_rate": 4.992809921157967e-06, "loss": 2.5065, "step": 4979 }, { "epoch": 0.26716738197424894, "grad_norm": 0.359375, "learning_rate": 4.9928033354747245e-06, "loss": 2.0983, "step": 4980 }, { "epoch": 0.2672210300429185, "grad_norm": 0.35546875, "learning_rate": 4.992796746781162e-06, "loss": 2.4346, "step": 4981 }, { "epoch": 0.267274678111588, "grad_norm": 0.37890625, "learning_rate": 4.99279015507729e-06, "loss": 2.4326, "step": 4982 }, { "epoch": 0.26732832618025754, "grad_norm": 0.7109375, "learning_rate": 4.992783560363115e-06, "loss": 2.4709, "step": 4983 }, { "epoch": 0.267381974248927, "grad_norm": 0.37109375, "learning_rate": 4.992776962638645e-06, "loss": 2.474, "step": 4984 }, { "epoch": 0.26743562231759654, "grad_norm": 0.357421875, "learning_rate": 4.992770361903888e-06, "loss": 2.3476, "step": 4985 }, { "epoch": 0.2674892703862661, "grad_norm": 0.375, "learning_rate": 4.992763758158852e-06, "loss": 2.4992, "step": 4986 }, { "epoch": 0.2675429184549356, "grad_norm": 0.3828125, "learning_rate": 4.992757151403545e-06, "loss": 2.4473, "step": 4987 }, { "epoch": 0.26759656652360514, "grad_norm": 0.455078125, "learning_rate": 4.992750541637976e-06, "loss": 2.3801, "step": 4988 }, { "epoch": 0.26765021459227467, "grad_norm": 0.57421875, "learning_rate": 4.992743928862151e-06, "loss": 2.0942, "step": 4989 }, { "epoch": 0.2677038626609442, "grad_norm": 0.32421875, "learning_rate": 4.99273731307608e-06, "loss": 2.2158, "step": 4990 }, { "epoch": 0.26775751072961373, "grad_norm": 9.8125, "learning_rate": 4.99273069427977e-06, "loss": 2.3658, "step": 4991 }, { "epoch": 0.26781115879828327, "grad_norm": 0.447265625, "learning_rate": 4.9927240724732286e-06, "loss": 2.3456, "step": 4992 }, { "epoch": 0.2678648068669528, "grad_norm": 0.369140625, "learning_rate": 4.992717447656464e-06, "loss": 2.3782, "step": 4993 }, { "epoch": 0.26791845493562233, "grad_norm": 0.3359375, "learning_rate": 4.992710819829486e-06, "loss": 2.2415, "step": 4994 }, { "epoch": 0.26797210300429186, "grad_norm": 0.373046875, "learning_rate": 4.9927041889923e-06, "loss": 2.2869, "step": 4995 }, { "epoch": 0.2680257510729614, "grad_norm": 0.392578125, "learning_rate": 4.992697555144916e-06, "loss": 2.411, "step": 4996 }, { "epoch": 0.2680793991416309, "grad_norm": 0.337890625, "learning_rate": 4.99269091828734e-06, "loss": 2.0436, "step": 4997 }, { "epoch": 0.2681330472103004, "grad_norm": 0.41796875, "learning_rate": 4.992684278419581e-06, "loss": 2.3723, "step": 4998 }, { "epoch": 0.26818669527896993, "grad_norm": 0.4140625, "learning_rate": 4.992677635541647e-06, "loss": 2.1263, "step": 4999 }, { "epoch": 0.26824034334763946, "grad_norm": 0.51953125, "learning_rate": 4.992670989653546e-06, "loss": 2.5489, "step": 5000 }, { "epoch": 0.268293991416309, "grad_norm": 1.4375, "learning_rate": 4.992664340755288e-06, "loss": 2.452, "step": 5001 }, { "epoch": 0.26834763948497853, "grad_norm": 0.365234375, "learning_rate": 4.992657688846877e-06, "loss": 2.047, "step": 5002 }, { "epoch": 0.26840128755364806, "grad_norm": 0.388671875, "learning_rate": 4.992651033928325e-06, "loss": 2.3191, "step": 5003 }, { "epoch": 0.2684549356223176, "grad_norm": 0.37890625, "learning_rate": 4.992644375999638e-06, "loss": 2.2052, "step": 5004 }, { "epoch": 0.2685085836909871, "grad_norm": 0.37109375, "learning_rate": 4.992637715060823e-06, "loss": 2.2283, "step": 5005 }, { "epoch": 0.26856223175965666, "grad_norm": 0.55078125, "learning_rate": 4.99263105111189e-06, "loss": 2.2526, "step": 5006 }, { "epoch": 0.2686158798283262, "grad_norm": 0.35546875, "learning_rate": 4.992624384152847e-06, "loss": 2.1881, "step": 5007 }, { "epoch": 0.2686695278969957, "grad_norm": 0.93359375, "learning_rate": 4.992617714183701e-06, "loss": 2.2417, "step": 5008 }, { "epoch": 0.26872317596566525, "grad_norm": 0.353515625, "learning_rate": 4.99261104120446e-06, "loss": 2.5622, "step": 5009 }, { "epoch": 0.2687768240343348, "grad_norm": 0.3515625, "learning_rate": 4.9926043652151336e-06, "loss": 2.2855, "step": 5010 }, { "epoch": 0.2688304721030043, "grad_norm": 0.35546875, "learning_rate": 4.992597686215728e-06, "loss": 2.6003, "step": 5011 }, { "epoch": 0.26888412017167385, "grad_norm": 0.427734375, "learning_rate": 4.992591004206253e-06, "loss": 2.3076, "step": 5012 }, { "epoch": 0.2689377682403433, "grad_norm": 0.453125, "learning_rate": 4.9925843191867155e-06, "loss": 2.1238, "step": 5013 }, { "epoch": 0.26899141630901285, "grad_norm": 0.369140625, "learning_rate": 4.992577631157123e-06, "loss": 2.4993, "step": 5014 }, { "epoch": 0.2690450643776824, "grad_norm": 0.466796875, "learning_rate": 4.992570940117485e-06, "loss": 2.1892, "step": 5015 }, { "epoch": 0.2690987124463519, "grad_norm": 0.31640625, "learning_rate": 4.992564246067809e-06, "loss": 2.3563, "step": 5016 }, { "epoch": 0.26915236051502145, "grad_norm": 0.3515625, "learning_rate": 4.992557549008102e-06, "loss": 2.1353, "step": 5017 }, { "epoch": 0.269206008583691, "grad_norm": 0.380859375, "learning_rate": 4.992550848938374e-06, "loss": 2.3228, "step": 5018 }, { "epoch": 0.2692596566523605, "grad_norm": 0.37109375, "learning_rate": 4.9925441458586335e-06, "loss": 2.2086, "step": 5019 }, { "epoch": 0.26931330472103004, "grad_norm": 0.8828125, "learning_rate": 4.9925374397688866e-06, "loss": 2.3077, "step": 5020 }, { "epoch": 0.2693669527896996, "grad_norm": 0.8515625, "learning_rate": 4.992530730669141e-06, "loss": 2.2715, "step": 5021 }, { "epoch": 0.2694206008583691, "grad_norm": 0.38671875, "learning_rate": 4.992524018559407e-06, "loss": 2.1349, "step": 5022 }, { "epoch": 0.26947424892703864, "grad_norm": 0.396484375, "learning_rate": 4.992517303439691e-06, "loss": 2.3978, "step": 5023 }, { "epoch": 0.26952789699570817, "grad_norm": 0.3828125, "learning_rate": 4.992510585310003e-06, "loss": 2.3116, "step": 5024 }, { "epoch": 0.2695815450643777, "grad_norm": 0.30859375, "learning_rate": 4.992503864170348e-06, "loss": 2.4622, "step": 5025 }, { "epoch": 0.26963519313304724, "grad_norm": 0.34375, "learning_rate": 4.992497140020738e-06, "loss": 2.2966, "step": 5026 }, { "epoch": 0.2696888412017167, "grad_norm": 0.4453125, "learning_rate": 4.992490412861178e-06, "loss": 2.2699, "step": 5027 }, { "epoch": 0.26974248927038624, "grad_norm": 0.4140625, "learning_rate": 4.992483682691677e-06, "loss": 2.2883, "step": 5028 }, { "epoch": 0.2697961373390558, "grad_norm": 0.3828125, "learning_rate": 4.992476949512244e-06, "loss": 2.3325, "step": 5029 }, { "epoch": 0.2698497854077253, "grad_norm": 0.4140625, "learning_rate": 4.992470213322887e-06, "loss": 2.5481, "step": 5030 }, { "epoch": 0.26990343347639484, "grad_norm": 0.8125, "learning_rate": 4.992463474123612e-06, "loss": 2.3083, "step": 5031 }, { "epoch": 0.26995708154506437, "grad_norm": 0.453125, "learning_rate": 4.99245673191443e-06, "loss": 2.5927, "step": 5032 }, { "epoch": 0.2700107296137339, "grad_norm": 0.96484375, "learning_rate": 4.992449986695348e-06, "loss": 2.3018, "step": 5033 }, { "epoch": 0.27006437768240343, "grad_norm": 0.76953125, "learning_rate": 4.992443238466373e-06, "loss": 2.3058, "step": 5034 }, { "epoch": 0.27011802575107297, "grad_norm": 0.48046875, "learning_rate": 4.9924364872275145e-06, "loss": 2.4437, "step": 5035 }, { "epoch": 0.2701716738197425, "grad_norm": 0.423828125, "learning_rate": 4.992429732978781e-06, "loss": 2.3307, "step": 5036 }, { "epoch": 0.27022532188841203, "grad_norm": 0.3984375, "learning_rate": 4.9924229757201795e-06, "loss": 2.4955, "step": 5037 }, { "epoch": 0.27027896995708156, "grad_norm": 0.369140625, "learning_rate": 4.992416215451718e-06, "loss": 2.3532, "step": 5038 }, { "epoch": 0.2703326180257511, "grad_norm": 0.333984375, "learning_rate": 4.992409452173407e-06, "loss": 2.0064, "step": 5039 }, { "epoch": 0.2703862660944206, "grad_norm": 0.3984375, "learning_rate": 4.992402685885253e-06, "loss": 2.2397, "step": 5040 }, { "epoch": 0.27043991416309016, "grad_norm": 0.376953125, "learning_rate": 4.992395916587263e-06, "loss": 2.2216, "step": 5041 }, { "epoch": 0.27049356223175963, "grad_norm": 0.33984375, "learning_rate": 4.992389144279447e-06, "loss": 2.2126, "step": 5042 }, { "epoch": 0.27054721030042916, "grad_norm": 0.35546875, "learning_rate": 4.992382368961812e-06, "loss": 2.2708, "step": 5043 }, { "epoch": 0.2706008583690987, "grad_norm": 0.4296875, "learning_rate": 4.992375590634367e-06, "loss": 2.5718, "step": 5044 }, { "epoch": 0.2706545064377682, "grad_norm": 1.0234375, "learning_rate": 4.992368809297119e-06, "loss": 1.9375, "step": 5045 }, { "epoch": 0.27070815450643776, "grad_norm": 0.373046875, "learning_rate": 4.992362024950079e-06, "loss": 2.2651, "step": 5046 }, { "epoch": 0.2707618025751073, "grad_norm": 0.353515625, "learning_rate": 4.992355237593252e-06, "loss": 2.2594, "step": 5047 }, { "epoch": 0.2708154506437768, "grad_norm": 0.369140625, "learning_rate": 4.992348447226648e-06, "loss": 2.2116, "step": 5048 }, { "epoch": 0.27086909871244635, "grad_norm": 0.32421875, "learning_rate": 4.9923416538502735e-06, "loss": 1.9138, "step": 5049 }, { "epoch": 0.2709227467811159, "grad_norm": 0.42578125, "learning_rate": 4.9923348574641395e-06, "loss": 2.5674, "step": 5050 }, { "epoch": 0.2709763948497854, "grad_norm": 0.3359375, "learning_rate": 4.992328058068252e-06, "loss": 2.1531, "step": 5051 }, { "epoch": 0.27103004291845495, "grad_norm": 0.419921875, "learning_rate": 4.992321255662619e-06, "loss": 2.4165, "step": 5052 }, { "epoch": 0.2710836909871245, "grad_norm": 0.4296875, "learning_rate": 4.992314450247251e-06, "loss": 2.4065, "step": 5053 }, { "epoch": 0.271137339055794, "grad_norm": 0.427734375, "learning_rate": 4.992307641822154e-06, "loss": 2.3825, "step": 5054 }, { "epoch": 0.27119098712446355, "grad_norm": 0.3984375, "learning_rate": 4.992300830387336e-06, "loss": 2.1139, "step": 5055 }, { "epoch": 0.271244635193133, "grad_norm": 0.421875, "learning_rate": 4.9922940159428066e-06, "loss": 2.483, "step": 5056 }, { "epoch": 0.27129828326180255, "grad_norm": 0.40625, "learning_rate": 4.9922871984885745e-06, "loss": 2.3641, "step": 5057 }, { "epoch": 0.2713519313304721, "grad_norm": 0.76171875, "learning_rate": 4.992280378024646e-06, "loss": 2.349, "step": 5058 }, { "epoch": 0.2714055793991416, "grad_norm": 0.34375, "learning_rate": 4.992273554551031e-06, "loss": 2.5598, "step": 5059 }, { "epoch": 0.27145922746781115, "grad_norm": 0.66015625, "learning_rate": 4.992266728067737e-06, "loss": 2.172, "step": 5060 }, { "epoch": 0.2715128755364807, "grad_norm": 0.482421875, "learning_rate": 4.9922598985747725e-06, "loss": 1.9912, "step": 5061 }, { "epoch": 0.2715665236051502, "grad_norm": 0.369140625, "learning_rate": 4.9922530660721454e-06, "loss": 2.1428, "step": 5062 }, { "epoch": 0.27162017167381974, "grad_norm": 0.3984375, "learning_rate": 4.9922462305598655e-06, "loss": 2.5805, "step": 5063 }, { "epoch": 0.2716738197424893, "grad_norm": 0.375, "learning_rate": 4.992239392037938e-06, "loss": 2.2617, "step": 5064 }, { "epoch": 0.2717274678111588, "grad_norm": 0.42578125, "learning_rate": 4.992232550506373e-06, "loss": 2.4416, "step": 5065 }, { "epoch": 0.27178111587982834, "grad_norm": 0.33984375, "learning_rate": 4.9922257059651794e-06, "loss": 2.5612, "step": 5066 }, { "epoch": 0.27183476394849787, "grad_norm": 0.64453125, "learning_rate": 4.992218858414364e-06, "loss": 2.3146, "step": 5067 }, { "epoch": 0.2718884120171674, "grad_norm": 0.404296875, "learning_rate": 4.992212007853936e-06, "loss": 2.2454, "step": 5068 }, { "epoch": 0.27194206008583693, "grad_norm": 0.357421875, "learning_rate": 4.9922051542839035e-06, "loss": 2.124, "step": 5069 }, { "epoch": 0.2719957081545064, "grad_norm": 0.380859375, "learning_rate": 4.992198297704275e-06, "loss": 2.5205, "step": 5070 }, { "epoch": 0.27204935622317594, "grad_norm": 0.67578125, "learning_rate": 4.992191438115058e-06, "loss": 2.1473, "step": 5071 }, { "epoch": 0.2721030042918455, "grad_norm": 1.4296875, "learning_rate": 4.992184575516261e-06, "loss": 2.111, "step": 5072 }, { "epoch": 0.272156652360515, "grad_norm": 0.40625, "learning_rate": 4.992177709907894e-06, "loss": 2.285, "step": 5073 }, { "epoch": 0.27221030042918454, "grad_norm": 0.31640625, "learning_rate": 4.992170841289963e-06, "loss": 2.2489, "step": 5074 }, { "epoch": 0.27226394849785407, "grad_norm": 0.37109375, "learning_rate": 4.992163969662477e-06, "loss": 2.1996, "step": 5075 }, { "epoch": 0.2723175965665236, "grad_norm": 0.7734375, "learning_rate": 4.992157095025444e-06, "loss": 2.2527, "step": 5076 }, { "epoch": 0.27237124463519313, "grad_norm": 0.361328125, "learning_rate": 4.992150217378873e-06, "loss": 2.3962, "step": 5077 }, { "epoch": 0.27242489270386266, "grad_norm": 0.359375, "learning_rate": 4.992143336722772e-06, "loss": 2.3166, "step": 5078 }, { "epoch": 0.2724785407725322, "grad_norm": 0.361328125, "learning_rate": 4.99213645305715e-06, "loss": 2.1957, "step": 5079 }, { "epoch": 0.27253218884120173, "grad_norm": 0.337890625, "learning_rate": 4.992129566382014e-06, "loss": 2.2714, "step": 5080 }, { "epoch": 0.27258583690987126, "grad_norm": 0.46484375, "learning_rate": 4.992122676697374e-06, "loss": 2.3909, "step": 5081 }, { "epoch": 0.2726394849785408, "grad_norm": 0.3515625, "learning_rate": 4.992115784003237e-06, "loss": 2.2363, "step": 5082 }, { "epoch": 0.2726931330472103, "grad_norm": 0.474609375, "learning_rate": 4.992108888299611e-06, "loss": 2.5571, "step": 5083 }, { "epoch": 0.27274678111587985, "grad_norm": 0.52734375, "learning_rate": 4.9921019895865055e-06, "loss": 2.1576, "step": 5084 }, { "epoch": 0.27280042918454933, "grad_norm": 0.3125, "learning_rate": 4.9920950878639275e-06, "loss": 2.2718, "step": 5085 }, { "epoch": 0.27285407725321886, "grad_norm": 0.416015625, "learning_rate": 4.9920881831318865e-06, "loss": 2.2929, "step": 5086 }, { "epoch": 0.2729077253218884, "grad_norm": 0.353515625, "learning_rate": 4.9920812753903915e-06, "loss": 2.1213, "step": 5087 }, { "epoch": 0.2729613733905579, "grad_norm": 0.359375, "learning_rate": 4.992074364639449e-06, "loss": 2.3679, "step": 5088 }, { "epoch": 0.27301502145922746, "grad_norm": 0.33984375, "learning_rate": 4.992067450879068e-06, "loss": 2.1537, "step": 5089 }, { "epoch": 0.273068669527897, "grad_norm": 0.4296875, "learning_rate": 4.992060534109257e-06, "loss": 2.6034, "step": 5090 }, { "epoch": 0.2731223175965665, "grad_norm": 0.38671875, "learning_rate": 4.992053614330025e-06, "loss": 2.4404, "step": 5091 }, { "epoch": 0.27317596566523605, "grad_norm": 0.5078125, "learning_rate": 4.992046691541379e-06, "loss": 2.4837, "step": 5092 }, { "epoch": 0.2732296137339056, "grad_norm": 0.37890625, "learning_rate": 4.992039765743328e-06, "loss": 2.3586, "step": 5093 }, { "epoch": 0.2732832618025751, "grad_norm": 0.486328125, "learning_rate": 4.9920328369358815e-06, "loss": 2.599, "step": 5094 }, { "epoch": 0.27333690987124465, "grad_norm": 0.365234375, "learning_rate": 4.9920259051190455e-06, "loss": 2.553, "step": 5095 }, { "epoch": 0.2733905579399142, "grad_norm": 0.53125, "learning_rate": 4.99201897029283e-06, "loss": 2.3297, "step": 5096 }, { "epoch": 0.2734442060085837, "grad_norm": 0.357421875, "learning_rate": 4.992012032457244e-06, "loss": 2.3599, "step": 5097 }, { "epoch": 0.27349785407725324, "grad_norm": 0.365234375, "learning_rate": 4.9920050916122945e-06, "loss": 2.3948, "step": 5098 }, { "epoch": 0.2735515021459227, "grad_norm": 0.3984375, "learning_rate": 4.9919981477579894e-06, "loss": 2.1227, "step": 5099 }, { "epoch": 0.27360515021459225, "grad_norm": 0.39453125, "learning_rate": 4.991991200894339e-06, "loss": 2.0401, "step": 5100 }, { "epoch": 0.2736587982832618, "grad_norm": 0.404296875, "learning_rate": 4.99198425102135e-06, "loss": 2.2095, "step": 5101 }, { "epoch": 0.2737124463519313, "grad_norm": 0.375, "learning_rate": 4.991977298139032e-06, "loss": 2.0839, "step": 5102 }, { "epoch": 0.27376609442060085, "grad_norm": 0.388671875, "learning_rate": 4.991970342247393e-06, "loss": 2.2977, "step": 5103 }, { "epoch": 0.2738197424892704, "grad_norm": 0.400390625, "learning_rate": 4.991963383346441e-06, "loss": 2.4523, "step": 5104 }, { "epoch": 0.2738733905579399, "grad_norm": 0.369140625, "learning_rate": 4.991956421436184e-06, "loss": 2.4114, "step": 5105 }, { "epoch": 0.27392703862660944, "grad_norm": 0.43359375, "learning_rate": 4.991949456516632e-06, "loss": 2.3609, "step": 5106 }, { "epoch": 0.273980686695279, "grad_norm": 0.4609375, "learning_rate": 4.991942488587792e-06, "loss": 2.635, "step": 5107 }, { "epoch": 0.2740343347639485, "grad_norm": 0.384765625, "learning_rate": 4.991935517649673e-06, "loss": 1.987, "step": 5108 }, { "epoch": 0.27408798283261804, "grad_norm": 0.52734375, "learning_rate": 4.991928543702284e-06, "loss": 2.3427, "step": 5109 }, { "epoch": 0.27414163090128757, "grad_norm": 0.404296875, "learning_rate": 4.991921566745632e-06, "loss": 2.262, "step": 5110 }, { "epoch": 0.2741952789699571, "grad_norm": 0.9453125, "learning_rate": 4.991914586779727e-06, "loss": 2.4553, "step": 5111 }, { "epoch": 0.27424892703862663, "grad_norm": 0.41796875, "learning_rate": 4.991907603804576e-06, "loss": 2.3784, "step": 5112 }, { "epoch": 0.27430257510729616, "grad_norm": 0.373046875, "learning_rate": 4.9919006178201875e-06, "loss": 2.3033, "step": 5113 }, { "epoch": 0.27435622317596564, "grad_norm": 0.6796875, "learning_rate": 4.991893628826571e-06, "loss": 2.2389, "step": 5114 }, { "epoch": 0.2744098712446352, "grad_norm": 0.6328125, "learning_rate": 4.991886636823734e-06, "loss": 2.431, "step": 5115 }, { "epoch": 0.2744635193133047, "grad_norm": 0.3984375, "learning_rate": 4.991879641811686e-06, "loss": 2.3363, "step": 5116 }, { "epoch": 0.27451716738197424, "grad_norm": 0.3984375, "learning_rate": 4.991872643790435e-06, "loss": 2.2569, "step": 5117 }, { "epoch": 0.27457081545064377, "grad_norm": 0.396484375, "learning_rate": 4.9918656427599886e-06, "loss": 2.2352, "step": 5118 }, { "epoch": 0.2746244635193133, "grad_norm": 0.41015625, "learning_rate": 4.991858638720356e-06, "loss": 2.1851, "step": 5119 }, { "epoch": 0.27467811158798283, "grad_norm": 0.353515625, "learning_rate": 4.991851631671546e-06, "loss": 2.2586, "step": 5120 }, { "epoch": 0.27473175965665236, "grad_norm": 0.408203125, "learning_rate": 4.991844621613566e-06, "loss": 2.3996, "step": 5121 }, { "epoch": 0.2747854077253219, "grad_norm": 0.53515625, "learning_rate": 4.991837608546426e-06, "loss": 2.1715, "step": 5122 }, { "epoch": 0.2748390557939914, "grad_norm": 0.435546875, "learning_rate": 4.991830592470132e-06, "loss": 2.4366, "step": 5123 }, { "epoch": 0.27489270386266096, "grad_norm": 0.447265625, "learning_rate": 4.991823573384695e-06, "loss": 2.4054, "step": 5124 }, { "epoch": 0.2749463519313305, "grad_norm": 0.3359375, "learning_rate": 4.991816551290124e-06, "loss": 2.3254, "step": 5125 }, { "epoch": 0.275, "grad_norm": 0.375, "learning_rate": 4.991809526186424e-06, "loss": 2.3396, "step": 5126 }, { "epoch": 0.27505364806866955, "grad_norm": 0.466796875, "learning_rate": 4.991802498073606e-06, "loss": 2.2835, "step": 5127 }, { "epoch": 0.27510729613733903, "grad_norm": 0.396484375, "learning_rate": 4.991795466951678e-06, "loss": 2.2784, "step": 5128 }, { "epoch": 0.27516094420600856, "grad_norm": 0.392578125, "learning_rate": 4.9917884328206485e-06, "loss": 2.0393, "step": 5129 }, { "epoch": 0.2752145922746781, "grad_norm": 0.404296875, "learning_rate": 4.991781395680526e-06, "loss": 2.626, "step": 5130 }, { "epoch": 0.2752682403433476, "grad_norm": 0.361328125, "learning_rate": 4.991774355531319e-06, "loss": 2.0335, "step": 5131 }, { "epoch": 0.27532188841201716, "grad_norm": 0.63671875, "learning_rate": 4.991767312373036e-06, "loss": 2.3117, "step": 5132 }, { "epoch": 0.2753755364806867, "grad_norm": 0.36328125, "learning_rate": 4.991760266205685e-06, "loss": 2.3114, "step": 5133 }, { "epoch": 0.2754291845493562, "grad_norm": 5.15625, "learning_rate": 4.9917532170292745e-06, "loss": 1.8617, "step": 5134 }, { "epoch": 0.27548283261802575, "grad_norm": 0.375, "learning_rate": 4.991746164843814e-06, "loss": 2.2252, "step": 5135 }, { "epoch": 0.2755364806866953, "grad_norm": 1.109375, "learning_rate": 4.991739109649313e-06, "loss": 2.4509, "step": 5136 }, { "epoch": 0.2755901287553648, "grad_norm": 0.46875, "learning_rate": 4.991732051445777e-06, "loss": 2.1235, "step": 5137 }, { "epoch": 0.27564377682403435, "grad_norm": 0.39453125, "learning_rate": 4.991724990233216e-06, "loss": 2.3104, "step": 5138 }, { "epoch": 0.2756974248927039, "grad_norm": 0.34375, "learning_rate": 4.991717926011639e-06, "loss": 2.2875, "step": 5139 }, { "epoch": 0.2757510729613734, "grad_norm": 0.447265625, "learning_rate": 4.9917108587810535e-06, "loss": 2.3702, "step": 5140 }, { "epoch": 0.27580472103004294, "grad_norm": 0.361328125, "learning_rate": 4.991703788541469e-06, "loss": 2.3943, "step": 5141 }, { "epoch": 0.2758583690987124, "grad_norm": 0.341796875, "learning_rate": 4.991696715292894e-06, "loss": 2.2307, "step": 5142 }, { "epoch": 0.27591201716738195, "grad_norm": 0.439453125, "learning_rate": 4.991689639035337e-06, "loss": 2.4676, "step": 5143 }, { "epoch": 0.2759656652360515, "grad_norm": 0.43359375, "learning_rate": 4.991682559768805e-06, "loss": 2.2842, "step": 5144 }, { "epoch": 0.276019313304721, "grad_norm": 0.35546875, "learning_rate": 4.9916754774933085e-06, "loss": 2.3996, "step": 5145 }, { "epoch": 0.27607296137339055, "grad_norm": 0.5859375, "learning_rate": 4.991668392208855e-06, "loss": 2.2499, "step": 5146 }, { "epoch": 0.2761266094420601, "grad_norm": 0.337890625, "learning_rate": 4.991661303915454e-06, "loss": 2.3632, "step": 5147 }, { "epoch": 0.2761802575107296, "grad_norm": 0.4453125, "learning_rate": 4.991654212613113e-06, "loss": 2.1815, "step": 5148 }, { "epoch": 0.27623390557939914, "grad_norm": 0.412109375, "learning_rate": 4.991647118301842e-06, "loss": 2.1539, "step": 5149 }, { "epoch": 0.2762875536480687, "grad_norm": 1.1015625, "learning_rate": 4.991640020981647e-06, "loss": 2.491, "step": 5150 }, { "epoch": 0.2763412017167382, "grad_norm": 0.34765625, "learning_rate": 4.9916329206525395e-06, "loss": 2.3095, "step": 5151 }, { "epoch": 0.27639484978540774, "grad_norm": 0.396484375, "learning_rate": 4.991625817314526e-06, "loss": 2.4221, "step": 5152 }, { "epoch": 0.27644849785407727, "grad_norm": 0.6484375, "learning_rate": 4.991618710967616e-06, "loss": 2.4298, "step": 5153 }, { "epoch": 0.2765021459227468, "grad_norm": 0.361328125, "learning_rate": 4.9916116016118185e-06, "loss": 2.3836, "step": 5154 }, { "epoch": 0.27655579399141633, "grad_norm": 0.361328125, "learning_rate": 4.991604489247141e-06, "loss": 2.246, "step": 5155 }, { "epoch": 0.27660944206008586, "grad_norm": 0.412109375, "learning_rate": 4.991597373873592e-06, "loss": 2.3317, "step": 5156 }, { "epoch": 0.27666309012875534, "grad_norm": 0.412109375, "learning_rate": 4.991590255491182e-06, "loss": 2.656, "step": 5157 }, { "epoch": 0.27671673819742487, "grad_norm": 0.365234375, "learning_rate": 4.9915831340999175e-06, "loss": 2.4295, "step": 5158 }, { "epoch": 0.2767703862660944, "grad_norm": 0.46875, "learning_rate": 4.991576009699808e-06, "loss": 2.4294, "step": 5159 }, { "epoch": 0.27682403433476394, "grad_norm": 0.33203125, "learning_rate": 4.991568882290861e-06, "loss": 2.3048, "step": 5160 }, { "epoch": 0.27687768240343347, "grad_norm": 0.375, "learning_rate": 4.991561751873087e-06, "loss": 2.4851, "step": 5161 }, { "epoch": 0.276931330472103, "grad_norm": 0.466796875, "learning_rate": 4.991554618446494e-06, "loss": 2.5925, "step": 5162 }, { "epoch": 0.27698497854077253, "grad_norm": 0.359375, "learning_rate": 4.99154748201109e-06, "loss": 2.1018, "step": 5163 }, { "epoch": 0.27703862660944206, "grad_norm": 0.361328125, "learning_rate": 4.991540342566884e-06, "loss": 2.3189, "step": 5164 }, { "epoch": 0.2770922746781116, "grad_norm": 0.48046875, "learning_rate": 4.991533200113884e-06, "loss": 2.3458, "step": 5165 }, { "epoch": 0.2771459227467811, "grad_norm": 1.296875, "learning_rate": 4.9915260546521e-06, "loss": 2.002, "step": 5166 }, { "epoch": 0.27719957081545066, "grad_norm": 0.341796875, "learning_rate": 4.99151890618154e-06, "loss": 2.3168, "step": 5167 }, { "epoch": 0.2772532188841202, "grad_norm": 0.37890625, "learning_rate": 4.991511754702211e-06, "loss": 2.0718, "step": 5168 }, { "epoch": 0.2773068669527897, "grad_norm": 0.55078125, "learning_rate": 4.991504600214124e-06, "loss": 2.5256, "step": 5169 }, { "epoch": 0.27736051502145925, "grad_norm": 0.38671875, "learning_rate": 4.991497442717287e-06, "loss": 2.541, "step": 5170 }, { "epoch": 0.27741416309012873, "grad_norm": 0.359375, "learning_rate": 4.991490282211707e-06, "loss": 2.26, "step": 5171 }, { "epoch": 0.27746781115879826, "grad_norm": 0.37109375, "learning_rate": 4.991483118697396e-06, "loss": 2.275, "step": 5172 }, { "epoch": 0.2775214592274678, "grad_norm": 0.3984375, "learning_rate": 4.99147595217436e-06, "loss": 2.2902, "step": 5173 }, { "epoch": 0.2775751072961373, "grad_norm": 0.37890625, "learning_rate": 4.9914687826426075e-06, "loss": 2.3622, "step": 5174 }, { "epoch": 0.27762875536480686, "grad_norm": 0.423828125, "learning_rate": 4.991461610102148e-06, "loss": 2.3679, "step": 5175 }, { "epoch": 0.2776824034334764, "grad_norm": 0.341796875, "learning_rate": 4.991454434552991e-06, "loss": 2.2834, "step": 5176 }, { "epoch": 0.2777360515021459, "grad_norm": 0.435546875, "learning_rate": 4.991447255995144e-06, "loss": 2.1363, "step": 5177 }, { "epoch": 0.27778969957081545, "grad_norm": 0.44921875, "learning_rate": 4.991440074428615e-06, "loss": 2.2917, "step": 5178 }, { "epoch": 0.277843347639485, "grad_norm": 0.486328125, "learning_rate": 4.991432889853414e-06, "loss": 2.1723, "step": 5179 }, { "epoch": 0.2778969957081545, "grad_norm": 0.392578125, "learning_rate": 4.99142570226955e-06, "loss": 2.5419, "step": 5180 }, { "epoch": 0.27795064377682405, "grad_norm": 0.37109375, "learning_rate": 4.991418511677031e-06, "loss": 2.5307, "step": 5181 }, { "epoch": 0.2780042918454936, "grad_norm": 0.37890625, "learning_rate": 4.991411318075865e-06, "loss": 2.3334, "step": 5182 }, { "epoch": 0.2780579399141631, "grad_norm": 0.4296875, "learning_rate": 4.9914041214660615e-06, "loss": 2.152, "step": 5183 }, { "epoch": 0.27811158798283264, "grad_norm": 0.376953125, "learning_rate": 4.99139692184763e-06, "loss": 2.1475, "step": 5184 }, { "epoch": 0.2781652360515021, "grad_norm": 0.4140625, "learning_rate": 4.9913897192205765e-06, "loss": 2.5287, "step": 5185 }, { "epoch": 0.27821888412017165, "grad_norm": 0.416015625, "learning_rate": 4.991382513584912e-06, "loss": 2.3435, "step": 5186 }, { "epoch": 0.2782725321888412, "grad_norm": 0.390625, "learning_rate": 4.991375304940646e-06, "loss": 2.3325, "step": 5187 }, { "epoch": 0.2783261802575107, "grad_norm": 0.37890625, "learning_rate": 4.991368093287785e-06, "loss": 2.2279, "step": 5188 }, { "epoch": 0.27837982832618025, "grad_norm": 0.609375, "learning_rate": 4.991360878626338e-06, "loss": 2.4165, "step": 5189 }, { "epoch": 0.2784334763948498, "grad_norm": 0.337890625, "learning_rate": 4.991353660956315e-06, "loss": 1.9797, "step": 5190 }, { "epoch": 0.2784871244635193, "grad_norm": 0.341796875, "learning_rate": 4.991346440277723e-06, "loss": 2.3166, "step": 5191 }, { "epoch": 0.27854077253218884, "grad_norm": 0.361328125, "learning_rate": 4.9913392165905725e-06, "loss": 2.3497, "step": 5192 }, { "epoch": 0.2785944206008584, "grad_norm": 0.365234375, "learning_rate": 4.991331989894872e-06, "loss": 2.3766, "step": 5193 }, { "epoch": 0.2786480686695279, "grad_norm": 0.376953125, "learning_rate": 4.991324760190629e-06, "loss": 2.3642, "step": 5194 }, { "epoch": 0.27870171673819744, "grad_norm": 0.50390625, "learning_rate": 4.9913175274778525e-06, "loss": 2.445, "step": 5195 }, { "epoch": 0.27875536480686697, "grad_norm": 0.447265625, "learning_rate": 4.991310291756551e-06, "loss": 2.3595, "step": 5196 }, { "epoch": 0.2788090128755365, "grad_norm": 0.4140625, "learning_rate": 4.991303053026736e-06, "loss": 2.4638, "step": 5197 }, { "epoch": 0.27886266094420603, "grad_norm": 0.55078125, "learning_rate": 4.991295811288412e-06, "loss": 1.6884, "step": 5198 }, { "epoch": 0.27891630901287556, "grad_norm": 0.412109375, "learning_rate": 4.991288566541591e-06, "loss": 2.5506, "step": 5199 }, { "epoch": 0.27896995708154504, "grad_norm": 0.6484375, "learning_rate": 4.9912813187862805e-06, "loss": 2.0569, "step": 5200 }, { "epoch": 0.27902360515021457, "grad_norm": 0.330078125, "learning_rate": 4.991274068022489e-06, "loss": 2.2476, "step": 5201 }, { "epoch": 0.2790772532188841, "grad_norm": 0.6953125, "learning_rate": 4.991266814250226e-06, "loss": 2.2615, "step": 5202 }, { "epoch": 0.27913090128755363, "grad_norm": 0.486328125, "learning_rate": 4.9912595574695e-06, "loss": 2.4643, "step": 5203 }, { "epoch": 0.27918454935622317, "grad_norm": 0.35546875, "learning_rate": 4.991252297680319e-06, "loss": 2.282, "step": 5204 }, { "epoch": 0.2792381974248927, "grad_norm": 0.345703125, "learning_rate": 4.9912450348826925e-06, "loss": 1.9604, "step": 5205 }, { "epoch": 0.27929184549356223, "grad_norm": 0.36328125, "learning_rate": 4.991237769076629e-06, "loss": 2.3608, "step": 5206 }, { "epoch": 0.27934549356223176, "grad_norm": 0.404296875, "learning_rate": 4.991230500262139e-06, "loss": 2.3651, "step": 5207 }, { "epoch": 0.2793991416309013, "grad_norm": 0.361328125, "learning_rate": 4.991223228439228e-06, "loss": 1.7528, "step": 5208 }, { "epoch": 0.2794527896995708, "grad_norm": 0.419921875, "learning_rate": 4.9912159536079066e-06, "loss": 2.3706, "step": 5209 }, { "epoch": 0.27950643776824036, "grad_norm": 0.4609375, "learning_rate": 4.991208675768184e-06, "loss": 2.2193, "step": 5210 }, { "epoch": 0.2795600858369099, "grad_norm": 0.443359375, "learning_rate": 4.991201394920068e-06, "loss": 2.4904, "step": 5211 }, { "epoch": 0.2796137339055794, "grad_norm": 0.4453125, "learning_rate": 4.991194111063569e-06, "loss": 2.1136, "step": 5212 }, { "epoch": 0.27966738197424895, "grad_norm": 0.35546875, "learning_rate": 4.991186824198693e-06, "loss": 2.3178, "step": 5213 }, { "epoch": 0.27972103004291843, "grad_norm": 0.4765625, "learning_rate": 4.9911795343254515e-06, "loss": 2.3938, "step": 5214 }, { "epoch": 0.27977467811158796, "grad_norm": 0.87890625, "learning_rate": 4.991172241443852e-06, "loss": 2.5493, "step": 5215 }, { "epoch": 0.2798283261802575, "grad_norm": 0.30078125, "learning_rate": 4.991164945553904e-06, "loss": 2.1791, "step": 5216 }, { "epoch": 0.279881974248927, "grad_norm": 0.34375, "learning_rate": 4.991157646655616e-06, "loss": 2.1422, "step": 5217 }, { "epoch": 0.27993562231759656, "grad_norm": 0.357421875, "learning_rate": 4.991150344748995e-06, "loss": 2.2829, "step": 5218 }, { "epoch": 0.2799892703862661, "grad_norm": 0.375, "learning_rate": 4.991143039834053e-06, "loss": 2.3739, "step": 5219 }, { "epoch": 0.2800429184549356, "grad_norm": 0.33203125, "learning_rate": 4.991135731910796e-06, "loss": 2.1085, "step": 5220 }, { "epoch": 0.28009656652360515, "grad_norm": 0.421875, "learning_rate": 4.991128420979237e-06, "loss": 2.1136, "step": 5221 }, { "epoch": 0.2801502145922747, "grad_norm": 0.466796875, "learning_rate": 4.991121107039379e-06, "loss": 2.423, "step": 5222 }, { "epoch": 0.2802038626609442, "grad_norm": 0.49609375, "learning_rate": 4.991113790091235e-06, "loss": 2.666, "step": 5223 }, { "epoch": 0.28025751072961375, "grad_norm": 0.5078125, "learning_rate": 4.991106470134813e-06, "loss": 2.2345, "step": 5224 }, { "epoch": 0.2803111587982833, "grad_norm": 0.369140625, "learning_rate": 4.9910991471701206e-06, "loss": 2.1098, "step": 5225 }, { "epoch": 0.2803648068669528, "grad_norm": 0.380859375, "learning_rate": 4.991091821197168e-06, "loss": 2.0164, "step": 5226 }, { "epoch": 0.28041845493562234, "grad_norm": 0.3671875, "learning_rate": 4.991084492215964e-06, "loss": 2.3235, "step": 5227 }, { "epoch": 0.2804721030042919, "grad_norm": 0.310546875, "learning_rate": 4.991077160226517e-06, "loss": 2.1892, "step": 5228 }, { "epoch": 0.28052575107296135, "grad_norm": 0.33984375, "learning_rate": 4.991069825228835e-06, "loss": 2.3213, "step": 5229 }, { "epoch": 0.2805793991416309, "grad_norm": 6.03125, "learning_rate": 4.991062487222928e-06, "loss": 2.1957, "step": 5230 }, { "epoch": 0.2806330472103004, "grad_norm": 0.37890625, "learning_rate": 4.9910551462088056e-06, "loss": 2.3296, "step": 5231 }, { "epoch": 0.28068669527896994, "grad_norm": 0.373046875, "learning_rate": 4.991047802186475e-06, "loss": 2.4027, "step": 5232 }, { "epoch": 0.2807403433476395, "grad_norm": 0.4140625, "learning_rate": 4.991040455155946e-06, "loss": 2.4158, "step": 5233 }, { "epoch": 0.280793991416309, "grad_norm": 0.408203125, "learning_rate": 4.991033105117227e-06, "loss": 2.2126, "step": 5234 }, { "epoch": 0.28084763948497854, "grad_norm": 0.408203125, "learning_rate": 4.991025752070326e-06, "loss": 2.544, "step": 5235 }, { "epoch": 0.28090128755364807, "grad_norm": 0.51171875, "learning_rate": 4.991018396015254e-06, "loss": 2.4182, "step": 5236 }, { "epoch": 0.2809549356223176, "grad_norm": 0.388671875, "learning_rate": 4.991011036952019e-06, "loss": 2.37, "step": 5237 }, { "epoch": 0.28100858369098713, "grad_norm": 0.36328125, "learning_rate": 4.991003674880629e-06, "loss": 2.2613, "step": 5238 }, { "epoch": 0.28106223175965667, "grad_norm": 0.400390625, "learning_rate": 4.990996309801095e-06, "loss": 2.3923, "step": 5239 }, { "epoch": 0.2811158798283262, "grad_norm": 0.462890625, "learning_rate": 4.990988941713424e-06, "loss": 1.8826, "step": 5240 }, { "epoch": 0.28116952789699573, "grad_norm": 1.390625, "learning_rate": 4.990981570617624e-06, "loss": 2.3908, "step": 5241 }, { "epoch": 0.28122317596566526, "grad_norm": 0.58984375, "learning_rate": 4.990974196513707e-06, "loss": 2.2539, "step": 5242 }, { "epoch": 0.28127682403433474, "grad_norm": 0.3828125, "learning_rate": 4.990966819401679e-06, "loss": 2.5446, "step": 5243 }, { "epoch": 0.28133047210300427, "grad_norm": 0.326171875, "learning_rate": 4.990959439281551e-06, "loss": 2.0298, "step": 5244 }, { "epoch": 0.2813841201716738, "grad_norm": 0.3671875, "learning_rate": 4.990952056153331e-06, "loss": 2.3428, "step": 5245 }, { "epoch": 0.28143776824034333, "grad_norm": 0.345703125, "learning_rate": 4.990944670017028e-06, "loss": 2.2663, "step": 5246 }, { "epoch": 0.28149141630901287, "grad_norm": 0.49609375, "learning_rate": 4.990937280872651e-06, "loss": 2.4004, "step": 5247 }, { "epoch": 0.2815450643776824, "grad_norm": 0.3046875, "learning_rate": 4.990929888720208e-06, "loss": 2.387, "step": 5248 }, { "epoch": 0.28159871244635193, "grad_norm": 0.322265625, "learning_rate": 4.990922493559709e-06, "loss": 2.2684, "step": 5249 }, { "epoch": 0.28165236051502146, "grad_norm": 0.546875, "learning_rate": 4.990915095391163e-06, "loss": 2.2317, "step": 5250 }, { "epoch": 0.281706008583691, "grad_norm": 0.76953125, "learning_rate": 4.990907694214578e-06, "loss": 2.4378, "step": 5251 }, { "epoch": 0.2817596566523605, "grad_norm": 0.3984375, "learning_rate": 4.9909002900299646e-06, "loss": 2.2003, "step": 5252 }, { "epoch": 0.28181330472103006, "grad_norm": 0.349609375, "learning_rate": 4.99089288283733e-06, "loss": 2.2571, "step": 5253 }, { "epoch": 0.2818669527896996, "grad_norm": 0.388671875, "learning_rate": 4.990885472636684e-06, "loss": 2.3789, "step": 5254 }, { "epoch": 0.2819206008583691, "grad_norm": 0.376953125, "learning_rate": 4.9908780594280355e-06, "loss": 2.3594, "step": 5255 }, { "epoch": 0.28197424892703865, "grad_norm": 0.41015625, "learning_rate": 4.990870643211393e-06, "loss": 2.2882, "step": 5256 }, { "epoch": 0.2820278969957081, "grad_norm": 0.333984375, "learning_rate": 4.990863223986766e-06, "loss": 2.3038, "step": 5257 }, { "epoch": 0.28208154506437766, "grad_norm": 0.34375, "learning_rate": 4.990855801754163e-06, "loss": 2.4229, "step": 5258 }, { "epoch": 0.2821351931330472, "grad_norm": 0.384765625, "learning_rate": 4.990848376513593e-06, "loss": 2.4497, "step": 5259 }, { "epoch": 0.2821888412017167, "grad_norm": 0.3828125, "learning_rate": 4.990840948265066e-06, "loss": 2.4063, "step": 5260 }, { "epoch": 0.28224248927038625, "grad_norm": 0.384765625, "learning_rate": 4.990833517008589e-06, "loss": 2.2818, "step": 5261 }, { "epoch": 0.2822961373390558, "grad_norm": 0.4375, "learning_rate": 4.990826082744173e-06, "loss": 2.2936, "step": 5262 }, { "epoch": 0.2823497854077253, "grad_norm": 0.369140625, "learning_rate": 4.990818645471826e-06, "loss": 2.6917, "step": 5263 }, { "epoch": 0.28240343347639485, "grad_norm": 0.3828125, "learning_rate": 4.990811205191557e-06, "loss": 2.3651, "step": 5264 }, { "epoch": 0.2824570815450644, "grad_norm": 0.31640625, "learning_rate": 4.990803761903374e-06, "loss": 2.3512, "step": 5265 }, { "epoch": 0.2825107296137339, "grad_norm": 0.443359375, "learning_rate": 4.990796315607289e-06, "loss": 2.6194, "step": 5266 }, { "epoch": 0.28256437768240344, "grad_norm": 0.453125, "learning_rate": 4.990788866303308e-06, "loss": 1.801, "step": 5267 }, { "epoch": 0.282618025751073, "grad_norm": 0.81640625, "learning_rate": 4.990781413991441e-06, "loss": 2.1371, "step": 5268 }, { "epoch": 0.2826716738197425, "grad_norm": 0.37890625, "learning_rate": 4.990773958671697e-06, "loss": 2.1945, "step": 5269 }, { "epoch": 0.28272532188841204, "grad_norm": 0.357421875, "learning_rate": 4.990766500344085e-06, "loss": 2.4712, "step": 5270 }, { "epoch": 0.28277896995708157, "grad_norm": 0.42578125, "learning_rate": 4.990759039008613e-06, "loss": 2.3202, "step": 5271 }, { "epoch": 0.28283261802575105, "grad_norm": 0.396484375, "learning_rate": 4.990751574665293e-06, "loss": 2.2633, "step": 5272 }, { "epoch": 0.2828862660944206, "grad_norm": 0.435546875, "learning_rate": 4.990744107314132e-06, "loss": 2.1961, "step": 5273 }, { "epoch": 0.2829399141630901, "grad_norm": 0.39453125, "learning_rate": 4.990736636955138e-06, "loss": 2.4144, "step": 5274 }, { "epoch": 0.28299356223175964, "grad_norm": 0.4609375, "learning_rate": 4.9907291635883215e-06, "loss": 2.2331, "step": 5275 }, { "epoch": 0.2830472103004292, "grad_norm": 0.384765625, "learning_rate": 4.990721687213691e-06, "loss": 2.2926, "step": 5276 }, { "epoch": 0.2831008583690987, "grad_norm": 0.404296875, "learning_rate": 4.990714207831255e-06, "loss": 2.4427, "step": 5277 }, { "epoch": 0.28315450643776824, "grad_norm": 0.3671875, "learning_rate": 4.990706725441025e-06, "loss": 2.324, "step": 5278 }, { "epoch": 0.28320815450643777, "grad_norm": 0.451171875, "learning_rate": 4.9906992400430065e-06, "loss": 2.4456, "step": 5279 }, { "epoch": 0.2832618025751073, "grad_norm": 0.34375, "learning_rate": 4.990691751637211e-06, "loss": 2.2946, "step": 5280 }, { "epoch": 0.28331545064377683, "grad_norm": 0.33203125, "learning_rate": 4.990684260223647e-06, "loss": 2.1017, "step": 5281 }, { "epoch": 0.28336909871244637, "grad_norm": 0.439453125, "learning_rate": 4.990676765802322e-06, "loss": 2.3399, "step": 5282 }, { "epoch": 0.2834227467811159, "grad_norm": 0.3515625, "learning_rate": 4.990669268373248e-06, "loss": 2.3393, "step": 5283 }, { "epoch": 0.28347639484978543, "grad_norm": 0.43359375, "learning_rate": 4.9906617679364324e-06, "loss": 2.2261, "step": 5284 }, { "epoch": 0.28353004291845496, "grad_norm": 0.326171875, "learning_rate": 4.9906542644918835e-06, "loss": 2.1563, "step": 5285 }, { "epoch": 0.28358369098712444, "grad_norm": 0.6015625, "learning_rate": 4.990646758039611e-06, "loss": 2.413, "step": 5286 }, { "epoch": 0.28363733905579397, "grad_norm": 0.44140625, "learning_rate": 4.990639248579625e-06, "loss": 2.4587, "step": 5287 }, { "epoch": 0.2836909871244635, "grad_norm": 0.3671875, "learning_rate": 4.990631736111933e-06, "loss": 2.3553, "step": 5288 }, { "epoch": 0.28374463519313303, "grad_norm": 0.33984375, "learning_rate": 4.9906242206365456e-06, "loss": 2.1125, "step": 5289 }, { "epoch": 0.28379828326180256, "grad_norm": 0.423828125, "learning_rate": 4.9906167021534704e-06, "loss": 2.2254, "step": 5290 }, { "epoch": 0.2838519313304721, "grad_norm": 0.361328125, "learning_rate": 4.990609180662718e-06, "loss": 2.2485, "step": 5291 }, { "epoch": 0.2839055793991416, "grad_norm": 0.392578125, "learning_rate": 4.990601656164296e-06, "loss": 2.464, "step": 5292 }, { "epoch": 0.28395922746781116, "grad_norm": 0.375, "learning_rate": 4.990594128658214e-06, "loss": 2.264, "step": 5293 }, { "epoch": 0.2840128755364807, "grad_norm": 0.330078125, "learning_rate": 4.990586598144481e-06, "loss": 2.1891, "step": 5294 }, { "epoch": 0.2840665236051502, "grad_norm": 0.40234375, "learning_rate": 4.990579064623106e-06, "loss": 2.4889, "step": 5295 }, { "epoch": 0.28412017167381975, "grad_norm": 0.365234375, "learning_rate": 4.990571528094099e-06, "loss": 2.3945, "step": 5296 }, { "epoch": 0.2841738197424893, "grad_norm": 0.3515625, "learning_rate": 4.990563988557468e-06, "loss": 2.2729, "step": 5297 }, { "epoch": 0.2842274678111588, "grad_norm": 0.63671875, "learning_rate": 4.990556446013224e-06, "loss": 1.7309, "step": 5298 }, { "epoch": 0.28428111587982835, "grad_norm": 0.369140625, "learning_rate": 4.990548900461374e-06, "loss": 2.2202, "step": 5299 }, { "epoch": 0.2843347639484979, "grad_norm": 0.390625, "learning_rate": 4.990541351901927e-06, "loss": 2.1883, "step": 5300 }, { "epoch": 0.28438841201716736, "grad_norm": 0.376953125, "learning_rate": 4.990533800334894e-06, "loss": 2.522, "step": 5301 }, { "epoch": 0.2844420600858369, "grad_norm": 0.392578125, "learning_rate": 4.9905262457602825e-06, "loss": 2.241, "step": 5302 }, { "epoch": 0.2844957081545064, "grad_norm": 0.451171875, "learning_rate": 4.990518688178101e-06, "loss": 2.3944, "step": 5303 }, { "epoch": 0.28454935622317595, "grad_norm": 0.359375, "learning_rate": 4.990511127588362e-06, "loss": 2.2611, "step": 5304 }, { "epoch": 0.2846030042918455, "grad_norm": 0.90625, "learning_rate": 4.990503563991071e-06, "loss": 2.4663, "step": 5305 }, { "epoch": 0.284656652360515, "grad_norm": 0.3828125, "learning_rate": 4.990495997386239e-06, "loss": 2.5073, "step": 5306 }, { "epoch": 0.28471030042918455, "grad_norm": 1.1328125, "learning_rate": 4.990488427773874e-06, "loss": 1.8438, "step": 5307 }, { "epoch": 0.2847639484978541, "grad_norm": 2.875, "learning_rate": 4.990480855153988e-06, "loss": 2.3314, "step": 5308 }, { "epoch": 0.2848175965665236, "grad_norm": 0.40234375, "learning_rate": 4.990473279526585e-06, "loss": 2.4014, "step": 5309 }, { "epoch": 0.28487124463519314, "grad_norm": 0.306640625, "learning_rate": 4.990465700891679e-06, "loss": 2.0302, "step": 5310 }, { "epoch": 0.2849248927038627, "grad_norm": 0.462890625, "learning_rate": 4.990458119249278e-06, "loss": 2.4262, "step": 5311 }, { "epoch": 0.2849785407725322, "grad_norm": 1.0546875, "learning_rate": 4.990450534599389e-06, "loss": 2.2515, "step": 5312 }, { "epoch": 0.28503218884120174, "grad_norm": 0.36328125, "learning_rate": 4.990442946942022e-06, "loss": 2.2093, "step": 5313 }, { "epoch": 0.28508583690987127, "grad_norm": 0.423828125, "learning_rate": 4.990435356277188e-06, "loss": 2.2816, "step": 5314 }, { "epoch": 0.28513948497854075, "grad_norm": 0.3671875, "learning_rate": 4.990427762604894e-06, "loss": 2.4412, "step": 5315 }, { "epoch": 0.2851931330472103, "grad_norm": 0.4140625, "learning_rate": 4.990420165925151e-06, "loss": 2.1935, "step": 5316 }, { "epoch": 0.2852467811158798, "grad_norm": 0.37109375, "learning_rate": 4.990412566237968e-06, "loss": 2.3885, "step": 5317 }, { "epoch": 0.28530042918454934, "grad_norm": 0.3515625, "learning_rate": 4.990404963543352e-06, "loss": 2.284, "step": 5318 }, { "epoch": 0.2853540772532189, "grad_norm": 0.34765625, "learning_rate": 4.990397357841315e-06, "loss": 2.2441, "step": 5319 }, { "epoch": 0.2854077253218884, "grad_norm": 0.384765625, "learning_rate": 4.990389749131863e-06, "loss": 2.0861, "step": 5320 }, { "epoch": 0.28546137339055794, "grad_norm": 0.302734375, "learning_rate": 4.9903821374150085e-06, "loss": 2.2295, "step": 5321 }, { "epoch": 0.28551502145922747, "grad_norm": 0.85546875, "learning_rate": 4.990374522690758e-06, "loss": 2.4435, "step": 5322 }, { "epoch": 0.285568669527897, "grad_norm": 0.34765625, "learning_rate": 4.990366904959123e-06, "loss": 2.3983, "step": 5323 }, { "epoch": 0.28562231759656653, "grad_norm": 0.6015625, "learning_rate": 4.990359284220111e-06, "loss": 2.4547, "step": 5324 }, { "epoch": 0.28567596566523606, "grad_norm": 0.35546875, "learning_rate": 4.990351660473732e-06, "loss": 2.2614, "step": 5325 }, { "epoch": 0.2857296137339056, "grad_norm": 0.55078125, "learning_rate": 4.990344033719995e-06, "loss": 2.1912, "step": 5326 }, { "epoch": 0.28578326180257513, "grad_norm": 0.369140625, "learning_rate": 4.9903364039589085e-06, "loss": 1.9429, "step": 5327 }, { "epoch": 0.28583690987124466, "grad_norm": 0.30859375, "learning_rate": 4.990328771190484e-06, "loss": 2.3764, "step": 5328 }, { "epoch": 0.28589055793991414, "grad_norm": 0.388671875, "learning_rate": 4.990321135414727e-06, "loss": 2.1535, "step": 5329 }, { "epoch": 0.28594420600858367, "grad_norm": 0.423828125, "learning_rate": 4.99031349663165e-06, "loss": 2.4104, "step": 5330 }, { "epoch": 0.2859978540772532, "grad_norm": 0.390625, "learning_rate": 4.990305854841262e-06, "loss": 2.4781, "step": 5331 }, { "epoch": 0.28605150214592273, "grad_norm": 0.328125, "learning_rate": 4.99029821004357e-06, "loss": 2.217, "step": 5332 }, { "epoch": 0.28610515021459226, "grad_norm": 0.421875, "learning_rate": 4.990290562238585e-06, "loss": 2.4794, "step": 5333 }, { "epoch": 0.2861587982832618, "grad_norm": 0.482421875, "learning_rate": 4.990282911426317e-06, "loss": 2.2563, "step": 5334 }, { "epoch": 0.2862124463519313, "grad_norm": 0.392578125, "learning_rate": 4.990275257606773e-06, "loss": 2.2281, "step": 5335 }, { "epoch": 0.28626609442060086, "grad_norm": 0.404296875, "learning_rate": 4.990267600779962e-06, "loss": 1.4914, "step": 5336 }, { "epoch": 0.2863197424892704, "grad_norm": 0.333984375, "learning_rate": 4.990259940945896e-06, "loss": 2.0355, "step": 5337 }, { "epoch": 0.2863733905579399, "grad_norm": 0.33984375, "learning_rate": 4.9902522781045825e-06, "loss": 2.0822, "step": 5338 }, { "epoch": 0.28642703862660945, "grad_norm": 0.3828125, "learning_rate": 4.990244612256031e-06, "loss": 2.2299, "step": 5339 }, { "epoch": 0.286480686695279, "grad_norm": 0.38671875, "learning_rate": 4.990236943400251e-06, "loss": 2.3757, "step": 5340 }, { "epoch": 0.2865343347639485, "grad_norm": 0.3828125, "learning_rate": 4.99022927153725e-06, "loss": 2.373, "step": 5341 }, { "epoch": 0.28658798283261805, "grad_norm": 0.3203125, "learning_rate": 4.990221596667041e-06, "loss": 2.3646, "step": 5342 }, { "epoch": 0.2866416309012876, "grad_norm": 0.474609375, "learning_rate": 4.9902139187896295e-06, "loss": 2.3451, "step": 5343 }, { "epoch": 0.28669527896995706, "grad_norm": 0.3828125, "learning_rate": 4.990206237905027e-06, "loss": 2.3692, "step": 5344 }, { "epoch": 0.2867489270386266, "grad_norm": 0.361328125, "learning_rate": 4.990198554013242e-06, "loss": 2.1847, "step": 5345 }, { "epoch": 0.2868025751072961, "grad_norm": 0.390625, "learning_rate": 4.990190867114284e-06, "loss": 2.0928, "step": 5346 }, { "epoch": 0.28685622317596565, "grad_norm": 0.384765625, "learning_rate": 4.990183177208162e-06, "loss": 2.0872, "step": 5347 }, { "epoch": 0.2869098712446352, "grad_norm": 0.373046875, "learning_rate": 4.990175484294886e-06, "loss": 2.4402, "step": 5348 }, { "epoch": 0.2869635193133047, "grad_norm": 0.361328125, "learning_rate": 4.990167788374464e-06, "loss": 2.4357, "step": 5349 }, { "epoch": 0.28701716738197425, "grad_norm": 0.357421875, "learning_rate": 4.990160089446906e-06, "loss": 1.7526, "step": 5350 }, { "epoch": 0.2870708154506438, "grad_norm": 0.74609375, "learning_rate": 4.990152387512222e-06, "loss": 2.4291, "step": 5351 }, { "epoch": 0.2871244635193133, "grad_norm": 0.328125, "learning_rate": 4.99014468257042e-06, "loss": 2.3306, "step": 5352 }, { "epoch": 0.28717811158798284, "grad_norm": 0.375, "learning_rate": 4.990136974621511e-06, "loss": 2.4145, "step": 5353 }, { "epoch": 0.2872317596566524, "grad_norm": 0.43359375, "learning_rate": 4.990129263665502e-06, "loss": 2.5494, "step": 5354 }, { "epoch": 0.2872854077253219, "grad_norm": 0.49609375, "learning_rate": 4.990121549702405e-06, "loss": 2.4972, "step": 5355 }, { "epoch": 0.28733905579399144, "grad_norm": 0.435546875, "learning_rate": 4.990113832732226e-06, "loss": 2.3951, "step": 5356 }, { "epoch": 0.28739270386266097, "grad_norm": 0.6171875, "learning_rate": 4.9901061127549774e-06, "loss": 2.2648, "step": 5357 }, { "epoch": 0.28744635193133045, "grad_norm": 0.392578125, "learning_rate": 4.990098389770668e-06, "loss": 2.2151, "step": 5358 }, { "epoch": 0.2875, "grad_norm": 0.375, "learning_rate": 4.990090663779305e-06, "loss": 2.3478, "step": 5359 }, { "epoch": 0.2875536480686695, "grad_norm": 0.404296875, "learning_rate": 4.9900829347809e-06, "loss": 2.3229, "step": 5360 }, { "epoch": 0.28760729613733904, "grad_norm": 0.3828125, "learning_rate": 4.990075202775461e-06, "loss": 2.066, "step": 5361 }, { "epoch": 0.2876609442060086, "grad_norm": 0.359375, "learning_rate": 4.990067467762998e-06, "loss": 2.3278, "step": 5362 }, { "epoch": 0.2877145922746781, "grad_norm": 0.361328125, "learning_rate": 4.99005972974352e-06, "loss": 2.2461, "step": 5363 }, { "epoch": 0.28776824034334764, "grad_norm": 0.396484375, "learning_rate": 4.990051988717038e-06, "loss": 2.4729, "step": 5364 }, { "epoch": 0.28782188841201717, "grad_norm": 0.369140625, "learning_rate": 4.990044244683559e-06, "loss": 2.2799, "step": 5365 }, { "epoch": 0.2878755364806867, "grad_norm": 0.359375, "learning_rate": 4.990036497643092e-06, "loss": 2.3922, "step": 5366 }, { "epoch": 0.28792918454935623, "grad_norm": 0.36328125, "learning_rate": 4.990028747595649e-06, "loss": 2.0832, "step": 5367 }, { "epoch": 0.28798283261802576, "grad_norm": 0.3671875, "learning_rate": 4.990020994541237e-06, "loss": 2.3245, "step": 5368 }, { "epoch": 0.2880364806866953, "grad_norm": 0.30859375, "learning_rate": 4.9900132384798675e-06, "loss": 2.1104, "step": 5369 }, { "epoch": 0.2880901287553648, "grad_norm": 0.470703125, "learning_rate": 4.990005479411548e-06, "loss": 2.2977, "step": 5370 }, { "epoch": 0.28814377682403436, "grad_norm": 0.408203125, "learning_rate": 4.989997717336288e-06, "loss": 2.2238, "step": 5371 }, { "epoch": 0.28819742489270384, "grad_norm": 0.33984375, "learning_rate": 4.9899899522540985e-06, "loss": 2.2821, "step": 5372 }, { "epoch": 0.28825107296137337, "grad_norm": 0.70703125, "learning_rate": 4.989982184164987e-06, "loss": 2.2791, "step": 5373 }, { "epoch": 0.2883047210300429, "grad_norm": 0.55078125, "learning_rate": 4.9899744130689645e-06, "loss": 2.5831, "step": 5374 }, { "epoch": 0.28835836909871243, "grad_norm": 0.361328125, "learning_rate": 4.9899666389660385e-06, "loss": 2.1, "step": 5375 }, { "epoch": 0.28841201716738196, "grad_norm": 0.34375, "learning_rate": 4.98995886185622e-06, "loss": 2.1897, "step": 5376 }, { "epoch": 0.2884656652360515, "grad_norm": 0.369140625, "learning_rate": 4.989951081739518e-06, "loss": 2.2697, "step": 5377 }, { "epoch": 0.288519313304721, "grad_norm": 0.435546875, "learning_rate": 4.989943298615942e-06, "loss": 2.1869, "step": 5378 }, { "epoch": 0.28857296137339056, "grad_norm": 0.337890625, "learning_rate": 4.9899355124855e-06, "loss": 2.3268, "step": 5379 }, { "epoch": 0.2886266094420601, "grad_norm": 0.408203125, "learning_rate": 4.9899277233482035e-06, "loss": 1.2524, "step": 5380 }, { "epoch": 0.2886802575107296, "grad_norm": 0.3984375, "learning_rate": 4.989919931204061e-06, "loss": 2.075, "step": 5381 }, { "epoch": 0.28873390557939915, "grad_norm": 0.341796875, "learning_rate": 4.989912136053081e-06, "loss": 2.3823, "step": 5382 }, { "epoch": 0.2887875536480687, "grad_norm": 0.37890625, "learning_rate": 4.989904337895275e-06, "loss": 2.2023, "step": 5383 }, { "epoch": 0.2888412017167382, "grad_norm": 0.419921875, "learning_rate": 4.989896536730651e-06, "loss": 2.5842, "step": 5384 }, { "epoch": 0.28889484978540775, "grad_norm": 0.388671875, "learning_rate": 4.9898887325592176e-06, "loss": 2.2946, "step": 5385 }, { "epoch": 0.2889484978540773, "grad_norm": 0.4375, "learning_rate": 4.989880925380986e-06, "loss": 2.3125, "step": 5386 }, { "epoch": 0.28900214592274676, "grad_norm": 0.392578125, "learning_rate": 4.989873115195965e-06, "loss": 2.374, "step": 5387 }, { "epoch": 0.2890557939914163, "grad_norm": 0.431640625, "learning_rate": 4.989865302004163e-06, "loss": 2.2233, "step": 5388 }, { "epoch": 0.2891094420600858, "grad_norm": 0.38671875, "learning_rate": 4.989857485805591e-06, "loss": 2.2704, "step": 5389 }, { "epoch": 0.28916309012875535, "grad_norm": 0.33984375, "learning_rate": 4.989849666600258e-06, "loss": 2.242, "step": 5390 }, { "epoch": 0.2892167381974249, "grad_norm": 0.4140625, "learning_rate": 4.989841844388172e-06, "loss": 2.331, "step": 5391 }, { "epoch": 0.2892703862660944, "grad_norm": 0.349609375, "learning_rate": 4.989834019169345e-06, "loss": 2.2554, "step": 5392 }, { "epoch": 0.28932403433476395, "grad_norm": 0.52734375, "learning_rate": 4.989826190943785e-06, "loss": 2.2347, "step": 5393 }, { "epoch": 0.2893776824034335, "grad_norm": 0.341796875, "learning_rate": 4.989818359711501e-06, "loss": 2.2001, "step": 5394 }, { "epoch": 0.289431330472103, "grad_norm": 0.361328125, "learning_rate": 4.989810525472504e-06, "loss": 2.3657, "step": 5395 }, { "epoch": 0.28948497854077254, "grad_norm": 0.314453125, "learning_rate": 4.989802688226801e-06, "loss": 2.2812, "step": 5396 }, { "epoch": 0.2895386266094421, "grad_norm": 0.3984375, "learning_rate": 4.989794847974404e-06, "loss": 2.53, "step": 5397 }, { "epoch": 0.2895922746781116, "grad_norm": 0.40234375, "learning_rate": 4.989787004715321e-06, "loss": 2.4421, "step": 5398 }, { "epoch": 0.28964592274678114, "grad_norm": 0.349609375, "learning_rate": 4.989779158449561e-06, "loss": 2.4394, "step": 5399 }, { "epoch": 0.28969957081545067, "grad_norm": 0.369140625, "learning_rate": 4.989771309177136e-06, "loss": 2.4241, "step": 5400 }, { "epoch": 0.28975321888412015, "grad_norm": 0.28515625, "learning_rate": 4.989763456898054e-06, "loss": 2.2203, "step": 5401 }, { "epoch": 0.2898068669527897, "grad_norm": 1.421875, "learning_rate": 4.989755601612323e-06, "loss": 1.867, "step": 5402 }, { "epoch": 0.2898605150214592, "grad_norm": 0.451171875, "learning_rate": 4.989747743319954e-06, "loss": 2.2913, "step": 5403 }, { "epoch": 0.28991416309012874, "grad_norm": 0.44921875, "learning_rate": 4.989739882020957e-06, "loss": 2.2578, "step": 5404 }, { "epoch": 0.28996781115879827, "grad_norm": 0.375, "learning_rate": 4.98973201771534e-06, "loss": 2.474, "step": 5405 }, { "epoch": 0.2900214592274678, "grad_norm": 0.54296875, "learning_rate": 4.9897241504031136e-06, "loss": 2.1995, "step": 5406 }, { "epoch": 0.29007510729613734, "grad_norm": 2.4375, "learning_rate": 4.989716280084287e-06, "loss": 2.3003, "step": 5407 }, { "epoch": 0.29012875536480687, "grad_norm": 0.345703125, "learning_rate": 4.98970840675887e-06, "loss": 2.1835, "step": 5408 }, { "epoch": 0.2901824034334764, "grad_norm": 0.486328125, "learning_rate": 4.989700530426871e-06, "loss": 2.3493, "step": 5409 }, { "epoch": 0.29023605150214593, "grad_norm": 0.349609375, "learning_rate": 4.989692651088301e-06, "loss": 2.149, "step": 5410 }, { "epoch": 0.29028969957081546, "grad_norm": 0.42578125, "learning_rate": 4.9896847687431686e-06, "loss": 2.4762, "step": 5411 }, { "epoch": 0.290343347639485, "grad_norm": 0.93359375, "learning_rate": 4.989676883391484e-06, "loss": 2.3261, "step": 5412 }, { "epoch": 0.2903969957081545, "grad_norm": 0.4140625, "learning_rate": 4.9896689950332555e-06, "loss": 2.2259, "step": 5413 }, { "epoch": 0.29045064377682406, "grad_norm": 0.3203125, "learning_rate": 4.9896611036684935e-06, "loss": 2.2694, "step": 5414 }, { "epoch": 0.2905042918454936, "grad_norm": 0.6171875, "learning_rate": 4.9896532092972085e-06, "loss": 2.4297, "step": 5415 }, { "epoch": 0.29055793991416307, "grad_norm": 0.3828125, "learning_rate": 4.989645311919408e-06, "loss": 2.1946, "step": 5416 }, { "epoch": 0.2906115879828326, "grad_norm": 0.328125, "learning_rate": 4.989637411535102e-06, "loss": 2.1855, "step": 5417 }, { "epoch": 0.29066523605150213, "grad_norm": 0.423828125, "learning_rate": 4.989629508144302e-06, "loss": 2.3573, "step": 5418 }, { "epoch": 0.29071888412017166, "grad_norm": 0.486328125, "learning_rate": 4.989621601747015e-06, "loss": 1.5677, "step": 5419 }, { "epoch": 0.2907725321888412, "grad_norm": 0.326171875, "learning_rate": 4.989613692343251e-06, "loss": 2.1862, "step": 5420 }, { "epoch": 0.2908261802575107, "grad_norm": 0.515625, "learning_rate": 4.989605779933021e-06, "loss": 2.2636, "step": 5421 }, { "epoch": 0.29087982832618026, "grad_norm": 0.439453125, "learning_rate": 4.989597864516335e-06, "loss": 2.146, "step": 5422 }, { "epoch": 0.2909334763948498, "grad_norm": 0.61328125, "learning_rate": 4.989589946093199e-06, "loss": 2.2159, "step": 5423 }, { "epoch": 0.2909871244635193, "grad_norm": 0.37109375, "learning_rate": 4.989582024663626e-06, "loss": 2.278, "step": 5424 }, { "epoch": 0.29104077253218885, "grad_norm": 0.423828125, "learning_rate": 4.989574100227624e-06, "loss": 2.4465, "step": 5425 }, { "epoch": 0.2910944206008584, "grad_norm": 0.439453125, "learning_rate": 4.989566172785204e-06, "loss": 2.4134, "step": 5426 }, { "epoch": 0.2911480686695279, "grad_norm": 0.353515625, "learning_rate": 4.989558242336373e-06, "loss": 2.2872, "step": 5427 }, { "epoch": 0.29120171673819745, "grad_norm": 0.357421875, "learning_rate": 4.989550308881144e-06, "loss": 2.2792, "step": 5428 }, { "epoch": 0.291255364806867, "grad_norm": 0.384765625, "learning_rate": 4.989542372419524e-06, "loss": 2.2509, "step": 5429 }, { "epoch": 0.29130901287553645, "grad_norm": 0.27734375, "learning_rate": 4.989534432951522e-06, "loss": 2.0952, "step": 5430 }, { "epoch": 0.291362660944206, "grad_norm": 0.400390625, "learning_rate": 4.98952649047715e-06, "loss": 2.5171, "step": 5431 }, { "epoch": 0.2914163090128755, "grad_norm": 0.431640625, "learning_rate": 4.989518544996417e-06, "loss": 2.0337, "step": 5432 }, { "epoch": 0.29146995708154505, "grad_norm": 0.3125, "learning_rate": 4.989510596509331e-06, "loss": 2.2799, "step": 5433 }, { "epoch": 0.2915236051502146, "grad_norm": 0.60546875, "learning_rate": 4.9895026450159035e-06, "loss": 2.4256, "step": 5434 }, { "epoch": 0.2915772532188841, "grad_norm": 0.5546875, "learning_rate": 4.989494690516142e-06, "loss": 2.2952, "step": 5435 }, { "epoch": 0.29163090128755365, "grad_norm": 0.369140625, "learning_rate": 4.989486733010059e-06, "loss": 2.4977, "step": 5436 }, { "epoch": 0.2916845493562232, "grad_norm": 0.4765625, "learning_rate": 4.989478772497661e-06, "loss": 2.5303, "step": 5437 }, { "epoch": 0.2917381974248927, "grad_norm": 0.365234375, "learning_rate": 4.98947080897896e-06, "loss": 2.3696, "step": 5438 }, { "epoch": 0.29179184549356224, "grad_norm": 0.318359375, "learning_rate": 4.989462842453965e-06, "loss": 2.1877, "step": 5439 }, { "epoch": 0.2918454935622318, "grad_norm": 0.400390625, "learning_rate": 4.989454872922685e-06, "loss": 2.1386, "step": 5440 }, { "epoch": 0.2918991416309013, "grad_norm": 2.0625, "learning_rate": 4.98944690038513e-06, "loss": 2.2841, "step": 5441 }, { "epoch": 0.29195278969957084, "grad_norm": 0.43359375, "learning_rate": 4.98943892484131e-06, "loss": 2.4931, "step": 5442 }, { "epoch": 0.29200643776824037, "grad_norm": 0.3359375, "learning_rate": 4.989430946291233e-06, "loss": 2.2385, "step": 5443 }, { "epoch": 0.29206008583690984, "grad_norm": 0.66015625, "learning_rate": 4.989422964734911e-06, "loss": 2.277, "step": 5444 }, { "epoch": 0.2921137339055794, "grad_norm": 0.33203125, "learning_rate": 4.9894149801723515e-06, "loss": 2.3475, "step": 5445 }, { "epoch": 0.2921673819742489, "grad_norm": 0.62890625, "learning_rate": 4.989406992603566e-06, "loss": 2.4725, "step": 5446 }, { "epoch": 0.29222103004291844, "grad_norm": 0.734375, "learning_rate": 4.989399002028563e-06, "loss": 2.5126, "step": 5447 }, { "epoch": 0.29227467811158797, "grad_norm": 0.373046875, "learning_rate": 4.989391008447353e-06, "loss": 2.2967, "step": 5448 }, { "epoch": 0.2923283261802575, "grad_norm": 0.337890625, "learning_rate": 4.989383011859944e-06, "loss": 2.4026, "step": 5449 }, { "epoch": 0.29238197424892703, "grad_norm": 0.4453125, "learning_rate": 4.989375012266347e-06, "loss": 2.2791, "step": 5450 }, { "epoch": 0.29243562231759657, "grad_norm": 0.61328125, "learning_rate": 4.989367009666572e-06, "loss": 2.2234, "step": 5451 }, { "epoch": 0.2924892703862661, "grad_norm": 0.421875, "learning_rate": 4.989359004060628e-06, "loss": 2.3402, "step": 5452 }, { "epoch": 0.29254291845493563, "grad_norm": 0.37890625, "learning_rate": 4.9893509954485245e-06, "loss": 2.3702, "step": 5453 }, { "epoch": 0.29259656652360516, "grad_norm": 0.3828125, "learning_rate": 4.989342983830271e-06, "loss": 2.3872, "step": 5454 }, { "epoch": 0.2926502145922747, "grad_norm": 0.369140625, "learning_rate": 4.989334969205878e-06, "loss": 2.202, "step": 5455 }, { "epoch": 0.2927038626609442, "grad_norm": 0.404296875, "learning_rate": 4.9893269515753555e-06, "loss": 2.2294, "step": 5456 }, { "epoch": 0.29275751072961376, "grad_norm": 0.486328125, "learning_rate": 4.989318930938711e-06, "loss": 2.236, "step": 5457 }, { "epoch": 0.2928111587982833, "grad_norm": 0.30078125, "learning_rate": 4.989310907295956e-06, "loss": 2.2414, "step": 5458 }, { "epoch": 0.29286480686695276, "grad_norm": 0.3515625, "learning_rate": 4.9893028806471e-06, "loss": 2.202, "step": 5459 }, { "epoch": 0.2929184549356223, "grad_norm": 0.64453125, "learning_rate": 4.989294850992154e-06, "loss": 2.5076, "step": 5460 }, { "epoch": 0.29297210300429183, "grad_norm": 0.412109375, "learning_rate": 4.989286818331124e-06, "loss": 2.472, "step": 5461 }, { "epoch": 0.29302575107296136, "grad_norm": 0.4296875, "learning_rate": 4.989278782664023e-06, "loss": 2.2996, "step": 5462 }, { "epoch": 0.2930793991416309, "grad_norm": 0.439453125, "learning_rate": 4.989270743990859e-06, "loss": 1.5033, "step": 5463 }, { "epoch": 0.2931330472103004, "grad_norm": 0.3828125, "learning_rate": 4.989262702311643e-06, "loss": 2.1411, "step": 5464 }, { "epoch": 0.29318669527896996, "grad_norm": 0.33984375, "learning_rate": 4.989254657626384e-06, "loss": 2.4093, "step": 5465 }, { "epoch": 0.2932403433476395, "grad_norm": 0.396484375, "learning_rate": 4.989246609935091e-06, "loss": 2.5664, "step": 5466 }, { "epoch": 0.293293991416309, "grad_norm": 0.34765625, "learning_rate": 4.989238559237776e-06, "loss": 2.1699, "step": 5467 }, { "epoch": 0.29334763948497855, "grad_norm": 0.388671875, "learning_rate": 4.989230505534445e-06, "loss": 1.8614, "step": 5468 }, { "epoch": 0.2934012875536481, "grad_norm": 0.412109375, "learning_rate": 4.9892224488251114e-06, "loss": 2.3517, "step": 5469 }, { "epoch": 0.2934549356223176, "grad_norm": 0.326171875, "learning_rate": 4.989214389109783e-06, "loss": 2.0721, "step": 5470 }, { "epoch": 0.29350858369098715, "grad_norm": 0.447265625, "learning_rate": 4.98920632638847e-06, "loss": 2.0666, "step": 5471 }, { "epoch": 0.2935622317596567, "grad_norm": 0.3828125, "learning_rate": 4.9891982606611825e-06, "loss": 2.128, "step": 5472 }, { "epoch": 0.29361587982832615, "grad_norm": 0.38671875, "learning_rate": 4.98919019192793e-06, "loss": 2.303, "step": 5473 }, { "epoch": 0.2936695278969957, "grad_norm": 0.37109375, "learning_rate": 4.9891821201887214e-06, "loss": 2.1504, "step": 5474 }, { "epoch": 0.2937231759656652, "grad_norm": 0.443359375, "learning_rate": 4.989174045443567e-06, "loss": 2.4263, "step": 5475 }, { "epoch": 0.29377682403433475, "grad_norm": 0.421875, "learning_rate": 4.989165967692478e-06, "loss": 2.3527, "step": 5476 }, { "epoch": 0.2938304721030043, "grad_norm": 1.3984375, "learning_rate": 4.989157886935461e-06, "loss": 2.3272, "step": 5477 }, { "epoch": 0.2938841201716738, "grad_norm": 0.447265625, "learning_rate": 4.9891498031725285e-06, "loss": 2.4965, "step": 5478 }, { "epoch": 0.29393776824034334, "grad_norm": 0.39453125, "learning_rate": 4.9891417164036895e-06, "loss": 2.3009, "step": 5479 }, { "epoch": 0.2939914163090129, "grad_norm": 0.412109375, "learning_rate": 4.989133626628953e-06, "loss": 2.1696, "step": 5480 }, { "epoch": 0.2940450643776824, "grad_norm": 0.435546875, "learning_rate": 4.98912553384833e-06, "loss": 2.3476, "step": 5481 }, { "epoch": 0.29409871244635194, "grad_norm": 0.396484375, "learning_rate": 4.98911743806183e-06, "loss": 2.1736, "step": 5482 }, { "epoch": 0.29415236051502147, "grad_norm": 0.306640625, "learning_rate": 4.989109339269463e-06, "loss": 2.3924, "step": 5483 }, { "epoch": 0.294206008583691, "grad_norm": 0.40234375, "learning_rate": 4.989101237471237e-06, "loss": 2.4787, "step": 5484 }, { "epoch": 0.29425965665236054, "grad_norm": 0.66015625, "learning_rate": 4.989093132667163e-06, "loss": 2.4943, "step": 5485 }, { "epoch": 0.29431330472103007, "grad_norm": 0.416015625, "learning_rate": 4.989085024857251e-06, "loss": 2.3291, "step": 5486 }, { "epoch": 0.2943669527896996, "grad_norm": 0.6015625, "learning_rate": 4.989076914041511e-06, "loss": 2.2789, "step": 5487 }, { "epoch": 0.2944206008583691, "grad_norm": 0.34765625, "learning_rate": 4.989068800219951e-06, "loss": 2.4593, "step": 5488 }, { "epoch": 0.2944742489270386, "grad_norm": 0.3671875, "learning_rate": 4.9890606833925845e-06, "loss": 2.2417, "step": 5489 }, { "epoch": 0.29452789699570814, "grad_norm": 0.462890625, "learning_rate": 4.989052563559418e-06, "loss": 2.491, "step": 5490 }, { "epoch": 0.29458154506437767, "grad_norm": 0.373046875, "learning_rate": 4.989044440720462e-06, "loss": 2.3239, "step": 5491 }, { "epoch": 0.2946351931330472, "grad_norm": 0.38671875, "learning_rate": 4.9890363148757256e-06, "loss": 2.1522, "step": 5492 }, { "epoch": 0.29468884120171673, "grad_norm": 0.375, "learning_rate": 4.989028186025221e-06, "loss": 2.6292, "step": 5493 }, { "epoch": 0.29474248927038627, "grad_norm": 0.400390625, "learning_rate": 4.989020054168957e-06, "loss": 2.1152, "step": 5494 }, { "epoch": 0.2947961373390558, "grad_norm": 0.494140625, "learning_rate": 4.989011919306942e-06, "loss": 2.3413, "step": 5495 }, { "epoch": 0.29484978540772533, "grad_norm": 0.404296875, "learning_rate": 4.9890037814391875e-06, "loss": 2.5182, "step": 5496 }, { "epoch": 0.29490343347639486, "grad_norm": 0.451171875, "learning_rate": 4.988995640565702e-06, "loss": 1.8741, "step": 5497 }, { "epoch": 0.2949570815450644, "grad_norm": 0.408203125, "learning_rate": 4.988987496686497e-06, "loss": 2.2811, "step": 5498 }, { "epoch": 0.2950107296137339, "grad_norm": 0.34375, "learning_rate": 4.988979349801581e-06, "loss": 2.2158, "step": 5499 }, { "epoch": 0.29506437768240346, "grad_norm": 0.380859375, "learning_rate": 4.988971199910964e-06, "loss": 2.5032, "step": 5500 }, { "epoch": 0.295118025751073, "grad_norm": 0.388671875, "learning_rate": 4.988963047014656e-06, "loss": 2.1703, "step": 5501 }, { "epoch": 0.29517167381974246, "grad_norm": 0.388671875, "learning_rate": 4.988954891112667e-06, "loss": 2.2838, "step": 5502 }, { "epoch": 0.295225321888412, "grad_norm": 0.40234375, "learning_rate": 4.988946732205007e-06, "loss": 2.3842, "step": 5503 }, { "epoch": 0.2952789699570815, "grad_norm": 0.462890625, "learning_rate": 4.988938570291686e-06, "loss": 2.3135, "step": 5504 }, { "epoch": 0.29533261802575106, "grad_norm": 0.3203125, "learning_rate": 4.988930405372712e-06, "loss": 2.1417, "step": 5505 }, { "epoch": 0.2953862660944206, "grad_norm": 0.388671875, "learning_rate": 4.988922237448097e-06, "loss": 2.1442, "step": 5506 }, { "epoch": 0.2954399141630901, "grad_norm": 0.404296875, "learning_rate": 4.98891406651785e-06, "loss": 2.4037, "step": 5507 }, { "epoch": 0.29549356223175965, "grad_norm": 0.357421875, "learning_rate": 4.988905892581982e-06, "loss": 2.331, "step": 5508 }, { "epoch": 0.2955472103004292, "grad_norm": 0.3515625, "learning_rate": 4.988897715640502e-06, "loss": 2.156, "step": 5509 }, { "epoch": 0.2956008583690987, "grad_norm": 0.490234375, "learning_rate": 4.988889535693419e-06, "loss": 2.3425, "step": 5510 }, { "epoch": 0.29565450643776825, "grad_norm": 0.416015625, "learning_rate": 4.988881352740743e-06, "loss": 2.3246, "step": 5511 }, { "epoch": 0.2957081545064378, "grad_norm": 0.37890625, "learning_rate": 4.988873166782485e-06, "loss": 2.4224, "step": 5512 }, { "epoch": 0.2957618025751073, "grad_norm": 1.265625, "learning_rate": 4.988864977818655e-06, "loss": 1.9453, "step": 5513 }, { "epoch": 0.29581545064377684, "grad_norm": 0.31640625, "learning_rate": 4.988856785849262e-06, "loss": 2.2495, "step": 5514 }, { "epoch": 0.2958690987124464, "grad_norm": 0.392578125, "learning_rate": 4.988848590874317e-06, "loss": 2.4393, "step": 5515 }, { "epoch": 0.29592274678111585, "grad_norm": 0.3828125, "learning_rate": 4.988840392893828e-06, "loss": 2.1393, "step": 5516 }, { "epoch": 0.2959763948497854, "grad_norm": 0.39453125, "learning_rate": 4.988832191907807e-06, "loss": 2.4642, "step": 5517 }, { "epoch": 0.2960300429184549, "grad_norm": 0.40625, "learning_rate": 4.988823987916261e-06, "loss": 1.7133, "step": 5518 }, { "epoch": 0.29608369098712445, "grad_norm": 0.349609375, "learning_rate": 4.988815780919204e-06, "loss": 2.2095, "step": 5519 }, { "epoch": 0.296137339055794, "grad_norm": 0.470703125, "learning_rate": 4.988807570916643e-06, "loss": 2.4877, "step": 5520 }, { "epoch": 0.2961909871244635, "grad_norm": 0.3828125, "learning_rate": 4.988799357908588e-06, "loss": 2.1672, "step": 5521 }, { "epoch": 0.29624463519313304, "grad_norm": 0.369140625, "learning_rate": 4.988791141895051e-06, "loss": 2.4011, "step": 5522 }, { "epoch": 0.2962982832618026, "grad_norm": 0.384765625, "learning_rate": 4.98878292287604e-06, "loss": 2.3577, "step": 5523 }, { "epoch": 0.2963519313304721, "grad_norm": 0.5546875, "learning_rate": 4.988774700851564e-06, "loss": 2.2826, "step": 5524 }, { "epoch": 0.29640557939914164, "grad_norm": 0.60546875, "learning_rate": 4.9887664758216355e-06, "loss": 2.3507, "step": 5525 }, { "epoch": 0.29645922746781117, "grad_norm": 0.408203125, "learning_rate": 4.988758247786264e-06, "loss": 2.4626, "step": 5526 }, { "epoch": 0.2965128755364807, "grad_norm": 0.3984375, "learning_rate": 4.988750016745457e-06, "loss": 2.4414, "step": 5527 }, { "epoch": 0.29656652360515023, "grad_norm": 0.349609375, "learning_rate": 4.988741782699227e-06, "loss": 2.399, "step": 5528 }, { "epoch": 0.29662017167381977, "grad_norm": 0.412109375, "learning_rate": 4.988733545647583e-06, "loss": 2.2451, "step": 5529 }, { "epoch": 0.2966738197424893, "grad_norm": 0.431640625, "learning_rate": 4.988725305590535e-06, "loss": 2.0191, "step": 5530 }, { "epoch": 0.2967274678111588, "grad_norm": 0.39453125, "learning_rate": 4.988717062528093e-06, "loss": 2.4675, "step": 5531 }, { "epoch": 0.2967811158798283, "grad_norm": 0.390625, "learning_rate": 4.9887088164602675e-06, "loss": 2.3073, "step": 5532 }, { "epoch": 0.29683476394849784, "grad_norm": 0.41796875, "learning_rate": 4.988700567387068e-06, "loss": 2.144, "step": 5533 }, { "epoch": 0.29688841201716737, "grad_norm": 0.4140625, "learning_rate": 4.988692315308503e-06, "loss": 2.3096, "step": 5534 }, { "epoch": 0.2969420600858369, "grad_norm": 0.326171875, "learning_rate": 4.988684060224585e-06, "loss": 2.3597, "step": 5535 }, { "epoch": 0.29699570815450643, "grad_norm": 0.3359375, "learning_rate": 4.988675802135323e-06, "loss": 2.341, "step": 5536 }, { "epoch": 0.29704935622317596, "grad_norm": 0.41015625, "learning_rate": 4.988667541040726e-06, "loss": 2.3819, "step": 5537 }, { "epoch": 0.2971030042918455, "grad_norm": 0.365234375, "learning_rate": 4.988659276940805e-06, "loss": 2.4421, "step": 5538 }, { "epoch": 0.29715665236051503, "grad_norm": 0.384765625, "learning_rate": 4.98865100983557e-06, "loss": 2.1588, "step": 5539 }, { "epoch": 0.29721030042918456, "grad_norm": 0.68359375, "learning_rate": 4.988642739725031e-06, "loss": 2.0428, "step": 5540 }, { "epoch": 0.2972639484978541, "grad_norm": 1.296875, "learning_rate": 4.9886344666091965e-06, "loss": 2.167, "step": 5541 }, { "epoch": 0.2973175965665236, "grad_norm": 0.392578125, "learning_rate": 4.9886261904880785e-06, "loss": 2.4741, "step": 5542 }, { "epoch": 0.29737124463519315, "grad_norm": 0.396484375, "learning_rate": 4.988617911361686e-06, "loss": 2.2194, "step": 5543 }, { "epoch": 0.2974248927038627, "grad_norm": 0.421875, "learning_rate": 4.988609629230029e-06, "loss": 2.2442, "step": 5544 }, { "epoch": 0.29747854077253216, "grad_norm": 0.5, "learning_rate": 4.988601344093118e-06, "loss": 2.4616, "step": 5545 }, { "epoch": 0.2975321888412017, "grad_norm": 0.7578125, "learning_rate": 4.988593055950963e-06, "loss": 2.2821, "step": 5546 }, { "epoch": 0.2975858369098712, "grad_norm": 0.345703125, "learning_rate": 4.9885847648035736e-06, "loss": 2.2436, "step": 5547 }, { "epoch": 0.29763948497854076, "grad_norm": 0.5703125, "learning_rate": 4.98857647065096e-06, "loss": 2.3739, "step": 5548 }, { "epoch": 0.2976931330472103, "grad_norm": 0.392578125, "learning_rate": 4.988568173493131e-06, "loss": 2.2274, "step": 5549 }, { "epoch": 0.2977467811158798, "grad_norm": 0.46484375, "learning_rate": 4.9885598733300985e-06, "loss": 2.2085, "step": 5550 }, { "epoch": 0.29780042918454935, "grad_norm": 0.396484375, "learning_rate": 4.988551570161872e-06, "loss": 2.4653, "step": 5551 }, { "epoch": 0.2978540772532189, "grad_norm": 0.5078125, "learning_rate": 4.988543263988461e-06, "loss": 2.6924, "step": 5552 }, { "epoch": 0.2979077253218884, "grad_norm": 0.3984375, "learning_rate": 4.988534954809876e-06, "loss": 2.4047, "step": 5553 }, { "epoch": 0.29796137339055795, "grad_norm": 0.376953125, "learning_rate": 4.988526642626126e-06, "loss": 2.2127, "step": 5554 }, { "epoch": 0.2980150214592275, "grad_norm": 0.53125, "learning_rate": 4.988518327437223e-06, "loss": 1.5991, "step": 5555 }, { "epoch": 0.298068669527897, "grad_norm": 0.412109375, "learning_rate": 4.988510009243176e-06, "loss": 2.2037, "step": 5556 }, { "epoch": 0.29812231759656654, "grad_norm": 0.37890625, "learning_rate": 4.988501688043994e-06, "loss": 2.2008, "step": 5557 }, { "epoch": 0.2981759656652361, "grad_norm": 0.369140625, "learning_rate": 4.9884933638396895e-06, "loss": 2.1986, "step": 5558 }, { "epoch": 0.29822961373390555, "grad_norm": 0.48828125, "learning_rate": 4.98848503663027e-06, "loss": 2.0959, "step": 5559 }, { "epoch": 0.2982832618025751, "grad_norm": 0.349609375, "learning_rate": 4.988476706415747e-06, "loss": 2.4213, "step": 5560 }, { "epoch": 0.2983369098712446, "grad_norm": 0.439453125, "learning_rate": 4.98846837319613e-06, "loss": 2.2541, "step": 5561 }, { "epoch": 0.29839055793991415, "grad_norm": 0.451171875, "learning_rate": 4.988460036971428e-06, "loss": 2.446, "step": 5562 }, { "epoch": 0.2984442060085837, "grad_norm": 0.36328125, "learning_rate": 4.988451697741655e-06, "loss": 2.371, "step": 5563 }, { "epoch": 0.2984978540772532, "grad_norm": 0.37109375, "learning_rate": 4.9884433555068164e-06, "loss": 2.1989, "step": 5564 }, { "epoch": 0.29855150214592274, "grad_norm": 0.53515625, "learning_rate": 4.988435010266926e-06, "loss": 2.5868, "step": 5565 }, { "epoch": 0.2986051502145923, "grad_norm": 0.404296875, "learning_rate": 4.98842666202199e-06, "loss": 2.5315, "step": 5566 }, { "epoch": 0.2986587982832618, "grad_norm": 0.384765625, "learning_rate": 4.9884183107720215e-06, "loss": 2.4492, "step": 5567 }, { "epoch": 0.29871244635193134, "grad_norm": 0.474609375, "learning_rate": 4.988409956517031e-06, "loss": 2.1976, "step": 5568 }, { "epoch": 0.29876609442060087, "grad_norm": 1.1484375, "learning_rate": 4.988401599257025e-06, "loss": 2.1513, "step": 5569 }, { "epoch": 0.2988197424892704, "grad_norm": 0.412109375, "learning_rate": 4.9883932389920185e-06, "loss": 2.62, "step": 5570 }, { "epoch": 0.29887339055793993, "grad_norm": 0.82421875, "learning_rate": 4.9883848757220175e-06, "loss": 2.5152, "step": 5571 }, { "epoch": 0.29892703862660946, "grad_norm": 0.4296875, "learning_rate": 4.988376509447034e-06, "loss": 2.4853, "step": 5572 }, { "epoch": 0.298980686695279, "grad_norm": 0.44921875, "learning_rate": 4.988368140167077e-06, "loss": 2.2239, "step": 5573 }, { "epoch": 0.2990343347639485, "grad_norm": 0.337890625, "learning_rate": 4.988359767882158e-06, "loss": 2.1448, "step": 5574 }, { "epoch": 0.299087982832618, "grad_norm": 0.380859375, "learning_rate": 4.988351392592286e-06, "loss": 2.4567, "step": 5575 }, { "epoch": 0.29914163090128754, "grad_norm": 0.412109375, "learning_rate": 4.988343014297472e-06, "loss": 2.5433, "step": 5576 }, { "epoch": 0.29919527896995707, "grad_norm": 0.83203125, "learning_rate": 4.988334632997726e-06, "loss": 2.2144, "step": 5577 }, { "epoch": 0.2992489270386266, "grad_norm": 0.376953125, "learning_rate": 4.988326248693056e-06, "loss": 2.3709, "step": 5578 }, { "epoch": 0.29930257510729613, "grad_norm": 0.42578125, "learning_rate": 4.988317861383475e-06, "loss": 2.4628, "step": 5579 }, { "epoch": 0.29935622317596566, "grad_norm": 0.396484375, "learning_rate": 4.988309471068993e-06, "loss": 2.2669, "step": 5580 }, { "epoch": 0.2994098712446352, "grad_norm": 0.3515625, "learning_rate": 4.988301077749618e-06, "loss": 2.2084, "step": 5581 }, { "epoch": 0.2994635193133047, "grad_norm": 0.37890625, "learning_rate": 4.9882926814253615e-06, "loss": 2.2912, "step": 5582 }, { "epoch": 0.29951716738197426, "grad_norm": 0.392578125, "learning_rate": 4.988284282096233e-06, "loss": 2.144, "step": 5583 }, { "epoch": 0.2995708154506438, "grad_norm": 0.353515625, "learning_rate": 4.988275879762244e-06, "loss": 2.2317, "step": 5584 }, { "epoch": 0.2996244635193133, "grad_norm": 0.380859375, "learning_rate": 4.988267474423403e-06, "loss": 2.2227, "step": 5585 }, { "epoch": 0.29967811158798285, "grad_norm": 0.326171875, "learning_rate": 4.988259066079722e-06, "loss": 2.2846, "step": 5586 }, { "epoch": 0.2997317596566524, "grad_norm": 0.416015625, "learning_rate": 4.988250654731208e-06, "loss": 2.1465, "step": 5587 }, { "epoch": 0.29978540772532186, "grad_norm": 0.375, "learning_rate": 4.988242240377875e-06, "loss": 2.3301, "step": 5588 }, { "epoch": 0.2998390557939914, "grad_norm": 0.390625, "learning_rate": 4.98823382301973e-06, "loss": 2.3378, "step": 5589 }, { "epoch": 0.2998927038626609, "grad_norm": 0.435546875, "learning_rate": 4.9882254026567855e-06, "loss": 2.0959, "step": 5590 }, { "epoch": 0.29994635193133046, "grad_norm": 0.3984375, "learning_rate": 4.988216979289051e-06, "loss": 2.0896, "step": 5591 }, { "epoch": 0.3, "grad_norm": 0.41015625, "learning_rate": 4.988208552916535e-06, "loss": 2.2988, "step": 5592 }, { "epoch": 0.3000536480686695, "grad_norm": 3.375, "learning_rate": 4.9882001235392505e-06, "loss": 2.2682, "step": 5593 }, { "epoch": 0.30010729613733905, "grad_norm": 0.39453125, "learning_rate": 4.988191691157205e-06, "loss": 2.4064, "step": 5594 }, { "epoch": 0.3001609442060086, "grad_norm": 0.455078125, "learning_rate": 4.98818325577041e-06, "loss": 2.2192, "step": 5595 }, { "epoch": 0.3002145922746781, "grad_norm": 0.455078125, "learning_rate": 4.9881748173788765e-06, "loss": 2.6275, "step": 5596 }, { "epoch": 0.30026824034334765, "grad_norm": 0.5078125, "learning_rate": 4.988166375982613e-06, "loss": 2.2648, "step": 5597 }, { "epoch": 0.3003218884120172, "grad_norm": 0.404296875, "learning_rate": 4.988157931581631e-06, "loss": 2.6306, "step": 5598 }, { "epoch": 0.3003755364806867, "grad_norm": 0.384765625, "learning_rate": 4.988149484175939e-06, "loss": 2.3033, "step": 5599 }, { "epoch": 0.30042918454935624, "grad_norm": 0.52734375, "learning_rate": 4.98814103376555e-06, "loss": 2.2222, "step": 5600 }, { "epoch": 0.3004828326180258, "grad_norm": 0.390625, "learning_rate": 4.9881325803504715e-06, "loss": 2.3246, "step": 5601 }, { "epoch": 0.3005364806866953, "grad_norm": 0.482421875, "learning_rate": 4.988124123930716e-06, "loss": 2.4433, "step": 5602 }, { "epoch": 0.3005901287553648, "grad_norm": 0.453125, "learning_rate": 4.988115664506291e-06, "loss": 2.3004, "step": 5603 }, { "epoch": 0.3006437768240343, "grad_norm": 0.37109375, "learning_rate": 4.988107202077208e-06, "loss": 2.3417, "step": 5604 }, { "epoch": 0.30069742489270385, "grad_norm": 0.392578125, "learning_rate": 4.9880987366434784e-06, "loss": 2.4214, "step": 5605 }, { "epoch": 0.3007510729613734, "grad_norm": 0.40234375, "learning_rate": 4.988090268205111e-06, "loss": 2.4072, "step": 5606 }, { "epoch": 0.3008047210300429, "grad_norm": 2.53125, "learning_rate": 4.988081796762116e-06, "loss": 2.198, "step": 5607 }, { "epoch": 0.30085836909871244, "grad_norm": 0.40625, "learning_rate": 4.988073322314505e-06, "loss": 2.4198, "step": 5608 }, { "epoch": 0.300912017167382, "grad_norm": 0.34375, "learning_rate": 4.988064844862288e-06, "loss": 2.1722, "step": 5609 }, { "epoch": 0.3009656652360515, "grad_norm": 0.40625, "learning_rate": 4.988056364405473e-06, "loss": 2.4885, "step": 5610 }, { "epoch": 0.30101931330472104, "grad_norm": 0.404296875, "learning_rate": 4.988047880944073e-06, "loss": 2.5419, "step": 5611 }, { "epoch": 0.30107296137339057, "grad_norm": 0.373046875, "learning_rate": 4.988039394478096e-06, "loss": 2.2525, "step": 5612 }, { "epoch": 0.3011266094420601, "grad_norm": 0.37890625, "learning_rate": 4.988030905007554e-06, "loss": 2.2694, "step": 5613 }, { "epoch": 0.30118025751072963, "grad_norm": 0.369140625, "learning_rate": 4.988022412532456e-06, "loss": 2.4119, "step": 5614 }, { "epoch": 0.30123390557939916, "grad_norm": 0.40625, "learning_rate": 4.988013917052813e-06, "loss": 2.0315, "step": 5615 }, { "epoch": 0.3012875536480687, "grad_norm": 0.388671875, "learning_rate": 4.9880054185686356e-06, "loss": 2.2087, "step": 5616 }, { "epoch": 0.30134120171673817, "grad_norm": 0.4453125, "learning_rate": 4.987996917079934e-06, "loss": 1.9753, "step": 5617 }, { "epoch": 0.3013948497854077, "grad_norm": 0.3984375, "learning_rate": 4.987988412586716e-06, "loss": 2.1734, "step": 5618 }, { "epoch": 0.30144849785407724, "grad_norm": 0.326171875, "learning_rate": 4.987979905088996e-06, "loss": 2.2211, "step": 5619 }, { "epoch": 0.30150214592274677, "grad_norm": 0.412109375, "learning_rate": 4.98797139458678e-06, "loss": 2.45, "step": 5620 }, { "epoch": 0.3015557939914163, "grad_norm": 0.392578125, "learning_rate": 4.987962881080082e-06, "loss": 2.2114, "step": 5621 }, { "epoch": 0.30160944206008583, "grad_norm": 0.33203125, "learning_rate": 4.98795436456891e-06, "loss": 2.1204, "step": 5622 }, { "epoch": 0.30166309012875536, "grad_norm": 0.390625, "learning_rate": 4.987945845053276e-06, "loss": 2.4185, "step": 5623 }, { "epoch": 0.3017167381974249, "grad_norm": 0.326171875, "learning_rate": 4.987937322533188e-06, "loss": 2.2328, "step": 5624 }, { "epoch": 0.3017703862660944, "grad_norm": 0.419921875, "learning_rate": 4.987928797008659e-06, "loss": 2.2284, "step": 5625 }, { "epoch": 0.30182403433476396, "grad_norm": 0.34375, "learning_rate": 4.987920268479697e-06, "loss": 2.2375, "step": 5626 }, { "epoch": 0.3018776824034335, "grad_norm": 0.51953125, "learning_rate": 4.987911736946312e-06, "loss": 2.2394, "step": 5627 }, { "epoch": 0.301931330472103, "grad_norm": 0.482421875, "learning_rate": 4.987903202408517e-06, "loss": 2.2078, "step": 5628 }, { "epoch": 0.30198497854077255, "grad_norm": 0.37890625, "learning_rate": 4.987894664866321e-06, "loss": 2.3342, "step": 5629 }, { "epoch": 0.3020386266094421, "grad_norm": 0.421875, "learning_rate": 4.987886124319733e-06, "loss": 2.1657, "step": 5630 }, { "epoch": 0.30209227467811156, "grad_norm": 0.349609375, "learning_rate": 4.987877580768765e-06, "loss": 2.213, "step": 5631 }, { "epoch": 0.3021459227467811, "grad_norm": 0.4140625, "learning_rate": 4.987869034213426e-06, "loss": 2.3023, "step": 5632 }, { "epoch": 0.3021995708154506, "grad_norm": 0.4609375, "learning_rate": 4.9878604846537285e-06, "loss": 2.1562, "step": 5633 }, { "epoch": 0.30225321888412016, "grad_norm": 0.357421875, "learning_rate": 4.98785193208968e-06, "loss": 2.2908, "step": 5634 }, { "epoch": 0.3023068669527897, "grad_norm": 0.427734375, "learning_rate": 4.987843376521293e-06, "loss": 2.1841, "step": 5635 }, { "epoch": 0.3023605150214592, "grad_norm": 0.421875, "learning_rate": 4.987834817948576e-06, "loss": 2.0346, "step": 5636 }, { "epoch": 0.30241416309012875, "grad_norm": 0.408203125, "learning_rate": 4.987826256371541e-06, "loss": 2.3717, "step": 5637 }, { "epoch": 0.3024678111587983, "grad_norm": 0.384765625, "learning_rate": 4.987817691790198e-06, "loss": 2.2898, "step": 5638 }, { "epoch": 0.3025214592274678, "grad_norm": 0.328125, "learning_rate": 4.987809124204557e-06, "loss": 2.216, "step": 5639 }, { "epoch": 0.30257510729613735, "grad_norm": 0.396484375, "learning_rate": 4.987800553614628e-06, "loss": 2.5659, "step": 5640 }, { "epoch": 0.3026287553648069, "grad_norm": 0.447265625, "learning_rate": 4.987791980020421e-06, "loss": 2.2881, "step": 5641 }, { "epoch": 0.3026824034334764, "grad_norm": 0.41015625, "learning_rate": 4.9877834034219486e-06, "loss": 2.5894, "step": 5642 }, { "epoch": 0.30273605150214594, "grad_norm": 0.3671875, "learning_rate": 4.987774823819219e-06, "loss": 2.3253, "step": 5643 }, { "epoch": 0.3027896995708155, "grad_norm": 0.34765625, "learning_rate": 4.987766241212243e-06, "loss": 2.2499, "step": 5644 }, { "epoch": 0.302843347639485, "grad_norm": 0.5703125, "learning_rate": 4.987757655601031e-06, "loss": 2.0465, "step": 5645 }, { "epoch": 0.3028969957081545, "grad_norm": 0.47265625, "learning_rate": 4.987749066985594e-06, "loss": 2.4995, "step": 5646 }, { "epoch": 0.302950643776824, "grad_norm": 0.37109375, "learning_rate": 4.987740475365942e-06, "loss": 2.4702, "step": 5647 }, { "epoch": 0.30300429184549355, "grad_norm": 0.333984375, "learning_rate": 4.987731880742085e-06, "loss": 2.3058, "step": 5648 }, { "epoch": 0.3030579399141631, "grad_norm": 0.41796875, "learning_rate": 4.987723283114033e-06, "loss": 2.3935, "step": 5649 }, { "epoch": 0.3031115879828326, "grad_norm": 0.412109375, "learning_rate": 4.987714682481798e-06, "loss": 2.3572, "step": 5650 }, { "epoch": 0.30316523605150214, "grad_norm": 0.408203125, "learning_rate": 4.987706078845389e-06, "loss": 2.4312, "step": 5651 }, { "epoch": 0.3032188841201717, "grad_norm": 0.404296875, "learning_rate": 4.9876974722048175e-06, "loss": 2.5378, "step": 5652 }, { "epoch": 0.3032725321888412, "grad_norm": 0.3828125, "learning_rate": 4.987688862560092e-06, "loss": 2.1198, "step": 5653 }, { "epoch": 0.30332618025751074, "grad_norm": 0.376953125, "learning_rate": 4.987680249911226e-06, "loss": 2.3307, "step": 5654 }, { "epoch": 0.30337982832618027, "grad_norm": 0.390625, "learning_rate": 4.987671634258226e-06, "loss": 2.2746, "step": 5655 }, { "epoch": 0.3034334763948498, "grad_norm": 0.578125, "learning_rate": 4.987663015601105e-06, "loss": 1.4796, "step": 5656 }, { "epoch": 0.30348712446351933, "grad_norm": 0.43359375, "learning_rate": 4.9876543939398734e-06, "loss": 2.6031, "step": 5657 }, { "epoch": 0.30354077253218886, "grad_norm": 0.396484375, "learning_rate": 4.987645769274541e-06, "loss": 2.3557, "step": 5658 }, { "epoch": 0.3035944206008584, "grad_norm": 0.361328125, "learning_rate": 4.987637141605117e-06, "loss": 2.2857, "step": 5659 }, { "epoch": 0.30364806866952787, "grad_norm": 0.361328125, "learning_rate": 4.987628510931614e-06, "loss": 2.3047, "step": 5660 }, { "epoch": 0.3037017167381974, "grad_norm": 0.341796875, "learning_rate": 4.987619877254042e-06, "loss": 2.1268, "step": 5661 }, { "epoch": 0.30375536480686693, "grad_norm": 0.455078125, "learning_rate": 4.98761124057241e-06, "loss": 2.5907, "step": 5662 }, { "epoch": 0.30380901287553647, "grad_norm": 0.419921875, "learning_rate": 4.98760260088673e-06, "loss": 2.439, "step": 5663 }, { "epoch": 0.303862660944206, "grad_norm": 0.451171875, "learning_rate": 4.987593958197011e-06, "loss": 2.357, "step": 5664 }, { "epoch": 0.30391630901287553, "grad_norm": 0.447265625, "learning_rate": 4.987585312503265e-06, "loss": 2.3085, "step": 5665 }, { "epoch": 0.30396995708154506, "grad_norm": 0.451171875, "learning_rate": 4.987576663805501e-06, "loss": 2.1313, "step": 5666 }, { "epoch": 0.3040236051502146, "grad_norm": 0.84375, "learning_rate": 4.987568012103731e-06, "loss": 2.2944, "step": 5667 }, { "epoch": 0.3040772532188841, "grad_norm": 0.326171875, "learning_rate": 4.987559357397963e-06, "loss": 2.016, "step": 5668 }, { "epoch": 0.30413090128755366, "grad_norm": 0.5546875, "learning_rate": 4.98755069968821e-06, "loss": 1.7294, "step": 5669 }, { "epoch": 0.3041845493562232, "grad_norm": 0.314453125, "learning_rate": 4.987542038974482e-06, "loss": 2.2395, "step": 5670 }, { "epoch": 0.3042381974248927, "grad_norm": 0.376953125, "learning_rate": 4.987533375256789e-06, "loss": 2.4964, "step": 5671 }, { "epoch": 0.30429184549356225, "grad_norm": 0.427734375, "learning_rate": 4.9875247085351405e-06, "loss": 2.5126, "step": 5672 }, { "epoch": 0.3043454935622318, "grad_norm": 0.53125, "learning_rate": 4.9875160388095476e-06, "loss": 2.3241, "step": 5673 }, { "epoch": 0.30439914163090126, "grad_norm": 0.392578125, "learning_rate": 4.987507366080022e-06, "loss": 2.4754, "step": 5674 }, { "epoch": 0.3044527896995708, "grad_norm": 0.3828125, "learning_rate": 4.987498690346572e-06, "loss": 2.3824, "step": 5675 }, { "epoch": 0.3045064377682403, "grad_norm": 0.34375, "learning_rate": 4.98749001160921e-06, "loss": 2.2384, "step": 5676 }, { "epoch": 0.30456008583690986, "grad_norm": 0.337890625, "learning_rate": 4.987481329867946e-06, "loss": 2.2222, "step": 5677 }, { "epoch": 0.3046137339055794, "grad_norm": 0.361328125, "learning_rate": 4.987472645122789e-06, "loss": 2.3546, "step": 5678 }, { "epoch": 0.3046673819742489, "grad_norm": 0.375, "learning_rate": 4.987463957373753e-06, "loss": 2.3351, "step": 5679 }, { "epoch": 0.30472103004291845, "grad_norm": 0.390625, "learning_rate": 4.987455266620844e-06, "loss": 1.9552, "step": 5680 }, { "epoch": 0.304774678111588, "grad_norm": 0.390625, "learning_rate": 4.987446572864075e-06, "loss": 1.9724, "step": 5681 }, { "epoch": 0.3048283261802575, "grad_norm": 1.0703125, "learning_rate": 4.987437876103457e-06, "loss": 2.3785, "step": 5682 }, { "epoch": 0.30488197424892705, "grad_norm": 0.50390625, "learning_rate": 4.987429176338999e-06, "loss": 2.1969, "step": 5683 }, { "epoch": 0.3049356223175966, "grad_norm": 0.6484375, "learning_rate": 4.987420473570713e-06, "loss": 2.4466, "step": 5684 }, { "epoch": 0.3049892703862661, "grad_norm": 0.384765625, "learning_rate": 4.987411767798609e-06, "loss": 2.2807, "step": 5685 }, { "epoch": 0.30504291845493564, "grad_norm": 0.380859375, "learning_rate": 4.987403059022696e-06, "loss": 2.5399, "step": 5686 }, { "epoch": 0.3050965665236052, "grad_norm": 0.375, "learning_rate": 4.9873943472429865e-06, "loss": 2.0847, "step": 5687 }, { "epoch": 0.3051502145922747, "grad_norm": 0.703125, "learning_rate": 4.98738563245949e-06, "loss": 2.2599, "step": 5688 }, { "epoch": 0.3052038626609442, "grad_norm": 0.40234375, "learning_rate": 4.987376914672217e-06, "loss": 2.434, "step": 5689 }, { "epoch": 0.3052575107296137, "grad_norm": 0.298828125, "learning_rate": 4.987368193881179e-06, "loss": 2.3485, "step": 5690 }, { "epoch": 0.30531115879828324, "grad_norm": 0.330078125, "learning_rate": 4.987359470086385e-06, "loss": 2.2319, "step": 5691 }, { "epoch": 0.3053648068669528, "grad_norm": 0.396484375, "learning_rate": 4.987350743287848e-06, "loss": 2.3269, "step": 5692 }, { "epoch": 0.3054184549356223, "grad_norm": 0.375, "learning_rate": 4.9873420134855755e-06, "loss": 2.4362, "step": 5693 }, { "epoch": 0.30547210300429184, "grad_norm": 0.416015625, "learning_rate": 4.98733328067958e-06, "loss": 2.5959, "step": 5694 }, { "epoch": 0.30552575107296137, "grad_norm": 0.443359375, "learning_rate": 4.98732454486987e-06, "loss": 2.3217, "step": 5695 }, { "epoch": 0.3055793991416309, "grad_norm": 0.412109375, "learning_rate": 4.9873158060564594e-06, "loss": 2.3206, "step": 5696 }, { "epoch": 0.30563304721030043, "grad_norm": 0.376953125, "learning_rate": 4.987307064239356e-06, "loss": 2.1767, "step": 5697 }, { "epoch": 0.30568669527896997, "grad_norm": 0.384765625, "learning_rate": 4.987298319418572e-06, "loss": 2.1092, "step": 5698 }, { "epoch": 0.3057403433476395, "grad_norm": 0.3671875, "learning_rate": 4.9872895715941174e-06, "loss": 2.2966, "step": 5699 }, { "epoch": 0.30579399141630903, "grad_norm": 0.408203125, "learning_rate": 4.9872808207660015e-06, "loss": 2.2876, "step": 5700 }, { "epoch": 0.30584763948497856, "grad_norm": 0.35546875, "learning_rate": 4.9872720669342365e-06, "loss": 2.2152, "step": 5701 }, { "epoch": 0.3059012875536481, "grad_norm": 0.47265625, "learning_rate": 4.987263310098832e-06, "loss": 2.2315, "step": 5702 }, { "epoch": 0.30595493562231757, "grad_norm": 0.458984375, "learning_rate": 4.9872545502598e-06, "loss": 2.4234, "step": 5703 }, { "epoch": 0.3060085836909871, "grad_norm": 0.408203125, "learning_rate": 4.987245787417149e-06, "loss": 2.2367, "step": 5704 }, { "epoch": 0.30606223175965663, "grad_norm": 0.46484375, "learning_rate": 4.9872370215708915e-06, "loss": 2.3065, "step": 5705 }, { "epoch": 0.30611587982832617, "grad_norm": 0.52734375, "learning_rate": 4.987228252721037e-06, "loss": 2.2953, "step": 5706 }, { "epoch": 0.3061695278969957, "grad_norm": 0.390625, "learning_rate": 4.987219480867596e-06, "loss": 2.3364, "step": 5707 }, { "epoch": 0.30622317596566523, "grad_norm": 0.33203125, "learning_rate": 4.98721070601058e-06, "loss": 2.3334, "step": 5708 }, { "epoch": 0.30627682403433476, "grad_norm": 0.36328125, "learning_rate": 4.987201928149998e-06, "loss": 2.1495, "step": 5709 }, { "epoch": 0.3063304721030043, "grad_norm": 0.40625, "learning_rate": 4.987193147285863e-06, "loss": 2.5053, "step": 5710 }, { "epoch": 0.3063841201716738, "grad_norm": 0.5625, "learning_rate": 4.987184363418184e-06, "loss": 2.2766, "step": 5711 }, { "epoch": 0.30643776824034336, "grad_norm": 0.353515625, "learning_rate": 4.987175576546971e-06, "loss": 2.1887, "step": 5712 }, { "epoch": 0.3064914163090129, "grad_norm": 0.421875, "learning_rate": 4.9871667866722366e-06, "loss": 2.1731, "step": 5713 }, { "epoch": 0.3065450643776824, "grad_norm": 0.384765625, "learning_rate": 4.987157993793988e-06, "loss": 2.061, "step": 5714 }, { "epoch": 0.30659871244635195, "grad_norm": 0.4609375, "learning_rate": 4.987149197912241e-06, "loss": 2.2199, "step": 5715 }, { "epoch": 0.3066523605150215, "grad_norm": 0.40625, "learning_rate": 4.9871403990270015e-06, "loss": 2.4127, "step": 5716 }, { "epoch": 0.306706008583691, "grad_norm": 0.357421875, "learning_rate": 4.987131597138283e-06, "loss": 2.4217, "step": 5717 }, { "epoch": 0.3067596566523605, "grad_norm": 0.4609375, "learning_rate": 4.987122792246094e-06, "loss": 2.2901, "step": 5718 }, { "epoch": 0.30681330472103, "grad_norm": 0.388671875, "learning_rate": 4.987113984350447e-06, "loss": 2.3649, "step": 5719 }, { "epoch": 0.30686695278969955, "grad_norm": 0.37109375, "learning_rate": 4.987105173451351e-06, "loss": 2.2748, "step": 5720 }, { "epoch": 0.3069206008583691, "grad_norm": 0.38671875, "learning_rate": 4.987096359548817e-06, "loss": 2.2874, "step": 5721 }, { "epoch": 0.3069742489270386, "grad_norm": 0.3671875, "learning_rate": 4.987087542642858e-06, "loss": 2.531, "step": 5722 }, { "epoch": 0.30702789699570815, "grad_norm": 0.8046875, "learning_rate": 4.987078722733482e-06, "loss": 2.3986, "step": 5723 }, { "epoch": 0.3070815450643777, "grad_norm": 0.392578125, "learning_rate": 4.9870698998207e-06, "loss": 2.3885, "step": 5724 }, { "epoch": 0.3071351931330472, "grad_norm": 0.322265625, "learning_rate": 4.987061073904523e-06, "loss": 2.1421, "step": 5725 }, { "epoch": 0.30718884120171674, "grad_norm": 0.44140625, "learning_rate": 4.987052244984962e-06, "loss": 2.321, "step": 5726 }, { "epoch": 0.3072424892703863, "grad_norm": 0.35546875, "learning_rate": 4.987043413062028e-06, "loss": 2.33, "step": 5727 }, { "epoch": 0.3072961373390558, "grad_norm": 0.37109375, "learning_rate": 4.98703457813573e-06, "loss": 2.2865, "step": 5728 }, { "epoch": 0.30734978540772534, "grad_norm": 0.4296875, "learning_rate": 4.987025740206081e-06, "loss": 2.345, "step": 5729 }, { "epoch": 0.30740343347639487, "grad_norm": 0.388671875, "learning_rate": 4.98701689927309e-06, "loss": 2.261, "step": 5730 }, { "epoch": 0.3074570815450644, "grad_norm": 0.3828125, "learning_rate": 4.987008055336767e-06, "loss": 2.2233, "step": 5731 }, { "epoch": 0.3075107296137339, "grad_norm": 0.400390625, "learning_rate": 4.986999208397124e-06, "loss": 2.4403, "step": 5732 }, { "epoch": 0.3075643776824034, "grad_norm": 0.443359375, "learning_rate": 4.986990358454172e-06, "loss": 2.2274, "step": 5733 }, { "epoch": 0.30761802575107294, "grad_norm": 0.6640625, "learning_rate": 4.986981505507921e-06, "loss": 2.0065, "step": 5734 }, { "epoch": 0.3076716738197425, "grad_norm": 0.3203125, "learning_rate": 4.986972649558381e-06, "loss": 1.9428, "step": 5735 }, { "epoch": 0.307725321888412, "grad_norm": 0.40234375, "learning_rate": 4.986963790605565e-06, "loss": 2.1853, "step": 5736 }, { "epoch": 0.30777896995708154, "grad_norm": 0.349609375, "learning_rate": 4.986954928649481e-06, "loss": 2.4052, "step": 5737 }, { "epoch": 0.30783261802575107, "grad_norm": 0.400390625, "learning_rate": 4.9869460636901425e-06, "loss": 1.9517, "step": 5738 }, { "epoch": 0.3078862660944206, "grad_norm": 0.4296875, "learning_rate": 4.986937195727557e-06, "loss": 2.3291, "step": 5739 }, { "epoch": 0.30793991416309013, "grad_norm": 0.7109375, "learning_rate": 4.986928324761737e-06, "loss": 2.5004, "step": 5740 }, { "epoch": 0.30799356223175967, "grad_norm": 0.326171875, "learning_rate": 4.986919450792694e-06, "loss": 2.133, "step": 5741 }, { "epoch": 0.3080472103004292, "grad_norm": 0.412109375, "learning_rate": 4.986910573820437e-06, "loss": 2.3774, "step": 5742 }, { "epoch": 0.30810085836909873, "grad_norm": 0.3828125, "learning_rate": 4.986901693844978e-06, "loss": 2.1724, "step": 5743 }, { "epoch": 0.30815450643776826, "grad_norm": 0.427734375, "learning_rate": 4.986892810866326e-06, "loss": 2.2415, "step": 5744 }, { "epoch": 0.3082081545064378, "grad_norm": 0.396484375, "learning_rate": 4.986883924884494e-06, "loss": 2.2236, "step": 5745 }, { "epoch": 0.30826180257510727, "grad_norm": 0.353515625, "learning_rate": 4.986875035899491e-06, "loss": 2.2123, "step": 5746 }, { "epoch": 0.3083154506437768, "grad_norm": 0.392578125, "learning_rate": 4.986866143911329e-06, "loss": 2.1365, "step": 5747 }, { "epoch": 0.30836909871244633, "grad_norm": 0.53125, "learning_rate": 4.986857248920019e-06, "loss": 2.5949, "step": 5748 }, { "epoch": 0.30842274678111586, "grad_norm": 0.40234375, "learning_rate": 4.986848350925569e-06, "loss": 2.3539, "step": 5749 }, { "epoch": 0.3084763948497854, "grad_norm": 0.39453125, "learning_rate": 4.986839449927992e-06, "loss": 2.183, "step": 5750 }, { "epoch": 0.3085300429184549, "grad_norm": 0.462890625, "learning_rate": 4.986830545927299e-06, "loss": 2.416, "step": 5751 }, { "epoch": 0.30858369098712446, "grad_norm": 0.35546875, "learning_rate": 4.9868216389235e-06, "loss": 2.2651, "step": 5752 }, { "epoch": 0.308637339055794, "grad_norm": 0.48046875, "learning_rate": 4.9868127289166055e-06, "loss": 2.1406, "step": 5753 }, { "epoch": 0.3086909871244635, "grad_norm": 0.4296875, "learning_rate": 4.986803815906627e-06, "loss": 2.4142, "step": 5754 }, { "epoch": 0.30874463519313305, "grad_norm": 0.474609375, "learning_rate": 4.9867948998935745e-06, "loss": 1.4018, "step": 5755 }, { "epoch": 0.3087982832618026, "grad_norm": 0.4140625, "learning_rate": 4.986785980877459e-06, "loss": 2.269, "step": 5756 }, { "epoch": 0.3088519313304721, "grad_norm": 4.0625, "learning_rate": 4.986777058858292e-06, "loss": 2.2237, "step": 5757 }, { "epoch": 0.30890557939914165, "grad_norm": 0.396484375, "learning_rate": 4.986768133836084e-06, "loss": 2.2506, "step": 5758 }, { "epoch": 0.3089592274678112, "grad_norm": 0.380859375, "learning_rate": 4.986759205810845e-06, "loss": 2.3742, "step": 5759 }, { "epoch": 0.3090128755364807, "grad_norm": 0.37109375, "learning_rate": 4.986750274782586e-06, "loss": 2.2396, "step": 5760 }, { "epoch": 0.3090665236051502, "grad_norm": 0.4921875, "learning_rate": 4.986741340751319e-06, "loss": 2.2061, "step": 5761 }, { "epoch": 0.3091201716738197, "grad_norm": 0.40234375, "learning_rate": 4.986732403717053e-06, "loss": 2.218, "step": 5762 }, { "epoch": 0.30917381974248925, "grad_norm": 0.451171875, "learning_rate": 4.986723463679799e-06, "loss": 2.0223, "step": 5763 }, { "epoch": 0.3092274678111588, "grad_norm": 0.49609375, "learning_rate": 4.986714520639569e-06, "loss": 2.5558, "step": 5764 }, { "epoch": 0.3092811158798283, "grad_norm": 0.33203125, "learning_rate": 4.986705574596374e-06, "loss": 2.1916, "step": 5765 }, { "epoch": 0.30933476394849785, "grad_norm": 2.421875, "learning_rate": 4.986696625550223e-06, "loss": 2.251, "step": 5766 }, { "epoch": 0.3093884120171674, "grad_norm": 0.482421875, "learning_rate": 4.986687673501129e-06, "loss": 2.5159, "step": 5767 }, { "epoch": 0.3094420600858369, "grad_norm": 0.408203125, "learning_rate": 4.9866787184491e-06, "loss": 2.5296, "step": 5768 }, { "epoch": 0.30949570815450644, "grad_norm": 0.51953125, "learning_rate": 4.98666976039415e-06, "loss": 2.5407, "step": 5769 }, { "epoch": 0.309549356223176, "grad_norm": 0.421875, "learning_rate": 4.986660799336288e-06, "loss": 2.4714, "step": 5770 }, { "epoch": 0.3096030042918455, "grad_norm": 0.3203125, "learning_rate": 4.9866518352755245e-06, "loss": 2.2583, "step": 5771 }, { "epoch": 0.30965665236051504, "grad_norm": 0.384765625, "learning_rate": 4.986642868211872e-06, "loss": 2.2943, "step": 5772 }, { "epoch": 0.30971030042918457, "grad_norm": 0.41015625, "learning_rate": 4.986633898145339e-06, "loss": 2.1909, "step": 5773 }, { "epoch": 0.3097639484978541, "grad_norm": 0.69140625, "learning_rate": 4.986624925075938e-06, "loss": 2.4148, "step": 5774 }, { "epoch": 0.3098175965665236, "grad_norm": 0.40234375, "learning_rate": 4.9866159490036795e-06, "loss": 2.3048, "step": 5775 }, { "epoch": 0.3098712446351931, "grad_norm": 0.38671875, "learning_rate": 4.986606969928574e-06, "loss": 2.2982, "step": 5776 }, { "epoch": 0.30992489270386264, "grad_norm": 0.427734375, "learning_rate": 4.986597987850633e-06, "loss": 2.6443, "step": 5777 }, { "epoch": 0.3099785407725322, "grad_norm": 0.43359375, "learning_rate": 4.986589002769867e-06, "loss": 2.3696, "step": 5778 }, { "epoch": 0.3100321888412017, "grad_norm": 0.34375, "learning_rate": 4.986580014686287e-06, "loss": 2.3792, "step": 5779 }, { "epoch": 0.31008583690987124, "grad_norm": 0.37890625, "learning_rate": 4.986571023599903e-06, "loss": 2.3313, "step": 5780 }, { "epoch": 0.31013948497854077, "grad_norm": 0.3984375, "learning_rate": 4.986562029510726e-06, "loss": 2.2198, "step": 5781 }, { "epoch": 0.3101931330472103, "grad_norm": 0.416015625, "learning_rate": 4.9865530324187686e-06, "loss": 2.2028, "step": 5782 }, { "epoch": 0.31024678111587983, "grad_norm": 0.328125, "learning_rate": 4.98654403232404e-06, "loss": 2.2236, "step": 5783 }, { "epoch": 0.31030042918454936, "grad_norm": 0.408203125, "learning_rate": 4.986535029226551e-06, "loss": 2.1303, "step": 5784 }, { "epoch": 0.3103540772532189, "grad_norm": 0.4375, "learning_rate": 4.9865260231263135e-06, "loss": 2.3633, "step": 5785 }, { "epoch": 0.31040772532188843, "grad_norm": 0.4921875, "learning_rate": 4.986517014023338e-06, "loss": 2.4137, "step": 5786 }, { "epoch": 0.31046137339055796, "grad_norm": 0.390625, "learning_rate": 4.986508001917635e-06, "loss": 2.3615, "step": 5787 }, { "epoch": 0.3105150214592275, "grad_norm": 0.384765625, "learning_rate": 4.986498986809216e-06, "loss": 2.3853, "step": 5788 }, { "epoch": 0.310568669527897, "grad_norm": 0.400390625, "learning_rate": 4.986489968698091e-06, "loss": 2.3479, "step": 5789 }, { "epoch": 0.3106223175965665, "grad_norm": 0.341796875, "learning_rate": 4.9864809475842715e-06, "loss": 2.2468, "step": 5790 }, { "epoch": 0.31067596566523603, "grad_norm": 0.353515625, "learning_rate": 4.986471923467768e-06, "loss": 2.3263, "step": 5791 }, { "epoch": 0.31072961373390556, "grad_norm": 0.3984375, "learning_rate": 4.986462896348593e-06, "loss": 2.6901, "step": 5792 }, { "epoch": 0.3107832618025751, "grad_norm": 0.390625, "learning_rate": 4.986453866226755e-06, "loss": 2.2894, "step": 5793 }, { "epoch": 0.3108369098712446, "grad_norm": 0.337890625, "learning_rate": 4.986444833102265e-06, "loss": 2.1175, "step": 5794 }, { "epoch": 0.31089055793991416, "grad_norm": 0.361328125, "learning_rate": 4.9864357969751374e-06, "loss": 2.2978, "step": 5795 }, { "epoch": 0.3109442060085837, "grad_norm": 7.6875, "learning_rate": 4.9864267578453785e-06, "loss": 2.0307, "step": 5796 }, { "epoch": 0.3109978540772532, "grad_norm": 0.76953125, "learning_rate": 4.986417715713002e-06, "loss": 2.1174, "step": 5797 }, { "epoch": 0.31105150214592275, "grad_norm": 0.400390625, "learning_rate": 4.986408670578018e-06, "loss": 2.0813, "step": 5798 }, { "epoch": 0.3111051502145923, "grad_norm": 0.3828125, "learning_rate": 4.986399622440438e-06, "loss": 2.1931, "step": 5799 }, { "epoch": 0.3111587982832618, "grad_norm": 0.41796875, "learning_rate": 4.986390571300272e-06, "loss": 2.4634, "step": 5800 }, { "epoch": 0.31121244635193135, "grad_norm": 0.373046875, "learning_rate": 4.986381517157533e-06, "loss": 2.3993, "step": 5801 }, { "epoch": 0.3112660944206009, "grad_norm": 0.419921875, "learning_rate": 4.9863724600122285e-06, "loss": 2.5063, "step": 5802 }, { "epoch": 0.3113197424892704, "grad_norm": 0.4453125, "learning_rate": 4.986363399864372e-06, "loss": 2.3504, "step": 5803 }, { "epoch": 0.3113733905579399, "grad_norm": 0.40234375, "learning_rate": 4.986354336713973e-06, "loss": 2.4998, "step": 5804 }, { "epoch": 0.3114270386266094, "grad_norm": 0.453125, "learning_rate": 4.986345270561044e-06, "loss": 2.4202, "step": 5805 }, { "epoch": 0.31148068669527895, "grad_norm": 0.375, "learning_rate": 4.986336201405595e-06, "loss": 2.0803, "step": 5806 }, { "epoch": 0.3115343347639485, "grad_norm": 0.365234375, "learning_rate": 4.986327129247637e-06, "loss": 2.3713, "step": 5807 }, { "epoch": 0.311587982832618, "grad_norm": 0.65625, "learning_rate": 4.986318054087181e-06, "loss": 2.3517, "step": 5808 }, { "epoch": 0.31164163090128755, "grad_norm": 0.3984375, "learning_rate": 4.986308975924238e-06, "loss": 2.1676, "step": 5809 }, { "epoch": 0.3116952789699571, "grad_norm": 0.546875, "learning_rate": 4.986299894758819e-06, "loss": 2.2835, "step": 5810 }, { "epoch": 0.3117489270386266, "grad_norm": 0.37890625, "learning_rate": 4.9862908105909354e-06, "loss": 2.304, "step": 5811 }, { "epoch": 0.31180257510729614, "grad_norm": 0.408203125, "learning_rate": 4.9862817234205975e-06, "loss": 2.3045, "step": 5812 }, { "epoch": 0.3118562231759657, "grad_norm": 0.53125, "learning_rate": 4.986272633247817e-06, "loss": 2.5095, "step": 5813 }, { "epoch": 0.3119098712446352, "grad_norm": 0.375, "learning_rate": 4.986263540072602e-06, "loss": 2.4629, "step": 5814 }, { "epoch": 0.31196351931330474, "grad_norm": 0.3359375, "learning_rate": 4.986254443894967e-06, "loss": 2.4672, "step": 5815 }, { "epoch": 0.31201716738197427, "grad_norm": 0.34375, "learning_rate": 4.986245344714924e-06, "loss": 2.2185, "step": 5816 }, { "epoch": 0.3120708154506438, "grad_norm": 0.453125, "learning_rate": 4.986236242532479e-06, "loss": 2.3174, "step": 5817 }, { "epoch": 0.3121244635193133, "grad_norm": 0.43359375, "learning_rate": 4.986227137347647e-06, "loss": 2.38, "step": 5818 }, { "epoch": 0.3121781115879828, "grad_norm": 0.57421875, "learning_rate": 4.986218029160438e-06, "loss": 2.2775, "step": 5819 }, { "epoch": 0.31223175965665234, "grad_norm": 0.357421875, "learning_rate": 4.986208917970862e-06, "loss": 2.054, "step": 5820 }, { "epoch": 0.3122854077253219, "grad_norm": 0.61328125, "learning_rate": 4.9861998037789315e-06, "loss": 2.1573, "step": 5821 }, { "epoch": 0.3123390557939914, "grad_norm": 0.423828125, "learning_rate": 4.986190686584657e-06, "loss": 2.336, "step": 5822 }, { "epoch": 0.31239270386266094, "grad_norm": 0.33984375, "learning_rate": 4.986181566388049e-06, "loss": 2.2439, "step": 5823 }, { "epoch": 0.31244635193133047, "grad_norm": 0.70703125, "learning_rate": 4.986172443189118e-06, "loss": 2.1433, "step": 5824 }, { "epoch": 0.3125, "grad_norm": 0.369140625, "learning_rate": 4.986163316987877e-06, "loss": 2.1988, "step": 5825 }, { "epoch": 0.31255364806866953, "grad_norm": 0.419921875, "learning_rate": 4.9861541877843345e-06, "loss": 2.5071, "step": 5826 }, { "epoch": 0.31260729613733906, "grad_norm": 0.55078125, "learning_rate": 4.986145055578504e-06, "loss": 2.2507, "step": 5827 }, { "epoch": 0.3126609442060086, "grad_norm": 0.412109375, "learning_rate": 4.986135920370395e-06, "loss": 2.4206, "step": 5828 }, { "epoch": 0.3127145922746781, "grad_norm": 0.328125, "learning_rate": 4.986126782160019e-06, "loss": 1.9057, "step": 5829 }, { "epoch": 0.31276824034334766, "grad_norm": 0.390625, "learning_rate": 4.986117640947388e-06, "loss": 2.3605, "step": 5830 }, { "epoch": 0.3128218884120172, "grad_norm": 0.376953125, "learning_rate": 4.986108496732511e-06, "loss": 2.4069, "step": 5831 }, { "epoch": 0.3128755364806867, "grad_norm": 0.3671875, "learning_rate": 4.9860993495154e-06, "loss": 2.3195, "step": 5832 }, { "epoch": 0.3129291845493562, "grad_norm": 0.3828125, "learning_rate": 4.986090199296066e-06, "loss": 2.3338, "step": 5833 }, { "epoch": 0.31298283261802573, "grad_norm": 0.65625, "learning_rate": 4.98608104607452e-06, "loss": 2.3931, "step": 5834 }, { "epoch": 0.31303648068669526, "grad_norm": 0.396484375, "learning_rate": 4.986071889850775e-06, "loss": 2.3927, "step": 5835 }, { "epoch": 0.3130901287553648, "grad_norm": 0.546875, "learning_rate": 4.986062730624838e-06, "loss": 2.3354, "step": 5836 }, { "epoch": 0.3131437768240343, "grad_norm": 0.435546875, "learning_rate": 4.986053568396723e-06, "loss": 2.4742, "step": 5837 }, { "epoch": 0.31319742489270386, "grad_norm": 0.365234375, "learning_rate": 4.986044403166441e-06, "loss": 2.3433, "step": 5838 }, { "epoch": 0.3132510729613734, "grad_norm": 0.490234375, "learning_rate": 4.986035234934002e-06, "loss": 1.7634, "step": 5839 }, { "epoch": 0.3133047210300429, "grad_norm": 0.326171875, "learning_rate": 4.9860260636994175e-06, "loss": 2.094, "step": 5840 }, { "epoch": 0.31335836909871245, "grad_norm": 0.421875, "learning_rate": 4.9860168894626985e-06, "loss": 2.388, "step": 5841 }, { "epoch": 0.313412017167382, "grad_norm": 0.326171875, "learning_rate": 4.986007712223857e-06, "loss": 2.098, "step": 5842 }, { "epoch": 0.3134656652360515, "grad_norm": 0.35546875, "learning_rate": 4.985998531982902e-06, "loss": 2.3066, "step": 5843 }, { "epoch": 0.31351931330472105, "grad_norm": 0.33984375, "learning_rate": 4.985989348739847e-06, "loss": 2.2516, "step": 5844 }, { "epoch": 0.3135729613733906, "grad_norm": 1.0546875, "learning_rate": 4.985980162494701e-06, "loss": 2.3719, "step": 5845 }, { "epoch": 0.3136266094420601, "grad_norm": 0.423828125, "learning_rate": 4.9859709732474775e-06, "loss": 2.3339, "step": 5846 }, { "epoch": 0.3136802575107296, "grad_norm": 0.435546875, "learning_rate": 4.985961780998184e-06, "loss": 2.1477, "step": 5847 }, { "epoch": 0.3137339055793991, "grad_norm": 0.478515625, "learning_rate": 4.985952585746835e-06, "loss": 2.2111, "step": 5848 }, { "epoch": 0.31378755364806865, "grad_norm": 0.314453125, "learning_rate": 4.985943387493441e-06, "loss": 2.1985, "step": 5849 }, { "epoch": 0.3138412017167382, "grad_norm": 0.447265625, "learning_rate": 4.985934186238012e-06, "loss": 2.461, "step": 5850 }, { "epoch": 0.3138948497854077, "grad_norm": 0.3828125, "learning_rate": 4.985924981980559e-06, "loss": 2.3494, "step": 5851 }, { "epoch": 0.31394849785407725, "grad_norm": 0.41015625, "learning_rate": 4.985915774721093e-06, "loss": 2.364, "step": 5852 }, { "epoch": 0.3140021459227468, "grad_norm": 0.41796875, "learning_rate": 4.985906564459627e-06, "loss": 2.1572, "step": 5853 }, { "epoch": 0.3140557939914163, "grad_norm": 0.470703125, "learning_rate": 4.9858973511961705e-06, "loss": 2.4252, "step": 5854 }, { "epoch": 0.31410944206008584, "grad_norm": 0.345703125, "learning_rate": 4.985888134930735e-06, "loss": 2.2127, "step": 5855 }, { "epoch": 0.3141630901287554, "grad_norm": 0.326171875, "learning_rate": 4.9858789156633325e-06, "loss": 2.1703, "step": 5856 }, { "epoch": 0.3142167381974249, "grad_norm": 0.40234375, "learning_rate": 4.985869693393973e-06, "loss": 2.3034, "step": 5857 }, { "epoch": 0.31427038626609444, "grad_norm": 0.390625, "learning_rate": 4.9858604681226675e-06, "loss": 2.4143, "step": 5858 }, { "epoch": 0.31432403433476397, "grad_norm": 0.416015625, "learning_rate": 4.985851239849427e-06, "loss": 2.4208, "step": 5859 }, { "epoch": 0.3143776824034335, "grad_norm": 0.419921875, "learning_rate": 4.985842008574264e-06, "loss": 2.3453, "step": 5860 }, { "epoch": 0.314431330472103, "grad_norm": 0.44140625, "learning_rate": 4.985832774297189e-06, "loss": 2.2741, "step": 5861 }, { "epoch": 0.3144849785407725, "grad_norm": 0.427734375, "learning_rate": 4.985823537018213e-06, "loss": 2.2315, "step": 5862 }, { "epoch": 0.31453862660944204, "grad_norm": 0.4140625, "learning_rate": 4.985814296737347e-06, "loss": 2.1982, "step": 5863 }, { "epoch": 0.31459227467811157, "grad_norm": 0.34765625, "learning_rate": 4.985805053454602e-06, "loss": 2.2574, "step": 5864 }, { "epoch": 0.3146459227467811, "grad_norm": 0.35546875, "learning_rate": 4.9857958071699905e-06, "loss": 2.1686, "step": 5865 }, { "epoch": 0.31469957081545064, "grad_norm": 0.41015625, "learning_rate": 4.9857865578835215e-06, "loss": 1.9984, "step": 5866 }, { "epoch": 0.31475321888412017, "grad_norm": 0.8125, "learning_rate": 4.985777305595208e-06, "loss": 2.1048, "step": 5867 }, { "epoch": 0.3148068669527897, "grad_norm": 0.384765625, "learning_rate": 4.98576805030506e-06, "loss": 2.1385, "step": 5868 }, { "epoch": 0.31486051502145923, "grad_norm": 0.388671875, "learning_rate": 4.98575879201309e-06, "loss": 2.1769, "step": 5869 }, { "epoch": 0.31491416309012876, "grad_norm": 0.3515625, "learning_rate": 4.9857495307193074e-06, "loss": 2.227, "step": 5870 }, { "epoch": 0.3149678111587983, "grad_norm": 0.5, "learning_rate": 4.985740266423725e-06, "loss": 2.3731, "step": 5871 }, { "epoch": 0.3150214592274678, "grad_norm": 0.69140625, "learning_rate": 4.9857309991263535e-06, "loss": 2.2409, "step": 5872 }, { "epoch": 0.31507510729613736, "grad_norm": 0.396484375, "learning_rate": 4.9857217288272035e-06, "loss": 2.1506, "step": 5873 }, { "epoch": 0.3151287553648069, "grad_norm": 0.439453125, "learning_rate": 4.985712455526287e-06, "loss": 2.2465, "step": 5874 }, { "epoch": 0.3151824034334764, "grad_norm": 0.37890625, "learning_rate": 4.985703179223614e-06, "loss": 2.4254, "step": 5875 }, { "epoch": 0.3152360515021459, "grad_norm": 0.458984375, "learning_rate": 4.985693899919197e-06, "loss": 2.3782, "step": 5876 }, { "epoch": 0.31528969957081543, "grad_norm": 0.419921875, "learning_rate": 4.985684617613048e-06, "loss": 2.4533, "step": 5877 }, { "epoch": 0.31534334763948496, "grad_norm": 0.384765625, "learning_rate": 4.985675332305175e-06, "loss": 2.4882, "step": 5878 }, { "epoch": 0.3153969957081545, "grad_norm": 0.470703125, "learning_rate": 4.985666043995592e-06, "loss": 1.9962, "step": 5879 }, { "epoch": 0.315450643776824, "grad_norm": 0.53515625, "learning_rate": 4.985656752684309e-06, "loss": 2.2772, "step": 5880 }, { "epoch": 0.31550429184549356, "grad_norm": 0.4921875, "learning_rate": 4.9856474583713386e-06, "loss": 2.4673, "step": 5881 }, { "epoch": 0.3155579399141631, "grad_norm": 0.36328125, "learning_rate": 4.98563816105669e-06, "loss": 1.9739, "step": 5882 }, { "epoch": 0.3156115879828326, "grad_norm": 0.58203125, "learning_rate": 4.985628860740376e-06, "loss": 2.3903, "step": 5883 }, { "epoch": 0.31566523605150215, "grad_norm": 0.361328125, "learning_rate": 4.985619557422407e-06, "loss": 2.1813, "step": 5884 }, { "epoch": 0.3157188841201717, "grad_norm": 0.494140625, "learning_rate": 4.985610251102794e-06, "loss": 2.4789, "step": 5885 }, { "epoch": 0.3157725321888412, "grad_norm": 0.40234375, "learning_rate": 4.98560094178155e-06, "loss": 2.4919, "step": 5886 }, { "epoch": 0.31582618025751075, "grad_norm": 0.41015625, "learning_rate": 4.985591629458684e-06, "loss": 2.2212, "step": 5887 }, { "epoch": 0.3158798283261803, "grad_norm": 0.470703125, "learning_rate": 4.985582314134208e-06, "loss": 2.6849, "step": 5888 }, { "epoch": 0.3159334763948498, "grad_norm": 0.39453125, "learning_rate": 4.985572995808134e-06, "loss": 2.4404, "step": 5889 }, { "epoch": 0.3159871244635193, "grad_norm": 0.32421875, "learning_rate": 4.985563674480472e-06, "loss": 1.8986, "step": 5890 }, { "epoch": 0.3160407725321888, "grad_norm": 0.36328125, "learning_rate": 4.985554350151236e-06, "loss": 2.2955, "step": 5891 }, { "epoch": 0.31609442060085835, "grad_norm": 0.455078125, "learning_rate": 4.985545022820434e-06, "loss": 2.3059, "step": 5892 }, { "epoch": 0.3161480686695279, "grad_norm": 0.35546875, "learning_rate": 4.9855356924880784e-06, "loss": 2.2958, "step": 5893 }, { "epoch": 0.3162017167381974, "grad_norm": 0.388671875, "learning_rate": 4.98552635915418e-06, "loss": 2.2453, "step": 5894 }, { "epoch": 0.31625536480686695, "grad_norm": 0.392578125, "learning_rate": 4.9855170228187514e-06, "loss": 2.1983, "step": 5895 }, { "epoch": 0.3163090128755365, "grad_norm": 0.46484375, "learning_rate": 4.9855076834818025e-06, "loss": 2.2015, "step": 5896 }, { "epoch": 0.316362660944206, "grad_norm": 0.3515625, "learning_rate": 4.9854983411433465e-06, "loss": 2.2031, "step": 5897 }, { "epoch": 0.31641630901287554, "grad_norm": 0.369140625, "learning_rate": 4.985488995803393e-06, "loss": 2.3875, "step": 5898 }, { "epoch": 0.3164699570815451, "grad_norm": 0.37109375, "learning_rate": 4.985479647461953e-06, "loss": 2.4267, "step": 5899 }, { "epoch": 0.3165236051502146, "grad_norm": 0.349609375, "learning_rate": 4.985470296119038e-06, "loss": 2.2806, "step": 5900 }, { "epoch": 0.31657725321888414, "grad_norm": 0.478515625, "learning_rate": 4.985460941774661e-06, "loss": 2.4234, "step": 5901 }, { "epoch": 0.31663090128755367, "grad_norm": 0.361328125, "learning_rate": 4.985451584428832e-06, "loss": 2.2998, "step": 5902 }, { "epoch": 0.3166845493562232, "grad_norm": 0.33203125, "learning_rate": 4.985442224081561e-06, "loss": 2.1831, "step": 5903 }, { "epoch": 0.31673819742489273, "grad_norm": 0.5546875, "learning_rate": 4.985432860732862e-06, "loss": 2.5179, "step": 5904 }, { "epoch": 0.3167918454935622, "grad_norm": 0.384765625, "learning_rate": 4.985423494382745e-06, "loss": 2.6583, "step": 5905 }, { "epoch": 0.31684549356223174, "grad_norm": 0.43359375, "learning_rate": 4.985414125031221e-06, "loss": 2.3367, "step": 5906 }, { "epoch": 0.31689914163090127, "grad_norm": 0.50390625, "learning_rate": 4.9854047526783015e-06, "loss": 2.6607, "step": 5907 }, { "epoch": 0.3169527896995708, "grad_norm": 0.400390625, "learning_rate": 4.985395377323997e-06, "loss": 2.3131, "step": 5908 }, { "epoch": 0.31700643776824033, "grad_norm": 0.359375, "learning_rate": 4.98538599896832e-06, "loss": 2.2995, "step": 5909 }, { "epoch": 0.31706008583690987, "grad_norm": 0.380859375, "learning_rate": 4.9853766176112824e-06, "loss": 2.2502, "step": 5910 }, { "epoch": 0.3171137339055794, "grad_norm": 0.4296875, "learning_rate": 4.985367233252895e-06, "loss": 2.1793, "step": 5911 }, { "epoch": 0.31716738197424893, "grad_norm": 0.498046875, "learning_rate": 4.985357845893168e-06, "loss": 2.4294, "step": 5912 }, { "epoch": 0.31722103004291846, "grad_norm": 0.48828125, "learning_rate": 4.985348455532114e-06, "loss": 2.4707, "step": 5913 }, { "epoch": 0.317274678111588, "grad_norm": 0.36328125, "learning_rate": 4.985339062169744e-06, "loss": 2.7433, "step": 5914 }, { "epoch": 0.3173283261802575, "grad_norm": 0.427734375, "learning_rate": 4.985329665806069e-06, "loss": 2.3745, "step": 5915 }, { "epoch": 0.31738197424892706, "grad_norm": 0.4609375, "learning_rate": 4.9853202664411e-06, "loss": 2.2647, "step": 5916 }, { "epoch": 0.3174356223175966, "grad_norm": 0.45703125, "learning_rate": 4.98531086407485e-06, "loss": 2.3731, "step": 5917 }, { "epoch": 0.3174892703862661, "grad_norm": 0.890625, "learning_rate": 4.985301458707328e-06, "loss": 1.9234, "step": 5918 }, { "epoch": 0.3175429184549356, "grad_norm": 0.466796875, "learning_rate": 4.985292050338547e-06, "loss": 2.5201, "step": 5919 }, { "epoch": 0.31759656652360513, "grad_norm": 0.384765625, "learning_rate": 4.985282638968518e-06, "loss": 2.0289, "step": 5920 }, { "epoch": 0.31765021459227466, "grad_norm": 0.337890625, "learning_rate": 4.985273224597254e-06, "loss": 2.1281, "step": 5921 }, { "epoch": 0.3177038626609442, "grad_norm": 0.36328125, "learning_rate": 4.985263807224763e-06, "loss": 2.1489, "step": 5922 }, { "epoch": 0.3177575107296137, "grad_norm": 0.416015625, "learning_rate": 4.985254386851059e-06, "loss": 2.4584, "step": 5923 }, { "epoch": 0.31781115879828326, "grad_norm": 0.37890625, "learning_rate": 4.985244963476151e-06, "loss": 2.4212, "step": 5924 }, { "epoch": 0.3178648068669528, "grad_norm": 0.439453125, "learning_rate": 4.985235537100053e-06, "loss": 2.359, "step": 5925 }, { "epoch": 0.3179184549356223, "grad_norm": 0.41015625, "learning_rate": 4.985226107722775e-06, "loss": 2.4357, "step": 5926 }, { "epoch": 0.31797210300429185, "grad_norm": 0.439453125, "learning_rate": 4.985216675344329e-06, "loss": 2.1975, "step": 5927 }, { "epoch": 0.3180257510729614, "grad_norm": 0.86328125, "learning_rate": 4.9852072399647255e-06, "loss": 2.319, "step": 5928 }, { "epoch": 0.3180793991416309, "grad_norm": 0.439453125, "learning_rate": 4.985197801583978e-06, "loss": 2.6718, "step": 5929 }, { "epoch": 0.31813304721030045, "grad_norm": 0.404296875, "learning_rate": 4.985188360202095e-06, "loss": 2.3908, "step": 5930 }, { "epoch": 0.31818669527897, "grad_norm": 0.390625, "learning_rate": 4.9851789158190885e-06, "loss": 2.323, "step": 5931 }, { "epoch": 0.3182403433476395, "grad_norm": 0.3203125, "learning_rate": 4.985169468434972e-06, "loss": 2.268, "step": 5932 }, { "epoch": 0.318293991416309, "grad_norm": 0.3125, "learning_rate": 4.9851600180497545e-06, "loss": 2.2567, "step": 5933 }, { "epoch": 0.3183476394849785, "grad_norm": 0.392578125, "learning_rate": 4.985150564663449e-06, "loss": 2.5615, "step": 5934 }, { "epoch": 0.31840128755364805, "grad_norm": 10.875, "learning_rate": 4.985141108276066e-06, "loss": 2.4442, "step": 5935 }, { "epoch": 0.3184549356223176, "grad_norm": 0.404296875, "learning_rate": 4.985131648887618e-06, "loss": 2.1381, "step": 5936 }, { "epoch": 0.3185085836909871, "grad_norm": 0.40625, "learning_rate": 4.985122186498114e-06, "loss": 2.345, "step": 5937 }, { "epoch": 0.31856223175965664, "grad_norm": 0.38671875, "learning_rate": 4.985112721107569e-06, "loss": 2.2735, "step": 5938 }, { "epoch": 0.3186158798283262, "grad_norm": 0.369140625, "learning_rate": 4.985103252715993e-06, "loss": 2.0373, "step": 5939 }, { "epoch": 0.3186695278969957, "grad_norm": 0.66796875, "learning_rate": 4.985093781323395e-06, "loss": 2.1862, "step": 5940 }, { "epoch": 0.31872317596566524, "grad_norm": 0.31640625, "learning_rate": 4.985084306929789e-06, "loss": 2.1257, "step": 5941 }, { "epoch": 0.31877682403433477, "grad_norm": 0.400390625, "learning_rate": 4.985074829535187e-06, "loss": 2.5643, "step": 5942 }, { "epoch": 0.3188304721030043, "grad_norm": 0.4296875, "learning_rate": 4.985065349139598e-06, "loss": 2.4467, "step": 5943 }, { "epoch": 0.31888412017167383, "grad_norm": 0.3828125, "learning_rate": 4.985055865743036e-06, "loss": 2.2425, "step": 5944 }, { "epoch": 0.31893776824034337, "grad_norm": 0.35546875, "learning_rate": 4.985046379345511e-06, "loss": 2.1112, "step": 5945 }, { "epoch": 0.3189914163090129, "grad_norm": 0.349609375, "learning_rate": 4.985036889947034e-06, "loss": 1.9154, "step": 5946 }, { "epoch": 0.31904506437768243, "grad_norm": 0.39453125, "learning_rate": 4.985027397547617e-06, "loss": 2.2502, "step": 5947 }, { "epoch": 0.3190987124463519, "grad_norm": 0.53125, "learning_rate": 4.985017902147272e-06, "loss": 2.1845, "step": 5948 }, { "epoch": 0.31915236051502144, "grad_norm": 0.34765625, "learning_rate": 4.98500840374601e-06, "loss": 2.194, "step": 5949 }, { "epoch": 0.31920600858369097, "grad_norm": 0.35546875, "learning_rate": 4.984998902343843e-06, "loss": 2.4836, "step": 5950 }, { "epoch": 0.3192596566523605, "grad_norm": 0.373046875, "learning_rate": 4.984989397940782e-06, "loss": 2.2695, "step": 5951 }, { "epoch": 0.31931330472103003, "grad_norm": 0.474609375, "learning_rate": 4.984979890536837e-06, "loss": 2.2573, "step": 5952 }, { "epoch": 0.31936695278969957, "grad_norm": 0.353515625, "learning_rate": 4.984970380132022e-06, "loss": 2.0091, "step": 5953 }, { "epoch": 0.3194206008583691, "grad_norm": 6.15625, "learning_rate": 4.984960866726348e-06, "loss": 2.5704, "step": 5954 }, { "epoch": 0.31947424892703863, "grad_norm": 0.3984375, "learning_rate": 4.984951350319826e-06, "loss": 2.5705, "step": 5955 }, { "epoch": 0.31952789699570816, "grad_norm": 0.361328125, "learning_rate": 4.984941830912466e-06, "loss": 2.1979, "step": 5956 }, { "epoch": 0.3195815450643777, "grad_norm": 0.33984375, "learning_rate": 4.984932308504282e-06, "loss": 1.7189, "step": 5957 }, { "epoch": 0.3196351931330472, "grad_norm": 0.345703125, "learning_rate": 4.984922783095285e-06, "loss": 2.474, "step": 5958 }, { "epoch": 0.31968884120171676, "grad_norm": 0.349609375, "learning_rate": 4.984913254685485e-06, "loss": 2.1911, "step": 5959 }, { "epoch": 0.3197424892703863, "grad_norm": 0.46875, "learning_rate": 4.984903723274895e-06, "loss": 2.2292, "step": 5960 }, { "epoch": 0.3197961373390558, "grad_norm": 0.40625, "learning_rate": 4.984894188863525e-06, "loss": 2.3043, "step": 5961 }, { "epoch": 0.3198497854077253, "grad_norm": 0.40625, "learning_rate": 4.984884651451388e-06, "loss": 2.383, "step": 5962 }, { "epoch": 0.3199034334763948, "grad_norm": 0.376953125, "learning_rate": 4.9848751110384955e-06, "loss": 2.3778, "step": 5963 }, { "epoch": 0.31995708154506436, "grad_norm": 0.412109375, "learning_rate": 4.984865567624858e-06, "loss": 2.6953, "step": 5964 }, { "epoch": 0.3200107296137339, "grad_norm": 0.4140625, "learning_rate": 4.984856021210488e-06, "loss": 2.3595, "step": 5965 }, { "epoch": 0.3200643776824034, "grad_norm": 0.412109375, "learning_rate": 4.984846471795396e-06, "loss": 2.1581, "step": 5966 }, { "epoch": 0.32011802575107295, "grad_norm": 0.353515625, "learning_rate": 4.984836919379594e-06, "loss": 2.3195, "step": 5967 }, { "epoch": 0.3201716738197425, "grad_norm": 0.37109375, "learning_rate": 4.984827363963094e-06, "loss": 2.5598, "step": 5968 }, { "epoch": 0.320225321888412, "grad_norm": 0.359375, "learning_rate": 4.984817805545907e-06, "loss": 2.4331, "step": 5969 }, { "epoch": 0.32027896995708155, "grad_norm": 0.484375, "learning_rate": 4.984808244128044e-06, "loss": 2.5594, "step": 5970 }, { "epoch": 0.3203326180257511, "grad_norm": 0.380859375, "learning_rate": 4.984798679709519e-06, "loss": 2.3611, "step": 5971 }, { "epoch": 0.3203862660944206, "grad_norm": 0.482421875, "learning_rate": 4.9847891122903405e-06, "loss": 2.4124, "step": 5972 }, { "epoch": 0.32043991416309014, "grad_norm": 0.42578125, "learning_rate": 4.9847795418705215e-06, "loss": 1.8526, "step": 5973 }, { "epoch": 0.3204935622317597, "grad_norm": 0.3359375, "learning_rate": 4.984769968450074e-06, "loss": 2.6496, "step": 5974 }, { "epoch": 0.3205472103004292, "grad_norm": 0.498046875, "learning_rate": 4.984760392029008e-06, "loss": 2.3913, "step": 5975 }, { "epoch": 0.32060085836909874, "grad_norm": 0.400390625, "learning_rate": 4.984750812607337e-06, "loss": 2.4017, "step": 5976 }, { "epoch": 0.3206545064377682, "grad_norm": 0.55078125, "learning_rate": 4.984741230185071e-06, "loss": 2.3261, "step": 5977 }, { "epoch": 0.32070815450643775, "grad_norm": 0.421875, "learning_rate": 4.984731644762222e-06, "loss": 2.2891, "step": 5978 }, { "epoch": 0.3207618025751073, "grad_norm": 0.4296875, "learning_rate": 4.9847220563388024e-06, "loss": 1.9481, "step": 5979 }, { "epoch": 0.3208154506437768, "grad_norm": 0.4140625, "learning_rate": 4.984712464914823e-06, "loss": 2.2687, "step": 5980 }, { "epoch": 0.32086909871244634, "grad_norm": 0.380859375, "learning_rate": 4.984702870490295e-06, "loss": 2.2678, "step": 5981 }, { "epoch": 0.3209227467811159, "grad_norm": 0.451171875, "learning_rate": 4.984693273065231e-06, "loss": 1.5317, "step": 5982 }, { "epoch": 0.3209763948497854, "grad_norm": 0.369140625, "learning_rate": 4.984683672639642e-06, "loss": 2.3022, "step": 5983 }, { "epoch": 0.32103004291845494, "grad_norm": 0.38671875, "learning_rate": 4.98467406921354e-06, "loss": 2.1668, "step": 5984 }, { "epoch": 0.32108369098712447, "grad_norm": 0.375, "learning_rate": 4.984664462786936e-06, "loss": 2.2191, "step": 5985 }, { "epoch": 0.321137339055794, "grad_norm": 0.427734375, "learning_rate": 4.984654853359842e-06, "loss": 2.0474, "step": 5986 }, { "epoch": 0.32119098712446353, "grad_norm": 0.640625, "learning_rate": 4.9846452409322685e-06, "loss": 2.4366, "step": 5987 }, { "epoch": 0.32124463519313307, "grad_norm": 0.32421875, "learning_rate": 4.984635625504229e-06, "loss": 2.1771, "step": 5988 }, { "epoch": 0.3212982832618026, "grad_norm": 0.392578125, "learning_rate": 4.984626007075735e-06, "loss": 2.4282, "step": 5989 }, { "epoch": 0.32135193133047213, "grad_norm": 0.3359375, "learning_rate": 4.984616385646795e-06, "loss": 2.2045, "step": 5990 }, { "epoch": 0.3214055793991416, "grad_norm": 0.6171875, "learning_rate": 4.984606761217426e-06, "loss": 1.5082, "step": 5991 }, { "epoch": 0.32145922746781114, "grad_norm": 0.3671875, "learning_rate": 4.984597133787634e-06, "loss": 2.3246, "step": 5992 }, { "epoch": 0.32151287553648067, "grad_norm": 0.5546875, "learning_rate": 4.984587503357434e-06, "loss": 2.3205, "step": 5993 }, { "epoch": 0.3215665236051502, "grad_norm": 0.404296875, "learning_rate": 4.984577869926837e-06, "loss": 2.4334, "step": 5994 }, { "epoch": 0.32162017167381973, "grad_norm": 0.40625, "learning_rate": 4.984568233495855e-06, "loss": 2.4818, "step": 5995 }, { "epoch": 0.32167381974248926, "grad_norm": 0.453125, "learning_rate": 4.984558594064498e-06, "loss": 2.367, "step": 5996 }, { "epoch": 0.3217274678111588, "grad_norm": 0.369140625, "learning_rate": 4.984548951632779e-06, "loss": 2.3455, "step": 5997 }, { "epoch": 0.3217811158798283, "grad_norm": 7.6875, "learning_rate": 4.98453930620071e-06, "loss": 2.3099, "step": 5998 }, { "epoch": 0.32183476394849786, "grad_norm": 0.6328125, "learning_rate": 4.984529657768301e-06, "loss": 2.4125, "step": 5999 }, { "epoch": 0.3218884120171674, "grad_norm": 0.455078125, "learning_rate": 4.984520006335566e-06, "loss": 2.3744, "step": 6000 }, { "epoch": 0.3219420600858369, "grad_norm": 0.4375, "learning_rate": 4.984510351902514e-06, "loss": 1.9916, "step": 6001 }, { "epoch": 0.32199570815450645, "grad_norm": 0.388671875, "learning_rate": 4.9845006944691585e-06, "loss": 2.2674, "step": 6002 }, { "epoch": 0.322049356223176, "grad_norm": 0.353515625, "learning_rate": 4.984491034035512e-06, "loss": 2.2065, "step": 6003 }, { "epoch": 0.3221030042918455, "grad_norm": 0.390625, "learning_rate": 4.9844813706015826e-06, "loss": 2.3398, "step": 6004 }, { "epoch": 0.322156652360515, "grad_norm": 0.482421875, "learning_rate": 4.984471704167385e-06, "loss": 1.772, "step": 6005 }, { "epoch": 0.3222103004291845, "grad_norm": 0.380859375, "learning_rate": 4.984462034732931e-06, "loss": 2.2829, "step": 6006 }, { "epoch": 0.32226394849785406, "grad_norm": 0.458984375, "learning_rate": 4.98445236229823e-06, "loss": 1.3883, "step": 6007 }, { "epoch": 0.3223175965665236, "grad_norm": 0.4453125, "learning_rate": 4.984442686863295e-06, "loss": 2.0986, "step": 6008 }, { "epoch": 0.3223712446351931, "grad_norm": 0.33984375, "learning_rate": 4.984433008428139e-06, "loss": 2.2329, "step": 6009 }, { "epoch": 0.32242489270386265, "grad_norm": 0.388671875, "learning_rate": 4.984423326992772e-06, "loss": 2.2425, "step": 6010 }, { "epoch": 0.3224785407725322, "grad_norm": 0.35546875, "learning_rate": 4.984413642557206e-06, "loss": 1.8316, "step": 6011 }, { "epoch": 0.3225321888412017, "grad_norm": 0.376953125, "learning_rate": 4.984403955121452e-06, "loss": 2.4854, "step": 6012 }, { "epoch": 0.32258583690987125, "grad_norm": 0.40234375, "learning_rate": 4.984394264685523e-06, "loss": 2.3785, "step": 6013 }, { "epoch": 0.3226394849785408, "grad_norm": 0.361328125, "learning_rate": 4.98438457124943e-06, "loss": 2.3305, "step": 6014 }, { "epoch": 0.3226931330472103, "grad_norm": 0.5, "learning_rate": 4.984374874813185e-06, "loss": 2.3636, "step": 6015 }, { "epoch": 0.32274678111587984, "grad_norm": 0.322265625, "learning_rate": 4.9843651753768e-06, "loss": 2.2276, "step": 6016 }, { "epoch": 0.3228004291845494, "grad_norm": 0.380859375, "learning_rate": 4.984355472940286e-06, "loss": 2.1146, "step": 6017 }, { "epoch": 0.3228540772532189, "grad_norm": 0.54296875, "learning_rate": 4.984345767503655e-06, "loss": 2.3963, "step": 6018 }, { "epoch": 0.32290772532188844, "grad_norm": 0.357421875, "learning_rate": 4.9843360590669185e-06, "loss": 2.2224, "step": 6019 }, { "epoch": 0.3229613733905579, "grad_norm": 0.376953125, "learning_rate": 4.984326347630088e-06, "loss": 2.2751, "step": 6020 }, { "epoch": 0.32301502145922745, "grad_norm": 0.466796875, "learning_rate": 4.984316633193177e-06, "loss": 2.3095, "step": 6021 }, { "epoch": 0.323068669527897, "grad_norm": 0.40234375, "learning_rate": 4.984306915756195e-06, "loss": 2.1897, "step": 6022 }, { "epoch": 0.3231223175965665, "grad_norm": 0.66015625, "learning_rate": 4.984297195319155e-06, "loss": 2.3123, "step": 6023 }, { "epoch": 0.32317596566523604, "grad_norm": 0.388671875, "learning_rate": 4.984287471882069e-06, "loss": 2.4307, "step": 6024 }, { "epoch": 0.3232296137339056, "grad_norm": 0.470703125, "learning_rate": 4.984277745444946e-06, "loss": 2.1745, "step": 6025 }, { "epoch": 0.3232832618025751, "grad_norm": 0.53515625, "learning_rate": 4.984268016007802e-06, "loss": 2.1012, "step": 6026 }, { "epoch": 0.32333690987124464, "grad_norm": 0.341796875, "learning_rate": 4.984258283570646e-06, "loss": 2.1699, "step": 6027 }, { "epoch": 0.32339055793991417, "grad_norm": 0.3828125, "learning_rate": 4.984248548133491e-06, "loss": 2.3404, "step": 6028 }, { "epoch": 0.3234442060085837, "grad_norm": 0.3984375, "learning_rate": 4.984238809696347e-06, "loss": 2.1974, "step": 6029 }, { "epoch": 0.32349785407725323, "grad_norm": 0.365234375, "learning_rate": 4.984229068259227e-06, "loss": 2.1098, "step": 6030 }, { "epoch": 0.32355150214592276, "grad_norm": 0.78125, "learning_rate": 4.984219323822143e-06, "loss": 2.3628, "step": 6031 }, { "epoch": 0.3236051502145923, "grad_norm": 0.578125, "learning_rate": 4.984209576385107e-06, "loss": 2.1348, "step": 6032 }, { "epoch": 0.32365879828326183, "grad_norm": 0.369140625, "learning_rate": 4.984199825948129e-06, "loss": 1.8845, "step": 6033 }, { "epoch": 0.3237124463519313, "grad_norm": 0.3828125, "learning_rate": 4.984190072511223e-06, "loss": 1.9613, "step": 6034 }, { "epoch": 0.32376609442060084, "grad_norm": 0.34375, "learning_rate": 4.9841803160744e-06, "loss": 2.4983, "step": 6035 }, { "epoch": 0.32381974248927037, "grad_norm": 0.494140625, "learning_rate": 4.984170556637671e-06, "loss": 2.2626, "step": 6036 }, { "epoch": 0.3238733905579399, "grad_norm": 0.54296875, "learning_rate": 4.984160794201049e-06, "loss": 2.3193, "step": 6037 }, { "epoch": 0.32392703862660943, "grad_norm": 0.42578125, "learning_rate": 4.984151028764544e-06, "loss": 2.441, "step": 6038 }, { "epoch": 0.32398068669527896, "grad_norm": 0.359375, "learning_rate": 4.984141260328169e-06, "loss": 2.2419, "step": 6039 }, { "epoch": 0.3240343347639485, "grad_norm": 0.462890625, "learning_rate": 4.984131488891936e-06, "loss": 2.5593, "step": 6040 }, { "epoch": 0.324087982832618, "grad_norm": 0.7890625, "learning_rate": 4.984121714455857e-06, "loss": 2.2476, "step": 6041 }, { "epoch": 0.32414163090128756, "grad_norm": 0.38671875, "learning_rate": 4.984111937019943e-06, "loss": 2.0232, "step": 6042 }, { "epoch": 0.3241952789699571, "grad_norm": 0.470703125, "learning_rate": 4.9841021565842054e-06, "loss": 2.4563, "step": 6043 }, { "epoch": 0.3242489270386266, "grad_norm": 0.419921875, "learning_rate": 4.984092373148658e-06, "loss": 2.2959, "step": 6044 }, { "epoch": 0.32430257510729615, "grad_norm": 0.462890625, "learning_rate": 4.984082586713311e-06, "loss": 2.5052, "step": 6045 }, { "epoch": 0.3243562231759657, "grad_norm": 0.408203125, "learning_rate": 4.9840727972781755e-06, "loss": 2.4114, "step": 6046 }, { "epoch": 0.3244098712446352, "grad_norm": 0.400390625, "learning_rate": 4.984063004843265e-06, "loss": 2.2378, "step": 6047 }, { "epoch": 0.3244635193133047, "grad_norm": 0.4453125, "learning_rate": 4.984053209408591e-06, "loss": 2.0535, "step": 6048 }, { "epoch": 0.3245171673819742, "grad_norm": 0.37109375, "learning_rate": 4.984043410974165e-06, "loss": 2.5913, "step": 6049 }, { "epoch": 0.32457081545064376, "grad_norm": 0.38671875, "learning_rate": 4.984033609539999e-06, "loss": 2.1787, "step": 6050 }, { "epoch": 0.3246244635193133, "grad_norm": 0.3828125, "learning_rate": 4.984023805106104e-06, "loss": 2.0544, "step": 6051 }, { "epoch": 0.3246781115879828, "grad_norm": 0.37109375, "learning_rate": 4.984013997672493e-06, "loss": 2.2416, "step": 6052 }, { "epoch": 0.32473175965665235, "grad_norm": 1.0234375, "learning_rate": 4.984004187239178e-06, "loss": 2.3876, "step": 6053 }, { "epoch": 0.3247854077253219, "grad_norm": 0.373046875, "learning_rate": 4.983994373806169e-06, "loss": 2.1747, "step": 6054 }, { "epoch": 0.3248390557939914, "grad_norm": 0.38671875, "learning_rate": 4.9839845573734795e-06, "loss": 2.2463, "step": 6055 }, { "epoch": 0.32489270386266095, "grad_norm": 0.36328125, "learning_rate": 4.983974737941121e-06, "loss": 2.2069, "step": 6056 }, { "epoch": 0.3249463519313305, "grad_norm": 0.404296875, "learning_rate": 4.983964915509105e-06, "loss": 2.2562, "step": 6057 }, { "epoch": 0.325, "grad_norm": 0.3515625, "learning_rate": 4.983955090077445e-06, "loss": 2.3706, "step": 6058 }, { "epoch": 0.32505364806866954, "grad_norm": 0.41796875, "learning_rate": 4.98394526164615e-06, "loss": 2.5456, "step": 6059 }, { "epoch": 0.3251072961373391, "grad_norm": 0.3984375, "learning_rate": 4.983935430215234e-06, "loss": 1.2897, "step": 6060 }, { "epoch": 0.3251609442060086, "grad_norm": 0.33984375, "learning_rate": 4.983925595784709e-06, "loss": 2.2573, "step": 6061 }, { "epoch": 0.32521459227467814, "grad_norm": 0.3828125, "learning_rate": 4.9839157583545845e-06, "loss": 2.2692, "step": 6062 }, { "epoch": 0.3252682403433476, "grad_norm": 0.68359375, "learning_rate": 4.983905917924875e-06, "loss": 2.3191, "step": 6063 }, { "epoch": 0.32532188841201715, "grad_norm": 0.3671875, "learning_rate": 4.983896074495592e-06, "loss": 2.2904, "step": 6064 }, { "epoch": 0.3253755364806867, "grad_norm": 0.431640625, "learning_rate": 4.983886228066746e-06, "loss": 2.1929, "step": 6065 }, { "epoch": 0.3254291845493562, "grad_norm": 2.140625, "learning_rate": 4.983876378638349e-06, "loss": 2.3265, "step": 6066 }, { "epoch": 0.32548283261802574, "grad_norm": 0.46484375, "learning_rate": 4.983866526210415e-06, "loss": 2.2851, "step": 6067 }, { "epoch": 0.3255364806866953, "grad_norm": 0.341796875, "learning_rate": 4.983856670782954e-06, "loss": 2.1788, "step": 6068 }, { "epoch": 0.3255901287553648, "grad_norm": 0.400390625, "learning_rate": 4.983846812355978e-06, "loss": 2.3127, "step": 6069 }, { "epoch": 0.32564377682403434, "grad_norm": 0.41796875, "learning_rate": 4.983836950929499e-06, "loss": 2.2955, "step": 6070 }, { "epoch": 0.32569742489270387, "grad_norm": 0.3828125, "learning_rate": 4.9838270865035296e-06, "loss": 1.8971, "step": 6071 }, { "epoch": 0.3257510729613734, "grad_norm": 0.380859375, "learning_rate": 4.983817219078082e-06, "loss": 2.2845, "step": 6072 }, { "epoch": 0.32580472103004293, "grad_norm": 0.39453125, "learning_rate": 4.983807348653167e-06, "loss": 2.3261, "step": 6073 }, { "epoch": 0.32585836909871246, "grad_norm": 0.361328125, "learning_rate": 4.983797475228796e-06, "loss": 2.2779, "step": 6074 }, { "epoch": 0.325912017167382, "grad_norm": 0.5703125, "learning_rate": 4.983787598804983e-06, "loss": 1.7786, "step": 6075 }, { "epoch": 0.3259656652360515, "grad_norm": 0.390625, "learning_rate": 4.983777719381738e-06, "loss": 2.3903, "step": 6076 }, { "epoch": 0.326019313304721, "grad_norm": 0.474609375, "learning_rate": 4.9837678369590745e-06, "loss": 2.2641, "step": 6077 }, { "epoch": 0.32607296137339054, "grad_norm": 0.36328125, "learning_rate": 4.983757951537003e-06, "loss": 2.1756, "step": 6078 }, { "epoch": 0.32612660944206007, "grad_norm": 0.404296875, "learning_rate": 4.983748063115537e-06, "loss": 2.0994, "step": 6079 }, { "epoch": 0.3261802575107296, "grad_norm": 0.365234375, "learning_rate": 4.983738171694686e-06, "loss": 2.3831, "step": 6080 }, { "epoch": 0.32623390557939913, "grad_norm": 0.3828125, "learning_rate": 4.983728277274465e-06, "loss": 2.1999, "step": 6081 }, { "epoch": 0.32628755364806866, "grad_norm": 0.40625, "learning_rate": 4.983718379854884e-06, "loss": 2.2356, "step": 6082 }, { "epoch": 0.3263412017167382, "grad_norm": 0.40234375, "learning_rate": 4.9837084794359545e-06, "loss": 2.2876, "step": 6083 }, { "epoch": 0.3263948497854077, "grad_norm": 0.41796875, "learning_rate": 4.9836985760176904e-06, "loss": 2.3802, "step": 6084 }, { "epoch": 0.32644849785407726, "grad_norm": 0.39453125, "learning_rate": 4.983688669600102e-06, "loss": 2.2672, "step": 6085 }, { "epoch": 0.3265021459227468, "grad_norm": 0.369140625, "learning_rate": 4.9836787601832025e-06, "loss": 2.1317, "step": 6086 }, { "epoch": 0.3265557939914163, "grad_norm": 0.42578125, "learning_rate": 4.983668847767002e-06, "loss": 2.4496, "step": 6087 }, { "epoch": 0.32660944206008585, "grad_norm": 0.361328125, "learning_rate": 4.983658932351515e-06, "loss": 2.1165, "step": 6088 }, { "epoch": 0.3266630901287554, "grad_norm": 0.35546875, "learning_rate": 4.983649013936751e-06, "loss": 2.3683, "step": 6089 }, { "epoch": 0.3267167381974249, "grad_norm": 0.474609375, "learning_rate": 4.983639092522724e-06, "loss": 2.2887, "step": 6090 }, { "epoch": 0.32677038626609445, "grad_norm": 0.375, "learning_rate": 4.983629168109445e-06, "loss": 2.1212, "step": 6091 }, { "epoch": 0.3268240343347639, "grad_norm": 0.44140625, "learning_rate": 4.983619240696926e-06, "loss": 2.4136, "step": 6092 }, { "epoch": 0.32687768240343346, "grad_norm": 0.39453125, "learning_rate": 4.983609310285179e-06, "loss": 2.6239, "step": 6093 }, { "epoch": 0.326931330472103, "grad_norm": 0.353515625, "learning_rate": 4.983599376874216e-06, "loss": 2.1012, "step": 6094 }, { "epoch": 0.3269849785407725, "grad_norm": 0.380859375, "learning_rate": 4.983589440464049e-06, "loss": 2.3796, "step": 6095 }, { "epoch": 0.32703862660944205, "grad_norm": 0.451171875, "learning_rate": 4.983579501054691e-06, "loss": 2.3716, "step": 6096 }, { "epoch": 0.3270922746781116, "grad_norm": 0.64453125, "learning_rate": 4.983569558646152e-06, "loss": 2.1501, "step": 6097 }, { "epoch": 0.3271459227467811, "grad_norm": 0.330078125, "learning_rate": 4.983559613238446e-06, "loss": 2.2381, "step": 6098 }, { "epoch": 0.32719957081545065, "grad_norm": 0.376953125, "learning_rate": 4.983549664831584e-06, "loss": 2.2368, "step": 6099 }, { "epoch": 0.3272532188841202, "grad_norm": 0.421875, "learning_rate": 4.9835397134255775e-06, "loss": 2.2678, "step": 6100 }, { "epoch": 0.3273068669527897, "grad_norm": 0.341796875, "learning_rate": 4.983529759020439e-06, "loss": 2.2287, "step": 6101 }, { "epoch": 0.32736051502145924, "grad_norm": 0.388671875, "learning_rate": 4.983519801616181e-06, "loss": 2.4386, "step": 6102 }, { "epoch": 0.3274141630901288, "grad_norm": 0.37109375, "learning_rate": 4.983509841212816e-06, "loss": 2.3142, "step": 6103 }, { "epoch": 0.3274678111587983, "grad_norm": 0.376953125, "learning_rate": 4.983499877810354e-06, "loss": 2.2367, "step": 6104 }, { "epoch": 0.32752145922746784, "grad_norm": 0.37890625, "learning_rate": 4.983489911408809e-06, "loss": 2.1676, "step": 6105 }, { "epoch": 0.3275751072961373, "grad_norm": 0.3203125, "learning_rate": 4.983479942008192e-06, "loss": 2.2163, "step": 6106 }, { "epoch": 0.32762875536480685, "grad_norm": 0.400390625, "learning_rate": 4.983469969608515e-06, "loss": 2.3757, "step": 6107 }, { "epoch": 0.3276824034334764, "grad_norm": 0.400390625, "learning_rate": 4.983459994209791e-06, "loss": 2.3252, "step": 6108 }, { "epoch": 0.3277360515021459, "grad_norm": 0.462890625, "learning_rate": 4.9834500158120304e-06, "loss": 2.3114, "step": 6109 }, { "epoch": 0.32778969957081544, "grad_norm": 0.6171875, "learning_rate": 4.983440034415247e-06, "loss": 1.291, "step": 6110 }, { "epoch": 0.32784334763948497, "grad_norm": 0.4453125, "learning_rate": 4.983430050019452e-06, "loss": 2.6579, "step": 6111 }, { "epoch": 0.3278969957081545, "grad_norm": 0.373046875, "learning_rate": 4.983420062624658e-06, "loss": 2.362, "step": 6112 }, { "epoch": 0.32795064377682404, "grad_norm": 0.341796875, "learning_rate": 4.9834100722308755e-06, "loss": 2.3397, "step": 6113 }, { "epoch": 0.32800429184549357, "grad_norm": 0.359375, "learning_rate": 4.983400078838119e-06, "loss": 2.2828, "step": 6114 }, { "epoch": 0.3280579399141631, "grad_norm": 0.392578125, "learning_rate": 4.983390082446398e-06, "loss": 2.3478, "step": 6115 }, { "epoch": 0.32811158798283263, "grad_norm": 0.37890625, "learning_rate": 4.983380083055727e-06, "loss": 2.3079, "step": 6116 }, { "epoch": 0.32816523605150216, "grad_norm": 0.353515625, "learning_rate": 4.983370080666116e-06, "loss": 2.1957, "step": 6117 }, { "epoch": 0.3282188841201717, "grad_norm": 0.294921875, "learning_rate": 4.983360075277578e-06, "loss": 1.9925, "step": 6118 }, { "epoch": 0.3282725321888412, "grad_norm": 0.3828125, "learning_rate": 4.983350066890126e-06, "loss": 2.2208, "step": 6119 }, { "epoch": 0.3283261802575107, "grad_norm": 0.42578125, "learning_rate": 4.983340055503771e-06, "loss": 2.2065, "step": 6120 }, { "epoch": 0.32837982832618023, "grad_norm": 0.44921875, "learning_rate": 4.983330041118524e-06, "loss": 2.3579, "step": 6121 }, { "epoch": 0.32843347639484977, "grad_norm": 0.412109375, "learning_rate": 4.983320023734399e-06, "loss": 2.3813, "step": 6122 }, { "epoch": 0.3284871244635193, "grad_norm": 0.392578125, "learning_rate": 4.983310003351408e-06, "loss": 2.3359, "step": 6123 }, { "epoch": 0.32854077253218883, "grad_norm": 0.45703125, "learning_rate": 4.983299979969562e-06, "loss": 2.4784, "step": 6124 }, { "epoch": 0.32859442060085836, "grad_norm": 0.42578125, "learning_rate": 4.983289953588873e-06, "loss": 2.3639, "step": 6125 }, { "epoch": 0.3286480686695279, "grad_norm": 0.46875, "learning_rate": 4.983279924209356e-06, "loss": 2.4796, "step": 6126 }, { "epoch": 0.3287017167381974, "grad_norm": 0.443359375, "learning_rate": 4.9832698918310185e-06, "loss": 2.3873, "step": 6127 }, { "epoch": 0.32875536480686696, "grad_norm": 0.498046875, "learning_rate": 4.983259856453876e-06, "loss": 1.9846, "step": 6128 }, { "epoch": 0.3288090128755365, "grad_norm": 0.37109375, "learning_rate": 4.983249818077939e-06, "loss": 2.3693, "step": 6129 }, { "epoch": 0.328862660944206, "grad_norm": 0.447265625, "learning_rate": 4.983239776703221e-06, "loss": 2.2468, "step": 6130 }, { "epoch": 0.32891630901287555, "grad_norm": 0.46484375, "learning_rate": 4.983229732329733e-06, "loss": 2.4915, "step": 6131 }, { "epoch": 0.3289699570815451, "grad_norm": 0.423828125, "learning_rate": 4.9832196849574876e-06, "loss": 2.3282, "step": 6132 }, { "epoch": 0.3290236051502146, "grad_norm": 0.455078125, "learning_rate": 4.983209634586497e-06, "loss": 2.674, "step": 6133 }, { "epoch": 0.32907725321888415, "grad_norm": 0.294921875, "learning_rate": 4.983199581216772e-06, "loss": 2.2419, "step": 6134 }, { "epoch": 0.3291309012875536, "grad_norm": 0.369140625, "learning_rate": 4.983189524848326e-06, "loss": 2.42, "step": 6135 }, { "epoch": 0.32918454935622316, "grad_norm": 0.349609375, "learning_rate": 4.983179465481173e-06, "loss": 2.3226, "step": 6136 }, { "epoch": 0.3292381974248927, "grad_norm": 0.427734375, "learning_rate": 4.983169403115321e-06, "loss": 2.0487, "step": 6137 }, { "epoch": 0.3292918454935622, "grad_norm": 0.337890625, "learning_rate": 4.983159337750786e-06, "loss": 2.3074, "step": 6138 }, { "epoch": 0.32934549356223175, "grad_norm": 0.85546875, "learning_rate": 4.983149269387577e-06, "loss": 2.2781, "step": 6139 }, { "epoch": 0.3293991416309013, "grad_norm": 0.45703125, "learning_rate": 4.983139198025708e-06, "loss": 2.1307, "step": 6140 }, { "epoch": 0.3294527896995708, "grad_norm": 0.357421875, "learning_rate": 4.9831291236651916e-06, "loss": 2.4285, "step": 6141 }, { "epoch": 0.32950643776824035, "grad_norm": 0.390625, "learning_rate": 4.9831190463060386e-06, "loss": 2.1537, "step": 6142 }, { "epoch": 0.3295600858369099, "grad_norm": 0.41796875, "learning_rate": 4.983108965948262e-06, "loss": 2.3353, "step": 6143 }, { "epoch": 0.3296137339055794, "grad_norm": 0.337890625, "learning_rate": 4.983098882591873e-06, "loss": 2.3697, "step": 6144 }, { "epoch": 0.32966738197424894, "grad_norm": 0.4140625, "learning_rate": 4.983088796236886e-06, "loss": 2.6538, "step": 6145 }, { "epoch": 0.3297210300429185, "grad_norm": 0.353515625, "learning_rate": 4.98307870688331e-06, "loss": 2.232, "step": 6146 }, { "epoch": 0.329774678111588, "grad_norm": 2.421875, "learning_rate": 4.983068614531159e-06, "loss": 2.4832, "step": 6147 }, { "epoch": 0.32982832618025754, "grad_norm": 0.435546875, "learning_rate": 4.983058519180446e-06, "loss": 2.3333, "step": 6148 }, { "epoch": 0.329881974248927, "grad_norm": 0.421875, "learning_rate": 4.983048420831181e-06, "loss": 2.3776, "step": 6149 }, { "epoch": 0.32993562231759654, "grad_norm": 0.392578125, "learning_rate": 4.983038319483379e-06, "loss": 2.2941, "step": 6150 }, { "epoch": 0.3299892703862661, "grad_norm": 0.392578125, "learning_rate": 4.98302821513705e-06, "loss": 2.3316, "step": 6151 }, { "epoch": 0.3300429184549356, "grad_norm": 0.400390625, "learning_rate": 4.983018107792206e-06, "loss": 2.4474, "step": 6152 }, { "epoch": 0.33009656652360514, "grad_norm": 0.349609375, "learning_rate": 4.983007997448861e-06, "loss": 2.1331, "step": 6153 }, { "epoch": 0.33015021459227467, "grad_norm": 0.3359375, "learning_rate": 4.982997884107026e-06, "loss": 2.4708, "step": 6154 }, { "epoch": 0.3302038626609442, "grad_norm": 0.43359375, "learning_rate": 4.982987767766713e-06, "loss": 2.3806, "step": 6155 }, { "epoch": 0.33025751072961373, "grad_norm": 0.3984375, "learning_rate": 4.982977648427935e-06, "loss": 2.3655, "step": 6156 }, { "epoch": 0.33031115879828327, "grad_norm": 0.3828125, "learning_rate": 4.982967526090704e-06, "loss": 2.2868, "step": 6157 }, { "epoch": 0.3303648068669528, "grad_norm": 0.3828125, "learning_rate": 4.982957400755032e-06, "loss": 2.4617, "step": 6158 }, { "epoch": 0.33041845493562233, "grad_norm": 0.42578125, "learning_rate": 4.982947272420932e-06, "loss": 2.463, "step": 6159 }, { "epoch": 0.33047210300429186, "grad_norm": 0.3203125, "learning_rate": 4.982937141088414e-06, "loss": 2.0599, "step": 6160 }, { "epoch": 0.3305257510729614, "grad_norm": 0.408203125, "learning_rate": 4.982927006757493e-06, "loss": 2.6343, "step": 6161 }, { "epoch": 0.3305793991416309, "grad_norm": 0.51953125, "learning_rate": 4.98291686942818e-06, "loss": 2.3084, "step": 6162 }, { "epoch": 0.3306330472103004, "grad_norm": 0.4453125, "learning_rate": 4.982906729100486e-06, "loss": 2.2834, "step": 6163 }, { "epoch": 0.33068669527896993, "grad_norm": 0.349609375, "learning_rate": 4.982896585774426e-06, "loss": 2.3624, "step": 6164 }, { "epoch": 0.33074034334763946, "grad_norm": 0.34765625, "learning_rate": 4.982886439450011e-06, "loss": 2.2742, "step": 6165 }, { "epoch": 0.330793991416309, "grad_norm": 0.40234375, "learning_rate": 4.982876290127252e-06, "loss": 2.2644, "step": 6166 }, { "epoch": 0.33084763948497853, "grad_norm": 1.1953125, "learning_rate": 4.982866137806162e-06, "loss": 2.4005, "step": 6167 }, { "epoch": 0.33090128755364806, "grad_norm": 0.39453125, "learning_rate": 4.982855982486755e-06, "loss": 2.4294, "step": 6168 }, { "epoch": 0.3309549356223176, "grad_norm": 0.3984375, "learning_rate": 4.982845824169041e-06, "loss": 2.2048, "step": 6169 }, { "epoch": 0.3310085836909871, "grad_norm": 0.353515625, "learning_rate": 4.9828356628530324e-06, "loss": 2.0934, "step": 6170 }, { "epoch": 0.33106223175965666, "grad_norm": 0.51171875, "learning_rate": 4.982825498538743e-06, "loss": 2.5427, "step": 6171 }, { "epoch": 0.3311158798283262, "grad_norm": 0.37109375, "learning_rate": 4.982815331226185e-06, "loss": 2.3313, "step": 6172 }, { "epoch": 0.3311695278969957, "grad_norm": 0.42578125, "learning_rate": 4.982805160915368e-06, "loss": 2.4755, "step": 6173 }, { "epoch": 0.33122317596566525, "grad_norm": 0.43359375, "learning_rate": 4.9827949876063076e-06, "loss": 2.3055, "step": 6174 }, { "epoch": 0.3312768240343348, "grad_norm": 0.392578125, "learning_rate": 4.982784811299014e-06, "loss": 2.2944, "step": 6175 }, { "epoch": 0.3313304721030043, "grad_norm": 0.3984375, "learning_rate": 4.9827746319935e-06, "loss": 2.2646, "step": 6176 }, { "epoch": 0.33138412017167385, "grad_norm": 0.431640625, "learning_rate": 4.982764449689779e-06, "loss": 2.3164, "step": 6177 }, { "epoch": 0.3314377682403433, "grad_norm": 0.365234375, "learning_rate": 4.9827542643878615e-06, "loss": 2.2809, "step": 6178 }, { "epoch": 0.33149141630901285, "grad_norm": 0.416015625, "learning_rate": 4.982744076087761e-06, "loss": 2.4459, "step": 6179 }, { "epoch": 0.3315450643776824, "grad_norm": 0.49609375, "learning_rate": 4.98273388478949e-06, "loss": 2.2354, "step": 6180 }, { "epoch": 0.3315987124463519, "grad_norm": 0.419921875, "learning_rate": 4.9827236904930595e-06, "loss": 2.437, "step": 6181 }, { "epoch": 0.33165236051502145, "grad_norm": 0.435546875, "learning_rate": 4.982713493198482e-06, "loss": 2.1946, "step": 6182 }, { "epoch": 0.331706008583691, "grad_norm": 0.365234375, "learning_rate": 4.9827032929057715e-06, "loss": 2.269, "step": 6183 }, { "epoch": 0.3317596566523605, "grad_norm": 0.3671875, "learning_rate": 4.982693089614939e-06, "loss": 2.2461, "step": 6184 }, { "epoch": 0.33181330472103004, "grad_norm": 0.369140625, "learning_rate": 4.982682883325996e-06, "loss": 2.2304, "step": 6185 }, { "epoch": 0.3318669527896996, "grad_norm": 0.462890625, "learning_rate": 4.982672674038957e-06, "loss": 2.2716, "step": 6186 }, { "epoch": 0.3319206008583691, "grad_norm": 0.734375, "learning_rate": 4.982662461753833e-06, "loss": 2.0953, "step": 6187 }, { "epoch": 0.33197424892703864, "grad_norm": 0.412109375, "learning_rate": 4.9826522464706364e-06, "loss": 2.1803, "step": 6188 }, { "epoch": 0.33202789699570817, "grad_norm": 0.478515625, "learning_rate": 4.9826420281893796e-06, "loss": 2.0118, "step": 6189 }, { "epoch": 0.3320815450643777, "grad_norm": 0.416015625, "learning_rate": 4.982631806910075e-06, "loss": 2.1027, "step": 6190 }, { "epoch": 0.33213519313304724, "grad_norm": 0.490234375, "learning_rate": 4.982621582632735e-06, "loss": 2.4729, "step": 6191 }, { "epoch": 0.3321888412017167, "grad_norm": 0.353515625, "learning_rate": 4.982611355357372e-06, "loss": 2.2429, "step": 6192 }, { "epoch": 0.33224248927038624, "grad_norm": 0.51953125, "learning_rate": 4.982601125083997e-06, "loss": 2.4343, "step": 6193 }, { "epoch": 0.3322961373390558, "grad_norm": 0.427734375, "learning_rate": 4.982590891812625e-06, "loss": 2.4006, "step": 6194 }, { "epoch": 0.3323497854077253, "grad_norm": 0.49609375, "learning_rate": 4.982580655543267e-06, "loss": 2.3188, "step": 6195 }, { "epoch": 0.33240343347639484, "grad_norm": 0.373046875, "learning_rate": 4.982570416275934e-06, "loss": 2.3659, "step": 6196 }, { "epoch": 0.33245708154506437, "grad_norm": 0.388671875, "learning_rate": 4.982560174010641e-06, "loss": 2.3628, "step": 6197 }, { "epoch": 0.3325107296137339, "grad_norm": 0.302734375, "learning_rate": 4.982549928747398e-06, "loss": 2.1509, "step": 6198 }, { "epoch": 0.33256437768240343, "grad_norm": 0.39453125, "learning_rate": 4.982539680486219e-06, "loss": 2.3014, "step": 6199 }, { "epoch": 0.33261802575107297, "grad_norm": 0.369140625, "learning_rate": 4.982529429227115e-06, "loss": 2.2125, "step": 6200 }, { "epoch": 0.3326716738197425, "grad_norm": 0.41015625, "learning_rate": 4.9825191749701005e-06, "loss": 2.2714, "step": 6201 }, { "epoch": 0.33272532188841203, "grad_norm": 0.34765625, "learning_rate": 4.982508917715186e-06, "loss": 2.3071, "step": 6202 }, { "epoch": 0.33277896995708156, "grad_norm": 0.345703125, "learning_rate": 4.982498657462384e-06, "loss": 2.3179, "step": 6203 }, { "epoch": 0.3328326180257511, "grad_norm": 0.431640625, "learning_rate": 4.982488394211708e-06, "loss": 2.3073, "step": 6204 }, { "epoch": 0.3328862660944206, "grad_norm": 0.55078125, "learning_rate": 4.982478127963169e-06, "loss": 2.2443, "step": 6205 }, { "epoch": 0.33293991416309016, "grad_norm": 0.47265625, "learning_rate": 4.982467858716781e-06, "loss": 2.3316, "step": 6206 }, { "epoch": 0.33299356223175963, "grad_norm": 0.396484375, "learning_rate": 4.982457586472554e-06, "loss": 2.2676, "step": 6207 }, { "epoch": 0.33304721030042916, "grad_norm": 0.462890625, "learning_rate": 4.982447311230503e-06, "loss": 2.276, "step": 6208 }, { "epoch": 0.3331008583690987, "grad_norm": 0.5078125, "learning_rate": 4.982437032990639e-06, "loss": 2.2825, "step": 6209 }, { "epoch": 0.3331545064377682, "grad_norm": 0.3671875, "learning_rate": 4.982426751752976e-06, "loss": 2.1818, "step": 6210 }, { "epoch": 0.33320815450643776, "grad_norm": 0.416015625, "learning_rate": 4.982416467517523e-06, "loss": 2.2493, "step": 6211 }, { "epoch": 0.3332618025751073, "grad_norm": 0.353515625, "learning_rate": 4.982406180284296e-06, "loss": 2.1804, "step": 6212 }, { "epoch": 0.3333154506437768, "grad_norm": 0.44140625, "learning_rate": 4.9823958900533055e-06, "loss": 2.6195, "step": 6213 }, { "epoch": 0.33336909871244635, "grad_norm": 0.671875, "learning_rate": 4.982385596824565e-06, "loss": 2.4457, "step": 6214 }, { "epoch": 0.3334227467811159, "grad_norm": 0.361328125, "learning_rate": 4.9823753005980854e-06, "loss": 2.4828, "step": 6215 }, { "epoch": 0.3334763948497854, "grad_norm": 0.404296875, "learning_rate": 4.982365001373881e-06, "loss": 2.1065, "step": 6216 }, { "epoch": 0.33353004291845495, "grad_norm": 0.5390625, "learning_rate": 4.982354699151962e-06, "loss": 1.8164, "step": 6217 }, { "epoch": 0.3335836909871245, "grad_norm": 0.302734375, "learning_rate": 4.982344393932344e-06, "loss": 1.9037, "step": 6218 }, { "epoch": 0.333637339055794, "grad_norm": 0.419921875, "learning_rate": 4.982334085715036e-06, "loss": 2.3238, "step": 6219 }, { "epoch": 0.33369098712446355, "grad_norm": 0.47265625, "learning_rate": 4.982323774500053e-06, "loss": 2.4664, "step": 6220 }, { "epoch": 0.333744635193133, "grad_norm": 0.423828125, "learning_rate": 4.982313460287407e-06, "loss": 2.2352, "step": 6221 }, { "epoch": 0.33379828326180255, "grad_norm": 0.375, "learning_rate": 4.982303143077109e-06, "loss": 2.1402, "step": 6222 }, { "epoch": 0.3338519313304721, "grad_norm": 0.408203125, "learning_rate": 4.982292822869172e-06, "loss": 2.0841, "step": 6223 }, { "epoch": 0.3339055793991416, "grad_norm": 0.470703125, "learning_rate": 4.9822824996636095e-06, "loss": 2.2915, "step": 6224 }, { "epoch": 0.33395922746781115, "grad_norm": 0.41796875, "learning_rate": 4.982272173460434e-06, "loss": 2.0763, "step": 6225 }, { "epoch": 0.3340128755364807, "grad_norm": 0.3984375, "learning_rate": 4.982261844259656e-06, "loss": 2.1941, "step": 6226 }, { "epoch": 0.3340665236051502, "grad_norm": 0.41796875, "learning_rate": 4.9822515120612905e-06, "loss": 2.4662, "step": 6227 }, { "epoch": 0.33412017167381974, "grad_norm": 0.388671875, "learning_rate": 4.982241176865348e-06, "loss": 2.1588, "step": 6228 }, { "epoch": 0.3341738197424893, "grad_norm": 0.3984375, "learning_rate": 4.982230838671842e-06, "loss": 2.232, "step": 6229 }, { "epoch": 0.3342274678111588, "grad_norm": 0.41015625, "learning_rate": 4.982220497480784e-06, "loss": 2.1211, "step": 6230 }, { "epoch": 0.33428111587982834, "grad_norm": 0.44921875, "learning_rate": 4.9822101532921885e-06, "loss": 1.7361, "step": 6231 }, { "epoch": 0.33433476394849787, "grad_norm": 0.337890625, "learning_rate": 4.982199806106066e-06, "loss": 2.3473, "step": 6232 }, { "epoch": 0.3343884120171674, "grad_norm": 0.4375, "learning_rate": 4.98218945592243e-06, "loss": 2.2169, "step": 6233 }, { "epoch": 0.33444206008583693, "grad_norm": 0.392578125, "learning_rate": 4.982179102741293e-06, "loss": 2.1592, "step": 6234 }, { "epoch": 0.3344957081545064, "grad_norm": 0.384765625, "learning_rate": 4.9821687465626665e-06, "loss": 2.3095, "step": 6235 }, { "epoch": 0.33454935622317594, "grad_norm": 0.40625, "learning_rate": 4.982158387386564e-06, "loss": 2.2263, "step": 6236 }, { "epoch": 0.3346030042918455, "grad_norm": 0.40234375, "learning_rate": 4.982148025212998e-06, "loss": 2.434, "step": 6237 }, { "epoch": 0.334656652360515, "grad_norm": 0.375, "learning_rate": 4.982137660041981e-06, "loss": 2.4396, "step": 6238 }, { "epoch": 0.33471030042918454, "grad_norm": 0.427734375, "learning_rate": 4.982127291873524e-06, "loss": 2.0189, "step": 6239 }, { "epoch": 0.33476394849785407, "grad_norm": 0.392578125, "learning_rate": 4.9821169207076415e-06, "loss": 2.3757, "step": 6240 }, { "epoch": 0.3348175965665236, "grad_norm": 0.51953125, "learning_rate": 4.982106546544345e-06, "loss": 2.1437, "step": 6241 }, { "epoch": 0.33487124463519313, "grad_norm": 0.455078125, "learning_rate": 4.9820961693836476e-06, "loss": 1.8153, "step": 6242 }, { "epoch": 0.33492489270386266, "grad_norm": 0.390625, "learning_rate": 4.982085789225562e-06, "loss": 2.4534, "step": 6243 }, { "epoch": 0.3349785407725322, "grad_norm": 1.0859375, "learning_rate": 4.9820754060701e-06, "loss": 2.6412, "step": 6244 }, { "epoch": 0.33503218884120173, "grad_norm": 0.451171875, "learning_rate": 4.982065019917274e-06, "loss": 2.0847, "step": 6245 }, { "epoch": 0.33508583690987126, "grad_norm": 0.4765625, "learning_rate": 4.982054630767098e-06, "loss": 2.1131, "step": 6246 }, { "epoch": 0.3351394849785408, "grad_norm": 0.58984375, "learning_rate": 4.982044238619582e-06, "loss": 2.5332, "step": 6247 }, { "epoch": 0.3351931330472103, "grad_norm": 0.416015625, "learning_rate": 4.982033843474742e-06, "loss": 2.4738, "step": 6248 }, { "epoch": 0.33524678111587985, "grad_norm": 0.4296875, "learning_rate": 4.982023445332587e-06, "loss": 2.2144, "step": 6249 }, { "epoch": 0.33530042918454933, "grad_norm": 0.41796875, "learning_rate": 4.982013044193131e-06, "loss": 2.4081, "step": 6250 }, { "epoch": 0.33535407725321886, "grad_norm": 0.5546875, "learning_rate": 4.982002640056388e-06, "loss": 2.2374, "step": 6251 }, { "epoch": 0.3354077253218884, "grad_norm": 0.373046875, "learning_rate": 4.9819922329223694e-06, "loss": 2.4697, "step": 6252 }, { "epoch": 0.3354613733905579, "grad_norm": 0.451171875, "learning_rate": 4.981981822791087e-06, "loss": 2.6227, "step": 6253 }, { "epoch": 0.33551502145922746, "grad_norm": 0.44140625, "learning_rate": 4.981971409662554e-06, "loss": 2.3198, "step": 6254 }, { "epoch": 0.335568669527897, "grad_norm": 0.365234375, "learning_rate": 4.981960993536783e-06, "loss": 2.331, "step": 6255 }, { "epoch": 0.3356223175965665, "grad_norm": 0.3359375, "learning_rate": 4.981950574413787e-06, "loss": 2.4817, "step": 6256 }, { "epoch": 0.33567596566523605, "grad_norm": 0.388671875, "learning_rate": 4.981940152293578e-06, "loss": 2.2822, "step": 6257 }, { "epoch": 0.3357296137339056, "grad_norm": 0.55078125, "learning_rate": 4.981929727176169e-06, "loss": 2.5526, "step": 6258 }, { "epoch": 0.3357832618025751, "grad_norm": 0.65625, "learning_rate": 4.981919299061572e-06, "loss": 2.2303, "step": 6259 }, { "epoch": 0.33583690987124465, "grad_norm": 0.55078125, "learning_rate": 4.9819088679498e-06, "loss": 2.2919, "step": 6260 }, { "epoch": 0.3358905579399142, "grad_norm": 0.38671875, "learning_rate": 4.981898433840865e-06, "loss": 2.1403, "step": 6261 }, { "epoch": 0.3359442060085837, "grad_norm": 0.55859375, "learning_rate": 4.981887996734781e-06, "loss": 2.5637, "step": 6262 }, { "epoch": 0.33599785407725324, "grad_norm": 0.44140625, "learning_rate": 4.98187755663156e-06, "loss": 2.0781, "step": 6263 }, { "epoch": 0.3360515021459227, "grad_norm": 0.404296875, "learning_rate": 4.9818671135312135e-06, "loss": 2.488, "step": 6264 }, { "epoch": 0.33610515021459225, "grad_norm": 0.546875, "learning_rate": 4.9818566674337556e-06, "loss": 1.6521, "step": 6265 }, { "epoch": 0.3361587982832618, "grad_norm": 0.373046875, "learning_rate": 4.981846218339198e-06, "loss": 2.1666, "step": 6266 }, { "epoch": 0.3362124463519313, "grad_norm": 0.421875, "learning_rate": 4.981835766247554e-06, "loss": 2.4519, "step": 6267 }, { "epoch": 0.33626609442060085, "grad_norm": 0.384765625, "learning_rate": 4.981825311158835e-06, "loss": 2.4416, "step": 6268 }, { "epoch": 0.3363197424892704, "grad_norm": 0.37890625, "learning_rate": 4.981814853073055e-06, "loss": 2.2633, "step": 6269 }, { "epoch": 0.3363733905579399, "grad_norm": 0.35546875, "learning_rate": 4.981804391990226e-06, "loss": 2.136, "step": 6270 }, { "epoch": 0.33642703862660944, "grad_norm": 0.5234375, "learning_rate": 4.9817939279103605e-06, "loss": 2.2456, "step": 6271 }, { "epoch": 0.336480686695279, "grad_norm": 0.41015625, "learning_rate": 4.981783460833471e-06, "loss": 2.384, "step": 6272 }, { "epoch": 0.3365343347639485, "grad_norm": 0.412109375, "learning_rate": 4.981772990759572e-06, "loss": 2.0576, "step": 6273 }, { "epoch": 0.33658798283261804, "grad_norm": 0.4296875, "learning_rate": 4.981762517688673e-06, "loss": 2.4873, "step": 6274 }, { "epoch": 0.33664163090128757, "grad_norm": 0.34375, "learning_rate": 4.981752041620789e-06, "loss": 2.037, "step": 6275 }, { "epoch": 0.3366952789699571, "grad_norm": 0.4140625, "learning_rate": 4.981741562555932e-06, "loss": 2.0247, "step": 6276 }, { "epoch": 0.33674892703862663, "grad_norm": 3.140625, "learning_rate": 4.981731080494114e-06, "loss": 2.1542, "step": 6277 }, { "epoch": 0.33680257510729616, "grad_norm": 0.39453125, "learning_rate": 4.981720595435349e-06, "loss": 2.2077, "step": 6278 }, { "epoch": 0.33685622317596564, "grad_norm": 0.3984375, "learning_rate": 4.981710107379649e-06, "loss": 2.0963, "step": 6279 }, { "epoch": 0.3369098712446352, "grad_norm": 0.498046875, "learning_rate": 4.981699616327026e-06, "loss": 2.3866, "step": 6280 }, { "epoch": 0.3369635193133047, "grad_norm": 0.4453125, "learning_rate": 4.981689122277494e-06, "loss": 2.3376, "step": 6281 }, { "epoch": 0.33701716738197424, "grad_norm": 0.396484375, "learning_rate": 4.981678625231065e-06, "loss": 2.3608, "step": 6282 }, { "epoch": 0.33707081545064377, "grad_norm": 0.76171875, "learning_rate": 4.98166812518775e-06, "loss": 2.4434, "step": 6283 }, { "epoch": 0.3371244635193133, "grad_norm": 0.396484375, "learning_rate": 4.981657622147564e-06, "loss": 2.2546, "step": 6284 }, { "epoch": 0.33717811158798283, "grad_norm": 0.447265625, "learning_rate": 4.981647116110519e-06, "loss": 2.2607, "step": 6285 }, { "epoch": 0.33723175965665236, "grad_norm": 0.408203125, "learning_rate": 4.981636607076629e-06, "loss": 2.1374, "step": 6286 }, { "epoch": 0.3372854077253219, "grad_norm": 1.1015625, "learning_rate": 4.981626095045904e-06, "loss": 2.2434, "step": 6287 }, { "epoch": 0.3373390557939914, "grad_norm": 0.345703125, "learning_rate": 4.981615580018358e-06, "loss": 2.0257, "step": 6288 }, { "epoch": 0.33739270386266096, "grad_norm": 0.46484375, "learning_rate": 4.981605061994004e-06, "loss": 2.3571, "step": 6289 }, { "epoch": 0.3374463519313305, "grad_norm": 0.392578125, "learning_rate": 4.981594540972854e-06, "loss": 2.2189, "step": 6290 }, { "epoch": 0.3375, "grad_norm": 0.404296875, "learning_rate": 4.9815840169549216e-06, "loss": 2.3277, "step": 6291 }, { "epoch": 0.33755364806866955, "grad_norm": 0.44921875, "learning_rate": 4.981573489940219e-06, "loss": 2.4662, "step": 6292 }, { "epoch": 0.33760729613733903, "grad_norm": 0.70703125, "learning_rate": 4.981562959928759e-06, "loss": 2.1752, "step": 6293 }, { "epoch": 0.33766094420600856, "grad_norm": 0.3984375, "learning_rate": 4.981552426920554e-06, "loss": 1.9591, "step": 6294 }, { "epoch": 0.3377145922746781, "grad_norm": 0.546875, "learning_rate": 4.981541890915617e-06, "loss": 2.3386, "step": 6295 }, { "epoch": 0.3377682403433476, "grad_norm": 0.390625, "learning_rate": 4.981531351913961e-06, "loss": 2.3421, "step": 6296 }, { "epoch": 0.33782188841201716, "grad_norm": 0.376953125, "learning_rate": 4.981520809915598e-06, "loss": 2.2087, "step": 6297 }, { "epoch": 0.3378755364806867, "grad_norm": 0.51953125, "learning_rate": 4.9815102649205415e-06, "loss": 2.3017, "step": 6298 }, { "epoch": 0.3379291845493562, "grad_norm": 0.361328125, "learning_rate": 4.981499716928804e-06, "loss": 2.2444, "step": 6299 }, { "epoch": 0.33798283261802575, "grad_norm": 0.310546875, "learning_rate": 4.981489165940398e-06, "loss": 1.9625, "step": 6300 }, { "epoch": 0.3380364806866953, "grad_norm": 0.40625, "learning_rate": 4.981478611955336e-06, "loss": 2.1885, "step": 6301 }, { "epoch": 0.3380901287553648, "grad_norm": 0.330078125, "learning_rate": 4.981468054973631e-06, "loss": 2.0947, "step": 6302 }, { "epoch": 0.33814377682403435, "grad_norm": 0.46875, "learning_rate": 4.9814574949952974e-06, "loss": 2.3617, "step": 6303 }, { "epoch": 0.3381974248927039, "grad_norm": 0.86328125, "learning_rate": 4.981446932020345e-06, "loss": 2.3611, "step": 6304 }, { "epoch": 0.3382510729613734, "grad_norm": 0.70703125, "learning_rate": 4.9814363660487885e-06, "loss": 2.3331, "step": 6305 }, { "epoch": 0.33830472103004294, "grad_norm": 0.3359375, "learning_rate": 4.98142579708064e-06, "loss": 2.2648, "step": 6306 }, { "epoch": 0.3383583690987124, "grad_norm": 0.546875, "learning_rate": 4.981415225115913e-06, "loss": 2.2553, "step": 6307 }, { "epoch": 0.33841201716738195, "grad_norm": 0.447265625, "learning_rate": 4.981404650154619e-06, "loss": 2.2818, "step": 6308 }, { "epoch": 0.3384656652360515, "grad_norm": 0.4140625, "learning_rate": 4.981394072196772e-06, "loss": 2.1893, "step": 6309 }, { "epoch": 0.338519313304721, "grad_norm": 0.421875, "learning_rate": 4.981383491242383e-06, "loss": 1.9636, "step": 6310 }, { "epoch": 0.33857296137339055, "grad_norm": 0.40234375, "learning_rate": 4.981372907291468e-06, "loss": 2.5685, "step": 6311 }, { "epoch": 0.3386266094420601, "grad_norm": 0.427734375, "learning_rate": 4.981362320344036e-06, "loss": 2.5103, "step": 6312 }, { "epoch": 0.3386802575107296, "grad_norm": 0.37109375, "learning_rate": 4.981351730400102e-06, "loss": 2.4726, "step": 6313 }, { "epoch": 0.33873390557939914, "grad_norm": 0.359375, "learning_rate": 4.9813411374596785e-06, "loss": 2.1329, "step": 6314 }, { "epoch": 0.3387875536480687, "grad_norm": 0.53515625, "learning_rate": 4.981330541522778e-06, "loss": 1.7271, "step": 6315 }, { "epoch": 0.3388412017167382, "grad_norm": 0.359375, "learning_rate": 4.9813199425894136e-06, "loss": 2.1693, "step": 6316 }, { "epoch": 0.33889484978540774, "grad_norm": 0.345703125, "learning_rate": 4.981309340659598e-06, "loss": 2.3237, "step": 6317 }, { "epoch": 0.33894849785407727, "grad_norm": 0.390625, "learning_rate": 4.9812987357333444e-06, "loss": 2.491, "step": 6318 }, { "epoch": 0.3390021459227468, "grad_norm": 0.53515625, "learning_rate": 4.981288127810664e-06, "loss": 2.2532, "step": 6319 }, { "epoch": 0.33905579399141633, "grad_norm": 0.3671875, "learning_rate": 4.981277516891572e-06, "loss": 2.1667, "step": 6320 }, { "epoch": 0.33910944206008586, "grad_norm": 0.8046875, "learning_rate": 4.981266902976079e-06, "loss": 2.112, "step": 6321 }, { "epoch": 0.33916309012875534, "grad_norm": 0.349609375, "learning_rate": 4.981256286064199e-06, "loss": 2.1044, "step": 6322 }, { "epoch": 0.33921673819742487, "grad_norm": 0.419921875, "learning_rate": 4.981245666155945e-06, "loss": 2.1715, "step": 6323 }, { "epoch": 0.3392703862660944, "grad_norm": 0.419921875, "learning_rate": 4.981235043251329e-06, "loss": 2.2459, "step": 6324 }, { "epoch": 0.33932403433476394, "grad_norm": 0.447265625, "learning_rate": 4.981224417350364e-06, "loss": 2.3589, "step": 6325 }, { "epoch": 0.33937768240343347, "grad_norm": 0.37109375, "learning_rate": 4.981213788453064e-06, "loss": 2.3097, "step": 6326 }, { "epoch": 0.339431330472103, "grad_norm": 0.77734375, "learning_rate": 4.9812031565594406e-06, "loss": 2.3132, "step": 6327 }, { "epoch": 0.33948497854077253, "grad_norm": 0.3515625, "learning_rate": 4.981192521669507e-06, "loss": 2.6137, "step": 6328 }, { "epoch": 0.33953862660944206, "grad_norm": 0.400390625, "learning_rate": 4.981181883783275e-06, "loss": 2.2378, "step": 6329 }, { "epoch": 0.3395922746781116, "grad_norm": 0.416015625, "learning_rate": 4.98117124290076e-06, "loss": 2.3782, "step": 6330 }, { "epoch": 0.3396459227467811, "grad_norm": 0.37109375, "learning_rate": 4.981160599021973e-06, "loss": 2.1558, "step": 6331 }, { "epoch": 0.33969957081545066, "grad_norm": 0.4296875, "learning_rate": 4.981149952146927e-06, "loss": 2.2072, "step": 6332 }, { "epoch": 0.3397532188841202, "grad_norm": 0.404296875, "learning_rate": 4.981139302275634e-06, "loss": 2.2946, "step": 6333 }, { "epoch": 0.3398068669527897, "grad_norm": 0.408203125, "learning_rate": 4.981128649408109e-06, "loss": 2.2145, "step": 6334 }, { "epoch": 0.33986051502145925, "grad_norm": 0.37109375, "learning_rate": 4.9811179935443635e-06, "loss": 2.3604, "step": 6335 }, { "epoch": 0.33991416309012873, "grad_norm": 0.390625, "learning_rate": 4.98110733468441e-06, "loss": 2.2619, "step": 6336 }, { "epoch": 0.33996781115879826, "grad_norm": 0.3671875, "learning_rate": 4.981096672828263e-06, "loss": 2.0779, "step": 6337 }, { "epoch": 0.3400214592274678, "grad_norm": 0.419921875, "learning_rate": 4.981086007975934e-06, "loss": 2.2789, "step": 6338 }, { "epoch": 0.3400751072961373, "grad_norm": 0.341796875, "learning_rate": 4.981075340127436e-06, "loss": 2.2403, "step": 6339 }, { "epoch": 0.34012875536480686, "grad_norm": 0.486328125, "learning_rate": 4.981064669282782e-06, "loss": 2.2218, "step": 6340 }, { "epoch": 0.3401824034334764, "grad_norm": 0.369140625, "learning_rate": 4.981053995441985e-06, "loss": 2.1174, "step": 6341 }, { "epoch": 0.3402360515021459, "grad_norm": 0.388671875, "learning_rate": 4.9810433186050586e-06, "loss": 2.4048, "step": 6342 }, { "epoch": 0.34028969957081545, "grad_norm": 0.451171875, "learning_rate": 4.981032638772014e-06, "loss": 2.1118, "step": 6343 }, { "epoch": 0.340343347639485, "grad_norm": 0.3828125, "learning_rate": 4.981021955942866e-06, "loss": 2.4515, "step": 6344 }, { "epoch": 0.3403969957081545, "grad_norm": 0.431640625, "learning_rate": 4.9810112701176265e-06, "loss": 2.2746, "step": 6345 }, { "epoch": 0.34045064377682405, "grad_norm": 2.53125, "learning_rate": 4.981000581296308e-06, "loss": 1.8443, "step": 6346 }, { "epoch": 0.3405042918454936, "grad_norm": 0.3671875, "learning_rate": 4.980989889478924e-06, "loss": 2.0696, "step": 6347 }, { "epoch": 0.3405579399141631, "grad_norm": 0.42578125, "learning_rate": 4.9809791946654875e-06, "loss": 2.3379, "step": 6348 }, { "epoch": 0.34061158798283264, "grad_norm": 0.3828125, "learning_rate": 4.980968496856011e-06, "loss": 2.6259, "step": 6349 }, { "epoch": 0.3406652360515021, "grad_norm": 0.423828125, "learning_rate": 4.980957796050507e-06, "loss": 2.2757, "step": 6350 }, { "epoch": 0.34071888412017165, "grad_norm": 0.40625, "learning_rate": 4.980947092248991e-06, "loss": 2.4966, "step": 6351 }, { "epoch": 0.3407725321888412, "grad_norm": 0.703125, "learning_rate": 4.980936385451472e-06, "loss": 2.2821, "step": 6352 }, { "epoch": 0.3408261802575107, "grad_norm": 0.380859375, "learning_rate": 4.980925675657966e-06, "loss": 2.2872, "step": 6353 }, { "epoch": 0.34087982832618025, "grad_norm": 0.46875, "learning_rate": 4.980914962868485e-06, "loss": 2.3598, "step": 6354 }, { "epoch": 0.3409334763948498, "grad_norm": 0.40625, "learning_rate": 4.980904247083041e-06, "loss": 2.4833, "step": 6355 }, { "epoch": 0.3409871244635193, "grad_norm": 0.3671875, "learning_rate": 4.980893528301648e-06, "loss": 2.3833, "step": 6356 }, { "epoch": 0.34104077253218884, "grad_norm": 0.41015625, "learning_rate": 4.980882806524319e-06, "loss": 2.2885, "step": 6357 }, { "epoch": 0.3410944206008584, "grad_norm": 0.43359375, "learning_rate": 4.9808720817510665e-06, "loss": 2.6119, "step": 6358 }, { "epoch": 0.3411480686695279, "grad_norm": 0.5234375, "learning_rate": 4.980861353981904e-06, "loss": 2.6435, "step": 6359 }, { "epoch": 0.34120171673819744, "grad_norm": 0.41015625, "learning_rate": 4.980850623216843e-06, "loss": 2.299, "step": 6360 }, { "epoch": 0.34125536480686697, "grad_norm": 0.431640625, "learning_rate": 4.9808398894558985e-06, "loss": 2.3774, "step": 6361 }, { "epoch": 0.3413090128755365, "grad_norm": 0.357421875, "learning_rate": 4.980829152699081e-06, "loss": 2.1218, "step": 6362 }, { "epoch": 0.34136266094420603, "grad_norm": 0.37109375, "learning_rate": 4.980818412946407e-06, "loss": 2.3273, "step": 6363 }, { "epoch": 0.34141630901287556, "grad_norm": 0.40625, "learning_rate": 4.9808076701978854e-06, "loss": 2.3034, "step": 6364 }, { "epoch": 0.34146995708154504, "grad_norm": 0.423828125, "learning_rate": 4.980796924453533e-06, "loss": 2.6145, "step": 6365 }, { "epoch": 0.34152360515021457, "grad_norm": 0.443359375, "learning_rate": 4.9807861757133595e-06, "loss": 2.4874, "step": 6366 }, { "epoch": 0.3415772532188841, "grad_norm": 0.5078125, "learning_rate": 4.98077542397738e-06, "loss": 2.1818, "step": 6367 }, { "epoch": 0.34163090128755363, "grad_norm": 0.384765625, "learning_rate": 4.980764669245607e-06, "loss": 2.3044, "step": 6368 }, { "epoch": 0.34168454935622317, "grad_norm": 0.51953125, "learning_rate": 4.980753911518052e-06, "loss": 2.3247, "step": 6369 }, { "epoch": 0.3417381974248927, "grad_norm": 0.34765625, "learning_rate": 4.980743150794731e-06, "loss": 2.3258, "step": 6370 }, { "epoch": 0.34179184549356223, "grad_norm": 0.3828125, "learning_rate": 4.980732387075654e-06, "loss": 2.2604, "step": 6371 }, { "epoch": 0.34184549356223176, "grad_norm": 0.373046875, "learning_rate": 4.980721620360836e-06, "loss": 2.0988, "step": 6372 }, { "epoch": 0.3418991416309013, "grad_norm": 0.69140625, "learning_rate": 4.980710850650289e-06, "loss": 2.3415, "step": 6373 }, { "epoch": 0.3419527896995708, "grad_norm": 0.416015625, "learning_rate": 4.9807000779440265e-06, "loss": 2.179, "step": 6374 }, { "epoch": 0.34200643776824036, "grad_norm": 0.443359375, "learning_rate": 4.980689302242061e-06, "loss": 2.3506, "step": 6375 }, { "epoch": 0.3420600858369099, "grad_norm": 0.52734375, "learning_rate": 4.980678523544406e-06, "loss": 2.2638, "step": 6376 }, { "epoch": 0.3421137339055794, "grad_norm": 0.373046875, "learning_rate": 4.980667741851074e-06, "loss": 2.1076, "step": 6377 }, { "epoch": 0.34216738197424895, "grad_norm": 0.375, "learning_rate": 4.980656957162079e-06, "loss": 2.303, "step": 6378 }, { "epoch": 0.34222103004291843, "grad_norm": 0.51953125, "learning_rate": 4.980646169477432e-06, "loss": 2.1318, "step": 6379 }, { "epoch": 0.34227467811158796, "grad_norm": 0.431640625, "learning_rate": 4.980635378797148e-06, "loss": 2.3494, "step": 6380 }, { "epoch": 0.3423283261802575, "grad_norm": 0.38671875, "learning_rate": 4.98062458512124e-06, "loss": 2.3982, "step": 6381 }, { "epoch": 0.342381974248927, "grad_norm": 0.37109375, "learning_rate": 4.980613788449721e-06, "loss": 2.3793, "step": 6382 }, { "epoch": 0.34243562231759656, "grad_norm": 0.365234375, "learning_rate": 4.980602988782602e-06, "loss": 1.78, "step": 6383 }, { "epoch": 0.3424892703862661, "grad_norm": 0.5859375, "learning_rate": 4.980592186119899e-06, "loss": 2.1369, "step": 6384 }, { "epoch": 0.3425429184549356, "grad_norm": 0.369140625, "learning_rate": 4.980581380461622e-06, "loss": 2.2529, "step": 6385 }, { "epoch": 0.34259656652360515, "grad_norm": 0.431640625, "learning_rate": 4.980570571807787e-06, "loss": 2.5586, "step": 6386 }, { "epoch": 0.3426502145922747, "grad_norm": 0.462890625, "learning_rate": 4.980559760158404e-06, "loss": 2.4511, "step": 6387 }, { "epoch": 0.3427038626609442, "grad_norm": 0.54296875, "learning_rate": 4.980548945513489e-06, "loss": 2.4863, "step": 6388 }, { "epoch": 0.34275751072961375, "grad_norm": 0.515625, "learning_rate": 4.980538127873054e-06, "loss": 2.0726, "step": 6389 }, { "epoch": 0.3428111587982833, "grad_norm": 0.408203125, "learning_rate": 4.980527307237112e-06, "loss": 2.3548, "step": 6390 }, { "epoch": 0.3428648068669528, "grad_norm": 0.419921875, "learning_rate": 4.980516483605675e-06, "loss": 2.2894, "step": 6391 }, { "epoch": 0.34291845493562234, "grad_norm": 0.369140625, "learning_rate": 4.980505656978758e-06, "loss": 2.2732, "step": 6392 }, { "epoch": 0.3429721030042919, "grad_norm": 0.54296875, "learning_rate": 4.980494827356372e-06, "loss": 1.5462, "step": 6393 }, { "epoch": 0.34302575107296135, "grad_norm": 0.328125, "learning_rate": 4.980483994738532e-06, "loss": 2.151, "step": 6394 }, { "epoch": 0.3430793991416309, "grad_norm": 0.37109375, "learning_rate": 4.98047315912525e-06, "loss": 2.1465, "step": 6395 }, { "epoch": 0.3431330472103004, "grad_norm": 0.33203125, "learning_rate": 4.980462320516539e-06, "loss": 2.314, "step": 6396 }, { "epoch": 0.34318669527896994, "grad_norm": 0.384765625, "learning_rate": 4.980451478912412e-06, "loss": 2.2561, "step": 6397 }, { "epoch": 0.3432403433476395, "grad_norm": 0.3828125, "learning_rate": 4.980440634312883e-06, "loss": 2.2415, "step": 6398 }, { "epoch": 0.343293991416309, "grad_norm": 0.4765625, "learning_rate": 4.9804297867179655e-06, "loss": 2.475, "step": 6399 }, { "epoch": 0.34334763948497854, "grad_norm": 0.412109375, "learning_rate": 4.980418936127671e-06, "loss": 2.3691, "step": 6400 }, { "epoch": 0.34340128755364807, "grad_norm": 0.5234375, "learning_rate": 4.980408082542012e-06, "loss": 2.3373, "step": 6401 }, { "epoch": 0.3434549356223176, "grad_norm": 0.8125, "learning_rate": 4.980397225961005e-06, "loss": 2.3909, "step": 6402 }, { "epoch": 0.34350858369098713, "grad_norm": 0.53515625, "learning_rate": 4.980386366384659e-06, "loss": 2.248, "step": 6403 }, { "epoch": 0.34356223175965667, "grad_norm": 0.76953125, "learning_rate": 4.980375503812991e-06, "loss": 2.1161, "step": 6404 }, { "epoch": 0.3436158798283262, "grad_norm": 0.45703125, "learning_rate": 4.980364638246011e-06, "loss": 2.361, "step": 6405 }, { "epoch": 0.34366952789699573, "grad_norm": 0.345703125, "learning_rate": 4.980353769683734e-06, "loss": 2.0882, "step": 6406 }, { "epoch": 0.34372317596566526, "grad_norm": 0.380859375, "learning_rate": 4.980342898126172e-06, "loss": 2.2048, "step": 6407 }, { "epoch": 0.34377682403433474, "grad_norm": 0.546875, "learning_rate": 4.98033202357334e-06, "loss": 2.228, "step": 6408 }, { "epoch": 0.34383047210300427, "grad_norm": 0.439453125, "learning_rate": 4.980321146025249e-06, "loss": 2.355, "step": 6409 }, { "epoch": 0.3438841201716738, "grad_norm": 0.443359375, "learning_rate": 4.980310265481912e-06, "loss": 2.1425, "step": 6410 }, { "epoch": 0.34393776824034333, "grad_norm": 0.396484375, "learning_rate": 4.980299381943343e-06, "loss": 2.3956, "step": 6411 }, { "epoch": 0.34399141630901287, "grad_norm": 0.421875, "learning_rate": 4.980288495409556e-06, "loss": 2.4367, "step": 6412 }, { "epoch": 0.3440450643776824, "grad_norm": 0.3984375, "learning_rate": 4.980277605880563e-06, "loss": 2.402, "step": 6413 }, { "epoch": 0.34409871244635193, "grad_norm": 0.47265625, "learning_rate": 4.980266713356378e-06, "loss": 2.5473, "step": 6414 }, { "epoch": 0.34415236051502146, "grad_norm": 0.44140625, "learning_rate": 4.980255817837013e-06, "loss": 2.3422, "step": 6415 }, { "epoch": 0.344206008583691, "grad_norm": 0.423828125, "learning_rate": 4.9802449193224826e-06, "loss": 2.2635, "step": 6416 }, { "epoch": 0.3442596566523605, "grad_norm": 0.400390625, "learning_rate": 4.980234017812799e-06, "loss": 2.2819, "step": 6417 }, { "epoch": 0.34431330472103006, "grad_norm": 0.369140625, "learning_rate": 4.980223113307974e-06, "loss": 2.2514, "step": 6418 }, { "epoch": 0.3443669527896996, "grad_norm": 0.45703125, "learning_rate": 4.980212205808024e-06, "loss": 2.3496, "step": 6419 }, { "epoch": 0.3444206008583691, "grad_norm": 0.375, "learning_rate": 4.9802012953129595e-06, "loss": 2.4053, "step": 6420 }, { "epoch": 0.34447424892703865, "grad_norm": 0.416015625, "learning_rate": 4.980190381822795e-06, "loss": 2.5989, "step": 6421 }, { "epoch": 0.3445278969957081, "grad_norm": 0.388671875, "learning_rate": 4.980179465337542e-06, "loss": 2.4138, "step": 6422 }, { "epoch": 0.34458154506437766, "grad_norm": 0.380859375, "learning_rate": 4.980168545857217e-06, "loss": 1.9399, "step": 6423 }, { "epoch": 0.3446351931330472, "grad_norm": 0.400390625, "learning_rate": 4.980157623381831e-06, "loss": 2.507, "step": 6424 }, { "epoch": 0.3446888412017167, "grad_norm": 0.431640625, "learning_rate": 4.980146697911396e-06, "loss": 2.2255, "step": 6425 }, { "epoch": 0.34474248927038625, "grad_norm": 0.4921875, "learning_rate": 4.980135769445928e-06, "loss": 2.2857, "step": 6426 }, { "epoch": 0.3447961373390558, "grad_norm": 0.4375, "learning_rate": 4.9801248379854375e-06, "loss": 2.3759, "step": 6427 }, { "epoch": 0.3448497854077253, "grad_norm": 0.412109375, "learning_rate": 4.980113903529939e-06, "loss": 2.2865, "step": 6428 }, { "epoch": 0.34490343347639485, "grad_norm": 0.40625, "learning_rate": 4.9801029660794465e-06, "loss": 2.1587, "step": 6429 }, { "epoch": 0.3449570815450644, "grad_norm": 0.435546875, "learning_rate": 4.980092025633971e-06, "loss": 2.5009, "step": 6430 }, { "epoch": 0.3450107296137339, "grad_norm": 0.388671875, "learning_rate": 4.980081082193529e-06, "loss": 2.2528, "step": 6431 }, { "epoch": 0.34506437768240344, "grad_norm": 0.42578125, "learning_rate": 4.980070135758131e-06, "loss": 2.3081, "step": 6432 }, { "epoch": 0.345118025751073, "grad_norm": 0.40625, "learning_rate": 4.980059186327791e-06, "loss": 2.3071, "step": 6433 }, { "epoch": 0.3451716738197425, "grad_norm": 0.45703125, "learning_rate": 4.980048233902521e-06, "loss": 2.2502, "step": 6434 }, { "epoch": 0.34522532188841204, "grad_norm": 0.365234375, "learning_rate": 4.980037278482337e-06, "loss": 2.4057, "step": 6435 }, { "epoch": 0.34527896995708157, "grad_norm": 0.330078125, "learning_rate": 4.98002632006725e-06, "loss": 2.2462, "step": 6436 }, { "epoch": 0.34533261802575105, "grad_norm": 0.5, "learning_rate": 4.9800153586572745e-06, "loss": 2.1818, "step": 6437 }, { "epoch": 0.3453862660944206, "grad_norm": 0.42578125, "learning_rate": 4.980004394252422e-06, "loss": 2.323, "step": 6438 }, { "epoch": 0.3454399141630901, "grad_norm": 0.43359375, "learning_rate": 4.979993426852708e-06, "loss": 2.362, "step": 6439 }, { "epoch": 0.34549356223175964, "grad_norm": 0.373046875, "learning_rate": 4.9799824564581445e-06, "loss": 2.4948, "step": 6440 }, { "epoch": 0.3455472103004292, "grad_norm": 0.427734375, "learning_rate": 4.979971483068745e-06, "loss": 2.2637, "step": 6441 }, { "epoch": 0.3456008583690987, "grad_norm": 0.61328125, "learning_rate": 4.979960506684522e-06, "loss": 2.3037, "step": 6442 }, { "epoch": 0.34565450643776824, "grad_norm": 0.390625, "learning_rate": 4.97994952730549e-06, "loss": 2.3596, "step": 6443 }, { "epoch": 0.34570815450643777, "grad_norm": 0.3984375, "learning_rate": 4.979938544931661e-06, "loss": 2.3323, "step": 6444 }, { "epoch": 0.3457618025751073, "grad_norm": 0.43359375, "learning_rate": 4.97992755956305e-06, "loss": 2.2467, "step": 6445 }, { "epoch": 0.34581545064377683, "grad_norm": 0.404296875, "learning_rate": 4.979916571199668e-06, "loss": 2.3108, "step": 6446 }, { "epoch": 0.34586909871244637, "grad_norm": 0.6328125, "learning_rate": 4.97990557984153e-06, "loss": 2.4107, "step": 6447 }, { "epoch": 0.3459227467811159, "grad_norm": 0.4296875, "learning_rate": 4.979894585488649e-06, "loss": 2.4041, "step": 6448 }, { "epoch": 0.34597639484978543, "grad_norm": 0.400390625, "learning_rate": 4.9798835881410375e-06, "loss": 2.0966, "step": 6449 }, { "epoch": 0.34603004291845496, "grad_norm": 0.3671875, "learning_rate": 4.97987258779871e-06, "loss": 2.4556, "step": 6450 }, { "epoch": 0.34608369098712444, "grad_norm": 0.439453125, "learning_rate": 4.979861584461679e-06, "loss": 2.0538, "step": 6451 }, { "epoch": 0.34613733905579397, "grad_norm": 0.419921875, "learning_rate": 4.9798505781299565e-06, "loss": 2.1759, "step": 6452 }, { "epoch": 0.3461909871244635, "grad_norm": 0.337890625, "learning_rate": 4.979839568803559e-06, "loss": 2.1319, "step": 6453 }, { "epoch": 0.34624463519313303, "grad_norm": 0.427734375, "learning_rate": 4.979828556482496e-06, "loss": 2.2646, "step": 6454 }, { "epoch": 0.34629828326180256, "grad_norm": 0.40234375, "learning_rate": 4.9798175411667835e-06, "loss": 2.2209, "step": 6455 }, { "epoch": 0.3463519313304721, "grad_norm": 2.140625, "learning_rate": 4.979806522856435e-06, "loss": 2.2363, "step": 6456 }, { "epoch": 0.3464055793991416, "grad_norm": 0.4765625, "learning_rate": 4.979795501551462e-06, "loss": 2.4007, "step": 6457 }, { "epoch": 0.34645922746781116, "grad_norm": 0.4140625, "learning_rate": 4.9797844772518785e-06, "loss": 2.4319, "step": 6458 }, { "epoch": 0.3465128755364807, "grad_norm": 0.369140625, "learning_rate": 4.9797734499576985e-06, "loss": 2.084, "step": 6459 }, { "epoch": 0.3465665236051502, "grad_norm": 0.396484375, "learning_rate": 4.979762419668934e-06, "loss": 2.2484, "step": 6460 }, { "epoch": 0.34662017167381975, "grad_norm": 0.412109375, "learning_rate": 4.9797513863856005e-06, "loss": 2.1538, "step": 6461 }, { "epoch": 0.3466738197424893, "grad_norm": 0.408203125, "learning_rate": 4.979740350107709e-06, "loss": 2.245, "step": 6462 }, { "epoch": 0.3467274678111588, "grad_norm": 0.478515625, "learning_rate": 4.979729310835274e-06, "loss": 2.2194, "step": 6463 }, { "epoch": 0.34678111587982835, "grad_norm": 0.376953125, "learning_rate": 4.979718268568308e-06, "loss": 2.4822, "step": 6464 }, { "epoch": 0.3468347639484979, "grad_norm": 0.412109375, "learning_rate": 4.979707223306825e-06, "loss": 2.3541, "step": 6465 }, { "epoch": 0.34688841201716736, "grad_norm": 0.3984375, "learning_rate": 4.979696175050839e-06, "loss": 2.2467, "step": 6466 }, { "epoch": 0.3469420600858369, "grad_norm": 0.4609375, "learning_rate": 4.9796851238003626e-06, "loss": 2.268, "step": 6467 }, { "epoch": 0.3469957081545064, "grad_norm": 0.40234375, "learning_rate": 4.9796740695554095e-06, "loss": 2.0618, "step": 6468 }, { "epoch": 0.34704935622317595, "grad_norm": 0.65625, "learning_rate": 4.979663012315991e-06, "loss": 2.2221, "step": 6469 }, { "epoch": 0.3471030042918455, "grad_norm": 0.423828125, "learning_rate": 4.979651952082123e-06, "loss": 2.3537, "step": 6470 }, { "epoch": 0.347156652360515, "grad_norm": 0.40234375, "learning_rate": 4.979640888853818e-06, "loss": 2.5463, "step": 6471 }, { "epoch": 0.34721030042918455, "grad_norm": 0.37109375, "learning_rate": 4.97962982263109e-06, "loss": 2.2813, "step": 6472 }, { "epoch": 0.3472639484978541, "grad_norm": 0.353515625, "learning_rate": 4.979618753413952e-06, "loss": 2.2056, "step": 6473 }, { "epoch": 0.3473175965665236, "grad_norm": 0.451171875, "learning_rate": 4.979607681202415e-06, "loss": 2.2139, "step": 6474 }, { "epoch": 0.34737124463519314, "grad_norm": 0.41015625, "learning_rate": 4.979596605996497e-06, "loss": 2.4119, "step": 6475 }, { "epoch": 0.3474248927038627, "grad_norm": 0.369140625, "learning_rate": 4.979585527796207e-06, "loss": 2.3119, "step": 6476 }, { "epoch": 0.3474785407725322, "grad_norm": 0.75, "learning_rate": 4.9795744466015615e-06, "loss": 2.3023, "step": 6477 }, { "epoch": 0.34753218884120174, "grad_norm": 0.37109375, "learning_rate": 4.9795633624125716e-06, "loss": 2.2497, "step": 6478 }, { "epoch": 0.34758583690987127, "grad_norm": 2.234375, "learning_rate": 4.979552275229252e-06, "loss": 2.3562, "step": 6479 }, { "epoch": 0.34763948497854075, "grad_norm": 0.40234375, "learning_rate": 4.979541185051616e-06, "loss": 2.2883, "step": 6480 }, { "epoch": 0.3476931330472103, "grad_norm": 0.3984375, "learning_rate": 4.979530091879677e-06, "loss": 2.3261, "step": 6481 }, { "epoch": 0.3477467811158798, "grad_norm": 0.38671875, "learning_rate": 4.979518995713448e-06, "loss": 2.1198, "step": 6482 }, { "epoch": 0.34780042918454934, "grad_norm": 0.40234375, "learning_rate": 4.979507896552942e-06, "loss": 2.1769, "step": 6483 }, { "epoch": 0.3478540772532189, "grad_norm": 0.37890625, "learning_rate": 4.979496794398174e-06, "loss": 2.2345, "step": 6484 }, { "epoch": 0.3479077253218884, "grad_norm": 0.33984375, "learning_rate": 4.979485689249155e-06, "loss": 2.2612, "step": 6485 }, { "epoch": 0.34796137339055794, "grad_norm": 0.326171875, "learning_rate": 4.979474581105901e-06, "loss": 1.8923, "step": 6486 }, { "epoch": 0.34801502145922747, "grad_norm": 0.5234375, "learning_rate": 4.979463469968424e-06, "loss": 2.3103, "step": 6487 }, { "epoch": 0.348068669527897, "grad_norm": 0.625, "learning_rate": 4.979452355836737e-06, "loss": 2.1151, "step": 6488 }, { "epoch": 0.34812231759656653, "grad_norm": 0.392578125, "learning_rate": 4.979441238710855e-06, "loss": 2.2843, "step": 6489 }, { "epoch": 0.34817596566523606, "grad_norm": 0.390625, "learning_rate": 4.97943011859079e-06, "loss": 2.2024, "step": 6490 }, { "epoch": 0.3482296137339056, "grad_norm": 0.373046875, "learning_rate": 4.979418995476555e-06, "loss": 2.406, "step": 6491 }, { "epoch": 0.34828326180257513, "grad_norm": 0.51171875, "learning_rate": 4.979407869368165e-06, "loss": 2.1829, "step": 6492 }, { "epoch": 0.34833690987124466, "grad_norm": 0.384765625, "learning_rate": 4.979396740265633e-06, "loss": 2.1728, "step": 6493 }, { "epoch": 0.34839055793991414, "grad_norm": 0.474609375, "learning_rate": 4.9793856081689725e-06, "loss": 2.2377, "step": 6494 }, { "epoch": 0.34844420600858367, "grad_norm": 1.0859375, "learning_rate": 4.979374473078196e-06, "loss": 2.4536, "step": 6495 }, { "epoch": 0.3484978540772532, "grad_norm": 0.400390625, "learning_rate": 4.9793633349933176e-06, "loss": 2.3029, "step": 6496 }, { "epoch": 0.34855150214592273, "grad_norm": 0.47265625, "learning_rate": 4.9793521939143516e-06, "loss": 2.2043, "step": 6497 }, { "epoch": 0.34860515021459226, "grad_norm": 0.373046875, "learning_rate": 4.97934104984131e-06, "loss": 2.5196, "step": 6498 }, { "epoch": 0.3486587982832618, "grad_norm": 0.8046875, "learning_rate": 4.979329902774206e-06, "loss": 2.411, "step": 6499 }, { "epoch": 0.3487124463519313, "grad_norm": 0.482421875, "learning_rate": 4.9793187527130545e-06, "loss": 2.1659, "step": 6500 }, { "epoch": 0.34876609442060086, "grad_norm": 0.34375, "learning_rate": 4.979307599657869e-06, "loss": 2.0299, "step": 6501 }, { "epoch": 0.3488197424892704, "grad_norm": 1.3203125, "learning_rate": 4.9792964436086625e-06, "loss": 2.3858, "step": 6502 }, { "epoch": 0.3488733905579399, "grad_norm": 0.4140625, "learning_rate": 4.979285284565447e-06, "loss": 1.9854, "step": 6503 }, { "epoch": 0.34892703862660945, "grad_norm": 0.47265625, "learning_rate": 4.979274122528238e-06, "loss": 2.215, "step": 6504 }, { "epoch": 0.348980686695279, "grad_norm": 0.470703125, "learning_rate": 4.979262957497049e-06, "loss": 2.2416, "step": 6505 }, { "epoch": 0.3490343347639485, "grad_norm": 0.412109375, "learning_rate": 4.9792517894718925e-06, "loss": 2.4079, "step": 6506 }, { "epoch": 0.34908798283261805, "grad_norm": 0.353515625, "learning_rate": 4.979240618452782e-06, "loss": 2.1287, "step": 6507 }, { "epoch": 0.3491416309012876, "grad_norm": 0.3828125, "learning_rate": 4.979229444439731e-06, "loss": 2.4231, "step": 6508 }, { "epoch": 0.34919527896995706, "grad_norm": 0.45703125, "learning_rate": 4.9792182674327525e-06, "loss": 2.1285, "step": 6509 }, { "epoch": 0.3492489270386266, "grad_norm": 0.4609375, "learning_rate": 4.979207087431862e-06, "loss": 2.3536, "step": 6510 }, { "epoch": 0.3493025751072961, "grad_norm": 0.56640625, "learning_rate": 4.979195904437072e-06, "loss": 2.0962, "step": 6511 }, { "epoch": 0.34935622317596565, "grad_norm": 0.5234375, "learning_rate": 4.979184718448394e-06, "loss": 2.4187, "step": 6512 }, { "epoch": 0.3494098712446352, "grad_norm": 0.369140625, "learning_rate": 4.979173529465845e-06, "loss": 2.3468, "step": 6513 }, { "epoch": 0.3494635193133047, "grad_norm": 0.384765625, "learning_rate": 4.979162337489436e-06, "loss": 2.3336, "step": 6514 }, { "epoch": 0.34951716738197425, "grad_norm": 0.41015625, "learning_rate": 4.979151142519181e-06, "loss": 2.2886, "step": 6515 }, { "epoch": 0.3495708154506438, "grad_norm": 0.376953125, "learning_rate": 4.979139944555095e-06, "loss": 2.2207, "step": 6516 }, { "epoch": 0.3496244635193133, "grad_norm": 0.357421875, "learning_rate": 4.979128743597189e-06, "loss": 2.2407, "step": 6517 }, { "epoch": 0.34967811158798284, "grad_norm": 0.40234375, "learning_rate": 4.979117539645479e-06, "loss": 2.3377, "step": 6518 }, { "epoch": 0.3497317596566524, "grad_norm": 0.49609375, "learning_rate": 4.979106332699975e-06, "loss": 2.3621, "step": 6519 }, { "epoch": 0.3497854077253219, "grad_norm": 0.37890625, "learning_rate": 4.979095122760695e-06, "loss": 2.0159, "step": 6520 }, { "epoch": 0.34983905579399144, "grad_norm": 0.421875, "learning_rate": 4.97908390982765e-06, "loss": 2.1022, "step": 6521 }, { "epoch": 0.34989270386266097, "grad_norm": 0.439453125, "learning_rate": 4.979072693900854e-06, "loss": 2.2935, "step": 6522 }, { "epoch": 0.34994635193133045, "grad_norm": 0.353515625, "learning_rate": 4.9790614749803205e-06, "loss": 2.2659, "step": 6523 }, { "epoch": 0.35, "grad_norm": 0.376953125, "learning_rate": 4.979050253066064e-06, "loss": 2.2481, "step": 6524 }, { "epoch": 0.3500536480686695, "grad_norm": 0.4609375, "learning_rate": 4.979039028158095e-06, "loss": 2.4613, "step": 6525 }, { "epoch": 0.35010729613733904, "grad_norm": 2.421875, "learning_rate": 4.979027800256431e-06, "loss": 2.1693, "step": 6526 }, { "epoch": 0.3501609442060086, "grad_norm": 0.359375, "learning_rate": 4.979016569361083e-06, "loss": 2.267, "step": 6527 }, { "epoch": 0.3502145922746781, "grad_norm": 0.427734375, "learning_rate": 4.9790053354720655e-06, "loss": 2.3672, "step": 6528 }, { "epoch": 0.35026824034334764, "grad_norm": 0.458984375, "learning_rate": 4.9789940985893914e-06, "loss": 2.1698, "step": 6529 }, { "epoch": 0.35032188841201717, "grad_norm": 0.392578125, "learning_rate": 4.978982858713075e-06, "loss": 2.1544, "step": 6530 }, { "epoch": 0.3503755364806867, "grad_norm": 0.421875, "learning_rate": 4.97897161584313e-06, "loss": 2.174, "step": 6531 }, { "epoch": 0.35042918454935623, "grad_norm": 0.392578125, "learning_rate": 4.978960369979569e-06, "loss": 2.0086, "step": 6532 }, { "epoch": 0.35048283261802576, "grad_norm": 0.396484375, "learning_rate": 4.978949121122407e-06, "loss": 2.5183, "step": 6533 }, { "epoch": 0.3505364806866953, "grad_norm": 0.5390625, "learning_rate": 4.978937869271656e-06, "loss": 2.2208, "step": 6534 }, { "epoch": 0.3505901287553648, "grad_norm": 0.478515625, "learning_rate": 4.978926614427331e-06, "loss": 2.3228, "step": 6535 }, { "epoch": 0.35064377682403436, "grad_norm": 0.396484375, "learning_rate": 4.9789153565894436e-06, "loss": 2.4722, "step": 6536 }, { "epoch": 0.35069742489270384, "grad_norm": 0.466796875, "learning_rate": 4.97890409575801e-06, "loss": 2.2423, "step": 6537 }, { "epoch": 0.35075107296137337, "grad_norm": 0.349609375, "learning_rate": 4.978892831933042e-06, "loss": 2.0805, "step": 6538 }, { "epoch": 0.3508047210300429, "grad_norm": 0.447265625, "learning_rate": 4.978881565114554e-06, "loss": 2.1613, "step": 6539 }, { "epoch": 0.35085836909871243, "grad_norm": 0.412109375, "learning_rate": 4.9788702953025596e-06, "loss": 2.2941, "step": 6540 }, { "epoch": 0.35091201716738196, "grad_norm": 0.419921875, "learning_rate": 4.978859022497071e-06, "loss": 2.2918, "step": 6541 }, { "epoch": 0.3509656652360515, "grad_norm": 0.421875, "learning_rate": 4.978847746698104e-06, "loss": 2.4136, "step": 6542 }, { "epoch": 0.351019313304721, "grad_norm": 0.416015625, "learning_rate": 4.978836467905671e-06, "loss": 2.2771, "step": 6543 }, { "epoch": 0.35107296137339056, "grad_norm": 0.396484375, "learning_rate": 4.978825186119786e-06, "loss": 2.2964, "step": 6544 }, { "epoch": 0.3511266094420601, "grad_norm": 0.46484375, "learning_rate": 4.9788139013404615e-06, "loss": 2.2716, "step": 6545 }, { "epoch": 0.3511802575107296, "grad_norm": 0.453125, "learning_rate": 4.9788026135677125e-06, "loss": 2.1341, "step": 6546 }, { "epoch": 0.35123390557939915, "grad_norm": 2.6875, "learning_rate": 4.9787913228015525e-06, "loss": 2.1417, "step": 6547 }, { "epoch": 0.3512875536480687, "grad_norm": 0.400390625, "learning_rate": 4.978780029041994e-06, "loss": 2.2856, "step": 6548 }, { "epoch": 0.3513412017167382, "grad_norm": 0.44140625, "learning_rate": 4.978768732289052e-06, "loss": 2.1848, "step": 6549 }, { "epoch": 0.35139484978540775, "grad_norm": 0.345703125, "learning_rate": 4.97875743254274e-06, "loss": 2.4389, "step": 6550 }, { "epoch": 0.3514484978540773, "grad_norm": 0.462890625, "learning_rate": 4.978746129803071e-06, "loss": 2.1225, "step": 6551 }, { "epoch": 0.35150214592274676, "grad_norm": 0.451171875, "learning_rate": 4.9787348240700585e-06, "loss": 2.1929, "step": 6552 }, { "epoch": 0.3515557939914163, "grad_norm": 0.427734375, "learning_rate": 4.978723515343717e-06, "loss": 2.3344, "step": 6553 }, { "epoch": 0.3516094420600858, "grad_norm": 0.396484375, "learning_rate": 4.97871220362406e-06, "loss": 2.4517, "step": 6554 }, { "epoch": 0.35166309012875535, "grad_norm": 0.57421875, "learning_rate": 4.9787008889111e-06, "loss": 2.3793, "step": 6555 }, { "epoch": 0.3517167381974249, "grad_norm": 0.43359375, "learning_rate": 4.978689571204851e-06, "loss": 2.4262, "step": 6556 }, { "epoch": 0.3517703862660944, "grad_norm": 0.40234375, "learning_rate": 4.978678250505329e-06, "loss": 2.3066, "step": 6557 }, { "epoch": 0.35182403433476395, "grad_norm": 0.388671875, "learning_rate": 4.978666926812544e-06, "loss": 2.3225, "step": 6558 }, { "epoch": 0.3518776824034335, "grad_norm": 0.7265625, "learning_rate": 4.978655600126513e-06, "loss": 2.356, "step": 6559 }, { "epoch": 0.351931330472103, "grad_norm": 0.447265625, "learning_rate": 4.978644270447248e-06, "loss": 2.1365, "step": 6560 }, { "epoch": 0.35198497854077254, "grad_norm": 0.6171875, "learning_rate": 4.978632937774762e-06, "loss": 2.3576, "step": 6561 }, { "epoch": 0.3520386266094421, "grad_norm": 0.345703125, "learning_rate": 4.97862160210907e-06, "loss": 2.057, "step": 6562 }, { "epoch": 0.3520922746781116, "grad_norm": 0.42578125, "learning_rate": 4.978610263450185e-06, "loss": 2.2304, "step": 6563 }, { "epoch": 0.35214592274678114, "grad_norm": 0.423828125, "learning_rate": 4.978598921798123e-06, "loss": 2.2585, "step": 6564 }, { "epoch": 0.35219957081545067, "grad_norm": 0.4140625, "learning_rate": 4.978587577152893e-06, "loss": 2.3095, "step": 6565 }, { "epoch": 0.35225321888412015, "grad_norm": 0.3828125, "learning_rate": 4.9785762295145125e-06, "loss": 2.3243, "step": 6566 }, { "epoch": 0.3523068669527897, "grad_norm": 0.458984375, "learning_rate": 4.978564878882993e-06, "loss": 2.3135, "step": 6567 }, { "epoch": 0.3523605150214592, "grad_norm": 0.388671875, "learning_rate": 4.978553525258351e-06, "loss": 2.5964, "step": 6568 }, { "epoch": 0.35241416309012874, "grad_norm": 0.4296875, "learning_rate": 4.978542168640598e-06, "loss": 2.4402, "step": 6569 }, { "epoch": 0.35246781115879827, "grad_norm": 0.4140625, "learning_rate": 4.978530809029747e-06, "loss": 2.3442, "step": 6570 }, { "epoch": 0.3525214592274678, "grad_norm": 0.380859375, "learning_rate": 4.978519446425814e-06, "loss": 2.4327, "step": 6571 }, { "epoch": 0.35257510729613734, "grad_norm": 0.376953125, "learning_rate": 4.978508080828811e-06, "loss": 2.1342, "step": 6572 }, { "epoch": 0.35262875536480687, "grad_norm": 0.3984375, "learning_rate": 4.978496712238753e-06, "loss": 2.4299, "step": 6573 }, { "epoch": 0.3526824034334764, "grad_norm": 0.361328125, "learning_rate": 4.978485340655653e-06, "loss": 2.3535, "step": 6574 }, { "epoch": 0.35273605150214593, "grad_norm": 0.37109375, "learning_rate": 4.978473966079524e-06, "loss": 2.0099, "step": 6575 }, { "epoch": 0.35278969957081546, "grad_norm": 0.412109375, "learning_rate": 4.9784625885103815e-06, "loss": 2.3325, "step": 6576 }, { "epoch": 0.352843347639485, "grad_norm": 0.392578125, "learning_rate": 4.978451207948238e-06, "loss": 2.2227, "step": 6577 }, { "epoch": 0.3528969957081545, "grad_norm": 0.466796875, "learning_rate": 4.978439824393107e-06, "loss": 2.6028, "step": 6578 }, { "epoch": 0.35295064377682406, "grad_norm": 0.40625, "learning_rate": 4.978428437845003e-06, "loss": 2.2273, "step": 6579 }, { "epoch": 0.3530042918454936, "grad_norm": 0.369140625, "learning_rate": 4.9784170483039405e-06, "loss": 2.2786, "step": 6580 }, { "epoch": 0.35305793991416307, "grad_norm": 0.3984375, "learning_rate": 4.978405655769931e-06, "loss": 2.1945, "step": 6581 }, { "epoch": 0.3531115879828326, "grad_norm": 0.3515625, "learning_rate": 4.9783942602429904e-06, "loss": 2.1408, "step": 6582 }, { "epoch": 0.35316523605150213, "grad_norm": 0.8828125, "learning_rate": 4.9783828617231315e-06, "loss": 2.3597, "step": 6583 }, { "epoch": 0.35321888412017166, "grad_norm": 0.48046875, "learning_rate": 4.978371460210368e-06, "loss": 2.238, "step": 6584 }, { "epoch": 0.3532725321888412, "grad_norm": 6.59375, "learning_rate": 4.978360055704714e-06, "loss": 2.0441, "step": 6585 }, { "epoch": 0.3533261802575107, "grad_norm": 0.427734375, "learning_rate": 4.978348648206183e-06, "loss": 2.3729, "step": 6586 }, { "epoch": 0.35337982832618026, "grad_norm": 0.38671875, "learning_rate": 4.978337237714789e-06, "loss": 2.3191, "step": 6587 }, { "epoch": 0.3534334763948498, "grad_norm": 0.38671875, "learning_rate": 4.9783258242305445e-06, "loss": 2.4802, "step": 6588 }, { "epoch": 0.3534871244635193, "grad_norm": 0.578125, "learning_rate": 4.978314407753466e-06, "loss": 2.3223, "step": 6589 }, { "epoch": 0.35354077253218885, "grad_norm": 0.427734375, "learning_rate": 4.978302988283565e-06, "loss": 2.3044, "step": 6590 }, { "epoch": 0.3535944206008584, "grad_norm": 0.453125, "learning_rate": 4.978291565820856e-06, "loss": 2.145, "step": 6591 }, { "epoch": 0.3536480686695279, "grad_norm": 0.453125, "learning_rate": 4.978280140365353e-06, "loss": 2.5065, "step": 6592 }, { "epoch": 0.35370171673819745, "grad_norm": 0.390625, "learning_rate": 4.97826871191707e-06, "loss": 2.3489, "step": 6593 }, { "epoch": 0.353755364806867, "grad_norm": 1.2578125, "learning_rate": 4.97825728047602e-06, "loss": 2.0133, "step": 6594 }, { "epoch": 0.35380901287553645, "grad_norm": 0.3828125, "learning_rate": 4.978245846042217e-06, "loss": 2.444, "step": 6595 }, { "epoch": 0.353862660944206, "grad_norm": 0.421875, "learning_rate": 4.9782344086156756e-06, "loss": 2.2693, "step": 6596 }, { "epoch": 0.3539163090128755, "grad_norm": 0.373046875, "learning_rate": 4.978222968196409e-06, "loss": 2.4013, "step": 6597 }, { "epoch": 0.35396995708154505, "grad_norm": 0.6953125, "learning_rate": 4.9782115247844295e-06, "loss": 2.2666, "step": 6598 }, { "epoch": 0.3540236051502146, "grad_norm": 0.416015625, "learning_rate": 4.978200078379754e-06, "loss": 2.391, "step": 6599 }, { "epoch": 0.3540772532188841, "grad_norm": 0.431640625, "learning_rate": 4.978188628982395e-06, "loss": 2.3527, "step": 6600 }, { "epoch": 0.35413090128755365, "grad_norm": 0.361328125, "learning_rate": 4.978177176592366e-06, "loss": 2.2935, "step": 6601 }, { "epoch": 0.3541845493562232, "grad_norm": 0.44921875, "learning_rate": 4.97816572120968e-06, "loss": 2.3421, "step": 6602 }, { "epoch": 0.3542381974248927, "grad_norm": 0.412109375, "learning_rate": 4.978154262834352e-06, "loss": 2.3438, "step": 6603 }, { "epoch": 0.35429184549356224, "grad_norm": 0.482421875, "learning_rate": 4.978142801466397e-06, "loss": 2.3665, "step": 6604 }, { "epoch": 0.3543454935622318, "grad_norm": 0.421875, "learning_rate": 4.978131337105826e-06, "loss": 2.2892, "step": 6605 }, { "epoch": 0.3543991416309013, "grad_norm": 0.376953125, "learning_rate": 4.978119869752655e-06, "loss": 2.2426, "step": 6606 }, { "epoch": 0.35445278969957084, "grad_norm": 5.03125, "learning_rate": 4.978108399406897e-06, "loss": 2.3694, "step": 6607 }, { "epoch": 0.35450643776824037, "grad_norm": 0.341796875, "learning_rate": 4.9780969260685655e-06, "loss": 1.9909, "step": 6608 }, { "epoch": 0.35456008583690984, "grad_norm": 0.361328125, "learning_rate": 4.978085449737676e-06, "loss": 2.203, "step": 6609 }, { "epoch": 0.3546137339055794, "grad_norm": 0.419921875, "learning_rate": 4.97807397041424e-06, "loss": 2.4118, "step": 6610 }, { "epoch": 0.3546673819742489, "grad_norm": 3.390625, "learning_rate": 4.978062488098273e-06, "loss": 2.1278, "step": 6611 }, { "epoch": 0.35472103004291844, "grad_norm": 0.455078125, "learning_rate": 4.9780510027897885e-06, "loss": 2.3142, "step": 6612 }, { "epoch": 0.35477467811158797, "grad_norm": 0.37109375, "learning_rate": 4.978039514488801e-06, "loss": 2.2315, "step": 6613 }, { "epoch": 0.3548283261802575, "grad_norm": 0.400390625, "learning_rate": 4.978028023195323e-06, "loss": 2.3329, "step": 6614 }, { "epoch": 0.35488197424892703, "grad_norm": 0.4140625, "learning_rate": 4.97801652890937e-06, "loss": 2.2224, "step": 6615 }, { "epoch": 0.35493562231759657, "grad_norm": 0.392578125, "learning_rate": 4.978005031630954e-06, "loss": 2.3207, "step": 6616 }, { "epoch": 0.3549892703862661, "grad_norm": 0.3671875, "learning_rate": 4.977993531360089e-06, "loss": 2.3196, "step": 6617 }, { "epoch": 0.35504291845493563, "grad_norm": 0.578125, "learning_rate": 4.977982028096791e-06, "loss": 2.2885, "step": 6618 }, { "epoch": 0.35509656652360516, "grad_norm": 0.390625, "learning_rate": 4.977970521841072e-06, "loss": 2.0451, "step": 6619 }, { "epoch": 0.3551502145922747, "grad_norm": 0.64453125, "learning_rate": 4.977959012592947e-06, "loss": 2.2431, "step": 6620 }, { "epoch": 0.3552038626609442, "grad_norm": 0.462890625, "learning_rate": 4.977947500352429e-06, "loss": 2.352, "step": 6621 }, { "epoch": 0.35525751072961376, "grad_norm": 0.369140625, "learning_rate": 4.977935985119533e-06, "loss": 2.2622, "step": 6622 }, { "epoch": 0.3553111587982833, "grad_norm": 0.380859375, "learning_rate": 4.977924466894272e-06, "loss": 2.0746, "step": 6623 }, { "epoch": 0.35536480686695276, "grad_norm": 0.388671875, "learning_rate": 4.977912945676659e-06, "loss": 2.2433, "step": 6624 }, { "epoch": 0.3554184549356223, "grad_norm": 0.54296875, "learning_rate": 4.977901421466711e-06, "loss": 2.1565, "step": 6625 }, { "epoch": 0.35547210300429183, "grad_norm": 0.392578125, "learning_rate": 4.977889894264438e-06, "loss": 2.2263, "step": 6626 }, { "epoch": 0.35552575107296136, "grad_norm": 0.365234375, "learning_rate": 4.9778783640698574e-06, "loss": 2.049, "step": 6627 }, { "epoch": 0.3555793991416309, "grad_norm": 0.421875, "learning_rate": 4.977866830882981e-06, "loss": 2.2334, "step": 6628 }, { "epoch": 0.3556330472103004, "grad_norm": 0.380859375, "learning_rate": 4.9778552947038234e-06, "loss": 2.0759, "step": 6629 }, { "epoch": 0.35568669527896996, "grad_norm": 0.421875, "learning_rate": 4.977843755532398e-06, "loss": 2.2596, "step": 6630 }, { "epoch": 0.3557403433476395, "grad_norm": 0.361328125, "learning_rate": 4.977832213368719e-06, "loss": 2.1355, "step": 6631 }, { "epoch": 0.355793991416309, "grad_norm": 0.384765625, "learning_rate": 4.977820668212802e-06, "loss": 2.4183, "step": 6632 }, { "epoch": 0.35584763948497855, "grad_norm": 0.388671875, "learning_rate": 4.977809120064658e-06, "loss": 2.3046, "step": 6633 }, { "epoch": 0.3559012875536481, "grad_norm": 0.419921875, "learning_rate": 4.977797568924303e-06, "loss": 2.2475, "step": 6634 }, { "epoch": 0.3559549356223176, "grad_norm": 0.443359375, "learning_rate": 4.977786014791751e-06, "loss": 2.5129, "step": 6635 }, { "epoch": 0.35600858369098715, "grad_norm": 0.3984375, "learning_rate": 4.977774457667015e-06, "loss": 2.3443, "step": 6636 }, { "epoch": 0.3560622317596567, "grad_norm": 0.375, "learning_rate": 4.9777628975501085e-06, "loss": 2.0129, "step": 6637 }, { "epoch": 0.35611587982832615, "grad_norm": 0.376953125, "learning_rate": 4.9777513344410465e-06, "loss": 2.3795, "step": 6638 }, { "epoch": 0.3561695278969957, "grad_norm": 0.375, "learning_rate": 4.977739768339843e-06, "loss": 2.128, "step": 6639 }, { "epoch": 0.3562231759656652, "grad_norm": 0.396484375, "learning_rate": 4.977728199246512e-06, "loss": 2.3061, "step": 6640 }, { "epoch": 0.35627682403433475, "grad_norm": 0.50390625, "learning_rate": 4.977716627161067e-06, "loss": 1.424, "step": 6641 }, { "epoch": 0.3563304721030043, "grad_norm": 0.53125, "learning_rate": 4.977705052083522e-06, "loss": 2.242, "step": 6642 }, { "epoch": 0.3563841201716738, "grad_norm": 0.373046875, "learning_rate": 4.97769347401389e-06, "loss": 2.3037, "step": 6643 }, { "epoch": 0.35643776824034334, "grad_norm": 0.427734375, "learning_rate": 4.977681892952188e-06, "loss": 2.2954, "step": 6644 }, { "epoch": 0.3564914163090129, "grad_norm": 0.8046875, "learning_rate": 4.977670308898426e-06, "loss": 1.8741, "step": 6645 }, { "epoch": 0.3565450643776824, "grad_norm": 0.431640625, "learning_rate": 4.977658721852622e-06, "loss": 2.3413, "step": 6646 }, { "epoch": 0.35659871244635194, "grad_norm": 0.333984375, "learning_rate": 4.977647131814787e-06, "loss": 2.0883, "step": 6647 }, { "epoch": 0.35665236051502147, "grad_norm": 0.384765625, "learning_rate": 4.9776355387849365e-06, "loss": 2.0407, "step": 6648 }, { "epoch": 0.356706008583691, "grad_norm": 0.396484375, "learning_rate": 4.977623942763083e-06, "loss": 2.2976, "step": 6649 }, { "epoch": 0.35675965665236054, "grad_norm": 0.60546875, "learning_rate": 4.977612343749242e-06, "loss": 2.298, "step": 6650 }, { "epoch": 0.35681330472103007, "grad_norm": 0.404296875, "learning_rate": 4.977600741743428e-06, "loss": 2.2486, "step": 6651 }, { "epoch": 0.3568669527896996, "grad_norm": 0.439453125, "learning_rate": 4.977589136745653e-06, "loss": 2.3462, "step": 6652 }, { "epoch": 0.3569206008583691, "grad_norm": 0.431640625, "learning_rate": 4.977577528755932e-06, "loss": 2.3076, "step": 6653 }, { "epoch": 0.3569742489270386, "grad_norm": 0.412109375, "learning_rate": 4.97756591777428e-06, "loss": 2.3697, "step": 6654 }, { "epoch": 0.35702789699570814, "grad_norm": 0.447265625, "learning_rate": 4.97755430380071e-06, "loss": 1.3876, "step": 6655 }, { "epoch": 0.35708154506437767, "grad_norm": 0.392578125, "learning_rate": 4.977542686835236e-06, "loss": 2.2461, "step": 6656 }, { "epoch": 0.3571351931330472, "grad_norm": 0.3984375, "learning_rate": 4.977531066877872e-06, "loss": 2.4064, "step": 6657 }, { "epoch": 0.35718884120171673, "grad_norm": 0.42578125, "learning_rate": 4.9775194439286316e-06, "loss": 2.2999, "step": 6658 }, { "epoch": 0.35724248927038627, "grad_norm": 0.85546875, "learning_rate": 4.97750781798753e-06, "loss": 2.2076, "step": 6659 }, { "epoch": 0.3572961373390558, "grad_norm": 0.345703125, "learning_rate": 4.977496189054581e-06, "loss": 2.283, "step": 6660 }, { "epoch": 0.35734978540772533, "grad_norm": 0.365234375, "learning_rate": 4.9774845571297985e-06, "loss": 2.1803, "step": 6661 }, { "epoch": 0.35740343347639486, "grad_norm": 0.376953125, "learning_rate": 4.977472922213195e-06, "loss": 2.2338, "step": 6662 }, { "epoch": 0.3574570815450644, "grad_norm": 0.404296875, "learning_rate": 4.977461284304786e-06, "loss": 2.1599, "step": 6663 }, { "epoch": 0.3575107296137339, "grad_norm": 0.404296875, "learning_rate": 4.977449643404587e-06, "loss": 2.4874, "step": 6664 }, { "epoch": 0.35756437768240346, "grad_norm": 0.390625, "learning_rate": 4.97743799951261e-06, "loss": 2.1748, "step": 6665 }, { "epoch": 0.357618025751073, "grad_norm": 0.384765625, "learning_rate": 4.977426352628869e-06, "loss": 2.3418, "step": 6666 }, { "epoch": 0.35767167381974246, "grad_norm": 0.400390625, "learning_rate": 4.977414702753379e-06, "loss": 2.0693, "step": 6667 }, { "epoch": 0.357725321888412, "grad_norm": 0.447265625, "learning_rate": 4.9774030498861535e-06, "loss": 2.5061, "step": 6668 }, { "epoch": 0.3577789699570815, "grad_norm": 0.369140625, "learning_rate": 4.9773913940272074e-06, "loss": 2.1508, "step": 6669 }, { "epoch": 0.35783261802575106, "grad_norm": 0.44921875, "learning_rate": 4.977379735176554e-06, "loss": 2.3595, "step": 6670 }, { "epoch": 0.3578862660944206, "grad_norm": 0.34765625, "learning_rate": 4.9773680733342075e-06, "loss": 2.3074, "step": 6671 }, { "epoch": 0.3579399141630901, "grad_norm": 0.419921875, "learning_rate": 4.977356408500182e-06, "loss": 2.3892, "step": 6672 }, { "epoch": 0.35799356223175965, "grad_norm": 0.484375, "learning_rate": 4.977344740674491e-06, "loss": 2.1894, "step": 6673 }, { "epoch": 0.3580472103004292, "grad_norm": 0.494140625, "learning_rate": 4.97733306985715e-06, "loss": 2.4677, "step": 6674 }, { "epoch": 0.3581008583690987, "grad_norm": 0.36328125, "learning_rate": 4.9773213960481715e-06, "loss": 2.4473, "step": 6675 }, { "epoch": 0.35815450643776825, "grad_norm": 0.380859375, "learning_rate": 4.977309719247571e-06, "loss": 2.3196, "step": 6676 }, { "epoch": 0.3582081545064378, "grad_norm": 0.390625, "learning_rate": 4.977298039455362e-06, "loss": 2.2547, "step": 6677 }, { "epoch": 0.3582618025751073, "grad_norm": 0.39453125, "learning_rate": 4.977286356671559e-06, "loss": 2.2708, "step": 6678 }, { "epoch": 0.35831545064377684, "grad_norm": 0.330078125, "learning_rate": 4.977274670896175e-06, "loss": 2.0966, "step": 6679 }, { "epoch": 0.3583690987124464, "grad_norm": 0.61328125, "learning_rate": 4.977262982129225e-06, "loss": 2.3075, "step": 6680 }, { "epoch": 0.35842274678111585, "grad_norm": 0.44140625, "learning_rate": 4.9772512903707225e-06, "loss": 2.5076, "step": 6681 }, { "epoch": 0.3584763948497854, "grad_norm": 0.40234375, "learning_rate": 4.977239595620683e-06, "loss": 2.3817, "step": 6682 }, { "epoch": 0.3585300429184549, "grad_norm": 0.380859375, "learning_rate": 4.977227897879119e-06, "loss": 2.2842, "step": 6683 }, { "epoch": 0.35858369098712445, "grad_norm": 3.875, "learning_rate": 4.977216197146045e-06, "loss": 2.2135, "step": 6684 }, { "epoch": 0.358637339055794, "grad_norm": 0.388671875, "learning_rate": 4.9772044934214765e-06, "loss": 2.4048, "step": 6685 }, { "epoch": 0.3586909871244635, "grad_norm": 0.345703125, "learning_rate": 4.977192786705426e-06, "loss": 2.3425, "step": 6686 }, { "epoch": 0.35874463519313304, "grad_norm": 0.458984375, "learning_rate": 4.977181076997908e-06, "loss": 2.3242, "step": 6687 }, { "epoch": 0.3587982832618026, "grad_norm": 0.703125, "learning_rate": 4.977169364298937e-06, "loss": 2.4195, "step": 6688 }, { "epoch": 0.3588519313304721, "grad_norm": 0.390625, "learning_rate": 4.977157648608527e-06, "loss": 2.2186, "step": 6689 }, { "epoch": 0.35890557939914164, "grad_norm": 0.36328125, "learning_rate": 4.977145929926692e-06, "loss": 2.2656, "step": 6690 }, { "epoch": 0.35895922746781117, "grad_norm": 0.50390625, "learning_rate": 4.977134208253447e-06, "loss": 2.2002, "step": 6691 }, { "epoch": 0.3590128755364807, "grad_norm": 0.470703125, "learning_rate": 4.977122483588805e-06, "loss": 1.9939, "step": 6692 }, { "epoch": 0.35906652360515023, "grad_norm": 0.75390625, "learning_rate": 4.97711075593278e-06, "loss": 2.2833, "step": 6693 }, { "epoch": 0.35912017167381977, "grad_norm": 0.44140625, "learning_rate": 4.977099025285388e-06, "loss": 1.2382, "step": 6694 }, { "epoch": 0.3591738197424893, "grad_norm": 0.33203125, "learning_rate": 4.977087291646641e-06, "loss": 2.0806, "step": 6695 }, { "epoch": 0.3592274678111588, "grad_norm": 0.65234375, "learning_rate": 4.977075555016554e-06, "loss": 2.1619, "step": 6696 }, { "epoch": 0.3592811158798283, "grad_norm": 0.37890625, "learning_rate": 4.9770638153951424e-06, "loss": 2.4469, "step": 6697 }, { "epoch": 0.35933476394849784, "grad_norm": 0.453125, "learning_rate": 4.977052072782418e-06, "loss": 2.5237, "step": 6698 }, { "epoch": 0.35938841201716737, "grad_norm": 0.55859375, "learning_rate": 4.977040327178397e-06, "loss": 2.3341, "step": 6699 }, { "epoch": 0.3594420600858369, "grad_norm": 0.396484375, "learning_rate": 4.9770285785830925e-06, "loss": 2.2409, "step": 6700 }, { "epoch": 0.35949570815450643, "grad_norm": 0.361328125, "learning_rate": 4.977016826996519e-06, "loss": 2.4489, "step": 6701 }, { "epoch": 0.35954935622317596, "grad_norm": 0.392578125, "learning_rate": 4.977005072418691e-06, "loss": 2.2339, "step": 6702 }, { "epoch": 0.3596030042918455, "grad_norm": 0.380859375, "learning_rate": 4.9769933148496215e-06, "loss": 2.3315, "step": 6703 }, { "epoch": 0.35965665236051503, "grad_norm": 0.47265625, "learning_rate": 4.976981554289326e-06, "loss": 2.2516, "step": 6704 }, { "epoch": 0.35971030042918456, "grad_norm": 0.3984375, "learning_rate": 4.976969790737819e-06, "loss": 2.285, "step": 6705 }, { "epoch": 0.3597639484978541, "grad_norm": 0.63671875, "learning_rate": 4.976958024195113e-06, "loss": 2.2661, "step": 6706 }, { "epoch": 0.3598175965665236, "grad_norm": 0.396484375, "learning_rate": 4.9769462546612244e-06, "loss": 2.4619, "step": 6707 }, { "epoch": 0.35987124463519315, "grad_norm": 0.48046875, "learning_rate": 4.976934482136166e-06, "loss": 2.2948, "step": 6708 }, { "epoch": 0.3599248927038627, "grad_norm": 0.45703125, "learning_rate": 4.976922706619951e-06, "loss": 2.1575, "step": 6709 }, { "epoch": 0.35997854077253216, "grad_norm": 0.408203125, "learning_rate": 4.976910928112596e-06, "loss": 2.4475, "step": 6710 }, { "epoch": 0.3600321888412017, "grad_norm": 0.412109375, "learning_rate": 4.976899146614114e-06, "loss": 2.4032, "step": 6711 }, { "epoch": 0.3600858369098712, "grad_norm": 0.40625, "learning_rate": 4.976887362124518e-06, "loss": 2.5078, "step": 6712 }, { "epoch": 0.36013948497854076, "grad_norm": 0.515625, "learning_rate": 4.976875574643825e-06, "loss": 2.174, "step": 6713 }, { "epoch": 0.3601931330472103, "grad_norm": 0.390625, "learning_rate": 4.976863784172048e-06, "loss": 2.3022, "step": 6714 }, { "epoch": 0.3602467811158798, "grad_norm": 0.38671875, "learning_rate": 4.9768519907092e-06, "loss": 2.3837, "step": 6715 }, { "epoch": 0.36030042918454935, "grad_norm": 0.361328125, "learning_rate": 4.976840194255297e-06, "loss": 2.162, "step": 6716 }, { "epoch": 0.3603540772532189, "grad_norm": 0.453125, "learning_rate": 4.976828394810351e-06, "loss": 2.3466, "step": 6717 }, { "epoch": 0.3604077253218884, "grad_norm": 0.52734375, "learning_rate": 4.97681659237438e-06, "loss": 1.9863, "step": 6718 }, { "epoch": 0.36046137339055795, "grad_norm": 0.357421875, "learning_rate": 4.976804786947394e-06, "loss": 2.3143, "step": 6719 }, { "epoch": 0.3605150214592275, "grad_norm": 0.404296875, "learning_rate": 4.976792978529411e-06, "loss": 2.3318, "step": 6720 }, { "epoch": 0.360568669527897, "grad_norm": 0.5859375, "learning_rate": 4.9767811671204425e-06, "loss": 2.3513, "step": 6721 }, { "epoch": 0.36062231759656654, "grad_norm": 0.36328125, "learning_rate": 4.976769352720503e-06, "loss": 2.2195, "step": 6722 }, { "epoch": 0.3606759656652361, "grad_norm": 0.30859375, "learning_rate": 4.9767575353296095e-06, "loss": 2.3454, "step": 6723 }, { "epoch": 0.36072961373390555, "grad_norm": 0.41796875, "learning_rate": 4.976745714947773e-06, "loss": 2.1028, "step": 6724 }, { "epoch": 0.3607832618025751, "grad_norm": 0.392578125, "learning_rate": 4.976733891575009e-06, "loss": 2.3394, "step": 6725 }, { "epoch": 0.3608369098712446, "grad_norm": 0.498046875, "learning_rate": 4.976722065211332e-06, "loss": 2.3959, "step": 6726 }, { "epoch": 0.36089055793991415, "grad_norm": 0.408203125, "learning_rate": 4.976710235856756e-06, "loss": 2.4043, "step": 6727 }, { "epoch": 0.3609442060085837, "grad_norm": 0.400390625, "learning_rate": 4.9766984035112966e-06, "loss": 2.4398, "step": 6728 }, { "epoch": 0.3609978540772532, "grad_norm": 0.51171875, "learning_rate": 4.976686568174966e-06, "loss": 2.6048, "step": 6729 }, { "epoch": 0.36105150214592274, "grad_norm": 0.578125, "learning_rate": 4.9766747298477795e-06, "loss": 2.2228, "step": 6730 }, { "epoch": 0.3611051502145923, "grad_norm": 0.39453125, "learning_rate": 4.9766628885297515e-06, "loss": 2.1763, "step": 6731 }, { "epoch": 0.3611587982832618, "grad_norm": 0.404296875, "learning_rate": 4.976651044220896e-06, "loss": 2.2997, "step": 6732 }, { "epoch": 0.36121244635193134, "grad_norm": 0.486328125, "learning_rate": 4.976639196921227e-06, "loss": 2.1931, "step": 6733 }, { "epoch": 0.36126609442060087, "grad_norm": 0.482421875, "learning_rate": 4.97662734663076e-06, "loss": 1.9536, "step": 6734 }, { "epoch": 0.3613197424892704, "grad_norm": 0.35546875, "learning_rate": 4.976615493349508e-06, "loss": 2.2995, "step": 6735 }, { "epoch": 0.36137339055793993, "grad_norm": 0.3984375, "learning_rate": 4.976603637077486e-06, "loss": 2.3231, "step": 6736 }, { "epoch": 0.36142703862660946, "grad_norm": 0.412109375, "learning_rate": 4.9765917778147085e-06, "loss": 2.3979, "step": 6737 }, { "epoch": 0.361480686695279, "grad_norm": 0.5078125, "learning_rate": 4.976579915561189e-06, "loss": 2.1391, "step": 6738 }, { "epoch": 0.3615343347639485, "grad_norm": 0.39453125, "learning_rate": 4.976568050316943e-06, "loss": 2.2693, "step": 6739 }, { "epoch": 0.361587982832618, "grad_norm": 0.375, "learning_rate": 4.976556182081984e-06, "loss": 2.5841, "step": 6740 }, { "epoch": 0.36164163090128754, "grad_norm": 0.388671875, "learning_rate": 4.9765443108563255e-06, "loss": 2.3822, "step": 6741 }, { "epoch": 0.36169527896995707, "grad_norm": 0.435546875, "learning_rate": 4.976532436639983e-06, "loss": 2.285, "step": 6742 }, { "epoch": 0.3617489270386266, "grad_norm": 0.373046875, "learning_rate": 4.976520559432971e-06, "loss": 2.0832, "step": 6743 }, { "epoch": 0.36180257510729613, "grad_norm": 0.404296875, "learning_rate": 4.976508679235304e-06, "loss": 2.5932, "step": 6744 }, { "epoch": 0.36185622317596566, "grad_norm": 0.3125, "learning_rate": 4.9764967960469954e-06, "loss": 2.2057, "step": 6745 }, { "epoch": 0.3619098712446352, "grad_norm": 0.55078125, "learning_rate": 4.9764849098680605e-06, "loss": 2.4485, "step": 6746 }, { "epoch": 0.3619635193133047, "grad_norm": 0.3515625, "learning_rate": 4.976473020698512e-06, "loss": 2.239, "step": 6747 }, { "epoch": 0.36201716738197426, "grad_norm": 0.4453125, "learning_rate": 4.976461128538367e-06, "loss": 2.4697, "step": 6748 }, { "epoch": 0.3620708154506438, "grad_norm": 0.37890625, "learning_rate": 4.976449233387637e-06, "loss": 2.2323, "step": 6749 }, { "epoch": 0.3621244635193133, "grad_norm": 0.396484375, "learning_rate": 4.9764373352463374e-06, "loss": 2.1303, "step": 6750 }, { "epoch": 0.36217811158798285, "grad_norm": 0.470703125, "learning_rate": 4.976425434114483e-06, "loss": 2.2844, "step": 6751 }, { "epoch": 0.3622317596566524, "grad_norm": 0.36328125, "learning_rate": 4.976413529992089e-06, "loss": 2.6147, "step": 6752 }, { "epoch": 0.36228540772532186, "grad_norm": 0.416015625, "learning_rate": 4.976401622879168e-06, "loss": 1.4948, "step": 6753 }, { "epoch": 0.3623390557939914, "grad_norm": 0.5625, "learning_rate": 4.976389712775735e-06, "loss": 2.4326, "step": 6754 }, { "epoch": 0.3623927038626609, "grad_norm": 0.416015625, "learning_rate": 4.976377799681804e-06, "loss": 1.9687, "step": 6755 }, { "epoch": 0.36244635193133046, "grad_norm": 0.5859375, "learning_rate": 4.976365883597391e-06, "loss": 2.1822, "step": 6756 }, { "epoch": 0.3625, "grad_norm": 0.373046875, "learning_rate": 4.976353964522509e-06, "loss": 2.0957, "step": 6757 }, { "epoch": 0.3625536480686695, "grad_norm": 0.380859375, "learning_rate": 4.976342042457172e-06, "loss": 2.2932, "step": 6758 }, { "epoch": 0.36260729613733905, "grad_norm": 0.3515625, "learning_rate": 4.976330117401395e-06, "loss": 1.8567, "step": 6759 }, { "epoch": 0.3626609442060086, "grad_norm": 0.34765625, "learning_rate": 4.976318189355194e-06, "loss": 2.4214, "step": 6760 }, { "epoch": 0.3627145922746781, "grad_norm": 0.404296875, "learning_rate": 4.976306258318581e-06, "loss": 2.3802, "step": 6761 }, { "epoch": 0.36276824034334765, "grad_norm": 0.38671875, "learning_rate": 4.976294324291571e-06, "loss": 2.1973, "step": 6762 }, { "epoch": 0.3628218884120172, "grad_norm": 0.3671875, "learning_rate": 4.9762823872741785e-06, "loss": 2.2332, "step": 6763 }, { "epoch": 0.3628755364806867, "grad_norm": 0.462890625, "learning_rate": 4.976270447266418e-06, "loss": 2.3854, "step": 6764 }, { "epoch": 0.36292918454935624, "grad_norm": 0.80859375, "learning_rate": 4.976258504268306e-06, "loss": 2.4444, "step": 6765 }, { "epoch": 0.3629828326180258, "grad_norm": 0.380859375, "learning_rate": 4.9762465582798525e-06, "loss": 2.1601, "step": 6766 }, { "epoch": 0.3630364806866953, "grad_norm": 0.48046875, "learning_rate": 4.976234609301076e-06, "loss": 1.6838, "step": 6767 }, { "epoch": 0.3630901287553648, "grad_norm": 0.39453125, "learning_rate": 4.976222657331988e-06, "loss": 2.31, "step": 6768 }, { "epoch": 0.3631437768240343, "grad_norm": 0.4140625, "learning_rate": 4.976210702372605e-06, "loss": 2.2388, "step": 6769 }, { "epoch": 0.36319742489270385, "grad_norm": 0.37890625, "learning_rate": 4.976198744422941e-06, "loss": 2.1773, "step": 6770 }, { "epoch": 0.3632510729613734, "grad_norm": 0.41015625, "learning_rate": 4.9761867834830094e-06, "loss": 2.3193, "step": 6771 }, { "epoch": 0.3633047210300429, "grad_norm": 0.423828125, "learning_rate": 4.9761748195528255e-06, "loss": 2.302, "step": 6772 }, { "epoch": 0.36335836909871244, "grad_norm": 0.404296875, "learning_rate": 4.9761628526324035e-06, "loss": 2.1091, "step": 6773 }, { "epoch": 0.363412017167382, "grad_norm": 0.390625, "learning_rate": 4.976150882721759e-06, "loss": 2.6435, "step": 6774 }, { "epoch": 0.3634656652360515, "grad_norm": 0.42578125, "learning_rate": 4.976138909820904e-06, "loss": 2.4909, "step": 6775 }, { "epoch": 0.36351931330472104, "grad_norm": 0.34765625, "learning_rate": 4.976126933929855e-06, "loss": 2.2047, "step": 6776 }, { "epoch": 0.36357296137339057, "grad_norm": 20.0, "learning_rate": 4.9761149550486254e-06, "loss": 2.2801, "step": 6777 }, { "epoch": 0.3636266094420601, "grad_norm": 0.369140625, "learning_rate": 4.976102973177231e-06, "loss": 2.2084, "step": 6778 }, { "epoch": 0.36368025751072963, "grad_norm": 0.42578125, "learning_rate": 4.9760909883156845e-06, "loss": 2.4399, "step": 6779 }, { "epoch": 0.36373390557939916, "grad_norm": 0.3671875, "learning_rate": 4.976079000464002e-06, "loss": 2.4386, "step": 6780 }, { "epoch": 0.3637875536480687, "grad_norm": 0.4453125, "learning_rate": 4.976067009622196e-06, "loss": 2.1211, "step": 6781 }, { "epoch": 0.36384120171673817, "grad_norm": 0.4140625, "learning_rate": 4.976055015790284e-06, "loss": 2.3809, "step": 6782 }, { "epoch": 0.3638948497854077, "grad_norm": 0.36328125, "learning_rate": 4.9760430189682775e-06, "loss": 2.252, "step": 6783 }, { "epoch": 0.36394849785407724, "grad_norm": 0.3515625, "learning_rate": 4.976031019156192e-06, "loss": 1.707, "step": 6784 }, { "epoch": 0.36400214592274677, "grad_norm": 4.375, "learning_rate": 4.9760190163540435e-06, "loss": 2.088, "step": 6785 }, { "epoch": 0.3640557939914163, "grad_norm": 0.5234375, "learning_rate": 4.976007010561844e-06, "loss": 2.643, "step": 6786 }, { "epoch": 0.36410944206008583, "grad_norm": 0.419921875, "learning_rate": 4.9759950017796085e-06, "loss": 2.4823, "step": 6787 }, { "epoch": 0.36416309012875536, "grad_norm": 0.37890625, "learning_rate": 4.975982990007354e-06, "loss": 2.4143, "step": 6788 }, { "epoch": 0.3642167381974249, "grad_norm": 0.43359375, "learning_rate": 4.975970975245092e-06, "loss": 2.3961, "step": 6789 }, { "epoch": 0.3642703862660944, "grad_norm": 0.365234375, "learning_rate": 4.975958957492838e-06, "loss": 2.2715, "step": 6790 }, { "epoch": 0.36432403433476396, "grad_norm": 0.38671875, "learning_rate": 4.975946936750608e-06, "loss": 2.44, "step": 6791 }, { "epoch": 0.3643776824034335, "grad_norm": 0.37890625, "learning_rate": 4.975934913018413e-06, "loss": 2.0424, "step": 6792 }, { "epoch": 0.364431330472103, "grad_norm": 0.36328125, "learning_rate": 4.975922886296271e-06, "loss": 2.203, "step": 6793 }, { "epoch": 0.36448497854077255, "grad_norm": 0.42578125, "learning_rate": 4.975910856584196e-06, "loss": 2.3955, "step": 6794 }, { "epoch": 0.3645386266094421, "grad_norm": 0.53125, "learning_rate": 4.975898823882201e-06, "loss": 2.369, "step": 6795 }, { "epoch": 0.36459227467811156, "grad_norm": 0.40234375, "learning_rate": 4.975886788190301e-06, "loss": 2.3355, "step": 6796 }, { "epoch": 0.3646459227467811, "grad_norm": 0.443359375, "learning_rate": 4.975874749508511e-06, "loss": 2.3078, "step": 6797 }, { "epoch": 0.3646995708154506, "grad_norm": 0.44140625, "learning_rate": 4.975862707836846e-06, "loss": 2.022, "step": 6798 }, { "epoch": 0.36475321888412016, "grad_norm": 0.400390625, "learning_rate": 4.975850663175319e-06, "loss": 1.8757, "step": 6799 }, { "epoch": 0.3648068669527897, "grad_norm": 0.392578125, "learning_rate": 4.975838615523946e-06, "loss": 2.2426, "step": 6800 }, { "epoch": 0.3648605150214592, "grad_norm": 0.412109375, "learning_rate": 4.9758265648827406e-06, "loss": 2.4858, "step": 6801 }, { "epoch": 0.36491416309012875, "grad_norm": 0.396484375, "learning_rate": 4.975814511251718e-06, "loss": 2.2375, "step": 6802 }, { "epoch": 0.3649678111587983, "grad_norm": 0.390625, "learning_rate": 4.975802454630893e-06, "loss": 2.4016, "step": 6803 }, { "epoch": 0.3650214592274678, "grad_norm": 0.470703125, "learning_rate": 4.975790395020279e-06, "loss": 2.2979, "step": 6804 }, { "epoch": 0.36507510729613735, "grad_norm": 0.419921875, "learning_rate": 4.975778332419892e-06, "loss": 2.2469, "step": 6805 }, { "epoch": 0.3651287553648069, "grad_norm": 0.41015625, "learning_rate": 4.975766266829744e-06, "loss": 2.2298, "step": 6806 }, { "epoch": 0.3651824034334764, "grad_norm": 0.369140625, "learning_rate": 4.9757541982498535e-06, "loss": 2.3282, "step": 6807 }, { "epoch": 0.36523605150214594, "grad_norm": 0.60546875, "learning_rate": 4.975742126680232e-06, "loss": 1.4782, "step": 6808 }, { "epoch": 0.3652896995708155, "grad_norm": 0.390625, "learning_rate": 4.975730052120895e-06, "loss": 2.5128, "step": 6809 }, { "epoch": 0.365343347639485, "grad_norm": 0.5078125, "learning_rate": 4.975717974571857e-06, "loss": 2.1067, "step": 6810 }, { "epoch": 0.3653969957081545, "grad_norm": 0.3671875, "learning_rate": 4.9757058940331334e-06, "loss": 2.351, "step": 6811 }, { "epoch": 0.365450643776824, "grad_norm": 0.412109375, "learning_rate": 4.975693810504738e-06, "loss": 2.267, "step": 6812 }, { "epoch": 0.36550429184549355, "grad_norm": 0.404296875, "learning_rate": 4.975681723986685e-06, "loss": 2.439, "step": 6813 }, { "epoch": 0.3655579399141631, "grad_norm": 0.392578125, "learning_rate": 4.97566963447899e-06, "loss": 2.3487, "step": 6814 }, { "epoch": 0.3656115879828326, "grad_norm": 0.455078125, "learning_rate": 4.975657541981666e-06, "loss": 2.3504, "step": 6815 }, { "epoch": 0.36566523605150214, "grad_norm": 0.447265625, "learning_rate": 4.975645446494729e-06, "loss": 2.0361, "step": 6816 }, { "epoch": 0.3657188841201717, "grad_norm": 0.447265625, "learning_rate": 4.975633348018194e-06, "loss": 2.2876, "step": 6817 }, { "epoch": 0.3657725321888412, "grad_norm": 0.40234375, "learning_rate": 4.9756212465520745e-06, "loss": 2.4066, "step": 6818 }, { "epoch": 0.36582618025751074, "grad_norm": 0.3984375, "learning_rate": 4.975609142096386e-06, "loss": 2.3373, "step": 6819 }, { "epoch": 0.36587982832618027, "grad_norm": 0.408203125, "learning_rate": 4.975597034651142e-06, "loss": 2.2851, "step": 6820 }, { "epoch": 0.3659334763948498, "grad_norm": 0.44921875, "learning_rate": 4.975584924216358e-06, "loss": 2.331, "step": 6821 }, { "epoch": 0.36598712446351933, "grad_norm": 0.45703125, "learning_rate": 4.9755728107920475e-06, "loss": 2.4257, "step": 6822 }, { "epoch": 0.36604077253218886, "grad_norm": 0.4609375, "learning_rate": 4.9755606943782275e-06, "loss": 2.253, "step": 6823 }, { "epoch": 0.3660944206008584, "grad_norm": 0.57421875, "learning_rate": 4.97554857497491e-06, "loss": 2.2984, "step": 6824 }, { "epoch": 0.36614806866952787, "grad_norm": 0.375, "learning_rate": 4.975536452582111e-06, "loss": 2.2818, "step": 6825 }, { "epoch": 0.3662017167381974, "grad_norm": 0.421875, "learning_rate": 4.9755243271998455e-06, "loss": 2.2281, "step": 6826 }, { "epoch": 0.36625536480686693, "grad_norm": 0.546875, "learning_rate": 4.975512198828127e-06, "loss": 2.2769, "step": 6827 }, { "epoch": 0.36630901287553647, "grad_norm": 0.3515625, "learning_rate": 4.9755000674669705e-06, "loss": 2.2034, "step": 6828 }, { "epoch": 0.366362660944206, "grad_norm": 0.322265625, "learning_rate": 4.975487933116391e-06, "loss": 2.2504, "step": 6829 }, { "epoch": 0.36641630901287553, "grad_norm": 0.38671875, "learning_rate": 4.9754757957764035e-06, "loss": 2.5962, "step": 6830 }, { "epoch": 0.36646995708154506, "grad_norm": 0.484375, "learning_rate": 4.975463655447022e-06, "loss": 2.1265, "step": 6831 }, { "epoch": 0.3665236051502146, "grad_norm": 0.4453125, "learning_rate": 4.975451512128261e-06, "loss": 2.4629, "step": 6832 }, { "epoch": 0.3665772532188841, "grad_norm": 0.396484375, "learning_rate": 4.975439365820135e-06, "loss": 1.6209, "step": 6833 }, { "epoch": 0.36663090128755366, "grad_norm": 0.453125, "learning_rate": 4.97542721652266e-06, "loss": 2.2907, "step": 6834 }, { "epoch": 0.3666845493562232, "grad_norm": 0.419921875, "learning_rate": 4.97541506423585e-06, "loss": 2.2874, "step": 6835 }, { "epoch": 0.3667381974248927, "grad_norm": 1.1171875, "learning_rate": 4.975402908959719e-06, "loss": 2.1849, "step": 6836 }, { "epoch": 0.36679184549356225, "grad_norm": 0.416015625, "learning_rate": 4.975390750694282e-06, "loss": 2.2262, "step": 6837 }, { "epoch": 0.3668454935622318, "grad_norm": 0.466796875, "learning_rate": 4.975378589439553e-06, "loss": 2.2796, "step": 6838 }, { "epoch": 0.36689914163090126, "grad_norm": 0.37890625, "learning_rate": 4.975366425195549e-06, "loss": 2.4515, "step": 6839 }, { "epoch": 0.3669527896995708, "grad_norm": 0.37890625, "learning_rate": 4.975354257962283e-06, "loss": 2.2198, "step": 6840 }, { "epoch": 0.3670064377682403, "grad_norm": 0.4375, "learning_rate": 4.975342087739769e-06, "loss": 2.4159, "step": 6841 }, { "epoch": 0.36706008583690986, "grad_norm": 0.4296875, "learning_rate": 4.975329914528025e-06, "loss": 2.4086, "step": 6842 }, { "epoch": 0.3671137339055794, "grad_norm": 0.404296875, "learning_rate": 4.975317738327061e-06, "loss": 2.5122, "step": 6843 }, { "epoch": 0.3671673819742489, "grad_norm": 0.4765625, "learning_rate": 4.975305559136895e-06, "loss": 2.2918, "step": 6844 }, { "epoch": 0.36722103004291845, "grad_norm": 0.390625, "learning_rate": 4.9752933769575396e-06, "loss": 2.442, "step": 6845 }, { "epoch": 0.367274678111588, "grad_norm": 0.4375, "learning_rate": 4.975281191789012e-06, "loss": 2.1205, "step": 6846 }, { "epoch": 0.3673283261802575, "grad_norm": 0.41015625, "learning_rate": 4.975269003631324e-06, "loss": 2.3605, "step": 6847 }, { "epoch": 0.36738197424892705, "grad_norm": 0.345703125, "learning_rate": 4.975256812484494e-06, "loss": 2.0178, "step": 6848 }, { "epoch": 0.3674356223175966, "grad_norm": 0.4921875, "learning_rate": 4.975244618348533e-06, "loss": 2.045, "step": 6849 }, { "epoch": 0.3674892703862661, "grad_norm": 0.482421875, "learning_rate": 4.975232421223458e-06, "loss": 2.4261, "step": 6850 }, { "epoch": 0.36754291845493564, "grad_norm": 0.388671875, "learning_rate": 4.975220221109282e-06, "loss": 2.3471, "step": 6851 }, { "epoch": 0.3675965665236052, "grad_norm": 0.439453125, "learning_rate": 4.975208018006023e-06, "loss": 2.2766, "step": 6852 }, { "epoch": 0.3676502145922747, "grad_norm": 0.416015625, "learning_rate": 4.9751958119136914e-06, "loss": 2.4252, "step": 6853 }, { "epoch": 0.3677038626609442, "grad_norm": 0.388671875, "learning_rate": 4.975183602832305e-06, "loss": 2.4068, "step": 6854 }, { "epoch": 0.3677575107296137, "grad_norm": 0.51953125, "learning_rate": 4.975171390761877e-06, "loss": 2.3274, "step": 6855 }, { "epoch": 0.36781115879828324, "grad_norm": 0.404296875, "learning_rate": 4.975159175702423e-06, "loss": 2.2168, "step": 6856 }, { "epoch": 0.3678648068669528, "grad_norm": 0.419921875, "learning_rate": 4.975146957653958e-06, "loss": 2.6385, "step": 6857 }, { "epoch": 0.3679184549356223, "grad_norm": 0.3984375, "learning_rate": 4.975134736616496e-06, "loss": 2.2738, "step": 6858 }, { "epoch": 0.36797210300429184, "grad_norm": 0.4140625, "learning_rate": 4.975122512590053e-06, "loss": 2.1279, "step": 6859 }, { "epoch": 0.36802575107296137, "grad_norm": 0.49609375, "learning_rate": 4.9751102855746415e-06, "loss": 2.5478, "step": 6860 }, { "epoch": 0.3680793991416309, "grad_norm": 0.40625, "learning_rate": 4.975098055570277e-06, "loss": 2.057, "step": 6861 }, { "epoch": 0.36813304721030043, "grad_norm": 0.94921875, "learning_rate": 4.9750858225769764e-06, "loss": 2.4407, "step": 6862 }, { "epoch": 0.36818669527896997, "grad_norm": 0.5, "learning_rate": 4.9750735865947515e-06, "loss": 2.415, "step": 6863 }, { "epoch": 0.3682403433476395, "grad_norm": 0.35546875, "learning_rate": 4.975061347623618e-06, "loss": 2.3506, "step": 6864 }, { "epoch": 0.36829399141630903, "grad_norm": 0.388671875, "learning_rate": 4.975049105663593e-06, "loss": 2.2141, "step": 6865 }, { "epoch": 0.36834763948497856, "grad_norm": 0.50390625, "learning_rate": 4.975036860714689e-06, "loss": 2.4209, "step": 6866 }, { "epoch": 0.3684012875536481, "grad_norm": 0.388671875, "learning_rate": 4.975024612776921e-06, "loss": 2.3542, "step": 6867 }, { "epoch": 0.36845493562231757, "grad_norm": 0.4453125, "learning_rate": 4.975012361850303e-06, "loss": 2.2208, "step": 6868 }, { "epoch": 0.3685085836909871, "grad_norm": 0.423828125, "learning_rate": 4.975000107934852e-06, "loss": 2.3214, "step": 6869 }, { "epoch": 0.36856223175965663, "grad_norm": 0.314453125, "learning_rate": 4.974987851030581e-06, "loss": 1.7984, "step": 6870 }, { "epoch": 0.36861587982832617, "grad_norm": 0.412109375, "learning_rate": 4.9749755911375055e-06, "loss": 2.4339, "step": 6871 }, { "epoch": 0.3686695278969957, "grad_norm": 0.45703125, "learning_rate": 4.974963328255641e-06, "loss": 2.1736, "step": 6872 }, { "epoch": 0.36872317596566523, "grad_norm": 0.359375, "learning_rate": 4.9749510623850006e-06, "loss": 2.2885, "step": 6873 }, { "epoch": 0.36877682403433476, "grad_norm": 0.384765625, "learning_rate": 4.9749387935256e-06, "loss": 2.0643, "step": 6874 }, { "epoch": 0.3688304721030043, "grad_norm": 0.69921875, "learning_rate": 4.974926521677455e-06, "loss": 2.2615, "step": 6875 }, { "epoch": 0.3688841201716738, "grad_norm": 0.396484375, "learning_rate": 4.974914246840578e-06, "loss": 2.4251, "step": 6876 }, { "epoch": 0.36893776824034336, "grad_norm": 0.478515625, "learning_rate": 4.974901969014986e-06, "loss": 2.3759, "step": 6877 }, { "epoch": 0.3689914163090129, "grad_norm": 0.392578125, "learning_rate": 4.974889688200694e-06, "loss": 2.3382, "step": 6878 }, { "epoch": 0.3690450643776824, "grad_norm": 0.396484375, "learning_rate": 4.974877404397714e-06, "loss": 2.3338, "step": 6879 }, { "epoch": 0.36909871244635195, "grad_norm": 0.375, "learning_rate": 4.974865117606064e-06, "loss": 2.275, "step": 6880 }, { "epoch": 0.3691523605150215, "grad_norm": 0.373046875, "learning_rate": 4.974852827825758e-06, "loss": 2.0743, "step": 6881 }, { "epoch": 0.369206008583691, "grad_norm": 0.400390625, "learning_rate": 4.97484053505681e-06, "loss": 2.3374, "step": 6882 }, { "epoch": 0.3692596566523605, "grad_norm": 0.4140625, "learning_rate": 4.9748282392992346e-06, "loss": 2.1264, "step": 6883 }, { "epoch": 0.36931330472103, "grad_norm": 0.33984375, "learning_rate": 4.974815940553048e-06, "loss": 2.1017, "step": 6884 }, { "epoch": 0.36936695278969955, "grad_norm": 0.34765625, "learning_rate": 4.974803638818264e-06, "loss": 2.2042, "step": 6885 }, { "epoch": 0.3694206008583691, "grad_norm": 0.5546875, "learning_rate": 4.9747913340948985e-06, "loss": 2.2099, "step": 6886 }, { "epoch": 0.3694742489270386, "grad_norm": 0.384765625, "learning_rate": 4.974779026382966e-06, "loss": 2.377, "step": 6887 }, { "epoch": 0.36952789699570815, "grad_norm": 0.322265625, "learning_rate": 4.97476671568248e-06, "loss": 2.1031, "step": 6888 }, { "epoch": 0.3695815450643777, "grad_norm": 0.416015625, "learning_rate": 4.9747544019934564e-06, "loss": 2.5017, "step": 6889 }, { "epoch": 0.3696351931330472, "grad_norm": 0.412109375, "learning_rate": 4.974742085315911e-06, "loss": 2.2995, "step": 6890 }, { "epoch": 0.36968884120171674, "grad_norm": 0.388671875, "learning_rate": 4.9747297656498575e-06, "loss": 2.1405, "step": 6891 }, { "epoch": 0.3697424892703863, "grad_norm": 0.345703125, "learning_rate": 4.974717442995311e-06, "loss": 2.0872, "step": 6892 }, { "epoch": 0.3697961373390558, "grad_norm": 0.453125, "learning_rate": 4.974705117352287e-06, "loss": 2.2513, "step": 6893 }, { "epoch": 0.36984978540772534, "grad_norm": 0.314453125, "learning_rate": 4.9746927887207984e-06, "loss": 2.3424, "step": 6894 }, { "epoch": 0.36990343347639487, "grad_norm": 0.44140625, "learning_rate": 4.974680457100862e-06, "loss": 2.2495, "step": 6895 }, { "epoch": 0.3699570815450644, "grad_norm": 0.96484375, "learning_rate": 4.974668122492493e-06, "loss": 2.421, "step": 6896 }, { "epoch": 0.3700107296137339, "grad_norm": 0.3359375, "learning_rate": 4.974655784895705e-06, "loss": 2.0767, "step": 6897 }, { "epoch": 0.3700643776824034, "grad_norm": 0.322265625, "learning_rate": 4.974643444310513e-06, "loss": 1.9064, "step": 6898 }, { "epoch": 0.37011802575107294, "grad_norm": 0.439453125, "learning_rate": 4.974631100736933e-06, "loss": 2.2839, "step": 6899 }, { "epoch": 0.3701716738197425, "grad_norm": 0.37890625, "learning_rate": 4.974618754174979e-06, "loss": 2.0965, "step": 6900 }, { "epoch": 0.370225321888412, "grad_norm": 0.42578125, "learning_rate": 4.974606404624667e-06, "loss": 2.2364, "step": 6901 }, { "epoch": 0.37027896995708154, "grad_norm": 0.392578125, "learning_rate": 4.974594052086009e-06, "loss": 2.2326, "step": 6902 }, { "epoch": 0.37033261802575107, "grad_norm": 0.39453125, "learning_rate": 4.974581696559023e-06, "loss": 2.4757, "step": 6903 }, { "epoch": 0.3703862660944206, "grad_norm": 0.3203125, "learning_rate": 4.974569338043723e-06, "loss": 2.1913, "step": 6904 }, { "epoch": 0.37043991416309013, "grad_norm": 0.4296875, "learning_rate": 4.9745569765401245e-06, "loss": 2.2778, "step": 6905 }, { "epoch": 0.37049356223175967, "grad_norm": 0.36328125, "learning_rate": 4.974544612048241e-06, "loss": 2.2967, "step": 6906 }, { "epoch": 0.3705472103004292, "grad_norm": 0.55859375, "learning_rate": 4.974532244568088e-06, "loss": 2.2191, "step": 6907 }, { "epoch": 0.37060085836909873, "grad_norm": 0.416015625, "learning_rate": 4.97451987409968e-06, "loss": 2.5115, "step": 6908 }, { "epoch": 0.37065450643776826, "grad_norm": 0.3671875, "learning_rate": 4.974507500643034e-06, "loss": 2.1549, "step": 6909 }, { "epoch": 0.3707081545064378, "grad_norm": 0.4375, "learning_rate": 4.974495124198162e-06, "loss": 2.4114, "step": 6910 }, { "epoch": 0.37076180257510727, "grad_norm": 0.462890625, "learning_rate": 4.974482744765081e-06, "loss": 2.0806, "step": 6911 }, { "epoch": 0.3708154506437768, "grad_norm": 0.4609375, "learning_rate": 4.974470362343806e-06, "loss": 2.2015, "step": 6912 }, { "epoch": 0.37086909871244633, "grad_norm": 0.478515625, "learning_rate": 4.974457976934351e-06, "loss": 2.3766, "step": 6913 }, { "epoch": 0.37092274678111586, "grad_norm": 0.5546875, "learning_rate": 4.974445588536731e-06, "loss": 2.4321, "step": 6914 }, { "epoch": 0.3709763948497854, "grad_norm": 0.447265625, "learning_rate": 4.974433197150961e-06, "loss": 2.3437, "step": 6915 }, { "epoch": 0.3710300429184549, "grad_norm": 0.421875, "learning_rate": 4.974420802777056e-06, "loss": 2.4078, "step": 6916 }, { "epoch": 0.37108369098712446, "grad_norm": 0.439453125, "learning_rate": 4.974408405415032e-06, "loss": 2.3784, "step": 6917 }, { "epoch": 0.371137339055794, "grad_norm": 0.396484375, "learning_rate": 4.974396005064903e-06, "loss": 2.3799, "step": 6918 }, { "epoch": 0.3711909871244635, "grad_norm": 0.341796875, "learning_rate": 4.974383601726683e-06, "loss": 1.919, "step": 6919 }, { "epoch": 0.37124463519313305, "grad_norm": 0.44140625, "learning_rate": 4.97437119540039e-06, "loss": 1.9071, "step": 6920 }, { "epoch": 0.3712982832618026, "grad_norm": 0.48828125, "learning_rate": 4.974358786086035e-06, "loss": 2.0394, "step": 6921 }, { "epoch": 0.3713519313304721, "grad_norm": 0.431640625, "learning_rate": 4.974346373783637e-06, "loss": 1.8474, "step": 6922 }, { "epoch": 0.37140557939914165, "grad_norm": 0.380859375, "learning_rate": 4.974333958493208e-06, "loss": 1.8861, "step": 6923 }, { "epoch": 0.3714592274678112, "grad_norm": 0.392578125, "learning_rate": 4.974321540214764e-06, "loss": 2.1243, "step": 6924 }, { "epoch": 0.3715128755364807, "grad_norm": 0.431640625, "learning_rate": 4.9743091189483205e-06, "loss": 2.4927, "step": 6925 }, { "epoch": 0.3715665236051502, "grad_norm": 0.400390625, "learning_rate": 4.974296694693892e-06, "loss": 2.3666, "step": 6926 }, { "epoch": 0.3716201716738197, "grad_norm": 0.458984375, "learning_rate": 4.974284267451493e-06, "loss": 2.4341, "step": 6927 }, { "epoch": 0.37167381974248925, "grad_norm": 0.3359375, "learning_rate": 4.974271837221139e-06, "loss": 2.0622, "step": 6928 }, { "epoch": 0.3717274678111588, "grad_norm": 0.41015625, "learning_rate": 4.974259404002846e-06, "loss": 2.4283, "step": 6929 }, { "epoch": 0.3717811158798283, "grad_norm": 0.470703125, "learning_rate": 4.974246967796627e-06, "loss": 2.3384, "step": 6930 }, { "epoch": 0.37183476394849785, "grad_norm": 0.427734375, "learning_rate": 4.974234528602499e-06, "loss": 2.3403, "step": 6931 }, { "epoch": 0.3718884120171674, "grad_norm": 0.470703125, "learning_rate": 4.974222086420475e-06, "loss": 2.1993, "step": 6932 }, { "epoch": 0.3719420600858369, "grad_norm": 0.443359375, "learning_rate": 4.974209641250572e-06, "loss": 2.1121, "step": 6933 }, { "epoch": 0.37199570815450644, "grad_norm": 0.85546875, "learning_rate": 4.974197193092804e-06, "loss": 2.3049, "step": 6934 }, { "epoch": 0.372049356223176, "grad_norm": 0.474609375, "learning_rate": 4.974184741947186e-06, "loss": 2.4605, "step": 6935 }, { "epoch": 0.3721030042918455, "grad_norm": 0.376953125, "learning_rate": 4.974172287813734e-06, "loss": 2.2224, "step": 6936 }, { "epoch": 0.37215665236051504, "grad_norm": 0.462890625, "learning_rate": 4.974159830692461e-06, "loss": 2.2858, "step": 6937 }, { "epoch": 0.37221030042918457, "grad_norm": 0.388671875, "learning_rate": 4.974147370583385e-06, "loss": 2.3696, "step": 6938 }, { "epoch": 0.3722639484978541, "grad_norm": 0.5390625, "learning_rate": 4.974134907486518e-06, "loss": 2.3892, "step": 6939 }, { "epoch": 0.3723175965665236, "grad_norm": 0.40625, "learning_rate": 4.974122441401877e-06, "loss": 2.5238, "step": 6940 }, { "epoch": 0.3723712446351931, "grad_norm": 0.330078125, "learning_rate": 4.974109972329476e-06, "loss": 1.8149, "step": 6941 }, { "epoch": 0.37242489270386264, "grad_norm": 0.640625, "learning_rate": 4.974097500269331e-06, "loss": 2.4112, "step": 6942 }, { "epoch": 0.3724785407725322, "grad_norm": 0.35546875, "learning_rate": 4.9740850252214565e-06, "loss": 2.2018, "step": 6943 }, { "epoch": 0.3725321888412017, "grad_norm": 0.49609375, "learning_rate": 4.974072547185868e-06, "loss": 2.3131, "step": 6944 }, { "epoch": 0.37258583690987124, "grad_norm": 0.376953125, "learning_rate": 4.97406006616258e-06, "loss": 2.0773, "step": 6945 }, { "epoch": 0.37263948497854077, "grad_norm": 0.98828125, "learning_rate": 4.974047582151606e-06, "loss": 1.8677, "step": 6946 }, { "epoch": 0.3726931330472103, "grad_norm": 0.466796875, "learning_rate": 4.974035095152965e-06, "loss": 2.2946, "step": 6947 }, { "epoch": 0.37274678111587983, "grad_norm": 0.4140625, "learning_rate": 4.97402260516667e-06, "loss": 2.2978, "step": 6948 }, { "epoch": 0.37280042918454936, "grad_norm": 0.330078125, "learning_rate": 4.974010112192734e-06, "loss": 1.9992, "step": 6949 }, { "epoch": 0.3728540772532189, "grad_norm": 0.447265625, "learning_rate": 4.973997616231176e-06, "loss": 2.3852, "step": 6950 }, { "epoch": 0.37290772532188843, "grad_norm": 0.59765625, "learning_rate": 4.973985117282009e-06, "loss": 2.0971, "step": 6951 }, { "epoch": 0.37296137339055796, "grad_norm": 0.412109375, "learning_rate": 4.973972615345248e-06, "loss": 2.2981, "step": 6952 }, { "epoch": 0.3730150214592275, "grad_norm": 0.458984375, "learning_rate": 4.973960110420908e-06, "loss": 2.4744, "step": 6953 }, { "epoch": 0.373068669527897, "grad_norm": 0.431640625, "learning_rate": 4.9739476025090046e-06, "loss": 1.196, "step": 6954 }, { "epoch": 0.3731223175965665, "grad_norm": 0.4765625, "learning_rate": 4.973935091609554e-06, "loss": 2.1648, "step": 6955 }, { "epoch": 0.37317596566523603, "grad_norm": 0.3984375, "learning_rate": 4.973922577722568e-06, "loss": 2.192, "step": 6956 }, { "epoch": 0.37322961373390556, "grad_norm": 0.494140625, "learning_rate": 4.973910060848066e-06, "loss": 2.5079, "step": 6957 }, { "epoch": 0.3732832618025751, "grad_norm": 0.369140625, "learning_rate": 4.97389754098606e-06, "loss": 2.3727, "step": 6958 }, { "epoch": 0.3733369098712446, "grad_norm": 0.41796875, "learning_rate": 4.973885018136566e-06, "loss": 2.1315, "step": 6959 }, { "epoch": 0.37339055793991416, "grad_norm": 0.37890625, "learning_rate": 4.973872492299599e-06, "loss": 2.2941, "step": 6960 }, { "epoch": 0.3734442060085837, "grad_norm": 0.392578125, "learning_rate": 4.973859963475174e-06, "loss": 2.1137, "step": 6961 }, { "epoch": 0.3734978540772532, "grad_norm": 0.400390625, "learning_rate": 4.973847431663308e-06, "loss": 2.3168, "step": 6962 }, { "epoch": 0.37355150214592275, "grad_norm": 0.396484375, "learning_rate": 4.973834896864013e-06, "loss": 2.2209, "step": 6963 }, { "epoch": 0.3736051502145923, "grad_norm": 0.390625, "learning_rate": 4.973822359077306e-06, "loss": 2.3145, "step": 6964 }, { "epoch": 0.3736587982832618, "grad_norm": 0.423828125, "learning_rate": 4.973809818303203e-06, "loss": 1.395, "step": 6965 }, { "epoch": 0.37371244635193135, "grad_norm": 0.392578125, "learning_rate": 4.973797274541716e-06, "loss": 2.1085, "step": 6966 }, { "epoch": 0.3737660944206009, "grad_norm": 0.35546875, "learning_rate": 4.973784727792863e-06, "loss": 2.2008, "step": 6967 }, { "epoch": 0.3738197424892704, "grad_norm": 0.435546875, "learning_rate": 4.973772178056659e-06, "loss": 2.2881, "step": 6968 }, { "epoch": 0.3738733905579399, "grad_norm": 0.60546875, "learning_rate": 4.973759625333118e-06, "loss": 2.1359, "step": 6969 }, { "epoch": 0.3739270386266094, "grad_norm": 0.380859375, "learning_rate": 4.973747069622256e-06, "loss": 2.241, "step": 6970 }, { "epoch": 0.37398068669527895, "grad_norm": 0.4296875, "learning_rate": 4.973734510924087e-06, "loss": 1.8089, "step": 6971 }, { "epoch": 0.3740343347639485, "grad_norm": 0.43359375, "learning_rate": 4.973721949238627e-06, "loss": 2.2164, "step": 6972 }, { "epoch": 0.374087982832618, "grad_norm": 0.44921875, "learning_rate": 4.973709384565891e-06, "loss": 2.3273, "step": 6973 }, { "epoch": 0.37414163090128755, "grad_norm": 0.439453125, "learning_rate": 4.973696816905896e-06, "loss": 2.2034, "step": 6974 }, { "epoch": 0.3741952789699571, "grad_norm": 0.6015625, "learning_rate": 4.9736842462586535e-06, "loss": 2.6564, "step": 6975 }, { "epoch": 0.3742489270386266, "grad_norm": 0.357421875, "learning_rate": 4.97367167262418e-06, "loss": 2.5283, "step": 6976 }, { "epoch": 0.37430257510729614, "grad_norm": 0.38671875, "learning_rate": 4.973659096002493e-06, "loss": 2.4363, "step": 6977 }, { "epoch": 0.3743562231759657, "grad_norm": 0.408203125, "learning_rate": 4.973646516393607e-06, "loss": 2.4353, "step": 6978 }, { "epoch": 0.3744098712446352, "grad_norm": 0.376953125, "learning_rate": 4.9736339337975335e-06, "loss": 2.3223, "step": 6979 }, { "epoch": 0.37446351931330474, "grad_norm": 0.388671875, "learning_rate": 4.973621348214292e-06, "loss": 2.1807, "step": 6980 }, { "epoch": 0.37451716738197427, "grad_norm": 0.484375, "learning_rate": 4.973608759643896e-06, "loss": 2.3822, "step": 6981 }, { "epoch": 0.3745708154506438, "grad_norm": 0.396484375, "learning_rate": 4.973596168086361e-06, "loss": 2.1162, "step": 6982 }, { "epoch": 0.3746244635193133, "grad_norm": 0.41796875, "learning_rate": 4.973583573541701e-06, "loss": 2.6139, "step": 6983 }, { "epoch": 0.3746781115879828, "grad_norm": 0.333984375, "learning_rate": 4.973570976009933e-06, "loss": 2.4712, "step": 6984 }, { "epoch": 0.37473175965665234, "grad_norm": 0.44921875, "learning_rate": 4.973558375491071e-06, "loss": 2.465, "step": 6985 }, { "epoch": 0.3747854077253219, "grad_norm": 0.6328125, "learning_rate": 4.973545771985131e-06, "loss": 2.4601, "step": 6986 }, { "epoch": 0.3748390557939914, "grad_norm": 0.375, "learning_rate": 4.973533165492127e-06, "loss": 2.2448, "step": 6987 }, { "epoch": 0.37489270386266094, "grad_norm": 0.39453125, "learning_rate": 4.973520556012076e-06, "loss": 2.2427, "step": 6988 }, { "epoch": 0.37494635193133047, "grad_norm": 0.37109375, "learning_rate": 4.973507943544992e-06, "loss": 2.2347, "step": 6989 }, { "epoch": 0.375, "grad_norm": 0.392578125, "learning_rate": 4.973495328090891e-06, "loss": 2.3712, "step": 6990 }, { "epoch": 0.37505364806866953, "grad_norm": 0.37109375, "learning_rate": 4.973482709649787e-06, "loss": 2.2415, "step": 6991 }, { "epoch": 0.37510729613733906, "grad_norm": 0.3671875, "learning_rate": 4.9734700882216954e-06, "loss": 2.3807, "step": 6992 }, { "epoch": 0.3751609442060086, "grad_norm": 0.5625, "learning_rate": 4.973457463806633e-06, "loss": 2.246, "step": 6993 }, { "epoch": 0.3752145922746781, "grad_norm": 0.38671875, "learning_rate": 4.973444836404615e-06, "loss": 2.1981, "step": 6994 }, { "epoch": 0.37526824034334766, "grad_norm": 0.423828125, "learning_rate": 4.973432206015655e-06, "loss": 2.2594, "step": 6995 }, { "epoch": 0.3753218884120172, "grad_norm": 0.375, "learning_rate": 4.973419572639768e-06, "loss": 1.8943, "step": 6996 }, { "epoch": 0.3753755364806867, "grad_norm": 0.345703125, "learning_rate": 4.973406936276972e-06, "loss": 2.3893, "step": 6997 }, { "epoch": 0.3754291845493562, "grad_norm": 0.38671875, "learning_rate": 4.973394296927279e-06, "loss": 1.9093, "step": 6998 }, { "epoch": 0.37548283261802573, "grad_norm": 0.486328125, "learning_rate": 4.973381654590707e-06, "loss": 2.4733, "step": 6999 }, { "epoch": 0.37553648068669526, "grad_norm": 0.392578125, "learning_rate": 4.97336900926727e-06, "loss": 2.2757, "step": 7000 }, { "epoch": 0.3755901287553648, "grad_norm": 0.345703125, "learning_rate": 4.973356360956982e-06, "loss": 2.1916, "step": 7001 }, { "epoch": 0.3756437768240343, "grad_norm": 0.40625, "learning_rate": 4.97334370965986e-06, "loss": 2.485, "step": 7002 }, { "epoch": 0.37569742489270386, "grad_norm": 0.3984375, "learning_rate": 4.973331055375919e-06, "loss": 2.0981, "step": 7003 }, { "epoch": 0.3757510729613734, "grad_norm": 0.404296875, "learning_rate": 4.973318398105175e-06, "loss": 2.2179, "step": 7004 }, { "epoch": 0.3758047210300429, "grad_norm": 0.37109375, "learning_rate": 4.973305737847641e-06, "loss": 2.274, "step": 7005 }, { "epoch": 0.37585836909871245, "grad_norm": 0.59375, "learning_rate": 4.973293074603335e-06, "loss": 2.633, "step": 7006 }, { "epoch": 0.375912017167382, "grad_norm": 0.38671875, "learning_rate": 4.9732804083722705e-06, "loss": 2.1843, "step": 7007 }, { "epoch": 0.3759656652360515, "grad_norm": 0.5078125, "learning_rate": 4.9732677391544635e-06, "loss": 2.2365, "step": 7008 }, { "epoch": 0.37601931330472105, "grad_norm": 0.57421875, "learning_rate": 4.973255066949929e-06, "loss": 2.3077, "step": 7009 }, { "epoch": 0.3760729613733906, "grad_norm": 0.44921875, "learning_rate": 4.9732423917586826e-06, "loss": 2.3912, "step": 7010 }, { "epoch": 0.3761266094420601, "grad_norm": 0.486328125, "learning_rate": 4.973229713580738e-06, "loss": 2.137, "step": 7011 }, { "epoch": 0.3761802575107296, "grad_norm": 0.40234375, "learning_rate": 4.9732170324161134e-06, "loss": 2.4725, "step": 7012 }, { "epoch": 0.3762339055793991, "grad_norm": 0.435546875, "learning_rate": 4.9732043482648225e-06, "loss": 2.2928, "step": 7013 }, { "epoch": 0.37628755364806865, "grad_norm": 0.45703125, "learning_rate": 4.973191661126881e-06, "loss": 2.6556, "step": 7014 }, { "epoch": 0.3763412017167382, "grad_norm": 0.375, "learning_rate": 4.973178971002304e-06, "loss": 2.3176, "step": 7015 }, { "epoch": 0.3763948497854077, "grad_norm": 0.375, "learning_rate": 4.973166277891106e-06, "loss": 2.2521, "step": 7016 }, { "epoch": 0.37644849785407725, "grad_norm": 0.486328125, "learning_rate": 4.973153581793303e-06, "loss": 2.427, "step": 7017 }, { "epoch": 0.3765021459227468, "grad_norm": 1.875, "learning_rate": 4.973140882708911e-06, "loss": 2.3426, "step": 7018 }, { "epoch": 0.3765557939914163, "grad_norm": 0.3828125, "learning_rate": 4.973128180637945e-06, "loss": 2.3357, "step": 7019 }, { "epoch": 0.37660944206008584, "grad_norm": 0.796875, "learning_rate": 4.97311547558042e-06, "loss": 2.3495, "step": 7020 }, { "epoch": 0.3766630901287554, "grad_norm": 0.50390625, "learning_rate": 4.973102767536352e-06, "loss": 2.3665, "step": 7021 }, { "epoch": 0.3767167381974249, "grad_norm": 0.63671875, "learning_rate": 4.973090056505755e-06, "loss": 2.1446, "step": 7022 }, { "epoch": 0.37677038626609444, "grad_norm": 0.4140625, "learning_rate": 4.973077342488645e-06, "loss": 2.2574, "step": 7023 }, { "epoch": 0.37682403433476397, "grad_norm": 0.474609375, "learning_rate": 4.973064625485038e-06, "loss": 2.2097, "step": 7024 }, { "epoch": 0.3768776824034335, "grad_norm": 0.50390625, "learning_rate": 4.973051905494949e-06, "loss": 2.3199, "step": 7025 }, { "epoch": 0.376931330472103, "grad_norm": 0.40234375, "learning_rate": 4.973039182518393e-06, "loss": 2.4487, "step": 7026 }, { "epoch": 0.3769849785407725, "grad_norm": 0.421875, "learning_rate": 4.973026456555386e-06, "loss": 2.4638, "step": 7027 }, { "epoch": 0.37703862660944204, "grad_norm": 0.349609375, "learning_rate": 4.973013727605943e-06, "loss": 2.1403, "step": 7028 }, { "epoch": 0.37709227467811157, "grad_norm": 0.4140625, "learning_rate": 4.9730009956700784e-06, "loss": 2.3351, "step": 7029 }, { "epoch": 0.3771459227467811, "grad_norm": 0.484375, "learning_rate": 4.97298826074781e-06, "loss": 2.469, "step": 7030 }, { "epoch": 0.37719957081545064, "grad_norm": 0.328125, "learning_rate": 4.97297552283915e-06, "loss": 2.0746, "step": 7031 }, { "epoch": 0.37725321888412017, "grad_norm": 0.451171875, "learning_rate": 4.9729627819441165e-06, "loss": 2.2719, "step": 7032 }, { "epoch": 0.3773068669527897, "grad_norm": 0.40234375, "learning_rate": 4.9729500380627235e-06, "loss": 2.3857, "step": 7033 }, { "epoch": 0.37736051502145923, "grad_norm": 0.404296875, "learning_rate": 4.972937291194987e-06, "loss": 2.4285, "step": 7034 }, { "epoch": 0.37741416309012876, "grad_norm": 0.326171875, "learning_rate": 4.9729245413409225e-06, "loss": 1.9944, "step": 7035 }, { "epoch": 0.3774678111587983, "grad_norm": 0.384765625, "learning_rate": 4.972911788500545e-06, "loss": 2.0916, "step": 7036 }, { "epoch": 0.3775214592274678, "grad_norm": 0.458984375, "learning_rate": 4.97289903267387e-06, "loss": 2.5332, "step": 7037 }, { "epoch": 0.37757510729613736, "grad_norm": 0.44140625, "learning_rate": 4.9728862738609115e-06, "loss": 2.2524, "step": 7038 }, { "epoch": 0.3776287553648069, "grad_norm": 0.34765625, "learning_rate": 4.972873512061688e-06, "loss": 2.1768, "step": 7039 }, { "epoch": 0.3776824034334764, "grad_norm": 0.458984375, "learning_rate": 4.972860747276213e-06, "loss": 2.295, "step": 7040 }, { "epoch": 0.3777360515021459, "grad_norm": 0.34765625, "learning_rate": 4.972847979504502e-06, "loss": 2.0305, "step": 7041 }, { "epoch": 0.37778969957081543, "grad_norm": 0.447265625, "learning_rate": 4.97283520874657e-06, "loss": 2.2667, "step": 7042 }, { "epoch": 0.37784334763948496, "grad_norm": 0.3828125, "learning_rate": 4.972822435002432e-06, "loss": 2.4008, "step": 7043 }, { "epoch": 0.3778969957081545, "grad_norm": 0.5, "learning_rate": 4.972809658272106e-06, "loss": 2.3243, "step": 7044 }, { "epoch": 0.377950643776824, "grad_norm": 0.609375, "learning_rate": 4.9727968785556044e-06, "loss": 2.3922, "step": 7045 }, { "epoch": 0.37800429184549356, "grad_norm": 0.443359375, "learning_rate": 4.972784095852945e-06, "loss": 2.2605, "step": 7046 }, { "epoch": 0.3780579399141631, "grad_norm": 0.54296875, "learning_rate": 4.972771310164143e-06, "loss": 2.1397, "step": 7047 }, { "epoch": 0.3781115879828326, "grad_norm": 0.451171875, "learning_rate": 4.972758521489211e-06, "loss": 2.338, "step": 7048 }, { "epoch": 0.37816523605150215, "grad_norm": 0.443359375, "learning_rate": 4.972745729828169e-06, "loss": 2.2055, "step": 7049 }, { "epoch": 0.3782188841201717, "grad_norm": 0.37890625, "learning_rate": 4.972732935181028e-06, "loss": 2.2838, "step": 7050 }, { "epoch": 0.3782725321888412, "grad_norm": 0.42578125, "learning_rate": 4.972720137547806e-06, "loss": 2.4065, "step": 7051 }, { "epoch": 0.37832618025751075, "grad_norm": 0.33984375, "learning_rate": 4.9727073369285186e-06, "loss": 2.028, "step": 7052 }, { "epoch": 0.3783798283261803, "grad_norm": 0.390625, "learning_rate": 4.97269453332318e-06, "loss": 2.0486, "step": 7053 }, { "epoch": 0.3784334763948498, "grad_norm": 0.5234375, "learning_rate": 4.972681726731806e-06, "loss": 2.1498, "step": 7054 }, { "epoch": 0.3784871244635193, "grad_norm": 0.4296875, "learning_rate": 4.972668917154412e-06, "loss": 2.4562, "step": 7055 }, { "epoch": 0.3785407725321888, "grad_norm": 0.439453125, "learning_rate": 4.9726561045910145e-06, "loss": 2.429, "step": 7056 }, { "epoch": 0.37859442060085835, "grad_norm": 0.416015625, "learning_rate": 4.9726432890416285e-06, "loss": 2.2175, "step": 7057 }, { "epoch": 0.3786480686695279, "grad_norm": 0.388671875, "learning_rate": 4.9726304705062685e-06, "loss": 2.405, "step": 7058 }, { "epoch": 0.3787017167381974, "grad_norm": 0.43359375, "learning_rate": 4.972617648984951e-06, "loss": 2.366, "step": 7059 }, { "epoch": 0.37875536480686695, "grad_norm": 0.33203125, "learning_rate": 4.97260482447769e-06, "loss": 2.2983, "step": 7060 }, { "epoch": 0.3788090128755365, "grad_norm": 0.36328125, "learning_rate": 4.972591996984504e-06, "loss": 2.1874, "step": 7061 }, { "epoch": 0.378862660944206, "grad_norm": 0.37109375, "learning_rate": 4.9725791665054056e-06, "loss": 2.4753, "step": 7062 }, { "epoch": 0.37891630901287554, "grad_norm": 0.38671875, "learning_rate": 4.972566333040411e-06, "loss": 2.2042, "step": 7063 }, { "epoch": 0.3789699570815451, "grad_norm": 0.369140625, "learning_rate": 4.972553496589537e-06, "loss": 2.3195, "step": 7064 }, { "epoch": 0.3790236051502146, "grad_norm": 0.37109375, "learning_rate": 4.972540657152798e-06, "loss": 2.0113, "step": 7065 }, { "epoch": 0.37907725321888414, "grad_norm": 0.3671875, "learning_rate": 4.972527814730209e-06, "loss": 2.0691, "step": 7066 }, { "epoch": 0.37913090128755367, "grad_norm": 0.416015625, "learning_rate": 4.972514969321787e-06, "loss": 2.2011, "step": 7067 }, { "epoch": 0.3791845493562232, "grad_norm": 0.474609375, "learning_rate": 4.972502120927546e-06, "loss": 2.2321, "step": 7068 }, { "epoch": 0.37923819742489273, "grad_norm": 0.3515625, "learning_rate": 4.972489269547503e-06, "loss": 2.1991, "step": 7069 }, { "epoch": 0.3792918454935622, "grad_norm": 0.39453125, "learning_rate": 4.972476415181671e-06, "loss": 2.289, "step": 7070 }, { "epoch": 0.37934549356223174, "grad_norm": 0.333984375, "learning_rate": 4.972463557830069e-06, "loss": 2.1045, "step": 7071 }, { "epoch": 0.37939914163090127, "grad_norm": 0.423828125, "learning_rate": 4.972450697492709e-06, "loss": 2.2808, "step": 7072 }, { "epoch": 0.3794527896995708, "grad_norm": 0.380859375, "learning_rate": 4.97243783416961e-06, "loss": 2.2335, "step": 7073 }, { "epoch": 0.37950643776824033, "grad_norm": 0.416015625, "learning_rate": 4.972424967860784e-06, "loss": 2.0, "step": 7074 }, { "epoch": 0.37956008583690987, "grad_norm": 0.3515625, "learning_rate": 4.97241209856625e-06, "loss": 2.1722, "step": 7075 }, { "epoch": 0.3796137339055794, "grad_norm": 0.486328125, "learning_rate": 4.972399226286021e-06, "loss": 2.397, "step": 7076 }, { "epoch": 0.37966738197424893, "grad_norm": 0.439453125, "learning_rate": 4.972386351020114e-06, "loss": 2.295, "step": 7077 }, { "epoch": 0.37972103004291846, "grad_norm": 0.369140625, "learning_rate": 4.972373472768544e-06, "loss": 2.1835, "step": 7078 }, { "epoch": 0.379774678111588, "grad_norm": 0.37890625, "learning_rate": 4.972360591531326e-06, "loss": 2.0656, "step": 7079 }, { "epoch": 0.3798283261802575, "grad_norm": 0.515625, "learning_rate": 4.972347707308476e-06, "loss": 1.707, "step": 7080 }, { "epoch": 0.37988197424892706, "grad_norm": 0.37890625, "learning_rate": 4.9723348201000095e-06, "loss": 2.5063, "step": 7081 }, { "epoch": 0.3799356223175966, "grad_norm": 0.79296875, "learning_rate": 4.972321929905943e-06, "loss": 2.2989, "step": 7082 }, { "epoch": 0.3799892703862661, "grad_norm": 1.625, "learning_rate": 4.972309036726291e-06, "loss": 2.482, "step": 7083 }, { "epoch": 0.3800429184549356, "grad_norm": 0.421875, "learning_rate": 4.972296140561069e-06, "loss": 2.4023, "step": 7084 }, { "epoch": 0.38009656652360513, "grad_norm": 0.4296875, "learning_rate": 4.972283241410293e-06, "loss": 2.182, "step": 7085 }, { "epoch": 0.38015021459227466, "grad_norm": 0.421875, "learning_rate": 4.9722703392739795e-06, "loss": 2.2363, "step": 7086 }, { "epoch": 0.3802038626609442, "grad_norm": 0.400390625, "learning_rate": 4.972257434152141e-06, "loss": 2.2699, "step": 7087 }, { "epoch": 0.3802575107296137, "grad_norm": 0.466796875, "learning_rate": 4.972244526044797e-06, "loss": 2.1328, "step": 7088 }, { "epoch": 0.38031115879828326, "grad_norm": 0.44140625, "learning_rate": 4.972231614951961e-06, "loss": 2.2785, "step": 7089 }, { "epoch": 0.3803648068669528, "grad_norm": 0.35546875, "learning_rate": 4.972218700873648e-06, "loss": 2.1698, "step": 7090 }, { "epoch": 0.3804184549356223, "grad_norm": 0.474609375, "learning_rate": 4.972205783809874e-06, "loss": 2.4779, "step": 7091 }, { "epoch": 0.38047210300429185, "grad_norm": 0.4453125, "learning_rate": 4.972192863760656e-06, "loss": 2.3522, "step": 7092 }, { "epoch": 0.3805257510729614, "grad_norm": 0.376953125, "learning_rate": 4.97217994072601e-06, "loss": 2.4875, "step": 7093 }, { "epoch": 0.3805793991416309, "grad_norm": 0.68359375, "learning_rate": 4.972167014705948e-06, "loss": 2.537, "step": 7094 }, { "epoch": 0.38063304721030045, "grad_norm": 0.482421875, "learning_rate": 4.972154085700489e-06, "loss": 2.3838, "step": 7095 }, { "epoch": 0.38068669527897, "grad_norm": 0.4296875, "learning_rate": 4.972141153709647e-06, "loss": 2.3521, "step": 7096 }, { "epoch": 0.3807403433476395, "grad_norm": 0.546875, "learning_rate": 4.972128218733439e-06, "loss": 2.3617, "step": 7097 }, { "epoch": 0.380793991416309, "grad_norm": 0.384765625, "learning_rate": 4.972115280771878e-06, "loss": 2.1456, "step": 7098 }, { "epoch": 0.3808476394849785, "grad_norm": 0.361328125, "learning_rate": 4.9721023398249825e-06, "loss": 2.3124, "step": 7099 }, { "epoch": 0.38090128755364805, "grad_norm": 0.384765625, "learning_rate": 4.972089395892766e-06, "loss": 2.3804, "step": 7100 }, { "epoch": 0.3809549356223176, "grad_norm": 0.359375, "learning_rate": 4.972076448975246e-06, "loss": 2.3229, "step": 7101 }, { "epoch": 0.3810085836909871, "grad_norm": 0.404296875, "learning_rate": 4.972063499072437e-06, "loss": 2.5004, "step": 7102 }, { "epoch": 0.38106223175965664, "grad_norm": 0.384765625, "learning_rate": 4.972050546184355e-06, "loss": 2.2201, "step": 7103 }, { "epoch": 0.3811158798283262, "grad_norm": 0.42578125, "learning_rate": 4.9720375903110154e-06, "loss": 2.3234, "step": 7104 }, { "epoch": 0.3811695278969957, "grad_norm": 0.421875, "learning_rate": 4.9720246314524334e-06, "loss": 2.6284, "step": 7105 }, { "epoch": 0.38122317596566524, "grad_norm": 0.431640625, "learning_rate": 4.972011669608626e-06, "loss": 2.7362, "step": 7106 }, { "epoch": 0.38127682403433477, "grad_norm": 0.43359375, "learning_rate": 4.971998704779608e-06, "loss": 2.4781, "step": 7107 }, { "epoch": 0.3813304721030043, "grad_norm": 0.390625, "learning_rate": 4.971985736965395e-06, "loss": 2.1324, "step": 7108 }, { "epoch": 0.38138412017167383, "grad_norm": 0.388671875, "learning_rate": 4.971972766166002e-06, "loss": 2.5452, "step": 7109 }, { "epoch": 0.38143776824034337, "grad_norm": 0.408203125, "learning_rate": 4.971959792381446e-06, "loss": 2.2418, "step": 7110 }, { "epoch": 0.3814914163090129, "grad_norm": 0.515625, "learning_rate": 4.9719468156117415e-06, "loss": 2.3692, "step": 7111 }, { "epoch": 0.38154506437768243, "grad_norm": 0.38671875, "learning_rate": 4.971933835856906e-06, "loss": 2.1894, "step": 7112 }, { "epoch": 0.3815987124463519, "grad_norm": 0.38671875, "learning_rate": 4.971920853116952e-06, "loss": 2.0978, "step": 7113 }, { "epoch": 0.38165236051502144, "grad_norm": 0.42578125, "learning_rate": 4.9719078673919e-06, "loss": 2.4108, "step": 7114 }, { "epoch": 0.38170600858369097, "grad_norm": 0.345703125, "learning_rate": 4.971894878681761e-06, "loss": 2.4164, "step": 7115 }, { "epoch": 0.3817596566523605, "grad_norm": 0.376953125, "learning_rate": 4.971881886986552e-06, "loss": 2.4666, "step": 7116 }, { "epoch": 0.38181330472103003, "grad_norm": 0.55078125, "learning_rate": 4.97186889230629e-06, "loss": 2.2857, "step": 7117 }, { "epoch": 0.38186695278969957, "grad_norm": 0.4140625, "learning_rate": 4.97185589464099e-06, "loss": 2.2304, "step": 7118 }, { "epoch": 0.3819206008583691, "grad_norm": 0.458984375, "learning_rate": 4.971842893990667e-06, "loss": 2.2917, "step": 7119 }, { "epoch": 0.38197424892703863, "grad_norm": 0.453125, "learning_rate": 4.971829890355337e-06, "loss": 2.2466, "step": 7120 }, { "epoch": 0.38202789699570816, "grad_norm": 0.369140625, "learning_rate": 4.971816883735017e-06, "loss": 2.4693, "step": 7121 }, { "epoch": 0.3820815450643777, "grad_norm": 0.359375, "learning_rate": 4.971803874129721e-06, "loss": 2.1536, "step": 7122 }, { "epoch": 0.3821351931330472, "grad_norm": 0.412109375, "learning_rate": 4.971790861539465e-06, "loss": 2.2441, "step": 7123 }, { "epoch": 0.38218884120171676, "grad_norm": 0.41796875, "learning_rate": 4.971777845964266e-06, "loss": 2.2809, "step": 7124 }, { "epoch": 0.3822424892703863, "grad_norm": 0.498046875, "learning_rate": 4.971764827404139e-06, "loss": 2.2723, "step": 7125 }, { "epoch": 0.3822961373390558, "grad_norm": 0.423828125, "learning_rate": 4.971751805859099e-06, "loss": 2.3634, "step": 7126 }, { "epoch": 0.3823497854077253, "grad_norm": 0.376953125, "learning_rate": 4.971738781329161e-06, "loss": 2.0454, "step": 7127 }, { "epoch": 0.3824034334763948, "grad_norm": 0.41015625, "learning_rate": 4.971725753814344e-06, "loss": 2.1559, "step": 7128 }, { "epoch": 0.38245708154506436, "grad_norm": 0.419921875, "learning_rate": 4.971712723314661e-06, "loss": 2.3429, "step": 7129 }, { "epoch": 0.3825107296137339, "grad_norm": 0.39453125, "learning_rate": 4.971699689830128e-06, "loss": 2.5033, "step": 7130 }, { "epoch": 0.3825643776824034, "grad_norm": 0.435546875, "learning_rate": 4.9716866533607614e-06, "loss": 2.2841, "step": 7131 }, { "epoch": 0.38261802575107295, "grad_norm": 0.49609375, "learning_rate": 4.971673613906577e-06, "loss": 2.2691, "step": 7132 }, { "epoch": 0.3826716738197425, "grad_norm": 0.41796875, "learning_rate": 4.9716605714675906e-06, "loss": 2.4972, "step": 7133 }, { "epoch": 0.382725321888412, "grad_norm": 0.61328125, "learning_rate": 4.971647526043817e-06, "loss": 2.3507, "step": 7134 }, { "epoch": 0.38277896995708155, "grad_norm": 0.40234375, "learning_rate": 4.971634477635272e-06, "loss": 2.3991, "step": 7135 }, { "epoch": 0.3828326180257511, "grad_norm": 0.41796875, "learning_rate": 4.971621426241972e-06, "loss": 2.3648, "step": 7136 }, { "epoch": 0.3828862660944206, "grad_norm": 0.46875, "learning_rate": 4.971608371863934e-06, "loss": 1.2331, "step": 7137 }, { "epoch": 0.38293991416309014, "grad_norm": 0.404296875, "learning_rate": 4.971595314501172e-06, "loss": 2.2686, "step": 7138 }, { "epoch": 0.3829935622317597, "grad_norm": 0.384765625, "learning_rate": 4.971582254153702e-06, "loss": 2.2696, "step": 7139 }, { "epoch": 0.3830472103004292, "grad_norm": 0.34375, "learning_rate": 4.97156919082154e-06, "loss": 2.2344, "step": 7140 }, { "epoch": 0.38310085836909874, "grad_norm": 0.43359375, "learning_rate": 4.9715561245047025e-06, "loss": 2.4161, "step": 7141 }, { "epoch": 0.3831545064377682, "grad_norm": 0.37109375, "learning_rate": 4.971543055203204e-06, "loss": 2.1406, "step": 7142 }, { "epoch": 0.38320815450643775, "grad_norm": 0.51171875, "learning_rate": 4.971529982917061e-06, "loss": 2.2736, "step": 7143 }, { "epoch": 0.3832618025751073, "grad_norm": 0.369140625, "learning_rate": 4.971516907646289e-06, "loss": 2.2636, "step": 7144 }, { "epoch": 0.3833154506437768, "grad_norm": 0.47265625, "learning_rate": 4.971503829390903e-06, "loss": 1.5563, "step": 7145 }, { "epoch": 0.38336909871244634, "grad_norm": 0.41015625, "learning_rate": 4.9714907481509214e-06, "loss": 2.1994, "step": 7146 }, { "epoch": 0.3834227467811159, "grad_norm": 0.435546875, "learning_rate": 4.971477663926357e-06, "loss": 2.1354, "step": 7147 }, { "epoch": 0.3834763948497854, "grad_norm": 0.75390625, "learning_rate": 4.971464576717228e-06, "loss": 2.3701, "step": 7148 }, { "epoch": 0.38353004291845494, "grad_norm": 0.6328125, "learning_rate": 4.971451486523548e-06, "loss": 2.0047, "step": 7149 }, { "epoch": 0.38358369098712447, "grad_norm": 0.37109375, "learning_rate": 4.971438393345335e-06, "loss": 2.2298, "step": 7150 }, { "epoch": 0.383637339055794, "grad_norm": 0.41796875, "learning_rate": 4.971425297182603e-06, "loss": 2.6139, "step": 7151 }, { "epoch": 0.38369098712446353, "grad_norm": 0.435546875, "learning_rate": 4.971412198035368e-06, "loss": 2.076, "step": 7152 }, { "epoch": 0.38374463519313307, "grad_norm": 0.51171875, "learning_rate": 4.971399095903647e-06, "loss": 2.4839, "step": 7153 }, { "epoch": 0.3837982832618026, "grad_norm": 0.48046875, "learning_rate": 4.971385990787456e-06, "loss": 2.2269, "step": 7154 }, { "epoch": 0.38385193133047213, "grad_norm": 0.357421875, "learning_rate": 4.971372882686809e-06, "loss": 2.1772, "step": 7155 }, { "epoch": 0.3839055793991416, "grad_norm": 0.412109375, "learning_rate": 4.971359771601723e-06, "loss": 2.5647, "step": 7156 }, { "epoch": 0.38395922746781114, "grad_norm": 0.458984375, "learning_rate": 4.971346657532214e-06, "loss": 2.4423, "step": 7157 }, { "epoch": 0.38401287553648067, "grad_norm": 0.41796875, "learning_rate": 4.971333540478297e-06, "loss": 2.3204, "step": 7158 }, { "epoch": 0.3840665236051502, "grad_norm": 0.435546875, "learning_rate": 4.971320420439989e-06, "loss": 2.2262, "step": 7159 }, { "epoch": 0.38412017167381973, "grad_norm": 0.361328125, "learning_rate": 4.971307297417305e-06, "loss": 1.5546, "step": 7160 }, { "epoch": 0.38417381974248926, "grad_norm": 0.396484375, "learning_rate": 4.971294171410261e-06, "loss": 2.2744, "step": 7161 }, { "epoch": 0.3842274678111588, "grad_norm": 0.37890625, "learning_rate": 4.971281042418873e-06, "loss": 2.3603, "step": 7162 }, { "epoch": 0.3842811158798283, "grad_norm": 0.45703125, "learning_rate": 4.971267910443156e-06, "loss": 2.1471, "step": 7163 }, { "epoch": 0.38433476394849786, "grad_norm": 0.5625, "learning_rate": 4.971254775483127e-06, "loss": 2.218, "step": 7164 }, { "epoch": 0.3843884120171674, "grad_norm": 0.359375, "learning_rate": 4.971241637538802e-06, "loss": 2.1042, "step": 7165 }, { "epoch": 0.3844420600858369, "grad_norm": 0.48828125, "learning_rate": 4.971228496610196e-06, "loss": 2.3388, "step": 7166 }, { "epoch": 0.38449570815450645, "grad_norm": 0.458984375, "learning_rate": 4.971215352697325e-06, "loss": 2.2415, "step": 7167 }, { "epoch": 0.384549356223176, "grad_norm": 60.0, "learning_rate": 4.971202205800205e-06, "loss": 1.7043, "step": 7168 }, { "epoch": 0.3846030042918455, "grad_norm": 0.376953125, "learning_rate": 4.971189055918853e-06, "loss": 2.213, "step": 7169 }, { "epoch": 0.384656652360515, "grad_norm": 0.38671875, "learning_rate": 4.971175903053282e-06, "loss": 2.6604, "step": 7170 }, { "epoch": 0.3847103004291845, "grad_norm": 0.39453125, "learning_rate": 4.971162747203512e-06, "loss": 2.4183, "step": 7171 }, { "epoch": 0.38476394849785406, "grad_norm": 0.625, "learning_rate": 4.971149588369555e-06, "loss": 2.1015, "step": 7172 }, { "epoch": 0.3848175965665236, "grad_norm": 0.408203125, "learning_rate": 4.971136426551429e-06, "loss": 2.0564, "step": 7173 }, { "epoch": 0.3848712446351931, "grad_norm": 0.455078125, "learning_rate": 4.971123261749149e-06, "loss": 2.3866, "step": 7174 }, { "epoch": 0.38492489270386265, "grad_norm": 0.388671875, "learning_rate": 4.971110093962732e-06, "loss": 2.0914, "step": 7175 }, { "epoch": 0.3849785407725322, "grad_norm": 0.384765625, "learning_rate": 4.971096923192192e-06, "loss": 2.1723, "step": 7176 }, { "epoch": 0.3850321888412017, "grad_norm": 0.6171875, "learning_rate": 4.9710837494375475e-06, "loss": 2.3698, "step": 7177 }, { "epoch": 0.38508583690987125, "grad_norm": 0.4921875, "learning_rate": 4.9710705726988125e-06, "loss": 2.1224, "step": 7178 }, { "epoch": 0.3851394849785408, "grad_norm": 0.6796875, "learning_rate": 4.971057392976003e-06, "loss": 1.8444, "step": 7179 }, { "epoch": 0.3851931330472103, "grad_norm": 0.44140625, "learning_rate": 4.971044210269136e-06, "loss": 2.2913, "step": 7180 }, { "epoch": 0.38524678111587984, "grad_norm": 2.65625, "learning_rate": 4.971031024578226e-06, "loss": 2.333, "step": 7181 }, { "epoch": 0.3853004291845494, "grad_norm": 0.51953125, "learning_rate": 4.97101783590329e-06, "loss": 2.4733, "step": 7182 }, { "epoch": 0.3853540772532189, "grad_norm": 0.369140625, "learning_rate": 4.9710046442443434e-06, "loss": 2.4432, "step": 7183 }, { "epoch": 0.38540772532188844, "grad_norm": 0.298828125, "learning_rate": 4.970991449601403e-06, "loss": 1.9196, "step": 7184 }, { "epoch": 0.3854613733905579, "grad_norm": 0.4609375, "learning_rate": 4.970978251974483e-06, "loss": 2.4908, "step": 7185 }, { "epoch": 0.38551502145922745, "grad_norm": 0.392578125, "learning_rate": 4.9709650513636015e-06, "loss": 2.3079, "step": 7186 }, { "epoch": 0.385568669527897, "grad_norm": 0.451171875, "learning_rate": 4.970951847768773e-06, "loss": 2.2438, "step": 7187 }, { "epoch": 0.3856223175965665, "grad_norm": 0.390625, "learning_rate": 4.970938641190013e-06, "loss": 2.2458, "step": 7188 }, { "epoch": 0.38567596566523604, "grad_norm": 0.40625, "learning_rate": 4.9709254316273394e-06, "loss": 2.4819, "step": 7189 }, { "epoch": 0.3857296137339056, "grad_norm": 0.41796875, "learning_rate": 4.970912219080767e-06, "loss": 2.272, "step": 7190 }, { "epoch": 0.3857832618025751, "grad_norm": 0.365234375, "learning_rate": 4.9708990035503105e-06, "loss": 2.1889, "step": 7191 }, { "epoch": 0.38583690987124464, "grad_norm": 0.3828125, "learning_rate": 4.9708857850359884e-06, "loss": 2.2867, "step": 7192 }, { "epoch": 0.38589055793991417, "grad_norm": 0.412109375, "learning_rate": 4.970872563537814e-06, "loss": 2.4879, "step": 7193 }, { "epoch": 0.3859442060085837, "grad_norm": 0.37109375, "learning_rate": 4.970859339055806e-06, "loss": 2.1779, "step": 7194 }, { "epoch": 0.38599785407725323, "grad_norm": 0.45703125, "learning_rate": 4.970846111589979e-06, "loss": 2.3466, "step": 7195 }, { "epoch": 0.38605150214592276, "grad_norm": 0.419921875, "learning_rate": 4.9708328811403475e-06, "loss": 2.389, "step": 7196 }, { "epoch": 0.3861051502145923, "grad_norm": 0.435546875, "learning_rate": 4.97081964770693e-06, "loss": 2.2325, "step": 7197 }, { "epoch": 0.38615879828326183, "grad_norm": 0.3984375, "learning_rate": 4.9708064112897416e-06, "loss": 2.3294, "step": 7198 }, { "epoch": 0.3862124463519313, "grad_norm": 0.369140625, "learning_rate": 4.970793171888798e-06, "loss": 2.1591, "step": 7199 }, { "epoch": 0.38626609442060084, "grad_norm": 0.3984375, "learning_rate": 4.9707799295041145e-06, "loss": 2.4702, "step": 7200 }, { "epoch": 0.38631974248927037, "grad_norm": 0.42578125, "learning_rate": 4.970766684135709e-06, "loss": 2.2485, "step": 7201 }, { "epoch": 0.3863733905579399, "grad_norm": 0.435546875, "learning_rate": 4.970753435783596e-06, "loss": 2.277, "step": 7202 }, { "epoch": 0.38642703862660943, "grad_norm": 0.447265625, "learning_rate": 4.970740184447791e-06, "loss": 2.2314, "step": 7203 }, { "epoch": 0.38648068669527896, "grad_norm": 0.4765625, "learning_rate": 4.9707269301283125e-06, "loss": 2.0534, "step": 7204 }, { "epoch": 0.3865343347639485, "grad_norm": 0.4765625, "learning_rate": 4.970713672825174e-06, "loss": 2.5011, "step": 7205 }, { "epoch": 0.386587982832618, "grad_norm": 0.40625, "learning_rate": 4.970700412538393e-06, "loss": 2.2704, "step": 7206 }, { "epoch": 0.38664163090128756, "grad_norm": 0.380859375, "learning_rate": 4.970687149267984e-06, "loss": 2.4894, "step": 7207 }, { "epoch": 0.3866952789699571, "grad_norm": 0.48046875, "learning_rate": 4.970673883013964e-06, "loss": 2.4007, "step": 7208 }, { "epoch": 0.3867489270386266, "grad_norm": 0.435546875, "learning_rate": 4.97066061377635e-06, "loss": 2.2059, "step": 7209 }, { "epoch": 0.38680257510729615, "grad_norm": 0.40234375, "learning_rate": 4.9706473415551556e-06, "loss": 2.2692, "step": 7210 }, { "epoch": 0.3868562231759657, "grad_norm": 0.4296875, "learning_rate": 4.9706340663504e-06, "loss": 2.1618, "step": 7211 }, { "epoch": 0.3869098712446352, "grad_norm": 0.4140625, "learning_rate": 4.970620788162096e-06, "loss": 2.2394, "step": 7212 }, { "epoch": 0.3869635193133047, "grad_norm": 0.33984375, "learning_rate": 4.970607506990261e-06, "loss": 2.1964, "step": 7213 }, { "epoch": 0.3870171673819742, "grad_norm": 0.51171875, "learning_rate": 4.970594222834911e-06, "loss": 2.3603, "step": 7214 }, { "epoch": 0.38707081545064376, "grad_norm": 0.361328125, "learning_rate": 4.970580935696063e-06, "loss": 2.1607, "step": 7215 }, { "epoch": 0.3871244635193133, "grad_norm": 0.3984375, "learning_rate": 4.970567645573732e-06, "loss": 2.2467, "step": 7216 }, { "epoch": 0.3871781115879828, "grad_norm": 0.375, "learning_rate": 4.970554352467935e-06, "loss": 2.4177, "step": 7217 }, { "epoch": 0.38723175965665235, "grad_norm": 0.515625, "learning_rate": 4.970541056378686e-06, "loss": 2.2152, "step": 7218 }, { "epoch": 0.3872854077253219, "grad_norm": 0.447265625, "learning_rate": 4.970527757306003e-06, "loss": 2.3034, "step": 7219 }, { "epoch": 0.3873390557939914, "grad_norm": 0.828125, "learning_rate": 4.970514455249901e-06, "loss": 1.3975, "step": 7220 }, { "epoch": 0.38739270386266095, "grad_norm": 0.4140625, "learning_rate": 4.9705011502103965e-06, "loss": 2.5222, "step": 7221 }, { "epoch": 0.3874463519313305, "grad_norm": 0.478515625, "learning_rate": 4.970487842187506e-06, "loss": 2.044, "step": 7222 }, { "epoch": 0.3875, "grad_norm": 0.41796875, "learning_rate": 4.970474531181245e-06, "loss": 1.9152, "step": 7223 }, { "epoch": 0.38755364806866954, "grad_norm": 0.37890625, "learning_rate": 4.97046121719163e-06, "loss": 2.3908, "step": 7224 }, { "epoch": 0.3876072961373391, "grad_norm": 0.46484375, "learning_rate": 4.970447900218676e-06, "loss": 1.8052, "step": 7225 }, { "epoch": 0.3876609442060086, "grad_norm": 0.416015625, "learning_rate": 4.9704345802624e-06, "loss": 2.2304, "step": 7226 }, { "epoch": 0.38771459227467814, "grad_norm": 0.388671875, "learning_rate": 4.970421257322818e-06, "loss": 2.3354, "step": 7227 }, { "epoch": 0.3877682403433476, "grad_norm": 0.427734375, "learning_rate": 4.970407931399947e-06, "loss": 2.4083, "step": 7228 }, { "epoch": 0.38782188841201715, "grad_norm": 0.49609375, "learning_rate": 4.970394602493801e-06, "loss": 2.2387, "step": 7229 }, { "epoch": 0.3878755364806867, "grad_norm": 0.392578125, "learning_rate": 4.970381270604398e-06, "loss": 2.0629, "step": 7230 }, { "epoch": 0.3879291845493562, "grad_norm": 0.439453125, "learning_rate": 4.970367935731753e-06, "loss": 2.2259, "step": 7231 }, { "epoch": 0.38798283261802574, "grad_norm": 0.421875, "learning_rate": 4.9703545978758814e-06, "loss": 2.1824, "step": 7232 }, { "epoch": 0.3880364806866953, "grad_norm": 0.455078125, "learning_rate": 4.970341257036802e-06, "loss": 2.3738, "step": 7233 }, { "epoch": 0.3880901287553648, "grad_norm": 0.439453125, "learning_rate": 4.9703279132145285e-06, "loss": 1.8181, "step": 7234 }, { "epoch": 0.38814377682403434, "grad_norm": 0.4140625, "learning_rate": 4.970314566409077e-06, "loss": 2.3089, "step": 7235 }, { "epoch": 0.38819742489270387, "grad_norm": 0.42578125, "learning_rate": 4.970301216620466e-06, "loss": 2.3052, "step": 7236 }, { "epoch": 0.3882510729613734, "grad_norm": 0.431640625, "learning_rate": 4.970287863848709e-06, "loss": 2.3736, "step": 7237 }, { "epoch": 0.38830472103004293, "grad_norm": 0.36328125, "learning_rate": 4.970274508093823e-06, "loss": 2.1999, "step": 7238 }, { "epoch": 0.38835836909871246, "grad_norm": 0.451171875, "learning_rate": 4.970261149355824e-06, "loss": 2.1875, "step": 7239 }, { "epoch": 0.388412017167382, "grad_norm": 0.546875, "learning_rate": 4.970247787634729e-06, "loss": 2.1045, "step": 7240 }, { "epoch": 0.3884656652360515, "grad_norm": 0.3984375, "learning_rate": 4.970234422930553e-06, "loss": 2.3745, "step": 7241 }, { "epoch": 0.388519313304721, "grad_norm": 0.9609375, "learning_rate": 4.970221055243314e-06, "loss": 2.4308, "step": 7242 }, { "epoch": 0.38857296137339054, "grad_norm": 0.337890625, "learning_rate": 4.970207684573025e-06, "loss": 1.9982, "step": 7243 }, { "epoch": 0.38862660944206007, "grad_norm": 0.44921875, "learning_rate": 4.970194310919705e-06, "loss": 2.0566, "step": 7244 }, { "epoch": 0.3886802575107296, "grad_norm": 0.34375, "learning_rate": 4.970180934283369e-06, "loss": 1.6918, "step": 7245 }, { "epoch": 0.38873390557939913, "grad_norm": 0.48828125, "learning_rate": 4.970167554664033e-06, "loss": 2.5412, "step": 7246 }, { "epoch": 0.38878755364806866, "grad_norm": 0.439453125, "learning_rate": 4.9701541720617145e-06, "loss": 2.3166, "step": 7247 }, { "epoch": 0.3888412017167382, "grad_norm": 0.51171875, "learning_rate": 4.970140786476427e-06, "loss": 2.1394, "step": 7248 }, { "epoch": 0.3888948497854077, "grad_norm": 0.40625, "learning_rate": 4.970127397908189e-06, "loss": 2.2356, "step": 7249 }, { "epoch": 0.38894849785407726, "grad_norm": 0.455078125, "learning_rate": 4.970114006357015e-06, "loss": 2.2066, "step": 7250 }, { "epoch": 0.3890021459227468, "grad_norm": 0.76953125, "learning_rate": 4.970100611822924e-06, "loss": 2.0629, "step": 7251 }, { "epoch": 0.3890557939914163, "grad_norm": 0.54296875, "learning_rate": 4.9700872143059285e-06, "loss": 2.3704, "step": 7252 }, { "epoch": 0.38910944206008585, "grad_norm": 0.40234375, "learning_rate": 4.9700738138060466e-06, "loss": 2.3735, "step": 7253 }, { "epoch": 0.3891630901287554, "grad_norm": 0.5390625, "learning_rate": 4.970060410323295e-06, "loss": 2.3912, "step": 7254 }, { "epoch": 0.3892167381974249, "grad_norm": 0.439453125, "learning_rate": 4.970047003857689e-06, "loss": 2.2663, "step": 7255 }, { "epoch": 0.38927038626609445, "grad_norm": 0.4140625, "learning_rate": 4.970033594409244e-06, "loss": 1.9004, "step": 7256 }, { "epoch": 0.3893240343347639, "grad_norm": 0.35546875, "learning_rate": 4.970020181977978e-06, "loss": 2.2093, "step": 7257 }, { "epoch": 0.38937768240343346, "grad_norm": 0.373046875, "learning_rate": 4.970006766563906e-06, "loss": 2.4144, "step": 7258 }, { "epoch": 0.389431330472103, "grad_norm": 0.384765625, "learning_rate": 4.969993348167046e-06, "loss": 2.3434, "step": 7259 }, { "epoch": 0.3894849785407725, "grad_norm": 0.451171875, "learning_rate": 4.969979926787411e-06, "loss": 2.4466, "step": 7260 }, { "epoch": 0.38953862660944205, "grad_norm": 0.458984375, "learning_rate": 4.96996650242502e-06, "loss": 2.4355, "step": 7261 }, { "epoch": 0.3895922746781116, "grad_norm": 0.34765625, "learning_rate": 4.969953075079887e-06, "loss": 2.3112, "step": 7262 }, { "epoch": 0.3896459227467811, "grad_norm": 0.376953125, "learning_rate": 4.969939644752031e-06, "loss": 2.235, "step": 7263 }, { "epoch": 0.38969957081545065, "grad_norm": 0.3984375, "learning_rate": 4.969926211441466e-06, "loss": 2.0678, "step": 7264 }, { "epoch": 0.3897532188841202, "grad_norm": 0.46484375, "learning_rate": 4.969912775148209e-06, "loss": 2.3609, "step": 7265 }, { "epoch": 0.3898068669527897, "grad_norm": 0.419921875, "learning_rate": 4.969899335872276e-06, "loss": 2.0462, "step": 7266 }, { "epoch": 0.38986051502145924, "grad_norm": 0.498046875, "learning_rate": 4.969885893613683e-06, "loss": 2.344, "step": 7267 }, { "epoch": 0.3899141630901288, "grad_norm": 0.4921875, "learning_rate": 4.969872448372447e-06, "loss": 2.4514, "step": 7268 }, { "epoch": 0.3899678111587983, "grad_norm": 0.36328125, "learning_rate": 4.969859000148583e-06, "loss": 2.3005, "step": 7269 }, { "epoch": 0.39002145922746784, "grad_norm": 1.1796875, "learning_rate": 4.969845548942108e-06, "loss": 2.5793, "step": 7270 }, { "epoch": 0.3900751072961373, "grad_norm": 0.40234375, "learning_rate": 4.969832094753039e-06, "loss": 2.3775, "step": 7271 }, { "epoch": 0.39012875536480685, "grad_norm": 0.41796875, "learning_rate": 4.9698186375813916e-06, "loss": 2.0881, "step": 7272 }, { "epoch": 0.3901824034334764, "grad_norm": 0.42578125, "learning_rate": 4.9698051774271815e-06, "loss": 2.3244, "step": 7273 }, { "epoch": 0.3902360515021459, "grad_norm": 0.38671875, "learning_rate": 4.969791714290425e-06, "loss": 2.4709, "step": 7274 }, { "epoch": 0.39028969957081544, "grad_norm": 0.451171875, "learning_rate": 4.96977824817114e-06, "loss": 2.196, "step": 7275 }, { "epoch": 0.39034334763948497, "grad_norm": 0.3984375, "learning_rate": 4.96976477906934e-06, "loss": 2.2223, "step": 7276 }, { "epoch": 0.3903969957081545, "grad_norm": 0.42578125, "learning_rate": 4.969751306985045e-06, "loss": 2.3744, "step": 7277 }, { "epoch": 0.39045064377682404, "grad_norm": 0.41015625, "learning_rate": 4.9697378319182665e-06, "loss": 2.36, "step": 7278 }, { "epoch": 0.39050429184549357, "grad_norm": 0.451171875, "learning_rate": 4.969724353869025e-06, "loss": 1.4826, "step": 7279 }, { "epoch": 0.3905579399141631, "grad_norm": 0.333984375, "learning_rate": 4.969710872837334e-06, "loss": 1.8932, "step": 7280 }, { "epoch": 0.39061158798283263, "grad_norm": 0.35546875, "learning_rate": 4.969697388823212e-06, "loss": 2.1662, "step": 7281 }, { "epoch": 0.39066523605150216, "grad_norm": 0.376953125, "learning_rate": 4.969683901826674e-06, "loss": 2.3805, "step": 7282 }, { "epoch": 0.3907188841201717, "grad_norm": 0.439453125, "learning_rate": 4.9696704118477355e-06, "loss": 2.5809, "step": 7283 }, { "epoch": 0.3907725321888412, "grad_norm": 0.84375, "learning_rate": 4.9696569188864156e-06, "loss": 2.4046, "step": 7284 }, { "epoch": 0.3908261802575107, "grad_norm": 0.412109375, "learning_rate": 4.969643422942727e-06, "loss": 2.3834, "step": 7285 }, { "epoch": 0.39087982832618023, "grad_norm": 0.458984375, "learning_rate": 4.969629924016688e-06, "loss": 2.4188, "step": 7286 }, { "epoch": 0.39093347639484977, "grad_norm": 0.390625, "learning_rate": 4.9696164221083155e-06, "loss": 2.0414, "step": 7287 }, { "epoch": 0.3909871244635193, "grad_norm": 0.45703125, "learning_rate": 4.969602917217624e-06, "loss": 2.3513, "step": 7288 }, { "epoch": 0.39104077253218883, "grad_norm": 0.408203125, "learning_rate": 4.969589409344632e-06, "loss": 2.4017, "step": 7289 }, { "epoch": 0.39109442060085836, "grad_norm": 0.38671875, "learning_rate": 4.969575898489352e-06, "loss": 2.3466, "step": 7290 }, { "epoch": 0.3911480686695279, "grad_norm": 0.365234375, "learning_rate": 4.969562384651806e-06, "loss": 2.1313, "step": 7291 }, { "epoch": 0.3912017167381974, "grad_norm": 0.392578125, "learning_rate": 4.969548867832006e-06, "loss": 2.2739, "step": 7292 }, { "epoch": 0.39125536480686696, "grad_norm": 0.390625, "learning_rate": 4.969535348029969e-06, "loss": 2.3615, "step": 7293 }, { "epoch": 0.3913090128755365, "grad_norm": 0.6171875, "learning_rate": 4.969521825245712e-06, "loss": 2.2908, "step": 7294 }, { "epoch": 0.391362660944206, "grad_norm": 0.50390625, "learning_rate": 4.969508299479252e-06, "loss": 2.5669, "step": 7295 }, { "epoch": 0.39141630901287555, "grad_norm": 0.37109375, "learning_rate": 4.969494770730604e-06, "loss": 2.2607, "step": 7296 }, { "epoch": 0.3914699570815451, "grad_norm": 0.396484375, "learning_rate": 4.969481238999785e-06, "loss": 2.3488, "step": 7297 }, { "epoch": 0.3915236051502146, "grad_norm": 0.39453125, "learning_rate": 4.969467704286811e-06, "loss": 1.9909, "step": 7298 }, { "epoch": 0.39157725321888415, "grad_norm": 0.478515625, "learning_rate": 4.969454166591699e-06, "loss": 2.3813, "step": 7299 }, { "epoch": 0.3916309012875536, "grad_norm": 0.408203125, "learning_rate": 4.969440625914463e-06, "loss": 2.44, "step": 7300 }, { "epoch": 0.39168454935622316, "grad_norm": 0.400390625, "learning_rate": 4.969427082255123e-06, "loss": 1.729, "step": 7301 }, { "epoch": 0.3917381974248927, "grad_norm": 0.373046875, "learning_rate": 4.969413535613694e-06, "loss": 2.5105, "step": 7302 }, { "epoch": 0.3917918454935622, "grad_norm": 0.404296875, "learning_rate": 4.969399985990191e-06, "loss": 2.2666, "step": 7303 }, { "epoch": 0.39184549356223175, "grad_norm": 0.41015625, "learning_rate": 4.969386433384631e-06, "loss": 2.3775, "step": 7304 }, { "epoch": 0.3918991416309013, "grad_norm": 0.392578125, "learning_rate": 4.969372877797032e-06, "loss": 2.2873, "step": 7305 }, { "epoch": 0.3919527896995708, "grad_norm": 0.5078125, "learning_rate": 4.969359319227408e-06, "loss": 1.8637, "step": 7306 }, { "epoch": 0.39200643776824035, "grad_norm": 0.412109375, "learning_rate": 4.969345757675777e-06, "loss": 2.2206, "step": 7307 }, { "epoch": 0.3920600858369099, "grad_norm": 0.400390625, "learning_rate": 4.969332193142154e-06, "loss": 2.311, "step": 7308 }, { "epoch": 0.3921137339055794, "grad_norm": 0.390625, "learning_rate": 4.969318625626557e-06, "loss": 2.4605, "step": 7309 }, { "epoch": 0.39216738197424894, "grad_norm": 0.376953125, "learning_rate": 4.969305055129002e-06, "loss": 2.3191, "step": 7310 }, { "epoch": 0.3922210300429185, "grad_norm": 0.4375, "learning_rate": 4.969291481649504e-06, "loss": 2.434, "step": 7311 }, { "epoch": 0.392274678111588, "grad_norm": 0.388671875, "learning_rate": 4.969277905188081e-06, "loss": 2.4169, "step": 7312 }, { "epoch": 0.39232832618025754, "grad_norm": 0.408203125, "learning_rate": 4.969264325744748e-06, "loss": 2.4501, "step": 7313 }, { "epoch": 0.392381974248927, "grad_norm": 30.75, "learning_rate": 4.969250743319523e-06, "loss": 2.2282, "step": 7314 }, { "epoch": 0.39243562231759654, "grad_norm": 0.30859375, "learning_rate": 4.969237157912421e-06, "loss": 2.1037, "step": 7315 }, { "epoch": 0.3924892703862661, "grad_norm": 0.80859375, "learning_rate": 4.969223569523459e-06, "loss": 1.9681, "step": 7316 }, { "epoch": 0.3925429184549356, "grad_norm": 0.52734375, "learning_rate": 4.969209978152653e-06, "loss": 2.3258, "step": 7317 }, { "epoch": 0.39259656652360514, "grad_norm": 0.46875, "learning_rate": 4.96919638380002e-06, "loss": 2.797, "step": 7318 }, { "epoch": 0.39265021459227467, "grad_norm": 0.423828125, "learning_rate": 4.969182786465577e-06, "loss": 2.3412, "step": 7319 }, { "epoch": 0.3927038626609442, "grad_norm": 0.6015625, "learning_rate": 4.969169186149338e-06, "loss": 2.4259, "step": 7320 }, { "epoch": 0.39275751072961373, "grad_norm": 0.37109375, "learning_rate": 4.969155582851323e-06, "loss": 2.0552, "step": 7321 }, { "epoch": 0.39281115879828327, "grad_norm": 0.36328125, "learning_rate": 4.969141976571545e-06, "loss": 2.3111, "step": 7322 }, { "epoch": 0.3928648068669528, "grad_norm": 0.455078125, "learning_rate": 4.969128367310023e-06, "loss": 2.5277, "step": 7323 }, { "epoch": 0.39291845493562233, "grad_norm": 0.384765625, "learning_rate": 4.9691147550667714e-06, "loss": 2.1252, "step": 7324 }, { "epoch": 0.39297210300429186, "grad_norm": 0.42578125, "learning_rate": 4.969101139841808e-06, "loss": 2.102, "step": 7325 }, { "epoch": 0.3930257510729614, "grad_norm": 0.41015625, "learning_rate": 4.969087521635149e-06, "loss": 2.2946, "step": 7326 }, { "epoch": 0.3930793991416309, "grad_norm": 0.390625, "learning_rate": 4.96907390044681e-06, "loss": 2.1636, "step": 7327 }, { "epoch": 0.3931330472103004, "grad_norm": 0.451171875, "learning_rate": 4.9690602762768086e-06, "loss": 2.4976, "step": 7328 }, { "epoch": 0.39318669527896993, "grad_norm": 0.3671875, "learning_rate": 4.969046649125161e-06, "loss": 2.1733, "step": 7329 }, { "epoch": 0.39324034334763946, "grad_norm": 0.474609375, "learning_rate": 4.969033018991883e-06, "loss": 1.5897, "step": 7330 }, { "epoch": 0.393293991416309, "grad_norm": 0.376953125, "learning_rate": 4.969019385876991e-06, "loss": 2.1793, "step": 7331 }, { "epoch": 0.39334763948497853, "grad_norm": 0.373046875, "learning_rate": 4.9690057497805025e-06, "loss": 2.2858, "step": 7332 }, { "epoch": 0.39340128755364806, "grad_norm": 0.3828125, "learning_rate": 4.968992110702434e-06, "loss": 2.3371, "step": 7333 }, { "epoch": 0.3934549356223176, "grad_norm": 0.392578125, "learning_rate": 4.9689784686428e-06, "loss": 2.3665, "step": 7334 }, { "epoch": 0.3935085836909871, "grad_norm": 0.40625, "learning_rate": 4.96896482360162e-06, "loss": 2.3282, "step": 7335 }, { "epoch": 0.39356223175965666, "grad_norm": 0.392578125, "learning_rate": 4.968951175578908e-06, "loss": 2.4321, "step": 7336 }, { "epoch": 0.3936158798283262, "grad_norm": 0.40625, "learning_rate": 4.968937524574681e-06, "loss": 2.195, "step": 7337 }, { "epoch": 0.3936695278969957, "grad_norm": 0.412109375, "learning_rate": 4.968923870588955e-06, "loss": 2.4622, "step": 7338 }, { "epoch": 0.39372317596566525, "grad_norm": 0.341796875, "learning_rate": 4.968910213621749e-06, "loss": 2.0999, "step": 7339 }, { "epoch": 0.3937768240343348, "grad_norm": 0.4296875, "learning_rate": 4.9688965536730774e-06, "loss": 2.2574, "step": 7340 }, { "epoch": 0.3938304721030043, "grad_norm": 0.40625, "learning_rate": 4.968882890742957e-06, "loss": 2.3325, "step": 7341 }, { "epoch": 0.39388412017167385, "grad_norm": 0.486328125, "learning_rate": 4.968869224831404e-06, "loss": 2.2449, "step": 7342 }, { "epoch": 0.3939377682403433, "grad_norm": 0.453125, "learning_rate": 4.968855555938435e-06, "loss": 2.4122, "step": 7343 }, { "epoch": 0.39399141630901285, "grad_norm": 0.490234375, "learning_rate": 4.9688418840640675e-06, "loss": 2.131, "step": 7344 }, { "epoch": 0.3940450643776824, "grad_norm": 0.318359375, "learning_rate": 4.968828209208316e-06, "loss": 2.0677, "step": 7345 }, { "epoch": 0.3940987124463519, "grad_norm": 0.470703125, "learning_rate": 4.9688145313711995e-06, "loss": 2.3562, "step": 7346 }, { "epoch": 0.39415236051502145, "grad_norm": 0.431640625, "learning_rate": 4.968800850552733e-06, "loss": 2.3312, "step": 7347 }, { "epoch": 0.394206008583691, "grad_norm": 0.38671875, "learning_rate": 4.968787166752934e-06, "loss": 2.192, "step": 7348 }, { "epoch": 0.3942596566523605, "grad_norm": 0.44921875, "learning_rate": 4.968773479971818e-06, "loss": 2.4159, "step": 7349 }, { "epoch": 0.39431330472103004, "grad_norm": 0.486328125, "learning_rate": 4.968759790209401e-06, "loss": 2.1575, "step": 7350 }, { "epoch": 0.3943669527896996, "grad_norm": 0.45703125, "learning_rate": 4.968746097465701e-06, "loss": 2.3264, "step": 7351 }, { "epoch": 0.3944206008583691, "grad_norm": 0.40234375, "learning_rate": 4.968732401740734e-06, "loss": 2.116, "step": 7352 }, { "epoch": 0.39447424892703864, "grad_norm": 0.439453125, "learning_rate": 4.968718703034517e-06, "loss": 2.0924, "step": 7353 }, { "epoch": 0.39452789699570817, "grad_norm": 0.365234375, "learning_rate": 4.968705001347065e-06, "loss": 2.234, "step": 7354 }, { "epoch": 0.3945815450643777, "grad_norm": 0.4296875, "learning_rate": 4.9686912966783965e-06, "loss": 2.1706, "step": 7355 }, { "epoch": 0.39463519313304724, "grad_norm": 0.404296875, "learning_rate": 4.9686775890285275e-06, "loss": 2.358, "step": 7356 }, { "epoch": 0.3946888412017167, "grad_norm": 0.4296875, "learning_rate": 4.968663878397473e-06, "loss": 2.2694, "step": 7357 }, { "epoch": 0.39474248927038624, "grad_norm": 0.46484375, "learning_rate": 4.968650164785252e-06, "loss": 2.3791, "step": 7358 }, { "epoch": 0.3947961373390558, "grad_norm": 0.408203125, "learning_rate": 4.968636448191878e-06, "loss": 2.4247, "step": 7359 }, { "epoch": 0.3948497854077253, "grad_norm": 0.412109375, "learning_rate": 4.968622728617371e-06, "loss": 2.1346, "step": 7360 }, { "epoch": 0.39490343347639484, "grad_norm": 0.373046875, "learning_rate": 4.968609006061745e-06, "loss": 2.4299, "step": 7361 }, { "epoch": 0.39495708154506437, "grad_norm": 0.3671875, "learning_rate": 4.968595280525018e-06, "loss": 2.2468, "step": 7362 }, { "epoch": 0.3950107296137339, "grad_norm": 0.39453125, "learning_rate": 4.968581552007206e-06, "loss": 2.2949, "step": 7363 }, { "epoch": 0.39506437768240343, "grad_norm": 0.404296875, "learning_rate": 4.9685678205083255e-06, "loss": 2.388, "step": 7364 }, { "epoch": 0.39511802575107297, "grad_norm": 0.53515625, "learning_rate": 4.968554086028394e-06, "loss": 2.2039, "step": 7365 }, { "epoch": 0.3951716738197425, "grad_norm": 0.380859375, "learning_rate": 4.968540348567427e-06, "loss": 2.5327, "step": 7366 }, { "epoch": 0.39522532188841203, "grad_norm": 0.388671875, "learning_rate": 4.96852660812544e-06, "loss": 2.3383, "step": 7367 }, { "epoch": 0.39527896995708156, "grad_norm": 0.431640625, "learning_rate": 4.968512864702453e-06, "loss": 2.4501, "step": 7368 }, { "epoch": 0.3953326180257511, "grad_norm": 0.40625, "learning_rate": 4.9684991182984795e-06, "loss": 2.3988, "step": 7369 }, { "epoch": 0.3953862660944206, "grad_norm": 0.47265625, "learning_rate": 4.968485368913538e-06, "loss": 2.301, "step": 7370 }, { "epoch": 0.39543991416309016, "grad_norm": 0.44921875, "learning_rate": 4.9684716165476435e-06, "loss": 1.7452, "step": 7371 }, { "epoch": 0.39549356223175963, "grad_norm": 0.380859375, "learning_rate": 4.968457861200814e-06, "loss": 2.3989, "step": 7372 }, { "epoch": 0.39554721030042916, "grad_norm": 0.462890625, "learning_rate": 4.9684441028730655e-06, "loss": 2.319, "step": 7373 }, { "epoch": 0.3956008583690987, "grad_norm": 0.408203125, "learning_rate": 4.968430341564414e-06, "loss": 2.0956, "step": 7374 }, { "epoch": 0.3956545064377682, "grad_norm": 0.373046875, "learning_rate": 4.9684165772748775e-06, "loss": 2.2778, "step": 7375 }, { "epoch": 0.39570815450643776, "grad_norm": 0.40234375, "learning_rate": 4.968402810004471e-06, "loss": 2.3372, "step": 7376 }, { "epoch": 0.3957618025751073, "grad_norm": 0.384765625, "learning_rate": 4.968389039753213e-06, "loss": 1.8414, "step": 7377 }, { "epoch": 0.3958154506437768, "grad_norm": 0.423828125, "learning_rate": 4.968375266521119e-06, "loss": 2.3768, "step": 7378 }, { "epoch": 0.39586909871244635, "grad_norm": 0.423828125, "learning_rate": 4.968361490308205e-06, "loss": 2.3757, "step": 7379 }, { "epoch": 0.3959227467811159, "grad_norm": 0.439453125, "learning_rate": 4.9683477111144895e-06, "loss": 2.2917, "step": 7380 }, { "epoch": 0.3959763948497854, "grad_norm": 0.44140625, "learning_rate": 4.968333928939987e-06, "loss": 2.563, "step": 7381 }, { "epoch": 0.39603004291845495, "grad_norm": 0.57421875, "learning_rate": 4.968320143784716e-06, "loss": 2.3342, "step": 7382 }, { "epoch": 0.3960836909871245, "grad_norm": 0.375, "learning_rate": 4.968306355648692e-06, "loss": 2.183, "step": 7383 }, { "epoch": 0.396137339055794, "grad_norm": 0.6015625, "learning_rate": 4.9682925645319325e-06, "loss": 2.1655, "step": 7384 }, { "epoch": 0.39619098712446355, "grad_norm": 0.375, "learning_rate": 4.968278770434453e-06, "loss": 2.4049, "step": 7385 }, { "epoch": 0.396244635193133, "grad_norm": 0.46875, "learning_rate": 4.968264973356271e-06, "loss": 2.2691, "step": 7386 }, { "epoch": 0.39629828326180255, "grad_norm": 0.365234375, "learning_rate": 4.968251173297403e-06, "loss": 2.3374, "step": 7387 }, { "epoch": 0.3963519313304721, "grad_norm": 3.078125, "learning_rate": 4.968237370257866e-06, "loss": 2.441, "step": 7388 }, { "epoch": 0.3964055793991416, "grad_norm": 0.46875, "learning_rate": 4.968223564237675e-06, "loss": 1.3755, "step": 7389 }, { "epoch": 0.39645922746781115, "grad_norm": 0.392578125, "learning_rate": 4.968209755236849e-06, "loss": 2.3905, "step": 7390 }, { "epoch": 0.3965128755364807, "grad_norm": 0.3203125, "learning_rate": 4.968195943255404e-06, "loss": 2.1306, "step": 7391 }, { "epoch": 0.3965665236051502, "grad_norm": 0.431640625, "learning_rate": 4.968182128293355e-06, "loss": 2.1477, "step": 7392 }, { "epoch": 0.39662017167381974, "grad_norm": 0.47265625, "learning_rate": 4.968168310350721e-06, "loss": 2.3758, "step": 7393 }, { "epoch": 0.3966738197424893, "grad_norm": 0.408203125, "learning_rate": 4.968154489427518e-06, "loss": 2.2042, "step": 7394 }, { "epoch": 0.3967274678111588, "grad_norm": 0.58203125, "learning_rate": 4.9681406655237615e-06, "loss": 2.4393, "step": 7395 }, { "epoch": 0.39678111587982834, "grad_norm": 0.65625, "learning_rate": 4.96812683863947e-06, "loss": 2.3782, "step": 7396 }, { "epoch": 0.39683476394849787, "grad_norm": 0.4375, "learning_rate": 4.968113008774659e-06, "loss": 2.2697, "step": 7397 }, { "epoch": 0.3968884120171674, "grad_norm": 0.423828125, "learning_rate": 4.968099175929345e-06, "loss": 2.2104, "step": 7398 }, { "epoch": 0.39694206008583693, "grad_norm": 0.380859375, "learning_rate": 4.968085340103545e-06, "loss": 2.289, "step": 7399 }, { "epoch": 0.3969957081545064, "grad_norm": 0.365234375, "learning_rate": 4.968071501297277e-06, "loss": 2.2424, "step": 7400 }, { "epoch": 0.39704935622317594, "grad_norm": 0.4453125, "learning_rate": 4.968057659510556e-06, "loss": 2.1998, "step": 7401 }, { "epoch": 0.3971030042918455, "grad_norm": 0.412109375, "learning_rate": 4.9680438147434e-06, "loss": 2.1687, "step": 7402 }, { "epoch": 0.397156652360515, "grad_norm": 0.89453125, "learning_rate": 4.968029966995823e-06, "loss": 1.4124, "step": 7403 }, { "epoch": 0.39721030042918454, "grad_norm": 0.421875, "learning_rate": 4.9680161162678455e-06, "loss": 2.1902, "step": 7404 }, { "epoch": 0.39726394849785407, "grad_norm": 0.392578125, "learning_rate": 4.968002262559483e-06, "loss": 2.0362, "step": 7405 }, { "epoch": 0.3973175965665236, "grad_norm": 0.439453125, "learning_rate": 4.96798840587075e-06, "loss": 2.3283, "step": 7406 }, { "epoch": 0.39737124463519313, "grad_norm": 0.4140625, "learning_rate": 4.967974546201666e-06, "loss": 2.2574, "step": 7407 }, { "epoch": 0.39742489270386266, "grad_norm": 0.37109375, "learning_rate": 4.967960683552246e-06, "loss": 2.2498, "step": 7408 }, { "epoch": 0.3974785407725322, "grad_norm": 0.373046875, "learning_rate": 4.967946817922508e-06, "loss": 2.2291, "step": 7409 }, { "epoch": 0.39753218884120173, "grad_norm": 0.73046875, "learning_rate": 4.967932949312468e-06, "loss": 2.2969, "step": 7410 }, { "epoch": 0.39758583690987126, "grad_norm": 0.388671875, "learning_rate": 4.967919077722143e-06, "loss": 2.3941, "step": 7411 }, { "epoch": 0.3976394849785408, "grad_norm": 0.369140625, "learning_rate": 4.967905203151549e-06, "loss": 2.3276, "step": 7412 }, { "epoch": 0.3976931330472103, "grad_norm": 0.40234375, "learning_rate": 4.967891325600705e-06, "loss": 2.4569, "step": 7413 }, { "epoch": 0.39774678111587985, "grad_norm": 0.435546875, "learning_rate": 4.967877445069626e-06, "loss": 2.4053, "step": 7414 }, { "epoch": 0.39780042918454933, "grad_norm": 0.392578125, "learning_rate": 4.9678635615583274e-06, "loss": 2.3278, "step": 7415 }, { "epoch": 0.39785407725321886, "grad_norm": 0.490234375, "learning_rate": 4.967849675066828e-06, "loss": 2.3218, "step": 7416 }, { "epoch": 0.3979077253218884, "grad_norm": 0.455078125, "learning_rate": 4.967835785595145e-06, "loss": 2.625, "step": 7417 }, { "epoch": 0.3979613733905579, "grad_norm": 0.6171875, "learning_rate": 4.967821893143294e-06, "loss": 1.8014, "step": 7418 }, { "epoch": 0.39801502145922746, "grad_norm": 0.390625, "learning_rate": 4.967807997711291e-06, "loss": 2.1269, "step": 7419 }, { "epoch": 0.398068669527897, "grad_norm": 0.515625, "learning_rate": 4.967794099299155e-06, "loss": 2.4252, "step": 7420 }, { "epoch": 0.3981223175965665, "grad_norm": 0.375, "learning_rate": 4.9677801979069006e-06, "loss": 2.4486, "step": 7421 }, { "epoch": 0.39817596566523605, "grad_norm": 0.40625, "learning_rate": 4.967766293534546e-06, "loss": 2.3396, "step": 7422 }, { "epoch": 0.3982296137339056, "grad_norm": 0.46875, "learning_rate": 4.967752386182108e-06, "loss": 2.3152, "step": 7423 }, { "epoch": 0.3982832618025751, "grad_norm": 0.447265625, "learning_rate": 4.967738475849603e-06, "loss": 2.2264, "step": 7424 }, { "epoch": 0.39833690987124465, "grad_norm": 0.4921875, "learning_rate": 4.9677245625370475e-06, "loss": 2.231, "step": 7425 }, { "epoch": 0.3983905579399142, "grad_norm": 0.41015625, "learning_rate": 4.967710646244458e-06, "loss": 2.1343, "step": 7426 }, { "epoch": 0.3984442060085837, "grad_norm": 0.431640625, "learning_rate": 4.967696726971853e-06, "loss": 2.2769, "step": 7427 }, { "epoch": 0.39849785407725324, "grad_norm": 0.453125, "learning_rate": 4.967682804719247e-06, "loss": 2.1541, "step": 7428 }, { "epoch": 0.3985515021459227, "grad_norm": 0.7109375, "learning_rate": 4.967668879486659e-06, "loss": 2.1952, "step": 7429 }, { "epoch": 0.39860515021459225, "grad_norm": 0.427734375, "learning_rate": 4.967654951274104e-06, "loss": 2.3439, "step": 7430 }, { "epoch": 0.3986587982832618, "grad_norm": 0.416015625, "learning_rate": 4.9676410200816e-06, "loss": 2.4244, "step": 7431 }, { "epoch": 0.3987124463519313, "grad_norm": 0.484375, "learning_rate": 4.967627085909163e-06, "loss": 2.481, "step": 7432 }, { "epoch": 0.39876609442060085, "grad_norm": 0.51171875, "learning_rate": 4.967613148756812e-06, "loss": 2.3418, "step": 7433 }, { "epoch": 0.3988197424892704, "grad_norm": 0.3984375, "learning_rate": 4.96759920862456e-06, "loss": 1.6314, "step": 7434 }, { "epoch": 0.3988733905579399, "grad_norm": 0.40234375, "learning_rate": 4.967585265512427e-06, "loss": 2.1913, "step": 7435 }, { "epoch": 0.39892703862660944, "grad_norm": 0.396484375, "learning_rate": 4.9675713194204285e-06, "loss": 2.2063, "step": 7436 }, { "epoch": 0.398980686695279, "grad_norm": 0.380859375, "learning_rate": 4.967557370348581e-06, "loss": 2.2336, "step": 7437 }, { "epoch": 0.3990343347639485, "grad_norm": 0.392578125, "learning_rate": 4.967543418296903e-06, "loss": 2.354, "step": 7438 }, { "epoch": 0.39908798283261804, "grad_norm": 0.51953125, "learning_rate": 4.96752946326541e-06, "loss": 2.4519, "step": 7439 }, { "epoch": 0.39914163090128757, "grad_norm": 0.439453125, "learning_rate": 4.967515505254119e-06, "loss": 2.2544, "step": 7440 }, { "epoch": 0.3991952789699571, "grad_norm": 0.484375, "learning_rate": 4.967501544263048e-06, "loss": 2.3701, "step": 7441 }, { "epoch": 0.39924892703862663, "grad_norm": 0.37109375, "learning_rate": 4.967487580292212e-06, "loss": 2.4132, "step": 7442 }, { "epoch": 0.39930257510729616, "grad_norm": 0.443359375, "learning_rate": 4.967473613341629e-06, "loss": 2.2895, "step": 7443 }, { "epoch": 0.39935622317596564, "grad_norm": 0.328125, "learning_rate": 4.9674596434113155e-06, "loss": 2.0724, "step": 7444 }, { "epoch": 0.3994098712446352, "grad_norm": 0.431640625, "learning_rate": 4.967445670501288e-06, "loss": 2.4444, "step": 7445 }, { "epoch": 0.3994635193133047, "grad_norm": 0.40625, "learning_rate": 4.967431694611564e-06, "loss": 2.0914, "step": 7446 }, { "epoch": 0.39951716738197424, "grad_norm": 0.63671875, "learning_rate": 4.967417715742161e-06, "loss": 2.1023, "step": 7447 }, { "epoch": 0.39957081545064377, "grad_norm": 0.380859375, "learning_rate": 4.9674037338930946e-06, "loss": 2.3548, "step": 7448 }, { "epoch": 0.3996244635193133, "grad_norm": 0.4453125, "learning_rate": 4.967389749064382e-06, "loss": 2.485, "step": 7449 }, { "epoch": 0.39967811158798283, "grad_norm": 0.484375, "learning_rate": 4.967375761256041e-06, "loss": 2.0846, "step": 7450 }, { "epoch": 0.39973175965665236, "grad_norm": 0.361328125, "learning_rate": 4.9673617704680874e-06, "loss": 2.1224, "step": 7451 }, { "epoch": 0.3997854077253219, "grad_norm": 0.43359375, "learning_rate": 4.967347776700538e-06, "loss": 2.4797, "step": 7452 }, { "epoch": 0.3998390557939914, "grad_norm": 0.5625, "learning_rate": 4.967333779953411e-06, "loss": 2.2107, "step": 7453 }, { "epoch": 0.39989270386266096, "grad_norm": 0.3984375, "learning_rate": 4.967319780226722e-06, "loss": 2.2776, "step": 7454 }, { "epoch": 0.3999463519313305, "grad_norm": 0.34765625, "learning_rate": 4.967305777520488e-06, "loss": 2.1775, "step": 7455 }, { "epoch": 0.4, "grad_norm": 0.443359375, "learning_rate": 4.967291771834727e-06, "loss": 2.2956, "step": 7456 }, { "epoch": 0.40005364806866955, "grad_norm": 0.4140625, "learning_rate": 4.9672777631694555e-06, "loss": 2.1034, "step": 7457 }, { "epoch": 0.40010729613733903, "grad_norm": 0.458984375, "learning_rate": 4.967263751524689e-06, "loss": 2.2611, "step": 7458 }, { "epoch": 0.40016094420600856, "grad_norm": 0.75390625, "learning_rate": 4.9672497369004465e-06, "loss": 2.3016, "step": 7459 }, { "epoch": 0.4002145922746781, "grad_norm": 1.0078125, "learning_rate": 4.967235719296744e-06, "loss": 2.5395, "step": 7460 }, { "epoch": 0.4002682403433476, "grad_norm": 0.53515625, "learning_rate": 4.967221698713598e-06, "loss": 2.2762, "step": 7461 }, { "epoch": 0.40032188841201716, "grad_norm": 0.46484375, "learning_rate": 4.967207675151026e-06, "loss": 2.3356, "step": 7462 }, { "epoch": 0.4003755364806867, "grad_norm": 0.515625, "learning_rate": 4.967193648609044e-06, "loss": 1.7449, "step": 7463 }, { "epoch": 0.4004291845493562, "grad_norm": 0.671875, "learning_rate": 4.96717961908767e-06, "loss": 2.3367, "step": 7464 }, { "epoch": 0.40048283261802575, "grad_norm": 0.431640625, "learning_rate": 4.967165586586922e-06, "loss": 2.3295, "step": 7465 }, { "epoch": 0.4005364806866953, "grad_norm": 0.4296875, "learning_rate": 4.967151551106813e-06, "loss": 2.3868, "step": 7466 }, { "epoch": 0.4005901287553648, "grad_norm": 0.423828125, "learning_rate": 4.967137512647365e-06, "loss": 2.2897, "step": 7467 }, { "epoch": 0.40064377682403435, "grad_norm": 0.365234375, "learning_rate": 4.9671234712085916e-06, "loss": 2.0712, "step": 7468 }, { "epoch": 0.4006974248927039, "grad_norm": 0.390625, "learning_rate": 4.967109426790511e-06, "loss": 2.3328, "step": 7469 }, { "epoch": 0.4007510729613734, "grad_norm": 0.435546875, "learning_rate": 4.967095379393139e-06, "loss": 2.5105, "step": 7470 }, { "epoch": 0.40080472103004294, "grad_norm": 0.408203125, "learning_rate": 4.967081329016494e-06, "loss": 2.4181, "step": 7471 }, { "epoch": 0.4008583690987124, "grad_norm": 0.365234375, "learning_rate": 4.967067275660592e-06, "loss": 2.0838, "step": 7472 }, { "epoch": 0.40091201716738195, "grad_norm": 0.39453125, "learning_rate": 4.9670532193254505e-06, "loss": 2.1714, "step": 7473 }, { "epoch": 0.4009656652360515, "grad_norm": 0.4453125, "learning_rate": 4.967039160011087e-06, "loss": 2.4196, "step": 7474 }, { "epoch": 0.401019313304721, "grad_norm": 0.64453125, "learning_rate": 4.9670250977175165e-06, "loss": 2.1704, "step": 7475 }, { "epoch": 0.40107296137339055, "grad_norm": 0.5703125, "learning_rate": 4.967011032444758e-06, "loss": 2.1611, "step": 7476 }, { "epoch": 0.4011266094420601, "grad_norm": 0.578125, "learning_rate": 4.9669969641928275e-06, "loss": 2.2798, "step": 7477 }, { "epoch": 0.4011802575107296, "grad_norm": 0.408203125, "learning_rate": 4.966982892961742e-06, "loss": 2.3599, "step": 7478 }, { "epoch": 0.40123390557939914, "grad_norm": 0.6484375, "learning_rate": 4.966968818751518e-06, "loss": 2.4937, "step": 7479 }, { "epoch": 0.4012875536480687, "grad_norm": 0.443359375, "learning_rate": 4.966954741562176e-06, "loss": 2.4276, "step": 7480 }, { "epoch": 0.4013412017167382, "grad_norm": 0.61328125, "learning_rate": 4.966940661393728e-06, "loss": 2.147, "step": 7481 }, { "epoch": 0.40139484978540774, "grad_norm": 0.388671875, "learning_rate": 4.966926578246193e-06, "loss": 2.2426, "step": 7482 }, { "epoch": 0.40144849785407727, "grad_norm": 0.51171875, "learning_rate": 4.966912492119589e-06, "loss": 2.221, "step": 7483 }, { "epoch": 0.4015021459227468, "grad_norm": 0.345703125, "learning_rate": 4.966898403013932e-06, "loss": 2.3964, "step": 7484 }, { "epoch": 0.40155579399141633, "grad_norm": 0.427734375, "learning_rate": 4.966884310929239e-06, "loss": 2.3794, "step": 7485 }, { "epoch": 0.40160944206008586, "grad_norm": 0.494140625, "learning_rate": 4.966870215865528e-06, "loss": 1.5436, "step": 7486 }, { "epoch": 0.40166309012875534, "grad_norm": 0.9375, "learning_rate": 4.966856117822814e-06, "loss": 2.3031, "step": 7487 }, { "epoch": 0.40171673819742487, "grad_norm": 0.392578125, "learning_rate": 4.9668420168011165e-06, "loss": 2.1342, "step": 7488 }, { "epoch": 0.4017703862660944, "grad_norm": 0.32421875, "learning_rate": 4.966827912800451e-06, "loss": 1.9142, "step": 7489 }, { "epoch": 0.40182403433476394, "grad_norm": 0.39453125, "learning_rate": 4.966813805820835e-06, "loss": 2.2079, "step": 7490 }, { "epoch": 0.40187768240343347, "grad_norm": 0.43359375, "learning_rate": 4.966799695862285e-06, "loss": 2.265, "step": 7491 }, { "epoch": 0.401931330472103, "grad_norm": 0.375, "learning_rate": 4.966785582924819e-06, "loss": 2.329, "step": 7492 }, { "epoch": 0.40198497854077253, "grad_norm": 0.416015625, "learning_rate": 4.966771467008453e-06, "loss": 2.2872, "step": 7493 }, { "epoch": 0.40203862660944206, "grad_norm": 0.443359375, "learning_rate": 4.966757348113205e-06, "loss": 2.3151, "step": 7494 }, { "epoch": 0.4020922746781116, "grad_norm": 0.43359375, "learning_rate": 4.966743226239091e-06, "loss": 2.5184, "step": 7495 }, { "epoch": 0.4021459227467811, "grad_norm": 0.419921875, "learning_rate": 4.966729101386128e-06, "loss": 2.3053, "step": 7496 }, { "epoch": 0.40219957081545066, "grad_norm": 0.375, "learning_rate": 4.966714973554335e-06, "loss": 2.1647, "step": 7497 }, { "epoch": 0.4022532188841202, "grad_norm": 0.484375, "learning_rate": 4.966700842743728e-06, "loss": 2.4846, "step": 7498 }, { "epoch": 0.4023068669527897, "grad_norm": 4.46875, "learning_rate": 4.966686708954323e-06, "loss": 2.2385, "step": 7499 }, { "epoch": 0.40236051502145925, "grad_norm": 0.416015625, "learning_rate": 4.966672572186138e-06, "loss": 2.4243, "step": 7500 }, { "epoch": 0.40241416309012873, "grad_norm": 0.44921875, "learning_rate": 4.96665843243919e-06, "loss": 2.3233, "step": 7501 }, { "epoch": 0.40246781115879826, "grad_norm": 0.470703125, "learning_rate": 4.966644289713496e-06, "loss": 2.417, "step": 7502 }, { "epoch": 0.4025214592274678, "grad_norm": 0.482421875, "learning_rate": 4.9666301440090735e-06, "loss": 2.0871, "step": 7503 }, { "epoch": 0.4025751072961373, "grad_norm": 0.40625, "learning_rate": 4.966615995325939e-06, "loss": 2.3886, "step": 7504 }, { "epoch": 0.40262875536480686, "grad_norm": 0.8828125, "learning_rate": 4.9666018436641094e-06, "loss": 2.2989, "step": 7505 }, { "epoch": 0.4026824034334764, "grad_norm": 0.388671875, "learning_rate": 4.966587689023602e-06, "loss": 2.2712, "step": 7506 }, { "epoch": 0.4027360515021459, "grad_norm": 0.384765625, "learning_rate": 4.966573531404435e-06, "loss": 2.2151, "step": 7507 }, { "epoch": 0.40278969957081545, "grad_norm": 0.43359375, "learning_rate": 4.966559370806624e-06, "loss": 2.0082, "step": 7508 }, { "epoch": 0.402843347639485, "grad_norm": 0.466796875, "learning_rate": 4.966545207230187e-06, "loss": 2.3577, "step": 7509 }, { "epoch": 0.4028969957081545, "grad_norm": 0.400390625, "learning_rate": 4.96653104067514e-06, "loss": 2.2959, "step": 7510 }, { "epoch": 0.40295064377682405, "grad_norm": 0.455078125, "learning_rate": 4.966516871141502e-06, "loss": 2.3609, "step": 7511 }, { "epoch": 0.4030042918454936, "grad_norm": 0.498046875, "learning_rate": 4.966502698629287e-06, "loss": 2.2797, "step": 7512 }, { "epoch": 0.4030579399141631, "grad_norm": 0.3515625, "learning_rate": 4.966488523138516e-06, "loss": 2.1294, "step": 7513 }, { "epoch": 0.40311158798283264, "grad_norm": 0.384765625, "learning_rate": 4.966474344669203e-06, "loss": 2.0668, "step": 7514 }, { "epoch": 0.4031652360515021, "grad_norm": 0.353515625, "learning_rate": 4.966460163221368e-06, "loss": 2.3303, "step": 7515 }, { "epoch": 0.40321888412017165, "grad_norm": 0.390625, "learning_rate": 4.966445978795024e-06, "loss": 2.2955, "step": 7516 }, { "epoch": 0.4032725321888412, "grad_norm": 0.41796875, "learning_rate": 4.966431791390194e-06, "loss": 2.3716, "step": 7517 }, { "epoch": 0.4033261802575107, "grad_norm": 0.5078125, "learning_rate": 4.966417601006889e-06, "loss": 1.3964, "step": 7518 }, { "epoch": 0.40337982832618025, "grad_norm": 0.474609375, "learning_rate": 4.96640340764513e-06, "loss": 2.3582, "step": 7519 }, { "epoch": 0.4034334763948498, "grad_norm": 1.015625, "learning_rate": 4.966389211304932e-06, "loss": 2.2271, "step": 7520 }, { "epoch": 0.4034871244635193, "grad_norm": 0.455078125, "learning_rate": 4.9663750119863145e-06, "loss": 2.5406, "step": 7521 }, { "epoch": 0.40354077253218884, "grad_norm": 0.77734375, "learning_rate": 4.9663608096892926e-06, "loss": 2.4964, "step": 7522 }, { "epoch": 0.4035944206008584, "grad_norm": 0.41015625, "learning_rate": 4.9663466044138834e-06, "loss": 2.3964, "step": 7523 }, { "epoch": 0.4036480686695279, "grad_norm": 0.625, "learning_rate": 4.966332396160106e-06, "loss": 2.4112, "step": 7524 }, { "epoch": 0.40370171673819744, "grad_norm": 0.37890625, "learning_rate": 4.9663181849279755e-06, "loss": 2.4586, "step": 7525 }, { "epoch": 0.40375536480686697, "grad_norm": 0.4609375, "learning_rate": 4.96630397071751e-06, "loss": 2.0592, "step": 7526 }, { "epoch": 0.4038090128755365, "grad_norm": 0.671875, "learning_rate": 4.966289753528727e-06, "loss": 2.3415, "step": 7527 }, { "epoch": 0.40386266094420603, "grad_norm": 0.41015625, "learning_rate": 4.966275533361643e-06, "loss": 2.4209, "step": 7528 }, { "epoch": 0.40391630901287556, "grad_norm": 0.419921875, "learning_rate": 4.966261310216276e-06, "loss": 2.3484, "step": 7529 }, { "epoch": 0.40396995708154504, "grad_norm": 0.396484375, "learning_rate": 4.966247084092641e-06, "loss": 2.2792, "step": 7530 }, { "epoch": 0.40402360515021457, "grad_norm": 0.41796875, "learning_rate": 4.966232854990759e-06, "loss": 2.2975, "step": 7531 }, { "epoch": 0.4040772532188841, "grad_norm": 0.37890625, "learning_rate": 4.966218622910643e-06, "loss": 2.4558, "step": 7532 }, { "epoch": 0.40413090128755363, "grad_norm": 0.45703125, "learning_rate": 4.966204387852313e-06, "loss": 2.2867, "step": 7533 }, { "epoch": 0.40418454935622317, "grad_norm": 0.349609375, "learning_rate": 4.9661901498157865e-06, "loss": 2.3979, "step": 7534 }, { "epoch": 0.4042381974248927, "grad_norm": 0.42578125, "learning_rate": 4.9661759088010774e-06, "loss": 2.3384, "step": 7535 }, { "epoch": 0.40429184549356223, "grad_norm": 0.388671875, "learning_rate": 4.9661616648082065e-06, "loss": 2.3229, "step": 7536 }, { "epoch": 0.40434549356223176, "grad_norm": 0.357421875, "learning_rate": 4.966147417837189e-06, "loss": 2.1328, "step": 7537 }, { "epoch": 0.4043991416309013, "grad_norm": 0.40625, "learning_rate": 4.966133167888042e-06, "loss": 2.3168, "step": 7538 }, { "epoch": 0.4044527896995708, "grad_norm": 0.388671875, "learning_rate": 4.966118914960785e-06, "loss": 1.8927, "step": 7539 }, { "epoch": 0.40450643776824036, "grad_norm": 0.384765625, "learning_rate": 4.966104659055432e-06, "loss": 2.3352, "step": 7540 }, { "epoch": 0.4045600858369099, "grad_norm": 0.419921875, "learning_rate": 4.966090400172002e-06, "loss": 2.4954, "step": 7541 }, { "epoch": 0.4046137339055794, "grad_norm": 1.515625, "learning_rate": 4.966076138310512e-06, "loss": 2.0749, "step": 7542 }, { "epoch": 0.40466738197424895, "grad_norm": 0.349609375, "learning_rate": 4.966061873470981e-06, "loss": 2.2144, "step": 7543 }, { "epoch": 0.40472103004291843, "grad_norm": 0.390625, "learning_rate": 4.9660476056534226e-06, "loss": 2.3466, "step": 7544 }, { "epoch": 0.40477467811158796, "grad_norm": 0.36328125, "learning_rate": 4.966033334857856e-06, "loss": 2.0377, "step": 7545 }, { "epoch": 0.4048283261802575, "grad_norm": 0.404296875, "learning_rate": 4.966019061084298e-06, "loss": 2.3215, "step": 7546 }, { "epoch": 0.404881974248927, "grad_norm": 0.482421875, "learning_rate": 4.966004784332768e-06, "loss": 2.2873, "step": 7547 }, { "epoch": 0.40493562231759656, "grad_norm": 0.427734375, "learning_rate": 4.9659905046032796e-06, "loss": 2.2948, "step": 7548 }, { "epoch": 0.4049892703862661, "grad_norm": 0.408203125, "learning_rate": 4.965976221895852e-06, "loss": 2.4646, "step": 7549 }, { "epoch": 0.4050429184549356, "grad_norm": 0.38671875, "learning_rate": 4.9659619362105025e-06, "loss": 2.4103, "step": 7550 }, { "epoch": 0.40509656652360515, "grad_norm": 0.43359375, "learning_rate": 4.965947647547248e-06, "loss": 2.2631, "step": 7551 }, { "epoch": 0.4051502145922747, "grad_norm": 0.39453125, "learning_rate": 4.965933355906106e-06, "loss": 2.5403, "step": 7552 }, { "epoch": 0.4052038626609442, "grad_norm": 0.390625, "learning_rate": 4.965919061287095e-06, "loss": 2.2586, "step": 7553 }, { "epoch": 0.40525751072961375, "grad_norm": 0.400390625, "learning_rate": 4.965904763690229e-06, "loss": 2.3813, "step": 7554 }, { "epoch": 0.4053111587982833, "grad_norm": 0.41796875, "learning_rate": 4.965890463115528e-06, "loss": 2.1338, "step": 7555 }, { "epoch": 0.4053648068669528, "grad_norm": 0.494140625, "learning_rate": 4.965876159563008e-06, "loss": 2.3932, "step": 7556 }, { "epoch": 0.40541845493562234, "grad_norm": 0.478515625, "learning_rate": 4.965861853032687e-06, "loss": 2.411, "step": 7557 }, { "epoch": 0.4054721030042919, "grad_norm": 0.39453125, "learning_rate": 4.965847543524582e-06, "loss": 1.9338, "step": 7558 }, { "epoch": 0.40552575107296135, "grad_norm": 0.435546875, "learning_rate": 4.96583323103871e-06, "loss": 2.3214, "step": 7559 }, { "epoch": 0.4055793991416309, "grad_norm": 0.443359375, "learning_rate": 4.965818915575089e-06, "loss": 2.3319, "step": 7560 }, { "epoch": 0.4056330472103004, "grad_norm": 0.39453125, "learning_rate": 4.965804597133735e-06, "loss": 2.2465, "step": 7561 }, { "epoch": 0.40568669527896994, "grad_norm": 0.4140625, "learning_rate": 4.965790275714667e-06, "loss": 2.4386, "step": 7562 }, { "epoch": 0.4057403433476395, "grad_norm": 1.3515625, "learning_rate": 4.965775951317901e-06, "loss": 2.3129, "step": 7563 }, { "epoch": 0.405793991416309, "grad_norm": 0.34375, "learning_rate": 4.965761623943455e-06, "loss": 2.1555, "step": 7564 }, { "epoch": 0.40584763948497854, "grad_norm": 0.451171875, "learning_rate": 4.965747293591346e-06, "loss": 2.0889, "step": 7565 }, { "epoch": 0.40590128755364807, "grad_norm": 0.37890625, "learning_rate": 4.965732960261591e-06, "loss": 2.2582, "step": 7566 }, { "epoch": 0.4059549356223176, "grad_norm": 0.419921875, "learning_rate": 4.965718623954208e-06, "loss": 2.571, "step": 7567 }, { "epoch": 0.40600858369098713, "grad_norm": 0.38671875, "learning_rate": 4.965704284669214e-06, "loss": 2.3344, "step": 7568 }, { "epoch": 0.40606223175965667, "grad_norm": 0.51953125, "learning_rate": 4.965689942406626e-06, "loss": 2.4312, "step": 7569 }, { "epoch": 0.4061158798283262, "grad_norm": 0.404296875, "learning_rate": 4.9656755971664615e-06, "loss": 2.2413, "step": 7570 }, { "epoch": 0.40616952789699573, "grad_norm": 0.470703125, "learning_rate": 4.965661248948738e-06, "loss": 2.3273, "step": 7571 }, { "epoch": 0.40622317596566526, "grad_norm": 0.466796875, "learning_rate": 4.965646897753473e-06, "loss": 1.8439, "step": 7572 }, { "epoch": 0.40627682403433474, "grad_norm": 0.373046875, "learning_rate": 4.965632543580683e-06, "loss": 2.2844, "step": 7573 }, { "epoch": 0.40633047210300427, "grad_norm": 0.44140625, "learning_rate": 4.965618186430387e-06, "loss": 2.636, "step": 7574 }, { "epoch": 0.4063841201716738, "grad_norm": 0.45703125, "learning_rate": 4.9656038263026e-06, "loss": 1.6258, "step": 7575 }, { "epoch": 0.40643776824034333, "grad_norm": 0.443359375, "learning_rate": 4.965589463197341e-06, "loss": 2.2664, "step": 7576 }, { "epoch": 0.40649141630901287, "grad_norm": 0.42578125, "learning_rate": 4.9655750971146266e-06, "loss": 2.1441, "step": 7577 }, { "epoch": 0.4065450643776824, "grad_norm": 0.3984375, "learning_rate": 4.965560728054475e-06, "loss": 2.3979, "step": 7578 }, { "epoch": 0.40659871244635193, "grad_norm": 0.470703125, "learning_rate": 4.965546356016903e-06, "loss": 2.1562, "step": 7579 }, { "epoch": 0.40665236051502146, "grad_norm": 0.375, "learning_rate": 4.965531981001928e-06, "loss": 2.2658, "step": 7580 }, { "epoch": 0.406706008583691, "grad_norm": 0.400390625, "learning_rate": 4.965517603009567e-06, "loss": 2.1627, "step": 7581 }, { "epoch": 0.4067596566523605, "grad_norm": 0.373046875, "learning_rate": 4.965503222039838e-06, "loss": 2.2255, "step": 7582 }, { "epoch": 0.40681330472103006, "grad_norm": 0.384765625, "learning_rate": 4.965488838092758e-06, "loss": 2.4512, "step": 7583 }, { "epoch": 0.4068669527896996, "grad_norm": 0.4375, "learning_rate": 4.965474451168344e-06, "loss": 2.2997, "step": 7584 }, { "epoch": 0.4069206008583691, "grad_norm": 1.453125, "learning_rate": 4.965460061266615e-06, "loss": 2.6037, "step": 7585 }, { "epoch": 0.40697424892703865, "grad_norm": 0.484375, "learning_rate": 4.965445668387586e-06, "loss": 2.3122, "step": 7586 }, { "epoch": 0.4070278969957081, "grad_norm": 0.4921875, "learning_rate": 4.965431272531276e-06, "loss": 2.3922, "step": 7587 }, { "epoch": 0.40708154506437766, "grad_norm": 8.5, "learning_rate": 4.9654168736977015e-06, "loss": 2.5364, "step": 7588 }, { "epoch": 0.4071351931330472, "grad_norm": 0.408203125, "learning_rate": 4.965402471886881e-06, "loss": 2.4207, "step": 7589 }, { "epoch": 0.4071888412017167, "grad_norm": 0.478515625, "learning_rate": 4.965388067098832e-06, "loss": 2.2744, "step": 7590 }, { "epoch": 0.40724248927038625, "grad_norm": 0.4375, "learning_rate": 4.965373659333569e-06, "loss": 2.4882, "step": 7591 }, { "epoch": 0.4072961373390558, "grad_norm": 0.416015625, "learning_rate": 4.9653592485911124e-06, "loss": 2.3192, "step": 7592 }, { "epoch": 0.4073497854077253, "grad_norm": 0.451171875, "learning_rate": 4.9653448348714795e-06, "loss": 2.2208, "step": 7593 }, { "epoch": 0.40740343347639485, "grad_norm": 0.404296875, "learning_rate": 4.965330418174687e-06, "loss": 2.1694, "step": 7594 }, { "epoch": 0.4074570815450644, "grad_norm": 0.390625, "learning_rate": 4.965315998500752e-06, "loss": 2.3548, "step": 7595 }, { "epoch": 0.4075107296137339, "grad_norm": 1.2734375, "learning_rate": 4.965301575849692e-06, "loss": 1.6959, "step": 7596 }, { "epoch": 0.40756437768240344, "grad_norm": 0.421875, "learning_rate": 4.9652871502215236e-06, "loss": 2.1819, "step": 7597 }, { "epoch": 0.407618025751073, "grad_norm": 0.400390625, "learning_rate": 4.965272721616266e-06, "loss": 2.2649, "step": 7598 }, { "epoch": 0.4076716738197425, "grad_norm": 0.6796875, "learning_rate": 4.965258290033936e-06, "loss": 2.3468, "step": 7599 }, { "epoch": 0.40772532188841204, "grad_norm": 0.62109375, "learning_rate": 4.965243855474551e-06, "loss": 2.2447, "step": 7600 }, { "epoch": 0.40777896995708157, "grad_norm": 0.416015625, "learning_rate": 4.965229417938128e-06, "loss": 2.1922, "step": 7601 }, { "epoch": 0.40783261802575105, "grad_norm": 0.458984375, "learning_rate": 4.9652149774246845e-06, "loss": 1.9172, "step": 7602 }, { "epoch": 0.4078862660944206, "grad_norm": 0.59375, "learning_rate": 4.965200533934238e-06, "loss": 2.3532, "step": 7603 }, { "epoch": 0.4079399141630901, "grad_norm": 0.400390625, "learning_rate": 4.965186087466807e-06, "loss": 2.2291, "step": 7604 }, { "epoch": 0.40799356223175964, "grad_norm": 0.421875, "learning_rate": 4.9651716380224076e-06, "loss": 2.3842, "step": 7605 }, { "epoch": 0.4080472103004292, "grad_norm": 0.404296875, "learning_rate": 4.9651571856010575e-06, "loss": 2.5248, "step": 7606 }, { "epoch": 0.4081008583690987, "grad_norm": 0.5546875, "learning_rate": 4.9651427302027745e-06, "loss": 2.252, "step": 7607 }, { "epoch": 0.40815450643776824, "grad_norm": 0.396484375, "learning_rate": 4.965128271827576e-06, "loss": 2.4874, "step": 7608 }, { "epoch": 0.40820815450643777, "grad_norm": 0.392578125, "learning_rate": 4.965113810475479e-06, "loss": 2.2475, "step": 7609 }, { "epoch": 0.4082618025751073, "grad_norm": 0.3671875, "learning_rate": 4.965099346146501e-06, "loss": 2.0152, "step": 7610 }, { "epoch": 0.40831545064377683, "grad_norm": 0.3828125, "learning_rate": 4.96508487884066e-06, "loss": 2.4385, "step": 7611 }, { "epoch": 0.40836909871244637, "grad_norm": 0.361328125, "learning_rate": 4.965070408557974e-06, "loss": 2.2002, "step": 7612 }, { "epoch": 0.4084227467811159, "grad_norm": 0.408203125, "learning_rate": 4.96505593529846e-06, "loss": 2.4325, "step": 7613 }, { "epoch": 0.40847639484978543, "grad_norm": 0.4609375, "learning_rate": 4.9650414590621346e-06, "loss": 2.5428, "step": 7614 }, { "epoch": 0.40853004291845496, "grad_norm": 0.41015625, "learning_rate": 4.9650269798490155e-06, "loss": 2.2517, "step": 7615 }, { "epoch": 0.40858369098712444, "grad_norm": 0.46875, "learning_rate": 4.965012497659121e-06, "loss": 2.4158, "step": 7616 }, { "epoch": 0.40863733905579397, "grad_norm": 0.4609375, "learning_rate": 4.964998012492468e-06, "loss": 2.4373, "step": 7617 }, { "epoch": 0.4086909871244635, "grad_norm": 0.51171875, "learning_rate": 4.964983524349074e-06, "loss": 2.4412, "step": 7618 }, { "epoch": 0.40874463519313303, "grad_norm": 0.48828125, "learning_rate": 4.964969033228957e-06, "loss": 2.2496, "step": 7619 }, { "epoch": 0.40879828326180256, "grad_norm": 2.4375, "learning_rate": 4.964954539132134e-06, "loss": 2.4937, "step": 7620 }, { "epoch": 0.4088519313304721, "grad_norm": 0.39453125, "learning_rate": 4.964940042058622e-06, "loss": 2.5788, "step": 7621 }, { "epoch": 0.4089055793991416, "grad_norm": 0.37109375, "learning_rate": 4.964925542008441e-06, "loss": 2.3454, "step": 7622 }, { "epoch": 0.40895922746781116, "grad_norm": 0.431640625, "learning_rate": 4.9649110389816054e-06, "loss": 2.3534, "step": 7623 }, { "epoch": 0.4090128755364807, "grad_norm": 0.388671875, "learning_rate": 4.964896532978134e-06, "loss": 2.3556, "step": 7624 }, { "epoch": 0.4090665236051502, "grad_norm": 0.443359375, "learning_rate": 4.964882023998045e-06, "loss": 2.4504, "step": 7625 }, { "epoch": 0.40912017167381975, "grad_norm": 0.431640625, "learning_rate": 4.964867512041354e-06, "loss": 2.2717, "step": 7626 }, { "epoch": 0.4091738197424893, "grad_norm": 0.42578125, "learning_rate": 4.964852997108081e-06, "loss": 2.351, "step": 7627 }, { "epoch": 0.4092274678111588, "grad_norm": 0.408203125, "learning_rate": 4.964838479198241e-06, "loss": 2.4237, "step": 7628 }, { "epoch": 0.40928111587982835, "grad_norm": 0.47265625, "learning_rate": 4.964823958311854e-06, "loss": 2.2908, "step": 7629 }, { "epoch": 0.4093347639484979, "grad_norm": 0.400390625, "learning_rate": 4.964809434448936e-06, "loss": 2.2917, "step": 7630 }, { "epoch": 0.40938841201716736, "grad_norm": 0.392578125, "learning_rate": 4.964794907609505e-06, "loss": 2.1542, "step": 7631 }, { "epoch": 0.4094420600858369, "grad_norm": 0.40234375, "learning_rate": 4.964780377793577e-06, "loss": 2.4253, "step": 7632 }, { "epoch": 0.4094957081545064, "grad_norm": 0.439453125, "learning_rate": 4.964765845001172e-06, "loss": 2.2689, "step": 7633 }, { "epoch": 0.40954935622317595, "grad_norm": 0.3828125, "learning_rate": 4.964751309232307e-06, "loss": 2.2593, "step": 7634 }, { "epoch": 0.4096030042918455, "grad_norm": 0.400390625, "learning_rate": 4.964736770487e-06, "loss": 2.0394, "step": 7635 }, { "epoch": 0.409656652360515, "grad_norm": 0.5390625, "learning_rate": 4.964722228765266e-06, "loss": 2.4408, "step": 7636 }, { "epoch": 0.40971030042918455, "grad_norm": 0.361328125, "learning_rate": 4.964707684067125e-06, "loss": 2.139, "step": 7637 }, { "epoch": 0.4097639484978541, "grad_norm": 0.3671875, "learning_rate": 4.964693136392594e-06, "loss": 2.5411, "step": 7638 }, { "epoch": 0.4098175965665236, "grad_norm": 0.40625, "learning_rate": 4.964678585741689e-06, "loss": 2.3484, "step": 7639 }, { "epoch": 0.40987124463519314, "grad_norm": 0.37109375, "learning_rate": 4.96466403211443e-06, "loss": 2.1945, "step": 7640 }, { "epoch": 0.4099248927038627, "grad_norm": 0.435546875, "learning_rate": 4.964649475510833e-06, "loss": 2.3992, "step": 7641 }, { "epoch": 0.4099785407725322, "grad_norm": 1.0703125, "learning_rate": 4.964634915930916e-06, "loss": 2.239, "step": 7642 }, { "epoch": 0.41003218884120174, "grad_norm": 0.390625, "learning_rate": 4.964620353374696e-06, "loss": 1.9396, "step": 7643 }, { "epoch": 0.41008583690987127, "grad_norm": 0.35546875, "learning_rate": 4.964605787842193e-06, "loss": 2.1863, "step": 7644 }, { "epoch": 0.41013948497854075, "grad_norm": 0.345703125, "learning_rate": 4.964591219333421e-06, "loss": 2.3344, "step": 7645 }, { "epoch": 0.4101931330472103, "grad_norm": 0.419921875, "learning_rate": 4.964576647848401e-06, "loss": 2.3338, "step": 7646 }, { "epoch": 0.4102467811158798, "grad_norm": 0.439453125, "learning_rate": 4.964562073387148e-06, "loss": 2.3376, "step": 7647 }, { "epoch": 0.41030042918454934, "grad_norm": 0.404296875, "learning_rate": 4.964547495949681e-06, "loss": 2.0967, "step": 7648 }, { "epoch": 0.4103540772532189, "grad_norm": 0.474609375, "learning_rate": 4.964532915536017e-06, "loss": 2.3033, "step": 7649 }, { "epoch": 0.4104077253218884, "grad_norm": 0.390625, "learning_rate": 4.964518332146175e-06, "loss": 2.089, "step": 7650 }, { "epoch": 0.41046137339055794, "grad_norm": 0.6015625, "learning_rate": 4.964503745780169e-06, "loss": 1.9905, "step": 7651 }, { "epoch": 0.41051502145922747, "grad_norm": 0.404296875, "learning_rate": 4.964489156438021e-06, "loss": 2.0118, "step": 7652 }, { "epoch": 0.410568669527897, "grad_norm": 0.439453125, "learning_rate": 4.964474564119745e-06, "loss": 2.3988, "step": 7653 }, { "epoch": 0.41062231759656653, "grad_norm": 0.470703125, "learning_rate": 4.964459968825363e-06, "loss": 2.5322, "step": 7654 }, { "epoch": 0.41067596566523606, "grad_norm": 0.56640625, "learning_rate": 4.964445370554887e-06, "loss": 2.4261, "step": 7655 }, { "epoch": 0.4107296137339056, "grad_norm": 0.765625, "learning_rate": 4.964430769308339e-06, "loss": 2.4543, "step": 7656 }, { "epoch": 0.41078326180257513, "grad_norm": 0.88671875, "learning_rate": 4.9644161650857345e-06, "loss": 2.2798, "step": 7657 }, { "epoch": 0.41083690987124466, "grad_norm": 0.4453125, "learning_rate": 4.964401557887093e-06, "loss": 2.2786, "step": 7658 }, { "epoch": 0.41089055793991414, "grad_norm": 0.44140625, "learning_rate": 4.96438694771243e-06, "loss": 2.407, "step": 7659 }, { "epoch": 0.41094420600858367, "grad_norm": 0.5, "learning_rate": 4.964372334561764e-06, "loss": 2.4661, "step": 7660 }, { "epoch": 0.4109978540772532, "grad_norm": 0.48046875, "learning_rate": 4.964357718435114e-06, "loss": 2.4781, "step": 7661 }, { "epoch": 0.41105150214592273, "grad_norm": 0.38671875, "learning_rate": 4.964343099332495e-06, "loss": 2.2903, "step": 7662 }, { "epoch": 0.41110515021459226, "grad_norm": 0.51953125, "learning_rate": 4.964328477253926e-06, "loss": 2.2847, "step": 7663 }, { "epoch": 0.4111587982832618, "grad_norm": 0.3984375, "learning_rate": 4.964313852199425e-06, "loss": 2.4231, "step": 7664 }, { "epoch": 0.4112124463519313, "grad_norm": 0.408203125, "learning_rate": 4.964299224169009e-06, "loss": 2.253, "step": 7665 }, { "epoch": 0.41126609442060086, "grad_norm": 0.5234375, "learning_rate": 4.964284593162697e-06, "loss": 2.2595, "step": 7666 }, { "epoch": 0.4113197424892704, "grad_norm": 0.39453125, "learning_rate": 4.964269959180505e-06, "loss": 2.1699, "step": 7667 }, { "epoch": 0.4113733905579399, "grad_norm": 0.298828125, "learning_rate": 4.9642553222224524e-06, "loss": 1.8595, "step": 7668 }, { "epoch": 0.41142703862660945, "grad_norm": 0.37109375, "learning_rate": 4.964240682288554e-06, "loss": 2.2549, "step": 7669 }, { "epoch": 0.411480686695279, "grad_norm": 0.369140625, "learning_rate": 4.96422603937883e-06, "loss": 2.2169, "step": 7670 }, { "epoch": 0.4115343347639485, "grad_norm": 0.447265625, "learning_rate": 4.964211393493298e-06, "loss": 2.3419, "step": 7671 }, { "epoch": 0.41158798283261805, "grad_norm": 0.3125, "learning_rate": 4.964196744631975e-06, "loss": 1.9902, "step": 7672 }, { "epoch": 0.4116416309012876, "grad_norm": 0.39453125, "learning_rate": 4.964182092794878e-06, "loss": 2.1646, "step": 7673 }, { "epoch": 0.41169527896995706, "grad_norm": 0.39453125, "learning_rate": 4.9641674379820265e-06, "loss": 2.2533, "step": 7674 }, { "epoch": 0.4117489270386266, "grad_norm": 0.42578125, "learning_rate": 4.964152780193437e-06, "loss": 2.4239, "step": 7675 }, { "epoch": 0.4118025751072961, "grad_norm": 0.44921875, "learning_rate": 4.964138119429128e-06, "loss": 1.5931, "step": 7676 }, { "epoch": 0.41185622317596565, "grad_norm": 0.439453125, "learning_rate": 4.964123455689115e-06, "loss": 2.1554, "step": 7677 }, { "epoch": 0.4119098712446352, "grad_norm": 0.416015625, "learning_rate": 4.964108788973418e-06, "loss": 2.3527, "step": 7678 }, { "epoch": 0.4119635193133047, "grad_norm": 0.400390625, "learning_rate": 4.9640941192820535e-06, "loss": 2.3007, "step": 7679 }, { "epoch": 0.41201716738197425, "grad_norm": 0.37890625, "learning_rate": 4.96407944661504e-06, "loss": 2.2453, "step": 7680 }, { "epoch": 0.4120708154506438, "grad_norm": 0.52734375, "learning_rate": 4.964064770972395e-06, "loss": 2.4764, "step": 7681 }, { "epoch": 0.4121244635193133, "grad_norm": 0.65625, "learning_rate": 4.964050092354136e-06, "loss": 2.282, "step": 7682 }, { "epoch": 0.41217811158798284, "grad_norm": 0.486328125, "learning_rate": 4.964035410760281e-06, "loss": 2.365, "step": 7683 }, { "epoch": 0.4122317596566524, "grad_norm": 0.421875, "learning_rate": 4.964020726190848e-06, "loss": 2.4713, "step": 7684 }, { "epoch": 0.4122854077253219, "grad_norm": 0.3984375, "learning_rate": 4.964006038645854e-06, "loss": 2.1981, "step": 7685 }, { "epoch": 0.41233905579399144, "grad_norm": 0.486328125, "learning_rate": 4.963991348125317e-06, "loss": 2.2168, "step": 7686 }, { "epoch": 0.41239270386266097, "grad_norm": 0.47265625, "learning_rate": 4.9639766546292545e-06, "loss": 2.3385, "step": 7687 }, { "epoch": 0.41244635193133045, "grad_norm": 0.515625, "learning_rate": 4.963961958157685e-06, "loss": 2.3988, "step": 7688 }, { "epoch": 0.4125, "grad_norm": 0.37109375, "learning_rate": 4.963947258710626e-06, "loss": 2.1808, "step": 7689 }, { "epoch": 0.4125536480686695, "grad_norm": 0.4765625, "learning_rate": 4.9639325562880945e-06, "loss": 2.5911, "step": 7690 }, { "epoch": 0.41260729613733904, "grad_norm": 0.41015625, "learning_rate": 4.96391785089011e-06, "loss": 2.4672, "step": 7691 }, { "epoch": 0.4126609442060086, "grad_norm": 0.43359375, "learning_rate": 4.963903142516688e-06, "loss": 2.2612, "step": 7692 }, { "epoch": 0.4127145922746781, "grad_norm": 0.408203125, "learning_rate": 4.963888431167847e-06, "loss": 2.4462, "step": 7693 }, { "epoch": 0.41276824034334764, "grad_norm": 0.396484375, "learning_rate": 4.963873716843606e-06, "loss": 2.2483, "step": 7694 }, { "epoch": 0.41282188841201717, "grad_norm": 0.40234375, "learning_rate": 4.9638589995439816e-06, "loss": 2.2695, "step": 7695 }, { "epoch": 0.4128755364806867, "grad_norm": 0.4296875, "learning_rate": 4.9638442792689914e-06, "loss": 2.5265, "step": 7696 }, { "epoch": 0.41292918454935623, "grad_norm": 0.37890625, "learning_rate": 4.963829556018654e-06, "loss": 2.443, "step": 7697 }, { "epoch": 0.41298283261802576, "grad_norm": 0.40625, "learning_rate": 4.9638148297929864e-06, "loss": 2.2969, "step": 7698 }, { "epoch": 0.4130364806866953, "grad_norm": 0.453125, "learning_rate": 4.963800100592008e-06, "loss": 2.0775, "step": 7699 }, { "epoch": 0.4130901287553648, "grad_norm": 0.3359375, "learning_rate": 4.963785368415734e-06, "loss": 1.9737, "step": 7700 }, { "epoch": 0.41314377682403436, "grad_norm": 0.42578125, "learning_rate": 4.963770633264184e-06, "loss": 2.2606, "step": 7701 }, { "epoch": 0.41319742489270384, "grad_norm": 0.3671875, "learning_rate": 4.963755895137376e-06, "loss": 2.4266, "step": 7702 }, { "epoch": 0.41325107296137337, "grad_norm": 0.447265625, "learning_rate": 4.963741154035326e-06, "loss": 1.1922, "step": 7703 }, { "epoch": 0.4133047210300429, "grad_norm": 0.412109375, "learning_rate": 4.9637264099580535e-06, "loss": 2.5499, "step": 7704 }, { "epoch": 0.41335836909871243, "grad_norm": 0.451171875, "learning_rate": 4.963711662905576e-06, "loss": 2.3187, "step": 7705 }, { "epoch": 0.41341201716738196, "grad_norm": 0.55859375, "learning_rate": 4.96369691287791e-06, "loss": 1.6386, "step": 7706 }, { "epoch": 0.4134656652360515, "grad_norm": 0.4375, "learning_rate": 4.9636821598750754e-06, "loss": 2.5349, "step": 7707 }, { "epoch": 0.413519313304721, "grad_norm": 0.412109375, "learning_rate": 4.963667403897089e-06, "loss": 2.193, "step": 7708 }, { "epoch": 0.41357296137339056, "grad_norm": 0.56640625, "learning_rate": 4.963652644943968e-06, "loss": 2.1406, "step": 7709 }, { "epoch": 0.4136266094420601, "grad_norm": 0.388671875, "learning_rate": 4.963637883015732e-06, "loss": 2.4921, "step": 7710 }, { "epoch": 0.4136802575107296, "grad_norm": 0.34765625, "learning_rate": 4.963623118112396e-06, "loss": 1.9087, "step": 7711 }, { "epoch": 0.41373390557939915, "grad_norm": 0.416015625, "learning_rate": 4.96360835023398e-06, "loss": 2.1692, "step": 7712 }, { "epoch": 0.4137875536480687, "grad_norm": 0.4609375, "learning_rate": 4.963593579380502e-06, "loss": 2.4312, "step": 7713 }, { "epoch": 0.4138412017167382, "grad_norm": 0.470703125, "learning_rate": 4.9635788055519795e-06, "loss": 2.3612, "step": 7714 }, { "epoch": 0.41389484978540775, "grad_norm": 0.39453125, "learning_rate": 4.963564028748429e-06, "loss": 2.2987, "step": 7715 }, { "epoch": 0.4139484978540773, "grad_norm": 0.39453125, "learning_rate": 4.96354924896987e-06, "loss": 2.4443, "step": 7716 }, { "epoch": 0.41400214592274676, "grad_norm": 0.423828125, "learning_rate": 4.963534466216319e-06, "loss": 2.3827, "step": 7717 }, { "epoch": 0.4140557939914163, "grad_norm": 0.73828125, "learning_rate": 4.963519680487795e-06, "loss": 2.2757, "step": 7718 }, { "epoch": 0.4141094420600858, "grad_norm": 0.48046875, "learning_rate": 4.963504891784315e-06, "loss": 2.4528, "step": 7719 }, { "epoch": 0.41416309012875535, "grad_norm": 0.392578125, "learning_rate": 4.963490100105898e-06, "loss": 2.2931, "step": 7720 }, { "epoch": 0.4142167381974249, "grad_norm": 0.361328125, "learning_rate": 4.963475305452561e-06, "loss": 2.0861, "step": 7721 }, { "epoch": 0.4142703862660944, "grad_norm": 0.39453125, "learning_rate": 4.9634605078243214e-06, "loss": 2.2782, "step": 7722 }, { "epoch": 0.41432403433476395, "grad_norm": 0.431640625, "learning_rate": 4.963445707221198e-06, "loss": 2.228, "step": 7723 }, { "epoch": 0.4143776824034335, "grad_norm": 0.375, "learning_rate": 4.963430903643209e-06, "loss": 2.4299, "step": 7724 }, { "epoch": 0.414431330472103, "grad_norm": 0.375, "learning_rate": 4.96341609709037e-06, "loss": 2.3047, "step": 7725 }, { "epoch": 0.41448497854077254, "grad_norm": 0.447265625, "learning_rate": 4.963401287562702e-06, "loss": 2.3025, "step": 7726 }, { "epoch": 0.4145386266094421, "grad_norm": 0.396484375, "learning_rate": 4.96338647506022e-06, "loss": 2.3139, "step": 7727 }, { "epoch": 0.4145922746781116, "grad_norm": 0.4140625, "learning_rate": 4.963371659582944e-06, "loss": 1.8135, "step": 7728 }, { "epoch": 0.41464592274678114, "grad_norm": 0.458984375, "learning_rate": 4.963356841130892e-06, "loss": 2.4367, "step": 7729 }, { "epoch": 0.41469957081545067, "grad_norm": 0.68359375, "learning_rate": 4.96334201970408e-06, "loss": 2.3834, "step": 7730 }, { "epoch": 0.41475321888412015, "grad_norm": 0.5390625, "learning_rate": 4.963327195302527e-06, "loss": 2.4146, "step": 7731 }, { "epoch": 0.4148068669527897, "grad_norm": 0.423828125, "learning_rate": 4.963312367926251e-06, "loss": 2.3102, "step": 7732 }, { "epoch": 0.4148605150214592, "grad_norm": 0.44140625, "learning_rate": 4.96329753757527e-06, "loss": 2.4304, "step": 7733 }, { "epoch": 0.41491416309012874, "grad_norm": 0.337890625, "learning_rate": 4.9632827042496015e-06, "loss": 2.0753, "step": 7734 }, { "epoch": 0.41496781115879827, "grad_norm": 0.3984375, "learning_rate": 4.963267867949263e-06, "loss": 2.5131, "step": 7735 }, { "epoch": 0.4150214592274678, "grad_norm": 0.48828125, "learning_rate": 4.963253028674274e-06, "loss": 2.2729, "step": 7736 }, { "epoch": 0.41507510729613734, "grad_norm": 0.404296875, "learning_rate": 4.96323818642465e-06, "loss": 2.3056, "step": 7737 }, { "epoch": 0.41512875536480687, "grad_norm": 0.396484375, "learning_rate": 4.9632233412004114e-06, "loss": 2.2235, "step": 7738 }, { "epoch": 0.4151824034334764, "grad_norm": 0.3515625, "learning_rate": 4.963208493001576e-06, "loss": 2.2733, "step": 7739 }, { "epoch": 0.41523605150214593, "grad_norm": 0.3515625, "learning_rate": 4.963193641828159e-06, "loss": 2.1338, "step": 7740 }, { "epoch": 0.41528969957081546, "grad_norm": 0.5703125, "learning_rate": 4.963178787680181e-06, "loss": 2.1552, "step": 7741 }, { "epoch": 0.415343347639485, "grad_norm": 0.51171875, "learning_rate": 4.963163930557658e-06, "loss": 2.1292, "step": 7742 }, { "epoch": 0.4153969957081545, "grad_norm": 0.423828125, "learning_rate": 4.963149070460611e-06, "loss": 2.2121, "step": 7743 }, { "epoch": 0.41545064377682406, "grad_norm": 0.4140625, "learning_rate": 4.963134207389054e-06, "loss": 2.2801, "step": 7744 }, { "epoch": 0.4155042918454936, "grad_norm": 0.47265625, "learning_rate": 4.963119341343008e-06, "loss": 2.2281, "step": 7745 }, { "epoch": 0.41555793991416307, "grad_norm": 0.5625, "learning_rate": 4.96310447232249e-06, "loss": 2.2248, "step": 7746 }, { "epoch": 0.4156115879828326, "grad_norm": 0.392578125, "learning_rate": 4.9630896003275175e-06, "loss": 2.1441, "step": 7747 }, { "epoch": 0.41566523605150213, "grad_norm": 0.470703125, "learning_rate": 4.963074725358108e-06, "loss": 2.4018, "step": 7748 }, { "epoch": 0.41571888412017166, "grad_norm": 0.423828125, "learning_rate": 4.963059847414281e-06, "loss": 2.262, "step": 7749 }, { "epoch": 0.4157725321888412, "grad_norm": 0.33203125, "learning_rate": 4.963044966496054e-06, "loss": 2.0267, "step": 7750 }, { "epoch": 0.4158261802575107, "grad_norm": 0.66796875, "learning_rate": 4.963030082603444e-06, "loss": 2.2718, "step": 7751 }, { "epoch": 0.41587982832618026, "grad_norm": 0.390625, "learning_rate": 4.9630151957364705e-06, "loss": 2.2832, "step": 7752 }, { "epoch": 0.4159334763948498, "grad_norm": 0.5, "learning_rate": 4.96300030589515e-06, "loss": 2.2266, "step": 7753 }, { "epoch": 0.4159871244635193, "grad_norm": 0.388671875, "learning_rate": 4.962985413079501e-06, "loss": 2.2332, "step": 7754 }, { "epoch": 0.41604077253218885, "grad_norm": 0.55859375, "learning_rate": 4.9629705172895415e-06, "loss": 2.309, "step": 7755 }, { "epoch": 0.4160944206008584, "grad_norm": 0.376953125, "learning_rate": 4.96295561852529e-06, "loss": 2.3672, "step": 7756 }, { "epoch": 0.4161480686695279, "grad_norm": 0.40625, "learning_rate": 4.9629407167867634e-06, "loss": 2.2445, "step": 7757 }, { "epoch": 0.41620171673819745, "grad_norm": 0.392578125, "learning_rate": 4.962925812073981e-06, "loss": 2.0372, "step": 7758 }, { "epoch": 0.416255364806867, "grad_norm": 0.474609375, "learning_rate": 4.96291090438696e-06, "loss": 2.2352, "step": 7759 }, { "epoch": 0.41630901287553645, "grad_norm": 0.42578125, "learning_rate": 4.962895993725719e-06, "loss": 2.4588, "step": 7760 }, { "epoch": 0.416362660944206, "grad_norm": 0.4140625, "learning_rate": 4.962881080090275e-06, "loss": 2.3786, "step": 7761 }, { "epoch": 0.4164163090128755, "grad_norm": 0.35546875, "learning_rate": 4.962866163480646e-06, "loss": 2.2153, "step": 7762 }, { "epoch": 0.41646995708154505, "grad_norm": 0.435546875, "learning_rate": 4.962851243896852e-06, "loss": 2.0991, "step": 7763 }, { "epoch": 0.4165236051502146, "grad_norm": 0.44921875, "learning_rate": 4.962836321338909e-06, "loss": 2.2565, "step": 7764 }, { "epoch": 0.4165772532188841, "grad_norm": 0.47265625, "learning_rate": 4.962821395806835e-06, "loss": 2.3717, "step": 7765 }, { "epoch": 0.41663090128755365, "grad_norm": 0.375, "learning_rate": 4.962806467300649e-06, "loss": 2.3246, "step": 7766 }, { "epoch": 0.4166845493562232, "grad_norm": 0.455078125, "learning_rate": 4.96279153582037e-06, "loss": 2.5625, "step": 7767 }, { "epoch": 0.4167381974248927, "grad_norm": 0.4296875, "learning_rate": 4.9627766013660126e-06, "loss": 2.3409, "step": 7768 }, { "epoch": 0.41679184549356224, "grad_norm": 0.439453125, "learning_rate": 4.962761663937598e-06, "loss": 1.48, "step": 7769 }, { "epoch": 0.4168454935622318, "grad_norm": 0.318359375, "learning_rate": 4.962746723535144e-06, "loss": 1.9173, "step": 7770 }, { "epoch": 0.4168991416309013, "grad_norm": 0.625, "learning_rate": 4.962731780158666e-06, "loss": 2.417, "step": 7771 }, { "epoch": 0.41695278969957084, "grad_norm": 0.419921875, "learning_rate": 4.962716833808185e-06, "loss": 2.5189, "step": 7772 }, { "epoch": 0.41700643776824037, "grad_norm": 0.41796875, "learning_rate": 4.962701884483718e-06, "loss": 2.2127, "step": 7773 }, { "epoch": 0.41706008583690984, "grad_norm": 0.404296875, "learning_rate": 4.962686932185282e-06, "loss": 2.3386, "step": 7774 }, { "epoch": 0.4171137339055794, "grad_norm": 0.44921875, "learning_rate": 4.962671976912897e-06, "loss": 2.4546, "step": 7775 }, { "epoch": 0.4171673819742489, "grad_norm": 0.380859375, "learning_rate": 4.96265701866658e-06, "loss": 2.6547, "step": 7776 }, { "epoch": 0.41722103004291844, "grad_norm": 0.3671875, "learning_rate": 4.962642057446348e-06, "loss": 1.9599, "step": 7777 }, { "epoch": 0.41727467811158797, "grad_norm": 0.404296875, "learning_rate": 4.962627093252222e-06, "loss": 2.3865, "step": 7778 }, { "epoch": 0.4173283261802575, "grad_norm": 0.392578125, "learning_rate": 4.962612126084218e-06, "loss": 2.3284, "step": 7779 }, { "epoch": 0.41738197424892703, "grad_norm": 0.419921875, "learning_rate": 4.9625971559423535e-06, "loss": 2.4731, "step": 7780 }, { "epoch": 0.41743562231759657, "grad_norm": 0.37109375, "learning_rate": 4.962582182826647e-06, "loss": 2.2398, "step": 7781 }, { "epoch": 0.4174892703862661, "grad_norm": 0.43359375, "learning_rate": 4.962567206737119e-06, "loss": 2.3158, "step": 7782 }, { "epoch": 0.41754291845493563, "grad_norm": 0.388671875, "learning_rate": 4.962552227673784e-06, "loss": 2.1835, "step": 7783 }, { "epoch": 0.41759656652360516, "grad_norm": 0.4453125, "learning_rate": 4.962537245636662e-06, "loss": 2.1999, "step": 7784 }, { "epoch": 0.4176502145922747, "grad_norm": 0.38671875, "learning_rate": 4.962522260625771e-06, "loss": 2.4039, "step": 7785 }, { "epoch": 0.4177038626609442, "grad_norm": 0.35546875, "learning_rate": 4.962507272641129e-06, "loss": 2.0527, "step": 7786 }, { "epoch": 0.41775751072961376, "grad_norm": 0.373046875, "learning_rate": 4.962492281682754e-06, "loss": 2.4343, "step": 7787 }, { "epoch": 0.4178111587982833, "grad_norm": 0.466796875, "learning_rate": 4.962477287750663e-06, "loss": 2.5506, "step": 7788 }, { "epoch": 0.41786480686695276, "grad_norm": 0.474609375, "learning_rate": 4.962462290844877e-06, "loss": 2.2698, "step": 7789 }, { "epoch": 0.4179184549356223, "grad_norm": 0.435546875, "learning_rate": 4.962447290965411e-06, "loss": 2.4372, "step": 7790 }, { "epoch": 0.41797210300429183, "grad_norm": 0.5078125, "learning_rate": 4.962432288112285e-06, "loss": 2.2116, "step": 7791 }, { "epoch": 0.41802575107296136, "grad_norm": 0.435546875, "learning_rate": 4.962417282285517e-06, "loss": 2.3454, "step": 7792 }, { "epoch": 0.4180793991416309, "grad_norm": 0.4296875, "learning_rate": 4.962402273485123e-06, "loss": 2.2575, "step": 7793 }, { "epoch": 0.4181330472103004, "grad_norm": 0.5234375, "learning_rate": 4.962387261711124e-06, "loss": 2.3687, "step": 7794 }, { "epoch": 0.41818669527896996, "grad_norm": 0.37109375, "learning_rate": 4.962372246963537e-06, "loss": 2.0659, "step": 7795 }, { "epoch": 0.4182403433476395, "grad_norm": 0.486328125, "learning_rate": 4.96235722924238e-06, "loss": 2.5191, "step": 7796 }, { "epoch": 0.418293991416309, "grad_norm": 0.408203125, "learning_rate": 4.962342208547671e-06, "loss": 2.4319, "step": 7797 }, { "epoch": 0.41834763948497855, "grad_norm": 0.38671875, "learning_rate": 4.962327184879428e-06, "loss": 2.3212, "step": 7798 }, { "epoch": 0.4184012875536481, "grad_norm": 0.427734375, "learning_rate": 4.96231215823767e-06, "loss": 2.0094, "step": 7799 }, { "epoch": 0.4184549356223176, "grad_norm": 0.435546875, "learning_rate": 4.962297128622414e-06, "loss": 2.2877, "step": 7800 }, { "epoch": 0.41850858369098715, "grad_norm": 0.384765625, "learning_rate": 4.962282096033679e-06, "loss": 2.3055, "step": 7801 }, { "epoch": 0.4185622317596567, "grad_norm": 0.45703125, "learning_rate": 4.962267060471483e-06, "loss": 2.1724, "step": 7802 }, { "epoch": 0.41861587982832615, "grad_norm": 0.41015625, "learning_rate": 4.962252021935845e-06, "loss": 2.1863, "step": 7803 }, { "epoch": 0.4186695278969957, "grad_norm": 0.341796875, "learning_rate": 4.962236980426781e-06, "loss": 2.042, "step": 7804 }, { "epoch": 0.4187231759656652, "grad_norm": 0.484375, "learning_rate": 4.962221935944311e-06, "loss": 2.5317, "step": 7805 }, { "epoch": 0.41877682403433475, "grad_norm": 0.75, "learning_rate": 4.962206888488452e-06, "loss": 2.3494, "step": 7806 }, { "epoch": 0.4188304721030043, "grad_norm": 0.44140625, "learning_rate": 4.962191838059223e-06, "loss": 2.0442, "step": 7807 }, { "epoch": 0.4188841201716738, "grad_norm": 0.38671875, "learning_rate": 4.962176784656641e-06, "loss": 2.1931, "step": 7808 }, { "epoch": 0.41893776824034334, "grad_norm": 0.41796875, "learning_rate": 4.962161728280727e-06, "loss": 2.4124, "step": 7809 }, { "epoch": 0.4189914163090129, "grad_norm": 0.369140625, "learning_rate": 4.962146668931496e-06, "loss": 2.3255, "step": 7810 }, { "epoch": 0.4190450643776824, "grad_norm": 0.46484375, "learning_rate": 4.962131606608968e-06, "loss": 2.2176, "step": 7811 }, { "epoch": 0.41909871244635194, "grad_norm": 0.435546875, "learning_rate": 4.96211654131316e-06, "loss": 2.4167, "step": 7812 }, { "epoch": 0.41915236051502147, "grad_norm": 0.337890625, "learning_rate": 4.962101473044092e-06, "loss": 1.9923, "step": 7813 }, { "epoch": 0.419206008583691, "grad_norm": 0.56640625, "learning_rate": 4.96208640180178e-06, "loss": 2.3854, "step": 7814 }, { "epoch": 0.41925965665236054, "grad_norm": 2.5, "learning_rate": 4.962071327586243e-06, "loss": 2.4462, "step": 7815 }, { "epoch": 0.41931330472103007, "grad_norm": 0.490234375, "learning_rate": 4.9620562503975e-06, "loss": 2.5271, "step": 7816 }, { "epoch": 0.4193669527896996, "grad_norm": 0.41015625, "learning_rate": 4.9620411702355685e-06, "loss": 2.2845, "step": 7817 }, { "epoch": 0.4194206008583691, "grad_norm": 0.490234375, "learning_rate": 4.962026087100468e-06, "loss": 2.3585, "step": 7818 }, { "epoch": 0.4194742489270386, "grad_norm": 0.353515625, "learning_rate": 4.962011000992214e-06, "loss": 2.2143, "step": 7819 }, { "epoch": 0.41952789699570814, "grad_norm": 0.37109375, "learning_rate": 4.961995911910827e-06, "loss": 2.3569, "step": 7820 }, { "epoch": 0.41958154506437767, "grad_norm": 0.4609375, "learning_rate": 4.961980819856324e-06, "loss": 2.3607, "step": 7821 }, { "epoch": 0.4196351931330472, "grad_norm": 0.37109375, "learning_rate": 4.961965724828724e-06, "loss": 1.8552, "step": 7822 }, { "epoch": 0.41968884120171673, "grad_norm": 0.49609375, "learning_rate": 4.9619506268280445e-06, "loss": 2.2538, "step": 7823 }, { "epoch": 0.41974248927038627, "grad_norm": 0.376953125, "learning_rate": 4.9619355258543055e-06, "loss": 2.4059, "step": 7824 }, { "epoch": 0.4197961373390558, "grad_norm": 0.392578125, "learning_rate": 4.961920421907523e-06, "loss": 2.2968, "step": 7825 }, { "epoch": 0.41984978540772533, "grad_norm": 0.396484375, "learning_rate": 4.961905314987716e-06, "loss": 1.9111, "step": 7826 }, { "epoch": 0.41990343347639486, "grad_norm": 0.404296875, "learning_rate": 4.961890205094904e-06, "loss": 2.227, "step": 7827 }, { "epoch": 0.4199570815450644, "grad_norm": 0.4296875, "learning_rate": 4.961875092229103e-06, "loss": 2.336, "step": 7828 }, { "epoch": 0.4200107296137339, "grad_norm": 0.5, "learning_rate": 4.961859976390333e-06, "loss": 2.1967, "step": 7829 }, { "epoch": 0.42006437768240346, "grad_norm": 0.421875, "learning_rate": 4.961844857578612e-06, "loss": 2.2761, "step": 7830 }, { "epoch": 0.420118025751073, "grad_norm": 0.4921875, "learning_rate": 4.961829735793957e-06, "loss": 1.6034, "step": 7831 }, { "epoch": 0.42017167381974246, "grad_norm": 0.412109375, "learning_rate": 4.961814611036387e-06, "loss": 2.1209, "step": 7832 }, { "epoch": 0.420225321888412, "grad_norm": 0.380859375, "learning_rate": 4.961799483305922e-06, "loss": 2.1807, "step": 7833 }, { "epoch": 0.4202789699570815, "grad_norm": 0.48046875, "learning_rate": 4.961784352602576e-06, "loss": 2.2541, "step": 7834 }, { "epoch": 0.42033261802575106, "grad_norm": 0.41015625, "learning_rate": 4.961769218926372e-06, "loss": 2.2254, "step": 7835 }, { "epoch": 0.4203862660944206, "grad_norm": 0.431640625, "learning_rate": 4.961754082277327e-06, "loss": 2.3466, "step": 7836 }, { "epoch": 0.4204399141630901, "grad_norm": 0.41796875, "learning_rate": 4.961738942655457e-06, "loss": 2.2847, "step": 7837 }, { "epoch": 0.42049356223175965, "grad_norm": 0.48046875, "learning_rate": 4.9617238000607825e-06, "loss": 2.8409, "step": 7838 }, { "epoch": 0.4205472103004292, "grad_norm": 0.349609375, "learning_rate": 4.961708654493321e-06, "loss": 2.2808, "step": 7839 }, { "epoch": 0.4206008583690987, "grad_norm": 0.38671875, "learning_rate": 4.9616935059530915e-06, "loss": 2.0836, "step": 7840 }, { "epoch": 0.42065450643776825, "grad_norm": 0.486328125, "learning_rate": 4.96167835444011e-06, "loss": 2.266, "step": 7841 }, { "epoch": 0.4207081545064378, "grad_norm": 0.34765625, "learning_rate": 4.961663199954399e-06, "loss": 1.9926, "step": 7842 }, { "epoch": 0.4207618025751073, "grad_norm": 0.4140625, "learning_rate": 4.961648042495972e-06, "loss": 2.0401, "step": 7843 }, { "epoch": 0.42081545064377684, "grad_norm": 0.4296875, "learning_rate": 4.961632882064851e-06, "loss": 2.3802, "step": 7844 }, { "epoch": 0.4208690987124464, "grad_norm": 0.7265625, "learning_rate": 4.961617718661052e-06, "loss": 2.339, "step": 7845 }, { "epoch": 0.42092274678111585, "grad_norm": 0.390625, "learning_rate": 4.961602552284595e-06, "loss": 2.2678, "step": 7846 }, { "epoch": 0.4209763948497854, "grad_norm": 0.4453125, "learning_rate": 4.9615873829354965e-06, "loss": 1.9648, "step": 7847 }, { "epoch": 0.4210300429184549, "grad_norm": 0.400390625, "learning_rate": 4.961572210613777e-06, "loss": 2.2841, "step": 7848 }, { "epoch": 0.42108369098712445, "grad_norm": 1.0859375, "learning_rate": 4.961557035319453e-06, "loss": 2.4025, "step": 7849 }, { "epoch": 0.421137339055794, "grad_norm": 0.47265625, "learning_rate": 4.9615418570525435e-06, "loss": 2.2567, "step": 7850 }, { "epoch": 0.4211909871244635, "grad_norm": 0.384765625, "learning_rate": 4.961526675813067e-06, "loss": 2.4522, "step": 7851 }, { "epoch": 0.42124463519313304, "grad_norm": 0.412109375, "learning_rate": 4.961511491601042e-06, "loss": 2.225, "step": 7852 }, { "epoch": 0.4212982832618026, "grad_norm": 0.4296875, "learning_rate": 4.961496304416485e-06, "loss": 2.2466, "step": 7853 }, { "epoch": 0.4213519313304721, "grad_norm": 0.3984375, "learning_rate": 4.961481114259418e-06, "loss": 2.2549, "step": 7854 }, { "epoch": 0.42140557939914164, "grad_norm": 0.38671875, "learning_rate": 4.961465921129856e-06, "loss": 2.2126, "step": 7855 }, { "epoch": 0.42145922746781117, "grad_norm": 0.376953125, "learning_rate": 4.961450725027819e-06, "loss": 2.2105, "step": 7856 }, { "epoch": 0.4215128755364807, "grad_norm": 0.4296875, "learning_rate": 4.961435525953324e-06, "loss": 2.2368, "step": 7857 }, { "epoch": 0.42156652360515023, "grad_norm": 0.3828125, "learning_rate": 4.961420323906391e-06, "loss": 2.3385, "step": 7858 }, { "epoch": 0.42162017167381977, "grad_norm": 0.3828125, "learning_rate": 4.961405118887037e-06, "loss": 2.1811, "step": 7859 }, { "epoch": 0.4216738197424893, "grad_norm": 0.4140625, "learning_rate": 4.961389910895282e-06, "loss": 2.4892, "step": 7860 }, { "epoch": 0.4217274678111588, "grad_norm": 0.41015625, "learning_rate": 4.961374699931143e-06, "loss": 2.4166, "step": 7861 }, { "epoch": 0.4217811158798283, "grad_norm": 0.486328125, "learning_rate": 4.961359485994638e-06, "loss": 2.1111, "step": 7862 }, { "epoch": 0.42183476394849784, "grad_norm": 0.3828125, "learning_rate": 4.9613442690857864e-06, "loss": 2.3064, "step": 7863 }, { "epoch": 0.42188841201716737, "grad_norm": 0.369140625, "learning_rate": 4.961329049204606e-06, "loss": 2.2226, "step": 7864 }, { "epoch": 0.4219420600858369, "grad_norm": 0.416015625, "learning_rate": 4.9613138263511166e-06, "loss": 2.6043, "step": 7865 }, { "epoch": 0.42199570815450643, "grad_norm": 0.41796875, "learning_rate": 4.9612986005253335e-06, "loss": 2.2804, "step": 7866 }, { "epoch": 0.42204935622317596, "grad_norm": 0.44140625, "learning_rate": 4.961283371727278e-06, "loss": 2.401, "step": 7867 }, { "epoch": 0.4221030042918455, "grad_norm": 0.412109375, "learning_rate": 4.961268139956968e-06, "loss": 1.961, "step": 7868 }, { "epoch": 0.42215665236051503, "grad_norm": 0.40625, "learning_rate": 4.9612529052144205e-06, "loss": 2.2333, "step": 7869 }, { "epoch": 0.42221030042918456, "grad_norm": 0.5234375, "learning_rate": 4.961237667499656e-06, "loss": 2.3525, "step": 7870 }, { "epoch": 0.4222639484978541, "grad_norm": 0.482421875, "learning_rate": 4.96122242681269e-06, "loss": 2.1954, "step": 7871 }, { "epoch": 0.4223175965665236, "grad_norm": 0.484375, "learning_rate": 4.961207183153544e-06, "loss": 2.1564, "step": 7872 }, { "epoch": 0.42237124463519315, "grad_norm": 0.40625, "learning_rate": 4.9611919365222335e-06, "loss": 2.1566, "step": 7873 }, { "epoch": 0.4224248927038627, "grad_norm": 0.4765625, "learning_rate": 4.9611766869187796e-06, "loss": 2.0647, "step": 7874 }, { "epoch": 0.42247854077253216, "grad_norm": 0.384765625, "learning_rate": 4.961161434343199e-06, "loss": 2.4994, "step": 7875 }, { "epoch": 0.4225321888412017, "grad_norm": 0.3984375, "learning_rate": 4.961146178795511e-06, "loss": 1.8788, "step": 7876 }, { "epoch": 0.4225858369098712, "grad_norm": 0.359375, "learning_rate": 4.961130920275733e-06, "loss": 2.3078, "step": 7877 }, { "epoch": 0.42263948497854076, "grad_norm": 0.435546875, "learning_rate": 4.961115658783885e-06, "loss": 2.4538, "step": 7878 }, { "epoch": 0.4226931330472103, "grad_norm": 0.51171875, "learning_rate": 4.961100394319983e-06, "loss": 1.3789, "step": 7879 }, { "epoch": 0.4227467811158798, "grad_norm": 0.45703125, "learning_rate": 4.9610851268840486e-06, "loss": 2.3267, "step": 7880 }, { "epoch": 0.42280042918454935, "grad_norm": 0.498046875, "learning_rate": 4.9610698564760975e-06, "loss": 2.245, "step": 7881 }, { "epoch": 0.4228540772532189, "grad_norm": 0.322265625, "learning_rate": 4.96105458309615e-06, "loss": 1.9979, "step": 7882 }, { "epoch": 0.4229077253218884, "grad_norm": 0.390625, "learning_rate": 4.961039306744223e-06, "loss": 2.1072, "step": 7883 }, { "epoch": 0.42296137339055795, "grad_norm": 0.5, "learning_rate": 4.9610240274203355e-06, "loss": 2.3876, "step": 7884 }, { "epoch": 0.4230150214592275, "grad_norm": 0.33203125, "learning_rate": 4.961008745124507e-06, "loss": 1.8568, "step": 7885 }, { "epoch": 0.423068669527897, "grad_norm": 0.380859375, "learning_rate": 4.960993459856756e-06, "loss": 1.9841, "step": 7886 }, { "epoch": 0.42312231759656654, "grad_norm": 0.45703125, "learning_rate": 4.9609781716170975e-06, "loss": 2.3894, "step": 7887 }, { "epoch": 0.4231759656652361, "grad_norm": 0.380859375, "learning_rate": 4.960962880405554e-06, "loss": 2.1788, "step": 7888 }, { "epoch": 0.42322961373390555, "grad_norm": 0.408203125, "learning_rate": 4.960947586222143e-06, "loss": 2.0377, "step": 7889 }, { "epoch": 0.4232832618025751, "grad_norm": 0.423828125, "learning_rate": 4.960932289066882e-06, "loss": 2.6103, "step": 7890 }, { "epoch": 0.4233369098712446, "grad_norm": 0.3828125, "learning_rate": 4.96091698893979e-06, "loss": 2.1484, "step": 7891 }, { "epoch": 0.42339055793991415, "grad_norm": 0.421875, "learning_rate": 4.9609016858408855e-06, "loss": 2.3982, "step": 7892 }, { "epoch": 0.4234442060085837, "grad_norm": 0.412109375, "learning_rate": 4.960886379770187e-06, "loss": 2.326, "step": 7893 }, { "epoch": 0.4234978540772532, "grad_norm": 0.39453125, "learning_rate": 4.960871070727712e-06, "loss": 2.4412, "step": 7894 }, { "epoch": 0.42355150214592274, "grad_norm": 0.4921875, "learning_rate": 4.9608557587134805e-06, "loss": 2.4618, "step": 7895 }, { "epoch": 0.4236051502145923, "grad_norm": 0.373046875, "learning_rate": 4.96084044372751e-06, "loss": 2.3485, "step": 7896 }, { "epoch": 0.4236587982832618, "grad_norm": 0.40234375, "learning_rate": 4.96082512576982e-06, "loss": 2.1653, "step": 7897 }, { "epoch": 0.42371244635193134, "grad_norm": 0.3671875, "learning_rate": 4.960809804840428e-06, "loss": 2.3013, "step": 7898 }, { "epoch": 0.42376609442060087, "grad_norm": 0.4375, "learning_rate": 4.960794480939353e-06, "loss": 2.1685, "step": 7899 }, { "epoch": 0.4238197424892704, "grad_norm": 0.416015625, "learning_rate": 4.960779154066614e-06, "loss": 2.3712, "step": 7900 }, { "epoch": 0.42387339055793993, "grad_norm": 0.421875, "learning_rate": 4.960763824222228e-06, "loss": 2.3379, "step": 7901 }, { "epoch": 0.42392703862660946, "grad_norm": 0.365234375, "learning_rate": 4.960748491406215e-06, "loss": 2.2299, "step": 7902 }, { "epoch": 0.423980686695279, "grad_norm": 0.37109375, "learning_rate": 4.9607331556185914e-06, "loss": 2.2201, "step": 7903 }, { "epoch": 0.4240343347639485, "grad_norm": 0.515625, "learning_rate": 4.9607178168593785e-06, "loss": 2.3933, "step": 7904 }, { "epoch": 0.424087982832618, "grad_norm": 0.3671875, "learning_rate": 4.960702475128593e-06, "loss": 2.1518, "step": 7905 }, { "epoch": 0.42414163090128754, "grad_norm": 0.43359375, "learning_rate": 4.960687130426254e-06, "loss": 2.4916, "step": 7906 }, { "epoch": 0.42419527896995707, "grad_norm": 0.5625, "learning_rate": 4.9606717827523805e-06, "loss": 2.3881, "step": 7907 }, { "epoch": 0.4242489270386266, "grad_norm": 0.392578125, "learning_rate": 4.9606564321069906e-06, "loss": 2.191, "step": 7908 }, { "epoch": 0.42430257510729613, "grad_norm": 0.4375, "learning_rate": 4.960641078490102e-06, "loss": 2.2143, "step": 7909 }, { "epoch": 0.42435622317596566, "grad_norm": 0.42578125, "learning_rate": 4.960625721901735e-06, "loss": 2.5026, "step": 7910 }, { "epoch": 0.4244098712446352, "grad_norm": 0.435546875, "learning_rate": 4.960610362341906e-06, "loss": 2.3123, "step": 7911 }, { "epoch": 0.4244635193133047, "grad_norm": 0.6484375, "learning_rate": 4.960594999810636e-06, "loss": 2.2014, "step": 7912 }, { "epoch": 0.42451716738197426, "grad_norm": 0.37109375, "learning_rate": 4.960579634307941e-06, "loss": 2.1866, "step": 7913 }, { "epoch": 0.4245708154506438, "grad_norm": 0.431640625, "learning_rate": 4.960564265833842e-06, "loss": 2.2825, "step": 7914 }, { "epoch": 0.4246244635193133, "grad_norm": 0.37890625, "learning_rate": 4.960548894388355e-06, "loss": 2.3632, "step": 7915 }, { "epoch": 0.42467811158798285, "grad_norm": 0.490234375, "learning_rate": 4.960533519971501e-06, "loss": 1.5035, "step": 7916 }, { "epoch": 0.4247317596566524, "grad_norm": 0.455078125, "learning_rate": 4.9605181425832975e-06, "loss": 2.3493, "step": 7917 }, { "epoch": 0.42478540772532186, "grad_norm": 0.416015625, "learning_rate": 4.960502762223762e-06, "loss": 2.3792, "step": 7918 }, { "epoch": 0.4248390557939914, "grad_norm": 0.412109375, "learning_rate": 4.960487378892915e-06, "loss": 2.3741, "step": 7919 }, { "epoch": 0.4248927038626609, "grad_norm": 0.3828125, "learning_rate": 4.960471992590774e-06, "loss": 2.2964, "step": 7920 }, { "epoch": 0.42494635193133046, "grad_norm": 0.443359375, "learning_rate": 4.960456603317358e-06, "loss": 2.4697, "step": 7921 }, { "epoch": 0.425, "grad_norm": 0.408203125, "learning_rate": 4.960441211072686e-06, "loss": 2.268, "step": 7922 }, { "epoch": 0.4250536480686695, "grad_norm": 0.453125, "learning_rate": 4.960425815856776e-06, "loss": 2.4646, "step": 7923 }, { "epoch": 0.42510729613733905, "grad_norm": 0.41015625, "learning_rate": 4.960410417669646e-06, "loss": 2.4699, "step": 7924 }, { "epoch": 0.4251609442060086, "grad_norm": 0.5390625, "learning_rate": 4.9603950165113145e-06, "loss": 1.2588, "step": 7925 }, { "epoch": 0.4252145922746781, "grad_norm": 0.455078125, "learning_rate": 4.960379612381801e-06, "loss": 2.4682, "step": 7926 }, { "epoch": 0.42526824034334765, "grad_norm": 0.388671875, "learning_rate": 4.960364205281124e-06, "loss": 2.2737, "step": 7927 }, { "epoch": 0.4253218884120172, "grad_norm": 0.421875, "learning_rate": 4.9603487952093025e-06, "loss": 2.2998, "step": 7928 }, { "epoch": 0.4253755364806867, "grad_norm": 0.3125, "learning_rate": 4.960333382166354e-06, "loss": 2.2808, "step": 7929 }, { "epoch": 0.42542918454935624, "grad_norm": 0.431640625, "learning_rate": 4.960317966152298e-06, "loss": 2.0365, "step": 7930 }, { "epoch": 0.4254828326180258, "grad_norm": 0.408203125, "learning_rate": 4.960302547167153e-06, "loss": 2.3105, "step": 7931 }, { "epoch": 0.4255364806866953, "grad_norm": 0.373046875, "learning_rate": 4.960287125210936e-06, "loss": 2.2626, "step": 7932 }, { "epoch": 0.4255901287553648, "grad_norm": 0.435546875, "learning_rate": 4.960271700283669e-06, "loss": 1.7465, "step": 7933 }, { "epoch": 0.4256437768240343, "grad_norm": 0.390625, "learning_rate": 4.960256272385367e-06, "loss": 2.192, "step": 7934 }, { "epoch": 0.42569742489270385, "grad_norm": 0.546875, "learning_rate": 4.960240841516052e-06, "loss": 2.3274, "step": 7935 }, { "epoch": 0.4257510729613734, "grad_norm": 0.390625, "learning_rate": 4.960225407675739e-06, "loss": 2.3632, "step": 7936 }, { "epoch": 0.4258047210300429, "grad_norm": 0.396484375, "learning_rate": 4.96020997086445e-06, "loss": 2.4398, "step": 7937 }, { "epoch": 0.42585836909871244, "grad_norm": 0.44921875, "learning_rate": 4.960194531082202e-06, "loss": 2.4335, "step": 7938 }, { "epoch": 0.425912017167382, "grad_norm": 0.419921875, "learning_rate": 4.960179088329014e-06, "loss": 2.3527, "step": 7939 }, { "epoch": 0.4259656652360515, "grad_norm": 0.44140625, "learning_rate": 4.960163642604905e-06, "loss": 1.5932, "step": 7940 }, { "epoch": 0.42601931330472104, "grad_norm": 0.431640625, "learning_rate": 4.960148193909892e-06, "loss": 2.493, "step": 7941 }, { "epoch": 0.42607296137339057, "grad_norm": 0.48828125, "learning_rate": 4.960132742243995e-06, "loss": 2.2497, "step": 7942 }, { "epoch": 0.4261266094420601, "grad_norm": 0.47265625, "learning_rate": 4.960117287607233e-06, "loss": 2.4199, "step": 7943 }, { "epoch": 0.42618025751072963, "grad_norm": 0.34765625, "learning_rate": 4.960101829999623e-06, "loss": 2.1938, "step": 7944 }, { "epoch": 0.42623390557939916, "grad_norm": 0.466796875, "learning_rate": 4.9600863694211864e-06, "loss": 2.0799, "step": 7945 }, { "epoch": 0.4262875536480687, "grad_norm": 0.8984375, "learning_rate": 4.960070905871939e-06, "loss": 2.523, "step": 7946 }, { "epoch": 0.42634120171673817, "grad_norm": 0.41015625, "learning_rate": 4.9600554393519015e-06, "loss": 2.3288, "step": 7947 }, { "epoch": 0.4263948497854077, "grad_norm": 0.546875, "learning_rate": 4.9600399698610916e-06, "loss": 2.2673, "step": 7948 }, { "epoch": 0.42644849785407724, "grad_norm": 0.4140625, "learning_rate": 4.960024497399528e-06, "loss": 2.4253, "step": 7949 }, { "epoch": 0.42650214592274677, "grad_norm": 0.44921875, "learning_rate": 4.960009021967231e-06, "loss": 2.2579, "step": 7950 }, { "epoch": 0.4265557939914163, "grad_norm": 0.37109375, "learning_rate": 4.959993543564216e-06, "loss": 2.5843, "step": 7951 }, { "epoch": 0.42660944206008583, "grad_norm": 0.439453125, "learning_rate": 4.959978062190505e-06, "loss": 2.2202, "step": 7952 }, { "epoch": 0.42666309012875536, "grad_norm": 0.400390625, "learning_rate": 4.959962577846114e-06, "loss": 1.9632, "step": 7953 }, { "epoch": 0.4267167381974249, "grad_norm": 0.412109375, "learning_rate": 4.9599470905310635e-06, "loss": 2.063, "step": 7954 }, { "epoch": 0.4267703862660944, "grad_norm": 0.37109375, "learning_rate": 4.959931600245372e-06, "loss": 2.4682, "step": 7955 }, { "epoch": 0.42682403433476396, "grad_norm": 0.435546875, "learning_rate": 4.959916106989057e-06, "loss": 2.3457, "step": 7956 }, { "epoch": 0.4268776824034335, "grad_norm": 0.373046875, "learning_rate": 4.959900610762139e-06, "loss": 2.281, "step": 7957 }, { "epoch": 0.426931330472103, "grad_norm": 0.40625, "learning_rate": 4.959885111564635e-06, "loss": 2.2625, "step": 7958 }, { "epoch": 0.42698497854077255, "grad_norm": 0.349609375, "learning_rate": 4.959869609396566e-06, "loss": 2.2899, "step": 7959 }, { "epoch": 0.4270386266094421, "grad_norm": 0.416015625, "learning_rate": 4.959854104257948e-06, "loss": 2.2625, "step": 7960 }, { "epoch": 0.42709227467811156, "grad_norm": 0.4453125, "learning_rate": 4.959838596148801e-06, "loss": 2.2374, "step": 7961 }, { "epoch": 0.4271459227467811, "grad_norm": 0.390625, "learning_rate": 4.959823085069144e-06, "loss": 2.0419, "step": 7962 }, { "epoch": 0.4271995708154506, "grad_norm": 0.50390625, "learning_rate": 4.9598075710189955e-06, "loss": 2.4946, "step": 7963 }, { "epoch": 0.42725321888412016, "grad_norm": 0.458984375, "learning_rate": 4.959792053998373e-06, "loss": 1.9956, "step": 7964 }, { "epoch": 0.4273068669527897, "grad_norm": 0.41796875, "learning_rate": 4.959776534007298e-06, "loss": 2.3471, "step": 7965 }, { "epoch": 0.4273605150214592, "grad_norm": 0.359375, "learning_rate": 4.959761011045787e-06, "loss": 2.1836, "step": 7966 }, { "epoch": 0.42741416309012875, "grad_norm": 0.37890625, "learning_rate": 4.95974548511386e-06, "loss": 2.3269, "step": 7967 }, { "epoch": 0.4274678111587983, "grad_norm": 0.3984375, "learning_rate": 4.9597299562115346e-06, "loss": 2.2184, "step": 7968 }, { "epoch": 0.4275214592274678, "grad_norm": 0.427734375, "learning_rate": 4.959714424338829e-06, "loss": 2.2061, "step": 7969 }, { "epoch": 0.42757510729613735, "grad_norm": 0.423828125, "learning_rate": 4.959698889495765e-06, "loss": 2.3496, "step": 7970 }, { "epoch": 0.4276287553648069, "grad_norm": 0.349609375, "learning_rate": 4.959683351682358e-06, "loss": 1.9672, "step": 7971 }, { "epoch": 0.4276824034334764, "grad_norm": 0.58203125, "learning_rate": 4.959667810898629e-06, "loss": 2.1551, "step": 7972 }, { "epoch": 0.42773605150214594, "grad_norm": 0.423828125, "learning_rate": 4.959652267144596e-06, "loss": 2.2166, "step": 7973 }, { "epoch": 0.4277896995708155, "grad_norm": 0.44921875, "learning_rate": 4.959636720420277e-06, "loss": 2.3635, "step": 7974 }, { "epoch": 0.427843347639485, "grad_norm": 0.3828125, "learning_rate": 4.9596211707256915e-06, "loss": 2.3172, "step": 7975 }, { "epoch": 0.4278969957081545, "grad_norm": 0.40625, "learning_rate": 4.959605618060859e-06, "loss": 2.3959, "step": 7976 }, { "epoch": 0.427950643776824, "grad_norm": 1.5390625, "learning_rate": 4.959590062425796e-06, "loss": 2.3445, "step": 7977 }, { "epoch": 0.42800429184549355, "grad_norm": 0.4140625, "learning_rate": 4.9595745038205245e-06, "loss": 2.2489, "step": 7978 }, { "epoch": 0.4280579399141631, "grad_norm": 0.41015625, "learning_rate": 4.959558942245061e-06, "loss": 2.3266, "step": 7979 }, { "epoch": 0.4281115879828326, "grad_norm": 1.359375, "learning_rate": 4.959543377699425e-06, "loss": 2.3644, "step": 7980 }, { "epoch": 0.42816523605150214, "grad_norm": 0.43359375, "learning_rate": 4.9595278101836355e-06, "loss": 2.3272, "step": 7981 }, { "epoch": 0.4282188841201717, "grad_norm": 0.375, "learning_rate": 4.95951223969771e-06, "loss": 2.4884, "step": 7982 }, { "epoch": 0.4282725321888412, "grad_norm": 0.498046875, "learning_rate": 4.95949666624167e-06, "loss": 2.4526, "step": 7983 }, { "epoch": 0.42832618025751074, "grad_norm": 0.400390625, "learning_rate": 4.959481089815531e-06, "loss": 2.3709, "step": 7984 }, { "epoch": 0.42837982832618027, "grad_norm": 0.375, "learning_rate": 4.959465510419314e-06, "loss": 2.2641, "step": 7985 }, { "epoch": 0.4284334763948498, "grad_norm": 0.6015625, "learning_rate": 4.959449928053037e-06, "loss": 2.284, "step": 7986 }, { "epoch": 0.42848712446351933, "grad_norm": 0.4140625, "learning_rate": 4.959434342716719e-06, "loss": 2.4182, "step": 7987 }, { "epoch": 0.42854077253218886, "grad_norm": 0.396484375, "learning_rate": 4.9594187544103785e-06, "loss": 2.2429, "step": 7988 }, { "epoch": 0.4285944206008584, "grad_norm": 0.546875, "learning_rate": 4.959403163134036e-06, "loss": 1.9282, "step": 7989 }, { "epoch": 0.42864806866952787, "grad_norm": 0.423828125, "learning_rate": 4.959387568887708e-06, "loss": 2.277, "step": 7990 }, { "epoch": 0.4287017167381974, "grad_norm": 0.361328125, "learning_rate": 4.9593719716714155e-06, "loss": 1.948, "step": 7991 }, { "epoch": 0.42875536480686693, "grad_norm": 0.30859375, "learning_rate": 4.959356371485175e-06, "loss": 1.8599, "step": 7992 }, { "epoch": 0.42880901287553647, "grad_norm": 0.357421875, "learning_rate": 4.959340768329007e-06, "loss": 2.3564, "step": 7993 }, { "epoch": 0.428862660944206, "grad_norm": 0.35546875, "learning_rate": 4.959325162202929e-06, "loss": 1.8286, "step": 7994 }, { "epoch": 0.42891630901287553, "grad_norm": 0.388671875, "learning_rate": 4.959309553106962e-06, "loss": 2.4908, "step": 7995 }, { "epoch": 0.42896995708154506, "grad_norm": 0.6171875, "learning_rate": 4.9592939410411235e-06, "loss": 2.261, "step": 7996 }, { "epoch": 0.4290236051502146, "grad_norm": 0.40234375, "learning_rate": 4.959278326005432e-06, "loss": 2.2056, "step": 7997 }, { "epoch": 0.4290772532188841, "grad_norm": 0.44140625, "learning_rate": 4.959262707999906e-06, "loss": 2.3836, "step": 7998 }, { "epoch": 0.42913090128755366, "grad_norm": 0.384765625, "learning_rate": 4.959247087024567e-06, "loss": 2.1569, "step": 7999 }, { "epoch": 0.4291845493562232, "grad_norm": 0.359375, "learning_rate": 4.95923146307943e-06, "loss": 2.1582, "step": 8000 }, { "epoch": 0.4292381974248927, "grad_norm": 0.7421875, "learning_rate": 4.959215836164517e-06, "loss": 2.1968, "step": 8001 }, { "epoch": 0.42929184549356225, "grad_norm": 0.408203125, "learning_rate": 4.959200206279845e-06, "loss": 2.2173, "step": 8002 }, { "epoch": 0.4293454935622318, "grad_norm": 0.734375, "learning_rate": 4.959184573425434e-06, "loss": 1.7087, "step": 8003 }, { "epoch": 0.42939914163090126, "grad_norm": 0.42578125, "learning_rate": 4.959168937601303e-06, "loss": 2.5078, "step": 8004 }, { "epoch": 0.4294527896995708, "grad_norm": 0.39453125, "learning_rate": 4.959153298807469e-06, "loss": 2.2555, "step": 8005 }, { "epoch": 0.4295064377682403, "grad_norm": 0.4375, "learning_rate": 4.9591376570439544e-06, "loss": 2.1305, "step": 8006 }, { "epoch": 0.42956008583690986, "grad_norm": 0.380859375, "learning_rate": 4.959122012310775e-06, "loss": 2.1662, "step": 8007 }, { "epoch": 0.4296137339055794, "grad_norm": 0.412109375, "learning_rate": 4.95910636460795e-06, "loss": 2.3101, "step": 8008 }, { "epoch": 0.4296673819742489, "grad_norm": 0.369140625, "learning_rate": 4.959090713935499e-06, "loss": 2.5275, "step": 8009 }, { "epoch": 0.42972103004291845, "grad_norm": 0.466796875, "learning_rate": 4.959075060293442e-06, "loss": 2.0155, "step": 8010 }, { "epoch": 0.429774678111588, "grad_norm": 0.5, "learning_rate": 4.959059403681795e-06, "loss": 2.2857, "step": 8011 }, { "epoch": 0.4298283261802575, "grad_norm": 0.482421875, "learning_rate": 4.95904374410058e-06, "loss": 2.4707, "step": 8012 }, { "epoch": 0.42988197424892705, "grad_norm": 0.6796875, "learning_rate": 4.959028081549814e-06, "loss": 1.271, "step": 8013 }, { "epoch": 0.4299356223175966, "grad_norm": 0.375, "learning_rate": 4.959012416029517e-06, "loss": 2.1272, "step": 8014 }, { "epoch": 0.4299892703862661, "grad_norm": 0.4765625, "learning_rate": 4.958996747539707e-06, "loss": 2.4254, "step": 8015 }, { "epoch": 0.43004291845493564, "grad_norm": 0.41015625, "learning_rate": 4.958981076080404e-06, "loss": 2.4272, "step": 8016 }, { "epoch": 0.4300965665236052, "grad_norm": 0.4140625, "learning_rate": 4.958965401651625e-06, "loss": 2.4619, "step": 8017 }, { "epoch": 0.4301502145922747, "grad_norm": 0.40625, "learning_rate": 4.95894972425339e-06, "loss": 2.0785, "step": 8018 }, { "epoch": 0.4302038626609442, "grad_norm": 0.5625, "learning_rate": 4.9589340438857195e-06, "loss": 2.3869, "step": 8019 }, { "epoch": 0.4302575107296137, "grad_norm": 0.41015625, "learning_rate": 4.95891836054863e-06, "loss": 2.3082, "step": 8020 }, { "epoch": 0.43031115879828324, "grad_norm": 0.466796875, "learning_rate": 4.958902674242142e-06, "loss": 2.3459, "step": 8021 }, { "epoch": 0.4303648068669528, "grad_norm": 0.435546875, "learning_rate": 4.9588869849662735e-06, "loss": 2.3565, "step": 8022 }, { "epoch": 0.4304184549356223, "grad_norm": 0.435546875, "learning_rate": 4.958871292721045e-06, "loss": 2.3388, "step": 8023 }, { "epoch": 0.43047210300429184, "grad_norm": 0.4609375, "learning_rate": 4.958855597506473e-06, "loss": 2.2986, "step": 8024 }, { "epoch": 0.43052575107296137, "grad_norm": 0.3828125, "learning_rate": 4.958839899322578e-06, "loss": 2.5218, "step": 8025 }, { "epoch": 0.4305793991416309, "grad_norm": 0.39453125, "learning_rate": 4.958824198169379e-06, "loss": 2.3665, "step": 8026 }, { "epoch": 0.43063304721030043, "grad_norm": 0.49609375, "learning_rate": 4.958808494046895e-06, "loss": 2.0179, "step": 8027 }, { "epoch": 0.43068669527896997, "grad_norm": 0.6171875, "learning_rate": 4.958792786955144e-06, "loss": 2.2723, "step": 8028 }, { "epoch": 0.4307403433476395, "grad_norm": 0.400390625, "learning_rate": 4.958777076894146e-06, "loss": 2.3031, "step": 8029 }, { "epoch": 0.43079399141630903, "grad_norm": 0.4140625, "learning_rate": 4.958761363863919e-06, "loss": 2.2066, "step": 8030 }, { "epoch": 0.43084763948497856, "grad_norm": 0.46484375, "learning_rate": 4.958745647864483e-06, "loss": 2.3845, "step": 8031 }, { "epoch": 0.4309012875536481, "grad_norm": 0.6328125, "learning_rate": 4.9587299288958566e-06, "loss": 2.263, "step": 8032 }, { "epoch": 0.43095493562231757, "grad_norm": 0.3984375, "learning_rate": 4.958714206958058e-06, "loss": 2.0282, "step": 8033 }, { "epoch": 0.4310085836909871, "grad_norm": 0.42578125, "learning_rate": 4.958698482051109e-06, "loss": 2.3013, "step": 8034 }, { "epoch": 0.43106223175965663, "grad_norm": 0.515625, "learning_rate": 4.9586827541750245e-06, "loss": 2.1037, "step": 8035 }, { "epoch": 0.43111587982832617, "grad_norm": 0.462890625, "learning_rate": 4.958667023329826e-06, "loss": 2.2946, "step": 8036 }, { "epoch": 0.4311695278969957, "grad_norm": 0.400390625, "learning_rate": 4.9586512895155325e-06, "loss": 2.4553, "step": 8037 }, { "epoch": 0.43122317596566523, "grad_norm": 0.56640625, "learning_rate": 4.958635552732161e-06, "loss": 2.3896, "step": 8038 }, { "epoch": 0.43127682403433476, "grad_norm": 0.453125, "learning_rate": 4.958619812979734e-06, "loss": 2.5771, "step": 8039 }, { "epoch": 0.4313304721030043, "grad_norm": 0.337890625, "learning_rate": 4.958604070258267e-06, "loss": 2.1336, "step": 8040 }, { "epoch": 0.4313841201716738, "grad_norm": 0.400390625, "learning_rate": 4.958588324567781e-06, "loss": 2.1954, "step": 8041 }, { "epoch": 0.43143776824034336, "grad_norm": 0.470703125, "learning_rate": 4.958572575908295e-06, "loss": 2.4137, "step": 8042 }, { "epoch": 0.4314914163090129, "grad_norm": 0.451171875, "learning_rate": 4.958556824279827e-06, "loss": 2.3492, "step": 8043 }, { "epoch": 0.4315450643776824, "grad_norm": 0.47265625, "learning_rate": 4.958541069682397e-06, "loss": 2.2884, "step": 8044 }, { "epoch": 0.43159871244635195, "grad_norm": 0.470703125, "learning_rate": 4.9585253121160236e-06, "loss": 2.3819, "step": 8045 }, { "epoch": 0.4316523605150215, "grad_norm": 0.390625, "learning_rate": 4.958509551580725e-06, "loss": 2.398, "step": 8046 }, { "epoch": 0.431706008583691, "grad_norm": 0.36328125, "learning_rate": 4.958493788076522e-06, "loss": 2.0067, "step": 8047 }, { "epoch": 0.4317596566523605, "grad_norm": 0.83984375, "learning_rate": 4.958478021603432e-06, "loss": 2.4478, "step": 8048 }, { "epoch": 0.43181330472103, "grad_norm": 0.3828125, "learning_rate": 4.958462252161475e-06, "loss": 2.327, "step": 8049 }, { "epoch": 0.43186695278969955, "grad_norm": 1.3046875, "learning_rate": 4.958446479750669e-06, "loss": 2.4864, "step": 8050 }, { "epoch": 0.4319206008583691, "grad_norm": 0.71875, "learning_rate": 4.958430704371035e-06, "loss": 2.3356, "step": 8051 }, { "epoch": 0.4319742489270386, "grad_norm": 0.392578125, "learning_rate": 4.95841492602259e-06, "loss": 2.1103, "step": 8052 }, { "epoch": 0.43202789699570815, "grad_norm": 0.4296875, "learning_rate": 4.958399144705355e-06, "loss": 2.3007, "step": 8053 }, { "epoch": 0.4320815450643777, "grad_norm": 0.42578125, "learning_rate": 4.958383360419348e-06, "loss": 2.2864, "step": 8054 }, { "epoch": 0.4321351931330472, "grad_norm": 0.474609375, "learning_rate": 4.958367573164587e-06, "loss": 2.0699, "step": 8055 }, { "epoch": 0.43218884120171674, "grad_norm": 0.412109375, "learning_rate": 4.958351782941092e-06, "loss": 2.2826, "step": 8056 }, { "epoch": 0.4322424892703863, "grad_norm": 0.412109375, "learning_rate": 4.958335989748883e-06, "loss": 2.1728, "step": 8057 }, { "epoch": 0.4322961373390558, "grad_norm": 0.404296875, "learning_rate": 4.9583201935879776e-06, "loss": 2.3802, "step": 8058 }, { "epoch": 0.43234978540772534, "grad_norm": 0.458984375, "learning_rate": 4.958304394458396e-06, "loss": 2.6331, "step": 8059 }, { "epoch": 0.43240343347639487, "grad_norm": 0.451171875, "learning_rate": 4.9582885923601564e-06, "loss": 2.3698, "step": 8060 }, { "epoch": 0.4324570815450644, "grad_norm": 0.400390625, "learning_rate": 4.958272787293279e-06, "loss": 2.4708, "step": 8061 }, { "epoch": 0.4325107296137339, "grad_norm": 0.408203125, "learning_rate": 4.958256979257782e-06, "loss": 2.0744, "step": 8062 }, { "epoch": 0.4325643776824034, "grad_norm": 0.4609375, "learning_rate": 4.9582411682536835e-06, "loss": 2.3632, "step": 8063 }, { "epoch": 0.43261802575107294, "grad_norm": 0.396484375, "learning_rate": 4.9582253542810045e-06, "loss": 2.1996, "step": 8064 }, { "epoch": 0.4326716738197425, "grad_norm": 0.435546875, "learning_rate": 4.958209537339764e-06, "loss": 2.0826, "step": 8065 }, { "epoch": 0.432725321888412, "grad_norm": 0.455078125, "learning_rate": 4.9581937174299795e-06, "loss": 2.2765, "step": 8066 }, { "epoch": 0.43277896995708154, "grad_norm": 0.421875, "learning_rate": 4.958177894551672e-06, "loss": 2.3064, "step": 8067 }, { "epoch": 0.43283261802575107, "grad_norm": 0.42578125, "learning_rate": 4.958162068704859e-06, "loss": 2.3817, "step": 8068 }, { "epoch": 0.4328862660944206, "grad_norm": 0.462890625, "learning_rate": 4.95814623988956e-06, "loss": 2.3497, "step": 8069 }, { "epoch": 0.43293991416309013, "grad_norm": 0.474609375, "learning_rate": 4.958130408105795e-06, "loss": 2.4784, "step": 8070 }, { "epoch": 0.43299356223175967, "grad_norm": 0.7109375, "learning_rate": 4.9581145733535815e-06, "loss": 2.3539, "step": 8071 }, { "epoch": 0.4330472103004292, "grad_norm": 0.38671875, "learning_rate": 4.958098735632941e-06, "loss": 2.3938, "step": 8072 }, { "epoch": 0.43310085836909873, "grad_norm": 0.55078125, "learning_rate": 4.958082894943891e-06, "loss": 2.112, "step": 8073 }, { "epoch": 0.43315450643776826, "grad_norm": 0.41015625, "learning_rate": 4.9580670512864505e-06, "loss": 2.1754, "step": 8074 }, { "epoch": 0.4332081545064378, "grad_norm": 0.8828125, "learning_rate": 4.958051204660639e-06, "loss": 2.4471, "step": 8075 }, { "epoch": 0.43326180257510727, "grad_norm": 0.42578125, "learning_rate": 4.958035355066475e-06, "loss": 2.2365, "step": 8076 }, { "epoch": 0.4333154506437768, "grad_norm": 0.453125, "learning_rate": 4.95801950250398e-06, "loss": 2.3936, "step": 8077 }, { "epoch": 0.43336909871244633, "grad_norm": 0.57421875, "learning_rate": 4.958003646973169e-06, "loss": 2.2648, "step": 8078 }, { "epoch": 0.43342274678111586, "grad_norm": 0.65234375, "learning_rate": 4.957987788474066e-06, "loss": 2.3681, "step": 8079 }, { "epoch": 0.4334763948497854, "grad_norm": 0.375, "learning_rate": 4.957971927006687e-06, "loss": 2.4586, "step": 8080 }, { "epoch": 0.4335300429184549, "grad_norm": 0.54296875, "learning_rate": 4.957956062571051e-06, "loss": 2.2233, "step": 8081 }, { "epoch": 0.43358369098712446, "grad_norm": 0.388671875, "learning_rate": 4.957940195167179e-06, "loss": 1.8253, "step": 8082 }, { "epoch": 0.433637339055794, "grad_norm": 0.390625, "learning_rate": 4.95792432479509e-06, "loss": 2.1295, "step": 8083 }, { "epoch": 0.4336909871244635, "grad_norm": 0.45703125, "learning_rate": 4.957908451454801e-06, "loss": 2.2982, "step": 8084 }, { "epoch": 0.43374463519313305, "grad_norm": 0.45703125, "learning_rate": 4.957892575146332e-06, "loss": 2.5845, "step": 8085 }, { "epoch": 0.4337982832618026, "grad_norm": 0.4609375, "learning_rate": 4.957876695869703e-06, "loss": 2.642, "step": 8086 }, { "epoch": 0.4338519313304721, "grad_norm": 0.421875, "learning_rate": 4.957860813624934e-06, "loss": 2.2777, "step": 8087 }, { "epoch": 0.43390557939914165, "grad_norm": 0.52734375, "learning_rate": 4.957844928412042e-06, "loss": 1.2979, "step": 8088 }, { "epoch": 0.4339592274678112, "grad_norm": 2.8125, "learning_rate": 4.957829040231048e-06, "loss": 2.4542, "step": 8089 }, { "epoch": 0.4340128755364807, "grad_norm": 0.341796875, "learning_rate": 4.95781314908197e-06, "loss": 2.1156, "step": 8090 }, { "epoch": 0.4340665236051502, "grad_norm": 0.427734375, "learning_rate": 4.957797254964829e-06, "loss": 2.3465, "step": 8091 }, { "epoch": 0.4341201716738197, "grad_norm": 0.4296875, "learning_rate": 4.9577813578796406e-06, "loss": 2.4259, "step": 8092 }, { "epoch": 0.43417381974248925, "grad_norm": 0.70703125, "learning_rate": 4.957765457826428e-06, "loss": 1.9396, "step": 8093 }, { "epoch": 0.4342274678111588, "grad_norm": 0.4140625, "learning_rate": 4.957749554805208e-06, "loss": 2.3744, "step": 8094 }, { "epoch": 0.4342811158798283, "grad_norm": 0.380859375, "learning_rate": 4.957733648816e-06, "loss": 2.254, "step": 8095 }, { "epoch": 0.43433476394849785, "grad_norm": 0.435546875, "learning_rate": 4.957717739858824e-06, "loss": 2.432, "step": 8096 }, { "epoch": 0.4343884120171674, "grad_norm": 0.328125, "learning_rate": 4.957701827933699e-06, "loss": 2.236, "step": 8097 }, { "epoch": 0.4344420600858369, "grad_norm": 0.427734375, "learning_rate": 4.957685913040643e-06, "loss": 2.1724, "step": 8098 }, { "epoch": 0.43449570815450644, "grad_norm": 0.4296875, "learning_rate": 4.957669995179677e-06, "loss": 2.2898, "step": 8099 }, { "epoch": 0.434549356223176, "grad_norm": 0.40234375, "learning_rate": 4.957654074350821e-06, "loss": 2.2533, "step": 8100 }, { "epoch": 0.4346030042918455, "grad_norm": 0.52734375, "learning_rate": 4.95763815055409e-06, "loss": 2.2275, "step": 8101 }, { "epoch": 0.43465665236051504, "grad_norm": 0.484375, "learning_rate": 4.957622223789508e-06, "loss": 2.244, "step": 8102 }, { "epoch": 0.43471030042918457, "grad_norm": 0.388671875, "learning_rate": 4.957606294057092e-06, "loss": 2.2234, "step": 8103 }, { "epoch": 0.4347639484978541, "grad_norm": 0.5390625, "learning_rate": 4.957590361356861e-06, "loss": 2.2021, "step": 8104 }, { "epoch": 0.4348175965665236, "grad_norm": 3.734375, "learning_rate": 4.957574425688834e-06, "loss": 2.1746, "step": 8105 }, { "epoch": 0.4348712446351931, "grad_norm": 0.478515625, "learning_rate": 4.9575584870530315e-06, "loss": 2.2655, "step": 8106 }, { "epoch": 0.43492489270386264, "grad_norm": 0.5390625, "learning_rate": 4.957542545449472e-06, "loss": 2.3701, "step": 8107 }, { "epoch": 0.4349785407725322, "grad_norm": 1.046875, "learning_rate": 4.957526600878175e-06, "loss": 2.2571, "step": 8108 }, { "epoch": 0.4350321888412017, "grad_norm": 0.439453125, "learning_rate": 4.9575106533391604e-06, "loss": 2.3607, "step": 8109 }, { "epoch": 0.43508583690987124, "grad_norm": 0.5078125, "learning_rate": 4.957494702832446e-06, "loss": 2.3361, "step": 8110 }, { "epoch": 0.43513948497854077, "grad_norm": 0.328125, "learning_rate": 4.957478749358052e-06, "loss": 2.3465, "step": 8111 }, { "epoch": 0.4351931330472103, "grad_norm": 0.392578125, "learning_rate": 4.957462792915997e-06, "loss": 2.238, "step": 8112 }, { "epoch": 0.43524678111587983, "grad_norm": 0.423828125, "learning_rate": 4.957446833506301e-06, "loss": 2.2959, "step": 8113 }, { "epoch": 0.43530042918454936, "grad_norm": 0.435546875, "learning_rate": 4.957430871128983e-06, "loss": 2.2202, "step": 8114 }, { "epoch": 0.4353540772532189, "grad_norm": 0.390625, "learning_rate": 4.957414905784062e-06, "loss": 2.2341, "step": 8115 }, { "epoch": 0.43540772532188843, "grad_norm": 0.40234375, "learning_rate": 4.9573989374715585e-06, "loss": 2.6308, "step": 8116 }, { "epoch": 0.43546137339055796, "grad_norm": 0.55859375, "learning_rate": 4.957382966191491e-06, "loss": 2.2504, "step": 8117 }, { "epoch": 0.4355150214592275, "grad_norm": 0.95703125, "learning_rate": 4.957366991943877e-06, "loss": 1.8291, "step": 8118 }, { "epoch": 0.435568669527897, "grad_norm": 0.412109375, "learning_rate": 4.957351014728739e-06, "loss": 2.1994, "step": 8119 }, { "epoch": 0.4356223175965665, "grad_norm": 0.455078125, "learning_rate": 4.957335034546094e-06, "loss": 2.2595, "step": 8120 }, { "epoch": 0.43567596566523603, "grad_norm": 0.318359375, "learning_rate": 4.957319051395961e-06, "loss": 2.2829, "step": 8121 }, { "epoch": 0.43572961373390556, "grad_norm": 0.392578125, "learning_rate": 4.957303065278362e-06, "loss": 2.1511, "step": 8122 }, { "epoch": 0.4357832618025751, "grad_norm": 0.392578125, "learning_rate": 4.957287076193313e-06, "loss": 2.1771, "step": 8123 }, { "epoch": 0.4358369098712446, "grad_norm": 1.40625, "learning_rate": 4.957271084140837e-06, "loss": 2.4061, "step": 8124 }, { "epoch": 0.43589055793991416, "grad_norm": 0.375, "learning_rate": 4.9572550891209495e-06, "loss": 2.3211, "step": 8125 }, { "epoch": 0.4359442060085837, "grad_norm": 0.46484375, "learning_rate": 4.957239091133672e-06, "loss": 2.0812, "step": 8126 }, { "epoch": 0.4359978540772532, "grad_norm": 0.412109375, "learning_rate": 4.957223090179023e-06, "loss": 2.3271, "step": 8127 }, { "epoch": 0.43605150214592275, "grad_norm": 0.47265625, "learning_rate": 4.957207086257023e-06, "loss": 2.5012, "step": 8128 }, { "epoch": 0.4361051502145923, "grad_norm": 0.498046875, "learning_rate": 4.95719107936769e-06, "loss": 2.2459, "step": 8129 }, { "epoch": 0.4361587982832618, "grad_norm": 0.341796875, "learning_rate": 4.9571750695110444e-06, "loss": 2.0812, "step": 8130 }, { "epoch": 0.43621244635193135, "grad_norm": 0.54296875, "learning_rate": 4.957159056687104e-06, "loss": 2.2873, "step": 8131 }, { "epoch": 0.4362660944206009, "grad_norm": 0.4765625, "learning_rate": 4.95714304089589e-06, "loss": 2.3767, "step": 8132 }, { "epoch": 0.4363197424892704, "grad_norm": 0.5546875, "learning_rate": 4.9571270221374204e-06, "loss": 2.395, "step": 8133 }, { "epoch": 0.4363733905579399, "grad_norm": 0.43359375, "learning_rate": 4.957111000411715e-06, "loss": 2.5504, "step": 8134 }, { "epoch": 0.4364270386266094, "grad_norm": 0.353515625, "learning_rate": 4.9570949757187935e-06, "loss": 2.2811, "step": 8135 }, { "epoch": 0.43648068669527895, "grad_norm": 0.38671875, "learning_rate": 4.957078948058675e-06, "loss": 2.2342, "step": 8136 }, { "epoch": 0.4365343347639485, "grad_norm": 0.40234375, "learning_rate": 4.957062917431378e-06, "loss": 2.317, "step": 8137 }, { "epoch": 0.436587982832618, "grad_norm": 0.51171875, "learning_rate": 4.957046883836924e-06, "loss": 1.9567, "step": 8138 }, { "epoch": 0.43664163090128755, "grad_norm": 0.53515625, "learning_rate": 4.957030847275329e-06, "loss": 2.5461, "step": 8139 }, { "epoch": 0.4366952789699571, "grad_norm": 0.427734375, "learning_rate": 4.957014807746615e-06, "loss": 2.123, "step": 8140 }, { "epoch": 0.4367489270386266, "grad_norm": 0.443359375, "learning_rate": 4.956998765250801e-06, "loss": 2.3688, "step": 8141 }, { "epoch": 0.43680257510729614, "grad_norm": 0.41015625, "learning_rate": 4.9569827197879065e-06, "loss": 2.104, "step": 8142 }, { "epoch": 0.4368562231759657, "grad_norm": 0.396484375, "learning_rate": 4.95696667135795e-06, "loss": 2.3358, "step": 8143 }, { "epoch": 0.4369098712446352, "grad_norm": 0.423828125, "learning_rate": 4.956950619960952e-06, "loss": 2.328, "step": 8144 }, { "epoch": 0.43696351931330474, "grad_norm": 0.408203125, "learning_rate": 4.95693456559693e-06, "loss": 2.3218, "step": 8145 }, { "epoch": 0.43701716738197427, "grad_norm": 0.59765625, "learning_rate": 4.956918508265905e-06, "loss": 2.3454, "step": 8146 }, { "epoch": 0.4370708154506438, "grad_norm": 2.3125, "learning_rate": 4.956902447967897e-06, "loss": 2.3481, "step": 8147 }, { "epoch": 0.4371244635193133, "grad_norm": 0.4140625, "learning_rate": 4.9568863847029235e-06, "loss": 2.3435, "step": 8148 }, { "epoch": 0.4371781115879828, "grad_norm": 0.490234375, "learning_rate": 4.956870318471005e-06, "loss": 2.496, "step": 8149 }, { "epoch": 0.43723175965665234, "grad_norm": 0.400390625, "learning_rate": 4.9568542492721605e-06, "loss": 2.1905, "step": 8150 }, { "epoch": 0.4372854077253219, "grad_norm": 0.412109375, "learning_rate": 4.95683817710641e-06, "loss": 2.4712, "step": 8151 }, { "epoch": 0.4373390557939914, "grad_norm": 0.62890625, "learning_rate": 4.956822101973773e-06, "loss": 2.3061, "step": 8152 }, { "epoch": 0.43739270386266094, "grad_norm": 0.435546875, "learning_rate": 4.956806023874267e-06, "loss": 2.292, "step": 8153 }, { "epoch": 0.43744635193133047, "grad_norm": 0.369140625, "learning_rate": 4.9567899428079134e-06, "loss": 2.2152, "step": 8154 }, { "epoch": 0.4375, "grad_norm": 0.421875, "learning_rate": 4.9567738587747314e-06, "loss": 2.0381, "step": 8155 }, { "epoch": 0.43755364806866953, "grad_norm": 0.416015625, "learning_rate": 4.95675777177474e-06, "loss": 2.1429, "step": 8156 }, { "epoch": 0.43760729613733906, "grad_norm": 0.41796875, "learning_rate": 4.956741681807959e-06, "loss": 2.336, "step": 8157 }, { "epoch": 0.4376609442060086, "grad_norm": 0.4140625, "learning_rate": 4.956725588874407e-06, "loss": 2.182, "step": 8158 }, { "epoch": 0.4377145922746781, "grad_norm": 0.4140625, "learning_rate": 4.956709492974104e-06, "loss": 2.1457, "step": 8159 }, { "epoch": 0.43776824034334766, "grad_norm": 0.37109375, "learning_rate": 4.956693394107069e-06, "loss": 2.07, "step": 8160 }, { "epoch": 0.4378218884120172, "grad_norm": 0.40625, "learning_rate": 4.956677292273323e-06, "loss": 2.1196, "step": 8161 }, { "epoch": 0.4378755364806867, "grad_norm": 0.435546875, "learning_rate": 4.9566611874728835e-06, "loss": 2.3341, "step": 8162 }, { "epoch": 0.4379291845493562, "grad_norm": 0.396484375, "learning_rate": 4.9566450797057706e-06, "loss": 2.361, "step": 8163 }, { "epoch": 0.43798283261802573, "grad_norm": 0.390625, "learning_rate": 4.956628968972004e-06, "loss": 2.5055, "step": 8164 }, { "epoch": 0.43803648068669526, "grad_norm": 0.67578125, "learning_rate": 4.9566128552716035e-06, "loss": 2.4253, "step": 8165 }, { "epoch": 0.4380901287553648, "grad_norm": 0.427734375, "learning_rate": 4.9565967386045875e-06, "loss": 2.2375, "step": 8166 }, { "epoch": 0.4381437768240343, "grad_norm": 0.41015625, "learning_rate": 4.956580618970977e-06, "loss": 1.4941, "step": 8167 }, { "epoch": 0.43819742489270386, "grad_norm": 0.38671875, "learning_rate": 4.95656449637079e-06, "loss": 2.2021, "step": 8168 }, { "epoch": 0.4382510729613734, "grad_norm": 0.328125, "learning_rate": 4.956548370804047e-06, "loss": 1.6805, "step": 8169 }, { "epoch": 0.4383047210300429, "grad_norm": 0.494140625, "learning_rate": 4.956532242270766e-06, "loss": 2.2044, "step": 8170 }, { "epoch": 0.43835836909871245, "grad_norm": 0.41015625, "learning_rate": 4.956516110770968e-06, "loss": 2.481, "step": 8171 }, { "epoch": 0.438412017167382, "grad_norm": 0.474609375, "learning_rate": 4.956499976304672e-06, "loss": 2.3914, "step": 8172 }, { "epoch": 0.4384656652360515, "grad_norm": 0.49609375, "learning_rate": 4.9564838388718975e-06, "loss": 1.5323, "step": 8173 }, { "epoch": 0.43851931330472105, "grad_norm": 0.41796875, "learning_rate": 4.956467698472663e-06, "loss": 2.2745, "step": 8174 }, { "epoch": 0.4385729613733906, "grad_norm": 0.373046875, "learning_rate": 4.95645155510699e-06, "loss": 2.25, "step": 8175 }, { "epoch": 0.4386266094420601, "grad_norm": 0.453125, "learning_rate": 4.9564354087748964e-06, "loss": 2.4289, "step": 8176 }, { "epoch": 0.4386802575107296, "grad_norm": 0.3984375, "learning_rate": 4.956419259476402e-06, "loss": 2.2181, "step": 8177 }, { "epoch": 0.4387339055793991, "grad_norm": 0.419921875, "learning_rate": 4.956403107211527e-06, "loss": 2.2624, "step": 8178 }, { "epoch": 0.43878755364806865, "grad_norm": 0.451171875, "learning_rate": 4.95638695198029e-06, "loss": 2.282, "step": 8179 }, { "epoch": 0.4388412017167382, "grad_norm": 5.34375, "learning_rate": 4.956370793782711e-06, "loss": 2.2885, "step": 8180 }, { "epoch": 0.4388948497854077, "grad_norm": 0.36328125, "learning_rate": 4.9563546326188095e-06, "loss": 1.9653, "step": 8181 }, { "epoch": 0.43894849785407725, "grad_norm": 0.388671875, "learning_rate": 4.9563384684886055e-06, "loss": 1.8764, "step": 8182 }, { "epoch": 0.4390021459227468, "grad_norm": 0.423828125, "learning_rate": 4.956322301392117e-06, "loss": 2.4123, "step": 8183 }, { "epoch": 0.4390557939914163, "grad_norm": 1.1171875, "learning_rate": 4.956306131329365e-06, "loss": 2.3141, "step": 8184 }, { "epoch": 0.43910944206008584, "grad_norm": 0.53515625, "learning_rate": 4.9562899583003686e-06, "loss": 2.4223, "step": 8185 }, { "epoch": 0.4391630901287554, "grad_norm": 0.455078125, "learning_rate": 4.9562737823051464e-06, "loss": 2.5589, "step": 8186 }, { "epoch": 0.4392167381974249, "grad_norm": 0.66015625, "learning_rate": 4.95625760334372e-06, "loss": 2.4173, "step": 8187 }, { "epoch": 0.43927038626609444, "grad_norm": 0.4375, "learning_rate": 4.956241421416107e-06, "loss": 2.4242, "step": 8188 }, { "epoch": 0.43932403433476397, "grad_norm": 0.451171875, "learning_rate": 4.956225236522328e-06, "loss": 2.3816, "step": 8189 }, { "epoch": 0.4393776824034335, "grad_norm": 0.59765625, "learning_rate": 4.956209048662402e-06, "loss": 2.4124, "step": 8190 }, { "epoch": 0.439431330472103, "grad_norm": 0.41015625, "learning_rate": 4.956192857836348e-06, "loss": 2.4362, "step": 8191 }, { "epoch": 0.4394849785407725, "grad_norm": 0.3984375, "learning_rate": 4.956176664044188e-06, "loss": 2.0693, "step": 8192 }, { "epoch": 0.43953862660944204, "grad_norm": 0.41796875, "learning_rate": 4.956160467285939e-06, "loss": 2.2607, "step": 8193 }, { "epoch": 0.43959227467811157, "grad_norm": 0.42578125, "learning_rate": 4.9561442675616206e-06, "loss": 2.2659, "step": 8194 }, { "epoch": 0.4396459227467811, "grad_norm": 0.5, "learning_rate": 4.956128064871254e-06, "loss": 2.4447, "step": 8195 }, { "epoch": 0.43969957081545064, "grad_norm": 0.4140625, "learning_rate": 4.956111859214857e-06, "loss": 2.3005, "step": 8196 }, { "epoch": 0.43975321888412017, "grad_norm": 0.427734375, "learning_rate": 4.956095650592452e-06, "loss": 2.2744, "step": 8197 }, { "epoch": 0.4398068669527897, "grad_norm": 0.4453125, "learning_rate": 4.956079439004056e-06, "loss": 2.1765, "step": 8198 }, { "epoch": 0.43986051502145923, "grad_norm": 0.58203125, "learning_rate": 4.956063224449688e-06, "loss": 2.2596, "step": 8199 }, { "epoch": 0.43991416309012876, "grad_norm": 0.349609375, "learning_rate": 4.95604700692937e-06, "loss": 2.1189, "step": 8200 }, { "epoch": 0.4399678111587983, "grad_norm": 0.421875, "learning_rate": 4.95603078644312e-06, "loss": 2.256, "step": 8201 }, { "epoch": 0.4400214592274678, "grad_norm": 0.408203125, "learning_rate": 4.956014562990958e-06, "loss": 2.2661, "step": 8202 }, { "epoch": 0.44007510729613736, "grad_norm": 0.431640625, "learning_rate": 4.955998336572904e-06, "loss": 2.4287, "step": 8203 }, { "epoch": 0.4401287553648069, "grad_norm": 0.61328125, "learning_rate": 4.9559821071889765e-06, "loss": 2.3185, "step": 8204 }, { "epoch": 0.4401824034334764, "grad_norm": 0.392578125, "learning_rate": 4.955965874839197e-06, "loss": 2.2778, "step": 8205 }, { "epoch": 0.4402360515021459, "grad_norm": 0.5546875, "learning_rate": 4.955949639523584e-06, "loss": 2.5401, "step": 8206 }, { "epoch": 0.44028969957081543, "grad_norm": 0.8125, "learning_rate": 4.9559334012421555e-06, "loss": 2.4611, "step": 8207 }, { "epoch": 0.44034334763948496, "grad_norm": 0.51171875, "learning_rate": 4.955917159994934e-06, "loss": 2.31, "step": 8208 }, { "epoch": 0.4403969957081545, "grad_norm": 0.455078125, "learning_rate": 4.955900915781936e-06, "loss": 2.5952, "step": 8209 }, { "epoch": 0.440450643776824, "grad_norm": 0.408203125, "learning_rate": 4.955884668603185e-06, "loss": 2.3636, "step": 8210 }, { "epoch": 0.44050429184549356, "grad_norm": 0.451171875, "learning_rate": 4.955868418458697e-06, "loss": 2.4107, "step": 8211 }, { "epoch": 0.4405579399141631, "grad_norm": 0.51171875, "learning_rate": 4.955852165348493e-06, "loss": 2.2921, "step": 8212 }, { "epoch": 0.4406115879828326, "grad_norm": 0.373046875, "learning_rate": 4.955835909272594e-06, "loss": 2.4004, "step": 8213 }, { "epoch": 0.44066523605150215, "grad_norm": 0.388671875, "learning_rate": 4.955819650231017e-06, "loss": 2.0766, "step": 8214 }, { "epoch": 0.4407188841201717, "grad_norm": 0.431640625, "learning_rate": 4.955803388223784e-06, "loss": 2.3672, "step": 8215 }, { "epoch": 0.4407725321888412, "grad_norm": 0.431640625, "learning_rate": 4.955787123250912e-06, "loss": 2.0957, "step": 8216 }, { "epoch": 0.44082618025751075, "grad_norm": 0.416015625, "learning_rate": 4.955770855312424e-06, "loss": 2.2711, "step": 8217 }, { "epoch": 0.4408798283261803, "grad_norm": 0.59375, "learning_rate": 4.955754584408337e-06, "loss": 2.2994, "step": 8218 }, { "epoch": 0.4409334763948498, "grad_norm": 0.44921875, "learning_rate": 4.955738310538672e-06, "loss": 2.1133, "step": 8219 }, { "epoch": 0.4409871244635193, "grad_norm": 0.4375, "learning_rate": 4.955722033703449e-06, "loss": 2.0573, "step": 8220 }, { "epoch": 0.4410407725321888, "grad_norm": 0.484375, "learning_rate": 4.955705753902686e-06, "loss": 2.6117, "step": 8221 }, { "epoch": 0.44109442060085835, "grad_norm": 0.384765625, "learning_rate": 4.955689471136403e-06, "loss": 2.2723, "step": 8222 }, { "epoch": 0.4411480686695279, "grad_norm": 5.09375, "learning_rate": 4.955673185404621e-06, "loss": 2.3369, "step": 8223 }, { "epoch": 0.4412017167381974, "grad_norm": 0.4140625, "learning_rate": 4.955656896707359e-06, "loss": 2.6603, "step": 8224 }, { "epoch": 0.44125536480686695, "grad_norm": 0.404296875, "learning_rate": 4.955640605044636e-06, "loss": 2.4412, "step": 8225 }, { "epoch": 0.4413090128755365, "grad_norm": 0.3984375, "learning_rate": 4.9556243104164725e-06, "loss": 2.2734, "step": 8226 }, { "epoch": 0.441362660944206, "grad_norm": 0.53515625, "learning_rate": 4.955608012822887e-06, "loss": 2.3376, "step": 8227 }, { "epoch": 0.44141630901287554, "grad_norm": 0.3984375, "learning_rate": 4.955591712263901e-06, "loss": 2.2491, "step": 8228 }, { "epoch": 0.4414699570815451, "grad_norm": 0.42578125, "learning_rate": 4.955575408739533e-06, "loss": 2.316, "step": 8229 }, { "epoch": 0.4415236051502146, "grad_norm": 0.384765625, "learning_rate": 4.955559102249803e-06, "loss": 2.2169, "step": 8230 }, { "epoch": 0.44157725321888414, "grad_norm": 0.470703125, "learning_rate": 4.955542792794731e-06, "loss": 2.4944, "step": 8231 }, { "epoch": 0.44163090128755367, "grad_norm": 0.474609375, "learning_rate": 4.955526480374335e-06, "loss": 2.4031, "step": 8232 }, { "epoch": 0.4416845493562232, "grad_norm": 0.458984375, "learning_rate": 4.955510164988638e-06, "loss": 2.0629, "step": 8233 }, { "epoch": 0.44173819742489273, "grad_norm": 0.392578125, "learning_rate": 4.955493846637656e-06, "loss": 2.4796, "step": 8234 }, { "epoch": 0.4417918454935622, "grad_norm": 0.6796875, "learning_rate": 4.955477525321411e-06, "loss": 2.1923, "step": 8235 }, { "epoch": 0.44184549356223174, "grad_norm": 0.4609375, "learning_rate": 4.955461201039922e-06, "loss": 2.2051, "step": 8236 }, { "epoch": 0.44189914163090127, "grad_norm": 0.412109375, "learning_rate": 4.9554448737932096e-06, "loss": 2.3696, "step": 8237 }, { "epoch": 0.4419527896995708, "grad_norm": 1.71875, "learning_rate": 4.955428543581292e-06, "loss": 2.4745, "step": 8238 }, { "epoch": 0.44200643776824033, "grad_norm": 0.37890625, "learning_rate": 4.95541221040419e-06, "loss": 2.124, "step": 8239 }, { "epoch": 0.44206008583690987, "grad_norm": 0.4375, "learning_rate": 4.955395874261923e-06, "loss": 2.177, "step": 8240 }, { "epoch": 0.4421137339055794, "grad_norm": 0.419921875, "learning_rate": 4.955379535154511e-06, "loss": 2.3753, "step": 8241 }, { "epoch": 0.44216738197424893, "grad_norm": 0.431640625, "learning_rate": 4.955363193081972e-06, "loss": 2.3628, "step": 8242 }, { "epoch": 0.44222103004291846, "grad_norm": 0.41015625, "learning_rate": 4.955346848044329e-06, "loss": 2.5075, "step": 8243 }, { "epoch": 0.442274678111588, "grad_norm": 0.419921875, "learning_rate": 4.955330500041599e-06, "loss": 1.7121, "step": 8244 }, { "epoch": 0.4423283261802575, "grad_norm": 0.466796875, "learning_rate": 4.955314149073803e-06, "loss": 2.2941, "step": 8245 }, { "epoch": 0.44238197424892706, "grad_norm": 0.451171875, "learning_rate": 4.955297795140961e-06, "loss": 2.2701, "step": 8246 }, { "epoch": 0.4424356223175966, "grad_norm": 0.439453125, "learning_rate": 4.955281438243091e-06, "loss": 2.4153, "step": 8247 }, { "epoch": 0.4424892703862661, "grad_norm": 0.44140625, "learning_rate": 4.955265078380215e-06, "loss": 2.3381, "step": 8248 }, { "epoch": 0.4425429184549356, "grad_norm": 0.54296875, "learning_rate": 4.955248715552351e-06, "loss": 2.5514, "step": 8249 }, { "epoch": 0.44259656652360513, "grad_norm": 0.5390625, "learning_rate": 4.955232349759519e-06, "loss": 2.0119, "step": 8250 }, { "epoch": 0.44265021459227466, "grad_norm": 0.490234375, "learning_rate": 4.9552159810017405e-06, "loss": 2.128, "step": 8251 }, { "epoch": 0.4427038626609442, "grad_norm": 0.515625, "learning_rate": 4.955199609279033e-06, "loss": 2.2129, "step": 8252 }, { "epoch": 0.4427575107296137, "grad_norm": 0.392578125, "learning_rate": 4.955183234591418e-06, "loss": 2.1162, "step": 8253 }, { "epoch": 0.44281115879828326, "grad_norm": 0.408203125, "learning_rate": 4.9551668569389145e-06, "loss": 2.2701, "step": 8254 }, { "epoch": 0.4428648068669528, "grad_norm": 0.423828125, "learning_rate": 4.955150476321541e-06, "loss": 2.2866, "step": 8255 }, { "epoch": 0.4429184549356223, "grad_norm": 0.4765625, "learning_rate": 4.95513409273932e-06, "loss": 2.2879, "step": 8256 }, { "epoch": 0.44297210300429185, "grad_norm": 0.45703125, "learning_rate": 4.9551177061922696e-06, "loss": 2.2668, "step": 8257 }, { "epoch": 0.4430257510729614, "grad_norm": 0.4140625, "learning_rate": 4.95510131668041e-06, "loss": 2.3721, "step": 8258 }, { "epoch": 0.4430793991416309, "grad_norm": 3.28125, "learning_rate": 4.95508492420376e-06, "loss": 2.3567, "step": 8259 }, { "epoch": 0.44313304721030045, "grad_norm": 0.46484375, "learning_rate": 4.955068528762341e-06, "loss": 1.5905, "step": 8260 }, { "epoch": 0.44318669527897, "grad_norm": 0.361328125, "learning_rate": 4.955052130356171e-06, "loss": 2.1415, "step": 8261 }, { "epoch": 0.4432403433476395, "grad_norm": 0.3515625, "learning_rate": 4.955035728985272e-06, "loss": 2.2501, "step": 8262 }, { "epoch": 0.443293991416309, "grad_norm": 0.388671875, "learning_rate": 4.955019324649663e-06, "loss": 2.1241, "step": 8263 }, { "epoch": 0.4433476394849785, "grad_norm": 0.431640625, "learning_rate": 4.955002917349362e-06, "loss": 2.461, "step": 8264 }, { "epoch": 0.44340128755364805, "grad_norm": 0.44921875, "learning_rate": 4.954986507084391e-06, "loss": 2.4867, "step": 8265 }, { "epoch": 0.4434549356223176, "grad_norm": 3.234375, "learning_rate": 4.9549700938547696e-06, "loss": 2.1209, "step": 8266 }, { "epoch": 0.4435085836909871, "grad_norm": 0.40625, "learning_rate": 4.954953677660516e-06, "loss": 2.3674, "step": 8267 }, { "epoch": 0.44356223175965664, "grad_norm": 0.451171875, "learning_rate": 4.954937258501652e-06, "loss": 2.4139, "step": 8268 }, { "epoch": 0.4436158798283262, "grad_norm": 0.404296875, "learning_rate": 4.954920836378196e-06, "loss": 2.6326, "step": 8269 }, { "epoch": 0.4436695278969957, "grad_norm": 0.447265625, "learning_rate": 4.954904411290169e-06, "loss": 2.4444, "step": 8270 }, { "epoch": 0.44372317596566524, "grad_norm": 0.42578125, "learning_rate": 4.95488798323759e-06, "loss": 2.4628, "step": 8271 }, { "epoch": 0.44377682403433477, "grad_norm": 0.455078125, "learning_rate": 4.954871552220479e-06, "loss": 2.427, "step": 8272 }, { "epoch": 0.4438304721030043, "grad_norm": 0.41015625, "learning_rate": 4.954855118238856e-06, "loss": 2.4154, "step": 8273 }, { "epoch": 0.44388412017167383, "grad_norm": 0.58203125, "learning_rate": 4.954838681292741e-06, "loss": 2.3887, "step": 8274 }, { "epoch": 0.44393776824034337, "grad_norm": 0.408203125, "learning_rate": 4.954822241382153e-06, "loss": 2.3256, "step": 8275 }, { "epoch": 0.4439914163090129, "grad_norm": 0.416015625, "learning_rate": 4.954805798507113e-06, "loss": 2.2242, "step": 8276 }, { "epoch": 0.44404506437768243, "grad_norm": 0.447265625, "learning_rate": 4.95478935266764e-06, "loss": 2.1115, "step": 8277 }, { "epoch": 0.4440987124463519, "grad_norm": 0.61328125, "learning_rate": 4.954772903863753e-06, "loss": 2.3902, "step": 8278 }, { "epoch": 0.44415236051502144, "grad_norm": 0.416015625, "learning_rate": 4.954756452095475e-06, "loss": 2.3317, "step": 8279 }, { "epoch": 0.44420600858369097, "grad_norm": 0.89453125, "learning_rate": 4.954739997362824e-06, "loss": 2.2889, "step": 8280 }, { "epoch": 0.4442596566523605, "grad_norm": 0.451171875, "learning_rate": 4.954723539665819e-06, "loss": 2.402, "step": 8281 }, { "epoch": 0.44431330472103003, "grad_norm": 0.376953125, "learning_rate": 4.95470707900448e-06, "loss": 2.1859, "step": 8282 }, { "epoch": 0.44436695278969957, "grad_norm": 0.515625, "learning_rate": 4.954690615378828e-06, "loss": 2.5645, "step": 8283 }, { "epoch": 0.4444206008583691, "grad_norm": 0.390625, "learning_rate": 4.954674148788883e-06, "loss": 2.3554, "step": 8284 }, { "epoch": 0.44447424892703863, "grad_norm": 0.494140625, "learning_rate": 4.954657679234664e-06, "loss": 2.3413, "step": 8285 }, { "epoch": 0.44452789699570816, "grad_norm": 0.451171875, "learning_rate": 4.954641206716191e-06, "loss": 2.3518, "step": 8286 }, { "epoch": 0.4445815450643777, "grad_norm": 0.4765625, "learning_rate": 4.954624731233485e-06, "loss": 2.5301, "step": 8287 }, { "epoch": 0.4446351931330472, "grad_norm": 0.419921875, "learning_rate": 4.954608252786564e-06, "loss": 2.226, "step": 8288 }, { "epoch": 0.44468884120171676, "grad_norm": 0.45703125, "learning_rate": 4.954591771375449e-06, "loss": 2.4196, "step": 8289 }, { "epoch": 0.4447424892703863, "grad_norm": 0.384765625, "learning_rate": 4.95457528700016e-06, "loss": 2.6264, "step": 8290 }, { "epoch": 0.4447961373390558, "grad_norm": 0.3671875, "learning_rate": 4.9545587996607155e-06, "loss": 2.3125, "step": 8291 }, { "epoch": 0.4448497854077253, "grad_norm": 0.427734375, "learning_rate": 4.954542309357138e-06, "loss": 2.2748, "step": 8292 }, { "epoch": 0.4449034334763948, "grad_norm": 0.41015625, "learning_rate": 4.954525816089445e-06, "loss": 2.2501, "step": 8293 }, { "epoch": 0.44495708154506436, "grad_norm": 1.1171875, "learning_rate": 4.954509319857659e-06, "loss": 2.1654, "step": 8294 }, { "epoch": 0.4450107296137339, "grad_norm": 0.435546875, "learning_rate": 4.954492820661797e-06, "loss": 1.766, "step": 8295 }, { "epoch": 0.4450643776824034, "grad_norm": 0.396484375, "learning_rate": 4.954476318501881e-06, "loss": 2.244, "step": 8296 }, { "epoch": 0.44511802575107295, "grad_norm": 0.39453125, "learning_rate": 4.9544598133779296e-06, "loss": 2.3952, "step": 8297 }, { "epoch": 0.4451716738197425, "grad_norm": 0.54296875, "learning_rate": 4.954443305289963e-06, "loss": 2.1108, "step": 8298 }, { "epoch": 0.445225321888412, "grad_norm": 0.361328125, "learning_rate": 4.954426794238002e-06, "loss": 2.2084, "step": 8299 }, { "epoch": 0.44527896995708155, "grad_norm": 0.369140625, "learning_rate": 4.954410280222066e-06, "loss": 2.2551, "step": 8300 }, { "epoch": 0.4453326180257511, "grad_norm": 0.384765625, "learning_rate": 4.954393763242176e-06, "loss": 2.3704, "step": 8301 }, { "epoch": 0.4453862660944206, "grad_norm": 1.6015625, "learning_rate": 4.95437724329835e-06, "loss": 2.3278, "step": 8302 }, { "epoch": 0.44543991416309014, "grad_norm": 0.421875, "learning_rate": 4.954360720390608e-06, "loss": 2.0803, "step": 8303 }, { "epoch": 0.4454935622317597, "grad_norm": 0.4765625, "learning_rate": 4.954344194518972e-06, "loss": 2.5164, "step": 8304 }, { "epoch": 0.4455472103004292, "grad_norm": 0.390625, "learning_rate": 4.954327665683459e-06, "loss": 2.4894, "step": 8305 }, { "epoch": 0.44560085836909874, "grad_norm": 0.486328125, "learning_rate": 4.954311133884093e-06, "loss": 2.4627, "step": 8306 }, { "epoch": 0.4456545064377682, "grad_norm": 0.37109375, "learning_rate": 4.95429459912089e-06, "loss": 2.2476, "step": 8307 }, { "epoch": 0.44570815450643775, "grad_norm": 0.470703125, "learning_rate": 4.954278061393872e-06, "loss": 2.1827, "step": 8308 }, { "epoch": 0.4457618025751073, "grad_norm": 0.41796875, "learning_rate": 4.95426152070306e-06, "loss": 2.4584, "step": 8309 }, { "epoch": 0.4458154506437768, "grad_norm": 0.42578125, "learning_rate": 4.954244977048471e-06, "loss": 2.5153, "step": 8310 }, { "epoch": 0.44586909871244634, "grad_norm": 0.546875, "learning_rate": 4.954228430430127e-06, "loss": 2.2184, "step": 8311 }, { "epoch": 0.4459227467811159, "grad_norm": 0.41796875, "learning_rate": 4.9542118808480475e-06, "loss": 2.4153, "step": 8312 }, { "epoch": 0.4459763948497854, "grad_norm": 0.42578125, "learning_rate": 4.954195328302253e-06, "loss": 2.3311, "step": 8313 }, { "epoch": 0.44603004291845494, "grad_norm": 0.416015625, "learning_rate": 4.954178772792762e-06, "loss": 1.9482, "step": 8314 }, { "epoch": 0.44608369098712447, "grad_norm": 0.69140625, "learning_rate": 4.954162214319597e-06, "loss": 2.2178, "step": 8315 }, { "epoch": 0.446137339055794, "grad_norm": 0.4453125, "learning_rate": 4.954145652882776e-06, "loss": 2.2251, "step": 8316 }, { "epoch": 0.44619098712446353, "grad_norm": 0.51171875, "learning_rate": 4.954129088482319e-06, "loss": 1.388, "step": 8317 }, { "epoch": 0.44624463519313307, "grad_norm": 0.47265625, "learning_rate": 4.954112521118246e-06, "loss": 2.3695, "step": 8318 }, { "epoch": 0.4462982832618026, "grad_norm": 0.51171875, "learning_rate": 4.954095950790579e-06, "loss": 2.292, "step": 8319 }, { "epoch": 0.44635193133047213, "grad_norm": 0.345703125, "learning_rate": 4.954079377499336e-06, "loss": 2.3354, "step": 8320 }, { "epoch": 0.4464055793991416, "grad_norm": 0.59765625, "learning_rate": 4.9540628012445365e-06, "loss": 2.0463, "step": 8321 }, { "epoch": 0.44645922746781114, "grad_norm": 0.37109375, "learning_rate": 4.9540462220262035e-06, "loss": 2.4264, "step": 8322 }, { "epoch": 0.44651287553648067, "grad_norm": 0.458984375, "learning_rate": 4.954029639844353e-06, "loss": 2.3109, "step": 8323 }, { "epoch": 0.4465665236051502, "grad_norm": 1.2578125, "learning_rate": 4.9540130546990085e-06, "loss": 2.1778, "step": 8324 }, { "epoch": 0.44662017167381973, "grad_norm": 0.4921875, "learning_rate": 4.953996466590189e-06, "loss": 2.3007, "step": 8325 }, { "epoch": 0.44667381974248926, "grad_norm": 0.44921875, "learning_rate": 4.953979875517913e-06, "loss": 2.0884, "step": 8326 }, { "epoch": 0.4467274678111588, "grad_norm": 0.396484375, "learning_rate": 4.953963281482201e-06, "loss": 2.2319, "step": 8327 }, { "epoch": 0.4467811158798283, "grad_norm": 0.375, "learning_rate": 4.953946684483075e-06, "loss": 2.1237, "step": 8328 }, { "epoch": 0.44683476394849786, "grad_norm": 0.43359375, "learning_rate": 4.953930084520554e-06, "loss": 2.3998, "step": 8329 }, { "epoch": 0.4468884120171674, "grad_norm": 0.333984375, "learning_rate": 4.953913481594658e-06, "loss": 2.1827, "step": 8330 }, { "epoch": 0.4469420600858369, "grad_norm": 0.451171875, "learning_rate": 4.953896875705406e-06, "loss": 2.4067, "step": 8331 }, { "epoch": 0.44699570815450645, "grad_norm": 0.43359375, "learning_rate": 4.953880266852819e-06, "loss": 2.407, "step": 8332 }, { "epoch": 0.447049356223176, "grad_norm": 0.494140625, "learning_rate": 4.953863655036918e-06, "loss": 1.4353, "step": 8333 }, { "epoch": 0.4471030042918455, "grad_norm": 0.408203125, "learning_rate": 4.953847040257721e-06, "loss": 2.2054, "step": 8334 }, { "epoch": 0.447156652360515, "grad_norm": 0.404296875, "learning_rate": 4.95383042251525e-06, "loss": 2.4311, "step": 8335 }, { "epoch": 0.4472103004291845, "grad_norm": 1.265625, "learning_rate": 4.953813801809523e-06, "loss": 2.4202, "step": 8336 }, { "epoch": 0.44726394849785406, "grad_norm": 0.31640625, "learning_rate": 4.953797178140562e-06, "loss": 2.2048, "step": 8337 }, { "epoch": 0.4473175965665236, "grad_norm": 0.3359375, "learning_rate": 4.953780551508387e-06, "loss": 2.0654, "step": 8338 }, { "epoch": 0.4473712446351931, "grad_norm": 0.39453125, "learning_rate": 4.953763921913017e-06, "loss": 2.3469, "step": 8339 }, { "epoch": 0.44742489270386265, "grad_norm": 0.3984375, "learning_rate": 4.953747289354471e-06, "loss": 2.2317, "step": 8340 }, { "epoch": 0.4474785407725322, "grad_norm": 0.443359375, "learning_rate": 4.953730653832772e-06, "loss": 2.3318, "step": 8341 }, { "epoch": 0.4475321888412017, "grad_norm": 0.41796875, "learning_rate": 4.9537140153479385e-06, "loss": 1.9569, "step": 8342 }, { "epoch": 0.44758583690987125, "grad_norm": 0.478515625, "learning_rate": 4.9536973738999905e-06, "loss": 2.3773, "step": 8343 }, { "epoch": 0.4476394849785408, "grad_norm": 0.388671875, "learning_rate": 4.9536807294889485e-06, "loss": 2.1725, "step": 8344 }, { "epoch": 0.4476931330472103, "grad_norm": 0.5, "learning_rate": 4.953664082114833e-06, "loss": 2.2599, "step": 8345 }, { "epoch": 0.44774678111587984, "grad_norm": 0.419921875, "learning_rate": 4.953647431777663e-06, "loss": 2.4084, "step": 8346 }, { "epoch": 0.4478004291845494, "grad_norm": 0.4375, "learning_rate": 4.953630778477459e-06, "loss": 2.4641, "step": 8347 }, { "epoch": 0.4478540772532189, "grad_norm": 0.404296875, "learning_rate": 4.9536141222142405e-06, "loss": 2.3564, "step": 8348 }, { "epoch": 0.44790772532188844, "grad_norm": 0.478515625, "learning_rate": 4.95359746298803e-06, "loss": 2.3456, "step": 8349 }, { "epoch": 0.4479613733905579, "grad_norm": 0.421875, "learning_rate": 4.953580800798845e-06, "loss": 2.2241, "step": 8350 }, { "epoch": 0.44801502145922745, "grad_norm": 0.5078125, "learning_rate": 4.9535641356467075e-06, "loss": 1.483, "step": 8351 }, { "epoch": 0.448068669527897, "grad_norm": 0.67578125, "learning_rate": 4.9535474675316364e-06, "loss": 2.4433, "step": 8352 }, { "epoch": 0.4481223175965665, "grad_norm": 0.41015625, "learning_rate": 4.953530796453652e-06, "loss": 2.3644, "step": 8353 }, { "epoch": 0.44817596566523604, "grad_norm": 0.416015625, "learning_rate": 4.953514122412775e-06, "loss": 2.1661, "step": 8354 }, { "epoch": 0.4482296137339056, "grad_norm": 0.359375, "learning_rate": 4.953497445409025e-06, "loss": 2.2327, "step": 8355 }, { "epoch": 0.4482832618025751, "grad_norm": 0.3359375, "learning_rate": 4.953480765442421e-06, "loss": 2.237, "step": 8356 }, { "epoch": 0.44833690987124464, "grad_norm": 2.140625, "learning_rate": 4.953464082512986e-06, "loss": 2.2048, "step": 8357 }, { "epoch": 0.44839055793991417, "grad_norm": 0.423828125, "learning_rate": 4.953447396620738e-06, "loss": 2.2674, "step": 8358 }, { "epoch": 0.4484442060085837, "grad_norm": 0.431640625, "learning_rate": 4.953430707765698e-06, "loss": 2.1966, "step": 8359 }, { "epoch": 0.44849785407725323, "grad_norm": 1.3203125, "learning_rate": 4.953414015947885e-06, "loss": 2.1801, "step": 8360 }, { "epoch": 0.44855150214592276, "grad_norm": 0.431640625, "learning_rate": 4.953397321167322e-06, "loss": 2.2445, "step": 8361 }, { "epoch": 0.4486051502145923, "grad_norm": 0.44140625, "learning_rate": 4.953380623424026e-06, "loss": 2.3121, "step": 8362 }, { "epoch": 0.44865879828326183, "grad_norm": 0.396484375, "learning_rate": 4.953363922718017e-06, "loss": 2.2877, "step": 8363 }, { "epoch": 0.4487124463519313, "grad_norm": 1.8359375, "learning_rate": 4.9533472190493184e-06, "loss": 2.3953, "step": 8364 }, { "epoch": 0.44876609442060084, "grad_norm": 0.5, "learning_rate": 4.953330512417948e-06, "loss": 2.1768, "step": 8365 }, { "epoch": 0.44881974248927037, "grad_norm": 0.4140625, "learning_rate": 4.953313802823926e-06, "loss": 2.4529, "step": 8366 }, { "epoch": 0.4488733905579399, "grad_norm": 0.48046875, "learning_rate": 4.9532970902672735e-06, "loss": 2.0977, "step": 8367 }, { "epoch": 0.44892703862660943, "grad_norm": 0.44140625, "learning_rate": 4.95328037474801e-06, "loss": 2.198, "step": 8368 }, { "epoch": 0.44898068669527896, "grad_norm": 0.4453125, "learning_rate": 4.953263656266157e-06, "loss": 2.1203, "step": 8369 }, { "epoch": 0.4490343347639485, "grad_norm": 0.4375, "learning_rate": 4.9532469348217326e-06, "loss": 2.4532, "step": 8370 }, { "epoch": 0.449087982832618, "grad_norm": 0.41796875, "learning_rate": 4.9532302104147585e-06, "loss": 2.2194, "step": 8371 }, { "epoch": 0.44914163090128756, "grad_norm": 0.416015625, "learning_rate": 4.953213483045254e-06, "loss": 2.3743, "step": 8372 }, { "epoch": 0.4491952789699571, "grad_norm": 0.41796875, "learning_rate": 4.953196752713239e-06, "loss": 2.2344, "step": 8373 }, { "epoch": 0.4492489270386266, "grad_norm": 0.3828125, "learning_rate": 4.953180019418736e-06, "loss": 2.3767, "step": 8374 }, { "epoch": 0.44930257510729615, "grad_norm": 0.7890625, "learning_rate": 4.953163283161762e-06, "loss": 2.0817, "step": 8375 }, { "epoch": 0.4493562231759657, "grad_norm": 0.3671875, "learning_rate": 4.95314654394234e-06, "loss": 2.3762, "step": 8376 }, { "epoch": 0.4494098712446352, "grad_norm": 0.388671875, "learning_rate": 4.953129801760489e-06, "loss": 2.2744, "step": 8377 }, { "epoch": 0.4494635193133047, "grad_norm": 0.46484375, "learning_rate": 4.953113056616229e-06, "loss": 2.2785, "step": 8378 }, { "epoch": 0.4495171673819742, "grad_norm": 0.33984375, "learning_rate": 4.953096308509581e-06, "loss": 2.2454, "step": 8379 }, { "epoch": 0.44957081545064376, "grad_norm": 0.41015625, "learning_rate": 4.953079557440563e-06, "loss": 2.2996, "step": 8380 }, { "epoch": 0.4496244635193133, "grad_norm": 0.3984375, "learning_rate": 4.953062803409199e-06, "loss": 2.2361, "step": 8381 }, { "epoch": 0.4496781115879828, "grad_norm": 0.45703125, "learning_rate": 4.953046046415506e-06, "loss": 2.1911, "step": 8382 }, { "epoch": 0.44973175965665235, "grad_norm": 0.6171875, "learning_rate": 4.953029286459506e-06, "loss": 2.2196, "step": 8383 }, { "epoch": 0.4497854077253219, "grad_norm": 0.400390625, "learning_rate": 4.953012523541217e-06, "loss": 2.3037, "step": 8384 }, { "epoch": 0.4498390557939914, "grad_norm": 0.470703125, "learning_rate": 4.952995757660662e-06, "loss": 1.6911, "step": 8385 }, { "epoch": 0.44989270386266095, "grad_norm": 0.4296875, "learning_rate": 4.952978988817861e-06, "loss": 2.4309, "step": 8386 }, { "epoch": 0.4499463519313305, "grad_norm": 0.46484375, "learning_rate": 4.952962217012833e-06, "loss": 2.4652, "step": 8387 }, { "epoch": 0.45, "grad_norm": 0.40234375, "learning_rate": 4.952945442245598e-06, "loss": 2.3981, "step": 8388 }, { "epoch": 0.45005364806866954, "grad_norm": 0.41015625, "learning_rate": 4.952928664516177e-06, "loss": 2.0191, "step": 8389 }, { "epoch": 0.4501072961373391, "grad_norm": 0.4375, "learning_rate": 4.9529118838245904e-06, "loss": 2.28, "step": 8390 }, { "epoch": 0.4501609442060086, "grad_norm": 1.0, "learning_rate": 4.9528951001708585e-06, "loss": 2.2511, "step": 8391 }, { "epoch": 0.45021459227467814, "grad_norm": 0.466796875, "learning_rate": 4.952878313555001e-06, "loss": 1.4553, "step": 8392 }, { "epoch": 0.4502682403433476, "grad_norm": 0.53515625, "learning_rate": 4.952861523977038e-06, "loss": 2.4859, "step": 8393 }, { "epoch": 0.45032188841201715, "grad_norm": 0.3984375, "learning_rate": 4.952844731436991e-06, "loss": 2.1288, "step": 8394 }, { "epoch": 0.4503755364806867, "grad_norm": 0.396484375, "learning_rate": 4.952827935934878e-06, "loss": 2.2539, "step": 8395 }, { "epoch": 0.4504291845493562, "grad_norm": 0.373046875, "learning_rate": 4.952811137470724e-06, "loss": 2.1037, "step": 8396 }, { "epoch": 0.45048283261802574, "grad_norm": 0.41796875, "learning_rate": 4.952794336044543e-06, "loss": 2.2161, "step": 8397 }, { "epoch": 0.4505364806866953, "grad_norm": 0.453125, "learning_rate": 4.952777531656359e-06, "loss": 2.4118, "step": 8398 }, { "epoch": 0.4505901287553648, "grad_norm": 0.380859375, "learning_rate": 4.952760724306193e-06, "loss": 2.3676, "step": 8399 }, { "epoch": 0.45064377682403434, "grad_norm": 0.421875, "learning_rate": 4.952743913994062e-06, "loss": 2.3102, "step": 8400 }, { "epoch": 0.45069742489270387, "grad_norm": 0.474609375, "learning_rate": 4.9527271007199895e-06, "loss": 2.361, "step": 8401 }, { "epoch": 0.4507510729613734, "grad_norm": 0.470703125, "learning_rate": 4.952710284483995e-06, "loss": 2.4784, "step": 8402 }, { "epoch": 0.45080472103004293, "grad_norm": 0.416015625, "learning_rate": 4.952693465286097e-06, "loss": 2.4767, "step": 8403 }, { "epoch": 0.45085836909871246, "grad_norm": 0.443359375, "learning_rate": 4.952676643126318e-06, "loss": 2.268, "step": 8404 }, { "epoch": 0.450912017167382, "grad_norm": 0.37890625, "learning_rate": 4.952659818004677e-06, "loss": 2.2219, "step": 8405 }, { "epoch": 0.4509656652360515, "grad_norm": 0.427734375, "learning_rate": 4.9526429899211956e-06, "loss": 2.3636, "step": 8406 }, { "epoch": 0.451019313304721, "grad_norm": 0.41796875, "learning_rate": 4.952626158875893e-06, "loss": 2.329, "step": 8407 }, { "epoch": 0.45107296137339054, "grad_norm": 5.15625, "learning_rate": 4.952609324868789e-06, "loss": 2.3105, "step": 8408 }, { "epoch": 0.45112660944206007, "grad_norm": 0.361328125, "learning_rate": 4.952592487899906e-06, "loss": 2.3801, "step": 8409 }, { "epoch": 0.4511802575107296, "grad_norm": 0.431640625, "learning_rate": 4.9525756479692624e-06, "loss": 2.448, "step": 8410 }, { "epoch": 0.45123390557939913, "grad_norm": 0.671875, "learning_rate": 4.95255880507688e-06, "loss": 2.59, "step": 8411 }, { "epoch": 0.45128755364806866, "grad_norm": 0.462890625, "learning_rate": 4.952541959222777e-06, "loss": 2.3858, "step": 8412 }, { "epoch": 0.4513412017167382, "grad_norm": 0.72265625, "learning_rate": 4.952525110406977e-06, "loss": 2.2176, "step": 8413 }, { "epoch": 0.4513948497854077, "grad_norm": 0.357421875, "learning_rate": 4.952508258629497e-06, "loss": 1.9599, "step": 8414 }, { "epoch": 0.45144849785407726, "grad_norm": 0.404296875, "learning_rate": 4.95249140389036e-06, "loss": 2.3862, "step": 8415 }, { "epoch": 0.4515021459227468, "grad_norm": 0.46875, "learning_rate": 4.952474546189584e-06, "loss": 2.309, "step": 8416 }, { "epoch": 0.4515557939914163, "grad_norm": 0.373046875, "learning_rate": 4.952457685527191e-06, "loss": 2.3404, "step": 8417 }, { "epoch": 0.45160944206008585, "grad_norm": 0.4140625, "learning_rate": 4.952440821903201e-06, "loss": 2.1644, "step": 8418 }, { "epoch": 0.4516630901287554, "grad_norm": 0.37890625, "learning_rate": 4.952423955317635e-06, "loss": 2.0841, "step": 8419 }, { "epoch": 0.4517167381974249, "grad_norm": 0.40234375, "learning_rate": 4.952407085770512e-06, "loss": 2.2432, "step": 8420 }, { "epoch": 0.45177038626609445, "grad_norm": 0.40234375, "learning_rate": 4.952390213261853e-06, "loss": 2.3771, "step": 8421 }, { "epoch": 0.4518240343347639, "grad_norm": 0.384765625, "learning_rate": 4.952373337791678e-06, "loss": 1.8755, "step": 8422 }, { "epoch": 0.45187768240343346, "grad_norm": 1.3046875, "learning_rate": 4.952356459360008e-06, "loss": 2.3771, "step": 8423 }, { "epoch": 0.451931330472103, "grad_norm": 0.5546875, "learning_rate": 4.9523395779668634e-06, "loss": 2.3111, "step": 8424 }, { "epoch": 0.4519849785407725, "grad_norm": 0.494140625, "learning_rate": 4.952322693612264e-06, "loss": 2.3477, "step": 8425 }, { "epoch": 0.45203862660944205, "grad_norm": 0.51953125, "learning_rate": 4.952305806296231e-06, "loss": 2.1239, "step": 8426 }, { "epoch": 0.4520922746781116, "grad_norm": 0.5, "learning_rate": 4.952288916018784e-06, "loss": 2.323, "step": 8427 }, { "epoch": 0.4521459227467811, "grad_norm": 0.3671875, "learning_rate": 4.952272022779944e-06, "loss": 2.3626, "step": 8428 }, { "epoch": 0.45219957081545065, "grad_norm": 0.396484375, "learning_rate": 4.952255126579731e-06, "loss": 2.3054, "step": 8429 }, { "epoch": 0.4522532188841202, "grad_norm": 0.427734375, "learning_rate": 4.9522382274181646e-06, "loss": 2.4169, "step": 8430 }, { "epoch": 0.4523068669527897, "grad_norm": 0.34375, "learning_rate": 4.952221325295267e-06, "loss": 2.3271, "step": 8431 }, { "epoch": 0.45236051502145924, "grad_norm": 0.443359375, "learning_rate": 4.952204420211058e-06, "loss": 2.3599, "step": 8432 }, { "epoch": 0.4524141630901288, "grad_norm": 0.3828125, "learning_rate": 4.952187512165557e-06, "loss": 2.2061, "step": 8433 }, { "epoch": 0.4524678111587983, "grad_norm": 0.51171875, "learning_rate": 4.952170601158786e-06, "loss": 2.2301, "step": 8434 }, { "epoch": 0.45252145922746784, "grad_norm": 0.484375, "learning_rate": 4.952153687190764e-06, "loss": 1.5833, "step": 8435 }, { "epoch": 0.4525751072961373, "grad_norm": 0.369140625, "learning_rate": 4.952136770261512e-06, "loss": 2.178, "step": 8436 }, { "epoch": 0.45262875536480685, "grad_norm": 0.48046875, "learning_rate": 4.952119850371051e-06, "loss": 2.3772, "step": 8437 }, { "epoch": 0.4526824034334764, "grad_norm": 0.69921875, "learning_rate": 4.9521029275194e-06, "loss": 1.5248, "step": 8438 }, { "epoch": 0.4527360515021459, "grad_norm": 0.427734375, "learning_rate": 4.952086001706581e-06, "loss": 2.0421, "step": 8439 }, { "epoch": 0.45278969957081544, "grad_norm": 0.39453125, "learning_rate": 4.952069072932614e-06, "loss": 2.2125, "step": 8440 }, { "epoch": 0.45284334763948497, "grad_norm": 0.39453125, "learning_rate": 4.952052141197518e-06, "loss": 2.2696, "step": 8441 }, { "epoch": 0.4528969957081545, "grad_norm": 0.373046875, "learning_rate": 4.952035206501316e-06, "loss": 2.1428, "step": 8442 }, { "epoch": 0.45295064377682404, "grad_norm": 0.423828125, "learning_rate": 4.952018268844027e-06, "loss": 2.2499, "step": 8443 }, { "epoch": 0.45300429184549357, "grad_norm": 0.431640625, "learning_rate": 4.952001328225671e-06, "loss": 2.3585, "step": 8444 }, { "epoch": 0.4530579399141631, "grad_norm": 0.388671875, "learning_rate": 4.951984384646269e-06, "loss": 2.3151, "step": 8445 }, { "epoch": 0.45311158798283263, "grad_norm": 0.470703125, "learning_rate": 4.951967438105841e-06, "loss": 2.1786, "step": 8446 }, { "epoch": 0.45316523605150216, "grad_norm": 0.431640625, "learning_rate": 4.951950488604409e-06, "loss": 1.9541, "step": 8447 }, { "epoch": 0.4532188841201717, "grad_norm": 0.453125, "learning_rate": 4.951933536141992e-06, "loss": 2.199, "step": 8448 }, { "epoch": 0.4532725321888412, "grad_norm": 0.421875, "learning_rate": 4.9519165807186104e-06, "loss": 2.3362, "step": 8449 }, { "epoch": 0.4533261802575107, "grad_norm": 0.486328125, "learning_rate": 4.9518996223342854e-06, "loss": 2.1963, "step": 8450 }, { "epoch": 0.45337982832618023, "grad_norm": 0.416015625, "learning_rate": 4.951882660989037e-06, "loss": 2.4643, "step": 8451 }, { "epoch": 0.45343347639484977, "grad_norm": 0.47265625, "learning_rate": 4.9518656966828865e-06, "loss": 2.2479, "step": 8452 }, { "epoch": 0.4534871244635193, "grad_norm": 0.50390625, "learning_rate": 4.951848729415853e-06, "loss": 2.5617, "step": 8453 }, { "epoch": 0.45354077253218883, "grad_norm": 0.44921875, "learning_rate": 4.951831759187959e-06, "loss": 2.2862, "step": 8454 }, { "epoch": 0.45359442060085836, "grad_norm": 0.6015625, "learning_rate": 4.951814785999223e-06, "loss": 2.1754, "step": 8455 }, { "epoch": 0.4536480686695279, "grad_norm": 0.6484375, "learning_rate": 4.951797809849667e-06, "loss": 1.489, "step": 8456 }, { "epoch": 0.4537017167381974, "grad_norm": 0.44140625, "learning_rate": 4.95178083073931e-06, "loss": 2.3414, "step": 8457 }, { "epoch": 0.45375536480686696, "grad_norm": 0.365234375, "learning_rate": 4.9517638486681725e-06, "loss": 2.1119, "step": 8458 }, { "epoch": 0.4538090128755365, "grad_norm": 0.392578125, "learning_rate": 4.951746863636277e-06, "loss": 2.3133, "step": 8459 }, { "epoch": 0.453862660944206, "grad_norm": 0.349609375, "learning_rate": 4.951729875643643e-06, "loss": 2.2496, "step": 8460 }, { "epoch": 0.45391630901287555, "grad_norm": 0.4375, "learning_rate": 4.9517128846902905e-06, "loss": 2.3218, "step": 8461 }, { "epoch": 0.4539699570815451, "grad_norm": 0.50390625, "learning_rate": 4.951695890776239e-06, "loss": 2.5545, "step": 8462 }, { "epoch": 0.4540236051502146, "grad_norm": 0.41015625, "learning_rate": 4.951678893901512e-06, "loss": 2.2577, "step": 8463 }, { "epoch": 0.45407725321888415, "grad_norm": 0.5859375, "learning_rate": 4.951661894066128e-06, "loss": 1.6804, "step": 8464 }, { "epoch": 0.4541309012875536, "grad_norm": 0.75390625, "learning_rate": 4.9516448912701074e-06, "loss": 1.9676, "step": 8465 }, { "epoch": 0.45418454935622316, "grad_norm": 2.484375, "learning_rate": 4.951627885513473e-06, "loss": 1.6453, "step": 8466 }, { "epoch": 0.4542381974248927, "grad_norm": 0.5625, "learning_rate": 4.951610876796241e-06, "loss": 2.4588, "step": 8467 }, { "epoch": 0.4542918454935622, "grad_norm": 0.515625, "learning_rate": 4.951593865118436e-06, "loss": 2.3578, "step": 8468 }, { "epoch": 0.45434549356223175, "grad_norm": 0.392578125, "learning_rate": 4.951576850480077e-06, "loss": 2.4135, "step": 8469 }, { "epoch": 0.4543991416309013, "grad_norm": 0.41015625, "learning_rate": 4.951559832881185e-06, "loss": 2.2758, "step": 8470 }, { "epoch": 0.4544527896995708, "grad_norm": 0.419921875, "learning_rate": 4.951542812321779e-06, "loss": 2.4216, "step": 8471 }, { "epoch": 0.45450643776824035, "grad_norm": 0.37109375, "learning_rate": 4.951525788801882e-06, "loss": 2.3303, "step": 8472 }, { "epoch": 0.4545600858369099, "grad_norm": 0.390625, "learning_rate": 4.951508762321512e-06, "loss": 2.3093, "step": 8473 }, { "epoch": 0.4546137339055794, "grad_norm": 0.361328125, "learning_rate": 4.951491732880691e-06, "loss": 2.0382, "step": 8474 }, { "epoch": 0.45466738197424894, "grad_norm": 0.453125, "learning_rate": 4.95147470047944e-06, "loss": 2.0122, "step": 8475 }, { "epoch": 0.4547210300429185, "grad_norm": 0.39453125, "learning_rate": 4.951457665117779e-06, "loss": 2.3454, "step": 8476 }, { "epoch": 0.454774678111588, "grad_norm": 0.4609375, "learning_rate": 4.9514406267957285e-06, "loss": 2.649, "step": 8477 }, { "epoch": 0.45482832618025754, "grad_norm": 0.400390625, "learning_rate": 4.951423585513309e-06, "loss": 2.5131, "step": 8478 }, { "epoch": 0.454881974248927, "grad_norm": 0.43359375, "learning_rate": 4.95140654127054e-06, "loss": 2.4807, "step": 8479 }, { "epoch": 0.45493562231759654, "grad_norm": 0.412109375, "learning_rate": 4.951389494067444e-06, "loss": 2.258, "step": 8480 }, { "epoch": 0.4549892703862661, "grad_norm": 0.40625, "learning_rate": 4.951372443904042e-06, "loss": 2.3084, "step": 8481 }, { "epoch": 0.4550429184549356, "grad_norm": 0.357421875, "learning_rate": 4.951355390780353e-06, "loss": 2.1216, "step": 8482 }, { "epoch": 0.45509656652360514, "grad_norm": 0.41796875, "learning_rate": 4.951338334696397e-06, "loss": 2.1563, "step": 8483 }, { "epoch": 0.45515021459227467, "grad_norm": 0.390625, "learning_rate": 4.9513212756521964e-06, "loss": 2.3142, "step": 8484 }, { "epoch": 0.4552038626609442, "grad_norm": 0.3671875, "learning_rate": 4.951304213647772e-06, "loss": 2.0205, "step": 8485 }, { "epoch": 0.45525751072961373, "grad_norm": 0.380859375, "learning_rate": 4.951287148683141e-06, "loss": 2.2307, "step": 8486 }, { "epoch": 0.45531115879828327, "grad_norm": 0.35546875, "learning_rate": 4.951270080758329e-06, "loss": 2.1282, "step": 8487 }, { "epoch": 0.4553648068669528, "grad_norm": 0.453125, "learning_rate": 4.951253009873352e-06, "loss": 2.1939, "step": 8488 }, { "epoch": 0.45541845493562233, "grad_norm": 0.462890625, "learning_rate": 4.951235936028235e-06, "loss": 2.3504, "step": 8489 }, { "epoch": 0.45547210300429186, "grad_norm": 0.5, "learning_rate": 4.951218859222994e-06, "loss": 2.4905, "step": 8490 }, { "epoch": 0.4555257510729614, "grad_norm": 0.447265625, "learning_rate": 4.951201779457653e-06, "loss": 2.2548, "step": 8491 }, { "epoch": 0.4555793991416309, "grad_norm": 0.462890625, "learning_rate": 4.951184696732232e-06, "loss": 2.2415, "step": 8492 }, { "epoch": 0.4556330472103004, "grad_norm": 0.40234375, "learning_rate": 4.95116761104675e-06, "loss": 2.3158, "step": 8493 }, { "epoch": 0.45568669527896993, "grad_norm": 0.6484375, "learning_rate": 4.951150522401229e-06, "loss": 2.4759, "step": 8494 }, { "epoch": 0.45574034334763946, "grad_norm": 0.423828125, "learning_rate": 4.95113343079569e-06, "loss": 2.3253, "step": 8495 }, { "epoch": 0.455793991416309, "grad_norm": 0.349609375, "learning_rate": 4.951116336230153e-06, "loss": 2.3316, "step": 8496 }, { "epoch": 0.45584763948497853, "grad_norm": 0.435546875, "learning_rate": 4.951099238704639e-06, "loss": 2.0674, "step": 8497 }, { "epoch": 0.45590128755364806, "grad_norm": 0.400390625, "learning_rate": 4.951082138219167e-06, "loss": 1.8181, "step": 8498 }, { "epoch": 0.4559549356223176, "grad_norm": 0.427734375, "learning_rate": 4.951065034773761e-06, "loss": 2.1142, "step": 8499 }, { "epoch": 0.4560085836909871, "grad_norm": 0.453125, "learning_rate": 4.951047928368439e-06, "loss": 2.2311, "step": 8500 }, { "epoch": 0.45606223175965666, "grad_norm": 0.39453125, "learning_rate": 4.951030819003222e-06, "loss": 2.4563, "step": 8501 }, { "epoch": 0.4561158798283262, "grad_norm": 0.361328125, "learning_rate": 4.95101370667813e-06, "loss": 2.0559, "step": 8502 }, { "epoch": 0.4561695278969957, "grad_norm": 0.40625, "learning_rate": 4.9509965913931865e-06, "loss": 1.9675, "step": 8503 }, { "epoch": 0.45622317596566525, "grad_norm": 0.46875, "learning_rate": 4.95097947314841e-06, "loss": 2.1449, "step": 8504 }, { "epoch": 0.4562768240343348, "grad_norm": 0.458984375, "learning_rate": 4.950962351943821e-06, "loss": 2.4614, "step": 8505 }, { "epoch": 0.4563304721030043, "grad_norm": 0.458984375, "learning_rate": 4.95094522777944e-06, "loss": 2.4725, "step": 8506 }, { "epoch": 0.45638412017167385, "grad_norm": 0.3828125, "learning_rate": 4.950928100655289e-06, "loss": 2.4476, "step": 8507 }, { "epoch": 0.4564377682403433, "grad_norm": 0.46484375, "learning_rate": 4.950910970571389e-06, "loss": 2.3364, "step": 8508 }, { "epoch": 0.45649141630901285, "grad_norm": 0.4140625, "learning_rate": 4.950893837527758e-06, "loss": 2.4826, "step": 8509 }, { "epoch": 0.4565450643776824, "grad_norm": 0.33984375, "learning_rate": 4.95087670152442e-06, "loss": 1.8897, "step": 8510 }, { "epoch": 0.4565987124463519, "grad_norm": 0.419921875, "learning_rate": 4.950859562561393e-06, "loss": 2.4421, "step": 8511 }, { "epoch": 0.45665236051502145, "grad_norm": 0.359375, "learning_rate": 4.950842420638699e-06, "loss": 2.134, "step": 8512 }, { "epoch": 0.456706008583691, "grad_norm": 0.396484375, "learning_rate": 4.950825275756359e-06, "loss": 2.1709, "step": 8513 }, { "epoch": 0.4567596566523605, "grad_norm": 0.40625, "learning_rate": 4.950808127914393e-06, "loss": 2.1851, "step": 8514 }, { "epoch": 0.45681330472103004, "grad_norm": 0.453125, "learning_rate": 4.950790977112821e-06, "loss": 2.1584, "step": 8515 }, { "epoch": 0.4568669527896996, "grad_norm": 0.46484375, "learning_rate": 4.950773823351666e-06, "loss": 2.1122, "step": 8516 }, { "epoch": 0.4569206008583691, "grad_norm": 0.412109375, "learning_rate": 4.950756666630947e-06, "loss": 2.126, "step": 8517 }, { "epoch": 0.45697424892703864, "grad_norm": 0.453125, "learning_rate": 4.950739506950685e-06, "loss": 2.3116, "step": 8518 }, { "epoch": 0.45702789699570817, "grad_norm": 0.466796875, "learning_rate": 4.9507223443109005e-06, "loss": 2.265, "step": 8519 }, { "epoch": 0.4570815450643777, "grad_norm": 0.3828125, "learning_rate": 4.950705178711614e-06, "loss": 2.2986, "step": 8520 }, { "epoch": 0.45713519313304724, "grad_norm": 0.41015625, "learning_rate": 4.950688010152847e-06, "loss": 2.4411, "step": 8521 }, { "epoch": 0.4571888412017167, "grad_norm": 0.466796875, "learning_rate": 4.950670838634621e-06, "loss": 2.2193, "step": 8522 }, { "epoch": 0.45724248927038624, "grad_norm": 0.49609375, "learning_rate": 4.9506536641569556e-06, "loss": 2.5148, "step": 8523 }, { "epoch": 0.4572961373390558, "grad_norm": 0.3671875, "learning_rate": 4.9506364867198705e-06, "loss": 2.271, "step": 8524 }, { "epoch": 0.4573497854077253, "grad_norm": 0.640625, "learning_rate": 4.950619306323389e-06, "loss": 2.2295, "step": 8525 }, { "epoch": 0.45740343347639484, "grad_norm": 0.4296875, "learning_rate": 4.95060212296753e-06, "loss": 2.1162, "step": 8526 }, { "epoch": 0.45745708154506437, "grad_norm": 0.46484375, "learning_rate": 4.950584936652314e-06, "loss": 2.2781, "step": 8527 }, { "epoch": 0.4575107296137339, "grad_norm": 0.455078125, "learning_rate": 4.950567747377763e-06, "loss": 2.0694, "step": 8528 }, { "epoch": 0.45756437768240343, "grad_norm": 0.408203125, "learning_rate": 4.9505505551438965e-06, "loss": 2.5125, "step": 8529 }, { "epoch": 0.45761802575107297, "grad_norm": 0.41796875, "learning_rate": 4.950533359950737e-06, "loss": 2.6147, "step": 8530 }, { "epoch": 0.4576716738197425, "grad_norm": 0.90234375, "learning_rate": 4.950516161798303e-06, "loss": 2.3663, "step": 8531 }, { "epoch": 0.45772532188841203, "grad_norm": 0.46875, "learning_rate": 4.9504989606866175e-06, "loss": 1.9967, "step": 8532 }, { "epoch": 0.45777896995708156, "grad_norm": 0.60546875, "learning_rate": 4.9504817566156995e-06, "loss": 2.5631, "step": 8533 }, { "epoch": 0.4578326180257511, "grad_norm": 0.318359375, "learning_rate": 4.950464549585571e-06, "loss": 1.9578, "step": 8534 }, { "epoch": 0.4578862660944206, "grad_norm": 0.427734375, "learning_rate": 4.950447339596253e-06, "loss": 2.3303, "step": 8535 }, { "epoch": 0.45793991416309016, "grad_norm": 0.640625, "learning_rate": 4.950430126647765e-06, "loss": 2.2034, "step": 8536 }, { "epoch": 0.45799356223175963, "grad_norm": 0.51953125, "learning_rate": 4.950412910740129e-06, "loss": 2.3788, "step": 8537 }, { "epoch": 0.45804721030042916, "grad_norm": 0.78125, "learning_rate": 4.950395691873364e-06, "loss": 2.3568, "step": 8538 }, { "epoch": 0.4581008583690987, "grad_norm": 0.4765625, "learning_rate": 4.950378470047492e-06, "loss": 1.9677, "step": 8539 }, { "epoch": 0.4581545064377682, "grad_norm": 0.42578125, "learning_rate": 4.950361245262535e-06, "loss": 2.3207, "step": 8540 }, { "epoch": 0.45820815450643776, "grad_norm": 0.63671875, "learning_rate": 4.950344017518511e-06, "loss": 2.0359, "step": 8541 }, { "epoch": 0.4582618025751073, "grad_norm": 0.353515625, "learning_rate": 4.950326786815444e-06, "loss": 2.0731, "step": 8542 }, { "epoch": 0.4583154506437768, "grad_norm": 0.3671875, "learning_rate": 4.950309553153352e-06, "loss": 2.2823, "step": 8543 }, { "epoch": 0.45836909871244635, "grad_norm": 0.32421875, "learning_rate": 4.950292316532258e-06, "loss": 2.1503, "step": 8544 }, { "epoch": 0.4584227467811159, "grad_norm": 0.392578125, "learning_rate": 4.9502750769521816e-06, "loss": 2.1374, "step": 8545 }, { "epoch": 0.4584763948497854, "grad_norm": 0.408203125, "learning_rate": 4.950257834413143e-06, "loss": 2.0237, "step": 8546 }, { "epoch": 0.45853004291845495, "grad_norm": 0.4921875, "learning_rate": 4.950240588915164e-06, "loss": 2.177, "step": 8547 }, { "epoch": 0.4585836909871245, "grad_norm": 0.5703125, "learning_rate": 4.950223340458265e-06, "loss": 2.5758, "step": 8548 }, { "epoch": 0.458637339055794, "grad_norm": 0.65234375, "learning_rate": 4.9502060890424686e-06, "loss": 2.5664, "step": 8549 }, { "epoch": 0.45869098712446355, "grad_norm": 0.46875, "learning_rate": 4.9501888346677936e-06, "loss": 2.3732, "step": 8550 }, { "epoch": 0.458744635193133, "grad_norm": 0.384765625, "learning_rate": 4.9501715773342606e-06, "loss": 2.226, "step": 8551 }, { "epoch": 0.45879828326180255, "grad_norm": 0.4921875, "learning_rate": 4.950154317041892e-06, "loss": 2.4912, "step": 8552 }, { "epoch": 0.4588519313304721, "grad_norm": 0.416015625, "learning_rate": 4.950137053790708e-06, "loss": 2.3971, "step": 8553 }, { "epoch": 0.4589055793991416, "grad_norm": 0.5, "learning_rate": 4.9501197875807285e-06, "loss": 2.4467, "step": 8554 }, { "epoch": 0.45895922746781115, "grad_norm": 0.388671875, "learning_rate": 4.950102518411976e-06, "loss": 2.3984, "step": 8555 }, { "epoch": 0.4590128755364807, "grad_norm": 1.8046875, "learning_rate": 4.9500852462844696e-06, "loss": 2.402, "step": 8556 }, { "epoch": 0.4590665236051502, "grad_norm": 1.171875, "learning_rate": 4.950067971198231e-06, "loss": 2.0658, "step": 8557 }, { "epoch": 0.45912017167381974, "grad_norm": 0.53125, "learning_rate": 4.9500506931532825e-06, "loss": 2.212, "step": 8558 }, { "epoch": 0.4591738197424893, "grad_norm": 0.37109375, "learning_rate": 4.9500334121496425e-06, "loss": 2.3466, "step": 8559 }, { "epoch": 0.4592274678111588, "grad_norm": 0.447265625, "learning_rate": 4.950016128187333e-06, "loss": 2.6097, "step": 8560 }, { "epoch": 0.45928111587982834, "grad_norm": 0.59375, "learning_rate": 4.949998841266375e-06, "loss": 1.8574, "step": 8561 }, { "epoch": 0.45933476394849787, "grad_norm": 0.37890625, "learning_rate": 4.94998155138679e-06, "loss": 2.0553, "step": 8562 }, { "epoch": 0.4593884120171674, "grad_norm": 0.58203125, "learning_rate": 4.949964258548597e-06, "loss": 2.0493, "step": 8563 }, { "epoch": 0.45944206008583693, "grad_norm": 0.390625, "learning_rate": 4.9499469627518185e-06, "loss": 1.8952, "step": 8564 }, { "epoch": 0.4594957081545064, "grad_norm": 0.36328125, "learning_rate": 4.9499296639964745e-06, "loss": 2.2676, "step": 8565 }, { "epoch": 0.45954935622317594, "grad_norm": 0.43359375, "learning_rate": 4.949912362282586e-06, "loss": 2.3267, "step": 8566 }, { "epoch": 0.4596030042918455, "grad_norm": 0.408203125, "learning_rate": 4.9498950576101754e-06, "loss": 1.4367, "step": 8567 }, { "epoch": 0.459656652360515, "grad_norm": 0.44140625, "learning_rate": 4.949877749979262e-06, "loss": 2.2968, "step": 8568 }, { "epoch": 0.45971030042918454, "grad_norm": 0.3984375, "learning_rate": 4.949860439389866e-06, "loss": 2.3463, "step": 8569 }, { "epoch": 0.45976394849785407, "grad_norm": 0.54296875, "learning_rate": 4.94984312584201e-06, "loss": 2.2992, "step": 8570 }, { "epoch": 0.4598175965665236, "grad_norm": 0.427734375, "learning_rate": 4.949825809335714e-06, "loss": 2.1866, "step": 8571 }, { "epoch": 0.45987124463519313, "grad_norm": 0.392578125, "learning_rate": 4.949808489871e-06, "loss": 2.3001, "step": 8572 }, { "epoch": 0.45992489270386266, "grad_norm": 0.427734375, "learning_rate": 4.949791167447887e-06, "loss": 2.2255, "step": 8573 }, { "epoch": 0.4599785407725322, "grad_norm": 0.41015625, "learning_rate": 4.9497738420663974e-06, "loss": 2.3224, "step": 8574 }, { "epoch": 0.46003218884120173, "grad_norm": 0.4453125, "learning_rate": 4.949756513726552e-06, "loss": 2.3078, "step": 8575 }, { "epoch": 0.46008583690987126, "grad_norm": 0.4140625, "learning_rate": 4.949739182428371e-06, "loss": 2.2121, "step": 8576 }, { "epoch": 0.4601394849785408, "grad_norm": 0.388671875, "learning_rate": 4.949721848171876e-06, "loss": 2.1394, "step": 8577 }, { "epoch": 0.4601931330472103, "grad_norm": 0.40234375, "learning_rate": 4.949704510957089e-06, "loss": 2.3503, "step": 8578 }, { "epoch": 0.46024678111587985, "grad_norm": 0.4296875, "learning_rate": 4.949687170784027e-06, "loss": 1.4092, "step": 8579 }, { "epoch": 0.46030042918454933, "grad_norm": 0.40625, "learning_rate": 4.949669827652716e-06, "loss": 2.2703, "step": 8580 }, { "epoch": 0.46035407725321886, "grad_norm": 0.4296875, "learning_rate": 4.949652481563174e-06, "loss": 2.3553, "step": 8581 }, { "epoch": 0.4604077253218884, "grad_norm": 0.375, "learning_rate": 4.949635132515422e-06, "loss": 2.182, "step": 8582 }, { "epoch": 0.4604613733905579, "grad_norm": 0.44140625, "learning_rate": 4.9496177805094816e-06, "loss": 2.4169, "step": 8583 }, { "epoch": 0.46051502145922746, "grad_norm": 0.51171875, "learning_rate": 4.949600425545373e-06, "loss": 2.6049, "step": 8584 }, { "epoch": 0.460568669527897, "grad_norm": 0.431640625, "learning_rate": 4.949583067623119e-06, "loss": 2.2837, "step": 8585 }, { "epoch": 0.4606223175965665, "grad_norm": 0.42578125, "learning_rate": 4.9495657067427385e-06, "loss": 2.4123, "step": 8586 }, { "epoch": 0.46067596566523605, "grad_norm": 0.44140625, "learning_rate": 4.949548342904253e-06, "loss": 2.4453, "step": 8587 }, { "epoch": 0.4607296137339056, "grad_norm": 0.3984375, "learning_rate": 4.949530976107684e-06, "loss": 2.4262, "step": 8588 }, { "epoch": 0.4607832618025751, "grad_norm": 0.474609375, "learning_rate": 4.949513606353052e-06, "loss": 2.1425, "step": 8589 }, { "epoch": 0.46083690987124465, "grad_norm": 0.4140625, "learning_rate": 4.949496233640378e-06, "loss": 2.418, "step": 8590 }, { "epoch": 0.4608905579399142, "grad_norm": 0.478515625, "learning_rate": 4.949478857969684e-06, "loss": 1.448, "step": 8591 }, { "epoch": 0.4609442060085837, "grad_norm": 0.890625, "learning_rate": 4.94946147934099e-06, "loss": 2.2145, "step": 8592 }, { "epoch": 0.46099785407725324, "grad_norm": 0.39453125, "learning_rate": 4.9494440977543165e-06, "loss": 2.1278, "step": 8593 }, { "epoch": 0.4610515021459227, "grad_norm": 0.396484375, "learning_rate": 4.949426713209687e-06, "loss": 2.3951, "step": 8594 }, { "epoch": 0.46110515021459225, "grad_norm": 0.390625, "learning_rate": 4.949409325707119e-06, "loss": 2.3548, "step": 8595 }, { "epoch": 0.4611587982832618, "grad_norm": 0.490234375, "learning_rate": 4.9493919352466345e-06, "loss": 2.2621, "step": 8596 }, { "epoch": 0.4612124463519313, "grad_norm": 0.4140625, "learning_rate": 4.949374541828255e-06, "loss": 2.2377, "step": 8597 }, { "epoch": 0.46126609442060085, "grad_norm": 0.474609375, "learning_rate": 4.949357145452003e-06, "loss": 2.2283, "step": 8598 }, { "epoch": 0.4613197424892704, "grad_norm": 0.388671875, "learning_rate": 4.949339746117898e-06, "loss": 2.3671, "step": 8599 }, { "epoch": 0.4613733905579399, "grad_norm": 0.55078125, "learning_rate": 4.949322343825961e-06, "loss": 2.2676, "step": 8600 }, { "epoch": 0.46142703862660944, "grad_norm": 0.451171875, "learning_rate": 4.949304938576213e-06, "loss": 2.2437, "step": 8601 }, { "epoch": 0.461480686695279, "grad_norm": 0.39453125, "learning_rate": 4.949287530368675e-06, "loss": 2.267, "step": 8602 }, { "epoch": 0.4615343347639485, "grad_norm": 0.33984375, "learning_rate": 4.949270119203368e-06, "loss": 2.287, "step": 8603 }, { "epoch": 0.46158798283261804, "grad_norm": 0.427734375, "learning_rate": 4.949252705080314e-06, "loss": 2.313, "step": 8604 }, { "epoch": 0.46164163090128757, "grad_norm": 0.443359375, "learning_rate": 4.9492352879995325e-06, "loss": 2.1312, "step": 8605 }, { "epoch": 0.4616952789699571, "grad_norm": 0.40234375, "learning_rate": 4.949217867961046e-06, "loss": 2.317, "step": 8606 }, { "epoch": 0.46174892703862663, "grad_norm": 0.373046875, "learning_rate": 4.949200444964874e-06, "loss": 2.2715, "step": 8607 }, { "epoch": 0.46180257510729616, "grad_norm": 0.408203125, "learning_rate": 4.949183019011039e-06, "loss": 2.2791, "step": 8608 }, { "epoch": 0.46185622317596564, "grad_norm": 0.458984375, "learning_rate": 4.949165590099561e-06, "loss": 2.5321, "step": 8609 }, { "epoch": 0.4619098712446352, "grad_norm": 0.45703125, "learning_rate": 4.949148158230462e-06, "loss": 2.1282, "step": 8610 }, { "epoch": 0.4619635193133047, "grad_norm": 0.353515625, "learning_rate": 4.949130723403762e-06, "loss": 2.0953, "step": 8611 }, { "epoch": 0.46201716738197424, "grad_norm": 0.412109375, "learning_rate": 4.949113285619483e-06, "loss": 2.246, "step": 8612 }, { "epoch": 0.46207081545064377, "grad_norm": 0.376953125, "learning_rate": 4.949095844877645e-06, "loss": 2.078, "step": 8613 }, { "epoch": 0.4621244635193133, "grad_norm": 0.423828125, "learning_rate": 4.94907840117827e-06, "loss": 2.4476, "step": 8614 }, { "epoch": 0.46217811158798283, "grad_norm": 0.455078125, "learning_rate": 4.949060954521379e-06, "loss": 2.3627, "step": 8615 }, { "epoch": 0.46223175965665236, "grad_norm": 0.416015625, "learning_rate": 4.9490435049069925e-06, "loss": 2.2935, "step": 8616 }, { "epoch": 0.4622854077253219, "grad_norm": 0.392578125, "learning_rate": 4.9490260523351316e-06, "loss": 2.4709, "step": 8617 }, { "epoch": 0.4623390557939914, "grad_norm": 0.3671875, "learning_rate": 4.949008596805819e-06, "loss": 2.1085, "step": 8618 }, { "epoch": 0.46239270386266096, "grad_norm": 0.46484375, "learning_rate": 4.9489911383190724e-06, "loss": 2.223, "step": 8619 }, { "epoch": 0.4624463519313305, "grad_norm": 0.77734375, "learning_rate": 4.948973676874917e-06, "loss": 2.4588, "step": 8620 }, { "epoch": 0.4625, "grad_norm": 0.5625, "learning_rate": 4.948956212473371e-06, "loss": 1.9511, "step": 8621 }, { "epoch": 0.46255364806866955, "grad_norm": 0.984375, "learning_rate": 4.948938745114456e-06, "loss": 2.1872, "step": 8622 }, { "epoch": 0.46260729613733903, "grad_norm": 0.4921875, "learning_rate": 4.948921274798193e-06, "loss": 1.1416, "step": 8623 }, { "epoch": 0.46266094420600856, "grad_norm": 0.44140625, "learning_rate": 4.948903801524604e-06, "loss": 2.2898, "step": 8624 }, { "epoch": 0.4627145922746781, "grad_norm": 0.5703125, "learning_rate": 4.948886325293709e-06, "loss": 2.5387, "step": 8625 }, { "epoch": 0.4627682403433476, "grad_norm": 0.48046875, "learning_rate": 4.948868846105531e-06, "loss": 2.1891, "step": 8626 }, { "epoch": 0.46282188841201716, "grad_norm": 0.38671875, "learning_rate": 4.948851363960089e-06, "loss": 2.2678, "step": 8627 }, { "epoch": 0.4628755364806867, "grad_norm": 0.408203125, "learning_rate": 4.948833878857405e-06, "loss": 2.1404, "step": 8628 }, { "epoch": 0.4629291845493562, "grad_norm": 0.5625, "learning_rate": 4.9488163907975e-06, "loss": 1.5915, "step": 8629 }, { "epoch": 0.46298283261802575, "grad_norm": 0.365234375, "learning_rate": 4.948798899780395e-06, "loss": 2.2786, "step": 8630 }, { "epoch": 0.4630364806866953, "grad_norm": 0.4453125, "learning_rate": 4.948781405806111e-06, "loss": 2.444, "step": 8631 }, { "epoch": 0.4630901287553648, "grad_norm": 0.3828125, "learning_rate": 4.948763908874671e-06, "loss": 2.5621, "step": 8632 }, { "epoch": 0.46314377682403435, "grad_norm": 0.69140625, "learning_rate": 4.948746408986092e-06, "loss": 2.2957, "step": 8633 }, { "epoch": 0.4631974248927039, "grad_norm": 0.423828125, "learning_rate": 4.9487289061404e-06, "loss": 1.7891, "step": 8634 }, { "epoch": 0.4632510729613734, "grad_norm": 0.59375, "learning_rate": 4.9487114003376125e-06, "loss": 2.1816, "step": 8635 }, { "epoch": 0.46330472103004294, "grad_norm": 0.462890625, "learning_rate": 4.948693891577752e-06, "loss": 2.473, "step": 8636 }, { "epoch": 0.4633583690987124, "grad_norm": 0.443359375, "learning_rate": 4.948676379860841e-06, "loss": 2.5707, "step": 8637 }, { "epoch": 0.46341201716738195, "grad_norm": 0.404296875, "learning_rate": 4.948658865186897e-06, "loss": 2.4099, "step": 8638 }, { "epoch": 0.4634656652360515, "grad_norm": 0.60546875, "learning_rate": 4.948641347555945e-06, "loss": 2.3977, "step": 8639 }, { "epoch": 0.463519313304721, "grad_norm": 0.439453125, "learning_rate": 4.948623826968004e-06, "loss": 2.2429, "step": 8640 }, { "epoch": 0.46357296137339055, "grad_norm": 0.56640625, "learning_rate": 4.9486063034230955e-06, "loss": 2.3169, "step": 8641 }, { "epoch": 0.4636266094420601, "grad_norm": 0.427734375, "learning_rate": 4.948588776921241e-06, "loss": 2.155, "step": 8642 }, { "epoch": 0.4636802575107296, "grad_norm": 0.412109375, "learning_rate": 4.948571247462461e-06, "loss": 2.2483, "step": 8643 }, { "epoch": 0.46373390557939914, "grad_norm": 0.408203125, "learning_rate": 4.9485537150467785e-06, "loss": 2.308, "step": 8644 }, { "epoch": 0.4637875536480687, "grad_norm": 0.404296875, "learning_rate": 4.948536179674213e-06, "loss": 2.095, "step": 8645 }, { "epoch": 0.4638412017167382, "grad_norm": 0.359375, "learning_rate": 4.948518641344785e-06, "loss": 2.078, "step": 8646 }, { "epoch": 0.46389484978540774, "grad_norm": 0.416015625, "learning_rate": 4.948501100058517e-06, "loss": 2.2442, "step": 8647 }, { "epoch": 0.46394849785407727, "grad_norm": 0.4140625, "learning_rate": 4.94848355581543e-06, "loss": 2.3839, "step": 8648 }, { "epoch": 0.4640021459227468, "grad_norm": 0.44140625, "learning_rate": 4.948466008615545e-06, "loss": 2.3316, "step": 8649 }, { "epoch": 0.46405579399141633, "grad_norm": 0.416015625, "learning_rate": 4.9484484584588845e-06, "loss": 2.5899, "step": 8650 }, { "epoch": 0.46410944206008586, "grad_norm": 0.412109375, "learning_rate": 4.948430905345467e-06, "loss": 2.4367, "step": 8651 }, { "epoch": 0.46416309012875534, "grad_norm": 0.37109375, "learning_rate": 4.948413349275316e-06, "loss": 2.1618, "step": 8652 }, { "epoch": 0.46421673819742487, "grad_norm": 0.419921875, "learning_rate": 4.948395790248452e-06, "loss": 2.3019, "step": 8653 }, { "epoch": 0.4642703862660944, "grad_norm": 0.419921875, "learning_rate": 4.948378228264895e-06, "loss": 2.2997, "step": 8654 }, { "epoch": 0.46432403433476394, "grad_norm": 0.4609375, "learning_rate": 4.948360663324669e-06, "loss": 2.4475, "step": 8655 }, { "epoch": 0.46437768240343347, "grad_norm": 0.45703125, "learning_rate": 4.948343095427791e-06, "loss": 2.4009, "step": 8656 }, { "epoch": 0.464431330472103, "grad_norm": 0.390625, "learning_rate": 4.948325524574287e-06, "loss": 2.306, "step": 8657 }, { "epoch": 0.46448497854077253, "grad_norm": 0.451171875, "learning_rate": 4.948307950764175e-06, "loss": 2.4364, "step": 8658 }, { "epoch": 0.46453862660944206, "grad_norm": 0.44140625, "learning_rate": 4.948290373997477e-06, "loss": 2.462, "step": 8659 }, { "epoch": 0.4645922746781116, "grad_norm": 0.546875, "learning_rate": 4.948272794274215e-06, "loss": 2.2513, "step": 8660 }, { "epoch": 0.4646459227467811, "grad_norm": 0.396484375, "learning_rate": 4.948255211594408e-06, "loss": 2.3605, "step": 8661 }, { "epoch": 0.46469957081545066, "grad_norm": 0.4453125, "learning_rate": 4.9482376259580806e-06, "loss": 2.2251, "step": 8662 }, { "epoch": 0.4647532188841202, "grad_norm": 0.515625, "learning_rate": 4.948220037365252e-06, "loss": 2.2427, "step": 8663 }, { "epoch": 0.4648068669527897, "grad_norm": 0.4140625, "learning_rate": 4.948202445815942e-06, "loss": 2.3291, "step": 8664 }, { "epoch": 0.46486051502145925, "grad_norm": 0.458984375, "learning_rate": 4.948184851310176e-06, "loss": 2.4302, "step": 8665 }, { "epoch": 0.46491416309012873, "grad_norm": 3.15625, "learning_rate": 4.948167253847972e-06, "loss": 2.4069, "step": 8666 }, { "epoch": 0.46496781115879826, "grad_norm": 0.46484375, "learning_rate": 4.948149653429351e-06, "loss": 2.1973, "step": 8667 }, { "epoch": 0.4650214592274678, "grad_norm": 0.33984375, "learning_rate": 4.9481320500543364e-06, "loss": 2.253, "step": 8668 }, { "epoch": 0.4650751072961373, "grad_norm": 0.359375, "learning_rate": 4.948114443722948e-06, "loss": 2.2617, "step": 8669 }, { "epoch": 0.46512875536480686, "grad_norm": 0.408203125, "learning_rate": 4.9480968344352075e-06, "loss": 2.1816, "step": 8670 }, { "epoch": 0.4651824034334764, "grad_norm": 0.357421875, "learning_rate": 4.948079222191136e-06, "loss": 2.1321, "step": 8671 }, { "epoch": 0.4652360515021459, "grad_norm": 0.49609375, "learning_rate": 4.948061606990755e-06, "loss": 2.1525, "step": 8672 }, { "epoch": 0.46528969957081545, "grad_norm": 0.58984375, "learning_rate": 4.948043988834085e-06, "loss": 2.0968, "step": 8673 }, { "epoch": 0.465343347639485, "grad_norm": 0.44140625, "learning_rate": 4.948026367721149e-06, "loss": 1.4518, "step": 8674 }, { "epoch": 0.4653969957081545, "grad_norm": 0.4296875, "learning_rate": 4.948008743651966e-06, "loss": 2.214, "step": 8675 }, { "epoch": 0.46545064377682405, "grad_norm": 0.4453125, "learning_rate": 4.947991116626559e-06, "loss": 2.452, "step": 8676 }, { "epoch": 0.4655042918454936, "grad_norm": 0.423828125, "learning_rate": 4.947973486644949e-06, "loss": 2.1661, "step": 8677 }, { "epoch": 0.4655579399141631, "grad_norm": 0.4453125, "learning_rate": 4.947955853707157e-06, "loss": 2.3545, "step": 8678 }, { "epoch": 0.46561158798283264, "grad_norm": 0.60546875, "learning_rate": 4.947938217813204e-06, "loss": 2.5084, "step": 8679 }, { "epoch": 0.4656652360515021, "grad_norm": 0.416015625, "learning_rate": 4.9479205789631125e-06, "loss": 2.2617, "step": 8680 }, { "epoch": 0.46571888412017165, "grad_norm": 0.42578125, "learning_rate": 4.947902937156902e-06, "loss": 2.4082, "step": 8681 }, { "epoch": 0.4657725321888412, "grad_norm": 0.8984375, "learning_rate": 4.9478852923945955e-06, "loss": 1.3799, "step": 8682 }, { "epoch": 0.4658261802575107, "grad_norm": 0.392578125, "learning_rate": 4.947867644676212e-06, "loss": 2.4117, "step": 8683 }, { "epoch": 0.46587982832618025, "grad_norm": 2.1875, "learning_rate": 4.947849994001776e-06, "loss": 1.9464, "step": 8684 }, { "epoch": 0.4659334763948498, "grad_norm": 0.4375, "learning_rate": 4.9478323403713065e-06, "loss": 2.1342, "step": 8685 }, { "epoch": 0.4659871244635193, "grad_norm": 0.486328125, "learning_rate": 4.9478146837848255e-06, "loss": 2.6123, "step": 8686 }, { "epoch": 0.46604077253218884, "grad_norm": 0.443359375, "learning_rate": 4.9477970242423544e-06, "loss": 1.4282, "step": 8687 }, { "epoch": 0.4660944206008584, "grad_norm": 0.47265625, "learning_rate": 4.947779361743915e-06, "loss": 2.2794, "step": 8688 }, { "epoch": 0.4661480686695279, "grad_norm": 0.69140625, "learning_rate": 4.947761696289527e-06, "loss": 2.4243, "step": 8689 }, { "epoch": 0.46620171673819744, "grad_norm": 0.466796875, "learning_rate": 4.947744027879213e-06, "loss": 2.5332, "step": 8690 }, { "epoch": 0.46625536480686697, "grad_norm": 1.3203125, "learning_rate": 4.947726356512995e-06, "loss": 2.2224, "step": 8691 }, { "epoch": 0.4663090128755365, "grad_norm": 0.388671875, "learning_rate": 4.9477086821908924e-06, "loss": 2.1382, "step": 8692 }, { "epoch": 0.46636266094420603, "grad_norm": 0.4375, "learning_rate": 4.947691004912929e-06, "loss": 2.2898, "step": 8693 }, { "epoch": 0.46641630901287556, "grad_norm": 0.4375, "learning_rate": 4.947673324679123e-06, "loss": 2.3597, "step": 8694 }, { "epoch": 0.46646995708154504, "grad_norm": 0.390625, "learning_rate": 4.947655641489499e-06, "loss": 2.3562, "step": 8695 }, { "epoch": 0.46652360515021457, "grad_norm": 0.404296875, "learning_rate": 4.947637955344076e-06, "loss": 2.3583, "step": 8696 }, { "epoch": 0.4665772532188841, "grad_norm": 0.38671875, "learning_rate": 4.947620266242877e-06, "loss": 2.055, "step": 8697 }, { "epoch": 0.46663090128755363, "grad_norm": 0.32421875, "learning_rate": 4.947602574185922e-06, "loss": 2.1712, "step": 8698 }, { "epoch": 0.46668454935622317, "grad_norm": 0.392578125, "learning_rate": 4.947584879173233e-06, "loss": 2.0843, "step": 8699 }, { "epoch": 0.4667381974248927, "grad_norm": 0.466796875, "learning_rate": 4.947567181204831e-06, "loss": 2.6011, "step": 8700 }, { "epoch": 0.46679184549356223, "grad_norm": 0.388671875, "learning_rate": 4.947549480280739e-06, "loss": 2.1461, "step": 8701 }, { "epoch": 0.46684549356223176, "grad_norm": 0.38671875, "learning_rate": 4.947531776400976e-06, "loss": 2.2179, "step": 8702 }, { "epoch": 0.4668991416309013, "grad_norm": 0.45703125, "learning_rate": 4.9475140695655645e-06, "loss": 2.3858, "step": 8703 }, { "epoch": 0.4669527896995708, "grad_norm": 1.125, "learning_rate": 4.947496359774527e-06, "loss": 2.101, "step": 8704 }, { "epoch": 0.46700643776824036, "grad_norm": 0.4140625, "learning_rate": 4.9474786470278816e-06, "loss": 2.2892, "step": 8705 }, { "epoch": 0.4670600858369099, "grad_norm": 0.423828125, "learning_rate": 4.947460931325653e-06, "loss": 2.3345, "step": 8706 }, { "epoch": 0.4671137339055794, "grad_norm": 0.7421875, "learning_rate": 4.947443212667861e-06, "loss": 2.4907, "step": 8707 }, { "epoch": 0.46716738197424895, "grad_norm": 0.53515625, "learning_rate": 4.947425491054528e-06, "loss": 2.0454, "step": 8708 }, { "epoch": 0.46722103004291843, "grad_norm": 0.412109375, "learning_rate": 4.947407766485674e-06, "loss": 2.459, "step": 8709 }, { "epoch": 0.46727467811158796, "grad_norm": 0.4140625, "learning_rate": 4.947390038961322e-06, "loss": 2.4538, "step": 8710 }, { "epoch": 0.4673283261802575, "grad_norm": 0.392578125, "learning_rate": 4.947372308481493e-06, "loss": 2.118, "step": 8711 }, { "epoch": 0.467381974248927, "grad_norm": 0.466796875, "learning_rate": 4.947354575046207e-06, "loss": 2.1973, "step": 8712 }, { "epoch": 0.46743562231759656, "grad_norm": 0.435546875, "learning_rate": 4.947336838655487e-06, "loss": 2.2637, "step": 8713 }, { "epoch": 0.4674892703862661, "grad_norm": 0.375, "learning_rate": 4.947319099309353e-06, "loss": 2.209, "step": 8714 }, { "epoch": 0.4675429184549356, "grad_norm": 0.384765625, "learning_rate": 4.947301357007829e-06, "loss": 2.2485, "step": 8715 }, { "epoch": 0.46759656652360515, "grad_norm": 0.404296875, "learning_rate": 4.947283611750933e-06, "loss": 2.1262, "step": 8716 }, { "epoch": 0.4676502145922747, "grad_norm": 0.421875, "learning_rate": 4.94726586353869e-06, "loss": 2.0217, "step": 8717 }, { "epoch": 0.4677038626609442, "grad_norm": 0.482421875, "learning_rate": 4.947248112371117e-06, "loss": 2.11, "step": 8718 }, { "epoch": 0.46775751072961375, "grad_norm": 0.43359375, "learning_rate": 4.94723035824824e-06, "loss": 2.3465, "step": 8719 }, { "epoch": 0.4678111587982833, "grad_norm": 0.4453125, "learning_rate": 4.9472126011700776e-06, "loss": 2.2171, "step": 8720 }, { "epoch": 0.4678648068669528, "grad_norm": 0.54296875, "learning_rate": 4.947194841136653e-06, "loss": 2.3893, "step": 8721 }, { "epoch": 0.46791845493562234, "grad_norm": 0.369140625, "learning_rate": 4.947177078147986e-06, "loss": 2.4491, "step": 8722 }, { "epoch": 0.4679721030042919, "grad_norm": 0.427734375, "learning_rate": 4.947159312204098e-06, "loss": 2.2903, "step": 8723 }, { "epoch": 0.46802575107296135, "grad_norm": 0.40625, "learning_rate": 4.947141543305013e-06, "loss": 2.0731, "step": 8724 }, { "epoch": 0.4680793991416309, "grad_norm": 0.36328125, "learning_rate": 4.94712377145075e-06, "loss": 2.2971, "step": 8725 }, { "epoch": 0.4681330472103004, "grad_norm": 0.439453125, "learning_rate": 4.947105996641331e-06, "loss": 2.2539, "step": 8726 }, { "epoch": 0.46818669527896994, "grad_norm": 0.373046875, "learning_rate": 4.947088218876777e-06, "loss": 1.9796, "step": 8727 }, { "epoch": 0.4682403433476395, "grad_norm": 0.37890625, "learning_rate": 4.947070438157111e-06, "loss": 2.4644, "step": 8728 }, { "epoch": 0.468293991416309, "grad_norm": 0.32421875, "learning_rate": 4.947052654482353e-06, "loss": 1.7644, "step": 8729 }, { "epoch": 0.46834763948497854, "grad_norm": 0.52734375, "learning_rate": 4.947034867852526e-06, "loss": 2.4002, "step": 8730 }, { "epoch": 0.46840128755364807, "grad_norm": 0.494140625, "learning_rate": 4.9470170782676494e-06, "loss": 1.8351, "step": 8731 }, { "epoch": 0.4684549356223176, "grad_norm": 0.4453125, "learning_rate": 4.946999285727746e-06, "loss": 1.9923, "step": 8732 }, { "epoch": 0.46850858369098713, "grad_norm": 0.482421875, "learning_rate": 4.946981490232837e-06, "loss": 2.0306, "step": 8733 }, { "epoch": 0.46856223175965667, "grad_norm": 0.4453125, "learning_rate": 4.946963691782945e-06, "loss": 2.0157, "step": 8734 }, { "epoch": 0.4686158798283262, "grad_norm": 0.359375, "learning_rate": 4.9469458903780895e-06, "loss": 1.9812, "step": 8735 }, { "epoch": 0.46866952789699573, "grad_norm": 0.4140625, "learning_rate": 4.946928086018293e-06, "loss": 2.3325, "step": 8736 }, { "epoch": 0.46872317596566526, "grad_norm": 0.6015625, "learning_rate": 4.946910278703578e-06, "loss": 2.199, "step": 8737 }, { "epoch": 0.46877682403433474, "grad_norm": 0.462890625, "learning_rate": 4.946892468433964e-06, "loss": 2.4099, "step": 8738 }, { "epoch": 0.46883047210300427, "grad_norm": 0.478515625, "learning_rate": 4.946874655209474e-06, "loss": 2.2578, "step": 8739 }, { "epoch": 0.4688841201716738, "grad_norm": 0.431640625, "learning_rate": 4.946856839030129e-06, "loss": 2.4344, "step": 8740 }, { "epoch": 0.46893776824034333, "grad_norm": 0.474609375, "learning_rate": 4.94683901989595e-06, "loss": 2.3279, "step": 8741 }, { "epoch": 0.46899141630901287, "grad_norm": 0.54296875, "learning_rate": 4.946821197806959e-06, "loss": 2.309, "step": 8742 }, { "epoch": 0.4690450643776824, "grad_norm": 0.3828125, "learning_rate": 4.946803372763178e-06, "loss": 2.1901, "step": 8743 }, { "epoch": 0.46909871244635193, "grad_norm": 0.37109375, "learning_rate": 4.9467855447646275e-06, "loss": 2.3878, "step": 8744 }, { "epoch": 0.46915236051502146, "grad_norm": 0.353515625, "learning_rate": 4.94676771381133e-06, "loss": 2.1814, "step": 8745 }, { "epoch": 0.469206008583691, "grad_norm": 0.42578125, "learning_rate": 4.946749879903306e-06, "loss": 2.2824, "step": 8746 }, { "epoch": 0.4692596566523605, "grad_norm": 0.43359375, "learning_rate": 4.946732043040579e-06, "loss": 2.421, "step": 8747 }, { "epoch": 0.46931330472103006, "grad_norm": 0.54296875, "learning_rate": 4.946714203223168e-06, "loss": 2.7437, "step": 8748 }, { "epoch": 0.4693669527896996, "grad_norm": 0.45703125, "learning_rate": 4.946696360451097e-06, "loss": 2.0539, "step": 8749 }, { "epoch": 0.4694206008583691, "grad_norm": 0.3671875, "learning_rate": 4.946678514724385e-06, "loss": 2.029, "step": 8750 }, { "epoch": 0.46947424892703865, "grad_norm": 0.43359375, "learning_rate": 4.946660666043056e-06, "loss": 2.5437, "step": 8751 }, { "epoch": 0.4695278969957081, "grad_norm": 0.64453125, "learning_rate": 4.94664281440713e-06, "loss": 2.2276, "step": 8752 }, { "epoch": 0.46958154506437766, "grad_norm": 0.412109375, "learning_rate": 4.9466249598166285e-06, "loss": 2.3015, "step": 8753 }, { "epoch": 0.4696351931330472, "grad_norm": 0.3984375, "learning_rate": 4.946607102271574e-06, "loss": 2.5112, "step": 8754 }, { "epoch": 0.4696888412017167, "grad_norm": 0.51171875, "learning_rate": 4.946589241771987e-06, "loss": 2.4474, "step": 8755 }, { "epoch": 0.46974248927038625, "grad_norm": 0.490234375, "learning_rate": 4.94657137831789e-06, "loss": 2.1784, "step": 8756 }, { "epoch": 0.4697961373390558, "grad_norm": 0.68359375, "learning_rate": 4.946553511909305e-06, "loss": 2.2995, "step": 8757 }, { "epoch": 0.4698497854077253, "grad_norm": 0.396484375, "learning_rate": 4.946535642546252e-06, "loss": 2.3782, "step": 8758 }, { "epoch": 0.46990343347639485, "grad_norm": 0.380859375, "learning_rate": 4.946517770228754e-06, "loss": 2.3339, "step": 8759 }, { "epoch": 0.4699570815450644, "grad_norm": 0.431640625, "learning_rate": 4.946499894956831e-06, "loss": 2.2643, "step": 8760 }, { "epoch": 0.4700107296137339, "grad_norm": 0.458984375, "learning_rate": 4.946482016730506e-06, "loss": 2.4007, "step": 8761 }, { "epoch": 0.47006437768240344, "grad_norm": 0.54296875, "learning_rate": 4.946464135549801e-06, "loss": 1.6152, "step": 8762 }, { "epoch": 0.470118025751073, "grad_norm": 1.8984375, "learning_rate": 4.946446251414735e-06, "loss": 2.4195, "step": 8763 }, { "epoch": 0.4701716738197425, "grad_norm": 0.52734375, "learning_rate": 4.946428364325333e-06, "loss": 1.2953, "step": 8764 }, { "epoch": 0.47022532188841204, "grad_norm": 0.439453125, "learning_rate": 4.946410474281614e-06, "loss": 2.1249, "step": 8765 }, { "epoch": 0.47027896995708157, "grad_norm": 0.421875, "learning_rate": 4.946392581283601e-06, "loss": 2.2753, "step": 8766 }, { "epoch": 0.47033261802575105, "grad_norm": 0.478515625, "learning_rate": 4.9463746853313146e-06, "loss": 2.2803, "step": 8767 }, { "epoch": 0.4703862660944206, "grad_norm": 0.58984375, "learning_rate": 4.946356786424777e-06, "loss": 2.3123, "step": 8768 }, { "epoch": 0.4704399141630901, "grad_norm": 0.54296875, "learning_rate": 4.94633888456401e-06, "loss": 2.2969, "step": 8769 }, { "epoch": 0.47049356223175964, "grad_norm": 0.365234375, "learning_rate": 4.946320979749035e-06, "loss": 2.345, "step": 8770 }, { "epoch": 0.4705472103004292, "grad_norm": 0.77734375, "learning_rate": 4.946303071979874e-06, "loss": 1.6842, "step": 8771 }, { "epoch": 0.4706008583690987, "grad_norm": 0.380859375, "learning_rate": 4.946285161256548e-06, "loss": 2.0171, "step": 8772 }, { "epoch": 0.47065450643776824, "grad_norm": 0.451171875, "learning_rate": 4.946267247579079e-06, "loss": 2.289, "step": 8773 }, { "epoch": 0.47070815450643777, "grad_norm": 0.421875, "learning_rate": 4.946249330947489e-06, "loss": 2.3974, "step": 8774 }, { "epoch": 0.4707618025751073, "grad_norm": 0.4375, "learning_rate": 4.946231411361798e-06, "loss": 2.2441, "step": 8775 }, { "epoch": 0.47081545064377683, "grad_norm": 0.58984375, "learning_rate": 4.946213488822029e-06, "loss": 2.1352, "step": 8776 }, { "epoch": 0.47086909871244637, "grad_norm": 0.4296875, "learning_rate": 4.9461955633282035e-06, "loss": 2.0421, "step": 8777 }, { "epoch": 0.4709227467811159, "grad_norm": 0.466796875, "learning_rate": 4.9461776348803435e-06, "loss": 2.4609, "step": 8778 }, { "epoch": 0.47097639484978543, "grad_norm": 0.75390625, "learning_rate": 4.94615970347847e-06, "loss": 2.5532, "step": 8779 }, { "epoch": 0.47103004291845496, "grad_norm": 0.416015625, "learning_rate": 4.946141769122604e-06, "loss": 2.3591, "step": 8780 }, { "epoch": 0.47108369098712444, "grad_norm": 0.423828125, "learning_rate": 4.946123831812769e-06, "loss": 2.3898, "step": 8781 }, { "epoch": 0.47113733905579397, "grad_norm": 0.4609375, "learning_rate": 4.946105891548986e-06, "loss": 2.3494, "step": 8782 }, { "epoch": 0.4711909871244635, "grad_norm": 0.3828125, "learning_rate": 4.946087948331276e-06, "loss": 2.2242, "step": 8783 }, { "epoch": 0.47124463519313303, "grad_norm": 0.416015625, "learning_rate": 4.946070002159661e-06, "loss": 2.2314, "step": 8784 }, { "epoch": 0.47129828326180256, "grad_norm": 0.392578125, "learning_rate": 4.946052053034163e-06, "loss": 2.3206, "step": 8785 }, { "epoch": 0.4713519313304721, "grad_norm": 0.42578125, "learning_rate": 4.946034100954803e-06, "loss": 1.8699, "step": 8786 }, { "epoch": 0.4714055793991416, "grad_norm": 0.482421875, "learning_rate": 4.9460161459216025e-06, "loss": 2.1504, "step": 8787 }, { "epoch": 0.47145922746781116, "grad_norm": 0.369140625, "learning_rate": 4.945998187934584e-06, "loss": 2.0733, "step": 8788 }, { "epoch": 0.4715128755364807, "grad_norm": 0.4921875, "learning_rate": 4.945980226993769e-06, "loss": 2.3009, "step": 8789 }, { "epoch": 0.4715665236051502, "grad_norm": 0.46875, "learning_rate": 4.94596226309918e-06, "loss": 2.3351, "step": 8790 }, { "epoch": 0.47162017167381975, "grad_norm": 0.462890625, "learning_rate": 4.945944296250838e-06, "loss": 2.0664, "step": 8791 }, { "epoch": 0.4716738197424893, "grad_norm": 0.42578125, "learning_rate": 4.945926326448763e-06, "loss": 2.0924, "step": 8792 }, { "epoch": 0.4717274678111588, "grad_norm": 0.443359375, "learning_rate": 4.9459083536929785e-06, "loss": 2.2773, "step": 8793 }, { "epoch": 0.47178111587982835, "grad_norm": 0.400390625, "learning_rate": 4.9458903779835066e-06, "loss": 2.2111, "step": 8794 }, { "epoch": 0.4718347639484979, "grad_norm": 0.447265625, "learning_rate": 4.945872399320368e-06, "loss": 2.3542, "step": 8795 }, { "epoch": 0.47188841201716736, "grad_norm": 0.486328125, "learning_rate": 4.945854417703584e-06, "loss": 2.404, "step": 8796 }, { "epoch": 0.4719420600858369, "grad_norm": 0.44921875, "learning_rate": 4.945836433133178e-06, "loss": 2.0879, "step": 8797 }, { "epoch": 0.4719957081545064, "grad_norm": 0.43359375, "learning_rate": 4.945818445609171e-06, "loss": 2.3505, "step": 8798 }, { "epoch": 0.47204935622317595, "grad_norm": 0.443359375, "learning_rate": 4.945800455131584e-06, "loss": 2.4325, "step": 8799 }, { "epoch": 0.4721030042918455, "grad_norm": 0.419921875, "learning_rate": 4.945782461700439e-06, "loss": 2.4049, "step": 8800 }, { "epoch": 0.472156652360515, "grad_norm": 0.326171875, "learning_rate": 4.945764465315758e-06, "loss": 2.1433, "step": 8801 }, { "epoch": 0.47221030042918455, "grad_norm": 0.455078125, "learning_rate": 4.945746465977563e-06, "loss": 2.3824, "step": 8802 }, { "epoch": 0.4722639484978541, "grad_norm": 0.474609375, "learning_rate": 4.945728463685875e-06, "loss": 2.3215, "step": 8803 }, { "epoch": 0.4723175965665236, "grad_norm": 0.470703125, "learning_rate": 4.945710458440716e-06, "loss": 2.1245, "step": 8804 }, { "epoch": 0.47237124463519314, "grad_norm": 0.439453125, "learning_rate": 4.945692450242108e-06, "loss": 2.2724, "step": 8805 }, { "epoch": 0.4724248927038627, "grad_norm": 0.44921875, "learning_rate": 4.945674439090073e-06, "loss": 2.1576, "step": 8806 }, { "epoch": 0.4724785407725322, "grad_norm": 2.703125, "learning_rate": 4.945656424984632e-06, "loss": 2.4053, "step": 8807 }, { "epoch": 0.47253218884120174, "grad_norm": 0.35546875, "learning_rate": 4.945638407925808e-06, "loss": 1.9733, "step": 8808 }, { "epoch": 0.47258583690987127, "grad_norm": 0.369140625, "learning_rate": 4.945620387913621e-06, "loss": 2.0461, "step": 8809 }, { "epoch": 0.47263948497854075, "grad_norm": 0.490234375, "learning_rate": 4.9456023649480935e-06, "loss": 2.4509, "step": 8810 }, { "epoch": 0.4726931330472103, "grad_norm": 1.0078125, "learning_rate": 4.945584339029248e-06, "loss": 2.4728, "step": 8811 }, { "epoch": 0.4727467811158798, "grad_norm": 0.71484375, "learning_rate": 4.945566310157105e-06, "loss": 2.3511, "step": 8812 }, { "epoch": 0.47280042918454934, "grad_norm": 0.470703125, "learning_rate": 4.9455482783316874e-06, "loss": 2.0818, "step": 8813 }, { "epoch": 0.4728540772532189, "grad_norm": 0.431640625, "learning_rate": 4.945530243553017e-06, "loss": 2.0493, "step": 8814 }, { "epoch": 0.4729077253218884, "grad_norm": 0.390625, "learning_rate": 4.945512205821115e-06, "loss": 2.5921, "step": 8815 }, { "epoch": 0.47296137339055794, "grad_norm": 0.43359375, "learning_rate": 4.945494165136002e-06, "loss": 2.4022, "step": 8816 }, { "epoch": 0.47301502145922747, "grad_norm": 0.404296875, "learning_rate": 4.9454761214977034e-06, "loss": 2.4339, "step": 8817 }, { "epoch": 0.473068669527897, "grad_norm": 0.400390625, "learning_rate": 4.945458074906237e-06, "loss": 2.4886, "step": 8818 }, { "epoch": 0.47312231759656653, "grad_norm": 0.380859375, "learning_rate": 4.945440025361626e-06, "loss": 2.2798, "step": 8819 }, { "epoch": 0.47317596566523606, "grad_norm": 0.451171875, "learning_rate": 4.945421972863893e-06, "loss": 2.327, "step": 8820 }, { "epoch": 0.4732296137339056, "grad_norm": 0.447265625, "learning_rate": 4.94540391741306e-06, "loss": 2.6489, "step": 8821 }, { "epoch": 0.47328326180257513, "grad_norm": 0.474609375, "learning_rate": 4.945385859009148e-06, "loss": 2.0168, "step": 8822 }, { "epoch": 0.47333690987124466, "grad_norm": 0.390625, "learning_rate": 4.945367797652177e-06, "loss": 2.0701, "step": 8823 }, { "epoch": 0.47339055793991414, "grad_norm": 0.462890625, "learning_rate": 4.945349733342172e-06, "loss": 2.5112, "step": 8824 }, { "epoch": 0.47344420600858367, "grad_norm": 0.365234375, "learning_rate": 4.945331666079154e-06, "loss": 2.0349, "step": 8825 }, { "epoch": 0.4734978540772532, "grad_norm": 0.44921875, "learning_rate": 4.945313595863144e-06, "loss": 2.2986, "step": 8826 }, { "epoch": 0.47355150214592273, "grad_norm": 0.373046875, "learning_rate": 4.945295522694163e-06, "loss": 2.2047, "step": 8827 }, { "epoch": 0.47360515021459226, "grad_norm": 0.388671875, "learning_rate": 4.945277446572236e-06, "loss": 2.3384, "step": 8828 }, { "epoch": 0.4736587982832618, "grad_norm": 0.5703125, "learning_rate": 4.945259367497381e-06, "loss": 2.5267, "step": 8829 }, { "epoch": 0.4737124463519313, "grad_norm": 0.455078125, "learning_rate": 4.945241285469622e-06, "loss": 2.1846, "step": 8830 }, { "epoch": 0.47376609442060086, "grad_norm": 0.91796875, "learning_rate": 4.945223200488981e-06, "loss": 2.6102, "step": 8831 }, { "epoch": 0.4738197424892704, "grad_norm": 0.474609375, "learning_rate": 4.945205112555479e-06, "loss": 2.2499, "step": 8832 }, { "epoch": 0.4738733905579399, "grad_norm": 0.38671875, "learning_rate": 4.945187021669138e-06, "loss": 2.1638, "step": 8833 }, { "epoch": 0.47392703862660945, "grad_norm": 0.447265625, "learning_rate": 4.945168927829981e-06, "loss": 2.3478, "step": 8834 }, { "epoch": 0.473980686695279, "grad_norm": 0.423828125, "learning_rate": 4.945150831038027e-06, "loss": 2.2133, "step": 8835 }, { "epoch": 0.4740343347639485, "grad_norm": 0.392578125, "learning_rate": 4.945132731293301e-06, "loss": 2.2084, "step": 8836 }, { "epoch": 0.47408798283261805, "grad_norm": 1.75, "learning_rate": 4.945114628595824e-06, "loss": 2.4991, "step": 8837 }, { "epoch": 0.4741416309012876, "grad_norm": 0.44921875, "learning_rate": 4.945096522945616e-06, "loss": 2.4052, "step": 8838 }, { "epoch": 0.47419527896995706, "grad_norm": 0.43359375, "learning_rate": 4.945078414342701e-06, "loss": 2.5375, "step": 8839 }, { "epoch": 0.4742489270386266, "grad_norm": 0.43359375, "learning_rate": 4.9450603027871e-06, "loss": 2.2988, "step": 8840 }, { "epoch": 0.4743025751072961, "grad_norm": 0.48828125, "learning_rate": 4.945042188278835e-06, "loss": 2.272, "step": 8841 }, { "epoch": 0.47435622317596565, "grad_norm": 0.4609375, "learning_rate": 4.945024070817928e-06, "loss": 2.3495, "step": 8842 }, { "epoch": 0.4744098712446352, "grad_norm": 0.376953125, "learning_rate": 4.945005950404401e-06, "loss": 2.2293, "step": 8843 }, { "epoch": 0.4744635193133047, "grad_norm": 0.4140625, "learning_rate": 4.944987827038276e-06, "loss": 2.3616, "step": 8844 }, { "epoch": 0.47451716738197425, "grad_norm": 0.439453125, "learning_rate": 4.944969700719573e-06, "loss": 1.6806, "step": 8845 }, { "epoch": 0.4745708154506438, "grad_norm": 0.447265625, "learning_rate": 4.944951571448317e-06, "loss": 2.1221, "step": 8846 }, { "epoch": 0.4746244635193133, "grad_norm": 0.3984375, "learning_rate": 4.944933439224527e-06, "loss": 2.3043, "step": 8847 }, { "epoch": 0.47467811158798284, "grad_norm": 0.51953125, "learning_rate": 4.944915304048227e-06, "loss": 2.4759, "step": 8848 }, { "epoch": 0.4747317596566524, "grad_norm": 0.474609375, "learning_rate": 4.944897165919439e-06, "loss": 2.316, "step": 8849 }, { "epoch": 0.4747854077253219, "grad_norm": 0.3515625, "learning_rate": 4.944879024838182e-06, "loss": 2.264, "step": 8850 }, { "epoch": 0.47483905579399144, "grad_norm": 0.4140625, "learning_rate": 4.944860880804481e-06, "loss": 2.3249, "step": 8851 }, { "epoch": 0.47489270386266097, "grad_norm": 0.4609375, "learning_rate": 4.944842733818357e-06, "loss": 2.123, "step": 8852 }, { "epoch": 0.47494635193133045, "grad_norm": 0.427734375, "learning_rate": 4.944824583879831e-06, "loss": 2.41, "step": 8853 }, { "epoch": 0.475, "grad_norm": 0.6484375, "learning_rate": 4.944806430988927e-06, "loss": 1.0647, "step": 8854 }, { "epoch": 0.4750536480686695, "grad_norm": 0.51171875, "learning_rate": 4.944788275145665e-06, "loss": 2.4199, "step": 8855 }, { "epoch": 0.47510729613733904, "grad_norm": 0.427734375, "learning_rate": 4.944770116350067e-06, "loss": 2.1947, "step": 8856 }, { "epoch": 0.4751609442060086, "grad_norm": 0.453125, "learning_rate": 4.944751954602156e-06, "loss": 2.1879, "step": 8857 }, { "epoch": 0.4752145922746781, "grad_norm": 0.486328125, "learning_rate": 4.944733789901953e-06, "loss": 2.3992, "step": 8858 }, { "epoch": 0.47526824034334764, "grad_norm": 0.3984375, "learning_rate": 4.94471562224948e-06, "loss": 2.1054, "step": 8859 }, { "epoch": 0.47532188841201717, "grad_norm": 1.1328125, "learning_rate": 4.94469745164476e-06, "loss": 2.1622, "step": 8860 }, { "epoch": 0.4753755364806867, "grad_norm": 0.5390625, "learning_rate": 4.944679278087814e-06, "loss": 2.1699, "step": 8861 }, { "epoch": 0.47542918454935623, "grad_norm": 0.56640625, "learning_rate": 4.944661101578665e-06, "loss": 2.2377, "step": 8862 }, { "epoch": 0.47548283261802576, "grad_norm": 0.40625, "learning_rate": 4.944642922117332e-06, "loss": 2.1724, "step": 8863 }, { "epoch": 0.4755364806866953, "grad_norm": 0.427734375, "learning_rate": 4.9446247397038405e-06, "loss": 2.2562, "step": 8864 }, { "epoch": 0.4755901287553648, "grad_norm": 0.361328125, "learning_rate": 4.944606554338211e-06, "loss": 2.0826, "step": 8865 }, { "epoch": 0.47564377682403436, "grad_norm": 0.4375, "learning_rate": 4.944588366020466e-06, "loss": 2.4331, "step": 8866 }, { "epoch": 0.47569742489270384, "grad_norm": 0.470703125, "learning_rate": 4.944570174750625e-06, "loss": 2.5492, "step": 8867 }, { "epoch": 0.47575107296137337, "grad_norm": 0.498046875, "learning_rate": 4.944551980528714e-06, "loss": 2.6729, "step": 8868 }, { "epoch": 0.4758047210300429, "grad_norm": 0.390625, "learning_rate": 4.944533783354751e-06, "loss": 2.1953, "step": 8869 }, { "epoch": 0.47585836909871243, "grad_norm": 0.703125, "learning_rate": 4.944515583228761e-06, "loss": 2.1878, "step": 8870 }, { "epoch": 0.47591201716738196, "grad_norm": 0.419921875, "learning_rate": 4.944497380150764e-06, "loss": 2.1381, "step": 8871 }, { "epoch": 0.4759656652360515, "grad_norm": 0.51953125, "learning_rate": 4.944479174120783e-06, "loss": 2.3032, "step": 8872 }, { "epoch": 0.476019313304721, "grad_norm": 0.6796875, "learning_rate": 4.944460965138841e-06, "loss": 1.3505, "step": 8873 }, { "epoch": 0.47607296137339056, "grad_norm": 0.48046875, "learning_rate": 4.9444427532049574e-06, "loss": 2.2773, "step": 8874 }, { "epoch": 0.4761266094420601, "grad_norm": 0.412109375, "learning_rate": 4.944424538319156e-06, "loss": 2.3156, "step": 8875 }, { "epoch": 0.4761802575107296, "grad_norm": 0.474609375, "learning_rate": 4.944406320481458e-06, "loss": 2.3368, "step": 8876 }, { "epoch": 0.47623390557939915, "grad_norm": 0.41015625, "learning_rate": 4.944388099691887e-06, "loss": 2.2277, "step": 8877 }, { "epoch": 0.4762875536480687, "grad_norm": 0.41796875, "learning_rate": 4.944369875950463e-06, "loss": 2.2986, "step": 8878 }, { "epoch": 0.4763412017167382, "grad_norm": 0.40234375, "learning_rate": 4.944351649257208e-06, "loss": 2.0862, "step": 8879 }, { "epoch": 0.47639484978540775, "grad_norm": 0.4140625, "learning_rate": 4.944333419612146e-06, "loss": 2.3175, "step": 8880 }, { "epoch": 0.4764484978540773, "grad_norm": 0.455078125, "learning_rate": 4.944315187015297e-06, "loss": 2.4188, "step": 8881 }, { "epoch": 0.47650214592274676, "grad_norm": 0.388671875, "learning_rate": 4.944296951466685e-06, "loss": 2.2673, "step": 8882 }, { "epoch": 0.4765557939914163, "grad_norm": 0.46484375, "learning_rate": 4.944278712966329e-06, "loss": 2.3835, "step": 8883 }, { "epoch": 0.4766094420600858, "grad_norm": 0.46484375, "learning_rate": 4.944260471514255e-06, "loss": 2.3446, "step": 8884 }, { "epoch": 0.47666309012875535, "grad_norm": 0.34765625, "learning_rate": 4.944242227110482e-06, "loss": 2.4847, "step": 8885 }, { "epoch": 0.4767167381974249, "grad_norm": 0.416015625, "learning_rate": 4.9442239797550315e-06, "loss": 2.2292, "step": 8886 }, { "epoch": 0.4767703862660944, "grad_norm": 0.390625, "learning_rate": 4.944205729447929e-06, "loss": 2.4284, "step": 8887 }, { "epoch": 0.47682403433476395, "grad_norm": 0.431640625, "learning_rate": 4.944187476189193e-06, "loss": 2.1611, "step": 8888 }, { "epoch": 0.4768776824034335, "grad_norm": 0.44140625, "learning_rate": 4.9441692199788485e-06, "loss": 2.189, "step": 8889 }, { "epoch": 0.476931330472103, "grad_norm": 0.353515625, "learning_rate": 4.944150960816915e-06, "loss": 1.9432, "step": 8890 }, { "epoch": 0.47698497854077254, "grad_norm": 0.46875, "learning_rate": 4.944132698703416e-06, "loss": 2.0996, "step": 8891 }, { "epoch": 0.4770386266094421, "grad_norm": 0.337890625, "learning_rate": 4.944114433638373e-06, "loss": 1.8503, "step": 8892 }, { "epoch": 0.4770922746781116, "grad_norm": 0.35546875, "learning_rate": 4.944096165621809e-06, "loss": 1.7676, "step": 8893 }, { "epoch": 0.47714592274678114, "grad_norm": 0.50390625, "learning_rate": 4.944077894653744e-06, "loss": 2.3126, "step": 8894 }, { "epoch": 0.47719957081545067, "grad_norm": 0.451171875, "learning_rate": 4.944059620734202e-06, "loss": 2.0806, "step": 8895 }, { "epoch": 0.47725321888412015, "grad_norm": 0.423828125, "learning_rate": 4.944041343863205e-06, "loss": 2.4534, "step": 8896 }, { "epoch": 0.4773068669527897, "grad_norm": 0.4140625, "learning_rate": 4.944023064040774e-06, "loss": 2.4978, "step": 8897 }, { "epoch": 0.4773605150214592, "grad_norm": 0.396484375, "learning_rate": 4.944004781266931e-06, "loss": 2.3939, "step": 8898 }, { "epoch": 0.47741416309012874, "grad_norm": 0.392578125, "learning_rate": 4.943986495541699e-06, "loss": 2.2004, "step": 8899 }, { "epoch": 0.47746781115879827, "grad_norm": 0.58984375, "learning_rate": 4.943968206865101e-06, "loss": 2.2038, "step": 8900 }, { "epoch": 0.4775214592274678, "grad_norm": 0.53125, "learning_rate": 4.943949915237156e-06, "loss": 2.124, "step": 8901 }, { "epoch": 0.47757510729613734, "grad_norm": 0.44921875, "learning_rate": 4.943931620657889e-06, "loss": 2.1689, "step": 8902 }, { "epoch": 0.47762875536480687, "grad_norm": 0.34765625, "learning_rate": 4.9439133231273204e-06, "loss": 2.1004, "step": 8903 }, { "epoch": 0.4776824034334764, "grad_norm": 0.41796875, "learning_rate": 4.943895022645474e-06, "loss": 2.1806, "step": 8904 }, { "epoch": 0.47773605150214593, "grad_norm": 1.7109375, "learning_rate": 4.94387671921237e-06, "loss": 2.0749, "step": 8905 }, { "epoch": 0.47778969957081546, "grad_norm": 0.421875, "learning_rate": 4.943858412828031e-06, "loss": 2.3557, "step": 8906 }, { "epoch": 0.477843347639485, "grad_norm": 0.419921875, "learning_rate": 4.94384010349248e-06, "loss": 2.2587, "step": 8907 }, { "epoch": 0.4778969957081545, "grad_norm": 0.458984375, "learning_rate": 4.943821791205738e-06, "loss": 2.2171, "step": 8908 }, { "epoch": 0.47795064377682406, "grad_norm": 0.38671875, "learning_rate": 4.943803475967829e-06, "loss": 2.1786, "step": 8909 }, { "epoch": 0.4780042918454936, "grad_norm": 0.359375, "learning_rate": 4.9437851577787725e-06, "loss": 2.2001, "step": 8910 }, { "epoch": 0.47805793991416307, "grad_norm": 0.455078125, "learning_rate": 4.943766836638592e-06, "loss": 2.3122, "step": 8911 }, { "epoch": 0.4781115879828326, "grad_norm": 0.44140625, "learning_rate": 4.943748512547309e-06, "loss": 2.1212, "step": 8912 }, { "epoch": 0.47816523605150213, "grad_norm": 0.5546875, "learning_rate": 4.943730185504948e-06, "loss": 2.3143, "step": 8913 }, { "epoch": 0.47821888412017166, "grad_norm": 0.41796875, "learning_rate": 4.9437118555115275e-06, "loss": 2.1702, "step": 8914 }, { "epoch": 0.4782725321888412, "grad_norm": 0.5, "learning_rate": 4.943693522567072e-06, "loss": 2.0561, "step": 8915 }, { "epoch": 0.4783261802575107, "grad_norm": 0.384765625, "learning_rate": 4.943675186671604e-06, "loss": 2.3774, "step": 8916 }, { "epoch": 0.47837982832618026, "grad_norm": 0.373046875, "learning_rate": 4.9436568478251435e-06, "loss": 2.1099, "step": 8917 }, { "epoch": 0.4784334763948498, "grad_norm": 0.373046875, "learning_rate": 4.943638506027713e-06, "loss": 2.1765, "step": 8918 }, { "epoch": 0.4784871244635193, "grad_norm": 0.421875, "learning_rate": 4.943620161279337e-06, "loss": 2.3736, "step": 8919 }, { "epoch": 0.47854077253218885, "grad_norm": 0.50390625, "learning_rate": 4.9436018135800356e-06, "loss": 2.2105, "step": 8920 }, { "epoch": 0.4785944206008584, "grad_norm": 20.0, "learning_rate": 4.943583462929832e-06, "loss": 2.2725, "step": 8921 }, { "epoch": 0.4786480686695279, "grad_norm": 0.40234375, "learning_rate": 4.943565109328747e-06, "loss": 2.0461, "step": 8922 }, { "epoch": 0.47870171673819745, "grad_norm": 0.42578125, "learning_rate": 4.943546752776804e-06, "loss": 2.0595, "step": 8923 }, { "epoch": 0.478755364806867, "grad_norm": 0.44140625, "learning_rate": 4.9435283932740255e-06, "loss": 2.2805, "step": 8924 }, { "epoch": 0.47880901287553645, "grad_norm": 0.453125, "learning_rate": 4.943510030820432e-06, "loss": 2.3567, "step": 8925 }, { "epoch": 0.478862660944206, "grad_norm": 0.3828125, "learning_rate": 4.943491665416047e-06, "loss": 2.279, "step": 8926 }, { "epoch": 0.4789163090128755, "grad_norm": 0.439453125, "learning_rate": 4.9434732970608926e-06, "loss": 2.5623, "step": 8927 }, { "epoch": 0.47896995708154505, "grad_norm": 0.462890625, "learning_rate": 4.94345492575499e-06, "loss": 2.3498, "step": 8928 }, { "epoch": 0.4790236051502146, "grad_norm": 0.39453125, "learning_rate": 4.943436551498362e-06, "loss": 2.3476, "step": 8929 }, { "epoch": 0.4790772532188841, "grad_norm": 0.490234375, "learning_rate": 4.943418174291031e-06, "loss": 1.4392, "step": 8930 }, { "epoch": 0.47913090128755365, "grad_norm": 0.4140625, "learning_rate": 4.94339979413302e-06, "loss": 2.2981, "step": 8931 }, { "epoch": 0.4791845493562232, "grad_norm": 0.50390625, "learning_rate": 4.943381411024349e-06, "loss": 2.4451, "step": 8932 }, { "epoch": 0.4792381974248927, "grad_norm": 0.384765625, "learning_rate": 4.943363024965042e-06, "loss": 2.0318, "step": 8933 }, { "epoch": 0.47929184549356224, "grad_norm": 0.40234375, "learning_rate": 4.943344635955121e-06, "loss": 2.4546, "step": 8934 }, { "epoch": 0.4793454935622318, "grad_norm": 0.451171875, "learning_rate": 4.9433262439946074e-06, "loss": 2.3844, "step": 8935 }, { "epoch": 0.4793991416309013, "grad_norm": 0.33984375, "learning_rate": 4.943307849083524e-06, "loss": 2.1855, "step": 8936 }, { "epoch": 0.47945278969957084, "grad_norm": 0.486328125, "learning_rate": 4.9432894512218935e-06, "loss": 2.363, "step": 8937 }, { "epoch": 0.47950643776824037, "grad_norm": 0.447265625, "learning_rate": 4.9432710504097365e-06, "loss": 2.0452, "step": 8938 }, { "epoch": 0.47956008583690984, "grad_norm": 0.47265625, "learning_rate": 4.9432526466470766e-06, "loss": 1.3487, "step": 8939 }, { "epoch": 0.4796137339055794, "grad_norm": 0.439453125, "learning_rate": 4.943234239933936e-06, "loss": 2.3998, "step": 8940 }, { "epoch": 0.4796673819742489, "grad_norm": 0.7265625, "learning_rate": 4.943215830270336e-06, "loss": 2.3344, "step": 8941 }, { "epoch": 0.47972103004291844, "grad_norm": 0.400390625, "learning_rate": 4.943197417656299e-06, "loss": 2.2451, "step": 8942 }, { "epoch": 0.47977467811158797, "grad_norm": 0.390625, "learning_rate": 4.943179002091849e-06, "loss": 2.4598, "step": 8943 }, { "epoch": 0.4798283261802575, "grad_norm": 0.388671875, "learning_rate": 4.943160583577006e-06, "loss": 2.1559, "step": 8944 }, { "epoch": 0.47988197424892703, "grad_norm": 0.46484375, "learning_rate": 4.9431421621117935e-06, "loss": 2.2971, "step": 8945 }, { "epoch": 0.47993562231759657, "grad_norm": 0.48046875, "learning_rate": 4.9431237376962325e-06, "loss": 2.1624, "step": 8946 }, { "epoch": 0.4799892703862661, "grad_norm": 0.451171875, "learning_rate": 4.943105310330347e-06, "loss": 2.2466, "step": 8947 }, { "epoch": 0.48004291845493563, "grad_norm": 0.41015625, "learning_rate": 4.943086880014158e-06, "loss": 2.1228, "step": 8948 }, { "epoch": 0.48009656652360516, "grad_norm": 0.45703125, "learning_rate": 4.943068446747689e-06, "loss": 2.3006, "step": 8949 }, { "epoch": 0.4801502145922747, "grad_norm": 0.44140625, "learning_rate": 4.94305001053096e-06, "loss": 2.3132, "step": 8950 }, { "epoch": 0.4802038626609442, "grad_norm": 0.421875, "learning_rate": 4.943031571363996e-06, "loss": 2.1231, "step": 8951 }, { "epoch": 0.48025751072961376, "grad_norm": 0.40234375, "learning_rate": 4.9430131292468165e-06, "loss": 2.2159, "step": 8952 }, { "epoch": 0.4803111587982833, "grad_norm": 0.53515625, "learning_rate": 4.942994684179446e-06, "loss": 2.5587, "step": 8953 }, { "epoch": 0.48036480686695276, "grad_norm": 0.462890625, "learning_rate": 4.942976236161906e-06, "loss": 2.2238, "step": 8954 }, { "epoch": 0.4804184549356223, "grad_norm": 0.52734375, "learning_rate": 4.942957785194219e-06, "loss": 2.2873, "step": 8955 }, { "epoch": 0.48047210300429183, "grad_norm": 0.439453125, "learning_rate": 4.9429393312764065e-06, "loss": 2.455, "step": 8956 }, { "epoch": 0.48052575107296136, "grad_norm": 0.33984375, "learning_rate": 4.942920874408491e-06, "loss": 2.0783, "step": 8957 }, { "epoch": 0.4805793991416309, "grad_norm": 0.404296875, "learning_rate": 4.942902414590496e-06, "loss": 2.2488, "step": 8958 }, { "epoch": 0.4806330472103004, "grad_norm": 0.6171875, "learning_rate": 4.942883951822442e-06, "loss": 2.1468, "step": 8959 }, { "epoch": 0.48068669527896996, "grad_norm": 0.474609375, "learning_rate": 4.942865486104353e-06, "loss": 2.2575, "step": 8960 }, { "epoch": 0.4807403433476395, "grad_norm": 0.37890625, "learning_rate": 4.94284701743625e-06, "loss": 2.0617, "step": 8961 }, { "epoch": 0.480793991416309, "grad_norm": 0.44140625, "learning_rate": 4.942828545818156e-06, "loss": 2.3212, "step": 8962 }, { "epoch": 0.48084763948497855, "grad_norm": 0.46875, "learning_rate": 4.942810071250093e-06, "loss": 2.5464, "step": 8963 }, { "epoch": 0.4809012875536481, "grad_norm": 0.44921875, "learning_rate": 4.942791593732084e-06, "loss": 2.3122, "step": 8964 }, { "epoch": 0.4809549356223176, "grad_norm": 0.486328125, "learning_rate": 4.9427731132641495e-06, "loss": 2.3914, "step": 8965 }, { "epoch": 0.48100858369098715, "grad_norm": 0.5234375, "learning_rate": 4.942754629846314e-06, "loss": 1.3401, "step": 8966 }, { "epoch": 0.4810622317596567, "grad_norm": 0.41796875, "learning_rate": 4.942736143478598e-06, "loss": 2.4228, "step": 8967 }, { "epoch": 0.48111587982832615, "grad_norm": 0.45703125, "learning_rate": 4.942717654161025e-06, "loss": 2.3495, "step": 8968 }, { "epoch": 0.4811695278969957, "grad_norm": 0.5, "learning_rate": 4.942699161893618e-06, "loss": 2.3206, "step": 8969 }, { "epoch": 0.4812231759656652, "grad_norm": 0.369140625, "learning_rate": 4.942680666676397e-06, "loss": 2.3529, "step": 8970 }, { "epoch": 0.48127682403433475, "grad_norm": 0.40625, "learning_rate": 4.942662168509386e-06, "loss": 2.1432, "step": 8971 }, { "epoch": 0.4813304721030043, "grad_norm": 0.474609375, "learning_rate": 4.942643667392607e-06, "loss": 2.1974, "step": 8972 }, { "epoch": 0.4813841201716738, "grad_norm": 0.60546875, "learning_rate": 4.942625163326083e-06, "loss": 2.5468, "step": 8973 }, { "epoch": 0.48143776824034334, "grad_norm": 0.482421875, "learning_rate": 4.942606656309835e-06, "loss": 2.2405, "step": 8974 }, { "epoch": 0.4814914163090129, "grad_norm": 0.453125, "learning_rate": 4.942588146343886e-06, "loss": 2.3571, "step": 8975 }, { "epoch": 0.4815450643776824, "grad_norm": 0.51171875, "learning_rate": 4.942569633428259e-06, "loss": 2.343, "step": 8976 }, { "epoch": 0.48159871244635194, "grad_norm": 0.3828125, "learning_rate": 4.942551117562976e-06, "loss": 2.3361, "step": 8977 }, { "epoch": 0.48165236051502147, "grad_norm": 0.44140625, "learning_rate": 4.942532598748058e-06, "loss": 2.0347, "step": 8978 }, { "epoch": 0.481706008583691, "grad_norm": 0.703125, "learning_rate": 4.942514076983529e-06, "loss": 2.4619, "step": 8979 }, { "epoch": 0.48175965665236054, "grad_norm": 0.484375, "learning_rate": 4.942495552269412e-06, "loss": 2.3984, "step": 8980 }, { "epoch": 0.48181330472103007, "grad_norm": 0.412109375, "learning_rate": 4.942477024605726e-06, "loss": 2.1396, "step": 8981 }, { "epoch": 0.4818669527896996, "grad_norm": 0.400390625, "learning_rate": 4.942458493992497e-06, "loss": 2.2013, "step": 8982 }, { "epoch": 0.4819206008583691, "grad_norm": 0.412109375, "learning_rate": 4.942439960429746e-06, "loss": 2.4223, "step": 8983 }, { "epoch": 0.4819742489270386, "grad_norm": 0.40625, "learning_rate": 4.942421423917495e-06, "loss": 2.2706, "step": 8984 }, { "epoch": 0.48202789699570814, "grad_norm": 0.419921875, "learning_rate": 4.942402884455767e-06, "loss": 2.3509, "step": 8985 }, { "epoch": 0.48208154506437767, "grad_norm": 0.462890625, "learning_rate": 4.942384342044584e-06, "loss": 2.5542, "step": 8986 }, { "epoch": 0.4821351931330472, "grad_norm": 0.451171875, "learning_rate": 4.942365796683968e-06, "loss": 2.2918, "step": 8987 }, { "epoch": 0.48218884120171673, "grad_norm": 0.439453125, "learning_rate": 4.942347248373942e-06, "loss": 2.7007, "step": 8988 }, { "epoch": 0.48224248927038627, "grad_norm": 0.400390625, "learning_rate": 4.942328697114529e-06, "loss": 2.1694, "step": 8989 }, { "epoch": 0.4822961373390558, "grad_norm": 0.384765625, "learning_rate": 4.942310142905751e-06, "loss": 2.2759, "step": 8990 }, { "epoch": 0.48234978540772533, "grad_norm": 1.2265625, "learning_rate": 4.942291585747629e-06, "loss": 2.3074, "step": 8991 }, { "epoch": 0.48240343347639486, "grad_norm": 0.458984375, "learning_rate": 4.942273025640187e-06, "loss": 2.3264, "step": 8992 }, { "epoch": 0.4824570815450644, "grad_norm": 0.466796875, "learning_rate": 4.942254462583447e-06, "loss": 2.4458, "step": 8993 }, { "epoch": 0.4825107296137339, "grad_norm": 0.466796875, "learning_rate": 4.9422358965774305e-06, "loss": 2.4479, "step": 8994 }, { "epoch": 0.48256437768240346, "grad_norm": 0.423828125, "learning_rate": 4.9422173276221615e-06, "loss": 2.2286, "step": 8995 }, { "epoch": 0.482618025751073, "grad_norm": 0.4453125, "learning_rate": 4.942198755717663e-06, "loss": 1.8302, "step": 8996 }, { "epoch": 0.48267167381974246, "grad_norm": 0.380859375, "learning_rate": 4.9421801808639545e-06, "loss": 2.1317, "step": 8997 }, { "epoch": 0.482725321888412, "grad_norm": 0.419921875, "learning_rate": 4.9421616030610605e-06, "loss": 2.2209, "step": 8998 }, { "epoch": 0.4827789699570815, "grad_norm": 0.408203125, "learning_rate": 4.942143022309003e-06, "loss": 2.647, "step": 8999 }, { "epoch": 0.48283261802575106, "grad_norm": 0.421875, "learning_rate": 4.942124438607804e-06, "loss": 2.2264, "step": 9000 }, { "epoch": 0.4828862660944206, "grad_norm": 0.447265625, "learning_rate": 4.9421058519574876e-06, "loss": 1.9805, "step": 9001 }, { "epoch": 0.4829399141630901, "grad_norm": 0.51953125, "learning_rate": 4.942087262358074e-06, "loss": 1.4317, "step": 9002 }, { "epoch": 0.48299356223175965, "grad_norm": 0.490234375, "learning_rate": 4.942068669809587e-06, "loss": 1.2435, "step": 9003 }, { "epoch": 0.4830472103004292, "grad_norm": 0.400390625, "learning_rate": 4.942050074312048e-06, "loss": 2.3133, "step": 9004 }, { "epoch": 0.4831008583690987, "grad_norm": 0.33984375, "learning_rate": 4.942031475865481e-06, "loss": 2.0476, "step": 9005 }, { "epoch": 0.48315450643776825, "grad_norm": 0.48046875, "learning_rate": 4.942012874469907e-06, "loss": 1.8831, "step": 9006 }, { "epoch": 0.4832081545064378, "grad_norm": 0.48046875, "learning_rate": 4.94199427012535e-06, "loss": 2.5907, "step": 9007 }, { "epoch": 0.4832618025751073, "grad_norm": 0.419921875, "learning_rate": 4.941975662831831e-06, "loss": 2.3133, "step": 9008 }, { "epoch": 0.48331545064377684, "grad_norm": 0.373046875, "learning_rate": 4.941957052589373e-06, "loss": 2.1545, "step": 9009 }, { "epoch": 0.4833690987124464, "grad_norm": 0.3828125, "learning_rate": 4.941938439397998e-06, "loss": 2.1603, "step": 9010 }, { "epoch": 0.48342274678111585, "grad_norm": 0.486328125, "learning_rate": 4.941919823257731e-06, "loss": 1.5484, "step": 9011 }, { "epoch": 0.4834763948497854, "grad_norm": 0.443359375, "learning_rate": 4.941901204168591e-06, "loss": 1.4546, "step": 9012 }, { "epoch": 0.4835300429184549, "grad_norm": 0.478515625, "learning_rate": 4.941882582130601e-06, "loss": 2.2087, "step": 9013 }, { "epoch": 0.48358369098712445, "grad_norm": 0.40625, "learning_rate": 4.941863957143786e-06, "loss": 2.3684, "step": 9014 }, { "epoch": 0.483637339055794, "grad_norm": 0.447265625, "learning_rate": 4.941845329208166e-06, "loss": 2.2454, "step": 9015 }, { "epoch": 0.4836909871244635, "grad_norm": 0.47265625, "learning_rate": 4.941826698323765e-06, "loss": 2.4991, "step": 9016 }, { "epoch": 0.48374463519313304, "grad_norm": 0.384765625, "learning_rate": 4.941808064490605e-06, "loss": 2.1394, "step": 9017 }, { "epoch": 0.4837982832618026, "grad_norm": 0.435546875, "learning_rate": 4.941789427708708e-06, "loss": 2.3915, "step": 9018 }, { "epoch": 0.4838519313304721, "grad_norm": 0.404296875, "learning_rate": 4.941770787978098e-06, "loss": 2.2707, "step": 9019 }, { "epoch": 0.48390557939914164, "grad_norm": 0.494140625, "learning_rate": 4.9417521452987945e-06, "loss": 2.5184, "step": 9020 }, { "epoch": 0.48395922746781117, "grad_norm": 0.451171875, "learning_rate": 4.941733499670823e-06, "loss": 2.5929, "step": 9021 }, { "epoch": 0.4840128755364807, "grad_norm": 0.4609375, "learning_rate": 4.9417148510942056e-06, "loss": 2.268, "step": 9022 }, { "epoch": 0.48406652360515023, "grad_norm": 0.43359375, "learning_rate": 4.941696199568964e-06, "loss": 2.0659, "step": 9023 }, { "epoch": 0.48412017167381977, "grad_norm": 0.423828125, "learning_rate": 4.941677545095121e-06, "loss": 2.2459, "step": 9024 }, { "epoch": 0.4841738197424893, "grad_norm": 0.40625, "learning_rate": 4.9416588876726976e-06, "loss": 2.1965, "step": 9025 }, { "epoch": 0.4842274678111588, "grad_norm": 0.453125, "learning_rate": 4.941640227301719e-06, "loss": 2.4047, "step": 9026 }, { "epoch": 0.4842811158798283, "grad_norm": 0.59375, "learning_rate": 4.9416215639822065e-06, "loss": 2.4748, "step": 9027 }, { "epoch": 0.48433476394849784, "grad_norm": 0.396484375, "learning_rate": 4.941602897714182e-06, "loss": 2.129, "step": 9028 }, { "epoch": 0.48438841201716737, "grad_norm": 0.47265625, "learning_rate": 4.941584228497669e-06, "loss": 2.2787, "step": 9029 }, { "epoch": 0.4844420600858369, "grad_norm": 0.46875, "learning_rate": 4.94156555633269e-06, "loss": 2.2277, "step": 9030 }, { "epoch": 0.48449570815450643, "grad_norm": 0.427734375, "learning_rate": 4.9415468812192665e-06, "loss": 2.4473, "step": 9031 }, { "epoch": 0.48454935622317596, "grad_norm": 0.46875, "learning_rate": 4.941528203157423e-06, "loss": 2.5249, "step": 9032 }, { "epoch": 0.4846030042918455, "grad_norm": 0.4140625, "learning_rate": 4.94150952214718e-06, "loss": 2.3381, "step": 9033 }, { "epoch": 0.48465665236051503, "grad_norm": 0.423828125, "learning_rate": 4.9414908381885614e-06, "loss": 2.3178, "step": 9034 }, { "epoch": 0.48471030042918456, "grad_norm": 0.412109375, "learning_rate": 4.94147215128159e-06, "loss": 2.2764, "step": 9035 }, { "epoch": 0.4847639484978541, "grad_norm": 0.40625, "learning_rate": 4.941453461426287e-06, "loss": 2.1235, "step": 9036 }, { "epoch": 0.4848175965665236, "grad_norm": 0.3828125, "learning_rate": 4.941434768622675e-06, "loss": 2.4506, "step": 9037 }, { "epoch": 0.48487124463519315, "grad_norm": 0.443359375, "learning_rate": 4.9414160728707775e-06, "loss": 2.2647, "step": 9038 }, { "epoch": 0.4849248927038627, "grad_norm": 0.421875, "learning_rate": 4.941397374170618e-06, "loss": 2.0963, "step": 9039 }, { "epoch": 0.48497854077253216, "grad_norm": 0.4296875, "learning_rate": 4.9413786725222165e-06, "loss": 2.118, "step": 9040 }, { "epoch": 0.4850321888412017, "grad_norm": 0.42578125, "learning_rate": 4.941359967925598e-06, "loss": 2.2155, "step": 9041 }, { "epoch": 0.4850858369098712, "grad_norm": 0.46484375, "learning_rate": 4.9413412603807834e-06, "loss": 2.5365, "step": 9042 }, { "epoch": 0.48513948497854076, "grad_norm": 0.37890625, "learning_rate": 4.9413225498877955e-06, "loss": 2.3933, "step": 9043 }, { "epoch": 0.4851931330472103, "grad_norm": 0.37890625, "learning_rate": 4.941303836446658e-06, "loss": 2.1739, "step": 9044 }, { "epoch": 0.4852467811158798, "grad_norm": 0.423828125, "learning_rate": 4.941285120057392e-06, "loss": 2.0412, "step": 9045 }, { "epoch": 0.48530042918454935, "grad_norm": 0.412109375, "learning_rate": 4.941266400720022e-06, "loss": 2.3201, "step": 9046 }, { "epoch": 0.4853540772532189, "grad_norm": 0.38671875, "learning_rate": 4.941247678434569e-06, "loss": 2.0026, "step": 9047 }, { "epoch": 0.4854077253218884, "grad_norm": 0.51171875, "learning_rate": 4.9412289532010565e-06, "loss": 2.2587, "step": 9048 }, { "epoch": 0.48546137339055795, "grad_norm": 0.3984375, "learning_rate": 4.941210225019507e-06, "loss": 1.8683, "step": 9049 }, { "epoch": 0.4855150214592275, "grad_norm": 0.40625, "learning_rate": 4.941191493889942e-06, "loss": 2.0733, "step": 9050 }, { "epoch": 0.485568669527897, "grad_norm": 0.76953125, "learning_rate": 4.941172759812386e-06, "loss": 2.4356, "step": 9051 }, { "epoch": 0.48562231759656654, "grad_norm": 0.369140625, "learning_rate": 4.94115402278686e-06, "loss": 2.4685, "step": 9052 }, { "epoch": 0.4856759656652361, "grad_norm": 0.439453125, "learning_rate": 4.941135282813387e-06, "loss": 2.4232, "step": 9053 }, { "epoch": 0.48572961373390555, "grad_norm": 0.490234375, "learning_rate": 4.94111653989199e-06, "loss": 2.2338, "step": 9054 }, { "epoch": 0.4857832618025751, "grad_norm": 0.455078125, "learning_rate": 4.941097794022692e-06, "loss": 2.2418, "step": 9055 }, { "epoch": 0.4858369098712446, "grad_norm": 0.44140625, "learning_rate": 4.941079045205515e-06, "loss": 2.3614, "step": 9056 }, { "epoch": 0.48589055793991415, "grad_norm": 0.419921875, "learning_rate": 4.9410602934404816e-06, "loss": 2.2997, "step": 9057 }, { "epoch": 0.4859442060085837, "grad_norm": 0.8359375, "learning_rate": 4.941041538727615e-06, "loss": 2.393, "step": 9058 }, { "epoch": 0.4859978540772532, "grad_norm": 0.3984375, "learning_rate": 4.941022781066938e-06, "loss": 1.7827, "step": 9059 }, { "epoch": 0.48605150214592274, "grad_norm": 0.419921875, "learning_rate": 4.941004020458471e-06, "loss": 2.2135, "step": 9060 }, { "epoch": 0.4861051502145923, "grad_norm": 0.458984375, "learning_rate": 4.94098525690224e-06, "loss": 2.1834, "step": 9061 }, { "epoch": 0.4861587982832618, "grad_norm": 0.48828125, "learning_rate": 4.9409664903982656e-06, "loss": 2.2713, "step": 9062 }, { "epoch": 0.48621244635193134, "grad_norm": 0.458984375, "learning_rate": 4.94094772094657e-06, "loss": 2.325, "step": 9063 }, { "epoch": 0.48626609442060087, "grad_norm": 0.388671875, "learning_rate": 4.940928948547178e-06, "loss": 2.2112, "step": 9064 }, { "epoch": 0.4863197424892704, "grad_norm": 0.373046875, "learning_rate": 4.940910173200111e-06, "loss": 2.2419, "step": 9065 }, { "epoch": 0.48637339055793993, "grad_norm": 0.453125, "learning_rate": 4.940891394905391e-06, "loss": 1.5618, "step": 9066 }, { "epoch": 0.48642703862660946, "grad_norm": 0.421875, "learning_rate": 4.940872613663041e-06, "loss": 2.3716, "step": 9067 }, { "epoch": 0.486480686695279, "grad_norm": 0.37109375, "learning_rate": 4.940853829473085e-06, "loss": 2.4968, "step": 9068 }, { "epoch": 0.4865343347639485, "grad_norm": 0.5, "learning_rate": 4.940835042335545e-06, "loss": 2.4202, "step": 9069 }, { "epoch": 0.486587982832618, "grad_norm": 0.54296875, "learning_rate": 4.940816252250443e-06, "loss": 2.4224, "step": 9070 }, { "epoch": 0.48664163090128754, "grad_norm": 0.458984375, "learning_rate": 4.940797459217802e-06, "loss": 2.1958, "step": 9071 }, { "epoch": 0.48669527896995707, "grad_norm": 0.45703125, "learning_rate": 4.940778663237646e-06, "loss": 2.4109, "step": 9072 }, { "epoch": 0.4867489270386266, "grad_norm": 0.46875, "learning_rate": 4.940759864309995e-06, "loss": 2.2996, "step": 9073 }, { "epoch": 0.48680257510729613, "grad_norm": 0.515625, "learning_rate": 4.9407410624348736e-06, "loss": 1.2525, "step": 9074 }, { "epoch": 0.48685622317596566, "grad_norm": 0.498046875, "learning_rate": 4.940722257612305e-06, "loss": 2.3855, "step": 9075 }, { "epoch": 0.4869098712446352, "grad_norm": 0.357421875, "learning_rate": 4.94070344984231e-06, "loss": 2.0967, "step": 9076 }, { "epoch": 0.4869635193133047, "grad_norm": 0.4921875, "learning_rate": 4.940684639124913e-06, "loss": 2.3836, "step": 9077 }, { "epoch": 0.48701716738197426, "grad_norm": 0.5625, "learning_rate": 4.940665825460136e-06, "loss": 2.2682, "step": 9078 }, { "epoch": 0.4870708154506438, "grad_norm": 0.40234375, "learning_rate": 4.940647008848002e-06, "loss": 1.7082, "step": 9079 }, { "epoch": 0.4871244635193133, "grad_norm": 0.40625, "learning_rate": 4.940628189288533e-06, "loss": 2.3402, "step": 9080 }, { "epoch": 0.48717811158798285, "grad_norm": 0.40234375, "learning_rate": 4.940609366781752e-06, "loss": 2.3104, "step": 9081 }, { "epoch": 0.4872317596566524, "grad_norm": 0.357421875, "learning_rate": 4.940590541327682e-06, "loss": 2.2618, "step": 9082 }, { "epoch": 0.48728540772532186, "grad_norm": 0.42578125, "learning_rate": 4.940571712926346e-06, "loss": 2.2245, "step": 9083 }, { "epoch": 0.4873390557939914, "grad_norm": 0.40234375, "learning_rate": 4.940552881577767e-06, "loss": 2.2753, "step": 9084 }, { "epoch": 0.4873927038626609, "grad_norm": 0.40625, "learning_rate": 4.940534047281967e-06, "loss": 2.5116, "step": 9085 }, { "epoch": 0.48744635193133046, "grad_norm": 0.455078125, "learning_rate": 4.940515210038969e-06, "loss": 2.4417, "step": 9086 }, { "epoch": 0.4875, "grad_norm": 0.5546875, "learning_rate": 4.940496369848795e-06, "loss": 2.3329, "step": 9087 }, { "epoch": 0.4875536480686695, "grad_norm": 0.5390625, "learning_rate": 4.940477526711469e-06, "loss": 2.0926, "step": 9088 }, { "epoch": 0.48760729613733905, "grad_norm": 0.353515625, "learning_rate": 4.940458680627013e-06, "loss": 1.809, "step": 9089 }, { "epoch": 0.4876609442060086, "grad_norm": 0.5, "learning_rate": 4.94043983159545e-06, "loss": 2.2553, "step": 9090 }, { "epoch": 0.4877145922746781, "grad_norm": 0.4296875, "learning_rate": 4.940420979616804e-06, "loss": 2.5123, "step": 9091 }, { "epoch": 0.48776824034334765, "grad_norm": 0.486328125, "learning_rate": 4.940402124691094e-06, "loss": 1.5021, "step": 9092 }, { "epoch": 0.4878218884120172, "grad_norm": 0.4921875, "learning_rate": 4.940383266818347e-06, "loss": 2.3391, "step": 9093 }, { "epoch": 0.4878755364806867, "grad_norm": 0.34375, "learning_rate": 4.940364405998584e-06, "loss": 2.1334, "step": 9094 }, { "epoch": 0.48792918454935624, "grad_norm": 0.39453125, "learning_rate": 4.940345542231826e-06, "loss": 2.2452, "step": 9095 }, { "epoch": 0.4879828326180258, "grad_norm": 0.5078125, "learning_rate": 4.940326675518099e-06, "loss": 2.3892, "step": 9096 }, { "epoch": 0.4880364806866953, "grad_norm": 0.419921875, "learning_rate": 4.9403078058574245e-06, "loss": 2.2521, "step": 9097 }, { "epoch": 0.4880901287553648, "grad_norm": 0.36328125, "learning_rate": 4.940288933249825e-06, "loss": 2.0879, "step": 9098 }, { "epoch": 0.4881437768240343, "grad_norm": 0.3828125, "learning_rate": 4.9402700576953235e-06, "loss": 2.1122, "step": 9099 }, { "epoch": 0.48819742489270385, "grad_norm": 0.423828125, "learning_rate": 4.940251179193943e-06, "loss": 2.2136, "step": 9100 }, { "epoch": 0.4882510729613734, "grad_norm": 0.44140625, "learning_rate": 4.940232297745705e-06, "loss": 2.1626, "step": 9101 }, { "epoch": 0.4883047210300429, "grad_norm": 0.423828125, "learning_rate": 4.940213413350634e-06, "loss": 2.3719, "step": 9102 }, { "epoch": 0.48835836909871244, "grad_norm": 0.47265625, "learning_rate": 4.940194526008751e-06, "loss": 2.4061, "step": 9103 }, { "epoch": 0.488412017167382, "grad_norm": 0.8671875, "learning_rate": 4.940175635720082e-06, "loss": 2.4483, "step": 9104 }, { "epoch": 0.4884656652360515, "grad_norm": 0.431640625, "learning_rate": 4.9401567424846466e-06, "loss": 2.3247, "step": 9105 }, { "epoch": 0.48851931330472104, "grad_norm": 0.41796875, "learning_rate": 4.9401378463024685e-06, "loss": 1.9428, "step": 9106 }, { "epoch": 0.48857296137339057, "grad_norm": 0.427734375, "learning_rate": 4.940118947173571e-06, "loss": 2.4197, "step": 9107 }, { "epoch": 0.4886266094420601, "grad_norm": 0.3828125, "learning_rate": 4.9401000450979776e-06, "loss": 2.3353, "step": 9108 }, { "epoch": 0.48868025751072963, "grad_norm": 0.408203125, "learning_rate": 4.94008114007571e-06, "loss": 2.4205, "step": 9109 }, { "epoch": 0.48873390557939916, "grad_norm": 0.46875, "learning_rate": 4.94006223210679e-06, "loss": 1.6493, "step": 9110 }, { "epoch": 0.4887875536480687, "grad_norm": 0.40625, "learning_rate": 4.940043321191242e-06, "loss": 2.1291, "step": 9111 }, { "epoch": 0.48884120171673817, "grad_norm": 0.421875, "learning_rate": 4.9400244073290895e-06, "loss": 2.2951, "step": 9112 }, { "epoch": 0.4888948497854077, "grad_norm": 0.359375, "learning_rate": 4.940005490520354e-06, "loss": 2.1166, "step": 9113 }, { "epoch": 0.48894849785407724, "grad_norm": 0.39453125, "learning_rate": 4.939986570765059e-06, "loss": 2.2433, "step": 9114 }, { "epoch": 0.48900214592274677, "grad_norm": 0.390625, "learning_rate": 4.9399676480632264e-06, "loss": 2.2647, "step": 9115 }, { "epoch": 0.4890557939914163, "grad_norm": 0.55859375, "learning_rate": 4.93994872241488e-06, "loss": 2.1868, "step": 9116 }, { "epoch": 0.48910944206008583, "grad_norm": 0.435546875, "learning_rate": 4.939929793820043e-06, "loss": 2.1864, "step": 9117 }, { "epoch": 0.48916309012875536, "grad_norm": 0.390625, "learning_rate": 4.939910862278737e-06, "loss": 2.3645, "step": 9118 }, { "epoch": 0.4892167381974249, "grad_norm": 0.423828125, "learning_rate": 4.939891927790985e-06, "loss": 2.3276, "step": 9119 }, { "epoch": 0.4892703862660944, "grad_norm": 0.4375, "learning_rate": 4.939872990356811e-06, "loss": 2.258, "step": 9120 }, { "epoch": 0.48932403433476396, "grad_norm": 0.54296875, "learning_rate": 4.939854049976237e-06, "loss": 2.3251, "step": 9121 }, { "epoch": 0.4893776824034335, "grad_norm": 0.470703125, "learning_rate": 4.939835106649287e-06, "loss": 2.5022, "step": 9122 }, { "epoch": 0.489431330472103, "grad_norm": 0.458984375, "learning_rate": 4.939816160375982e-06, "loss": 2.3052, "step": 9123 }, { "epoch": 0.48948497854077255, "grad_norm": 0.392578125, "learning_rate": 4.939797211156346e-06, "loss": 1.9992, "step": 9124 }, { "epoch": 0.4895386266094421, "grad_norm": 0.3515625, "learning_rate": 4.939778258990402e-06, "loss": 2.0072, "step": 9125 }, { "epoch": 0.48959227467811156, "grad_norm": 0.443359375, "learning_rate": 4.939759303878172e-06, "loss": 2.2711, "step": 9126 }, { "epoch": 0.4896459227467811, "grad_norm": 0.4296875, "learning_rate": 4.939740345819681e-06, "loss": 2.2658, "step": 9127 }, { "epoch": 0.4896995708154506, "grad_norm": 0.451171875, "learning_rate": 4.93972138481495e-06, "loss": 2.0728, "step": 9128 }, { "epoch": 0.48975321888412016, "grad_norm": 0.4140625, "learning_rate": 4.939702420864001e-06, "loss": 2.1175, "step": 9129 }, { "epoch": 0.4898068669527897, "grad_norm": 0.39453125, "learning_rate": 4.939683453966859e-06, "loss": 2.3942, "step": 9130 }, { "epoch": 0.4898605150214592, "grad_norm": 0.423828125, "learning_rate": 4.939664484123546e-06, "loss": 2.4857, "step": 9131 }, { "epoch": 0.48991416309012875, "grad_norm": 0.375, "learning_rate": 4.939645511334086e-06, "loss": 2.258, "step": 9132 }, { "epoch": 0.4899678111587983, "grad_norm": 0.49609375, "learning_rate": 4.939626535598499e-06, "loss": 2.4087, "step": 9133 }, { "epoch": 0.4900214592274678, "grad_norm": 0.4296875, "learning_rate": 4.9396075569168115e-06, "loss": 2.2031, "step": 9134 }, { "epoch": 0.49007510729613735, "grad_norm": 0.43359375, "learning_rate": 4.939588575289044e-06, "loss": 2.361, "step": 9135 }, { "epoch": 0.4901287553648069, "grad_norm": 0.578125, "learning_rate": 4.93956959071522e-06, "loss": 2.4059, "step": 9136 }, { "epoch": 0.4901824034334764, "grad_norm": 0.478515625, "learning_rate": 4.939550603195364e-06, "loss": 2.4272, "step": 9137 }, { "epoch": 0.49023605150214594, "grad_norm": 0.390625, "learning_rate": 4.939531612729496e-06, "loss": 2.1165, "step": 9138 }, { "epoch": 0.4902896995708155, "grad_norm": 0.439453125, "learning_rate": 4.939512619317642e-06, "loss": 2.2218, "step": 9139 }, { "epoch": 0.490343347639485, "grad_norm": 0.4609375, "learning_rate": 4.939493622959821e-06, "loss": 2.1532, "step": 9140 }, { "epoch": 0.4903969957081545, "grad_norm": 0.4296875, "learning_rate": 4.93947462365606e-06, "loss": 2.4366, "step": 9141 }, { "epoch": 0.490450643776824, "grad_norm": 0.423828125, "learning_rate": 4.939455621406381e-06, "loss": 2.2713, "step": 9142 }, { "epoch": 0.49050429184549355, "grad_norm": 0.392578125, "learning_rate": 4.939436616210804e-06, "loss": 2.2055, "step": 9143 }, { "epoch": 0.4905579399141631, "grad_norm": 0.4453125, "learning_rate": 4.939417608069356e-06, "loss": 2.6301, "step": 9144 }, { "epoch": 0.4906115879828326, "grad_norm": 0.69140625, "learning_rate": 4.939398596982058e-06, "loss": 2.367, "step": 9145 }, { "epoch": 0.49066523605150214, "grad_norm": 0.478515625, "learning_rate": 4.939379582948933e-06, "loss": 2.2719, "step": 9146 }, { "epoch": 0.4907188841201717, "grad_norm": 0.37890625, "learning_rate": 4.939360565970004e-06, "loss": 2.1491, "step": 9147 }, { "epoch": 0.4907725321888412, "grad_norm": 0.392578125, "learning_rate": 4.939341546045293e-06, "loss": 2.2633, "step": 9148 }, { "epoch": 0.49082618025751074, "grad_norm": 0.474609375, "learning_rate": 4.939322523174825e-06, "loss": 2.3635, "step": 9149 }, { "epoch": 0.49087982832618027, "grad_norm": 0.546875, "learning_rate": 4.9393034973586225e-06, "loss": 1.761, "step": 9150 }, { "epoch": 0.4909334763948498, "grad_norm": 0.4375, "learning_rate": 4.939284468596707e-06, "loss": 2.1599, "step": 9151 }, { "epoch": 0.49098712446351933, "grad_norm": 0.412109375, "learning_rate": 4.939265436889103e-06, "loss": 2.0407, "step": 9152 }, { "epoch": 0.49104077253218886, "grad_norm": 0.423828125, "learning_rate": 4.939246402235833e-06, "loss": 2.2541, "step": 9153 }, { "epoch": 0.4910944206008584, "grad_norm": 0.373046875, "learning_rate": 4.9392273646369195e-06, "loss": 2.2707, "step": 9154 }, { "epoch": 0.49114806866952787, "grad_norm": 0.48828125, "learning_rate": 4.939208324092386e-06, "loss": 2.4417, "step": 9155 }, { "epoch": 0.4912017167381974, "grad_norm": 0.46875, "learning_rate": 4.939189280602256e-06, "loss": 2.3158, "step": 9156 }, { "epoch": 0.49125536480686693, "grad_norm": 0.4140625, "learning_rate": 4.939170234166551e-06, "loss": 1.9382, "step": 9157 }, { "epoch": 0.49130901287553647, "grad_norm": 0.41015625, "learning_rate": 4.9391511847852944e-06, "loss": 2.0629, "step": 9158 }, { "epoch": 0.491362660944206, "grad_norm": 0.59375, "learning_rate": 4.939132132458511e-06, "loss": 2.1881, "step": 9159 }, { "epoch": 0.49141630901287553, "grad_norm": 0.42578125, "learning_rate": 4.939113077186222e-06, "loss": 2.2356, "step": 9160 }, { "epoch": 0.49146995708154506, "grad_norm": 0.427734375, "learning_rate": 4.939094018968451e-06, "loss": 2.2533, "step": 9161 }, { "epoch": 0.4915236051502146, "grad_norm": 0.474609375, "learning_rate": 4.939074957805221e-06, "loss": 2.0966, "step": 9162 }, { "epoch": 0.4915772532188841, "grad_norm": 0.51171875, "learning_rate": 4.939055893696554e-06, "loss": 2.0717, "step": 9163 }, { "epoch": 0.49163090128755366, "grad_norm": 0.404296875, "learning_rate": 4.9390368266424745e-06, "loss": 2.3147, "step": 9164 }, { "epoch": 0.4916845493562232, "grad_norm": 0.474609375, "learning_rate": 4.939017756643006e-06, "loss": 2.324, "step": 9165 }, { "epoch": 0.4917381974248927, "grad_norm": 0.5390625, "learning_rate": 4.9389986836981695e-06, "loss": 2.3451, "step": 9166 }, { "epoch": 0.49179184549356225, "grad_norm": 0.4765625, "learning_rate": 4.93897960780799e-06, "loss": 2.2775, "step": 9167 }, { "epoch": 0.4918454935622318, "grad_norm": 0.455078125, "learning_rate": 4.938960528972488e-06, "loss": 2.1526, "step": 9168 }, { "epoch": 0.49189914163090126, "grad_norm": 0.466796875, "learning_rate": 4.938941447191689e-06, "loss": 2.103, "step": 9169 }, { "epoch": 0.4919527896995708, "grad_norm": 0.447265625, "learning_rate": 4.9389223624656156e-06, "loss": 2.3087, "step": 9170 }, { "epoch": 0.4920064377682403, "grad_norm": 0.482421875, "learning_rate": 4.938903274794289e-06, "loss": 2.4381, "step": 9171 }, { "epoch": 0.49206008583690986, "grad_norm": 0.41015625, "learning_rate": 4.938884184177735e-06, "loss": 2.2061, "step": 9172 }, { "epoch": 0.4921137339055794, "grad_norm": 0.396484375, "learning_rate": 4.9388650906159755e-06, "loss": 2.2494, "step": 9173 }, { "epoch": 0.4921673819742489, "grad_norm": 0.53515625, "learning_rate": 4.9388459941090324e-06, "loss": 2.364, "step": 9174 }, { "epoch": 0.49222103004291845, "grad_norm": 0.484375, "learning_rate": 4.93882689465693e-06, "loss": 1.7887, "step": 9175 }, { "epoch": 0.492274678111588, "grad_norm": 0.380859375, "learning_rate": 4.938807792259691e-06, "loss": 2.1669, "step": 9176 }, { "epoch": 0.4923283261802575, "grad_norm": 0.376953125, "learning_rate": 4.93878868691734e-06, "loss": 2.2465, "step": 9177 }, { "epoch": 0.49238197424892705, "grad_norm": 0.470703125, "learning_rate": 4.9387695786298965e-06, "loss": 2.1988, "step": 9178 }, { "epoch": 0.4924356223175966, "grad_norm": 0.44921875, "learning_rate": 4.938750467397387e-06, "loss": 2.4354, "step": 9179 }, { "epoch": 0.4924892703862661, "grad_norm": 0.322265625, "learning_rate": 4.9387313532198325e-06, "loss": 2.4568, "step": 9180 }, { "epoch": 0.49254291845493564, "grad_norm": 0.462890625, "learning_rate": 4.938712236097258e-06, "loss": 2.0834, "step": 9181 }, { "epoch": 0.4925965665236052, "grad_norm": 0.400390625, "learning_rate": 4.938693116029685e-06, "loss": 2.1001, "step": 9182 }, { "epoch": 0.4926502145922747, "grad_norm": 0.40234375, "learning_rate": 4.938673993017137e-06, "loss": 2.5445, "step": 9183 }, { "epoch": 0.4927038626609442, "grad_norm": 0.5703125, "learning_rate": 4.938654867059637e-06, "loss": 1.2219, "step": 9184 }, { "epoch": 0.4927575107296137, "grad_norm": 0.384765625, "learning_rate": 4.938635738157208e-06, "loss": 2.3691, "step": 9185 }, { "epoch": 0.49281115879828324, "grad_norm": 0.4453125, "learning_rate": 4.938616606309873e-06, "loss": 2.4446, "step": 9186 }, { "epoch": 0.4928648068669528, "grad_norm": 0.4921875, "learning_rate": 4.938597471517657e-06, "loss": 2.1995, "step": 9187 }, { "epoch": 0.4929184549356223, "grad_norm": 0.435546875, "learning_rate": 4.9385783337805795e-06, "loss": 2.0774, "step": 9188 }, { "epoch": 0.49297210300429184, "grad_norm": 0.4453125, "learning_rate": 4.938559193098668e-06, "loss": 2.5899, "step": 9189 }, { "epoch": 0.49302575107296137, "grad_norm": 0.486328125, "learning_rate": 4.9385400494719415e-06, "loss": 2.2868, "step": 9190 }, { "epoch": 0.4930793991416309, "grad_norm": 0.365234375, "learning_rate": 4.938520902900426e-06, "loss": 2.0357, "step": 9191 }, { "epoch": 0.49313304721030043, "grad_norm": 0.51953125, "learning_rate": 4.938501753384142e-06, "loss": 2.386, "step": 9192 }, { "epoch": 0.49318669527896997, "grad_norm": 0.439453125, "learning_rate": 4.938482600923116e-06, "loss": 2.4142, "step": 9193 }, { "epoch": 0.4932403433476395, "grad_norm": 0.4140625, "learning_rate": 4.938463445517367e-06, "loss": 2.2922, "step": 9194 }, { "epoch": 0.49329399141630903, "grad_norm": 0.462890625, "learning_rate": 4.9384442871669225e-06, "loss": 2.3264, "step": 9195 }, { "epoch": 0.49334763948497856, "grad_norm": 0.345703125, "learning_rate": 4.938425125871803e-06, "loss": 2.1848, "step": 9196 }, { "epoch": 0.4934012875536481, "grad_norm": 0.357421875, "learning_rate": 4.938405961632032e-06, "loss": 2.0879, "step": 9197 }, { "epoch": 0.49345493562231757, "grad_norm": 0.40234375, "learning_rate": 4.9383867944476325e-06, "loss": 2.2541, "step": 9198 }, { "epoch": 0.4935085836909871, "grad_norm": 0.392578125, "learning_rate": 4.938367624318629e-06, "loss": 2.2006, "step": 9199 }, { "epoch": 0.49356223175965663, "grad_norm": 0.416015625, "learning_rate": 4.938348451245043e-06, "loss": 2.1995, "step": 9200 }, { "epoch": 0.49361587982832617, "grad_norm": 0.462890625, "learning_rate": 4.9383292752268975e-06, "loss": 2.3616, "step": 9201 }, { "epoch": 0.4936695278969957, "grad_norm": 0.4375, "learning_rate": 4.9383100962642175e-06, "loss": 2.2661, "step": 9202 }, { "epoch": 0.49372317596566523, "grad_norm": 0.546875, "learning_rate": 4.938290914357025e-06, "loss": 2.341, "step": 9203 }, { "epoch": 0.49377682403433476, "grad_norm": 0.486328125, "learning_rate": 4.9382717295053425e-06, "loss": 1.5136, "step": 9204 }, { "epoch": 0.4938304721030043, "grad_norm": 0.392578125, "learning_rate": 4.938252541709194e-06, "loss": 2.2483, "step": 9205 }, { "epoch": 0.4938841201716738, "grad_norm": 0.45703125, "learning_rate": 4.938233350968603e-06, "loss": 1.9121, "step": 9206 }, { "epoch": 0.49393776824034336, "grad_norm": 0.3984375, "learning_rate": 4.9382141572835926e-06, "loss": 2.113, "step": 9207 }, { "epoch": 0.4939914163090129, "grad_norm": 0.390625, "learning_rate": 4.9381949606541855e-06, "loss": 2.2727, "step": 9208 }, { "epoch": 0.4940450643776824, "grad_norm": 0.455078125, "learning_rate": 4.938175761080405e-06, "loss": 2.1658, "step": 9209 }, { "epoch": 0.49409871244635195, "grad_norm": 0.40234375, "learning_rate": 4.938156558562274e-06, "loss": 2.2549, "step": 9210 }, { "epoch": 0.4941523605150215, "grad_norm": 0.53515625, "learning_rate": 4.938137353099816e-06, "loss": 2.356, "step": 9211 }, { "epoch": 0.494206008583691, "grad_norm": 0.3671875, "learning_rate": 4.938118144693054e-06, "loss": 2.2668, "step": 9212 }, { "epoch": 0.4942596566523605, "grad_norm": 0.5859375, "learning_rate": 4.938098933342011e-06, "loss": 2.3309, "step": 9213 }, { "epoch": 0.49431330472103, "grad_norm": 0.453125, "learning_rate": 4.938079719046712e-06, "loss": 2.2002, "step": 9214 }, { "epoch": 0.49436695278969955, "grad_norm": 0.451171875, "learning_rate": 4.938060501807178e-06, "loss": 2.3143, "step": 9215 }, { "epoch": 0.4944206008583691, "grad_norm": 0.404296875, "learning_rate": 4.938041281623433e-06, "loss": 2.1065, "step": 9216 }, { "epoch": 0.4944742489270386, "grad_norm": 0.41015625, "learning_rate": 4.938022058495501e-06, "loss": 1.9861, "step": 9217 }, { "epoch": 0.49452789699570815, "grad_norm": 0.51953125, "learning_rate": 4.938002832423404e-06, "loss": 2.384, "step": 9218 }, { "epoch": 0.4945815450643777, "grad_norm": 0.384765625, "learning_rate": 4.937983603407165e-06, "loss": 2.2399, "step": 9219 }, { "epoch": 0.4946351931330472, "grad_norm": 0.48046875, "learning_rate": 4.937964371446809e-06, "loss": 2.5272, "step": 9220 }, { "epoch": 0.49468884120171674, "grad_norm": 0.400390625, "learning_rate": 4.937945136542357e-06, "loss": 2.5807, "step": 9221 }, { "epoch": 0.4947424892703863, "grad_norm": 0.4453125, "learning_rate": 4.937925898693833e-06, "loss": 2.3669, "step": 9222 }, { "epoch": 0.4947961373390558, "grad_norm": 0.4453125, "learning_rate": 4.937906657901261e-06, "loss": 2.2322, "step": 9223 }, { "epoch": 0.49484978540772534, "grad_norm": 0.375, "learning_rate": 4.937887414164665e-06, "loss": 2.3571, "step": 9224 }, { "epoch": 0.49490343347639487, "grad_norm": 0.404296875, "learning_rate": 4.937868167484066e-06, "loss": 2.5793, "step": 9225 }, { "epoch": 0.4949570815450644, "grad_norm": 0.45703125, "learning_rate": 4.937848917859488e-06, "loss": 2.2253, "step": 9226 }, { "epoch": 0.4950107296137339, "grad_norm": 0.478515625, "learning_rate": 4.937829665290955e-06, "loss": 2.2446, "step": 9227 }, { "epoch": 0.4950643776824034, "grad_norm": 0.5234375, "learning_rate": 4.937810409778489e-06, "loss": 2.0009, "step": 9228 }, { "epoch": 0.49511802575107294, "grad_norm": 0.41015625, "learning_rate": 4.937791151322114e-06, "loss": 2.2268, "step": 9229 }, { "epoch": 0.4951716738197425, "grad_norm": 1.0703125, "learning_rate": 4.937771889921854e-06, "loss": 2.3363, "step": 9230 }, { "epoch": 0.495225321888412, "grad_norm": 0.3828125, "learning_rate": 4.937752625577732e-06, "loss": 1.9939, "step": 9231 }, { "epoch": 0.49527896995708154, "grad_norm": 0.427734375, "learning_rate": 4.93773335828977e-06, "loss": 2.5528, "step": 9232 }, { "epoch": 0.49533261802575107, "grad_norm": 0.419921875, "learning_rate": 4.937714088057992e-06, "loss": 1.5654, "step": 9233 }, { "epoch": 0.4953862660944206, "grad_norm": 0.412109375, "learning_rate": 4.937694814882422e-06, "loss": 2.3304, "step": 9234 }, { "epoch": 0.49543991416309013, "grad_norm": 0.408203125, "learning_rate": 4.937675538763082e-06, "loss": 2.3441, "step": 9235 }, { "epoch": 0.49549356223175967, "grad_norm": 0.4609375, "learning_rate": 4.937656259699996e-06, "loss": 2.1544, "step": 9236 }, { "epoch": 0.4955472103004292, "grad_norm": 0.384765625, "learning_rate": 4.937636977693188e-06, "loss": 1.9372, "step": 9237 }, { "epoch": 0.49560085836909873, "grad_norm": 0.4296875, "learning_rate": 4.937617692742679e-06, "loss": 2.1547, "step": 9238 }, { "epoch": 0.49565450643776826, "grad_norm": 0.384765625, "learning_rate": 4.937598404848495e-06, "loss": 2.2147, "step": 9239 }, { "epoch": 0.4957081545064378, "grad_norm": 0.43359375, "learning_rate": 4.937579114010657e-06, "loss": 2.1974, "step": 9240 }, { "epoch": 0.49576180257510727, "grad_norm": 0.361328125, "learning_rate": 4.93755982022919e-06, "loss": 2.1274, "step": 9241 }, { "epoch": 0.4958154506437768, "grad_norm": 1.1484375, "learning_rate": 4.937540523504116e-06, "loss": 1.4562, "step": 9242 }, { "epoch": 0.49586909871244633, "grad_norm": 0.51953125, "learning_rate": 4.937521223835459e-06, "loss": 2.446, "step": 9243 }, { "epoch": 0.49592274678111586, "grad_norm": 0.58203125, "learning_rate": 4.9375019212232426e-06, "loss": 1.9685, "step": 9244 }, { "epoch": 0.4959763948497854, "grad_norm": 0.37109375, "learning_rate": 4.93748261566749e-06, "loss": 2.018, "step": 9245 }, { "epoch": 0.4960300429184549, "grad_norm": 0.50390625, "learning_rate": 4.937463307168224e-06, "loss": 2.354, "step": 9246 }, { "epoch": 0.49608369098712446, "grad_norm": 0.4921875, "learning_rate": 4.9374439957254684e-06, "loss": 2.4677, "step": 9247 }, { "epoch": 0.496137339055794, "grad_norm": 0.39453125, "learning_rate": 4.937424681339246e-06, "loss": 2.2487, "step": 9248 }, { "epoch": 0.4961909871244635, "grad_norm": 0.443359375, "learning_rate": 4.93740536400958e-06, "loss": 2.2739, "step": 9249 }, { "epoch": 0.49624463519313305, "grad_norm": 0.4453125, "learning_rate": 4.937386043736495e-06, "loss": 2.0443, "step": 9250 }, { "epoch": 0.4962982832618026, "grad_norm": 0.427734375, "learning_rate": 4.937366720520013e-06, "loss": 2.4192, "step": 9251 }, { "epoch": 0.4963519313304721, "grad_norm": 0.462890625, "learning_rate": 4.937347394360158e-06, "loss": 2.1999, "step": 9252 }, { "epoch": 0.49640557939914165, "grad_norm": 0.4296875, "learning_rate": 4.937328065256953e-06, "loss": 2.2163, "step": 9253 }, { "epoch": 0.4964592274678112, "grad_norm": 0.74609375, "learning_rate": 4.937308733210421e-06, "loss": 2.2796, "step": 9254 }, { "epoch": 0.4965128755364807, "grad_norm": 0.486328125, "learning_rate": 4.937289398220587e-06, "loss": 2.2428, "step": 9255 }, { "epoch": 0.4965665236051502, "grad_norm": 0.44140625, "learning_rate": 4.937270060287472e-06, "loss": 2.2205, "step": 9256 }, { "epoch": 0.4966201716738197, "grad_norm": 0.408203125, "learning_rate": 4.937250719411101e-06, "loss": 2.301, "step": 9257 }, { "epoch": 0.49667381974248925, "grad_norm": 0.328125, "learning_rate": 4.937231375591497e-06, "loss": 2.3022, "step": 9258 }, { "epoch": 0.4967274678111588, "grad_norm": 0.46484375, "learning_rate": 4.937212028828684e-06, "loss": 2.0985, "step": 9259 }, { "epoch": 0.4967811158798283, "grad_norm": 0.37890625, "learning_rate": 4.9371926791226836e-06, "loss": 1.8344, "step": 9260 }, { "epoch": 0.49683476394849785, "grad_norm": 0.54296875, "learning_rate": 4.93717332647352e-06, "loss": 2.0253, "step": 9261 }, { "epoch": 0.4968884120171674, "grad_norm": 0.462890625, "learning_rate": 4.937153970881218e-06, "loss": 2.3085, "step": 9262 }, { "epoch": 0.4969420600858369, "grad_norm": 0.427734375, "learning_rate": 4.937134612345799e-06, "loss": 2.3733, "step": 9263 }, { "epoch": 0.49699570815450644, "grad_norm": 0.443359375, "learning_rate": 4.937115250867286e-06, "loss": 2.4118, "step": 9264 }, { "epoch": 0.497049356223176, "grad_norm": 0.421875, "learning_rate": 4.937095886445706e-06, "loss": 2.4625, "step": 9265 }, { "epoch": 0.4971030042918455, "grad_norm": 0.50390625, "learning_rate": 4.937076519081078e-06, "loss": 2.4592, "step": 9266 }, { "epoch": 0.49715665236051504, "grad_norm": 0.400390625, "learning_rate": 4.937057148773427e-06, "loss": 2.2343, "step": 9267 }, { "epoch": 0.49721030042918457, "grad_norm": 0.51171875, "learning_rate": 4.9370377755227775e-06, "loss": 2.4649, "step": 9268 }, { "epoch": 0.4972639484978541, "grad_norm": 0.44921875, "learning_rate": 4.937018399329152e-06, "loss": 2.2936, "step": 9269 }, { "epoch": 0.4973175965665236, "grad_norm": 0.3984375, "learning_rate": 4.9369990201925735e-06, "loss": 2.2366, "step": 9270 }, { "epoch": 0.4973712446351931, "grad_norm": 0.4375, "learning_rate": 4.9369796381130666e-06, "loss": 2.3523, "step": 9271 }, { "epoch": 0.49742489270386264, "grad_norm": 0.416015625, "learning_rate": 4.936960253090654e-06, "loss": 2.135, "step": 9272 }, { "epoch": 0.4974785407725322, "grad_norm": 0.4609375, "learning_rate": 4.936940865125358e-06, "loss": 2.2525, "step": 9273 }, { "epoch": 0.4975321888412017, "grad_norm": 0.439453125, "learning_rate": 4.936921474217204e-06, "loss": 2.219, "step": 9274 }, { "epoch": 0.49758583690987124, "grad_norm": 0.6796875, "learning_rate": 4.936902080366214e-06, "loss": 2.5226, "step": 9275 }, { "epoch": 0.49763948497854077, "grad_norm": 0.458984375, "learning_rate": 4.936882683572412e-06, "loss": 2.2646, "step": 9276 }, { "epoch": 0.4976931330472103, "grad_norm": 0.44140625, "learning_rate": 4.936863283835821e-06, "loss": 2.2649, "step": 9277 }, { "epoch": 0.49774678111587983, "grad_norm": 0.50390625, "learning_rate": 4.936843881156466e-06, "loss": 2.2198, "step": 9278 }, { "epoch": 0.49780042918454936, "grad_norm": 0.455078125, "learning_rate": 4.936824475534368e-06, "loss": 2.2354, "step": 9279 }, { "epoch": 0.4978540772532189, "grad_norm": 0.470703125, "learning_rate": 4.936805066969552e-06, "loss": 2.3302, "step": 9280 }, { "epoch": 0.49790772532188843, "grad_norm": 0.376953125, "learning_rate": 4.9367856554620404e-06, "loss": 2.2921, "step": 9281 }, { "epoch": 0.49796137339055796, "grad_norm": 0.53515625, "learning_rate": 4.936766241011859e-06, "loss": 2.2926, "step": 9282 }, { "epoch": 0.4980150214592275, "grad_norm": 0.431640625, "learning_rate": 4.936746823619028e-06, "loss": 2.2113, "step": 9283 }, { "epoch": 0.498068669527897, "grad_norm": 0.46875, "learning_rate": 4.936727403283573e-06, "loss": 2.6357, "step": 9284 }, { "epoch": 0.4981223175965665, "grad_norm": 0.447265625, "learning_rate": 4.936707980005517e-06, "loss": 2.1704, "step": 9285 }, { "epoch": 0.49817596566523603, "grad_norm": 0.58984375, "learning_rate": 4.936688553784883e-06, "loss": 2.2174, "step": 9286 }, { "epoch": 0.49822961373390556, "grad_norm": 0.412109375, "learning_rate": 4.936669124621695e-06, "loss": 2.4275, "step": 9287 }, { "epoch": 0.4982832618025751, "grad_norm": 0.37109375, "learning_rate": 4.936649692515977e-06, "loss": 2.1592, "step": 9288 }, { "epoch": 0.4983369098712446, "grad_norm": 0.3671875, "learning_rate": 4.9366302574677505e-06, "loss": 2.3577, "step": 9289 }, { "epoch": 0.49839055793991416, "grad_norm": 0.5, "learning_rate": 4.93661081947704e-06, "loss": 2.1878, "step": 9290 }, { "epoch": 0.4984442060085837, "grad_norm": 0.455078125, "learning_rate": 4.93659137854387e-06, "loss": 1.9766, "step": 9291 }, { "epoch": 0.4984978540772532, "grad_norm": 0.421875, "learning_rate": 4.936571934668263e-06, "loss": 2.2163, "step": 9292 }, { "epoch": 0.49855150214592275, "grad_norm": 0.484375, "learning_rate": 4.936552487850243e-06, "loss": 2.3792, "step": 9293 }, { "epoch": 0.4986051502145923, "grad_norm": 0.4140625, "learning_rate": 4.936533038089831e-06, "loss": 2.2137, "step": 9294 }, { "epoch": 0.4986587982832618, "grad_norm": 0.462890625, "learning_rate": 4.936513585387055e-06, "loss": 2.341, "step": 9295 }, { "epoch": 0.49871244635193135, "grad_norm": 0.419921875, "learning_rate": 4.936494129741934e-06, "loss": 2.3039, "step": 9296 }, { "epoch": 0.4987660944206009, "grad_norm": 0.458984375, "learning_rate": 4.936474671154496e-06, "loss": 2.2981, "step": 9297 }, { "epoch": 0.4988197424892704, "grad_norm": 0.4296875, "learning_rate": 4.936455209624759e-06, "loss": 2.3219, "step": 9298 }, { "epoch": 0.4988733905579399, "grad_norm": 0.5390625, "learning_rate": 4.9364357451527525e-06, "loss": 2.2345, "step": 9299 }, { "epoch": 0.4989270386266094, "grad_norm": 0.39453125, "learning_rate": 4.936416277738496e-06, "loss": 2.0531, "step": 9300 }, { "epoch": 0.49898068669527895, "grad_norm": 0.42578125, "learning_rate": 4.936396807382013e-06, "loss": 2.4474, "step": 9301 }, { "epoch": 0.4990343347639485, "grad_norm": 0.451171875, "learning_rate": 4.93637733408333e-06, "loss": 2.0975, "step": 9302 }, { "epoch": 0.499087982832618, "grad_norm": 0.482421875, "learning_rate": 4.936357857842467e-06, "loss": 2.351, "step": 9303 }, { "epoch": 0.49914163090128755, "grad_norm": 0.400390625, "learning_rate": 4.93633837865945e-06, "loss": 2.2893, "step": 9304 }, { "epoch": 0.4991952789699571, "grad_norm": 0.703125, "learning_rate": 4.936318896534301e-06, "loss": 2.2899, "step": 9305 }, { "epoch": 0.4992489270386266, "grad_norm": 0.390625, "learning_rate": 4.936299411467045e-06, "loss": 2.1725, "step": 9306 }, { "epoch": 0.49930257510729614, "grad_norm": 0.3203125, "learning_rate": 4.936279923457704e-06, "loss": 1.8849, "step": 9307 }, { "epoch": 0.4993562231759657, "grad_norm": 0.41015625, "learning_rate": 4.936260432506302e-06, "loss": 2.0758, "step": 9308 }, { "epoch": 0.4994098712446352, "grad_norm": 0.408203125, "learning_rate": 4.936240938612864e-06, "loss": 2.4659, "step": 9309 }, { "epoch": 0.49946351931330474, "grad_norm": 0.4921875, "learning_rate": 4.9362214417774115e-06, "loss": 2.5298, "step": 9310 }, { "epoch": 0.49951716738197427, "grad_norm": 0.3828125, "learning_rate": 4.9362019419999686e-06, "loss": 2.1557, "step": 9311 }, { "epoch": 0.4995708154506438, "grad_norm": 0.55078125, "learning_rate": 4.93618243928056e-06, "loss": 2.2923, "step": 9312 }, { "epoch": 0.4996244635193133, "grad_norm": 0.39453125, "learning_rate": 4.936162933619208e-06, "loss": 2.2036, "step": 9313 }, { "epoch": 0.4996781115879828, "grad_norm": 0.447265625, "learning_rate": 4.936143425015936e-06, "loss": 2.3133, "step": 9314 }, { "epoch": 0.49973175965665234, "grad_norm": 0.380859375, "learning_rate": 4.936123913470768e-06, "loss": 2.3036, "step": 9315 }, { "epoch": 0.4997854077253219, "grad_norm": 0.365234375, "learning_rate": 4.936104398983727e-06, "loss": 2.2649, "step": 9316 }, { "epoch": 0.4998390557939914, "grad_norm": 0.40625, "learning_rate": 4.936084881554839e-06, "loss": 2.3157, "step": 9317 }, { "epoch": 0.49989270386266094, "grad_norm": 0.373046875, "learning_rate": 4.936065361184125e-06, "loss": 2.2739, "step": 9318 }, { "epoch": 0.49994635193133047, "grad_norm": 0.373046875, "learning_rate": 4.93604583787161e-06, "loss": 1.8952, "step": 9319 }, { "epoch": 0.5, "grad_norm": 0.423828125, "learning_rate": 4.936026311617316e-06, "loss": 2.2677, "step": 9320 }, { "epoch": 0.5000536480686695, "grad_norm": 0.42578125, "learning_rate": 4.936006782421267e-06, "loss": 2.2763, "step": 9321 }, { "epoch": 0.5001072961373391, "grad_norm": 0.341796875, "learning_rate": 4.935987250283488e-06, "loss": 2.0095, "step": 9322 }, { "epoch": 0.5001609442060085, "grad_norm": 0.484375, "learning_rate": 4.935967715204002e-06, "loss": 2.0474, "step": 9323 }, { "epoch": 0.5002145922746781, "grad_norm": 0.345703125, "learning_rate": 4.935948177182831e-06, "loss": 2.183, "step": 9324 }, { "epoch": 0.5002682403433476, "grad_norm": 0.419921875, "learning_rate": 4.935928636220001e-06, "loss": 2.4814, "step": 9325 }, { "epoch": 0.5003218884120172, "grad_norm": 0.4140625, "learning_rate": 4.935909092315534e-06, "loss": 2.1243, "step": 9326 }, { "epoch": 0.5003755364806867, "grad_norm": 0.41015625, "learning_rate": 4.935889545469454e-06, "loss": 2.3474, "step": 9327 }, { "epoch": 0.5004291845493563, "grad_norm": 0.408203125, "learning_rate": 4.935869995681785e-06, "loss": 1.9943, "step": 9328 }, { "epoch": 0.5004828326180257, "grad_norm": 0.478515625, "learning_rate": 4.93585044295255e-06, "loss": 2.1402, "step": 9329 }, { "epoch": 0.5005364806866953, "grad_norm": 0.45703125, "learning_rate": 4.9358308872817724e-06, "loss": 2.3229, "step": 9330 }, { "epoch": 0.5005901287553648, "grad_norm": 0.44140625, "learning_rate": 4.935811328669476e-06, "loss": 2.2235, "step": 9331 }, { "epoch": 0.5006437768240344, "grad_norm": 0.396484375, "learning_rate": 4.935791767115687e-06, "loss": 2.1504, "step": 9332 }, { "epoch": 0.5006974248927039, "grad_norm": 0.40625, "learning_rate": 4.9357722026204245e-06, "loss": 2.3909, "step": 9333 }, { "epoch": 0.5007510729613734, "grad_norm": 0.41796875, "learning_rate": 4.935752635183715e-06, "loss": 2.1624, "step": 9334 }, { "epoch": 0.5008047210300429, "grad_norm": 0.5234375, "learning_rate": 4.935733064805581e-06, "loss": 2.1949, "step": 9335 }, { "epoch": 0.5008583690987124, "grad_norm": 0.3984375, "learning_rate": 4.935713491486048e-06, "loss": 2.3485, "step": 9336 }, { "epoch": 0.500912017167382, "grad_norm": 0.39453125, "learning_rate": 4.935693915225137e-06, "loss": 2.2943, "step": 9337 }, { "epoch": 0.5009656652360515, "grad_norm": 0.44140625, "learning_rate": 4.935674336022873e-06, "loss": 2.6318, "step": 9338 }, { "epoch": 0.501019313304721, "grad_norm": 0.439453125, "learning_rate": 4.93565475387928e-06, "loss": 2.3921, "step": 9339 }, { "epoch": 0.5010729613733905, "grad_norm": 0.3671875, "learning_rate": 4.935635168794382e-06, "loss": 2.3463, "step": 9340 }, { "epoch": 0.5011266094420601, "grad_norm": 0.44921875, "learning_rate": 4.9356155807682004e-06, "loss": 2.2285, "step": 9341 }, { "epoch": 0.5011802575107296, "grad_norm": 0.486328125, "learning_rate": 4.935595989800761e-06, "loss": 2.3796, "step": 9342 }, { "epoch": 0.5012339055793992, "grad_norm": 0.4609375, "learning_rate": 4.9355763958920865e-06, "loss": 2.2339, "step": 9343 }, { "epoch": 0.5012875536480687, "grad_norm": 0.396484375, "learning_rate": 4.9355567990422e-06, "loss": 2.0665, "step": 9344 }, { "epoch": 0.5013412017167382, "grad_norm": 0.47265625, "learning_rate": 4.935537199251128e-06, "loss": 2.3104, "step": 9345 }, { "epoch": 0.5013948497854077, "grad_norm": 0.427734375, "learning_rate": 4.935517596518891e-06, "loss": 2.434, "step": 9346 }, { "epoch": 0.5014484978540773, "grad_norm": 0.392578125, "learning_rate": 4.9354979908455135e-06, "loss": 2.1805, "step": 9347 }, { "epoch": 0.5015021459227468, "grad_norm": 0.458984375, "learning_rate": 4.935478382231019e-06, "loss": 2.0349, "step": 9348 }, { "epoch": 0.5015557939914163, "grad_norm": 0.5859375, "learning_rate": 4.935458770675433e-06, "loss": 2.267, "step": 9349 }, { "epoch": 0.5016094420600858, "grad_norm": 0.5390625, "learning_rate": 4.935439156178778e-06, "loss": 2.3824, "step": 9350 }, { "epoch": 0.5016630901287553, "grad_norm": 0.369140625, "learning_rate": 4.9354195387410765e-06, "loss": 2.2082, "step": 9351 }, { "epoch": 0.5017167381974249, "grad_norm": 0.41796875, "learning_rate": 4.935399918362353e-06, "loss": 2.1732, "step": 9352 }, { "epoch": 0.5017703862660944, "grad_norm": 0.65234375, "learning_rate": 4.9353802950426324e-06, "loss": 1.462, "step": 9353 }, { "epoch": 0.501824034334764, "grad_norm": 0.45703125, "learning_rate": 4.935360668781938e-06, "loss": 2.5024, "step": 9354 }, { "epoch": 0.5018776824034334, "grad_norm": 0.380859375, "learning_rate": 4.9353410395802916e-06, "loss": 2.2391, "step": 9355 }, { "epoch": 0.501931330472103, "grad_norm": 0.4921875, "learning_rate": 4.935321407437719e-06, "loss": 2.4687, "step": 9356 }, { "epoch": 0.5019849785407725, "grad_norm": 0.39453125, "learning_rate": 4.935301772354242e-06, "loss": 2.4611, "step": 9357 }, { "epoch": 0.5020386266094421, "grad_norm": 0.43359375, "learning_rate": 4.935282134329886e-06, "loss": 2.0844, "step": 9358 }, { "epoch": 0.5020922746781116, "grad_norm": 0.44921875, "learning_rate": 4.935262493364675e-06, "loss": 2.2027, "step": 9359 }, { "epoch": 0.5021459227467812, "grad_norm": 0.4296875, "learning_rate": 4.935242849458632e-06, "loss": 2.2212, "step": 9360 }, { "epoch": 0.5021995708154506, "grad_norm": 0.4921875, "learning_rate": 4.93522320261178e-06, "loss": 2.6356, "step": 9361 }, { "epoch": 0.5022532188841202, "grad_norm": 0.408203125, "learning_rate": 4.935203552824143e-06, "loss": 2.2986, "step": 9362 }, { "epoch": 0.5023068669527897, "grad_norm": 0.6953125, "learning_rate": 4.935183900095745e-06, "loss": 2.1865, "step": 9363 }, { "epoch": 0.5023605150214592, "grad_norm": 0.439453125, "learning_rate": 4.93516424442661e-06, "loss": 2.2915, "step": 9364 }, { "epoch": 0.5024141630901288, "grad_norm": 0.46875, "learning_rate": 4.9351445858167615e-06, "loss": 2.4705, "step": 9365 }, { "epoch": 0.5024678111587982, "grad_norm": 0.373046875, "learning_rate": 4.935124924266224e-06, "loss": 2.1011, "step": 9366 }, { "epoch": 0.5025214592274678, "grad_norm": 0.462890625, "learning_rate": 4.935105259775019e-06, "loss": 2.4613, "step": 9367 }, { "epoch": 0.5025751072961373, "grad_norm": 1.0625, "learning_rate": 4.9350855923431735e-06, "loss": 2.3828, "step": 9368 }, { "epoch": 0.5026287553648069, "grad_norm": 0.423828125, "learning_rate": 4.935065921970709e-06, "loss": 2.2045, "step": 9369 }, { "epoch": 0.5026824034334764, "grad_norm": 0.431640625, "learning_rate": 4.935046248657649e-06, "loss": 2.31, "step": 9370 }, { "epoch": 0.502736051502146, "grad_norm": 0.4140625, "learning_rate": 4.935026572404018e-06, "loss": 2.1086, "step": 9371 }, { "epoch": 0.5027896995708154, "grad_norm": 0.45703125, "learning_rate": 4.93500689320984e-06, "loss": 2.1684, "step": 9372 }, { "epoch": 0.502843347639485, "grad_norm": 0.4296875, "learning_rate": 4.93498721107514e-06, "loss": 2.2968, "step": 9373 }, { "epoch": 0.5028969957081545, "grad_norm": 0.47265625, "learning_rate": 4.9349675259999385e-06, "loss": 2.2677, "step": 9374 }, { "epoch": 0.5029506437768241, "grad_norm": 0.392578125, "learning_rate": 4.934947837984262e-06, "loss": 2.2799, "step": 9375 }, { "epoch": 0.5030042918454936, "grad_norm": 0.431640625, "learning_rate": 4.934928147028133e-06, "loss": 2.3797, "step": 9376 }, { "epoch": 0.5030579399141631, "grad_norm": 0.4609375, "learning_rate": 4.9349084531315764e-06, "loss": 2.3654, "step": 9377 }, { "epoch": 0.5031115879828326, "grad_norm": 0.43359375, "learning_rate": 4.934888756294614e-06, "loss": 2.4059, "step": 9378 }, { "epoch": 0.5031652360515021, "grad_norm": 0.40234375, "learning_rate": 4.9348690565172715e-06, "loss": 2.2593, "step": 9379 }, { "epoch": 0.5032188841201717, "grad_norm": 0.51171875, "learning_rate": 4.934849353799572e-06, "loss": 2.2849, "step": 9380 }, { "epoch": 0.5032725321888412, "grad_norm": 0.439453125, "learning_rate": 4.934829648141539e-06, "loss": 2.3301, "step": 9381 }, { "epoch": 0.5033261802575107, "grad_norm": 0.447265625, "learning_rate": 4.934809939543197e-06, "loss": 2.3178, "step": 9382 }, { "epoch": 0.5033798283261802, "grad_norm": 0.5, "learning_rate": 4.934790228004569e-06, "loss": 2.1313, "step": 9383 }, { "epoch": 0.5034334763948498, "grad_norm": 0.4140625, "learning_rate": 4.9347705135256795e-06, "loss": 2.4243, "step": 9384 }, { "epoch": 0.5034871244635193, "grad_norm": 0.4296875, "learning_rate": 4.934750796106552e-06, "loss": 2.2823, "step": 9385 }, { "epoch": 0.5035407725321889, "grad_norm": 0.408203125, "learning_rate": 4.93473107574721e-06, "loss": 2.2749, "step": 9386 }, { "epoch": 0.5035944206008584, "grad_norm": 0.4375, "learning_rate": 4.934711352447678e-06, "loss": 2.5147, "step": 9387 }, { "epoch": 0.5036480686695279, "grad_norm": 0.40234375, "learning_rate": 4.934691626207979e-06, "loss": 1.6196, "step": 9388 }, { "epoch": 0.5037017167381974, "grad_norm": 0.3828125, "learning_rate": 4.934671897028138e-06, "loss": 2.3195, "step": 9389 }, { "epoch": 0.503755364806867, "grad_norm": 0.470703125, "learning_rate": 4.934652164908178e-06, "loss": 2.2973, "step": 9390 }, { "epoch": 0.5038090128755365, "grad_norm": 0.404296875, "learning_rate": 4.934632429848122e-06, "loss": 2.1725, "step": 9391 }, { "epoch": 0.503862660944206, "grad_norm": 0.443359375, "learning_rate": 4.934612691847995e-06, "loss": 2.4729, "step": 9392 }, { "epoch": 0.5039163090128755, "grad_norm": 0.388671875, "learning_rate": 4.934592950907821e-06, "loss": 2.2073, "step": 9393 }, { "epoch": 0.503969957081545, "grad_norm": 0.48046875, "learning_rate": 4.934573207027624e-06, "loss": 2.3639, "step": 9394 }, { "epoch": 0.5040236051502146, "grad_norm": 0.4375, "learning_rate": 4.934553460207426e-06, "loss": 2.2812, "step": 9395 }, { "epoch": 0.5040772532188841, "grad_norm": 0.4140625, "learning_rate": 4.934533710447253e-06, "loss": 2.2681, "step": 9396 }, { "epoch": 0.5041309012875537, "grad_norm": 0.408203125, "learning_rate": 4.934513957747128e-06, "loss": 2.1056, "step": 9397 }, { "epoch": 0.5041845493562231, "grad_norm": 0.443359375, "learning_rate": 4.934494202107074e-06, "loss": 2.3168, "step": 9398 }, { "epoch": 0.5042381974248927, "grad_norm": 0.4296875, "learning_rate": 4.934474443527117e-06, "loss": 2.3414, "step": 9399 }, { "epoch": 0.5042918454935622, "grad_norm": 0.515625, "learning_rate": 4.934454682007278e-06, "loss": 2.3189, "step": 9400 }, { "epoch": 0.5043454935622318, "grad_norm": 0.359375, "learning_rate": 4.934434917547584e-06, "loss": 2.1705, "step": 9401 }, { "epoch": 0.5043991416309013, "grad_norm": 0.416015625, "learning_rate": 4.934415150148056e-06, "loss": 2.2432, "step": 9402 }, { "epoch": 0.5044527896995709, "grad_norm": 0.53125, "learning_rate": 4.93439537980872e-06, "loss": 2.6028, "step": 9403 }, { "epoch": 0.5045064377682403, "grad_norm": 0.4375, "learning_rate": 4.9343756065295976e-06, "loss": 2.4398, "step": 9404 }, { "epoch": 0.5045600858369099, "grad_norm": 0.5, "learning_rate": 4.934355830310715e-06, "loss": 2.3987, "step": 9405 }, { "epoch": 0.5046137339055794, "grad_norm": 0.451171875, "learning_rate": 4.934336051152096e-06, "loss": 1.9314, "step": 9406 }, { "epoch": 0.5046673819742489, "grad_norm": 0.353515625, "learning_rate": 4.934316269053763e-06, "loss": 2.3499, "step": 9407 }, { "epoch": 0.5047210300429185, "grad_norm": 0.515625, "learning_rate": 4.93429648401574e-06, "loss": 2.291, "step": 9408 }, { "epoch": 0.5047746781115879, "grad_norm": 0.46875, "learning_rate": 4.934276696038052e-06, "loss": 2.5099, "step": 9409 }, { "epoch": 0.5048283261802575, "grad_norm": 0.4140625, "learning_rate": 4.934256905120723e-06, "loss": 2.4068, "step": 9410 }, { "epoch": 0.504881974248927, "grad_norm": 0.5, "learning_rate": 4.934237111263774e-06, "loss": 2.3279, "step": 9411 }, { "epoch": 0.5049356223175966, "grad_norm": 0.427734375, "learning_rate": 4.934217314467234e-06, "loss": 2.2949, "step": 9412 }, { "epoch": 0.5049892703862661, "grad_norm": 0.40234375, "learning_rate": 4.934197514731122e-06, "loss": 2.1172, "step": 9413 }, { "epoch": 0.5050429184549357, "grad_norm": 0.455078125, "learning_rate": 4.934177712055464e-06, "loss": 2.4203, "step": 9414 }, { "epoch": 0.5050965665236051, "grad_norm": 0.341796875, "learning_rate": 4.934157906440285e-06, "loss": 2.1862, "step": 9415 }, { "epoch": 0.5051502145922747, "grad_norm": 0.376953125, "learning_rate": 4.934138097885607e-06, "loss": 2.1097, "step": 9416 }, { "epoch": 0.5052038626609442, "grad_norm": 0.4140625, "learning_rate": 4.934118286391455e-06, "loss": 2.2313, "step": 9417 }, { "epoch": 0.5052575107296138, "grad_norm": 1.296875, "learning_rate": 4.934098471957854e-06, "loss": 2.5502, "step": 9418 }, { "epoch": 0.5053111587982833, "grad_norm": 1.1953125, "learning_rate": 4.934078654584824e-06, "loss": 2.1627, "step": 9419 }, { "epoch": 0.5053648068669528, "grad_norm": 0.38671875, "learning_rate": 4.9340588342723925e-06, "loss": 2.1177, "step": 9420 }, { "epoch": 0.5054184549356223, "grad_norm": 0.45703125, "learning_rate": 4.934039011020583e-06, "loss": 2.363, "step": 9421 }, { "epoch": 0.5054721030042918, "grad_norm": 0.447265625, "learning_rate": 4.934019184829419e-06, "loss": 2.3207, "step": 9422 }, { "epoch": 0.5055257510729614, "grad_norm": 0.462890625, "learning_rate": 4.933999355698923e-06, "loss": 2.4039, "step": 9423 }, { "epoch": 0.5055793991416309, "grad_norm": 0.48828125, "learning_rate": 4.933979523629121e-06, "loss": 2.2582, "step": 9424 }, { "epoch": 0.5056330472103004, "grad_norm": 0.404296875, "learning_rate": 4.933959688620037e-06, "loss": 2.1862, "step": 9425 }, { "epoch": 0.5056866952789699, "grad_norm": 0.427734375, "learning_rate": 4.933939850671693e-06, "loss": 2.3937, "step": 9426 }, { "epoch": 0.5057403433476395, "grad_norm": 0.408203125, "learning_rate": 4.933920009784115e-06, "loss": 2.1158, "step": 9427 }, { "epoch": 0.505793991416309, "grad_norm": 0.443359375, "learning_rate": 4.933900165957325e-06, "loss": 2.1401, "step": 9428 }, { "epoch": 0.5058476394849786, "grad_norm": 0.42578125, "learning_rate": 4.933880319191349e-06, "loss": 2.1899, "step": 9429 }, { "epoch": 0.505901287553648, "grad_norm": 0.64453125, "learning_rate": 4.933860469486209e-06, "loss": 2.5466, "step": 9430 }, { "epoch": 0.5059549356223176, "grad_norm": 0.52734375, "learning_rate": 4.933840616841931e-06, "loss": 2.2126, "step": 9431 }, { "epoch": 0.5060085836909871, "grad_norm": 0.376953125, "learning_rate": 4.933820761258538e-06, "loss": 2.3262, "step": 9432 }, { "epoch": 0.5060622317596567, "grad_norm": 0.470703125, "learning_rate": 4.933800902736053e-06, "loss": 2.0528, "step": 9433 }, { "epoch": 0.5061158798283262, "grad_norm": 0.4375, "learning_rate": 4.933781041274502e-06, "loss": 2.2817, "step": 9434 }, { "epoch": 0.5061695278969958, "grad_norm": 0.453125, "learning_rate": 4.933761176873907e-06, "loss": 2.3064, "step": 9435 }, { "epoch": 0.5062231759656652, "grad_norm": 0.3984375, "learning_rate": 4.933741309534294e-06, "loss": 2.3558, "step": 9436 }, { "epoch": 0.5062768240343347, "grad_norm": 0.5, "learning_rate": 4.933721439255684e-06, "loss": 2.4515, "step": 9437 }, { "epoch": 0.5063304721030043, "grad_norm": 0.43359375, "learning_rate": 4.933701566038104e-06, "loss": 2.1736, "step": 9438 }, { "epoch": 0.5063841201716738, "grad_norm": 0.431640625, "learning_rate": 4.933681689881577e-06, "loss": 2.3944, "step": 9439 }, { "epoch": 0.5064377682403434, "grad_norm": 0.71875, "learning_rate": 4.933661810786127e-06, "loss": 2.0828, "step": 9440 }, { "epoch": 0.5064914163090128, "grad_norm": 1.109375, "learning_rate": 4.9336419287517774e-06, "loss": 2.1455, "step": 9441 }, { "epoch": 0.5065450643776824, "grad_norm": 0.47265625, "learning_rate": 4.933622043778553e-06, "loss": 2.3745, "step": 9442 }, { "epoch": 0.5065987124463519, "grad_norm": 0.59375, "learning_rate": 4.933602155866477e-06, "loss": 2.3444, "step": 9443 }, { "epoch": 0.5066523605150215, "grad_norm": 0.39453125, "learning_rate": 4.933582265015574e-06, "loss": 2.2887, "step": 9444 }, { "epoch": 0.506706008583691, "grad_norm": 0.455078125, "learning_rate": 4.933562371225869e-06, "loss": 2.2472, "step": 9445 }, { "epoch": 0.5067596566523606, "grad_norm": 0.53515625, "learning_rate": 4.933542474497384e-06, "loss": 2.5082, "step": 9446 }, { "epoch": 0.50681330472103, "grad_norm": 0.375, "learning_rate": 4.933522574830144e-06, "loss": 2.353, "step": 9447 }, { "epoch": 0.5068669527896996, "grad_norm": 0.40625, "learning_rate": 4.933502672224173e-06, "loss": 2.0658, "step": 9448 }, { "epoch": 0.5069206008583691, "grad_norm": 0.443359375, "learning_rate": 4.933482766679495e-06, "loss": 2.5004, "step": 9449 }, { "epoch": 0.5069742489270386, "grad_norm": 0.421875, "learning_rate": 4.933462858196134e-06, "loss": 2.2097, "step": 9450 }, { "epoch": 0.5070278969957082, "grad_norm": 1.109375, "learning_rate": 4.933442946774115e-06, "loss": 2.4353, "step": 9451 }, { "epoch": 0.5070815450643776, "grad_norm": 0.3828125, "learning_rate": 4.93342303241346e-06, "loss": 1.9674, "step": 9452 }, { "epoch": 0.5071351931330472, "grad_norm": 0.46484375, "learning_rate": 4.933403115114196e-06, "loss": 2.1256, "step": 9453 }, { "epoch": 0.5071888412017167, "grad_norm": 0.455078125, "learning_rate": 4.9333831948763436e-06, "loss": 2.3472, "step": 9454 }, { "epoch": 0.5072424892703863, "grad_norm": 0.58203125, "learning_rate": 4.933363271699929e-06, "loss": 2.2991, "step": 9455 }, { "epoch": 0.5072961373390558, "grad_norm": 0.447265625, "learning_rate": 4.933343345584975e-06, "loss": 2.4147, "step": 9456 }, { "epoch": 0.5073497854077254, "grad_norm": 1.28125, "learning_rate": 4.933323416531508e-06, "loss": 2.2726, "step": 9457 }, { "epoch": 0.5074034334763948, "grad_norm": 0.404296875, "learning_rate": 4.93330348453955e-06, "loss": 2.1894, "step": 9458 }, { "epoch": 0.5074570815450644, "grad_norm": 0.447265625, "learning_rate": 4.933283549609125e-06, "loss": 1.3205, "step": 9459 }, { "epoch": 0.5075107296137339, "grad_norm": 0.455078125, "learning_rate": 4.933263611740259e-06, "loss": 2.19, "step": 9460 }, { "epoch": 0.5075643776824035, "grad_norm": 1.1171875, "learning_rate": 4.933243670932974e-06, "loss": 2.2489, "step": 9461 }, { "epoch": 0.507618025751073, "grad_norm": 0.490234375, "learning_rate": 4.933223727187294e-06, "loss": 1.692, "step": 9462 }, { "epoch": 0.5076716738197425, "grad_norm": 0.4453125, "learning_rate": 4.933203780503244e-06, "loss": 2.2031, "step": 9463 }, { "epoch": 0.507725321888412, "grad_norm": 0.396484375, "learning_rate": 4.933183830880849e-06, "loss": 2.4425, "step": 9464 }, { "epoch": 0.5077789699570815, "grad_norm": 0.369140625, "learning_rate": 4.933163878320132e-06, "loss": 2.3296, "step": 9465 }, { "epoch": 0.5078326180257511, "grad_norm": 0.388671875, "learning_rate": 4.933143922821116e-06, "loss": 2.1272, "step": 9466 }, { "epoch": 0.5078862660944206, "grad_norm": 0.431640625, "learning_rate": 4.933123964383827e-06, "loss": 2.4244, "step": 9467 }, { "epoch": 0.5079399141630901, "grad_norm": 0.47265625, "learning_rate": 4.933104003008289e-06, "loss": 2.2978, "step": 9468 }, { "epoch": 0.5079935622317596, "grad_norm": 0.396484375, "learning_rate": 4.933084038694525e-06, "loss": 2.3053, "step": 9469 }, { "epoch": 0.5080472103004292, "grad_norm": 0.427734375, "learning_rate": 4.9330640714425595e-06, "loss": 2.0927, "step": 9470 }, { "epoch": 0.5081008583690987, "grad_norm": 0.42578125, "learning_rate": 4.9330441012524165e-06, "loss": 2.1458, "step": 9471 }, { "epoch": 0.5081545064377683, "grad_norm": 1.265625, "learning_rate": 4.933024128124121e-06, "loss": 2.2109, "step": 9472 }, { "epoch": 0.5082081545064377, "grad_norm": 0.419921875, "learning_rate": 4.933004152057696e-06, "loss": 2.253, "step": 9473 }, { "epoch": 0.5082618025751073, "grad_norm": 0.447265625, "learning_rate": 4.932984173053166e-06, "loss": 1.8778, "step": 9474 }, { "epoch": 0.5083154506437768, "grad_norm": 0.423828125, "learning_rate": 4.932964191110555e-06, "loss": 2.3539, "step": 9475 }, { "epoch": 0.5083690987124464, "grad_norm": 0.396484375, "learning_rate": 4.9329442062298884e-06, "loss": 2.1395, "step": 9476 }, { "epoch": 0.5084227467811159, "grad_norm": 0.5, "learning_rate": 4.932924218411188e-06, "loss": 2.4196, "step": 9477 }, { "epoch": 0.5084763948497855, "grad_norm": 0.41015625, "learning_rate": 4.9329042276544795e-06, "loss": 2.2441, "step": 9478 }, { "epoch": 0.5085300429184549, "grad_norm": 0.4140625, "learning_rate": 4.932884233959787e-06, "loss": 2.3427, "step": 9479 }, { "epoch": 0.5085836909871244, "grad_norm": 0.41015625, "learning_rate": 4.932864237327135e-06, "loss": 2.0401, "step": 9480 }, { "epoch": 0.508637339055794, "grad_norm": 0.330078125, "learning_rate": 4.932844237756546e-06, "loss": 1.9794, "step": 9481 }, { "epoch": 0.5086909871244635, "grad_norm": 0.95703125, "learning_rate": 4.9328242352480455e-06, "loss": 2.4262, "step": 9482 }, { "epoch": 0.5087446351931331, "grad_norm": 0.431640625, "learning_rate": 4.932804229801658e-06, "loss": 2.4221, "step": 9483 }, { "epoch": 0.5087982832618025, "grad_norm": 0.40625, "learning_rate": 4.932784221417406e-06, "loss": 2.0419, "step": 9484 }, { "epoch": 0.5088519313304721, "grad_norm": 0.38671875, "learning_rate": 4.932764210095314e-06, "loss": 2.0586, "step": 9485 }, { "epoch": 0.5089055793991416, "grad_norm": 0.43359375, "learning_rate": 4.932744195835408e-06, "loss": 2.2587, "step": 9486 }, { "epoch": 0.5089592274678112, "grad_norm": 0.408203125, "learning_rate": 4.932724178637711e-06, "loss": 2.2588, "step": 9487 }, { "epoch": 0.5090128755364807, "grad_norm": 0.4453125, "learning_rate": 4.932704158502248e-06, "loss": 2.2626, "step": 9488 }, { "epoch": 0.5090665236051503, "grad_norm": 0.361328125, "learning_rate": 4.932684135429041e-06, "loss": 1.9752, "step": 9489 }, { "epoch": 0.5091201716738197, "grad_norm": 0.40625, "learning_rate": 4.932664109418116e-06, "loss": 2.3369, "step": 9490 }, { "epoch": 0.5091738197424893, "grad_norm": 0.48046875, "learning_rate": 4.932644080469497e-06, "loss": 1.4644, "step": 9491 }, { "epoch": 0.5092274678111588, "grad_norm": 0.53125, "learning_rate": 4.932624048583207e-06, "loss": 2.2756, "step": 9492 }, { "epoch": 0.5092811158798283, "grad_norm": 0.43359375, "learning_rate": 4.932604013759271e-06, "loss": 2.3141, "step": 9493 }, { "epoch": 0.5093347639484979, "grad_norm": 0.4140625, "learning_rate": 4.932583975997714e-06, "loss": 2.3217, "step": 9494 }, { "epoch": 0.5093884120171673, "grad_norm": 0.427734375, "learning_rate": 4.93256393529856e-06, "loss": 2.3763, "step": 9495 }, { "epoch": 0.5094420600858369, "grad_norm": 0.5390625, "learning_rate": 4.932543891661832e-06, "loss": 2.2745, "step": 9496 }, { "epoch": 0.5094957081545064, "grad_norm": 0.400390625, "learning_rate": 4.932523845087554e-06, "loss": 2.4843, "step": 9497 }, { "epoch": 0.509549356223176, "grad_norm": 0.412109375, "learning_rate": 4.932503795575752e-06, "loss": 2.2295, "step": 9498 }, { "epoch": 0.5096030042918455, "grad_norm": 0.4296875, "learning_rate": 4.932483743126449e-06, "loss": 2.2582, "step": 9499 }, { "epoch": 0.509656652360515, "grad_norm": 0.435546875, "learning_rate": 4.9324636877396706e-06, "loss": 1.9107, "step": 9500 }, { "epoch": 0.5097103004291845, "grad_norm": 0.4375, "learning_rate": 4.9324436294154385e-06, "loss": 2.4031, "step": 9501 }, { "epoch": 0.5097639484978541, "grad_norm": 0.40625, "learning_rate": 4.932423568153779e-06, "loss": 2.1025, "step": 9502 }, { "epoch": 0.5098175965665236, "grad_norm": 0.392578125, "learning_rate": 4.9324035039547155e-06, "loss": 2.3004, "step": 9503 }, { "epoch": 0.5098712446351932, "grad_norm": 0.458984375, "learning_rate": 4.932383436818273e-06, "loss": 2.0945, "step": 9504 }, { "epoch": 0.5099248927038627, "grad_norm": 0.51953125, "learning_rate": 4.932363366744474e-06, "loss": 2.2972, "step": 9505 }, { "epoch": 0.5099785407725322, "grad_norm": 0.44921875, "learning_rate": 4.932343293733345e-06, "loss": 2.4725, "step": 9506 }, { "epoch": 0.5100321888412017, "grad_norm": 0.392578125, "learning_rate": 4.932323217784909e-06, "loss": 2.188, "step": 9507 }, { "epoch": 0.5100858369098712, "grad_norm": 0.380859375, "learning_rate": 4.932303138899189e-06, "loss": 2.4479, "step": 9508 }, { "epoch": 0.5101394849785408, "grad_norm": 0.408203125, "learning_rate": 4.932283057076213e-06, "loss": 2.0628, "step": 9509 }, { "epoch": 0.5101931330472103, "grad_norm": 0.53125, "learning_rate": 4.932262972316001e-06, "loss": 2.5402, "step": 9510 }, { "epoch": 0.5102467811158798, "grad_norm": 0.396484375, "learning_rate": 4.93224288461858e-06, "loss": 2.3901, "step": 9511 }, { "epoch": 0.5103004291845493, "grad_norm": 0.447265625, "learning_rate": 4.932222793983973e-06, "loss": 2.4004, "step": 9512 }, { "epoch": 0.5103540772532189, "grad_norm": 0.419921875, "learning_rate": 4.932202700412204e-06, "loss": 2.2579, "step": 9513 }, { "epoch": 0.5104077253218884, "grad_norm": 0.490234375, "learning_rate": 4.9321826039032985e-06, "loss": 2.4916, "step": 9514 }, { "epoch": 0.510461373390558, "grad_norm": 0.50390625, "learning_rate": 4.93216250445728e-06, "loss": 1.6951, "step": 9515 }, { "epoch": 0.5105150214592274, "grad_norm": 0.4765625, "learning_rate": 4.932142402074174e-06, "loss": 2.4089, "step": 9516 }, { "epoch": 0.510568669527897, "grad_norm": 0.421875, "learning_rate": 4.932122296754003e-06, "loss": 2.361, "step": 9517 }, { "epoch": 0.5106223175965665, "grad_norm": 0.484375, "learning_rate": 4.932102188496791e-06, "loss": 2.0864, "step": 9518 }, { "epoch": 0.5106759656652361, "grad_norm": 0.369140625, "learning_rate": 4.932082077302564e-06, "loss": 1.8203, "step": 9519 }, { "epoch": 0.5107296137339056, "grad_norm": 0.41015625, "learning_rate": 4.932061963171346e-06, "loss": 2.3711, "step": 9520 }, { "epoch": 0.5107832618025752, "grad_norm": 0.48046875, "learning_rate": 4.932041846103162e-06, "loss": 2.3159, "step": 9521 }, { "epoch": 0.5108369098712446, "grad_norm": 0.5390625, "learning_rate": 4.932021726098033e-06, "loss": 2.2124, "step": 9522 }, { "epoch": 0.5108905579399141, "grad_norm": 0.470703125, "learning_rate": 4.932001603155986e-06, "loss": 2.3006, "step": 9523 }, { "epoch": 0.5109442060085837, "grad_norm": 0.486328125, "learning_rate": 4.931981477277045e-06, "loss": 2.3828, "step": 9524 }, { "epoch": 0.5109978540772532, "grad_norm": 0.41015625, "learning_rate": 4.9319613484612335e-06, "loss": 2.2093, "step": 9525 }, { "epoch": 0.5110515021459228, "grad_norm": 0.447265625, "learning_rate": 4.9319412167085775e-06, "loss": 2.2854, "step": 9526 }, { "epoch": 0.5111051502145922, "grad_norm": 0.357421875, "learning_rate": 4.9319210820191e-06, "loss": 2.1398, "step": 9527 }, { "epoch": 0.5111587982832618, "grad_norm": 0.443359375, "learning_rate": 4.9319009443928244e-06, "loss": 2.2713, "step": 9528 }, { "epoch": 0.5112124463519313, "grad_norm": 0.37890625, "learning_rate": 4.931880803829777e-06, "loss": 2.1954, "step": 9529 }, { "epoch": 0.5112660944206009, "grad_norm": 0.42578125, "learning_rate": 4.931860660329981e-06, "loss": 2.4021, "step": 9530 }, { "epoch": 0.5113197424892704, "grad_norm": 0.439453125, "learning_rate": 4.931840513893461e-06, "loss": 2.216, "step": 9531 }, { "epoch": 0.51137339055794, "grad_norm": 0.609375, "learning_rate": 4.931820364520241e-06, "loss": 2.2574, "step": 9532 }, { "epoch": 0.5114270386266094, "grad_norm": 0.427734375, "learning_rate": 4.931800212210346e-06, "loss": 2.2063, "step": 9533 }, { "epoch": 0.511480686695279, "grad_norm": 0.4140625, "learning_rate": 4.931780056963799e-06, "loss": 2.2291, "step": 9534 }, { "epoch": 0.5115343347639485, "grad_norm": 2.328125, "learning_rate": 4.931759898780627e-06, "loss": 2.2109, "step": 9535 }, { "epoch": 0.511587982832618, "grad_norm": 0.5546875, "learning_rate": 4.931739737660852e-06, "loss": 2.4034, "step": 9536 }, { "epoch": 0.5116416309012876, "grad_norm": 0.53515625, "learning_rate": 4.931719573604499e-06, "loss": 2.3143, "step": 9537 }, { "epoch": 0.511695278969957, "grad_norm": 0.451171875, "learning_rate": 4.931699406611591e-06, "loss": 2.3254, "step": 9538 }, { "epoch": 0.5117489270386266, "grad_norm": 1.046875, "learning_rate": 4.931679236682156e-06, "loss": 2.4029, "step": 9539 }, { "epoch": 0.5118025751072961, "grad_norm": 0.5625, "learning_rate": 4.931659063816214e-06, "loss": 2.3407, "step": 9540 }, { "epoch": 0.5118562231759657, "grad_norm": 0.466796875, "learning_rate": 4.931638888013792e-06, "loss": 2.127, "step": 9541 }, { "epoch": 0.5119098712446352, "grad_norm": 0.36328125, "learning_rate": 4.931618709274914e-06, "loss": 2.3659, "step": 9542 }, { "epoch": 0.5119635193133047, "grad_norm": 0.48828125, "learning_rate": 4.931598527599603e-06, "loss": 2.3962, "step": 9543 }, { "epoch": 0.5120171673819742, "grad_norm": 0.359375, "learning_rate": 4.931578342987887e-06, "loss": 2.1787, "step": 9544 }, { "epoch": 0.5120708154506438, "grad_norm": 0.486328125, "learning_rate": 4.9315581554397866e-06, "loss": 2.3772, "step": 9545 }, { "epoch": 0.5121244635193133, "grad_norm": 0.42578125, "learning_rate": 4.931537964955326e-06, "loss": 2.5828, "step": 9546 }, { "epoch": 0.5121781115879829, "grad_norm": 0.462890625, "learning_rate": 4.931517771534533e-06, "loss": 2.6213, "step": 9547 }, { "epoch": 0.5122317596566524, "grad_norm": 0.412109375, "learning_rate": 4.93149757517743e-06, "loss": 2.2766, "step": 9548 }, { "epoch": 0.5122854077253219, "grad_norm": 0.44140625, "learning_rate": 4.93147737588404e-06, "loss": 2.2671, "step": 9549 }, { "epoch": 0.5123390557939914, "grad_norm": 0.466796875, "learning_rate": 4.93145717365439e-06, "loss": 2.6164, "step": 9550 }, { "epoch": 0.5123927038626609, "grad_norm": 0.6171875, "learning_rate": 4.931436968488502e-06, "loss": 2.4214, "step": 9551 }, { "epoch": 0.5124463519313305, "grad_norm": 0.5234375, "learning_rate": 4.931416760386403e-06, "loss": 2.6149, "step": 9552 }, { "epoch": 0.5125, "grad_norm": 0.5078125, "learning_rate": 4.931396549348115e-06, "loss": 2.3946, "step": 9553 }, { "epoch": 0.5125536480686695, "grad_norm": 0.44921875, "learning_rate": 4.9313763353736645e-06, "loss": 2.3335, "step": 9554 }, { "epoch": 0.512607296137339, "grad_norm": 0.4765625, "learning_rate": 4.931356118463074e-06, "loss": 2.4285, "step": 9555 }, { "epoch": 0.5126609442060086, "grad_norm": 0.466796875, "learning_rate": 4.931335898616368e-06, "loss": 2.1269, "step": 9556 }, { "epoch": 0.5127145922746781, "grad_norm": 0.53125, "learning_rate": 4.9313156758335735e-06, "loss": 2.3984, "step": 9557 }, { "epoch": 0.5127682403433477, "grad_norm": 1.65625, "learning_rate": 4.931295450114711e-06, "loss": 2.3368, "step": 9558 }, { "epoch": 0.5128218884120171, "grad_norm": 0.51953125, "learning_rate": 4.931275221459809e-06, "loss": 2.4168, "step": 9559 }, { "epoch": 0.5128755364806867, "grad_norm": 0.36328125, "learning_rate": 4.931254989868889e-06, "loss": 2.16, "step": 9560 }, { "epoch": 0.5129291845493562, "grad_norm": 0.412109375, "learning_rate": 4.931234755341976e-06, "loss": 2.3466, "step": 9561 }, { "epoch": 0.5129828326180258, "grad_norm": 0.431640625, "learning_rate": 4.931214517879096e-06, "loss": 2.311, "step": 9562 }, { "epoch": 0.5130364806866953, "grad_norm": 1.65625, "learning_rate": 4.931194277480271e-06, "loss": 2.1427, "step": 9563 }, { "epoch": 0.5130901287553649, "grad_norm": 0.703125, "learning_rate": 4.931174034145527e-06, "loss": 2.326, "step": 9564 }, { "epoch": 0.5131437768240343, "grad_norm": 0.3984375, "learning_rate": 4.931153787874888e-06, "loss": 2.2073, "step": 9565 }, { "epoch": 0.5131974248927038, "grad_norm": 0.421875, "learning_rate": 4.931133538668379e-06, "loss": 2.41, "step": 9566 }, { "epoch": 0.5132510729613734, "grad_norm": 0.546875, "learning_rate": 4.931113286526024e-06, "loss": 2.1801, "step": 9567 }, { "epoch": 0.5133047210300429, "grad_norm": 0.41796875, "learning_rate": 4.931093031447847e-06, "loss": 2.1273, "step": 9568 }, { "epoch": 0.5133583690987125, "grad_norm": 0.44140625, "learning_rate": 4.9310727734338735e-06, "loss": 2.1987, "step": 9569 }, { "epoch": 0.5134120171673819, "grad_norm": 0.4140625, "learning_rate": 4.931052512484127e-06, "loss": 2.4394, "step": 9570 }, { "epoch": 0.5134656652360515, "grad_norm": 0.416015625, "learning_rate": 4.931032248598633e-06, "loss": 2.3071, "step": 9571 }, { "epoch": 0.513519313304721, "grad_norm": 0.435546875, "learning_rate": 4.9310119817774145e-06, "loss": 2.1666, "step": 9572 }, { "epoch": 0.5135729613733906, "grad_norm": 0.400390625, "learning_rate": 4.930991712020498e-06, "loss": 2.5265, "step": 9573 }, { "epoch": 0.5136266094420601, "grad_norm": 0.408203125, "learning_rate": 4.930971439327906e-06, "loss": 2.2059, "step": 9574 }, { "epoch": 0.5136802575107297, "grad_norm": 0.40625, "learning_rate": 4.9309511636996635e-06, "loss": 2.2736, "step": 9575 }, { "epoch": 0.5137339055793991, "grad_norm": 0.447265625, "learning_rate": 4.930930885135795e-06, "loss": 2.0847, "step": 9576 }, { "epoch": 0.5137875536480687, "grad_norm": 0.4296875, "learning_rate": 4.930910603636326e-06, "loss": 2.5187, "step": 9577 }, { "epoch": 0.5138412017167382, "grad_norm": 0.388671875, "learning_rate": 4.93089031920128e-06, "loss": 2.287, "step": 9578 }, { "epoch": 0.5138948497854077, "grad_norm": 1.0625, "learning_rate": 4.9308700318306826e-06, "loss": 2.3277, "step": 9579 }, { "epoch": 0.5139484978540773, "grad_norm": 0.4140625, "learning_rate": 4.930849741524557e-06, "loss": 1.9247, "step": 9580 }, { "epoch": 0.5140021459227467, "grad_norm": 0.361328125, "learning_rate": 4.930829448282927e-06, "loss": 1.9688, "step": 9581 }, { "epoch": 0.5140557939914163, "grad_norm": 1.875, "learning_rate": 4.9308091521058194e-06, "loss": 2.1528, "step": 9582 }, { "epoch": 0.5141094420600858, "grad_norm": 0.4296875, "learning_rate": 4.930788852993257e-06, "loss": 2.4158, "step": 9583 }, { "epoch": 0.5141630901287554, "grad_norm": 0.369140625, "learning_rate": 4.930768550945265e-06, "loss": 2.4654, "step": 9584 }, { "epoch": 0.5142167381974249, "grad_norm": 0.41796875, "learning_rate": 4.930748245961868e-06, "loss": 2.2029, "step": 9585 }, { "epoch": 0.5142703862660944, "grad_norm": 0.423828125, "learning_rate": 4.930727938043091e-06, "loss": 2.2832, "step": 9586 }, { "epoch": 0.5143240343347639, "grad_norm": 0.482421875, "learning_rate": 4.930707627188957e-06, "loss": 2.1862, "step": 9587 }, { "epoch": 0.5143776824034335, "grad_norm": 1.0, "learning_rate": 4.9306873133994905e-06, "loss": 2.33, "step": 9588 }, { "epoch": 0.514431330472103, "grad_norm": 0.5078125, "learning_rate": 4.930666996674718e-06, "loss": 2.0833, "step": 9589 }, { "epoch": 0.5144849785407726, "grad_norm": 0.609375, "learning_rate": 4.930646677014662e-06, "loss": 2.3614, "step": 9590 }, { "epoch": 0.514538626609442, "grad_norm": 0.392578125, "learning_rate": 4.930626354419349e-06, "loss": 2.1358, "step": 9591 }, { "epoch": 0.5145922746781116, "grad_norm": 0.482421875, "learning_rate": 4.930606028888803e-06, "loss": 2.1597, "step": 9592 }, { "epoch": 0.5146459227467811, "grad_norm": 0.455078125, "learning_rate": 4.9305857004230464e-06, "loss": 2.1527, "step": 9593 }, { "epoch": 0.5146995708154506, "grad_norm": 0.4453125, "learning_rate": 4.930565369022107e-06, "loss": 2.2622, "step": 9594 }, { "epoch": 0.5147532188841202, "grad_norm": 0.435546875, "learning_rate": 4.930545034686006e-06, "loss": 2.4044, "step": 9595 }, { "epoch": 0.5148068669527897, "grad_norm": 0.4296875, "learning_rate": 4.9305246974147715e-06, "loss": 2.2754, "step": 9596 }, { "epoch": 0.5148605150214592, "grad_norm": 0.54296875, "learning_rate": 4.930504357208425e-06, "loss": 1.5049, "step": 9597 }, { "epoch": 0.5149141630901287, "grad_norm": 0.455078125, "learning_rate": 4.930484014066993e-06, "loss": 2.1946, "step": 9598 }, { "epoch": 0.5149678111587983, "grad_norm": 0.53515625, "learning_rate": 4.930463667990498e-06, "loss": 2.4098, "step": 9599 }, { "epoch": 0.5150214592274678, "grad_norm": 0.494140625, "learning_rate": 4.930443318978968e-06, "loss": 2.2889, "step": 9600 }, { "epoch": 0.5150751072961374, "grad_norm": 0.416015625, "learning_rate": 4.930422967032425e-06, "loss": 2.3265, "step": 9601 }, { "epoch": 0.5151287553648068, "grad_norm": 0.4296875, "learning_rate": 4.930402612150894e-06, "loss": 2.3606, "step": 9602 }, { "epoch": 0.5151824034334764, "grad_norm": 0.455078125, "learning_rate": 4.930382254334399e-06, "loss": 2.3631, "step": 9603 }, { "epoch": 0.5152360515021459, "grad_norm": 0.50390625, "learning_rate": 4.930361893582965e-06, "loss": 2.0727, "step": 9604 }, { "epoch": 0.5152896995708155, "grad_norm": 0.419921875, "learning_rate": 4.9303415298966185e-06, "loss": 2.3058, "step": 9605 }, { "epoch": 0.515343347639485, "grad_norm": 0.41796875, "learning_rate": 4.930321163275381e-06, "loss": 2.3325, "step": 9606 }, { "epoch": 0.5153969957081546, "grad_norm": 0.45703125, "learning_rate": 4.930300793719279e-06, "loss": 2.1517, "step": 9607 }, { "epoch": 0.515450643776824, "grad_norm": 0.41796875, "learning_rate": 4.930280421228337e-06, "loss": 2.2228, "step": 9608 }, { "epoch": 0.5155042918454935, "grad_norm": 0.9453125, "learning_rate": 4.93026004580258e-06, "loss": 2.2918, "step": 9609 }, { "epoch": 0.5155579399141631, "grad_norm": 0.443359375, "learning_rate": 4.930239667442031e-06, "loss": 2.2784, "step": 9610 }, { "epoch": 0.5156115879828326, "grad_norm": 0.470703125, "learning_rate": 4.930219286146714e-06, "loss": 2.3661, "step": 9611 }, { "epoch": 0.5156652360515022, "grad_norm": 0.41796875, "learning_rate": 4.930198901916657e-06, "loss": 2.2596, "step": 9612 }, { "epoch": 0.5157188841201716, "grad_norm": 0.3984375, "learning_rate": 4.930178514751882e-06, "loss": 2.2703, "step": 9613 }, { "epoch": 0.5157725321888412, "grad_norm": 0.6015625, "learning_rate": 4.930158124652415e-06, "loss": 2.3742, "step": 9614 }, { "epoch": 0.5158261802575107, "grad_norm": 0.37109375, "learning_rate": 4.930137731618279e-06, "loss": 2.3312, "step": 9615 }, { "epoch": 0.5158798283261803, "grad_norm": 0.3984375, "learning_rate": 4.9301173356495e-06, "loss": 2.2751, "step": 9616 }, { "epoch": 0.5159334763948498, "grad_norm": 0.388671875, "learning_rate": 4.9300969367461025e-06, "loss": 2.3165, "step": 9617 }, { "epoch": 0.5159871244635194, "grad_norm": 0.443359375, "learning_rate": 4.93007653490811e-06, "loss": 2.4504, "step": 9618 }, { "epoch": 0.5160407725321888, "grad_norm": 0.455078125, "learning_rate": 4.9300561301355485e-06, "loss": 2.3296, "step": 9619 }, { "epoch": 0.5160944206008584, "grad_norm": 0.41015625, "learning_rate": 4.930035722428442e-06, "loss": 2.2406, "step": 9620 }, { "epoch": 0.5161480686695279, "grad_norm": 0.478515625, "learning_rate": 4.930015311786816e-06, "loss": 2.2536, "step": 9621 }, { "epoch": 0.5162017167381975, "grad_norm": 0.65625, "learning_rate": 4.929994898210694e-06, "loss": 2.5755, "step": 9622 }, { "epoch": 0.516255364806867, "grad_norm": 0.37890625, "learning_rate": 4.9299744817001e-06, "loss": 2.2956, "step": 9623 }, { "epoch": 0.5163090128755364, "grad_norm": 0.3671875, "learning_rate": 4.929954062255061e-06, "loss": 2.3299, "step": 9624 }, { "epoch": 0.516362660944206, "grad_norm": 0.44140625, "learning_rate": 4.9299336398756006e-06, "loss": 2.3292, "step": 9625 }, { "epoch": 0.5164163090128755, "grad_norm": 0.443359375, "learning_rate": 4.929913214561742e-06, "loss": 2.2175, "step": 9626 }, { "epoch": 0.5164699570815451, "grad_norm": 0.466796875, "learning_rate": 4.929892786313511e-06, "loss": 2.1061, "step": 9627 }, { "epoch": 0.5165236051502146, "grad_norm": 0.97265625, "learning_rate": 4.929872355130933e-06, "loss": 2.1381, "step": 9628 }, { "epoch": 0.5165772532188841, "grad_norm": 0.3828125, "learning_rate": 4.929851921014032e-06, "loss": 2.3487, "step": 9629 }, { "epoch": 0.5166309012875536, "grad_norm": 0.5390625, "learning_rate": 4.929831483962834e-06, "loss": 2.4093, "step": 9630 }, { "epoch": 0.5166845493562232, "grad_norm": 0.376953125, "learning_rate": 4.92981104397736e-06, "loss": 2.2615, "step": 9631 }, { "epoch": 0.5167381974248927, "grad_norm": 0.81640625, "learning_rate": 4.929790601057639e-06, "loss": 2.3152, "step": 9632 }, { "epoch": 0.5167918454935623, "grad_norm": 0.64453125, "learning_rate": 4.929770155203693e-06, "loss": 1.9806, "step": 9633 }, { "epoch": 0.5168454935622318, "grad_norm": 0.384765625, "learning_rate": 4.929749706415547e-06, "loss": 2.263, "step": 9634 }, { "epoch": 0.5168991416309013, "grad_norm": 0.388671875, "learning_rate": 4.929729254693226e-06, "loss": 2.3038, "step": 9635 }, { "epoch": 0.5169527896995708, "grad_norm": 0.49609375, "learning_rate": 4.929708800036756e-06, "loss": 2.3262, "step": 9636 }, { "epoch": 0.5170064377682403, "grad_norm": 0.36328125, "learning_rate": 4.92968834244616e-06, "loss": 1.9686, "step": 9637 }, { "epoch": 0.5170600858369099, "grad_norm": 0.435546875, "learning_rate": 4.929667881921464e-06, "loss": 2.1679, "step": 9638 }, { "epoch": 0.5171137339055794, "grad_norm": 0.427734375, "learning_rate": 4.929647418462691e-06, "loss": 2.2694, "step": 9639 }, { "epoch": 0.5171673819742489, "grad_norm": 0.423828125, "learning_rate": 4.9296269520698655e-06, "loss": 2.2972, "step": 9640 }, { "epoch": 0.5172210300429184, "grad_norm": 0.421875, "learning_rate": 4.929606482743015e-06, "loss": 2.3915, "step": 9641 }, { "epoch": 0.517274678111588, "grad_norm": 0.875, "learning_rate": 4.9295860104821626e-06, "loss": 1.3019, "step": 9642 }, { "epoch": 0.5173283261802575, "grad_norm": 0.3671875, "learning_rate": 4.9295655352873324e-06, "loss": 2.3449, "step": 9643 }, { "epoch": 0.5173819742489271, "grad_norm": 0.41015625, "learning_rate": 4.9295450571585504e-06, "loss": 2.4107, "step": 9644 }, { "epoch": 0.5174356223175965, "grad_norm": 0.392578125, "learning_rate": 4.929524576095841e-06, "loss": 2.2634, "step": 9645 }, { "epoch": 0.5174892703862661, "grad_norm": 0.46484375, "learning_rate": 4.929504092099227e-06, "loss": 2.07, "step": 9646 }, { "epoch": 0.5175429184549356, "grad_norm": 0.44921875, "learning_rate": 4.929483605168736e-06, "loss": 2.4999, "step": 9647 }, { "epoch": 0.5175965665236052, "grad_norm": 0.4296875, "learning_rate": 4.929463115304392e-06, "loss": 2.2475, "step": 9648 }, { "epoch": 0.5176502145922747, "grad_norm": 0.40625, "learning_rate": 4.929442622506219e-06, "loss": 2.1822, "step": 9649 }, { "epoch": 0.5177038626609443, "grad_norm": 0.46484375, "learning_rate": 4.929422126774241e-06, "loss": 2.1292, "step": 9650 }, { "epoch": 0.5177575107296137, "grad_norm": 0.482421875, "learning_rate": 4.929401628108485e-06, "loss": 2.4015, "step": 9651 }, { "epoch": 0.5178111587982832, "grad_norm": 0.5390625, "learning_rate": 4.929381126508973e-06, "loss": 2.2019, "step": 9652 }, { "epoch": 0.5178648068669528, "grad_norm": 0.39453125, "learning_rate": 4.929360621975732e-06, "loss": 2.4101, "step": 9653 }, { "epoch": 0.5179184549356223, "grad_norm": 0.435546875, "learning_rate": 4.9293401145087864e-06, "loss": 2.2956, "step": 9654 }, { "epoch": 0.5179721030042919, "grad_norm": 0.416015625, "learning_rate": 4.929319604108159e-06, "loss": 2.2382, "step": 9655 }, { "epoch": 0.5180257510729613, "grad_norm": 0.50390625, "learning_rate": 4.929299090773878e-06, "loss": 2.3971, "step": 9656 }, { "epoch": 0.5180793991416309, "grad_norm": 0.46875, "learning_rate": 4.929278574505966e-06, "loss": 2.2931, "step": 9657 }, { "epoch": 0.5181330472103004, "grad_norm": 0.470703125, "learning_rate": 4.929258055304447e-06, "loss": 2.1739, "step": 9658 }, { "epoch": 0.51818669527897, "grad_norm": 0.46875, "learning_rate": 4.929237533169348e-06, "loss": 2.2335, "step": 9659 }, { "epoch": 0.5182403433476395, "grad_norm": 0.4140625, "learning_rate": 4.929217008100692e-06, "loss": 2.4692, "step": 9660 }, { "epoch": 0.518293991416309, "grad_norm": 0.4140625, "learning_rate": 4.929196480098505e-06, "loss": 2.4005, "step": 9661 }, { "epoch": 0.5183476394849785, "grad_norm": 0.53125, "learning_rate": 4.929175949162811e-06, "loss": 1.3819, "step": 9662 }, { "epoch": 0.5184012875536481, "grad_norm": 0.435546875, "learning_rate": 4.929155415293634e-06, "loss": 2.3357, "step": 9663 }, { "epoch": 0.5184549356223176, "grad_norm": 0.455078125, "learning_rate": 4.929134878491001e-06, "loss": 2.5506, "step": 9664 }, { "epoch": 0.5185085836909872, "grad_norm": 0.45703125, "learning_rate": 4.929114338754936e-06, "loss": 2.3852, "step": 9665 }, { "epoch": 0.5185622317596567, "grad_norm": 0.43359375, "learning_rate": 4.929093796085462e-06, "loss": 1.9223, "step": 9666 }, { "epoch": 0.5186158798283261, "grad_norm": 0.6875, "learning_rate": 4.9290732504826065e-06, "loss": 2.3345, "step": 9667 }, { "epoch": 0.5186695278969957, "grad_norm": 0.41015625, "learning_rate": 4.929052701946393e-06, "loss": 2.281, "step": 9668 }, { "epoch": 0.5187231759656652, "grad_norm": 0.3671875, "learning_rate": 4.929032150476845e-06, "loss": 2.0263, "step": 9669 }, { "epoch": 0.5187768240343348, "grad_norm": 0.4453125, "learning_rate": 4.92901159607399e-06, "loss": 2.2339, "step": 9670 }, { "epoch": 0.5188304721030043, "grad_norm": 0.47265625, "learning_rate": 4.928991038737851e-06, "loss": 2.291, "step": 9671 }, { "epoch": 0.5188841201716738, "grad_norm": 0.5078125, "learning_rate": 4.928970478468454e-06, "loss": 1.9158, "step": 9672 }, { "epoch": 0.5189377682403433, "grad_norm": 0.435546875, "learning_rate": 4.928949915265821e-06, "loss": 2.166, "step": 9673 }, { "epoch": 0.5189914163090129, "grad_norm": 0.494140625, "learning_rate": 4.9289293491299814e-06, "loss": 2.3255, "step": 9674 }, { "epoch": 0.5190450643776824, "grad_norm": 0.5, "learning_rate": 4.928908780060957e-06, "loss": 2.345, "step": 9675 }, { "epoch": 0.519098712446352, "grad_norm": 0.416015625, "learning_rate": 4.928888208058772e-06, "loss": 2.4906, "step": 9676 }, { "epoch": 0.5191523605150214, "grad_norm": 0.44921875, "learning_rate": 4.928867633123454e-06, "loss": 2.2387, "step": 9677 }, { "epoch": 0.519206008583691, "grad_norm": 0.400390625, "learning_rate": 4.928847055255026e-06, "loss": 2.3274, "step": 9678 }, { "epoch": 0.5192596566523605, "grad_norm": 0.77734375, "learning_rate": 4.928826474453513e-06, "loss": 2.2682, "step": 9679 }, { "epoch": 0.51931330472103, "grad_norm": 0.416015625, "learning_rate": 4.92880589071894e-06, "loss": 2.327, "step": 9680 }, { "epoch": 0.5193669527896996, "grad_norm": 0.5078125, "learning_rate": 4.928785304051332e-06, "loss": 2.4193, "step": 9681 }, { "epoch": 0.519420600858369, "grad_norm": 0.462890625, "learning_rate": 4.928764714450714e-06, "loss": 2.3613, "step": 9682 }, { "epoch": 0.5194742489270386, "grad_norm": 0.380859375, "learning_rate": 4.92874412191711e-06, "loss": 2.337, "step": 9683 }, { "epoch": 0.5195278969957081, "grad_norm": 0.412109375, "learning_rate": 4.928723526450545e-06, "loss": 2.3214, "step": 9684 }, { "epoch": 0.5195815450643777, "grad_norm": 0.388671875, "learning_rate": 4.928702928051046e-06, "loss": 2.2166, "step": 9685 }, { "epoch": 0.5196351931330472, "grad_norm": 0.53125, "learning_rate": 4.928682326718636e-06, "loss": 2.6174, "step": 9686 }, { "epoch": 0.5196888412017168, "grad_norm": 0.390625, "learning_rate": 4.928661722453339e-06, "loss": 1.9166, "step": 9687 }, { "epoch": 0.5197424892703862, "grad_norm": 0.71484375, "learning_rate": 4.928641115255181e-06, "loss": 2.4107, "step": 9688 }, { "epoch": 0.5197961373390558, "grad_norm": 0.404296875, "learning_rate": 4.9286205051241884e-06, "loss": 2.177, "step": 9689 }, { "epoch": 0.5198497854077253, "grad_norm": 0.369140625, "learning_rate": 4.928599892060383e-06, "loss": 2.1797, "step": 9690 }, { "epoch": 0.5199034334763949, "grad_norm": 0.37890625, "learning_rate": 4.928579276063792e-06, "loss": 2.2159, "step": 9691 }, { "epoch": 0.5199570815450644, "grad_norm": 0.490234375, "learning_rate": 4.928558657134439e-06, "loss": 2.5239, "step": 9692 }, { "epoch": 0.520010729613734, "grad_norm": 0.439453125, "learning_rate": 4.92853803527235e-06, "loss": 2.1951, "step": 9693 }, { "epoch": 0.5200643776824034, "grad_norm": 0.412109375, "learning_rate": 4.92851741047755e-06, "loss": 2.2664, "step": 9694 }, { "epoch": 0.5201180257510729, "grad_norm": 0.50390625, "learning_rate": 4.928496782750063e-06, "loss": 2.3493, "step": 9695 }, { "epoch": 0.5201716738197425, "grad_norm": 0.79296875, "learning_rate": 4.928476152089914e-06, "loss": 2.4783, "step": 9696 }, { "epoch": 0.520225321888412, "grad_norm": 0.41796875, "learning_rate": 4.928455518497127e-06, "loss": 2.2719, "step": 9697 }, { "epoch": 0.5202789699570816, "grad_norm": 0.4453125, "learning_rate": 4.928434881971729e-06, "loss": 2.4056, "step": 9698 }, { "epoch": 0.520332618025751, "grad_norm": 0.53515625, "learning_rate": 4.928414242513744e-06, "loss": 2.385, "step": 9699 }, { "epoch": 0.5203862660944206, "grad_norm": 0.5078125, "learning_rate": 4.928393600123197e-06, "loss": 1.9342, "step": 9700 }, { "epoch": 0.5204399141630901, "grad_norm": 0.486328125, "learning_rate": 4.928372954800112e-06, "loss": 2.1612, "step": 9701 }, { "epoch": 0.5204935622317597, "grad_norm": 0.64453125, "learning_rate": 4.928352306544515e-06, "loss": 2.1338, "step": 9702 }, { "epoch": 0.5205472103004292, "grad_norm": 0.419921875, "learning_rate": 4.928331655356431e-06, "loss": 2.2414, "step": 9703 }, { "epoch": 0.5206008583690988, "grad_norm": 0.447265625, "learning_rate": 4.928311001235885e-06, "loss": 2.5428, "step": 9704 }, { "epoch": 0.5206545064377682, "grad_norm": 0.39453125, "learning_rate": 4.9282903441829e-06, "loss": 2.317, "step": 9705 }, { "epoch": 0.5207081545064378, "grad_norm": 0.443359375, "learning_rate": 4.928269684197504e-06, "loss": 2.3067, "step": 9706 }, { "epoch": 0.5207618025751073, "grad_norm": 0.40234375, "learning_rate": 4.9282490212797194e-06, "loss": 2.4464, "step": 9707 }, { "epoch": 0.5208154506437769, "grad_norm": 0.65234375, "learning_rate": 4.928228355429573e-06, "loss": 2.4566, "step": 9708 }, { "epoch": 0.5208690987124464, "grad_norm": 0.40234375, "learning_rate": 4.928207686647088e-06, "loss": 2.1119, "step": 9709 }, { "epoch": 0.5209227467811158, "grad_norm": 0.470703125, "learning_rate": 4.928187014932291e-06, "loss": 2.4167, "step": 9710 }, { "epoch": 0.5209763948497854, "grad_norm": 0.70703125, "learning_rate": 4.928166340285206e-06, "loss": 2.3057, "step": 9711 }, { "epoch": 0.5210300429184549, "grad_norm": 0.44140625, "learning_rate": 4.928145662705859e-06, "loss": 2.1661, "step": 9712 }, { "epoch": 0.5210836909871245, "grad_norm": 0.3671875, "learning_rate": 4.9281249821942734e-06, "loss": 2.0102, "step": 9713 }, { "epoch": 0.521137339055794, "grad_norm": 0.443359375, "learning_rate": 4.928104298750476e-06, "loss": 2.2184, "step": 9714 }, { "epoch": 0.5211909871244635, "grad_norm": 0.3984375, "learning_rate": 4.928083612374489e-06, "loss": 2.1426, "step": 9715 }, { "epoch": 0.521244635193133, "grad_norm": 0.6640625, "learning_rate": 4.92806292306634e-06, "loss": 2.3368, "step": 9716 }, { "epoch": 0.5212982832618026, "grad_norm": 0.4296875, "learning_rate": 4.928042230826053e-06, "loss": 2.2997, "step": 9717 }, { "epoch": 0.5213519313304721, "grad_norm": 0.43359375, "learning_rate": 4.928021535653654e-06, "loss": 2.2724, "step": 9718 }, { "epoch": 0.5214055793991417, "grad_norm": 0.40625, "learning_rate": 4.928000837549166e-06, "loss": 2.0995, "step": 9719 }, { "epoch": 0.5214592274678111, "grad_norm": 0.486328125, "learning_rate": 4.927980136512616e-06, "loss": 2.3078, "step": 9720 }, { "epoch": 0.5215128755364807, "grad_norm": 0.3984375, "learning_rate": 4.9279594325440275e-06, "loss": 2.2875, "step": 9721 }, { "epoch": 0.5215665236051502, "grad_norm": 0.384765625, "learning_rate": 4.927938725643425e-06, "loss": 1.7514, "step": 9722 }, { "epoch": 0.5216201716738197, "grad_norm": 0.458984375, "learning_rate": 4.927918015810836e-06, "loss": 2.0621, "step": 9723 }, { "epoch": 0.5216738197424893, "grad_norm": 1.0078125, "learning_rate": 4.927897303046284e-06, "loss": 2.2344, "step": 9724 }, { "epoch": 0.5217274678111588, "grad_norm": 0.65625, "learning_rate": 4.927876587349794e-06, "loss": 2.3037, "step": 9725 }, { "epoch": 0.5217811158798283, "grad_norm": 0.48046875, "learning_rate": 4.927855868721391e-06, "loss": 2.5449, "step": 9726 }, { "epoch": 0.5218347639484978, "grad_norm": 0.3359375, "learning_rate": 4.9278351471611e-06, "loss": 2.1647, "step": 9727 }, { "epoch": 0.5218884120171674, "grad_norm": 0.47265625, "learning_rate": 4.9278144226689465e-06, "loss": 2.3424, "step": 9728 }, { "epoch": 0.5219420600858369, "grad_norm": 0.5390625, "learning_rate": 4.927793695244955e-06, "loss": 2.3848, "step": 9729 }, { "epoch": 0.5219957081545065, "grad_norm": 0.490234375, "learning_rate": 4.927772964889151e-06, "loss": 2.068, "step": 9730 }, { "epoch": 0.5220493562231759, "grad_norm": 1.34375, "learning_rate": 4.927752231601559e-06, "loss": 1.9165, "step": 9731 }, { "epoch": 0.5221030042918455, "grad_norm": 0.46875, "learning_rate": 4.927731495382205e-06, "loss": 2.3994, "step": 9732 }, { "epoch": 0.522156652360515, "grad_norm": 0.42578125, "learning_rate": 4.927710756231112e-06, "loss": 2.2239, "step": 9733 }, { "epoch": 0.5222103004291846, "grad_norm": 0.427734375, "learning_rate": 4.927690014148308e-06, "loss": 2.2019, "step": 9734 }, { "epoch": 0.5222639484978541, "grad_norm": 0.34765625, "learning_rate": 4.927669269133815e-06, "loss": 2.2859, "step": 9735 }, { "epoch": 0.5223175965665237, "grad_norm": 1.0625, "learning_rate": 4.92764852118766e-06, "loss": 2.3747, "step": 9736 }, { "epoch": 0.5223712446351931, "grad_norm": 0.474609375, "learning_rate": 4.927627770309868e-06, "loss": 2.3034, "step": 9737 }, { "epoch": 0.5224248927038626, "grad_norm": 0.474609375, "learning_rate": 4.927607016500463e-06, "loss": 2.2322, "step": 9738 }, { "epoch": 0.5224785407725322, "grad_norm": 0.412109375, "learning_rate": 4.927586259759471e-06, "loss": 2.3282, "step": 9739 }, { "epoch": 0.5225321888412017, "grad_norm": 0.470703125, "learning_rate": 4.9275655000869164e-06, "loss": 2.2971, "step": 9740 }, { "epoch": 0.5225858369098713, "grad_norm": 0.416015625, "learning_rate": 4.927544737482825e-06, "loss": 2.2143, "step": 9741 }, { "epoch": 0.5226394849785407, "grad_norm": 0.486328125, "learning_rate": 4.927523971947221e-06, "loss": 2.3278, "step": 9742 }, { "epoch": 0.5226931330472103, "grad_norm": 0.41796875, "learning_rate": 4.927503203480131e-06, "loss": 2.1899, "step": 9743 }, { "epoch": 0.5227467811158798, "grad_norm": 0.43359375, "learning_rate": 4.927482432081578e-06, "loss": 2.3775, "step": 9744 }, { "epoch": 0.5228004291845494, "grad_norm": 0.3984375, "learning_rate": 4.927461657751588e-06, "loss": 2.0751, "step": 9745 }, { "epoch": 0.5228540772532189, "grad_norm": 1.7734375, "learning_rate": 4.927440880490188e-06, "loss": 2.3248, "step": 9746 }, { "epoch": 0.5229077253218885, "grad_norm": 0.486328125, "learning_rate": 4.9274201002973985e-06, "loss": 2.4204, "step": 9747 }, { "epoch": 0.5229613733905579, "grad_norm": 0.412109375, "learning_rate": 4.927399317173249e-06, "loss": 2.2701, "step": 9748 }, { "epoch": 0.5230150214592275, "grad_norm": 0.44921875, "learning_rate": 4.927378531117764e-06, "loss": 2.1419, "step": 9749 }, { "epoch": 0.523068669527897, "grad_norm": 0.3984375, "learning_rate": 4.9273577421309655e-06, "loss": 2.5234, "step": 9750 }, { "epoch": 0.5231223175965666, "grad_norm": 0.46484375, "learning_rate": 4.927336950212882e-06, "loss": 2.2819, "step": 9751 }, { "epoch": 0.523175965665236, "grad_norm": 0.42578125, "learning_rate": 4.927316155363536e-06, "loss": 2.3018, "step": 9752 }, { "epoch": 0.5232296137339055, "grad_norm": 0.453125, "learning_rate": 4.927295357582955e-06, "loss": 2.6996, "step": 9753 }, { "epoch": 0.5232832618025751, "grad_norm": 0.443359375, "learning_rate": 4.927274556871163e-06, "loss": 2.2391, "step": 9754 }, { "epoch": 0.5233369098712446, "grad_norm": 1.0859375, "learning_rate": 4.927253753228185e-06, "loss": 2.421, "step": 9755 }, { "epoch": 0.5233905579399142, "grad_norm": 0.5703125, "learning_rate": 4.9272329466540465e-06, "loss": 2.006, "step": 9756 }, { "epoch": 0.5234442060085837, "grad_norm": 0.435546875, "learning_rate": 4.927212137148772e-06, "loss": 2.1571, "step": 9757 }, { "epoch": 0.5234978540772532, "grad_norm": 0.4375, "learning_rate": 4.927191324712387e-06, "loss": 2.2953, "step": 9758 }, { "epoch": 0.5235515021459227, "grad_norm": 0.416015625, "learning_rate": 4.927170509344917e-06, "loss": 2.3536, "step": 9759 }, { "epoch": 0.5236051502145923, "grad_norm": 0.455078125, "learning_rate": 4.927149691046387e-06, "loss": 2.0829, "step": 9760 }, { "epoch": 0.5236587982832618, "grad_norm": 0.45703125, "learning_rate": 4.927128869816822e-06, "loss": 2.4614, "step": 9761 }, { "epoch": 0.5237124463519314, "grad_norm": 0.73828125, "learning_rate": 4.927108045656246e-06, "loss": 2.3177, "step": 9762 }, { "epoch": 0.5237660944206008, "grad_norm": 0.3984375, "learning_rate": 4.927087218564685e-06, "loss": 2.2556, "step": 9763 }, { "epoch": 0.5238197424892704, "grad_norm": 0.41015625, "learning_rate": 4.927066388542167e-06, "loss": 2.3926, "step": 9764 }, { "epoch": 0.5238733905579399, "grad_norm": 0.388671875, "learning_rate": 4.927045555588712e-06, "loss": 2.2331, "step": 9765 }, { "epoch": 0.5239270386266094, "grad_norm": 0.39453125, "learning_rate": 4.9270247197043485e-06, "loss": 2.1538, "step": 9766 }, { "epoch": 0.523980686695279, "grad_norm": 0.4453125, "learning_rate": 4.927003880889101e-06, "loss": 2.455, "step": 9767 }, { "epoch": 0.5240343347639485, "grad_norm": 0.466796875, "learning_rate": 4.926983039142994e-06, "loss": 2.1331, "step": 9768 }, { "epoch": 0.524087982832618, "grad_norm": 0.453125, "learning_rate": 4.926962194466054e-06, "loss": 2.2417, "step": 9769 }, { "epoch": 0.5241416309012875, "grad_norm": 0.4375, "learning_rate": 4.926941346858305e-06, "loss": 2.2096, "step": 9770 }, { "epoch": 0.5241952789699571, "grad_norm": 0.380859375, "learning_rate": 4.926920496319773e-06, "loss": 2.1199, "step": 9771 }, { "epoch": 0.5242489270386266, "grad_norm": 0.40625, "learning_rate": 4.926899642850481e-06, "loss": 2.3096, "step": 9772 }, { "epoch": 0.5243025751072962, "grad_norm": 0.43359375, "learning_rate": 4.926878786450457e-06, "loss": 2.2337, "step": 9773 }, { "epoch": 0.5243562231759656, "grad_norm": 0.419921875, "learning_rate": 4.926857927119726e-06, "loss": 2.4718, "step": 9774 }, { "epoch": 0.5244098712446352, "grad_norm": 0.484375, "learning_rate": 4.926837064858311e-06, "loss": 2.3442, "step": 9775 }, { "epoch": 0.5244635193133047, "grad_norm": 0.42578125, "learning_rate": 4.926816199666239e-06, "loss": 2.0174, "step": 9776 }, { "epoch": 0.5245171673819743, "grad_norm": 0.38671875, "learning_rate": 4.926795331543534e-06, "loss": 2.2319, "step": 9777 }, { "epoch": 0.5245708154506438, "grad_norm": 0.44921875, "learning_rate": 4.926774460490223e-06, "loss": 2.4052, "step": 9778 }, { "epoch": 0.5246244635193134, "grad_norm": 0.462890625, "learning_rate": 4.926753586506329e-06, "loss": 2.3174, "step": 9779 }, { "epoch": 0.5246781115879828, "grad_norm": 0.44921875, "learning_rate": 4.926732709591879e-06, "loss": 2.0791, "step": 9780 }, { "epoch": 0.5247317596566523, "grad_norm": 0.419921875, "learning_rate": 4.926711829746898e-06, "loss": 2.4732, "step": 9781 }, { "epoch": 0.5247854077253219, "grad_norm": 0.455078125, "learning_rate": 4.926690946971409e-06, "loss": 2.2226, "step": 9782 }, { "epoch": 0.5248390557939914, "grad_norm": 0.4296875, "learning_rate": 4.92667006126544e-06, "loss": 2.4169, "step": 9783 }, { "epoch": 0.524892703862661, "grad_norm": 0.39453125, "learning_rate": 4.926649172629015e-06, "loss": 2.2138, "step": 9784 }, { "epoch": 0.5249463519313304, "grad_norm": 0.447265625, "learning_rate": 4.9266282810621595e-06, "loss": 2.4124, "step": 9785 }, { "epoch": 0.525, "grad_norm": 0.5078125, "learning_rate": 4.926607386564898e-06, "loss": 2.4675, "step": 9786 }, { "epoch": 0.5250536480686695, "grad_norm": 0.416015625, "learning_rate": 4.9265864891372574e-06, "loss": 2.3861, "step": 9787 }, { "epoch": 0.5251072961373391, "grad_norm": 0.515625, "learning_rate": 4.926565588779261e-06, "loss": 2.3715, "step": 9788 }, { "epoch": 0.5251609442060086, "grad_norm": 0.486328125, "learning_rate": 4.926544685490935e-06, "loss": 2.3549, "step": 9789 }, { "epoch": 0.5252145922746781, "grad_norm": 0.421875, "learning_rate": 4.926523779272305e-06, "loss": 2.1924, "step": 9790 }, { "epoch": 0.5252682403433476, "grad_norm": 0.45703125, "learning_rate": 4.9265028701233956e-06, "loss": 2.4285, "step": 9791 }, { "epoch": 0.5253218884120172, "grad_norm": 0.412109375, "learning_rate": 4.926481958044231e-06, "loss": 2.3543, "step": 9792 }, { "epoch": 0.5253755364806867, "grad_norm": 0.466796875, "learning_rate": 4.926461043034839e-06, "loss": 2.5347, "step": 9793 }, { "epoch": 0.5254291845493563, "grad_norm": 0.423828125, "learning_rate": 4.926440125095244e-06, "loss": 2.3051, "step": 9794 }, { "epoch": 0.5254828326180258, "grad_norm": 0.408203125, "learning_rate": 4.92641920422547e-06, "loss": 2.3633, "step": 9795 }, { "epoch": 0.5255364806866952, "grad_norm": 0.61328125, "learning_rate": 4.926398280425543e-06, "loss": 2.07, "step": 9796 }, { "epoch": 0.5255901287553648, "grad_norm": 0.50390625, "learning_rate": 4.926377353695489e-06, "loss": 2.1826, "step": 9797 }, { "epoch": 0.5256437768240343, "grad_norm": 0.431640625, "learning_rate": 4.9263564240353326e-06, "loss": 2.2637, "step": 9798 }, { "epoch": 0.5256974248927039, "grad_norm": 0.515625, "learning_rate": 4.926335491445098e-06, "loss": 2.3426, "step": 9799 }, { "epoch": 0.5257510729613734, "grad_norm": 0.400390625, "learning_rate": 4.926314555924813e-06, "loss": 2.2572, "step": 9800 }, { "epoch": 0.5258047210300429, "grad_norm": 0.482421875, "learning_rate": 4.9262936174745e-06, "loss": 2.3891, "step": 9801 }, { "epoch": 0.5258583690987124, "grad_norm": 0.484375, "learning_rate": 4.926272676094187e-06, "loss": 2.2564, "step": 9802 }, { "epoch": 0.525912017167382, "grad_norm": 0.38671875, "learning_rate": 4.926251731783897e-06, "loss": 2.4949, "step": 9803 }, { "epoch": 0.5259656652360515, "grad_norm": 0.365234375, "learning_rate": 4.9262307845436565e-06, "loss": 2.254, "step": 9804 }, { "epoch": 0.5260193133047211, "grad_norm": 0.408203125, "learning_rate": 4.926209834373491e-06, "loss": 2.1894, "step": 9805 }, { "epoch": 0.5260729613733905, "grad_norm": 0.421875, "learning_rate": 4.926188881273426e-06, "loss": 2.2299, "step": 9806 }, { "epoch": 0.5261266094420601, "grad_norm": 0.51953125, "learning_rate": 4.926167925243485e-06, "loss": 2.3171, "step": 9807 }, { "epoch": 0.5261802575107296, "grad_norm": 0.423828125, "learning_rate": 4.926146966283695e-06, "loss": 2.4033, "step": 9808 }, { "epoch": 0.5262339055793992, "grad_norm": 0.365234375, "learning_rate": 4.92612600439408e-06, "loss": 2.3226, "step": 9809 }, { "epoch": 0.5262875536480687, "grad_norm": 0.451171875, "learning_rate": 4.926105039574668e-06, "loss": 2.442, "step": 9810 }, { "epoch": 0.5263412017167381, "grad_norm": 0.55078125, "learning_rate": 4.9260840718254815e-06, "loss": 2.5612, "step": 9811 }, { "epoch": 0.5263948497854077, "grad_norm": 0.443359375, "learning_rate": 4.926063101146547e-06, "loss": 2.333, "step": 9812 }, { "epoch": 0.5264484978540772, "grad_norm": 0.443359375, "learning_rate": 4.926042127537889e-06, "loss": 2.3029, "step": 9813 }, { "epoch": 0.5265021459227468, "grad_norm": 0.462890625, "learning_rate": 4.926021150999533e-06, "loss": 2.2945, "step": 9814 }, { "epoch": 0.5265557939914163, "grad_norm": 0.423828125, "learning_rate": 4.926000171531506e-06, "loss": 2.3054, "step": 9815 }, { "epoch": 0.5266094420600859, "grad_norm": 0.4296875, "learning_rate": 4.925979189133832e-06, "loss": 2.4955, "step": 9816 }, { "epoch": 0.5266630901287553, "grad_norm": 0.37890625, "learning_rate": 4.925958203806536e-06, "loss": 2.3226, "step": 9817 }, { "epoch": 0.5267167381974249, "grad_norm": 0.45703125, "learning_rate": 4.9259372155496445e-06, "loss": 2.2474, "step": 9818 }, { "epoch": 0.5267703862660944, "grad_norm": 0.4609375, "learning_rate": 4.925916224363181e-06, "loss": 2.4113, "step": 9819 }, { "epoch": 0.526824034334764, "grad_norm": 0.380859375, "learning_rate": 4.925895230247173e-06, "loss": 2.0948, "step": 9820 }, { "epoch": 0.5268776824034335, "grad_norm": 0.390625, "learning_rate": 4.925874233201644e-06, "loss": 2.2782, "step": 9821 }, { "epoch": 0.526931330472103, "grad_norm": 0.5234375, "learning_rate": 4.925853233226621e-06, "loss": 2.4174, "step": 9822 }, { "epoch": 0.5269849785407725, "grad_norm": 0.412109375, "learning_rate": 4.925832230322127e-06, "loss": 2.2245, "step": 9823 }, { "epoch": 0.527038626609442, "grad_norm": 0.408203125, "learning_rate": 4.92581122448819e-06, "loss": 2.1925, "step": 9824 }, { "epoch": 0.5270922746781116, "grad_norm": 0.8203125, "learning_rate": 4.925790215724835e-06, "loss": 1.5035, "step": 9825 }, { "epoch": 0.5271459227467811, "grad_norm": 0.470703125, "learning_rate": 4.925769204032086e-06, "loss": 2.0462, "step": 9826 }, { "epoch": 0.5271995708154507, "grad_norm": 0.51953125, "learning_rate": 4.9257481894099685e-06, "loss": 2.5045, "step": 9827 }, { "epoch": 0.5272532188841201, "grad_norm": 0.42578125, "learning_rate": 4.925727171858509e-06, "loss": 2.5312, "step": 9828 }, { "epoch": 0.5273068669527897, "grad_norm": 0.4375, "learning_rate": 4.925706151377732e-06, "loss": 2.319, "step": 9829 }, { "epoch": 0.5273605150214592, "grad_norm": 0.458984375, "learning_rate": 4.925685127967663e-06, "loss": 1.5671, "step": 9830 }, { "epoch": 0.5274141630901288, "grad_norm": 0.515625, "learning_rate": 4.925664101628328e-06, "loss": 2.1151, "step": 9831 }, { "epoch": 0.5274678111587983, "grad_norm": 0.421875, "learning_rate": 4.925643072359751e-06, "loss": 2.0352, "step": 9832 }, { "epoch": 0.5275214592274678, "grad_norm": 0.416015625, "learning_rate": 4.92562204016196e-06, "loss": 2.2009, "step": 9833 }, { "epoch": 0.5275751072961373, "grad_norm": 0.455078125, "learning_rate": 4.9256010050349774e-06, "loss": 2.2741, "step": 9834 }, { "epoch": 0.5276287553648069, "grad_norm": 0.578125, "learning_rate": 4.925579966978831e-06, "loss": 2.3213, "step": 9835 }, { "epoch": 0.5276824034334764, "grad_norm": 0.498046875, "learning_rate": 4.925558925993544e-06, "loss": 2.4248, "step": 9836 }, { "epoch": 0.527736051502146, "grad_norm": 0.58984375, "learning_rate": 4.925537882079143e-06, "loss": 2.1706, "step": 9837 }, { "epoch": 0.5277896995708155, "grad_norm": 0.4453125, "learning_rate": 4.925516835235654e-06, "loss": 2.3978, "step": 9838 }, { "epoch": 0.5278433476394849, "grad_norm": 0.59375, "learning_rate": 4.925495785463102e-06, "loss": 2.3476, "step": 9839 }, { "epoch": 0.5278969957081545, "grad_norm": 0.419921875, "learning_rate": 4.925474732761511e-06, "loss": 2.3546, "step": 9840 }, { "epoch": 0.527950643776824, "grad_norm": 0.412109375, "learning_rate": 4.925453677130909e-06, "loss": 2.1139, "step": 9841 }, { "epoch": 0.5280042918454936, "grad_norm": 0.470703125, "learning_rate": 4.925432618571319e-06, "loss": 2.1379, "step": 9842 }, { "epoch": 0.528057939914163, "grad_norm": 0.447265625, "learning_rate": 4.9254115570827684e-06, "loss": 2.3386, "step": 9843 }, { "epoch": 0.5281115879828326, "grad_norm": 0.55859375, "learning_rate": 4.925390492665281e-06, "loss": 2.5367, "step": 9844 }, { "epoch": 0.5281652360515021, "grad_norm": 0.453125, "learning_rate": 4.925369425318883e-06, "loss": 2.3296, "step": 9845 }, { "epoch": 0.5282188841201717, "grad_norm": 0.498046875, "learning_rate": 4.9253483550436e-06, "loss": 2.0627, "step": 9846 }, { "epoch": 0.5282725321888412, "grad_norm": 0.44140625, "learning_rate": 4.925327281839457e-06, "loss": 2.4964, "step": 9847 }, { "epoch": 0.5283261802575108, "grad_norm": 0.44140625, "learning_rate": 4.92530620570648e-06, "loss": 2.3166, "step": 9848 }, { "epoch": 0.5283798283261802, "grad_norm": 0.49609375, "learning_rate": 4.925285126644694e-06, "loss": 2.0721, "step": 9849 }, { "epoch": 0.5284334763948498, "grad_norm": 0.451171875, "learning_rate": 4.925264044654125e-06, "loss": 2.38, "step": 9850 }, { "epoch": 0.5284871244635193, "grad_norm": 0.380859375, "learning_rate": 4.925242959734798e-06, "loss": 2.4241, "step": 9851 }, { "epoch": 0.5285407725321889, "grad_norm": 0.43359375, "learning_rate": 4.925221871886738e-06, "loss": 2.4042, "step": 9852 }, { "epoch": 0.5285944206008584, "grad_norm": 0.41796875, "learning_rate": 4.925200781109971e-06, "loss": 2.3441, "step": 9853 }, { "epoch": 0.5286480686695278, "grad_norm": 0.404296875, "learning_rate": 4.925179687404523e-06, "loss": 2.2827, "step": 9854 }, { "epoch": 0.5287017167381974, "grad_norm": 0.78515625, "learning_rate": 4.925158590770418e-06, "loss": 2.221, "step": 9855 }, { "epoch": 0.5287553648068669, "grad_norm": 0.39453125, "learning_rate": 4.925137491207683e-06, "loss": 2.4698, "step": 9856 }, { "epoch": 0.5288090128755365, "grad_norm": 0.51171875, "learning_rate": 4.925116388716344e-06, "loss": 2.3985, "step": 9857 }, { "epoch": 0.528862660944206, "grad_norm": 0.45703125, "learning_rate": 4.925095283296423e-06, "loss": 2.4751, "step": 9858 }, { "epoch": 0.5289163090128756, "grad_norm": 0.486328125, "learning_rate": 4.9250741749479495e-06, "loss": 2.425, "step": 9859 }, { "epoch": 0.528969957081545, "grad_norm": 0.427734375, "learning_rate": 4.925053063670947e-06, "loss": 2.5201, "step": 9860 }, { "epoch": 0.5290236051502146, "grad_norm": 0.451171875, "learning_rate": 4.925031949465441e-06, "loss": 2.4645, "step": 9861 }, { "epoch": 0.5290772532188841, "grad_norm": 0.458984375, "learning_rate": 4.925010832331457e-06, "loss": 1.914, "step": 9862 }, { "epoch": 0.5291309012875537, "grad_norm": 0.94140625, "learning_rate": 4.924989712269021e-06, "loss": 2.1505, "step": 9863 }, { "epoch": 0.5291845493562232, "grad_norm": 0.412109375, "learning_rate": 4.9249685892781594e-06, "loss": 2.1547, "step": 9864 }, { "epoch": 0.5292381974248928, "grad_norm": 0.421875, "learning_rate": 4.9249474633588955e-06, "loss": 2.3176, "step": 9865 }, { "epoch": 0.5292918454935622, "grad_norm": 0.365234375, "learning_rate": 4.924926334511257e-06, "loss": 2.1171, "step": 9866 }, { "epoch": 0.5293454935622317, "grad_norm": 0.484375, "learning_rate": 4.924905202735267e-06, "loss": 2.3194, "step": 9867 }, { "epoch": 0.5293991416309013, "grad_norm": 0.4296875, "learning_rate": 4.924884068030954e-06, "loss": 2.1493, "step": 9868 }, { "epoch": 0.5294527896995708, "grad_norm": 0.48046875, "learning_rate": 4.924862930398341e-06, "loss": 2.2219, "step": 9869 }, { "epoch": 0.5295064377682404, "grad_norm": 0.380859375, "learning_rate": 4.924841789837454e-06, "loss": 2.3321, "step": 9870 }, { "epoch": 0.5295600858369098, "grad_norm": 0.40625, "learning_rate": 4.92482064634832e-06, "loss": 2.3697, "step": 9871 }, { "epoch": 0.5296137339055794, "grad_norm": 1.6171875, "learning_rate": 4.924799499930963e-06, "loss": 2.4092, "step": 9872 }, { "epoch": 0.5296673819742489, "grad_norm": 0.455078125, "learning_rate": 4.9247783505854085e-06, "loss": 2.2352, "step": 9873 }, { "epoch": 0.5297210300429185, "grad_norm": 0.498046875, "learning_rate": 4.924757198311683e-06, "loss": 2.3343, "step": 9874 }, { "epoch": 0.529774678111588, "grad_norm": 0.39453125, "learning_rate": 4.924736043109813e-06, "loss": 2.1132, "step": 9875 }, { "epoch": 0.5298283261802575, "grad_norm": 0.41796875, "learning_rate": 4.9247148849798204e-06, "loss": 2.5157, "step": 9876 }, { "epoch": 0.529881974248927, "grad_norm": 0.91796875, "learning_rate": 4.924693723921734e-06, "loss": 2.1891, "step": 9877 }, { "epoch": 0.5299356223175966, "grad_norm": 0.494140625, "learning_rate": 4.924672559935578e-06, "loss": 2.2242, "step": 9878 }, { "epoch": 0.5299892703862661, "grad_norm": 0.42578125, "learning_rate": 4.924651393021379e-06, "loss": 2.2013, "step": 9879 }, { "epoch": 0.5300429184549357, "grad_norm": 0.41015625, "learning_rate": 4.924630223179161e-06, "loss": 2.5697, "step": 9880 }, { "epoch": 0.5300965665236052, "grad_norm": 0.412109375, "learning_rate": 4.9246090504089516e-06, "loss": 2.2534, "step": 9881 }, { "epoch": 0.5301502145922746, "grad_norm": 0.451171875, "learning_rate": 4.924587874710774e-06, "loss": 2.441, "step": 9882 }, { "epoch": 0.5302038626609442, "grad_norm": 0.404296875, "learning_rate": 4.9245666960846564e-06, "loss": 2.3412, "step": 9883 }, { "epoch": 0.5302575107296137, "grad_norm": 0.69140625, "learning_rate": 4.924545514530622e-06, "loss": 2.4537, "step": 9884 }, { "epoch": 0.5303111587982833, "grad_norm": 0.4609375, "learning_rate": 4.924524330048698e-06, "loss": 2.0349, "step": 9885 }, { "epoch": 0.5303648068669528, "grad_norm": 0.46875, "learning_rate": 4.924503142638909e-06, "loss": 2.1507, "step": 9886 }, { "epoch": 0.5304184549356223, "grad_norm": 0.443359375, "learning_rate": 4.92448195230128e-06, "loss": 2.4971, "step": 9887 }, { "epoch": 0.5304721030042918, "grad_norm": 0.40234375, "learning_rate": 4.924460759035839e-06, "loss": 2.3428, "step": 9888 }, { "epoch": 0.5305257510729614, "grad_norm": 0.453125, "learning_rate": 4.924439562842609e-06, "loss": 2.517, "step": 9889 }, { "epoch": 0.5305793991416309, "grad_norm": 0.43359375, "learning_rate": 4.9244183637216175e-06, "loss": 2.2079, "step": 9890 }, { "epoch": 0.5306330472103005, "grad_norm": 0.40625, "learning_rate": 4.924397161672889e-06, "loss": 2.4883, "step": 9891 }, { "epoch": 0.5306866952789699, "grad_norm": 0.453125, "learning_rate": 4.9243759566964485e-06, "loss": 2.4766, "step": 9892 }, { "epoch": 0.5307403433476395, "grad_norm": 0.43359375, "learning_rate": 4.9243547487923235e-06, "loss": 2.2651, "step": 9893 }, { "epoch": 0.530793991416309, "grad_norm": 0.42578125, "learning_rate": 4.924333537960539e-06, "loss": 2.3347, "step": 9894 }, { "epoch": 0.5308476394849786, "grad_norm": 0.3828125, "learning_rate": 4.92431232420112e-06, "loss": 2.1201, "step": 9895 }, { "epoch": 0.5309012875536481, "grad_norm": 0.46484375, "learning_rate": 4.924291107514092e-06, "loss": 2.4076, "step": 9896 }, { "epoch": 0.5309549356223175, "grad_norm": 0.384765625, "learning_rate": 4.924269887899481e-06, "loss": 2.1707, "step": 9897 }, { "epoch": 0.5310085836909871, "grad_norm": 0.39453125, "learning_rate": 4.924248665357312e-06, "loss": 2.2916, "step": 9898 }, { "epoch": 0.5310622317596566, "grad_norm": 0.435546875, "learning_rate": 4.924227439887612e-06, "loss": 2.2651, "step": 9899 }, { "epoch": 0.5311158798283262, "grad_norm": 0.423828125, "learning_rate": 4.924206211490406e-06, "loss": 2.6368, "step": 9900 }, { "epoch": 0.5311695278969957, "grad_norm": 0.4296875, "learning_rate": 4.924184980165719e-06, "loss": 2.0271, "step": 9901 }, { "epoch": 0.5312231759656653, "grad_norm": 0.482421875, "learning_rate": 4.924163745913578e-06, "loss": 2.3549, "step": 9902 }, { "epoch": 0.5312768240343347, "grad_norm": 0.63671875, "learning_rate": 4.924142508734007e-06, "loss": 2.3033, "step": 9903 }, { "epoch": 0.5313304721030043, "grad_norm": 0.490234375, "learning_rate": 4.924121268627033e-06, "loss": 2.3924, "step": 9904 }, { "epoch": 0.5313841201716738, "grad_norm": 0.416015625, "learning_rate": 4.9241000255926795e-06, "loss": 2.3191, "step": 9905 }, { "epoch": 0.5314377682403434, "grad_norm": 0.447265625, "learning_rate": 4.924078779630975e-06, "loss": 2.3555, "step": 9906 }, { "epoch": 0.5314914163090129, "grad_norm": 0.45703125, "learning_rate": 4.924057530741944e-06, "loss": 2.2307, "step": 9907 }, { "epoch": 0.5315450643776825, "grad_norm": 0.44921875, "learning_rate": 4.924036278925612e-06, "loss": 2.4231, "step": 9908 }, { "epoch": 0.5315987124463519, "grad_norm": 0.44921875, "learning_rate": 4.924015024182004e-06, "loss": 2.3599, "step": 9909 }, { "epoch": 0.5316523605150214, "grad_norm": 0.412109375, "learning_rate": 4.923993766511147e-06, "loss": 2.3221, "step": 9910 }, { "epoch": 0.531706008583691, "grad_norm": 0.345703125, "learning_rate": 4.923972505913066e-06, "loss": 1.9992, "step": 9911 }, { "epoch": 0.5317596566523605, "grad_norm": 0.515625, "learning_rate": 4.923951242387787e-06, "loss": 2.4311, "step": 9912 }, { "epoch": 0.5318133047210301, "grad_norm": 0.375, "learning_rate": 4.923929975935334e-06, "loss": 1.7927, "step": 9913 }, { "epoch": 0.5318669527896995, "grad_norm": 0.375, "learning_rate": 4.923908706555735e-06, "loss": 2.1848, "step": 9914 }, { "epoch": 0.5319206008583691, "grad_norm": 0.45703125, "learning_rate": 4.923887434249015e-06, "loss": 2.0305, "step": 9915 }, { "epoch": 0.5319742489270386, "grad_norm": 0.400390625, "learning_rate": 4.923866159015199e-06, "loss": 2.2049, "step": 9916 }, { "epoch": 0.5320278969957082, "grad_norm": 0.66796875, "learning_rate": 4.9238448808543136e-06, "loss": 2.4896, "step": 9917 }, { "epoch": 0.5320815450643777, "grad_norm": 0.353515625, "learning_rate": 4.923823599766384e-06, "loss": 2.0346, "step": 9918 }, { "epoch": 0.5321351931330472, "grad_norm": 0.48046875, "learning_rate": 4.923802315751436e-06, "loss": 2.453, "step": 9919 }, { "epoch": 0.5321888412017167, "grad_norm": 0.48046875, "learning_rate": 4.923781028809495e-06, "loss": 2.2709, "step": 9920 }, { "epoch": 0.5322424892703863, "grad_norm": 0.412109375, "learning_rate": 4.923759738940587e-06, "loss": 2.2613, "step": 9921 }, { "epoch": 0.5322961373390558, "grad_norm": 0.46484375, "learning_rate": 4.923738446144738e-06, "loss": 2.2554, "step": 9922 }, { "epoch": 0.5323497854077254, "grad_norm": 0.423828125, "learning_rate": 4.923717150421973e-06, "loss": 2.318, "step": 9923 }, { "epoch": 0.5324034334763948, "grad_norm": 0.392578125, "learning_rate": 4.923695851772317e-06, "loss": 2.0766, "step": 9924 }, { "epoch": 0.5324570815450643, "grad_norm": 0.4765625, "learning_rate": 4.923674550195799e-06, "loss": 2.185, "step": 9925 }, { "epoch": 0.5325107296137339, "grad_norm": 0.349609375, "learning_rate": 4.9236532456924415e-06, "loss": 2.206, "step": 9926 }, { "epoch": 0.5325643776824034, "grad_norm": 0.4140625, "learning_rate": 4.923631938262271e-06, "loss": 2.1501, "step": 9927 }, { "epoch": 0.532618025751073, "grad_norm": 0.490234375, "learning_rate": 4.923610627905313e-06, "loss": 2.3314, "step": 9928 }, { "epoch": 0.5326716738197425, "grad_norm": 0.458984375, "learning_rate": 4.923589314621595e-06, "loss": 1.8071, "step": 9929 }, { "epoch": 0.532725321888412, "grad_norm": 0.546875, "learning_rate": 4.92356799841114e-06, "loss": 2.1759, "step": 9930 }, { "epoch": 0.5327789699570815, "grad_norm": 0.41796875, "learning_rate": 4.923546679273977e-06, "loss": 2.4524, "step": 9931 }, { "epoch": 0.5328326180257511, "grad_norm": 0.453125, "learning_rate": 4.923525357210129e-06, "loss": 2.4837, "step": 9932 }, { "epoch": 0.5328862660944206, "grad_norm": 1.1640625, "learning_rate": 4.923504032219623e-06, "loss": 2.1973, "step": 9933 }, { "epoch": 0.5329399141630902, "grad_norm": 0.45703125, "learning_rate": 4.923482704302483e-06, "loss": 2.3016, "step": 9934 }, { "epoch": 0.5329935622317596, "grad_norm": 0.4609375, "learning_rate": 4.9234613734587386e-06, "loss": 2.4056, "step": 9935 }, { "epoch": 0.5330472103004292, "grad_norm": 0.423828125, "learning_rate": 4.923440039688412e-06, "loss": 2.3203, "step": 9936 }, { "epoch": 0.5331008583690987, "grad_norm": 0.400390625, "learning_rate": 4.92341870299153e-06, "loss": 2.1552, "step": 9937 }, { "epoch": 0.5331545064377683, "grad_norm": 0.40625, "learning_rate": 4.923397363368118e-06, "loss": 2.4151, "step": 9938 }, { "epoch": 0.5332081545064378, "grad_norm": 0.49609375, "learning_rate": 4.923376020818203e-06, "loss": 2.4637, "step": 9939 }, { "epoch": 0.5332618025751072, "grad_norm": 0.6328125, "learning_rate": 4.92335467534181e-06, "loss": 2.391, "step": 9940 }, { "epoch": 0.5333154506437768, "grad_norm": 0.474609375, "learning_rate": 4.923333326938965e-06, "loss": 2.502, "step": 9941 }, { "epoch": 0.5333690987124463, "grad_norm": 0.427734375, "learning_rate": 4.9233119756096925e-06, "loss": 2.3144, "step": 9942 }, { "epoch": 0.5334227467811159, "grad_norm": 0.4609375, "learning_rate": 4.9232906213540205e-06, "loss": 2.3699, "step": 9943 }, { "epoch": 0.5334763948497854, "grad_norm": 0.4609375, "learning_rate": 4.923269264171973e-06, "loss": 2.2802, "step": 9944 }, { "epoch": 0.533530042918455, "grad_norm": 0.353515625, "learning_rate": 4.923247904063577e-06, "loss": 2.202, "step": 9945 }, { "epoch": 0.5335836909871244, "grad_norm": 0.42578125, "learning_rate": 4.923226541028857e-06, "loss": 2.4551, "step": 9946 }, { "epoch": 0.533637339055794, "grad_norm": 0.734375, "learning_rate": 4.923205175067841e-06, "loss": 2.2686, "step": 9947 }, { "epoch": 0.5336909871244635, "grad_norm": 0.3671875, "learning_rate": 4.923183806180552e-06, "loss": 1.8833, "step": 9948 }, { "epoch": 0.5337446351931331, "grad_norm": 3.578125, "learning_rate": 4.923162434367017e-06, "loss": 2.3553, "step": 9949 }, { "epoch": 0.5337982832618026, "grad_norm": 0.427734375, "learning_rate": 4.923141059627263e-06, "loss": 2.353, "step": 9950 }, { "epoch": 0.5338519313304722, "grad_norm": 0.46875, "learning_rate": 4.923119681961314e-06, "loss": 2.3074, "step": 9951 }, { "epoch": 0.5339055793991416, "grad_norm": 0.462890625, "learning_rate": 4.923098301369197e-06, "loss": 2.338, "step": 9952 }, { "epoch": 0.5339592274678111, "grad_norm": 0.462890625, "learning_rate": 4.923076917850937e-06, "loss": 2.4871, "step": 9953 }, { "epoch": 0.5340128755364807, "grad_norm": 0.431640625, "learning_rate": 4.923055531406561e-06, "loss": 2.2549, "step": 9954 }, { "epoch": 0.5340665236051502, "grad_norm": 0.4375, "learning_rate": 4.9230341420360934e-06, "loss": 2.3595, "step": 9955 }, { "epoch": 0.5341201716738198, "grad_norm": 0.40234375, "learning_rate": 4.923012749739561e-06, "loss": 2.2699, "step": 9956 }, { "epoch": 0.5341738197424892, "grad_norm": 0.392578125, "learning_rate": 4.922991354516989e-06, "loss": 2.3462, "step": 9957 }, { "epoch": 0.5342274678111588, "grad_norm": 0.37109375, "learning_rate": 4.922969956368404e-06, "loss": 2.4502, "step": 9958 }, { "epoch": 0.5342811158798283, "grad_norm": 0.451171875, "learning_rate": 4.9229485552938315e-06, "loss": 2.1964, "step": 9959 }, { "epoch": 0.5343347639484979, "grad_norm": 0.423828125, "learning_rate": 4.922927151293296e-06, "loss": 2.25, "step": 9960 }, { "epoch": 0.5343884120171674, "grad_norm": 0.494140625, "learning_rate": 4.922905744366826e-06, "loss": 2.2462, "step": 9961 }, { "epoch": 0.534442060085837, "grad_norm": 1.4453125, "learning_rate": 4.922884334514445e-06, "loss": 2.4412, "step": 9962 }, { "epoch": 0.5344957081545064, "grad_norm": 0.423828125, "learning_rate": 4.9228629217361805e-06, "loss": 2.227, "step": 9963 }, { "epoch": 0.534549356223176, "grad_norm": 0.46875, "learning_rate": 4.922841506032058e-06, "loss": 2.3362, "step": 9964 }, { "epoch": 0.5346030042918455, "grad_norm": 0.42578125, "learning_rate": 4.922820087402102e-06, "loss": 2.3556, "step": 9965 }, { "epoch": 0.5346566523605151, "grad_norm": 0.419921875, "learning_rate": 4.92279866584634e-06, "loss": 2.4492, "step": 9966 }, { "epoch": 0.5347103004291845, "grad_norm": 0.376953125, "learning_rate": 4.922777241364796e-06, "loss": 2.1604, "step": 9967 }, { "epoch": 0.534763948497854, "grad_norm": 0.6640625, "learning_rate": 4.922755813957499e-06, "loss": 2.2486, "step": 9968 }, { "epoch": 0.5348175965665236, "grad_norm": 0.34375, "learning_rate": 4.922734383624473e-06, "loss": 2.1133, "step": 9969 }, { "epoch": 0.5348712446351931, "grad_norm": 0.466796875, "learning_rate": 4.922712950365742e-06, "loss": 2.3968, "step": 9970 }, { "epoch": 0.5349248927038627, "grad_norm": 0.74609375, "learning_rate": 4.922691514181334e-06, "loss": 2.4334, "step": 9971 }, { "epoch": 0.5349785407725322, "grad_norm": 1.15625, "learning_rate": 4.922670075071276e-06, "loss": 2.3688, "step": 9972 }, { "epoch": 0.5350321888412017, "grad_norm": 0.3515625, "learning_rate": 4.922648633035593e-06, "loss": 2.1604, "step": 9973 }, { "epoch": 0.5350858369098712, "grad_norm": 0.53515625, "learning_rate": 4.9226271880743086e-06, "loss": 2.3275, "step": 9974 }, { "epoch": 0.5351394849785408, "grad_norm": 0.44140625, "learning_rate": 4.922605740187452e-06, "loss": 2.2388, "step": 9975 }, { "epoch": 0.5351931330472103, "grad_norm": 0.423828125, "learning_rate": 4.922584289375046e-06, "loss": 2.0382, "step": 9976 }, { "epoch": 0.5352467811158799, "grad_norm": 0.38671875, "learning_rate": 4.92256283563712e-06, "loss": 2.2806, "step": 9977 }, { "epoch": 0.5353004291845493, "grad_norm": 0.48046875, "learning_rate": 4.922541378973696e-06, "loss": 2.2952, "step": 9978 }, { "epoch": 0.5353540772532189, "grad_norm": 0.498046875, "learning_rate": 4.9225199193848035e-06, "loss": 2.2105, "step": 9979 }, { "epoch": 0.5354077253218884, "grad_norm": 0.4453125, "learning_rate": 4.922498456870467e-06, "loss": 2.2988, "step": 9980 }, { "epoch": 0.535461373390558, "grad_norm": 0.3984375, "learning_rate": 4.922476991430711e-06, "loss": 2.4805, "step": 9981 }, { "epoch": 0.5355150214592275, "grad_norm": 0.6484375, "learning_rate": 4.922455523065564e-06, "loss": 2.2229, "step": 9982 }, { "epoch": 0.535568669527897, "grad_norm": 0.384765625, "learning_rate": 4.92243405177505e-06, "loss": 2.1291, "step": 9983 }, { "epoch": 0.5356223175965665, "grad_norm": 0.4765625, "learning_rate": 4.922412577559196e-06, "loss": 2.5047, "step": 9984 }, { "epoch": 0.535675965665236, "grad_norm": 0.38671875, "learning_rate": 4.922391100418027e-06, "loss": 2.0155, "step": 9985 }, { "epoch": 0.5357296137339056, "grad_norm": 0.45703125, "learning_rate": 4.9223696203515695e-06, "loss": 2.4586, "step": 9986 }, { "epoch": 0.5357832618025751, "grad_norm": 0.5234375, "learning_rate": 4.922348137359849e-06, "loss": 2.3807, "step": 9987 }, { "epoch": 0.5358369098712447, "grad_norm": 0.6484375, "learning_rate": 4.922326651442893e-06, "loss": 2.6271, "step": 9988 }, { "epoch": 0.5358905579399141, "grad_norm": 0.3984375, "learning_rate": 4.922305162600725e-06, "loss": 2.0122, "step": 9989 }, { "epoch": 0.5359442060085837, "grad_norm": 0.42578125, "learning_rate": 4.922283670833373e-06, "loss": 2.3232, "step": 9990 }, { "epoch": 0.5359978540772532, "grad_norm": 0.4765625, "learning_rate": 4.922262176140862e-06, "loss": 2.3492, "step": 9991 }, { "epoch": 0.5360515021459228, "grad_norm": 0.9921875, "learning_rate": 4.922240678523218e-06, "loss": 1.3479, "step": 9992 }, { "epoch": 0.5361051502145923, "grad_norm": 0.419921875, "learning_rate": 4.922219177980467e-06, "loss": 2.2829, "step": 9993 }, { "epoch": 0.5361587982832619, "grad_norm": 0.412109375, "learning_rate": 4.922197674512635e-06, "loss": 2.2576, "step": 9994 }, { "epoch": 0.5362124463519313, "grad_norm": 0.427734375, "learning_rate": 4.9221761681197485e-06, "loss": 2.1258, "step": 9995 }, { "epoch": 0.5362660944206008, "grad_norm": 0.400390625, "learning_rate": 4.9221546588018324e-06, "loss": 2.3063, "step": 9996 }, { "epoch": 0.5363197424892704, "grad_norm": 0.47265625, "learning_rate": 4.922133146558914e-06, "loss": 2.1686, "step": 9997 }, { "epoch": 0.5363733905579399, "grad_norm": 0.36328125, "learning_rate": 4.922111631391018e-06, "loss": 2.3098, "step": 9998 }, { "epoch": 0.5364270386266095, "grad_norm": 0.359375, "learning_rate": 4.92209011329817e-06, "loss": 2.3359, "step": 9999 }, { "epoch": 0.5364806866952789, "grad_norm": 0.50390625, "learning_rate": 4.922068592280398e-06, "loss": 2.5495, "step": 10000 }, { "epoch": 0.5365343347639485, "grad_norm": 0.50390625, "learning_rate": 4.922047068337727e-06, "loss": 2.0196, "step": 10001 }, { "epoch": 0.536587982832618, "grad_norm": 0.392578125, "learning_rate": 4.922025541470182e-06, "loss": 2.2614, "step": 10002 }, { "epoch": 0.5366416309012876, "grad_norm": 0.462890625, "learning_rate": 4.922004011677791e-06, "loss": 2.4084, "step": 10003 }, { "epoch": 0.5366952789699571, "grad_norm": 0.3984375, "learning_rate": 4.921982478960578e-06, "loss": 2.2045, "step": 10004 }, { "epoch": 0.5367489270386266, "grad_norm": 0.443359375, "learning_rate": 4.921960943318571e-06, "loss": 2.3909, "step": 10005 }, { "epoch": 0.5368025751072961, "grad_norm": 1.09375, "learning_rate": 4.921939404751794e-06, "loss": 2.1872, "step": 10006 }, { "epoch": 0.5368562231759657, "grad_norm": 0.51953125, "learning_rate": 4.921917863260274e-06, "loss": 2.3067, "step": 10007 }, { "epoch": 0.5369098712446352, "grad_norm": 0.4765625, "learning_rate": 4.921896318844037e-06, "loss": 2.2617, "step": 10008 }, { "epoch": 0.5369635193133048, "grad_norm": 0.466796875, "learning_rate": 4.921874771503109e-06, "loss": 2.4773, "step": 10009 }, { "epoch": 0.5370171673819742, "grad_norm": 0.46875, "learning_rate": 4.9218532212375155e-06, "loss": 2.5829, "step": 10010 }, { "epoch": 0.5370708154506437, "grad_norm": 0.474609375, "learning_rate": 4.921831668047283e-06, "loss": 2.2248, "step": 10011 }, { "epoch": 0.5371244635193133, "grad_norm": 0.451171875, "learning_rate": 4.9218101119324376e-06, "loss": 2.2907, "step": 10012 }, { "epoch": 0.5371781115879828, "grad_norm": 0.94140625, "learning_rate": 4.921788552893006e-06, "loss": 2.2444, "step": 10013 }, { "epoch": 0.5372317596566524, "grad_norm": 0.388671875, "learning_rate": 4.921766990929013e-06, "loss": 2.3121, "step": 10014 }, { "epoch": 0.5372854077253219, "grad_norm": 0.42578125, "learning_rate": 4.921745426040485e-06, "loss": 2.2169, "step": 10015 }, { "epoch": 0.5373390557939914, "grad_norm": 0.416015625, "learning_rate": 4.9217238582274484e-06, "loss": 2.3065, "step": 10016 }, { "epoch": 0.5373927038626609, "grad_norm": 0.447265625, "learning_rate": 4.921702287489928e-06, "loss": 1.8805, "step": 10017 }, { "epoch": 0.5374463519313305, "grad_norm": 0.408203125, "learning_rate": 4.921680713827952e-06, "loss": 2.1403, "step": 10018 }, { "epoch": 0.5375, "grad_norm": 0.390625, "learning_rate": 4.921659137241544e-06, "loss": 2.2781, "step": 10019 }, { "epoch": 0.5375536480686696, "grad_norm": 0.4140625, "learning_rate": 4.921637557730733e-06, "loss": 2.3227, "step": 10020 }, { "epoch": 0.537607296137339, "grad_norm": 0.54296875, "learning_rate": 4.921615975295543e-06, "loss": 2.2092, "step": 10021 }, { "epoch": 0.5376609442060086, "grad_norm": 0.50390625, "learning_rate": 4.921594389936e-06, "loss": 2.2828, "step": 10022 }, { "epoch": 0.5377145922746781, "grad_norm": 0.4609375, "learning_rate": 4.92157280165213e-06, "loss": 2.1675, "step": 10023 }, { "epoch": 0.5377682403433477, "grad_norm": 0.455078125, "learning_rate": 4.921551210443961e-06, "loss": 2.2973, "step": 10024 }, { "epoch": 0.5378218884120172, "grad_norm": 0.4921875, "learning_rate": 4.921529616311517e-06, "loss": 2.106, "step": 10025 }, { "epoch": 0.5378755364806866, "grad_norm": 0.3359375, "learning_rate": 4.9215080192548246e-06, "loss": 2.1998, "step": 10026 }, { "epoch": 0.5379291845493562, "grad_norm": 0.419921875, "learning_rate": 4.9214864192739095e-06, "loss": 2.123, "step": 10027 }, { "epoch": 0.5379828326180257, "grad_norm": 0.55078125, "learning_rate": 4.9214648163687995e-06, "loss": 2.5795, "step": 10028 }, { "epoch": 0.5380364806866953, "grad_norm": 0.439453125, "learning_rate": 4.921443210539519e-06, "loss": 2.0183, "step": 10029 }, { "epoch": 0.5380901287553648, "grad_norm": 0.37109375, "learning_rate": 4.921421601786095e-06, "loss": 2.3056, "step": 10030 }, { "epoch": 0.5381437768240344, "grad_norm": 0.478515625, "learning_rate": 4.9213999901085525e-06, "loss": 2.5013, "step": 10031 }, { "epoch": 0.5381974248927038, "grad_norm": 0.40625, "learning_rate": 4.921378375506919e-06, "loss": 2.2297, "step": 10032 }, { "epoch": 0.5382510729613734, "grad_norm": 0.4296875, "learning_rate": 4.92135675798122e-06, "loss": 2.008, "step": 10033 }, { "epoch": 0.5383047210300429, "grad_norm": 0.486328125, "learning_rate": 4.92133513753148e-06, "loss": 2.311, "step": 10034 }, { "epoch": 0.5383583690987125, "grad_norm": 0.5390625, "learning_rate": 4.921313514157727e-06, "loss": 2.2971, "step": 10035 }, { "epoch": 0.538412017167382, "grad_norm": 0.431640625, "learning_rate": 4.921291887859988e-06, "loss": 2.2749, "step": 10036 }, { "epoch": 0.5384656652360515, "grad_norm": 0.4375, "learning_rate": 4.921270258638287e-06, "loss": 2.1596, "step": 10037 }, { "epoch": 0.538519313304721, "grad_norm": 0.43359375, "learning_rate": 4.921248626492651e-06, "loss": 2.3184, "step": 10038 }, { "epoch": 0.5385729613733906, "grad_norm": 0.416015625, "learning_rate": 4.921226991423106e-06, "loss": 2.3577, "step": 10039 }, { "epoch": 0.5386266094420601, "grad_norm": 0.478515625, "learning_rate": 4.9212053534296785e-06, "loss": 2.3138, "step": 10040 }, { "epoch": 0.5386802575107296, "grad_norm": 0.44140625, "learning_rate": 4.921183712512395e-06, "loss": 2.1248, "step": 10041 }, { "epoch": 0.5387339055793992, "grad_norm": 0.447265625, "learning_rate": 4.92116206867128e-06, "loss": 2.2984, "step": 10042 }, { "epoch": 0.5387875536480686, "grad_norm": 0.404296875, "learning_rate": 4.921140421906361e-06, "loss": 2.2129, "step": 10043 }, { "epoch": 0.5388412017167382, "grad_norm": 0.8046875, "learning_rate": 4.921118772217663e-06, "loss": 2.4248, "step": 10044 }, { "epoch": 0.5388948497854077, "grad_norm": 0.49609375, "learning_rate": 4.921097119605214e-06, "loss": 1.7029, "step": 10045 }, { "epoch": 0.5389484978540773, "grad_norm": 0.5859375, "learning_rate": 4.921075464069038e-06, "loss": 2.3957, "step": 10046 }, { "epoch": 0.5390021459227468, "grad_norm": 0.51171875, "learning_rate": 4.9210538056091635e-06, "loss": 2.5276, "step": 10047 }, { "epoch": 0.5390557939914163, "grad_norm": 0.41015625, "learning_rate": 4.9210321442256145e-06, "loss": 2.2477, "step": 10048 }, { "epoch": 0.5391094420600858, "grad_norm": 0.79296875, "learning_rate": 4.921010479918418e-06, "loss": 1.842, "step": 10049 }, { "epoch": 0.5391630901287554, "grad_norm": 0.5, "learning_rate": 4.920988812687601e-06, "loss": 2.648, "step": 10050 }, { "epoch": 0.5392167381974249, "grad_norm": 0.37890625, "learning_rate": 4.920967142533187e-06, "loss": 2.1015, "step": 10051 }, { "epoch": 0.5392703862660945, "grad_norm": 0.578125, "learning_rate": 4.920945469455206e-06, "loss": 2.3495, "step": 10052 }, { "epoch": 0.539324034334764, "grad_norm": 0.404296875, "learning_rate": 4.920923793453681e-06, "loss": 2.4337, "step": 10053 }, { "epoch": 0.5393776824034334, "grad_norm": 0.52734375, "learning_rate": 4.92090211452864e-06, "loss": 2.108, "step": 10054 }, { "epoch": 0.539431330472103, "grad_norm": 0.47265625, "learning_rate": 4.920880432680107e-06, "loss": 2.4204, "step": 10055 }, { "epoch": 0.5394849785407725, "grad_norm": 0.35546875, "learning_rate": 4.920858747908112e-06, "loss": 2.2159, "step": 10056 }, { "epoch": 0.5395386266094421, "grad_norm": 0.42578125, "learning_rate": 4.920837060212678e-06, "loss": 2.2511, "step": 10057 }, { "epoch": 0.5395922746781115, "grad_norm": 0.373046875, "learning_rate": 4.920815369593832e-06, "loss": 2.1048, "step": 10058 }, { "epoch": 0.5396459227467811, "grad_norm": 0.48828125, "learning_rate": 4.9207936760515994e-06, "loss": 2.3037, "step": 10059 }, { "epoch": 0.5396995708154506, "grad_norm": 0.486328125, "learning_rate": 4.920771979586008e-06, "loss": 2.3603, "step": 10060 }, { "epoch": 0.5397532188841202, "grad_norm": 0.72265625, "learning_rate": 4.920750280197084e-06, "loss": 2.4734, "step": 10061 }, { "epoch": 0.5398068669527897, "grad_norm": 0.48828125, "learning_rate": 4.920728577884852e-06, "loss": 2.3646, "step": 10062 }, { "epoch": 0.5398605150214593, "grad_norm": 0.419921875, "learning_rate": 4.920706872649339e-06, "loss": 2.1325, "step": 10063 }, { "epoch": 0.5399141630901287, "grad_norm": 0.404296875, "learning_rate": 4.920685164490572e-06, "loss": 2.3592, "step": 10064 }, { "epoch": 0.5399678111587983, "grad_norm": 0.412109375, "learning_rate": 4.920663453408576e-06, "loss": 2.2217, "step": 10065 }, { "epoch": 0.5400214592274678, "grad_norm": 0.4296875, "learning_rate": 4.920641739403378e-06, "loss": 2.3633, "step": 10066 }, { "epoch": 0.5400751072961374, "grad_norm": 0.494140625, "learning_rate": 4.920620022475004e-06, "loss": 2.2654, "step": 10067 }, { "epoch": 0.5401287553648069, "grad_norm": 0.42578125, "learning_rate": 4.9205983026234806e-06, "loss": 2.3933, "step": 10068 }, { "epoch": 0.5401824034334763, "grad_norm": 0.38671875, "learning_rate": 4.920576579848833e-06, "loss": 2.2828, "step": 10069 }, { "epoch": 0.5402360515021459, "grad_norm": 0.578125, "learning_rate": 4.920554854151088e-06, "loss": 2.2971, "step": 10070 }, { "epoch": 0.5402896995708154, "grad_norm": 0.46875, "learning_rate": 4.920533125530272e-06, "loss": 2.2938, "step": 10071 }, { "epoch": 0.540343347639485, "grad_norm": 0.462890625, "learning_rate": 4.920511393986411e-06, "loss": 2.4649, "step": 10072 }, { "epoch": 0.5403969957081545, "grad_norm": 0.4765625, "learning_rate": 4.9204896595195315e-06, "loss": 2.281, "step": 10073 }, { "epoch": 0.5404506437768241, "grad_norm": 0.44140625, "learning_rate": 4.92046792212966e-06, "loss": 2.1257, "step": 10074 }, { "epoch": 0.5405042918454935, "grad_norm": 0.390625, "learning_rate": 4.920446181816821e-06, "loss": 2.4108, "step": 10075 }, { "epoch": 0.5405579399141631, "grad_norm": 0.44140625, "learning_rate": 4.920424438581044e-06, "loss": 2.2979, "step": 10076 }, { "epoch": 0.5406115879828326, "grad_norm": 0.41796875, "learning_rate": 4.920402692422352e-06, "loss": 2.222, "step": 10077 }, { "epoch": 0.5406652360515022, "grad_norm": 0.51171875, "learning_rate": 4.920380943340774e-06, "loss": 2.3455, "step": 10078 }, { "epoch": 0.5407188841201717, "grad_norm": 0.3359375, "learning_rate": 4.920359191336333e-06, "loss": 2.0294, "step": 10079 }, { "epoch": 0.5407725321888412, "grad_norm": 0.65234375, "learning_rate": 4.920337436409059e-06, "loss": 2.2903, "step": 10080 }, { "epoch": 0.5408261802575107, "grad_norm": 0.71484375, "learning_rate": 4.920315678558975e-06, "loss": 2.3678, "step": 10081 }, { "epoch": 0.5408798283261803, "grad_norm": 0.44140625, "learning_rate": 4.92029391778611e-06, "loss": 2.2144, "step": 10082 }, { "epoch": 0.5409334763948498, "grad_norm": 0.46484375, "learning_rate": 4.920272154090488e-06, "loss": 2.2531, "step": 10083 }, { "epoch": 0.5409871244635193, "grad_norm": 0.416015625, "learning_rate": 4.920250387472137e-06, "loss": 2.1894, "step": 10084 }, { "epoch": 0.5410407725321889, "grad_norm": 0.458984375, "learning_rate": 4.920228617931082e-06, "loss": 2.396, "step": 10085 }, { "epoch": 0.5410944206008583, "grad_norm": 0.46875, "learning_rate": 4.9202068454673505e-06, "loss": 2.1505, "step": 10086 }, { "epoch": 0.5411480686695279, "grad_norm": 0.40625, "learning_rate": 4.920185070080967e-06, "loss": 2.2435, "step": 10087 }, { "epoch": 0.5412017167381974, "grad_norm": 0.423828125, "learning_rate": 4.92016329177196e-06, "loss": 2.2449, "step": 10088 }, { "epoch": 0.541255364806867, "grad_norm": 0.8359375, "learning_rate": 4.920141510540354e-06, "loss": 2.222, "step": 10089 }, { "epoch": 0.5413090128755365, "grad_norm": 0.435546875, "learning_rate": 4.920119726386177e-06, "loss": 1.6362, "step": 10090 }, { "epoch": 0.541362660944206, "grad_norm": 0.466796875, "learning_rate": 4.920097939309454e-06, "loss": 2.4392, "step": 10091 }, { "epoch": 0.5414163090128755, "grad_norm": 0.4296875, "learning_rate": 4.920076149310211e-06, "loss": 2.1255, "step": 10092 }, { "epoch": 0.5414699570815451, "grad_norm": 0.4609375, "learning_rate": 4.920054356388475e-06, "loss": 2.4702, "step": 10093 }, { "epoch": 0.5415236051502146, "grad_norm": 0.5390625, "learning_rate": 4.920032560544274e-06, "loss": 2.2034, "step": 10094 }, { "epoch": 0.5415772532188842, "grad_norm": 0.400390625, "learning_rate": 4.92001076177763e-06, "loss": 2.5093, "step": 10095 }, { "epoch": 0.5416309012875536, "grad_norm": 0.44140625, "learning_rate": 4.919988960088574e-06, "loss": 2.5057, "step": 10096 }, { "epoch": 0.5416845493562231, "grad_norm": 0.46484375, "learning_rate": 4.919967155477129e-06, "loss": 2.2028, "step": 10097 }, { "epoch": 0.5417381974248927, "grad_norm": 0.380859375, "learning_rate": 4.919945347943323e-06, "loss": 2.2103, "step": 10098 }, { "epoch": 0.5417918454935622, "grad_norm": 0.384765625, "learning_rate": 4.919923537487182e-06, "loss": 2.1766, "step": 10099 }, { "epoch": 0.5418454935622318, "grad_norm": 0.3828125, "learning_rate": 4.919901724108734e-06, "loss": 2.3393, "step": 10100 }, { "epoch": 0.5418991416309012, "grad_norm": 0.40625, "learning_rate": 4.919879907808001e-06, "loss": 2.1602, "step": 10101 }, { "epoch": 0.5419527896995708, "grad_norm": 0.4140625, "learning_rate": 4.919858088585012e-06, "loss": 2.2466, "step": 10102 }, { "epoch": 0.5420064377682403, "grad_norm": 0.404296875, "learning_rate": 4.919836266439795e-06, "loss": 2.1679, "step": 10103 }, { "epoch": 0.5420600858369099, "grad_norm": 0.48828125, "learning_rate": 4.9198144413723735e-06, "loss": 2.4745, "step": 10104 }, { "epoch": 0.5421137339055794, "grad_norm": 0.58203125, "learning_rate": 4.919792613382776e-06, "loss": 2.2847, "step": 10105 }, { "epoch": 0.542167381974249, "grad_norm": 0.357421875, "learning_rate": 4.9197707824710265e-06, "loss": 2.4205, "step": 10106 }, { "epoch": 0.5422210300429184, "grad_norm": 0.41796875, "learning_rate": 4.919748948637153e-06, "loss": 2.5977, "step": 10107 }, { "epoch": 0.542274678111588, "grad_norm": 0.412109375, "learning_rate": 4.919727111881183e-06, "loss": 2.4585, "step": 10108 }, { "epoch": 0.5423283261802575, "grad_norm": 0.51171875, "learning_rate": 4.91970527220314e-06, "loss": 2.4405, "step": 10109 }, { "epoch": 0.5423819742489271, "grad_norm": 0.42578125, "learning_rate": 4.919683429603052e-06, "loss": 2.3865, "step": 10110 }, { "epoch": 0.5424356223175966, "grad_norm": 0.435546875, "learning_rate": 4.919661584080946e-06, "loss": 2.1461, "step": 10111 }, { "epoch": 0.542489270386266, "grad_norm": 0.734375, "learning_rate": 4.919639735636848e-06, "loss": 2.1581, "step": 10112 }, { "epoch": 0.5425429184549356, "grad_norm": 0.33203125, "learning_rate": 4.919617884270782e-06, "loss": 2.0338, "step": 10113 }, { "epoch": 0.5425965665236051, "grad_norm": 0.41015625, "learning_rate": 4.919596029982779e-06, "loss": 2.2717, "step": 10114 }, { "epoch": 0.5426502145922747, "grad_norm": 0.4765625, "learning_rate": 4.919574172772861e-06, "loss": 2.2559, "step": 10115 }, { "epoch": 0.5427038626609442, "grad_norm": 0.46875, "learning_rate": 4.919552312641056e-06, "loss": 2.0707, "step": 10116 }, { "epoch": 0.5427575107296138, "grad_norm": 0.421875, "learning_rate": 4.919530449587391e-06, "loss": 2.4284, "step": 10117 }, { "epoch": 0.5428111587982832, "grad_norm": 0.419921875, "learning_rate": 4.919508583611892e-06, "loss": 2.0386, "step": 10118 }, { "epoch": 0.5428648068669528, "grad_norm": 0.42578125, "learning_rate": 4.919486714714585e-06, "loss": 2.1845, "step": 10119 }, { "epoch": 0.5429184549356223, "grad_norm": 0.404296875, "learning_rate": 4.919464842895497e-06, "loss": 2.0033, "step": 10120 }, { "epoch": 0.5429721030042919, "grad_norm": 0.435546875, "learning_rate": 4.919442968154655e-06, "loss": 2.2771, "step": 10121 }, { "epoch": 0.5430257510729614, "grad_norm": 0.400390625, "learning_rate": 4.919421090492084e-06, "loss": 2.1587, "step": 10122 }, { "epoch": 0.543079399141631, "grad_norm": 0.50390625, "learning_rate": 4.919399209907811e-06, "loss": 2.2916, "step": 10123 }, { "epoch": 0.5431330472103004, "grad_norm": 0.55859375, "learning_rate": 4.919377326401862e-06, "loss": 2.2104, "step": 10124 }, { "epoch": 0.54318669527897, "grad_norm": 0.466796875, "learning_rate": 4.919355439974265e-06, "loss": 2.4104, "step": 10125 }, { "epoch": 0.5432403433476395, "grad_norm": 0.66796875, "learning_rate": 4.919333550625044e-06, "loss": 2.5376, "step": 10126 }, { "epoch": 0.543293991416309, "grad_norm": 0.5078125, "learning_rate": 4.919311658354228e-06, "loss": 2.4232, "step": 10127 }, { "epoch": 0.5433476394849786, "grad_norm": 0.40234375, "learning_rate": 4.919289763161841e-06, "loss": 1.7676, "step": 10128 }, { "epoch": 0.543401287553648, "grad_norm": 0.43359375, "learning_rate": 4.919267865047911e-06, "loss": 2.3589, "step": 10129 }, { "epoch": 0.5434549356223176, "grad_norm": 0.388671875, "learning_rate": 4.919245964012464e-06, "loss": 2.284, "step": 10130 }, { "epoch": 0.5435085836909871, "grad_norm": 0.462890625, "learning_rate": 4.919224060055527e-06, "loss": 2.3483, "step": 10131 }, { "epoch": 0.5435622317596567, "grad_norm": 0.451171875, "learning_rate": 4.9192021531771255e-06, "loss": 2.4118, "step": 10132 }, { "epoch": 0.5436158798283262, "grad_norm": 0.494140625, "learning_rate": 4.919180243377286e-06, "loss": 2.2642, "step": 10133 }, { "epoch": 0.5436695278969957, "grad_norm": 0.33203125, "learning_rate": 4.919158330656037e-06, "loss": 2.0956, "step": 10134 }, { "epoch": 0.5437231759656652, "grad_norm": 1.046875, "learning_rate": 4.9191364150134014e-06, "loss": 2.3875, "step": 10135 }, { "epoch": 0.5437768240343348, "grad_norm": 0.4296875, "learning_rate": 4.919114496449409e-06, "loss": 2.2892, "step": 10136 }, { "epoch": 0.5438304721030043, "grad_norm": 0.390625, "learning_rate": 4.919092574964084e-06, "loss": 2.0849, "step": 10137 }, { "epoch": 0.5438841201716739, "grad_norm": 0.486328125, "learning_rate": 4.9190706505574545e-06, "loss": 2.3678, "step": 10138 }, { "epoch": 0.5439377682403433, "grad_norm": 0.44921875, "learning_rate": 4.9190487232295456e-06, "loss": 2.3053, "step": 10139 }, { "epoch": 0.5439914163090128, "grad_norm": 0.435546875, "learning_rate": 4.919026792980385e-06, "loss": 2.4208, "step": 10140 }, { "epoch": 0.5440450643776824, "grad_norm": 0.37890625, "learning_rate": 4.919004859809998e-06, "loss": 2.5547, "step": 10141 }, { "epoch": 0.5440987124463519, "grad_norm": 0.53125, "learning_rate": 4.918982923718412e-06, "loss": 2.1805, "step": 10142 }, { "epoch": 0.5441523605150215, "grad_norm": 0.466796875, "learning_rate": 4.918960984705653e-06, "loss": 2.2761, "step": 10143 }, { "epoch": 0.544206008583691, "grad_norm": 0.640625, "learning_rate": 4.918939042771747e-06, "loss": 2.3511, "step": 10144 }, { "epoch": 0.5442596566523605, "grad_norm": 0.4140625, "learning_rate": 4.918917097916722e-06, "loss": 2.1664, "step": 10145 }, { "epoch": 0.54431330472103, "grad_norm": 0.67578125, "learning_rate": 4.918895150140603e-06, "loss": 2.3157, "step": 10146 }, { "epoch": 0.5443669527896996, "grad_norm": 0.51171875, "learning_rate": 4.918873199443418e-06, "loss": 1.8681, "step": 10147 }, { "epoch": 0.5444206008583691, "grad_norm": 12.875, "learning_rate": 4.918851245825193e-06, "loss": 2.2125, "step": 10148 }, { "epoch": 0.5444742489270387, "grad_norm": 0.408203125, "learning_rate": 4.9188292892859525e-06, "loss": 2.3037, "step": 10149 }, { "epoch": 0.5445278969957081, "grad_norm": 0.439453125, "learning_rate": 4.918807329825726e-06, "loss": 2.3232, "step": 10150 }, { "epoch": 0.5445815450643777, "grad_norm": 0.40234375, "learning_rate": 4.918785367444538e-06, "loss": 2.4529, "step": 10151 }, { "epoch": 0.5446351931330472, "grad_norm": 0.474609375, "learning_rate": 4.918763402142416e-06, "loss": 2.364, "step": 10152 }, { "epoch": 0.5446888412017168, "grad_norm": 0.458984375, "learning_rate": 4.918741433919386e-06, "loss": 2.2149, "step": 10153 }, { "epoch": 0.5447424892703863, "grad_norm": 0.408203125, "learning_rate": 4.918719462775475e-06, "loss": 2.4452, "step": 10154 }, { "epoch": 0.5447961373390557, "grad_norm": 0.375, "learning_rate": 4.91869748871071e-06, "loss": 2.1509, "step": 10155 }, { "epoch": 0.5448497854077253, "grad_norm": 0.421875, "learning_rate": 4.918675511725116e-06, "loss": 2.4653, "step": 10156 }, { "epoch": 0.5449034334763948, "grad_norm": 1.328125, "learning_rate": 4.918653531818721e-06, "loss": 2.2838, "step": 10157 }, { "epoch": 0.5449570815450644, "grad_norm": 0.82421875, "learning_rate": 4.918631548991551e-06, "loss": 2.2264, "step": 10158 }, { "epoch": 0.5450107296137339, "grad_norm": 0.4140625, "learning_rate": 4.918609563243631e-06, "loss": 2.0218, "step": 10159 }, { "epoch": 0.5450643776824035, "grad_norm": 0.421875, "learning_rate": 4.91858757457499e-06, "loss": 2.2962, "step": 10160 }, { "epoch": 0.5451180257510729, "grad_norm": 0.44921875, "learning_rate": 4.918565582985654e-06, "loss": 2.1493, "step": 10161 }, { "epoch": 0.5451716738197425, "grad_norm": 0.451171875, "learning_rate": 4.9185435884756485e-06, "loss": 2.3404, "step": 10162 }, { "epoch": 0.545225321888412, "grad_norm": 0.388671875, "learning_rate": 4.918521591045002e-06, "loss": 2.0654, "step": 10163 }, { "epoch": 0.5452789699570816, "grad_norm": 0.4453125, "learning_rate": 4.918499590693738e-06, "loss": 2.363, "step": 10164 }, { "epoch": 0.5453326180257511, "grad_norm": 0.462890625, "learning_rate": 4.918477587421885e-06, "loss": 1.9739, "step": 10165 }, { "epoch": 0.5453862660944206, "grad_norm": 0.376953125, "learning_rate": 4.91845558122947e-06, "loss": 2.4167, "step": 10166 }, { "epoch": 0.5454399141630901, "grad_norm": 0.4140625, "learning_rate": 4.918433572116519e-06, "loss": 2.4655, "step": 10167 }, { "epoch": 0.5454935622317597, "grad_norm": 0.6015625, "learning_rate": 4.918411560083058e-06, "loss": 1.615, "step": 10168 }, { "epoch": 0.5455472103004292, "grad_norm": 0.435546875, "learning_rate": 4.918389545129116e-06, "loss": 2.3658, "step": 10169 }, { "epoch": 0.5456008583690987, "grad_norm": 0.41796875, "learning_rate": 4.918367527254716e-06, "loss": 2.2719, "step": 10170 }, { "epoch": 0.5456545064377682, "grad_norm": 0.80859375, "learning_rate": 4.918345506459886e-06, "loss": 2.2568, "step": 10171 }, { "epoch": 0.5457081545064377, "grad_norm": 0.365234375, "learning_rate": 4.918323482744653e-06, "loss": 2.2493, "step": 10172 }, { "epoch": 0.5457618025751073, "grad_norm": 0.5078125, "learning_rate": 4.918301456109045e-06, "loss": 2.3739, "step": 10173 }, { "epoch": 0.5458154506437768, "grad_norm": 0.54296875, "learning_rate": 4.918279426553087e-06, "loss": 2.2207, "step": 10174 }, { "epoch": 0.5458690987124464, "grad_norm": 0.5234375, "learning_rate": 4.918257394076804e-06, "loss": 2.2448, "step": 10175 }, { "epoch": 0.5459227467811159, "grad_norm": 0.419921875, "learning_rate": 4.918235358680225e-06, "loss": 2.3899, "step": 10176 }, { "epoch": 0.5459763948497854, "grad_norm": 0.423828125, "learning_rate": 4.918213320363376e-06, "loss": 2.0611, "step": 10177 }, { "epoch": 0.5460300429184549, "grad_norm": 0.443359375, "learning_rate": 4.918191279126283e-06, "loss": 2.4359, "step": 10178 }, { "epoch": 0.5460836909871245, "grad_norm": 0.412109375, "learning_rate": 4.918169234968975e-06, "loss": 2.3437, "step": 10179 }, { "epoch": 0.546137339055794, "grad_norm": 0.470703125, "learning_rate": 4.918147187891476e-06, "loss": 2.0128, "step": 10180 }, { "epoch": 0.5461909871244636, "grad_norm": 0.400390625, "learning_rate": 4.9181251378938125e-06, "loss": 2.2897, "step": 10181 }, { "epoch": 0.546244635193133, "grad_norm": 0.4609375, "learning_rate": 4.918103084976012e-06, "loss": 2.1618, "step": 10182 }, { "epoch": 0.5462982832618025, "grad_norm": 0.455078125, "learning_rate": 4.918081029138101e-06, "loss": 2.2537, "step": 10183 }, { "epoch": 0.5463519313304721, "grad_norm": 0.498046875, "learning_rate": 4.918058970380107e-06, "loss": 2.3801, "step": 10184 }, { "epoch": 0.5464055793991416, "grad_norm": 0.40625, "learning_rate": 4.918036908702056e-06, "loss": 2.1974, "step": 10185 }, { "epoch": 0.5464592274678112, "grad_norm": 0.4921875, "learning_rate": 4.918014844103974e-06, "loss": 1.8793, "step": 10186 }, { "epoch": 0.5465128755364806, "grad_norm": 0.3203125, "learning_rate": 4.9179927765858895e-06, "loss": 2.1495, "step": 10187 }, { "epoch": 0.5465665236051502, "grad_norm": 0.3984375, "learning_rate": 4.917970706147826e-06, "loss": 2.3742, "step": 10188 }, { "epoch": 0.5466201716738197, "grad_norm": 0.431640625, "learning_rate": 4.917948632789814e-06, "loss": 2.2658, "step": 10189 }, { "epoch": 0.5466738197424893, "grad_norm": 0.404296875, "learning_rate": 4.917926556511877e-06, "loss": 2.3894, "step": 10190 }, { "epoch": 0.5467274678111588, "grad_norm": 0.419921875, "learning_rate": 4.9179044773140425e-06, "loss": 1.6453, "step": 10191 }, { "epoch": 0.5467811158798284, "grad_norm": 0.376953125, "learning_rate": 4.917882395196339e-06, "loss": 2.2102, "step": 10192 }, { "epoch": 0.5468347639484978, "grad_norm": 0.4453125, "learning_rate": 4.917860310158789e-06, "loss": 2.4419, "step": 10193 }, { "epoch": 0.5468884120171674, "grad_norm": 0.458984375, "learning_rate": 4.917838222201424e-06, "loss": 2.1636, "step": 10194 }, { "epoch": 0.5469420600858369, "grad_norm": 0.412109375, "learning_rate": 4.917816131324269e-06, "loss": 2.1927, "step": 10195 }, { "epoch": 0.5469957081545065, "grad_norm": 0.46875, "learning_rate": 4.917794037527348e-06, "loss": 2.367, "step": 10196 }, { "epoch": 0.547049356223176, "grad_norm": 0.41796875, "learning_rate": 4.917771940810692e-06, "loss": 2.3051, "step": 10197 }, { "epoch": 0.5471030042918454, "grad_norm": 0.478515625, "learning_rate": 4.917749841174324e-06, "loss": 2.3277, "step": 10198 }, { "epoch": 0.547156652360515, "grad_norm": 1.4609375, "learning_rate": 4.917727738618273e-06, "loss": 2.259, "step": 10199 }, { "epoch": 0.5472103004291845, "grad_norm": 0.439453125, "learning_rate": 4.917705633142564e-06, "loss": 2.1745, "step": 10200 }, { "epoch": 0.5472639484978541, "grad_norm": 0.53515625, "learning_rate": 4.917683524747226e-06, "loss": 2.0905, "step": 10201 }, { "epoch": 0.5473175965665236, "grad_norm": 0.55859375, "learning_rate": 4.917661413432283e-06, "loss": 2.5256, "step": 10202 }, { "epoch": 0.5473712446351932, "grad_norm": 0.412109375, "learning_rate": 4.917639299197764e-06, "loss": 2.4257, "step": 10203 }, { "epoch": 0.5474248927038626, "grad_norm": 0.4765625, "learning_rate": 4.917617182043695e-06, "loss": 2.2203, "step": 10204 }, { "epoch": 0.5474785407725322, "grad_norm": 0.384765625, "learning_rate": 4.917595061970102e-06, "loss": 1.9931, "step": 10205 }, { "epoch": 0.5475321888412017, "grad_norm": 0.4296875, "learning_rate": 4.917572938977011e-06, "loss": 2.0413, "step": 10206 }, { "epoch": 0.5475858369098713, "grad_norm": 0.39453125, "learning_rate": 4.917550813064451e-06, "loss": 2.2652, "step": 10207 }, { "epoch": 0.5476394849785408, "grad_norm": 0.455078125, "learning_rate": 4.917528684232448e-06, "loss": 2.2998, "step": 10208 }, { "epoch": 0.5476931330472103, "grad_norm": 0.4140625, "learning_rate": 4.9175065524810274e-06, "loss": 2.2041, "step": 10209 }, { "epoch": 0.5477467811158798, "grad_norm": 0.41796875, "learning_rate": 4.917484417810216e-06, "loss": 2.3495, "step": 10210 }, { "epoch": 0.5478004291845494, "grad_norm": 0.447265625, "learning_rate": 4.917462280220043e-06, "loss": 2.2353, "step": 10211 }, { "epoch": 0.5478540772532189, "grad_norm": 0.478515625, "learning_rate": 4.917440139710533e-06, "loss": 1.8533, "step": 10212 }, { "epoch": 0.5479077253218884, "grad_norm": 0.3359375, "learning_rate": 4.917417996281714e-06, "loss": 2.0797, "step": 10213 }, { "epoch": 0.547961373390558, "grad_norm": 0.3671875, "learning_rate": 4.917395849933611e-06, "loss": 2.2138, "step": 10214 }, { "epoch": 0.5480150214592274, "grad_norm": 0.46484375, "learning_rate": 4.917373700666251e-06, "loss": 2.1809, "step": 10215 }, { "epoch": 0.548068669527897, "grad_norm": 0.458984375, "learning_rate": 4.917351548479663e-06, "loss": 2.2467, "step": 10216 }, { "epoch": 0.5481223175965665, "grad_norm": 0.42578125, "learning_rate": 4.917329393373871e-06, "loss": 2.4615, "step": 10217 }, { "epoch": 0.5481759656652361, "grad_norm": 0.359375, "learning_rate": 4.9173072353489035e-06, "loss": 2.1868, "step": 10218 }, { "epoch": 0.5482296137339056, "grad_norm": 0.3828125, "learning_rate": 4.917285074404787e-06, "loss": 2.1235, "step": 10219 }, { "epoch": 0.5482832618025751, "grad_norm": 0.5859375, "learning_rate": 4.917262910541548e-06, "loss": 2.4053, "step": 10220 }, { "epoch": 0.5483369098712446, "grad_norm": 0.408203125, "learning_rate": 4.917240743759213e-06, "loss": 2.06, "step": 10221 }, { "epoch": 0.5483905579399142, "grad_norm": 0.455078125, "learning_rate": 4.917218574057808e-06, "loss": 2.2767, "step": 10222 }, { "epoch": 0.5484442060085837, "grad_norm": 0.453125, "learning_rate": 4.917196401437362e-06, "loss": 1.9732, "step": 10223 }, { "epoch": 0.5484978540772533, "grad_norm": 0.4921875, "learning_rate": 4.917174225897901e-06, "loss": 2.3924, "step": 10224 }, { "epoch": 0.5485515021459227, "grad_norm": 0.47265625, "learning_rate": 4.91715204743945e-06, "loss": 2.2269, "step": 10225 }, { "epoch": 0.5486051502145923, "grad_norm": 0.50390625, "learning_rate": 4.917129866062038e-06, "loss": 2.3811, "step": 10226 }, { "epoch": 0.5486587982832618, "grad_norm": 0.3828125, "learning_rate": 4.917107681765691e-06, "loss": 2.2597, "step": 10227 }, { "epoch": 0.5487124463519313, "grad_norm": 0.431640625, "learning_rate": 4.917085494550435e-06, "loss": 2.1207, "step": 10228 }, { "epoch": 0.5487660944206009, "grad_norm": 0.474609375, "learning_rate": 4.917063304416298e-06, "loss": 2.3127, "step": 10229 }, { "epoch": 0.5488197424892703, "grad_norm": 0.462890625, "learning_rate": 4.917041111363306e-06, "loss": 2.3816, "step": 10230 }, { "epoch": 0.5488733905579399, "grad_norm": 0.458984375, "learning_rate": 4.9170189153914866e-06, "loss": 2.1607, "step": 10231 }, { "epoch": 0.5489270386266094, "grad_norm": 0.455078125, "learning_rate": 4.916996716500866e-06, "loss": 2.5621, "step": 10232 }, { "epoch": 0.548980686695279, "grad_norm": 0.431640625, "learning_rate": 4.9169745146914705e-06, "loss": 2.3648, "step": 10233 }, { "epoch": 0.5490343347639485, "grad_norm": 0.4296875, "learning_rate": 4.916952309963328e-06, "loss": 2.3903, "step": 10234 }, { "epoch": 0.5490879828326181, "grad_norm": 0.54296875, "learning_rate": 4.916930102316465e-06, "loss": 2.2572, "step": 10235 }, { "epoch": 0.5491416309012875, "grad_norm": 0.388671875, "learning_rate": 4.9169078917509085e-06, "loss": 2.3152, "step": 10236 }, { "epoch": 0.5491952789699571, "grad_norm": 0.41796875, "learning_rate": 4.916885678266684e-06, "loss": 2.3589, "step": 10237 }, { "epoch": 0.5492489270386266, "grad_norm": 0.50390625, "learning_rate": 4.916863461863819e-06, "loss": 2.485, "step": 10238 }, { "epoch": 0.5493025751072962, "grad_norm": 0.423828125, "learning_rate": 4.916841242542342e-06, "loss": 2.4562, "step": 10239 }, { "epoch": 0.5493562231759657, "grad_norm": 0.423828125, "learning_rate": 4.916819020302277e-06, "loss": 2.5882, "step": 10240 }, { "epoch": 0.5494098712446351, "grad_norm": 0.478515625, "learning_rate": 4.916796795143654e-06, "loss": 2.326, "step": 10241 }, { "epoch": 0.5494635193133047, "grad_norm": 0.494140625, "learning_rate": 4.916774567066497e-06, "loss": 2.3302, "step": 10242 }, { "epoch": 0.5495171673819742, "grad_norm": 0.490234375, "learning_rate": 4.916752336070834e-06, "loss": 2.3142, "step": 10243 }, { "epoch": 0.5495708154506438, "grad_norm": 0.44140625, "learning_rate": 4.916730102156692e-06, "loss": 2.3515, "step": 10244 }, { "epoch": 0.5496244635193133, "grad_norm": 0.427734375, "learning_rate": 4.916707865324098e-06, "loss": 2.3296, "step": 10245 }, { "epoch": 0.5496781115879829, "grad_norm": 0.380859375, "learning_rate": 4.916685625573078e-06, "loss": 2.0303, "step": 10246 }, { "epoch": 0.5497317596566523, "grad_norm": 0.447265625, "learning_rate": 4.916663382903659e-06, "loss": 2.4438, "step": 10247 }, { "epoch": 0.5497854077253219, "grad_norm": 0.390625, "learning_rate": 4.91664113731587e-06, "loss": 2.2706, "step": 10248 }, { "epoch": 0.5498390557939914, "grad_norm": 0.40625, "learning_rate": 4.916618888809734e-06, "loss": 1.8975, "step": 10249 }, { "epoch": 0.549892703862661, "grad_norm": 0.50390625, "learning_rate": 4.9165966373852815e-06, "loss": 2.3515, "step": 10250 }, { "epoch": 0.5499463519313305, "grad_norm": 0.408203125, "learning_rate": 4.9165743830425376e-06, "loss": 2.1178, "step": 10251 }, { "epoch": 0.55, "grad_norm": 0.419921875, "learning_rate": 4.916552125781529e-06, "loss": 2.1333, "step": 10252 }, { "epoch": 0.5500536480686695, "grad_norm": 0.3828125, "learning_rate": 4.9165298656022824e-06, "loss": 1.8857, "step": 10253 }, { "epoch": 0.5501072961373391, "grad_norm": 0.53515625, "learning_rate": 4.916507602504826e-06, "loss": 2.6044, "step": 10254 }, { "epoch": 0.5501609442060086, "grad_norm": 0.443359375, "learning_rate": 4.9164853364891865e-06, "loss": 2.4238, "step": 10255 }, { "epoch": 0.5502145922746781, "grad_norm": 0.416015625, "learning_rate": 4.91646306755539e-06, "loss": 2.2393, "step": 10256 }, { "epoch": 0.5502682403433476, "grad_norm": 0.400390625, "learning_rate": 4.9164407957034625e-06, "loss": 2.495, "step": 10257 }, { "epoch": 0.5503218884120171, "grad_norm": 0.44140625, "learning_rate": 4.916418520933432e-06, "loss": 2.2957, "step": 10258 }, { "epoch": 0.5503755364806867, "grad_norm": 0.458984375, "learning_rate": 4.916396243245327e-06, "loss": 2.3705, "step": 10259 }, { "epoch": 0.5504291845493562, "grad_norm": 0.416015625, "learning_rate": 4.916373962639172e-06, "loss": 2.2933, "step": 10260 }, { "epoch": 0.5504828326180258, "grad_norm": 8.125, "learning_rate": 4.9163516791149955e-06, "loss": 1.7979, "step": 10261 }, { "epoch": 0.5505364806866953, "grad_norm": 0.486328125, "learning_rate": 4.9163293926728226e-06, "loss": 2.1207, "step": 10262 }, { "epoch": 0.5505901287553648, "grad_norm": 0.4296875, "learning_rate": 4.916307103312682e-06, "loss": 1.8756, "step": 10263 }, { "epoch": 0.5506437768240343, "grad_norm": 0.408203125, "learning_rate": 4.9162848110345995e-06, "loss": 1.9358, "step": 10264 }, { "epoch": 0.5506974248927039, "grad_norm": 0.4453125, "learning_rate": 4.916262515838603e-06, "loss": 2.4346, "step": 10265 }, { "epoch": 0.5507510729613734, "grad_norm": 0.4453125, "learning_rate": 4.916240217724717e-06, "loss": 2.4205, "step": 10266 }, { "epoch": 0.550804721030043, "grad_norm": 0.4140625, "learning_rate": 4.916217916692973e-06, "loss": 2.0968, "step": 10267 }, { "epoch": 0.5508583690987124, "grad_norm": 0.3828125, "learning_rate": 4.916195612743393e-06, "loss": 2.1276, "step": 10268 }, { "epoch": 0.550912017167382, "grad_norm": 0.443359375, "learning_rate": 4.916173305876006e-06, "loss": 2.5476, "step": 10269 }, { "epoch": 0.5509656652360515, "grad_norm": 0.4296875, "learning_rate": 4.91615099609084e-06, "loss": 2.1359, "step": 10270 }, { "epoch": 0.551019313304721, "grad_norm": 0.396484375, "learning_rate": 4.916128683387922e-06, "loss": 2.2099, "step": 10271 }, { "epoch": 0.5510729613733906, "grad_norm": 1.03125, "learning_rate": 4.9161063677672765e-06, "loss": 2.3145, "step": 10272 }, { "epoch": 0.55112660944206, "grad_norm": 0.447265625, "learning_rate": 4.916084049228932e-06, "loss": 2.2771, "step": 10273 }, { "epoch": 0.5511802575107296, "grad_norm": 0.48046875, "learning_rate": 4.916061727772916e-06, "loss": 2.2289, "step": 10274 }, { "epoch": 0.5512339055793991, "grad_norm": 0.38671875, "learning_rate": 4.916039403399254e-06, "loss": 2.1035, "step": 10275 }, { "epoch": 0.5512875536480687, "grad_norm": 0.44140625, "learning_rate": 4.916017076107974e-06, "loss": 2.314, "step": 10276 }, { "epoch": 0.5513412017167382, "grad_norm": 0.640625, "learning_rate": 4.915994745899103e-06, "loss": 2.4225, "step": 10277 }, { "epoch": 0.5513948497854078, "grad_norm": 0.4765625, "learning_rate": 4.915972412772668e-06, "loss": 2.5239, "step": 10278 }, { "epoch": 0.5514484978540772, "grad_norm": 0.4765625, "learning_rate": 4.9159500767286954e-06, "loss": 2.2262, "step": 10279 }, { "epoch": 0.5515021459227468, "grad_norm": 0.53125, "learning_rate": 4.915927737767212e-06, "loss": 2.4612, "step": 10280 }, { "epoch": 0.5515557939914163, "grad_norm": 1.1171875, "learning_rate": 4.915905395888246e-06, "loss": 2.2085, "step": 10281 }, { "epoch": 0.5516094420600859, "grad_norm": 0.419921875, "learning_rate": 4.915883051091823e-06, "loss": 2.2801, "step": 10282 }, { "epoch": 0.5516630901287554, "grad_norm": 0.458984375, "learning_rate": 4.915860703377971e-06, "loss": 2.4439, "step": 10283 }, { "epoch": 0.5517167381974248, "grad_norm": 0.578125, "learning_rate": 4.915838352746716e-06, "loss": 2.4364, "step": 10284 }, { "epoch": 0.5517703862660944, "grad_norm": 0.5859375, "learning_rate": 4.915815999198086e-06, "loss": 2.5926, "step": 10285 }, { "epoch": 0.5518240343347639, "grad_norm": 0.41796875, "learning_rate": 4.9157936427321075e-06, "loss": 2.37, "step": 10286 }, { "epoch": 0.5518776824034335, "grad_norm": 0.3828125, "learning_rate": 4.915771283348807e-06, "loss": 2.1505, "step": 10287 }, { "epoch": 0.551931330472103, "grad_norm": 0.4765625, "learning_rate": 4.915748921048212e-06, "loss": 2.3889, "step": 10288 }, { "epoch": 0.5519849785407726, "grad_norm": 0.41015625, "learning_rate": 4.91572655583035e-06, "loss": 2.1984, "step": 10289 }, { "epoch": 0.552038626609442, "grad_norm": 0.439453125, "learning_rate": 4.915704187695248e-06, "loss": 2.325, "step": 10290 }, { "epoch": 0.5520922746781116, "grad_norm": 0.388671875, "learning_rate": 4.915681816642932e-06, "loss": 1.9013, "step": 10291 }, { "epoch": 0.5521459227467811, "grad_norm": 0.4296875, "learning_rate": 4.9156594426734296e-06, "loss": 2.402, "step": 10292 }, { "epoch": 0.5521995708154507, "grad_norm": 0.59375, "learning_rate": 4.915637065786768e-06, "loss": 2.2673, "step": 10293 }, { "epoch": 0.5522532188841202, "grad_norm": 0.474609375, "learning_rate": 4.915614685982974e-06, "loss": 2.2973, "step": 10294 }, { "epoch": 0.5523068669527897, "grad_norm": 0.478515625, "learning_rate": 4.915592303262074e-06, "loss": 2.2776, "step": 10295 }, { "epoch": 0.5523605150214592, "grad_norm": 0.4453125, "learning_rate": 4.9155699176240964e-06, "loss": 2.4365, "step": 10296 }, { "epoch": 0.5524141630901288, "grad_norm": 0.546875, "learning_rate": 4.915547529069068e-06, "loss": 2.3098, "step": 10297 }, { "epoch": 0.5524678111587983, "grad_norm": 0.345703125, "learning_rate": 4.915525137597015e-06, "loss": 2.274, "step": 10298 }, { "epoch": 0.5525214592274678, "grad_norm": 0.451171875, "learning_rate": 4.915502743207964e-06, "loss": 2.3076, "step": 10299 }, { "epoch": 0.5525751072961373, "grad_norm": 0.4375, "learning_rate": 4.9154803459019435e-06, "loss": 2.5049, "step": 10300 }, { "epoch": 0.5526287553648068, "grad_norm": 0.7421875, "learning_rate": 4.91545794567898e-06, "loss": 2.3853, "step": 10301 }, { "epoch": 0.5526824034334764, "grad_norm": 0.373046875, "learning_rate": 4.9154355425391e-06, "loss": 2.2188, "step": 10302 }, { "epoch": 0.5527360515021459, "grad_norm": 0.3984375, "learning_rate": 4.915413136482331e-06, "loss": 2.3035, "step": 10303 }, { "epoch": 0.5527896995708155, "grad_norm": 0.330078125, "learning_rate": 4.9153907275087e-06, "loss": 2.054, "step": 10304 }, { "epoch": 0.552843347639485, "grad_norm": 0.7265625, "learning_rate": 4.915368315618235e-06, "loss": 2.542, "step": 10305 }, { "epoch": 0.5528969957081545, "grad_norm": 0.486328125, "learning_rate": 4.9153459008109614e-06, "loss": 2.2946, "step": 10306 }, { "epoch": 0.552950643776824, "grad_norm": 0.45703125, "learning_rate": 4.915323483086906e-06, "loss": 1.4873, "step": 10307 }, { "epoch": 0.5530042918454936, "grad_norm": 0.44140625, "learning_rate": 4.915301062446099e-06, "loss": 2.2401, "step": 10308 }, { "epoch": 0.5530579399141631, "grad_norm": 0.6171875, "learning_rate": 4.915278638888565e-06, "loss": 1.8292, "step": 10309 }, { "epoch": 0.5531115879828327, "grad_norm": 0.447265625, "learning_rate": 4.91525621241433e-06, "loss": 2.2915, "step": 10310 }, { "epoch": 0.5531652360515021, "grad_norm": 0.37109375, "learning_rate": 4.915233783023424e-06, "loss": 1.9667, "step": 10311 }, { "epoch": 0.5532188841201717, "grad_norm": 0.47265625, "learning_rate": 4.9152113507158714e-06, "loss": 2.1129, "step": 10312 }, { "epoch": 0.5532725321888412, "grad_norm": 0.416015625, "learning_rate": 4.9151889154917015e-06, "loss": 2.2943, "step": 10313 }, { "epoch": 0.5533261802575107, "grad_norm": 0.494140625, "learning_rate": 4.91516647735094e-06, "loss": 2.2978, "step": 10314 }, { "epoch": 0.5533798283261803, "grad_norm": 0.416015625, "learning_rate": 4.9151440362936145e-06, "loss": 2.4064, "step": 10315 }, { "epoch": 0.5534334763948497, "grad_norm": 0.498046875, "learning_rate": 4.915121592319752e-06, "loss": 2.0982, "step": 10316 }, { "epoch": 0.5534871244635193, "grad_norm": 0.4453125, "learning_rate": 4.915099145429379e-06, "loss": 1.941, "step": 10317 }, { "epoch": 0.5535407725321888, "grad_norm": 0.388671875, "learning_rate": 4.915076695622525e-06, "loss": 2.4468, "step": 10318 }, { "epoch": 0.5535944206008584, "grad_norm": 0.42578125, "learning_rate": 4.915054242899214e-06, "loss": 2.2168, "step": 10319 }, { "epoch": 0.5536480686695279, "grad_norm": 0.443359375, "learning_rate": 4.915031787259475e-06, "loss": 2.3342, "step": 10320 }, { "epoch": 0.5537017167381975, "grad_norm": 0.451171875, "learning_rate": 4.915009328703333e-06, "loss": 1.6392, "step": 10321 }, { "epoch": 0.5537553648068669, "grad_norm": 0.62109375, "learning_rate": 4.9149868672308184e-06, "loss": 2.2173, "step": 10322 }, { "epoch": 0.5538090128755365, "grad_norm": 0.5390625, "learning_rate": 4.9149644028419565e-06, "loss": 2.4147, "step": 10323 }, { "epoch": 0.553862660944206, "grad_norm": 0.453125, "learning_rate": 4.914941935536774e-06, "loss": 2.2912, "step": 10324 }, { "epoch": 0.5539163090128756, "grad_norm": 0.419921875, "learning_rate": 4.914919465315299e-06, "loss": 2.3636, "step": 10325 }, { "epoch": 0.5539699570815451, "grad_norm": 0.435546875, "learning_rate": 4.914896992177557e-06, "loss": 2.2689, "step": 10326 }, { "epoch": 0.5540236051502145, "grad_norm": 0.4375, "learning_rate": 4.9148745161235775e-06, "loss": 2.3153, "step": 10327 }, { "epoch": 0.5540772532188841, "grad_norm": 0.431640625, "learning_rate": 4.9148520371533855e-06, "loss": 2.2501, "step": 10328 }, { "epoch": 0.5541309012875536, "grad_norm": 0.396484375, "learning_rate": 4.914829555267011e-06, "loss": 2.3098, "step": 10329 }, { "epoch": 0.5541845493562232, "grad_norm": 0.61328125, "learning_rate": 4.914807070464478e-06, "loss": 1.7421, "step": 10330 }, { "epoch": 0.5542381974248927, "grad_norm": 0.59375, "learning_rate": 4.914784582745815e-06, "loss": 2.4577, "step": 10331 }, { "epoch": 0.5542918454935623, "grad_norm": 0.42578125, "learning_rate": 4.914762092111049e-06, "loss": 2.1595, "step": 10332 }, { "epoch": 0.5543454935622317, "grad_norm": 0.43359375, "learning_rate": 4.914739598560208e-06, "loss": 2.3093, "step": 10333 }, { "epoch": 0.5543991416309013, "grad_norm": 0.470703125, "learning_rate": 4.914717102093317e-06, "loss": 2.2176, "step": 10334 }, { "epoch": 0.5544527896995708, "grad_norm": 0.44140625, "learning_rate": 4.914694602710406e-06, "loss": 2.0305, "step": 10335 }, { "epoch": 0.5545064377682404, "grad_norm": 0.53515625, "learning_rate": 4.914672100411499e-06, "loss": 2.5945, "step": 10336 }, { "epoch": 0.5545600858369099, "grad_norm": 0.443359375, "learning_rate": 4.914649595196627e-06, "loss": 2.4746, "step": 10337 }, { "epoch": 0.5546137339055794, "grad_norm": 1.1328125, "learning_rate": 4.9146270870658145e-06, "loss": 2.1875, "step": 10338 }, { "epoch": 0.5546673819742489, "grad_norm": 0.5625, "learning_rate": 4.914604576019089e-06, "loss": 2.236, "step": 10339 }, { "epoch": 0.5547210300429185, "grad_norm": 0.439453125, "learning_rate": 4.9145820620564775e-06, "loss": 2.2652, "step": 10340 }, { "epoch": 0.554774678111588, "grad_norm": 0.46484375, "learning_rate": 4.914559545178008e-06, "loss": 2.2336, "step": 10341 }, { "epoch": 0.5548283261802575, "grad_norm": 0.7421875, "learning_rate": 4.914537025383707e-06, "loss": 1.7673, "step": 10342 }, { "epoch": 0.554881974248927, "grad_norm": 0.50390625, "learning_rate": 4.914514502673603e-06, "loss": 2.1678, "step": 10343 }, { "epoch": 0.5549356223175965, "grad_norm": 0.486328125, "learning_rate": 4.914491977047722e-06, "loss": 2.4809, "step": 10344 }, { "epoch": 0.5549892703862661, "grad_norm": 0.66796875, "learning_rate": 4.914469448506091e-06, "loss": 2.2295, "step": 10345 }, { "epoch": 0.5550429184549356, "grad_norm": 0.400390625, "learning_rate": 4.914446917048737e-06, "loss": 2.0677, "step": 10346 }, { "epoch": 0.5550965665236052, "grad_norm": 0.53515625, "learning_rate": 4.914424382675689e-06, "loss": 2.1381, "step": 10347 }, { "epoch": 0.5551502145922746, "grad_norm": 0.451171875, "learning_rate": 4.914401845386973e-06, "loss": 2.2551, "step": 10348 }, { "epoch": 0.5552038626609442, "grad_norm": 0.56640625, "learning_rate": 4.914379305182616e-06, "loss": 2.1727, "step": 10349 }, { "epoch": 0.5552575107296137, "grad_norm": 0.375, "learning_rate": 4.914356762062645e-06, "loss": 2.3082, "step": 10350 }, { "epoch": 0.5553111587982833, "grad_norm": 0.51171875, "learning_rate": 4.914334216027088e-06, "loss": 2.3082, "step": 10351 }, { "epoch": 0.5553648068669528, "grad_norm": 0.5078125, "learning_rate": 4.914311667075972e-06, "loss": 2.3655, "step": 10352 }, { "epoch": 0.5554184549356224, "grad_norm": 0.57421875, "learning_rate": 4.914289115209325e-06, "loss": 2.2366, "step": 10353 }, { "epoch": 0.5554721030042918, "grad_norm": 0.53515625, "learning_rate": 4.914266560427173e-06, "loss": 2.2419, "step": 10354 }, { "epoch": 0.5555257510729614, "grad_norm": 0.384765625, "learning_rate": 4.914244002729543e-06, "loss": 2.1487, "step": 10355 }, { "epoch": 0.5555793991416309, "grad_norm": 0.5625, "learning_rate": 4.914221442116463e-06, "loss": 1.379, "step": 10356 }, { "epoch": 0.5556330472103004, "grad_norm": 0.396484375, "learning_rate": 4.91419887858796e-06, "loss": 2.4974, "step": 10357 }, { "epoch": 0.55568669527897, "grad_norm": 0.79296875, "learning_rate": 4.9141763121440625e-06, "loss": 2.4082, "step": 10358 }, { "epoch": 0.5557403433476394, "grad_norm": 0.42578125, "learning_rate": 4.914153742784796e-06, "loss": 2.4784, "step": 10359 }, { "epoch": 0.555793991416309, "grad_norm": 0.40234375, "learning_rate": 4.914131170510188e-06, "loss": 2.17, "step": 10360 }, { "epoch": 0.5558476394849785, "grad_norm": 0.44921875, "learning_rate": 4.914108595320267e-06, "loss": 2.3977, "step": 10361 }, { "epoch": 0.5559012875536481, "grad_norm": 0.46875, "learning_rate": 4.914086017215059e-06, "loss": 2.3848, "step": 10362 }, { "epoch": 0.5559549356223176, "grad_norm": 0.427734375, "learning_rate": 4.9140634361945914e-06, "loss": 2.2841, "step": 10363 }, { "epoch": 0.5560085836909872, "grad_norm": 0.35546875, "learning_rate": 4.914040852258892e-06, "loss": 1.9413, "step": 10364 }, { "epoch": 0.5560622317596566, "grad_norm": 0.44921875, "learning_rate": 4.914018265407987e-06, "loss": 2.3284, "step": 10365 }, { "epoch": 0.5561158798283262, "grad_norm": 0.5390625, "learning_rate": 4.913995675641906e-06, "loss": 2.2019, "step": 10366 }, { "epoch": 0.5561695278969957, "grad_norm": 0.48046875, "learning_rate": 4.913973082960674e-06, "loss": 1.8324, "step": 10367 }, { "epoch": 0.5562231759656653, "grad_norm": 0.419921875, "learning_rate": 4.913950487364319e-06, "loss": 2.2687, "step": 10368 }, { "epoch": 0.5562768240343348, "grad_norm": 0.396484375, "learning_rate": 4.913927888852868e-06, "loss": 2.0395, "step": 10369 }, { "epoch": 0.5563304721030042, "grad_norm": 0.451171875, "learning_rate": 4.91390528742635e-06, "loss": 2.1964, "step": 10370 }, { "epoch": 0.5563841201716738, "grad_norm": 0.5078125, "learning_rate": 4.913882683084789e-06, "loss": 2.3695, "step": 10371 }, { "epoch": 0.5564377682403433, "grad_norm": 0.466796875, "learning_rate": 4.913860075828216e-06, "loss": 1.9774, "step": 10372 }, { "epoch": 0.5564914163090129, "grad_norm": 0.478515625, "learning_rate": 4.913837465656656e-06, "loss": 2.259, "step": 10373 }, { "epoch": 0.5565450643776824, "grad_norm": 0.39453125, "learning_rate": 4.913814852570136e-06, "loss": 2.4223, "step": 10374 }, { "epoch": 0.556598712446352, "grad_norm": 0.423828125, "learning_rate": 4.913792236568685e-06, "loss": 2.3676, "step": 10375 }, { "epoch": 0.5566523605150214, "grad_norm": 0.47265625, "learning_rate": 4.9137696176523285e-06, "loss": 2.4565, "step": 10376 }, { "epoch": 0.556706008583691, "grad_norm": 0.453125, "learning_rate": 4.913746995821096e-06, "loss": 2.465, "step": 10377 }, { "epoch": 0.5567596566523605, "grad_norm": 0.51171875, "learning_rate": 4.913724371075013e-06, "loss": 2.3732, "step": 10378 }, { "epoch": 0.5568133047210301, "grad_norm": 0.53125, "learning_rate": 4.913701743414108e-06, "loss": 2.4574, "step": 10379 }, { "epoch": 0.5568669527896996, "grad_norm": 0.388671875, "learning_rate": 4.9136791128384064e-06, "loss": 2.5114, "step": 10380 }, { "epoch": 0.5569206008583691, "grad_norm": 0.56640625, "learning_rate": 4.913656479347938e-06, "loss": 2.5182, "step": 10381 }, { "epoch": 0.5569742489270386, "grad_norm": 0.41015625, "learning_rate": 4.913633842942729e-06, "loss": 2.2981, "step": 10382 }, { "epoch": 0.5570278969957082, "grad_norm": 0.392578125, "learning_rate": 4.9136112036228055e-06, "loss": 2.416, "step": 10383 }, { "epoch": 0.5570815450643777, "grad_norm": 0.48046875, "learning_rate": 4.913588561388197e-06, "loss": 2.2896, "step": 10384 }, { "epoch": 0.5571351931330472, "grad_norm": 0.625, "learning_rate": 4.91356591623893e-06, "loss": 2.1288, "step": 10385 }, { "epoch": 0.5571888412017167, "grad_norm": 0.408203125, "learning_rate": 4.913543268175032e-06, "loss": 2.3811, "step": 10386 }, { "epoch": 0.5572424892703862, "grad_norm": 0.4375, "learning_rate": 4.913520617196529e-06, "loss": 2.2003, "step": 10387 }, { "epoch": 0.5572961373390558, "grad_norm": 0.70703125, "learning_rate": 4.91349796330345e-06, "loss": 2.4067, "step": 10388 }, { "epoch": 0.5573497854077253, "grad_norm": 0.462890625, "learning_rate": 4.913475306495822e-06, "loss": 2.0121, "step": 10389 }, { "epoch": 0.5574034334763949, "grad_norm": 0.44140625, "learning_rate": 4.913452646773672e-06, "loss": 2.2949, "step": 10390 }, { "epoch": 0.5574570815450643, "grad_norm": 0.431640625, "learning_rate": 4.9134299841370285e-06, "loss": 2.3825, "step": 10391 }, { "epoch": 0.5575107296137339, "grad_norm": 0.44140625, "learning_rate": 4.913407318585916e-06, "loss": 2.3224, "step": 10392 }, { "epoch": 0.5575643776824034, "grad_norm": 0.38671875, "learning_rate": 4.913384650120365e-06, "loss": 2.467, "step": 10393 }, { "epoch": 0.557618025751073, "grad_norm": 0.625, "learning_rate": 4.913361978740402e-06, "loss": 2.1402, "step": 10394 }, { "epoch": 0.5576716738197425, "grad_norm": 0.431640625, "learning_rate": 4.913339304446053e-06, "loss": 2.27, "step": 10395 }, { "epoch": 0.5577253218884121, "grad_norm": 0.96875, "learning_rate": 4.913316627237347e-06, "loss": 2.4281, "step": 10396 }, { "epoch": 0.5577789699570815, "grad_norm": 0.33984375, "learning_rate": 4.913293947114311e-06, "loss": 2.0193, "step": 10397 }, { "epoch": 0.5578326180257511, "grad_norm": 0.37890625, "learning_rate": 4.9132712640769716e-06, "loss": 2.1508, "step": 10398 }, { "epoch": 0.5578862660944206, "grad_norm": 0.55078125, "learning_rate": 4.913248578125357e-06, "loss": 2.4175, "step": 10399 }, { "epoch": 0.5579399141630901, "grad_norm": 0.42578125, "learning_rate": 4.913225889259495e-06, "loss": 2.3133, "step": 10400 }, { "epoch": 0.5579935622317597, "grad_norm": 0.408203125, "learning_rate": 4.913203197479412e-06, "loss": 2.3367, "step": 10401 }, { "epoch": 0.5580472103004291, "grad_norm": 0.431640625, "learning_rate": 4.913180502785136e-06, "loss": 2.1598, "step": 10402 }, { "epoch": 0.5581008583690987, "grad_norm": 0.431640625, "learning_rate": 4.9131578051766936e-06, "loss": 2.1613, "step": 10403 }, { "epoch": 0.5581545064377682, "grad_norm": 0.451171875, "learning_rate": 4.913135104654113e-06, "loss": 2.1884, "step": 10404 }, { "epoch": 0.5582081545064378, "grad_norm": 0.462890625, "learning_rate": 4.913112401217421e-06, "loss": 2.24, "step": 10405 }, { "epoch": 0.5582618025751073, "grad_norm": 0.474609375, "learning_rate": 4.913089694866646e-06, "loss": 2.485, "step": 10406 }, { "epoch": 0.5583154506437769, "grad_norm": 0.4765625, "learning_rate": 4.913066985601814e-06, "loss": 2.5547, "step": 10407 }, { "epoch": 0.5583690987124463, "grad_norm": 0.38671875, "learning_rate": 4.913044273422954e-06, "loss": 2.287, "step": 10408 }, { "epoch": 0.5584227467811159, "grad_norm": 0.470703125, "learning_rate": 4.913021558330093e-06, "loss": 2.3967, "step": 10409 }, { "epoch": 0.5584763948497854, "grad_norm": 0.447265625, "learning_rate": 4.912998840323258e-06, "loss": 2.3499, "step": 10410 }, { "epoch": 0.558530042918455, "grad_norm": 0.384765625, "learning_rate": 4.912976119402476e-06, "loss": 1.9832, "step": 10411 }, { "epoch": 0.5585836909871245, "grad_norm": 0.443359375, "learning_rate": 4.912953395567775e-06, "loss": 2.2389, "step": 10412 }, { "epoch": 0.558637339055794, "grad_norm": 0.392578125, "learning_rate": 4.912930668819183e-06, "loss": 2.0254, "step": 10413 }, { "epoch": 0.5586909871244635, "grad_norm": 0.40625, "learning_rate": 4.912907939156727e-06, "loss": 1.9275, "step": 10414 }, { "epoch": 0.558744635193133, "grad_norm": 0.474609375, "learning_rate": 4.9128852065804335e-06, "loss": 2.2732, "step": 10415 }, { "epoch": 0.5587982832618026, "grad_norm": 0.51171875, "learning_rate": 4.912862471090331e-06, "loss": 2.4335, "step": 10416 }, { "epoch": 0.5588519313304721, "grad_norm": 0.5078125, "learning_rate": 4.912839732686448e-06, "loss": 2.4185, "step": 10417 }, { "epoch": 0.5589055793991416, "grad_norm": 0.45703125, "learning_rate": 4.9128169913688084e-06, "loss": 2.2445, "step": 10418 }, { "epoch": 0.5589592274678111, "grad_norm": 0.396484375, "learning_rate": 4.912794247137444e-06, "loss": 2.1865, "step": 10419 }, { "epoch": 0.5590128755364807, "grad_norm": 0.39453125, "learning_rate": 4.912771499992379e-06, "loss": 2.2303, "step": 10420 }, { "epoch": 0.5590665236051502, "grad_norm": 0.494140625, "learning_rate": 4.912748749933642e-06, "loss": 2.1257, "step": 10421 }, { "epoch": 0.5591201716738198, "grad_norm": 0.439453125, "learning_rate": 4.9127259969612615e-06, "loss": 2.4538, "step": 10422 }, { "epoch": 0.5591738197424893, "grad_norm": 0.52734375, "learning_rate": 4.912703241075264e-06, "loss": 2.3037, "step": 10423 }, { "epoch": 0.5592274678111588, "grad_norm": 0.55078125, "learning_rate": 4.912680482275676e-06, "loss": 2.0985, "step": 10424 }, { "epoch": 0.5592811158798283, "grad_norm": 0.3984375, "learning_rate": 4.912657720562527e-06, "loss": 2.3677, "step": 10425 }, { "epoch": 0.5593347639484979, "grad_norm": 0.416015625, "learning_rate": 4.9126349559358436e-06, "loss": 1.9189, "step": 10426 }, { "epoch": 0.5593884120171674, "grad_norm": 0.462890625, "learning_rate": 4.912612188395652e-06, "loss": 2.6177, "step": 10427 }, { "epoch": 0.5594420600858369, "grad_norm": 0.41796875, "learning_rate": 4.912589417941982e-06, "loss": 2.2003, "step": 10428 }, { "epoch": 0.5594957081545064, "grad_norm": 0.42578125, "learning_rate": 4.91256664457486e-06, "loss": 2.193, "step": 10429 }, { "epoch": 0.5595493562231759, "grad_norm": 0.451171875, "learning_rate": 4.912543868294313e-06, "loss": 2.1959, "step": 10430 }, { "epoch": 0.5596030042918455, "grad_norm": 0.357421875, "learning_rate": 4.912521089100369e-06, "loss": 1.682, "step": 10431 }, { "epoch": 0.559656652360515, "grad_norm": 0.458984375, "learning_rate": 4.912498306993055e-06, "loss": 2.3859, "step": 10432 }, { "epoch": 0.5597103004291846, "grad_norm": 0.43359375, "learning_rate": 4.912475521972399e-06, "loss": 2.3091, "step": 10433 }, { "epoch": 0.559763948497854, "grad_norm": 0.498046875, "learning_rate": 4.91245273403843e-06, "loss": 2.4107, "step": 10434 }, { "epoch": 0.5598175965665236, "grad_norm": 0.45703125, "learning_rate": 4.912429943191173e-06, "loss": 2.4681, "step": 10435 }, { "epoch": 0.5598712446351931, "grad_norm": 0.66015625, "learning_rate": 4.912407149430657e-06, "loss": 2.3853, "step": 10436 }, { "epoch": 0.5599248927038627, "grad_norm": 0.5546875, "learning_rate": 4.912384352756908e-06, "loss": 2.2187, "step": 10437 }, { "epoch": 0.5599785407725322, "grad_norm": 0.5078125, "learning_rate": 4.912361553169956e-06, "loss": 2.3185, "step": 10438 }, { "epoch": 0.5600321888412018, "grad_norm": 0.458984375, "learning_rate": 4.912338750669826e-06, "loss": 1.9264, "step": 10439 }, { "epoch": 0.5600858369098712, "grad_norm": 0.53125, "learning_rate": 4.912315945256548e-06, "loss": 2.1613, "step": 10440 }, { "epoch": 0.5601394849785408, "grad_norm": 0.4140625, "learning_rate": 4.912293136930147e-06, "loss": 2.378, "step": 10441 }, { "epoch": 0.5601931330472103, "grad_norm": 0.447265625, "learning_rate": 4.912270325690652e-06, "loss": 2.2051, "step": 10442 }, { "epoch": 0.5602467811158798, "grad_norm": 0.439453125, "learning_rate": 4.912247511538091e-06, "loss": 2.6013, "step": 10443 }, { "epoch": 0.5603004291845494, "grad_norm": 0.55859375, "learning_rate": 4.91222469447249e-06, "loss": 1.8456, "step": 10444 }, { "epoch": 0.5603540772532188, "grad_norm": 0.52734375, "learning_rate": 4.912201874493878e-06, "loss": 2.0707, "step": 10445 }, { "epoch": 0.5604077253218884, "grad_norm": 0.412109375, "learning_rate": 4.912179051602282e-06, "loss": 2.2318, "step": 10446 }, { "epoch": 0.5604613733905579, "grad_norm": 0.5625, "learning_rate": 4.912156225797729e-06, "loss": 2.2885, "step": 10447 }, { "epoch": 0.5605150214592275, "grad_norm": 0.37890625, "learning_rate": 4.912133397080248e-06, "loss": 2.0329, "step": 10448 }, { "epoch": 0.560568669527897, "grad_norm": 0.53515625, "learning_rate": 4.912110565449864e-06, "loss": 2.2342, "step": 10449 }, { "epoch": 0.5606223175965666, "grad_norm": 0.453125, "learning_rate": 4.9120877309066074e-06, "loss": 2.3728, "step": 10450 }, { "epoch": 0.560675965665236, "grad_norm": 0.484375, "learning_rate": 4.912064893450506e-06, "loss": 2.3845, "step": 10451 }, { "epoch": 0.5607296137339056, "grad_norm": 0.435546875, "learning_rate": 4.912042053081583e-06, "loss": 2.1314, "step": 10452 }, { "epoch": 0.5607832618025751, "grad_norm": 0.451171875, "learning_rate": 4.912019209799871e-06, "loss": 2.4595, "step": 10453 }, { "epoch": 0.5608369098712447, "grad_norm": 1.5859375, "learning_rate": 4.911996363605395e-06, "loss": 2.3416, "step": 10454 }, { "epoch": 0.5608905579399142, "grad_norm": 0.427734375, "learning_rate": 4.911973514498183e-06, "loss": 2.4981, "step": 10455 }, { "epoch": 0.5609442060085837, "grad_norm": 0.404296875, "learning_rate": 4.911950662478263e-06, "loss": 2.3582, "step": 10456 }, { "epoch": 0.5609978540772532, "grad_norm": 0.51953125, "learning_rate": 4.911927807545663e-06, "loss": 2.1639, "step": 10457 }, { "epoch": 0.5610515021459227, "grad_norm": 0.5, "learning_rate": 4.9119049497004086e-06, "loss": 2.4593, "step": 10458 }, { "epoch": 0.5611051502145923, "grad_norm": 0.515625, "learning_rate": 4.91188208894253e-06, "loss": 2.3322, "step": 10459 }, { "epoch": 0.5611587982832618, "grad_norm": 0.36328125, "learning_rate": 4.9118592252720536e-06, "loss": 2.1968, "step": 10460 }, { "epoch": 0.5612124463519313, "grad_norm": 0.4609375, "learning_rate": 4.9118363586890055e-06, "loss": 2.2871, "step": 10461 }, { "epoch": 0.5612660944206008, "grad_norm": 0.412109375, "learning_rate": 4.911813489193415e-06, "loss": 2.3612, "step": 10462 }, { "epoch": 0.5613197424892704, "grad_norm": 0.41796875, "learning_rate": 4.9117906167853105e-06, "loss": 2.225, "step": 10463 }, { "epoch": 0.5613733905579399, "grad_norm": 0.421875, "learning_rate": 4.911767741464718e-06, "loss": 2.3144, "step": 10464 }, { "epoch": 0.5614270386266095, "grad_norm": 1.140625, "learning_rate": 4.911744863231667e-06, "loss": 2.364, "step": 10465 }, { "epoch": 0.561480686695279, "grad_norm": 0.4609375, "learning_rate": 4.9117219820861825e-06, "loss": 2.4318, "step": 10466 }, { "epoch": 0.5615343347639485, "grad_norm": 0.416015625, "learning_rate": 4.911699098028293e-06, "loss": 1.9739, "step": 10467 }, { "epoch": 0.561587982832618, "grad_norm": 0.4609375, "learning_rate": 4.911676211058028e-06, "loss": 2.3199, "step": 10468 }, { "epoch": 0.5616416309012876, "grad_norm": 0.478515625, "learning_rate": 4.911653321175413e-06, "loss": 2.384, "step": 10469 }, { "epoch": 0.5616952789699571, "grad_norm": 0.45703125, "learning_rate": 4.9116304283804766e-06, "loss": 2.5337, "step": 10470 }, { "epoch": 0.5617489270386266, "grad_norm": 0.42578125, "learning_rate": 4.911607532673246e-06, "loss": 2.361, "step": 10471 }, { "epoch": 0.5618025751072961, "grad_norm": 0.435546875, "learning_rate": 4.91158463405375e-06, "loss": 2.4393, "step": 10472 }, { "epoch": 0.5618562231759656, "grad_norm": 0.435546875, "learning_rate": 4.911561732522015e-06, "loss": 2.1309, "step": 10473 }, { "epoch": 0.5619098712446352, "grad_norm": 0.486328125, "learning_rate": 4.911538828078069e-06, "loss": 2.4627, "step": 10474 }, { "epoch": 0.5619635193133047, "grad_norm": 0.431640625, "learning_rate": 4.9115159207219385e-06, "loss": 2.3006, "step": 10475 }, { "epoch": 0.5620171673819743, "grad_norm": 0.6328125, "learning_rate": 4.911493010453653e-06, "loss": 2.1624, "step": 10476 }, { "epoch": 0.5620708154506437, "grad_norm": 0.43359375, "learning_rate": 4.91147009727324e-06, "loss": 2.2086, "step": 10477 }, { "epoch": 0.5621244635193133, "grad_norm": 0.40625, "learning_rate": 4.911447181180726e-06, "loss": 2.1272, "step": 10478 }, { "epoch": 0.5621781115879828, "grad_norm": 0.4453125, "learning_rate": 4.911424262176141e-06, "loss": 2.0839, "step": 10479 }, { "epoch": 0.5622317596566524, "grad_norm": 0.396484375, "learning_rate": 4.911401340259509e-06, "loss": 2.4086, "step": 10480 }, { "epoch": 0.5622854077253219, "grad_norm": 0.859375, "learning_rate": 4.9113784154308605e-06, "loss": 2.1722, "step": 10481 }, { "epoch": 0.5623390557939915, "grad_norm": 0.412109375, "learning_rate": 4.911355487690223e-06, "loss": 2.337, "step": 10482 }, { "epoch": 0.5623927038626609, "grad_norm": 0.453125, "learning_rate": 4.911332557037623e-06, "loss": 2.31, "step": 10483 }, { "epoch": 0.5624463519313305, "grad_norm": 0.453125, "learning_rate": 4.911309623473088e-06, "loss": 2.3575, "step": 10484 }, { "epoch": 0.5625, "grad_norm": 0.43359375, "learning_rate": 4.911286686996648e-06, "loss": 2.2574, "step": 10485 }, { "epoch": 0.5625536480686695, "grad_norm": 0.4140625, "learning_rate": 4.911263747608328e-06, "loss": 1.9185, "step": 10486 }, { "epoch": 0.5626072961373391, "grad_norm": 0.375, "learning_rate": 4.9112408053081576e-06, "loss": 2.0371, "step": 10487 }, { "epoch": 0.5626609442060085, "grad_norm": 0.51171875, "learning_rate": 4.911217860096163e-06, "loss": 2.1064, "step": 10488 }, { "epoch": 0.5627145922746781, "grad_norm": 0.482421875, "learning_rate": 4.911194911972373e-06, "loss": 2.3591, "step": 10489 }, { "epoch": 0.5627682403433476, "grad_norm": 0.36328125, "learning_rate": 4.911171960936815e-06, "loss": 2.1036, "step": 10490 }, { "epoch": 0.5628218884120172, "grad_norm": 0.396484375, "learning_rate": 4.911149006989517e-06, "loss": 2.0115, "step": 10491 }, { "epoch": 0.5628755364806867, "grad_norm": 0.41015625, "learning_rate": 4.9111260501305055e-06, "loss": 2.3409, "step": 10492 }, { "epoch": 0.5629291845493563, "grad_norm": 0.4765625, "learning_rate": 4.91110309035981e-06, "loss": 2.1183, "step": 10493 }, { "epoch": 0.5629828326180257, "grad_norm": 0.423828125, "learning_rate": 4.911080127677458e-06, "loss": 2.3326, "step": 10494 }, { "epoch": 0.5630364806866953, "grad_norm": 0.474609375, "learning_rate": 4.911057162083475e-06, "loss": 2.0564, "step": 10495 }, { "epoch": 0.5630901287553648, "grad_norm": 0.39453125, "learning_rate": 4.9110341935778915e-06, "loss": 2.421, "step": 10496 }, { "epoch": 0.5631437768240344, "grad_norm": 0.578125, "learning_rate": 4.911011222160733e-06, "loss": 2.2086, "step": 10497 }, { "epoch": 0.5631974248927039, "grad_norm": 0.455078125, "learning_rate": 4.91098824783203e-06, "loss": 2.25, "step": 10498 }, { "epoch": 0.5632510729613734, "grad_norm": 0.48046875, "learning_rate": 4.910965270591808e-06, "loss": 2.3944, "step": 10499 }, { "epoch": 0.5633047210300429, "grad_norm": 0.427734375, "learning_rate": 4.9109422904400935e-06, "loss": 2.2465, "step": 10500 }, { "epoch": 0.5633583690987124, "grad_norm": 0.4609375, "learning_rate": 4.9109193073769175e-06, "loss": 2.2636, "step": 10501 }, { "epoch": 0.563412017167382, "grad_norm": 0.59765625, "learning_rate": 4.910896321402306e-06, "loss": 2.1036, "step": 10502 }, { "epoch": 0.5634656652360515, "grad_norm": 0.419921875, "learning_rate": 4.910873332516288e-06, "loss": 2.5222, "step": 10503 }, { "epoch": 0.563519313304721, "grad_norm": 0.40625, "learning_rate": 4.91085034071889e-06, "loss": 2.5072, "step": 10504 }, { "epoch": 0.5635729613733905, "grad_norm": 0.73828125, "learning_rate": 4.910827346010139e-06, "loss": 2.0792, "step": 10505 }, { "epoch": 0.5636266094420601, "grad_norm": 0.58203125, "learning_rate": 4.910804348390064e-06, "loss": 2.3971, "step": 10506 }, { "epoch": 0.5636802575107296, "grad_norm": 0.484375, "learning_rate": 4.910781347858694e-06, "loss": 2.2961, "step": 10507 }, { "epoch": 0.5637339055793992, "grad_norm": 0.58203125, "learning_rate": 4.910758344416055e-06, "loss": 2.3071, "step": 10508 }, { "epoch": 0.5637875536480687, "grad_norm": 0.52734375, "learning_rate": 4.910735338062175e-06, "loss": 2.4189, "step": 10509 }, { "epoch": 0.5638412017167382, "grad_norm": 0.412109375, "learning_rate": 4.910712328797082e-06, "loss": 2.3476, "step": 10510 }, { "epoch": 0.5638948497854077, "grad_norm": 0.52734375, "learning_rate": 4.910689316620803e-06, "loss": 2.3512, "step": 10511 }, { "epoch": 0.5639484978540773, "grad_norm": 0.5078125, "learning_rate": 4.910666301533367e-06, "loss": 2.331, "step": 10512 }, { "epoch": 0.5640021459227468, "grad_norm": 0.478515625, "learning_rate": 4.910643283534802e-06, "loss": 2.2528, "step": 10513 }, { "epoch": 0.5640557939914163, "grad_norm": 0.41015625, "learning_rate": 4.910620262625134e-06, "loss": 2.1991, "step": 10514 }, { "epoch": 0.5641094420600858, "grad_norm": 0.462890625, "learning_rate": 4.910597238804393e-06, "loss": 2.4106, "step": 10515 }, { "epoch": 0.5641630901287553, "grad_norm": 0.380859375, "learning_rate": 4.910574212072605e-06, "loss": 2.1801, "step": 10516 }, { "epoch": 0.5642167381974249, "grad_norm": 0.400390625, "learning_rate": 4.910551182429799e-06, "loss": 1.9715, "step": 10517 }, { "epoch": 0.5642703862660944, "grad_norm": 0.376953125, "learning_rate": 4.9105281498760025e-06, "loss": 2.2446, "step": 10518 }, { "epoch": 0.564324034334764, "grad_norm": 0.4375, "learning_rate": 4.9105051144112425e-06, "loss": 2.2608, "step": 10519 }, { "epoch": 0.5643776824034334, "grad_norm": 0.427734375, "learning_rate": 4.910482076035548e-06, "loss": 2.198, "step": 10520 }, { "epoch": 0.564431330472103, "grad_norm": 0.52734375, "learning_rate": 4.910459034748946e-06, "loss": 2.247, "step": 10521 }, { "epoch": 0.5644849785407725, "grad_norm": 0.470703125, "learning_rate": 4.910435990551465e-06, "loss": 2.4167, "step": 10522 }, { "epoch": 0.5645386266094421, "grad_norm": 0.388671875, "learning_rate": 4.910412943443132e-06, "loss": 2.2001, "step": 10523 }, { "epoch": 0.5645922746781116, "grad_norm": 0.39453125, "learning_rate": 4.910389893423976e-06, "loss": 2.0907, "step": 10524 }, { "epoch": 0.5646459227467812, "grad_norm": 0.58984375, "learning_rate": 4.910366840494023e-06, "loss": 2.3075, "step": 10525 }, { "epoch": 0.5646995708154506, "grad_norm": 0.45703125, "learning_rate": 4.910343784653303e-06, "loss": 1.9511, "step": 10526 }, { "epoch": 0.5647532188841202, "grad_norm": 0.51953125, "learning_rate": 4.910320725901843e-06, "loss": 2.2483, "step": 10527 }, { "epoch": 0.5648068669527897, "grad_norm": 0.37890625, "learning_rate": 4.910297664239669e-06, "loss": 2.0708, "step": 10528 }, { "epoch": 0.5648605150214592, "grad_norm": 0.8359375, "learning_rate": 4.910274599666812e-06, "loss": 2.5296, "step": 10529 }, { "epoch": 0.5649141630901288, "grad_norm": 0.384765625, "learning_rate": 4.910251532183298e-06, "loss": 2.253, "step": 10530 }, { "epoch": 0.5649678111587982, "grad_norm": 0.515625, "learning_rate": 4.910228461789155e-06, "loss": 2.0785, "step": 10531 }, { "epoch": 0.5650214592274678, "grad_norm": 0.4140625, "learning_rate": 4.910205388484411e-06, "loss": 2.4044, "step": 10532 }, { "epoch": 0.5650751072961373, "grad_norm": 0.71484375, "learning_rate": 4.910182312269095e-06, "loss": 2.3353, "step": 10533 }, { "epoch": 0.5651287553648069, "grad_norm": 0.44921875, "learning_rate": 4.910159233143232e-06, "loss": 2.3192, "step": 10534 }, { "epoch": 0.5651824034334764, "grad_norm": 0.546875, "learning_rate": 4.910136151106853e-06, "loss": 2.1964, "step": 10535 }, { "epoch": 0.565236051502146, "grad_norm": 0.46484375, "learning_rate": 4.910113066159985e-06, "loss": 2.4669, "step": 10536 }, { "epoch": 0.5652896995708154, "grad_norm": 0.416015625, "learning_rate": 4.910089978302655e-06, "loss": 2.4673, "step": 10537 }, { "epoch": 0.565343347639485, "grad_norm": 0.345703125, "learning_rate": 4.91006688753489e-06, "loss": 2.2189, "step": 10538 }, { "epoch": 0.5653969957081545, "grad_norm": 0.79296875, "learning_rate": 4.91004379385672e-06, "loss": 2.0187, "step": 10539 }, { "epoch": 0.5654506437768241, "grad_norm": 0.423828125, "learning_rate": 4.910020697268173e-06, "loss": 2.3156, "step": 10540 }, { "epoch": 0.5655042918454936, "grad_norm": 0.46484375, "learning_rate": 4.9099975977692745e-06, "loss": 2.2041, "step": 10541 }, { "epoch": 0.5655579399141631, "grad_norm": 0.40625, "learning_rate": 4.909974495360055e-06, "loss": 1.8554, "step": 10542 }, { "epoch": 0.5656115879828326, "grad_norm": 0.482421875, "learning_rate": 4.909951390040541e-06, "loss": 2.4094, "step": 10543 }, { "epoch": 0.5656652360515021, "grad_norm": 0.421875, "learning_rate": 4.909928281810761e-06, "loss": 2.2774, "step": 10544 }, { "epoch": 0.5657188841201717, "grad_norm": 0.416015625, "learning_rate": 4.909905170670742e-06, "loss": 2.1409, "step": 10545 }, { "epoch": 0.5657725321888412, "grad_norm": 0.408203125, "learning_rate": 4.909882056620512e-06, "loss": 2.2405, "step": 10546 }, { "epoch": 0.5658261802575107, "grad_norm": 0.4140625, "learning_rate": 4.9098589396601e-06, "loss": 2.2113, "step": 10547 }, { "epoch": 0.5658798283261802, "grad_norm": 0.404296875, "learning_rate": 4.909835819789533e-06, "loss": 2.2393, "step": 10548 }, { "epoch": 0.5659334763948498, "grad_norm": 0.44921875, "learning_rate": 4.90981269700884e-06, "loss": 2.2873, "step": 10549 }, { "epoch": 0.5659871244635193, "grad_norm": 0.453125, "learning_rate": 4.909789571318048e-06, "loss": 2.3901, "step": 10550 }, { "epoch": 0.5660407725321889, "grad_norm": 2.21875, "learning_rate": 4.9097664427171845e-06, "loss": 2.3127, "step": 10551 }, { "epoch": 0.5660944206008584, "grad_norm": 0.4453125, "learning_rate": 4.909743311206279e-06, "loss": 2.3618, "step": 10552 }, { "epoch": 0.5661480686695279, "grad_norm": 1.0703125, "learning_rate": 4.909720176785357e-06, "loss": 1.8823, "step": 10553 }, { "epoch": 0.5662017167381974, "grad_norm": 0.60546875, "learning_rate": 4.909697039454448e-06, "loss": 2.3368, "step": 10554 }, { "epoch": 0.566255364806867, "grad_norm": 0.376953125, "learning_rate": 4.909673899213582e-06, "loss": 2.2203, "step": 10555 }, { "epoch": 0.5663090128755365, "grad_norm": 0.470703125, "learning_rate": 4.909650756062782e-06, "loss": 2.2598, "step": 10556 }, { "epoch": 0.566362660944206, "grad_norm": 0.486328125, "learning_rate": 4.909627610002081e-06, "loss": 2.4975, "step": 10557 }, { "epoch": 0.5664163090128755, "grad_norm": 0.4609375, "learning_rate": 4.909604461031503e-06, "loss": 2.2625, "step": 10558 }, { "epoch": 0.566469957081545, "grad_norm": 0.451171875, "learning_rate": 4.909581309151079e-06, "loss": 2.3888, "step": 10559 }, { "epoch": 0.5665236051502146, "grad_norm": 0.416015625, "learning_rate": 4.909558154360834e-06, "loss": 2.0768, "step": 10560 }, { "epoch": 0.5665772532188841, "grad_norm": 0.439453125, "learning_rate": 4.909534996660799e-06, "loss": 2.4348, "step": 10561 }, { "epoch": 0.5666309012875537, "grad_norm": 0.416015625, "learning_rate": 4.909511836050999e-06, "loss": 2.2152, "step": 10562 }, { "epoch": 0.5666845493562231, "grad_norm": 0.392578125, "learning_rate": 4.909488672531465e-06, "loss": 2.2248, "step": 10563 }, { "epoch": 0.5667381974248927, "grad_norm": 0.40234375, "learning_rate": 4.909465506102223e-06, "loss": 2.2764, "step": 10564 }, { "epoch": 0.5667918454935622, "grad_norm": 0.453125, "learning_rate": 4.9094423367633006e-06, "loss": 2.3372, "step": 10565 }, { "epoch": 0.5668454935622318, "grad_norm": 0.486328125, "learning_rate": 4.909419164514728e-06, "loss": 2.2726, "step": 10566 }, { "epoch": 0.5668991416309013, "grad_norm": 0.400390625, "learning_rate": 4.90939598935653e-06, "loss": 2.2442, "step": 10567 }, { "epoch": 0.5669527896995709, "grad_norm": 0.369140625, "learning_rate": 4.909372811288738e-06, "loss": 2.2385, "step": 10568 }, { "epoch": 0.5670064377682403, "grad_norm": 0.3671875, "learning_rate": 4.909349630311377e-06, "loss": 2.0233, "step": 10569 }, { "epoch": 0.5670600858369099, "grad_norm": 0.44140625, "learning_rate": 4.909326446424478e-06, "loss": 2.4834, "step": 10570 }, { "epoch": 0.5671137339055794, "grad_norm": 0.7734375, "learning_rate": 4.909303259628066e-06, "loss": 2.2639, "step": 10571 }, { "epoch": 0.5671673819742489, "grad_norm": 0.396484375, "learning_rate": 4.909280069922171e-06, "loss": 2.3064, "step": 10572 }, { "epoch": 0.5672210300429185, "grad_norm": 0.69140625, "learning_rate": 4.909256877306821e-06, "loss": 2.1869, "step": 10573 }, { "epoch": 0.5672746781115879, "grad_norm": 0.3984375, "learning_rate": 4.909233681782042e-06, "loss": 2.2037, "step": 10574 }, { "epoch": 0.5673283261802575, "grad_norm": 0.458984375, "learning_rate": 4.909210483347864e-06, "loss": 2.2063, "step": 10575 }, { "epoch": 0.567381974248927, "grad_norm": 0.5625, "learning_rate": 4.909187282004314e-06, "loss": 1.8283, "step": 10576 }, { "epoch": 0.5674356223175966, "grad_norm": 0.4609375, "learning_rate": 4.90916407775142e-06, "loss": 2.3004, "step": 10577 }, { "epoch": 0.5674892703862661, "grad_norm": 0.484375, "learning_rate": 4.909140870589212e-06, "loss": 1.8113, "step": 10578 }, { "epoch": 0.5675429184549357, "grad_norm": 0.451171875, "learning_rate": 4.909117660517715e-06, "loss": 2.3414, "step": 10579 }, { "epoch": 0.5675965665236051, "grad_norm": 0.6171875, "learning_rate": 4.90909444753696e-06, "loss": 2.3471, "step": 10580 }, { "epoch": 0.5676502145922747, "grad_norm": 0.46484375, "learning_rate": 4.909071231646973e-06, "loss": 2.1995, "step": 10581 }, { "epoch": 0.5677038626609442, "grad_norm": 0.482421875, "learning_rate": 4.909048012847781e-06, "loss": 2.2607, "step": 10582 }, { "epoch": 0.5677575107296138, "grad_norm": 0.396484375, "learning_rate": 4.9090247911394154e-06, "loss": 2.0651, "step": 10583 }, { "epoch": 0.5678111587982833, "grad_norm": 0.474609375, "learning_rate": 4.909001566521902e-06, "loss": 2.2247, "step": 10584 }, { "epoch": 0.5678648068669528, "grad_norm": 0.5234375, "learning_rate": 4.908978338995269e-06, "loss": 2.0491, "step": 10585 }, { "epoch": 0.5679184549356223, "grad_norm": 0.345703125, "learning_rate": 4.908955108559544e-06, "loss": 2.1376, "step": 10586 }, { "epoch": 0.5679721030042918, "grad_norm": 0.427734375, "learning_rate": 4.908931875214757e-06, "loss": 2.1962, "step": 10587 }, { "epoch": 0.5680257510729614, "grad_norm": 0.486328125, "learning_rate": 4.908908638960934e-06, "loss": 2.4419, "step": 10588 }, { "epoch": 0.5680793991416309, "grad_norm": 1.3203125, "learning_rate": 4.908885399798105e-06, "loss": 2.1888, "step": 10589 }, { "epoch": 0.5681330472103004, "grad_norm": 0.375, "learning_rate": 4.908862157726296e-06, "loss": 2.3526, "step": 10590 }, { "epoch": 0.5681866952789699, "grad_norm": 0.458984375, "learning_rate": 4.908838912745536e-06, "loss": 2.2932, "step": 10591 }, { "epoch": 0.5682403433476395, "grad_norm": 0.443359375, "learning_rate": 4.9088156648558535e-06, "loss": 2.4873, "step": 10592 }, { "epoch": 0.568293991416309, "grad_norm": 0.462890625, "learning_rate": 4.9087924140572765e-06, "loss": 2.4434, "step": 10593 }, { "epoch": 0.5683476394849786, "grad_norm": 0.40234375, "learning_rate": 4.9087691603498325e-06, "loss": 2.2894, "step": 10594 }, { "epoch": 0.568401287553648, "grad_norm": 0.43359375, "learning_rate": 4.9087459037335495e-06, "loss": 2.2818, "step": 10595 }, { "epoch": 0.5684549356223176, "grad_norm": 0.453125, "learning_rate": 4.908722644208455e-06, "loss": 2.3495, "step": 10596 }, { "epoch": 0.5685085836909871, "grad_norm": 0.55859375, "learning_rate": 4.90869938177458e-06, "loss": 2.4559, "step": 10597 }, { "epoch": 0.5685622317596567, "grad_norm": 0.6015625, "learning_rate": 4.90867611643195e-06, "loss": 2.1457, "step": 10598 }, { "epoch": 0.5686158798283262, "grad_norm": 0.43359375, "learning_rate": 4.908652848180593e-06, "loss": 1.9307, "step": 10599 }, { "epoch": 0.5686695278969958, "grad_norm": 0.400390625, "learning_rate": 4.908629577020538e-06, "loss": 2.2086, "step": 10600 }, { "epoch": 0.5687231759656652, "grad_norm": 0.494140625, "learning_rate": 4.9086063029518136e-06, "loss": 2.7626, "step": 10601 }, { "epoch": 0.5687768240343347, "grad_norm": 0.55078125, "learning_rate": 4.908583025974447e-06, "loss": 2.164, "step": 10602 }, { "epoch": 0.5688304721030043, "grad_norm": 0.5078125, "learning_rate": 4.908559746088466e-06, "loss": 2.3347, "step": 10603 }, { "epoch": 0.5688841201716738, "grad_norm": 0.4375, "learning_rate": 4.9085364632939e-06, "loss": 2.0469, "step": 10604 }, { "epoch": 0.5689377682403434, "grad_norm": 0.37890625, "learning_rate": 4.908513177590775e-06, "loss": 2.1044, "step": 10605 }, { "epoch": 0.5689914163090128, "grad_norm": 0.43359375, "learning_rate": 4.908489888979121e-06, "loss": 2.3568, "step": 10606 }, { "epoch": 0.5690450643776824, "grad_norm": 0.5078125, "learning_rate": 4.908466597458966e-06, "loss": 2.3465, "step": 10607 }, { "epoch": 0.5690987124463519, "grad_norm": 0.4453125, "learning_rate": 4.908443303030338e-06, "loss": 2.4606, "step": 10608 }, { "epoch": 0.5691523605150215, "grad_norm": 0.44921875, "learning_rate": 4.908420005693264e-06, "loss": 2.2641, "step": 10609 }, { "epoch": 0.569206008583691, "grad_norm": 0.90625, "learning_rate": 4.9083967054477735e-06, "loss": 1.5524, "step": 10610 }, { "epoch": 0.5692596566523606, "grad_norm": 0.47265625, "learning_rate": 4.9083734022938935e-06, "loss": 2.0027, "step": 10611 }, { "epoch": 0.56931330472103, "grad_norm": 0.74609375, "learning_rate": 4.908350096231653e-06, "loss": 2.2217, "step": 10612 }, { "epoch": 0.5693669527896996, "grad_norm": 0.396484375, "learning_rate": 4.908326787261081e-06, "loss": 2.127, "step": 10613 }, { "epoch": 0.5694206008583691, "grad_norm": 0.462890625, "learning_rate": 4.908303475382203e-06, "loss": 2.4292, "step": 10614 }, { "epoch": 0.5694742489270386, "grad_norm": 0.365234375, "learning_rate": 4.90828016059505e-06, "loss": 1.9942, "step": 10615 }, { "epoch": 0.5695278969957082, "grad_norm": 0.4453125, "learning_rate": 4.908256842899648e-06, "loss": 2.3087, "step": 10616 }, { "epoch": 0.5695815450643776, "grad_norm": 0.4921875, "learning_rate": 4.908233522296026e-06, "loss": 2.165, "step": 10617 }, { "epoch": 0.5696351931330472, "grad_norm": 0.41015625, "learning_rate": 4.9082101987842125e-06, "loss": 2.2823, "step": 10618 }, { "epoch": 0.5696888412017167, "grad_norm": 0.443359375, "learning_rate": 4.908186872364236e-06, "loss": 2.37, "step": 10619 }, { "epoch": 0.5697424892703863, "grad_norm": 0.404296875, "learning_rate": 4.908163543036122e-06, "loss": 2.1622, "step": 10620 }, { "epoch": 0.5697961373390558, "grad_norm": 0.427734375, "learning_rate": 4.908140210799902e-06, "loss": 2.2022, "step": 10621 }, { "epoch": 0.5698497854077254, "grad_norm": 0.455078125, "learning_rate": 4.908116875655603e-06, "loss": 2.1902, "step": 10622 }, { "epoch": 0.5699034334763948, "grad_norm": 0.5078125, "learning_rate": 4.908093537603253e-06, "loss": 2.1775, "step": 10623 }, { "epoch": 0.5699570815450644, "grad_norm": 0.76171875, "learning_rate": 4.90807019664288e-06, "loss": 2.0874, "step": 10624 }, { "epoch": 0.5700107296137339, "grad_norm": 0.455078125, "learning_rate": 4.908046852774513e-06, "loss": 2.3897, "step": 10625 }, { "epoch": 0.5700643776824035, "grad_norm": 0.423828125, "learning_rate": 4.908023505998179e-06, "loss": 2.2153, "step": 10626 }, { "epoch": 0.570118025751073, "grad_norm": 0.3828125, "learning_rate": 4.908000156313907e-06, "loss": 2.2288, "step": 10627 }, { "epoch": 0.5701716738197425, "grad_norm": 0.5234375, "learning_rate": 4.907976803721724e-06, "loss": 2.3038, "step": 10628 }, { "epoch": 0.570225321888412, "grad_norm": 0.466796875, "learning_rate": 4.90795344822166e-06, "loss": 2.5818, "step": 10629 }, { "epoch": 0.5702789699570815, "grad_norm": 0.447265625, "learning_rate": 4.907930089813743e-06, "loss": 2.5694, "step": 10630 }, { "epoch": 0.5703326180257511, "grad_norm": 0.408203125, "learning_rate": 4.907906728497999e-06, "loss": 2.2223, "step": 10631 }, { "epoch": 0.5703862660944206, "grad_norm": 0.43359375, "learning_rate": 4.907883364274459e-06, "loss": 2.4767, "step": 10632 }, { "epoch": 0.5704399141630901, "grad_norm": 0.427734375, "learning_rate": 4.90785999714315e-06, "loss": 2.4375, "step": 10633 }, { "epoch": 0.5704935622317596, "grad_norm": 0.37890625, "learning_rate": 4.9078366271041e-06, "loss": 2.1005, "step": 10634 }, { "epoch": 0.5705472103004292, "grad_norm": 0.466796875, "learning_rate": 4.907813254157338e-06, "loss": 2.3029, "step": 10635 }, { "epoch": 0.5706008583690987, "grad_norm": 0.45703125, "learning_rate": 4.907789878302891e-06, "loss": 2.2528, "step": 10636 }, { "epoch": 0.5706545064377683, "grad_norm": 0.390625, "learning_rate": 4.907766499540788e-06, "loss": 2.1434, "step": 10637 }, { "epoch": 0.5707081545064377, "grad_norm": 0.4765625, "learning_rate": 4.907743117871057e-06, "loss": 2.4177, "step": 10638 }, { "epoch": 0.5707618025751073, "grad_norm": 0.55859375, "learning_rate": 4.907719733293727e-06, "loss": 2.5035, "step": 10639 }, { "epoch": 0.5708154506437768, "grad_norm": 0.515625, "learning_rate": 4.907696345808825e-06, "loss": 2.3966, "step": 10640 }, { "epoch": 0.5708690987124464, "grad_norm": 0.5078125, "learning_rate": 4.90767295541638e-06, "loss": 2.369, "step": 10641 }, { "epoch": 0.5709227467811159, "grad_norm": 1.1640625, "learning_rate": 4.907649562116421e-06, "loss": 2.3461, "step": 10642 }, { "epoch": 0.5709763948497855, "grad_norm": 0.494140625, "learning_rate": 4.907626165908975e-06, "loss": 2.2556, "step": 10643 }, { "epoch": 0.5710300429184549, "grad_norm": 0.486328125, "learning_rate": 4.90760276679407e-06, "loss": 2.3767, "step": 10644 }, { "epoch": 0.5710836909871244, "grad_norm": 0.408203125, "learning_rate": 4.9075793647717355e-06, "loss": 2.4805, "step": 10645 }, { "epoch": 0.571137339055794, "grad_norm": 0.44140625, "learning_rate": 4.9075559598419984e-06, "loss": 2.4589, "step": 10646 }, { "epoch": 0.5711909871244635, "grad_norm": 0.46875, "learning_rate": 4.907532552004888e-06, "loss": 2.2771, "step": 10647 }, { "epoch": 0.5712446351931331, "grad_norm": 0.44140625, "learning_rate": 4.907509141260432e-06, "loss": 2.5204, "step": 10648 }, { "epoch": 0.5712982832618025, "grad_norm": 0.5390625, "learning_rate": 4.90748572760866e-06, "loss": 2.5269, "step": 10649 }, { "epoch": 0.5713519313304721, "grad_norm": 1.96875, "learning_rate": 4.907462311049598e-06, "loss": 2.1695, "step": 10650 }, { "epoch": 0.5714055793991416, "grad_norm": 0.49609375, "learning_rate": 4.907438891583276e-06, "loss": 2.1606, "step": 10651 }, { "epoch": 0.5714592274678112, "grad_norm": 0.453125, "learning_rate": 4.907415469209722e-06, "loss": 2.0841, "step": 10652 }, { "epoch": 0.5715128755364807, "grad_norm": 0.490234375, "learning_rate": 4.907392043928964e-06, "loss": 1.8168, "step": 10653 }, { "epoch": 0.5715665236051503, "grad_norm": 0.3984375, "learning_rate": 4.90736861574103e-06, "loss": 2.386, "step": 10654 }, { "epoch": 0.5716201716738197, "grad_norm": 0.4140625, "learning_rate": 4.907345184645949e-06, "loss": 2.1701, "step": 10655 }, { "epoch": 0.5716738197424893, "grad_norm": 0.419921875, "learning_rate": 4.907321750643748e-06, "loss": 2.091, "step": 10656 }, { "epoch": 0.5717274678111588, "grad_norm": 0.365234375, "learning_rate": 4.907298313734458e-06, "loss": 2.1297, "step": 10657 }, { "epoch": 0.5717811158798283, "grad_norm": 0.6796875, "learning_rate": 4.907274873918104e-06, "loss": 2.3923, "step": 10658 }, { "epoch": 0.5718347639484979, "grad_norm": 0.40234375, "learning_rate": 4.907251431194717e-06, "loss": 2.2941, "step": 10659 }, { "epoch": 0.5718884120171673, "grad_norm": 0.53125, "learning_rate": 4.907227985564323e-06, "loss": 2.2555, "step": 10660 }, { "epoch": 0.5719420600858369, "grad_norm": 0.421875, "learning_rate": 4.907204537026952e-06, "loss": 2.2768, "step": 10661 }, { "epoch": 0.5719957081545064, "grad_norm": 0.58203125, "learning_rate": 4.907181085582632e-06, "loss": 2.319, "step": 10662 }, { "epoch": 0.572049356223176, "grad_norm": 0.431640625, "learning_rate": 4.90715763123139e-06, "loss": 2.4497, "step": 10663 }, { "epoch": 0.5721030042918455, "grad_norm": 0.53125, "learning_rate": 4.907134173973257e-06, "loss": 2.4019, "step": 10664 }, { "epoch": 0.572156652360515, "grad_norm": 0.462890625, "learning_rate": 4.907110713808259e-06, "loss": 2.1833, "step": 10665 }, { "epoch": 0.5722103004291845, "grad_norm": 0.423828125, "learning_rate": 4.907087250736425e-06, "loss": 2.4105, "step": 10666 }, { "epoch": 0.5722639484978541, "grad_norm": 0.5078125, "learning_rate": 4.907063784757784e-06, "loss": 1.8279, "step": 10667 }, { "epoch": 0.5723175965665236, "grad_norm": 0.392578125, "learning_rate": 4.907040315872363e-06, "loss": 2.1554, "step": 10668 }, { "epoch": 0.5723712446351932, "grad_norm": 0.734375, "learning_rate": 4.9070168440801915e-06, "loss": 2.2253, "step": 10669 }, { "epoch": 0.5724248927038627, "grad_norm": 0.44921875, "learning_rate": 4.906993369381297e-06, "loss": 2.3039, "step": 10670 }, { "epoch": 0.5724785407725322, "grad_norm": 0.5, "learning_rate": 4.906969891775709e-06, "loss": 2.3564, "step": 10671 }, { "epoch": 0.5725321888412017, "grad_norm": 0.396484375, "learning_rate": 4.906946411263455e-06, "loss": 2.3342, "step": 10672 }, { "epoch": 0.5725858369098712, "grad_norm": 0.46484375, "learning_rate": 4.906922927844563e-06, "loss": 1.9816, "step": 10673 }, { "epoch": 0.5726394849785408, "grad_norm": 0.41796875, "learning_rate": 4.906899441519062e-06, "loss": 2.261, "step": 10674 }, { "epoch": 0.5726931330472103, "grad_norm": 0.412109375, "learning_rate": 4.90687595228698e-06, "loss": 2.2582, "step": 10675 }, { "epoch": 0.5727467811158798, "grad_norm": 0.46484375, "learning_rate": 4.9068524601483454e-06, "loss": 2.2133, "step": 10676 }, { "epoch": 0.5728004291845493, "grad_norm": 0.423828125, "learning_rate": 4.906828965103187e-06, "loss": 2.0702, "step": 10677 }, { "epoch": 0.5728540772532189, "grad_norm": 0.63671875, "learning_rate": 4.906805467151534e-06, "loss": 2.3374, "step": 10678 }, { "epoch": 0.5729077253218884, "grad_norm": 0.443359375, "learning_rate": 4.906781966293413e-06, "loss": 2.2254, "step": 10679 }, { "epoch": 0.572961373390558, "grad_norm": 0.404296875, "learning_rate": 4.906758462528853e-06, "loss": 1.9499, "step": 10680 }, { "epoch": 0.5730150214592274, "grad_norm": 0.427734375, "learning_rate": 4.906734955857882e-06, "loss": 2.36, "step": 10681 }, { "epoch": 0.573068669527897, "grad_norm": 0.47265625, "learning_rate": 4.906711446280529e-06, "loss": 2.4593, "step": 10682 }, { "epoch": 0.5731223175965665, "grad_norm": 0.4609375, "learning_rate": 4.9066879337968235e-06, "loss": 2.4825, "step": 10683 }, { "epoch": 0.5731759656652361, "grad_norm": 0.396484375, "learning_rate": 4.906664418406791e-06, "loss": 2.1328, "step": 10684 }, { "epoch": 0.5732296137339056, "grad_norm": 0.45703125, "learning_rate": 4.906640900110462e-06, "loss": 2.4232, "step": 10685 }, { "epoch": 0.5732832618025752, "grad_norm": 0.46875, "learning_rate": 4.906617378907865e-06, "loss": 2.2592, "step": 10686 }, { "epoch": 0.5733369098712446, "grad_norm": 0.474609375, "learning_rate": 4.906593854799027e-06, "loss": 2.4493, "step": 10687 }, { "epoch": 0.5733905579399141, "grad_norm": 0.470703125, "learning_rate": 4.9065703277839775e-06, "loss": 2.4723, "step": 10688 }, { "epoch": 0.5734442060085837, "grad_norm": 0.462890625, "learning_rate": 4.906546797862745e-06, "loss": 2.4193, "step": 10689 }, { "epoch": 0.5734978540772532, "grad_norm": 0.455078125, "learning_rate": 4.906523265035358e-06, "loss": 2.2276, "step": 10690 }, { "epoch": 0.5735515021459228, "grad_norm": 0.46875, "learning_rate": 4.906499729301843e-06, "loss": 1.6999, "step": 10691 }, { "epoch": 0.5736051502145922, "grad_norm": 0.451171875, "learning_rate": 4.906476190662231e-06, "loss": 2.3372, "step": 10692 }, { "epoch": 0.5736587982832618, "grad_norm": 0.37109375, "learning_rate": 4.906452649116549e-06, "loss": 2.4262, "step": 10693 }, { "epoch": 0.5737124463519313, "grad_norm": 0.412109375, "learning_rate": 4.906429104664826e-06, "loss": 2.4349, "step": 10694 }, { "epoch": 0.5737660944206009, "grad_norm": 0.435546875, "learning_rate": 4.90640555730709e-06, "loss": 2.2145, "step": 10695 }, { "epoch": 0.5738197424892704, "grad_norm": 0.443359375, "learning_rate": 4.9063820070433696e-06, "loss": 2.5643, "step": 10696 }, { "epoch": 0.57387339055794, "grad_norm": 0.423828125, "learning_rate": 4.906358453873693e-06, "loss": 2.4786, "step": 10697 }, { "epoch": 0.5739270386266094, "grad_norm": 0.435546875, "learning_rate": 4.9063348977980895e-06, "loss": 2.3745, "step": 10698 }, { "epoch": 0.573980686695279, "grad_norm": 0.44140625, "learning_rate": 4.906311338816587e-06, "loss": 2.3879, "step": 10699 }, { "epoch": 0.5740343347639485, "grad_norm": 0.36328125, "learning_rate": 4.906287776929214e-06, "loss": 1.9527, "step": 10700 }, { "epoch": 0.574087982832618, "grad_norm": 0.423828125, "learning_rate": 4.906264212135998e-06, "loss": 2.3689, "step": 10701 }, { "epoch": 0.5741416309012876, "grad_norm": 0.494140625, "learning_rate": 4.906240644436969e-06, "loss": 2.1944, "step": 10702 }, { "epoch": 0.574195278969957, "grad_norm": 0.478515625, "learning_rate": 4.906217073832155e-06, "loss": 2.2842, "step": 10703 }, { "epoch": 0.5742489270386266, "grad_norm": 0.419921875, "learning_rate": 4.906193500321583e-06, "loss": 2.2354, "step": 10704 }, { "epoch": 0.5743025751072961, "grad_norm": 0.515625, "learning_rate": 4.906169923905284e-06, "loss": 2.4566, "step": 10705 }, { "epoch": 0.5743562231759657, "grad_norm": 0.4296875, "learning_rate": 4.9061463445832846e-06, "loss": 2.1935, "step": 10706 }, { "epoch": 0.5744098712446352, "grad_norm": 0.41796875, "learning_rate": 4.906122762355614e-06, "loss": 2.2226, "step": 10707 }, { "epoch": 0.5744635193133047, "grad_norm": 0.3984375, "learning_rate": 4.906099177222301e-06, "loss": 1.9905, "step": 10708 }, { "epoch": 0.5745171673819742, "grad_norm": 0.6328125, "learning_rate": 4.906075589183373e-06, "loss": 2.2852, "step": 10709 }, { "epoch": 0.5745708154506438, "grad_norm": 0.41796875, "learning_rate": 4.906051998238859e-06, "loss": 2.0785, "step": 10710 }, { "epoch": 0.5746244635193133, "grad_norm": 0.451171875, "learning_rate": 4.9060284043887875e-06, "loss": 2.3405, "step": 10711 }, { "epoch": 0.5746781115879829, "grad_norm": 0.498046875, "learning_rate": 4.906004807633188e-06, "loss": 2.064, "step": 10712 }, { "epoch": 0.5747317596566524, "grad_norm": 0.35546875, "learning_rate": 4.905981207972087e-06, "loss": 1.9928, "step": 10713 }, { "epoch": 0.5747854077253219, "grad_norm": 0.373046875, "learning_rate": 4.905957605405515e-06, "loss": 2.2475, "step": 10714 }, { "epoch": 0.5748390557939914, "grad_norm": 0.43359375, "learning_rate": 4.905933999933499e-06, "loss": 2.2739, "step": 10715 }, { "epoch": 0.5748927038626609, "grad_norm": 0.474609375, "learning_rate": 4.905910391556068e-06, "loss": 2.2861, "step": 10716 }, { "epoch": 0.5749463519313305, "grad_norm": 0.546875, "learning_rate": 4.905886780273251e-06, "loss": 2.6108, "step": 10717 }, { "epoch": 0.575, "grad_norm": 0.48828125, "learning_rate": 4.905863166085076e-06, "loss": 2.3283, "step": 10718 }, { "epoch": 0.5750536480686695, "grad_norm": 0.43359375, "learning_rate": 4.905839548991572e-06, "loss": 2.2395, "step": 10719 }, { "epoch": 0.575107296137339, "grad_norm": 0.43359375, "learning_rate": 4.9058159289927665e-06, "loss": 2.1468, "step": 10720 }, { "epoch": 0.5751609442060086, "grad_norm": 0.44921875, "learning_rate": 4.905792306088689e-06, "loss": 2.3303, "step": 10721 }, { "epoch": 0.5752145922746781, "grad_norm": 0.439453125, "learning_rate": 4.905768680279367e-06, "loss": 2.3979, "step": 10722 }, { "epoch": 0.5752682403433477, "grad_norm": 0.3828125, "learning_rate": 4.905745051564831e-06, "loss": 2.2033, "step": 10723 }, { "epoch": 0.5753218884120171, "grad_norm": 1.3125, "learning_rate": 4.905721419945107e-06, "loss": 2.3425, "step": 10724 }, { "epoch": 0.5753755364806867, "grad_norm": 0.451171875, "learning_rate": 4.905697785420225e-06, "loss": 2.3512, "step": 10725 }, { "epoch": 0.5754291845493562, "grad_norm": 0.453125, "learning_rate": 4.905674147990214e-06, "loss": 2.1507, "step": 10726 }, { "epoch": 0.5754828326180258, "grad_norm": 0.474609375, "learning_rate": 4.905650507655102e-06, "loss": 2.3602, "step": 10727 }, { "epoch": 0.5755364806866953, "grad_norm": 0.408203125, "learning_rate": 4.905626864414917e-06, "loss": 2.2381, "step": 10728 }, { "epoch": 0.5755901287553649, "grad_norm": 0.4296875, "learning_rate": 4.905603218269687e-06, "loss": 2.2413, "step": 10729 }, { "epoch": 0.5756437768240343, "grad_norm": 0.328125, "learning_rate": 4.905579569219443e-06, "loss": 2.2403, "step": 10730 }, { "epoch": 0.5756974248927038, "grad_norm": 0.4140625, "learning_rate": 4.905555917264212e-06, "loss": 2.3157, "step": 10731 }, { "epoch": 0.5757510729613734, "grad_norm": 0.419921875, "learning_rate": 4.905532262404022e-06, "loss": 2.1599, "step": 10732 }, { "epoch": 0.5758047210300429, "grad_norm": 0.400390625, "learning_rate": 4.905508604638903e-06, "loss": 2.2635, "step": 10733 }, { "epoch": 0.5758583690987125, "grad_norm": 0.4921875, "learning_rate": 4.905484943968882e-06, "loss": 2.3447, "step": 10734 }, { "epoch": 0.5759120171673819, "grad_norm": 0.43359375, "learning_rate": 4.905461280393988e-06, "loss": 2.0003, "step": 10735 }, { "epoch": 0.5759656652360515, "grad_norm": 0.419921875, "learning_rate": 4.905437613914251e-06, "loss": 1.9872, "step": 10736 }, { "epoch": 0.576019313304721, "grad_norm": 0.34765625, "learning_rate": 4.905413944529698e-06, "loss": 2.0643, "step": 10737 }, { "epoch": 0.5760729613733906, "grad_norm": 0.453125, "learning_rate": 4.905390272240359e-06, "loss": 2.2045, "step": 10738 }, { "epoch": 0.5761266094420601, "grad_norm": 0.98046875, "learning_rate": 4.90536659704626e-06, "loss": 2.1081, "step": 10739 }, { "epoch": 0.5761802575107297, "grad_norm": 0.443359375, "learning_rate": 4.905342918947433e-06, "loss": 2.2029, "step": 10740 }, { "epoch": 0.5762339055793991, "grad_norm": 0.47265625, "learning_rate": 4.905319237943904e-06, "loss": 2.388, "step": 10741 }, { "epoch": 0.5762875536480687, "grad_norm": 0.451171875, "learning_rate": 4.9052955540357025e-06, "loss": 2.3128, "step": 10742 }, { "epoch": 0.5763412017167382, "grad_norm": 0.470703125, "learning_rate": 4.905271867222857e-06, "loss": 1.9276, "step": 10743 }, { "epoch": 0.5763948497854077, "grad_norm": 0.4609375, "learning_rate": 4.905248177505396e-06, "loss": 2.4647, "step": 10744 }, { "epoch": 0.5764484978540773, "grad_norm": 0.59765625, "learning_rate": 4.905224484883349e-06, "loss": 2.5024, "step": 10745 }, { "epoch": 0.5765021459227467, "grad_norm": 0.72265625, "learning_rate": 4.9052007893567435e-06, "loss": 2.3646, "step": 10746 }, { "epoch": 0.5765557939914163, "grad_norm": 0.421875, "learning_rate": 4.9051770909256086e-06, "loss": 2.3713, "step": 10747 }, { "epoch": 0.5766094420600858, "grad_norm": 0.447265625, "learning_rate": 4.905153389589973e-06, "loss": 2.3224, "step": 10748 }, { "epoch": 0.5766630901287554, "grad_norm": 0.423828125, "learning_rate": 4.905129685349865e-06, "loss": 2.1986, "step": 10749 }, { "epoch": 0.5767167381974249, "grad_norm": 0.50390625, "learning_rate": 4.9051059782053125e-06, "loss": 2.4695, "step": 10750 }, { "epoch": 0.5767703862660944, "grad_norm": 0.51171875, "learning_rate": 4.905082268156346e-06, "loss": 2.2972, "step": 10751 }, { "epoch": 0.5768240343347639, "grad_norm": 0.66015625, "learning_rate": 4.9050585552029935e-06, "loss": 1.9357, "step": 10752 }, { "epoch": 0.5768776824034335, "grad_norm": 0.609375, "learning_rate": 4.905034839345282e-06, "loss": 2.3585, "step": 10753 }, { "epoch": 0.576931330472103, "grad_norm": 0.6875, "learning_rate": 4.905011120583243e-06, "loss": 2.3726, "step": 10754 }, { "epoch": 0.5769849785407726, "grad_norm": 0.4453125, "learning_rate": 4.904987398916902e-06, "loss": 1.9493, "step": 10755 }, { "epoch": 0.577038626609442, "grad_norm": 0.44921875, "learning_rate": 4.9049636743462906e-06, "loss": 2.2261, "step": 10756 }, { "epoch": 0.5770922746781116, "grad_norm": 0.427734375, "learning_rate": 4.904939946871435e-06, "loss": 2.4055, "step": 10757 }, { "epoch": 0.5771459227467811, "grad_norm": 0.408203125, "learning_rate": 4.904916216492366e-06, "loss": 2.5306, "step": 10758 }, { "epoch": 0.5771995708154506, "grad_norm": 0.7109375, "learning_rate": 4.90489248320911e-06, "loss": 2.3503, "step": 10759 }, { "epoch": 0.5772532188841202, "grad_norm": 0.58984375, "learning_rate": 4.904868747021697e-06, "loss": 2.2025, "step": 10760 }, { "epoch": 0.5773068669527897, "grad_norm": 0.474609375, "learning_rate": 4.904845007930156e-06, "loss": 2.2571, "step": 10761 }, { "epoch": 0.5773605150214592, "grad_norm": 0.453125, "learning_rate": 4.904821265934514e-06, "loss": 2.4115, "step": 10762 }, { "epoch": 0.5774141630901287, "grad_norm": 0.4375, "learning_rate": 4.904797521034802e-06, "loss": 2.2893, "step": 10763 }, { "epoch": 0.5774678111587983, "grad_norm": 0.431640625, "learning_rate": 4.904773773231047e-06, "loss": 2.1654, "step": 10764 }, { "epoch": 0.5775214592274678, "grad_norm": 0.47265625, "learning_rate": 4.9047500225232785e-06, "loss": 2.2506, "step": 10765 }, { "epoch": 0.5775751072961374, "grad_norm": 0.44140625, "learning_rate": 4.904726268911525e-06, "loss": 2.2015, "step": 10766 }, { "epoch": 0.5776287553648068, "grad_norm": 0.419921875, "learning_rate": 4.904702512395815e-06, "loss": 2.2151, "step": 10767 }, { "epoch": 0.5776824034334764, "grad_norm": 0.56640625, "learning_rate": 4.9046787529761765e-06, "loss": 2.1298, "step": 10768 }, { "epoch": 0.5777360515021459, "grad_norm": 0.3984375, "learning_rate": 4.90465499065264e-06, "loss": 2.2135, "step": 10769 }, { "epoch": 0.5777896995708155, "grad_norm": 0.62109375, "learning_rate": 4.904631225425232e-06, "loss": 2.3677, "step": 10770 }, { "epoch": 0.577843347639485, "grad_norm": 0.408203125, "learning_rate": 4.904607457293983e-06, "loss": 2.4468, "step": 10771 }, { "epoch": 0.5778969957081546, "grad_norm": 0.416015625, "learning_rate": 4.904583686258921e-06, "loss": 2.2333, "step": 10772 }, { "epoch": 0.577950643776824, "grad_norm": 0.44140625, "learning_rate": 4.904559912320075e-06, "loss": 2.5236, "step": 10773 }, { "epoch": 0.5780042918454935, "grad_norm": 0.65625, "learning_rate": 4.904536135477473e-06, "loss": 2.1617, "step": 10774 }, { "epoch": 0.5780579399141631, "grad_norm": 0.44140625, "learning_rate": 4.904512355731144e-06, "loss": 2.3369, "step": 10775 }, { "epoch": 0.5781115879828326, "grad_norm": 0.42578125, "learning_rate": 4.904488573081118e-06, "loss": 2.2694, "step": 10776 }, { "epoch": 0.5781652360515022, "grad_norm": 0.43359375, "learning_rate": 4.904464787527421e-06, "loss": 2.0846, "step": 10777 }, { "epoch": 0.5782188841201716, "grad_norm": 0.4609375, "learning_rate": 4.904440999070084e-06, "loss": 2.2614, "step": 10778 }, { "epoch": 0.5782725321888412, "grad_norm": 0.349609375, "learning_rate": 4.904417207709135e-06, "loss": 1.8824, "step": 10779 }, { "epoch": 0.5783261802575107, "grad_norm": 0.4375, "learning_rate": 4.904393413444602e-06, "loss": 2.2626, "step": 10780 }, { "epoch": 0.5783798283261803, "grad_norm": 0.498046875, "learning_rate": 4.904369616276516e-06, "loss": 2.2516, "step": 10781 }, { "epoch": 0.5784334763948498, "grad_norm": 0.423828125, "learning_rate": 4.904345816204904e-06, "loss": 2.2924, "step": 10782 }, { "epoch": 0.5784871244635194, "grad_norm": 0.412109375, "learning_rate": 4.904322013229794e-06, "loss": 2.4628, "step": 10783 }, { "epoch": 0.5785407725321888, "grad_norm": 0.470703125, "learning_rate": 4.904298207351217e-06, "loss": 2.3914, "step": 10784 }, { "epoch": 0.5785944206008584, "grad_norm": 0.4765625, "learning_rate": 4.9042743985692e-06, "loss": 2.2709, "step": 10785 }, { "epoch": 0.5786480686695279, "grad_norm": 0.39453125, "learning_rate": 4.904250586883771e-06, "loss": 2.3129, "step": 10786 }, { "epoch": 0.5787017167381975, "grad_norm": 0.50390625, "learning_rate": 4.904226772294961e-06, "loss": 2.0917, "step": 10787 }, { "epoch": 0.578755364806867, "grad_norm": 0.40234375, "learning_rate": 4.904202954802798e-06, "loss": 2.2708, "step": 10788 }, { "epoch": 0.5788090128755364, "grad_norm": 0.5078125, "learning_rate": 4.90417913440731e-06, "loss": 2.2078, "step": 10789 }, { "epoch": 0.578862660944206, "grad_norm": 0.37109375, "learning_rate": 4.904155311108527e-06, "loss": 2.0708, "step": 10790 }, { "epoch": 0.5789163090128755, "grad_norm": 0.455078125, "learning_rate": 4.904131484906476e-06, "loss": 2.3444, "step": 10791 }, { "epoch": 0.5789699570815451, "grad_norm": 0.74609375, "learning_rate": 4.904107655801188e-06, "loss": 1.8166, "step": 10792 }, { "epoch": 0.5790236051502146, "grad_norm": 0.439453125, "learning_rate": 4.9040838237926895e-06, "loss": 1.5023, "step": 10793 }, { "epoch": 0.5790772532188841, "grad_norm": 0.451171875, "learning_rate": 4.90405998888101e-06, "loss": 2.3187, "step": 10794 }, { "epoch": 0.5791309012875536, "grad_norm": 0.51953125, "learning_rate": 4.90403615106618e-06, "loss": 2.567, "step": 10795 }, { "epoch": 0.5791845493562232, "grad_norm": 0.4375, "learning_rate": 4.904012310348226e-06, "loss": 2.3475, "step": 10796 }, { "epoch": 0.5792381974248927, "grad_norm": 0.79296875, "learning_rate": 4.903988466727179e-06, "loss": 2.5253, "step": 10797 }, { "epoch": 0.5792918454935623, "grad_norm": 0.6328125, "learning_rate": 4.903964620203065e-06, "loss": 2.3051, "step": 10798 }, { "epoch": 0.5793454935622318, "grad_norm": 0.55859375, "learning_rate": 4.903940770775914e-06, "loss": 2.3939, "step": 10799 }, { "epoch": 0.5793991416309013, "grad_norm": 0.44921875, "learning_rate": 4.9039169184457565e-06, "loss": 2.2457, "step": 10800 }, { "epoch": 0.5794527896995708, "grad_norm": 3.78125, "learning_rate": 4.90389306321262e-06, "loss": 2.1461, "step": 10801 }, { "epoch": 0.5795064377682403, "grad_norm": 0.625, "learning_rate": 4.9038692050765326e-06, "loss": 2.3597, "step": 10802 }, { "epoch": 0.5795600858369099, "grad_norm": 0.3828125, "learning_rate": 4.903845344037523e-06, "loss": 2.4078, "step": 10803 }, { "epoch": 0.5796137339055794, "grad_norm": 0.423828125, "learning_rate": 4.903821480095623e-06, "loss": 2.1619, "step": 10804 }, { "epoch": 0.5796673819742489, "grad_norm": 1.375, "learning_rate": 4.9037976132508565e-06, "loss": 2.3271, "step": 10805 }, { "epoch": 0.5797210300429184, "grad_norm": 0.458984375, "learning_rate": 4.903773743503257e-06, "loss": 2.5284, "step": 10806 }, { "epoch": 0.579774678111588, "grad_norm": 0.4296875, "learning_rate": 4.90374987085285e-06, "loss": 2.22, "step": 10807 }, { "epoch": 0.5798283261802575, "grad_norm": 0.4453125, "learning_rate": 4.903725995299666e-06, "loss": 2.2866, "step": 10808 }, { "epoch": 0.5798819742489271, "grad_norm": 0.71484375, "learning_rate": 4.9037021168437335e-06, "loss": 2.1974, "step": 10809 }, { "epoch": 0.5799356223175965, "grad_norm": 0.455078125, "learning_rate": 4.903678235485082e-06, "loss": 2.2356, "step": 10810 }, { "epoch": 0.5799892703862661, "grad_norm": 0.439453125, "learning_rate": 4.903654351223739e-06, "loss": 2.4389, "step": 10811 }, { "epoch": 0.5800429184549356, "grad_norm": 0.44140625, "learning_rate": 4.903630464059734e-06, "loss": 2.4483, "step": 10812 }, { "epoch": 0.5800965665236052, "grad_norm": 0.44140625, "learning_rate": 4.903606573993096e-06, "loss": 2.3049, "step": 10813 }, { "epoch": 0.5801502145922747, "grad_norm": 0.470703125, "learning_rate": 4.903582681023854e-06, "loss": 2.4691, "step": 10814 }, { "epoch": 0.5802038626609443, "grad_norm": 0.435546875, "learning_rate": 4.903558785152036e-06, "loss": 2.3541, "step": 10815 }, { "epoch": 0.5802575107296137, "grad_norm": 0.412109375, "learning_rate": 4.903534886377672e-06, "loss": 2.3449, "step": 10816 }, { "epoch": 0.5803111587982832, "grad_norm": 0.37890625, "learning_rate": 4.90351098470079e-06, "loss": 2.0583, "step": 10817 }, { "epoch": 0.5803648068669528, "grad_norm": 0.5, "learning_rate": 4.903487080121418e-06, "loss": 2.5684, "step": 10818 }, { "epoch": 0.5804184549356223, "grad_norm": 0.439453125, "learning_rate": 4.903463172639587e-06, "loss": 2.2119, "step": 10819 }, { "epoch": 0.5804721030042919, "grad_norm": 1.0, "learning_rate": 4.903439262255325e-06, "loss": 2.3796, "step": 10820 }, { "epoch": 0.5805257510729613, "grad_norm": 0.484375, "learning_rate": 4.9034153489686615e-06, "loss": 2.3573, "step": 10821 }, { "epoch": 0.5805793991416309, "grad_norm": 0.421875, "learning_rate": 4.903391432779624e-06, "loss": 2.4347, "step": 10822 }, { "epoch": 0.5806330472103004, "grad_norm": 0.41015625, "learning_rate": 4.903367513688241e-06, "loss": 2.2293, "step": 10823 }, { "epoch": 0.58068669527897, "grad_norm": 0.408203125, "learning_rate": 4.903343591694544e-06, "loss": 2.1768, "step": 10824 }, { "epoch": 0.5807403433476395, "grad_norm": 0.470703125, "learning_rate": 4.903319666798559e-06, "loss": 2.5361, "step": 10825 }, { "epoch": 0.580793991416309, "grad_norm": 0.48046875, "learning_rate": 4.903295739000317e-06, "loss": 2.1624, "step": 10826 }, { "epoch": 0.5808476394849785, "grad_norm": 0.41015625, "learning_rate": 4.903271808299846e-06, "loss": 2.4739, "step": 10827 }, { "epoch": 0.5809012875536481, "grad_norm": 0.6640625, "learning_rate": 4.903247874697173e-06, "loss": 2.3846, "step": 10828 }, { "epoch": 0.5809549356223176, "grad_norm": 0.486328125, "learning_rate": 4.903223938192331e-06, "loss": 2.3363, "step": 10829 }, { "epoch": 0.5810085836909872, "grad_norm": 0.453125, "learning_rate": 4.903199998785346e-06, "loss": 2.2443, "step": 10830 }, { "epoch": 0.5810622317596567, "grad_norm": 0.369140625, "learning_rate": 4.903176056476248e-06, "loss": 1.9817, "step": 10831 }, { "epoch": 0.5811158798283261, "grad_norm": 0.482421875, "learning_rate": 4.903152111265066e-06, "loss": 2.4969, "step": 10832 }, { "epoch": 0.5811695278969957, "grad_norm": 0.43359375, "learning_rate": 4.903128163151828e-06, "loss": 1.9815, "step": 10833 }, { "epoch": 0.5812231759656652, "grad_norm": 0.6015625, "learning_rate": 4.903104212136563e-06, "loss": 2.3594, "step": 10834 }, { "epoch": 0.5812768240343348, "grad_norm": 0.51171875, "learning_rate": 4.9030802582193e-06, "loss": 2.4288, "step": 10835 }, { "epoch": 0.5813304721030043, "grad_norm": 0.48046875, "learning_rate": 4.90305630140007e-06, "loss": 2.4065, "step": 10836 }, { "epoch": 0.5813841201716738, "grad_norm": 0.458984375, "learning_rate": 4.903032341678899e-06, "loss": 2.3228, "step": 10837 }, { "epoch": 0.5814377682403433, "grad_norm": 0.42578125, "learning_rate": 4.903008379055817e-06, "loss": 2.4811, "step": 10838 }, { "epoch": 0.5814914163090129, "grad_norm": 0.4765625, "learning_rate": 4.902984413530854e-06, "loss": 2.3773, "step": 10839 }, { "epoch": 0.5815450643776824, "grad_norm": 0.515625, "learning_rate": 4.902960445104037e-06, "loss": 2.2857, "step": 10840 }, { "epoch": 0.581598712446352, "grad_norm": 0.46484375, "learning_rate": 4.9029364737753966e-06, "loss": 2.5032, "step": 10841 }, { "epoch": 0.5816523605150214, "grad_norm": 0.486328125, "learning_rate": 4.9029124995449605e-06, "loss": 2.3989, "step": 10842 }, { "epoch": 0.581706008583691, "grad_norm": 0.484375, "learning_rate": 4.902888522412759e-06, "loss": 2.2889, "step": 10843 }, { "epoch": 0.5817596566523605, "grad_norm": 0.490234375, "learning_rate": 4.90286454237882e-06, "loss": 2.3216, "step": 10844 }, { "epoch": 0.58181330472103, "grad_norm": 0.453125, "learning_rate": 4.902840559443173e-06, "loss": 2.5005, "step": 10845 }, { "epoch": 0.5818669527896996, "grad_norm": 0.76953125, "learning_rate": 4.902816573605846e-06, "loss": 2.4111, "step": 10846 }, { "epoch": 0.581920600858369, "grad_norm": 0.44921875, "learning_rate": 4.90279258486687e-06, "loss": 2.3621, "step": 10847 }, { "epoch": 0.5819742489270386, "grad_norm": 0.412109375, "learning_rate": 4.902768593226271e-06, "loss": 2.2649, "step": 10848 }, { "epoch": 0.5820278969957081, "grad_norm": 0.474609375, "learning_rate": 4.902744598684081e-06, "loss": 2.3906, "step": 10849 }, { "epoch": 0.5820815450643777, "grad_norm": 6.25, "learning_rate": 4.902720601240327e-06, "loss": 2.2333, "step": 10850 }, { "epoch": 0.5821351931330472, "grad_norm": 0.51953125, "learning_rate": 4.9026966008950386e-06, "loss": 2.383, "step": 10851 }, { "epoch": 0.5821888412017168, "grad_norm": 0.5234375, "learning_rate": 4.902672597648245e-06, "loss": 2.6143, "step": 10852 }, { "epoch": 0.5822424892703862, "grad_norm": 0.609375, "learning_rate": 4.902648591499975e-06, "loss": 2.365, "step": 10853 }, { "epoch": 0.5822961373390558, "grad_norm": 0.396484375, "learning_rate": 4.902624582450257e-06, "loss": 2.1109, "step": 10854 }, { "epoch": 0.5823497854077253, "grad_norm": 0.443359375, "learning_rate": 4.902600570499121e-06, "loss": 2.5076, "step": 10855 }, { "epoch": 0.5824034334763949, "grad_norm": 0.470703125, "learning_rate": 4.902576555646596e-06, "loss": 2.1775, "step": 10856 }, { "epoch": 0.5824570815450644, "grad_norm": 0.4765625, "learning_rate": 4.90255253789271e-06, "loss": 2.4687, "step": 10857 }, { "epoch": 0.582510729613734, "grad_norm": 0.427734375, "learning_rate": 4.902528517237493e-06, "loss": 2.1666, "step": 10858 }, { "epoch": 0.5825643776824034, "grad_norm": 0.46484375, "learning_rate": 4.902504493680973e-06, "loss": 2.3346, "step": 10859 }, { "epoch": 0.5826180257510729, "grad_norm": 0.439453125, "learning_rate": 4.902480467223179e-06, "loss": 2.3274, "step": 10860 }, { "epoch": 0.5826716738197425, "grad_norm": 0.4453125, "learning_rate": 4.902456437864142e-06, "loss": 2.1652, "step": 10861 }, { "epoch": 0.582725321888412, "grad_norm": 0.46484375, "learning_rate": 4.902432405603888e-06, "loss": 2.3199, "step": 10862 }, { "epoch": 0.5827789699570816, "grad_norm": 0.51171875, "learning_rate": 4.90240837044245e-06, "loss": 2.4267, "step": 10863 }, { "epoch": 0.582832618025751, "grad_norm": 0.4765625, "learning_rate": 4.902384332379853e-06, "loss": 2.2087, "step": 10864 }, { "epoch": 0.5828862660944206, "grad_norm": 0.447265625, "learning_rate": 4.902360291416127e-06, "loss": 2.1823, "step": 10865 }, { "epoch": 0.5829399141630901, "grad_norm": 0.455078125, "learning_rate": 4.902336247551303e-06, "loss": 2.349, "step": 10866 }, { "epoch": 0.5829935622317597, "grad_norm": 0.470703125, "learning_rate": 4.902312200785409e-06, "loss": 2.2725, "step": 10867 }, { "epoch": 0.5830472103004292, "grad_norm": 0.54296875, "learning_rate": 4.902288151118473e-06, "loss": 2.2137, "step": 10868 }, { "epoch": 0.5831008583690988, "grad_norm": 0.41015625, "learning_rate": 4.9022640985505245e-06, "loss": 2.34, "step": 10869 }, { "epoch": 0.5831545064377682, "grad_norm": 0.462890625, "learning_rate": 4.902240043081594e-06, "loss": 2.405, "step": 10870 }, { "epoch": 0.5832081545064378, "grad_norm": 0.361328125, "learning_rate": 4.902215984711708e-06, "loss": 2.3188, "step": 10871 }, { "epoch": 0.5832618025751073, "grad_norm": 0.42578125, "learning_rate": 4.9021919234408975e-06, "loss": 2.1659, "step": 10872 }, { "epoch": 0.5833154506437769, "grad_norm": 0.431640625, "learning_rate": 4.902167859269192e-06, "loss": 2.3551, "step": 10873 }, { "epoch": 0.5833690987124464, "grad_norm": 0.76953125, "learning_rate": 4.902143792196619e-06, "loss": 2.0556, "step": 10874 }, { "epoch": 0.5834227467811158, "grad_norm": 0.6484375, "learning_rate": 4.902119722223208e-06, "loss": 2.2296, "step": 10875 }, { "epoch": 0.5834763948497854, "grad_norm": 0.349609375, "learning_rate": 4.9020956493489876e-06, "loss": 2.275, "step": 10876 }, { "epoch": 0.5835300429184549, "grad_norm": 0.5703125, "learning_rate": 4.902071573573989e-06, "loss": 2.2179, "step": 10877 }, { "epoch": 0.5835836909871245, "grad_norm": 0.423828125, "learning_rate": 4.902047494898238e-06, "loss": 1.6634, "step": 10878 }, { "epoch": 0.583637339055794, "grad_norm": 0.423828125, "learning_rate": 4.902023413321767e-06, "loss": 2.1859, "step": 10879 }, { "epoch": 0.5836909871244635, "grad_norm": 0.400390625, "learning_rate": 4.901999328844602e-06, "loss": 2.3707, "step": 10880 }, { "epoch": 0.583744635193133, "grad_norm": 0.99609375, "learning_rate": 4.9019752414667746e-06, "loss": 2.244, "step": 10881 }, { "epoch": 0.5837982832618026, "grad_norm": 0.51171875, "learning_rate": 4.901951151188313e-06, "loss": 2.3305, "step": 10882 }, { "epoch": 0.5838519313304721, "grad_norm": 0.3671875, "learning_rate": 4.901927058009246e-06, "loss": 2.4097, "step": 10883 }, { "epoch": 0.5839055793991417, "grad_norm": 0.416015625, "learning_rate": 4.901902961929603e-06, "loss": 2.305, "step": 10884 }, { "epoch": 0.5839592274678111, "grad_norm": 0.6171875, "learning_rate": 4.901878862949413e-06, "loss": 2.5305, "step": 10885 }, { "epoch": 0.5840128755364807, "grad_norm": 0.62109375, "learning_rate": 4.901854761068705e-06, "loss": 2.3992, "step": 10886 }, { "epoch": 0.5840665236051502, "grad_norm": 0.4375, "learning_rate": 4.901830656287507e-06, "loss": 2.4524, "step": 10887 }, { "epoch": 0.5841201716738197, "grad_norm": 0.4609375, "learning_rate": 4.901806548605851e-06, "loss": 2.3755, "step": 10888 }, { "epoch": 0.5841738197424893, "grad_norm": 0.8515625, "learning_rate": 4.901782438023764e-06, "loss": 2.1859, "step": 10889 }, { "epoch": 0.5842274678111588, "grad_norm": 0.5, "learning_rate": 4.901758324541275e-06, "loss": 2.3436, "step": 10890 }, { "epoch": 0.5842811158798283, "grad_norm": 0.53125, "learning_rate": 4.901734208158414e-06, "loss": 2.5059, "step": 10891 }, { "epoch": 0.5843347639484978, "grad_norm": 0.4296875, "learning_rate": 4.90171008887521e-06, "loss": 2.0949, "step": 10892 }, { "epoch": 0.5843884120171674, "grad_norm": 0.42578125, "learning_rate": 4.9016859666916915e-06, "loss": 2.2819, "step": 10893 }, { "epoch": 0.5844420600858369, "grad_norm": 0.45703125, "learning_rate": 4.901661841607888e-06, "loss": 2.3438, "step": 10894 }, { "epoch": 0.5844957081545065, "grad_norm": 0.478515625, "learning_rate": 4.901637713623829e-06, "loss": 2.1678, "step": 10895 }, { "epoch": 0.5845493562231759, "grad_norm": 0.4453125, "learning_rate": 4.901613582739543e-06, "loss": 2.5054, "step": 10896 }, { "epoch": 0.5846030042918455, "grad_norm": 0.4140625, "learning_rate": 4.90158944895506e-06, "loss": 2.3685, "step": 10897 }, { "epoch": 0.584656652360515, "grad_norm": 0.421875, "learning_rate": 4.901565312270408e-06, "loss": 2.285, "step": 10898 }, { "epoch": 0.5847103004291846, "grad_norm": 0.37109375, "learning_rate": 4.901541172685617e-06, "loss": 2.5403, "step": 10899 }, { "epoch": 0.5847639484978541, "grad_norm": 0.4375, "learning_rate": 4.901517030200715e-06, "loss": 2.2863, "step": 10900 }, { "epoch": 0.5848175965665237, "grad_norm": 0.41015625, "learning_rate": 4.901492884815733e-06, "loss": 2.2601, "step": 10901 }, { "epoch": 0.5848712446351931, "grad_norm": 0.423828125, "learning_rate": 4.901468736530699e-06, "loss": 2.3039, "step": 10902 }, { "epoch": 0.5849248927038626, "grad_norm": 0.3828125, "learning_rate": 4.9014445853456424e-06, "loss": 2.1036, "step": 10903 }, { "epoch": 0.5849785407725322, "grad_norm": 0.42578125, "learning_rate": 4.9014204312605925e-06, "loss": 2.1816, "step": 10904 }, { "epoch": 0.5850321888412017, "grad_norm": 0.421875, "learning_rate": 4.901396274275578e-06, "loss": 2.3698, "step": 10905 }, { "epoch": 0.5850858369098713, "grad_norm": 0.3671875, "learning_rate": 4.9013721143906275e-06, "loss": 2.2403, "step": 10906 }, { "epoch": 0.5851394849785407, "grad_norm": 0.421875, "learning_rate": 4.9013479516057725e-06, "loss": 2.401, "step": 10907 }, { "epoch": 0.5851931330472103, "grad_norm": 0.55078125, "learning_rate": 4.901323785921041e-06, "loss": 1.0841, "step": 10908 }, { "epoch": 0.5852467811158798, "grad_norm": 0.494140625, "learning_rate": 4.90129961733646e-06, "loss": 2.2885, "step": 10909 }, { "epoch": 0.5853004291845494, "grad_norm": 0.494140625, "learning_rate": 4.9012754458520615e-06, "loss": 2.2457, "step": 10910 }, { "epoch": 0.5853540772532189, "grad_norm": 0.41796875, "learning_rate": 4.901251271467874e-06, "loss": 1.8118, "step": 10911 }, { "epoch": 0.5854077253218885, "grad_norm": 0.427734375, "learning_rate": 4.9012270941839264e-06, "loss": 2.0747, "step": 10912 }, { "epoch": 0.5854613733905579, "grad_norm": 0.46875, "learning_rate": 4.901202914000248e-06, "loss": 2.5035, "step": 10913 }, { "epoch": 0.5855150214592275, "grad_norm": 0.41015625, "learning_rate": 4.901178730916868e-06, "loss": 2.3184, "step": 10914 }, { "epoch": 0.585568669527897, "grad_norm": 0.484375, "learning_rate": 4.901154544933816e-06, "loss": 2.0849, "step": 10915 }, { "epoch": 0.5856223175965666, "grad_norm": 0.83984375, "learning_rate": 4.90113035605112e-06, "loss": 2.2511, "step": 10916 }, { "epoch": 0.585675965665236, "grad_norm": 0.462890625, "learning_rate": 4.9011061642688104e-06, "loss": 2.4757, "step": 10917 }, { "epoch": 0.5857296137339055, "grad_norm": 0.90234375, "learning_rate": 4.901081969586916e-06, "loss": 2.3958, "step": 10918 }, { "epoch": 0.5857832618025751, "grad_norm": 0.51171875, "learning_rate": 4.901057772005465e-06, "loss": 2.4483, "step": 10919 }, { "epoch": 0.5858369098712446, "grad_norm": 0.373046875, "learning_rate": 4.901033571524488e-06, "loss": 2.4341, "step": 10920 }, { "epoch": 0.5858905579399142, "grad_norm": 0.498046875, "learning_rate": 4.901009368144015e-06, "loss": 2.3556, "step": 10921 }, { "epoch": 0.5859442060085837, "grad_norm": 0.54296875, "learning_rate": 4.900985161864074e-06, "loss": 2.3273, "step": 10922 }, { "epoch": 0.5859978540772532, "grad_norm": 0.41015625, "learning_rate": 4.900960952684693e-06, "loss": 2.1867, "step": 10923 }, { "epoch": 0.5860515021459227, "grad_norm": 0.59375, "learning_rate": 4.900936740605904e-06, "loss": 2.3185, "step": 10924 }, { "epoch": 0.5861051502145923, "grad_norm": 1.03125, "learning_rate": 4.900912525627733e-06, "loss": 2.2588, "step": 10925 }, { "epoch": 0.5861587982832618, "grad_norm": 0.384765625, "learning_rate": 4.900888307750212e-06, "loss": 2.326, "step": 10926 }, { "epoch": 0.5862124463519314, "grad_norm": 0.423828125, "learning_rate": 4.90086408697337e-06, "loss": 2.2302, "step": 10927 }, { "epoch": 0.5862660944206008, "grad_norm": 0.435546875, "learning_rate": 4.900839863297235e-06, "loss": 2.3283, "step": 10928 }, { "epoch": 0.5863197424892704, "grad_norm": 0.412109375, "learning_rate": 4.9008156367218365e-06, "loss": 2.2599, "step": 10929 }, { "epoch": 0.5863733905579399, "grad_norm": 0.419921875, "learning_rate": 4.9007914072472045e-06, "loss": 2.3509, "step": 10930 }, { "epoch": 0.5864270386266094, "grad_norm": 0.44921875, "learning_rate": 4.9007671748733675e-06, "loss": 2.2455, "step": 10931 }, { "epoch": 0.586480686695279, "grad_norm": 0.494140625, "learning_rate": 4.9007429396003545e-06, "loss": 1.883, "step": 10932 }, { "epoch": 0.5865343347639485, "grad_norm": 0.44921875, "learning_rate": 4.900718701428197e-06, "loss": 2.3666, "step": 10933 }, { "epoch": 0.586587982832618, "grad_norm": 0.41015625, "learning_rate": 4.90069446035692e-06, "loss": 2.5014, "step": 10934 }, { "epoch": 0.5866416309012875, "grad_norm": 0.435546875, "learning_rate": 4.900670216386559e-06, "loss": 2.106, "step": 10935 }, { "epoch": 0.5866952789699571, "grad_norm": 0.494140625, "learning_rate": 4.900645969517137e-06, "loss": 2.5251, "step": 10936 }, { "epoch": 0.5867489270386266, "grad_norm": 0.458984375, "learning_rate": 4.900621719748686e-06, "loss": 2.4989, "step": 10937 }, { "epoch": 0.5868025751072962, "grad_norm": 0.3828125, "learning_rate": 4.900597467081236e-06, "loss": 1.9875, "step": 10938 }, { "epoch": 0.5868562231759656, "grad_norm": 0.333984375, "learning_rate": 4.900573211514815e-06, "loss": 2.1266, "step": 10939 }, { "epoch": 0.5869098712446352, "grad_norm": 0.55859375, "learning_rate": 4.900548953049453e-06, "loss": 2.4424, "step": 10940 }, { "epoch": 0.5869635193133047, "grad_norm": 0.376953125, "learning_rate": 4.900524691685179e-06, "loss": 2.0319, "step": 10941 }, { "epoch": 0.5870171673819743, "grad_norm": 0.392578125, "learning_rate": 4.900500427422022e-06, "loss": 2.0924, "step": 10942 }, { "epoch": 0.5870708154506438, "grad_norm": 0.43359375, "learning_rate": 4.900476160260013e-06, "loss": 2.5116, "step": 10943 }, { "epoch": 0.5871244635193134, "grad_norm": 0.42578125, "learning_rate": 4.900451890199179e-06, "loss": 2.4147, "step": 10944 }, { "epoch": 0.5871781115879828, "grad_norm": 0.494140625, "learning_rate": 4.9004276172395505e-06, "loss": 2.3367, "step": 10945 }, { "epoch": 0.5872317596566523, "grad_norm": 1.703125, "learning_rate": 4.900403341381156e-06, "loss": 2.3716, "step": 10946 }, { "epoch": 0.5872854077253219, "grad_norm": 0.443359375, "learning_rate": 4.900379062624026e-06, "loss": 2.291, "step": 10947 }, { "epoch": 0.5873390557939914, "grad_norm": 0.53125, "learning_rate": 4.9003547809681896e-06, "loss": 2.36, "step": 10948 }, { "epoch": 0.587392703862661, "grad_norm": 0.5234375, "learning_rate": 4.900330496413676e-06, "loss": 2.3737, "step": 10949 }, { "epoch": 0.5874463519313304, "grad_norm": 0.447265625, "learning_rate": 4.900306208960513e-06, "loss": 2.2479, "step": 10950 }, { "epoch": 0.5875, "grad_norm": 0.421875, "learning_rate": 4.900281918608732e-06, "loss": 1.9838, "step": 10951 }, { "epoch": 0.5875536480686695, "grad_norm": 0.44140625, "learning_rate": 4.900257625358362e-06, "loss": 2.3167, "step": 10952 }, { "epoch": 0.5876072961373391, "grad_norm": 0.48828125, "learning_rate": 4.900233329209431e-06, "loss": 2.689, "step": 10953 }, { "epoch": 0.5876609442060086, "grad_norm": 0.46484375, "learning_rate": 4.90020903016197e-06, "loss": 2.4791, "step": 10954 }, { "epoch": 0.5877145922746781, "grad_norm": 0.431640625, "learning_rate": 4.900184728216007e-06, "loss": 2.4233, "step": 10955 }, { "epoch": 0.5877682403433476, "grad_norm": 0.42578125, "learning_rate": 4.900160423371572e-06, "loss": 2.0669, "step": 10956 }, { "epoch": 0.5878218884120172, "grad_norm": 0.4453125, "learning_rate": 4.900136115628694e-06, "loss": 2.4874, "step": 10957 }, { "epoch": 0.5878755364806867, "grad_norm": 0.46484375, "learning_rate": 4.900111804987403e-06, "loss": 1.6763, "step": 10958 }, { "epoch": 0.5879291845493563, "grad_norm": 0.42578125, "learning_rate": 4.9000874914477284e-06, "loss": 2.2605, "step": 10959 }, { "epoch": 0.5879828326180258, "grad_norm": 0.419921875, "learning_rate": 4.900063175009699e-06, "loss": 2.2618, "step": 10960 }, { "epoch": 0.5880364806866952, "grad_norm": 0.56640625, "learning_rate": 4.900038855673344e-06, "loss": 2.2717, "step": 10961 }, { "epoch": 0.5880901287553648, "grad_norm": 0.609375, "learning_rate": 4.900014533438693e-06, "loss": 2.2996, "step": 10962 }, { "epoch": 0.5881437768240343, "grad_norm": 0.5, "learning_rate": 4.899990208305776e-06, "loss": 2.5179, "step": 10963 }, { "epoch": 0.5881974248927039, "grad_norm": 0.44140625, "learning_rate": 4.899965880274621e-06, "loss": 2.3383, "step": 10964 }, { "epoch": 0.5882510729613734, "grad_norm": 0.34765625, "learning_rate": 4.899941549345259e-06, "loss": 2.1443, "step": 10965 }, { "epoch": 0.5883047210300429, "grad_norm": 0.51953125, "learning_rate": 4.899917215517719e-06, "loss": 1.2931, "step": 10966 }, { "epoch": 0.5883583690987124, "grad_norm": 0.451171875, "learning_rate": 4.899892878792028e-06, "loss": 2.2804, "step": 10967 }, { "epoch": 0.588412017167382, "grad_norm": 0.369140625, "learning_rate": 4.89986853916822e-06, "loss": 2.3128, "step": 10968 }, { "epoch": 0.5884656652360515, "grad_norm": 0.44921875, "learning_rate": 4.89984419664632e-06, "loss": 2.2248, "step": 10969 }, { "epoch": 0.5885193133047211, "grad_norm": 0.44921875, "learning_rate": 4.89981985122636e-06, "loss": 2.4505, "step": 10970 }, { "epoch": 0.5885729613733905, "grad_norm": 0.65234375, "learning_rate": 4.8997955029083675e-06, "loss": 2.3967, "step": 10971 }, { "epoch": 0.5886266094420601, "grad_norm": 0.46484375, "learning_rate": 4.899771151692373e-06, "loss": 2.4424, "step": 10972 }, { "epoch": 0.5886802575107296, "grad_norm": 1.3125, "learning_rate": 4.899746797578407e-06, "loss": 2.5007, "step": 10973 }, { "epoch": 0.5887339055793992, "grad_norm": 1.8203125, "learning_rate": 4.8997224405664964e-06, "loss": 2.2184, "step": 10974 }, { "epoch": 0.5887875536480687, "grad_norm": 0.4609375, "learning_rate": 4.899698080656674e-06, "loss": 2.3554, "step": 10975 }, { "epoch": 0.5888412017167381, "grad_norm": 0.423828125, "learning_rate": 4.899673717848965e-06, "loss": 2.0799, "step": 10976 }, { "epoch": 0.5888948497854077, "grad_norm": 0.419921875, "learning_rate": 4.899649352143403e-06, "loss": 2.0408, "step": 10977 }, { "epoch": 0.5889484978540772, "grad_norm": 0.470703125, "learning_rate": 4.899624983540014e-06, "loss": 2.5514, "step": 10978 }, { "epoch": 0.5890021459227468, "grad_norm": 0.427734375, "learning_rate": 4.8996006120388294e-06, "loss": 2.3744, "step": 10979 }, { "epoch": 0.5890557939914163, "grad_norm": 0.443359375, "learning_rate": 4.899576237639878e-06, "loss": 2.1057, "step": 10980 }, { "epoch": 0.5891094420600859, "grad_norm": 0.4765625, "learning_rate": 4.899551860343189e-06, "loss": 2.4161, "step": 10981 }, { "epoch": 0.5891630901287553, "grad_norm": 0.5, "learning_rate": 4.899527480148792e-06, "loss": 2.4724, "step": 10982 }, { "epoch": 0.5892167381974249, "grad_norm": 0.396484375, "learning_rate": 4.899503097056718e-06, "loss": 1.9584, "step": 10983 }, { "epoch": 0.5892703862660944, "grad_norm": 0.4140625, "learning_rate": 4.899478711066994e-06, "loss": 2.343, "step": 10984 }, { "epoch": 0.589324034334764, "grad_norm": 0.40625, "learning_rate": 4.899454322179651e-06, "loss": 2.3631, "step": 10985 }, { "epoch": 0.5893776824034335, "grad_norm": 0.5, "learning_rate": 4.899429930394718e-06, "loss": 2.2326, "step": 10986 }, { "epoch": 0.589431330472103, "grad_norm": 0.9609375, "learning_rate": 4.899405535712224e-06, "loss": 2.1626, "step": 10987 }, { "epoch": 0.5894849785407725, "grad_norm": 0.423828125, "learning_rate": 4.899381138132199e-06, "loss": 2.3556, "step": 10988 }, { "epoch": 0.589538626609442, "grad_norm": 0.43359375, "learning_rate": 4.899356737654673e-06, "loss": 2.0862, "step": 10989 }, { "epoch": 0.5895922746781116, "grad_norm": 0.453125, "learning_rate": 4.899332334279674e-06, "loss": 2.5002, "step": 10990 }, { "epoch": 0.5896459227467811, "grad_norm": 0.546875, "learning_rate": 4.899307928007232e-06, "loss": 2.4418, "step": 10991 }, { "epoch": 0.5896995708154507, "grad_norm": 0.427734375, "learning_rate": 4.899283518837377e-06, "loss": 2.3417, "step": 10992 }, { "epoch": 0.5897532188841201, "grad_norm": 0.416015625, "learning_rate": 4.899259106770139e-06, "loss": 2.2565, "step": 10993 }, { "epoch": 0.5898068669527897, "grad_norm": 0.462890625, "learning_rate": 4.8992346918055455e-06, "loss": 2.3296, "step": 10994 }, { "epoch": 0.5898605150214592, "grad_norm": 0.54296875, "learning_rate": 4.8992102739436285e-06, "loss": 2.3005, "step": 10995 }, { "epoch": 0.5899141630901288, "grad_norm": 0.4375, "learning_rate": 4.899185853184415e-06, "loss": 2.3078, "step": 10996 }, { "epoch": 0.5899678111587983, "grad_norm": 0.4765625, "learning_rate": 4.899161429527936e-06, "loss": 2.1035, "step": 10997 }, { "epoch": 0.5900214592274678, "grad_norm": 0.66796875, "learning_rate": 4.899137002974221e-06, "loss": 2.5889, "step": 10998 }, { "epoch": 0.5900751072961373, "grad_norm": 0.474609375, "learning_rate": 4.8991125735232995e-06, "loss": 2.6797, "step": 10999 }, { "epoch": 0.5901287553648069, "grad_norm": 0.421875, "learning_rate": 4.8990881411752e-06, "loss": 2.3291, "step": 11000 }, { "epoch": 0.5901824034334764, "grad_norm": 0.64453125, "learning_rate": 4.899063705929953e-06, "loss": 2.1417, "step": 11001 }, { "epoch": 0.590236051502146, "grad_norm": 0.52734375, "learning_rate": 4.899039267787588e-06, "loss": 2.3259, "step": 11002 }, { "epoch": 0.5902896995708155, "grad_norm": 0.3984375, "learning_rate": 4.899014826748134e-06, "loss": 2.2117, "step": 11003 }, { "epoch": 0.5903433476394849, "grad_norm": 0.49609375, "learning_rate": 4.89899038281162e-06, "loss": 2.2252, "step": 11004 }, { "epoch": 0.5903969957081545, "grad_norm": 0.408203125, "learning_rate": 4.898965935978076e-06, "loss": 2.2278, "step": 11005 }, { "epoch": 0.590450643776824, "grad_norm": 0.39453125, "learning_rate": 4.898941486247533e-06, "loss": 2.355, "step": 11006 }, { "epoch": 0.5905042918454936, "grad_norm": 0.44140625, "learning_rate": 4.898917033620019e-06, "loss": 2.2332, "step": 11007 }, { "epoch": 0.590557939914163, "grad_norm": 0.466796875, "learning_rate": 4.898892578095563e-06, "loss": 1.9416, "step": 11008 }, { "epoch": 0.5906115879828326, "grad_norm": 0.453125, "learning_rate": 4.898868119674196e-06, "loss": 2.3184, "step": 11009 }, { "epoch": 0.5906652360515021, "grad_norm": 0.44921875, "learning_rate": 4.898843658355947e-06, "loss": 2.2417, "step": 11010 }, { "epoch": 0.5907188841201717, "grad_norm": 0.44921875, "learning_rate": 4.898819194140845e-06, "loss": 1.7658, "step": 11011 }, { "epoch": 0.5907725321888412, "grad_norm": 0.5625, "learning_rate": 4.898794727028921e-06, "loss": 2.4206, "step": 11012 }, { "epoch": 0.5908261802575108, "grad_norm": 0.4140625, "learning_rate": 4.898770257020201e-06, "loss": 2.1721, "step": 11013 }, { "epoch": 0.5908798283261802, "grad_norm": 0.4375, "learning_rate": 4.898745784114719e-06, "loss": 2.2398, "step": 11014 }, { "epoch": 0.5909334763948498, "grad_norm": 0.4609375, "learning_rate": 4.898721308312503e-06, "loss": 2.4761, "step": 11015 }, { "epoch": 0.5909871244635193, "grad_norm": 0.447265625, "learning_rate": 4.898696829613581e-06, "loss": 2.2362, "step": 11016 }, { "epoch": 0.5910407725321889, "grad_norm": 0.4453125, "learning_rate": 4.8986723480179845e-06, "loss": 2.4892, "step": 11017 }, { "epoch": 0.5910944206008584, "grad_norm": 0.43359375, "learning_rate": 4.898647863525742e-06, "loss": 2.3713, "step": 11018 }, { "epoch": 0.5911480686695278, "grad_norm": 0.37890625, "learning_rate": 4.898623376136883e-06, "loss": 2.2131, "step": 11019 }, { "epoch": 0.5912017167381974, "grad_norm": 0.4375, "learning_rate": 4.898598885851437e-06, "loss": 2.152, "step": 11020 }, { "epoch": 0.5912553648068669, "grad_norm": 0.40625, "learning_rate": 4.898574392669435e-06, "loss": 2.2892, "step": 11021 }, { "epoch": 0.5913090128755365, "grad_norm": 0.47265625, "learning_rate": 4.898549896590905e-06, "loss": 2.2938, "step": 11022 }, { "epoch": 0.591362660944206, "grad_norm": 0.40625, "learning_rate": 4.898525397615877e-06, "loss": 2.5107, "step": 11023 }, { "epoch": 0.5914163090128756, "grad_norm": 0.361328125, "learning_rate": 4.898500895744381e-06, "loss": 2.282, "step": 11024 }, { "epoch": 0.591469957081545, "grad_norm": 1.5546875, "learning_rate": 4.898476390976447e-06, "loss": 2.257, "step": 11025 }, { "epoch": 0.5915236051502146, "grad_norm": 0.392578125, "learning_rate": 4.898451883312103e-06, "loss": 2.2444, "step": 11026 }, { "epoch": 0.5915772532188841, "grad_norm": 0.50390625, "learning_rate": 4.89842737275138e-06, "loss": 2.4575, "step": 11027 }, { "epoch": 0.5916309012875537, "grad_norm": 0.41796875, "learning_rate": 4.898402859294307e-06, "loss": 2.3446, "step": 11028 }, { "epoch": 0.5916845493562232, "grad_norm": 0.396484375, "learning_rate": 4.898378342940914e-06, "loss": 2.5588, "step": 11029 }, { "epoch": 0.5917381974248928, "grad_norm": 0.44140625, "learning_rate": 4.89835382369123e-06, "loss": 2.3513, "step": 11030 }, { "epoch": 0.5917918454935622, "grad_norm": 0.75, "learning_rate": 4.898329301545285e-06, "loss": 2.412, "step": 11031 }, { "epoch": 0.5918454935622317, "grad_norm": 0.369140625, "learning_rate": 4.8983047765031085e-06, "loss": 2.1419, "step": 11032 }, { "epoch": 0.5918991416309013, "grad_norm": 0.498046875, "learning_rate": 4.898280248564731e-06, "loss": 2.3432, "step": 11033 }, { "epoch": 0.5919527896995708, "grad_norm": 0.3984375, "learning_rate": 4.89825571773018e-06, "loss": 2.321, "step": 11034 }, { "epoch": 0.5920064377682404, "grad_norm": 0.462890625, "learning_rate": 4.898231183999487e-06, "loss": 2.3956, "step": 11035 }, { "epoch": 0.5920600858369098, "grad_norm": 0.478515625, "learning_rate": 4.898206647372681e-06, "loss": 2.4456, "step": 11036 }, { "epoch": 0.5921137339055794, "grad_norm": 0.4140625, "learning_rate": 4.898182107849791e-06, "loss": 2.3531, "step": 11037 }, { "epoch": 0.5921673819742489, "grad_norm": 0.494140625, "learning_rate": 4.898157565430848e-06, "loss": 2.115, "step": 11038 }, { "epoch": 0.5922210300429185, "grad_norm": 0.470703125, "learning_rate": 4.8981330201158805e-06, "loss": 2.2907, "step": 11039 }, { "epoch": 0.592274678111588, "grad_norm": 0.388671875, "learning_rate": 4.89810847190492e-06, "loss": 2.0687, "step": 11040 }, { "epoch": 0.5923283261802575, "grad_norm": 0.48046875, "learning_rate": 4.898083920797993e-06, "loss": 1.9469, "step": 11041 }, { "epoch": 0.592381974248927, "grad_norm": 0.43359375, "learning_rate": 4.898059366795132e-06, "loss": 2.3471, "step": 11042 }, { "epoch": 0.5924356223175966, "grad_norm": 0.431640625, "learning_rate": 4.898034809896365e-06, "loss": 2.34, "step": 11043 }, { "epoch": 0.5924892703862661, "grad_norm": 0.9296875, "learning_rate": 4.898010250101722e-06, "loss": 2.4549, "step": 11044 }, { "epoch": 0.5925429184549357, "grad_norm": 0.482421875, "learning_rate": 4.8979856874112334e-06, "loss": 1.8809, "step": 11045 }, { "epoch": 0.5925965665236052, "grad_norm": 0.4765625, "learning_rate": 4.897961121824927e-06, "loss": 2.3006, "step": 11046 }, { "epoch": 0.5926502145922746, "grad_norm": 0.4921875, "learning_rate": 4.897936553342835e-06, "loss": 2.4099, "step": 11047 }, { "epoch": 0.5927038626609442, "grad_norm": 0.447265625, "learning_rate": 4.897911981964986e-06, "loss": 2.5023, "step": 11048 }, { "epoch": 0.5927575107296137, "grad_norm": 0.453125, "learning_rate": 4.897887407691408e-06, "loss": 2.4582, "step": 11049 }, { "epoch": 0.5928111587982833, "grad_norm": 0.45703125, "learning_rate": 4.897862830522133e-06, "loss": 2.3364, "step": 11050 }, { "epoch": 0.5928648068669528, "grad_norm": 0.369140625, "learning_rate": 4.897838250457191e-06, "loss": 2.0391, "step": 11051 }, { "epoch": 0.5929184549356223, "grad_norm": 0.38671875, "learning_rate": 4.897813667496609e-06, "loss": 2.175, "step": 11052 }, { "epoch": 0.5929721030042918, "grad_norm": 0.416015625, "learning_rate": 4.897789081640419e-06, "loss": 2.2579, "step": 11053 }, { "epoch": 0.5930257510729614, "grad_norm": 0.451171875, "learning_rate": 4.8977644928886505e-06, "loss": 2.3753, "step": 11054 }, { "epoch": 0.5930793991416309, "grad_norm": 0.4921875, "learning_rate": 4.897739901241331e-06, "loss": 2.3342, "step": 11055 }, { "epoch": 0.5931330472103005, "grad_norm": 0.466796875, "learning_rate": 4.897715306698493e-06, "loss": 2.2662, "step": 11056 }, { "epoch": 0.5931866952789699, "grad_norm": 0.421875, "learning_rate": 4.897690709260164e-06, "loss": 2.2789, "step": 11057 }, { "epoch": 0.5932403433476395, "grad_norm": 0.35546875, "learning_rate": 4.897666108926376e-06, "loss": 2.0801, "step": 11058 }, { "epoch": 0.593293991416309, "grad_norm": 0.443359375, "learning_rate": 4.897641505697157e-06, "loss": 2.2711, "step": 11059 }, { "epoch": 0.5933476394849786, "grad_norm": 0.431640625, "learning_rate": 4.8976168995725374e-06, "loss": 2.4076, "step": 11060 }, { "epoch": 0.5934012875536481, "grad_norm": 0.44140625, "learning_rate": 4.897592290552545e-06, "loss": 2.2625, "step": 11061 }, { "epoch": 0.5934549356223175, "grad_norm": 0.5, "learning_rate": 4.897567678637213e-06, "loss": 2.5009, "step": 11062 }, { "epoch": 0.5935085836909871, "grad_norm": 0.5078125, "learning_rate": 4.897543063826569e-06, "loss": 2.3784, "step": 11063 }, { "epoch": 0.5935622317596566, "grad_norm": 0.486328125, "learning_rate": 4.897518446120642e-06, "loss": 2.0313, "step": 11064 }, { "epoch": 0.5936158798283262, "grad_norm": 0.439453125, "learning_rate": 4.897493825519463e-06, "loss": 2.2931, "step": 11065 }, { "epoch": 0.5936695278969957, "grad_norm": 0.62890625, "learning_rate": 4.897469202023063e-06, "loss": 2.3455, "step": 11066 }, { "epoch": 0.5937231759656653, "grad_norm": 0.466796875, "learning_rate": 4.8974445756314685e-06, "loss": 2.1597, "step": 11067 }, { "epoch": 0.5937768240343347, "grad_norm": 0.451171875, "learning_rate": 4.897419946344711e-06, "loss": 2.2541, "step": 11068 }, { "epoch": 0.5938304721030043, "grad_norm": 0.408203125, "learning_rate": 4.897395314162821e-06, "loss": 2.2881, "step": 11069 }, { "epoch": 0.5938841201716738, "grad_norm": 0.46484375, "learning_rate": 4.897370679085828e-06, "loss": 2.3663, "step": 11070 }, { "epoch": 0.5939377682403434, "grad_norm": 0.46484375, "learning_rate": 4.89734604111376e-06, "loss": 2.2416, "step": 11071 }, { "epoch": 0.5939914163090129, "grad_norm": 0.404296875, "learning_rate": 4.897321400246649e-06, "loss": 2.3732, "step": 11072 }, { "epoch": 0.5940450643776825, "grad_norm": 0.5703125, "learning_rate": 4.897296756484522e-06, "loss": 2.2114, "step": 11073 }, { "epoch": 0.5940987124463519, "grad_norm": 0.3359375, "learning_rate": 4.8972721098274115e-06, "loss": 2.0546, "step": 11074 }, { "epoch": 0.5941523605150214, "grad_norm": 0.46484375, "learning_rate": 4.897247460275346e-06, "loss": 2.3405, "step": 11075 }, { "epoch": 0.594206008583691, "grad_norm": 0.43359375, "learning_rate": 4.897222807828356e-06, "loss": 2.4531, "step": 11076 }, { "epoch": 0.5942596566523605, "grad_norm": 0.427734375, "learning_rate": 4.89719815248647e-06, "loss": 2.2618, "step": 11077 }, { "epoch": 0.5943133047210301, "grad_norm": 0.52734375, "learning_rate": 4.897173494249719e-06, "loss": 2.3391, "step": 11078 }, { "epoch": 0.5943669527896995, "grad_norm": 0.421875, "learning_rate": 4.897148833118133e-06, "loss": 2.5782, "step": 11079 }, { "epoch": 0.5944206008583691, "grad_norm": 0.4140625, "learning_rate": 4.89712416909174e-06, "loss": 2.4893, "step": 11080 }, { "epoch": 0.5944742489270386, "grad_norm": 0.439453125, "learning_rate": 4.897099502170571e-06, "loss": 1.4792, "step": 11081 }, { "epoch": 0.5945278969957082, "grad_norm": 0.423828125, "learning_rate": 4.8970748323546555e-06, "loss": 2.4827, "step": 11082 }, { "epoch": 0.5945815450643777, "grad_norm": 0.435546875, "learning_rate": 4.897050159644024e-06, "loss": 2.0886, "step": 11083 }, { "epoch": 0.5946351931330472, "grad_norm": 0.4453125, "learning_rate": 4.897025484038706e-06, "loss": 2.3112, "step": 11084 }, { "epoch": 0.5946888412017167, "grad_norm": 0.359375, "learning_rate": 4.89700080553873e-06, "loss": 2.0947, "step": 11085 }, { "epoch": 0.5947424892703863, "grad_norm": 0.3984375, "learning_rate": 4.896976124144127e-06, "loss": 2.4048, "step": 11086 }, { "epoch": 0.5947961373390558, "grad_norm": 0.57421875, "learning_rate": 4.896951439854927e-06, "loss": 2.3467, "step": 11087 }, { "epoch": 0.5948497854077254, "grad_norm": 0.40625, "learning_rate": 4.89692675267116e-06, "loss": 2.2235, "step": 11088 }, { "epoch": 0.5949034334763948, "grad_norm": 0.482421875, "learning_rate": 4.896902062592854e-06, "loss": 2.3017, "step": 11089 }, { "epoch": 0.5949570815450643, "grad_norm": 0.435546875, "learning_rate": 4.896877369620041e-06, "loss": 2.0172, "step": 11090 }, { "epoch": 0.5950107296137339, "grad_norm": 1.265625, "learning_rate": 4.89685267375275e-06, "loss": 1.5996, "step": 11091 }, { "epoch": 0.5950643776824034, "grad_norm": 0.431640625, "learning_rate": 4.896827974991011e-06, "loss": 2.3539, "step": 11092 }, { "epoch": 0.595118025751073, "grad_norm": 0.404296875, "learning_rate": 4.896803273334852e-06, "loss": 2.1274, "step": 11093 }, { "epoch": 0.5951716738197425, "grad_norm": 0.49609375, "learning_rate": 4.896778568784306e-06, "loss": 2.3396, "step": 11094 }, { "epoch": 0.595225321888412, "grad_norm": 0.515625, "learning_rate": 4.896753861339401e-06, "loss": 2.5052, "step": 11095 }, { "epoch": 0.5952789699570815, "grad_norm": 0.416015625, "learning_rate": 4.8967291510001665e-06, "loss": 2.2696, "step": 11096 }, { "epoch": 0.5953326180257511, "grad_norm": 0.451171875, "learning_rate": 4.8967044377666325e-06, "loss": 1.8538, "step": 11097 }, { "epoch": 0.5953862660944206, "grad_norm": 0.388671875, "learning_rate": 4.896679721638831e-06, "loss": 2.2579, "step": 11098 }, { "epoch": 0.5954399141630902, "grad_norm": 0.46484375, "learning_rate": 4.896655002616788e-06, "loss": 2.5381, "step": 11099 }, { "epoch": 0.5954935622317596, "grad_norm": 0.55078125, "learning_rate": 4.896630280700537e-06, "loss": 2.2556, "step": 11100 }, { "epoch": 0.5955472103004292, "grad_norm": 0.423828125, "learning_rate": 4.896605555890105e-06, "loss": 2.1523, "step": 11101 }, { "epoch": 0.5956008583690987, "grad_norm": 0.41796875, "learning_rate": 4.896580828185524e-06, "loss": 2.2024, "step": 11102 }, { "epoch": 0.5956545064377683, "grad_norm": 0.4453125, "learning_rate": 4.896556097586823e-06, "loss": 2.2758, "step": 11103 }, { "epoch": 0.5957081545064378, "grad_norm": 0.5078125, "learning_rate": 4.896531364094031e-06, "loss": 2.1881, "step": 11104 }, { "epoch": 0.5957618025751072, "grad_norm": 0.40234375, "learning_rate": 4.89650662770718e-06, "loss": 2.3213, "step": 11105 }, { "epoch": 0.5958154506437768, "grad_norm": 0.421875, "learning_rate": 4.896481888426298e-06, "loss": 2.3119, "step": 11106 }, { "epoch": 0.5958690987124463, "grad_norm": 0.3515625, "learning_rate": 4.896457146251416e-06, "loss": 2.2785, "step": 11107 }, { "epoch": 0.5959227467811159, "grad_norm": 0.439453125, "learning_rate": 4.896432401182562e-06, "loss": 2.2833, "step": 11108 }, { "epoch": 0.5959763948497854, "grad_norm": 0.4296875, "learning_rate": 4.8964076532197694e-06, "loss": 2.2464, "step": 11109 }, { "epoch": 0.596030042918455, "grad_norm": 0.4375, "learning_rate": 4.896382902363064e-06, "loss": 1.9754, "step": 11110 }, { "epoch": 0.5960836909871244, "grad_norm": 0.421875, "learning_rate": 4.896358148612479e-06, "loss": 2.364, "step": 11111 }, { "epoch": 0.596137339055794, "grad_norm": 0.478515625, "learning_rate": 4.896333391968042e-06, "loss": 2.2628, "step": 11112 }, { "epoch": 0.5961909871244635, "grad_norm": 0.421875, "learning_rate": 4.896308632429784e-06, "loss": 2.2699, "step": 11113 }, { "epoch": 0.5962446351931331, "grad_norm": 1.1171875, "learning_rate": 4.896283869997734e-06, "loss": 2.3059, "step": 11114 }, { "epoch": 0.5962982832618026, "grad_norm": 0.404296875, "learning_rate": 4.896259104671924e-06, "loss": 2.2203, "step": 11115 }, { "epoch": 0.5963519313304722, "grad_norm": 0.470703125, "learning_rate": 4.896234336452382e-06, "loss": 1.7437, "step": 11116 }, { "epoch": 0.5964055793991416, "grad_norm": 0.6484375, "learning_rate": 4.896209565339138e-06, "loss": 2.0526, "step": 11117 }, { "epoch": 0.5964592274678111, "grad_norm": 0.42578125, "learning_rate": 4.896184791332223e-06, "loss": 2.3444, "step": 11118 }, { "epoch": 0.5965128755364807, "grad_norm": 0.47265625, "learning_rate": 4.896160014431666e-06, "loss": 2.3665, "step": 11119 }, { "epoch": 0.5965665236051502, "grad_norm": 0.41796875, "learning_rate": 4.896135234637497e-06, "loss": 2.2989, "step": 11120 }, { "epoch": 0.5966201716738198, "grad_norm": 0.482421875, "learning_rate": 4.896110451949746e-06, "loss": 2.3324, "step": 11121 }, { "epoch": 0.5966738197424892, "grad_norm": 0.38671875, "learning_rate": 4.896085666368444e-06, "loss": 1.7687, "step": 11122 }, { "epoch": 0.5967274678111588, "grad_norm": 0.455078125, "learning_rate": 4.896060877893619e-06, "loss": 2.2109, "step": 11123 }, { "epoch": 0.5967811158798283, "grad_norm": 0.43359375, "learning_rate": 4.896036086525302e-06, "loss": 2.3134, "step": 11124 }, { "epoch": 0.5968347639484979, "grad_norm": 0.404296875, "learning_rate": 4.896011292263523e-06, "loss": 2.3695, "step": 11125 }, { "epoch": 0.5968884120171674, "grad_norm": 0.478515625, "learning_rate": 4.895986495108313e-06, "loss": 2.3994, "step": 11126 }, { "epoch": 0.596942060085837, "grad_norm": 0.40234375, "learning_rate": 4.895961695059699e-06, "loss": 2.2887, "step": 11127 }, { "epoch": 0.5969957081545064, "grad_norm": 0.396484375, "learning_rate": 4.8959368921177134e-06, "loss": 2.0581, "step": 11128 }, { "epoch": 0.597049356223176, "grad_norm": 0.5, "learning_rate": 4.895912086282385e-06, "loss": 2.1444, "step": 11129 }, { "epoch": 0.5971030042918455, "grad_norm": 0.5234375, "learning_rate": 4.895887277553744e-06, "loss": 2.3117, "step": 11130 }, { "epoch": 0.5971566523605151, "grad_norm": 0.58203125, "learning_rate": 4.895862465931821e-06, "loss": 2.2634, "step": 11131 }, { "epoch": 0.5972103004291845, "grad_norm": 0.4296875, "learning_rate": 4.8958376514166454e-06, "loss": 2.222, "step": 11132 }, { "epoch": 0.597263948497854, "grad_norm": 0.734375, "learning_rate": 4.895812834008248e-06, "loss": 2.183, "step": 11133 }, { "epoch": 0.5973175965665236, "grad_norm": 0.43359375, "learning_rate": 4.895788013706657e-06, "loss": 2.3269, "step": 11134 }, { "epoch": 0.5973712446351931, "grad_norm": 0.4453125, "learning_rate": 4.895763190511904e-06, "loss": 2.3474, "step": 11135 }, { "epoch": 0.5974248927038627, "grad_norm": 0.447265625, "learning_rate": 4.895738364424018e-06, "loss": 2.5157, "step": 11136 }, { "epoch": 0.5974785407725322, "grad_norm": 1.1015625, "learning_rate": 4.89571353544303e-06, "loss": 2.2486, "step": 11137 }, { "epoch": 0.5975321888412017, "grad_norm": 0.486328125, "learning_rate": 4.895688703568968e-06, "loss": 2.3395, "step": 11138 }, { "epoch": 0.5975858369098712, "grad_norm": 0.345703125, "learning_rate": 4.895663868801865e-06, "loss": 2.2868, "step": 11139 }, { "epoch": 0.5976394849785408, "grad_norm": 0.5078125, "learning_rate": 4.8956390311417484e-06, "loss": 2.4576, "step": 11140 }, { "epoch": 0.5976931330472103, "grad_norm": 0.46484375, "learning_rate": 4.895614190588649e-06, "loss": 2.6141, "step": 11141 }, { "epoch": 0.5977467811158799, "grad_norm": 0.3984375, "learning_rate": 4.895589347142598e-06, "loss": 2.2649, "step": 11142 }, { "epoch": 0.5978004291845493, "grad_norm": 0.37890625, "learning_rate": 4.895564500803623e-06, "loss": 2.1089, "step": 11143 }, { "epoch": 0.5978540772532189, "grad_norm": 1.203125, "learning_rate": 4.8955396515717565e-06, "loss": 2.2504, "step": 11144 }, { "epoch": 0.5979077253218884, "grad_norm": 0.453125, "learning_rate": 4.895514799447027e-06, "loss": 2.3383, "step": 11145 }, { "epoch": 0.597961373390558, "grad_norm": 1.3125, "learning_rate": 4.895489944429464e-06, "loss": 2.2216, "step": 11146 }, { "epoch": 0.5980150214592275, "grad_norm": 0.451171875, "learning_rate": 4.895465086519099e-06, "loss": 2.4352, "step": 11147 }, { "epoch": 0.598068669527897, "grad_norm": 0.46875, "learning_rate": 4.895440225715962e-06, "loss": 2.3533, "step": 11148 }, { "epoch": 0.5981223175965665, "grad_norm": 0.447265625, "learning_rate": 4.895415362020081e-06, "loss": 2.2794, "step": 11149 }, { "epoch": 0.598175965665236, "grad_norm": 0.498046875, "learning_rate": 4.8953904954314884e-06, "loss": 2.3095, "step": 11150 }, { "epoch": 0.5982296137339056, "grad_norm": 0.484375, "learning_rate": 4.895365625950213e-06, "loss": 2.4953, "step": 11151 }, { "epoch": 0.5982832618025751, "grad_norm": 0.466796875, "learning_rate": 4.895340753576285e-06, "loss": 2.056, "step": 11152 }, { "epoch": 0.5983369098712447, "grad_norm": 0.478515625, "learning_rate": 4.895315878309735e-06, "loss": 2.4469, "step": 11153 }, { "epoch": 0.5983905579399141, "grad_norm": 0.484375, "learning_rate": 4.895291000150592e-06, "loss": 2.2472, "step": 11154 }, { "epoch": 0.5984442060085837, "grad_norm": 0.36328125, "learning_rate": 4.895266119098887e-06, "loss": 1.8533, "step": 11155 }, { "epoch": 0.5984978540772532, "grad_norm": 0.390625, "learning_rate": 4.8952412351546495e-06, "loss": 2.5247, "step": 11156 }, { "epoch": 0.5985515021459228, "grad_norm": 0.4765625, "learning_rate": 4.89521634831791e-06, "loss": 2.1414, "step": 11157 }, { "epoch": 0.5986051502145923, "grad_norm": 0.4375, "learning_rate": 4.8951914585886975e-06, "loss": 2.274, "step": 11158 }, { "epoch": 0.5986587982832619, "grad_norm": 0.40234375, "learning_rate": 4.895166565967043e-06, "loss": 2.1669, "step": 11159 }, { "epoch": 0.5987124463519313, "grad_norm": 0.36328125, "learning_rate": 4.895141670452978e-06, "loss": 2.2726, "step": 11160 }, { "epoch": 0.5987660944206008, "grad_norm": 0.439453125, "learning_rate": 4.895116772046529e-06, "loss": 2.1808, "step": 11161 }, { "epoch": 0.5988197424892704, "grad_norm": 0.5703125, "learning_rate": 4.895091870747729e-06, "loss": 1.9405, "step": 11162 }, { "epoch": 0.5988733905579399, "grad_norm": 0.462890625, "learning_rate": 4.895066966556606e-06, "loss": 2.3217, "step": 11163 }, { "epoch": 0.5989270386266095, "grad_norm": 0.47265625, "learning_rate": 4.895042059473192e-06, "loss": 2.1939, "step": 11164 }, { "epoch": 0.5989806866952789, "grad_norm": 0.458984375, "learning_rate": 4.895017149497517e-06, "loss": 2.4489, "step": 11165 }, { "epoch": 0.5990343347639485, "grad_norm": 0.43359375, "learning_rate": 4.894992236629609e-06, "loss": 2.4781, "step": 11166 }, { "epoch": 0.599087982832618, "grad_norm": 0.3984375, "learning_rate": 4.894967320869499e-06, "loss": 2.3382, "step": 11167 }, { "epoch": 0.5991416309012876, "grad_norm": 0.42578125, "learning_rate": 4.894942402217219e-06, "loss": 2.2221, "step": 11168 }, { "epoch": 0.5991952789699571, "grad_norm": 0.376953125, "learning_rate": 4.894917480672798e-06, "loss": 2.3964, "step": 11169 }, { "epoch": 0.5992489270386266, "grad_norm": 0.38671875, "learning_rate": 4.894892556236264e-06, "loss": 2.4411, "step": 11170 }, { "epoch": 0.5993025751072961, "grad_norm": 0.43359375, "learning_rate": 4.894867628907649e-06, "loss": 2.2089, "step": 11171 }, { "epoch": 0.5993562231759657, "grad_norm": 0.4453125, "learning_rate": 4.894842698686983e-06, "loss": 2.0028, "step": 11172 }, { "epoch": 0.5994098712446352, "grad_norm": 0.443359375, "learning_rate": 4.894817765574296e-06, "loss": 2.2447, "step": 11173 }, { "epoch": 0.5994635193133048, "grad_norm": 0.470703125, "learning_rate": 4.894792829569618e-06, "loss": 2.262, "step": 11174 }, { "epoch": 0.5995171673819742, "grad_norm": 0.45703125, "learning_rate": 4.89476789067298e-06, "loss": 2.3511, "step": 11175 }, { "epoch": 0.5995708154506437, "grad_norm": 0.482421875, "learning_rate": 4.894742948884411e-06, "loss": 2.271, "step": 11176 }, { "epoch": 0.5996244635193133, "grad_norm": 0.515625, "learning_rate": 4.894718004203941e-06, "loss": 2.3631, "step": 11177 }, { "epoch": 0.5996781115879828, "grad_norm": 0.369140625, "learning_rate": 4.894693056631601e-06, "loss": 1.9559, "step": 11178 }, { "epoch": 0.5997317596566524, "grad_norm": 0.494140625, "learning_rate": 4.89466810616742e-06, "loss": 2.2944, "step": 11179 }, { "epoch": 0.5997854077253219, "grad_norm": 0.400390625, "learning_rate": 4.89464315281143e-06, "loss": 2.1204, "step": 11180 }, { "epoch": 0.5998390557939914, "grad_norm": 0.486328125, "learning_rate": 4.894618196563659e-06, "loss": 2.0028, "step": 11181 }, { "epoch": 0.5998927038626609, "grad_norm": 0.478515625, "learning_rate": 4.894593237424139e-06, "loss": 2.2674, "step": 11182 }, { "epoch": 0.5999463519313305, "grad_norm": 0.5, "learning_rate": 4.894568275392898e-06, "loss": 2.2006, "step": 11183 }, { "epoch": 0.6, "grad_norm": 0.36328125, "learning_rate": 4.894543310469968e-06, "loss": 2.2823, "step": 11184 }, { "epoch": 0.6000536480686696, "grad_norm": 0.6328125, "learning_rate": 4.894518342655379e-06, "loss": 2.4638, "step": 11185 }, { "epoch": 0.600107296137339, "grad_norm": 0.486328125, "learning_rate": 4.89449337194916e-06, "loss": 2.4272, "step": 11186 }, { "epoch": 0.6001609442060086, "grad_norm": 0.50390625, "learning_rate": 4.894468398351342e-06, "loss": 2.1525, "step": 11187 }, { "epoch": 0.6002145922746781, "grad_norm": 1.359375, "learning_rate": 4.894443421861955e-06, "loss": 1.3858, "step": 11188 }, { "epoch": 0.6002682403433477, "grad_norm": 0.515625, "learning_rate": 4.894418442481029e-06, "loss": 2.2738, "step": 11189 }, { "epoch": 0.6003218884120172, "grad_norm": 0.423828125, "learning_rate": 4.894393460208594e-06, "loss": 2.1792, "step": 11190 }, { "epoch": 0.6003755364806866, "grad_norm": 0.455078125, "learning_rate": 4.894368475044682e-06, "loss": 2.3577, "step": 11191 }, { "epoch": 0.6004291845493562, "grad_norm": 0.53125, "learning_rate": 4.89434348698932e-06, "loss": 2.5142, "step": 11192 }, { "epoch": 0.6004828326180257, "grad_norm": 0.3984375, "learning_rate": 4.894318496042541e-06, "loss": 2.3198, "step": 11193 }, { "epoch": 0.6005364806866953, "grad_norm": 0.3984375, "learning_rate": 4.894293502204373e-06, "loss": 2.3249, "step": 11194 }, { "epoch": 0.6005901287553648, "grad_norm": 0.45703125, "learning_rate": 4.894268505474848e-06, "loss": 2.2141, "step": 11195 }, { "epoch": 0.6006437768240344, "grad_norm": 0.4140625, "learning_rate": 4.894243505853995e-06, "loss": 2.2986, "step": 11196 }, { "epoch": 0.6006974248927038, "grad_norm": 0.5, "learning_rate": 4.894218503341844e-06, "loss": 2.4275, "step": 11197 }, { "epoch": 0.6007510729613734, "grad_norm": 0.46484375, "learning_rate": 4.894193497938426e-06, "loss": 1.4809, "step": 11198 }, { "epoch": 0.6008047210300429, "grad_norm": 0.43359375, "learning_rate": 4.894168489643772e-06, "loss": 2.4346, "step": 11199 }, { "epoch": 0.6008583690987125, "grad_norm": 1.1171875, "learning_rate": 4.894143478457911e-06, "loss": 2.1724, "step": 11200 }, { "epoch": 0.600912017167382, "grad_norm": 0.375, "learning_rate": 4.894118464380873e-06, "loss": 2.0122, "step": 11201 }, { "epoch": 0.6009656652360515, "grad_norm": 0.470703125, "learning_rate": 4.894093447412688e-06, "loss": 2.3644, "step": 11202 }, { "epoch": 0.601019313304721, "grad_norm": 0.40625, "learning_rate": 4.894068427553386e-06, "loss": 2.2053, "step": 11203 }, { "epoch": 0.6010729613733906, "grad_norm": 0.703125, "learning_rate": 4.894043404803e-06, "loss": 2.1871, "step": 11204 }, { "epoch": 0.6011266094420601, "grad_norm": 0.55078125, "learning_rate": 4.894018379161558e-06, "loss": 2.5728, "step": 11205 }, { "epoch": 0.6011802575107296, "grad_norm": 0.439453125, "learning_rate": 4.8939933506290896e-06, "loss": 2.258, "step": 11206 }, { "epoch": 0.6012339055793992, "grad_norm": 0.4375, "learning_rate": 4.893968319205627e-06, "loss": 2.2607, "step": 11207 }, { "epoch": 0.6012875536480686, "grad_norm": 0.47265625, "learning_rate": 4.893943284891197e-06, "loss": 1.997, "step": 11208 }, { "epoch": 0.6013412017167382, "grad_norm": 0.453125, "learning_rate": 4.893918247685834e-06, "loss": 2.295, "step": 11209 }, { "epoch": 0.6013948497854077, "grad_norm": 0.455078125, "learning_rate": 4.8938932075895666e-06, "loss": 2.0959, "step": 11210 }, { "epoch": 0.6014484978540773, "grad_norm": 0.44921875, "learning_rate": 4.893868164602423e-06, "loss": 2.2101, "step": 11211 }, { "epoch": 0.6015021459227468, "grad_norm": 0.439453125, "learning_rate": 4.893843118724436e-06, "loss": 2.2857, "step": 11212 }, { "epoch": 0.6015557939914163, "grad_norm": 0.462890625, "learning_rate": 4.893818069955636e-06, "loss": 2.379, "step": 11213 }, { "epoch": 0.6016094420600858, "grad_norm": 0.5078125, "learning_rate": 4.893793018296051e-06, "loss": 2.3547, "step": 11214 }, { "epoch": 0.6016630901287554, "grad_norm": 0.40625, "learning_rate": 4.893767963745714e-06, "loss": 2.3785, "step": 11215 }, { "epoch": 0.6017167381974249, "grad_norm": 0.416015625, "learning_rate": 4.893742906304653e-06, "loss": 2.2548, "step": 11216 }, { "epoch": 0.6017703862660945, "grad_norm": 0.47265625, "learning_rate": 4.893717845972899e-06, "loss": 2.3196, "step": 11217 }, { "epoch": 0.601824034334764, "grad_norm": 0.5, "learning_rate": 4.893692782750484e-06, "loss": 1.6778, "step": 11218 }, { "epoch": 0.6018776824034334, "grad_norm": 0.435546875, "learning_rate": 4.893667716637434e-06, "loss": 2.2743, "step": 11219 }, { "epoch": 0.601931330472103, "grad_norm": 0.404296875, "learning_rate": 4.8936426476337825e-06, "loss": 2.1392, "step": 11220 }, { "epoch": 0.6019849785407725, "grad_norm": 0.490234375, "learning_rate": 4.8936175757395605e-06, "loss": 2.3903, "step": 11221 }, { "epoch": 0.6020386266094421, "grad_norm": 0.3984375, "learning_rate": 4.893592500954795e-06, "loss": 2.2524, "step": 11222 }, { "epoch": 0.6020922746781115, "grad_norm": 0.443359375, "learning_rate": 4.89356742327952e-06, "loss": 2.4361, "step": 11223 }, { "epoch": 0.6021459227467811, "grad_norm": 0.42578125, "learning_rate": 4.893542342713764e-06, "loss": 2.2623, "step": 11224 }, { "epoch": 0.6021995708154506, "grad_norm": 5.0625, "learning_rate": 4.893517259257556e-06, "loss": 2.2346, "step": 11225 }, { "epoch": 0.6022532188841202, "grad_norm": 0.484375, "learning_rate": 4.893492172910929e-06, "loss": 2.5183, "step": 11226 }, { "epoch": 0.6023068669527897, "grad_norm": 0.4609375, "learning_rate": 4.893467083673911e-06, "loss": 2.3623, "step": 11227 }, { "epoch": 0.6023605150214593, "grad_norm": 0.408203125, "learning_rate": 4.893441991546534e-06, "loss": 2.3038, "step": 11228 }, { "epoch": 0.6024141630901287, "grad_norm": 0.46875, "learning_rate": 4.893416896528826e-06, "loss": 2.3718, "step": 11229 }, { "epoch": 0.6024678111587983, "grad_norm": 3.0, "learning_rate": 4.89339179862082e-06, "loss": 2.3377, "step": 11230 }, { "epoch": 0.6025214592274678, "grad_norm": 0.5, "learning_rate": 4.893366697822545e-06, "loss": 2.3902, "step": 11231 }, { "epoch": 0.6025751072961374, "grad_norm": 0.359375, "learning_rate": 4.893341594134031e-06, "loss": 2.2439, "step": 11232 }, { "epoch": 0.6026287553648069, "grad_norm": 0.3828125, "learning_rate": 4.8933164875553084e-06, "loss": 1.7375, "step": 11233 }, { "epoch": 0.6026824034334763, "grad_norm": 0.408203125, "learning_rate": 4.8932913780864085e-06, "loss": 2.4737, "step": 11234 }, { "epoch": 0.6027360515021459, "grad_norm": 0.439453125, "learning_rate": 4.893266265727361e-06, "loss": 2.3512, "step": 11235 }, { "epoch": 0.6027896995708154, "grad_norm": 0.462890625, "learning_rate": 4.893241150478196e-06, "loss": 2.3059, "step": 11236 }, { "epoch": 0.602843347639485, "grad_norm": 0.431640625, "learning_rate": 4.893216032338944e-06, "loss": 2.4523, "step": 11237 }, { "epoch": 0.6028969957081545, "grad_norm": 0.46484375, "learning_rate": 4.893190911309635e-06, "loss": 2.2879, "step": 11238 }, { "epoch": 0.6029506437768241, "grad_norm": 0.41796875, "learning_rate": 4.893165787390301e-06, "loss": 2.3436, "step": 11239 }, { "epoch": 0.6030042918454935, "grad_norm": 0.50390625, "learning_rate": 4.893140660580969e-06, "loss": 2.2067, "step": 11240 }, { "epoch": 0.6030579399141631, "grad_norm": 0.43359375, "learning_rate": 4.893115530881672e-06, "loss": 2.3788, "step": 11241 }, { "epoch": 0.6031115879828326, "grad_norm": 0.4375, "learning_rate": 4.893090398292441e-06, "loss": 2.2807, "step": 11242 }, { "epoch": 0.6031652360515022, "grad_norm": 0.458984375, "learning_rate": 4.893065262813304e-06, "loss": 2.2844, "step": 11243 }, { "epoch": 0.6032188841201717, "grad_norm": 0.5625, "learning_rate": 4.893040124444292e-06, "loss": 2.0803, "step": 11244 }, { "epoch": 0.6032725321888412, "grad_norm": 0.41796875, "learning_rate": 4.893014983185437e-06, "loss": 2.238, "step": 11245 }, { "epoch": 0.6033261802575107, "grad_norm": 0.380859375, "learning_rate": 4.892989839036768e-06, "loss": 2.2833, "step": 11246 }, { "epoch": 0.6033798283261803, "grad_norm": 0.369140625, "learning_rate": 4.892964691998315e-06, "loss": 2.2223, "step": 11247 }, { "epoch": 0.6034334763948498, "grad_norm": 2.84375, "learning_rate": 4.892939542070109e-06, "loss": 2.2633, "step": 11248 }, { "epoch": 0.6034871244635193, "grad_norm": 0.462890625, "learning_rate": 4.892914389252179e-06, "loss": 2.2661, "step": 11249 }, { "epoch": 0.6035407725321889, "grad_norm": 0.55078125, "learning_rate": 4.892889233544559e-06, "loss": 2.2009, "step": 11250 }, { "epoch": 0.6035944206008583, "grad_norm": 0.384765625, "learning_rate": 4.892864074947275e-06, "loss": 2.33, "step": 11251 }, { "epoch": 0.6036480686695279, "grad_norm": 0.455078125, "learning_rate": 4.892838913460361e-06, "loss": 2.3228, "step": 11252 }, { "epoch": 0.6037017167381974, "grad_norm": 0.4375, "learning_rate": 4.892813749083844e-06, "loss": 2.2416, "step": 11253 }, { "epoch": 0.603755364806867, "grad_norm": 0.400390625, "learning_rate": 4.892788581817757e-06, "loss": 2.2575, "step": 11254 }, { "epoch": 0.6038090128755365, "grad_norm": 0.408203125, "learning_rate": 4.89276341166213e-06, "loss": 2.2346, "step": 11255 }, { "epoch": 0.603862660944206, "grad_norm": 0.5234375, "learning_rate": 4.892738238616992e-06, "loss": 2.6144, "step": 11256 }, { "epoch": 0.6039163090128755, "grad_norm": 0.419921875, "learning_rate": 4.892713062682376e-06, "loss": 2.4239, "step": 11257 }, { "epoch": 0.6039699570815451, "grad_norm": 0.45703125, "learning_rate": 4.8926878838583095e-06, "loss": 2.1495, "step": 11258 }, { "epoch": 0.6040236051502146, "grad_norm": 0.455078125, "learning_rate": 4.892662702144823e-06, "loss": 2.3261, "step": 11259 }, { "epoch": 0.6040772532188842, "grad_norm": 0.44140625, "learning_rate": 4.89263751754195e-06, "loss": 2.2756, "step": 11260 }, { "epoch": 0.6041309012875536, "grad_norm": 0.4765625, "learning_rate": 4.892612330049718e-06, "loss": 2.3862, "step": 11261 }, { "epoch": 0.6041845493562231, "grad_norm": 0.3984375, "learning_rate": 4.892587139668159e-06, "loss": 1.8072, "step": 11262 }, { "epoch": 0.6042381974248927, "grad_norm": 0.4765625, "learning_rate": 4.892561946397303e-06, "loss": 2.2908, "step": 11263 }, { "epoch": 0.6042918454935622, "grad_norm": 2.0, "learning_rate": 4.892536750237179e-06, "loss": 2.2337, "step": 11264 }, { "epoch": 0.6043454935622318, "grad_norm": 0.423828125, "learning_rate": 4.89251155118782e-06, "loss": 2.3515, "step": 11265 }, { "epoch": 0.6043991416309012, "grad_norm": 0.482421875, "learning_rate": 4.8924863492492535e-06, "loss": 2.1024, "step": 11266 }, { "epoch": 0.6044527896995708, "grad_norm": 0.52734375, "learning_rate": 4.892461144421513e-06, "loss": 2.3231, "step": 11267 }, { "epoch": 0.6045064377682403, "grad_norm": 0.4453125, "learning_rate": 4.892435936704627e-06, "loss": 2.3216, "step": 11268 }, { "epoch": 0.6045600858369099, "grad_norm": 0.56640625, "learning_rate": 4.892410726098626e-06, "loss": 2.1879, "step": 11269 }, { "epoch": 0.6046137339055794, "grad_norm": 0.50390625, "learning_rate": 4.892385512603541e-06, "loss": 2.0713, "step": 11270 }, { "epoch": 0.604667381974249, "grad_norm": 0.41796875, "learning_rate": 4.892360296219403e-06, "loss": 2.2445, "step": 11271 }, { "epoch": 0.6047210300429184, "grad_norm": 0.52734375, "learning_rate": 4.892335076946241e-06, "loss": 2.2247, "step": 11272 }, { "epoch": 0.604774678111588, "grad_norm": 0.419921875, "learning_rate": 4.892309854784087e-06, "loss": 2.2793, "step": 11273 }, { "epoch": 0.6048283261802575, "grad_norm": 0.42578125, "learning_rate": 4.892284629732969e-06, "loss": 2.2784, "step": 11274 }, { "epoch": 0.6048819742489271, "grad_norm": 0.427734375, "learning_rate": 4.89225940179292e-06, "loss": 2.1591, "step": 11275 }, { "epoch": 0.6049356223175966, "grad_norm": 0.41796875, "learning_rate": 4.89223417096397e-06, "loss": 2.36, "step": 11276 }, { "epoch": 0.604989270386266, "grad_norm": 0.4765625, "learning_rate": 4.892208937246149e-06, "loss": 2.5092, "step": 11277 }, { "epoch": 0.6050429184549356, "grad_norm": 0.451171875, "learning_rate": 4.892183700639487e-06, "loss": 2.4249, "step": 11278 }, { "epoch": 0.6050965665236051, "grad_norm": 0.443359375, "learning_rate": 4.892158461144015e-06, "loss": 2.0125, "step": 11279 }, { "epoch": 0.6051502145922747, "grad_norm": 0.96875, "learning_rate": 4.892133218759763e-06, "loss": 2.4037, "step": 11280 }, { "epoch": 0.6052038626609442, "grad_norm": 0.61328125, "learning_rate": 4.892107973486764e-06, "loss": 2.2746, "step": 11281 }, { "epoch": 0.6052575107296138, "grad_norm": 0.65625, "learning_rate": 4.892082725325045e-06, "loss": 2.3481, "step": 11282 }, { "epoch": 0.6053111587982832, "grad_norm": 0.62109375, "learning_rate": 4.8920574742746376e-06, "loss": 2.2911, "step": 11283 }, { "epoch": 0.6053648068669528, "grad_norm": 0.54296875, "learning_rate": 4.892032220335573e-06, "loss": 2.4654, "step": 11284 }, { "epoch": 0.6054184549356223, "grad_norm": 0.431640625, "learning_rate": 4.892006963507882e-06, "loss": 2.2055, "step": 11285 }, { "epoch": 0.6054721030042919, "grad_norm": 0.4765625, "learning_rate": 4.891981703791594e-06, "loss": 2.4677, "step": 11286 }, { "epoch": 0.6055257510729614, "grad_norm": 0.435546875, "learning_rate": 4.891956441186739e-06, "loss": 2.4712, "step": 11287 }, { "epoch": 0.605579399141631, "grad_norm": 0.46875, "learning_rate": 4.89193117569335e-06, "loss": 2.2703, "step": 11288 }, { "epoch": 0.6056330472103004, "grad_norm": 0.390625, "learning_rate": 4.891905907311455e-06, "loss": 2.0144, "step": 11289 }, { "epoch": 0.60568669527897, "grad_norm": 0.498046875, "learning_rate": 4.8918806360410855e-06, "loss": 2.4904, "step": 11290 }, { "epoch": 0.6057403433476395, "grad_norm": 0.44140625, "learning_rate": 4.891855361882272e-06, "loss": 2.0683, "step": 11291 }, { "epoch": 0.605793991416309, "grad_norm": 0.447265625, "learning_rate": 4.8918300848350455e-06, "loss": 2.485, "step": 11292 }, { "epoch": 0.6058476394849786, "grad_norm": 0.80078125, "learning_rate": 4.891804804899436e-06, "loss": 1.6163, "step": 11293 }, { "epoch": 0.605901287553648, "grad_norm": 0.43359375, "learning_rate": 4.8917795220754735e-06, "loss": 2.2337, "step": 11294 }, { "epoch": 0.6059549356223176, "grad_norm": 0.419921875, "learning_rate": 4.891754236363189e-06, "loss": 2.2121, "step": 11295 }, { "epoch": 0.6060085836909871, "grad_norm": 0.369140625, "learning_rate": 4.891728947762614e-06, "loss": 2.2552, "step": 11296 }, { "epoch": 0.6060622317596567, "grad_norm": 0.482421875, "learning_rate": 4.891703656273778e-06, "loss": 1.9662, "step": 11297 }, { "epoch": 0.6061158798283262, "grad_norm": 0.4765625, "learning_rate": 4.8916783618967104e-06, "loss": 2.2004, "step": 11298 }, { "epoch": 0.6061695278969957, "grad_norm": 0.44140625, "learning_rate": 4.8916530646314445e-06, "loss": 2.3169, "step": 11299 }, { "epoch": 0.6062231759656652, "grad_norm": 0.486328125, "learning_rate": 4.891627764478009e-06, "loss": 2.3267, "step": 11300 }, { "epoch": 0.6062768240343348, "grad_norm": 0.458984375, "learning_rate": 4.891602461436434e-06, "loss": 2.4511, "step": 11301 }, { "epoch": 0.6063304721030043, "grad_norm": 0.412109375, "learning_rate": 4.891577155506751e-06, "loss": 2.3923, "step": 11302 }, { "epoch": 0.6063841201716739, "grad_norm": 0.466796875, "learning_rate": 4.891551846688992e-06, "loss": 2.1406, "step": 11303 }, { "epoch": 0.6064377682403433, "grad_norm": 0.439453125, "learning_rate": 4.891526534983185e-06, "loss": 2.1466, "step": 11304 }, { "epoch": 0.6064914163090128, "grad_norm": 0.396484375, "learning_rate": 4.891501220389361e-06, "loss": 2.4384, "step": 11305 }, { "epoch": 0.6065450643776824, "grad_norm": 0.451171875, "learning_rate": 4.891475902907552e-06, "loss": 2.3514, "step": 11306 }, { "epoch": 0.6065987124463519, "grad_norm": 0.431640625, "learning_rate": 4.891450582537788e-06, "loss": 2.3867, "step": 11307 }, { "epoch": 0.6066523605150215, "grad_norm": 0.44921875, "learning_rate": 4.891425259280099e-06, "loss": 2.4196, "step": 11308 }, { "epoch": 0.606706008583691, "grad_norm": 0.416015625, "learning_rate": 4.8913999331345156e-06, "loss": 2.1005, "step": 11309 }, { "epoch": 0.6067596566523605, "grad_norm": 0.40234375, "learning_rate": 4.891374604101069e-06, "loss": 2.245, "step": 11310 }, { "epoch": 0.60681330472103, "grad_norm": 0.38671875, "learning_rate": 4.89134927217979e-06, "loss": 1.8727, "step": 11311 }, { "epoch": 0.6068669527896996, "grad_norm": 0.51171875, "learning_rate": 4.891323937370707e-06, "loss": 2.4308, "step": 11312 }, { "epoch": 0.6069206008583691, "grad_norm": 0.5, "learning_rate": 4.8912985996738534e-06, "loss": 1.2641, "step": 11313 }, { "epoch": 0.6069742489270387, "grad_norm": 0.458984375, "learning_rate": 4.891273259089258e-06, "loss": 2.4393, "step": 11314 }, { "epoch": 0.6070278969957081, "grad_norm": 0.47265625, "learning_rate": 4.891247915616953e-06, "loss": 2.4991, "step": 11315 }, { "epoch": 0.6070815450643777, "grad_norm": 0.46875, "learning_rate": 4.891222569256968e-06, "loss": 2.437, "step": 11316 }, { "epoch": 0.6071351931330472, "grad_norm": 0.6875, "learning_rate": 4.891197220009333e-06, "loss": 2.228, "step": 11317 }, { "epoch": 0.6071888412017168, "grad_norm": 0.47265625, "learning_rate": 4.891171867874079e-06, "loss": 2.4793, "step": 11318 }, { "epoch": 0.6072424892703863, "grad_norm": 0.455078125, "learning_rate": 4.891146512851238e-06, "loss": 2.3279, "step": 11319 }, { "epoch": 0.6072961373390557, "grad_norm": 0.5078125, "learning_rate": 4.891121154940839e-06, "loss": 2.2831, "step": 11320 }, { "epoch": 0.6073497854077253, "grad_norm": 0.42578125, "learning_rate": 4.8910957941429125e-06, "loss": 2.2541, "step": 11321 }, { "epoch": 0.6074034334763948, "grad_norm": 0.46875, "learning_rate": 4.891070430457491e-06, "loss": 2.1874, "step": 11322 }, { "epoch": 0.6074570815450644, "grad_norm": 0.55859375, "learning_rate": 4.891045063884603e-06, "loss": 2.3329, "step": 11323 }, { "epoch": 0.6075107296137339, "grad_norm": 0.38671875, "learning_rate": 4.89101969442428e-06, "loss": 2.3811, "step": 11324 }, { "epoch": 0.6075643776824035, "grad_norm": 0.470703125, "learning_rate": 4.890994322076553e-06, "loss": 2.4082, "step": 11325 }, { "epoch": 0.6076180257510729, "grad_norm": 0.4375, "learning_rate": 4.890968946841452e-06, "loss": 2.0136, "step": 11326 }, { "epoch": 0.6076716738197425, "grad_norm": 0.400390625, "learning_rate": 4.890943568719008e-06, "loss": 2.3564, "step": 11327 }, { "epoch": 0.607725321888412, "grad_norm": 0.5859375, "learning_rate": 4.890918187709252e-06, "loss": 2.332, "step": 11328 }, { "epoch": 0.6077789699570816, "grad_norm": 0.416015625, "learning_rate": 4.890892803812214e-06, "loss": 2.1842, "step": 11329 }, { "epoch": 0.6078326180257511, "grad_norm": 0.49609375, "learning_rate": 4.890867417027925e-06, "loss": 2.411, "step": 11330 }, { "epoch": 0.6078862660944206, "grad_norm": 0.46875, "learning_rate": 4.890842027356415e-06, "loss": 2.2627, "step": 11331 }, { "epoch": 0.6079399141630901, "grad_norm": 0.6640625, "learning_rate": 4.890816634797716e-06, "loss": 2.0945, "step": 11332 }, { "epoch": 0.6079935622317597, "grad_norm": 0.515625, "learning_rate": 4.890791239351857e-06, "loss": 1.9933, "step": 11333 }, { "epoch": 0.6080472103004292, "grad_norm": 0.6953125, "learning_rate": 4.89076584101887e-06, "loss": 2.1614, "step": 11334 }, { "epoch": 0.6081008583690987, "grad_norm": 0.40625, "learning_rate": 4.890740439798785e-06, "loss": 2.0532, "step": 11335 }, { "epoch": 0.6081545064377682, "grad_norm": 0.40625, "learning_rate": 4.890715035691633e-06, "loss": 2.2059, "step": 11336 }, { "epoch": 0.6082081545064377, "grad_norm": 0.482421875, "learning_rate": 4.890689628697446e-06, "loss": 2.5389, "step": 11337 }, { "epoch": 0.6082618025751073, "grad_norm": 0.484375, "learning_rate": 4.890664218816251e-06, "loss": 2.2289, "step": 11338 }, { "epoch": 0.6083154506437768, "grad_norm": 0.44921875, "learning_rate": 4.890638806048082e-06, "loss": 2.2361, "step": 11339 }, { "epoch": 0.6083690987124464, "grad_norm": 0.44140625, "learning_rate": 4.890613390392969e-06, "loss": 2.4094, "step": 11340 }, { "epoch": 0.6084227467811159, "grad_norm": 0.421875, "learning_rate": 4.890587971850941e-06, "loss": 2.2881, "step": 11341 }, { "epoch": 0.6084763948497854, "grad_norm": 0.44921875, "learning_rate": 4.890562550422031e-06, "loss": 2.2805, "step": 11342 }, { "epoch": 0.6085300429184549, "grad_norm": 0.6328125, "learning_rate": 4.890537126106268e-06, "loss": 2.1327, "step": 11343 }, { "epoch": 0.6085836909871245, "grad_norm": 0.392578125, "learning_rate": 4.890511698903684e-06, "loss": 2.2801, "step": 11344 }, { "epoch": 0.608637339055794, "grad_norm": 0.435546875, "learning_rate": 4.890486268814309e-06, "loss": 2.3502, "step": 11345 }, { "epoch": 0.6086909871244636, "grad_norm": 0.376953125, "learning_rate": 4.890460835838174e-06, "loss": 2.1867, "step": 11346 }, { "epoch": 0.608744635193133, "grad_norm": 0.38671875, "learning_rate": 4.890435399975309e-06, "loss": 2.2863, "step": 11347 }, { "epoch": 0.6087982832618025, "grad_norm": 0.8046875, "learning_rate": 4.890409961225746e-06, "loss": 2.2958, "step": 11348 }, { "epoch": 0.6088519313304721, "grad_norm": 0.50390625, "learning_rate": 4.890384519589515e-06, "loss": 2.3147, "step": 11349 }, { "epoch": 0.6089055793991416, "grad_norm": 0.55859375, "learning_rate": 4.890359075066646e-06, "loss": 2.2537, "step": 11350 }, { "epoch": 0.6089592274678112, "grad_norm": 0.404296875, "learning_rate": 4.890333627657171e-06, "loss": 2.1979, "step": 11351 }, { "epoch": 0.6090128755364806, "grad_norm": 0.478515625, "learning_rate": 4.89030817736112e-06, "loss": 2.324, "step": 11352 }, { "epoch": 0.6090665236051502, "grad_norm": 0.4609375, "learning_rate": 4.890282724178523e-06, "loss": 1.8083, "step": 11353 }, { "epoch": 0.6091201716738197, "grad_norm": 0.3671875, "learning_rate": 4.890257268109413e-06, "loss": 2.1647, "step": 11354 }, { "epoch": 0.6091738197424893, "grad_norm": 0.466796875, "learning_rate": 4.8902318091538185e-06, "loss": 2.3428, "step": 11355 }, { "epoch": 0.6092274678111588, "grad_norm": 0.47265625, "learning_rate": 4.890206347311771e-06, "loss": 2.3015, "step": 11356 }, { "epoch": 0.6092811158798284, "grad_norm": 0.466796875, "learning_rate": 4.890180882583302e-06, "loss": 2.3775, "step": 11357 }, { "epoch": 0.6093347639484978, "grad_norm": 0.4609375, "learning_rate": 4.890155414968441e-06, "loss": 2.296, "step": 11358 }, { "epoch": 0.6093884120171674, "grad_norm": 0.404296875, "learning_rate": 4.89012994446722e-06, "loss": 1.9925, "step": 11359 }, { "epoch": 0.6094420600858369, "grad_norm": 0.53515625, "learning_rate": 4.890104471079668e-06, "loss": 2.518, "step": 11360 }, { "epoch": 0.6094957081545065, "grad_norm": 0.455078125, "learning_rate": 4.890078994805819e-06, "loss": 2.4109, "step": 11361 }, { "epoch": 0.609549356223176, "grad_norm": 0.416015625, "learning_rate": 4.8900535156457e-06, "loss": 2.2739, "step": 11362 }, { "epoch": 0.6096030042918454, "grad_norm": 0.478515625, "learning_rate": 4.890028033599344e-06, "loss": 2.1157, "step": 11363 }, { "epoch": 0.609656652360515, "grad_norm": 0.369140625, "learning_rate": 4.89000254866678e-06, "loss": 2.0105, "step": 11364 }, { "epoch": 0.6097103004291845, "grad_norm": 0.388671875, "learning_rate": 4.889977060848041e-06, "loss": 2.2488, "step": 11365 }, { "epoch": 0.6097639484978541, "grad_norm": 0.431640625, "learning_rate": 4.889951570143157e-06, "loss": 2.368, "step": 11366 }, { "epoch": 0.6098175965665236, "grad_norm": 0.53515625, "learning_rate": 4.8899260765521585e-06, "loss": 1.3463, "step": 11367 }, { "epoch": 0.6098712446351932, "grad_norm": 0.51171875, "learning_rate": 4.8899005800750755e-06, "loss": 2.3254, "step": 11368 }, { "epoch": 0.6099248927038626, "grad_norm": 0.45703125, "learning_rate": 4.88987508071194e-06, "loss": 2.463, "step": 11369 }, { "epoch": 0.6099785407725322, "grad_norm": 0.5546875, "learning_rate": 4.8898495784627835e-06, "loss": 2.1095, "step": 11370 }, { "epoch": 0.6100321888412017, "grad_norm": 0.4921875, "learning_rate": 4.889824073327634e-06, "loss": 2.0541, "step": 11371 }, { "epoch": 0.6100858369098713, "grad_norm": 0.466796875, "learning_rate": 4.889798565306525e-06, "loss": 2.5968, "step": 11372 }, { "epoch": 0.6101394849785408, "grad_norm": 0.431640625, "learning_rate": 4.889773054399486e-06, "loss": 2.3922, "step": 11373 }, { "epoch": 0.6101931330472103, "grad_norm": 0.5859375, "learning_rate": 4.889747540606548e-06, "loss": 2.251, "step": 11374 }, { "epoch": 0.6102467811158798, "grad_norm": 0.65625, "learning_rate": 4.889722023927742e-06, "loss": 2.2835, "step": 11375 }, { "epoch": 0.6103004291845494, "grad_norm": 0.41015625, "learning_rate": 4.8896965043630984e-06, "loss": 2.2215, "step": 11376 }, { "epoch": 0.6103540772532189, "grad_norm": 0.412109375, "learning_rate": 4.889670981912649e-06, "loss": 2.1847, "step": 11377 }, { "epoch": 0.6104077253218884, "grad_norm": 0.546875, "learning_rate": 4.889645456576423e-06, "loss": 2.2836, "step": 11378 }, { "epoch": 0.610461373390558, "grad_norm": 0.41796875, "learning_rate": 4.889619928354453e-06, "loss": 2.2553, "step": 11379 }, { "epoch": 0.6105150214592274, "grad_norm": 0.42578125, "learning_rate": 4.889594397246769e-06, "loss": 2.2823, "step": 11380 }, { "epoch": 0.610568669527897, "grad_norm": 0.3984375, "learning_rate": 4.889568863253402e-06, "loss": 1.9768, "step": 11381 }, { "epoch": 0.6106223175965665, "grad_norm": 0.47265625, "learning_rate": 4.889543326374382e-06, "loss": 2.4839, "step": 11382 }, { "epoch": 0.6106759656652361, "grad_norm": 0.42578125, "learning_rate": 4.889517786609741e-06, "loss": 2.3096, "step": 11383 }, { "epoch": 0.6107296137339056, "grad_norm": 0.546875, "learning_rate": 4.889492243959509e-06, "loss": 2.1032, "step": 11384 }, { "epoch": 0.6107832618025751, "grad_norm": 0.470703125, "learning_rate": 4.889466698423718e-06, "loss": 2.4425, "step": 11385 }, { "epoch": 0.6108369098712446, "grad_norm": 0.48828125, "learning_rate": 4.889441150002398e-06, "loss": 1.7237, "step": 11386 }, { "epoch": 0.6108905579399142, "grad_norm": 0.4296875, "learning_rate": 4.8894155986955786e-06, "loss": 2.2569, "step": 11387 }, { "epoch": 0.6109442060085837, "grad_norm": 0.51953125, "learning_rate": 4.889390044503293e-06, "loss": 2.4406, "step": 11388 }, { "epoch": 0.6109978540772533, "grad_norm": 0.458984375, "learning_rate": 4.889364487425571e-06, "loss": 2.2136, "step": 11389 }, { "epoch": 0.6110515021459227, "grad_norm": 0.47265625, "learning_rate": 4.889338927462443e-06, "loss": 2.3472, "step": 11390 }, { "epoch": 0.6111051502145923, "grad_norm": 0.49609375, "learning_rate": 4.889313364613941e-06, "loss": 2.2886, "step": 11391 }, { "epoch": 0.6111587982832618, "grad_norm": 0.50390625, "learning_rate": 4.889287798880094e-06, "loss": 2.4447, "step": 11392 }, { "epoch": 0.6112124463519313, "grad_norm": 0.4375, "learning_rate": 4.8892622302609365e-06, "loss": 2.2329, "step": 11393 }, { "epoch": 0.6112660944206009, "grad_norm": 0.36328125, "learning_rate": 4.889236658756495e-06, "loss": 1.9204, "step": 11394 }, { "epoch": 0.6113197424892703, "grad_norm": 0.439453125, "learning_rate": 4.889211084366803e-06, "loss": 2.3585, "step": 11395 }, { "epoch": 0.6113733905579399, "grad_norm": 0.47265625, "learning_rate": 4.88918550709189e-06, "loss": 2.5905, "step": 11396 }, { "epoch": 0.6114270386266094, "grad_norm": 0.427734375, "learning_rate": 4.889159926931788e-06, "loss": 2.1771, "step": 11397 }, { "epoch": 0.611480686695279, "grad_norm": 0.47265625, "learning_rate": 4.889134343886528e-06, "loss": 2.3797, "step": 11398 }, { "epoch": 0.6115343347639485, "grad_norm": 0.45703125, "learning_rate": 4.88910875795614e-06, "loss": 2.3553, "step": 11399 }, { "epoch": 0.6115879828326181, "grad_norm": 0.4453125, "learning_rate": 4.889083169140656e-06, "loss": 2.2756, "step": 11400 }, { "epoch": 0.6116416309012875, "grad_norm": 0.4375, "learning_rate": 4.889057577440106e-06, "loss": 1.9752, "step": 11401 }, { "epoch": 0.6116952789699571, "grad_norm": 0.4140625, "learning_rate": 4.88903198285452e-06, "loss": 2.3375, "step": 11402 }, { "epoch": 0.6117489270386266, "grad_norm": 0.384765625, "learning_rate": 4.889006385383931e-06, "loss": 2.1998, "step": 11403 }, { "epoch": 0.6118025751072962, "grad_norm": 0.5078125, "learning_rate": 4.888980785028369e-06, "loss": 2.363, "step": 11404 }, { "epoch": 0.6118562231759657, "grad_norm": 0.423828125, "learning_rate": 4.888955181787864e-06, "loss": 2.3752, "step": 11405 }, { "epoch": 0.6119098712446351, "grad_norm": 0.427734375, "learning_rate": 4.8889295756624485e-06, "loss": 1.9659, "step": 11406 }, { "epoch": 0.6119635193133047, "grad_norm": 0.41015625, "learning_rate": 4.888903966652153e-06, "loss": 2.4261, "step": 11407 }, { "epoch": 0.6120171673819742, "grad_norm": 0.451171875, "learning_rate": 4.888878354757008e-06, "loss": 2.478, "step": 11408 }, { "epoch": 0.6120708154506438, "grad_norm": 0.376953125, "learning_rate": 4.888852739977044e-06, "loss": 2.1211, "step": 11409 }, { "epoch": 0.6121244635193133, "grad_norm": 0.43359375, "learning_rate": 4.888827122312293e-06, "loss": 2.2261, "step": 11410 }, { "epoch": 0.6121781115879829, "grad_norm": 0.4609375, "learning_rate": 4.8888015017627844e-06, "loss": 2.2067, "step": 11411 }, { "epoch": 0.6122317596566523, "grad_norm": 0.39453125, "learning_rate": 4.888775878328551e-06, "loss": 2.2827, "step": 11412 }, { "epoch": 0.6122854077253219, "grad_norm": 0.490234375, "learning_rate": 4.888750252009623e-06, "loss": 2.1517, "step": 11413 }, { "epoch": 0.6123390557939914, "grad_norm": 0.421875, "learning_rate": 4.888724622806031e-06, "loss": 2.1131, "step": 11414 }, { "epoch": 0.612392703862661, "grad_norm": 0.439453125, "learning_rate": 4.8886989907178065e-06, "loss": 2.0492, "step": 11415 }, { "epoch": 0.6124463519313305, "grad_norm": 0.46484375, "learning_rate": 4.888673355744979e-06, "loss": 2.2615, "step": 11416 }, { "epoch": 0.6125, "grad_norm": 0.67578125, "learning_rate": 4.888647717887582e-06, "loss": 2.4148, "step": 11417 }, { "epoch": 0.6125536480686695, "grad_norm": 0.416015625, "learning_rate": 4.888622077145645e-06, "loss": 2.3359, "step": 11418 }, { "epoch": 0.6126072961373391, "grad_norm": 0.447265625, "learning_rate": 4.888596433519198e-06, "loss": 2.2774, "step": 11419 }, { "epoch": 0.6126609442060086, "grad_norm": 0.447265625, "learning_rate": 4.888570787008273e-06, "loss": 2.2987, "step": 11420 }, { "epoch": 0.6127145922746781, "grad_norm": 0.427734375, "learning_rate": 4.8885451376129025e-06, "loss": 2.2697, "step": 11421 }, { "epoch": 0.6127682403433476, "grad_norm": 0.4453125, "learning_rate": 4.8885194853331145e-06, "loss": 2.0808, "step": 11422 }, { "epoch": 0.6128218884120171, "grad_norm": 0.6328125, "learning_rate": 4.888493830168942e-06, "loss": 2.2783, "step": 11423 }, { "epoch": 0.6128755364806867, "grad_norm": 0.4921875, "learning_rate": 4.888468172120415e-06, "loss": 2.4597, "step": 11424 }, { "epoch": 0.6129291845493562, "grad_norm": 0.5390625, "learning_rate": 4.8884425111875655e-06, "loss": 2.3026, "step": 11425 }, { "epoch": 0.6129828326180258, "grad_norm": 0.8046875, "learning_rate": 4.888416847370424e-06, "loss": 2.2159, "step": 11426 }, { "epoch": 0.6130364806866953, "grad_norm": 0.5390625, "learning_rate": 4.888391180669021e-06, "loss": 2.387, "step": 11427 }, { "epoch": 0.6130901287553648, "grad_norm": 0.36328125, "learning_rate": 4.8883655110833874e-06, "loss": 2.1289, "step": 11428 }, { "epoch": 0.6131437768240343, "grad_norm": 0.423828125, "learning_rate": 4.888339838613555e-06, "loss": 2.3614, "step": 11429 }, { "epoch": 0.6131974248927039, "grad_norm": 0.50390625, "learning_rate": 4.888314163259555e-06, "loss": 2.5995, "step": 11430 }, { "epoch": 0.6132510729613734, "grad_norm": 0.419921875, "learning_rate": 4.888288485021416e-06, "loss": 2.2495, "step": 11431 }, { "epoch": 0.613304721030043, "grad_norm": 0.408203125, "learning_rate": 4.888262803899173e-06, "loss": 2.278, "step": 11432 }, { "epoch": 0.6133583690987124, "grad_norm": 0.49609375, "learning_rate": 4.888237119892854e-06, "loss": 2.3195, "step": 11433 }, { "epoch": 0.613412017167382, "grad_norm": 0.4140625, "learning_rate": 4.888211433002491e-06, "loss": 2.2859, "step": 11434 }, { "epoch": 0.6134656652360515, "grad_norm": 0.60546875, "learning_rate": 4.888185743228115e-06, "loss": 2.3589, "step": 11435 }, { "epoch": 0.613519313304721, "grad_norm": 0.43359375, "learning_rate": 4.8881600505697565e-06, "loss": 1.9618, "step": 11436 }, { "epoch": 0.6135729613733906, "grad_norm": 0.423828125, "learning_rate": 4.888134355027447e-06, "loss": 2.3619, "step": 11437 }, { "epoch": 0.61362660944206, "grad_norm": 0.435546875, "learning_rate": 4.888108656601218e-06, "loss": 2.4455, "step": 11438 }, { "epoch": 0.6136802575107296, "grad_norm": 0.4765625, "learning_rate": 4.888082955291101e-06, "loss": 2.1453, "step": 11439 }, { "epoch": 0.6137339055793991, "grad_norm": 0.51953125, "learning_rate": 4.888057251097125e-06, "loss": 2.4146, "step": 11440 }, { "epoch": 0.6137875536480687, "grad_norm": 0.39453125, "learning_rate": 4.888031544019321e-06, "loss": 2.0722, "step": 11441 }, { "epoch": 0.6138412017167382, "grad_norm": 0.427734375, "learning_rate": 4.888005834057723e-06, "loss": 2.2394, "step": 11442 }, { "epoch": 0.6138948497854078, "grad_norm": 1.0859375, "learning_rate": 4.887980121212359e-06, "loss": 2.2721, "step": 11443 }, { "epoch": 0.6139484978540772, "grad_norm": 0.439453125, "learning_rate": 4.887954405483262e-06, "loss": 2.3885, "step": 11444 }, { "epoch": 0.6140021459227468, "grad_norm": 0.478515625, "learning_rate": 4.887928686870461e-06, "loss": 1.9825, "step": 11445 }, { "epoch": 0.6140557939914163, "grad_norm": 0.46875, "learning_rate": 4.88790296537399e-06, "loss": 2.4219, "step": 11446 }, { "epoch": 0.6141094420600859, "grad_norm": 0.353515625, "learning_rate": 4.887877240993878e-06, "loss": 2.0951, "step": 11447 }, { "epoch": 0.6141630901287554, "grad_norm": 0.4609375, "learning_rate": 4.887851513730156e-06, "loss": 2.4177, "step": 11448 }, { "epoch": 0.6142167381974248, "grad_norm": 0.59375, "learning_rate": 4.887825783582856e-06, "loss": 2.0289, "step": 11449 }, { "epoch": 0.6142703862660944, "grad_norm": 0.4765625, "learning_rate": 4.887800050552009e-06, "loss": 2.5397, "step": 11450 }, { "epoch": 0.6143240343347639, "grad_norm": 0.56640625, "learning_rate": 4.887774314637645e-06, "loss": 2.0802, "step": 11451 }, { "epoch": 0.6143776824034335, "grad_norm": 0.451171875, "learning_rate": 4.887748575839795e-06, "loss": 2.1316, "step": 11452 }, { "epoch": 0.614431330472103, "grad_norm": 0.390625, "learning_rate": 4.887722834158493e-06, "loss": 2.4346, "step": 11453 }, { "epoch": 0.6144849785407726, "grad_norm": 0.396484375, "learning_rate": 4.887697089593767e-06, "loss": 2.4268, "step": 11454 }, { "epoch": 0.614538626609442, "grad_norm": 0.4453125, "learning_rate": 4.887671342145648e-06, "loss": 2.4345, "step": 11455 }, { "epoch": 0.6145922746781116, "grad_norm": 0.404296875, "learning_rate": 4.887645591814169e-06, "loss": 2.2451, "step": 11456 }, { "epoch": 0.6146459227467811, "grad_norm": 0.4375, "learning_rate": 4.887619838599361e-06, "loss": 2.5977, "step": 11457 }, { "epoch": 0.6146995708154507, "grad_norm": 0.453125, "learning_rate": 4.887594082501254e-06, "loss": 2.3573, "step": 11458 }, { "epoch": 0.6147532188841202, "grad_norm": 0.443359375, "learning_rate": 4.887568323519878e-06, "loss": 2.2336, "step": 11459 }, { "epoch": 0.6148068669527897, "grad_norm": 0.443359375, "learning_rate": 4.887542561655267e-06, "loss": 2.3577, "step": 11460 }, { "epoch": 0.6148605150214592, "grad_norm": 0.400390625, "learning_rate": 4.88751679690745e-06, "loss": 2.2224, "step": 11461 }, { "epoch": 0.6149141630901288, "grad_norm": 0.49609375, "learning_rate": 4.887491029276459e-06, "loss": 2.423, "step": 11462 }, { "epoch": 0.6149678111587983, "grad_norm": 0.447265625, "learning_rate": 4.887465258762325e-06, "loss": 2.1287, "step": 11463 }, { "epoch": 0.6150214592274678, "grad_norm": 0.419921875, "learning_rate": 4.887439485365079e-06, "loss": 2.3695, "step": 11464 }, { "epoch": 0.6150751072961373, "grad_norm": 0.46875, "learning_rate": 4.887413709084752e-06, "loss": 2.0306, "step": 11465 }, { "epoch": 0.6151287553648068, "grad_norm": 0.4140625, "learning_rate": 4.887387929921376e-06, "loss": 2.1684, "step": 11466 }, { "epoch": 0.6151824034334764, "grad_norm": 0.6875, "learning_rate": 4.88736214787498e-06, "loss": 2.0862, "step": 11467 }, { "epoch": 0.6152360515021459, "grad_norm": 0.423828125, "learning_rate": 4.887336362945597e-06, "loss": 2.3715, "step": 11468 }, { "epoch": 0.6152896995708155, "grad_norm": 0.447265625, "learning_rate": 4.887310575133258e-06, "loss": 2.1448, "step": 11469 }, { "epoch": 0.615343347639485, "grad_norm": 0.55078125, "learning_rate": 4.887284784437994e-06, "loss": 2.7205, "step": 11470 }, { "epoch": 0.6153969957081545, "grad_norm": 0.41015625, "learning_rate": 4.887258990859835e-06, "loss": 2.2126, "step": 11471 }, { "epoch": 0.615450643776824, "grad_norm": 0.408203125, "learning_rate": 4.887233194398814e-06, "loss": 2.3991, "step": 11472 }, { "epoch": 0.6155042918454936, "grad_norm": 0.5, "learning_rate": 4.887207395054961e-06, "loss": 2.498, "step": 11473 }, { "epoch": 0.6155579399141631, "grad_norm": 0.455078125, "learning_rate": 4.887181592828307e-06, "loss": 2.3854, "step": 11474 }, { "epoch": 0.6156115879828327, "grad_norm": 0.43359375, "learning_rate": 4.887155787718884e-06, "loss": 2.2397, "step": 11475 }, { "epoch": 0.6156652360515021, "grad_norm": 0.47265625, "learning_rate": 4.887129979726723e-06, "loss": 2.1733, "step": 11476 }, { "epoch": 0.6157188841201717, "grad_norm": 0.478515625, "learning_rate": 4.887104168851854e-06, "loss": 2.1485, "step": 11477 }, { "epoch": 0.6157725321888412, "grad_norm": 0.4921875, "learning_rate": 4.88707835509431e-06, "loss": 2.2087, "step": 11478 }, { "epoch": 0.6158261802575107, "grad_norm": 0.59765625, "learning_rate": 4.88705253845412e-06, "loss": 2.2591, "step": 11479 }, { "epoch": 0.6158798283261803, "grad_norm": 0.384765625, "learning_rate": 4.887026718931318e-06, "loss": 2.2004, "step": 11480 }, { "epoch": 0.6159334763948497, "grad_norm": 0.46875, "learning_rate": 4.887000896525933e-06, "loss": 2.6066, "step": 11481 }, { "epoch": 0.6159871244635193, "grad_norm": 0.40234375, "learning_rate": 4.886975071237996e-06, "loss": 2.3325, "step": 11482 }, { "epoch": 0.6160407725321888, "grad_norm": 1.4609375, "learning_rate": 4.8869492430675405e-06, "loss": 2.2285, "step": 11483 }, { "epoch": 0.6160944206008584, "grad_norm": 0.45703125, "learning_rate": 4.886923412014595e-06, "loss": 2.5393, "step": 11484 }, { "epoch": 0.6161480686695279, "grad_norm": 0.515625, "learning_rate": 4.886897578079192e-06, "loss": 2.243, "step": 11485 }, { "epoch": 0.6162017167381975, "grad_norm": 0.416015625, "learning_rate": 4.886871741261362e-06, "loss": 1.9777, "step": 11486 }, { "epoch": 0.6162553648068669, "grad_norm": 0.6015625, "learning_rate": 4.886845901561137e-06, "loss": 2.5097, "step": 11487 }, { "epoch": 0.6163090128755365, "grad_norm": 0.404296875, "learning_rate": 4.8868200589785484e-06, "loss": 2.1179, "step": 11488 }, { "epoch": 0.616362660944206, "grad_norm": 0.458984375, "learning_rate": 4.886794213513627e-06, "loss": 2.1566, "step": 11489 }, { "epoch": 0.6164163090128756, "grad_norm": 0.69921875, "learning_rate": 4.886768365166404e-06, "loss": 2.2162, "step": 11490 }, { "epoch": 0.6164699570815451, "grad_norm": 0.484375, "learning_rate": 4.8867425139369104e-06, "loss": 2.2814, "step": 11491 }, { "epoch": 0.6165236051502145, "grad_norm": 0.431640625, "learning_rate": 4.8867166598251775e-06, "loss": 2.3058, "step": 11492 }, { "epoch": 0.6165772532188841, "grad_norm": 0.498046875, "learning_rate": 4.886690802831236e-06, "loss": 2.501, "step": 11493 }, { "epoch": 0.6166309012875536, "grad_norm": 0.4296875, "learning_rate": 4.88666494295512e-06, "loss": 2.2755, "step": 11494 }, { "epoch": 0.6166845493562232, "grad_norm": 0.427734375, "learning_rate": 4.886639080196856e-06, "loss": 2.2552, "step": 11495 }, { "epoch": 0.6167381974248927, "grad_norm": 0.404296875, "learning_rate": 4.8866132145564784e-06, "loss": 2.2216, "step": 11496 }, { "epoch": 0.6167918454935623, "grad_norm": 0.5, "learning_rate": 4.886587346034018e-06, "loss": 2.713, "step": 11497 }, { "epoch": 0.6168454935622317, "grad_norm": 0.4375, "learning_rate": 4.886561474629505e-06, "loss": 2.288, "step": 11498 }, { "epoch": 0.6168991416309013, "grad_norm": 0.4765625, "learning_rate": 4.886535600342973e-06, "loss": 2.4681, "step": 11499 }, { "epoch": 0.6169527896995708, "grad_norm": 0.40234375, "learning_rate": 4.886509723174451e-06, "loss": 2.3172, "step": 11500 }, { "epoch": 0.6170064377682404, "grad_norm": 0.484375, "learning_rate": 4.8864838431239706e-06, "loss": 2.3955, "step": 11501 }, { "epoch": 0.6170600858369099, "grad_norm": 0.65625, "learning_rate": 4.886457960191563e-06, "loss": 2.1908, "step": 11502 }, { "epoch": 0.6171137339055794, "grad_norm": 0.546875, "learning_rate": 4.8864320743772604e-06, "loss": 2.3747, "step": 11503 }, { "epoch": 0.6171673819742489, "grad_norm": 0.4609375, "learning_rate": 4.8864061856810935e-06, "loss": 2.371, "step": 11504 }, { "epoch": 0.6172210300429185, "grad_norm": 1.0234375, "learning_rate": 4.8863802941030935e-06, "loss": 2.1678, "step": 11505 }, { "epoch": 0.617274678111588, "grad_norm": 0.48046875, "learning_rate": 4.886354399643292e-06, "loss": 2.3486, "step": 11506 }, { "epoch": 0.6173283261802575, "grad_norm": 0.55859375, "learning_rate": 4.8863285023017195e-06, "loss": 1.4225, "step": 11507 }, { "epoch": 0.617381974248927, "grad_norm": 0.6328125, "learning_rate": 4.886302602078407e-06, "loss": 1.8268, "step": 11508 }, { "epoch": 0.6174356223175965, "grad_norm": 0.373046875, "learning_rate": 4.8862766989733875e-06, "loss": 2.423, "step": 11509 }, { "epoch": 0.6174892703862661, "grad_norm": 0.7578125, "learning_rate": 4.886250792986692e-06, "loss": 1.6803, "step": 11510 }, { "epoch": 0.6175429184549356, "grad_norm": 0.396484375, "learning_rate": 4.88622488411835e-06, "loss": 2.0604, "step": 11511 }, { "epoch": 0.6175965665236052, "grad_norm": 0.44921875, "learning_rate": 4.886198972368394e-06, "loss": 2.4886, "step": 11512 }, { "epoch": 0.6176502145922746, "grad_norm": 0.466796875, "learning_rate": 4.886173057736856e-06, "loss": 2.3158, "step": 11513 }, { "epoch": 0.6177038626609442, "grad_norm": 0.53515625, "learning_rate": 4.886147140223766e-06, "loss": 2.2563, "step": 11514 }, { "epoch": 0.6177575107296137, "grad_norm": 0.48046875, "learning_rate": 4.886121219829155e-06, "loss": 1.8435, "step": 11515 }, { "epoch": 0.6178111587982833, "grad_norm": 0.453125, "learning_rate": 4.886095296553056e-06, "loss": 2.3827, "step": 11516 }, { "epoch": 0.6178648068669528, "grad_norm": 0.369140625, "learning_rate": 4.8860693703954985e-06, "loss": 2.1508, "step": 11517 }, { "epoch": 0.6179184549356224, "grad_norm": 0.39453125, "learning_rate": 4.886043441356516e-06, "loss": 2.5492, "step": 11518 }, { "epoch": 0.6179721030042918, "grad_norm": 0.451171875, "learning_rate": 4.8860175094361375e-06, "loss": 2.2134, "step": 11519 }, { "epoch": 0.6180257510729614, "grad_norm": 0.455078125, "learning_rate": 4.885991574634396e-06, "loss": 2.0502, "step": 11520 }, { "epoch": 0.6180793991416309, "grad_norm": 0.419921875, "learning_rate": 4.885965636951321e-06, "loss": 2.2403, "step": 11521 }, { "epoch": 0.6181330472103004, "grad_norm": 0.486328125, "learning_rate": 4.885939696386946e-06, "loss": 2.3829, "step": 11522 }, { "epoch": 0.61818669527897, "grad_norm": 0.453125, "learning_rate": 4.885913752941301e-06, "loss": 2.5446, "step": 11523 }, { "epoch": 0.6182403433476394, "grad_norm": 0.640625, "learning_rate": 4.8858878066144175e-06, "loss": 2.5528, "step": 11524 }, { "epoch": 0.618293991416309, "grad_norm": 0.47265625, "learning_rate": 4.885861857406327e-06, "loss": 2.2779, "step": 11525 }, { "epoch": 0.6183476394849785, "grad_norm": 0.380859375, "learning_rate": 4.885835905317061e-06, "loss": 2.1349, "step": 11526 }, { "epoch": 0.6184012875536481, "grad_norm": 0.44140625, "learning_rate": 4.8858099503466505e-06, "loss": 1.9759, "step": 11527 }, { "epoch": 0.6184549356223176, "grad_norm": 0.59765625, "learning_rate": 4.885783992495127e-06, "loss": 1.9112, "step": 11528 }, { "epoch": 0.6185085836909872, "grad_norm": 0.55859375, "learning_rate": 4.885758031762521e-06, "loss": 2.2465, "step": 11529 }, { "epoch": 0.6185622317596566, "grad_norm": 0.44140625, "learning_rate": 4.8857320681488656e-06, "loss": 2.1173, "step": 11530 }, { "epoch": 0.6186158798283262, "grad_norm": 0.39453125, "learning_rate": 4.88570610165419e-06, "loss": 2.1442, "step": 11531 }, { "epoch": 0.6186695278969957, "grad_norm": 0.60546875, "learning_rate": 4.885680132278528e-06, "loss": 1.3683, "step": 11532 }, { "epoch": 0.6187231759656653, "grad_norm": 0.41015625, "learning_rate": 4.88565416002191e-06, "loss": 2.2469, "step": 11533 }, { "epoch": 0.6187768240343348, "grad_norm": 0.671875, "learning_rate": 4.885628184884367e-06, "loss": 2.3374, "step": 11534 }, { "epoch": 0.6188304721030042, "grad_norm": 0.423828125, "learning_rate": 4.8856022068659294e-06, "loss": 2.2802, "step": 11535 }, { "epoch": 0.6188841201716738, "grad_norm": 0.53125, "learning_rate": 4.8855762259666305e-06, "loss": 2.2407, "step": 11536 }, { "epoch": 0.6189377682403433, "grad_norm": 0.494140625, "learning_rate": 4.8855502421865006e-06, "loss": 2.2477, "step": 11537 }, { "epoch": 0.6189914163090129, "grad_norm": 0.390625, "learning_rate": 4.885524255525571e-06, "loss": 1.931, "step": 11538 }, { "epoch": 0.6190450643776824, "grad_norm": 0.4609375, "learning_rate": 4.885498265983874e-06, "loss": 2.2294, "step": 11539 }, { "epoch": 0.619098712446352, "grad_norm": 0.4296875, "learning_rate": 4.88547227356144e-06, "loss": 2.6007, "step": 11540 }, { "epoch": 0.6191523605150214, "grad_norm": 0.6171875, "learning_rate": 4.8854462782583e-06, "loss": 2.2275, "step": 11541 }, { "epoch": 0.619206008583691, "grad_norm": 0.5234375, "learning_rate": 4.885420280074488e-06, "loss": 2.2789, "step": 11542 }, { "epoch": 0.6192596566523605, "grad_norm": 0.486328125, "learning_rate": 4.885394279010031e-06, "loss": 2.3515, "step": 11543 }, { "epoch": 0.6193133047210301, "grad_norm": 0.435546875, "learning_rate": 4.885368275064965e-06, "loss": 2.1332, "step": 11544 }, { "epoch": 0.6193669527896996, "grad_norm": 2.5625, "learning_rate": 4.885342268239319e-06, "loss": 2.5585, "step": 11545 }, { "epoch": 0.6194206008583691, "grad_norm": 0.51953125, "learning_rate": 4.885316258533124e-06, "loss": 2.2461, "step": 11546 }, { "epoch": 0.6194742489270386, "grad_norm": 0.396484375, "learning_rate": 4.8852902459464125e-06, "loss": 2.0252, "step": 11547 }, { "epoch": 0.6195278969957082, "grad_norm": 0.5078125, "learning_rate": 4.885264230479216e-06, "loss": 2.2898, "step": 11548 }, { "epoch": 0.6195815450643777, "grad_norm": 0.4375, "learning_rate": 4.885238212131564e-06, "loss": 2.0515, "step": 11549 }, { "epoch": 0.6196351931330472, "grad_norm": 0.388671875, "learning_rate": 4.885212190903491e-06, "loss": 2.0386, "step": 11550 }, { "epoch": 0.6196888412017167, "grad_norm": 0.4609375, "learning_rate": 4.885186166795026e-06, "loss": 2.299, "step": 11551 }, { "epoch": 0.6197424892703862, "grad_norm": 0.447265625, "learning_rate": 4.885160139806202e-06, "loss": 2.5281, "step": 11552 }, { "epoch": 0.6197961373390558, "grad_norm": 0.46875, "learning_rate": 4.885134109937049e-06, "loss": 1.5749, "step": 11553 }, { "epoch": 0.6198497854077253, "grad_norm": 0.5625, "learning_rate": 4.8851080771876e-06, "loss": 2.4364, "step": 11554 }, { "epoch": 0.6199034334763949, "grad_norm": 0.388671875, "learning_rate": 4.885082041557885e-06, "loss": 2.0529, "step": 11555 }, { "epoch": 0.6199570815450643, "grad_norm": 0.48828125, "learning_rate": 4.885056003047935e-06, "loss": 2.129, "step": 11556 }, { "epoch": 0.6200107296137339, "grad_norm": 0.396484375, "learning_rate": 4.885029961657783e-06, "loss": 2.1471, "step": 11557 }, { "epoch": 0.6200643776824034, "grad_norm": 0.59375, "learning_rate": 4.885003917387461e-06, "loss": 2.3555, "step": 11558 }, { "epoch": 0.620118025751073, "grad_norm": 0.443359375, "learning_rate": 4.884977870236998e-06, "loss": 2.2899, "step": 11559 }, { "epoch": 0.6201716738197425, "grad_norm": 0.478515625, "learning_rate": 4.884951820206427e-06, "loss": 2.3169, "step": 11560 }, { "epoch": 0.6202253218884121, "grad_norm": 0.7890625, "learning_rate": 4.88492576729578e-06, "loss": 1.7141, "step": 11561 }, { "epoch": 0.6202789699570815, "grad_norm": 0.380859375, "learning_rate": 4.884899711505087e-06, "loss": 1.8717, "step": 11562 }, { "epoch": 0.6203326180257511, "grad_norm": 0.443359375, "learning_rate": 4.884873652834381e-06, "loss": 2.6487, "step": 11563 }, { "epoch": 0.6203862660944206, "grad_norm": 0.43359375, "learning_rate": 4.884847591283691e-06, "loss": 2.275, "step": 11564 }, { "epoch": 0.6204399141630901, "grad_norm": 0.427734375, "learning_rate": 4.884821526853052e-06, "loss": 2.3478, "step": 11565 }, { "epoch": 0.6204935622317597, "grad_norm": 0.466796875, "learning_rate": 4.884795459542493e-06, "loss": 2.3139, "step": 11566 }, { "epoch": 0.6205472103004291, "grad_norm": 0.494140625, "learning_rate": 4.8847693893520456e-06, "loss": 2.4745, "step": 11567 }, { "epoch": 0.6206008583690987, "grad_norm": 0.44921875, "learning_rate": 4.884743316281742e-06, "loss": 2.2648, "step": 11568 }, { "epoch": 0.6206545064377682, "grad_norm": 0.427734375, "learning_rate": 4.884717240331613e-06, "loss": 2.4113, "step": 11569 }, { "epoch": 0.6207081545064378, "grad_norm": 0.443359375, "learning_rate": 4.884691161501691e-06, "loss": 2.2406, "step": 11570 }, { "epoch": 0.6207618025751073, "grad_norm": 0.435546875, "learning_rate": 4.884665079792007e-06, "loss": 2.4243, "step": 11571 }, { "epoch": 0.6208154506437769, "grad_norm": 0.43359375, "learning_rate": 4.884638995202592e-06, "loss": 2.239, "step": 11572 }, { "epoch": 0.6208690987124463, "grad_norm": 0.53515625, "learning_rate": 4.884612907733479e-06, "loss": 2.2545, "step": 11573 }, { "epoch": 0.6209227467811159, "grad_norm": 0.498046875, "learning_rate": 4.8845868173846975e-06, "loss": 2.2171, "step": 11574 }, { "epoch": 0.6209763948497854, "grad_norm": 0.484375, "learning_rate": 4.884560724156281e-06, "loss": 2.2868, "step": 11575 }, { "epoch": 0.621030042918455, "grad_norm": 0.462890625, "learning_rate": 4.88453462804826e-06, "loss": 2.2427, "step": 11576 }, { "epoch": 0.6210836909871245, "grad_norm": 0.484375, "learning_rate": 4.884508529060666e-06, "loss": 2.4392, "step": 11577 }, { "epoch": 0.621137339055794, "grad_norm": 0.42578125, "learning_rate": 4.884482427193529e-06, "loss": 2.2338, "step": 11578 }, { "epoch": 0.6211909871244635, "grad_norm": 0.474609375, "learning_rate": 4.884456322446883e-06, "loss": 2.2219, "step": 11579 }, { "epoch": 0.621244635193133, "grad_norm": 0.439453125, "learning_rate": 4.884430214820759e-06, "loss": 2.1389, "step": 11580 }, { "epoch": 0.6212982832618026, "grad_norm": 0.65625, "learning_rate": 4.8844041043151876e-06, "loss": 2.3026, "step": 11581 }, { "epoch": 0.6213519313304721, "grad_norm": 0.416015625, "learning_rate": 4.884377990930202e-06, "loss": 2.1746, "step": 11582 }, { "epoch": 0.6214055793991416, "grad_norm": 0.486328125, "learning_rate": 4.884351874665831e-06, "loss": 2.2118, "step": 11583 }, { "epoch": 0.6214592274678111, "grad_norm": 0.498046875, "learning_rate": 4.884325755522109e-06, "loss": 2.1974, "step": 11584 }, { "epoch": 0.6215128755364807, "grad_norm": 0.484375, "learning_rate": 4.884299633499066e-06, "loss": 2.1346, "step": 11585 }, { "epoch": 0.6215665236051502, "grad_norm": 0.423828125, "learning_rate": 4.884273508596734e-06, "loss": 2.4957, "step": 11586 }, { "epoch": 0.6216201716738198, "grad_norm": 0.5859375, "learning_rate": 4.884247380815144e-06, "loss": 2.2132, "step": 11587 }, { "epoch": 0.6216738197424893, "grad_norm": 0.46484375, "learning_rate": 4.884221250154327e-06, "loss": 2.1546, "step": 11588 }, { "epoch": 0.6217274678111588, "grad_norm": 0.48828125, "learning_rate": 4.884195116614318e-06, "loss": 2.2044, "step": 11589 }, { "epoch": 0.6217811158798283, "grad_norm": 0.44921875, "learning_rate": 4.884168980195144e-06, "loss": 2.1855, "step": 11590 }, { "epoch": 0.6218347639484979, "grad_norm": 0.443359375, "learning_rate": 4.884142840896839e-06, "loss": 2.4234, "step": 11591 }, { "epoch": 0.6218884120171674, "grad_norm": 0.498046875, "learning_rate": 4.884116698719434e-06, "loss": 2.3095, "step": 11592 }, { "epoch": 0.6219420600858369, "grad_norm": 2.328125, "learning_rate": 4.884090553662961e-06, "loss": 2.3284, "step": 11593 }, { "epoch": 0.6219957081545064, "grad_norm": 0.478515625, "learning_rate": 4.884064405727451e-06, "loss": 2.2969, "step": 11594 }, { "epoch": 0.6220493562231759, "grad_norm": 0.44140625, "learning_rate": 4.8840382549129366e-06, "loss": 2.4343, "step": 11595 }, { "epoch": 0.6221030042918455, "grad_norm": 0.412109375, "learning_rate": 4.884012101219448e-06, "loss": 2.4067, "step": 11596 }, { "epoch": 0.622156652360515, "grad_norm": 0.6953125, "learning_rate": 4.883985944647018e-06, "loss": 2.6129, "step": 11597 }, { "epoch": 0.6222103004291846, "grad_norm": 0.439453125, "learning_rate": 4.883959785195678e-06, "loss": 2.4445, "step": 11598 }, { "epoch": 0.622263948497854, "grad_norm": 0.71875, "learning_rate": 4.883933622865459e-06, "loss": 2.302, "step": 11599 }, { "epoch": 0.6223175965665236, "grad_norm": 0.56640625, "learning_rate": 4.8839074576563914e-06, "loss": 2.2735, "step": 11600 }, { "epoch": 0.6223712446351931, "grad_norm": 0.462890625, "learning_rate": 4.883881289568509e-06, "loss": 2.5784, "step": 11601 }, { "epoch": 0.6224248927038627, "grad_norm": 0.416015625, "learning_rate": 4.883855118601843e-06, "loss": 1.6253, "step": 11602 }, { "epoch": 0.6224785407725322, "grad_norm": 0.47265625, "learning_rate": 4.883828944756425e-06, "loss": 2.4058, "step": 11603 }, { "epoch": 0.6225321888412018, "grad_norm": 0.515625, "learning_rate": 4.883802768032286e-06, "loss": 2.331, "step": 11604 }, { "epoch": 0.6225858369098712, "grad_norm": 0.33984375, "learning_rate": 4.8837765884294575e-06, "loss": 2.2578, "step": 11605 }, { "epoch": 0.6226394849785408, "grad_norm": 0.5, "learning_rate": 4.8837504059479715e-06, "loss": 2.2556, "step": 11606 }, { "epoch": 0.6226931330472103, "grad_norm": 0.42578125, "learning_rate": 4.883724220587859e-06, "loss": 2.2155, "step": 11607 }, { "epoch": 0.6227467811158798, "grad_norm": 0.478515625, "learning_rate": 4.883698032349154e-06, "loss": 2.2335, "step": 11608 }, { "epoch": 0.6228004291845494, "grad_norm": 0.416015625, "learning_rate": 4.883671841231884e-06, "loss": 2.3487, "step": 11609 }, { "epoch": 0.6228540772532188, "grad_norm": 0.419921875, "learning_rate": 4.883645647236084e-06, "loss": 2.1295, "step": 11610 }, { "epoch": 0.6229077253218884, "grad_norm": 0.42578125, "learning_rate": 4.883619450361785e-06, "loss": 2.0862, "step": 11611 }, { "epoch": 0.6229613733905579, "grad_norm": 0.53125, "learning_rate": 4.8835932506090175e-06, "loss": 2.1842, "step": 11612 }, { "epoch": 0.6230150214592275, "grad_norm": 0.447265625, "learning_rate": 4.883567047977815e-06, "loss": 2.1495, "step": 11613 }, { "epoch": 0.623068669527897, "grad_norm": 0.431640625, "learning_rate": 4.883540842468207e-06, "loss": 2.0737, "step": 11614 }, { "epoch": 0.6231223175965666, "grad_norm": 0.40625, "learning_rate": 4.883514634080226e-06, "loss": 1.9345, "step": 11615 }, { "epoch": 0.623175965665236, "grad_norm": 0.453125, "learning_rate": 4.883488422813904e-06, "loss": 2.3478, "step": 11616 }, { "epoch": 0.6232296137339056, "grad_norm": 0.890625, "learning_rate": 4.883462208669274e-06, "loss": 2.1922, "step": 11617 }, { "epoch": 0.6232832618025751, "grad_norm": 0.44140625, "learning_rate": 4.883435991646364e-06, "loss": 2.1402, "step": 11618 }, { "epoch": 0.6233369098712447, "grad_norm": 0.43359375, "learning_rate": 4.883409771745209e-06, "loss": 2.3376, "step": 11619 }, { "epoch": 0.6233905579399142, "grad_norm": 0.345703125, "learning_rate": 4.883383548965838e-06, "loss": 2.0865, "step": 11620 }, { "epoch": 0.6234442060085837, "grad_norm": 0.69140625, "learning_rate": 4.883357323308285e-06, "loss": 2.1325, "step": 11621 }, { "epoch": 0.6234978540772532, "grad_norm": 0.458984375, "learning_rate": 4.883331094772581e-06, "loss": 2.2553, "step": 11622 }, { "epoch": 0.6235515021459227, "grad_norm": 0.47265625, "learning_rate": 4.883304863358757e-06, "loss": 2.4351, "step": 11623 }, { "epoch": 0.6236051502145923, "grad_norm": 0.453125, "learning_rate": 4.883278629066846e-06, "loss": 2.3908, "step": 11624 }, { "epoch": 0.6236587982832618, "grad_norm": 0.4453125, "learning_rate": 4.883252391896878e-06, "loss": 2.2645, "step": 11625 }, { "epoch": 0.6237124463519313, "grad_norm": 0.4765625, "learning_rate": 4.883226151848885e-06, "loss": 2.256, "step": 11626 }, { "epoch": 0.6237660944206008, "grad_norm": 0.4609375, "learning_rate": 4.883199908922901e-06, "loss": 2.1559, "step": 11627 }, { "epoch": 0.6238197424892704, "grad_norm": 0.51171875, "learning_rate": 4.883173663118954e-06, "loss": 2.2804, "step": 11628 }, { "epoch": 0.6238733905579399, "grad_norm": 0.431640625, "learning_rate": 4.8831474144370785e-06, "loss": 2.2828, "step": 11629 }, { "epoch": 0.6239270386266095, "grad_norm": 0.39453125, "learning_rate": 4.883121162877304e-06, "loss": 2.0707, "step": 11630 }, { "epoch": 0.623980686695279, "grad_norm": 0.466796875, "learning_rate": 4.883094908439665e-06, "loss": 2.3479, "step": 11631 }, { "epoch": 0.6240343347639485, "grad_norm": 0.451171875, "learning_rate": 4.883068651124192e-06, "loss": 2.4009, "step": 11632 }, { "epoch": 0.624087982832618, "grad_norm": 0.435546875, "learning_rate": 4.883042390930915e-06, "loss": 2.1793, "step": 11633 }, { "epoch": 0.6241416309012876, "grad_norm": 0.453125, "learning_rate": 4.8830161278598674e-06, "loss": 2.4986, "step": 11634 }, { "epoch": 0.6241952789699571, "grad_norm": 0.4296875, "learning_rate": 4.8829898619110805e-06, "loss": 2.3331, "step": 11635 }, { "epoch": 0.6242489270386266, "grad_norm": 0.474609375, "learning_rate": 4.8829635930845865e-06, "loss": 2.2527, "step": 11636 }, { "epoch": 0.6243025751072961, "grad_norm": 0.4609375, "learning_rate": 4.8829373213804165e-06, "loss": 2.1674, "step": 11637 }, { "epoch": 0.6243562231759656, "grad_norm": 0.470703125, "learning_rate": 4.882911046798603e-06, "loss": 2.4679, "step": 11638 }, { "epoch": 0.6244098712446352, "grad_norm": 0.474609375, "learning_rate": 4.882884769339176e-06, "loss": 2.1293, "step": 11639 }, { "epoch": 0.6244635193133047, "grad_norm": 0.41796875, "learning_rate": 4.882858489002169e-06, "loss": 2.2101, "step": 11640 }, { "epoch": 0.6245171673819743, "grad_norm": 0.41796875, "learning_rate": 4.8828322057876135e-06, "loss": 2.2677, "step": 11641 }, { "epoch": 0.6245708154506437, "grad_norm": 0.375, "learning_rate": 4.882805919695541e-06, "loss": 2.021, "step": 11642 }, { "epoch": 0.6246244635193133, "grad_norm": 0.43359375, "learning_rate": 4.882779630725983e-06, "loss": 1.9758, "step": 11643 }, { "epoch": 0.6246781115879828, "grad_norm": 0.423828125, "learning_rate": 4.88275333887897e-06, "loss": 2.3747, "step": 11644 }, { "epoch": 0.6247317596566524, "grad_norm": 0.51171875, "learning_rate": 4.882727044154537e-06, "loss": 2.451, "step": 11645 }, { "epoch": 0.6247854077253219, "grad_norm": 0.37109375, "learning_rate": 4.882700746552713e-06, "loss": 2.2596, "step": 11646 }, { "epoch": 0.6248390557939915, "grad_norm": 0.55078125, "learning_rate": 4.8826744460735315e-06, "loss": 2.2858, "step": 11647 }, { "epoch": 0.6248927038626609, "grad_norm": 0.62890625, "learning_rate": 4.882648142717023e-06, "loss": 2.2288, "step": 11648 }, { "epoch": 0.6249463519313305, "grad_norm": 0.388671875, "learning_rate": 4.882621836483219e-06, "loss": 2.1437, "step": 11649 }, { "epoch": 0.625, "grad_norm": 0.51953125, "learning_rate": 4.8825955273721524e-06, "loss": 2.4253, "step": 11650 }, { "epoch": 0.6250536480686695, "grad_norm": 0.49609375, "learning_rate": 4.8825692153838545e-06, "loss": 2.2878, "step": 11651 }, { "epoch": 0.6251072961373391, "grad_norm": 0.44921875, "learning_rate": 4.8825429005183575e-06, "loss": 2.4567, "step": 11652 }, { "epoch": 0.6251609442060085, "grad_norm": 0.4765625, "learning_rate": 4.882516582775693e-06, "loss": 2.4459, "step": 11653 }, { "epoch": 0.6252145922746781, "grad_norm": 0.490234375, "learning_rate": 4.882490262155891e-06, "loss": 2.6244, "step": 11654 }, { "epoch": 0.6252682403433476, "grad_norm": 0.451171875, "learning_rate": 4.882463938658985e-06, "loss": 2.4555, "step": 11655 }, { "epoch": 0.6253218884120172, "grad_norm": 0.419921875, "learning_rate": 4.8824376122850085e-06, "loss": 1.9677, "step": 11656 }, { "epoch": 0.6253755364806867, "grad_norm": 0.462890625, "learning_rate": 4.88241128303399e-06, "loss": 2.2968, "step": 11657 }, { "epoch": 0.6254291845493563, "grad_norm": 0.447265625, "learning_rate": 4.882384950905963e-06, "loss": 2.3199, "step": 11658 }, { "epoch": 0.6254828326180257, "grad_norm": 0.48828125, "learning_rate": 4.88235861590096e-06, "loss": 2.3846, "step": 11659 }, { "epoch": 0.6255364806866953, "grad_norm": 0.515625, "learning_rate": 4.882332278019009e-06, "loss": 2.4428, "step": 11660 }, { "epoch": 0.6255901287553648, "grad_norm": 0.42578125, "learning_rate": 4.882305937260148e-06, "loss": 2.2708, "step": 11661 }, { "epoch": 0.6256437768240344, "grad_norm": 0.38671875, "learning_rate": 4.882279593624404e-06, "loss": 2.1819, "step": 11662 }, { "epoch": 0.6256974248927039, "grad_norm": 0.70703125, "learning_rate": 4.8822532471118085e-06, "loss": 2.2352, "step": 11663 }, { "epoch": 0.6257510729613734, "grad_norm": 0.5234375, "learning_rate": 4.882226897722397e-06, "loss": 2.1836, "step": 11664 }, { "epoch": 0.6258047210300429, "grad_norm": 0.48046875, "learning_rate": 4.8822005454561994e-06, "loss": 2.2332, "step": 11665 }, { "epoch": 0.6258583690987124, "grad_norm": 0.486328125, "learning_rate": 4.8821741903132465e-06, "loss": 2.4031, "step": 11666 }, { "epoch": 0.625912017167382, "grad_norm": 0.4140625, "learning_rate": 4.882147832293572e-06, "loss": 2.3089, "step": 11667 }, { "epoch": 0.6259656652360515, "grad_norm": 0.431640625, "learning_rate": 4.882121471397207e-06, "loss": 2.1701, "step": 11668 }, { "epoch": 0.626019313304721, "grad_norm": 0.455078125, "learning_rate": 4.882095107624183e-06, "loss": 2.2405, "step": 11669 }, { "epoch": 0.6260729613733905, "grad_norm": 0.412109375, "learning_rate": 4.882068740974532e-06, "loss": 2.6176, "step": 11670 }, { "epoch": 0.6261266094420601, "grad_norm": 0.546875, "learning_rate": 4.882042371448285e-06, "loss": 2.3761, "step": 11671 }, { "epoch": 0.6261802575107296, "grad_norm": 0.478515625, "learning_rate": 4.882015999045475e-06, "loss": 2.3227, "step": 11672 }, { "epoch": 0.6262339055793992, "grad_norm": 0.357421875, "learning_rate": 4.881989623766135e-06, "loss": 2.0097, "step": 11673 }, { "epoch": 0.6262875536480687, "grad_norm": 0.484375, "learning_rate": 4.881963245610294e-06, "loss": 2.3109, "step": 11674 }, { "epoch": 0.6263412017167382, "grad_norm": 0.56640625, "learning_rate": 4.881936864577985e-06, "loss": 2.2023, "step": 11675 }, { "epoch": 0.6263948497854077, "grad_norm": 0.45703125, "learning_rate": 4.8819104806692404e-06, "loss": 2.4648, "step": 11676 }, { "epoch": 0.6264484978540773, "grad_norm": 0.5078125, "learning_rate": 4.881884093884093e-06, "loss": 2.2742, "step": 11677 }, { "epoch": 0.6265021459227468, "grad_norm": 0.51953125, "learning_rate": 4.881857704222572e-06, "loss": 2.2099, "step": 11678 }, { "epoch": 0.6265557939914163, "grad_norm": 0.419921875, "learning_rate": 4.881831311684712e-06, "loss": 2.1408, "step": 11679 }, { "epoch": 0.6266094420600858, "grad_norm": 0.3984375, "learning_rate": 4.8818049162705415e-06, "loss": 2.3515, "step": 11680 }, { "epoch": 0.6266630901287553, "grad_norm": 0.458984375, "learning_rate": 4.881778517980096e-06, "loss": 2.2079, "step": 11681 }, { "epoch": 0.6267167381974249, "grad_norm": 0.470703125, "learning_rate": 4.881752116813406e-06, "loss": 2.4768, "step": 11682 }, { "epoch": 0.6267703862660944, "grad_norm": 0.419921875, "learning_rate": 4.881725712770503e-06, "loss": 2.3192, "step": 11683 }, { "epoch": 0.626824034334764, "grad_norm": 1.2734375, "learning_rate": 4.881699305851419e-06, "loss": 2.4701, "step": 11684 }, { "epoch": 0.6268776824034334, "grad_norm": 0.9765625, "learning_rate": 4.8816728960561855e-06, "loss": 2.2907, "step": 11685 }, { "epoch": 0.626931330472103, "grad_norm": 0.357421875, "learning_rate": 4.881646483384835e-06, "loss": 1.8404, "step": 11686 }, { "epoch": 0.6269849785407725, "grad_norm": 0.3828125, "learning_rate": 4.8816200678374e-06, "loss": 2.414, "step": 11687 }, { "epoch": 0.6270386266094421, "grad_norm": 0.408203125, "learning_rate": 4.881593649413911e-06, "loss": 2.2011, "step": 11688 }, { "epoch": 0.6270922746781116, "grad_norm": 0.45703125, "learning_rate": 4.881567228114401e-06, "loss": 2.3232, "step": 11689 }, { "epoch": 0.6271459227467812, "grad_norm": 0.466796875, "learning_rate": 4.881540803938901e-06, "loss": 2.557, "step": 11690 }, { "epoch": 0.6271995708154506, "grad_norm": 0.416015625, "learning_rate": 4.881514376887444e-06, "loss": 2.2275, "step": 11691 }, { "epoch": 0.6272532188841202, "grad_norm": 0.392578125, "learning_rate": 4.881487946960061e-06, "loss": 2.1678, "step": 11692 }, { "epoch": 0.6273068669527897, "grad_norm": 0.50390625, "learning_rate": 4.881461514156784e-06, "loss": 2.4071, "step": 11693 }, { "epoch": 0.6273605150214592, "grad_norm": 0.47265625, "learning_rate": 4.881435078477645e-06, "loss": 1.8481, "step": 11694 }, { "epoch": 0.6274141630901288, "grad_norm": 0.48046875, "learning_rate": 4.8814086399226765e-06, "loss": 2.2289, "step": 11695 }, { "epoch": 0.6274678111587982, "grad_norm": 0.5703125, "learning_rate": 4.8813821984919106e-06, "loss": 2.3475, "step": 11696 }, { "epoch": 0.6275214592274678, "grad_norm": 0.52734375, "learning_rate": 4.881355754185378e-06, "loss": 1.4403, "step": 11697 }, { "epoch": 0.6275751072961373, "grad_norm": 0.54296875, "learning_rate": 4.881329307003111e-06, "loss": 2.3968, "step": 11698 }, { "epoch": 0.6276287553648069, "grad_norm": 0.421875, "learning_rate": 4.881302856945143e-06, "loss": 2.058, "step": 11699 }, { "epoch": 0.6276824034334764, "grad_norm": 0.4921875, "learning_rate": 4.881276404011503e-06, "loss": 1.6995, "step": 11700 }, { "epoch": 0.627736051502146, "grad_norm": 0.72265625, "learning_rate": 4.881249948202226e-06, "loss": 2.3219, "step": 11701 }, { "epoch": 0.6277896995708154, "grad_norm": 0.52734375, "learning_rate": 4.881223489517341e-06, "loss": 2.4404, "step": 11702 }, { "epoch": 0.627843347639485, "grad_norm": 0.40234375, "learning_rate": 4.881197027956883e-06, "loss": 2.2833, "step": 11703 }, { "epoch": 0.6278969957081545, "grad_norm": 0.44921875, "learning_rate": 4.881170563520883e-06, "loss": 2.2556, "step": 11704 }, { "epoch": 0.6279506437768241, "grad_norm": 0.4296875, "learning_rate": 4.881144096209372e-06, "loss": 2.1451, "step": 11705 }, { "epoch": 0.6280042918454936, "grad_norm": 0.4921875, "learning_rate": 4.881117626022382e-06, "loss": 2.5437, "step": 11706 }, { "epoch": 0.6280579399141631, "grad_norm": 0.447265625, "learning_rate": 4.881091152959946e-06, "loss": 2.1739, "step": 11707 }, { "epoch": 0.6281115879828326, "grad_norm": 0.46875, "learning_rate": 4.881064677022095e-06, "loss": 1.8445, "step": 11708 }, { "epoch": 0.6281652360515021, "grad_norm": 0.4296875, "learning_rate": 4.881038198208861e-06, "loss": 2.3074, "step": 11709 }, { "epoch": 0.6282188841201717, "grad_norm": 0.390625, "learning_rate": 4.8810117165202765e-06, "loss": 2.3671, "step": 11710 }, { "epoch": 0.6282725321888412, "grad_norm": 0.46875, "learning_rate": 4.8809852319563746e-06, "loss": 2.2994, "step": 11711 }, { "epoch": 0.6283261802575107, "grad_norm": 0.427734375, "learning_rate": 4.8809587445171845e-06, "loss": 2.091, "step": 11712 }, { "epoch": 0.6283798283261802, "grad_norm": 0.46484375, "learning_rate": 4.88093225420274e-06, "loss": 2.3575, "step": 11713 }, { "epoch": 0.6284334763948498, "grad_norm": 0.453125, "learning_rate": 4.880905761013073e-06, "loss": 2.2847, "step": 11714 }, { "epoch": 0.6284871244635193, "grad_norm": 0.43359375, "learning_rate": 4.880879264948216e-06, "loss": 2.2354, "step": 11715 }, { "epoch": 0.6285407725321889, "grad_norm": 0.61328125, "learning_rate": 4.880852766008199e-06, "loss": 2.4198, "step": 11716 }, { "epoch": 0.6285944206008584, "grad_norm": 0.43359375, "learning_rate": 4.880826264193056e-06, "loss": 2.4352, "step": 11717 }, { "epoch": 0.6286480686695279, "grad_norm": 0.478515625, "learning_rate": 4.880799759502819e-06, "loss": 1.8205, "step": 11718 }, { "epoch": 0.6287017167381974, "grad_norm": 0.482421875, "learning_rate": 4.880773251937518e-06, "loss": 2.2127, "step": 11719 }, { "epoch": 0.628755364806867, "grad_norm": 0.462890625, "learning_rate": 4.880746741497187e-06, "loss": 2.4817, "step": 11720 }, { "epoch": 0.6288090128755365, "grad_norm": 0.44140625, "learning_rate": 4.880720228181857e-06, "loss": 2.1723, "step": 11721 }, { "epoch": 0.628862660944206, "grad_norm": 0.458984375, "learning_rate": 4.88069371199156e-06, "loss": 2.2111, "step": 11722 }, { "epoch": 0.6289163090128755, "grad_norm": 2.015625, "learning_rate": 4.880667192926328e-06, "loss": 2.2679, "step": 11723 }, { "epoch": 0.628969957081545, "grad_norm": 0.376953125, "learning_rate": 4.880640670986194e-06, "loss": 2.2098, "step": 11724 }, { "epoch": 0.6290236051502146, "grad_norm": 0.40625, "learning_rate": 4.88061414617119e-06, "loss": 2.2554, "step": 11725 }, { "epoch": 0.6290772532188841, "grad_norm": 0.484375, "learning_rate": 4.880587618481347e-06, "loss": 2.3417, "step": 11726 }, { "epoch": 0.6291309012875537, "grad_norm": 0.453125, "learning_rate": 4.8805610879166975e-06, "loss": 2.4953, "step": 11727 }, { "epoch": 0.6291845493562231, "grad_norm": 0.4453125, "learning_rate": 4.880534554477273e-06, "loss": 2.3051, "step": 11728 }, { "epoch": 0.6292381974248927, "grad_norm": 0.609375, "learning_rate": 4.880508018163107e-06, "loss": 2.3779, "step": 11729 }, { "epoch": 0.6292918454935622, "grad_norm": 0.44140625, "learning_rate": 4.88048147897423e-06, "loss": 2.1911, "step": 11730 }, { "epoch": 0.6293454935622318, "grad_norm": 0.59375, "learning_rate": 4.880454936910675e-06, "loss": 2.2581, "step": 11731 }, { "epoch": 0.6293991416309013, "grad_norm": 0.53515625, "learning_rate": 4.8804283919724735e-06, "loss": 2.3569, "step": 11732 }, { "epoch": 0.6294527896995709, "grad_norm": 0.443359375, "learning_rate": 4.880401844159658e-06, "loss": 1.6709, "step": 11733 }, { "epoch": 0.6295064377682403, "grad_norm": 0.4375, "learning_rate": 4.880375293472259e-06, "loss": 1.9374, "step": 11734 }, { "epoch": 0.6295600858369099, "grad_norm": 0.435546875, "learning_rate": 4.880348739910311e-06, "loss": 1.6321, "step": 11735 }, { "epoch": 0.6296137339055794, "grad_norm": 0.388671875, "learning_rate": 4.880322183473845e-06, "loss": 2.2682, "step": 11736 }, { "epoch": 0.6296673819742489, "grad_norm": 0.44921875, "learning_rate": 4.880295624162893e-06, "loss": 2.3649, "step": 11737 }, { "epoch": 0.6297210300429185, "grad_norm": 0.48828125, "learning_rate": 4.880269061977487e-06, "loss": 2.2683, "step": 11738 }, { "epoch": 0.6297746781115879, "grad_norm": 0.4765625, "learning_rate": 4.880242496917659e-06, "loss": 2.2554, "step": 11739 }, { "epoch": 0.6298283261802575, "grad_norm": 0.404296875, "learning_rate": 4.880215928983441e-06, "loss": 2.1311, "step": 11740 }, { "epoch": 0.629881974248927, "grad_norm": 0.44140625, "learning_rate": 4.880189358174866e-06, "loss": 2.2048, "step": 11741 }, { "epoch": 0.6299356223175966, "grad_norm": 0.6484375, "learning_rate": 4.880162784491965e-06, "loss": 2.2803, "step": 11742 }, { "epoch": 0.6299892703862661, "grad_norm": 0.51171875, "learning_rate": 4.88013620793477e-06, "loss": 2.1665, "step": 11743 }, { "epoch": 0.6300429184549357, "grad_norm": 0.423828125, "learning_rate": 4.8801096285033145e-06, "loss": 2.3927, "step": 11744 }, { "epoch": 0.6300965665236051, "grad_norm": 0.39453125, "learning_rate": 4.8800830461976295e-06, "loss": 2.2507, "step": 11745 }, { "epoch": 0.6301502145922747, "grad_norm": 0.5546875, "learning_rate": 4.880056461017747e-06, "loss": 2.2628, "step": 11746 }, { "epoch": 0.6302038626609442, "grad_norm": 0.490234375, "learning_rate": 4.8800298729637e-06, "loss": 2.3561, "step": 11747 }, { "epoch": 0.6302575107296138, "grad_norm": 0.4375, "learning_rate": 4.880003282035519e-06, "loss": 1.9534, "step": 11748 }, { "epoch": 0.6303111587982833, "grad_norm": 0.515625, "learning_rate": 4.879976688233238e-06, "loss": 2.5674, "step": 11749 }, { "epoch": 0.6303648068669528, "grad_norm": 0.470703125, "learning_rate": 4.879950091556888e-06, "loss": 2.2503, "step": 11750 }, { "epoch": 0.6304184549356223, "grad_norm": 0.35546875, "learning_rate": 4.879923492006501e-06, "loss": 2.0907, "step": 11751 }, { "epoch": 0.6304721030042918, "grad_norm": 0.494140625, "learning_rate": 4.8798968895821095e-06, "loss": 2.2917, "step": 11752 }, { "epoch": 0.6305257510729614, "grad_norm": 0.37890625, "learning_rate": 4.879870284283746e-06, "loss": 1.9699, "step": 11753 }, { "epoch": 0.6305793991416309, "grad_norm": 0.439453125, "learning_rate": 4.879843676111442e-06, "loss": 2.4737, "step": 11754 }, { "epoch": 0.6306330472103004, "grad_norm": 0.4609375, "learning_rate": 4.87981706506523e-06, "loss": 2.1372, "step": 11755 }, { "epoch": 0.6306866952789699, "grad_norm": 0.515625, "learning_rate": 4.879790451145141e-06, "loss": 2.4914, "step": 11756 }, { "epoch": 0.6307403433476395, "grad_norm": 0.455078125, "learning_rate": 4.87976383435121e-06, "loss": 2.4188, "step": 11757 }, { "epoch": 0.630793991416309, "grad_norm": 0.44921875, "learning_rate": 4.8797372146834655e-06, "loss": 2.3233, "step": 11758 }, { "epoch": 0.6308476394849786, "grad_norm": 0.427734375, "learning_rate": 4.879710592141943e-06, "loss": 2.5221, "step": 11759 }, { "epoch": 0.630901287553648, "grad_norm": 0.388671875, "learning_rate": 4.879683966726672e-06, "loss": 2.0214, "step": 11760 }, { "epoch": 0.6309549356223176, "grad_norm": 0.4375, "learning_rate": 4.879657338437686e-06, "loss": 2.1715, "step": 11761 }, { "epoch": 0.6310085836909871, "grad_norm": 0.53125, "learning_rate": 4.879630707275017e-06, "loss": 2.2904, "step": 11762 }, { "epoch": 0.6310622317596567, "grad_norm": 0.50390625, "learning_rate": 4.879604073238697e-06, "loss": 2.1998, "step": 11763 }, { "epoch": 0.6311158798283262, "grad_norm": 0.462890625, "learning_rate": 4.879577436328758e-06, "loss": 2.1842, "step": 11764 }, { "epoch": 0.6311695278969958, "grad_norm": 0.359375, "learning_rate": 4.879550796545233e-06, "loss": 2.0206, "step": 11765 }, { "epoch": 0.6312231759656652, "grad_norm": 0.470703125, "learning_rate": 4.8795241538881525e-06, "loss": 2.4145, "step": 11766 }, { "epoch": 0.6312768240343347, "grad_norm": 0.51953125, "learning_rate": 4.87949750835755e-06, "loss": 2.2013, "step": 11767 }, { "epoch": 0.6313304721030043, "grad_norm": 0.474609375, "learning_rate": 4.879470859953458e-06, "loss": 2.2615, "step": 11768 }, { "epoch": 0.6313841201716738, "grad_norm": 0.4375, "learning_rate": 4.879444208675907e-06, "loss": 2.6502, "step": 11769 }, { "epoch": 0.6314377682403434, "grad_norm": 0.416015625, "learning_rate": 4.879417554524931e-06, "loss": 2.3578, "step": 11770 }, { "epoch": 0.6314914163090128, "grad_norm": 0.474609375, "learning_rate": 4.879390897500562e-06, "loss": 2.5694, "step": 11771 }, { "epoch": 0.6315450643776824, "grad_norm": 0.7421875, "learning_rate": 4.879364237602831e-06, "loss": 2.1856, "step": 11772 }, { "epoch": 0.6315987124463519, "grad_norm": 0.443359375, "learning_rate": 4.87933757483177e-06, "loss": 2.173, "step": 11773 }, { "epoch": 0.6316523605150215, "grad_norm": 0.443359375, "learning_rate": 4.879310909187413e-06, "loss": 2.3273, "step": 11774 }, { "epoch": 0.631706008583691, "grad_norm": 0.43359375, "learning_rate": 4.879284240669792e-06, "loss": 2.3673, "step": 11775 }, { "epoch": 0.6317596566523606, "grad_norm": 0.45703125, "learning_rate": 4.879257569278937e-06, "loss": 2.4583, "step": 11776 }, { "epoch": 0.63181330472103, "grad_norm": 0.455078125, "learning_rate": 4.879230895014883e-06, "loss": 2.2747, "step": 11777 }, { "epoch": 0.6318669527896996, "grad_norm": 0.38671875, "learning_rate": 4.87920421787766e-06, "loss": 2.4013, "step": 11778 }, { "epoch": 0.6319206008583691, "grad_norm": 0.54296875, "learning_rate": 4.8791775378673016e-06, "loss": 2.3647, "step": 11779 }, { "epoch": 0.6319742489270386, "grad_norm": 0.98046875, "learning_rate": 4.8791508549838385e-06, "loss": 2.457, "step": 11780 }, { "epoch": 0.6320278969957082, "grad_norm": 1.2265625, "learning_rate": 4.879124169227304e-06, "loss": 2.4625, "step": 11781 }, { "epoch": 0.6320815450643776, "grad_norm": 0.5234375, "learning_rate": 4.879097480597731e-06, "loss": 2.3734, "step": 11782 }, { "epoch": 0.6321351931330472, "grad_norm": 0.5703125, "learning_rate": 4.879070789095151e-06, "loss": 2.1941, "step": 11783 }, { "epoch": 0.6321888412017167, "grad_norm": 0.38671875, "learning_rate": 4.879044094719595e-06, "loss": 2.4067, "step": 11784 }, { "epoch": 0.6322424892703863, "grad_norm": 0.462890625, "learning_rate": 4.879017397471097e-06, "loss": 2.3288, "step": 11785 }, { "epoch": 0.6322961373390558, "grad_norm": 0.470703125, "learning_rate": 4.878990697349689e-06, "loss": 2.2667, "step": 11786 }, { "epoch": 0.6323497854077254, "grad_norm": 0.447265625, "learning_rate": 4.878963994355404e-06, "loss": 2.3711, "step": 11787 }, { "epoch": 0.6324034334763948, "grad_norm": 0.375, "learning_rate": 4.878937288488271e-06, "loss": 2.1579, "step": 11788 }, { "epoch": 0.6324570815450644, "grad_norm": 0.515625, "learning_rate": 4.878910579748326e-06, "loss": 2.5605, "step": 11789 }, { "epoch": 0.6325107296137339, "grad_norm": 0.5234375, "learning_rate": 4.8788838681355985e-06, "loss": 2.3089, "step": 11790 }, { "epoch": 0.6325643776824035, "grad_norm": 0.921875, "learning_rate": 4.878857153650123e-06, "loss": 2.2065, "step": 11791 }, { "epoch": 0.632618025751073, "grad_norm": 0.44921875, "learning_rate": 4.87883043629193e-06, "loss": 2.1796, "step": 11792 }, { "epoch": 0.6326716738197425, "grad_norm": 0.408203125, "learning_rate": 4.8788037160610526e-06, "loss": 2.1618, "step": 11793 }, { "epoch": 0.632725321888412, "grad_norm": 0.412109375, "learning_rate": 4.8787769929575235e-06, "loss": 2.0211, "step": 11794 }, { "epoch": 0.6327789699570815, "grad_norm": 0.48828125, "learning_rate": 4.878750266981374e-06, "loss": 2.4827, "step": 11795 }, { "epoch": 0.6328326180257511, "grad_norm": 0.419921875, "learning_rate": 4.878723538132636e-06, "loss": 2.2533, "step": 11796 }, { "epoch": 0.6328862660944206, "grad_norm": 0.474609375, "learning_rate": 4.878696806411344e-06, "loss": 2.3723, "step": 11797 }, { "epoch": 0.6329399141630901, "grad_norm": 0.392578125, "learning_rate": 4.878670071817527e-06, "loss": 1.9956, "step": 11798 }, { "epoch": 0.6329935622317596, "grad_norm": 0.55859375, "learning_rate": 4.8786433343512206e-06, "loss": 1.5947, "step": 11799 }, { "epoch": 0.6330472103004292, "grad_norm": 0.53515625, "learning_rate": 4.878616594012456e-06, "loss": 2.4018, "step": 11800 }, { "epoch": 0.6331008583690987, "grad_norm": 1.2421875, "learning_rate": 4.878589850801264e-06, "loss": 2.3683, "step": 11801 }, { "epoch": 0.6331545064377683, "grad_norm": 0.44921875, "learning_rate": 4.878563104717678e-06, "loss": 2.246, "step": 11802 }, { "epoch": 0.6332081545064377, "grad_norm": 0.4453125, "learning_rate": 4.878536355761731e-06, "loss": 2.447, "step": 11803 }, { "epoch": 0.6332618025751073, "grad_norm": 0.427734375, "learning_rate": 4.878509603933453e-06, "loss": 2.2863, "step": 11804 }, { "epoch": 0.6333154506437768, "grad_norm": 0.45703125, "learning_rate": 4.878482849232879e-06, "loss": 2.4683, "step": 11805 }, { "epoch": 0.6333690987124464, "grad_norm": 0.44921875, "learning_rate": 4.8784560916600394e-06, "loss": 2.2012, "step": 11806 }, { "epoch": 0.6334227467811159, "grad_norm": 0.62109375, "learning_rate": 4.878429331214969e-06, "loss": 2.4155, "step": 11807 }, { "epoch": 0.6334763948497855, "grad_norm": 0.482421875, "learning_rate": 4.878402567897698e-06, "loss": 2.3274, "step": 11808 }, { "epoch": 0.6335300429184549, "grad_norm": 0.53125, "learning_rate": 4.878375801708258e-06, "loss": 1.9642, "step": 11809 }, { "epoch": 0.6335836909871244, "grad_norm": 0.4453125, "learning_rate": 4.878349032646683e-06, "loss": 2.4588, "step": 11810 }, { "epoch": 0.633637339055794, "grad_norm": 0.41796875, "learning_rate": 4.878322260713005e-06, "loss": 2.4855, "step": 11811 }, { "epoch": 0.6336909871244635, "grad_norm": 0.462890625, "learning_rate": 4.878295485907256e-06, "loss": 2.0803, "step": 11812 }, { "epoch": 0.6337446351931331, "grad_norm": 0.5234375, "learning_rate": 4.878268708229468e-06, "loss": 2.126, "step": 11813 }, { "epoch": 0.6337982832618025, "grad_norm": 0.41796875, "learning_rate": 4.878241927679675e-06, "loss": 2.2381, "step": 11814 }, { "epoch": 0.6338519313304721, "grad_norm": 0.42578125, "learning_rate": 4.878215144257907e-06, "loss": 1.6145, "step": 11815 }, { "epoch": 0.6339055793991416, "grad_norm": 0.396484375, "learning_rate": 4.878188357964197e-06, "loss": 2.2135, "step": 11816 }, { "epoch": 0.6339592274678112, "grad_norm": 0.39453125, "learning_rate": 4.87816156879858e-06, "loss": 2.3639, "step": 11817 }, { "epoch": 0.6340128755364807, "grad_norm": 0.41796875, "learning_rate": 4.878134776761084e-06, "loss": 2.1797, "step": 11818 }, { "epoch": 0.6340665236051503, "grad_norm": 0.396484375, "learning_rate": 4.878107981851745e-06, "loss": 2.1324, "step": 11819 }, { "epoch": 0.6341201716738197, "grad_norm": 0.423828125, "learning_rate": 4.878081184070593e-06, "loss": 2.2006, "step": 11820 }, { "epoch": 0.6341738197424893, "grad_norm": 0.48046875, "learning_rate": 4.878054383417662e-06, "loss": 2.1313, "step": 11821 }, { "epoch": 0.6342274678111588, "grad_norm": 0.41796875, "learning_rate": 4.8780275798929835e-06, "loss": 2.3998, "step": 11822 }, { "epoch": 0.6342811158798283, "grad_norm": 0.455078125, "learning_rate": 4.878000773496589e-06, "loss": 2.2859, "step": 11823 }, { "epoch": 0.6343347639484979, "grad_norm": 0.43359375, "learning_rate": 4.877973964228513e-06, "loss": 1.9177, "step": 11824 }, { "epoch": 0.6343884120171673, "grad_norm": 0.486328125, "learning_rate": 4.8779471520887854e-06, "loss": 2.2461, "step": 11825 }, { "epoch": 0.6344420600858369, "grad_norm": 0.49609375, "learning_rate": 4.877920337077441e-06, "loss": 2.1263, "step": 11826 }, { "epoch": 0.6344957081545064, "grad_norm": 0.41015625, "learning_rate": 4.877893519194511e-06, "loss": 2.2492, "step": 11827 }, { "epoch": 0.634549356223176, "grad_norm": 0.67578125, "learning_rate": 4.877866698440028e-06, "loss": 2.3071, "step": 11828 }, { "epoch": 0.6346030042918455, "grad_norm": 0.4296875, "learning_rate": 4.877839874814024e-06, "loss": 2.1393, "step": 11829 }, { "epoch": 0.634656652360515, "grad_norm": 0.65625, "learning_rate": 4.877813048316531e-06, "loss": 2.3267, "step": 11830 }, { "epoch": 0.6347103004291845, "grad_norm": 0.546875, "learning_rate": 4.877786218947583e-06, "loss": 2.4507, "step": 11831 }, { "epoch": 0.6347639484978541, "grad_norm": 0.5, "learning_rate": 4.877759386707211e-06, "loss": 2.3006, "step": 11832 }, { "epoch": 0.6348175965665236, "grad_norm": 0.466796875, "learning_rate": 4.877732551595448e-06, "loss": 2.33, "step": 11833 }, { "epoch": 0.6348712446351932, "grad_norm": 0.404296875, "learning_rate": 4.877705713612327e-06, "loss": 2.3092, "step": 11834 }, { "epoch": 0.6349248927038627, "grad_norm": 0.443359375, "learning_rate": 4.877678872757878e-06, "loss": 2.4515, "step": 11835 }, { "epoch": 0.6349785407725322, "grad_norm": 0.447265625, "learning_rate": 4.877652029032136e-06, "loss": 2.3205, "step": 11836 }, { "epoch": 0.6350321888412017, "grad_norm": 0.443359375, "learning_rate": 4.8776251824351324e-06, "loss": 2.0317, "step": 11837 }, { "epoch": 0.6350858369098712, "grad_norm": 0.4140625, "learning_rate": 4.8775983329669e-06, "loss": 2.2372, "step": 11838 }, { "epoch": 0.6351394849785408, "grad_norm": 0.421875, "learning_rate": 4.877571480627471e-06, "loss": 2.1135, "step": 11839 }, { "epoch": 0.6351931330472103, "grad_norm": 0.49609375, "learning_rate": 4.877544625416877e-06, "loss": 2.4208, "step": 11840 }, { "epoch": 0.6352467811158798, "grad_norm": 0.44140625, "learning_rate": 4.877517767335152e-06, "loss": 2.2109, "step": 11841 }, { "epoch": 0.6353004291845493, "grad_norm": 0.4453125, "learning_rate": 4.877490906382327e-06, "loss": 2.3259, "step": 11842 }, { "epoch": 0.6353540772532189, "grad_norm": 0.451171875, "learning_rate": 4.877464042558435e-06, "loss": 2.2421, "step": 11843 }, { "epoch": 0.6354077253218884, "grad_norm": 0.6015625, "learning_rate": 4.877437175863509e-06, "loss": 2.4803, "step": 11844 }, { "epoch": 0.635461373390558, "grad_norm": 0.458984375, "learning_rate": 4.877410306297582e-06, "loss": 2.0452, "step": 11845 }, { "epoch": 0.6355150214592274, "grad_norm": 0.4453125, "learning_rate": 4.877383433860684e-06, "loss": 2.3705, "step": 11846 }, { "epoch": 0.635568669527897, "grad_norm": 0.49609375, "learning_rate": 4.877356558552849e-06, "loss": 2.466, "step": 11847 }, { "epoch": 0.6356223175965665, "grad_norm": 0.451171875, "learning_rate": 4.87732968037411e-06, "loss": 1.8462, "step": 11848 }, { "epoch": 0.6356759656652361, "grad_norm": 0.4609375, "learning_rate": 4.877302799324498e-06, "loss": 2.3517, "step": 11849 }, { "epoch": 0.6357296137339056, "grad_norm": 0.486328125, "learning_rate": 4.8772759154040464e-06, "loss": 2.358, "step": 11850 }, { "epoch": 0.6357832618025752, "grad_norm": 0.40625, "learning_rate": 4.877249028612787e-06, "loss": 2.1162, "step": 11851 }, { "epoch": 0.6358369098712446, "grad_norm": 0.416015625, "learning_rate": 4.877222138950753e-06, "loss": 2.185, "step": 11852 }, { "epoch": 0.6358905579399141, "grad_norm": 0.44140625, "learning_rate": 4.877195246417978e-06, "loss": 2.1345, "step": 11853 }, { "epoch": 0.6359442060085837, "grad_norm": 0.482421875, "learning_rate": 4.877168351014492e-06, "loss": 2.4815, "step": 11854 }, { "epoch": 0.6359978540772532, "grad_norm": 0.451171875, "learning_rate": 4.877141452740328e-06, "loss": 2.3892, "step": 11855 }, { "epoch": 0.6360515021459228, "grad_norm": 0.47265625, "learning_rate": 4.87711455159552e-06, "loss": 2.3608, "step": 11856 }, { "epoch": 0.6361051502145922, "grad_norm": 0.4765625, "learning_rate": 4.8770876475801e-06, "loss": 2.1978, "step": 11857 }, { "epoch": 0.6361587982832618, "grad_norm": 0.423828125, "learning_rate": 4.877060740694099e-06, "loss": 2.0492, "step": 11858 }, { "epoch": 0.6362124463519313, "grad_norm": 0.41796875, "learning_rate": 4.877033830937551e-06, "loss": 2.349, "step": 11859 }, { "epoch": 0.6362660944206009, "grad_norm": 0.609375, "learning_rate": 4.8770069183104875e-06, "loss": 2.3584, "step": 11860 }, { "epoch": 0.6363197424892704, "grad_norm": 0.53515625, "learning_rate": 4.8769800028129426e-06, "loss": 2.3116, "step": 11861 }, { "epoch": 0.63637339055794, "grad_norm": 0.37890625, "learning_rate": 4.876953084444946e-06, "loss": 2.0673, "step": 11862 }, { "epoch": 0.6364270386266094, "grad_norm": 0.45703125, "learning_rate": 4.876926163206534e-06, "loss": 2.3335, "step": 11863 }, { "epoch": 0.636480686695279, "grad_norm": 0.96875, "learning_rate": 4.876899239097735e-06, "loss": 1.5406, "step": 11864 }, { "epoch": 0.6365343347639485, "grad_norm": 0.44921875, "learning_rate": 4.876872312118586e-06, "loss": 2.1918, "step": 11865 }, { "epoch": 0.636587982832618, "grad_norm": 0.40234375, "learning_rate": 4.876845382269115e-06, "loss": 2.0408, "step": 11866 }, { "epoch": 0.6366416309012876, "grad_norm": 0.48828125, "learning_rate": 4.8768184495493576e-06, "loss": 2.4202, "step": 11867 }, { "epoch": 0.636695278969957, "grad_norm": 0.447265625, "learning_rate": 4.876791513959345e-06, "loss": 2.261, "step": 11868 }, { "epoch": 0.6367489270386266, "grad_norm": 0.466796875, "learning_rate": 4.87676457549911e-06, "loss": 2.3267, "step": 11869 }, { "epoch": 0.6368025751072961, "grad_norm": 0.4296875, "learning_rate": 4.876737634168686e-06, "loss": 2.2486, "step": 11870 }, { "epoch": 0.6368562231759657, "grad_norm": 0.48828125, "learning_rate": 4.876710689968104e-06, "loss": 2.2177, "step": 11871 }, { "epoch": 0.6369098712446352, "grad_norm": 0.498046875, "learning_rate": 4.876683742897397e-06, "loss": 2.3522, "step": 11872 }, { "epoch": 0.6369635193133047, "grad_norm": 0.421875, "learning_rate": 4.8766567929565986e-06, "loss": 2.3133, "step": 11873 }, { "epoch": 0.6370171673819742, "grad_norm": 0.45703125, "learning_rate": 4.87662984014574e-06, "loss": 2.1171, "step": 11874 }, { "epoch": 0.6370708154506438, "grad_norm": 0.42578125, "learning_rate": 4.876602884464854e-06, "loss": 2.2438, "step": 11875 }, { "epoch": 0.6371244635193133, "grad_norm": 0.41015625, "learning_rate": 4.876575925913975e-06, "loss": 2.1847, "step": 11876 }, { "epoch": 0.6371781115879829, "grad_norm": 0.447265625, "learning_rate": 4.8765489644931315e-06, "loss": 2.3364, "step": 11877 }, { "epoch": 0.6372317596566524, "grad_norm": 0.443359375, "learning_rate": 4.87652200020236e-06, "loss": 1.1997, "step": 11878 }, { "epoch": 0.6372854077253219, "grad_norm": 0.486328125, "learning_rate": 4.876495033041692e-06, "loss": 2.2854, "step": 11879 }, { "epoch": 0.6373390557939914, "grad_norm": 0.40234375, "learning_rate": 4.876468063011159e-06, "loss": 2.2361, "step": 11880 }, { "epoch": 0.6373927038626609, "grad_norm": 0.4296875, "learning_rate": 4.876441090110795e-06, "loss": 2.4235, "step": 11881 }, { "epoch": 0.6374463519313305, "grad_norm": 0.419921875, "learning_rate": 4.876414114340631e-06, "loss": 2.2496, "step": 11882 }, { "epoch": 0.6375, "grad_norm": 0.58984375, "learning_rate": 4.876387135700701e-06, "loss": 2.519, "step": 11883 }, { "epoch": 0.6375536480686695, "grad_norm": 0.44140625, "learning_rate": 4.876360154191037e-06, "loss": 2.1286, "step": 11884 }, { "epoch": 0.637607296137339, "grad_norm": 0.3828125, "learning_rate": 4.876333169811671e-06, "loss": 2.24, "step": 11885 }, { "epoch": 0.6376609442060086, "grad_norm": 0.4375, "learning_rate": 4.8763061825626365e-06, "loss": 2.2204, "step": 11886 }, { "epoch": 0.6377145922746781, "grad_norm": 0.43359375, "learning_rate": 4.876279192443966e-06, "loss": 2.4267, "step": 11887 }, { "epoch": 0.6377682403433477, "grad_norm": 0.33203125, "learning_rate": 4.876252199455691e-06, "loss": 1.9296, "step": 11888 }, { "epoch": 0.6378218884120171, "grad_norm": 0.74609375, "learning_rate": 4.876225203597845e-06, "loss": 2.251, "step": 11889 }, { "epoch": 0.6378755364806867, "grad_norm": 0.49609375, "learning_rate": 4.876198204870462e-06, "loss": 1.9477, "step": 11890 }, { "epoch": 0.6379291845493562, "grad_norm": 0.4140625, "learning_rate": 4.876171203273572e-06, "loss": 2.3304, "step": 11891 }, { "epoch": 0.6379828326180258, "grad_norm": 0.453125, "learning_rate": 4.876144198807209e-06, "loss": 2.2492, "step": 11892 }, { "epoch": 0.6380364806866953, "grad_norm": 0.3984375, "learning_rate": 4.876117191471405e-06, "loss": 2.4401, "step": 11893 }, { "epoch": 0.6380901287553649, "grad_norm": 0.451171875, "learning_rate": 4.876090181266193e-06, "loss": 2.3311, "step": 11894 }, { "epoch": 0.6381437768240343, "grad_norm": 0.890625, "learning_rate": 4.876063168191606e-06, "loss": 2.2862, "step": 11895 }, { "epoch": 0.6381974248927038, "grad_norm": 0.484375, "learning_rate": 4.876036152247676e-06, "loss": 2.19, "step": 11896 }, { "epoch": 0.6382510729613734, "grad_norm": 0.423828125, "learning_rate": 4.876009133434436e-06, "loss": 2.3617, "step": 11897 }, { "epoch": 0.6383047210300429, "grad_norm": 0.44140625, "learning_rate": 4.875982111751918e-06, "loss": 2.2852, "step": 11898 }, { "epoch": 0.6383583690987125, "grad_norm": 0.41796875, "learning_rate": 4.875955087200156e-06, "loss": 2.3747, "step": 11899 }, { "epoch": 0.6384120171673819, "grad_norm": 0.484375, "learning_rate": 4.875928059779181e-06, "loss": 2.2046, "step": 11900 }, { "epoch": 0.6384656652360515, "grad_norm": 0.482421875, "learning_rate": 4.875901029489027e-06, "loss": 2.4334, "step": 11901 }, { "epoch": 0.638519313304721, "grad_norm": 0.734375, "learning_rate": 4.875873996329726e-06, "loss": 2.4366, "step": 11902 }, { "epoch": 0.6385729613733906, "grad_norm": 0.42578125, "learning_rate": 4.875846960301309e-06, "loss": 2.3238, "step": 11903 }, { "epoch": 0.6386266094420601, "grad_norm": 0.373046875, "learning_rate": 4.8758199214038125e-06, "loss": 2.2032, "step": 11904 }, { "epoch": 0.6386802575107297, "grad_norm": 0.45703125, "learning_rate": 4.875792879637265e-06, "loss": 2.4571, "step": 11905 }, { "epoch": 0.6387339055793991, "grad_norm": 0.4609375, "learning_rate": 4.875765835001703e-06, "loss": 2.3898, "step": 11906 }, { "epoch": 0.6387875536480687, "grad_norm": 0.45703125, "learning_rate": 4.875738787497157e-06, "loss": 2.2991, "step": 11907 }, { "epoch": 0.6388412017167382, "grad_norm": 0.458984375, "learning_rate": 4.875711737123659e-06, "loss": 2.1931, "step": 11908 }, { "epoch": 0.6388948497854077, "grad_norm": 0.4765625, "learning_rate": 4.875684683881242e-06, "loss": 2.4827, "step": 11909 }, { "epoch": 0.6389484978540773, "grad_norm": 0.52734375, "learning_rate": 4.8756576277699406e-06, "loss": 2.0544, "step": 11910 }, { "epoch": 0.6390021459227467, "grad_norm": 0.400390625, "learning_rate": 4.875630568789786e-06, "loss": 2.3204, "step": 11911 }, { "epoch": 0.6390557939914163, "grad_norm": 0.40625, "learning_rate": 4.875603506940811e-06, "loss": 2.3467, "step": 11912 }, { "epoch": 0.6391094420600858, "grad_norm": 0.462890625, "learning_rate": 4.875576442223048e-06, "loss": 2.3393, "step": 11913 }, { "epoch": 0.6391630901287554, "grad_norm": 0.46484375, "learning_rate": 4.87554937463653e-06, "loss": 2.6052, "step": 11914 }, { "epoch": 0.6392167381974249, "grad_norm": 0.3984375, "learning_rate": 4.875522304181289e-06, "loss": 2.3061, "step": 11915 }, { "epoch": 0.6392703862660944, "grad_norm": 0.466796875, "learning_rate": 4.87549523085736e-06, "loss": 2.2742, "step": 11916 }, { "epoch": 0.6393240343347639, "grad_norm": 0.478515625, "learning_rate": 4.875468154664773e-06, "loss": 2.1077, "step": 11917 }, { "epoch": 0.6393776824034335, "grad_norm": 0.6640625, "learning_rate": 4.875441075603562e-06, "loss": 2.1671, "step": 11918 }, { "epoch": 0.639431330472103, "grad_norm": 0.419921875, "learning_rate": 4.875413993673759e-06, "loss": 2.2107, "step": 11919 }, { "epoch": 0.6394849785407726, "grad_norm": 0.478515625, "learning_rate": 4.875386908875398e-06, "loss": 2.4184, "step": 11920 }, { "epoch": 0.639538626609442, "grad_norm": 0.8359375, "learning_rate": 4.8753598212085105e-06, "loss": 2.235, "step": 11921 }, { "epoch": 0.6395922746781116, "grad_norm": 0.43359375, "learning_rate": 4.875332730673129e-06, "loss": 2.3377, "step": 11922 }, { "epoch": 0.6396459227467811, "grad_norm": 0.376953125, "learning_rate": 4.875305637269288e-06, "loss": 2.1455, "step": 11923 }, { "epoch": 0.6396995708154506, "grad_norm": 0.447265625, "learning_rate": 4.875278540997017e-06, "loss": 2.2567, "step": 11924 }, { "epoch": 0.6397532188841202, "grad_norm": 0.498046875, "learning_rate": 4.875251441856352e-06, "loss": 2.3012, "step": 11925 }, { "epoch": 0.6398068669527897, "grad_norm": 0.427734375, "learning_rate": 4.875224339847325e-06, "loss": 2.1507, "step": 11926 }, { "epoch": 0.6398605150214592, "grad_norm": 0.36328125, "learning_rate": 4.875197234969968e-06, "loss": 2.2614, "step": 11927 }, { "epoch": 0.6399141630901287, "grad_norm": 0.6015625, "learning_rate": 4.875170127224314e-06, "loss": 2.2028, "step": 11928 }, { "epoch": 0.6399678111587983, "grad_norm": 0.43359375, "learning_rate": 4.875143016610394e-06, "loss": 2.161, "step": 11929 }, { "epoch": 0.6400214592274678, "grad_norm": 0.546875, "learning_rate": 4.875115903128244e-06, "loss": 2.4515, "step": 11930 }, { "epoch": 0.6400751072961374, "grad_norm": 0.48046875, "learning_rate": 4.8750887867778955e-06, "loss": 2.5667, "step": 11931 }, { "epoch": 0.6401287553648068, "grad_norm": 0.419921875, "learning_rate": 4.87506166755938e-06, "loss": 2.3205, "step": 11932 }, { "epoch": 0.6401824034334764, "grad_norm": 0.466796875, "learning_rate": 4.875034545472731e-06, "loss": 2.3668, "step": 11933 }, { "epoch": 0.6402360515021459, "grad_norm": 0.41015625, "learning_rate": 4.875007420517981e-06, "loss": 1.8906, "step": 11934 }, { "epoch": 0.6402896995708155, "grad_norm": 0.408203125, "learning_rate": 4.874980292695164e-06, "loss": 2.1502, "step": 11935 }, { "epoch": 0.640343347639485, "grad_norm": 0.421875, "learning_rate": 4.874953162004312e-06, "loss": 2.3188, "step": 11936 }, { "epoch": 0.6403969957081546, "grad_norm": 0.404296875, "learning_rate": 4.874926028445458e-06, "loss": 2.2475, "step": 11937 }, { "epoch": 0.640450643776824, "grad_norm": 0.4921875, "learning_rate": 4.874898892018633e-06, "loss": 2.2609, "step": 11938 }, { "epoch": 0.6405042918454935, "grad_norm": 0.470703125, "learning_rate": 4.874871752723872e-06, "loss": 2.3788, "step": 11939 }, { "epoch": 0.6405579399141631, "grad_norm": 0.39453125, "learning_rate": 4.874844610561207e-06, "loss": 2.0149, "step": 11940 }, { "epoch": 0.6406115879828326, "grad_norm": 0.431640625, "learning_rate": 4.874817465530671e-06, "loss": 2.1003, "step": 11941 }, { "epoch": 0.6406652360515022, "grad_norm": 0.76171875, "learning_rate": 4.874790317632296e-06, "loss": 1.5294, "step": 11942 }, { "epoch": 0.6407188841201716, "grad_norm": 0.5546875, "learning_rate": 4.874763166866116e-06, "loss": 2.5656, "step": 11943 }, { "epoch": 0.6407725321888412, "grad_norm": 0.4140625, "learning_rate": 4.874736013232162e-06, "loss": 2.3825, "step": 11944 }, { "epoch": 0.6408261802575107, "grad_norm": 0.408203125, "learning_rate": 4.874708856730469e-06, "loss": 2.2411, "step": 11945 }, { "epoch": 0.6408798283261803, "grad_norm": 0.41015625, "learning_rate": 4.874681697361067e-06, "loss": 2.2783, "step": 11946 }, { "epoch": 0.6409334763948498, "grad_norm": 0.44140625, "learning_rate": 4.874654535123993e-06, "loss": 2.4234, "step": 11947 }, { "epoch": 0.6409871244635194, "grad_norm": 0.390625, "learning_rate": 4.874627370019276e-06, "loss": 2.4619, "step": 11948 }, { "epoch": 0.6410407725321888, "grad_norm": 0.431640625, "learning_rate": 4.87460020204695e-06, "loss": 2.3207, "step": 11949 }, { "epoch": 0.6410944206008584, "grad_norm": 0.421875, "learning_rate": 4.874573031207048e-06, "loss": 2.3373, "step": 11950 }, { "epoch": 0.6411480686695279, "grad_norm": 0.80078125, "learning_rate": 4.8745458574996015e-06, "loss": 2.1705, "step": 11951 }, { "epoch": 0.6412017167381975, "grad_norm": 0.453125, "learning_rate": 4.874518680924646e-06, "loss": 2.2546, "step": 11952 }, { "epoch": 0.641255364806867, "grad_norm": 0.6328125, "learning_rate": 4.874491501482213e-06, "loss": 1.9634, "step": 11953 }, { "epoch": 0.6413090128755364, "grad_norm": 0.341796875, "learning_rate": 4.874464319172335e-06, "loss": 2.182, "step": 11954 }, { "epoch": 0.641362660944206, "grad_norm": 0.546875, "learning_rate": 4.8744371339950445e-06, "loss": 2.4232, "step": 11955 }, { "epoch": 0.6414163090128755, "grad_norm": 0.494140625, "learning_rate": 4.874409945950375e-06, "loss": 2.2292, "step": 11956 }, { "epoch": 0.6414699570815451, "grad_norm": 0.419921875, "learning_rate": 4.874382755038359e-06, "loss": 2.2799, "step": 11957 }, { "epoch": 0.6415236051502146, "grad_norm": 0.439453125, "learning_rate": 4.87435556125903e-06, "loss": 2.3516, "step": 11958 }, { "epoch": 0.6415772532188841, "grad_norm": 0.390625, "learning_rate": 4.874328364612419e-06, "loss": 2.2057, "step": 11959 }, { "epoch": 0.6416309012875536, "grad_norm": 0.46484375, "learning_rate": 4.8743011650985616e-06, "loss": 2.5369, "step": 11960 }, { "epoch": 0.6416845493562232, "grad_norm": 0.4609375, "learning_rate": 4.874273962717489e-06, "loss": 2.3696, "step": 11961 }, { "epoch": 0.6417381974248927, "grad_norm": 0.390625, "learning_rate": 4.874246757469234e-06, "loss": 2.3275, "step": 11962 }, { "epoch": 0.6417918454935623, "grad_norm": 0.53515625, "learning_rate": 4.87421954935383e-06, "loss": 1.5317, "step": 11963 }, { "epoch": 0.6418454935622318, "grad_norm": 0.4453125, "learning_rate": 4.874192338371309e-06, "loss": 2.3663, "step": 11964 }, { "epoch": 0.6418991416309013, "grad_norm": 0.5625, "learning_rate": 4.8741651245217045e-06, "loss": 2.2747, "step": 11965 }, { "epoch": 0.6419527896995708, "grad_norm": 0.4375, "learning_rate": 4.87413790780505e-06, "loss": 2.3496, "step": 11966 }, { "epoch": 0.6420064377682403, "grad_norm": 0.494140625, "learning_rate": 4.8741106882213774e-06, "loss": 2.2415, "step": 11967 }, { "epoch": 0.6420600858369099, "grad_norm": 0.4375, "learning_rate": 4.87408346577072e-06, "loss": 2.2715, "step": 11968 }, { "epoch": 0.6421137339055794, "grad_norm": 0.578125, "learning_rate": 4.874056240453109e-06, "loss": 2.248, "step": 11969 }, { "epoch": 0.6421673819742489, "grad_norm": 0.5234375, "learning_rate": 4.874029012268581e-06, "loss": 1.8675, "step": 11970 }, { "epoch": 0.6422210300429184, "grad_norm": 0.447265625, "learning_rate": 4.874001781217165e-06, "loss": 2.4116, "step": 11971 }, { "epoch": 0.642274678111588, "grad_norm": 0.90625, "learning_rate": 4.873974547298896e-06, "loss": 2.2794, "step": 11972 }, { "epoch": 0.6423283261802575, "grad_norm": 0.65625, "learning_rate": 4.873947310513807e-06, "loss": 2.5128, "step": 11973 }, { "epoch": 0.6423819742489271, "grad_norm": 0.453125, "learning_rate": 4.873920070861929e-06, "loss": 2.4211, "step": 11974 }, { "epoch": 0.6424356223175965, "grad_norm": 0.47265625, "learning_rate": 4.873892828343298e-06, "loss": 2.4086, "step": 11975 }, { "epoch": 0.6424892703862661, "grad_norm": 0.462890625, "learning_rate": 4.873865582957944e-06, "loss": 2.0983, "step": 11976 }, { "epoch": 0.6425429184549356, "grad_norm": 0.4375, "learning_rate": 4.873838334705901e-06, "loss": 2.3056, "step": 11977 }, { "epoch": 0.6425965665236052, "grad_norm": 0.49609375, "learning_rate": 4.8738110835872025e-06, "loss": 2.2619, "step": 11978 }, { "epoch": 0.6426502145922747, "grad_norm": 0.56640625, "learning_rate": 4.873783829601881e-06, "loss": 2.3306, "step": 11979 }, { "epoch": 0.6427038626609443, "grad_norm": 0.46875, "learning_rate": 4.873756572749968e-06, "loss": 2.2093, "step": 11980 }, { "epoch": 0.6427575107296137, "grad_norm": 0.431640625, "learning_rate": 4.873729313031499e-06, "loss": 2.3175, "step": 11981 }, { "epoch": 0.6428111587982832, "grad_norm": 0.4296875, "learning_rate": 4.873702050446505e-06, "loss": 2.2699, "step": 11982 }, { "epoch": 0.6428648068669528, "grad_norm": 0.51953125, "learning_rate": 4.87367478499502e-06, "loss": 2.399, "step": 11983 }, { "epoch": 0.6429184549356223, "grad_norm": 0.486328125, "learning_rate": 4.873647516677076e-06, "loss": 2.2355, "step": 11984 }, { "epoch": 0.6429721030042919, "grad_norm": 0.5234375, "learning_rate": 4.873620245492706e-06, "loss": 1.5135, "step": 11985 }, { "epoch": 0.6430257510729613, "grad_norm": 0.7109375, "learning_rate": 4.873592971441944e-06, "loss": 2.2612, "step": 11986 }, { "epoch": 0.6430793991416309, "grad_norm": 0.50390625, "learning_rate": 4.873565694524822e-06, "loss": 2.3748, "step": 11987 }, { "epoch": 0.6431330472103004, "grad_norm": 0.4765625, "learning_rate": 4.873538414741373e-06, "loss": 2.3767, "step": 11988 }, { "epoch": 0.64318669527897, "grad_norm": 0.486328125, "learning_rate": 4.8735111320916305e-06, "loss": 2.5317, "step": 11989 }, { "epoch": 0.6432403433476395, "grad_norm": 0.365234375, "learning_rate": 4.873483846575626e-06, "loss": 2.1346, "step": 11990 }, { "epoch": 0.643293991416309, "grad_norm": 0.390625, "learning_rate": 4.873456558193395e-06, "loss": 2.1809, "step": 11991 }, { "epoch": 0.6433476394849785, "grad_norm": 0.470703125, "learning_rate": 4.873429266944968e-06, "loss": 2.3697, "step": 11992 }, { "epoch": 0.6434012875536481, "grad_norm": 0.453125, "learning_rate": 4.873401972830379e-06, "loss": 2.1523, "step": 11993 }, { "epoch": 0.6434549356223176, "grad_norm": 0.48046875, "learning_rate": 4.873374675849662e-06, "loss": 2.1894, "step": 11994 }, { "epoch": 0.6435085836909872, "grad_norm": 0.4140625, "learning_rate": 4.873347376002847e-06, "loss": 2.3086, "step": 11995 }, { "epoch": 0.6435622317596567, "grad_norm": 0.4375, "learning_rate": 4.87332007328997e-06, "loss": 1.8426, "step": 11996 }, { "epoch": 0.6436158798283261, "grad_norm": 0.462890625, "learning_rate": 4.873292767711062e-06, "loss": 2.3313, "step": 11997 }, { "epoch": 0.6436695278969957, "grad_norm": 0.458984375, "learning_rate": 4.873265459266158e-06, "loss": 2.2297, "step": 11998 }, { "epoch": 0.6437231759656652, "grad_norm": 0.45703125, "learning_rate": 4.873238147955289e-06, "loss": 1.6657, "step": 11999 }, { "epoch": 0.6437768240343348, "grad_norm": 0.46875, "learning_rate": 4.8732108337784885e-06, "loss": 2.0471, "step": 12000 }, { "epoch": 0.6438304721030043, "grad_norm": 0.443359375, "learning_rate": 4.87318351673579e-06, "loss": 2.4456, "step": 12001 }, { "epoch": 0.6438841201716738, "grad_norm": 0.4296875, "learning_rate": 4.873156196827226e-06, "loss": 2.2482, "step": 12002 }, { "epoch": 0.6439377682403433, "grad_norm": 0.443359375, "learning_rate": 4.87312887405283e-06, "loss": 2.1915, "step": 12003 }, { "epoch": 0.6439914163090129, "grad_norm": 0.6015625, "learning_rate": 4.873101548412634e-06, "loss": 2.2766, "step": 12004 }, { "epoch": 0.6440450643776824, "grad_norm": 0.46875, "learning_rate": 4.873074219906673e-06, "loss": 2.3394, "step": 12005 }, { "epoch": 0.644098712446352, "grad_norm": 0.40234375, "learning_rate": 4.873046888534977e-06, "loss": 2.3193, "step": 12006 }, { "epoch": 0.6441523605150214, "grad_norm": 0.53515625, "learning_rate": 4.8730195542975815e-06, "loss": 2.0842, "step": 12007 }, { "epoch": 0.644206008583691, "grad_norm": 0.45703125, "learning_rate": 4.872992217194518e-06, "loss": 2.5838, "step": 12008 }, { "epoch": 0.6442596566523605, "grad_norm": 0.5390625, "learning_rate": 4.872964877225822e-06, "loss": 2.354, "step": 12009 }, { "epoch": 0.64431330472103, "grad_norm": 0.45703125, "learning_rate": 4.872937534391523e-06, "loss": 2.0685, "step": 12010 }, { "epoch": 0.6443669527896996, "grad_norm": 0.4609375, "learning_rate": 4.872910188691657e-06, "loss": 2.0636, "step": 12011 }, { "epoch": 0.644420600858369, "grad_norm": 0.421875, "learning_rate": 4.872882840126255e-06, "loss": 2.0566, "step": 12012 }, { "epoch": 0.6444742489270386, "grad_norm": 0.462890625, "learning_rate": 4.87285548869535e-06, "loss": 2.3334, "step": 12013 }, { "epoch": 0.6445278969957081, "grad_norm": 0.427734375, "learning_rate": 4.872828134398978e-06, "loss": 2.231, "step": 12014 }, { "epoch": 0.6445815450643777, "grad_norm": 0.3828125, "learning_rate": 4.872800777237168e-06, "loss": 2.1531, "step": 12015 }, { "epoch": 0.6446351931330472, "grad_norm": 0.365234375, "learning_rate": 4.872773417209956e-06, "loss": 1.8977, "step": 12016 }, { "epoch": 0.6446888412017168, "grad_norm": 1.15625, "learning_rate": 4.8727460543173734e-06, "loss": 2.1965, "step": 12017 }, { "epoch": 0.6447424892703862, "grad_norm": 1.4453125, "learning_rate": 4.872718688559453e-06, "loss": 2.3075, "step": 12018 }, { "epoch": 0.6447961373390558, "grad_norm": 0.490234375, "learning_rate": 4.87269131993623e-06, "loss": 2.28, "step": 12019 }, { "epoch": 0.6448497854077253, "grad_norm": 0.52734375, "learning_rate": 4.872663948447736e-06, "loss": 2.41, "step": 12020 }, { "epoch": 0.6449034334763949, "grad_norm": 0.458984375, "learning_rate": 4.872636574094003e-06, "loss": 2.1708, "step": 12021 }, { "epoch": 0.6449570815450644, "grad_norm": 0.4375, "learning_rate": 4.872609196875065e-06, "loss": 2.2001, "step": 12022 }, { "epoch": 0.645010729613734, "grad_norm": 0.484375, "learning_rate": 4.872581816790957e-06, "loss": 2.5217, "step": 12023 }, { "epoch": 0.6450643776824034, "grad_norm": 0.47265625, "learning_rate": 4.872554433841709e-06, "loss": 2.3895, "step": 12024 }, { "epoch": 0.6451180257510729, "grad_norm": 0.56640625, "learning_rate": 4.872527048027356e-06, "loss": 2.114, "step": 12025 }, { "epoch": 0.6451716738197425, "grad_norm": 0.416015625, "learning_rate": 4.87249965934793e-06, "loss": 2.3425, "step": 12026 }, { "epoch": 0.645225321888412, "grad_norm": 0.482421875, "learning_rate": 4.872472267803465e-06, "loss": 1.5596, "step": 12027 }, { "epoch": 0.6452789699570816, "grad_norm": 0.52734375, "learning_rate": 4.872444873393993e-06, "loss": 2.3225, "step": 12028 }, { "epoch": 0.645332618025751, "grad_norm": 0.435546875, "learning_rate": 4.872417476119548e-06, "loss": 2.1502, "step": 12029 }, { "epoch": 0.6453862660944206, "grad_norm": 8.8125, "learning_rate": 4.872390075980163e-06, "loss": 2.3449, "step": 12030 }, { "epoch": 0.6454399141630901, "grad_norm": 0.435546875, "learning_rate": 4.87236267297587e-06, "loss": 2.3209, "step": 12031 }, { "epoch": 0.6454935622317597, "grad_norm": 0.455078125, "learning_rate": 4.872335267106703e-06, "loss": 2.3147, "step": 12032 }, { "epoch": 0.6455472103004292, "grad_norm": 0.443359375, "learning_rate": 4.8723078583726965e-06, "loss": 2.2644, "step": 12033 }, { "epoch": 0.6456008583690988, "grad_norm": 0.4453125, "learning_rate": 4.872280446773881e-06, "loss": 2.4722, "step": 12034 }, { "epoch": 0.6456545064377682, "grad_norm": 0.64453125, "learning_rate": 4.872253032310291e-06, "loss": 2.3444, "step": 12035 }, { "epoch": 0.6457081545064378, "grad_norm": 0.427734375, "learning_rate": 4.872225614981959e-06, "loss": 1.6992, "step": 12036 }, { "epoch": 0.6457618025751073, "grad_norm": 0.419921875, "learning_rate": 4.8721981947889194e-06, "loss": 2.3223, "step": 12037 }, { "epoch": 0.6458154506437769, "grad_norm": 0.486328125, "learning_rate": 4.872170771731203e-06, "loss": 2.0948, "step": 12038 }, { "epoch": 0.6458690987124464, "grad_norm": 0.5546875, "learning_rate": 4.872143345808845e-06, "loss": 2.309, "step": 12039 }, { "epoch": 0.6459227467811158, "grad_norm": 0.5078125, "learning_rate": 4.8721159170218775e-06, "loss": 2.4359, "step": 12040 }, { "epoch": 0.6459763948497854, "grad_norm": 0.49609375, "learning_rate": 4.872088485370335e-06, "loss": 2.435, "step": 12041 }, { "epoch": 0.6460300429184549, "grad_norm": 0.447265625, "learning_rate": 4.872061050854249e-06, "loss": 2.1348, "step": 12042 }, { "epoch": 0.6460836909871245, "grad_norm": 0.4296875, "learning_rate": 4.872033613473652e-06, "loss": 2.2337, "step": 12043 }, { "epoch": 0.646137339055794, "grad_norm": 0.4609375, "learning_rate": 4.87200617322858e-06, "loss": 2.4445, "step": 12044 }, { "epoch": 0.6461909871244635, "grad_norm": 0.435546875, "learning_rate": 4.871978730119064e-06, "loss": 2.4353, "step": 12045 }, { "epoch": 0.646244635193133, "grad_norm": 0.47265625, "learning_rate": 4.871951284145137e-06, "loss": 2.2571, "step": 12046 }, { "epoch": 0.6462982832618026, "grad_norm": 0.373046875, "learning_rate": 4.8719238353068335e-06, "loss": 2.1849, "step": 12047 }, { "epoch": 0.6463519313304721, "grad_norm": 0.47265625, "learning_rate": 4.871896383604185e-06, "loss": 2.345, "step": 12048 }, { "epoch": 0.6464055793991417, "grad_norm": 0.49609375, "learning_rate": 4.871868929037226e-06, "loss": 2.3282, "step": 12049 }, { "epoch": 0.6464592274678111, "grad_norm": 0.447265625, "learning_rate": 4.871841471605989e-06, "loss": 2.3208, "step": 12050 }, { "epoch": 0.6465128755364807, "grad_norm": 0.5703125, "learning_rate": 4.8718140113105085e-06, "loss": 2.4313, "step": 12051 }, { "epoch": 0.6465665236051502, "grad_norm": 0.4375, "learning_rate": 4.8717865481508155e-06, "loss": 2.3948, "step": 12052 }, { "epoch": 0.6466201716738197, "grad_norm": 0.359375, "learning_rate": 4.871759082126943e-06, "loss": 2.1247, "step": 12053 }, { "epoch": 0.6466738197424893, "grad_norm": 0.408203125, "learning_rate": 4.871731613238927e-06, "loss": 2.3461, "step": 12054 }, { "epoch": 0.6467274678111588, "grad_norm": 0.48046875, "learning_rate": 4.871704141486799e-06, "loss": 2.003, "step": 12055 }, { "epoch": 0.6467811158798283, "grad_norm": 0.48046875, "learning_rate": 4.871676666870592e-06, "loss": 2.3144, "step": 12056 }, { "epoch": 0.6468347639484978, "grad_norm": 0.89453125, "learning_rate": 4.871649189390339e-06, "loss": 2.568, "step": 12057 }, { "epoch": 0.6468884120171674, "grad_norm": 0.484375, "learning_rate": 4.8716217090460735e-06, "loss": 2.2503, "step": 12058 }, { "epoch": 0.6469420600858369, "grad_norm": 0.453125, "learning_rate": 4.871594225837829e-06, "loss": 2.4429, "step": 12059 }, { "epoch": 0.6469957081545065, "grad_norm": 0.451171875, "learning_rate": 4.871566739765639e-06, "loss": 2.2236, "step": 12060 }, { "epoch": 0.6470493562231759, "grad_norm": 0.447265625, "learning_rate": 4.871539250829535e-06, "loss": 2.3408, "step": 12061 }, { "epoch": 0.6471030042918455, "grad_norm": 0.5546875, "learning_rate": 4.871511759029552e-06, "loss": 2.2085, "step": 12062 }, { "epoch": 0.647156652360515, "grad_norm": 0.59375, "learning_rate": 4.871484264365722e-06, "loss": 2.59, "step": 12063 }, { "epoch": 0.6472103004291846, "grad_norm": 0.51953125, "learning_rate": 4.871456766838079e-06, "loss": 2.4015, "step": 12064 }, { "epoch": 0.6472639484978541, "grad_norm": 0.5390625, "learning_rate": 4.871429266446656e-06, "loss": 2.3626, "step": 12065 }, { "epoch": 0.6473175965665237, "grad_norm": 0.4296875, "learning_rate": 4.871401763191486e-06, "loss": 2.2792, "step": 12066 }, { "epoch": 0.6473712446351931, "grad_norm": 0.53515625, "learning_rate": 4.871374257072603e-06, "loss": 2.2105, "step": 12067 }, { "epoch": 0.6474248927038626, "grad_norm": 0.51171875, "learning_rate": 4.871346748090039e-06, "loss": 2.7045, "step": 12068 }, { "epoch": 0.6474785407725322, "grad_norm": 0.431640625, "learning_rate": 4.871319236243828e-06, "loss": 2.0424, "step": 12069 }, { "epoch": 0.6475321888412017, "grad_norm": 0.427734375, "learning_rate": 4.871291721534002e-06, "loss": 2.1801, "step": 12070 }, { "epoch": 0.6475858369098713, "grad_norm": 0.46484375, "learning_rate": 4.8712642039605965e-06, "loss": 2.4236, "step": 12071 }, { "epoch": 0.6476394849785407, "grad_norm": 0.365234375, "learning_rate": 4.871236683523643e-06, "loss": 2.2138, "step": 12072 }, { "epoch": 0.6476931330472103, "grad_norm": 0.703125, "learning_rate": 4.871209160223175e-06, "loss": 2.0633, "step": 12073 }, { "epoch": 0.6477467811158798, "grad_norm": 0.439453125, "learning_rate": 4.871181634059226e-06, "loss": 2.443, "step": 12074 }, { "epoch": 0.6478004291845494, "grad_norm": 0.4453125, "learning_rate": 4.871154105031829e-06, "loss": 2.3606, "step": 12075 }, { "epoch": 0.6478540772532189, "grad_norm": 0.515625, "learning_rate": 4.871126573141018e-06, "loss": 2.3512, "step": 12076 }, { "epoch": 0.6479077253218885, "grad_norm": 0.5625, "learning_rate": 4.8710990383868255e-06, "loss": 2.3043, "step": 12077 }, { "epoch": 0.6479613733905579, "grad_norm": 0.451171875, "learning_rate": 4.871071500769284e-06, "loss": 2.383, "step": 12078 }, { "epoch": 0.6480150214592275, "grad_norm": 0.45703125, "learning_rate": 4.871043960288428e-06, "loss": 2.4759, "step": 12079 }, { "epoch": 0.648068669527897, "grad_norm": 0.478515625, "learning_rate": 4.871016416944292e-06, "loss": 2.4158, "step": 12080 }, { "epoch": 0.6481223175965666, "grad_norm": 0.47265625, "learning_rate": 4.870988870736906e-06, "loss": 2.3924, "step": 12081 }, { "epoch": 0.648175965665236, "grad_norm": 0.4375, "learning_rate": 4.870961321666306e-06, "loss": 2.475, "step": 12082 }, { "epoch": 0.6482296137339055, "grad_norm": 0.68359375, "learning_rate": 4.870933769732523e-06, "loss": 2.1704, "step": 12083 }, { "epoch": 0.6482832618025751, "grad_norm": 0.41796875, "learning_rate": 4.870906214935593e-06, "loss": 2.3809, "step": 12084 }, { "epoch": 0.6483369098712446, "grad_norm": 0.44921875, "learning_rate": 4.870878657275547e-06, "loss": 2.0431, "step": 12085 }, { "epoch": 0.6483905579399142, "grad_norm": 0.5078125, "learning_rate": 4.870851096752419e-06, "loss": 2.2374, "step": 12086 }, { "epoch": 0.6484442060085837, "grad_norm": 0.37890625, "learning_rate": 4.870823533366242e-06, "loss": 2.0748, "step": 12087 }, { "epoch": 0.6484978540772532, "grad_norm": 0.6171875, "learning_rate": 4.87079596711705e-06, "loss": 2.4054, "step": 12088 }, { "epoch": 0.6485515021459227, "grad_norm": 0.439453125, "learning_rate": 4.8707683980048755e-06, "loss": 2.2099, "step": 12089 }, { "epoch": 0.6486051502145923, "grad_norm": 0.42578125, "learning_rate": 4.870740826029753e-06, "loss": 2.1924, "step": 12090 }, { "epoch": 0.6486587982832618, "grad_norm": 0.416015625, "learning_rate": 4.870713251191715e-06, "loss": 2.2605, "step": 12091 }, { "epoch": 0.6487124463519314, "grad_norm": 0.4609375, "learning_rate": 4.870685673490794e-06, "loss": 2.1104, "step": 12092 }, { "epoch": 0.6487660944206008, "grad_norm": 0.4609375, "learning_rate": 4.870658092927025e-06, "loss": 2.2513, "step": 12093 }, { "epoch": 0.6488197424892704, "grad_norm": 0.55078125, "learning_rate": 4.870630509500439e-06, "loss": 2.3768, "step": 12094 }, { "epoch": 0.6488733905579399, "grad_norm": 0.4140625, "learning_rate": 4.870602923211072e-06, "loss": 2.3945, "step": 12095 }, { "epoch": 0.6489270386266094, "grad_norm": 0.47265625, "learning_rate": 4.870575334058955e-06, "loss": 2.5213, "step": 12096 }, { "epoch": 0.648980686695279, "grad_norm": 2.546875, "learning_rate": 4.870547742044124e-06, "loss": 2.2907, "step": 12097 }, { "epoch": 0.6490343347639485, "grad_norm": 0.466796875, "learning_rate": 4.870520147166609e-06, "loss": 2.3326, "step": 12098 }, { "epoch": 0.649087982832618, "grad_norm": 0.40234375, "learning_rate": 4.870492549426446e-06, "loss": 2.0486, "step": 12099 }, { "epoch": 0.6491416309012875, "grad_norm": 0.46484375, "learning_rate": 4.870464948823666e-06, "loss": 2.207, "step": 12100 }, { "epoch": 0.6491952789699571, "grad_norm": 0.486328125, "learning_rate": 4.870437345358305e-06, "loss": 2.2027, "step": 12101 }, { "epoch": 0.6492489270386266, "grad_norm": 0.51171875, "learning_rate": 4.870409739030395e-06, "loss": 2.1949, "step": 12102 }, { "epoch": 0.6493025751072962, "grad_norm": 0.37109375, "learning_rate": 4.8703821298399685e-06, "loss": 2.2378, "step": 12103 }, { "epoch": 0.6493562231759656, "grad_norm": 0.443359375, "learning_rate": 4.8703545177870605e-06, "loss": 2.2289, "step": 12104 }, { "epoch": 0.6494098712446352, "grad_norm": 0.47265625, "learning_rate": 4.870326902871703e-06, "loss": 2.3014, "step": 12105 }, { "epoch": 0.6494635193133047, "grad_norm": 0.42578125, "learning_rate": 4.87029928509393e-06, "loss": 2.1402, "step": 12106 }, { "epoch": 0.6495171673819743, "grad_norm": 0.494140625, "learning_rate": 4.870271664453774e-06, "loss": 2.3939, "step": 12107 }, { "epoch": 0.6495708154506438, "grad_norm": 0.455078125, "learning_rate": 4.87024404095127e-06, "loss": 2.281, "step": 12108 }, { "epoch": 0.6496244635193134, "grad_norm": 0.39453125, "learning_rate": 4.87021641458645e-06, "loss": 2.36, "step": 12109 }, { "epoch": 0.6496781115879828, "grad_norm": 0.431640625, "learning_rate": 4.870188785359348e-06, "loss": 2.3363, "step": 12110 }, { "epoch": 0.6497317596566523, "grad_norm": 0.396484375, "learning_rate": 4.870161153269996e-06, "loss": 2.3969, "step": 12111 }, { "epoch": 0.6497854077253219, "grad_norm": 0.5625, "learning_rate": 4.87013351831843e-06, "loss": 2.4075, "step": 12112 }, { "epoch": 0.6498390557939914, "grad_norm": 0.36328125, "learning_rate": 4.870105880504681e-06, "loss": 2.2075, "step": 12113 }, { "epoch": 0.649892703862661, "grad_norm": 0.42578125, "learning_rate": 4.870078239828784e-06, "loss": 2.3578, "step": 12114 }, { "epoch": 0.6499463519313304, "grad_norm": 0.40625, "learning_rate": 4.870050596290771e-06, "loss": 2.1551, "step": 12115 }, { "epoch": 0.65, "grad_norm": 0.455078125, "learning_rate": 4.870022949890676e-06, "loss": 2.4343, "step": 12116 }, { "epoch": 0.6500536480686695, "grad_norm": 0.43359375, "learning_rate": 4.8699953006285334e-06, "loss": 2.345, "step": 12117 }, { "epoch": 0.6501072961373391, "grad_norm": 1.2109375, "learning_rate": 4.869967648504375e-06, "loss": 2.2912, "step": 12118 }, { "epoch": 0.6501609442060086, "grad_norm": 0.97265625, "learning_rate": 4.8699399935182345e-06, "loss": 2.3441, "step": 12119 }, { "epoch": 0.6502145922746781, "grad_norm": 1.296875, "learning_rate": 4.8699123356701455e-06, "loss": 2.3856, "step": 12120 }, { "epoch": 0.6502682403433476, "grad_norm": 0.431640625, "learning_rate": 4.869884674960141e-06, "loss": 1.5361, "step": 12121 }, { "epoch": 0.6503218884120172, "grad_norm": 0.421875, "learning_rate": 4.869857011388256e-06, "loss": 2.3277, "step": 12122 }, { "epoch": 0.6503755364806867, "grad_norm": 0.482421875, "learning_rate": 4.869829344954523e-06, "loss": 2.3295, "step": 12123 }, { "epoch": 0.6504291845493563, "grad_norm": 0.33984375, "learning_rate": 4.869801675658974e-06, "loss": 2.0048, "step": 12124 }, { "epoch": 0.6504828326180258, "grad_norm": 0.462890625, "learning_rate": 4.8697740035016444e-06, "loss": 2.3077, "step": 12125 }, { "epoch": 0.6505364806866952, "grad_norm": 0.416015625, "learning_rate": 4.869746328482566e-06, "loss": 2.2765, "step": 12126 }, { "epoch": 0.6505901287553648, "grad_norm": 0.369140625, "learning_rate": 4.869718650601774e-06, "loss": 2.1342, "step": 12127 }, { "epoch": 0.6506437768240343, "grad_norm": 0.48828125, "learning_rate": 4.869690969859301e-06, "loss": 2.3355, "step": 12128 }, { "epoch": 0.6506974248927039, "grad_norm": 0.46875, "learning_rate": 4.86966328625518e-06, "loss": 2.4852, "step": 12129 }, { "epoch": 0.6507510729613734, "grad_norm": 0.5390625, "learning_rate": 4.869635599789444e-06, "loss": 2.1422, "step": 12130 }, { "epoch": 0.6508047210300429, "grad_norm": 0.68359375, "learning_rate": 4.8696079104621285e-06, "loss": 2.3431, "step": 12131 }, { "epoch": 0.6508583690987124, "grad_norm": 0.9296875, "learning_rate": 4.869580218273265e-06, "loss": 2.0004, "step": 12132 }, { "epoch": 0.650912017167382, "grad_norm": 0.5078125, "learning_rate": 4.869552523222887e-06, "loss": 2.3267, "step": 12133 }, { "epoch": 0.6509656652360515, "grad_norm": 0.375, "learning_rate": 4.869524825311029e-06, "loss": 2.2253, "step": 12134 }, { "epoch": 0.6510193133047211, "grad_norm": 0.443359375, "learning_rate": 4.8694971245377235e-06, "loss": 2.0741, "step": 12135 }, { "epoch": 0.6510729613733905, "grad_norm": 0.54296875, "learning_rate": 4.869469420903005e-06, "loss": 2.4077, "step": 12136 }, { "epoch": 0.6511266094420601, "grad_norm": 0.451171875, "learning_rate": 4.869441714406906e-06, "loss": 2.4227, "step": 12137 }, { "epoch": 0.6511802575107296, "grad_norm": 0.4609375, "learning_rate": 4.86941400504946e-06, "loss": 2.2918, "step": 12138 }, { "epoch": 0.6512339055793992, "grad_norm": 0.423828125, "learning_rate": 4.8693862928307015e-06, "loss": 2.3611, "step": 12139 }, { "epoch": 0.6512875536480687, "grad_norm": 0.412109375, "learning_rate": 4.869358577750663e-06, "loss": 2.2823, "step": 12140 }, { "epoch": 0.6513412017167381, "grad_norm": 0.74609375, "learning_rate": 4.869330859809378e-06, "loss": 2.3189, "step": 12141 }, { "epoch": 0.6513948497854077, "grad_norm": 0.435546875, "learning_rate": 4.86930313900688e-06, "loss": 2.295, "step": 12142 }, { "epoch": 0.6514484978540772, "grad_norm": 0.388671875, "learning_rate": 4.869275415343203e-06, "loss": 2.2468, "step": 12143 }, { "epoch": 0.6515021459227468, "grad_norm": 0.52734375, "learning_rate": 4.8692476888183805e-06, "loss": 2.3536, "step": 12144 }, { "epoch": 0.6515557939914163, "grad_norm": 0.59765625, "learning_rate": 4.869219959432445e-06, "loss": 2.0595, "step": 12145 }, { "epoch": 0.6516094420600859, "grad_norm": 0.3984375, "learning_rate": 4.8691922271854305e-06, "loss": 2.3097, "step": 12146 }, { "epoch": 0.6516630901287553, "grad_norm": 0.4765625, "learning_rate": 4.869164492077371e-06, "loss": 2.2555, "step": 12147 }, { "epoch": 0.6517167381974249, "grad_norm": 0.419921875, "learning_rate": 4.8691367541083e-06, "loss": 2.3248, "step": 12148 }, { "epoch": 0.6517703862660944, "grad_norm": 0.53125, "learning_rate": 4.8691090132782496e-06, "loss": 1.8482, "step": 12149 }, { "epoch": 0.651824034334764, "grad_norm": 0.462890625, "learning_rate": 4.869081269587255e-06, "loss": 2.2779, "step": 12150 }, { "epoch": 0.6518776824034335, "grad_norm": 0.51171875, "learning_rate": 4.869053523035349e-06, "loss": 2.2314, "step": 12151 }, { "epoch": 0.651931330472103, "grad_norm": 0.60546875, "learning_rate": 4.869025773622565e-06, "loss": 2.2397, "step": 12152 }, { "epoch": 0.6519849785407725, "grad_norm": 0.486328125, "learning_rate": 4.868998021348935e-06, "loss": 2.1552, "step": 12153 }, { "epoch": 0.652038626609442, "grad_norm": 1.203125, "learning_rate": 4.8689702662144965e-06, "loss": 2.416, "step": 12154 }, { "epoch": 0.6520922746781116, "grad_norm": 0.65625, "learning_rate": 4.868942508219279e-06, "loss": 2.3617, "step": 12155 }, { "epoch": 0.6521459227467811, "grad_norm": 0.51171875, "learning_rate": 4.868914747363318e-06, "loss": 2.5216, "step": 12156 }, { "epoch": 0.6521995708154507, "grad_norm": 0.47265625, "learning_rate": 4.868886983646647e-06, "loss": 2.2412, "step": 12157 }, { "epoch": 0.6522532188841201, "grad_norm": 0.36328125, "learning_rate": 4.868859217069299e-06, "loss": 2.0532, "step": 12158 }, { "epoch": 0.6523068669527897, "grad_norm": 0.51953125, "learning_rate": 4.868831447631308e-06, "loss": 2.3039, "step": 12159 }, { "epoch": 0.6523605150214592, "grad_norm": 0.43359375, "learning_rate": 4.8688036753327075e-06, "loss": 2.1068, "step": 12160 }, { "epoch": 0.6524141630901288, "grad_norm": 0.453125, "learning_rate": 4.86877590017353e-06, "loss": 2.296, "step": 12161 }, { "epoch": 0.6524678111587983, "grad_norm": 0.427734375, "learning_rate": 4.8687481221538105e-06, "loss": 1.9132, "step": 12162 }, { "epoch": 0.6525214592274678, "grad_norm": 0.9765625, "learning_rate": 4.868720341273582e-06, "loss": 2.6714, "step": 12163 }, { "epoch": 0.6525751072961373, "grad_norm": 0.453125, "learning_rate": 4.8686925575328765e-06, "loss": 2.5256, "step": 12164 }, { "epoch": 0.6526287553648069, "grad_norm": 0.357421875, "learning_rate": 4.868664770931731e-06, "loss": 2.1516, "step": 12165 }, { "epoch": 0.6526824034334764, "grad_norm": 0.4609375, "learning_rate": 4.868636981470176e-06, "loss": 2.3297, "step": 12166 }, { "epoch": 0.652736051502146, "grad_norm": 0.5546875, "learning_rate": 4.8686091891482465e-06, "loss": 2.2057, "step": 12167 }, { "epoch": 0.6527896995708155, "grad_norm": 0.57421875, "learning_rate": 4.868581393965975e-06, "loss": 2.3079, "step": 12168 }, { "epoch": 0.6528433476394849, "grad_norm": 0.46875, "learning_rate": 4.868553595923396e-06, "loss": 2.4559, "step": 12169 }, { "epoch": 0.6528969957081545, "grad_norm": 0.40625, "learning_rate": 4.868525795020544e-06, "loss": 2.15, "step": 12170 }, { "epoch": 0.652950643776824, "grad_norm": 0.380859375, "learning_rate": 4.868497991257449e-06, "loss": 2.0731, "step": 12171 }, { "epoch": 0.6530042918454936, "grad_norm": 0.486328125, "learning_rate": 4.8684701846341485e-06, "loss": 2.0798, "step": 12172 }, { "epoch": 0.653057939914163, "grad_norm": 0.494140625, "learning_rate": 4.868442375150675e-06, "loss": 2.1414, "step": 12173 }, { "epoch": 0.6531115879828326, "grad_norm": 0.515625, "learning_rate": 4.8684145628070606e-06, "loss": 2.2996, "step": 12174 }, { "epoch": 0.6531652360515021, "grad_norm": 0.46875, "learning_rate": 4.86838674760334e-06, "loss": 2.1941, "step": 12175 }, { "epoch": 0.6532188841201717, "grad_norm": 0.44140625, "learning_rate": 4.8683589295395475e-06, "loss": 2.3237, "step": 12176 }, { "epoch": 0.6532725321888412, "grad_norm": 0.453125, "learning_rate": 4.868331108615714e-06, "loss": 2.5219, "step": 12177 }, { "epoch": 0.6533261802575108, "grad_norm": 0.44140625, "learning_rate": 4.868303284831877e-06, "loss": 2.3179, "step": 12178 }, { "epoch": 0.6533798283261802, "grad_norm": 0.478515625, "learning_rate": 4.868275458188068e-06, "loss": 2.1859, "step": 12179 }, { "epoch": 0.6534334763948498, "grad_norm": 0.404296875, "learning_rate": 4.868247628684319e-06, "loss": 2.3354, "step": 12180 }, { "epoch": 0.6534871244635193, "grad_norm": 0.4140625, "learning_rate": 4.868219796320667e-06, "loss": 2.2751, "step": 12181 }, { "epoch": 0.6535407725321889, "grad_norm": 4.0625, "learning_rate": 4.868191961097143e-06, "loss": 2.538, "step": 12182 }, { "epoch": 0.6535944206008584, "grad_norm": 0.380859375, "learning_rate": 4.868164123013782e-06, "loss": 2.1727, "step": 12183 }, { "epoch": 0.6536480686695278, "grad_norm": 0.470703125, "learning_rate": 4.8681362820706164e-06, "loss": 2.2947, "step": 12184 }, { "epoch": 0.6537017167381974, "grad_norm": 0.431640625, "learning_rate": 4.868108438267681e-06, "loss": 2.0522, "step": 12185 }, { "epoch": 0.6537553648068669, "grad_norm": 0.515625, "learning_rate": 4.868080591605009e-06, "loss": 2.5177, "step": 12186 }, { "epoch": 0.6538090128755365, "grad_norm": 0.455078125, "learning_rate": 4.868052742082634e-06, "loss": 2.2407, "step": 12187 }, { "epoch": 0.653862660944206, "grad_norm": 0.40625, "learning_rate": 4.868024889700589e-06, "loss": 2.2818, "step": 12188 }, { "epoch": 0.6539163090128756, "grad_norm": 0.6328125, "learning_rate": 4.86799703445891e-06, "loss": 2.2152, "step": 12189 }, { "epoch": 0.653969957081545, "grad_norm": 0.462890625, "learning_rate": 4.867969176357627e-06, "loss": 2.3748, "step": 12190 }, { "epoch": 0.6540236051502146, "grad_norm": 0.44921875, "learning_rate": 4.867941315396776e-06, "loss": 2.2187, "step": 12191 }, { "epoch": 0.6540772532188841, "grad_norm": 0.458984375, "learning_rate": 4.867913451576391e-06, "loss": 2.2769, "step": 12192 }, { "epoch": 0.6541309012875537, "grad_norm": 0.451171875, "learning_rate": 4.8678855848965046e-06, "loss": 1.9258, "step": 12193 }, { "epoch": 0.6541845493562232, "grad_norm": 0.52734375, "learning_rate": 4.86785771535715e-06, "loss": 2.4738, "step": 12194 }, { "epoch": 0.6542381974248928, "grad_norm": 0.37109375, "learning_rate": 4.8678298429583626e-06, "loss": 1.8898, "step": 12195 }, { "epoch": 0.6542918454935622, "grad_norm": 0.404296875, "learning_rate": 4.867801967700175e-06, "loss": 2.3072, "step": 12196 }, { "epoch": 0.6543454935622317, "grad_norm": 0.50390625, "learning_rate": 4.8677740895826195e-06, "loss": 2.6223, "step": 12197 }, { "epoch": 0.6543991416309013, "grad_norm": 0.45703125, "learning_rate": 4.867746208605732e-06, "loss": 2.441, "step": 12198 }, { "epoch": 0.6544527896995708, "grad_norm": 0.4375, "learning_rate": 4.867718324769545e-06, "loss": 2.1275, "step": 12199 }, { "epoch": 0.6545064377682404, "grad_norm": 0.416015625, "learning_rate": 4.867690438074093e-06, "loss": 2.4188, "step": 12200 }, { "epoch": 0.6545600858369098, "grad_norm": 0.4921875, "learning_rate": 4.867662548519408e-06, "loss": 2.1024, "step": 12201 }, { "epoch": 0.6546137339055794, "grad_norm": 0.439453125, "learning_rate": 4.867634656105527e-06, "loss": 2.3339, "step": 12202 }, { "epoch": 0.6546673819742489, "grad_norm": 0.435546875, "learning_rate": 4.86760676083248e-06, "loss": 2.329, "step": 12203 }, { "epoch": 0.6547210300429185, "grad_norm": 0.4453125, "learning_rate": 4.867578862700302e-06, "loss": 2.3659, "step": 12204 }, { "epoch": 0.654774678111588, "grad_norm": 0.447265625, "learning_rate": 4.867550961709027e-06, "loss": 2.2883, "step": 12205 }, { "epoch": 0.6548283261802575, "grad_norm": 0.74609375, "learning_rate": 4.8675230578586895e-06, "loss": 2.3577, "step": 12206 }, { "epoch": 0.654881974248927, "grad_norm": 0.59765625, "learning_rate": 4.867495151149321e-06, "loss": 2.3331, "step": 12207 }, { "epoch": 0.6549356223175966, "grad_norm": 0.51171875, "learning_rate": 4.8674672415809575e-06, "loss": 2.3164, "step": 12208 }, { "epoch": 0.6549892703862661, "grad_norm": 0.3515625, "learning_rate": 4.8674393291536315e-06, "loss": 2.0063, "step": 12209 }, { "epoch": 0.6550429184549357, "grad_norm": 0.51171875, "learning_rate": 4.867411413867377e-06, "loss": 2.4769, "step": 12210 }, { "epoch": 0.6550965665236052, "grad_norm": 0.482421875, "learning_rate": 4.867383495722226e-06, "loss": 2.3052, "step": 12211 }, { "epoch": 0.6551502145922746, "grad_norm": 0.453125, "learning_rate": 4.867355574718216e-06, "loss": 2.2897, "step": 12212 }, { "epoch": 0.6552038626609442, "grad_norm": 0.427734375, "learning_rate": 4.867327650855378e-06, "loss": 2.4138, "step": 12213 }, { "epoch": 0.6552575107296137, "grad_norm": 0.53515625, "learning_rate": 4.867299724133746e-06, "loss": 2.3876, "step": 12214 }, { "epoch": 0.6553111587982833, "grad_norm": 0.462890625, "learning_rate": 4.867271794553353e-06, "loss": 2.2288, "step": 12215 }, { "epoch": 0.6553648068669528, "grad_norm": 0.51171875, "learning_rate": 4.867243862114235e-06, "loss": 2.4348, "step": 12216 }, { "epoch": 0.6554184549356223, "grad_norm": 0.4140625, "learning_rate": 4.867215926816425e-06, "loss": 1.9127, "step": 12217 }, { "epoch": 0.6554721030042918, "grad_norm": 2.6875, "learning_rate": 4.8671879886599545e-06, "loss": 2.1533, "step": 12218 }, { "epoch": 0.6555257510729614, "grad_norm": 0.38671875, "learning_rate": 4.86716004764486e-06, "loss": 2.2262, "step": 12219 }, { "epoch": 0.6555793991416309, "grad_norm": 0.4296875, "learning_rate": 4.867132103771174e-06, "loss": 2.1197, "step": 12220 }, { "epoch": 0.6556330472103005, "grad_norm": 0.392578125, "learning_rate": 4.86710415703893e-06, "loss": 1.9354, "step": 12221 }, { "epoch": 0.6556866952789699, "grad_norm": 0.494140625, "learning_rate": 4.867076207448162e-06, "loss": 2.4617, "step": 12222 }, { "epoch": 0.6557403433476395, "grad_norm": 0.49609375, "learning_rate": 4.867048254998904e-06, "loss": 2.1941, "step": 12223 }, { "epoch": 0.655793991416309, "grad_norm": 0.515625, "learning_rate": 4.86702029969119e-06, "loss": 2.1914, "step": 12224 }, { "epoch": 0.6558476394849786, "grad_norm": 0.482421875, "learning_rate": 4.866992341525054e-06, "loss": 2.255, "step": 12225 }, { "epoch": 0.6559012875536481, "grad_norm": 0.515625, "learning_rate": 4.866964380500529e-06, "loss": 2.1208, "step": 12226 }, { "epoch": 0.6559549356223175, "grad_norm": 0.447265625, "learning_rate": 4.866936416617648e-06, "loss": 2.4169, "step": 12227 }, { "epoch": 0.6560085836909871, "grad_norm": 0.470703125, "learning_rate": 4.866908449876446e-06, "loss": 2.4845, "step": 12228 }, { "epoch": 0.6560622317596566, "grad_norm": 0.45703125, "learning_rate": 4.866880480276956e-06, "loss": 2.3457, "step": 12229 }, { "epoch": 0.6561158798283262, "grad_norm": 0.6484375, "learning_rate": 4.8668525078192136e-06, "loss": 2.4699, "step": 12230 }, { "epoch": 0.6561695278969957, "grad_norm": 0.408203125, "learning_rate": 4.86682453250325e-06, "loss": 2.1431, "step": 12231 }, { "epoch": 0.6562231759656653, "grad_norm": 0.43359375, "learning_rate": 4.866796554329102e-06, "loss": 2.3068, "step": 12232 }, { "epoch": 0.6562768240343347, "grad_norm": 0.65234375, "learning_rate": 4.8667685732967995e-06, "loss": 2.3657, "step": 12233 }, { "epoch": 0.6563304721030043, "grad_norm": 0.453125, "learning_rate": 4.866740589406379e-06, "loss": 2.2576, "step": 12234 }, { "epoch": 0.6563841201716738, "grad_norm": 0.4375, "learning_rate": 4.8667126026578745e-06, "loss": 2.2066, "step": 12235 }, { "epoch": 0.6564377682403434, "grad_norm": 0.55859375, "learning_rate": 4.866684613051319e-06, "loss": 2.4178, "step": 12236 }, { "epoch": 0.6564914163090129, "grad_norm": 1.796875, "learning_rate": 4.866656620586745e-06, "loss": 2.3846, "step": 12237 }, { "epoch": 0.6565450643776825, "grad_norm": 0.357421875, "learning_rate": 4.866628625264189e-06, "loss": 2.0755, "step": 12238 }, { "epoch": 0.6565987124463519, "grad_norm": 0.4453125, "learning_rate": 4.866600627083683e-06, "loss": 1.9867, "step": 12239 }, { "epoch": 0.6566523605150214, "grad_norm": 0.470703125, "learning_rate": 4.866572626045261e-06, "loss": 2.3646, "step": 12240 }, { "epoch": 0.656706008583691, "grad_norm": 2.28125, "learning_rate": 4.866544622148957e-06, "loss": 2.242, "step": 12241 }, { "epoch": 0.6567596566523605, "grad_norm": 0.39453125, "learning_rate": 4.866516615394804e-06, "loss": 2.2639, "step": 12242 }, { "epoch": 0.6568133047210301, "grad_norm": 0.68359375, "learning_rate": 4.866488605782837e-06, "loss": 2.0085, "step": 12243 }, { "epoch": 0.6568669527896995, "grad_norm": 0.5234375, "learning_rate": 4.866460593313091e-06, "loss": 2.2795, "step": 12244 }, { "epoch": 0.6569206008583691, "grad_norm": 0.416015625, "learning_rate": 4.866432577985597e-06, "loss": 2.2095, "step": 12245 }, { "epoch": 0.6569742489270386, "grad_norm": 0.40625, "learning_rate": 4.866404559800391e-06, "loss": 2.436, "step": 12246 }, { "epoch": 0.6570278969957082, "grad_norm": 0.52734375, "learning_rate": 4.866376538757505e-06, "loss": 2.2402, "step": 12247 }, { "epoch": 0.6570815450643777, "grad_norm": 0.44921875, "learning_rate": 4.8663485148569745e-06, "loss": 2.1256, "step": 12248 }, { "epoch": 0.6571351931330472, "grad_norm": 0.625, "learning_rate": 4.866320488098833e-06, "loss": 2.4138, "step": 12249 }, { "epoch": 0.6571888412017167, "grad_norm": 0.373046875, "learning_rate": 4.866292458483113e-06, "loss": 2.1534, "step": 12250 }, { "epoch": 0.6572424892703863, "grad_norm": 0.8671875, "learning_rate": 4.86626442600985e-06, "loss": 2.3105, "step": 12251 }, { "epoch": 0.6572961373390558, "grad_norm": 0.46875, "learning_rate": 4.866236390679077e-06, "loss": 2.3178, "step": 12252 }, { "epoch": 0.6573497854077254, "grad_norm": 0.43359375, "learning_rate": 4.866208352490828e-06, "loss": 2.1978, "step": 12253 }, { "epoch": 0.6574034334763948, "grad_norm": 0.3984375, "learning_rate": 4.866180311445137e-06, "loss": 2.2392, "step": 12254 }, { "epoch": 0.6574570815450643, "grad_norm": 0.46875, "learning_rate": 4.866152267542038e-06, "loss": 2.4529, "step": 12255 }, { "epoch": 0.6575107296137339, "grad_norm": 0.453125, "learning_rate": 4.866124220781564e-06, "loss": 2.1175, "step": 12256 }, { "epoch": 0.6575643776824034, "grad_norm": 0.73046875, "learning_rate": 4.866096171163751e-06, "loss": 2.4651, "step": 12257 }, { "epoch": 0.657618025751073, "grad_norm": 0.431640625, "learning_rate": 4.86606811868863e-06, "loss": 2.3068, "step": 12258 }, { "epoch": 0.6576716738197425, "grad_norm": 0.490234375, "learning_rate": 4.8660400633562365e-06, "loss": 2.0511, "step": 12259 }, { "epoch": 0.657725321888412, "grad_norm": 1.8515625, "learning_rate": 4.8660120051666045e-06, "loss": 2.308, "step": 12260 }, { "epoch": 0.6577789699570815, "grad_norm": 0.6015625, "learning_rate": 4.865983944119768e-06, "loss": 2.5052, "step": 12261 }, { "epoch": 0.6578326180257511, "grad_norm": 0.45703125, "learning_rate": 4.865955880215759e-06, "loss": 2.4425, "step": 12262 }, { "epoch": 0.6578862660944206, "grad_norm": 0.392578125, "learning_rate": 4.865927813454614e-06, "loss": 2.1146, "step": 12263 }, { "epoch": 0.6579399141630902, "grad_norm": 0.6640625, "learning_rate": 4.865899743836365e-06, "loss": 2.254, "step": 12264 }, { "epoch": 0.6579935622317596, "grad_norm": 0.462890625, "learning_rate": 4.865871671361047e-06, "loss": 2.1818, "step": 12265 }, { "epoch": 0.6580472103004292, "grad_norm": 0.42578125, "learning_rate": 4.8658435960286935e-06, "loss": 2.0859, "step": 12266 }, { "epoch": 0.6581008583690987, "grad_norm": 0.369140625, "learning_rate": 4.865815517839339e-06, "loss": 2.0291, "step": 12267 }, { "epoch": 0.6581545064377683, "grad_norm": 0.60546875, "learning_rate": 4.865787436793015e-06, "loss": 2.34, "step": 12268 }, { "epoch": 0.6582081545064378, "grad_norm": 0.380859375, "learning_rate": 4.865759352889758e-06, "loss": 2.3206, "step": 12269 }, { "epoch": 0.6582618025751072, "grad_norm": 0.412109375, "learning_rate": 4.8657312661296015e-06, "loss": 2.0935, "step": 12270 }, { "epoch": 0.6583154506437768, "grad_norm": 0.470703125, "learning_rate": 4.86570317651258e-06, "loss": 2.3718, "step": 12271 }, { "epoch": 0.6583690987124463, "grad_norm": 0.41015625, "learning_rate": 4.865675084038726e-06, "loss": 2.2422, "step": 12272 }, { "epoch": 0.6584227467811159, "grad_norm": 0.5703125, "learning_rate": 4.865646988708072e-06, "loss": 2.2561, "step": 12273 }, { "epoch": 0.6584763948497854, "grad_norm": 0.466796875, "learning_rate": 4.865618890520656e-06, "loss": 2.2279, "step": 12274 }, { "epoch": 0.658530042918455, "grad_norm": 0.55859375, "learning_rate": 4.865590789476509e-06, "loss": 2.4892, "step": 12275 }, { "epoch": 0.6585836909871244, "grad_norm": 0.45703125, "learning_rate": 4.865562685575665e-06, "loss": 2.3305, "step": 12276 }, { "epoch": 0.658637339055794, "grad_norm": 0.5546875, "learning_rate": 4.86553457881816e-06, "loss": 2.302, "step": 12277 }, { "epoch": 0.6586909871244635, "grad_norm": 0.408203125, "learning_rate": 4.865506469204026e-06, "loss": 2.0901, "step": 12278 }, { "epoch": 0.6587446351931331, "grad_norm": 0.447265625, "learning_rate": 4.865478356733298e-06, "loss": 2.4, "step": 12279 }, { "epoch": 0.6587982832618026, "grad_norm": 0.53125, "learning_rate": 4.865450241406009e-06, "loss": 2.4822, "step": 12280 }, { "epoch": 0.6588519313304722, "grad_norm": 0.73046875, "learning_rate": 4.865422123222194e-06, "loss": 2.1403, "step": 12281 }, { "epoch": 0.6589055793991416, "grad_norm": 0.451171875, "learning_rate": 4.865394002181884e-06, "loss": 2.1831, "step": 12282 }, { "epoch": 0.6589592274678111, "grad_norm": 0.421875, "learning_rate": 4.865365878285117e-06, "loss": 2.096, "step": 12283 }, { "epoch": 0.6590128755364807, "grad_norm": 0.60546875, "learning_rate": 4.865337751531926e-06, "loss": 2.4844, "step": 12284 }, { "epoch": 0.6590665236051502, "grad_norm": 0.41796875, "learning_rate": 4.865309621922344e-06, "loss": 2.3683, "step": 12285 }, { "epoch": 0.6591201716738198, "grad_norm": 0.52734375, "learning_rate": 4.865281489456405e-06, "loss": 1.5405, "step": 12286 }, { "epoch": 0.6591738197424892, "grad_norm": 0.439453125, "learning_rate": 4.865253354134143e-06, "loss": 2.3584, "step": 12287 }, { "epoch": 0.6592274678111588, "grad_norm": 0.703125, "learning_rate": 4.865225215955592e-06, "loss": 2.1664, "step": 12288 }, { "epoch": 0.6592811158798283, "grad_norm": 0.4609375, "learning_rate": 4.8651970749207864e-06, "loss": 2.5236, "step": 12289 }, { "epoch": 0.6593347639484979, "grad_norm": 0.462890625, "learning_rate": 4.86516893102976e-06, "loss": 2.1059, "step": 12290 }, { "epoch": 0.6593884120171674, "grad_norm": 0.341796875, "learning_rate": 4.865140784282547e-06, "loss": 2.2401, "step": 12291 }, { "epoch": 0.659442060085837, "grad_norm": 0.41015625, "learning_rate": 4.865112634679181e-06, "loss": 2.3157, "step": 12292 }, { "epoch": 0.6594957081545064, "grad_norm": 0.5546875, "learning_rate": 4.865084482219695e-06, "loss": 2.2078, "step": 12293 }, { "epoch": 0.659549356223176, "grad_norm": 0.494140625, "learning_rate": 4.865056326904126e-06, "loss": 2.1956, "step": 12294 }, { "epoch": 0.6596030042918455, "grad_norm": 0.43359375, "learning_rate": 4.865028168732505e-06, "loss": 2.1983, "step": 12295 }, { "epoch": 0.6596566523605151, "grad_norm": 0.50390625, "learning_rate": 4.8650000077048675e-06, "loss": 2.0322, "step": 12296 }, { "epoch": 0.6597103004291845, "grad_norm": 0.46875, "learning_rate": 4.864971843821246e-06, "loss": 2.2667, "step": 12297 }, { "epoch": 0.659763948497854, "grad_norm": 0.4140625, "learning_rate": 4.864943677081677e-06, "loss": 2.1312, "step": 12298 }, { "epoch": 0.6598175965665236, "grad_norm": 4.46875, "learning_rate": 4.864915507486193e-06, "loss": 2.5598, "step": 12299 }, { "epoch": 0.6598712446351931, "grad_norm": 0.765625, "learning_rate": 4.864887335034828e-06, "loss": 2.2492, "step": 12300 }, { "epoch": 0.6599248927038627, "grad_norm": 0.470703125, "learning_rate": 4.864859159727616e-06, "loss": 2.2669, "step": 12301 }, { "epoch": 0.6599785407725322, "grad_norm": 0.4140625, "learning_rate": 4.8648309815645915e-06, "loss": 2.3821, "step": 12302 }, { "epoch": 0.6600321888412017, "grad_norm": 0.498046875, "learning_rate": 4.864802800545788e-06, "loss": 2.3567, "step": 12303 }, { "epoch": 0.6600858369098712, "grad_norm": 0.431640625, "learning_rate": 4.86477461667124e-06, "loss": 2.4, "step": 12304 }, { "epoch": 0.6601394849785408, "grad_norm": 0.439453125, "learning_rate": 4.8647464299409816e-06, "loss": 2.0743, "step": 12305 }, { "epoch": 0.6601931330472103, "grad_norm": 0.35546875, "learning_rate": 4.864718240355046e-06, "loss": 2.3361, "step": 12306 }, { "epoch": 0.6602467811158799, "grad_norm": 0.53515625, "learning_rate": 4.864690047913468e-06, "loss": 2.3153, "step": 12307 }, { "epoch": 0.6603004291845493, "grad_norm": 0.453125, "learning_rate": 4.864661852616281e-06, "loss": 2.3998, "step": 12308 }, { "epoch": 0.6603540772532189, "grad_norm": 0.4453125, "learning_rate": 4.86463365446352e-06, "loss": 2.2911, "step": 12309 }, { "epoch": 0.6604077253218884, "grad_norm": 0.45703125, "learning_rate": 4.864605453455218e-06, "loss": 2.4683, "step": 12310 }, { "epoch": 0.660461373390558, "grad_norm": 0.470703125, "learning_rate": 4.86457724959141e-06, "loss": 2.204, "step": 12311 }, { "epoch": 0.6605150214592275, "grad_norm": 0.494140625, "learning_rate": 4.86454904287213e-06, "loss": 2.1787, "step": 12312 }, { "epoch": 0.660568669527897, "grad_norm": 0.3984375, "learning_rate": 4.864520833297411e-06, "loss": 2.1063, "step": 12313 }, { "epoch": 0.6606223175965665, "grad_norm": 0.4296875, "learning_rate": 4.8644926208672886e-06, "loss": 2.2, "step": 12314 }, { "epoch": 0.660675965665236, "grad_norm": 0.4296875, "learning_rate": 4.864464405581795e-06, "loss": 2.1101, "step": 12315 }, { "epoch": 0.6607296137339056, "grad_norm": 0.49609375, "learning_rate": 4.864436187440966e-06, "loss": 2.3437, "step": 12316 }, { "epoch": 0.6607832618025751, "grad_norm": 0.4453125, "learning_rate": 4.864407966444835e-06, "loss": 2.3436, "step": 12317 }, { "epoch": 0.6608369098712447, "grad_norm": 0.392578125, "learning_rate": 4.8643797425934355e-06, "loss": 2.1285, "step": 12318 }, { "epoch": 0.6608905579399141, "grad_norm": 0.60546875, "learning_rate": 4.864351515886803e-06, "loss": 2.2166, "step": 12319 }, { "epoch": 0.6609442060085837, "grad_norm": 0.48828125, "learning_rate": 4.86432328632497e-06, "loss": 2.2007, "step": 12320 }, { "epoch": 0.6609978540772532, "grad_norm": 0.5703125, "learning_rate": 4.864295053907972e-06, "loss": 2.714, "step": 12321 }, { "epoch": 0.6610515021459228, "grad_norm": 0.427734375, "learning_rate": 4.864266818635842e-06, "loss": 2.2618, "step": 12322 }, { "epoch": 0.6611051502145923, "grad_norm": 0.4765625, "learning_rate": 4.864238580508614e-06, "loss": 1.5559, "step": 12323 }, { "epoch": 0.6611587982832619, "grad_norm": 0.478515625, "learning_rate": 4.864210339526324e-06, "loss": 1.534, "step": 12324 }, { "epoch": 0.6612124463519313, "grad_norm": 0.58984375, "learning_rate": 4.864182095689004e-06, "loss": 2.3087, "step": 12325 }, { "epoch": 0.6612660944206008, "grad_norm": 0.46875, "learning_rate": 4.864153848996689e-06, "loss": 2.2179, "step": 12326 }, { "epoch": 0.6613197424892704, "grad_norm": 0.41015625, "learning_rate": 4.864125599449413e-06, "loss": 2.2525, "step": 12327 }, { "epoch": 0.6613733905579399, "grad_norm": 0.50390625, "learning_rate": 4.86409734704721e-06, "loss": 2.1338, "step": 12328 }, { "epoch": 0.6614270386266095, "grad_norm": 0.515625, "learning_rate": 4.8640690917901135e-06, "loss": 2.3609, "step": 12329 }, { "epoch": 0.6614806866952789, "grad_norm": 0.453125, "learning_rate": 4.864040833678159e-06, "loss": 2.2772, "step": 12330 }, { "epoch": 0.6615343347639485, "grad_norm": 0.408203125, "learning_rate": 4.8640125727113796e-06, "loss": 2.0843, "step": 12331 }, { "epoch": 0.661587982832618, "grad_norm": 0.498046875, "learning_rate": 4.86398430888981e-06, "loss": 2.5117, "step": 12332 }, { "epoch": 0.6616416309012876, "grad_norm": 0.451171875, "learning_rate": 4.863956042213484e-06, "loss": 2.1961, "step": 12333 }, { "epoch": 0.6616952789699571, "grad_norm": 0.41015625, "learning_rate": 4.863927772682436e-06, "loss": 2.2222, "step": 12334 }, { "epoch": 0.6617489270386266, "grad_norm": 0.51171875, "learning_rate": 4.863899500296701e-06, "loss": 1.6262, "step": 12335 }, { "epoch": 0.6618025751072961, "grad_norm": 0.484375, "learning_rate": 4.86387122505631e-06, "loss": 2.1372, "step": 12336 }, { "epoch": 0.6618562231759657, "grad_norm": 0.5234375, "learning_rate": 4.8638429469613e-06, "loss": 2.279, "step": 12337 }, { "epoch": 0.6619098712446352, "grad_norm": 0.44921875, "learning_rate": 4.863814666011705e-06, "loss": 2.5454, "step": 12338 }, { "epoch": 0.6619635193133048, "grad_norm": 0.455078125, "learning_rate": 4.8637863822075585e-06, "loss": 2.2823, "step": 12339 }, { "epoch": 0.6620171673819742, "grad_norm": 0.376953125, "learning_rate": 4.8637580955488935e-06, "loss": 2.2345, "step": 12340 }, { "epoch": 0.6620708154506437, "grad_norm": 0.408203125, "learning_rate": 4.863729806035746e-06, "loss": 2.2311, "step": 12341 }, { "epoch": 0.6621244635193133, "grad_norm": 0.4140625, "learning_rate": 4.86370151366815e-06, "loss": 2.4048, "step": 12342 }, { "epoch": 0.6621781115879828, "grad_norm": 0.384765625, "learning_rate": 4.863673218446139e-06, "loss": 2.1903, "step": 12343 }, { "epoch": 0.6622317596566524, "grad_norm": 0.390625, "learning_rate": 4.8636449203697475e-06, "loss": 2.4011, "step": 12344 }, { "epoch": 0.6622854077253219, "grad_norm": 0.53515625, "learning_rate": 4.863616619439009e-06, "loss": 2.3016, "step": 12345 }, { "epoch": 0.6623390557939914, "grad_norm": 0.45703125, "learning_rate": 4.863588315653959e-06, "loss": 2.3385, "step": 12346 }, { "epoch": 0.6623927038626609, "grad_norm": 0.453125, "learning_rate": 4.86356000901463e-06, "loss": 2.1856, "step": 12347 }, { "epoch": 0.6624463519313305, "grad_norm": 0.494140625, "learning_rate": 4.863531699521058e-06, "loss": 2.2517, "step": 12348 }, { "epoch": 0.6625, "grad_norm": 0.390625, "learning_rate": 4.863503387173276e-06, "loss": 2.1837, "step": 12349 }, { "epoch": 0.6625536480686696, "grad_norm": 0.4296875, "learning_rate": 4.863475071971317e-06, "loss": 2.4258, "step": 12350 }, { "epoch": 0.662607296137339, "grad_norm": 0.67578125, "learning_rate": 4.863446753915219e-06, "loss": 2.0798, "step": 12351 }, { "epoch": 0.6626609442060086, "grad_norm": 0.462890625, "learning_rate": 4.863418433005013e-06, "loss": 2.5215, "step": 12352 }, { "epoch": 0.6627145922746781, "grad_norm": 0.431640625, "learning_rate": 4.863390109240733e-06, "loss": 2.1346, "step": 12353 }, { "epoch": 0.6627682403433477, "grad_norm": 0.421875, "learning_rate": 4.8633617826224154e-06, "loss": 2.2611, "step": 12354 }, { "epoch": 0.6628218884120172, "grad_norm": 0.474609375, "learning_rate": 4.863333453150093e-06, "loss": 2.4681, "step": 12355 }, { "epoch": 0.6628755364806866, "grad_norm": 0.45703125, "learning_rate": 4.8633051208238e-06, "loss": 2.4471, "step": 12356 }, { "epoch": 0.6629291845493562, "grad_norm": 0.37109375, "learning_rate": 4.863276785643571e-06, "loss": 1.9981, "step": 12357 }, { "epoch": 0.6629828326180257, "grad_norm": 0.7421875, "learning_rate": 4.86324844760944e-06, "loss": 2.1666, "step": 12358 }, { "epoch": 0.6630364806866953, "grad_norm": 0.6875, "learning_rate": 4.863220106721441e-06, "loss": 2.5872, "step": 12359 }, { "epoch": 0.6630901287553648, "grad_norm": 0.58203125, "learning_rate": 4.8631917629796095e-06, "loss": 2.2853, "step": 12360 }, { "epoch": 0.6631437768240344, "grad_norm": 0.54296875, "learning_rate": 4.863163416383978e-06, "loss": 2.0991, "step": 12361 }, { "epoch": 0.6631974248927038, "grad_norm": 0.453125, "learning_rate": 4.863135066934582e-06, "loss": 2.3962, "step": 12362 }, { "epoch": 0.6632510729613734, "grad_norm": 0.38671875, "learning_rate": 4.863106714631455e-06, "loss": 2.3247, "step": 12363 }, { "epoch": 0.6633047210300429, "grad_norm": 0.85546875, "learning_rate": 4.8630783594746316e-06, "loss": 2.3571, "step": 12364 }, { "epoch": 0.6633583690987125, "grad_norm": 0.453125, "learning_rate": 4.8630500014641455e-06, "loss": 2.2842, "step": 12365 }, { "epoch": 0.663412017167382, "grad_norm": 0.4375, "learning_rate": 4.863021640600032e-06, "loss": 1.9923, "step": 12366 }, { "epoch": 0.6634656652360515, "grad_norm": 0.52734375, "learning_rate": 4.862993276882324e-06, "loss": 2.1579, "step": 12367 }, { "epoch": 0.663519313304721, "grad_norm": 0.44921875, "learning_rate": 4.862964910311057e-06, "loss": 2.1073, "step": 12368 }, { "epoch": 0.6635729613733906, "grad_norm": 0.439453125, "learning_rate": 4.862936540886264e-06, "loss": 2.2867, "step": 12369 }, { "epoch": 0.6636266094420601, "grad_norm": 0.4453125, "learning_rate": 4.86290816860798e-06, "loss": 2.3123, "step": 12370 }, { "epoch": 0.6636802575107296, "grad_norm": 0.390625, "learning_rate": 4.86287979347624e-06, "loss": 2.3451, "step": 12371 }, { "epoch": 0.6637339055793992, "grad_norm": 0.494140625, "learning_rate": 4.862851415491078e-06, "loss": 2.354, "step": 12372 }, { "epoch": 0.6637875536480686, "grad_norm": 0.416015625, "learning_rate": 4.862823034652527e-06, "loss": 2.4811, "step": 12373 }, { "epoch": 0.6638412017167382, "grad_norm": 0.453125, "learning_rate": 4.862794650960622e-06, "loss": 2.2897, "step": 12374 }, { "epoch": 0.6638948497854077, "grad_norm": 0.48046875, "learning_rate": 4.8627662644153975e-06, "loss": 2.3665, "step": 12375 }, { "epoch": 0.6639484978540773, "grad_norm": 0.40234375, "learning_rate": 4.862737875016888e-06, "loss": 2.1516, "step": 12376 }, { "epoch": 0.6640021459227468, "grad_norm": 0.466796875, "learning_rate": 4.862709482765126e-06, "loss": 2.1191, "step": 12377 }, { "epoch": 0.6640557939914163, "grad_norm": 0.4375, "learning_rate": 4.862681087660148e-06, "loss": 2.2577, "step": 12378 }, { "epoch": 0.6641094420600858, "grad_norm": 0.427734375, "learning_rate": 4.862652689701988e-06, "loss": 2.4007, "step": 12379 }, { "epoch": 0.6641630901287554, "grad_norm": 0.466796875, "learning_rate": 4.8626242888906795e-06, "loss": 2.2801, "step": 12380 }, { "epoch": 0.6642167381974249, "grad_norm": 0.453125, "learning_rate": 4.862595885226257e-06, "loss": 2.2821, "step": 12381 }, { "epoch": 0.6642703862660945, "grad_norm": 0.4296875, "learning_rate": 4.862567478708754e-06, "loss": 2.2268, "step": 12382 }, { "epoch": 0.664324034334764, "grad_norm": 0.431640625, "learning_rate": 4.8625390693382065e-06, "loss": 2.18, "step": 12383 }, { "epoch": 0.6643776824034334, "grad_norm": 0.47265625, "learning_rate": 4.862510657114647e-06, "loss": 2.3442, "step": 12384 }, { "epoch": 0.664431330472103, "grad_norm": 0.51953125, "learning_rate": 4.8624822420381124e-06, "loss": 2.1374, "step": 12385 }, { "epoch": 0.6644849785407725, "grad_norm": 0.376953125, "learning_rate": 4.862453824108635e-06, "loss": 2.1159, "step": 12386 }, { "epoch": 0.6645386266094421, "grad_norm": 0.435546875, "learning_rate": 4.8624254033262485e-06, "loss": 2.4344, "step": 12387 }, { "epoch": 0.6645922746781115, "grad_norm": 0.431640625, "learning_rate": 4.862396979690989e-06, "loss": 2.3422, "step": 12388 }, { "epoch": 0.6646459227467811, "grad_norm": 0.43359375, "learning_rate": 4.86236855320289e-06, "loss": 2.1777, "step": 12389 }, { "epoch": 0.6646995708154506, "grad_norm": 0.48828125, "learning_rate": 4.8623401238619864e-06, "loss": 2.2284, "step": 12390 }, { "epoch": 0.6647532188841202, "grad_norm": 0.44140625, "learning_rate": 4.862311691668311e-06, "loss": 2.1325, "step": 12391 }, { "epoch": 0.6648068669527897, "grad_norm": 0.51171875, "learning_rate": 4.8622832566218995e-06, "loss": 2.2284, "step": 12392 }, { "epoch": 0.6648605150214593, "grad_norm": 0.5, "learning_rate": 4.862254818722786e-06, "loss": 2.0959, "step": 12393 }, { "epoch": 0.6649141630901287, "grad_norm": 0.392578125, "learning_rate": 4.862226377971005e-06, "loss": 2.3094, "step": 12394 }, { "epoch": 0.6649678111587983, "grad_norm": 0.41015625, "learning_rate": 4.862197934366589e-06, "loss": 2.1701, "step": 12395 }, { "epoch": 0.6650214592274678, "grad_norm": 0.455078125, "learning_rate": 4.862169487909576e-06, "loss": 2.0466, "step": 12396 }, { "epoch": 0.6650751072961374, "grad_norm": 0.46875, "learning_rate": 4.862141038599997e-06, "loss": 2.329, "step": 12397 }, { "epoch": 0.6651287553648069, "grad_norm": 0.7265625, "learning_rate": 4.862112586437888e-06, "loss": 2.2158, "step": 12398 }, { "epoch": 0.6651824034334763, "grad_norm": 0.451171875, "learning_rate": 4.862084131423283e-06, "loss": 2.4112, "step": 12399 }, { "epoch": 0.6652360515021459, "grad_norm": 0.5, "learning_rate": 4.862055673556216e-06, "loss": 2.4372, "step": 12400 }, { "epoch": 0.6652896995708154, "grad_norm": 0.419921875, "learning_rate": 4.862027212836723e-06, "loss": 2.2595, "step": 12401 }, { "epoch": 0.665343347639485, "grad_norm": 0.46875, "learning_rate": 4.861998749264836e-06, "loss": 2.2384, "step": 12402 }, { "epoch": 0.6653969957081545, "grad_norm": 0.4296875, "learning_rate": 4.86197028284059e-06, "loss": 2.2432, "step": 12403 }, { "epoch": 0.6654506437768241, "grad_norm": 0.4609375, "learning_rate": 4.8619418135640205e-06, "loss": 2.4418, "step": 12404 }, { "epoch": 0.6655042918454935, "grad_norm": 0.470703125, "learning_rate": 4.8619133414351615e-06, "loss": 2.2409, "step": 12405 }, { "epoch": 0.6655579399141631, "grad_norm": 0.46875, "learning_rate": 4.861884866454047e-06, "loss": 1.9955, "step": 12406 }, { "epoch": 0.6656115879828326, "grad_norm": 0.4921875, "learning_rate": 4.8618563886207114e-06, "loss": 1.9396, "step": 12407 }, { "epoch": 0.6656652360515022, "grad_norm": 0.423828125, "learning_rate": 4.861827907935189e-06, "loss": 2.1195, "step": 12408 }, { "epoch": 0.6657188841201717, "grad_norm": 0.546875, "learning_rate": 4.8617994243975145e-06, "loss": 2.3636, "step": 12409 }, { "epoch": 0.6657725321888412, "grad_norm": 0.39453125, "learning_rate": 4.8617709380077216e-06, "loss": 2.4473, "step": 12410 }, { "epoch": 0.6658261802575107, "grad_norm": 0.443359375, "learning_rate": 4.861742448765846e-06, "loss": 2.4611, "step": 12411 }, { "epoch": 0.6658798283261803, "grad_norm": 0.45703125, "learning_rate": 4.8617139566719205e-06, "loss": 2.3807, "step": 12412 }, { "epoch": 0.6659334763948498, "grad_norm": 0.453125, "learning_rate": 4.861685461725981e-06, "loss": 2.2984, "step": 12413 }, { "epoch": 0.6659871244635193, "grad_norm": 0.59375, "learning_rate": 4.861656963928062e-06, "loss": 2.3963, "step": 12414 }, { "epoch": 0.6660407725321889, "grad_norm": 0.49609375, "learning_rate": 4.861628463278196e-06, "loss": 2.3943, "step": 12415 }, { "epoch": 0.6660944206008583, "grad_norm": 0.431640625, "learning_rate": 4.86159995977642e-06, "loss": 2.1844, "step": 12416 }, { "epoch": 0.6661480686695279, "grad_norm": 0.474609375, "learning_rate": 4.861571453422765e-06, "loss": 2.1834, "step": 12417 }, { "epoch": 0.6662017167381974, "grad_norm": 0.62890625, "learning_rate": 4.861542944217269e-06, "loss": 2.219, "step": 12418 }, { "epoch": 0.666255364806867, "grad_norm": 0.443359375, "learning_rate": 4.8615144321599645e-06, "loss": 2.3397, "step": 12419 }, { "epoch": 0.6663090128755365, "grad_norm": 0.419921875, "learning_rate": 4.8614859172508865e-06, "loss": 2.369, "step": 12420 }, { "epoch": 0.666362660944206, "grad_norm": 0.421875, "learning_rate": 4.86145739949007e-06, "loss": 2.443, "step": 12421 }, { "epoch": 0.6664163090128755, "grad_norm": 0.388671875, "learning_rate": 4.861428878877546e-06, "loss": 2.1845, "step": 12422 }, { "epoch": 0.6664699570815451, "grad_norm": 0.4609375, "learning_rate": 4.861400355413354e-06, "loss": 1.9744, "step": 12423 }, { "epoch": 0.6665236051502146, "grad_norm": 0.5, "learning_rate": 4.861371829097526e-06, "loss": 2.3083, "step": 12424 }, { "epoch": 0.6665772532188842, "grad_norm": 0.38671875, "learning_rate": 4.8613432999300955e-06, "loss": 1.8124, "step": 12425 }, { "epoch": 0.6666309012875536, "grad_norm": 0.40234375, "learning_rate": 4.8613147679110985e-06, "loss": 2.1109, "step": 12426 }, { "epoch": 0.6666845493562231, "grad_norm": 0.43359375, "learning_rate": 4.8612862330405685e-06, "loss": 2.266, "step": 12427 }, { "epoch": 0.6667381974248927, "grad_norm": 0.46484375, "learning_rate": 4.861257695318541e-06, "loss": 2.2259, "step": 12428 }, { "epoch": 0.6667918454935622, "grad_norm": 0.41796875, "learning_rate": 4.86122915474505e-06, "loss": 2.3046, "step": 12429 }, { "epoch": 0.6668454935622318, "grad_norm": 0.369140625, "learning_rate": 4.861200611320129e-06, "loss": 2.3083, "step": 12430 }, { "epoch": 0.6668991416309012, "grad_norm": 0.5390625, "learning_rate": 4.861172065043814e-06, "loss": 1.5401, "step": 12431 }, { "epoch": 0.6669527896995708, "grad_norm": 0.494140625, "learning_rate": 4.861143515916139e-06, "loss": 2.4225, "step": 12432 }, { "epoch": 0.6670064377682403, "grad_norm": 0.44921875, "learning_rate": 4.861114963937137e-06, "loss": 1.8754, "step": 12433 }, { "epoch": 0.6670600858369099, "grad_norm": 0.466796875, "learning_rate": 4.861086409106846e-06, "loss": 2.5415, "step": 12434 }, { "epoch": 0.6671137339055794, "grad_norm": 0.451171875, "learning_rate": 4.861057851425296e-06, "loss": 2.1275, "step": 12435 }, { "epoch": 0.667167381974249, "grad_norm": 0.4609375, "learning_rate": 4.8610292908925236e-06, "loss": 2.2642, "step": 12436 }, { "epoch": 0.6672210300429184, "grad_norm": 0.51171875, "learning_rate": 4.861000727508565e-06, "loss": 2.3403, "step": 12437 }, { "epoch": 0.667274678111588, "grad_norm": 0.48046875, "learning_rate": 4.860972161273452e-06, "loss": 2.4241, "step": 12438 }, { "epoch": 0.6673283261802575, "grad_norm": 0.609375, "learning_rate": 4.860943592187221e-06, "loss": 1.5696, "step": 12439 }, { "epoch": 0.6673819742489271, "grad_norm": 0.455078125, "learning_rate": 4.8609150202499046e-06, "loss": 2.2215, "step": 12440 }, { "epoch": 0.6674356223175966, "grad_norm": 0.4140625, "learning_rate": 4.860886445461539e-06, "loss": 2.285, "step": 12441 }, { "epoch": 0.667489270386266, "grad_norm": 1.09375, "learning_rate": 4.860857867822158e-06, "loss": 2.2634, "step": 12442 }, { "epoch": 0.6675429184549356, "grad_norm": 0.390625, "learning_rate": 4.8608292873317965e-06, "loss": 2.1254, "step": 12443 }, { "epoch": 0.6675965665236051, "grad_norm": 0.43359375, "learning_rate": 4.860800703990488e-06, "loss": 2.4502, "step": 12444 }, { "epoch": 0.6676502145922747, "grad_norm": 0.435546875, "learning_rate": 4.860772117798268e-06, "loss": 2.2276, "step": 12445 }, { "epoch": 0.6677038626609442, "grad_norm": 0.46484375, "learning_rate": 4.860743528755171e-06, "loss": 2.3397, "step": 12446 }, { "epoch": 0.6677575107296138, "grad_norm": 0.486328125, "learning_rate": 4.860714936861231e-06, "loss": 2.5993, "step": 12447 }, { "epoch": 0.6678111587982832, "grad_norm": 0.43359375, "learning_rate": 4.860686342116483e-06, "loss": 2.1564, "step": 12448 }, { "epoch": 0.6678648068669528, "grad_norm": 0.412109375, "learning_rate": 4.860657744520962e-06, "loss": 1.9293, "step": 12449 }, { "epoch": 0.6679184549356223, "grad_norm": 0.439453125, "learning_rate": 4.8606291440747e-06, "loss": 2.2869, "step": 12450 }, { "epoch": 0.6679721030042919, "grad_norm": 0.42578125, "learning_rate": 4.860600540777734e-06, "loss": 2.4082, "step": 12451 }, { "epoch": 0.6680257510729614, "grad_norm": 0.45703125, "learning_rate": 4.860571934630099e-06, "loss": 2.3936, "step": 12452 }, { "epoch": 0.668079399141631, "grad_norm": 0.4453125, "learning_rate": 4.860543325631828e-06, "loss": 2.5485, "step": 12453 }, { "epoch": 0.6681330472103004, "grad_norm": 0.466796875, "learning_rate": 4.8605147137829555e-06, "loss": 2.1145, "step": 12454 }, { "epoch": 0.66818669527897, "grad_norm": 0.345703125, "learning_rate": 4.860486099083517e-06, "loss": 2.3245, "step": 12455 }, { "epoch": 0.6682403433476395, "grad_norm": 0.458984375, "learning_rate": 4.860457481533547e-06, "loss": 2.3219, "step": 12456 }, { "epoch": 0.668293991416309, "grad_norm": 0.482421875, "learning_rate": 4.8604288611330795e-06, "loss": 2.0359, "step": 12457 }, { "epoch": 0.6683476394849786, "grad_norm": 0.458984375, "learning_rate": 4.860400237882149e-06, "loss": 2.0282, "step": 12458 }, { "epoch": 0.668401287553648, "grad_norm": 0.51171875, "learning_rate": 4.8603716117807904e-06, "loss": 2.1264, "step": 12459 }, { "epoch": 0.6684549356223176, "grad_norm": 0.4375, "learning_rate": 4.860342982829038e-06, "loss": 2.1498, "step": 12460 }, { "epoch": 0.6685085836909871, "grad_norm": 0.54296875, "learning_rate": 4.860314351026927e-06, "loss": 2.3339, "step": 12461 }, { "epoch": 0.6685622317596567, "grad_norm": 0.4453125, "learning_rate": 4.860285716374491e-06, "loss": 2.3265, "step": 12462 }, { "epoch": 0.6686158798283262, "grad_norm": 0.44921875, "learning_rate": 4.8602570788717665e-06, "loss": 2.158, "step": 12463 }, { "epoch": 0.6686695278969957, "grad_norm": 0.41796875, "learning_rate": 4.8602284385187845e-06, "loss": 2.2969, "step": 12464 }, { "epoch": 0.6687231759656652, "grad_norm": 0.3984375, "learning_rate": 4.860199795315583e-06, "loss": 2.5209, "step": 12465 }, { "epoch": 0.6687768240343348, "grad_norm": 1.109375, "learning_rate": 4.8601711492621964e-06, "loss": 2.1997, "step": 12466 }, { "epoch": 0.6688304721030043, "grad_norm": 0.455078125, "learning_rate": 4.8601425003586565e-06, "loss": 2.2169, "step": 12467 }, { "epoch": 0.6688841201716739, "grad_norm": 0.609375, "learning_rate": 4.860113848605001e-06, "loss": 2.396, "step": 12468 }, { "epoch": 0.6689377682403433, "grad_norm": 0.408203125, "learning_rate": 4.860085194001262e-06, "loss": 2.2031, "step": 12469 }, { "epoch": 0.6689914163090128, "grad_norm": 1.2421875, "learning_rate": 4.8600565365474765e-06, "loss": 2.2396, "step": 12470 }, { "epoch": 0.6690450643776824, "grad_norm": 0.4453125, "learning_rate": 4.860027876243677e-06, "loss": 2.2618, "step": 12471 }, { "epoch": 0.6690987124463519, "grad_norm": 0.83203125, "learning_rate": 4.859999213089899e-06, "loss": 2.2615, "step": 12472 }, { "epoch": 0.6691523605150215, "grad_norm": 0.48828125, "learning_rate": 4.859970547086177e-06, "loss": 2.3953, "step": 12473 }, { "epoch": 0.669206008583691, "grad_norm": 0.42578125, "learning_rate": 4.8599418782325465e-06, "loss": 2.4158, "step": 12474 }, { "epoch": 0.6692596566523605, "grad_norm": 0.45703125, "learning_rate": 4.859913206529041e-06, "loss": 2.1432, "step": 12475 }, { "epoch": 0.66931330472103, "grad_norm": 0.5, "learning_rate": 4.859884531975695e-06, "loss": 2.1535, "step": 12476 }, { "epoch": 0.6693669527896996, "grad_norm": 0.484375, "learning_rate": 4.8598558545725434e-06, "loss": 2.3907, "step": 12477 }, { "epoch": 0.6694206008583691, "grad_norm": 0.5234375, "learning_rate": 4.859827174319622e-06, "loss": 2.1619, "step": 12478 }, { "epoch": 0.6694742489270387, "grad_norm": 0.39453125, "learning_rate": 4.859798491216964e-06, "loss": 2.3204, "step": 12479 }, { "epoch": 0.6695278969957081, "grad_norm": 0.486328125, "learning_rate": 4.859769805264605e-06, "loss": 2.4046, "step": 12480 }, { "epoch": 0.6695815450643777, "grad_norm": 0.447265625, "learning_rate": 4.859741116462578e-06, "loss": 2.2409, "step": 12481 }, { "epoch": 0.6696351931330472, "grad_norm": 1.078125, "learning_rate": 4.85971242481092e-06, "loss": 2.5228, "step": 12482 }, { "epoch": 0.6696888412017168, "grad_norm": 0.5078125, "learning_rate": 4.859683730309664e-06, "loss": 2.2055, "step": 12483 }, { "epoch": 0.6697424892703863, "grad_norm": 0.431640625, "learning_rate": 4.859655032958845e-06, "loss": 2.2412, "step": 12484 }, { "epoch": 0.6697961373390557, "grad_norm": 0.50390625, "learning_rate": 4.859626332758498e-06, "loss": 2.2988, "step": 12485 }, { "epoch": 0.6698497854077253, "grad_norm": 0.546875, "learning_rate": 4.859597629708656e-06, "loss": 2.2402, "step": 12486 }, { "epoch": 0.6699034334763948, "grad_norm": 0.419921875, "learning_rate": 4.859568923809357e-06, "loss": 2.2803, "step": 12487 }, { "epoch": 0.6699570815450644, "grad_norm": 0.4296875, "learning_rate": 4.859540215060633e-06, "loss": 2.029, "step": 12488 }, { "epoch": 0.6700107296137339, "grad_norm": 0.421875, "learning_rate": 4.8595115034625195e-06, "loss": 2.0959, "step": 12489 }, { "epoch": 0.6700643776824035, "grad_norm": 0.4296875, "learning_rate": 4.859482789015051e-06, "loss": 2.1956, "step": 12490 }, { "epoch": 0.6701180257510729, "grad_norm": 0.51953125, "learning_rate": 4.859454071718262e-06, "loss": 2.1623, "step": 12491 }, { "epoch": 0.6701716738197425, "grad_norm": 0.4375, "learning_rate": 4.8594253515721874e-06, "loss": 2.23, "step": 12492 }, { "epoch": 0.670225321888412, "grad_norm": 0.44921875, "learning_rate": 4.859396628576862e-06, "loss": 2.3118, "step": 12493 }, { "epoch": 0.6702789699570816, "grad_norm": 0.48828125, "learning_rate": 4.85936790273232e-06, "loss": 2.2122, "step": 12494 }, { "epoch": 0.6703326180257511, "grad_norm": 0.53125, "learning_rate": 4.859339174038598e-06, "loss": 2.4373, "step": 12495 }, { "epoch": 0.6703862660944206, "grad_norm": 0.51171875, "learning_rate": 4.8593104424957275e-06, "loss": 2.2541, "step": 12496 }, { "epoch": 0.6704399141630901, "grad_norm": 0.462890625, "learning_rate": 4.859281708103745e-06, "loss": 2.462, "step": 12497 }, { "epoch": 0.6704935622317597, "grad_norm": 0.423828125, "learning_rate": 4.859252970862686e-06, "loss": 2.1569, "step": 12498 }, { "epoch": 0.6705472103004292, "grad_norm": 0.349609375, "learning_rate": 4.859224230772584e-06, "loss": 2.0464, "step": 12499 }, { "epoch": 0.6706008583690987, "grad_norm": 0.37890625, "learning_rate": 4.859195487833474e-06, "loss": 2.175, "step": 12500 }, { "epoch": 0.6706545064377682, "grad_norm": 0.462890625, "learning_rate": 4.85916674204539e-06, "loss": 2.4285, "step": 12501 }, { "epoch": 0.6707081545064377, "grad_norm": 0.455078125, "learning_rate": 4.859137993408368e-06, "loss": 2.4529, "step": 12502 }, { "epoch": 0.6707618025751073, "grad_norm": 0.318359375, "learning_rate": 4.8591092419224415e-06, "loss": 1.9056, "step": 12503 }, { "epoch": 0.6708154506437768, "grad_norm": 0.431640625, "learning_rate": 4.859080487587646e-06, "loss": 2.07, "step": 12504 }, { "epoch": 0.6708690987124464, "grad_norm": 0.58203125, "learning_rate": 4.859051730404017e-06, "loss": 2.325, "step": 12505 }, { "epoch": 0.6709227467811159, "grad_norm": 0.515625, "learning_rate": 4.859022970371587e-06, "loss": 2.3246, "step": 12506 }, { "epoch": 0.6709763948497854, "grad_norm": 0.47265625, "learning_rate": 4.8589942074903925e-06, "loss": 2.4192, "step": 12507 }, { "epoch": 0.6710300429184549, "grad_norm": 0.57421875, "learning_rate": 4.858965441760469e-06, "loss": 2.4694, "step": 12508 }, { "epoch": 0.6710836909871245, "grad_norm": 0.55859375, "learning_rate": 4.858936673181849e-06, "loss": 2.0829, "step": 12509 }, { "epoch": 0.671137339055794, "grad_norm": 0.45703125, "learning_rate": 4.858907901754567e-06, "loss": 2.2833, "step": 12510 }, { "epoch": 0.6711909871244636, "grad_norm": 0.435546875, "learning_rate": 4.85887912747866e-06, "loss": 2.2418, "step": 12511 }, { "epoch": 0.671244635193133, "grad_norm": 0.53515625, "learning_rate": 4.858850350354163e-06, "loss": 2.4471, "step": 12512 }, { "epoch": 0.6712982832618025, "grad_norm": 0.94921875, "learning_rate": 4.858821570381108e-06, "loss": 2.4182, "step": 12513 }, { "epoch": 0.6713519313304721, "grad_norm": 0.43359375, "learning_rate": 4.8587927875595315e-06, "loss": 2.4544, "step": 12514 }, { "epoch": 0.6714055793991416, "grad_norm": 0.43359375, "learning_rate": 4.858764001889468e-06, "loss": 2.2094, "step": 12515 }, { "epoch": 0.6714592274678112, "grad_norm": 0.48046875, "learning_rate": 4.8587352133709525e-06, "loss": 2.5213, "step": 12516 }, { "epoch": 0.6715128755364806, "grad_norm": 0.5390625, "learning_rate": 4.8587064220040196e-06, "loss": 2.0646, "step": 12517 }, { "epoch": 0.6715665236051502, "grad_norm": 0.44921875, "learning_rate": 4.858677627788704e-06, "loss": 2.1449, "step": 12518 }, { "epoch": 0.6716201716738197, "grad_norm": 0.46875, "learning_rate": 4.85864883072504e-06, "loss": 2.2518, "step": 12519 }, { "epoch": 0.6716738197424893, "grad_norm": 0.392578125, "learning_rate": 4.858620030813062e-06, "loss": 2.3075, "step": 12520 }, { "epoch": 0.6717274678111588, "grad_norm": 0.431640625, "learning_rate": 4.858591228052807e-06, "loss": 2.2605, "step": 12521 }, { "epoch": 0.6717811158798284, "grad_norm": 0.4609375, "learning_rate": 4.858562422444308e-06, "loss": 2.4732, "step": 12522 }, { "epoch": 0.6718347639484978, "grad_norm": 0.490234375, "learning_rate": 4.858533613987601e-06, "loss": 2.4651, "step": 12523 }, { "epoch": 0.6718884120171674, "grad_norm": 0.5546875, "learning_rate": 4.858504802682718e-06, "loss": 2.3864, "step": 12524 }, { "epoch": 0.6719420600858369, "grad_norm": 0.4375, "learning_rate": 4.858475988529697e-06, "loss": 2.4988, "step": 12525 }, { "epoch": 0.6719957081545065, "grad_norm": 0.57421875, "learning_rate": 4.858447171528572e-06, "loss": 2.2601, "step": 12526 }, { "epoch": 0.672049356223176, "grad_norm": 0.4453125, "learning_rate": 4.858418351679377e-06, "loss": 2.3194, "step": 12527 }, { "epoch": 0.6721030042918454, "grad_norm": 0.357421875, "learning_rate": 4.858389528982147e-06, "loss": 1.8581, "step": 12528 }, { "epoch": 0.672156652360515, "grad_norm": 0.416015625, "learning_rate": 4.858360703436917e-06, "loss": 2.2616, "step": 12529 }, { "epoch": 0.6722103004291845, "grad_norm": 0.5234375, "learning_rate": 4.8583318750437216e-06, "loss": 2.3725, "step": 12530 }, { "epoch": 0.6722639484978541, "grad_norm": 0.447265625, "learning_rate": 4.858303043802596e-06, "loss": 2.4553, "step": 12531 }, { "epoch": 0.6723175965665236, "grad_norm": 0.451171875, "learning_rate": 4.858274209713575e-06, "loss": 2.2231, "step": 12532 }, { "epoch": 0.6723712446351932, "grad_norm": 0.51171875, "learning_rate": 4.858245372776693e-06, "loss": 2.4055, "step": 12533 }, { "epoch": 0.6724248927038626, "grad_norm": 0.455078125, "learning_rate": 4.858216532991985e-06, "loss": 2.3877, "step": 12534 }, { "epoch": 0.6724785407725322, "grad_norm": 0.484375, "learning_rate": 4.858187690359485e-06, "loss": 2.288, "step": 12535 }, { "epoch": 0.6725321888412017, "grad_norm": 0.43359375, "learning_rate": 4.85815884487923e-06, "loss": 2.3496, "step": 12536 }, { "epoch": 0.6725858369098713, "grad_norm": 0.37890625, "learning_rate": 4.858129996551253e-06, "loss": 2.3066, "step": 12537 }, { "epoch": 0.6726394849785408, "grad_norm": 0.5234375, "learning_rate": 4.85810114537559e-06, "loss": 2.2828, "step": 12538 }, { "epoch": 0.6726931330472103, "grad_norm": 0.486328125, "learning_rate": 4.858072291352275e-06, "loss": 2.2332, "step": 12539 }, { "epoch": 0.6727467811158798, "grad_norm": 0.51953125, "learning_rate": 4.858043434481342e-06, "loss": 2.3695, "step": 12540 }, { "epoch": 0.6728004291845494, "grad_norm": 0.41015625, "learning_rate": 4.858014574762828e-06, "loss": 2.5244, "step": 12541 }, { "epoch": 0.6728540772532189, "grad_norm": 0.431640625, "learning_rate": 4.857985712196766e-06, "loss": 2.2106, "step": 12542 }, { "epoch": 0.6729077253218884, "grad_norm": 0.4140625, "learning_rate": 4.857956846783193e-06, "loss": 2.1612, "step": 12543 }, { "epoch": 0.672961373390558, "grad_norm": 0.5234375, "learning_rate": 4.857927978522141e-06, "loss": 2.4217, "step": 12544 }, { "epoch": 0.6730150214592274, "grad_norm": 0.427734375, "learning_rate": 4.857899107413648e-06, "loss": 1.9511, "step": 12545 }, { "epoch": 0.673068669527897, "grad_norm": 0.5234375, "learning_rate": 4.8578702334577455e-06, "loss": 2.2214, "step": 12546 }, { "epoch": 0.6731223175965665, "grad_norm": 0.44140625, "learning_rate": 4.85784135665447e-06, "loss": 2.4623, "step": 12547 }, { "epoch": 0.6731759656652361, "grad_norm": 0.5625, "learning_rate": 4.857812477003857e-06, "loss": 2.1093, "step": 12548 }, { "epoch": 0.6732296137339056, "grad_norm": 0.4609375, "learning_rate": 4.857783594505941e-06, "loss": 2.4319, "step": 12549 }, { "epoch": 0.6732832618025751, "grad_norm": 0.50390625, "learning_rate": 4.857754709160757e-06, "loss": 2.6545, "step": 12550 }, { "epoch": 0.6733369098712446, "grad_norm": 0.384765625, "learning_rate": 4.857725820968339e-06, "loss": 2.4148, "step": 12551 }, { "epoch": 0.6733905579399142, "grad_norm": 0.427734375, "learning_rate": 4.857696929928723e-06, "loss": 1.9926, "step": 12552 }, { "epoch": 0.6734442060085837, "grad_norm": 0.453125, "learning_rate": 4.857668036041942e-06, "loss": 2.1087, "step": 12553 }, { "epoch": 0.6734978540772533, "grad_norm": 0.416015625, "learning_rate": 4.857639139308034e-06, "loss": 2.2824, "step": 12554 }, { "epoch": 0.6735515021459227, "grad_norm": 0.451171875, "learning_rate": 4.8576102397270315e-06, "loss": 2.3265, "step": 12555 }, { "epoch": 0.6736051502145923, "grad_norm": 0.7578125, "learning_rate": 4.85758133729897e-06, "loss": 1.7308, "step": 12556 }, { "epoch": 0.6736587982832618, "grad_norm": 0.447265625, "learning_rate": 4.857552432023884e-06, "loss": 2.3559, "step": 12557 }, { "epoch": 0.6737124463519313, "grad_norm": 0.447265625, "learning_rate": 4.857523523901809e-06, "loss": 2.3122, "step": 12558 }, { "epoch": 0.6737660944206009, "grad_norm": 0.4296875, "learning_rate": 4.857494612932781e-06, "loss": 2.3283, "step": 12559 }, { "epoch": 0.6738197424892703, "grad_norm": 0.55078125, "learning_rate": 4.857465699116832e-06, "loss": 2.382, "step": 12560 }, { "epoch": 0.6738733905579399, "grad_norm": 0.51953125, "learning_rate": 4.857436782454e-06, "loss": 2.276, "step": 12561 }, { "epoch": 0.6739270386266094, "grad_norm": 0.4765625, "learning_rate": 4.8574078629443174e-06, "loss": 2.3359, "step": 12562 }, { "epoch": 0.673980686695279, "grad_norm": 0.447265625, "learning_rate": 4.85737894058782e-06, "loss": 1.3447, "step": 12563 }, { "epoch": 0.6740343347639485, "grad_norm": 0.46484375, "learning_rate": 4.857350015384544e-06, "loss": 2.2518, "step": 12564 }, { "epoch": 0.6740879828326181, "grad_norm": 0.470703125, "learning_rate": 4.857321087334523e-06, "loss": 2.3759, "step": 12565 }, { "epoch": 0.6741416309012875, "grad_norm": 1.046875, "learning_rate": 4.8572921564377915e-06, "loss": 2.1801, "step": 12566 }, { "epoch": 0.6741952789699571, "grad_norm": 0.41015625, "learning_rate": 4.8572632226943864e-06, "loss": 2.1873, "step": 12567 }, { "epoch": 0.6742489270386266, "grad_norm": 0.466796875, "learning_rate": 4.85723428610434e-06, "loss": 2.3836, "step": 12568 }, { "epoch": 0.6743025751072962, "grad_norm": 0.4453125, "learning_rate": 4.857205346667689e-06, "loss": 2.3755, "step": 12569 }, { "epoch": 0.6743562231759657, "grad_norm": 0.671875, "learning_rate": 4.857176404384469e-06, "loss": 1.4355, "step": 12570 }, { "epoch": 0.6744098712446351, "grad_norm": 0.4765625, "learning_rate": 4.857147459254713e-06, "loss": 2.3121, "step": 12571 }, { "epoch": 0.6744635193133047, "grad_norm": 0.4453125, "learning_rate": 4.857118511278457e-06, "loss": 2.1192, "step": 12572 }, { "epoch": 0.6745171673819742, "grad_norm": 0.435546875, "learning_rate": 4.857089560455737e-06, "loss": 2.3757, "step": 12573 }, { "epoch": 0.6745708154506438, "grad_norm": 0.4921875, "learning_rate": 4.857060606786585e-06, "loss": 2.5113, "step": 12574 }, { "epoch": 0.6746244635193133, "grad_norm": 0.68359375, "learning_rate": 4.857031650271039e-06, "loss": 2.4231, "step": 12575 }, { "epoch": 0.6746781115879829, "grad_norm": 0.42578125, "learning_rate": 4.857002690909133e-06, "loss": 2.2784, "step": 12576 }, { "epoch": 0.6747317596566523, "grad_norm": 0.48828125, "learning_rate": 4.856973728700901e-06, "loss": 2.3156, "step": 12577 }, { "epoch": 0.6747854077253219, "grad_norm": 0.396484375, "learning_rate": 4.856944763646378e-06, "loss": 2.4221, "step": 12578 }, { "epoch": 0.6748390557939914, "grad_norm": 0.58984375, "learning_rate": 4.856915795745601e-06, "loss": 2.4856, "step": 12579 }, { "epoch": 0.674892703862661, "grad_norm": 0.4609375, "learning_rate": 4.856886824998603e-06, "loss": 2.452, "step": 12580 }, { "epoch": 0.6749463519313305, "grad_norm": 0.4765625, "learning_rate": 4.85685785140542e-06, "loss": 2.1973, "step": 12581 }, { "epoch": 0.675, "grad_norm": 0.4921875, "learning_rate": 4.856828874966086e-06, "loss": 2.1959, "step": 12582 }, { "epoch": 0.6750536480686695, "grad_norm": 0.435546875, "learning_rate": 4.856799895680637e-06, "loss": 2.469, "step": 12583 }, { "epoch": 0.6751072961373391, "grad_norm": 0.470703125, "learning_rate": 4.856770913549107e-06, "loss": 2.355, "step": 12584 }, { "epoch": 0.6751609442060086, "grad_norm": 0.40625, "learning_rate": 4.856741928571532e-06, "loss": 2.3163, "step": 12585 }, { "epoch": 0.6752145922746781, "grad_norm": 0.455078125, "learning_rate": 4.856712940747947e-06, "loss": 2.5179, "step": 12586 }, { "epoch": 0.6752682403433476, "grad_norm": 0.416015625, "learning_rate": 4.856683950078387e-06, "loss": 2.2818, "step": 12587 }, { "epoch": 0.6753218884120171, "grad_norm": 0.400390625, "learning_rate": 4.8566549565628865e-06, "loss": 2.2808, "step": 12588 }, { "epoch": 0.6753755364806867, "grad_norm": 0.484375, "learning_rate": 4.85662596020148e-06, "loss": 2.4493, "step": 12589 }, { "epoch": 0.6754291845493562, "grad_norm": 0.50390625, "learning_rate": 4.8565969609942034e-06, "loss": 2.1927, "step": 12590 }, { "epoch": 0.6754828326180258, "grad_norm": 0.431640625, "learning_rate": 4.856567958941091e-06, "loss": 2.2909, "step": 12591 }, { "epoch": 0.6755364806866953, "grad_norm": 0.5546875, "learning_rate": 4.856538954042179e-06, "loss": 1.5986, "step": 12592 }, { "epoch": 0.6755901287553648, "grad_norm": 0.478515625, "learning_rate": 4.856509946297501e-06, "loss": 1.9933, "step": 12593 }, { "epoch": 0.6756437768240343, "grad_norm": 0.390625, "learning_rate": 4.856480935707094e-06, "loss": 2.124, "step": 12594 }, { "epoch": 0.6756974248927039, "grad_norm": 0.40625, "learning_rate": 4.85645192227099e-06, "loss": 2.3859, "step": 12595 }, { "epoch": 0.6757510729613734, "grad_norm": 0.5, "learning_rate": 4.856422905989228e-06, "loss": 2.2525, "step": 12596 }, { "epoch": 0.675804721030043, "grad_norm": 0.486328125, "learning_rate": 4.856393886861839e-06, "loss": 2.4381, "step": 12597 }, { "epoch": 0.6758583690987124, "grad_norm": 0.50390625, "learning_rate": 4.8563648648888605e-06, "loss": 2.1525, "step": 12598 }, { "epoch": 0.675912017167382, "grad_norm": 0.51953125, "learning_rate": 4.8563358400703274e-06, "loss": 2.2483, "step": 12599 }, { "epoch": 0.6759656652360515, "grad_norm": 0.42578125, "learning_rate": 4.856306812406274e-06, "loss": 2.3103, "step": 12600 }, { "epoch": 0.676019313304721, "grad_norm": 0.4609375, "learning_rate": 4.856277781896735e-06, "loss": 2.0467, "step": 12601 }, { "epoch": 0.6760729613733906, "grad_norm": 0.42578125, "learning_rate": 4.8562487485417474e-06, "loss": 2.3124, "step": 12602 }, { "epoch": 0.67612660944206, "grad_norm": 0.4453125, "learning_rate": 4.856219712341345e-06, "loss": 2.2226, "step": 12603 }, { "epoch": 0.6761802575107296, "grad_norm": 0.4921875, "learning_rate": 4.856190673295562e-06, "loss": 2.3319, "step": 12604 }, { "epoch": 0.6762339055793991, "grad_norm": 0.365234375, "learning_rate": 4.856161631404434e-06, "loss": 1.9439, "step": 12605 }, { "epoch": 0.6762875536480687, "grad_norm": 0.4453125, "learning_rate": 4.856132586667997e-06, "loss": 2.336, "step": 12606 }, { "epoch": 0.6763412017167382, "grad_norm": 0.5859375, "learning_rate": 4.856103539086286e-06, "loss": 2.4578, "step": 12607 }, { "epoch": 0.6763948497854078, "grad_norm": 0.46875, "learning_rate": 4.8560744886593344e-06, "loss": 2.2679, "step": 12608 }, { "epoch": 0.6764484978540772, "grad_norm": 0.40234375, "learning_rate": 4.856045435387179e-06, "loss": 2.3595, "step": 12609 }, { "epoch": 0.6765021459227468, "grad_norm": 0.96875, "learning_rate": 4.8560163792698545e-06, "loss": 2.1481, "step": 12610 }, { "epoch": 0.6765557939914163, "grad_norm": 0.435546875, "learning_rate": 4.8559873203073945e-06, "loss": 1.3531, "step": 12611 }, { "epoch": 0.6766094420600859, "grad_norm": 0.490234375, "learning_rate": 4.855958258499837e-06, "loss": 2.1455, "step": 12612 }, { "epoch": 0.6766630901287554, "grad_norm": 0.5, "learning_rate": 4.855929193847215e-06, "loss": 2.3504, "step": 12613 }, { "epoch": 0.6767167381974248, "grad_norm": 0.466796875, "learning_rate": 4.855900126349564e-06, "loss": 2.2276, "step": 12614 }, { "epoch": 0.6767703862660944, "grad_norm": 0.486328125, "learning_rate": 4.855871056006919e-06, "loss": 2.364, "step": 12615 }, { "epoch": 0.6768240343347639, "grad_norm": 0.515625, "learning_rate": 4.855841982819316e-06, "loss": 2.4897, "step": 12616 }, { "epoch": 0.6768776824034335, "grad_norm": 0.49609375, "learning_rate": 4.855812906786788e-06, "loss": 2.2919, "step": 12617 }, { "epoch": 0.676931330472103, "grad_norm": 0.455078125, "learning_rate": 4.855783827909373e-06, "loss": 2.0476, "step": 12618 }, { "epoch": 0.6769849785407726, "grad_norm": 0.412109375, "learning_rate": 4.855754746187104e-06, "loss": 2.093, "step": 12619 }, { "epoch": 0.677038626609442, "grad_norm": 0.59765625, "learning_rate": 4.855725661620016e-06, "loss": 1.9557, "step": 12620 }, { "epoch": 0.6770922746781116, "grad_norm": 0.44921875, "learning_rate": 4.855696574208146e-06, "loss": 2.1739, "step": 12621 }, { "epoch": 0.6771459227467811, "grad_norm": 0.5546875, "learning_rate": 4.855667483951527e-06, "loss": 2.1745, "step": 12622 }, { "epoch": 0.6771995708154507, "grad_norm": 0.38671875, "learning_rate": 4.855638390850196e-06, "loss": 2.3284, "step": 12623 }, { "epoch": 0.6772532188841202, "grad_norm": 0.625, "learning_rate": 4.855609294904186e-06, "loss": 2.3476, "step": 12624 }, { "epoch": 0.6773068669527897, "grad_norm": 0.474609375, "learning_rate": 4.855580196113535e-06, "loss": 2.3978, "step": 12625 }, { "epoch": 0.6773605150214592, "grad_norm": 0.421875, "learning_rate": 4.855551094478276e-06, "loss": 2.2696, "step": 12626 }, { "epoch": 0.6774141630901288, "grad_norm": 0.416015625, "learning_rate": 4.855521989998444e-06, "loss": 2.1374, "step": 12627 }, { "epoch": 0.6774678111587983, "grad_norm": 0.416015625, "learning_rate": 4.855492882674075e-06, "loss": 2.3093, "step": 12628 }, { "epoch": 0.6775214592274678, "grad_norm": 0.4609375, "learning_rate": 4.855463772505205e-06, "loss": 2.2369, "step": 12629 }, { "epoch": 0.6775751072961373, "grad_norm": 0.484375, "learning_rate": 4.8554346594918676e-06, "loss": 2.248, "step": 12630 }, { "epoch": 0.6776287553648068, "grad_norm": 0.412109375, "learning_rate": 4.855405543634098e-06, "loss": 2.2725, "step": 12631 }, { "epoch": 0.6776824034334764, "grad_norm": 0.423828125, "learning_rate": 4.855376424931932e-06, "loss": 1.9811, "step": 12632 }, { "epoch": 0.6777360515021459, "grad_norm": 0.421875, "learning_rate": 4.855347303385405e-06, "loss": 2.1499, "step": 12633 }, { "epoch": 0.6777896995708155, "grad_norm": 0.466796875, "learning_rate": 4.855318178994552e-06, "loss": 2.6702, "step": 12634 }, { "epoch": 0.677843347639485, "grad_norm": 0.5078125, "learning_rate": 4.855289051759407e-06, "loss": 2.3291, "step": 12635 }, { "epoch": 0.6778969957081545, "grad_norm": 0.443359375, "learning_rate": 4.855259921680006e-06, "loss": 2.2084, "step": 12636 }, { "epoch": 0.677950643776824, "grad_norm": 0.44921875, "learning_rate": 4.855230788756385e-06, "loss": 2.3914, "step": 12637 }, { "epoch": 0.6780042918454936, "grad_norm": 0.453125, "learning_rate": 4.855201652988578e-06, "loss": 1.9932, "step": 12638 }, { "epoch": 0.6780579399141631, "grad_norm": 0.515625, "learning_rate": 4.855172514376622e-06, "loss": 2.3948, "step": 12639 }, { "epoch": 0.6781115879828327, "grad_norm": 5.15625, "learning_rate": 4.855143372920549e-06, "loss": 1.2581, "step": 12640 }, { "epoch": 0.6781652360515021, "grad_norm": 0.427734375, "learning_rate": 4.855114228620396e-06, "loss": 2.2659, "step": 12641 }, { "epoch": 0.6782188841201717, "grad_norm": 0.41796875, "learning_rate": 4.855085081476199e-06, "loss": 2.1148, "step": 12642 }, { "epoch": 0.6782725321888412, "grad_norm": 0.421875, "learning_rate": 4.855055931487993e-06, "loss": 2.3002, "step": 12643 }, { "epoch": 0.6783261802575107, "grad_norm": 0.40234375, "learning_rate": 4.855026778655812e-06, "loss": 2.2737, "step": 12644 }, { "epoch": 0.6783798283261803, "grad_norm": 0.400390625, "learning_rate": 4.854997622979693e-06, "loss": 2.2341, "step": 12645 }, { "epoch": 0.6784334763948497, "grad_norm": 0.43359375, "learning_rate": 4.854968464459669e-06, "loss": 2.2136, "step": 12646 }, { "epoch": 0.6784871244635193, "grad_norm": 0.515625, "learning_rate": 4.8549393030957755e-06, "loss": 2.1988, "step": 12647 }, { "epoch": 0.6785407725321888, "grad_norm": 0.41015625, "learning_rate": 4.85491013888805e-06, "loss": 2.344, "step": 12648 }, { "epoch": 0.6785944206008584, "grad_norm": 0.47265625, "learning_rate": 4.854880971836525e-06, "loss": 2.3994, "step": 12649 }, { "epoch": 0.6786480686695279, "grad_norm": 0.453125, "learning_rate": 4.854851801941237e-06, "loss": 2.4463, "step": 12650 }, { "epoch": 0.6787017167381975, "grad_norm": 0.435546875, "learning_rate": 4.854822629202222e-06, "loss": 2.506, "step": 12651 }, { "epoch": 0.6787553648068669, "grad_norm": 0.43359375, "learning_rate": 4.854793453619514e-06, "loss": 2.4141, "step": 12652 }, { "epoch": 0.6788090128755365, "grad_norm": 0.515625, "learning_rate": 4.854764275193148e-06, "loss": 2.4908, "step": 12653 }, { "epoch": 0.678862660944206, "grad_norm": 0.46875, "learning_rate": 4.8547350939231605e-06, "loss": 2.4408, "step": 12654 }, { "epoch": 0.6789163090128756, "grad_norm": 0.416015625, "learning_rate": 4.854705909809586e-06, "loss": 2.2668, "step": 12655 }, { "epoch": 0.6789699570815451, "grad_norm": 0.50390625, "learning_rate": 4.854676722852459e-06, "loss": 2.5308, "step": 12656 }, { "epoch": 0.6790236051502145, "grad_norm": 0.466796875, "learning_rate": 4.854647533051817e-06, "loss": 2.0071, "step": 12657 }, { "epoch": 0.6790772532188841, "grad_norm": 0.408203125, "learning_rate": 4.854618340407693e-06, "loss": 2.2618, "step": 12658 }, { "epoch": 0.6791309012875536, "grad_norm": 1.3359375, "learning_rate": 4.854589144920123e-06, "loss": 2.2062, "step": 12659 }, { "epoch": 0.6791845493562232, "grad_norm": 1.6328125, "learning_rate": 4.854559946589142e-06, "loss": 2.3659, "step": 12660 }, { "epoch": 0.6792381974248927, "grad_norm": 0.62890625, "learning_rate": 4.854530745414786e-06, "loss": 2.1632, "step": 12661 }, { "epoch": 0.6792918454935623, "grad_norm": 0.5078125, "learning_rate": 4.85450154139709e-06, "loss": 2.6386, "step": 12662 }, { "epoch": 0.6793454935622317, "grad_norm": 0.8203125, "learning_rate": 4.854472334536089e-06, "loss": 2.0966, "step": 12663 }, { "epoch": 0.6793991416309013, "grad_norm": 0.443359375, "learning_rate": 4.854443124831819e-06, "loss": 2.3301, "step": 12664 }, { "epoch": 0.6794527896995708, "grad_norm": 0.44921875, "learning_rate": 4.8544139122843135e-06, "loss": 2.3595, "step": 12665 }, { "epoch": 0.6795064377682404, "grad_norm": 0.46484375, "learning_rate": 4.85438469689361e-06, "loss": 2.307, "step": 12666 }, { "epoch": 0.6795600858369099, "grad_norm": 0.474609375, "learning_rate": 4.854355478659742e-06, "loss": 2.1022, "step": 12667 }, { "epoch": 0.6796137339055794, "grad_norm": 0.51171875, "learning_rate": 4.854326257582745e-06, "loss": 2.5401, "step": 12668 }, { "epoch": 0.6796673819742489, "grad_norm": 0.43359375, "learning_rate": 4.854297033662656e-06, "loss": 2.3536, "step": 12669 }, { "epoch": 0.6797210300429185, "grad_norm": 0.5, "learning_rate": 4.854267806899509e-06, "loss": 2.2706, "step": 12670 }, { "epoch": 0.679774678111588, "grad_norm": 0.54296875, "learning_rate": 4.854238577293339e-06, "loss": 2.1222, "step": 12671 }, { "epoch": 0.6798283261802575, "grad_norm": 0.419921875, "learning_rate": 4.854209344844181e-06, "loss": 2.4186, "step": 12672 }, { "epoch": 0.679881974248927, "grad_norm": 0.470703125, "learning_rate": 4.854180109552072e-06, "loss": 2.2238, "step": 12673 }, { "epoch": 0.6799356223175965, "grad_norm": 0.54296875, "learning_rate": 4.854150871417046e-06, "loss": 2.3162, "step": 12674 }, { "epoch": 0.6799892703862661, "grad_norm": 0.447265625, "learning_rate": 4.854121630439138e-06, "loss": 2.3421, "step": 12675 }, { "epoch": 0.6800429184549356, "grad_norm": 0.462890625, "learning_rate": 4.854092386618384e-06, "loss": 2.4187, "step": 12676 }, { "epoch": 0.6800965665236052, "grad_norm": 0.490234375, "learning_rate": 4.85406313995482e-06, "loss": 2.4221, "step": 12677 }, { "epoch": 0.6801502145922746, "grad_norm": 0.4765625, "learning_rate": 4.854033890448481e-06, "loss": 2.4142, "step": 12678 }, { "epoch": 0.6802038626609442, "grad_norm": 0.44921875, "learning_rate": 4.8540046380994e-06, "loss": 2.2045, "step": 12679 }, { "epoch": 0.6802575107296137, "grad_norm": 0.52734375, "learning_rate": 4.853975382907616e-06, "loss": 2.561, "step": 12680 }, { "epoch": 0.6803111587982833, "grad_norm": 0.53125, "learning_rate": 4.853946124873161e-06, "loss": 1.4767, "step": 12681 }, { "epoch": 0.6803648068669528, "grad_norm": 0.4453125, "learning_rate": 4.853916863996072e-06, "loss": 2.5475, "step": 12682 }, { "epoch": 0.6804184549356224, "grad_norm": 0.46484375, "learning_rate": 4.8538876002763845e-06, "loss": 2.1947, "step": 12683 }, { "epoch": 0.6804721030042918, "grad_norm": 0.345703125, "learning_rate": 4.853858333714134e-06, "loss": 2.1924, "step": 12684 }, { "epoch": 0.6805257510729614, "grad_norm": 0.51171875, "learning_rate": 4.8538290643093555e-06, "loss": 2.3768, "step": 12685 }, { "epoch": 0.6805793991416309, "grad_norm": 0.470703125, "learning_rate": 4.853799792062083e-06, "loss": 2.3469, "step": 12686 }, { "epoch": 0.6806330472103004, "grad_norm": 0.482421875, "learning_rate": 4.853770516972353e-06, "loss": 2.1141, "step": 12687 }, { "epoch": 0.68068669527897, "grad_norm": 0.4765625, "learning_rate": 4.853741239040203e-06, "loss": 2.1777, "step": 12688 }, { "epoch": 0.6807403433476394, "grad_norm": 0.423828125, "learning_rate": 4.8537119582656644e-06, "loss": 1.6733, "step": 12689 }, { "epoch": 0.680793991416309, "grad_norm": 0.447265625, "learning_rate": 4.853682674648775e-06, "loss": 2.2257, "step": 12690 }, { "epoch": 0.6808476394849785, "grad_norm": 0.4296875, "learning_rate": 4.85365338818957e-06, "loss": 2.3214, "step": 12691 }, { "epoch": 0.6809012875536481, "grad_norm": 0.349609375, "learning_rate": 4.853624098888083e-06, "loss": 2.1201, "step": 12692 }, { "epoch": 0.6809549356223176, "grad_norm": 0.58203125, "learning_rate": 4.853594806744353e-06, "loss": 2.1543, "step": 12693 }, { "epoch": 0.6810085836909872, "grad_norm": 0.4375, "learning_rate": 4.853565511758411e-06, "loss": 2.3863, "step": 12694 }, { "epoch": 0.6810622317596566, "grad_norm": 0.427734375, "learning_rate": 4.853536213930295e-06, "loss": 2.1193, "step": 12695 }, { "epoch": 0.6811158798283262, "grad_norm": 0.47265625, "learning_rate": 4.85350691326004e-06, "loss": 2.2245, "step": 12696 }, { "epoch": 0.6811695278969957, "grad_norm": 0.466796875, "learning_rate": 4.853477609747681e-06, "loss": 2.3257, "step": 12697 }, { "epoch": 0.6812231759656653, "grad_norm": 0.408203125, "learning_rate": 4.853448303393254e-06, "loss": 1.8116, "step": 12698 }, { "epoch": 0.6812768240343348, "grad_norm": 0.49609375, "learning_rate": 4.853418994196794e-06, "loss": 2.3174, "step": 12699 }, { "epoch": 0.6813304721030042, "grad_norm": 0.42578125, "learning_rate": 4.853389682158336e-06, "loss": 2.3773, "step": 12700 }, { "epoch": 0.6813841201716738, "grad_norm": 0.46484375, "learning_rate": 4.8533603672779165e-06, "loss": 2.5045, "step": 12701 }, { "epoch": 0.6814377682403433, "grad_norm": 0.4453125, "learning_rate": 4.85333104955557e-06, "loss": 2.3892, "step": 12702 }, { "epoch": 0.6814914163090129, "grad_norm": 0.4296875, "learning_rate": 4.853301728991331e-06, "loss": 2.2981, "step": 12703 }, { "epoch": 0.6815450643776824, "grad_norm": 0.4453125, "learning_rate": 4.8532724055852374e-06, "loss": 2.1296, "step": 12704 }, { "epoch": 0.681598712446352, "grad_norm": 0.4296875, "learning_rate": 4.853243079337323e-06, "loss": 2.2329, "step": 12705 }, { "epoch": 0.6816523605150214, "grad_norm": 0.458984375, "learning_rate": 4.853213750247623e-06, "loss": 2.3145, "step": 12706 }, { "epoch": 0.681706008583691, "grad_norm": 0.427734375, "learning_rate": 4.853184418316173e-06, "loss": 2.3705, "step": 12707 }, { "epoch": 0.6817596566523605, "grad_norm": 0.671875, "learning_rate": 4.853155083543009e-06, "loss": 2.3999, "step": 12708 }, { "epoch": 0.6818133047210301, "grad_norm": 0.419921875, "learning_rate": 4.853125745928166e-06, "loss": 2.4163, "step": 12709 }, { "epoch": 0.6818669527896996, "grad_norm": 0.361328125, "learning_rate": 4.85309640547168e-06, "loss": 2.1544, "step": 12710 }, { "epoch": 0.6819206008583691, "grad_norm": 1.046875, "learning_rate": 4.853067062173585e-06, "loss": 2.2486, "step": 12711 }, { "epoch": 0.6819742489270386, "grad_norm": 0.5390625, "learning_rate": 4.8530377160339186e-06, "loss": 2.4506, "step": 12712 }, { "epoch": 0.6820278969957082, "grad_norm": 0.40234375, "learning_rate": 4.853008367052715e-06, "loss": 2.131, "step": 12713 }, { "epoch": 0.6820815450643777, "grad_norm": 0.490234375, "learning_rate": 4.852979015230009e-06, "loss": 2.1259, "step": 12714 }, { "epoch": 0.6821351931330472, "grad_norm": 0.37890625, "learning_rate": 4.8529496605658365e-06, "loss": 1.9769, "step": 12715 }, { "epoch": 0.6821888412017167, "grad_norm": 0.57421875, "learning_rate": 4.852920303060234e-06, "loss": 2.2635, "step": 12716 }, { "epoch": 0.6822424892703862, "grad_norm": 0.41796875, "learning_rate": 4.852890942713236e-06, "loss": 2.247, "step": 12717 }, { "epoch": 0.6822961373390558, "grad_norm": 0.4765625, "learning_rate": 4.852861579524877e-06, "loss": 2.3567, "step": 12718 }, { "epoch": 0.6823497854077253, "grad_norm": 1.703125, "learning_rate": 4.852832213495194e-06, "loss": 2.385, "step": 12719 }, { "epoch": 0.6824034334763949, "grad_norm": 0.41796875, "learning_rate": 4.852802844624223e-06, "loss": 2.4627, "step": 12720 }, { "epoch": 0.6824570815450643, "grad_norm": 0.42578125, "learning_rate": 4.852773472911998e-06, "loss": 2.151, "step": 12721 }, { "epoch": 0.6825107296137339, "grad_norm": 0.388671875, "learning_rate": 4.852744098358554e-06, "loss": 2.2107, "step": 12722 }, { "epoch": 0.6825643776824034, "grad_norm": 0.41796875, "learning_rate": 4.852714720963928e-06, "loss": 1.9504, "step": 12723 }, { "epoch": 0.682618025751073, "grad_norm": 0.458984375, "learning_rate": 4.852685340728155e-06, "loss": 2.2785, "step": 12724 }, { "epoch": 0.6826716738197425, "grad_norm": 0.43359375, "learning_rate": 4.852655957651271e-06, "loss": 2.1967, "step": 12725 }, { "epoch": 0.6827253218884121, "grad_norm": 0.404296875, "learning_rate": 4.85262657173331e-06, "loss": 2.2051, "step": 12726 }, { "epoch": 0.6827789699570815, "grad_norm": 0.41015625, "learning_rate": 4.852597182974308e-06, "loss": 2.5159, "step": 12727 }, { "epoch": 0.6828326180257511, "grad_norm": 0.375, "learning_rate": 4.852567791374302e-06, "loss": 2.0664, "step": 12728 }, { "epoch": 0.6828862660944206, "grad_norm": 0.431640625, "learning_rate": 4.852538396933326e-06, "loss": 2.2745, "step": 12729 }, { "epoch": 0.6829399141630901, "grad_norm": 0.4765625, "learning_rate": 4.852508999651416e-06, "loss": 2.2205, "step": 12730 }, { "epoch": 0.6829935622317597, "grad_norm": 0.4765625, "learning_rate": 4.852479599528606e-06, "loss": 2.4256, "step": 12731 }, { "epoch": 0.6830472103004291, "grad_norm": 0.5625, "learning_rate": 4.852450196564934e-06, "loss": 2.3759, "step": 12732 }, { "epoch": 0.6831008583690987, "grad_norm": 0.56640625, "learning_rate": 4.852420790760434e-06, "loss": 2.305, "step": 12733 }, { "epoch": 0.6831545064377682, "grad_norm": 0.51953125, "learning_rate": 4.852391382115141e-06, "loss": 2.3046, "step": 12734 }, { "epoch": 0.6832081545064378, "grad_norm": 0.380859375, "learning_rate": 4.8523619706290924e-06, "loss": 2.1631, "step": 12735 }, { "epoch": 0.6832618025751073, "grad_norm": 0.43359375, "learning_rate": 4.852332556302323e-06, "loss": 2.0389, "step": 12736 }, { "epoch": 0.6833154506437769, "grad_norm": 0.4296875, "learning_rate": 4.852303139134868e-06, "loss": 2.4124, "step": 12737 }, { "epoch": 0.6833690987124463, "grad_norm": 4.28125, "learning_rate": 4.852273719126761e-06, "loss": 2.2199, "step": 12738 }, { "epoch": 0.6834227467811159, "grad_norm": 0.4453125, "learning_rate": 4.852244296278042e-06, "loss": 2.2605, "step": 12739 }, { "epoch": 0.6834763948497854, "grad_norm": 0.49609375, "learning_rate": 4.852214870588742e-06, "loss": 2.3796, "step": 12740 }, { "epoch": 0.683530042918455, "grad_norm": 0.453125, "learning_rate": 4.8521854420588995e-06, "loss": 2.2782, "step": 12741 }, { "epoch": 0.6835836909871245, "grad_norm": 0.4765625, "learning_rate": 4.852156010688549e-06, "loss": 2.328, "step": 12742 }, { "epoch": 0.683637339055794, "grad_norm": 0.41796875, "learning_rate": 4.852126576477726e-06, "loss": 2.122, "step": 12743 }, { "epoch": 0.6836909871244635, "grad_norm": 0.375, "learning_rate": 4.852097139426465e-06, "loss": 1.9893, "step": 12744 }, { "epoch": 0.683744635193133, "grad_norm": 0.439453125, "learning_rate": 4.852067699534804e-06, "loss": 2.39, "step": 12745 }, { "epoch": 0.6837982832618026, "grad_norm": 0.474609375, "learning_rate": 4.8520382568027765e-06, "loss": 2.4284, "step": 12746 }, { "epoch": 0.6838519313304721, "grad_norm": 0.427734375, "learning_rate": 4.852008811230419e-06, "loss": 2.2292, "step": 12747 }, { "epoch": 0.6839055793991416, "grad_norm": 0.65234375, "learning_rate": 4.851979362817768e-06, "loss": 2.1811, "step": 12748 }, { "epoch": 0.6839592274678111, "grad_norm": 0.4140625, "learning_rate": 4.851949911564857e-06, "loss": 2.1949, "step": 12749 }, { "epoch": 0.6840128755364807, "grad_norm": 0.451171875, "learning_rate": 4.851920457471723e-06, "loss": 2.283, "step": 12750 }, { "epoch": 0.6840665236051502, "grad_norm": 0.458984375, "learning_rate": 4.8518910005384e-06, "loss": 2.1659, "step": 12751 }, { "epoch": 0.6841201716738198, "grad_norm": 0.423828125, "learning_rate": 4.851861540764925e-06, "loss": 2.4222, "step": 12752 }, { "epoch": 0.6841738197424893, "grad_norm": 0.439453125, "learning_rate": 4.851832078151333e-06, "loss": 2.4696, "step": 12753 }, { "epoch": 0.6842274678111588, "grad_norm": 0.4765625, "learning_rate": 4.85180261269766e-06, "loss": 2.3804, "step": 12754 }, { "epoch": 0.6842811158798283, "grad_norm": 0.447265625, "learning_rate": 4.851773144403941e-06, "loss": 2.3106, "step": 12755 }, { "epoch": 0.6843347639484979, "grad_norm": 0.671875, "learning_rate": 4.851743673270213e-06, "loss": 2.3863, "step": 12756 }, { "epoch": 0.6843884120171674, "grad_norm": 0.48046875, "learning_rate": 4.85171419929651e-06, "loss": 2.5669, "step": 12757 }, { "epoch": 0.6844420600858369, "grad_norm": 0.462890625, "learning_rate": 4.851684722482868e-06, "loss": 2.424, "step": 12758 }, { "epoch": 0.6844957081545064, "grad_norm": 0.4296875, "learning_rate": 4.851655242829323e-06, "loss": 2.1961, "step": 12759 }, { "epoch": 0.6845493562231759, "grad_norm": 0.439453125, "learning_rate": 4.851625760335909e-06, "loss": 2.3124, "step": 12760 }, { "epoch": 0.6846030042918455, "grad_norm": 0.5234375, "learning_rate": 4.851596275002664e-06, "loss": 2.1225, "step": 12761 }, { "epoch": 0.684656652360515, "grad_norm": 0.515625, "learning_rate": 4.851566786829624e-06, "loss": 2.2532, "step": 12762 }, { "epoch": 0.6847103004291846, "grad_norm": 0.4375, "learning_rate": 4.8515372958168205e-06, "loss": 2.4447, "step": 12763 }, { "epoch": 0.684763948497854, "grad_norm": 0.4609375, "learning_rate": 4.8515078019642935e-06, "loss": 2.0042, "step": 12764 }, { "epoch": 0.6848175965665236, "grad_norm": 0.5, "learning_rate": 4.851478305272076e-06, "loss": 2.2535, "step": 12765 }, { "epoch": 0.6848712446351931, "grad_norm": 0.494140625, "learning_rate": 4.851448805740204e-06, "loss": 2.3501, "step": 12766 }, { "epoch": 0.6849248927038627, "grad_norm": 0.427734375, "learning_rate": 4.851419303368715e-06, "loss": 1.7391, "step": 12767 }, { "epoch": 0.6849785407725322, "grad_norm": 0.4140625, "learning_rate": 4.8513897981576425e-06, "loss": 2.0992, "step": 12768 }, { "epoch": 0.6850321888412018, "grad_norm": 0.490234375, "learning_rate": 4.851360290107023e-06, "loss": 2.3258, "step": 12769 }, { "epoch": 0.6850858369098712, "grad_norm": 0.46875, "learning_rate": 4.851330779216892e-06, "loss": 2.189, "step": 12770 }, { "epoch": 0.6851394849785408, "grad_norm": 0.427734375, "learning_rate": 4.851301265487285e-06, "loss": 2.1045, "step": 12771 }, { "epoch": 0.6851931330472103, "grad_norm": 0.54296875, "learning_rate": 4.851271748918238e-06, "loss": 2.2936, "step": 12772 }, { "epoch": 0.6852467811158798, "grad_norm": 0.46875, "learning_rate": 4.851242229509786e-06, "loss": 2.3077, "step": 12773 }, { "epoch": 0.6853004291845494, "grad_norm": 0.474609375, "learning_rate": 4.851212707261965e-06, "loss": 2.2978, "step": 12774 }, { "epoch": 0.6853540772532188, "grad_norm": 0.5, "learning_rate": 4.851183182174811e-06, "loss": 2.2827, "step": 12775 }, { "epoch": 0.6854077253218884, "grad_norm": 0.458984375, "learning_rate": 4.85115365424836e-06, "loss": 2.0839, "step": 12776 }, { "epoch": 0.6854613733905579, "grad_norm": 0.49609375, "learning_rate": 4.8511241234826465e-06, "loss": 2.135, "step": 12777 }, { "epoch": 0.6855150214592275, "grad_norm": 0.431640625, "learning_rate": 4.851094589877707e-06, "loss": 2.3878, "step": 12778 }, { "epoch": 0.685568669527897, "grad_norm": 0.478515625, "learning_rate": 4.851065053433576e-06, "loss": 2.1973, "step": 12779 }, { "epoch": 0.6856223175965666, "grad_norm": 0.396484375, "learning_rate": 4.85103551415029e-06, "loss": 2.2258, "step": 12780 }, { "epoch": 0.685675965665236, "grad_norm": 0.474609375, "learning_rate": 4.851005972027886e-06, "loss": 2.2777, "step": 12781 }, { "epoch": 0.6857296137339056, "grad_norm": 0.51171875, "learning_rate": 4.850976427066398e-06, "loss": 2.2261, "step": 12782 }, { "epoch": 0.6857832618025751, "grad_norm": 0.4921875, "learning_rate": 4.8509468792658615e-06, "loss": 2.4445, "step": 12783 }, { "epoch": 0.6858369098712447, "grad_norm": 0.546875, "learning_rate": 4.850917328626313e-06, "loss": 2.1629, "step": 12784 }, { "epoch": 0.6858905579399142, "grad_norm": 0.349609375, "learning_rate": 4.850887775147788e-06, "loss": 2.1521, "step": 12785 }, { "epoch": 0.6859442060085837, "grad_norm": 1.046875, "learning_rate": 4.850858218830322e-06, "loss": 2.295, "step": 12786 }, { "epoch": 0.6859978540772532, "grad_norm": 0.384765625, "learning_rate": 4.85082865967395e-06, "loss": 2.03, "step": 12787 }, { "epoch": 0.6860515021459227, "grad_norm": 0.4765625, "learning_rate": 4.85079909767871e-06, "loss": 2.1925, "step": 12788 }, { "epoch": 0.6861051502145923, "grad_norm": 0.431640625, "learning_rate": 4.850769532844635e-06, "loss": 2.0351, "step": 12789 }, { "epoch": 0.6861587982832618, "grad_norm": 0.453125, "learning_rate": 4.850739965171762e-06, "loss": 2.3859, "step": 12790 }, { "epoch": 0.6862124463519313, "grad_norm": 0.515625, "learning_rate": 4.850710394660128e-06, "loss": 2.3576, "step": 12791 }, { "epoch": 0.6862660944206008, "grad_norm": 0.59375, "learning_rate": 4.8506808213097665e-06, "loss": 1.5238, "step": 12792 }, { "epoch": 0.6863197424892704, "grad_norm": 0.490234375, "learning_rate": 4.850651245120713e-06, "loss": 2.5347, "step": 12793 }, { "epoch": 0.6863733905579399, "grad_norm": 0.376953125, "learning_rate": 4.850621666093005e-06, "loss": 1.9016, "step": 12794 }, { "epoch": 0.6864270386266095, "grad_norm": 0.400390625, "learning_rate": 4.850592084226678e-06, "loss": 2.1588, "step": 12795 }, { "epoch": 0.686480686695279, "grad_norm": 0.45703125, "learning_rate": 4.8505624995217674e-06, "loss": 2.0633, "step": 12796 }, { "epoch": 0.6865343347639485, "grad_norm": 0.5234375, "learning_rate": 4.850532911978307e-06, "loss": 2.3722, "step": 12797 }, { "epoch": 0.686587982832618, "grad_norm": 0.4453125, "learning_rate": 4.850503321596336e-06, "loss": 1.9362, "step": 12798 }, { "epoch": 0.6866416309012876, "grad_norm": 0.5390625, "learning_rate": 4.850473728375888e-06, "loss": 2.2354, "step": 12799 }, { "epoch": 0.6866952789699571, "grad_norm": 0.5078125, "learning_rate": 4.8504441323169984e-06, "loss": 2.5566, "step": 12800 }, { "epoch": 0.6867489270386266, "grad_norm": 0.46484375, "learning_rate": 4.850414533419704e-06, "loss": 2.2593, "step": 12801 }, { "epoch": 0.6868025751072961, "grad_norm": 0.765625, "learning_rate": 4.85038493168404e-06, "loss": 2.1785, "step": 12802 }, { "epoch": 0.6868562231759656, "grad_norm": 0.478515625, "learning_rate": 4.850355327110043e-06, "loss": 2.3082, "step": 12803 }, { "epoch": 0.6869098712446352, "grad_norm": 0.498046875, "learning_rate": 4.850325719697747e-06, "loss": 2.2019, "step": 12804 }, { "epoch": 0.6869635193133047, "grad_norm": 0.50390625, "learning_rate": 4.85029610944719e-06, "loss": 2.5668, "step": 12805 }, { "epoch": 0.6870171673819743, "grad_norm": 0.484375, "learning_rate": 4.850266496358407e-06, "loss": 2.3738, "step": 12806 }, { "epoch": 0.6870708154506437, "grad_norm": 0.41015625, "learning_rate": 4.850236880431432e-06, "loss": 2.2584, "step": 12807 }, { "epoch": 0.6871244635193133, "grad_norm": 0.4609375, "learning_rate": 4.850207261666303e-06, "loss": 2.0897, "step": 12808 }, { "epoch": 0.6871781115879828, "grad_norm": 0.4609375, "learning_rate": 4.850177640063054e-06, "loss": 2.2377, "step": 12809 }, { "epoch": 0.6872317596566524, "grad_norm": 0.46875, "learning_rate": 4.850148015621723e-06, "loss": 1.6721, "step": 12810 }, { "epoch": 0.6872854077253219, "grad_norm": 0.447265625, "learning_rate": 4.850118388342343e-06, "loss": 2.4062, "step": 12811 }, { "epoch": 0.6873390557939915, "grad_norm": 0.4140625, "learning_rate": 4.850088758224952e-06, "loss": 2.2891, "step": 12812 }, { "epoch": 0.6873927038626609, "grad_norm": 0.41015625, "learning_rate": 4.850059125269585e-06, "loss": 2.1556, "step": 12813 }, { "epoch": 0.6874463519313305, "grad_norm": 0.46484375, "learning_rate": 4.850029489476278e-06, "loss": 2.1582, "step": 12814 }, { "epoch": 0.6875, "grad_norm": 0.44140625, "learning_rate": 4.849999850845066e-06, "loss": 2.3827, "step": 12815 }, { "epoch": 0.6875536480686695, "grad_norm": 0.48046875, "learning_rate": 4.849970209375986e-06, "loss": 2.2945, "step": 12816 }, { "epoch": 0.6876072961373391, "grad_norm": 0.349609375, "learning_rate": 4.849940565069073e-06, "loss": 1.7368, "step": 12817 }, { "epoch": 0.6876609442060085, "grad_norm": 0.5078125, "learning_rate": 4.8499109179243624e-06, "loss": 1.5175, "step": 12818 }, { "epoch": 0.6877145922746781, "grad_norm": 0.36328125, "learning_rate": 4.849881267941891e-06, "loss": 1.9276, "step": 12819 }, { "epoch": 0.6877682403433476, "grad_norm": 0.419921875, "learning_rate": 4.849851615121694e-06, "loss": 2.3661, "step": 12820 }, { "epoch": 0.6878218884120172, "grad_norm": 0.7578125, "learning_rate": 4.8498219594638076e-06, "loss": 2.1904, "step": 12821 }, { "epoch": 0.6878755364806867, "grad_norm": 0.419921875, "learning_rate": 4.849792300968268e-06, "loss": 2.1767, "step": 12822 }, { "epoch": 0.6879291845493563, "grad_norm": 1.296875, "learning_rate": 4.84976263963511e-06, "loss": 2.0282, "step": 12823 }, { "epoch": 0.6879828326180257, "grad_norm": 0.72265625, "learning_rate": 4.849732975464369e-06, "loss": 2.3746, "step": 12824 }, { "epoch": 0.6880364806866953, "grad_norm": 0.44921875, "learning_rate": 4.849703308456082e-06, "loss": 2.1902, "step": 12825 }, { "epoch": 0.6880901287553648, "grad_norm": 0.5, "learning_rate": 4.849673638610285e-06, "loss": 2.2943, "step": 12826 }, { "epoch": 0.6881437768240344, "grad_norm": 0.453125, "learning_rate": 4.8496439659270134e-06, "loss": 2.2981, "step": 12827 }, { "epoch": 0.6881974248927039, "grad_norm": 0.4609375, "learning_rate": 4.849614290406302e-06, "loss": 2.0788, "step": 12828 }, { "epoch": 0.6882510729613734, "grad_norm": 0.52734375, "learning_rate": 4.849584612048188e-06, "loss": 2.4239, "step": 12829 }, { "epoch": 0.6883047210300429, "grad_norm": 0.5078125, "learning_rate": 4.849554930852707e-06, "loss": 2.4132, "step": 12830 }, { "epoch": 0.6883583690987124, "grad_norm": 0.376953125, "learning_rate": 4.849525246819895e-06, "loss": 2.304, "step": 12831 }, { "epoch": 0.688412017167382, "grad_norm": 0.486328125, "learning_rate": 4.849495559949787e-06, "loss": 2.3235, "step": 12832 }, { "epoch": 0.6884656652360515, "grad_norm": 0.41796875, "learning_rate": 4.84946587024242e-06, "loss": 1.867, "step": 12833 }, { "epoch": 0.688519313304721, "grad_norm": 0.4453125, "learning_rate": 4.849436177697828e-06, "loss": 1.7693, "step": 12834 }, { "epoch": 0.6885729613733905, "grad_norm": 0.515625, "learning_rate": 4.849406482316049e-06, "loss": 2.4765, "step": 12835 }, { "epoch": 0.6886266094420601, "grad_norm": 0.46875, "learning_rate": 4.849376784097118e-06, "loss": 2.2362, "step": 12836 }, { "epoch": 0.6886802575107296, "grad_norm": 0.423828125, "learning_rate": 4.84934708304107e-06, "loss": 2.2853, "step": 12837 }, { "epoch": 0.6887339055793992, "grad_norm": 0.490234375, "learning_rate": 4.849317379147943e-06, "loss": 2.5206, "step": 12838 }, { "epoch": 0.6887875536480687, "grad_norm": 0.58203125, "learning_rate": 4.849287672417771e-06, "loss": 2.3197, "step": 12839 }, { "epoch": 0.6888412017167382, "grad_norm": 0.42578125, "learning_rate": 4.849257962850589e-06, "loss": 2.1594, "step": 12840 }, { "epoch": 0.6888948497854077, "grad_norm": 0.458984375, "learning_rate": 4.849228250446436e-06, "loss": 2.7862, "step": 12841 }, { "epoch": 0.6889484978540773, "grad_norm": 0.51171875, "learning_rate": 4.849198535205345e-06, "loss": 2.2945, "step": 12842 }, { "epoch": 0.6890021459227468, "grad_norm": 0.43359375, "learning_rate": 4.849168817127354e-06, "loss": 2.1874, "step": 12843 }, { "epoch": 0.6890557939914163, "grad_norm": 0.478515625, "learning_rate": 4.849139096212497e-06, "loss": 2.4017, "step": 12844 }, { "epoch": 0.6891094420600858, "grad_norm": 0.45703125, "learning_rate": 4.849109372460812e-06, "loss": 2.2928, "step": 12845 }, { "epoch": 0.6891630901287553, "grad_norm": 0.376953125, "learning_rate": 4.849079645872333e-06, "loss": 2.1026, "step": 12846 }, { "epoch": 0.6892167381974249, "grad_norm": 0.46875, "learning_rate": 4.849049916447096e-06, "loss": 2.4737, "step": 12847 }, { "epoch": 0.6892703862660944, "grad_norm": 0.50390625, "learning_rate": 4.8490201841851386e-06, "loss": 2.4402, "step": 12848 }, { "epoch": 0.689324034334764, "grad_norm": 0.56640625, "learning_rate": 4.848990449086496e-06, "loss": 2.1952, "step": 12849 }, { "epoch": 0.6893776824034334, "grad_norm": 0.439453125, "learning_rate": 4.848960711151202e-06, "loss": 2.127, "step": 12850 }, { "epoch": 0.689431330472103, "grad_norm": 0.443359375, "learning_rate": 4.848930970379295e-06, "loss": 2.2638, "step": 12851 }, { "epoch": 0.6894849785407725, "grad_norm": 0.7890625, "learning_rate": 4.8489012267708104e-06, "loss": 2.1772, "step": 12852 }, { "epoch": 0.6895386266094421, "grad_norm": 0.396484375, "learning_rate": 4.848871480325784e-06, "loss": 2.3468, "step": 12853 }, { "epoch": 0.6895922746781116, "grad_norm": 0.494140625, "learning_rate": 4.8488417310442515e-06, "loss": 2.5321, "step": 12854 }, { "epoch": 0.6896459227467812, "grad_norm": 0.51953125, "learning_rate": 4.848811978926249e-06, "loss": 2.5261, "step": 12855 }, { "epoch": 0.6896995708154506, "grad_norm": 0.56640625, "learning_rate": 4.8487822239718115e-06, "loss": 1.4485, "step": 12856 }, { "epoch": 0.6897532188841202, "grad_norm": 0.42578125, "learning_rate": 4.848752466180977e-06, "loss": 2.4075, "step": 12857 }, { "epoch": 0.6898068669527897, "grad_norm": 0.423828125, "learning_rate": 4.84872270555378e-06, "loss": 2.5739, "step": 12858 }, { "epoch": 0.6898605150214592, "grad_norm": 0.439453125, "learning_rate": 4.848692942090256e-06, "loss": 2.2718, "step": 12859 }, { "epoch": 0.6899141630901288, "grad_norm": 0.57421875, "learning_rate": 4.8486631757904424e-06, "loss": 2.5093, "step": 12860 }, { "epoch": 0.6899678111587982, "grad_norm": 0.39453125, "learning_rate": 4.8486334066543735e-06, "loss": 2.187, "step": 12861 }, { "epoch": 0.6900214592274678, "grad_norm": 0.4453125, "learning_rate": 4.848603634682087e-06, "loss": 2.3789, "step": 12862 }, { "epoch": 0.6900751072961373, "grad_norm": 0.482421875, "learning_rate": 4.848573859873617e-06, "loss": 2.3842, "step": 12863 }, { "epoch": 0.6901287553648069, "grad_norm": 0.494140625, "learning_rate": 4.848544082229001e-06, "loss": 2.156, "step": 12864 }, { "epoch": 0.6901824034334764, "grad_norm": 0.404296875, "learning_rate": 4.8485143017482735e-06, "loss": 2.1807, "step": 12865 }, { "epoch": 0.690236051502146, "grad_norm": 0.578125, "learning_rate": 4.848484518431472e-06, "loss": 2.1435, "step": 12866 }, { "epoch": 0.6902896995708154, "grad_norm": 2.125, "learning_rate": 4.848454732278632e-06, "loss": 1.6514, "step": 12867 }, { "epoch": 0.690343347639485, "grad_norm": 0.3671875, "learning_rate": 4.84842494328979e-06, "loss": 2.2691, "step": 12868 }, { "epoch": 0.6903969957081545, "grad_norm": 0.466796875, "learning_rate": 4.8483951514649795e-06, "loss": 2.2521, "step": 12869 }, { "epoch": 0.6904506437768241, "grad_norm": 0.419921875, "learning_rate": 4.848365356804239e-06, "loss": 2.1408, "step": 12870 }, { "epoch": 0.6905042918454936, "grad_norm": 0.45703125, "learning_rate": 4.848335559307604e-06, "loss": 2.2229, "step": 12871 }, { "epoch": 0.6905579399141631, "grad_norm": 0.41015625, "learning_rate": 4.84830575897511e-06, "loss": 2.227, "step": 12872 }, { "epoch": 0.6906115879828326, "grad_norm": 0.6484375, "learning_rate": 4.848275955806793e-06, "loss": 2.2268, "step": 12873 }, { "epoch": 0.6906652360515021, "grad_norm": 0.4765625, "learning_rate": 4.848246149802689e-06, "loss": 2.1791, "step": 12874 }, { "epoch": 0.6907188841201717, "grad_norm": 0.4921875, "learning_rate": 4.848216340962835e-06, "loss": 1.8813, "step": 12875 }, { "epoch": 0.6907725321888412, "grad_norm": 0.458984375, "learning_rate": 4.8481865292872655e-06, "loss": 2.3463, "step": 12876 }, { "epoch": 0.6908261802575107, "grad_norm": 0.4453125, "learning_rate": 4.848156714776017e-06, "loss": 2.4028, "step": 12877 }, { "epoch": 0.6908798283261802, "grad_norm": 0.404296875, "learning_rate": 4.848126897429125e-06, "loss": 2.0865, "step": 12878 }, { "epoch": 0.6909334763948498, "grad_norm": 0.4765625, "learning_rate": 4.848097077246628e-06, "loss": 2.3442, "step": 12879 }, { "epoch": 0.6909871244635193, "grad_norm": 0.50390625, "learning_rate": 4.848067254228559e-06, "loss": 2.2068, "step": 12880 }, { "epoch": 0.6910407725321889, "grad_norm": 0.4296875, "learning_rate": 4.848037428374955e-06, "loss": 2.3763, "step": 12881 }, { "epoch": 0.6910944206008584, "grad_norm": 0.60546875, "learning_rate": 4.848007599685853e-06, "loss": 2.362, "step": 12882 }, { "epoch": 0.6911480686695279, "grad_norm": 0.4765625, "learning_rate": 4.847977768161288e-06, "loss": 1.9906, "step": 12883 }, { "epoch": 0.6912017167381974, "grad_norm": 0.455078125, "learning_rate": 4.847947933801296e-06, "loss": 2.1135, "step": 12884 }, { "epoch": 0.691255364806867, "grad_norm": 0.48046875, "learning_rate": 4.847918096605913e-06, "loss": 2.4532, "step": 12885 }, { "epoch": 0.6913090128755365, "grad_norm": 0.51953125, "learning_rate": 4.8478882565751756e-06, "loss": 2.538, "step": 12886 }, { "epoch": 0.691362660944206, "grad_norm": 0.443359375, "learning_rate": 4.8478584137091195e-06, "loss": 2.1004, "step": 12887 }, { "epoch": 0.6914163090128755, "grad_norm": 0.4453125, "learning_rate": 4.847828568007781e-06, "loss": 2.3532, "step": 12888 }, { "epoch": 0.691469957081545, "grad_norm": 0.50390625, "learning_rate": 4.847798719471196e-06, "loss": 2.2594, "step": 12889 }, { "epoch": 0.6915236051502146, "grad_norm": 0.5234375, "learning_rate": 4.8477688680994e-06, "loss": 2.6574, "step": 12890 }, { "epoch": 0.6915772532188841, "grad_norm": 0.490234375, "learning_rate": 4.84773901389243e-06, "loss": 2.1851, "step": 12891 }, { "epoch": 0.6916309012875537, "grad_norm": 0.68359375, "learning_rate": 4.847709156850321e-06, "loss": 2.1063, "step": 12892 }, { "epoch": 0.6916845493562231, "grad_norm": 0.4765625, "learning_rate": 4.84767929697311e-06, "loss": 2.3974, "step": 12893 }, { "epoch": 0.6917381974248927, "grad_norm": 0.400390625, "learning_rate": 4.847649434260832e-06, "loss": 2.0893, "step": 12894 }, { "epoch": 0.6917918454935622, "grad_norm": 0.74609375, "learning_rate": 4.847619568713524e-06, "loss": 2.3117, "step": 12895 }, { "epoch": 0.6918454935622318, "grad_norm": 0.4296875, "learning_rate": 4.847589700331223e-06, "loss": 1.9125, "step": 12896 }, { "epoch": 0.6918991416309013, "grad_norm": 0.69140625, "learning_rate": 4.847559829113963e-06, "loss": 1.8522, "step": 12897 }, { "epoch": 0.6919527896995709, "grad_norm": 0.416015625, "learning_rate": 4.847529955061781e-06, "loss": 2.3649, "step": 12898 }, { "epoch": 0.6920064377682403, "grad_norm": 0.443359375, "learning_rate": 4.847500078174713e-06, "loss": 2.1918, "step": 12899 }, { "epoch": 0.6920600858369099, "grad_norm": 0.466796875, "learning_rate": 4.847470198452795e-06, "loss": 2.1375, "step": 12900 }, { "epoch": 0.6921137339055794, "grad_norm": 0.451171875, "learning_rate": 4.847440315896064e-06, "loss": 2.1688, "step": 12901 }, { "epoch": 0.6921673819742489, "grad_norm": 0.515625, "learning_rate": 4.847410430504554e-06, "loss": 2.4027, "step": 12902 }, { "epoch": 0.6922210300429185, "grad_norm": 0.353515625, "learning_rate": 4.847380542278303e-06, "loss": 1.8355, "step": 12903 }, { "epoch": 0.6922746781115879, "grad_norm": 1.1015625, "learning_rate": 4.847350651217346e-06, "loss": 1.9952, "step": 12904 }, { "epoch": 0.6923283261802575, "grad_norm": 0.4765625, "learning_rate": 4.847320757321721e-06, "loss": 1.7462, "step": 12905 }, { "epoch": 0.692381974248927, "grad_norm": 0.4375, "learning_rate": 4.8472908605914615e-06, "loss": 2.3755, "step": 12906 }, { "epoch": 0.6924356223175966, "grad_norm": 0.515625, "learning_rate": 4.847260961026605e-06, "loss": 2.3253, "step": 12907 }, { "epoch": 0.6924892703862661, "grad_norm": 0.474609375, "learning_rate": 4.847231058627187e-06, "loss": 2.3225, "step": 12908 }, { "epoch": 0.6925429184549357, "grad_norm": 0.56640625, "learning_rate": 4.847201153393244e-06, "loss": 2.2375, "step": 12909 }, { "epoch": 0.6925965665236051, "grad_norm": 0.40234375, "learning_rate": 4.847171245324812e-06, "loss": 2.113, "step": 12910 }, { "epoch": 0.6926502145922747, "grad_norm": 0.4375, "learning_rate": 4.847141334421927e-06, "loss": 2.2503, "step": 12911 }, { "epoch": 0.6927038626609442, "grad_norm": 0.51171875, "learning_rate": 4.847111420684627e-06, "loss": 2.5648, "step": 12912 }, { "epoch": 0.6927575107296138, "grad_norm": 0.51953125, "learning_rate": 4.847081504112945e-06, "loss": 2.3659, "step": 12913 }, { "epoch": 0.6928111587982833, "grad_norm": 0.46484375, "learning_rate": 4.847051584706919e-06, "loss": 2.2996, "step": 12914 }, { "epoch": 0.6928648068669528, "grad_norm": 0.50390625, "learning_rate": 4.847021662466584e-06, "loss": 2.3286, "step": 12915 }, { "epoch": 0.6929184549356223, "grad_norm": 0.52734375, "learning_rate": 4.846991737391978e-06, "loss": 2.3957, "step": 12916 }, { "epoch": 0.6929721030042918, "grad_norm": 0.4375, "learning_rate": 4.846961809483136e-06, "loss": 2.0813, "step": 12917 }, { "epoch": 0.6930257510729614, "grad_norm": 0.373046875, "learning_rate": 4.846931878740093e-06, "loss": 2.095, "step": 12918 }, { "epoch": 0.6930793991416309, "grad_norm": 0.458984375, "learning_rate": 4.846901945162888e-06, "loss": 2.1165, "step": 12919 }, { "epoch": 0.6931330472103004, "grad_norm": 0.58203125, "learning_rate": 4.846872008751553e-06, "loss": 2.2875, "step": 12920 }, { "epoch": 0.6931866952789699, "grad_norm": 0.5703125, "learning_rate": 4.846842069506128e-06, "loss": 2.1616, "step": 12921 }, { "epoch": 0.6932403433476395, "grad_norm": 0.40625, "learning_rate": 4.8468121274266475e-06, "loss": 2.4027, "step": 12922 }, { "epoch": 0.693293991416309, "grad_norm": 0.388671875, "learning_rate": 4.846782182513148e-06, "loss": 2.0505, "step": 12923 }, { "epoch": 0.6933476394849786, "grad_norm": 0.5078125, "learning_rate": 4.846752234765666e-06, "loss": 1.9902, "step": 12924 }, { "epoch": 0.693401287553648, "grad_norm": 0.447265625, "learning_rate": 4.846722284184237e-06, "loss": 2.261, "step": 12925 }, { "epoch": 0.6934549356223176, "grad_norm": 0.48046875, "learning_rate": 4.846692330768896e-06, "loss": 2.3329, "step": 12926 }, { "epoch": 0.6935085836909871, "grad_norm": 0.9140625, "learning_rate": 4.846662374519682e-06, "loss": 2.4756, "step": 12927 }, { "epoch": 0.6935622317596567, "grad_norm": 0.48046875, "learning_rate": 4.846632415436629e-06, "loss": 2.0448, "step": 12928 }, { "epoch": 0.6936158798283262, "grad_norm": 0.44140625, "learning_rate": 4.846602453519774e-06, "loss": 2.1582, "step": 12929 }, { "epoch": 0.6936695278969958, "grad_norm": 0.451171875, "learning_rate": 4.846572488769154e-06, "loss": 2.0827, "step": 12930 }, { "epoch": 0.6937231759656652, "grad_norm": 0.5546875, "learning_rate": 4.846542521184804e-06, "loss": 1.5934, "step": 12931 }, { "epoch": 0.6937768240343347, "grad_norm": 0.421875, "learning_rate": 4.846512550766759e-06, "loss": 2.0478, "step": 12932 }, { "epoch": 0.6938304721030043, "grad_norm": 0.423828125, "learning_rate": 4.846482577515058e-06, "loss": 2.3221, "step": 12933 }, { "epoch": 0.6938841201716738, "grad_norm": 0.451171875, "learning_rate": 4.846452601429735e-06, "loss": 2.593, "step": 12934 }, { "epoch": 0.6939377682403434, "grad_norm": 0.453125, "learning_rate": 4.846422622510828e-06, "loss": 1.5098, "step": 12935 }, { "epoch": 0.6939914163090128, "grad_norm": 0.9375, "learning_rate": 4.846392640758372e-06, "loss": 2.3783, "step": 12936 }, { "epoch": 0.6940450643776824, "grad_norm": 0.447265625, "learning_rate": 4.846362656172403e-06, "loss": 2.2731, "step": 12937 }, { "epoch": 0.6940987124463519, "grad_norm": 0.43359375, "learning_rate": 4.846332668752958e-06, "loss": 2.3118, "step": 12938 }, { "epoch": 0.6941523605150215, "grad_norm": 0.482421875, "learning_rate": 4.846302678500073e-06, "loss": 2.2649, "step": 12939 }, { "epoch": 0.694206008583691, "grad_norm": 0.384765625, "learning_rate": 4.8462726854137845e-06, "loss": 2.0665, "step": 12940 }, { "epoch": 0.6942596566523606, "grad_norm": 0.5078125, "learning_rate": 4.846242689494127e-06, "loss": 2.1024, "step": 12941 }, { "epoch": 0.69431330472103, "grad_norm": 0.50390625, "learning_rate": 4.846212690741139e-06, "loss": 2.2367, "step": 12942 }, { "epoch": 0.6943669527896996, "grad_norm": 0.46875, "learning_rate": 4.846182689154856e-06, "loss": 2.372, "step": 12943 }, { "epoch": 0.6944206008583691, "grad_norm": 0.447265625, "learning_rate": 4.846152684735313e-06, "loss": 2.3682, "step": 12944 }, { "epoch": 0.6944742489270386, "grad_norm": 0.5, "learning_rate": 4.846122677482548e-06, "loss": 2.0953, "step": 12945 }, { "epoch": 0.6945278969957082, "grad_norm": 0.33984375, "learning_rate": 4.846092667396596e-06, "loss": 2.1136, "step": 12946 }, { "epoch": 0.6945815450643776, "grad_norm": 0.44921875, "learning_rate": 4.846062654477494e-06, "loss": 2.2192, "step": 12947 }, { "epoch": 0.6946351931330472, "grad_norm": 0.53515625, "learning_rate": 4.846032638725278e-06, "loss": 2.3902, "step": 12948 }, { "epoch": 0.6946888412017167, "grad_norm": 0.45703125, "learning_rate": 4.846002620139984e-06, "loss": 2.2918, "step": 12949 }, { "epoch": 0.6947424892703863, "grad_norm": 0.392578125, "learning_rate": 4.845972598721648e-06, "loss": 2.3641, "step": 12950 }, { "epoch": 0.6947961373390558, "grad_norm": 0.462890625, "learning_rate": 4.845942574470307e-06, "loss": 1.9589, "step": 12951 }, { "epoch": 0.6948497854077254, "grad_norm": 0.58203125, "learning_rate": 4.8459125473859975e-06, "loss": 2.4055, "step": 12952 }, { "epoch": 0.6949034334763948, "grad_norm": 0.4296875, "learning_rate": 4.845882517468755e-06, "loss": 2.1277, "step": 12953 }, { "epoch": 0.6949570815450644, "grad_norm": 0.416015625, "learning_rate": 4.845852484718616e-06, "loss": 2.2294, "step": 12954 }, { "epoch": 0.6950107296137339, "grad_norm": 0.5, "learning_rate": 4.845822449135617e-06, "loss": 2.4344, "step": 12955 }, { "epoch": 0.6950643776824035, "grad_norm": 0.443359375, "learning_rate": 4.845792410719793e-06, "loss": 2.3508, "step": 12956 }, { "epoch": 0.695118025751073, "grad_norm": 0.4375, "learning_rate": 4.845762369471182e-06, "loss": 2.2717, "step": 12957 }, { "epoch": 0.6951716738197425, "grad_norm": 9.25, "learning_rate": 4.845732325389819e-06, "loss": 1.9862, "step": 12958 }, { "epoch": 0.695225321888412, "grad_norm": 1.1328125, "learning_rate": 4.845702278475741e-06, "loss": 1.9882, "step": 12959 }, { "epoch": 0.6952789699570815, "grad_norm": 0.498046875, "learning_rate": 4.845672228728985e-06, "loss": 2.2641, "step": 12960 }, { "epoch": 0.6953326180257511, "grad_norm": 0.458984375, "learning_rate": 4.8456421761495855e-06, "loss": 2.3232, "step": 12961 }, { "epoch": 0.6953862660944206, "grad_norm": 0.5703125, "learning_rate": 4.845612120737581e-06, "loss": 1.5266, "step": 12962 }, { "epoch": 0.6954399141630901, "grad_norm": 0.408203125, "learning_rate": 4.845582062493005e-06, "loss": 2.0904, "step": 12963 }, { "epoch": 0.6954935622317596, "grad_norm": 0.5, "learning_rate": 4.845552001415896e-06, "loss": 2.1884, "step": 12964 }, { "epoch": 0.6955472103004292, "grad_norm": 0.458984375, "learning_rate": 4.845521937506289e-06, "loss": 2.3248, "step": 12965 }, { "epoch": 0.6956008583690987, "grad_norm": 0.5546875, "learning_rate": 4.845491870764222e-06, "loss": 2.47, "step": 12966 }, { "epoch": 0.6956545064377683, "grad_norm": 0.482421875, "learning_rate": 4.845461801189729e-06, "loss": 2.3374, "step": 12967 }, { "epoch": 0.6957081545064377, "grad_norm": 0.46875, "learning_rate": 4.845431728782849e-06, "loss": 2.2632, "step": 12968 }, { "epoch": 0.6957618025751073, "grad_norm": 0.4609375, "learning_rate": 4.845401653543616e-06, "loss": 2.2431, "step": 12969 }, { "epoch": 0.6958154506437768, "grad_norm": 0.478515625, "learning_rate": 4.845371575472067e-06, "loss": 2.2688, "step": 12970 }, { "epoch": 0.6958690987124464, "grad_norm": 0.373046875, "learning_rate": 4.845341494568239e-06, "loss": 2.0086, "step": 12971 }, { "epoch": 0.6959227467811159, "grad_norm": 0.40234375, "learning_rate": 4.845311410832167e-06, "loss": 2.0259, "step": 12972 }, { "epoch": 0.6959763948497855, "grad_norm": 0.412109375, "learning_rate": 4.845281324263889e-06, "loss": 2.2464, "step": 12973 }, { "epoch": 0.6960300429184549, "grad_norm": 0.443359375, "learning_rate": 4.845251234863441e-06, "loss": 2.3477, "step": 12974 }, { "epoch": 0.6960836909871244, "grad_norm": 0.5234375, "learning_rate": 4.845221142630857e-06, "loss": 2.2559, "step": 12975 }, { "epoch": 0.696137339055794, "grad_norm": 0.466796875, "learning_rate": 4.845191047566176e-06, "loss": 2.2712, "step": 12976 }, { "epoch": 0.6961909871244635, "grad_norm": 0.71875, "learning_rate": 4.845160949669434e-06, "loss": 2.203, "step": 12977 }, { "epoch": 0.6962446351931331, "grad_norm": 0.4140625, "learning_rate": 4.845130848940667e-06, "loss": 2.1761, "step": 12978 }, { "epoch": 0.6962982832618025, "grad_norm": 0.392578125, "learning_rate": 4.84510074537991e-06, "loss": 2.5223, "step": 12979 }, { "epoch": 0.6963519313304721, "grad_norm": 0.462890625, "learning_rate": 4.845070638987202e-06, "loss": 2.1627, "step": 12980 }, { "epoch": 0.6964055793991416, "grad_norm": 0.50390625, "learning_rate": 4.845040529762577e-06, "loss": 2.0913, "step": 12981 }, { "epoch": 0.6964592274678112, "grad_norm": 0.447265625, "learning_rate": 4.8450104177060725e-06, "loss": 2.3983, "step": 12982 }, { "epoch": 0.6965128755364807, "grad_norm": 0.51953125, "learning_rate": 4.844980302817725e-06, "loss": 1.3666, "step": 12983 }, { "epoch": 0.6965665236051503, "grad_norm": 0.423828125, "learning_rate": 4.84495018509757e-06, "loss": 1.9106, "step": 12984 }, { "epoch": 0.6966201716738197, "grad_norm": 0.5390625, "learning_rate": 4.844920064545644e-06, "loss": 2.2455, "step": 12985 }, { "epoch": 0.6966738197424893, "grad_norm": 0.5078125, "learning_rate": 4.844889941161985e-06, "loss": 2.1422, "step": 12986 }, { "epoch": 0.6967274678111588, "grad_norm": 0.486328125, "learning_rate": 4.844859814946626e-06, "loss": 2.1032, "step": 12987 }, { "epoch": 0.6967811158798283, "grad_norm": 0.54296875, "learning_rate": 4.844829685899607e-06, "loss": 2.4033, "step": 12988 }, { "epoch": 0.6968347639484979, "grad_norm": 0.486328125, "learning_rate": 4.844799554020963e-06, "loss": 2.6246, "step": 12989 }, { "epoch": 0.6968884120171673, "grad_norm": 0.453125, "learning_rate": 4.84476941931073e-06, "loss": 2.3977, "step": 12990 }, { "epoch": 0.6969420600858369, "grad_norm": 0.60546875, "learning_rate": 4.844739281768945e-06, "loss": 2.4015, "step": 12991 }, { "epoch": 0.6969957081545064, "grad_norm": 0.4296875, "learning_rate": 4.8447091413956436e-06, "loss": 2.2905, "step": 12992 }, { "epoch": 0.697049356223176, "grad_norm": 0.91015625, "learning_rate": 4.844678998190863e-06, "loss": 2.3134, "step": 12993 }, { "epoch": 0.6971030042918455, "grad_norm": 0.49609375, "learning_rate": 4.8446488521546385e-06, "loss": 2.6127, "step": 12994 }, { "epoch": 0.697156652360515, "grad_norm": 0.5703125, "learning_rate": 4.844618703287008e-06, "loss": 2.2645, "step": 12995 }, { "epoch": 0.6972103004291845, "grad_norm": 0.3671875, "learning_rate": 4.844588551588007e-06, "loss": 2.4146, "step": 12996 }, { "epoch": 0.6972639484978541, "grad_norm": 0.443359375, "learning_rate": 4.844558397057672e-06, "loss": 2.4006, "step": 12997 }, { "epoch": 0.6973175965665236, "grad_norm": 0.44921875, "learning_rate": 4.844528239696039e-06, "loss": 2.3076, "step": 12998 }, { "epoch": 0.6973712446351932, "grad_norm": 0.4296875, "learning_rate": 4.844498079503146e-06, "loss": 2.2146, "step": 12999 }, { "epoch": 0.6974248927038627, "grad_norm": 0.451171875, "learning_rate": 4.844467916479028e-06, "loss": 2.3201, "step": 13000 }, { "epoch": 0.6974785407725322, "grad_norm": 0.74609375, "learning_rate": 4.844437750623721e-06, "loss": 1.1543, "step": 13001 }, { "epoch": 0.6975321888412017, "grad_norm": 0.455078125, "learning_rate": 4.844407581937262e-06, "loss": 2.0874, "step": 13002 }, { "epoch": 0.6975858369098712, "grad_norm": 0.380859375, "learning_rate": 4.844377410419689e-06, "loss": 2.1632, "step": 13003 }, { "epoch": 0.6976394849785408, "grad_norm": 0.46875, "learning_rate": 4.844347236071037e-06, "loss": 2.0066, "step": 13004 }, { "epoch": 0.6976931330472103, "grad_norm": 0.427734375, "learning_rate": 4.8443170588913414e-06, "loss": 2.2881, "step": 13005 }, { "epoch": 0.6977467811158798, "grad_norm": 0.5078125, "learning_rate": 4.844286878880639e-06, "loss": 2.3931, "step": 13006 }, { "epoch": 0.6978004291845493, "grad_norm": 0.486328125, "learning_rate": 4.844256696038969e-06, "loss": 2.5967, "step": 13007 }, { "epoch": 0.6978540772532189, "grad_norm": 0.462890625, "learning_rate": 4.8442265103663646e-06, "loss": 1.8807, "step": 13008 }, { "epoch": 0.6979077253218884, "grad_norm": 0.90234375, "learning_rate": 4.844196321862864e-06, "loss": 1.4596, "step": 13009 }, { "epoch": 0.697961373390558, "grad_norm": 0.48046875, "learning_rate": 4.8441661305285036e-06, "loss": 2.2404, "step": 13010 }, { "epoch": 0.6980150214592274, "grad_norm": 0.45703125, "learning_rate": 4.8441359363633185e-06, "loss": 2.234, "step": 13011 }, { "epoch": 0.698068669527897, "grad_norm": 0.453125, "learning_rate": 4.844105739367346e-06, "loss": 2.1798, "step": 13012 }, { "epoch": 0.6981223175965665, "grad_norm": 0.490234375, "learning_rate": 4.844075539540623e-06, "loss": 2.5076, "step": 13013 }, { "epoch": 0.6981759656652361, "grad_norm": 0.5703125, "learning_rate": 4.844045336883187e-06, "loss": 2.5414, "step": 13014 }, { "epoch": 0.6982296137339056, "grad_norm": 0.51953125, "learning_rate": 4.84401513139507e-06, "loss": 2.4797, "step": 13015 }, { "epoch": 0.6982832618025752, "grad_norm": 0.408203125, "learning_rate": 4.843984923076314e-06, "loss": 2.3576, "step": 13016 }, { "epoch": 0.6983369098712446, "grad_norm": 0.451171875, "learning_rate": 4.843954711926952e-06, "loss": 2.0324, "step": 13017 }, { "epoch": 0.6983905579399141, "grad_norm": 0.44921875, "learning_rate": 4.8439244979470215e-06, "loss": 2.1754, "step": 13018 }, { "epoch": 0.6984442060085837, "grad_norm": 0.5390625, "learning_rate": 4.84389428113656e-06, "loss": 2.4161, "step": 13019 }, { "epoch": 0.6984978540772532, "grad_norm": 0.396484375, "learning_rate": 4.843864061495601e-06, "loss": 2.225, "step": 13020 }, { "epoch": 0.6985515021459228, "grad_norm": 0.478515625, "learning_rate": 4.843833839024184e-06, "loss": 2.1831, "step": 13021 }, { "epoch": 0.6986051502145922, "grad_norm": 0.77734375, "learning_rate": 4.843803613722345e-06, "loss": 2.0884, "step": 13022 }, { "epoch": 0.6986587982832618, "grad_norm": 0.50390625, "learning_rate": 4.843773385590121e-06, "loss": 2.3154, "step": 13023 }, { "epoch": 0.6987124463519313, "grad_norm": 0.50390625, "learning_rate": 4.843743154627545e-06, "loss": 2.3948, "step": 13024 }, { "epoch": 0.6987660944206009, "grad_norm": 0.46484375, "learning_rate": 4.843712920834657e-06, "loss": 2.4387, "step": 13025 }, { "epoch": 0.6988197424892704, "grad_norm": 0.416015625, "learning_rate": 4.8436826842114924e-06, "loss": 2.0861, "step": 13026 }, { "epoch": 0.69887339055794, "grad_norm": 0.5, "learning_rate": 4.843652444758088e-06, "loss": 2.1898, "step": 13027 }, { "epoch": 0.6989270386266094, "grad_norm": 0.4921875, "learning_rate": 4.84362220247448e-06, "loss": 2.4679, "step": 13028 }, { "epoch": 0.698980686695279, "grad_norm": 0.470703125, "learning_rate": 4.843591957360706e-06, "loss": 2.0667, "step": 13029 }, { "epoch": 0.6990343347639485, "grad_norm": 0.44921875, "learning_rate": 4.8435617094168e-06, "loss": 2.2431, "step": 13030 }, { "epoch": 0.699087982832618, "grad_norm": 0.4765625, "learning_rate": 4.843531458642801e-06, "loss": 2.1968, "step": 13031 }, { "epoch": 0.6991416309012876, "grad_norm": 0.443359375, "learning_rate": 4.8435012050387445e-06, "loss": 2.2317, "step": 13032 }, { "epoch": 0.699195278969957, "grad_norm": 0.455078125, "learning_rate": 4.8434709486046675e-06, "loss": 2.127, "step": 13033 }, { "epoch": 0.6992489270386266, "grad_norm": 0.392578125, "learning_rate": 4.843440689340605e-06, "loss": 1.9233, "step": 13034 }, { "epoch": 0.6993025751072961, "grad_norm": 0.546875, "learning_rate": 4.843410427246596e-06, "loss": 2.4402, "step": 13035 }, { "epoch": 0.6993562231759657, "grad_norm": 0.51953125, "learning_rate": 4.843380162322675e-06, "loss": 2.3436, "step": 13036 }, { "epoch": 0.6994098712446352, "grad_norm": 0.44921875, "learning_rate": 4.84334989456888e-06, "loss": 2.2759, "step": 13037 }, { "epoch": 0.6994635193133047, "grad_norm": 0.46875, "learning_rate": 4.843319623985246e-06, "loss": 2.3104, "step": 13038 }, { "epoch": 0.6995171673819742, "grad_norm": 0.478515625, "learning_rate": 4.843289350571811e-06, "loss": 2.4016, "step": 13039 }, { "epoch": 0.6995708154506438, "grad_norm": 0.5234375, "learning_rate": 4.843259074328611e-06, "loss": 2.3175, "step": 13040 }, { "epoch": 0.6996244635193133, "grad_norm": 0.423828125, "learning_rate": 4.843228795255684e-06, "loss": 2.1936, "step": 13041 }, { "epoch": 0.6996781115879829, "grad_norm": 0.5546875, "learning_rate": 4.8431985133530625e-06, "loss": 2.1996, "step": 13042 }, { "epoch": 0.6997317596566524, "grad_norm": 0.57421875, "learning_rate": 4.843168228620787e-06, "loss": 2.4496, "step": 13043 }, { "epoch": 0.6997854077253219, "grad_norm": 0.4296875, "learning_rate": 4.843137941058893e-06, "loss": 2.2143, "step": 13044 }, { "epoch": 0.6998390557939914, "grad_norm": 0.4765625, "learning_rate": 4.843107650667416e-06, "loss": 2.1697, "step": 13045 }, { "epoch": 0.6998927038626609, "grad_norm": 0.443359375, "learning_rate": 4.843077357446394e-06, "loss": 2.3422, "step": 13046 }, { "epoch": 0.6999463519313305, "grad_norm": 0.404296875, "learning_rate": 4.843047061395863e-06, "loss": 2.1918, "step": 13047 }, { "epoch": 0.7, "grad_norm": 0.46484375, "learning_rate": 4.84301676251586e-06, "loss": 2.2475, "step": 13048 }, { "epoch": 0.7000536480686695, "grad_norm": 0.53515625, "learning_rate": 4.842986460806421e-06, "loss": 2.3083, "step": 13049 }, { "epoch": 0.700107296137339, "grad_norm": 0.470703125, "learning_rate": 4.842956156267582e-06, "loss": 2.2639, "step": 13050 }, { "epoch": 0.7001609442060086, "grad_norm": 0.45703125, "learning_rate": 4.84292584889938e-06, "loss": 2.5768, "step": 13051 }, { "epoch": 0.7002145922746781, "grad_norm": 0.41015625, "learning_rate": 4.842895538701853e-06, "loss": 2.2073, "step": 13052 }, { "epoch": 0.7002682403433477, "grad_norm": 0.443359375, "learning_rate": 4.842865225675036e-06, "loss": 2.3083, "step": 13053 }, { "epoch": 0.7003218884120171, "grad_norm": 0.47265625, "learning_rate": 4.842834909818967e-06, "loss": 2.3011, "step": 13054 }, { "epoch": 0.7003755364806867, "grad_norm": 0.498046875, "learning_rate": 4.842804591133681e-06, "loss": 2.6914, "step": 13055 }, { "epoch": 0.7004291845493562, "grad_norm": 0.49609375, "learning_rate": 4.842774269619216e-06, "loss": 2.7249, "step": 13056 }, { "epoch": 0.7004828326180258, "grad_norm": 0.466796875, "learning_rate": 4.8427439452756075e-06, "loss": 2.2104, "step": 13057 }, { "epoch": 0.7005364806866953, "grad_norm": 0.50390625, "learning_rate": 4.842713618102894e-06, "loss": 2.1667, "step": 13058 }, { "epoch": 0.7005901287553649, "grad_norm": 0.490234375, "learning_rate": 4.842683288101108e-06, "loss": 2.2452, "step": 13059 }, { "epoch": 0.7006437768240343, "grad_norm": 0.375, "learning_rate": 4.842652955270291e-06, "loss": 2.1758, "step": 13060 }, { "epoch": 0.7006974248927038, "grad_norm": 0.65234375, "learning_rate": 4.842622619610477e-06, "loss": 2.491, "step": 13061 }, { "epoch": 0.7007510729613734, "grad_norm": 0.52734375, "learning_rate": 4.842592281121703e-06, "loss": 2.1912, "step": 13062 }, { "epoch": 0.7008047210300429, "grad_norm": 0.412109375, "learning_rate": 4.8425619398040055e-06, "loss": 2.3034, "step": 13063 }, { "epoch": 0.7008583690987125, "grad_norm": 0.44921875, "learning_rate": 4.842531595657422e-06, "loss": 2.1059, "step": 13064 }, { "epoch": 0.7009120171673819, "grad_norm": 0.50390625, "learning_rate": 4.842501248681989e-06, "loss": 2.6011, "step": 13065 }, { "epoch": 0.7009656652360515, "grad_norm": 0.470703125, "learning_rate": 4.842470898877742e-06, "loss": 2.3181, "step": 13066 }, { "epoch": 0.701019313304721, "grad_norm": 0.490234375, "learning_rate": 4.842440546244718e-06, "loss": 2.1879, "step": 13067 }, { "epoch": 0.7010729613733906, "grad_norm": 0.396484375, "learning_rate": 4.842410190782954e-06, "loss": 2.3802, "step": 13068 }, { "epoch": 0.7011266094420601, "grad_norm": 0.400390625, "learning_rate": 4.842379832492488e-06, "loss": 2.098, "step": 13069 }, { "epoch": 0.7011802575107297, "grad_norm": 0.39453125, "learning_rate": 4.842349471373354e-06, "loss": 2.3456, "step": 13070 }, { "epoch": 0.7012339055793991, "grad_norm": 0.466796875, "learning_rate": 4.842319107425591e-06, "loss": 2.4007, "step": 13071 }, { "epoch": 0.7012875536480687, "grad_norm": 0.435546875, "learning_rate": 4.842288740649234e-06, "loss": 2.3291, "step": 13072 }, { "epoch": 0.7013412017167382, "grad_norm": 0.75390625, "learning_rate": 4.84225837104432e-06, "loss": 2.3342, "step": 13073 }, { "epoch": 0.7013948497854077, "grad_norm": 0.53515625, "learning_rate": 4.842227998610887e-06, "loss": 2.5304, "step": 13074 }, { "epoch": 0.7014484978540773, "grad_norm": 0.50390625, "learning_rate": 4.84219762334897e-06, "loss": 2.4168, "step": 13075 }, { "epoch": 0.7015021459227467, "grad_norm": 0.42578125, "learning_rate": 4.8421672452586064e-06, "loss": 2.2003, "step": 13076 }, { "epoch": 0.7015557939914163, "grad_norm": 0.5078125, "learning_rate": 4.8421368643398335e-06, "loss": 2.3969, "step": 13077 }, { "epoch": 0.7016094420600858, "grad_norm": 0.51171875, "learning_rate": 4.842106480592687e-06, "loss": 2.317, "step": 13078 }, { "epoch": 0.7016630901287554, "grad_norm": 0.416015625, "learning_rate": 4.842076094017204e-06, "loss": 2.3993, "step": 13079 }, { "epoch": 0.7017167381974249, "grad_norm": 0.462890625, "learning_rate": 4.842045704613421e-06, "loss": 2.3755, "step": 13080 }, { "epoch": 0.7017703862660944, "grad_norm": 0.53125, "learning_rate": 4.842015312381375e-06, "loss": 2.47, "step": 13081 }, { "epoch": 0.7018240343347639, "grad_norm": 0.408203125, "learning_rate": 4.841984917321102e-06, "loss": 2.3046, "step": 13082 }, { "epoch": 0.7018776824034335, "grad_norm": 0.470703125, "learning_rate": 4.841954519432639e-06, "loss": 2.2666, "step": 13083 }, { "epoch": 0.701931330472103, "grad_norm": 0.419921875, "learning_rate": 4.841924118716024e-06, "loss": 1.9717, "step": 13084 }, { "epoch": 0.7019849785407726, "grad_norm": 0.47265625, "learning_rate": 4.841893715171292e-06, "loss": 2.1692, "step": 13085 }, { "epoch": 0.702038626609442, "grad_norm": 0.37109375, "learning_rate": 4.841863308798481e-06, "loss": 2.3128, "step": 13086 }, { "epoch": 0.7020922746781116, "grad_norm": 0.55859375, "learning_rate": 4.841832899597626e-06, "loss": 2.6206, "step": 13087 }, { "epoch": 0.7021459227467811, "grad_norm": 0.482421875, "learning_rate": 4.841802487568765e-06, "loss": 2.2345, "step": 13088 }, { "epoch": 0.7021995708154506, "grad_norm": 0.60546875, "learning_rate": 4.841772072711936e-06, "loss": 2.2863, "step": 13089 }, { "epoch": 0.7022532188841202, "grad_norm": 0.50390625, "learning_rate": 4.8417416550271726e-06, "loss": 2.1871, "step": 13090 }, { "epoch": 0.7023068669527897, "grad_norm": 0.412109375, "learning_rate": 4.841711234514514e-06, "loss": 2.3036, "step": 13091 }, { "epoch": 0.7023605150214592, "grad_norm": 0.443359375, "learning_rate": 4.841680811173996e-06, "loss": 2.2541, "step": 13092 }, { "epoch": 0.7024141630901287, "grad_norm": 0.421875, "learning_rate": 4.841650385005655e-06, "loss": 2.2371, "step": 13093 }, { "epoch": 0.7024678111587983, "grad_norm": 0.44140625, "learning_rate": 4.841619956009529e-06, "loss": 2.3044, "step": 13094 }, { "epoch": 0.7025214592274678, "grad_norm": 0.421875, "learning_rate": 4.841589524185654e-06, "loss": 2.1092, "step": 13095 }, { "epoch": 0.7025751072961374, "grad_norm": 0.41796875, "learning_rate": 4.841559089534066e-06, "loss": 2.0053, "step": 13096 }, { "epoch": 0.7026287553648068, "grad_norm": 0.4296875, "learning_rate": 4.841528652054803e-06, "loss": 2.2374, "step": 13097 }, { "epoch": 0.7026824034334764, "grad_norm": 0.439453125, "learning_rate": 4.841498211747902e-06, "loss": 2.336, "step": 13098 }, { "epoch": 0.7027360515021459, "grad_norm": 0.33203125, "learning_rate": 4.841467768613397e-06, "loss": 1.9505, "step": 13099 }, { "epoch": 0.7027896995708155, "grad_norm": 0.443359375, "learning_rate": 4.841437322651328e-06, "loss": 2.4485, "step": 13100 }, { "epoch": 0.702843347639485, "grad_norm": 2.5, "learning_rate": 4.8414068738617304e-06, "loss": 1.6141, "step": 13101 }, { "epoch": 0.7028969957081546, "grad_norm": 0.47265625, "learning_rate": 4.841376422244641e-06, "loss": 2.1422, "step": 13102 }, { "epoch": 0.702950643776824, "grad_norm": 0.439453125, "learning_rate": 4.8413459678000964e-06, "loss": 2.4115, "step": 13103 }, { "epoch": 0.7030042918454935, "grad_norm": 0.462890625, "learning_rate": 4.841315510528134e-06, "loss": 2.2805, "step": 13104 }, { "epoch": 0.7030579399141631, "grad_norm": 0.59765625, "learning_rate": 4.8412850504287904e-06, "loss": 2.5027, "step": 13105 }, { "epoch": 0.7031115879828326, "grad_norm": 0.396484375, "learning_rate": 4.841254587502101e-06, "loss": 2.3105, "step": 13106 }, { "epoch": 0.7031652360515022, "grad_norm": 0.4296875, "learning_rate": 4.841224121748105e-06, "loss": 2.374, "step": 13107 }, { "epoch": 0.7032188841201716, "grad_norm": 0.4765625, "learning_rate": 4.841193653166838e-06, "loss": 2.566, "step": 13108 }, { "epoch": 0.7032725321888412, "grad_norm": 0.458984375, "learning_rate": 4.841163181758336e-06, "loss": 2.3609, "step": 13109 }, { "epoch": 0.7033261802575107, "grad_norm": 0.478515625, "learning_rate": 4.841132707522637e-06, "loss": 2.073, "step": 13110 }, { "epoch": 0.7033798283261803, "grad_norm": 0.68359375, "learning_rate": 4.841102230459778e-06, "loss": 1.3595, "step": 13111 }, { "epoch": 0.7034334763948498, "grad_norm": 0.484375, "learning_rate": 4.841071750569794e-06, "loss": 2.3249, "step": 13112 }, { "epoch": 0.7034871244635194, "grad_norm": 0.4296875, "learning_rate": 4.841041267852724e-06, "loss": 2.2712, "step": 13113 }, { "epoch": 0.7035407725321888, "grad_norm": 0.416015625, "learning_rate": 4.841010782308603e-06, "loss": 2.0415, "step": 13114 }, { "epoch": 0.7035944206008584, "grad_norm": 0.453125, "learning_rate": 4.840980293937469e-06, "loss": 2.2143, "step": 13115 }, { "epoch": 0.7036480686695279, "grad_norm": 0.431640625, "learning_rate": 4.840949802739359e-06, "loss": 1.7461, "step": 13116 }, { "epoch": 0.7037017167381975, "grad_norm": 0.353515625, "learning_rate": 4.840919308714308e-06, "loss": 1.9974, "step": 13117 }, { "epoch": 0.703755364806867, "grad_norm": 0.46484375, "learning_rate": 4.840888811862354e-06, "loss": 2.3414, "step": 13118 }, { "epoch": 0.7038090128755364, "grad_norm": 0.41015625, "learning_rate": 4.840858312183535e-06, "loss": 1.9464, "step": 13119 }, { "epoch": 0.703862660944206, "grad_norm": 0.427734375, "learning_rate": 4.840827809677886e-06, "loss": 2.348, "step": 13120 }, { "epoch": 0.7039163090128755, "grad_norm": 0.51953125, "learning_rate": 4.8407973043454444e-06, "loss": 2.3039, "step": 13121 }, { "epoch": 0.7039699570815451, "grad_norm": 0.4453125, "learning_rate": 4.840766796186248e-06, "loss": 2.4221, "step": 13122 }, { "epoch": 0.7040236051502146, "grad_norm": 0.486328125, "learning_rate": 4.840736285200333e-06, "loss": 2.4763, "step": 13123 }, { "epoch": 0.7040772532188841, "grad_norm": 0.462890625, "learning_rate": 4.840705771387735e-06, "loss": 2.3538, "step": 13124 }, { "epoch": 0.7041309012875536, "grad_norm": 0.44140625, "learning_rate": 4.840675254748493e-06, "loss": 1.8603, "step": 13125 }, { "epoch": 0.7041845493562232, "grad_norm": 0.4765625, "learning_rate": 4.840644735282642e-06, "loss": 2.4872, "step": 13126 }, { "epoch": 0.7042381974248927, "grad_norm": 0.54296875, "learning_rate": 4.84061421299022e-06, "loss": 2.2984, "step": 13127 }, { "epoch": 0.7042918454935623, "grad_norm": 0.427734375, "learning_rate": 4.840583687871264e-06, "loss": 1.9798, "step": 13128 }, { "epoch": 0.7043454935622318, "grad_norm": 0.515625, "learning_rate": 4.8405531599258094e-06, "loss": 2.3693, "step": 13129 }, { "epoch": 0.7043991416309013, "grad_norm": 0.56640625, "learning_rate": 4.840522629153894e-06, "loss": 2.2607, "step": 13130 }, { "epoch": 0.7044527896995708, "grad_norm": 0.431640625, "learning_rate": 4.840492095555556e-06, "loss": 2.1643, "step": 13131 }, { "epoch": 0.7045064377682403, "grad_norm": 0.375, "learning_rate": 4.84046155913083e-06, "loss": 2.1832, "step": 13132 }, { "epoch": 0.7045600858369099, "grad_norm": 0.423828125, "learning_rate": 4.840431019879754e-06, "loss": 2.2281, "step": 13133 }, { "epoch": 0.7046137339055794, "grad_norm": 0.45703125, "learning_rate": 4.840400477802365e-06, "loss": 2.2341, "step": 13134 }, { "epoch": 0.7046673819742489, "grad_norm": 0.5390625, "learning_rate": 4.840369932898699e-06, "loss": 1.3801, "step": 13135 }, { "epoch": 0.7047210300429184, "grad_norm": 0.443359375, "learning_rate": 4.840339385168794e-06, "loss": 2.1717, "step": 13136 }, { "epoch": 0.704774678111588, "grad_norm": 0.498046875, "learning_rate": 4.840308834612686e-06, "loss": 2.2054, "step": 13137 }, { "epoch": 0.7048283261802575, "grad_norm": 0.427734375, "learning_rate": 4.840278281230413e-06, "loss": 2.249, "step": 13138 }, { "epoch": 0.7048819742489271, "grad_norm": 0.498046875, "learning_rate": 4.84024772502201e-06, "loss": 2.3785, "step": 13139 }, { "epoch": 0.7049356223175965, "grad_norm": 0.4921875, "learning_rate": 4.840217165987516e-06, "loss": 2.1667, "step": 13140 }, { "epoch": 0.7049892703862661, "grad_norm": 0.4453125, "learning_rate": 4.840186604126966e-06, "loss": 2.2209, "step": 13141 }, { "epoch": 0.7050429184549356, "grad_norm": 0.453125, "learning_rate": 4.840156039440399e-06, "loss": 2.3443, "step": 13142 }, { "epoch": 0.7050965665236052, "grad_norm": 1.0078125, "learning_rate": 4.84012547192785e-06, "loss": 2.109, "step": 13143 }, { "epoch": 0.7051502145922747, "grad_norm": 0.416015625, "learning_rate": 4.8400949015893576e-06, "loss": 2.2371, "step": 13144 }, { "epoch": 0.7052038626609443, "grad_norm": 0.474609375, "learning_rate": 4.840064328424958e-06, "loss": 2.2887, "step": 13145 }, { "epoch": 0.7052575107296137, "grad_norm": 0.451171875, "learning_rate": 4.840033752434687e-06, "loss": 2.4313, "step": 13146 }, { "epoch": 0.7053111587982832, "grad_norm": 0.458984375, "learning_rate": 4.840003173618583e-06, "loss": 1.3412, "step": 13147 }, { "epoch": 0.7053648068669528, "grad_norm": 0.40625, "learning_rate": 4.839972591976682e-06, "loss": 1.9295, "step": 13148 }, { "epoch": 0.7054184549356223, "grad_norm": 0.390625, "learning_rate": 4.8399420075090215e-06, "loss": 2.2655, "step": 13149 }, { "epoch": 0.7054721030042919, "grad_norm": 0.53515625, "learning_rate": 4.839911420215638e-06, "loss": 2.5736, "step": 13150 }, { "epoch": 0.7055257510729613, "grad_norm": 0.451171875, "learning_rate": 4.8398808300965695e-06, "loss": 2.163, "step": 13151 }, { "epoch": 0.7055793991416309, "grad_norm": 0.59375, "learning_rate": 4.839850237151851e-06, "loss": 2.1744, "step": 13152 }, { "epoch": 0.7056330472103004, "grad_norm": 0.51171875, "learning_rate": 4.839819641381522e-06, "loss": 2.4633, "step": 13153 }, { "epoch": 0.70568669527897, "grad_norm": 0.494140625, "learning_rate": 4.839789042785617e-06, "loss": 2.2276, "step": 13154 }, { "epoch": 0.7057403433476395, "grad_norm": 0.474609375, "learning_rate": 4.839758441364174e-06, "loss": 2.4222, "step": 13155 }, { "epoch": 0.705793991416309, "grad_norm": 0.427734375, "learning_rate": 4.83972783711723e-06, "loss": 2.1978, "step": 13156 }, { "epoch": 0.7058476394849785, "grad_norm": 0.447265625, "learning_rate": 4.839697230044822e-06, "loss": 2.5406, "step": 13157 }, { "epoch": 0.7059012875536481, "grad_norm": 0.462890625, "learning_rate": 4.8396666201469865e-06, "loss": 1.5295, "step": 13158 }, { "epoch": 0.7059549356223176, "grad_norm": 0.38671875, "learning_rate": 4.839636007423761e-06, "loss": 2.2326, "step": 13159 }, { "epoch": 0.7060085836909872, "grad_norm": 1.5390625, "learning_rate": 4.839605391875183e-06, "loss": 2.3679, "step": 13160 }, { "epoch": 0.7060622317596567, "grad_norm": 0.49609375, "learning_rate": 4.839574773501289e-06, "loss": 2.5879, "step": 13161 }, { "epoch": 0.7061158798283261, "grad_norm": 0.51953125, "learning_rate": 4.839544152302115e-06, "loss": 2.1864, "step": 13162 }, { "epoch": 0.7061695278969957, "grad_norm": 0.41796875, "learning_rate": 4.839513528277698e-06, "loss": 2.3425, "step": 13163 }, { "epoch": 0.7062231759656652, "grad_norm": 0.546875, "learning_rate": 4.839482901428077e-06, "loss": 2.1047, "step": 13164 }, { "epoch": 0.7062768240343348, "grad_norm": 0.52734375, "learning_rate": 4.839452271753287e-06, "loss": 2.3252, "step": 13165 }, { "epoch": 0.7063304721030043, "grad_norm": 0.4375, "learning_rate": 4.839421639253365e-06, "loss": 2.4954, "step": 13166 }, { "epoch": 0.7063841201716738, "grad_norm": 0.453125, "learning_rate": 4.83939100392835e-06, "loss": 2.4642, "step": 13167 }, { "epoch": 0.7064377682403433, "grad_norm": 0.48828125, "learning_rate": 4.839360365778277e-06, "loss": 2.2634, "step": 13168 }, { "epoch": 0.7064914163090129, "grad_norm": 0.376953125, "learning_rate": 4.839329724803183e-06, "loss": 2.4843, "step": 13169 }, { "epoch": 0.7065450643776824, "grad_norm": 0.38671875, "learning_rate": 4.839299081003107e-06, "loss": 2.1934, "step": 13170 }, { "epoch": 0.706598712446352, "grad_norm": 0.42578125, "learning_rate": 4.839268434378084e-06, "loss": 2.1523, "step": 13171 }, { "epoch": 0.7066523605150214, "grad_norm": 0.42578125, "learning_rate": 4.839237784928152e-06, "loss": 1.9449, "step": 13172 }, { "epoch": 0.706706008583691, "grad_norm": 0.494140625, "learning_rate": 4.839207132653347e-06, "loss": 2.4337, "step": 13173 }, { "epoch": 0.7067596566523605, "grad_norm": 0.4296875, "learning_rate": 4.8391764775537074e-06, "loss": 2.2556, "step": 13174 }, { "epoch": 0.70681330472103, "grad_norm": 0.478515625, "learning_rate": 4.839145819629269e-06, "loss": 2.181, "step": 13175 }, { "epoch": 0.7068669527896996, "grad_norm": 0.55078125, "learning_rate": 4.83911515888007e-06, "loss": 2.4915, "step": 13176 }, { "epoch": 0.706920600858369, "grad_norm": 0.56640625, "learning_rate": 4.839084495306146e-06, "loss": 2.3079, "step": 13177 }, { "epoch": 0.7069742489270386, "grad_norm": 0.478515625, "learning_rate": 4.8390538289075354e-06, "loss": 1.5921, "step": 13178 }, { "epoch": 0.7070278969957081, "grad_norm": 0.431640625, "learning_rate": 4.839023159684274e-06, "loss": 2.2194, "step": 13179 }, { "epoch": 0.7070815450643777, "grad_norm": 0.439453125, "learning_rate": 4.8389924876364005e-06, "loss": 2.3039, "step": 13180 }, { "epoch": 0.7071351931330472, "grad_norm": 0.46875, "learning_rate": 4.838961812763949e-06, "loss": 2.4356, "step": 13181 }, { "epoch": 0.7071888412017168, "grad_norm": 0.44921875, "learning_rate": 4.838931135066961e-06, "loss": 2.403, "step": 13182 }, { "epoch": 0.7072424892703862, "grad_norm": 0.46484375, "learning_rate": 4.838900454545469e-06, "loss": 1.5847, "step": 13183 }, { "epoch": 0.7072961373390558, "grad_norm": 0.486328125, "learning_rate": 4.838869771199513e-06, "loss": 2.3879, "step": 13184 }, { "epoch": 0.7073497854077253, "grad_norm": 0.435546875, "learning_rate": 4.838839085029129e-06, "loss": 2.1656, "step": 13185 }, { "epoch": 0.7074034334763949, "grad_norm": 1.078125, "learning_rate": 4.838808396034354e-06, "loss": 2.4373, "step": 13186 }, { "epoch": 0.7074570815450644, "grad_norm": 0.5078125, "learning_rate": 4.838777704215225e-06, "loss": 1.6554, "step": 13187 }, { "epoch": 0.707510729613734, "grad_norm": 0.421875, "learning_rate": 4.838747009571779e-06, "loss": 2.408, "step": 13188 }, { "epoch": 0.7075643776824034, "grad_norm": 0.4765625, "learning_rate": 4.8387163121040544e-06, "loss": 2.1601, "step": 13189 }, { "epoch": 0.7076180257510729, "grad_norm": 0.482421875, "learning_rate": 4.8386856118120864e-06, "loss": 2.1976, "step": 13190 }, { "epoch": 0.7076716738197425, "grad_norm": 0.51171875, "learning_rate": 4.838654908695913e-06, "loss": 2.2599, "step": 13191 }, { "epoch": 0.707725321888412, "grad_norm": 0.453125, "learning_rate": 4.838624202755571e-06, "loss": 2.3179, "step": 13192 }, { "epoch": 0.7077789699570816, "grad_norm": 0.447265625, "learning_rate": 4.838593493991099e-06, "loss": 2.376, "step": 13193 }, { "epoch": 0.707832618025751, "grad_norm": 0.49609375, "learning_rate": 4.838562782402531e-06, "loss": 2.4011, "step": 13194 }, { "epoch": 0.7078862660944206, "grad_norm": 0.51171875, "learning_rate": 4.838532067989907e-06, "loss": 2.4036, "step": 13195 }, { "epoch": 0.7079399141630901, "grad_norm": 0.7421875, "learning_rate": 4.838501350753262e-06, "loss": 2.3763, "step": 13196 }, { "epoch": 0.7079935622317597, "grad_norm": 0.578125, "learning_rate": 4.838470630692634e-06, "loss": 2.4551, "step": 13197 }, { "epoch": 0.7080472103004292, "grad_norm": 0.45703125, "learning_rate": 4.838439907808061e-06, "loss": 1.6532, "step": 13198 }, { "epoch": 0.7081008583690988, "grad_norm": 0.46484375, "learning_rate": 4.838409182099579e-06, "loss": 2.371, "step": 13199 }, { "epoch": 0.7081545064377682, "grad_norm": 0.43359375, "learning_rate": 4.838378453567225e-06, "loss": 2.3581, "step": 13200 }, { "epoch": 0.7082081545064378, "grad_norm": 0.41015625, "learning_rate": 4.838347722211037e-06, "loss": 2.4372, "step": 13201 }, { "epoch": 0.7082618025751073, "grad_norm": 0.4609375, "learning_rate": 4.83831698803105e-06, "loss": 2.3341, "step": 13202 }, { "epoch": 0.7083154506437769, "grad_norm": 0.46875, "learning_rate": 4.838286251027303e-06, "loss": 2.3492, "step": 13203 }, { "epoch": 0.7083690987124464, "grad_norm": 0.44140625, "learning_rate": 4.8382555111998345e-06, "loss": 2.4038, "step": 13204 }, { "epoch": 0.7084227467811158, "grad_norm": 0.388671875, "learning_rate": 4.838224768548678e-06, "loss": 2.1305, "step": 13205 }, { "epoch": 0.7084763948497854, "grad_norm": 0.4296875, "learning_rate": 4.838194023073874e-06, "loss": 1.997, "step": 13206 }, { "epoch": 0.7085300429184549, "grad_norm": 0.431640625, "learning_rate": 4.838163274775457e-06, "loss": 2.1748, "step": 13207 }, { "epoch": 0.7085836909871245, "grad_norm": 0.4375, "learning_rate": 4.838132523653466e-06, "loss": 2.3011, "step": 13208 }, { "epoch": 0.708637339055794, "grad_norm": 0.482421875, "learning_rate": 4.838101769707937e-06, "loss": 2.1979, "step": 13209 }, { "epoch": 0.7086909871244635, "grad_norm": 0.5390625, "learning_rate": 4.838071012938908e-06, "loss": 2.3135, "step": 13210 }, { "epoch": 0.708744635193133, "grad_norm": 0.4375, "learning_rate": 4.838040253346415e-06, "loss": 2.3451, "step": 13211 }, { "epoch": 0.7087982832618026, "grad_norm": 0.45703125, "learning_rate": 4.838009490930496e-06, "loss": 2.3695, "step": 13212 }, { "epoch": 0.7088519313304721, "grad_norm": 0.4921875, "learning_rate": 4.837978725691188e-06, "loss": 2.334, "step": 13213 }, { "epoch": 0.7089055793991417, "grad_norm": 0.5234375, "learning_rate": 4.837947957628528e-06, "loss": 2.1642, "step": 13214 }, { "epoch": 0.7089592274678111, "grad_norm": 0.486328125, "learning_rate": 4.837917186742553e-06, "loss": 2.4229, "step": 13215 }, { "epoch": 0.7090128755364807, "grad_norm": 0.4296875, "learning_rate": 4.8378864130333016e-06, "loss": 2.2436, "step": 13216 }, { "epoch": 0.7090665236051502, "grad_norm": 0.4921875, "learning_rate": 4.837855636500809e-06, "loss": 2.2395, "step": 13217 }, { "epoch": 0.7091201716738197, "grad_norm": 1.28125, "learning_rate": 4.837824857145114e-06, "loss": 2.4306, "step": 13218 }, { "epoch": 0.7091738197424893, "grad_norm": 0.50390625, "learning_rate": 4.837794074966252e-06, "loss": 2.2364, "step": 13219 }, { "epoch": 0.7092274678111588, "grad_norm": 0.431640625, "learning_rate": 4.837763289964261e-06, "loss": 2.4481, "step": 13220 }, { "epoch": 0.7092811158798283, "grad_norm": 0.41015625, "learning_rate": 4.837732502139179e-06, "loss": 2.3769, "step": 13221 }, { "epoch": 0.7093347639484978, "grad_norm": 0.427734375, "learning_rate": 4.837701711491042e-06, "loss": 2.3581, "step": 13222 }, { "epoch": 0.7093884120171674, "grad_norm": 0.466796875, "learning_rate": 4.8376709180198875e-06, "loss": 2.2127, "step": 13223 }, { "epoch": 0.7094420600858369, "grad_norm": 0.8515625, "learning_rate": 4.837640121725754e-06, "loss": 1.7498, "step": 13224 }, { "epoch": 0.7094957081545065, "grad_norm": 0.44140625, "learning_rate": 4.837609322608676e-06, "loss": 2.5095, "step": 13225 }, { "epoch": 0.7095493562231759, "grad_norm": 0.47265625, "learning_rate": 4.837578520668693e-06, "loss": 2.2571, "step": 13226 }, { "epoch": 0.7096030042918455, "grad_norm": 0.5859375, "learning_rate": 4.837547715905842e-06, "loss": 2.6914, "step": 13227 }, { "epoch": 0.709656652360515, "grad_norm": 0.79296875, "learning_rate": 4.837516908320159e-06, "loss": 2.1805, "step": 13228 }, { "epoch": 0.7097103004291846, "grad_norm": 0.5390625, "learning_rate": 4.837486097911681e-06, "loss": 2.4225, "step": 13229 }, { "epoch": 0.7097639484978541, "grad_norm": 0.4453125, "learning_rate": 4.837455284680447e-06, "loss": 2.1587, "step": 13230 }, { "epoch": 0.7098175965665237, "grad_norm": 0.416015625, "learning_rate": 4.837424468626493e-06, "loss": 2.1864, "step": 13231 }, { "epoch": 0.7098712446351931, "grad_norm": 0.384765625, "learning_rate": 4.837393649749856e-06, "loss": 2.4066, "step": 13232 }, { "epoch": 0.7099248927038626, "grad_norm": 0.4375, "learning_rate": 4.8373628280505745e-06, "loss": 2.2481, "step": 13233 }, { "epoch": 0.7099785407725322, "grad_norm": 0.458984375, "learning_rate": 4.8373320035286845e-06, "loss": 2.0951, "step": 13234 }, { "epoch": 0.7100321888412017, "grad_norm": 0.4140625, "learning_rate": 4.837301176184224e-06, "loss": 2.4905, "step": 13235 }, { "epoch": 0.7100858369098713, "grad_norm": 0.54296875, "learning_rate": 4.8372703460172295e-06, "loss": 2.3645, "step": 13236 }, { "epoch": 0.7101394849785407, "grad_norm": 0.439453125, "learning_rate": 4.837239513027738e-06, "loss": 2.2793, "step": 13237 }, { "epoch": 0.7101931330472103, "grad_norm": 0.392578125, "learning_rate": 4.8372086772157885e-06, "loss": 2.0872, "step": 13238 }, { "epoch": 0.7102467811158798, "grad_norm": 0.419921875, "learning_rate": 4.8371778385814164e-06, "loss": 2.0936, "step": 13239 }, { "epoch": 0.7103004291845494, "grad_norm": 0.416015625, "learning_rate": 4.8371469971246595e-06, "loss": 2.2724, "step": 13240 }, { "epoch": 0.7103540772532189, "grad_norm": 0.46875, "learning_rate": 4.837116152845555e-06, "loss": 2.2577, "step": 13241 }, { "epoch": 0.7104077253218885, "grad_norm": 0.494140625, "learning_rate": 4.837085305744141e-06, "loss": 2.2883, "step": 13242 }, { "epoch": 0.7104613733905579, "grad_norm": 0.44921875, "learning_rate": 4.837054455820453e-06, "loss": 2.4285, "step": 13243 }, { "epoch": 0.7105150214592275, "grad_norm": 0.48046875, "learning_rate": 4.83702360307453e-06, "loss": 2.4403, "step": 13244 }, { "epoch": 0.710568669527897, "grad_norm": 0.4140625, "learning_rate": 4.8369927475064085e-06, "loss": 1.8247, "step": 13245 }, { "epoch": 0.7106223175965666, "grad_norm": 0.61328125, "learning_rate": 4.836961889116125e-06, "loss": 2.3418, "step": 13246 }, { "epoch": 0.710675965665236, "grad_norm": 0.53125, "learning_rate": 4.836931027903718e-06, "loss": 2.3362, "step": 13247 }, { "epoch": 0.7107296137339055, "grad_norm": 0.466796875, "learning_rate": 4.8369001638692244e-06, "loss": 2.4442, "step": 13248 }, { "epoch": 0.7107832618025751, "grad_norm": 0.578125, "learning_rate": 4.836869297012683e-06, "loss": 2.4225, "step": 13249 }, { "epoch": 0.7108369098712446, "grad_norm": 0.87890625, "learning_rate": 4.836838427334127e-06, "loss": 2.2148, "step": 13250 }, { "epoch": 0.7108905579399142, "grad_norm": 0.462890625, "learning_rate": 4.836807554833597e-06, "loss": 2.4094, "step": 13251 }, { "epoch": 0.7109442060085837, "grad_norm": 0.55859375, "learning_rate": 4.83677667951113e-06, "loss": 2.0893, "step": 13252 }, { "epoch": 0.7109978540772532, "grad_norm": 0.447265625, "learning_rate": 4.836745801366762e-06, "loss": 2.2455, "step": 13253 }, { "epoch": 0.7110515021459227, "grad_norm": 0.765625, "learning_rate": 4.836714920400531e-06, "loss": 2.287, "step": 13254 }, { "epoch": 0.7111051502145923, "grad_norm": 0.478515625, "learning_rate": 4.836684036612476e-06, "loss": 2.4615, "step": 13255 }, { "epoch": 0.7111587982832618, "grad_norm": 0.4140625, "learning_rate": 4.83665315000263e-06, "loss": 2.1939, "step": 13256 }, { "epoch": 0.7112124463519314, "grad_norm": 0.390625, "learning_rate": 4.836622260571035e-06, "loss": 1.9236, "step": 13257 }, { "epoch": 0.7112660944206008, "grad_norm": 0.41015625, "learning_rate": 4.836591368317726e-06, "loss": 2.4249, "step": 13258 }, { "epoch": 0.7113197424892704, "grad_norm": 0.490234375, "learning_rate": 4.836560473242739e-06, "loss": 2.6043, "step": 13259 }, { "epoch": 0.7113733905579399, "grad_norm": 0.48828125, "learning_rate": 4.836529575346114e-06, "loss": 2.0595, "step": 13260 }, { "epoch": 0.7114270386266094, "grad_norm": 0.455078125, "learning_rate": 4.836498674627886e-06, "loss": 2.4907, "step": 13261 }, { "epoch": 0.711480686695279, "grad_norm": 0.416015625, "learning_rate": 4.836467771088095e-06, "loss": 2.1509, "step": 13262 }, { "epoch": 0.7115343347639485, "grad_norm": 0.55078125, "learning_rate": 4.836436864726775e-06, "loss": 2.204, "step": 13263 }, { "epoch": 0.711587982832618, "grad_norm": 0.3671875, "learning_rate": 4.836405955543967e-06, "loss": 2.239, "step": 13264 }, { "epoch": 0.7116416309012875, "grad_norm": 0.48046875, "learning_rate": 4.836375043539706e-06, "loss": 2.281, "step": 13265 }, { "epoch": 0.7116952789699571, "grad_norm": 0.447265625, "learning_rate": 4.836344128714029e-06, "loss": 2.2542, "step": 13266 }, { "epoch": 0.7117489270386266, "grad_norm": 0.68359375, "learning_rate": 4.836313211066974e-06, "loss": 2.3517, "step": 13267 }, { "epoch": 0.7118025751072962, "grad_norm": 0.70703125, "learning_rate": 4.836282290598579e-06, "loss": 2.303, "step": 13268 }, { "epoch": 0.7118562231759656, "grad_norm": 0.4296875, "learning_rate": 4.836251367308881e-06, "loss": 2.5071, "step": 13269 }, { "epoch": 0.7119098712446352, "grad_norm": 0.43359375, "learning_rate": 4.836220441197916e-06, "loss": 2.3399, "step": 13270 }, { "epoch": 0.7119635193133047, "grad_norm": 0.470703125, "learning_rate": 4.836189512265723e-06, "loss": 2.4055, "step": 13271 }, { "epoch": 0.7120171673819743, "grad_norm": 0.466796875, "learning_rate": 4.836158580512339e-06, "loss": 2.1513, "step": 13272 }, { "epoch": 0.7120708154506438, "grad_norm": 0.486328125, "learning_rate": 4.8361276459378005e-06, "loss": 2.378, "step": 13273 }, { "epoch": 0.7121244635193134, "grad_norm": 0.384765625, "learning_rate": 4.8360967085421465e-06, "loss": 2.2389, "step": 13274 }, { "epoch": 0.7121781115879828, "grad_norm": 1.703125, "learning_rate": 4.836065768325413e-06, "loss": 2.3935, "step": 13275 }, { "epoch": 0.7122317596566523, "grad_norm": 0.5, "learning_rate": 4.8360348252876375e-06, "loss": 2.2135, "step": 13276 }, { "epoch": 0.7122854077253219, "grad_norm": 0.47265625, "learning_rate": 4.836003879428857e-06, "loss": 2.2611, "step": 13277 }, { "epoch": 0.7123390557939914, "grad_norm": 0.53515625, "learning_rate": 4.835972930749111e-06, "loss": 2.1137, "step": 13278 }, { "epoch": 0.712392703862661, "grad_norm": 0.5625, "learning_rate": 4.835941979248434e-06, "loss": 2.0992, "step": 13279 }, { "epoch": 0.7124463519313304, "grad_norm": 0.625, "learning_rate": 4.835911024926865e-06, "loss": 2.2138, "step": 13280 }, { "epoch": 0.7125, "grad_norm": 0.427734375, "learning_rate": 4.835880067784441e-06, "loss": 2.368, "step": 13281 }, { "epoch": 0.7125536480686695, "grad_norm": 0.41015625, "learning_rate": 4.8358491078212e-06, "loss": 2.3086, "step": 13282 }, { "epoch": 0.7126072961373391, "grad_norm": 0.4140625, "learning_rate": 4.835818145037179e-06, "loss": 2.1532, "step": 13283 }, { "epoch": 0.7126609442060086, "grad_norm": 0.447265625, "learning_rate": 4.835787179432414e-06, "loss": 2.0909, "step": 13284 }, { "epoch": 0.7127145922746781, "grad_norm": 0.423828125, "learning_rate": 4.835756211006945e-06, "loss": 2.1154, "step": 13285 }, { "epoch": 0.7127682403433476, "grad_norm": 0.4609375, "learning_rate": 4.8357252397608074e-06, "loss": 2.2667, "step": 13286 }, { "epoch": 0.7128218884120172, "grad_norm": 0.46484375, "learning_rate": 4.835694265694039e-06, "loss": 2.3038, "step": 13287 }, { "epoch": 0.7128755364806867, "grad_norm": 0.4453125, "learning_rate": 4.835663288806678e-06, "loss": 2.4378, "step": 13288 }, { "epoch": 0.7129291845493563, "grad_norm": 1.046875, "learning_rate": 4.835632309098761e-06, "loss": 2.1537, "step": 13289 }, { "epoch": 0.7129828326180258, "grad_norm": 0.470703125, "learning_rate": 4.835601326570326e-06, "loss": 2.4043, "step": 13290 }, { "epoch": 0.7130364806866952, "grad_norm": 0.498046875, "learning_rate": 4.835570341221409e-06, "loss": 2.5134, "step": 13291 }, { "epoch": 0.7130901287553648, "grad_norm": 0.50390625, "learning_rate": 4.83553935305205e-06, "loss": 1.5813, "step": 13292 }, { "epoch": 0.7131437768240343, "grad_norm": 0.451171875, "learning_rate": 4.835508362062284e-06, "loss": 2.2004, "step": 13293 }, { "epoch": 0.7131974248927039, "grad_norm": 0.341796875, "learning_rate": 4.83547736825215e-06, "loss": 1.6695, "step": 13294 }, { "epoch": 0.7132510729613734, "grad_norm": 0.4453125, "learning_rate": 4.835446371621685e-06, "loss": 2.3222, "step": 13295 }, { "epoch": 0.7133047210300429, "grad_norm": 0.44140625, "learning_rate": 4.835415372170925e-06, "loss": 1.9533, "step": 13296 }, { "epoch": 0.7133583690987124, "grad_norm": 0.390625, "learning_rate": 4.835384369899909e-06, "loss": 2.2421, "step": 13297 }, { "epoch": 0.713412017167382, "grad_norm": 0.49609375, "learning_rate": 4.835353364808675e-06, "loss": 2.1799, "step": 13298 }, { "epoch": 0.7134656652360515, "grad_norm": 2.453125, "learning_rate": 4.835322356897258e-06, "loss": 2.3855, "step": 13299 }, { "epoch": 0.7135193133047211, "grad_norm": 0.4375, "learning_rate": 4.835291346165698e-06, "loss": 2.2717, "step": 13300 }, { "epoch": 0.7135729613733905, "grad_norm": 0.62109375, "learning_rate": 4.835260332614031e-06, "loss": 2.4662, "step": 13301 }, { "epoch": 0.7136266094420601, "grad_norm": 0.439453125, "learning_rate": 4.835229316242296e-06, "loss": 2.2045, "step": 13302 }, { "epoch": 0.7136802575107296, "grad_norm": 0.482421875, "learning_rate": 4.835198297050529e-06, "loss": 2.4156, "step": 13303 }, { "epoch": 0.7137339055793992, "grad_norm": 0.63671875, "learning_rate": 4.835167275038766e-06, "loss": 2.2602, "step": 13304 }, { "epoch": 0.7137875536480687, "grad_norm": 0.41796875, "learning_rate": 4.8351362502070475e-06, "loss": 2.4519, "step": 13305 }, { "epoch": 0.7138412017167381, "grad_norm": 0.49609375, "learning_rate": 4.83510522255541e-06, "loss": 2.3057, "step": 13306 }, { "epoch": 0.7138948497854077, "grad_norm": 0.46484375, "learning_rate": 4.83507419208389e-06, "loss": 2.315, "step": 13307 }, { "epoch": 0.7139484978540772, "grad_norm": 0.48828125, "learning_rate": 4.835043158792526e-06, "loss": 1.5186, "step": 13308 }, { "epoch": 0.7140021459227468, "grad_norm": 0.3984375, "learning_rate": 4.835012122681356e-06, "loss": 2.2927, "step": 13309 }, { "epoch": 0.7140557939914163, "grad_norm": 0.515625, "learning_rate": 4.834981083750415e-06, "loss": 2.4548, "step": 13310 }, { "epoch": 0.7141094420600859, "grad_norm": 0.447265625, "learning_rate": 4.834950041999743e-06, "loss": 2.2832, "step": 13311 }, { "epoch": 0.7141630901287553, "grad_norm": 0.640625, "learning_rate": 4.8349189974293765e-06, "loss": 2.1389, "step": 13312 }, { "epoch": 0.7142167381974249, "grad_norm": 0.4453125, "learning_rate": 4.834887950039353e-06, "loss": 2.1926, "step": 13313 }, { "epoch": 0.7142703862660944, "grad_norm": 0.390625, "learning_rate": 4.83485689982971e-06, "loss": 2.1311, "step": 13314 }, { "epoch": 0.714324034334764, "grad_norm": 0.4140625, "learning_rate": 4.834825846800486e-06, "loss": 2.1684, "step": 13315 }, { "epoch": 0.7143776824034335, "grad_norm": 0.40625, "learning_rate": 4.834794790951716e-06, "loss": 2.2876, "step": 13316 }, { "epoch": 0.714431330472103, "grad_norm": 0.453125, "learning_rate": 4.834763732283439e-06, "loss": 2.3351, "step": 13317 }, { "epoch": 0.7144849785407725, "grad_norm": 0.451171875, "learning_rate": 4.834732670795694e-06, "loss": 2.2773, "step": 13318 }, { "epoch": 0.714538626609442, "grad_norm": 0.51171875, "learning_rate": 4.834701606488517e-06, "loss": 2.4611, "step": 13319 }, { "epoch": 0.7145922746781116, "grad_norm": 0.40625, "learning_rate": 4.834670539361944e-06, "loss": 2.3108, "step": 13320 }, { "epoch": 0.7146459227467811, "grad_norm": 0.48046875, "learning_rate": 4.834639469416016e-06, "loss": 2.4186, "step": 13321 }, { "epoch": 0.7146995708154507, "grad_norm": 0.3515625, "learning_rate": 4.834608396650768e-06, "loss": 1.8473, "step": 13322 }, { "epoch": 0.7147532188841201, "grad_norm": 0.453125, "learning_rate": 4.834577321066238e-06, "loss": 2.2146, "step": 13323 }, { "epoch": 0.7148068669527897, "grad_norm": 0.392578125, "learning_rate": 4.834546242662463e-06, "loss": 2.3335, "step": 13324 }, { "epoch": 0.7148605150214592, "grad_norm": 0.57421875, "learning_rate": 4.834515161439482e-06, "loss": 2.0955, "step": 13325 }, { "epoch": 0.7149141630901288, "grad_norm": 0.4921875, "learning_rate": 4.834484077397331e-06, "loss": 2.265, "step": 13326 }, { "epoch": 0.7149678111587983, "grad_norm": 0.421875, "learning_rate": 4.834452990536049e-06, "loss": 1.6156, "step": 13327 }, { "epoch": 0.7150214592274678, "grad_norm": 0.3984375, "learning_rate": 4.834421900855673e-06, "loss": 2.413, "step": 13328 }, { "epoch": 0.7150751072961373, "grad_norm": 0.515625, "learning_rate": 4.8343908083562396e-06, "loss": 2.2073, "step": 13329 }, { "epoch": 0.7151287553648069, "grad_norm": 0.376953125, "learning_rate": 4.8343597130377875e-06, "loss": 2.3023, "step": 13330 }, { "epoch": 0.7151824034334764, "grad_norm": 0.44921875, "learning_rate": 4.834328614900353e-06, "loss": 2.1491, "step": 13331 }, { "epoch": 0.715236051502146, "grad_norm": 0.41796875, "learning_rate": 4.834297513943976e-06, "loss": 2.3602, "step": 13332 }, { "epoch": 0.7152896995708155, "grad_norm": 0.47265625, "learning_rate": 4.834266410168692e-06, "loss": 2.2802, "step": 13333 }, { "epoch": 0.7153433476394849, "grad_norm": 0.609375, "learning_rate": 4.834235303574539e-06, "loss": 2.2269, "step": 13334 }, { "epoch": 0.7153969957081545, "grad_norm": 0.77734375, "learning_rate": 4.834204194161555e-06, "loss": 2.3514, "step": 13335 }, { "epoch": 0.715450643776824, "grad_norm": 0.46484375, "learning_rate": 4.8341730819297765e-06, "loss": 2.2619, "step": 13336 }, { "epoch": 0.7155042918454936, "grad_norm": 3.0625, "learning_rate": 4.834141966879242e-06, "loss": 2.2514, "step": 13337 }, { "epoch": 0.715557939914163, "grad_norm": 0.5078125, "learning_rate": 4.83411084900999e-06, "loss": 2.3141, "step": 13338 }, { "epoch": 0.7156115879828326, "grad_norm": 0.50390625, "learning_rate": 4.8340797283220565e-06, "loss": 2.6727, "step": 13339 }, { "epoch": 0.7156652360515021, "grad_norm": 0.43359375, "learning_rate": 4.834048604815479e-06, "loss": 2.3489, "step": 13340 }, { "epoch": 0.7157188841201717, "grad_norm": 0.470703125, "learning_rate": 4.8340174784902964e-06, "loss": 2.1859, "step": 13341 }, { "epoch": 0.7157725321888412, "grad_norm": 0.49609375, "learning_rate": 4.833986349346544e-06, "loss": 2.3274, "step": 13342 }, { "epoch": 0.7158261802575108, "grad_norm": 0.447265625, "learning_rate": 4.833955217384263e-06, "loss": 2.0564, "step": 13343 }, { "epoch": 0.7158798283261802, "grad_norm": 0.478515625, "learning_rate": 4.833924082603488e-06, "loss": 2.2118, "step": 13344 }, { "epoch": 0.7159334763948498, "grad_norm": 0.52734375, "learning_rate": 4.833892945004257e-06, "loss": 2.5211, "step": 13345 }, { "epoch": 0.7159871244635193, "grad_norm": 0.474609375, "learning_rate": 4.83386180458661e-06, "loss": 2.342, "step": 13346 }, { "epoch": 0.7160407725321889, "grad_norm": 0.92578125, "learning_rate": 4.8338306613505805e-06, "loss": 2.1839, "step": 13347 }, { "epoch": 0.7160944206008584, "grad_norm": 0.482421875, "learning_rate": 4.833799515296209e-06, "loss": 2.5219, "step": 13348 }, { "epoch": 0.7161480686695278, "grad_norm": 0.443359375, "learning_rate": 4.833768366423533e-06, "loss": 2.2125, "step": 13349 }, { "epoch": 0.7162017167381974, "grad_norm": 0.44921875, "learning_rate": 4.833737214732589e-06, "loss": 2.385, "step": 13350 }, { "epoch": 0.7162553648068669, "grad_norm": 0.46484375, "learning_rate": 4.8337060602234155e-06, "loss": 2.3724, "step": 13351 }, { "epoch": 0.7163090128755365, "grad_norm": 0.50390625, "learning_rate": 4.83367490289605e-06, "loss": 2.5186, "step": 13352 }, { "epoch": 0.716362660944206, "grad_norm": 0.4375, "learning_rate": 4.83364374275053e-06, "loss": 2.2392, "step": 13353 }, { "epoch": 0.7164163090128756, "grad_norm": 0.447265625, "learning_rate": 4.8336125797868925e-06, "loss": 2.8449, "step": 13354 }, { "epoch": 0.716469957081545, "grad_norm": 0.47265625, "learning_rate": 4.8335814140051765e-06, "loss": 2.2814, "step": 13355 }, { "epoch": 0.7165236051502146, "grad_norm": 0.470703125, "learning_rate": 4.8335502454054175e-06, "loss": 2.2993, "step": 13356 }, { "epoch": 0.7165772532188841, "grad_norm": 1.0390625, "learning_rate": 4.833519073987656e-06, "loss": 2.1736, "step": 13357 }, { "epoch": 0.7166309012875537, "grad_norm": 0.59375, "learning_rate": 4.833487899751927e-06, "loss": 1.7813, "step": 13358 }, { "epoch": 0.7166845493562232, "grad_norm": 0.40625, "learning_rate": 4.83345672269827e-06, "loss": 2.412, "step": 13359 }, { "epoch": 0.7167381974248928, "grad_norm": 0.4375, "learning_rate": 4.833425542826722e-06, "loss": 2.317, "step": 13360 }, { "epoch": 0.7167918454935622, "grad_norm": 0.5546875, "learning_rate": 4.83339436013732e-06, "loss": 2.2403, "step": 13361 }, { "epoch": 0.7168454935622317, "grad_norm": 0.482421875, "learning_rate": 4.8333631746301025e-06, "loss": 2.277, "step": 13362 }, { "epoch": 0.7168991416309013, "grad_norm": 0.39453125, "learning_rate": 4.833331986305106e-06, "loss": 2.2708, "step": 13363 }, { "epoch": 0.7169527896995708, "grad_norm": 0.53515625, "learning_rate": 4.833300795162371e-06, "loss": 1.8568, "step": 13364 }, { "epoch": 0.7170064377682404, "grad_norm": 0.419921875, "learning_rate": 4.833269601201931e-06, "loss": 2.281, "step": 13365 }, { "epoch": 0.7170600858369098, "grad_norm": 0.55078125, "learning_rate": 4.833238404423828e-06, "loss": 2.1582, "step": 13366 }, { "epoch": 0.7171137339055794, "grad_norm": 0.458984375, "learning_rate": 4.833207204828097e-06, "loss": 2.4328, "step": 13367 }, { "epoch": 0.7171673819742489, "grad_norm": 0.56640625, "learning_rate": 4.833176002414775e-06, "loss": 2.2704, "step": 13368 }, { "epoch": 0.7172210300429185, "grad_norm": 0.4296875, "learning_rate": 4.833144797183902e-06, "loss": 2.2496, "step": 13369 }, { "epoch": 0.717274678111588, "grad_norm": 0.43359375, "learning_rate": 4.833113589135514e-06, "loss": 2.5104, "step": 13370 }, { "epoch": 0.7173283261802575, "grad_norm": 0.412109375, "learning_rate": 4.833082378269649e-06, "loss": 2.3002, "step": 13371 }, { "epoch": 0.717381974248927, "grad_norm": 0.625, "learning_rate": 4.833051164586346e-06, "loss": 1.4585, "step": 13372 }, { "epoch": 0.7174356223175966, "grad_norm": 0.50390625, "learning_rate": 4.833019948085641e-06, "loss": 2.2905, "step": 13373 }, { "epoch": 0.7174892703862661, "grad_norm": 0.50390625, "learning_rate": 4.832988728767573e-06, "loss": 2.3892, "step": 13374 }, { "epoch": 0.7175429184549357, "grad_norm": 0.490234375, "learning_rate": 4.832957506632179e-06, "loss": 2.151, "step": 13375 }, { "epoch": 0.7175965665236052, "grad_norm": 0.75390625, "learning_rate": 4.832926281679496e-06, "loss": 1.9873, "step": 13376 }, { "epoch": 0.7176502145922746, "grad_norm": 0.69140625, "learning_rate": 4.832895053909563e-06, "loss": 2.3781, "step": 13377 }, { "epoch": 0.7177038626609442, "grad_norm": 0.44921875, "learning_rate": 4.832863823322417e-06, "loss": 2.3976, "step": 13378 }, { "epoch": 0.7177575107296137, "grad_norm": 0.44140625, "learning_rate": 4.832832589918096e-06, "loss": 2.3941, "step": 13379 }, { "epoch": 0.7178111587982833, "grad_norm": 0.486328125, "learning_rate": 4.832801353696638e-06, "loss": 2.6544, "step": 13380 }, { "epoch": 0.7178648068669528, "grad_norm": 0.4921875, "learning_rate": 4.83277011465808e-06, "loss": 2.5178, "step": 13381 }, { "epoch": 0.7179184549356223, "grad_norm": 0.4765625, "learning_rate": 4.832738872802459e-06, "loss": 2.2375, "step": 13382 }, { "epoch": 0.7179721030042918, "grad_norm": 0.41796875, "learning_rate": 4.832707628129816e-06, "loss": 2.2574, "step": 13383 }, { "epoch": 0.7180257510729614, "grad_norm": 0.4921875, "learning_rate": 4.832676380640185e-06, "loss": 2.3792, "step": 13384 }, { "epoch": 0.7180793991416309, "grad_norm": 0.41796875, "learning_rate": 4.832645130333605e-06, "loss": 2.3837, "step": 13385 }, { "epoch": 0.7181330472103005, "grad_norm": 0.49609375, "learning_rate": 4.832613877210115e-06, "loss": 2.2299, "step": 13386 }, { "epoch": 0.7181866952789699, "grad_norm": 0.95703125, "learning_rate": 4.832582621269752e-06, "loss": 2.0621, "step": 13387 }, { "epoch": 0.7182403433476395, "grad_norm": 0.41015625, "learning_rate": 4.832551362512552e-06, "loss": 2.067, "step": 13388 }, { "epoch": 0.718293991416309, "grad_norm": 0.478515625, "learning_rate": 4.832520100938555e-06, "loss": 2.4883, "step": 13389 }, { "epoch": 0.7183476394849786, "grad_norm": 0.484375, "learning_rate": 4.832488836547799e-06, "loss": 2.2722, "step": 13390 }, { "epoch": 0.7184012875536481, "grad_norm": 0.51953125, "learning_rate": 4.832457569340319e-06, "loss": 2.385, "step": 13391 }, { "epoch": 0.7184549356223175, "grad_norm": 0.5234375, "learning_rate": 4.832426299316155e-06, "loss": 2.0329, "step": 13392 }, { "epoch": 0.7185085836909871, "grad_norm": 0.451171875, "learning_rate": 4.832395026475345e-06, "loss": 2.3441, "step": 13393 }, { "epoch": 0.7185622317596566, "grad_norm": 0.41796875, "learning_rate": 4.832363750817925e-06, "loss": 2.3476, "step": 13394 }, { "epoch": 0.7186158798283262, "grad_norm": 0.5078125, "learning_rate": 4.832332472343935e-06, "loss": 1.8784, "step": 13395 }, { "epoch": 0.7186695278969957, "grad_norm": 0.421875, "learning_rate": 4.8323011910534114e-06, "loss": 2.2509, "step": 13396 }, { "epoch": 0.7187231759656653, "grad_norm": 0.53515625, "learning_rate": 4.832269906946391e-06, "loss": 2.4294, "step": 13397 }, { "epoch": 0.7187768240343347, "grad_norm": 0.466796875, "learning_rate": 4.832238620022913e-06, "loss": 2.4037, "step": 13398 }, { "epoch": 0.7188304721030043, "grad_norm": 0.396484375, "learning_rate": 4.832207330283016e-06, "loss": 1.9784, "step": 13399 }, { "epoch": 0.7188841201716738, "grad_norm": 0.435546875, "learning_rate": 4.832176037726736e-06, "loss": 2.2441, "step": 13400 }, { "epoch": 0.7189377682403434, "grad_norm": 0.4921875, "learning_rate": 4.832144742354111e-06, "loss": 2.2934, "step": 13401 }, { "epoch": 0.7189914163090129, "grad_norm": 0.486328125, "learning_rate": 4.83211344416518e-06, "loss": 2.261, "step": 13402 }, { "epoch": 0.7190450643776825, "grad_norm": 0.5546875, "learning_rate": 4.83208214315998e-06, "loss": 2.3135, "step": 13403 }, { "epoch": 0.7190987124463519, "grad_norm": 0.70703125, "learning_rate": 4.832050839338549e-06, "loss": 2.3576, "step": 13404 }, { "epoch": 0.7191523605150214, "grad_norm": 0.341796875, "learning_rate": 4.832019532700924e-06, "loss": 2.2658, "step": 13405 }, { "epoch": 0.719206008583691, "grad_norm": 0.63671875, "learning_rate": 4.831988223247144e-06, "loss": 2.381, "step": 13406 }, { "epoch": 0.7192596566523605, "grad_norm": 0.42578125, "learning_rate": 4.831956910977246e-06, "loss": 2.3713, "step": 13407 }, { "epoch": 0.7193133047210301, "grad_norm": 0.443359375, "learning_rate": 4.8319255958912684e-06, "loss": 2.3356, "step": 13408 }, { "epoch": 0.7193669527896995, "grad_norm": 0.5, "learning_rate": 4.831894277989248e-06, "loss": 2.2173, "step": 13409 }, { "epoch": 0.7194206008583691, "grad_norm": 0.412109375, "learning_rate": 4.831862957271224e-06, "loss": 2.2113, "step": 13410 }, { "epoch": 0.7194742489270386, "grad_norm": 0.4140625, "learning_rate": 4.831831633737234e-06, "loss": 2.1781, "step": 13411 }, { "epoch": 0.7195278969957082, "grad_norm": 0.53515625, "learning_rate": 4.831800307387315e-06, "loss": 2.3924, "step": 13412 }, { "epoch": 0.7195815450643777, "grad_norm": 0.4921875, "learning_rate": 4.831768978221504e-06, "loss": 2.5603, "step": 13413 }, { "epoch": 0.7196351931330472, "grad_norm": 0.482421875, "learning_rate": 4.831737646239841e-06, "loss": 2.1959, "step": 13414 }, { "epoch": 0.7196888412017167, "grad_norm": 0.54296875, "learning_rate": 4.8317063114423625e-06, "loss": 2.2681, "step": 13415 }, { "epoch": 0.7197424892703863, "grad_norm": 0.47265625, "learning_rate": 4.831674973829108e-06, "loss": 2.3578, "step": 13416 }, { "epoch": 0.7197961373390558, "grad_norm": 0.6015625, "learning_rate": 4.831643633400113e-06, "loss": 2.6003, "step": 13417 }, { "epoch": 0.7198497854077254, "grad_norm": 0.4609375, "learning_rate": 4.831612290155416e-06, "loss": 2.3887, "step": 13418 }, { "epoch": 0.7199034334763948, "grad_norm": 0.4375, "learning_rate": 4.8315809440950565e-06, "loss": 2.4121, "step": 13419 }, { "epoch": 0.7199570815450643, "grad_norm": 2.140625, "learning_rate": 4.8315495952190705e-06, "loss": 2.1648, "step": 13420 }, { "epoch": 0.7200107296137339, "grad_norm": 0.451171875, "learning_rate": 4.831518243527496e-06, "loss": 2.4392, "step": 13421 }, { "epoch": 0.7200643776824034, "grad_norm": 0.404296875, "learning_rate": 4.831486889020372e-06, "loss": 2.1508, "step": 13422 }, { "epoch": 0.720118025751073, "grad_norm": 0.453125, "learning_rate": 4.831455531697735e-06, "loss": 1.9795, "step": 13423 }, { "epoch": 0.7201716738197425, "grad_norm": 0.5625, "learning_rate": 4.831424171559624e-06, "loss": 2.4914, "step": 13424 }, { "epoch": 0.720225321888412, "grad_norm": 0.5234375, "learning_rate": 4.831392808606077e-06, "loss": 2.2944, "step": 13425 }, { "epoch": 0.7202789699570815, "grad_norm": 0.5234375, "learning_rate": 4.83136144283713e-06, "loss": 2.4007, "step": 13426 }, { "epoch": 0.7203326180257511, "grad_norm": 0.427734375, "learning_rate": 4.8313300742528235e-06, "loss": 2.3982, "step": 13427 }, { "epoch": 0.7203862660944206, "grad_norm": 0.41796875, "learning_rate": 4.831298702853193e-06, "loss": 2.054, "step": 13428 }, { "epoch": 0.7204399141630902, "grad_norm": 0.435546875, "learning_rate": 4.831267328638278e-06, "loss": 2.1408, "step": 13429 }, { "epoch": 0.7204935622317596, "grad_norm": 0.50390625, "learning_rate": 4.831235951608116e-06, "loss": 2.2727, "step": 13430 }, { "epoch": 0.7205472103004292, "grad_norm": 0.443359375, "learning_rate": 4.831204571762745e-06, "loss": 2.3934, "step": 13431 }, { "epoch": 0.7206008583690987, "grad_norm": 0.41015625, "learning_rate": 4.831173189102202e-06, "loss": 2.2639, "step": 13432 }, { "epoch": 0.7206545064377683, "grad_norm": 0.365234375, "learning_rate": 4.831141803626526e-06, "loss": 2.4264, "step": 13433 }, { "epoch": 0.7207081545064378, "grad_norm": 0.40234375, "learning_rate": 4.831110415335753e-06, "loss": 2.3164, "step": 13434 }, { "epoch": 0.7207618025751072, "grad_norm": 0.54296875, "learning_rate": 4.831079024229924e-06, "loss": 2.2811, "step": 13435 }, { "epoch": 0.7208154506437768, "grad_norm": 0.443359375, "learning_rate": 4.831047630309075e-06, "loss": 2.3366, "step": 13436 }, { "epoch": 0.7208690987124463, "grad_norm": 0.337890625, "learning_rate": 4.831016233573244e-06, "loss": 1.9628, "step": 13437 }, { "epoch": 0.7209227467811159, "grad_norm": 0.45703125, "learning_rate": 4.830984834022468e-06, "loss": 2.4711, "step": 13438 }, { "epoch": 0.7209763948497854, "grad_norm": 0.4609375, "learning_rate": 4.830953431656788e-06, "loss": 2.3322, "step": 13439 }, { "epoch": 0.721030042918455, "grad_norm": 0.427734375, "learning_rate": 4.830922026476238e-06, "loss": 2.4088, "step": 13440 }, { "epoch": 0.7210836909871244, "grad_norm": 0.46484375, "learning_rate": 4.830890618480859e-06, "loss": 2.3057, "step": 13441 }, { "epoch": 0.721137339055794, "grad_norm": 0.3671875, "learning_rate": 4.830859207670687e-06, "loss": 1.8765, "step": 13442 }, { "epoch": 0.7211909871244635, "grad_norm": 0.48046875, "learning_rate": 4.830827794045761e-06, "loss": 2.3021, "step": 13443 }, { "epoch": 0.7212446351931331, "grad_norm": 0.42578125, "learning_rate": 4.8307963776061184e-06, "loss": 2.381, "step": 13444 }, { "epoch": 0.7212982832618026, "grad_norm": 0.47265625, "learning_rate": 4.830764958351798e-06, "loss": 2.3565, "step": 13445 }, { "epoch": 0.7213519313304722, "grad_norm": 0.62109375, "learning_rate": 4.830733536282837e-06, "loss": 1.9682, "step": 13446 }, { "epoch": 0.7214055793991416, "grad_norm": 0.42578125, "learning_rate": 4.830702111399273e-06, "loss": 2.3161, "step": 13447 }, { "epoch": 0.7214592274678111, "grad_norm": 0.494140625, "learning_rate": 4.830670683701144e-06, "loss": 2.3072, "step": 13448 }, { "epoch": 0.7215128755364807, "grad_norm": 0.486328125, "learning_rate": 4.830639253188489e-06, "loss": 2.3511, "step": 13449 }, { "epoch": 0.7215665236051502, "grad_norm": 0.40625, "learning_rate": 4.830607819861346e-06, "loss": 2.2254, "step": 13450 }, { "epoch": 0.7216201716738198, "grad_norm": 1.4375, "learning_rate": 4.8305763837197505e-06, "loss": 2.4117, "step": 13451 }, { "epoch": 0.7216738197424892, "grad_norm": 0.50390625, "learning_rate": 4.8305449447637435e-06, "loss": 2.4766, "step": 13452 }, { "epoch": 0.7217274678111588, "grad_norm": 0.470703125, "learning_rate": 4.830513502993361e-06, "loss": 2.3023, "step": 13453 }, { "epoch": 0.7217811158798283, "grad_norm": 5.15625, "learning_rate": 4.830482058408642e-06, "loss": 2.2015, "step": 13454 }, { "epoch": 0.7218347639484979, "grad_norm": 0.515625, "learning_rate": 4.830450611009624e-06, "loss": 2.3418, "step": 13455 }, { "epoch": 0.7218884120171674, "grad_norm": 0.423828125, "learning_rate": 4.830419160796346e-06, "loss": 2.5267, "step": 13456 }, { "epoch": 0.721942060085837, "grad_norm": 0.40625, "learning_rate": 4.830387707768844e-06, "loss": 2.2745, "step": 13457 }, { "epoch": 0.7219957081545064, "grad_norm": 0.56640625, "learning_rate": 4.830356251927157e-06, "loss": 2.188, "step": 13458 }, { "epoch": 0.722049356223176, "grad_norm": 0.57421875, "learning_rate": 4.830324793271324e-06, "loss": 2.3976, "step": 13459 }, { "epoch": 0.7221030042918455, "grad_norm": 0.439453125, "learning_rate": 4.830293331801382e-06, "loss": 2.2292, "step": 13460 }, { "epoch": 0.7221566523605151, "grad_norm": 0.439453125, "learning_rate": 4.8302618675173686e-06, "loss": 2.3081, "step": 13461 }, { "epoch": 0.7222103004291845, "grad_norm": 0.458984375, "learning_rate": 4.830230400419322e-06, "loss": 2.1054, "step": 13462 }, { "epoch": 0.722263948497854, "grad_norm": 0.45703125, "learning_rate": 4.8301989305072805e-06, "loss": 2.4738, "step": 13463 }, { "epoch": 0.7223175965665236, "grad_norm": 0.4375, "learning_rate": 4.830167457781282e-06, "loss": 2.3119, "step": 13464 }, { "epoch": 0.7223712446351931, "grad_norm": 0.796875, "learning_rate": 4.8301359822413654e-06, "loss": 2.3655, "step": 13465 }, { "epoch": 0.7224248927038627, "grad_norm": 0.482421875, "learning_rate": 4.8301045038875665e-06, "loss": 2.3517, "step": 13466 }, { "epoch": 0.7224785407725322, "grad_norm": 0.369140625, "learning_rate": 4.830073022719926e-06, "loss": 2.3565, "step": 13467 }, { "epoch": 0.7225321888412017, "grad_norm": 0.427734375, "learning_rate": 4.830041538738479e-06, "loss": 2.7301, "step": 13468 }, { "epoch": 0.7225858369098712, "grad_norm": 0.71484375, "learning_rate": 4.8300100519432665e-06, "loss": 2.3037, "step": 13469 }, { "epoch": 0.7226394849785408, "grad_norm": 0.4609375, "learning_rate": 4.829978562334324e-06, "loss": 2.4617, "step": 13470 }, { "epoch": 0.7226931330472103, "grad_norm": 0.52734375, "learning_rate": 4.8299470699116926e-06, "loss": 1.9166, "step": 13471 }, { "epoch": 0.7227467811158799, "grad_norm": 0.5078125, "learning_rate": 4.829915574675406e-06, "loss": 1.9651, "step": 13472 }, { "epoch": 0.7228004291845493, "grad_norm": 0.5, "learning_rate": 4.829884076625506e-06, "loss": 2.295, "step": 13473 }, { "epoch": 0.7228540772532189, "grad_norm": 0.45703125, "learning_rate": 4.8298525757620295e-06, "loss": 2.3745, "step": 13474 }, { "epoch": 0.7229077253218884, "grad_norm": 0.59765625, "learning_rate": 4.829821072085013e-06, "loss": 2.541, "step": 13475 }, { "epoch": 0.722961373390558, "grad_norm": 0.44921875, "learning_rate": 4.829789565594497e-06, "loss": 2.101, "step": 13476 }, { "epoch": 0.7230150214592275, "grad_norm": 0.37890625, "learning_rate": 4.829758056290518e-06, "loss": 2.2607, "step": 13477 }, { "epoch": 0.723068669527897, "grad_norm": 0.458984375, "learning_rate": 4.829726544173114e-06, "loss": 2.2183, "step": 13478 }, { "epoch": 0.7231223175965665, "grad_norm": 0.443359375, "learning_rate": 4.8296950292423244e-06, "loss": 2.38, "step": 13479 }, { "epoch": 0.723175965665236, "grad_norm": 0.486328125, "learning_rate": 4.829663511498186e-06, "loss": 2.1703, "step": 13480 }, { "epoch": 0.7232296137339056, "grad_norm": 0.455078125, "learning_rate": 4.829631990940737e-06, "loss": 1.7885, "step": 13481 }, { "epoch": 0.7232832618025751, "grad_norm": 0.50390625, "learning_rate": 4.8296004675700155e-06, "loss": 2.5101, "step": 13482 }, { "epoch": 0.7233369098712447, "grad_norm": 0.388671875, "learning_rate": 4.82956894138606e-06, "loss": 1.4829, "step": 13483 }, { "epoch": 0.7233905579399141, "grad_norm": 0.50390625, "learning_rate": 4.829537412388909e-06, "loss": 2.1688, "step": 13484 }, { "epoch": 0.7234442060085837, "grad_norm": 0.439453125, "learning_rate": 4.829505880578599e-06, "loss": 2.3205, "step": 13485 }, { "epoch": 0.7234978540772532, "grad_norm": 0.455078125, "learning_rate": 4.82947434595517e-06, "loss": 2.2838, "step": 13486 }, { "epoch": 0.7235515021459228, "grad_norm": 0.380859375, "learning_rate": 4.829442808518657e-06, "loss": 2.0849, "step": 13487 }, { "epoch": 0.7236051502145923, "grad_norm": 0.4765625, "learning_rate": 4.829411268269102e-06, "loss": 2.1599, "step": 13488 }, { "epoch": 0.7236587982832619, "grad_norm": 0.53125, "learning_rate": 4.829379725206541e-06, "loss": 2.4598, "step": 13489 }, { "epoch": 0.7237124463519313, "grad_norm": 7.4375, "learning_rate": 4.829348179331011e-06, "loss": 2.2189, "step": 13490 }, { "epoch": 0.7237660944206008, "grad_norm": 0.439453125, "learning_rate": 4.829316630642553e-06, "loss": 2.1711, "step": 13491 }, { "epoch": 0.7238197424892704, "grad_norm": 0.6328125, "learning_rate": 4.829285079141202e-06, "loss": 2.1562, "step": 13492 }, { "epoch": 0.7238733905579399, "grad_norm": 0.43359375, "learning_rate": 4.829253524826999e-06, "loss": 2.2293, "step": 13493 }, { "epoch": 0.7239270386266095, "grad_norm": 0.45703125, "learning_rate": 4.8292219676999804e-06, "loss": 2.3749, "step": 13494 }, { "epoch": 0.7239806866952789, "grad_norm": 0.5078125, "learning_rate": 4.829190407760185e-06, "loss": 2.6038, "step": 13495 }, { "epoch": 0.7240343347639485, "grad_norm": 0.474609375, "learning_rate": 4.8291588450076495e-06, "loss": 2.0086, "step": 13496 }, { "epoch": 0.724087982832618, "grad_norm": 0.58203125, "learning_rate": 4.829127279442413e-06, "loss": 2.5217, "step": 13497 }, { "epoch": 0.7241416309012876, "grad_norm": 0.48046875, "learning_rate": 4.8290957110645145e-06, "loss": 2.136, "step": 13498 }, { "epoch": 0.7241952789699571, "grad_norm": 0.427734375, "learning_rate": 4.829064139873991e-06, "loss": 2.0456, "step": 13499 }, { "epoch": 0.7242489270386266, "grad_norm": 0.482421875, "learning_rate": 4.829032565870881e-06, "loss": 2.4294, "step": 13500 }, { "epoch": 0.7243025751072961, "grad_norm": 0.369140625, "learning_rate": 4.829000989055222e-06, "loss": 2.2927, "step": 13501 }, { "epoch": 0.7243562231759657, "grad_norm": 9.6875, "learning_rate": 4.828969409427053e-06, "loss": 2.2655, "step": 13502 }, { "epoch": 0.7244098712446352, "grad_norm": 0.416015625, "learning_rate": 4.8289378269864125e-06, "loss": 2.2439, "step": 13503 }, { "epoch": 0.7244635193133048, "grad_norm": 0.4375, "learning_rate": 4.828906241733337e-06, "loss": 2.261, "step": 13504 }, { "epoch": 0.7245171673819742, "grad_norm": 6.0, "learning_rate": 4.828874653667866e-06, "loss": 2.3502, "step": 13505 }, { "epoch": 0.7245708154506437, "grad_norm": 0.490234375, "learning_rate": 4.828843062790037e-06, "loss": 2.4487, "step": 13506 }, { "epoch": 0.7246244635193133, "grad_norm": 0.4453125, "learning_rate": 4.828811469099889e-06, "loss": 2.3302, "step": 13507 }, { "epoch": 0.7246781115879828, "grad_norm": 0.431640625, "learning_rate": 4.828779872597459e-06, "loss": 2.336, "step": 13508 }, { "epoch": 0.7247317596566524, "grad_norm": 0.6171875, "learning_rate": 4.828748273282786e-06, "loss": 2.3376, "step": 13509 }, { "epoch": 0.7247854077253219, "grad_norm": 0.5234375, "learning_rate": 4.8287166711559075e-06, "loss": 2.5637, "step": 13510 }, { "epoch": 0.7248390557939914, "grad_norm": 0.66796875, "learning_rate": 4.828685066216862e-06, "loss": 2.1866, "step": 13511 }, { "epoch": 0.7248927038626609, "grad_norm": 0.447265625, "learning_rate": 4.828653458465688e-06, "loss": 2.3394, "step": 13512 }, { "epoch": 0.7249463519313305, "grad_norm": 0.5234375, "learning_rate": 4.828621847902423e-06, "loss": 2.2164, "step": 13513 }, { "epoch": 0.725, "grad_norm": 0.4453125, "learning_rate": 4.828590234527107e-06, "loss": 2.4284, "step": 13514 }, { "epoch": 0.7250536480686696, "grad_norm": 1.234375, "learning_rate": 4.828558618339775e-06, "loss": 2.3854, "step": 13515 }, { "epoch": 0.725107296137339, "grad_norm": 0.54296875, "learning_rate": 4.828526999340467e-06, "loss": 2.2875, "step": 13516 }, { "epoch": 0.7251609442060086, "grad_norm": 0.53515625, "learning_rate": 4.828495377529221e-06, "loss": 2.1942, "step": 13517 }, { "epoch": 0.7252145922746781, "grad_norm": 0.419921875, "learning_rate": 4.8284637529060765e-06, "loss": 2.0996, "step": 13518 }, { "epoch": 0.7252682403433477, "grad_norm": 0.455078125, "learning_rate": 4.8284321254710695e-06, "loss": 2.3366, "step": 13519 }, { "epoch": 0.7253218884120172, "grad_norm": 0.486328125, "learning_rate": 4.828400495224239e-06, "loss": 2.4074, "step": 13520 }, { "epoch": 0.7253755364806866, "grad_norm": 0.5390625, "learning_rate": 4.828368862165623e-06, "loss": 1.3858, "step": 13521 }, { "epoch": 0.7254291845493562, "grad_norm": 0.5078125, "learning_rate": 4.8283372262952614e-06, "loss": 2.4098, "step": 13522 }, { "epoch": 0.7254828326180257, "grad_norm": 0.51953125, "learning_rate": 4.82830558761319e-06, "loss": 2.4467, "step": 13523 }, { "epoch": 0.7255364806866953, "grad_norm": 0.435546875, "learning_rate": 4.828273946119449e-06, "loss": 1.8937, "step": 13524 }, { "epoch": 0.7255901287553648, "grad_norm": 0.451171875, "learning_rate": 4.828242301814075e-06, "loss": 2.2423, "step": 13525 }, { "epoch": 0.7256437768240344, "grad_norm": 0.52734375, "learning_rate": 4.828210654697106e-06, "loss": 2.2859, "step": 13526 }, { "epoch": 0.7256974248927038, "grad_norm": 0.42578125, "learning_rate": 4.828179004768582e-06, "loss": 2.4562, "step": 13527 }, { "epoch": 0.7257510729613734, "grad_norm": 0.3828125, "learning_rate": 4.82814735202854e-06, "loss": 2.0623, "step": 13528 }, { "epoch": 0.7258047210300429, "grad_norm": 0.427734375, "learning_rate": 4.828115696477019e-06, "loss": 2.2038, "step": 13529 }, { "epoch": 0.7258583690987125, "grad_norm": 0.466796875, "learning_rate": 4.828084038114057e-06, "loss": 2.3272, "step": 13530 }, { "epoch": 0.725912017167382, "grad_norm": 0.44140625, "learning_rate": 4.828052376939691e-06, "loss": 2.392, "step": 13531 }, { "epoch": 0.7259656652360515, "grad_norm": 0.482421875, "learning_rate": 4.8280207129539615e-06, "loss": 2.375, "step": 13532 }, { "epoch": 0.726019313304721, "grad_norm": 0.55859375, "learning_rate": 4.827989046156905e-06, "loss": 2.0927, "step": 13533 }, { "epoch": 0.7260729613733906, "grad_norm": 0.5, "learning_rate": 4.8279573765485594e-06, "loss": 2.2254, "step": 13534 }, { "epoch": 0.7261266094420601, "grad_norm": 0.41796875, "learning_rate": 4.827925704128964e-06, "loss": 2.3205, "step": 13535 }, { "epoch": 0.7261802575107296, "grad_norm": 0.4453125, "learning_rate": 4.827894028898157e-06, "loss": 2.4517, "step": 13536 }, { "epoch": 0.7262339055793992, "grad_norm": 0.53125, "learning_rate": 4.8278623508561775e-06, "loss": 2.1989, "step": 13537 }, { "epoch": 0.7262875536480686, "grad_norm": 0.3828125, "learning_rate": 4.827830670003061e-06, "loss": 2.4182, "step": 13538 }, { "epoch": 0.7263412017167382, "grad_norm": 0.470703125, "learning_rate": 4.827798986338849e-06, "loss": 2.5488, "step": 13539 }, { "epoch": 0.7263948497854077, "grad_norm": 0.474609375, "learning_rate": 4.827767299863577e-06, "loss": 2.5419, "step": 13540 }, { "epoch": 0.7264484978540773, "grad_norm": 0.46875, "learning_rate": 4.827735610577284e-06, "loss": 2.5088, "step": 13541 }, { "epoch": 0.7265021459227468, "grad_norm": 0.44921875, "learning_rate": 4.82770391848001e-06, "loss": 2.3505, "step": 13542 }, { "epoch": 0.7265557939914163, "grad_norm": 0.6328125, "learning_rate": 4.827672223571792e-06, "loss": 2.4434, "step": 13543 }, { "epoch": 0.7266094420600858, "grad_norm": 0.486328125, "learning_rate": 4.827640525852668e-06, "loss": 2.3989, "step": 13544 }, { "epoch": 0.7266630901287554, "grad_norm": 0.46875, "learning_rate": 4.827608825322676e-06, "loss": 2.3429, "step": 13545 }, { "epoch": 0.7267167381974249, "grad_norm": 0.4609375, "learning_rate": 4.827577121981856e-06, "loss": 2.4077, "step": 13546 }, { "epoch": 0.7267703862660945, "grad_norm": 0.9921875, "learning_rate": 4.827545415830244e-06, "loss": 2.481, "step": 13547 }, { "epoch": 0.726824034334764, "grad_norm": 0.345703125, "learning_rate": 4.8275137068678804e-06, "loss": 2.1512, "step": 13548 }, { "epoch": 0.7268776824034334, "grad_norm": 0.41015625, "learning_rate": 4.8274819950948024e-06, "loss": 2.3224, "step": 13549 }, { "epoch": 0.726931330472103, "grad_norm": 0.3984375, "learning_rate": 4.827450280511048e-06, "loss": 2.2012, "step": 13550 }, { "epoch": 0.7269849785407725, "grad_norm": 0.53125, "learning_rate": 4.827418563116656e-06, "loss": 2.1097, "step": 13551 }, { "epoch": 0.7270386266094421, "grad_norm": 0.443359375, "learning_rate": 4.827386842911664e-06, "loss": 2.1827, "step": 13552 }, { "epoch": 0.7270922746781115, "grad_norm": 0.65234375, "learning_rate": 4.827355119896112e-06, "loss": 2.252, "step": 13553 }, { "epoch": 0.7271459227467811, "grad_norm": 5.84375, "learning_rate": 4.827323394070037e-06, "loss": 1.869, "step": 13554 }, { "epoch": 0.7271995708154506, "grad_norm": 0.4609375, "learning_rate": 4.827291665433478e-06, "loss": 1.9135, "step": 13555 }, { "epoch": 0.7272532188841202, "grad_norm": 0.5546875, "learning_rate": 4.827259933986471e-06, "loss": 2.4687, "step": 13556 }, { "epoch": 0.7273068669527897, "grad_norm": 0.48046875, "learning_rate": 4.827228199729058e-06, "loss": 2.3234, "step": 13557 }, { "epoch": 0.7273605150214593, "grad_norm": 0.5078125, "learning_rate": 4.827196462661275e-06, "loss": 2.3801, "step": 13558 }, { "epoch": 0.7274141630901287, "grad_norm": 1.2421875, "learning_rate": 4.827164722783161e-06, "loss": 2.4511, "step": 13559 }, { "epoch": 0.7274678111587983, "grad_norm": 0.4609375, "learning_rate": 4.827132980094754e-06, "loss": 1.8194, "step": 13560 }, { "epoch": 0.7275214592274678, "grad_norm": 0.390625, "learning_rate": 4.827101234596092e-06, "loss": 2.0863, "step": 13561 }, { "epoch": 0.7275751072961374, "grad_norm": 0.462890625, "learning_rate": 4.827069486287214e-06, "loss": 2.2466, "step": 13562 }, { "epoch": 0.7276287553648069, "grad_norm": 0.7109375, "learning_rate": 4.827037735168159e-06, "loss": 2.199, "step": 13563 }, { "epoch": 0.7276824034334763, "grad_norm": 0.427734375, "learning_rate": 4.827005981238963e-06, "loss": 2.2059, "step": 13564 }, { "epoch": 0.7277360515021459, "grad_norm": 0.474609375, "learning_rate": 4.8269742244996675e-06, "loss": 2.2134, "step": 13565 }, { "epoch": 0.7277896995708154, "grad_norm": 0.6328125, "learning_rate": 4.8269424649503085e-06, "loss": 2.3223, "step": 13566 }, { "epoch": 0.727843347639485, "grad_norm": 0.67578125, "learning_rate": 4.8269107025909245e-06, "loss": 2.2327, "step": 13567 }, { "epoch": 0.7278969957081545, "grad_norm": 0.47265625, "learning_rate": 4.826878937421555e-06, "loss": 2.3588, "step": 13568 }, { "epoch": 0.7279506437768241, "grad_norm": 0.5078125, "learning_rate": 4.826847169442238e-06, "loss": 2.395, "step": 13569 }, { "epoch": 0.7280042918454935, "grad_norm": 0.470703125, "learning_rate": 4.826815398653011e-06, "loss": 2.3616, "step": 13570 }, { "epoch": 0.7280579399141631, "grad_norm": 0.46484375, "learning_rate": 4.826783625053913e-06, "loss": 1.6312, "step": 13571 }, { "epoch": 0.7281115879828326, "grad_norm": 0.48828125, "learning_rate": 4.826751848644984e-06, "loss": 2.298, "step": 13572 }, { "epoch": 0.7281652360515022, "grad_norm": 0.36328125, "learning_rate": 4.82672006942626e-06, "loss": 2.3098, "step": 13573 }, { "epoch": 0.7282188841201717, "grad_norm": 0.40625, "learning_rate": 4.826688287397778e-06, "loss": 1.9953, "step": 13574 }, { "epoch": 0.7282725321888412, "grad_norm": 0.494140625, "learning_rate": 4.826656502559581e-06, "loss": 2.2542, "step": 13575 }, { "epoch": 0.7283261802575107, "grad_norm": 0.388671875, "learning_rate": 4.826624714911704e-06, "loss": 2.0188, "step": 13576 }, { "epoch": 0.7283798283261803, "grad_norm": 0.451171875, "learning_rate": 4.8265929244541865e-06, "loss": 2.2086, "step": 13577 }, { "epoch": 0.7284334763948498, "grad_norm": 0.57421875, "learning_rate": 4.826561131187067e-06, "loss": 2.4192, "step": 13578 }, { "epoch": 0.7284871244635193, "grad_norm": 0.443359375, "learning_rate": 4.826529335110382e-06, "loss": 2.2381, "step": 13579 }, { "epoch": 0.7285407725321889, "grad_norm": 0.427734375, "learning_rate": 4.826497536224173e-06, "loss": 2.2976, "step": 13580 }, { "epoch": 0.7285944206008583, "grad_norm": 0.5, "learning_rate": 4.826465734528476e-06, "loss": 2.4627, "step": 13581 }, { "epoch": 0.7286480686695279, "grad_norm": 0.345703125, "learning_rate": 4.826433930023331e-06, "loss": 1.9387, "step": 13582 }, { "epoch": 0.7287017167381974, "grad_norm": 0.4140625, "learning_rate": 4.826402122708775e-06, "loss": 2.2349, "step": 13583 }, { "epoch": 0.728755364806867, "grad_norm": 0.62890625, "learning_rate": 4.826370312584847e-06, "loss": 2.2508, "step": 13584 }, { "epoch": 0.7288090128755365, "grad_norm": 0.447265625, "learning_rate": 4.826338499651586e-06, "loss": 2.3336, "step": 13585 }, { "epoch": 0.728862660944206, "grad_norm": 0.478515625, "learning_rate": 4.82630668390903e-06, "loss": 2.3741, "step": 13586 }, { "epoch": 0.7289163090128755, "grad_norm": 0.4609375, "learning_rate": 4.826274865357216e-06, "loss": 2.4296, "step": 13587 }, { "epoch": 0.7289699570815451, "grad_norm": 0.4375, "learning_rate": 4.826243043996185e-06, "loss": 2.3337, "step": 13588 }, { "epoch": 0.7290236051502146, "grad_norm": 2.21875, "learning_rate": 4.826211219825975e-06, "loss": 2.204, "step": 13589 }, { "epoch": 0.7290772532188842, "grad_norm": 0.458984375, "learning_rate": 4.826179392846622e-06, "loss": 2.2089, "step": 13590 }, { "epoch": 0.7291309012875536, "grad_norm": 0.44140625, "learning_rate": 4.826147563058167e-06, "loss": 2.1212, "step": 13591 }, { "epoch": 0.7291845493562231, "grad_norm": 0.447265625, "learning_rate": 4.8261157304606465e-06, "loss": 2.204, "step": 13592 }, { "epoch": 0.7292381974248927, "grad_norm": 0.474609375, "learning_rate": 4.826083895054101e-06, "loss": 1.9447, "step": 13593 }, { "epoch": 0.7292918454935622, "grad_norm": 0.51171875, "learning_rate": 4.826052056838567e-06, "loss": 2.3176, "step": 13594 }, { "epoch": 0.7293454935622318, "grad_norm": 0.458984375, "learning_rate": 4.826020215814084e-06, "loss": 2.1672, "step": 13595 }, { "epoch": 0.7293991416309012, "grad_norm": 2.78125, "learning_rate": 4.825988371980691e-06, "loss": 2.2642, "step": 13596 }, { "epoch": 0.7294527896995708, "grad_norm": 0.427734375, "learning_rate": 4.825956525338426e-06, "loss": 2.395, "step": 13597 }, { "epoch": 0.7295064377682403, "grad_norm": 0.5625, "learning_rate": 4.825924675887326e-06, "loss": 2.3779, "step": 13598 }, { "epoch": 0.7295600858369099, "grad_norm": 0.34765625, "learning_rate": 4.825892823627431e-06, "loss": 1.9917, "step": 13599 }, { "epoch": 0.7296137339055794, "grad_norm": 0.435546875, "learning_rate": 4.82586096855878e-06, "loss": 2.1832, "step": 13600 }, { "epoch": 0.729667381974249, "grad_norm": 0.435546875, "learning_rate": 4.82582911068141e-06, "loss": 2.1322, "step": 13601 }, { "epoch": 0.7297210300429184, "grad_norm": 0.451171875, "learning_rate": 4.825797249995359e-06, "loss": 2.2351, "step": 13602 }, { "epoch": 0.729774678111588, "grad_norm": 0.4609375, "learning_rate": 4.825765386500667e-06, "loss": 2.3805, "step": 13603 }, { "epoch": 0.7298283261802575, "grad_norm": 0.419921875, "learning_rate": 4.825733520197373e-06, "loss": 2.2146, "step": 13604 }, { "epoch": 0.7298819742489271, "grad_norm": 0.462890625, "learning_rate": 4.8257016510855135e-06, "loss": 2.2635, "step": 13605 }, { "epoch": 0.7299356223175966, "grad_norm": 0.466796875, "learning_rate": 4.825669779165128e-06, "loss": 2.0539, "step": 13606 }, { "epoch": 0.729989270386266, "grad_norm": 0.3515625, "learning_rate": 4.825637904436255e-06, "loss": 1.977, "step": 13607 }, { "epoch": 0.7300429184549356, "grad_norm": 0.640625, "learning_rate": 4.825606026898934e-06, "loss": 2.1057, "step": 13608 }, { "epoch": 0.7300965665236051, "grad_norm": 0.578125, "learning_rate": 4.8255741465532015e-06, "loss": 2.3105, "step": 13609 }, { "epoch": 0.7301502145922747, "grad_norm": 0.416015625, "learning_rate": 4.825542263399097e-06, "loss": 2.0961, "step": 13610 }, { "epoch": 0.7302038626609442, "grad_norm": 0.4921875, "learning_rate": 4.825510377436659e-06, "loss": 2.3799, "step": 13611 }, { "epoch": 0.7302575107296138, "grad_norm": 0.53125, "learning_rate": 4.825478488665926e-06, "loss": 2.2163, "step": 13612 }, { "epoch": 0.7303111587982832, "grad_norm": 0.435546875, "learning_rate": 4.825446597086937e-06, "loss": 2.2334, "step": 13613 }, { "epoch": 0.7303648068669528, "grad_norm": 0.44140625, "learning_rate": 4.825414702699729e-06, "loss": 2.303, "step": 13614 }, { "epoch": 0.7304184549356223, "grad_norm": 0.453125, "learning_rate": 4.825382805504342e-06, "loss": 2.2504, "step": 13615 }, { "epoch": 0.7304721030042919, "grad_norm": 0.41015625, "learning_rate": 4.825350905500814e-06, "loss": 2.219, "step": 13616 }, { "epoch": 0.7305257510729614, "grad_norm": 0.609375, "learning_rate": 4.825319002689184e-06, "loss": 2.2901, "step": 13617 }, { "epoch": 0.730579399141631, "grad_norm": 0.4609375, "learning_rate": 4.825287097069488e-06, "loss": 2.3792, "step": 13618 }, { "epoch": 0.7306330472103004, "grad_norm": 0.51171875, "learning_rate": 4.825255188641769e-06, "loss": 2.3568, "step": 13619 }, { "epoch": 0.73068669527897, "grad_norm": 0.51171875, "learning_rate": 4.825223277406063e-06, "loss": 1.9622, "step": 13620 }, { "epoch": 0.7307403433476395, "grad_norm": 0.419921875, "learning_rate": 4.825191363362407e-06, "loss": 2.452, "step": 13621 }, { "epoch": 0.730793991416309, "grad_norm": 0.38671875, "learning_rate": 4.825159446510842e-06, "loss": 2.1838, "step": 13622 }, { "epoch": 0.7308476394849786, "grad_norm": 0.4140625, "learning_rate": 4.825127526851406e-06, "loss": 2.3623, "step": 13623 }, { "epoch": 0.730901287553648, "grad_norm": 0.44921875, "learning_rate": 4.8250956043841366e-06, "loss": 2.3439, "step": 13624 }, { "epoch": 0.7309549356223176, "grad_norm": 0.443359375, "learning_rate": 4.825063679109074e-06, "loss": 2.3324, "step": 13625 }, { "epoch": 0.7310085836909871, "grad_norm": 0.625, "learning_rate": 4.8250317510262555e-06, "loss": 2.1726, "step": 13626 }, { "epoch": 0.7310622317596567, "grad_norm": 0.384765625, "learning_rate": 4.824999820135719e-06, "loss": 2.4344, "step": 13627 }, { "epoch": 0.7311158798283262, "grad_norm": 0.47265625, "learning_rate": 4.824967886437505e-06, "loss": 2.3771, "step": 13628 }, { "epoch": 0.7311695278969957, "grad_norm": 0.41796875, "learning_rate": 4.8249359499316516e-06, "loss": 1.8794, "step": 13629 }, { "epoch": 0.7312231759656652, "grad_norm": 0.4609375, "learning_rate": 4.824904010618196e-06, "loss": 2.421, "step": 13630 }, { "epoch": 0.7312768240343348, "grad_norm": 0.43359375, "learning_rate": 4.824872068497177e-06, "loss": 2.2412, "step": 13631 }, { "epoch": 0.7313304721030043, "grad_norm": 0.48046875, "learning_rate": 4.824840123568635e-06, "loss": 2.5004, "step": 13632 }, { "epoch": 0.7313841201716739, "grad_norm": 0.37109375, "learning_rate": 4.824808175832607e-06, "loss": 2.4248, "step": 13633 }, { "epoch": 0.7314377682403433, "grad_norm": 0.44140625, "learning_rate": 4.824776225289132e-06, "loss": 2.2628, "step": 13634 }, { "epoch": 0.7314914163090128, "grad_norm": 0.443359375, "learning_rate": 4.824744271938248e-06, "loss": 2.37, "step": 13635 }, { "epoch": 0.7315450643776824, "grad_norm": 0.56640625, "learning_rate": 4.824712315779995e-06, "loss": 2.1202, "step": 13636 }, { "epoch": 0.7315987124463519, "grad_norm": 0.453125, "learning_rate": 4.8246803568144095e-06, "loss": 2.3132, "step": 13637 }, { "epoch": 0.7316523605150215, "grad_norm": 0.4296875, "learning_rate": 4.8246483950415316e-06, "loss": 2.4207, "step": 13638 }, { "epoch": 0.731706008583691, "grad_norm": 0.41015625, "learning_rate": 4.8246164304614e-06, "loss": 2.2708, "step": 13639 }, { "epoch": 0.7317596566523605, "grad_norm": 0.380859375, "learning_rate": 4.824584463074053e-06, "loss": 1.871, "step": 13640 }, { "epoch": 0.73181330472103, "grad_norm": 0.439453125, "learning_rate": 4.824552492879528e-06, "loss": 2.3593, "step": 13641 }, { "epoch": 0.7318669527896996, "grad_norm": 0.439453125, "learning_rate": 4.824520519877866e-06, "loss": 2.2653, "step": 13642 }, { "epoch": 0.7319206008583691, "grad_norm": 0.54296875, "learning_rate": 4.8244885440691044e-06, "loss": 2.4352, "step": 13643 }, { "epoch": 0.7319742489270387, "grad_norm": 0.431640625, "learning_rate": 4.82445656545328e-06, "loss": 2.1422, "step": 13644 }, { "epoch": 0.7320278969957081, "grad_norm": 0.5078125, "learning_rate": 4.824424584030435e-06, "loss": 2.1746, "step": 13645 }, { "epoch": 0.7320815450643777, "grad_norm": 0.4296875, "learning_rate": 4.824392599800606e-06, "loss": 2.2125, "step": 13646 }, { "epoch": 0.7321351931330472, "grad_norm": 0.546875, "learning_rate": 4.824360612763831e-06, "loss": 2.204, "step": 13647 }, { "epoch": 0.7321888412017168, "grad_norm": 0.416015625, "learning_rate": 4.82432862292015e-06, "loss": 1.9507, "step": 13648 }, { "epoch": 0.7322424892703863, "grad_norm": 0.421875, "learning_rate": 4.8242966302696e-06, "loss": 2.3042, "step": 13649 }, { "epoch": 0.7322961373390557, "grad_norm": 0.419921875, "learning_rate": 4.8242646348122215e-06, "loss": 2.4776, "step": 13650 }, { "epoch": 0.7323497854077253, "grad_norm": 0.490234375, "learning_rate": 4.824232636548052e-06, "loss": 2.2711, "step": 13651 }, { "epoch": 0.7324034334763948, "grad_norm": 0.4765625, "learning_rate": 4.824200635477131e-06, "loss": 2.2505, "step": 13652 }, { "epoch": 0.7324570815450644, "grad_norm": 19.5, "learning_rate": 4.824168631599497e-06, "loss": 2.3827, "step": 13653 }, { "epoch": 0.7325107296137339, "grad_norm": 1.296875, "learning_rate": 4.824136624915187e-06, "loss": 2.3101, "step": 13654 }, { "epoch": 0.7325643776824035, "grad_norm": 0.419921875, "learning_rate": 4.824104615424241e-06, "loss": 2.3457, "step": 13655 }, { "epoch": 0.7326180257510729, "grad_norm": 0.435546875, "learning_rate": 4.824072603126698e-06, "loss": 2.3992, "step": 13656 }, { "epoch": 0.7326716738197425, "grad_norm": 0.48046875, "learning_rate": 4.824040588022596e-06, "loss": 2.4273, "step": 13657 }, { "epoch": 0.732725321888412, "grad_norm": 0.5859375, "learning_rate": 4.824008570111975e-06, "loss": 2.2526, "step": 13658 }, { "epoch": 0.7327789699570816, "grad_norm": 0.47265625, "learning_rate": 4.823976549394871e-06, "loss": 2.4728, "step": 13659 }, { "epoch": 0.7328326180257511, "grad_norm": 0.400390625, "learning_rate": 4.823944525871324e-06, "loss": 2.4001, "step": 13660 }, { "epoch": 0.7328862660944206, "grad_norm": 0.51953125, "learning_rate": 4.8239124995413746e-06, "loss": 2.3934, "step": 13661 }, { "epoch": 0.7329399141630901, "grad_norm": 0.46484375, "learning_rate": 4.823880470405059e-06, "loss": 2.4659, "step": 13662 }, { "epoch": 0.7329935622317597, "grad_norm": 0.50390625, "learning_rate": 4.8238484384624165e-06, "loss": 2.3209, "step": 13663 }, { "epoch": 0.7330472103004292, "grad_norm": 0.421875, "learning_rate": 4.823816403713486e-06, "loss": 2.0035, "step": 13664 }, { "epoch": 0.7331008583690987, "grad_norm": 0.52734375, "learning_rate": 4.823784366158306e-06, "loss": 2.4717, "step": 13665 }, { "epoch": 0.7331545064377682, "grad_norm": 0.50390625, "learning_rate": 4.823752325796916e-06, "loss": 2.5356, "step": 13666 }, { "epoch": 0.7332081545064377, "grad_norm": 0.8984375, "learning_rate": 4.823720282629353e-06, "loss": 2.2891, "step": 13667 }, { "epoch": 0.7332618025751073, "grad_norm": 0.390625, "learning_rate": 4.823688236655656e-06, "loss": 2.5518, "step": 13668 }, { "epoch": 0.7333154506437768, "grad_norm": 0.68359375, "learning_rate": 4.823656187875866e-06, "loss": 2.4054, "step": 13669 }, { "epoch": 0.7333690987124464, "grad_norm": 0.65625, "learning_rate": 4.823624136290019e-06, "loss": 2.321, "step": 13670 }, { "epoch": 0.7334227467811159, "grad_norm": 0.44140625, "learning_rate": 4.823592081898155e-06, "loss": 2.1604, "step": 13671 }, { "epoch": 0.7334763948497854, "grad_norm": 0.486328125, "learning_rate": 4.823560024700313e-06, "loss": 2.3267, "step": 13672 }, { "epoch": 0.7335300429184549, "grad_norm": 0.39453125, "learning_rate": 4.82352796469653e-06, "loss": 2.2806, "step": 13673 }, { "epoch": 0.7335836909871245, "grad_norm": 0.474609375, "learning_rate": 4.823495901886848e-06, "loss": 2.3336, "step": 13674 }, { "epoch": 0.733637339055794, "grad_norm": 0.453125, "learning_rate": 4.823463836271302e-06, "loss": 2.1559, "step": 13675 }, { "epoch": 0.7336909871244636, "grad_norm": 0.50390625, "learning_rate": 4.823431767849933e-06, "loss": 2.374, "step": 13676 }, { "epoch": 0.733744635193133, "grad_norm": 0.40234375, "learning_rate": 4.823399696622779e-06, "loss": 2.0139, "step": 13677 }, { "epoch": 0.7337982832618025, "grad_norm": 0.45703125, "learning_rate": 4.823367622589879e-06, "loss": 2.3296, "step": 13678 }, { "epoch": 0.7338519313304721, "grad_norm": 0.4453125, "learning_rate": 4.823335545751271e-06, "loss": 1.715, "step": 13679 }, { "epoch": 0.7339055793991416, "grad_norm": 0.4140625, "learning_rate": 4.823303466106994e-06, "loss": 2.3595, "step": 13680 }, { "epoch": 0.7339592274678112, "grad_norm": 0.546875, "learning_rate": 4.8232713836570885e-06, "loss": 2.3426, "step": 13681 }, { "epoch": 0.7340128755364806, "grad_norm": 0.416015625, "learning_rate": 4.823239298401591e-06, "loss": 2.0661, "step": 13682 }, { "epoch": 0.7340665236051502, "grad_norm": 0.39453125, "learning_rate": 4.823207210340541e-06, "loss": 2.1786, "step": 13683 }, { "epoch": 0.7341201716738197, "grad_norm": 0.66015625, "learning_rate": 4.823175119473976e-06, "loss": 2.448, "step": 13684 }, { "epoch": 0.7341738197424893, "grad_norm": 0.4921875, "learning_rate": 4.823143025801937e-06, "loss": 2.2452, "step": 13685 }, { "epoch": 0.7342274678111588, "grad_norm": 0.421875, "learning_rate": 4.8231109293244625e-06, "loss": 2.2152, "step": 13686 }, { "epoch": 0.7342811158798284, "grad_norm": 0.5, "learning_rate": 4.82307883004159e-06, "loss": 2.7476, "step": 13687 }, { "epoch": 0.7343347639484978, "grad_norm": 0.4765625, "learning_rate": 4.8230467279533585e-06, "loss": 2.3091, "step": 13688 }, { "epoch": 0.7343884120171674, "grad_norm": 0.408203125, "learning_rate": 4.823014623059808e-06, "loss": 2.293, "step": 13689 }, { "epoch": 0.7344420600858369, "grad_norm": 0.482421875, "learning_rate": 4.822982515360975e-06, "loss": 2.3996, "step": 13690 }, { "epoch": 0.7344957081545065, "grad_norm": 0.494140625, "learning_rate": 4.822950404856901e-06, "loss": 2.5701, "step": 13691 }, { "epoch": 0.734549356223176, "grad_norm": 0.58984375, "learning_rate": 4.8229182915476225e-06, "loss": 2.2643, "step": 13692 }, { "epoch": 0.7346030042918454, "grad_norm": 0.52734375, "learning_rate": 4.822886175433179e-06, "loss": 2.3997, "step": 13693 }, { "epoch": 0.734656652360515, "grad_norm": 0.4375, "learning_rate": 4.8228540565136105e-06, "loss": 2.3468, "step": 13694 }, { "epoch": 0.7347103004291845, "grad_norm": 0.439453125, "learning_rate": 4.8228219347889535e-06, "loss": 2.327, "step": 13695 }, { "epoch": 0.7347639484978541, "grad_norm": 0.6328125, "learning_rate": 4.822789810259249e-06, "loss": 1.1763, "step": 13696 }, { "epoch": 0.7348175965665236, "grad_norm": 0.357421875, "learning_rate": 4.822757682924534e-06, "loss": 2.0033, "step": 13697 }, { "epoch": 0.7348712446351932, "grad_norm": 0.396484375, "learning_rate": 4.822725552784849e-06, "loss": 2.2087, "step": 13698 }, { "epoch": 0.7349248927038626, "grad_norm": 0.48046875, "learning_rate": 4.822693419840231e-06, "loss": 2.3518, "step": 13699 }, { "epoch": 0.7349785407725322, "grad_norm": 0.478515625, "learning_rate": 4.822661284090721e-06, "loss": 2.3173, "step": 13700 }, { "epoch": 0.7350321888412017, "grad_norm": 0.45703125, "learning_rate": 4.822629145536355e-06, "loss": 2.264, "step": 13701 }, { "epoch": 0.7350858369098713, "grad_norm": 0.46484375, "learning_rate": 4.822597004177174e-06, "loss": 1.7053, "step": 13702 }, { "epoch": 0.7351394849785408, "grad_norm": 0.41796875, "learning_rate": 4.822564860013216e-06, "loss": 2.0071, "step": 13703 }, { "epoch": 0.7351931330472103, "grad_norm": 0.45703125, "learning_rate": 4.8225327130445204e-06, "loss": 2.3287, "step": 13704 }, { "epoch": 0.7352467811158798, "grad_norm": 0.376953125, "learning_rate": 4.8225005632711255e-06, "loss": 2.1355, "step": 13705 }, { "epoch": 0.7353004291845494, "grad_norm": 0.44921875, "learning_rate": 4.82246841069307e-06, "loss": 2.2502, "step": 13706 }, { "epoch": 0.7353540772532189, "grad_norm": 0.431640625, "learning_rate": 4.822436255310393e-06, "loss": 2.2273, "step": 13707 }, { "epoch": 0.7354077253218884, "grad_norm": 1.1796875, "learning_rate": 4.822404097123134e-06, "loss": 2.2985, "step": 13708 }, { "epoch": 0.735461373390558, "grad_norm": 0.58203125, "learning_rate": 4.8223719361313295e-06, "loss": 2.3232, "step": 13709 }, { "epoch": 0.7355150214592274, "grad_norm": 0.4140625, "learning_rate": 4.82233977233502e-06, "loss": 2.51, "step": 13710 }, { "epoch": 0.735568669527897, "grad_norm": 0.54296875, "learning_rate": 4.822307605734245e-06, "loss": 2.4663, "step": 13711 }, { "epoch": 0.7356223175965665, "grad_norm": 0.380859375, "learning_rate": 4.822275436329043e-06, "loss": 2.298, "step": 13712 }, { "epoch": 0.7356759656652361, "grad_norm": 0.4765625, "learning_rate": 4.822243264119452e-06, "loss": 2.3584, "step": 13713 }, { "epoch": 0.7357296137339056, "grad_norm": 0.59375, "learning_rate": 4.822211089105512e-06, "loss": 2.4949, "step": 13714 }, { "epoch": 0.7357832618025751, "grad_norm": 0.4140625, "learning_rate": 4.822178911287261e-06, "loss": 2.3925, "step": 13715 }, { "epoch": 0.7358369098712446, "grad_norm": 0.51953125, "learning_rate": 4.822146730664738e-06, "loss": 2.2389, "step": 13716 }, { "epoch": 0.7358905579399142, "grad_norm": 0.453125, "learning_rate": 4.822114547237982e-06, "loss": 2.3654, "step": 13717 }, { "epoch": 0.7359442060085837, "grad_norm": 0.4453125, "learning_rate": 4.822082361007031e-06, "loss": 2.3141, "step": 13718 }, { "epoch": 0.7359978540772533, "grad_norm": 0.4140625, "learning_rate": 4.822050171971925e-06, "loss": 2.0818, "step": 13719 }, { "epoch": 0.7360515021459227, "grad_norm": 0.486328125, "learning_rate": 4.822017980132703e-06, "loss": 2.5209, "step": 13720 }, { "epoch": 0.7361051502145923, "grad_norm": 0.37890625, "learning_rate": 4.821985785489402e-06, "loss": 2.0254, "step": 13721 }, { "epoch": 0.7361587982832618, "grad_norm": 0.44140625, "learning_rate": 4.821953588042064e-06, "loss": 2.2684, "step": 13722 }, { "epoch": 0.7362124463519313, "grad_norm": 0.44140625, "learning_rate": 4.821921387790726e-06, "loss": 2.0125, "step": 13723 }, { "epoch": 0.7362660944206009, "grad_norm": 0.39453125, "learning_rate": 4.8218891847354254e-06, "loss": 2.3766, "step": 13724 }, { "epoch": 0.7363197424892703, "grad_norm": 0.78125, "learning_rate": 4.821856978876204e-06, "loss": 2.355, "step": 13725 }, { "epoch": 0.7363733905579399, "grad_norm": 0.486328125, "learning_rate": 4.821824770213099e-06, "loss": 2.6, "step": 13726 }, { "epoch": 0.7364270386266094, "grad_norm": 0.37109375, "learning_rate": 4.82179255874615e-06, "loss": 2.283, "step": 13727 }, { "epoch": 0.736480686695279, "grad_norm": 0.51171875, "learning_rate": 4.821760344475396e-06, "loss": 2.3389, "step": 13728 }, { "epoch": 0.7365343347639485, "grad_norm": 0.5546875, "learning_rate": 4.8217281274008744e-06, "loss": 2.1825, "step": 13729 }, { "epoch": 0.7365879828326181, "grad_norm": 0.451171875, "learning_rate": 4.821695907522626e-06, "loss": 2.6591, "step": 13730 }, { "epoch": 0.7366416309012875, "grad_norm": 0.458984375, "learning_rate": 4.821663684840689e-06, "loss": 1.9544, "step": 13731 }, { "epoch": 0.7366952789699571, "grad_norm": 0.498046875, "learning_rate": 4.821631459355102e-06, "loss": 2.219, "step": 13732 }, { "epoch": 0.7367489270386266, "grad_norm": 1.734375, "learning_rate": 4.821599231065903e-06, "loss": 2.3356, "step": 13733 }, { "epoch": 0.7368025751072962, "grad_norm": 0.455078125, "learning_rate": 4.821566999973134e-06, "loss": 2.3334, "step": 13734 }, { "epoch": 0.7368562231759657, "grad_norm": 0.45703125, "learning_rate": 4.821534766076831e-06, "loss": 2.2827, "step": 13735 }, { "epoch": 0.7369098712446351, "grad_norm": 0.43359375, "learning_rate": 4.821502529377034e-06, "loss": 2.3027, "step": 13736 }, { "epoch": 0.7369635193133047, "grad_norm": 1.15625, "learning_rate": 4.821470289873782e-06, "loss": 2.0233, "step": 13737 }, { "epoch": 0.7370171673819742, "grad_norm": 0.55859375, "learning_rate": 4.821438047567114e-06, "loss": 2.3898, "step": 13738 }, { "epoch": 0.7370708154506438, "grad_norm": 0.490234375, "learning_rate": 4.821405802457068e-06, "loss": 2.2213, "step": 13739 }, { "epoch": 0.7371244635193133, "grad_norm": 0.482421875, "learning_rate": 4.821373554543685e-06, "loss": 2.2932, "step": 13740 }, { "epoch": 0.7371781115879829, "grad_norm": 0.42578125, "learning_rate": 4.821341303827001e-06, "loss": 2.4808, "step": 13741 }, { "epoch": 0.7372317596566523, "grad_norm": 0.474609375, "learning_rate": 4.821309050307057e-06, "loss": 2.232, "step": 13742 }, { "epoch": 0.7372854077253219, "grad_norm": 0.439453125, "learning_rate": 4.821276793983891e-06, "loss": 2.2012, "step": 13743 }, { "epoch": 0.7373390557939914, "grad_norm": 0.41796875, "learning_rate": 4.821244534857544e-06, "loss": 2.3265, "step": 13744 }, { "epoch": 0.737392703862661, "grad_norm": 0.515625, "learning_rate": 4.821212272928053e-06, "loss": 2.4117, "step": 13745 }, { "epoch": 0.7374463519313305, "grad_norm": 0.45703125, "learning_rate": 4.821180008195456e-06, "loss": 2.6936, "step": 13746 }, { "epoch": 0.7375, "grad_norm": 0.4375, "learning_rate": 4.821147740659795e-06, "loss": 2.21, "step": 13747 }, { "epoch": 0.7375536480686695, "grad_norm": 0.490234375, "learning_rate": 4.821115470321106e-06, "loss": 2.2294, "step": 13748 }, { "epoch": 0.7376072961373391, "grad_norm": 0.48828125, "learning_rate": 4.821083197179431e-06, "loss": 2.4449, "step": 13749 }, { "epoch": 0.7376609442060086, "grad_norm": 0.412109375, "learning_rate": 4.821050921234805e-06, "loss": 1.8894, "step": 13750 }, { "epoch": 0.7377145922746781, "grad_norm": 1.171875, "learning_rate": 4.82101864248727e-06, "loss": 2.3862, "step": 13751 }, { "epoch": 0.7377682403433476, "grad_norm": 0.458984375, "learning_rate": 4.820986360936865e-06, "loss": 2.4238, "step": 13752 }, { "epoch": 0.7378218884120171, "grad_norm": 0.455078125, "learning_rate": 4.820954076583627e-06, "loss": 2.6324, "step": 13753 }, { "epoch": 0.7378755364806867, "grad_norm": 0.494140625, "learning_rate": 4.820921789427597e-06, "loss": 2.2189, "step": 13754 }, { "epoch": 0.7379291845493562, "grad_norm": 0.47265625, "learning_rate": 4.820889499468812e-06, "loss": 1.9847, "step": 13755 }, { "epoch": 0.7379828326180258, "grad_norm": 0.478515625, "learning_rate": 4.820857206707313e-06, "loss": 2.1127, "step": 13756 }, { "epoch": 0.7380364806866953, "grad_norm": 0.482421875, "learning_rate": 4.820824911143139e-06, "loss": 2.4267, "step": 13757 }, { "epoch": 0.7380901287553648, "grad_norm": 0.546875, "learning_rate": 4.820792612776327e-06, "loss": 2.3384, "step": 13758 }, { "epoch": 0.7381437768240343, "grad_norm": 1.359375, "learning_rate": 4.820760311606918e-06, "loss": 1.9084, "step": 13759 }, { "epoch": 0.7381974248927039, "grad_norm": 0.45703125, "learning_rate": 4.820728007634949e-06, "loss": 2.2819, "step": 13760 }, { "epoch": 0.7382510729613734, "grad_norm": 0.498046875, "learning_rate": 4.820695700860461e-06, "loss": 2.4706, "step": 13761 }, { "epoch": 0.738304721030043, "grad_norm": 0.53125, "learning_rate": 4.820663391283492e-06, "loss": 2.1461, "step": 13762 }, { "epoch": 0.7383583690987124, "grad_norm": 0.43359375, "learning_rate": 4.820631078904081e-06, "loss": 2.2916, "step": 13763 }, { "epoch": 0.738412017167382, "grad_norm": 0.4609375, "learning_rate": 4.8205987637222674e-06, "loss": 2.4207, "step": 13764 }, { "epoch": 0.7384656652360515, "grad_norm": 0.5, "learning_rate": 4.8205664457380895e-06, "loss": 2.3512, "step": 13765 }, { "epoch": 0.738519313304721, "grad_norm": 0.58203125, "learning_rate": 4.820534124951588e-06, "loss": 2.1926, "step": 13766 }, { "epoch": 0.7385729613733906, "grad_norm": 0.478515625, "learning_rate": 4.8205018013628e-06, "loss": 2.4243, "step": 13767 }, { "epoch": 0.73862660944206, "grad_norm": 0.396484375, "learning_rate": 4.820469474971765e-06, "loss": 2.3893, "step": 13768 }, { "epoch": 0.7386802575107296, "grad_norm": 0.466796875, "learning_rate": 4.820437145778523e-06, "loss": 2.2131, "step": 13769 }, { "epoch": 0.7387339055793991, "grad_norm": 0.515625, "learning_rate": 4.8204048137831125e-06, "loss": 2.2547, "step": 13770 }, { "epoch": 0.7387875536480687, "grad_norm": 0.4609375, "learning_rate": 4.820372478985572e-06, "loss": 2.3581, "step": 13771 }, { "epoch": 0.7388412017167382, "grad_norm": 0.419921875, "learning_rate": 4.820340141385941e-06, "loss": 2.1779, "step": 13772 }, { "epoch": 0.7388948497854078, "grad_norm": 0.443359375, "learning_rate": 4.820307800984259e-06, "loss": 2.3478, "step": 13773 }, { "epoch": 0.7389484978540772, "grad_norm": 0.490234375, "learning_rate": 4.820275457780564e-06, "loss": 2.3288, "step": 13774 }, { "epoch": 0.7390021459227468, "grad_norm": 0.5234375, "learning_rate": 4.8202431117748965e-06, "loss": 2.2763, "step": 13775 }, { "epoch": 0.7390557939914163, "grad_norm": 0.466796875, "learning_rate": 4.820210762967295e-06, "loss": 2.541, "step": 13776 }, { "epoch": 0.7391094420600859, "grad_norm": 0.400390625, "learning_rate": 4.820178411357797e-06, "loss": 2.4144, "step": 13777 }, { "epoch": 0.7391630901287554, "grad_norm": 0.423828125, "learning_rate": 4.820146056946443e-06, "loss": 2.6429, "step": 13778 }, { "epoch": 0.7392167381974248, "grad_norm": 0.47265625, "learning_rate": 4.820113699733272e-06, "loss": 2.2794, "step": 13779 }, { "epoch": 0.7392703862660944, "grad_norm": 0.578125, "learning_rate": 4.820081339718323e-06, "loss": 2.3343, "step": 13780 }, { "epoch": 0.7393240343347639, "grad_norm": 0.3984375, "learning_rate": 4.8200489769016355e-06, "loss": 2.2927, "step": 13781 }, { "epoch": 0.7393776824034335, "grad_norm": 0.439453125, "learning_rate": 4.8200166112832485e-06, "loss": 2.282, "step": 13782 }, { "epoch": 0.739431330472103, "grad_norm": 0.44921875, "learning_rate": 4.8199842428632e-06, "loss": 2.0995, "step": 13783 }, { "epoch": 0.7394849785407726, "grad_norm": 0.65234375, "learning_rate": 4.819951871641531e-06, "loss": 2.254, "step": 13784 }, { "epoch": 0.739538626609442, "grad_norm": 0.7578125, "learning_rate": 4.819919497618277e-06, "loss": 2.381, "step": 13785 }, { "epoch": 0.7395922746781116, "grad_norm": 1.0703125, "learning_rate": 4.819887120793482e-06, "loss": 2.4172, "step": 13786 }, { "epoch": 0.7396459227467811, "grad_norm": 0.51953125, "learning_rate": 4.819854741167182e-06, "loss": 2.2173, "step": 13787 }, { "epoch": 0.7396995708154507, "grad_norm": 0.443359375, "learning_rate": 4.819822358739416e-06, "loss": 1.9059, "step": 13788 }, { "epoch": 0.7397532188841202, "grad_norm": 0.4453125, "learning_rate": 4.819789973510225e-06, "loss": 2.189, "step": 13789 }, { "epoch": 0.7398068669527897, "grad_norm": 0.51171875, "learning_rate": 4.819757585479645e-06, "loss": 1.9083, "step": 13790 }, { "epoch": 0.7398605150214592, "grad_norm": 0.59375, "learning_rate": 4.819725194647719e-06, "loss": 2.1742, "step": 13791 }, { "epoch": 0.7399141630901288, "grad_norm": 0.57421875, "learning_rate": 4.819692801014483e-06, "loss": 2.2466, "step": 13792 }, { "epoch": 0.7399678111587983, "grad_norm": 0.400390625, "learning_rate": 4.819660404579978e-06, "loss": 2.2568, "step": 13793 }, { "epoch": 0.7400214592274678, "grad_norm": 0.6328125, "learning_rate": 4.819628005344242e-06, "loss": 2.6334, "step": 13794 }, { "epoch": 0.7400751072961373, "grad_norm": 1.0546875, "learning_rate": 4.819595603307314e-06, "loss": 2.3767, "step": 13795 }, { "epoch": 0.7401287553648068, "grad_norm": 0.453125, "learning_rate": 4.819563198469235e-06, "loss": 2.2962, "step": 13796 }, { "epoch": 0.7401824034334764, "grad_norm": 0.5, "learning_rate": 4.819530790830042e-06, "loss": 2.561, "step": 13797 }, { "epoch": 0.7402360515021459, "grad_norm": 0.40234375, "learning_rate": 4.819498380389775e-06, "loss": 2.157, "step": 13798 }, { "epoch": 0.7402896995708155, "grad_norm": 0.400390625, "learning_rate": 4.819465967148472e-06, "loss": 2.1045, "step": 13799 }, { "epoch": 0.740343347639485, "grad_norm": 0.40625, "learning_rate": 4.819433551106175e-06, "loss": 2.3603, "step": 13800 }, { "epoch": 0.7403969957081545, "grad_norm": 0.54296875, "learning_rate": 4.81940113226292e-06, "loss": 2.3873, "step": 13801 }, { "epoch": 0.740450643776824, "grad_norm": 0.447265625, "learning_rate": 4.8193687106187484e-06, "loss": 2.2529, "step": 13802 }, { "epoch": 0.7405042918454936, "grad_norm": 0.48046875, "learning_rate": 4.819336286173698e-06, "loss": 2.3393, "step": 13803 }, { "epoch": 0.7405579399141631, "grad_norm": 0.466796875, "learning_rate": 4.8193038589278085e-06, "loss": 2.2273, "step": 13804 }, { "epoch": 0.7406115879828327, "grad_norm": 0.453125, "learning_rate": 4.819271428881119e-06, "loss": 2.0686, "step": 13805 }, { "epoch": 0.7406652360515021, "grad_norm": 0.63671875, "learning_rate": 4.819238996033669e-06, "loss": 2.3178, "step": 13806 }, { "epoch": 0.7407188841201717, "grad_norm": 0.4140625, "learning_rate": 4.819206560385496e-06, "loss": 2.3632, "step": 13807 }, { "epoch": 0.7407725321888412, "grad_norm": 0.427734375, "learning_rate": 4.819174121936641e-06, "loss": 2.3714, "step": 13808 }, { "epoch": 0.7408261802575107, "grad_norm": 0.5, "learning_rate": 4.819141680687144e-06, "loss": 2.2666, "step": 13809 }, { "epoch": 0.7408798283261803, "grad_norm": 0.380859375, "learning_rate": 4.819109236637042e-06, "loss": 2.5855, "step": 13810 }, { "epoch": 0.7409334763948497, "grad_norm": 0.3828125, "learning_rate": 4.819076789786374e-06, "loss": 2.1397, "step": 13811 }, { "epoch": 0.7409871244635193, "grad_norm": 0.462890625, "learning_rate": 4.819044340135182e-06, "loss": 2.3752, "step": 13812 }, { "epoch": 0.7410407725321888, "grad_norm": 0.478515625, "learning_rate": 4.819011887683503e-06, "loss": 2.5326, "step": 13813 }, { "epoch": 0.7410944206008584, "grad_norm": 5.0625, "learning_rate": 4.818979432431375e-06, "loss": 2.5696, "step": 13814 }, { "epoch": 0.7411480686695279, "grad_norm": 0.41796875, "learning_rate": 4.81894697437884e-06, "loss": 1.7665, "step": 13815 }, { "epoch": 0.7412017167381975, "grad_norm": 0.44921875, "learning_rate": 4.818914513525935e-06, "loss": 2.3077, "step": 13816 }, { "epoch": 0.7412553648068669, "grad_norm": 0.466796875, "learning_rate": 4.8188820498727016e-06, "loss": 2.0859, "step": 13817 }, { "epoch": 0.7413090128755365, "grad_norm": 0.447265625, "learning_rate": 4.818849583419177e-06, "loss": 2.2715, "step": 13818 }, { "epoch": 0.741362660944206, "grad_norm": 0.416015625, "learning_rate": 4.8188171141654e-06, "loss": 2.3108, "step": 13819 }, { "epoch": 0.7414163090128756, "grad_norm": 0.859375, "learning_rate": 4.818784642111411e-06, "loss": 2.308, "step": 13820 }, { "epoch": 0.7414699570815451, "grad_norm": 0.4453125, "learning_rate": 4.81875216725725e-06, "loss": 2.486, "step": 13821 }, { "epoch": 0.7415236051502145, "grad_norm": 0.45703125, "learning_rate": 4.818719689602954e-06, "loss": 2.4895, "step": 13822 }, { "epoch": 0.7415772532188841, "grad_norm": 0.4140625, "learning_rate": 4.818687209148565e-06, "loss": 2.2781, "step": 13823 }, { "epoch": 0.7416309012875536, "grad_norm": 0.46875, "learning_rate": 4.818654725894119e-06, "loss": 2.4792, "step": 13824 }, { "epoch": 0.7416845493562232, "grad_norm": 0.5234375, "learning_rate": 4.818622239839658e-06, "loss": 2.2707, "step": 13825 }, { "epoch": 0.7417381974248927, "grad_norm": 0.5625, "learning_rate": 4.818589750985219e-06, "loss": 2.3793, "step": 13826 }, { "epoch": 0.7417918454935623, "grad_norm": 0.412109375, "learning_rate": 4.8185572593308434e-06, "loss": 2.1386, "step": 13827 }, { "epoch": 0.7418454935622317, "grad_norm": 0.4296875, "learning_rate": 4.818524764876569e-06, "loss": 2.4534, "step": 13828 }, { "epoch": 0.7418991416309013, "grad_norm": 0.51953125, "learning_rate": 4.818492267622435e-06, "loss": 2.1962, "step": 13829 }, { "epoch": 0.7419527896995708, "grad_norm": 0.67578125, "learning_rate": 4.818459767568481e-06, "loss": 2.5351, "step": 13830 }, { "epoch": 0.7420064377682404, "grad_norm": 0.54296875, "learning_rate": 4.818427264714747e-06, "loss": 2.5398, "step": 13831 }, { "epoch": 0.7420600858369099, "grad_norm": 0.482421875, "learning_rate": 4.8183947590612714e-06, "loss": 2.2426, "step": 13832 }, { "epoch": 0.7421137339055794, "grad_norm": 0.44140625, "learning_rate": 4.818362250608093e-06, "loss": 2.2963, "step": 13833 }, { "epoch": 0.7421673819742489, "grad_norm": 0.4296875, "learning_rate": 4.818329739355252e-06, "loss": 2.2417, "step": 13834 }, { "epoch": 0.7422210300429185, "grad_norm": 0.42578125, "learning_rate": 4.818297225302788e-06, "loss": 2.3716, "step": 13835 }, { "epoch": 0.742274678111588, "grad_norm": 0.408203125, "learning_rate": 4.818264708450738e-06, "loss": 2.4405, "step": 13836 }, { "epoch": 0.7423283261802575, "grad_norm": 0.384765625, "learning_rate": 4.818232188799144e-06, "loss": 2.2937, "step": 13837 }, { "epoch": 0.742381974248927, "grad_norm": 0.474609375, "learning_rate": 4.818199666348044e-06, "loss": 2.4655, "step": 13838 }, { "epoch": 0.7424356223175965, "grad_norm": 0.54296875, "learning_rate": 4.818167141097477e-06, "loss": 2.2866, "step": 13839 }, { "epoch": 0.7424892703862661, "grad_norm": 0.453125, "learning_rate": 4.8181346130474836e-06, "loss": 2.272, "step": 13840 }, { "epoch": 0.7425429184549356, "grad_norm": 0.609375, "learning_rate": 4.818102082198102e-06, "loss": 1.691, "step": 13841 }, { "epoch": 0.7425965665236052, "grad_norm": 0.419921875, "learning_rate": 4.818069548549371e-06, "loss": 2.1966, "step": 13842 }, { "epoch": 0.7426502145922746, "grad_norm": 0.462890625, "learning_rate": 4.818037012101331e-06, "loss": 2.2996, "step": 13843 }, { "epoch": 0.7427038626609442, "grad_norm": 0.490234375, "learning_rate": 4.81800447285402e-06, "loss": 2.3441, "step": 13844 }, { "epoch": 0.7427575107296137, "grad_norm": 0.48046875, "learning_rate": 4.817971930807479e-06, "loss": 2.2845, "step": 13845 }, { "epoch": 0.7428111587982833, "grad_norm": 0.5234375, "learning_rate": 4.817939385961746e-06, "loss": 2.2816, "step": 13846 }, { "epoch": 0.7428648068669528, "grad_norm": 0.55078125, "learning_rate": 4.81790683831686e-06, "loss": 2.1847, "step": 13847 }, { "epoch": 0.7429184549356224, "grad_norm": 0.474609375, "learning_rate": 4.817874287872862e-06, "loss": 2.3934, "step": 13848 }, { "epoch": 0.7429721030042918, "grad_norm": 2.9375, "learning_rate": 4.81784173462979e-06, "loss": 1.379, "step": 13849 }, { "epoch": 0.7430257510729614, "grad_norm": 0.392578125, "learning_rate": 4.817809178587684e-06, "loss": 2.2398, "step": 13850 }, { "epoch": 0.7430793991416309, "grad_norm": 0.453125, "learning_rate": 4.817776619746582e-06, "loss": 2.3725, "step": 13851 }, { "epoch": 0.7431330472103004, "grad_norm": 0.498046875, "learning_rate": 4.817744058106526e-06, "loss": 2.4389, "step": 13852 }, { "epoch": 0.74318669527897, "grad_norm": 0.451171875, "learning_rate": 4.817711493667552e-06, "loss": 2.229, "step": 13853 }, { "epoch": 0.7432403433476394, "grad_norm": 0.48046875, "learning_rate": 4.817678926429702e-06, "loss": 2.5244, "step": 13854 }, { "epoch": 0.743293991416309, "grad_norm": 0.482421875, "learning_rate": 4.817646356393013e-06, "loss": 2.5017, "step": 13855 }, { "epoch": 0.7433476394849785, "grad_norm": 1.7734375, "learning_rate": 4.817613783557526e-06, "loss": 2.322, "step": 13856 }, { "epoch": 0.7434012875536481, "grad_norm": 0.66015625, "learning_rate": 4.817581207923281e-06, "loss": 2.3173, "step": 13857 }, { "epoch": 0.7434549356223176, "grad_norm": 0.58984375, "learning_rate": 4.817548629490315e-06, "loss": 2.3735, "step": 13858 }, { "epoch": 0.7435085836909872, "grad_norm": 0.423828125, "learning_rate": 4.817516048258669e-06, "loss": 2.5418, "step": 13859 }, { "epoch": 0.7435622317596566, "grad_norm": 0.435546875, "learning_rate": 4.817483464228382e-06, "loss": 2.206, "step": 13860 }, { "epoch": 0.7436158798283262, "grad_norm": 0.474609375, "learning_rate": 4.817450877399493e-06, "loss": 2.3668, "step": 13861 }, { "epoch": 0.7436695278969957, "grad_norm": 0.75, "learning_rate": 4.817418287772042e-06, "loss": 2.4693, "step": 13862 }, { "epoch": 0.7437231759656653, "grad_norm": 0.49609375, "learning_rate": 4.817385695346068e-06, "loss": 2.2759, "step": 13863 }, { "epoch": 0.7437768240343348, "grad_norm": 0.458984375, "learning_rate": 4.817353100121611e-06, "loss": 2.4082, "step": 13864 }, { "epoch": 0.7438304721030042, "grad_norm": 0.59375, "learning_rate": 4.817320502098709e-06, "loss": 2.3491, "step": 13865 }, { "epoch": 0.7438841201716738, "grad_norm": 0.408203125, "learning_rate": 4.817287901277401e-06, "loss": 2.1899, "step": 13866 }, { "epoch": 0.7439377682403433, "grad_norm": 0.486328125, "learning_rate": 4.817255297657729e-06, "loss": 2.1358, "step": 13867 }, { "epoch": 0.7439914163090129, "grad_norm": 0.51953125, "learning_rate": 4.81722269123973e-06, "loss": 2.1378, "step": 13868 }, { "epoch": 0.7440450643776824, "grad_norm": 0.439453125, "learning_rate": 4.8171900820234455e-06, "loss": 2.2857, "step": 13869 }, { "epoch": 0.744098712446352, "grad_norm": 0.59765625, "learning_rate": 4.817157470008912e-06, "loss": 2.1812, "step": 13870 }, { "epoch": 0.7441523605150214, "grad_norm": 0.470703125, "learning_rate": 4.817124855196171e-06, "loss": 2.1748, "step": 13871 }, { "epoch": 0.744206008583691, "grad_norm": 0.55078125, "learning_rate": 4.817092237585261e-06, "loss": 2.1946, "step": 13872 }, { "epoch": 0.7442596566523605, "grad_norm": 0.3984375, "learning_rate": 4.817059617176222e-06, "loss": 2.232, "step": 13873 }, { "epoch": 0.7443133047210301, "grad_norm": 0.4375, "learning_rate": 4.8170269939690936e-06, "loss": 2.329, "step": 13874 }, { "epoch": 0.7443669527896996, "grad_norm": 0.3984375, "learning_rate": 4.816994367963914e-06, "loss": 2.0368, "step": 13875 }, { "epoch": 0.7444206008583691, "grad_norm": 1.34375, "learning_rate": 4.816961739160724e-06, "loss": 1.4315, "step": 13876 }, { "epoch": 0.7444742489270386, "grad_norm": 0.359375, "learning_rate": 4.816929107559561e-06, "loss": 2.0758, "step": 13877 }, { "epoch": 0.7445278969957082, "grad_norm": 0.515625, "learning_rate": 4.816896473160468e-06, "loss": 2.4137, "step": 13878 }, { "epoch": 0.7445815450643777, "grad_norm": 0.486328125, "learning_rate": 4.81686383596348e-06, "loss": 2.4658, "step": 13879 }, { "epoch": 0.7446351931330472, "grad_norm": 0.4375, "learning_rate": 4.81683119596864e-06, "loss": 2.2986, "step": 13880 }, { "epoch": 0.7446888412017167, "grad_norm": 0.392578125, "learning_rate": 4.816798553175985e-06, "loss": 2.4378, "step": 13881 }, { "epoch": 0.7447424892703862, "grad_norm": 0.4375, "learning_rate": 4.816765907585556e-06, "loss": 2.2038, "step": 13882 }, { "epoch": 0.7447961373390558, "grad_norm": 0.44140625, "learning_rate": 4.816733259197391e-06, "loss": 2.4647, "step": 13883 }, { "epoch": 0.7448497854077253, "grad_norm": 0.419921875, "learning_rate": 4.816700608011531e-06, "loss": 2.4701, "step": 13884 }, { "epoch": 0.7449034334763949, "grad_norm": 0.458984375, "learning_rate": 4.816667954028014e-06, "loss": 2.5496, "step": 13885 }, { "epoch": 0.7449570815450643, "grad_norm": 0.50390625, "learning_rate": 4.816635297246881e-06, "loss": 1.9996, "step": 13886 }, { "epoch": 0.7450107296137339, "grad_norm": 0.41015625, "learning_rate": 4.81660263766817e-06, "loss": 2.3703, "step": 13887 }, { "epoch": 0.7450643776824034, "grad_norm": 0.50390625, "learning_rate": 4.816569975291921e-06, "loss": 2.3347, "step": 13888 }, { "epoch": 0.745118025751073, "grad_norm": 0.5, "learning_rate": 4.816537310118173e-06, "loss": 2.1348, "step": 13889 }, { "epoch": 0.7451716738197425, "grad_norm": 0.462890625, "learning_rate": 4.816504642146966e-06, "loss": 2.0499, "step": 13890 }, { "epoch": 0.7452253218884121, "grad_norm": 0.74609375, "learning_rate": 4.8164719713783404e-06, "loss": 2.1375, "step": 13891 }, { "epoch": 0.7452789699570815, "grad_norm": 0.455078125, "learning_rate": 4.816439297812333e-06, "loss": 2.1691, "step": 13892 }, { "epoch": 0.7453326180257511, "grad_norm": 0.404296875, "learning_rate": 4.816406621448986e-06, "loss": 2.2926, "step": 13893 }, { "epoch": 0.7453862660944206, "grad_norm": 0.6171875, "learning_rate": 4.8163739422883375e-06, "loss": 2.2391, "step": 13894 }, { "epoch": 0.7454399141630901, "grad_norm": 0.45703125, "learning_rate": 4.816341260330427e-06, "loss": 2.1602, "step": 13895 }, { "epoch": 0.7454935622317597, "grad_norm": 0.47265625, "learning_rate": 4.816308575575294e-06, "loss": 2.0994, "step": 13896 }, { "epoch": 0.7455472103004291, "grad_norm": 0.53515625, "learning_rate": 4.816275888022978e-06, "loss": 2.4502, "step": 13897 }, { "epoch": 0.7456008583690987, "grad_norm": 0.5625, "learning_rate": 4.816243197673519e-06, "loss": 2.3627, "step": 13898 }, { "epoch": 0.7456545064377682, "grad_norm": 0.52734375, "learning_rate": 4.816210504526956e-06, "loss": 2.1279, "step": 13899 }, { "epoch": 0.7457081545064378, "grad_norm": 0.423828125, "learning_rate": 4.8161778085833276e-06, "loss": 2.3916, "step": 13900 }, { "epoch": 0.7457618025751073, "grad_norm": 0.384765625, "learning_rate": 4.8161451098426745e-06, "loss": 2.1373, "step": 13901 }, { "epoch": 0.7458154506437769, "grad_norm": 0.4921875, "learning_rate": 4.816112408305037e-06, "loss": 2.2173, "step": 13902 }, { "epoch": 0.7458690987124463, "grad_norm": 0.5390625, "learning_rate": 4.816079703970452e-06, "loss": 2.0661, "step": 13903 }, { "epoch": 0.7459227467811159, "grad_norm": 0.44921875, "learning_rate": 4.8160469968389615e-06, "loss": 2.4984, "step": 13904 }, { "epoch": 0.7459763948497854, "grad_norm": 0.59375, "learning_rate": 4.816014286910604e-06, "loss": 2.3965, "step": 13905 }, { "epoch": 0.746030042918455, "grad_norm": 0.59375, "learning_rate": 4.8159815741854185e-06, "loss": 2.4537, "step": 13906 }, { "epoch": 0.7460836909871245, "grad_norm": 0.390625, "learning_rate": 4.815948858663445e-06, "loss": 2.3275, "step": 13907 }, { "epoch": 0.746137339055794, "grad_norm": 0.41796875, "learning_rate": 4.815916140344723e-06, "loss": 2.4526, "step": 13908 }, { "epoch": 0.7461909871244635, "grad_norm": 0.451171875, "learning_rate": 4.815883419229293e-06, "loss": 2.2255, "step": 13909 }, { "epoch": 0.746244635193133, "grad_norm": 0.494140625, "learning_rate": 4.815850695317191e-06, "loss": 1.7053, "step": 13910 }, { "epoch": 0.7462982832618026, "grad_norm": 0.49609375, "learning_rate": 4.815817968608461e-06, "loss": 2.5233, "step": 13911 }, { "epoch": 0.7463519313304721, "grad_norm": 0.474609375, "learning_rate": 4.81578523910314e-06, "loss": 2.2018, "step": 13912 }, { "epoch": 0.7464055793991416, "grad_norm": 0.388671875, "learning_rate": 4.815752506801269e-06, "loss": 2.2184, "step": 13913 }, { "epoch": 0.7464592274678111, "grad_norm": 0.6796875, "learning_rate": 4.815719771702885e-06, "loss": 2.3341, "step": 13914 }, { "epoch": 0.7465128755364807, "grad_norm": 0.44140625, "learning_rate": 4.815687033808031e-06, "loss": 2.1727, "step": 13915 }, { "epoch": 0.7465665236051502, "grad_norm": 0.490234375, "learning_rate": 4.815654293116743e-06, "loss": 2.1352, "step": 13916 }, { "epoch": 0.7466201716738198, "grad_norm": 0.48828125, "learning_rate": 4.815621549629063e-06, "loss": 2.5746, "step": 13917 }, { "epoch": 0.7466738197424893, "grad_norm": 0.546875, "learning_rate": 4.815588803345029e-06, "loss": 2.3728, "step": 13918 }, { "epoch": 0.7467274678111588, "grad_norm": 0.4296875, "learning_rate": 4.815556054264682e-06, "loss": 2.3041, "step": 13919 }, { "epoch": 0.7467811158798283, "grad_norm": 0.470703125, "learning_rate": 4.81552330238806e-06, "loss": 2.1806, "step": 13920 }, { "epoch": 0.7468347639484979, "grad_norm": 0.43359375, "learning_rate": 4.8154905477152046e-06, "loss": 2.3722, "step": 13921 }, { "epoch": 0.7468884120171674, "grad_norm": 0.51171875, "learning_rate": 4.815457790246153e-06, "loss": 2.4891, "step": 13922 }, { "epoch": 0.7469420600858369, "grad_norm": 0.453125, "learning_rate": 4.815425029980947e-06, "loss": 2.0073, "step": 13923 }, { "epoch": 0.7469957081545064, "grad_norm": 0.462890625, "learning_rate": 4.815392266919624e-06, "loss": 2.121, "step": 13924 }, { "epoch": 0.7470493562231759, "grad_norm": 0.4296875, "learning_rate": 4.815359501062226e-06, "loss": 2.2608, "step": 13925 }, { "epoch": 0.7471030042918455, "grad_norm": 0.443359375, "learning_rate": 4.815326732408789e-06, "loss": 2.4035, "step": 13926 }, { "epoch": 0.747156652360515, "grad_norm": 0.451171875, "learning_rate": 4.815293960959357e-06, "loss": 2.2912, "step": 13927 }, { "epoch": 0.7472103004291846, "grad_norm": 0.390625, "learning_rate": 4.815261186713966e-06, "loss": 2.5598, "step": 13928 }, { "epoch": 0.747263948497854, "grad_norm": 0.474609375, "learning_rate": 4.815228409672658e-06, "loss": 2.4549, "step": 13929 }, { "epoch": 0.7473175965665236, "grad_norm": 1.09375, "learning_rate": 4.81519562983547e-06, "loss": 1.8618, "step": 13930 }, { "epoch": 0.7473712446351931, "grad_norm": 0.474609375, "learning_rate": 4.815162847202444e-06, "loss": 2.5468, "step": 13931 }, { "epoch": 0.7474248927038627, "grad_norm": 0.431640625, "learning_rate": 4.8151300617736184e-06, "loss": 2.4941, "step": 13932 }, { "epoch": 0.7474785407725322, "grad_norm": 0.65234375, "learning_rate": 4.815097273549033e-06, "loss": 2.2823, "step": 13933 }, { "epoch": 0.7475321888412018, "grad_norm": 0.419921875, "learning_rate": 4.815064482528728e-06, "loss": 2.2091, "step": 13934 }, { "epoch": 0.7475858369098712, "grad_norm": 0.36328125, "learning_rate": 4.815031688712742e-06, "loss": 2.1119, "step": 13935 }, { "epoch": 0.7476394849785408, "grad_norm": 0.486328125, "learning_rate": 4.814998892101116e-06, "loss": 2.3697, "step": 13936 }, { "epoch": 0.7476931330472103, "grad_norm": 0.423828125, "learning_rate": 4.814966092693887e-06, "loss": 2.3832, "step": 13937 }, { "epoch": 0.7477467811158798, "grad_norm": 0.419921875, "learning_rate": 4.8149332904910975e-06, "loss": 2.3022, "step": 13938 }, { "epoch": 0.7478004291845494, "grad_norm": 0.47265625, "learning_rate": 4.814900485492785e-06, "loss": 2.3411, "step": 13939 }, { "epoch": 0.7478540772532188, "grad_norm": 0.48828125, "learning_rate": 4.814867677698991e-06, "loss": 2.4195, "step": 13940 }, { "epoch": 0.7479077253218884, "grad_norm": 0.51171875, "learning_rate": 4.814834867109753e-06, "loss": 2.3889, "step": 13941 }, { "epoch": 0.7479613733905579, "grad_norm": 0.5703125, "learning_rate": 4.8148020537251125e-06, "loss": 2.1563, "step": 13942 }, { "epoch": 0.7480150214592275, "grad_norm": 0.56640625, "learning_rate": 4.814769237545108e-06, "loss": 2.4205, "step": 13943 }, { "epoch": 0.748068669527897, "grad_norm": 0.53515625, "learning_rate": 4.81473641856978e-06, "loss": 2.244, "step": 13944 }, { "epoch": 0.7481223175965666, "grad_norm": 0.5703125, "learning_rate": 4.814703596799168e-06, "loss": 2.1805, "step": 13945 }, { "epoch": 0.748175965665236, "grad_norm": 0.33984375, "learning_rate": 4.814670772233309e-06, "loss": 2.0767, "step": 13946 }, { "epoch": 0.7482296137339056, "grad_norm": 0.515625, "learning_rate": 4.8146379448722475e-06, "loss": 2.1455, "step": 13947 }, { "epoch": 0.7482832618025751, "grad_norm": 0.4375, "learning_rate": 4.814605114716019e-06, "loss": 2.2332, "step": 13948 }, { "epoch": 0.7483369098712447, "grad_norm": 0.408203125, "learning_rate": 4.814572281764666e-06, "loss": 2.1744, "step": 13949 }, { "epoch": 0.7483905579399142, "grad_norm": 0.6484375, "learning_rate": 4.814539446018226e-06, "loss": 2.3165, "step": 13950 }, { "epoch": 0.7484442060085837, "grad_norm": 0.421875, "learning_rate": 4.814506607476739e-06, "loss": 2.3046, "step": 13951 }, { "epoch": 0.7484978540772532, "grad_norm": 0.4765625, "learning_rate": 4.814473766140246e-06, "loss": 2.6923, "step": 13952 }, { "epoch": 0.7485515021459227, "grad_norm": 0.427734375, "learning_rate": 4.8144409220087854e-06, "loss": 2.2799, "step": 13953 }, { "epoch": 0.7486051502145923, "grad_norm": 0.453125, "learning_rate": 4.8144080750823974e-06, "loss": 2.2894, "step": 13954 }, { "epoch": 0.7486587982832618, "grad_norm": 0.8203125, "learning_rate": 4.814375225361122e-06, "loss": 2.4685, "step": 13955 }, { "epoch": 0.7487124463519313, "grad_norm": 0.455078125, "learning_rate": 4.814342372844998e-06, "loss": 2.4229, "step": 13956 }, { "epoch": 0.7487660944206008, "grad_norm": 0.380859375, "learning_rate": 4.814309517534065e-06, "loss": 2.2475, "step": 13957 }, { "epoch": 0.7488197424892704, "grad_norm": 0.4921875, "learning_rate": 4.814276659428363e-06, "loss": 2.0739, "step": 13958 }, { "epoch": 0.7488733905579399, "grad_norm": 0.47265625, "learning_rate": 4.814243798527933e-06, "loss": 2.147, "step": 13959 }, { "epoch": 0.7489270386266095, "grad_norm": 0.416015625, "learning_rate": 4.814210934832813e-06, "loss": 2.2096, "step": 13960 }, { "epoch": 0.748980686695279, "grad_norm": 0.5390625, "learning_rate": 4.8141780683430425e-06, "loss": 2.4858, "step": 13961 }, { "epoch": 0.7490343347639485, "grad_norm": 0.69921875, "learning_rate": 4.814145199058663e-06, "loss": 1.3965, "step": 13962 }, { "epoch": 0.749087982832618, "grad_norm": 0.376953125, "learning_rate": 4.8141123269797124e-06, "loss": 2.2045, "step": 13963 }, { "epoch": 0.7491416309012876, "grad_norm": 0.490234375, "learning_rate": 4.814079452106232e-06, "loss": 2.2734, "step": 13964 }, { "epoch": 0.7491952789699571, "grad_norm": 0.49609375, "learning_rate": 4.814046574438259e-06, "loss": 2.4053, "step": 13965 }, { "epoch": 0.7492489270386266, "grad_norm": 0.427734375, "learning_rate": 4.814013693975836e-06, "loss": 2.647, "step": 13966 }, { "epoch": 0.7493025751072961, "grad_norm": 0.388671875, "learning_rate": 4.813980810719001e-06, "loss": 1.8591, "step": 13967 }, { "epoch": 0.7493562231759656, "grad_norm": 0.4375, "learning_rate": 4.8139479246677935e-06, "loss": 2.0673, "step": 13968 }, { "epoch": 0.7494098712446352, "grad_norm": 0.443359375, "learning_rate": 4.813915035822254e-06, "loss": 1.8195, "step": 13969 }, { "epoch": 0.7494635193133047, "grad_norm": 0.703125, "learning_rate": 4.813882144182423e-06, "loss": 2.3939, "step": 13970 }, { "epoch": 0.7495171673819743, "grad_norm": 0.453125, "learning_rate": 4.813849249748338e-06, "loss": 2.3576, "step": 13971 }, { "epoch": 0.7495708154506437, "grad_norm": 0.45703125, "learning_rate": 4.813816352520041e-06, "loss": 2.1827, "step": 13972 }, { "epoch": 0.7496244635193133, "grad_norm": 0.62109375, "learning_rate": 4.81378345249757e-06, "loss": 2.4203, "step": 13973 }, { "epoch": 0.7496781115879828, "grad_norm": 0.51171875, "learning_rate": 4.813750549680966e-06, "loss": 2.3616, "step": 13974 }, { "epoch": 0.7497317596566524, "grad_norm": 0.4765625, "learning_rate": 4.813717644070268e-06, "loss": 2.12, "step": 13975 }, { "epoch": 0.7497854077253219, "grad_norm": 0.50390625, "learning_rate": 4.813684735665516e-06, "loss": 2.2397, "step": 13976 }, { "epoch": 0.7498390557939915, "grad_norm": 0.5390625, "learning_rate": 4.81365182446675e-06, "loss": 2.1905, "step": 13977 }, { "epoch": 0.7498927038626609, "grad_norm": 0.357421875, "learning_rate": 4.813618910474008e-06, "loss": 2.0472, "step": 13978 }, { "epoch": 0.7499463519313305, "grad_norm": 0.46484375, "learning_rate": 4.813585993687332e-06, "loss": 2.4517, "step": 13979 }, { "epoch": 0.75, "grad_norm": 0.5390625, "learning_rate": 4.813553074106761e-06, "loss": 2.4689, "step": 13980 }, { "epoch": 0.7500536480686695, "grad_norm": 0.4375, "learning_rate": 4.813520151732334e-06, "loss": 2.3643, "step": 13981 }, { "epoch": 0.7501072961373391, "grad_norm": 0.4609375, "learning_rate": 4.8134872265640915e-06, "loss": 2.3755, "step": 13982 }, { "epoch": 0.7501609442060085, "grad_norm": 0.431640625, "learning_rate": 4.813454298602074e-06, "loss": 2.4076, "step": 13983 }, { "epoch": 0.7502145922746781, "grad_norm": 0.41796875, "learning_rate": 4.81342136784632e-06, "loss": 2.3618, "step": 13984 }, { "epoch": 0.7502682403433476, "grad_norm": 0.447265625, "learning_rate": 4.8133884342968695e-06, "loss": 2.3422, "step": 13985 }, { "epoch": 0.7503218884120172, "grad_norm": 0.49609375, "learning_rate": 4.8133554979537635e-06, "loss": 2.556, "step": 13986 }, { "epoch": 0.7503755364806867, "grad_norm": 0.49609375, "learning_rate": 4.8133225588170395e-06, "loss": 2.2921, "step": 13987 }, { "epoch": 0.7504291845493563, "grad_norm": 0.49609375, "learning_rate": 4.813289616886738e-06, "loss": 1.9726, "step": 13988 }, { "epoch": 0.7504828326180257, "grad_norm": 0.43359375, "learning_rate": 4.8132566721629e-06, "loss": 2.2608, "step": 13989 }, { "epoch": 0.7505364806866953, "grad_norm": 0.458984375, "learning_rate": 4.813223724645565e-06, "loss": 2.3391, "step": 13990 }, { "epoch": 0.7505901287553648, "grad_norm": 0.45703125, "learning_rate": 4.8131907743347725e-06, "loss": 2.2286, "step": 13991 }, { "epoch": 0.7506437768240344, "grad_norm": 0.4765625, "learning_rate": 4.813157821230562e-06, "loss": 2.267, "step": 13992 }, { "epoch": 0.7506974248927039, "grad_norm": 2.71875, "learning_rate": 4.813124865332973e-06, "loss": 2.3886, "step": 13993 }, { "epoch": 0.7507510729613734, "grad_norm": 0.4921875, "learning_rate": 4.813091906642046e-06, "loss": 2.3617, "step": 13994 }, { "epoch": 0.7508047210300429, "grad_norm": 0.42578125, "learning_rate": 4.81305894515782e-06, "loss": 2.1266, "step": 13995 }, { "epoch": 0.7508583690987124, "grad_norm": 0.44921875, "learning_rate": 4.813025980880336e-06, "loss": 2.4849, "step": 13996 }, { "epoch": 0.750912017167382, "grad_norm": 0.458984375, "learning_rate": 4.8129930138096325e-06, "loss": 2.3139, "step": 13997 }, { "epoch": 0.7509656652360515, "grad_norm": 0.482421875, "learning_rate": 4.812960043945751e-06, "loss": 2.1897, "step": 13998 }, { "epoch": 0.751019313304721, "grad_norm": 0.412109375, "learning_rate": 4.8129270712887296e-06, "loss": 2.1273, "step": 13999 }, { "epoch": 0.7510729613733905, "grad_norm": 0.47265625, "learning_rate": 4.812894095838609e-06, "loss": 2.2879, "step": 14000 }, { "epoch": 0.7511266094420601, "grad_norm": 0.41796875, "learning_rate": 4.812861117595429e-06, "loss": 2.3241, "step": 14001 }, { "epoch": 0.7511802575107296, "grad_norm": 0.408203125, "learning_rate": 4.812828136559228e-06, "loss": 2.1914, "step": 14002 }, { "epoch": 0.7512339055793992, "grad_norm": 0.42578125, "learning_rate": 4.8127951527300485e-06, "loss": 2.1414, "step": 14003 }, { "epoch": 0.7512875536480687, "grad_norm": 0.419921875, "learning_rate": 4.812762166107928e-06, "loss": 2.154, "step": 14004 }, { "epoch": 0.7513412017167382, "grad_norm": 0.44921875, "learning_rate": 4.812729176692908e-06, "loss": 2.2789, "step": 14005 }, { "epoch": 0.7513948497854077, "grad_norm": 0.447265625, "learning_rate": 4.812696184485027e-06, "loss": 2.5194, "step": 14006 }, { "epoch": 0.7514484978540773, "grad_norm": 0.482421875, "learning_rate": 4.8126631894843265e-06, "loss": 2.3426, "step": 14007 }, { "epoch": 0.7515021459227468, "grad_norm": 0.5078125, "learning_rate": 4.812630191690844e-06, "loss": 2.2453, "step": 14008 }, { "epoch": 0.7515557939914163, "grad_norm": 0.4609375, "learning_rate": 4.812597191104621e-06, "loss": 2.358, "step": 14009 }, { "epoch": 0.7516094420600858, "grad_norm": 0.43359375, "learning_rate": 4.812564187725696e-06, "loss": 2.1281, "step": 14010 }, { "epoch": 0.7516630901287553, "grad_norm": 0.462890625, "learning_rate": 4.8125311815541124e-06, "loss": 2.3356, "step": 14011 }, { "epoch": 0.7517167381974249, "grad_norm": 0.44921875, "learning_rate": 4.812498172589906e-06, "loss": 2.5792, "step": 14012 }, { "epoch": 0.7517703862660944, "grad_norm": 0.490234375, "learning_rate": 4.812465160833118e-06, "loss": 2.2613, "step": 14013 }, { "epoch": 0.751824034334764, "grad_norm": 0.458984375, "learning_rate": 4.8124321462837884e-06, "loss": 2.3402, "step": 14014 }, { "epoch": 0.7518776824034334, "grad_norm": 0.392578125, "learning_rate": 4.812399128941957e-06, "loss": 2.2156, "step": 14015 }, { "epoch": 0.751931330472103, "grad_norm": 0.484375, "learning_rate": 4.812366108807663e-06, "loss": 2.4178, "step": 14016 }, { "epoch": 0.7519849785407725, "grad_norm": 0.51171875, "learning_rate": 4.812333085880948e-06, "loss": 2.385, "step": 14017 }, { "epoch": 0.7520386266094421, "grad_norm": 0.455078125, "learning_rate": 4.812300060161852e-06, "loss": 2.2388, "step": 14018 }, { "epoch": 0.7520922746781116, "grad_norm": 0.44140625, "learning_rate": 4.812267031650412e-06, "loss": 2.2034, "step": 14019 }, { "epoch": 0.7521459227467812, "grad_norm": 0.4375, "learning_rate": 4.81223400034667e-06, "loss": 2.392, "step": 14020 }, { "epoch": 0.7521995708154506, "grad_norm": 0.466796875, "learning_rate": 4.8122009662506665e-06, "loss": 2.2397, "step": 14021 }, { "epoch": 0.7522532188841202, "grad_norm": 0.453125, "learning_rate": 4.81216792936244e-06, "loss": 2.3948, "step": 14022 }, { "epoch": 0.7523068669527897, "grad_norm": 0.57421875, "learning_rate": 4.81213488968203e-06, "loss": 2.0378, "step": 14023 }, { "epoch": 0.7523605150214592, "grad_norm": 0.431640625, "learning_rate": 4.8121018472094785e-06, "loss": 2.1458, "step": 14024 }, { "epoch": 0.7524141630901288, "grad_norm": 0.458984375, "learning_rate": 4.812068801944823e-06, "loss": 2.4432, "step": 14025 }, { "epoch": 0.7524678111587982, "grad_norm": 0.5078125, "learning_rate": 4.812035753888105e-06, "loss": 2.2452, "step": 14026 }, { "epoch": 0.7525214592274678, "grad_norm": 0.451171875, "learning_rate": 4.812002703039364e-06, "loss": 2.6156, "step": 14027 }, { "epoch": 0.7525751072961373, "grad_norm": 0.486328125, "learning_rate": 4.81196964939864e-06, "loss": 2.3919, "step": 14028 }, { "epoch": 0.7526287553648069, "grad_norm": 0.474609375, "learning_rate": 4.811936592965973e-06, "loss": 2.2183, "step": 14029 }, { "epoch": 0.7526824034334764, "grad_norm": 0.498046875, "learning_rate": 4.8119035337414025e-06, "loss": 2.2745, "step": 14030 }, { "epoch": 0.752736051502146, "grad_norm": 0.419921875, "learning_rate": 4.811870471724968e-06, "loss": 2.3133, "step": 14031 }, { "epoch": 0.7527896995708154, "grad_norm": 0.419921875, "learning_rate": 4.8118374069167105e-06, "loss": 2.3495, "step": 14032 }, { "epoch": 0.752843347639485, "grad_norm": 0.447265625, "learning_rate": 4.81180433931667e-06, "loss": 2.4492, "step": 14033 }, { "epoch": 0.7528969957081545, "grad_norm": 0.470703125, "learning_rate": 4.811771268924885e-06, "loss": 2.1065, "step": 14034 }, { "epoch": 0.7529506437768241, "grad_norm": 0.55859375, "learning_rate": 4.811738195741397e-06, "loss": 2.262, "step": 14035 }, { "epoch": 0.7530042918454936, "grad_norm": 0.4921875, "learning_rate": 4.811705119766245e-06, "loss": 2.213, "step": 14036 }, { "epoch": 0.7530579399141631, "grad_norm": 0.376953125, "learning_rate": 4.8116720409994695e-06, "loss": 2.161, "step": 14037 }, { "epoch": 0.7531115879828326, "grad_norm": 0.43359375, "learning_rate": 4.8116389594411096e-06, "loss": 2.0204, "step": 14038 }, { "epoch": 0.7531652360515021, "grad_norm": 0.61328125, "learning_rate": 4.811605875091207e-06, "loss": 2.4405, "step": 14039 }, { "epoch": 0.7532188841201717, "grad_norm": 0.46484375, "learning_rate": 4.8115727879497995e-06, "loss": 2.4423, "step": 14040 }, { "epoch": 0.7532725321888412, "grad_norm": 0.4296875, "learning_rate": 4.811539698016928e-06, "loss": 2.1276, "step": 14041 }, { "epoch": 0.7533261802575107, "grad_norm": 0.5078125, "learning_rate": 4.811506605292633e-06, "loss": 2.5342, "step": 14042 }, { "epoch": 0.7533798283261802, "grad_norm": 0.443359375, "learning_rate": 4.811473509776954e-06, "loss": 2.3215, "step": 14043 }, { "epoch": 0.7534334763948498, "grad_norm": 0.435546875, "learning_rate": 4.81144041146993e-06, "loss": 2.2913, "step": 14044 }, { "epoch": 0.7534871244635193, "grad_norm": 0.421875, "learning_rate": 4.811407310371603e-06, "loss": 2.3728, "step": 14045 }, { "epoch": 0.7535407725321889, "grad_norm": 0.46484375, "learning_rate": 4.811374206482011e-06, "loss": 2.2066, "step": 14046 }, { "epoch": 0.7535944206008584, "grad_norm": 0.482421875, "learning_rate": 4.811341099801196e-06, "loss": 2.3001, "step": 14047 }, { "epoch": 0.7536480686695279, "grad_norm": 0.44921875, "learning_rate": 4.8113079903291955e-06, "loss": 2.1416, "step": 14048 }, { "epoch": 0.7537017167381974, "grad_norm": 0.416015625, "learning_rate": 4.811274878066051e-06, "loss": 2.3438, "step": 14049 }, { "epoch": 0.753755364806867, "grad_norm": 0.4609375, "learning_rate": 4.811241763011803e-06, "loss": 2.394, "step": 14050 }, { "epoch": 0.7538090128755365, "grad_norm": 0.51171875, "learning_rate": 4.811208645166491e-06, "loss": 2.6688, "step": 14051 }, { "epoch": 0.753862660944206, "grad_norm": 0.63671875, "learning_rate": 4.811175524530154e-06, "loss": 2.2813, "step": 14052 }, { "epoch": 0.7539163090128755, "grad_norm": 0.8671875, "learning_rate": 4.811142401102833e-06, "loss": 2.3258, "step": 14053 }, { "epoch": 0.753969957081545, "grad_norm": 0.431640625, "learning_rate": 4.8111092748845675e-06, "loss": 2.1272, "step": 14054 }, { "epoch": 0.7540236051502146, "grad_norm": 0.40625, "learning_rate": 4.811076145875398e-06, "loss": 2.1293, "step": 14055 }, { "epoch": 0.7540772532188841, "grad_norm": 0.4140625, "learning_rate": 4.811043014075365e-06, "loss": 1.9424, "step": 14056 }, { "epoch": 0.7541309012875537, "grad_norm": 0.6640625, "learning_rate": 4.811009879484506e-06, "loss": 1.1301, "step": 14057 }, { "epoch": 0.7541845493562231, "grad_norm": 0.439453125, "learning_rate": 4.810976742102864e-06, "loss": 2.2612, "step": 14058 }, { "epoch": 0.7542381974248927, "grad_norm": 0.52734375, "learning_rate": 4.810943601930479e-06, "loss": 2.476, "step": 14059 }, { "epoch": 0.7542918454935622, "grad_norm": 0.427734375, "learning_rate": 4.810910458967388e-06, "loss": 2.0257, "step": 14060 }, { "epoch": 0.7543454935622318, "grad_norm": 0.69140625, "learning_rate": 4.810877313213634e-06, "loss": 2.279, "step": 14061 }, { "epoch": 0.7543991416309013, "grad_norm": 0.42578125, "learning_rate": 4.8108441646692546e-06, "loss": 2.5204, "step": 14062 }, { "epoch": 0.7544527896995709, "grad_norm": 0.53515625, "learning_rate": 4.810811013334292e-06, "loss": 2.2321, "step": 14063 }, { "epoch": 0.7545064377682403, "grad_norm": 0.4296875, "learning_rate": 4.810777859208786e-06, "loss": 2.4926, "step": 14064 }, { "epoch": 0.7545600858369099, "grad_norm": 0.376953125, "learning_rate": 4.810744702292775e-06, "loss": 1.9745, "step": 14065 }, { "epoch": 0.7546137339055794, "grad_norm": 0.5234375, "learning_rate": 4.8107115425863e-06, "loss": 2.2391, "step": 14066 }, { "epoch": 0.7546673819742489, "grad_norm": 0.466796875, "learning_rate": 4.810678380089402e-06, "loss": 2.4559, "step": 14067 }, { "epoch": 0.7547210300429185, "grad_norm": 0.486328125, "learning_rate": 4.81064521480212e-06, "loss": 2.3848, "step": 14068 }, { "epoch": 0.7547746781115879, "grad_norm": 0.51953125, "learning_rate": 4.8106120467244936e-06, "loss": 2.4061, "step": 14069 }, { "epoch": 0.7548283261802575, "grad_norm": 0.4296875, "learning_rate": 4.810578875856563e-06, "loss": 2.2843, "step": 14070 }, { "epoch": 0.754881974248927, "grad_norm": 0.37109375, "learning_rate": 4.810545702198369e-06, "loss": 2.1866, "step": 14071 }, { "epoch": 0.7549356223175966, "grad_norm": 0.50390625, "learning_rate": 4.8105125257499515e-06, "loss": 2.4117, "step": 14072 }, { "epoch": 0.7549892703862661, "grad_norm": 0.482421875, "learning_rate": 4.810479346511351e-06, "loss": 2.1396, "step": 14073 }, { "epoch": 0.7550429184549357, "grad_norm": 0.4140625, "learning_rate": 4.810446164482607e-06, "loss": 2.3628, "step": 14074 }, { "epoch": 0.7550965665236051, "grad_norm": 0.421875, "learning_rate": 4.810412979663759e-06, "loss": 2.152, "step": 14075 }, { "epoch": 0.7551502145922747, "grad_norm": 0.404296875, "learning_rate": 4.810379792054847e-06, "loss": 2.3851, "step": 14076 }, { "epoch": 0.7552038626609442, "grad_norm": 0.61328125, "learning_rate": 4.810346601655913e-06, "loss": 2.0904, "step": 14077 }, { "epoch": 0.7552575107296138, "grad_norm": 0.400390625, "learning_rate": 4.810313408466995e-06, "loss": 2.3592, "step": 14078 }, { "epoch": 0.7553111587982833, "grad_norm": 0.4453125, "learning_rate": 4.810280212488134e-06, "loss": 2.16, "step": 14079 }, { "epoch": 0.7553648068669528, "grad_norm": 0.5546875, "learning_rate": 4.81024701371937e-06, "loss": 2.4642, "step": 14080 }, { "epoch": 0.7554184549356223, "grad_norm": 0.37890625, "learning_rate": 4.810213812160743e-06, "loss": 2.202, "step": 14081 }, { "epoch": 0.7554721030042918, "grad_norm": 0.4765625, "learning_rate": 4.810180607812293e-06, "loss": 2.3982, "step": 14082 }, { "epoch": 0.7555257510729614, "grad_norm": 0.58203125, "learning_rate": 4.81014740067406e-06, "loss": 2.2093, "step": 14083 }, { "epoch": 0.7555793991416309, "grad_norm": 0.490234375, "learning_rate": 4.810114190746086e-06, "loss": 2.2817, "step": 14084 }, { "epoch": 0.7556330472103004, "grad_norm": 0.50390625, "learning_rate": 4.810080978028407e-06, "loss": 2.4462, "step": 14085 }, { "epoch": 0.7556866952789699, "grad_norm": 0.412109375, "learning_rate": 4.8100477625210675e-06, "loss": 2.3183, "step": 14086 }, { "epoch": 0.7557403433476395, "grad_norm": 0.5859375, "learning_rate": 4.8100145442241045e-06, "loss": 2.1836, "step": 14087 }, { "epoch": 0.755793991416309, "grad_norm": 0.484375, "learning_rate": 4.8099813231375605e-06, "loss": 2.3118, "step": 14088 }, { "epoch": 0.7558476394849786, "grad_norm": 0.453125, "learning_rate": 4.8099480992614736e-06, "loss": 2.3802, "step": 14089 }, { "epoch": 0.755901287553648, "grad_norm": 0.5, "learning_rate": 4.809914872595884e-06, "loss": 2.3978, "step": 14090 }, { "epoch": 0.7559549356223176, "grad_norm": 0.474609375, "learning_rate": 4.8098816431408335e-06, "loss": 2.2524, "step": 14091 }, { "epoch": 0.7560085836909871, "grad_norm": 0.498046875, "learning_rate": 4.809848410896361e-06, "loss": 2.3304, "step": 14092 }, { "epoch": 0.7560622317596567, "grad_norm": 0.94921875, "learning_rate": 4.809815175862506e-06, "loss": 2.2848, "step": 14093 }, { "epoch": 0.7561158798283262, "grad_norm": 0.64453125, "learning_rate": 4.809781938039312e-06, "loss": 2.3018, "step": 14094 }, { "epoch": 0.7561695278969958, "grad_norm": 0.3828125, "learning_rate": 4.809748697426815e-06, "loss": 1.9415, "step": 14095 }, { "epoch": 0.7562231759656652, "grad_norm": 0.48046875, "learning_rate": 4.809715454025057e-06, "loss": 2.3536, "step": 14096 }, { "epoch": 0.7562768240343347, "grad_norm": 0.453125, "learning_rate": 4.809682207834078e-06, "loss": 2.3269, "step": 14097 }, { "epoch": 0.7563304721030043, "grad_norm": 1.7578125, "learning_rate": 4.809648958853917e-06, "loss": 2.3025, "step": 14098 }, { "epoch": 0.7563841201716738, "grad_norm": 0.453125, "learning_rate": 4.809615707084617e-06, "loss": 2.2556, "step": 14099 }, { "epoch": 0.7564377682403434, "grad_norm": 0.62890625, "learning_rate": 4.809582452526216e-06, "loss": 2.2544, "step": 14100 }, { "epoch": 0.7564914163090128, "grad_norm": 0.63671875, "learning_rate": 4.8095491951787546e-06, "loss": 2.3006, "step": 14101 }, { "epoch": 0.7565450643776824, "grad_norm": 0.375, "learning_rate": 4.809515935042272e-06, "loss": 2.0508, "step": 14102 }, { "epoch": 0.7565987124463519, "grad_norm": 0.41796875, "learning_rate": 4.809482672116809e-06, "loss": 2.2786, "step": 14103 }, { "epoch": 0.7566523605150215, "grad_norm": 0.5703125, "learning_rate": 4.8094494064024075e-06, "loss": 2.2618, "step": 14104 }, { "epoch": 0.756706008583691, "grad_norm": 0.52734375, "learning_rate": 4.8094161378991065e-06, "loss": 2.3999, "step": 14105 }, { "epoch": 0.7567596566523606, "grad_norm": 2.078125, "learning_rate": 4.809382866606945e-06, "loss": 2.3501, "step": 14106 }, { "epoch": 0.75681330472103, "grad_norm": 0.466796875, "learning_rate": 4.8093495925259644e-06, "loss": 2.1464, "step": 14107 }, { "epoch": 0.7568669527896996, "grad_norm": 0.498046875, "learning_rate": 4.809316315656204e-06, "loss": 2.0855, "step": 14108 }, { "epoch": 0.7569206008583691, "grad_norm": 0.4453125, "learning_rate": 4.809283035997705e-06, "loss": 2.2234, "step": 14109 }, { "epoch": 0.7569742489270386, "grad_norm": 0.53515625, "learning_rate": 4.809249753550508e-06, "loss": 2.2656, "step": 14110 }, { "epoch": 0.7570278969957082, "grad_norm": 0.416015625, "learning_rate": 4.809216468314651e-06, "loss": 2.2991, "step": 14111 }, { "epoch": 0.7570815450643776, "grad_norm": 0.71875, "learning_rate": 4.8091831802901766e-06, "loss": 2.2017, "step": 14112 }, { "epoch": 0.7571351931330472, "grad_norm": 0.5078125, "learning_rate": 4.809149889477123e-06, "loss": 2.3404, "step": 14113 }, { "epoch": 0.7571888412017167, "grad_norm": 0.47265625, "learning_rate": 4.809116595875532e-06, "loss": 2.3371, "step": 14114 }, { "epoch": 0.7572424892703863, "grad_norm": 0.5234375, "learning_rate": 4.809083299485442e-06, "loss": 2.3005, "step": 14115 }, { "epoch": 0.7572961373390558, "grad_norm": 0.4453125, "learning_rate": 4.8090500003068954e-06, "loss": 2.373, "step": 14116 }, { "epoch": 0.7573497854077254, "grad_norm": 0.462890625, "learning_rate": 4.8090166983399315e-06, "loss": 2.1256, "step": 14117 }, { "epoch": 0.7574034334763948, "grad_norm": 0.40234375, "learning_rate": 4.808983393584589e-06, "loss": 2.2335, "step": 14118 }, { "epoch": 0.7574570815450644, "grad_norm": 0.66015625, "learning_rate": 4.808950086040911e-06, "loss": 2.6147, "step": 14119 }, { "epoch": 0.7575107296137339, "grad_norm": 0.462890625, "learning_rate": 4.808916775708936e-06, "loss": 2.0911, "step": 14120 }, { "epoch": 0.7575643776824035, "grad_norm": 0.453125, "learning_rate": 4.808883462588705e-06, "loss": 2.6301, "step": 14121 }, { "epoch": 0.757618025751073, "grad_norm": 0.37890625, "learning_rate": 4.808850146680256e-06, "loss": 2.4974, "step": 14122 }, { "epoch": 0.7576716738197425, "grad_norm": 0.443359375, "learning_rate": 4.808816827983632e-06, "loss": 2.5323, "step": 14123 }, { "epoch": 0.757725321888412, "grad_norm": 0.5, "learning_rate": 4.8087835064988725e-06, "loss": 2.2834, "step": 14124 }, { "epoch": 0.7577789699570815, "grad_norm": 0.458984375, "learning_rate": 4.808750182226017e-06, "loss": 1.8735, "step": 14125 }, { "epoch": 0.7578326180257511, "grad_norm": 0.4375, "learning_rate": 4.808716855165105e-06, "loss": 2.2493, "step": 14126 }, { "epoch": 0.7578862660944206, "grad_norm": 0.474609375, "learning_rate": 4.808683525316179e-06, "loss": 2.3412, "step": 14127 }, { "epoch": 0.7579399141630901, "grad_norm": 0.42578125, "learning_rate": 4.808650192679278e-06, "loss": 2.334, "step": 14128 }, { "epoch": 0.7579935622317596, "grad_norm": 0.482421875, "learning_rate": 4.808616857254442e-06, "loss": 2.3732, "step": 14129 }, { "epoch": 0.7580472103004292, "grad_norm": 0.5625, "learning_rate": 4.808583519041713e-06, "loss": 2.5481, "step": 14130 }, { "epoch": 0.7581008583690987, "grad_norm": 0.447265625, "learning_rate": 4.808550178041129e-06, "loss": 2.0894, "step": 14131 }, { "epoch": 0.7581545064377683, "grad_norm": 0.484375, "learning_rate": 4.808516834252731e-06, "loss": 2.4123, "step": 14132 }, { "epoch": 0.7582081545064377, "grad_norm": 0.443359375, "learning_rate": 4.80848348767656e-06, "loss": 2.2109, "step": 14133 }, { "epoch": 0.7582618025751073, "grad_norm": 0.41015625, "learning_rate": 4.808450138312655e-06, "loss": 2.4419, "step": 14134 }, { "epoch": 0.7583154506437768, "grad_norm": 2.046875, "learning_rate": 4.808416786161057e-06, "loss": 2.2633, "step": 14135 }, { "epoch": 0.7583690987124464, "grad_norm": 0.42578125, "learning_rate": 4.808383431221806e-06, "loss": 2.2984, "step": 14136 }, { "epoch": 0.7584227467811159, "grad_norm": 0.44140625, "learning_rate": 4.8083500734949426e-06, "loss": 2.1032, "step": 14137 }, { "epoch": 0.7584763948497855, "grad_norm": 0.4375, "learning_rate": 4.808316712980507e-06, "loss": 2.2689, "step": 14138 }, { "epoch": 0.7585300429184549, "grad_norm": 0.396484375, "learning_rate": 4.80828334967854e-06, "loss": 2.039, "step": 14139 }, { "epoch": 0.7585836909871244, "grad_norm": 0.5234375, "learning_rate": 4.8082499835890815e-06, "loss": 2.314, "step": 14140 }, { "epoch": 0.758637339055794, "grad_norm": 0.462890625, "learning_rate": 4.808216614712171e-06, "loss": 2.3095, "step": 14141 }, { "epoch": 0.7586909871244635, "grad_norm": 0.482421875, "learning_rate": 4.80818324304785e-06, "loss": 2.5406, "step": 14142 }, { "epoch": 0.7587446351931331, "grad_norm": 0.484375, "learning_rate": 4.808149868596158e-06, "loss": 2.3516, "step": 14143 }, { "epoch": 0.7587982832618025, "grad_norm": 0.484375, "learning_rate": 4.808116491357136e-06, "loss": 2.4397, "step": 14144 }, { "epoch": 0.7588519313304721, "grad_norm": 0.494140625, "learning_rate": 4.808083111330823e-06, "loss": 2.3361, "step": 14145 }, { "epoch": 0.7589055793991416, "grad_norm": 0.423828125, "learning_rate": 4.8080497285172605e-06, "loss": 2.4435, "step": 14146 }, { "epoch": 0.7589592274678112, "grad_norm": 0.49609375, "learning_rate": 4.8080163429164885e-06, "loss": 2.4039, "step": 14147 }, { "epoch": 0.7590128755364807, "grad_norm": 0.4765625, "learning_rate": 4.807982954528548e-06, "loss": 2.2738, "step": 14148 }, { "epoch": 0.7590665236051503, "grad_norm": 0.46875, "learning_rate": 4.8079495633534775e-06, "loss": 2.0218, "step": 14149 }, { "epoch": 0.7591201716738197, "grad_norm": 0.447265625, "learning_rate": 4.80791616939132e-06, "loss": 2.3487, "step": 14150 }, { "epoch": 0.7591738197424893, "grad_norm": 0.94140625, "learning_rate": 4.807882772642113e-06, "loss": 1.9233, "step": 14151 }, { "epoch": 0.7592274678111588, "grad_norm": 0.4375, "learning_rate": 4.807849373105898e-06, "loss": 2.1726, "step": 14152 }, { "epoch": 0.7592811158798283, "grad_norm": 0.447265625, "learning_rate": 4.807815970782717e-06, "loss": 2.065, "step": 14153 }, { "epoch": 0.7593347639484979, "grad_norm": 0.46484375, "learning_rate": 4.807782565672607e-06, "loss": 2.3174, "step": 14154 }, { "epoch": 0.7593884120171673, "grad_norm": 0.4609375, "learning_rate": 4.807749157775611e-06, "loss": 2.1846, "step": 14155 }, { "epoch": 0.7594420600858369, "grad_norm": 0.5703125, "learning_rate": 4.807715747091768e-06, "loss": 2.2724, "step": 14156 }, { "epoch": 0.7594957081545064, "grad_norm": 0.490234375, "learning_rate": 4.807682333621119e-06, "loss": 2.1739, "step": 14157 }, { "epoch": 0.759549356223176, "grad_norm": 0.431640625, "learning_rate": 4.807648917363704e-06, "loss": 2.1707, "step": 14158 }, { "epoch": 0.7596030042918455, "grad_norm": 1.21875, "learning_rate": 4.807615498319565e-06, "loss": 1.9428, "step": 14159 }, { "epoch": 0.759656652360515, "grad_norm": 0.5390625, "learning_rate": 4.807582076488739e-06, "loss": 2.3239, "step": 14160 }, { "epoch": 0.7597103004291845, "grad_norm": 0.44921875, "learning_rate": 4.807548651871269e-06, "loss": 2.2713, "step": 14161 }, { "epoch": 0.7597639484978541, "grad_norm": 0.435546875, "learning_rate": 4.807515224467194e-06, "loss": 2.2577, "step": 14162 }, { "epoch": 0.7598175965665236, "grad_norm": 0.390625, "learning_rate": 4.807481794276555e-06, "loss": 2.1032, "step": 14163 }, { "epoch": 0.7598712446351932, "grad_norm": 0.7265625, "learning_rate": 4.8074483612993934e-06, "loss": 2.216, "step": 14164 }, { "epoch": 0.7599248927038627, "grad_norm": 0.400390625, "learning_rate": 4.807414925535748e-06, "loss": 1.8083, "step": 14165 }, { "epoch": 0.7599785407725322, "grad_norm": 0.388671875, "learning_rate": 4.807381486985659e-06, "loss": 2.2407, "step": 14166 }, { "epoch": 0.7600321888412017, "grad_norm": 0.46484375, "learning_rate": 4.807348045649168e-06, "loss": 2.3244, "step": 14167 }, { "epoch": 0.7600858369098712, "grad_norm": 0.447265625, "learning_rate": 4.807314601526315e-06, "loss": 2.5791, "step": 14168 }, { "epoch": 0.7601394849785408, "grad_norm": 0.5, "learning_rate": 4.80728115461714e-06, "loss": 2.245, "step": 14169 }, { "epoch": 0.7601931330472103, "grad_norm": 0.494140625, "learning_rate": 4.807247704921685e-06, "loss": 2.5717, "step": 14170 }, { "epoch": 0.7602467811158798, "grad_norm": 0.6953125, "learning_rate": 4.807214252439988e-06, "loss": 2.4851, "step": 14171 }, { "epoch": 0.7603004291845493, "grad_norm": 0.4296875, "learning_rate": 4.80718079717209e-06, "loss": 2.3482, "step": 14172 }, { "epoch": 0.7603540772532189, "grad_norm": 0.404296875, "learning_rate": 4.8071473391180315e-06, "loss": 1.9565, "step": 14173 }, { "epoch": 0.7604077253218884, "grad_norm": 0.44140625, "learning_rate": 4.807113878277855e-06, "loss": 2.2972, "step": 14174 }, { "epoch": 0.760461373390558, "grad_norm": 0.50390625, "learning_rate": 4.807080414651597e-06, "loss": 1.938, "step": 14175 }, { "epoch": 0.7605150214592274, "grad_norm": 0.8203125, "learning_rate": 4.8070469482393016e-06, "loss": 1.4477, "step": 14176 }, { "epoch": 0.760568669527897, "grad_norm": 0.458984375, "learning_rate": 4.807013479041006e-06, "loss": 2.0543, "step": 14177 }, { "epoch": 0.7606223175965665, "grad_norm": 0.458984375, "learning_rate": 4.806980007056754e-06, "loss": 2.3945, "step": 14178 }, { "epoch": 0.7606759656652361, "grad_norm": 0.6171875, "learning_rate": 4.806946532286584e-06, "loss": 2.1727, "step": 14179 }, { "epoch": 0.7607296137339056, "grad_norm": 0.443359375, "learning_rate": 4.806913054730535e-06, "loss": 2.3614, "step": 14180 }, { "epoch": 0.7607832618025752, "grad_norm": 0.50390625, "learning_rate": 4.806879574388651e-06, "loss": 2.3514, "step": 14181 }, { "epoch": 0.7608369098712446, "grad_norm": 0.55859375, "learning_rate": 4.806846091260971e-06, "loss": 2.3091, "step": 14182 }, { "epoch": 0.7608905579399141, "grad_norm": 0.49609375, "learning_rate": 4.8068126053475336e-06, "loss": 2.3046, "step": 14183 }, { "epoch": 0.7609442060085837, "grad_norm": 0.45703125, "learning_rate": 4.806779116648381e-06, "loss": 2.3392, "step": 14184 }, { "epoch": 0.7609978540772532, "grad_norm": 0.49609375, "learning_rate": 4.8067456251635535e-06, "loss": 2.3855, "step": 14185 }, { "epoch": 0.7610515021459228, "grad_norm": 0.447265625, "learning_rate": 4.806712130893091e-06, "loss": 2.1509, "step": 14186 }, { "epoch": 0.7611051502145922, "grad_norm": 0.4609375, "learning_rate": 4.8066786338370345e-06, "loss": 2.5275, "step": 14187 }, { "epoch": 0.7611587982832618, "grad_norm": 0.470703125, "learning_rate": 4.806645133995425e-06, "loss": 2.1422, "step": 14188 }, { "epoch": 0.7612124463519313, "grad_norm": 0.447265625, "learning_rate": 4.806611631368301e-06, "loss": 2.3167, "step": 14189 }, { "epoch": 0.7612660944206009, "grad_norm": 0.4609375, "learning_rate": 4.806578125955704e-06, "loss": 2.1603, "step": 14190 }, { "epoch": 0.7613197424892704, "grad_norm": 0.53515625, "learning_rate": 4.806544617757676e-06, "loss": 2.3344, "step": 14191 }, { "epoch": 0.76137339055794, "grad_norm": 1.1328125, "learning_rate": 4.806511106774256e-06, "loss": 2.2685, "step": 14192 }, { "epoch": 0.7614270386266094, "grad_norm": 0.5625, "learning_rate": 4.806477593005483e-06, "loss": 1.9234, "step": 14193 }, { "epoch": 0.761480686695279, "grad_norm": 0.515625, "learning_rate": 4.8064440764514e-06, "loss": 2.3294, "step": 14194 }, { "epoch": 0.7615343347639485, "grad_norm": 0.462890625, "learning_rate": 4.806410557112046e-06, "loss": 2.161, "step": 14195 }, { "epoch": 0.761587982832618, "grad_norm": 0.453125, "learning_rate": 4.806377034987463e-06, "loss": 2.2835, "step": 14196 }, { "epoch": 0.7616416309012876, "grad_norm": 0.4296875, "learning_rate": 4.806343510077689e-06, "loss": 1.5677, "step": 14197 }, { "epoch": 0.761695278969957, "grad_norm": 0.392578125, "learning_rate": 4.8063099823827675e-06, "loss": 2.3279, "step": 14198 }, { "epoch": 0.7617489270386266, "grad_norm": 0.447265625, "learning_rate": 4.806276451902736e-06, "loss": 2.2565, "step": 14199 }, { "epoch": 0.7618025751072961, "grad_norm": 0.40234375, "learning_rate": 4.806242918637637e-06, "loss": 2.2545, "step": 14200 }, { "epoch": 0.7618562231759657, "grad_norm": 0.443359375, "learning_rate": 4.806209382587511e-06, "loss": 2.2537, "step": 14201 }, { "epoch": 0.7619098712446352, "grad_norm": 0.5078125, "learning_rate": 4.806175843752398e-06, "loss": 2.4707, "step": 14202 }, { "epoch": 0.7619635193133047, "grad_norm": 0.341796875, "learning_rate": 4.806142302132337e-06, "loss": 2.0556, "step": 14203 }, { "epoch": 0.7620171673819742, "grad_norm": 0.44140625, "learning_rate": 4.806108757727371e-06, "loss": 2.2061, "step": 14204 }, { "epoch": 0.7620708154506438, "grad_norm": 2.375, "learning_rate": 4.806075210537538e-06, "loss": 2.2436, "step": 14205 }, { "epoch": 0.7621244635193133, "grad_norm": 0.64453125, "learning_rate": 4.8060416605628815e-06, "loss": 2.0606, "step": 14206 }, { "epoch": 0.7621781115879829, "grad_norm": 0.49609375, "learning_rate": 4.8060081078034405e-06, "loss": 2.2077, "step": 14207 }, { "epoch": 0.7622317596566524, "grad_norm": 0.482421875, "learning_rate": 4.805974552259255e-06, "loss": 2.1605, "step": 14208 }, { "epoch": 0.7622854077253219, "grad_norm": 1.53125, "learning_rate": 4.805940993930366e-06, "loss": 2.3272, "step": 14209 }, { "epoch": 0.7623390557939914, "grad_norm": 0.396484375, "learning_rate": 4.805907432816814e-06, "loss": 2.0947, "step": 14210 }, { "epoch": 0.7623927038626609, "grad_norm": 0.453125, "learning_rate": 4.805873868918639e-06, "loss": 2.4697, "step": 14211 }, { "epoch": 0.7624463519313305, "grad_norm": 0.46875, "learning_rate": 4.805840302235882e-06, "loss": 2.2009, "step": 14212 }, { "epoch": 0.7625, "grad_norm": 0.419921875, "learning_rate": 4.805806732768585e-06, "loss": 2.2866, "step": 14213 }, { "epoch": 0.7625536480686695, "grad_norm": 0.4765625, "learning_rate": 4.8057731605167865e-06, "loss": 2.4168, "step": 14214 }, { "epoch": 0.762607296137339, "grad_norm": 0.494140625, "learning_rate": 4.805739585480528e-06, "loss": 2.2057, "step": 14215 }, { "epoch": 0.7626609442060086, "grad_norm": 0.4375, "learning_rate": 4.805706007659849e-06, "loss": 2.2335, "step": 14216 }, { "epoch": 0.7627145922746781, "grad_norm": 0.625, "learning_rate": 4.805672427054791e-06, "loss": 2.3705, "step": 14217 }, { "epoch": 0.7627682403433477, "grad_norm": 0.486328125, "learning_rate": 4.805638843665394e-06, "loss": 2.3559, "step": 14218 }, { "epoch": 0.7628218884120171, "grad_norm": 0.470703125, "learning_rate": 4.805605257491699e-06, "loss": 2.1833, "step": 14219 }, { "epoch": 0.7628755364806867, "grad_norm": 0.341796875, "learning_rate": 4.805571668533747e-06, "loss": 2.0416, "step": 14220 }, { "epoch": 0.7629291845493562, "grad_norm": 0.80859375, "learning_rate": 4.805538076791578e-06, "loss": 1.2474, "step": 14221 }, { "epoch": 0.7629828326180258, "grad_norm": 0.435546875, "learning_rate": 4.805504482265232e-06, "loss": 2.3856, "step": 14222 }, { "epoch": 0.7630364806866953, "grad_norm": 0.44140625, "learning_rate": 4.805470884954751e-06, "loss": 2.1544, "step": 14223 }, { "epoch": 0.7630901287553649, "grad_norm": 0.51171875, "learning_rate": 4.805437284860174e-06, "loss": 2.1709, "step": 14224 }, { "epoch": 0.7631437768240343, "grad_norm": 0.453125, "learning_rate": 4.805403681981542e-06, "loss": 2.2973, "step": 14225 }, { "epoch": 0.7631974248927038, "grad_norm": 0.54296875, "learning_rate": 4.805370076318897e-06, "loss": 2.1101, "step": 14226 }, { "epoch": 0.7632510729613734, "grad_norm": 0.66796875, "learning_rate": 4.805336467872278e-06, "loss": 2.3108, "step": 14227 }, { "epoch": 0.7633047210300429, "grad_norm": 0.51171875, "learning_rate": 4.8053028566417255e-06, "loss": 1.9681, "step": 14228 }, { "epoch": 0.7633583690987125, "grad_norm": 0.50390625, "learning_rate": 4.805269242627281e-06, "loss": 2.186, "step": 14229 }, { "epoch": 0.7634120171673819, "grad_norm": 0.52734375, "learning_rate": 4.8052356258289845e-06, "loss": 2.2812, "step": 14230 }, { "epoch": 0.7634656652360515, "grad_norm": 0.369140625, "learning_rate": 4.805202006246877e-06, "loss": 1.8298, "step": 14231 }, { "epoch": 0.763519313304721, "grad_norm": 0.5, "learning_rate": 4.805168383880999e-06, "loss": 2.3852, "step": 14232 }, { "epoch": 0.7635729613733906, "grad_norm": 0.48828125, "learning_rate": 4.805134758731391e-06, "loss": 2.4576, "step": 14233 }, { "epoch": 0.7636266094420601, "grad_norm": 0.484375, "learning_rate": 4.8051011307980935e-06, "loss": 2.3499, "step": 14234 }, { "epoch": 0.7636802575107297, "grad_norm": 0.40625, "learning_rate": 4.805067500081148e-06, "loss": 2.239, "step": 14235 }, { "epoch": 0.7637339055793991, "grad_norm": 0.41796875, "learning_rate": 4.805033866580593e-06, "loss": 2.4198, "step": 14236 }, { "epoch": 0.7637875536480687, "grad_norm": 0.69140625, "learning_rate": 4.805000230296472e-06, "loss": 2.3895, "step": 14237 }, { "epoch": 0.7638412017167382, "grad_norm": 0.421875, "learning_rate": 4.804966591228823e-06, "loss": 2.2327, "step": 14238 }, { "epoch": 0.7638948497854077, "grad_norm": 0.482421875, "learning_rate": 4.804932949377687e-06, "loss": 2.5071, "step": 14239 }, { "epoch": 0.7639484978540773, "grad_norm": 0.4296875, "learning_rate": 4.804899304743107e-06, "loss": 2.1906, "step": 14240 }, { "epoch": 0.7640021459227467, "grad_norm": 0.447265625, "learning_rate": 4.804865657325121e-06, "loss": 2.4597, "step": 14241 }, { "epoch": 0.7640557939914163, "grad_norm": 0.78515625, "learning_rate": 4.804832007123771e-06, "loss": 2.2193, "step": 14242 }, { "epoch": 0.7641094420600858, "grad_norm": 0.451171875, "learning_rate": 4.804798354139097e-06, "loss": 2.5786, "step": 14243 }, { "epoch": 0.7641630901287554, "grad_norm": 0.40234375, "learning_rate": 4.804764698371139e-06, "loss": 2.1371, "step": 14244 }, { "epoch": 0.7642167381974249, "grad_norm": 0.35546875, "learning_rate": 4.80473103981994e-06, "loss": 2.159, "step": 14245 }, { "epoch": 0.7642703862660944, "grad_norm": 0.4453125, "learning_rate": 4.804697378485539e-06, "loss": 2.0532, "step": 14246 }, { "epoch": 0.7643240343347639, "grad_norm": 0.4609375, "learning_rate": 4.804663714367977e-06, "loss": 2.4508, "step": 14247 }, { "epoch": 0.7643776824034335, "grad_norm": 0.578125, "learning_rate": 4.804630047467293e-06, "loss": 2.4173, "step": 14248 }, { "epoch": 0.764431330472103, "grad_norm": 0.455078125, "learning_rate": 4.8045963777835305e-06, "loss": 2.4156, "step": 14249 }, { "epoch": 0.7644849785407726, "grad_norm": 0.408203125, "learning_rate": 4.804562705316728e-06, "loss": 2.1518, "step": 14250 }, { "epoch": 0.764538626609442, "grad_norm": 0.470703125, "learning_rate": 4.804529030066927e-06, "loss": 2.4284, "step": 14251 }, { "epoch": 0.7645922746781116, "grad_norm": 0.8203125, "learning_rate": 4.804495352034169e-06, "loss": 2.1213, "step": 14252 }, { "epoch": 0.7646459227467811, "grad_norm": 0.484375, "learning_rate": 4.804461671218492e-06, "loss": 2.2906, "step": 14253 }, { "epoch": 0.7646995708154506, "grad_norm": 0.53125, "learning_rate": 4.804427987619939e-06, "loss": 2.3549, "step": 14254 }, { "epoch": 0.7647532188841202, "grad_norm": 0.431640625, "learning_rate": 4.804394301238552e-06, "loss": 2.2785, "step": 14255 }, { "epoch": 0.7648068669527897, "grad_norm": 0.5078125, "learning_rate": 4.804360612074368e-06, "loss": 2.5578, "step": 14256 }, { "epoch": 0.7648605150214592, "grad_norm": 0.408203125, "learning_rate": 4.80432692012743e-06, "loss": 2.2486, "step": 14257 }, { "epoch": 0.7649141630901287, "grad_norm": 0.859375, "learning_rate": 4.804293225397778e-06, "loss": 1.3267, "step": 14258 }, { "epoch": 0.7649678111587983, "grad_norm": 0.380859375, "learning_rate": 4.804259527885453e-06, "loss": 2.2171, "step": 14259 }, { "epoch": 0.7650214592274678, "grad_norm": 0.46875, "learning_rate": 4.804225827590496e-06, "loss": 2.3089, "step": 14260 }, { "epoch": 0.7650751072961374, "grad_norm": 0.404296875, "learning_rate": 4.8041921245129465e-06, "loss": 2.3807, "step": 14261 }, { "epoch": 0.7651287553648068, "grad_norm": 1.625, "learning_rate": 4.804158418652846e-06, "loss": 2.164, "step": 14262 }, { "epoch": 0.7651824034334764, "grad_norm": 0.447265625, "learning_rate": 4.804124710010236e-06, "loss": 2.322, "step": 14263 }, { "epoch": 0.7652360515021459, "grad_norm": 0.54296875, "learning_rate": 4.804090998585156e-06, "loss": 2.414, "step": 14264 }, { "epoch": 0.7652896995708155, "grad_norm": 0.474609375, "learning_rate": 4.804057284377647e-06, "loss": 2.3174, "step": 14265 }, { "epoch": 0.765343347639485, "grad_norm": 0.447265625, "learning_rate": 4.8040235673877485e-06, "loss": 2.4134, "step": 14266 }, { "epoch": 0.7653969957081546, "grad_norm": 0.431640625, "learning_rate": 4.803989847615504e-06, "loss": 2.3167, "step": 14267 }, { "epoch": 0.765450643776824, "grad_norm": 0.400390625, "learning_rate": 4.8039561250609526e-06, "loss": 2.325, "step": 14268 }, { "epoch": 0.7655042918454935, "grad_norm": 0.41796875, "learning_rate": 4.803922399724135e-06, "loss": 2.3957, "step": 14269 }, { "epoch": 0.7655579399141631, "grad_norm": 0.486328125, "learning_rate": 4.803888671605092e-06, "loss": 2.2592, "step": 14270 }, { "epoch": 0.7656115879828326, "grad_norm": 0.435546875, "learning_rate": 4.803854940703864e-06, "loss": 2.252, "step": 14271 }, { "epoch": 0.7656652360515022, "grad_norm": 0.67578125, "learning_rate": 4.803821207020492e-06, "loss": 2.3094, "step": 14272 }, { "epoch": 0.7657188841201716, "grad_norm": 0.498046875, "learning_rate": 4.803787470555018e-06, "loss": 2.1258, "step": 14273 }, { "epoch": 0.7657725321888412, "grad_norm": 4.8125, "learning_rate": 4.803753731307481e-06, "loss": 2.4088, "step": 14274 }, { "epoch": 0.7658261802575107, "grad_norm": 0.64453125, "learning_rate": 4.803719989277923e-06, "loss": 2.2973, "step": 14275 }, { "epoch": 0.7658798283261803, "grad_norm": 0.4609375, "learning_rate": 4.803686244466383e-06, "loss": 2.3419, "step": 14276 }, { "epoch": 0.7659334763948498, "grad_norm": 0.431640625, "learning_rate": 4.803652496872904e-06, "loss": 2.3473, "step": 14277 }, { "epoch": 0.7659871244635194, "grad_norm": 0.423828125, "learning_rate": 4.803618746497525e-06, "loss": 2.1666, "step": 14278 }, { "epoch": 0.7660407725321888, "grad_norm": 0.47265625, "learning_rate": 4.803584993340286e-06, "loss": 2.4023, "step": 14279 }, { "epoch": 0.7660944206008584, "grad_norm": 0.412109375, "learning_rate": 4.803551237401232e-06, "loss": 2.1002, "step": 14280 }, { "epoch": 0.7661480686695279, "grad_norm": 0.5078125, "learning_rate": 4.803517478680399e-06, "loss": 2.3021, "step": 14281 }, { "epoch": 0.7662017167381975, "grad_norm": 0.45703125, "learning_rate": 4.80348371717783e-06, "loss": 2.2887, "step": 14282 }, { "epoch": 0.766255364806867, "grad_norm": 0.453125, "learning_rate": 4.803449952893565e-06, "loss": 2.3776, "step": 14283 }, { "epoch": 0.7663090128755364, "grad_norm": 0.498046875, "learning_rate": 4.803416185827646e-06, "loss": 2.4115, "step": 14284 }, { "epoch": 0.766362660944206, "grad_norm": 0.421875, "learning_rate": 4.803382415980113e-06, "loss": 2.2478, "step": 14285 }, { "epoch": 0.7664163090128755, "grad_norm": 0.447265625, "learning_rate": 4.803348643351006e-06, "loss": 2.3001, "step": 14286 }, { "epoch": 0.7664699570815451, "grad_norm": 0.94921875, "learning_rate": 4.8033148679403676e-06, "loss": 2.252, "step": 14287 }, { "epoch": 0.7665236051502146, "grad_norm": 1.125, "learning_rate": 4.8032810897482364e-06, "loss": 2.2536, "step": 14288 }, { "epoch": 0.7665772532188841, "grad_norm": 0.7890625, "learning_rate": 4.803247308774654e-06, "loss": 2.3619, "step": 14289 }, { "epoch": 0.7666309012875536, "grad_norm": 0.439453125, "learning_rate": 4.803213525019663e-06, "loss": 2.3687, "step": 14290 }, { "epoch": 0.7666845493562232, "grad_norm": 0.443359375, "learning_rate": 4.803179738483303e-06, "loss": 2.2048, "step": 14291 }, { "epoch": 0.7667381974248927, "grad_norm": 0.462890625, "learning_rate": 4.803145949165613e-06, "loss": 2.4001, "step": 14292 }, { "epoch": 0.7667918454935623, "grad_norm": 0.45703125, "learning_rate": 4.803112157066636e-06, "loss": 2.2274, "step": 14293 }, { "epoch": 0.7668454935622318, "grad_norm": 0.369140625, "learning_rate": 4.8030783621864115e-06, "loss": 2.2408, "step": 14294 }, { "epoch": 0.7668991416309013, "grad_norm": 0.671875, "learning_rate": 4.803044564524982e-06, "loss": 2.3735, "step": 14295 }, { "epoch": 0.7669527896995708, "grad_norm": 1.5234375, "learning_rate": 4.803010764082387e-06, "loss": 2.4847, "step": 14296 }, { "epoch": 0.7670064377682403, "grad_norm": 0.85546875, "learning_rate": 4.802976960858667e-06, "loss": 2.6512, "step": 14297 }, { "epoch": 0.7670600858369099, "grad_norm": 0.41015625, "learning_rate": 4.802943154853863e-06, "loss": 2.5271, "step": 14298 }, { "epoch": 0.7671137339055794, "grad_norm": 0.474609375, "learning_rate": 4.802909346068018e-06, "loss": 2.4473, "step": 14299 }, { "epoch": 0.7671673819742489, "grad_norm": 0.431640625, "learning_rate": 4.802875534501169e-06, "loss": 2.1782, "step": 14300 }, { "epoch": 0.7672210300429184, "grad_norm": 0.484375, "learning_rate": 4.80284172015336e-06, "loss": 2.2806, "step": 14301 }, { "epoch": 0.767274678111588, "grad_norm": 0.384765625, "learning_rate": 4.802807903024631e-06, "loss": 1.9771, "step": 14302 }, { "epoch": 0.7673283261802575, "grad_norm": 0.400390625, "learning_rate": 4.802774083115021e-06, "loss": 2.4329, "step": 14303 }, { "epoch": 0.7673819742489271, "grad_norm": 0.431640625, "learning_rate": 4.802740260424574e-06, "loss": 2.122, "step": 14304 }, { "epoch": 0.7674356223175965, "grad_norm": 0.5, "learning_rate": 4.802706434953329e-06, "loss": 2.3874, "step": 14305 }, { "epoch": 0.7674892703862661, "grad_norm": 0.435546875, "learning_rate": 4.802672606701326e-06, "loss": 2.3581, "step": 14306 }, { "epoch": 0.7675429184549356, "grad_norm": 0.4296875, "learning_rate": 4.802638775668608e-06, "loss": 2.1222, "step": 14307 }, { "epoch": 0.7675965665236052, "grad_norm": 0.392578125, "learning_rate": 4.802604941855215e-06, "loss": 2.1858, "step": 14308 }, { "epoch": 0.7676502145922747, "grad_norm": 0.5, "learning_rate": 4.802571105261187e-06, "loss": 2.5422, "step": 14309 }, { "epoch": 0.7677038626609443, "grad_norm": 0.400390625, "learning_rate": 4.802537265886566e-06, "loss": 2.1802, "step": 14310 }, { "epoch": 0.7677575107296137, "grad_norm": 0.87109375, "learning_rate": 4.802503423731391e-06, "loss": 2.5094, "step": 14311 }, { "epoch": 0.7678111587982832, "grad_norm": 0.546875, "learning_rate": 4.802469578795705e-06, "loss": 2.6146, "step": 14312 }, { "epoch": 0.7678648068669528, "grad_norm": 0.4921875, "learning_rate": 4.802435731079549e-06, "loss": 2.5328, "step": 14313 }, { "epoch": 0.7679184549356223, "grad_norm": 1.390625, "learning_rate": 4.802401880582963e-06, "loss": 2.0177, "step": 14314 }, { "epoch": 0.7679721030042919, "grad_norm": 0.48046875, "learning_rate": 4.8023680273059865e-06, "loss": 2.3305, "step": 14315 }, { "epoch": 0.7680257510729613, "grad_norm": 0.45703125, "learning_rate": 4.802334171248663e-06, "loss": 2.2612, "step": 14316 }, { "epoch": 0.7680793991416309, "grad_norm": 0.5078125, "learning_rate": 4.802300312411031e-06, "loss": 2.246, "step": 14317 }, { "epoch": 0.7681330472103004, "grad_norm": 0.51171875, "learning_rate": 4.8022664507931335e-06, "loss": 2.3164, "step": 14318 }, { "epoch": 0.76818669527897, "grad_norm": 0.388671875, "learning_rate": 4.802232586395009e-06, "loss": 1.8741, "step": 14319 }, { "epoch": 0.7682403433476395, "grad_norm": 0.81640625, "learning_rate": 4.802198719216701e-06, "loss": 2.199, "step": 14320 }, { "epoch": 0.768293991416309, "grad_norm": 1.53125, "learning_rate": 4.8021648492582496e-06, "loss": 2.2701, "step": 14321 }, { "epoch": 0.7683476394849785, "grad_norm": 0.466796875, "learning_rate": 4.802130976519694e-06, "loss": 2.5478, "step": 14322 }, { "epoch": 0.7684012875536481, "grad_norm": 0.447265625, "learning_rate": 4.802097101001077e-06, "loss": 2.2978, "step": 14323 }, { "epoch": 0.7684549356223176, "grad_norm": 0.51171875, "learning_rate": 4.802063222702439e-06, "loss": 2.3123, "step": 14324 }, { "epoch": 0.7685085836909872, "grad_norm": 0.451171875, "learning_rate": 4.8020293416238215e-06, "loss": 2.2542, "step": 14325 }, { "epoch": 0.7685622317596567, "grad_norm": 0.48046875, "learning_rate": 4.801995457765264e-06, "loss": 2.5164, "step": 14326 }, { "epoch": 0.7686158798283261, "grad_norm": 0.4453125, "learning_rate": 4.801961571126808e-06, "loss": 2.3325, "step": 14327 }, { "epoch": 0.7686695278969957, "grad_norm": 0.3671875, "learning_rate": 4.8019276817084944e-06, "loss": 2.1418, "step": 14328 }, { "epoch": 0.7687231759656652, "grad_norm": 0.38671875, "learning_rate": 4.801893789510365e-06, "loss": 2.0988, "step": 14329 }, { "epoch": 0.7687768240343348, "grad_norm": 0.4609375, "learning_rate": 4.80185989453246e-06, "loss": 2.289, "step": 14330 }, { "epoch": 0.7688304721030043, "grad_norm": 0.466796875, "learning_rate": 4.80182599677482e-06, "loss": 2.4609, "step": 14331 }, { "epoch": 0.7688841201716738, "grad_norm": 0.5078125, "learning_rate": 4.8017920962374865e-06, "loss": 2.461, "step": 14332 }, { "epoch": 0.7689377682403433, "grad_norm": 0.53125, "learning_rate": 4.801758192920501e-06, "loss": 2.1901, "step": 14333 }, { "epoch": 0.7689914163090129, "grad_norm": 0.46875, "learning_rate": 4.8017242868239025e-06, "loss": 2.4482, "step": 14334 }, { "epoch": 0.7690450643776824, "grad_norm": 3.734375, "learning_rate": 4.801690377947733e-06, "loss": 2.0743, "step": 14335 }, { "epoch": 0.769098712446352, "grad_norm": 0.439453125, "learning_rate": 4.801656466292034e-06, "loss": 2.2432, "step": 14336 }, { "epoch": 0.7691523605150214, "grad_norm": 0.494140625, "learning_rate": 4.801622551856846e-06, "loss": 2.3055, "step": 14337 }, { "epoch": 0.769206008583691, "grad_norm": 0.55078125, "learning_rate": 4.80158863464221e-06, "loss": 2.1196, "step": 14338 }, { "epoch": 0.7692596566523605, "grad_norm": 0.455078125, "learning_rate": 4.801554714648166e-06, "loss": 2.4579, "step": 14339 }, { "epoch": 0.76931330472103, "grad_norm": 0.578125, "learning_rate": 4.8015207918747574e-06, "loss": 2.1373, "step": 14340 }, { "epoch": 0.7693669527896996, "grad_norm": 0.421875, "learning_rate": 4.801486866322023e-06, "loss": 2.1831, "step": 14341 }, { "epoch": 0.769420600858369, "grad_norm": 0.44140625, "learning_rate": 4.801452937990006e-06, "loss": 2.4552, "step": 14342 }, { "epoch": 0.7694742489270386, "grad_norm": 0.37890625, "learning_rate": 4.801419006878743e-06, "loss": 2.3598, "step": 14343 }, { "epoch": 0.7695278969957081, "grad_norm": 1.8515625, "learning_rate": 4.8013850729882794e-06, "loss": 2.3231, "step": 14344 }, { "epoch": 0.7695815450643777, "grad_norm": 0.455078125, "learning_rate": 4.801351136318655e-06, "loss": 2.3393, "step": 14345 }, { "epoch": 0.7696351931330472, "grad_norm": 0.48046875, "learning_rate": 4.801317196869909e-06, "loss": 2.2552, "step": 14346 }, { "epoch": 0.7696888412017168, "grad_norm": 0.427734375, "learning_rate": 4.8012832546420846e-06, "loss": 2.3234, "step": 14347 }, { "epoch": 0.7697424892703862, "grad_norm": 0.5859375, "learning_rate": 4.801249309635222e-06, "loss": 2.3598, "step": 14348 }, { "epoch": 0.7697961373390558, "grad_norm": 0.423828125, "learning_rate": 4.8012153618493614e-06, "loss": 2.2253, "step": 14349 }, { "epoch": 0.7698497854077253, "grad_norm": 0.54296875, "learning_rate": 4.801181411284545e-06, "loss": 2.2368, "step": 14350 }, { "epoch": 0.7699034334763949, "grad_norm": 0.423828125, "learning_rate": 4.801147457940813e-06, "loss": 1.9123, "step": 14351 }, { "epoch": 0.7699570815450644, "grad_norm": 0.39453125, "learning_rate": 4.801113501818208e-06, "loss": 1.9941, "step": 14352 }, { "epoch": 0.770010729613734, "grad_norm": 1.15625, "learning_rate": 4.801079542916768e-06, "loss": 2.7421, "step": 14353 }, { "epoch": 0.7700643776824034, "grad_norm": 0.494140625, "learning_rate": 4.801045581236536e-06, "loss": 2.2223, "step": 14354 }, { "epoch": 0.7701180257510729, "grad_norm": 0.53515625, "learning_rate": 4.8010116167775534e-06, "loss": 2.2624, "step": 14355 }, { "epoch": 0.7701716738197425, "grad_norm": 0.44921875, "learning_rate": 4.80097764953986e-06, "loss": 2.35, "step": 14356 }, { "epoch": 0.770225321888412, "grad_norm": 0.486328125, "learning_rate": 4.8009436795234975e-06, "loss": 2.3174, "step": 14357 }, { "epoch": 0.7702789699570816, "grad_norm": 0.421875, "learning_rate": 4.800909706728507e-06, "loss": 2.2941, "step": 14358 }, { "epoch": 0.770332618025751, "grad_norm": 0.515625, "learning_rate": 4.800875731154929e-06, "loss": 2.3882, "step": 14359 }, { "epoch": 0.7703862660944206, "grad_norm": 0.462890625, "learning_rate": 4.800841752802805e-06, "loss": 2.3466, "step": 14360 }, { "epoch": 0.7704399141630901, "grad_norm": 0.453125, "learning_rate": 4.800807771672177e-06, "loss": 2.3066, "step": 14361 }, { "epoch": 0.7704935622317597, "grad_norm": 0.498046875, "learning_rate": 4.800773787763083e-06, "loss": 2.3063, "step": 14362 }, { "epoch": 0.7705472103004292, "grad_norm": 0.42578125, "learning_rate": 4.800739801075567e-06, "loss": 2.0621, "step": 14363 }, { "epoch": 0.7706008583690988, "grad_norm": 0.45703125, "learning_rate": 4.8007058116096685e-06, "loss": 2.1874, "step": 14364 }, { "epoch": 0.7706545064377682, "grad_norm": 0.53515625, "learning_rate": 4.80067181936543e-06, "loss": 2.4454, "step": 14365 }, { "epoch": 0.7707081545064378, "grad_norm": 0.43359375, "learning_rate": 4.800637824342891e-06, "loss": 2.2513, "step": 14366 }, { "epoch": 0.7707618025751073, "grad_norm": 0.435546875, "learning_rate": 4.800603826542093e-06, "loss": 2.325, "step": 14367 }, { "epoch": 0.7708154506437769, "grad_norm": 0.41796875, "learning_rate": 4.800569825963078e-06, "loss": 2.3178, "step": 14368 }, { "epoch": 0.7708690987124464, "grad_norm": 0.53125, "learning_rate": 4.800535822605885e-06, "loss": 2.1727, "step": 14369 }, { "epoch": 0.7709227467811158, "grad_norm": 0.466796875, "learning_rate": 4.800501816470556e-06, "loss": 2.2397, "step": 14370 }, { "epoch": 0.7709763948497854, "grad_norm": 0.408203125, "learning_rate": 4.800467807557134e-06, "loss": 2.1482, "step": 14371 }, { "epoch": 0.7710300429184549, "grad_norm": 0.53515625, "learning_rate": 4.800433795865657e-06, "loss": 1.4842, "step": 14372 }, { "epoch": 0.7710836909871245, "grad_norm": 0.73828125, "learning_rate": 4.800399781396168e-06, "loss": 2.1175, "step": 14373 }, { "epoch": 0.771137339055794, "grad_norm": 0.5234375, "learning_rate": 4.800365764148708e-06, "loss": 2.3467, "step": 14374 }, { "epoch": 0.7711909871244635, "grad_norm": 0.427734375, "learning_rate": 4.800331744123317e-06, "loss": 2.0718, "step": 14375 }, { "epoch": 0.771244635193133, "grad_norm": 0.462890625, "learning_rate": 4.800297721320038e-06, "loss": 2.3336, "step": 14376 }, { "epoch": 0.7712982832618026, "grad_norm": 0.51953125, "learning_rate": 4.800263695738911e-06, "loss": 2.2706, "step": 14377 }, { "epoch": 0.7713519313304721, "grad_norm": 0.4453125, "learning_rate": 4.800229667379975e-06, "loss": 2.2334, "step": 14378 }, { "epoch": 0.7714055793991417, "grad_norm": 0.455078125, "learning_rate": 4.800195636243275e-06, "loss": 2.3234, "step": 14379 }, { "epoch": 0.7714592274678111, "grad_norm": 0.435546875, "learning_rate": 4.800161602328849e-06, "loss": 2.1731, "step": 14380 }, { "epoch": 0.7715128755364807, "grad_norm": 0.484375, "learning_rate": 4.800127565636739e-06, "loss": 2.2278, "step": 14381 }, { "epoch": 0.7715665236051502, "grad_norm": 0.43359375, "learning_rate": 4.800093526166987e-06, "loss": 2.1003, "step": 14382 }, { "epoch": 0.7716201716738197, "grad_norm": 0.4453125, "learning_rate": 4.800059483919633e-06, "loss": 2.2566, "step": 14383 }, { "epoch": 0.7716738197424893, "grad_norm": 0.4765625, "learning_rate": 4.800025438894718e-06, "loss": 2.4948, "step": 14384 }, { "epoch": 0.7717274678111588, "grad_norm": 0.38671875, "learning_rate": 4.7999913910922855e-06, "loss": 1.855, "step": 14385 }, { "epoch": 0.7717811158798283, "grad_norm": 0.412109375, "learning_rate": 4.799957340512373e-06, "loss": 2.0059, "step": 14386 }, { "epoch": 0.7718347639484978, "grad_norm": 0.55078125, "learning_rate": 4.799923287155025e-06, "loss": 2.1216, "step": 14387 }, { "epoch": 0.7718884120171674, "grad_norm": 0.42578125, "learning_rate": 4.79988923102028e-06, "loss": 2.3228, "step": 14388 }, { "epoch": 0.7719420600858369, "grad_norm": 0.53125, "learning_rate": 4.79985517210818e-06, "loss": 2.3849, "step": 14389 }, { "epoch": 0.7719957081545065, "grad_norm": 0.7578125, "learning_rate": 4.799821110418766e-06, "loss": 2.2007, "step": 14390 }, { "epoch": 0.7720493562231759, "grad_norm": 0.466796875, "learning_rate": 4.79978704595208e-06, "loss": 2.1803, "step": 14391 }, { "epoch": 0.7721030042918455, "grad_norm": 0.69140625, "learning_rate": 4.799752978708163e-06, "loss": 2.0313, "step": 14392 }, { "epoch": 0.772156652360515, "grad_norm": 0.48828125, "learning_rate": 4.799718908687055e-06, "loss": 2.3949, "step": 14393 }, { "epoch": 0.7722103004291846, "grad_norm": 0.494140625, "learning_rate": 4.7996848358887986e-06, "loss": 2.4321, "step": 14394 }, { "epoch": 0.7722639484978541, "grad_norm": 0.37890625, "learning_rate": 4.799650760313433e-06, "loss": 2.0064, "step": 14395 }, { "epoch": 0.7723175965665237, "grad_norm": 0.50390625, "learning_rate": 4.799616681961002e-06, "loss": 2.1283, "step": 14396 }, { "epoch": 0.7723712446351931, "grad_norm": 0.47265625, "learning_rate": 4.799582600831543e-06, "loss": 2.4187, "step": 14397 }, { "epoch": 0.7724248927038626, "grad_norm": 0.494140625, "learning_rate": 4.799548516925101e-06, "loss": 2.2358, "step": 14398 }, { "epoch": 0.7724785407725322, "grad_norm": 0.51171875, "learning_rate": 4.799514430241716e-06, "loss": 2.4053, "step": 14399 }, { "epoch": 0.7725321888412017, "grad_norm": 0.421875, "learning_rate": 4.7994803407814275e-06, "loss": 2.216, "step": 14400 }, { "epoch": 0.7725858369098713, "grad_norm": 0.48046875, "learning_rate": 4.799446248544279e-06, "loss": 2.3875, "step": 14401 }, { "epoch": 0.7726394849785407, "grad_norm": 0.50390625, "learning_rate": 4.79941215353031e-06, "loss": 2.2788, "step": 14402 }, { "epoch": 0.7726931330472103, "grad_norm": 0.470703125, "learning_rate": 4.799378055739562e-06, "loss": 2.2702, "step": 14403 }, { "epoch": 0.7727467811158798, "grad_norm": 0.384765625, "learning_rate": 4.7993439551720775e-06, "loss": 2.1796, "step": 14404 }, { "epoch": 0.7728004291845494, "grad_norm": 0.53125, "learning_rate": 4.799309851827896e-06, "loss": 2.2254, "step": 14405 }, { "epoch": 0.7728540772532189, "grad_norm": 0.5546875, "learning_rate": 4.7992757457070594e-06, "loss": 2.2437, "step": 14406 }, { "epoch": 0.7729077253218885, "grad_norm": 0.462890625, "learning_rate": 4.799241636809608e-06, "loss": 2.011, "step": 14407 }, { "epoch": 0.7729613733905579, "grad_norm": 0.419921875, "learning_rate": 4.799207525135585e-06, "loss": 2.4255, "step": 14408 }, { "epoch": 0.7730150214592275, "grad_norm": 0.50390625, "learning_rate": 4.79917341068503e-06, "loss": 2.2777, "step": 14409 }, { "epoch": 0.773068669527897, "grad_norm": 0.48046875, "learning_rate": 4.7991392934579836e-06, "loss": 2.3156, "step": 14410 }, { "epoch": 0.7731223175965666, "grad_norm": 0.453125, "learning_rate": 4.7991051734544895e-06, "loss": 2.4287, "step": 14411 }, { "epoch": 0.773175965665236, "grad_norm": 0.46484375, "learning_rate": 4.7990710506745865e-06, "loss": 2.1522, "step": 14412 }, { "epoch": 0.7732296137339055, "grad_norm": 0.443359375, "learning_rate": 4.799036925118316e-06, "loss": 2.2687, "step": 14413 }, { "epoch": 0.7732832618025751, "grad_norm": 0.46484375, "learning_rate": 4.799002796785722e-06, "loss": 2.2788, "step": 14414 }, { "epoch": 0.7733369098712446, "grad_norm": 0.4296875, "learning_rate": 4.798968665676842e-06, "loss": 2.5286, "step": 14415 }, { "epoch": 0.7733905579399142, "grad_norm": 0.404296875, "learning_rate": 4.798934531791719e-06, "loss": 2.0331, "step": 14416 }, { "epoch": 0.7734442060085837, "grad_norm": 0.40234375, "learning_rate": 4.7989003951303945e-06, "loss": 1.9995, "step": 14417 }, { "epoch": 0.7734978540772532, "grad_norm": 0.51171875, "learning_rate": 4.7988662556929085e-06, "loss": 2.4235, "step": 14418 }, { "epoch": 0.7735515021459227, "grad_norm": 0.4921875, "learning_rate": 4.798832113479303e-06, "loss": 2.0641, "step": 14419 }, { "epoch": 0.7736051502145923, "grad_norm": 0.4921875, "learning_rate": 4.7987979684896206e-06, "loss": 1.7188, "step": 14420 }, { "epoch": 0.7736587982832618, "grad_norm": 0.5, "learning_rate": 4.798763820723901e-06, "loss": 2.0971, "step": 14421 }, { "epoch": 0.7737124463519314, "grad_norm": 0.39453125, "learning_rate": 4.798729670182184e-06, "loss": 2.3367, "step": 14422 }, { "epoch": 0.7737660944206008, "grad_norm": 0.82421875, "learning_rate": 4.798695516864513e-06, "loss": 2.4034, "step": 14423 }, { "epoch": 0.7738197424892704, "grad_norm": 0.43359375, "learning_rate": 4.7986613607709295e-06, "loss": 2.1593, "step": 14424 }, { "epoch": 0.7738733905579399, "grad_norm": 0.51171875, "learning_rate": 4.7986272019014734e-06, "loss": 2.3091, "step": 14425 }, { "epoch": 0.7739270386266094, "grad_norm": 0.515625, "learning_rate": 4.798593040256186e-06, "loss": 2.551, "step": 14426 }, { "epoch": 0.773980686695279, "grad_norm": 0.49609375, "learning_rate": 4.79855887583511e-06, "loss": 1.8593, "step": 14427 }, { "epoch": 0.7740343347639485, "grad_norm": 0.55078125, "learning_rate": 4.798524708638286e-06, "loss": 2.1409, "step": 14428 }, { "epoch": 0.774087982832618, "grad_norm": 0.384765625, "learning_rate": 4.7984905386657536e-06, "loss": 2.1642, "step": 14429 }, { "epoch": 0.7741416309012875, "grad_norm": 0.46484375, "learning_rate": 4.798456365917556e-06, "loss": 2.3757, "step": 14430 }, { "epoch": 0.7741952789699571, "grad_norm": 0.447265625, "learning_rate": 4.798422190393734e-06, "loss": 2.0164, "step": 14431 }, { "epoch": 0.7742489270386266, "grad_norm": 0.45703125, "learning_rate": 4.798388012094328e-06, "loss": 2.1557, "step": 14432 }, { "epoch": 0.7743025751072962, "grad_norm": 0.51171875, "learning_rate": 4.798353831019381e-06, "loss": 2.319, "step": 14433 }, { "epoch": 0.7743562231759656, "grad_norm": 0.453125, "learning_rate": 4.798319647168933e-06, "loss": 2.1648, "step": 14434 }, { "epoch": 0.7744098712446352, "grad_norm": 0.515625, "learning_rate": 4.798285460543025e-06, "loss": 2.3985, "step": 14435 }, { "epoch": 0.7744635193133047, "grad_norm": 0.48046875, "learning_rate": 4.7982512711416995e-06, "loss": 2.3613, "step": 14436 }, { "epoch": 0.7745171673819743, "grad_norm": 0.451171875, "learning_rate": 4.798217078964997e-06, "loss": 2.4701, "step": 14437 }, { "epoch": 0.7745708154506438, "grad_norm": 0.5625, "learning_rate": 4.79818288401296e-06, "loss": 2.2305, "step": 14438 }, { "epoch": 0.7746244635193134, "grad_norm": 0.5625, "learning_rate": 4.798148686285627e-06, "loss": 2.3828, "step": 14439 }, { "epoch": 0.7746781115879828, "grad_norm": 0.443359375, "learning_rate": 4.798114485783042e-06, "loss": 2.3226, "step": 14440 }, { "epoch": 0.7747317596566523, "grad_norm": 0.66796875, "learning_rate": 4.798080282505245e-06, "loss": 1.2811, "step": 14441 }, { "epoch": 0.7747854077253219, "grad_norm": 0.48046875, "learning_rate": 4.798046076452277e-06, "loss": 2.4583, "step": 14442 }, { "epoch": 0.7748390557939914, "grad_norm": 0.5078125, "learning_rate": 4.7980118676241816e-06, "loss": 2.3757, "step": 14443 }, { "epoch": 0.774892703862661, "grad_norm": 0.451171875, "learning_rate": 4.797977656020997e-06, "loss": 2.1204, "step": 14444 }, { "epoch": 0.7749463519313304, "grad_norm": 0.4609375, "learning_rate": 4.7979434416427665e-06, "loss": 2.3446, "step": 14445 }, { "epoch": 0.775, "grad_norm": 0.51171875, "learning_rate": 4.797909224489531e-06, "loss": 2.1491, "step": 14446 }, { "epoch": 0.7750536480686695, "grad_norm": 0.40234375, "learning_rate": 4.797875004561331e-06, "loss": 2.0418, "step": 14447 }, { "epoch": 0.7751072961373391, "grad_norm": 0.431640625, "learning_rate": 4.797840781858209e-06, "loss": 2.1933, "step": 14448 }, { "epoch": 0.7751609442060086, "grad_norm": 0.478515625, "learning_rate": 4.7978065563802065e-06, "loss": 2.1989, "step": 14449 }, { "epoch": 0.7752145922746781, "grad_norm": 0.486328125, "learning_rate": 4.797772328127364e-06, "loss": 2.452, "step": 14450 }, { "epoch": 0.7752682403433476, "grad_norm": 0.466796875, "learning_rate": 4.797738097099722e-06, "loss": 2.5061, "step": 14451 }, { "epoch": 0.7753218884120172, "grad_norm": 0.421875, "learning_rate": 4.797703863297324e-06, "loss": 2.2111, "step": 14452 }, { "epoch": 0.7753755364806867, "grad_norm": 0.53125, "learning_rate": 4.79766962672021e-06, "loss": 2.5677, "step": 14453 }, { "epoch": 0.7754291845493563, "grad_norm": 0.4921875, "learning_rate": 4.797635387368421e-06, "loss": 2.4606, "step": 14454 }, { "epoch": 0.7754828326180258, "grad_norm": 0.427734375, "learning_rate": 4.797601145241999e-06, "loss": 2.3931, "step": 14455 }, { "epoch": 0.7755364806866952, "grad_norm": 0.44921875, "learning_rate": 4.797566900340986e-06, "loss": 2.2295, "step": 14456 }, { "epoch": 0.7755901287553648, "grad_norm": 0.443359375, "learning_rate": 4.797532652665422e-06, "loss": 2.3341, "step": 14457 }, { "epoch": 0.7756437768240343, "grad_norm": 0.4375, "learning_rate": 4.797498402215349e-06, "loss": 2.3742, "step": 14458 }, { "epoch": 0.7756974248927039, "grad_norm": 0.47265625, "learning_rate": 4.797464148990808e-06, "loss": 2.3541, "step": 14459 }, { "epoch": 0.7757510729613734, "grad_norm": 0.4609375, "learning_rate": 4.797429892991841e-06, "loss": 2.3256, "step": 14460 }, { "epoch": 0.7758047210300429, "grad_norm": 0.447265625, "learning_rate": 4.797395634218489e-06, "loss": 2.0445, "step": 14461 }, { "epoch": 0.7758583690987124, "grad_norm": 0.4921875, "learning_rate": 4.797361372670793e-06, "loss": 2.2795, "step": 14462 }, { "epoch": 0.775912017167382, "grad_norm": 0.435546875, "learning_rate": 4.797327108348796e-06, "loss": 2.4398, "step": 14463 }, { "epoch": 0.7759656652360515, "grad_norm": 0.52734375, "learning_rate": 4.797292841252537e-06, "loss": 2.4159, "step": 14464 }, { "epoch": 0.7760193133047211, "grad_norm": 0.486328125, "learning_rate": 4.79725857138206e-06, "loss": 2.3639, "step": 14465 }, { "epoch": 0.7760729613733905, "grad_norm": 0.6328125, "learning_rate": 4.7972242987374034e-06, "loss": 2.355, "step": 14466 }, { "epoch": 0.7761266094420601, "grad_norm": 0.5390625, "learning_rate": 4.797190023318611e-06, "loss": 2.3515, "step": 14467 }, { "epoch": 0.7761802575107296, "grad_norm": 0.46875, "learning_rate": 4.797155745125722e-06, "loss": 2.2748, "step": 14468 }, { "epoch": 0.7762339055793992, "grad_norm": 0.494140625, "learning_rate": 4.797121464158781e-06, "loss": 2.3, "step": 14469 }, { "epoch": 0.7762875536480687, "grad_norm": 0.5234375, "learning_rate": 4.797087180417827e-06, "loss": 2.4246, "step": 14470 }, { "epoch": 0.7763412017167381, "grad_norm": 0.40234375, "learning_rate": 4.797052893902901e-06, "loss": 2.644, "step": 14471 }, { "epoch": 0.7763948497854077, "grad_norm": 0.4296875, "learning_rate": 4.797018604614046e-06, "loss": 2.3444, "step": 14472 }, { "epoch": 0.7764484978540772, "grad_norm": 0.47265625, "learning_rate": 4.796984312551303e-06, "loss": 2.017, "step": 14473 }, { "epoch": 0.7765021459227468, "grad_norm": 0.4296875, "learning_rate": 4.796950017714712e-06, "loss": 2.2044, "step": 14474 }, { "epoch": 0.7765557939914163, "grad_norm": 0.4453125, "learning_rate": 4.7969157201043155e-06, "loss": 2.3363, "step": 14475 }, { "epoch": 0.7766094420600859, "grad_norm": 0.458984375, "learning_rate": 4.796881419720156e-06, "loss": 2.4403, "step": 14476 }, { "epoch": 0.7766630901287553, "grad_norm": 0.4140625, "learning_rate": 4.796847116562275e-06, "loss": 2.2081, "step": 14477 }, { "epoch": 0.7767167381974249, "grad_norm": 0.4921875, "learning_rate": 4.796812810630711e-06, "loss": 2.395, "step": 14478 }, { "epoch": 0.7767703862660944, "grad_norm": 0.439453125, "learning_rate": 4.796778501925507e-06, "loss": 2.3647, "step": 14479 }, { "epoch": 0.776824034334764, "grad_norm": 0.57421875, "learning_rate": 4.796744190446706e-06, "loss": 2.4063, "step": 14480 }, { "epoch": 0.7768776824034335, "grad_norm": 0.46484375, "learning_rate": 4.796709876194347e-06, "loss": 2.4133, "step": 14481 }, { "epoch": 0.776931330472103, "grad_norm": 0.435546875, "learning_rate": 4.796675559168473e-06, "loss": 2.1628, "step": 14482 }, { "epoch": 0.7769849785407725, "grad_norm": 0.427734375, "learning_rate": 4.796641239369125e-06, "loss": 2.2059, "step": 14483 }, { "epoch": 0.777038626609442, "grad_norm": 0.4453125, "learning_rate": 4.796606916796345e-06, "loss": 2.1382, "step": 14484 }, { "epoch": 0.7770922746781116, "grad_norm": 0.443359375, "learning_rate": 4.796572591450173e-06, "loss": 2.2868, "step": 14485 }, { "epoch": 0.7771459227467811, "grad_norm": 0.6640625, "learning_rate": 4.796538263330651e-06, "loss": 2.4208, "step": 14486 }, { "epoch": 0.7771995708154507, "grad_norm": 0.484375, "learning_rate": 4.796503932437821e-06, "loss": 2.1625, "step": 14487 }, { "epoch": 0.7772532188841201, "grad_norm": 0.56640625, "learning_rate": 4.7964695987717256e-06, "loss": 2.349, "step": 14488 }, { "epoch": 0.7773068669527897, "grad_norm": 0.48046875, "learning_rate": 4.796435262332404e-06, "loss": 1.8406, "step": 14489 }, { "epoch": 0.7773605150214592, "grad_norm": 0.466796875, "learning_rate": 4.796400923119898e-06, "loss": 2.4664, "step": 14490 }, { "epoch": 0.7774141630901288, "grad_norm": 0.44921875, "learning_rate": 4.796366581134251e-06, "loss": 2.5074, "step": 14491 }, { "epoch": 0.7774678111587983, "grad_norm": 0.5, "learning_rate": 4.796332236375501e-06, "loss": 2.298, "step": 14492 }, { "epoch": 0.7775214592274678, "grad_norm": 0.41796875, "learning_rate": 4.796297888843694e-06, "loss": 2.3252, "step": 14493 }, { "epoch": 0.7775751072961373, "grad_norm": 0.5703125, "learning_rate": 4.796263538538868e-06, "loss": 2.2563, "step": 14494 }, { "epoch": 0.7776287553648069, "grad_norm": 0.453125, "learning_rate": 4.796229185461066e-06, "loss": 2.0714, "step": 14495 }, { "epoch": 0.7776824034334764, "grad_norm": 0.447265625, "learning_rate": 4.796194829610328e-06, "loss": 2.2578, "step": 14496 }, { "epoch": 0.777736051502146, "grad_norm": 0.953125, "learning_rate": 4.796160470986697e-06, "loss": 2.392, "step": 14497 }, { "epoch": 0.7777896995708155, "grad_norm": 0.470703125, "learning_rate": 4.796126109590214e-06, "loss": 2.1855, "step": 14498 }, { "epoch": 0.7778433476394849, "grad_norm": 0.7109375, "learning_rate": 4.796091745420921e-06, "loss": 2.338, "step": 14499 }, { "epoch": 0.7778969957081545, "grad_norm": 0.55078125, "learning_rate": 4.796057378478858e-06, "loss": 2.4692, "step": 14500 }, { "epoch": 0.777950643776824, "grad_norm": 0.4921875, "learning_rate": 4.796023008764068e-06, "loss": 2.2266, "step": 14501 }, { "epoch": 0.7780042918454936, "grad_norm": 0.78125, "learning_rate": 4.795988636276592e-06, "loss": 2.2951, "step": 14502 }, { "epoch": 0.778057939914163, "grad_norm": 0.392578125, "learning_rate": 4.795954261016472e-06, "loss": 2.3415, "step": 14503 }, { "epoch": 0.7781115879828326, "grad_norm": 0.46875, "learning_rate": 4.795919882983748e-06, "loss": 2.3257, "step": 14504 }, { "epoch": 0.7781652360515021, "grad_norm": 0.412109375, "learning_rate": 4.795885502178463e-06, "loss": 2.1355, "step": 14505 }, { "epoch": 0.7782188841201717, "grad_norm": 0.5078125, "learning_rate": 4.795851118600658e-06, "loss": 2.0922, "step": 14506 }, { "epoch": 0.7782725321888412, "grad_norm": 0.421875, "learning_rate": 4.795816732250375e-06, "loss": 2.2482, "step": 14507 }, { "epoch": 0.7783261802575108, "grad_norm": 0.41796875, "learning_rate": 4.795782343127655e-06, "loss": 2.3502, "step": 14508 }, { "epoch": 0.7783798283261802, "grad_norm": 0.443359375, "learning_rate": 4.79574795123254e-06, "loss": 2.2572, "step": 14509 }, { "epoch": 0.7784334763948498, "grad_norm": 0.4296875, "learning_rate": 4.795713556565071e-06, "loss": 2.2804, "step": 14510 }, { "epoch": 0.7784871244635193, "grad_norm": 0.427734375, "learning_rate": 4.79567915912529e-06, "loss": 2.3782, "step": 14511 }, { "epoch": 0.7785407725321889, "grad_norm": 0.5, "learning_rate": 4.795644758913237e-06, "loss": 2.1153, "step": 14512 }, { "epoch": 0.7785944206008584, "grad_norm": 0.42578125, "learning_rate": 4.795610355928956e-06, "loss": 2.2412, "step": 14513 }, { "epoch": 0.7786480686695278, "grad_norm": 0.388671875, "learning_rate": 4.795575950172486e-06, "loss": 2.1538, "step": 14514 }, { "epoch": 0.7787017167381974, "grad_norm": 0.453125, "learning_rate": 4.795541541643871e-06, "loss": 2.5133, "step": 14515 }, { "epoch": 0.7787553648068669, "grad_norm": 0.421875, "learning_rate": 4.795507130343151e-06, "loss": 2.2768, "step": 14516 }, { "epoch": 0.7788090128755365, "grad_norm": 0.4609375, "learning_rate": 4.7954727162703685e-06, "loss": 2.257, "step": 14517 }, { "epoch": 0.778862660944206, "grad_norm": 0.44921875, "learning_rate": 4.795438299425564e-06, "loss": 2.1037, "step": 14518 }, { "epoch": 0.7789163090128756, "grad_norm": 0.462890625, "learning_rate": 4.79540387980878e-06, "loss": 2.1329, "step": 14519 }, { "epoch": 0.778969957081545, "grad_norm": 0.66015625, "learning_rate": 4.795369457420057e-06, "loss": 2.2606, "step": 14520 }, { "epoch": 0.7790236051502146, "grad_norm": 0.447265625, "learning_rate": 4.795335032259437e-06, "loss": 2.2949, "step": 14521 }, { "epoch": 0.7790772532188841, "grad_norm": 0.49609375, "learning_rate": 4.795300604326964e-06, "loss": 2.2625, "step": 14522 }, { "epoch": 0.7791309012875537, "grad_norm": 0.5234375, "learning_rate": 4.795266173622676e-06, "loss": 2.4439, "step": 14523 }, { "epoch": 0.7791845493562232, "grad_norm": 0.498046875, "learning_rate": 4.795231740146615e-06, "loss": 2.0711, "step": 14524 }, { "epoch": 0.7792381974248928, "grad_norm": 0.4140625, "learning_rate": 4.795197303898824e-06, "loss": 2.3683, "step": 14525 }, { "epoch": 0.7792918454935622, "grad_norm": 0.4140625, "learning_rate": 4.795162864879345e-06, "loss": 2.2347, "step": 14526 }, { "epoch": 0.7793454935622317, "grad_norm": 0.42578125, "learning_rate": 4.795128423088218e-06, "loss": 2.4972, "step": 14527 }, { "epoch": 0.7793991416309013, "grad_norm": 0.4296875, "learning_rate": 4.795093978525486e-06, "loss": 2.4036, "step": 14528 }, { "epoch": 0.7794527896995708, "grad_norm": 0.439453125, "learning_rate": 4.795059531191189e-06, "loss": 2.3455, "step": 14529 }, { "epoch": 0.7795064377682404, "grad_norm": 0.55859375, "learning_rate": 4.79502508108537e-06, "loss": 2.5054, "step": 14530 }, { "epoch": 0.7795600858369098, "grad_norm": 0.53125, "learning_rate": 4.79499062820807e-06, "loss": 2.5391, "step": 14531 }, { "epoch": 0.7796137339055794, "grad_norm": 0.54296875, "learning_rate": 4.794956172559331e-06, "loss": 2.1563, "step": 14532 }, { "epoch": 0.7796673819742489, "grad_norm": 0.4453125, "learning_rate": 4.794921714139194e-06, "loss": 2.3001, "step": 14533 }, { "epoch": 0.7797210300429185, "grad_norm": 0.443359375, "learning_rate": 4.794887252947701e-06, "loss": 2.3595, "step": 14534 }, { "epoch": 0.779774678111588, "grad_norm": 0.498046875, "learning_rate": 4.794852788984894e-06, "loss": 2.2578, "step": 14535 }, { "epoch": 0.7798283261802575, "grad_norm": 0.447265625, "learning_rate": 4.794818322250814e-06, "loss": 2.3912, "step": 14536 }, { "epoch": 0.779881974248927, "grad_norm": 0.58203125, "learning_rate": 4.794783852745503e-06, "loss": 2.4363, "step": 14537 }, { "epoch": 0.7799356223175966, "grad_norm": 0.45703125, "learning_rate": 4.794749380469001e-06, "loss": 2.0799, "step": 14538 }, { "epoch": 0.7799892703862661, "grad_norm": 0.93359375, "learning_rate": 4.7947149054213525e-06, "loss": 2.1512, "step": 14539 }, { "epoch": 0.7800429184549357, "grad_norm": 0.46875, "learning_rate": 4.794680427602597e-06, "loss": 2.3873, "step": 14540 }, { "epoch": 0.7800965665236052, "grad_norm": 0.4765625, "learning_rate": 4.794645947012777e-06, "loss": 2.3949, "step": 14541 }, { "epoch": 0.7801502145922746, "grad_norm": 0.58203125, "learning_rate": 4.7946114636519346e-06, "loss": 2.3206, "step": 14542 }, { "epoch": 0.7802038626609442, "grad_norm": 0.52734375, "learning_rate": 4.794576977520109e-06, "loss": 2.4687, "step": 14543 }, { "epoch": 0.7802575107296137, "grad_norm": 0.39453125, "learning_rate": 4.794542488617345e-06, "loss": 2.2009, "step": 14544 }, { "epoch": 0.7803111587982833, "grad_norm": 0.419921875, "learning_rate": 4.794507996943684e-06, "loss": 2.2605, "step": 14545 }, { "epoch": 0.7803648068669528, "grad_norm": 0.42578125, "learning_rate": 4.794473502499164e-06, "loss": 2.2441, "step": 14546 }, { "epoch": 0.7804184549356223, "grad_norm": 0.625, "learning_rate": 4.794439005283831e-06, "loss": 2.3667, "step": 14547 }, { "epoch": 0.7804721030042918, "grad_norm": 0.474609375, "learning_rate": 4.794404505297724e-06, "loss": 2.5692, "step": 14548 }, { "epoch": 0.7805257510729614, "grad_norm": 0.45703125, "learning_rate": 4.794370002540886e-06, "loss": 2.3148, "step": 14549 }, { "epoch": 0.7805793991416309, "grad_norm": 0.48046875, "learning_rate": 4.794335497013358e-06, "loss": 2.2902, "step": 14550 }, { "epoch": 0.7806330472103005, "grad_norm": 0.51171875, "learning_rate": 4.794300988715183e-06, "loss": 2.0916, "step": 14551 }, { "epoch": 0.7806866952789699, "grad_norm": 0.52734375, "learning_rate": 4.7942664776464e-06, "loss": 2.286, "step": 14552 }, { "epoch": 0.7807403433476395, "grad_norm": 0.44140625, "learning_rate": 4.794231963807052e-06, "loss": 2.0869, "step": 14553 }, { "epoch": 0.780793991416309, "grad_norm": 0.5625, "learning_rate": 4.794197447197182e-06, "loss": 2.4849, "step": 14554 }, { "epoch": 0.7808476394849786, "grad_norm": 0.5078125, "learning_rate": 4.79416292781683e-06, "loss": 2.5819, "step": 14555 }, { "epoch": 0.7809012875536481, "grad_norm": 0.478515625, "learning_rate": 4.794128405666039e-06, "loss": 2.5375, "step": 14556 }, { "epoch": 0.7809549356223175, "grad_norm": 0.42578125, "learning_rate": 4.794093880744849e-06, "loss": 2.306, "step": 14557 }, { "epoch": 0.7810085836909871, "grad_norm": 0.435546875, "learning_rate": 4.794059353053303e-06, "loss": 2.2116, "step": 14558 }, { "epoch": 0.7810622317596566, "grad_norm": 0.451171875, "learning_rate": 4.794024822591442e-06, "loss": 2.3188, "step": 14559 }, { "epoch": 0.7811158798283262, "grad_norm": 0.4609375, "learning_rate": 4.793990289359308e-06, "loss": 2.2114, "step": 14560 }, { "epoch": 0.7811695278969957, "grad_norm": 0.6328125, "learning_rate": 4.793955753356943e-06, "loss": 2.4891, "step": 14561 }, { "epoch": 0.7812231759656653, "grad_norm": 0.60546875, "learning_rate": 4.793921214584388e-06, "loss": 2.3644, "step": 14562 }, { "epoch": 0.7812768240343347, "grad_norm": 1.3828125, "learning_rate": 4.793886673041686e-06, "loss": 2.2976, "step": 14563 }, { "epoch": 0.7813304721030043, "grad_norm": 0.46875, "learning_rate": 4.793852128728878e-06, "loss": 2.2281, "step": 14564 }, { "epoch": 0.7813841201716738, "grad_norm": 0.48046875, "learning_rate": 4.793817581646004e-06, "loss": 2.4457, "step": 14565 }, { "epoch": 0.7814377682403434, "grad_norm": 0.412109375, "learning_rate": 4.793783031793109e-06, "loss": 1.7771, "step": 14566 }, { "epoch": 0.7814914163090129, "grad_norm": 0.494140625, "learning_rate": 4.793748479170232e-06, "loss": 2.0835, "step": 14567 }, { "epoch": 0.7815450643776825, "grad_norm": 0.47265625, "learning_rate": 4.793713923777416e-06, "loss": 2.2676, "step": 14568 }, { "epoch": 0.7815987124463519, "grad_norm": 0.6953125, "learning_rate": 4.793679365614703e-06, "loss": 2.0326, "step": 14569 }, { "epoch": 0.7816523605150214, "grad_norm": 0.50390625, "learning_rate": 4.793644804682134e-06, "loss": 2.3929, "step": 14570 }, { "epoch": 0.781706008583691, "grad_norm": 0.46875, "learning_rate": 4.79361024097975e-06, "loss": 2.4483, "step": 14571 }, { "epoch": 0.7817596566523605, "grad_norm": 0.419921875, "learning_rate": 4.7935756745075944e-06, "loss": 2.1311, "step": 14572 }, { "epoch": 0.7818133047210301, "grad_norm": 0.50390625, "learning_rate": 4.793541105265709e-06, "loss": 2.4549, "step": 14573 }, { "epoch": 0.7818669527896995, "grad_norm": 1.0625, "learning_rate": 4.793506533254133e-06, "loss": 2.1898, "step": 14574 }, { "epoch": 0.7819206008583691, "grad_norm": 0.431640625, "learning_rate": 4.79347195847291e-06, "loss": 2.317, "step": 14575 }, { "epoch": 0.7819742489270386, "grad_norm": 0.48828125, "learning_rate": 4.793437380922084e-06, "loss": 2.1841, "step": 14576 }, { "epoch": 0.7820278969957082, "grad_norm": 0.62109375, "learning_rate": 4.793402800601692e-06, "loss": 2.3028, "step": 14577 }, { "epoch": 0.7820815450643777, "grad_norm": 0.498046875, "learning_rate": 4.793368217511779e-06, "loss": 2.1613, "step": 14578 }, { "epoch": 0.7821351931330472, "grad_norm": 0.466796875, "learning_rate": 4.7933336316523865e-06, "loss": 2.232, "step": 14579 }, { "epoch": 0.7821888412017167, "grad_norm": 0.490234375, "learning_rate": 4.7932990430235555e-06, "loss": 2.3265, "step": 14580 }, { "epoch": 0.7822424892703863, "grad_norm": 0.37890625, "learning_rate": 4.793264451625328e-06, "loss": 2.2934, "step": 14581 }, { "epoch": 0.7822961373390558, "grad_norm": 0.484375, "learning_rate": 4.793229857457745e-06, "loss": 2.3285, "step": 14582 }, { "epoch": 0.7823497854077254, "grad_norm": 0.466796875, "learning_rate": 4.79319526052085e-06, "loss": 2.277, "step": 14583 }, { "epoch": 0.7824034334763948, "grad_norm": 0.41796875, "learning_rate": 4.793160660814683e-06, "loss": 2.3666, "step": 14584 }, { "epoch": 0.7824570815450643, "grad_norm": 0.458984375, "learning_rate": 4.793126058339287e-06, "loss": 2.1824, "step": 14585 }, { "epoch": 0.7825107296137339, "grad_norm": 0.46875, "learning_rate": 4.7930914530947035e-06, "loss": 2.3356, "step": 14586 }, { "epoch": 0.7825643776824034, "grad_norm": 0.45703125, "learning_rate": 4.793056845080974e-06, "loss": 2.3284, "step": 14587 }, { "epoch": 0.782618025751073, "grad_norm": 0.435546875, "learning_rate": 4.79302223429814e-06, "loss": 2.1021, "step": 14588 }, { "epoch": 0.7826716738197425, "grad_norm": 0.37890625, "learning_rate": 4.792987620746245e-06, "loss": 2.3638, "step": 14589 }, { "epoch": 0.782725321888412, "grad_norm": 0.5078125, "learning_rate": 4.792953004425328e-06, "loss": 2.0912, "step": 14590 }, { "epoch": 0.7827789699570815, "grad_norm": 0.400390625, "learning_rate": 4.792918385335433e-06, "loss": 2.0514, "step": 14591 }, { "epoch": 0.7828326180257511, "grad_norm": 0.609375, "learning_rate": 4.792883763476601e-06, "loss": 2.137, "step": 14592 }, { "epoch": 0.7828862660944206, "grad_norm": 1.3046875, "learning_rate": 4.7928491388488755e-06, "loss": 2.2562, "step": 14593 }, { "epoch": 0.7829399141630902, "grad_norm": 0.3671875, "learning_rate": 4.792814511452295e-06, "loss": 2.1781, "step": 14594 }, { "epoch": 0.7829935622317596, "grad_norm": 0.4140625, "learning_rate": 4.792779881286904e-06, "loss": 2.1157, "step": 14595 }, { "epoch": 0.7830472103004292, "grad_norm": 0.44921875, "learning_rate": 4.792745248352742e-06, "loss": 2.3413, "step": 14596 }, { "epoch": 0.7831008583690987, "grad_norm": 0.478515625, "learning_rate": 4.792710612649854e-06, "loss": 2.2012, "step": 14597 }, { "epoch": 0.7831545064377683, "grad_norm": 0.50390625, "learning_rate": 4.7926759741782794e-06, "loss": 2.3261, "step": 14598 }, { "epoch": 0.7832081545064378, "grad_norm": 0.45703125, "learning_rate": 4.79264133293806e-06, "loss": 2.2995, "step": 14599 }, { "epoch": 0.7832618025751072, "grad_norm": 0.435546875, "learning_rate": 4.79260668892924e-06, "loss": 2.2485, "step": 14600 }, { "epoch": 0.7833154506437768, "grad_norm": 0.400390625, "learning_rate": 4.792572042151858e-06, "loss": 2.1225, "step": 14601 }, { "epoch": 0.7833690987124463, "grad_norm": 0.478515625, "learning_rate": 4.7925373926059584e-06, "loss": 2.323, "step": 14602 }, { "epoch": 0.7834227467811159, "grad_norm": 0.416015625, "learning_rate": 4.792502740291581e-06, "loss": 2.1535, "step": 14603 }, { "epoch": 0.7834763948497854, "grad_norm": 0.5, "learning_rate": 4.79246808520877e-06, "loss": 2.1792, "step": 14604 }, { "epoch": 0.783530042918455, "grad_norm": 0.515625, "learning_rate": 4.792433427357565e-06, "loss": 2.2012, "step": 14605 }, { "epoch": 0.7835836909871244, "grad_norm": 0.5703125, "learning_rate": 4.792398766738009e-06, "loss": 2.5013, "step": 14606 }, { "epoch": 0.783637339055794, "grad_norm": 0.51953125, "learning_rate": 4.792364103350145e-06, "loss": 2.322, "step": 14607 }, { "epoch": 0.7836909871244635, "grad_norm": 0.474609375, "learning_rate": 4.792329437194011e-06, "loss": 2.3374, "step": 14608 }, { "epoch": 0.7837446351931331, "grad_norm": 0.423828125, "learning_rate": 4.792294768269653e-06, "loss": 2.5436, "step": 14609 }, { "epoch": 0.7837982832618026, "grad_norm": 0.484375, "learning_rate": 4.792260096577111e-06, "loss": 2.4556, "step": 14610 }, { "epoch": 0.7838519313304722, "grad_norm": 0.45703125, "learning_rate": 4.792225422116427e-06, "loss": 2.2135, "step": 14611 }, { "epoch": 0.7839055793991416, "grad_norm": 0.47265625, "learning_rate": 4.792190744887643e-06, "loss": 2.1305, "step": 14612 }, { "epoch": 0.7839592274678111, "grad_norm": 0.458984375, "learning_rate": 4.7921560648908e-06, "loss": 2.3914, "step": 14613 }, { "epoch": 0.7840128755364807, "grad_norm": 0.48046875, "learning_rate": 4.792121382125943e-06, "loss": 2.2441, "step": 14614 }, { "epoch": 0.7840665236051502, "grad_norm": 0.388671875, "learning_rate": 4.79208669659311e-06, "loss": 1.7687, "step": 14615 }, { "epoch": 0.7841201716738198, "grad_norm": 0.408203125, "learning_rate": 4.792052008292345e-06, "loss": 2.0941, "step": 14616 }, { "epoch": 0.7841738197424892, "grad_norm": 0.462890625, "learning_rate": 4.792017317223689e-06, "loss": 2.3881, "step": 14617 }, { "epoch": 0.7842274678111588, "grad_norm": 0.484375, "learning_rate": 4.791982623387185e-06, "loss": 2.3265, "step": 14618 }, { "epoch": 0.7842811158798283, "grad_norm": 0.470703125, "learning_rate": 4.791947926782873e-06, "loss": 2.22, "step": 14619 }, { "epoch": 0.7843347639484979, "grad_norm": 0.44140625, "learning_rate": 4.791913227410797e-06, "loss": 2.2368, "step": 14620 }, { "epoch": 0.7843884120171674, "grad_norm": 0.431640625, "learning_rate": 4.791878525270998e-06, "loss": 2.1366, "step": 14621 }, { "epoch": 0.784442060085837, "grad_norm": 0.466796875, "learning_rate": 4.791843820363517e-06, "loss": 2.2587, "step": 14622 }, { "epoch": 0.7844957081545064, "grad_norm": 0.52734375, "learning_rate": 4.791809112688398e-06, "loss": 2.4076, "step": 14623 }, { "epoch": 0.784549356223176, "grad_norm": 0.53515625, "learning_rate": 4.791774402245681e-06, "loss": 2.3506, "step": 14624 }, { "epoch": 0.7846030042918455, "grad_norm": 0.59765625, "learning_rate": 4.791739689035408e-06, "loss": 2.2251, "step": 14625 }, { "epoch": 0.7846566523605151, "grad_norm": 0.53515625, "learning_rate": 4.791704973057623e-06, "loss": 2.4132, "step": 14626 }, { "epoch": 0.7847103004291845, "grad_norm": 0.44921875, "learning_rate": 4.791670254312366e-06, "loss": 2.2336, "step": 14627 }, { "epoch": 0.784763948497854, "grad_norm": 0.80078125, "learning_rate": 4.791635532799679e-06, "loss": 2.411, "step": 14628 }, { "epoch": 0.7848175965665236, "grad_norm": 0.412109375, "learning_rate": 4.791600808519605e-06, "loss": 2.3299, "step": 14629 }, { "epoch": 0.7848712446351931, "grad_norm": 0.490234375, "learning_rate": 4.791566081472185e-06, "loss": 2.2813, "step": 14630 }, { "epoch": 0.7849248927038627, "grad_norm": 0.470703125, "learning_rate": 4.7915313516574616e-06, "loss": 2.1457, "step": 14631 }, { "epoch": 0.7849785407725322, "grad_norm": 0.41796875, "learning_rate": 4.791496619075475e-06, "loss": 2.1753, "step": 14632 }, { "epoch": 0.7850321888412017, "grad_norm": 0.91015625, "learning_rate": 4.79146188372627e-06, "loss": 2.5532, "step": 14633 }, { "epoch": 0.7850858369098712, "grad_norm": 0.49609375, "learning_rate": 4.791427145609886e-06, "loss": 2.0606, "step": 14634 }, { "epoch": 0.7851394849785408, "grad_norm": 0.439453125, "learning_rate": 4.791392404726367e-06, "loss": 2.3166, "step": 14635 }, { "epoch": 0.7851931330472103, "grad_norm": 0.484375, "learning_rate": 4.791357661075753e-06, "loss": 2.2268, "step": 14636 }, { "epoch": 0.7852467811158799, "grad_norm": 0.4296875, "learning_rate": 4.791322914658088e-06, "loss": 2.314, "step": 14637 }, { "epoch": 0.7853004291845493, "grad_norm": 0.447265625, "learning_rate": 4.791288165473411e-06, "loss": 2.0048, "step": 14638 }, { "epoch": 0.7853540772532189, "grad_norm": 0.41015625, "learning_rate": 4.791253413521768e-06, "loss": 2.1938, "step": 14639 }, { "epoch": 0.7854077253218884, "grad_norm": 0.474609375, "learning_rate": 4.791218658803197e-06, "loss": 2.2015, "step": 14640 }, { "epoch": 0.785461373390558, "grad_norm": 0.51953125, "learning_rate": 4.791183901317743e-06, "loss": 2.2636, "step": 14641 }, { "epoch": 0.7855150214592275, "grad_norm": 0.466796875, "learning_rate": 4.791149141065447e-06, "loss": 2.4919, "step": 14642 }, { "epoch": 0.785568669527897, "grad_norm": 0.69140625, "learning_rate": 4.791114378046349e-06, "loss": 2.2087, "step": 14643 }, { "epoch": 0.7856223175965665, "grad_norm": 0.5703125, "learning_rate": 4.791079612260494e-06, "loss": 2.1224, "step": 14644 }, { "epoch": 0.785675965665236, "grad_norm": 0.5546875, "learning_rate": 4.791044843707922e-06, "loss": 2.0675, "step": 14645 }, { "epoch": 0.7857296137339056, "grad_norm": 0.484375, "learning_rate": 4.791010072388676e-06, "loss": 2.1578, "step": 14646 }, { "epoch": 0.7857832618025751, "grad_norm": 0.474609375, "learning_rate": 4.790975298302798e-06, "loss": 2.1783, "step": 14647 }, { "epoch": 0.7858369098712447, "grad_norm": 0.99609375, "learning_rate": 4.790940521450329e-06, "loss": 2.3515, "step": 14648 }, { "epoch": 0.7858905579399141, "grad_norm": 0.50390625, "learning_rate": 4.790905741831312e-06, "loss": 2.3275, "step": 14649 }, { "epoch": 0.7859442060085837, "grad_norm": 0.482421875, "learning_rate": 4.790870959445788e-06, "loss": 2.1106, "step": 14650 }, { "epoch": 0.7859978540772532, "grad_norm": 0.474609375, "learning_rate": 4.7908361742938e-06, "loss": 2.3451, "step": 14651 }, { "epoch": 0.7860515021459228, "grad_norm": 0.451171875, "learning_rate": 4.79080138637539e-06, "loss": 2.1077, "step": 14652 }, { "epoch": 0.7861051502145923, "grad_norm": 0.451171875, "learning_rate": 4.7907665956906e-06, "loss": 2.2702, "step": 14653 }, { "epoch": 0.7861587982832619, "grad_norm": 0.435546875, "learning_rate": 4.790731802239471e-06, "loss": 2.1435, "step": 14654 }, { "epoch": 0.7862124463519313, "grad_norm": 0.4296875, "learning_rate": 4.790697006022046e-06, "loss": 2.2483, "step": 14655 }, { "epoch": 0.7862660944206008, "grad_norm": 0.4453125, "learning_rate": 4.790662207038367e-06, "loss": 2.3219, "step": 14656 }, { "epoch": 0.7863197424892704, "grad_norm": 0.51953125, "learning_rate": 4.7906274052884746e-06, "loss": 1.9913, "step": 14657 }, { "epoch": 0.7863733905579399, "grad_norm": 0.7578125, "learning_rate": 4.790592600772413e-06, "loss": 2.4476, "step": 14658 }, { "epoch": 0.7864270386266095, "grad_norm": 0.46484375, "learning_rate": 4.790557793490223e-06, "loss": 2.0521, "step": 14659 }, { "epoch": 0.7864806866952789, "grad_norm": 0.5625, "learning_rate": 4.790522983441946e-06, "loss": 2.195, "step": 14660 }, { "epoch": 0.7865343347639485, "grad_norm": 0.494140625, "learning_rate": 4.790488170627626e-06, "loss": 2.1466, "step": 14661 }, { "epoch": 0.786587982832618, "grad_norm": 0.392578125, "learning_rate": 4.790453355047304e-06, "loss": 1.8616, "step": 14662 }, { "epoch": 0.7866416309012876, "grad_norm": 0.4921875, "learning_rate": 4.790418536701022e-06, "loss": 2.3934, "step": 14663 }, { "epoch": 0.7866952789699571, "grad_norm": 0.376953125, "learning_rate": 4.79038371558882e-06, "loss": 2.0744, "step": 14664 }, { "epoch": 0.7867489270386266, "grad_norm": 0.53515625, "learning_rate": 4.790348891710745e-06, "loss": 2.187, "step": 14665 }, { "epoch": 0.7868025751072961, "grad_norm": 0.53125, "learning_rate": 4.790314065066834e-06, "loss": 2.5473, "step": 14666 }, { "epoch": 0.7868562231759657, "grad_norm": 0.435546875, "learning_rate": 4.790279235657131e-06, "loss": 2.2804, "step": 14667 }, { "epoch": 0.7869098712446352, "grad_norm": 0.5078125, "learning_rate": 4.790244403481679e-06, "loss": 2.4452, "step": 14668 }, { "epoch": 0.7869635193133048, "grad_norm": 0.46875, "learning_rate": 4.790209568540519e-06, "loss": 2.3546, "step": 14669 }, { "epoch": 0.7870171673819742, "grad_norm": 1.078125, "learning_rate": 4.790174730833694e-06, "loss": 2.1197, "step": 14670 }, { "epoch": 0.7870708154506437, "grad_norm": 0.39453125, "learning_rate": 4.790139890361245e-06, "loss": 2.1185, "step": 14671 }, { "epoch": 0.7871244635193133, "grad_norm": 0.470703125, "learning_rate": 4.790105047123214e-06, "loss": 2.341, "step": 14672 }, { "epoch": 0.7871781115879828, "grad_norm": 0.4375, "learning_rate": 4.790070201119644e-06, "loss": 2.2212, "step": 14673 }, { "epoch": 0.7872317596566524, "grad_norm": 0.41015625, "learning_rate": 4.790035352350577e-06, "loss": 2.24, "step": 14674 }, { "epoch": 0.7872854077253219, "grad_norm": 0.4765625, "learning_rate": 4.790000500816054e-06, "loss": 1.608, "step": 14675 }, { "epoch": 0.7873390557939914, "grad_norm": 0.4453125, "learning_rate": 4.789965646516118e-06, "loss": 2.186, "step": 14676 }, { "epoch": 0.7873927038626609, "grad_norm": 0.51171875, "learning_rate": 4.7899307894508115e-06, "loss": 2.2727, "step": 14677 }, { "epoch": 0.7874463519313305, "grad_norm": 0.52734375, "learning_rate": 4.789895929620175e-06, "loss": 2.6985, "step": 14678 }, { "epoch": 0.7875, "grad_norm": 0.482421875, "learning_rate": 4.789861067024253e-06, "loss": 2.4237, "step": 14679 }, { "epoch": 0.7875536480686696, "grad_norm": 0.40625, "learning_rate": 4.7898262016630844e-06, "loss": 2.1956, "step": 14680 }, { "epoch": 0.787607296137339, "grad_norm": 0.484375, "learning_rate": 4.789791333536715e-06, "loss": 2.3287, "step": 14681 }, { "epoch": 0.7876609442060086, "grad_norm": 0.47265625, "learning_rate": 4.789756462645183e-06, "loss": 2.2545, "step": 14682 }, { "epoch": 0.7877145922746781, "grad_norm": 0.4765625, "learning_rate": 4.789721588988534e-06, "loss": 2.2114, "step": 14683 }, { "epoch": 0.7877682403433477, "grad_norm": 0.37890625, "learning_rate": 4.789686712566809e-06, "loss": 2.1673, "step": 14684 }, { "epoch": 0.7878218884120172, "grad_norm": 0.45703125, "learning_rate": 4.789651833380049e-06, "loss": 2.5978, "step": 14685 }, { "epoch": 0.7878755364806866, "grad_norm": 0.435546875, "learning_rate": 4.789616951428296e-06, "loss": 2.0426, "step": 14686 }, { "epoch": 0.7879291845493562, "grad_norm": 0.46875, "learning_rate": 4.789582066711595e-06, "loss": 2.4358, "step": 14687 }, { "epoch": 0.7879828326180257, "grad_norm": 0.451171875, "learning_rate": 4.7895471792299845e-06, "loss": 2.2802, "step": 14688 }, { "epoch": 0.7880364806866953, "grad_norm": 0.427734375, "learning_rate": 4.789512288983508e-06, "loss": 2.2262, "step": 14689 }, { "epoch": 0.7880901287553648, "grad_norm": 0.62109375, "learning_rate": 4.789477395972208e-06, "loss": 2.1964, "step": 14690 }, { "epoch": 0.7881437768240344, "grad_norm": 0.71484375, "learning_rate": 4.789442500196128e-06, "loss": 2.3911, "step": 14691 }, { "epoch": 0.7881974248927038, "grad_norm": 0.49609375, "learning_rate": 4.789407601655307e-06, "loss": 2.2212, "step": 14692 }, { "epoch": 0.7882510729613734, "grad_norm": 0.51171875, "learning_rate": 4.789372700349791e-06, "loss": 2.3366, "step": 14693 }, { "epoch": 0.7883047210300429, "grad_norm": 0.36328125, "learning_rate": 4.789337796279617e-06, "loss": 2.1454, "step": 14694 }, { "epoch": 0.7883583690987125, "grad_norm": 0.53515625, "learning_rate": 4.789302889444833e-06, "loss": 2.3455, "step": 14695 }, { "epoch": 0.788412017167382, "grad_norm": 0.458984375, "learning_rate": 4.789267979845476e-06, "loss": 2.3686, "step": 14696 }, { "epoch": 0.7884656652360515, "grad_norm": 0.498046875, "learning_rate": 4.789233067481591e-06, "loss": 2.0671, "step": 14697 }, { "epoch": 0.788519313304721, "grad_norm": 0.474609375, "learning_rate": 4.78919815235322e-06, "loss": 2.4455, "step": 14698 }, { "epoch": 0.7885729613733906, "grad_norm": 0.47265625, "learning_rate": 4.789163234460405e-06, "loss": 2.4577, "step": 14699 }, { "epoch": 0.7886266094420601, "grad_norm": 0.482421875, "learning_rate": 4.789128313803187e-06, "loss": 1.8383, "step": 14700 }, { "epoch": 0.7886802575107296, "grad_norm": 0.46484375, "learning_rate": 4.789093390381609e-06, "loss": 2.0105, "step": 14701 }, { "epoch": 0.7887339055793992, "grad_norm": 0.49609375, "learning_rate": 4.7890584641957134e-06, "loss": 2.3868, "step": 14702 }, { "epoch": 0.7887875536480686, "grad_norm": 0.400390625, "learning_rate": 4.7890235352455436e-06, "loss": 2.4835, "step": 14703 }, { "epoch": 0.7888412017167382, "grad_norm": 1.40625, "learning_rate": 4.788988603531139e-06, "loss": 2.1504, "step": 14704 }, { "epoch": 0.7888948497854077, "grad_norm": 0.482421875, "learning_rate": 4.788953669052544e-06, "loss": 2.3959, "step": 14705 }, { "epoch": 0.7889484978540773, "grad_norm": 0.447265625, "learning_rate": 4.788918731809799e-06, "loss": 2.056, "step": 14706 }, { "epoch": 0.7890021459227468, "grad_norm": 0.423828125, "learning_rate": 4.788883791802948e-06, "loss": 2.236, "step": 14707 }, { "epoch": 0.7890557939914163, "grad_norm": 0.46484375, "learning_rate": 4.788848849032032e-06, "loss": 2.2771, "step": 14708 }, { "epoch": 0.7891094420600858, "grad_norm": 0.435546875, "learning_rate": 4.788813903497094e-06, "loss": 2.5129, "step": 14709 }, { "epoch": 0.7891630901287554, "grad_norm": 0.4765625, "learning_rate": 4.788778955198174e-06, "loss": 2.377, "step": 14710 }, { "epoch": 0.7892167381974249, "grad_norm": 0.40234375, "learning_rate": 4.788744004135318e-06, "loss": 2.1584, "step": 14711 }, { "epoch": 0.7892703862660945, "grad_norm": 0.4765625, "learning_rate": 4.788709050308566e-06, "loss": 2.1859, "step": 14712 }, { "epoch": 0.789324034334764, "grad_norm": 0.61328125, "learning_rate": 4.788674093717959e-06, "loss": 2.2791, "step": 14713 }, { "epoch": 0.7893776824034334, "grad_norm": 0.609375, "learning_rate": 4.7886391343635415e-06, "loss": 2.0221, "step": 14714 }, { "epoch": 0.789431330472103, "grad_norm": 0.53515625, "learning_rate": 4.788604172245355e-06, "loss": 2.3941, "step": 14715 }, { "epoch": 0.7894849785407725, "grad_norm": 0.55078125, "learning_rate": 4.7885692073634415e-06, "loss": 2.2268, "step": 14716 }, { "epoch": 0.7895386266094421, "grad_norm": 0.515625, "learning_rate": 4.788534239717843e-06, "loss": 2.2491, "step": 14717 }, { "epoch": 0.7895922746781115, "grad_norm": 0.48828125, "learning_rate": 4.788499269308602e-06, "loss": 2.0192, "step": 14718 }, { "epoch": 0.7896459227467811, "grad_norm": 0.498046875, "learning_rate": 4.78846429613576e-06, "loss": 2.3247, "step": 14719 }, { "epoch": 0.7896995708154506, "grad_norm": 0.458984375, "learning_rate": 4.788429320199361e-06, "loss": 2.3875, "step": 14720 }, { "epoch": 0.7897532188841202, "grad_norm": 0.5234375, "learning_rate": 4.788394341499446e-06, "loss": 2.3601, "step": 14721 }, { "epoch": 0.7898068669527897, "grad_norm": 0.57421875, "learning_rate": 4.788359360036057e-06, "loss": 1.6083, "step": 14722 }, { "epoch": 0.7898605150214593, "grad_norm": 0.470703125, "learning_rate": 4.788324375809237e-06, "loss": 2.3425, "step": 14723 }, { "epoch": 0.7899141630901287, "grad_norm": 0.4765625, "learning_rate": 4.788289388819027e-06, "loss": 2.1538, "step": 14724 }, { "epoch": 0.7899678111587983, "grad_norm": 0.41796875, "learning_rate": 4.788254399065472e-06, "loss": 2.2972, "step": 14725 }, { "epoch": 0.7900214592274678, "grad_norm": 0.427734375, "learning_rate": 4.788219406548611e-06, "loss": 2.2272, "step": 14726 }, { "epoch": 0.7900751072961374, "grad_norm": 0.5859375, "learning_rate": 4.788184411268488e-06, "loss": 2.2821, "step": 14727 }, { "epoch": 0.7901287553648069, "grad_norm": 0.357421875, "learning_rate": 4.788149413225145e-06, "loss": 2.1045, "step": 14728 }, { "epoch": 0.7901824034334763, "grad_norm": 0.435546875, "learning_rate": 4.788114412418624e-06, "loss": 1.955, "step": 14729 }, { "epoch": 0.7902360515021459, "grad_norm": 0.478515625, "learning_rate": 4.788079408848968e-06, "loss": 2.2621, "step": 14730 }, { "epoch": 0.7902896995708154, "grad_norm": 0.3984375, "learning_rate": 4.788044402516218e-06, "loss": 2.2204, "step": 14731 }, { "epoch": 0.790343347639485, "grad_norm": 0.50390625, "learning_rate": 4.788009393420417e-06, "loss": 2.2484, "step": 14732 }, { "epoch": 0.7903969957081545, "grad_norm": 0.578125, "learning_rate": 4.787974381561607e-06, "loss": 2.5005, "step": 14733 }, { "epoch": 0.7904506437768241, "grad_norm": 0.48828125, "learning_rate": 4.787939366939832e-06, "loss": 2.2152, "step": 14734 }, { "epoch": 0.7905042918454935, "grad_norm": 0.37109375, "learning_rate": 4.787904349555133e-06, "loss": 1.9078, "step": 14735 }, { "epoch": 0.7905579399141631, "grad_norm": 0.408203125, "learning_rate": 4.78786932940755e-06, "loss": 2.1781, "step": 14736 }, { "epoch": 0.7906115879828326, "grad_norm": 0.42578125, "learning_rate": 4.7878343064971286e-06, "loss": 2.2193, "step": 14737 }, { "epoch": 0.7906652360515022, "grad_norm": 0.43359375, "learning_rate": 4.787799280823911e-06, "loss": 2.3697, "step": 14738 }, { "epoch": 0.7907188841201717, "grad_norm": 0.55859375, "learning_rate": 4.7877642523879365e-06, "loss": 2.3706, "step": 14739 }, { "epoch": 0.7907725321888412, "grad_norm": 0.453125, "learning_rate": 4.78772922118925e-06, "loss": 2.2196, "step": 14740 }, { "epoch": 0.7908261802575107, "grad_norm": 0.423828125, "learning_rate": 4.7876941872278935e-06, "loss": 2.1778, "step": 14741 }, { "epoch": 0.7908798283261803, "grad_norm": 0.412109375, "learning_rate": 4.7876591505039085e-06, "loss": 2.2161, "step": 14742 }, { "epoch": 0.7909334763948498, "grad_norm": 0.52734375, "learning_rate": 4.787624111017338e-06, "loss": 2.3273, "step": 14743 }, { "epoch": 0.7909871244635193, "grad_norm": 0.484375, "learning_rate": 4.7875890687682244e-06, "loss": 2.2548, "step": 14744 }, { "epoch": 0.7910407725321889, "grad_norm": 0.50390625, "learning_rate": 4.787554023756609e-06, "loss": 2.3058, "step": 14745 }, { "epoch": 0.7910944206008583, "grad_norm": 0.49609375, "learning_rate": 4.787518975982535e-06, "loss": 2.3265, "step": 14746 }, { "epoch": 0.7911480686695279, "grad_norm": 0.4921875, "learning_rate": 4.7874839254460445e-06, "loss": 2.244, "step": 14747 }, { "epoch": 0.7912017167381974, "grad_norm": 0.447265625, "learning_rate": 4.78744887214718e-06, "loss": 2.2606, "step": 14748 }, { "epoch": 0.791255364806867, "grad_norm": 0.447265625, "learning_rate": 4.787413816085983e-06, "loss": 2.3252, "step": 14749 }, { "epoch": 0.7913090128755365, "grad_norm": 0.73828125, "learning_rate": 4.787378757262497e-06, "loss": 2.3452, "step": 14750 }, { "epoch": 0.791362660944206, "grad_norm": 0.44140625, "learning_rate": 4.787343695676765e-06, "loss": 2.4092, "step": 14751 }, { "epoch": 0.7914163090128755, "grad_norm": 0.44921875, "learning_rate": 4.787308631328826e-06, "loss": 2.4481, "step": 14752 }, { "epoch": 0.7914699570815451, "grad_norm": 0.8203125, "learning_rate": 4.787273564218726e-06, "loss": 2.2452, "step": 14753 }, { "epoch": 0.7915236051502146, "grad_norm": 0.45703125, "learning_rate": 4.787238494346506e-06, "loss": 2.2263, "step": 14754 }, { "epoch": 0.7915772532188842, "grad_norm": 0.45703125, "learning_rate": 4.7872034217122074e-06, "loss": 2.3889, "step": 14755 }, { "epoch": 0.7916309012875536, "grad_norm": 0.4921875, "learning_rate": 4.787168346315874e-06, "loss": 2.2941, "step": 14756 }, { "epoch": 0.7916845493562231, "grad_norm": 0.39453125, "learning_rate": 4.787133268157547e-06, "loss": 2.3878, "step": 14757 }, { "epoch": 0.7917381974248927, "grad_norm": 0.474609375, "learning_rate": 4.787098187237269e-06, "loss": 2.181, "step": 14758 }, { "epoch": 0.7917918454935622, "grad_norm": 0.38671875, "learning_rate": 4.787063103555083e-06, "loss": 1.8449, "step": 14759 }, { "epoch": 0.7918454935622318, "grad_norm": 0.5703125, "learning_rate": 4.787028017111032e-06, "loss": 1.8323, "step": 14760 }, { "epoch": 0.7918991416309012, "grad_norm": 0.388671875, "learning_rate": 4.786992927905156e-06, "loss": 2.1854, "step": 14761 }, { "epoch": 0.7919527896995708, "grad_norm": 0.486328125, "learning_rate": 4.786957835937499e-06, "loss": 2.2068, "step": 14762 }, { "epoch": 0.7920064377682403, "grad_norm": 1.234375, "learning_rate": 4.786922741208103e-06, "loss": 2.2273, "step": 14763 }, { "epoch": 0.7920600858369099, "grad_norm": 0.423828125, "learning_rate": 4.786887643717012e-06, "loss": 2.2915, "step": 14764 }, { "epoch": 0.7921137339055794, "grad_norm": 0.4921875, "learning_rate": 4.786852543464266e-06, "loss": 2.2017, "step": 14765 }, { "epoch": 0.792167381974249, "grad_norm": 0.5234375, "learning_rate": 4.786817440449908e-06, "loss": 2.2464, "step": 14766 }, { "epoch": 0.7922210300429184, "grad_norm": 0.431640625, "learning_rate": 4.786782334673981e-06, "loss": 2.3244, "step": 14767 }, { "epoch": 0.792274678111588, "grad_norm": 0.6015625, "learning_rate": 4.786747226136527e-06, "loss": 1.4077, "step": 14768 }, { "epoch": 0.7923283261802575, "grad_norm": 0.5078125, "learning_rate": 4.7867121148375885e-06, "loss": 2.1471, "step": 14769 }, { "epoch": 0.7923819742489271, "grad_norm": 0.431640625, "learning_rate": 4.786677000777208e-06, "loss": 2.1905, "step": 14770 }, { "epoch": 0.7924356223175966, "grad_norm": 0.51171875, "learning_rate": 4.786641883955427e-06, "loss": 2.3105, "step": 14771 }, { "epoch": 0.792489270386266, "grad_norm": 0.515625, "learning_rate": 4.78660676437229e-06, "loss": 2.1614, "step": 14772 }, { "epoch": 0.7925429184549356, "grad_norm": 0.4453125, "learning_rate": 4.786571642027837e-06, "loss": 2.3878, "step": 14773 }, { "epoch": 0.7925965665236051, "grad_norm": 0.466796875, "learning_rate": 4.786536516922112e-06, "loss": 2.2979, "step": 14774 }, { "epoch": 0.7926502145922747, "grad_norm": 0.458984375, "learning_rate": 4.786501389055157e-06, "loss": 2.2327, "step": 14775 }, { "epoch": 0.7927038626609442, "grad_norm": 2.640625, "learning_rate": 4.786466258427014e-06, "loss": 2.4863, "step": 14776 }, { "epoch": 0.7927575107296138, "grad_norm": 0.380859375, "learning_rate": 4.786431125037726e-06, "loss": 1.7651, "step": 14777 }, { "epoch": 0.7928111587982832, "grad_norm": 0.8046875, "learning_rate": 4.7863959888873355e-06, "loss": 2.45, "step": 14778 }, { "epoch": 0.7928648068669528, "grad_norm": 0.66015625, "learning_rate": 4.786360849975884e-06, "loss": 2.5973, "step": 14779 }, { "epoch": 0.7929184549356223, "grad_norm": 0.5859375, "learning_rate": 4.7863257083034156e-06, "loss": 2.4694, "step": 14780 }, { "epoch": 0.7929721030042919, "grad_norm": 0.6328125, "learning_rate": 4.786290563869971e-06, "loss": 1.6103, "step": 14781 }, { "epoch": 0.7930257510729614, "grad_norm": 0.470703125, "learning_rate": 4.786255416675593e-06, "loss": 2.4431, "step": 14782 }, { "epoch": 0.793079399141631, "grad_norm": 0.443359375, "learning_rate": 4.786220266720325e-06, "loss": 2.3214, "step": 14783 }, { "epoch": 0.7931330472103004, "grad_norm": 0.50390625, "learning_rate": 4.786185114004208e-06, "loss": 2.2548, "step": 14784 }, { "epoch": 0.79318669527897, "grad_norm": 0.427734375, "learning_rate": 4.7861499585272864e-06, "loss": 2.3006, "step": 14785 }, { "epoch": 0.7932403433476395, "grad_norm": 0.4921875, "learning_rate": 4.786114800289601e-06, "loss": 2.2927, "step": 14786 }, { "epoch": 0.793293991416309, "grad_norm": 0.4375, "learning_rate": 4.786079639291195e-06, "loss": 2.0355, "step": 14787 }, { "epoch": 0.7933476394849786, "grad_norm": 0.486328125, "learning_rate": 4.78604447553211e-06, "loss": 2.214, "step": 14788 }, { "epoch": 0.793401287553648, "grad_norm": 0.4921875, "learning_rate": 4.78600930901239e-06, "loss": 2.5031, "step": 14789 }, { "epoch": 0.7934549356223176, "grad_norm": 0.54296875, "learning_rate": 4.785974139732076e-06, "loss": 2.5963, "step": 14790 }, { "epoch": 0.7935085836909871, "grad_norm": 0.51171875, "learning_rate": 4.785938967691212e-06, "loss": 2.0034, "step": 14791 }, { "epoch": 0.7935622317596567, "grad_norm": 0.51953125, "learning_rate": 4.785903792889838e-06, "loss": 2.2829, "step": 14792 }, { "epoch": 0.7936158798283262, "grad_norm": 0.5625, "learning_rate": 4.785868615327999e-06, "loss": 1.7131, "step": 14793 }, { "epoch": 0.7936695278969957, "grad_norm": 0.427734375, "learning_rate": 4.785833435005736e-06, "loss": 2.2258, "step": 14794 }, { "epoch": 0.7937231759656652, "grad_norm": 0.462890625, "learning_rate": 4.785798251923093e-06, "loss": 2.2309, "step": 14795 }, { "epoch": 0.7937768240343348, "grad_norm": 0.46484375, "learning_rate": 4.78576306608011e-06, "loss": 2.3664, "step": 14796 }, { "epoch": 0.7938304721030043, "grad_norm": 0.56640625, "learning_rate": 4.785727877476832e-06, "loss": 2.1314, "step": 14797 }, { "epoch": 0.7938841201716739, "grad_norm": 0.416015625, "learning_rate": 4.7856926861133005e-06, "loss": 2.4253, "step": 14798 }, { "epoch": 0.7939377682403433, "grad_norm": 0.435546875, "learning_rate": 4.785657491989557e-06, "loss": 1.9826, "step": 14799 }, { "epoch": 0.7939914163090128, "grad_norm": 0.47265625, "learning_rate": 4.785622295105646e-06, "loss": 2.3667, "step": 14800 }, { "epoch": 0.7940450643776824, "grad_norm": 0.5859375, "learning_rate": 4.785587095461608e-06, "loss": 2.3378, "step": 14801 }, { "epoch": 0.7940987124463519, "grad_norm": 0.70703125, "learning_rate": 4.7855518930574875e-06, "loss": 2.6766, "step": 14802 }, { "epoch": 0.7941523605150215, "grad_norm": 0.453125, "learning_rate": 4.785516687893326e-06, "loss": 1.6674, "step": 14803 }, { "epoch": 0.794206008583691, "grad_norm": 0.49609375, "learning_rate": 4.785481479969165e-06, "loss": 1.9075, "step": 14804 }, { "epoch": 0.7942596566523605, "grad_norm": 0.455078125, "learning_rate": 4.785446269285048e-06, "loss": 2.3536, "step": 14805 }, { "epoch": 0.79431330472103, "grad_norm": 0.478515625, "learning_rate": 4.785411055841018e-06, "loss": 2.2294, "step": 14806 }, { "epoch": 0.7943669527896996, "grad_norm": 0.51953125, "learning_rate": 4.785375839637117e-06, "loss": 2.2384, "step": 14807 }, { "epoch": 0.7944206008583691, "grad_norm": 0.43359375, "learning_rate": 4.785340620673388e-06, "loss": 2.3173, "step": 14808 }, { "epoch": 0.7944742489270387, "grad_norm": 0.54296875, "learning_rate": 4.785305398949872e-06, "loss": 2.2471, "step": 14809 }, { "epoch": 0.7945278969957081, "grad_norm": 1.3046875, "learning_rate": 4.785270174466614e-06, "loss": 2.2271, "step": 14810 }, { "epoch": 0.7945815450643777, "grad_norm": 0.365234375, "learning_rate": 4.785234947223654e-06, "loss": 2.3307, "step": 14811 }, { "epoch": 0.7946351931330472, "grad_norm": 0.435546875, "learning_rate": 4.785199717221036e-06, "loss": 2.2419, "step": 14812 }, { "epoch": 0.7946888412017168, "grad_norm": 0.5, "learning_rate": 4.785164484458803e-06, "loss": 2.4145, "step": 14813 }, { "epoch": 0.7947424892703863, "grad_norm": 0.47265625, "learning_rate": 4.785129248936996e-06, "loss": 2.3186, "step": 14814 }, { "epoch": 0.7947961373390557, "grad_norm": 0.5, "learning_rate": 4.785094010655659e-06, "loss": 2.2978, "step": 14815 }, { "epoch": 0.7948497854077253, "grad_norm": 0.62890625, "learning_rate": 4.785058769614833e-06, "loss": 2.6446, "step": 14816 }, { "epoch": 0.7949034334763948, "grad_norm": 0.4921875, "learning_rate": 4.785023525814562e-06, "loss": 2.2522, "step": 14817 }, { "epoch": 0.7949570815450644, "grad_norm": 0.6796875, "learning_rate": 4.784988279254889e-06, "loss": 2.3923, "step": 14818 }, { "epoch": 0.7950107296137339, "grad_norm": 0.546875, "learning_rate": 4.784953029935853e-06, "loss": 2.2476, "step": 14819 }, { "epoch": 0.7950643776824035, "grad_norm": 0.50390625, "learning_rate": 4.7849177778575016e-06, "loss": 2.5638, "step": 14820 }, { "epoch": 0.7951180257510729, "grad_norm": 0.65234375, "learning_rate": 4.784882523019874e-06, "loss": 2.3809, "step": 14821 }, { "epoch": 0.7951716738197425, "grad_norm": 0.47265625, "learning_rate": 4.7848472654230135e-06, "loss": 2.3841, "step": 14822 }, { "epoch": 0.795225321888412, "grad_norm": 0.388671875, "learning_rate": 4.784812005066964e-06, "loss": 2.165, "step": 14823 }, { "epoch": 0.7952789699570816, "grad_norm": 0.5546875, "learning_rate": 4.784776741951766e-06, "loss": 2.5189, "step": 14824 }, { "epoch": 0.7953326180257511, "grad_norm": 0.734375, "learning_rate": 4.784741476077462e-06, "loss": 2.3456, "step": 14825 }, { "epoch": 0.7953862660944206, "grad_norm": 0.48046875, "learning_rate": 4.784706207444097e-06, "loss": 2.5622, "step": 14826 }, { "epoch": 0.7954399141630901, "grad_norm": 0.51953125, "learning_rate": 4.784670936051713e-06, "loss": 2.4629, "step": 14827 }, { "epoch": 0.7954935622317597, "grad_norm": 0.4453125, "learning_rate": 4.784635661900351e-06, "loss": 2.3142, "step": 14828 }, { "epoch": 0.7955472103004292, "grad_norm": 0.48828125, "learning_rate": 4.784600384990053e-06, "loss": 2.3089, "step": 14829 }, { "epoch": 0.7956008583690987, "grad_norm": 0.482421875, "learning_rate": 4.784565105320864e-06, "loss": 1.6201, "step": 14830 }, { "epoch": 0.7956545064377682, "grad_norm": 0.5234375, "learning_rate": 4.784529822892827e-06, "loss": 2.2752, "step": 14831 }, { "epoch": 0.7957081545064377, "grad_norm": 0.52734375, "learning_rate": 4.784494537705981e-06, "loss": 2.4236, "step": 14832 }, { "epoch": 0.7957618025751073, "grad_norm": 0.431640625, "learning_rate": 4.7844592497603726e-06, "loss": 2.1601, "step": 14833 }, { "epoch": 0.7958154506437768, "grad_norm": 0.69921875, "learning_rate": 4.784423959056042e-06, "loss": 2.3016, "step": 14834 }, { "epoch": 0.7958690987124464, "grad_norm": 0.466796875, "learning_rate": 4.7843886655930325e-06, "loss": 2.1929, "step": 14835 }, { "epoch": 0.7959227467811159, "grad_norm": 0.474609375, "learning_rate": 4.784353369371387e-06, "loss": 2.3068, "step": 14836 }, { "epoch": 0.7959763948497854, "grad_norm": 0.52734375, "learning_rate": 4.784318070391148e-06, "loss": 2.3891, "step": 14837 }, { "epoch": 0.7960300429184549, "grad_norm": 0.384765625, "learning_rate": 4.784282768652356e-06, "loss": 2.0051, "step": 14838 }, { "epoch": 0.7960836909871245, "grad_norm": 0.451171875, "learning_rate": 4.784247464155057e-06, "loss": 2.4167, "step": 14839 }, { "epoch": 0.796137339055794, "grad_norm": 0.4296875, "learning_rate": 4.784212156899293e-06, "loss": 2.2968, "step": 14840 }, { "epoch": 0.7961909871244636, "grad_norm": 0.55859375, "learning_rate": 4.7841768468851045e-06, "loss": 2.3132, "step": 14841 }, { "epoch": 0.796244635193133, "grad_norm": 0.546875, "learning_rate": 4.7841415341125365e-06, "loss": 2.1249, "step": 14842 }, { "epoch": 0.7962982832618025, "grad_norm": 0.494140625, "learning_rate": 4.78410621858163e-06, "loss": 2.0234, "step": 14843 }, { "epoch": 0.7963519313304721, "grad_norm": 0.416015625, "learning_rate": 4.784070900292428e-06, "loss": 2.2308, "step": 14844 }, { "epoch": 0.7964055793991416, "grad_norm": 0.47265625, "learning_rate": 4.784035579244974e-06, "loss": 1.685, "step": 14845 }, { "epoch": 0.7964592274678112, "grad_norm": 0.40234375, "learning_rate": 4.7840002554393094e-06, "loss": 2.3522, "step": 14846 }, { "epoch": 0.7965128755364806, "grad_norm": 0.69921875, "learning_rate": 4.783964928875479e-06, "loss": 2.2155, "step": 14847 }, { "epoch": 0.7965665236051502, "grad_norm": 0.62109375, "learning_rate": 4.783929599553522e-06, "loss": 1.6529, "step": 14848 }, { "epoch": 0.7966201716738197, "grad_norm": 0.361328125, "learning_rate": 4.783894267473484e-06, "loss": 1.9483, "step": 14849 }, { "epoch": 0.7966738197424893, "grad_norm": 0.484375, "learning_rate": 4.783858932635407e-06, "loss": 2.4528, "step": 14850 }, { "epoch": 0.7967274678111588, "grad_norm": 0.408203125, "learning_rate": 4.783823595039333e-06, "loss": 2.4142, "step": 14851 }, { "epoch": 0.7967811158798284, "grad_norm": 0.396484375, "learning_rate": 4.783788254685305e-06, "loss": 2.2817, "step": 14852 }, { "epoch": 0.7968347639484978, "grad_norm": 0.7890625, "learning_rate": 4.7837529115733666e-06, "loss": 1.9745, "step": 14853 }, { "epoch": 0.7968884120171674, "grad_norm": 0.546875, "learning_rate": 4.783717565703559e-06, "loss": 2.5399, "step": 14854 }, { "epoch": 0.7969420600858369, "grad_norm": 0.36328125, "learning_rate": 4.783682217075925e-06, "loss": 2.2081, "step": 14855 }, { "epoch": 0.7969957081545065, "grad_norm": 0.51953125, "learning_rate": 4.783646865690509e-06, "loss": 2.3846, "step": 14856 }, { "epoch": 0.797049356223176, "grad_norm": 0.48828125, "learning_rate": 4.783611511547351e-06, "loss": 2.2103, "step": 14857 }, { "epoch": 0.7971030042918454, "grad_norm": 0.498046875, "learning_rate": 4.783576154646496e-06, "loss": 2.3034, "step": 14858 }, { "epoch": 0.797156652360515, "grad_norm": 0.51953125, "learning_rate": 4.783540794987986e-06, "loss": 2.4907, "step": 14859 }, { "epoch": 0.7972103004291845, "grad_norm": 0.578125, "learning_rate": 4.7835054325718635e-06, "loss": 2.0207, "step": 14860 }, { "epoch": 0.7972639484978541, "grad_norm": 0.494140625, "learning_rate": 4.7834700673981715e-06, "loss": 2.2818, "step": 14861 }, { "epoch": 0.7973175965665236, "grad_norm": 0.37109375, "learning_rate": 4.783434699466952e-06, "loss": 1.9832, "step": 14862 }, { "epoch": 0.7973712446351932, "grad_norm": 0.50390625, "learning_rate": 4.783399328778249e-06, "loss": 2.4141, "step": 14863 }, { "epoch": 0.7974248927038626, "grad_norm": 0.53125, "learning_rate": 4.7833639553321034e-06, "loss": 2.5888, "step": 14864 }, { "epoch": 0.7974785407725322, "grad_norm": 0.423828125, "learning_rate": 4.783328579128559e-06, "loss": 2.2958, "step": 14865 }, { "epoch": 0.7975321888412017, "grad_norm": 0.388671875, "learning_rate": 4.7832932001676595e-06, "loss": 2.3909, "step": 14866 }, { "epoch": 0.7975858369098713, "grad_norm": 0.5703125, "learning_rate": 4.7832578184494455e-06, "loss": 2.0702, "step": 14867 }, { "epoch": 0.7976394849785408, "grad_norm": 1.3125, "learning_rate": 4.7832224339739614e-06, "loss": 2.5711, "step": 14868 }, { "epoch": 0.7976931330472103, "grad_norm": 0.54296875, "learning_rate": 4.783187046741249e-06, "loss": 2.2882, "step": 14869 }, { "epoch": 0.7977467811158798, "grad_norm": 0.4375, "learning_rate": 4.7831516567513514e-06, "loss": 2.2641, "step": 14870 }, { "epoch": 0.7978004291845494, "grad_norm": 0.484375, "learning_rate": 4.783116264004312e-06, "loss": 2.4453, "step": 14871 }, { "epoch": 0.7978540772532189, "grad_norm": 0.47265625, "learning_rate": 4.783080868500172e-06, "loss": 2.2157, "step": 14872 }, { "epoch": 0.7979077253218884, "grad_norm": 0.4765625, "learning_rate": 4.783045470238975e-06, "loss": 2.2076, "step": 14873 }, { "epoch": 0.797961373390558, "grad_norm": 0.69921875, "learning_rate": 4.783010069220764e-06, "loss": 1.373, "step": 14874 }, { "epoch": 0.7980150214592274, "grad_norm": 0.455078125, "learning_rate": 4.782974665445581e-06, "loss": 2.4967, "step": 14875 }, { "epoch": 0.798068669527897, "grad_norm": 0.412109375, "learning_rate": 4.78293925891347e-06, "loss": 1.9742, "step": 14876 }, { "epoch": 0.7981223175965665, "grad_norm": 0.431640625, "learning_rate": 4.782903849624473e-06, "loss": 1.7892, "step": 14877 }, { "epoch": 0.7981759656652361, "grad_norm": 0.453125, "learning_rate": 4.782868437578631e-06, "loss": 2.3092, "step": 14878 }, { "epoch": 0.7982296137339056, "grad_norm": 0.447265625, "learning_rate": 4.782833022775991e-06, "loss": 2.2387, "step": 14879 }, { "epoch": 0.7982832618025751, "grad_norm": 0.451171875, "learning_rate": 4.782797605216592e-06, "loss": 2.6064, "step": 14880 }, { "epoch": 0.7983369098712446, "grad_norm": 0.55859375, "learning_rate": 4.782762184900478e-06, "loss": 2.0012, "step": 14881 }, { "epoch": 0.7983905579399142, "grad_norm": 0.46484375, "learning_rate": 4.782726761827693e-06, "loss": 2.3783, "step": 14882 }, { "epoch": 0.7984442060085837, "grad_norm": 0.4140625, "learning_rate": 4.782691335998277e-06, "loss": 2.2443, "step": 14883 }, { "epoch": 0.7984978540772533, "grad_norm": 0.419921875, "learning_rate": 4.782655907412275e-06, "loss": 2.3112, "step": 14884 }, { "epoch": 0.7985515021459227, "grad_norm": 0.408203125, "learning_rate": 4.782620476069729e-06, "loss": 2.5462, "step": 14885 }, { "epoch": 0.7986051502145923, "grad_norm": 0.451171875, "learning_rate": 4.782585041970683e-06, "loss": 2.3996, "step": 14886 }, { "epoch": 0.7986587982832618, "grad_norm": 2.203125, "learning_rate": 4.782549605115178e-06, "loss": 2.0981, "step": 14887 }, { "epoch": 0.7987124463519313, "grad_norm": 0.546875, "learning_rate": 4.782514165503257e-06, "loss": 2.3126, "step": 14888 }, { "epoch": 0.7987660944206009, "grad_norm": 0.5, "learning_rate": 4.782478723134965e-06, "loss": 2.3191, "step": 14889 }, { "epoch": 0.7988197424892703, "grad_norm": 0.55078125, "learning_rate": 4.782443278010341e-06, "loss": 2.9391, "step": 14890 }, { "epoch": 0.7988733905579399, "grad_norm": 0.455078125, "learning_rate": 4.782407830129431e-06, "loss": 2.1724, "step": 14891 }, { "epoch": 0.7989270386266094, "grad_norm": 0.796875, "learning_rate": 4.782372379492277e-06, "loss": 2.2596, "step": 14892 }, { "epoch": 0.798980686695279, "grad_norm": 0.51171875, "learning_rate": 4.782336926098921e-06, "loss": 2.4532, "step": 14893 }, { "epoch": 0.7990343347639485, "grad_norm": 0.4140625, "learning_rate": 4.782301469949406e-06, "loss": 2.4302, "step": 14894 }, { "epoch": 0.7990879828326181, "grad_norm": 0.5234375, "learning_rate": 4.782266011043777e-06, "loss": 2.4159, "step": 14895 }, { "epoch": 0.7991416309012875, "grad_norm": 0.42578125, "learning_rate": 4.782230549382073e-06, "loss": 2.5238, "step": 14896 }, { "epoch": 0.7991952789699571, "grad_norm": 0.46484375, "learning_rate": 4.78219508496434e-06, "loss": 2.2997, "step": 14897 }, { "epoch": 0.7992489270386266, "grad_norm": 0.45703125, "learning_rate": 4.782159617790619e-06, "loss": 2.2047, "step": 14898 }, { "epoch": 0.7993025751072962, "grad_norm": 0.70703125, "learning_rate": 4.782124147860954e-06, "loss": 2.4397, "step": 14899 }, { "epoch": 0.7993562231759657, "grad_norm": 0.515625, "learning_rate": 4.782088675175387e-06, "loss": 2.4215, "step": 14900 }, { "epoch": 0.7994098712446351, "grad_norm": 0.48046875, "learning_rate": 4.782053199733961e-06, "loss": 2.0735, "step": 14901 }, { "epoch": 0.7994635193133047, "grad_norm": 0.458984375, "learning_rate": 4.78201772153672e-06, "loss": 2.4429, "step": 14902 }, { "epoch": 0.7995171673819742, "grad_norm": 0.4375, "learning_rate": 4.781982240583705e-06, "loss": 2.3252, "step": 14903 }, { "epoch": 0.7995708154506438, "grad_norm": 0.498046875, "learning_rate": 4.78194675687496e-06, "loss": 2.289, "step": 14904 }, { "epoch": 0.7996244635193133, "grad_norm": 0.4296875, "learning_rate": 4.7819112704105276e-06, "loss": 2.1134, "step": 14905 }, { "epoch": 0.7996781115879829, "grad_norm": 0.44921875, "learning_rate": 4.78187578119045e-06, "loss": 2.2531, "step": 14906 }, { "epoch": 0.7997317596566523, "grad_norm": 0.46484375, "learning_rate": 4.781840289214772e-06, "loss": 2.2517, "step": 14907 }, { "epoch": 0.7997854077253219, "grad_norm": 0.447265625, "learning_rate": 4.781804794483533e-06, "loss": 2.1399, "step": 14908 }, { "epoch": 0.7998390557939914, "grad_norm": 0.423828125, "learning_rate": 4.78176929699678e-06, "loss": 2.1898, "step": 14909 }, { "epoch": 0.799892703862661, "grad_norm": 0.478515625, "learning_rate": 4.781733796754553e-06, "loss": 2.2517, "step": 14910 }, { "epoch": 0.7999463519313305, "grad_norm": 0.5625, "learning_rate": 4.781698293756896e-06, "loss": 2.485, "step": 14911 }, { "epoch": 0.8, "grad_norm": 0.50390625, "learning_rate": 4.781662788003851e-06, "loss": 2.2626, "step": 14912 }, { "epoch": 0.8000536480686695, "grad_norm": 0.462890625, "learning_rate": 4.781627279495461e-06, "loss": 2.5236, "step": 14913 }, { "epoch": 0.8001072961373391, "grad_norm": 0.46875, "learning_rate": 4.78159176823177e-06, "loss": 2.4449, "step": 14914 }, { "epoch": 0.8001609442060086, "grad_norm": 0.478515625, "learning_rate": 4.781556254212821e-06, "loss": 2.5292, "step": 14915 }, { "epoch": 0.8002145922746781, "grad_norm": 0.45703125, "learning_rate": 4.7815207374386555e-06, "loss": 2.5472, "step": 14916 }, { "epoch": 0.8002682403433476, "grad_norm": 0.494140625, "learning_rate": 4.781485217909317e-06, "loss": 2.432, "step": 14917 }, { "epoch": 0.8003218884120171, "grad_norm": 0.455078125, "learning_rate": 4.781449695624849e-06, "loss": 2.3831, "step": 14918 }, { "epoch": 0.8003755364806867, "grad_norm": 0.45703125, "learning_rate": 4.781414170585292e-06, "loss": 2.3226, "step": 14919 }, { "epoch": 0.8004291845493562, "grad_norm": 0.39453125, "learning_rate": 4.7813786427906924e-06, "loss": 2.2885, "step": 14920 }, { "epoch": 0.8004828326180258, "grad_norm": 0.490234375, "learning_rate": 4.781343112241091e-06, "loss": 2.2447, "step": 14921 }, { "epoch": 0.8005364806866953, "grad_norm": 0.435546875, "learning_rate": 4.781307578936531e-06, "loss": 2.1995, "step": 14922 }, { "epoch": 0.8005901287553648, "grad_norm": 0.56640625, "learning_rate": 4.7812720428770555e-06, "loss": 2.3153, "step": 14923 }, { "epoch": 0.8006437768240343, "grad_norm": 0.48046875, "learning_rate": 4.781236504062707e-06, "loss": 2.3525, "step": 14924 }, { "epoch": 0.8006974248927039, "grad_norm": 0.91015625, "learning_rate": 4.781200962493529e-06, "loss": 2.0827, "step": 14925 }, { "epoch": 0.8007510729613734, "grad_norm": 0.5078125, "learning_rate": 4.781165418169565e-06, "loss": 2.4639, "step": 14926 }, { "epoch": 0.800804721030043, "grad_norm": 0.408203125, "learning_rate": 4.781129871090856e-06, "loss": 2.3646, "step": 14927 }, { "epoch": 0.8008583690987124, "grad_norm": 0.5859375, "learning_rate": 4.781094321257445e-06, "loss": 1.4788, "step": 14928 }, { "epoch": 0.800912017167382, "grad_norm": 0.98046875, "learning_rate": 4.781058768669377e-06, "loss": 2.3349, "step": 14929 }, { "epoch": 0.8009656652360515, "grad_norm": 0.431640625, "learning_rate": 4.7810232133266945e-06, "loss": 2.1254, "step": 14930 }, { "epoch": 0.801019313304721, "grad_norm": 0.482421875, "learning_rate": 4.7809876552294395e-06, "loss": 2.1574, "step": 14931 }, { "epoch": 0.8010729613733906, "grad_norm": 0.734375, "learning_rate": 4.780952094377655e-06, "loss": 2.2925, "step": 14932 }, { "epoch": 0.80112660944206, "grad_norm": 0.44140625, "learning_rate": 4.780916530771385e-06, "loss": 2.5915, "step": 14933 }, { "epoch": 0.8011802575107296, "grad_norm": 0.46484375, "learning_rate": 4.7808809644106715e-06, "loss": 2.2954, "step": 14934 }, { "epoch": 0.8012339055793991, "grad_norm": 0.423828125, "learning_rate": 4.7808453952955565e-06, "loss": 2.108, "step": 14935 }, { "epoch": 0.8012875536480687, "grad_norm": 0.5078125, "learning_rate": 4.7808098234260845e-06, "loss": 2.3527, "step": 14936 }, { "epoch": 0.8013412017167382, "grad_norm": 0.470703125, "learning_rate": 4.780774248802298e-06, "loss": 2.2865, "step": 14937 }, { "epoch": 0.8013948497854078, "grad_norm": 0.40625, "learning_rate": 4.780738671424241e-06, "loss": 2.2973, "step": 14938 }, { "epoch": 0.8014484978540772, "grad_norm": 0.484375, "learning_rate": 4.780703091291954e-06, "loss": 2.7187, "step": 14939 }, { "epoch": 0.8015021459227468, "grad_norm": 0.51171875, "learning_rate": 4.780667508405482e-06, "loss": 2.1226, "step": 14940 }, { "epoch": 0.8015557939914163, "grad_norm": 0.40625, "learning_rate": 4.780631922764868e-06, "loss": 2.2727, "step": 14941 }, { "epoch": 0.8016094420600859, "grad_norm": 0.4765625, "learning_rate": 4.7805963343701535e-06, "loss": 2.4668, "step": 14942 }, { "epoch": 0.8016630901287554, "grad_norm": 0.51171875, "learning_rate": 4.780560743221382e-06, "loss": 2.1179, "step": 14943 }, { "epoch": 0.8017167381974248, "grad_norm": 0.5, "learning_rate": 4.780525149318598e-06, "loss": 2.222, "step": 14944 }, { "epoch": 0.8017703862660944, "grad_norm": 0.60546875, "learning_rate": 4.780489552661842e-06, "loss": 2.2843, "step": 14945 }, { "epoch": 0.8018240343347639, "grad_norm": 0.412109375, "learning_rate": 4.780453953251159e-06, "loss": 2.1147, "step": 14946 }, { "epoch": 0.8018776824034335, "grad_norm": 0.384765625, "learning_rate": 4.780418351086591e-06, "loss": 1.9722, "step": 14947 }, { "epoch": 0.801931330472103, "grad_norm": 0.455078125, "learning_rate": 4.780382746168182e-06, "loss": 2.3675, "step": 14948 }, { "epoch": 0.8019849785407726, "grad_norm": 0.5625, "learning_rate": 4.780347138495973e-06, "loss": 2.3414, "step": 14949 }, { "epoch": 0.802038626609442, "grad_norm": 0.53125, "learning_rate": 4.78031152807001e-06, "loss": 1.1946, "step": 14950 }, { "epoch": 0.8020922746781116, "grad_norm": 0.45703125, "learning_rate": 4.780275914890332e-06, "loss": 2.3053, "step": 14951 }, { "epoch": 0.8021459227467811, "grad_norm": 0.52734375, "learning_rate": 4.780240298956985e-06, "loss": 2.2702, "step": 14952 }, { "epoch": 0.8021995708154507, "grad_norm": 0.53125, "learning_rate": 4.7802046802700125e-06, "loss": 2.3875, "step": 14953 }, { "epoch": 0.8022532188841202, "grad_norm": 0.5, "learning_rate": 4.780169058829456e-06, "loss": 2.3918, "step": 14954 }, { "epoch": 0.8023068669527897, "grad_norm": 0.416015625, "learning_rate": 4.780133434635358e-06, "loss": 2.2797, "step": 14955 }, { "epoch": 0.8023605150214592, "grad_norm": 0.56640625, "learning_rate": 4.780097807687762e-06, "loss": 2.3098, "step": 14956 }, { "epoch": 0.8024141630901288, "grad_norm": 0.4609375, "learning_rate": 4.780062177986712e-06, "loss": 2.1874, "step": 14957 }, { "epoch": 0.8024678111587983, "grad_norm": 0.486328125, "learning_rate": 4.78002654553225e-06, "loss": 2.1552, "step": 14958 }, { "epoch": 0.8025214592274678, "grad_norm": 0.45703125, "learning_rate": 4.77999091032442e-06, "loss": 2.2004, "step": 14959 }, { "epoch": 0.8025751072961373, "grad_norm": 0.44921875, "learning_rate": 4.779955272363264e-06, "loss": 2.1938, "step": 14960 }, { "epoch": 0.8026287553648068, "grad_norm": 0.5234375, "learning_rate": 4.779919631648826e-06, "loss": 2.2372, "step": 14961 }, { "epoch": 0.8026824034334764, "grad_norm": 0.466796875, "learning_rate": 4.779883988181148e-06, "loss": 2.2111, "step": 14962 }, { "epoch": 0.8027360515021459, "grad_norm": 0.416015625, "learning_rate": 4.779848341960274e-06, "loss": 2.2774, "step": 14963 }, { "epoch": 0.8027896995708155, "grad_norm": 0.47265625, "learning_rate": 4.779812692986246e-06, "loss": 2.2319, "step": 14964 }, { "epoch": 0.802843347639485, "grad_norm": 0.43359375, "learning_rate": 4.779777041259108e-06, "loss": 1.9921, "step": 14965 }, { "epoch": 0.8028969957081545, "grad_norm": 0.5078125, "learning_rate": 4.779741386778902e-06, "loss": 2.1925, "step": 14966 }, { "epoch": 0.802950643776824, "grad_norm": 0.41796875, "learning_rate": 4.779705729545673e-06, "loss": 2.2143, "step": 14967 }, { "epoch": 0.8030042918454936, "grad_norm": 0.447265625, "learning_rate": 4.779670069559463e-06, "loss": 2.1349, "step": 14968 }, { "epoch": 0.8030579399141631, "grad_norm": 0.486328125, "learning_rate": 4.779634406820313e-06, "loss": 2.4154, "step": 14969 }, { "epoch": 0.8031115879828327, "grad_norm": 0.49609375, "learning_rate": 4.779598741328269e-06, "loss": 2.3654, "step": 14970 }, { "epoch": 0.8031652360515021, "grad_norm": 0.47265625, "learning_rate": 4.779563073083373e-06, "loss": 2.4706, "step": 14971 }, { "epoch": 0.8032188841201717, "grad_norm": 1.0, "learning_rate": 4.779527402085668e-06, "loss": 1.8608, "step": 14972 }, { "epoch": 0.8032725321888412, "grad_norm": 0.458984375, "learning_rate": 4.779491728335197e-06, "loss": 2.2798, "step": 14973 }, { "epoch": 0.8033261802575107, "grad_norm": 0.45703125, "learning_rate": 4.779456051832004e-06, "loss": 2.3841, "step": 14974 }, { "epoch": 0.8033798283261803, "grad_norm": 0.57421875, "learning_rate": 4.779420372576131e-06, "loss": 2.2556, "step": 14975 }, { "epoch": 0.8034334763948497, "grad_norm": 0.423828125, "learning_rate": 4.779384690567621e-06, "loss": 2.3862, "step": 14976 }, { "epoch": 0.8034871244635193, "grad_norm": 0.5703125, "learning_rate": 4.779349005806518e-06, "loss": 2.3022, "step": 14977 }, { "epoch": 0.8035407725321888, "grad_norm": 0.55078125, "learning_rate": 4.7793133182928646e-06, "loss": 2.5139, "step": 14978 }, { "epoch": 0.8035944206008584, "grad_norm": 0.50390625, "learning_rate": 4.779277628026703e-06, "loss": 2.4865, "step": 14979 }, { "epoch": 0.8036480686695279, "grad_norm": 0.44921875, "learning_rate": 4.779241935008077e-06, "loss": 1.9392, "step": 14980 }, { "epoch": 0.8037017167381975, "grad_norm": 0.5859375, "learning_rate": 4.779206239237032e-06, "loss": 2.2365, "step": 14981 }, { "epoch": 0.8037553648068669, "grad_norm": 0.4453125, "learning_rate": 4.779170540713608e-06, "loss": 2.6186, "step": 14982 }, { "epoch": 0.8038090128755365, "grad_norm": 0.4453125, "learning_rate": 4.779134839437849e-06, "loss": 2.3575, "step": 14983 }, { "epoch": 0.803862660944206, "grad_norm": 0.482421875, "learning_rate": 4.779099135409797e-06, "loss": 2.3692, "step": 14984 }, { "epoch": 0.8039163090128756, "grad_norm": 0.48828125, "learning_rate": 4.7790634286294975e-06, "loss": 2.2627, "step": 14985 }, { "epoch": 0.8039699570815451, "grad_norm": 0.359375, "learning_rate": 4.779027719096993e-06, "loss": 2.2672, "step": 14986 }, { "epoch": 0.8040236051502145, "grad_norm": 0.4921875, "learning_rate": 4.778992006812325e-06, "loss": 2.3455, "step": 14987 }, { "epoch": 0.8040772532188841, "grad_norm": 0.5, "learning_rate": 4.7789562917755386e-06, "loss": 2.3442, "step": 14988 }, { "epoch": 0.8041309012875536, "grad_norm": 0.486328125, "learning_rate": 4.778920573986676e-06, "loss": 2.3226, "step": 14989 }, { "epoch": 0.8041845493562232, "grad_norm": 0.451171875, "learning_rate": 4.77888485344578e-06, "loss": 2.3025, "step": 14990 }, { "epoch": 0.8042381974248927, "grad_norm": 0.51953125, "learning_rate": 4.778849130152894e-06, "loss": 2.0717, "step": 14991 }, { "epoch": 0.8042918454935623, "grad_norm": 0.5, "learning_rate": 4.778813404108062e-06, "loss": 2.0998, "step": 14992 }, { "epoch": 0.8043454935622317, "grad_norm": 0.4765625, "learning_rate": 4.778777675311325e-06, "loss": 2.0595, "step": 14993 }, { "epoch": 0.8043991416309013, "grad_norm": 0.484375, "learning_rate": 4.778741943762728e-06, "loss": 2.3306, "step": 14994 }, { "epoch": 0.8044527896995708, "grad_norm": 0.625, "learning_rate": 4.778706209462314e-06, "loss": 2.5378, "step": 14995 }, { "epoch": 0.8045064377682404, "grad_norm": 0.470703125, "learning_rate": 4.778670472410126e-06, "loss": 2.2265, "step": 14996 }, { "epoch": 0.8045600858369099, "grad_norm": 0.451171875, "learning_rate": 4.778634732606207e-06, "loss": 2.1966, "step": 14997 }, { "epoch": 0.8046137339055794, "grad_norm": 0.462890625, "learning_rate": 4.7785989900506e-06, "loss": 2.2377, "step": 14998 }, { "epoch": 0.8046673819742489, "grad_norm": 0.46875, "learning_rate": 4.778563244743348e-06, "loss": 2.1684, "step": 14999 }, { "epoch": 0.8047210300429185, "grad_norm": 0.451171875, "learning_rate": 4.778527496684495e-06, "loss": 2.2456, "step": 15000 }, { "epoch": 0.804774678111588, "grad_norm": 0.46875, "learning_rate": 4.778491745874083e-06, "loss": 2.5625, "step": 15001 }, { "epoch": 0.8048283261802575, "grad_norm": 4.03125, "learning_rate": 4.778455992312157e-06, "loss": 2.221, "step": 15002 }, { "epoch": 0.804881974248927, "grad_norm": 0.42578125, "learning_rate": 4.778420235998758e-06, "loss": 2.1369, "step": 15003 }, { "epoch": 0.8049356223175965, "grad_norm": 0.447265625, "learning_rate": 4.778384476933931e-06, "loss": 2.4255, "step": 15004 }, { "epoch": 0.8049892703862661, "grad_norm": 0.7734375, "learning_rate": 4.7783487151177175e-06, "loss": 2.1463, "step": 15005 }, { "epoch": 0.8050429184549356, "grad_norm": 0.4921875, "learning_rate": 4.778312950550162e-06, "loss": 2.3315, "step": 15006 }, { "epoch": 0.8050965665236052, "grad_norm": 0.359375, "learning_rate": 4.7782771832313065e-06, "loss": 2.1212, "step": 15007 }, { "epoch": 0.8051502145922746, "grad_norm": 0.474609375, "learning_rate": 4.778241413161196e-06, "loss": 2.2241, "step": 15008 }, { "epoch": 0.8052038626609442, "grad_norm": 0.447265625, "learning_rate": 4.778205640339872e-06, "loss": 2.0931, "step": 15009 }, { "epoch": 0.8052575107296137, "grad_norm": 0.388671875, "learning_rate": 4.778169864767379e-06, "loss": 2.1142, "step": 15010 }, { "epoch": 0.8053111587982833, "grad_norm": 0.57421875, "learning_rate": 4.778134086443758e-06, "loss": 2.1097, "step": 15011 }, { "epoch": 0.8053648068669528, "grad_norm": 1.5859375, "learning_rate": 4.778098305369056e-06, "loss": 2.3049, "step": 15012 }, { "epoch": 0.8054184549356224, "grad_norm": 0.4609375, "learning_rate": 4.778062521543312e-06, "loss": 2.2633, "step": 15013 }, { "epoch": 0.8054721030042918, "grad_norm": 0.390625, "learning_rate": 4.778026734966572e-06, "loss": 1.9816, "step": 15014 }, { "epoch": 0.8055257510729614, "grad_norm": 0.376953125, "learning_rate": 4.777990945638878e-06, "loss": 2.2987, "step": 15015 }, { "epoch": 0.8055793991416309, "grad_norm": 0.416015625, "learning_rate": 4.777955153560274e-06, "loss": 2.2844, "step": 15016 }, { "epoch": 0.8056330472103004, "grad_norm": 0.3984375, "learning_rate": 4.777919358730803e-06, "loss": 2.2354, "step": 15017 }, { "epoch": 0.80568669527897, "grad_norm": 0.4296875, "learning_rate": 4.777883561150508e-06, "loss": 2.3892, "step": 15018 }, { "epoch": 0.8057403433476394, "grad_norm": 0.466796875, "learning_rate": 4.777847760819432e-06, "loss": 2.5137, "step": 15019 }, { "epoch": 0.805793991416309, "grad_norm": 0.431640625, "learning_rate": 4.777811957737619e-06, "loss": 2.1984, "step": 15020 }, { "epoch": 0.8058476394849785, "grad_norm": 0.4765625, "learning_rate": 4.77777615190511e-06, "loss": 2.4052, "step": 15021 }, { "epoch": 0.8059012875536481, "grad_norm": 0.435546875, "learning_rate": 4.7777403433219525e-06, "loss": 2.2766, "step": 15022 }, { "epoch": 0.8059549356223176, "grad_norm": 0.74609375, "learning_rate": 4.7777045319881855e-06, "loss": 2.422, "step": 15023 }, { "epoch": 0.8060085836909872, "grad_norm": 0.51171875, "learning_rate": 4.777668717903855e-06, "loss": 2.2787, "step": 15024 }, { "epoch": 0.8060622317596566, "grad_norm": 0.4453125, "learning_rate": 4.777632901069002e-06, "loss": 2.2137, "step": 15025 }, { "epoch": 0.8061158798283262, "grad_norm": 0.478515625, "learning_rate": 4.777597081483672e-06, "loss": 2.6921, "step": 15026 }, { "epoch": 0.8061695278969957, "grad_norm": 0.51953125, "learning_rate": 4.777561259147907e-06, "loss": 1.7681, "step": 15027 }, { "epoch": 0.8062231759656653, "grad_norm": 0.4453125, "learning_rate": 4.7775254340617505e-06, "loss": 2.0463, "step": 15028 }, { "epoch": 0.8062768240343348, "grad_norm": 0.3671875, "learning_rate": 4.777489606225245e-06, "loss": 1.8435, "step": 15029 }, { "epoch": 0.8063304721030042, "grad_norm": 0.4375, "learning_rate": 4.777453775638435e-06, "loss": 2.0454, "step": 15030 }, { "epoch": 0.8063841201716738, "grad_norm": 0.42578125, "learning_rate": 4.777417942301364e-06, "loss": 2.3927, "step": 15031 }, { "epoch": 0.8064377682403433, "grad_norm": 0.44140625, "learning_rate": 4.777382106214074e-06, "loss": 2.3013, "step": 15032 }, { "epoch": 0.8064914163090129, "grad_norm": 0.546875, "learning_rate": 4.777346267376609e-06, "loss": 2.5261, "step": 15033 }, { "epoch": 0.8065450643776824, "grad_norm": 0.58203125, "learning_rate": 4.777310425789012e-06, "loss": 2.0738, "step": 15034 }, { "epoch": 0.806598712446352, "grad_norm": 0.42578125, "learning_rate": 4.777274581451326e-06, "loss": 2.3927, "step": 15035 }, { "epoch": 0.8066523605150214, "grad_norm": 0.486328125, "learning_rate": 4.777238734363595e-06, "loss": 2.2729, "step": 15036 }, { "epoch": 0.806706008583691, "grad_norm": 0.490234375, "learning_rate": 4.777202884525862e-06, "loss": 2.3251, "step": 15037 }, { "epoch": 0.8067596566523605, "grad_norm": 0.71484375, "learning_rate": 4.7771670319381704e-06, "loss": 1.4422, "step": 15038 }, { "epoch": 0.8068133047210301, "grad_norm": 0.431640625, "learning_rate": 4.777131176600563e-06, "loss": 2.1636, "step": 15039 }, { "epoch": 0.8068669527896996, "grad_norm": 0.3828125, "learning_rate": 4.777095318513084e-06, "loss": 2.2576, "step": 15040 }, { "epoch": 0.8069206008583691, "grad_norm": 0.490234375, "learning_rate": 4.777059457675776e-06, "loss": 2.3289, "step": 15041 }, { "epoch": 0.8069742489270386, "grad_norm": 0.5390625, "learning_rate": 4.777023594088682e-06, "loss": 2.3151, "step": 15042 }, { "epoch": 0.8070278969957082, "grad_norm": 0.462890625, "learning_rate": 4.776987727751846e-06, "loss": 2.3277, "step": 15043 }, { "epoch": 0.8070815450643777, "grad_norm": 0.6953125, "learning_rate": 4.776951858665311e-06, "loss": 1.9036, "step": 15044 }, { "epoch": 0.8071351931330472, "grad_norm": 0.41796875, "learning_rate": 4.77691598682912e-06, "loss": 2.3633, "step": 15045 }, { "epoch": 0.8071888412017167, "grad_norm": 0.376953125, "learning_rate": 4.7768801122433185e-06, "loss": 2.3043, "step": 15046 }, { "epoch": 0.8072424892703862, "grad_norm": 0.470703125, "learning_rate": 4.776844234907946e-06, "loss": 2.2447, "step": 15047 }, { "epoch": 0.8072961373390558, "grad_norm": 0.609375, "learning_rate": 4.776808354823049e-06, "loss": 2.3439, "step": 15048 }, { "epoch": 0.8073497854077253, "grad_norm": 0.46484375, "learning_rate": 4.776772471988669e-06, "loss": 1.9946, "step": 15049 }, { "epoch": 0.8074034334763949, "grad_norm": 0.51171875, "learning_rate": 4.776736586404851e-06, "loss": 2.4924, "step": 15050 }, { "epoch": 0.8074570815450643, "grad_norm": 0.4921875, "learning_rate": 4.776700698071637e-06, "loss": 2.2564, "step": 15051 }, { "epoch": 0.8075107296137339, "grad_norm": 0.51171875, "learning_rate": 4.77666480698907e-06, "loss": 2.3283, "step": 15052 }, { "epoch": 0.8075643776824034, "grad_norm": 0.55078125, "learning_rate": 4.776628913157194e-06, "loss": 2.4132, "step": 15053 }, { "epoch": 0.807618025751073, "grad_norm": 0.462890625, "learning_rate": 4.776593016576053e-06, "loss": 2.3791, "step": 15054 }, { "epoch": 0.8076716738197425, "grad_norm": 0.466796875, "learning_rate": 4.776557117245691e-06, "loss": 2.3347, "step": 15055 }, { "epoch": 0.8077253218884121, "grad_norm": 0.44921875, "learning_rate": 4.7765212151661475e-06, "loss": 2.2724, "step": 15056 }, { "epoch": 0.8077789699570815, "grad_norm": 0.58203125, "learning_rate": 4.77648531033747e-06, "loss": 1.7292, "step": 15057 }, { "epoch": 0.8078326180257511, "grad_norm": 0.53125, "learning_rate": 4.7764494027597004e-06, "loss": 2.3112, "step": 15058 }, { "epoch": 0.8078862660944206, "grad_norm": 0.443359375, "learning_rate": 4.776413492432881e-06, "loss": 2.1523, "step": 15059 }, { "epoch": 0.8079399141630901, "grad_norm": 0.458984375, "learning_rate": 4.776377579357057e-06, "loss": 2.2028, "step": 15060 }, { "epoch": 0.8079935622317597, "grad_norm": 0.45703125, "learning_rate": 4.776341663532271e-06, "loss": 2.1899, "step": 15061 }, { "epoch": 0.8080472103004291, "grad_norm": 0.65625, "learning_rate": 4.776305744958566e-06, "loss": 2.14, "step": 15062 }, { "epoch": 0.8081008583690987, "grad_norm": 0.5078125, "learning_rate": 4.776269823635985e-06, "loss": 2.3352, "step": 15063 }, { "epoch": 0.8081545064377682, "grad_norm": 0.46875, "learning_rate": 4.776233899564573e-06, "loss": 1.7284, "step": 15064 }, { "epoch": 0.8082081545064378, "grad_norm": 0.44921875, "learning_rate": 4.776197972744372e-06, "loss": 2.1119, "step": 15065 }, { "epoch": 0.8082618025751073, "grad_norm": 0.41015625, "learning_rate": 4.776162043175425e-06, "loss": 2.068, "step": 15066 }, { "epoch": 0.8083154506437769, "grad_norm": 0.546875, "learning_rate": 4.7761261108577775e-06, "loss": 2.4325, "step": 15067 }, { "epoch": 0.8083690987124463, "grad_norm": 0.640625, "learning_rate": 4.776090175791471e-06, "loss": 2.2317, "step": 15068 }, { "epoch": 0.8084227467811159, "grad_norm": 0.490234375, "learning_rate": 4.776054237976549e-06, "loss": 2.4719, "step": 15069 }, { "epoch": 0.8084763948497854, "grad_norm": 0.50390625, "learning_rate": 4.776018297413057e-06, "loss": 2.2792, "step": 15070 }, { "epoch": 0.808530042918455, "grad_norm": 0.431640625, "learning_rate": 4.775982354101036e-06, "loss": 2.4166, "step": 15071 }, { "epoch": 0.8085836909871245, "grad_norm": 0.4765625, "learning_rate": 4.775946408040529e-06, "loss": 2.4003, "step": 15072 }, { "epoch": 0.808637339055794, "grad_norm": 0.43359375, "learning_rate": 4.775910459231582e-06, "loss": 2.3922, "step": 15073 }, { "epoch": 0.8086909871244635, "grad_norm": 0.490234375, "learning_rate": 4.775874507674236e-06, "loss": 2.378, "step": 15074 }, { "epoch": 0.808744635193133, "grad_norm": 0.5390625, "learning_rate": 4.775838553368536e-06, "loss": 2.1628, "step": 15075 }, { "epoch": 0.8087982832618026, "grad_norm": 0.400390625, "learning_rate": 4.775802596314525e-06, "loss": 2.2236, "step": 15076 }, { "epoch": 0.8088519313304721, "grad_norm": 0.462890625, "learning_rate": 4.7757666365122455e-06, "loss": 2.1184, "step": 15077 }, { "epoch": 0.8089055793991416, "grad_norm": 0.439453125, "learning_rate": 4.775730673961743e-06, "loss": 1.9073, "step": 15078 }, { "epoch": 0.8089592274678111, "grad_norm": 0.4609375, "learning_rate": 4.775694708663058e-06, "loss": 2.188, "step": 15079 }, { "epoch": 0.8090128755364807, "grad_norm": 1.03125, "learning_rate": 4.7756587406162365e-06, "loss": 2.2946, "step": 15080 }, { "epoch": 0.8090665236051502, "grad_norm": 0.443359375, "learning_rate": 4.775622769821321e-06, "loss": 1.974, "step": 15081 }, { "epoch": 0.8091201716738198, "grad_norm": 0.625, "learning_rate": 4.775586796278354e-06, "loss": 2.4099, "step": 15082 }, { "epoch": 0.8091738197424893, "grad_norm": 0.50390625, "learning_rate": 4.775550819987381e-06, "loss": 1.4813, "step": 15083 }, { "epoch": 0.8092274678111588, "grad_norm": 0.46875, "learning_rate": 4.775514840948444e-06, "loss": 2.3324, "step": 15084 }, { "epoch": 0.8092811158798283, "grad_norm": 0.458984375, "learning_rate": 4.775478859161587e-06, "loss": 2.271, "step": 15085 }, { "epoch": 0.8093347639484979, "grad_norm": 0.396484375, "learning_rate": 4.775442874626852e-06, "loss": 2.4117, "step": 15086 }, { "epoch": 0.8093884120171674, "grad_norm": 0.515625, "learning_rate": 4.775406887344285e-06, "loss": 2.0731, "step": 15087 }, { "epoch": 0.8094420600858369, "grad_norm": 0.47265625, "learning_rate": 4.775370897313927e-06, "loss": 2.3715, "step": 15088 }, { "epoch": 0.8094957081545064, "grad_norm": 0.48828125, "learning_rate": 4.775334904535823e-06, "loss": 2.2396, "step": 15089 }, { "epoch": 0.8095493562231759, "grad_norm": 0.56640625, "learning_rate": 4.775298909010016e-06, "loss": 2.3837, "step": 15090 }, { "epoch": 0.8096030042918455, "grad_norm": 0.44140625, "learning_rate": 4.77526291073655e-06, "loss": 2.3332, "step": 15091 }, { "epoch": 0.809656652360515, "grad_norm": 0.44140625, "learning_rate": 4.775226909715468e-06, "loss": 2.3646, "step": 15092 }, { "epoch": 0.8097103004291846, "grad_norm": 0.52734375, "learning_rate": 4.7751909059468126e-06, "loss": 2.4356, "step": 15093 }, { "epoch": 0.809763948497854, "grad_norm": 0.5, "learning_rate": 4.7751548994306294e-06, "loss": 2.1486, "step": 15094 }, { "epoch": 0.8098175965665236, "grad_norm": 0.404296875, "learning_rate": 4.7751188901669595e-06, "loss": 1.8424, "step": 15095 }, { "epoch": 0.8098712446351931, "grad_norm": 0.52734375, "learning_rate": 4.7750828781558475e-06, "loss": 2.3697, "step": 15096 }, { "epoch": 0.8099248927038627, "grad_norm": 0.60546875, "learning_rate": 4.7750468633973375e-06, "loss": 2.448, "step": 15097 }, { "epoch": 0.8099785407725322, "grad_norm": 0.453125, "learning_rate": 4.775010845891472e-06, "loss": 2.3659, "step": 15098 }, { "epoch": 0.8100321888412018, "grad_norm": 0.45703125, "learning_rate": 4.774974825638296e-06, "loss": 2.1729, "step": 15099 }, { "epoch": 0.8100858369098712, "grad_norm": 0.4453125, "learning_rate": 4.774938802637851e-06, "loss": 2.2699, "step": 15100 }, { "epoch": 0.8101394849785408, "grad_norm": 0.458984375, "learning_rate": 4.7749027768901805e-06, "loss": 2.3731, "step": 15101 }, { "epoch": 0.8101931330472103, "grad_norm": 0.423828125, "learning_rate": 4.77486674839533e-06, "loss": 2.2718, "step": 15102 }, { "epoch": 0.8102467811158798, "grad_norm": 0.453125, "learning_rate": 4.774830717153342e-06, "loss": 2.14, "step": 15103 }, { "epoch": 0.8103004291845494, "grad_norm": 0.470703125, "learning_rate": 4.774794683164259e-06, "loss": 2.2531, "step": 15104 }, { "epoch": 0.8103540772532188, "grad_norm": 0.4921875, "learning_rate": 4.774758646428126e-06, "loss": 2.1885, "step": 15105 }, { "epoch": 0.8104077253218884, "grad_norm": 0.48046875, "learning_rate": 4.774722606944985e-06, "loss": 2.4504, "step": 15106 }, { "epoch": 0.8104613733905579, "grad_norm": 0.52734375, "learning_rate": 4.7746865647148824e-06, "loss": 2.0186, "step": 15107 }, { "epoch": 0.8105150214592275, "grad_norm": 0.470703125, "learning_rate": 4.774650519737858e-06, "loss": 2.2704, "step": 15108 }, { "epoch": 0.810568669527897, "grad_norm": 0.5, "learning_rate": 4.774614472013957e-06, "loss": 2.3405, "step": 15109 }, { "epoch": 0.8106223175965666, "grad_norm": 0.49609375, "learning_rate": 4.774578421543225e-06, "loss": 2.3512, "step": 15110 }, { "epoch": 0.810675965665236, "grad_norm": 0.474609375, "learning_rate": 4.774542368325702e-06, "loss": 2.2645, "step": 15111 }, { "epoch": 0.8107296137339056, "grad_norm": 0.392578125, "learning_rate": 4.774506312361433e-06, "loss": 2.1136, "step": 15112 }, { "epoch": 0.8107832618025751, "grad_norm": 0.470703125, "learning_rate": 4.774470253650463e-06, "loss": 2.3407, "step": 15113 }, { "epoch": 0.8108369098712447, "grad_norm": 0.380859375, "learning_rate": 4.774434192192833e-06, "loss": 2.2786, "step": 15114 }, { "epoch": 0.8108905579399142, "grad_norm": 0.5078125, "learning_rate": 4.7743981279885875e-06, "loss": 1.997, "step": 15115 }, { "epoch": 0.8109442060085837, "grad_norm": 0.48828125, "learning_rate": 4.774362061037772e-06, "loss": 2.2354, "step": 15116 }, { "epoch": 0.8109978540772532, "grad_norm": 0.490234375, "learning_rate": 4.774325991340427e-06, "loss": 2.1993, "step": 15117 }, { "epoch": 0.8110515021459227, "grad_norm": 0.4453125, "learning_rate": 4.774289918896597e-06, "loss": 2.2995, "step": 15118 }, { "epoch": 0.8111051502145923, "grad_norm": 0.5, "learning_rate": 4.774253843706327e-06, "loss": 2.2236, "step": 15119 }, { "epoch": 0.8111587982832618, "grad_norm": 0.5234375, "learning_rate": 4.774217765769659e-06, "loss": 2.2058, "step": 15120 }, { "epoch": 0.8112124463519313, "grad_norm": 0.4921875, "learning_rate": 4.774181685086637e-06, "loss": 2.2516, "step": 15121 }, { "epoch": 0.8112660944206008, "grad_norm": 0.49609375, "learning_rate": 4.7741456016573055e-06, "loss": 2.5028, "step": 15122 }, { "epoch": 0.8113197424892704, "grad_norm": 0.365234375, "learning_rate": 4.7741095154817064e-06, "loss": 2.0789, "step": 15123 }, { "epoch": 0.8113733905579399, "grad_norm": 0.6015625, "learning_rate": 4.774073426559884e-06, "loss": 1.4295, "step": 15124 }, { "epoch": 0.8114270386266095, "grad_norm": 0.455078125, "learning_rate": 4.774037334891883e-06, "loss": 2.2696, "step": 15125 }, { "epoch": 0.811480686695279, "grad_norm": 0.44921875, "learning_rate": 4.774001240477745e-06, "loss": 2.2375, "step": 15126 }, { "epoch": 0.8115343347639485, "grad_norm": 0.77734375, "learning_rate": 4.773965143317515e-06, "loss": 2.233, "step": 15127 }, { "epoch": 0.811587982832618, "grad_norm": 0.49609375, "learning_rate": 4.773929043411236e-06, "loss": 2.0752, "step": 15128 }, { "epoch": 0.8116416309012876, "grad_norm": 0.375, "learning_rate": 4.773892940758952e-06, "loss": 2.3974, "step": 15129 }, { "epoch": 0.8116952789699571, "grad_norm": 0.41015625, "learning_rate": 4.773856835360706e-06, "loss": 2.3384, "step": 15130 }, { "epoch": 0.8117489270386266, "grad_norm": 0.515625, "learning_rate": 4.7738207272165425e-06, "loss": 2.2478, "step": 15131 }, { "epoch": 0.8118025751072961, "grad_norm": 0.451171875, "learning_rate": 4.773784616326504e-06, "loss": 2.5398, "step": 15132 }, { "epoch": 0.8118562231759656, "grad_norm": 1.1953125, "learning_rate": 4.773748502690636e-06, "loss": 2.4463, "step": 15133 }, { "epoch": 0.8119098712446352, "grad_norm": 0.515625, "learning_rate": 4.773712386308979e-06, "loss": 2.3298, "step": 15134 }, { "epoch": 0.8119635193133047, "grad_norm": 0.443359375, "learning_rate": 4.7736762671815794e-06, "loss": 2.3064, "step": 15135 }, { "epoch": 0.8120171673819743, "grad_norm": 0.546875, "learning_rate": 4.77364014530848e-06, "loss": 1.6181, "step": 15136 }, { "epoch": 0.8120708154506437, "grad_norm": 0.453125, "learning_rate": 4.773604020689724e-06, "loss": 2.2248, "step": 15137 }, { "epoch": 0.8121244635193133, "grad_norm": 0.43359375, "learning_rate": 4.7735678933253545e-06, "loss": 2.2404, "step": 15138 }, { "epoch": 0.8121781115879828, "grad_norm": 0.53125, "learning_rate": 4.773531763215417e-06, "loss": 2.2584, "step": 15139 }, { "epoch": 0.8122317596566524, "grad_norm": 0.55078125, "learning_rate": 4.773495630359953e-06, "loss": 1.785, "step": 15140 }, { "epoch": 0.8122854077253219, "grad_norm": 0.5078125, "learning_rate": 4.7734594947590075e-06, "loss": 2.38, "step": 15141 }, { "epoch": 0.8123390557939915, "grad_norm": 0.47265625, "learning_rate": 4.773423356412624e-06, "loss": 2.3511, "step": 15142 }, { "epoch": 0.8123927038626609, "grad_norm": 0.453125, "learning_rate": 4.773387215320846e-06, "loss": 2.2579, "step": 15143 }, { "epoch": 0.8124463519313305, "grad_norm": 0.46484375, "learning_rate": 4.773351071483718e-06, "loss": 2.3263, "step": 15144 }, { "epoch": 0.8125, "grad_norm": 0.53125, "learning_rate": 4.773314924901281e-06, "loss": 2.1028, "step": 15145 }, { "epoch": 0.8125536480686695, "grad_norm": 0.470703125, "learning_rate": 4.773278775573581e-06, "loss": 1.9924, "step": 15146 }, { "epoch": 0.8126072961373391, "grad_norm": 0.455078125, "learning_rate": 4.773242623500661e-06, "loss": 1.8978, "step": 15147 }, { "epoch": 0.8126609442060085, "grad_norm": 0.453125, "learning_rate": 4.773206468682565e-06, "loss": 2.2471, "step": 15148 }, { "epoch": 0.8127145922746781, "grad_norm": 0.40625, "learning_rate": 4.773170311119336e-06, "loss": 2.4024, "step": 15149 }, { "epoch": 0.8127682403433476, "grad_norm": 0.44921875, "learning_rate": 4.773134150811018e-06, "loss": 2.2911, "step": 15150 }, { "epoch": 0.8128218884120172, "grad_norm": 0.447265625, "learning_rate": 4.773097987757655e-06, "loss": 2.108, "step": 15151 }, { "epoch": 0.8128755364806867, "grad_norm": 0.37890625, "learning_rate": 4.77306182195929e-06, "loss": 2.0619, "step": 15152 }, { "epoch": 0.8129291845493563, "grad_norm": 0.44921875, "learning_rate": 4.7730256534159665e-06, "loss": 2.2398, "step": 15153 }, { "epoch": 0.8129828326180257, "grad_norm": 0.40234375, "learning_rate": 4.77298948212773e-06, "loss": 1.9308, "step": 15154 }, { "epoch": 0.8130364806866953, "grad_norm": 0.5, "learning_rate": 4.772953308094622e-06, "loss": 2.3427, "step": 15155 }, { "epoch": 0.8130901287553648, "grad_norm": 0.48828125, "learning_rate": 4.772917131316688e-06, "loss": 2.3513, "step": 15156 }, { "epoch": 0.8131437768240344, "grad_norm": 0.9609375, "learning_rate": 4.77288095179397e-06, "loss": 2.3525, "step": 15157 }, { "epoch": 0.8131974248927039, "grad_norm": 0.435546875, "learning_rate": 4.772844769526513e-06, "loss": 2.3432, "step": 15158 }, { "epoch": 0.8132510729613734, "grad_norm": 0.41796875, "learning_rate": 4.772808584514359e-06, "loss": 2.4844, "step": 15159 }, { "epoch": 0.8133047210300429, "grad_norm": 0.490234375, "learning_rate": 4.772772396757554e-06, "loss": 1.9541, "step": 15160 }, { "epoch": 0.8133583690987124, "grad_norm": 0.609375, "learning_rate": 4.77273620625614e-06, "loss": 2.5801, "step": 15161 }, { "epoch": 0.813412017167382, "grad_norm": 0.53515625, "learning_rate": 4.772700013010161e-06, "loss": 2.1293, "step": 15162 }, { "epoch": 0.8134656652360515, "grad_norm": 0.6796875, "learning_rate": 4.772663817019662e-06, "loss": 2.2429, "step": 15163 }, { "epoch": 0.813519313304721, "grad_norm": 0.474609375, "learning_rate": 4.772627618284684e-06, "loss": 2.3913, "step": 15164 }, { "epoch": 0.8135729613733905, "grad_norm": 0.470703125, "learning_rate": 4.772591416805274e-06, "loss": 2.3551, "step": 15165 }, { "epoch": 0.8136266094420601, "grad_norm": 0.5390625, "learning_rate": 4.772555212581473e-06, "loss": 2.1631, "step": 15166 }, { "epoch": 0.8136802575107296, "grad_norm": 0.45703125, "learning_rate": 4.772519005613326e-06, "loss": 2.1993, "step": 15167 }, { "epoch": 0.8137339055793992, "grad_norm": 0.5546875, "learning_rate": 4.772482795900877e-06, "loss": 2.5725, "step": 15168 }, { "epoch": 0.8137875536480687, "grad_norm": 0.462890625, "learning_rate": 4.772446583444169e-06, "loss": 2.2296, "step": 15169 }, { "epoch": 0.8138412017167382, "grad_norm": 0.45703125, "learning_rate": 4.772410368243246e-06, "loss": 2.0027, "step": 15170 }, { "epoch": 0.8138948497854077, "grad_norm": 0.478515625, "learning_rate": 4.772374150298152e-06, "loss": 2.2699, "step": 15171 }, { "epoch": 0.8139484978540773, "grad_norm": 0.451171875, "learning_rate": 4.77233792960893e-06, "loss": 2.3144, "step": 15172 }, { "epoch": 0.8140021459227468, "grad_norm": 0.44921875, "learning_rate": 4.772301706175625e-06, "loss": 2.1922, "step": 15173 }, { "epoch": 0.8140557939914163, "grad_norm": 0.48828125, "learning_rate": 4.772265479998279e-06, "loss": 2.3627, "step": 15174 }, { "epoch": 0.8141094420600858, "grad_norm": 0.466796875, "learning_rate": 4.772229251076936e-06, "loss": 2.273, "step": 15175 }, { "epoch": 0.8141630901287553, "grad_norm": 0.609375, "learning_rate": 4.772193019411642e-06, "loss": 2.3407, "step": 15176 }, { "epoch": 0.8142167381974249, "grad_norm": 0.419921875, "learning_rate": 4.772156785002439e-06, "loss": 2.2113, "step": 15177 }, { "epoch": 0.8142703862660944, "grad_norm": 0.466796875, "learning_rate": 4.7721205478493705e-06, "loss": 2.3591, "step": 15178 }, { "epoch": 0.814324034334764, "grad_norm": 0.384765625, "learning_rate": 4.7720843079524804e-06, "loss": 2.0915, "step": 15179 }, { "epoch": 0.8143776824034334, "grad_norm": 0.40234375, "learning_rate": 4.7720480653118136e-06, "loss": 2.3681, "step": 15180 }, { "epoch": 0.814431330472103, "grad_norm": 0.43359375, "learning_rate": 4.772011819927412e-06, "loss": 1.9815, "step": 15181 }, { "epoch": 0.8144849785407725, "grad_norm": 0.48046875, "learning_rate": 4.771975571799322e-06, "loss": 2.2191, "step": 15182 }, { "epoch": 0.8145386266094421, "grad_norm": 0.373046875, "learning_rate": 4.771939320927584e-06, "loss": 2.1934, "step": 15183 }, { "epoch": 0.8145922746781116, "grad_norm": 0.466796875, "learning_rate": 4.7719030673122445e-06, "loss": 2.3421, "step": 15184 }, { "epoch": 0.8146459227467812, "grad_norm": 0.5078125, "learning_rate": 4.771866810953346e-06, "loss": 2.4754, "step": 15185 }, { "epoch": 0.8146995708154506, "grad_norm": 0.4609375, "learning_rate": 4.771830551850934e-06, "loss": 2.2241, "step": 15186 }, { "epoch": 0.8147532188841202, "grad_norm": 0.455078125, "learning_rate": 4.771794290005049e-06, "loss": 2.3121, "step": 15187 }, { "epoch": 0.8148068669527897, "grad_norm": 0.43359375, "learning_rate": 4.771758025415738e-06, "loss": 2.188, "step": 15188 }, { "epoch": 0.8148605150214592, "grad_norm": 0.375, "learning_rate": 4.771721758083042e-06, "loss": 2.1505, "step": 15189 }, { "epoch": 0.8149141630901288, "grad_norm": 0.49609375, "learning_rate": 4.771685488007008e-06, "loss": 2.3241, "step": 15190 }, { "epoch": 0.8149678111587982, "grad_norm": 0.419921875, "learning_rate": 4.771649215187677e-06, "loss": 1.8503, "step": 15191 }, { "epoch": 0.8150214592274678, "grad_norm": 0.37109375, "learning_rate": 4.771612939625094e-06, "loss": 2.0851, "step": 15192 }, { "epoch": 0.8150751072961373, "grad_norm": 0.54296875, "learning_rate": 4.771576661319303e-06, "loss": 2.1994, "step": 15193 }, { "epoch": 0.8151287553648069, "grad_norm": 0.466796875, "learning_rate": 4.771540380270348e-06, "loss": 2.3224, "step": 15194 }, { "epoch": 0.8151824034334764, "grad_norm": 0.76953125, "learning_rate": 4.771504096478271e-06, "loss": 2.2727, "step": 15195 }, { "epoch": 0.815236051502146, "grad_norm": 0.5625, "learning_rate": 4.771467809943117e-06, "loss": 2.3851, "step": 15196 }, { "epoch": 0.8152896995708154, "grad_norm": 0.4375, "learning_rate": 4.771431520664932e-06, "loss": 2.355, "step": 15197 }, { "epoch": 0.815343347639485, "grad_norm": 0.64453125, "learning_rate": 4.771395228643757e-06, "loss": 2.3812, "step": 15198 }, { "epoch": 0.8153969957081545, "grad_norm": 0.55078125, "learning_rate": 4.771358933879636e-06, "loss": 2.2375, "step": 15199 }, { "epoch": 0.8154506437768241, "grad_norm": 0.474609375, "learning_rate": 4.771322636372614e-06, "loss": 2.2744, "step": 15200 }, { "epoch": 0.8155042918454936, "grad_norm": 0.45703125, "learning_rate": 4.771286336122733e-06, "loss": 2.3876, "step": 15201 }, { "epoch": 0.8155579399141631, "grad_norm": 0.50390625, "learning_rate": 4.771250033130039e-06, "loss": 2.6186, "step": 15202 }, { "epoch": 0.8156115879828326, "grad_norm": 0.431640625, "learning_rate": 4.7712137273945756e-06, "loss": 2.2935, "step": 15203 }, { "epoch": 0.8156652360515021, "grad_norm": 0.361328125, "learning_rate": 4.771177418916385e-06, "loss": 2.0543, "step": 15204 }, { "epoch": 0.8157188841201717, "grad_norm": 0.51171875, "learning_rate": 4.771141107695512e-06, "loss": 2.6428, "step": 15205 }, { "epoch": 0.8157725321888412, "grad_norm": 0.41015625, "learning_rate": 4.771104793732002e-06, "loss": 2.336, "step": 15206 }, { "epoch": 0.8158261802575107, "grad_norm": 0.453125, "learning_rate": 4.771068477025896e-06, "loss": 2.2847, "step": 15207 }, { "epoch": 0.8158798283261802, "grad_norm": 0.46484375, "learning_rate": 4.77103215757724e-06, "loss": 2.3655, "step": 15208 }, { "epoch": 0.8159334763948498, "grad_norm": 0.359375, "learning_rate": 4.770995835386077e-06, "loss": 1.907, "step": 15209 }, { "epoch": 0.8159871244635193, "grad_norm": 3.296875, "learning_rate": 4.77095951045245e-06, "loss": 2.1379, "step": 15210 }, { "epoch": 0.8160407725321889, "grad_norm": 0.4453125, "learning_rate": 4.770923182776404e-06, "loss": 2.4602, "step": 15211 }, { "epoch": 0.8160944206008584, "grad_norm": 0.53125, "learning_rate": 4.770886852357983e-06, "loss": 2.3512, "step": 15212 }, { "epoch": 0.8161480686695279, "grad_norm": 0.60546875, "learning_rate": 4.77085051919723e-06, "loss": 2.0954, "step": 15213 }, { "epoch": 0.8162017167381974, "grad_norm": 0.5, "learning_rate": 4.77081418329419e-06, "loss": 2.4977, "step": 15214 }, { "epoch": 0.816255364806867, "grad_norm": 0.42578125, "learning_rate": 4.770777844648906e-06, "loss": 2.0574, "step": 15215 }, { "epoch": 0.8163090128755365, "grad_norm": 0.431640625, "learning_rate": 4.7707415032614225e-06, "loss": 2.2834, "step": 15216 }, { "epoch": 0.816362660944206, "grad_norm": 0.416015625, "learning_rate": 4.770705159131783e-06, "loss": 1.8007, "step": 15217 }, { "epoch": 0.8164163090128755, "grad_norm": 0.51171875, "learning_rate": 4.7706688122600305e-06, "loss": 2.1356, "step": 15218 }, { "epoch": 0.816469957081545, "grad_norm": 0.58203125, "learning_rate": 4.770632462646211e-06, "loss": 2.0295, "step": 15219 }, { "epoch": 0.8165236051502146, "grad_norm": 0.443359375, "learning_rate": 4.7705961102903665e-06, "loss": 2.227, "step": 15220 }, { "epoch": 0.8165772532188841, "grad_norm": 0.447265625, "learning_rate": 4.770559755192542e-06, "loss": 2.393, "step": 15221 }, { "epoch": 0.8166309012875537, "grad_norm": 0.466796875, "learning_rate": 4.770523397352781e-06, "loss": 2.5425, "step": 15222 }, { "epoch": 0.8166845493562231, "grad_norm": 0.5078125, "learning_rate": 4.770487036771127e-06, "loss": 2.1413, "step": 15223 }, { "epoch": 0.8167381974248927, "grad_norm": 0.53515625, "learning_rate": 4.770450673447625e-06, "loss": 1.3895, "step": 15224 }, { "epoch": 0.8167918454935622, "grad_norm": 0.48046875, "learning_rate": 4.770414307382318e-06, "loss": 2.4448, "step": 15225 }, { "epoch": 0.8168454935622318, "grad_norm": 0.404296875, "learning_rate": 4.77037793857525e-06, "loss": 2.3979, "step": 15226 }, { "epoch": 0.8168991416309013, "grad_norm": 0.494140625, "learning_rate": 4.770341567026466e-06, "loss": 2.1568, "step": 15227 }, { "epoch": 0.8169527896995709, "grad_norm": 0.439453125, "learning_rate": 4.770305192736008e-06, "loss": 2.3209, "step": 15228 }, { "epoch": 0.8170064377682403, "grad_norm": 0.470703125, "learning_rate": 4.770268815703922e-06, "loss": 2.2676, "step": 15229 }, { "epoch": 0.8170600858369099, "grad_norm": 0.5, "learning_rate": 4.77023243593025e-06, "loss": 2.3618, "step": 15230 }, { "epoch": 0.8171137339055794, "grad_norm": 0.447265625, "learning_rate": 4.7701960534150365e-06, "loss": 2.3488, "step": 15231 }, { "epoch": 0.8171673819742489, "grad_norm": 0.39453125, "learning_rate": 4.770159668158326e-06, "loss": 2.6652, "step": 15232 }, { "epoch": 0.8172210300429185, "grad_norm": 0.458984375, "learning_rate": 4.770123280160163e-06, "loss": 2.2662, "step": 15233 }, { "epoch": 0.8172746781115879, "grad_norm": 0.474609375, "learning_rate": 4.77008688942059e-06, "loss": 2.5127, "step": 15234 }, { "epoch": 0.8173283261802575, "grad_norm": 0.470703125, "learning_rate": 4.770050495939651e-06, "loss": 2.3439, "step": 15235 }, { "epoch": 0.817381974248927, "grad_norm": 0.400390625, "learning_rate": 4.770014099717391e-06, "loss": 1.9863, "step": 15236 }, { "epoch": 0.8174356223175966, "grad_norm": 0.478515625, "learning_rate": 4.7699777007538535e-06, "loss": 2.3357, "step": 15237 }, { "epoch": 0.8174892703862661, "grad_norm": 0.49609375, "learning_rate": 4.769941299049082e-06, "loss": 2.1666, "step": 15238 }, { "epoch": 0.8175429184549357, "grad_norm": 0.5078125, "learning_rate": 4.769904894603121e-06, "loss": 2.2131, "step": 15239 }, { "epoch": 0.8175965665236051, "grad_norm": 0.4609375, "learning_rate": 4.769868487416015e-06, "loss": 1.9724, "step": 15240 }, { "epoch": 0.8176502145922747, "grad_norm": 0.4296875, "learning_rate": 4.769832077487807e-06, "loss": 2.231, "step": 15241 }, { "epoch": 0.8177038626609442, "grad_norm": 0.44921875, "learning_rate": 4.769795664818542e-06, "loss": 2.2599, "step": 15242 }, { "epoch": 0.8177575107296138, "grad_norm": 0.474609375, "learning_rate": 4.769759249408262e-06, "loss": 2.4111, "step": 15243 }, { "epoch": 0.8178111587982833, "grad_norm": 0.546875, "learning_rate": 4.769722831257012e-06, "loss": 2.4881, "step": 15244 }, { "epoch": 0.8178648068669528, "grad_norm": 0.470703125, "learning_rate": 4.769686410364837e-06, "loss": 2.3946, "step": 15245 }, { "epoch": 0.8179184549356223, "grad_norm": 0.88671875, "learning_rate": 4.7696499867317804e-06, "loss": 2.3104, "step": 15246 }, { "epoch": 0.8179721030042918, "grad_norm": 0.59375, "learning_rate": 4.769613560357886e-06, "loss": 2.5074, "step": 15247 }, { "epoch": 0.8180257510729614, "grad_norm": 0.46875, "learning_rate": 4.769577131243197e-06, "loss": 2.2523, "step": 15248 }, { "epoch": 0.8180793991416309, "grad_norm": 0.48828125, "learning_rate": 4.769540699387759e-06, "loss": 2.243, "step": 15249 }, { "epoch": 0.8181330472103004, "grad_norm": 0.56640625, "learning_rate": 4.769504264791614e-06, "loss": 2.2524, "step": 15250 }, { "epoch": 0.8181866952789699, "grad_norm": 0.408203125, "learning_rate": 4.7694678274548085e-06, "loss": 2.2976, "step": 15251 }, { "epoch": 0.8182403433476395, "grad_norm": 0.48046875, "learning_rate": 4.769431387377385e-06, "loss": 2.5022, "step": 15252 }, { "epoch": 0.818293991416309, "grad_norm": 0.39453125, "learning_rate": 4.769394944559386e-06, "loss": 2.0831, "step": 15253 }, { "epoch": 0.8183476394849786, "grad_norm": 0.4765625, "learning_rate": 4.769358499000859e-06, "loss": 2.1477, "step": 15254 }, { "epoch": 0.818401287553648, "grad_norm": 0.375, "learning_rate": 4.769322050701846e-06, "loss": 2.0774, "step": 15255 }, { "epoch": 0.8184549356223176, "grad_norm": 0.4921875, "learning_rate": 4.76928559966239e-06, "loss": 2.2709, "step": 15256 }, { "epoch": 0.8185085836909871, "grad_norm": 0.4921875, "learning_rate": 4.769249145882537e-06, "loss": 2.421, "step": 15257 }, { "epoch": 0.8185622317596567, "grad_norm": 0.33984375, "learning_rate": 4.76921268936233e-06, "loss": 2.1379, "step": 15258 }, { "epoch": 0.8186158798283262, "grad_norm": 0.462890625, "learning_rate": 4.769176230101813e-06, "loss": 2.2871, "step": 15259 }, { "epoch": 0.8186695278969958, "grad_norm": 0.3984375, "learning_rate": 4.769139768101031e-06, "loss": 2.1887, "step": 15260 }, { "epoch": 0.8187231759656652, "grad_norm": 0.59375, "learning_rate": 4.769103303360027e-06, "loss": 2.107, "step": 15261 }, { "epoch": 0.8187768240343347, "grad_norm": 0.58984375, "learning_rate": 4.769066835878845e-06, "loss": 1.3624, "step": 15262 }, { "epoch": 0.8188304721030043, "grad_norm": 0.482421875, "learning_rate": 4.76903036565753e-06, "loss": 2.3689, "step": 15263 }, { "epoch": 0.8188841201716738, "grad_norm": 0.53125, "learning_rate": 4.7689938926961246e-06, "loss": 2.3182, "step": 15264 }, { "epoch": 0.8189377682403434, "grad_norm": 0.50390625, "learning_rate": 4.768957416994674e-06, "loss": 2.3538, "step": 15265 }, { "epoch": 0.8189914163090128, "grad_norm": 0.60546875, "learning_rate": 4.768920938553222e-06, "loss": 1.62, "step": 15266 }, { "epoch": 0.8190450643776824, "grad_norm": 0.443359375, "learning_rate": 4.768884457371813e-06, "loss": 2.1049, "step": 15267 }, { "epoch": 0.8190987124463519, "grad_norm": 0.443359375, "learning_rate": 4.7688479734504905e-06, "loss": 2.1122, "step": 15268 }, { "epoch": 0.8191523605150215, "grad_norm": 0.38671875, "learning_rate": 4.768811486789299e-06, "loss": 2.2548, "step": 15269 }, { "epoch": 0.819206008583691, "grad_norm": 0.4921875, "learning_rate": 4.768774997388281e-06, "loss": 2.2323, "step": 15270 }, { "epoch": 0.8192596566523606, "grad_norm": 0.4921875, "learning_rate": 4.7687385052474835e-06, "loss": 2.3171, "step": 15271 }, { "epoch": 0.81931330472103, "grad_norm": 0.41015625, "learning_rate": 4.7687020103669474e-06, "loss": 2.0284, "step": 15272 }, { "epoch": 0.8193669527896996, "grad_norm": 0.38671875, "learning_rate": 4.768665512746719e-06, "loss": 2.1529, "step": 15273 }, { "epoch": 0.8194206008583691, "grad_norm": 0.7109375, "learning_rate": 4.768629012386841e-06, "loss": 2.2503, "step": 15274 }, { "epoch": 0.8194742489270386, "grad_norm": 0.56640625, "learning_rate": 4.768592509287359e-06, "loss": 2.0177, "step": 15275 }, { "epoch": 0.8195278969957082, "grad_norm": 0.443359375, "learning_rate": 4.768556003448315e-06, "loss": 2.2692, "step": 15276 }, { "epoch": 0.8195815450643776, "grad_norm": 0.392578125, "learning_rate": 4.768519494869755e-06, "loss": 2.3267, "step": 15277 }, { "epoch": 0.8196351931330472, "grad_norm": 0.494140625, "learning_rate": 4.7684829835517225e-06, "loss": 2.224, "step": 15278 }, { "epoch": 0.8196888412017167, "grad_norm": 0.482421875, "learning_rate": 4.768446469494261e-06, "loss": 2.2157, "step": 15279 }, { "epoch": 0.8197424892703863, "grad_norm": 0.416015625, "learning_rate": 4.768409952697416e-06, "loss": 2.4004, "step": 15280 }, { "epoch": 0.8197961373390558, "grad_norm": 0.4765625, "learning_rate": 4.76837343316123e-06, "loss": 2.3358, "step": 15281 }, { "epoch": 0.8198497854077254, "grad_norm": 0.73046875, "learning_rate": 4.768336910885747e-06, "loss": 2.3434, "step": 15282 }, { "epoch": 0.8199034334763948, "grad_norm": 0.46484375, "learning_rate": 4.768300385871013e-06, "loss": 2.4556, "step": 15283 }, { "epoch": 0.8199570815450644, "grad_norm": 0.365234375, "learning_rate": 4.768263858117071e-06, "loss": 2.0566, "step": 15284 }, { "epoch": 0.8200107296137339, "grad_norm": 0.419921875, "learning_rate": 4.7682273276239635e-06, "loss": 2.1475, "step": 15285 }, { "epoch": 0.8200643776824035, "grad_norm": 0.388671875, "learning_rate": 4.768190794391737e-06, "loss": 2.3134, "step": 15286 }, { "epoch": 0.820118025751073, "grad_norm": 0.53515625, "learning_rate": 4.768154258420435e-06, "loss": 2.2213, "step": 15287 }, { "epoch": 0.8201716738197425, "grad_norm": 2.765625, "learning_rate": 4.768117719710102e-06, "loss": 2.1864, "step": 15288 }, { "epoch": 0.820225321888412, "grad_norm": 0.52734375, "learning_rate": 4.7680811782607805e-06, "loss": 2.0093, "step": 15289 }, { "epoch": 0.8202789699570815, "grad_norm": 0.4375, "learning_rate": 4.768044634072516e-06, "loss": 2.2903, "step": 15290 }, { "epoch": 0.8203326180257511, "grad_norm": 0.44921875, "learning_rate": 4.768008087145353e-06, "loss": 2.1558, "step": 15291 }, { "epoch": 0.8203862660944206, "grad_norm": 0.447265625, "learning_rate": 4.767971537479335e-06, "loss": 2.352, "step": 15292 }, { "epoch": 0.8204399141630901, "grad_norm": 0.38671875, "learning_rate": 4.767934985074505e-06, "loss": 2.3172, "step": 15293 }, { "epoch": 0.8204935622317596, "grad_norm": 0.46484375, "learning_rate": 4.7678984299309085e-06, "loss": 2.1912, "step": 15294 }, { "epoch": 0.8205472103004292, "grad_norm": 0.466796875, "learning_rate": 4.7678618720485895e-06, "loss": 2.3458, "step": 15295 }, { "epoch": 0.8206008583690987, "grad_norm": 0.427734375, "learning_rate": 4.767825311427592e-06, "loss": 2.1984, "step": 15296 }, { "epoch": 0.8206545064377683, "grad_norm": 0.4609375, "learning_rate": 4.767788748067961e-06, "loss": 2.3722, "step": 15297 }, { "epoch": 0.8207081545064377, "grad_norm": 0.48828125, "learning_rate": 4.767752181969738e-06, "loss": 2.2805, "step": 15298 }, { "epoch": 0.8207618025751073, "grad_norm": 0.51171875, "learning_rate": 4.7677156131329705e-06, "loss": 2.2054, "step": 15299 }, { "epoch": 0.8208154506437768, "grad_norm": 0.474609375, "learning_rate": 4.7676790415577e-06, "loss": 2.3807, "step": 15300 }, { "epoch": 0.8208690987124464, "grad_norm": 0.435546875, "learning_rate": 4.7676424672439725e-06, "loss": 2.247, "step": 15301 }, { "epoch": 0.8209227467811159, "grad_norm": 0.51953125, "learning_rate": 4.767605890191832e-06, "loss": 2.4393, "step": 15302 }, { "epoch": 0.8209763948497855, "grad_norm": 0.453125, "learning_rate": 4.767569310401321e-06, "loss": 2.496, "step": 15303 }, { "epoch": 0.8210300429184549, "grad_norm": 0.400390625, "learning_rate": 4.767532727872485e-06, "loss": 2.2781, "step": 15304 }, { "epoch": 0.8210836909871244, "grad_norm": 0.44921875, "learning_rate": 4.767496142605369e-06, "loss": 2.4737, "step": 15305 }, { "epoch": 0.821137339055794, "grad_norm": 0.62109375, "learning_rate": 4.7674595546000145e-06, "loss": 2.0356, "step": 15306 }, { "epoch": 0.8211909871244635, "grad_norm": 0.478515625, "learning_rate": 4.767422963856469e-06, "loss": 2.3103, "step": 15307 }, { "epoch": 0.8212446351931331, "grad_norm": 0.466796875, "learning_rate": 4.767386370374774e-06, "loss": 2.134, "step": 15308 }, { "epoch": 0.8212982832618025, "grad_norm": 0.419921875, "learning_rate": 4.767349774154974e-06, "loss": 2.0649, "step": 15309 }, { "epoch": 0.8213519313304721, "grad_norm": 0.5078125, "learning_rate": 4.767313175197116e-06, "loss": 2.3698, "step": 15310 }, { "epoch": 0.8214055793991416, "grad_norm": 0.5546875, "learning_rate": 4.767276573501241e-06, "loss": 2.1351, "step": 15311 }, { "epoch": 0.8214592274678112, "grad_norm": 0.40234375, "learning_rate": 4.7672399690673945e-06, "loss": 1.9976, "step": 15312 }, { "epoch": 0.8215128755364807, "grad_norm": 0.42578125, "learning_rate": 4.76720336189562e-06, "loss": 2.1604, "step": 15313 }, { "epoch": 0.8215665236051503, "grad_norm": 0.54296875, "learning_rate": 4.7671667519859625e-06, "loss": 2.2694, "step": 15314 }, { "epoch": 0.8216201716738197, "grad_norm": 0.51953125, "learning_rate": 4.7671301393384666e-06, "loss": 2.4296, "step": 15315 }, { "epoch": 0.8216738197424893, "grad_norm": 0.498046875, "learning_rate": 4.767093523953175e-06, "loss": 2.4957, "step": 15316 }, { "epoch": 0.8217274678111588, "grad_norm": 0.3984375, "learning_rate": 4.767056905830133e-06, "loss": 2.1285, "step": 15317 }, { "epoch": 0.8217811158798283, "grad_norm": 0.5078125, "learning_rate": 4.767020284969385e-06, "loss": 2.3236, "step": 15318 }, { "epoch": 0.8218347639484979, "grad_norm": 0.443359375, "learning_rate": 4.766983661370974e-06, "loss": 2.0892, "step": 15319 }, { "epoch": 0.8218884120171673, "grad_norm": 0.6953125, "learning_rate": 4.766947035034946e-06, "loss": 2.4366, "step": 15320 }, { "epoch": 0.8219420600858369, "grad_norm": 0.51171875, "learning_rate": 4.766910405961343e-06, "loss": 2.1933, "step": 15321 }, { "epoch": 0.8219957081545064, "grad_norm": 0.44140625, "learning_rate": 4.766873774150211e-06, "loss": 2.2143, "step": 15322 }, { "epoch": 0.822049356223176, "grad_norm": 0.404296875, "learning_rate": 4.766837139601594e-06, "loss": 2.0645, "step": 15323 }, { "epoch": 0.8221030042918455, "grad_norm": 0.443359375, "learning_rate": 4.766800502315536e-06, "loss": 2.315, "step": 15324 }, { "epoch": 0.822156652360515, "grad_norm": 0.8125, "learning_rate": 4.766763862292082e-06, "loss": 2.3956, "step": 15325 }, { "epoch": 0.8222103004291845, "grad_norm": 0.48046875, "learning_rate": 4.7667272195312745e-06, "loss": 2.2692, "step": 15326 }, { "epoch": 0.8222639484978541, "grad_norm": 0.51953125, "learning_rate": 4.766690574033158e-06, "loss": 2.4415, "step": 15327 }, { "epoch": 0.8223175965665236, "grad_norm": 0.48828125, "learning_rate": 4.766653925797778e-06, "loss": 2.1076, "step": 15328 }, { "epoch": 0.8223712446351932, "grad_norm": 0.458984375, "learning_rate": 4.7666172748251795e-06, "loss": 2.4077, "step": 15329 }, { "epoch": 0.8224248927038627, "grad_norm": 0.484375, "learning_rate": 4.766580621115404e-06, "loss": 2.3814, "step": 15330 }, { "epoch": 0.8224785407725322, "grad_norm": 0.62890625, "learning_rate": 4.766543964668498e-06, "loss": 2.2899, "step": 15331 }, { "epoch": 0.8225321888412017, "grad_norm": 0.404296875, "learning_rate": 4.766507305484504e-06, "loss": 2.228, "step": 15332 }, { "epoch": 0.8225858369098712, "grad_norm": 0.443359375, "learning_rate": 4.766470643563469e-06, "loss": 2.1317, "step": 15333 }, { "epoch": 0.8226394849785408, "grad_norm": 0.7265625, "learning_rate": 4.766433978905434e-06, "loss": 2.2609, "step": 15334 }, { "epoch": 0.8226931330472103, "grad_norm": 0.453125, "learning_rate": 4.766397311510446e-06, "loss": 2.2959, "step": 15335 }, { "epoch": 0.8227467811158798, "grad_norm": 0.515625, "learning_rate": 4.766360641378547e-06, "loss": 2.2818, "step": 15336 }, { "epoch": 0.8228004291845493, "grad_norm": 0.427734375, "learning_rate": 4.7663239685097835e-06, "loss": 2.2705, "step": 15337 }, { "epoch": 0.8228540772532189, "grad_norm": 0.8359375, "learning_rate": 4.766287292904198e-06, "loss": 2.3714, "step": 15338 }, { "epoch": 0.8229077253218884, "grad_norm": 0.52734375, "learning_rate": 4.766250614561836e-06, "loss": 2.346, "step": 15339 }, { "epoch": 0.822961373390558, "grad_norm": 0.625, "learning_rate": 4.766213933482741e-06, "loss": 2.7081, "step": 15340 }, { "epoch": 0.8230150214592274, "grad_norm": 0.4609375, "learning_rate": 4.7661772496669564e-06, "loss": 2.4447, "step": 15341 }, { "epoch": 0.823068669527897, "grad_norm": 0.427734375, "learning_rate": 4.766140563114529e-06, "loss": 2.1986, "step": 15342 }, { "epoch": 0.8231223175965665, "grad_norm": 0.70703125, "learning_rate": 4.766103873825501e-06, "loss": 2.1623, "step": 15343 }, { "epoch": 0.8231759656652361, "grad_norm": 0.55859375, "learning_rate": 4.766067181799918e-06, "loss": 2.2462, "step": 15344 }, { "epoch": 0.8232296137339056, "grad_norm": 0.455078125, "learning_rate": 4.766030487037824e-06, "loss": 2.4118, "step": 15345 }, { "epoch": 0.8232832618025752, "grad_norm": 0.49609375, "learning_rate": 4.765993789539263e-06, "loss": 2.2744, "step": 15346 }, { "epoch": 0.8233369098712446, "grad_norm": 0.47265625, "learning_rate": 4.765957089304279e-06, "loss": 2.2798, "step": 15347 }, { "epoch": 0.8233905579399141, "grad_norm": 0.416015625, "learning_rate": 4.7659203863329174e-06, "loss": 2.3186, "step": 15348 }, { "epoch": 0.8234442060085837, "grad_norm": 0.58203125, "learning_rate": 4.765883680625221e-06, "loss": 2.2532, "step": 15349 }, { "epoch": 0.8234978540772532, "grad_norm": 0.478515625, "learning_rate": 4.765846972181235e-06, "loss": 2.1852, "step": 15350 }, { "epoch": 0.8235515021459228, "grad_norm": 0.5, "learning_rate": 4.765810261001005e-06, "loss": 2.2109, "step": 15351 }, { "epoch": 0.8236051502145922, "grad_norm": 0.4296875, "learning_rate": 4.765773547084573e-06, "loss": 2.1995, "step": 15352 }, { "epoch": 0.8236587982832618, "grad_norm": 0.546875, "learning_rate": 4.765736830431984e-06, "loss": 2.3797, "step": 15353 }, { "epoch": 0.8237124463519313, "grad_norm": 0.392578125, "learning_rate": 4.7657001110432834e-06, "loss": 2.1544, "step": 15354 }, { "epoch": 0.8237660944206009, "grad_norm": 0.451171875, "learning_rate": 4.765663388918515e-06, "loss": 2.2685, "step": 15355 }, { "epoch": 0.8238197424892704, "grad_norm": 0.41796875, "learning_rate": 4.765626664057722e-06, "loss": 2.2873, "step": 15356 }, { "epoch": 0.82387339055794, "grad_norm": 0.484375, "learning_rate": 4.765589936460951e-06, "loss": 2.3642, "step": 15357 }, { "epoch": 0.8239270386266094, "grad_norm": 0.50390625, "learning_rate": 4.765553206128244e-06, "loss": 2.3018, "step": 15358 }, { "epoch": 0.823980686695279, "grad_norm": 0.72265625, "learning_rate": 4.765516473059647e-06, "loss": 2.1141, "step": 15359 }, { "epoch": 0.8240343347639485, "grad_norm": 0.416015625, "learning_rate": 4.765479737255204e-06, "loss": 2.2647, "step": 15360 }, { "epoch": 0.824087982832618, "grad_norm": 0.4765625, "learning_rate": 4.765442998714959e-06, "loss": 2.1487, "step": 15361 }, { "epoch": 0.8241416309012876, "grad_norm": 0.498046875, "learning_rate": 4.7654062574389565e-06, "loss": 2.2573, "step": 15362 }, { "epoch": 0.824195278969957, "grad_norm": 0.439453125, "learning_rate": 4.76536951342724e-06, "loss": 2.2012, "step": 15363 }, { "epoch": 0.8242489270386266, "grad_norm": 0.50390625, "learning_rate": 4.765332766679856e-06, "loss": 2.164, "step": 15364 }, { "epoch": 0.8243025751072961, "grad_norm": 0.447265625, "learning_rate": 4.765296017196847e-06, "loss": 2.1215, "step": 15365 }, { "epoch": 0.8243562231759657, "grad_norm": 0.4921875, "learning_rate": 4.765259264978259e-06, "loss": 2.5166, "step": 15366 }, { "epoch": 0.8244098712446352, "grad_norm": 0.59375, "learning_rate": 4.765222510024135e-06, "loss": 2.4165, "step": 15367 }, { "epoch": 0.8244635193133047, "grad_norm": 0.455078125, "learning_rate": 4.765185752334519e-06, "loss": 1.8234, "step": 15368 }, { "epoch": 0.8245171673819742, "grad_norm": 0.58203125, "learning_rate": 4.7651489919094574e-06, "loss": 2.3583, "step": 15369 }, { "epoch": 0.8245708154506438, "grad_norm": 0.55859375, "learning_rate": 4.7651122287489925e-06, "loss": 2.3873, "step": 15370 }, { "epoch": 0.8246244635193133, "grad_norm": 0.62890625, "learning_rate": 4.76507546285317e-06, "loss": 2.2227, "step": 15371 }, { "epoch": 0.8246781115879829, "grad_norm": 0.439453125, "learning_rate": 4.765038694222034e-06, "loss": 2.2703, "step": 15372 }, { "epoch": 0.8247317596566524, "grad_norm": 0.439453125, "learning_rate": 4.765001922855629e-06, "loss": 2.2791, "step": 15373 }, { "epoch": 0.8247854077253219, "grad_norm": 0.494140625, "learning_rate": 4.7649651487539986e-06, "loss": 2.3653, "step": 15374 }, { "epoch": 0.8248390557939914, "grad_norm": 0.609375, "learning_rate": 4.764928371917188e-06, "loss": 2.0868, "step": 15375 }, { "epoch": 0.8248927038626609, "grad_norm": 0.4765625, "learning_rate": 4.764891592345241e-06, "loss": 2.2889, "step": 15376 }, { "epoch": 0.8249463519313305, "grad_norm": 0.416015625, "learning_rate": 4.764854810038203e-06, "loss": 2.2854, "step": 15377 }, { "epoch": 0.825, "grad_norm": 0.51171875, "learning_rate": 4.764818024996117e-06, "loss": 2.1827, "step": 15378 }, { "epoch": 0.8250536480686695, "grad_norm": 1.3125, "learning_rate": 4.764781237219029e-06, "loss": 2.2415, "step": 15379 }, { "epoch": 0.825107296137339, "grad_norm": 0.42578125, "learning_rate": 4.764744446706983e-06, "loss": 2.2536, "step": 15380 }, { "epoch": 0.8251609442060086, "grad_norm": 0.4140625, "learning_rate": 4.764707653460022e-06, "loss": 2.1692, "step": 15381 }, { "epoch": 0.8252145922746781, "grad_norm": 0.478515625, "learning_rate": 4.764670857478193e-06, "loss": 2.2638, "step": 15382 }, { "epoch": 0.8252682403433477, "grad_norm": 0.50390625, "learning_rate": 4.764634058761538e-06, "loss": 2.2741, "step": 15383 }, { "epoch": 0.8253218884120171, "grad_norm": 0.4140625, "learning_rate": 4.764597257310103e-06, "loss": 2.0138, "step": 15384 }, { "epoch": 0.8253755364806867, "grad_norm": 0.46484375, "learning_rate": 4.764560453123932e-06, "loss": 2.2687, "step": 15385 }, { "epoch": 0.8254291845493562, "grad_norm": 0.458984375, "learning_rate": 4.764523646203068e-06, "loss": 2.3246, "step": 15386 }, { "epoch": 0.8254828326180258, "grad_norm": 0.455078125, "learning_rate": 4.764486836547558e-06, "loss": 2.2386, "step": 15387 }, { "epoch": 0.8255364806866953, "grad_norm": 0.4140625, "learning_rate": 4.764450024157445e-06, "loss": 2.2081, "step": 15388 }, { "epoch": 0.8255901287553649, "grad_norm": 0.4921875, "learning_rate": 4.764413209032773e-06, "loss": 2.1777, "step": 15389 }, { "epoch": 0.8256437768240343, "grad_norm": 0.443359375, "learning_rate": 4.764376391173588e-06, "loss": 2.36, "step": 15390 }, { "epoch": 0.8256974248927038, "grad_norm": 0.466796875, "learning_rate": 4.7643395705799326e-06, "loss": 2.4947, "step": 15391 }, { "epoch": 0.8257510729613734, "grad_norm": 0.5078125, "learning_rate": 4.7643027472518536e-06, "loss": 2.3288, "step": 15392 }, { "epoch": 0.8258047210300429, "grad_norm": 0.458984375, "learning_rate": 4.764265921189393e-06, "loss": 2.1154, "step": 15393 }, { "epoch": 0.8258583690987125, "grad_norm": 0.486328125, "learning_rate": 4.7642290923925965e-06, "loss": 2.2685, "step": 15394 }, { "epoch": 0.8259120171673819, "grad_norm": 0.40625, "learning_rate": 4.764192260861509e-06, "loss": 2.1519, "step": 15395 }, { "epoch": 0.8259656652360515, "grad_norm": 0.466796875, "learning_rate": 4.764155426596174e-06, "loss": 2.2984, "step": 15396 }, { "epoch": 0.826019313304721, "grad_norm": 0.4453125, "learning_rate": 4.764118589596637e-06, "loss": 2.2026, "step": 15397 }, { "epoch": 0.8260729613733906, "grad_norm": 0.5703125, "learning_rate": 4.764081749862941e-06, "loss": 2.236, "step": 15398 }, { "epoch": 0.8261266094420601, "grad_norm": 0.4375, "learning_rate": 4.7640449073951315e-06, "loss": 2.4739, "step": 15399 }, { "epoch": 0.8261802575107297, "grad_norm": 0.5546875, "learning_rate": 4.764008062193254e-06, "loss": 2.6317, "step": 15400 }, { "epoch": 0.8262339055793991, "grad_norm": 0.609375, "learning_rate": 4.763971214257351e-06, "loss": 2.0816, "step": 15401 }, { "epoch": 0.8262875536480687, "grad_norm": 0.50390625, "learning_rate": 4.763934363587468e-06, "loss": 2.3848, "step": 15402 }, { "epoch": 0.8263412017167382, "grad_norm": 1.4765625, "learning_rate": 4.763897510183649e-06, "loss": 2.2667, "step": 15403 }, { "epoch": 0.8263948497854077, "grad_norm": 0.5, "learning_rate": 4.763860654045939e-06, "loss": 2.2832, "step": 15404 }, { "epoch": 0.8264484978540773, "grad_norm": 0.390625, "learning_rate": 4.763823795174383e-06, "loss": 2.0142, "step": 15405 }, { "epoch": 0.8265021459227467, "grad_norm": 0.494140625, "learning_rate": 4.763786933569025e-06, "loss": 2.3898, "step": 15406 }, { "epoch": 0.8265557939914163, "grad_norm": 0.47265625, "learning_rate": 4.7637500692299085e-06, "loss": 2.1925, "step": 15407 }, { "epoch": 0.8266094420600858, "grad_norm": 0.4453125, "learning_rate": 4.763713202157079e-06, "loss": 2.1494, "step": 15408 }, { "epoch": 0.8266630901287554, "grad_norm": 0.6015625, "learning_rate": 4.763676332350582e-06, "loss": 1.8301, "step": 15409 }, { "epoch": 0.8267167381974249, "grad_norm": 0.419921875, "learning_rate": 4.7636394598104594e-06, "loss": 2.1894, "step": 15410 }, { "epoch": 0.8267703862660944, "grad_norm": 0.38671875, "learning_rate": 4.763602584536759e-06, "loss": 2.1971, "step": 15411 }, { "epoch": 0.8268240343347639, "grad_norm": 0.4765625, "learning_rate": 4.763565706529523e-06, "loss": 2.3654, "step": 15412 }, { "epoch": 0.8268776824034335, "grad_norm": 0.498046875, "learning_rate": 4.763528825788796e-06, "loss": 2.3457, "step": 15413 }, { "epoch": 0.826931330472103, "grad_norm": 0.58203125, "learning_rate": 4.763491942314624e-06, "loss": 2.1926, "step": 15414 }, { "epoch": 0.8269849785407726, "grad_norm": 0.431640625, "learning_rate": 4.76345505610705e-06, "loss": 2.3304, "step": 15415 }, { "epoch": 0.827038626609442, "grad_norm": 0.396484375, "learning_rate": 4.76341816716612e-06, "loss": 2.3595, "step": 15416 }, { "epoch": 0.8270922746781116, "grad_norm": 0.453125, "learning_rate": 4.763381275491876e-06, "loss": 2.2406, "step": 15417 }, { "epoch": 0.8271459227467811, "grad_norm": 0.44140625, "learning_rate": 4.763344381084366e-06, "loss": 2.1637, "step": 15418 }, { "epoch": 0.8271995708154506, "grad_norm": 0.6015625, "learning_rate": 4.763307483943632e-06, "loss": 2.2737, "step": 15419 }, { "epoch": 0.8272532188841202, "grad_norm": 0.462890625, "learning_rate": 4.76327058406972e-06, "loss": 2.0246, "step": 15420 }, { "epoch": 0.8273068669527897, "grad_norm": 0.5078125, "learning_rate": 4.763233681462673e-06, "loss": 2.216, "step": 15421 }, { "epoch": 0.8273605150214592, "grad_norm": 0.4140625, "learning_rate": 4.7631967761225374e-06, "loss": 2.2918, "step": 15422 }, { "epoch": 0.8274141630901287, "grad_norm": 0.412109375, "learning_rate": 4.763159868049357e-06, "loss": 2.2496, "step": 15423 }, { "epoch": 0.8274678111587983, "grad_norm": 0.421875, "learning_rate": 4.763122957243176e-06, "loss": 2.3686, "step": 15424 }, { "epoch": 0.8275214592274678, "grad_norm": 0.486328125, "learning_rate": 4.763086043704039e-06, "loss": 2.257, "step": 15425 }, { "epoch": 0.8275751072961374, "grad_norm": 0.453125, "learning_rate": 4.763049127431991e-06, "loss": 2.1786, "step": 15426 }, { "epoch": 0.8276287553648068, "grad_norm": 0.5078125, "learning_rate": 4.763012208427076e-06, "loss": 2.2073, "step": 15427 }, { "epoch": 0.8276824034334764, "grad_norm": 0.408203125, "learning_rate": 4.7629752866893394e-06, "loss": 2.1874, "step": 15428 }, { "epoch": 0.8277360515021459, "grad_norm": 0.455078125, "learning_rate": 4.762938362218825e-06, "loss": 2.2544, "step": 15429 }, { "epoch": 0.8277896995708155, "grad_norm": 0.61328125, "learning_rate": 4.762901435015579e-06, "loss": 2.132, "step": 15430 }, { "epoch": 0.827843347639485, "grad_norm": 0.3515625, "learning_rate": 4.762864505079643e-06, "loss": 2.1137, "step": 15431 }, { "epoch": 0.8278969957081546, "grad_norm": 0.51171875, "learning_rate": 4.762827572411064e-06, "loss": 2.1943, "step": 15432 }, { "epoch": 0.827950643776824, "grad_norm": 0.40625, "learning_rate": 4.762790637009887e-06, "loss": 2.2847, "step": 15433 }, { "epoch": 0.8280042918454935, "grad_norm": 0.4296875, "learning_rate": 4.762753698876154e-06, "loss": 2.3425, "step": 15434 }, { "epoch": 0.8280579399141631, "grad_norm": 0.38671875, "learning_rate": 4.762716758009912e-06, "loss": 1.6981, "step": 15435 }, { "epoch": 0.8281115879828326, "grad_norm": 0.42578125, "learning_rate": 4.762679814411204e-06, "loss": 2.145, "step": 15436 }, { "epoch": 0.8281652360515022, "grad_norm": 0.466796875, "learning_rate": 4.762642868080076e-06, "loss": 2.1504, "step": 15437 }, { "epoch": 0.8282188841201716, "grad_norm": 0.4921875, "learning_rate": 4.762605919016572e-06, "loss": 2.1786, "step": 15438 }, { "epoch": 0.8282725321888412, "grad_norm": 0.490234375, "learning_rate": 4.7625689672207365e-06, "loss": 2.3317, "step": 15439 }, { "epoch": 0.8283261802575107, "grad_norm": 0.46484375, "learning_rate": 4.762532012692615e-06, "loss": 2.1406, "step": 15440 }, { "epoch": 0.8283798283261803, "grad_norm": 0.65625, "learning_rate": 4.76249505543225e-06, "loss": 2.3714, "step": 15441 }, { "epoch": 0.8284334763948498, "grad_norm": 0.5, "learning_rate": 4.762458095439688e-06, "loss": 2.3649, "step": 15442 }, { "epoch": 0.8284871244635194, "grad_norm": 0.431640625, "learning_rate": 4.762421132714973e-06, "loss": 2.4963, "step": 15443 }, { "epoch": 0.8285407725321888, "grad_norm": 0.39453125, "learning_rate": 4.7623841672581504e-06, "loss": 1.9692, "step": 15444 }, { "epoch": 0.8285944206008584, "grad_norm": 0.45703125, "learning_rate": 4.7623471990692635e-06, "loss": 2.2365, "step": 15445 }, { "epoch": 0.8286480686695279, "grad_norm": 0.36328125, "learning_rate": 4.762310228148358e-06, "loss": 1.7543, "step": 15446 }, { "epoch": 0.8287017167381975, "grad_norm": 0.41796875, "learning_rate": 4.762273254495479e-06, "loss": 2.1763, "step": 15447 }, { "epoch": 0.828755364806867, "grad_norm": 0.453125, "learning_rate": 4.762236278110669e-06, "loss": 2.4123, "step": 15448 }, { "epoch": 0.8288090128755364, "grad_norm": 0.423828125, "learning_rate": 4.762199298993974e-06, "loss": 2.185, "step": 15449 }, { "epoch": 0.828862660944206, "grad_norm": 0.490234375, "learning_rate": 4.762162317145439e-06, "loss": 1.801, "step": 15450 }, { "epoch": 0.8289163090128755, "grad_norm": 0.431640625, "learning_rate": 4.762125332565109e-06, "loss": 2.2621, "step": 15451 }, { "epoch": 0.8289699570815451, "grad_norm": 0.453125, "learning_rate": 4.762088345253027e-06, "loss": 2.26, "step": 15452 }, { "epoch": 0.8290236051502146, "grad_norm": 0.43359375, "learning_rate": 4.762051355209239e-06, "loss": 2.2418, "step": 15453 }, { "epoch": 0.8290772532188841, "grad_norm": 0.42578125, "learning_rate": 4.76201436243379e-06, "loss": 2.0396, "step": 15454 }, { "epoch": 0.8291309012875536, "grad_norm": 0.5, "learning_rate": 4.761977366926722e-06, "loss": 2.2187, "step": 15455 }, { "epoch": 0.8291845493562232, "grad_norm": 0.51953125, "learning_rate": 4.761940368688084e-06, "loss": 2.2082, "step": 15456 }, { "epoch": 0.8292381974248927, "grad_norm": 2.046875, "learning_rate": 4.761903367717917e-06, "loss": 2.4708, "step": 15457 }, { "epoch": 0.8292918454935623, "grad_norm": 0.498046875, "learning_rate": 4.761866364016268e-06, "loss": 2.3786, "step": 15458 }, { "epoch": 0.8293454935622318, "grad_norm": 0.416015625, "learning_rate": 4.76182935758318e-06, "loss": 2.2404, "step": 15459 }, { "epoch": 0.8293991416309013, "grad_norm": 0.43359375, "learning_rate": 4.761792348418698e-06, "loss": 2.2665, "step": 15460 }, { "epoch": 0.8294527896995708, "grad_norm": 0.6171875, "learning_rate": 4.761755336522867e-06, "loss": 2.2251, "step": 15461 }, { "epoch": 0.8295064377682403, "grad_norm": 1.046875, "learning_rate": 4.761718321895733e-06, "loss": 2.4228, "step": 15462 }, { "epoch": 0.8295600858369099, "grad_norm": 0.51953125, "learning_rate": 4.761681304537339e-06, "loss": 2.4983, "step": 15463 }, { "epoch": 0.8296137339055794, "grad_norm": 0.46484375, "learning_rate": 4.761644284447729e-06, "loss": 2.4488, "step": 15464 }, { "epoch": 0.8296673819742489, "grad_norm": 0.5703125, "learning_rate": 4.76160726162695e-06, "loss": 2.5004, "step": 15465 }, { "epoch": 0.8297210300429184, "grad_norm": 0.443359375, "learning_rate": 4.761570236075046e-06, "loss": 2.3599, "step": 15466 }, { "epoch": 0.829774678111588, "grad_norm": 0.44140625, "learning_rate": 4.7615332077920604e-06, "loss": 2.1454, "step": 15467 }, { "epoch": 0.8298283261802575, "grad_norm": 0.44140625, "learning_rate": 4.7614961767780385e-06, "loss": 2.4706, "step": 15468 }, { "epoch": 0.8298819742489271, "grad_norm": 0.62890625, "learning_rate": 4.7614591430330264e-06, "loss": 2.2957, "step": 15469 }, { "epoch": 0.8299356223175965, "grad_norm": 0.5078125, "learning_rate": 4.7614221065570675e-06, "loss": 2.0807, "step": 15470 }, { "epoch": 0.8299892703862661, "grad_norm": 0.4921875, "learning_rate": 4.761385067350207e-06, "loss": 2.1495, "step": 15471 }, { "epoch": 0.8300429184549356, "grad_norm": 0.71875, "learning_rate": 4.761348025412489e-06, "loss": 2.0896, "step": 15472 }, { "epoch": 0.8300965665236052, "grad_norm": 0.42578125, "learning_rate": 4.7613109807439584e-06, "loss": 2.1737, "step": 15473 }, { "epoch": 0.8301502145922747, "grad_norm": 0.6328125, "learning_rate": 4.76127393334466e-06, "loss": 2.3746, "step": 15474 }, { "epoch": 0.8302038626609443, "grad_norm": 0.46875, "learning_rate": 4.761236883214639e-06, "loss": 2.1082, "step": 15475 }, { "epoch": 0.8302575107296137, "grad_norm": 0.51953125, "learning_rate": 4.761199830353941e-06, "loss": 2.4486, "step": 15476 }, { "epoch": 0.8303111587982832, "grad_norm": 0.44140625, "learning_rate": 4.7611627747626084e-06, "loss": 2.3838, "step": 15477 }, { "epoch": 0.8303648068669528, "grad_norm": 0.455078125, "learning_rate": 4.761125716440687e-06, "loss": 2.4361, "step": 15478 }, { "epoch": 0.8304184549356223, "grad_norm": 0.4609375, "learning_rate": 4.761088655388223e-06, "loss": 2.1332, "step": 15479 }, { "epoch": 0.8304721030042919, "grad_norm": 0.5234375, "learning_rate": 4.761051591605259e-06, "loss": 2.6931, "step": 15480 }, { "epoch": 0.8305257510729613, "grad_norm": 0.412109375, "learning_rate": 4.76101452509184e-06, "loss": 2.4599, "step": 15481 }, { "epoch": 0.8305793991416309, "grad_norm": 0.5, "learning_rate": 4.760977455848013e-06, "loss": 1.5384, "step": 15482 }, { "epoch": 0.8306330472103004, "grad_norm": 0.451171875, "learning_rate": 4.76094038387382e-06, "loss": 2.2435, "step": 15483 }, { "epoch": 0.83068669527897, "grad_norm": 0.59375, "learning_rate": 4.760903309169307e-06, "loss": 2.3353, "step": 15484 }, { "epoch": 0.8307403433476395, "grad_norm": 0.4296875, "learning_rate": 4.760866231734519e-06, "loss": 2.2715, "step": 15485 }, { "epoch": 0.830793991416309, "grad_norm": 0.380859375, "learning_rate": 4.7608291515695e-06, "loss": 2.4145, "step": 15486 }, { "epoch": 0.8308476394849785, "grad_norm": 0.4140625, "learning_rate": 4.760792068674296e-06, "loss": 2.5204, "step": 15487 }, { "epoch": 0.8309012875536481, "grad_norm": 0.90625, "learning_rate": 4.760754983048949e-06, "loss": 2.4537, "step": 15488 }, { "epoch": 0.8309549356223176, "grad_norm": 0.44921875, "learning_rate": 4.760717894693508e-06, "loss": 2.3976, "step": 15489 }, { "epoch": 0.8310085836909872, "grad_norm": 0.4140625, "learning_rate": 4.7606808036080154e-06, "loss": 2.1567, "step": 15490 }, { "epoch": 0.8310622317596567, "grad_norm": 0.455078125, "learning_rate": 4.760643709792515e-06, "loss": 1.8012, "step": 15491 }, { "epoch": 0.8311158798283261, "grad_norm": 0.765625, "learning_rate": 4.760606613247054e-06, "loss": 2.3484, "step": 15492 }, { "epoch": 0.8311695278969957, "grad_norm": 0.51171875, "learning_rate": 4.760569513971676e-06, "loss": 2.2702, "step": 15493 }, { "epoch": 0.8312231759656652, "grad_norm": 0.4375, "learning_rate": 4.760532411966425e-06, "loss": 2.4532, "step": 15494 }, { "epoch": 0.8312768240343348, "grad_norm": 0.375, "learning_rate": 4.760495307231346e-06, "loss": 2.1451, "step": 15495 }, { "epoch": 0.8313304721030043, "grad_norm": 0.44140625, "learning_rate": 4.760458199766485e-06, "loss": 1.8767, "step": 15496 }, { "epoch": 0.8313841201716738, "grad_norm": 0.466796875, "learning_rate": 4.760421089571887e-06, "loss": 2.3781, "step": 15497 }, { "epoch": 0.8314377682403433, "grad_norm": 0.51953125, "learning_rate": 4.760383976647595e-06, "loss": 2.5758, "step": 15498 }, { "epoch": 0.8314914163090129, "grad_norm": 0.53125, "learning_rate": 4.760346860993656e-06, "loss": 2.281, "step": 15499 }, { "epoch": 0.8315450643776824, "grad_norm": 0.40234375, "learning_rate": 4.760309742610113e-06, "loss": 2.4509, "step": 15500 }, { "epoch": 0.831598712446352, "grad_norm": 1.1015625, "learning_rate": 4.7602726214970115e-06, "loss": 2.4119, "step": 15501 }, { "epoch": 0.8316523605150214, "grad_norm": 0.5, "learning_rate": 4.7602354976543966e-06, "loss": 2.4264, "step": 15502 }, { "epoch": 0.831706008583691, "grad_norm": 0.474609375, "learning_rate": 4.760198371082312e-06, "loss": 2.3562, "step": 15503 }, { "epoch": 0.8317596566523605, "grad_norm": 0.50390625, "learning_rate": 4.760161241780804e-06, "loss": 2.2693, "step": 15504 }, { "epoch": 0.83181330472103, "grad_norm": 0.52734375, "learning_rate": 4.760124109749918e-06, "loss": 2.2613, "step": 15505 }, { "epoch": 0.8318669527896996, "grad_norm": 0.50390625, "learning_rate": 4.760086974989695e-06, "loss": 2.2874, "step": 15506 }, { "epoch": 0.831920600858369, "grad_norm": 0.72265625, "learning_rate": 4.760049837500184e-06, "loss": 2.3304, "step": 15507 }, { "epoch": 0.8319742489270386, "grad_norm": 0.7734375, "learning_rate": 4.760012697281428e-06, "loss": 2.2315, "step": 15508 }, { "epoch": 0.8320278969957081, "grad_norm": 0.412109375, "learning_rate": 4.759975554333473e-06, "loss": 2.3139, "step": 15509 }, { "epoch": 0.8320815450643777, "grad_norm": 0.4921875, "learning_rate": 4.759938408656363e-06, "loss": 2.3698, "step": 15510 }, { "epoch": 0.8321351931330472, "grad_norm": 0.48828125, "learning_rate": 4.759901260250141e-06, "loss": 2.4528, "step": 15511 }, { "epoch": 0.8321888412017168, "grad_norm": 0.5078125, "learning_rate": 4.759864109114855e-06, "loss": 2.2806, "step": 15512 }, { "epoch": 0.8322424892703862, "grad_norm": 0.498046875, "learning_rate": 4.759826955250549e-06, "loss": 2.5171, "step": 15513 }, { "epoch": 0.8322961373390558, "grad_norm": 0.359375, "learning_rate": 4.759789798657268e-06, "loss": 2.1315, "step": 15514 }, { "epoch": 0.8323497854077253, "grad_norm": 0.515625, "learning_rate": 4.759752639335055e-06, "loss": 2.4621, "step": 15515 }, { "epoch": 0.8324034334763949, "grad_norm": 0.494140625, "learning_rate": 4.759715477283957e-06, "loss": 2.2541, "step": 15516 }, { "epoch": 0.8324570815450644, "grad_norm": 0.419921875, "learning_rate": 4.759678312504019e-06, "loss": 2.1583, "step": 15517 }, { "epoch": 0.832510729613734, "grad_norm": 0.58203125, "learning_rate": 4.759641144995284e-06, "loss": 2.0785, "step": 15518 }, { "epoch": 0.8325643776824034, "grad_norm": 0.59765625, "learning_rate": 4.759603974757797e-06, "loss": 2.1221, "step": 15519 }, { "epoch": 0.8326180257510729, "grad_norm": 0.50390625, "learning_rate": 4.759566801791605e-06, "loss": 2.3379, "step": 15520 }, { "epoch": 0.8326716738197425, "grad_norm": 0.59765625, "learning_rate": 4.759529626096752e-06, "loss": 1.0114, "step": 15521 }, { "epoch": 0.832725321888412, "grad_norm": 0.70703125, "learning_rate": 4.759492447673282e-06, "loss": 2.3815, "step": 15522 }, { "epoch": 0.8327789699570816, "grad_norm": 0.453125, "learning_rate": 4.75945526652124e-06, "loss": 2.2158, "step": 15523 }, { "epoch": 0.832832618025751, "grad_norm": 0.5078125, "learning_rate": 4.759418082640671e-06, "loss": 2.2529, "step": 15524 }, { "epoch": 0.8328862660944206, "grad_norm": 0.4921875, "learning_rate": 4.759380896031621e-06, "loss": 2.147, "step": 15525 }, { "epoch": 0.8329399141630901, "grad_norm": 0.4921875, "learning_rate": 4.7593437066941344e-06, "loss": 2.1088, "step": 15526 }, { "epoch": 0.8329935622317597, "grad_norm": 0.470703125, "learning_rate": 4.759306514628255e-06, "loss": 2.2916, "step": 15527 }, { "epoch": 0.8330472103004292, "grad_norm": 0.50390625, "learning_rate": 4.75926931983403e-06, "loss": 2.439, "step": 15528 }, { "epoch": 0.8331008583690988, "grad_norm": 0.51171875, "learning_rate": 4.759232122311503e-06, "loss": 2.3838, "step": 15529 }, { "epoch": 0.8331545064377682, "grad_norm": 0.447265625, "learning_rate": 4.759194922060717e-06, "loss": 2.0787, "step": 15530 }, { "epoch": 0.8332081545064378, "grad_norm": 0.416015625, "learning_rate": 4.75915771908172e-06, "loss": 2.3871, "step": 15531 }, { "epoch": 0.8332618025751073, "grad_norm": 0.52734375, "learning_rate": 4.759120513374555e-06, "loss": 2.5711, "step": 15532 }, { "epoch": 0.8333154506437769, "grad_norm": 0.63671875, "learning_rate": 4.759083304939268e-06, "loss": 1.7148, "step": 15533 }, { "epoch": 0.8333690987124464, "grad_norm": 0.515625, "learning_rate": 4.759046093775903e-06, "loss": 2.245, "step": 15534 }, { "epoch": 0.8334227467811158, "grad_norm": 0.3828125, "learning_rate": 4.759008879884507e-06, "loss": 1.7036, "step": 15535 }, { "epoch": 0.8334763948497854, "grad_norm": 0.4765625, "learning_rate": 4.758971663265122e-06, "loss": 2.2422, "step": 15536 }, { "epoch": 0.8335300429184549, "grad_norm": 0.423828125, "learning_rate": 4.758934443917794e-06, "loss": 2.1646, "step": 15537 }, { "epoch": 0.8335836909871245, "grad_norm": 0.484375, "learning_rate": 4.7588972218425685e-06, "loss": 2.0599, "step": 15538 }, { "epoch": 0.833637339055794, "grad_norm": 0.431640625, "learning_rate": 4.758859997039491e-06, "loss": 2.1375, "step": 15539 }, { "epoch": 0.8336909871244635, "grad_norm": 0.486328125, "learning_rate": 4.758822769508606e-06, "loss": 2.366, "step": 15540 }, { "epoch": 0.833744635193133, "grad_norm": 0.408203125, "learning_rate": 4.758785539249957e-06, "loss": 2.1596, "step": 15541 }, { "epoch": 0.8337982832618026, "grad_norm": 0.447265625, "learning_rate": 4.758748306263591e-06, "loss": 2.2048, "step": 15542 }, { "epoch": 0.8338519313304721, "grad_norm": 0.369140625, "learning_rate": 4.758711070549552e-06, "loss": 2.1128, "step": 15543 }, { "epoch": 0.8339055793991417, "grad_norm": 0.462890625, "learning_rate": 4.7586738321078855e-06, "loss": 2.4783, "step": 15544 }, { "epoch": 0.8339592274678111, "grad_norm": 0.447265625, "learning_rate": 4.758636590938635e-06, "loss": 2.3139, "step": 15545 }, { "epoch": 0.8340128755364807, "grad_norm": 0.427734375, "learning_rate": 4.758599347041847e-06, "loss": 2.07, "step": 15546 }, { "epoch": 0.8340665236051502, "grad_norm": 0.44140625, "learning_rate": 4.758562100417565e-06, "loss": 2.1843, "step": 15547 }, { "epoch": 0.8341201716738197, "grad_norm": 0.412109375, "learning_rate": 4.758524851065836e-06, "loss": 2.1831, "step": 15548 }, { "epoch": 0.8341738197424893, "grad_norm": 0.451171875, "learning_rate": 4.758487598986704e-06, "loss": 2.1761, "step": 15549 }, { "epoch": 0.8342274678111588, "grad_norm": 0.484375, "learning_rate": 4.758450344180214e-06, "loss": 2.1873, "step": 15550 }, { "epoch": 0.8342811158798283, "grad_norm": 0.439453125, "learning_rate": 4.758413086646409e-06, "loss": 2.1664, "step": 15551 }, { "epoch": 0.8343347639484978, "grad_norm": 0.5859375, "learning_rate": 4.758375826385338e-06, "loss": 2.3695, "step": 15552 }, { "epoch": 0.8343884120171674, "grad_norm": 0.361328125, "learning_rate": 4.758338563397043e-06, "loss": 2.2688, "step": 15553 }, { "epoch": 0.8344420600858369, "grad_norm": 0.458984375, "learning_rate": 4.758301297681571e-06, "loss": 2.5159, "step": 15554 }, { "epoch": 0.8344957081545065, "grad_norm": 0.55078125, "learning_rate": 4.758264029238965e-06, "loss": 2.238, "step": 15555 }, { "epoch": 0.8345493562231759, "grad_norm": 0.41015625, "learning_rate": 4.758226758069271e-06, "loss": 2.2809, "step": 15556 }, { "epoch": 0.8346030042918455, "grad_norm": 0.59765625, "learning_rate": 4.758189484172534e-06, "loss": 1.9762, "step": 15557 }, { "epoch": 0.834656652360515, "grad_norm": 0.423828125, "learning_rate": 4.758152207548798e-06, "loss": 2.2581, "step": 15558 }, { "epoch": 0.8347103004291846, "grad_norm": 0.416015625, "learning_rate": 4.75811492819811e-06, "loss": 2.2306, "step": 15559 }, { "epoch": 0.8347639484978541, "grad_norm": 0.400390625, "learning_rate": 4.758077646120514e-06, "loss": 2.4212, "step": 15560 }, { "epoch": 0.8348175965665237, "grad_norm": 0.412109375, "learning_rate": 4.7580403613160545e-06, "loss": 2.2795, "step": 15561 }, { "epoch": 0.8348712446351931, "grad_norm": 0.421875, "learning_rate": 4.758003073784777e-06, "loss": 2.5594, "step": 15562 }, { "epoch": 0.8349248927038626, "grad_norm": 0.435546875, "learning_rate": 4.757965783526727e-06, "loss": 2.2616, "step": 15563 }, { "epoch": 0.8349785407725322, "grad_norm": 0.4453125, "learning_rate": 4.757928490541949e-06, "loss": 2.4127, "step": 15564 }, { "epoch": 0.8350321888412017, "grad_norm": 0.484375, "learning_rate": 4.757891194830488e-06, "loss": 2.2546, "step": 15565 }, { "epoch": 0.8350858369098713, "grad_norm": 0.41796875, "learning_rate": 4.757853896392388e-06, "loss": 2.5716, "step": 15566 }, { "epoch": 0.8351394849785407, "grad_norm": 0.33203125, "learning_rate": 4.757816595227697e-06, "loss": 2.2153, "step": 15567 }, { "epoch": 0.8351931330472103, "grad_norm": 0.408203125, "learning_rate": 4.757779291336456e-06, "loss": 2.1351, "step": 15568 }, { "epoch": 0.8352467811158798, "grad_norm": 0.56640625, "learning_rate": 4.757741984718714e-06, "loss": 2.3528, "step": 15569 }, { "epoch": 0.8353004291845494, "grad_norm": 0.4453125, "learning_rate": 4.757704675374513e-06, "loss": 1.6626, "step": 15570 }, { "epoch": 0.8353540772532189, "grad_norm": 0.51953125, "learning_rate": 4.7576673633039015e-06, "loss": 2.2591, "step": 15571 }, { "epoch": 0.8354077253218885, "grad_norm": 0.51171875, "learning_rate": 4.757630048506921e-06, "loss": 1.9719, "step": 15572 }, { "epoch": 0.8354613733905579, "grad_norm": 0.50390625, "learning_rate": 4.757592730983618e-06, "loss": 2.1078, "step": 15573 }, { "epoch": 0.8355150214592275, "grad_norm": 0.478515625, "learning_rate": 4.7575554107340386e-06, "loss": 2.3931, "step": 15574 }, { "epoch": 0.835568669527897, "grad_norm": 0.482421875, "learning_rate": 4.757518087758226e-06, "loss": 2.3844, "step": 15575 }, { "epoch": 0.8356223175965666, "grad_norm": 0.49609375, "learning_rate": 4.757480762056226e-06, "loss": 2.4034, "step": 15576 }, { "epoch": 0.835675965665236, "grad_norm": 0.42578125, "learning_rate": 4.757443433628083e-06, "loss": 2.4203, "step": 15577 }, { "epoch": 0.8357296137339055, "grad_norm": 0.47265625, "learning_rate": 4.757406102473844e-06, "loss": 2.4865, "step": 15578 }, { "epoch": 0.8357832618025751, "grad_norm": 0.5, "learning_rate": 4.757368768593553e-06, "loss": 2.3864, "step": 15579 }, { "epoch": 0.8358369098712446, "grad_norm": 0.44140625, "learning_rate": 4.7573314319872545e-06, "loss": 2.4628, "step": 15580 }, { "epoch": 0.8358905579399142, "grad_norm": 0.50390625, "learning_rate": 4.757294092654994e-06, "loss": 2.1662, "step": 15581 }, { "epoch": 0.8359442060085837, "grad_norm": 0.52734375, "learning_rate": 4.757256750596817e-06, "loss": 1.9531, "step": 15582 }, { "epoch": 0.8359978540772532, "grad_norm": 0.419921875, "learning_rate": 4.757219405812769e-06, "loss": 2.2217, "step": 15583 }, { "epoch": 0.8360515021459227, "grad_norm": 0.6796875, "learning_rate": 4.757182058302893e-06, "loss": 2.315, "step": 15584 }, { "epoch": 0.8361051502145923, "grad_norm": 0.462890625, "learning_rate": 4.757144708067236e-06, "loss": 1.7489, "step": 15585 }, { "epoch": 0.8361587982832618, "grad_norm": 0.490234375, "learning_rate": 4.757107355105843e-06, "loss": 2.3548, "step": 15586 }, { "epoch": 0.8362124463519314, "grad_norm": 0.45703125, "learning_rate": 4.7570699994187585e-06, "loss": 2.2763, "step": 15587 }, { "epoch": 0.8362660944206008, "grad_norm": 0.380859375, "learning_rate": 4.757032641006027e-06, "loss": 2.442, "step": 15588 }, { "epoch": 0.8363197424892704, "grad_norm": 0.51171875, "learning_rate": 4.756995279867695e-06, "loss": 2.3524, "step": 15589 }, { "epoch": 0.8363733905579399, "grad_norm": 0.4765625, "learning_rate": 4.756957916003808e-06, "loss": 2.0541, "step": 15590 }, { "epoch": 0.8364270386266094, "grad_norm": 0.62890625, "learning_rate": 4.756920549414409e-06, "loss": 2.3367, "step": 15591 }, { "epoch": 0.836480686695279, "grad_norm": 0.416015625, "learning_rate": 4.756883180099545e-06, "loss": 1.6427, "step": 15592 }, { "epoch": 0.8365343347639485, "grad_norm": 0.404296875, "learning_rate": 4.756845808059259e-06, "loss": 2.3411, "step": 15593 }, { "epoch": 0.836587982832618, "grad_norm": 0.45703125, "learning_rate": 4.756808433293599e-06, "loss": 2.1586, "step": 15594 }, { "epoch": 0.8366416309012875, "grad_norm": 0.7421875, "learning_rate": 4.756771055802608e-06, "loss": 2.632, "step": 15595 }, { "epoch": 0.8366952789699571, "grad_norm": 0.6328125, "learning_rate": 4.756733675586332e-06, "loss": 2.2381, "step": 15596 }, { "epoch": 0.8367489270386266, "grad_norm": 0.451171875, "learning_rate": 4.756696292644817e-06, "loss": 2.3202, "step": 15597 }, { "epoch": 0.8368025751072962, "grad_norm": 0.470703125, "learning_rate": 4.756658906978106e-06, "loss": 2.4018, "step": 15598 }, { "epoch": 0.8368562231759656, "grad_norm": 0.45703125, "learning_rate": 4.756621518586245e-06, "loss": 2.2978, "step": 15599 }, { "epoch": 0.8369098712446352, "grad_norm": 0.443359375, "learning_rate": 4.75658412746928e-06, "loss": 2.4125, "step": 15600 }, { "epoch": 0.8369635193133047, "grad_norm": 0.486328125, "learning_rate": 4.756546733627255e-06, "loss": 2.2824, "step": 15601 }, { "epoch": 0.8370171673819743, "grad_norm": 0.5, "learning_rate": 4.756509337060217e-06, "loss": 2.3232, "step": 15602 }, { "epoch": 0.8370708154506438, "grad_norm": 0.400390625, "learning_rate": 4.756471937768208e-06, "loss": 2.3013, "step": 15603 }, { "epoch": 0.8371244635193134, "grad_norm": 0.482421875, "learning_rate": 4.756434535751276e-06, "loss": 2.3294, "step": 15604 }, { "epoch": 0.8371781115879828, "grad_norm": 0.44921875, "learning_rate": 4.756397131009466e-06, "loss": 2.4301, "step": 15605 }, { "epoch": 0.8372317596566523, "grad_norm": 0.57421875, "learning_rate": 4.756359723542821e-06, "loss": 2.0946, "step": 15606 }, { "epoch": 0.8372854077253219, "grad_norm": 0.484375, "learning_rate": 4.756322313351388e-06, "loss": 1.9812, "step": 15607 }, { "epoch": 0.8373390557939914, "grad_norm": 0.46484375, "learning_rate": 4.756284900435212e-06, "loss": 2.2365, "step": 15608 }, { "epoch": 0.837392703862661, "grad_norm": 0.466796875, "learning_rate": 4.7562474847943375e-06, "loss": 2.559, "step": 15609 }, { "epoch": 0.8374463519313304, "grad_norm": 0.53125, "learning_rate": 4.756210066428811e-06, "loss": 2.3582, "step": 15610 }, { "epoch": 0.8375, "grad_norm": 0.453125, "learning_rate": 4.756172645338675e-06, "loss": 2.1683, "step": 15611 }, { "epoch": 0.8375536480686695, "grad_norm": 0.408203125, "learning_rate": 4.756135221523978e-06, "loss": 2.0181, "step": 15612 }, { "epoch": 0.8376072961373391, "grad_norm": 0.51953125, "learning_rate": 4.756097794984763e-06, "loss": 2.2495, "step": 15613 }, { "epoch": 0.8376609442060086, "grad_norm": 0.46484375, "learning_rate": 4.756060365721076e-06, "loss": 2.454, "step": 15614 }, { "epoch": 0.8377145922746781, "grad_norm": 0.4921875, "learning_rate": 4.756022933732962e-06, "loss": 2.3596, "step": 15615 }, { "epoch": 0.8377682403433476, "grad_norm": 0.451171875, "learning_rate": 4.755985499020466e-06, "loss": 2.3436, "step": 15616 }, { "epoch": 0.8378218884120172, "grad_norm": 0.51171875, "learning_rate": 4.755948061583634e-06, "loss": 2.192, "step": 15617 }, { "epoch": 0.8378755364806867, "grad_norm": 0.42578125, "learning_rate": 4.75591062142251e-06, "loss": 2.4001, "step": 15618 }, { "epoch": 0.8379291845493563, "grad_norm": 0.390625, "learning_rate": 4.75587317853714e-06, "loss": 2.2379, "step": 15619 }, { "epoch": 0.8379828326180258, "grad_norm": 0.5234375, "learning_rate": 4.75583573292757e-06, "loss": 2.5281, "step": 15620 }, { "epoch": 0.8380364806866952, "grad_norm": 0.3828125, "learning_rate": 4.755798284593843e-06, "loss": 2.2051, "step": 15621 }, { "epoch": 0.8380901287553648, "grad_norm": 0.47265625, "learning_rate": 4.7557608335360075e-06, "loss": 2.1278, "step": 15622 }, { "epoch": 0.8381437768240343, "grad_norm": 0.4453125, "learning_rate": 4.755723379754104e-06, "loss": 2.2209, "step": 15623 }, { "epoch": 0.8381974248927039, "grad_norm": 0.5, "learning_rate": 4.755685923248183e-06, "loss": 2.3838, "step": 15624 }, { "epoch": 0.8382510729613734, "grad_norm": 0.447265625, "learning_rate": 4.755648464018287e-06, "loss": 2.2817, "step": 15625 }, { "epoch": 0.8383047210300429, "grad_norm": 0.48046875, "learning_rate": 4.75561100206446e-06, "loss": 2.1373, "step": 15626 }, { "epoch": 0.8383583690987124, "grad_norm": 0.75390625, "learning_rate": 4.755573537386749e-06, "loss": 1.9314, "step": 15627 }, { "epoch": 0.838412017167382, "grad_norm": 0.55859375, "learning_rate": 4.755536069985199e-06, "loss": 2.2473, "step": 15628 }, { "epoch": 0.8384656652360515, "grad_norm": 0.52734375, "learning_rate": 4.7554985998598555e-06, "loss": 1.661, "step": 15629 }, { "epoch": 0.8385193133047211, "grad_norm": 1.0546875, "learning_rate": 4.755461127010764e-06, "loss": 2.3067, "step": 15630 }, { "epoch": 0.8385729613733905, "grad_norm": 0.47265625, "learning_rate": 4.755423651437968e-06, "loss": 1.9417, "step": 15631 }, { "epoch": 0.8386266094420601, "grad_norm": 0.5546875, "learning_rate": 4.7553861731415154e-06, "loss": 2.4257, "step": 15632 }, { "epoch": 0.8386802575107296, "grad_norm": 0.51171875, "learning_rate": 4.7553486921214485e-06, "loss": 2.3177, "step": 15633 }, { "epoch": 0.8387339055793992, "grad_norm": 0.478515625, "learning_rate": 4.755311208377815e-06, "loss": 2.2925, "step": 15634 }, { "epoch": 0.8387875536480687, "grad_norm": 0.48828125, "learning_rate": 4.755273721910659e-06, "loss": 2.5574, "step": 15635 }, { "epoch": 0.8388412017167381, "grad_norm": 0.462890625, "learning_rate": 4.755236232720026e-06, "loss": 2.1968, "step": 15636 }, { "epoch": 0.8388948497854077, "grad_norm": 0.5625, "learning_rate": 4.755198740805961e-06, "loss": 2.2908, "step": 15637 }, { "epoch": 0.8389484978540772, "grad_norm": 8.5, "learning_rate": 4.7551612461685105e-06, "loss": 1.6123, "step": 15638 }, { "epoch": 0.8390021459227468, "grad_norm": 0.625, "learning_rate": 4.755123748807718e-06, "loss": 2.1598, "step": 15639 }, { "epoch": 0.8390557939914163, "grad_norm": 0.484375, "learning_rate": 4.755086248723629e-06, "loss": 2.3306, "step": 15640 }, { "epoch": 0.8391094420600859, "grad_norm": 0.4921875, "learning_rate": 4.755048745916291e-06, "loss": 2.3557, "step": 15641 }, { "epoch": 0.8391630901287553, "grad_norm": 0.42578125, "learning_rate": 4.755011240385746e-06, "loss": 1.7808, "step": 15642 }, { "epoch": 0.8392167381974249, "grad_norm": 0.458984375, "learning_rate": 4.7549737321320425e-06, "loss": 2.3784, "step": 15643 }, { "epoch": 0.8392703862660944, "grad_norm": 0.421875, "learning_rate": 4.754936221155223e-06, "loss": 1.8477, "step": 15644 }, { "epoch": 0.839324034334764, "grad_norm": 0.443359375, "learning_rate": 4.754898707455336e-06, "loss": 2.101, "step": 15645 }, { "epoch": 0.8393776824034335, "grad_norm": 0.65234375, "learning_rate": 4.754861191032423e-06, "loss": 2.2564, "step": 15646 }, { "epoch": 0.839431330472103, "grad_norm": 0.515625, "learning_rate": 4.7548236718865316e-06, "loss": 2.2297, "step": 15647 }, { "epoch": 0.8394849785407725, "grad_norm": 0.5234375, "learning_rate": 4.754786150017706e-06, "loss": 2.3661, "step": 15648 }, { "epoch": 0.839538626609442, "grad_norm": 0.46875, "learning_rate": 4.754748625425993e-06, "loss": 2.3934, "step": 15649 }, { "epoch": 0.8395922746781116, "grad_norm": 0.431640625, "learning_rate": 4.754711098111437e-06, "loss": 2.1946, "step": 15650 }, { "epoch": 0.8396459227467811, "grad_norm": 0.515625, "learning_rate": 4.754673568074084e-06, "loss": 2.4806, "step": 15651 }, { "epoch": 0.8396995708154507, "grad_norm": 0.37109375, "learning_rate": 4.754636035313979e-06, "loss": 2.1867, "step": 15652 }, { "epoch": 0.8397532188841201, "grad_norm": 0.5, "learning_rate": 4.754598499831165e-06, "loss": 2.3941, "step": 15653 }, { "epoch": 0.8398068669527897, "grad_norm": 0.427734375, "learning_rate": 4.754560961625692e-06, "loss": 2.2912, "step": 15654 }, { "epoch": 0.8398605150214592, "grad_norm": 0.43359375, "learning_rate": 4.754523420697601e-06, "loss": 2.2888, "step": 15655 }, { "epoch": 0.8399141630901288, "grad_norm": 0.392578125, "learning_rate": 4.7544858770469395e-06, "loss": 2.1871, "step": 15656 }, { "epoch": 0.8399678111587983, "grad_norm": 0.78515625, "learning_rate": 4.754448330673752e-06, "loss": 2.1473, "step": 15657 }, { "epoch": 0.8400214592274678, "grad_norm": 0.4453125, "learning_rate": 4.7544107815780845e-06, "loss": 2.2228, "step": 15658 }, { "epoch": 0.8400751072961373, "grad_norm": 0.40625, "learning_rate": 4.7543732297599825e-06, "loss": 2.1595, "step": 15659 }, { "epoch": 0.8401287553648069, "grad_norm": 0.42578125, "learning_rate": 4.754335675219491e-06, "loss": 2.2146, "step": 15660 }, { "epoch": 0.8401824034334764, "grad_norm": 0.447265625, "learning_rate": 4.754298117956655e-06, "loss": 2.3724, "step": 15661 }, { "epoch": 0.840236051502146, "grad_norm": 0.474609375, "learning_rate": 4.75426055797152e-06, "loss": 2.2932, "step": 15662 }, { "epoch": 0.8402896995708155, "grad_norm": 0.443359375, "learning_rate": 4.754222995264132e-06, "loss": 2.3596, "step": 15663 }, { "epoch": 0.8403433476394849, "grad_norm": 0.5390625, "learning_rate": 4.7541854298345355e-06, "loss": 2.2528, "step": 15664 }, { "epoch": 0.8403969957081545, "grad_norm": 0.50390625, "learning_rate": 4.754147861682776e-06, "loss": 2.2251, "step": 15665 }, { "epoch": 0.840450643776824, "grad_norm": 0.453125, "learning_rate": 4.754110290808899e-06, "loss": 2.3852, "step": 15666 }, { "epoch": 0.8405042918454936, "grad_norm": 0.42578125, "learning_rate": 4.754072717212951e-06, "loss": 2.4096, "step": 15667 }, { "epoch": 0.840557939914163, "grad_norm": 0.51953125, "learning_rate": 4.7540351408949756e-06, "loss": 2.2146, "step": 15668 }, { "epoch": 0.8406115879828326, "grad_norm": 0.45703125, "learning_rate": 4.7539975618550184e-06, "loss": 2.3316, "step": 15669 }, { "epoch": 0.8406652360515021, "grad_norm": 0.470703125, "learning_rate": 4.753959980093126e-06, "loss": 2.5095, "step": 15670 }, { "epoch": 0.8407188841201717, "grad_norm": 0.453125, "learning_rate": 4.753922395609343e-06, "loss": 2.2417, "step": 15671 }, { "epoch": 0.8407725321888412, "grad_norm": 0.4765625, "learning_rate": 4.753884808403715e-06, "loss": 2.3195, "step": 15672 }, { "epoch": 0.8408261802575108, "grad_norm": 0.51171875, "learning_rate": 4.753847218476287e-06, "loss": 2.4431, "step": 15673 }, { "epoch": 0.8408798283261802, "grad_norm": 0.416015625, "learning_rate": 4.753809625827105e-06, "loss": 2.1902, "step": 15674 }, { "epoch": 0.8409334763948498, "grad_norm": 0.453125, "learning_rate": 4.753772030456213e-06, "loss": 2.1643, "step": 15675 }, { "epoch": 0.8409871244635193, "grad_norm": 0.44140625, "learning_rate": 4.753734432363659e-06, "loss": 2.2782, "step": 15676 }, { "epoch": 0.8410407725321889, "grad_norm": 0.6015625, "learning_rate": 4.7536968315494855e-06, "loss": 1.5372, "step": 15677 }, { "epoch": 0.8410944206008584, "grad_norm": 0.4375, "learning_rate": 4.75365922801374e-06, "loss": 2.4515, "step": 15678 }, { "epoch": 0.8411480686695278, "grad_norm": 0.48828125, "learning_rate": 4.7536216217564666e-06, "loss": 2.1859, "step": 15679 }, { "epoch": 0.8412017167381974, "grad_norm": 0.4140625, "learning_rate": 4.753584012777712e-06, "loss": 2.1039, "step": 15680 }, { "epoch": 0.8412553648068669, "grad_norm": 0.5546875, "learning_rate": 4.753546401077521e-06, "loss": 1.4437, "step": 15681 }, { "epoch": 0.8413090128755365, "grad_norm": 0.51953125, "learning_rate": 4.753508786655938e-06, "loss": 2.4012, "step": 15682 }, { "epoch": 0.841362660944206, "grad_norm": 0.48046875, "learning_rate": 4.75347116951301e-06, "loss": 2.3043, "step": 15683 }, { "epoch": 0.8414163090128756, "grad_norm": 0.494140625, "learning_rate": 4.753433549648781e-06, "loss": 1.86, "step": 15684 }, { "epoch": 0.841469957081545, "grad_norm": 0.56640625, "learning_rate": 4.753395927063298e-06, "loss": 2.5044, "step": 15685 }, { "epoch": 0.8415236051502146, "grad_norm": 0.376953125, "learning_rate": 4.753358301756606e-06, "loss": 2.4138, "step": 15686 }, { "epoch": 0.8415772532188841, "grad_norm": 0.498046875, "learning_rate": 4.75332067372875e-06, "loss": 2.2478, "step": 15687 }, { "epoch": 0.8416309012875537, "grad_norm": 0.431640625, "learning_rate": 4.753283042979774e-06, "loss": 2.3384, "step": 15688 }, { "epoch": 0.8416845493562232, "grad_norm": 0.365234375, "learning_rate": 4.753245409509726e-06, "loss": 2.2754, "step": 15689 }, { "epoch": 0.8417381974248928, "grad_norm": 0.486328125, "learning_rate": 4.753207773318651e-06, "loss": 2.4514, "step": 15690 }, { "epoch": 0.8417918454935622, "grad_norm": 0.455078125, "learning_rate": 4.753170134406593e-06, "loss": 2.308, "step": 15691 }, { "epoch": 0.8418454935622317, "grad_norm": 0.5625, "learning_rate": 4.7531324927735986e-06, "loss": 2.2229, "step": 15692 }, { "epoch": 0.8418991416309013, "grad_norm": 0.5078125, "learning_rate": 4.753094848419713e-06, "loss": 2.5024, "step": 15693 }, { "epoch": 0.8419527896995708, "grad_norm": 0.431640625, "learning_rate": 4.753057201344982e-06, "loss": 2.1563, "step": 15694 }, { "epoch": 0.8420064377682404, "grad_norm": 0.427734375, "learning_rate": 4.7530195515494495e-06, "loss": 1.968, "step": 15695 }, { "epoch": 0.8420600858369098, "grad_norm": 1.6640625, "learning_rate": 4.752981899033164e-06, "loss": 2.285, "step": 15696 }, { "epoch": 0.8421137339055794, "grad_norm": 0.5390625, "learning_rate": 4.752944243796167e-06, "loss": 2.4805, "step": 15697 }, { "epoch": 0.8421673819742489, "grad_norm": 0.421875, "learning_rate": 4.752906585838507e-06, "loss": 2.2149, "step": 15698 }, { "epoch": 0.8422210300429185, "grad_norm": 0.498046875, "learning_rate": 4.752868925160229e-06, "loss": 2.3006, "step": 15699 }, { "epoch": 0.842274678111588, "grad_norm": 0.37109375, "learning_rate": 4.752831261761377e-06, "loss": 2.008, "step": 15700 }, { "epoch": 0.8423283261802575, "grad_norm": 0.57421875, "learning_rate": 4.752793595641999e-06, "loss": 2.417, "step": 15701 }, { "epoch": 0.842381974248927, "grad_norm": 0.4765625, "learning_rate": 4.752755926802137e-06, "loss": 2.3366, "step": 15702 }, { "epoch": 0.8424356223175966, "grad_norm": 0.515625, "learning_rate": 4.752718255241841e-06, "loss": 2.476, "step": 15703 }, { "epoch": 0.8424892703862661, "grad_norm": 0.4375, "learning_rate": 4.752680580961152e-06, "loss": 2.3549, "step": 15704 }, { "epoch": 0.8425429184549357, "grad_norm": 0.5078125, "learning_rate": 4.752642903960118e-06, "loss": 2.0809, "step": 15705 }, { "epoch": 0.8425965665236052, "grad_norm": 0.423828125, "learning_rate": 4.752605224238784e-06, "loss": 2.2167, "step": 15706 }, { "epoch": 0.8426502145922746, "grad_norm": 0.4765625, "learning_rate": 4.752567541797196e-06, "loss": 2.1609, "step": 15707 }, { "epoch": 0.8427038626609442, "grad_norm": 0.431640625, "learning_rate": 4.752529856635398e-06, "loss": 2.4438, "step": 15708 }, { "epoch": 0.8427575107296137, "grad_norm": 0.4296875, "learning_rate": 4.752492168753438e-06, "loss": 2.2726, "step": 15709 }, { "epoch": 0.8428111587982833, "grad_norm": 0.40625, "learning_rate": 4.752454478151359e-06, "loss": 2.1059, "step": 15710 }, { "epoch": 0.8428648068669528, "grad_norm": 0.5390625, "learning_rate": 4.752416784829208e-06, "loss": 2.1951, "step": 15711 }, { "epoch": 0.8429184549356223, "grad_norm": 0.4375, "learning_rate": 4.752379088787029e-06, "loss": 2.3453, "step": 15712 }, { "epoch": 0.8429721030042918, "grad_norm": 0.3671875, "learning_rate": 4.7523413900248695e-06, "loss": 1.9137, "step": 15713 }, { "epoch": 0.8430257510729614, "grad_norm": 0.458984375, "learning_rate": 4.7523036885427745e-06, "loss": 2.4932, "step": 15714 }, { "epoch": 0.8430793991416309, "grad_norm": 1.109375, "learning_rate": 4.752265984340788e-06, "loss": 2.1829, "step": 15715 }, { "epoch": 0.8431330472103005, "grad_norm": 0.359375, "learning_rate": 4.752228277418957e-06, "loss": 2.0385, "step": 15716 }, { "epoch": 0.8431866952789699, "grad_norm": 0.423828125, "learning_rate": 4.752190567777327e-06, "loss": 2.2084, "step": 15717 }, { "epoch": 0.8432403433476395, "grad_norm": 0.478515625, "learning_rate": 4.752152855415944e-06, "loss": 2.11, "step": 15718 }, { "epoch": 0.843293991416309, "grad_norm": 0.494140625, "learning_rate": 4.752115140334851e-06, "loss": 2.0566, "step": 15719 }, { "epoch": 0.8433476394849786, "grad_norm": 0.447265625, "learning_rate": 4.752077422534096e-06, "loss": 2.3729, "step": 15720 }, { "epoch": 0.8434012875536481, "grad_norm": 0.51171875, "learning_rate": 4.752039702013724e-06, "loss": 2.2912, "step": 15721 }, { "epoch": 0.8434549356223175, "grad_norm": 0.6015625, "learning_rate": 4.752001978773781e-06, "loss": 1.4097, "step": 15722 }, { "epoch": 0.8435085836909871, "grad_norm": 0.51953125, "learning_rate": 4.751964252814311e-06, "loss": 2.3249, "step": 15723 }, { "epoch": 0.8435622317596566, "grad_norm": 0.375, "learning_rate": 4.751926524135361e-06, "loss": 2.0867, "step": 15724 }, { "epoch": 0.8436158798283262, "grad_norm": 0.4609375, "learning_rate": 4.7518887927369765e-06, "loss": 2.3159, "step": 15725 }, { "epoch": 0.8436695278969957, "grad_norm": 0.51171875, "learning_rate": 4.751851058619203e-06, "loss": 2.3614, "step": 15726 }, { "epoch": 0.8437231759656653, "grad_norm": 0.37890625, "learning_rate": 4.751813321782084e-06, "loss": 2.1156, "step": 15727 }, { "epoch": 0.8437768240343347, "grad_norm": 0.421875, "learning_rate": 4.751775582225668e-06, "loss": 2.1633, "step": 15728 }, { "epoch": 0.8438304721030043, "grad_norm": 0.4375, "learning_rate": 4.7517378399499994e-06, "loss": 2.3322, "step": 15729 }, { "epoch": 0.8438841201716738, "grad_norm": 0.66015625, "learning_rate": 4.751700094955123e-06, "loss": 2.3337, "step": 15730 }, { "epoch": 0.8439377682403434, "grad_norm": 0.53125, "learning_rate": 4.7516623472410856e-06, "loss": 2.392, "step": 15731 }, { "epoch": 0.8439914163090129, "grad_norm": 0.49609375, "learning_rate": 4.7516245968079325e-06, "loss": 2.2783, "step": 15732 }, { "epoch": 0.8440450643776825, "grad_norm": 0.498046875, "learning_rate": 4.751586843655709e-06, "loss": 2.3108, "step": 15733 }, { "epoch": 0.8440987124463519, "grad_norm": 0.50390625, "learning_rate": 4.751549087784461e-06, "loss": 2.352, "step": 15734 }, { "epoch": 0.8441523605150214, "grad_norm": 0.48828125, "learning_rate": 4.751511329194232e-06, "loss": 2.0687, "step": 15735 }, { "epoch": 0.844206008583691, "grad_norm": 0.458984375, "learning_rate": 4.751473567885072e-06, "loss": 2.2653, "step": 15736 }, { "epoch": 0.8442596566523605, "grad_norm": 0.4609375, "learning_rate": 4.751435803857023e-06, "loss": 2.3166, "step": 15737 }, { "epoch": 0.8443133047210301, "grad_norm": 0.51953125, "learning_rate": 4.751398037110132e-06, "loss": 1.3712, "step": 15738 }, { "epoch": 0.8443669527896995, "grad_norm": 0.451171875, "learning_rate": 4.751360267644444e-06, "loss": 2.4694, "step": 15739 }, { "epoch": 0.8444206008583691, "grad_norm": 0.3984375, "learning_rate": 4.7513224954600055e-06, "loss": 2.3113, "step": 15740 }, { "epoch": 0.8444742489270386, "grad_norm": 0.439453125, "learning_rate": 4.75128472055686e-06, "loss": 2.3028, "step": 15741 }, { "epoch": 0.8445278969957082, "grad_norm": 0.458984375, "learning_rate": 4.751246942935056e-06, "loss": 2.4082, "step": 15742 }, { "epoch": 0.8445815450643777, "grad_norm": 0.5, "learning_rate": 4.751209162594638e-06, "loss": 2.4278, "step": 15743 }, { "epoch": 0.8446351931330472, "grad_norm": 0.51171875, "learning_rate": 4.751171379535651e-06, "loss": 2.3992, "step": 15744 }, { "epoch": 0.8446888412017167, "grad_norm": 1.1953125, "learning_rate": 4.7511335937581406e-06, "loss": 2.4998, "step": 15745 }, { "epoch": 0.8447424892703863, "grad_norm": 0.41015625, "learning_rate": 4.7510958052621525e-06, "loss": 2.1786, "step": 15746 }, { "epoch": 0.8447961373390558, "grad_norm": 0.50390625, "learning_rate": 4.751058014047733e-06, "loss": 2.1155, "step": 15747 }, { "epoch": 0.8448497854077254, "grad_norm": 0.453125, "learning_rate": 4.751020220114928e-06, "loss": 2.4618, "step": 15748 }, { "epoch": 0.8449034334763948, "grad_norm": 0.47265625, "learning_rate": 4.750982423463782e-06, "loss": 2.2758, "step": 15749 }, { "epoch": 0.8449570815450643, "grad_norm": 0.484375, "learning_rate": 4.7509446240943415e-06, "loss": 2.1721, "step": 15750 }, { "epoch": 0.8450107296137339, "grad_norm": 0.51953125, "learning_rate": 4.750906822006652e-06, "loss": 2.2575, "step": 15751 }, { "epoch": 0.8450643776824034, "grad_norm": 0.51171875, "learning_rate": 4.750869017200759e-06, "loss": 2.4472, "step": 15752 }, { "epoch": 0.845118025751073, "grad_norm": 0.5234375, "learning_rate": 4.750831209676708e-06, "loss": 2.0275, "step": 15753 }, { "epoch": 0.8451716738197425, "grad_norm": 0.478515625, "learning_rate": 4.750793399434544e-06, "loss": 2.3622, "step": 15754 }, { "epoch": 0.845225321888412, "grad_norm": 0.453125, "learning_rate": 4.750755586474314e-06, "loss": 2.3545, "step": 15755 }, { "epoch": 0.8452789699570815, "grad_norm": 0.470703125, "learning_rate": 4.750717770796063e-06, "loss": 2.3528, "step": 15756 }, { "epoch": 0.8453326180257511, "grad_norm": 0.484375, "learning_rate": 4.750679952399837e-06, "loss": 2.3065, "step": 15757 }, { "epoch": 0.8453862660944206, "grad_norm": 0.40234375, "learning_rate": 4.750642131285682e-06, "loss": 2.3359, "step": 15758 }, { "epoch": 0.8454399141630902, "grad_norm": 0.4765625, "learning_rate": 4.750604307453642e-06, "loss": 2.3271, "step": 15759 }, { "epoch": 0.8454935622317596, "grad_norm": 0.51171875, "learning_rate": 4.750566480903764e-06, "loss": 2.2256, "step": 15760 }, { "epoch": 0.8455472103004292, "grad_norm": 0.462890625, "learning_rate": 4.750528651636094e-06, "loss": 2.1589, "step": 15761 }, { "epoch": 0.8456008583690987, "grad_norm": 0.53515625, "learning_rate": 4.7504908196506775e-06, "loss": 2.2964, "step": 15762 }, { "epoch": 0.8456545064377683, "grad_norm": 1.1015625, "learning_rate": 4.750452984947559e-06, "loss": 2.3625, "step": 15763 }, { "epoch": 0.8457081545064378, "grad_norm": 0.4765625, "learning_rate": 4.750415147526785e-06, "loss": 2.4857, "step": 15764 }, { "epoch": 0.8457618025751072, "grad_norm": 0.61328125, "learning_rate": 4.750377307388402e-06, "loss": 2.3834, "step": 15765 }, { "epoch": 0.8458154506437768, "grad_norm": 0.6015625, "learning_rate": 4.750339464532454e-06, "loss": 2.175, "step": 15766 }, { "epoch": 0.8458690987124463, "grad_norm": 0.474609375, "learning_rate": 4.750301618958989e-06, "loss": 2.2757, "step": 15767 }, { "epoch": 0.8459227467811159, "grad_norm": 0.42578125, "learning_rate": 4.75026377066805e-06, "loss": 2.4521, "step": 15768 }, { "epoch": 0.8459763948497854, "grad_norm": 0.453125, "learning_rate": 4.750225919659685e-06, "loss": 2.5083, "step": 15769 }, { "epoch": 0.846030042918455, "grad_norm": 0.578125, "learning_rate": 4.750188065933939e-06, "loss": 2.426, "step": 15770 }, { "epoch": 0.8460836909871244, "grad_norm": 0.384765625, "learning_rate": 4.7501502094908556e-06, "loss": 2.0678, "step": 15771 }, { "epoch": 0.846137339055794, "grad_norm": 0.50390625, "learning_rate": 4.750112350330483e-06, "loss": 2.1201, "step": 15772 }, { "epoch": 0.8461909871244635, "grad_norm": 0.416015625, "learning_rate": 4.750074488452868e-06, "loss": 2.3854, "step": 15773 }, { "epoch": 0.8462446351931331, "grad_norm": 0.515625, "learning_rate": 4.7500366238580535e-06, "loss": 2.23, "step": 15774 }, { "epoch": 0.8462982832618026, "grad_norm": 0.47265625, "learning_rate": 4.749998756546086e-06, "loss": 2.0209, "step": 15775 }, { "epoch": 0.8463519313304722, "grad_norm": 0.490234375, "learning_rate": 4.749960886517012e-06, "loss": 2.1672, "step": 15776 }, { "epoch": 0.8464055793991416, "grad_norm": 0.47265625, "learning_rate": 4.749923013770877e-06, "loss": 2.1391, "step": 15777 }, { "epoch": 0.8464592274678111, "grad_norm": 0.484375, "learning_rate": 4.749885138307726e-06, "loss": 2.243, "step": 15778 }, { "epoch": 0.8465128755364807, "grad_norm": 0.45703125, "learning_rate": 4.749847260127605e-06, "loss": 2.1778, "step": 15779 }, { "epoch": 0.8465665236051502, "grad_norm": 0.4921875, "learning_rate": 4.749809379230561e-06, "loss": 2.2085, "step": 15780 }, { "epoch": 0.8466201716738198, "grad_norm": 0.4765625, "learning_rate": 4.749771495616638e-06, "loss": 2.4868, "step": 15781 }, { "epoch": 0.8466738197424892, "grad_norm": 2.734375, "learning_rate": 4.749733609285883e-06, "loss": 1.8886, "step": 15782 }, { "epoch": 0.8467274678111588, "grad_norm": 0.46484375, "learning_rate": 4.749695720238341e-06, "loss": 2.2105, "step": 15783 }, { "epoch": 0.8467811158798283, "grad_norm": 0.515625, "learning_rate": 4.749657828474058e-06, "loss": 2.3084, "step": 15784 }, { "epoch": 0.8468347639484979, "grad_norm": 0.52734375, "learning_rate": 4.74961993399308e-06, "loss": 2.3585, "step": 15785 }, { "epoch": 0.8468884120171674, "grad_norm": 0.408203125, "learning_rate": 4.749582036795452e-06, "loss": 2.1603, "step": 15786 }, { "epoch": 0.846942060085837, "grad_norm": 0.365234375, "learning_rate": 4.749544136881221e-06, "loss": 2.2064, "step": 15787 }, { "epoch": 0.8469957081545064, "grad_norm": 0.4609375, "learning_rate": 4.749506234250432e-06, "loss": 2.3137, "step": 15788 }, { "epoch": 0.847049356223176, "grad_norm": 0.373046875, "learning_rate": 4.7494683289031306e-06, "loss": 2.0133, "step": 15789 }, { "epoch": 0.8471030042918455, "grad_norm": 0.423828125, "learning_rate": 4.749430420839362e-06, "loss": 2.3212, "step": 15790 }, { "epoch": 0.8471566523605151, "grad_norm": 0.451171875, "learning_rate": 4.7493925100591745e-06, "loss": 2.2679, "step": 15791 }, { "epoch": 0.8472103004291845, "grad_norm": 0.72265625, "learning_rate": 4.749354596562611e-06, "loss": 2.2395, "step": 15792 }, { "epoch": 0.847263948497854, "grad_norm": 0.462890625, "learning_rate": 4.749316680349718e-06, "loss": 2.058, "step": 15793 }, { "epoch": 0.8473175965665236, "grad_norm": 0.408203125, "learning_rate": 4.7492787614205425e-06, "loss": 2.1376, "step": 15794 }, { "epoch": 0.8473712446351931, "grad_norm": 0.62109375, "learning_rate": 4.74924083977513e-06, "loss": 2.4068, "step": 15795 }, { "epoch": 0.8474248927038627, "grad_norm": 0.416015625, "learning_rate": 4.749202915413526e-06, "loss": 2.0154, "step": 15796 }, { "epoch": 0.8474785407725322, "grad_norm": 0.40234375, "learning_rate": 4.749164988335775e-06, "loss": 2.0567, "step": 15797 }, { "epoch": 0.8475321888412017, "grad_norm": 0.4375, "learning_rate": 4.749127058541924e-06, "loss": 2.2471, "step": 15798 }, { "epoch": 0.8475858369098712, "grad_norm": 0.45703125, "learning_rate": 4.749089126032019e-06, "loss": 2.1963, "step": 15799 }, { "epoch": 0.8476394849785408, "grad_norm": 0.4921875, "learning_rate": 4.749051190806105e-06, "loss": 2.4619, "step": 15800 }, { "epoch": 0.8476931330472103, "grad_norm": 0.51953125, "learning_rate": 4.749013252864229e-06, "loss": 2.2281, "step": 15801 }, { "epoch": 0.8477467811158799, "grad_norm": 0.37109375, "learning_rate": 4.748975312206437e-06, "loss": 2.0761, "step": 15802 }, { "epoch": 0.8478004291845493, "grad_norm": 0.447265625, "learning_rate": 4.7489373688327736e-06, "loss": 2.2297, "step": 15803 }, { "epoch": 0.8478540772532189, "grad_norm": 0.375, "learning_rate": 4.7488994227432835e-06, "loss": 2.2093, "step": 15804 }, { "epoch": 0.8479077253218884, "grad_norm": 0.60546875, "learning_rate": 4.748861473938016e-06, "loss": 2.2191, "step": 15805 }, { "epoch": 0.847961373390558, "grad_norm": 0.578125, "learning_rate": 4.7488235224170135e-06, "loss": 2.3137, "step": 15806 }, { "epoch": 0.8480150214592275, "grad_norm": 3.28125, "learning_rate": 4.748785568180324e-06, "loss": 2.3551, "step": 15807 }, { "epoch": 0.848068669527897, "grad_norm": 0.423828125, "learning_rate": 4.748747611227993e-06, "loss": 2.2486, "step": 15808 }, { "epoch": 0.8481223175965665, "grad_norm": 0.451171875, "learning_rate": 4.748709651560065e-06, "loss": 2.2204, "step": 15809 }, { "epoch": 0.848175965665236, "grad_norm": 0.859375, "learning_rate": 4.748671689176587e-06, "loss": 2.4545, "step": 15810 }, { "epoch": 0.8482296137339056, "grad_norm": 0.515625, "learning_rate": 4.748633724077605e-06, "loss": 2.2116, "step": 15811 }, { "epoch": 0.8482832618025751, "grad_norm": 0.486328125, "learning_rate": 4.748595756263165e-06, "loss": 2.2226, "step": 15812 }, { "epoch": 0.8483369098712447, "grad_norm": 0.4375, "learning_rate": 4.7485577857333105e-06, "loss": 2.3869, "step": 15813 }, { "epoch": 0.8483905579399141, "grad_norm": 0.421875, "learning_rate": 4.748519812488091e-06, "loss": 2.2552, "step": 15814 }, { "epoch": 0.8484442060085837, "grad_norm": 0.4296875, "learning_rate": 4.74848183652755e-06, "loss": 2.1821, "step": 15815 }, { "epoch": 0.8484978540772532, "grad_norm": 0.44921875, "learning_rate": 4.748443857851734e-06, "loss": 2.2694, "step": 15816 }, { "epoch": 0.8485515021459228, "grad_norm": 0.48828125, "learning_rate": 4.748405876460689e-06, "loss": 2.4083, "step": 15817 }, { "epoch": 0.8486051502145923, "grad_norm": 0.47265625, "learning_rate": 4.74836789235446e-06, "loss": 1.9768, "step": 15818 }, { "epoch": 0.8486587982832619, "grad_norm": 0.396484375, "learning_rate": 4.748329905533093e-06, "loss": 2.2372, "step": 15819 }, { "epoch": 0.8487124463519313, "grad_norm": 0.578125, "learning_rate": 4.748291915996635e-06, "loss": 2.1333, "step": 15820 }, { "epoch": 0.8487660944206008, "grad_norm": 0.70703125, "learning_rate": 4.748253923745132e-06, "loss": 1.423, "step": 15821 }, { "epoch": 0.8488197424892704, "grad_norm": 0.45703125, "learning_rate": 4.748215928778629e-06, "loss": 2.0276, "step": 15822 }, { "epoch": 0.8488733905579399, "grad_norm": 0.458984375, "learning_rate": 4.748177931097171e-06, "loss": 2.4252, "step": 15823 }, { "epoch": 0.8489270386266095, "grad_norm": 0.462890625, "learning_rate": 4.7481399307008055e-06, "loss": 2.3966, "step": 15824 }, { "epoch": 0.8489806866952789, "grad_norm": 0.51953125, "learning_rate": 4.7481019275895775e-06, "loss": 2.3091, "step": 15825 }, { "epoch": 0.8490343347639485, "grad_norm": 0.4765625, "learning_rate": 4.7480639217635335e-06, "loss": 2.205, "step": 15826 }, { "epoch": 0.849087982832618, "grad_norm": 0.478515625, "learning_rate": 4.748025913222719e-06, "loss": 2.4231, "step": 15827 }, { "epoch": 0.8491416309012876, "grad_norm": 0.4453125, "learning_rate": 4.7479879019671805e-06, "loss": 2.2696, "step": 15828 }, { "epoch": 0.8491952789699571, "grad_norm": 0.76953125, "learning_rate": 4.747949887996963e-06, "loss": 2.4802, "step": 15829 }, { "epoch": 0.8492489270386266, "grad_norm": 0.55859375, "learning_rate": 4.7479118713121124e-06, "loss": 2.43, "step": 15830 }, { "epoch": 0.8493025751072961, "grad_norm": 0.58984375, "learning_rate": 4.7478738519126755e-06, "loss": 2.4333, "step": 15831 }, { "epoch": 0.8493562231759657, "grad_norm": 0.4453125, "learning_rate": 4.747835829798697e-06, "loss": 2.2237, "step": 15832 }, { "epoch": 0.8494098712446352, "grad_norm": 0.455078125, "learning_rate": 4.747797804970224e-06, "loss": 2.5033, "step": 15833 }, { "epoch": 0.8494635193133048, "grad_norm": 0.435546875, "learning_rate": 4.7477597774273025e-06, "loss": 2.2745, "step": 15834 }, { "epoch": 0.8495171673819742, "grad_norm": 0.53515625, "learning_rate": 4.747721747169977e-06, "loss": 1.9625, "step": 15835 }, { "epoch": 0.8495708154506437, "grad_norm": 0.51953125, "learning_rate": 4.747683714198295e-06, "loss": 2.3531, "step": 15836 }, { "epoch": 0.8496244635193133, "grad_norm": 0.4296875, "learning_rate": 4.747645678512302e-06, "loss": 2.3084, "step": 15837 }, { "epoch": 0.8496781115879828, "grad_norm": 0.43359375, "learning_rate": 4.747607640112042e-06, "loss": 2.3538, "step": 15838 }, { "epoch": 0.8497317596566524, "grad_norm": 0.427734375, "learning_rate": 4.747569598997564e-06, "loss": 2.4625, "step": 15839 }, { "epoch": 0.8497854077253219, "grad_norm": 0.4609375, "learning_rate": 4.747531555168912e-06, "loss": 2.4765, "step": 15840 }, { "epoch": 0.8498390557939914, "grad_norm": 0.5546875, "learning_rate": 4.747493508626132e-06, "loss": 2.3, "step": 15841 }, { "epoch": 0.8498927038626609, "grad_norm": 0.359375, "learning_rate": 4.747455459369272e-06, "loss": 2.2039, "step": 15842 }, { "epoch": 0.8499463519313305, "grad_norm": 0.640625, "learning_rate": 4.747417407398375e-06, "loss": 2.4356, "step": 15843 }, { "epoch": 0.85, "grad_norm": 0.443359375, "learning_rate": 4.747379352713489e-06, "loss": 2.3711, "step": 15844 }, { "epoch": 0.8500536480686696, "grad_norm": 0.462890625, "learning_rate": 4.747341295314659e-06, "loss": 2.2933, "step": 15845 }, { "epoch": 0.850107296137339, "grad_norm": 0.4453125, "learning_rate": 4.747303235201931e-06, "loss": 2.0626, "step": 15846 }, { "epoch": 0.8501609442060086, "grad_norm": 0.375, "learning_rate": 4.747265172375351e-06, "loss": 1.8262, "step": 15847 }, { "epoch": 0.8502145922746781, "grad_norm": 0.4609375, "learning_rate": 4.747227106834966e-06, "loss": 2.1868, "step": 15848 }, { "epoch": 0.8502682403433477, "grad_norm": 0.453125, "learning_rate": 4.747189038580821e-06, "loss": 2.4542, "step": 15849 }, { "epoch": 0.8503218884120172, "grad_norm": 0.412109375, "learning_rate": 4.747150967612961e-06, "loss": 2.1684, "step": 15850 }, { "epoch": 0.8503755364806866, "grad_norm": 0.427734375, "learning_rate": 4.747112893931434e-06, "loss": 2.0438, "step": 15851 }, { "epoch": 0.8504291845493562, "grad_norm": 0.5, "learning_rate": 4.747074817536285e-06, "loss": 2.4103, "step": 15852 }, { "epoch": 0.8504828326180257, "grad_norm": 0.462890625, "learning_rate": 4.74703673842756e-06, "loss": 2.3037, "step": 15853 }, { "epoch": 0.8505364806866953, "grad_norm": 0.515625, "learning_rate": 4.746998656605305e-06, "loss": 2.2087, "step": 15854 }, { "epoch": 0.8505901287553648, "grad_norm": 0.46875, "learning_rate": 4.746960572069565e-06, "loss": 2.2923, "step": 15855 }, { "epoch": 0.8506437768240344, "grad_norm": 0.458984375, "learning_rate": 4.746922484820389e-06, "loss": 2.2455, "step": 15856 }, { "epoch": 0.8506974248927038, "grad_norm": 0.453125, "learning_rate": 4.74688439485782e-06, "loss": 2.2518, "step": 15857 }, { "epoch": 0.8507510729613734, "grad_norm": 0.5390625, "learning_rate": 4.746846302181904e-06, "loss": 2.2101, "step": 15858 }, { "epoch": 0.8508047210300429, "grad_norm": 0.47265625, "learning_rate": 4.746808206792689e-06, "loss": 1.6049, "step": 15859 }, { "epoch": 0.8508583690987125, "grad_norm": 0.50390625, "learning_rate": 4.7467701086902205e-06, "loss": 2.2073, "step": 15860 }, { "epoch": 0.850912017167382, "grad_norm": 1.8046875, "learning_rate": 4.746732007874543e-06, "loss": 2.3623, "step": 15861 }, { "epoch": 0.8509656652360515, "grad_norm": 0.59375, "learning_rate": 4.7466939043457035e-06, "loss": 1.7253, "step": 15862 }, { "epoch": 0.851019313304721, "grad_norm": 0.45703125, "learning_rate": 4.746655798103749e-06, "loss": 2.1889, "step": 15863 }, { "epoch": 0.8510729613733906, "grad_norm": 0.546875, "learning_rate": 4.7466176891487234e-06, "loss": 2.4205, "step": 15864 }, { "epoch": 0.8511266094420601, "grad_norm": 0.498046875, "learning_rate": 4.746579577480675e-06, "loss": 2.2862, "step": 15865 }, { "epoch": 0.8511802575107296, "grad_norm": 0.49609375, "learning_rate": 4.746541463099648e-06, "loss": 2.1745, "step": 15866 }, { "epoch": 0.8512339055793992, "grad_norm": 0.4765625, "learning_rate": 4.746503346005689e-06, "loss": 2.2497, "step": 15867 }, { "epoch": 0.8512875536480686, "grad_norm": 0.390625, "learning_rate": 4.7464652261988444e-06, "loss": 2.2489, "step": 15868 }, { "epoch": 0.8513412017167382, "grad_norm": 0.419921875, "learning_rate": 4.74642710367916e-06, "loss": 2.3089, "step": 15869 }, { "epoch": 0.8513948497854077, "grad_norm": 0.4375, "learning_rate": 4.7463889784466824e-06, "loss": 2.4136, "step": 15870 }, { "epoch": 0.8514484978540773, "grad_norm": 0.4140625, "learning_rate": 4.7463508505014556e-06, "loss": 2.1319, "step": 15871 }, { "epoch": 0.8515021459227468, "grad_norm": 1.1875, "learning_rate": 4.746312719843528e-06, "loss": 2.1887, "step": 15872 }, { "epoch": 0.8515557939914163, "grad_norm": 0.5234375, "learning_rate": 4.746274586472944e-06, "loss": 2.1816, "step": 15873 }, { "epoch": 0.8516094420600858, "grad_norm": 0.419921875, "learning_rate": 4.7462364503897515e-06, "loss": 2.2274, "step": 15874 }, { "epoch": 0.8516630901287554, "grad_norm": 0.53515625, "learning_rate": 4.7461983115939944e-06, "loss": 2.2441, "step": 15875 }, { "epoch": 0.8517167381974249, "grad_norm": 0.419921875, "learning_rate": 4.746160170085721e-06, "loss": 2.1408, "step": 15876 }, { "epoch": 0.8517703862660945, "grad_norm": 0.490234375, "learning_rate": 4.7461220258649756e-06, "loss": 2.1905, "step": 15877 }, { "epoch": 0.851824034334764, "grad_norm": 0.416015625, "learning_rate": 4.746083878931804e-06, "loss": 2.1324, "step": 15878 }, { "epoch": 0.8518776824034334, "grad_norm": 0.53515625, "learning_rate": 4.746045729286254e-06, "loss": 2.1043, "step": 15879 }, { "epoch": 0.851931330472103, "grad_norm": 0.41796875, "learning_rate": 4.7460075769283705e-06, "loss": 2.1509, "step": 15880 }, { "epoch": 0.8519849785407725, "grad_norm": 0.4765625, "learning_rate": 4.7459694218582e-06, "loss": 2.3404, "step": 15881 }, { "epoch": 0.8520386266094421, "grad_norm": 0.427734375, "learning_rate": 4.745931264075789e-06, "loss": 2.2135, "step": 15882 }, { "epoch": 0.8520922746781115, "grad_norm": 0.5, "learning_rate": 4.745893103581181e-06, "loss": 2.4058, "step": 15883 }, { "epoch": 0.8521459227467811, "grad_norm": 0.53125, "learning_rate": 4.745854940374426e-06, "loss": 2.2408, "step": 15884 }, { "epoch": 0.8521995708154506, "grad_norm": 0.48046875, "learning_rate": 4.745816774455569e-06, "loss": 2.3536, "step": 15885 }, { "epoch": 0.8522532188841202, "grad_norm": 0.58984375, "learning_rate": 4.745778605824652e-06, "loss": 2.3336, "step": 15886 }, { "epoch": 0.8523068669527897, "grad_norm": 0.494140625, "learning_rate": 4.745740434481727e-06, "loss": 2.2412, "step": 15887 }, { "epoch": 0.8523605150214593, "grad_norm": 0.458984375, "learning_rate": 4.745702260426836e-06, "loss": 2.1283, "step": 15888 }, { "epoch": 0.8524141630901287, "grad_norm": 0.546875, "learning_rate": 4.745664083660028e-06, "loss": 2.3902, "step": 15889 }, { "epoch": 0.8524678111587983, "grad_norm": 0.48046875, "learning_rate": 4.745625904181345e-06, "loss": 2.3342, "step": 15890 }, { "epoch": 0.8525214592274678, "grad_norm": 0.51953125, "learning_rate": 4.7455877219908386e-06, "loss": 2.0295, "step": 15891 }, { "epoch": 0.8525751072961374, "grad_norm": 0.404296875, "learning_rate": 4.745549537088551e-06, "loss": 2.3118, "step": 15892 }, { "epoch": 0.8526287553648069, "grad_norm": 0.4375, "learning_rate": 4.745511349474529e-06, "loss": 1.8996, "step": 15893 }, { "epoch": 0.8526824034334763, "grad_norm": 0.447265625, "learning_rate": 4.74547315914882e-06, "loss": 2.3803, "step": 15894 }, { "epoch": 0.8527360515021459, "grad_norm": 0.390625, "learning_rate": 4.745434966111468e-06, "loss": 2.0102, "step": 15895 }, { "epoch": 0.8527896995708154, "grad_norm": 0.419921875, "learning_rate": 4.745396770362521e-06, "loss": 2.2651, "step": 15896 }, { "epoch": 0.852843347639485, "grad_norm": 0.625, "learning_rate": 4.745358571902025e-06, "loss": 2.4302, "step": 15897 }, { "epoch": 0.8528969957081545, "grad_norm": 0.478515625, "learning_rate": 4.745320370730025e-06, "loss": 2.2927, "step": 15898 }, { "epoch": 0.8529506437768241, "grad_norm": 0.361328125, "learning_rate": 4.745282166846566e-06, "loss": 2.0959, "step": 15899 }, { "epoch": 0.8530042918454935, "grad_norm": 0.51171875, "learning_rate": 4.7452439602516985e-06, "loss": 2.2176, "step": 15900 }, { "epoch": 0.8530579399141631, "grad_norm": 0.62109375, "learning_rate": 4.745205750945465e-06, "loss": 2.265, "step": 15901 }, { "epoch": 0.8531115879828326, "grad_norm": 0.55859375, "learning_rate": 4.745167538927912e-06, "loss": 2.1366, "step": 15902 }, { "epoch": 0.8531652360515022, "grad_norm": 0.546875, "learning_rate": 4.745129324199087e-06, "loss": 2.0696, "step": 15903 }, { "epoch": 0.8532188841201717, "grad_norm": 0.57421875, "learning_rate": 4.745091106759036e-06, "loss": 2.296, "step": 15904 }, { "epoch": 0.8532725321888412, "grad_norm": 0.49609375, "learning_rate": 4.745052886607803e-06, "loss": 2.3112, "step": 15905 }, { "epoch": 0.8533261802575107, "grad_norm": 0.453125, "learning_rate": 4.745014663745436e-06, "loss": 2.4604, "step": 15906 }, { "epoch": 0.8533798283261803, "grad_norm": 0.39453125, "learning_rate": 4.744976438171982e-06, "loss": 1.9949, "step": 15907 }, { "epoch": 0.8534334763948498, "grad_norm": 0.419921875, "learning_rate": 4.744938209887485e-06, "loss": 2.3016, "step": 15908 }, { "epoch": 0.8534871244635193, "grad_norm": 0.45703125, "learning_rate": 4.744899978891993e-06, "loss": 2.1106, "step": 15909 }, { "epoch": 0.8535407725321889, "grad_norm": 0.486328125, "learning_rate": 4.744861745185551e-06, "loss": 2.3277, "step": 15910 }, { "epoch": 0.8535944206008583, "grad_norm": 0.453125, "learning_rate": 4.744823508768206e-06, "loss": 2.4992, "step": 15911 }, { "epoch": 0.8536480686695279, "grad_norm": 0.4765625, "learning_rate": 4.744785269640003e-06, "loss": 2.1477, "step": 15912 }, { "epoch": 0.8537017167381974, "grad_norm": 0.9453125, "learning_rate": 4.744747027800989e-06, "loss": 2.2194, "step": 15913 }, { "epoch": 0.853755364806867, "grad_norm": 0.55078125, "learning_rate": 4.74470878325121e-06, "loss": 2.381, "step": 15914 }, { "epoch": 0.8538090128755365, "grad_norm": 0.462890625, "learning_rate": 4.744670535990713e-06, "loss": 2.1906, "step": 15915 }, { "epoch": 0.853862660944206, "grad_norm": 0.45703125, "learning_rate": 4.744632286019543e-06, "loss": 2.2693, "step": 15916 }, { "epoch": 0.8539163090128755, "grad_norm": 0.412109375, "learning_rate": 4.744594033337746e-06, "loss": 1.8295, "step": 15917 }, { "epoch": 0.8539699570815451, "grad_norm": 0.435546875, "learning_rate": 4.7445557779453695e-06, "loss": 2.1967, "step": 15918 }, { "epoch": 0.8540236051502146, "grad_norm": 0.458984375, "learning_rate": 4.74451751984246e-06, "loss": 2.2544, "step": 15919 }, { "epoch": 0.8540772532188842, "grad_norm": 0.52734375, "learning_rate": 4.744479259029061e-06, "loss": 2.5571, "step": 15920 }, { "epoch": 0.8541309012875536, "grad_norm": 0.48046875, "learning_rate": 4.744440995505221e-06, "loss": 2.2807, "step": 15921 }, { "epoch": 0.8541845493562231, "grad_norm": 0.48046875, "learning_rate": 4.744402729270986e-06, "loss": 2.1818, "step": 15922 }, { "epoch": 0.8542381974248927, "grad_norm": 0.6015625, "learning_rate": 4.744364460326402e-06, "loss": 2.1816, "step": 15923 }, { "epoch": 0.8542918454935622, "grad_norm": 0.54296875, "learning_rate": 4.744326188671514e-06, "loss": 2.7351, "step": 15924 }, { "epoch": 0.8543454935622318, "grad_norm": 0.478515625, "learning_rate": 4.744287914306371e-06, "loss": 2.3719, "step": 15925 }, { "epoch": 0.8543991416309012, "grad_norm": 0.421875, "learning_rate": 4.744249637231015e-06, "loss": 2.1673, "step": 15926 }, { "epoch": 0.8544527896995708, "grad_norm": 0.46875, "learning_rate": 4.744211357445497e-06, "loss": 2.31, "step": 15927 }, { "epoch": 0.8545064377682403, "grad_norm": 0.515625, "learning_rate": 4.744173074949861e-06, "loss": 2.3979, "step": 15928 }, { "epoch": 0.8545600858369099, "grad_norm": 0.5078125, "learning_rate": 4.744134789744151e-06, "loss": 2.3157, "step": 15929 }, { "epoch": 0.8546137339055794, "grad_norm": 0.431640625, "learning_rate": 4.744096501828417e-06, "loss": 2.2577, "step": 15930 }, { "epoch": 0.854667381974249, "grad_norm": 0.4453125, "learning_rate": 4.744058211202704e-06, "loss": 2.4478, "step": 15931 }, { "epoch": 0.8547210300429184, "grad_norm": 0.349609375, "learning_rate": 4.744019917867057e-06, "loss": 2.1987, "step": 15932 }, { "epoch": 0.854774678111588, "grad_norm": 0.57421875, "learning_rate": 4.743981621821523e-06, "loss": 1.3968, "step": 15933 }, { "epoch": 0.8548283261802575, "grad_norm": 0.38671875, "learning_rate": 4.743943323066149e-06, "loss": 2.2005, "step": 15934 }, { "epoch": 0.8548819742489271, "grad_norm": 0.484375, "learning_rate": 4.743905021600981e-06, "loss": 2.3938, "step": 15935 }, { "epoch": 0.8549356223175966, "grad_norm": 0.427734375, "learning_rate": 4.743866717426064e-06, "loss": 2.4793, "step": 15936 }, { "epoch": 0.854989270386266, "grad_norm": 0.482421875, "learning_rate": 4.743828410541445e-06, "loss": 2.7063, "step": 15937 }, { "epoch": 0.8550429184549356, "grad_norm": 1.1875, "learning_rate": 4.743790100947171e-06, "loss": 2.1428, "step": 15938 }, { "epoch": 0.8550965665236051, "grad_norm": 10.1875, "learning_rate": 4.743751788643288e-06, "loss": 3.0857, "step": 15939 }, { "epoch": 0.8551502145922747, "grad_norm": 0.3828125, "learning_rate": 4.743713473629842e-06, "loss": 2.2867, "step": 15940 }, { "epoch": 0.8552038626609442, "grad_norm": 0.5546875, "learning_rate": 4.743675155906878e-06, "loss": 2.7735, "step": 15941 }, { "epoch": 0.8552575107296138, "grad_norm": 0.451171875, "learning_rate": 4.743636835474444e-06, "loss": 2.2915, "step": 15942 }, { "epoch": 0.8553111587982832, "grad_norm": 0.47265625, "learning_rate": 4.743598512332586e-06, "loss": 2.4443, "step": 15943 }, { "epoch": 0.8553648068669528, "grad_norm": 0.4140625, "learning_rate": 4.74356018648135e-06, "loss": 2.4922, "step": 15944 }, { "epoch": 0.8554184549356223, "grad_norm": 0.4765625, "learning_rate": 4.743521857920782e-06, "loss": 1.9469, "step": 15945 }, { "epoch": 0.8554721030042919, "grad_norm": 0.439453125, "learning_rate": 4.74348352665093e-06, "loss": 2.1032, "step": 15946 }, { "epoch": 0.8555257510729614, "grad_norm": 0.470703125, "learning_rate": 4.7434451926718375e-06, "loss": 2.4103, "step": 15947 }, { "epoch": 0.855579399141631, "grad_norm": 0.43359375, "learning_rate": 4.743406855983552e-06, "loss": 2.3222, "step": 15948 }, { "epoch": 0.8556330472103004, "grad_norm": 0.47265625, "learning_rate": 4.7433685165861204e-06, "loss": 2.4901, "step": 15949 }, { "epoch": 0.85568669527897, "grad_norm": 0.45703125, "learning_rate": 4.743330174479589e-06, "loss": 2.3615, "step": 15950 }, { "epoch": 0.8557403433476395, "grad_norm": 0.5234375, "learning_rate": 4.7432918296640025e-06, "loss": 2.203, "step": 15951 }, { "epoch": 0.855793991416309, "grad_norm": 0.46875, "learning_rate": 4.743253482139409e-06, "loss": 2.4082, "step": 15952 }, { "epoch": 0.8558476394849786, "grad_norm": 0.51171875, "learning_rate": 4.743215131905855e-06, "loss": 2.3864, "step": 15953 }, { "epoch": 0.855901287553648, "grad_norm": 0.421875, "learning_rate": 4.743176778963386e-06, "loss": 2.162, "step": 15954 }, { "epoch": 0.8559549356223176, "grad_norm": 0.4453125, "learning_rate": 4.743138423312047e-06, "loss": 2.1048, "step": 15955 }, { "epoch": 0.8560085836909871, "grad_norm": 0.46484375, "learning_rate": 4.743100064951885e-06, "loss": 2.3436, "step": 15956 }, { "epoch": 0.8560622317596567, "grad_norm": 0.48828125, "learning_rate": 4.743061703882949e-06, "loss": 2.2494, "step": 15957 }, { "epoch": 0.8561158798283262, "grad_norm": 0.4296875, "learning_rate": 4.7430233401052825e-06, "loss": 2.2183, "step": 15958 }, { "epoch": 0.8561695278969957, "grad_norm": 0.43359375, "learning_rate": 4.742984973618933e-06, "loss": 2.2999, "step": 15959 }, { "epoch": 0.8562231759656652, "grad_norm": 0.419921875, "learning_rate": 4.742946604423946e-06, "loss": 2.2428, "step": 15960 }, { "epoch": 0.8562768240343348, "grad_norm": 0.498046875, "learning_rate": 4.742908232520368e-06, "loss": 2.3828, "step": 15961 }, { "epoch": 0.8563304721030043, "grad_norm": 0.384765625, "learning_rate": 4.742869857908246e-06, "loss": 1.8307, "step": 15962 }, { "epoch": 0.8563841201716739, "grad_norm": 0.52734375, "learning_rate": 4.742831480587626e-06, "loss": 2.1752, "step": 15963 }, { "epoch": 0.8564377682403433, "grad_norm": 0.78515625, "learning_rate": 4.7427931005585535e-06, "loss": 2.2881, "step": 15964 }, { "epoch": 0.8564914163090128, "grad_norm": 0.5859375, "learning_rate": 4.742754717821077e-06, "loss": 2.0376, "step": 15965 }, { "epoch": 0.8565450643776824, "grad_norm": 1.25, "learning_rate": 4.7427163323752406e-06, "loss": 1.3768, "step": 15966 }, { "epoch": 0.8565987124463519, "grad_norm": 0.474609375, "learning_rate": 4.742677944221092e-06, "loss": 2.2069, "step": 15967 }, { "epoch": 0.8566523605150215, "grad_norm": 0.546875, "learning_rate": 4.742639553358677e-06, "loss": 2.2814, "step": 15968 }, { "epoch": 0.856706008583691, "grad_norm": 0.45703125, "learning_rate": 4.742601159788042e-06, "loss": 2.2773, "step": 15969 }, { "epoch": 0.8567596566523605, "grad_norm": 1.0390625, "learning_rate": 4.742562763509233e-06, "loss": 2.2486, "step": 15970 }, { "epoch": 0.85681330472103, "grad_norm": 0.6171875, "learning_rate": 4.742524364522297e-06, "loss": 2.2679, "step": 15971 }, { "epoch": 0.8568669527896996, "grad_norm": 0.51953125, "learning_rate": 4.742485962827281e-06, "loss": 2.2355, "step": 15972 }, { "epoch": 0.8569206008583691, "grad_norm": 0.90625, "learning_rate": 4.742447558424229e-06, "loss": 2.3514, "step": 15973 }, { "epoch": 0.8569742489270387, "grad_norm": 0.44140625, "learning_rate": 4.74240915131319e-06, "loss": 2.2739, "step": 15974 }, { "epoch": 0.8570278969957081, "grad_norm": 0.47265625, "learning_rate": 4.742370741494209e-06, "loss": 2.3662, "step": 15975 }, { "epoch": 0.8570815450643777, "grad_norm": 0.39453125, "learning_rate": 4.7423323289673335e-06, "loss": 2.2523, "step": 15976 }, { "epoch": 0.8571351931330472, "grad_norm": 0.65234375, "learning_rate": 4.742293913732609e-06, "loss": 2.2476, "step": 15977 }, { "epoch": 0.8571888412017168, "grad_norm": 0.48046875, "learning_rate": 4.7422554957900805e-06, "loss": 2.3707, "step": 15978 }, { "epoch": 0.8572424892703863, "grad_norm": 0.439453125, "learning_rate": 4.742217075139796e-06, "loss": 2.3568, "step": 15979 }, { "epoch": 0.8572961373390557, "grad_norm": 0.43359375, "learning_rate": 4.742178651781804e-06, "loss": 2.3667, "step": 15980 }, { "epoch": 0.8573497854077253, "grad_norm": 0.498046875, "learning_rate": 4.7421402257161465e-06, "loss": 2.1036, "step": 15981 }, { "epoch": 0.8574034334763948, "grad_norm": 0.36328125, "learning_rate": 4.742101796942873e-06, "loss": 2.3705, "step": 15982 }, { "epoch": 0.8574570815450644, "grad_norm": 0.4921875, "learning_rate": 4.742063365462028e-06, "loss": 2.2703, "step": 15983 }, { "epoch": 0.8575107296137339, "grad_norm": 0.451171875, "learning_rate": 4.74202493127366e-06, "loss": 2.1598, "step": 15984 }, { "epoch": 0.8575643776824035, "grad_norm": 0.51953125, "learning_rate": 4.741986494377814e-06, "loss": 2.362, "step": 15985 }, { "epoch": 0.8576180257510729, "grad_norm": 0.91015625, "learning_rate": 4.7419480547745366e-06, "loss": 2.3252, "step": 15986 }, { "epoch": 0.8576716738197425, "grad_norm": 0.73828125, "learning_rate": 4.741909612463874e-06, "loss": 2.4608, "step": 15987 }, { "epoch": 0.857725321888412, "grad_norm": 0.65234375, "learning_rate": 4.7418711674458735e-06, "loss": 2.2539, "step": 15988 }, { "epoch": 0.8577789699570816, "grad_norm": 0.470703125, "learning_rate": 4.74183271972058e-06, "loss": 2.3626, "step": 15989 }, { "epoch": 0.8578326180257511, "grad_norm": 0.455078125, "learning_rate": 4.741794269288042e-06, "loss": 2.3326, "step": 15990 }, { "epoch": 0.8578862660944206, "grad_norm": 0.546875, "learning_rate": 4.741755816148304e-06, "loss": 2.3163, "step": 15991 }, { "epoch": 0.8579399141630901, "grad_norm": 0.57421875, "learning_rate": 4.741717360301414e-06, "loss": 2.0449, "step": 15992 }, { "epoch": 0.8579935622317597, "grad_norm": 0.625, "learning_rate": 4.741678901747417e-06, "loss": 1.9352, "step": 15993 }, { "epoch": 0.8580472103004292, "grad_norm": 0.51171875, "learning_rate": 4.741640440486361e-06, "loss": 2.0259, "step": 15994 }, { "epoch": 0.8581008583690987, "grad_norm": 0.3984375, "learning_rate": 4.741601976518292e-06, "loss": 2.2382, "step": 15995 }, { "epoch": 0.8581545064377682, "grad_norm": 0.470703125, "learning_rate": 4.741563509843255e-06, "loss": 1.4793, "step": 15996 }, { "epoch": 0.8582081545064377, "grad_norm": 0.443359375, "learning_rate": 4.741525040461298e-06, "loss": 2.218, "step": 15997 }, { "epoch": 0.8582618025751073, "grad_norm": 0.5546875, "learning_rate": 4.7414865683724665e-06, "loss": 2.2599, "step": 15998 }, { "epoch": 0.8583154506437768, "grad_norm": 0.498046875, "learning_rate": 4.741448093576807e-06, "loss": 2.1112, "step": 15999 }, { "epoch": 0.8583690987124464, "grad_norm": 0.41015625, "learning_rate": 4.741409616074367e-06, "loss": 2.2718, "step": 16000 }, { "epoch": 0.8584227467811159, "grad_norm": 0.451171875, "learning_rate": 4.7413711358651925e-06, "loss": 2.1596, "step": 16001 }, { "epoch": 0.8584763948497854, "grad_norm": 0.55859375, "learning_rate": 4.74133265294933e-06, "loss": 2.4565, "step": 16002 }, { "epoch": 0.8585300429184549, "grad_norm": 0.44140625, "learning_rate": 4.7412941673268255e-06, "loss": 2.2112, "step": 16003 }, { "epoch": 0.8585836909871245, "grad_norm": 0.365234375, "learning_rate": 4.741255678997726e-06, "loss": 2.1162, "step": 16004 }, { "epoch": 0.858637339055794, "grad_norm": 0.51171875, "learning_rate": 4.741217187962077e-06, "loss": 2.2476, "step": 16005 }, { "epoch": 0.8586909871244636, "grad_norm": 0.48828125, "learning_rate": 4.741178694219927e-06, "loss": 2.2134, "step": 16006 }, { "epoch": 0.858744635193133, "grad_norm": 0.484375, "learning_rate": 4.74114019777132e-06, "loss": 2.1756, "step": 16007 }, { "epoch": 0.8587982832618025, "grad_norm": 0.5, "learning_rate": 4.741101698616304e-06, "loss": 2.2411, "step": 16008 }, { "epoch": 0.8588519313304721, "grad_norm": 0.474609375, "learning_rate": 4.741063196754925e-06, "loss": 2.3175, "step": 16009 }, { "epoch": 0.8589055793991416, "grad_norm": 0.6796875, "learning_rate": 4.74102469218723e-06, "loss": 2.4347, "step": 16010 }, { "epoch": 0.8589592274678112, "grad_norm": 0.451171875, "learning_rate": 4.740986184913266e-06, "loss": 2.4184, "step": 16011 }, { "epoch": 0.8590128755364806, "grad_norm": 0.59375, "learning_rate": 4.740947674933078e-06, "loss": 2.4437, "step": 16012 }, { "epoch": 0.8590665236051502, "grad_norm": 0.6328125, "learning_rate": 4.7409091622467125e-06, "loss": 2.4567, "step": 16013 }, { "epoch": 0.8591201716738197, "grad_norm": 0.48828125, "learning_rate": 4.740870646854218e-06, "loss": 2.279, "step": 16014 }, { "epoch": 0.8591738197424893, "grad_norm": 0.482421875, "learning_rate": 4.740832128755638e-06, "loss": 2.3773, "step": 16015 }, { "epoch": 0.8592274678111588, "grad_norm": 0.4375, "learning_rate": 4.740793607951022e-06, "loss": 2.3317, "step": 16016 }, { "epoch": 0.8592811158798284, "grad_norm": 0.48828125, "learning_rate": 4.7407550844404145e-06, "loss": 2.1749, "step": 16017 }, { "epoch": 0.8593347639484978, "grad_norm": 0.435546875, "learning_rate": 4.7407165582238635e-06, "loss": 2.3476, "step": 16018 }, { "epoch": 0.8593884120171674, "grad_norm": 0.5, "learning_rate": 4.740678029301414e-06, "loss": 2.307, "step": 16019 }, { "epoch": 0.8594420600858369, "grad_norm": 0.66796875, "learning_rate": 4.740639497673114e-06, "loss": 2.1577, "step": 16020 }, { "epoch": 0.8594957081545065, "grad_norm": 0.44140625, "learning_rate": 4.740600963339009e-06, "loss": 2.1581, "step": 16021 }, { "epoch": 0.859549356223176, "grad_norm": 0.439453125, "learning_rate": 4.7405624262991465e-06, "loss": 2.0356, "step": 16022 }, { "epoch": 0.8596030042918454, "grad_norm": 0.423828125, "learning_rate": 4.740523886553572e-06, "loss": 2.2892, "step": 16023 }, { "epoch": 0.859656652360515, "grad_norm": 0.482421875, "learning_rate": 4.7404853441023325e-06, "loss": 2.3676, "step": 16024 }, { "epoch": 0.8597103004291845, "grad_norm": 0.435546875, "learning_rate": 4.7404467989454735e-06, "loss": 2.1533, "step": 16025 }, { "epoch": 0.8597639484978541, "grad_norm": 0.451171875, "learning_rate": 4.740408251083044e-06, "loss": 2.4032, "step": 16026 }, { "epoch": 0.8598175965665236, "grad_norm": 0.41015625, "learning_rate": 4.7403697005150885e-06, "loss": 2.29, "step": 16027 }, { "epoch": 0.8598712446351932, "grad_norm": 0.55078125, "learning_rate": 4.740331147241655e-06, "loss": 2.4176, "step": 16028 }, { "epoch": 0.8599248927038626, "grad_norm": 0.51953125, "learning_rate": 4.740292591262787e-06, "loss": 2.078, "step": 16029 }, { "epoch": 0.8599785407725322, "grad_norm": 0.453125, "learning_rate": 4.740254032578535e-06, "loss": 2.2664, "step": 16030 }, { "epoch": 0.8600321888412017, "grad_norm": 0.60546875, "learning_rate": 4.740215471188943e-06, "loss": 2.1346, "step": 16031 }, { "epoch": 0.8600858369098713, "grad_norm": 0.4609375, "learning_rate": 4.74017690709406e-06, "loss": 2.1605, "step": 16032 }, { "epoch": 0.8601394849785408, "grad_norm": 0.490234375, "learning_rate": 4.740138340293929e-06, "loss": 2.469, "step": 16033 }, { "epoch": 0.8601931330472103, "grad_norm": 0.462890625, "learning_rate": 4.7400997707886e-06, "loss": 2.2435, "step": 16034 }, { "epoch": 0.8602467811158798, "grad_norm": 0.439453125, "learning_rate": 4.7400611985781176e-06, "loss": 2.6332, "step": 16035 }, { "epoch": 0.8603004291845494, "grad_norm": 0.46484375, "learning_rate": 4.740022623662529e-06, "loss": 1.8484, "step": 16036 }, { "epoch": 0.8603540772532189, "grad_norm": 0.482421875, "learning_rate": 4.73998404604188e-06, "loss": 2.1589, "step": 16037 }, { "epoch": 0.8604077253218884, "grad_norm": 0.43359375, "learning_rate": 4.739945465716219e-06, "loss": 2.1218, "step": 16038 }, { "epoch": 0.860461373390558, "grad_norm": 1.3671875, "learning_rate": 4.739906882685591e-06, "loss": 2.3202, "step": 16039 }, { "epoch": 0.8605150214592274, "grad_norm": 0.42578125, "learning_rate": 4.739868296950043e-06, "loss": 2.384, "step": 16040 }, { "epoch": 0.860568669527897, "grad_norm": 0.47265625, "learning_rate": 4.739829708509621e-06, "loss": 2.055, "step": 16041 }, { "epoch": 0.8606223175965665, "grad_norm": 0.5703125, "learning_rate": 4.739791117364373e-06, "loss": 2.4718, "step": 16042 }, { "epoch": 0.8606759656652361, "grad_norm": 0.578125, "learning_rate": 4.739752523514345e-06, "loss": 2.1893, "step": 16043 }, { "epoch": 0.8607296137339056, "grad_norm": 0.5390625, "learning_rate": 4.739713926959583e-06, "loss": 2.4773, "step": 16044 }, { "epoch": 0.8607832618025751, "grad_norm": 0.443359375, "learning_rate": 4.739675327700134e-06, "loss": 2.2409, "step": 16045 }, { "epoch": 0.8608369098712446, "grad_norm": 0.4765625, "learning_rate": 4.739636725736045e-06, "loss": 2.1323, "step": 16046 }, { "epoch": 0.8608905579399142, "grad_norm": 0.474609375, "learning_rate": 4.739598121067363e-06, "loss": 2.4302, "step": 16047 }, { "epoch": 0.8609442060085837, "grad_norm": 0.9609375, "learning_rate": 4.739559513694132e-06, "loss": 2.3395, "step": 16048 }, { "epoch": 0.8609978540772533, "grad_norm": 0.482421875, "learning_rate": 4.739520903616402e-06, "loss": 2.2386, "step": 16049 }, { "epoch": 0.8610515021459227, "grad_norm": 0.51953125, "learning_rate": 4.739482290834217e-06, "loss": 2.2519, "step": 16050 }, { "epoch": 0.8611051502145923, "grad_norm": 1.0390625, "learning_rate": 4.739443675347627e-06, "loss": 2.3502, "step": 16051 }, { "epoch": 0.8611587982832618, "grad_norm": 0.46875, "learning_rate": 4.739405057156674e-06, "loss": 2.4679, "step": 16052 }, { "epoch": 0.8612124463519313, "grad_norm": 0.609375, "learning_rate": 4.739366436261409e-06, "loss": 2.2221, "step": 16053 }, { "epoch": 0.8612660944206009, "grad_norm": 0.484375, "learning_rate": 4.739327812661875e-06, "loss": 2.3802, "step": 16054 }, { "epoch": 0.8613197424892703, "grad_norm": 0.458984375, "learning_rate": 4.739289186358121e-06, "loss": 2.2695, "step": 16055 }, { "epoch": 0.8613733905579399, "grad_norm": 0.5078125, "learning_rate": 4.739250557350194e-06, "loss": 2.4719, "step": 16056 }, { "epoch": 0.8614270386266094, "grad_norm": 0.4296875, "learning_rate": 4.739211925638139e-06, "loss": 1.9472, "step": 16057 }, { "epoch": 0.861480686695279, "grad_norm": 0.44921875, "learning_rate": 4.739173291222002e-06, "loss": 2.102, "step": 16058 }, { "epoch": 0.8615343347639485, "grad_norm": 0.47265625, "learning_rate": 4.739134654101833e-06, "loss": 2.3453, "step": 16059 }, { "epoch": 0.8615879828326181, "grad_norm": 0.388671875, "learning_rate": 4.7390960142776746e-06, "loss": 2.21, "step": 16060 }, { "epoch": 0.8616416309012875, "grad_norm": 0.4921875, "learning_rate": 4.739057371749577e-06, "loss": 2.4388, "step": 16061 }, { "epoch": 0.8616952789699571, "grad_norm": 0.458984375, "learning_rate": 4.739018726517585e-06, "loss": 2.4618, "step": 16062 }, { "epoch": 0.8617489270386266, "grad_norm": 0.4375, "learning_rate": 4.738980078581745e-06, "loss": 2.268, "step": 16063 }, { "epoch": 0.8618025751072962, "grad_norm": 37.25, "learning_rate": 4.738941427942104e-06, "loss": 2.2462, "step": 16064 }, { "epoch": 0.8618562231759657, "grad_norm": 0.59375, "learning_rate": 4.73890277459871e-06, "loss": 2.4255, "step": 16065 }, { "epoch": 0.8619098712446351, "grad_norm": 0.44140625, "learning_rate": 4.738864118551608e-06, "loss": 2.2443, "step": 16066 }, { "epoch": 0.8619635193133047, "grad_norm": 0.46875, "learning_rate": 4.7388254598008455e-06, "loss": 2.2943, "step": 16067 }, { "epoch": 0.8620171673819742, "grad_norm": 0.44921875, "learning_rate": 4.7387867983464695e-06, "loss": 2.3255, "step": 16068 }, { "epoch": 0.8620708154506438, "grad_norm": 0.5, "learning_rate": 4.738748134188525e-06, "loss": 2.419, "step": 16069 }, { "epoch": 0.8621244635193133, "grad_norm": 0.482421875, "learning_rate": 4.73870946732706e-06, "loss": 2.2061, "step": 16070 }, { "epoch": 0.8621781115879829, "grad_norm": 0.498046875, "learning_rate": 4.738670797762123e-06, "loss": 2.3227, "step": 16071 }, { "epoch": 0.8622317596566523, "grad_norm": 0.474609375, "learning_rate": 4.738632125493757e-06, "loss": 2.1801, "step": 16072 }, { "epoch": 0.8622854077253219, "grad_norm": 0.53125, "learning_rate": 4.738593450522011e-06, "loss": 2.5233, "step": 16073 }, { "epoch": 0.8623390557939914, "grad_norm": 0.5546875, "learning_rate": 4.738554772846931e-06, "loss": 2.4235, "step": 16074 }, { "epoch": 0.862392703862661, "grad_norm": 0.462890625, "learning_rate": 4.738516092468563e-06, "loss": 2.3824, "step": 16075 }, { "epoch": 0.8624463519313305, "grad_norm": 0.44921875, "learning_rate": 4.738477409386955e-06, "loss": 2.1734, "step": 16076 }, { "epoch": 0.8625, "grad_norm": 0.4375, "learning_rate": 4.738438723602154e-06, "loss": 1.9114, "step": 16077 }, { "epoch": 0.8625536480686695, "grad_norm": 0.416015625, "learning_rate": 4.738400035114206e-06, "loss": 2.2932, "step": 16078 }, { "epoch": 0.8626072961373391, "grad_norm": 0.43359375, "learning_rate": 4.738361343923158e-06, "loss": 2.2231, "step": 16079 }, { "epoch": 0.8626609442060086, "grad_norm": 0.40625, "learning_rate": 4.738322650029056e-06, "loss": 2.3534, "step": 16080 }, { "epoch": 0.8627145922746781, "grad_norm": 0.376953125, "learning_rate": 4.738283953431946e-06, "loss": 2.1781, "step": 16081 }, { "epoch": 0.8627682403433476, "grad_norm": 0.53515625, "learning_rate": 4.738245254131877e-06, "loss": 2.3511, "step": 16082 }, { "epoch": 0.8628218884120171, "grad_norm": 0.41015625, "learning_rate": 4.7382065521288945e-06, "loss": 2.2328, "step": 16083 }, { "epoch": 0.8628755364806867, "grad_norm": 0.46484375, "learning_rate": 4.738167847423046e-06, "loss": 2.1798, "step": 16084 }, { "epoch": 0.8629291845493562, "grad_norm": 0.4609375, "learning_rate": 4.738129140014377e-06, "loss": 2.227, "step": 16085 }, { "epoch": 0.8629828326180258, "grad_norm": 0.45703125, "learning_rate": 4.738090429902935e-06, "loss": 2.121, "step": 16086 }, { "epoch": 0.8630364806866953, "grad_norm": 0.36328125, "learning_rate": 4.738051717088766e-06, "loss": 2.0907, "step": 16087 }, { "epoch": 0.8630901287553648, "grad_norm": 0.43359375, "learning_rate": 4.738013001571919e-06, "loss": 2.2259, "step": 16088 }, { "epoch": 0.8631437768240343, "grad_norm": 0.431640625, "learning_rate": 4.737974283352437e-06, "loss": 2.3161, "step": 16089 }, { "epoch": 0.8631974248927039, "grad_norm": 0.515625, "learning_rate": 4.73793556243037e-06, "loss": 2.2171, "step": 16090 }, { "epoch": 0.8632510729613734, "grad_norm": 0.478515625, "learning_rate": 4.7378968388057635e-06, "loss": 2.0835, "step": 16091 }, { "epoch": 0.863304721030043, "grad_norm": 0.46875, "learning_rate": 4.737858112478665e-06, "loss": 2.2779, "step": 16092 }, { "epoch": 0.8633583690987124, "grad_norm": 0.43359375, "learning_rate": 4.73781938344912e-06, "loss": 2.2909, "step": 16093 }, { "epoch": 0.863412017167382, "grad_norm": 0.515625, "learning_rate": 4.737780651717175e-06, "loss": 2.2676, "step": 16094 }, { "epoch": 0.8634656652360515, "grad_norm": 0.5, "learning_rate": 4.73774191728288e-06, "loss": 2.0911, "step": 16095 }, { "epoch": 0.863519313304721, "grad_norm": 0.49609375, "learning_rate": 4.737703180146277e-06, "loss": 2.2118, "step": 16096 }, { "epoch": 0.8635729613733906, "grad_norm": 0.44140625, "learning_rate": 4.737664440307417e-06, "loss": 2.2778, "step": 16097 }, { "epoch": 0.86362660944206, "grad_norm": 0.4453125, "learning_rate": 4.737625697766345e-06, "loss": 2.3596, "step": 16098 }, { "epoch": 0.8636802575107296, "grad_norm": 0.53515625, "learning_rate": 4.737586952523107e-06, "loss": 2.0895, "step": 16099 }, { "epoch": 0.8637339055793991, "grad_norm": 0.53125, "learning_rate": 4.737548204577751e-06, "loss": 2.1661, "step": 16100 }, { "epoch": 0.8637875536480687, "grad_norm": 0.427734375, "learning_rate": 4.737509453930324e-06, "loss": 2.2915, "step": 16101 }, { "epoch": 0.8638412017167382, "grad_norm": 0.443359375, "learning_rate": 4.737470700580871e-06, "loss": 1.8711, "step": 16102 }, { "epoch": 0.8638948497854078, "grad_norm": 0.3984375, "learning_rate": 4.73743194452944e-06, "loss": 2.2005, "step": 16103 }, { "epoch": 0.8639484978540772, "grad_norm": 0.62109375, "learning_rate": 4.737393185776078e-06, "loss": 2.1533, "step": 16104 }, { "epoch": 0.8640021459227468, "grad_norm": 0.443359375, "learning_rate": 4.737354424320833e-06, "loss": 2.4258, "step": 16105 }, { "epoch": 0.8640557939914163, "grad_norm": 0.455078125, "learning_rate": 4.737315660163748e-06, "loss": 2.3426, "step": 16106 }, { "epoch": 0.8641094420600859, "grad_norm": 0.5078125, "learning_rate": 4.737276893304874e-06, "loss": 2.2376, "step": 16107 }, { "epoch": 0.8641630901287554, "grad_norm": 0.498046875, "learning_rate": 4.737238123744255e-06, "loss": 2.3137, "step": 16108 }, { "epoch": 0.8642167381974248, "grad_norm": 0.61328125, "learning_rate": 4.737199351481941e-06, "loss": 2.0734, "step": 16109 }, { "epoch": 0.8642703862660944, "grad_norm": 0.49609375, "learning_rate": 4.737160576517974e-06, "loss": 2.5085, "step": 16110 }, { "epoch": 0.8643240343347639, "grad_norm": 0.48828125, "learning_rate": 4.737121798852405e-06, "loss": 2.1387, "step": 16111 }, { "epoch": 0.8643776824034335, "grad_norm": 0.46484375, "learning_rate": 4.737083018485279e-06, "loss": 2.1598, "step": 16112 }, { "epoch": 0.864431330472103, "grad_norm": 0.48828125, "learning_rate": 4.737044235416643e-06, "loss": 2.2226, "step": 16113 }, { "epoch": 0.8644849785407726, "grad_norm": 0.6796875, "learning_rate": 4.737005449646544e-06, "loss": 1.9435, "step": 16114 }, { "epoch": 0.864538626609442, "grad_norm": 0.55078125, "learning_rate": 4.736966661175029e-06, "loss": 2.3176, "step": 16115 }, { "epoch": 0.8645922746781116, "grad_norm": 0.421875, "learning_rate": 4.736927870002145e-06, "loss": 2.2145, "step": 16116 }, { "epoch": 0.8646459227467811, "grad_norm": 0.55078125, "learning_rate": 4.736889076127938e-06, "loss": 2.1062, "step": 16117 }, { "epoch": 0.8646995708154507, "grad_norm": 0.484375, "learning_rate": 4.736850279552455e-06, "loss": 2.5156, "step": 16118 }, { "epoch": 0.8647532188841202, "grad_norm": 0.470703125, "learning_rate": 4.736811480275743e-06, "loss": 2.1973, "step": 16119 }, { "epoch": 0.8648068669527897, "grad_norm": 0.478515625, "learning_rate": 4.736772678297851e-06, "loss": 2.3051, "step": 16120 }, { "epoch": 0.8648605150214592, "grad_norm": 0.55078125, "learning_rate": 4.736733873618823e-06, "loss": 2.4905, "step": 16121 }, { "epoch": 0.8649141630901288, "grad_norm": 0.484375, "learning_rate": 4.736695066238705e-06, "loss": 2.4649, "step": 16122 }, { "epoch": 0.8649678111587983, "grad_norm": 0.498046875, "learning_rate": 4.736656256157548e-06, "loss": 2.5072, "step": 16123 }, { "epoch": 0.8650214592274678, "grad_norm": 1.0859375, "learning_rate": 4.736617443375396e-06, "loss": 2.5309, "step": 16124 }, { "epoch": 0.8650751072961373, "grad_norm": 0.37890625, "learning_rate": 4.736578627892296e-06, "loss": 2.0651, "step": 16125 }, { "epoch": 0.8651287553648068, "grad_norm": 0.51171875, "learning_rate": 4.736539809708295e-06, "loss": 2.2098, "step": 16126 }, { "epoch": 0.8651824034334764, "grad_norm": 0.46875, "learning_rate": 4.736500988823441e-06, "loss": 2.1331, "step": 16127 }, { "epoch": 0.8652360515021459, "grad_norm": 0.55859375, "learning_rate": 4.736462165237779e-06, "loss": 2.3736, "step": 16128 }, { "epoch": 0.8652896995708155, "grad_norm": 0.423828125, "learning_rate": 4.7364233389513584e-06, "loss": 2.1553, "step": 16129 }, { "epoch": 0.865343347639485, "grad_norm": 0.46484375, "learning_rate": 4.736384509964223e-06, "loss": 2.1132, "step": 16130 }, { "epoch": 0.8653969957081545, "grad_norm": 0.41796875, "learning_rate": 4.736345678276422e-06, "loss": 2.1515, "step": 16131 }, { "epoch": 0.865450643776824, "grad_norm": 0.56640625, "learning_rate": 4.736306843888002e-06, "loss": 2.3105, "step": 16132 }, { "epoch": 0.8655042918454936, "grad_norm": 0.79296875, "learning_rate": 4.736268006799009e-06, "loss": 2.1385, "step": 16133 }, { "epoch": 0.8655579399141631, "grad_norm": 0.4375, "learning_rate": 4.73622916700949e-06, "loss": 2.2212, "step": 16134 }, { "epoch": 0.8656115879828327, "grad_norm": 0.51953125, "learning_rate": 4.736190324519493e-06, "loss": 2.2106, "step": 16135 }, { "epoch": 0.8656652360515021, "grad_norm": 0.455078125, "learning_rate": 4.736151479329063e-06, "loss": 2.163, "step": 16136 }, { "epoch": 0.8657188841201717, "grad_norm": 0.48046875, "learning_rate": 4.73611263143825e-06, "loss": 2.3667, "step": 16137 }, { "epoch": 0.8657725321888412, "grad_norm": 0.470703125, "learning_rate": 4.736073780847097e-06, "loss": 2.2156, "step": 16138 }, { "epoch": 0.8658261802575107, "grad_norm": 0.380859375, "learning_rate": 4.736034927555654e-06, "loss": 1.9898, "step": 16139 }, { "epoch": 0.8658798283261803, "grad_norm": 0.47265625, "learning_rate": 4.735996071563966e-06, "loss": 2.1736, "step": 16140 }, { "epoch": 0.8659334763948497, "grad_norm": 1.9375, "learning_rate": 4.735957212872082e-06, "loss": 2.5765, "step": 16141 }, { "epoch": 0.8659871244635193, "grad_norm": 0.36328125, "learning_rate": 4.7359183514800475e-06, "loss": 1.8862, "step": 16142 }, { "epoch": 0.8660407725321888, "grad_norm": 0.435546875, "learning_rate": 4.7358794873879086e-06, "loss": 2.1555, "step": 16143 }, { "epoch": 0.8660944206008584, "grad_norm": 0.45703125, "learning_rate": 4.735840620595714e-06, "loss": 2.1733, "step": 16144 }, { "epoch": 0.8661480686695279, "grad_norm": 2.03125, "learning_rate": 4.7358017511035105e-06, "loss": 2.2055, "step": 16145 }, { "epoch": 0.8662017167381975, "grad_norm": 1.125, "learning_rate": 4.735762878911344e-06, "loss": 2.255, "step": 16146 }, { "epoch": 0.8662553648068669, "grad_norm": 0.42578125, "learning_rate": 4.735724004019261e-06, "loss": 2.2608, "step": 16147 }, { "epoch": 0.8663090128755365, "grad_norm": 0.515625, "learning_rate": 4.735685126427309e-06, "loss": 2.2679, "step": 16148 }, { "epoch": 0.866362660944206, "grad_norm": 0.3359375, "learning_rate": 4.7356462461355366e-06, "loss": 1.9109, "step": 16149 }, { "epoch": 0.8664163090128756, "grad_norm": 0.498046875, "learning_rate": 4.735607363143989e-06, "loss": 2.4609, "step": 16150 }, { "epoch": 0.8664699570815451, "grad_norm": 0.515625, "learning_rate": 4.7355684774527145e-06, "loss": 2.5038, "step": 16151 }, { "epoch": 0.8665236051502145, "grad_norm": 0.53515625, "learning_rate": 4.7355295890617575e-06, "loss": 2.2028, "step": 16152 }, { "epoch": 0.8665772532188841, "grad_norm": 0.5, "learning_rate": 4.735490697971168e-06, "loss": 2.3347, "step": 16153 }, { "epoch": 0.8666309012875536, "grad_norm": 0.44140625, "learning_rate": 4.7354518041809905e-06, "loss": 2.2546, "step": 16154 }, { "epoch": 0.8666845493562232, "grad_norm": 0.4609375, "learning_rate": 4.735412907691273e-06, "loss": 2.2319, "step": 16155 }, { "epoch": 0.8667381974248927, "grad_norm": 0.5, "learning_rate": 4.735374008502063e-06, "loss": 1.8664, "step": 16156 }, { "epoch": 0.8667918454935623, "grad_norm": 0.37109375, "learning_rate": 4.735335106613407e-06, "loss": 2.2296, "step": 16157 }, { "epoch": 0.8668454935622317, "grad_norm": 0.9140625, "learning_rate": 4.7352962020253515e-06, "loss": 1.4953, "step": 16158 }, { "epoch": 0.8668991416309013, "grad_norm": 0.412109375, "learning_rate": 4.735257294737944e-06, "loss": 2.2091, "step": 16159 }, { "epoch": 0.8669527896995708, "grad_norm": 0.5546875, "learning_rate": 4.735218384751232e-06, "loss": 2.3106, "step": 16160 }, { "epoch": 0.8670064377682404, "grad_norm": 0.482421875, "learning_rate": 4.735179472065261e-06, "loss": 2.3012, "step": 16161 }, { "epoch": 0.8670600858369099, "grad_norm": 0.498046875, "learning_rate": 4.73514055668008e-06, "loss": 2.5067, "step": 16162 }, { "epoch": 0.8671137339055794, "grad_norm": 0.416015625, "learning_rate": 4.7351016385957335e-06, "loss": 2.3641, "step": 16163 }, { "epoch": 0.8671673819742489, "grad_norm": 0.51171875, "learning_rate": 4.735062717812271e-06, "loss": 2.6828, "step": 16164 }, { "epoch": 0.8672210300429185, "grad_norm": 0.5234375, "learning_rate": 4.735023794329738e-06, "loss": 2.5088, "step": 16165 }, { "epoch": 0.867274678111588, "grad_norm": 0.46875, "learning_rate": 4.734984868148183e-06, "loss": 2.2188, "step": 16166 }, { "epoch": 0.8673283261802575, "grad_norm": 0.546875, "learning_rate": 4.73494593926765e-06, "loss": 2.4729, "step": 16167 }, { "epoch": 0.867381974248927, "grad_norm": 0.484375, "learning_rate": 4.734907007688189e-06, "loss": 2.0514, "step": 16168 }, { "epoch": 0.8674356223175965, "grad_norm": 0.486328125, "learning_rate": 4.734868073409845e-06, "loss": 2.2371, "step": 16169 }, { "epoch": 0.8674892703862661, "grad_norm": 0.439453125, "learning_rate": 4.734829136432667e-06, "loss": 2.1226, "step": 16170 }, { "epoch": 0.8675429184549356, "grad_norm": 0.466796875, "learning_rate": 4.734790196756701e-06, "loss": 2.3728, "step": 16171 }, { "epoch": 0.8675965665236052, "grad_norm": 0.494140625, "learning_rate": 4.734751254381993e-06, "loss": 2.3332, "step": 16172 }, { "epoch": 0.8676502145922746, "grad_norm": 0.45703125, "learning_rate": 4.734712309308592e-06, "loss": 2.3154, "step": 16173 }, { "epoch": 0.8677038626609442, "grad_norm": 0.5703125, "learning_rate": 4.734673361536544e-06, "loss": 2.1104, "step": 16174 }, { "epoch": 0.8677575107296137, "grad_norm": 0.53515625, "learning_rate": 4.734634411065895e-06, "loss": 2.2734, "step": 16175 }, { "epoch": 0.8678111587982833, "grad_norm": 0.431640625, "learning_rate": 4.734595457896693e-06, "loss": 2.2576, "step": 16176 }, { "epoch": 0.8678648068669528, "grad_norm": 0.4765625, "learning_rate": 4.734556502028987e-06, "loss": 1.204, "step": 16177 }, { "epoch": 0.8679184549356224, "grad_norm": 0.490234375, "learning_rate": 4.734517543462821e-06, "loss": 2.2199, "step": 16178 }, { "epoch": 0.8679721030042918, "grad_norm": 0.53125, "learning_rate": 4.734478582198243e-06, "loss": 2.4259, "step": 16179 }, { "epoch": 0.8680257510729614, "grad_norm": 0.3984375, "learning_rate": 4.734439618235302e-06, "loss": 1.5815, "step": 16180 }, { "epoch": 0.8680793991416309, "grad_norm": 0.4765625, "learning_rate": 4.734400651574041e-06, "loss": 2.2412, "step": 16181 }, { "epoch": 0.8681330472103004, "grad_norm": 0.52734375, "learning_rate": 4.734361682214511e-06, "loss": 2.4472, "step": 16182 }, { "epoch": 0.86818669527897, "grad_norm": 0.53515625, "learning_rate": 4.734322710156756e-06, "loss": 2.3987, "step": 16183 }, { "epoch": 0.8682403433476394, "grad_norm": 0.47265625, "learning_rate": 4.734283735400826e-06, "loss": 2.2632, "step": 16184 }, { "epoch": 0.868293991416309, "grad_norm": 0.44140625, "learning_rate": 4.734244757946765e-06, "loss": 2.2033, "step": 16185 }, { "epoch": 0.8683476394849785, "grad_norm": 0.447265625, "learning_rate": 4.734205777794624e-06, "loss": 2.41, "step": 16186 }, { "epoch": 0.8684012875536481, "grad_norm": 0.470703125, "learning_rate": 4.734166794944446e-06, "loss": 2.4802, "step": 16187 }, { "epoch": 0.8684549356223176, "grad_norm": 0.51953125, "learning_rate": 4.73412780939628e-06, "loss": 2.329, "step": 16188 }, { "epoch": 0.8685085836909872, "grad_norm": 0.4765625, "learning_rate": 4.734088821150173e-06, "loss": 2.2145, "step": 16189 }, { "epoch": 0.8685622317596566, "grad_norm": 0.5625, "learning_rate": 4.7340498302061725e-06, "loss": 2.0311, "step": 16190 }, { "epoch": 0.8686158798283262, "grad_norm": 0.5234375, "learning_rate": 4.7340108365643245e-06, "loss": 2.1693, "step": 16191 }, { "epoch": 0.8686695278969957, "grad_norm": 0.4296875, "learning_rate": 4.733971840224677e-06, "loss": 2.2516, "step": 16192 }, { "epoch": 0.8687231759656653, "grad_norm": 0.451171875, "learning_rate": 4.733932841187276e-06, "loss": 2.1702, "step": 16193 }, { "epoch": 0.8687768240343348, "grad_norm": 0.546875, "learning_rate": 4.73389383945217e-06, "loss": 1.2244, "step": 16194 }, { "epoch": 0.8688304721030042, "grad_norm": 0.44921875, "learning_rate": 4.733854835019405e-06, "loss": 2.3824, "step": 16195 }, { "epoch": 0.8688841201716738, "grad_norm": 0.47265625, "learning_rate": 4.733815827889028e-06, "loss": 2.31, "step": 16196 }, { "epoch": 0.8689377682403433, "grad_norm": 0.482421875, "learning_rate": 4.733776818061088e-06, "loss": 2.274, "step": 16197 }, { "epoch": 0.8689914163090129, "grad_norm": 0.490234375, "learning_rate": 4.73373780553563e-06, "loss": 2.2036, "step": 16198 }, { "epoch": 0.8690450643776824, "grad_norm": 0.458984375, "learning_rate": 4.733698790312702e-06, "loss": 2.2761, "step": 16199 }, { "epoch": 0.869098712446352, "grad_norm": 0.380859375, "learning_rate": 4.73365977239235e-06, "loss": 1.9815, "step": 16200 }, { "epoch": 0.8691523605150214, "grad_norm": 0.44921875, "learning_rate": 4.733620751774624e-06, "loss": 2.133, "step": 16201 }, { "epoch": 0.869206008583691, "grad_norm": 0.53515625, "learning_rate": 4.733581728459568e-06, "loss": 2.1794, "step": 16202 }, { "epoch": 0.8692596566523605, "grad_norm": 0.484375, "learning_rate": 4.733542702447231e-06, "loss": 2.3222, "step": 16203 }, { "epoch": 0.8693133047210301, "grad_norm": 0.51171875, "learning_rate": 4.733503673737659e-06, "loss": 2.4082, "step": 16204 }, { "epoch": 0.8693669527896996, "grad_norm": 0.84375, "learning_rate": 4.733464642330899e-06, "loss": 2.2511, "step": 16205 }, { "epoch": 0.8694206008583691, "grad_norm": 0.5703125, "learning_rate": 4.733425608227e-06, "loss": 2.2299, "step": 16206 }, { "epoch": 0.8694742489270386, "grad_norm": 0.7109375, "learning_rate": 4.733386571426007e-06, "loss": 2.0396, "step": 16207 }, { "epoch": 0.8695278969957082, "grad_norm": 0.55078125, "learning_rate": 4.7333475319279684e-06, "loss": 2.408, "step": 16208 }, { "epoch": 0.8695815450643777, "grad_norm": 0.45703125, "learning_rate": 4.733308489732931e-06, "loss": 2.1631, "step": 16209 }, { "epoch": 0.8696351931330472, "grad_norm": 0.44140625, "learning_rate": 4.733269444840941e-06, "loss": 2.1235, "step": 16210 }, { "epoch": 0.8696888412017167, "grad_norm": 0.439453125, "learning_rate": 4.733230397252048e-06, "loss": 1.9867, "step": 16211 }, { "epoch": 0.8697424892703862, "grad_norm": 0.419921875, "learning_rate": 4.733191346966296e-06, "loss": 2.1343, "step": 16212 }, { "epoch": 0.8697961373390558, "grad_norm": 0.392578125, "learning_rate": 4.733152293983735e-06, "loss": 2.3282, "step": 16213 }, { "epoch": 0.8698497854077253, "grad_norm": 0.53125, "learning_rate": 4.73311323830441e-06, "loss": 2.1746, "step": 16214 }, { "epoch": 0.8699034334763949, "grad_norm": 0.416015625, "learning_rate": 4.733074179928369e-06, "loss": 2.3811, "step": 16215 }, { "epoch": 0.8699570815450643, "grad_norm": 0.46875, "learning_rate": 4.73303511885566e-06, "loss": 2.156, "step": 16216 }, { "epoch": 0.8700107296137339, "grad_norm": 0.455078125, "learning_rate": 4.73299605508633e-06, "loss": 2.2319, "step": 16217 }, { "epoch": 0.8700643776824034, "grad_norm": 0.498046875, "learning_rate": 4.732956988620424e-06, "loss": 2.1062, "step": 16218 }, { "epoch": 0.870118025751073, "grad_norm": 0.41796875, "learning_rate": 4.732917919457992e-06, "loss": 2.3941, "step": 16219 }, { "epoch": 0.8701716738197425, "grad_norm": 0.421875, "learning_rate": 4.732878847599079e-06, "loss": 2.2916, "step": 16220 }, { "epoch": 0.8702253218884121, "grad_norm": 0.439453125, "learning_rate": 4.732839773043734e-06, "loss": 2.1455, "step": 16221 }, { "epoch": 0.8702789699570815, "grad_norm": 0.66796875, "learning_rate": 4.732800695792002e-06, "loss": 2.3452, "step": 16222 }, { "epoch": 0.8703326180257511, "grad_norm": 0.71875, "learning_rate": 4.732761615843934e-06, "loss": 2.3055, "step": 16223 }, { "epoch": 0.8703862660944206, "grad_norm": 0.44140625, "learning_rate": 4.732722533199573e-06, "loss": 2.09, "step": 16224 }, { "epoch": 0.8704399141630901, "grad_norm": 0.56640625, "learning_rate": 4.732683447858968e-06, "loss": 2.2565, "step": 16225 }, { "epoch": 0.8704935622317597, "grad_norm": 0.53125, "learning_rate": 4.7326443598221665e-06, "loss": 2.1602, "step": 16226 }, { "epoch": 0.8705472103004291, "grad_norm": 0.427734375, "learning_rate": 4.732605269089214e-06, "loss": 2.4423, "step": 16227 }, { "epoch": 0.8706008583690987, "grad_norm": 0.453125, "learning_rate": 4.7325661756601606e-06, "loss": 2.4568, "step": 16228 }, { "epoch": 0.8706545064377682, "grad_norm": 0.40625, "learning_rate": 4.732527079535051e-06, "loss": 1.8182, "step": 16229 }, { "epoch": 0.8707081545064378, "grad_norm": 0.59765625, "learning_rate": 4.732487980713934e-06, "loss": 2.467, "step": 16230 }, { "epoch": 0.8707618025751073, "grad_norm": 0.486328125, "learning_rate": 4.732448879196855e-06, "loss": 2.2069, "step": 16231 }, { "epoch": 0.8708154506437769, "grad_norm": 0.455078125, "learning_rate": 4.732409774983863e-06, "loss": 2.1856, "step": 16232 }, { "epoch": 0.8708690987124463, "grad_norm": 0.42578125, "learning_rate": 4.732370668075005e-06, "loss": 2.1901, "step": 16233 }, { "epoch": 0.8709227467811159, "grad_norm": 0.435546875, "learning_rate": 4.732331558470328e-06, "loss": 2.3575, "step": 16234 }, { "epoch": 0.8709763948497854, "grad_norm": 0.5859375, "learning_rate": 4.732292446169878e-06, "loss": 1.3674, "step": 16235 }, { "epoch": 0.871030042918455, "grad_norm": 0.4609375, "learning_rate": 4.732253331173704e-06, "loss": 2.2972, "step": 16236 }, { "epoch": 0.8710836909871245, "grad_norm": 0.57421875, "learning_rate": 4.7322142134818525e-06, "loss": 2.2526, "step": 16237 }, { "epoch": 0.871137339055794, "grad_norm": 0.466796875, "learning_rate": 4.732175093094371e-06, "loss": 2.174, "step": 16238 }, { "epoch": 0.8711909871244635, "grad_norm": 0.6484375, "learning_rate": 4.732135970011306e-06, "loss": 2.2787, "step": 16239 }, { "epoch": 0.871244635193133, "grad_norm": 0.51171875, "learning_rate": 4.732096844232705e-06, "loss": 2.0985, "step": 16240 }, { "epoch": 0.8712982832618026, "grad_norm": 0.494140625, "learning_rate": 4.732057715758616e-06, "loss": 2.646, "step": 16241 }, { "epoch": 0.8713519313304721, "grad_norm": 0.447265625, "learning_rate": 4.732018584589086e-06, "loss": 2.4647, "step": 16242 }, { "epoch": 0.8714055793991416, "grad_norm": 0.486328125, "learning_rate": 4.731979450724161e-06, "loss": 2.2826, "step": 16243 }, { "epoch": 0.8714592274678111, "grad_norm": 0.49609375, "learning_rate": 4.731940314163891e-06, "loss": 2.2483, "step": 16244 }, { "epoch": 0.8715128755364807, "grad_norm": 0.75, "learning_rate": 4.73190117490832e-06, "loss": 1.8352, "step": 16245 }, { "epoch": 0.8715665236051502, "grad_norm": 0.4765625, "learning_rate": 4.731862032957497e-06, "loss": 2.5003, "step": 16246 }, { "epoch": 0.8716201716738198, "grad_norm": 0.4921875, "learning_rate": 4.7318228883114694e-06, "loss": 2.3847, "step": 16247 }, { "epoch": 0.8716738197424893, "grad_norm": 0.462890625, "learning_rate": 4.731783740970284e-06, "loss": 2.3879, "step": 16248 }, { "epoch": 0.8717274678111588, "grad_norm": 0.46484375, "learning_rate": 4.731744590933988e-06, "loss": 2.2693, "step": 16249 }, { "epoch": 0.8717811158798283, "grad_norm": 0.396484375, "learning_rate": 4.7317054382026285e-06, "loss": 2.2298, "step": 16250 }, { "epoch": 0.8718347639484979, "grad_norm": 0.5546875, "learning_rate": 4.731666282776253e-06, "loss": 2.428, "step": 16251 }, { "epoch": 0.8718884120171674, "grad_norm": 0.5078125, "learning_rate": 4.73162712465491e-06, "loss": 2.2805, "step": 16252 }, { "epoch": 0.8719420600858369, "grad_norm": 0.50390625, "learning_rate": 4.731587963838645e-06, "loss": 2.2185, "step": 16253 }, { "epoch": 0.8719957081545064, "grad_norm": 0.55859375, "learning_rate": 4.7315488003275065e-06, "loss": 2.2564, "step": 16254 }, { "epoch": 0.8720493562231759, "grad_norm": 0.46875, "learning_rate": 4.731509634121541e-06, "loss": 2.1544, "step": 16255 }, { "epoch": 0.8721030042918455, "grad_norm": 0.52734375, "learning_rate": 4.731470465220796e-06, "loss": 2.5443, "step": 16256 }, { "epoch": 0.872156652360515, "grad_norm": 0.56640625, "learning_rate": 4.7314312936253196e-06, "loss": 2.3801, "step": 16257 }, { "epoch": 0.8722103004291846, "grad_norm": 0.462890625, "learning_rate": 4.731392119335157e-06, "loss": 2.2574, "step": 16258 }, { "epoch": 0.872263948497854, "grad_norm": 0.4609375, "learning_rate": 4.731352942350358e-06, "loss": 2.4634, "step": 16259 }, { "epoch": 0.8723175965665236, "grad_norm": 0.62109375, "learning_rate": 4.731313762670969e-06, "loss": 2.4834, "step": 16260 }, { "epoch": 0.8723712446351931, "grad_norm": 0.48828125, "learning_rate": 4.731274580297037e-06, "loss": 2.5936, "step": 16261 }, { "epoch": 0.8724248927038627, "grad_norm": 0.451171875, "learning_rate": 4.731235395228609e-06, "loss": 2.3465, "step": 16262 }, { "epoch": 0.8724785407725322, "grad_norm": 0.55078125, "learning_rate": 4.7311962074657335e-06, "loss": 2.2139, "step": 16263 }, { "epoch": 0.8725321888412018, "grad_norm": 0.51171875, "learning_rate": 4.731157017008457e-06, "loss": 2.3767, "step": 16264 }, { "epoch": 0.8725858369098712, "grad_norm": 0.48828125, "learning_rate": 4.731117823856826e-06, "loss": 2.0574, "step": 16265 }, { "epoch": 0.8726394849785408, "grad_norm": 0.51171875, "learning_rate": 4.731078628010889e-06, "loss": 2.2764, "step": 16266 }, { "epoch": 0.8726931330472103, "grad_norm": 1.671875, "learning_rate": 4.731039429470693e-06, "loss": 2.5203, "step": 16267 }, { "epoch": 0.8727467811158798, "grad_norm": 0.40625, "learning_rate": 4.731000228236286e-06, "loss": 2.0125, "step": 16268 }, { "epoch": 0.8728004291845494, "grad_norm": 0.400390625, "learning_rate": 4.7309610243077145e-06, "loss": 2.1089, "step": 16269 }, { "epoch": 0.8728540772532188, "grad_norm": 0.423828125, "learning_rate": 4.730921817685027e-06, "loss": 2.1711, "step": 16270 }, { "epoch": 0.8729077253218884, "grad_norm": 0.453125, "learning_rate": 4.730882608368268e-06, "loss": 2.1471, "step": 16271 }, { "epoch": 0.8729613733905579, "grad_norm": 0.4296875, "learning_rate": 4.7308433963574885e-06, "loss": 2.1033, "step": 16272 }, { "epoch": 0.8730150214592275, "grad_norm": 0.455078125, "learning_rate": 4.730804181652733e-06, "loss": 2.3065, "step": 16273 }, { "epoch": 0.873068669527897, "grad_norm": 0.435546875, "learning_rate": 4.730764964254051e-06, "loss": 2.2693, "step": 16274 }, { "epoch": 0.8731223175965666, "grad_norm": 0.4921875, "learning_rate": 4.730725744161488e-06, "loss": 2.3322, "step": 16275 }, { "epoch": 0.873175965665236, "grad_norm": 0.484375, "learning_rate": 4.730686521375093e-06, "loss": 2.1912, "step": 16276 }, { "epoch": 0.8732296137339056, "grad_norm": 0.40234375, "learning_rate": 4.7306472958949126e-06, "loss": 2.2481, "step": 16277 }, { "epoch": 0.8732832618025751, "grad_norm": 0.44140625, "learning_rate": 4.730608067720993e-06, "loss": 2.2522, "step": 16278 }, { "epoch": 0.8733369098712447, "grad_norm": 0.439453125, "learning_rate": 4.730568836853384e-06, "loss": 2.3632, "step": 16279 }, { "epoch": 0.8733905579399142, "grad_norm": 0.46875, "learning_rate": 4.730529603292131e-06, "loss": 2.3769, "step": 16280 }, { "epoch": 0.8734442060085837, "grad_norm": 0.59765625, "learning_rate": 4.730490367037282e-06, "loss": 1.5526, "step": 16281 }, { "epoch": 0.8734978540772532, "grad_norm": 0.451171875, "learning_rate": 4.7304511280888855e-06, "loss": 2.3333, "step": 16282 }, { "epoch": 0.8735515021459227, "grad_norm": 0.396484375, "learning_rate": 4.7304118864469875e-06, "loss": 2.2426, "step": 16283 }, { "epoch": 0.8736051502145923, "grad_norm": 0.4921875, "learning_rate": 4.730372642111636e-06, "loss": 2.3454, "step": 16284 }, { "epoch": 0.8736587982832618, "grad_norm": 0.56640625, "learning_rate": 4.730333395082877e-06, "loss": 2.2076, "step": 16285 }, { "epoch": 0.8737124463519313, "grad_norm": 0.451171875, "learning_rate": 4.7302941453607595e-06, "loss": 2.1566, "step": 16286 }, { "epoch": 0.8737660944206008, "grad_norm": 0.62109375, "learning_rate": 4.730254892945331e-06, "loss": 2.2096, "step": 16287 }, { "epoch": 0.8738197424892704, "grad_norm": 0.451171875, "learning_rate": 4.730215637836637e-06, "loss": 2.2807, "step": 16288 }, { "epoch": 0.8738733905579399, "grad_norm": 0.482421875, "learning_rate": 4.730176380034728e-06, "loss": 2.4151, "step": 16289 }, { "epoch": 0.8739270386266095, "grad_norm": 0.447265625, "learning_rate": 4.730137119539648e-06, "loss": 2.0483, "step": 16290 }, { "epoch": 0.873980686695279, "grad_norm": 0.515625, "learning_rate": 4.730097856351447e-06, "loss": 2.2633, "step": 16291 }, { "epoch": 0.8740343347639485, "grad_norm": 0.5, "learning_rate": 4.730058590470172e-06, "loss": 1.8068, "step": 16292 }, { "epoch": 0.874087982832618, "grad_norm": 0.515625, "learning_rate": 4.730019321895869e-06, "loss": 2.3338, "step": 16293 }, { "epoch": 0.8741416309012876, "grad_norm": 0.4375, "learning_rate": 4.729980050628586e-06, "loss": 2.2128, "step": 16294 }, { "epoch": 0.8741952789699571, "grad_norm": 0.58984375, "learning_rate": 4.729940776668372e-06, "loss": 2.2581, "step": 16295 }, { "epoch": 0.8742489270386266, "grad_norm": 0.78515625, "learning_rate": 4.729901500015271e-06, "loss": 2.31, "step": 16296 }, { "epoch": 0.8743025751072961, "grad_norm": 0.41015625, "learning_rate": 4.729862220669335e-06, "loss": 2.2249, "step": 16297 }, { "epoch": 0.8743562231759656, "grad_norm": 0.51953125, "learning_rate": 4.729822938630608e-06, "loss": 2.2727, "step": 16298 }, { "epoch": 0.8744098712446352, "grad_norm": 0.5546875, "learning_rate": 4.729783653899138e-06, "loss": 2.4156, "step": 16299 }, { "epoch": 0.8744635193133047, "grad_norm": 0.68359375, "learning_rate": 4.729744366474974e-06, "loss": 2.3764, "step": 16300 }, { "epoch": 0.8745171673819743, "grad_norm": 0.46875, "learning_rate": 4.729705076358161e-06, "loss": 2.0917, "step": 16301 }, { "epoch": 0.8745708154506437, "grad_norm": 0.458984375, "learning_rate": 4.729665783548749e-06, "loss": 2.2785, "step": 16302 }, { "epoch": 0.8746244635193133, "grad_norm": 0.4453125, "learning_rate": 4.729626488046783e-06, "loss": 2.0628, "step": 16303 }, { "epoch": 0.8746781115879828, "grad_norm": 0.515625, "learning_rate": 4.729587189852313e-06, "loss": 2.2636, "step": 16304 }, { "epoch": 0.8747317596566524, "grad_norm": 0.44140625, "learning_rate": 4.729547888965385e-06, "loss": 2.2525, "step": 16305 }, { "epoch": 0.8747854077253219, "grad_norm": 0.482421875, "learning_rate": 4.729508585386047e-06, "loss": 2.2571, "step": 16306 }, { "epoch": 0.8748390557939915, "grad_norm": 0.63671875, "learning_rate": 4.729469279114345e-06, "loss": 2.0406, "step": 16307 }, { "epoch": 0.8748927038626609, "grad_norm": 0.58984375, "learning_rate": 4.729429970150328e-06, "loss": 2.3148, "step": 16308 }, { "epoch": 0.8749463519313305, "grad_norm": 0.515625, "learning_rate": 4.729390658494042e-06, "loss": 1.8872, "step": 16309 }, { "epoch": 0.875, "grad_norm": 0.423828125, "learning_rate": 4.729351344145536e-06, "loss": 2.2012, "step": 16310 }, { "epoch": 0.8750536480686695, "grad_norm": 0.462890625, "learning_rate": 4.729312027104858e-06, "loss": 2.1605, "step": 16311 }, { "epoch": 0.8751072961373391, "grad_norm": 0.5234375, "learning_rate": 4.729272707372053e-06, "loss": 2.3841, "step": 16312 }, { "epoch": 0.8751609442060085, "grad_norm": 0.42578125, "learning_rate": 4.729233384947171e-06, "loss": 2.3695, "step": 16313 }, { "epoch": 0.8752145922746781, "grad_norm": 0.515625, "learning_rate": 4.7291940598302584e-06, "loss": 2.4318, "step": 16314 }, { "epoch": 0.8752682403433476, "grad_norm": 0.50390625, "learning_rate": 4.729154732021362e-06, "loss": 2.3995, "step": 16315 }, { "epoch": 0.8753218884120172, "grad_norm": 0.470703125, "learning_rate": 4.729115401520531e-06, "loss": 2.3581, "step": 16316 }, { "epoch": 0.8753755364806867, "grad_norm": 0.875, "learning_rate": 4.729076068327811e-06, "loss": 2.5068, "step": 16317 }, { "epoch": 0.8754291845493563, "grad_norm": 0.390625, "learning_rate": 4.7290367324432504e-06, "loss": 2.0655, "step": 16318 }, { "epoch": 0.8754828326180257, "grad_norm": 0.7265625, "learning_rate": 4.728997393866898e-06, "loss": 2.5516, "step": 16319 }, { "epoch": 0.8755364806866953, "grad_norm": 0.54296875, "learning_rate": 4.728958052598798e-06, "loss": 2.119, "step": 16320 }, { "epoch": 0.8755901287553648, "grad_norm": 0.462890625, "learning_rate": 4.728918708639001e-06, "loss": 2.07, "step": 16321 }, { "epoch": 0.8756437768240344, "grad_norm": 0.412109375, "learning_rate": 4.728879361987554e-06, "loss": 2.3478, "step": 16322 }, { "epoch": 0.8756974248927039, "grad_norm": 0.4921875, "learning_rate": 4.7288400126445025e-06, "loss": 2.4422, "step": 16323 }, { "epoch": 0.8757510729613734, "grad_norm": 0.431640625, "learning_rate": 4.728800660609896e-06, "loss": 2.322, "step": 16324 }, { "epoch": 0.8758047210300429, "grad_norm": 0.47265625, "learning_rate": 4.7287613058837815e-06, "loss": 2.398, "step": 16325 }, { "epoch": 0.8758583690987124, "grad_norm": 0.5078125, "learning_rate": 4.728721948466207e-06, "loss": 2.4751, "step": 16326 }, { "epoch": 0.875912017167382, "grad_norm": 0.53125, "learning_rate": 4.7286825883572184e-06, "loss": 2.2814, "step": 16327 }, { "epoch": 0.8759656652360515, "grad_norm": 0.68359375, "learning_rate": 4.728643225556865e-06, "loss": 2.2777, "step": 16328 }, { "epoch": 0.876019313304721, "grad_norm": 0.48828125, "learning_rate": 4.728603860065194e-06, "loss": 2.34, "step": 16329 }, { "epoch": 0.8760729613733905, "grad_norm": 0.55078125, "learning_rate": 4.7285644918822525e-06, "loss": 1.3401, "step": 16330 }, { "epoch": 0.8761266094420601, "grad_norm": 0.44140625, "learning_rate": 4.728525121008089e-06, "loss": 2.2227, "step": 16331 }, { "epoch": 0.8761802575107296, "grad_norm": 0.40234375, "learning_rate": 4.728485747442748e-06, "loss": 2.3253, "step": 16332 }, { "epoch": 0.8762339055793992, "grad_norm": 1.3984375, "learning_rate": 4.728446371186281e-06, "loss": 2.4403, "step": 16333 }, { "epoch": 0.8762875536480687, "grad_norm": 0.55859375, "learning_rate": 4.728406992238733e-06, "loss": 2.247, "step": 16334 }, { "epoch": 0.8763412017167382, "grad_norm": 0.53125, "learning_rate": 4.728367610600153e-06, "loss": 2.2502, "step": 16335 }, { "epoch": 0.8763948497854077, "grad_norm": 0.44921875, "learning_rate": 4.728328226270588e-06, "loss": 2.3163, "step": 16336 }, { "epoch": 0.8764484978540773, "grad_norm": 0.419921875, "learning_rate": 4.728288839250085e-06, "loss": 2.3977, "step": 16337 }, { "epoch": 0.8765021459227468, "grad_norm": 0.42578125, "learning_rate": 4.728249449538692e-06, "loss": 2.2391, "step": 16338 }, { "epoch": 0.8765557939914163, "grad_norm": 0.90234375, "learning_rate": 4.7282100571364564e-06, "loss": 2.3131, "step": 16339 }, { "epoch": 0.8766094420600858, "grad_norm": 0.5390625, "learning_rate": 4.728170662043426e-06, "loss": 2.2965, "step": 16340 }, { "epoch": 0.8766630901287553, "grad_norm": 0.41796875, "learning_rate": 4.728131264259649e-06, "loss": 2.4084, "step": 16341 }, { "epoch": 0.8767167381974249, "grad_norm": 0.4921875, "learning_rate": 4.728091863785173e-06, "loss": 2.0385, "step": 16342 }, { "epoch": 0.8767703862660944, "grad_norm": 0.50390625, "learning_rate": 4.728052460620044e-06, "loss": 2.4489, "step": 16343 }, { "epoch": 0.876824034334764, "grad_norm": 0.400390625, "learning_rate": 4.72801305476431e-06, "loss": 2.1927, "step": 16344 }, { "epoch": 0.8768776824034334, "grad_norm": 0.42578125, "learning_rate": 4.727973646218019e-06, "loss": 2.2975, "step": 16345 }, { "epoch": 0.876931330472103, "grad_norm": 0.62890625, "learning_rate": 4.7279342349812194e-06, "loss": 1.4975, "step": 16346 }, { "epoch": 0.8769849785407725, "grad_norm": 0.455078125, "learning_rate": 4.727894821053958e-06, "loss": 2.1954, "step": 16347 }, { "epoch": 0.8770386266094421, "grad_norm": 0.458984375, "learning_rate": 4.727855404436283e-06, "loss": 2.2411, "step": 16348 }, { "epoch": 0.8770922746781116, "grad_norm": 0.390625, "learning_rate": 4.7278159851282405e-06, "loss": 2.1311, "step": 16349 }, { "epoch": 0.8771459227467812, "grad_norm": 0.462890625, "learning_rate": 4.727776563129879e-06, "loss": 2.1715, "step": 16350 }, { "epoch": 0.8771995708154506, "grad_norm": 0.474609375, "learning_rate": 4.727737138441247e-06, "loss": 2.1835, "step": 16351 }, { "epoch": 0.8772532188841202, "grad_norm": 0.48828125, "learning_rate": 4.72769771106239e-06, "loss": 2.3419, "step": 16352 }, { "epoch": 0.8773068669527897, "grad_norm": 0.48046875, "learning_rate": 4.727658280993358e-06, "loss": 2.545, "step": 16353 }, { "epoch": 0.8773605150214592, "grad_norm": 0.4296875, "learning_rate": 4.727618848234198e-06, "loss": 2.1419, "step": 16354 }, { "epoch": 0.8774141630901288, "grad_norm": 0.458984375, "learning_rate": 4.727579412784956e-06, "loss": 2.3553, "step": 16355 }, { "epoch": 0.8774678111587982, "grad_norm": 0.482421875, "learning_rate": 4.727539974645681e-06, "loss": 2.4087, "step": 16356 }, { "epoch": 0.8775214592274678, "grad_norm": 0.427734375, "learning_rate": 4.7275005338164205e-06, "loss": 2.3149, "step": 16357 }, { "epoch": 0.8775751072961373, "grad_norm": 0.60546875, "learning_rate": 4.727461090297222e-06, "loss": 2.3017, "step": 16358 }, { "epoch": 0.8776287553648069, "grad_norm": 0.40625, "learning_rate": 4.727421644088134e-06, "loss": 1.8492, "step": 16359 }, { "epoch": 0.8776824034334764, "grad_norm": 0.43359375, "learning_rate": 4.7273821951892015e-06, "loss": 2.211, "step": 16360 }, { "epoch": 0.877736051502146, "grad_norm": 0.5078125, "learning_rate": 4.7273427436004745e-06, "loss": 1.6825, "step": 16361 }, { "epoch": 0.8777896995708154, "grad_norm": 0.515625, "learning_rate": 4.727303289322001e-06, "loss": 2.37, "step": 16362 }, { "epoch": 0.877843347639485, "grad_norm": 0.357421875, "learning_rate": 4.727263832353827e-06, "loss": 2.022, "step": 16363 }, { "epoch": 0.8778969957081545, "grad_norm": 0.427734375, "learning_rate": 4.727224372696001e-06, "loss": 2.262, "step": 16364 }, { "epoch": 0.8779506437768241, "grad_norm": 0.490234375, "learning_rate": 4.727184910348571e-06, "loss": 2.143, "step": 16365 }, { "epoch": 0.8780042918454936, "grad_norm": 0.890625, "learning_rate": 4.727145445311583e-06, "loss": 2.3503, "step": 16366 }, { "epoch": 0.8780579399141631, "grad_norm": 0.470703125, "learning_rate": 4.727105977585087e-06, "loss": 2.3659, "step": 16367 }, { "epoch": 0.8781115879828326, "grad_norm": 0.462890625, "learning_rate": 4.7270665071691285e-06, "loss": 1.9458, "step": 16368 }, { "epoch": 0.8781652360515021, "grad_norm": 0.54296875, "learning_rate": 4.727027034063757e-06, "loss": 2.2772, "step": 16369 }, { "epoch": 0.8782188841201717, "grad_norm": 0.51171875, "learning_rate": 4.726987558269018e-06, "loss": 2.5537, "step": 16370 }, { "epoch": 0.8782725321888412, "grad_norm": 0.494140625, "learning_rate": 4.726948079784962e-06, "loss": 2.3826, "step": 16371 }, { "epoch": 0.8783261802575107, "grad_norm": 0.458984375, "learning_rate": 4.7269085986116345e-06, "loss": 2.3189, "step": 16372 }, { "epoch": 0.8783798283261802, "grad_norm": 0.546875, "learning_rate": 4.726869114749084e-06, "loss": 2.4812, "step": 16373 }, { "epoch": 0.8784334763948498, "grad_norm": 0.421875, "learning_rate": 4.7268296281973575e-06, "loss": 2.2198, "step": 16374 }, { "epoch": 0.8784871244635193, "grad_norm": 0.455078125, "learning_rate": 4.726790138956503e-06, "loss": 2.2894, "step": 16375 }, { "epoch": 0.8785407725321889, "grad_norm": 0.59765625, "learning_rate": 4.726750647026569e-06, "loss": 1.9866, "step": 16376 }, { "epoch": 0.8785944206008584, "grad_norm": 0.3671875, "learning_rate": 4.726711152407602e-06, "loss": 2.1283, "step": 16377 }, { "epoch": 0.8786480686695279, "grad_norm": 0.47265625, "learning_rate": 4.726671655099652e-06, "loss": 2.3605, "step": 16378 }, { "epoch": 0.8787017167381974, "grad_norm": 0.44921875, "learning_rate": 4.726632155102763e-06, "loss": 2.3469, "step": 16379 }, { "epoch": 0.878755364806867, "grad_norm": 0.455078125, "learning_rate": 4.726592652416986e-06, "loss": 2.0517, "step": 16380 }, { "epoch": 0.8788090128755365, "grad_norm": 0.5390625, "learning_rate": 4.726553147042366e-06, "loss": 2.2361, "step": 16381 }, { "epoch": 0.878862660944206, "grad_norm": 0.4296875, "learning_rate": 4.726513638978953e-06, "loss": 2.1154, "step": 16382 }, { "epoch": 0.8789163090128755, "grad_norm": 0.490234375, "learning_rate": 4.726474128226794e-06, "loss": 2.5161, "step": 16383 }, { "epoch": 0.878969957081545, "grad_norm": 0.40625, "learning_rate": 4.726434614785936e-06, "loss": 2.282, "step": 16384 }, { "epoch": 0.8790236051502146, "grad_norm": 0.5078125, "learning_rate": 4.726395098656427e-06, "loss": 2.6406, "step": 16385 }, { "epoch": 0.8790772532188841, "grad_norm": 0.4453125, "learning_rate": 4.726355579838315e-06, "loss": 2.2109, "step": 16386 }, { "epoch": 0.8791309012875537, "grad_norm": 0.4609375, "learning_rate": 4.726316058331648e-06, "loss": 2.3403, "step": 16387 }, { "epoch": 0.8791845493562231, "grad_norm": 0.44140625, "learning_rate": 4.726276534136474e-06, "loss": 2.1609, "step": 16388 }, { "epoch": 0.8792381974248927, "grad_norm": 0.427734375, "learning_rate": 4.726237007252839e-06, "loss": 2.1859, "step": 16389 }, { "epoch": 0.8792918454935622, "grad_norm": 0.51171875, "learning_rate": 4.726197477680792e-06, "loss": 2.291, "step": 16390 }, { "epoch": 0.8793454935622318, "grad_norm": 0.384765625, "learning_rate": 4.726157945420381e-06, "loss": 2.1448, "step": 16391 }, { "epoch": 0.8793991416309013, "grad_norm": 0.6796875, "learning_rate": 4.726118410471653e-06, "loss": 2.1147, "step": 16392 }, { "epoch": 0.8794527896995709, "grad_norm": 2.734375, "learning_rate": 4.726078872834656e-06, "loss": 2.246, "step": 16393 }, { "epoch": 0.8795064377682403, "grad_norm": 0.4453125, "learning_rate": 4.726039332509439e-06, "loss": 2.4351, "step": 16394 }, { "epoch": 0.8795600858369099, "grad_norm": 0.51953125, "learning_rate": 4.725999789496047e-06, "loss": 2.4841, "step": 16395 }, { "epoch": 0.8796137339055794, "grad_norm": 0.46875, "learning_rate": 4.7259602437945305e-06, "loss": 2.2046, "step": 16396 }, { "epoch": 0.8796673819742489, "grad_norm": 0.400390625, "learning_rate": 4.725920695404935e-06, "loss": 1.8241, "step": 16397 }, { "epoch": 0.8797210300429185, "grad_norm": 0.515625, "learning_rate": 4.72588114432731e-06, "loss": 1.8696, "step": 16398 }, { "epoch": 0.8797746781115879, "grad_norm": 0.5, "learning_rate": 4.725841590561703e-06, "loss": 2.5771, "step": 16399 }, { "epoch": 0.8798283261802575, "grad_norm": 0.37890625, "learning_rate": 4.72580203410816e-06, "loss": 2.1634, "step": 16400 }, { "epoch": 0.879881974248927, "grad_norm": 0.4609375, "learning_rate": 4.725762474966731e-06, "loss": 2.3388, "step": 16401 }, { "epoch": 0.8799356223175966, "grad_norm": 0.3671875, "learning_rate": 4.725722913137462e-06, "loss": 2.2891, "step": 16402 }, { "epoch": 0.8799892703862661, "grad_norm": 0.455078125, "learning_rate": 4.725683348620404e-06, "loss": 1.8716, "step": 16403 }, { "epoch": 0.8800429184549357, "grad_norm": 0.81640625, "learning_rate": 4.7256437814156e-06, "loss": 2.0613, "step": 16404 }, { "epoch": 0.8800965665236051, "grad_norm": 0.4296875, "learning_rate": 4.7256042115231015e-06, "loss": 2.2003, "step": 16405 }, { "epoch": 0.8801502145922747, "grad_norm": 0.484375, "learning_rate": 4.725564638942954e-06, "loss": 2.19, "step": 16406 }, { "epoch": 0.8802038626609442, "grad_norm": 0.45703125, "learning_rate": 4.725525063675207e-06, "loss": 2.393, "step": 16407 }, { "epoch": 0.8802575107296138, "grad_norm": 0.45703125, "learning_rate": 4.725485485719908e-06, "loss": 2.3229, "step": 16408 }, { "epoch": 0.8803111587982833, "grad_norm": 0.421875, "learning_rate": 4.725445905077104e-06, "loss": 2.1427, "step": 16409 }, { "epoch": 0.8803648068669528, "grad_norm": 1.09375, "learning_rate": 4.7254063217468426e-06, "loss": 2.0658, "step": 16410 }, { "epoch": 0.8804184549356223, "grad_norm": 0.443359375, "learning_rate": 4.725366735729173e-06, "loss": 2.19, "step": 16411 }, { "epoch": 0.8804721030042918, "grad_norm": 0.48046875, "learning_rate": 4.725327147024142e-06, "loss": 2.4263, "step": 16412 }, { "epoch": 0.8805257510729614, "grad_norm": 0.54296875, "learning_rate": 4.725287555631797e-06, "loss": 2.321, "step": 16413 }, { "epoch": 0.8805793991416309, "grad_norm": 0.71484375, "learning_rate": 4.7252479615521875e-06, "loss": 2.1463, "step": 16414 }, { "epoch": 0.8806330472103004, "grad_norm": 0.5234375, "learning_rate": 4.725208364785359e-06, "loss": 2.3944, "step": 16415 }, { "epoch": 0.8806866952789699, "grad_norm": 0.458984375, "learning_rate": 4.725168765331362e-06, "loss": 2.3283, "step": 16416 }, { "epoch": 0.8807403433476395, "grad_norm": 0.5703125, "learning_rate": 4.725129163190241e-06, "loss": 1.4441, "step": 16417 }, { "epoch": 0.880793991416309, "grad_norm": 0.3671875, "learning_rate": 4.725089558362047e-06, "loss": 1.9807, "step": 16418 }, { "epoch": 0.8808476394849786, "grad_norm": 0.57421875, "learning_rate": 4.725049950846826e-06, "loss": 2.2144, "step": 16419 }, { "epoch": 0.880901287553648, "grad_norm": 0.5078125, "learning_rate": 4.725010340644627e-06, "loss": 2.0861, "step": 16420 }, { "epoch": 0.8809549356223176, "grad_norm": 1.7734375, "learning_rate": 4.7249707277554965e-06, "loss": 2.5426, "step": 16421 }, { "epoch": 0.8810085836909871, "grad_norm": 0.46484375, "learning_rate": 4.7249311121794825e-06, "loss": 2.0962, "step": 16422 }, { "epoch": 0.8810622317596567, "grad_norm": 0.51953125, "learning_rate": 4.724891493916634e-06, "loss": 2.5349, "step": 16423 }, { "epoch": 0.8811158798283262, "grad_norm": 0.48828125, "learning_rate": 4.724851872966999e-06, "loss": 2.4559, "step": 16424 }, { "epoch": 0.8811695278969958, "grad_norm": 0.4453125, "learning_rate": 4.724812249330624e-06, "loss": 2.2682, "step": 16425 }, { "epoch": 0.8812231759656652, "grad_norm": 0.64453125, "learning_rate": 4.724772623007557e-06, "loss": 2.2705, "step": 16426 }, { "epoch": 0.8812768240343347, "grad_norm": 0.4453125, "learning_rate": 4.724732993997846e-06, "loss": 2.0501, "step": 16427 }, { "epoch": 0.8813304721030043, "grad_norm": 0.5546875, "learning_rate": 4.7246933623015405e-06, "loss": 2.1777, "step": 16428 }, { "epoch": 0.8813841201716738, "grad_norm": 0.90234375, "learning_rate": 4.724653727918685e-06, "loss": 2.5921, "step": 16429 }, { "epoch": 0.8814377682403434, "grad_norm": 0.447265625, "learning_rate": 4.724614090849331e-06, "loss": 2.2988, "step": 16430 }, { "epoch": 0.8814914163090128, "grad_norm": 0.435546875, "learning_rate": 4.724574451093524e-06, "loss": 2.2949, "step": 16431 }, { "epoch": 0.8815450643776824, "grad_norm": 1.6171875, "learning_rate": 4.724534808651312e-06, "loss": 2.4338, "step": 16432 }, { "epoch": 0.8815987124463519, "grad_norm": 0.494140625, "learning_rate": 4.7244951635227444e-06, "loss": 2.3398, "step": 16433 }, { "epoch": 0.8816523605150215, "grad_norm": 0.44140625, "learning_rate": 4.724455515707868e-06, "loss": 2.3864, "step": 16434 }, { "epoch": 0.881706008583691, "grad_norm": 0.4296875, "learning_rate": 4.72441586520673e-06, "loss": 2.4605, "step": 16435 }, { "epoch": 0.8817596566523606, "grad_norm": 0.474609375, "learning_rate": 4.724376212019379e-06, "loss": 2.3215, "step": 16436 }, { "epoch": 0.88181330472103, "grad_norm": 0.494140625, "learning_rate": 4.724336556145864e-06, "loss": 2.1774, "step": 16437 }, { "epoch": 0.8818669527896996, "grad_norm": 0.55078125, "learning_rate": 4.724296897586231e-06, "loss": 2.0653, "step": 16438 }, { "epoch": 0.8819206008583691, "grad_norm": 0.50390625, "learning_rate": 4.724257236340529e-06, "loss": 2.1144, "step": 16439 }, { "epoch": 0.8819742489270386, "grad_norm": 0.478515625, "learning_rate": 4.7242175724088054e-06, "loss": 2.5431, "step": 16440 }, { "epoch": 0.8820278969957082, "grad_norm": 0.46484375, "learning_rate": 4.7241779057911095e-06, "loss": 2.313, "step": 16441 }, { "epoch": 0.8820815450643776, "grad_norm": 0.478515625, "learning_rate": 4.7241382364874865e-06, "loss": 1.8721, "step": 16442 }, { "epoch": 0.8821351931330472, "grad_norm": 0.458984375, "learning_rate": 4.724098564497987e-06, "loss": 2.2043, "step": 16443 }, { "epoch": 0.8821888412017167, "grad_norm": 0.419921875, "learning_rate": 4.724058889822657e-06, "loss": 2.157, "step": 16444 }, { "epoch": 0.8822424892703863, "grad_norm": 0.6015625, "learning_rate": 4.7240192124615455e-06, "loss": 2.2684, "step": 16445 }, { "epoch": 0.8822961373390558, "grad_norm": 0.46484375, "learning_rate": 4.7239795324147e-06, "loss": 2.263, "step": 16446 }, { "epoch": 0.8823497854077254, "grad_norm": 0.53515625, "learning_rate": 4.723939849682169e-06, "loss": 2.3136, "step": 16447 }, { "epoch": 0.8824034334763948, "grad_norm": 0.625, "learning_rate": 4.723900164263999e-06, "loss": 2.5362, "step": 16448 }, { "epoch": 0.8824570815450644, "grad_norm": 0.44921875, "learning_rate": 4.723860476160238e-06, "loss": 1.9976, "step": 16449 }, { "epoch": 0.8825107296137339, "grad_norm": 0.45703125, "learning_rate": 4.723820785370937e-06, "loss": 2.3853, "step": 16450 }, { "epoch": 0.8825643776824035, "grad_norm": 0.41796875, "learning_rate": 4.72378109189614e-06, "loss": 2.2189, "step": 16451 }, { "epoch": 0.882618025751073, "grad_norm": 0.447265625, "learning_rate": 4.723741395735898e-06, "loss": 2.2349, "step": 16452 }, { "epoch": 0.8826716738197425, "grad_norm": 0.56640625, "learning_rate": 4.723701696890256e-06, "loss": 2.4318, "step": 16453 }, { "epoch": 0.882725321888412, "grad_norm": 0.451171875, "learning_rate": 4.7236619953592644e-06, "loss": 1.8156, "step": 16454 }, { "epoch": 0.8827789699570815, "grad_norm": 0.400390625, "learning_rate": 4.7236222911429695e-06, "loss": 2.1871, "step": 16455 }, { "epoch": 0.8828326180257511, "grad_norm": 0.486328125, "learning_rate": 4.723582584241422e-06, "loss": 2.3214, "step": 16456 }, { "epoch": 0.8828862660944206, "grad_norm": 0.462890625, "learning_rate": 4.723542874654665e-06, "loss": 2.4489, "step": 16457 }, { "epoch": 0.8829399141630901, "grad_norm": 0.490234375, "learning_rate": 4.723503162382751e-06, "loss": 2.3063, "step": 16458 }, { "epoch": 0.8829935622317596, "grad_norm": 0.5078125, "learning_rate": 4.723463447425725e-06, "loss": 2.0961, "step": 16459 }, { "epoch": 0.8830472103004292, "grad_norm": 0.443359375, "learning_rate": 4.723423729783638e-06, "loss": 2.2652, "step": 16460 }, { "epoch": 0.8831008583690987, "grad_norm": 0.5078125, "learning_rate": 4.723384009456535e-06, "loss": 2.339, "step": 16461 }, { "epoch": 0.8831545064377683, "grad_norm": 0.3671875, "learning_rate": 4.723344286444465e-06, "loss": 2.2367, "step": 16462 }, { "epoch": 0.8832081545064377, "grad_norm": 0.451171875, "learning_rate": 4.7233045607474775e-06, "loss": 2.3269, "step": 16463 }, { "epoch": 0.8832618025751073, "grad_norm": 0.451171875, "learning_rate": 4.723264832365618e-06, "loss": 2.39, "step": 16464 }, { "epoch": 0.8833154506437768, "grad_norm": 0.51171875, "learning_rate": 4.723225101298936e-06, "loss": 2.5264, "step": 16465 }, { "epoch": 0.8833690987124464, "grad_norm": 0.396484375, "learning_rate": 4.723185367547479e-06, "loss": 2.3668, "step": 16466 }, { "epoch": 0.8834227467811159, "grad_norm": 0.4375, "learning_rate": 4.723145631111295e-06, "loss": 2.3752, "step": 16467 }, { "epoch": 0.8834763948497855, "grad_norm": 0.41796875, "learning_rate": 4.723105891990431e-06, "loss": 2.176, "step": 16468 }, { "epoch": 0.8835300429184549, "grad_norm": 0.455078125, "learning_rate": 4.723066150184937e-06, "loss": 2.2862, "step": 16469 }, { "epoch": 0.8835836909871244, "grad_norm": 0.4453125, "learning_rate": 4.7230264056948596e-06, "loss": 2.2568, "step": 16470 }, { "epoch": 0.883637339055794, "grad_norm": 0.51953125, "learning_rate": 4.722986658520248e-06, "loss": 2.4269, "step": 16471 }, { "epoch": 0.8836909871244635, "grad_norm": 0.470703125, "learning_rate": 4.722946908661148e-06, "loss": 2.3, "step": 16472 }, { "epoch": 0.8837446351931331, "grad_norm": 0.66015625, "learning_rate": 4.72290715611761e-06, "loss": 1.976, "step": 16473 }, { "epoch": 0.8837982832618025, "grad_norm": 0.478515625, "learning_rate": 4.722867400889681e-06, "loss": 2.3009, "step": 16474 }, { "epoch": 0.8838519313304721, "grad_norm": 0.65234375, "learning_rate": 4.722827642977408e-06, "loss": 2.3304, "step": 16475 }, { "epoch": 0.8839055793991416, "grad_norm": 0.46484375, "learning_rate": 4.722787882380841e-06, "loss": 2.1485, "step": 16476 }, { "epoch": 0.8839592274678112, "grad_norm": 0.404296875, "learning_rate": 4.722748119100027e-06, "loss": 2.0086, "step": 16477 }, { "epoch": 0.8840128755364807, "grad_norm": 0.51171875, "learning_rate": 4.7227083531350135e-06, "loss": 2.3367, "step": 16478 }, { "epoch": 0.8840665236051503, "grad_norm": 0.416015625, "learning_rate": 4.72266858448585e-06, "loss": 2.5173, "step": 16479 }, { "epoch": 0.8841201716738197, "grad_norm": 0.51171875, "learning_rate": 4.7226288131525825e-06, "loss": 2.3073, "step": 16480 }, { "epoch": 0.8841738197424893, "grad_norm": 0.63671875, "learning_rate": 4.72258903913526e-06, "loss": 2.5121, "step": 16481 }, { "epoch": 0.8842274678111588, "grad_norm": 0.4609375, "learning_rate": 4.722549262433931e-06, "loss": 2.426, "step": 16482 }, { "epoch": 0.8842811158798283, "grad_norm": 0.46484375, "learning_rate": 4.722509483048644e-06, "loss": 2.4662, "step": 16483 }, { "epoch": 0.8843347639484979, "grad_norm": 0.546875, "learning_rate": 4.722469700979445e-06, "loss": 1.991, "step": 16484 }, { "epoch": 0.8843884120171673, "grad_norm": 0.41015625, "learning_rate": 4.722429916226384e-06, "loss": 2.0598, "step": 16485 }, { "epoch": 0.8844420600858369, "grad_norm": 0.5703125, "learning_rate": 4.722390128789508e-06, "loss": 2.4333, "step": 16486 }, { "epoch": 0.8844957081545064, "grad_norm": 0.53515625, "learning_rate": 4.7223503386688654e-06, "loss": 2.2312, "step": 16487 }, { "epoch": 0.884549356223176, "grad_norm": 1.046875, "learning_rate": 4.722310545864505e-06, "loss": 2.3265, "step": 16488 }, { "epoch": 0.8846030042918455, "grad_norm": 0.482421875, "learning_rate": 4.722270750376473e-06, "loss": 2.3887, "step": 16489 }, { "epoch": 0.884656652360515, "grad_norm": 0.53515625, "learning_rate": 4.722230952204818e-06, "loss": 2.2348, "step": 16490 }, { "epoch": 0.8847103004291845, "grad_norm": 0.3515625, "learning_rate": 4.72219115134959e-06, "loss": 2.0265, "step": 16491 }, { "epoch": 0.8847639484978541, "grad_norm": 0.462890625, "learning_rate": 4.722151347810835e-06, "loss": 2.1039, "step": 16492 }, { "epoch": 0.8848175965665236, "grad_norm": 0.5625, "learning_rate": 4.722111541588602e-06, "loss": 2.2308, "step": 16493 }, { "epoch": 0.8848712446351932, "grad_norm": 0.482421875, "learning_rate": 4.722071732682939e-06, "loss": 2.2876, "step": 16494 }, { "epoch": 0.8849248927038627, "grad_norm": 0.43359375, "learning_rate": 4.722031921093893e-06, "loss": 2.3647, "step": 16495 }, { "epoch": 0.8849785407725322, "grad_norm": 0.47265625, "learning_rate": 4.721992106821513e-06, "loss": 2.4094, "step": 16496 }, { "epoch": 0.8850321888412017, "grad_norm": 0.48046875, "learning_rate": 4.721952289865848e-06, "loss": 2.2452, "step": 16497 }, { "epoch": 0.8850858369098712, "grad_norm": 0.4140625, "learning_rate": 4.721912470226944e-06, "loss": 2.3343, "step": 16498 }, { "epoch": 0.8851394849785408, "grad_norm": 0.408203125, "learning_rate": 4.721872647904851e-06, "loss": 2.4394, "step": 16499 }, { "epoch": 0.8851931330472103, "grad_norm": 0.470703125, "learning_rate": 4.721832822899616e-06, "loss": 2.4101, "step": 16500 }, { "epoch": 0.8852467811158798, "grad_norm": 0.451171875, "learning_rate": 4.721792995211287e-06, "loss": 2.3371, "step": 16501 }, { "epoch": 0.8853004291845493, "grad_norm": 0.57421875, "learning_rate": 4.721753164839912e-06, "loss": 2.3241, "step": 16502 }, { "epoch": 0.8853540772532189, "grad_norm": 0.373046875, "learning_rate": 4.721713331785541e-06, "loss": 1.8056, "step": 16503 }, { "epoch": 0.8854077253218884, "grad_norm": 0.462890625, "learning_rate": 4.7216734960482195e-06, "loss": 2.3391, "step": 16504 }, { "epoch": 0.885461373390558, "grad_norm": 0.486328125, "learning_rate": 4.721633657627997e-06, "loss": 2.4016, "step": 16505 }, { "epoch": 0.8855150214592274, "grad_norm": 0.5234375, "learning_rate": 4.721593816524922e-06, "loss": 2.3436, "step": 16506 }, { "epoch": 0.885568669527897, "grad_norm": 0.486328125, "learning_rate": 4.7215539727390415e-06, "loss": 2.2552, "step": 16507 }, { "epoch": 0.8856223175965665, "grad_norm": 0.458984375, "learning_rate": 4.721514126270404e-06, "loss": 1.7141, "step": 16508 }, { "epoch": 0.8856759656652361, "grad_norm": 0.609375, "learning_rate": 4.7214742771190575e-06, "loss": 2.4842, "step": 16509 }, { "epoch": 0.8857296137339056, "grad_norm": 0.5390625, "learning_rate": 4.721434425285051e-06, "loss": 2.2346, "step": 16510 }, { "epoch": 0.8857832618025752, "grad_norm": 0.44921875, "learning_rate": 4.7213945707684315e-06, "loss": 2.0905, "step": 16511 }, { "epoch": 0.8858369098712446, "grad_norm": 0.431640625, "learning_rate": 4.721354713569247e-06, "loss": 1.8545, "step": 16512 }, { "epoch": 0.8858905579399141, "grad_norm": 0.419921875, "learning_rate": 4.721314853687547e-06, "loss": 1.9777, "step": 16513 }, { "epoch": 0.8859442060085837, "grad_norm": 0.466796875, "learning_rate": 4.721274991123379e-06, "loss": 2.3883, "step": 16514 }, { "epoch": 0.8859978540772532, "grad_norm": 0.5390625, "learning_rate": 4.721235125876791e-06, "loss": 2.206, "step": 16515 }, { "epoch": 0.8860515021459228, "grad_norm": 0.54296875, "learning_rate": 4.7211952579478306e-06, "loss": 2.2918, "step": 16516 }, { "epoch": 0.8861051502145922, "grad_norm": 0.578125, "learning_rate": 4.721155387336546e-06, "loss": 2.3035, "step": 16517 }, { "epoch": 0.8861587982832618, "grad_norm": 0.76171875, "learning_rate": 4.7211155140429875e-06, "loss": 2.6304, "step": 16518 }, { "epoch": 0.8862124463519313, "grad_norm": 0.423828125, "learning_rate": 4.7210756380672006e-06, "loss": 2.3696, "step": 16519 }, { "epoch": 0.8862660944206009, "grad_norm": 0.5078125, "learning_rate": 4.721035759409235e-06, "loss": 2.241, "step": 16520 }, { "epoch": 0.8863197424892704, "grad_norm": 0.484375, "learning_rate": 4.720995878069138e-06, "loss": 2.3996, "step": 16521 }, { "epoch": 0.88637339055794, "grad_norm": 0.515625, "learning_rate": 4.720955994046957e-06, "loss": 2.4835, "step": 16522 }, { "epoch": 0.8864270386266094, "grad_norm": 0.6796875, "learning_rate": 4.720916107342743e-06, "loss": 2.4694, "step": 16523 }, { "epoch": 0.886480686695279, "grad_norm": 0.455078125, "learning_rate": 4.720876217956541e-06, "loss": 2.2357, "step": 16524 }, { "epoch": 0.8865343347639485, "grad_norm": 0.4921875, "learning_rate": 4.720836325888401e-06, "loss": 2.2049, "step": 16525 }, { "epoch": 0.886587982832618, "grad_norm": 0.578125, "learning_rate": 4.7207964311383705e-06, "loss": 2.2432, "step": 16526 }, { "epoch": 0.8866416309012876, "grad_norm": 0.62890625, "learning_rate": 4.720756533706499e-06, "loss": 2.293, "step": 16527 }, { "epoch": 0.886695278969957, "grad_norm": 0.455078125, "learning_rate": 4.7207166335928325e-06, "loss": 2.3337, "step": 16528 }, { "epoch": 0.8867489270386266, "grad_norm": 0.453125, "learning_rate": 4.72067673079742e-06, "loss": 2.2507, "step": 16529 }, { "epoch": 0.8868025751072961, "grad_norm": 0.48046875, "learning_rate": 4.720636825320311e-06, "loss": 2.3507, "step": 16530 }, { "epoch": 0.8868562231759657, "grad_norm": 0.482421875, "learning_rate": 4.720596917161552e-06, "loss": 2.4114, "step": 16531 }, { "epoch": 0.8869098712446352, "grad_norm": 0.48828125, "learning_rate": 4.720557006321192e-06, "loss": 2.3692, "step": 16532 }, { "epoch": 0.8869635193133047, "grad_norm": 1.21875, "learning_rate": 4.720517092799279e-06, "loss": 2.1435, "step": 16533 }, { "epoch": 0.8870171673819742, "grad_norm": 0.73828125, "learning_rate": 4.720477176595862e-06, "loss": 2.2638, "step": 16534 }, { "epoch": 0.8870708154506438, "grad_norm": 0.4140625, "learning_rate": 4.720437257710987e-06, "loss": 1.9433, "step": 16535 }, { "epoch": 0.8871244635193133, "grad_norm": 0.4765625, "learning_rate": 4.720397336144704e-06, "loss": 2.2112, "step": 16536 }, { "epoch": 0.8871781115879829, "grad_norm": 0.408203125, "learning_rate": 4.720357411897062e-06, "loss": 2.1033, "step": 16537 }, { "epoch": 0.8872317596566524, "grad_norm": 0.365234375, "learning_rate": 4.720317484968107e-06, "loss": 2.0405, "step": 16538 }, { "epoch": 0.8872854077253219, "grad_norm": 0.443359375, "learning_rate": 4.720277555357889e-06, "loss": 2.2456, "step": 16539 }, { "epoch": 0.8873390557939914, "grad_norm": 0.439453125, "learning_rate": 4.720237623066454e-06, "loss": 2.3165, "step": 16540 }, { "epoch": 0.8873927038626609, "grad_norm": 0.3984375, "learning_rate": 4.720197688093852e-06, "loss": 1.9759, "step": 16541 }, { "epoch": 0.8874463519313305, "grad_norm": 0.5546875, "learning_rate": 4.720157750440133e-06, "loss": 2.3237, "step": 16542 }, { "epoch": 0.8875, "grad_norm": 0.478515625, "learning_rate": 4.720117810105341e-06, "loss": 2.3415, "step": 16543 }, { "epoch": 0.8875536480686695, "grad_norm": 0.439453125, "learning_rate": 4.720077867089528e-06, "loss": 2.2163, "step": 16544 }, { "epoch": 0.887607296137339, "grad_norm": 0.482421875, "learning_rate": 4.72003792139274e-06, "loss": 2.2476, "step": 16545 }, { "epoch": 0.8876609442060086, "grad_norm": 0.486328125, "learning_rate": 4.719997973015026e-06, "loss": 2.3072, "step": 16546 }, { "epoch": 0.8877145922746781, "grad_norm": 0.490234375, "learning_rate": 4.719958021956433e-06, "loss": 2.4217, "step": 16547 }, { "epoch": 0.8877682403433477, "grad_norm": 0.4765625, "learning_rate": 4.719918068217012e-06, "loss": 2.5857, "step": 16548 }, { "epoch": 0.8878218884120171, "grad_norm": 0.52734375, "learning_rate": 4.719878111796809e-06, "loss": 2.3105, "step": 16549 }, { "epoch": 0.8878755364806867, "grad_norm": 0.421875, "learning_rate": 4.719838152695873e-06, "loss": 2.1671, "step": 16550 }, { "epoch": 0.8879291845493562, "grad_norm": 0.44921875, "learning_rate": 4.719798190914252e-06, "loss": 2.4106, "step": 16551 }, { "epoch": 0.8879828326180258, "grad_norm": 0.73046875, "learning_rate": 4.7197582264519946e-06, "loss": 2.3365, "step": 16552 }, { "epoch": 0.8880364806866953, "grad_norm": 0.5078125, "learning_rate": 4.7197182593091486e-06, "loss": 2.006, "step": 16553 }, { "epoch": 0.8880901287553649, "grad_norm": 0.44921875, "learning_rate": 4.719678289485763e-06, "loss": 2.5343, "step": 16554 }, { "epoch": 0.8881437768240343, "grad_norm": 8.9375, "learning_rate": 4.719638316981884e-06, "loss": 2.2675, "step": 16555 }, { "epoch": 0.8881974248927038, "grad_norm": 0.353515625, "learning_rate": 4.719598341797563e-06, "loss": 2.165, "step": 16556 }, { "epoch": 0.8882510729613734, "grad_norm": 0.8515625, "learning_rate": 4.719558363932845e-06, "loss": 2.3101, "step": 16557 }, { "epoch": 0.8883047210300429, "grad_norm": 0.671875, "learning_rate": 4.7195183833877824e-06, "loss": 2.1603, "step": 16558 }, { "epoch": 0.8883583690987125, "grad_norm": 0.62890625, "learning_rate": 4.71947840016242e-06, "loss": 1.6322, "step": 16559 }, { "epoch": 0.8884120171673819, "grad_norm": 0.482421875, "learning_rate": 4.7194384142568076e-06, "loss": 2.3531, "step": 16560 }, { "epoch": 0.8884656652360515, "grad_norm": 0.5, "learning_rate": 4.719398425670992e-06, "loss": 2.3317, "step": 16561 }, { "epoch": 0.888519313304721, "grad_norm": 0.484375, "learning_rate": 4.719358434405024e-06, "loss": 2.2959, "step": 16562 }, { "epoch": 0.8885729613733906, "grad_norm": 0.51953125, "learning_rate": 4.7193184404589485e-06, "loss": 2.404, "step": 16563 }, { "epoch": 0.8886266094420601, "grad_norm": 0.48828125, "learning_rate": 4.719278443832817e-06, "loss": 2.5441, "step": 16564 }, { "epoch": 0.8886802575107297, "grad_norm": 0.60546875, "learning_rate": 4.719238444526676e-06, "loss": 2.2593, "step": 16565 }, { "epoch": 0.8887339055793991, "grad_norm": 0.494140625, "learning_rate": 4.719198442540575e-06, "loss": 2.3379, "step": 16566 }, { "epoch": 0.8887875536480687, "grad_norm": 0.62890625, "learning_rate": 4.719158437874561e-06, "loss": 2.2331, "step": 16567 }, { "epoch": 0.8888412017167382, "grad_norm": 0.45703125, "learning_rate": 4.719118430528684e-06, "loss": 2.3725, "step": 16568 }, { "epoch": 0.8888948497854077, "grad_norm": 0.46484375, "learning_rate": 4.7190784205029905e-06, "loss": 2.2896, "step": 16569 }, { "epoch": 0.8889484978540773, "grad_norm": 0.55859375, "learning_rate": 4.719038407797529e-06, "loss": 1.4731, "step": 16570 }, { "epoch": 0.8890021459227467, "grad_norm": 0.46875, "learning_rate": 4.718998392412349e-06, "loss": 2.4014, "step": 16571 }, { "epoch": 0.8890557939914163, "grad_norm": 0.38671875, "learning_rate": 4.718958374347499e-06, "loss": 2.102, "step": 16572 }, { "epoch": 0.8891094420600858, "grad_norm": 0.365234375, "learning_rate": 4.7189183536030254e-06, "loss": 2.2489, "step": 16573 }, { "epoch": 0.8891630901287554, "grad_norm": 0.439453125, "learning_rate": 4.718878330178978e-06, "loss": 2.4661, "step": 16574 }, { "epoch": 0.8892167381974249, "grad_norm": 0.5234375, "learning_rate": 4.718838304075405e-06, "loss": 2.4087, "step": 16575 }, { "epoch": 0.8892703862660944, "grad_norm": 0.42578125, "learning_rate": 4.7187982752923545e-06, "loss": 2.1745, "step": 16576 }, { "epoch": 0.8893240343347639, "grad_norm": 0.455078125, "learning_rate": 4.718758243829875e-06, "loss": 2.381, "step": 16577 }, { "epoch": 0.8893776824034335, "grad_norm": 0.4296875, "learning_rate": 4.718718209688015e-06, "loss": 2.569, "step": 16578 }, { "epoch": 0.889431330472103, "grad_norm": 0.60546875, "learning_rate": 4.718678172866822e-06, "loss": 2.0991, "step": 16579 }, { "epoch": 0.8894849785407726, "grad_norm": 0.44140625, "learning_rate": 4.7186381333663445e-06, "loss": 2.2847, "step": 16580 }, { "epoch": 0.889538626609442, "grad_norm": 0.384765625, "learning_rate": 4.718598091186632e-06, "loss": 2.2526, "step": 16581 }, { "epoch": 0.8895922746781116, "grad_norm": 0.4453125, "learning_rate": 4.718558046327732e-06, "loss": 2.2741, "step": 16582 }, { "epoch": 0.8896459227467811, "grad_norm": 0.412109375, "learning_rate": 4.718517998789693e-06, "loss": 2.2577, "step": 16583 }, { "epoch": 0.8896995708154506, "grad_norm": 0.609375, "learning_rate": 4.718477948572563e-06, "loss": 2.363, "step": 16584 }, { "epoch": 0.8897532188841202, "grad_norm": 0.447265625, "learning_rate": 4.718437895676391e-06, "loss": 2.2134, "step": 16585 }, { "epoch": 0.8898068669527897, "grad_norm": 0.458984375, "learning_rate": 4.718397840101226e-06, "loss": 1.8647, "step": 16586 }, { "epoch": 0.8898605150214592, "grad_norm": 0.47265625, "learning_rate": 4.718357781847114e-06, "loss": 2.3702, "step": 16587 }, { "epoch": 0.8899141630901287, "grad_norm": 0.4765625, "learning_rate": 4.718317720914105e-06, "loss": 2.2717, "step": 16588 }, { "epoch": 0.8899678111587983, "grad_norm": 0.59765625, "learning_rate": 4.7182776573022476e-06, "loss": 2.3281, "step": 16589 }, { "epoch": 0.8900214592274678, "grad_norm": 0.474609375, "learning_rate": 4.71823759101159e-06, "loss": 2.237, "step": 16590 }, { "epoch": 0.8900751072961374, "grad_norm": 0.453125, "learning_rate": 4.71819752204218e-06, "loss": 2.463, "step": 16591 }, { "epoch": 0.8901287553648068, "grad_norm": 0.4453125, "learning_rate": 4.718157450394066e-06, "loss": 2.2325, "step": 16592 }, { "epoch": 0.8901824034334764, "grad_norm": 0.4609375, "learning_rate": 4.718117376067298e-06, "loss": 1.8055, "step": 16593 }, { "epoch": 0.8902360515021459, "grad_norm": 0.76171875, "learning_rate": 4.718077299061922e-06, "loss": 2.2875, "step": 16594 }, { "epoch": 0.8902896995708155, "grad_norm": 0.5859375, "learning_rate": 4.7180372193779884e-06, "loss": 2.2996, "step": 16595 }, { "epoch": 0.890343347639485, "grad_norm": 0.4609375, "learning_rate": 4.717997137015544e-06, "loss": 2.132, "step": 16596 }, { "epoch": 0.8903969957081546, "grad_norm": 0.451171875, "learning_rate": 4.717957051974639e-06, "loss": 2.2325, "step": 16597 }, { "epoch": 0.890450643776824, "grad_norm": 0.65234375, "learning_rate": 4.717916964255319e-06, "loss": 2.2566, "step": 16598 }, { "epoch": 0.8905042918454935, "grad_norm": 0.625, "learning_rate": 4.717876873857635e-06, "loss": 2.3344, "step": 16599 }, { "epoch": 0.8905579399141631, "grad_norm": 0.48046875, "learning_rate": 4.717836780781635e-06, "loss": 2.329, "step": 16600 }, { "epoch": 0.8906115879828326, "grad_norm": 0.59375, "learning_rate": 4.717796685027367e-06, "loss": 2.3308, "step": 16601 }, { "epoch": 0.8906652360515022, "grad_norm": 0.427734375, "learning_rate": 4.7177565865948795e-06, "loss": 2.1511, "step": 16602 }, { "epoch": 0.8907188841201716, "grad_norm": 0.482421875, "learning_rate": 4.71771648548422e-06, "loss": 2.5601, "step": 16603 }, { "epoch": 0.8907725321888412, "grad_norm": 0.7265625, "learning_rate": 4.717676381695438e-06, "loss": 2.4124, "step": 16604 }, { "epoch": 0.8908261802575107, "grad_norm": 0.515625, "learning_rate": 4.717636275228582e-06, "loss": 2.2489, "step": 16605 }, { "epoch": 0.8908798283261803, "grad_norm": 0.4921875, "learning_rate": 4.7175961660837e-06, "loss": 2.2117, "step": 16606 }, { "epoch": 0.8909334763948498, "grad_norm": 0.447265625, "learning_rate": 4.717556054260841e-06, "loss": 2.2215, "step": 16607 }, { "epoch": 0.8909871244635194, "grad_norm": 0.609375, "learning_rate": 4.7175159397600525e-06, "loss": 2.7231, "step": 16608 }, { "epoch": 0.8910407725321888, "grad_norm": 0.46875, "learning_rate": 4.717475822581383e-06, "loss": 1.9627, "step": 16609 }, { "epoch": 0.8910944206008584, "grad_norm": 0.3984375, "learning_rate": 4.717435702724882e-06, "loss": 2.0445, "step": 16610 }, { "epoch": 0.8911480686695279, "grad_norm": 0.466796875, "learning_rate": 4.717395580190598e-06, "loss": 2.4561, "step": 16611 }, { "epoch": 0.8912017167381975, "grad_norm": 0.44921875, "learning_rate": 4.7173554549785775e-06, "loss": 2.078, "step": 16612 }, { "epoch": 0.891255364806867, "grad_norm": 0.58984375, "learning_rate": 4.7173153270888715e-06, "loss": 2.0659, "step": 16613 }, { "epoch": 0.8913090128755364, "grad_norm": 0.48046875, "learning_rate": 4.717275196521526e-06, "loss": 2.354, "step": 16614 }, { "epoch": 0.891362660944206, "grad_norm": 0.51171875, "learning_rate": 4.717235063276591e-06, "loss": 2.178, "step": 16615 }, { "epoch": 0.8914163090128755, "grad_norm": 0.435546875, "learning_rate": 4.717194927354115e-06, "loss": 2.0946, "step": 16616 }, { "epoch": 0.8914699570815451, "grad_norm": 0.466796875, "learning_rate": 4.717154788754146e-06, "loss": 2.2124, "step": 16617 }, { "epoch": 0.8915236051502146, "grad_norm": 0.4375, "learning_rate": 4.717114647476732e-06, "loss": 2.4341, "step": 16618 }, { "epoch": 0.8915772532188841, "grad_norm": 0.462890625, "learning_rate": 4.717074503521923e-06, "loss": 2.0636, "step": 16619 }, { "epoch": 0.8916309012875536, "grad_norm": 0.484375, "learning_rate": 4.717034356889766e-06, "loss": 2.4862, "step": 16620 }, { "epoch": 0.8916845493562232, "grad_norm": 0.380859375, "learning_rate": 4.71699420758031e-06, "loss": 2.2379, "step": 16621 }, { "epoch": 0.8917381974248927, "grad_norm": 0.52734375, "learning_rate": 4.716954055593603e-06, "loss": 2.4634, "step": 16622 }, { "epoch": 0.8917918454935623, "grad_norm": 0.451171875, "learning_rate": 4.716913900929695e-06, "loss": 2.304, "step": 16623 }, { "epoch": 0.8918454935622318, "grad_norm": 0.54296875, "learning_rate": 4.716873743588633e-06, "loss": 2.2361, "step": 16624 }, { "epoch": 0.8918991416309013, "grad_norm": 0.64453125, "learning_rate": 4.7168335835704655e-06, "loss": 1.9312, "step": 16625 }, { "epoch": 0.8919527896995708, "grad_norm": 0.47265625, "learning_rate": 4.716793420875242e-06, "loss": 2.1, "step": 16626 }, { "epoch": 0.8920064377682403, "grad_norm": 0.412109375, "learning_rate": 4.71675325550301e-06, "loss": 2.3143, "step": 16627 }, { "epoch": 0.8920600858369099, "grad_norm": 0.55078125, "learning_rate": 4.716713087453819e-06, "loss": 2.318, "step": 16628 }, { "epoch": 0.8921137339055794, "grad_norm": 0.416015625, "learning_rate": 4.716672916727717e-06, "loss": 2.1641, "step": 16629 }, { "epoch": 0.8921673819742489, "grad_norm": 0.4921875, "learning_rate": 4.716632743324751e-06, "loss": 2.4145, "step": 16630 }, { "epoch": 0.8922210300429184, "grad_norm": 0.435546875, "learning_rate": 4.716592567244973e-06, "loss": 2.032, "step": 16631 }, { "epoch": 0.892274678111588, "grad_norm": 0.4140625, "learning_rate": 4.716552388488429e-06, "loss": 2.2537, "step": 16632 }, { "epoch": 0.8923283261802575, "grad_norm": 0.427734375, "learning_rate": 4.716512207055167e-06, "loss": 2.3227, "step": 16633 }, { "epoch": 0.8923819742489271, "grad_norm": 0.458984375, "learning_rate": 4.716472022945238e-06, "loss": 2.1608, "step": 16634 }, { "epoch": 0.8924356223175965, "grad_norm": 0.494140625, "learning_rate": 4.716431836158688e-06, "loss": 2.3651, "step": 16635 }, { "epoch": 0.8924892703862661, "grad_norm": 0.392578125, "learning_rate": 4.716391646695567e-06, "loss": 1.7982, "step": 16636 }, { "epoch": 0.8925429184549356, "grad_norm": 0.384765625, "learning_rate": 4.716351454555923e-06, "loss": 2.3052, "step": 16637 }, { "epoch": 0.8925965665236052, "grad_norm": 0.427734375, "learning_rate": 4.716311259739806e-06, "loss": 1.9553, "step": 16638 }, { "epoch": 0.8926502145922747, "grad_norm": 0.52734375, "learning_rate": 4.716271062247261e-06, "loss": 2.5554, "step": 16639 }, { "epoch": 0.8927038626609443, "grad_norm": 0.3984375, "learning_rate": 4.716230862078341e-06, "loss": 2.1858, "step": 16640 }, { "epoch": 0.8927575107296137, "grad_norm": 0.4765625, "learning_rate": 4.71619065923309e-06, "loss": 2.2193, "step": 16641 }, { "epoch": 0.8928111587982832, "grad_norm": 0.43359375, "learning_rate": 4.71615045371156e-06, "loss": 2.2625, "step": 16642 }, { "epoch": 0.8928648068669528, "grad_norm": 0.494140625, "learning_rate": 4.716110245513799e-06, "loss": 2.4416, "step": 16643 }, { "epoch": 0.8929184549356223, "grad_norm": 0.546875, "learning_rate": 4.716070034639854e-06, "loss": 2.336, "step": 16644 }, { "epoch": 0.8929721030042919, "grad_norm": 0.494140625, "learning_rate": 4.716029821089775e-06, "loss": 2.1339, "step": 16645 }, { "epoch": 0.8930257510729613, "grad_norm": 0.466796875, "learning_rate": 4.71598960486361e-06, "loss": 2.3116, "step": 16646 }, { "epoch": 0.8930793991416309, "grad_norm": 0.50390625, "learning_rate": 4.715949385961407e-06, "loss": 2.2236, "step": 16647 }, { "epoch": 0.8931330472103004, "grad_norm": 0.470703125, "learning_rate": 4.715909164383215e-06, "loss": 2.2362, "step": 16648 }, { "epoch": 0.89318669527897, "grad_norm": 0.49609375, "learning_rate": 4.715868940129085e-06, "loss": 2.2867, "step": 16649 }, { "epoch": 0.8932403433476395, "grad_norm": 0.439453125, "learning_rate": 4.715828713199061e-06, "loss": 2.3485, "step": 16650 }, { "epoch": 0.893293991416309, "grad_norm": 0.453125, "learning_rate": 4.7157884835931955e-06, "loss": 2.2146, "step": 16651 }, { "epoch": 0.8933476394849785, "grad_norm": 0.42578125, "learning_rate": 4.715748251311535e-06, "loss": 2.2375, "step": 16652 }, { "epoch": 0.8934012875536481, "grad_norm": 0.53515625, "learning_rate": 4.715708016354128e-06, "loss": 2.3867, "step": 16653 }, { "epoch": 0.8934549356223176, "grad_norm": 0.51171875, "learning_rate": 4.715667778721025e-06, "loss": 2.2094, "step": 16654 }, { "epoch": 0.8935085836909872, "grad_norm": 0.486328125, "learning_rate": 4.715627538412272e-06, "loss": 2.4704, "step": 16655 }, { "epoch": 0.8935622317596567, "grad_norm": 0.451171875, "learning_rate": 4.715587295427919e-06, "loss": 2.1172, "step": 16656 }, { "epoch": 0.8936158798283261, "grad_norm": 0.482421875, "learning_rate": 4.715547049768015e-06, "loss": 2.3882, "step": 16657 }, { "epoch": 0.8936695278969957, "grad_norm": 0.423828125, "learning_rate": 4.715506801432608e-06, "loss": 2.2842, "step": 16658 }, { "epoch": 0.8937231759656652, "grad_norm": 0.416015625, "learning_rate": 4.715466550421747e-06, "loss": 2.087, "step": 16659 }, { "epoch": 0.8937768240343348, "grad_norm": 0.466796875, "learning_rate": 4.71542629673548e-06, "loss": 2.2785, "step": 16660 }, { "epoch": 0.8938304721030043, "grad_norm": 0.4609375, "learning_rate": 4.715386040373856e-06, "loss": 2.2315, "step": 16661 }, { "epoch": 0.8938841201716738, "grad_norm": 0.70703125, "learning_rate": 4.715345781336923e-06, "loss": 2.4957, "step": 16662 }, { "epoch": 0.8939377682403433, "grad_norm": 0.5234375, "learning_rate": 4.71530551962473e-06, "loss": 2.4036, "step": 16663 }, { "epoch": 0.8939914163090129, "grad_norm": 0.486328125, "learning_rate": 4.715265255237327e-06, "loss": 2.2096, "step": 16664 }, { "epoch": 0.8940450643776824, "grad_norm": 0.498046875, "learning_rate": 4.7152249881747605e-06, "loss": 2.2854, "step": 16665 }, { "epoch": 0.894098712446352, "grad_norm": 0.45703125, "learning_rate": 4.71518471843708e-06, "loss": 2.4846, "step": 16666 }, { "epoch": 0.8941523605150214, "grad_norm": 0.4921875, "learning_rate": 4.715144446024333e-06, "loss": 2.3479, "step": 16667 }, { "epoch": 0.894206008583691, "grad_norm": 0.490234375, "learning_rate": 4.715104170936572e-06, "loss": 2.2703, "step": 16668 }, { "epoch": 0.8942596566523605, "grad_norm": 0.71875, "learning_rate": 4.715063893173841e-06, "loss": 2.0941, "step": 16669 }, { "epoch": 0.89431330472103, "grad_norm": 0.5703125, "learning_rate": 4.7150236127361905e-06, "loss": 2.2287, "step": 16670 }, { "epoch": 0.8943669527896996, "grad_norm": 0.490234375, "learning_rate": 4.71498332962367e-06, "loss": 2.349, "step": 16671 }, { "epoch": 0.894420600858369, "grad_norm": 0.45703125, "learning_rate": 4.714943043836326e-06, "loss": 2.5314, "step": 16672 }, { "epoch": 0.8944742489270386, "grad_norm": 0.365234375, "learning_rate": 4.71490275537421e-06, "loss": 2.181, "step": 16673 }, { "epoch": 0.8945278969957081, "grad_norm": 0.44921875, "learning_rate": 4.714862464237369e-06, "loss": 2.1284, "step": 16674 }, { "epoch": 0.8945815450643777, "grad_norm": 0.44921875, "learning_rate": 4.714822170425851e-06, "loss": 2.3908, "step": 16675 }, { "epoch": 0.8946351931330472, "grad_norm": 0.484375, "learning_rate": 4.714781873939706e-06, "loss": 2.4071, "step": 16676 }, { "epoch": 0.8946888412017168, "grad_norm": 0.5, "learning_rate": 4.714741574778982e-06, "loss": 2.46, "step": 16677 }, { "epoch": 0.8947424892703862, "grad_norm": 0.51953125, "learning_rate": 4.714701272943727e-06, "loss": 2.3036, "step": 16678 }, { "epoch": 0.8947961373390558, "grad_norm": 0.64453125, "learning_rate": 4.7146609684339915e-06, "loss": 2.1309, "step": 16679 }, { "epoch": 0.8948497854077253, "grad_norm": 0.40234375, "learning_rate": 4.714620661249822e-06, "loss": 2.2952, "step": 16680 }, { "epoch": 0.8949034334763949, "grad_norm": 0.412109375, "learning_rate": 4.71458035139127e-06, "loss": 1.9572, "step": 16681 }, { "epoch": 0.8949570815450644, "grad_norm": 0.73828125, "learning_rate": 4.714540038858382e-06, "loss": 2.3002, "step": 16682 }, { "epoch": 0.895010729613734, "grad_norm": 0.43359375, "learning_rate": 4.714499723651206e-06, "loss": 2.4292, "step": 16683 }, { "epoch": 0.8950643776824034, "grad_norm": 0.4609375, "learning_rate": 4.714459405769792e-06, "loss": 2.0382, "step": 16684 }, { "epoch": 0.8951180257510729, "grad_norm": 0.404296875, "learning_rate": 4.714419085214189e-06, "loss": 2.0845, "step": 16685 }, { "epoch": 0.8951716738197425, "grad_norm": 0.478515625, "learning_rate": 4.714378761984446e-06, "loss": 2.188, "step": 16686 }, { "epoch": 0.895225321888412, "grad_norm": 0.416015625, "learning_rate": 4.71433843608061e-06, "loss": 2.0758, "step": 16687 }, { "epoch": 0.8952789699570816, "grad_norm": 0.6953125, "learning_rate": 4.714298107502731e-06, "loss": 2.404, "step": 16688 }, { "epoch": 0.895332618025751, "grad_norm": 0.46484375, "learning_rate": 4.7142577762508565e-06, "loss": 2.3459, "step": 16689 }, { "epoch": 0.8953862660944206, "grad_norm": 0.52734375, "learning_rate": 4.714217442325036e-06, "loss": 2.1147, "step": 16690 }, { "epoch": 0.8954399141630901, "grad_norm": 0.462890625, "learning_rate": 4.714177105725319e-06, "loss": 2.3578, "step": 16691 }, { "epoch": 0.8954935622317597, "grad_norm": 0.59765625, "learning_rate": 4.714136766451753e-06, "loss": 2.7477, "step": 16692 }, { "epoch": 0.8955472103004292, "grad_norm": 0.640625, "learning_rate": 4.714096424504388e-06, "loss": 2.2688, "step": 16693 }, { "epoch": 0.8956008583690988, "grad_norm": 0.462890625, "learning_rate": 4.7140560798832715e-06, "loss": 2.0698, "step": 16694 }, { "epoch": 0.8956545064377682, "grad_norm": 0.62109375, "learning_rate": 4.7140157325884516e-06, "loss": 2.4399, "step": 16695 }, { "epoch": 0.8957081545064378, "grad_norm": 0.4765625, "learning_rate": 4.713975382619979e-06, "loss": 2.3546, "step": 16696 }, { "epoch": 0.8957618025751073, "grad_norm": 0.5703125, "learning_rate": 4.713935029977901e-06, "loss": 2.4918, "step": 16697 }, { "epoch": 0.8958154506437769, "grad_norm": 0.4375, "learning_rate": 4.713894674662267e-06, "loss": 2.1669, "step": 16698 }, { "epoch": 0.8958690987124464, "grad_norm": 0.490234375, "learning_rate": 4.713854316673126e-06, "loss": 2.4297, "step": 16699 }, { "epoch": 0.8959227467811158, "grad_norm": 0.447265625, "learning_rate": 4.713813956010524e-06, "loss": 2.4203, "step": 16700 }, { "epoch": 0.8959763948497854, "grad_norm": 0.443359375, "learning_rate": 4.713773592674514e-06, "loss": 2.2068, "step": 16701 }, { "epoch": 0.8960300429184549, "grad_norm": 0.5, "learning_rate": 4.713733226665142e-06, "loss": 2.1845, "step": 16702 }, { "epoch": 0.8960836909871245, "grad_norm": 0.478515625, "learning_rate": 4.713692857982458e-06, "loss": 2.3749, "step": 16703 }, { "epoch": 0.896137339055794, "grad_norm": 0.53515625, "learning_rate": 4.7136524866265106e-06, "loss": 2.6247, "step": 16704 }, { "epoch": 0.8961909871244635, "grad_norm": 0.51171875, "learning_rate": 4.713612112597347e-06, "loss": 2.0409, "step": 16705 }, { "epoch": 0.896244635193133, "grad_norm": 0.466796875, "learning_rate": 4.713571735895018e-06, "loss": 2.2321, "step": 16706 }, { "epoch": 0.8962982832618026, "grad_norm": 0.390625, "learning_rate": 4.7135313565195705e-06, "loss": 1.9408, "step": 16707 }, { "epoch": 0.8963519313304721, "grad_norm": 0.470703125, "learning_rate": 4.713490974471056e-06, "loss": 2.0805, "step": 16708 }, { "epoch": 0.8964055793991417, "grad_norm": 0.48046875, "learning_rate": 4.71345058974952e-06, "loss": 2.4156, "step": 16709 }, { "epoch": 0.8964592274678111, "grad_norm": 0.451171875, "learning_rate": 4.7134102023550135e-06, "loss": 2.1129, "step": 16710 }, { "epoch": 0.8965128755364807, "grad_norm": 0.400390625, "learning_rate": 4.713369812287584e-06, "loss": 2.1391, "step": 16711 }, { "epoch": 0.8965665236051502, "grad_norm": 0.478515625, "learning_rate": 4.71332941954728e-06, "loss": 2.4057, "step": 16712 }, { "epoch": 0.8966201716738197, "grad_norm": 0.46875, "learning_rate": 4.7132890241341525e-06, "loss": 2.245, "step": 16713 }, { "epoch": 0.8966738197424893, "grad_norm": 0.8515625, "learning_rate": 4.713248626048249e-06, "loss": 2.2575, "step": 16714 }, { "epoch": 0.8967274678111588, "grad_norm": 0.5078125, "learning_rate": 4.713208225289617e-06, "loss": 2.4806, "step": 16715 }, { "epoch": 0.8967811158798283, "grad_norm": 0.4609375, "learning_rate": 4.713167821858307e-06, "loss": 2.286, "step": 16716 }, { "epoch": 0.8968347639484978, "grad_norm": 0.494140625, "learning_rate": 4.713127415754367e-06, "loss": 2.3337, "step": 16717 }, { "epoch": 0.8968884120171674, "grad_norm": 0.4609375, "learning_rate": 4.7130870069778465e-06, "loss": 2.2295, "step": 16718 }, { "epoch": 0.8969420600858369, "grad_norm": 0.47265625, "learning_rate": 4.713046595528794e-06, "loss": 2.2045, "step": 16719 }, { "epoch": 0.8969957081545065, "grad_norm": 0.578125, "learning_rate": 4.713006181407258e-06, "loss": 2.1637, "step": 16720 }, { "epoch": 0.8970493562231759, "grad_norm": 0.419921875, "learning_rate": 4.712965764613287e-06, "loss": 2.1184, "step": 16721 }, { "epoch": 0.8971030042918455, "grad_norm": 1.890625, "learning_rate": 4.712925345146931e-06, "loss": 1.9566, "step": 16722 }, { "epoch": 0.897156652360515, "grad_norm": 0.55078125, "learning_rate": 4.712884923008237e-06, "loss": 2.1995, "step": 16723 }, { "epoch": 0.8972103004291846, "grad_norm": 1.765625, "learning_rate": 4.712844498197256e-06, "loss": 2.6244, "step": 16724 }, { "epoch": 0.8972639484978541, "grad_norm": 0.4765625, "learning_rate": 4.712804070714035e-06, "loss": 2.5658, "step": 16725 }, { "epoch": 0.8973175965665237, "grad_norm": 0.431640625, "learning_rate": 4.7127636405586236e-06, "loss": 2.4908, "step": 16726 }, { "epoch": 0.8973712446351931, "grad_norm": 0.482421875, "learning_rate": 4.7127232077310705e-06, "loss": 2.2916, "step": 16727 }, { "epoch": 0.8974248927038626, "grad_norm": 0.44921875, "learning_rate": 4.712682772231425e-06, "loss": 2.1469, "step": 16728 }, { "epoch": 0.8974785407725322, "grad_norm": 0.48046875, "learning_rate": 4.712642334059735e-06, "loss": 2.2783, "step": 16729 }, { "epoch": 0.8975321888412017, "grad_norm": 0.4453125, "learning_rate": 4.71260189321605e-06, "loss": 2.3101, "step": 16730 }, { "epoch": 0.8975858369098713, "grad_norm": 0.498046875, "learning_rate": 4.712561449700418e-06, "loss": 1.9226, "step": 16731 }, { "epoch": 0.8976394849785407, "grad_norm": 0.48046875, "learning_rate": 4.71252100351289e-06, "loss": 2.2849, "step": 16732 }, { "epoch": 0.8976931330472103, "grad_norm": 0.474609375, "learning_rate": 4.712480554653513e-06, "loss": 2.3194, "step": 16733 }, { "epoch": 0.8977467811158798, "grad_norm": 0.427734375, "learning_rate": 4.712440103122335e-06, "loss": 2.2085, "step": 16734 }, { "epoch": 0.8978004291845494, "grad_norm": 0.375, "learning_rate": 4.712399648919407e-06, "loss": 2.1898, "step": 16735 }, { "epoch": 0.8978540772532189, "grad_norm": 1.2890625, "learning_rate": 4.712359192044776e-06, "loss": 2.1538, "step": 16736 }, { "epoch": 0.8979077253218885, "grad_norm": 0.5234375, "learning_rate": 4.712318732498493e-06, "loss": 2.166, "step": 16737 }, { "epoch": 0.8979613733905579, "grad_norm": 0.48828125, "learning_rate": 4.712278270280605e-06, "loss": 2.3937, "step": 16738 }, { "epoch": 0.8980150214592275, "grad_norm": 0.4296875, "learning_rate": 4.712237805391161e-06, "loss": 2.198, "step": 16739 }, { "epoch": 0.898068669527897, "grad_norm": 0.50390625, "learning_rate": 4.712197337830211e-06, "loss": 2.2305, "step": 16740 }, { "epoch": 0.8981223175965666, "grad_norm": 0.54296875, "learning_rate": 4.712156867597803e-06, "loss": 2.1755, "step": 16741 }, { "epoch": 0.898175965665236, "grad_norm": 1.4296875, "learning_rate": 4.712116394693986e-06, "loss": 2.2719, "step": 16742 }, { "epoch": 0.8982296137339055, "grad_norm": 0.494140625, "learning_rate": 4.712075919118809e-06, "loss": 2.4433, "step": 16743 }, { "epoch": 0.8982832618025751, "grad_norm": 0.41796875, "learning_rate": 4.712035440872321e-06, "loss": 2.3465, "step": 16744 }, { "epoch": 0.8983369098712446, "grad_norm": 0.458984375, "learning_rate": 4.71199495995457e-06, "loss": 2.3354, "step": 16745 }, { "epoch": 0.8983905579399142, "grad_norm": 0.470703125, "learning_rate": 4.711954476365607e-06, "loss": 2.3363, "step": 16746 }, { "epoch": 0.8984442060085837, "grad_norm": 0.396484375, "learning_rate": 4.7119139901054775e-06, "loss": 2.0453, "step": 16747 }, { "epoch": 0.8984978540772532, "grad_norm": 0.458984375, "learning_rate": 4.711873501174234e-06, "loss": 2.231, "step": 16748 }, { "epoch": 0.8985515021459227, "grad_norm": 0.423828125, "learning_rate": 4.7118330095719236e-06, "loss": 1.6363, "step": 16749 }, { "epoch": 0.8986051502145923, "grad_norm": 0.53125, "learning_rate": 4.711792515298595e-06, "loss": 2.1137, "step": 16750 }, { "epoch": 0.8986587982832618, "grad_norm": 0.44921875, "learning_rate": 4.7117520183542975e-06, "loss": 2.4051, "step": 16751 }, { "epoch": 0.8987124463519314, "grad_norm": 0.546875, "learning_rate": 4.711711518739079e-06, "loss": 2.2405, "step": 16752 }, { "epoch": 0.8987660944206008, "grad_norm": 0.4921875, "learning_rate": 4.711671016452991e-06, "loss": 2.3241, "step": 16753 }, { "epoch": 0.8988197424892704, "grad_norm": 0.515625, "learning_rate": 4.71163051149608e-06, "loss": 2.3062, "step": 16754 }, { "epoch": 0.8988733905579399, "grad_norm": 0.4921875, "learning_rate": 4.711590003868396e-06, "loss": 2.3132, "step": 16755 }, { "epoch": 0.8989270386266094, "grad_norm": 0.4375, "learning_rate": 4.7115494935699875e-06, "loss": 2.3508, "step": 16756 }, { "epoch": 0.898980686695279, "grad_norm": 10.625, "learning_rate": 4.711508980600904e-06, "loss": 2.0821, "step": 16757 }, { "epoch": 0.8990343347639485, "grad_norm": 0.4609375, "learning_rate": 4.711468464961193e-06, "loss": 2.1557, "step": 16758 }, { "epoch": 0.899087982832618, "grad_norm": 0.421875, "learning_rate": 4.711427946650905e-06, "loss": 2.2296, "step": 16759 }, { "epoch": 0.8991416309012875, "grad_norm": 0.470703125, "learning_rate": 4.7113874256700885e-06, "loss": 2.2824, "step": 16760 }, { "epoch": 0.8991952789699571, "grad_norm": 0.408203125, "learning_rate": 4.711346902018792e-06, "loss": 2.2768, "step": 16761 }, { "epoch": 0.8992489270386266, "grad_norm": 0.53515625, "learning_rate": 4.711306375697064e-06, "loss": 2.5995, "step": 16762 }, { "epoch": 0.8993025751072962, "grad_norm": 0.46875, "learning_rate": 4.711265846704955e-06, "loss": 2.3122, "step": 16763 }, { "epoch": 0.8993562231759656, "grad_norm": 0.412109375, "learning_rate": 4.711225315042513e-06, "loss": 2.422, "step": 16764 }, { "epoch": 0.8994098712446352, "grad_norm": 0.46875, "learning_rate": 4.7111847807097875e-06, "loss": 2.3984, "step": 16765 }, { "epoch": 0.8994635193133047, "grad_norm": 0.4609375, "learning_rate": 4.7111442437068255e-06, "loss": 2.3181, "step": 16766 }, { "epoch": 0.8995171673819743, "grad_norm": 0.4453125, "learning_rate": 4.7111037040336784e-06, "loss": 2.3211, "step": 16767 }, { "epoch": 0.8995708154506438, "grad_norm": 0.474609375, "learning_rate": 4.711063161690395e-06, "loss": 2.2875, "step": 16768 }, { "epoch": 0.8996244635193134, "grad_norm": 0.4140625, "learning_rate": 4.711022616677023e-06, "loss": 2.0459, "step": 16769 }, { "epoch": 0.8996781115879828, "grad_norm": 0.462890625, "learning_rate": 4.71098206899361e-06, "loss": 2.2598, "step": 16770 }, { "epoch": 0.8997317596566523, "grad_norm": 0.54296875, "learning_rate": 4.710941518640209e-06, "loss": 2.0259, "step": 16771 }, { "epoch": 0.8997854077253219, "grad_norm": 0.455078125, "learning_rate": 4.710900965616865e-06, "loss": 2.3071, "step": 16772 }, { "epoch": 0.8998390557939914, "grad_norm": 0.5234375, "learning_rate": 4.71086040992363e-06, "loss": 2.6727, "step": 16773 }, { "epoch": 0.899892703862661, "grad_norm": 0.498046875, "learning_rate": 4.710819851560551e-06, "loss": 2.3419, "step": 16774 }, { "epoch": 0.8999463519313304, "grad_norm": 1.875, "learning_rate": 4.710779290527678e-06, "loss": 2.4541, "step": 16775 }, { "epoch": 0.9, "grad_norm": 0.5546875, "learning_rate": 4.710738726825059e-06, "loss": 2.3535, "step": 16776 }, { "epoch": 0.9000536480686695, "grad_norm": 0.416015625, "learning_rate": 4.710698160452745e-06, "loss": 2.1425, "step": 16777 }, { "epoch": 0.9001072961373391, "grad_norm": 0.51953125, "learning_rate": 4.710657591410782e-06, "loss": 2.2354, "step": 16778 }, { "epoch": 0.9001609442060086, "grad_norm": 0.55859375, "learning_rate": 4.710617019699222e-06, "loss": 2.4895, "step": 16779 }, { "epoch": 0.9002145922746781, "grad_norm": 0.4921875, "learning_rate": 4.710576445318111e-06, "loss": 2.2857, "step": 16780 }, { "epoch": 0.9002682403433476, "grad_norm": 0.53125, "learning_rate": 4.710535868267502e-06, "loss": 2.1785, "step": 16781 }, { "epoch": 0.9003218884120172, "grad_norm": 0.486328125, "learning_rate": 4.71049528854744e-06, "loss": 2.0971, "step": 16782 }, { "epoch": 0.9003755364806867, "grad_norm": 0.41796875, "learning_rate": 4.710454706157975e-06, "loss": 2.1797, "step": 16783 }, { "epoch": 0.9004291845493563, "grad_norm": 0.419921875, "learning_rate": 4.710414121099158e-06, "loss": 2.0552, "step": 16784 }, { "epoch": 0.9004828326180258, "grad_norm": 0.52734375, "learning_rate": 4.7103735333710354e-06, "loss": 2.0116, "step": 16785 }, { "epoch": 0.9005364806866952, "grad_norm": 0.5, "learning_rate": 4.710332942973658e-06, "loss": 2.073, "step": 16786 }, { "epoch": 0.9005901287553648, "grad_norm": 0.5234375, "learning_rate": 4.710292349907075e-06, "loss": 2.2605, "step": 16787 }, { "epoch": 0.9006437768240343, "grad_norm": 0.48828125, "learning_rate": 4.710251754171333e-06, "loss": 2.3634, "step": 16788 }, { "epoch": 0.9006974248927039, "grad_norm": 0.4765625, "learning_rate": 4.7102111557664845e-06, "loss": 2.3547, "step": 16789 }, { "epoch": 0.9007510729613734, "grad_norm": 0.59765625, "learning_rate": 4.710170554692576e-06, "loss": 2.1238, "step": 16790 }, { "epoch": 0.9008047210300429, "grad_norm": 0.515625, "learning_rate": 4.7101299509496565e-06, "loss": 2.1912, "step": 16791 }, { "epoch": 0.9008583690987124, "grad_norm": 0.43359375, "learning_rate": 4.710089344537777e-06, "loss": 2.3444, "step": 16792 }, { "epoch": 0.900912017167382, "grad_norm": 0.484375, "learning_rate": 4.7100487354569845e-06, "loss": 2.2445, "step": 16793 }, { "epoch": 0.9009656652360515, "grad_norm": 0.49609375, "learning_rate": 4.710008123707329e-06, "loss": 2.425, "step": 16794 }, { "epoch": 0.9010193133047211, "grad_norm": 0.5546875, "learning_rate": 4.70996750928886e-06, "loss": 2.5078, "step": 16795 }, { "epoch": 0.9010729613733905, "grad_norm": 0.45703125, "learning_rate": 4.709926892201625e-06, "loss": 2.3537, "step": 16796 }, { "epoch": 0.9011266094420601, "grad_norm": 1.0703125, "learning_rate": 4.7098862724456755e-06, "loss": 2.2973, "step": 16797 }, { "epoch": 0.9011802575107296, "grad_norm": 0.4765625, "learning_rate": 4.709845650021058e-06, "loss": 2.4202, "step": 16798 }, { "epoch": 0.9012339055793992, "grad_norm": 0.4296875, "learning_rate": 4.709805024927823e-06, "loss": 2.2141, "step": 16799 }, { "epoch": 0.9012875536480687, "grad_norm": 0.404296875, "learning_rate": 4.709764397166018e-06, "loss": 2.1066, "step": 16800 }, { "epoch": 0.9013412017167381, "grad_norm": 0.55859375, "learning_rate": 4.709723766735695e-06, "loss": 2.585, "step": 16801 }, { "epoch": 0.9013948497854077, "grad_norm": 0.56640625, "learning_rate": 4.7096831336369e-06, "loss": 2.3248, "step": 16802 }, { "epoch": 0.9014484978540772, "grad_norm": 0.50390625, "learning_rate": 4.709642497869683e-06, "loss": 2.3664, "step": 16803 }, { "epoch": 0.9015021459227468, "grad_norm": 1.171875, "learning_rate": 4.709601859434094e-06, "loss": 2.3364, "step": 16804 }, { "epoch": 0.9015557939914163, "grad_norm": 0.4921875, "learning_rate": 4.709561218330182e-06, "loss": 2.3182, "step": 16805 }, { "epoch": 0.9016094420600859, "grad_norm": 0.482421875, "learning_rate": 4.7095205745579955e-06, "loss": 2.1542, "step": 16806 }, { "epoch": 0.9016630901287553, "grad_norm": 17.75, "learning_rate": 4.709479928117584e-06, "loss": 2.2111, "step": 16807 }, { "epoch": 0.9017167381974249, "grad_norm": 0.46875, "learning_rate": 4.709439279008995e-06, "loss": 2.2702, "step": 16808 }, { "epoch": 0.9017703862660944, "grad_norm": 0.5390625, "learning_rate": 4.70939862723228e-06, "loss": 2.3394, "step": 16809 }, { "epoch": 0.901824034334764, "grad_norm": 0.435546875, "learning_rate": 4.709357972787486e-06, "loss": 2.3195, "step": 16810 }, { "epoch": 0.9018776824034335, "grad_norm": 0.392578125, "learning_rate": 4.7093173156746645e-06, "loss": 2.2443, "step": 16811 }, { "epoch": 0.901931330472103, "grad_norm": 0.486328125, "learning_rate": 4.709276655893862e-06, "loss": 2.2586, "step": 16812 }, { "epoch": 0.9019849785407725, "grad_norm": 0.546875, "learning_rate": 4.709235993445128e-06, "loss": 2.1631, "step": 16813 }, { "epoch": 0.902038626609442, "grad_norm": 0.40625, "learning_rate": 4.709195328328514e-06, "loss": 1.9777, "step": 16814 }, { "epoch": 0.9020922746781116, "grad_norm": 0.423828125, "learning_rate": 4.709154660544066e-06, "loss": 2.0425, "step": 16815 }, { "epoch": 0.9021459227467811, "grad_norm": 0.431640625, "learning_rate": 4.709113990091836e-06, "loss": 2.2952, "step": 16816 }, { "epoch": 0.9021995708154507, "grad_norm": 0.404296875, "learning_rate": 4.709073316971871e-06, "loss": 2.4809, "step": 16817 }, { "epoch": 0.9022532188841201, "grad_norm": 0.423828125, "learning_rate": 4.709032641184221e-06, "loss": 2.1937, "step": 16818 }, { "epoch": 0.9023068669527897, "grad_norm": 0.59375, "learning_rate": 4.708991962728934e-06, "loss": 2.3952, "step": 16819 }, { "epoch": 0.9023605150214592, "grad_norm": 0.84765625, "learning_rate": 4.7089512816060605e-06, "loss": 1.3163, "step": 16820 }, { "epoch": 0.9024141630901288, "grad_norm": 0.46875, "learning_rate": 4.708910597815649e-06, "loss": 2.2905, "step": 16821 }, { "epoch": 0.9024678111587983, "grad_norm": 0.51171875, "learning_rate": 4.70886991135775e-06, "loss": 2.2135, "step": 16822 }, { "epoch": 0.9025214592274678, "grad_norm": 0.515625, "learning_rate": 4.70882922223241e-06, "loss": 2.6306, "step": 16823 }, { "epoch": 0.9025751072961373, "grad_norm": 0.51171875, "learning_rate": 4.70878853043968e-06, "loss": 2.3586, "step": 16824 }, { "epoch": 0.9026287553648069, "grad_norm": 0.453125, "learning_rate": 4.708747835979608e-06, "loss": 2.3762, "step": 16825 }, { "epoch": 0.9026824034334764, "grad_norm": 0.451171875, "learning_rate": 4.708707138852245e-06, "loss": 2.2058, "step": 16826 }, { "epoch": 0.902736051502146, "grad_norm": 0.392578125, "learning_rate": 4.708666439057638e-06, "loss": 1.9084, "step": 16827 }, { "epoch": 0.9027896995708155, "grad_norm": 0.498046875, "learning_rate": 4.708625736595838e-06, "loss": 2.4415, "step": 16828 }, { "epoch": 0.9028433476394849, "grad_norm": 0.56640625, "learning_rate": 4.708585031466894e-06, "loss": 2.2687, "step": 16829 }, { "epoch": 0.9028969957081545, "grad_norm": 0.41796875, "learning_rate": 4.7085443236708525e-06, "loss": 2.2242, "step": 16830 }, { "epoch": 0.902950643776824, "grad_norm": 0.404296875, "learning_rate": 4.7085036132077655e-06, "loss": 2.1514, "step": 16831 }, { "epoch": 0.9030042918454936, "grad_norm": 0.443359375, "learning_rate": 4.708462900077682e-06, "loss": 2.3484, "step": 16832 }, { "epoch": 0.903057939914163, "grad_norm": 0.5546875, "learning_rate": 4.708422184280649e-06, "loss": 2.3717, "step": 16833 }, { "epoch": 0.9031115879828326, "grad_norm": 0.4296875, "learning_rate": 4.708381465816718e-06, "loss": 2.3771, "step": 16834 }, { "epoch": 0.9031652360515021, "grad_norm": 0.5078125, "learning_rate": 4.708340744685937e-06, "loss": 2.3733, "step": 16835 }, { "epoch": 0.9032188841201717, "grad_norm": 0.87109375, "learning_rate": 4.708300020888355e-06, "loss": 2.185, "step": 16836 }, { "epoch": 0.9032725321888412, "grad_norm": 0.4453125, "learning_rate": 4.708259294424022e-06, "loss": 2.5128, "step": 16837 }, { "epoch": 0.9033261802575108, "grad_norm": 0.5859375, "learning_rate": 4.708218565292987e-06, "loss": 2.5605, "step": 16838 }, { "epoch": 0.9033798283261802, "grad_norm": 0.4140625, "learning_rate": 4.7081778334953e-06, "loss": 2.3107, "step": 16839 }, { "epoch": 0.9034334763948498, "grad_norm": 0.4765625, "learning_rate": 4.708137099031007e-06, "loss": 2.0206, "step": 16840 }, { "epoch": 0.9034871244635193, "grad_norm": 0.451171875, "learning_rate": 4.708096361900161e-06, "loss": 2.2788, "step": 16841 }, { "epoch": 0.9035407725321889, "grad_norm": 0.51171875, "learning_rate": 4.708055622102809e-06, "loss": 2.3957, "step": 16842 }, { "epoch": 0.9035944206008584, "grad_norm": 0.431640625, "learning_rate": 4.708014879639e-06, "loss": 2.3427, "step": 16843 }, { "epoch": 0.9036480686695278, "grad_norm": 1.046875, "learning_rate": 4.707974134508785e-06, "loss": 2.175, "step": 16844 }, { "epoch": 0.9037017167381974, "grad_norm": 0.5, "learning_rate": 4.7079333867122125e-06, "loss": 2.4912, "step": 16845 }, { "epoch": 0.9037553648068669, "grad_norm": 0.51171875, "learning_rate": 4.70789263624933e-06, "loss": 2.1661, "step": 16846 }, { "epoch": 0.9038090128755365, "grad_norm": 0.45703125, "learning_rate": 4.707851883120189e-06, "loss": 2.4656, "step": 16847 }, { "epoch": 0.903862660944206, "grad_norm": 0.498046875, "learning_rate": 4.7078111273248374e-06, "loss": 2.0302, "step": 16848 }, { "epoch": 0.9039163090128756, "grad_norm": 0.40625, "learning_rate": 4.707770368863325e-06, "loss": 1.6158, "step": 16849 }, { "epoch": 0.903969957081545, "grad_norm": 0.4765625, "learning_rate": 4.707729607735702e-06, "loss": 2.3366, "step": 16850 }, { "epoch": 0.9040236051502146, "grad_norm": 0.48828125, "learning_rate": 4.707688843942014e-06, "loss": 2.2047, "step": 16851 }, { "epoch": 0.9040772532188841, "grad_norm": 0.9609375, "learning_rate": 4.707648077482314e-06, "loss": 2.3696, "step": 16852 }, { "epoch": 0.9041309012875537, "grad_norm": 0.4296875, "learning_rate": 4.7076073083566494e-06, "loss": 2.2099, "step": 16853 }, { "epoch": 0.9041845493562232, "grad_norm": 0.52734375, "learning_rate": 4.707566536565071e-06, "loss": 2.464, "step": 16854 }, { "epoch": 0.9042381974248928, "grad_norm": 0.5625, "learning_rate": 4.707525762107627e-06, "loss": 2.3567, "step": 16855 }, { "epoch": 0.9042918454935622, "grad_norm": 0.5078125, "learning_rate": 4.707484984984365e-06, "loss": 2.3522, "step": 16856 }, { "epoch": 0.9043454935622317, "grad_norm": 0.53515625, "learning_rate": 4.707444205195337e-06, "loss": 2.3934, "step": 16857 }, { "epoch": 0.9043991416309013, "grad_norm": 0.5390625, "learning_rate": 4.707403422740591e-06, "loss": 2.1816, "step": 16858 }, { "epoch": 0.9044527896995708, "grad_norm": 0.494140625, "learning_rate": 4.707362637620176e-06, "loss": 2.5101, "step": 16859 }, { "epoch": 0.9045064377682404, "grad_norm": 0.625, "learning_rate": 4.7073218498341426e-06, "loss": 2.3294, "step": 16860 }, { "epoch": 0.9045600858369098, "grad_norm": 0.98046875, "learning_rate": 4.7072810593825385e-06, "loss": 2.2213, "step": 16861 }, { "epoch": 0.9046137339055794, "grad_norm": 0.44921875, "learning_rate": 4.707240266265414e-06, "loss": 1.9858, "step": 16862 }, { "epoch": 0.9046673819742489, "grad_norm": 0.78125, "learning_rate": 4.707199470482817e-06, "loss": 2.104, "step": 16863 }, { "epoch": 0.9047210300429185, "grad_norm": 0.474609375, "learning_rate": 4.707158672034797e-06, "loss": 2.5625, "step": 16864 }, { "epoch": 0.904774678111588, "grad_norm": 0.453125, "learning_rate": 4.707117870921406e-06, "loss": 2.1967, "step": 16865 }, { "epoch": 0.9048283261802575, "grad_norm": 0.54296875, "learning_rate": 4.707077067142689e-06, "loss": 2.2689, "step": 16866 }, { "epoch": 0.904881974248927, "grad_norm": 0.44921875, "learning_rate": 4.7070362606986995e-06, "loss": 1.6632, "step": 16867 }, { "epoch": 0.9049356223175966, "grad_norm": 0.52734375, "learning_rate": 4.706995451589483e-06, "loss": 2.5191, "step": 16868 }, { "epoch": 0.9049892703862661, "grad_norm": 0.62890625, "learning_rate": 4.706954639815091e-06, "loss": 2.2582, "step": 16869 }, { "epoch": 0.9050429184549357, "grad_norm": 0.4609375, "learning_rate": 4.706913825375573e-06, "loss": 2.1387, "step": 16870 }, { "epoch": 0.9050965665236052, "grad_norm": 0.40234375, "learning_rate": 4.706873008270977e-06, "loss": 2.1594, "step": 16871 }, { "epoch": 0.9051502145922746, "grad_norm": 0.46484375, "learning_rate": 4.7068321885013536e-06, "loss": 1.7409, "step": 16872 }, { "epoch": 0.9052038626609442, "grad_norm": 0.38671875, "learning_rate": 4.706791366066751e-06, "loss": 1.9926, "step": 16873 }, { "epoch": 0.9052575107296137, "grad_norm": 0.4609375, "learning_rate": 4.706750540967219e-06, "loss": 2.2946, "step": 16874 }, { "epoch": 0.9053111587982833, "grad_norm": 0.52734375, "learning_rate": 4.706709713202806e-06, "loss": 2.4508, "step": 16875 }, { "epoch": 0.9053648068669528, "grad_norm": 0.380859375, "learning_rate": 4.706668882773563e-06, "loss": 2.3105, "step": 16876 }, { "epoch": 0.9054184549356223, "grad_norm": 0.53125, "learning_rate": 4.706628049679538e-06, "loss": 2.2373, "step": 16877 }, { "epoch": 0.9054721030042918, "grad_norm": 0.474609375, "learning_rate": 4.70658721392078e-06, "loss": 2.1114, "step": 16878 }, { "epoch": 0.9055257510729614, "grad_norm": 0.46484375, "learning_rate": 4.7065463754973396e-06, "loss": 2.1206, "step": 16879 }, { "epoch": 0.9055793991416309, "grad_norm": 0.474609375, "learning_rate": 4.706505534409266e-06, "loss": 1.975, "step": 16880 }, { "epoch": 0.9056330472103005, "grad_norm": 0.392578125, "learning_rate": 4.706464690656607e-06, "loss": 2.2067, "step": 16881 }, { "epoch": 0.9056866952789699, "grad_norm": 0.65234375, "learning_rate": 4.706423844239414e-06, "loss": 2.1431, "step": 16882 }, { "epoch": 0.9057403433476395, "grad_norm": 0.48046875, "learning_rate": 4.706382995157734e-06, "loss": 2.149, "step": 16883 }, { "epoch": 0.905793991416309, "grad_norm": 0.56640625, "learning_rate": 4.706342143411619e-06, "loss": 2.2955, "step": 16884 }, { "epoch": 0.9058476394849786, "grad_norm": 0.47265625, "learning_rate": 4.7063012890011165e-06, "loss": 2.4915, "step": 16885 }, { "epoch": 0.9059012875536481, "grad_norm": 0.7265625, "learning_rate": 4.7062604319262766e-06, "loss": 2.2061, "step": 16886 }, { "epoch": 0.9059549356223175, "grad_norm": 0.59375, "learning_rate": 4.706219572187149e-06, "loss": 2.4193, "step": 16887 }, { "epoch": 0.9060085836909871, "grad_norm": 0.46484375, "learning_rate": 4.7061787097837804e-06, "loss": 2.2849, "step": 16888 }, { "epoch": 0.9060622317596566, "grad_norm": 0.435546875, "learning_rate": 4.706137844716223e-06, "loss": 2.3148, "step": 16889 }, { "epoch": 0.9061158798283262, "grad_norm": 0.482421875, "learning_rate": 4.706096976984526e-06, "loss": 2.5239, "step": 16890 }, { "epoch": 0.9061695278969957, "grad_norm": 0.466796875, "learning_rate": 4.7060561065887365e-06, "loss": 2.3804, "step": 16891 }, { "epoch": 0.9062231759656653, "grad_norm": 0.55859375, "learning_rate": 4.706015233528907e-06, "loss": 2.4246, "step": 16892 }, { "epoch": 0.9062768240343347, "grad_norm": 0.47265625, "learning_rate": 4.705974357805084e-06, "loss": 2.2744, "step": 16893 }, { "epoch": 0.9063304721030043, "grad_norm": 0.51953125, "learning_rate": 4.705933479417319e-06, "loss": 2.3492, "step": 16894 }, { "epoch": 0.9063841201716738, "grad_norm": 0.94140625, "learning_rate": 4.70589259836566e-06, "loss": 1.6949, "step": 16895 }, { "epoch": 0.9064377682403434, "grad_norm": 0.96875, "learning_rate": 4.705851714650157e-06, "loss": 2.3471, "step": 16896 }, { "epoch": 0.9064914163090129, "grad_norm": 0.56640625, "learning_rate": 4.705810828270859e-06, "loss": 2.192, "step": 16897 }, { "epoch": 0.9065450643776825, "grad_norm": 0.470703125, "learning_rate": 4.705769939227816e-06, "loss": 2.2715, "step": 16898 }, { "epoch": 0.9065987124463519, "grad_norm": 0.458984375, "learning_rate": 4.705729047521077e-06, "loss": 2.2306, "step": 16899 }, { "epoch": 0.9066523605150214, "grad_norm": 0.453125, "learning_rate": 4.705688153150691e-06, "loss": 2.0636, "step": 16900 }, { "epoch": 0.906706008583691, "grad_norm": 0.455078125, "learning_rate": 4.705647256116707e-06, "loss": 2.2029, "step": 16901 }, { "epoch": 0.9067596566523605, "grad_norm": 0.59765625, "learning_rate": 4.7056063564191755e-06, "loss": 2.4436, "step": 16902 }, { "epoch": 0.9068133047210301, "grad_norm": 0.474609375, "learning_rate": 4.705565454058146e-06, "loss": 2.6332, "step": 16903 }, { "epoch": 0.9068669527896995, "grad_norm": 0.63671875, "learning_rate": 4.705524549033668e-06, "loss": 2.6436, "step": 16904 }, { "epoch": 0.9069206008583691, "grad_norm": 0.494140625, "learning_rate": 4.70548364134579e-06, "loss": 2.3273, "step": 16905 }, { "epoch": 0.9069742489270386, "grad_norm": 0.44921875, "learning_rate": 4.70544273099456e-06, "loss": 2.2994, "step": 16906 }, { "epoch": 0.9070278969957082, "grad_norm": 0.408203125, "learning_rate": 4.705401817980031e-06, "loss": 2.2351, "step": 16907 }, { "epoch": 0.9070815450643777, "grad_norm": 0.55859375, "learning_rate": 4.70536090230225e-06, "loss": 2.387, "step": 16908 }, { "epoch": 0.9071351931330472, "grad_norm": 0.48828125, "learning_rate": 4.705319983961266e-06, "loss": 2.3738, "step": 16909 }, { "epoch": 0.9071888412017167, "grad_norm": 0.431640625, "learning_rate": 4.70527906295713e-06, "loss": 2.1139, "step": 16910 }, { "epoch": 0.9072424892703863, "grad_norm": 0.65234375, "learning_rate": 4.70523813928989e-06, "loss": 2.3741, "step": 16911 }, { "epoch": 0.9072961373390558, "grad_norm": 0.53125, "learning_rate": 4.705197212959598e-06, "loss": 2.4257, "step": 16912 }, { "epoch": 0.9073497854077254, "grad_norm": 0.51953125, "learning_rate": 4.7051562839663005e-06, "loss": 2.2235, "step": 16913 }, { "epoch": 0.9074034334763948, "grad_norm": 0.52734375, "learning_rate": 4.705115352310048e-06, "loss": 2.5829, "step": 16914 }, { "epoch": 0.9074570815450643, "grad_norm": 0.5234375, "learning_rate": 4.7050744179908895e-06, "loss": 2.3515, "step": 16915 }, { "epoch": 0.9075107296137339, "grad_norm": 0.94921875, "learning_rate": 4.705033481008876e-06, "loss": 2.3749, "step": 16916 }, { "epoch": 0.9075643776824034, "grad_norm": 0.5859375, "learning_rate": 4.7049925413640545e-06, "loss": 2.3726, "step": 16917 }, { "epoch": 0.907618025751073, "grad_norm": 0.44921875, "learning_rate": 4.704951599056476e-06, "loss": 2.1851, "step": 16918 }, { "epoch": 0.9076716738197425, "grad_norm": 0.46484375, "learning_rate": 4.70491065408619e-06, "loss": 2.4512, "step": 16919 }, { "epoch": 0.907725321888412, "grad_norm": 0.50390625, "learning_rate": 4.704869706453246e-06, "loss": 2.2423, "step": 16920 }, { "epoch": 0.9077789699570815, "grad_norm": 0.435546875, "learning_rate": 4.704828756157693e-06, "loss": 2.5735, "step": 16921 }, { "epoch": 0.9078326180257511, "grad_norm": 0.458984375, "learning_rate": 4.704787803199581e-06, "loss": 2.2705, "step": 16922 }, { "epoch": 0.9078862660944206, "grad_norm": 0.443359375, "learning_rate": 4.7047468475789574e-06, "loss": 2.4136, "step": 16923 }, { "epoch": 0.9079399141630902, "grad_norm": 0.439453125, "learning_rate": 4.704705889295874e-06, "loss": 2.2655, "step": 16924 }, { "epoch": 0.9079935622317596, "grad_norm": 0.47265625, "learning_rate": 4.70466492835038e-06, "loss": 2.155, "step": 16925 }, { "epoch": 0.9080472103004292, "grad_norm": 0.55078125, "learning_rate": 4.704623964742524e-06, "loss": 2.3338, "step": 16926 }, { "epoch": 0.9081008583690987, "grad_norm": 0.455078125, "learning_rate": 4.704582998472356e-06, "loss": 2.4539, "step": 16927 }, { "epoch": 0.9081545064377683, "grad_norm": 0.427734375, "learning_rate": 4.7045420295399245e-06, "loss": 2.2604, "step": 16928 }, { "epoch": 0.9082081545064378, "grad_norm": 0.51953125, "learning_rate": 4.704501057945281e-06, "loss": 2.6077, "step": 16929 }, { "epoch": 0.9082618025751072, "grad_norm": 0.484375, "learning_rate": 4.704460083688473e-06, "loss": 2.4635, "step": 16930 }, { "epoch": 0.9083154506437768, "grad_norm": 0.4765625, "learning_rate": 4.704419106769551e-06, "loss": 2.1803, "step": 16931 }, { "epoch": 0.9083690987124463, "grad_norm": 0.4140625, "learning_rate": 4.704378127188563e-06, "loss": 2.4555, "step": 16932 }, { "epoch": 0.9084227467811159, "grad_norm": 0.474609375, "learning_rate": 4.7043371449455624e-06, "loss": 2.2668, "step": 16933 }, { "epoch": 0.9084763948497854, "grad_norm": 0.443359375, "learning_rate": 4.704296160040594e-06, "loss": 2.4278, "step": 16934 }, { "epoch": 0.908530042918455, "grad_norm": 1.953125, "learning_rate": 4.7042551724737096e-06, "loss": 2.1079, "step": 16935 }, { "epoch": 0.9085836909871244, "grad_norm": 0.5234375, "learning_rate": 4.704214182244958e-06, "loss": 2.4723, "step": 16936 }, { "epoch": 0.908637339055794, "grad_norm": 0.484375, "learning_rate": 4.70417318935439e-06, "loss": 2.4098, "step": 16937 }, { "epoch": 0.9086909871244635, "grad_norm": 0.40625, "learning_rate": 4.704132193802054e-06, "loss": 2.3017, "step": 16938 }, { "epoch": 0.9087446351931331, "grad_norm": 0.55859375, "learning_rate": 4.704091195588e-06, "loss": 2.1075, "step": 16939 }, { "epoch": 0.9087982832618026, "grad_norm": 0.482421875, "learning_rate": 4.704050194712277e-06, "loss": 2.3091, "step": 16940 }, { "epoch": 0.9088519313304722, "grad_norm": 0.484375, "learning_rate": 4.7040091911749345e-06, "loss": 2.4143, "step": 16941 }, { "epoch": 0.9089055793991416, "grad_norm": 0.470703125, "learning_rate": 4.703968184976022e-06, "loss": 2.4435, "step": 16942 }, { "epoch": 0.9089592274678111, "grad_norm": 0.44921875, "learning_rate": 4.703927176115589e-06, "loss": 1.998, "step": 16943 }, { "epoch": 0.9090128755364807, "grad_norm": 0.484375, "learning_rate": 4.703886164593686e-06, "loss": 2.3498, "step": 16944 }, { "epoch": 0.9090665236051502, "grad_norm": 0.4765625, "learning_rate": 4.703845150410361e-06, "loss": 2.476, "step": 16945 }, { "epoch": 0.9091201716738198, "grad_norm": 0.462890625, "learning_rate": 4.703804133565665e-06, "loss": 2.5432, "step": 16946 }, { "epoch": 0.9091738197424892, "grad_norm": 0.5078125, "learning_rate": 4.703763114059648e-06, "loss": 2.4439, "step": 16947 }, { "epoch": 0.9092274678111588, "grad_norm": 0.5546875, "learning_rate": 4.703722091892356e-06, "loss": 1.1261, "step": 16948 }, { "epoch": 0.9092811158798283, "grad_norm": 0.486328125, "learning_rate": 4.703681067063842e-06, "loss": 2.1705, "step": 16949 }, { "epoch": 0.9093347639484979, "grad_norm": 0.498046875, "learning_rate": 4.703640039574156e-06, "loss": 2.3561, "step": 16950 }, { "epoch": 0.9093884120171674, "grad_norm": 0.56640625, "learning_rate": 4.703599009423344e-06, "loss": 1.2869, "step": 16951 }, { "epoch": 0.909442060085837, "grad_norm": 0.498046875, "learning_rate": 4.703557976611458e-06, "loss": 2.2646, "step": 16952 }, { "epoch": 0.9094957081545064, "grad_norm": 0.578125, "learning_rate": 4.703516941138547e-06, "loss": 2.2995, "step": 16953 }, { "epoch": 0.909549356223176, "grad_norm": 0.451171875, "learning_rate": 4.7034759030046615e-06, "loss": 2.138, "step": 16954 }, { "epoch": 0.9096030042918455, "grad_norm": 0.4453125, "learning_rate": 4.70343486220985e-06, "loss": 2.1084, "step": 16955 }, { "epoch": 0.9096566523605151, "grad_norm": 1.0, "learning_rate": 4.703393818754162e-06, "loss": 2.569, "step": 16956 }, { "epoch": 0.9097103004291845, "grad_norm": 0.47265625, "learning_rate": 4.703352772637647e-06, "loss": 2.3181, "step": 16957 }, { "epoch": 0.909763948497854, "grad_norm": 0.498046875, "learning_rate": 4.703311723860356e-06, "loss": 2.4446, "step": 16958 }, { "epoch": 0.9098175965665236, "grad_norm": 0.484375, "learning_rate": 4.703270672422336e-06, "loss": 2.1471, "step": 16959 }, { "epoch": 0.9098712446351931, "grad_norm": 0.484375, "learning_rate": 4.703229618323639e-06, "loss": 2.405, "step": 16960 }, { "epoch": 0.9099248927038627, "grad_norm": 0.54296875, "learning_rate": 4.703188561564314e-06, "loss": 2.1857, "step": 16961 }, { "epoch": 0.9099785407725322, "grad_norm": 0.46875, "learning_rate": 4.70314750214441e-06, "loss": 2.2655, "step": 16962 }, { "epoch": 0.9100321888412017, "grad_norm": 0.55078125, "learning_rate": 4.703106440063977e-06, "loss": 2.3637, "step": 16963 }, { "epoch": 0.9100858369098712, "grad_norm": 0.427734375, "learning_rate": 4.703065375323064e-06, "loss": 2.1627, "step": 16964 }, { "epoch": 0.9101394849785408, "grad_norm": 0.4453125, "learning_rate": 4.703024307921721e-06, "loss": 2.2918, "step": 16965 }, { "epoch": 0.9101931330472103, "grad_norm": 0.490234375, "learning_rate": 4.702983237859997e-06, "loss": 2.435, "step": 16966 }, { "epoch": 0.9102467811158799, "grad_norm": 0.484375, "learning_rate": 4.702942165137943e-06, "loss": 2.3212, "step": 16967 }, { "epoch": 0.9103004291845493, "grad_norm": 0.69140625, "learning_rate": 4.702901089755608e-06, "loss": 2.3487, "step": 16968 }, { "epoch": 0.9103540772532189, "grad_norm": 0.380859375, "learning_rate": 4.702860011713041e-06, "loss": 2.1914, "step": 16969 }, { "epoch": 0.9104077253218884, "grad_norm": 0.458984375, "learning_rate": 4.7028189310102926e-06, "loss": 2.1317, "step": 16970 }, { "epoch": 0.910461373390558, "grad_norm": 1.140625, "learning_rate": 4.702777847647411e-06, "loss": 2.0916, "step": 16971 }, { "epoch": 0.9105150214592275, "grad_norm": 0.470703125, "learning_rate": 4.702736761624447e-06, "loss": 2.2853, "step": 16972 }, { "epoch": 0.910568669527897, "grad_norm": 0.4296875, "learning_rate": 4.7026956729414494e-06, "loss": 2.4337, "step": 16973 }, { "epoch": 0.9106223175965665, "grad_norm": 0.5078125, "learning_rate": 4.702654581598468e-06, "loss": 2.2304, "step": 16974 }, { "epoch": 0.910675965665236, "grad_norm": 0.48046875, "learning_rate": 4.702613487595554e-06, "loss": 2.3412, "step": 16975 }, { "epoch": 0.9107296137339056, "grad_norm": 0.58984375, "learning_rate": 4.702572390932755e-06, "loss": 2.5127, "step": 16976 }, { "epoch": 0.9107832618025751, "grad_norm": 0.5, "learning_rate": 4.702531291610121e-06, "loss": 2.4245, "step": 16977 }, { "epoch": 0.9108369098712447, "grad_norm": 0.482421875, "learning_rate": 4.7024901896277016e-06, "loss": 2.5677, "step": 16978 }, { "epoch": 0.9108905579399141, "grad_norm": 0.9609375, "learning_rate": 4.702449084985547e-06, "loss": 2.2836, "step": 16979 }, { "epoch": 0.9109442060085837, "grad_norm": 0.5234375, "learning_rate": 4.702407977683708e-06, "loss": 2.4232, "step": 16980 }, { "epoch": 0.9109978540772532, "grad_norm": 0.578125, "learning_rate": 4.702366867722231e-06, "loss": 2.4609, "step": 16981 }, { "epoch": 0.9110515021459228, "grad_norm": 0.392578125, "learning_rate": 4.702325755101169e-06, "loss": 2.3374, "step": 16982 }, { "epoch": 0.9111051502145923, "grad_norm": 0.43359375, "learning_rate": 4.70228463982057e-06, "loss": 2.1105, "step": 16983 }, { "epoch": 0.9111587982832619, "grad_norm": 0.51171875, "learning_rate": 4.7022435218804826e-06, "loss": 2.3209, "step": 16984 }, { "epoch": 0.9112124463519313, "grad_norm": 0.462890625, "learning_rate": 4.7022024012809585e-06, "loss": 2.2401, "step": 16985 }, { "epoch": 0.9112660944206008, "grad_norm": 0.5078125, "learning_rate": 4.702161278022047e-06, "loss": 2.2961, "step": 16986 }, { "epoch": 0.9113197424892704, "grad_norm": 0.42578125, "learning_rate": 4.702120152103796e-06, "loss": 2.0982, "step": 16987 }, { "epoch": 0.9113733905579399, "grad_norm": 0.546875, "learning_rate": 4.702079023526258e-06, "loss": 1.9645, "step": 16988 }, { "epoch": 0.9114270386266095, "grad_norm": 0.4921875, "learning_rate": 4.70203789228948e-06, "loss": 1.7023, "step": 16989 }, { "epoch": 0.9114806866952789, "grad_norm": 0.35546875, "learning_rate": 4.701996758393513e-06, "loss": 2.2666, "step": 16990 }, { "epoch": 0.9115343347639485, "grad_norm": 0.392578125, "learning_rate": 4.701955621838407e-06, "loss": 2.1145, "step": 16991 }, { "epoch": 0.911587982832618, "grad_norm": 0.48046875, "learning_rate": 4.70191448262421e-06, "loss": 2.1629, "step": 16992 }, { "epoch": 0.9116416309012876, "grad_norm": 0.45703125, "learning_rate": 4.7018733407509744e-06, "loss": 2.3473, "step": 16993 }, { "epoch": 0.9116952789699571, "grad_norm": 1.015625, "learning_rate": 4.701832196218747e-06, "loss": 2.6355, "step": 16994 }, { "epoch": 0.9117489270386266, "grad_norm": 0.4375, "learning_rate": 4.70179104902758e-06, "loss": 2.1323, "step": 16995 }, { "epoch": 0.9118025751072961, "grad_norm": 0.6484375, "learning_rate": 4.70174989917752e-06, "loss": 1.4922, "step": 16996 }, { "epoch": 0.9118562231759657, "grad_norm": 0.5, "learning_rate": 4.70170874666862e-06, "loss": 2.3226, "step": 16997 }, { "epoch": 0.9119098712446352, "grad_norm": 0.484375, "learning_rate": 4.701667591500928e-06, "loss": 2.3582, "step": 16998 }, { "epoch": 0.9119635193133048, "grad_norm": 0.423828125, "learning_rate": 4.7016264336744945e-06, "loss": 2.1402, "step": 16999 }, { "epoch": 0.9120171673819742, "grad_norm": 0.44140625, "learning_rate": 4.701585273189367e-06, "loss": 2.2928, "step": 17000 }, { "epoch": 0.9120708154506437, "grad_norm": 0.439453125, "learning_rate": 4.701544110045598e-06, "loss": 2.5351, "step": 17001 }, { "epoch": 0.9121244635193133, "grad_norm": 0.50390625, "learning_rate": 4.701502944243237e-06, "loss": 2.4516, "step": 17002 }, { "epoch": 0.9121781115879828, "grad_norm": 0.50390625, "learning_rate": 4.701461775782331e-06, "loss": 2.2966, "step": 17003 }, { "epoch": 0.9122317596566524, "grad_norm": 0.51171875, "learning_rate": 4.701420604662933e-06, "loss": 2.2023, "step": 17004 }, { "epoch": 0.9122854077253219, "grad_norm": 0.40625, "learning_rate": 4.70137943088509e-06, "loss": 2.2316, "step": 17005 }, { "epoch": 0.9123390557939914, "grad_norm": 0.431640625, "learning_rate": 4.701338254448854e-06, "loss": 2.3547, "step": 17006 }, { "epoch": 0.9123927038626609, "grad_norm": 0.48046875, "learning_rate": 4.701297075354273e-06, "loss": 2.2036, "step": 17007 }, { "epoch": 0.9124463519313305, "grad_norm": 0.4296875, "learning_rate": 4.701255893601398e-06, "loss": 2.1483, "step": 17008 }, { "epoch": 0.9125, "grad_norm": 0.458984375, "learning_rate": 4.701214709190277e-06, "loss": 2.4044, "step": 17009 }, { "epoch": 0.9125536480686696, "grad_norm": 0.3828125, "learning_rate": 4.701173522120962e-06, "loss": 2.0014, "step": 17010 }, { "epoch": 0.912607296137339, "grad_norm": 0.4921875, "learning_rate": 4.701132332393501e-06, "loss": 2.0218, "step": 17011 }, { "epoch": 0.9126609442060086, "grad_norm": 0.470703125, "learning_rate": 4.701091140007945e-06, "loss": 2.3036, "step": 17012 }, { "epoch": 0.9127145922746781, "grad_norm": 0.439453125, "learning_rate": 4.701049944964342e-06, "loss": 2.2931, "step": 17013 }, { "epoch": 0.9127682403433477, "grad_norm": 0.47265625, "learning_rate": 4.701008747262744e-06, "loss": 2.3222, "step": 17014 }, { "epoch": 0.9128218884120172, "grad_norm": 0.46484375, "learning_rate": 4.700967546903199e-06, "loss": 2.1964, "step": 17015 }, { "epoch": 0.9128755364806866, "grad_norm": 0.48046875, "learning_rate": 4.700926343885758e-06, "loss": 2.2047, "step": 17016 }, { "epoch": 0.9129291845493562, "grad_norm": 0.44140625, "learning_rate": 4.700885138210469e-06, "loss": 2.4821, "step": 17017 }, { "epoch": 0.9129828326180257, "grad_norm": 0.453125, "learning_rate": 4.700843929877383e-06, "loss": 2.2376, "step": 17018 }, { "epoch": 0.9130364806866953, "grad_norm": 0.55078125, "learning_rate": 4.70080271888655e-06, "loss": 2.2115, "step": 17019 }, { "epoch": 0.9130901287553648, "grad_norm": 0.68359375, "learning_rate": 4.700761505238019e-06, "loss": 2.1931, "step": 17020 }, { "epoch": 0.9131437768240344, "grad_norm": 0.453125, "learning_rate": 4.700720288931842e-06, "loss": 2.1333, "step": 17021 }, { "epoch": 0.9131974248927038, "grad_norm": 0.96875, "learning_rate": 4.7006790699680646e-06, "loss": 2.4584, "step": 17022 }, { "epoch": 0.9132510729613734, "grad_norm": 0.546875, "learning_rate": 4.700637848346739e-06, "loss": 2.2625, "step": 17023 }, { "epoch": 0.9133047210300429, "grad_norm": 0.5234375, "learning_rate": 4.700596624067917e-06, "loss": 2.166, "step": 17024 }, { "epoch": 0.9133583690987125, "grad_norm": 0.5625, "learning_rate": 4.700555397131644e-06, "loss": 2.346, "step": 17025 }, { "epoch": 0.913412017167382, "grad_norm": 0.4765625, "learning_rate": 4.700514167537973e-06, "loss": 2.2653, "step": 17026 }, { "epoch": 0.9134656652360515, "grad_norm": 0.443359375, "learning_rate": 4.7004729352869535e-06, "loss": 2.3207, "step": 17027 }, { "epoch": 0.913519313304721, "grad_norm": 0.416015625, "learning_rate": 4.700431700378634e-06, "loss": 2.2127, "step": 17028 }, { "epoch": 0.9135729613733906, "grad_norm": 0.49609375, "learning_rate": 4.700390462813065e-06, "loss": 2.3742, "step": 17029 }, { "epoch": 0.9136266094420601, "grad_norm": 0.76953125, "learning_rate": 4.700349222590296e-06, "loss": 2.3778, "step": 17030 }, { "epoch": 0.9136802575107296, "grad_norm": 0.44140625, "learning_rate": 4.700307979710377e-06, "loss": 2.3079, "step": 17031 }, { "epoch": 0.9137339055793992, "grad_norm": 0.5625, "learning_rate": 4.7002667341733575e-06, "loss": 2.321, "step": 17032 }, { "epoch": 0.9137875536480686, "grad_norm": 0.5234375, "learning_rate": 4.700225485979288e-06, "loss": 2.1029, "step": 17033 }, { "epoch": 0.9138412017167382, "grad_norm": 0.427734375, "learning_rate": 4.700184235128219e-06, "loss": 2.1164, "step": 17034 }, { "epoch": 0.9138948497854077, "grad_norm": 0.45703125, "learning_rate": 4.7001429816201984e-06, "loss": 1.9037, "step": 17035 }, { "epoch": 0.9139484978540773, "grad_norm": 0.486328125, "learning_rate": 4.700101725455276e-06, "loss": 1.8428, "step": 17036 }, { "epoch": 0.9140021459227468, "grad_norm": 0.77734375, "learning_rate": 4.700060466633504e-06, "loss": 2.3522, "step": 17037 }, { "epoch": 0.9140557939914163, "grad_norm": 0.494140625, "learning_rate": 4.70001920515493e-06, "loss": 2.3238, "step": 17038 }, { "epoch": 0.9141094420600858, "grad_norm": 0.458984375, "learning_rate": 4.699977941019605e-06, "loss": 2.2212, "step": 17039 }, { "epoch": 0.9141630901287554, "grad_norm": 0.49609375, "learning_rate": 4.699936674227578e-06, "loss": 2.0266, "step": 17040 }, { "epoch": 0.9142167381974249, "grad_norm": 0.498046875, "learning_rate": 4.699895404778899e-06, "loss": 2.1464, "step": 17041 }, { "epoch": 0.9142703862660945, "grad_norm": 0.447265625, "learning_rate": 4.699854132673619e-06, "loss": 2.1538, "step": 17042 }, { "epoch": 0.914324034334764, "grad_norm": 0.455078125, "learning_rate": 4.699812857911786e-06, "loss": 2.4823, "step": 17043 }, { "epoch": 0.9143776824034334, "grad_norm": 0.462890625, "learning_rate": 4.699771580493451e-06, "loss": 2.3933, "step": 17044 }, { "epoch": 0.914431330472103, "grad_norm": 0.490234375, "learning_rate": 4.699730300418663e-06, "loss": 2.5348, "step": 17045 }, { "epoch": 0.9144849785407725, "grad_norm": 0.474609375, "learning_rate": 4.6996890176874735e-06, "loss": 2.2396, "step": 17046 }, { "epoch": 0.9145386266094421, "grad_norm": 0.51171875, "learning_rate": 4.699647732299932e-06, "loss": 2.2085, "step": 17047 }, { "epoch": 0.9145922746781115, "grad_norm": 0.498046875, "learning_rate": 4.699606444256085e-06, "loss": 2.2451, "step": 17048 }, { "epoch": 0.9146459227467811, "grad_norm": 0.4453125, "learning_rate": 4.699565153555987e-06, "loss": 2.1632, "step": 17049 }, { "epoch": 0.9146995708154506, "grad_norm": 0.48828125, "learning_rate": 4.699523860199685e-06, "loss": 2.3482, "step": 17050 }, { "epoch": 0.9147532188841202, "grad_norm": 0.40625, "learning_rate": 4.69948256418723e-06, "loss": 2.3051, "step": 17051 }, { "epoch": 0.9148068669527897, "grad_norm": 0.7734375, "learning_rate": 4.6994412655186725e-06, "loss": 2.1614, "step": 17052 }, { "epoch": 0.9148605150214593, "grad_norm": 0.40625, "learning_rate": 4.699399964194061e-06, "loss": 1.991, "step": 17053 }, { "epoch": 0.9149141630901287, "grad_norm": 0.4453125, "learning_rate": 4.699358660213445e-06, "loss": 2.2612, "step": 17054 }, { "epoch": 0.9149678111587983, "grad_norm": 0.515625, "learning_rate": 4.699317353576876e-06, "loss": 2.1575, "step": 17055 }, { "epoch": 0.9150214592274678, "grad_norm": 0.59765625, "learning_rate": 4.699276044284403e-06, "loss": 2.2772, "step": 17056 }, { "epoch": 0.9150751072961374, "grad_norm": 0.484375, "learning_rate": 4.699234732336076e-06, "loss": 2.3087, "step": 17057 }, { "epoch": 0.9151287553648069, "grad_norm": 0.451171875, "learning_rate": 4.699193417731946e-06, "loss": 2.2234, "step": 17058 }, { "epoch": 0.9151824034334763, "grad_norm": 0.498046875, "learning_rate": 4.69915210047206e-06, "loss": 1.6029, "step": 17059 }, { "epoch": 0.9152360515021459, "grad_norm": 0.44140625, "learning_rate": 4.69911078055647e-06, "loss": 2.3207, "step": 17060 }, { "epoch": 0.9152896995708154, "grad_norm": 0.458984375, "learning_rate": 4.699069457985226e-06, "loss": 2.4322, "step": 17061 }, { "epoch": 0.915343347639485, "grad_norm": 0.5546875, "learning_rate": 4.699028132758378e-06, "loss": 2.2606, "step": 17062 }, { "epoch": 0.9153969957081545, "grad_norm": 0.61328125, "learning_rate": 4.698986804875974e-06, "loss": 2.2615, "step": 17063 }, { "epoch": 0.9154506437768241, "grad_norm": 0.443359375, "learning_rate": 4.6989454743380665e-06, "loss": 2.1232, "step": 17064 }, { "epoch": 0.9155042918454935, "grad_norm": 0.515625, "learning_rate": 4.698904141144703e-06, "loss": 2.1888, "step": 17065 }, { "epoch": 0.9155579399141631, "grad_norm": 0.6796875, "learning_rate": 4.698862805295936e-06, "loss": 2.1976, "step": 17066 }, { "epoch": 0.9156115879828326, "grad_norm": 0.51953125, "learning_rate": 4.698821466791813e-06, "loss": 2.3735, "step": 17067 }, { "epoch": 0.9156652360515022, "grad_norm": 0.5, "learning_rate": 4.698780125632386e-06, "loss": 2.5135, "step": 17068 }, { "epoch": 0.9157188841201717, "grad_norm": 0.59765625, "learning_rate": 4.698738781817703e-06, "loss": 2.1416, "step": 17069 }, { "epoch": 0.9157725321888412, "grad_norm": 0.447265625, "learning_rate": 4.698697435347815e-06, "loss": 2.3041, "step": 17070 }, { "epoch": 0.9158261802575107, "grad_norm": 0.494140625, "learning_rate": 4.698656086222771e-06, "loss": 2.3315, "step": 17071 }, { "epoch": 0.9158798283261803, "grad_norm": 0.4375, "learning_rate": 4.6986147344426235e-06, "loss": 1.4249, "step": 17072 }, { "epoch": 0.9159334763948498, "grad_norm": 0.490234375, "learning_rate": 4.69857338000742e-06, "loss": 2.1788, "step": 17073 }, { "epoch": 0.9159871244635193, "grad_norm": 0.416015625, "learning_rate": 4.698532022917211e-06, "loss": 2.3844, "step": 17074 }, { "epoch": 0.9160407725321889, "grad_norm": 0.5546875, "learning_rate": 4.698490663172045e-06, "loss": 2.294, "step": 17075 }, { "epoch": 0.9160944206008583, "grad_norm": 1.0390625, "learning_rate": 4.698449300771974e-06, "loss": 2.2782, "step": 17076 }, { "epoch": 0.9161480686695279, "grad_norm": 0.439453125, "learning_rate": 4.698407935717048e-06, "loss": 2.2478, "step": 17077 }, { "epoch": 0.9162017167381974, "grad_norm": 0.51171875, "learning_rate": 4.698366568007317e-06, "loss": 2.2997, "step": 17078 }, { "epoch": 0.916255364806867, "grad_norm": 0.55078125, "learning_rate": 4.6983251976428305e-06, "loss": 2.2601, "step": 17079 }, { "epoch": 0.9163090128755365, "grad_norm": 0.404296875, "learning_rate": 4.698283824623637e-06, "loss": 2.2351, "step": 17080 }, { "epoch": 0.916362660944206, "grad_norm": 0.765625, "learning_rate": 4.698242448949788e-06, "loss": 2.2184, "step": 17081 }, { "epoch": 0.9164163090128755, "grad_norm": 0.455078125, "learning_rate": 4.6982010706213334e-06, "loss": 2.365, "step": 17082 }, { "epoch": 0.9164699570815451, "grad_norm": 0.55078125, "learning_rate": 4.698159689638322e-06, "loss": 2.5088, "step": 17083 }, { "epoch": 0.9165236051502146, "grad_norm": 0.50390625, "learning_rate": 4.6981183060008065e-06, "loss": 2.4173, "step": 17084 }, { "epoch": 0.9165772532188842, "grad_norm": 0.443359375, "learning_rate": 4.698076919708834e-06, "loss": 2.1697, "step": 17085 }, { "epoch": 0.9166309012875536, "grad_norm": 0.796875, "learning_rate": 4.698035530762455e-06, "loss": 1.7488, "step": 17086 }, { "epoch": 0.9166845493562231, "grad_norm": 0.41796875, "learning_rate": 4.697994139161721e-06, "loss": 2.334, "step": 17087 }, { "epoch": 0.9167381974248927, "grad_norm": 0.48828125, "learning_rate": 4.6979527449066804e-06, "loss": 2.2526, "step": 17088 }, { "epoch": 0.9167918454935622, "grad_norm": 0.51171875, "learning_rate": 4.697911347997385e-06, "loss": 2.2138, "step": 17089 }, { "epoch": 0.9168454935622318, "grad_norm": 0.765625, "learning_rate": 4.697869948433883e-06, "loss": 2.2537, "step": 17090 }, { "epoch": 0.9168991416309012, "grad_norm": 0.4765625, "learning_rate": 4.697828546216224e-06, "loss": 2.0962, "step": 17091 }, { "epoch": 0.9169527896995708, "grad_norm": 0.474609375, "learning_rate": 4.6977871413444595e-06, "loss": 1.5613, "step": 17092 }, { "epoch": 0.9170064377682403, "grad_norm": 0.447265625, "learning_rate": 4.69774573381864e-06, "loss": 2.3176, "step": 17093 }, { "epoch": 0.9170600858369099, "grad_norm": 0.419921875, "learning_rate": 4.697704323638814e-06, "loss": 1.5768, "step": 17094 }, { "epoch": 0.9171137339055794, "grad_norm": 0.400390625, "learning_rate": 4.697662910805032e-06, "loss": 2.0833, "step": 17095 }, { "epoch": 0.917167381974249, "grad_norm": 0.474609375, "learning_rate": 4.697621495317344e-06, "loss": 2.066, "step": 17096 }, { "epoch": 0.9172210300429184, "grad_norm": 0.494140625, "learning_rate": 4.697580077175799e-06, "loss": 2.41, "step": 17097 }, { "epoch": 0.917274678111588, "grad_norm": 0.55859375, "learning_rate": 4.69753865638045e-06, "loss": 2.5491, "step": 17098 }, { "epoch": 0.9173283261802575, "grad_norm": 0.474609375, "learning_rate": 4.697497232931344e-06, "loss": 2.6717, "step": 17099 }, { "epoch": 0.9173819742489271, "grad_norm": 0.5546875, "learning_rate": 4.697455806828532e-06, "loss": 2.4551, "step": 17100 }, { "epoch": 0.9174356223175966, "grad_norm": 0.53125, "learning_rate": 4.697414378072064e-06, "loss": 2.3071, "step": 17101 }, { "epoch": 0.917489270386266, "grad_norm": 0.419921875, "learning_rate": 4.697372946661991e-06, "loss": 2.1574, "step": 17102 }, { "epoch": 0.9175429184549356, "grad_norm": 0.427734375, "learning_rate": 4.697331512598362e-06, "loss": 2.5167, "step": 17103 }, { "epoch": 0.9175965665236051, "grad_norm": 0.4609375, "learning_rate": 4.697290075881226e-06, "loss": 2.0828, "step": 17104 }, { "epoch": 0.9176502145922747, "grad_norm": 0.5859375, "learning_rate": 4.697248636510636e-06, "loss": 2.1312, "step": 17105 }, { "epoch": 0.9177038626609442, "grad_norm": 0.458984375, "learning_rate": 4.697207194486639e-06, "loss": 2.2498, "step": 17106 }, { "epoch": 0.9177575107296138, "grad_norm": 0.48828125, "learning_rate": 4.697165749809287e-06, "loss": 2.3527, "step": 17107 }, { "epoch": 0.9178111587982832, "grad_norm": 0.5703125, "learning_rate": 4.697124302478629e-06, "loss": 2.0723, "step": 17108 }, { "epoch": 0.9178648068669528, "grad_norm": 0.625, "learning_rate": 4.697082852494717e-06, "loss": 2.574, "step": 17109 }, { "epoch": 0.9179184549356223, "grad_norm": 0.40625, "learning_rate": 4.697041399857598e-06, "loss": 1.9969, "step": 17110 }, { "epoch": 0.9179721030042919, "grad_norm": 0.4609375, "learning_rate": 4.696999944567324e-06, "loss": 2.1793, "step": 17111 }, { "epoch": 0.9180257510729614, "grad_norm": 0.49609375, "learning_rate": 4.696958486623944e-06, "loss": 2.4037, "step": 17112 }, { "epoch": 0.918079399141631, "grad_norm": 0.4296875, "learning_rate": 4.696917026027509e-06, "loss": 2.1741, "step": 17113 }, { "epoch": 0.9181330472103004, "grad_norm": 0.4921875, "learning_rate": 4.696875562778069e-06, "loss": 2.2557, "step": 17114 }, { "epoch": 0.91818669527897, "grad_norm": 0.5390625, "learning_rate": 4.696834096875673e-06, "loss": 2.3138, "step": 17115 }, { "epoch": 0.9182403433476395, "grad_norm": 0.50390625, "learning_rate": 4.696792628320374e-06, "loss": 2.3575, "step": 17116 }, { "epoch": 0.918293991416309, "grad_norm": 0.52734375, "learning_rate": 4.6967511571122186e-06, "loss": 2.254, "step": 17117 }, { "epoch": 0.9183476394849786, "grad_norm": 0.58984375, "learning_rate": 4.696709683251258e-06, "loss": 2.3426, "step": 17118 }, { "epoch": 0.918401287553648, "grad_norm": 0.4765625, "learning_rate": 4.696668206737543e-06, "loss": 2.4069, "step": 17119 }, { "epoch": 0.9184549356223176, "grad_norm": 0.4609375, "learning_rate": 4.696626727571123e-06, "loss": 2.327, "step": 17120 }, { "epoch": 0.9185085836909871, "grad_norm": 0.451171875, "learning_rate": 4.696585245752049e-06, "loss": 2.2844, "step": 17121 }, { "epoch": 0.9185622317596567, "grad_norm": 0.515625, "learning_rate": 4.696543761280369e-06, "loss": 2.2719, "step": 17122 }, { "epoch": 0.9186158798283262, "grad_norm": 0.421875, "learning_rate": 4.696502274156136e-06, "loss": 2.3616, "step": 17123 }, { "epoch": 0.9186695278969957, "grad_norm": 0.447265625, "learning_rate": 4.696460784379398e-06, "loss": 2.2528, "step": 17124 }, { "epoch": 0.9187231759656652, "grad_norm": 0.404296875, "learning_rate": 4.696419291950206e-06, "loss": 2.1346, "step": 17125 }, { "epoch": 0.9187768240343348, "grad_norm": 0.45703125, "learning_rate": 4.696377796868609e-06, "loss": 2.2305, "step": 17126 }, { "epoch": 0.9188304721030043, "grad_norm": 0.490234375, "learning_rate": 4.696336299134659e-06, "loss": 2.247, "step": 17127 }, { "epoch": 0.9188841201716739, "grad_norm": 0.55078125, "learning_rate": 4.696294798748404e-06, "loss": 2.3162, "step": 17128 }, { "epoch": 0.9189377682403433, "grad_norm": 0.474609375, "learning_rate": 4.696253295709896e-06, "loss": 2.1514, "step": 17129 }, { "epoch": 0.9189914163090128, "grad_norm": 0.478515625, "learning_rate": 4.696211790019184e-06, "loss": 2.4157, "step": 17130 }, { "epoch": 0.9190450643776824, "grad_norm": 0.443359375, "learning_rate": 4.696170281676319e-06, "loss": 2.1855, "step": 17131 }, { "epoch": 0.9190987124463519, "grad_norm": 0.54296875, "learning_rate": 4.69612877068135e-06, "loss": 1.5206, "step": 17132 }, { "epoch": 0.9191523605150215, "grad_norm": 0.494140625, "learning_rate": 4.696087257034327e-06, "loss": 2.3839, "step": 17133 }, { "epoch": 0.919206008583691, "grad_norm": 0.421875, "learning_rate": 4.6960457407353024e-06, "loss": 2.1383, "step": 17134 }, { "epoch": 0.9192596566523605, "grad_norm": 8.5, "learning_rate": 4.696004221784324e-06, "loss": 2.3062, "step": 17135 }, { "epoch": 0.91931330472103, "grad_norm": 0.482421875, "learning_rate": 4.695962700181442e-06, "loss": 2.1192, "step": 17136 }, { "epoch": 0.9193669527896996, "grad_norm": 0.494140625, "learning_rate": 4.695921175926708e-06, "loss": 2.2909, "step": 17137 }, { "epoch": 0.9194206008583691, "grad_norm": 0.390625, "learning_rate": 4.695879649020171e-06, "loss": 2.1239, "step": 17138 }, { "epoch": 0.9194742489270387, "grad_norm": 0.490234375, "learning_rate": 4.695838119461881e-06, "loss": 2.3372, "step": 17139 }, { "epoch": 0.9195278969957081, "grad_norm": 0.44921875, "learning_rate": 4.695796587251889e-06, "loss": 2.2803, "step": 17140 }, { "epoch": 0.9195815450643777, "grad_norm": 0.8125, "learning_rate": 4.695755052390245e-06, "loss": 1.8878, "step": 17141 }, { "epoch": 0.9196351931330472, "grad_norm": 0.515625, "learning_rate": 4.695713514877e-06, "loss": 2.8479, "step": 17142 }, { "epoch": 0.9196888412017168, "grad_norm": 0.376953125, "learning_rate": 4.695671974712201e-06, "loss": 1.9298, "step": 17143 }, { "epoch": 0.9197424892703863, "grad_norm": 0.482421875, "learning_rate": 4.695630431895902e-06, "loss": 2.3901, "step": 17144 }, { "epoch": 0.9197961373390557, "grad_norm": 0.5234375, "learning_rate": 4.695588886428151e-06, "loss": 2.3437, "step": 17145 }, { "epoch": 0.9198497854077253, "grad_norm": 0.46875, "learning_rate": 4.695547338308999e-06, "loss": 1.9046, "step": 17146 }, { "epoch": 0.9199034334763948, "grad_norm": 1.109375, "learning_rate": 4.6955057875384945e-06, "loss": 2.2473, "step": 17147 }, { "epoch": 0.9199570815450644, "grad_norm": 0.490234375, "learning_rate": 4.69546423411669e-06, "loss": 2.2614, "step": 17148 }, { "epoch": 0.9200107296137339, "grad_norm": 0.5078125, "learning_rate": 4.695422678043634e-06, "loss": 2.1566, "step": 17149 }, { "epoch": 0.9200643776824035, "grad_norm": 0.5546875, "learning_rate": 4.695381119319379e-06, "loss": 2.4845, "step": 17150 }, { "epoch": 0.9201180257510729, "grad_norm": 0.482421875, "learning_rate": 4.695339557943972e-06, "loss": 2.1872, "step": 17151 }, { "epoch": 0.9201716738197425, "grad_norm": 0.42578125, "learning_rate": 4.695297993917465e-06, "loss": 2.2114, "step": 17152 }, { "epoch": 0.920225321888412, "grad_norm": 0.55859375, "learning_rate": 4.695256427239908e-06, "loss": 2.2492, "step": 17153 }, { "epoch": 0.9202789699570816, "grad_norm": 0.466796875, "learning_rate": 4.695214857911351e-06, "loss": 2.4019, "step": 17154 }, { "epoch": 0.9203326180257511, "grad_norm": 0.45703125, "learning_rate": 4.695173285931845e-06, "loss": 2.5404, "step": 17155 }, { "epoch": 0.9203862660944206, "grad_norm": 0.6328125, "learning_rate": 4.695131711301438e-06, "loss": 2.3475, "step": 17156 }, { "epoch": 0.9204399141630901, "grad_norm": 0.39453125, "learning_rate": 4.695090134020182e-06, "loss": 2.3497, "step": 17157 }, { "epoch": 0.9204935622317597, "grad_norm": 0.47265625, "learning_rate": 4.695048554088128e-06, "loss": 2.1927, "step": 17158 }, { "epoch": 0.9205472103004292, "grad_norm": 0.400390625, "learning_rate": 4.6950069715053245e-06, "loss": 1.6789, "step": 17159 }, { "epoch": 0.9206008583690987, "grad_norm": 0.435546875, "learning_rate": 4.694965386271823e-06, "loss": 2.1864, "step": 17160 }, { "epoch": 0.9206545064377682, "grad_norm": 0.431640625, "learning_rate": 4.6949237983876724e-06, "loss": 2.1859, "step": 17161 }, { "epoch": 0.9207081545064377, "grad_norm": 0.458984375, "learning_rate": 4.694882207852924e-06, "loss": 2.4276, "step": 17162 }, { "epoch": 0.9207618025751073, "grad_norm": 0.466796875, "learning_rate": 4.694840614667628e-06, "loss": 2.4465, "step": 17163 }, { "epoch": 0.9208154506437768, "grad_norm": 0.65234375, "learning_rate": 4.694799018831834e-06, "loss": 2.3165, "step": 17164 }, { "epoch": 0.9208690987124464, "grad_norm": 0.51953125, "learning_rate": 4.694757420345592e-06, "loss": 2.3468, "step": 17165 }, { "epoch": 0.9209227467811159, "grad_norm": 0.4921875, "learning_rate": 4.694715819208953e-06, "loss": 2.2492, "step": 17166 }, { "epoch": 0.9209763948497854, "grad_norm": 0.5390625, "learning_rate": 4.694674215421967e-06, "loss": 2.1305, "step": 17167 }, { "epoch": 0.9210300429184549, "grad_norm": 2.34375, "learning_rate": 4.694632608984684e-06, "loss": 2.2966, "step": 17168 }, { "epoch": 0.9210836909871245, "grad_norm": 0.462890625, "learning_rate": 4.6945909998971536e-06, "loss": 2.5417, "step": 17169 }, { "epoch": 0.921137339055794, "grad_norm": 0.494140625, "learning_rate": 4.694549388159428e-06, "loss": 2.511, "step": 17170 }, { "epoch": 0.9211909871244636, "grad_norm": 0.5234375, "learning_rate": 4.694507773771556e-06, "loss": 2.0688, "step": 17171 }, { "epoch": 0.921244635193133, "grad_norm": 0.439453125, "learning_rate": 4.694466156733588e-06, "loss": 2.0239, "step": 17172 }, { "epoch": 0.9212982832618025, "grad_norm": 0.390625, "learning_rate": 4.694424537045575e-06, "loss": 2.361, "step": 17173 }, { "epoch": 0.9213519313304721, "grad_norm": 0.4296875, "learning_rate": 4.694382914707567e-06, "loss": 2.5299, "step": 17174 }, { "epoch": 0.9214055793991416, "grad_norm": 0.57421875, "learning_rate": 4.694341289719613e-06, "loss": 2.2951, "step": 17175 }, { "epoch": 0.9214592274678112, "grad_norm": 0.470703125, "learning_rate": 4.694299662081765e-06, "loss": 2.3427, "step": 17176 }, { "epoch": 0.9215128755364806, "grad_norm": 0.5390625, "learning_rate": 4.694258031794072e-06, "loss": 2.0986, "step": 17177 }, { "epoch": 0.9215665236051502, "grad_norm": 0.49609375, "learning_rate": 4.694216398856584e-06, "loss": 1.9963, "step": 17178 }, { "epoch": 0.9216201716738197, "grad_norm": 0.486328125, "learning_rate": 4.694174763269354e-06, "loss": 2.3049, "step": 17179 }, { "epoch": 0.9216738197424893, "grad_norm": 0.515625, "learning_rate": 4.69413312503243e-06, "loss": 2.2498, "step": 17180 }, { "epoch": 0.9217274678111588, "grad_norm": 0.5390625, "learning_rate": 4.694091484145861e-06, "loss": 2.1627, "step": 17181 }, { "epoch": 0.9217811158798284, "grad_norm": 0.5078125, "learning_rate": 4.6940498406097e-06, "loss": 2.2517, "step": 17182 }, { "epoch": 0.9218347639484978, "grad_norm": 0.451171875, "learning_rate": 4.694008194423996e-06, "loss": 2.2557, "step": 17183 }, { "epoch": 0.9218884120171674, "grad_norm": 0.47265625, "learning_rate": 4.693966545588799e-06, "loss": 1.6167, "step": 17184 }, { "epoch": 0.9219420600858369, "grad_norm": 0.4609375, "learning_rate": 4.693924894104161e-06, "loss": 2.0925, "step": 17185 }, { "epoch": 0.9219957081545065, "grad_norm": 0.42578125, "learning_rate": 4.6938832399701305e-06, "loss": 2.0887, "step": 17186 }, { "epoch": 0.922049356223176, "grad_norm": 0.5546875, "learning_rate": 4.693841583186758e-06, "loss": 2.3151, "step": 17187 }, { "epoch": 0.9221030042918454, "grad_norm": 0.625, "learning_rate": 4.6937999237540945e-06, "loss": 2.2665, "step": 17188 }, { "epoch": 0.922156652360515, "grad_norm": 0.4296875, "learning_rate": 4.69375826167219e-06, "loss": 2.4222, "step": 17189 }, { "epoch": 0.9222103004291845, "grad_norm": 0.55859375, "learning_rate": 4.693716596941094e-06, "loss": 2.0608, "step": 17190 }, { "epoch": 0.9222639484978541, "grad_norm": 0.46484375, "learning_rate": 4.693674929560858e-06, "loss": 2.0428, "step": 17191 }, { "epoch": 0.9223175965665236, "grad_norm": 0.435546875, "learning_rate": 4.693633259531533e-06, "loss": 1.7248, "step": 17192 }, { "epoch": 0.9223712446351932, "grad_norm": 0.435546875, "learning_rate": 4.693591586853168e-06, "loss": 2.1989, "step": 17193 }, { "epoch": 0.9224248927038626, "grad_norm": 0.458984375, "learning_rate": 4.693549911525813e-06, "loss": 2.3986, "step": 17194 }, { "epoch": 0.9224785407725322, "grad_norm": 0.59765625, "learning_rate": 4.693508233549518e-06, "loss": 2.1128, "step": 17195 }, { "epoch": 0.9225321888412017, "grad_norm": 0.427734375, "learning_rate": 4.693466552924336e-06, "loss": 2.3889, "step": 17196 }, { "epoch": 0.9225858369098713, "grad_norm": 0.470703125, "learning_rate": 4.693424869650315e-06, "loss": 2.1986, "step": 17197 }, { "epoch": 0.9226394849785408, "grad_norm": 0.482421875, "learning_rate": 4.693383183727506e-06, "loss": 2.2569, "step": 17198 }, { "epoch": 0.9226931330472103, "grad_norm": 0.48828125, "learning_rate": 4.693341495155959e-06, "loss": 2.3744, "step": 17199 }, { "epoch": 0.9227467811158798, "grad_norm": 0.431640625, "learning_rate": 4.693299803935724e-06, "loss": 2.6345, "step": 17200 }, { "epoch": 0.9228004291845494, "grad_norm": 0.5, "learning_rate": 4.693258110066853e-06, "loss": 2.3768, "step": 17201 }, { "epoch": 0.9228540772532189, "grad_norm": 0.6796875, "learning_rate": 4.693216413549394e-06, "loss": 1.7885, "step": 17202 }, { "epoch": 0.9229077253218884, "grad_norm": 0.5390625, "learning_rate": 4.693174714383399e-06, "loss": 2.4486, "step": 17203 }, { "epoch": 0.922961373390558, "grad_norm": 0.44921875, "learning_rate": 4.693133012568919e-06, "loss": 2.4721, "step": 17204 }, { "epoch": 0.9230150214592274, "grad_norm": 0.443359375, "learning_rate": 4.693091308106002e-06, "loss": 2.2104, "step": 17205 }, { "epoch": 0.923068669527897, "grad_norm": 0.5703125, "learning_rate": 4.6930496009947014e-06, "loss": 2.4692, "step": 17206 }, { "epoch": 0.9231223175965665, "grad_norm": 0.57421875, "learning_rate": 4.693007891235065e-06, "loss": 2.2101, "step": 17207 }, { "epoch": 0.9231759656652361, "grad_norm": 0.421875, "learning_rate": 4.692966178827144e-06, "loss": 2.1896, "step": 17208 }, { "epoch": 0.9232296137339056, "grad_norm": 0.62890625, "learning_rate": 4.69292446377099e-06, "loss": 1.4237, "step": 17209 }, { "epoch": 0.9232832618025751, "grad_norm": 0.482421875, "learning_rate": 4.69288274606665e-06, "loss": 2.1933, "step": 17210 }, { "epoch": 0.9233369098712446, "grad_norm": 0.400390625, "learning_rate": 4.692841025714178e-06, "loss": 2.0797, "step": 17211 }, { "epoch": 0.9233905579399142, "grad_norm": 0.5, "learning_rate": 4.692799302713622e-06, "loss": 2.2832, "step": 17212 }, { "epoch": 0.9234442060085837, "grad_norm": 0.4609375, "learning_rate": 4.692757577065034e-06, "loss": 2.2228, "step": 17213 }, { "epoch": 0.9234978540772533, "grad_norm": 0.54296875, "learning_rate": 4.692715848768464e-06, "loss": 2.3186, "step": 17214 }, { "epoch": 0.9235515021459227, "grad_norm": 0.453125, "learning_rate": 4.6926741178239615e-06, "loss": 2.1801, "step": 17215 }, { "epoch": 0.9236051502145923, "grad_norm": 0.38671875, "learning_rate": 4.692632384231578e-06, "loss": 2.2086, "step": 17216 }, { "epoch": 0.9236587982832618, "grad_norm": 0.54296875, "learning_rate": 4.692590647991363e-06, "loss": 2.5804, "step": 17217 }, { "epoch": 0.9237124463519313, "grad_norm": 0.455078125, "learning_rate": 4.6925489091033675e-06, "loss": 2.1532, "step": 17218 }, { "epoch": 0.9237660944206009, "grad_norm": 0.5859375, "learning_rate": 4.6925071675676415e-06, "loss": 2.2231, "step": 17219 }, { "epoch": 0.9238197424892703, "grad_norm": 0.51171875, "learning_rate": 4.692465423384236e-06, "loss": 2.027, "step": 17220 }, { "epoch": 0.9238733905579399, "grad_norm": 0.484375, "learning_rate": 4.692423676553201e-06, "loss": 2.4149, "step": 17221 }, { "epoch": 0.9239270386266094, "grad_norm": 0.443359375, "learning_rate": 4.6923819270745875e-06, "loss": 2.2133, "step": 17222 }, { "epoch": 0.923980686695279, "grad_norm": 0.482421875, "learning_rate": 4.692340174948444e-06, "loss": 2.3776, "step": 17223 }, { "epoch": 0.9240343347639485, "grad_norm": 0.41015625, "learning_rate": 4.692298420174823e-06, "loss": 2.1064, "step": 17224 }, { "epoch": 0.9240879828326181, "grad_norm": 0.447265625, "learning_rate": 4.692256662753774e-06, "loss": 2.2157, "step": 17225 }, { "epoch": 0.9241416309012875, "grad_norm": 0.451171875, "learning_rate": 4.692214902685348e-06, "loss": 2.1785, "step": 17226 }, { "epoch": 0.9241952789699571, "grad_norm": 0.5390625, "learning_rate": 4.692173139969596e-06, "loss": 2.4523, "step": 17227 }, { "epoch": 0.9242489270386266, "grad_norm": 0.421875, "learning_rate": 4.692131374606565e-06, "loss": 2.4106, "step": 17228 }, { "epoch": 0.9243025751072962, "grad_norm": 0.41015625, "learning_rate": 4.6920896065963105e-06, "loss": 2.3383, "step": 17229 }, { "epoch": 0.9243562231759657, "grad_norm": 0.474609375, "learning_rate": 4.6920478359388785e-06, "loss": 2.5121, "step": 17230 }, { "epoch": 0.9244098712446351, "grad_norm": 0.5546875, "learning_rate": 4.692006062634322e-06, "loss": 1.9979, "step": 17231 }, { "epoch": 0.9244635193133047, "grad_norm": 0.458984375, "learning_rate": 4.691964286682691e-06, "loss": 2.0812, "step": 17232 }, { "epoch": 0.9245171673819742, "grad_norm": 0.515625, "learning_rate": 4.691922508084036e-06, "loss": 2.2748, "step": 17233 }, { "epoch": 0.9245708154506438, "grad_norm": 0.453125, "learning_rate": 4.691880726838407e-06, "loss": 2.2734, "step": 17234 }, { "epoch": 0.9246244635193133, "grad_norm": 0.51953125, "learning_rate": 4.691838942945854e-06, "loss": 2.3656, "step": 17235 }, { "epoch": 0.9246781115879829, "grad_norm": 0.5390625, "learning_rate": 4.691797156406428e-06, "loss": 2.0882, "step": 17236 }, { "epoch": 0.9247317596566523, "grad_norm": 0.5, "learning_rate": 4.69175536722018e-06, "loss": 2.0625, "step": 17237 }, { "epoch": 0.9247854077253219, "grad_norm": 0.353515625, "learning_rate": 4.69171357538716e-06, "loss": 2.0844, "step": 17238 }, { "epoch": 0.9248390557939914, "grad_norm": 0.51171875, "learning_rate": 4.691671780907419e-06, "loss": 2.1943, "step": 17239 }, { "epoch": 0.924892703862661, "grad_norm": 0.36328125, "learning_rate": 4.691629983781006e-06, "loss": 2.118, "step": 17240 }, { "epoch": 0.9249463519313305, "grad_norm": 0.46484375, "learning_rate": 4.691588184007973e-06, "loss": 2.3588, "step": 17241 }, { "epoch": 0.925, "grad_norm": 0.37109375, "learning_rate": 4.69154638158837e-06, "loss": 1.8774, "step": 17242 }, { "epoch": 0.9250536480686695, "grad_norm": 0.5, "learning_rate": 4.691504576522247e-06, "loss": 2.2567, "step": 17243 }, { "epoch": 0.9251072961373391, "grad_norm": 0.48828125, "learning_rate": 4.691462768809655e-06, "loss": 2.3949, "step": 17244 }, { "epoch": 0.9251609442060086, "grad_norm": 0.65625, "learning_rate": 4.691420958450644e-06, "loss": 2.4208, "step": 17245 }, { "epoch": 0.9252145922746781, "grad_norm": 0.42578125, "learning_rate": 4.6913791454452655e-06, "loss": 2.2579, "step": 17246 }, { "epoch": 0.9252682403433476, "grad_norm": 0.4296875, "learning_rate": 4.691337329793568e-06, "loss": 2.2802, "step": 17247 }, { "epoch": 0.9253218884120171, "grad_norm": 0.478515625, "learning_rate": 4.691295511495605e-06, "loss": 2.2964, "step": 17248 }, { "epoch": 0.9253755364806867, "grad_norm": 0.447265625, "learning_rate": 4.691253690551424e-06, "loss": 2.2422, "step": 17249 }, { "epoch": 0.9254291845493562, "grad_norm": 0.890625, "learning_rate": 4.691211866961078e-06, "loss": 2.2486, "step": 17250 }, { "epoch": 0.9254828326180258, "grad_norm": 0.5390625, "learning_rate": 4.691170040724616e-06, "loss": 2.3117, "step": 17251 }, { "epoch": 0.9255364806866953, "grad_norm": 0.56640625, "learning_rate": 4.691128211842089e-06, "loss": 2.3708, "step": 17252 }, { "epoch": 0.9255901287553648, "grad_norm": 0.431640625, "learning_rate": 4.691086380313546e-06, "loss": 2.319, "step": 17253 }, { "epoch": 0.9256437768240343, "grad_norm": 0.640625, "learning_rate": 4.6910445461390405e-06, "loss": 2.451, "step": 17254 }, { "epoch": 0.9256974248927039, "grad_norm": 0.5625, "learning_rate": 4.691002709318621e-06, "loss": 2.215, "step": 17255 }, { "epoch": 0.9257510729613734, "grad_norm": 0.46484375, "learning_rate": 4.690960869852338e-06, "loss": 2.1974, "step": 17256 }, { "epoch": 0.925804721030043, "grad_norm": 0.453125, "learning_rate": 4.690919027740242e-06, "loss": 2.2848, "step": 17257 }, { "epoch": 0.9258583690987124, "grad_norm": 0.58984375, "learning_rate": 4.690877182982385e-06, "loss": 2.3048, "step": 17258 }, { "epoch": 0.925912017167382, "grad_norm": 0.412109375, "learning_rate": 4.690835335578816e-06, "loss": 2.4513, "step": 17259 }, { "epoch": 0.9259656652360515, "grad_norm": 0.50390625, "learning_rate": 4.690793485529586e-06, "loss": 2.3365, "step": 17260 }, { "epoch": 0.926019313304721, "grad_norm": 0.365234375, "learning_rate": 4.690751632834746e-06, "loss": 2.2876, "step": 17261 }, { "epoch": 0.9260729613733906, "grad_norm": 0.494140625, "learning_rate": 4.690709777494345e-06, "loss": 2.2699, "step": 17262 }, { "epoch": 0.92612660944206, "grad_norm": 0.515625, "learning_rate": 4.690667919508436e-06, "loss": 2.5102, "step": 17263 }, { "epoch": 0.9261802575107296, "grad_norm": 0.52734375, "learning_rate": 4.690626058877068e-06, "loss": 2.1372, "step": 17264 }, { "epoch": 0.9262339055793991, "grad_norm": 0.5078125, "learning_rate": 4.690584195600291e-06, "loss": 2.3306, "step": 17265 }, { "epoch": 0.9262875536480687, "grad_norm": 0.5, "learning_rate": 4.690542329678157e-06, "loss": 2.1942, "step": 17266 }, { "epoch": 0.9263412017167382, "grad_norm": 0.53125, "learning_rate": 4.690500461110716e-06, "loss": 2.3363, "step": 17267 }, { "epoch": 0.9263948497854078, "grad_norm": 0.474609375, "learning_rate": 4.690458589898018e-06, "loss": 2.3945, "step": 17268 }, { "epoch": 0.9264484978540772, "grad_norm": 0.416015625, "learning_rate": 4.690416716040114e-06, "loss": 2.345, "step": 17269 }, { "epoch": 0.9265021459227468, "grad_norm": 0.56640625, "learning_rate": 4.690374839537054e-06, "loss": 2.4127, "step": 17270 }, { "epoch": 0.9265557939914163, "grad_norm": 0.458984375, "learning_rate": 4.6903329603888905e-06, "loss": 2.3497, "step": 17271 }, { "epoch": 0.9266094420600859, "grad_norm": 0.486328125, "learning_rate": 4.6902910785956715e-06, "loss": 2.5957, "step": 17272 }, { "epoch": 0.9266630901287554, "grad_norm": 0.435546875, "learning_rate": 4.69024919415745e-06, "loss": 2.2694, "step": 17273 }, { "epoch": 0.9267167381974248, "grad_norm": 0.400390625, "learning_rate": 4.6902073070742746e-06, "loss": 2.3187, "step": 17274 }, { "epoch": 0.9267703862660944, "grad_norm": 0.6875, "learning_rate": 4.6901654173461966e-06, "loss": 2.3751, "step": 17275 }, { "epoch": 0.9268240343347639, "grad_norm": 0.490234375, "learning_rate": 4.690123524973267e-06, "loss": 2.0938, "step": 17276 }, { "epoch": 0.9268776824034335, "grad_norm": 0.859375, "learning_rate": 4.690081629955535e-06, "loss": 1.849, "step": 17277 }, { "epoch": 0.926931330472103, "grad_norm": 0.42578125, "learning_rate": 4.690039732293054e-06, "loss": 1.9534, "step": 17278 }, { "epoch": 0.9269849785407726, "grad_norm": 0.453125, "learning_rate": 4.68999783198587e-06, "loss": 2.3351, "step": 17279 }, { "epoch": 0.927038626609442, "grad_norm": 0.51171875, "learning_rate": 4.689955929034039e-06, "loss": 2.2804, "step": 17280 }, { "epoch": 0.9270922746781116, "grad_norm": 1.21875, "learning_rate": 4.689914023437608e-06, "loss": 2.2372, "step": 17281 }, { "epoch": 0.9271459227467811, "grad_norm": 0.474609375, "learning_rate": 4.689872115196629e-06, "loss": 2.2449, "step": 17282 }, { "epoch": 0.9271995708154507, "grad_norm": 0.42578125, "learning_rate": 4.689830204311152e-06, "loss": 2.2263, "step": 17283 }, { "epoch": 0.9272532188841202, "grad_norm": 0.5078125, "learning_rate": 4.689788290781228e-06, "loss": 2.4385, "step": 17284 }, { "epoch": 0.9273068669527897, "grad_norm": 0.58203125, "learning_rate": 4.689746374606907e-06, "loss": 2.6569, "step": 17285 }, { "epoch": 0.9273605150214592, "grad_norm": 0.43359375, "learning_rate": 4.689704455788241e-06, "loss": 2.1562, "step": 17286 }, { "epoch": 0.9274141630901288, "grad_norm": 0.408203125, "learning_rate": 4.6896625343252784e-06, "loss": 2.3139, "step": 17287 }, { "epoch": 0.9274678111587983, "grad_norm": 0.419921875, "learning_rate": 4.689620610218072e-06, "loss": 2.1527, "step": 17288 }, { "epoch": 0.9275214592274678, "grad_norm": 0.5546875, "learning_rate": 4.689578683466671e-06, "loss": 2.301, "step": 17289 }, { "epoch": 0.9275751072961373, "grad_norm": 0.51171875, "learning_rate": 4.689536754071128e-06, "loss": 2.2408, "step": 17290 }, { "epoch": 0.9276287553648068, "grad_norm": 0.671875, "learning_rate": 4.689494822031491e-06, "loss": 2.1796, "step": 17291 }, { "epoch": 0.9276824034334764, "grad_norm": 0.43359375, "learning_rate": 4.689452887347812e-06, "loss": 2.271, "step": 17292 }, { "epoch": 0.9277360515021459, "grad_norm": 0.486328125, "learning_rate": 4.689410950020141e-06, "loss": 2.5314, "step": 17293 }, { "epoch": 0.9277896995708155, "grad_norm": 1.0859375, "learning_rate": 4.68936901004853e-06, "loss": 2.3972, "step": 17294 }, { "epoch": 0.927843347639485, "grad_norm": 0.439453125, "learning_rate": 4.689327067433028e-06, "loss": 2.1222, "step": 17295 }, { "epoch": 0.9278969957081545, "grad_norm": 0.42578125, "learning_rate": 4.689285122173687e-06, "loss": 2.371, "step": 17296 }, { "epoch": 0.927950643776824, "grad_norm": 0.47265625, "learning_rate": 4.689243174270557e-06, "loss": 2.2188, "step": 17297 }, { "epoch": 0.9280042918454936, "grad_norm": 0.75390625, "learning_rate": 4.689201223723689e-06, "loss": 1.955, "step": 17298 }, { "epoch": 0.9280579399141631, "grad_norm": 0.443359375, "learning_rate": 4.6891592705331325e-06, "loss": 2.3515, "step": 17299 }, { "epoch": 0.9281115879828327, "grad_norm": 0.419921875, "learning_rate": 4.68911731469894e-06, "loss": 2.4492, "step": 17300 }, { "epoch": 0.9281652360515021, "grad_norm": 0.42578125, "learning_rate": 4.689075356221161e-06, "loss": 2.3028, "step": 17301 }, { "epoch": 0.9282188841201717, "grad_norm": 0.4453125, "learning_rate": 4.689033395099845e-06, "loss": 2.1484, "step": 17302 }, { "epoch": 0.9282725321888412, "grad_norm": 0.55078125, "learning_rate": 4.688991431335046e-06, "loss": 2.147, "step": 17303 }, { "epoch": 0.9283261802575107, "grad_norm": 0.515625, "learning_rate": 4.6889494649268116e-06, "loss": 2.2788, "step": 17304 }, { "epoch": 0.9283798283261803, "grad_norm": 0.5234375, "learning_rate": 4.688907495875195e-06, "loss": 2.5973, "step": 17305 }, { "epoch": 0.9284334763948497, "grad_norm": 0.439453125, "learning_rate": 4.6888655241802435e-06, "loss": 2.053, "step": 17306 }, { "epoch": 0.9284871244635193, "grad_norm": 0.51953125, "learning_rate": 4.688823549842011e-06, "loss": 2.4308, "step": 17307 }, { "epoch": 0.9285407725321888, "grad_norm": 0.478515625, "learning_rate": 4.688781572860548e-06, "loss": 2.1419, "step": 17308 }, { "epoch": 0.9285944206008584, "grad_norm": 0.65234375, "learning_rate": 4.6887395932359025e-06, "loss": 2.2245, "step": 17309 }, { "epoch": 0.9286480686695279, "grad_norm": 0.43359375, "learning_rate": 4.6886976109681274e-06, "loss": 2.4606, "step": 17310 }, { "epoch": 0.9287017167381975, "grad_norm": 0.462890625, "learning_rate": 4.688655626057272e-06, "loss": 2.1979, "step": 17311 }, { "epoch": 0.9287553648068669, "grad_norm": 0.43359375, "learning_rate": 4.68861363850339e-06, "loss": 2.3147, "step": 17312 }, { "epoch": 0.9288090128755365, "grad_norm": 0.470703125, "learning_rate": 4.688571648306528e-06, "loss": 2.0954, "step": 17313 }, { "epoch": 0.928862660944206, "grad_norm": 0.435546875, "learning_rate": 4.6885296554667395e-06, "loss": 2.1617, "step": 17314 }, { "epoch": 0.9289163090128756, "grad_norm": 0.48828125, "learning_rate": 4.688487659984075e-06, "loss": 2.0764, "step": 17315 }, { "epoch": 0.9289699570815451, "grad_norm": 0.43359375, "learning_rate": 4.6884456618585825e-06, "loss": 2.3244, "step": 17316 }, { "epoch": 0.9290236051502145, "grad_norm": 0.494140625, "learning_rate": 4.688403661090317e-06, "loss": 2.2082, "step": 17317 }, { "epoch": 0.9290772532188841, "grad_norm": 0.4453125, "learning_rate": 4.6883616576793255e-06, "loss": 1.4786, "step": 17318 }, { "epoch": 0.9291309012875536, "grad_norm": 0.57421875, "learning_rate": 4.688319651625662e-06, "loss": 2.4386, "step": 17319 }, { "epoch": 0.9291845493562232, "grad_norm": 1.0703125, "learning_rate": 4.688277642929374e-06, "loss": 2.1325, "step": 17320 }, { "epoch": 0.9292381974248927, "grad_norm": 0.578125, "learning_rate": 4.6882356315905146e-06, "loss": 2.2286, "step": 17321 }, { "epoch": 0.9292918454935623, "grad_norm": 0.55078125, "learning_rate": 4.688193617609132e-06, "loss": 2.4061, "step": 17322 }, { "epoch": 0.9293454935622317, "grad_norm": 0.5546875, "learning_rate": 4.68815160098528e-06, "loss": 2.3897, "step": 17323 }, { "epoch": 0.9293991416309013, "grad_norm": 0.458984375, "learning_rate": 4.688109581719008e-06, "loss": 1.9039, "step": 17324 }, { "epoch": 0.9294527896995708, "grad_norm": 0.4375, "learning_rate": 4.688067559810366e-06, "loss": 2.4182, "step": 17325 }, { "epoch": 0.9295064377682404, "grad_norm": 0.484375, "learning_rate": 4.688025535259406e-06, "loss": 2.247, "step": 17326 }, { "epoch": 0.9295600858369099, "grad_norm": 0.435546875, "learning_rate": 4.687983508066178e-06, "loss": 2.2912, "step": 17327 }, { "epoch": 0.9296137339055794, "grad_norm": 0.41796875, "learning_rate": 4.687941478230733e-06, "loss": 2.1705, "step": 17328 }, { "epoch": 0.9296673819742489, "grad_norm": 0.57421875, "learning_rate": 4.687899445753122e-06, "loss": 2.4432, "step": 17329 }, { "epoch": 0.9297210300429185, "grad_norm": 0.43359375, "learning_rate": 4.687857410633394e-06, "loss": 2.1777, "step": 17330 }, { "epoch": 0.929774678111588, "grad_norm": 0.4296875, "learning_rate": 4.6878153728716025e-06, "loss": 2.386, "step": 17331 }, { "epoch": 0.9298283261802575, "grad_norm": 0.4609375, "learning_rate": 4.6877733324677965e-06, "loss": 2.3935, "step": 17332 }, { "epoch": 0.929881974248927, "grad_norm": 0.404296875, "learning_rate": 4.687731289422027e-06, "loss": 2.2131, "step": 17333 }, { "epoch": 0.9299356223175965, "grad_norm": 0.419921875, "learning_rate": 4.6876892437343455e-06, "loss": 2.2046, "step": 17334 }, { "epoch": 0.9299892703862661, "grad_norm": 0.6015625, "learning_rate": 4.687647195404802e-06, "loss": 2.0071, "step": 17335 }, { "epoch": 0.9300429184549356, "grad_norm": 0.4140625, "learning_rate": 4.687605144433448e-06, "loss": 2.2628, "step": 17336 }, { "epoch": 0.9300965665236052, "grad_norm": 0.484375, "learning_rate": 4.687563090820334e-06, "loss": 2.185, "step": 17337 }, { "epoch": 0.9301502145922746, "grad_norm": 0.37109375, "learning_rate": 4.68752103456551e-06, "loss": 2.0795, "step": 17338 }, { "epoch": 0.9302038626609442, "grad_norm": 0.482421875, "learning_rate": 4.687478975669027e-06, "loss": 2.3482, "step": 17339 }, { "epoch": 0.9302575107296137, "grad_norm": 0.482421875, "learning_rate": 4.687436914130937e-06, "loss": 2.4763, "step": 17340 }, { "epoch": 0.9303111587982833, "grad_norm": 0.5, "learning_rate": 4.68739484995129e-06, "loss": 2.3021, "step": 17341 }, { "epoch": 0.9303648068669528, "grad_norm": 0.4765625, "learning_rate": 4.687352783130136e-06, "loss": 2.2565, "step": 17342 }, { "epoch": 0.9304184549356224, "grad_norm": 0.41796875, "learning_rate": 4.687310713667527e-06, "loss": 2.193, "step": 17343 }, { "epoch": 0.9304721030042918, "grad_norm": 0.455078125, "learning_rate": 4.687268641563514e-06, "loss": 2.1179, "step": 17344 }, { "epoch": 0.9305257510729614, "grad_norm": 0.45703125, "learning_rate": 4.687226566818146e-06, "loss": 2.4068, "step": 17345 }, { "epoch": 0.9305793991416309, "grad_norm": 0.9296875, "learning_rate": 4.687184489431476e-06, "loss": 2.4231, "step": 17346 }, { "epoch": 0.9306330472103004, "grad_norm": 0.57421875, "learning_rate": 4.687142409403553e-06, "loss": 2.4376, "step": 17347 }, { "epoch": 0.93068669527897, "grad_norm": 0.431640625, "learning_rate": 4.687100326734429e-06, "loss": 2.22, "step": 17348 }, { "epoch": 0.9307403433476394, "grad_norm": 0.76171875, "learning_rate": 4.687058241424155e-06, "loss": 2.2564, "step": 17349 }, { "epoch": 0.930793991416309, "grad_norm": 0.45703125, "learning_rate": 4.6870161534727806e-06, "loss": 2.1507, "step": 17350 }, { "epoch": 0.9308476394849785, "grad_norm": 0.451171875, "learning_rate": 4.686974062880358e-06, "loss": 2.2334, "step": 17351 }, { "epoch": 0.9309012875536481, "grad_norm": 0.5078125, "learning_rate": 4.686931969646937e-06, "loss": 2.343, "step": 17352 }, { "epoch": 0.9309549356223176, "grad_norm": 0.5078125, "learning_rate": 4.6868898737725686e-06, "loss": 2.2686, "step": 17353 }, { "epoch": 0.9310085836909872, "grad_norm": 0.48828125, "learning_rate": 4.686847775257303e-06, "loss": 2.53, "step": 17354 }, { "epoch": 0.9310622317596566, "grad_norm": 1.71875, "learning_rate": 4.686805674101193e-06, "loss": 2.4241, "step": 17355 }, { "epoch": 0.9311158798283262, "grad_norm": 0.4453125, "learning_rate": 4.686763570304288e-06, "loss": 2.3035, "step": 17356 }, { "epoch": 0.9311695278969957, "grad_norm": 0.462890625, "learning_rate": 4.686721463866639e-06, "loss": 2.3161, "step": 17357 }, { "epoch": 0.9312231759656653, "grad_norm": 0.416015625, "learning_rate": 4.686679354788297e-06, "loss": 2.2278, "step": 17358 }, { "epoch": 0.9312768240343348, "grad_norm": 0.421875, "learning_rate": 4.6866372430693125e-06, "loss": 2.3322, "step": 17359 }, { "epoch": 0.9313304721030042, "grad_norm": 0.46484375, "learning_rate": 4.686595128709737e-06, "loss": 2.4139, "step": 17360 }, { "epoch": 0.9313841201716738, "grad_norm": 0.39453125, "learning_rate": 4.686553011709621e-06, "loss": 2.0169, "step": 17361 }, { "epoch": 0.9314377682403433, "grad_norm": 0.427734375, "learning_rate": 4.686510892069015e-06, "loss": 2.1778, "step": 17362 }, { "epoch": 0.9314914163090129, "grad_norm": 0.455078125, "learning_rate": 4.686468769787971e-06, "loss": 2.2346, "step": 17363 }, { "epoch": 0.9315450643776824, "grad_norm": 0.443359375, "learning_rate": 4.686426644866539e-06, "loss": 2.3296, "step": 17364 }, { "epoch": 0.931598712446352, "grad_norm": 0.4140625, "learning_rate": 4.6863845173047694e-06, "loss": 2.3682, "step": 17365 }, { "epoch": 0.9316523605150214, "grad_norm": 0.44921875, "learning_rate": 4.686342387102714e-06, "loss": 1.9272, "step": 17366 }, { "epoch": 0.931706008583691, "grad_norm": 0.53515625, "learning_rate": 4.686300254260423e-06, "loss": 2.2976, "step": 17367 }, { "epoch": 0.9317596566523605, "grad_norm": 0.478515625, "learning_rate": 4.686258118777948e-06, "loss": 2.3323, "step": 17368 }, { "epoch": 0.9318133047210301, "grad_norm": 0.353515625, "learning_rate": 4.68621598065534e-06, "loss": 2.1322, "step": 17369 }, { "epoch": 0.9318669527896996, "grad_norm": 0.45703125, "learning_rate": 4.686173839892648e-06, "loss": 2.2306, "step": 17370 }, { "epoch": 0.9319206008583691, "grad_norm": 0.7734375, "learning_rate": 4.686131696489925e-06, "loss": 2.17, "step": 17371 }, { "epoch": 0.9319742489270386, "grad_norm": 0.77734375, "learning_rate": 4.686089550447222e-06, "loss": 2.028, "step": 17372 }, { "epoch": 0.9320278969957082, "grad_norm": 0.427734375, "learning_rate": 4.686047401764589e-06, "loss": 2.159, "step": 17373 }, { "epoch": 0.9320815450643777, "grad_norm": 0.466796875, "learning_rate": 4.686005250442076e-06, "loss": 2.2047, "step": 17374 }, { "epoch": 0.9321351931330472, "grad_norm": 0.80078125, "learning_rate": 4.6859630964797345e-06, "loss": 2.2762, "step": 17375 }, { "epoch": 0.9321888412017167, "grad_norm": 0.51171875, "learning_rate": 4.685920939877617e-06, "loss": 2.2997, "step": 17376 }, { "epoch": 0.9322424892703862, "grad_norm": 0.46484375, "learning_rate": 4.685878780635772e-06, "loss": 1.9534, "step": 17377 }, { "epoch": 0.9322961373390558, "grad_norm": 0.44140625, "learning_rate": 4.685836618754253e-06, "loss": 2.3866, "step": 17378 }, { "epoch": 0.9323497854077253, "grad_norm": 0.515625, "learning_rate": 4.685794454233108e-06, "loss": 2.1553, "step": 17379 }, { "epoch": 0.9324034334763949, "grad_norm": 0.5, "learning_rate": 4.68575228707239e-06, "loss": 2.4934, "step": 17380 }, { "epoch": 0.9324570815450643, "grad_norm": 0.6484375, "learning_rate": 4.68571011727215e-06, "loss": 2.34, "step": 17381 }, { "epoch": 0.9325107296137339, "grad_norm": 0.48046875, "learning_rate": 4.685667944832437e-06, "loss": 1.9479, "step": 17382 }, { "epoch": 0.9325643776824034, "grad_norm": 0.421875, "learning_rate": 4.685625769753305e-06, "loss": 2.1584, "step": 17383 }, { "epoch": 0.932618025751073, "grad_norm": 0.53125, "learning_rate": 4.685583592034802e-06, "loss": 2.12, "step": 17384 }, { "epoch": 0.9326716738197425, "grad_norm": 0.5234375, "learning_rate": 4.685541411676981e-06, "loss": 2.2927, "step": 17385 }, { "epoch": 0.9327253218884121, "grad_norm": 0.52734375, "learning_rate": 4.685499228679891e-06, "loss": 2.4059, "step": 17386 }, { "epoch": 0.9327789699570815, "grad_norm": 0.47265625, "learning_rate": 4.685457043043583e-06, "loss": 2.3335, "step": 17387 }, { "epoch": 0.9328326180257511, "grad_norm": 0.43359375, "learning_rate": 4.68541485476811e-06, "loss": 2.2318, "step": 17388 }, { "epoch": 0.9328862660944206, "grad_norm": 0.37890625, "learning_rate": 4.685372663853523e-06, "loss": 2.2189, "step": 17389 }, { "epoch": 0.9329399141630901, "grad_norm": 0.44921875, "learning_rate": 4.68533047029987e-06, "loss": 2.2322, "step": 17390 }, { "epoch": 0.9329935622317597, "grad_norm": 0.625, "learning_rate": 4.685288274107204e-06, "loss": 2.139, "step": 17391 }, { "epoch": 0.9330472103004291, "grad_norm": 0.6796875, "learning_rate": 4.685246075275576e-06, "loss": 2.5342, "step": 17392 }, { "epoch": 0.9331008583690987, "grad_norm": 0.498046875, "learning_rate": 4.685203873805037e-06, "loss": 2.3827, "step": 17393 }, { "epoch": 0.9331545064377682, "grad_norm": 0.546875, "learning_rate": 4.685161669695637e-06, "loss": 2.3344, "step": 17394 }, { "epoch": 0.9332081545064378, "grad_norm": 0.482421875, "learning_rate": 4.685119462947428e-06, "loss": 2.2627, "step": 17395 }, { "epoch": 0.9332618025751073, "grad_norm": 0.48046875, "learning_rate": 4.68507725356046e-06, "loss": 2.4274, "step": 17396 }, { "epoch": 0.9333154506437769, "grad_norm": 0.49609375, "learning_rate": 4.685035041534786e-06, "loss": 2.2985, "step": 17397 }, { "epoch": 0.9333690987124463, "grad_norm": 0.47265625, "learning_rate": 4.6849928268704535e-06, "loss": 2.1183, "step": 17398 }, { "epoch": 0.9334227467811159, "grad_norm": 0.478515625, "learning_rate": 4.684950609567517e-06, "loss": 2.1028, "step": 17399 }, { "epoch": 0.9334763948497854, "grad_norm": 0.44140625, "learning_rate": 4.684908389626025e-06, "loss": 2.4135, "step": 17400 }, { "epoch": 0.933530042918455, "grad_norm": 0.486328125, "learning_rate": 4.68486616704603e-06, "loss": 2.2013, "step": 17401 }, { "epoch": 0.9335836909871245, "grad_norm": 0.48828125, "learning_rate": 4.684823941827582e-06, "loss": 2.385, "step": 17402 }, { "epoch": 0.933637339055794, "grad_norm": 0.4609375, "learning_rate": 4.684781713970733e-06, "loss": 2.0611, "step": 17403 }, { "epoch": 0.9336909871244635, "grad_norm": 0.50390625, "learning_rate": 4.684739483475533e-06, "loss": 2.6556, "step": 17404 }, { "epoch": 0.933744635193133, "grad_norm": 0.443359375, "learning_rate": 4.684697250342034e-06, "loss": 2.4015, "step": 17405 }, { "epoch": 0.9337982832618026, "grad_norm": 0.3984375, "learning_rate": 4.684655014570285e-06, "loss": 2.375, "step": 17406 }, { "epoch": 0.9338519313304721, "grad_norm": 0.458984375, "learning_rate": 4.68461277616034e-06, "loss": 2.0891, "step": 17407 }, { "epoch": 0.9339055793991416, "grad_norm": 0.396484375, "learning_rate": 4.6845705351122475e-06, "loss": 2.3596, "step": 17408 }, { "epoch": 0.9339592274678111, "grad_norm": 0.404296875, "learning_rate": 4.684528291426059e-06, "loss": 2.3244, "step": 17409 }, { "epoch": 0.9340128755364807, "grad_norm": 0.466796875, "learning_rate": 4.684486045101827e-06, "loss": 2.1955, "step": 17410 }, { "epoch": 0.9340665236051502, "grad_norm": 0.5390625, "learning_rate": 4.684443796139602e-06, "loss": 2.0946, "step": 17411 }, { "epoch": 0.9341201716738198, "grad_norm": 0.455078125, "learning_rate": 4.684401544539433e-06, "loss": 2.0712, "step": 17412 }, { "epoch": 0.9341738197424893, "grad_norm": 0.447265625, "learning_rate": 4.6843592903013725e-06, "loss": 2.4151, "step": 17413 }, { "epoch": 0.9342274678111588, "grad_norm": 0.50390625, "learning_rate": 4.684317033425473e-06, "loss": 2.5932, "step": 17414 }, { "epoch": 0.9342811158798283, "grad_norm": 0.427734375, "learning_rate": 4.684274773911783e-06, "loss": 2.2673, "step": 17415 }, { "epoch": 0.9343347639484979, "grad_norm": 0.7734375, "learning_rate": 4.684232511760355e-06, "loss": 2.3864, "step": 17416 }, { "epoch": 0.9343884120171674, "grad_norm": 0.6015625, "learning_rate": 4.684190246971239e-06, "loss": 2.2081, "step": 17417 }, { "epoch": 0.9344420600858369, "grad_norm": 0.439453125, "learning_rate": 4.6841479795444874e-06, "loss": 2.4297, "step": 17418 }, { "epoch": 0.9344957081545064, "grad_norm": 0.60546875, "learning_rate": 4.68410570948015e-06, "loss": 2.1528, "step": 17419 }, { "epoch": 0.9345493562231759, "grad_norm": 0.470703125, "learning_rate": 4.684063436778278e-06, "loss": 2.2571, "step": 17420 }, { "epoch": 0.9346030042918455, "grad_norm": 0.470703125, "learning_rate": 4.684021161438924e-06, "loss": 2.4106, "step": 17421 }, { "epoch": 0.934656652360515, "grad_norm": 0.46875, "learning_rate": 4.683978883462137e-06, "loss": 2.4423, "step": 17422 }, { "epoch": 0.9347103004291846, "grad_norm": 0.384765625, "learning_rate": 4.6839366028479695e-06, "loss": 1.9858, "step": 17423 }, { "epoch": 0.934763948497854, "grad_norm": 0.484375, "learning_rate": 4.683894319596472e-06, "loss": 2.2193, "step": 17424 }, { "epoch": 0.9348175965665236, "grad_norm": 0.5390625, "learning_rate": 4.6838520337076945e-06, "loss": 2.4561, "step": 17425 }, { "epoch": 0.9348712446351931, "grad_norm": 0.400390625, "learning_rate": 4.68380974518169e-06, "loss": 2.3659, "step": 17426 }, { "epoch": 0.9349248927038627, "grad_norm": 0.498046875, "learning_rate": 4.683767454018508e-06, "loss": 2.2608, "step": 17427 }, { "epoch": 0.9349785407725322, "grad_norm": 0.45703125, "learning_rate": 4.683725160218201e-06, "loss": 2.3394, "step": 17428 }, { "epoch": 0.9350321888412018, "grad_norm": 0.6171875, "learning_rate": 4.683682863780819e-06, "loss": 2.2429, "step": 17429 }, { "epoch": 0.9350858369098712, "grad_norm": 0.451171875, "learning_rate": 4.683640564706413e-06, "loss": 2.2141, "step": 17430 }, { "epoch": 0.9351394849785408, "grad_norm": 0.458984375, "learning_rate": 4.683598262995035e-06, "loss": 2.2491, "step": 17431 }, { "epoch": 0.9351931330472103, "grad_norm": 0.484375, "learning_rate": 4.683555958646735e-06, "loss": 2.2421, "step": 17432 }, { "epoch": 0.9352467811158798, "grad_norm": 0.380859375, "learning_rate": 4.6835136516615656e-06, "loss": 2.0621, "step": 17433 }, { "epoch": 0.9353004291845494, "grad_norm": 0.47265625, "learning_rate": 4.6834713420395765e-06, "loss": 2.2428, "step": 17434 }, { "epoch": 0.9353540772532188, "grad_norm": 0.498046875, "learning_rate": 4.683429029780819e-06, "loss": 2.2843, "step": 17435 }, { "epoch": 0.9354077253218884, "grad_norm": 1.6015625, "learning_rate": 4.683386714885344e-06, "loss": 2.4204, "step": 17436 }, { "epoch": 0.9354613733905579, "grad_norm": 0.4921875, "learning_rate": 4.683344397353204e-06, "loss": 2.2065, "step": 17437 }, { "epoch": 0.9355150214592275, "grad_norm": 0.5390625, "learning_rate": 4.6833020771844486e-06, "loss": 1.5003, "step": 17438 }, { "epoch": 0.935568669527897, "grad_norm": 0.46875, "learning_rate": 4.6832597543791294e-06, "loss": 2.119, "step": 17439 }, { "epoch": 0.9356223175965666, "grad_norm": 0.50390625, "learning_rate": 4.6832174289372975e-06, "loss": 2.6228, "step": 17440 }, { "epoch": 0.935675965665236, "grad_norm": 0.75, "learning_rate": 4.683175100859004e-06, "loss": 2.288, "step": 17441 }, { "epoch": 0.9357296137339056, "grad_norm": 0.45703125, "learning_rate": 4.683132770144301e-06, "loss": 2.1078, "step": 17442 }, { "epoch": 0.9357832618025751, "grad_norm": 0.546875, "learning_rate": 4.683090436793238e-06, "loss": 2.4298, "step": 17443 }, { "epoch": 0.9358369098712447, "grad_norm": 0.5078125, "learning_rate": 4.683048100805866e-06, "loss": 2.5322, "step": 17444 }, { "epoch": 0.9358905579399142, "grad_norm": 0.5546875, "learning_rate": 4.683005762182238e-06, "loss": 2.1926, "step": 17445 }, { "epoch": 0.9359442060085837, "grad_norm": 0.47265625, "learning_rate": 4.682963420922404e-06, "loss": 2.1845, "step": 17446 }, { "epoch": 0.9359978540772532, "grad_norm": 0.453125, "learning_rate": 4.6829210770264145e-06, "loss": 2.1989, "step": 17447 }, { "epoch": 0.9360515021459227, "grad_norm": 0.439453125, "learning_rate": 4.682878730494322e-06, "loss": 2.1515, "step": 17448 }, { "epoch": 0.9361051502145923, "grad_norm": 0.48828125, "learning_rate": 4.682836381326177e-06, "loss": 2.2987, "step": 17449 }, { "epoch": 0.9361587982832618, "grad_norm": 0.455078125, "learning_rate": 4.6827940295220295e-06, "loss": 2.5076, "step": 17450 }, { "epoch": 0.9362124463519313, "grad_norm": 0.392578125, "learning_rate": 4.682751675081933e-06, "loss": 2.4035, "step": 17451 }, { "epoch": 0.9362660944206008, "grad_norm": 0.55859375, "learning_rate": 4.682709318005937e-06, "loss": 2.4449, "step": 17452 }, { "epoch": 0.9363197424892704, "grad_norm": 0.44921875, "learning_rate": 4.682666958294093e-06, "loss": 2.2897, "step": 17453 }, { "epoch": 0.9363733905579399, "grad_norm": 8.75, "learning_rate": 4.682624595946452e-06, "loss": 2.3232, "step": 17454 }, { "epoch": 0.9364270386266095, "grad_norm": 0.5546875, "learning_rate": 4.6825822309630654e-06, "loss": 2.4464, "step": 17455 }, { "epoch": 0.936480686695279, "grad_norm": 0.51171875, "learning_rate": 4.682539863343984e-06, "loss": 2.4058, "step": 17456 }, { "epoch": 0.9365343347639485, "grad_norm": 0.890625, "learning_rate": 4.682497493089261e-06, "loss": 2.5826, "step": 17457 }, { "epoch": 0.936587982832618, "grad_norm": 0.427734375, "learning_rate": 4.682455120198945e-06, "loss": 2.3539, "step": 17458 }, { "epoch": 0.9366416309012876, "grad_norm": 0.396484375, "learning_rate": 4.682412744673087e-06, "loss": 2.5317, "step": 17459 }, { "epoch": 0.9366952789699571, "grad_norm": 0.48828125, "learning_rate": 4.682370366511739e-06, "loss": 2.2068, "step": 17460 }, { "epoch": 0.9367489270386266, "grad_norm": 0.478515625, "learning_rate": 4.682327985714954e-06, "loss": 2.2039, "step": 17461 }, { "epoch": 0.9368025751072961, "grad_norm": 0.498046875, "learning_rate": 4.68228560228278e-06, "loss": 2.2177, "step": 17462 }, { "epoch": 0.9368562231759656, "grad_norm": 0.408203125, "learning_rate": 4.682243216215271e-06, "loss": 2.1356, "step": 17463 }, { "epoch": 0.9369098712446352, "grad_norm": 0.60546875, "learning_rate": 4.6822008275124765e-06, "loss": 2.341, "step": 17464 }, { "epoch": 0.9369635193133047, "grad_norm": 0.435546875, "learning_rate": 4.6821584361744475e-06, "loss": 2.409, "step": 17465 }, { "epoch": 0.9370171673819743, "grad_norm": 0.44140625, "learning_rate": 4.682116042201237e-06, "loss": 2.1412, "step": 17466 }, { "epoch": 0.9370708154506437, "grad_norm": 0.4453125, "learning_rate": 4.682073645592894e-06, "loss": 2.4776, "step": 17467 }, { "epoch": 0.9371244635193133, "grad_norm": 0.67578125, "learning_rate": 4.682031246349471e-06, "loss": 2.3431, "step": 17468 }, { "epoch": 0.9371781115879828, "grad_norm": 0.6875, "learning_rate": 4.681988844471018e-06, "loss": 2.1019, "step": 17469 }, { "epoch": 0.9372317596566524, "grad_norm": 0.4765625, "learning_rate": 4.681946439957589e-06, "loss": 2.3489, "step": 17470 }, { "epoch": 0.9372854077253219, "grad_norm": 0.796875, "learning_rate": 4.681904032809232e-06, "loss": 2.5057, "step": 17471 }, { "epoch": 0.9373390557939915, "grad_norm": 0.484375, "learning_rate": 4.681861623025999e-06, "loss": 2.228, "step": 17472 }, { "epoch": 0.9373927038626609, "grad_norm": 0.5, "learning_rate": 4.681819210607943e-06, "loss": 2.3004, "step": 17473 }, { "epoch": 0.9374463519313305, "grad_norm": 0.40625, "learning_rate": 4.681776795555114e-06, "loss": 2.3885, "step": 17474 }, { "epoch": 0.9375, "grad_norm": 0.51171875, "learning_rate": 4.681734377867562e-06, "loss": 2.3576, "step": 17475 }, { "epoch": 0.9375536480686695, "grad_norm": 0.38671875, "learning_rate": 4.68169195754534e-06, "loss": 2.1367, "step": 17476 }, { "epoch": 0.9376072961373391, "grad_norm": 0.578125, "learning_rate": 4.681649534588498e-06, "loss": 2.4512, "step": 17477 }, { "epoch": 0.9376609442060085, "grad_norm": 0.4375, "learning_rate": 4.681607108997089e-06, "loss": 2.5496, "step": 17478 }, { "epoch": 0.9377145922746781, "grad_norm": 0.478515625, "learning_rate": 4.6815646807711625e-06, "loss": 2.2896, "step": 17479 }, { "epoch": 0.9377682403433476, "grad_norm": 0.49609375, "learning_rate": 4.68152224991077e-06, "loss": 2.5103, "step": 17480 }, { "epoch": 0.9378218884120172, "grad_norm": 0.54296875, "learning_rate": 4.681479816415964e-06, "loss": 2.1478, "step": 17481 }, { "epoch": 0.9378755364806867, "grad_norm": 0.48828125, "learning_rate": 4.681437380286794e-06, "loss": 2.4232, "step": 17482 }, { "epoch": 0.9379291845493563, "grad_norm": 0.494140625, "learning_rate": 4.681394941523312e-06, "loss": 2.2622, "step": 17483 }, { "epoch": 0.9379828326180257, "grad_norm": 0.46484375, "learning_rate": 4.68135250012557e-06, "loss": 2.2876, "step": 17484 }, { "epoch": 0.9380364806866953, "grad_norm": 0.5546875, "learning_rate": 4.681310056093618e-06, "loss": 2.193, "step": 17485 }, { "epoch": 0.9380901287553648, "grad_norm": 0.4609375, "learning_rate": 4.681267609427508e-06, "loss": 2.3922, "step": 17486 }, { "epoch": 0.9381437768240344, "grad_norm": 0.466796875, "learning_rate": 4.68122516012729e-06, "loss": 2.1763, "step": 17487 }, { "epoch": 0.9381974248927039, "grad_norm": 0.6953125, "learning_rate": 4.681182708193018e-06, "loss": 2.2889, "step": 17488 }, { "epoch": 0.9382510729613734, "grad_norm": 0.5625, "learning_rate": 4.681140253624741e-06, "loss": 2.2257, "step": 17489 }, { "epoch": 0.9383047210300429, "grad_norm": 0.73046875, "learning_rate": 4.681097796422511e-06, "loss": 2.5271, "step": 17490 }, { "epoch": 0.9383583690987124, "grad_norm": 0.466796875, "learning_rate": 4.681055336586379e-06, "loss": 2.4408, "step": 17491 }, { "epoch": 0.938412017167382, "grad_norm": 0.46875, "learning_rate": 4.6810128741163955e-06, "loss": 2.5416, "step": 17492 }, { "epoch": 0.9384656652360515, "grad_norm": 0.484375, "learning_rate": 4.6809704090126135e-06, "loss": 2.319, "step": 17493 }, { "epoch": 0.938519313304721, "grad_norm": 0.41796875, "learning_rate": 4.680927941275083e-06, "loss": 2.284, "step": 17494 }, { "epoch": 0.9385729613733905, "grad_norm": 0.470703125, "learning_rate": 4.6808854709038565e-06, "loss": 2.3385, "step": 17495 }, { "epoch": 0.9386266094420601, "grad_norm": 0.796875, "learning_rate": 4.680842997898984e-06, "loss": 1.8793, "step": 17496 }, { "epoch": 0.9386802575107296, "grad_norm": 0.421875, "learning_rate": 4.680800522260518e-06, "loss": 2.3117, "step": 17497 }, { "epoch": 0.9387339055793992, "grad_norm": 0.5703125, "learning_rate": 4.680758043988508e-06, "loss": 2.2477, "step": 17498 }, { "epoch": 0.9387875536480687, "grad_norm": 0.41015625, "learning_rate": 4.680715563083007e-06, "loss": 2.079, "step": 17499 }, { "epoch": 0.9388412017167382, "grad_norm": 0.498046875, "learning_rate": 4.6806730795440656e-06, "loss": 2.2317, "step": 17500 }, { "epoch": 0.9388948497854077, "grad_norm": 0.443359375, "learning_rate": 4.680630593371736e-06, "loss": 2.1793, "step": 17501 }, { "epoch": 0.9389484978540773, "grad_norm": 0.53125, "learning_rate": 4.680588104566068e-06, "loss": 2.3825, "step": 17502 }, { "epoch": 0.9390021459227468, "grad_norm": 0.453125, "learning_rate": 4.680545613127113e-06, "loss": 2.3836, "step": 17503 }, { "epoch": 0.9390557939914163, "grad_norm": 0.44921875, "learning_rate": 4.6805031190549245e-06, "loss": 2.2103, "step": 17504 }, { "epoch": 0.9391094420600858, "grad_norm": 3.703125, "learning_rate": 4.680460622349551e-06, "loss": 1.2311, "step": 17505 }, { "epoch": 0.9391630901287553, "grad_norm": 0.45703125, "learning_rate": 4.680418123011046e-06, "loss": 2.5425, "step": 17506 }, { "epoch": 0.9392167381974249, "grad_norm": 0.44921875, "learning_rate": 4.680375621039459e-06, "loss": 2.3418, "step": 17507 }, { "epoch": 0.9392703862660944, "grad_norm": 0.51953125, "learning_rate": 4.680333116434842e-06, "loss": 2.2579, "step": 17508 }, { "epoch": 0.939324034334764, "grad_norm": 0.490234375, "learning_rate": 4.680290609197248e-06, "loss": 2.0995, "step": 17509 }, { "epoch": 0.9393776824034334, "grad_norm": 0.47265625, "learning_rate": 4.680248099326726e-06, "loss": 2.2567, "step": 17510 }, { "epoch": 0.939431330472103, "grad_norm": 0.474609375, "learning_rate": 4.680205586823327e-06, "loss": 2.451, "step": 17511 }, { "epoch": 0.9394849785407725, "grad_norm": 0.4296875, "learning_rate": 4.6801630716871064e-06, "loss": 2.3811, "step": 17512 }, { "epoch": 0.9395386266094421, "grad_norm": 1.859375, "learning_rate": 4.680120553918111e-06, "loss": 1.7076, "step": 17513 }, { "epoch": 0.9395922746781116, "grad_norm": 0.40234375, "learning_rate": 4.680078033516393e-06, "loss": 2.1767, "step": 17514 }, { "epoch": 0.9396459227467812, "grad_norm": 0.490234375, "learning_rate": 4.680035510482006e-06, "loss": 2.4134, "step": 17515 }, { "epoch": 0.9396995708154506, "grad_norm": 0.462890625, "learning_rate": 4.679992984814999e-06, "loss": 2.3526, "step": 17516 }, { "epoch": 0.9397532188841202, "grad_norm": 0.404296875, "learning_rate": 4.679950456515425e-06, "loss": 2.2851, "step": 17517 }, { "epoch": 0.9398068669527897, "grad_norm": 0.3984375, "learning_rate": 4.679907925583335e-06, "loss": 2.1502, "step": 17518 }, { "epoch": 0.9398605150214592, "grad_norm": 0.47265625, "learning_rate": 4.67986539201878e-06, "loss": 2.1606, "step": 17519 }, { "epoch": 0.9399141630901288, "grad_norm": 0.486328125, "learning_rate": 4.679822855821811e-06, "loss": 2.2718, "step": 17520 }, { "epoch": 0.9399678111587982, "grad_norm": 0.94140625, "learning_rate": 4.679780316992479e-06, "loss": 2.2476, "step": 17521 }, { "epoch": 0.9400214592274678, "grad_norm": 0.44921875, "learning_rate": 4.679737775530837e-06, "loss": 2.2964, "step": 17522 }, { "epoch": 0.9400751072961373, "grad_norm": 0.40625, "learning_rate": 4.679695231436936e-06, "loss": 2.1187, "step": 17523 }, { "epoch": 0.9401287553648069, "grad_norm": 0.46484375, "learning_rate": 4.679652684710825e-06, "loss": 2.1178, "step": 17524 }, { "epoch": 0.9401824034334764, "grad_norm": 0.56640625, "learning_rate": 4.679610135352559e-06, "loss": 2.1845, "step": 17525 }, { "epoch": 0.940236051502146, "grad_norm": 0.64453125, "learning_rate": 4.679567583362187e-06, "loss": 2.0508, "step": 17526 }, { "epoch": 0.9402896995708154, "grad_norm": 0.484375, "learning_rate": 4.679525028739761e-06, "loss": 2.2645, "step": 17527 }, { "epoch": 0.940343347639485, "grad_norm": 0.796875, "learning_rate": 4.679482471485332e-06, "loss": 2.0591, "step": 17528 }, { "epoch": 0.9403969957081545, "grad_norm": 0.431640625, "learning_rate": 4.679439911598952e-06, "loss": 2.2579, "step": 17529 }, { "epoch": 0.9404506437768241, "grad_norm": 0.451171875, "learning_rate": 4.679397349080673e-06, "loss": 2.1679, "step": 17530 }, { "epoch": 0.9405042918454936, "grad_norm": 0.4765625, "learning_rate": 4.679354783930545e-06, "loss": 2.4728, "step": 17531 }, { "epoch": 0.9405579399141631, "grad_norm": 0.408203125, "learning_rate": 4.67931221614862e-06, "loss": 2.3862, "step": 17532 }, { "epoch": 0.9406115879828326, "grad_norm": 0.40625, "learning_rate": 4.6792696457349495e-06, "loss": 2.0105, "step": 17533 }, { "epoch": 0.9406652360515021, "grad_norm": 0.5234375, "learning_rate": 4.679227072689585e-06, "loss": 2.3822, "step": 17534 }, { "epoch": 0.9407188841201717, "grad_norm": 0.5, "learning_rate": 4.679184497012578e-06, "loss": 2.254, "step": 17535 }, { "epoch": 0.9407725321888412, "grad_norm": 0.462890625, "learning_rate": 4.679141918703979e-06, "loss": 2.2589, "step": 17536 }, { "epoch": 0.9408261802575107, "grad_norm": 0.55859375, "learning_rate": 4.679099337763839e-06, "loss": 2.3575, "step": 17537 }, { "epoch": 0.9408798283261802, "grad_norm": 0.51953125, "learning_rate": 4.679056754192212e-06, "loss": 2.4794, "step": 17538 }, { "epoch": 0.9409334763948498, "grad_norm": 0.494140625, "learning_rate": 4.679014167989148e-06, "loss": 2.3794, "step": 17539 }, { "epoch": 0.9409871244635193, "grad_norm": 0.404296875, "learning_rate": 4.678971579154698e-06, "loss": 2.2924, "step": 17540 }, { "epoch": 0.9410407725321889, "grad_norm": 0.515625, "learning_rate": 4.6789289876889135e-06, "loss": 2.5395, "step": 17541 }, { "epoch": 0.9410944206008584, "grad_norm": 0.48046875, "learning_rate": 4.678886393591846e-06, "loss": 2.4227, "step": 17542 }, { "epoch": 0.9411480686695279, "grad_norm": 0.435546875, "learning_rate": 4.678843796863548e-06, "loss": 2.459, "step": 17543 }, { "epoch": 0.9412017167381974, "grad_norm": 0.466796875, "learning_rate": 4.678801197504069e-06, "loss": 2.2478, "step": 17544 }, { "epoch": 0.941255364806867, "grad_norm": 0.4609375, "learning_rate": 4.678758595513462e-06, "loss": 2.249, "step": 17545 }, { "epoch": 0.9413090128755365, "grad_norm": 0.474609375, "learning_rate": 4.678715990891778e-06, "loss": 2.1262, "step": 17546 }, { "epoch": 0.941362660944206, "grad_norm": 0.5859375, "learning_rate": 4.678673383639069e-06, "loss": 2.5117, "step": 17547 }, { "epoch": 0.9414163090128755, "grad_norm": 0.4765625, "learning_rate": 4.678630773755384e-06, "loss": 2.3561, "step": 17548 }, { "epoch": 0.941469957081545, "grad_norm": 0.466796875, "learning_rate": 4.678588161240778e-06, "loss": 2.2165, "step": 17549 }, { "epoch": 0.9415236051502146, "grad_norm": 0.51953125, "learning_rate": 4.6785455460953e-06, "loss": 2.3094, "step": 17550 }, { "epoch": 0.9415772532188841, "grad_norm": 0.470703125, "learning_rate": 4.6785029283190026e-06, "loss": 2.3956, "step": 17551 }, { "epoch": 0.9416309012875537, "grad_norm": 0.39453125, "learning_rate": 4.678460307911937e-06, "loss": 2.2286, "step": 17552 }, { "epoch": 0.9416845493562231, "grad_norm": 0.470703125, "learning_rate": 4.678417684874155e-06, "loss": 2.412, "step": 17553 }, { "epoch": 0.9417381974248927, "grad_norm": 0.4375, "learning_rate": 4.678375059205706e-06, "loss": 2.4189, "step": 17554 }, { "epoch": 0.9417918454935622, "grad_norm": 0.5546875, "learning_rate": 4.6783324309066444e-06, "loss": 2.2537, "step": 17555 }, { "epoch": 0.9418454935622318, "grad_norm": 0.48046875, "learning_rate": 4.678289799977021e-06, "loss": 2.1249, "step": 17556 }, { "epoch": 0.9418991416309013, "grad_norm": 0.86328125, "learning_rate": 4.678247166416885e-06, "loss": 2.3183, "step": 17557 }, { "epoch": 0.9419527896995709, "grad_norm": 0.5546875, "learning_rate": 4.6782045302262904e-06, "loss": 2.2015, "step": 17558 }, { "epoch": 0.9420064377682403, "grad_norm": 0.484375, "learning_rate": 4.678161891405288e-06, "loss": 2.161, "step": 17559 }, { "epoch": 0.9420600858369099, "grad_norm": 0.4296875, "learning_rate": 4.6781192499539285e-06, "loss": 2.244, "step": 17560 }, { "epoch": 0.9421137339055794, "grad_norm": 0.48828125, "learning_rate": 4.678076605872265e-06, "loss": 2.4145, "step": 17561 }, { "epoch": 0.9421673819742489, "grad_norm": 0.49609375, "learning_rate": 4.6780339591603476e-06, "loss": 2.1738, "step": 17562 }, { "epoch": 0.9422210300429185, "grad_norm": 0.482421875, "learning_rate": 4.6779913098182275e-06, "loss": 2.1413, "step": 17563 }, { "epoch": 0.9422746781115879, "grad_norm": 0.546875, "learning_rate": 4.6779486578459576e-06, "loss": 2.3309, "step": 17564 }, { "epoch": 0.9423283261802575, "grad_norm": 0.61328125, "learning_rate": 4.677906003243589e-06, "loss": 2.2436, "step": 17565 }, { "epoch": 0.942381974248927, "grad_norm": 0.498046875, "learning_rate": 4.677863346011173e-06, "loss": 2.1568, "step": 17566 }, { "epoch": 0.9424356223175966, "grad_norm": 0.416015625, "learning_rate": 4.67782068614876e-06, "loss": 2.2249, "step": 17567 }, { "epoch": 0.9424892703862661, "grad_norm": 0.447265625, "learning_rate": 4.677778023656404e-06, "loss": 2.1654, "step": 17568 }, { "epoch": 0.9425429184549357, "grad_norm": 0.5234375, "learning_rate": 4.677735358534155e-06, "loss": 1.6549, "step": 17569 }, { "epoch": 0.9425965665236051, "grad_norm": 0.51171875, "learning_rate": 4.677692690782063e-06, "loss": 2.3495, "step": 17570 }, { "epoch": 0.9426502145922747, "grad_norm": 0.443359375, "learning_rate": 4.677650020400182e-06, "loss": 2.1646, "step": 17571 }, { "epoch": 0.9427038626609442, "grad_norm": 0.50390625, "learning_rate": 4.677607347388563e-06, "loss": 2.2599, "step": 17572 }, { "epoch": 0.9427575107296138, "grad_norm": 0.58203125, "learning_rate": 4.677564671747257e-06, "loss": 2.3875, "step": 17573 }, { "epoch": 0.9428111587982833, "grad_norm": 0.6015625, "learning_rate": 4.6775219934763164e-06, "loss": 2.2923, "step": 17574 }, { "epoch": 0.9428648068669528, "grad_norm": 0.48046875, "learning_rate": 4.677479312575791e-06, "loss": 2.1187, "step": 17575 }, { "epoch": 0.9429184549356223, "grad_norm": 0.58203125, "learning_rate": 4.677436629045734e-06, "loss": 2.1622, "step": 17576 }, { "epoch": 0.9429721030042918, "grad_norm": 0.421875, "learning_rate": 4.677393942886196e-06, "loss": 1.9146, "step": 17577 }, { "epoch": 0.9430257510729614, "grad_norm": 0.67578125, "learning_rate": 4.67735125409723e-06, "loss": 1.9976, "step": 17578 }, { "epoch": 0.9430793991416309, "grad_norm": 0.42578125, "learning_rate": 4.677308562678885e-06, "loss": 2.2989, "step": 17579 }, { "epoch": 0.9431330472103004, "grad_norm": 0.578125, "learning_rate": 4.677265868631215e-06, "loss": 2.1893, "step": 17580 }, { "epoch": 0.9431866952789699, "grad_norm": 0.50390625, "learning_rate": 4.67722317195427e-06, "loss": 2.0241, "step": 17581 }, { "epoch": 0.9432403433476395, "grad_norm": 0.451171875, "learning_rate": 4.677180472648103e-06, "loss": 2.186, "step": 17582 }, { "epoch": 0.943293991416309, "grad_norm": 0.427734375, "learning_rate": 4.677137770712764e-06, "loss": 2.2781, "step": 17583 }, { "epoch": 0.9433476394849786, "grad_norm": 0.458984375, "learning_rate": 4.677095066148305e-06, "loss": 2.4137, "step": 17584 }, { "epoch": 0.943401287553648, "grad_norm": 0.396484375, "learning_rate": 4.677052358954778e-06, "loss": 2.0561, "step": 17585 }, { "epoch": 0.9434549356223176, "grad_norm": 0.373046875, "learning_rate": 4.6770096491322346e-06, "loss": 2.0955, "step": 17586 }, { "epoch": 0.9435085836909871, "grad_norm": 0.482421875, "learning_rate": 4.676966936680726e-06, "loss": 2.3536, "step": 17587 }, { "epoch": 0.9435622317596567, "grad_norm": 0.453125, "learning_rate": 4.676924221600304e-06, "loss": 2.0219, "step": 17588 }, { "epoch": 0.9436158798283262, "grad_norm": 0.625, "learning_rate": 4.676881503891021e-06, "loss": 2.4451, "step": 17589 }, { "epoch": 0.9436695278969958, "grad_norm": 0.48828125, "learning_rate": 4.6768387835529275e-06, "loss": 2.5062, "step": 17590 }, { "epoch": 0.9437231759656652, "grad_norm": 0.7109375, "learning_rate": 4.676796060586074e-06, "loss": 2.3574, "step": 17591 }, { "epoch": 0.9437768240343347, "grad_norm": 0.47265625, "learning_rate": 4.676753334990515e-06, "loss": 2.379, "step": 17592 }, { "epoch": 0.9438304721030043, "grad_norm": 0.60546875, "learning_rate": 4.676710606766299e-06, "loss": 1.7875, "step": 17593 }, { "epoch": 0.9438841201716738, "grad_norm": 0.48828125, "learning_rate": 4.67666787591348e-06, "loss": 2.3558, "step": 17594 }, { "epoch": 0.9439377682403434, "grad_norm": 0.447265625, "learning_rate": 4.676625142432108e-06, "loss": 2.2594, "step": 17595 }, { "epoch": 0.9439914163090128, "grad_norm": 0.515625, "learning_rate": 4.676582406322236e-06, "loss": 2.371, "step": 17596 }, { "epoch": 0.9440450643776824, "grad_norm": 0.470703125, "learning_rate": 4.676539667583916e-06, "loss": 2.1469, "step": 17597 }, { "epoch": 0.9440987124463519, "grad_norm": 0.453125, "learning_rate": 4.676496926217197e-06, "loss": 2.32, "step": 17598 }, { "epoch": 0.9441523605150215, "grad_norm": 0.498046875, "learning_rate": 4.676454182222133e-06, "loss": 2.4927, "step": 17599 }, { "epoch": 0.944206008583691, "grad_norm": 0.44140625, "learning_rate": 4.676411435598774e-06, "loss": 2.1153, "step": 17600 }, { "epoch": 0.9442596566523606, "grad_norm": 0.4375, "learning_rate": 4.6763686863471725e-06, "loss": 2.1279, "step": 17601 }, { "epoch": 0.94431330472103, "grad_norm": 0.53125, "learning_rate": 4.676325934467381e-06, "loss": 2.5651, "step": 17602 }, { "epoch": 0.9443669527896996, "grad_norm": 0.478515625, "learning_rate": 4.676283179959449e-06, "loss": 2.5047, "step": 17603 }, { "epoch": 0.9444206008583691, "grad_norm": 0.419921875, "learning_rate": 4.67624042282343e-06, "loss": 2.0441, "step": 17604 }, { "epoch": 0.9444742489270386, "grad_norm": 0.6484375, "learning_rate": 4.676197663059374e-06, "loss": 2.1627, "step": 17605 }, { "epoch": 0.9445278969957082, "grad_norm": 0.74609375, "learning_rate": 4.676154900667335e-06, "loss": 2.4068, "step": 17606 }, { "epoch": 0.9445815450643776, "grad_norm": 0.53515625, "learning_rate": 4.6761121356473615e-06, "loss": 2.1501, "step": 17607 }, { "epoch": 0.9446351931330472, "grad_norm": 1.1953125, "learning_rate": 4.6760693679995086e-06, "loss": 2.417, "step": 17608 }, { "epoch": 0.9446888412017167, "grad_norm": 0.462890625, "learning_rate": 4.676026597723825e-06, "loss": 2.3736, "step": 17609 }, { "epoch": 0.9447424892703863, "grad_norm": 0.66796875, "learning_rate": 4.675983824820364e-06, "loss": 2.169, "step": 17610 }, { "epoch": 0.9447961373390558, "grad_norm": 0.494140625, "learning_rate": 4.675941049289176e-06, "loss": 2.5438, "step": 17611 }, { "epoch": 0.9448497854077254, "grad_norm": 0.53125, "learning_rate": 4.675898271130313e-06, "loss": 2.2648, "step": 17612 }, { "epoch": 0.9449034334763948, "grad_norm": 0.51953125, "learning_rate": 4.675855490343829e-06, "loss": 2.3863, "step": 17613 }, { "epoch": 0.9449570815450644, "grad_norm": 0.546875, "learning_rate": 4.675812706929773e-06, "loss": 1.9642, "step": 17614 }, { "epoch": 0.9450107296137339, "grad_norm": 0.41796875, "learning_rate": 4.6757699208881966e-06, "loss": 2.1284, "step": 17615 }, { "epoch": 0.9450643776824035, "grad_norm": 0.470703125, "learning_rate": 4.675727132219153e-06, "loss": 2.2819, "step": 17616 }, { "epoch": 0.945118025751073, "grad_norm": 0.4609375, "learning_rate": 4.675684340922694e-06, "loss": 2.2514, "step": 17617 }, { "epoch": 0.9451716738197425, "grad_norm": 0.47265625, "learning_rate": 4.675641546998869e-06, "loss": 2.6714, "step": 17618 }, { "epoch": 0.945225321888412, "grad_norm": 0.66015625, "learning_rate": 4.675598750447732e-06, "loss": 1.5334, "step": 17619 }, { "epoch": 0.9452789699570815, "grad_norm": 0.5390625, "learning_rate": 4.675555951269333e-06, "loss": 2.4488, "step": 17620 }, { "epoch": 0.9453326180257511, "grad_norm": 0.82421875, "learning_rate": 4.675513149463725e-06, "loss": 2.2437, "step": 17621 }, { "epoch": 0.9453862660944206, "grad_norm": 0.61328125, "learning_rate": 4.675470345030959e-06, "loss": 1.5038, "step": 17622 }, { "epoch": 0.9454399141630901, "grad_norm": 0.515625, "learning_rate": 4.675427537971087e-06, "loss": 2.0865, "step": 17623 }, { "epoch": 0.9454935622317596, "grad_norm": 0.41796875, "learning_rate": 4.675384728284161e-06, "loss": 2.4615, "step": 17624 }, { "epoch": 0.9455472103004292, "grad_norm": 0.49609375, "learning_rate": 4.675341915970232e-06, "loss": 2.3302, "step": 17625 }, { "epoch": 0.9456008583690987, "grad_norm": 0.43359375, "learning_rate": 4.675299101029351e-06, "loss": 2.3182, "step": 17626 }, { "epoch": 0.9456545064377683, "grad_norm": 0.4765625, "learning_rate": 4.675256283461572e-06, "loss": 2.3121, "step": 17627 }, { "epoch": 0.9457081545064377, "grad_norm": 0.484375, "learning_rate": 4.675213463266944e-06, "loss": 2.4289, "step": 17628 }, { "epoch": 0.9457618025751073, "grad_norm": 0.46875, "learning_rate": 4.675170640445521e-06, "loss": 2.349, "step": 17629 }, { "epoch": 0.9458154506437768, "grad_norm": 0.453125, "learning_rate": 4.675127814997353e-06, "loss": 2.2948, "step": 17630 }, { "epoch": 0.9458690987124464, "grad_norm": 0.427734375, "learning_rate": 4.675084986922493e-06, "loss": 2.2045, "step": 17631 }, { "epoch": 0.9459227467811159, "grad_norm": 0.625, "learning_rate": 4.675042156220993e-06, "loss": 2.3161, "step": 17632 }, { "epoch": 0.9459763948497855, "grad_norm": 1.0625, "learning_rate": 4.674999322892903e-06, "loss": 2.0993, "step": 17633 }, { "epoch": 0.9460300429184549, "grad_norm": 0.466796875, "learning_rate": 4.674956486938276e-06, "loss": 2.4357, "step": 17634 }, { "epoch": 0.9460836909871244, "grad_norm": 0.9296875, "learning_rate": 4.674913648357164e-06, "loss": 2.4085, "step": 17635 }, { "epoch": 0.946137339055794, "grad_norm": 0.427734375, "learning_rate": 4.674870807149617e-06, "loss": 2.2776, "step": 17636 }, { "epoch": 0.9461909871244635, "grad_norm": 0.494140625, "learning_rate": 4.674827963315688e-06, "loss": 2.3961, "step": 17637 }, { "epoch": 0.9462446351931331, "grad_norm": 0.478515625, "learning_rate": 4.674785116855429e-06, "loss": 2.336, "step": 17638 }, { "epoch": 0.9462982832618025, "grad_norm": 0.52734375, "learning_rate": 4.674742267768891e-06, "loss": 2.4184, "step": 17639 }, { "epoch": 0.9463519313304721, "grad_norm": 0.56640625, "learning_rate": 4.674699416056127e-06, "loss": 1.6024, "step": 17640 }, { "epoch": 0.9464055793991416, "grad_norm": 0.453125, "learning_rate": 4.674656561717187e-06, "loss": 2.076, "step": 17641 }, { "epoch": 0.9464592274678112, "grad_norm": 0.453125, "learning_rate": 4.674613704752124e-06, "loss": 2.3244, "step": 17642 }, { "epoch": 0.9465128755364807, "grad_norm": 0.455078125, "learning_rate": 4.674570845160988e-06, "loss": 2.4787, "step": 17643 }, { "epoch": 0.9465665236051503, "grad_norm": 0.9609375, "learning_rate": 4.6745279829438335e-06, "loss": 2.5088, "step": 17644 }, { "epoch": 0.9466201716738197, "grad_norm": 0.43359375, "learning_rate": 4.674485118100711e-06, "loss": 2.1777, "step": 17645 }, { "epoch": 0.9466738197424893, "grad_norm": 0.52734375, "learning_rate": 4.674442250631671e-06, "loss": 2.2226, "step": 17646 }, { "epoch": 0.9467274678111588, "grad_norm": 0.62109375, "learning_rate": 4.674399380536767e-06, "loss": 2.2295, "step": 17647 }, { "epoch": 0.9467811158798283, "grad_norm": 0.462890625, "learning_rate": 4.6743565078160506e-06, "loss": 2.1733, "step": 17648 }, { "epoch": 0.9468347639484979, "grad_norm": 0.423828125, "learning_rate": 4.674313632469573e-06, "loss": 2.4258, "step": 17649 }, { "epoch": 0.9468884120171673, "grad_norm": 0.482421875, "learning_rate": 4.674270754497385e-06, "loss": 2.3242, "step": 17650 }, { "epoch": 0.9469420600858369, "grad_norm": 0.60546875, "learning_rate": 4.674227873899541e-06, "loss": 2.5546, "step": 17651 }, { "epoch": 0.9469957081545064, "grad_norm": 0.46875, "learning_rate": 4.674184990676091e-06, "loss": 2.1586, "step": 17652 }, { "epoch": 0.947049356223176, "grad_norm": 0.416015625, "learning_rate": 4.674142104827086e-06, "loss": 2.2933, "step": 17653 }, { "epoch": 0.9471030042918455, "grad_norm": 0.482421875, "learning_rate": 4.67409921635258e-06, "loss": 2.3316, "step": 17654 }, { "epoch": 0.947156652360515, "grad_norm": 0.447265625, "learning_rate": 4.674056325252623e-06, "loss": 2.2557, "step": 17655 }, { "epoch": 0.9472103004291845, "grad_norm": 0.494140625, "learning_rate": 4.674013431527268e-06, "loss": 2.3362, "step": 17656 }, { "epoch": 0.9472639484978541, "grad_norm": 0.5234375, "learning_rate": 4.673970535176565e-06, "loss": 2.272, "step": 17657 }, { "epoch": 0.9473175965665236, "grad_norm": 0.498046875, "learning_rate": 4.673927636200568e-06, "loss": 2.2075, "step": 17658 }, { "epoch": 0.9473712446351932, "grad_norm": 0.482421875, "learning_rate": 4.673884734599328e-06, "loss": 2.2975, "step": 17659 }, { "epoch": 0.9474248927038627, "grad_norm": 0.60546875, "learning_rate": 4.6738418303728965e-06, "loss": 2.4723, "step": 17660 }, { "epoch": 0.9474785407725322, "grad_norm": 0.6015625, "learning_rate": 4.6737989235213255e-06, "loss": 2.4783, "step": 17661 }, { "epoch": 0.9475321888412017, "grad_norm": 0.474609375, "learning_rate": 4.673756014044666e-06, "loss": 2.4154, "step": 17662 }, { "epoch": 0.9475858369098712, "grad_norm": 0.58203125, "learning_rate": 4.673713101942972e-06, "loss": 1.569, "step": 17663 }, { "epoch": 0.9476394849785408, "grad_norm": 0.44921875, "learning_rate": 4.673670187216293e-06, "loss": 2.4146, "step": 17664 }, { "epoch": 0.9476931330472103, "grad_norm": 0.390625, "learning_rate": 4.673627269864681e-06, "loss": 2.2973, "step": 17665 }, { "epoch": 0.9477467811158798, "grad_norm": 0.462890625, "learning_rate": 4.67358434988819e-06, "loss": 2.1561, "step": 17666 }, { "epoch": 0.9478004291845493, "grad_norm": 0.53125, "learning_rate": 4.6735414272868696e-06, "loss": 2.2162, "step": 17667 }, { "epoch": 0.9478540772532189, "grad_norm": 0.51171875, "learning_rate": 4.673498502060773e-06, "loss": 2.1689, "step": 17668 }, { "epoch": 0.9479077253218884, "grad_norm": 0.482421875, "learning_rate": 4.673455574209951e-06, "loss": 2.0554, "step": 17669 }, { "epoch": 0.947961373390558, "grad_norm": 0.4140625, "learning_rate": 4.673412643734455e-06, "loss": 2.4154, "step": 17670 }, { "epoch": 0.9480150214592274, "grad_norm": 0.5234375, "learning_rate": 4.673369710634339e-06, "loss": 2.3805, "step": 17671 }, { "epoch": 0.948068669527897, "grad_norm": 0.74609375, "learning_rate": 4.673326774909653e-06, "loss": 2.3269, "step": 17672 }, { "epoch": 0.9481223175965665, "grad_norm": 0.4609375, "learning_rate": 4.67328383656045e-06, "loss": 2.4638, "step": 17673 }, { "epoch": 0.9481759656652361, "grad_norm": 0.515625, "learning_rate": 4.6732408955867814e-06, "loss": 2.3006, "step": 17674 }, { "epoch": 0.9482296137339056, "grad_norm": 0.396484375, "learning_rate": 4.673197951988699e-06, "loss": 2.4958, "step": 17675 }, { "epoch": 0.9482832618025752, "grad_norm": 0.62890625, "learning_rate": 4.673155005766254e-06, "loss": 2.3594, "step": 17676 }, { "epoch": 0.9483369098712446, "grad_norm": 0.50390625, "learning_rate": 4.673112056919499e-06, "loss": 2.2661, "step": 17677 }, { "epoch": 0.9483905579399141, "grad_norm": 0.443359375, "learning_rate": 4.6730691054484856e-06, "loss": 2.0571, "step": 17678 }, { "epoch": 0.9484442060085837, "grad_norm": 0.4609375, "learning_rate": 4.673026151353266e-06, "loss": 2.1984, "step": 17679 }, { "epoch": 0.9484978540772532, "grad_norm": 0.48046875, "learning_rate": 4.672983194633892e-06, "loss": 2.3811, "step": 17680 }, { "epoch": 0.9485515021459228, "grad_norm": 0.439453125, "learning_rate": 4.6729402352904155e-06, "loss": 2.3689, "step": 17681 }, { "epoch": 0.9486051502145922, "grad_norm": 0.43359375, "learning_rate": 4.672897273322888e-06, "loss": 2.0243, "step": 17682 }, { "epoch": 0.9486587982832618, "grad_norm": 0.4453125, "learning_rate": 4.672854308731361e-06, "loss": 2.3173, "step": 17683 }, { "epoch": 0.9487124463519313, "grad_norm": 0.423828125, "learning_rate": 4.672811341515888e-06, "loss": 1.8696, "step": 17684 }, { "epoch": 0.9487660944206009, "grad_norm": 0.466796875, "learning_rate": 4.672768371676519e-06, "loss": 2.4498, "step": 17685 }, { "epoch": 0.9488197424892704, "grad_norm": 0.58984375, "learning_rate": 4.672725399213307e-06, "loss": 2.2172, "step": 17686 }, { "epoch": 0.94887339055794, "grad_norm": 0.41015625, "learning_rate": 4.672682424126304e-06, "loss": 2.4583, "step": 17687 }, { "epoch": 0.9489270386266094, "grad_norm": 0.44140625, "learning_rate": 4.672639446415561e-06, "loss": 1.9716, "step": 17688 }, { "epoch": 0.948980686695279, "grad_norm": 0.443359375, "learning_rate": 4.672596466081132e-06, "loss": 2.3287, "step": 17689 }, { "epoch": 0.9490343347639485, "grad_norm": 0.431640625, "learning_rate": 4.672553483123065e-06, "loss": 2.1995, "step": 17690 }, { "epoch": 0.949087982832618, "grad_norm": 0.443359375, "learning_rate": 4.672510497541416e-06, "loss": 2.4459, "step": 17691 }, { "epoch": 0.9491416309012876, "grad_norm": 0.48046875, "learning_rate": 4.672467509336235e-06, "loss": 2.3475, "step": 17692 }, { "epoch": 0.949195278969957, "grad_norm": 0.345703125, "learning_rate": 4.672424518507574e-06, "loss": 2.2736, "step": 17693 }, { "epoch": 0.9492489270386266, "grad_norm": 0.51953125, "learning_rate": 4.672381525055485e-06, "loss": 2.4275, "step": 17694 }, { "epoch": 0.9493025751072961, "grad_norm": 0.46875, "learning_rate": 4.67233852898002e-06, "loss": 2.1459, "step": 17695 }, { "epoch": 0.9493562231759657, "grad_norm": 0.48828125, "learning_rate": 4.67229553028123e-06, "loss": 2.3832, "step": 17696 }, { "epoch": 0.9494098712446352, "grad_norm": 0.41796875, "learning_rate": 4.672252528959169e-06, "loss": 2.245, "step": 17697 }, { "epoch": 0.9494635193133047, "grad_norm": 0.462890625, "learning_rate": 4.672209525013887e-06, "loss": 2.3327, "step": 17698 }, { "epoch": 0.9495171673819742, "grad_norm": 0.59765625, "learning_rate": 4.672166518445437e-06, "loss": 2.3333, "step": 17699 }, { "epoch": 0.9495708154506438, "grad_norm": 0.51953125, "learning_rate": 4.6721235092538695e-06, "loss": 2.4332, "step": 17700 }, { "epoch": 0.9496244635193133, "grad_norm": 0.453125, "learning_rate": 4.672080497439239e-06, "loss": 2.3138, "step": 17701 }, { "epoch": 0.9496781115879829, "grad_norm": 0.7421875, "learning_rate": 4.672037483001595e-06, "loss": 2.0316, "step": 17702 }, { "epoch": 0.9497317596566524, "grad_norm": 0.56640625, "learning_rate": 4.67199446594099e-06, "loss": 2.5245, "step": 17703 }, { "epoch": 0.9497854077253219, "grad_norm": 0.44140625, "learning_rate": 4.6719514462574774e-06, "loss": 2.2531, "step": 17704 }, { "epoch": 0.9498390557939914, "grad_norm": 0.44140625, "learning_rate": 4.671908423951108e-06, "loss": 2.2538, "step": 17705 }, { "epoch": 0.9498927038626609, "grad_norm": 0.46875, "learning_rate": 4.6718653990219336e-06, "loss": 2.3439, "step": 17706 }, { "epoch": 0.9499463519313305, "grad_norm": 0.443359375, "learning_rate": 4.6718223714700066e-06, "loss": 2.2391, "step": 17707 }, { "epoch": 0.95, "grad_norm": 0.609375, "learning_rate": 4.671779341295378e-06, "loss": 2.3037, "step": 17708 }, { "epoch": 0.9500536480686695, "grad_norm": 0.48828125, "learning_rate": 4.671736308498101e-06, "loss": 2.3707, "step": 17709 }, { "epoch": 0.950107296137339, "grad_norm": 0.4921875, "learning_rate": 4.671693273078227e-06, "loss": 2.3577, "step": 17710 }, { "epoch": 0.9501609442060086, "grad_norm": 0.43359375, "learning_rate": 4.671650235035808e-06, "loss": 2.1663, "step": 17711 }, { "epoch": 0.9502145922746781, "grad_norm": 2.5625, "learning_rate": 4.671607194370896e-06, "loss": 2.4535, "step": 17712 }, { "epoch": 0.9502682403433477, "grad_norm": 0.515625, "learning_rate": 4.671564151083544e-06, "loss": 2.232, "step": 17713 }, { "epoch": 0.9503218884120171, "grad_norm": 0.4609375, "learning_rate": 4.671521105173802e-06, "loss": 2.3944, "step": 17714 }, { "epoch": 0.9503755364806867, "grad_norm": 0.51171875, "learning_rate": 4.671478056641723e-06, "loss": 2.4427, "step": 17715 }, { "epoch": 0.9504291845493562, "grad_norm": 0.361328125, "learning_rate": 4.6714350054873596e-06, "loss": 2.147, "step": 17716 }, { "epoch": 0.9504828326180258, "grad_norm": 0.451171875, "learning_rate": 4.671391951710763e-06, "loss": 2.5017, "step": 17717 }, { "epoch": 0.9505364806866953, "grad_norm": 0.447265625, "learning_rate": 4.671348895311985e-06, "loss": 2.3607, "step": 17718 }, { "epoch": 0.9505901287553649, "grad_norm": 0.6171875, "learning_rate": 4.671305836291078e-06, "loss": 2.2515, "step": 17719 }, { "epoch": 0.9506437768240343, "grad_norm": 0.484375, "learning_rate": 4.671262774648094e-06, "loss": 2.4014, "step": 17720 }, { "epoch": 0.9506974248927038, "grad_norm": 0.46484375, "learning_rate": 4.671219710383085e-06, "loss": 2.2972, "step": 17721 }, { "epoch": 0.9507510729613734, "grad_norm": 0.466796875, "learning_rate": 4.6711766434961025e-06, "loss": 2.2917, "step": 17722 }, { "epoch": 0.9508047210300429, "grad_norm": 0.431640625, "learning_rate": 4.6711335739872e-06, "loss": 2.3161, "step": 17723 }, { "epoch": 0.9508583690987125, "grad_norm": 0.61328125, "learning_rate": 4.671090501856427e-06, "loss": 2.2815, "step": 17724 }, { "epoch": 0.9509120171673819, "grad_norm": 0.498046875, "learning_rate": 4.671047427103838e-06, "loss": 2.8428, "step": 17725 }, { "epoch": 0.9509656652360515, "grad_norm": 0.494140625, "learning_rate": 4.671004349729484e-06, "loss": 2.2197, "step": 17726 }, { "epoch": 0.951019313304721, "grad_norm": 0.466796875, "learning_rate": 4.670961269733416e-06, "loss": 2.2837, "step": 17727 }, { "epoch": 0.9510729613733906, "grad_norm": 0.48828125, "learning_rate": 4.670918187115688e-06, "loss": 2.2087, "step": 17728 }, { "epoch": 0.9511266094420601, "grad_norm": 0.482421875, "learning_rate": 4.670875101876351e-06, "loss": 2.2309, "step": 17729 }, { "epoch": 0.9511802575107297, "grad_norm": 0.5078125, "learning_rate": 4.670832014015456e-06, "loss": 2.3335, "step": 17730 }, { "epoch": 0.9512339055793991, "grad_norm": 0.498046875, "learning_rate": 4.670788923533058e-06, "loss": 2.0628, "step": 17731 }, { "epoch": 0.9512875536480687, "grad_norm": 0.60546875, "learning_rate": 4.670745830429206e-06, "loss": 2.3384, "step": 17732 }, { "epoch": 0.9513412017167382, "grad_norm": 0.4375, "learning_rate": 4.670702734703954e-06, "loss": 2.277, "step": 17733 }, { "epoch": 0.9513948497854077, "grad_norm": 0.58984375, "learning_rate": 4.670659636357352e-06, "loss": 2.4134, "step": 17734 }, { "epoch": 0.9514484978540773, "grad_norm": 0.435546875, "learning_rate": 4.670616535389454e-06, "loss": 2.3504, "step": 17735 }, { "epoch": 0.9515021459227467, "grad_norm": 0.365234375, "learning_rate": 4.670573431800311e-06, "loss": 2.2333, "step": 17736 }, { "epoch": 0.9515557939914163, "grad_norm": 0.498046875, "learning_rate": 4.670530325589976e-06, "loss": 2.4197, "step": 17737 }, { "epoch": 0.9516094420600858, "grad_norm": 0.6328125, "learning_rate": 4.6704872167585e-06, "loss": 2.4402, "step": 17738 }, { "epoch": 0.9516630901287554, "grad_norm": 0.44921875, "learning_rate": 4.670444105305936e-06, "loss": 2.4907, "step": 17739 }, { "epoch": 0.9517167381974249, "grad_norm": 0.451171875, "learning_rate": 4.6704009912323345e-06, "loss": 2.2876, "step": 17740 }, { "epoch": 0.9517703862660944, "grad_norm": 0.474609375, "learning_rate": 4.6703578745377495e-06, "loss": 2.0598, "step": 17741 }, { "epoch": 0.9518240343347639, "grad_norm": 0.45703125, "learning_rate": 4.670314755222232e-06, "loss": 2.2764, "step": 17742 }, { "epoch": 0.9518776824034335, "grad_norm": 0.498046875, "learning_rate": 4.6702716332858346e-06, "loss": 2.1527, "step": 17743 }, { "epoch": 0.951931330472103, "grad_norm": 0.49609375, "learning_rate": 4.670228508728608e-06, "loss": 2.0266, "step": 17744 }, { "epoch": 0.9519849785407726, "grad_norm": 0.43359375, "learning_rate": 4.670185381550606e-06, "loss": 2.1893, "step": 17745 }, { "epoch": 0.952038626609442, "grad_norm": 0.453125, "learning_rate": 4.67014225175188e-06, "loss": 2.2744, "step": 17746 }, { "epoch": 0.9520922746781116, "grad_norm": 0.5, "learning_rate": 4.670099119332482e-06, "loss": 2.26, "step": 17747 }, { "epoch": 0.9521459227467811, "grad_norm": 0.474609375, "learning_rate": 4.670055984292464e-06, "loss": 2.4299, "step": 17748 }, { "epoch": 0.9521995708154506, "grad_norm": 0.50390625, "learning_rate": 4.6700128466318785e-06, "loss": 2.3774, "step": 17749 }, { "epoch": 0.9522532188841202, "grad_norm": 0.498046875, "learning_rate": 4.669969706350778e-06, "loss": 2.397, "step": 17750 }, { "epoch": 0.9523068669527897, "grad_norm": 0.64453125, "learning_rate": 4.669926563449213e-06, "loss": 1.2123, "step": 17751 }, { "epoch": 0.9523605150214592, "grad_norm": 0.91015625, "learning_rate": 4.669883417927237e-06, "loss": 2.2807, "step": 17752 }, { "epoch": 0.9524141630901287, "grad_norm": 0.5390625, "learning_rate": 4.669840269784901e-06, "loss": 2.3976, "step": 17753 }, { "epoch": 0.9524678111587983, "grad_norm": 0.439453125, "learning_rate": 4.669797119022259e-06, "loss": 2.0201, "step": 17754 }, { "epoch": 0.9525214592274678, "grad_norm": 0.5, "learning_rate": 4.669753965639361e-06, "loss": 2.3915, "step": 17755 }, { "epoch": 0.9525751072961374, "grad_norm": 0.486328125, "learning_rate": 4.669710809636261e-06, "loss": 2.5378, "step": 17756 }, { "epoch": 0.9526287553648068, "grad_norm": 0.88671875, "learning_rate": 4.669667651013009e-06, "loss": 2.363, "step": 17757 }, { "epoch": 0.9526824034334764, "grad_norm": 0.5, "learning_rate": 4.669624489769658e-06, "loss": 2.2271, "step": 17758 }, { "epoch": 0.9527360515021459, "grad_norm": 0.55859375, "learning_rate": 4.669581325906261e-06, "loss": 2.3797, "step": 17759 }, { "epoch": 0.9527896995708155, "grad_norm": 0.486328125, "learning_rate": 4.669538159422869e-06, "loss": 2.3051, "step": 17760 }, { "epoch": 0.952843347639485, "grad_norm": 0.421875, "learning_rate": 4.669494990319535e-06, "loss": 2.0563, "step": 17761 }, { "epoch": 0.9528969957081546, "grad_norm": 0.404296875, "learning_rate": 4.66945181859631e-06, "loss": 2.1109, "step": 17762 }, { "epoch": 0.952950643776824, "grad_norm": 0.486328125, "learning_rate": 4.669408644253249e-06, "loss": 2.4816, "step": 17763 }, { "epoch": 0.9530042918454935, "grad_norm": 0.46875, "learning_rate": 4.669365467290399e-06, "loss": 2.2783, "step": 17764 }, { "epoch": 0.9530579399141631, "grad_norm": 0.515625, "learning_rate": 4.669322287707817e-06, "loss": 2.4452, "step": 17765 }, { "epoch": 0.9531115879828326, "grad_norm": 0.50390625, "learning_rate": 4.669279105505553e-06, "loss": 2.469, "step": 17766 }, { "epoch": 0.9531652360515022, "grad_norm": 0.380859375, "learning_rate": 4.669235920683659e-06, "loss": 1.8192, "step": 17767 }, { "epoch": 0.9532188841201716, "grad_norm": 0.388671875, "learning_rate": 4.669192733242188e-06, "loss": 2.0291, "step": 17768 }, { "epoch": 0.9532725321888412, "grad_norm": 0.84765625, "learning_rate": 4.669149543181191e-06, "loss": 2.2618, "step": 17769 }, { "epoch": 0.9533261802575107, "grad_norm": 0.43359375, "learning_rate": 4.669106350500722e-06, "loss": 2.2735, "step": 17770 }, { "epoch": 0.9533798283261803, "grad_norm": 0.47265625, "learning_rate": 4.669063155200832e-06, "loss": 2.6165, "step": 17771 }, { "epoch": 0.9534334763948498, "grad_norm": 0.5, "learning_rate": 4.669019957281572e-06, "loss": 2.3344, "step": 17772 }, { "epoch": 0.9534871244635194, "grad_norm": 0.4609375, "learning_rate": 4.668976756742996e-06, "loss": 2.3425, "step": 17773 }, { "epoch": 0.9535407725321888, "grad_norm": 0.62109375, "learning_rate": 4.668933553585155e-06, "loss": 2.2297, "step": 17774 }, { "epoch": 0.9535944206008584, "grad_norm": 0.466796875, "learning_rate": 4.668890347808103e-06, "loss": 2.3055, "step": 17775 }, { "epoch": 0.9536480686695279, "grad_norm": 0.375, "learning_rate": 4.668847139411889e-06, "loss": 2.2759, "step": 17776 }, { "epoch": 0.9537017167381975, "grad_norm": 0.5, "learning_rate": 4.668803928396569e-06, "loss": 2.3727, "step": 17777 }, { "epoch": 0.953755364806867, "grad_norm": 0.453125, "learning_rate": 4.668760714762193e-06, "loss": 2.1305, "step": 17778 }, { "epoch": 0.9538090128755364, "grad_norm": 0.63671875, "learning_rate": 4.668717498508812e-06, "loss": 2.4295, "step": 17779 }, { "epoch": 0.953862660944206, "grad_norm": 0.384765625, "learning_rate": 4.668674279636481e-06, "loss": 2.1053, "step": 17780 }, { "epoch": 0.9539163090128755, "grad_norm": 0.63671875, "learning_rate": 4.66863105814525e-06, "loss": 2.1757, "step": 17781 }, { "epoch": 0.9539699570815451, "grad_norm": 0.4765625, "learning_rate": 4.668587834035172e-06, "loss": 2.004, "step": 17782 }, { "epoch": 0.9540236051502146, "grad_norm": 0.51171875, "learning_rate": 4.6685446073063e-06, "loss": 2.2865, "step": 17783 }, { "epoch": 0.9540772532188841, "grad_norm": 0.4375, "learning_rate": 4.668501377958685e-06, "loss": 2.2347, "step": 17784 }, { "epoch": 0.9541309012875536, "grad_norm": 0.490234375, "learning_rate": 4.668458145992379e-06, "loss": 2.3226, "step": 17785 }, { "epoch": 0.9541845493562232, "grad_norm": 0.48046875, "learning_rate": 4.668414911407436e-06, "loss": 2.1917, "step": 17786 }, { "epoch": 0.9542381974248927, "grad_norm": 0.48828125, "learning_rate": 4.668371674203907e-06, "loss": 2.176, "step": 17787 }, { "epoch": 0.9542918454935623, "grad_norm": 0.453125, "learning_rate": 4.668328434381844e-06, "loss": 2.3225, "step": 17788 }, { "epoch": 0.9543454935622318, "grad_norm": 0.48046875, "learning_rate": 4.668285191941298e-06, "loss": 2.2614, "step": 17789 }, { "epoch": 0.9543991416309013, "grad_norm": 0.62109375, "learning_rate": 4.6682419468823245e-06, "loss": 2.2065, "step": 17790 }, { "epoch": 0.9544527896995708, "grad_norm": 0.41015625, "learning_rate": 4.668198699204974e-06, "loss": 2.0477, "step": 17791 }, { "epoch": 0.9545064377682403, "grad_norm": 0.462890625, "learning_rate": 4.6681554489092964e-06, "loss": 2.2284, "step": 17792 }, { "epoch": 0.9545600858369099, "grad_norm": 0.5234375, "learning_rate": 4.668112195995348e-06, "loss": 2.2762, "step": 17793 }, { "epoch": 0.9546137339055794, "grad_norm": 0.5, "learning_rate": 4.668068940463179e-06, "loss": 2.3979, "step": 17794 }, { "epoch": 0.9546673819742489, "grad_norm": 0.470703125, "learning_rate": 4.668025682312841e-06, "loss": 2.4425, "step": 17795 }, { "epoch": 0.9547210300429184, "grad_norm": 0.5859375, "learning_rate": 4.667982421544388e-06, "loss": 1.9595, "step": 17796 }, { "epoch": 0.954774678111588, "grad_norm": 0.361328125, "learning_rate": 4.667939158157871e-06, "loss": 2.0722, "step": 17797 }, { "epoch": 0.9548283261802575, "grad_norm": 0.59765625, "learning_rate": 4.667895892153342e-06, "loss": 2.2138, "step": 17798 }, { "epoch": 0.9548819742489271, "grad_norm": 0.43359375, "learning_rate": 4.667852623530855e-06, "loss": 2.3499, "step": 17799 }, { "epoch": 0.9549356223175965, "grad_norm": 0.443359375, "learning_rate": 4.66780935229046e-06, "loss": 2.1612, "step": 17800 }, { "epoch": 0.9549892703862661, "grad_norm": 0.490234375, "learning_rate": 4.667766078432211e-06, "loss": 2.3547, "step": 17801 }, { "epoch": 0.9550429184549356, "grad_norm": 1.015625, "learning_rate": 4.667722801956159e-06, "loss": 2.2162, "step": 17802 }, { "epoch": 0.9550965665236052, "grad_norm": 0.48828125, "learning_rate": 4.667679522862357e-06, "loss": 2.2287, "step": 17803 }, { "epoch": 0.9551502145922747, "grad_norm": 0.421875, "learning_rate": 4.667636241150857e-06, "loss": 2.1874, "step": 17804 }, { "epoch": 0.9552038626609443, "grad_norm": 0.39453125, "learning_rate": 4.667592956821711e-06, "loss": 2.1661, "step": 17805 }, { "epoch": 0.9552575107296137, "grad_norm": 0.69140625, "learning_rate": 4.667549669874973e-06, "loss": 2.4888, "step": 17806 }, { "epoch": 0.9553111587982832, "grad_norm": 0.478515625, "learning_rate": 4.667506380310692e-06, "loss": 2.5963, "step": 17807 }, { "epoch": 0.9553648068669528, "grad_norm": 0.5, "learning_rate": 4.667463088128923e-06, "loss": 2.2806, "step": 17808 }, { "epoch": 0.9554184549356223, "grad_norm": 0.46484375, "learning_rate": 4.667419793329718e-06, "loss": 2.4153, "step": 17809 }, { "epoch": 0.9554721030042919, "grad_norm": 0.47265625, "learning_rate": 4.667376495913128e-06, "loss": 2.3438, "step": 17810 }, { "epoch": 0.9555257510729613, "grad_norm": 0.5546875, "learning_rate": 4.667333195879207e-06, "loss": 2.4212, "step": 17811 }, { "epoch": 0.9555793991416309, "grad_norm": 0.427734375, "learning_rate": 4.667289893228005e-06, "loss": 2.1834, "step": 17812 }, { "epoch": 0.9556330472103004, "grad_norm": 0.4609375, "learning_rate": 4.667246587959577e-06, "loss": 2.2095, "step": 17813 }, { "epoch": 0.95568669527897, "grad_norm": 0.51171875, "learning_rate": 4.667203280073973e-06, "loss": 2.3813, "step": 17814 }, { "epoch": 0.9557403433476395, "grad_norm": 0.47265625, "learning_rate": 4.6671599695712466e-06, "loss": 2.0616, "step": 17815 }, { "epoch": 0.955793991416309, "grad_norm": 0.53515625, "learning_rate": 4.667116656451449e-06, "loss": 2.0706, "step": 17816 }, { "epoch": 0.9558476394849785, "grad_norm": 0.69140625, "learning_rate": 4.667073340714634e-06, "loss": 2.1913, "step": 17817 }, { "epoch": 0.9559012875536481, "grad_norm": 0.6953125, "learning_rate": 4.667030022360853e-06, "loss": 2.3662, "step": 17818 }, { "epoch": 0.9559549356223176, "grad_norm": 0.416015625, "learning_rate": 4.666986701390158e-06, "loss": 2.2546, "step": 17819 }, { "epoch": 0.9560085836909872, "grad_norm": 0.462890625, "learning_rate": 4.666943377802602e-06, "loss": 2.4478, "step": 17820 }, { "epoch": 0.9560622317596567, "grad_norm": 0.48828125, "learning_rate": 4.666900051598237e-06, "loss": 2.3468, "step": 17821 }, { "epoch": 0.9561158798283261, "grad_norm": 0.498046875, "learning_rate": 4.666856722777116e-06, "loss": 2.346, "step": 17822 }, { "epoch": 0.9561695278969957, "grad_norm": 0.5078125, "learning_rate": 4.666813391339289e-06, "loss": 1.7465, "step": 17823 }, { "epoch": 0.9562231759656652, "grad_norm": 0.427734375, "learning_rate": 4.6667700572848115e-06, "loss": 2.4238, "step": 17824 }, { "epoch": 0.9562768240343348, "grad_norm": 0.50390625, "learning_rate": 4.666726720613734e-06, "loss": 2.448, "step": 17825 }, { "epoch": 0.9563304721030043, "grad_norm": 0.3828125, "learning_rate": 4.666683381326109e-06, "loss": 2.0573, "step": 17826 }, { "epoch": 0.9563841201716738, "grad_norm": 0.4453125, "learning_rate": 4.666640039421989e-06, "loss": 2.1582, "step": 17827 }, { "epoch": 0.9564377682403433, "grad_norm": 0.453125, "learning_rate": 4.6665966949014264e-06, "loss": 2.2258, "step": 17828 }, { "epoch": 0.9564914163090129, "grad_norm": 0.66015625, "learning_rate": 4.666553347764474e-06, "loss": 2.4949, "step": 17829 }, { "epoch": 0.9565450643776824, "grad_norm": 0.4609375, "learning_rate": 4.6665099980111836e-06, "loss": 2.2261, "step": 17830 }, { "epoch": 0.956598712446352, "grad_norm": 0.52734375, "learning_rate": 4.666466645641607e-06, "loss": 2.3395, "step": 17831 }, { "epoch": 0.9566523605150214, "grad_norm": 0.546875, "learning_rate": 4.666423290655798e-06, "loss": 2.2474, "step": 17832 }, { "epoch": 0.956706008583691, "grad_norm": 0.48046875, "learning_rate": 4.666379933053808e-06, "loss": 2.1317, "step": 17833 }, { "epoch": 0.9567596566523605, "grad_norm": 0.5078125, "learning_rate": 4.666336572835688e-06, "loss": 2.3581, "step": 17834 }, { "epoch": 0.95681330472103, "grad_norm": 4.90625, "learning_rate": 4.666293210001494e-06, "loss": 2.5209, "step": 17835 }, { "epoch": 0.9568669527896996, "grad_norm": 0.51171875, "learning_rate": 4.666249844551275e-06, "loss": 2.2747, "step": 17836 }, { "epoch": 0.956920600858369, "grad_norm": 0.8046875, "learning_rate": 4.6662064764850844e-06, "loss": 2.3146, "step": 17837 }, { "epoch": 0.9569742489270386, "grad_norm": 0.390625, "learning_rate": 4.666163105802974e-06, "loss": 1.8342, "step": 17838 }, { "epoch": 0.9570278969957081, "grad_norm": 0.56640625, "learning_rate": 4.666119732504999e-06, "loss": 2.6517, "step": 17839 }, { "epoch": 0.9570815450643777, "grad_norm": 0.47265625, "learning_rate": 4.666076356591208e-06, "loss": 2.3902, "step": 17840 }, { "epoch": 0.9571351931330472, "grad_norm": 0.412109375, "learning_rate": 4.666032978061656e-06, "loss": 2.1366, "step": 17841 }, { "epoch": 0.9571888412017168, "grad_norm": 0.412109375, "learning_rate": 4.665989596916395e-06, "loss": 2.3877, "step": 17842 }, { "epoch": 0.9572424892703862, "grad_norm": 0.640625, "learning_rate": 4.665946213155476e-06, "loss": 2.481, "step": 17843 }, { "epoch": 0.9572961373390558, "grad_norm": 0.4453125, "learning_rate": 4.665902826778952e-06, "loss": 2.1459, "step": 17844 }, { "epoch": 0.9573497854077253, "grad_norm": 0.435546875, "learning_rate": 4.665859437786876e-06, "loss": 2.2057, "step": 17845 }, { "epoch": 0.9574034334763949, "grad_norm": 0.5234375, "learning_rate": 4.665816046179299e-06, "loss": 2.3876, "step": 17846 }, { "epoch": 0.9574570815450644, "grad_norm": 0.36328125, "learning_rate": 4.665772651956276e-06, "loss": 2.0955, "step": 17847 }, { "epoch": 0.957510729613734, "grad_norm": 0.703125, "learning_rate": 4.665729255117857e-06, "loss": 2.1213, "step": 17848 }, { "epoch": 0.9575643776824034, "grad_norm": 0.5703125, "learning_rate": 4.665685855664095e-06, "loss": 2.3521, "step": 17849 }, { "epoch": 0.9576180257510729, "grad_norm": 0.57421875, "learning_rate": 4.665642453595043e-06, "loss": 2.1939, "step": 17850 }, { "epoch": 0.9576716738197425, "grad_norm": 0.455078125, "learning_rate": 4.665599048910753e-06, "loss": 2.4719, "step": 17851 }, { "epoch": 0.957725321888412, "grad_norm": 0.43359375, "learning_rate": 4.665555641611278e-06, "loss": 2.2006, "step": 17852 }, { "epoch": 0.9577789699570816, "grad_norm": 0.478515625, "learning_rate": 4.66551223169667e-06, "loss": 2.1455, "step": 17853 }, { "epoch": 0.957832618025751, "grad_norm": 0.5234375, "learning_rate": 4.665468819166981e-06, "loss": 2.2386, "step": 17854 }, { "epoch": 0.9578862660944206, "grad_norm": 0.453125, "learning_rate": 4.665425404022262e-06, "loss": 2.2293, "step": 17855 }, { "epoch": 0.9579399141630901, "grad_norm": 0.408203125, "learning_rate": 4.665381986262569e-06, "loss": 2.0173, "step": 17856 }, { "epoch": 0.9579935622317597, "grad_norm": 0.455078125, "learning_rate": 4.665338565887952e-06, "loss": 2.1544, "step": 17857 }, { "epoch": 0.9580472103004292, "grad_norm": 0.498046875, "learning_rate": 4.665295142898465e-06, "loss": 2.3091, "step": 17858 }, { "epoch": 0.9581008583690988, "grad_norm": 0.4921875, "learning_rate": 4.6652517172941586e-06, "loss": 2.1978, "step": 17859 }, { "epoch": 0.9581545064377682, "grad_norm": 0.91796875, "learning_rate": 4.665208289075086e-06, "loss": 2.3814, "step": 17860 }, { "epoch": 0.9582081545064378, "grad_norm": 0.54296875, "learning_rate": 4.6651648582413e-06, "loss": 2.506, "step": 17861 }, { "epoch": 0.9582618025751073, "grad_norm": 0.4765625, "learning_rate": 4.6651214247928536e-06, "loss": 2.5139, "step": 17862 }, { "epoch": 0.9583154506437769, "grad_norm": 0.484375, "learning_rate": 4.665077988729797e-06, "loss": 2.171, "step": 17863 }, { "epoch": 0.9583690987124464, "grad_norm": 0.48828125, "learning_rate": 4.665034550052185e-06, "loss": 2.3052, "step": 17864 }, { "epoch": 0.9584227467811158, "grad_norm": 1.1640625, "learning_rate": 4.6649911087600695e-06, "loss": 2.5694, "step": 17865 }, { "epoch": 0.9584763948497854, "grad_norm": 0.58203125, "learning_rate": 4.664947664853502e-06, "loss": 2.2674, "step": 17866 }, { "epoch": 0.9585300429184549, "grad_norm": 0.4609375, "learning_rate": 4.664904218332536e-06, "loss": 2.0701, "step": 17867 }, { "epoch": 0.9585836909871245, "grad_norm": 0.48046875, "learning_rate": 4.664860769197223e-06, "loss": 2.2611, "step": 17868 }, { "epoch": 0.958637339055794, "grad_norm": 0.5, "learning_rate": 4.664817317447616e-06, "loss": 2.2912, "step": 17869 }, { "epoch": 0.9586909871244635, "grad_norm": 0.376953125, "learning_rate": 4.664773863083768e-06, "loss": 2.2924, "step": 17870 }, { "epoch": 0.958744635193133, "grad_norm": 0.5234375, "learning_rate": 4.664730406105731e-06, "loss": 2.3563, "step": 17871 }, { "epoch": 0.9587982832618026, "grad_norm": 0.5078125, "learning_rate": 4.664686946513557e-06, "loss": 2.4982, "step": 17872 }, { "epoch": 0.9588519313304721, "grad_norm": 0.4765625, "learning_rate": 4.6646434843072984e-06, "loss": 2.3215, "step": 17873 }, { "epoch": 0.9589055793991417, "grad_norm": 0.41015625, "learning_rate": 4.66460001948701e-06, "loss": 2.2173, "step": 17874 }, { "epoch": 0.9589592274678111, "grad_norm": 0.53515625, "learning_rate": 4.664556552052741e-06, "loss": 2.2479, "step": 17875 }, { "epoch": 0.9590128755364807, "grad_norm": 0.4140625, "learning_rate": 4.664513082004546e-06, "loss": 2.2588, "step": 17876 }, { "epoch": 0.9590665236051502, "grad_norm": 0.478515625, "learning_rate": 4.664469609342476e-06, "loss": 2.1312, "step": 17877 }, { "epoch": 0.9591201716738197, "grad_norm": 0.4765625, "learning_rate": 4.664426134066585e-06, "loss": 2.3766, "step": 17878 }, { "epoch": 0.9591738197424893, "grad_norm": 0.458984375, "learning_rate": 4.664382656176926e-06, "loss": 2.315, "step": 17879 }, { "epoch": 0.9592274678111588, "grad_norm": 0.47265625, "learning_rate": 4.664339175673549e-06, "loss": 2.3653, "step": 17880 }, { "epoch": 0.9592811158798283, "grad_norm": 0.5, "learning_rate": 4.664295692556509e-06, "loss": 2.2742, "step": 17881 }, { "epoch": 0.9593347639484978, "grad_norm": 0.427734375, "learning_rate": 4.6642522068258555e-06, "loss": 2.1996, "step": 17882 }, { "epoch": 0.9593884120171674, "grad_norm": 0.419921875, "learning_rate": 4.664208718481644e-06, "loss": 2.3342, "step": 17883 }, { "epoch": 0.9594420600858369, "grad_norm": 0.5625, "learning_rate": 4.664165227523927e-06, "loss": 2.5298, "step": 17884 }, { "epoch": 0.9594957081545065, "grad_norm": 0.55078125, "learning_rate": 4.664121733952754e-06, "loss": 2.4906, "step": 17885 }, { "epoch": 0.9595493562231759, "grad_norm": 0.400390625, "learning_rate": 4.6640782377681815e-06, "loss": 2.0652, "step": 17886 }, { "epoch": 0.9596030042918455, "grad_norm": 0.341796875, "learning_rate": 4.664034738970259e-06, "loss": 2.055, "step": 17887 }, { "epoch": 0.959656652360515, "grad_norm": 0.451171875, "learning_rate": 4.66399123755904e-06, "loss": 2.3918, "step": 17888 }, { "epoch": 0.9597103004291846, "grad_norm": 0.421875, "learning_rate": 4.663947733534577e-06, "loss": 2.3045, "step": 17889 }, { "epoch": 0.9597639484978541, "grad_norm": 0.46484375, "learning_rate": 4.6639042268969226e-06, "loss": 2.0995, "step": 17890 }, { "epoch": 0.9598175965665237, "grad_norm": 0.58984375, "learning_rate": 4.6638607176461295e-06, "loss": 1.9653, "step": 17891 }, { "epoch": 0.9598712446351931, "grad_norm": 0.48828125, "learning_rate": 4.66381720578225e-06, "loss": 2.095, "step": 17892 }, { "epoch": 0.9599248927038626, "grad_norm": 0.4765625, "learning_rate": 4.663773691305336e-06, "loss": 2.1157, "step": 17893 }, { "epoch": 0.9599785407725322, "grad_norm": 0.453125, "learning_rate": 4.663730174215443e-06, "loss": 2.1883, "step": 17894 }, { "epoch": 0.9600321888412017, "grad_norm": 0.412109375, "learning_rate": 4.663686654512619e-06, "loss": 2.0253, "step": 17895 }, { "epoch": 0.9600858369098713, "grad_norm": 0.458984375, "learning_rate": 4.66364313219692e-06, "loss": 2.3904, "step": 17896 }, { "epoch": 0.9601394849785407, "grad_norm": 0.5, "learning_rate": 4.663599607268397e-06, "loss": 2.2621, "step": 17897 }, { "epoch": 0.9601931330472103, "grad_norm": 0.5625, "learning_rate": 4.663556079727104e-06, "loss": 2.8706, "step": 17898 }, { "epoch": 0.9602467811158798, "grad_norm": 0.5234375, "learning_rate": 4.6635125495730915e-06, "loss": 2.4845, "step": 17899 }, { "epoch": 0.9603004291845494, "grad_norm": 0.40234375, "learning_rate": 4.663469016806413e-06, "loss": 2.3991, "step": 17900 }, { "epoch": 0.9603540772532189, "grad_norm": 0.46875, "learning_rate": 4.663425481427122e-06, "loss": 2.6408, "step": 17901 }, { "epoch": 0.9604077253218885, "grad_norm": 0.51171875, "learning_rate": 4.66338194343527e-06, "loss": 2.6081, "step": 17902 }, { "epoch": 0.9604613733905579, "grad_norm": 0.49609375, "learning_rate": 4.66333840283091e-06, "loss": 2.2986, "step": 17903 }, { "epoch": 0.9605150214592275, "grad_norm": 0.58203125, "learning_rate": 4.663294859614095e-06, "loss": 2.3463, "step": 17904 }, { "epoch": 0.960568669527897, "grad_norm": 0.47265625, "learning_rate": 4.663251313784876e-06, "loss": 2.2568, "step": 17905 }, { "epoch": 0.9606223175965666, "grad_norm": 0.4765625, "learning_rate": 4.6632077653433065e-06, "loss": 2.5032, "step": 17906 }, { "epoch": 0.960675965665236, "grad_norm": 0.50390625, "learning_rate": 4.663164214289439e-06, "loss": 2.3227, "step": 17907 }, { "epoch": 0.9607296137339055, "grad_norm": 0.44921875, "learning_rate": 4.663120660623327e-06, "loss": 2.2312, "step": 17908 }, { "epoch": 0.9607832618025751, "grad_norm": 0.55859375, "learning_rate": 4.663077104345023e-06, "loss": 2.4935, "step": 17909 }, { "epoch": 0.9608369098712446, "grad_norm": 6.0625, "learning_rate": 4.663033545454578e-06, "loss": 2.5312, "step": 17910 }, { "epoch": 0.9608905579399142, "grad_norm": 0.55078125, "learning_rate": 4.662989983952045e-06, "loss": 1.9605, "step": 17911 }, { "epoch": 0.9609442060085837, "grad_norm": 0.447265625, "learning_rate": 4.662946419837478e-06, "loss": 2.3152, "step": 17912 }, { "epoch": 0.9609978540772532, "grad_norm": 0.44921875, "learning_rate": 4.662902853110929e-06, "loss": 2.3839, "step": 17913 }, { "epoch": 0.9610515021459227, "grad_norm": 0.55078125, "learning_rate": 4.6628592837724505e-06, "loss": 2.3359, "step": 17914 }, { "epoch": 0.9611051502145923, "grad_norm": 0.41015625, "learning_rate": 4.662815711822095e-06, "loss": 2.0931, "step": 17915 }, { "epoch": 0.9611587982832618, "grad_norm": 0.482421875, "learning_rate": 4.662772137259914e-06, "loss": 2.2148, "step": 17916 }, { "epoch": 0.9612124463519314, "grad_norm": 0.51171875, "learning_rate": 4.6627285600859625e-06, "loss": 2.2154, "step": 17917 }, { "epoch": 0.9612660944206008, "grad_norm": 0.44140625, "learning_rate": 4.662684980300291e-06, "loss": 2.2285, "step": 17918 }, { "epoch": 0.9613197424892704, "grad_norm": 0.75, "learning_rate": 4.662641397902954e-06, "loss": 1.908, "step": 17919 }, { "epoch": 0.9613733905579399, "grad_norm": 0.54296875, "learning_rate": 4.662597812894003e-06, "loss": 2.303, "step": 17920 }, { "epoch": 0.9614270386266094, "grad_norm": 0.51953125, "learning_rate": 4.66255422527349e-06, "loss": 2.4434, "step": 17921 }, { "epoch": 0.961480686695279, "grad_norm": 0.478515625, "learning_rate": 4.6625106350414694e-06, "loss": 2.2799, "step": 17922 }, { "epoch": 0.9615343347639485, "grad_norm": 0.43359375, "learning_rate": 4.662467042197993e-06, "loss": 2.1446, "step": 17923 }, { "epoch": 0.961587982832618, "grad_norm": 0.451171875, "learning_rate": 4.6624234467431115e-06, "loss": 2.2036, "step": 17924 }, { "epoch": 0.9616416309012875, "grad_norm": 0.5390625, "learning_rate": 4.662379848676881e-06, "loss": 2.3422, "step": 17925 }, { "epoch": 0.9616952789699571, "grad_norm": 0.412109375, "learning_rate": 4.662336247999352e-06, "loss": 2.179, "step": 17926 }, { "epoch": 0.9617489270386266, "grad_norm": 0.462890625, "learning_rate": 4.662292644710578e-06, "loss": 2.3648, "step": 17927 }, { "epoch": 0.9618025751072962, "grad_norm": 0.484375, "learning_rate": 4.66224903881061e-06, "loss": 2.2384, "step": 17928 }, { "epoch": 0.9618562231759656, "grad_norm": 0.470703125, "learning_rate": 4.662205430299504e-06, "loss": 2.4247, "step": 17929 }, { "epoch": 0.9619098712446352, "grad_norm": 0.67578125, "learning_rate": 4.662161819177309e-06, "loss": 2.423, "step": 17930 }, { "epoch": 0.9619635193133047, "grad_norm": 0.5, "learning_rate": 4.6621182054440796e-06, "loss": 1.9977, "step": 17931 }, { "epoch": 0.9620171673819743, "grad_norm": 0.546875, "learning_rate": 4.662074589099868e-06, "loss": 2.0691, "step": 17932 }, { "epoch": 0.9620708154506438, "grad_norm": 0.52734375, "learning_rate": 4.662030970144727e-06, "loss": 2.1087, "step": 17933 }, { "epoch": 0.9621244635193134, "grad_norm": 0.55859375, "learning_rate": 4.6619873485787105e-06, "loss": 2.2873, "step": 17934 }, { "epoch": 0.9621781115879828, "grad_norm": 0.546875, "learning_rate": 4.661943724401868e-06, "loss": 2.3634, "step": 17935 }, { "epoch": 0.9622317596566523, "grad_norm": 0.37109375, "learning_rate": 4.661900097614256e-06, "loss": 2.0998, "step": 17936 }, { "epoch": 0.9622854077253219, "grad_norm": 0.55859375, "learning_rate": 4.661856468215924e-06, "loss": 1.7207, "step": 17937 }, { "epoch": 0.9623390557939914, "grad_norm": 0.43359375, "learning_rate": 4.661812836206927e-06, "loss": 2.2912, "step": 17938 }, { "epoch": 0.962392703862661, "grad_norm": 0.40234375, "learning_rate": 4.661769201587317e-06, "loss": 2.1708, "step": 17939 }, { "epoch": 0.9624463519313304, "grad_norm": 0.423828125, "learning_rate": 4.661725564357146e-06, "loss": 2.1242, "step": 17940 }, { "epoch": 0.9625, "grad_norm": 0.423828125, "learning_rate": 4.661681924516466e-06, "loss": 2.0708, "step": 17941 }, { "epoch": 0.9625536480686695, "grad_norm": 0.419921875, "learning_rate": 4.661638282065332e-06, "loss": 2.2372, "step": 17942 }, { "epoch": 0.9626072961373391, "grad_norm": 0.435546875, "learning_rate": 4.661594637003795e-06, "loss": 2.2896, "step": 17943 }, { "epoch": 0.9626609442060086, "grad_norm": 0.49609375, "learning_rate": 4.6615509893319085e-06, "loss": 2.4894, "step": 17944 }, { "epoch": 0.9627145922746781, "grad_norm": 0.435546875, "learning_rate": 4.661507339049725e-06, "loss": 1.7269, "step": 17945 }, { "epoch": 0.9627682403433476, "grad_norm": 0.4453125, "learning_rate": 4.661463686157297e-06, "loss": 2.3667, "step": 17946 }, { "epoch": 0.9628218884120172, "grad_norm": 0.453125, "learning_rate": 4.661420030654677e-06, "loss": 2.4267, "step": 17947 }, { "epoch": 0.9628755364806867, "grad_norm": 0.490234375, "learning_rate": 4.661376372541918e-06, "loss": 2.2758, "step": 17948 }, { "epoch": 0.9629291845493563, "grad_norm": 0.455078125, "learning_rate": 4.661332711819074e-06, "loss": 2.2129, "step": 17949 }, { "epoch": 0.9629828326180258, "grad_norm": 0.5078125, "learning_rate": 4.661289048486195e-06, "loss": 2.3918, "step": 17950 }, { "epoch": 0.9630364806866952, "grad_norm": 0.462890625, "learning_rate": 4.661245382543336e-06, "loss": 2.1831, "step": 17951 }, { "epoch": 0.9630901287553648, "grad_norm": 0.44921875, "learning_rate": 4.661201713990549e-06, "loss": 2.1745, "step": 17952 }, { "epoch": 0.9631437768240343, "grad_norm": 0.470703125, "learning_rate": 4.661158042827887e-06, "loss": 2.0545, "step": 17953 }, { "epoch": 0.9631974248927039, "grad_norm": 0.5078125, "learning_rate": 4.661114369055402e-06, "loss": 2.3423, "step": 17954 }, { "epoch": 0.9632510729613734, "grad_norm": 0.48046875, "learning_rate": 4.6610706926731465e-06, "loss": 2.2726, "step": 17955 }, { "epoch": 0.9633047210300429, "grad_norm": 0.4765625, "learning_rate": 4.661027013681175e-06, "loss": 2.0591, "step": 17956 }, { "epoch": 0.9633583690987124, "grad_norm": 0.47265625, "learning_rate": 4.66098333207954e-06, "loss": 2.2182, "step": 17957 }, { "epoch": 0.963412017167382, "grad_norm": 0.52734375, "learning_rate": 4.660939647868291e-06, "loss": 2.3336, "step": 17958 }, { "epoch": 0.9634656652360515, "grad_norm": 0.578125, "learning_rate": 4.6608959610474844e-06, "loss": 2.2744, "step": 17959 }, { "epoch": 0.9635193133047211, "grad_norm": 0.3984375, "learning_rate": 4.660852271617172e-06, "loss": 2.3341, "step": 17960 }, { "epoch": 0.9635729613733905, "grad_norm": 0.482421875, "learning_rate": 4.6608085795774065e-06, "loss": 2.2689, "step": 17961 }, { "epoch": 0.9636266094420601, "grad_norm": 0.443359375, "learning_rate": 4.66076488492824e-06, "loss": 2.4305, "step": 17962 }, { "epoch": 0.9636802575107296, "grad_norm": 0.4375, "learning_rate": 4.660721187669725e-06, "loss": 2.1985, "step": 17963 }, { "epoch": 0.9637339055793992, "grad_norm": 0.408203125, "learning_rate": 4.660677487801917e-06, "loss": 2.0304, "step": 17964 }, { "epoch": 0.9637875536480687, "grad_norm": 0.41015625, "learning_rate": 4.660633785324865e-06, "loss": 2.3126, "step": 17965 }, { "epoch": 0.9638412017167381, "grad_norm": 0.365234375, "learning_rate": 4.660590080238625e-06, "loss": 1.9451, "step": 17966 }, { "epoch": 0.9638948497854077, "grad_norm": 0.67578125, "learning_rate": 4.660546372543247e-06, "loss": 2.3733, "step": 17967 }, { "epoch": 0.9639484978540772, "grad_norm": 0.498046875, "learning_rate": 4.660502662238785e-06, "loss": 2.196, "step": 17968 }, { "epoch": 0.9640021459227468, "grad_norm": 0.46875, "learning_rate": 4.660458949325293e-06, "loss": 2.4398, "step": 17969 }, { "epoch": 0.9640557939914163, "grad_norm": 0.546875, "learning_rate": 4.660415233802822e-06, "loss": 1.3928, "step": 17970 }, { "epoch": 0.9641094420600859, "grad_norm": 0.51953125, "learning_rate": 4.660371515671426e-06, "loss": 2.1278, "step": 17971 }, { "epoch": 0.9641630901287553, "grad_norm": 0.7734375, "learning_rate": 4.660327794931157e-06, "loss": 2.3782, "step": 17972 }, { "epoch": 0.9642167381974249, "grad_norm": 0.49609375, "learning_rate": 4.660284071582067e-06, "loss": 2.2306, "step": 17973 }, { "epoch": 0.9642703862660944, "grad_norm": 0.4609375, "learning_rate": 4.660240345624211e-06, "loss": 2.3412, "step": 17974 }, { "epoch": 0.964324034334764, "grad_norm": 0.51171875, "learning_rate": 4.66019661705764e-06, "loss": 2.1774, "step": 17975 }, { "epoch": 0.9643776824034335, "grad_norm": 0.466796875, "learning_rate": 4.660152885882408e-06, "loss": 2.2601, "step": 17976 }, { "epoch": 0.964431330472103, "grad_norm": 0.515625, "learning_rate": 4.660109152098567e-06, "loss": 2.3757, "step": 17977 }, { "epoch": 0.9644849785407725, "grad_norm": 0.50390625, "learning_rate": 4.660065415706171e-06, "loss": 2.3939, "step": 17978 }, { "epoch": 0.964538626609442, "grad_norm": 0.494140625, "learning_rate": 4.660021676705271e-06, "loss": 2.4246, "step": 17979 }, { "epoch": 0.9645922746781116, "grad_norm": 0.4609375, "learning_rate": 4.659977935095921e-06, "loss": 2.362, "step": 17980 }, { "epoch": 0.9646459227467811, "grad_norm": 0.455078125, "learning_rate": 4.659934190878174e-06, "loss": 1.8477, "step": 17981 }, { "epoch": 0.9646995708154507, "grad_norm": 0.54296875, "learning_rate": 4.659890444052081e-06, "loss": 2.3595, "step": 17982 }, { "epoch": 0.9647532188841201, "grad_norm": 5.03125, "learning_rate": 4.659846694617697e-06, "loss": 2.2807, "step": 17983 }, { "epoch": 0.9648068669527897, "grad_norm": 0.43359375, "learning_rate": 4.659802942575075e-06, "loss": 2.2645, "step": 17984 }, { "epoch": 0.9648605150214592, "grad_norm": 0.48828125, "learning_rate": 4.6597591879242655e-06, "loss": 2.1295, "step": 17985 }, { "epoch": 0.9649141630901288, "grad_norm": 0.4765625, "learning_rate": 4.659715430665322e-06, "loss": 2.3274, "step": 17986 }, { "epoch": 0.9649678111587983, "grad_norm": 0.48046875, "learning_rate": 4.6596716707983e-06, "loss": 2.3047, "step": 17987 }, { "epoch": 0.9650214592274678, "grad_norm": 0.49609375, "learning_rate": 4.65962790832325e-06, "loss": 2.3755, "step": 17988 }, { "epoch": 0.9650751072961373, "grad_norm": 0.703125, "learning_rate": 4.6595841432402245e-06, "loss": 2.4964, "step": 17989 }, { "epoch": 0.9651287553648069, "grad_norm": 0.3984375, "learning_rate": 4.659540375549278e-06, "loss": 2.1963, "step": 17990 }, { "epoch": 0.9651824034334764, "grad_norm": 0.51953125, "learning_rate": 4.659496605250461e-06, "loss": 2.3464, "step": 17991 }, { "epoch": 0.965236051502146, "grad_norm": 0.58984375, "learning_rate": 4.659452832343829e-06, "loss": 2.4125, "step": 17992 }, { "epoch": 0.9652896995708155, "grad_norm": 0.4140625, "learning_rate": 4.659409056829434e-06, "loss": 2.3591, "step": 17993 }, { "epoch": 0.9653433476394849, "grad_norm": 0.52734375, "learning_rate": 4.659365278707327e-06, "loss": 2.3184, "step": 17994 }, { "epoch": 0.9653969957081545, "grad_norm": 0.470703125, "learning_rate": 4.659321497977563e-06, "loss": 2.3464, "step": 17995 }, { "epoch": 0.965450643776824, "grad_norm": 0.45703125, "learning_rate": 4.659277714640195e-06, "loss": 2.1157, "step": 17996 }, { "epoch": 0.9655042918454936, "grad_norm": 0.44921875, "learning_rate": 4.659233928695275e-06, "loss": 2.313, "step": 17997 }, { "epoch": 0.965557939914163, "grad_norm": 0.53125, "learning_rate": 4.659190140142855e-06, "loss": 2.2136, "step": 17998 }, { "epoch": 0.9656115879828326, "grad_norm": 0.50390625, "learning_rate": 4.65914634898299e-06, "loss": 2.2089, "step": 17999 }, { "epoch": 0.9656652360515021, "grad_norm": 0.458984375, "learning_rate": 4.659102555215732e-06, "loss": 2.1177, "step": 18000 }, { "epoch": 0.9657188841201717, "grad_norm": 0.51171875, "learning_rate": 4.659058758841133e-06, "loss": 2.153, "step": 18001 }, { "epoch": 0.9657725321888412, "grad_norm": 0.39453125, "learning_rate": 4.659014959859246e-06, "loss": 1.9536, "step": 18002 }, { "epoch": 0.9658261802575108, "grad_norm": 0.58984375, "learning_rate": 4.658971158270125e-06, "loss": 2.1033, "step": 18003 }, { "epoch": 0.9658798283261802, "grad_norm": 0.51953125, "learning_rate": 4.658927354073823e-06, "loss": 1.3716, "step": 18004 }, { "epoch": 0.9659334763948498, "grad_norm": 0.51953125, "learning_rate": 4.658883547270392e-06, "loss": 2.3048, "step": 18005 }, { "epoch": 0.9659871244635193, "grad_norm": 0.47265625, "learning_rate": 4.658839737859884e-06, "loss": 2.4142, "step": 18006 }, { "epoch": 0.9660407725321889, "grad_norm": 0.43359375, "learning_rate": 4.658795925842354e-06, "loss": 1.9933, "step": 18007 }, { "epoch": 0.9660944206008584, "grad_norm": 0.482421875, "learning_rate": 4.658752111217853e-06, "loss": 2.3474, "step": 18008 }, { "epoch": 0.9661480686695278, "grad_norm": 0.486328125, "learning_rate": 4.658708293986436e-06, "loss": 2.0425, "step": 18009 }, { "epoch": 0.9662017167381974, "grad_norm": 0.455078125, "learning_rate": 4.658664474148155e-06, "loss": 2.4052, "step": 18010 }, { "epoch": 0.9662553648068669, "grad_norm": 0.46484375, "learning_rate": 4.658620651703062e-06, "loss": 2.1841, "step": 18011 }, { "epoch": 0.9663090128755365, "grad_norm": 0.486328125, "learning_rate": 4.65857682665121e-06, "loss": 2.3241, "step": 18012 }, { "epoch": 0.966362660944206, "grad_norm": 0.546875, "learning_rate": 4.658532998992654e-06, "loss": 2.2221, "step": 18013 }, { "epoch": 0.9664163090128756, "grad_norm": 0.5625, "learning_rate": 4.658489168727445e-06, "loss": 2.4998, "step": 18014 }, { "epoch": 0.966469957081545, "grad_norm": 0.451171875, "learning_rate": 4.658445335855635e-06, "loss": 2.2404, "step": 18015 }, { "epoch": 0.9665236051502146, "grad_norm": 0.431640625, "learning_rate": 4.65840150037728e-06, "loss": 2.1405, "step": 18016 }, { "epoch": 0.9665772532188841, "grad_norm": 0.59375, "learning_rate": 4.658357662292431e-06, "loss": 1.8169, "step": 18017 }, { "epoch": 0.9666309012875537, "grad_norm": 0.5390625, "learning_rate": 4.65831382160114e-06, "loss": 1.6235, "step": 18018 }, { "epoch": 0.9666845493562232, "grad_norm": 0.455078125, "learning_rate": 4.658269978303462e-06, "loss": 2.3632, "step": 18019 }, { "epoch": 0.9667381974248928, "grad_norm": 0.4921875, "learning_rate": 4.658226132399449e-06, "loss": 2.6164, "step": 18020 }, { "epoch": 0.9667918454935622, "grad_norm": 0.56640625, "learning_rate": 4.658182283889154e-06, "loss": 2.4559, "step": 18021 }, { "epoch": 0.9668454935622317, "grad_norm": 0.439453125, "learning_rate": 4.658138432772631e-06, "loss": 2.1884, "step": 18022 }, { "epoch": 0.9668991416309013, "grad_norm": 0.58984375, "learning_rate": 4.658094579049931e-06, "loss": 1.9105, "step": 18023 }, { "epoch": 0.9669527896995708, "grad_norm": 0.5390625, "learning_rate": 4.658050722721108e-06, "loss": 2.3518, "step": 18024 }, { "epoch": 0.9670064377682404, "grad_norm": 0.66015625, "learning_rate": 4.6580068637862144e-06, "loss": 2.3164, "step": 18025 }, { "epoch": 0.9670600858369098, "grad_norm": 0.3828125, "learning_rate": 4.657963002245304e-06, "loss": 2.03, "step": 18026 }, { "epoch": 0.9671137339055794, "grad_norm": 0.515625, "learning_rate": 4.65791913809843e-06, "loss": 1.8207, "step": 18027 }, { "epoch": 0.9671673819742489, "grad_norm": 0.443359375, "learning_rate": 4.657875271345644e-06, "loss": 2.1337, "step": 18028 }, { "epoch": 0.9672210300429185, "grad_norm": 0.375, "learning_rate": 4.657831401987e-06, "loss": 2.2519, "step": 18029 }, { "epoch": 0.967274678111588, "grad_norm": 0.44140625, "learning_rate": 4.6577875300225505e-06, "loss": 2.3338, "step": 18030 }, { "epoch": 0.9673283261802575, "grad_norm": 0.466796875, "learning_rate": 4.657743655452348e-06, "loss": 2.1954, "step": 18031 }, { "epoch": 0.967381974248927, "grad_norm": 0.50390625, "learning_rate": 4.657699778276448e-06, "loss": 2.2925, "step": 18032 }, { "epoch": 0.9674356223175966, "grad_norm": 0.439453125, "learning_rate": 4.6576558984948995e-06, "loss": 2.3537, "step": 18033 }, { "epoch": 0.9674892703862661, "grad_norm": 0.390625, "learning_rate": 4.65761201610776e-06, "loss": 2.1996, "step": 18034 }, { "epoch": 0.9675429184549357, "grad_norm": 14.25, "learning_rate": 4.6575681311150785e-06, "loss": 2.4458, "step": 18035 }, { "epoch": 0.9675965665236052, "grad_norm": 0.50390625, "learning_rate": 4.65752424351691e-06, "loss": 2.3078, "step": 18036 }, { "epoch": 0.9676502145922746, "grad_norm": 0.4296875, "learning_rate": 4.657480353313307e-06, "loss": 2.3982, "step": 18037 }, { "epoch": 0.9677038626609442, "grad_norm": 0.396484375, "learning_rate": 4.657436460504323e-06, "loss": 2.2387, "step": 18038 }, { "epoch": 0.9677575107296137, "grad_norm": 0.51171875, "learning_rate": 4.657392565090011e-06, "loss": 2.3062, "step": 18039 }, { "epoch": 0.9678111587982833, "grad_norm": 0.7421875, "learning_rate": 4.657348667070423e-06, "loss": 2.2644, "step": 18040 }, { "epoch": 0.9678648068669528, "grad_norm": 0.458984375, "learning_rate": 4.657304766445613e-06, "loss": 2.3402, "step": 18041 }, { "epoch": 0.9679184549356223, "grad_norm": 0.490234375, "learning_rate": 4.657260863215633e-06, "loss": 2.2737, "step": 18042 }, { "epoch": 0.9679721030042918, "grad_norm": 0.412109375, "learning_rate": 4.657216957380537e-06, "loss": 2.06, "step": 18043 }, { "epoch": 0.9680257510729614, "grad_norm": 0.51171875, "learning_rate": 4.657173048940378e-06, "loss": 2.1611, "step": 18044 }, { "epoch": 0.9680793991416309, "grad_norm": 0.412109375, "learning_rate": 4.657129137895209e-06, "loss": 2.3196, "step": 18045 }, { "epoch": 0.9681330472103005, "grad_norm": 0.5, "learning_rate": 4.657085224245083e-06, "loss": 2.3179, "step": 18046 }, { "epoch": 0.9681866952789699, "grad_norm": 0.392578125, "learning_rate": 4.6570413079900516e-06, "loss": 2.0607, "step": 18047 }, { "epoch": 0.9682403433476395, "grad_norm": 0.5078125, "learning_rate": 4.65699738913017e-06, "loss": 1.838, "step": 18048 }, { "epoch": 0.968293991416309, "grad_norm": 0.408203125, "learning_rate": 4.6569534676654896e-06, "loss": 2.1502, "step": 18049 }, { "epoch": 0.9683476394849786, "grad_norm": 0.431640625, "learning_rate": 4.6569095435960645e-06, "loss": 2.22, "step": 18050 }, { "epoch": 0.9684012875536481, "grad_norm": 0.81640625, "learning_rate": 4.656865616921947e-06, "loss": 2.3097, "step": 18051 }, { "epoch": 0.9684549356223175, "grad_norm": 0.69140625, "learning_rate": 4.656821687643191e-06, "loss": 2.2386, "step": 18052 }, { "epoch": 0.9685085836909871, "grad_norm": 0.4765625, "learning_rate": 4.6567777557598495e-06, "loss": 2.2099, "step": 18053 }, { "epoch": 0.9685622317596566, "grad_norm": 0.52734375, "learning_rate": 4.656733821271973e-06, "loss": 2.6154, "step": 18054 }, { "epoch": 0.9686158798283262, "grad_norm": 0.474609375, "learning_rate": 4.656689884179619e-06, "loss": 2.3901, "step": 18055 }, { "epoch": 0.9686695278969957, "grad_norm": 0.390625, "learning_rate": 4.656645944482837e-06, "loss": 2.203, "step": 18056 }, { "epoch": 0.9687231759656653, "grad_norm": 0.435546875, "learning_rate": 4.656602002181682e-06, "loss": 2.2611, "step": 18057 }, { "epoch": 0.9687768240343347, "grad_norm": 0.431640625, "learning_rate": 4.656558057276206e-06, "loss": 2.4351, "step": 18058 }, { "epoch": 0.9688304721030043, "grad_norm": 0.46875, "learning_rate": 4.656514109766462e-06, "loss": 2.1938, "step": 18059 }, { "epoch": 0.9688841201716738, "grad_norm": 0.43359375, "learning_rate": 4.656470159652504e-06, "loss": 2.1494, "step": 18060 }, { "epoch": 0.9689377682403434, "grad_norm": 0.48828125, "learning_rate": 4.6564262069343845e-06, "loss": 2.3746, "step": 18061 }, { "epoch": 0.9689914163090129, "grad_norm": 0.57421875, "learning_rate": 4.656382251612157e-06, "loss": 2.4649, "step": 18062 }, { "epoch": 0.9690450643776825, "grad_norm": 0.3984375, "learning_rate": 4.656338293685873e-06, "loss": 2.182, "step": 18063 }, { "epoch": 0.9690987124463519, "grad_norm": 0.65234375, "learning_rate": 4.6562943331555875e-06, "loss": 2.4837, "step": 18064 }, { "epoch": 0.9691523605150214, "grad_norm": 0.482421875, "learning_rate": 4.656250370021353e-06, "loss": 2.3742, "step": 18065 }, { "epoch": 0.969206008583691, "grad_norm": 0.458984375, "learning_rate": 4.656206404283222e-06, "loss": 2.372, "step": 18066 }, { "epoch": 0.9692596566523605, "grad_norm": 0.59375, "learning_rate": 4.656162435941249e-06, "loss": 2.567, "step": 18067 }, { "epoch": 0.9693133047210301, "grad_norm": 0.4765625, "learning_rate": 4.656118464995486e-06, "loss": 2.0909, "step": 18068 }, { "epoch": 0.9693669527896995, "grad_norm": 0.56640625, "learning_rate": 4.656074491445986e-06, "loss": 2.3224, "step": 18069 }, { "epoch": 0.9694206008583691, "grad_norm": 0.53515625, "learning_rate": 4.656030515292801e-06, "loss": 2.1253, "step": 18070 }, { "epoch": 0.9694742489270386, "grad_norm": 0.443359375, "learning_rate": 4.655986536535987e-06, "loss": 2.243, "step": 18071 }, { "epoch": 0.9695278969957082, "grad_norm": 0.51953125, "learning_rate": 4.655942555175595e-06, "loss": 2.4214, "step": 18072 }, { "epoch": 0.9695815450643777, "grad_norm": 0.408203125, "learning_rate": 4.655898571211679e-06, "loss": 2.0153, "step": 18073 }, { "epoch": 0.9696351931330472, "grad_norm": 0.453125, "learning_rate": 4.655854584644292e-06, "loss": 2.4078, "step": 18074 }, { "epoch": 0.9696888412017167, "grad_norm": 0.3984375, "learning_rate": 4.655810595473486e-06, "loss": 2.0457, "step": 18075 }, { "epoch": 0.9697424892703863, "grad_norm": 0.52734375, "learning_rate": 4.655766603699317e-06, "loss": 2.404, "step": 18076 }, { "epoch": 0.9697961373390558, "grad_norm": 0.5703125, "learning_rate": 4.655722609321835e-06, "loss": 2.1763, "step": 18077 }, { "epoch": 0.9698497854077254, "grad_norm": 0.48046875, "learning_rate": 4.6556786123410935e-06, "loss": 2.4248, "step": 18078 }, { "epoch": 0.9699034334763948, "grad_norm": 0.51953125, "learning_rate": 4.655634612757147e-06, "loss": 2.5723, "step": 18079 }, { "epoch": 0.9699570815450643, "grad_norm": 0.5234375, "learning_rate": 4.6555906105700485e-06, "loss": 2.2587, "step": 18080 }, { "epoch": 0.9700107296137339, "grad_norm": 0.45703125, "learning_rate": 4.655546605779851e-06, "loss": 1.3247, "step": 18081 }, { "epoch": 0.9700643776824034, "grad_norm": 0.68359375, "learning_rate": 4.655502598386606e-06, "loss": 2.2216, "step": 18082 }, { "epoch": 0.970118025751073, "grad_norm": 0.412109375, "learning_rate": 4.655458588390369e-06, "loss": 2.1003, "step": 18083 }, { "epoch": 0.9701716738197425, "grad_norm": 0.44921875, "learning_rate": 4.655414575791192e-06, "loss": 2.2038, "step": 18084 }, { "epoch": 0.970225321888412, "grad_norm": 0.5078125, "learning_rate": 4.655370560589128e-06, "loss": 2.0069, "step": 18085 }, { "epoch": 0.9702789699570815, "grad_norm": 0.48046875, "learning_rate": 4.655326542784231e-06, "loss": 2.3204, "step": 18086 }, { "epoch": 0.9703326180257511, "grad_norm": 0.435546875, "learning_rate": 4.655282522376553e-06, "loss": 2.2134, "step": 18087 }, { "epoch": 0.9703862660944206, "grad_norm": 0.4296875, "learning_rate": 4.655238499366147e-06, "loss": 2.4361, "step": 18088 }, { "epoch": 0.9704399141630902, "grad_norm": 0.4921875, "learning_rate": 4.655194473753069e-06, "loss": 2.1808, "step": 18089 }, { "epoch": 0.9704935622317596, "grad_norm": 0.63671875, "learning_rate": 4.655150445537368e-06, "loss": 2.5085, "step": 18090 }, { "epoch": 0.9705472103004292, "grad_norm": 0.490234375, "learning_rate": 4.655106414719101e-06, "loss": 2.4444, "step": 18091 }, { "epoch": 0.9706008583690987, "grad_norm": 0.482421875, "learning_rate": 4.655062381298318e-06, "loss": 1.9547, "step": 18092 }, { "epoch": 0.9706545064377683, "grad_norm": 0.462890625, "learning_rate": 4.655018345275074e-06, "loss": 2.2122, "step": 18093 }, { "epoch": 0.9707081545064378, "grad_norm": 0.4765625, "learning_rate": 4.654974306649422e-06, "loss": 2.3438, "step": 18094 }, { "epoch": 0.9707618025751072, "grad_norm": 0.53515625, "learning_rate": 4.654930265421414e-06, "loss": 2.269, "step": 18095 }, { "epoch": 0.9708154506437768, "grad_norm": 0.423828125, "learning_rate": 4.654886221591106e-06, "loss": 2.1738, "step": 18096 }, { "epoch": 0.9708690987124463, "grad_norm": 0.482421875, "learning_rate": 4.6548421751585486e-06, "loss": 2.322, "step": 18097 }, { "epoch": 0.9709227467811159, "grad_norm": 0.478515625, "learning_rate": 4.654798126123795e-06, "loss": 1.9793, "step": 18098 }, { "epoch": 0.9709763948497854, "grad_norm": 0.515625, "learning_rate": 4.6547540744869e-06, "loss": 2.3975, "step": 18099 }, { "epoch": 0.971030042918455, "grad_norm": 0.41796875, "learning_rate": 4.654710020247915e-06, "loss": 2.2891, "step": 18100 }, { "epoch": 0.9710836909871244, "grad_norm": 0.470703125, "learning_rate": 4.654665963406895e-06, "loss": 2.1569, "step": 18101 }, { "epoch": 0.971137339055794, "grad_norm": 0.439453125, "learning_rate": 4.654621903963892e-06, "loss": 2.2407, "step": 18102 }, { "epoch": 0.9711909871244635, "grad_norm": 0.494140625, "learning_rate": 4.6545778419189594e-06, "loss": 2.3203, "step": 18103 }, { "epoch": 0.9712446351931331, "grad_norm": 0.69921875, "learning_rate": 4.65453377727215e-06, "loss": 2.0406, "step": 18104 }, { "epoch": 0.9712982832618026, "grad_norm": 0.52734375, "learning_rate": 4.654489710023517e-06, "loss": 2.1972, "step": 18105 }, { "epoch": 0.9713519313304722, "grad_norm": 0.53515625, "learning_rate": 4.654445640173116e-06, "loss": 2.4296, "step": 18106 }, { "epoch": 0.9714055793991416, "grad_norm": 0.46484375, "learning_rate": 4.654401567720997e-06, "loss": 1.663, "step": 18107 }, { "epoch": 0.9714592274678111, "grad_norm": 0.455078125, "learning_rate": 4.654357492667216e-06, "loss": 2.1008, "step": 18108 }, { "epoch": 0.9715128755364807, "grad_norm": 0.458984375, "learning_rate": 4.654313415011824e-06, "loss": 2.217, "step": 18109 }, { "epoch": 0.9715665236051502, "grad_norm": 0.423828125, "learning_rate": 4.654269334754875e-06, "loss": 1.6328, "step": 18110 }, { "epoch": 0.9716201716738198, "grad_norm": 0.494140625, "learning_rate": 4.654225251896422e-06, "loss": 2.3437, "step": 18111 }, { "epoch": 0.9716738197424892, "grad_norm": 0.4453125, "learning_rate": 4.654181166436519e-06, "loss": 2.1845, "step": 18112 }, { "epoch": 0.9717274678111588, "grad_norm": 0.44921875, "learning_rate": 4.654137078375218e-06, "loss": 2.4662, "step": 18113 }, { "epoch": 0.9717811158798283, "grad_norm": 0.66015625, "learning_rate": 4.654092987712574e-06, "loss": 2.4455, "step": 18114 }, { "epoch": 0.9718347639484979, "grad_norm": 0.51953125, "learning_rate": 4.654048894448639e-06, "loss": 2.0912, "step": 18115 }, { "epoch": 0.9718884120171674, "grad_norm": 0.46484375, "learning_rate": 4.6540047985834664e-06, "loss": 2.3746, "step": 18116 }, { "epoch": 0.971942060085837, "grad_norm": 0.671875, "learning_rate": 4.653960700117109e-06, "loss": 2.5161, "step": 18117 }, { "epoch": 0.9719957081545064, "grad_norm": 0.419921875, "learning_rate": 4.653916599049622e-06, "loss": 2.2236, "step": 18118 }, { "epoch": 0.972049356223176, "grad_norm": 0.470703125, "learning_rate": 4.653872495381056e-06, "loss": 2.2477, "step": 18119 }, { "epoch": 0.9721030042918455, "grad_norm": 0.458984375, "learning_rate": 4.653828389111465e-06, "loss": 2.1763, "step": 18120 }, { "epoch": 0.9721566523605151, "grad_norm": 0.400390625, "learning_rate": 4.653784280240904e-06, "loss": 2.0979, "step": 18121 }, { "epoch": 0.9722103004291845, "grad_norm": 0.6484375, "learning_rate": 4.653740168769424e-06, "loss": 2.3458, "step": 18122 }, { "epoch": 0.972263948497854, "grad_norm": 0.4921875, "learning_rate": 4.65369605469708e-06, "loss": 2.2088, "step": 18123 }, { "epoch": 0.9723175965665236, "grad_norm": 0.453125, "learning_rate": 4.653651938023924e-06, "loss": 2.2375, "step": 18124 }, { "epoch": 0.9723712446351931, "grad_norm": 0.478515625, "learning_rate": 4.65360781875001e-06, "loss": 2.1786, "step": 18125 }, { "epoch": 0.9724248927038627, "grad_norm": 0.5390625, "learning_rate": 4.653563696875392e-06, "loss": 2.4229, "step": 18126 }, { "epoch": 0.9724785407725322, "grad_norm": 0.56640625, "learning_rate": 4.653519572400121e-06, "loss": 2.2809, "step": 18127 }, { "epoch": 0.9725321888412017, "grad_norm": 0.51953125, "learning_rate": 4.653475445324254e-06, "loss": 2.2048, "step": 18128 }, { "epoch": 0.9725858369098712, "grad_norm": 0.53515625, "learning_rate": 4.653431315647839e-06, "loss": 2.6341, "step": 18129 }, { "epoch": 0.9726394849785408, "grad_norm": 0.515625, "learning_rate": 4.653387183370934e-06, "loss": 2.2509, "step": 18130 }, { "epoch": 0.9726931330472103, "grad_norm": 0.427734375, "learning_rate": 4.65334304849359e-06, "loss": 2.3241, "step": 18131 }, { "epoch": 0.9727467811158799, "grad_norm": 0.404296875, "learning_rate": 4.653298911015862e-06, "loss": 2.1099, "step": 18132 }, { "epoch": 0.9728004291845493, "grad_norm": 0.40625, "learning_rate": 4.653254770937801e-06, "loss": 1.9049, "step": 18133 }, { "epoch": 0.9728540772532189, "grad_norm": 0.423828125, "learning_rate": 4.653210628259462e-06, "loss": 2.1538, "step": 18134 }, { "epoch": 0.9729077253218884, "grad_norm": 0.46875, "learning_rate": 4.653166482980898e-06, "loss": 2.3054, "step": 18135 }, { "epoch": 0.972961373390558, "grad_norm": 0.443359375, "learning_rate": 4.653122335102161e-06, "loss": 1.981, "step": 18136 }, { "epoch": 0.9730150214592275, "grad_norm": 0.50390625, "learning_rate": 4.653078184623306e-06, "loss": 2.2661, "step": 18137 }, { "epoch": 0.973068669527897, "grad_norm": 0.46875, "learning_rate": 4.653034031544386e-06, "loss": 2.1664, "step": 18138 }, { "epoch": 0.9731223175965665, "grad_norm": 0.4375, "learning_rate": 4.652989875865454e-06, "loss": 2.1137, "step": 18139 }, { "epoch": 0.973175965665236, "grad_norm": 0.486328125, "learning_rate": 4.652945717586563e-06, "loss": 2.1091, "step": 18140 }, { "epoch": 0.9732296137339056, "grad_norm": 0.40234375, "learning_rate": 4.652901556707767e-06, "loss": 2.4271, "step": 18141 }, { "epoch": 0.9732832618025751, "grad_norm": 0.7421875, "learning_rate": 4.652857393229119e-06, "loss": 2.2855, "step": 18142 }, { "epoch": 0.9733369098712447, "grad_norm": 0.380859375, "learning_rate": 4.652813227150672e-06, "loss": 2.5566, "step": 18143 }, { "epoch": 0.9733905579399141, "grad_norm": 0.498046875, "learning_rate": 4.652769058472481e-06, "loss": 2.2366, "step": 18144 }, { "epoch": 0.9734442060085837, "grad_norm": 0.515625, "learning_rate": 4.652724887194596e-06, "loss": 2.2173, "step": 18145 }, { "epoch": 0.9734978540772532, "grad_norm": 0.51171875, "learning_rate": 4.652680713317073e-06, "loss": 2.1894, "step": 18146 }, { "epoch": 0.9735515021459228, "grad_norm": 0.71484375, "learning_rate": 4.652636536839965e-06, "loss": 2.5423, "step": 18147 }, { "epoch": 0.9736051502145923, "grad_norm": 0.435546875, "learning_rate": 4.652592357763325e-06, "loss": 2.3142, "step": 18148 }, { "epoch": 0.9736587982832619, "grad_norm": 0.421875, "learning_rate": 4.652548176087207e-06, "loss": 2.2363, "step": 18149 }, { "epoch": 0.9737124463519313, "grad_norm": 0.5078125, "learning_rate": 4.652503991811663e-06, "loss": 2.3838, "step": 18150 }, { "epoch": 0.9737660944206008, "grad_norm": 0.408203125, "learning_rate": 4.6524598049367475e-06, "loss": 1.9243, "step": 18151 }, { "epoch": 0.9738197424892704, "grad_norm": 0.4375, "learning_rate": 4.652415615462513e-06, "loss": 2.4713, "step": 18152 }, { "epoch": 0.9738733905579399, "grad_norm": 0.431640625, "learning_rate": 4.652371423389014e-06, "loss": 2.1426, "step": 18153 }, { "epoch": 0.9739270386266095, "grad_norm": 0.458984375, "learning_rate": 4.652327228716302e-06, "loss": 2.4506, "step": 18154 }, { "epoch": 0.9739806866952789, "grad_norm": 0.5, "learning_rate": 4.6522830314444326e-06, "loss": 2.3126, "step": 18155 }, { "epoch": 0.9740343347639485, "grad_norm": 0.53125, "learning_rate": 4.652238831573458e-06, "loss": 2.0453, "step": 18156 }, { "epoch": 0.974087982832618, "grad_norm": 0.4609375, "learning_rate": 4.652194629103431e-06, "loss": 2.2988, "step": 18157 }, { "epoch": 0.9741416309012876, "grad_norm": 0.49609375, "learning_rate": 4.652150424034407e-06, "loss": 2.2391, "step": 18158 }, { "epoch": 0.9741952789699571, "grad_norm": 0.458984375, "learning_rate": 4.652106216366438e-06, "loss": 1.6757, "step": 18159 }, { "epoch": 0.9742489270386266, "grad_norm": 0.470703125, "learning_rate": 4.652062006099577e-06, "loss": 2.3188, "step": 18160 }, { "epoch": 0.9743025751072961, "grad_norm": 0.6484375, "learning_rate": 4.652017793233877e-06, "loss": 2.4624, "step": 18161 }, { "epoch": 0.9743562231759657, "grad_norm": 0.5078125, "learning_rate": 4.651973577769393e-06, "loss": 2.2515, "step": 18162 }, { "epoch": 0.9744098712446352, "grad_norm": 0.462890625, "learning_rate": 4.651929359706178e-06, "loss": 2.549, "step": 18163 }, { "epoch": 0.9744635193133048, "grad_norm": 0.5703125, "learning_rate": 4.6518851390442844e-06, "loss": 2.2224, "step": 18164 }, { "epoch": 0.9745171673819742, "grad_norm": 0.494140625, "learning_rate": 4.651840915783766e-06, "loss": 2.186, "step": 18165 }, { "epoch": 0.9745708154506437, "grad_norm": 0.498046875, "learning_rate": 4.651796689924677e-06, "loss": 2.3443, "step": 18166 }, { "epoch": 0.9746244635193133, "grad_norm": 0.50390625, "learning_rate": 4.65175246146707e-06, "loss": 2.3074, "step": 18167 }, { "epoch": 0.9746781115879828, "grad_norm": 0.63671875, "learning_rate": 4.651708230410999e-06, "loss": 2.3677, "step": 18168 }, { "epoch": 0.9747317596566524, "grad_norm": 0.43359375, "learning_rate": 4.651663996756518e-06, "loss": 2.1601, "step": 18169 }, { "epoch": 0.9747854077253219, "grad_norm": 0.4609375, "learning_rate": 4.651619760503678e-06, "loss": 2.1669, "step": 18170 }, { "epoch": 0.9748390557939914, "grad_norm": 0.5, "learning_rate": 4.651575521652535e-06, "loss": 2.1406, "step": 18171 }, { "epoch": 0.9748927038626609, "grad_norm": 2.3125, "learning_rate": 4.651531280203141e-06, "loss": 1.4426, "step": 18172 }, { "epoch": 0.9749463519313305, "grad_norm": 0.515625, "learning_rate": 4.6514870361555485e-06, "loss": 2.2544, "step": 18173 }, { "epoch": 0.975, "grad_norm": 0.423828125, "learning_rate": 4.651442789509813e-06, "loss": 2.2366, "step": 18174 }, { "epoch": 0.9750536480686696, "grad_norm": 0.4765625, "learning_rate": 4.651398540265988e-06, "loss": 2.2975, "step": 18175 }, { "epoch": 0.975107296137339, "grad_norm": 0.52734375, "learning_rate": 4.651354288424125e-06, "loss": 2.3749, "step": 18176 }, { "epoch": 0.9751609442060086, "grad_norm": 0.423828125, "learning_rate": 4.651310033984279e-06, "loss": 2.1592, "step": 18177 }, { "epoch": 0.9752145922746781, "grad_norm": 0.416015625, "learning_rate": 4.651265776946503e-06, "loss": 2.1454, "step": 18178 }, { "epoch": 0.9752682403433477, "grad_norm": 0.490234375, "learning_rate": 4.65122151731085e-06, "loss": 2.3477, "step": 18179 }, { "epoch": 0.9753218884120172, "grad_norm": 0.5390625, "learning_rate": 4.651177255077374e-06, "loss": 2.1678, "step": 18180 }, { "epoch": 0.9753755364806866, "grad_norm": 0.52734375, "learning_rate": 4.651132990246128e-06, "loss": 2.5118, "step": 18181 }, { "epoch": 0.9754291845493562, "grad_norm": 0.470703125, "learning_rate": 4.651088722817166e-06, "loss": 2.2754, "step": 18182 }, { "epoch": 0.9754828326180257, "grad_norm": 0.55078125, "learning_rate": 4.651044452790542e-06, "loss": 1.0661, "step": 18183 }, { "epoch": 0.9755364806866953, "grad_norm": 0.7734375, "learning_rate": 4.6510001801663075e-06, "loss": 2.3529, "step": 18184 }, { "epoch": 0.9755901287553648, "grad_norm": 0.478515625, "learning_rate": 4.650955904944517e-06, "loss": 2.1827, "step": 18185 }, { "epoch": 0.9756437768240344, "grad_norm": 0.48046875, "learning_rate": 4.650911627125225e-06, "loss": 2.1981, "step": 18186 }, { "epoch": 0.9756974248927038, "grad_norm": 0.49609375, "learning_rate": 4.6508673467084835e-06, "loss": 2.5798, "step": 18187 }, { "epoch": 0.9757510729613734, "grad_norm": 0.390625, "learning_rate": 4.650823063694347e-06, "loss": 2.2576, "step": 18188 }, { "epoch": 0.9758047210300429, "grad_norm": 0.53125, "learning_rate": 4.650778778082868e-06, "loss": 2.441, "step": 18189 }, { "epoch": 0.9758583690987125, "grad_norm": 3.21875, "learning_rate": 4.650734489874101e-06, "loss": 2.1906, "step": 18190 }, { "epoch": 0.975912017167382, "grad_norm": 0.45703125, "learning_rate": 4.650690199068099e-06, "loss": 2.5424, "step": 18191 }, { "epoch": 0.9759656652360515, "grad_norm": 0.466796875, "learning_rate": 4.650645905664914e-06, "loss": 2.3822, "step": 18192 }, { "epoch": 0.976019313304721, "grad_norm": 0.5234375, "learning_rate": 4.650601609664603e-06, "loss": 2.3046, "step": 18193 }, { "epoch": 0.9760729613733906, "grad_norm": 0.5234375, "learning_rate": 4.650557311067216e-06, "loss": 2.4315, "step": 18194 }, { "epoch": 0.9761266094420601, "grad_norm": 0.4296875, "learning_rate": 4.650513009872809e-06, "loss": 2.1504, "step": 18195 }, { "epoch": 0.9761802575107296, "grad_norm": 0.462890625, "learning_rate": 4.650468706081433e-06, "loss": 2.5573, "step": 18196 }, { "epoch": 0.9762339055793992, "grad_norm": 0.5234375, "learning_rate": 4.6504243996931445e-06, "loss": 2.1392, "step": 18197 }, { "epoch": 0.9762875536480686, "grad_norm": 0.494140625, "learning_rate": 4.650380090707994e-06, "loss": 2.2641, "step": 18198 }, { "epoch": 0.9763412017167382, "grad_norm": 0.52734375, "learning_rate": 4.650335779126037e-06, "loss": 2.433, "step": 18199 }, { "epoch": 0.9763948497854077, "grad_norm": 0.54296875, "learning_rate": 4.650291464947327e-06, "loss": 2.1759, "step": 18200 }, { "epoch": 0.9764484978540773, "grad_norm": 0.359375, "learning_rate": 4.650247148171917e-06, "loss": 1.9313, "step": 18201 }, { "epoch": 0.9765021459227468, "grad_norm": 0.484375, "learning_rate": 4.6502028287998605e-06, "loss": 2.2011, "step": 18202 }, { "epoch": 0.9765557939914163, "grad_norm": 0.8359375, "learning_rate": 4.6501585068312095e-06, "loss": 2.2535, "step": 18203 }, { "epoch": 0.9766094420600858, "grad_norm": 0.474609375, "learning_rate": 4.650114182266021e-06, "loss": 2.3686, "step": 18204 }, { "epoch": 0.9766630901287554, "grad_norm": 0.439453125, "learning_rate": 4.650069855104345e-06, "loss": 2.169, "step": 18205 }, { "epoch": 0.9767167381974249, "grad_norm": 0.6484375, "learning_rate": 4.650025525346237e-06, "loss": 2.2155, "step": 18206 }, { "epoch": 0.9767703862660945, "grad_norm": 0.490234375, "learning_rate": 4.64998119299175e-06, "loss": 2.1116, "step": 18207 }, { "epoch": 0.976824034334764, "grad_norm": 0.486328125, "learning_rate": 4.649936858040939e-06, "loss": 2.3199, "step": 18208 }, { "epoch": 0.9768776824034334, "grad_norm": 0.55859375, "learning_rate": 4.649892520493855e-06, "loss": 2.2738, "step": 18209 }, { "epoch": 0.976931330472103, "grad_norm": 0.49609375, "learning_rate": 4.649848180350553e-06, "loss": 2.2883, "step": 18210 }, { "epoch": 0.9769849785407725, "grad_norm": 0.51953125, "learning_rate": 4.649803837611086e-06, "loss": 1.9225, "step": 18211 }, { "epoch": 0.9770386266094421, "grad_norm": 0.455078125, "learning_rate": 4.6497594922755084e-06, "loss": 2.0953, "step": 18212 }, { "epoch": 0.9770922746781115, "grad_norm": 0.5234375, "learning_rate": 4.649715144343873e-06, "loss": 2.2886, "step": 18213 }, { "epoch": 0.9771459227467811, "grad_norm": 0.46484375, "learning_rate": 4.649670793816233e-06, "loss": 2.5382, "step": 18214 }, { "epoch": 0.9771995708154506, "grad_norm": 0.447265625, "learning_rate": 4.649626440692643e-06, "loss": 1.9611, "step": 18215 }, { "epoch": 0.9772532188841202, "grad_norm": 0.5546875, "learning_rate": 4.649582084973156e-06, "loss": 1.4438, "step": 18216 }, { "epoch": 0.9773068669527897, "grad_norm": 0.5, "learning_rate": 4.649537726657825e-06, "loss": 2.2857, "step": 18217 }, { "epoch": 0.9773605150214593, "grad_norm": 0.54296875, "learning_rate": 4.649493365746706e-06, "loss": 2.3959, "step": 18218 }, { "epoch": 0.9774141630901287, "grad_norm": 0.431640625, "learning_rate": 4.649449002239849e-06, "loss": 2.0896, "step": 18219 }, { "epoch": 0.9774678111587983, "grad_norm": 0.58203125, "learning_rate": 4.649404636137309e-06, "loss": 2.7634, "step": 18220 }, { "epoch": 0.9775214592274678, "grad_norm": 0.41015625, "learning_rate": 4.6493602674391416e-06, "loss": 2.2758, "step": 18221 }, { "epoch": 0.9775751072961374, "grad_norm": 0.4765625, "learning_rate": 4.649315896145398e-06, "loss": 2.2252, "step": 18222 }, { "epoch": 0.9776287553648069, "grad_norm": 0.640625, "learning_rate": 4.649271522256132e-06, "loss": 2.2666, "step": 18223 }, { "epoch": 0.9776824034334763, "grad_norm": 0.50390625, "learning_rate": 4.649227145771398e-06, "loss": 2.4013, "step": 18224 }, { "epoch": 0.9777360515021459, "grad_norm": 0.5078125, "learning_rate": 4.649182766691249e-06, "loss": 2.5494, "step": 18225 }, { "epoch": 0.9777896995708154, "grad_norm": 0.4375, "learning_rate": 4.649138385015739e-06, "loss": 2.3414, "step": 18226 }, { "epoch": 0.977843347639485, "grad_norm": 0.419921875, "learning_rate": 4.649094000744922e-06, "loss": 2.0143, "step": 18227 }, { "epoch": 0.9778969957081545, "grad_norm": 0.45703125, "learning_rate": 4.64904961387885e-06, "loss": 2.2907, "step": 18228 }, { "epoch": 0.9779506437768241, "grad_norm": 0.5546875, "learning_rate": 4.649005224417577e-06, "loss": 2.2548, "step": 18229 }, { "epoch": 0.9780042918454935, "grad_norm": 0.490234375, "learning_rate": 4.648960832361159e-06, "loss": 2.4886, "step": 18230 }, { "epoch": 0.9780579399141631, "grad_norm": 0.462890625, "learning_rate": 4.6489164377096475e-06, "loss": 2.3767, "step": 18231 }, { "epoch": 0.9781115879828326, "grad_norm": 1.2109375, "learning_rate": 4.648872040463096e-06, "loss": 2.3347, "step": 18232 }, { "epoch": 0.9781652360515022, "grad_norm": 0.609375, "learning_rate": 4.6488276406215585e-06, "loss": 2.3944, "step": 18233 }, { "epoch": 0.9782188841201717, "grad_norm": 0.44140625, "learning_rate": 4.648783238185089e-06, "loss": 2.3078, "step": 18234 }, { "epoch": 0.9782725321888412, "grad_norm": 0.44921875, "learning_rate": 4.6487388331537405e-06, "loss": 2.2085, "step": 18235 }, { "epoch": 0.9783261802575107, "grad_norm": 0.48828125, "learning_rate": 4.648694425527568e-06, "loss": 2.3156, "step": 18236 }, { "epoch": 0.9783798283261803, "grad_norm": 0.50390625, "learning_rate": 4.648650015306623e-06, "loss": 2.6501, "step": 18237 }, { "epoch": 0.9784334763948498, "grad_norm": 0.4609375, "learning_rate": 4.64860560249096e-06, "loss": 2.075, "step": 18238 }, { "epoch": 0.9784871244635193, "grad_norm": 0.466796875, "learning_rate": 4.648561187080634e-06, "loss": 2.4001, "step": 18239 }, { "epoch": 0.9785407725321889, "grad_norm": 0.4609375, "learning_rate": 4.648516769075696e-06, "loss": 2.0609, "step": 18240 }, { "epoch": 0.9785944206008583, "grad_norm": 0.466796875, "learning_rate": 4.648472348476202e-06, "loss": 2.5051, "step": 18241 }, { "epoch": 0.9786480686695279, "grad_norm": 0.6171875, "learning_rate": 4.648427925282205e-06, "loss": 2.2255, "step": 18242 }, { "epoch": 0.9787017167381974, "grad_norm": 0.427734375, "learning_rate": 4.648383499493757e-06, "loss": 2.0176, "step": 18243 }, { "epoch": 0.978755364806867, "grad_norm": 0.455078125, "learning_rate": 4.648339071110915e-06, "loss": 2.3311, "step": 18244 }, { "epoch": 0.9788090128755365, "grad_norm": 0.4296875, "learning_rate": 4.64829464013373e-06, "loss": 2.1623, "step": 18245 }, { "epoch": 0.978862660944206, "grad_norm": 0.65625, "learning_rate": 4.6482502065622546e-06, "loss": 2.266, "step": 18246 }, { "epoch": 0.9789163090128755, "grad_norm": 0.5390625, "learning_rate": 4.6482057703965455e-06, "loss": 2.4698, "step": 18247 }, { "epoch": 0.9789699570815451, "grad_norm": 0.578125, "learning_rate": 4.648161331636656e-06, "loss": 2.0524, "step": 18248 }, { "epoch": 0.9790236051502146, "grad_norm": 0.5625, "learning_rate": 4.648116890282638e-06, "loss": 2.2455, "step": 18249 }, { "epoch": 0.9790772532188842, "grad_norm": 0.44921875, "learning_rate": 4.648072446334545e-06, "loss": 2.5042, "step": 18250 }, { "epoch": 0.9791309012875536, "grad_norm": 0.451171875, "learning_rate": 4.648027999792433e-06, "loss": 2.0393, "step": 18251 }, { "epoch": 0.9791845493562231, "grad_norm": 0.53515625, "learning_rate": 4.647983550656354e-06, "loss": 2.4963, "step": 18252 }, { "epoch": 0.9792381974248927, "grad_norm": 0.486328125, "learning_rate": 4.647939098926362e-06, "loss": 2.3148, "step": 18253 }, { "epoch": 0.9792918454935622, "grad_norm": 0.55859375, "learning_rate": 4.647894644602511e-06, "loss": 2.2771, "step": 18254 }, { "epoch": 0.9793454935622318, "grad_norm": 0.498046875, "learning_rate": 4.647850187684854e-06, "loss": 2.4723, "step": 18255 }, { "epoch": 0.9793991416309012, "grad_norm": 0.48828125, "learning_rate": 4.647805728173445e-06, "loss": 2.3855, "step": 18256 }, { "epoch": 0.9794527896995708, "grad_norm": 0.357421875, "learning_rate": 4.6477612660683374e-06, "loss": 2.1113, "step": 18257 }, { "epoch": 0.9795064377682403, "grad_norm": 0.498046875, "learning_rate": 4.647716801369586e-06, "loss": 2.3064, "step": 18258 }, { "epoch": 0.9795600858369099, "grad_norm": 0.5859375, "learning_rate": 4.647672334077244e-06, "loss": 2.2296, "step": 18259 }, { "epoch": 0.9796137339055794, "grad_norm": 0.46875, "learning_rate": 4.647627864191364e-06, "loss": 2.1086, "step": 18260 }, { "epoch": 0.979667381974249, "grad_norm": 0.51953125, "learning_rate": 4.647583391712e-06, "loss": 2.3541, "step": 18261 }, { "epoch": 0.9797210300429184, "grad_norm": 0.400390625, "learning_rate": 4.647538916639207e-06, "loss": 2.1923, "step": 18262 }, { "epoch": 0.979774678111588, "grad_norm": 0.5078125, "learning_rate": 4.647494438973039e-06, "loss": 2.1968, "step": 18263 }, { "epoch": 0.9798283261802575, "grad_norm": 0.55859375, "learning_rate": 4.647449958713547e-06, "loss": 2.251, "step": 18264 }, { "epoch": 0.9798819742489271, "grad_norm": 0.72265625, "learning_rate": 4.647405475860787e-06, "loss": 2.2026, "step": 18265 }, { "epoch": 0.9799356223175966, "grad_norm": 0.384765625, "learning_rate": 4.647360990414812e-06, "loss": 2.1744, "step": 18266 }, { "epoch": 0.979989270386266, "grad_norm": 0.51953125, "learning_rate": 4.647316502375676e-06, "loss": 2.4697, "step": 18267 }, { "epoch": 0.9800429184549356, "grad_norm": 0.478515625, "learning_rate": 4.647272011743433e-06, "loss": 2.3245, "step": 18268 }, { "epoch": 0.9800965665236051, "grad_norm": 0.443359375, "learning_rate": 4.647227518518136e-06, "loss": 2.197, "step": 18269 }, { "epoch": 0.9801502145922747, "grad_norm": 0.40234375, "learning_rate": 4.647183022699839e-06, "loss": 2.1656, "step": 18270 }, { "epoch": 0.9802038626609442, "grad_norm": 0.50390625, "learning_rate": 4.6471385242885955e-06, "loss": 2.3542, "step": 18271 }, { "epoch": 0.9802575107296138, "grad_norm": 0.5078125, "learning_rate": 4.64709402328446e-06, "loss": 2.4734, "step": 18272 }, { "epoch": 0.9803111587982832, "grad_norm": 1.078125, "learning_rate": 4.6470495196874844e-06, "loss": 2.3718, "step": 18273 }, { "epoch": 0.9803648068669528, "grad_norm": 0.51171875, "learning_rate": 4.647005013497724e-06, "loss": 2.3551, "step": 18274 }, { "epoch": 0.9804184549356223, "grad_norm": 0.37890625, "learning_rate": 4.646960504715233e-06, "loss": 2.1816, "step": 18275 }, { "epoch": 0.9804721030042919, "grad_norm": 0.5078125, "learning_rate": 4.6469159933400645e-06, "loss": 2.4149, "step": 18276 }, { "epoch": 0.9805257510729614, "grad_norm": 0.396484375, "learning_rate": 4.646871479372273e-06, "loss": 1.9634, "step": 18277 }, { "epoch": 0.980579399141631, "grad_norm": 0.4921875, "learning_rate": 4.64682696281191e-06, "loss": 2.2865, "step": 18278 }, { "epoch": 0.9806330472103004, "grad_norm": 0.46484375, "learning_rate": 4.6467824436590315e-06, "loss": 2.4355, "step": 18279 }, { "epoch": 0.98068669527897, "grad_norm": 0.50390625, "learning_rate": 4.64673792191369e-06, "loss": 2.3421, "step": 18280 }, { "epoch": 0.9807403433476395, "grad_norm": 0.494140625, "learning_rate": 4.64669339757594e-06, "loss": 2.3861, "step": 18281 }, { "epoch": 0.980793991416309, "grad_norm": 0.421875, "learning_rate": 4.646648870645835e-06, "loss": 2.0817, "step": 18282 }, { "epoch": 0.9808476394849786, "grad_norm": 0.4453125, "learning_rate": 4.646604341123429e-06, "loss": 2.0823, "step": 18283 }, { "epoch": 0.980901287553648, "grad_norm": 0.447265625, "learning_rate": 4.646559809008775e-06, "loss": 2.2566, "step": 18284 }, { "epoch": 0.9809549356223176, "grad_norm": 0.404296875, "learning_rate": 4.646515274301927e-06, "loss": 2.0561, "step": 18285 }, { "epoch": 0.9810085836909871, "grad_norm": 0.5546875, "learning_rate": 4.6464707370029396e-06, "loss": 2.3044, "step": 18286 }, { "epoch": 0.9810622317596567, "grad_norm": 0.4921875, "learning_rate": 4.646426197111866e-06, "loss": 2.0746, "step": 18287 }, { "epoch": 0.9811158798283262, "grad_norm": 0.37109375, "learning_rate": 4.646381654628761e-06, "loss": 1.8967, "step": 18288 }, { "epoch": 0.9811695278969957, "grad_norm": 0.6015625, "learning_rate": 4.646337109553677e-06, "loss": 2.2778, "step": 18289 }, { "epoch": 0.9812231759656652, "grad_norm": 0.443359375, "learning_rate": 4.646292561886668e-06, "loss": 2.387, "step": 18290 }, { "epoch": 0.9812768240343348, "grad_norm": 0.44140625, "learning_rate": 4.646248011627787e-06, "loss": 2.3607, "step": 18291 }, { "epoch": 0.9813304721030043, "grad_norm": 0.64453125, "learning_rate": 4.646203458777091e-06, "loss": 2.7044, "step": 18292 }, { "epoch": 0.9813841201716739, "grad_norm": 0.47265625, "learning_rate": 4.64615890333463e-06, "loss": 1.894, "step": 18293 }, { "epoch": 0.9814377682403433, "grad_norm": 0.470703125, "learning_rate": 4.6461143453004605e-06, "loss": 2.3953, "step": 18294 }, { "epoch": 0.9814914163090128, "grad_norm": 0.453125, "learning_rate": 4.646069784674635e-06, "loss": 2.2603, "step": 18295 }, { "epoch": 0.9815450643776824, "grad_norm": 0.52734375, "learning_rate": 4.646025221457206e-06, "loss": 2.3868, "step": 18296 }, { "epoch": 0.9815987124463519, "grad_norm": 0.478515625, "learning_rate": 4.645980655648231e-06, "loss": 2.2801, "step": 18297 }, { "epoch": 0.9816523605150215, "grad_norm": 0.45703125, "learning_rate": 4.645936087247761e-06, "loss": 1.9398, "step": 18298 }, { "epoch": 0.981706008583691, "grad_norm": 0.5234375, "learning_rate": 4.645891516255851e-06, "loss": 2.3853, "step": 18299 }, { "epoch": 0.9817596566523605, "grad_norm": 0.486328125, "learning_rate": 4.645846942672554e-06, "loss": 2.1435, "step": 18300 }, { "epoch": 0.98181330472103, "grad_norm": 0.47265625, "learning_rate": 4.645802366497924e-06, "loss": 2.4931, "step": 18301 }, { "epoch": 0.9818669527896996, "grad_norm": 0.55078125, "learning_rate": 4.645757787732015e-06, "loss": 2.4519, "step": 18302 }, { "epoch": 0.9819206008583691, "grad_norm": 0.51953125, "learning_rate": 4.645713206374881e-06, "loss": 2.3879, "step": 18303 }, { "epoch": 0.9819742489270387, "grad_norm": 0.53125, "learning_rate": 4.645668622426575e-06, "loss": 2.2247, "step": 18304 }, { "epoch": 0.9820278969957081, "grad_norm": 0.408203125, "learning_rate": 4.645624035887153e-06, "loss": 2.2715, "step": 18305 }, { "epoch": 0.9820815450643777, "grad_norm": 0.61328125, "learning_rate": 4.645579446756667e-06, "loss": 2.4953, "step": 18306 }, { "epoch": 0.9821351931330472, "grad_norm": 0.44921875, "learning_rate": 4.645534855035171e-06, "loss": 2.3847, "step": 18307 }, { "epoch": 0.9821888412017168, "grad_norm": 1.6640625, "learning_rate": 4.645490260722718e-06, "loss": 2.4102, "step": 18308 }, { "epoch": 0.9822424892703863, "grad_norm": 0.46484375, "learning_rate": 4.645445663819364e-06, "loss": 2.3608, "step": 18309 }, { "epoch": 0.9822961373390557, "grad_norm": 0.482421875, "learning_rate": 4.645401064325162e-06, "loss": 2.4284, "step": 18310 }, { "epoch": 0.9823497854077253, "grad_norm": 0.69140625, "learning_rate": 4.645356462240165e-06, "loss": 2.403, "step": 18311 }, { "epoch": 0.9824034334763948, "grad_norm": 5.375, "learning_rate": 4.6453118575644275e-06, "loss": 1.8188, "step": 18312 }, { "epoch": 0.9824570815450644, "grad_norm": 0.59765625, "learning_rate": 4.645267250298004e-06, "loss": 2.2467, "step": 18313 }, { "epoch": 0.9825107296137339, "grad_norm": 0.53515625, "learning_rate": 4.6452226404409475e-06, "loss": 2.1741, "step": 18314 }, { "epoch": 0.9825643776824035, "grad_norm": 0.46484375, "learning_rate": 4.645178027993311e-06, "loss": 2.3103, "step": 18315 }, { "epoch": 0.9826180257510729, "grad_norm": 0.51171875, "learning_rate": 4.64513341295515e-06, "loss": 2.4759, "step": 18316 }, { "epoch": 0.9826716738197425, "grad_norm": 0.45703125, "learning_rate": 4.645088795326519e-06, "loss": 2.3173, "step": 18317 }, { "epoch": 0.982725321888412, "grad_norm": 0.45703125, "learning_rate": 4.64504417510747e-06, "loss": 2.4701, "step": 18318 }, { "epoch": 0.9827789699570816, "grad_norm": 0.50390625, "learning_rate": 4.644999552298057e-06, "loss": 2.5217, "step": 18319 }, { "epoch": 0.9828326180257511, "grad_norm": 0.41015625, "learning_rate": 4.644954926898336e-06, "loss": 2.3592, "step": 18320 }, { "epoch": 0.9828862660944206, "grad_norm": 0.498046875, "learning_rate": 4.644910298908358e-06, "loss": 1.675, "step": 18321 }, { "epoch": 0.9829399141630901, "grad_norm": 0.39453125, "learning_rate": 4.644865668328179e-06, "loss": 1.9448, "step": 18322 }, { "epoch": 0.9829935622317597, "grad_norm": 1.3359375, "learning_rate": 4.6448210351578515e-06, "loss": 2.4443, "step": 18323 }, { "epoch": 0.9830472103004292, "grad_norm": 0.703125, "learning_rate": 4.644776399397431e-06, "loss": 2.3392, "step": 18324 }, { "epoch": 0.9831008583690987, "grad_norm": 0.5390625, "learning_rate": 4.64473176104697e-06, "loss": 2.3152, "step": 18325 }, { "epoch": 0.9831545064377682, "grad_norm": 0.5078125, "learning_rate": 4.6446871201065225e-06, "loss": 2.0434, "step": 18326 }, { "epoch": 0.9832081545064377, "grad_norm": 0.49609375, "learning_rate": 4.644642476576143e-06, "loss": 2.3069, "step": 18327 }, { "epoch": 0.9832618025751073, "grad_norm": 0.466796875, "learning_rate": 4.644597830455886e-06, "loss": 2.3584, "step": 18328 }, { "epoch": 0.9833154506437768, "grad_norm": 0.4140625, "learning_rate": 4.644553181745804e-06, "loss": 2.3054, "step": 18329 }, { "epoch": 0.9833690987124464, "grad_norm": 0.478515625, "learning_rate": 4.644508530445951e-06, "loss": 2.3191, "step": 18330 }, { "epoch": 0.9834227467811159, "grad_norm": 0.51953125, "learning_rate": 4.644463876556382e-06, "loss": 2.4645, "step": 18331 }, { "epoch": 0.9834763948497854, "grad_norm": 0.5703125, "learning_rate": 4.644419220077151e-06, "loss": 2.0096, "step": 18332 }, { "epoch": 0.9835300429184549, "grad_norm": 0.4375, "learning_rate": 4.6443745610083106e-06, "loss": 2.3732, "step": 18333 }, { "epoch": 0.9835836909871245, "grad_norm": 0.373046875, "learning_rate": 4.644329899349915e-06, "loss": 2.0889, "step": 18334 }, { "epoch": 0.983637339055794, "grad_norm": 0.46484375, "learning_rate": 4.64428523510202e-06, "loss": 2.4048, "step": 18335 }, { "epoch": 0.9836909871244636, "grad_norm": 0.86328125, "learning_rate": 4.644240568264677e-06, "loss": 1.9291, "step": 18336 }, { "epoch": 0.983744635193133, "grad_norm": 0.88671875, "learning_rate": 4.6441958988379406e-06, "loss": 2.4201, "step": 18337 }, { "epoch": 0.9837982832618025, "grad_norm": 0.42578125, "learning_rate": 4.644151226821866e-06, "loss": 2.433, "step": 18338 }, { "epoch": 0.9838519313304721, "grad_norm": 0.435546875, "learning_rate": 4.644106552216506e-06, "loss": 2.3299, "step": 18339 }, { "epoch": 0.9839055793991416, "grad_norm": 0.451171875, "learning_rate": 4.644061875021916e-06, "loss": 2.1225, "step": 18340 }, { "epoch": 0.9839592274678112, "grad_norm": 0.58984375, "learning_rate": 4.644017195238147e-06, "loss": 2.1866, "step": 18341 }, { "epoch": 0.9840128755364806, "grad_norm": 0.62890625, "learning_rate": 4.643972512865257e-06, "loss": 2.1716, "step": 18342 }, { "epoch": 0.9840665236051502, "grad_norm": 0.51171875, "learning_rate": 4.643927827903296e-06, "loss": 2.2265, "step": 18343 }, { "epoch": 0.9841201716738197, "grad_norm": 0.55859375, "learning_rate": 4.643883140352321e-06, "loss": 2.2083, "step": 18344 }, { "epoch": 0.9841738197424893, "grad_norm": 0.72265625, "learning_rate": 4.643838450212383e-06, "loss": 2.3521, "step": 18345 }, { "epoch": 0.9842274678111588, "grad_norm": 0.439453125, "learning_rate": 4.643793757483539e-06, "loss": 2.0907, "step": 18346 }, { "epoch": 0.9842811158798284, "grad_norm": 0.4375, "learning_rate": 4.643749062165841e-06, "loss": 2.0301, "step": 18347 }, { "epoch": 0.9843347639484978, "grad_norm": 0.455078125, "learning_rate": 4.643704364259344e-06, "loss": 2.1274, "step": 18348 }, { "epoch": 0.9843884120171674, "grad_norm": 0.40625, "learning_rate": 4.643659663764102e-06, "loss": 2.2335, "step": 18349 }, { "epoch": 0.9844420600858369, "grad_norm": 0.44921875, "learning_rate": 4.643614960680167e-06, "loss": 2.1252, "step": 18350 }, { "epoch": 0.9844957081545065, "grad_norm": 0.47265625, "learning_rate": 4.6435702550075954e-06, "loss": 2.5558, "step": 18351 }, { "epoch": 0.984549356223176, "grad_norm": 0.44140625, "learning_rate": 4.643525546746442e-06, "loss": 2.5125, "step": 18352 }, { "epoch": 0.9846030042918454, "grad_norm": 0.5, "learning_rate": 4.643480835896757e-06, "loss": 2.2009, "step": 18353 }, { "epoch": 0.984656652360515, "grad_norm": 0.458984375, "learning_rate": 4.643436122458597e-06, "loss": 2.2877, "step": 18354 }, { "epoch": 0.9847103004291845, "grad_norm": 0.62890625, "learning_rate": 4.6433914064320154e-06, "loss": 2.4353, "step": 18355 }, { "epoch": 0.9847639484978541, "grad_norm": 0.498046875, "learning_rate": 4.643346687817066e-06, "loss": 2.46, "step": 18356 }, { "epoch": 0.9848175965665236, "grad_norm": 0.5703125, "learning_rate": 4.643301966613804e-06, "loss": 2.3611, "step": 18357 }, { "epoch": 0.9848712446351932, "grad_norm": 0.4765625, "learning_rate": 4.6432572428222816e-06, "loss": 2.5812, "step": 18358 }, { "epoch": 0.9849248927038626, "grad_norm": 0.48046875, "learning_rate": 4.643212516442554e-06, "loss": 2.2744, "step": 18359 }, { "epoch": 0.9849785407725322, "grad_norm": 0.447265625, "learning_rate": 4.6431677874746756e-06, "loss": 2.2875, "step": 18360 }, { "epoch": 0.9850321888412017, "grad_norm": 0.443359375, "learning_rate": 4.643123055918699e-06, "loss": 2.2003, "step": 18361 }, { "epoch": 0.9850858369098713, "grad_norm": 0.4921875, "learning_rate": 4.6430783217746795e-06, "loss": 2.2876, "step": 18362 }, { "epoch": 0.9851394849785408, "grad_norm": 0.5, "learning_rate": 4.64303358504267e-06, "loss": 2.2513, "step": 18363 }, { "epoch": 0.9851931330472103, "grad_norm": 0.4921875, "learning_rate": 4.642988845722725e-06, "loss": 2.1817, "step": 18364 }, { "epoch": 0.9852467811158798, "grad_norm": 0.458984375, "learning_rate": 4.6429441038148995e-06, "loss": 2.262, "step": 18365 }, { "epoch": 0.9853004291845494, "grad_norm": 0.462890625, "learning_rate": 4.642899359319246e-06, "loss": 1.6052, "step": 18366 }, { "epoch": 0.9853540772532189, "grad_norm": 0.609375, "learning_rate": 4.64285461223582e-06, "loss": 2.2638, "step": 18367 }, { "epoch": 0.9854077253218884, "grad_norm": 1.796875, "learning_rate": 4.642809862564675e-06, "loss": 2.34, "step": 18368 }, { "epoch": 0.985461373390558, "grad_norm": 0.546875, "learning_rate": 4.642765110305863e-06, "loss": 2.3192, "step": 18369 }, { "epoch": 0.9855150214592274, "grad_norm": 0.51171875, "learning_rate": 4.6427203554594415e-06, "loss": 2.5325, "step": 18370 }, { "epoch": 0.985568669527897, "grad_norm": 1.03125, "learning_rate": 4.642675598025462e-06, "loss": 2.2122, "step": 18371 }, { "epoch": 0.9856223175965665, "grad_norm": 0.7265625, "learning_rate": 4.642630838003979e-06, "loss": 2.12, "step": 18372 }, { "epoch": 0.9856759656652361, "grad_norm": 0.40234375, "learning_rate": 4.642586075395048e-06, "loss": 2.3173, "step": 18373 }, { "epoch": 0.9857296137339056, "grad_norm": 0.404296875, "learning_rate": 4.642541310198722e-06, "loss": 1.7969, "step": 18374 }, { "epoch": 0.9857832618025751, "grad_norm": 0.51171875, "learning_rate": 4.642496542415054e-06, "loss": 2.3282, "step": 18375 }, { "epoch": 0.9858369098712446, "grad_norm": 0.42578125, "learning_rate": 4.6424517720441e-06, "loss": 2.0497, "step": 18376 }, { "epoch": 0.9858905579399142, "grad_norm": 0.4375, "learning_rate": 4.642406999085913e-06, "loss": 2.3351, "step": 18377 }, { "epoch": 0.9859442060085837, "grad_norm": 0.55859375, "learning_rate": 4.642362223540547e-06, "loss": 2.0642, "step": 18378 }, { "epoch": 0.9859978540772533, "grad_norm": 0.40625, "learning_rate": 4.6423174454080565e-06, "loss": 1.9755, "step": 18379 }, { "epoch": 0.9860515021459227, "grad_norm": 1.0859375, "learning_rate": 4.642272664688497e-06, "loss": 2.3104, "step": 18380 }, { "epoch": 0.9861051502145923, "grad_norm": 0.48046875, "learning_rate": 4.642227881381919e-06, "loss": 2.3195, "step": 18381 }, { "epoch": 0.9861587982832618, "grad_norm": 0.44140625, "learning_rate": 4.6421830954883784e-06, "loss": 2.2079, "step": 18382 }, { "epoch": 0.9862124463519313, "grad_norm": 0.44921875, "learning_rate": 4.642138307007931e-06, "loss": 2.4301, "step": 18383 }, { "epoch": 0.9862660944206009, "grad_norm": 0.384765625, "learning_rate": 4.642093515940629e-06, "loss": 1.9266, "step": 18384 }, { "epoch": 0.9863197424892703, "grad_norm": 0.47265625, "learning_rate": 4.642048722286526e-06, "loss": 2.0899, "step": 18385 }, { "epoch": 0.9863733905579399, "grad_norm": 0.5078125, "learning_rate": 4.642003926045677e-06, "loss": 2.3328, "step": 18386 }, { "epoch": 0.9864270386266094, "grad_norm": 1.1328125, "learning_rate": 4.641959127218137e-06, "loss": 2.5108, "step": 18387 }, { "epoch": 0.986480686695279, "grad_norm": 0.66796875, "learning_rate": 4.641914325803958e-06, "loss": 2.3346, "step": 18388 }, { "epoch": 0.9865343347639485, "grad_norm": 0.48828125, "learning_rate": 4.641869521803196e-06, "loss": 2.2384, "step": 18389 }, { "epoch": 0.9865879828326181, "grad_norm": 1.359375, "learning_rate": 4.641824715215904e-06, "loss": 2.2263, "step": 18390 }, { "epoch": 0.9866416309012875, "grad_norm": 0.4609375, "learning_rate": 4.641779906042136e-06, "loss": 2.3532, "step": 18391 }, { "epoch": 0.9866952789699571, "grad_norm": 0.46484375, "learning_rate": 4.641735094281947e-06, "loss": 2.31, "step": 18392 }, { "epoch": 0.9867489270386266, "grad_norm": 0.44140625, "learning_rate": 4.641690279935391e-06, "loss": 2.3238, "step": 18393 }, { "epoch": 0.9868025751072962, "grad_norm": 0.490234375, "learning_rate": 4.6416454630025225e-06, "loss": 2.0891, "step": 18394 }, { "epoch": 0.9868562231759657, "grad_norm": 0.486328125, "learning_rate": 4.641600643483393e-06, "loss": 2.4459, "step": 18395 }, { "epoch": 0.9869098712446351, "grad_norm": 0.64453125, "learning_rate": 4.64155582137806e-06, "loss": 2.2839, "step": 18396 }, { "epoch": 0.9869635193133047, "grad_norm": 0.53125, "learning_rate": 4.641510996686575e-06, "loss": 2.3408, "step": 18397 }, { "epoch": 0.9870171673819742, "grad_norm": 0.69140625, "learning_rate": 4.641466169408995e-06, "loss": 2.2278, "step": 18398 }, { "epoch": 0.9870708154506438, "grad_norm": 0.3828125, "learning_rate": 4.641421339545371e-06, "loss": 2.0439, "step": 18399 }, { "epoch": 0.9871244635193133, "grad_norm": 0.494140625, "learning_rate": 4.6413765070957585e-06, "loss": 2.4207, "step": 18400 }, { "epoch": 0.9871781115879829, "grad_norm": 0.40234375, "learning_rate": 4.641331672060213e-06, "loss": 2.4205, "step": 18401 }, { "epoch": 0.9872317596566523, "grad_norm": 0.46875, "learning_rate": 4.641286834438786e-06, "loss": 2.4843, "step": 18402 }, { "epoch": 0.9872854077253219, "grad_norm": 0.63671875, "learning_rate": 4.641241994231533e-06, "loss": 2.3621, "step": 18403 }, { "epoch": 0.9873390557939914, "grad_norm": 0.53515625, "learning_rate": 4.641197151438509e-06, "loss": 2.5753, "step": 18404 }, { "epoch": 0.987392703862661, "grad_norm": 0.412109375, "learning_rate": 4.641152306059768e-06, "loss": 2.0776, "step": 18405 }, { "epoch": 0.9874463519313305, "grad_norm": 1.3359375, "learning_rate": 4.641107458095362e-06, "loss": 2.387, "step": 18406 }, { "epoch": 0.9875, "grad_norm": 0.515625, "learning_rate": 4.641062607545347e-06, "loss": 2.0552, "step": 18407 }, { "epoch": 0.9875536480686695, "grad_norm": 0.458984375, "learning_rate": 4.641017754409776e-06, "loss": 2.4363, "step": 18408 }, { "epoch": 0.9876072961373391, "grad_norm": 0.5078125, "learning_rate": 4.6409728986887046e-06, "loss": 2.3307, "step": 18409 }, { "epoch": 0.9876609442060086, "grad_norm": 0.478515625, "learning_rate": 4.640928040382186e-06, "loss": 2.3797, "step": 18410 }, { "epoch": 0.9877145922746781, "grad_norm": 0.400390625, "learning_rate": 4.640883179490276e-06, "loss": 2.3315, "step": 18411 }, { "epoch": 0.9877682403433476, "grad_norm": 0.4765625, "learning_rate": 4.640838316013027e-06, "loss": 2.2896, "step": 18412 }, { "epoch": 0.9878218884120171, "grad_norm": 0.50390625, "learning_rate": 4.640793449950493e-06, "loss": 2.5304, "step": 18413 }, { "epoch": 0.9878755364806867, "grad_norm": 0.49609375, "learning_rate": 4.640748581302729e-06, "loss": 2.335, "step": 18414 }, { "epoch": 0.9879291845493562, "grad_norm": 0.490234375, "learning_rate": 4.640703710069788e-06, "loss": 2.523, "step": 18415 }, { "epoch": 0.9879828326180258, "grad_norm": 0.470703125, "learning_rate": 4.640658836251727e-06, "loss": 2.2468, "step": 18416 }, { "epoch": 0.9880364806866953, "grad_norm": 0.52734375, "learning_rate": 4.640613959848598e-06, "loss": 2.2626, "step": 18417 }, { "epoch": 0.9880901287553648, "grad_norm": 0.427734375, "learning_rate": 4.640569080860454e-06, "loss": 2.0258, "step": 18418 }, { "epoch": 0.9881437768240343, "grad_norm": 0.458984375, "learning_rate": 4.640524199287353e-06, "loss": 2.2647, "step": 18419 }, { "epoch": 0.9881974248927039, "grad_norm": 0.431640625, "learning_rate": 4.640479315129345e-06, "loss": 2.2784, "step": 18420 }, { "epoch": 0.9882510729613734, "grad_norm": 0.546875, "learning_rate": 4.640434428386486e-06, "loss": 1.4126, "step": 18421 }, { "epoch": 0.988304721030043, "grad_norm": 0.484375, "learning_rate": 4.640389539058832e-06, "loss": 2.2709, "step": 18422 }, { "epoch": 0.9883583690987124, "grad_norm": 0.412109375, "learning_rate": 4.640344647146435e-06, "loss": 2.3514, "step": 18423 }, { "epoch": 0.988412017167382, "grad_norm": 0.439453125, "learning_rate": 4.64029975264935e-06, "loss": 2.1818, "step": 18424 }, { "epoch": 0.9884656652360515, "grad_norm": 0.46875, "learning_rate": 4.640254855567631e-06, "loss": 2.4063, "step": 18425 }, { "epoch": 0.988519313304721, "grad_norm": 0.455078125, "learning_rate": 4.640209955901331e-06, "loss": 2.1131, "step": 18426 }, { "epoch": 0.9885729613733906, "grad_norm": 0.466796875, "learning_rate": 4.640165053650507e-06, "loss": 2.3453, "step": 18427 }, { "epoch": 0.98862660944206, "grad_norm": 0.474609375, "learning_rate": 4.640120148815211e-06, "loss": 2.309, "step": 18428 }, { "epoch": 0.9886802575107296, "grad_norm": 1.2734375, "learning_rate": 4.640075241395498e-06, "loss": 2.2755, "step": 18429 }, { "epoch": 0.9887339055793991, "grad_norm": 0.52734375, "learning_rate": 4.640030331391422e-06, "loss": 2.311, "step": 18430 }, { "epoch": 0.9887875536480687, "grad_norm": 0.625, "learning_rate": 4.639985418803037e-06, "loss": 2.1015, "step": 18431 }, { "epoch": 0.9888412017167382, "grad_norm": 0.5078125, "learning_rate": 4.639940503630398e-06, "loss": 2.1983, "step": 18432 }, { "epoch": 0.9888948497854078, "grad_norm": 0.443359375, "learning_rate": 4.6398955858735584e-06, "loss": 2.2116, "step": 18433 }, { "epoch": 0.9889484978540772, "grad_norm": 0.5, "learning_rate": 4.639850665532574e-06, "loss": 2.5622, "step": 18434 }, { "epoch": 0.9890021459227468, "grad_norm": 0.478515625, "learning_rate": 4.639805742607497e-06, "loss": 2.4134, "step": 18435 }, { "epoch": 0.9890557939914163, "grad_norm": 0.48828125, "learning_rate": 4.639760817098382e-06, "loss": 2.3762, "step": 18436 }, { "epoch": 0.9891094420600859, "grad_norm": 0.53515625, "learning_rate": 4.639715889005285e-06, "loss": 2.2242, "step": 18437 }, { "epoch": 0.9891630901287554, "grad_norm": 0.451171875, "learning_rate": 4.639670958328258e-06, "loss": 2.2098, "step": 18438 }, { "epoch": 0.9892167381974248, "grad_norm": 0.53515625, "learning_rate": 4.639626025067357e-06, "loss": 2.1885, "step": 18439 }, { "epoch": 0.9892703862660944, "grad_norm": 0.9453125, "learning_rate": 4.639581089222636e-06, "loss": 1.8377, "step": 18440 }, { "epoch": 0.9893240343347639, "grad_norm": 0.53125, "learning_rate": 4.639536150794149e-06, "loss": 2.345, "step": 18441 }, { "epoch": 0.9893776824034335, "grad_norm": 0.4921875, "learning_rate": 4.639491209781949e-06, "loss": 2.3659, "step": 18442 }, { "epoch": 0.989431330472103, "grad_norm": 0.4765625, "learning_rate": 4.639446266186091e-06, "loss": 2.155, "step": 18443 }, { "epoch": 0.9894849785407726, "grad_norm": 0.421875, "learning_rate": 4.639401320006631e-06, "loss": 2.2412, "step": 18444 }, { "epoch": 0.989538626609442, "grad_norm": 0.53515625, "learning_rate": 4.639356371243622e-06, "loss": 2.24, "step": 18445 }, { "epoch": 0.9895922746781116, "grad_norm": 0.5078125, "learning_rate": 4.639311419897118e-06, "loss": 2.4263, "step": 18446 }, { "epoch": 0.9896459227467811, "grad_norm": 0.453125, "learning_rate": 4.639266465967172e-06, "loss": 2.5268, "step": 18447 }, { "epoch": 0.9896995708154507, "grad_norm": 0.431640625, "learning_rate": 4.6392215094538415e-06, "loss": 1.9019, "step": 18448 }, { "epoch": 0.9897532188841202, "grad_norm": 0.376953125, "learning_rate": 4.63917655035718e-06, "loss": 2.1554, "step": 18449 }, { "epoch": 0.9898068669527897, "grad_norm": 0.453125, "learning_rate": 4.639131588677238e-06, "loss": 2.2699, "step": 18450 }, { "epoch": 0.9898605150214592, "grad_norm": 0.462890625, "learning_rate": 4.6390866244140745e-06, "loss": 2.2872, "step": 18451 }, { "epoch": 0.9899141630901288, "grad_norm": 2.171875, "learning_rate": 4.639041657567742e-06, "loss": 2.272, "step": 18452 }, { "epoch": 0.9899678111587983, "grad_norm": 0.48828125, "learning_rate": 4.638996688138294e-06, "loss": 2.3619, "step": 18453 }, { "epoch": 0.9900214592274678, "grad_norm": 0.466796875, "learning_rate": 4.638951716125787e-06, "loss": 2.0418, "step": 18454 }, { "epoch": 0.9900751072961373, "grad_norm": 0.4375, "learning_rate": 4.638906741530273e-06, "loss": 2.1857, "step": 18455 }, { "epoch": 0.9901287553648068, "grad_norm": 0.609375, "learning_rate": 4.638861764351806e-06, "loss": 2.2169, "step": 18456 }, { "epoch": 0.9901824034334764, "grad_norm": 0.41015625, "learning_rate": 4.638816784590443e-06, "loss": 1.9544, "step": 18457 }, { "epoch": 0.9902360515021459, "grad_norm": 0.408203125, "learning_rate": 4.638771802246237e-06, "loss": 2.3212, "step": 18458 }, { "epoch": 0.9902896995708155, "grad_norm": 0.494140625, "learning_rate": 4.638726817319241e-06, "loss": 2.3257, "step": 18459 }, { "epoch": 0.990343347639485, "grad_norm": 0.5078125, "learning_rate": 4.638681829809511e-06, "loss": 2.0873, "step": 18460 }, { "epoch": 0.9903969957081545, "grad_norm": 0.421875, "learning_rate": 4.638636839717101e-06, "loss": 2.3783, "step": 18461 }, { "epoch": 0.990450643776824, "grad_norm": 0.4609375, "learning_rate": 4.638591847042065e-06, "loss": 2.2434, "step": 18462 }, { "epoch": 0.9905042918454936, "grad_norm": 0.369140625, "learning_rate": 4.638546851784458e-06, "loss": 2.0599, "step": 18463 }, { "epoch": 0.9905579399141631, "grad_norm": 0.46875, "learning_rate": 4.638501853944334e-06, "loss": 2.0638, "step": 18464 }, { "epoch": 0.9906115879828327, "grad_norm": 0.51171875, "learning_rate": 4.638456853521746e-06, "loss": 2.6954, "step": 18465 }, { "epoch": 0.9906652360515021, "grad_norm": 0.88671875, "learning_rate": 4.638411850516749e-06, "loss": 2.4783, "step": 18466 }, { "epoch": 0.9907188841201717, "grad_norm": 0.427734375, "learning_rate": 4.638366844929399e-06, "loss": 2.294, "step": 18467 }, { "epoch": 0.9907725321888412, "grad_norm": 0.51171875, "learning_rate": 4.63832183675975e-06, "loss": 1.556, "step": 18468 }, { "epoch": 0.9908261802575107, "grad_norm": 0.47265625, "learning_rate": 4.638276826007855e-06, "loss": 1.9315, "step": 18469 }, { "epoch": 0.9908798283261803, "grad_norm": 0.45703125, "learning_rate": 4.638231812673768e-06, "loss": 2.4161, "step": 18470 }, { "epoch": 0.9909334763948497, "grad_norm": 0.5546875, "learning_rate": 4.638186796757545e-06, "loss": 2.0786, "step": 18471 }, { "epoch": 0.9909871244635193, "grad_norm": 0.47265625, "learning_rate": 4.638141778259239e-06, "loss": 2.4437, "step": 18472 }, { "epoch": 0.9910407725321888, "grad_norm": 0.52734375, "learning_rate": 4.638096757178905e-06, "loss": 2.1273, "step": 18473 }, { "epoch": 0.9910944206008584, "grad_norm": 0.435546875, "learning_rate": 4.638051733516598e-06, "loss": 2.4259, "step": 18474 }, { "epoch": 0.9911480686695279, "grad_norm": 0.5078125, "learning_rate": 4.638006707272371e-06, "loss": 2.1839, "step": 18475 }, { "epoch": 0.9912017167381975, "grad_norm": 0.58984375, "learning_rate": 4.637961678446279e-06, "loss": 2.3231, "step": 18476 }, { "epoch": 0.9912553648068669, "grad_norm": 0.4609375, "learning_rate": 4.637916647038377e-06, "loss": 2.3497, "step": 18477 }, { "epoch": 0.9913090128755365, "grad_norm": 0.765625, "learning_rate": 4.637871613048719e-06, "loss": 1.4131, "step": 18478 }, { "epoch": 0.991362660944206, "grad_norm": 0.474609375, "learning_rate": 4.637826576477359e-06, "loss": 2.2083, "step": 18479 }, { "epoch": 0.9914163090128756, "grad_norm": 0.478515625, "learning_rate": 4.637781537324352e-06, "loss": 2.3453, "step": 18480 }, { "epoch": 0.9914699570815451, "grad_norm": 0.4765625, "learning_rate": 4.637736495589751e-06, "loss": 2.1214, "step": 18481 }, { "epoch": 0.9915236051502145, "grad_norm": 0.4921875, "learning_rate": 4.637691451273613e-06, "loss": 2.195, "step": 18482 }, { "epoch": 0.9915772532188841, "grad_norm": 0.412109375, "learning_rate": 4.637646404375989e-06, "loss": 2.1244, "step": 18483 }, { "epoch": 0.9916309012875536, "grad_norm": 0.455078125, "learning_rate": 4.637601354896936e-06, "loss": 2.2528, "step": 18484 }, { "epoch": 0.9916845493562232, "grad_norm": 0.5859375, "learning_rate": 4.637556302836508e-06, "loss": 2.3156, "step": 18485 }, { "epoch": 0.9917381974248927, "grad_norm": 0.51953125, "learning_rate": 4.6375112481947585e-06, "loss": 2.0302, "step": 18486 }, { "epoch": 0.9917918454935623, "grad_norm": 0.4296875, "learning_rate": 4.637466190971742e-06, "loss": 2.2013, "step": 18487 }, { "epoch": 0.9918454935622317, "grad_norm": 0.439453125, "learning_rate": 4.637421131167514e-06, "loss": 1.8367, "step": 18488 }, { "epoch": 0.9918991416309013, "grad_norm": 0.5, "learning_rate": 4.637376068782128e-06, "loss": 2.2049, "step": 18489 }, { "epoch": 0.9919527896995708, "grad_norm": 1.03125, "learning_rate": 4.637331003815638e-06, "loss": 2.5601, "step": 18490 }, { "epoch": 0.9920064377682404, "grad_norm": 0.435546875, "learning_rate": 4.6372859362681e-06, "loss": 2.4137, "step": 18491 }, { "epoch": 0.9920600858369099, "grad_norm": 0.466796875, "learning_rate": 4.637240866139567e-06, "loss": 2.2295, "step": 18492 }, { "epoch": 0.9921137339055794, "grad_norm": 0.46875, "learning_rate": 4.637195793430095e-06, "loss": 2.2801, "step": 18493 }, { "epoch": 0.9921673819742489, "grad_norm": 0.48828125, "learning_rate": 4.637150718139736e-06, "loss": 2.5397, "step": 18494 }, { "epoch": 0.9922210300429185, "grad_norm": 0.59765625, "learning_rate": 4.637105640268547e-06, "loss": 2.0078, "step": 18495 }, { "epoch": 0.992274678111588, "grad_norm": 0.4609375, "learning_rate": 4.637060559816579e-06, "loss": 2.1839, "step": 18496 }, { "epoch": 0.9923283261802575, "grad_norm": 0.458984375, "learning_rate": 4.637015476783891e-06, "loss": 2.4861, "step": 18497 }, { "epoch": 0.992381974248927, "grad_norm": 0.50390625, "learning_rate": 4.6369703911705345e-06, "loss": 2.406, "step": 18498 }, { "epoch": 0.9924356223175965, "grad_norm": 0.4375, "learning_rate": 4.636925302976564e-06, "loss": 2.2573, "step": 18499 }, { "epoch": 0.9924892703862661, "grad_norm": 0.5, "learning_rate": 4.636880212202035e-06, "loss": 2.341, "step": 18500 }, { "epoch": 0.9925429184549356, "grad_norm": 0.408203125, "learning_rate": 4.6368351188470015e-06, "loss": 2.2043, "step": 18501 }, { "epoch": 0.9925965665236052, "grad_norm": 0.431640625, "learning_rate": 4.636790022911517e-06, "loss": 2.4666, "step": 18502 }, { "epoch": 0.9926502145922746, "grad_norm": 3.453125, "learning_rate": 4.636744924395638e-06, "loss": 2.2664, "step": 18503 }, { "epoch": 0.9927038626609442, "grad_norm": 0.55859375, "learning_rate": 4.6366998232994176e-06, "loss": 2.3533, "step": 18504 }, { "epoch": 0.9927575107296137, "grad_norm": 0.52734375, "learning_rate": 4.6366547196229095e-06, "loss": 2.1668, "step": 18505 }, { "epoch": 0.9928111587982833, "grad_norm": 0.51953125, "learning_rate": 4.6366096133661705e-06, "loss": 2.3851, "step": 18506 }, { "epoch": 0.9928648068669528, "grad_norm": 0.466796875, "learning_rate": 4.636564504529253e-06, "loss": 2.261, "step": 18507 }, { "epoch": 0.9929184549356224, "grad_norm": 0.5078125, "learning_rate": 4.636519393112212e-06, "loss": 2.333, "step": 18508 }, { "epoch": 0.9929721030042918, "grad_norm": 0.6796875, "learning_rate": 4.636474279115103e-06, "loss": 2.3285, "step": 18509 }, { "epoch": 0.9930257510729614, "grad_norm": 0.53125, "learning_rate": 4.6364291625379785e-06, "loss": 2.4331, "step": 18510 }, { "epoch": 0.9930793991416309, "grad_norm": 0.51171875, "learning_rate": 4.636384043380895e-06, "loss": 2.3475, "step": 18511 }, { "epoch": 0.9931330472103004, "grad_norm": 0.546875, "learning_rate": 4.636338921643906e-06, "loss": 2.301, "step": 18512 }, { "epoch": 0.99318669527897, "grad_norm": 0.490234375, "learning_rate": 4.636293797327066e-06, "loss": 2.2414, "step": 18513 }, { "epoch": 0.9932403433476394, "grad_norm": 0.453125, "learning_rate": 4.6362486704304294e-06, "loss": 2.1805, "step": 18514 }, { "epoch": 0.993293991416309, "grad_norm": 0.51953125, "learning_rate": 4.636203540954051e-06, "loss": 2.3307, "step": 18515 }, { "epoch": 0.9933476394849785, "grad_norm": 0.48046875, "learning_rate": 4.636158408897985e-06, "loss": 2.2297, "step": 18516 }, { "epoch": 0.9934012875536481, "grad_norm": 0.70703125, "learning_rate": 4.636113274262286e-06, "loss": 2.1286, "step": 18517 }, { "epoch": 0.9934549356223176, "grad_norm": 0.59375, "learning_rate": 4.63606813704701e-06, "loss": 2.2771, "step": 18518 }, { "epoch": 0.9935085836909872, "grad_norm": 0.51171875, "learning_rate": 4.636022997252209e-06, "loss": 2.4226, "step": 18519 }, { "epoch": 0.9935622317596566, "grad_norm": 0.484375, "learning_rate": 4.635977854877939e-06, "loss": 2.279, "step": 18520 }, { "epoch": 0.9936158798283262, "grad_norm": 0.71484375, "learning_rate": 4.6359327099242535e-06, "loss": 2.4439, "step": 18521 }, { "epoch": 0.9936695278969957, "grad_norm": 0.5, "learning_rate": 4.6358875623912085e-06, "loss": 2.2861, "step": 18522 }, { "epoch": 0.9937231759656653, "grad_norm": 0.388671875, "learning_rate": 4.635842412278857e-06, "loss": 2.3415, "step": 18523 }, { "epoch": 0.9937768240343348, "grad_norm": 0.462890625, "learning_rate": 4.635797259587254e-06, "loss": 2.2722, "step": 18524 }, { "epoch": 0.9938304721030042, "grad_norm": 0.455078125, "learning_rate": 4.6357521043164545e-06, "loss": 2.1334, "step": 18525 }, { "epoch": 0.9938841201716738, "grad_norm": 0.400390625, "learning_rate": 4.6357069464665135e-06, "loss": 2.2731, "step": 18526 }, { "epoch": 0.9939377682403433, "grad_norm": 0.470703125, "learning_rate": 4.635661786037484e-06, "loss": 2.36, "step": 18527 }, { "epoch": 0.9939914163090129, "grad_norm": 0.447265625, "learning_rate": 4.635616623029422e-06, "loss": 2.1463, "step": 18528 }, { "epoch": 0.9940450643776824, "grad_norm": 0.439453125, "learning_rate": 4.63557145744238e-06, "loss": 2.0941, "step": 18529 }, { "epoch": 0.994098712446352, "grad_norm": 0.48828125, "learning_rate": 4.6355262892764155e-06, "loss": 2.3246, "step": 18530 }, { "epoch": 0.9941523605150214, "grad_norm": 0.4453125, "learning_rate": 4.63548111853158e-06, "loss": 2.2348, "step": 18531 }, { "epoch": 0.994206008583691, "grad_norm": 0.73046875, "learning_rate": 4.6354359452079305e-06, "loss": 2.0869, "step": 18532 }, { "epoch": 0.9942596566523605, "grad_norm": 0.484375, "learning_rate": 4.63539076930552e-06, "loss": 2.56, "step": 18533 }, { "epoch": 0.9943133047210301, "grad_norm": 0.455078125, "learning_rate": 4.6353455908244035e-06, "loss": 2.3095, "step": 18534 }, { "epoch": 0.9943669527896996, "grad_norm": 0.49609375, "learning_rate": 4.6353004097646366e-06, "loss": 2.2348, "step": 18535 }, { "epoch": 0.9944206008583691, "grad_norm": 0.58203125, "learning_rate": 4.635255226126273e-06, "loss": 2.0877, "step": 18536 }, { "epoch": 0.9944742489270386, "grad_norm": 0.41015625, "learning_rate": 4.635210039909366e-06, "loss": 2.4045, "step": 18537 }, { "epoch": 0.9945278969957082, "grad_norm": 0.53125, "learning_rate": 4.6351648511139714e-06, "loss": 2.4566, "step": 18538 }, { "epoch": 0.9945815450643777, "grad_norm": 0.5078125, "learning_rate": 4.635119659740145e-06, "loss": 1.8445, "step": 18539 }, { "epoch": 0.9946351931330472, "grad_norm": 0.4296875, "learning_rate": 4.635074465787939e-06, "loss": 2.3432, "step": 18540 }, { "epoch": 0.9946888412017167, "grad_norm": 0.462890625, "learning_rate": 4.63502926925741e-06, "loss": 2.3522, "step": 18541 }, { "epoch": 0.9947424892703862, "grad_norm": 0.4765625, "learning_rate": 4.634984070148611e-06, "loss": 2.4215, "step": 18542 }, { "epoch": 0.9947961373390558, "grad_norm": 0.466796875, "learning_rate": 4.634938868461597e-06, "loss": 1.9957, "step": 18543 }, { "epoch": 0.9948497854077253, "grad_norm": 0.76953125, "learning_rate": 4.634893664196423e-06, "loss": 2.3136, "step": 18544 }, { "epoch": 0.9949034334763949, "grad_norm": 0.4140625, "learning_rate": 4.634848457353143e-06, "loss": 2.3308, "step": 18545 }, { "epoch": 0.9949570815450643, "grad_norm": 0.52734375, "learning_rate": 4.634803247931813e-06, "loss": 2.3265, "step": 18546 }, { "epoch": 0.9950107296137339, "grad_norm": 0.40625, "learning_rate": 4.634758035932486e-06, "loss": 2.5011, "step": 18547 }, { "epoch": 0.9950643776824034, "grad_norm": 0.439453125, "learning_rate": 4.634712821355217e-06, "loss": 2.0595, "step": 18548 }, { "epoch": 0.995118025751073, "grad_norm": 0.51953125, "learning_rate": 4.634667604200061e-06, "loss": 2.5218, "step": 18549 }, { "epoch": 0.9951716738197425, "grad_norm": 0.45703125, "learning_rate": 4.634622384467073e-06, "loss": 2.3134, "step": 18550 }, { "epoch": 0.9952253218884121, "grad_norm": 0.4609375, "learning_rate": 4.634577162156306e-06, "loss": 2.2172, "step": 18551 }, { "epoch": 0.9952789699570815, "grad_norm": 0.435546875, "learning_rate": 4.634531937267817e-06, "loss": 2.0138, "step": 18552 }, { "epoch": 0.9953326180257511, "grad_norm": 0.51953125, "learning_rate": 4.634486709801658e-06, "loss": 2.3406, "step": 18553 }, { "epoch": 0.9953862660944206, "grad_norm": 0.458984375, "learning_rate": 4.6344414797578855e-06, "loss": 2.1366, "step": 18554 }, { "epoch": 0.9954399141630901, "grad_norm": 1.09375, "learning_rate": 4.634396247136552e-06, "loss": 2.1051, "step": 18555 }, { "epoch": 0.9954935622317597, "grad_norm": 0.423828125, "learning_rate": 4.634351011937716e-06, "loss": 2.226, "step": 18556 }, { "epoch": 0.9955472103004291, "grad_norm": 0.5546875, "learning_rate": 4.634305774161428e-06, "loss": 2.2205, "step": 18557 }, { "epoch": 0.9956008583690987, "grad_norm": 0.4453125, "learning_rate": 4.634260533807745e-06, "loss": 2.4483, "step": 18558 }, { "epoch": 0.9956545064377682, "grad_norm": 3.6875, "learning_rate": 4.63421529087672e-06, "loss": 1.5685, "step": 18559 }, { "epoch": 0.9957081545064378, "grad_norm": 0.470703125, "learning_rate": 4.63417004536841e-06, "loss": 2.3141, "step": 18560 }, { "epoch": 0.9957618025751073, "grad_norm": 0.5390625, "learning_rate": 4.634124797282867e-06, "loss": 1.7588, "step": 18561 }, { "epoch": 0.9958154506437769, "grad_norm": 0.42578125, "learning_rate": 4.634079546620148e-06, "loss": 2.3839, "step": 18562 }, { "epoch": 0.9958690987124463, "grad_norm": 0.39453125, "learning_rate": 4.634034293380307e-06, "loss": 1.2817, "step": 18563 }, { "epoch": 0.9959227467811159, "grad_norm": 0.482421875, "learning_rate": 4.633989037563396e-06, "loss": 2.262, "step": 18564 }, { "epoch": 0.9959763948497854, "grad_norm": 0.455078125, "learning_rate": 4.633943779169474e-06, "loss": 2.2673, "step": 18565 }, { "epoch": 0.996030042918455, "grad_norm": 0.37890625, "learning_rate": 4.6338985181985925e-06, "loss": 2.1201, "step": 18566 }, { "epoch": 0.9960836909871245, "grad_norm": 0.546875, "learning_rate": 4.6338532546508074e-06, "loss": 2.3382, "step": 18567 }, { "epoch": 0.996137339055794, "grad_norm": 0.59375, "learning_rate": 4.633807988526174e-06, "loss": 1.1146, "step": 18568 }, { "epoch": 0.9961909871244635, "grad_norm": 0.443359375, "learning_rate": 4.6337627198247445e-06, "loss": 2.6407, "step": 18569 }, { "epoch": 0.996244635193133, "grad_norm": 0.408203125, "learning_rate": 4.633717448546575e-06, "loss": 2.2518, "step": 18570 }, { "epoch": 0.9962982832618026, "grad_norm": 1.1953125, "learning_rate": 4.633672174691721e-06, "loss": 2.3065, "step": 18571 }, { "epoch": 0.9963519313304721, "grad_norm": 1.109375, "learning_rate": 4.633626898260237e-06, "loss": 2.1976, "step": 18572 }, { "epoch": 0.9964055793991416, "grad_norm": 0.447265625, "learning_rate": 4.6335816192521764e-06, "loss": 1.8336, "step": 18573 }, { "epoch": 0.9964592274678111, "grad_norm": 0.462890625, "learning_rate": 4.6335363376675956e-06, "loss": 2.2248, "step": 18574 }, { "epoch": 0.9965128755364807, "grad_norm": 0.86328125, "learning_rate": 4.633491053506547e-06, "loss": 1.5772, "step": 18575 }, { "epoch": 0.9965665236051502, "grad_norm": 0.451171875, "learning_rate": 4.633445766769088e-06, "loss": 2.4071, "step": 18576 }, { "epoch": 0.9966201716738198, "grad_norm": 0.47265625, "learning_rate": 4.633400477455271e-06, "loss": 2.2095, "step": 18577 }, { "epoch": 0.9966738197424893, "grad_norm": 0.74609375, "learning_rate": 4.633355185565151e-06, "loss": 2.281, "step": 18578 }, { "epoch": 0.9967274678111588, "grad_norm": 0.45703125, "learning_rate": 4.633309891098785e-06, "loss": 1.8198, "step": 18579 }, { "epoch": 0.9967811158798283, "grad_norm": 0.470703125, "learning_rate": 4.633264594056225e-06, "loss": 2.4718, "step": 18580 }, { "epoch": 0.9968347639484979, "grad_norm": 0.439453125, "learning_rate": 4.633219294437526e-06, "loss": 2.1427, "step": 18581 }, { "epoch": 0.9968884120171674, "grad_norm": 0.4140625, "learning_rate": 4.633173992242744e-06, "loss": 2.0254, "step": 18582 }, { "epoch": 0.9969420600858369, "grad_norm": 0.5, "learning_rate": 4.633128687471934e-06, "loss": 2.6161, "step": 18583 }, { "epoch": 0.9969957081545064, "grad_norm": 0.47265625, "learning_rate": 4.633083380125148e-06, "loss": 2.4282, "step": 18584 }, { "epoch": 0.9970493562231759, "grad_norm": 0.482421875, "learning_rate": 4.633038070202444e-06, "loss": 2.2225, "step": 18585 }, { "epoch": 0.9971030042918455, "grad_norm": 0.4609375, "learning_rate": 4.6329927577038745e-06, "loss": 2.189, "step": 18586 }, { "epoch": 0.997156652360515, "grad_norm": 0.494140625, "learning_rate": 4.6329474426294955e-06, "loss": 1.7412, "step": 18587 }, { "epoch": 0.9972103004291846, "grad_norm": 0.357421875, "learning_rate": 4.632902124979361e-06, "loss": 1.9748, "step": 18588 }, { "epoch": 0.997263948497854, "grad_norm": 0.60546875, "learning_rate": 4.632856804753526e-06, "loss": 2.1228, "step": 18589 }, { "epoch": 0.9973175965665236, "grad_norm": 0.5859375, "learning_rate": 4.632811481952045e-06, "loss": 2.3545, "step": 18590 }, { "epoch": 0.9973712446351931, "grad_norm": 0.52734375, "learning_rate": 4.632766156574972e-06, "loss": 2.0743, "step": 18591 }, { "epoch": 0.9974248927038627, "grad_norm": 0.396484375, "learning_rate": 4.632720828622364e-06, "loss": 2.0964, "step": 18592 }, { "epoch": 0.9974785407725322, "grad_norm": 0.50390625, "learning_rate": 4.6326754980942735e-06, "loss": 2.4147, "step": 18593 }, { "epoch": 0.9975321888412018, "grad_norm": 0.435546875, "learning_rate": 4.632630164990756e-06, "loss": 2.3403, "step": 18594 }, { "epoch": 0.9975858369098712, "grad_norm": 0.46875, "learning_rate": 4.632584829311867e-06, "loss": 2.1093, "step": 18595 }, { "epoch": 0.9976394849785408, "grad_norm": 0.482421875, "learning_rate": 4.63253949105766e-06, "loss": 2.4088, "step": 18596 }, { "epoch": 0.9976931330472103, "grad_norm": 0.5390625, "learning_rate": 4.632494150228191e-06, "loss": 2.1453, "step": 18597 }, { "epoch": 0.9977467811158798, "grad_norm": 0.453125, "learning_rate": 4.632448806823513e-06, "loss": 2.3289, "step": 18598 }, { "epoch": 0.9978004291845494, "grad_norm": 0.609375, "learning_rate": 4.632403460843682e-06, "loss": 2.4617, "step": 18599 }, { "epoch": 0.9978540772532188, "grad_norm": 0.58984375, "learning_rate": 4.632358112288753e-06, "loss": 2.4396, "step": 18600 }, { "epoch": 0.9979077253218884, "grad_norm": 0.51953125, "learning_rate": 4.63231276115878e-06, "loss": 2.2861, "step": 18601 }, { "epoch": 0.9979613733905579, "grad_norm": 0.3984375, "learning_rate": 4.632267407453819e-06, "loss": 2.282, "step": 18602 }, { "epoch": 0.9980150214592275, "grad_norm": 0.515625, "learning_rate": 4.632222051173924e-06, "loss": 2.3248, "step": 18603 }, { "epoch": 0.998068669527897, "grad_norm": 0.49609375, "learning_rate": 4.632176692319148e-06, "loss": 2.3974, "step": 18604 }, { "epoch": 0.9981223175965666, "grad_norm": 0.439453125, "learning_rate": 4.632131330889549e-06, "loss": 2.2842, "step": 18605 }, { "epoch": 0.998175965665236, "grad_norm": 0.431640625, "learning_rate": 4.632085966885179e-06, "loss": 2.2313, "step": 18606 }, { "epoch": 0.9982296137339056, "grad_norm": 0.41796875, "learning_rate": 4.632040600306095e-06, "loss": 2.4737, "step": 18607 }, { "epoch": 0.9982832618025751, "grad_norm": 0.494140625, "learning_rate": 4.631995231152351e-06, "loss": 2.0868, "step": 18608 }, { "epoch": 0.9983369098712447, "grad_norm": 0.42578125, "learning_rate": 4.631949859424001e-06, "loss": 1.9142, "step": 18609 }, { "epoch": 0.9983905579399142, "grad_norm": 0.490234375, "learning_rate": 4.6319044851211e-06, "loss": 2.2672, "step": 18610 }, { "epoch": 0.9984442060085837, "grad_norm": 0.609375, "learning_rate": 4.631859108243704e-06, "loss": 2.4555, "step": 18611 }, { "epoch": 0.9984978540772532, "grad_norm": 0.47265625, "learning_rate": 4.631813728791866e-06, "loss": 2.4307, "step": 18612 }, { "epoch": 0.9985515021459227, "grad_norm": 0.41015625, "learning_rate": 4.631768346765642e-06, "loss": 2.1144, "step": 18613 }, { "epoch": 0.9986051502145923, "grad_norm": 0.392578125, "learning_rate": 4.631722962165087e-06, "loss": 2.3457, "step": 18614 }, { "epoch": 0.9986587982832618, "grad_norm": 0.451171875, "learning_rate": 4.631677574990255e-06, "loss": 2.3586, "step": 18615 }, { "epoch": 0.9987124463519313, "grad_norm": 0.5859375, "learning_rate": 4.631632185241202e-06, "loss": 2.3716, "step": 18616 }, { "epoch": 0.9987660944206008, "grad_norm": 0.482421875, "learning_rate": 4.631586792917981e-06, "loss": 2.3199, "step": 18617 }, { "epoch": 0.9988197424892704, "grad_norm": 0.400390625, "learning_rate": 4.631541398020647e-06, "loss": 2.3497, "step": 18618 }, { "epoch": 0.9988733905579399, "grad_norm": 0.5234375, "learning_rate": 4.631496000549257e-06, "loss": 2.4496, "step": 18619 }, { "epoch": 0.9989270386266095, "grad_norm": 0.482421875, "learning_rate": 4.631450600503864e-06, "loss": 2.4108, "step": 18620 }, { "epoch": 0.998980686695279, "grad_norm": 0.96875, "learning_rate": 4.631405197884524e-06, "loss": 2.512, "step": 18621 }, { "epoch": 0.9990343347639485, "grad_norm": 0.423828125, "learning_rate": 4.63135979269129e-06, "loss": 2.2648, "step": 18622 }, { "epoch": 0.999087982832618, "grad_norm": 0.470703125, "learning_rate": 4.6313143849242185e-06, "loss": 2.2955, "step": 18623 }, { "epoch": 0.9991416309012876, "grad_norm": 0.53515625, "learning_rate": 4.631268974583364e-06, "loss": 2.348, "step": 18624 }, { "epoch": 0.9991952789699571, "grad_norm": 1.40625, "learning_rate": 4.631223561668781e-06, "loss": 1.472, "step": 18625 }, { "epoch": 0.9992489270386266, "grad_norm": 0.419921875, "learning_rate": 4.631178146180524e-06, "loss": 2.2418, "step": 18626 }, { "epoch": 0.9993025751072961, "grad_norm": 0.44140625, "learning_rate": 4.631132728118649e-06, "loss": 2.0519, "step": 18627 }, { "epoch": 0.9993562231759656, "grad_norm": 0.4921875, "learning_rate": 4.631087307483209e-06, "loss": 2.5763, "step": 18628 }, { "epoch": 0.9994098712446352, "grad_norm": 0.5390625, "learning_rate": 4.631041884274261e-06, "loss": 2.3582, "step": 18629 }, { "epoch": 0.9994635193133047, "grad_norm": 0.671875, "learning_rate": 4.630996458491858e-06, "loss": 2.0687, "step": 18630 }, { "epoch": 0.9995171673819743, "grad_norm": 0.44921875, "learning_rate": 4.630951030136056e-06, "loss": 2.4045, "step": 18631 }, { "epoch": 0.9995708154506437, "grad_norm": 0.447265625, "learning_rate": 4.63090559920691e-06, "loss": 2.0803, "step": 18632 }, { "epoch": 0.9996244635193133, "grad_norm": 0.63671875, "learning_rate": 4.630860165704474e-06, "loss": 1.8399, "step": 18633 }, { "epoch": 0.9996781115879828, "grad_norm": 0.5, "learning_rate": 4.6308147296288035e-06, "loss": 2.5026, "step": 18634 }, { "epoch": 0.9997317596566524, "grad_norm": 0.4921875, "learning_rate": 4.630769290979953e-06, "loss": 2.2246, "step": 18635 }, { "epoch": 0.9997854077253219, "grad_norm": 0.6875, "learning_rate": 4.630723849757978e-06, "loss": 2.3125, "step": 18636 }, { "epoch": 0.9998390557939915, "grad_norm": 0.4921875, "learning_rate": 4.6306784059629315e-06, "loss": 2.3083, "step": 18637 }, { "epoch": 0.9998927038626609, "grad_norm": 0.515625, "learning_rate": 4.630632959594871e-06, "loss": 2.1613, "step": 18638 }, { "epoch": 0.9999463519313305, "grad_norm": 0.6171875, "learning_rate": 4.63058751065385e-06, "loss": 2.377, "step": 18639 }, { "epoch": 1.0, "grad_norm": 0.494140625, "learning_rate": 4.630542059139923e-06, "loss": 2.3591, "step": 18640 } ], "logging_steps": 1, "max_steps": 93200, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.5966987500157665e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }