{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 6562, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00015239256324291374, "grad_norm": 0.7578125, "learning_rate": 1.015228426395939e-06, "loss": 0.7205, "step": 1 }, { "epoch": 0.00030478512648582747, "grad_norm": 1.171875, "learning_rate": 2.030456852791878e-06, "loss": 0.9582, "step": 2 }, { "epoch": 0.00045717768972874126, "grad_norm": 0.83203125, "learning_rate": 3.0456852791878177e-06, "loss": 0.9217, "step": 3 }, { "epoch": 0.0006095702529716549, "grad_norm": 0.984375, "learning_rate": 4.060913705583756e-06, "loss": 0.9203, "step": 4 }, { "epoch": 0.0007619628162145687, "grad_norm": 0.98046875, "learning_rate": 5.076142131979695e-06, "loss": 1.0185, "step": 5 }, { "epoch": 0.0009143553794574825, "grad_norm": 0.90234375, "learning_rate": 6.091370558375635e-06, "loss": 0.8611, "step": 6 }, { "epoch": 0.0010667479427003963, "grad_norm": 0.95703125, "learning_rate": 7.106598984771575e-06, "loss": 1.1762, "step": 7 }, { "epoch": 0.0012191405059433099, "grad_norm": 1.140625, "learning_rate": 8.121827411167512e-06, "loss": 0.917, "step": 8 }, { "epoch": 0.0013715330691862237, "grad_norm": 0.60546875, "learning_rate": 9.137055837563452e-06, "loss": 0.9471, "step": 9 }, { "epoch": 0.0015239256324291375, "grad_norm": 1.03125, "learning_rate": 1.015228426395939e-05, "loss": 0.9496, "step": 10 }, { "epoch": 0.0016763181956720513, "grad_norm": 0.86328125, "learning_rate": 1.116751269035533e-05, "loss": 0.9342, "step": 11 }, { "epoch": 0.001828710758914965, "grad_norm": 1.0546875, "learning_rate": 1.218274111675127e-05, "loss": 1.0556, "step": 12 }, { "epoch": 0.0019811033221578786, "grad_norm": 1.078125, "learning_rate": 1.3197969543147209e-05, "loss": 1.0586, "step": 13 }, { "epoch": 0.0021334958854007926, "grad_norm": 1.03125, "learning_rate": 1.421319796954315e-05, "loss": 0.9775, "step": 14 }, { "epoch": 0.002285888448643706, "grad_norm": 0.71875, "learning_rate": 1.5228426395939088e-05, "loss": 0.9953, "step": 15 }, { "epoch": 0.0024382810118866198, "grad_norm": 1.09375, "learning_rate": 1.6243654822335024e-05, "loss": 0.9753, "step": 16 }, { "epoch": 0.0025906735751295338, "grad_norm": 0.96484375, "learning_rate": 1.7258883248730966e-05, "loss": 0.9816, "step": 17 }, { "epoch": 0.0027430661383724473, "grad_norm": 1.109375, "learning_rate": 1.8274111675126904e-05, "loss": 1.0085, "step": 18 }, { "epoch": 0.0028954587016153614, "grad_norm": 0.828125, "learning_rate": 1.9289340101522843e-05, "loss": 1.0766, "step": 19 }, { "epoch": 0.003047851264858275, "grad_norm": 0.98046875, "learning_rate": 2.030456852791878e-05, "loss": 0.9647, "step": 20 }, { "epoch": 0.0032002438281011885, "grad_norm": 1.046875, "learning_rate": 2.1319796954314723e-05, "loss": 0.9234, "step": 21 }, { "epoch": 0.0033526363913441025, "grad_norm": 0.8828125, "learning_rate": 2.233502538071066e-05, "loss": 0.873, "step": 22 }, { "epoch": 0.003505028954587016, "grad_norm": 0.80078125, "learning_rate": 2.33502538071066e-05, "loss": 0.9569, "step": 23 }, { "epoch": 0.00365742151782993, "grad_norm": 0.8984375, "learning_rate": 2.436548223350254e-05, "loss": 1.048, "step": 24 }, { "epoch": 0.0038098140810728437, "grad_norm": 0.88671875, "learning_rate": 2.5380710659898476e-05, "loss": 0.9817, "step": 25 }, { "epoch": 0.003962206644315757, "grad_norm": 0.96484375, "learning_rate": 2.6395939086294418e-05, "loss": 0.9559, "step": 26 }, { "epoch": 0.004114599207558671, "grad_norm": 1.0546875, "learning_rate": 2.7411167512690357e-05, "loss": 1.0673, "step": 27 }, { "epoch": 0.004266991770801585, "grad_norm": 0.875, "learning_rate": 2.84263959390863e-05, "loss": 0.8704, "step": 28 }, { "epoch": 0.004419384334044499, "grad_norm": 1.328125, "learning_rate": 2.9441624365482233e-05, "loss": 1.1046, "step": 29 }, { "epoch": 0.004571776897287412, "grad_norm": 1.15625, "learning_rate": 3.0456852791878175e-05, "loss": 0.9479, "step": 30 }, { "epoch": 0.004724169460530326, "grad_norm": 0.91796875, "learning_rate": 3.147208121827411e-05, "loss": 0.8353, "step": 31 }, { "epoch": 0.0048765620237732395, "grad_norm": 0.95703125, "learning_rate": 3.248730964467005e-05, "loss": 0.9964, "step": 32 }, { "epoch": 0.005028954587016154, "grad_norm": 0.80078125, "learning_rate": 3.3502538071065994e-05, "loss": 0.9566, "step": 33 }, { "epoch": 0.0051813471502590676, "grad_norm": 0.94921875, "learning_rate": 3.451776649746193e-05, "loss": 0.9568, "step": 34 }, { "epoch": 0.005333739713501981, "grad_norm": 0.8359375, "learning_rate": 3.553299492385787e-05, "loss": 0.867, "step": 35 }, { "epoch": 0.005486132276744895, "grad_norm": 0.78515625, "learning_rate": 3.654822335025381e-05, "loss": 1.0921, "step": 36 }, { "epoch": 0.005638524839987808, "grad_norm": 0.96484375, "learning_rate": 3.756345177664975e-05, "loss": 0.9191, "step": 37 }, { "epoch": 0.005790917403230723, "grad_norm": 0.84375, "learning_rate": 3.8578680203045685e-05, "loss": 0.8852, "step": 38 }, { "epoch": 0.005943309966473636, "grad_norm": 0.875, "learning_rate": 3.959390862944163e-05, "loss": 0.9605, "step": 39 }, { "epoch": 0.00609570252971655, "grad_norm": 0.859375, "learning_rate": 4.060913705583756e-05, "loss": 1.0461, "step": 40 }, { "epoch": 0.006248095092959463, "grad_norm": 0.8359375, "learning_rate": 4.162436548223351e-05, "loss": 0.9165, "step": 41 }, { "epoch": 0.006400487656202377, "grad_norm": 1.0, "learning_rate": 4.2639593908629446e-05, "loss": 0.9774, "step": 42 }, { "epoch": 0.0065528802194452914, "grad_norm": 1.0859375, "learning_rate": 4.365482233502538e-05, "loss": 0.9915, "step": 43 }, { "epoch": 0.006705272782688205, "grad_norm": 0.9765625, "learning_rate": 4.467005076142132e-05, "loss": 0.8786, "step": 44 }, { "epoch": 0.006857665345931119, "grad_norm": 0.74609375, "learning_rate": 4.568527918781726e-05, "loss": 0.8457, "step": 45 }, { "epoch": 0.007010057909174032, "grad_norm": 1.2890625, "learning_rate": 4.67005076142132e-05, "loss": 0.9083, "step": 46 }, { "epoch": 0.007162450472416946, "grad_norm": 1.234375, "learning_rate": 4.771573604060914e-05, "loss": 1.0551, "step": 47 }, { "epoch": 0.00731484303565986, "grad_norm": 0.90625, "learning_rate": 4.873096446700508e-05, "loss": 1.0611, "step": 48 }, { "epoch": 0.007467235598902774, "grad_norm": 1.0703125, "learning_rate": 4.9746192893401014e-05, "loss": 0.9896, "step": 49 }, { "epoch": 0.007619628162145687, "grad_norm": 1.09375, "learning_rate": 5.076142131979695e-05, "loss": 0.9979, "step": 50 }, { "epoch": 0.007772020725388601, "grad_norm": 0.8046875, "learning_rate": 5.17766497461929e-05, "loss": 1.0679, "step": 51 }, { "epoch": 0.007924413288631514, "grad_norm": 0.80859375, "learning_rate": 5.2791878172588836e-05, "loss": 0.8748, "step": 52 }, { "epoch": 0.008076805851874428, "grad_norm": 0.87109375, "learning_rate": 5.380710659898477e-05, "loss": 0.9422, "step": 53 }, { "epoch": 0.008229198415117342, "grad_norm": 1.0078125, "learning_rate": 5.482233502538071e-05, "loss": 0.9792, "step": 54 }, { "epoch": 0.008381590978360255, "grad_norm": 1.15625, "learning_rate": 5.583756345177665e-05, "loss": 0.9485, "step": 55 }, { "epoch": 0.00853398354160317, "grad_norm": 1.0546875, "learning_rate": 5.68527918781726e-05, "loss": 0.9123, "step": 56 }, { "epoch": 0.008686376104846084, "grad_norm": 0.828125, "learning_rate": 5.786802030456853e-05, "loss": 0.8716, "step": 57 }, { "epoch": 0.008838768668088998, "grad_norm": 0.890625, "learning_rate": 5.8883248730964467e-05, "loss": 1.0111, "step": 58 }, { "epoch": 0.008991161231331911, "grad_norm": 1.0078125, "learning_rate": 5.989847715736041e-05, "loss": 0.8152, "step": 59 }, { "epoch": 0.009143553794574825, "grad_norm": 1.0390625, "learning_rate": 6.091370558375635e-05, "loss": 1.1772, "step": 60 }, { "epoch": 0.009295946357817738, "grad_norm": 0.83984375, "learning_rate": 6.192893401015228e-05, "loss": 0.9497, "step": 61 }, { "epoch": 0.009448338921060652, "grad_norm": 0.93359375, "learning_rate": 6.294416243654822e-05, "loss": 1.0744, "step": 62 }, { "epoch": 0.009600731484303566, "grad_norm": 1.109375, "learning_rate": 6.395939086294417e-05, "loss": 0.7621, "step": 63 }, { "epoch": 0.009753124047546479, "grad_norm": 1.171875, "learning_rate": 6.49746192893401e-05, "loss": 1.1644, "step": 64 }, { "epoch": 0.009905516610789393, "grad_norm": 1.0, "learning_rate": 6.598984771573604e-05, "loss": 0.8988, "step": 65 }, { "epoch": 0.010057909174032308, "grad_norm": 0.921875, "learning_rate": 6.700507614213199e-05, "loss": 1.0022, "step": 66 }, { "epoch": 0.010210301737275222, "grad_norm": 0.8359375, "learning_rate": 6.802030456852793e-05, "loss": 0.9092, "step": 67 }, { "epoch": 0.010362694300518135, "grad_norm": 1.09375, "learning_rate": 6.903553299492386e-05, "loss": 1.0944, "step": 68 }, { "epoch": 0.010515086863761049, "grad_norm": 0.8515625, "learning_rate": 7.00507614213198e-05, "loss": 0.9674, "step": 69 }, { "epoch": 0.010667479427003962, "grad_norm": 0.79296875, "learning_rate": 7.106598984771574e-05, "loss": 0.9559, "step": 70 }, { "epoch": 0.010819871990246876, "grad_norm": 1.0390625, "learning_rate": 7.208121827411168e-05, "loss": 1.0529, "step": 71 }, { "epoch": 0.01097226455348979, "grad_norm": 0.8671875, "learning_rate": 7.309644670050762e-05, "loss": 1.0138, "step": 72 }, { "epoch": 0.011124657116732703, "grad_norm": 0.765625, "learning_rate": 7.411167512690356e-05, "loss": 1.0677, "step": 73 }, { "epoch": 0.011277049679975617, "grad_norm": 0.8046875, "learning_rate": 7.51269035532995e-05, "loss": 0.9338, "step": 74 }, { "epoch": 0.01142944224321853, "grad_norm": 0.8359375, "learning_rate": 7.614213197969543e-05, "loss": 0.998, "step": 75 }, { "epoch": 0.011581834806461445, "grad_norm": 0.8984375, "learning_rate": 7.715736040609137e-05, "loss": 0.9141, "step": 76 }, { "epoch": 0.011734227369704359, "grad_norm": 1.0234375, "learning_rate": 7.817258883248731e-05, "loss": 1.081, "step": 77 }, { "epoch": 0.011886619932947273, "grad_norm": 0.8984375, "learning_rate": 7.918781725888326e-05, "loss": 0.9523, "step": 78 }, { "epoch": 0.012039012496190186, "grad_norm": 0.953125, "learning_rate": 8.020304568527919e-05, "loss": 1.0154, "step": 79 }, { "epoch": 0.0121914050594331, "grad_norm": 0.79296875, "learning_rate": 8.121827411167512e-05, "loss": 0.9038, "step": 80 }, { "epoch": 0.012343797622676013, "grad_norm": 1.09375, "learning_rate": 8.223350253807108e-05, "loss": 1.1407, "step": 81 }, { "epoch": 0.012496190185918927, "grad_norm": 0.8828125, "learning_rate": 8.324873096446701e-05, "loss": 1.1505, "step": 82 }, { "epoch": 0.01264858274916184, "grad_norm": 0.859375, "learning_rate": 8.426395939086294e-05, "loss": 1.0808, "step": 83 }, { "epoch": 0.012800975312404754, "grad_norm": 0.875, "learning_rate": 8.527918781725889e-05, "loss": 0.9784, "step": 84 }, { "epoch": 0.012953367875647668, "grad_norm": 0.66796875, "learning_rate": 8.629441624365483e-05, "loss": 0.9313, "step": 85 }, { "epoch": 0.013105760438890583, "grad_norm": 0.85546875, "learning_rate": 8.730964467005075e-05, "loss": 0.9209, "step": 86 }, { "epoch": 0.013258153002133496, "grad_norm": 1.0625, "learning_rate": 8.83248730964467e-05, "loss": 1.0023, "step": 87 }, { "epoch": 0.01341054556537641, "grad_norm": 1.0078125, "learning_rate": 8.934010152284265e-05, "loss": 0.9408, "step": 88 }, { "epoch": 0.013562938128619324, "grad_norm": 1.1953125, "learning_rate": 9.035532994923858e-05, "loss": 1.1023, "step": 89 }, { "epoch": 0.013715330691862237, "grad_norm": 0.91015625, "learning_rate": 9.137055837563452e-05, "loss": 0.957, "step": 90 }, { "epoch": 0.01386772325510515, "grad_norm": 0.9453125, "learning_rate": 9.238578680203046e-05, "loss": 1.061, "step": 91 }, { "epoch": 0.014020115818348064, "grad_norm": 1.2734375, "learning_rate": 9.34010152284264e-05, "loss": 0.9003, "step": 92 }, { "epoch": 0.014172508381590978, "grad_norm": 0.8359375, "learning_rate": 9.441624365482235e-05, "loss": 0.9065, "step": 93 }, { "epoch": 0.014324900944833891, "grad_norm": 0.9375, "learning_rate": 9.543147208121828e-05, "loss": 1.0612, "step": 94 }, { "epoch": 0.014477293508076805, "grad_norm": 1.0390625, "learning_rate": 9.644670050761421e-05, "loss": 1.0182, "step": 95 }, { "epoch": 0.01462968607131972, "grad_norm": 1.3515625, "learning_rate": 9.746192893401017e-05, "loss": 0.9418, "step": 96 }, { "epoch": 0.014782078634562634, "grad_norm": 0.796875, "learning_rate": 9.847715736040609e-05, "loss": 0.9644, "step": 97 }, { "epoch": 0.014934471197805547, "grad_norm": 0.93359375, "learning_rate": 9.949238578680203e-05, "loss": 0.9253, "step": 98 }, { "epoch": 0.015086863761048461, "grad_norm": 1.1171875, "learning_rate": 0.00010050761421319797, "loss": 1.0129, "step": 99 }, { "epoch": 0.015239256324291375, "grad_norm": 0.91796875, "learning_rate": 0.0001015228426395939, "loss": 1.186, "step": 100 }, { "epoch": 0.015391648887534288, "grad_norm": 1.0234375, "learning_rate": 0.00010253807106598984, "loss": 1.0104, "step": 101 }, { "epoch": 0.015544041450777202, "grad_norm": 0.9296875, "learning_rate": 0.0001035532994923858, "loss": 0.9711, "step": 102 }, { "epoch": 0.015696434014020117, "grad_norm": 0.8984375, "learning_rate": 0.00010456852791878173, "loss": 0.8904, "step": 103 }, { "epoch": 0.01584882657726303, "grad_norm": 0.83203125, "learning_rate": 0.00010558375634517767, "loss": 0.9196, "step": 104 }, { "epoch": 0.016001219140505944, "grad_norm": 1.015625, "learning_rate": 0.00010659898477157362, "loss": 0.9542, "step": 105 }, { "epoch": 0.016153611703748856, "grad_norm": 0.70703125, "learning_rate": 0.00010761421319796954, "loss": 0.8944, "step": 106 }, { "epoch": 0.01630600426699177, "grad_norm": 1.0234375, "learning_rate": 0.00010862944162436547, "loss": 0.9202, "step": 107 }, { "epoch": 0.016458396830234683, "grad_norm": 0.85546875, "learning_rate": 0.00010964467005076143, "loss": 0.998, "step": 108 }, { "epoch": 0.0166107893934776, "grad_norm": 0.8046875, "learning_rate": 0.00011065989847715736, "loss": 0.9602, "step": 109 }, { "epoch": 0.01676318195672051, "grad_norm": 1.0546875, "learning_rate": 0.0001116751269035533, "loss": 0.9523, "step": 110 }, { "epoch": 0.016915574519963426, "grad_norm": 0.8359375, "learning_rate": 0.00011269035532994925, "loss": 1.1677, "step": 111 }, { "epoch": 0.01706796708320634, "grad_norm": 0.8359375, "learning_rate": 0.0001137055837563452, "loss": 0.961, "step": 112 }, { "epoch": 0.017220359646449253, "grad_norm": 0.8125, "learning_rate": 0.00011472081218274113, "loss": 1.1584, "step": 113 }, { "epoch": 0.017372752209692168, "grad_norm": 0.86328125, "learning_rate": 0.00011573604060913706, "loss": 0.9677, "step": 114 }, { "epoch": 0.01752514477293508, "grad_norm": 0.8515625, "learning_rate": 0.000116751269035533, "loss": 0.9164, "step": 115 }, { "epoch": 0.017677537336177995, "grad_norm": 0.80078125, "learning_rate": 0.00011776649746192893, "loss": 1.0596, "step": 116 }, { "epoch": 0.017829929899420907, "grad_norm": 0.953125, "learning_rate": 0.00011878172588832489, "loss": 1.0332, "step": 117 }, { "epoch": 0.017982322462663822, "grad_norm": 0.8125, "learning_rate": 0.00011979695431472082, "loss": 1.0497, "step": 118 }, { "epoch": 0.018134715025906734, "grad_norm": 0.796875, "learning_rate": 0.00012081218274111676, "loss": 0.9036, "step": 119 }, { "epoch": 0.01828710758914965, "grad_norm": 0.890625, "learning_rate": 0.0001218274111675127, "loss": 1.039, "step": 120 }, { "epoch": 0.018439500152392565, "grad_norm": 0.66796875, "learning_rate": 0.00012284263959390864, "loss": 0.9148, "step": 121 }, { "epoch": 0.018591892715635477, "grad_norm": 1.0703125, "learning_rate": 0.00012385786802030456, "loss": 1.0124, "step": 122 }, { "epoch": 0.018744285278878392, "grad_norm": 0.8515625, "learning_rate": 0.00012487309644670052, "loss": 1.0621, "step": 123 }, { "epoch": 0.018896677842121304, "grad_norm": 0.97265625, "learning_rate": 0.00012588832487309644, "loss": 0.8195, "step": 124 }, { "epoch": 0.01904907040536422, "grad_norm": 0.94921875, "learning_rate": 0.0001269035532994924, "loss": 0.9112, "step": 125 }, { "epoch": 0.01920146296860713, "grad_norm": 0.92578125, "learning_rate": 0.00012791878172588834, "loss": 0.9384, "step": 126 }, { "epoch": 0.019353855531850046, "grad_norm": 0.79296875, "learning_rate": 0.00012893401015228427, "loss": 0.8993, "step": 127 }, { "epoch": 0.019506248095092958, "grad_norm": 0.88671875, "learning_rate": 0.0001299492385786802, "loss": 1.1261, "step": 128 }, { "epoch": 0.019658640658335873, "grad_norm": 0.8515625, "learning_rate": 0.00013096446700507615, "loss": 0.9578, "step": 129 }, { "epoch": 0.019811033221578785, "grad_norm": 0.828125, "learning_rate": 0.00013197969543147207, "loss": 0.8715, "step": 130 }, { "epoch": 0.0199634257848217, "grad_norm": 1.0546875, "learning_rate": 0.00013299492385786802, "loss": 1.1728, "step": 131 }, { "epoch": 0.020115818348064616, "grad_norm": 1.015625, "learning_rate": 0.00013401015228426397, "loss": 1.1392, "step": 132 }, { "epoch": 0.020268210911307528, "grad_norm": 0.98828125, "learning_rate": 0.0001350253807106599, "loss": 1.0943, "step": 133 }, { "epoch": 0.020420603474550443, "grad_norm": 1.109375, "learning_rate": 0.00013604060913705585, "loss": 1.0992, "step": 134 }, { "epoch": 0.020572996037793355, "grad_norm": 0.97265625, "learning_rate": 0.00013705583756345178, "loss": 1.0841, "step": 135 }, { "epoch": 0.02072538860103627, "grad_norm": 0.94921875, "learning_rate": 0.00013807106598984773, "loss": 1.0025, "step": 136 }, { "epoch": 0.020877781164279182, "grad_norm": 1.109375, "learning_rate": 0.00013908629441624365, "loss": 1.1857, "step": 137 }, { "epoch": 0.021030173727522097, "grad_norm": 0.85546875, "learning_rate": 0.0001401015228426396, "loss": 1.0023, "step": 138 }, { "epoch": 0.02118256629076501, "grad_norm": 1.1484375, "learning_rate": 0.00014111675126903553, "loss": 1.1159, "step": 139 }, { "epoch": 0.021334958854007924, "grad_norm": 1.0234375, "learning_rate": 0.00014213197969543148, "loss": 0.9144, "step": 140 }, { "epoch": 0.02148735141725084, "grad_norm": 1.0390625, "learning_rate": 0.00014314720812182743, "loss": 1.1774, "step": 141 }, { "epoch": 0.02163974398049375, "grad_norm": 1.2265625, "learning_rate": 0.00014416243654822336, "loss": 1.1209, "step": 142 }, { "epoch": 0.021792136543736667, "grad_norm": 0.97265625, "learning_rate": 0.00014517766497461928, "loss": 0.7939, "step": 143 }, { "epoch": 0.02194452910697958, "grad_norm": 0.92578125, "learning_rate": 0.00014619289340101523, "loss": 0.9842, "step": 144 }, { "epoch": 0.022096921670222494, "grad_norm": 1.2890625, "learning_rate": 0.00014720812182741116, "loss": 1.031, "step": 145 }, { "epoch": 0.022249314233465406, "grad_norm": 1.15625, "learning_rate": 0.0001482233502538071, "loss": 1.1485, "step": 146 }, { "epoch": 0.02240170679670832, "grad_norm": 1.09375, "learning_rate": 0.00014923857868020306, "loss": 1.073, "step": 147 }, { "epoch": 0.022554099359951233, "grad_norm": 0.953125, "learning_rate": 0.000150253807106599, "loss": 1.0159, "step": 148 }, { "epoch": 0.02270649192319415, "grad_norm": 0.86328125, "learning_rate": 0.00015126903553299494, "loss": 1.0191, "step": 149 }, { "epoch": 0.02285888448643706, "grad_norm": 0.9609375, "learning_rate": 0.00015228426395939087, "loss": 0.9758, "step": 150 }, { "epoch": 0.023011277049679976, "grad_norm": 1.0859375, "learning_rate": 0.0001532994923857868, "loss": 1.0242, "step": 151 }, { "epoch": 0.02316366961292289, "grad_norm": 0.90625, "learning_rate": 0.00015431472081218274, "loss": 1.028, "step": 152 }, { "epoch": 0.023316062176165803, "grad_norm": 1.0, "learning_rate": 0.0001553299492385787, "loss": 0.8286, "step": 153 }, { "epoch": 0.023468454739408718, "grad_norm": 1.0546875, "learning_rate": 0.00015634517766497462, "loss": 1.0554, "step": 154 }, { "epoch": 0.02362084730265163, "grad_norm": 0.734375, "learning_rate": 0.00015736040609137057, "loss": 1.0287, "step": 155 }, { "epoch": 0.023773239865894545, "grad_norm": 0.84375, "learning_rate": 0.00015837563451776652, "loss": 0.8802, "step": 156 }, { "epoch": 0.023925632429137457, "grad_norm": 1.1796875, "learning_rate": 0.00015939086294416242, "loss": 0.949, "step": 157 }, { "epoch": 0.024078024992380372, "grad_norm": 1.09375, "learning_rate": 0.00016040609137055837, "loss": 1.1338, "step": 158 }, { "epoch": 0.024230417555623284, "grad_norm": 1.21875, "learning_rate": 0.00016142131979695432, "loss": 1.0915, "step": 159 }, { "epoch": 0.0243828101188662, "grad_norm": 0.84765625, "learning_rate": 0.00016243654822335025, "loss": 0.9156, "step": 160 }, { "epoch": 0.024535202682109115, "grad_norm": 0.79296875, "learning_rate": 0.0001634517766497462, "loss": 1.0043, "step": 161 }, { "epoch": 0.024687595245352027, "grad_norm": 1.265625, "learning_rate": 0.00016446700507614215, "loss": 1.0961, "step": 162 }, { "epoch": 0.024839987808594942, "grad_norm": 0.92578125, "learning_rate": 0.00016548223350253808, "loss": 0.9317, "step": 163 }, { "epoch": 0.024992380371837854, "grad_norm": 0.69140625, "learning_rate": 0.00016649746192893403, "loss": 0.8042, "step": 164 }, { "epoch": 0.02514477293508077, "grad_norm": 0.859375, "learning_rate": 0.00016751269035532995, "loss": 0.987, "step": 165 }, { "epoch": 0.02529716549832368, "grad_norm": 1.109375, "learning_rate": 0.00016852791878172588, "loss": 1.2074, "step": 166 }, { "epoch": 0.025449558061566596, "grad_norm": 0.92578125, "learning_rate": 0.00016954314720812183, "loss": 0.9698, "step": 167 }, { "epoch": 0.025601950624809508, "grad_norm": 1.0078125, "learning_rate": 0.00017055837563451778, "loss": 1.1308, "step": 168 }, { "epoch": 0.025754343188052423, "grad_norm": 0.9296875, "learning_rate": 0.0001715736040609137, "loss": 1.0576, "step": 169 }, { "epoch": 0.025906735751295335, "grad_norm": 0.7578125, "learning_rate": 0.00017258883248730966, "loss": 0.8457, "step": 170 }, { "epoch": 0.02605912831453825, "grad_norm": 0.734375, "learning_rate": 0.0001736040609137056, "loss": 0.9251, "step": 171 }, { "epoch": 0.026211520877781166, "grad_norm": 1.109375, "learning_rate": 0.0001746192893401015, "loss": 1.0924, "step": 172 }, { "epoch": 0.026363913441024078, "grad_norm": 1.046875, "learning_rate": 0.00017563451776649746, "loss": 1.1698, "step": 173 }, { "epoch": 0.026516306004266993, "grad_norm": 0.96875, "learning_rate": 0.0001766497461928934, "loss": 0.958, "step": 174 }, { "epoch": 0.026668698567509905, "grad_norm": 1.1171875, "learning_rate": 0.00017766497461928934, "loss": 0.9896, "step": 175 }, { "epoch": 0.02682109113075282, "grad_norm": 1.0546875, "learning_rate": 0.0001786802030456853, "loss": 1.1044, "step": 176 }, { "epoch": 0.026973483693995732, "grad_norm": 0.7421875, "learning_rate": 0.00017969543147208124, "loss": 0.9807, "step": 177 }, { "epoch": 0.027125876257238647, "grad_norm": 1.296875, "learning_rate": 0.00018071065989847717, "loss": 1.1164, "step": 178 }, { "epoch": 0.02727826882048156, "grad_norm": 0.81640625, "learning_rate": 0.0001817258883248731, "loss": 1.0367, "step": 179 }, { "epoch": 0.027430661383724474, "grad_norm": 1.15625, "learning_rate": 0.00018274111675126904, "loss": 0.9836, "step": 180 }, { "epoch": 0.02758305394696739, "grad_norm": 1.0078125, "learning_rate": 0.00018375634517766497, "loss": 0.8772, "step": 181 }, { "epoch": 0.0277354465102103, "grad_norm": 0.9296875, "learning_rate": 0.00018477157360406092, "loss": 1.0627, "step": 182 }, { "epoch": 0.027887839073453217, "grad_norm": 1.0546875, "learning_rate": 0.00018578680203045687, "loss": 0.9796, "step": 183 }, { "epoch": 0.02804023163669613, "grad_norm": 1.1953125, "learning_rate": 0.0001868020304568528, "loss": 0.9119, "step": 184 }, { "epoch": 0.028192624199939044, "grad_norm": 1.0078125, "learning_rate": 0.00018781725888324875, "loss": 1.0308, "step": 185 }, { "epoch": 0.028345016763181956, "grad_norm": 0.921875, "learning_rate": 0.0001888324873096447, "loss": 1.0528, "step": 186 }, { "epoch": 0.02849740932642487, "grad_norm": 1.5703125, "learning_rate": 0.0001898477157360406, "loss": 0.9185, "step": 187 }, { "epoch": 0.028649801889667783, "grad_norm": 1.0703125, "learning_rate": 0.00019086294416243655, "loss": 1.1204, "step": 188 }, { "epoch": 0.028802194452910698, "grad_norm": 1.3359375, "learning_rate": 0.0001918781725888325, "loss": 0.9493, "step": 189 }, { "epoch": 0.02895458701615361, "grad_norm": 1.4609375, "learning_rate": 0.00019289340101522843, "loss": 1.0063, "step": 190 }, { "epoch": 0.029106979579396525, "grad_norm": 0.86328125, "learning_rate": 0.00019390862944162438, "loss": 0.8969, "step": 191 }, { "epoch": 0.02925937214263944, "grad_norm": 1.015625, "learning_rate": 0.00019492385786802033, "loss": 1.284, "step": 192 }, { "epoch": 0.029411764705882353, "grad_norm": 0.8828125, "learning_rate": 0.00019593908629441626, "loss": 1.0236, "step": 193 }, { "epoch": 0.029564157269125268, "grad_norm": 1.1015625, "learning_rate": 0.00019695431472081218, "loss": 1.0283, "step": 194 }, { "epoch": 0.02971654983236818, "grad_norm": 1.1171875, "learning_rate": 0.00019796954314720813, "loss": 0.9227, "step": 195 }, { "epoch": 0.029868942395611095, "grad_norm": 0.984375, "learning_rate": 0.00019898477157360406, "loss": 1.0534, "step": 196 }, { "epoch": 0.030021334958854007, "grad_norm": 1.1328125, "learning_rate": 0.0002, "loss": 1.0342, "step": 197 }, { "epoch": 0.030173727522096922, "grad_norm": 0.95703125, "learning_rate": 0.000199999987819281, "loss": 1.0286, "step": 198 }, { "epoch": 0.030326120085339834, "grad_norm": 0.76171875, "learning_rate": 0.00019999995127712694, "loss": 0.823, "step": 199 }, { "epoch": 0.03047851264858275, "grad_norm": 0.8125, "learning_rate": 0.0001999998903735467, "loss": 0.7691, "step": 200 }, { "epoch": 0.030630905211825665, "grad_norm": 1.0390625, "learning_rate": 0.00019999980510855515, "loss": 1.0232, "step": 201 }, { "epoch": 0.030783297775068576, "grad_norm": 0.9296875, "learning_rate": 0.00019999969548217307, "loss": 1.0349, "step": 202 }, { "epoch": 0.03093569033831149, "grad_norm": 0.9296875, "learning_rate": 0.00019999956149442712, "loss": 1.1024, "step": 203 }, { "epoch": 0.031088082901554404, "grad_norm": 0.90234375, "learning_rate": 0.00019999940314535, "loss": 1.066, "step": 204 }, { "epoch": 0.03124047546479732, "grad_norm": 0.74609375, "learning_rate": 0.00019999922043498024, "loss": 1.1117, "step": 205 }, { "epoch": 0.031392868028040234, "grad_norm": 0.61328125, "learning_rate": 0.00019999901336336236, "loss": 0.8319, "step": 206 }, { "epoch": 0.031545260591283146, "grad_norm": 0.76953125, "learning_rate": 0.0001999987819305468, "loss": 0.8868, "step": 207 }, { "epoch": 0.03169765315452606, "grad_norm": 0.87890625, "learning_rate": 0.00019999852613658998, "loss": 1.0696, "step": 208 }, { "epoch": 0.03185004571776897, "grad_norm": 1.0390625, "learning_rate": 0.0001999982459815542, "loss": 1.1077, "step": 209 }, { "epoch": 0.03200243828101189, "grad_norm": 0.88671875, "learning_rate": 0.00019999794146550767, "loss": 0.8409, "step": 210 }, { "epoch": 0.0321548308442548, "grad_norm": 0.97265625, "learning_rate": 0.0001999976125885246, "loss": 1.1005, "step": 211 }, { "epoch": 0.03230722340749771, "grad_norm": 0.875, "learning_rate": 0.00019999725935068515, "loss": 1.0082, "step": 212 }, { "epoch": 0.03245961597074063, "grad_norm": 1.0859375, "learning_rate": 0.0001999968817520753, "loss": 1.1599, "step": 213 }, { "epoch": 0.03261200853398354, "grad_norm": 1.015625, "learning_rate": 0.00019999647979278707, "loss": 0.9409, "step": 214 }, { "epoch": 0.032764401097226455, "grad_norm": 1.4609375, "learning_rate": 0.00019999605347291842, "loss": 0.9594, "step": 215 }, { "epoch": 0.032916793660469366, "grad_norm": 0.83984375, "learning_rate": 0.00019999560279257314, "loss": 1.066, "step": 216 }, { "epoch": 0.033069186223712285, "grad_norm": 0.74609375, "learning_rate": 0.00019999512775186108, "loss": 0.7921, "step": 217 }, { "epoch": 0.0332215787869552, "grad_norm": 0.98046875, "learning_rate": 0.00019999462835089792, "loss": 1.0295, "step": 218 }, { "epoch": 0.03337397135019811, "grad_norm": 0.890625, "learning_rate": 0.00019999410458980537, "loss": 1.0372, "step": 219 }, { "epoch": 0.03352636391344102, "grad_norm": 0.6875, "learning_rate": 0.000199993556468711, "loss": 0.8258, "step": 220 }, { "epoch": 0.03367875647668394, "grad_norm": 1.0234375, "learning_rate": 0.00019999298398774832, "loss": 1.0693, "step": 221 }, { "epoch": 0.03383114903992685, "grad_norm": 0.7109375, "learning_rate": 0.00019999238714705684, "loss": 0.9111, "step": 222 }, { "epoch": 0.03398354160316976, "grad_norm": 1.0546875, "learning_rate": 0.00019999176594678191, "loss": 1.0711, "step": 223 }, { "epoch": 0.03413593416641268, "grad_norm": 0.81640625, "learning_rate": 0.0001999911203870749, "loss": 0.9229, "step": 224 }, { "epoch": 0.034288326729655594, "grad_norm": 0.83984375, "learning_rate": 0.00019999045046809307, "loss": 0.9824, "step": 225 }, { "epoch": 0.034440719292898506, "grad_norm": 0.9921875, "learning_rate": 0.00019998975618999961, "loss": 0.9255, "step": 226 }, { "epoch": 0.03459311185614142, "grad_norm": 0.9609375, "learning_rate": 0.00019998903755296367, "loss": 1.0798, "step": 227 }, { "epoch": 0.034745504419384336, "grad_norm": 1.1953125, "learning_rate": 0.0001999882945571603, "loss": 0.9189, "step": 228 }, { "epoch": 0.03489789698262725, "grad_norm": 0.9453125, "learning_rate": 0.00019998752720277053, "loss": 0.9213, "step": 229 }, { "epoch": 0.03505028954587016, "grad_norm": 0.85546875, "learning_rate": 0.0001999867354899813, "loss": 0.9826, "step": 230 }, { "epoch": 0.03520268210911308, "grad_norm": 0.83984375, "learning_rate": 0.00019998591941898543, "loss": 1.0365, "step": 231 }, { "epoch": 0.03535507467235599, "grad_norm": 0.9921875, "learning_rate": 0.0001999850789899818, "loss": 1.0672, "step": 232 }, { "epoch": 0.0355074672355989, "grad_norm": 1.078125, "learning_rate": 0.0001999842142031751, "loss": 0.9586, "step": 233 }, { "epoch": 0.035659859798841814, "grad_norm": 0.828125, "learning_rate": 0.00019998332505877605, "loss": 0.9838, "step": 234 }, { "epoch": 0.03581225236208473, "grad_norm": 0.91015625, "learning_rate": 0.0001999824115570012, "loss": 0.8488, "step": 235 }, { "epoch": 0.035964644925327645, "grad_norm": 0.93359375, "learning_rate": 0.00019998147369807313, "loss": 1.1119, "step": 236 }, { "epoch": 0.03611703748857056, "grad_norm": 0.8359375, "learning_rate": 0.0001999805114822203, "loss": 1.1544, "step": 237 }, { "epoch": 0.03626943005181347, "grad_norm": 1.1953125, "learning_rate": 0.00019997952490967712, "loss": 1.0722, "step": 238 }, { "epoch": 0.03642182261505639, "grad_norm": 0.9765625, "learning_rate": 0.00019997851398068396, "loss": 1.143, "step": 239 }, { "epoch": 0.0365742151782993, "grad_norm": 1.03125, "learning_rate": 0.00019997747869548706, "loss": 1.0654, "step": 240 }, { "epoch": 0.03672660774154221, "grad_norm": 1.078125, "learning_rate": 0.00019997641905433869, "loss": 1.1018, "step": 241 }, { "epoch": 0.03687900030478513, "grad_norm": 0.86328125, "learning_rate": 0.0001999753350574969, "loss": 0.9869, "step": 242 }, { "epoch": 0.03703139286802804, "grad_norm": 0.75390625, "learning_rate": 0.00019997422670522586, "loss": 1.0451, "step": 243 }, { "epoch": 0.03718378543127095, "grad_norm": 0.8359375, "learning_rate": 0.00019997309399779551, "loss": 0.9428, "step": 244 }, { "epoch": 0.037336177994513865, "grad_norm": 0.83203125, "learning_rate": 0.00019997193693548182, "loss": 0.7763, "step": 245 }, { "epoch": 0.037488570557756784, "grad_norm": 1.0390625, "learning_rate": 0.00019997075551856668, "loss": 1.0506, "step": 246 }, { "epoch": 0.037640963120999696, "grad_norm": 0.9453125, "learning_rate": 0.0001999695497473379, "loss": 1.0215, "step": 247 }, { "epoch": 0.03779335568424261, "grad_norm": 1.0625, "learning_rate": 0.0001999683196220892, "loss": 0.8834, "step": 248 }, { "epoch": 0.03794574824748552, "grad_norm": 0.81640625, "learning_rate": 0.00019996706514312026, "loss": 0.8865, "step": 249 }, { "epoch": 0.03809814081072844, "grad_norm": 1.4140625, "learning_rate": 0.00019996578631073674, "loss": 1.1014, "step": 250 }, { "epoch": 0.03825053337397135, "grad_norm": 0.72265625, "learning_rate": 0.0001999644831252501, "loss": 0.8861, "step": 251 }, { "epoch": 0.03840292593721426, "grad_norm": 1.0703125, "learning_rate": 0.00019996315558697787, "loss": 0.998, "step": 252 }, { "epoch": 0.03855531850045718, "grad_norm": 0.8515625, "learning_rate": 0.00019996180369624345, "loss": 0.9546, "step": 253 }, { "epoch": 0.03870771106370009, "grad_norm": 0.8671875, "learning_rate": 0.00019996042745337617, "loss": 0.8575, "step": 254 }, { "epoch": 0.038860103626943004, "grad_norm": 0.78125, "learning_rate": 0.0001999590268587113, "loss": 0.9153, "step": 255 }, { "epoch": 0.039012496190185916, "grad_norm": 0.94921875, "learning_rate": 0.00019995760191259004, "loss": 1.0723, "step": 256 }, { "epoch": 0.039164888753428835, "grad_norm": 0.8828125, "learning_rate": 0.00019995615261535953, "loss": 0.9129, "step": 257 }, { "epoch": 0.03931728131667175, "grad_norm": 0.98046875, "learning_rate": 0.00019995467896737287, "loss": 0.9207, "step": 258 }, { "epoch": 0.03946967387991466, "grad_norm": 0.77734375, "learning_rate": 0.00019995318096898906, "loss": 0.998, "step": 259 }, { "epoch": 0.03962206644315757, "grad_norm": 0.69921875, "learning_rate": 0.00019995165862057298, "loss": 0.8226, "step": 260 }, { "epoch": 0.03977445900640049, "grad_norm": 0.921875, "learning_rate": 0.00019995011192249553, "loss": 1.0203, "step": 261 }, { "epoch": 0.0399268515696434, "grad_norm": 0.953125, "learning_rate": 0.0001999485408751335, "loss": 0.8918, "step": 262 }, { "epoch": 0.04007924413288631, "grad_norm": 0.98046875, "learning_rate": 0.00019994694547886965, "loss": 0.9566, "step": 263 }, { "epoch": 0.04023163669612923, "grad_norm": 1.2421875, "learning_rate": 0.00019994532573409262, "loss": 1.282, "step": 264 }, { "epoch": 0.040384029259372144, "grad_norm": 0.7890625, "learning_rate": 0.000199943681641197, "loss": 0.921, "step": 265 }, { "epoch": 0.040536421822615055, "grad_norm": 0.828125, "learning_rate": 0.00019994201320058328, "loss": 0.9392, "step": 266 }, { "epoch": 0.04068881438585797, "grad_norm": 0.9921875, "learning_rate": 0.00019994032041265798, "loss": 1.1203, "step": 267 }, { "epoch": 0.040841206949100886, "grad_norm": 1.0859375, "learning_rate": 0.00019993860327783346, "loss": 0.9084, "step": 268 }, { "epoch": 0.0409935995123438, "grad_norm": 1.0234375, "learning_rate": 0.00019993686179652805, "loss": 0.8947, "step": 269 }, { "epoch": 0.04114599207558671, "grad_norm": 0.6953125, "learning_rate": 0.00019993509596916598, "loss": 0.9161, "step": 270 }, { "epoch": 0.04129838463882963, "grad_norm": 0.71875, "learning_rate": 0.00019993330579617743, "loss": 0.9358, "step": 271 }, { "epoch": 0.04145077720207254, "grad_norm": 0.9921875, "learning_rate": 0.00019993149127799851, "loss": 0.9136, "step": 272 }, { "epoch": 0.04160316976531545, "grad_norm": 0.8671875, "learning_rate": 0.0001999296524150713, "loss": 0.8133, "step": 273 }, { "epoch": 0.041755562328558364, "grad_norm": 1.046875, "learning_rate": 0.00019992778920784371, "loss": 0.9302, "step": 274 }, { "epoch": 0.04190795489180128, "grad_norm": 1.203125, "learning_rate": 0.00019992590165676975, "loss": 1.1677, "step": 275 }, { "epoch": 0.042060347455044195, "grad_norm": 1.1875, "learning_rate": 0.00019992398976230913, "loss": 1.0898, "step": 276 }, { "epoch": 0.04221274001828711, "grad_norm": 0.9140625, "learning_rate": 0.00019992205352492768, "loss": 0.8355, "step": 277 }, { "epoch": 0.04236513258153002, "grad_norm": 0.7734375, "learning_rate": 0.0001999200929450971, "loss": 0.9893, "step": 278 }, { "epoch": 0.04251752514477294, "grad_norm": 0.87109375, "learning_rate": 0.00019991810802329497, "loss": 0.9623, "step": 279 }, { "epoch": 0.04266991770801585, "grad_norm": 1.2421875, "learning_rate": 0.0001999160987600049, "loss": 1.0089, "step": 280 }, { "epoch": 0.04282231027125876, "grad_norm": 0.90234375, "learning_rate": 0.00019991406515571636, "loss": 1.0584, "step": 281 }, { "epoch": 0.04297470283450168, "grad_norm": 0.8828125, "learning_rate": 0.00019991200721092476, "loss": 1.1137, "step": 282 }, { "epoch": 0.04312709539774459, "grad_norm": 0.84375, "learning_rate": 0.0001999099249261314, "loss": 1.0398, "step": 283 }, { "epoch": 0.0432794879609875, "grad_norm": 0.90234375, "learning_rate": 0.00019990781830184364, "loss": 0.8726, "step": 284 }, { "epoch": 0.043431880524230415, "grad_norm": 1.015625, "learning_rate": 0.00019990568733857465, "loss": 1.0769, "step": 285 }, { "epoch": 0.043584273087473334, "grad_norm": 0.87890625, "learning_rate": 0.00019990353203684354, "loss": 0.9464, "step": 286 }, { "epoch": 0.043736665650716246, "grad_norm": 1.03125, "learning_rate": 0.00019990135239717537, "loss": 0.9359, "step": 287 }, { "epoch": 0.04388905821395916, "grad_norm": 1.0078125, "learning_rate": 0.00019989914842010115, "loss": 1.0602, "step": 288 }, { "epoch": 0.04404145077720207, "grad_norm": 1.078125, "learning_rate": 0.00019989692010615785, "loss": 0.9259, "step": 289 }, { "epoch": 0.04419384334044499, "grad_norm": 0.89453125, "learning_rate": 0.0001998946674558882, "loss": 0.9085, "step": 290 }, { "epoch": 0.0443462359036879, "grad_norm": 0.9609375, "learning_rate": 0.00019989239046984107, "loss": 1.1952, "step": 291 }, { "epoch": 0.04449862846693081, "grad_norm": 0.84375, "learning_rate": 0.00019989008914857116, "loss": 0.9005, "step": 292 }, { "epoch": 0.04465102103017373, "grad_norm": 0.8203125, "learning_rate": 0.00019988776349263906, "loss": 0.906, "step": 293 }, { "epoch": 0.04480341359341664, "grad_norm": 0.99609375, "learning_rate": 0.0001998854135026114, "loss": 1.0398, "step": 294 }, { "epoch": 0.044955806156659554, "grad_norm": 0.89453125, "learning_rate": 0.0001998830391790606, "loss": 0.9734, "step": 295 }, { "epoch": 0.045108198719902466, "grad_norm": 0.8203125, "learning_rate": 0.00019988064052256513, "loss": 0.9427, "step": 296 }, { "epoch": 0.045260591283145385, "grad_norm": 0.6875, "learning_rate": 0.0001998782175337093, "loss": 0.8479, "step": 297 }, { "epoch": 0.0454129838463883, "grad_norm": 0.8828125, "learning_rate": 0.0001998757702130834, "loss": 0.9326, "step": 298 }, { "epoch": 0.04556537640963121, "grad_norm": 0.9765625, "learning_rate": 0.00019987329856128368, "loss": 0.9602, "step": 299 }, { "epoch": 0.04571776897287412, "grad_norm": 1.1328125, "learning_rate": 0.00019987080257891218, "loss": 1.085, "step": 300 }, { "epoch": 0.04587016153611704, "grad_norm": 0.94140625, "learning_rate": 0.00019986828226657705, "loss": 0.8767, "step": 301 }, { "epoch": 0.04602255409935995, "grad_norm": 0.8828125, "learning_rate": 0.00019986573762489215, "loss": 0.943, "step": 302 }, { "epoch": 0.04617494666260286, "grad_norm": 1.03125, "learning_rate": 0.00019986316865447753, "loss": 1.1201, "step": 303 }, { "epoch": 0.04632733922584578, "grad_norm": 1.0625, "learning_rate": 0.00019986057535595898, "loss": 0.9256, "step": 304 }, { "epoch": 0.046479731789088694, "grad_norm": 0.91015625, "learning_rate": 0.00019985795772996823, "loss": 1.0312, "step": 305 }, { "epoch": 0.046632124352331605, "grad_norm": 0.83984375, "learning_rate": 0.00019985531577714296, "loss": 0.9609, "step": 306 }, { "epoch": 0.04678451691557452, "grad_norm": 1.0859375, "learning_rate": 0.00019985264949812684, "loss": 0.8905, "step": 307 }, { "epoch": 0.046936909478817436, "grad_norm": 1.015625, "learning_rate": 0.00019984995889356944, "loss": 0.9086, "step": 308 }, { "epoch": 0.04708930204206035, "grad_norm": 1.0703125, "learning_rate": 0.00019984724396412615, "loss": 1.0363, "step": 309 }, { "epoch": 0.04724169460530326, "grad_norm": 1.1328125, "learning_rate": 0.00019984450471045842, "loss": 1.0319, "step": 310 }, { "epoch": 0.04739408716854617, "grad_norm": 1.0546875, "learning_rate": 0.00019984174113323353, "loss": 1.0892, "step": 311 }, { "epoch": 0.04754647973178909, "grad_norm": 0.98046875, "learning_rate": 0.00019983895323312477, "loss": 0.7884, "step": 312 }, { "epoch": 0.047698872295032, "grad_norm": 1.0078125, "learning_rate": 0.00019983614101081124, "loss": 0.9175, "step": 313 }, { "epoch": 0.047851264858274914, "grad_norm": 1.1328125, "learning_rate": 0.00019983330446697814, "loss": 1.1251, "step": 314 }, { "epoch": 0.04800365742151783, "grad_norm": 1.0, "learning_rate": 0.00019983044360231644, "loss": 1.2414, "step": 315 }, { "epoch": 0.048156049984760745, "grad_norm": 0.9140625, "learning_rate": 0.00019982755841752307, "loss": 0.852, "step": 316 }, { "epoch": 0.048308442548003656, "grad_norm": 1.0390625, "learning_rate": 0.00019982464891330092, "loss": 1.0165, "step": 317 }, { "epoch": 0.04846083511124657, "grad_norm": 0.81640625, "learning_rate": 0.00019982171509035882, "loss": 0.8185, "step": 318 }, { "epoch": 0.04861322767448949, "grad_norm": 0.9921875, "learning_rate": 0.00019981875694941145, "loss": 1.0755, "step": 319 }, { "epoch": 0.0487656202377324, "grad_norm": 1.0234375, "learning_rate": 0.00019981577449117947, "loss": 1.1725, "step": 320 }, { "epoch": 0.04891801280097531, "grad_norm": 0.82421875, "learning_rate": 0.00019981276771638943, "loss": 0.9712, "step": 321 }, { "epoch": 0.04907040536421823, "grad_norm": 0.9921875, "learning_rate": 0.00019980973662577384, "loss": 1.1417, "step": 322 }, { "epoch": 0.04922279792746114, "grad_norm": 0.87890625, "learning_rate": 0.00019980668122007115, "loss": 1.0349, "step": 323 }, { "epoch": 0.04937519049070405, "grad_norm": 0.8671875, "learning_rate": 0.00019980360150002565, "loss": 1.0212, "step": 324 }, { "epoch": 0.049527583053946965, "grad_norm": 0.9375, "learning_rate": 0.00019980049746638761, "loss": 1.0339, "step": 325 }, { "epoch": 0.049679975617189884, "grad_norm": 0.8125, "learning_rate": 0.00019979736911991328, "loss": 0.7988, "step": 326 }, { "epoch": 0.049832368180432796, "grad_norm": 0.8828125, "learning_rate": 0.00019979421646136464, "loss": 1.0533, "step": 327 }, { "epoch": 0.04998476074367571, "grad_norm": 1.5625, "learning_rate": 0.00019979103949150986, "loss": 1.1661, "step": 328 }, { "epoch": 0.05013715330691862, "grad_norm": 1.109375, "learning_rate": 0.00019978783821112282, "loss": 0.9181, "step": 329 }, { "epoch": 0.05028954587016154, "grad_norm": 0.84375, "learning_rate": 0.00019978461262098343, "loss": 0.9991, "step": 330 }, { "epoch": 0.05044193843340445, "grad_norm": 1.046875, "learning_rate": 0.00019978136272187747, "loss": 1.0475, "step": 331 }, { "epoch": 0.05059433099664736, "grad_norm": 0.8125, "learning_rate": 0.00019977808851459665, "loss": 0.9103, "step": 332 }, { "epoch": 0.05074672355989028, "grad_norm": 1.015625, "learning_rate": 0.00019977478999993865, "loss": 0.922, "step": 333 }, { "epoch": 0.05089911612313319, "grad_norm": 0.93359375, "learning_rate": 0.00019977146717870702, "loss": 1.0749, "step": 334 }, { "epoch": 0.051051508686376104, "grad_norm": 0.82421875, "learning_rate": 0.00019976812005171124, "loss": 0.9866, "step": 335 }, { "epoch": 0.051203901249619016, "grad_norm": 0.97265625, "learning_rate": 0.00019976474861976672, "loss": 0.8576, "step": 336 }, { "epoch": 0.051356293812861935, "grad_norm": 0.80078125, "learning_rate": 0.00019976135288369483, "loss": 0.8456, "step": 337 }, { "epoch": 0.05150868637610485, "grad_norm": 1.1171875, "learning_rate": 0.00019975793284432273, "loss": 1.1343, "step": 338 }, { "epoch": 0.05166107893934776, "grad_norm": 0.92578125, "learning_rate": 0.00019975448850248368, "loss": 1.1164, "step": 339 }, { "epoch": 0.05181347150259067, "grad_norm": 1.0390625, "learning_rate": 0.0001997510198590167, "loss": 0.9002, "step": 340 }, { "epoch": 0.05196586406583359, "grad_norm": 1.0234375, "learning_rate": 0.00019974752691476687, "loss": 1.1543, "step": 341 }, { "epoch": 0.0521182566290765, "grad_norm": 1.015625, "learning_rate": 0.0001997440096705851, "loss": 1.002, "step": 342 }, { "epoch": 0.05227064919231941, "grad_norm": 0.87890625, "learning_rate": 0.0001997404681273282, "loss": 1.0487, "step": 343 }, { "epoch": 0.05242304175556233, "grad_norm": 1.0078125, "learning_rate": 0.00019973690228585898, "loss": 1.0186, "step": 344 }, { "epoch": 0.05257543431880524, "grad_norm": 0.9765625, "learning_rate": 0.00019973331214704612, "loss": 1.0291, "step": 345 }, { "epoch": 0.052727826882048155, "grad_norm": 0.73046875, "learning_rate": 0.00019972969771176423, "loss": 0.9423, "step": 346 }, { "epoch": 0.05288021944529107, "grad_norm": 1.046875, "learning_rate": 0.00019972605898089385, "loss": 0.9147, "step": 347 }, { "epoch": 0.053032612008533986, "grad_norm": 1.1171875, "learning_rate": 0.0001997223959553214, "loss": 1.0007, "step": 348 }, { "epoch": 0.0531850045717769, "grad_norm": 1.015625, "learning_rate": 0.00019971870863593925, "loss": 0.9447, "step": 349 }, { "epoch": 0.05333739713501981, "grad_norm": 0.96875, "learning_rate": 0.00019971499702364572, "loss": 0.9947, "step": 350 }, { "epoch": 0.05348978969826272, "grad_norm": 1.0859375, "learning_rate": 0.00019971126111934496, "loss": 0.9578, "step": 351 }, { "epoch": 0.05364218226150564, "grad_norm": 1.078125, "learning_rate": 0.00019970750092394716, "loss": 0.924, "step": 352 }, { "epoch": 0.05379457482474855, "grad_norm": 0.8125, "learning_rate": 0.00019970371643836826, "loss": 0.9198, "step": 353 }, { "epoch": 0.053946967387991464, "grad_norm": 0.8125, "learning_rate": 0.00019969990766353031, "loss": 0.9998, "step": 354 }, { "epoch": 0.05409935995123438, "grad_norm": 0.9609375, "learning_rate": 0.00019969607460036115, "loss": 0.9067, "step": 355 }, { "epoch": 0.054251752514477294, "grad_norm": 1.0859375, "learning_rate": 0.00019969221724979453, "loss": 1.1258, "step": 356 }, { "epoch": 0.054404145077720206, "grad_norm": 1.109375, "learning_rate": 0.00019968833561277022, "loss": 1.1187, "step": 357 }, { "epoch": 0.05455653764096312, "grad_norm": 1.2890625, "learning_rate": 0.00019968442969023377, "loss": 1.1537, "step": 358 }, { "epoch": 0.05470893020420604, "grad_norm": 0.96875, "learning_rate": 0.00019968049948313678, "loss": 0.8835, "step": 359 }, { "epoch": 0.05486132276744895, "grad_norm": 1.140625, "learning_rate": 0.00019967654499243668, "loss": 1.0197, "step": 360 }, { "epoch": 0.05501371533069186, "grad_norm": 0.67578125, "learning_rate": 0.00019967256621909686, "loss": 0.9618, "step": 361 }, { "epoch": 0.05516610789393478, "grad_norm": 0.93359375, "learning_rate": 0.00019966856316408659, "loss": 0.9653, "step": 362 }, { "epoch": 0.05531850045717769, "grad_norm": 0.94140625, "learning_rate": 0.00019966453582838107, "loss": 1.035, "step": 363 }, { "epoch": 0.0554708930204206, "grad_norm": 1.0390625, "learning_rate": 0.00019966048421296142, "loss": 0.8922, "step": 364 }, { "epoch": 0.055623285583663515, "grad_norm": 1.0078125, "learning_rate": 0.00019965640831881465, "loss": 1.0036, "step": 365 }, { "epoch": 0.055775678146906434, "grad_norm": 0.9609375, "learning_rate": 0.00019965230814693373, "loss": 0.9913, "step": 366 }, { "epoch": 0.055928070710149345, "grad_norm": 0.875, "learning_rate": 0.00019964818369831754, "loss": 0.8536, "step": 367 }, { "epoch": 0.05608046327339226, "grad_norm": 0.82421875, "learning_rate": 0.00019964403497397084, "loss": 1.121, "step": 368 }, { "epoch": 0.05623285583663517, "grad_norm": 0.92578125, "learning_rate": 0.00019963986197490429, "loss": 1.0349, "step": 369 }, { "epoch": 0.05638524839987809, "grad_norm": 0.890625, "learning_rate": 0.00019963566470213454, "loss": 0.8498, "step": 370 }, { "epoch": 0.056537640963121, "grad_norm": 0.84765625, "learning_rate": 0.00019963144315668407, "loss": 0.8778, "step": 371 }, { "epoch": 0.05669003352636391, "grad_norm": 0.80078125, "learning_rate": 0.00019962719733958133, "loss": 1.0433, "step": 372 }, { "epoch": 0.05684242608960683, "grad_norm": 1.1171875, "learning_rate": 0.00019962292725186066, "loss": 1.0183, "step": 373 }, { "epoch": 0.05699481865284974, "grad_norm": 1.0234375, "learning_rate": 0.00019961863289456226, "loss": 0.8186, "step": 374 }, { "epoch": 0.057147211216092654, "grad_norm": 1.0546875, "learning_rate": 0.00019961431426873238, "loss": 0.8383, "step": 375 }, { "epoch": 0.057299603779335566, "grad_norm": 0.875, "learning_rate": 0.00019960997137542308, "loss": 0.8188, "step": 376 }, { "epoch": 0.057451996342578485, "grad_norm": 0.70703125, "learning_rate": 0.00019960560421569231, "loss": 0.8916, "step": 377 }, { "epoch": 0.057604388905821396, "grad_norm": 0.8984375, "learning_rate": 0.000199601212790604, "loss": 1.0563, "step": 378 }, { "epoch": 0.05775678146906431, "grad_norm": 0.765625, "learning_rate": 0.00019959679710122798, "loss": 0.9432, "step": 379 }, { "epoch": 0.05790917403230722, "grad_norm": 1.0390625, "learning_rate": 0.00019959235714863996, "loss": 1.0996, "step": 380 }, { "epoch": 0.05806156659555014, "grad_norm": 0.90625, "learning_rate": 0.00019958789293392156, "loss": 0.9133, "step": 381 }, { "epoch": 0.05821395915879305, "grad_norm": 1.078125, "learning_rate": 0.00019958340445816033, "loss": 1.1101, "step": 382 }, { "epoch": 0.05836635172203596, "grad_norm": 0.9609375, "learning_rate": 0.00019957889172244974, "loss": 1.018, "step": 383 }, { "epoch": 0.05851874428527888, "grad_norm": 0.9140625, "learning_rate": 0.00019957435472788918, "loss": 1.0342, "step": 384 }, { "epoch": 0.05867113684852179, "grad_norm": 0.890625, "learning_rate": 0.00019956979347558388, "loss": 0.8097, "step": 385 }, { "epoch": 0.058823529411764705, "grad_norm": 0.84375, "learning_rate": 0.00019956520796664506, "loss": 0.9413, "step": 386 }, { "epoch": 0.05897592197500762, "grad_norm": 0.89453125, "learning_rate": 0.00019956059820218982, "loss": 0.824, "step": 387 }, { "epoch": 0.059128314538250536, "grad_norm": 1.0234375, "learning_rate": 0.00019955596418334116, "loss": 0.9896, "step": 388 }, { "epoch": 0.05928070710149345, "grad_norm": 1.21875, "learning_rate": 0.00019955130591122795, "loss": 0.9605, "step": 389 }, { "epoch": 0.05943309966473636, "grad_norm": 0.921875, "learning_rate": 0.00019954662338698503, "loss": 1.0222, "step": 390 }, { "epoch": 0.05958549222797927, "grad_norm": 0.84375, "learning_rate": 0.00019954191661175318, "loss": 0.8537, "step": 391 }, { "epoch": 0.05973788479122219, "grad_norm": 1.0390625, "learning_rate": 0.000199537185586679, "loss": 0.9052, "step": 392 }, { "epoch": 0.0598902773544651, "grad_norm": 1.0078125, "learning_rate": 0.00019953243031291503, "loss": 1.1847, "step": 393 }, { "epoch": 0.060042669917708014, "grad_norm": 0.8203125, "learning_rate": 0.00019952765079161977, "loss": 1.0655, "step": 394 }, { "epoch": 0.06019506248095093, "grad_norm": 0.96484375, "learning_rate": 0.0001995228470239575, "loss": 0.9553, "step": 395 }, { "epoch": 0.060347455044193844, "grad_norm": 1.015625, "learning_rate": 0.00019951801901109856, "loss": 1.0424, "step": 396 }, { "epoch": 0.060499847607436756, "grad_norm": 0.85546875, "learning_rate": 0.00019951316675421906, "loss": 0.9366, "step": 397 }, { "epoch": 0.06065224017067967, "grad_norm": 0.96484375, "learning_rate": 0.00019950829025450114, "loss": 0.9997, "step": 398 }, { "epoch": 0.06080463273392259, "grad_norm": 0.875, "learning_rate": 0.00019950338951313276, "loss": 0.9607, "step": 399 }, { "epoch": 0.0609570252971655, "grad_norm": 0.87109375, "learning_rate": 0.0001994984645313078, "loss": 0.9494, "step": 400 }, { "epoch": 0.06110941786040841, "grad_norm": 0.9453125, "learning_rate": 0.00019949351531022607, "loss": 1.097, "step": 401 }, { "epoch": 0.06126181042365133, "grad_norm": 1.1171875, "learning_rate": 0.00019948854185109326, "loss": 1.0597, "step": 402 }, { "epoch": 0.06141420298689424, "grad_norm": 1.0234375, "learning_rate": 0.000199483544155121, "loss": 1.1185, "step": 403 }, { "epoch": 0.06156659555013715, "grad_norm": 0.984375, "learning_rate": 0.0001994785222235268, "loss": 1.0865, "step": 404 }, { "epoch": 0.061718988113380065, "grad_norm": 0.953125, "learning_rate": 0.000199473476057534, "loss": 0.9695, "step": 405 }, { "epoch": 0.06187138067662298, "grad_norm": 0.984375, "learning_rate": 0.00019946840565837203, "loss": 0.9979, "step": 406 }, { "epoch": 0.062023773239865895, "grad_norm": 0.8203125, "learning_rate": 0.00019946331102727605, "loss": 0.9028, "step": 407 }, { "epoch": 0.06217616580310881, "grad_norm": 1.203125, "learning_rate": 0.00019945819216548718, "loss": 1.0653, "step": 408 }, { "epoch": 0.06232855836635172, "grad_norm": 1.328125, "learning_rate": 0.00019945304907425246, "loss": 1.038, "step": 409 }, { "epoch": 0.06248095092959464, "grad_norm": 0.87890625, "learning_rate": 0.00019944788175482484, "loss": 0.9478, "step": 410 }, { "epoch": 0.06263334349283754, "grad_norm": 0.8046875, "learning_rate": 0.00019944269020846314, "loss": 0.9297, "step": 411 }, { "epoch": 0.06278573605608047, "grad_norm": 1.0625, "learning_rate": 0.0001994374744364321, "loss": 1.0548, "step": 412 }, { "epoch": 0.06293812861932338, "grad_norm": 0.703125, "learning_rate": 0.0001994322344400023, "loss": 1.0419, "step": 413 }, { "epoch": 0.06309052118256629, "grad_norm": 1.0, "learning_rate": 0.0001994269702204504, "loss": 0.842, "step": 414 }, { "epoch": 0.0632429137458092, "grad_norm": 1.0234375, "learning_rate": 0.00019942168177905875, "loss": 0.9452, "step": 415 }, { "epoch": 0.06339530630905212, "grad_norm": 1.0703125, "learning_rate": 0.00019941636911711567, "loss": 0.8936, "step": 416 }, { "epoch": 0.06354769887229503, "grad_norm": 0.89453125, "learning_rate": 0.0001994110322359155, "loss": 0.9496, "step": 417 }, { "epoch": 0.06370009143553794, "grad_norm": 1.03125, "learning_rate": 0.00019940567113675828, "loss": 1.0961, "step": 418 }, { "epoch": 0.06385248399878087, "grad_norm": 1.140625, "learning_rate": 0.0001994002858209501, "loss": 1.1257, "step": 419 }, { "epoch": 0.06400487656202378, "grad_norm": 1.359375, "learning_rate": 0.00019939487628980288, "loss": 1.1337, "step": 420 }, { "epoch": 0.06415726912526669, "grad_norm": 1.15625, "learning_rate": 0.00019938944254463447, "loss": 1.0699, "step": 421 }, { "epoch": 0.0643096616885096, "grad_norm": 0.8671875, "learning_rate": 0.0001993839845867686, "loss": 0.8975, "step": 422 }, { "epoch": 0.06446205425175251, "grad_norm": 0.9296875, "learning_rate": 0.00019937850241753499, "loss": 0.9068, "step": 423 }, { "epoch": 0.06461444681499542, "grad_norm": 0.94140625, "learning_rate": 0.00019937299603826902, "loss": 1.1045, "step": 424 }, { "epoch": 0.06476683937823834, "grad_norm": 0.8125, "learning_rate": 0.00019936746545031223, "loss": 0.8842, "step": 425 }, { "epoch": 0.06491923194148126, "grad_norm": 0.77734375, "learning_rate": 0.00019936191065501193, "loss": 0.837, "step": 426 }, { "epoch": 0.06507162450472417, "grad_norm": 0.87890625, "learning_rate": 0.00019935633165372137, "loss": 0.9144, "step": 427 }, { "epoch": 0.06522401706796709, "grad_norm": 0.9375, "learning_rate": 0.0001993507284477996, "loss": 0.9266, "step": 428 }, { "epoch": 0.06537640963121, "grad_norm": 0.9453125, "learning_rate": 0.00019934510103861172, "loss": 1.0872, "step": 429 }, { "epoch": 0.06552880219445291, "grad_norm": 0.796875, "learning_rate": 0.00019933944942752865, "loss": 0.9141, "step": 430 }, { "epoch": 0.06568119475769582, "grad_norm": 1.15625, "learning_rate": 0.00019933377361592712, "loss": 1.1848, "step": 431 }, { "epoch": 0.06583358732093873, "grad_norm": 0.82421875, "learning_rate": 0.0001993280736051899, "loss": 1.0234, "step": 432 }, { "epoch": 0.06598597988418166, "grad_norm": 0.80078125, "learning_rate": 0.00019932234939670562, "loss": 0.8515, "step": 433 }, { "epoch": 0.06613837244742457, "grad_norm": 1.0078125, "learning_rate": 0.0001993166009918687, "loss": 0.849, "step": 434 }, { "epoch": 0.06629076501066748, "grad_norm": 0.8125, "learning_rate": 0.0001993108283920796, "loss": 0.9821, "step": 435 }, { "epoch": 0.0664431575739104, "grad_norm": 1.140625, "learning_rate": 0.00019930503159874458, "loss": 1.0171, "step": 436 }, { "epoch": 0.0665955501371533, "grad_norm": 0.87109375, "learning_rate": 0.00019929921061327585, "loss": 0.9689, "step": 437 }, { "epoch": 0.06674794270039622, "grad_norm": 1.125, "learning_rate": 0.00019929336543709147, "loss": 0.9135, "step": 438 }, { "epoch": 0.06690033526363913, "grad_norm": 0.89453125, "learning_rate": 0.00019928749607161538, "loss": 1.1167, "step": 439 }, { "epoch": 0.06705272782688204, "grad_norm": 1.09375, "learning_rate": 0.00019928160251827748, "loss": 1.0632, "step": 440 }, { "epoch": 0.06720512039012497, "grad_norm": 1.296875, "learning_rate": 0.0001992756847785135, "loss": 1.0173, "step": 441 }, { "epoch": 0.06735751295336788, "grad_norm": 0.984375, "learning_rate": 0.00019926974285376512, "loss": 1.0806, "step": 442 }, { "epoch": 0.06750990551661079, "grad_norm": 1.015625, "learning_rate": 0.00019926377674547985, "loss": 0.9264, "step": 443 }, { "epoch": 0.0676622980798537, "grad_norm": 1.046875, "learning_rate": 0.0001992577864551111, "loss": 1.105, "step": 444 }, { "epoch": 0.06781469064309661, "grad_norm": 1.03125, "learning_rate": 0.00019925177198411824, "loss": 1.1039, "step": 445 }, { "epoch": 0.06796708320633953, "grad_norm": 0.86328125, "learning_rate": 0.00019924573333396646, "loss": 1.1316, "step": 446 }, { "epoch": 0.06811947576958244, "grad_norm": 0.828125, "learning_rate": 0.00019923967050612684, "loss": 0.9086, "step": 447 }, { "epoch": 0.06827186833282536, "grad_norm": 0.92578125, "learning_rate": 0.0001992335835020764, "loss": 0.9488, "step": 448 }, { "epoch": 0.06842426089606828, "grad_norm": 0.890625, "learning_rate": 0.00019922747232329805, "loss": 0.9568, "step": 449 }, { "epoch": 0.06857665345931119, "grad_norm": 1.1640625, "learning_rate": 0.0001992213369712805, "loss": 1.0175, "step": 450 }, { "epoch": 0.0687290460225541, "grad_norm": 1.109375, "learning_rate": 0.00019921517744751844, "loss": 0.7923, "step": 451 }, { "epoch": 0.06888143858579701, "grad_norm": 1.0859375, "learning_rate": 0.00019920899375351238, "loss": 0.9906, "step": 452 }, { "epoch": 0.06903383114903992, "grad_norm": 0.76171875, "learning_rate": 0.00019920278589076882, "loss": 0.8194, "step": 453 }, { "epoch": 0.06918622371228283, "grad_norm": 1.1171875, "learning_rate": 0.00019919655386080006, "loss": 1.0738, "step": 454 }, { "epoch": 0.06933861627552576, "grad_norm": 1.109375, "learning_rate": 0.00019919029766512426, "loss": 0.956, "step": 455 }, { "epoch": 0.06949100883876867, "grad_norm": 1.1171875, "learning_rate": 0.00019918401730526563, "loss": 0.8875, "step": 456 }, { "epoch": 0.06964340140201158, "grad_norm": 0.81640625, "learning_rate": 0.00019917771278275405, "loss": 0.7414, "step": 457 }, { "epoch": 0.0697957939652545, "grad_norm": 0.953125, "learning_rate": 0.0001991713840991255, "loss": 1.0258, "step": 458 }, { "epoch": 0.06994818652849741, "grad_norm": 0.84375, "learning_rate": 0.00019916503125592158, "loss": 0.9117, "step": 459 }, { "epoch": 0.07010057909174032, "grad_norm": 1.0390625, "learning_rate": 0.00019915865425469006, "loss": 0.869, "step": 460 }, { "epoch": 0.07025297165498323, "grad_norm": 0.94140625, "learning_rate": 0.0001991522530969845, "loss": 0.9099, "step": 461 }, { "epoch": 0.07040536421822616, "grad_norm": 1.15625, "learning_rate": 0.00019914582778436416, "loss": 1.0339, "step": 462 }, { "epoch": 0.07055775678146907, "grad_norm": 0.95703125, "learning_rate": 0.0001991393783183945, "loss": 1.0592, "step": 463 }, { "epoch": 0.07071014934471198, "grad_norm": 0.84375, "learning_rate": 0.0001991329047006466, "loss": 1.0014, "step": 464 }, { "epoch": 0.07086254190795489, "grad_norm": 1.03125, "learning_rate": 0.00019912640693269752, "loss": 0.8788, "step": 465 }, { "epoch": 0.0710149344711978, "grad_norm": 0.87109375, "learning_rate": 0.0001991198850161303, "loss": 0.7926, "step": 466 }, { "epoch": 0.07116732703444072, "grad_norm": 0.93359375, "learning_rate": 0.0001991133389525337, "loss": 1.0245, "step": 467 }, { "epoch": 0.07131971959768363, "grad_norm": 0.953125, "learning_rate": 0.00019910676874350248, "loss": 0.9228, "step": 468 }, { "epoch": 0.07147211216092654, "grad_norm": 0.859375, "learning_rate": 0.00019910017439063717, "loss": 0.9084, "step": 469 }, { "epoch": 0.07162450472416947, "grad_norm": 1.171875, "learning_rate": 0.00019909355589554432, "loss": 1.0, "step": 470 }, { "epoch": 0.07177689728741238, "grad_norm": 0.87109375, "learning_rate": 0.00019908691325983626, "loss": 0.8339, "step": 471 }, { "epoch": 0.07192928985065529, "grad_norm": 1.15625, "learning_rate": 0.0001990802464851312, "loss": 1.1061, "step": 472 }, { "epoch": 0.0720816824138982, "grad_norm": 0.74609375, "learning_rate": 0.00019907355557305335, "loss": 0.8941, "step": 473 }, { "epoch": 0.07223407497714111, "grad_norm": 1.0234375, "learning_rate": 0.00019906684052523263, "loss": 1.0641, "step": 474 }, { "epoch": 0.07238646754038403, "grad_norm": 0.93359375, "learning_rate": 0.00019906010134330499, "loss": 1.0931, "step": 475 }, { "epoch": 0.07253886010362694, "grad_norm": 0.96484375, "learning_rate": 0.0001990533380289121, "loss": 0.9464, "step": 476 }, { "epoch": 0.07269125266686986, "grad_norm": 0.8515625, "learning_rate": 0.00019904655058370168, "loss": 0.9419, "step": 477 }, { "epoch": 0.07284364523011277, "grad_norm": 0.7421875, "learning_rate": 0.0001990397390093272, "loss": 0.9849, "step": 478 }, { "epoch": 0.07299603779335569, "grad_norm": 0.91796875, "learning_rate": 0.0001990329033074481, "loss": 1.031, "step": 479 }, { "epoch": 0.0731484303565986, "grad_norm": 0.7734375, "learning_rate": 0.00019902604347972965, "loss": 0.9186, "step": 480 }, { "epoch": 0.07330082291984151, "grad_norm": 0.9921875, "learning_rate": 0.00019901915952784296, "loss": 0.8674, "step": 481 }, { "epoch": 0.07345321548308442, "grad_norm": 0.76171875, "learning_rate": 0.0001990122514534651, "loss": 0.9273, "step": 482 }, { "epoch": 0.07360560804632733, "grad_norm": 1.40625, "learning_rate": 0.00019900531925827898, "loss": 1.0726, "step": 483 }, { "epoch": 0.07375800060957026, "grad_norm": 1.0234375, "learning_rate": 0.00019899836294397333, "loss": 1.0304, "step": 484 }, { "epoch": 0.07391039317281317, "grad_norm": 0.9296875, "learning_rate": 0.00019899138251224286, "loss": 1.0895, "step": 485 }, { "epoch": 0.07406278573605608, "grad_norm": 1.1328125, "learning_rate": 0.0001989843779647881, "loss": 1.0304, "step": 486 }, { "epoch": 0.074215178299299, "grad_norm": 0.81640625, "learning_rate": 0.00019897734930331544, "loss": 1.0151, "step": 487 }, { "epoch": 0.0743675708625419, "grad_norm": 0.84765625, "learning_rate": 0.00019897029652953717, "loss": 0.9714, "step": 488 }, { "epoch": 0.07451996342578482, "grad_norm": 1.0546875, "learning_rate": 0.00019896321964517143, "loss": 0.8082, "step": 489 }, { "epoch": 0.07467235598902773, "grad_norm": 1.2265625, "learning_rate": 0.00019895611865194227, "loss": 1.0635, "step": 490 }, { "epoch": 0.07482474855227064, "grad_norm": 0.9765625, "learning_rate": 0.00019894899355157964, "loss": 0.8819, "step": 491 }, { "epoch": 0.07497714111551357, "grad_norm": 0.8359375, "learning_rate": 0.0001989418443458192, "loss": 1.0908, "step": 492 }, { "epoch": 0.07512953367875648, "grad_norm": 0.859375, "learning_rate": 0.00019893467103640272, "loss": 0.8612, "step": 493 }, { "epoch": 0.07528192624199939, "grad_norm": 1.1640625, "learning_rate": 0.00019892747362507764, "loss": 1.208, "step": 494 }, { "epoch": 0.0754343188052423, "grad_norm": 0.94921875, "learning_rate": 0.00019892025211359741, "loss": 1.1525, "step": 495 }, { "epoch": 0.07558671136848522, "grad_norm": 0.765625, "learning_rate": 0.00019891300650372125, "loss": 0.9456, "step": 496 }, { "epoch": 0.07573910393172813, "grad_norm": 1.0703125, "learning_rate": 0.00019890573679721428, "loss": 0.9299, "step": 497 }, { "epoch": 0.07589149649497104, "grad_norm": 0.796875, "learning_rate": 0.00019889844299584758, "loss": 0.8221, "step": 498 }, { "epoch": 0.07604388905821396, "grad_norm": 1.0703125, "learning_rate": 0.00019889112510139797, "loss": 0.9054, "step": 499 }, { "epoch": 0.07619628162145688, "grad_norm": 1.1640625, "learning_rate": 0.00019888378311564822, "loss": 1.127, "step": 500 }, { "epoch": 0.07634867418469979, "grad_norm": 1.0, "learning_rate": 0.00019887641704038688, "loss": 1.0342, "step": 501 }, { "epoch": 0.0765010667479427, "grad_norm": 0.93359375, "learning_rate": 0.0001988690268774085, "loss": 1.0887, "step": 502 }, { "epoch": 0.07665345931118561, "grad_norm": 1.1796875, "learning_rate": 0.00019886161262851345, "loss": 0.9269, "step": 503 }, { "epoch": 0.07680585187442852, "grad_norm": 1.2421875, "learning_rate": 0.00019885417429550787, "loss": 1.1444, "step": 504 }, { "epoch": 0.07695824443767144, "grad_norm": 0.9609375, "learning_rate": 0.0001988467118802039, "loss": 0.9132, "step": 505 }, { "epoch": 0.07711063700091436, "grad_norm": 1.1171875, "learning_rate": 0.00019883922538441946, "loss": 0.9949, "step": 506 }, { "epoch": 0.07726302956415727, "grad_norm": 0.8828125, "learning_rate": 0.0001988317148099784, "loss": 0.9268, "step": 507 }, { "epoch": 0.07741542212740019, "grad_norm": 0.96875, "learning_rate": 0.00019882418015871036, "loss": 1.0005, "step": 508 }, { "epoch": 0.0775678146906431, "grad_norm": 0.81640625, "learning_rate": 0.00019881662143245092, "loss": 1.053, "step": 509 }, { "epoch": 0.07772020725388601, "grad_norm": 1.046875, "learning_rate": 0.0001988090386330415, "loss": 1.0385, "step": 510 }, { "epoch": 0.07787259981712892, "grad_norm": 0.984375, "learning_rate": 0.00019880143176232936, "loss": 0.9572, "step": 511 }, { "epoch": 0.07802499238037183, "grad_norm": 0.91796875, "learning_rate": 0.00019879380082216767, "loss": 1.0716, "step": 512 }, { "epoch": 0.07817738494361476, "grad_norm": 1.046875, "learning_rate": 0.00019878614581441542, "loss": 0.9244, "step": 513 }, { "epoch": 0.07832977750685767, "grad_norm": 0.82421875, "learning_rate": 0.00019877846674093747, "loss": 0.8527, "step": 514 }, { "epoch": 0.07848217007010058, "grad_norm": 1.1015625, "learning_rate": 0.00019877076360360455, "loss": 0.8301, "step": 515 }, { "epoch": 0.0786345626333435, "grad_norm": 0.87109375, "learning_rate": 0.0001987630364042933, "loss": 0.9355, "step": 516 }, { "epoch": 0.0787869551965864, "grad_norm": 1.0234375, "learning_rate": 0.00019875528514488614, "loss": 0.8334, "step": 517 }, { "epoch": 0.07893934775982932, "grad_norm": 0.87890625, "learning_rate": 0.00019874750982727134, "loss": 0.987, "step": 518 }, { "epoch": 0.07909174032307223, "grad_norm": 1.109375, "learning_rate": 0.00019873971045334318, "loss": 1.106, "step": 519 }, { "epoch": 0.07924413288631514, "grad_norm": 0.96484375, "learning_rate": 0.00019873188702500163, "loss": 0.9208, "step": 520 }, { "epoch": 0.07939652544955807, "grad_norm": 1.03125, "learning_rate": 0.00019872403954415262, "loss": 1.1444, "step": 521 }, { "epoch": 0.07954891801280098, "grad_norm": 0.9921875, "learning_rate": 0.0001987161680127079, "loss": 0.9838, "step": 522 }, { "epoch": 0.07970131057604389, "grad_norm": 0.82421875, "learning_rate": 0.00019870827243258509, "loss": 0.9818, "step": 523 }, { "epoch": 0.0798537031392868, "grad_norm": 0.828125, "learning_rate": 0.00019870035280570764, "loss": 0.9842, "step": 524 }, { "epoch": 0.08000609570252971, "grad_norm": 1.1875, "learning_rate": 0.00019869240913400496, "loss": 1.0893, "step": 525 }, { "epoch": 0.08015848826577263, "grad_norm": 0.79296875, "learning_rate": 0.00019868444141941214, "loss": 0.9572, "step": 526 }, { "epoch": 0.08031088082901554, "grad_norm": 1.015625, "learning_rate": 0.0001986764496638703, "loss": 1.1139, "step": 527 }, { "epoch": 0.08046327339225846, "grad_norm": 0.765625, "learning_rate": 0.00019866843386932633, "loss": 0.8722, "step": 528 }, { "epoch": 0.08061566595550138, "grad_norm": 1.125, "learning_rate": 0.000198660394037733, "loss": 0.9633, "step": 529 }, { "epoch": 0.08076805851874429, "grad_norm": 1.0703125, "learning_rate": 0.00019865233017104893, "loss": 1.0017, "step": 530 }, { "epoch": 0.0809204510819872, "grad_norm": 0.85546875, "learning_rate": 0.00019864424227123854, "loss": 0.9469, "step": 531 }, { "epoch": 0.08107284364523011, "grad_norm": 0.88671875, "learning_rate": 0.00019863613034027224, "loss": 1.0051, "step": 532 }, { "epoch": 0.08122523620847302, "grad_norm": 1.03125, "learning_rate": 0.0001986279943801262, "loss": 1.022, "step": 533 }, { "epoch": 0.08137762877171593, "grad_norm": 1.046875, "learning_rate": 0.00019861983439278238, "loss": 0.9996, "step": 534 }, { "epoch": 0.08153002133495886, "grad_norm": 0.8359375, "learning_rate": 0.00019861165038022874, "loss": 0.992, "step": 535 }, { "epoch": 0.08168241389820177, "grad_norm": 1.1171875, "learning_rate": 0.00019860344234445902, "loss": 1.055, "step": 536 }, { "epoch": 0.08183480646144468, "grad_norm": 1.1328125, "learning_rate": 0.00019859521028747277, "loss": 1.1829, "step": 537 }, { "epoch": 0.0819871990246876, "grad_norm": 0.8515625, "learning_rate": 0.00019858695421127548, "loss": 0.7837, "step": 538 }, { "epoch": 0.08213959158793051, "grad_norm": 1.125, "learning_rate": 0.00019857867411787847, "loss": 1.0323, "step": 539 }, { "epoch": 0.08229198415117342, "grad_norm": 0.984375, "learning_rate": 0.00019857037000929883, "loss": 1.1108, "step": 540 }, { "epoch": 0.08244437671441633, "grad_norm": 0.97265625, "learning_rate": 0.0001985620418875596, "loss": 1.0156, "step": 541 }, { "epoch": 0.08259676927765926, "grad_norm": 0.80859375, "learning_rate": 0.0001985536897546896, "loss": 1.0482, "step": 542 }, { "epoch": 0.08274916184090217, "grad_norm": 0.83203125, "learning_rate": 0.00019854531361272358, "loss": 1.042, "step": 543 }, { "epoch": 0.08290155440414508, "grad_norm": 0.80859375, "learning_rate": 0.00019853691346370203, "loss": 0.9866, "step": 544 }, { "epoch": 0.08305394696738799, "grad_norm": 0.8671875, "learning_rate": 0.00019852848930967137, "loss": 0.9853, "step": 545 }, { "epoch": 0.0832063395306309, "grad_norm": 0.96875, "learning_rate": 0.00019852004115268387, "loss": 0.9792, "step": 546 }, { "epoch": 0.08335873209387382, "grad_norm": 1.0859375, "learning_rate": 0.0001985115689947976, "loss": 0.9715, "step": 547 }, { "epoch": 0.08351112465711673, "grad_norm": 0.875, "learning_rate": 0.00019850307283807647, "loss": 0.9555, "step": 548 }, { "epoch": 0.08366351722035964, "grad_norm": 0.95703125, "learning_rate": 0.00019849455268459033, "loss": 1.1281, "step": 549 }, { "epoch": 0.08381590978360257, "grad_norm": 1.1484375, "learning_rate": 0.00019848600853641476, "loss": 1.0023, "step": 550 }, { "epoch": 0.08396830234684548, "grad_norm": 0.921875, "learning_rate": 0.00019847744039563128, "loss": 1.0483, "step": 551 }, { "epoch": 0.08412069491008839, "grad_norm": 1.0859375, "learning_rate": 0.00019846884826432717, "loss": 1.061, "step": 552 }, { "epoch": 0.0842730874733313, "grad_norm": 0.921875, "learning_rate": 0.00019846023214459561, "loss": 1.0615, "step": 553 }, { "epoch": 0.08442548003657421, "grad_norm": 1.0078125, "learning_rate": 0.00019845159203853562, "loss": 1.0384, "step": 554 }, { "epoch": 0.08457787259981712, "grad_norm": 1.6953125, "learning_rate": 0.00019844292794825207, "loss": 0.8072, "step": 555 }, { "epoch": 0.08473026516306004, "grad_norm": 0.9921875, "learning_rate": 0.0001984342398758556, "loss": 1.1538, "step": 556 }, { "epoch": 0.08488265772630296, "grad_norm": 0.8515625, "learning_rate": 0.00019842552782346282, "loss": 1.1008, "step": 557 }, { "epoch": 0.08503505028954587, "grad_norm": 1.171875, "learning_rate": 0.00019841679179319606, "loss": 1.0507, "step": 558 }, { "epoch": 0.08518744285278879, "grad_norm": 1.078125, "learning_rate": 0.00019840803178718358, "loss": 1.1768, "step": 559 }, { "epoch": 0.0853398354160317, "grad_norm": 0.97265625, "learning_rate": 0.00019839924780755942, "loss": 1.0087, "step": 560 }, { "epoch": 0.08549222797927461, "grad_norm": 1.03125, "learning_rate": 0.00019839043985646346, "loss": 0.7699, "step": 561 }, { "epoch": 0.08564462054251752, "grad_norm": 0.8984375, "learning_rate": 0.00019838160793604148, "loss": 0.8262, "step": 562 }, { "epoch": 0.08579701310576043, "grad_norm": 0.91796875, "learning_rate": 0.00019837275204844505, "loss": 1.0332, "step": 563 }, { "epoch": 0.08594940566900336, "grad_norm": 0.6796875, "learning_rate": 0.0001983638721958316, "loss": 0.9089, "step": 564 }, { "epoch": 0.08610179823224627, "grad_norm": 1.015625, "learning_rate": 0.0001983549683803644, "loss": 1.0308, "step": 565 }, { "epoch": 0.08625419079548918, "grad_norm": 1.3125, "learning_rate": 0.00019834604060421253, "loss": 1.0481, "step": 566 }, { "epoch": 0.0864065833587321, "grad_norm": 0.91796875, "learning_rate": 0.00019833708886955091, "loss": 0.9434, "step": 567 }, { "epoch": 0.086558975921975, "grad_norm": 1.1171875, "learning_rate": 0.00019832811317856033, "loss": 1.0835, "step": 568 }, { "epoch": 0.08671136848521792, "grad_norm": 0.83203125, "learning_rate": 0.00019831911353342742, "loss": 1.0718, "step": 569 }, { "epoch": 0.08686376104846083, "grad_norm": 1.078125, "learning_rate": 0.00019831008993634458, "loss": 0.9544, "step": 570 }, { "epoch": 0.08701615361170374, "grad_norm": 0.875, "learning_rate": 0.0001983010423895101, "loss": 0.9481, "step": 571 }, { "epoch": 0.08716854617494667, "grad_norm": 1.1640625, "learning_rate": 0.00019829197089512812, "loss": 1.0705, "step": 572 }, { "epoch": 0.08732093873818958, "grad_norm": 1.046875, "learning_rate": 0.00019828287545540856, "loss": 1.0537, "step": 573 }, { "epoch": 0.08747333130143249, "grad_norm": 0.91015625, "learning_rate": 0.0001982737560725672, "loss": 0.9773, "step": 574 }, { "epoch": 0.0876257238646754, "grad_norm": 0.9921875, "learning_rate": 0.00019826461274882564, "loss": 0.8659, "step": 575 }, { "epoch": 0.08777811642791832, "grad_norm": 0.86328125, "learning_rate": 0.00019825544548641134, "loss": 1.0355, "step": 576 }, { "epoch": 0.08793050899116123, "grad_norm": 0.80078125, "learning_rate": 0.0001982462542875576, "loss": 1.0362, "step": 577 }, { "epoch": 0.08808290155440414, "grad_norm": 0.98046875, "learning_rate": 0.00019823703915450354, "loss": 1.0636, "step": 578 }, { "epoch": 0.08823529411764706, "grad_norm": 1.109375, "learning_rate": 0.00019822780008949402, "loss": 1.0967, "step": 579 }, { "epoch": 0.08838768668088998, "grad_norm": 0.69921875, "learning_rate": 0.0001982185370947799, "loss": 0.8737, "step": 580 }, { "epoch": 0.08854007924413289, "grad_norm": 1.0859375, "learning_rate": 0.0001982092501726177, "loss": 0.897, "step": 581 }, { "epoch": 0.0886924718073758, "grad_norm": 0.72265625, "learning_rate": 0.00019819993932526991, "loss": 1.0573, "step": 582 }, { "epoch": 0.08884486437061871, "grad_norm": 0.90234375, "learning_rate": 0.00019819060455500474, "loss": 1.0215, "step": 583 }, { "epoch": 0.08899725693386162, "grad_norm": 0.8203125, "learning_rate": 0.00019818124586409627, "loss": 1.0069, "step": 584 }, { "epoch": 0.08914964949710454, "grad_norm": 0.91796875, "learning_rate": 0.00019817186325482447, "loss": 0.857, "step": 585 }, { "epoch": 0.08930204206034746, "grad_norm": 0.953125, "learning_rate": 0.00019816245672947503, "loss": 0.9841, "step": 586 }, { "epoch": 0.08945443462359037, "grad_norm": 0.859375, "learning_rate": 0.00019815302629033957, "loss": 1.0189, "step": 587 }, { "epoch": 0.08960682718683328, "grad_norm": 0.71875, "learning_rate": 0.0001981435719397154, "loss": 0.9995, "step": 588 }, { "epoch": 0.0897592197500762, "grad_norm": 0.73828125, "learning_rate": 0.00019813409367990578, "loss": 0.8471, "step": 589 }, { "epoch": 0.08991161231331911, "grad_norm": 0.984375, "learning_rate": 0.00019812459151321977, "loss": 1.1081, "step": 590 }, { "epoch": 0.09006400487656202, "grad_norm": 0.6484375, "learning_rate": 0.00019811506544197216, "loss": 0.8528, "step": 591 }, { "epoch": 0.09021639743980493, "grad_norm": 0.78125, "learning_rate": 0.00019810551546848372, "loss": 1.121, "step": 592 }, { "epoch": 0.09036879000304786, "grad_norm": 1.046875, "learning_rate": 0.00019809594159508092, "loss": 1.0512, "step": 593 }, { "epoch": 0.09052118256629077, "grad_norm": 0.7734375, "learning_rate": 0.00019808634382409613, "loss": 1.1485, "step": 594 }, { "epoch": 0.09067357512953368, "grad_norm": 1.1328125, "learning_rate": 0.00019807672215786743, "loss": 1.094, "step": 595 }, { "epoch": 0.0908259676927766, "grad_norm": 0.91015625, "learning_rate": 0.00019806707659873887, "loss": 0.9736, "step": 596 }, { "epoch": 0.0909783602560195, "grad_norm": 0.921875, "learning_rate": 0.00019805740714906023, "loss": 0.9697, "step": 597 }, { "epoch": 0.09113075281926242, "grad_norm": 0.71484375, "learning_rate": 0.0001980477138111871, "loss": 1.0001, "step": 598 }, { "epoch": 0.09128314538250533, "grad_norm": 0.8359375, "learning_rate": 0.00019803799658748094, "loss": 0.9342, "step": 599 }, { "epoch": 0.09143553794574824, "grad_norm": 0.86328125, "learning_rate": 0.00019802825548030902, "loss": 0.9098, "step": 600 }, { "epoch": 0.09158793050899117, "grad_norm": 1.0234375, "learning_rate": 0.00019801849049204436, "loss": 1.1722, "step": 601 }, { "epoch": 0.09174032307223408, "grad_norm": 0.921875, "learning_rate": 0.00019800870162506589, "loss": 0.9732, "step": 602 }, { "epoch": 0.09189271563547699, "grad_norm": 0.68359375, "learning_rate": 0.00019799888888175833, "loss": 0.871, "step": 603 }, { "epoch": 0.0920451081987199, "grad_norm": 0.7421875, "learning_rate": 0.00019798905226451217, "loss": 0.9361, "step": 604 }, { "epoch": 0.09219750076196281, "grad_norm": 0.87109375, "learning_rate": 0.00019797919177572378, "loss": 1.0544, "step": 605 }, { "epoch": 0.09234989332520573, "grad_norm": 0.87109375, "learning_rate": 0.0001979693074177953, "loss": 0.9774, "step": 606 }, { "epoch": 0.09250228588844864, "grad_norm": 1.0546875, "learning_rate": 0.00019795939919313473, "loss": 1.0413, "step": 607 }, { "epoch": 0.09265467845169156, "grad_norm": 0.76953125, "learning_rate": 0.00019794946710415584, "loss": 0.9527, "step": 608 }, { "epoch": 0.09280707101493448, "grad_norm": 0.76953125, "learning_rate": 0.0001979395111532782, "loss": 0.8993, "step": 609 }, { "epoch": 0.09295946357817739, "grad_norm": 1.296875, "learning_rate": 0.00019792953134292724, "loss": 0.99, "step": 610 }, { "epoch": 0.0931118561414203, "grad_norm": 0.86328125, "learning_rate": 0.00019791952767553422, "loss": 0.9517, "step": 611 }, { "epoch": 0.09326424870466321, "grad_norm": 0.91015625, "learning_rate": 0.00019790950015353612, "loss": 0.949, "step": 612 }, { "epoch": 0.09341664126790612, "grad_norm": 0.91796875, "learning_rate": 0.00019789944877937585, "loss": 0.9717, "step": 613 }, { "epoch": 0.09356903383114903, "grad_norm": 0.859375, "learning_rate": 0.00019788937355550202, "loss": 0.9463, "step": 614 }, { "epoch": 0.09372142639439196, "grad_norm": 1.15625, "learning_rate": 0.0001978792744843691, "loss": 1.1192, "step": 615 }, { "epoch": 0.09387381895763487, "grad_norm": 0.96875, "learning_rate": 0.00019786915156843742, "loss": 1.0325, "step": 616 }, { "epoch": 0.09402621152087778, "grad_norm": 0.7421875, "learning_rate": 0.000197859004810173, "loss": 0.951, "step": 617 }, { "epoch": 0.0941786040841207, "grad_norm": 1.0, "learning_rate": 0.00019784883421204778, "loss": 0.8444, "step": 618 }, { "epoch": 0.09433099664736361, "grad_norm": 0.85546875, "learning_rate": 0.00019783863977653948, "loss": 0.9385, "step": 619 }, { "epoch": 0.09448338921060652, "grad_norm": 1.25, "learning_rate": 0.00019782842150613158, "loss": 1.1502, "step": 620 }, { "epoch": 0.09463578177384943, "grad_norm": 0.953125, "learning_rate": 0.00019781817940331338, "loss": 1.1029, "step": 621 }, { "epoch": 0.09478817433709234, "grad_norm": 0.9453125, "learning_rate": 0.00019780791347058006, "loss": 0.9424, "step": 622 }, { "epoch": 0.09494056690033527, "grad_norm": 1.1328125, "learning_rate": 0.00019779762371043247, "loss": 0.905, "step": 623 }, { "epoch": 0.09509295946357818, "grad_norm": 0.60546875, "learning_rate": 0.00019778731012537741, "loss": 0.7809, "step": 624 }, { "epoch": 0.09524535202682109, "grad_norm": 0.890625, "learning_rate": 0.0001977769727179274, "loss": 0.8937, "step": 625 }, { "epoch": 0.095397744590064, "grad_norm": 1.0, "learning_rate": 0.0001977666114906008, "loss": 0.9387, "step": 626 }, { "epoch": 0.09555013715330692, "grad_norm": 1.171875, "learning_rate": 0.00019775622644592171, "loss": 1.3385, "step": 627 }, { "epoch": 0.09570252971654983, "grad_norm": 1.1171875, "learning_rate": 0.00019774581758642007, "loss": 1.134, "step": 628 }, { "epoch": 0.09585492227979274, "grad_norm": 0.9453125, "learning_rate": 0.00019773538491463168, "loss": 0.9335, "step": 629 }, { "epoch": 0.09600731484303567, "grad_norm": 1.0078125, "learning_rate": 0.00019772492843309807, "loss": 0.9678, "step": 630 }, { "epoch": 0.09615970740627858, "grad_norm": 0.828125, "learning_rate": 0.0001977144481443666, "loss": 1.014, "step": 631 }, { "epoch": 0.09631209996952149, "grad_norm": 1.1484375, "learning_rate": 0.00019770394405099038, "loss": 0.9523, "step": 632 }, { "epoch": 0.0964644925327644, "grad_norm": 1.125, "learning_rate": 0.0001976934161555284, "loss": 0.8816, "step": 633 }, { "epoch": 0.09661688509600731, "grad_norm": 0.9296875, "learning_rate": 0.00019768286446054532, "loss": 0.8709, "step": 634 }, { "epoch": 0.09676927765925022, "grad_norm": 1.09375, "learning_rate": 0.00019767228896861182, "loss": 1.0031, "step": 635 }, { "epoch": 0.09692167022249314, "grad_norm": 1.140625, "learning_rate": 0.00019766168968230415, "loss": 1.1712, "step": 636 }, { "epoch": 0.09707406278573606, "grad_norm": 1.140625, "learning_rate": 0.0001976510666042045, "loss": 1.0196, "step": 637 }, { "epoch": 0.09722645534897897, "grad_norm": 1.3125, "learning_rate": 0.00019764041973690074, "loss": 1.0052, "step": 638 }, { "epoch": 0.09737884791222189, "grad_norm": 0.7890625, "learning_rate": 0.0001976297490829867, "loss": 0.904, "step": 639 }, { "epoch": 0.0975312404754648, "grad_norm": 1.2109375, "learning_rate": 0.00019761905464506176, "loss": 0.9643, "step": 640 }, { "epoch": 0.09768363303870771, "grad_norm": 1.0546875, "learning_rate": 0.00019760833642573137, "loss": 0.8407, "step": 641 }, { "epoch": 0.09783602560195062, "grad_norm": 1.125, "learning_rate": 0.0001975975944276066, "loss": 1.158, "step": 642 }, { "epoch": 0.09798841816519353, "grad_norm": 0.87109375, "learning_rate": 0.00019758682865330434, "loss": 0.8986, "step": 643 }, { "epoch": 0.09814081072843646, "grad_norm": 0.9765625, "learning_rate": 0.00019757603910544727, "loss": 1.0501, "step": 644 }, { "epoch": 0.09829320329167937, "grad_norm": 0.77734375, "learning_rate": 0.0001975652257866639, "loss": 0.7535, "step": 645 }, { "epoch": 0.09844559585492228, "grad_norm": 0.87109375, "learning_rate": 0.00019755438869958856, "loss": 0.9945, "step": 646 }, { "epoch": 0.0985979884181652, "grad_norm": 1.0390625, "learning_rate": 0.00019754352784686125, "loss": 0.9174, "step": 647 }, { "epoch": 0.0987503809814081, "grad_norm": 1.0234375, "learning_rate": 0.00019753264323112786, "loss": 1.1219, "step": 648 }, { "epoch": 0.09890277354465102, "grad_norm": 0.921875, "learning_rate": 0.00019752173485504005, "loss": 1.0998, "step": 649 }, { "epoch": 0.09905516610789393, "grad_norm": 0.90234375, "learning_rate": 0.0001975108027212552, "loss": 1.0033, "step": 650 }, { "epoch": 0.09920755867113684, "grad_norm": 0.73828125, "learning_rate": 0.0001974998468324366, "loss": 0.8379, "step": 651 }, { "epoch": 0.09935995123437977, "grad_norm": 0.97265625, "learning_rate": 0.00019748886719125325, "loss": 0.8044, "step": 652 }, { "epoch": 0.09951234379762268, "grad_norm": 1.0703125, "learning_rate": 0.0001974778638003799, "loss": 0.9689, "step": 653 }, { "epoch": 0.09966473636086559, "grad_norm": 0.859375, "learning_rate": 0.00019746683666249721, "loss": 1.073, "step": 654 }, { "epoch": 0.0998171289241085, "grad_norm": 0.703125, "learning_rate": 0.0001974557857802915, "loss": 1.0065, "step": 655 }, { "epoch": 0.09996952148735141, "grad_norm": 0.84375, "learning_rate": 0.00019744471115645492, "loss": 1.0235, "step": 656 }, { "epoch": 0.10012191405059433, "grad_norm": 1.03125, "learning_rate": 0.00019743361279368543, "loss": 0.901, "step": 657 }, { "epoch": 0.10027430661383724, "grad_norm": 1.1953125, "learning_rate": 0.00019742249069468676, "loss": 1.2351, "step": 658 }, { "epoch": 0.10042669917708016, "grad_norm": 0.88671875, "learning_rate": 0.0001974113448621684, "loss": 0.9597, "step": 659 }, { "epoch": 0.10057909174032308, "grad_norm": 0.98046875, "learning_rate": 0.0001974001752988456, "loss": 1.0456, "step": 660 }, { "epoch": 0.10073148430356599, "grad_norm": 0.8125, "learning_rate": 0.00019738898200743945, "loss": 0.8654, "step": 661 }, { "epoch": 0.1008838768668089, "grad_norm": 0.9453125, "learning_rate": 0.00019737776499067683, "loss": 0.903, "step": 662 }, { "epoch": 0.10103626943005181, "grad_norm": 0.8125, "learning_rate": 0.00019736652425129034, "loss": 0.8555, "step": 663 }, { "epoch": 0.10118866199329472, "grad_norm": 0.90625, "learning_rate": 0.00019735525979201838, "loss": 1.0654, "step": 664 }, { "epoch": 0.10134105455653764, "grad_norm": 1.046875, "learning_rate": 0.00019734397161560514, "loss": 0.9203, "step": 665 }, { "epoch": 0.10149344711978056, "grad_norm": 1.0234375, "learning_rate": 0.0001973326597248006, "loss": 1.0565, "step": 666 }, { "epoch": 0.10164583968302347, "grad_norm": 1.546875, "learning_rate": 0.00019732132412236047, "loss": 1.0844, "step": 667 }, { "epoch": 0.10179823224626638, "grad_norm": 1.1328125, "learning_rate": 0.00019730996481104627, "loss": 1.0618, "step": 668 }, { "epoch": 0.1019506248095093, "grad_norm": 1.0703125, "learning_rate": 0.00019729858179362531, "loss": 0.9479, "step": 669 }, { "epoch": 0.10210301737275221, "grad_norm": 1.0703125, "learning_rate": 0.00019728717507287063, "loss": 0.9653, "step": 670 }, { "epoch": 0.10225540993599512, "grad_norm": 0.83203125, "learning_rate": 0.0001972757446515611, "loss": 0.8957, "step": 671 }, { "epoch": 0.10240780249923803, "grad_norm": 0.9921875, "learning_rate": 0.0001972642905324813, "loss": 1.1096, "step": 672 }, { "epoch": 0.10256019506248096, "grad_norm": 1.0234375, "learning_rate": 0.00019725281271842167, "loss": 1.0335, "step": 673 }, { "epoch": 0.10271258762572387, "grad_norm": 0.90625, "learning_rate": 0.00019724131121217836, "loss": 0.892, "step": 674 }, { "epoch": 0.10286498018896678, "grad_norm": 1.015625, "learning_rate": 0.00019722978601655324, "loss": 1.02, "step": 675 }, { "epoch": 0.1030173727522097, "grad_norm": 1.1796875, "learning_rate": 0.00019721823713435404, "loss": 1.225, "step": 676 }, { "epoch": 0.1031697653154526, "grad_norm": 1.046875, "learning_rate": 0.00019720666456839433, "loss": 0.9743, "step": 677 }, { "epoch": 0.10332215787869552, "grad_norm": 1.15625, "learning_rate": 0.00019719506832149318, "loss": 0.9516, "step": 678 }, { "epoch": 0.10347455044193843, "grad_norm": 0.9609375, "learning_rate": 0.00019718344839647576, "loss": 1.1194, "step": 679 }, { "epoch": 0.10362694300518134, "grad_norm": 0.90625, "learning_rate": 0.00019717180479617277, "loss": 1.151, "step": 680 }, { "epoch": 0.10377933556842427, "grad_norm": 1.421875, "learning_rate": 0.00019716013752342078, "loss": 1.11, "step": 681 }, { "epoch": 0.10393172813166718, "grad_norm": 1.21875, "learning_rate": 0.0001971484465810621, "loss": 0.9864, "step": 682 }, { "epoch": 0.10408412069491009, "grad_norm": 1.2421875, "learning_rate": 0.00019713673197194483, "loss": 1.1759, "step": 683 }, { "epoch": 0.104236513258153, "grad_norm": 0.8515625, "learning_rate": 0.00019712499369892275, "loss": 1.0367, "step": 684 }, { "epoch": 0.10438890582139591, "grad_norm": 0.8671875, "learning_rate": 0.00019711323176485555, "loss": 0.8981, "step": 685 }, { "epoch": 0.10454129838463883, "grad_norm": 1.109375, "learning_rate": 0.00019710144617260858, "loss": 1.0765, "step": 686 }, { "epoch": 0.10469369094788174, "grad_norm": 0.93359375, "learning_rate": 0.000197089636925053, "loss": 1.0446, "step": 687 }, { "epoch": 0.10484608351112466, "grad_norm": 1.3671875, "learning_rate": 0.00019707780402506567, "loss": 0.9462, "step": 688 }, { "epoch": 0.10499847607436757, "grad_norm": 0.875, "learning_rate": 0.00019706594747552925, "loss": 0.9668, "step": 689 }, { "epoch": 0.10515086863761049, "grad_norm": 0.88671875, "learning_rate": 0.00019705406727933223, "loss": 1.2528, "step": 690 }, { "epoch": 0.1053032612008534, "grad_norm": 0.87890625, "learning_rate": 0.00019704216343936873, "loss": 1.0603, "step": 691 }, { "epoch": 0.10545565376409631, "grad_norm": 0.94921875, "learning_rate": 0.00019703023595853876, "loss": 1.0395, "step": 692 }, { "epoch": 0.10560804632733922, "grad_norm": 0.8203125, "learning_rate": 0.00019701828483974796, "loss": 1.0723, "step": 693 }, { "epoch": 0.10576043889058213, "grad_norm": 0.6953125, "learning_rate": 0.00019700631008590783, "loss": 1.0096, "step": 694 }, { "epoch": 0.10591283145382506, "grad_norm": 0.87890625, "learning_rate": 0.0001969943116999356, "loss": 0.9585, "step": 695 }, { "epoch": 0.10606522401706797, "grad_norm": 1.296875, "learning_rate": 0.00019698228968475422, "loss": 0.9611, "step": 696 }, { "epoch": 0.10621761658031088, "grad_norm": 0.8515625, "learning_rate": 0.00019697024404329244, "loss": 0.9423, "step": 697 }, { "epoch": 0.1063700091435538, "grad_norm": 0.8125, "learning_rate": 0.00019695817477848477, "loss": 0.8296, "step": 698 }, { "epoch": 0.10652240170679671, "grad_norm": 0.8203125, "learning_rate": 0.00019694608189327144, "loss": 0.9179, "step": 699 }, { "epoch": 0.10667479427003962, "grad_norm": 1.1171875, "learning_rate": 0.00019693396539059843, "loss": 1.0248, "step": 700 }, { "epoch": 0.10682718683328253, "grad_norm": 0.78125, "learning_rate": 0.00019692182527341755, "loss": 0.8862, "step": 701 }, { "epoch": 0.10697957939652544, "grad_norm": 1.3125, "learning_rate": 0.00019690966154468624, "loss": 1.1074, "step": 702 }, { "epoch": 0.10713197195976837, "grad_norm": 0.8984375, "learning_rate": 0.0001968974742073678, "loss": 0.9496, "step": 703 }, { "epoch": 0.10728436452301128, "grad_norm": 0.8359375, "learning_rate": 0.00019688526326443127, "loss": 0.8793, "step": 704 }, { "epoch": 0.10743675708625419, "grad_norm": 0.87890625, "learning_rate": 0.00019687302871885132, "loss": 1.0884, "step": 705 }, { "epoch": 0.1075891496494971, "grad_norm": 1.28125, "learning_rate": 0.0001968607705736085, "loss": 0.9501, "step": 706 }, { "epoch": 0.10774154221274002, "grad_norm": 0.91796875, "learning_rate": 0.00019684848883168914, "loss": 1.0861, "step": 707 }, { "epoch": 0.10789393477598293, "grad_norm": 0.88671875, "learning_rate": 0.0001968361834960852, "loss": 0.8762, "step": 708 }, { "epoch": 0.10804632733922584, "grad_norm": 0.86328125, "learning_rate": 0.00019682385456979437, "loss": 1.0265, "step": 709 }, { "epoch": 0.10819871990246877, "grad_norm": 0.984375, "learning_rate": 0.00019681150205582025, "loss": 1.0075, "step": 710 }, { "epoch": 0.10835111246571168, "grad_norm": 0.8515625, "learning_rate": 0.00019679912595717207, "loss": 0.8974, "step": 711 }, { "epoch": 0.10850350502895459, "grad_norm": 0.9296875, "learning_rate": 0.00019678672627686478, "loss": 0.9946, "step": 712 }, { "epoch": 0.1086558975921975, "grad_norm": 0.80859375, "learning_rate": 0.00019677430301791917, "loss": 0.7898, "step": 713 }, { "epoch": 0.10880829015544041, "grad_norm": 1.125, "learning_rate": 0.00019676185618336173, "loss": 0.9676, "step": 714 }, { "epoch": 0.10896068271868332, "grad_norm": 1.015625, "learning_rate": 0.00019674938577622463, "loss": 0.9071, "step": 715 }, { "epoch": 0.10911307528192624, "grad_norm": 0.91796875, "learning_rate": 0.00019673689179954584, "loss": 1.0219, "step": 716 }, { "epoch": 0.10926546784516916, "grad_norm": 1.15625, "learning_rate": 0.00019672437425636915, "loss": 0.9448, "step": 717 }, { "epoch": 0.10941786040841207, "grad_norm": 0.6953125, "learning_rate": 0.00019671183314974399, "loss": 0.7997, "step": 718 }, { "epoch": 0.10957025297165499, "grad_norm": 0.83203125, "learning_rate": 0.0001966992684827255, "loss": 0.8621, "step": 719 }, { "epoch": 0.1097226455348979, "grad_norm": 1.09375, "learning_rate": 0.00019668668025837462, "loss": 1.0998, "step": 720 }, { "epoch": 0.10987503809814081, "grad_norm": 0.76171875, "learning_rate": 0.0001966740684797581, "loss": 1.0077, "step": 721 }, { "epoch": 0.11002743066138372, "grad_norm": 0.82421875, "learning_rate": 0.0001966614331499483, "loss": 0.988, "step": 722 }, { "epoch": 0.11017982322462663, "grad_norm": 1.1171875, "learning_rate": 0.00019664877427202332, "loss": 0.9484, "step": 723 }, { "epoch": 0.11033221578786956, "grad_norm": 1.3515625, "learning_rate": 0.00019663609184906712, "loss": 1.0955, "step": 724 }, { "epoch": 0.11048460835111247, "grad_norm": 0.89453125, "learning_rate": 0.0001966233858841693, "loss": 1.0073, "step": 725 }, { "epoch": 0.11063700091435538, "grad_norm": 1.5546875, "learning_rate": 0.00019661065638042522, "loss": 1.1109, "step": 726 }, { "epoch": 0.1107893934775983, "grad_norm": 1.03125, "learning_rate": 0.00019659790334093592, "loss": 1.0143, "step": 727 }, { "epoch": 0.1109417860408412, "grad_norm": 0.78125, "learning_rate": 0.0001965851267688083, "loss": 0.9042, "step": 728 }, { "epoch": 0.11109417860408412, "grad_norm": 0.91015625, "learning_rate": 0.00019657232666715486, "loss": 1.0038, "step": 729 }, { "epoch": 0.11124657116732703, "grad_norm": 1.28125, "learning_rate": 0.00019655950303909393, "loss": 1.0719, "step": 730 }, { "epoch": 0.11139896373056994, "grad_norm": 1.015625, "learning_rate": 0.00019654665588774947, "loss": 1.1603, "step": 731 }, { "epoch": 0.11155135629381287, "grad_norm": 0.87109375, "learning_rate": 0.0001965337852162513, "loss": 0.911, "step": 732 }, { "epoch": 0.11170374885705578, "grad_norm": 0.87890625, "learning_rate": 0.00019652089102773488, "loss": 0.9267, "step": 733 }, { "epoch": 0.11185614142029869, "grad_norm": 1.0234375, "learning_rate": 0.0001965079733253414, "loss": 0.9511, "step": 734 }, { "epoch": 0.1120085339835416, "grad_norm": 1.046875, "learning_rate": 0.0001964950321122178, "loss": 1.0968, "step": 735 }, { "epoch": 0.11216092654678451, "grad_norm": 0.6875, "learning_rate": 0.00019648206739151676, "loss": 0.9167, "step": 736 }, { "epoch": 0.11231331911002743, "grad_norm": 1.1015625, "learning_rate": 0.00019646907916639664, "loss": 0.9704, "step": 737 }, { "epoch": 0.11246571167327034, "grad_norm": 1.140625, "learning_rate": 0.00019645606744002163, "loss": 1.2013, "step": 738 }, { "epoch": 0.11261810423651326, "grad_norm": 1.09375, "learning_rate": 0.0001964430322155615, "loss": 0.8962, "step": 739 }, { "epoch": 0.11277049679975618, "grad_norm": 1.1875, "learning_rate": 0.00019642997349619186, "loss": 1.168, "step": 740 }, { "epoch": 0.11292288936299909, "grad_norm": 0.6953125, "learning_rate": 0.00019641689128509397, "loss": 0.98, "step": 741 }, { "epoch": 0.113075281926242, "grad_norm": 0.94921875, "learning_rate": 0.00019640378558545487, "loss": 0.9177, "step": 742 }, { "epoch": 0.11322767448948491, "grad_norm": 0.98046875, "learning_rate": 0.0001963906564004673, "loss": 0.895, "step": 743 }, { "epoch": 0.11338006705272782, "grad_norm": 1.109375, "learning_rate": 0.0001963775037333297, "loss": 1.0045, "step": 744 }, { "epoch": 0.11353245961597073, "grad_norm": 0.90625, "learning_rate": 0.00019636432758724626, "loss": 0.9518, "step": 745 }, { "epoch": 0.11368485217921366, "grad_norm": 0.89453125, "learning_rate": 0.00019635112796542687, "loss": 0.9831, "step": 746 }, { "epoch": 0.11383724474245657, "grad_norm": 1.046875, "learning_rate": 0.00019633790487108717, "loss": 1.284, "step": 747 }, { "epoch": 0.11398963730569948, "grad_norm": 1.09375, "learning_rate": 0.00019632465830744846, "loss": 1.0881, "step": 748 }, { "epoch": 0.1141420298689424, "grad_norm": 1.0390625, "learning_rate": 0.0001963113882777378, "loss": 1.0787, "step": 749 }, { "epoch": 0.11429442243218531, "grad_norm": 1.03125, "learning_rate": 0.00019629809478518802, "loss": 1.0236, "step": 750 }, { "epoch": 0.11444681499542822, "grad_norm": 0.7265625, "learning_rate": 0.0001962847778330375, "loss": 0.8943, "step": 751 }, { "epoch": 0.11459920755867113, "grad_norm": 0.94921875, "learning_rate": 0.00019627143742453055, "loss": 0.9604, "step": 752 }, { "epoch": 0.11475160012191406, "grad_norm": 1.1171875, "learning_rate": 0.000196258073562917, "loss": 0.9735, "step": 753 }, { "epoch": 0.11490399268515697, "grad_norm": 0.765625, "learning_rate": 0.00019624468625145254, "loss": 1.0016, "step": 754 }, { "epoch": 0.11505638524839988, "grad_norm": 0.8515625, "learning_rate": 0.00019623127549339846, "loss": 0.9088, "step": 755 }, { "epoch": 0.11520877781164279, "grad_norm": 1.171875, "learning_rate": 0.00019621784129202188, "loss": 1.0631, "step": 756 }, { "epoch": 0.1153611703748857, "grad_norm": 0.90625, "learning_rate": 0.00019620438365059548, "loss": 0.8517, "step": 757 }, { "epoch": 0.11551356293812862, "grad_norm": 0.703125, "learning_rate": 0.0001961909025723978, "loss": 0.9287, "step": 758 }, { "epoch": 0.11566595550137153, "grad_norm": 0.83984375, "learning_rate": 0.000196177398060713, "loss": 0.9148, "step": 759 }, { "epoch": 0.11581834806461444, "grad_norm": 1.1953125, "learning_rate": 0.00019616387011883098, "loss": 1.1966, "step": 760 }, { "epoch": 0.11597074062785737, "grad_norm": 1.203125, "learning_rate": 0.00019615031875004732, "loss": 0.9035, "step": 761 }, { "epoch": 0.11612313319110028, "grad_norm": 0.95703125, "learning_rate": 0.00019613674395766334, "loss": 0.9665, "step": 762 }, { "epoch": 0.11627552575434319, "grad_norm": 0.984375, "learning_rate": 0.0001961231457449861, "loss": 0.9781, "step": 763 }, { "epoch": 0.1164279183175861, "grad_norm": 0.93359375, "learning_rate": 0.00019610952411532826, "loss": 0.9164, "step": 764 }, { "epoch": 0.11658031088082901, "grad_norm": 1.203125, "learning_rate": 0.00019609587907200825, "loss": 1.159, "step": 765 }, { "epoch": 0.11673270344407193, "grad_norm": 1.0390625, "learning_rate": 0.00019608221061835025, "loss": 1.0007, "step": 766 }, { "epoch": 0.11688509600731484, "grad_norm": 1.046875, "learning_rate": 0.000196068518757684, "loss": 0.8915, "step": 767 }, { "epoch": 0.11703748857055776, "grad_norm": 0.98046875, "learning_rate": 0.00019605480349334516, "loss": 1.1156, "step": 768 }, { "epoch": 0.11718988113380067, "grad_norm": 0.8359375, "learning_rate": 0.00019604106482867486, "loss": 0.8345, "step": 769 }, { "epoch": 0.11734227369704359, "grad_norm": 0.875, "learning_rate": 0.00019602730276702007, "loss": 1.0439, "step": 770 }, { "epoch": 0.1174946662602865, "grad_norm": 0.96484375, "learning_rate": 0.00019601351731173344, "loss": 1.1254, "step": 771 }, { "epoch": 0.11764705882352941, "grad_norm": 1.0703125, "learning_rate": 0.0001959997084661733, "loss": 1.1987, "step": 772 }, { "epoch": 0.11779945138677232, "grad_norm": 0.8046875, "learning_rate": 0.00019598587623370362, "loss": 1.1002, "step": 773 }, { "epoch": 0.11795184395001523, "grad_norm": 1.0, "learning_rate": 0.00019597202061769425, "loss": 1.0355, "step": 774 }, { "epoch": 0.11810423651325816, "grad_norm": 0.7109375, "learning_rate": 0.00019595814162152056, "loss": 0.8295, "step": 775 }, { "epoch": 0.11825662907650107, "grad_norm": 0.9140625, "learning_rate": 0.00019594423924856362, "loss": 1.0598, "step": 776 }, { "epoch": 0.11840902163974398, "grad_norm": 1.078125, "learning_rate": 0.0001959303135022103, "loss": 1.1327, "step": 777 }, { "epoch": 0.1185614142029869, "grad_norm": 0.98828125, "learning_rate": 0.00019591636438585314, "loss": 1.0565, "step": 778 }, { "epoch": 0.1187138067662298, "grad_norm": 1.1328125, "learning_rate": 0.00019590239190289032, "loss": 1.0661, "step": 779 }, { "epoch": 0.11886619932947272, "grad_norm": 1.2265625, "learning_rate": 0.0001958883960567257, "loss": 1.2785, "step": 780 }, { "epoch": 0.11901859189271563, "grad_norm": 0.9921875, "learning_rate": 0.00019587437685076892, "loss": 0.9938, "step": 781 }, { "epoch": 0.11917098445595854, "grad_norm": 1.375, "learning_rate": 0.00019586033428843522, "loss": 1.1089, "step": 782 }, { "epoch": 0.11932337701920147, "grad_norm": 1.0, "learning_rate": 0.0001958462683731456, "loss": 1.1787, "step": 783 }, { "epoch": 0.11947576958244438, "grad_norm": 0.8515625, "learning_rate": 0.00019583217910832673, "loss": 1.0588, "step": 784 }, { "epoch": 0.11962816214568729, "grad_norm": 1.0390625, "learning_rate": 0.00019581806649741093, "loss": 1.0881, "step": 785 }, { "epoch": 0.1197805547089302, "grad_norm": 0.87890625, "learning_rate": 0.00019580393054383622, "loss": 0.9047, "step": 786 }, { "epoch": 0.11993294727217312, "grad_norm": 1.0078125, "learning_rate": 0.00019578977125104635, "loss": 1.0142, "step": 787 }, { "epoch": 0.12008533983541603, "grad_norm": 0.765625, "learning_rate": 0.00019577558862249076, "loss": 1.0895, "step": 788 }, { "epoch": 0.12023773239865894, "grad_norm": 0.93359375, "learning_rate": 0.00019576138266162444, "loss": 0.9392, "step": 789 }, { "epoch": 0.12039012496190186, "grad_norm": 0.89453125, "learning_rate": 0.00019574715337190827, "loss": 1.0307, "step": 790 }, { "epoch": 0.12054251752514478, "grad_norm": 0.9375, "learning_rate": 0.0001957329007568087, "loss": 0.8018, "step": 791 }, { "epoch": 0.12069491008838769, "grad_norm": 0.81640625, "learning_rate": 0.00019571862481979776, "loss": 0.9165, "step": 792 }, { "epoch": 0.1208473026516306, "grad_norm": 0.93359375, "learning_rate": 0.0001957043255643534, "loss": 0.8394, "step": 793 }, { "epoch": 0.12099969521487351, "grad_norm": 0.92578125, "learning_rate": 0.0001956900029939591, "loss": 1.0938, "step": 794 }, { "epoch": 0.12115208777811642, "grad_norm": 1.46875, "learning_rate": 0.000195675657112104, "loss": 1.0086, "step": 795 }, { "epoch": 0.12130448034135934, "grad_norm": 0.9296875, "learning_rate": 0.000195661287922283, "loss": 1.1357, "step": 796 }, { "epoch": 0.12145687290460226, "grad_norm": 1.0859375, "learning_rate": 0.0001956468954279966, "loss": 1.1627, "step": 797 }, { "epoch": 0.12160926546784517, "grad_norm": 0.91796875, "learning_rate": 0.00019563247963275108, "loss": 0.9345, "step": 798 }, { "epoch": 0.12176165803108809, "grad_norm": 1.203125, "learning_rate": 0.00019561804054005826, "loss": 1.0277, "step": 799 }, { "epoch": 0.121914050594331, "grad_norm": 1.03125, "learning_rate": 0.00019560357815343577, "loss": 0.9411, "step": 800 }, { "epoch": 0.12206644315757391, "grad_norm": 1.0859375, "learning_rate": 0.00019558909247640685, "loss": 1.03, "step": 801 }, { "epoch": 0.12221883572081682, "grad_norm": 0.86328125, "learning_rate": 0.0001955745835125004, "loss": 1.074, "step": 802 }, { "epoch": 0.12237122828405973, "grad_norm": 0.8671875, "learning_rate": 0.00019556005126525103, "loss": 0.9837, "step": 803 }, { "epoch": 0.12252362084730266, "grad_norm": 0.7109375, "learning_rate": 0.00019554549573819898, "loss": 0.9615, "step": 804 }, { "epoch": 0.12267601341054557, "grad_norm": 0.90234375, "learning_rate": 0.00019553091693489018, "loss": 0.8962, "step": 805 }, { "epoch": 0.12282840597378848, "grad_norm": 0.890625, "learning_rate": 0.0001955163148588763, "loss": 0.9434, "step": 806 }, { "epoch": 0.1229807985370314, "grad_norm": 1.0703125, "learning_rate": 0.00019550168951371454, "loss": 1.1571, "step": 807 }, { "epoch": 0.1231331911002743, "grad_norm": 1.0625, "learning_rate": 0.00019548704090296788, "loss": 0.9729, "step": 808 }, { "epoch": 0.12328558366351722, "grad_norm": 0.99609375, "learning_rate": 0.00019547236903020494, "loss": 1.0205, "step": 809 }, { "epoch": 0.12343797622676013, "grad_norm": 1.1484375, "learning_rate": 0.00019545767389899998, "loss": 1.1246, "step": 810 }, { "epoch": 0.12359036879000304, "grad_norm": 0.84375, "learning_rate": 0.00019544295551293295, "loss": 0.8429, "step": 811 }, { "epoch": 0.12374276135324597, "grad_norm": 0.79296875, "learning_rate": 0.00019542821387558945, "loss": 0.9106, "step": 812 }, { "epoch": 0.12389515391648888, "grad_norm": 1.0078125, "learning_rate": 0.0001954134489905608, "loss": 0.9236, "step": 813 }, { "epoch": 0.12404754647973179, "grad_norm": 1.1015625, "learning_rate": 0.00019539866086144389, "loss": 1.1102, "step": 814 }, { "epoch": 0.1241999390429747, "grad_norm": 0.8984375, "learning_rate": 0.00019538384949184133, "loss": 0.9407, "step": 815 }, { "epoch": 0.12435233160621761, "grad_norm": 0.94921875, "learning_rate": 0.00019536901488536137, "loss": 0.9293, "step": 816 }, { "epoch": 0.12450472416946053, "grad_norm": 0.98828125, "learning_rate": 0.00019535415704561798, "loss": 1.0294, "step": 817 }, { "epoch": 0.12465711673270344, "grad_norm": 1.1796875, "learning_rate": 0.00019533927597623069, "loss": 1.2111, "step": 818 }, { "epoch": 0.12480950929594636, "grad_norm": 0.9765625, "learning_rate": 0.0001953243716808248, "loss": 1.066, "step": 819 }, { "epoch": 0.12496190185918928, "grad_norm": 0.88671875, "learning_rate": 0.00019530944416303115, "loss": 1.0595, "step": 820 }, { "epoch": 0.1251142944224322, "grad_norm": 0.9453125, "learning_rate": 0.00019529449342648637, "loss": 0.8964, "step": 821 }, { "epoch": 0.12526668698567509, "grad_norm": 0.796875, "learning_rate": 0.00019527951947483261, "loss": 0.7588, "step": 822 }, { "epoch": 0.125419079548918, "grad_norm": 1.1484375, "learning_rate": 0.00019526452231171775, "loss": 1.0798, "step": 823 }, { "epoch": 0.12557147211216094, "grad_norm": 0.9140625, "learning_rate": 0.00019524950194079534, "loss": 1.078, "step": 824 }, { "epoch": 0.12572386467540383, "grad_norm": 1.0390625, "learning_rate": 0.00019523445836572455, "loss": 1.1267, "step": 825 }, { "epoch": 0.12587625723864676, "grad_norm": 0.9375, "learning_rate": 0.00019521939159017018, "loss": 0.9032, "step": 826 }, { "epoch": 0.12602864980188966, "grad_norm": 0.98828125, "learning_rate": 0.00019520430161780277, "loss": 0.9799, "step": 827 }, { "epoch": 0.12618104236513258, "grad_norm": 0.8125, "learning_rate": 0.00019518918845229838, "loss": 1.0018, "step": 828 }, { "epoch": 0.12633343492837548, "grad_norm": 0.9140625, "learning_rate": 0.00019517405209733887, "loss": 0.8557, "step": 829 }, { "epoch": 0.1264858274916184, "grad_norm": 1.078125, "learning_rate": 0.00019515889255661165, "loss": 0.7873, "step": 830 }, { "epoch": 0.12663822005486133, "grad_norm": 0.78515625, "learning_rate": 0.00019514370983380976, "loss": 0.854, "step": 831 }, { "epoch": 0.12679061261810423, "grad_norm": 0.77734375, "learning_rate": 0.000195128503932632, "loss": 0.9887, "step": 832 }, { "epoch": 0.12694300518134716, "grad_norm": 0.859375, "learning_rate": 0.0001951132748567827, "loss": 0.9942, "step": 833 }, { "epoch": 0.12709539774459006, "grad_norm": 1.34375, "learning_rate": 0.00019509802260997186, "loss": 0.9857, "step": 834 }, { "epoch": 0.12724779030783298, "grad_norm": 0.6796875, "learning_rate": 0.0001950827471959152, "loss": 0.7831, "step": 835 }, { "epoch": 0.12740018287107588, "grad_norm": 0.8359375, "learning_rate": 0.00019506744861833402, "loss": 1.0726, "step": 836 }, { "epoch": 0.1275525754343188, "grad_norm": 0.9296875, "learning_rate": 0.00019505212688095526, "loss": 1.0748, "step": 837 }, { "epoch": 0.12770496799756173, "grad_norm": 0.62109375, "learning_rate": 0.0001950367819875115, "loss": 0.7029, "step": 838 }, { "epoch": 0.12785736056080463, "grad_norm": 1.3359375, "learning_rate": 0.000195021413941741, "loss": 1.09, "step": 839 }, { "epoch": 0.12800975312404755, "grad_norm": 1.0703125, "learning_rate": 0.00019500602274738764, "loss": 1.0593, "step": 840 }, { "epoch": 0.12816214568729045, "grad_norm": 1.1015625, "learning_rate": 0.0001949906084082009, "loss": 1.0133, "step": 841 }, { "epoch": 0.12831453825053338, "grad_norm": 1.109375, "learning_rate": 0.000194975170927936, "loss": 0.9323, "step": 842 }, { "epoch": 0.12846693081377628, "grad_norm": 0.93359375, "learning_rate": 0.00019495971031035367, "loss": 0.9327, "step": 843 }, { "epoch": 0.1286193233770192, "grad_norm": 1.1328125, "learning_rate": 0.00019494422655922037, "loss": 0.9288, "step": 844 }, { "epoch": 0.12877171594026213, "grad_norm": 0.78125, "learning_rate": 0.00019492871967830816, "loss": 1.0907, "step": 845 }, { "epoch": 0.12892410850350502, "grad_norm": 1.0078125, "learning_rate": 0.00019491318967139476, "loss": 0.8812, "step": 846 }, { "epoch": 0.12907650106674795, "grad_norm": 0.83984375, "learning_rate": 0.00019489763654226345, "loss": 0.828, "step": 847 }, { "epoch": 0.12922889362999085, "grad_norm": 1.125, "learning_rate": 0.0001948820602947032, "loss": 1.0374, "step": 848 }, { "epoch": 0.12938128619323377, "grad_norm": 0.765625, "learning_rate": 0.0001948664609325087, "loss": 0.9027, "step": 849 }, { "epoch": 0.12953367875647667, "grad_norm": 0.92578125, "learning_rate": 0.00019485083845948003, "loss": 0.9312, "step": 850 }, { "epoch": 0.1296860713197196, "grad_norm": 0.97265625, "learning_rate": 0.0001948351928794232, "loss": 1.0252, "step": 851 }, { "epoch": 0.12983846388296252, "grad_norm": 1.1328125, "learning_rate": 0.00019481952419614961, "loss": 1.0083, "step": 852 }, { "epoch": 0.12999085644620542, "grad_norm": 0.77734375, "learning_rate": 0.0001948038324134764, "loss": 0.8753, "step": 853 }, { "epoch": 0.13014324900944835, "grad_norm": 1.0625, "learning_rate": 0.0001947881175352263, "loss": 0.8963, "step": 854 }, { "epoch": 0.13029564157269125, "grad_norm": 1.09375, "learning_rate": 0.0001947723795652277, "loss": 0.955, "step": 855 }, { "epoch": 0.13044803413593417, "grad_norm": 0.96484375, "learning_rate": 0.0001947566185073146, "loss": 1.0398, "step": 856 }, { "epoch": 0.13060042669917707, "grad_norm": 1.015625, "learning_rate": 0.00019474083436532658, "loss": 1.0654, "step": 857 }, { "epoch": 0.13075281926242, "grad_norm": 0.91796875, "learning_rate": 0.00019472502714310892, "loss": 1.0112, "step": 858 }, { "epoch": 0.13090521182566292, "grad_norm": 0.74609375, "learning_rate": 0.00019470919684451245, "loss": 1.0232, "step": 859 }, { "epoch": 0.13105760438890582, "grad_norm": 0.64453125, "learning_rate": 0.00019469334347339373, "loss": 0.805, "step": 860 }, { "epoch": 0.13120999695214874, "grad_norm": 1.015625, "learning_rate": 0.0001946774670336148, "loss": 1.1194, "step": 861 }, { "epoch": 0.13136238951539164, "grad_norm": 0.79296875, "learning_rate": 0.00019466156752904343, "loss": 0.926, "step": 862 }, { "epoch": 0.13151478207863457, "grad_norm": 1.1171875, "learning_rate": 0.00019464564496355293, "loss": 1.0574, "step": 863 }, { "epoch": 0.13166717464187747, "grad_norm": 1.2421875, "learning_rate": 0.0001946296993410223, "loss": 1.0389, "step": 864 }, { "epoch": 0.1318195672051204, "grad_norm": 0.7265625, "learning_rate": 0.00019461373066533613, "loss": 0.8993, "step": 865 }, { "epoch": 0.13197195976836332, "grad_norm": 1.03125, "learning_rate": 0.00019459773894038457, "loss": 1.0063, "step": 866 }, { "epoch": 0.13212435233160622, "grad_norm": 0.90234375, "learning_rate": 0.00019458172417006347, "loss": 0.9811, "step": 867 }, { "epoch": 0.13227674489484914, "grad_norm": 0.92578125, "learning_rate": 0.00019456568635827428, "loss": 0.9502, "step": 868 }, { "epoch": 0.13242913745809204, "grad_norm": 1.078125, "learning_rate": 0.00019454962550892398, "loss": 1.0147, "step": 869 }, { "epoch": 0.13258153002133496, "grad_norm": 0.8828125, "learning_rate": 0.00019453354162592525, "loss": 0.907, "step": 870 }, { "epoch": 0.13273392258457786, "grad_norm": 0.8125, "learning_rate": 0.00019451743471319638, "loss": 0.9898, "step": 871 }, { "epoch": 0.1328863151478208, "grad_norm": 0.86328125, "learning_rate": 0.00019450130477466124, "loss": 0.8563, "step": 872 }, { "epoch": 0.13303870771106371, "grad_norm": 0.83203125, "learning_rate": 0.00019448515181424931, "loss": 0.9084, "step": 873 }, { "epoch": 0.1331911002743066, "grad_norm": 0.71875, "learning_rate": 0.00019446897583589565, "loss": 0.9052, "step": 874 }, { "epoch": 0.13334349283754954, "grad_norm": 0.80078125, "learning_rate": 0.000194452776843541, "loss": 1.0242, "step": 875 }, { "epoch": 0.13349588540079244, "grad_norm": 0.8203125, "learning_rate": 0.00019443655484113165, "loss": 0.8958, "step": 876 }, { "epoch": 0.13364827796403536, "grad_norm": 1.1328125, "learning_rate": 0.00019442030983261952, "loss": 1.1671, "step": 877 }, { "epoch": 0.13380067052727826, "grad_norm": 0.84765625, "learning_rate": 0.00019440404182196214, "loss": 0.8973, "step": 878 }, { "epoch": 0.13395306309052118, "grad_norm": 0.75390625, "learning_rate": 0.0001943877508131226, "loss": 1.0688, "step": 879 }, { "epoch": 0.13410545565376408, "grad_norm": 0.734375, "learning_rate": 0.00019437143681006965, "loss": 0.8178, "step": 880 }, { "epoch": 0.134257848217007, "grad_norm": 1.0546875, "learning_rate": 0.00019435509981677762, "loss": 1.0363, "step": 881 }, { "epoch": 0.13441024078024993, "grad_norm": 1.5390625, "learning_rate": 0.0001943387398372264, "loss": 1.2216, "step": 882 }, { "epoch": 0.13456263334349283, "grad_norm": 0.68359375, "learning_rate": 0.00019432235687540157, "loss": 0.7484, "step": 883 }, { "epoch": 0.13471502590673576, "grad_norm": 0.8125, "learning_rate": 0.0001943059509352942, "loss": 0.8053, "step": 884 }, { "epoch": 0.13486741846997866, "grad_norm": 0.921875, "learning_rate": 0.00019428952202090103, "loss": 0.9504, "step": 885 }, { "epoch": 0.13501981103322158, "grad_norm": 1.0, "learning_rate": 0.0001942730701362244, "loss": 0.9844, "step": 886 }, { "epoch": 0.13517220359646448, "grad_norm": 0.8515625, "learning_rate": 0.0001942565952852722, "loss": 0.9433, "step": 887 }, { "epoch": 0.1353245961597074, "grad_norm": 0.734375, "learning_rate": 0.00019424009747205797, "loss": 0.845, "step": 888 }, { "epoch": 0.13547698872295033, "grad_norm": 1.0625, "learning_rate": 0.0001942235767006008, "loss": 1.0061, "step": 889 }, { "epoch": 0.13562938128619323, "grad_norm": 0.78515625, "learning_rate": 0.0001942070329749254, "loss": 0.993, "step": 890 }, { "epoch": 0.13578177384943615, "grad_norm": 1.1484375, "learning_rate": 0.00019419046629906204, "loss": 1.0914, "step": 891 }, { "epoch": 0.13593416641267905, "grad_norm": 1.0703125, "learning_rate": 0.0001941738766770466, "loss": 1.0396, "step": 892 }, { "epoch": 0.13608655897592198, "grad_norm": 0.95703125, "learning_rate": 0.00019415726411292053, "loss": 1.1547, "step": 893 }, { "epoch": 0.13623895153916488, "grad_norm": 0.8203125, "learning_rate": 0.0001941406286107309, "loss": 0.9051, "step": 894 }, { "epoch": 0.1363913441024078, "grad_norm": 0.74609375, "learning_rate": 0.00019412397017453046, "loss": 0.872, "step": 895 }, { "epoch": 0.13654373666565073, "grad_norm": 0.80078125, "learning_rate": 0.0001941072888083773, "loss": 0.9148, "step": 896 }, { "epoch": 0.13669612922889363, "grad_norm": 1.0078125, "learning_rate": 0.0001940905845163353, "loss": 0.9506, "step": 897 }, { "epoch": 0.13684852179213655, "grad_norm": 0.8359375, "learning_rate": 0.00019407385730247387, "loss": 0.9078, "step": 898 }, { "epoch": 0.13700091435537945, "grad_norm": 1.2578125, "learning_rate": 0.000194057107170868, "loss": 1.0408, "step": 899 }, { "epoch": 0.13715330691862238, "grad_norm": 0.84765625, "learning_rate": 0.00019404033412559826, "loss": 1.035, "step": 900 }, { "epoch": 0.13730569948186527, "grad_norm": 0.86328125, "learning_rate": 0.00019402353817075078, "loss": 0.8642, "step": 901 }, { "epoch": 0.1374580920451082, "grad_norm": 0.84375, "learning_rate": 0.00019400671931041737, "loss": 1.002, "step": 902 }, { "epoch": 0.13761048460835112, "grad_norm": 1.140625, "learning_rate": 0.00019398987754869524, "loss": 1.0066, "step": 903 }, { "epoch": 0.13776287717159402, "grad_norm": 0.82421875, "learning_rate": 0.00019397301288968737, "loss": 0.7368, "step": 904 }, { "epoch": 0.13791526973483695, "grad_norm": 0.8984375, "learning_rate": 0.0001939561253375022, "loss": 0.9958, "step": 905 }, { "epoch": 0.13806766229807985, "grad_norm": 0.69140625, "learning_rate": 0.00019393921489625377, "loss": 0.9181, "step": 906 }, { "epoch": 0.13822005486132277, "grad_norm": 0.7734375, "learning_rate": 0.00019392228157006175, "loss": 0.8753, "step": 907 }, { "epoch": 0.13837244742456567, "grad_norm": 1.03125, "learning_rate": 0.00019390532536305125, "loss": 1.0433, "step": 908 }, { "epoch": 0.1385248399878086, "grad_norm": 1.0859375, "learning_rate": 0.00019388834627935317, "loss": 0.9108, "step": 909 }, { "epoch": 0.13867723255105152, "grad_norm": 1.0546875, "learning_rate": 0.00019387134432310378, "loss": 1.1164, "step": 910 }, { "epoch": 0.13882962511429442, "grad_norm": 1.09375, "learning_rate": 0.000193854319498445, "loss": 0.9931, "step": 911 }, { "epoch": 0.13898201767753735, "grad_norm": 0.91796875, "learning_rate": 0.00019383727180952439, "loss": 0.9602, "step": 912 }, { "epoch": 0.13913441024078024, "grad_norm": 0.859375, "learning_rate": 0.0001938202012604949, "loss": 1.0474, "step": 913 }, { "epoch": 0.13928680280402317, "grad_norm": 0.87890625, "learning_rate": 0.00019380310785551528, "loss": 0.8751, "step": 914 }, { "epoch": 0.13943919536726607, "grad_norm": 0.9140625, "learning_rate": 0.00019378599159874965, "loss": 1.0503, "step": 915 }, { "epoch": 0.139591587930509, "grad_norm": 0.7734375, "learning_rate": 0.00019376885249436777, "loss": 0.803, "step": 916 }, { "epoch": 0.13974398049375192, "grad_norm": 1.0859375, "learning_rate": 0.00019375169054654503, "loss": 1.1344, "step": 917 }, { "epoch": 0.13989637305699482, "grad_norm": 0.87890625, "learning_rate": 0.0001937345057594623, "loss": 1.0443, "step": 918 }, { "epoch": 0.14004876562023774, "grad_norm": 1.0703125, "learning_rate": 0.00019371729813730606, "loss": 0.8192, "step": 919 }, { "epoch": 0.14020115818348064, "grad_norm": 0.7890625, "learning_rate": 0.00019370006768426828, "loss": 0.9286, "step": 920 }, { "epoch": 0.14035355074672357, "grad_norm": 0.96875, "learning_rate": 0.0001936828144045466, "loss": 1.0353, "step": 921 }, { "epoch": 0.14050594330996646, "grad_norm": 0.96484375, "learning_rate": 0.00019366553830234414, "loss": 0.9784, "step": 922 }, { "epoch": 0.1406583358732094, "grad_norm": 0.81640625, "learning_rate": 0.00019364823938186962, "loss": 0.9192, "step": 923 }, { "epoch": 0.14081072843645231, "grad_norm": 0.98828125, "learning_rate": 0.0001936309176473373, "loss": 1.0218, "step": 924 }, { "epoch": 0.1409631209996952, "grad_norm": 1.21875, "learning_rate": 0.000193613573102967, "loss": 1.0039, "step": 925 }, { "epoch": 0.14111551356293814, "grad_norm": 1.078125, "learning_rate": 0.0001935962057529841, "loss": 1.0078, "step": 926 }, { "epoch": 0.14126790612618104, "grad_norm": 0.90625, "learning_rate": 0.00019357881560161958, "loss": 1.0149, "step": 927 }, { "epoch": 0.14142029868942396, "grad_norm": 0.91015625, "learning_rate": 0.00019356140265310983, "loss": 0.9545, "step": 928 }, { "epoch": 0.14157269125266686, "grad_norm": 1.0, "learning_rate": 0.000193543966911697, "loss": 0.8412, "step": 929 }, { "epoch": 0.14172508381590979, "grad_norm": 0.93359375, "learning_rate": 0.00019352650838162861, "loss": 0.9287, "step": 930 }, { "epoch": 0.14187747637915268, "grad_norm": 0.77734375, "learning_rate": 0.0001935090270671579, "loss": 1.0268, "step": 931 }, { "epoch": 0.1420298689423956, "grad_norm": 1.09375, "learning_rate": 0.00019349152297254345, "loss": 1.0703, "step": 932 }, { "epoch": 0.14218226150563854, "grad_norm": 1.1796875, "learning_rate": 0.00019347399610204958, "loss": 0.9131, "step": 933 }, { "epoch": 0.14233465406888143, "grad_norm": 0.80078125, "learning_rate": 0.0001934564464599461, "loss": 0.9696, "step": 934 }, { "epoch": 0.14248704663212436, "grad_norm": 0.9375, "learning_rate": 0.00019343887405050834, "loss": 0.982, "step": 935 }, { "epoch": 0.14263943919536726, "grad_norm": 0.96875, "learning_rate": 0.00019342127887801716, "loss": 1.0061, "step": 936 }, { "epoch": 0.14279183175861018, "grad_norm": 0.90234375, "learning_rate": 0.00019340366094675903, "loss": 0.9542, "step": 937 }, { "epoch": 0.14294422432185308, "grad_norm": 0.86328125, "learning_rate": 0.00019338602026102594, "loss": 0.9773, "step": 938 }, { "epoch": 0.143096616885096, "grad_norm": 0.90234375, "learning_rate": 0.0001933683568251154, "loss": 0.9621, "step": 939 }, { "epoch": 0.14324900944833893, "grad_norm": 0.97265625, "learning_rate": 0.00019335067064333046, "loss": 0.9616, "step": 940 }, { "epoch": 0.14340140201158183, "grad_norm": 0.828125, "learning_rate": 0.00019333296171997975, "loss": 0.8997, "step": 941 }, { "epoch": 0.14355379457482476, "grad_norm": 1.1640625, "learning_rate": 0.00019331523005937742, "loss": 0.9109, "step": 942 }, { "epoch": 0.14370618713806765, "grad_norm": 0.8203125, "learning_rate": 0.00019329747566584313, "loss": 1.0113, "step": 943 }, { "epoch": 0.14385857970131058, "grad_norm": 0.8984375, "learning_rate": 0.00019327969854370216, "loss": 0.944, "step": 944 }, { "epoch": 0.14401097226455348, "grad_norm": 0.828125, "learning_rate": 0.00019326189869728523, "loss": 0.9849, "step": 945 }, { "epoch": 0.1441633648277964, "grad_norm": 0.984375, "learning_rate": 0.0001932440761309286, "loss": 0.7688, "step": 946 }, { "epoch": 0.14431575739103933, "grad_norm": 0.90625, "learning_rate": 0.0001932262308489742, "loss": 0.9942, "step": 947 }, { "epoch": 0.14446814995428223, "grad_norm": 0.953125, "learning_rate": 0.00019320836285576933, "loss": 1.0105, "step": 948 }, { "epoch": 0.14462054251752515, "grad_norm": 0.7890625, "learning_rate": 0.0001931904721556669, "loss": 0.9569, "step": 949 }, { "epoch": 0.14477293508076805, "grad_norm": 0.70703125, "learning_rate": 0.00019317255875302535, "loss": 0.8689, "step": 950 }, { "epoch": 0.14492532764401098, "grad_norm": 0.91015625, "learning_rate": 0.00019315462265220867, "loss": 0.9971, "step": 951 }, { "epoch": 0.14507772020725387, "grad_norm": 0.96875, "learning_rate": 0.0001931366638575863, "loss": 1.0863, "step": 952 }, { "epoch": 0.1452301127704968, "grad_norm": 0.9765625, "learning_rate": 0.0001931186823735333, "loss": 0.9543, "step": 953 }, { "epoch": 0.14538250533373973, "grad_norm": 0.90234375, "learning_rate": 0.00019310067820443017, "loss": 0.9417, "step": 954 }, { "epoch": 0.14553489789698262, "grad_norm": 1.21875, "learning_rate": 0.00019308265135466307, "loss": 1.052, "step": 955 }, { "epoch": 0.14568729046022555, "grad_norm": 1.1015625, "learning_rate": 0.0001930646018286235, "loss": 1.3072, "step": 956 }, { "epoch": 0.14583968302346845, "grad_norm": 0.91015625, "learning_rate": 0.0001930465296307087, "loss": 1.041, "step": 957 }, { "epoch": 0.14599207558671137, "grad_norm": 0.71875, "learning_rate": 0.00019302843476532117, "loss": 0.9825, "step": 958 }, { "epoch": 0.14614446814995427, "grad_norm": 0.85546875, "learning_rate": 0.00019301031723686918, "loss": 1.049, "step": 959 }, { "epoch": 0.1462968607131972, "grad_norm": 0.72265625, "learning_rate": 0.00019299217704976643, "loss": 0.9242, "step": 960 }, { "epoch": 0.14644925327644012, "grad_norm": 0.92578125, "learning_rate": 0.00019297401420843206, "loss": 1.0237, "step": 961 }, { "epoch": 0.14660164583968302, "grad_norm": 0.984375, "learning_rate": 0.00019295582871729086, "loss": 1.0586, "step": 962 }, { "epoch": 0.14675403840292595, "grad_norm": 1.015625, "learning_rate": 0.00019293762058077306, "loss": 0.9352, "step": 963 }, { "epoch": 0.14690643096616884, "grad_norm": 0.7890625, "learning_rate": 0.00019291938980331438, "loss": 0.9039, "step": 964 }, { "epoch": 0.14705882352941177, "grad_norm": 0.9921875, "learning_rate": 0.00019290113638935615, "loss": 1.0679, "step": 965 }, { "epoch": 0.14721121609265467, "grad_norm": 0.875, "learning_rate": 0.0001928828603433452, "loss": 0.844, "step": 966 }, { "epoch": 0.1473636086558976, "grad_norm": 0.88671875, "learning_rate": 0.00019286456166973376, "loss": 0.9329, "step": 967 }, { "epoch": 0.14751600121914052, "grad_norm": 1.0859375, "learning_rate": 0.0001928462403729797, "loss": 1.0891, "step": 968 }, { "epoch": 0.14766839378238342, "grad_norm": 1.0859375, "learning_rate": 0.00019282789645754629, "loss": 0.928, "step": 969 }, { "epoch": 0.14782078634562634, "grad_norm": 1.2265625, "learning_rate": 0.00019280952992790245, "loss": 0.9278, "step": 970 }, { "epoch": 0.14797317890886924, "grad_norm": 0.96875, "learning_rate": 0.00019279114078852246, "loss": 1.058, "step": 971 }, { "epoch": 0.14812557147211217, "grad_norm": 0.9140625, "learning_rate": 0.00019277272904388623, "loss": 0.9232, "step": 972 }, { "epoch": 0.14827796403535506, "grad_norm": 0.8515625, "learning_rate": 0.00019275429469847914, "loss": 0.9831, "step": 973 }, { "epoch": 0.148430356598598, "grad_norm": 1.171875, "learning_rate": 0.000192735837756792, "loss": 1.0071, "step": 974 }, { "epoch": 0.14858274916184092, "grad_norm": 0.95703125, "learning_rate": 0.00019271735822332122, "loss": 0.9855, "step": 975 }, { "epoch": 0.1487351417250838, "grad_norm": 0.82421875, "learning_rate": 0.00019269885610256865, "loss": 0.866, "step": 976 }, { "epoch": 0.14888753428832674, "grad_norm": 1.21875, "learning_rate": 0.00019268033139904173, "loss": 1.0646, "step": 977 }, { "epoch": 0.14903992685156964, "grad_norm": 0.921875, "learning_rate": 0.00019266178411725334, "loss": 0.9542, "step": 978 }, { "epoch": 0.14919231941481256, "grad_norm": 0.6171875, "learning_rate": 0.0001926432142617218, "loss": 0.7183, "step": 979 }, { "epoch": 0.14934471197805546, "grad_norm": 0.91015625, "learning_rate": 0.00019262462183697104, "loss": 1.0146, "step": 980 }, { "epoch": 0.1494971045412984, "grad_norm": 0.9375, "learning_rate": 0.00019260600684753044, "loss": 1.0049, "step": 981 }, { "epoch": 0.14964949710454128, "grad_norm": 0.86328125, "learning_rate": 0.00019258736929793487, "loss": 0.9972, "step": 982 }, { "epoch": 0.1498018896677842, "grad_norm": 0.81640625, "learning_rate": 0.0001925687091927247, "loss": 1.1177, "step": 983 }, { "epoch": 0.14995428223102714, "grad_norm": 0.84375, "learning_rate": 0.00019255002653644583, "loss": 1.0845, "step": 984 }, { "epoch": 0.15010667479427003, "grad_norm": 0.890625, "learning_rate": 0.0001925313213336496, "loss": 0.8379, "step": 985 }, { "epoch": 0.15025906735751296, "grad_norm": 0.84765625, "learning_rate": 0.00019251259358889287, "loss": 0.9579, "step": 986 }, { "epoch": 0.15041145992075586, "grad_norm": 0.9453125, "learning_rate": 0.00019249384330673793, "loss": 0.9486, "step": 987 }, { "epoch": 0.15056385248399878, "grad_norm": 1.1015625, "learning_rate": 0.00019247507049175276, "loss": 1.0827, "step": 988 }, { "epoch": 0.15071624504724168, "grad_norm": 0.8984375, "learning_rate": 0.00019245627514851056, "loss": 1.0088, "step": 989 }, { "epoch": 0.1508686376104846, "grad_norm": 0.87890625, "learning_rate": 0.00019243745728159017, "loss": 1.0643, "step": 990 }, { "epoch": 0.15102103017372753, "grad_norm": 1.3359375, "learning_rate": 0.00019241861689557594, "loss": 0.8147, "step": 991 }, { "epoch": 0.15117342273697043, "grad_norm": 0.95703125, "learning_rate": 0.00019239975399505763, "loss": 1.0629, "step": 992 }, { "epoch": 0.15132581530021336, "grad_norm": 0.9921875, "learning_rate": 0.0001923808685846305, "loss": 0.8365, "step": 993 }, { "epoch": 0.15147820786345625, "grad_norm": 1.0625, "learning_rate": 0.00019236196066889534, "loss": 0.8663, "step": 994 }, { "epoch": 0.15163060042669918, "grad_norm": 1.1484375, "learning_rate": 0.00019234303025245835, "loss": 0.9553, "step": 995 }, { "epoch": 0.15178299298994208, "grad_norm": 1.25, "learning_rate": 0.0001923240773399313, "loss": 0.9782, "step": 996 }, { "epoch": 0.151935385553185, "grad_norm": 0.99609375, "learning_rate": 0.00019230510193593133, "loss": 0.7689, "step": 997 }, { "epoch": 0.15208777811642793, "grad_norm": 0.8046875, "learning_rate": 0.00019228610404508118, "loss": 1.0858, "step": 998 }, { "epoch": 0.15224017067967083, "grad_norm": 0.62890625, "learning_rate": 0.00019226708367200897, "loss": 0.7863, "step": 999 }, { "epoch": 0.15239256324291375, "grad_norm": 0.7734375, "learning_rate": 0.00019224804082134837, "loss": 0.8993, "step": 1000 }, { "epoch": 0.15254495580615665, "grad_norm": 0.83984375, "learning_rate": 0.00019222897549773848, "loss": 0.8931, "step": 1001 }, { "epoch": 0.15269734836939958, "grad_norm": 1.0625, "learning_rate": 0.00019220988770582388, "loss": 0.986, "step": 1002 }, { "epoch": 0.15284974093264247, "grad_norm": 0.8671875, "learning_rate": 0.00019219077745025463, "loss": 0.8914, "step": 1003 }, { "epoch": 0.1530021334958854, "grad_norm": 0.78125, "learning_rate": 0.00019217164473568624, "loss": 1.0879, "step": 1004 }, { "epoch": 0.15315452605912833, "grad_norm": 1.0234375, "learning_rate": 0.00019215248956677976, "loss": 1.2107, "step": 1005 }, { "epoch": 0.15330691862237122, "grad_norm": 1.078125, "learning_rate": 0.00019213331194820166, "loss": 1.1035, "step": 1006 }, { "epoch": 0.15345931118561415, "grad_norm": 1.0234375, "learning_rate": 0.00019211411188462386, "loss": 1.1458, "step": 1007 }, { "epoch": 0.15361170374885705, "grad_norm": 0.890625, "learning_rate": 0.00019209488938072377, "loss": 0.9162, "step": 1008 }, { "epoch": 0.15376409631209997, "grad_norm": 0.8359375, "learning_rate": 0.00019207564444118427, "loss": 0.8972, "step": 1009 }, { "epoch": 0.15391648887534287, "grad_norm": 0.734375, "learning_rate": 0.00019205637707069375, "loss": 0.9696, "step": 1010 }, { "epoch": 0.1540688814385858, "grad_norm": 1.0390625, "learning_rate": 0.00019203708727394596, "loss": 0.9442, "step": 1011 }, { "epoch": 0.15422127400182872, "grad_norm": 1.203125, "learning_rate": 0.0001920177750556402, "loss": 1.1905, "step": 1012 }, { "epoch": 0.15437366656507162, "grad_norm": 1.1953125, "learning_rate": 0.00019199844042048117, "loss": 0.9822, "step": 1013 }, { "epoch": 0.15452605912831455, "grad_norm": 0.82421875, "learning_rate": 0.0001919790833731791, "loss": 0.8916, "step": 1014 }, { "epoch": 0.15467845169155744, "grad_norm": 0.77734375, "learning_rate": 0.00019195970391844966, "loss": 0.8945, "step": 1015 }, { "epoch": 0.15483084425480037, "grad_norm": 0.90234375, "learning_rate": 0.00019194030206101393, "loss": 1.0082, "step": 1016 }, { "epoch": 0.15498323681804327, "grad_norm": 1.28125, "learning_rate": 0.0001919208778055985, "loss": 0.9712, "step": 1017 }, { "epoch": 0.1551356293812862, "grad_norm": 0.953125, "learning_rate": 0.00019190143115693534, "loss": 0.793, "step": 1018 }, { "epoch": 0.15528802194452912, "grad_norm": 0.9609375, "learning_rate": 0.00019188196211976204, "loss": 0.9645, "step": 1019 }, { "epoch": 0.15544041450777202, "grad_norm": 0.890625, "learning_rate": 0.00019186247069882147, "loss": 1.0311, "step": 1020 }, { "epoch": 0.15559280707101494, "grad_norm": 1.1640625, "learning_rate": 0.000191842956898862, "loss": 1.0526, "step": 1021 }, { "epoch": 0.15574519963425784, "grad_norm": 1.0390625, "learning_rate": 0.00019182342072463754, "loss": 0.9388, "step": 1022 }, { "epoch": 0.15589759219750077, "grad_norm": 1.0390625, "learning_rate": 0.00019180386218090734, "loss": 1.1132, "step": 1023 }, { "epoch": 0.15604998476074367, "grad_norm": 0.875, "learning_rate": 0.00019178428127243616, "loss": 0.987, "step": 1024 }, { "epoch": 0.1562023773239866, "grad_norm": 0.71875, "learning_rate": 0.00019176467800399415, "loss": 0.9361, "step": 1025 }, { "epoch": 0.15635476988722952, "grad_norm": 0.73046875, "learning_rate": 0.000191745052380357, "loss": 0.8241, "step": 1026 }, { "epoch": 0.15650716245047241, "grad_norm": 1.1640625, "learning_rate": 0.00019172540440630576, "loss": 0.9696, "step": 1027 }, { "epoch": 0.15665955501371534, "grad_norm": 1.171875, "learning_rate": 0.00019170573408662698, "loss": 1.0731, "step": 1028 }, { "epoch": 0.15681194757695824, "grad_norm": 0.7734375, "learning_rate": 0.00019168604142611262, "loss": 1.0069, "step": 1029 }, { "epoch": 0.15696434014020116, "grad_norm": 1.1171875, "learning_rate": 0.00019166632642956012, "loss": 1.0517, "step": 1030 }, { "epoch": 0.15711673270344406, "grad_norm": 1.0078125, "learning_rate": 0.0001916465891017723, "loss": 1.0042, "step": 1031 }, { "epoch": 0.157269125266687, "grad_norm": 0.68359375, "learning_rate": 0.00019162682944755746, "loss": 0.7448, "step": 1032 }, { "epoch": 0.15742151782992989, "grad_norm": 1.5703125, "learning_rate": 0.00019160704747172934, "loss": 0.94, "step": 1033 }, { "epoch": 0.1575739103931728, "grad_norm": 0.796875, "learning_rate": 0.00019158724317910718, "loss": 0.8631, "step": 1034 }, { "epoch": 0.15772630295641574, "grad_norm": 0.8359375, "learning_rate": 0.00019156741657451546, "loss": 0.846, "step": 1035 }, { "epoch": 0.15787869551965864, "grad_norm": 1.0390625, "learning_rate": 0.00019154756766278435, "loss": 0.9594, "step": 1036 }, { "epoch": 0.15803108808290156, "grad_norm": 0.83984375, "learning_rate": 0.00019152769644874927, "loss": 0.828, "step": 1037 }, { "epoch": 0.15818348064614446, "grad_norm": 0.93359375, "learning_rate": 0.00019150780293725113, "loss": 0.9828, "step": 1038 }, { "epoch": 0.15833587320938738, "grad_norm": 0.79296875, "learning_rate": 0.0001914878871331363, "loss": 1.0389, "step": 1039 }, { "epoch": 0.15848826577263028, "grad_norm": 0.90234375, "learning_rate": 0.00019146794904125654, "loss": 0.9034, "step": 1040 }, { "epoch": 0.1586406583358732, "grad_norm": 0.9375, "learning_rate": 0.00019144798866646906, "loss": 0.9639, "step": 1041 }, { "epoch": 0.15879305089911613, "grad_norm": 1.2109375, "learning_rate": 0.0001914280060136365, "loss": 0.9031, "step": 1042 }, { "epoch": 0.15894544346235903, "grad_norm": 0.8125, "learning_rate": 0.0001914080010876269, "loss": 0.8617, "step": 1043 }, { "epoch": 0.15909783602560196, "grad_norm": 1.2109375, "learning_rate": 0.0001913879738933138, "loss": 1.1093, "step": 1044 }, { "epoch": 0.15925022858884486, "grad_norm": 0.859375, "learning_rate": 0.00019136792443557603, "loss": 0.9238, "step": 1045 }, { "epoch": 0.15940262115208778, "grad_norm": 1.0625, "learning_rate": 0.000191347852719298, "loss": 1.1198, "step": 1046 }, { "epoch": 0.15955501371533068, "grad_norm": 0.85546875, "learning_rate": 0.00019132775874936947, "loss": 0.888, "step": 1047 }, { "epoch": 0.1597074062785736, "grad_norm": 0.92578125, "learning_rate": 0.00019130764253068555, "loss": 1.1161, "step": 1048 }, { "epoch": 0.15985979884181653, "grad_norm": 0.90625, "learning_rate": 0.00019128750406814687, "loss": 1.0967, "step": 1049 }, { "epoch": 0.16001219140505943, "grad_norm": 0.9921875, "learning_rate": 0.0001912673433666595, "loss": 1.0028, "step": 1050 }, { "epoch": 0.16016458396830235, "grad_norm": 1.1796875, "learning_rate": 0.00019124716043113481, "loss": 1.0489, "step": 1051 }, { "epoch": 0.16031697653154525, "grad_norm": 0.8984375, "learning_rate": 0.00019122695526648968, "loss": 1.0155, "step": 1052 }, { "epoch": 0.16046936909478818, "grad_norm": 0.8515625, "learning_rate": 0.0001912067278776464, "loss": 0.8715, "step": 1053 }, { "epoch": 0.16062176165803108, "grad_norm": 0.859375, "learning_rate": 0.00019118647826953263, "loss": 1.3036, "step": 1054 }, { "epoch": 0.160774154221274, "grad_norm": 1.1015625, "learning_rate": 0.00019116620644708145, "loss": 1.1815, "step": 1055 }, { "epoch": 0.16092654678451693, "grad_norm": 0.7734375, "learning_rate": 0.0001911459124152314, "loss": 1.0127, "step": 1056 }, { "epoch": 0.16107893934775983, "grad_norm": 1.0078125, "learning_rate": 0.00019112559617892637, "loss": 1.0939, "step": 1057 }, { "epoch": 0.16123133191100275, "grad_norm": 0.9609375, "learning_rate": 0.00019110525774311573, "loss": 1.0947, "step": 1058 }, { "epoch": 0.16138372447424565, "grad_norm": 1.0078125, "learning_rate": 0.00019108489711275418, "loss": 0.9813, "step": 1059 }, { "epoch": 0.16153611703748857, "grad_norm": 1.09375, "learning_rate": 0.00019106451429280185, "loss": 0.8242, "step": 1060 }, { "epoch": 0.16168850960073147, "grad_norm": 0.71484375, "learning_rate": 0.0001910441092882243, "loss": 0.898, "step": 1061 }, { "epoch": 0.1618409021639744, "grad_norm": 0.61328125, "learning_rate": 0.0001910236821039925, "loss": 0.8634, "step": 1062 }, { "epoch": 0.16199329472721732, "grad_norm": 0.79296875, "learning_rate": 0.0001910032327450828, "loss": 0.9302, "step": 1063 }, { "epoch": 0.16214568729046022, "grad_norm": 0.84765625, "learning_rate": 0.00019098276121647695, "loss": 0.8795, "step": 1064 }, { "epoch": 0.16229807985370315, "grad_norm": 0.92578125, "learning_rate": 0.0001909622675231621, "loss": 0.8676, "step": 1065 }, { "epoch": 0.16245047241694605, "grad_norm": 1.1953125, "learning_rate": 0.00019094175167013084, "loss": 1.1053, "step": 1066 }, { "epoch": 0.16260286498018897, "grad_norm": 0.828125, "learning_rate": 0.00019092121366238112, "loss": 0.8968, "step": 1067 }, { "epoch": 0.16275525754343187, "grad_norm": 0.859375, "learning_rate": 0.00019090065350491626, "loss": 0.9281, "step": 1068 }, { "epoch": 0.1629076501066748, "grad_norm": 0.859375, "learning_rate": 0.00019088007120274502, "loss": 1.0041, "step": 1069 }, { "epoch": 0.16306004266991772, "grad_norm": 1.171875, "learning_rate": 0.00019085946676088158, "loss": 0.8957, "step": 1070 }, { "epoch": 0.16321243523316062, "grad_norm": 0.828125, "learning_rate": 0.00019083884018434547, "loss": 1.1072, "step": 1071 }, { "epoch": 0.16336482779640354, "grad_norm": 0.93359375, "learning_rate": 0.00019081819147816155, "loss": 0.8413, "step": 1072 }, { "epoch": 0.16351722035964644, "grad_norm": 0.87890625, "learning_rate": 0.00019079752064736022, "loss": 0.9502, "step": 1073 }, { "epoch": 0.16366961292288937, "grad_norm": 1.0078125, "learning_rate": 0.0001907768276969772, "loss": 1.1205, "step": 1074 }, { "epoch": 0.16382200548613227, "grad_norm": 0.71875, "learning_rate": 0.00019075611263205352, "loss": 0.8727, "step": 1075 }, { "epoch": 0.1639743980493752, "grad_norm": 1.2734375, "learning_rate": 0.00019073537545763572, "loss": 0.901, "step": 1076 }, { "epoch": 0.16412679061261812, "grad_norm": 1.0234375, "learning_rate": 0.00019071461617877565, "loss": 1.0619, "step": 1077 }, { "epoch": 0.16427918317586102, "grad_norm": 1.1328125, "learning_rate": 0.00019069383480053057, "loss": 1.0468, "step": 1078 }, { "epoch": 0.16443157573910394, "grad_norm": 1.171875, "learning_rate": 0.00019067303132796318, "loss": 1.1226, "step": 1079 }, { "epoch": 0.16458396830234684, "grad_norm": 1.015625, "learning_rate": 0.00019065220576614143, "loss": 0.9602, "step": 1080 }, { "epoch": 0.16473636086558976, "grad_norm": 0.85546875, "learning_rate": 0.00019063135812013874, "loss": 1.0302, "step": 1081 }, { "epoch": 0.16488875342883266, "grad_norm": 0.9375, "learning_rate": 0.00019061048839503393, "loss": 0.9413, "step": 1082 }, { "epoch": 0.1650411459920756, "grad_norm": 0.859375, "learning_rate": 0.00019058959659591112, "loss": 0.9988, "step": 1083 }, { "epoch": 0.16519353855531851, "grad_norm": 1.0703125, "learning_rate": 0.00019056868272785988, "loss": 1.037, "step": 1084 }, { "epoch": 0.1653459311185614, "grad_norm": 0.89453125, "learning_rate": 0.00019054774679597513, "loss": 1.0304, "step": 1085 }, { "epoch": 0.16549832368180434, "grad_norm": 1.1171875, "learning_rate": 0.00019052678880535719, "loss": 1.0393, "step": 1086 }, { "epoch": 0.16565071624504724, "grad_norm": 0.921875, "learning_rate": 0.00019050580876111165, "loss": 1.0502, "step": 1087 }, { "epoch": 0.16580310880829016, "grad_norm": 1.1796875, "learning_rate": 0.00019048480666834965, "loss": 1.0313, "step": 1088 }, { "epoch": 0.16595550137153306, "grad_norm": 0.87109375, "learning_rate": 0.0001904637825321875, "loss": 0.9175, "step": 1089 }, { "epoch": 0.16610789393477599, "grad_norm": 0.765625, "learning_rate": 0.00019044273635774705, "loss": 0.9655, "step": 1090 }, { "epoch": 0.16626028649801888, "grad_norm": 1.0234375, "learning_rate": 0.00019042166815015548, "loss": 0.9924, "step": 1091 }, { "epoch": 0.1664126790612618, "grad_norm": 1.140625, "learning_rate": 0.0001904005779145452, "loss": 1.053, "step": 1092 }, { "epoch": 0.16656507162450473, "grad_norm": 0.96484375, "learning_rate": 0.00019037946565605418, "loss": 0.9933, "step": 1093 }, { "epoch": 0.16671746418774763, "grad_norm": 0.73828125, "learning_rate": 0.00019035833137982563, "loss": 1.1257, "step": 1094 }, { "epoch": 0.16686985675099056, "grad_norm": 1.171875, "learning_rate": 0.0001903371750910082, "loss": 0.8516, "step": 1095 }, { "epoch": 0.16702224931423346, "grad_norm": 1.078125, "learning_rate": 0.00019031599679475585, "loss": 0.9712, "step": 1096 }, { "epoch": 0.16717464187747638, "grad_norm": 0.91796875, "learning_rate": 0.0001902947964962279, "loss": 0.8825, "step": 1097 }, { "epoch": 0.16732703444071928, "grad_norm": 1.21875, "learning_rate": 0.00019027357420058904, "loss": 0.9668, "step": 1098 }, { "epoch": 0.1674794270039622, "grad_norm": 0.984375, "learning_rate": 0.0001902523299130094, "loss": 1.1545, "step": 1099 }, { "epoch": 0.16763181956720513, "grad_norm": 0.765625, "learning_rate": 0.0001902310636386643, "loss": 1.0589, "step": 1100 }, { "epoch": 0.16778421213044803, "grad_norm": 0.8359375, "learning_rate": 0.00019020977538273458, "loss": 0.9438, "step": 1101 }, { "epoch": 0.16793660469369096, "grad_norm": 0.8125, "learning_rate": 0.00019018846515040633, "loss": 0.928, "step": 1102 }, { "epoch": 0.16808899725693385, "grad_norm": 1.2421875, "learning_rate": 0.000190167132946871, "loss": 1.2064, "step": 1103 }, { "epoch": 0.16824138982017678, "grad_norm": 0.8203125, "learning_rate": 0.0001901457787773255, "loss": 0.9659, "step": 1104 }, { "epoch": 0.16839378238341968, "grad_norm": 1.484375, "learning_rate": 0.00019012440264697193, "loss": 1.1082, "step": 1105 }, { "epoch": 0.1685461749466626, "grad_norm": 0.97265625, "learning_rate": 0.00019010300456101788, "loss": 0.9724, "step": 1106 }, { "epoch": 0.16869856750990553, "grad_norm": 0.9453125, "learning_rate": 0.0001900815845246762, "loss": 1.0455, "step": 1107 }, { "epoch": 0.16885096007314843, "grad_norm": 1.0078125, "learning_rate": 0.00019006014254316518, "loss": 1.0446, "step": 1108 }, { "epoch": 0.16900335263639135, "grad_norm": 0.7578125, "learning_rate": 0.00019003867862170832, "loss": 0.945, "step": 1109 }, { "epoch": 0.16915574519963425, "grad_norm": 1.015625, "learning_rate": 0.00019001719276553458, "loss": 1.0559, "step": 1110 }, { "epoch": 0.16930813776287718, "grad_norm": 0.79296875, "learning_rate": 0.0001899956849798782, "loss": 0.8827, "step": 1111 }, { "epoch": 0.16946053032612007, "grad_norm": 0.89453125, "learning_rate": 0.0001899741552699788, "loss": 0.913, "step": 1112 }, { "epoch": 0.169612922889363, "grad_norm": 1.0234375, "learning_rate": 0.0001899526036410813, "loss": 1.0238, "step": 1113 }, { "epoch": 0.16976531545260592, "grad_norm": 0.828125, "learning_rate": 0.00018993103009843604, "loss": 0.8947, "step": 1114 }, { "epoch": 0.16991770801584882, "grad_norm": 0.828125, "learning_rate": 0.00018990943464729864, "loss": 0.9042, "step": 1115 }, { "epoch": 0.17007010057909175, "grad_norm": 0.96484375, "learning_rate": 0.00018988781729292997, "loss": 1.0382, "step": 1116 }, { "epoch": 0.17022249314233465, "grad_norm": 0.875, "learning_rate": 0.00018986617804059644, "loss": 1.0855, "step": 1117 }, { "epoch": 0.17037488570557757, "grad_norm": 0.98046875, "learning_rate": 0.00018984451689556963, "loss": 0.9498, "step": 1118 }, { "epoch": 0.17052727826882047, "grad_norm": 0.890625, "learning_rate": 0.00018982283386312652, "loss": 0.9415, "step": 1119 }, { "epoch": 0.1706796708320634, "grad_norm": 0.87109375, "learning_rate": 0.00018980112894854942, "loss": 0.9402, "step": 1120 }, { "epoch": 0.17083206339530632, "grad_norm": 0.98046875, "learning_rate": 0.00018977940215712593, "loss": 0.989, "step": 1121 }, { "epoch": 0.17098445595854922, "grad_norm": 0.80859375, "learning_rate": 0.00018975765349414902, "loss": 1.1541, "step": 1122 }, { "epoch": 0.17113684852179215, "grad_norm": 0.87890625, "learning_rate": 0.000189735882964917, "loss": 1.1358, "step": 1123 }, { "epoch": 0.17128924108503504, "grad_norm": 0.95703125, "learning_rate": 0.00018971409057473343, "loss": 0.8979, "step": 1124 }, { "epoch": 0.17144163364827797, "grad_norm": 0.7734375, "learning_rate": 0.00018969227632890733, "loss": 0.7288, "step": 1125 }, { "epoch": 0.17159402621152087, "grad_norm": 1.03125, "learning_rate": 0.00018967044023275289, "loss": 1.0126, "step": 1126 }, { "epoch": 0.1717464187747638, "grad_norm": 1.0859375, "learning_rate": 0.00018964858229158973, "loss": 0.9624, "step": 1127 }, { "epoch": 0.17189881133800672, "grad_norm": 0.859375, "learning_rate": 0.00018962670251074275, "loss": 1.0081, "step": 1128 }, { "epoch": 0.17205120390124962, "grad_norm": 0.94140625, "learning_rate": 0.00018960480089554217, "loss": 1.0686, "step": 1129 }, { "epoch": 0.17220359646449254, "grad_norm": 1.0234375, "learning_rate": 0.00018958287745132358, "loss": 0.9985, "step": 1130 }, { "epoch": 0.17235598902773544, "grad_norm": 0.9375, "learning_rate": 0.0001895609321834278, "loss": 0.8536, "step": 1131 }, { "epoch": 0.17250838159097837, "grad_norm": 1.1953125, "learning_rate": 0.00018953896509720105, "loss": 1.0238, "step": 1132 }, { "epoch": 0.17266077415422126, "grad_norm": 0.96875, "learning_rate": 0.0001895169761979948, "loss": 0.9937, "step": 1133 }, { "epoch": 0.1728131667174642, "grad_norm": 0.83203125, "learning_rate": 0.00018949496549116584, "loss": 0.7983, "step": 1134 }, { "epoch": 0.17296555928070712, "grad_norm": 0.9296875, "learning_rate": 0.00018947293298207635, "loss": 0.9757, "step": 1135 }, { "epoch": 0.17311795184395, "grad_norm": 0.734375, "learning_rate": 0.00018945087867609374, "loss": 0.975, "step": 1136 }, { "epoch": 0.17327034440719294, "grad_norm": 0.88671875, "learning_rate": 0.00018942880257859077, "loss": 1.0993, "step": 1137 }, { "epoch": 0.17342273697043584, "grad_norm": 0.7734375, "learning_rate": 0.00018940670469494547, "loss": 0.9034, "step": 1138 }, { "epoch": 0.17357512953367876, "grad_norm": 0.6640625, "learning_rate": 0.00018938458503054122, "loss": 0.8546, "step": 1139 }, { "epoch": 0.17372752209692166, "grad_norm": 0.890625, "learning_rate": 0.00018936244359076668, "loss": 0.764, "step": 1140 }, { "epoch": 0.1738799146601646, "grad_norm": 0.94140625, "learning_rate": 0.00018934028038101582, "loss": 0.8334, "step": 1141 }, { "epoch": 0.17403230722340748, "grad_norm": 0.9609375, "learning_rate": 0.00018931809540668793, "loss": 0.905, "step": 1142 }, { "epoch": 0.1741846997866504, "grad_norm": 1.1640625, "learning_rate": 0.00018929588867318758, "loss": 0.8419, "step": 1143 }, { "epoch": 0.17433709234989334, "grad_norm": 0.875, "learning_rate": 0.00018927366018592462, "loss": 0.9498, "step": 1144 }, { "epoch": 0.17448948491313623, "grad_norm": 0.875, "learning_rate": 0.0001892514099503143, "loss": 0.9081, "step": 1145 }, { "epoch": 0.17464187747637916, "grad_norm": 0.8203125, "learning_rate": 0.00018922913797177706, "loss": 1.1052, "step": 1146 }, { "epoch": 0.17479427003962206, "grad_norm": 1.265625, "learning_rate": 0.00018920684425573865, "loss": 0.9738, "step": 1147 }, { "epoch": 0.17494666260286498, "grad_norm": 0.84375, "learning_rate": 0.00018918452880763018, "loss": 0.9192, "step": 1148 }, { "epoch": 0.17509905516610788, "grad_norm": 0.8984375, "learning_rate": 0.000189162191632888, "loss": 0.9124, "step": 1149 }, { "epoch": 0.1752514477293508, "grad_norm": 1.0546875, "learning_rate": 0.00018913983273695375, "loss": 0.9969, "step": 1150 }, { "epoch": 0.17540384029259373, "grad_norm": 1.1171875, "learning_rate": 0.00018911745212527435, "loss": 1.0264, "step": 1151 }, { "epoch": 0.17555623285583663, "grad_norm": 1.140625, "learning_rate": 0.00018909504980330215, "loss": 1.0005, "step": 1152 }, { "epoch": 0.17570862541907956, "grad_norm": 1.0078125, "learning_rate": 0.0001890726257764946, "loss": 0.9895, "step": 1153 }, { "epoch": 0.17586101798232245, "grad_norm": 1.15625, "learning_rate": 0.00018905018005031446, "loss": 1.1643, "step": 1154 }, { "epoch": 0.17601341054556538, "grad_norm": 1.0078125, "learning_rate": 0.00018902771263022995, "loss": 0.9143, "step": 1155 }, { "epoch": 0.17616580310880828, "grad_norm": 0.72265625, "learning_rate": 0.00018900522352171439, "loss": 0.8551, "step": 1156 }, { "epoch": 0.1763181956720512, "grad_norm": 0.875, "learning_rate": 0.00018898271273024646, "loss": 1.191, "step": 1157 }, { "epoch": 0.17647058823529413, "grad_norm": 0.91015625, "learning_rate": 0.00018896018026131012, "loss": 0.9465, "step": 1158 }, { "epoch": 0.17662298079853703, "grad_norm": 0.8359375, "learning_rate": 0.0001889376261203946, "loss": 1.0077, "step": 1159 }, { "epoch": 0.17677537336177995, "grad_norm": 0.875, "learning_rate": 0.0001889150503129944, "loss": 0.8852, "step": 1160 }, { "epoch": 0.17692776592502285, "grad_norm": 1.0078125, "learning_rate": 0.00018889245284460932, "loss": 1.0759, "step": 1161 }, { "epoch": 0.17708015848826578, "grad_norm": 1.015625, "learning_rate": 0.00018886983372074444, "loss": 1.1039, "step": 1162 }, { "epoch": 0.17723255105150867, "grad_norm": 0.7734375, "learning_rate": 0.0001888471929469101, "loss": 0.9051, "step": 1163 }, { "epoch": 0.1773849436147516, "grad_norm": 0.9453125, "learning_rate": 0.0001888245305286219, "loss": 1.0796, "step": 1164 }, { "epoch": 0.17753733617799453, "grad_norm": 1.8125, "learning_rate": 0.00018880184647140076, "loss": 0.9226, "step": 1165 }, { "epoch": 0.17768972874123742, "grad_norm": 1.0234375, "learning_rate": 0.0001887791407807728, "loss": 1.118, "step": 1166 }, { "epoch": 0.17784212130448035, "grad_norm": 0.859375, "learning_rate": 0.0001887564134622695, "loss": 0.9846, "step": 1167 }, { "epoch": 0.17799451386772325, "grad_norm": 0.98828125, "learning_rate": 0.0001887336645214275, "loss": 0.9231, "step": 1168 }, { "epoch": 0.17814690643096617, "grad_norm": 1.1171875, "learning_rate": 0.00018871089396378885, "loss": 1.0237, "step": 1169 }, { "epoch": 0.17829929899420907, "grad_norm": 1.0078125, "learning_rate": 0.00018868810179490075, "loss": 1.0441, "step": 1170 }, { "epoch": 0.178451691557452, "grad_norm": 0.8984375, "learning_rate": 0.00018866528802031568, "loss": 0.9227, "step": 1171 }, { "epoch": 0.17860408412069492, "grad_norm": 1.0546875, "learning_rate": 0.0001886424526455914, "loss": 1.075, "step": 1172 }, { "epoch": 0.17875647668393782, "grad_norm": 0.7265625, "learning_rate": 0.00018861959567629099, "loss": 0.892, "step": 1173 }, { "epoch": 0.17890886924718075, "grad_norm": 0.8046875, "learning_rate": 0.00018859671711798267, "loss": 0.9809, "step": 1174 }, { "epoch": 0.17906126181042364, "grad_norm": 1.1015625, "learning_rate": 0.00018857381697624, "loss": 1.0507, "step": 1175 }, { "epoch": 0.17921365437366657, "grad_norm": 0.83203125, "learning_rate": 0.00018855089525664185, "loss": 1.0982, "step": 1176 }, { "epoch": 0.17936604693690947, "grad_norm": 0.8125, "learning_rate": 0.00018852795196477222, "loss": 0.8542, "step": 1177 }, { "epoch": 0.1795184395001524, "grad_norm": 0.88671875, "learning_rate": 0.00018850498710622042, "loss": 0.9351, "step": 1178 }, { "epoch": 0.17967083206339532, "grad_norm": 0.8046875, "learning_rate": 0.00018848200068658107, "loss": 0.9455, "step": 1179 }, { "epoch": 0.17982322462663822, "grad_norm": 1.1484375, "learning_rate": 0.0001884589927114539, "loss": 1.0946, "step": 1180 }, { "epoch": 0.17997561718988114, "grad_norm": 0.9296875, "learning_rate": 0.00018843596318644408, "loss": 1.0991, "step": 1181 }, { "epoch": 0.18012800975312404, "grad_norm": 0.7890625, "learning_rate": 0.00018841291211716188, "loss": 0.8554, "step": 1182 }, { "epoch": 0.18028040231636697, "grad_norm": 0.90234375, "learning_rate": 0.00018838983950922292, "loss": 1.0232, "step": 1183 }, { "epoch": 0.18043279487960986, "grad_norm": 1.1875, "learning_rate": 0.00018836674536824795, "loss": 1.0277, "step": 1184 }, { "epoch": 0.1805851874428528, "grad_norm": 0.92578125, "learning_rate": 0.00018834362969986308, "loss": 0.8957, "step": 1185 }, { "epoch": 0.18073758000609572, "grad_norm": 1.2734375, "learning_rate": 0.00018832049250969962, "loss": 1.0761, "step": 1186 }, { "epoch": 0.18088997256933861, "grad_norm": 0.890625, "learning_rate": 0.0001882973338033941, "loss": 0.9916, "step": 1187 }, { "epoch": 0.18104236513258154, "grad_norm": 1.4375, "learning_rate": 0.00018827415358658832, "loss": 1.0994, "step": 1188 }, { "epoch": 0.18119475769582444, "grad_norm": 0.8125, "learning_rate": 0.00018825095186492935, "loss": 0.9744, "step": 1189 }, { "epoch": 0.18134715025906736, "grad_norm": 0.9921875, "learning_rate": 0.0001882277286440694, "loss": 1.1009, "step": 1190 }, { "epoch": 0.18149954282231026, "grad_norm": 0.94140625, "learning_rate": 0.00018820448392966606, "loss": 1.0372, "step": 1191 }, { "epoch": 0.1816519353855532, "grad_norm": 0.92578125, "learning_rate": 0.000188181217727382, "loss": 0.924, "step": 1192 }, { "epoch": 0.18180432794879609, "grad_norm": 1.1796875, "learning_rate": 0.00018815793004288526, "loss": 0.9697, "step": 1193 }, { "epoch": 0.181956720512039, "grad_norm": 0.67578125, "learning_rate": 0.00018813462088184898, "loss": 0.8572, "step": 1194 }, { "epoch": 0.18210911307528194, "grad_norm": 1.1015625, "learning_rate": 0.00018811129024995168, "loss": 1.1528, "step": 1195 }, { "epoch": 0.18226150563852483, "grad_norm": 0.76953125, "learning_rate": 0.00018808793815287703, "loss": 1.0202, "step": 1196 }, { "epoch": 0.18241389820176776, "grad_norm": 1.09375, "learning_rate": 0.0001880645645963139, "loss": 1.2665, "step": 1197 }, { "epoch": 0.18256629076501066, "grad_norm": 0.765625, "learning_rate": 0.00018804116958595643, "loss": 0.8104, "step": 1198 }, { "epoch": 0.18271868332825358, "grad_norm": 0.8671875, "learning_rate": 0.000188017753127504, "loss": 0.9092, "step": 1199 }, { "epoch": 0.18287107589149648, "grad_norm": 1.0234375, "learning_rate": 0.0001879943152266612, "loss": 0.9655, "step": 1200 }, { "epoch": 0.1830234684547394, "grad_norm": 0.734375, "learning_rate": 0.00018797085588913783, "loss": 0.944, "step": 1201 }, { "epoch": 0.18317586101798233, "grad_norm": 0.875, "learning_rate": 0.0001879473751206489, "loss": 1.018, "step": 1202 }, { "epoch": 0.18332825358122523, "grad_norm": 1.0703125, "learning_rate": 0.00018792387292691472, "loss": 1.0058, "step": 1203 }, { "epoch": 0.18348064614446816, "grad_norm": 0.69921875, "learning_rate": 0.00018790034931366072, "loss": 0.8346, "step": 1204 }, { "epoch": 0.18363303870771105, "grad_norm": 1.0078125, "learning_rate": 0.00018787680428661755, "loss": 1.238, "step": 1205 }, { "epoch": 0.18378543127095398, "grad_norm": 0.63671875, "learning_rate": 0.00018785323785152123, "loss": 0.8688, "step": 1206 }, { "epoch": 0.18393782383419688, "grad_norm": 1.0078125, "learning_rate": 0.0001878296500141128, "loss": 0.8741, "step": 1207 }, { "epoch": 0.1840902163974398, "grad_norm": 0.890625, "learning_rate": 0.0001878060407801386, "loss": 0.9348, "step": 1208 }, { "epoch": 0.18424260896068273, "grad_norm": 0.87890625, "learning_rate": 0.0001877824101553502, "loss": 0.804, "step": 1209 }, { "epoch": 0.18439500152392563, "grad_norm": 1.046875, "learning_rate": 0.00018775875814550433, "loss": 0.9711, "step": 1210 }, { "epoch": 0.18454739408716855, "grad_norm": 0.9921875, "learning_rate": 0.000187735084756363, "loss": 0.7455, "step": 1211 }, { "epoch": 0.18469978665041145, "grad_norm": 1.0078125, "learning_rate": 0.00018771138999369336, "loss": 0.9603, "step": 1212 }, { "epoch": 0.18485217921365438, "grad_norm": 1.078125, "learning_rate": 0.00018768767386326783, "loss": 1.0724, "step": 1213 }, { "epoch": 0.18500457177689728, "grad_norm": 0.98828125, "learning_rate": 0.00018766393637086395, "loss": 1.0217, "step": 1214 }, { "epoch": 0.1851569643401402, "grad_norm": 0.90625, "learning_rate": 0.00018764017752226453, "loss": 0.9958, "step": 1215 }, { "epoch": 0.18530935690338313, "grad_norm": 1.234375, "learning_rate": 0.0001876163973232576, "loss": 0.9896, "step": 1216 }, { "epoch": 0.18546174946662602, "grad_norm": 1.3203125, "learning_rate": 0.00018759259577963633, "loss": 1.1134, "step": 1217 }, { "epoch": 0.18561414202986895, "grad_norm": 0.85546875, "learning_rate": 0.00018756877289719914, "loss": 0.8647, "step": 1218 }, { "epoch": 0.18576653459311185, "grad_norm": 1.0546875, "learning_rate": 0.00018754492868174958, "loss": 1.1345, "step": 1219 }, { "epoch": 0.18591892715635477, "grad_norm": 0.98046875, "learning_rate": 0.0001875210631390965, "loss": 1.1088, "step": 1220 }, { "epoch": 0.18607131971959767, "grad_norm": 1.078125, "learning_rate": 0.00018749717627505384, "loss": 0.9981, "step": 1221 }, { "epoch": 0.1862237122828406, "grad_norm": 1.03125, "learning_rate": 0.00018747326809544083, "loss": 0.9704, "step": 1222 }, { "epoch": 0.18637610484608352, "grad_norm": 0.94140625, "learning_rate": 0.00018744933860608183, "loss": 1.0698, "step": 1223 }, { "epoch": 0.18652849740932642, "grad_norm": 1.1328125, "learning_rate": 0.00018742538781280635, "loss": 1.126, "step": 1224 }, { "epoch": 0.18668088997256935, "grad_norm": 0.77734375, "learning_rate": 0.00018740141572144925, "loss": 0.9323, "step": 1225 }, { "epoch": 0.18683328253581225, "grad_norm": 0.890625, "learning_rate": 0.0001873774223378504, "loss": 0.9845, "step": 1226 }, { "epoch": 0.18698567509905517, "grad_norm": 0.90625, "learning_rate": 0.00018735340766785495, "loss": 1.0732, "step": 1227 }, { "epoch": 0.18713806766229807, "grad_norm": 0.77734375, "learning_rate": 0.00018732937171731326, "loss": 0.8705, "step": 1228 }, { "epoch": 0.187290460225541, "grad_norm": 0.93359375, "learning_rate": 0.00018730531449208078, "loss": 1.0518, "step": 1229 }, { "epoch": 0.18744285278878392, "grad_norm": 1.0625, "learning_rate": 0.00018728123599801824, "loss": 0.9513, "step": 1230 }, { "epoch": 0.18759524535202682, "grad_norm": 0.8359375, "learning_rate": 0.00018725713624099147, "loss": 1.0057, "step": 1231 }, { "epoch": 0.18774763791526974, "grad_norm": 0.83984375, "learning_rate": 0.00018723301522687152, "loss": 0.9633, "step": 1232 }, { "epoch": 0.18790003047851264, "grad_norm": 1.03125, "learning_rate": 0.00018720887296153462, "loss": 1.0658, "step": 1233 }, { "epoch": 0.18805242304175557, "grad_norm": 0.8125, "learning_rate": 0.00018718470945086222, "loss": 0.7939, "step": 1234 }, { "epoch": 0.18820481560499847, "grad_norm": 1.03125, "learning_rate": 0.00018716052470074083, "loss": 0.9449, "step": 1235 }, { "epoch": 0.1883572081682414, "grad_norm": 0.921875, "learning_rate": 0.00018713631871706223, "loss": 1.0358, "step": 1236 }, { "epoch": 0.18850960073148432, "grad_norm": 0.703125, "learning_rate": 0.00018711209150572335, "loss": 0.8374, "step": 1237 }, { "epoch": 0.18866199329472721, "grad_norm": 0.96484375, "learning_rate": 0.0001870878430726263, "loss": 0.6703, "step": 1238 }, { "epoch": 0.18881438585797014, "grad_norm": 0.7578125, "learning_rate": 0.0001870635734236783, "loss": 0.8533, "step": 1239 }, { "epoch": 0.18896677842121304, "grad_norm": 0.6953125, "learning_rate": 0.00018703928256479183, "loss": 0.9645, "step": 1240 }, { "epoch": 0.18911917098445596, "grad_norm": 1.28125, "learning_rate": 0.0001870149705018845, "loss": 1.0156, "step": 1241 }, { "epoch": 0.18927156354769886, "grad_norm": 0.7421875, "learning_rate": 0.00018699063724087904, "loss": 0.9378, "step": 1242 }, { "epoch": 0.1894239561109418, "grad_norm": 0.90234375, "learning_rate": 0.0001869662827877034, "loss": 0.8677, "step": 1243 }, { "epoch": 0.18957634867418469, "grad_norm": 0.73046875, "learning_rate": 0.0001869419071482907, "loss": 0.8943, "step": 1244 }, { "epoch": 0.1897287412374276, "grad_norm": 0.78515625, "learning_rate": 0.00018691751032857913, "loss": 0.8189, "step": 1245 }, { "epoch": 0.18988113380067054, "grad_norm": 0.90625, "learning_rate": 0.00018689309233451215, "loss": 0.8926, "step": 1246 }, { "epoch": 0.19003352636391344, "grad_norm": 1.0078125, "learning_rate": 0.00018686865317203838, "loss": 0.8235, "step": 1247 }, { "epoch": 0.19018591892715636, "grad_norm": 1.2421875, "learning_rate": 0.00018684419284711146, "loss": 0.9717, "step": 1248 }, { "epoch": 0.19033831149039926, "grad_norm": 1.109375, "learning_rate": 0.00018681971136569035, "loss": 1.2108, "step": 1249 }, { "epoch": 0.19049070405364218, "grad_norm": 0.81640625, "learning_rate": 0.00018679520873373902, "loss": 1.1191, "step": 1250 }, { "epoch": 0.19064309661688508, "grad_norm": 1.2109375, "learning_rate": 0.00018677068495722674, "loss": 1.2713, "step": 1251 }, { "epoch": 0.190795489180128, "grad_norm": 0.85546875, "learning_rate": 0.0001867461400421278, "loss": 1.0528, "step": 1252 }, { "epoch": 0.19094788174337093, "grad_norm": 0.9140625, "learning_rate": 0.00018672157399442173, "loss": 1.0539, "step": 1253 }, { "epoch": 0.19110027430661383, "grad_norm": 0.91015625, "learning_rate": 0.00018669698682009315, "loss": 0.8696, "step": 1254 }, { "epoch": 0.19125266686985676, "grad_norm": 0.87109375, "learning_rate": 0.00018667237852513187, "loss": 0.8748, "step": 1255 }, { "epoch": 0.19140505943309966, "grad_norm": 1.078125, "learning_rate": 0.00018664774911553278, "loss": 1.1254, "step": 1256 }, { "epoch": 0.19155745199634258, "grad_norm": 1.03125, "learning_rate": 0.000186623098597296, "loss": 1.1543, "step": 1257 }, { "epoch": 0.19170984455958548, "grad_norm": 0.7578125, "learning_rate": 0.00018659842697642677, "loss": 0.8906, "step": 1258 }, { "epoch": 0.1918622371228284, "grad_norm": 1.109375, "learning_rate": 0.00018657373425893535, "loss": 0.8176, "step": 1259 }, { "epoch": 0.19201462968607133, "grad_norm": 1.0703125, "learning_rate": 0.00018654902045083739, "loss": 1.1175, "step": 1260 }, { "epoch": 0.19216702224931423, "grad_norm": 0.81640625, "learning_rate": 0.0001865242855581534, "loss": 0.9813, "step": 1261 }, { "epoch": 0.19231941481255715, "grad_norm": 0.99609375, "learning_rate": 0.00018649952958690923, "loss": 0.7435, "step": 1262 }, { "epoch": 0.19247180737580005, "grad_norm": 0.8828125, "learning_rate": 0.00018647475254313575, "loss": 0.9855, "step": 1263 }, { "epoch": 0.19262419993904298, "grad_norm": 0.98046875, "learning_rate": 0.00018644995443286902, "loss": 1.0771, "step": 1264 }, { "epoch": 0.19277659250228588, "grad_norm": 0.9140625, "learning_rate": 0.00018642513526215024, "loss": 1.1015, "step": 1265 }, { "epoch": 0.1929289850655288, "grad_norm": 0.9609375, "learning_rate": 0.00018640029503702568, "loss": 1.146, "step": 1266 }, { "epoch": 0.19308137762877173, "grad_norm": 0.890625, "learning_rate": 0.00018637543376354677, "loss": 1.0298, "step": 1267 }, { "epoch": 0.19323377019201463, "grad_norm": 1.1328125, "learning_rate": 0.00018635055144777013, "loss": 1.1742, "step": 1268 }, { "epoch": 0.19338616275525755, "grad_norm": 0.70703125, "learning_rate": 0.00018632564809575742, "loss": 0.8311, "step": 1269 }, { "epoch": 0.19353855531850045, "grad_norm": 0.7734375, "learning_rate": 0.0001863007237135754, "loss": 0.8678, "step": 1270 }, { "epoch": 0.19369094788174337, "grad_norm": 0.86328125, "learning_rate": 0.0001862757783072961, "loss": 0.8928, "step": 1271 }, { "epoch": 0.19384334044498627, "grad_norm": 1.0859375, "learning_rate": 0.00018625081188299652, "loss": 0.8986, "step": 1272 }, { "epoch": 0.1939957330082292, "grad_norm": 0.88671875, "learning_rate": 0.00018622582444675881, "loss": 0.8945, "step": 1273 }, { "epoch": 0.19414812557147212, "grad_norm": 0.9375, "learning_rate": 0.00018620081600467038, "loss": 0.865, "step": 1274 }, { "epoch": 0.19430051813471502, "grad_norm": 0.90625, "learning_rate": 0.00018617578656282357, "loss": 0.9607, "step": 1275 }, { "epoch": 0.19445291069795795, "grad_norm": 1.0078125, "learning_rate": 0.0001861507361273159, "loss": 1.0648, "step": 1276 }, { "epoch": 0.19460530326120085, "grad_norm": 0.9296875, "learning_rate": 0.00018612566470425007, "loss": 0.8589, "step": 1277 }, { "epoch": 0.19475769582444377, "grad_norm": 0.82421875, "learning_rate": 0.0001861005722997338, "loss": 1.0245, "step": 1278 }, { "epoch": 0.19491008838768667, "grad_norm": 0.94921875, "learning_rate": 0.00018607545891987997, "loss": 0.8495, "step": 1279 }, { "epoch": 0.1950624809509296, "grad_norm": 0.75, "learning_rate": 0.00018605032457080653, "loss": 0.8758, "step": 1280 }, { "epoch": 0.19521487351417252, "grad_norm": 1.234375, "learning_rate": 0.00018602516925863666, "loss": 0.9725, "step": 1281 }, { "epoch": 0.19536726607741542, "grad_norm": 0.828125, "learning_rate": 0.0001859999929894984, "loss": 0.8486, "step": 1282 }, { "epoch": 0.19551965864065834, "grad_norm": 0.80078125, "learning_rate": 0.00018597479576952522, "loss": 1.0588, "step": 1283 }, { "epoch": 0.19567205120390124, "grad_norm": 0.96875, "learning_rate": 0.00018594957760485544, "loss": 0.9923, "step": 1284 }, { "epoch": 0.19582444376714417, "grad_norm": 1.140625, "learning_rate": 0.00018592433850163256, "loss": 1.1754, "step": 1285 }, { "epoch": 0.19597683633038707, "grad_norm": 1.078125, "learning_rate": 0.0001858990784660052, "loss": 1.0674, "step": 1286 }, { "epoch": 0.19612922889363, "grad_norm": 1.0390625, "learning_rate": 0.0001858737975041271, "loss": 0.9182, "step": 1287 }, { "epoch": 0.19628162145687292, "grad_norm": 0.953125, "learning_rate": 0.00018584849562215702, "loss": 1.15, "step": 1288 }, { "epoch": 0.19643401402011582, "grad_norm": 0.90625, "learning_rate": 0.00018582317282625892, "loss": 1.0043, "step": 1289 }, { "epoch": 0.19658640658335874, "grad_norm": 1.203125, "learning_rate": 0.00018579782912260174, "loss": 1.0215, "step": 1290 }, { "epoch": 0.19673879914660164, "grad_norm": 1.609375, "learning_rate": 0.00018577246451735956, "loss": 1.1499, "step": 1291 }, { "epoch": 0.19689119170984457, "grad_norm": 1.171875, "learning_rate": 0.00018574707901671163, "loss": 0.9306, "step": 1292 }, { "epoch": 0.19704358427308746, "grad_norm": 1.21875, "learning_rate": 0.00018572167262684216, "loss": 1.1456, "step": 1293 }, { "epoch": 0.1971959768363304, "grad_norm": 0.94921875, "learning_rate": 0.00018569624535394054, "loss": 1.0198, "step": 1294 }, { "epoch": 0.19734836939957331, "grad_norm": 0.9453125, "learning_rate": 0.00018567079720420123, "loss": 1.0451, "step": 1295 }, { "epoch": 0.1975007619628162, "grad_norm": 1.1796875, "learning_rate": 0.00018564532818382377, "loss": 1.193, "step": 1296 }, { "epoch": 0.19765315452605914, "grad_norm": 0.74609375, "learning_rate": 0.00018561983829901274, "loss": 0.853, "step": 1297 }, { "epoch": 0.19780554708930204, "grad_norm": 1.1328125, "learning_rate": 0.00018559432755597786, "loss": 1.0439, "step": 1298 }, { "epoch": 0.19795793965254496, "grad_norm": 1.0078125, "learning_rate": 0.00018556879596093393, "loss": 1.0729, "step": 1299 }, { "epoch": 0.19811033221578786, "grad_norm": 0.66015625, "learning_rate": 0.00018554324352010082, "loss": 0.917, "step": 1300 }, { "epoch": 0.19826272477903079, "grad_norm": 0.66015625, "learning_rate": 0.00018551767023970342, "loss": 0.789, "step": 1301 }, { "epoch": 0.19841511734227368, "grad_norm": 1.09375, "learning_rate": 0.00018549207612597178, "loss": 0.9334, "step": 1302 }, { "epoch": 0.1985675099055166, "grad_norm": 0.87109375, "learning_rate": 0.000185466461185141, "loss": 1.1137, "step": 1303 }, { "epoch": 0.19871990246875954, "grad_norm": 0.92578125, "learning_rate": 0.00018544082542345128, "loss": 0.8355, "step": 1304 }, { "epoch": 0.19887229503200243, "grad_norm": 0.80859375, "learning_rate": 0.0001854151688471478, "loss": 0.9141, "step": 1305 }, { "epoch": 0.19902468759524536, "grad_norm": 1.0703125, "learning_rate": 0.00018538949146248092, "loss": 1.2592, "step": 1306 }, { "epoch": 0.19917708015848826, "grad_norm": 0.82421875, "learning_rate": 0.00018536379327570597, "loss": 0.8636, "step": 1307 }, { "epoch": 0.19932947272173118, "grad_norm": 1.1640625, "learning_rate": 0.00018533807429308344, "loss": 1.0123, "step": 1308 }, { "epoch": 0.19948186528497408, "grad_norm": 0.859375, "learning_rate": 0.00018531233452087883, "loss": 1.0902, "step": 1309 }, { "epoch": 0.199634257848217, "grad_norm": 0.66796875, "learning_rate": 0.00018528657396536268, "loss": 0.771, "step": 1310 }, { "epoch": 0.19978665041145993, "grad_norm": 1.0703125, "learning_rate": 0.0001852607926328107, "loss": 0.9508, "step": 1311 }, { "epoch": 0.19993904297470283, "grad_norm": 0.81640625, "learning_rate": 0.00018523499052950358, "loss": 0.9485, "step": 1312 }, { "epoch": 0.20009143553794576, "grad_norm": 0.94140625, "learning_rate": 0.00018520916766172704, "loss": 0.8849, "step": 1313 }, { "epoch": 0.20024382810118865, "grad_norm": 1.03125, "learning_rate": 0.00018518332403577193, "loss": 0.8742, "step": 1314 }, { "epoch": 0.20039622066443158, "grad_norm": 1.15625, "learning_rate": 0.00018515745965793414, "loss": 1.0485, "step": 1315 }, { "epoch": 0.20054861322767448, "grad_norm": 1.1015625, "learning_rate": 0.00018513157453451458, "loss": 0.9675, "step": 1316 }, { "epoch": 0.2007010057909174, "grad_norm": 0.83203125, "learning_rate": 0.00018510566867181925, "loss": 0.911, "step": 1317 }, { "epoch": 0.20085339835416033, "grad_norm": 1.0625, "learning_rate": 0.00018507974207615917, "loss": 1.0996, "step": 1318 }, { "epoch": 0.20100579091740323, "grad_norm": 0.88671875, "learning_rate": 0.00018505379475385048, "loss": 0.8614, "step": 1319 }, { "epoch": 0.20115818348064615, "grad_norm": 0.83984375, "learning_rate": 0.00018502782671121429, "loss": 0.9244, "step": 1320 }, { "epoch": 0.20131057604388905, "grad_norm": 0.97265625, "learning_rate": 0.00018500183795457678, "loss": 0.8821, "step": 1321 }, { "epoch": 0.20146296860713198, "grad_norm": 0.67578125, "learning_rate": 0.00018497582849026918, "loss": 0.7634, "step": 1322 }, { "epoch": 0.20161536117037487, "grad_norm": 0.9921875, "learning_rate": 0.00018494979832462782, "loss": 0.8758, "step": 1323 }, { "epoch": 0.2017677537336178, "grad_norm": 0.98046875, "learning_rate": 0.00018492374746399397, "loss": 1.0884, "step": 1324 }, { "epoch": 0.20192014629686073, "grad_norm": 0.97265625, "learning_rate": 0.000184897675914714, "loss": 0.8277, "step": 1325 }, { "epoch": 0.20207253886010362, "grad_norm": 1.0546875, "learning_rate": 0.00018487158368313934, "loss": 1.1633, "step": 1326 }, { "epoch": 0.20222493142334655, "grad_norm": 1.375, "learning_rate": 0.00018484547077562643, "loss": 1.0727, "step": 1327 }, { "epoch": 0.20237732398658945, "grad_norm": 1.0546875, "learning_rate": 0.00018481933719853672, "loss": 0.9925, "step": 1328 }, { "epoch": 0.20252971654983237, "grad_norm": 0.80078125, "learning_rate": 0.00018479318295823677, "loss": 0.858, "step": 1329 }, { "epoch": 0.20268210911307527, "grad_norm": 0.9296875, "learning_rate": 0.0001847670080610981, "loss": 0.983, "step": 1330 }, { "epoch": 0.2028345016763182, "grad_norm": 0.83984375, "learning_rate": 0.00018474081251349726, "loss": 0.8707, "step": 1331 }, { "epoch": 0.20298689423956112, "grad_norm": 0.9375, "learning_rate": 0.00018471459632181595, "loss": 1.0513, "step": 1332 }, { "epoch": 0.20313928680280402, "grad_norm": 0.76953125, "learning_rate": 0.0001846883594924407, "loss": 0.8628, "step": 1333 }, { "epoch": 0.20329167936604695, "grad_norm": 0.73046875, "learning_rate": 0.00018466210203176326, "loss": 0.9132, "step": 1334 }, { "epoch": 0.20344407192928984, "grad_norm": 0.8671875, "learning_rate": 0.0001846358239461803, "loss": 0.8785, "step": 1335 }, { "epoch": 0.20359646449253277, "grad_norm": 0.95703125, "learning_rate": 0.00018460952524209355, "loss": 1.0763, "step": 1336 }, { "epoch": 0.20374885705577567, "grad_norm": 1.0625, "learning_rate": 0.00018458320592590975, "loss": 1.1401, "step": 1337 }, { "epoch": 0.2039012496190186, "grad_norm": 1.203125, "learning_rate": 0.00018455686600404064, "loss": 0.877, "step": 1338 }, { "epoch": 0.20405364218226152, "grad_norm": 0.953125, "learning_rate": 0.00018453050548290304, "loss": 0.9289, "step": 1339 }, { "epoch": 0.20420603474550442, "grad_norm": 1.0234375, "learning_rate": 0.00018450412436891872, "loss": 0.9893, "step": 1340 }, { "epoch": 0.20435842730874734, "grad_norm": 0.87109375, "learning_rate": 0.0001844777226685145, "loss": 0.8447, "step": 1341 }, { "epoch": 0.20451081987199024, "grad_norm": 1.125, "learning_rate": 0.00018445130038812225, "loss": 1.0009, "step": 1342 }, { "epoch": 0.20466321243523317, "grad_norm": 0.98828125, "learning_rate": 0.0001844248575341788, "loss": 1.0595, "step": 1343 }, { "epoch": 0.20481560499847606, "grad_norm": 0.80859375, "learning_rate": 0.00018439839411312598, "loss": 0.8842, "step": 1344 }, { "epoch": 0.204967997561719, "grad_norm": 0.71484375, "learning_rate": 0.0001843719101314107, "loss": 0.9226, "step": 1345 }, { "epoch": 0.20512039012496192, "grad_norm": 1.03125, "learning_rate": 0.0001843454055954848, "loss": 0.9231, "step": 1346 }, { "epoch": 0.2052727826882048, "grad_norm": 0.8671875, "learning_rate": 0.00018431888051180522, "loss": 0.8647, "step": 1347 }, { "epoch": 0.20542517525144774, "grad_norm": 0.9453125, "learning_rate": 0.00018429233488683383, "loss": 1.0469, "step": 1348 }, { "epoch": 0.20557756781469064, "grad_norm": 0.9140625, "learning_rate": 0.00018426576872703745, "loss": 1.0339, "step": 1349 }, { "epoch": 0.20572996037793356, "grad_norm": 1.1953125, "learning_rate": 0.0001842391820388881, "loss": 0.9675, "step": 1350 }, { "epoch": 0.20588235294117646, "grad_norm": 0.84765625, "learning_rate": 0.0001842125748288626, "loss": 0.9072, "step": 1351 }, { "epoch": 0.2060347455044194, "grad_norm": 0.8828125, "learning_rate": 0.00018418594710344289, "loss": 0.9135, "step": 1352 }, { "epoch": 0.20618713806766228, "grad_norm": 0.92578125, "learning_rate": 0.00018415929886911583, "loss": 0.9233, "step": 1353 }, { "epoch": 0.2063395306309052, "grad_norm": 1.0546875, "learning_rate": 0.00018413263013237333, "loss": 1.0745, "step": 1354 }, { "epoch": 0.20649192319414814, "grad_norm": 1.0625, "learning_rate": 0.00018410594089971228, "loss": 0.9431, "step": 1355 }, { "epoch": 0.20664431575739103, "grad_norm": 0.78125, "learning_rate": 0.00018407923117763462, "loss": 0.9393, "step": 1356 }, { "epoch": 0.20679670832063396, "grad_norm": 1.2890625, "learning_rate": 0.0001840525009726471, "loss": 1.0608, "step": 1357 }, { "epoch": 0.20694910088387686, "grad_norm": 0.84765625, "learning_rate": 0.00018402575029126167, "loss": 0.9341, "step": 1358 }, { "epoch": 0.20710149344711978, "grad_norm": 1.015625, "learning_rate": 0.00018399897913999515, "loss": 1.0293, "step": 1359 }, { "epoch": 0.20725388601036268, "grad_norm": 0.8203125, "learning_rate": 0.00018397218752536938, "loss": 0.8735, "step": 1360 }, { "epoch": 0.2074062785736056, "grad_norm": 0.9296875, "learning_rate": 0.0001839453754539112, "loss": 0.73, "step": 1361 }, { "epoch": 0.20755867113684853, "grad_norm": 0.95703125, "learning_rate": 0.00018391854293215237, "loss": 0.9709, "step": 1362 }, { "epoch": 0.20771106370009143, "grad_norm": 0.92578125, "learning_rate": 0.00018389168996662975, "loss": 1.0392, "step": 1363 }, { "epoch": 0.20786345626333436, "grad_norm": 0.95703125, "learning_rate": 0.00018386481656388509, "loss": 0.9755, "step": 1364 }, { "epoch": 0.20801584882657725, "grad_norm": 1.1640625, "learning_rate": 0.00018383792273046504, "loss": 1.0909, "step": 1365 }, { "epoch": 0.20816824138982018, "grad_norm": 0.83984375, "learning_rate": 0.00018381100847292148, "loss": 0.8962, "step": 1366 }, { "epoch": 0.20832063395306308, "grad_norm": 0.85546875, "learning_rate": 0.00018378407379781098, "loss": 0.8493, "step": 1367 }, { "epoch": 0.208473026516306, "grad_norm": 0.80078125, "learning_rate": 0.00018375711871169528, "loss": 1.0421, "step": 1368 }, { "epoch": 0.20862541907954893, "grad_norm": 1.171875, "learning_rate": 0.00018373014322114103, "loss": 1.2846, "step": 1369 }, { "epoch": 0.20877781164279183, "grad_norm": 1.0859375, "learning_rate": 0.0001837031473327198, "loss": 0.9815, "step": 1370 }, { "epoch": 0.20893020420603475, "grad_norm": 0.90625, "learning_rate": 0.00018367613105300823, "loss": 0.9537, "step": 1371 }, { "epoch": 0.20908259676927765, "grad_norm": 1.1953125, "learning_rate": 0.00018364909438858785, "loss": 1.0607, "step": 1372 }, { "epoch": 0.20923498933252058, "grad_norm": 1.0703125, "learning_rate": 0.00018362203734604519, "loss": 1.0555, "step": 1373 }, { "epoch": 0.20938738189576347, "grad_norm": 1.0234375, "learning_rate": 0.0001835949599319717, "loss": 1.1281, "step": 1374 }, { "epoch": 0.2095397744590064, "grad_norm": 0.90625, "learning_rate": 0.00018356786215296386, "loss": 0.9561, "step": 1375 }, { "epoch": 0.20969216702224933, "grad_norm": 0.73046875, "learning_rate": 0.00018354074401562306, "loss": 0.867, "step": 1376 }, { "epoch": 0.20984455958549222, "grad_norm": 0.765625, "learning_rate": 0.0001835136055265557, "loss": 0.9094, "step": 1377 }, { "epoch": 0.20999695214873515, "grad_norm": 0.921875, "learning_rate": 0.0001834864466923731, "loss": 1.019, "step": 1378 }, { "epoch": 0.21014934471197805, "grad_norm": 0.94140625, "learning_rate": 0.0001834592675196915, "loss": 0.9964, "step": 1379 }, { "epoch": 0.21030173727522097, "grad_norm": 0.984375, "learning_rate": 0.00018343206801513218, "loss": 0.8834, "step": 1380 }, { "epoch": 0.21045412983846387, "grad_norm": 0.9453125, "learning_rate": 0.00018340484818532132, "loss": 1.0293, "step": 1381 }, { "epoch": 0.2106065224017068, "grad_norm": 0.9609375, "learning_rate": 0.00018337760803689005, "loss": 0.8056, "step": 1382 }, { "epoch": 0.21075891496494972, "grad_norm": 0.94140625, "learning_rate": 0.0001833503475764745, "loss": 0.9811, "step": 1383 }, { "epoch": 0.21091130752819262, "grad_norm": 1.171875, "learning_rate": 0.00018332306681071562, "loss": 0.9728, "step": 1384 }, { "epoch": 0.21106370009143555, "grad_norm": 1.3515625, "learning_rate": 0.00018329576574625952, "loss": 1.1979, "step": 1385 }, { "epoch": 0.21121609265467844, "grad_norm": 1.328125, "learning_rate": 0.00018326844438975703, "loss": 1.1107, "step": 1386 }, { "epoch": 0.21136848521792137, "grad_norm": 0.921875, "learning_rate": 0.00018324110274786407, "loss": 0.8826, "step": 1387 }, { "epoch": 0.21152087778116427, "grad_norm": 1.21875, "learning_rate": 0.00018321374082724145, "loss": 0.8894, "step": 1388 }, { "epoch": 0.2116732703444072, "grad_norm": 0.8671875, "learning_rate": 0.00018318635863455495, "loss": 0.8778, "step": 1389 }, { "epoch": 0.21182566290765012, "grad_norm": 1.015625, "learning_rate": 0.00018315895617647523, "loss": 0.8801, "step": 1390 }, { "epoch": 0.21197805547089302, "grad_norm": 0.83203125, "learning_rate": 0.00018313153345967794, "loss": 1.0799, "step": 1391 }, { "epoch": 0.21213044803413594, "grad_norm": 1.140625, "learning_rate": 0.00018310409049084365, "loss": 0.9917, "step": 1392 }, { "epoch": 0.21228284059737884, "grad_norm": 0.9375, "learning_rate": 0.00018307662727665784, "loss": 1.0476, "step": 1393 }, { "epoch": 0.21243523316062177, "grad_norm": 1.015625, "learning_rate": 0.000183049143823811, "loss": 0.8822, "step": 1394 }, { "epoch": 0.21258762572386466, "grad_norm": 0.97265625, "learning_rate": 0.0001830216401389984, "loss": 0.9811, "step": 1395 }, { "epoch": 0.2127400182871076, "grad_norm": 0.98828125, "learning_rate": 0.00018299411622892044, "loss": 0.8537, "step": 1396 }, { "epoch": 0.21289241085035052, "grad_norm": 1.1328125, "learning_rate": 0.00018296657210028228, "loss": 0.8872, "step": 1397 }, { "epoch": 0.21304480341359341, "grad_norm": 0.87109375, "learning_rate": 0.00018293900775979407, "loss": 0.847, "step": 1398 }, { "epoch": 0.21319719597683634, "grad_norm": 1.0859375, "learning_rate": 0.00018291142321417089, "loss": 1.0259, "step": 1399 }, { "epoch": 0.21334958854007924, "grad_norm": 0.86328125, "learning_rate": 0.00018288381847013268, "loss": 0.9937, "step": 1400 }, { "epoch": 0.21350198110332216, "grad_norm": 0.73828125, "learning_rate": 0.00018285619353440447, "loss": 0.8376, "step": 1401 }, { "epoch": 0.21365437366656506, "grad_norm": 0.75390625, "learning_rate": 0.000182828548413716, "loss": 0.7529, "step": 1402 }, { "epoch": 0.213806766229808, "grad_norm": 1.1640625, "learning_rate": 0.00018280088311480201, "loss": 1.0318, "step": 1403 }, { "epoch": 0.21395915879305089, "grad_norm": 0.73046875, "learning_rate": 0.0001827731976444022, "loss": 0.9232, "step": 1404 }, { "epoch": 0.2141115513562938, "grad_norm": 1.21875, "learning_rate": 0.00018274549200926119, "loss": 1.0498, "step": 1405 }, { "epoch": 0.21426394391953674, "grad_norm": 0.984375, "learning_rate": 0.0001827177662161284, "loss": 0.9736, "step": 1406 }, { "epoch": 0.21441633648277963, "grad_norm": 0.75390625, "learning_rate": 0.00018269002027175826, "loss": 1.0207, "step": 1407 }, { "epoch": 0.21456872904602256, "grad_norm": 0.92578125, "learning_rate": 0.00018266225418291004, "loss": 0.8419, "step": 1408 }, { "epoch": 0.21472112160926546, "grad_norm": 1.0234375, "learning_rate": 0.00018263446795634806, "loss": 0.9465, "step": 1409 }, { "epoch": 0.21487351417250838, "grad_norm": 1.2421875, "learning_rate": 0.0001826066615988413, "loss": 1.0892, "step": 1410 }, { "epoch": 0.21502590673575128, "grad_norm": 0.75, "learning_rate": 0.00018257883511716392, "loss": 0.9674, "step": 1411 }, { "epoch": 0.2151782992989942, "grad_norm": 1.5390625, "learning_rate": 0.0001825509885180948, "loss": 0.9125, "step": 1412 }, { "epoch": 0.21533069186223713, "grad_norm": 0.65234375, "learning_rate": 0.00018252312180841776, "loss": 0.8395, "step": 1413 }, { "epoch": 0.21548308442548003, "grad_norm": 1.046875, "learning_rate": 0.00018249523499492157, "loss": 1.0138, "step": 1414 }, { "epoch": 0.21563547698872296, "grad_norm": 0.87109375, "learning_rate": 0.0001824673280843998, "loss": 1.0316, "step": 1415 }, { "epoch": 0.21578786955196586, "grad_norm": 1.1484375, "learning_rate": 0.00018243940108365103, "loss": 1.0335, "step": 1416 }, { "epoch": 0.21594026211520878, "grad_norm": 0.796875, "learning_rate": 0.00018241145399947866, "loss": 1.0603, "step": 1417 }, { "epoch": 0.21609265467845168, "grad_norm": 0.95703125, "learning_rate": 0.000182383486838691, "loss": 0.899, "step": 1418 }, { "epoch": 0.2162450472416946, "grad_norm": 0.890625, "learning_rate": 0.0001823554996081012, "loss": 0.8955, "step": 1419 }, { "epoch": 0.21639743980493753, "grad_norm": 1.234375, "learning_rate": 0.00018232749231452745, "loss": 1.1219, "step": 1420 }, { "epoch": 0.21654983236818043, "grad_norm": 0.96875, "learning_rate": 0.00018229946496479266, "loss": 1.0375, "step": 1421 }, { "epoch": 0.21670222493142335, "grad_norm": 0.90234375, "learning_rate": 0.00018227141756572472, "loss": 0.9874, "step": 1422 }, { "epoch": 0.21685461749466625, "grad_norm": 0.7890625, "learning_rate": 0.00018224335012415638, "loss": 0.8251, "step": 1423 }, { "epoch": 0.21700701005790918, "grad_norm": 0.87890625, "learning_rate": 0.0001822152626469253, "loss": 0.8942, "step": 1424 }, { "epoch": 0.21715940262115208, "grad_norm": 0.6875, "learning_rate": 0.0001821871551408739, "loss": 0.9349, "step": 1425 }, { "epoch": 0.217311795184395, "grad_norm": 0.85546875, "learning_rate": 0.00018215902761284966, "loss": 1.0122, "step": 1426 }, { "epoch": 0.21746418774763793, "grad_norm": 0.9453125, "learning_rate": 0.00018213088006970485, "loss": 0.9903, "step": 1427 }, { "epoch": 0.21761658031088082, "grad_norm": 0.75390625, "learning_rate": 0.00018210271251829657, "loss": 0.9684, "step": 1428 }, { "epoch": 0.21776897287412375, "grad_norm": 0.77734375, "learning_rate": 0.00018207452496548686, "loss": 0.8159, "step": 1429 }, { "epoch": 0.21792136543736665, "grad_norm": 0.75390625, "learning_rate": 0.00018204631741814263, "loss": 0.8811, "step": 1430 }, { "epoch": 0.21807375800060957, "grad_norm": 0.7890625, "learning_rate": 0.00018201808988313562, "loss": 0.8273, "step": 1431 }, { "epoch": 0.21822615056385247, "grad_norm": 1.390625, "learning_rate": 0.00018198984236734246, "loss": 1.16, "step": 1432 }, { "epoch": 0.2183785431270954, "grad_norm": 0.88671875, "learning_rate": 0.0001819615748776447, "loss": 0.9537, "step": 1433 }, { "epoch": 0.21853093569033832, "grad_norm": 0.79296875, "learning_rate": 0.00018193328742092865, "loss": 0.819, "step": 1434 }, { "epoch": 0.21868332825358122, "grad_norm": 0.69921875, "learning_rate": 0.00018190498000408554, "loss": 1.0011, "step": 1435 }, { "epoch": 0.21883572081682415, "grad_norm": 0.81640625, "learning_rate": 0.00018187665263401154, "loss": 0.9306, "step": 1436 }, { "epoch": 0.21898811338006705, "grad_norm": 0.82421875, "learning_rate": 0.00018184830531760748, "loss": 0.9615, "step": 1437 }, { "epoch": 0.21914050594330997, "grad_norm": 0.87890625, "learning_rate": 0.00018181993806177927, "loss": 0.9333, "step": 1438 }, { "epoch": 0.21929289850655287, "grad_norm": 1.3125, "learning_rate": 0.0001817915508734376, "loss": 1.058, "step": 1439 }, { "epoch": 0.2194452910697958, "grad_norm": 1.1953125, "learning_rate": 0.0001817631437594979, "loss": 1.0718, "step": 1440 }, { "epoch": 0.21959768363303872, "grad_norm": 0.89453125, "learning_rate": 0.00018173471672688064, "loss": 0.9299, "step": 1441 }, { "epoch": 0.21975007619628162, "grad_norm": 0.89453125, "learning_rate": 0.000181706269782511, "loss": 1.1096, "step": 1442 }, { "epoch": 0.21990246875952454, "grad_norm": 0.6875, "learning_rate": 0.00018167780293331908, "loss": 0.8553, "step": 1443 }, { "epoch": 0.22005486132276744, "grad_norm": 1.0078125, "learning_rate": 0.00018164931618623985, "loss": 0.9137, "step": 1444 }, { "epoch": 0.22020725388601037, "grad_norm": 0.88671875, "learning_rate": 0.00018162080954821304, "loss": 0.9826, "step": 1445 }, { "epoch": 0.22035964644925327, "grad_norm": 0.90625, "learning_rate": 0.00018159228302618328, "loss": 0.9994, "step": 1446 }, { "epoch": 0.2205120390124962, "grad_norm": 1.2734375, "learning_rate": 0.00018156373662710007, "loss": 0.9771, "step": 1447 }, { "epoch": 0.22066443157573912, "grad_norm": 0.7734375, "learning_rate": 0.00018153517035791772, "loss": 0.8458, "step": 1448 }, { "epoch": 0.22081682413898202, "grad_norm": 1.1015625, "learning_rate": 0.00018150658422559537, "loss": 0.8468, "step": 1449 }, { "epoch": 0.22096921670222494, "grad_norm": 1.0546875, "learning_rate": 0.000181477978237097, "loss": 1.0287, "step": 1450 }, { "epoch": 0.22112160926546784, "grad_norm": 0.8203125, "learning_rate": 0.00018144935239939144, "loss": 0.8802, "step": 1451 }, { "epoch": 0.22127400182871076, "grad_norm": 0.68359375, "learning_rate": 0.00018142070671945242, "loss": 0.8523, "step": 1452 }, { "epoch": 0.22142639439195366, "grad_norm": 1.1171875, "learning_rate": 0.00018139204120425838, "loss": 1.0805, "step": 1453 }, { "epoch": 0.2215787869551966, "grad_norm": 0.88671875, "learning_rate": 0.00018136335586079266, "loss": 1.0354, "step": 1454 }, { "epoch": 0.22173117951843951, "grad_norm": 0.89453125, "learning_rate": 0.0001813346506960434, "loss": 0.959, "step": 1455 }, { "epoch": 0.2218835720816824, "grad_norm": 1.0078125, "learning_rate": 0.00018130592571700364, "loss": 1.0225, "step": 1456 }, { "epoch": 0.22203596464492534, "grad_norm": 0.97265625, "learning_rate": 0.00018127718093067117, "loss": 0.9242, "step": 1457 }, { "epoch": 0.22218835720816824, "grad_norm": 0.7265625, "learning_rate": 0.00018124841634404863, "loss": 0.8247, "step": 1458 }, { "epoch": 0.22234074977141116, "grad_norm": 0.87109375, "learning_rate": 0.00018121963196414352, "loss": 0.9464, "step": 1459 }, { "epoch": 0.22249314233465406, "grad_norm": 0.72265625, "learning_rate": 0.00018119082779796808, "loss": 0.9566, "step": 1460 }, { "epoch": 0.22264553489789699, "grad_norm": 0.94921875, "learning_rate": 0.00018116200385253945, "loss": 0.9899, "step": 1461 }, { "epoch": 0.22279792746113988, "grad_norm": 0.94921875, "learning_rate": 0.00018113316013487955, "loss": 0.94, "step": 1462 }, { "epoch": 0.2229503200243828, "grad_norm": 0.77734375, "learning_rate": 0.00018110429665201515, "loss": 0.9018, "step": 1463 }, { "epoch": 0.22310271258762573, "grad_norm": 1.21875, "learning_rate": 0.00018107541341097773, "loss": 1.1917, "step": 1464 }, { "epoch": 0.22325510515086863, "grad_norm": 0.91015625, "learning_rate": 0.00018104651041880377, "loss": 0.9443, "step": 1465 }, { "epoch": 0.22340749771411156, "grad_norm": 0.73828125, "learning_rate": 0.0001810175876825344, "loss": 0.9317, "step": 1466 }, { "epoch": 0.22355989027735446, "grad_norm": 1.2421875, "learning_rate": 0.0001809886452092156, "loss": 0.9975, "step": 1467 }, { "epoch": 0.22371228284059738, "grad_norm": 0.75, "learning_rate": 0.00018095968300589816, "loss": 0.8607, "step": 1468 }, { "epoch": 0.22386467540384028, "grad_norm": 1.0234375, "learning_rate": 0.00018093070107963778, "loss": 0.9481, "step": 1469 }, { "epoch": 0.2240170679670832, "grad_norm": 1.6171875, "learning_rate": 0.00018090169943749476, "loss": 1.1182, "step": 1470 }, { "epoch": 0.22416946053032613, "grad_norm": 1.0625, "learning_rate": 0.00018087267808653438, "loss": 0.9998, "step": 1471 }, { "epoch": 0.22432185309356903, "grad_norm": 0.8671875, "learning_rate": 0.00018084363703382668, "loss": 0.9167, "step": 1472 }, { "epoch": 0.22447424565681195, "grad_norm": 0.921875, "learning_rate": 0.0001808145762864464, "loss": 0.9564, "step": 1473 }, { "epoch": 0.22462663822005485, "grad_norm": 1.125, "learning_rate": 0.00018078549585147323, "loss": 0.9133, "step": 1474 }, { "epoch": 0.22477903078329778, "grad_norm": 0.875, "learning_rate": 0.00018075639573599155, "loss": 1.0478, "step": 1475 }, { "epoch": 0.22493142334654068, "grad_norm": 0.90234375, "learning_rate": 0.00018072727594709056, "loss": 1.0209, "step": 1476 }, { "epoch": 0.2250838159097836, "grad_norm": 0.99609375, "learning_rate": 0.00018069813649186428, "loss": 0.9641, "step": 1477 }, { "epoch": 0.22523620847302653, "grad_norm": 0.83203125, "learning_rate": 0.00018066897737741152, "loss": 1.0433, "step": 1478 }, { "epoch": 0.22538860103626943, "grad_norm": 1.109375, "learning_rate": 0.00018063979861083576, "loss": 1.0085, "step": 1479 }, { "epoch": 0.22554099359951235, "grad_norm": 1.15625, "learning_rate": 0.0001806106001992455, "loss": 1.059, "step": 1480 }, { "epoch": 0.22569338616275525, "grad_norm": 0.953125, "learning_rate": 0.0001805813821497538, "loss": 0.9234, "step": 1481 }, { "epoch": 0.22584577872599818, "grad_norm": 0.80078125, "learning_rate": 0.00018055214446947862, "loss": 0.7988, "step": 1482 }, { "epoch": 0.22599817128924107, "grad_norm": 1.0390625, "learning_rate": 0.0001805228871655427, "loss": 1.0216, "step": 1483 }, { "epoch": 0.226150563852484, "grad_norm": 0.8125, "learning_rate": 0.00018049361024507354, "loss": 0.8471, "step": 1484 }, { "epoch": 0.22630295641572692, "grad_norm": 0.984375, "learning_rate": 0.0001804643137152034, "loss": 1.1273, "step": 1485 }, { "epoch": 0.22645534897896982, "grad_norm": 0.734375, "learning_rate": 0.00018043499758306933, "loss": 0.9263, "step": 1486 }, { "epoch": 0.22660774154221275, "grad_norm": 0.88671875, "learning_rate": 0.00018040566185581315, "loss": 0.8965, "step": 1487 }, { "epoch": 0.22676013410545565, "grad_norm": 0.68359375, "learning_rate": 0.00018037630654058154, "loss": 1.0788, "step": 1488 }, { "epoch": 0.22691252666869857, "grad_norm": 1.1484375, "learning_rate": 0.00018034693164452578, "loss": 1.0814, "step": 1489 }, { "epoch": 0.22706491923194147, "grad_norm": 1.1484375, "learning_rate": 0.00018031753717480208, "loss": 1.019, "step": 1490 }, { "epoch": 0.2272173117951844, "grad_norm": 0.98828125, "learning_rate": 0.00018028812313857135, "loss": 1.0758, "step": 1491 }, { "epoch": 0.22736970435842732, "grad_norm": 1.1875, "learning_rate": 0.00018025868954299923, "loss": 1.0697, "step": 1492 }, { "epoch": 0.22752209692167022, "grad_norm": 0.94140625, "learning_rate": 0.00018022923639525625, "loss": 0.7896, "step": 1493 }, { "epoch": 0.22767448948491315, "grad_norm": 0.859375, "learning_rate": 0.00018019976370251749, "loss": 0.9168, "step": 1494 }, { "epoch": 0.22782688204815604, "grad_norm": 0.7578125, "learning_rate": 0.00018017027147196302, "loss": 0.8469, "step": 1495 }, { "epoch": 0.22797927461139897, "grad_norm": 1.1640625, "learning_rate": 0.0001801407597107776, "loss": 1.0171, "step": 1496 }, { "epoch": 0.22813166717464187, "grad_norm": 0.921875, "learning_rate": 0.00018011122842615063, "loss": 0.9127, "step": 1497 }, { "epoch": 0.2282840597378848, "grad_norm": 1.0078125, "learning_rate": 0.0001800816776252764, "loss": 1.0635, "step": 1498 }, { "epoch": 0.22843645230112772, "grad_norm": 1.171875, "learning_rate": 0.0001800521073153539, "loss": 1.0792, "step": 1499 }, { "epoch": 0.22858884486437062, "grad_norm": 0.90234375, "learning_rate": 0.00018002251750358692, "loss": 0.8944, "step": 1500 }, { "epoch": 0.22874123742761354, "grad_norm": 0.9921875, "learning_rate": 0.0001799929081971839, "loss": 0.9673, "step": 1501 }, { "epoch": 0.22889362999085644, "grad_norm": 1.1640625, "learning_rate": 0.00017996327940335814, "loss": 0.9549, "step": 1502 }, { "epoch": 0.22904602255409937, "grad_norm": 0.84375, "learning_rate": 0.0001799336311293276, "loss": 0.9763, "step": 1503 }, { "epoch": 0.22919841511734226, "grad_norm": 0.97265625, "learning_rate": 0.00017990396338231509, "loss": 0.9749, "step": 1504 }, { "epoch": 0.2293508076805852, "grad_norm": 0.7890625, "learning_rate": 0.000179874276169548, "loss": 0.902, "step": 1505 }, { "epoch": 0.22950320024382811, "grad_norm": 0.80859375, "learning_rate": 0.00017984456949825869, "loss": 0.914, "step": 1506 }, { "epoch": 0.229655592807071, "grad_norm": 1.5625, "learning_rate": 0.00017981484337568402, "loss": 1.035, "step": 1507 }, { "epoch": 0.22980798537031394, "grad_norm": 0.62890625, "learning_rate": 0.0001797850978090658, "loss": 0.8431, "step": 1508 }, { "epoch": 0.22996037793355684, "grad_norm": 1.0390625, "learning_rate": 0.00017975533280565036, "loss": 0.8997, "step": 1509 }, { "epoch": 0.23011277049679976, "grad_norm": 0.796875, "learning_rate": 0.00017972554837268896, "loss": 0.8353, "step": 1510 }, { "epoch": 0.23026516306004266, "grad_norm": 0.859375, "learning_rate": 0.0001796957445174375, "loss": 0.9948, "step": 1511 }, { "epoch": 0.23041755562328559, "grad_norm": 0.96484375, "learning_rate": 0.00017966592124715665, "loss": 0.9616, "step": 1512 }, { "epoch": 0.23056994818652848, "grad_norm": 1.1328125, "learning_rate": 0.00017963607856911175, "loss": 0.9121, "step": 1513 }, { "epoch": 0.2307223407497714, "grad_norm": 1.015625, "learning_rate": 0.00017960621649057293, "loss": 0.9786, "step": 1514 }, { "epoch": 0.23087473331301434, "grad_norm": 0.89453125, "learning_rate": 0.00017957633501881502, "loss": 0.9386, "step": 1515 }, { "epoch": 0.23102712587625723, "grad_norm": 1.046875, "learning_rate": 0.00017954643416111756, "loss": 1.0788, "step": 1516 }, { "epoch": 0.23117951843950016, "grad_norm": 1.2265625, "learning_rate": 0.00017951651392476484, "loss": 0.96, "step": 1517 }, { "epoch": 0.23133191100274306, "grad_norm": 1.0703125, "learning_rate": 0.00017948657431704587, "loss": 1.1087, "step": 1518 }, { "epoch": 0.23148430356598598, "grad_norm": 1.328125, "learning_rate": 0.00017945661534525436, "loss": 1.2039, "step": 1519 }, { "epoch": 0.23163669612922888, "grad_norm": 0.88671875, "learning_rate": 0.00017942663701668875, "loss": 0.9487, "step": 1520 }, { "epoch": 0.2317890886924718, "grad_norm": 1.21875, "learning_rate": 0.00017939663933865218, "loss": 0.7033, "step": 1521 }, { "epoch": 0.23194148125571473, "grad_norm": 0.8359375, "learning_rate": 0.00017936662231845252, "loss": 0.979, "step": 1522 }, { "epoch": 0.23209387381895763, "grad_norm": 1.1015625, "learning_rate": 0.00017933658596340233, "loss": 0.8453, "step": 1523 }, { "epoch": 0.23224626638220056, "grad_norm": 1.0234375, "learning_rate": 0.00017930653028081896, "loss": 0.9452, "step": 1524 }, { "epoch": 0.23239865894544345, "grad_norm": 1.0234375, "learning_rate": 0.00017927645527802434, "loss": 1.1123, "step": 1525 }, { "epoch": 0.23255105150868638, "grad_norm": 0.890625, "learning_rate": 0.0001792463609623452, "loss": 0.9444, "step": 1526 }, { "epoch": 0.23270344407192928, "grad_norm": 0.84375, "learning_rate": 0.00017921624734111292, "loss": 0.8948, "step": 1527 }, { "epoch": 0.2328558366351722, "grad_norm": 0.80859375, "learning_rate": 0.00017918611442166367, "loss": 0.9186, "step": 1528 }, { "epoch": 0.23300822919841513, "grad_norm": 0.7421875, "learning_rate": 0.00017915596221133826, "loss": 0.8643, "step": 1529 }, { "epoch": 0.23316062176165803, "grad_norm": 0.796875, "learning_rate": 0.00017912579071748214, "loss": 1.0141, "step": 1530 }, { "epoch": 0.23331301432490095, "grad_norm": 0.796875, "learning_rate": 0.00017909559994744553, "loss": 0.9966, "step": 1531 }, { "epoch": 0.23346540688814385, "grad_norm": 0.7578125, "learning_rate": 0.00017906538990858337, "loss": 0.9346, "step": 1532 }, { "epoch": 0.23361779945138678, "grad_norm": 0.92578125, "learning_rate": 0.00017903516060825526, "loss": 0.882, "step": 1533 }, { "epoch": 0.23377019201462967, "grad_norm": 1.015625, "learning_rate": 0.00017900491205382547, "loss": 1.0081, "step": 1534 }, { "epoch": 0.2339225845778726, "grad_norm": 0.984375, "learning_rate": 0.000178974644252663, "loss": 0.9634, "step": 1535 }, { "epoch": 0.23407497714111553, "grad_norm": 0.92578125, "learning_rate": 0.0001789443572121415, "loss": 0.9923, "step": 1536 }, { "epoch": 0.23422736970435842, "grad_norm": 0.83984375, "learning_rate": 0.00017891405093963938, "loss": 1.0083, "step": 1537 }, { "epoch": 0.23437976226760135, "grad_norm": 0.80859375, "learning_rate": 0.0001788837254425396, "loss": 1.0065, "step": 1538 }, { "epoch": 0.23453215483084425, "grad_norm": 0.89453125, "learning_rate": 0.00017885338072822998, "loss": 0.9055, "step": 1539 }, { "epoch": 0.23468454739408717, "grad_norm": 0.71484375, "learning_rate": 0.00017882301680410283, "loss": 0.7768, "step": 1540 }, { "epoch": 0.23483693995733007, "grad_norm": 0.984375, "learning_rate": 0.00017879263367755533, "loss": 1.2049, "step": 1541 }, { "epoch": 0.234989332520573, "grad_norm": 0.8828125, "learning_rate": 0.0001787622313559892, "loss": 0.8025, "step": 1542 }, { "epoch": 0.23514172508381592, "grad_norm": 1.1953125, "learning_rate": 0.00017873180984681088, "loss": 0.9629, "step": 1543 }, { "epoch": 0.23529411764705882, "grad_norm": 1.125, "learning_rate": 0.0001787013691574315, "loss": 1.0054, "step": 1544 }, { "epoch": 0.23544651021030175, "grad_norm": 1.1484375, "learning_rate": 0.00017867090929526684, "loss": 0.9629, "step": 1545 }, { "epoch": 0.23559890277354464, "grad_norm": 0.99609375, "learning_rate": 0.0001786404302677374, "loss": 1.0487, "step": 1546 }, { "epoch": 0.23575129533678757, "grad_norm": 0.7734375, "learning_rate": 0.00017860993208226823, "loss": 0.9368, "step": 1547 }, { "epoch": 0.23590368790003047, "grad_norm": 0.875, "learning_rate": 0.0001785794147462892, "loss": 0.8925, "step": 1548 }, { "epoch": 0.2360560804632734, "grad_norm": 1.3046875, "learning_rate": 0.00017854887826723473, "loss": 0.9874, "step": 1549 }, { "epoch": 0.23620847302651632, "grad_norm": 0.984375, "learning_rate": 0.00017851832265254397, "loss": 0.9363, "step": 1550 }, { "epoch": 0.23636086558975922, "grad_norm": 1.2421875, "learning_rate": 0.00017848774790966073, "loss": 1.0128, "step": 1551 }, { "epoch": 0.23651325815300214, "grad_norm": 0.91796875, "learning_rate": 0.0001784571540460334, "loss": 0.9005, "step": 1552 }, { "epoch": 0.23666565071624504, "grad_norm": 1.03125, "learning_rate": 0.0001784265410691151, "loss": 0.9022, "step": 1553 }, { "epoch": 0.23681804327948797, "grad_norm": 1.0859375, "learning_rate": 0.0001783959089863636, "loss": 1.0453, "step": 1554 }, { "epoch": 0.23697043584273086, "grad_norm": 1.3125, "learning_rate": 0.0001783652578052413, "loss": 0.9652, "step": 1555 }, { "epoch": 0.2371228284059738, "grad_norm": 0.8203125, "learning_rate": 0.00017833458753321535, "loss": 1.1179, "step": 1556 }, { "epoch": 0.23727522096921672, "grad_norm": 1.109375, "learning_rate": 0.00017830389817775734, "loss": 1.1262, "step": 1557 }, { "epoch": 0.2374276135324596, "grad_norm": 0.99609375, "learning_rate": 0.00017827318974634375, "loss": 0.9353, "step": 1558 }, { "epoch": 0.23758000609570254, "grad_norm": 1.0390625, "learning_rate": 0.00017824246224645553, "loss": 1.0866, "step": 1559 }, { "epoch": 0.23773239865894544, "grad_norm": 0.97265625, "learning_rate": 0.00017821171568557835, "loss": 0.8879, "step": 1560 }, { "epoch": 0.23788479122218836, "grad_norm": 1.1875, "learning_rate": 0.00017818095007120251, "loss": 1.1616, "step": 1561 }, { "epoch": 0.23803718378543126, "grad_norm": 1.171875, "learning_rate": 0.000178150165410823, "loss": 1.0278, "step": 1562 }, { "epoch": 0.2381895763486742, "grad_norm": 1.109375, "learning_rate": 0.0001781193617119394, "loss": 0.9744, "step": 1563 }, { "epoch": 0.23834196891191708, "grad_norm": 0.92578125, "learning_rate": 0.00017808853898205587, "loss": 1.04, "step": 1564 }, { "epoch": 0.23849436147516, "grad_norm": 0.7890625, "learning_rate": 0.0001780576972286813, "loss": 0.9686, "step": 1565 }, { "epoch": 0.23864675403840294, "grad_norm": 0.91796875, "learning_rate": 0.00017802683645932923, "loss": 0.9047, "step": 1566 }, { "epoch": 0.23879914660164583, "grad_norm": 0.9609375, "learning_rate": 0.00017799595668151774, "loss": 1.0812, "step": 1567 }, { "epoch": 0.23895153916488876, "grad_norm": 0.81640625, "learning_rate": 0.00017796505790276958, "loss": 0.9854, "step": 1568 }, { "epoch": 0.23910393172813166, "grad_norm": 0.80078125, "learning_rate": 0.00017793414013061217, "loss": 0.9088, "step": 1569 }, { "epoch": 0.23925632429137458, "grad_norm": 0.875, "learning_rate": 0.00017790320337257752, "loss": 0.8268, "step": 1570 }, { "epoch": 0.23940871685461748, "grad_norm": 0.90234375, "learning_rate": 0.00017787224763620227, "loss": 0.8564, "step": 1571 }, { "epoch": 0.2395611094178604, "grad_norm": 0.91015625, "learning_rate": 0.00017784127292902767, "loss": 0.9066, "step": 1572 }, { "epoch": 0.23971350198110333, "grad_norm": 1.1484375, "learning_rate": 0.00017781027925859957, "loss": 0.9623, "step": 1573 }, { "epoch": 0.23986589454434623, "grad_norm": 0.828125, "learning_rate": 0.00017777926663246855, "loss": 1.0596, "step": 1574 }, { "epoch": 0.24001828710758916, "grad_norm": 0.9765625, "learning_rate": 0.00017774823505818966, "loss": 1.0284, "step": 1575 }, { "epoch": 0.24017067967083205, "grad_norm": 1.0234375, "learning_rate": 0.00017771718454332271, "loss": 1.0527, "step": 1576 }, { "epoch": 0.24032307223407498, "grad_norm": 1.109375, "learning_rate": 0.00017768611509543197, "loss": 0.906, "step": 1577 }, { "epoch": 0.24047546479731788, "grad_norm": 0.84375, "learning_rate": 0.00017765502672208646, "loss": 0.8957, "step": 1578 }, { "epoch": 0.2406278573605608, "grad_norm": 0.98046875, "learning_rate": 0.00017762391943085975, "loss": 0.9245, "step": 1579 }, { "epoch": 0.24078024992380373, "grad_norm": 0.765625, "learning_rate": 0.00017759279322933003, "loss": 0.9338, "step": 1580 }, { "epoch": 0.24093264248704663, "grad_norm": 0.94140625, "learning_rate": 0.00017756164812508004, "loss": 0.8542, "step": 1581 }, { "epoch": 0.24108503505028955, "grad_norm": 1.1484375, "learning_rate": 0.00017753048412569723, "loss": 1.0801, "step": 1582 }, { "epoch": 0.24123742761353245, "grad_norm": 0.7890625, "learning_rate": 0.00017749930123877355, "loss": 0.8874, "step": 1583 }, { "epoch": 0.24138982017677538, "grad_norm": 0.84375, "learning_rate": 0.0001774680994719057, "loss": 0.956, "step": 1584 }, { "epoch": 0.24154221274001828, "grad_norm": 0.87890625, "learning_rate": 0.00017743687883269474, "loss": 0.8918, "step": 1585 }, { "epoch": 0.2416946053032612, "grad_norm": 1.0703125, "learning_rate": 0.00017740563932874655, "loss": 1.1423, "step": 1586 }, { "epoch": 0.24184699786650413, "grad_norm": 1.1171875, "learning_rate": 0.00017737438096767153, "loss": 1.1879, "step": 1587 }, { "epoch": 0.24199939042974702, "grad_norm": 0.9296875, "learning_rate": 0.0001773431037570846, "loss": 1.0423, "step": 1588 }, { "epoch": 0.24215178299298995, "grad_norm": 0.77734375, "learning_rate": 0.0001773118077046054, "loss": 1.0114, "step": 1589 }, { "epoch": 0.24230417555623285, "grad_norm": 0.9140625, "learning_rate": 0.0001772804928178581, "loss": 1.0791, "step": 1590 }, { "epoch": 0.24245656811947577, "grad_norm": 0.7109375, "learning_rate": 0.00017724915910447142, "loss": 0.8669, "step": 1591 }, { "epoch": 0.24260896068271867, "grad_norm": 0.87109375, "learning_rate": 0.0001772178065720787, "loss": 0.9045, "step": 1592 }, { "epoch": 0.2427613532459616, "grad_norm": 0.921875, "learning_rate": 0.0001771864352283179, "loss": 1.1583, "step": 1593 }, { "epoch": 0.24291374580920452, "grad_norm": 0.96875, "learning_rate": 0.0001771550450808315, "loss": 1.0271, "step": 1594 }, { "epoch": 0.24306613837244742, "grad_norm": 1.078125, "learning_rate": 0.00017712363613726665, "loss": 1.1676, "step": 1595 }, { "epoch": 0.24321853093569035, "grad_norm": 0.9375, "learning_rate": 0.00017709220840527495, "loss": 1.0027, "step": 1596 }, { "epoch": 0.24337092349893324, "grad_norm": 0.98828125, "learning_rate": 0.00017706076189251272, "loss": 0.8389, "step": 1597 }, { "epoch": 0.24352331606217617, "grad_norm": 0.75, "learning_rate": 0.00017702929660664069, "loss": 0.9726, "step": 1598 }, { "epoch": 0.24367570862541907, "grad_norm": 0.9375, "learning_rate": 0.0001769978125553243, "loss": 0.9564, "step": 1599 }, { "epoch": 0.243828101188662, "grad_norm": 0.9453125, "learning_rate": 0.00017696630974623357, "loss": 0.88, "step": 1600 }, { "epoch": 0.24398049375190492, "grad_norm": 0.91015625, "learning_rate": 0.00017693478818704293, "loss": 1.1458, "step": 1601 }, { "epoch": 0.24413288631514782, "grad_norm": 0.83203125, "learning_rate": 0.0001769032478854316, "loss": 0.9292, "step": 1602 }, { "epoch": 0.24428527887839074, "grad_norm": 0.93359375, "learning_rate": 0.00017687168884908316, "loss": 0.9296, "step": 1603 }, { "epoch": 0.24443767144163364, "grad_norm": 0.875, "learning_rate": 0.00017684011108568592, "loss": 1.0308, "step": 1604 }, { "epoch": 0.24459006400487657, "grad_norm": 0.88671875, "learning_rate": 0.00017680851460293262, "loss": 1.0195, "step": 1605 }, { "epoch": 0.24474245656811947, "grad_norm": 0.953125, "learning_rate": 0.0001767768994085206, "loss": 1.0507, "step": 1606 }, { "epoch": 0.2448948491313624, "grad_norm": 0.90625, "learning_rate": 0.00017674526551015186, "loss": 0.9862, "step": 1607 }, { "epoch": 0.24504724169460532, "grad_norm": 0.91015625, "learning_rate": 0.0001767136129155328, "loss": 0.912, "step": 1608 }, { "epoch": 0.24519963425784821, "grad_norm": 0.96484375, "learning_rate": 0.0001766819416323745, "loss": 0.9925, "step": 1609 }, { "epoch": 0.24535202682109114, "grad_norm": 0.82421875, "learning_rate": 0.00017665025166839246, "loss": 0.9867, "step": 1610 }, { "epoch": 0.24550441938433404, "grad_norm": 0.6796875, "learning_rate": 0.00017661854303130693, "loss": 0.8582, "step": 1611 }, { "epoch": 0.24565681194757696, "grad_norm": 0.91015625, "learning_rate": 0.00017658681572884247, "loss": 0.8302, "step": 1612 }, { "epoch": 0.24580920451081986, "grad_norm": 1.0234375, "learning_rate": 0.00017655506976872837, "loss": 0.9101, "step": 1613 }, { "epoch": 0.2459615970740628, "grad_norm": 0.72265625, "learning_rate": 0.0001765233051586984, "loss": 0.8911, "step": 1614 }, { "epoch": 0.24611398963730569, "grad_norm": 1.0859375, "learning_rate": 0.00017649152190649087, "loss": 1.0264, "step": 1615 }, { "epoch": 0.2462663822005486, "grad_norm": 0.984375, "learning_rate": 0.00017645972001984866, "loss": 0.8498, "step": 1616 }, { "epoch": 0.24641877476379154, "grad_norm": 0.796875, "learning_rate": 0.0001764278995065191, "loss": 0.7893, "step": 1617 }, { "epoch": 0.24657116732703444, "grad_norm": 1.0859375, "learning_rate": 0.00017639606037425418, "loss": 1.1634, "step": 1618 }, { "epoch": 0.24672355989027736, "grad_norm": 0.80078125, "learning_rate": 0.00017636420263081036, "loss": 0.9194, "step": 1619 }, { "epoch": 0.24687595245352026, "grad_norm": 1.1171875, "learning_rate": 0.00017633232628394864, "loss": 0.8371, "step": 1620 }, { "epoch": 0.24702834501676318, "grad_norm": 0.97265625, "learning_rate": 0.00017630043134143458, "loss": 1.0084, "step": 1621 }, { "epoch": 0.24718073758000608, "grad_norm": 1.046875, "learning_rate": 0.0001762685178110382, "loss": 0.8587, "step": 1622 }, { "epoch": 0.247333130143249, "grad_norm": 1.03125, "learning_rate": 0.00017623658570053413, "loss": 0.9402, "step": 1623 }, { "epoch": 0.24748552270649193, "grad_norm": 0.74609375, "learning_rate": 0.0001762046350177015, "loss": 0.912, "step": 1624 }, { "epoch": 0.24763791526973483, "grad_norm": 0.8046875, "learning_rate": 0.0001761726657703239, "loss": 0.8737, "step": 1625 }, { "epoch": 0.24779030783297776, "grad_norm": 0.8359375, "learning_rate": 0.00017614067796618957, "loss": 1.0421, "step": 1626 }, { "epoch": 0.24794270039622066, "grad_norm": 0.87109375, "learning_rate": 0.00017610867161309114, "loss": 1.1541, "step": 1627 }, { "epoch": 0.24809509295946358, "grad_norm": 0.7890625, "learning_rate": 0.00017607664671882584, "loss": 0.9425, "step": 1628 }, { "epoch": 0.24824748552270648, "grad_norm": 0.90625, "learning_rate": 0.0001760446032911954, "loss": 0.9614, "step": 1629 }, { "epoch": 0.2483998780859494, "grad_norm": 0.921875, "learning_rate": 0.00017601254133800607, "loss": 0.9113, "step": 1630 }, { "epoch": 0.24855227064919233, "grad_norm": 1.7890625, "learning_rate": 0.00017598046086706858, "loss": 0.9731, "step": 1631 }, { "epoch": 0.24870466321243523, "grad_norm": 0.9921875, "learning_rate": 0.00017594836188619822, "loss": 0.9555, "step": 1632 }, { "epoch": 0.24885705577567815, "grad_norm": 1.0625, "learning_rate": 0.00017591624440321474, "loss": 0.9769, "step": 1633 }, { "epoch": 0.24900944833892105, "grad_norm": 1.1484375, "learning_rate": 0.00017588410842594242, "loss": 1.0964, "step": 1634 }, { "epoch": 0.24916184090216398, "grad_norm": 1.0390625, "learning_rate": 0.00017585195396221007, "loss": 0.9775, "step": 1635 }, { "epoch": 0.24931423346540688, "grad_norm": 0.9296875, "learning_rate": 0.00017581978101985092, "loss": 1.1604, "step": 1636 }, { "epoch": 0.2494666260286498, "grad_norm": 0.859375, "learning_rate": 0.00017578758960670285, "loss": 0.896, "step": 1637 }, { "epoch": 0.24961901859189273, "grad_norm": 0.76171875, "learning_rate": 0.00017575537973060808, "loss": 0.9939, "step": 1638 }, { "epoch": 0.24977141115513563, "grad_norm": 1.0, "learning_rate": 0.00017572315139941343, "loss": 1.0054, "step": 1639 }, { "epoch": 0.24992380371837855, "grad_norm": 2.453125, "learning_rate": 0.0001756909046209702, "loss": 0.9819, "step": 1640 }, { "epoch": 0.2500761962816215, "grad_norm": 0.95703125, "learning_rate": 0.00017565863940313415, "loss": 0.9971, "step": 1641 }, { "epoch": 0.2502285888448644, "grad_norm": 0.859375, "learning_rate": 0.00017562635575376555, "loss": 0.9418, "step": 1642 }, { "epoch": 0.2503809814081073, "grad_norm": 1.1171875, "learning_rate": 0.00017559405368072913, "loss": 1.1021, "step": 1643 }, { "epoch": 0.25053337397135017, "grad_norm": 0.9921875, "learning_rate": 0.00017556173319189418, "loss": 1.019, "step": 1644 }, { "epoch": 0.2506857665345931, "grad_norm": 0.7890625, "learning_rate": 0.00017552939429513447, "loss": 0.9083, "step": 1645 }, { "epoch": 0.250838159097836, "grad_norm": 0.93359375, "learning_rate": 0.0001754970369983281, "loss": 0.9524, "step": 1646 }, { "epoch": 0.2509905516610789, "grad_norm": 1.15625, "learning_rate": 0.00017546466130935793, "loss": 1.0573, "step": 1647 }, { "epoch": 0.2511429442243219, "grad_norm": 1.0, "learning_rate": 0.000175432267236111, "loss": 1.1177, "step": 1648 }, { "epoch": 0.25129533678756477, "grad_norm": 0.83984375, "learning_rate": 0.00017539985478647909, "loss": 0.851, "step": 1649 }, { "epoch": 0.25144772935080767, "grad_norm": 0.80078125, "learning_rate": 0.00017536742396835825, "loss": 1.0222, "step": 1650 }, { "epoch": 0.25160012191405057, "grad_norm": 0.83984375, "learning_rate": 0.00017533497478964914, "loss": 0.87, "step": 1651 }, { "epoch": 0.2517525144772935, "grad_norm": 0.80078125, "learning_rate": 0.00017530250725825687, "loss": 0.8178, "step": 1652 }, { "epoch": 0.2519049070405364, "grad_norm": 0.8515625, "learning_rate": 0.0001752700213820909, "loss": 0.9789, "step": 1653 }, { "epoch": 0.2520572996037793, "grad_norm": 0.80859375, "learning_rate": 0.00017523751716906535, "loss": 0.9663, "step": 1654 }, { "epoch": 0.25220969216702227, "grad_norm": 0.8984375, "learning_rate": 0.00017520499462709866, "loss": 0.9605, "step": 1655 }, { "epoch": 0.25236208473026517, "grad_norm": 1.453125, "learning_rate": 0.00017517245376411382, "loss": 1.0017, "step": 1656 }, { "epoch": 0.25251447729350807, "grad_norm": 0.84375, "learning_rate": 0.00017513989458803827, "loss": 0.9273, "step": 1657 }, { "epoch": 0.25266686985675096, "grad_norm": 1.0625, "learning_rate": 0.00017510731710680384, "loss": 1.1105, "step": 1658 }, { "epoch": 0.2528192624199939, "grad_norm": 1.0, "learning_rate": 0.00017507472132834693, "loss": 1.0021, "step": 1659 }, { "epoch": 0.2529716549832368, "grad_norm": 0.890625, "learning_rate": 0.00017504210726060828, "loss": 1.2184, "step": 1660 }, { "epoch": 0.2531240475464797, "grad_norm": 0.71875, "learning_rate": 0.00017500947491153317, "loss": 0.9095, "step": 1661 }, { "epoch": 0.25327644010972267, "grad_norm": 1.109375, "learning_rate": 0.00017497682428907135, "loss": 1.024, "step": 1662 }, { "epoch": 0.25342883267296556, "grad_norm": 1.03125, "learning_rate": 0.0001749441554011769, "loss": 1.1043, "step": 1663 }, { "epoch": 0.25358122523620846, "grad_norm": 0.77734375, "learning_rate": 0.00017491146825580854, "loss": 0.8228, "step": 1664 }, { "epoch": 0.25373361779945136, "grad_norm": 0.890625, "learning_rate": 0.00017487876286092919, "loss": 0.9716, "step": 1665 }, { "epoch": 0.2538860103626943, "grad_norm": 0.87109375, "learning_rate": 0.0001748460392245065, "loss": 0.9718, "step": 1666 }, { "epoch": 0.2540384029259372, "grad_norm": 0.859375, "learning_rate": 0.00017481329735451233, "loss": 1.0192, "step": 1667 }, { "epoch": 0.2541907954891801, "grad_norm": 0.953125, "learning_rate": 0.00017478053725892306, "loss": 0.9965, "step": 1668 }, { "epoch": 0.25434318805242306, "grad_norm": 0.96484375, "learning_rate": 0.0001747477589457196, "loss": 1.0117, "step": 1669 }, { "epoch": 0.25449558061566596, "grad_norm": 0.80078125, "learning_rate": 0.00017471496242288712, "loss": 0.8184, "step": 1670 }, { "epoch": 0.25464797317890886, "grad_norm": 0.84765625, "learning_rate": 0.0001746821476984154, "loss": 0.9865, "step": 1671 }, { "epoch": 0.25480036574215176, "grad_norm": 0.7734375, "learning_rate": 0.00017464931478029856, "loss": 0.8635, "step": 1672 }, { "epoch": 0.2549527583053947, "grad_norm": 1.0859375, "learning_rate": 0.00017461646367653512, "loss": 1.1878, "step": 1673 }, { "epoch": 0.2551051508686376, "grad_norm": 1.203125, "learning_rate": 0.00017458359439512816, "loss": 0.949, "step": 1674 }, { "epoch": 0.2552575434318805, "grad_norm": 0.94921875, "learning_rate": 0.00017455070694408507, "loss": 1.0258, "step": 1675 }, { "epoch": 0.25540993599512346, "grad_norm": 1.1796875, "learning_rate": 0.0001745178013314177, "loss": 0.9815, "step": 1676 }, { "epoch": 0.25556232855836636, "grad_norm": 0.93359375, "learning_rate": 0.00017448487756514236, "loss": 1.0531, "step": 1677 }, { "epoch": 0.25571472112160926, "grad_norm": 0.80078125, "learning_rate": 0.0001744519356532797, "loss": 0.9735, "step": 1678 }, { "epoch": 0.25586711368485215, "grad_norm": 0.8515625, "learning_rate": 0.00017441897560385491, "loss": 0.8354, "step": 1679 }, { "epoch": 0.2560195062480951, "grad_norm": 1.078125, "learning_rate": 0.0001743859974248975, "loss": 1.0019, "step": 1680 }, { "epoch": 0.256171898811338, "grad_norm": 1.046875, "learning_rate": 0.00017435300112444142, "loss": 0.8151, "step": 1681 }, { "epoch": 0.2563242913745809, "grad_norm": 0.91796875, "learning_rate": 0.000174319986710525, "loss": 1.0151, "step": 1682 }, { "epoch": 0.25647668393782386, "grad_norm": 1.0234375, "learning_rate": 0.00017428695419119115, "loss": 1.1164, "step": 1683 }, { "epoch": 0.25662907650106676, "grad_norm": 1.546875, "learning_rate": 0.00017425390357448696, "loss": 1.1243, "step": 1684 }, { "epoch": 0.25678146906430965, "grad_norm": 0.8515625, "learning_rate": 0.00017422083486846404, "loss": 1.0916, "step": 1685 }, { "epoch": 0.25693386162755255, "grad_norm": 0.8125, "learning_rate": 0.00017418774808117848, "loss": 1.0128, "step": 1686 }, { "epoch": 0.2570862541907955, "grad_norm": 1.0, "learning_rate": 0.00017415464322069065, "loss": 0.9906, "step": 1687 }, { "epoch": 0.2572386467540384, "grad_norm": 0.8984375, "learning_rate": 0.00017412152029506534, "loss": 1.1312, "step": 1688 }, { "epoch": 0.2573910393172813, "grad_norm": 0.671875, "learning_rate": 0.0001740883793123718, "loss": 0.9646, "step": 1689 }, { "epoch": 0.25754343188052425, "grad_norm": 0.96875, "learning_rate": 0.0001740552202806837, "loss": 1.003, "step": 1690 }, { "epoch": 0.25769582444376715, "grad_norm": 1.0, "learning_rate": 0.00017402204320807893, "loss": 0.9694, "step": 1691 }, { "epoch": 0.25784821700701005, "grad_norm": 1.09375, "learning_rate": 0.00017398884810264, "loss": 1.0183, "step": 1692 }, { "epoch": 0.25800060957025295, "grad_norm": 0.74609375, "learning_rate": 0.00017395563497245374, "loss": 0.8477, "step": 1693 }, { "epoch": 0.2581530021334959, "grad_norm": 0.8671875, "learning_rate": 0.00017392240382561124, "loss": 0.9458, "step": 1694 }, { "epoch": 0.2583053946967388, "grad_norm": 0.96875, "learning_rate": 0.0001738891546702082, "loss": 0.7956, "step": 1695 }, { "epoch": 0.2584577872599817, "grad_norm": 1.2578125, "learning_rate": 0.00017385588751434448, "loss": 0.8941, "step": 1696 }, { "epoch": 0.25861017982322465, "grad_norm": 0.8984375, "learning_rate": 0.00017382260236612453, "loss": 1.1041, "step": 1697 }, { "epoch": 0.25876257238646755, "grad_norm": 1.140625, "learning_rate": 0.00017378929923365704, "loss": 1.0371, "step": 1698 }, { "epoch": 0.25891496494971045, "grad_norm": 1.265625, "learning_rate": 0.00017375597812505515, "loss": 1.035, "step": 1699 }, { "epoch": 0.25906735751295334, "grad_norm": 1.3125, "learning_rate": 0.00017372263904843636, "loss": 1.1454, "step": 1700 }, { "epoch": 0.2592197500761963, "grad_norm": 1.15625, "learning_rate": 0.00017368928201192256, "loss": 0.83, "step": 1701 }, { "epoch": 0.2593721426394392, "grad_norm": 0.80859375, "learning_rate": 0.00017365590702364, "loss": 0.7341, "step": 1702 }, { "epoch": 0.2595245352026821, "grad_norm": 0.90625, "learning_rate": 0.00017362251409171927, "loss": 1.1569, "step": 1703 }, { "epoch": 0.25967692776592505, "grad_norm": 0.8515625, "learning_rate": 0.0001735891032242954, "loss": 0.8649, "step": 1704 }, { "epoch": 0.25982932032916795, "grad_norm": 0.84375, "learning_rate": 0.00017355567442950775, "loss": 0.9246, "step": 1705 }, { "epoch": 0.25998171289241084, "grad_norm": 1.0546875, "learning_rate": 0.00017352222771550008, "loss": 1.0831, "step": 1706 }, { "epoch": 0.26013410545565374, "grad_norm": 0.80859375, "learning_rate": 0.00017348876309042046, "loss": 0.8186, "step": 1707 }, { "epoch": 0.2602864980188967, "grad_norm": 1.0625, "learning_rate": 0.00017345528056242134, "loss": 1.0597, "step": 1708 }, { "epoch": 0.2604388905821396, "grad_norm": 1.09375, "learning_rate": 0.00017342178013965962, "loss": 1.1535, "step": 1709 }, { "epoch": 0.2605912831453825, "grad_norm": 0.8515625, "learning_rate": 0.0001733882618302964, "loss": 1.105, "step": 1710 }, { "epoch": 0.26074367570862544, "grad_norm": 0.84375, "learning_rate": 0.00017335472564249726, "loss": 0.7851, "step": 1711 }, { "epoch": 0.26089606827186834, "grad_norm": 0.859375, "learning_rate": 0.00017332117158443212, "loss": 0.8551, "step": 1712 }, { "epoch": 0.26104846083511124, "grad_norm": 0.81640625, "learning_rate": 0.00017328759966427515, "loss": 1.0036, "step": 1713 }, { "epoch": 0.26120085339835414, "grad_norm": 0.90234375, "learning_rate": 0.00017325400989020505, "loss": 1.0516, "step": 1714 }, { "epoch": 0.2613532459615971, "grad_norm": 0.90625, "learning_rate": 0.0001732204022704047, "loss": 1.132, "step": 1715 }, { "epoch": 0.26150563852484, "grad_norm": 1.015625, "learning_rate": 0.00017318677681306145, "loss": 1.1818, "step": 1716 }, { "epoch": 0.2616580310880829, "grad_norm": 0.859375, "learning_rate": 0.0001731531335263669, "loss": 1.1359, "step": 1717 }, { "epoch": 0.26181042365132584, "grad_norm": 0.96484375, "learning_rate": 0.0001731194724185171, "loss": 0.9953, "step": 1718 }, { "epoch": 0.26196281621456874, "grad_norm": 1.015625, "learning_rate": 0.00017308579349771232, "loss": 1.1109, "step": 1719 }, { "epoch": 0.26211520877781164, "grad_norm": 0.89453125, "learning_rate": 0.00017305209677215725, "loss": 0.9095, "step": 1720 }, { "epoch": 0.26226760134105453, "grad_norm": 0.98046875, "learning_rate": 0.00017301838225006088, "loss": 1.0344, "step": 1721 }, { "epoch": 0.2624199939042975, "grad_norm": 0.99609375, "learning_rate": 0.00017298464993963658, "loss": 0.9752, "step": 1722 }, { "epoch": 0.2625723864675404, "grad_norm": 1.0625, "learning_rate": 0.00017295089984910205, "loss": 0.9109, "step": 1723 }, { "epoch": 0.2627247790307833, "grad_norm": 0.859375, "learning_rate": 0.00017291713198667923, "loss": 0.9614, "step": 1724 }, { "epoch": 0.26287717159402624, "grad_norm": 1.1171875, "learning_rate": 0.0001728833463605945, "loss": 1.0007, "step": 1725 }, { "epoch": 0.26302956415726914, "grad_norm": 1.078125, "learning_rate": 0.00017284954297907854, "loss": 1.1386, "step": 1726 }, { "epoch": 0.26318195672051203, "grad_norm": 0.86328125, "learning_rate": 0.00017281572185036628, "loss": 1.1342, "step": 1727 }, { "epoch": 0.26333434928375493, "grad_norm": 0.80078125, "learning_rate": 0.00017278188298269708, "loss": 0.8078, "step": 1728 }, { "epoch": 0.2634867418469979, "grad_norm": 1.125, "learning_rate": 0.00017274802638431457, "loss": 1.0946, "step": 1729 }, { "epoch": 0.2636391344102408, "grad_norm": 0.87890625, "learning_rate": 0.00017271415206346666, "loss": 0.8825, "step": 1730 }, { "epoch": 0.2637915269734837, "grad_norm": 1.171875, "learning_rate": 0.0001726802600284057, "loss": 0.9463, "step": 1731 }, { "epoch": 0.26394391953672663, "grad_norm": 0.69921875, "learning_rate": 0.00017264635028738822, "loss": 0.856, "step": 1732 }, { "epoch": 0.26409631209996953, "grad_norm": 0.98046875, "learning_rate": 0.00017261242284867515, "loss": 0.845, "step": 1733 }, { "epoch": 0.26424870466321243, "grad_norm": 0.96484375, "learning_rate": 0.00017257847772053167, "loss": 1.0244, "step": 1734 }, { "epoch": 0.26440109722645533, "grad_norm": 0.796875, "learning_rate": 0.00017254451491122735, "loss": 0.9258, "step": 1735 }, { "epoch": 0.2645534897896983, "grad_norm": 0.96484375, "learning_rate": 0.00017251053442903595, "loss": 1.0125, "step": 1736 }, { "epoch": 0.2647058823529412, "grad_norm": 1.0078125, "learning_rate": 0.00017247653628223568, "loss": 0.8394, "step": 1737 }, { "epoch": 0.2648582749161841, "grad_norm": 0.703125, "learning_rate": 0.00017244252047910892, "loss": 0.8561, "step": 1738 }, { "epoch": 0.26501066747942703, "grad_norm": 0.96875, "learning_rate": 0.00017240848702794242, "loss": 0.924, "step": 1739 }, { "epoch": 0.26516306004266993, "grad_norm": 0.8515625, "learning_rate": 0.0001723744359370272, "loss": 0.881, "step": 1740 }, { "epoch": 0.2653154526059128, "grad_norm": 1.125, "learning_rate": 0.00017234036721465868, "loss": 0.924, "step": 1741 }, { "epoch": 0.2654678451691557, "grad_norm": 0.88671875, "learning_rate": 0.00017230628086913643, "loss": 1.0039, "step": 1742 }, { "epoch": 0.2656202377323987, "grad_norm": 0.98828125, "learning_rate": 0.00017227217690876434, "loss": 0.8805, "step": 1743 }, { "epoch": 0.2657726302956416, "grad_norm": 0.9375, "learning_rate": 0.0001722380553418507, "loss": 0.9445, "step": 1744 }, { "epoch": 0.2659250228588845, "grad_norm": 1.03125, "learning_rate": 0.00017220391617670794, "loss": 1.153, "step": 1745 }, { "epoch": 0.26607741542212743, "grad_norm": 1.046875, "learning_rate": 0.00017216975942165293, "loss": 0.9761, "step": 1746 }, { "epoch": 0.2662298079853703, "grad_norm": 0.94921875, "learning_rate": 0.00017213558508500667, "loss": 1.0138, "step": 1747 }, { "epoch": 0.2663822005486132, "grad_norm": 0.8984375, "learning_rate": 0.00017210139317509456, "loss": 0.9009, "step": 1748 }, { "epoch": 0.2665345931118561, "grad_norm": 0.84765625, "learning_rate": 0.00017206718370024624, "loss": 0.9582, "step": 1749 }, { "epoch": 0.2666869856750991, "grad_norm": 0.97265625, "learning_rate": 0.00017203295666879564, "loss": 0.9337, "step": 1750 }, { "epoch": 0.266839378238342, "grad_norm": 0.78125, "learning_rate": 0.00017199871208908093, "loss": 0.9987, "step": 1751 }, { "epoch": 0.26699177080158487, "grad_norm": 0.87109375, "learning_rate": 0.00017196444996944458, "loss": 0.9423, "step": 1752 }, { "epoch": 0.26714416336482777, "grad_norm": 1.0703125, "learning_rate": 0.0001719301703182334, "loss": 1.0518, "step": 1753 }, { "epoch": 0.2672965559280707, "grad_norm": 0.99609375, "learning_rate": 0.0001718958731437983, "loss": 0.9567, "step": 1754 }, { "epoch": 0.2674489484913136, "grad_norm": 0.83203125, "learning_rate": 0.00017186155845449466, "loss": 0.7597, "step": 1755 }, { "epoch": 0.2676013410545565, "grad_norm": 0.8984375, "learning_rate": 0.00017182722625868198, "loss": 0.8712, "step": 1756 }, { "epoch": 0.26775373361779947, "grad_norm": 0.77734375, "learning_rate": 0.00017179287656472406, "loss": 1.025, "step": 1757 }, { "epoch": 0.26790612618104237, "grad_norm": 0.7265625, "learning_rate": 0.00017175850938098904, "loss": 0.9549, "step": 1758 }, { "epoch": 0.26805851874428527, "grad_norm": 0.984375, "learning_rate": 0.0001717241247158492, "loss": 1.12, "step": 1759 }, { "epoch": 0.26821091130752817, "grad_norm": 0.765625, "learning_rate": 0.00017168972257768122, "loss": 1.0141, "step": 1760 }, { "epoch": 0.2683633038707711, "grad_norm": 0.93359375, "learning_rate": 0.00017165530297486584, "loss": 0.9497, "step": 1761 }, { "epoch": 0.268515696434014, "grad_norm": 0.8828125, "learning_rate": 0.00017162086591578828, "loss": 0.9396, "step": 1762 }, { "epoch": 0.2686680889972569, "grad_norm": 1.0234375, "learning_rate": 0.00017158641140883784, "loss": 1.0621, "step": 1763 }, { "epoch": 0.26882048156049987, "grad_norm": 1.09375, "learning_rate": 0.00017155193946240816, "loss": 0.9947, "step": 1764 }, { "epoch": 0.26897287412374277, "grad_norm": 1.1015625, "learning_rate": 0.00017151745008489708, "loss": 1.0051, "step": 1765 }, { "epoch": 0.26912526668698566, "grad_norm": 0.92578125, "learning_rate": 0.0001714829432847067, "loss": 1.0302, "step": 1766 }, { "epoch": 0.26927765925022856, "grad_norm": 0.70703125, "learning_rate": 0.00017144841907024345, "loss": 0.8841, "step": 1767 }, { "epoch": 0.2694300518134715, "grad_norm": 1.078125, "learning_rate": 0.00017141387744991783, "loss": 1.0695, "step": 1768 }, { "epoch": 0.2695824443767144, "grad_norm": 0.640625, "learning_rate": 0.0001713793184321447, "loss": 0.8797, "step": 1769 }, { "epoch": 0.2697348369399573, "grad_norm": 0.76953125, "learning_rate": 0.00017134474202534317, "loss": 0.955, "step": 1770 }, { "epoch": 0.26988722950320027, "grad_norm": 1.0078125, "learning_rate": 0.00017131014823793653, "loss": 1.109, "step": 1771 }, { "epoch": 0.27003962206644316, "grad_norm": 0.90625, "learning_rate": 0.0001712755370783523, "loss": 0.9479, "step": 1772 }, { "epoch": 0.27019201462968606, "grad_norm": 0.75, "learning_rate": 0.00017124090855502228, "loss": 1.0118, "step": 1773 }, { "epoch": 0.27034440719292896, "grad_norm": 0.77734375, "learning_rate": 0.0001712062626763825, "loss": 1.0276, "step": 1774 }, { "epoch": 0.2704967997561719, "grad_norm": 0.9140625, "learning_rate": 0.00017117159945087313, "loss": 0.9108, "step": 1775 }, { "epoch": 0.2706491923194148, "grad_norm": 0.98828125, "learning_rate": 0.0001711369188869387, "loss": 0.9012, "step": 1776 }, { "epoch": 0.2708015848826577, "grad_norm": 0.90625, "learning_rate": 0.00017110222099302785, "loss": 0.9801, "step": 1777 }, { "epoch": 0.27095397744590066, "grad_norm": 1.1875, "learning_rate": 0.0001710675057775935, "loss": 0.9498, "step": 1778 }, { "epoch": 0.27110637000914356, "grad_norm": 1.171875, "learning_rate": 0.00017103277324909274, "loss": 0.9362, "step": 1779 }, { "epoch": 0.27125876257238646, "grad_norm": 1.2734375, "learning_rate": 0.00017099802341598698, "loss": 0.8974, "step": 1780 }, { "epoch": 0.27141115513562936, "grad_norm": 0.9921875, "learning_rate": 0.00017096325628674174, "loss": 0.8366, "step": 1781 }, { "epoch": 0.2715635476988723, "grad_norm": 0.8359375, "learning_rate": 0.00017092847186982678, "loss": 0.8378, "step": 1782 }, { "epoch": 0.2717159402621152, "grad_norm": 1.140625, "learning_rate": 0.00017089367017371614, "loss": 0.9529, "step": 1783 }, { "epoch": 0.2718683328253581, "grad_norm": 0.7734375, "learning_rate": 0.00017085885120688796, "loss": 0.9198, "step": 1784 }, { "epoch": 0.27202072538860106, "grad_norm": 0.95703125, "learning_rate": 0.00017082401497782467, "loss": 0.911, "step": 1785 }, { "epoch": 0.27217311795184396, "grad_norm": 0.71484375, "learning_rate": 0.00017078916149501283, "loss": 0.895, "step": 1786 }, { "epoch": 0.27232551051508685, "grad_norm": 0.83984375, "learning_rate": 0.0001707542907669433, "loss": 1.0, "step": 1787 }, { "epoch": 0.27247790307832975, "grad_norm": 1.0546875, "learning_rate": 0.00017071940280211106, "loss": 0.8715, "step": 1788 }, { "epoch": 0.2726302956415727, "grad_norm": 0.859375, "learning_rate": 0.00017068449760901537, "loss": 0.8875, "step": 1789 }, { "epoch": 0.2727826882048156, "grad_norm": 0.75, "learning_rate": 0.0001706495751961596, "loss": 0.9023, "step": 1790 }, { "epoch": 0.2729350807680585, "grad_norm": 0.9609375, "learning_rate": 0.00017061463557205132, "loss": 1.1746, "step": 1791 }, { "epoch": 0.27308747333130146, "grad_norm": 0.875, "learning_rate": 0.00017057967874520234, "loss": 0.8575, "step": 1792 }, { "epoch": 0.27323986589454435, "grad_norm": 1.15625, "learning_rate": 0.00017054470472412873, "loss": 1.1059, "step": 1793 }, { "epoch": 0.27339225845778725, "grad_norm": 1.1484375, "learning_rate": 0.00017050971351735057, "loss": 1.0152, "step": 1794 }, { "epoch": 0.27354465102103015, "grad_norm": 0.90625, "learning_rate": 0.00017047470513339224, "loss": 0.9222, "step": 1795 }, { "epoch": 0.2736970435842731, "grad_norm": 1.125, "learning_rate": 0.00017043967958078229, "loss": 0.9303, "step": 1796 }, { "epoch": 0.273849436147516, "grad_norm": 0.8125, "learning_rate": 0.00017040463686805347, "loss": 0.9972, "step": 1797 }, { "epoch": 0.2740018287107589, "grad_norm": 0.921875, "learning_rate": 0.00017036957700374266, "loss": 0.9347, "step": 1798 }, { "epoch": 0.27415422127400185, "grad_norm": 0.8046875, "learning_rate": 0.00017033449999639096, "loss": 1.0211, "step": 1799 }, { "epoch": 0.27430661383724475, "grad_norm": 1.0859375, "learning_rate": 0.00017029940585454363, "loss": 0.9329, "step": 1800 }, { "epoch": 0.27445900640048765, "grad_norm": 0.95703125, "learning_rate": 0.00017026429458675012, "loss": 1.1189, "step": 1801 }, { "epoch": 0.27461139896373055, "grad_norm": 0.78515625, "learning_rate": 0.000170229166201564, "loss": 1.0574, "step": 1802 }, { "epoch": 0.2747637915269735, "grad_norm": 1.0625, "learning_rate": 0.0001701940207075431, "loss": 1.0106, "step": 1803 }, { "epoch": 0.2749161840902164, "grad_norm": 0.81640625, "learning_rate": 0.00017015885811324936, "loss": 0.9786, "step": 1804 }, { "epoch": 0.2750685766534593, "grad_norm": 0.890625, "learning_rate": 0.00017012367842724887, "loss": 0.9726, "step": 1805 }, { "epoch": 0.27522096921670225, "grad_norm": 0.90625, "learning_rate": 0.0001700884816581119, "loss": 1.0195, "step": 1806 }, { "epoch": 0.27537336177994515, "grad_norm": 0.8515625, "learning_rate": 0.00017005326781441296, "loss": 0.9355, "step": 1807 }, { "epoch": 0.27552575434318805, "grad_norm": 0.91015625, "learning_rate": 0.00017001803690473054, "loss": 0.7751, "step": 1808 }, { "epoch": 0.27567814690643094, "grad_norm": 0.953125, "learning_rate": 0.00016998278893764747, "loss": 1.1155, "step": 1809 }, { "epoch": 0.2758305394696739, "grad_norm": 0.87109375, "learning_rate": 0.00016994752392175067, "loss": 1.0138, "step": 1810 }, { "epoch": 0.2759829320329168, "grad_norm": 0.90625, "learning_rate": 0.00016991224186563116, "loss": 0.9881, "step": 1811 }, { "epoch": 0.2761353245961597, "grad_norm": 1.1328125, "learning_rate": 0.00016987694277788417, "loss": 1.0742, "step": 1812 }, { "epoch": 0.27628771715940265, "grad_norm": 0.984375, "learning_rate": 0.00016984162666710908, "loss": 0.9187, "step": 1813 }, { "epoch": 0.27644010972264554, "grad_norm": 0.80078125, "learning_rate": 0.0001698062935419094, "loss": 0.8582, "step": 1814 }, { "epoch": 0.27659250228588844, "grad_norm": 0.83203125, "learning_rate": 0.00016977094341089278, "loss": 0.9881, "step": 1815 }, { "epoch": 0.27674489484913134, "grad_norm": 1.1875, "learning_rate": 0.00016973557628267098, "loss": 1.1156, "step": 1816 }, { "epoch": 0.2768972874123743, "grad_norm": 0.953125, "learning_rate": 0.00016970019216586003, "loss": 0.9695, "step": 1817 }, { "epoch": 0.2770496799756172, "grad_norm": 0.84375, "learning_rate": 0.00016966479106907993, "loss": 0.9509, "step": 1818 }, { "epoch": 0.2772020725388601, "grad_norm": 0.92578125, "learning_rate": 0.00016962937300095494, "loss": 0.917, "step": 1819 }, { "epoch": 0.27735446510210304, "grad_norm": 1.1953125, "learning_rate": 0.0001695939379701134, "loss": 1.1265, "step": 1820 }, { "epoch": 0.27750685766534594, "grad_norm": 0.94921875, "learning_rate": 0.0001695584859851878, "loss": 0.8972, "step": 1821 }, { "epoch": 0.27765925022858884, "grad_norm": 1.109375, "learning_rate": 0.00016952301705481474, "loss": 1.0149, "step": 1822 }, { "epoch": 0.27781164279183174, "grad_norm": 1.0, "learning_rate": 0.00016948753118763493, "loss": 1.0568, "step": 1823 }, { "epoch": 0.2779640353550747, "grad_norm": 1.0390625, "learning_rate": 0.00016945202839229332, "loss": 0.8553, "step": 1824 }, { "epoch": 0.2781164279183176, "grad_norm": 0.9375, "learning_rate": 0.00016941650867743885, "loss": 1.0906, "step": 1825 }, { "epoch": 0.2782688204815605, "grad_norm": 0.98828125, "learning_rate": 0.00016938097205172463, "loss": 0.9797, "step": 1826 }, { "epoch": 0.27842121304480344, "grad_norm": 0.9609375, "learning_rate": 0.0001693454185238079, "loss": 1.0004, "step": 1827 }, { "epoch": 0.27857360560804634, "grad_norm": 0.91796875, "learning_rate": 0.00016930984810235001, "loss": 0.8532, "step": 1828 }, { "epoch": 0.27872599817128924, "grad_norm": 1.1015625, "learning_rate": 0.00016927426079601642, "loss": 1.2421, "step": 1829 }, { "epoch": 0.27887839073453213, "grad_norm": 1.03125, "learning_rate": 0.00016923865661347672, "loss": 0.9807, "step": 1830 }, { "epoch": 0.2790307832977751, "grad_norm": 1.125, "learning_rate": 0.0001692030355634046, "loss": 0.8837, "step": 1831 }, { "epoch": 0.279183175861018, "grad_norm": 0.93359375, "learning_rate": 0.0001691673976544779, "loss": 1.0841, "step": 1832 }, { "epoch": 0.2793355684242609, "grad_norm": 1.203125, "learning_rate": 0.00016913174289537845, "loss": 1.1572, "step": 1833 }, { "epoch": 0.27948796098750384, "grad_norm": 0.94140625, "learning_rate": 0.0001690960712947923, "loss": 0.9817, "step": 1834 }, { "epoch": 0.27964035355074673, "grad_norm": 0.94921875, "learning_rate": 0.00016906038286140958, "loss": 0.9223, "step": 1835 }, { "epoch": 0.27979274611398963, "grad_norm": 1.1875, "learning_rate": 0.00016902467760392445, "loss": 0.9898, "step": 1836 }, { "epoch": 0.27994513867723253, "grad_norm": 0.7421875, "learning_rate": 0.0001689889555310353, "loss": 0.8672, "step": 1837 }, { "epoch": 0.2800975312404755, "grad_norm": 0.8125, "learning_rate": 0.0001689532166514445, "loss": 0.8578, "step": 1838 }, { "epoch": 0.2802499238037184, "grad_norm": 1.0078125, "learning_rate": 0.00016891746097385854, "loss": 1.1878, "step": 1839 }, { "epoch": 0.2804023163669613, "grad_norm": 0.98828125, "learning_rate": 0.00016888168850698803, "loss": 0.9423, "step": 1840 }, { "epoch": 0.28055470893020423, "grad_norm": 0.89453125, "learning_rate": 0.0001688458992595477, "loss": 0.9706, "step": 1841 }, { "epoch": 0.28070710149344713, "grad_norm": 0.734375, "learning_rate": 0.00016881009324025626, "loss": 1.0178, "step": 1842 }, { "epoch": 0.28085949405669003, "grad_norm": 1.2421875, "learning_rate": 0.0001687742704578366, "loss": 1.3708, "step": 1843 }, { "epoch": 0.2810118866199329, "grad_norm": 1.0546875, "learning_rate": 0.00016873843092101568, "loss": 0.9918, "step": 1844 }, { "epoch": 0.2811642791831759, "grad_norm": 1.078125, "learning_rate": 0.0001687025746385245, "loss": 1.0189, "step": 1845 }, { "epoch": 0.2813166717464188, "grad_norm": 0.87890625, "learning_rate": 0.00016866670161909818, "loss": 0.9694, "step": 1846 }, { "epoch": 0.2814690643096617, "grad_norm": 1.0078125, "learning_rate": 0.00016863081187147588, "loss": 1.0081, "step": 1847 }, { "epoch": 0.28162145687290463, "grad_norm": 0.75390625, "learning_rate": 0.00016859490540440094, "loss": 0.8408, "step": 1848 }, { "epoch": 0.2817738494361475, "grad_norm": 0.81640625, "learning_rate": 0.00016855898222662056, "loss": 1.0148, "step": 1849 }, { "epoch": 0.2819262419993904, "grad_norm": 1.140625, "learning_rate": 0.00016852304234688626, "loss": 1.0662, "step": 1850 }, { "epoch": 0.2820786345626333, "grad_norm": 0.93359375, "learning_rate": 0.00016848708577395344, "loss": 1.0166, "step": 1851 }, { "epoch": 0.2822310271258763, "grad_norm": 1.125, "learning_rate": 0.00016845111251658168, "loss": 1.1271, "step": 1852 }, { "epoch": 0.2823834196891192, "grad_norm": 0.91015625, "learning_rate": 0.00016841512258353457, "loss": 0.898, "step": 1853 }, { "epoch": 0.2825358122523621, "grad_norm": 0.84765625, "learning_rate": 0.00016837911598357975, "loss": 0.9668, "step": 1854 }, { "epoch": 0.28268820481560497, "grad_norm": 0.91015625, "learning_rate": 0.000168343092725489, "loss": 0.8432, "step": 1855 }, { "epoch": 0.2828405973788479, "grad_norm": 0.9375, "learning_rate": 0.00016830705281803803, "loss": 1.1934, "step": 1856 }, { "epoch": 0.2829929899420908, "grad_norm": 1.234375, "learning_rate": 0.00016827099627000672, "loss": 1.0353, "step": 1857 }, { "epoch": 0.2831453825053337, "grad_norm": 0.66015625, "learning_rate": 0.000168234923090179, "loss": 0.7536, "step": 1858 }, { "epoch": 0.2832977750685767, "grad_norm": 0.828125, "learning_rate": 0.00016819883328734276, "loss": 0.9744, "step": 1859 }, { "epoch": 0.28345016763181957, "grad_norm": 1.109375, "learning_rate": 0.00016816272687029, "loss": 1.0207, "step": 1860 }, { "epoch": 0.28360256019506247, "grad_norm": 0.8984375, "learning_rate": 0.0001681266038478168, "loss": 1.0395, "step": 1861 }, { "epoch": 0.28375495275830537, "grad_norm": 0.921875, "learning_rate": 0.00016809046422872322, "loss": 0.9806, "step": 1862 }, { "epoch": 0.2839073453215483, "grad_norm": 1.296875, "learning_rate": 0.00016805430802181338, "loss": 1.1544, "step": 1863 }, { "epoch": 0.2840597378847912, "grad_norm": 1.140625, "learning_rate": 0.00016801813523589546, "loss": 0.9385, "step": 1864 }, { "epoch": 0.2842121304480341, "grad_norm": 0.92578125, "learning_rate": 0.00016798194587978174, "loss": 0.9634, "step": 1865 }, { "epoch": 0.28436452301127707, "grad_norm": 1.1015625, "learning_rate": 0.00016794573996228835, "loss": 0.8097, "step": 1866 }, { "epoch": 0.28451691557451997, "grad_norm": 0.84765625, "learning_rate": 0.00016790951749223564, "loss": 0.9404, "step": 1867 }, { "epoch": 0.28466930813776287, "grad_norm": 0.8515625, "learning_rate": 0.0001678732784784479, "loss": 1.0695, "step": 1868 }, { "epoch": 0.28482170070100576, "grad_norm": 1.0078125, "learning_rate": 0.0001678370229297535, "loss": 1.03, "step": 1869 }, { "epoch": 0.2849740932642487, "grad_norm": 1.21875, "learning_rate": 0.00016780075085498478, "loss": 1.0956, "step": 1870 }, { "epoch": 0.2851264858274916, "grad_norm": 1.0234375, "learning_rate": 0.00016776446226297818, "loss": 1.1086, "step": 1871 }, { "epoch": 0.2852788783907345, "grad_norm": 0.98828125, "learning_rate": 0.00016772815716257412, "loss": 0.933, "step": 1872 }, { "epoch": 0.28543127095397747, "grad_norm": 0.8671875, "learning_rate": 0.000167691835562617, "loss": 1.1177, "step": 1873 }, { "epoch": 0.28558366351722037, "grad_norm": 0.93359375, "learning_rate": 0.0001676554974719553, "loss": 0.8693, "step": 1874 }, { "epoch": 0.28573605608046326, "grad_norm": 1.0078125, "learning_rate": 0.00016761914289944156, "loss": 0.9128, "step": 1875 }, { "epoch": 0.28588844864370616, "grad_norm": 0.6484375, "learning_rate": 0.0001675827718539322, "loss": 0.8868, "step": 1876 }, { "epoch": 0.2860408412069491, "grad_norm": 1.1171875, "learning_rate": 0.00016754638434428776, "loss": 1.1106, "step": 1877 }, { "epoch": 0.286193233770192, "grad_norm": 0.95703125, "learning_rate": 0.00016750998037937275, "loss": 0.9169, "step": 1878 }, { "epoch": 0.2863456263334349, "grad_norm": 0.87890625, "learning_rate": 0.00016747355996805572, "loss": 0.9442, "step": 1879 }, { "epoch": 0.28649801889667786, "grad_norm": 0.8203125, "learning_rate": 0.00016743712311920918, "loss": 0.9695, "step": 1880 }, { "epoch": 0.28665041145992076, "grad_norm": 1.0078125, "learning_rate": 0.0001674006698417097, "loss": 1.1781, "step": 1881 }, { "epoch": 0.28680280402316366, "grad_norm": 0.9765625, "learning_rate": 0.0001673642001444378, "loss": 0.8693, "step": 1882 }, { "epoch": 0.28695519658640656, "grad_norm": 0.953125, "learning_rate": 0.00016732771403627804, "loss": 0.9957, "step": 1883 }, { "epoch": 0.2871075891496495, "grad_norm": 0.7421875, "learning_rate": 0.00016729121152611894, "loss": 0.9773, "step": 1884 }, { "epoch": 0.2872599817128924, "grad_norm": 0.8046875, "learning_rate": 0.00016725469262285304, "loss": 1.0515, "step": 1885 }, { "epoch": 0.2874123742761353, "grad_norm": 0.87890625, "learning_rate": 0.0001672181573353769, "loss": 0.9477, "step": 1886 }, { "epoch": 0.28756476683937826, "grad_norm": 1.0234375, "learning_rate": 0.000167181605672591, "loss": 0.919, "step": 1887 }, { "epoch": 0.28771715940262116, "grad_norm": 0.9375, "learning_rate": 0.00016714503764339987, "loss": 0.9373, "step": 1888 }, { "epoch": 0.28786955196586406, "grad_norm": 1.0859375, "learning_rate": 0.00016710845325671202, "loss": 0.7895, "step": 1889 }, { "epoch": 0.28802194452910695, "grad_norm": 0.8359375, "learning_rate": 0.00016707185252143992, "loss": 0.9105, "step": 1890 }, { "epoch": 0.2881743370923499, "grad_norm": 1.09375, "learning_rate": 0.0001670352354465, "loss": 1.0373, "step": 1891 }, { "epoch": 0.2883267296555928, "grad_norm": 1.0625, "learning_rate": 0.00016699860204081278, "loss": 1.1256, "step": 1892 }, { "epoch": 0.2884791222188357, "grad_norm": 0.7421875, "learning_rate": 0.00016696195231330263, "loss": 0.7764, "step": 1893 }, { "epoch": 0.28863151478207866, "grad_norm": 0.984375, "learning_rate": 0.00016692528627289797, "loss": 1.068, "step": 1894 }, { "epoch": 0.28878390734532156, "grad_norm": 1.0625, "learning_rate": 0.00016688860392853117, "loss": 0.9355, "step": 1895 }, { "epoch": 0.28893629990856445, "grad_norm": 1.03125, "learning_rate": 0.00016685190528913858, "loss": 0.9284, "step": 1896 }, { "epoch": 0.28908869247180735, "grad_norm": 1.0234375, "learning_rate": 0.0001668151903636605, "loss": 0.9395, "step": 1897 }, { "epoch": 0.2892410850350503, "grad_norm": 0.82421875, "learning_rate": 0.00016677845916104124, "loss": 0.9352, "step": 1898 }, { "epoch": 0.2893934775982932, "grad_norm": 0.8828125, "learning_rate": 0.00016674171169022906, "loss": 0.8999, "step": 1899 }, { "epoch": 0.2895458701615361, "grad_norm": 0.79296875, "learning_rate": 0.00016670494796017612, "loss": 0.941, "step": 1900 }, { "epoch": 0.28969826272477905, "grad_norm": 1.0, "learning_rate": 0.00016666816797983861, "loss": 1.122, "step": 1901 }, { "epoch": 0.28985065528802195, "grad_norm": 0.76953125, "learning_rate": 0.00016663137175817672, "loss": 0.9822, "step": 1902 }, { "epoch": 0.29000304785126485, "grad_norm": 0.90234375, "learning_rate": 0.00016659455930415445, "loss": 0.9058, "step": 1903 }, { "epoch": 0.29015544041450775, "grad_norm": 0.76953125, "learning_rate": 0.0001665577306267399, "loss": 0.8858, "step": 1904 }, { "epoch": 0.2903078329777507, "grad_norm": 0.90234375, "learning_rate": 0.00016652088573490504, "loss": 1.0109, "step": 1905 }, { "epoch": 0.2904602255409936, "grad_norm": 1.15625, "learning_rate": 0.00016648402463762584, "loss": 0.9863, "step": 1906 }, { "epoch": 0.2906126181042365, "grad_norm": 0.96484375, "learning_rate": 0.00016644714734388217, "loss": 1.0401, "step": 1907 }, { "epoch": 0.29076501066747945, "grad_norm": 1.171875, "learning_rate": 0.0001664102538626579, "loss": 1.0312, "step": 1908 }, { "epoch": 0.29091740323072235, "grad_norm": 0.859375, "learning_rate": 0.0001663733442029408, "loss": 0.912, "step": 1909 }, { "epoch": 0.29106979579396525, "grad_norm": 0.98828125, "learning_rate": 0.00016633641837372255, "loss": 1.0489, "step": 1910 }, { "epoch": 0.29122218835720814, "grad_norm": 1.0625, "learning_rate": 0.00016629947638399886, "loss": 0.992, "step": 1911 }, { "epoch": 0.2913745809204511, "grad_norm": 0.86328125, "learning_rate": 0.00016626251824276934, "loss": 1.1532, "step": 1912 }, { "epoch": 0.291526973483694, "grad_norm": 1.0390625, "learning_rate": 0.0001662255439590375, "loss": 0.9769, "step": 1913 }, { "epoch": 0.2916793660469369, "grad_norm": 1.078125, "learning_rate": 0.0001661885535418108, "loss": 1.0992, "step": 1914 }, { "epoch": 0.29183175861017985, "grad_norm": 1.0, "learning_rate": 0.00016615154700010064, "loss": 0.9795, "step": 1915 }, { "epoch": 0.29198415117342275, "grad_norm": 1.0390625, "learning_rate": 0.00016611452434292238, "loss": 0.924, "step": 1916 }, { "epoch": 0.29213654373666564, "grad_norm": 1.2890625, "learning_rate": 0.0001660774855792952, "loss": 1.1058, "step": 1917 }, { "epoch": 0.29228893629990854, "grad_norm": 0.94921875, "learning_rate": 0.00016604043071824238, "loss": 0.9733, "step": 1918 }, { "epoch": 0.2924413288631515, "grad_norm": 0.90234375, "learning_rate": 0.00016600335976879095, "loss": 0.9201, "step": 1919 }, { "epoch": 0.2925937214263944, "grad_norm": 0.90625, "learning_rate": 0.00016596627273997189, "loss": 0.8746, "step": 1920 }, { "epoch": 0.2927461139896373, "grad_norm": 0.8515625, "learning_rate": 0.00016592916964082018, "loss": 0.8913, "step": 1921 }, { "epoch": 0.29289850655288024, "grad_norm": 1.0546875, "learning_rate": 0.0001658920504803747, "loss": 1.1688, "step": 1922 }, { "epoch": 0.29305089911612314, "grad_norm": 1.0234375, "learning_rate": 0.0001658549152676782, "loss": 0.9868, "step": 1923 }, { "epoch": 0.29320329167936604, "grad_norm": 0.96875, "learning_rate": 0.00016581776401177728, "loss": 1.0985, "step": 1924 }, { "epoch": 0.29335568424260894, "grad_norm": 0.6875, "learning_rate": 0.0001657805967217226, "loss": 0.8165, "step": 1925 }, { "epoch": 0.2935080768058519, "grad_norm": 0.8515625, "learning_rate": 0.0001657434134065686, "loss": 0.8758, "step": 1926 }, { "epoch": 0.2936604693690948, "grad_norm": 0.8671875, "learning_rate": 0.0001657062140753737, "loss": 0.9521, "step": 1927 }, { "epoch": 0.2938128619323377, "grad_norm": 0.91015625, "learning_rate": 0.00016566899873720017, "loss": 0.913, "step": 1928 }, { "epoch": 0.29396525449558064, "grad_norm": 0.63671875, "learning_rate": 0.0001656317674011142, "loss": 0.8441, "step": 1929 }, { "epoch": 0.29411764705882354, "grad_norm": 0.89453125, "learning_rate": 0.00016559452007618592, "loss": 0.8799, "step": 1930 }, { "epoch": 0.29427003962206644, "grad_norm": 1.2109375, "learning_rate": 0.00016555725677148926, "loss": 1.0562, "step": 1931 }, { "epoch": 0.29442243218530934, "grad_norm": 0.9296875, "learning_rate": 0.0001655199774961021, "loss": 1.1616, "step": 1932 }, { "epoch": 0.2945748247485523, "grad_norm": 0.7734375, "learning_rate": 0.00016548268225910628, "loss": 0.9565, "step": 1933 }, { "epoch": 0.2947272173117952, "grad_norm": 0.8671875, "learning_rate": 0.00016544537106958734, "loss": 0.889, "step": 1934 }, { "epoch": 0.2948796098750381, "grad_norm": 1.0625, "learning_rate": 0.00016540804393663493, "loss": 0.9072, "step": 1935 }, { "epoch": 0.29503200243828104, "grad_norm": 0.921875, "learning_rate": 0.00016537070086934243, "loss": 0.8297, "step": 1936 }, { "epoch": 0.29518439500152394, "grad_norm": 1.078125, "learning_rate": 0.00016533334187680715, "loss": 1.2225, "step": 1937 }, { "epoch": 0.29533678756476683, "grad_norm": 0.7890625, "learning_rate": 0.00016529596696813028, "loss": 0.937, "step": 1938 }, { "epoch": 0.29548918012800973, "grad_norm": 1.0, "learning_rate": 0.00016525857615241687, "loss": 1.1855, "step": 1939 }, { "epoch": 0.2956415726912527, "grad_norm": 1.015625, "learning_rate": 0.00016522116943877587, "loss": 0.9357, "step": 1940 }, { "epoch": 0.2957939652544956, "grad_norm": 1.2265625, "learning_rate": 0.00016518374683632012, "loss": 0.9311, "step": 1941 }, { "epoch": 0.2959463578177385, "grad_norm": 1.0, "learning_rate": 0.00016514630835416628, "loss": 1.0427, "step": 1942 }, { "epoch": 0.29609875038098143, "grad_norm": 0.92578125, "learning_rate": 0.00016510885400143486, "loss": 0.9362, "step": 1943 }, { "epoch": 0.29625114294422433, "grad_norm": 1.0, "learning_rate": 0.00016507138378725039, "loss": 0.9166, "step": 1944 }, { "epoch": 0.29640353550746723, "grad_norm": 1.03125, "learning_rate": 0.00016503389772074104, "loss": 1.0222, "step": 1945 }, { "epoch": 0.29655592807071013, "grad_norm": 0.9609375, "learning_rate": 0.000164996395811039, "loss": 1.0539, "step": 1946 }, { "epoch": 0.2967083206339531, "grad_norm": 1.140625, "learning_rate": 0.0001649588780672803, "loss": 0.8941, "step": 1947 }, { "epoch": 0.296860713197196, "grad_norm": 1.140625, "learning_rate": 0.00016492134449860478, "loss": 0.9779, "step": 1948 }, { "epoch": 0.2970131057604389, "grad_norm": 0.93359375, "learning_rate": 0.0001648837951141561, "loss": 1.0436, "step": 1949 }, { "epoch": 0.29716549832368183, "grad_norm": 1.234375, "learning_rate": 0.00016484622992308197, "loss": 1.0463, "step": 1950 }, { "epoch": 0.29731789088692473, "grad_norm": 0.8203125, "learning_rate": 0.00016480864893453364, "loss": 0.883, "step": 1951 }, { "epoch": 0.2974702834501676, "grad_norm": 0.83203125, "learning_rate": 0.0001647710521576665, "loss": 0.8843, "step": 1952 }, { "epoch": 0.2976226760134105, "grad_norm": 1.171875, "learning_rate": 0.00016473343960163965, "loss": 1.0599, "step": 1953 }, { "epoch": 0.2977750685766535, "grad_norm": 1.015625, "learning_rate": 0.00016469581127561602, "loss": 1.101, "step": 1954 }, { "epoch": 0.2979274611398964, "grad_norm": 1.1953125, "learning_rate": 0.0001646581671887624, "loss": 0.9951, "step": 1955 }, { "epoch": 0.2980798537031393, "grad_norm": 0.9453125, "learning_rate": 0.00016462050735024946, "loss": 1.0259, "step": 1956 }, { "epoch": 0.29823224626638223, "grad_norm": 0.89453125, "learning_rate": 0.00016458283176925168, "loss": 0.8938, "step": 1957 }, { "epoch": 0.2983846388296251, "grad_norm": 0.875, "learning_rate": 0.00016454514045494736, "loss": 1.0621, "step": 1958 }, { "epoch": 0.298537031392868, "grad_norm": 0.765625, "learning_rate": 0.00016450743341651865, "loss": 0.916, "step": 1959 }, { "epoch": 0.2986894239561109, "grad_norm": 1.1484375, "learning_rate": 0.00016446971066315152, "loss": 0.9685, "step": 1960 }, { "epoch": 0.2988418165193539, "grad_norm": 0.93359375, "learning_rate": 0.0001644319722040358, "loss": 0.9983, "step": 1961 }, { "epoch": 0.2989942090825968, "grad_norm": 0.98046875, "learning_rate": 0.0001643942180483651, "loss": 0.903, "step": 1962 }, { "epoch": 0.29914660164583967, "grad_norm": 0.890625, "learning_rate": 0.00016435644820533683, "loss": 1.1286, "step": 1963 }, { "epoch": 0.29929899420908257, "grad_norm": 1.046875, "learning_rate": 0.00016431866268415237, "loss": 1.0286, "step": 1964 }, { "epoch": 0.2994513867723255, "grad_norm": 0.859375, "learning_rate": 0.00016428086149401674, "loss": 0.8815, "step": 1965 }, { "epoch": 0.2996037793355684, "grad_norm": 0.859375, "learning_rate": 0.00016424304464413884, "loss": 1.0089, "step": 1966 }, { "epoch": 0.2997561718988113, "grad_norm": 0.9921875, "learning_rate": 0.00016420521214373151, "loss": 0.9408, "step": 1967 }, { "epoch": 0.2999085644620543, "grad_norm": 0.9609375, "learning_rate": 0.00016416736400201115, "loss": 0.9505, "step": 1968 }, { "epoch": 0.30006095702529717, "grad_norm": 0.9609375, "learning_rate": 0.0001641295002281982, "loss": 1.0111, "step": 1969 }, { "epoch": 0.30021334958854007, "grad_norm": 1.1328125, "learning_rate": 0.00016409162083151675, "loss": 0.9345, "step": 1970 }, { "epoch": 0.30036574215178297, "grad_norm": 1.25, "learning_rate": 0.0001640537258211949, "loss": 1.0944, "step": 1971 }, { "epoch": 0.3005181347150259, "grad_norm": 1.21875, "learning_rate": 0.00016401581520646428, "loss": 0.9986, "step": 1972 }, { "epoch": 0.3006705272782688, "grad_norm": 0.953125, "learning_rate": 0.0001639778889965605, "loss": 0.8688, "step": 1973 }, { "epoch": 0.3008229198415117, "grad_norm": 0.99609375, "learning_rate": 0.00016393994720072298, "loss": 0.9315, "step": 1974 }, { "epoch": 0.30097531240475467, "grad_norm": 1.078125, "learning_rate": 0.00016390198982819482, "loss": 0.9375, "step": 1975 }, { "epoch": 0.30112770496799757, "grad_norm": 1.078125, "learning_rate": 0.00016386401688822303, "loss": 1.1136, "step": 1976 }, { "epoch": 0.30128009753124046, "grad_norm": 1.171875, "learning_rate": 0.0001638260283900583, "loss": 1.0971, "step": 1977 }, { "epoch": 0.30143249009448336, "grad_norm": 0.953125, "learning_rate": 0.00016378802434295525, "loss": 1.0871, "step": 1978 }, { "epoch": 0.3015848826577263, "grad_norm": 1.2265625, "learning_rate": 0.0001637500047561722, "loss": 0.9948, "step": 1979 }, { "epoch": 0.3017372752209692, "grad_norm": 0.93359375, "learning_rate": 0.00016371196963897125, "loss": 1.033, "step": 1980 }, { "epoch": 0.3018896677842121, "grad_norm": 1.2265625, "learning_rate": 0.00016367391900061828, "loss": 1.0619, "step": 1981 }, { "epoch": 0.30204206034745507, "grad_norm": 0.95703125, "learning_rate": 0.00016363585285038298, "loss": 1.0193, "step": 1982 }, { "epoch": 0.30219445291069796, "grad_norm": 1.2109375, "learning_rate": 0.00016359777119753885, "loss": 1.0346, "step": 1983 }, { "epoch": 0.30234684547394086, "grad_norm": 1.3515625, "learning_rate": 0.0001635596740513631, "loss": 0.9046, "step": 1984 }, { "epoch": 0.30249923803718376, "grad_norm": 1.0703125, "learning_rate": 0.00016352156142113673, "loss": 0.9945, "step": 1985 }, { "epoch": 0.3026516306004267, "grad_norm": 0.93359375, "learning_rate": 0.00016348343331614457, "loss": 0.9615, "step": 1986 }, { "epoch": 0.3028040231636696, "grad_norm": 0.87109375, "learning_rate": 0.00016344528974567512, "loss": 0.9577, "step": 1987 }, { "epoch": 0.3029564157269125, "grad_norm": 0.9453125, "learning_rate": 0.00016340713071902077, "loss": 1.0418, "step": 1988 }, { "epoch": 0.30310880829015546, "grad_norm": 0.9765625, "learning_rate": 0.00016336895624547752, "loss": 1.2177, "step": 1989 }, { "epoch": 0.30326120085339836, "grad_norm": 0.94921875, "learning_rate": 0.00016333076633434526, "loss": 0.8353, "step": 1990 }, { "epoch": 0.30341359341664126, "grad_norm": 1.015625, "learning_rate": 0.00016329256099492764, "loss": 0.9843, "step": 1991 }, { "epoch": 0.30356598597988416, "grad_norm": 0.8359375, "learning_rate": 0.000163254340236532, "loss": 1.1242, "step": 1992 }, { "epoch": 0.3037183785431271, "grad_norm": 0.765625, "learning_rate": 0.00016321610406846944, "loss": 0.8839, "step": 1993 }, { "epoch": 0.30387077110637, "grad_norm": 1.0546875, "learning_rate": 0.00016317785250005487, "loss": 0.9226, "step": 1994 }, { "epoch": 0.3040231636696129, "grad_norm": 0.80859375, "learning_rate": 0.00016313958554060694, "loss": 0.9523, "step": 1995 }, { "epoch": 0.30417555623285586, "grad_norm": 0.91796875, "learning_rate": 0.000163101303199448, "loss": 1.0158, "step": 1996 }, { "epoch": 0.30432794879609876, "grad_norm": 0.98828125, "learning_rate": 0.00016306300548590415, "loss": 1.0271, "step": 1997 }, { "epoch": 0.30448034135934166, "grad_norm": 0.96875, "learning_rate": 0.00016302469240930532, "loss": 0.8991, "step": 1998 }, { "epoch": 0.30463273392258455, "grad_norm": 1.1015625, "learning_rate": 0.00016298636397898514, "loss": 0.8481, "step": 1999 }, { "epoch": 0.3047851264858275, "grad_norm": 1.0703125, "learning_rate": 0.00016294802020428093, "loss": 1.1793, "step": 2000 }, { "epoch": 0.3049375190490704, "grad_norm": 0.875, "learning_rate": 0.00016290966109453375, "loss": 0.9343, "step": 2001 }, { "epoch": 0.3050899116123133, "grad_norm": 0.7265625, "learning_rate": 0.0001628712866590885, "loss": 0.8172, "step": 2002 }, { "epoch": 0.30524230417555626, "grad_norm": 0.79296875, "learning_rate": 0.0001628328969072937, "loss": 1.1185, "step": 2003 }, { "epoch": 0.30539469673879915, "grad_norm": 1.1875, "learning_rate": 0.0001627944918485017, "loss": 1.1392, "step": 2004 }, { "epoch": 0.30554708930204205, "grad_norm": 0.94921875, "learning_rate": 0.00016275607149206843, "loss": 0.9534, "step": 2005 }, { "epoch": 0.30569948186528495, "grad_norm": 0.890625, "learning_rate": 0.0001627176358473537, "loss": 0.8772, "step": 2006 }, { "epoch": 0.3058518744285279, "grad_norm": 0.8984375, "learning_rate": 0.000162679184923721, "loss": 0.9827, "step": 2007 }, { "epoch": 0.3060042669917708, "grad_norm": 0.84375, "learning_rate": 0.00016264071873053752, "loss": 1.0967, "step": 2008 }, { "epoch": 0.3061566595550137, "grad_norm": 0.8515625, "learning_rate": 0.00016260223727717415, "loss": 0.8054, "step": 2009 }, { "epoch": 0.30630905211825665, "grad_norm": 1.0, "learning_rate": 0.00016256374057300557, "loss": 1.1867, "step": 2010 }, { "epoch": 0.30646144468149955, "grad_norm": 0.96484375, "learning_rate": 0.00016252522862741006, "loss": 0.988, "step": 2011 }, { "epoch": 0.30661383724474245, "grad_norm": 1.140625, "learning_rate": 0.00016248670144976976, "loss": 1.0303, "step": 2012 }, { "epoch": 0.30676622980798535, "grad_norm": 1.0234375, "learning_rate": 0.00016244815904947039, "loss": 1.0632, "step": 2013 }, { "epoch": 0.3069186223712283, "grad_norm": 0.9375, "learning_rate": 0.00016240960143590146, "loss": 0.9314, "step": 2014 }, { "epoch": 0.3070710149344712, "grad_norm": 0.9140625, "learning_rate": 0.00016237102861845618, "loss": 1.0255, "step": 2015 }, { "epoch": 0.3072234074977141, "grad_norm": 0.90234375, "learning_rate": 0.00016233244060653138, "loss": 1.1368, "step": 2016 }, { "epoch": 0.30737580006095705, "grad_norm": 0.92578125, "learning_rate": 0.0001622938374095277, "loss": 0.7816, "step": 2017 }, { "epoch": 0.30752819262419995, "grad_norm": 0.8359375, "learning_rate": 0.00016225521903684946, "loss": 0.9932, "step": 2018 }, { "epoch": 0.30768058518744285, "grad_norm": 0.9375, "learning_rate": 0.00016221658549790457, "loss": 0.8592, "step": 2019 }, { "epoch": 0.30783297775068574, "grad_norm": 1.0234375, "learning_rate": 0.0001621779368021048, "loss": 0.9947, "step": 2020 }, { "epoch": 0.3079853703139287, "grad_norm": 1.1796875, "learning_rate": 0.00016213927295886547, "loss": 0.9408, "step": 2021 }, { "epoch": 0.3081377628771716, "grad_norm": 0.8359375, "learning_rate": 0.0001621005939776057, "loss": 0.9678, "step": 2022 }, { "epoch": 0.3082901554404145, "grad_norm": 1.109375, "learning_rate": 0.00016206189986774815, "loss": 1.0459, "step": 2023 }, { "epoch": 0.30844254800365745, "grad_norm": 0.97265625, "learning_rate": 0.00016202319063871935, "loss": 0.8615, "step": 2024 }, { "epoch": 0.30859494056690034, "grad_norm": 0.9375, "learning_rate": 0.00016198446629994942, "loss": 0.9625, "step": 2025 }, { "epoch": 0.30874733313014324, "grad_norm": 0.94140625, "learning_rate": 0.00016194572686087215, "loss": 1.0553, "step": 2026 }, { "epoch": 0.30889972569338614, "grad_norm": 0.70703125, "learning_rate": 0.000161906972330925, "loss": 0.9512, "step": 2027 }, { "epoch": 0.3090521182566291, "grad_norm": 0.91015625, "learning_rate": 0.00016186820271954914, "loss": 0.8452, "step": 2028 }, { "epoch": 0.309204510819872, "grad_norm": 0.80078125, "learning_rate": 0.00016182941803618946, "loss": 0.905, "step": 2029 }, { "epoch": 0.3093569033831149, "grad_norm": 1.125, "learning_rate": 0.0001617906182902944, "loss": 1.0977, "step": 2030 }, { "epoch": 0.30950929594635784, "grad_norm": 0.91796875, "learning_rate": 0.00016175180349131617, "loss": 0.961, "step": 2031 }, { "epoch": 0.30966168850960074, "grad_norm": 1.0390625, "learning_rate": 0.0001617129736487106, "loss": 1.0412, "step": 2032 }, { "epoch": 0.30981408107284364, "grad_norm": 0.85546875, "learning_rate": 0.00016167412877193718, "loss": 1.0149, "step": 2033 }, { "epoch": 0.30996647363608654, "grad_norm": 0.9375, "learning_rate": 0.00016163526887045913, "loss": 1.0476, "step": 2034 }, { "epoch": 0.3101188661993295, "grad_norm": 0.75, "learning_rate": 0.0001615963939537432, "loss": 0.7634, "step": 2035 }, { "epoch": 0.3102712587625724, "grad_norm": 1.1015625, "learning_rate": 0.00016155750403125998, "loss": 1.0055, "step": 2036 }, { "epoch": 0.3104236513258153, "grad_norm": 0.8828125, "learning_rate": 0.00016151859911248354, "loss": 1.0916, "step": 2037 }, { "epoch": 0.31057604388905824, "grad_norm": 0.9765625, "learning_rate": 0.00016147967920689173, "loss": 0.8763, "step": 2038 }, { "epoch": 0.31072843645230114, "grad_norm": 1.1171875, "learning_rate": 0.00016144074432396596, "loss": 1.013, "step": 2039 }, { "epoch": 0.31088082901554404, "grad_norm": 0.81640625, "learning_rate": 0.00016140179447319132, "loss": 0.9104, "step": 2040 }, { "epoch": 0.31103322157878693, "grad_norm": 0.9296875, "learning_rate": 0.0001613628296640566, "loss": 0.9601, "step": 2041 }, { "epoch": 0.3111856141420299, "grad_norm": 0.94140625, "learning_rate": 0.00016132384990605414, "loss": 0.9644, "step": 2042 }, { "epoch": 0.3113380067052728, "grad_norm": 1.0390625, "learning_rate": 0.00016128485520868, "loss": 1.0568, "step": 2043 }, { "epoch": 0.3114903992685157, "grad_norm": 0.82421875, "learning_rate": 0.00016124584558143386, "loss": 0.8978, "step": 2044 }, { "epoch": 0.31164279183175864, "grad_norm": 0.671875, "learning_rate": 0.00016120682103381898, "loss": 0.8126, "step": 2045 }, { "epoch": 0.31179518439500153, "grad_norm": 0.8125, "learning_rate": 0.00016116778157534233, "loss": 0.9017, "step": 2046 }, { "epoch": 0.31194757695824443, "grad_norm": 0.88671875, "learning_rate": 0.00016112872721551447, "loss": 0.9492, "step": 2047 }, { "epoch": 0.31209996952148733, "grad_norm": 0.70703125, "learning_rate": 0.00016108965796384964, "loss": 0.9074, "step": 2048 }, { "epoch": 0.3122523620847303, "grad_norm": 0.71875, "learning_rate": 0.00016105057382986558, "loss": 0.8914, "step": 2049 }, { "epoch": 0.3124047546479732, "grad_norm": 0.89453125, "learning_rate": 0.00016101147482308387, "loss": 0.9926, "step": 2050 }, { "epoch": 0.3125571472112161, "grad_norm": 0.86328125, "learning_rate": 0.0001609723609530295, "loss": 1.0754, "step": 2051 }, { "epoch": 0.31270953977445903, "grad_norm": 0.859375, "learning_rate": 0.00016093323222923125, "loss": 0.8807, "step": 2052 }, { "epoch": 0.31286193233770193, "grad_norm": 1.0703125, "learning_rate": 0.00016089408866122137, "loss": 0.9558, "step": 2053 }, { "epoch": 0.31301432490094483, "grad_norm": 1.0234375, "learning_rate": 0.00016085493025853583, "loss": 0.8841, "step": 2054 }, { "epoch": 0.3131667174641877, "grad_norm": 0.80859375, "learning_rate": 0.00016081575703071416, "loss": 1.0136, "step": 2055 }, { "epoch": 0.3133191100274307, "grad_norm": 1.0078125, "learning_rate": 0.0001607765689872995, "loss": 1.0448, "step": 2056 }, { "epoch": 0.3134715025906736, "grad_norm": 0.90625, "learning_rate": 0.00016073736613783868, "loss": 0.9715, "step": 2057 }, { "epoch": 0.3136238951539165, "grad_norm": 1.1328125, "learning_rate": 0.00016069814849188204, "loss": 1.0998, "step": 2058 }, { "epoch": 0.31377628771715943, "grad_norm": 0.9453125, "learning_rate": 0.00016065891605898357, "loss": 1.0848, "step": 2059 }, { "epoch": 0.31392868028040233, "grad_norm": 1.1171875, "learning_rate": 0.0001606196688487009, "loss": 0.875, "step": 2060 }, { "epoch": 0.3140810728436452, "grad_norm": 0.8984375, "learning_rate": 0.00016058040687059511, "loss": 0.8688, "step": 2061 }, { "epoch": 0.3142334654068881, "grad_norm": 0.93359375, "learning_rate": 0.00016054113013423108, "loss": 1.0288, "step": 2062 }, { "epoch": 0.3143858579701311, "grad_norm": 0.94140625, "learning_rate": 0.00016050183864917716, "loss": 0.8908, "step": 2063 }, { "epoch": 0.314538250533374, "grad_norm": 1.1015625, "learning_rate": 0.0001604625324250053, "loss": 0.959, "step": 2064 }, { "epoch": 0.3146906430966169, "grad_norm": 1.2421875, "learning_rate": 0.00016042321147129106, "loss": 0.9567, "step": 2065 }, { "epoch": 0.31484303565985977, "grad_norm": 1.0234375, "learning_rate": 0.00016038387579761363, "loss": 1.1774, "step": 2066 }, { "epoch": 0.3149954282231027, "grad_norm": 0.875, "learning_rate": 0.0001603445254135557, "loss": 0.9437, "step": 2067 }, { "epoch": 0.3151478207863456, "grad_norm": 1.125, "learning_rate": 0.00016030516032870362, "loss": 0.9041, "step": 2068 }, { "epoch": 0.3153002133495885, "grad_norm": 0.85546875, "learning_rate": 0.00016026578055264724, "loss": 1.0676, "step": 2069 }, { "epoch": 0.3154526059128315, "grad_norm": 0.76171875, "learning_rate": 0.00016022638609498015, "loss": 0.9282, "step": 2070 }, { "epoch": 0.31560499847607437, "grad_norm": 0.7734375, "learning_rate": 0.00016018697696529928, "loss": 0.8832, "step": 2071 }, { "epoch": 0.31575739103931727, "grad_norm": 1.0234375, "learning_rate": 0.00016014755317320535, "loss": 0.9052, "step": 2072 }, { "epoch": 0.31590978360256017, "grad_norm": 0.8828125, "learning_rate": 0.00016010811472830252, "loss": 1.0946, "step": 2073 }, { "epoch": 0.3160621761658031, "grad_norm": 1.15625, "learning_rate": 0.00016006866164019855, "loss": 1.0304, "step": 2074 }, { "epoch": 0.316214568729046, "grad_norm": 1.0234375, "learning_rate": 0.00016002919391850483, "loss": 0.8853, "step": 2075 }, { "epoch": 0.3163669612922889, "grad_norm": 1.0859375, "learning_rate": 0.00015998971157283625, "loss": 1.175, "step": 2076 }, { "epoch": 0.31651935385553187, "grad_norm": 1.0234375, "learning_rate": 0.00015995021461281123, "loss": 0.9044, "step": 2077 }, { "epoch": 0.31667174641877477, "grad_norm": 1.0078125, "learning_rate": 0.00015991070304805183, "loss": 0.8973, "step": 2078 }, { "epoch": 0.31682413898201767, "grad_norm": 1.2578125, "learning_rate": 0.00015987117688818364, "loss": 1.0271, "step": 2079 }, { "epoch": 0.31697653154526056, "grad_norm": 1.2109375, "learning_rate": 0.00015983163614283582, "loss": 1.0796, "step": 2080 }, { "epoch": 0.3171289241085035, "grad_norm": 1.0234375, "learning_rate": 0.000159792080821641, "loss": 1.0021, "step": 2081 }, { "epoch": 0.3172813166717464, "grad_norm": 0.8046875, "learning_rate": 0.00015975251093423552, "loss": 0.9941, "step": 2082 }, { "epoch": 0.3174337092349893, "grad_norm": 0.90625, "learning_rate": 0.00015971292649025908, "loss": 0.8408, "step": 2083 }, { "epoch": 0.31758610179823227, "grad_norm": 1.265625, "learning_rate": 0.00015967332749935507, "loss": 0.8805, "step": 2084 }, { "epoch": 0.31773849436147517, "grad_norm": 0.9765625, "learning_rate": 0.00015963371397117036, "loss": 0.915, "step": 2085 }, { "epoch": 0.31789088692471806, "grad_norm": 0.85546875, "learning_rate": 0.0001595940859153554, "loss": 0.7712, "step": 2086 }, { "epoch": 0.31804327948796096, "grad_norm": 0.87890625, "learning_rate": 0.0001595544433415641, "loss": 0.8739, "step": 2087 }, { "epoch": 0.3181956720512039, "grad_norm": 0.98828125, "learning_rate": 0.000159514786259454, "loss": 1.0485, "step": 2088 }, { "epoch": 0.3183480646144468, "grad_norm": 1.0859375, "learning_rate": 0.00015947511467868613, "loss": 1.1365, "step": 2089 }, { "epoch": 0.3185004571776897, "grad_norm": 1.1015625, "learning_rate": 0.00015943542860892505, "loss": 1.0531, "step": 2090 }, { "epoch": 0.31865284974093266, "grad_norm": 0.8671875, "learning_rate": 0.0001593957280598389, "loss": 0.9169, "step": 2091 }, { "epoch": 0.31880524230417556, "grad_norm": 0.7578125, "learning_rate": 0.00015935601304109924, "loss": 0.7049, "step": 2092 }, { "epoch": 0.31895763486741846, "grad_norm": 0.98828125, "learning_rate": 0.00015931628356238126, "loss": 1.1038, "step": 2093 }, { "epoch": 0.31911002743066136, "grad_norm": 1.1640625, "learning_rate": 0.00015927653963336363, "loss": 1.0648, "step": 2094 }, { "epoch": 0.3192624199939043, "grad_norm": 0.875, "learning_rate": 0.0001592367812637285, "loss": 0.8967, "step": 2095 }, { "epoch": 0.3194148125571472, "grad_norm": 1.0546875, "learning_rate": 0.00015919700846316162, "loss": 1.0075, "step": 2096 }, { "epoch": 0.3195672051203901, "grad_norm": 0.9453125, "learning_rate": 0.00015915722124135227, "loss": 0.9284, "step": 2097 }, { "epoch": 0.31971959768363306, "grad_norm": 0.89453125, "learning_rate": 0.00015911741960799306, "loss": 0.8391, "step": 2098 }, { "epoch": 0.31987199024687596, "grad_norm": 0.89453125, "learning_rate": 0.00015907760357278033, "loss": 0.963, "step": 2099 }, { "epoch": 0.32002438281011886, "grad_norm": 0.99609375, "learning_rate": 0.00015903777314541382, "loss": 0.8015, "step": 2100 }, { "epoch": 0.32017677537336175, "grad_norm": 1.0234375, "learning_rate": 0.00015899792833559679, "loss": 1.1391, "step": 2101 }, { "epoch": 0.3203291679366047, "grad_norm": 0.984375, "learning_rate": 0.000158958069153036, "loss": 1.1597, "step": 2102 }, { "epoch": 0.3204815604998476, "grad_norm": 0.84765625, "learning_rate": 0.00015891819560744176, "loss": 0.7975, "step": 2103 }, { "epoch": 0.3206339530630905, "grad_norm": 1.1328125, "learning_rate": 0.0001588783077085278, "loss": 1.1437, "step": 2104 }, { "epoch": 0.32078634562633346, "grad_norm": 0.99609375, "learning_rate": 0.0001588384054660114, "loss": 0.9916, "step": 2105 }, { "epoch": 0.32093873818957636, "grad_norm": 1.2734375, "learning_rate": 0.00015879848888961332, "loss": 0.7082, "step": 2106 }, { "epoch": 0.32109113075281925, "grad_norm": 1.171875, "learning_rate": 0.00015875855798905776, "loss": 1.0411, "step": 2107 }, { "epoch": 0.32124352331606215, "grad_norm": 1.125, "learning_rate": 0.00015871861277407255, "loss": 1.2797, "step": 2108 }, { "epoch": 0.3213959158793051, "grad_norm": 0.92578125, "learning_rate": 0.0001586786532543889, "loss": 0.8151, "step": 2109 }, { "epoch": 0.321548308442548, "grad_norm": 0.77734375, "learning_rate": 0.00015863867943974148, "loss": 0.9647, "step": 2110 }, { "epoch": 0.3217007010057909, "grad_norm": 0.8828125, "learning_rate": 0.00015859869133986852, "loss": 0.8596, "step": 2111 }, { "epoch": 0.32185309356903385, "grad_norm": 0.80859375, "learning_rate": 0.00015855868896451168, "loss": 1.0655, "step": 2112 }, { "epoch": 0.32200548613227675, "grad_norm": 1.078125, "learning_rate": 0.0001585186723234161, "loss": 1.2443, "step": 2113 }, { "epoch": 0.32215787869551965, "grad_norm": 1.34375, "learning_rate": 0.00015847864142633044, "loss": 0.9531, "step": 2114 }, { "epoch": 0.32231027125876255, "grad_norm": 1.265625, "learning_rate": 0.0001584385962830068, "loss": 1.1304, "step": 2115 }, { "epoch": 0.3224626638220055, "grad_norm": 1.2578125, "learning_rate": 0.00015839853690320074, "loss": 1.1347, "step": 2116 }, { "epoch": 0.3226150563852484, "grad_norm": 1.1640625, "learning_rate": 0.0001583584632966713, "loss": 1.0876, "step": 2117 }, { "epoch": 0.3227674489484913, "grad_norm": 1.078125, "learning_rate": 0.00015831837547318101, "loss": 0.8381, "step": 2118 }, { "epoch": 0.32291984151173425, "grad_norm": 1.1015625, "learning_rate": 0.00015827827344249578, "loss": 0.8716, "step": 2119 }, { "epoch": 0.32307223407497715, "grad_norm": 1.3203125, "learning_rate": 0.00015823815721438512, "loss": 0.9926, "step": 2120 }, { "epoch": 0.32322462663822005, "grad_norm": 0.890625, "learning_rate": 0.00015819802679862188, "loss": 0.9535, "step": 2121 }, { "epoch": 0.32337701920146295, "grad_norm": 1.03125, "learning_rate": 0.0001581578822049824, "loss": 1.1135, "step": 2122 }, { "epoch": 0.3235294117647059, "grad_norm": 1.0078125, "learning_rate": 0.0001581177234432465, "loss": 1.1836, "step": 2123 }, { "epoch": 0.3236818043279488, "grad_norm": 1.2890625, "learning_rate": 0.00015807755052319738, "loss": 1.0691, "step": 2124 }, { "epoch": 0.3238341968911917, "grad_norm": 0.87109375, "learning_rate": 0.00015803736345462182, "loss": 0.8218, "step": 2125 }, { "epoch": 0.32398658945443465, "grad_norm": 0.93359375, "learning_rate": 0.00015799716224730993, "loss": 1.0378, "step": 2126 }, { "epoch": 0.32413898201767755, "grad_norm": 0.9453125, "learning_rate": 0.0001579569469110553, "loss": 1.0978, "step": 2127 }, { "epoch": 0.32429137458092044, "grad_norm": 0.8515625, "learning_rate": 0.00015791671745565497, "loss": 1.0454, "step": 2128 }, { "epoch": 0.32444376714416334, "grad_norm": 1.140625, "learning_rate": 0.0001578764738909094, "loss": 0.8685, "step": 2129 }, { "epoch": 0.3245961597074063, "grad_norm": 1.15625, "learning_rate": 0.00015783621622662247, "loss": 0.8576, "step": 2130 }, { "epoch": 0.3247485522706492, "grad_norm": 1.2734375, "learning_rate": 0.00015779594447260165, "loss": 1.0781, "step": 2131 }, { "epoch": 0.3249009448338921, "grad_norm": 1.0078125, "learning_rate": 0.00015775565863865757, "loss": 0.9074, "step": 2132 }, { "epoch": 0.32505333739713504, "grad_norm": 0.90625, "learning_rate": 0.00015771535873460453, "loss": 0.914, "step": 2133 }, { "epoch": 0.32520572996037794, "grad_norm": 0.98046875, "learning_rate": 0.00015767504477026014, "loss": 1.1234, "step": 2134 }, { "epoch": 0.32535812252362084, "grad_norm": 1.109375, "learning_rate": 0.00015763471675544547, "loss": 0.9949, "step": 2135 }, { "epoch": 0.32551051508686374, "grad_norm": 0.9453125, "learning_rate": 0.00015759437469998498, "loss": 1.0163, "step": 2136 }, { "epoch": 0.3256629076501067, "grad_norm": 1.0859375, "learning_rate": 0.0001575540186137066, "loss": 0.9849, "step": 2137 }, { "epoch": 0.3258153002133496, "grad_norm": 0.8671875, "learning_rate": 0.00015751364850644166, "loss": 0.972, "step": 2138 }, { "epoch": 0.3259676927765925, "grad_norm": 0.80859375, "learning_rate": 0.00015747326438802486, "loss": 0.9936, "step": 2139 }, { "epoch": 0.32612008533983544, "grad_norm": 1.171875, "learning_rate": 0.00015743286626829437, "loss": 1.0429, "step": 2140 }, { "epoch": 0.32627247790307834, "grad_norm": 0.984375, "learning_rate": 0.00015739245415709175, "loss": 1.118, "step": 2141 }, { "epoch": 0.32642487046632124, "grad_norm": 0.90625, "learning_rate": 0.00015735202806426202, "loss": 0.9165, "step": 2142 }, { "epoch": 0.32657726302956414, "grad_norm": 0.96484375, "learning_rate": 0.00015731158799965348, "loss": 1.158, "step": 2143 }, { "epoch": 0.3267296555928071, "grad_norm": 1.0625, "learning_rate": 0.00015727113397311798, "loss": 1.0067, "step": 2144 }, { "epoch": 0.32688204815605, "grad_norm": 1.0, "learning_rate": 0.00015723066599451064, "loss": 0.9667, "step": 2145 }, { "epoch": 0.3270344407192929, "grad_norm": 1.3359375, "learning_rate": 0.00015719018407369008, "loss": 1.0446, "step": 2146 }, { "epoch": 0.32718683328253584, "grad_norm": 0.7421875, "learning_rate": 0.00015714968822051826, "loss": 0.8185, "step": 2147 }, { "epoch": 0.32733922584577874, "grad_norm": 1.1015625, "learning_rate": 0.00015710917844486056, "loss": 1.237, "step": 2148 }, { "epoch": 0.32749161840902163, "grad_norm": 0.83984375, "learning_rate": 0.00015706865475658582, "loss": 0.8021, "step": 2149 }, { "epoch": 0.32764401097226453, "grad_norm": 1.1484375, "learning_rate": 0.00015702811716556604, "loss": 1.0306, "step": 2150 }, { "epoch": 0.3277964035355075, "grad_norm": 0.83203125, "learning_rate": 0.00015698756568167685, "loss": 1.0783, "step": 2151 }, { "epoch": 0.3279487960987504, "grad_norm": 0.87890625, "learning_rate": 0.00015694700031479718, "loss": 0.9718, "step": 2152 }, { "epoch": 0.3281011886619933, "grad_norm": 1.2578125, "learning_rate": 0.0001569064210748093, "loss": 1.1749, "step": 2153 }, { "epoch": 0.32825358122523624, "grad_norm": 0.84375, "learning_rate": 0.00015686582797159893, "loss": 0.8589, "step": 2154 }, { "epoch": 0.32840597378847913, "grad_norm": 1.0703125, "learning_rate": 0.0001568252210150551, "loss": 0.9552, "step": 2155 }, { "epoch": 0.32855836635172203, "grad_norm": 1.1015625, "learning_rate": 0.00015678460021507034, "loss": 1.0692, "step": 2156 }, { "epoch": 0.32871075891496493, "grad_norm": 1.1171875, "learning_rate": 0.00015674396558154033, "loss": 1.0444, "step": 2157 }, { "epoch": 0.3288631514782079, "grad_norm": 0.953125, "learning_rate": 0.00015670331712436432, "loss": 1.0214, "step": 2158 }, { "epoch": 0.3290155440414508, "grad_norm": 0.7578125, "learning_rate": 0.00015666265485344484, "loss": 1.0308, "step": 2159 }, { "epoch": 0.3291679366046937, "grad_norm": 0.9765625, "learning_rate": 0.00015662197877868784, "loss": 0.8493, "step": 2160 }, { "epoch": 0.32932032916793663, "grad_norm": 0.96484375, "learning_rate": 0.00015658128891000254, "loss": 0.8779, "step": 2161 }, { "epoch": 0.32947272173117953, "grad_norm": 0.8671875, "learning_rate": 0.00015654058525730165, "loss": 0.9064, "step": 2162 }, { "epoch": 0.3296251142944224, "grad_norm": 0.85546875, "learning_rate": 0.0001564998678305011, "loss": 1.0485, "step": 2163 }, { "epoch": 0.3297775068576653, "grad_norm": 1.3046875, "learning_rate": 0.00015645913663952024, "loss": 0.8863, "step": 2164 }, { "epoch": 0.3299298994209083, "grad_norm": 0.984375, "learning_rate": 0.00015641839169428182, "loss": 0.9996, "step": 2165 }, { "epoch": 0.3300822919841512, "grad_norm": 0.97265625, "learning_rate": 0.00015637763300471188, "loss": 0.9507, "step": 2166 }, { "epoch": 0.3302346845473941, "grad_norm": 0.921875, "learning_rate": 0.0001563368605807398, "loss": 1.0376, "step": 2167 }, { "epoch": 0.33038707711063703, "grad_norm": 0.96484375, "learning_rate": 0.00015629607443229835, "loss": 0.9564, "step": 2168 }, { "epoch": 0.3305394696738799, "grad_norm": 0.85546875, "learning_rate": 0.00015625527456932363, "loss": 0.928, "step": 2169 }, { "epoch": 0.3306918622371228, "grad_norm": 0.9453125, "learning_rate": 0.00015621446100175503, "loss": 1.011, "step": 2170 }, { "epoch": 0.3308442548003657, "grad_norm": 0.8828125, "learning_rate": 0.00015617363373953536, "loss": 0.9395, "step": 2171 }, { "epoch": 0.3309966473636087, "grad_norm": 1.2265625, "learning_rate": 0.00015613279279261074, "loss": 1.0711, "step": 2172 }, { "epoch": 0.3311490399268516, "grad_norm": 0.7578125, "learning_rate": 0.00015609193817093058, "loss": 1.016, "step": 2173 }, { "epoch": 0.33130143249009447, "grad_norm": 0.84375, "learning_rate": 0.00015605106988444764, "loss": 0.9065, "step": 2174 }, { "epoch": 0.33145382505333737, "grad_norm": 0.9375, "learning_rate": 0.0001560101879431181, "loss": 0.9812, "step": 2175 }, { "epoch": 0.3316062176165803, "grad_norm": 1.03125, "learning_rate": 0.0001559692923569013, "loss": 1.0566, "step": 2176 }, { "epoch": 0.3317586101798232, "grad_norm": 0.92578125, "learning_rate": 0.00015592838313576004, "loss": 0.9399, "step": 2177 }, { "epoch": 0.3319110027430661, "grad_norm": 1.0, "learning_rate": 0.00015588746028966037, "loss": 1.1038, "step": 2178 }, { "epoch": 0.3320633953063091, "grad_norm": 0.90625, "learning_rate": 0.0001558465238285717, "loss": 0.9825, "step": 2179 }, { "epoch": 0.33221578786955197, "grad_norm": 0.93359375, "learning_rate": 0.00015580557376246675, "loss": 0.9645, "step": 2180 }, { "epoch": 0.33236818043279487, "grad_norm": 0.953125, "learning_rate": 0.00015576461010132154, "loss": 0.92, "step": 2181 }, { "epoch": 0.33252057299603777, "grad_norm": 0.93359375, "learning_rate": 0.00015572363285511534, "loss": 0.9519, "step": 2182 }, { "epoch": 0.3326729655592807, "grad_norm": 1.140625, "learning_rate": 0.00015568264203383094, "loss": 1.0871, "step": 2183 }, { "epoch": 0.3328253581225236, "grad_norm": 0.734375, "learning_rate": 0.00015564163764745416, "loss": 0.8753, "step": 2184 }, { "epoch": 0.3329777506857665, "grad_norm": 0.68359375, "learning_rate": 0.00015560061970597432, "loss": 0.8627, "step": 2185 }, { "epoch": 0.33313014324900947, "grad_norm": 0.8203125, "learning_rate": 0.00015555958821938397, "loss": 0.8498, "step": 2186 }, { "epoch": 0.33328253581225237, "grad_norm": 1.0390625, "learning_rate": 0.00015551854319767897, "loss": 1.1254, "step": 2187 }, { "epoch": 0.33343492837549527, "grad_norm": 0.890625, "learning_rate": 0.00015547748465085848, "loss": 0.9991, "step": 2188 }, { "epoch": 0.33358732093873816, "grad_norm": 1.015625, "learning_rate": 0.00015543641258892492, "loss": 1.0083, "step": 2189 }, { "epoch": 0.3337397135019811, "grad_norm": 1.265625, "learning_rate": 0.0001553953270218841, "loss": 1.0025, "step": 2190 }, { "epoch": 0.333892106065224, "grad_norm": 0.90234375, "learning_rate": 0.00015535422795974498, "loss": 0.9679, "step": 2191 }, { "epoch": 0.3340444986284669, "grad_norm": 1.21875, "learning_rate": 0.00015531311541251995, "loss": 0.9936, "step": 2192 }, { "epoch": 0.33419689119170987, "grad_norm": 0.8828125, "learning_rate": 0.00015527198939022457, "loss": 0.8933, "step": 2193 }, { "epoch": 0.33434928375495276, "grad_norm": 1.046875, "learning_rate": 0.0001552308499028778, "loss": 1.1582, "step": 2194 }, { "epoch": 0.33450167631819566, "grad_norm": 1.0859375, "learning_rate": 0.0001551896969605017, "loss": 1.016, "step": 2195 }, { "epoch": 0.33465406888143856, "grad_norm": 0.7578125, "learning_rate": 0.0001551485305731218, "loss": 0.8317, "step": 2196 }, { "epoch": 0.3348064614446815, "grad_norm": 0.9453125, "learning_rate": 0.00015510735075076681, "loss": 1.0317, "step": 2197 }, { "epoch": 0.3349588540079244, "grad_norm": 0.9296875, "learning_rate": 0.0001550661575034687, "loss": 0.8957, "step": 2198 }, { "epoch": 0.3351112465711673, "grad_norm": 1.0625, "learning_rate": 0.00015502495084126277, "loss": 0.957, "step": 2199 }, { "epoch": 0.33526363913441026, "grad_norm": 0.6875, "learning_rate": 0.00015498373077418753, "loss": 0.9671, "step": 2200 }, { "epoch": 0.33541603169765316, "grad_norm": 0.90625, "learning_rate": 0.0001549424973122848, "loss": 1.068, "step": 2201 }, { "epoch": 0.33556842426089606, "grad_norm": 0.80078125, "learning_rate": 0.00015490125046559964, "loss": 0.983, "step": 2202 }, { "epoch": 0.33572081682413896, "grad_norm": 1.2109375, "learning_rate": 0.00015485999024418039, "loss": 1.0785, "step": 2203 }, { "epoch": 0.3358732093873819, "grad_norm": 0.984375, "learning_rate": 0.00015481871665807856, "loss": 1.1641, "step": 2204 }, { "epoch": 0.3360256019506248, "grad_norm": 0.8984375, "learning_rate": 0.0001547774297173491, "loss": 1.1834, "step": 2205 }, { "epoch": 0.3361779945138677, "grad_norm": 1.109375, "learning_rate": 0.00015473612943205, "loss": 0.9978, "step": 2206 }, { "epoch": 0.33633038707711066, "grad_norm": 0.91015625, "learning_rate": 0.00015469481581224272, "loss": 0.8564, "step": 2207 }, { "epoch": 0.33648277964035356, "grad_norm": 0.890625, "learning_rate": 0.00015465348886799173, "loss": 0.894, "step": 2208 }, { "epoch": 0.33663517220359646, "grad_norm": 0.87890625, "learning_rate": 0.00015461214860936493, "loss": 0.8472, "step": 2209 }, { "epoch": 0.33678756476683935, "grad_norm": 1.171875, "learning_rate": 0.0001545707950464334, "loss": 1.111, "step": 2210 }, { "epoch": 0.3369399573300823, "grad_norm": 0.9453125, "learning_rate": 0.00015452942818927143, "loss": 0.9173, "step": 2211 }, { "epoch": 0.3370923498933252, "grad_norm": 0.92578125, "learning_rate": 0.00015448804804795663, "loss": 0.9752, "step": 2212 }, { "epoch": 0.3372447424565681, "grad_norm": 0.93359375, "learning_rate": 0.00015444665463256976, "loss": 1.0086, "step": 2213 }, { "epoch": 0.33739713501981106, "grad_norm": 0.98046875, "learning_rate": 0.00015440524795319485, "loss": 0.8777, "step": 2214 }, { "epoch": 0.33754952758305395, "grad_norm": 1.0703125, "learning_rate": 0.00015436382801991923, "loss": 1.1377, "step": 2215 }, { "epoch": 0.33770192014629685, "grad_norm": 0.97265625, "learning_rate": 0.0001543223948428333, "loss": 1.0483, "step": 2216 }, { "epoch": 0.33785431270953975, "grad_norm": 1.1953125, "learning_rate": 0.00015428094843203082, "loss": 0.9766, "step": 2217 }, { "epoch": 0.3380067052727827, "grad_norm": 0.87109375, "learning_rate": 0.00015423948879760872, "loss": 0.8302, "step": 2218 }, { "epoch": 0.3381590978360256, "grad_norm": 1.625, "learning_rate": 0.00015419801594966718, "loss": 0.9906, "step": 2219 }, { "epoch": 0.3383114903992685, "grad_norm": 0.8359375, "learning_rate": 0.00015415652989830955, "loss": 0.9379, "step": 2220 }, { "epoch": 0.33846388296251145, "grad_norm": 1.140625, "learning_rate": 0.00015411503065364252, "loss": 1.1853, "step": 2221 }, { "epoch": 0.33861627552575435, "grad_norm": 1.1328125, "learning_rate": 0.00015407351822577578, "loss": 1.1835, "step": 2222 }, { "epoch": 0.33876866808899725, "grad_norm": 0.95703125, "learning_rate": 0.00015403199262482243, "loss": 0.833, "step": 2223 }, { "epoch": 0.33892106065224015, "grad_norm": 0.90625, "learning_rate": 0.0001539904538608987, "loss": 1.0473, "step": 2224 }, { "epoch": 0.3390734532154831, "grad_norm": 0.89453125, "learning_rate": 0.00015394890194412396, "loss": 1.1203, "step": 2225 }, { "epoch": 0.339225845778726, "grad_norm": 0.86328125, "learning_rate": 0.00015390733688462092, "loss": 1.0145, "step": 2226 }, { "epoch": 0.3393782383419689, "grad_norm": 1.3046875, "learning_rate": 0.00015386575869251543, "loss": 1.1815, "step": 2227 }, { "epoch": 0.33953063090521185, "grad_norm": 0.79296875, "learning_rate": 0.0001538241673779365, "loss": 0.9595, "step": 2228 }, { "epoch": 0.33968302346845475, "grad_norm": 1.109375, "learning_rate": 0.0001537825629510164, "loss": 0.9956, "step": 2229 }, { "epoch": 0.33983541603169765, "grad_norm": 0.99609375, "learning_rate": 0.00015374094542189054, "loss": 0.9343, "step": 2230 }, { "epoch": 0.33998780859494054, "grad_norm": 0.828125, "learning_rate": 0.00015369931480069761, "loss": 1.0098, "step": 2231 }, { "epoch": 0.3401402011581835, "grad_norm": 1.359375, "learning_rate": 0.00015365767109757936, "loss": 0.8496, "step": 2232 }, { "epoch": 0.3402925937214264, "grad_norm": 0.83203125, "learning_rate": 0.00015361601432268082, "loss": 0.9637, "step": 2233 }, { "epoch": 0.3404449862846693, "grad_norm": 0.9453125, "learning_rate": 0.0001535743444861502, "loss": 1.0433, "step": 2234 }, { "epoch": 0.34059737884791225, "grad_norm": 1.0625, "learning_rate": 0.00015353266159813883, "loss": 1.0151, "step": 2235 }, { "epoch": 0.34074977141115514, "grad_norm": 0.73828125, "learning_rate": 0.00015349096566880127, "loss": 0.8536, "step": 2236 }, { "epoch": 0.34090216397439804, "grad_norm": 0.984375, "learning_rate": 0.00015344925670829527, "loss": 0.9937, "step": 2237 }, { "epoch": 0.34105455653764094, "grad_norm": 0.87109375, "learning_rate": 0.00015340753472678172, "loss": 1.085, "step": 2238 }, { "epoch": 0.3412069491008839, "grad_norm": 1.078125, "learning_rate": 0.0001533657997344247, "loss": 1.152, "step": 2239 }, { "epoch": 0.3413593416641268, "grad_norm": 1.2890625, "learning_rate": 0.00015332405174139146, "loss": 0.9264, "step": 2240 }, { "epoch": 0.3415117342273697, "grad_norm": 0.74609375, "learning_rate": 0.00015328229075785242, "loss": 0.8176, "step": 2241 }, { "epoch": 0.34166412679061264, "grad_norm": 1.125, "learning_rate": 0.00015324051679398108, "loss": 1.1353, "step": 2242 }, { "epoch": 0.34181651935385554, "grad_norm": 0.81640625, "learning_rate": 0.0001531987298599543, "loss": 0.9197, "step": 2243 }, { "epoch": 0.34196891191709844, "grad_norm": 1.34375, "learning_rate": 0.00015315692996595187, "loss": 1.148, "step": 2244 }, { "epoch": 0.34212130448034134, "grad_norm": 0.9140625, "learning_rate": 0.0001531151171221569, "loss": 1.1638, "step": 2245 }, { "epoch": 0.3422736970435843, "grad_norm": 1.1953125, "learning_rate": 0.0001530732913387556, "loss": 1.1054, "step": 2246 }, { "epoch": 0.3424260896068272, "grad_norm": 1.0390625, "learning_rate": 0.00015303145262593728, "loss": 1.0542, "step": 2247 }, { "epoch": 0.3425784821700701, "grad_norm": 0.99609375, "learning_rate": 0.00015298960099389454, "loss": 1.0361, "step": 2248 }, { "epoch": 0.34273087473331304, "grad_norm": 1.140625, "learning_rate": 0.000152947736452823, "loss": 0.9025, "step": 2249 }, { "epoch": 0.34288326729655594, "grad_norm": 1.0546875, "learning_rate": 0.0001529058590129214, "loss": 1.0063, "step": 2250 }, { "epoch": 0.34303565985979884, "grad_norm": 0.91015625, "learning_rate": 0.00015286396868439183, "loss": 0.9637, "step": 2251 }, { "epoch": 0.34318805242304173, "grad_norm": 0.96875, "learning_rate": 0.00015282206547743922, "loss": 1.052, "step": 2252 }, { "epoch": 0.3433404449862847, "grad_norm": 0.9296875, "learning_rate": 0.0001527801494022719, "loss": 0.9796, "step": 2253 }, { "epoch": 0.3434928375495276, "grad_norm": 0.87109375, "learning_rate": 0.00015273822046910117, "loss": 0.8304, "step": 2254 }, { "epoch": 0.3436452301127705, "grad_norm": 0.81640625, "learning_rate": 0.0001526962786881416, "loss": 0.9468, "step": 2255 }, { "epoch": 0.34379762267601344, "grad_norm": 0.76953125, "learning_rate": 0.00015265432406961067, "loss": 0.8994, "step": 2256 }, { "epoch": 0.34395001523925633, "grad_norm": 0.82421875, "learning_rate": 0.00015261235662372927, "loss": 0.9554, "step": 2257 }, { "epoch": 0.34410240780249923, "grad_norm": 1.1484375, "learning_rate": 0.0001525703763607212, "loss": 0.9817, "step": 2258 }, { "epoch": 0.34425480036574213, "grad_norm": 1.109375, "learning_rate": 0.00015252838329081345, "loss": 0.9438, "step": 2259 }, { "epoch": 0.3444071929289851, "grad_norm": 0.9140625, "learning_rate": 0.00015248637742423622, "loss": 1.0409, "step": 2260 }, { "epoch": 0.344559585492228, "grad_norm": 0.828125, "learning_rate": 0.00015244435877122262, "loss": 0.777, "step": 2261 }, { "epoch": 0.3447119780554709, "grad_norm": 0.9375, "learning_rate": 0.00015240232734200907, "loss": 1.0022, "step": 2262 }, { "epoch": 0.34486437061871383, "grad_norm": 0.9296875, "learning_rate": 0.00015236028314683506, "loss": 1.0103, "step": 2263 }, { "epoch": 0.34501676318195673, "grad_norm": 1.0078125, "learning_rate": 0.00015231822619594308, "loss": 1.0418, "step": 2264 }, { "epoch": 0.34516915574519963, "grad_norm": 1.1328125, "learning_rate": 0.00015227615649957888, "loss": 1.0695, "step": 2265 }, { "epoch": 0.3453215483084425, "grad_norm": 0.73828125, "learning_rate": 0.0001522340740679912, "loss": 1.1287, "step": 2266 }, { "epoch": 0.3454739408716855, "grad_norm": 0.74609375, "learning_rate": 0.00015219197891143193, "loss": 0.9707, "step": 2267 }, { "epoch": 0.3456263334349284, "grad_norm": 0.73046875, "learning_rate": 0.0001521498710401561, "loss": 0.8827, "step": 2268 }, { "epoch": 0.3457787259981713, "grad_norm": 1.046875, "learning_rate": 0.0001521077504644217, "loss": 1.0704, "step": 2269 }, { "epoch": 0.34593111856141423, "grad_norm": 1.03125, "learning_rate": 0.00015206561719449, "loss": 0.9346, "step": 2270 }, { "epoch": 0.34608351112465713, "grad_norm": 1.0078125, "learning_rate": 0.0001520234712406252, "loss": 1.0257, "step": 2271 }, { "epoch": 0.3462359036879, "grad_norm": 0.953125, "learning_rate": 0.00015198131261309473, "loss": 0.9917, "step": 2272 }, { "epoch": 0.3463882962511429, "grad_norm": 0.94140625, "learning_rate": 0.00015193914132216897, "loss": 0.8207, "step": 2273 }, { "epoch": 0.3465406888143859, "grad_norm": 0.9453125, "learning_rate": 0.00015189695737812152, "loss": 1.042, "step": 2274 }, { "epoch": 0.3466930813776288, "grad_norm": 0.765625, "learning_rate": 0.00015185476079122897, "loss": 0.9764, "step": 2275 }, { "epoch": 0.3468454739408717, "grad_norm": 0.77734375, "learning_rate": 0.00015181255157177095, "loss": 0.9183, "step": 2276 }, { "epoch": 0.34699786650411457, "grad_norm": 1.1875, "learning_rate": 0.00015177032973003033, "loss": 1.0437, "step": 2277 }, { "epoch": 0.3471502590673575, "grad_norm": 0.9375, "learning_rate": 0.0001517280952762929, "loss": 1.1609, "step": 2278 }, { "epoch": 0.3473026516306004, "grad_norm": 1.0078125, "learning_rate": 0.00015168584822084763, "loss": 0.7888, "step": 2279 }, { "epoch": 0.3474550441938433, "grad_norm": 0.73046875, "learning_rate": 0.00015164358857398645, "loss": 0.9627, "step": 2280 }, { "epoch": 0.3476074367570863, "grad_norm": 0.9765625, "learning_rate": 0.00015160131634600442, "loss": 1.0505, "step": 2281 }, { "epoch": 0.3477598293203292, "grad_norm": 0.90234375, "learning_rate": 0.00015155903154719975, "loss": 0.943, "step": 2282 }, { "epoch": 0.34791222188357207, "grad_norm": 0.9765625, "learning_rate": 0.0001515167341878735, "loss": 0.9968, "step": 2283 }, { "epoch": 0.34806461444681497, "grad_norm": 1.1015625, "learning_rate": 0.00015147442427833002, "loss": 0.9911, "step": 2284 }, { "epoch": 0.3482170070100579, "grad_norm": 1.109375, "learning_rate": 0.00015143210182887656, "loss": 0.9441, "step": 2285 }, { "epoch": 0.3483693995733008, "grad_norm": 1.046875, "learning_rate": 0.00015138976684982348, "loss": 1.0931, "step": 2286 }, { "epoch": 0.3485217921365437, "grad_norm": 1.09375, "learning_rate": 0.0001513474193514842, "loss": 1.1364, "step": 2287 }, { "epoch": 0.34867418469978667, "grad_norm": 0.953125, "learning_rate": 0.00015130505934417518, "loss": 1.0299, "step": 2288 }, { "epoch": 0.34882657726302957, "grad_norm": 0.77734375, "learning_rate": 0.00015126268683821593, "loss": 1.0118, "step": 2289 }, { "epoch": 0.34897896982627247, "grad_norm": 0.96484375, "learning_rate": 0.00015122030184392898, "loss": 0.8238, "step": 2290 }, { "epoch": 0.34913136238951537, "grad_norm": 0.91796875, "learning_rate": 0.00015117790437163997, "loss": 0.922, "step": 2291 }, { "epoch": 0.3492837549527583, "grad_norm": 1.0, "learning_rate": 0.00015113549443167748, "loss": 1.0041, "step": 2292 }, { "epoch": 0.3494361475160012, "grad_norm": 0.7734375, "learning_rate": 0.0001510930720343732, "loss": 0.8364, "step": 2293 }, { "epoch": 0.3495885400792441, "grad_norm": 0.80078125, "learning_rate": 0.00015105063719006182, "loss": 1.0569, "step": 2294 }, { "epoch": 0.34974093264248707, "grad_norm": 1.0859375, "learning_rate": 0.0001510081899090811, "loss": 1.0083, "step": 2295 }, { "epoch": 0.34989332520572997, "grad_norm": 0.94921875, "learning_rate": 0.00015096573020177188, "loss": 1.0572, "step": 2296 }, { "epoch": 0.35004571776897286, "grad_norm": 0.83984375, "learning_rate": 0.00015092325807847782, "loss": 0.8305, "step": 2297 }, { "epoch": 0.35019811033221576, "grad_norm": 1.015625, "learning_rate": 0.0001508807735495458, "loss": 1.0375, "step": 2298 }, { "epoch": 0.3503505028954587, "grad_norm": 1.1328125, "learning_rate": 0.0001508382766253257, "loss": 1.0209, "step": 2299 }, { "epoch": 0.3505028954587016, "grad_norm": 0.98046875, "learning_rate": 0.00015079576731617033, "loss": 0.8294, "step": 2300 }, { "epoch": 0.3506552880219445, "grad_norm": 0.88671875, "learning_rate": 0.00015075324563243556, "loss": 1.1599, "step": 2301 }, { "epoch": 0.35080768058518746, "grad_norm": 0.94921875, "learning_rate": 0.00015071071158448036, "loss": 1.0029, "step": 2302 }, { "epoch": 0.35096007314843036, "grad_norm": 0.921875, "learning_rate": 0.00015066816518266655, "loss": 0.9073, "step": 2303 }, { "epoch": 0.35111246571167326, "grad_norm": 0.85546875, "learning_rate": 0.00015062560643735909, "loss": 1.0147, "step": 2304 }, { "epoch": 0.35126485827491616, "grad_norm": 1.0625, "learning_rate": 0.0001505830353589259, "loss": 0.8946, "step": 2305 }, { "epoch": 0.3514172508381591, "grad_norm": 1.2109375, "learning_rate": 0.0001505404519577379, "loss": 0.9264, "step": 2306 }, { "epoch": 0.351569643401402, "grad_norm": 1.046875, "learning_rate": 0.000150497856244169, "loss": 0.9504, "step": 2307 }, { "epoch": 0.3517220359646449, "grad_norm": 0.9609375, "learning_rate": 0.00015045524822859615, "loss": 1.2397, "step": 2308 }, { "epoch": 0.35187442852788786, "grad_norm": 1.0, "learning_rate": 0.00015041262792139928, "loss": 0.9998, "step": 2309 }, { "epoch": 0.35202682109113076, "grad_norm": 1.046875, "learning_rate": 0.0001503699953329613, "loss": 0.8013, "step": 2310 }, { "epoch": 0.35217921365437366, "grad_norm": 0.92578125, "learning_rate": 0.0001503273504736681, "loss": 0.984, "step": 2311 }, { "epoch": 0.35233160621761656, "grad_norm": 0.9296875, "learning_rate": 0.0001502846933539086, "loss": 0.9662, "step": 2312 }, { "epoch": 0.3524839987808595, "grad_norm": 1.09375, "learning_rate": 0.00015024202398407477, "loss": 0.9669, "step": 2313 }, { "epoch": 0.3526363913441024, "grad_norm": 1.3984375, "learning_rate": 0.00015019934237456132, "loss": 0.9737, "step": 2314 }, { "epoch": 0.3527887839073453, "grad_norm": 1.0859375, "learning_rate": 0.00015015664853576622, "loss": 0.9463, "step": 2315 }, { "epoch": 0.35294117647058826, "grad_norm": 0.9140625, "learning_rate": 0.00015011394247809029, "loss": 0.9389, "step": 2316 }, { "epoch": 0.35309356903383116, "grad_norm": 0.84765625, "learning_rate": 0.00015007122421193729, "loss": 1.0504, "step": 2317 }, { "epoch": 0.35324596159707405, "grad_norm": 1.125, "learning_rate": 0.00015002849374771401, "loss": 1.081, "step": 2318 }, { "epoch": 0.35339835416031695, "grad_norm": 0.91796875, "learning_rate": 0.00014998575109583027, "loss": 0.9223, "step": 2319 }, { "epoch": 0.3535507467235599, "grad_norm": 0.97265625, "learning_rate": 0.00014994299626669877, "loss": 1.043, "step": 2320 }, { "epoch": 0.3537031392868028, "grad_norm": 1.09375, "learning_rate": 0.00014990022927073517, "loss": 0.9279, "step": 2321 }, { "epoch": 0.3538555318500457, "grad_norm": 1.21875, "learning_rate": 0.00014985745011835815, "loss": 1.1361, "step": 2322 }, { "epoch": 0.35400792441328865, "grad_norm": 0.7265625, "learning_rate": 0.0001498146588199893, "loss": 0.9451, "step": 2323 }, { "epoch": 0.35416031697653155, "grad_norm": 1.0234375, "learning_rate": 0.00014977185538605323, "loss": 0.853, "step": 2324 }, { "epoch": 0.35431270953977445, "grad_norm": 1.1328125, "learning_rate": 0.00014972903982697744, "loss": 0.9508, "step": 2325 }, { "epoch": 0.35446510210301735, "grad_norm": 0.83203125, "learning_rate": 0.00014968621215319242, "loss": 1.1311, "step": 2326 }, { "epoch": 0.3546174946662603, "grad_norm": 1.1015625, "learning_rate": 0.00014964337237513166, "loss": 1.0668, "step": 2327 }, { "epoch": 0.3547698872295032, "grad_norm": 0.72265625, "learning_rate": 0.00014960052050323145, "loss": 0.9286, "step": 2328 }, { "epoch": 0.3549222797927461, "grad_norm": 0.8984375, "learning_rate": 0.0001495576565479312, "loss": 0.9911, "step": 2329 }, { "epoch": 0.35507467235598905, "grad_norm": 0.96484375, "learning_rate": 0.0001495147805196732, "loss": 0.9659, "step": 2330 }, { "epoch": 0.35522706491923195, "grad_norm": 0.90234375, "learning_rate": 0.0001494718924289026, "loss": 0.816, "step": 2331 }, { "epoch": 0.35537945748247485, "grad_norm": 0.87890625, "learning_rate": 0.00014942899228606758, "loss": 0.9509, "step": 2332 }, { "epoch": 0.35553185004571775, "grad_norm": 0.90625, "learning_rate": 0.00014938608010161926, "loss": 1.0248, "step": 2333 }, { "epoch": 0.3556842426089607, "grad_norm": 0.9921875, "learning_rate": 0.00014934315588601161, "loss": 0.9894, "step": 2334 }, { "epoch": 0.3558366351722036, "grad_norm": 0.9921875, "learning_rate": 0.00014930021964970164, "loss": 0.9037, "step": 2335 }, { "epoch": 0.3559890277354465, "grad_norm": 0.80078125, "learning_rate": 0.0001492572714031492, "loss": 0.957, "step": 2336 }, { "epoch": 0.35614142029868945, "grad_norm": 1.2265625, "learning_rate": 0.00014921431115681713, "loss": 0.8711, "step": 2337 }, { "epoch": 0.35629381286193235, "grad_norm": 1.140625, "learning_rate": 0.00014917133892117116, "loss": 1.0854, "step": 2338 }, { "epoch": 0.35644620542517524, "grad_norm": 0.83984375, "learning_rate": 0.00014912835470667988, "loss": 0.8225, "step": 2339 }, { "epoch": 0.35659859798841814, "grad_norm": 1.0078125, "learning_rate": 0.00014908535852381497, "loss": 0.9383, "step": 2340 }, { "epoch": 0.3567509905516611, "grad_norm": 0.828125, "learning_rate": 0.00014904235038305083, "loss": 1.0533, "step": 2341 }, { "epoch": 0.356903383114904, "grad_norm": 1.0703125, "learning_rate": 0.0001489993302948649, "loss": 1.0315, "step": 2342 }, { "epoch": 0.3570557756781469, "grad_norm": 0.859375, "learning_rate": 0.00014895629826973747, "loss": 0.8932, "step": 2343 }, { "epoch": 0.35720816824138985, "grad_norm": 1.0078125, "learning_rate": 0.00014891325431815183, "loss": 0.87, "step": 2344 }, { "epoch": 0.35736056080463274, "grad_norm": 1.078125, "learning_rate": 0.00014887019845059401, "loss": 1.1612, "step": 2345 }, { "epoch": 0.35751295336787564, "grad_norm": 0.87109375, "learning_rate": 0.0001488271306775531, "loss": 0.9618, "step": 2346 }, { "epoch": 0.35766534593111854, "grad_norm": 0.6484375, "learning_rate": 0.00014878405100952102, "loss": 0.8885, "step": 2347 }, { "epoch": 0.3578177384943615, "grad_norm": 1.609375, "learning_rate": 0.00014874095945699255, "loss": 1.1048, "step": 2348 }, { "epoch": 0.3579701310576044, "grad_norm": 0.96875, "learning_rate": 0.00014869785603046544, "loss": 1.2317, "step": 2349 }, { "epoch": 0.3581225236208473, "grad_norm": 1.1328125, "learning_rate": 0.00014865474074044036, "loss": 0.9561, "step": 2350 }, { "epoch": 0.35827491618409024, "grad_norm": 0.9609375, "learning_rate": 0.00014861161359742075, "loss": 1.0932, "step": 2351 }, { "epoch": 0.35842730874733314, "grad_norm": 0.92578125, "learning_rate": 0.00014856847461191302, "loss": 1.0969, "step": 2352 }, { "epoch": 0.35857970131057604, "grad_norm": 1.125, "learning_rate": 0.00014852532379442646, "loss": 0.9148, "step": 2353 }, { "epoch": 0.35873209387381894, "grad_norm": 0.75390625, "learning_rate": 0.0001484821611554732, "loss": 1.034, "step": 2354 }, { "epoch": 0.3588844864370619, "grad_norm": 0.80859375, "learning_rate": 0.0001484389867055683, "loss": 0.9027, "step": 2355 }, { "epoch": 0.3590368790003048, "grad_norm": 0.90625, "learning_rate": 0.00014839580045522966, "loss": 0.7919, "step": 2356 }, { "epoch": 0.3591892715635477, "grad_norm": 0.87109375, "learning_rate": 0.00014835260241497812, "loss": 0.7608, "step": 2357 }, { "epoch": 0.35934166412679064, "grad_norm": 0.828125, "learning_rate": 0.0001483093925953373, "loss": 0.8627, "step": 2358 }, { "epoch": 0.35949405669003354, "grad_norm": 0.93359375, "learning_rate": 0.0001482661710068337, "loss": 0.9469, "step": 2359 }, { "epoch": 0.35964644925327643, "grad_norm": 1.046875, "learning_rate": 0.00014822293765999678, "loss": 0.9006, "step": 2360 }, { "epoch": 0.35979884181651933, "grad_norm": 0.9765625, "learning_rate": 0.00014817969256535881, "loss": 0.76, "step": 2361 }, { "epoch": 0.3599512343797623, "grad_norm": 0.8203125, "learning_rate": 0.00014813643573345487, "loss": 0.9835, "step": 2362 }, { "epoch": 0.3601036269430052, "grad_norm": 0.84765625, "learning_rate": 0.000148093167174823, "loss": 1.0365, "step": 2363 }, { "epoch": 0.3602560195062481, "grad_norm": 1.09375, "learning_rate": 0.00014804988690000402, "loss": 0.9249, "step": 2364 }, { "epoch": 0.36040841206949104, "grad_norm": 1.015625, "learning_rate": 0.0001480065949195416, "loss": 1.1759, "step": 2365 }, { "epoch": 0.36056080463273393, "grad_norm": 0.9765625, "learning_rate": 0.0001479632912439823, "loss": 1.045, "step": 2366 }, { "epoch": 0.36071319719597683, "grad_norm": 0.8203125, "learning_rate": 0.00014791997588387554, "loss": 0.8358, "step": 2367 }, { "epoch": 0.36086558975921973, "grad_norm": 1.03125, "learning_rate": 0.00014787664884977354, "loss": 1.0797, "step": 2368 }, { "epoch": 0.3610179823224627, "grad_norm": 0.9765625, "learning_rate": 0.00014783331015223142, "loss": 0.98, "step": 2369 }, { "epoch": 0.3611703748857056, "grad_norm": 0.83203125, "learning_rate": 0.00014778995980180707, "loss": 1.0067, "step": 2370 }, { "epoch": 0.3613227674489485, "grad_norm": 0.8203125, "learning_rate": 0.0001477465978090613, "loss": 0.7713, "step": 2371 }, { "epoch": 0.36147516001219143, "grad_norm": 0.68359375, "learning_rate": 0.0001477032241845577, "loss": 0.8394, "step": 2372 }, { "epoch": 0.36162755257543433, "grad_norm": 0.890625, "learning_rate": 0.00014765983893886268, "loss": 0.836, "step": 2373 }, { "epoch": 0.36177994513867723, "grad_norm": 1.1640625, "learning_rate": 0.00014761644208254556, "loss": 1.0503, "step": 2374 }, { "epoch": 0.3619323377019201, "grad_norm": 0.7578125, "learning_rate": 0.0001475730336261784, "loss": 0.7296, "step": 2375 }, { "epoch": 0.3620847302651631, "grad_norm": 1.0625, "learning_rate": 0.00014752961358033612, "loss": 0.8972, "step": 2376 }, { "epoch": 0.362237122828406, "grad_norm": 1.125, "learning_rate": 0.00014748618195559648, "loss": 1.0307, "step": 2377 }, { "epoch": 0.3623895153916489, "grad_norm": 0.91015625, "learning_rate": 0.00014744273876254012, "loss": 0.7788, "step": 2378 }, { "epoch": 0.36254190795489183, "grad_norm": 0.953125, "learning_rate": 0.0001473992840117503, "loss": 0.8983, "step": 2379 }, { "epoch": 0.3626943005181347, "grad_norm": 0.7734375, "learning_rate": 0.00014735581771381328, "loss": 0.8627, "step": 2380 }, { "epoch": 0.3628466930813776, "grad_norm": 0.84375, "learning_rate": 0.0001473123398793181, "loss": 0.9665, "step": 2381 }, { "epoch": 0.3629990856446205, "grad_norm": 0.97265625, "learning_rate": 0.00014726885051885653, "loss": 1.0167, "step": 2382 }, { "epoch": 0.3631514782078635, "grad_norm": 0.9140625, "learning_rate": 0.00014722534964302326, "loss": 1.013, "step": 2383 }, { "epoch": 0.3633038707711064, "grad_norm": 0.875, "learning_rate": 0.00014718183726241567, "loss": 0.9562, "step": 2384 }, { "epoch": 0.36345626333434927, "grad_norm": 1.1640625, "learning_rate": 0.0001471383133876341, "loss": 1.0942, "step": 2385 }, { "epoch": 0.36360865589759217, "grad_norm": 0.96875, "learning_rate": 0.00014709477802928147, "loss": 0.9725, "step": 2386 }, { "epoch": 0.3637610484608351, "grad_norm": 1.03125, "learning_rate": 0.0001470512311979637, "loss": 0.957, "step": 2387 }, { "epoch": 0.363913441024078, "grad_norm": 0.796875, "learning_rate": 0.00014700767290428942, "loss": 0.9019, "step": 2388 }, { "epoch": 0.3640658335873209, "grad_norm": 1.1171875, "learning_rate": 0.00014696410315887, "loss": 1.0607, "step": 2389 }, { "epoch": 0.3642182261505639, "grad_norm": 1.296875, "learning_rate": 0.0001469205219723197, "loss": 0.9371, "step": 2390 }, { "epoch": 0.36437061871380677, "grad_norm": 1.015625, "learning_rate": 0.00014687692935525553, "loss": 0.9223, "step": 2391 }, { "epoch": 0.36452301127704967, "grad_norm": 0.90234375, "learning_rate": 0.00014683332531829728, "loss": 0.9638, "step": 2392 }, { "epoch": 0.36467540384029257, "grad_norm": 0.9375, "learning_rate": 0.0001467897098720675, "loss": 0.805, "step": 2393 }, { "epoch": 0.3648277964035355, "grad_norm": 0.890625, "learning_rate": 0.00014674608302719152, "loss": 0.8709, "step": 2394 }, { "epoch": 0.3649801889667784, "grad_norm": 1.03125, "learning_rate": 0.00014670244479429752, "loss": 0.9158, "step": 2395 }, { "epoch": 0.3651325815300213, "grad_norm": 0.9453125, "learning_rate": 0.00014665879518401638, "loss": 0.8652, "step": 2396 }, { "epoch": 0.36528497409326427, "grad_norm": 1.140625, "learning_rate": 0.00014661513420698174, "loss": 1.0997, "step": 2397 }, { "epoch": 0.36543736665650717, "grad_norm": 1.125, "learning_rate": 0.0001465714618738301, "loss": 1.0139, "step": 2398 }, { "epoch": 0.36558975921975007, "grad_norm": 0.8046875, "learning_rate": 0.0001465277781952006, "loss": 0.9091, "step": 2399 }, { "epoch": 0.36574215178299296, "grad_norm": 0.8828125, "learning_rate": 0.0001464840831817353, "loss": 0.8397, "step": 2400 }, { "epoch": 0.3658945443462359, "grad_norm": 1.1640625, "learning_rate": 0.00014644037684407882, "loss": 1.0972, "step": 2401 }, { "epoch": 0.3660469369094788, "grad_norm": 1.03125, "learning_rate": 0.00014639665919287878, "loss": 1.1362, "step": 2402 }, { "epoch": 0.3661993294727217, "grad_norm": 0.79296875, "learning_rate": 0.00014635293023878535, "loss": 0.9307, "step": 2403 }, { "epoch": 0.36635172203596467, "grad_norm": 1.25, "learning_rate": 0.00014630918999245152, "loss": 0.9596, "step": 2404 }, { "epoch": 0.36650411459920756, "grad_norm": 0.921875, "learning_rate": 0.0001462654384645331, "loss": 0.9947, "step": 2405 }, { "epoch": 0.36665650716245046, "grad_norm": 0.90625, "learning_rate": 0.00014622167566568853, "loss": 0.8423, "step": 2406 }, { "epoch": 0.36680889972569336, "grad_norm": 0.91796875, "learning_rate": 0.0001461779016065791, "loss": 0.9458, "step": 2407 }, { "epoch": 0.3669612922889363, "grad_norm": 1.171875, "learning_rate": 0.0001461341162978688, "loss": 0.8029, "step": 2408 }, { "epoch": 0.3671136848521792, "grad_norm": 0.8828125, "learning_rate": 0.00014609031975022432, "loss": 1.0172, "step": 2409 }, { "epoch": 0.3672660774154221, "grad_norm": 1.0703125, "learning_rate": 0.00014604651197431517, "loss": 1.0966, "step": 2410 }, { "epoch": 0.36741846997866506, "grad_norm": 0.984375, "learning_rate": 0.00014600269298081358, "loss": 0.9816, "step": 2411 }, { "epoch": 0.36757086254190796, "grad_norm": 0.890625, "learning_rate": 0.00014595886278039442, "loss": 1.0814, "step": 2412 }, { "epoch": 0.36772325510515086, "grad_norm": 0.86328125, "learning_rate": 0.00014591502138373537, "loss": 1.0429, "step": 2413 }, { "epoch": 0.36787564766839376, "grad_norm": 0.83984375, "learning_rate": 0.00014587116880151685, "loss": 1.0346, "step": 2414 }, { "epoch": 0.3680280402316367, "grad_norm": 0.8203125, "learning_rate": 0.00014582730504442197, "loss": 0.9523, "step": 2415 }, { "epoch": 0.3681804327948796, "grad_norm": 1.4453125, "learning_rate": 0.0001457834301231366, "loss": 0.9995, "step": 2416 }, { "epoch": 0.3683328253581225, "grad_norm": 0.8671875, "learning_rate": 0.00014573954404834927, "loss": 1.075, "step": 2417 }, { "epoch": 0.36848521792136546, "grad_norm": 0.9140625, "learning_rate": 0.00014569564683075123, "loss": 0.9546, "step": 2418 }, { "epoch": 0.36863761048460836, "grad_norm": 0.8359375, "learning_rate": 0.00014565173848103653, "loss": 0.9907, "step": 2419 }, { "epoch": 0.36879000304785126, "grad_norm": 1.0, "learning_rate": 0.00014560781900990185, "loss": 0.9488, "step": 2420 }, { "epoch": 0.36894239561109415, "grad_norm": 1.1953125, "learning_rate": 0.00014556388842804657, "loss": 1.0137, "step": 2421 }, { "epoch": 0.3690947881743371, "grad_norm": 1.09375, "learning_rate": 0.0001455199467461729, "loss": 0.9965, "step": 2422 }, { "epoch": 0.36924718073758, "grad_norm": 1.0390625, "learning_rate": 0.00014547599397498562, "loss": 0.9179, "step": 2423 }, { "epoch": 0.3693995733008229, "grad_norm": 1.0234375, "learning_rate": 0.00014543203012519222, "loss": 1.0591, "step": 2424 }, { "epoch": 0.36955196586406586, "grad_norm": 1.1484375, "learning_rate": 0.00014538805520750297, "loss": 0.9815, "step": 2425 }, { "epoch": 0.36970435842730875, "grad_norm": 1.0546875, "learning_rate": 0.00014534406923263077, "loss": 1.1159, "step": 2426 }, { "epoch": 0.36985675099055165, "grad_norm": 0.83984375, "learning_rate": 0.00014530007221129124, "loss": 0.9628, "step": 2427 }, { "epoch": 0.37000914355379455, "grad_norm": 0.98046875, "learning_rate": 0.0001452560641542027, "loss": 0.9323, "step": 2428 }, { "epoch": 0.3701615361170375, "grad_norm": 1.0390625, "learning_rate": 0.00014521204507208615, "loss": 0.9867, "step": 2429 }, { "epoch": 0.3703139286802804, "grad_norm": 1.140625, "learning_rate": 0.00014516801497566523, "loss": 1.2031, "step": 2430 }, { "epoch": 0.3704663212435233, "grad_norm": 0.86328125, "learning_rate": 0.00014512397387566638, "loss": 0.9587, "step": 2431 }, { "epoch": 0.37061871380676625, "grad_norm": 1.0546875, "learning_rate": 0.00014507992178281856, "loss": 0.935, "step": 2432 }, { "epoch": 0.37077110637000915, "grad_norm": 0.8671875, "learning_rate": 0.00014503585870785353, "loss": 1.1898, "step": 2433 }, { "epoch": 0.37092349893325205, "grad_norm": 1.234375, "learning_rate": 0.00014499178466150571, "loss": 1.0886, "step": 2434 }, { "epoch": 0.37107589149649495, "grad_norm": 0.9296875, "learning_rate": 0.00014494769965451215, "loss": 0.9582, "step": 2435 }, { "epoch": 0.3712282840597379, "grad_norm": 1.0078125, "learning_rate": 0.0001449036036976126, "loss": 0.9235, "step": 2436 }, { "epoch": 0.3713806766229808, "grad_norm": 1.0703125, "learning_rate": 0.00014485949680154947, "loss": 0.9187, "step": 2437 }, { "epoch": 0.3715330691862237, "grad_norm": 1.1796875, "learning_rate": 0.0001448153789770678, "loss": 0.9772, "step": 2438 }, { "epoch": 0.37168546174946665, "grad_norm": 0.8984375, "learning_rate": 0.00014477125023491536, "loss": 1.0885, "step": 2439 }, { "epoch": 0.37183785431270955, "grad_norm": 0.91015625, "learning_rate": 0.00014472711058584255, "loss": 0.8823, "step": 2440 }, { "epoch": 0.37199024687595245, "grad_norm": 0.9296875, "learning_rate": 0.00014468296004060242, "loss": 1.0759, "step": 2441 }, { "epoch": 0.37214263943919534, "grad_norm": 0.890625, "learning_rate": 0.00014463879860995068, "loss": 0.7902, "step": 2442 }, { "epoch": 0.3722950320024383, "grad_norm": 0.953125, "learning_rate": 0.00014459462630464566, "loss": 0.7203, "step": 2443 }, { "epoch": 0.3724474245656812, "grad_norm": 1.109375, "learning_rate": 0.00014455044313544838, "loss": 1.0739, "step": 2444 }, { "epoch": 0.3725998171289241, "grad_norm": 0.8671875, "learning_rate": 0.00014450624911312255, "loss": 0.9627, "step": 2445 }, { "epoch": 0.37275220969216705, "grad_norm": 0.9296875, "learning_rate": 0.00014446204424843438, "loss": 0.8684, "step": 2446 }, { "epoch": 0.37290460225540994, "grad_norm": 0.9765625, "learning_rate": 0.00014441782855215287, "loss": 0.7565, "step": 2447 }, { "epoch": 0.37305699481865284, "grad_norm": 0.94921875, "learning_rate": 0.0001443736020350496, "loss": 0.9713, "step": 2448 }, { "epoch": 0.37320938738189574, "grad_norm": 0.91015625, "learning_rate": 0.00014432936470789876, "loss": 0.9381, "step": 2449 }, { "epoch": 0.3733617799451387, "grad_norm": 0.89453125, "learning_rate": 0.00014428511658147718, "loss": 0.9204, "step": 2450 }, { "epoch": 0.3735141725083816, "grad_norm": 1.140625, "learning_rate": 0.0001442408576665644, "loss": 0.8903, "step": 2451 }, { "epoch": 0.3736665650716245, "grad_norm": 0.8046875, "learning_rate": 0.00014419658797394248, "loss": 0.7762, "step": 2452 }, { "epoch": 0.37381895763486744, "grad_norm": 0.9296875, "learning_rate": 0.0001441523075143962, "loss": 1.145, "step": 2453 }, { "epoch": 0.37397135019811034, "grad_norm": 1.03125, "learning_rate": 0.0001441080162987129, "loss": 0.9947, "step": 2454 }, { "epoch": 0.37412374276135324, "grad_norm": 0.87109375, "learning_rate": 0.00014406371433768248, "loss": 0.9629, "step": 2455 }, { "epoch": 0.37427613532459614, "grad_norm": 0.98046875, "learning_rate": 0.00014401940164209765, "loss": 0.7706, "step": 2456 }, { "epoch": 0.3744285278878391, "grad_norm": 0.9765625, "learning_rate": 0.00014397507822275357, "loss": 0.7983, "step": 2457 }, { "epoch": 0.374580920451082, "grad_norm": 0.70703125, "learning_rate": 0.00014393074409044802, "loss": 0.9731, "step": 2458 }, { "epoch": 0.3747333130143249, "grad_norm": 1.3046875, "learning_rate": 0.00014388639925598152, "loss": 1.0697, "step": 2459 }, { "epoch": 0.37488570557756784, "grad_norm": 0.73046875, "learning_rate": 0.00014384204373015704, "loss": 0.8745, "step": 2460 }, { "epoch": 0.37503809814081074, "grad_norm": 1.0234375, "learning_rate": 0.00014379767752378025, "loss": 1.0431, "step": 2461 }, { "epoch": 0.37519049070405364, "grad_norm": 0.88671875, "learning_rate": 0.0001437533006476594, "loss": 1.0107, "step": 2462 }, { "epoch": 0.37534288326729653, "grad_norm": 0.98046875, "learning_rate": 0.00014370891311260532, "loss": 1.0669, "step": 2463 }, { "epoch": 0.3754952758305395, "grad_norm": 0.98046875, "learning_rate": 0.00014366451492943148, "loss": 0.9505, "step": 2464 }, { "epoch": 0.3756476683937824, "grad_norm": 1.078125, "learning_rate": 0.00014362010610895386, "loss": 0.9437, "step": 2465 }, { "epoch": 0.3758000609570253, "grad_norm": 0.9375, "learning_rate": 0.00014357568666199114, "loss": 0.9869, "step": 2466 }, { "epoch": 0.37595245352026824, "grad_norm": 1.09375, "learning_rate": 0.00014353125659936452, "loss": 1.0026, "step": 2467 }, { "epoch": 0.37610484608351114, "grad_norm": 1.0, "learning_rate": 0.00014348681593189783, "loss": 0.8941, "step": 2468 }, { "epoch": 0.37625723864675403, "grad_norm": 0.87890625, "learning_rate": 0.00014344236467041737, "loss": 1.1523, "step": 2469 }, { "epoch": 0.37640963120999693, "grad_norm": 0.9375, "learning_rate": 0.0001433979028257522, "loss": 0.8897, "step": 2470 }, { "epoch": 0.3765620237732399, "grad_norm": 0.76171875, "learning_rate": 0.00014335343040873384, "loss": 0.9929, "step": 2471 }, { "epoch": 0.3767144163364828, "grad_norm": 0.73828125, "learning_rate": 0.0001433089474301964, "loss": 0.8654, "step": 2472 }, { "epoch": 0.3768668088997257, "grad_norm": 1.3828125, "learning_rate": 0.00014326445390097653, "loss": 0.9943, "step": 2473 }, { "epoch": 0.37701920146296863, "grad_norm": 0.953125, "learning_rate": 0.00014321994983191357, "loss": 0.971, "step": 2474 }, { "epoch": 0.37717159402621153, "grad_norm": 1.0078125, "learning_rate": 0.00014317543523384928, "loss": 1.1219, "step": 2475 }, { "epoch": 0.37732398658945443, "grad_norm": 0.7109375, "learning_rate": 0.00014313091011762815, "loss": 0.9225, "step": 2476 }, { "epoch": 0.37747637915269733, "grad_norm": 0.93359375, "learning_rate": 0.00014308637449409706, "loss": 0.953, "step": 2477 }, { "epoch": 0.3776287717159403, "grad_norm": 1.0703125, "learning_rate": 0.0001430418283741055, "loss": 0.9247, "step": 2478 }, { "epoch": 0.3777811642791832, "grad_norm": 0.83984375, "learning_rate": 0.00014299727176850566, "loss": 0.9244, "step": 2479 }, { "epoch": 0.3779335568424261, "grad_norm": 0.7578125, "learning_rate": 0.0001429527046881521, "loss": 0.989, "step": 2480 }, { "epoch": 0.37808594940566903, "grad_norm": 1.078125, "learning_rate": 0.00014290812714390198, "loss": 1.0629, "step": 2481 }, { "epoch": 0.37823834196891193, "grad_norm": 0.96875, "learning_rate": 0.00014286353914661507, "loss": 1.103, "step": 2482 }, { "epoch": 0.3783907345321548, "grad_norm": 1.09375, "learning_rate": 0.00014281894070715363, "loss": 0.9564, "step": 2483 }, { "epoch": 0.3785431270953977, "grad_norm": 1.046875, "learning_rate": 0.00014277433183638248, "loss": 0.9796, "step": 2484 }, { "epoch": 0.3786955196586407, "grad_norm": 1.0390625, "learning_rate": 0.000142729712545169, "loss": 1.0574, "step": 2485 }, { "epoch": 0.3788479122218836, "grad_norm": 0.74609375, "learning_rate": 0.00014268508284438305, "loss": 0.8665, "step": 2486 }, { "epoch": 0.3790003047851265, "grad_norm": 0.92578125, "learning_rate": 0.00014264044274489717, "loss": 0.9835, "step": 2487 }, { "epoch": 0.37915269734836937, "grad_norm": 1.078125, "learning_rate": 0.0001425957922575862, "loss": 1.0475, "step": 2488 }, { "epoch": 0.3793050899116123, "grad_norm": 1.03125, "learning_rate": 0.0001425511313933277, "loss": 0.9904, "step": 2489 }, { "epoch": 0.3794574824748552, "grad_norm": 0.85546875, "learning_rate": 0.0001425064601630017, "loss": 0.9477, "step": 2490 }, { "epoch": 0.3796098750380981, "grad_norm": 0.91796875, "learning_rate": 0.00014246177857749076, "loss": 0.9912, "step": 2491 }, { "epoch": 0.3797622676013411, "grad_norm": 1.296875, "learning_rate": 0.00014241708664767993, "loss": 1.0108, "step": 2492 }, { "epoch": 0.379914660164584, "grad_norm": 1.2265625, "learning_rate": 0.00014237238438445684, "loss": 0.9743, "step": 2493 }, { "epoch": 0.38006705272782687, "grad_norm": 1.109375, "learning_rate": 0.0001423276717987116, "loss": 1.1406, "step": 2494 }, { "epoch": 0.38021944529106977, "grad_norm": 1.1328125, "learning_rate": 0.0001422829489013368, "loss": 1.0216, "step": 2495 }, { "epoch": 0.3803718378543127, "grad_norm": 0.87109375, "learning_rate": 0.00014223821570322762, "loss": 0.8725, "step": 2496 }, { "epoch": 0.3805242304175556, "grad_norm": 0.91015625, "learning_rate": 0.00014219347221528167, "loss": 0.9054, "step": 2497 }, { "epoch": 0.3806766229807985, "grad_norm": 0.94140625, "learning_rate": 0.00014214871844839917, "loss": 0.9068, "step": 2498 }, { "epoch": 0.38082901554404147, "grad_norm": 1.1015625, "learning_rate": 0.00014210395441348274, "loss": 0.9196, "step": 2499 }, { "epoch": 0.38098140810728437, "grad_norm": 0.99609375, "learning_rate": 0.00014205918012143756, "loss": 0.9604, "step": 2500 }, { "epoch": 0.38113380067052727, "grad_norm": 0.87109375, "learning_rate": 0.00014201439558317127, "loss": 0.9538, "step": 2501 }, { "epoch": 0.38128619323377017, "grad_norm": 0.78515625, "learning_rate": 0.00014196960080959403, "loss": 0.9031, "step": 2502 }, { "epoch": 0.3814385857970131, "grad_norm": 0.83984375, "learning_rate": 0.0001419247958116185, "loss": 0.8151, "step": 2503 }, { "epoch": 0.381590978360256, "grad_norm": 0.76171875, "learning_rate": 0.00014187998060015983, "loss": 0.9285, "step": 2504 }, { "epoch": 0.3817433709234989, "grad_norm": 0.9296875, "learning_rate": 0.00014183515518613563, "loss": 0.9267, "step": 2505 }, { "epoch": 0.38189576348674187, "grad_norm": 0.953125, "learning_rate": 0.00014179031958046602, "loss": 1.0529, "step": 2506 }, { "epoch": 0.38204815604998477, "grad_norm": 0.765625, "learning_rate": 0.00014174547379407364, "loss": 0.9445, "step": 2507 }, { "epoch": 0.38220054861322766, "grad_norm": 0.83984375, "learning_rate": 0.0001417006178378835, "loss": 0.9583, "step": 2508 }, { "epoch": 0.38235294117647056, "grad_norm": 1.015625, "learning_rate": 0.0001416557517228232, "loss": 0.9259, "step": 2509 }, { "epoch": 0.3825053337397135, "grad_norm": 1.078125, "learning_rate": 0.00014161087545982274, "loss": 0.9478, "step": 2510 }, { "epoch": 0.3826577263029564, "grad_norm": 0.91796875, "learning_rate": 0.0001415659890598147, "loss": 0.9861, "step": 2511 }, { "epoch": 0.3828101188661993, "grad_norm": 0.78125, "learning_rate": 0.00014152109253373393, "loss": 0.9951, "step": 2512 }, { "epoch": 0.38296251142944226, "grad_norm": 0.90234375, "learning_rate": 0.00014147618589251796, "loss": 1.0308, "step": 2513 }, { "epoch": 0.38311490399268516, "grad_norm": 1.046875, "learning_rate": 0.00014143126914710664, "loss": 0.9342, "step": 2514 }, { "epoch": 0.38326729655592806, "grad_norm": 0.86328125, "learning_rate": 0.0001413863423084424, "loss": 0.9999, "step": 2515 }, { "epoch": 0.38341968911917096, "grad_norm": 1.0859375, "learning_rate": 0.00014134140538747005, "loss": 0.9651, "step": 2516 }, { "epoch": 0.3835720816824139, "grad_norm": 0.8671875, "learning_rate": 0.0001412964583951368, "loss": 0.9673, "step": 2517 }, { "epoch": 0.3837244742456568, "grad_norm": 1.015625, "learning_rate": 0.00014125150134239247, "loss": 0.9824, "step": 2518 }, { "epoch": 0.3838768668088997, "grad_norm": 0.8125, "learning_rate": 0.00014120653424018917, "loss": 0.8927, "step": 2519 }, { "epoch": 0.38402925937214266, "grad_norm": 1.1875, "learning_rate": 0.0001411615570994816, "loss": 1.1488, "step": 2520 }, { "epoch": 0.38418165193538556, "grad_norm": 0.83984375, "learning_rate": 0.00014111656993122677, "loss": 0.8138, "step": 2521 }, { "epoch": 0.38433404449862846, "grad_norm": 0.83203125, "learning_rate": 0.00014107157274638427, "loss": 0.9863, "step": 2522 }, { "epoch": 0.38448643706187136, "grad_norm": 1.125, "learning_rate": 0.000141026565555916, "loss": 0.9589, "step": 2523 }, { "epoch": 0.3846388296251143, "grad_norm": 0.9921875, "learning_rate": 0.0001409815483707864, "loss": 0.9947, "step": 2524 }, { "epoch": 0.3847912221883572, "grad_norm": 0.91796875, "learning_rate": 0.0001409365212019623, "loss": 1.0063, "step": 2525 }, { "epoch": 0.3849436147516001, "grad_norm": 1.078125, "learning_rate": 0.00014089148406041296, "loss": 0.9788, "step": 2526 }, { "epoch": 0.38509600731484306, "grad_norm": 0.734375, "learning_rate": 0.00014084643695711006, "loss": 0.916, "step": 2527 }, { "epoch": 0.38524839987808596, "grad_norm": 0.98046875, "learning_rate": 0.00014080137990302776, "loss": 1.062, "step": 2528 }, { "epoch": 0.38540079244132885, "grad_norm": 0.94140625, "learning_rate": 0.00014075631290914252, "loss": 0.9821, "step": 2529 }, { "epoch": 0.38555318500457175, "grad_norm": 1.2578125, "learning_rate": 0.00014071123598643343, "loss": 1.0989, "step": 2530 }, { "epoch": 0.3857055775678147, "grad_norm": 0.9453125, "learning_rate": 0.0001406661491458818, "loss": 1.0188, "step": 2531 }, { "epoch": 0.3858579701310576, "grad_norm": 0.8046875, "learning_rate": 0.00014062105239847142, "loss": 0.7585, "step": 2532 }, { "epoch": 0.3860103626943005, "grad_norm": 1.0234375, "learning_rate": 0.0001405759457551886, "loss": 1.1376, "step": 2533 }, { "epoch": 0.38616275525754346, "grad_norm": 0.98046875, "learning_rate": 0.00014053082922702183, "loss": 0.9885, "step": 2534 }, { "epoch": 0.38631514782078635, "grad_norm": 1.1171875, "learning_rate": 0.00014048570282496227, "loss": 1.1557, "step": 2535 }, { "epoch": 0.38646754038402925, "grad_norm": 0.953125, "learning_rate": 0.00014044056656000326, "loss": 1.0778, "step": 2536 }, { "epoch": 0.38661993294727215, "grad_norm": 0.8203125, "learning_rate": 0.00014039542044314073, "loss": 0.8218, "step": 2537 }, { "epoch": 0.3867723255105151, "grad_norm": 1.03125, "learning_rate": 0.00014035026448537286, "loss": 0.8951, "step": 2538 }, { "epoch": 0.386924718073758, "grad_norm": 1.0703125, "learning_rate": 0.00014030509869770031, "loss": 0.8219, "step": 2539 }, { "epoch": 0.3870771106370009, "grad_norm": 0.7734375, "learning_rate": 0.00014025992309112613, "loss": 0.8007, "step": 2540 }, { "epoch": 0.38722950320024385, "grad_norm": 1.0546875, "learning_rate": 0.00014021473767665573, "loss": 1.0143, "step": 2541 }, { "epoch": 0.38738189576348675, "grad_norm": 1.171875, "learning_rate": 0.00014016954246529696, "loss": 0.9174, "step": 2542 }, { "epoch": 0.38753428832672965, "grad_norm": 0.99609375, "learning_rate": 0.00014012433746805997, "loss": 0.8234, "step": 2543 }, { "epoch": 0.38768668088997255, "grad_norm": 0.73828125, "learning_rate": 0.00014007912269595738, "loss": 0.9309, "step": 2544 }, { "epoch": 0.3878390734532155, "grad_norm": 0.90234375, "learning_rate": 0.00014003389816000415, "loss": 0.9535, "step": 2545 }, { "epoch": 0.3879914660164584, "grad_norm": 1.09375, "learning_rate": 0.00013998866387121764, "loss": 1.0296, "step": 2546 }, { "epoch": 0.3881438585797013, "grad_norm": 0.90234375, "learning_rate": 0.00013994341984061755, "loss": 0.9002, "step": 2547 }, { "epoch": 0.38829625114294425, "grad_norm": 1.140625, "learning_rate": 0.000139898166079226, "loss": 1.1246, "step": 2548 }, { "epoch": 0.38844864370618715, "grad_norm": 0.77734375, "learning_rate": 0.00013985290259806748, "loss": 1.0726, "step": 2549 }, { "epoch": 0.38860103626943004, "grad_norm": 1.0703125, "learning_rate": 0.00013980762940816874, "loss": 1.0137, "step": 2550 }, { "epoch": 0.38875342883267294, "grad_norm": 0.7421875, "learning_rate": 0.00013976234652055903, "loss": 0.9271, "step": 2551 }, { "epoch": 0.3889058213959159, "grad_norm": 0.88671875, "learning_rate": 0.00013971705394626998, "loss": 0.9875, "step": 2552 }, { "epoch": 0.3890582139591588, "grad_norm": 0.875, "learning_rate": 0.00013967175169633537, "loss": 0.9807, "step": 2553 }, { "epoch": 0.3892106065224017, "grad_norm": 0.828125, "learning_rate": 0.00013962643978179159, "loss": 1.1491, "step": 2554 }, { "epoch": 0.38936299908564465, "grad_norm": 0.86328125, "learning_rate": 0.00013958111821367724, "loss": 1.049, "step": 2555 }, { "epoch": 0.38951539164888754, "grad_norm": 1.078125, "learning_rate": 0.0001395357870030333, "loss": 1.2968, "step": 2556 }, { "epoch": 0.38966778421213044, "grad_norm": 1.1640625, "learning_rate": 0.0001394904461609031, "loss": 1.0331, "step": 2557 }, { "epoch": 0.38982017677537334, "grad_norm": 0.90625, "learning_rate": 0.00013944509569833237, "loss": 0.8674, "step": 2558 }, { "epoch": 0.3899725693386163, "grad_norm": 0.95703125, "learning_rate": 0.00013939973562636908, "loss": 1.1052, "step": 2559 }, { "epoch": 0.3901249619018592, "grad_norm": 0.8984375, "learning_rate": 0.00013935436595606358, "loss": 0.9734, "step": 2560 }, { "epoch": 0.3902773544651021, "grad_norm": 0.96484375, "learning_rate": 0.00013930898669846866, "loss": 0.9439, "step": 2561 }, { "epoch": 0.39042974702834504, "grad_norm": 0.7578125, "learning_rate": 0.00013926359786463926, "loss": 0.9133, "step": 2562 }, { "epoch": 0.39058213959158794, "grad_norm": 1.296875, "learning_rate": 0.00013921819946563285, "loss": 1.1832, "step": 2563 }, { "epoch": 0.39073453215483084, "grad_norm": 1.1328125, "learning_rate": 0.00013917279151250903, "loss": 0.9712, "step": 2564 }, { "epoch": 0.39088692471807374, "grad_norm": 1.0234375, "learning_rate": 0.0001391273740163299, "loss": 0.9683, "step": 2565 }, { "epoch": 0.3910393172813167, "grad_norm": 0.8984375, "learning_rate": 0.00013908194698815984, "loss": 0.9821, "step": 2566 }, { "epoch": 0.3911917098445596, "grad_norm": 0.91015625, "learning_rate": 0.00013903651043906546, "loss": 1.0093, "step": 2567 }, { "epoch": 0.3913441024078025, "grad_norm": 0.90625, "learning_rate": 0.00013899106438011572, "loss": 1.1496, "step": 2568 }, { "epoch": 0.39149649497104544, "grad_norm": 1.03125, "learning_rate": 0.0001389456088223821, "loss": 0.9353, "step": 2569 }, { "epoch": 0.39164888753428834, "grad_norm": 0.98046875, "learning_rate": 0.00013890014377693804, "loss": 0.9602, "step": 2570 }, { "epoch": 0.39180128009753123, "grad_norm": 1.1484375, "learning_rate": 0.0001388546692548596, "loss": 0.9756, "step": 2571 }, { "epoch": 0.39195367266077413, "grad_norm": 1.0859375, "learning_rate": 0.00013880918526722497, "loss": 1.0824, "step": 2572 }, { "epoch": 0.3921060652240171, "grad_norm": 0.91015625, "learning_rate": 0.00013876369182511475, "loss": 0.8989, "step": 2573 }, { "epoch": 0.39225845778726, "grad_norm": 1.171875, "learning_rate": 0.00013871818893961173, "loss": 1.1642, "step": 2574 }, { "epoch": 0.3924108503505029, "grad_norm": 1.03125, "learning_rate": 0.00013867267662180115, "loss": 0.8941, "step": 2575 }, { "epoch": 0.39256324291374584, "grad_norm": 0.8671875, "learning_rate": 0.0001386271548827704, "loss": 0.8829, "step": 2576 }, { "epoch": 0.39271563547698873, "grad_norm": 0.8515625, "learning_rate": 0.00013858162373360928, "loss": 1.1891, "step": 2577 }, { "epoch": 0.39286802804023163, "grad_norm": 0.9296875, "learning_rate": 0.00013853608318540976, "loss": 0.889, "step": 2578 }, { "epoch": 0.39302042060347453, "grad_norm": 0.890625, "learning_rate": 0.0001384905332492662, "loss": 0.9549, "step": 2579 }, { "epoch": 0.3931728131667175, "grad_norm": 0.87109375, "learning_rate": 0.00013844497393627531, "loss": 1.035, "step": 2580 }, { "epoch": 0.3933252057299604, "grad_norm": 0.91015625, "learning_rate": 0.00013839940525753588, "loss": 0.9689, "step": 2581 }, { "epoch": 0.3934775982932033, "grad_norm": 0.86328125, "learning_rate": 0.00013835382722414912, "loss": 1.0161, "step": 2582 }, { "epoch": 0.39362999085644623, "grad_norm": 1.1015625, "learning_rate": 0.00013830823984721855, "loss": 0.9287, "step": 2583 }, { "epoch": 0.39378238341968913, "grad_norm": 0.9453125, "learning_rate": 0.00013826264313784983, "loss": 0.9002, "step": 2584 }, { "epoch": 0.39393477598293203, "grad_norm": 1.0625, "learning_rate": 0.00013821703710715103, "loss": 1.0784, "step": 2585 }, { "epoch": 0.3940871685461749, "grad_norm": 0.8671875, "learning_rate": 0.00013817142176623243, "loss": 1.076, "step": 2586 }, { "epoch": 0.3942395611094179, "grad_norm": 1.171875, "learning_rate": 0.00013812579712620658, "loss": 0.7941, "step": 2587 }, { "epoch": 0.3943919536726608, "grad_norm": 0.7421875, "learning_rate": 0.00013808016319818826, "loss": 0.9426, "step": 2588 }, { "epoch": 0.3945443462359037, "grad_norm": 0.99609375, "learning_rate": 0.00013803451999329458, "loss": 1.0363, "step": 2589 }, { "epoch": 0.39469673879914663, "grad_norm": 1.1015625, "learning_rate": 0.00013798886752264491, "loss": 0.9339, "step": 2590 }, { "epoch": 0.3948491313623895, "grad_norm": 1.0859375, "learning_rate": 0.00013794320579736083, "loss": 1.1452, "step": 2591 }, { "epoch": 0.3950015239256324, "grad_norm": 1.046875, "learning_rate": 0.00013789753482856617, "loss": 0.9614, "step": 2592 }, { "epoch": 0.3951539164888753, "grad_norm": 0.76171875, "learning_rate": 0.00013785185462738707, "loss": 0.8936, "step": 2593 }, { "epoch": 0.3953063090521183, "grad_norm": 0.8515625, "learning_rate": 0.00013780616520495185, "loss": 0.8573, "step": 2594 }, { "epoch": 0.3954587016153612, "grad_norm": 0.953125, "learning_rate": 0.0001377604665723911, "loss": 1.0282, "step": 2595 }, { "epoch": 0.3956110941786041, "grad_norm": 0.6953125, "learning_rate": 0.00013771475874083767, "loss": 0.9207, "step": 2596 }, { "epoch": 0.39576348674184697, "grad_norm": 1.3125, "learning_rate": 0.00013766904172142673, "loss": 0.9112, "step": 2597 }, { "epoch": 0.3959158793050899, "grad_norm": 0.75390625, "learning_rate": 0.0001376233155252955, "loss": 0.8249, "step": 2598 }, { "epoch": 0.3960682718683328, "grad_norm": 1.203125, "learning_rate": 0.00013757758016358354, "loss": 1.0368, "step": 2599 }, { "epoch": 0.3962206644315757, "grad_norm": 1.078125, "learning_rate": 0.00013753183564743274, "loss": 1.1124, "step": 2600 }, { "epoch": 0.3963730569948187, "grad_norm": 0.96875, "learning_rate": 0.00013748608198798702, "loss": 0.8413, "step": 2601 }, { "epoch": 0.39652544955806157, "grad_norm": 1.1875, "learning_rate": 0.0001374403191963927, "loss": 1.115, "step": 2602 }, { "epoch": 0.39667784212130447, "grad_norm": 0.67578125, "learning_rate": 0.0001373945472837982, "loss": 0.9804, "step": 2603 }, { "epoch": 0.39683023468454737, "grad_norm": 0.94921875, "learning_rate": 0.00013734876626135425, "loss": 0.8482, "step": 2604 }, { "epoch": 0.3969826272477903, "grad_norm": 1.34375, "learning_rate": 0.00013730297614021375, "loss": 1.1104, "step": 2605 }, { "epoch": 0.3971350198110332, "grad_norm": 1.1171875, "learning_rate": 0.00013725717693153182, "loss": 1.1033, "step": 2606 }, { "epoch": 0.3972874123742761, "grad_norm": 0.95703125, "learning_rate": 0.00013721136864646582, "loss": 0.9225, "step": 2607 }, { "epoch": 0.39743980493751907, "grad_norm": 0.8984375, "learning_rate": 0.00013716555129617534, "loss": 0.9519, "step": 2608 }, { "epoch": 0.39759219750076197, "grad_norm": 1.0234375, "learning_rate": 0.00013711972489182208, "loss": 0.893, "step": 2609 }, { "epoch": 0.39774459006400487, "grad_norm": 0.84765625, "learning_rate": 0.00013707388944457006, "loss": 1.0119, "step": 2610 }, { "epoch": 0.39789698262724776, "grad_norm": 0.8671875, "learning_rate": 0.00013702804496558546, "loss": 0.8298, "step": 2611 }, { "epoch": 0.3980493751904907, "grad_norm": 1.421875, "learning_rate": 0.00013698219146603659, "loss": 1.1866, "step": 2612 }, { "epoch": 0.3982017677537336, "grad_norm": 0.9140625, "learning_rate": 0.00013693632895709409, "loss": 1.0378, "step": 2613 }, { "epoch": 0.3983541603169765, "grad_norm": 1.046875, "learning_rate": 0.00013689045744993072, "loss": 1.0022, "step": 2614 }, { "epoch": 0.39850655288021947, "grad_norm": 0.7265625, "learning_rate": 0.0001368445769557214, "loss": 0.8973, "step": 2615 }, { "epoch": 0.39865894544346236, "grad_norm": 1.0234375, "learning_rate": 0.00013679868748564327, "loss": 0.8979, "step": 2616 }, { "epoch": 0.39881133800670526, "grad_norm": 0.9140625, "learning_rate": 0.00013675278905087574, "loss": 0.9278, "step": 2617 }, { "epoch": 0.39896373056994816, "grad_norm": 0.98046875, "learning_rate": 0.00013670688166260027, "loss": 0.9797, "step": 2618 }, { "epoch": 0.3991161231331911, "grad_norm": 1.1328125, "learning_rate": 0.00013666096533200057, "loss": 0.9637, "step": 2619 }, { "epoch": 0.399268515696434, "grad_norm": 0.9765625, "learning_rate": 0.00013661504007026252, "loss": 0.9218, "step": 2620 }, { "epoch": 0.3994209082596769, "grad_norm": 1.0703125, "learning_rate": 0.00013656910588857415, "loss": 1.115, "step": 2621 }, { "epoch": 0.39957330082291986, "grad_norm": 1.203125, "learning_rate": 0.00013652316279812575, "loss": 1.1055, "step": 2622 }, { "epoch": 0.39972569338616276, "grad_norm": 0.85546875, "learning_rate": 0.00013647721081010966, "loss": 0.8166, "step": 2623 }, { "epoch": 0.39987808594940566, "grad_norm": 0.72265625, "learning_rate": 0.00013643124993572048, "loss": 0.7408, "step": 2624 }, { "epoch": 0.40003047851264856, "grad_norm": 0.96875, "learning_rate": 0.00013638528018615492, "loss": 0.8651, "step": 2625 }, { "epoch": 0.4001828710758915, "grad_norm": 0.8984375, "learning_rate": 0.00013633930157261185, "loss": 0.8461, "step": 2626 }, { "epoch": 0.4003352636391344, "grad_norm": 0.87109375, "learning_rate": 0.00013629331410629236, "loss": 0.9633, "step": 2627 }, { "epoch": 0.4004876562023773, "grad_norm": 0.93359375, "learning_rate": 0.0001362473177983997, "loss": 0.8754, "step": 2628 }, { "epoch": 0.40064004876562026, "grad_norm": 0.91015625, "learning_rate": 0.00013620131266013912, "loss": 1.0007, "step": 2629 }, { "epoch": 0.40079244132886316, "grad_norm": 0.9453125, "learning_rate": 0.00013615529870271819, "loss": 1.1162, "step": 2630 }, { "epoch": 0.40094483389210606, "grad_norm": 0.8046875, "learning_rate": 0.0001361092759373466, "loss": 0.8337, "step": 2631 }, { "epoch": 0.40109722645534895, "grad_norm": 0.84765625, "learning_rate": 0.00013606324437523613, "loss": 0.9829, "step": 2632 }, { "epoch": 0.4012496190185919, "grad_norm": 0.890625, "learning_rate": 0.00013601720402760075, "loss": 1.0746, "step": 2633 }, { "epoch": 0.4014020115818348, "grad_norm": 1.078125, "learning_rate": 0.00013597115490565647, "loss": 0.9423, "step": 2634 }, { "epoch": 0.4015544041450777, "grad_norm": 1.09375, "learning_rate": 0.0001359250970206216, "loss": 0.811, "step": 2635 }, { "epoch": 0.40170679670832066, "grad_norm": 0.8359375, "learning_rate": 0.00013587903038371653, "loss": 0.9198, "step": 2636 }, { "epoch": 0.40185918927156355, "grad_norm": 1.375, "learning_rate": 0.00013583295500616368, "loss": 0.9664, "step": 2637 }, { "epoch": 0.40201158183480645, "grad_norm": 1.2265625, "learning_rate": 0.0001357868708991877, "loss": 0.8848, "step": 2638 }, { "epoch": 0.40216397439804935, "grad_norm": 1.078125, "learning_rate": 0.00013574077807401534, "loss": 1.0481, "step": 2639 }, { "epoch": 0.4023163669612923, "grad_norm": 1.0703125, "learning_rate": 0.00013569467654187548, "loss": 0.9389, "step": 2640 }, { "epoch": 0.4024687595245352, "grad_norm": 0.890625, "learning_rate": 0.00013564856631399913, "loss": 0.8395, "step": 2641 }, { "epoch": 0.4026211520877781, "grad_norm": 0.875, "learning_rate": 0.00013560244740161938, "loss": 0.913, "step": 2642 }, { "epoch": 0.40277354465102105, "grad_norm": 1.1015625, "learning_rate": 0.00013555631981597145, "loss": 1.1077, "step": 2643 }, { "epoch": 0.40292593721426395, "grad_norm": 0.9140625, "learning_rate": 0.00013551018356829274, "loss": 1.0462, "step": 2644 }, { "epoch": 0.40307832977750685, "grad_norm": 1.0625, "learning_rate": 0.00013546403866982268, "loss": 1.1024, "step": 2645 }, { "epoch": 0.40323072234074975, "grad_norm": 0.9296875, "learning_rate": 0.00013541788513180277, "loss": 0.9238, "step": 2646 }, { "epoch": 0.4033831149039927, "grad_norm": 1.1015625, "learning_rate": 0.00013537172296547673, "loss": 0.9894, "step": 2647 }, { "epoch": 0.4035355074672356, "grad_norm": 1.015625, "learning_rate": 0.00013532555218209036, "loss": 1.0509, "step": 2648 }, { "epoch": 0.4036879000304785, "grad_norm": 0.765625, "learning_rate": 0.00013527937279289145, "loss": 0.9502, "step": 2649 }, { "epoch": 0.40384029259372145, "grad_norm": 0.890625, "learning_rate": 0.00013523318480913, "loss": 0.9262, "step": 2650 }, { "epoch": 0.40399268515696435, "grad_norm": 1.1171875, "learning_rate": 0.00013518698824205812, "loss": 0.9501, "step": 2651 }, { "epoch": 0.40414507772020725, "grad_norm": 0.86328125, "learning_rate": 0.00013514078310292984, "loss": 0.9384, "step": 2652 }, { "epoch": 0.40429747028345014, "grad_norm": 0.9375, "learning_rate": 0.0001350945694030015, "loss": 0.8671, "step": 2653 }, { "epoch": 0.4044498628466931, "grad_norm": 0.890625, "learning_rate": 0.00013504834715353137, "loss": 1.1585, "step": 2654 }, { "epoch": 0.404602255409936, "grad_norm": 0.9296875, "learning_rate": 0.00013500211636577987, "loss": 0.7808, "step": 2655 }, { "epoch": 0.4047546479731789, "grad_norm": 0.921875, "learning_rate": 0.0001349558770510095, "loss": 0.9755, "step": 2656 }, { "epoch": 0.40490704053642185, "grad_norm": 1.1015625, "learning_rate": 0.00013490962922048478, "loss": 0.8666, "step": 2657 }, { "epoch": 0.40505943309966475, "grad_norm": 0.8515625, "learning_rate": 0.0001348633728854724, "loss": 0.9288, "step": 2658 }, { "epoch": 0.40521182566290764, "grad_norm": 1.0078125, "learning_rate": 0.00013481710805724102, "loss": 0.9739, "step": 2659 }, { "epoch": 0.40536421822615054, "grad_norm": 1.265625, "learning_rate": 0.00013477083474706142, "loss": 1.0319, "step": 2660 }, { "epoch": 0.4055166107893935, "grad_norm": 0.734375, "learning_rate": 0.0001347245529662065, "loss": 0.8123, "step": 2661 }, { "epoch": 0.4056690033526364, "grad_norm": 0.94140625, "learning_rate": 0.00013467826272595113, "loss": 0.8879, "step": 2662 }, { "epoch": 0.4058213959158793, "grad_norm": 0.87890625, "learning_rate": 0.00013463196403757226, "loss": 1.0212, "step": 2663 }, { "epoch": 0.40597378847912224, "grad_norm": 0.95703125, "learning_rate": 0.00013458565691234893, "loss": 0.9394, "step": 2664 }, { "epoch": 0.40612618104236514, "grad_norm": 0.9453125, "learning_rate": 0.00013453934136156223, "loss": 0.9488, "step": 2665 }, { "epoch": 0.40627857360560804, "grad_norm": 0.9453125, "learning_rate": 0.00013449301739649528, "loss": 0.9936, "step": 2666 }, { "epoch": 0.40643096616885094, "grad_norm": 0.95703125, "learning_rate": 0.0001344466850284333, "loss": 0.9399, "step": 2667 }, { "epoch": 0.4065833587320939, "grad_norm": 0.703125, "learning_rate": 0.00013440034426866348, "loss": 0.879, "step": 2668 }, { "epoch": 0.4067357512953368, "grad_norm": 1.0703125, "learning_rate": 0.0001343539951284751, "loss": 0.9711, "step": 2669 }, { "epoch": 0.4068881438585797, "grad_norm": 0.7890625, "learning_rate": 0.00013430763761915952, "loss": 0.9779, "step": 2670 }, { "epoch": 0.40704053642182264, "grad_norm": 0.921875, "learning_rate": 0.00013426127175201002, "loss": 0.8108, "step": 2671 }, { "epoch": 0.40719292898506554, "grad_norm": 0.84765625, "learning_rate": 0.00013421489753832205, "loss": 0.9463, "step": 2672 }, { "epoch": 0.40734532154830844, "grad_norm": 1.1640625, "learning_rate": 0.00013416851498939306, "loss": 0.8616, "step": 2673 }, { "epoch": 0.40749771411155133, "grad_norm": 1.1875, "learning_rate": 0.0001341221241165224, "loss": 0.7334, "step": 2674 }, { "epoch": 0.4076501066747943, "grad_norm": 0.828125, "learning_rate": 0.00013407572493101167, "loss": 0.8764, "step": 2675 }, { "epoch": 0.4078024992380372, "grad_norm": 1.1328125, "learning_rate": 0.00013402931744416433, "loss": 0.8758, "step": 2676 }, { "epoch": 0.4079548918012801, "grad_norm": 1.2734375, "learning_rate": 0.00013398290166728586, "loss": 1.22, "step": 2677 }, { "epoch": 0.40810728436452304, "grad_norm": 0.9375, "learning_rate": 0.0001339364776116839, "loss": 1.2016, "step": 2678 }, { "epoch": 0.40825967692776594, "grad_norm": 1.171875, "learning_rate": 0.000133890045288668, "loss": 0.9817, "step": 2679 }, { "epoch": 0.40841206949100883, "grad_norm": 1.4453125, "learning_rate": 0.00013384360470954966, "loss": 1.0795, "step": 2680 }, { "epoch": 0.40856446205425173, "grad_norm": 0.87109375, "learning_rate": 0.0001337971558856426, "loss": 1.0107, "step": 2681 }, { "epoch": 0.4087168546174947, "grad_norm": 1.03125, "learning_rate": 0.00013375069882826232, "loss": 0.9341, "step": 2682 }, { "epoch": 0.4088692471807376, "grad_norm": 1.078125, "learning_rate": 0.00013370423354872643, "loss": 0.946, "step": 2683 }, { "epoch": 0.4090216397439805, "grad_norm": 1.2421875, "learning_rate": 0.00013365776005835463, "loss": 1.0018, "step": 2684 }, { "epoch": 0.40917403230722343, "grad_norm": 1.078125, "learning_rate": 0.00013361127836846842, "loss": 1.0675, "step": 2685 }, { "epoch": 0.40932642487046633, "grad_norm": 1.1015625, "learning_rate": 0.0001335647884903915, "loss": 0.9751, "step": 2686 }, { "epoch": 0.40947881743370923, "grad_norm": 0.82421875, "learning_rate": 0.0001335182904354494, "loss": 0.9222, "step": 2687 }, { "epoch": 0.40963120999695213, "grad_norm": 1.3203125, "learning_rate": 0.0001334717842149698, "loss": 0.9615, "step": 2688 }, { "epoch": 0.4097836025601951, "grad_norm": 1.4765625, "learning_rate": 0.00013342526984028218, "loss": 0.9654, "step": 2689 }, { "epoch": 0.409935995123438, "grad_norm": 0.79296875, "learning_rate": 0.0001333787473227182, "loss": 1.082, "step": 2690 }, { "epoch": 0.4100883876866809, "grad_norm": 0.859375, "learning_rate": 0.00013333221667361132, "loss": 1.1309, "step": 2691 }, { "epoch": 0.41024078024992383, "grad_norm": 0.921875, "learning_rate": 0.0001332856779042972, "loss": 0.9528, "step": 2692 }, { "epoch": 0.41039317281316673, "grad_norm": 0.984375, "learning_rate": 0.00013323913102611327, "loss": 0.7589, "step": 2693 }, { "epoch": 0.4105455653764096, "grad_norm": 0.6953125, "learning_rate": 0.000133192576050399, "loss": 1.0617, "step": 2694 }, { "epoch": 0.4106979579396525, "grad_norm": 0.89453125, "learning_rate": 0.00013314601298849591, "loss": 0.9783, "step": 2695 }, { "epoch": 0.4108503505028955, "grad_norm": 0.95703125, "learning_rate": 0.0001330994418517474, "loss": 0.9585, "step": 2696 }, { "epoch": 0.4110027430661384, "grad_norm": 0.94140625, "learning_rate": 0.0001330528626514989, "loss": 1.0938, "step": 2697 }, { "epoch": 0.4111551356293813, "grad_norm": 0.98828125, "learning_rate": 0.00013300627539909773, "loss": 1.1181, "step": 2698 }, { "epoch": 0.41130752819262417, "grad_norm": 1.0, "learning_rate": 0.00013295968010589325, "loss": 0.9956, "step": 2699 }, { "epoch": 0.4114599207558671, "grad_norm": 0.93359375, "learning_rate": 0.0001329130767832367, "loss": 0.849, "step": 2700 }, { "epoch": 0.41161231331911, "grad_norm": 0.9765625, "learning_rate": 0.00013286646544248136, "loss": 1.0224, "step": 2701 }, { "epoch": 0.4117647058823529, "grad_norm": 0.734375, "learning_rate": 0.00013281984609498238, "loss": 0.9477, "step": 2702 }, { "epoch": 0.4119170984455959, "grad_norm": 0.9921875, "learning_rate": 0.000132773218752097, "loss": 1.098, "step": 2703 }, { "epoch": 0.4120694910088388, "grad_norm": 0.76953125, "learning_rate": 0.0001327265834251842, "loss": 0.8712, "step": 2704 }, { "epoch": 0.41222188357208167, "grad_norm": 0.87109375, "learning_rate": 0.00013267994012560504, "loss": 0.9289, "step": 2705 }, { "epoch": 0.41237427613532457, "grad_norm": 0.83203125, "learning_rate": 0.00013263328886472256, "loss": 0.7026, "step": 2706 }, { "epoch": 0.4125266686985675, "grad_norm": 1.0703125, "learning_rate": 0.00013258662965390166, "loss": 0.8522, "step": 2707 }, { "epoch": 0.4126790612618104, "grad_norm": 0.859375, "learning_rate": 0.0001325399625045091, "loss": 0.9229, "step": 2708 }, { "epoch": 0.4128314538250533, "grad_norm": 1.0234375, "learning_rate": 0.0001324932874279138, "loss": 1.087, "step": 2709 }, { "epoch": 0.41298384638829627, "grad_norm": 1.3515625, "learning_rate": 0.0001324466044354864, "loss": 1.0907, "step": 2710 }, { "epoch": 0.41313623895153917, "grad_norm": 0.8125, "learning_rate": 0.00013239991353859956, "loss": 0.7897, "step": 2711 }, { "epoch": 0.41328863151478207, "grad_norm": 1.03125, "learning_rate": 0.00013235321474862788, "loss": 1.0074, "step": 2712 }, { "epoch": 0.41344102407802497, "grad_norm": 1.03125, "learning_rate": 0.00013230650807694784, "loss": 0.9984, "step": 2713 }, { "epoch": 0.4135934166412679, "grad_norm": 0.75, "learning_rate": 0.00013225979353493781, "loss": 0.8536, "step": 2714 }, { "epoch": 0.4137458092045108, "grad_norm": 0.87109375, "learning_rate": 0.00013221307113397824, "loss": 0.9489, "step": 2715 }, { "epoch": 0.4138982017677537, "grad_norm": 1.0703125, "learning_rate": 0.0001321663408854513, "loss": 0.9762, "step": 2716 }, { "epoch": 0.41405059433099667, "grad_norm": 1.0546875, "learning_rate": 0.00013211960280074109, "loss": 0.9802, "step": 2717 }, { "epoch": 0.41420298689423957, "grad_norm": 1.0390625, "learning_rate": 0.0001320728568912338, "loss": 0.9779, "step": 2718 }, { "epoch": 0.41435537945748246, "grad_norm": 1.1796875, "learning_rate": 0.00013202610316831733, "loss": 1.339, "step": 2719 }, { "epoch": 0.41450777202072536, "grad_norm": 0.8515625, "learning_rate": 0.00013197934164338164, "loss": 0.8383, "step": 2720 }, { "epoch": 0.4146601645839683, "grad_norm": 1.125, "learning_rate": 0.0001319325723278184, "loss": 0.9239, "step": 2721 }, { "epoch": 0.4148125571472112, "grad_norm": 1.0234375, "learning_rate": 0.00013188579523302138, "loss": 0.9384, "step": 2722 }, { "epoch": 0.4149649497104541, "grad_norm": 0.8984375, "learning_rate": 0.0001318390103703861, "loss": 1.1353, "step": 2723 }, { "epoch": 0.41511734227369707, "grad_norm": 0.92578125, "learning_rate": 0.00013179221775131005, "loss": 1.077, "step": 2724 }, { "epoch": 0.41526973483693996, "grad_norm": 1.03125, "learning_rate": 0.00013174541738719255, "loss": 1.051, "step": 2725 }, { "epoch": 0.41542212740018286, "grad_norm": 0.84375, "learning_rate": 0.00013169860928943488, "loss": 0.9206, "step": 2726 }, { "epoch": 0.41557451996342576, "grad_norm": 0.98046875, "learning_rate": 0.00013165179346944015, "loss": 0.9632, "step": 2727 }, { "epoch": 0.4157269125266687, "grad_norm": 0.96875, "learning_rate": 0.00013160496993861338, "loss": 1.1154, "step": 2728 }, { "epoch": 0.4158793050899116, "grad_norm": 0.71875, "learning_rate": 0.00013155813870836144, "loss": 0.9825, "step": 2729 }, { "epoch": 0.4160316976531545, "grad_norm": 1.078125, "learning_rate": 0.0001315112997900931, "loss": 0.8919, "step": 2730 }, { "epoch": 0.41618409021639746, "grad_norm": 1.0234375, "learning_rate": 0.00013146445319521898, "loss": 0.9921, "step": 2731 }, { "epoch": 0.41633648277964036, "grad_norm": 1.0078125, "learning_rate": 0.0001314175989351516, "loss": 0.9635, "step": 2732 }, { "epoch": 0.41648887534288326, "grad_norm": 1.109375, "learning_rate": 0.00013137073702130533, "loss": 1.0921, "step": 2733 }, { "epoch": 0.41664126790612616, "grad_norm": 1.328125, "learning_rate": 0.0001313238674650964, "loss": 1.2981, "step": 2734 }, { "epoch": 0.4167936604693691, "grad_norm": 0.81640625, "learning_rate": 0.00013127699027794292, "loss": 0.9114, "step": 2735 }, { "epoch": 0.416946053032612, "grad_norm": 0.88671875, "learning_rate": 0.0001312301054712648, "loss": 0.975, "step": 2736 }, { "epoch": 0.4170984455958549, "grad_norm": 1.15625, "learning_rate": 0.00013118321305648393, "loss": 0.9443, "step": 2737 }, { "epoch": 0.41725083815909786, "grad_norm": 0.9375, "learning_rate": 0.00013113631304502392, "loss": 0.8632, "step": 2738 }, { "epoch": 0.41740323072234076, "grad_norm": 1.0703125, "learning_rate": 0.0001310894054483103, "loss": 1.0267, "step": 2739 }, { "epoch": 0.41755562328558365, "grad_norm": 1.046875, "learning_rate": 0.00013104249027777045, "loss": 1.0578, "step": 2740 }, { "epoch": 0.41770801584882655, "grad_norm": 0.95703125, "learning_rate": 0.00013099556754483357, "loss": 0.9938, "step": 2741 }, { "epoch": 0.4178604084120695, "grad_norm": 0.82421875, "learning_rate": 0.0001309486372609307, "loss": 0.9157, "step": 2742 }, { "epoch": 0.4180128009753124, "grad_norm": 0.85546875, "learning_rate": 0.00013090169943749476, "loss": 0.8717, "step": 2743 }, { "epoch": 0.4181651935385553, "grad_norm": 1.21875, "learning_rate": 0.00013085475408596047, "loss": 0.9831, "step": 2744 }, { "epoch": 0.41831758610179826, "grad_norm": 1.1015625, "learning_rate": 0.00013080780121776434, "loss": 1.1123, "step": 2745 }, { "epoch": 0.41846997866504115, "grad_norm": 0.75390625, "learning_rate": 0.00013076084084434486, "loss": 0.8621, "step": 2746 }, { "epoch": 0.41862237122828405, "grad_norm": 1.046875, "learning_rate": 0.00013071387297714217, "loss": 0.9572, "step": 2747 }, { "epoch": 0.41877476379152695, "grad_norm": 0.984375, "learning_rate": 0.00013066689762759835, "loss": 0.8324, "step": 2748 }, { "epoch": 0.4189271563547699, "grad_norm": 0.859375, "learning_rate": 0.00013061991480715728, "loss": 0.9316, "step": 2749 }, { "epoch": 0.4190795489180128, "grad_norm": 1.046875, "learning_rate": 0.0001305729245272646, "loss": 0.9934, "step": 2750 }, { "epoch": 0.4192319414812557, "grad_norm": 0.9765625, "learning_rate": 0.00013052592679936792, "loss": 0.9009, "step": 2751 }, { "epoch": 0.41938433404449865, "grad_norm": 0.9296875, "learning_rate": 0.00013047892163491648, "loss": 1.0095, "step": 2752 }, { "epoch": 0.41953672660774155, "grad_norm": 0.78125, "learning_rate": 0.00013043190904536145, "loss": 0.8862, "step": 2753 }, { "epoch": 0.41968911917098445, "grad_norm": 1.0546875, "learning_rate": 0.00013038488904215572, "loss": 1.0879, "step": 2754 }, { "epoch": 0.41984151173422735, "grad_norm": 1.03125, "learning_rate": 0.00013033786163675412, "loss": 1.1509, "step": 2755 }, { "epoch": 0.4199939042974703, "grad_norm": 0.68359375, "learning_rate": 0.00013029082684061312, "loss": 0.7908, "step": 2756 }, { "epoch": 0.4201462968607132, "grad_norm": 1.6328125, "learning_rate": 0.00013024378466519113, "loss": 0.9777, "step": 2757 }, { "epoch": 0.4202986894239561, "grad_norm": 0.82421875, "learning_rate": 0.0001301967351219483, "loss": 0.8974, "step": 2758 }, { "epoch": 0.42045108198719905, "grad_norm": 0.94921875, "learning_rate": 0.0001301496782223465, "loss": 1.0597, "step": 2759 }, { "epoch": 0.42060347455044195, "grad_norm": 0.734375, "learning_rate": 0.00013010261397784957, "loss": 0.9562, "step": 2760 }, { "epoch": 0.42075586711368484, "grad_norm": 0.9296875, "learning_rate": 0.000130055542399923, "loss": 0.9246, "step": 2761 }, { "epoch": 0.42090825967692774, "grad_norm": 0.9453125, "learning_rate": 0.0001300084635000341, "loss": 1.0485, "step": 2762 }, { "epoch": 0.4210606522401707, "grad_norm": 1.1015625, "learning_rate": 0.0001299613772896519, "loss": 1.0387, "step": 2763 }, { "epoch": 0.4212130448034136, "grad_norm": 0.94140625, "learning_rate": 0.00012991428378024742, "loss": 0.8516, "step": 2764 }, { "epoch": 0.4213654373666565, "grad_norm": 1.0, "learning_rate": 0.00012986718298329316, "loss": 0.8819, "step": 2765 }, { "epoch": 0.42151782992989945, "grad_norm": 0.74609375, "learning_rate": 0.0001298200749102637, "loss": 0.8206, "step": 2766 }, { "epoch": 0.42167022249314234, "grad_norm": 0.765625, "learning_rate": 0.0001297729595726351, "loss": 0.9652, "step": 2767 }, { "epoch": 0.42182261505638524, "grad_norm": 1.0, "learning_rate": 0.00012972583698188546, "loss": 0.9514, "step": 2768 }, { "epoch": 0.42197500761962814, "grad_norm": 0.8984375, "learning_rate": 0.00012967870714949444, "loss": 0.9413, "step": 2769 }, { "epoch": 0.4221274001828711, "grad_norm": 1.265625, "learning_rate": 0.0001296315700869436, "loss": 1.1572, "step": 2770 }, { "epoch": 0.422279792746114, "grad_norm": 0.7578125, "learning_rate": 0.00012958442580571612, "loss": 0.9225, "step": 2771 }, { "epoch": 0.4224321853093569, "grad_norm": 0.9765625, "learning_rate": 0.00012953727431729714, "loss": 0.7863, "step": 2772 }, { "epoch": 0.42258457787259984, "grad_norm": 1.09375, "learning_rate": 0.00012949011563317335, "loss": 1.0512, "step": 2773 }, { "epoch": 0.42273697043584274, "grad_norm": 0.97265625, "learning_rate": 0.00012944294976483333, "loss": 0.9116, "step": 2774 }, { "epoch": 0.42288936299908564, "grad_norm": 0.859375, "learning_rate": 0.00012939577672376734, "loss": 0.8859, "step": 2775 }, { "epoch": 0.42304175556232854, "grad_norm": 0.97265625, "learning_rate": 0.00012934859652146742, "loss": 1.1537, "step": 2776 }, { "epoch": 0.4231941481255715, "grad_norm": 0.99609375, "learning_rate": 0.00012930140916942736, "loss": 1.0658, "step": 2777 }, { "epoch": 0.4233465406888144, "grad_norm": 1.296875, "learning_rate": 0.00012925421467914266, "loss": 0.9389, "step": 2778 }, { "epoch": 0.4234989332520573, "grad_norm": 0.92578125, "learning_rate": 0.00012920701306211058, "loss": 0.8578, "step": 2779 }, { "epoch": 0.42365132581530024, "grad_norm": 0.69921875, "learning_rate": 0.0001291598043298301, "loss": 0.8518, "step": 2780 }, { "epoch": 0.42380371837854314, "grad_norm": 0.84765625, "learning_rate": 0.000129112588493802, "loss": 1.1169, "step": 2781 }, { "epoch": 0.42395611094178604, "grad_norm": 0.74609375, "learning_rate": 0.00012906536556552865, "loss": 0.8793, "step": 2782 }, { "epoch": 0.42410850350502893, "grad_norm": 0.70703125, "learning_rate": 0.0001290181355565143, "loss": 0.7914, "step": 2783 }, { "epoch": 0.4242608960682719, "grad_norm": 0.90625, "learning_rate": 0.00012897089847826483, "loss": 0.9027, "step": 2784 }, { "epoch": 0.4244132886315148, "grad_norm": 0.9375, "learning_rate": 0.0001289236543422879, "loss": 0.938, "step": 2785 }, { "epoch": 0.4245656811947577, "grad_norm": 1.0703125, "learning_rate": 0.00012887640316009284, "loss": 1.0967, "step": 2786 }, { "epoch": 0.42471807375800064, "grad_norm": 0.88671875, "learning_rate": 0.0001288291449431907, "loss": 0.7544, "step": 2787 }, { "epoch": 0.42487046632124353, "grad_norm": 0.796875, "learning_rate": 0.00012878187970309432, "loss": 0.8522, "step": 2788 }, { "epoch": 0.42502285888448643, "grad_norm": 1.0234375, "learning_rate": 0.00012873460745131812, "loss": 1.0869, "step": 2789 }, { "epoch": 0.42517525144772933, "grad_norm": 1.03125, "learning_rate": 0.00012868732819937832, "loss": 1.0435, "step": 2790 }, { "epoch": 0.4253276440109723, "grad_norm": 1.0078125, "learning_rate": 0.00012864004195879287, "loss": 0.9393, "step": 2791 }, { "epoch": 0.4254800365742152, "grad_norm": 1.1484375, "learning_rate": 0.00012859274874108136, "loss": 0.8969, "step": 2792 }, { "epoch": 0.4256324291374581, "grad_norm": 0.99609375, "learning_rate": 0.00012854544855776502, "loss": 1.0291, "step": 2793 }, { "epoch": 0.42578482170070103, "grad_norm": 0.953125, "learning_rate": 0.00012849814142036698, "loss": 0.9055, "step": 2794 }, { "epoch": 0.42593721426394393, "grad_norm": 0.7890625, "learning_rate": 0.00012845082734041187, "loss": 1.0497, "step": 2795 }, { "epoch": 0.42608960682718683, "grad_norm": 0.875, "learning_rate": 0.00012840350632942608, "loss": 0.9526, "step": 2796 }, { "epoch": 0.4262419993904297, "grad_norm": 0.9609375, "learning_rate": 0.00012835617839893773, "loss": 0.9071, "step": 2797 }, { "epoch": 0.4263943919536727, "grad_norm": 0.9609375, "learning_rate": 0.0001283088435604765, "loss": 0.9788, "step": 2798 }, { "epoch": 0.4265467845169156, "grad_norm": 1.3125, "learning_rate": 0.00012826150182557392, "loss": 0.8933, "step": 2799 }, { "epoch": 0.4266991770801585, "grad_norm": 0.8203125, "learning_rate": 0.0001282141532057631, "loss": 0.9442, "step": 2800 }, { "epoch": 0.42685156964340143, "grad_norm": 0.8203125, "learning_rate": 0.00012816679771257878, "loss": 0.8766, "step": 2801 }, { "epoch": 0.4270039622066443, "grad_norm": 0.96875, "learning_rate": 0.00012811943535755754, "loss": 1.0641, "step": 2802 }, { "epoch": 0.4271563547698872, "grad_norm": 1.0234375, "learning_rate": 0.00012807206615223749, "loss": 0.9535, "step": 2803 }, { "epoch": 0.4273087473331301, "grad_norm": 0.94140625, "learning_rate": 0.00012802469010815838, "loss": 1.1641, "step": 2804 }, { "epoch": 0.4274611398963731, "grad_norm": 0.76171875, "learning_rate": 0.00012797730723686183, "loss": 0.9695, "step": 2805 }, { "epoch": 0.427613532459616, "grad_norm": 0.859375, "learning_rate": 0.00012792991754989087, "loss": 0.922, "step": 2806 }, { "epoch": 0.4277659250228589, "grad_norm": 1.2265625, "learning_rate": 0.00012788252105879036, "loss": 1.0129, "step": 2807 }, { "epoch": 0.42791831758610177, "grad_norm": 0.8046875, "learning_rate": 0.00012783511777510678, "loss": 0.9312, "step": 2808 }, { "epoch": 0.4280707101493447, "grad_norm": 0.90234375, "learning_rate": 0.0001277877077103882, "loss": 1.0043, "step": 2809 }, { "epoch": 0.4282231027125876, "grad_norm": 0.7734375, "learning_rate": 0.00012774029087618446, "loss": 0.8618, "step": 2810 }, { "epoch": 0.4283754952758305, "grad_norm": 0.91015625, "learning_rate": 0.00012769286728404696, "loss": 0.8826, "step": 2811 }, { "epoch": 0.4285278878390735, "grad_norm": 0.80078125, "learning_rate": 0.00012764543694552874, "loss": 0.7664, "step": 2812 }, { "epoch": 0.42868028040231637, "grad_norm": 0.984375, "learning_rate": 0.00012759799987218452, "loss": 0.9701, "step": 2813 }, { "epoch": 0.42883267296555927, "grad_norm": 1.125, "learning_rate": 0.00012755055607557066, "loss": 0.9664, "step": 2814 }, { "epoch": 0.42898506552880217, "grad_norm": 0.9609375, "learning_rate": 0.0001275031055672452, "loss": 0.9338, "step": 2815 }, { "epoch": 0.4291374580920451, "grad_norm": 1.0078125, "learning_rate": 0.00012745564835876772, "loss": 1.0566, "step": 2816 }, { "epoch": 0.429289850655288, "grad_norm": 0.83984375, "learning_rate": 0.0001274081844616994, "loss": 1.0235, "step": 2817 }, { "epoch": 0.4294422432185309, "grad_norm": 1.140625, "learning_rate": 0.00012736071388760327, "loss": 0.9051, "step": 2818 }, { "epoch": 0.42959463578177387, "grad_norm": 0.8515625, "learning_rate": 0.0001273132366480438, "loss": 0.8331, "step": 2819 }, { "epoch": 0.42974702834501677, "grad_norm": 0.83984375, "learning_rate": 0.00012726575275458707, "loss": 1.0474, "step": 2820 }, { "epoch": 0.42989942090825967, "grad_norm": 1.1015625, "learning_rate": 0.00012721826221880089, "loss": 0.9763, "step": 2821 }, { "epoch": 0.43005181347150256, "grad_norm": 0.93359375, "learning_rate": 0.00012717076505225464, "loss": 1.0285, "step": 2822 }, { "epoch": 0.4302042060347455, "grad_norm": 0.734375, "learning_rate": 0.00012712326126651933, "loss": 0.9053, "step": 2823 }, { "epoch": 0.4303565985979884, "grad_norm": 0.80078125, "learning_rate": 0.00012707575087316748, "loss": 0.8021, "step": 2824 }, { "epoch": 0.4305089911612313, "grad_norm": 0.98046875, "learning_rate": 0.00012702823388377338, "loss": 0.8303, "step": 2825 }, { "epoch": 0.43066138372447427, "grad_norm": 0.7890625, "learning_rate": 0.00012698071030991286, "loss": 0.7593, "step": 2826 }, { "epoch": 0.43081377628771717, "grad_norm": 0.65625, "learning_rate": 0.00012693318016316328, "loss": 0.8451, "step": 2827 }, { "epoch": 0.43096616885096006, "grad_norm": 1.0546875, "learning_rate": 0.0001268856434551037, "loss": 0.7518, "step": 2828 }, { "epoch": 0.43111856141420296, "grad_norm": 0.89453125, "learning_rate": 0.0001268381001973148, "loss": 0.8788, "step": 2829 }, { "epoch": 0.4312709539774459, "grad_norm": 0.78515625, "learning_rate": 0.00012679055040137871, "loss": 0.9135, "step": 2830 }, { "epoch": 0.4314233465406888, "grad_norm": 0.95703125, "learning_rate": 0.0001267429940788793, "loss": 1.0978, "step": 2831 }, { "epoch": 0.4315757391039317, "grad_norm": 0.828125, "learning_rate": 0.00012669543124140196, "loss": 0.8928, "step": 2832 }, { "epoch": 0.43172813166717466, "grad_norm": 0.96875, "learning_rate": 0.00012664786190053371, "loss": 0.9573, "step": 2833 }, { "epoch": 0.43188052423041756, "grad_norm": 0.7578125, "learning_rate": 0.00012660028606786304, "loss": 0.8788, "step": 2834 }, { "epoch": 0.43203291679366046, "grad_norm": 1.0390625, "learning_rate": 0.0001265527037549802, "loss": 0.9181, "step": 2835 }, { "epoch": 0.43218530935690336, "grad_norm": 1.03125, "learning_rate": 0.00012650511497347693, "loss": 0.9832, "step": 2836 }, { "epoch": 0.4323377019201463, "grad_norm": 1.1640625, "learning_rate": 0.00012645751973494644, "loss": 1.0878, "step": 2837 }, { "epoch": 0.4324900944833892, "grad_norm": 0.93359375, "learning_rate": 0.00012640991805098367, "loss": 0.9116, "step": 2838 }, { "epoch": 0.4326424870466321, "grad_norm": 0.875, "learning_rate": 0.0001263623099331851, "loss": 0.9585, "step": 2839 }, { "epoch": 0.43279487960987506, "grad_norm": 1.078125, "learning_rate": 0.00012631469539314878, "loss": 1.1063, "step": 2840 }, { "epoch": 0.43294727217311796, "grad_norm": 0.88671875, "learning_rate": 0.0001262670744424742, "loss": 1.0928, "step": 2841 }, { "epoch": 0.43309966473636086, "grad_norm": 1.078125, "learning_rate": 0.00012621944709276256, "loss": 0.9645, "step": 2842 }, { "epoch": 0.43325205729960375, "grad_norm": 0.953125, "learning_rate": 0.0001261718133556166, "loss": 1.0493, "step": 2843 }, { "epoch": 0.4334044498628467, "grad_norm": 0.76953125, "learning_rate": 0.00012612417324264053, "loss": 0.8353, "step": 2844 }, { "epoch": 0.4335568424260896, "grad_norm": 1.0625, "learning_rate": 0.00012607652676544016, "loss": 1.0569, "step": 2845 }, { "epoch": 0.4337092349893325, "grad_norm": 0.77734375, "learning_rate": 0.00012602887393562293, "loss": 0.8886, "step": 2846 }, { "epoch": 0.43386162755257546, "grad_norm": 1.3203125, "learning_rate": 0.00012598121476479766, "loss": 1.1248, "step": 2847 }, { "epoch": 0.43401402011581836, "grad_norm": 0.953125, "learning_rate": 0.00012593354926457488, "loss": 1.0077, "step": 2848 }, { "epoch": 0.43416641267906125, "grad_norm": 0.859375, "learning_rate": 0.00012588587744656655, "loss": 1.0487, "step": 2849 }, { "epoch": 0.43431880524230415, "grad_norm": 0.81640625, "learning_rate": 0.00012583819932238628, "loss": 0.8828, "step": 2850 }, { "epoch": 0.4344711978055471, "grad_norm": 0.98828125, "learning_rate": 0.00012579051490364904, "loss": 0.9499, "step": 2851 }, { "epoch": 0.43462359036879, "grad_norm": 0.875, "learning_rate": 0.0001257428242019715, "loss": 1.1003, "step": 2852 }, { "epoch": 0.4347759829320329, "grad_norm": 0.66015625, "learning_rate": 0.00012569512722897182, "loss": 0.9348, "step": 2853 }, { "epoch": 0.43492837549527585, "grad_norm": 0.91015625, "learning_rate": 0.00012564742399626962, "loss": 0.9253, "step": 2854 }, { "epoch": 0.43508076805851875, "grad_norm": 0.94921875, "learning_rate": 0.00012559971451548614, "loss": 1.0803, "step": 2855 }, { "epoch": 0.43523316062176165, "grad_norm": 0.765625, "learning_rate": 0.0001255519987982441, "loss": 1.1068, "step": 2856 }, { "epoch": 0.43538555318500455, "grad_norm": 0.86328125, "learning_rate": 0.00012550427685616765, "loss": 0.8649, "step": 2857 }, { "epoch": 0.4355379457482475, "grad_norm": 1.0625, "learning_rate": 0.00012545654870088264, "loss": 1.2188, "step": 2858 }, { "epoch": 0.4356903383114904, "grad_norm": 0.85546875, "learning_rate": 0.00012540881434401627, "loss": 1.0728, "step": 2859 }, { "epoch": 0.4358427308747333, "grad_norm": 1.03125, "learning_rate": 0.00012536107379719735, "loss": 1.137, "step": 2860 }, { "epoch": 0.43599512343797625, "grad_norm": 1.0546875, "learning_rate": 0.00012531332707205618, "loss": 1.0184, "step": 2861 }, { "epoch": 0.43614751600121915, "grad_norm": 0.8671875, "learning_rate": 0.00012526557418022447, "loss": 0.7076, "step": 2862 }, { "epoch": 0.43629990856446205, "grad_norm": 1.15625, "learning_rate": 0.00012521781513333556, "loss": 1.206, "step": 2863 }, { "epoch": 0.43645230112770494, "grad_norm": 1.2265625, "learning_rate": 0.0001251700499430243, "loss": 1.1403, "step": 2864 }, { "epoch": 0.4366046936909479, "grad_norm": 0.9296875, "learning_rate": 0.0001251222786209269, "loss": 0.9154, "step": 2865 }, { "epoch": 0.4367570862541908, "grad_norm": 0.82421875, "learning_rate": 0.00012507450117868113, "loss": 0.9322, "step": 2866 }, { "epoch": 0.4369094788174337, "grad_norm": 0.8125, "learning_rate": 0.00012502671762792638, "loss": 0.8789, "step": 2867 }, { "epoch": 0.43706187138067665, "grad_norm": 0.875, "learning_rate": 0.00012497892798030323, "loss": 0.9478, "step": 2868 }, { "epoch": 0.43721426394391955, "grad_norm": 1.09375, "learning_rate": 0.00012493113224745406, "loss": 0.8743, "step": 2869 }, { "epoch": 0.43736665650716244, "grad_norm": 0.8203125, "learning_rate": 0.00012488333044102258, "loss": 0.9464, "step": 2870 }, { "epoch": 0.43751904907040534, "grad_norm": 1.0, "learning_rate": 0.00012483552257265394, "loss": 0.9167, "step": 2871 }, { "epoch": 0.4376714416336483, "grad_norm": 0.92578125, "learning_rate": 0.0001247877086539949, "loss": 0.8145, "step": 2872 }, { "epoch": 0.4378238341968912, "grad_norm": 0.875, "learning_rate": 0.00012473988869669353, "loss": 0.8897, "step": 2873 }, { "epoch": 0.4379762267601341, "grad_norm": 1.03125, "learning_rate": 0.00012469206271239954, "loss": 1.1792, "step": 2874 }, { "epoch": 0.43812861932337704, "grad_norm": 0.91796875, "learning_rate": 0.00012464423071276398, "loss": 0.9874, "step": 2875 }, { "epoch": 0.43828101188661994, "grad_norm": 0.9609375, "learning_rate": 0.00012459639270943944, "loss": 1.0943, "step": 2876 }, { "epoch": 0.43843340444986284, "grad_norm": 1.171875, "learning_rate": 0.00012454854871407994, "loss": 0.9601, "step": 2877 }, { "epoch": 0.43858579701310574, "grad_norm": 1.1640625, "learning_rate": 0.00012450069873834094, "loss": 1.1033, "step": 2878 }, { "epoch": 0.4387381895763487, "grad_norm": 0.9765625, "learning_rate": 0.00012445284279387938, "loss": 1.0088, "step": 2879 }, { "epoch": 0.4388905821395916, "grad_norm": 0.953125, "learning_rate": 0.0001244049808923537, "loss": 1.119, "step": 2880 }, { "epoch": 0.4390429747028345, "grad_norm": 0.875, "learning_rate": 0.00012435711304542376, "loss": 0.8849, "step": 2881 }, { "epoch": 0.43919536726607744, "grad_norm": 1.21875, "learning_rate": 0.00012430923926475077, "loss": 0.9874, "step": 2882 }, { "epoch": 0.43934775982932034, "grad_norm": 1.0703125, "learning_rate": 0.00012426135956199755, "loss": 1.0431, "step": 2883 }, { "epoch": 0.43950015239256324, "grad_norm": 0.7890625, "learning_rate": 0.00012421347394882826, "loss": 0.9871, "step": 2884 }, { "epoch": 0.43965254495580613, "grad_norm": 0.875, "learning_rate": 0.0001241655824369085, "loss": 0.994, "step": 2885 }, { "epoch": 0.4398049375190491, "grad_norm": 0.96484375, "learning_rate": 0.00012411768503790534, "loss": 0.958, "step": 2886 }, { "epoch": 0.439957330082292, "grad_norm": 0.9140625, "learning_rate": 0.00012406978176348734, "loss": 1.0146, "step": 2887 }, { "epoch": 0.4401097226455349, "grad_norm": 1.0625, "learning_rate": 0.00012402187262532433, "loss": 0.9942, "step": 2888 }, { "epoch": 0.44026211520877784, "grad_norm": 1.1171875, "learning_rate": 0.0001239739576350877, "loss": 0.9183, "step": 2889 }, { "epoch": 0.44041450777202074, "grad_norm": 1.03125, "learning_rate": 0.00012392603680445025, "loss": 1.0784, "step": 2890 }, { "epoch": 0.44056690033526363, "grad_norm": 0.81640625, "learning_rate": 0.0001238781101450862, "loss": 0.8418, "step": 2891 }, { "epoch": 0.44071929289850653, "grad_norm": 1.015625, "learning_rate": 0.0001238301776686711, "loss": 0.9663, "step": 2892 }, { "epoch": 0.4408716854617495, "grad_norm": 1.078125, "learning_rate": 0.00012378223938688205, "loss": 1.0224, "step": 2893 }, { "epoch": 0.4410240780249924, "grad_norm": 0.84765625, "learning_rate": 0.0001237342953113975, "loss": 0.8827, "step": 2894 }, { "epoch": 0.4411764705882353, "grad_norm": 0.9453125, "learning_rate": 0.00012368634545389733, "loss": 0.8454, "step": 2895 }, { "epoch": 0.44132886315147823, "grad_norm": 0.796875, "learning_rate": 0.00012363838982606273, "loss": 1.0191, "step": 2896 }, { "epoch": 0.44148125571472113, "grad_norm": 0.9140625, "learning_rate": 0.00012359042843957646, "loss": 0.922, "step": 2897 }, { "epoch": 0.44163364827796403, "grad_norm": 1.078125, "learning_rate": 0.00012354246130612265, "loss": 1.1164, "step": 2898 }, { "epoch": 0.44178604084120693, "grad_norm": 0.91796875, "learning_rate": 0.00012349448843738665, "loss": 1.171, "step": 2899 }, { "epoch": 0.4419384334044499, "grad_norm": 0.984375, "learning_rate": 0.00012344650984505543, "loss": 0.9964, "step": 2900 }, { "epoch": 0.4420908259676928, "grad_norm": 1.0078125, "learning_rate": 0.00012339852554081726, "loss": 1.1018, "step": 2901 }, { "epoch": 0.4422432185309357, "grad_norm": 1.03125, "learning_rate": 0.00012335053553636175, "loss": 1.0302, "step": 2902 }, { "epoch": 0.44239561109417863, "grad_norm": 0.8515625, "learning_rate": 0.00012330253984338006, "loss": 0.8988, "step": 2903 }, { "epoch": 0.44254800365742153, "grad_norm": 0.859375, "learning_rate": 0.0001232545384735645, "loss": 1.069, "step": 2904 }, { "epoch": 0.4427003962206644, "grad_norm": 0.95703125, "learning_rate": 0.00012320653143860902, "loss": 1.0336, "step": 2905 }, { "epoch": 0.4428527887839073, "grad_norm": 0.91015625, "learning_rate": 0.00012315851875020873, "loss": 1.0403, "step": 2906 }, { "epoch": 0.4430051813471503, "grad_norm": 0.9921875, "learning_rate": 0.00012311050042006024, "loss": 0.9451, "step": 2907 }, { "epoch": 0.4431575739103932, "grad_norm": 0.9765625, "learning_rate": 0.00012306247645986154, "loss": 1.0236, "step": 2908 }, { "epoch": 0.4433099664736361, "grad_norm": 0.79296875, "learning_rate": 0.00012301444688131191, "loss": 0.7988, "step": 2909 }, { "epoch": 0.44346235903687903, "grad_norm": 0.765625, "learning_rate": 0.00012296641169611206, "loss": 0.9046, "step": 2910 }, { "epoch": 0.4436147516001219, "grad_norm": 0.9140625, "learning_rate": 0.00012291837091596409, "loss": 1.017, "step": 2911 }, { "epoch": 0.4437671441633648, "grad_norm": 0.88671875, "learning_rate": 0.0001228703245525714, "loss": 0.9049, "step": 2912 }, { "epoch": 0.4439195367266077, "grad_norm": 1.078125, "learning_rate": 0.00012282227261763872, "loss": 0.8276, "step": 2913 }, { "epoch": 0.4440719292898507, "grad_norm": 0.890625, "learning_rate": 0.00012277421512287226, "loss": 1.047, "step": 2914 }, { "epoch": 0.4442243218530936, "grad_norm": 1.2890625, "learning_rate": 0.0001227261520799795, "loss": 1.015, "step": 2915 }, { "epoch": 0.44437671441633647, "grad_norm": 0.8046875, "learning_rate": 0.00012267808350066928, "loss": 0.8978, "step": 2916 }, { "epoch": 0.44452910697957937, "grad_norm": 0.8359375, "learning_rate": 0.00012263000939665182, "loss": 0.9853, "step": 2917 }, { "epoch": 0.4446814995428223, "grad_norm": 0.94140625, "learning_rate": 0.00012258192977963861, "loss": 0.9754, "step": 2918 }, { "epoch": 0.4448338921060652, "grad_norm": 0.86328125, "learning_rate": 0.0001225338446613426, "loss": 0.8024, "step": 2919 }, { "epoch": 0.4449862846693081, "grad_norm": 1.2109375, "learning_rate": 0.000122485754053478, "loss": 1.1133, "step": 2920 }, { "epoch": 0.44513867723255107, "grad_norm": 0.76953125, "learning_rate": 0.00012243765796776033, "loss": 1.0362, "step": 2921 }, { "epoch": 0.44529106979579397, "grad_norm": 1.3203125, "learning_rate": 0.00012238955641590655, "loss": 0.9365, "step": 2922 }, { "epoch": 0.44544346235903687, "grad_norm": 0.8671875, "learning_rate": 0.00012234144940963485, "loss": 0.8966, "step": 2923 }, { "epoch": 0.44559585492227977, "grad_norm": 0.9296875, "learning_rate": 0.00012229333696066476, "loss": 0.9264, "step": 2924 }, { "epoch": 0.4457482474855227, "grad_norm": 0.890625, "learning_rate": 0.00012224521908071724, "loss": 0.9521, "step": 2925 }, { "epoch": 0.4459006400487656, "grad_norm": 0.94140625, "learning_rate": 0.00012219709578151447, "loss": 1.1265, "step": 2926 }, { "epoch": 0.4460530326120085, "grad_norm": 0.921875, "learning_rate": 0.00012214896707477994, "loss": 0.9163, "step": 2927 }, { "epoch": 0.44620542517525147, "grad_norm": 1.1796875, "learning_rate": 0.00012210083297223853, "loss": 0.9603, "step": 2928 }, { "epoch": 0.44635781773849437, "grad_norm": 0.8828125, "learning_rate": 0.00012205269348561641, "loss": 0.8162, "step": 2929 }, { "epoch": 0.44651021030173726, "grad_norm": 1.2265625, "learning_rate": 0.00012200454862664102, "loss": 1.003, "step": 2930 }, { "epoch": 0.44666260286498016, "grad_norm": 0.83984375, "learning_rate": 0.00012195639840704115, "loss": 0.889, "step": 2931 }, { "epoch": 0.4468149954282231, "grad_norm": 0.8984375, "learning_rate": 0.00012190824283854689, "loss": 0.9406, "step": 2932 }, { "epoch": 0.446967387991466, "grad_norm": 1.1796875, "learning_rate": 0.00012186008193288962, "loss": 1.0405, "step": 2933 }, { "epoch": 0.4471197805547089, "grad_norm": 0.96875, "learning_rate": 0.00012181191570180205, "loss": 1.0018, "step": 2934 }, { "epoch": 0.44727217311795187, "grad_norm": 0.89453125, "learning_rate": 0.00012176374415701817, "loss": 0.9618, "step": 2935 }, { "epoch": 0.44742456568119476, "grad_norm": 1.078125, "learning_rate": 0.0001217155673102732, "loss": 0.9853, "step": 2936 }, { "epoch": 0.44757695824443766, "grad_norm": 0.8984375, "learning_rate": 0.0001216673851733038, "loss": 0.8764, "step": 2937 }, { "epoch": 0.44772935080768056, "grad_norm": 0.9453125, "learning_rate": 0.00012161919775784775, "loss": 1.0485, "step": 2938 }, { "epoch": 0.4478817433709235, "grad_norm": 0.87890625, "learning_rate": 0.00012157100507564427, "loss": 1.0121, "step": 2939 }, { "epoch": 0.4480341359341664, "grad_norm": 1.171875, "learning_rate": 0.00012152280713843379, "loss": 0.9348, "step": 2940 }, { "epoch": 0.4481865284974093, "grad_norm": 1.0859375, "learning_rate": 0.00012147460395795791, "loss": 0.8188, "step": 2941 }, { "epoch": 0.44833892106065226, "grad_norm": 1.03125, "learning_rate": 0.00012142639554595974, "loss": 1.0394, "step": 2942 }, { "epoch": 0.44849131362389516, "grad_norm": 1.0234375, "learning_rate": 0.00012137818191418348, "loss": 0.888, "step": 2943 }, { "epoch": 0.44864370618713806, "grad_norm": 0.98046875, "learning_rate": 0.0001213299630743747, "loss": 1.0319, "step": 2944 }, { "epoch": 0.44879609875038096, "grad_norm": 1.3125, "learning_rate": 0.00012128173903828018, "loss": 1.0389, "step": 2945 }, { "epoch": 0.4489484913136239, "grad_norm": 0.89453125, "learning_rate": 0.00012123350981764799, "loss": 0.929, "step": 2946 }, { "epoch": 0.4491008838768668, "grad_norm": 1.0390625, "learning_rate": 0.00012118527542422744, "loss": 1.0017, "step": 2947 }, { "epoch": 0.4492532764401097, "grad_norm": 0.96875, "learning_rate": 0.0001211370358697692, "loss": 0.8748, "step": 2948 }, { "epoch": 0.44940566900335266, "grad_norm": 0.9375, "learning_rate": 0.00012108879116602503, "loss": 0.8368, "step": 2949 }, { "epoch": 0.44955806156659556, "grad_norm": 0.83984375, "learning_rate": 0.00012104054132474804, "loss": 0.9607, "step": 2950 }, { "epoch": 0.44971045412983846, "grad_norm": 0.8515625, "learning_rate": 0.00012099228635769268, "loss": 0.8877, "step": 2951 }, { "epoch": 0.44986284669308135, "grad_norm": 0.96875, "learning_rate": 0.00012094402627661447, "loss": 0.8974, "step": 2952 }, { "epoch": 0.4500152392563243, "grad_norm": 0.85546875, "learning_rate": 0.00012089576109327025, "loss": 0.8878, "step": 2953 }, { "epoch": 0.4501676318195672, "grad_norm": 0.953125, "learning_rate": 0.00012084749081941815, "loss": 0.9363, "step": 2954 }, { "epoch": 0.4503200243828101, "grad_norm": 1.0625, "learning_rate": 0.00012079921546681749, "loss": 1.2199, "step": 2955 }, { "epoch": 0.45047241694605306, "grad_norm": 0.9609375, "learning_rate": 0.00012075093504722888, "loss": 1.1741, "step": 2956 }, { "epoch": 0.45062480950929595, "grad_norm": 0.8984375, "learning_rate": 0.00012070264957241406, "loss": 0.9586, "step": 2957 }, { "epoch": 0.45077720207253885, "grad_norm": 0.83203125, "learning_rate": 0.00012065435905413609, "loss": 0.9006, "step": 2958 }, { "epoch": 0.45092959463578175, "grad_norm": 0.93359375, "learning_rate": 0.00012060606350415926, "loss": 1.0285, "step": 2959 }, { "epoch": 0.4510819871990247, "grad_norm": 1.0859375, "learning_rate": 0.00012055776293424905, "loss": 0.9179, "step": 2960 }, { "epoch": 0.4512343797622676, "grad_norm": 0.79296875, "learning_rate": 0.00012050945735617212, "loss": 0.8551, "step": 2961 }, { "epoch": 0.4513867723255105, "grad_norm": 1.1875, "learning_rate": 0.00012046114678169647, "loss": 1.0246, "step": 2962 }, { "epoch": 0.45153916488875345, "grad_norm": 0.9375, "learning_rate": 0.00012041283122259124, "loss": 1.2547, "step": 2963 }, { "epoch": 0.45169155745199635, "grad_norm": 0.73828125, "learning_rate": 0.00012036451069062675, "loss": 0.9337, "step": 2964 }, { "epoch": 0.45184395001523925, "grad_norm": 0.69140625, "learning_rate": 0.00012031618519757463, "loss": 0.8869, "step": 2965 }, { "epoch": 0.45199634257848215, "grad_norm": 0.703125, "learning_rate": 0.00012026785475520766, "loss": 0.975, "step": 2966 }, { "epoch": 0.4521487351417251, "grad_norm": 1.3125, "learning_rate": 0.00012021951937529979, "loss": 1.103, "step": 2967 }, { "epoch": 0.452301127704968, "grad_norm": 0.84375, "learning_rate": 0.00012017117906962625, "loss": 0.96, "step": 2968 }, { "epoch": 0.4524535202682109, "grad_norm": 0.7578125, "learning_rate": 0.00012012283384996339, "loss": 0.8625, "step": 2969 }, { "epoch": 0.45260591283145385, "grad_norm": 0.99609375, "learning_rate": 0.00012007448372808886, "loss": 0.9229, "step": 2970 }, { "epoch": 0.45275830539469675, "grad_norm": 1.0625, "learning_rate": 0.00012002612871578143, "loss": 1.0955, "step": 2971 }, { "epoch": 0.45291069795793965, "grad_norm": 1.0390625, "learning_rate": 0.00011997776882482103, "loss": 1.0479, "step": 2972 }, { "epoch": 0.45306309052118254, "grad_norm": 0.95703125, "learning_rate": 0.00011992940406698889, "loss": 0.9458, "step": 2973 }, { "epoch": 0.4532154830844255, "grad_norm": 1.1640625, "learning_rate": 0.00011988103445406733, "loss": 1.0501, "step": 2974 }, { "epoch": 0.4533678756476684, "grad_norm": 0.859375, "learning_rate": 0.00011983265999783986, "loss": 1.0747, "step": 2975 }, { "epoch": 0.4535202682109113, "grad_norm": 1.1015625, "learning_rate": 0.00011978428071009124, "loss": 0.9361, "step": 2976 }, { "epoch": 0.45367266077415425, "grad_norm": 0.84765625, "learning_rate": 0.00011973589660260734, "loss": 0.8621, "step": 2977 }, { "epoch": 0.45382505333739714, "grad_norm": 0.96484375, "learning_rate": 0.00011968750768717519, "loss": 1.0318, "step": 2978 }, { "epoch": 0.45397744590064004, "grad_norm": 1.15625, "learning_rate": 0.00011963911397558308, "loss": 1.1293, "step": 2979 }, { "epoch": 0.45412983846388294, "grad_norm": 0.92578125, "learning_rate": 0.00011959071547962037, "loss": 0.9482, "step": 2980 }, { "epoch": 0.4542822310271259, "grad_norm": 1.078125, "learning_rate": 0.00011954231221107766, "loss": 1.0528, "step": 2981 }, { "epoch": 0.4544346235903688, "grad_norm": 1.140625, "learning_rate": 0.00011949390418174666, "loss": 1.0679, "step": 2982 }, { "epoch": 0.4545870161536117, "grad_norm": 1.125, "learning_rate": 0.0001194454914034203, "loss": 0.8914, "step": 2983 }, { "epoch": 0.45473940871685464, "grad_norm": 1.0625, "learning_rate": 0.00011939707388789256, "loss": 0.9306, "step": 2984 }, { "epoch": 0.45489180128009754, "grad_norm": 0.8515625, "learning_rate": 0.00011934865164695872, "loss": 0.9892, "step": 2985 }, { "epoch": 0.45504419384334044, "grad_norm": 1.046875, "learning_rate": 0.00011930022469241509, "loss": 0.93, "step": 2986 }, { "epoch": 0.45519658640658334, "grad_norm": 0.80078125, "learning_rate": 0.00011925179303605919, "loss": 1.0169, "step": 2987 }, { "epoch": 0.4553489789698263, "grad_norm": 0.9765625, "learning_rate": 0.00011920335668968967, "loss": 1.0091, "step": 2988 }, { "epoch": 0.4555013715330692, "grad_norm": 0.84765625, "learning_rate": 0.00011915491566510628, "loss": 0.8414, "step": 2989 }, { "epoch": 0.4556537640963121, "grad_norm": 1.0703125, "learning_rate": 0.00011910646997411001, "loss": 0.972, "step": 2990 }, { "epoch": 0.45580615665955504, "grad_norm": 0.953125, "learning_rate": 0.0001190580196285029, "loss": 0.989, "step": 2991 }, { "epoch": 0.45595854922279794, "grad_norm": 1.0, "learning_rate": 0.00011900956464008813, "loss": 0.9742, "step": 2992 }, { "epoch": 0.45611094178604084, "grad_norm": 1.0234375, "learning_rate": 0.00011896110502067003, "loss": 0.9032, "step": 2993 }, { "epoch": 0.45626333434928373, "grad_norm": 0.91015625, "learning_rate": 0.00011891264078205413, "loss": 0.9099, "step": 2994 }, { "epoch": 0.4564157269125267, "grad_norm": 0.80859375, "learning_rate": 0.00011886417193604694, "loss": 0.8556, "step": 2995 }, { "epoch": 0.4565681194757696, "grad_norm": 1.296875, "learning_rate": 0.0001188156984944562, "loss": 0.8101, "step": 2996 }, { "epoch": 0.4567205120390125, "grad_norm": 1.2109375, "learning_rate": 0.00011876722046909075, "loss": 0.816, "step": 2997 }, { "epoch": 0.45687290460225544, "grad_norm": 0.89453125, "learning_rate": 0.00011871873787176046, "loss": 0.8747, "step": 2998 }, { "epoch": 0.45702529716549833, "grad_norm": 1.15625, "learning_rate": 0.00011867025071427652, "loss": 1.0319, "step": 2999 }, { "epoch": 0.45717768972874123, "grad_norm": 1.4296875, "learning_rate": 0.00011862175900845098, "loss": 0.8881, "step": 3000 }, { "epoch": 0.45733008229198413, "grad_norm": 0.92578125, "learning_rate": 0.00011857326276609715, "loss": 0.8188, "step": 3001 }, { "epoch": 0.4574824748552271, "grad_norm": 0.90625, "learning_rate": 0.00011852476199902945, "loss": 0.9109, "step": 3002 }, { "epoch": 0.45763486741847, "grad_norm": 1.09375, "learning_rate": 0.00011847625671906331, "loss": 0.9476, "step": 3003 }, { "epoch": 0.4577872599817129, "grad_norm": 1.171875, "learning_rate": 0.00011842774693801536, "loss": 0.8475, "step": 3004 }, { "epoch": 0.45793965254495583, "grad_norm": 0.95703125, "learning_rate": 0.00011837923266770326, "loss": 0.8558, "step": 3005 }, { "epoch": 0.45809204510819873, "grad_norm": 1.21875, "learning_rate": 0.00011833071391994577, "loss": 0.9178, "step": 3006 }, { "epoch": 0.45824443767144163, "grad_norm": 1.0859375, "learning_rate": 0.00011828219070656277, "loss": 1.1597, "step": 3007 }, { "epoch": 0.4583968302346845, "grad_norm": 0.9140625, "learning_rate": 0.00011823366303937523, "loss": 1.073, "step": 3008 }, { "epoch": 0.4585492227979275, "grad_norm": 0.96875, "learning_rate": 0.00011818513093020513, "loss": 0.9589, "step": 3009 }, { "epoch": 0.4587016153611704, "grad_norm": 0.90625, "learning_rate": 0.00011813659439087568, "loss": 1.0487, "step": 3010 }, { "epoch": 0.4588540079244133, "grad_norm": 0.9453125, "learning_rate": 0.000118088053433211, "loss": 0.9221, "step": 3011 }, { "epoch": 0.45900640048765623, "grad_norm": 0.828125, "learning_rate": 0.00011803950806903639, "loss": 0.8415, "step": 3012 }, { "epoch": 0.45915879305089913, "grad_norm": 0.79296875, "learning_rate": 0.00011799095831017821, "loss": 0.9182, "step": 3013 }, { "epoch": 0.459311185614142, "grad_norm": 0.890625, "learning_rate": 0.00011794240416846388, "loss": 1.0316, "step": 3014 }, { "epoch": 0.4594635781773849, "grad_norm": 0.9296875, "learning_rate": 0.00011789384565572183, "loss": 0.8025, "step": 3015 }, { "epoch": 0.4596159707406279, "grad_norm": 0.78515625, "learning_rate": 0.00011784528278378171, "loss": 0.9533, "step": 3016 }, { "epoch": 0.4597683633038708, "grad_norm": 1.1484375, "learning_rate": 0.0001177967155644741, "loss": 1.0288, "step": 3017 }, { "epoch": 0.4599207558671137, "grad_norm": 0.8359375, "learning_rate": 0.0001177481440096306, "loss": 0.9097, "step": 3018 }, { "epoch": 0.46007314843035657, "grad_norm": 0.78515625, "learning_rate": 0.00011769956813108402, "loss": 0.8404, "step": 3019 }, { "epoch": 0.4602255409935995, "grad_norm": 0.87890625, "learning_rate": 0.00011765098794066812, "loss": 0.8647, "step": 3020 }, { "epoch": 0.4603779335568424, "grad_norm": 0.97265625, "learning_rate": 0.00011760240345021774, "loss": 0.9574, "step": 3021 }, { "epoch": 0.4605303261200853, "grad_norm": 1.2265625, "learning_rate": 0.00011755381467156873, "loss": 1.0655, "step": 3022 }, { "epoch": 0.4606827186833283, "grad_norm": 0.91015625, "learning_rate": 0.00011750522161655805, "loss": 0.9194, "step": 3023 }, { "epoch": 0.46083511124657117, "grad_norm": 0.8984375, "learning_rate": 0.00011745662429702369, "loss": 0.9249, "step": 3024 }, { "epoch": 0.46098750380981407, "grad_norm": 1.1328125, "learning_rate": 0.00011740802272480458, "loss": 0.9398, "step": 3025 }, { "epoch": 0.46113989637305697, "grad_norm": 0.921875, "learning_rate": 0.00011735941691174079, "loss": 1.0145, "step": 3026 }, { "epoch": 0.4612922889362999, "grad_norm": 0.83203125, "learning_rate": 0.00011731080686967344, "loss": 0.9235, "step": 3027 }, { "epoch": 0.4614446814995428, "grad_norm": 0.875, "learning_rate": 0.0001172621926104446, "loss": 1.0345, "step": 3028 }, { "epoch": 0.4615970740627857, "grad_norm": 0.76171875, "learning_rate": 0.00011721357414589737, "loss": 0.947, "step": 3029 }, { "epoch": 0.46174946662602867, "grad_norm": 1.0078125, "learning_rate": 0.00011716495148787599, "loss": 0.9949, "step": 3030 }, { "epoch": 0.46190185918927157, "grad_norm": 0.796875, "learning_rate": 0.00011711632464822557, "loss": 0.9118, "step": 3031 }, { "epoch": 0.46205425175251447, "grad_norm": 0.93359375, "learning_rate": 0.00011706769363879233, "loss": 1.0189, "step": 3032 }, { "epoch": 0.46220664431575736, "grad_norm": 1.0234375, "learning_rate": 0.00011701905847142348, "loss": 1.1277, "step": 3033 }, { "epoch": 0.4623590368790003, "grad_norm": 0.84765625, "learning_rate": 0.00011697041915796723, "loss": 1.0955, "step": 3034 }, { "epoch": 0.4625114294422432, "grad_norm": 0.796875, "learning_rate": 0.00011692177571027287, "loss": 0.9897, "step": 3035 }, { "epoch": 0.4626638220054861, "grad_norm": 1.0625, "learning_rate": 0.0001168731281401906, "loss": 0.9858, "step": 3036 }, { "epoch": 0.46281621456872907, "grad_norm": 1.0234375, "learning_rate": 0.00011682447645957164, "loss": 1.0565, "step": 3037 }, { "epoch": 0.46296860713197197, "grad_norm": 0.875, "learning_rate": 0.00011677582068026831, "loss": 0.9613, "step": 3038 }, { "epoch": 0.46312099969521486, "grad_norm": 0.84765625, "learning_rate": 0.00011672716081413381, "loss": 0.9228, "step": 3039 }, { "epoch": 0.46327339225845776, "grad_norm": 1.078125, "learning_rate": 0.00011667849687302239, "loss": 0.946, "step": 3040 }, { "epoch": 0.4634257848217007, "grad_norm": 0.90234375, "learning_rate": 0.0001166298288687893, "loss": 0.9388, "step": 3041 }, { "epoch": 0.4635781773849436, "grad_norm": 0.94921875, "learning_rate": 0.00011658115681329076, "loss": 1.0223, "step": 3042 }, { "epoch": 0.4637305699481865, "grad_norm": 0.8515625, "learning_rate": 0.00011653248071838397, "loss": 0.889, "step": 3043 }, { "epoch": 0.46388296251142946, "grad_norm": 0.8203125, "learning_rate": 0.00011648380059592713, "loss": 0.8471, "step": 3044 }, { "epoch": 0.46403535507467236, "grad_norm": 0.98046875, "learning_rate": 0.00011643511645777945, "loss": 1.0201, "step": 3045 }, { "epoch": 0.46418774763791526, "grad_norm": 1.0703125, "learning_rate": 0.00011638642831580102, "loss": 1.2013, "step": 3046 }, { "epoch": 0.46434014020115816, "grad_norm": 0.9453125, "learning_rate": 0.00011633773618185302, "loss": 0.9966, "step": 3047 }, { "epoch": 0.4644925327644011, "grad_norm": 0.89453125, "learning_rate": 0.00011628904006779757, "loss": 0.8633, "step": 3048 }, { "epoch": 0.464644925327644, "grad_norm": 1.234375, "learning_rate": 0.00011624033998549771, "loss": 0.8375, "step": 3049 }, { "epoch": 0.4647973178908869, "grad_norm": 1.125, "learning_rate": 0.00011619163594681748, "loss": 1.0434, "step": 3050 }, { "epoch": 0.46494971045412986, "grad_norm": 0.87890625, "learning_rate": 0.00011614292796362188, "loss": 0.8614, "step": 3051 }, { "epoch": 0.46510210301737276, "grad_norm": 0.73828125, "learning_rate": 0.00011609421604777691, "loss": 0.8772, "step": 3052 }, { "epoch": 0.46525449558061566, "grad_norm": 0.96484375, "learning_rate": 0.00011604550021114948, "loss": 1.0656, "step": 3053 }, { "epoch": 0.46540688814385855, "grad_norm": 0.94140625, "learning_rate": 0.00011599678046560743, "loss": 0.9908, "step": 3054 }, { "epoch": 0.4655592807071015, "grad_norm": 1.0859375, "learning_rate": 0.00011594805682301963, "loss": 1.0415, "step": 3055 }, { "epoch": 0.4657116732703444, "grad_norm": 0.80859375, "learning_rate": 0.00011589932929525588, "loss": 0.9143, "step": 3056 }, { "epoch": 0.4658640658335873, "grad_norm": 1.0, "learning_rate": 0.00011585059789418683, "loss": 0.922, "step": 3057 }, { "epoch": 0.46601645839683026, "grad_norm": 0.91015625, "learning_rate": 0.00011580186263168425, "loss": 0.8058, "step": 3058 }, { "epoch": 0.46616885096007316, "grad_norm": 0.90625, "learning_rate": 0.00011575312351962067, "loss": 1.0485, "step": 3059 }, { "epoch": 0.46632124352331605, "grad_norm": 0.953125, "learning_rate": 0.00011570438056986965, "loss": 1.0627, "step": 3060 }, { "epoch": 0.46647363608655895, "grad_norm": 0.88671875, "learning_rate": 0.00011565563379430572, "loss": 0.9674, "step": 3061 }, { "epoch": 0.4666260286498019, "grad_norm": 1.0703125, "learning_rate": 0.00011560688320480426, "loss": 0.9146, "step": 3062 }, { "epoch": 0.4667784212130448, "grad_norm": 1.0, "learning_rate": 0.0001155581288132416, "loss": 0.759, "step": 3063 }, { "epoch": 0.4669308137762877, "grad_norm": 1.0234375, "learning_rate": 0.00011550937063149505, "loss": 1.1457, "step": 3064 }, { "epoch": 0.46708320633953065, "grad_norm": 1.0546875, "learning_rate": 0.00011546060867144279, "loss": 0.9587, "step": 3065 }, { "epoch": 0.46723559890277355, "grad_norm": 0.99609375, "learning_rate": 0.00011541184294496392, "loss": 0.8652, "step": 3066 }, { "epoch": 0.46738799146601645, "grad_norm": 1.0390625, "learning_rate": 0.00011536307346393849, "loss": 1.0284, "step": 3067 }, { "epoch": 0.46754038402925935, "grad_norm": 0.80078125, "learning_rate": 0.0001153143002402474, "loss": 0.9025, "step": 3068 }, { "epoch": 0.4676927765925023, "grad_norm": 1.1796875, "learning_rate": 0.0001152655232857726, "loss": 1.0054, "step": 3069 }, { "epoch": 0.4678451691557452, "grad_norm": 0.97265625, "learning_rate": 0.00011521674261239679, "loss": 0.931, "step": 3070 }, { "epoch": 0.4679975617189881, "grad_norm": 0.95703125, "learning_rate": 0.00011516795823200364, "loss": 1.1748, "step": 3071 }, { "epoch": 0.46814995428223105, "grad_norm": 0.93359375, "learning_rate": 0.0001151191701564778, "loss": 1.0686, "step": 3072 }, { "epoch": 0.46830234684547395, "grad_norm": 0.8203125, "learning_rate": 0.00011507037839770463, "loss": 0.8798, "step": 3073 }, { "epoch": 0.46845473940871685, "grad_norm": 1.21875, "learning_rate": 0.00011502158296757057, "loss": 1.1276, "step": 3074 }, { "epoch": 0.46860713197195974, "grad_norm": 0.9609375, "learning_rate": 0.0001149727838779629, "loss": 0.9737, "step": 3075 }, { "epoch": 0.4687595245352027, "grad_norm": 0.89453125, "learning_rate": 0.00011492398114076978, "loss": 1.0624, "step": 3076 }, { "epoch": 0.4689119170984456, "grad_norm": 1.09375, "learning_rate": 0.00011487517476788024, "loss": 1.0068, "step": 3077 }, { "epoch": 0.4690643096616885, "grad_norm": 0.80078125, "learning_rate": 0.0001148263647711842, "loss": 0.9826, "step": 3078 }, { "epoch": 0.46921670222493145, "grad_norm": 0.984375, "learning_rate": 0.00011477755116257252, "loss": 1.0542, "step": 3079 }, { "epoch": 0.46936909478817435, "grad_norm": 1.1171875, "learning_rate": 0.00011472873395393684, "loss": 1.113, "step": 3080 }, { "epoch": 0.46952148735141724, "grad_norm": 0.95703125, "learning_rate": 0.0001146799131571698, "loss": 1.0517, "step": 3081 }, { "epoch": 0.46967387991466014, "grad_norm": 1.0859375, "learning_rate": 0.0001146310887841648, "loss": 0.9462, "step": 3082 }, { "epoch": 0.4698262724779031, "grad_norm": 0.9140625, "learning_rate": 0.00011458226084681613, "loss": 0.902, "step": 3083 }, { "epoch": 0.469978665041146, "grad_norm": 0.83984375, "learning_rate": 0.00011453342935701908, "loss": 1.0443, "step": 3084 }, { "epoch": 0.4701310576043889, "grad_norm": 0.71875, "learning_rate": 0.00011448459432666961, "loss": 0.9112, "step": 3085 }, { "epoch": 0.47028345016763184, "grad_norm": 0.91796875, "learning_rate": 0.00011443575576766472, "loss": 0.9712, "step": 3086 }, { "epoch": 0.47043584273087474, "grad_norm": 1.0859375, "learning_rate": 0.00011438691369190208, "loss": 1.0708, "step": 3087 }, { "epoch": 0.47058823529411764, "grad_norm": 0.9453125, "learning_rate": 0.00011433806811128038, "loss": 1.0326, "step": 3088 }, { "epoch": 0.47074062785736054, "grad_norm": 1.0859375, "learning_rate": 0.00011428921903769914, "loss": 1.159, "step": 3089 }, { "epoch": 0.4708930204206035, "grad_norm": 1.1796875, "learning_rate": 0.00011424036648305863, "loss": 0.9103, "step": 3090 }, { "epoch": 0.4710454129838464, "grad_norm": 1.2421875, "learning_rate": 0.00011419151045926007, "loss": 0.9565, "step": 3091 }, { "epoch": 0.4711978055470893, "grad_norm": 0.85546875, "learning_rate": 0.00011414265097820548, "loss": 0.9305, "step": 3092 }, { "epoch": 0.47135019811033224, "grad_norm": 1.25, "learning_rate": 0.00011409378805179776, "loss": 0.9366, "step": 3093 }, { "epoch": 0.47150259067357514, "grad_norm": 1.015625, "learning_rate": 0.00011404492169194056, "loss": 0.855, "step": 3094 }, { "epoch": 0.47165498323681804, "grad_norm": 1.0078125, "learning_rate": 0.0001139960519105385, "loss": 0.8178, "step": 3095 }, { "epoch": 0.47180737580006094, "grad_norm": 0.890625, "learning_rate": 0.00011394717871949692, "loss": 0.8856, "step": 3096 }, { "epoch": 0.4719597683633039, "grad_norm": 1.0625, "learning_rate": 0.00011389830213072204, "loss": 0.9152, "step": 3097 }, { "epoch": 0.4721121609265468, "grad_norm": 0.796875, "learning_rate": 0.00011384942215612089, "loss": 0.9906, "step": 3098 }, { "epoch": 0.4722645534897897, "grad_norm": 0.78125, "learning_rate": 0.00011380053880760133, "loss": 1.0937, "step": 3099 }, { "epoch": 0.47241694605303264, "grad_norm": 1.078125, "learning_rate": 0.00011375165209707213, "loss": 1.1267, "step": 3100 }, { "epoch": 0.47256933861627554, "grad_norm": 1.03125, "learning_rate": 0.00011370276203644266, "loss": 0.9704, "step": 3101 }, { "epoch": 0.47272173117951843, "grad_norm": 1.109375, "learning_rate": 0.0001136538686376233, "loss": 1.1606, "step": 3102 }, { "epoch": 0.47287412374276133, "grad_norm": 0.98046875, "learning_rate": 0.00011360497191252527, "loss": 0.9923, "step": 3103 }, { "epoch": 0.4730265163060043, "grad_norm": 0.9921875, "learning_rate": 0.00011355607187306037, "loss": 0.88, "step": 3104 }, { "epoch": 0.4731789088692472, "grad_norm": 0.77734375, "learning_rate": 0.00011350716853114143, "loss": 0.8746, "step": 3105 }, { "epoch": 0.4733313014324901, "grad_norm": 0.9140625, "learning_rate": 0.00011345826189868203, "loss": 1.0369, "step": 3106 }, { "epoch": 0.47348369399573303, "grad_norm": 0.81640625, "learning_rate": 0.00011340935198759646, "loss": 0.9055, "step": 3107 }, { "epoch": 0.47363608655897593, "grad_norm": 1.015625, "learning_rate": 0.00011336043880979992, "loss": 0.9799, "step": 3108 }, { "epoch": 0.47378847912221883, "grad_norm": 0.8515625, "learning_rate": 0.00011331152237720838, "loss": 0.9332, "step": 3109 }, { "epoch": 0.47394087168546173, "grad_norm": 1.1953125, "learning_rate": 0.00011326260270173859, "loss": 0.9572, "step": 3110 }, { "epoch": 0.4740932642487047, "grad_norm": 1.078125, "learning_rate": 0.00011321367979530799, "loss": 0.7858, "step": 3111 }, { "epoch": 0.4742456568119476, "grad_norm": 0.80078125, "learning_rate": 0.00011316475366983504, "loss": 1.0612, "step": 3112 }, { "epoch": 0.4743980493751905, "grad_norm": 0.8828125, "learning_rate": 0.00011311582433723877, "loss": 1.0307, "step": 3113 }, { "epoch": 0.47455044193843343, "grad_norm": 1.2109375, "learning_rate": 0.00011306689180943909, "loss": 0.9088, "step": 3114 }, { "epoch": 0.47470283450167633, "grad_norm": 0.8515625, "learning_rate": 0.00011301795609835662, "loss": 1.1087, "step": 3115 }, { "epoch": 0.4748552270649192, "grad_norm": 1.078125, "learning_rate": 0.00011296901721591284, "loss": 0.9388, "step": 3116 }, { "epoch": 0.4750076196281621, "grad_norm": 0.796875, "learning_rate": 0.00011292007517403003, "loss": 0.9456, "step": 3117 }, { "epoch": 0.4751600121914051, "grad_norm": 0.91015625, "learning_rate": 0.00011287112998463107, "loss": 1.1169, "step": 3118 }, { "epoch": 0.475312404754648, "grad_norm": 1.125, "learning_rate": 0.00011282218165963976, "loss": 0.9727, "step": 3119 }, { "epoch": 0.4754647973178909, "grad_norm": 0.8125, "learning_rate": 0.00011277323021098064, "loss": 0.9313, "step": 3120 }, { "epoch": 0.47561718988113383, "grad_norm": 0.828125, "learning_rate": 0.0001127242756505789, "loss": 0.8985, "step": 3121 }, { "epoch": 0.4757695824443767, "grad_norm": 0.99609375, "learning_rate": 0.00011267531799036066, "loss": 1.0133, "step": 3122 }, { "epoch": 0.4759219750076196, "grad_norm": 0.92578125, "learning_rate": 0.00011262635724225272, "loss": 0.9498, "step": 3123 }, { "epoch": 0.4760743675708625, "grad_norm": 1.109375, "learning_rate": 0.00011257739341818254, "loss": 1.091, "step": 3124 }, { "epoch": 0.4762267601341055, "grad_norm": 0.93359375, "learning_rate": 0.00011252842653007847, "loss": 1.0588, "step": 3125 }, { "epoch": 0.4763791526973484, "grad_norm": 0.796875, "learning_rate": 0.00011247945658986954, "loss": 1.0066, "step": 3126 }, { "epoch": 0.47653154526059127, "grad_norm": 1.0, "learning_rate": 0.00011243048360948554, "loss": 1.1727, "step": 3127 }, { "epoch": 0.47668393782383417, "grad_norm": 1.03125, "learning_rate": 0.00011238150760085697, "loss": 1.0021, "step": 3128 }, { "epoch": 0.4768363303870771, "grad_norm": 0.96875, "learning_rate": 0.00011233252857591507, "loss": 0.837, "step": 3129 }, { "epoch": 0.47698872295032, "grad_norm": 1.0859375, "learning_rate": 0.0001122835465465919, "loss": 1.0617, "step": 3130 }, { "epoch": 0.4771411155135629, "grad_norm": 1.359375, "learning_rate": 0.00011223456152482014, "loss": 0.8877, "step": 3131 }, { "epoch": 0.4772935080768059, "grad_norm": 1.25, "learning_rate": 0.00011218557352253325, "loss": 0.9946, "step": 3132 }, { "epoch": 0.47744590064004877, "grad_norm": 1.078125, "learning_rate": 0.00011213658255166539, "loss": 0.9417, "step": 3133 }, { "epoch": 0.47759829320329167, "grad_norm": 1.8359375, "learning_rate": 0.00011208758862415156, "loss": 0.9766, "step": 3134 }, { "epoch": 0.47775068576653457, "grad_norm": 1.0625, "learning_rate": 0.00011203859175192729, "loss": 0.8286, "step": 3135 }, { "epoch": 0.4779030783297775, "grad_norm": 0.890625, "learning_rate": 0.00011198959194692891, "loss": 1.112, "step": 3136 }, { "epoch": 0.4780554708930204, "grad_norm": 1.0859375, "learning_rate": 0.00011194058922109359, "loss": 0.9977, "step": 3137 }, { "epoch": 0.4782078634562633, "grad_norm": 0.85546875, "learning_rate": 0.00011189158358635896, "loss": 0.8969, "step": 3138 }, { "epoch": 0.47836025601950627, "grad_norm": 0.78125, "learning_rate": 0.0001118425750546636, "loss": 0.9761, "step": 3139 }, { "epoch": 0.47851264858274917, "grad_norm": 1.03125, "learning_rate": 0.00011179356363794664, "loss": 0.8739, "step": 3140 }, { "epoch": 0.47866504114599207, "grad_norm": 1.078125, "learning_rate": 0.00011174454934814802, "loss": 0.9811, "step": 3141 }, { "epoch": 0.47881743370923496, "grad_norm": 1.046875, "learning_rate": 0.00011169553219720828, "loss": 0.8804, "step": 3142 }, { "epoch": 0.4789698262724779, "grad_norm": 1.0625, "learning_rate": 0.00011164651219706867, "loss": 1.0713, "step": 3143 }, { "epoch": 0.4791222188357208, "grad_norm": 1.1015625, "learning_rate": 0.00011159748935967124, "loss": 0.8403, "step": 3144 }, { "epoch": 0.4792746113989637, "grad_norm": 0.79296875, "learning_rate": 0.00011154846369695863, "loss": 1.0491, "step": 3145 }, { "epoch": 0.47942700396220667, "grad_norm": 1.0703125, "learning_rate": 0.00011149943522087416, "loss": 1.0837, "step": 3146 }, { "epoch": 0.47957939652544956, "grad_norm": 1.0390625, "learning_rate": 0.00011145040394336195, "loss": 1.0661, "step": 3147 }, { "epoch": 0.47973178908869246, "grad_norm": 0.96484375, "learning_rate": 0.00011140136987636666, "loss": 0.9438, "step": 3148 }, { "epoch": 0.47988418165193536, "grad_norm": 1.046875, "learning_rate": 0.0001113523330318337, "loss": 0.9295, "step": 3149 }, { "epoch": 0.4800365742151783, "grad_norm": 1.0234375, "learning_rate": 0.00011130329342170917, "loss": 0.9826, "step": 3150 }, { "epoch": 0.4801889667784212, "grad_norm": 0.93359375, "learning_rate": 0.00011125425105793985, "loss": 0.8156, "step": 3151 }, { "epoch": 0.4803413593416641, "grad_norm": 1.15625, "learning_rate": 0.0001112052059524731, "loss": 1.0285, "step": 3152 }, { "epoch": 0.48049375190490706, "grad_norm": 0.90625, "learning_rate": 0.00011115615811725703, "loss": 0.976, "step": 3153 }, { "epoch": 0.48064614446814996, "grad_norm": 0.9609375, "learning_rate": 0.00011110710756424048, "loss": 0.8592, "step": 3154 }, { "epoch": 0.48079853703139286, "grad_norm": 0.8515625, "learning_rate": 0.00011105805430537275, "loss": 0.9495, "step": 3155 }, { "epoch": 0.48095092959463576, "grad_norm": 1.3515625, "learning_rate": 0.00011100899835260399, "loss": 0.8794, "step": 3156 }, { "epoch": 0.4811033221578787, "grad_norm": 1.265625, "learning_rate": 0.00011095993971788492, "loss": 1.0079, "step": 3157 }, { "epoch": 0.4812557147211216, "grad_norm": 0.87890625, "learning_rate": 0.00011091087841316692, "loss": 0.9921, "step": 3158 }, { "epoch": 0.4814081072843645, "grad_norm": 0.95703125, "learning_rate": 0.00011086181445040205, "loss": 1.0678, "step": 3159 }, { "epoch": 0.48156049984760746, "grad_norm": 1.0625, "learning_rate": 0.00011081274784154296, "loss": 0.8219, "step": 3160 }, { "epoch": 0.48171289241085036, "grad_norm": 0.66015625, "learning_rate": 0.00011076367859854304, "loss": 0.9117, "step": 3161 }, { "epoch": 0.48186528497409326, "grad_norm": 0.7421875, "learning_rate": 0.00011071460673335621, "loss": 1.0226, "step": 3162 }, { "epoch": 0.48201767753733615, "grad_norm": 0.8203125, "learning_rate": 0.00011066553225793708, "loss": 0.897, "step": 3163 }, { "epoch": 0.4821700701005791, "grad_norm": 0.90625, "learning_rate": 0.00011061645518424093, "loss": 0.9651, "step": 3164 }, { "epoch": 0.482322462663822, "grad_norm": 0.79296875, "learning_rate": 0.00011056737552422367, "loss": 1.0311, "step": 3165 }, { "epoch": 0.4824748552270649, "grad_norm": 1.046875, "learning_rate": 0.00011051829328984173, "loss": 1.0241, "step": 3166 }, { "epoch": 0.48262724779030786, "grad_norm": 0.66015625, "learning_rate": 0.00011046920849305229, "loss": 0.8404, "step": 3167 }, { "epoch": 0.48277964035355075, "grad_norm": 0.77734375, "learning_rate": 0.00011042012114581315, "loss": 0.8888, "step": 3168 }, { "epoch": 0.48293203291679365, "grad_norm": 0.7734375, "learning_rate": 0.00011037103126008261, "loss": 0.8694, "step": 3169 }, { "epoch": 0.48308442548003655, "grad_norm": 0.9140625, "learning_rate": 0.00011032193884781973, "loss": 0.9834, "step": 3170 }, { "epoch": 0.4832368180432795, "grad_norm": 0.890625, "learning_rate": 0.00011027284392098414, "loss": 1.0828, "step": 3171 }, { "epoch": 0.4833892106065224, "grad_norm": 0.9453125, "learning_rate": 0.00011022374649153599, "loss": 0.9883, "step": 3172 }, { "epoch": 0.4835416031697653, "grad_norm": 0.93359375, "learning_rate": 0.00011017464657143621, "loss": 1.0528, "step": 3173 }, { "epoch": 0.48369399573300825, "grad_norm": 1.046875, "learning_rate": 0.0001101255441726462, "loss": 1.0925, "step": 3174 }, { "epoch": 0.48384638829625115, "grad_norm": 0.91796875, "learning_rate": 0.00011007643930712804, "loss": 1.0121, "step": 3175 }, { "epoch": 0.48399878085949405, "grad_norm": 0.9765625, "learning_rate": 0.00011002733198684436, "loss": 0.9812, "step": 3176 }, { "epoch": 0.48415117342273695, "grad_norm": 1.3359375, "learning_rate": 0.0001099782222237584, "loss": 1.0764, "step": 3177 }, { "epoch": 0.4843035659859799, "grad_norm": 0.796875, "learning_rate": 0.00010992911002983401, "loss": 0.8668, "step": 3178 }, { "epoch": 0.4844559585492228, "grad_norm": 1.109375, "learning_rate": 0.00010987999541703566, "loss": 0.9637, "step": 3179 }, { "epoch": 0.4846083511124657, "grad_norm": 0.88671875, "learning_rate": 0.00010983087839732833, "loss": 0.9017, "step": 3180 }, { "epoch": 0.48476074367570865, "grad_norm": 0.9375, "learning_rate": 0.00010978175898267764, "loss": 0.9855, "step": 3181 }, { "epoch": 0.48491313623895155, "grad_norm": 1.1171875, "learning_rate": 0.00010973263718504983, "loss": 0.7606, "step": 3182 }, { "epoch": 0.48506552880219445, "grad_norm": 0.921875, "learning_rate": 0.00010968351301641162, "loss": 1.0428, "step": 3183 }, { "epoch": 0.48521792136543734, "grad_norm": 0.99609375, "learning_rate": 0.00010963438648873041, "loss": 1.1246, "step": 3184 }, { "epoch": 0.4853703139286803, "grad_norm": 0.6796875, "learning_rate": 0.00010958525761397409, "loss": 0.8494, "step": 3185 }, { "epoch": 0.4855227064919232, "grad_norm": 0.73046875, "learning_rate": 0.00010953612640411117, "loss": 0.8966, "step": 3186 }, { "epoch": 0.4856750990551661, "grad_norm": 0.9609375, "learning_rate": 0.00010948699287111073, "loss": 1.0317, "step": 3187 }, { "epoch": 0.48582749161840905, "grad_norm": 1.296875, "learning_rate": 0.00010943785702694243, "loss": 0.9538, "step": 3188 }, { "epoch": 0.48597988418165194, "grad_norm": 0.7734375, "learning_rate": 0.00010938871888357638, "loss": 0.9406, "step": 3189 }, { "epoch": 0.48613227674489484, "grad_norm": 0.828125, "learning_rate": 0.00010933957845298346, "loss": 0.7653, "step": 3190 }, { "epoch": 0.48628466930813774, "grad_norm": 0.9140625, "learning_rate": 0.00010929043574713487, "loss": 0.9894, "step": 3191 }, { "epoch": 0.4864370618713807, "grad_norm": 0.84765625, "learning_rate": 0.00010924129077800255, "loss": 0.874, "step": 3192 }, { "epoch": 0.4865894544346236, "grad_norm": 1.0234375, "learning_rate": 0.00010919214355755893, "loss": 1.1028, "step": 3193 }, { "epoch": 0.4867418469978665, "grad_norm": 0.80078125, "learning_rate": 0.00010914299409777694, "loss": 0.9505, "step": 3194 }, { "epoch": 0.48689423956110944, "grad_norm": 1.015625, "learning_rate": 0.0001090938424106301, "loss": 0.9935, "step": 3195 }, { "epoch": 0.48704663212435234, "grad_norm": 1.2734375, "learning_rate": 0.00010904468850809248, "loss": 1.0609, "step": 3196 }, { "epoch": 0.48719902468759524, "grad_norm": 1.3203125, "learning_rate": 0.00010899553240213869, "loss": 1.1474, "step": 3197 }, { "epoch": 0.48735141725083814, "grad_norm": 0.91796875, "learning_rate": 0.00010894637410474384, "loss": 1.0473, "step": 3198 }, { "epoch": 0.4875038098140811, "grad_norm": 1.078125, "learning_rate": 0.00010889721362788361, "loss": 0.9898, "step": 3199 }, { "epoch": 0.487656202377324, "grad_norm": 1.0625, "learning_rate": 0.00010884805098353417, "loss": 1.0033, "step": 3200 }, { "epoch": 0.4878085949405669, "grad_norm": 0.76953125, "learning_rate": 0.00010879888618367229, "loss": 0.8389, "step": 3201 }, { "epoch": 0.48796098750380984, "grad_norm": 0.92578125, "learning_rate": 0.00010874971924027522, "loss": 1.0108, "step": 3202 }, { "epoch": 0.48811338006705274, "grad_norm": 1.28125, "learning_rate": 0.0001087005501653207, "loss": 1.1676, "step": 3203 }, { "epoch": 0.48826577263029564, "grad_norm": 0.82421875, "learning_rate": 0.00010865137897078706, "loss": 0.9857, "step": 3204 }, { "epoch": 0.48841816519353853, "grad_norm": 0.75, "learning_rate": 0.00010860220566865306, "loss": 0.976, "step": 3205 }, { "epoch": 0.4885705577567815, "grad_norm": 1.03125, "learning_rate": 0.0001085530302708981, "loss": 0.8249, "step": 3206 }, { "epoch": 0.4887229503200244, "grad_norm": 1.0546875, "learning_rate": 0.00010850385278950195, "loss": 0.8328, "step": 3207 }, { "epoch": 0.4888753428832673, "grad_norm": 0.93359375, "learning_rate": 0.00010845467323644496, "loss": 1.0382, "step": 3208 }, { "epoch": 0.48902773544651024, "grad_norm": 0.8125, "learning_rate": 0.00010840549162370801, "loss": 0.8975, "step": 3209 }, { "epoch": 0.48918012800975313, "grad_norm": 0.984375, "learning_rate": 0.00010835630796327243, "loss": 0.8588, "step": 3210 }, { "epoch": 0.48933252057299603, "grad_norm": 0.87890625, "learning_rate": 0.00010830712226712006, "loss": 1.1676, "step": 3211 }, { "epoch": 0.48948491313623893, "grad_norm": 1.296875, "learning_rate": 0.00010825793454723325, "loss": 1.0735, "step": 3212 }, { "epoch": 0.4896373056994819, "grad_norm": 1.0625, "learning_rate": 0.00010820874481559482, "loss": 1.0455, "step": 3213 }, { "epoch": 0.4897896982627248, "grad_norm": 0.72265625, "learning_rate": 0.0001081595530841881, "loss": 0.958, "step": 3214 }, { "epoch": 0.4899420908259677, "grad_norm": 1.0234375, "learning_rate": 0.00010811035936499694, "loss": 1.0031, "step": 3215 }, { "epoch": 0.49009448338921063, "grad_norm": 0.96875, "learning_rate": 0.0001080611636700056, "loss": 0.9706, "step": 3216 }, { "epoch": 0.49024687595245353, "grad_norm": 0.85546875, "learning_rate": 0.00010801196601119887, "loss": 0.9813, "step": 3217 }, { "epoch": 0.49039926851569643, "grad_norm": 1.2265625, "learning_rate": 0.000107962766400562, "loss": 1.2237, "step": 3218 }, { "epoch": 0.4905516610789393, "grad_norm": 1.1171875, "learning_rate": 0.00010791356485008075, "loss": 1.1279, "step": 3219 }, { "epoch": 0.4907040536421823, "grad_norm": 0.96875, "learning_rate": 0.00010786436137174126, "loss": 0.8925, "step": 3220 }, { "epoch": 0.4908564462054252, "grad_norm": 0.96875, "learning_rate": 0.00010781515597753027, "loss": 0.8685, "step": 3221 }, { "epoch": 0.4910088387686681, "grad_norm": 1.2109375, "learning_rate": 0.0001077659486794349, "loss": 0.978, "step": 3222 }, { "epoch": 0.49116123133191103, "grad_norm": 0.9296875, "learning_rate": 0.00010771673948944277, "loss": 1.0501, "step": 3223 }, { "epoch": 0.49131362389515393, "grad_norm": 1.421875, "learning_rate": 0.00010766752841954191, "loss": 0.9907, "step": 3224 }, { "epoch": 0.4914660164583968, "grad_norm": 1.0625, "learning_rate": 0.00010761831548172088, "loss": 0.9838, "step": 3225 }, { "epoch": 0.4916184090216397, "grad_norm": 0.84765625, "learning_rate": 0.00010756910068796864, "loss": 0.8156, "step": 3226 }, { "epoch": 0.4917708015848827, "grad_norm": 0.8125, "learning_rate": 0.00010751988405027462, "loss": 0.9766, "step": 3227 }, { "epoch": 0.4919231941481256, "grad_norm": 0.88671875, "learning_rate": 0.00010747066558062871, "loss": 1.0308, "step": 3228 }, { "epoch": 0.4920755867113685, "grad_norm": 0.90234375, "learning_rate": 0.00010742144529102125, "loss": 1.0249, "step": 3229 }, { "epoch": 0.49222797927461137, "grad_norm": 1.1796875, "learning_rate": 0.00010737222319344298, "loss": 1.0641, "step": 3230 }, { "epoch": 0.4923803718378543, "grad_norm": 0.984375, "learning_rate": 0.00010732299929988512, "loss": 0.9952, "step": 3231 }, { "epoch": 0.4925327644010972, "grad_norm": 1.0, "learning_rate": 0.00010727377362233934, "loss": 0.9706, "step": 3232 }, { "epoch": 0.4926851569643401, "grad_norm": 0.84375, "learning_rate": 0.00010722454617279772, "loss": 0.8593, "step": 3233 }, { "epoch": 0.4928375495275831, "grad_norm": 0.9765625, "learning_rate": 0.00010717531696325271, "loss": 1.0011, "step": 3234 }, { "epoch": 0.49298994209082597, "grad_norm": 1.15625, "learning_rate": 0.00010712608600569733, "loss": 1.0293, "step": 3235 }, { "epoch": 0.49314233465406887, "grad_norm": 0.93359375, "learning_rate": 0.00010707685331212496, "loss": 1.0042, "step": 3236 }, { "epoch": 0.49329472721731177, "grad_norm": 0.9609375, "learning_rate": 0.0001070276188945293, "loss": 0.9967, "step": 3237 }, { "epoch": 0.4934471197805547, "grad_norm": 1.3984375, "learning_rate": 0.00010697838276490466, "loss": 1.1284, "step": 3238 }, { "epoch": 0.4935995123437976, "grad_norm": 0.8671875, "learning_rate": 0.0001069291449352456, "loss": 1.0847, "step": 3239 }, { "epoch": 0.4937519049070405, "grad_norm": 1.0390625, "learning_rate": 0.0001068799054175472, "loss": 0.8199, "step": 3240 }, { "epoch": 0.49390429747028347, "grad_norm": 0.82421875, "learning_rate": 0.00010683066422380492, "loss": 0.8627, "step": 3241 }, { "epoch": 0.49405669003352637, "grad_norm": 1.09375, "learning_rate": 0.00010678142136601458, "loss": 0.8815, "step": 3242 }, { "epoch": 0.49420908259676927, "grad_norm": 0.875, "learning_rate": 0.0001067321768561725, "loss": 1.1312, "step": 3243 }, { "epoch": 0.49436147516001216, "grad_norm": 1.015625, "learning_rate": 0.00010668293070627533, "loss": 0.913, "step": 3244 }, { "epoch": 0.4945138677232551, "grad_norm": 0.7578125, "learning_rate": 0.00010663368292832012, "loss": 0.8835, "step": 3245 }, { "epoch": 0.494666260286498, "grad_norm": 0.91015625, "learning_rate": 0.00010658443353430436, "loss": 1.074, "step": 3246 }, { "epoch": 0.4948186528497409, "grad_norm": 1.25, "learning_rate": 0.00010653518253622592, "loss": 1.3205, "step": 3247 }, { "epoch": 0.49497104541298387, "grad_norm": 0.99609375, "learning_rate": 0.00010648592994608302, "loss": 0.9068, "step": 3248 }, { "epoch": 0.49512343797622677, "grad_norm": 0.9375, "learning_rate": 0.00010643667577587433, "loss": 0.9949, "step": 3249 }, { "epoch": 0.49527583053946966, "grad_norm": 0.98828125, "learning_rate": 0.00010638742003759886, "loss": 0.8719, "step": 3250 }, { "epoch": 0.49542822310271256, "grad_norm": 0.83984375, "learning_rate": 0.00010633816274325599, "loss": 0.7082, "step": 3251 }, { "epoch": 0.4955806156659555, "grad_norm": 1.0234375, "learning_rate": 0.00010628890390484554, "loss": 1.0136, "step": 3252 }, { "epoch": 0.4957330082291984, "grad_norm": 1.0546875, "learning_rate": 0.00010623964353436768, "loss": 1.0089, "step": 3253 }, { "epoch": 0.4958854007924413, "grad_norm": 1.1640625, "learning_rate": 0.0001061903816438229, "loss": 0.9898, "step": 3254 }, { "epoch": 0.49603779335568426, "grad_norm": 1.1640625, "learning_rate": 0.00010614111824521215, "loss": 0.9099, "step": 3255 }, { "epoch": 0.49619018591892716, "grad_norm": 1.0703125, "learning_rate": 0.00010609185335053669, "loss": 0.9882, "step": 3256 }, { "epoch": 0.49634257848217006, "grad_norm": 0.73046875, "learning_rate": 0.00010604258697179814, "loss": 0.9182, "step": 3257 }, { "epoch": 0.49649497104541296, "grad_norm": 1.078125, "learning_rate": 0.00010599331912099852, "loss": 0.9721, "step": 3258 }, { "epoch": 0.4966473636086559, "grad_norm": 0.90234375, "learning_rate": 0.00010594404981014017, "loss": 0.9327, "step": 3259 }, { "epoch": 0.4967997561718988, "grad_norm": 0.8828125, "learning_rate": 0.00010589477905122582, "loss": 0.9913, "step": 3260 }, { "epoch": 0.4969521487351417, "grad_norm": 0.9921875, "learning_rate": 0.00010584550685625852, "loss": 0.9563, "step": 3261 }, { "epoch": 0.49710454129838466, "grad_norm": 1.03125, "learning_rate": 0.00010579623323724166, "loss": 1.0008, "step": 3262 }, { "epoch": 0.49725693386162756, "grad_norm": 1.171875, "learning_rate": 0.00010574695820617908, "loss": 1.0131, "step": 3263 }, { "epoch": 0.49740932642487046, "grad_norm": 1.0703125, "learning_rate": 0.00010569768177507482, "loss": 1.1129, "step": 3264 }, { "epoch": 0.49756171898811336, "grad_norm": 0.859375, "learning_rate": 0.00010564840395593332, "loss": 0.8644, "step": 3265 }, { "epoch": 0.4977141115513563, "grad_norm": 0.83984375, "learning_rate": 0.00010559912476075941, "loss": 0.985, "step": 3266 }, { "epoch": 0.4978665041145992, "grad_norm": 0.91015625, "learning_rate": 0.00010554984420155823, "loss": 0.9231, "step": 3267 }, { "epoch": 0.4980188966778421, "grad_norm": 0.93359375, "learning_rate": 0.00010550056229033513, "loss": 1.0174, "step": 3268 }, { "epoch": 0.49817128924108506, "grad_norm": 0.84375, "learning_rate": 0.00010545127903909599, "loss": 0.8889, "step": 3269 }, { "epoch": 0.49832368180432796, "grad_norm": 0.7421875, "learning_rate": 0.00010540199445984687, "loss": 0.9082, "step": 3270 }, { "epoch": 0.49847607436757085, "grad_norm": 1.3203125, "learning_rate": 0.00010535270856459422, "loss": 0.9428, "step": 3271 }, { "epoch": 0.49862846693081375, "grad_norm": 0.87890625, "learning_rate": 0.00010530342136534482, "loss": 1.1498, "step": 3272 }, { "epoch": 0.4987808594940567, "grad_norm": 0.96484375, "learning_rate": 0.00010525413287410569, "loss": 0.9527, "step": 3273 }, { "epoch": 0.4989332520572996, "grad_norm": 0.875, "learning_rate": 0.00010520484310288424, "loss": 0.9847, "step": 3274 }, { "epoch": 0.4990856446205425, "grad_norm": 0.8828125, "learning_rate": 0.00010515555206368815, "loss": 1.0257, "step": 3275 }, { "epoch": 0.49923803718378545, "grad_norm": 0.8515625, "learning_rate": 0.00010510625976852544, "loss": 0.9323, "step": 3276 }, { "epoch": 0.49939042974702835, "grad_norm": 1.046875, "learning_rate": 0.00010505696622940443, "loss": 0.8702, "step": 3277 }, { "epoch": 0.49954282231027125, "grad_norm": 1.046875, "learning_rate": 0.00010500767145833372, "loss": 1.1368, "step": 3278 }, { "epoch": 0.49969521487351415, "grad_norm": 0.73828125, "learning_rate": 0.00010495837546732224, "loss": 0.823, "step": 3279 }, { "epoch": 0.4998476074367571, "grad_norm": 0.9765625, "learning_rate": 0.00010490907826837919, "loss": 0.9766, "step": 3280 }, { "epoch": 0.5, "grad_norm": 1.15625, "learning_rate": 0.00010485977987351407, "loss": 1.1041, "step": 3281 }, { "epoch": 0.500152392563243, "grad_norm": 0.94921875, "learning_rate": 0.0001048104802947367, "loss": 0.8801, "step": 3282 }, { "epoch": 0.5003047851264858, "grad_norm": 0.98046875, "learning_rate": 0.00010476117954405715, "loss": 0.9352, "step": 3283 }, { "epoch": 0.5004571776897287, "grad_norm": 1.2109375, "learning_rate": 0.0001047118776334858, "loss": 0.9361, "step": 3284 }, { "epoch": 0.5006095702529717, "grad_norm": 0.890625, "learning_rate": 0.0001046625745750333, "loss": 0.9422, "step": 3285 }, { "epoch": 0.5007619628162145, "grad_norm": 1.109375, "learning_rate": 0.00010461327038071059, "loss": 1.1304, "step": 3286 }, { "epoch": 0.5009143553794575, "grad_norm": 0.9609375, "learning_rate": 0.00010456396506252884, "loss": 1.0162, "step": 3287 }, { "epoch": 0.5010667479427003, "grad_norm": 1.015625, "learning_rate": 0.00010451465863249961, "loss": 1.0568, "step": 3288 }, { "epoch": 0.5012191405059433, "grad_norm": 0.984375, "learning_rate": 0.0001044653511026346, "loss": 1.0605, "step": 3289 }, { "epoch": 0.5013715330691862, "grad_norm": 1.3203125, "learning_rate": 0.00010441604248494586, "loss": 0.9845, "step": 3290 }, { "epoch": 0.5015239256324291, "grad_norm": 0.9609375, "learning_rate": 0.00010436673279144564, "loss": 0.8914, "step": 3291 }, { "epoch": 0.501676318195672, "grad_norm": 1.140625, "learning_rate": 0.00010431742203414656, "loss": 1.1073, "step": 3292 }, { "epoch": 0.501828710758915, "grad_norm": 0.89453125, "learning_rate": 0.00010426811022506134, "loss": 1.0583, "step": 3293 }, { "epoch": 0.5019811033221578, "grad_norm": 0.93359375, "learning_rate": 0.00010421879737620312, "loss": 0.8484, "step": 3294 }, { "epoch": 0.5021334958854008, "grad_norm": 0.859375, "learning_rate": 0.0001041694834995852, "loss": 1.0146, "step": 3295 }, { "epoch": 0.5022858884486437, "grad_norm": 1.0703125, "learning_rate": 0.00010412016860722112, "loss": 1.0965, "step": 3296 }, { "epoch": 0.5024382810118866, "grad_norm": 1.1953125, "learning_rate": 0.00010407085271112472, "loss": 1.0726, "step": 3297 }, { "epoch": 0.5025906735751295, "grad_norm": 1.0234375, "learning_rate": 0.00010402153582331006, "loss": 1.1279, "step": 3298 }, { "epoch": 0.5027430661383725, "grad_norm": 0.8828125, "learning_rate": 0.00010397221795579144, "loss": 0.9953, "step": 3299 }, { "epoch": 0.5028954587016153, "grad_norm": 1.0078125, "learning_rate": 0.00010392289912058342, "loss": 0.9272, "step": 3300 }, { "epoch": 0.5030478512648583, "grad_norm": 0.88671875, "learning_rate": 0.00010387357932970076, "loss": 1.0196, "step": 3301 }, { "epoch": 0.5032002438281011, "grad_norm": 0.85546875, "learning_rate": 0.00010382425859515846, "loss": 0.9606, "step": 3302 }, { "epoch": 0.5033526363913441, "grad_norm": 0.80078125, "learning_rate": 0.00010377493692897178, "loss": 0.9332, "step": 3303 }, { "epoch": 0.503505028954587, "grad_norm": 0.8671875, "learning_rate": 0.00010372561434315618, "loss": 1.0389, "step": 3304 }, { "epoch": 0.5036574215178299, "grad_norm": 0.9296875, "learning_rate": 0.00010367629084972735, "loss": 1.0428, "step": 3305 }, { "epoch": 0.5038098140810728, "grad_norm": 0.8671875, "learning_rate": 0.00010362696646070121, "loss": 1.0134, "step": 3306 }, { "epoch": 0.5039622066443158, "grad_norm": 1.078125, "learning_rate": 0.00010357764118809386, "loss": 0.9016, "step": 3307 }, { "epoch": 0.5041145992075586, "grad_norm": 0.90625, "learning_rate": 0.00010352831504392168, "loss": 0.8075, "step": 3308 }, { "epoch": 0.5042669917708016, "grad_norm": 1.0859375, "learning_rate": 0.00010347898804020124, "loss": 0.9259, "step": 3309 }, { "epoch": 0.5044193843340445, "grad_norm": 0.86328125, "learning_rate": 0.00010342966018894925, "loss": 0.7854, "step": 3310 }, { "epoch": 0.5045717768972874, "grad_norm": 1.0546875, "learning_rate": 0.00010338033150218274, "loss": 1.0284, "step": 3311 }, { "epoch": 0.5047241694605303, "grad_norm": 0.83203125, "learning_rate": 0.00010333100199191887, "loss": 1.0429, "step": 3312 }, { "epoch": 0.5048765620237733, "grad_norm": 0.9921875, "learning_rate": 0.000103281671670175, "loss": 0.9724, "step": 3313 }, { "epoch": 0.5050289545870161, "grad_norm": 1.140625, "learning_rate": 0.00010323234054896871, "loss": 1.2916, "step": 3314 }, { "epoch": 0.5051813471502591, "grad_norm": 0.8203125, "learning_rate": 0.0001031830086403178, "loss": 0.9024, "step": 3315 }, { "epoch": 0.5053337397135019, "grad_norm": 0.9296875, "learning_rate": 0.00010313367595624019, "loss": 1.0753, "step": 3316 }, { "epoch": 0.5054861322767449, "grad_norm": 1.1875, "learning_rate": 0.0001030843425087541, "loss": 0.8899, "step": 3317 }, { "epoch": 0.5056385248399878, "grad_norm": 0.87890625, "learning_rate": 0.0001030350083098778, "loss": 0.8718, "step": 3318 }, { "epoch": 0.5057909174032307, "grad_norm": 0.96484375, "learning_rate": 0.00010298567337162981, "loss": 0.9835, "step": 3319 }, { "epoch": 0.5059433099664736, "grad_norm": 1.015625, "learning_rate": 0.00010293633770602886, "loss": 1.0535, "step": 3320 }, { "epoch": 0.5060957025297166, "grad_norm": 1.3359375, "learning_rate": 0.00010288700132509381, "loss": 1.1357, "step": 3321 }, { "epoch": 0.5062480950929594, "grad_norm": 0.828125, "learning_rate": 0.00010283766424084376, "loss": 1.1553, "step": 3322 }, { "epoch": 0.5064004876562024, "grad_norm": 0.77734375, "learning_rate": 0.00010278832646529788, "loss": 0.9592, "step": 3323 }, { "epoch": 0.5065528802194453, "grad_norm": 0.8671875, "learning_rate": 0.00010273898801047555, "loss": 0.9319, "step": 3324 }, { "epoch": 0.5067052727826882, "grad_norm": 1.078125, "learning_rate": 0.0001026896488883964, "loss": 1.032, "step": 3325 }, { "epoch": 0.5068576653459311, "grad_norm": 0.9296875, "learning_rate": 0.00010264030911108007, "loss": 0.8497, "step": 3326 }, { "epoch": 0.5070100579091741, "grad_norm": 1.0703125, "learning_rate": 0.00010259096869054646, "loss": 0.923, "step": 3327 }, { "epoch": 0.5071624504724169, "grad_norm": 1.03125, "learning_rate": 0.00010254162763881563, "loss": 0.9849, "step": 3328 }, { "epoch": 0.5073148430356599, "grad_norm": 0.890625, "learning_rate": 0.00010249228596790777, "loss": 1.0473, "step": 3329 }, { "epoch": 0.5074672355989027, "grad_norm": 0.75, "learning_rate": 0.0001024429436898432, "loss": 0.9708, "step": 3330 }, { "epoch": 0.5076196281621457, "grad_norm": 0.81640625, "learning_rate": 0.00010239360081664242, "loss": 0.9655, "step": 3331 }, { "epoch": 0.5077720207253886, "grad_norm": 0.76953125, "learning_rate": 0.00010234425736032607, "loss": 0.8732, "step": 3332 }, { "epoch": 0.5079244132886315, "grad_norm": 0.9375, "learning_rate": 0.00010229491333291488, "loss": 0.9399, "step": 3333 }, { "epoch": 0.5080768058518744, "grad_norm": 0.9765625, "learning_rate": 0.00010224556874642983, "loss": 1.1136, "step": 3334 }, { "epoch": 0.5082291984151174, "grad_norm": 1.21875, "learning_rate": 0.00010219622361289191, "loss": 1.0267, "step": 3335 }, { "epoch": 0.5083815909783602, "grad_norm": 0.98828125, "learning_rate": 0.00010214687794432238, "loss": 1.0695, "step": 3336 }, { "epoch": 0.5085339835416032, "grad_norm": 1.2265625, "learning_rate": 0.0001020975317527425, "loss": 0.983, "step": 3337 }, { "epoch": 0.5086863761048461, "grad_norm": 0.75, "learning_rate": 0.00010204818505017371, "loss": 0.8459, "step": 3338 }, { "epoch": 0.508838768668089, "grad_norm": 0.953125, "learning_rate": 0.00010199883784863762, "loss": 1.0004, "step": 3339 }, { "epoch": 0.5089911612313319, "grad_norm": 1.359375, "learning_rate": 0.00010194949016015584, "loss": 0.9174, "step": 3340 }, { "epoch": 0.5091435537945749, "grad_norm": 1.09375, "learning_rate": 0.00010190014199675022, "loss": 0.9722, "step": 3341 }, { "epoch": 0.5092959463578177, "grad_norm": 0.75390625, "learning_rate": 0.00010185079337044274, "loss": 1.0491, "step": 3342 }, { "epoch": 0.5094483389210607, "grad_norm": 1.296875, "learning_rate": 0.00010180144429325533, "loss": 1.0303, "step": 3343 }, { "epoch": 0.5096007314843035, "grad_norm": 0.85546875, "learning_rate": 0.00010175209477721019, "loss": 0.9432, "step": 3344 }, { "epoch": 0.5097531240475465, "grad_norm": 0.8828125, "learning_rate": 0.0001017027448343296, "loss": 0.8959, "step": 3345 }, { "epoch": 0.5099055166107894, "grad_norm": 0.7109375, "learning_rate": 0.00010165339447663587, "loss": 0.8993, "step": 3346 }, { "epoch": 0.5100579091740323, "grad_norm": 1.0, "learning_rate": 0.00010160404371615144, "loss": 0.8917, "step": 3347 }, { "epoch": 0.5102103017372752, "grad_norm": 1.4140625, "learning_rate": 0.00010155469256489891, "loss": 1.0146, "step": 3348 }, { "epoch": 0.5103626943005182, "grad_norm": 0.98046875, "learning_rate": 0.00010150534103490095, "loss": 1.0802, "step": 3349 }, { "epoch": 0.510515086863761, "grad_norm": 1.015625, "learning_rate": 0.0001014559891381802, "loss": 0.874, "step": 3350 }, { "epoch": 0.510667479427004, "grad_norm": 0.94921875, "learning_rate": 0.0001014066368867596, "loss": 1.0443, "step": 3351 }, { "epoch": 0.5108198719902469, "grad_norm": 0.98046875, "learning_rate": 0.00010135728429266203, "loss": 0.9455, "step": 3352 }, { "epoch": 0.5109722645534898, "grad_norm": 0.9375, "learning_rate": 0.0001013079313679105, "loss": 0.9716, "step": 3353 }, { "epoch": 0.5111246571167327, "grad_norm": 0.96875, "learning_rate": 0.00010125857812452804, "loss": 1.0899, "step": 3354 }, { "epoch": 0.5112770496799757, "grad_norm": 1.0, "learning_rate": 0.00010120922457453785, "loss": 0.9318, "step": 3355 }, { "epoch": 0.5114294422432185, "grad_norm": 1.0546875, "learning_rate": 0.00010115987072996323, "loss": 0.9946, "step": 3356 }, { "epoch": 0.5115818348064615, "grad_norm": 0.859375, "learning_rate": 0.00010111051660282736, "loss": 0.7756, "step": 3357 }, { "epoch": 0.5117342273697043, "grad_norm": 0.71875, "learning_rate": 0.00010106116220515367, "loss": 0.8886, "step": 3358 }, { "epoch": 0.5118866199329473, "grad_norm": 0.9921875, "learning_rate": 0.00010101180754896564, "loss": 1.0331, "step": 3359 }, { "epoch": 0.5120390124961902, "grad_norm": 0.86328125, "learning_rate": 0.00010096245264628672, "loss": 0.9745, "step": 3360 }, { "epoch": 0.5121914050594331, "grad_norm": 0.8515625, "learning_rate": 0.00010091309750914049, "loss": 0.8742, "step": 3361 }, { "epoch": 0.512343797622676, "grad_norm": 0.96875, "learning_rate": 0.00010086374214955061, "loss": 1.1317, "step": 3362 }, { "epoch": 0.512496190185919, "grad_norm": 0.8828125, "learning_rate": 0.0001008143865795407, "loss": 0.9828, "step": 3363 }, { "epoch": 0.5126485827491618, "grad_norm": 1.0, "learning_rate": 0.00010076503081113449, "loss": 0.99, "step": 3364 }, { "epoch": 0.5128009753124048, "grad_norm": 1.0859375, "learning_rate": 0.0001007156748563558, "loss": 0.9729, "step": 3365 }, { "epoch": 0.5129533678756477, "grad_norm": 0.87109375, "learning_rate": 0.00010066631872722842, "loss": 1.0012, "step": 3366 }, { "epoch": 0.5131057604388906, "grad_norm": 0.8203125, "learning_rate": 0.00010061696243577623, "loss": 0.9849, "step": 3367 }, { "epoch": 0.5132581530021335, "grad_norm": 1.078125, "learning_rate": 0.00010056760599402308, "loss": 0.9587, "step": 3368 }, { "epoch": 0.5134105455653765, "grad_norm": 0.71484375, "learning_rate": 0.00010051824941399297, "loss": 0.8969, "step": 3369 }, { "epoch": 0.5135629381286193, "grad_norm": 1.296875, "learning_rate": 0.00010046889270770987, "loss": 1.0334, "step": 3370 }, { "epoch": 0.5137153306918623, "grad_norm": 1.15625, "learning_rate": 0.00010041953588719772, "loss": 1.0097, "step": 3371 }, { "epoch": 0.5138677232551051, "grad_norm": 1.1640625, "learning_rate": 0.00010037017896448062, "loss": 1.0317, "step": 3372 }, { "epoch": 0.5140201158183481, "grad_norm": 0.98046875, "learning_rate": 0.0001003208219515826, "loss": 0.8734, "step": 3373 }, { "epoch": 0.514172508381591, "grad_norm": 0.94921875, "learning_rate": 0.00010027146486052775, "loss": 1.0323, "step": 3374 }, { "epoch": 0.5143249009448339, "grad_norm": 0.86328125, "learning_rate": 0.00010022210770334013, "loss": 1.2279, "step": 3375 }, { "epoch": 0.5144772935080768, "grad_norm": 0.94921875, "learning_rate": 0.00010017275049204389, "loss": 1.0011, "step": 3376 }, { "epoch": 0.5146296860713198, "grad_norm": 1.171875, "learning_rate": 0.00010012339323866315, "loss": 0.9495, "step": 3377 }, { "epoch": 0.5147820786345626, "grad_norm": 1.109375, "learning_rate": 0.00010007403595522204, "loss": 0.9149, "step": 3378 }, { "epoch": 0.5149344711978056, "grad_norm": 1.1953125, "learning_rate": 0.00010002467865374472, "loss": 0.9464, "step": 3379 }, { "epoch": 0.5150868637610485, "grad_norm": 0.98828125, "learning_rate": 9.997532134625531e-05, "loss": 0.9079, "step": 3380 }, { "epoch": 0.5152392563242914, "grad_norm": 0.95703125, "learning_rate": 9.992596404477797e-05, "loss": 0.8251, "step": 3381 }, { "epoch": 0.5153916488875343, "grad_norm": 0.81640625, "learning_rate": 9.987660676133687e-05, "loss": 1.0259, "step": 3382 }, { "epoch": 0.5155440414507773, "grad_norm": 1.0703125, "learning_rate": 9.982724950795613e-05, "loss": 1.1618, "step": 3383 }, { "epoch": 0.5156964340140201, "grad_norm": 0.91015625, "learning_rate": 9.977789229665988e-05, "loss": 0.883, "step": 3384 }, { "epoch": 0.515848826577263, "grad_norm": 1.140625, "learning_rate": 9.972853513947228e-05, "loss": 0.9713, "step": 3385 }, { "epoch": 0.5160012191405059, "grad_norm": 1.0703125, "learning_rate": 9.967917804841742e-05, "loss": 0.9567, "step": 3386 }, { "epoch": 0.5161536117037488, "grad_norm": 1.2421875, "learning_rate": 9.962982103551939e-05, "loss": 1.0192, "step": 3387 }, { "epoch": 0.5163060042669918, "grad_norm": 0.96484375, "learning_rate": 9.95804641128023e-05, "loss": 0.9767, "step": 3388 }, { "epoch": 0.5164583968302346, "grad_norm": 1.1953125, "learning_rate": 9.953110729229017e-05, "loss": 0.9294, "step": 3389 }, { "epoch": 0.5166107893934776, "grad_norm": 1.0, "learning_rate": 9.948175058600704e-05, "loss": 0.96, "step": 3390 }, { "epoch": 0.5167631819567206, "grad_norm": 0.9921875, "learning_rate": 9.943239400597694e-05, "loss": 0.7016, "step": 3391 }, { "epoch": 0.5169155745199634, "grad_norm": 1.0546875, "learning_rate": 9.938303756422382e-05, "loss": 0.9751, "step": 3392 }, { "epoch": 0.5170679670832063, "grad_norm": 0.828125, "learning_rate": 9.93336812727716e-05, "loss": 0.9379, "step": 3393 }, { "epoch": 0.5172203596464493, "grad_norm": 1.09375, "learning_rate": 9.928432514364422e-05, "loss": 1.021, "step": 3394 }, { "epoch": 0.5173727522096921, "grad_norm": 0.92578125, "learning_rate": 9.923496918886552e-05, "loss": 1.1649, "step": 3395 }, { "epoch": 0.5175251447729351, "grad_norm": 1.0546875, "learning_rate": 9.918561342045932e-05, "loss": 0.9033, "step": 3396 }, { "epoch": 0.5176775373361779, "grad_norm": 1.765625, "learning_rate": 9.913625785044941e-05, "loss": 1.0168, "step": 3397 }, { "epoch": 0.5178299298994209, "grad_norm": 0.8671875, "learning_rate": 9.90869024908595e-05, "loss": 1.112, "step": 3398 }, { "epoch": 0.5179823224626638, "grad_norm": 0.91015625, "learning_rate": 9.90375473537133e-05, "loss": 0.9342, "step": 3399 }, { "epoch": 0.5181347150259067, "grad_norm": 0.8828125, "learning_rate": 9.898819245103439e-05, "loss": 0.8916, "step": 3400 }, { "epoch": 0.5182871075891496, "grad_norm": 0.8203125, "learning_rate": 9.893883779484634e-05, "loss": 0.9249, "step": 3401 }, { "epoch": 0.5184395001523926, "grad_norm": 1.03125, "learning_rate": 9.888948339717268e-05, "loss": 0.8677, "step": 3402 }, { "epoch": 0.5185918927156354, "grad_norm": 0.9609375, "learning_rate": 9.884012927003684e-05, "loss": 0.9504, "step": 3403 }, { "epoch": 0.5187442852788784, "grad_norm": 0.89453125, "learning_rate": 9.879077542546214e-05, "loss": 1.0343, "step": 3404 }, { "epoch": 0.5188966778421213, "grad_norm": 0.85546875, "learning_rate": 9.874142187547198e-05, "loss": 1.1939, "step": 3405 }, { "epoch": 0.5190490704053642, "grad_norm": 0.83203125, "learning_rate": 9.869206863208955e-05, "loss": 0.8102, "step": 3406 }, { "epoch": 0.5192014629686071, "grad_norm": 1.6640625, "learning_rate": 9.864271570733798e-05, "loss": 1.011, "step": 3407 }, { "epoch": 0.5193538555318501, "grad_norm": 1.03125, "learning_rate": 9.859336311324041e-05, "loss": 0.8837, "step": 3408 }, { "epoch": 0.5195062480950929, "grad_norm": 0.7421875, "learning_rate": 9.854401086181981e-05, "loss": 0.9243, "step": 3409 }, { "epoch": 0.5196586406583359, "grad_norm": 0.9453125, "learning_rate": 9.849465896509909e-05, "loss": 1.0596, "step": 3410 }, { "epoch": 0.5198110332215787, "grad_norm": 1.1484375, "learning_rate": 9.84453074351011e-05, "loss": 1.0037, "step": 3411 }, { "epoch": 0.5199634257848217, "grad_norm": 0.99609375, "learning_rate": 9.839595628384858e-05, "loss": 0.9986, "step": 3412 }, { "epoch": 0.5201158183480646, "grad_norm": 1.1953125, "learning_rate": 9.834660552336415e-05, "loss": 0.867, "step": 3413 }, { "epoch": 0.5202682109113075, "grad_norm": 0.90234375, "learning_rate": 9.829725516567044e-05, "loss": 0.8309, "step": 3414 }, { "epoch": 0.5204206034745504, "grad_norm": 1.359375, "learning_rate": 9.82479052227898e-05, "loss": 0.9473, "step": 3415 }, { "epoch": 0.5205729960377934, "grad_norm": 0.8125, "learning_rate": 9.819855570674468e-05, "loss": 0.9537, "step": 3416 }, { "epoch": 0.5207253886010362, "grad_norm": 1.0078125, "learning_rate": 9.81492066295573e-05, "loss": 0.8887, "step": 3417 }, { "epoch": 0.5208777811642792, "grad_norm": 1.0078125, "learning_rate": 9.809985800324977e-05, "loss": 1.0111, "step": 3418 }, { "epoch": 0.5210301737275221, "grad_norm": 0.953125, "learning_rate": 9.805050983984417e-05, "loss": 0.931, "step": 3419 }, { "epoch": 0.521182566290765, "grad_norm": 0.9609375, "learning_rate": 9.800116215136242e-05, "loss": 0.8909, "step": 3420 }, { "epoch": 0.5213349588540079, "grad_norm": 1.125, "learning_rate": 9.795181494982629e-05, "loss": 0.8777, "step": 3421 }, { "epoch": 0.5214873514172509, "grad_norm": 1.1640625, "learning_rate": 9.790246824725752e-05, "loss": 1.1093, "step": 3422 }, { "epoch": 0.5216397439804937, "grad_norm": 0.875, "learning_rate": 9.785312205567763e-05, "loss": 0.9759, "step": 3423 }, { "epoch": 0.5217921365437367, "grad_norm": 0.8046875, "learning_rate": 9.780377638710807e-05, "loss": 1.084, "step": 3424 }, { "epoch": 0.5219445291069795, "grad_norm": 0.9921875, "learning_rate": 9.77544312535702e-05, "loss": 1.034, "step": 3425 }, { "epoch": 0.5220969216702225, "grad_norm": 1.3515625, "learning_rate": 9.770508666708514e-05, "loss": 0.8289, "step": 3426 }, { "epoch": 0.5222493142334654, "grad_norm": 0.74609375, "learning_rate": 9.765574263967396e-05, "loss": 0.8395, "step": 3427 }, { "epoch": 0.5224017067967083, "grad_norm": 0.8203125, "learning_rate": 9.76063991833576e-05, "loss": 0.9499, "step": 3428 }, { "epoch": 0.5225540993599512, "grad_norm": 1.2109375, "learning_rate": 9.755705631015682e-05, "loss": 1.0992, "step": 3429 }, { "epoch": 0.5227064919231942, "grad_norm": 1.2109375, "learning_rate": 9.750771403209224e-05, "loss": 0.8409, "step": 3430 }, { "epoch": 0.522858884486437, "grad_norm": 1.2578125, "learning_rate": 9.74583723611844e-05, "loss": 1.0776, "step": 3431 }, { "epoch": 0.52301127704968, "grad_norm": 1.015625, "learning_rate": 9.740903130945355e-05, "loss": 0.9161, "step": 3432 }, { "epoch": 0.5231636696129229, "grad_norm": 0.9296875, "learning_rate": 9.735969088891997e-05, "loss": 1.0436, "step": 3433 }, { "epoch": 0.5233160621761658, "grad_norm": 0.81640625, "learning_rate": 9.731035111160365e-05, "loss": 0.8222, "step": 3434 }, { "epoch": 0.5234684547394087, "grad_norm": 0.94921875, "learning_rate": 9.726101198952445e-05, "loss": 0.9617, "step": 3435 }, { "epoch": 0.5236208473026517, "grad_norm": 0.9921875, "learning_rate": 9.721167353470215e-05, "loss": 1.1295, "step": 3436 }, { "epoch": 0.5237732398658945, "grad_norm": 0.89453125, "learning_rate": 9.716233575915627e-05, "loss": 0.9471, "step": 3437 }, { "epoch": 0.5239256324291375, "grad_norm": 0.92578125, "learning_rate": 9.711299867490617e-05, "loss": 0.9561, "step": 3438 }, { "epoch": 0.5240780249923803, "grad_norm": 1.0078125, "learning_rate": 9.706366229397116e-05, "loss": 0.9454, "step": 3439 }, { "epoch": 0.5242304175556233, "grad_norm": 0.828125, "learning_rate": 9.701432662837023e-05, "loss": 0.8372, "step": 3440 }, { "epoch": 0.5243828101188662, "grad_norm": 0.9609375, "learning_rate": 9.696499169012223e-05, "loss": 1.3425, "step": 3441 }, { "epoch": 0.5245352026821091, "grad_norm": 1.015625, "learning_rate": 9.691565749124593e-05, "loss": 1.0062, "step": 3442 }, { "epoch": 0.524687595245352, "grad_norm": 1.0546875, "learning_rate": 9.686632404375982e-05, "loss": 1.0638, "step": 3443 }, { "epoch": 0.524839987808595, "grad_norm": 0.671875, "learning_rate": 9.681699135968221e-05, "loss": 0.9531, "step": 3444 }, { "epoch": 0.5249923803718378, "grad_norm": 0.90234375, "learning_rate": 9.67676594510313e-05, "loss": 1.0453, "step": 3445 }, { "epoch": 0.5251447729350808, "grad_norm": 1.1875, "learning_rate": 9.671832832982502e-05, "loss": 0.9877, "step": 3446 }, { "epoch": 0.5252971654983237, "grad_norm": 1.109375, "learning_rate": 9.666899800808116e-05, "loss": 0.997, "step": 3447 }, { "epoch": 0.5254495580615666, "grad_norm": 1.640625, "learning_rate": 9.661966849781729e-05, "loss": 1.0429, "step": 3448 }, { "epoch": 0.5256019506248095, "grad_norm": 1.0703125, "learning_rate": 9.657033981105076e-05, "loss": 1.0936, "step": 3449 }, { "epoch": 0.5257543431880525, "grad_norm": 0.8671875, "learning_rate": 9.652101195979879e-05, "loss": 0.9633, "step": 3450 }, { "epoch": 0.5259067357512953, "grad_norm": 0.78515625, "learning_rate": 9.647168495607834e-05, "loss": 0.9806, "step": 3451 }, { "epoch": 0.5260591283145383, "grad_norm": 0.93359375, "learning_rate": 9.642235881190614e-05, "loss": 1.0209, "step": 3452 }, { "epoch": 0.5262115208777811, "grad_norm": 0.98828125, "learning_rate": 9.637303353929881e-05, "loss": 1.0172, "step": 3453 }, { "epoch": 0.5263639134410241, "grad_norm": 1.0625, "learning_rate": 9.632370915027268e-05, "loss": 1.0745, "step": 3454 }, { "epoch": 0.526516306004267, "grad_norm": 0.89453125, "learning_rate": 9.627438565684383e-05, "loss": 0.8943, "step": 3455 }, { "epoch": 0.5266686985675099, "grad_norm": 1.0546875, "learning_rate": 9.622506307102824e-05, "loss": 1.0366, "step": 3456 }, { "epoch": 0.5268210911307528, "grad_norm": 1.53125, "learning_rate": 9.617574140484157e-05, "loss": 1.0289, "step": 3457 }, { "epoch": 0.5269734836939958, "grad_norm": 1.0703125, "learning_rate": 9.612642067029926e-05, "loss": 1.0152, "step": 3458 }, { "epoch": 0.5271258762572386, "grad_norm": 0.8046875, "learning_rate": 9.607710087941659e-05, "loss": 0.9265, "step": 3459 }, { "epoch": 0.5272782688204816, "grad_norm": 1.2578125, "learning_rate": 9.602778204420859e-05, "loss": 1.087, "step": 3460 }, { "epoch": 0.5274306613837245, "grad_norm": 1.0078125, "learning_rate": 9.597846417668996e-05, "loss": 1.054, "step": 3461 }, { "epoch": 0.5275830539469674, "grad_norm": 0.96875, "learning_rate": 9.592914728887529e-05, "loss": 1.1532, "step": 3462 }, { "epoch": 0.5277354465102103, "grad_norm": 1.1328125, "learning_rate": 9.587983139277889e-05, "loss": 1.2201, "step": 3463 }, { "epoch": 0.5278878390734533, "grad_norm": 1.1640625, "learning_rate": 9.583051650041482e-05, "loss": 1.0497, "step": 3464 }, { "epoch": 0.5280402316366961, "grad_norm": 1.0546875, "learning_rate": 9.57812026237969e-05, "loss": 0.8747, "step": 3465 }, { "epoch": 0.5281926241999391, "grad_norm": 0.9921875, "learning_rate": 9.573188977493866e-05, "loss": 0.9965, "step": 3466 }, { "epoch": 0.5283450167631819, "grad_norm": 1.0625, "learning_rate": 9.568257796585348e-05, "loss": 1.1161, "step": 3467 }, { "epoch": 0.5284974093264249, "grad_norm": 0.80078125, "learning_rate": 9.563326720855437e-05, "loss": 1.0446, "step": 3468 }, { "epoch": 0.5286498018896678, "grad_norm": 0.92578125, "learning_rate": 9.558395751505415e-05, "loss": 0.915, "step": 3469 }, { "epoch": 0.5288021944529107, "grad_norm": 1.2265625, "learning_rate": 9.55346488973654e-05, "loss": 0.9382, "step": 3470 }, { "epoch": 0.5289545870161536, "grad_norm": 1.203125, "learning_rate": 9.548534136750041e-05, "loss": 1.0144, "step": 3471 }, { "epoch": 0.5291069795793966, "grad_norm": 0.82421875, "learning_rate": 9.543603493747116e-05, "loss": 1.0516, "step": 3472 }, { "epoch": 0.5292593721426394, "grad_norm": 0.9296875, "learning_rate": 9.538672961928944e-05, "loss": 0.792, "step": 3473 }, { "epoch": 0.5294117647058824, "grad_norm": 1.0078125, "learning_rate": 9.533742542496672e-05, "loss": 0.9687, "step": 3474 }, { "epoch": 0.5295641572691253, "grad_norm": 0.7578125, "learning_rate": 9.52881223665142e-05, "loss": 0.9451, "step": 3475 }, { "epoch": 0.5297165498323682, "grad_norm": 1.1484375, "learning_rate": 9.523882045594286e-05, "loss": 1.0558, "step": 3476 }, { "epoch": 0.5298689423956111, "grad_norm": 0.88671875, "learning_rate": 9.518951970526332e-05, "loss": 1.1983, "step": 3477 }, { "epoch": 0.5300213349588541, "grad_norm": 0.79296875, "learning_rate": 9.514022012648594e-05, "loss": 0.9019, "step": 3478 }, { "epoch": 0.5301737275220969, "grad_norm": 0.81640625, "learning_rate": 9.509092173162082e-05, "loss": 0.8953, "step": 3479 }, { "epoch": 0.5303261200853399, "grad_norm": 0.86328125, "learning_rate": 9.504162453267777e-05, "loss": 0.8866, "step": 3480 }, { "epoch": 0.5304785126485827, "grad_norm": 0.90234375, "learning_rate": 9.499232854166629e-05, "loss": 1.0328, "step": 3481 }, { "epoch": 0.5306309052118257, "grad_norm": 0.859375, "learning_rate": 9.494303377059559e-05, "loss": 1.092, "step": 3482 }, { "epoch": 0.5307832977750686, "grad_norm": 0.8828125, "learning_rate": 9.489374023147457e-05, "loss": 0.9689, "step": 3483 }, { "epoch": 0.5309356903383115, "grad_norm": 0.90625, "learning_rate": 9.484444793631186e-05, "loss": 0.935, "step": 3484 }, { "epoch": 0.5310880829015544, "grad_norm": 0.90234375, "learning_rate": 9.47951568971158e-05, "loss": 0.7909, "step": 3485 }, { "epoch": 0.5312404754647974, "grad_norm": 0.8515625, "learning_rate": 9.474586712589432e-05, "loss": 0.9149, "step": 3486 }, { "epoch": 0.5313928680280402, "grad_norm": 0.8203125, "learning_rate": 9.46965786346552e-05, "loss": 0.9421, "step": 3487 }, { "epoch": 0.5315452605912832, "grad_norm": 0.93359375, "learning_rate": 9.464729143540579e-05, "loss": 1.2216, "step": 3488 }, { "epoch": 0.5316976531545261, "grad_norm": 1.0078125, "learning_rate": 9.459800554015314e-05, "loss": 1.0028, "step": 3489 }, { "epoch": 0.531850045717769, "grad_norm": 0.765625, "learning_rate": 9.454872096090403e-05, "loss": 0.9555, "step": 3490 }, { "epoch": 0.5320024382810119, "grad_norm": 0.94921875, "learning_rate": 9.449943770966489e-05, "loss": 1.0035, "step": 3491 }, { "epoch": 0.5321548308442549, "grad_norm": 1.0234375, "learning_rate": 9.445015579844181e-05, "loss": 1.028, "step": 3492 }, { "epoch": 0.5323072234074977, "grad_norm": 1.09375, "learning_rate": 9.44008752392406e-05, "loss": 1.1241, "step": 3493 }, { "epoch": 0.5324596159707407, "grad_norm": 1.03125, "learning_rate": 9.435159604406667e-05, "loss": 1.0541, "step": 3494 }, { "epoch": 0.5326120085339835, "grad_norm": 0.88671875, "learning_rate": 9.43023182249252e-05, "loss": 0.9814, "step": 3495 }, { "epoch": 0.5327644010972264, "grad_norm": 1.09375, "learning_rate": 9.425304179382095e-05, "loss": 1.0857, "step": 3496 }, { "epoch": 0.5329167936604694, "grad_norm": 1.0234375, "learning_rate": 9.420376676275833e-05, "loss": 1.1025, "step": 3497 }, { "epoch": 0.5330691862237122, "grad_norm": 0.78515625, "learning_rate": 9.415449314374151e-05, "loss": 0.9733, "step": 3498 }, { "epoch": 0.5332215787869552, "grad_norm": 0.98046875, "learning_rate": 9.41052209487742e-05, "loss": 0.9845, "step": 3499 }, { "epoch": 0.5333739713501982, "grad_norm": 0.890625, "learning_rate": 9.405595018985984e-05, "loss": 0.8783, "step": 3500 }, { "epoch": 0.533526363913441, "grad_norm": 1.1171875, "learning_rate": 9.40066808790015e-05, "loss": 0.8525, "step": 3501 }, { "epoch": 0.533678756476684, "grad_norm": 1.0390625, "learning_rate": 9.395741302820189e-05, "loss": 1.0382, "step": 3502 }, { "epoch": 0.5338311490399269, "grad_norm": 0.9765625, "learning_rate": 9.390814664946331e-05, "loss": 1.2717, "step": 3503 }, { "epoch": 0.5339835416031697, "grad_norm": 1.140625, "learning_rate": 9.385888175478786e-05, "loss": 0.8601, "step": 3504 }, { "epoch": 0.5341359341664127, "grad_norm": 0.91796875, "learning_rate": 9.380961835617712e-05, "loss": 0.9574, "step": 3505 }, { "epoch": 0.5342883267296555, "grad_norm": 0.953125, "learning_rate": 9.376035646563233e-05, "loss": 0.9506, "step": 3506 }, { "epoch": 0.5344407192928985, "grad_norm": 0.80078125, "learning_rate": 9.371109609515448e-05, "loss": 1.0108, "step": 3507 }, { "epoch": 0.5345931118561414, "grad_norm": 0.765625, "learning_rate": 9.366183725674406e-05, "loss": 0.8327, "step": 3508 }, { "epoch": 0.5347455044193843, "grad_norm": 1.046875, "learning_rate": 9.361257996240118e-05, "loss": 1.0295, "step": 3509 }, { "epoch": 0.5348978969826272, "grad_norm": 0.86328125, "learning_rate": 9.356332422412571e-05, "loss": 0.9341, "step": 3510 }, { "epoch": 0.5350502895458702, "grad_norm": 0.90625, "learning_rate": 9.351407005391699e-05, "loss": 1.0899, "step": 3511 }, { "epoch": 0.535202682109113, "grad_norm": 0.734375, "learning_rate": 9.34648174637741e-05, "loss": 0.9676, "step": 3512 }, { "epoch": 0.535355074672356, "grad_norm": 0.7578125, "learning_rate": 9.341556646569565e-05, "loss": 0.8109, "step": 3513 }, { "epoch": 0.5355074672355989, "grad_norm": 0.77734375, "learning_rate": 9.336631707167989e-05, "loss": 0.9586, "step": 3514 }, { "epoch": 0.5356598597988418, "grad_norm": 1.0, "learning_rate": 9.331706929372469e-05, "loss": 0.9568, "step": 3515 }, { "epoch": 0.5358122523620847, "grad_norm": 0.78125, "learning_rate": 9.326782314382751e-05, "loss": 0.8643, "step": 3516 }, { "epoch": 0.5359646449253277, "grad_norm": 0.890625, "learning_rate": 9.321857863398543e-05, "loss": 0.9309, "step": 3517 }, { "epoch": 0.5361170374885705, "grad_norm": 1.0, "learning_rate": 9.31693357761951e-05, "loss": 0.9479, "step": 3518 }, { "epoch": 0.5362694300518135, "grad_norm": 0.88671875, "learning_rate": 9.312009458245282e-05, "loss": 0.977, "step": 3519 }, { "epoch": 0.5364218226150563, "grad_norm": 0.828125, "learning_rate": 9.307085506475441e-05, "loss": 0.898, "step": 3520 }, { "epoch": 0.5365742151782993, "grad_norm": 0.92578125, "learning_rate": 9.302161723509537e-05, "loss": 1.0785, "step": 3521 }, { "epoch": 0.5367266077415422, "grad_norm": 0.9453125, "learning_rate": 9.297238110547074e-05, "loss": 0.9921, "step": 3522 }, { "epoch": 0.5368790003047851, "grad_norm": 1.1328125, "learning_rate": 9.292314668787507e-05, "loss": 1.0378, "step": 3523 }, { "epoch": 0.537031392868028, "grad_norm": 1.1640625, "learning_rate": 9.287391399430268e-05, "loss": 1.0437, "step": 3524 }, { "epoch": 0.537183785431271, "grad_norm": 0.97265625, "learning_rate": 9.282468303674732e-05, "loss": 1.1769, "step": 3525 }, { "epoch": 0.5373361779945138, "grad_norm": 0.953125, "learning_rate": 9.277545382720232e-05, "loss": 1.0163, "step": 3526 }, { "epoch": 0.5374885705577568, "grad_norm": 0.83203125, "learning_rate": 9.272622637766068e-05, "loss": 0.7927, "step": 3527 }, { "epoch": 0.5376409631209997, "grad_norm": 1.109375, "learning_rate": 9.267700070011489e-05, "loss": 0.9001, "step": 3528 }, { "epoch": 0.5377933556842426, "grad_norm": 0.828125, "learning_rate": 9.262777680655703e-05, "loss": 0.8935, "step": 3529 }, { "epoch": 0.5379457482474855, "grad_norm": 0.85546875, "learning_rate": 9.257855470897878e-05, "loss": 0.9775, "step": 3530 }, { "epoch": 0.5380981408107285, "grad_norm": 1.1171875, "learning_rate": 9.25293344193713e-05, "loss": 1.0671, "step": 3531 }, { "epoch": 0.5382505333739713, "grad_norm": 1.0703125, "learning_rate": 9.248011594972539e-05, "loss": 1.0012, "step": 3532 }, { "epoch": 0.5384029259372143, "grad_norm": 1.0390625, "learning_rate": 9.24308993120314e-05, "loss": 0.9687, "step": 3533 }, { "epoch": 0.5385553185004571, "grad_norm": 1.203125, "learning_rate": 9.238168451827913e-05, "loss": 0.9862, "step": 3534 }, { "epoch": 0.5387077110637001, "grad_norm": 0.9453125, "learning_rate": 9.23324715804581e-05, "loss": 1.1459, "step": 3535 }, { "epoch": 0.538860103626943, "grad_norm": 1.0625, "learning_rate": 9.228326051055728e-05, "loss": 1.1119, "step": 3536 }, { "epoch": 0.5390124961901859, "grad_norm": 1.1640625, "learning_rate": 9.22340513205651e-05, "loss": 0.9702, "step": 3537 }, { "epoch": 0.5391648887534288, "grad_norm": 1.046875, "learning_rate": 9.218484402246974e-05, "loss": 1.0568, "step": 3538 }, { "epoch": 0.5393172813166718, "grad_norm": 0.83203125, "learning_rate": 9.213563862825878e-05, "loss": 0.9456, "step": 3539 }, { "epoch": 0.5394696738799146, "grad_norm": 0.82421875, "learning_rate": 9.208643514991928e-05, "loss": 1.0485, "step": 3540 }, { "epoch": 0.5396220664431576, "grad_norm": 0.8515625, "learning_rate": 9.203723359943802e-05, "loss": 1.0305, "step": 3541 }, { "epoch": 0.5397744590064005, "grad_norm": 1.109375, "learning_rate": 9.198803398880118e-05, "loss": 0.9674, "step": 3542 }, { "epoch": 0.5399268515696434, "grad_norm": 0.859375, "learning_rate": 9.193883632999441e-05, "loss": 0.9437, "step": 3543 }, { "epoch": 0.5400792441328863, "grad_norm": 0.9453125, "learning_rate": 9.188964063500307e-05, "loss": 0.9365, "step": 3544 }, { "epoch": 0.5402316366961293, "grad_norm": 0.97265625, "learning_rate": 9.184044691581188e-05, "loss": 0.9702, "step": 3545 }, { "epoch": 0.5403840292593721, "grad_norm": 0.77734375, "learning_rate": 9.179125518440518e-05, "loss": 0.9079, "step": 3546 }, { "epoch": 0.5405364218226151, "grad_norm": 1.3671875, "learning_rate": 9.174206545276677e-05, "loss": 1.0101, "step": 3547 }, { "epoch": 0.5406888143858579, "grad_norm": 1.15625, "learning_rate": 9.169287773287995e-05, "loss": 0.9957, "step": 3548 }, { "epoch": 0.5408412069491009, "grad_norm": 1.0234375, "learning_rate": 9.164369203672758e-05, "loss": 1.0304, "step": 3549 }, { "epoch": 0.5409935995123438, "grad_norm": 0.84765625, "learning_rate": 9.159450837629202e-05, "loss": 0.8194, "step": 3550 }, { "epoch": 0.5411459920755867, "grad_norm": 1.1328125, "learning_rate": 9.154532676355502e-05, "loss": 0.8751, "step": 3551 }, { "epoch": 0.5412983846388296, "grad_norm": 0.94140625, "learning_rate": 9.149614721049806e-05, "loss": 0.8661, "step": 3552 }, { "epoch": 0.5414507772020726, "grad_norm": 0.97265625, "learning_rate": 9.144696972910196e-05, "loss": 1.0002, "step": 3553 }, { "epoch": 0.5416031697653154, "grad_norm": 0.84765625, "learning_rate": 9.139779433134693e-05, "loss": 0.8512, "step": 3554 }, { "epoch": 0.5417555623285584, "grad_norm": 1.1015625, "learning_rate": 9.134862102921297e-05, "loss": 1.0318, "step": 3555 }, { "epoch": 0.5419079548918013, "grad_norm": 1.09375, "learning_rate": 9.129944983467935e-05, "loss": 0.9455, "step": 3556 }, { "epoch": 0.5420603474550442, "grad_norm": 0.82421875, "learning_rate": 9.125028075972479e-05, "loss": 0.8797, "step": 3557 }, { "epoch": 0.5422127400182871, "grad_norm": 1.0234375, "learning_rate": 9.120111381632772e-05, "loss": 1.102, "step": 3558 }, { "epoch": 0.5423651325815301, "grad_norm": 0.7421875, "learning_rate": 9.115194901646583e-05, "loss": 0.9539, "step": 3559 }, { "epoch": 0.5425175251447729, "grad_norm": 0.79296875, "learning_rate": 9.110278637211643e-05, "loss": 0.9955, "step": 3560 }, { "epoch": 0.5426699177080159, "grad_norm": 0.6953125, "learning_rate": 9.10536258952562e-05, "loss": 0.8557, "step": 3561 }, { "epoch": 0.5428223102712587, "grad_norm": 0.90234375, "learning_rate": 9.100446759786132e-05, "loss": 0.932, "step": 3562 }, { "epoch": 0.5429747028345017, "grad_norm": 1.734375, "learning_rate": 9.095531149190753e-05, "loss": 1.1118, "step": 3563 }, { "epoch": 0.5431270953977446, "grad_norm": 0.71875, "learning_rate": 9.090615758936994e-05, "loss": 0.813, "step": 3564 }, { "epoch": 0.5432794879609875, "grad_norm": 1.0234375, "learning_rate": 9.085700590222307e-05, "loss": 1.0631, "step": 3565 }, { "epoch": 0.5434318805242304, "grad_norm": 1.234375, "learning_rate": 9.080785644244108e-05, "loss": 1.0049, "step": 3566 }, { "epoch": 0.5435842730874734, "grad_norm": 1.40625, "learning_rate": 9.075870922199747e-05, "loss": 1.0863, "step": 3567 }, { "epoch": 0.5437366656507162, "grad_norm": 1.0625, "learning_rate": 9.070956425286514e-05, "loss": 1.0225, "step": 3568 }, { "epoch": 0.5438890582139592, "grad_norm": 1.1796875, "learning_rate": 9.066042154701658e-05, "loss": 0.8675, "step": 3569 }, { "epoch": 0.5440414507772021, "grad_norm": 1.125, "learning_rate": 9.061128111642365e-05, "loss": 1.136, "step": 3570 }, { "epoch": 0.544193843340445, "grad_norm": 1.15625, "learning_rate": 9.05621429730576e-05, "loss": 0.9438, "step": 3571 }, { "epoch": 0.5443462359036879, "grad_norm": 0.7421875, "learning_rate": 9.051300712888928e-05, "loss": 0.8696, "step": 3572 }, { "epoch": 0.5444986284669309, "grad_norm": 1.0703125, "learning_rate": 9.046387359588888e-05, "loss": 1.0455, "step": 3573 }, { "epoch": 0.5446510210301737, "grad_norm": 0.796875, "learning_rate": 9.041474238602594e-05, "loss": 0.7583, "step": 3574 }, { "epoch": 0.5448034135934167, "grad_norm": 1.0078125, "learning_rate": 9.036561351126963e-05, "loss": 1.0884, "step": 3575 }, { "epoch": 0.5449558061566595, "grad_norm": 0.74609375, "learning_rate": 9.031648698358838e-05, "loss": 0.9585, "step": 3576 }, { "epoch": 0.5451081987199025, "grad_norm": 0.796875, "learning_rate": 9.026736281495018e-05, "loss": 1.0085, "step": 3577 }, { "epoch": 0.5452605912831454, "grad_norm": 0.67578125, "learning_rate": 9.021824101732239e-05, "loss": 0.8025, "step": 3578 }, { "epoch": 0.5454129838463883, "grad_norm": 0.84765625, "learning_rate": 9.016912160267168e-05, "loss": 1.036, "step": 3579 }, { "epoch": 0.5455653764096312, "grad_norm": 0.734375, "learning_rate": 9.012000458296436e-05, "loss": 0.8768, "step": 3580 }, { "epoch": 0.5457177689728742, "grad_norm": 1.0078125, "learning_rate": 9.007088997016603e-05, "loss": 1.0552, "step": 3581 }, { "epoch": 0.545870161536117, "grad_norm": 1.0078125, "learning_rate": 9.002177777624162e-05, "loss": 1.033, "step": 3582 }, { "epoch": 0.54602255409936, "grad_norm": 0.95703125, "learning_rate": 8.997266801315565e-05, "loss": 0.9212, "step": 3583 }, { "epoch": 0.5461749466626029, "grad_norm": 0.94921875, "learning_rate": 8.9923560692872e-05, "loss": 0.987, "step": 3584 }, { "epoch": 0.5463273392258458, "grad_norm": 0.8671875, "learning_rate": 8.98744558273538e-05, "loss": 0.8618, "step": 3585 }, { "epoch": 0.5464797317890887, "grad_norm": 1.0703125, "learning_rate": 8.98253534285638e-05, "loss": 1.2136, "step": 3586 }, { "epoch": 0.5466321243523317, "grad_norm": 0.8046875, "learning_rate": 8.977625350846405e-05, "loss": 0.948, "step": 3587 }, { "epoch": 0.5467845169155745, "grad_norm": 0.875, "learning_rate": 8.97271560790159e-05, "loss": 0.9081, "step": 3588 }, { "epoch": 0.5469369094788175, "grad_norm": 1.046875, "learning_rate": 8.967806115218027e-05, "loss": 1.0182, "step": 3589 }, { "epoch": 0.5470893020420603, "grad_norm": 1.1640625, "learning_rate": 8.962896873991744e-05, "loss": 1.0715, "step": 3590 }, { "epoch": 0.5472416946053033, "grad_norm": 1.1328125, "learning_rate": 8.957987885418689e-05, "loss": 1.0823, "step": 3591 }, { "epoch": 0.5473940871685462, "grad_norm": 1.171875, "learning_rate": 8.953079150694774e-05, "loss": 1.1815, "step": 3592 }, { "epoch": 0.547546479731789, "grad_norm": 0.75390625, "learning_rate": 8.948170671015828e-05, "loss": 0.964, "step": 3593 }, { "epoch": 0.547698872295032, "grad_norm": 0.87109375, "learning_rate": 8.943262447577635e-05, "loss": 0.7737, "step": 3594 }, { "epoch": 0.547851264858275, "grad_norm": 1.125, "learning_rate": 8.938354481575908e-05, "loss": 0.9191, "step": 3595 }, { "epoch": 0.5480036574215178, "grad_norm": 0.8046875, "learning_rate": 8.93344677420629e-05, "loss": 0.9752, "step": 3596 }, { "epoch": 0.5481560499847608, "grad_norm": 0.87109375, "learning_rate": 8.928539326664382e-05, "loss": 1.0838, "step": 3597 }, { "epoch": 0.5483084425480037, "grad_norm": 0.97265625, "learning_rate": 8.923632140145701e-05, "loss": 1.0297, "step": 3598 }, { "epoch": 0.5484608351112465, "grad_norm": 0.96484375, "learning_rate": 8.918725215845704e-05, "loss": 0.8041, "step": 3599 }, { "epoch": 0.5486132276744895, "grad_norm": 1.0, "learning_rate": 8.913818554959797e-05, "loss": 0.9533, "step": 3600 }, { "epoch": 0.5487656202377323, "grad_norm": 1.0859375, "learning_rate": 8.908912158683312e-05, "loss": 1.0597, "step": 3601 }, { "epoch": 0.5489180128009753, "grad_norm": 0.88671875, "learning_rate": 8.904006028211509e-05, "loss": 0.9778, "step": 3602 }, { "epoch": 0.5490704053642183, "grad_norm": 0.78125, "learning_rate": 8.899100164739602e-05, "loss": 0.8106, "step": 3603 }, { "epoch": 0.5492227979274611, "grad_norm": 0.87890625, "learning_rate": 8.894194569462729e-05, "loss": 1.0601, "step": 3604 }, { "epoch": 0.549375190490704, "grad_norm": 0.8828125, "learning_rate": 8.889289243575953e-05, "loss": 1.0254, "step": 3605 }, { "epoch": 0.549527583053947, "grad_norm": 1.015625, "learning_rate": 8.884384188274298e-05, "loss": 0.9623, "step": 3606 }, { "epoch": 0.5496799756171898, "grad_norm": 1.0078125, "learning_rate": 8.879479404752694e-05, "loss": 0.9539, "step": 3607 }, { "epoch": 0.5498323681804328, "grad_norm": 0.83203125, "learning_rate": 8.874574894206018e-05, "loss": 0.8512, "step": 3608 }, { "epoch": 0.5499847607436757, "grad_norm": 0.8984375, "learning_rate": 8.869670657829085e-05, "loss": 1.0493, "step": 3609 }, { "epoch": 0.5501371533069186, "grad_norm": 1.03125, "learning_rate": 8.864766696816629e-05, "loss": 0.9844, "step": 3610 }, { "epoch": 0.5502895458701615, "grad_norm": 0.8515625, "learning_rate": 8.859863012363335e-05, "loss": 1.0083, "step": 3611 }, { "epoch": 0.5504419384334045, "grad_norm": 0.984375, "learning_rate": 8.85495960566381e-05, "loss": 1.023, "step": 3612 }, { "epoch": 0.5505943309966473, "grad_norm": 1.1484375, "learning_rate": 8.850056477912583e-05, "loss": 0.9976, "step": 3613 }, { "epoch": 0.5507467235598903, "grad_norm": 0.80859375, "learning_rate": 8.845153630304139e-05, "loss": 1.09, "step": 3614 }, { "epoch": 0.5508991161231331, "grad_norm": 0.81640625, "learning_rate": 8.840251064032881e-05, "loss": 0.9207, "step": 3615 }, { "epoch": 0.5510515086863761, "grad_norm": 1.0703125, "learning_rate": 8.835348780293133e-05, "loss": 1.2033, "step": 3616 }, { "epoch": 0.551203901249619, "grad_norm": 1.2421875, "learning_rate": 8.830446780279176e-05, "loss": 1.0792, "step": 3617 }, { "epoch": 0.5513562938128619, "grad_norm": 0.953125, "learning_rate": 8.825545065185203e-05, "loss": 1.0155, "step": 3618 }, { "epoch": 0.5515086863761048, "grad_norm": 0.90625, "learning_rate": 8.820643636205335e-05, "loss": 0.9608, "step": 3619 }, { "epoch": 0.5516610789393478, "grad_norm": 0.8984375, "learning_rate": 8.815742494533644e-05, "loss": 0.9284, "step": 3620 }, { "epoch": 0.5518134715025906, "grad_norm": 0.6875, "learning_rate": 8.810841641364107e-05, "loss": 0.7877, "step": 3621 }, { "epoch": 0.5519658640658336, "grad_norm": 0.83203125, "learning_rate": 8.805941077890643e-05, "loss": 0.7871, "step": 3622 }, { "epoch": 0.5521182566290765, "grad_norm": 0.9375, "learning_rate": 8.801040805307112e-05, "loss": 0.9616, "step": 3623 }, { "epoch": 0.5522706491923194, "grad_norm": 0.86328125, "learning_rate": 8.796140824807272e-05, "loss": 0.9579, "step": 3624 }, { "epoch": 0.5524230417555623, "grad_norm": 0.8828125, "learning_rate": 8.791241137584846e-05, "loss": 0.9142, "step": 3625 }, { "epoch": 0.5525754343188053, "grad_norm": 0.82421875, "learning_rate": 8.786341744833463e-05, "loss": 0.8446, "step": 3626 }, { "epoch": 0.5527278268820481, "grad_norm": 0.59765625, "learning_rate": 8.781442647746676e-05, "loss": 0.9197, "step": 3627 }, { "epoch": 0.5528802194452911, "grad_norm": 1.1953125, "learning_rate": 8.776543847517988e-05, "loss": 1.0532, "step": 3628 }, { "epoch": 0.5530326120085339, "grad_norm": 0.83984375, "learning_rate": 8.771645345340815e-05, "loss": 0.9863, "step": 3629 }, { "epoch": 0.5531850045717769, "grad_norm": 1.0078125, "learning_rate": 8.766747142408494e-05, "loss": 0.952, "step": 3630 }, { "epoch": 0.5533373971350198, "grad_norm": 1.0703125, "learning_rate": 8.761849239914307e-05, "loss": 0.9009, "step": 3631 }, { "epoch": 0.5534897896982627, "grad_norm": 0.72265625, "learning_rate": 8.756951639051451e-05, "loss": 0.9464, "step": 3632 }, { "epoch": 0.5536421822615056, "grad_norm": 1.015625, "learning_rate": 8.752054341013047e-05, "loss": 1.0381, "step": 3633 }, { "epoch": 0.5537945748247486, "grad_norm": 0.9375, "learning_rate": 8.747157346992156e-05, "loss": 0.9689, "step": 3634 }, { "epoch": 0.5539469673879914, "grad_norm": 0.984375, "learning_rate": 8.742260658181751e-05, "loss": 0.9725, "step": 3635 }, { "epoch": 0.5540993599512344, "grad_norm": 0.89453125, "learning_rate": 8.73736427577473e-05, "loss": 0.9181, "step": 3636 }, { "epoch": 0.5542517525144773, "grad_norm": 1.0078125, "learning_rate": 8.732468200963936e-05, "loss": 0.919, "step": 3637 }, { "epoch": 0.5544041450777202, "grad_norm": 1.0546875, "learning_rate": 8.727572434942112e-05, "loss": 0.9085, "step": 3638 }, { "epoch": 0.5545565376409631, "grad_norm": 0.89453125, "learning_rate": 8.722676978901939e-05, "loss": 1.0985, "step": 3639 }, { "epoch": 0.5547089302042061, "grad_norm": 0.859375, "learning_rate": 8.717781834036027e-05, "loss": 0.9949, "step": 3640 }, { "epoch": 0.5548613227674489, "grad_norm": 0.94921875, "learning_rate": 8.712887001536893e-05, "loss": 0.8087, "step": 3641 }, { "epoch": 0.5550137153306919, "grad_norm": 1.2265625, "learning_rate": 8.707992482596998e-05, "loss": 1.1798, "step": 3642 }, { "epoch": 0.5551661078939347, "grad_norm": 1.1875, "learning_rate": 8.703098278408717e-05, "loss": 0.9647, "step": 3643 }, { "epoch": 0.5553185004571777, "grad_norm": 0.86328125, "learning_rate": 8.698204390164339e-05, "loss": 1.0993, "step": 3644 }, { "epoch": 0.5554708930204206, "grad_norm": 1.3125, "learning_rate": 8.693310819056094e-05, "loss": 1.1994, "step": 3645 }, { "epoch": 0.5556232855836635, "grad_norm": 1.28125, "learning_rate": 8.688417566276128e-05, "loss": 0.9916, "step": 3646 }, { "epoch": 0.5557756781469064, "grad_norm": 1.0859375, "learning_rate": 8.683524633016497e-05, "loss": 0.9592, "step": 3647 }, { "epoch": 0.5559280707101494, "grad_norm": 0.9921875, "learning_rate": 8.678632020469202e-05, "loss": 0.92, "step": 3648 }, { "epoch": 0.5560804632733922, "grad_norm": 1.0859375, "learning_rate": 8.673739729826148e-05, "loss": 0.8497, "step": 3649 }, { "epoch": 0.5562328558366352, "grad_norm": 1.1328125, "learning_rate": 8.668847762279163e-05, "loss": 0.9516, "step": 3650 }, { "epoch": 0.5563852483998781, "grad_norm": 0.8203125, "learning_rate": 8.663956119020009e-05, "loss": 1.0006, "step": 3651 }, { "epoch": 0.556537640963121, "grad_norm": 0.73046875, "learning_rate": 8.659064801240358e-05, "loss": 0.8822, "step": 3652 }, { "epoch": 0.5566900335263639, "grad_norm": 0.92578125, "learning_rate": 8.654173810131799e-05, "loss": 0.927, "step": 3653 }, { "epoch": 0.5568424260896069, "grad_norm": 1.234375, "learning_rate": 8.64928314688586e-05, "loss": 1.2534, "step": 3654 }, { "epoch": 0.5569948186528497, "grad_norm": 0.828125, "learning_rate": 8.644392812693968e-05, "loss": 0.9085, "step": 3655 }, { "epoch": 0.5571472112160927, "grad_norm": 1.1015625, "learning_rate": 8.639502808747477e-05, "loss": 0.8991, "step": 3656 }, { "epoch": 0.5572996037793355, "grad_norm": 1.1484375, "learning_rate": 8.634613136237671e-05, "loss": 1.2042, "step": 3657 }, { "epoch": 0.5574519963425785, "grad_norm": 0.99609375, "learning_rate": 8.629723796355735e-05, "loss": 1.2058, "step": 3658 }, { "epoch": 0.5576043889058214, "grad_norm": 1.140625, "learning_rate": 8.624834790292791e-05, "loss": 0.9005, "step": 3659 }, { "epoch": 0.5577567814690643, "grad_norm": 0.85546875, "learning_rate": 8.619946119239868e-05, "loss": 1.0209, "step": 3660 }, { "epoch": 0.5579091740323072, "grad_norm": 0.9453125, "learning_rate": 8.61505778438791e-05, "loss": 0.9614, "step": 3661 }, { "epoch": 0.5580615665955502, "grad_norm": 0.84765625, "learning_rate": 8.6101697869278e-05, "loss": 0.897, "step": 3662 }, { "epoch": 0.558213959158793, "grad_norm": 0.859375, "learning_rate": 8.605282128050311e-05, "loss": 1.0191, "step": 3663 }, { "epoch": 0.558366351722036, "grad_norm": 0.921875, "learning_rate": 8.600394808946151e-05, "loss": 0.9993, "step": 3664 }, { "epoch": 0.5585187442852789, "grad_norm": 0.75390625, "learning_rate": 8.595507830805946e-05, "loss": 0.903, "step": 3665 }, { "epoch": 0.5586711368485218, "grad_norm": 0.87109375, "learning_rate": 8.590621194820229e-05, "loss": 0.9489, "step": 3666 }, { "epoch": 0.5588235294117647, "grad_norm": 0.9453125, "learning_rate": 8.585734902179453e-05, "loss": 1.0892, "step": 3667 }, { "epoch": 0.5589759219750077, "grad_norm": 0.98046875, "learning_rate": 8.580848954073997e-05, "loss": 0.9623, "step": 3668 }, { "epoch": 0.5591283145382505, "grad_norm": 0.796875, "learning_rate": 8.575963351694141e-05, "loss": 0.8539, "step": 3669 }, { "epoch": 0.5592807071014935, "grad_norm": 0.9453125, "learning_rate": 8.571078096230087e-05, "loss": 1.0683, "step": 3670 }, { "epoch": 0.5594330996647363, "grad_norm": 0.8828125, "learning_rate": 8.566193188871965e-05, "loss": 0.9905, "step": 3671 }, { "epoch": 0.5595854922279793, "grad_norm": 0.75, "learning_rate": 8.561308630809797e-05, "loss": 0.9501, "step": 3672 }, { "epoch": 0.5597378847912222, "grad_norm": 1.0078125, "learning_rate": 8.55642442323353e-05, "loss": 0.9029, "step": 3673 }, { "epoch": 0.5598902773544651, "grad_norm": 0.86328125, "learning_rate": 8.55154056733304e-05, "loss": 1.0607, "step": 3674 }, { "epoch": 0.560042669917708, "grad_norm": 1.2265625, "learning_rate": 8.546657064298092e-05, "loss": 0.9962, "step": 3675 }, { "epoch": 0.560195062480951, "grad_norm": 0.984375, "learning_rate": 8.541773915318388e-05, "loss": 0.9591, "step": 3676 }, { "epoch": 0.5603474550441938, "grad_norm": 0.8671875, "learning_rate": 8.536891121583526e-05, "loss": 0.9632, "step": 3677 }, { "epoch": 0.5604998476074368, "grad_norm": 1.1328125, "learning_rate": 8.532008684283021e-05, "loss": 1.1506, "step": 3678 }, { "epoch": 0.5606522401706797, "grad_norm": 0.8671875, "learning_rate": 8.52712660460632e-05, "loss": 0.9771, "step": 3679 }, { "epoch": 0.5608046327339226, "grad_norm": 0.921875, "learning_rate": 8.522244883742753e-05, "loss": 0.9028, "step": 3680 }, { "epoch": 0.5609570252971655, "grad_norm": 0.984375, "learning_rate": 8.517363522881579e-05, "loss": 1.084, "step": 3681 }, { "epoch": 0.5611094178604085, "grad_norm": 0.84375, "learning_rate": 8.51248252321198e-05, "loss": 0.9307, "step": 3682 }, { "epoch": 0.5612618104236513, "grad_norm": 0.921875, "learning_rate": 8.507601885923025e-05, "loss": 0.8588, "step": 3683 }, { "epoch": 0.5614142029868943, "grad_norm": 1.046875, "learning_rate": 8.502721612203708e-05, "loss": 1.0059, "step": 3684 }, { "epoch": 0.5615665955501371, "grad_norm": 0.77734375, "learning_rate": 8.497841703242945e-05, "loss": 0.9753, "step": 3685 }, { "epoch": 0.5617189881133801, "grad_norm": 0.8359375, "learning_rate": 8.492962160229542e-05, "loss": 1.0801, "step": 3686 }, { "epoch": 0.561871380676623, "grad_norm": 0.921875, "learning_rate": 8.488082984352224e-05, "loss": 0.9489, "step": 3687 }, { "epoch": 0.5620237732398659, "grad_norm": 0.97265625, "learning_rate": 8.483204176799639e-05, "loss": 0.9665, "step": 3688 }, { "epoch": 0.5621761658031088, "grad_norm": 0.95703125, "learning_rate": 8.47832573876032e-05, "loss": 0.9054, "step": 3689 }, { "epoch": 0.5623285583663518, "grad_norm": 0.84765625, "learning_rate": 8.473447671422742e-05, "loss": 1.055, "step": 3690 }, { "epoch": 0.5624809509295946, "grad_norm": 1.1484375, "learning_rate": 8.468569975975262e-05, "loss": 1.0231, "step": 3691 }, { "epoch": 0.5626333434928376, "grad_norm": 1.0625, "learning_rate": 8.463692653606152e-05, "loss": 1.013, "step": 3692 }, { "epoch": 0.5627857360560805, "grad_norm": 1.0390625, "learning_rate": 8.458815705503611e-05, "loss": 0.907, "step": 3693 }, { "epoch": 0.5629381286193234, "grad_norm": 0.63671875, "learning_rate": 8.453939132855724e-05, "loss": 0.716, "step": 3694 }, { "epoch": 0.5630905211825663, "grad_norm": 0.90234375, "learning_rate": 8.449062936850494e-05, "loss": 0.9261, "step": 3695 }, { "epoch": 0.5632429137458093, "grad_norm": 1.359375, "learning_rate": 8.444187118675841e-05, "loss": 1.1537, "step": 3696 }, { "epoch": 0.5633953063090521, "grad_norm": 0.9453125, "learning_rate": 8.439311679519578e-05, "loss": 0.8989, "step": 3697 }, { "epoch": 0.563547698872295, "grad_norm": 0.703125, "learning_rate": 8.434436620569429e-05, "loss": 0.9732, "step": 3698 }, { "epoch": 0.5637000914355379, "grad_norm": 0.890625, "learning_rate": 8.429561943013036e-05, "loss": 0.8824, "step": 3699 }, { "epoch": 0.5638524839987809, "grad_norm": 0.8984375, "learning_rate": 8.424687648037937e-05, "loss": 1.0255, "step": 3700 }, { "epoch": 0.5640048765620238, "grad_norm": 1.03125, "learning_rate": 8.419813736831576e-05, "loss": 0.9873, "step": 3701 }, { "epoch": 0.5641572691252666, "grad_norm": 0.95703125, "learning_rate": 8.414940210581318e-05, "loss": 1.0746, "step": 3702 }, { "epoch": 0.5643096616885096, "grad_norm": 0.81640625, "learning_rate": 8.410067070474417e-05, "loss": 0.8515, "step": 3703 }, { "epoch": 0.5644620542517526, "grad_norm": 0.72265625, "learning_rate": 8.405194317698038e-05, "loss": 0.9518, "step": 3704 }, { "epoch": 0.5646144468149954, "grad_norm": 0.9453125, "learning_rate": 8.400321953439259e-05, "loss": 0.9061, "step": 3705 }, { "epoch": 0.5647668393782384, "grad_norm": 0.95703125, "learning_rate": 8.395449978885053e-05, "loss": 1.0439, "step": 3706 }, { "epoch": 0.5649192319414813, "grad_norm": 0.90625, "learning_rate": 8.39057839522231e-05, "loss": 0.9356, "step": 3707 }, { "epoch": 0.5650716245047241, "grad_norm": 0.84765625, "learning_rate": 8.385707203637814e-05, "loss": 0.8939, "step": 3708 }, { "epoch": 0.5652240170679671, "grad_norm": 0.75390625, "learning_rate": 8.380836405318253e-05, "loss": 0.9772, "step": 3709 }, { "epoch": 0.5653764096312099, "grad_norm": 1.1171875, "learning_rate": 8.375966001450233e-05, "loss": 1.1264, "step": 3710 }, { "epoch": 0.5655288021944529, "grad_norm": 1.0078125, "learning_rate": 8.371095993220248e-05, "loss": 0.9724, "step": 3711 }, { "epoch": 0.5656811947576958, "grad_norm": 0.69921875, "learning_rate": 8.366226381814697e-05, "loss": 0.9, "step": 3712 }, { "epoch": 0.5658335873209387, "grad_norm": 1.0546875, "learning_rate": 8.3613571684199e-05, "loss": 1.0179, "step": 3713 }, { "epoch": 0.5659859798841816, "grad_norm": 0.83203125, "learning_rate": 8.35648835422206e-05, "loss": 0.9162, "step": 3714 }, { "epoch": 0.5661383724474246, "grad_norm": 1.234375, "learning_rate": 8.351619940407288e-05, "loss": 1.1743, "step": 3715 }, { "epoch": 0.5662907650106674, "grad_norm": 0.78515625, "learning_rate": 8.346751928161607e-05, "loss": 1.0456, "step": 3716 }, { "epoch": 0.5664431575739104, "grad_norm": 0.93359375, "learning_rate": 8.341884318670929e-05, "loss": 0.8026, "step": 3717 }, { "epoch": 0.5665955501371533, "grad_norm": 0.88671875, "learning_rate": 8.337017113121073e-05, "loss": 1.0266, "step": 3718 }, { "epoch": 0.5667479427003962, "grad_norm": 0.90234375, "learning_rate": 8.332150312697765e-05, "loss": 0.9993, "step": 3719 }, { "epoch": 0.5669003352636391, "grad_norm": 0.7109375, "learning_rate": 8.327283918586624e-05, "loss": 0.9879, "step": 3720 }, { "epoch": 0.5670527278268821, "grad_norm": 1.0703125, "learning_rate": 8.322417931973173e-05, "loss": 0.9757, "step": 3721 }, { "epoch": 0.5672051203901249, "grad_norm": 1.1171875, "learning_rate": 8.317552354042838e-05, "loss": 1.2723, "step": 3722 }, { "epoch": 0.5673575129533679, "grad_norm": 0.79296875, "learning_rate": 8.312687185980941e-05, "loss": 0.8632, "step": 3723 }, { "epoch": 0.5675099055166107, "grad_norm": 0.85546875, "learning_rate": 8.307822428972715e-05, "loss": 1.0458, "step": 3724 }, { "epoch": 0.5676622980798537, "grad_norm": 1.0390625, "learning_rate": 8.302958084203278e-05, "loss": 0.8589, "step": 3725 }, { "epoch": 0.5678146906430966, "grad_norm": 0.8046875, "learning_rate": 8.298094152857652e-05, "loss": 0.9894, "step": 3726 }, { "epoch": 0.5679670832063395, "grad_norm": 0.79296875, "learning_rate": 8.29323063612077e-05, "loss": 0.8474, "step": 3727 }, { "epoch": 0.5681194757695824, "grad_norm": 0.78125, "learning_rate": 8.288367535177446e-05, "loss": 0.9603, "step": 3728 }, { "epoch": 0.5682718683328254, "grad_norm": 0.88671875, "learning_rate": 8.283504851212401e-05, "loss": 0.9205, "step": 3729 }, { "epoch": 0.5684242608960682, "grad_norm": 0.9921875, "learning_rate": 8.278642585410264e-05, "loss": 1.1211, "step": 3730 }, { "epoch": 0.5685766534593112, "grad_norm": 1.09375, "learning_rate": 8.273780738955544e-05, "loss": 1.0859, "step": 3731 }, { "epoch": 0.5687290460225541, "grad_norm": 0.76171875, "learning_rate": 8.268919313032659e-05, "loss": 0.952, "step": 3732 }, { "epoch": 0.568881438585797, "grad_norm": 1.09375, "learning_rate": 8.264058308825923e-05, "loss": 0.878, "step": 3733 }, { "epoch": 0.5690338311490399, "grad_norm": 0.8125, "learning_rate": 8.259197727519547e-05, "loss": 0.974, "step": 3734 }, { "epoch": 0.5691862237122829, "grad_norm": 0.91015625, "learning_rate": 8.254337570297634e-05, "loss": 0.7308, "step": 3735 }, { "epoch": 0.5693386162755257, "grad_norm": 1.0625, "learning_rate": 8.249477838344196e-05, "loss": 0.8779, "step": 3736 }, { "epoch": 0.5694910088387687, "grad_norm": 1.1484375, "learning_rate": 8.244618532843129e-05, "loss": 0.9168, "step": 3737 }, { "epoch": 0.5696434014020115, "grad_norm": 0.98046875, "learning_rate": 8.239759654978229e-05, "loss": 0.926, "step": 3738 }, { "epoch": 0.5697957939652545, "grad_norm": 0.765625, "learning_rate": 8.23490120593319e-05, "loss": 0.9124, "step": 3739 }, { "epoch": 0.5699481865284974, "grad_norm": 1.078125, "learning_rate": 8.230043186891598e-05, "loss": 0.9166, "step": 3740 }, { "epoch": 0.5701005790917403, "grad_norm": 0.85546875, "learning_rate": 8.225185599036942e-05, "loss": 0.9636, "step": 3741 }, { "epoch": 0.5702529716549832, "grad_norm": 1.1015625, "learning_rate": 8.220328443552597e-05, "loss": 1.0558, "step": 3742 }, { "epoch": 0.5704053642182262, "grad_norm": 0.69140625, "learning_rate": 8.215471721621829e-05, "loss": 1.111, "step": 3743 }, { "epoch": 0.570557756781469, "grad_norm": 1.0234375, "learning_rate": 8.210615434427818e-05, "loss": 1.15, "step": 3744 }, { "epoch": 0.570710149344712, "grad_norm": 1.171875, "learning_rate": 8.205759583153617e-05, "loss": 1.273, "step": 3745 }, { "epoch": 0.5708625419079549, "grad_norm": 0.99609375, "learning_rate": 8.200904168982182e-05, "loss": 1.1596, "step": 3746 }, { "epoch": 0.5710149344711978, "grad_norm": 0.7578125, "learning_rate": 8.196049193096364e-05, "loss": 0.9731, "step": 3747 }, { "epoch": 0.5711673270344407, "grad_norm": 0.79296875, "learning_rate": 8.191194656678904e-05, "loss": 1.004, "step": 3748 }, { "epoch": 0.5713197195976837, "grad_norm": 0.92578125, "learning_rate": 8.186340560912434e-05, "loss": 0.9245, "step": 3749 }, { "epoch": 0.5714721121609265, "grad_norm": 0.7734375, "learning_rate": 8.181486906979487e-05, "loss": 0.8986, "step": 3750 }, { "epoch": 0.5716245047241695, "grad_norm": 0.8515625, "learning_rate": 8.176633696062481e-05, "loss": 0.9869, "step": 3751 }, { "epoch": 0.5717768972874123, "grad_norm": 0.96484375, "learning_rate": 8.171780929343724e-05, "loss": 0.9002, "step": 3752 }, { "epoch": 0.5719292898506553, "grad_norm": 0.984375, "learning_rate": 8.166928608005427e-05, "loss": 1.0417, "step": 3753 }, { "epoch": 0.5720816824138982, "grad_norm": 0.8046875, "learning_rate": 8.162076733229674e-05, "loss": 0.9613, "step": 3754 }, { "epoch": 0.5722340749771411, "grad_norm": 0.99609375, "learning_rate": 8.157225306198465e-05, "loss": 0.9818, "step": 3755 }, { "epoch": 0.572386467540384, "grad_norm": 1.46875, "learning_rate": 8.152374328093672e-05, "loss": 1.1222, "step": 3756 }, { "epoch": 0.572538860103627, "grad_norm": 1.046875, "learning_rate": 8.147523800097055e-05, "loss": 1.1794, "step": 3757 }, { "epoch": 0.5726912526668698, "grad_norm": 0.9140625, "learning_rate": 8.142673723390288e-05, "loss": 1.056, "step": 3758 }, { "epoch": 0.5728436452301128, "grad_norm": 0.9140625, "learning_rate": 8.137824099154906e-05, "loss": 0.9968, "step": 3759 }, { "epoch": 0.5729960377933557, "grad_norm": 0.83984375, "learning_rate": 8.132974928572351e-05, "loss": 0.9196, "step": 3760 }, { "epoch": 0.5731484303565986, "grad_norm": 0.9453125, "learning_rate": 8.128126212823955e-05, "loss": 0.9214, "step": 3761 }, { "epoch": 0.5733008229198415, "grad_norm": 0.8046875, "learning_rate": 8.12327795309093e-05, "loss": 0.9382, "step": 3762 }, { "epoch": 0.5734532154830845, "grad_norm": 1.0859375, "learning_rate": 8.118430150554381e-05, "loss": 1.1229, "step": 3763 }, { "epoch": 0.5736056080463273, "grad_norm": 0.90234375, "learning_rate": 8.113582806395309e-05, "loss": 0.9204, "step": 3764 }, { "epoch": 0.5737580006095703, "grad_norm": 1.125, "learning_rate": 8.108735921794591e-05, "loss": 0.9712, "step": 3765 }, { "epoch": 0.5739103931728131, "grad_norm": 1.0859375, "learning_rate": 8.103889497932998e-05, "loss": 1.081, "step": 3766 }, { "epoch": 0.5740627857360561, "grad_norm": 0.91015625, "learning_rate": 8.099043535991191e-05, "loss": 0.7465, "step": 3767 }, { "epoch": 0.574215178299299, "grad_norm": 1.0546875, "learning_rate": 8.094198037149716e-05, "loss": 0.853, "step": 3768 }, { "epoch": 0.5743675708625419, "grad_norm": 0.73828125, "learning_rate": 8.089353002589001e-05, "loss": 0.9275, "step": 3769 }, { "epoch": 0.5745199634257848, "grad_norm": 1.09375, "learning_rate": 8.084508433489375e-05, "loss": 1.0202, "step": 3770 }, { "epoch": 0.5746723559890278, "grad_norm": 0.87109375, "learning_rate": 8.079664331031034e-05, "loss": 0.913, "step": 3771 }, { "epoch": 0.5748247485522706, "grad_norm": 1.0546875, "learning_rate": 8.074820696394082e-05, "loss": 1.0562, "step": 3772 }, { "epoch": 0.5749771411155136, "grad_norm": 1.046875, "learning_rate": 8.069977530758493e-05, "loss": 1.0541, "step": 3773 }, { "epoch": 0.5751295336787565, "grad_norm": 1.0, "learning_rate": 8.065134835304129e-05, "loss": 1.1194, "step": 3774 }, { "epoch": 0.5752819262419994, "grad_norm": 1.078125, "learning_rate": 8.060292611210745e-05, "loss": 1.1101, "step": 3775 }, { "epoch": 0.5754343188052423, "grad_norm": 0.8984375, "learning_rate": 8.055450859657974e-05, "loss": 1.0301, "step": 3776 }, { "epoch": 0.5755867113684853, "grad_norm": 1.15625, "learning_rate": 8.050609581825336e-05, "loss": 0.9556, "step": 3777 }, { "epoch": 0.5757391039317281, "grad_norm": 0.98046875, "learning_rate": 8.045768778892238e-05, "loss": 1.225, "step": 3778 }, { "epoch": 0.5758914964949711, "grad_norm": 0.890625, "learning_rate": 8.040928452037966e-05, "loss": 1.0213, "step": 3779 }, { "epoch": 0.5760438890582139, "grad_norm": 0.91796875, "learning_rate": 8.036088602441696e-05, "loss": 0.9056, "step": 3780 }, { "epoch": 0.5761962816214569, "grad_norm": 1.2421875, "learning_rate": 8.031249231282485e-05, "loss": 0.822, "step": 3781 }, { "epoch": 0.5763486741846998, "grad_norm": 1.015625, "learning_rate": 8.026410339739271e-05, "loss": 0.8517, "step": 3782 }, { "epoch": 0.5765010667479427, "grad_norm": 0.984375, "learning_rate": 8.021571928990878e-05, "loss": 0.8543, "step": 3783 }, { "epoch": 0.5766534593111856, "grad_norm": 0.9375, "learning_rate": 8.016734000216015e-05, "loss": 0.9516, "step": 3784 }, { "epoch": 0.5768058518744286, "grad_norm": 0.70703125, "learning_rate": 8.01189655459327e-05, "loss": 0.9051, "step": 3785 }, { "epoch": 0.5769582444376714, "grad_norm": 0.90234375, "learning_rate": 8.007059593301112e-05, "loss": 0.9714, "step": 3786 }, { "epoch": 0.5771106370009144, "grad_norm": 0.87109375, "learning_rate": 8.002223117517898e-05, "loss": 1.1937, "step": 3787 }, { "epoch": 0.5772630295641573, "grad_norm": 1.0625, "learning_rate": 7.997387128421858e-05, "loss": 0.9057, "step": 3788 }, { "epoch": 0.5774154221274002, "grad_norm": 0.78515625, "learning_rate": 7.992551627191115e-05, "loss": 0.9553, "step": 3789 }, { "epoch": 0.5775678146906431, "grad_norm": 0.9609375, "learning_rate": 7.987716615003662e-05, "loss": 1.2554, "step": 3790 }, { "epoch": 0.5777202072538861, "grad_norm": 1.3359375, "learning_rate": 7.982882093037378e-05, "loss": 1.059, "step": 3791 }, { "epoch": 0.5778725998171289, "grad_norm": 1.0703125, "learning_rate": 7.978048062470023e-05, "loss": 0.9592, "step": 3792 }, { "epoch": 0.5780249923803719, "grad_norm": 0.77734375, "learning_rate": 7.973214524479238e-05, "loss": 0.9313, "step": 3793 }, { "epoch": 0.5781773849436147, "grad_norm": 0.87890625, "learning_rate": 7.968381480242539e-05, "loss": 0.996, "step": 3794 }, { "epoch": 0.5783297775068577, "grad_norm": 0.8359375, "learning_rate": 7.963548930937327e-05, "loss": 1.0214, "step": 3795 }, { "epoch": 0.5784821700701006, "grad_norm": 0.98046875, "learning_rate": 7.95871687774088e-05, "loss": 0.995, "step": 3796 }, { "epoch": 0.5786345626333435, "grad_norm": 1.109375, "learning_rate": 7.953885321830354e-05, "loss": 0.9397, "step": 3797 }, { "epoch": 0.5787869551965864, "grad_norm": 0.921875, "learning_rate": 7.94905426438279e-05, "loss": 1.0726, "step": 3798 }, { "epoch": 0.5789393477598294, "grad_norm": 1.0625, "learning_rate": 7.9442237065751e-05, "loss": 0.8689, "step": 3799 }, { "epoch": 0.5790917403230722, "grad_norm": 0.99609375, "learning_rate": 7.939393649584076e-05, "loss": 0.939, "step": 3800 }, { "epoch": 0.5792441328863152, "grad_norm": 1.2109375, "learning_rate": 7.934564094586392e-05, "loss": 0.8324, "step": 3801 }, { "epoch": 0.5793965254495581, "grad_norm": 1.1484375, "learning_rate": 7.929735042758597e-05, "loss": 1.009, "step": 3802 }, { "epoch": 0.579548918012801, "grad_norm": 0.8203125, "learning_rate": 7.924906495277115e-05, "loss": 0.8477, "step": 3803 }, { "epoch": 0.5797013105760439, "grad_norm": 1.015625, "learning_rate": 7.920078453318252e-05, "loss": 0.7789, "step": 3804 }, { "epoch": 0.5798537031392869, "grad_norm": 0.96484375, "learning_rate": 7.915250918058186e-05, "loss": 1.0337, "step": 3805 }, { "epoch": 0.5800060957025297, "grad_norm": 0.79296875, "learning_rate": 7.910423890672977e-05, "loss": 0.8309, "step": 3806 }, { "epoch": 0.5801584882657727, "grad_norm": 0.9375, "learning_rate": 7.905597372338558e-05, "loss": 1.063, "step": 3807 }, { "epoch": 0.5803108808290155, "grad_norm": 0.984375, "learning_rate": 7.900771364230734e-05, "loss": 0.859, "step": 3808 }, { "epoch": 0.5804632733922584, "grad_norm": 0.85546875, "learning_rate": 7.895945867525197e-05, "loss": 1.1549, "step": 3809 }, { "epoch": 0.5806156659555014, "grad_norm": 0.7890625, "learning_rate": 7.891120883397502e-05, "loss": 0.9094, "step": 3810 }, { "epoch": 0.5807680585187442, "grad_norm": 0.91015625, "learning_rate": 7.886296413023084e-05, "loss": 1.0826, "step": 3811 }, { "epoch": 0.5809204510819872, "grad_norm": 0.83203125, "learning_rate": 7.881472457577257e-05, "loss": 1.0133, "step": 3812 }, { "epoch": 0.5810728436452302, "grad_norm": 0.984375, "learning_rate": 7.876649018235206e-05, "loss": 1.0477, "step": 3813 }, { "epoch": 0.581225236208473, "grad_norm": 1.0234375, "learning_rate": 7.871826096171985e-05, "loss": 0.9657, "step": 3814 }, { "epoch": 0.581377628771716, "grad_norm": 1.0, "learning_rate": 7.867003692562534e-05, "loss": 1.1637, "step": 3815 }, { "epoch": 0.5815300213349589, "grad_norm": 0.84375, "learning_rate": 7.862181808581654e-05, "loss": 1.0399, "step": 3816 }, { "epoch": 0.5816824138982017, "grad_norm": 1.0859375, "learning_rate": 7.857360445404028e-05, "loss": 0.9397, "step": 3817 }, { "epoch": 0.5818348064614447, "grad_norm": 0.91796875, "learning_rate": 7.852539604204211e-05, "loss": 0.9772, "step": 3818 }, { "epoch": 0.5819871990246875, "grad_norm": 1.0546875, "learning_rate": 7.847719286156628e-05, "loss": 0.967, "step": 3819 }, { "epoch": 0.5821395915879305, "grad_norm": 0.95703125, "learning_rate": 7.842899492435574e-05, "loss": 0.8536, "step": 3820 }, { "epoch": 0.5822919841511734, "grad_norm": 0.83984375, "learning_rate": 7.838080224215226e-05, "loss": 0.9406, "step": 3821 }, { "epoch": 0.5824443767144163, "grad_norm": 0.921875, "learning_rate": 7.833261482669621e-05, "loss": 1.002, "step": 3822 }, { "epoch": 0.5825967692776592, "grad_norm": 0.9765625, "learning_rate": 7.82844326897268e-05, "loss": 1.117, "step": 3823 }, { "epoch": 0.5827491618409022, "grad_norm": 1.1015625, "learning_rate": 7.823625584298188e-05, "loss": 1.1651, "step": 3824 }, { "epoch": 0.582901554404145, "grad_norm": 0.77734375, "learning_rate": 7.818808429819796e-05, "loss": 0.9493, "step": 3825 }, { "epoch": 0.583053946967388, "grad_norm": 1.0390625, "learning_rate": 7.813991806711039e-05, "loss": 0.8488, "step": 3826 }, { "epoch": 0.583206339530631, "grad_norm": 1.6796875, "learning_rate": 7.809175716145313e-05, "loss": 1.0473, "step": 3827 }, { "epoch": 0.5833587320938738, "grad_norm": 1.046875, "learning_rate": 7.804360159295886e-05, "loss": 1.1667, "step": 3828 }, { "epoch": 0.5835111246571167, "grad_norm": 0.77734375, "learning_rate": 7.799545137335902e-05, "loss": 0.8743, "step": 3829 }, { "epoch": 0.5836635172203597, "grad_norm": 0.7421875, "learning_rate": 7.794730651438364e-05, "loss": 0.859, "step": 3830 }, { "epoch": 0.5838159097836025, "grad_norm": 1.1796875, "learning_rate": 7.789916702776148e-05, "loss": 0.9273, "step": 3831 }, { "epoch": 0.5839683023468455, "grad_norm": 1.171875, "learning_rate": 7.78510329252201e-05, "loss": 0.9383, "step": 3832 }, { "epoch": 0.5841206949100883, "grad_norm": 0.9765625, "learning_rate": 7.780290421848557e-05, "loss": 0.8973, "step": 3833 }, { "epoch": 0.5842730874733313, "grad_norm": 1.0546875, "learning_rate": 7.775478091928277e-05, "loss": 0.9605, "step": 3834 }, { "epoch": 0.5844254800365742, "grad_norm": 1.09375, "learning_rate": 7.770666303933525e-05, "loss": 1.0715, "step": 3835 }, { "epoch": 0.5845778725998171, "grad_norm": 0.8828125, "learning_rate": 7.765855059036518e-05, "loss": 0.8273, "step": 3836 }, { "epoch": 0.58473026516306, "grad_norm": 0.7734375, "learning_rate": 7.761044358409349e-05, "loss": 0.9644, "step": 3837 }, { "epoch": 0.584882657726303, "grad_norm": 0.90625, "learning_rate": 7.756234203223969e-05, "loss": 1.0179, "step": 3838 }, { "epoch": 0.5850350502895458, "grad_norm": 0.8984375, "learning_rate": 7.751424594652202e-05, "loss": 1.0669, "step": 3839 }, { "epoch": 0.5851874428527888, "grad_norm": 0.9296875, "learning_rate": 7.746615533865742e-05, "loss": 1.1961, "step": 3840 }, { "epoch": 0.5853398354160317, "grad_norm": 0.8046875, "learning_rate": 7.741807022036141e-05, "loss": 1.0308, "step": 3841 }, { "epoch": 0.5854922279792746, "grad_norm": 0.9296875, "learning_rate": 7.736999060334821e-05, "loss": 0.8862, "step": 3842 }, { "epoch": 0.5856446205425175, "grad_norm": 0.9765625, "learning_rate": 7.732191649933073e-05, "loss": 0.9539, "step": 3843 }, { "epoch": 0.5857970131057605, "grad_norm": 0.93359375, "learning_rate": 7.727384792002054e-05, "loss": 1.078, "step": 3844 }, { "epoch": 0.5859494056690033, "grad_norm": 1.1328125, "learning_rate": 7.722578487712776e-05, "loss": 1.0008, "step": 3845 }, { "epoch": 0.5861017982322463, "grad_norm": 1.03125, "learning_rate": 7.71777273823613e-05, "loss": 0.9802, "step": 3846 }, { "epoch": 0.5862541907954891, "grad_norm": 0.99609375, "learning_rate": 7.712967544742866e-05, "loss": 0.9192, "step": 3847 }, { "epoch": 0.5864065833587321, "grad_norm": 1.0234375, "learning_rate": 7.708162908403594e-05, "loss": 1.0296, "step": 3848 }, { "epoch": 0.586558975921975, "grad_norm": 0.83984375, "learning_rate": 7.703358830388795e-05, "loss": 0.9351, "step": 3849 }, { "epoch": 0.5867113684852179, "grad_norm": 0.74609375, "learning_rate": 7.698555311868812e-05, "loss": 0.8599, "step": 3850 }, { "epoch": 0.5868637610484608, "grad_norm": 0.890625, "learning_rate": 7.693752354013849e-05, "loss": 0.772, "step": 3851 }, { "epoch": 0.5870161536117038, "grad_norm": 1.0859375, "learning_rate": 7.688949957993978e-05, "loss": 0.8923, "step": 3852 }, { "epoch": 0.5871685461749466, "grad_norm": 0.828125, "learning_rate": 7.68414812497913e-05, "loss": 1.0355, "step": 3853 }, { "epoch": 0.5873209387381896, "grad_norm": 1.0546875, "learning_rate": 7.679346856139103e-05, "loss": 0.9209, "step": 3854 }, { "epoch": 0.5874733313014325, "grad_norm": 0.93359375, "learning_rate": 7.674546152643553e-05, "loss": 1.1258, "step": 3855 }, { "epoch": 0.5876257238646754, "grad_norm": 0.98046875, "learning_rate": 7.669746015661998e-05, "loss": 1.0751, "step": 3856 }, { "epoch": 0.5877781164279183, "grad_norm": 0.83984375, "learning_rate": 7.664946446363825e-05, "loss": 0.8803, "step": 3857 }, { "epoch": 0.5879305089911613, "grad_norm": 1.046875, "learning_rate": 7.660147445918279e-05, "loss": 0.9607, "step": 3858 }, { "epoch": 0.5880829015544041, "grad_norm": 1.2265625, "learning_rate": 7.655349015494458e-05, "loss": 0.9266, "step": 3859 }, { "epoch": 0.5882352941176471, "grad_norm": 0.9453125, "learning_rate": 7.650551156261337e-05, "loss": 0.9421, "step": 3860 }, { "epoch": 0.5883876866808899, "grad_norm": 0.91015625, "learning_rate": 7.64575386938774e-05, "loss": 0.8951, "step": 3861 }, { "epoch": 0.5885400792441329, "grad_norm": 0.99609375, "learning_rate": 7.640957156042354e-05, "loss": 1.0166, "step": 3862 }, { "epoch": 0.5886924718073758, "grad_norm": 0.8046875, "learning_rate": 7.636161017393729e-05, "loss": 0.88, "step": 3863 }, { "epoch": 0.5888448643706187, "grad_norm": 0.921875, "learning_rate": 7.631365454610273e-05, "loss": 0.8392, "step": 3864 }, { "epoch": 0.5889972569338616, "grad_norm": 0.97265625, "learning_rate": 7.626570468860252e-05, "loss": 0.9823, "step": 3865 }, { "epoch": 0.5891496494971046, "grad_norm": 0.87890625, "learning_rate": 7.621776061311797e-05, "loss": 0.8637, "step": 3866 }, { "epoch": 0.5893020420603474, "grad_norm": 1.109375, "learning_rate": 7.616982233132895e-05, "loss": 1.0838, "step": 3867 }, { "epoch": 0.5894544346235904, "grad_norm": 0.8203125, "learning_rate": 7.612188985491385e-05, "loss": 0.9032, "step": 3868 }, { "epoch": 0.5896068271868333, "grad_norm": 1.140625, "learning_rate": 7.607396319554978e-05, "loss": 1.0583, "step": 3869 }, { "epoch": 0.5897592197500762, "grad_norm": 0.7734375, "learning_rate": 7.602604236491231e-05, "loss": 0.7704, "step": 3870 }, { "epoch": 0.5899116123133191, "grad_norm": 0.7109375, "learning_rate": 7.597812737467572e-05, "loss": 0.9181, "step": 3871 }, { "epoch": 0.5900640048765621, "grad_norm": 0.71484375, "learning_rate": 7.593021823651272e-05, "loss": 0.8736, "step": 3872 }, { "epoch": 0.5902163974398049, "grad_norm": 0.94921875, "learning_rate": 7.588231496209466e-05, "loss": 1.055, "step": 3873 }, { "epoch": 0.5903687900030479, "grad_norm": 0.87109375, "learning_rate": 7.583441756309153e-05, "loss": 1.0755, "step": 3874 }, { "epoch": 0.5905211825662907, "grad_norm": 0.8828125, "learning_rate": 7.578652605117178e-05, "loss": 0.9889, "step": 3875 }, { "epoch": 0.5906735751295337, "grad_norm": 0.8828125, "learning_rate": 7.573864043800246e-05, "loss": 0.8489, "step": 3876 }, { "epoch": 0.5908259676927766, "grad_norm": 0.953125, "learning_rate": 7.569076073524924e-05, "loss": 0.9471, "step": 3877 }, { "epoch": 0.5909783602560195, "grad_norm": 1.359375, "learning_rate": 7.564288695457629e-05, "loss": 0.9843, "step": 3878 }, { "epoch": 0.5911307528192624, "grad_norm": 0.98046875, "learning_rate": 7.559501910764629e-05, "loss": 1.1527, "step": 3879 }, { "epoch": 0.5912831453825054, "grad_norm": 0.83203125, "learning_rate": 7.554715720612063e-05, "loss": 0.856, "step": 3880 }, { "epoch": 0.5914355379457482, "grad_norm": 1.0078125, "learning_rate": 7.54993012616591e-05, "loss": 0.9877, "step": 3881 }, { "epoch": 0.5915879305089912, "grad_norm": 0.98828125, "learning_rate": 7.54514512859201e-05, "loss": 0.9735, "step": 3882 }, { "epoch": 0.5917403230722341, "grad_norm": 1.3515625, "learning_rate": 7.540360729056058e-05, "loss": 1.2914, "step": 3883 }, { "epoch": 0.591892715635477, "grad_norm": 0.76953125, "learning_rate": 7.535576928723604e-05, "loss": 0.9659, "step": 3884 }, { "epoch": 0.5920451081987199, "grad_norm": 0.875, "learning_rate": 7.530793728760048e-05, "loss": 0.8839, "step": 3885 }, { "epoch": 0.5921975007619629, "grad_norm": 1.1015625, "learning_rate": 7.526011130330649e-05, "loss": 0.9153, "step": 3886 }, { "epoch": 0.5923498933252057, "grad_norm": 0.8359375, "learning_rate": 7.521229134600513e-05, "loss": 0.9236, "step": 3887 }, { "epoch": 0.5925022858884487, "grad_norm": 1.453125, "learning_rate": 7.516447742734607e-05, "loss": 1.0368, "step": 3888 }, { "epoch": 0.5926546784516915, "grad_norm": 0.94140625, "learning_rate": 7.511666955897745e-05, "loss": 0.9824, "step": 3889 }, { "epoch": 0.5928070710149345, "grad_norm": 0.95703125, "learning_rate": 7.506886775254593e-05, "loss": 1.0721, "step": 3890 }, { "epoch": 0.5929594635781774, "grad_norm": 0.76953125, "learning_rate": 7.502107201969678e-05, "loss": 0.9492, "step": 3891 }, { "epoch": 0.5931118561414203, "grad_norm": 0.8203125, "learning_rate": 7.497328237207368e-05, "loss": 1.0424, "step": 3892 }, { "epoch": 0.5932642487046632, "grad_norm": 1.1484375, "learning_rate": 7.492549882131885e-05, "loss": 0.93, "step": 3893 }, { "epoch": 0.5934166412679062, "grad_norm": 1.046875, "learning_rate": 7.487772137907312e-05, "loss": 0.9269, "step": 3894 }, { "epoch": 0.593569033831149, "grad_norm": 0.91015625, "learning_rate": 7.482995005697573e-05, "loss": 1.022, "step": 3895 }, { "epoch": 0.593721426394392, "grad_norm": 0.875, "learning_rate": 7.478218486666443e-05, "loss": 0.8445, "step": 3896 }, { "epoch": 0.5938738189576349, "grad_norm": 0.91015625, "learning_rate": 7.473442581977556e-05, "loss": 0.8804, "step": 3897 }, { "epoch": 0.5940262115208778, "grad_norm": 1.09375, "learning_rate": 7.468667292794389e-05, "loss": 0.9606, "step": 3898 }, { "epoch": 0.5941786040841207, "grad_norm": 1.46875, "learning_rate": 7.463892620280267e-05, "loss": 1.2645, "step": 3899 }, { "epoch": 0.5943309966473637, "grad_norm": 1.03125, "learning_rate": 7.459118565598376e-05, "loss": 0.9789, "step": 3900 }, { "epoch": 0.5944833892106065, "grad_norm": 0.81640625, "learning_rate": 7.454345129911739e-05, "loss": 0.9435, "step": 3901 }, { "epoch": 0.5946357817738495, "grad_norm": 0.9609375, "learning_rate": 7.449572314383237e-05, "loss": 1.2477, "step": 3902 }, { "epoch": 0.5947881743370923, "grad_norm": 0.83984375, "learning_rate": 7.444800120175594e-05, "loss": 0.7523, "step": 3903 }, { "epoch": 0.5949405669003353, "grad_norm": 1.0859375, "learning_rate": 7.440028548451386e-05, "loss": 0.8926, "step": 3904 }, { "epoch": 0.5950929594635782, "grad_norm": 0.90234375, "learning_rate": 7.435257600373039e-05, "loss": 1.1871, "step": 3905 }, { "epoch": 0.595245352026821, "grad_norm": 0.78515625, "learning_rate": 7.43048727710282e-05, "loss": 0.97, "step": 3906 }, { "epoch": 0.595397744590064, "grad_norm": 0.77734375, "learning_rate": 7.42571757980285e-05, "loss": 0.7566, "step": 3907 }, { "epoch": 0.595550137153307, "grad_norm": 0.921875, "learning_rate": 7.420948509635097e-05, "loss": 0.956, "step": 3908 }, { "epoch": 0.5957025297165498, "grad_norm": 0.89453125, "learning_rate": 7.416180067761377e-05, "loss": 1.0305, "step": 3909 }, { "epoch": 0.5958549222797928, "grad_norm": 0.77734375, "learning_rate": 7.411412255343344e-05, "loss": 1.021, "step": 3910 }, { "epoch": 0.5960073148430357, "grad_norm": 1.0234375, "learning_rate": 7.406645073542514e-05, "loss": 0.9878, "step": 3911 }, { "epoch": 0.5961597074062785, "grad_norm": 0.79296875, "learning_rate": 7.401878523520236e-05, "loss": 0.939, "step": 3912 }, { "epoch": 0.5963120999695215, "grad_norm": 0.83984375, "learning_rate": 7.39711260643771e-05, "loss": 0.7252, "step": 3913 }, { "epoch": 0.5964644925327645, "grad_norm": 0.92578125, "learning_rate": 7.392347323455986e-05, "loss": 1.183, "step": 3914 }, { "epoch": 0.5966168850960073, "grad_norm": 1.2265625, "learning_rate": 7.38758267573595e-05, "loss": 1.0203, "step": 3915 }, { "epoch": 0.5967692776592503, "grad_norm": 0.9453125, "learning_rate": 7.382818664438342e-05, "loss": 0.9833, "step": 3916 }, { "epoch": 0.5969216702224931, "grad_norm": 0.765625, "learning_rate": 7.378055290723744e-05, "loss": 0.9311, "step": 3917 }, { "epoch": 0.597074062785736, "grad_norm": 1.0234375, "learning_rate": 7.37329255575258e-05, "loss": 1.1494, "step": 3918 }, { "epoch": 0.597226455348979, "grad_norm": 1.109375, "learning_rate": 7.368530460685124e-05, "loss": 1.0384, "step": 3919 }, { "epoch": 0.5973788479122218, "grad_norm": 0.859375, "learning_rate": 7.363769006681489e-05, "loss": 1.0286, "step": 3920 }, { "epoch": 0.5975312404754648, "grad_norm": 1.1796875, "learning_rate": 7.359008194901632e-05, "loss": 1.0647, "step": 3921 }, { "epoch": 0.5976836330387078, "grad_norm": 1.2109375, "learning_rate": 7.354248026505359e-05, "loss": 1.1266, "step": 3922 }, { "epoch": 0.5978360256019506, "grad_norm": 0.8828125, "learning_rate": 7.349488502652313e-05, "loss": 0.9542, "step": 3923 }, { "epoch": 0.5979884181651935, "grad_norm": 0.8046875, "learning_rate": 7.34472962450198e-05, "loss": 0.825, "step": 3924 }, { "epoch": 0.5981408107284365, "grad_norm": 1.03125, "learning_rate": 7.339971393213695e-05, "loss": 1.0699, "step": 3925 }, { "epoch": 0.5982932032916793, "grad_norm": 0.859375, "learning_rate": 7.335213809946634e-05, "loss": 0.8812, "step": 3926 }, { "epoch": 0.5984455958549223, "grad_norm": 0.87890625, "learning_rate": 7.330456875859805e-05, "loss": 1.0283, "step": 3927 }, { "epoch": 0.5985979884181651, "grad_norm": 1.0, "learning_rate": 7.325700592112072e-05, "loss": 0.973, "step": 3928 }, { "epoch": 0.5987503809814081, "grad_norm": 0.94921875, "learning_rate": 7.320944959862132e-05, "loss": 0.9973, "step": 3929 }, { "epoch": 0.598902773544651, "grad_norm": 0.8515625, "learning_rate": 7.316189980268524e-05, "loss": 0.9284, "step": 3930 }, { "epoch": 0.5990551661078939, "grad_norm": 0.87109375, "learning_rate": 7.311435654489631e-05, "loss": 0.9561, "step": 3931 }, { "epoch": 0.5992075586711368, "grad_norm": 0.84765625, "learning_rate": 7.306681983683676e-05, "loss": 0.9312, "step": 3932 }, { "epoch": 0.5993599512343798, "grad_norm": 1.21875, "learning_rate": 7.301928969008717e-05, "loss": 0.9758, "step": 3933 }, { "epoch": 0.5995123437976226, "grad_norm": 0.921875, "learning_rate": 7.297176611622664e-05, "loss": 1.0254, "step": 3934 }, { "epoch": 0.5996647363608656, "grad_norm": 0.9140625, "learning_rate": 7.292424912683253e-05, "loss": 1.0044, "step": 3935 }, { "epoch": 0.5998171289241085, "grad_norm": 0.62890625, "learning_rate": 7.28767387334807e-05, "loss": 0.781, "step": 3936 }, { "epoch": 0.5999695214873514, "grad_norm": 1.0078125, "learning_rate": 7.282923494774537e-05, "loss": 0.8754, "step": 3937 }, { "epoch": 0.6001219140505943, "grad_norm": 1.140625, "learning_rate": 7.27817377811991e-05, "loss": 0.8132, "step": 3938 }, { "epoch": 0.6002743066138373, "grad_norm": 0.546875, "learning_rate": 7.273424724541293e-05, "loss": 0.7118, "step": 3939 }, { "epoch": 0.6004266991770801, "grad_norm": 0.84765625, "learning_rate": 7.268676335195623e-05, "loss": 0.95, "step": 3940 }, { "epoch": 0.6005790917403231, "grad_norm": 1.390625, "learning_rate": 7.263928611239672e-05, "loss": 0.9423, "step": 3941 }, { "epoch": 0.6007314843035659, "grad_norm": 0.84765625, "learning_rate": 7.25918155383006e-05, "loss": 0.9581, "step": 3942 }, { "epoch": 0.6008838768668089, "grad_norm": 0.9375, "learning_rate": 7.254435164123234e-05, "loss": 0.9653, "step": 3943 }, { "epoch": 0.6010362694300518, "grad_norm": 0.88671875, "learning_rate": 7.249689443275482e-05, "loss": 0.832, "step": 3944 }, { "epoch": 0.6011886619932947, "grad_norm": 1.0625, "learning_rate": 7.244944392442935e-05, "loss": 0.9651, "step": 3945 }, { "epoch": 0.6013410545565376, "grad_norm": 0.8984375, "learning_rate": 7.240200012781551e-05, "loss": 1.058, "step": 3946 }, { "epoch": 0.6014934471197806, "grad_norm": 1.1953125, "learning_rate": 7.235456305447129e-05, "loss": 1.0359, "step": 3947 }, { "epoch": 0.6016458396830234, "grad_norm": 0.734375, "learning_rate": 7.230713271595307e-05, "loss": 0.8015, "step": 3948 }, { "epoch": 0.6017982322462664, "grad_norm": 0.91796875, "learning_rate": 7.225970912381556e-05, "loss": 0.9864, "step": 3949 }, { "epoch": 0.6019506248095093, "grad_norm": 1.03125, "learning_rate": 7.221229228961179e-05, "loss": 1.1419, "step": 3950 }, { "epoch": 0.6021030173727522, "grad_norm": 0.921875, "learning_rate": 7.216488222489325e-05, "loss": 1.0426, "step": 3951 }, { "epoch": 0.6022554099359951, "grad_norm": 0.78125, "learning_rate": 7.211747894120964e-05, "loss": 0.8224, "step": 3952 }, { "epoch": 0.6024078024992381, "grad_norm": 0.9609375, "learning_rate": 7.207008245010915e-05, "loss": 1.2136, "step": 3953 }, { "epoch": 0.6025601950624809, "grad_norm": 0.70703125, "learning_rate": 7.20226927631382e-05, "loss": 0.8924, "step": 3954 }, { "epoch": 0.6027125876257239, "grad_norm": 1.03125, "learning_rate": 7.197530989184161e-05, "loss": 1.0629, "step": 3955 }, { "epoch": 0.6028649801889667, "grad_norm": 0.91015625, "learning_rate": 7.192793384776255e-05, "loss": 1.038, "step": 3956 }, { "epoch": 0.6030173727522097, "grad_norm": 1.296875, "learning_rate": 7.188056464244249e-05, "loss": 0.8511, "step": 3957 }, { "epoch": 0.6031697653154526, "grad_norm": 0.9765625, "learning_rate": 7.183320228742122e-05, "loss": 1.1172, "step": 3958 }, { "epoch": 0.6033221578786955, "grad_norm": 0.77734375, "learning_rate": 7.178584679423695e-05, "loss": 0.8936, "step": 3959 }, { "epoch": 0.6034745504419384, "grad_norm": 0.671875, "learning_rate": 7.17384981744261e-05, "loss": 0.993, "step": 3960 }, { "epoch": 0.6036269430051814, "grad_norm": 0.91796875, "learning_rate": 7.169115643952351e-05, "loss": 1.1321, "step": 3961 }, { "epoch": 0.6037793355684242, "grad_norm": 1.078125, "learning_rate": 7.164382160106231e-05, "loss": 0.9836, "step": 3962 }, { "epoch": 0.6039317281316672, "grad_norm": 0.9609375, "learning_rate": 7.159649367057395e-05, "loss": 0.9716, "step": 3963 }, { "epoch": 0.6040841206949101, "grad_norm": 1.0703125, "learning_rate": 7.154917265958814e-05, "loss": 0.8576, "step": 3964 }, { "epoch": 0.604236513258153, "grad_norm": 0.9453125, "learning_rate": 7.150185857963303e-05, "loss": 1.0519, "step": 3965 }, { "epoch": 0.6043889058213959, "grad_norm": 0.88671875, "learning_rate": 7.145455144223496e-05, "loss": 1.0349, "step": 3966 }, { "epoch": 0.6045412983846389, "grad_norm": 1.375, "learning_rate": 7.140725125891868e-05, "loss": 1.0207, "step": 3967 }, { "epoch": 0.6046936909478817, "grad_norm": 0.91015625, "learning_rate": 7.135995804120715e-05, "loss": 0.8857, "step": 3968 }, { "epoch": 0.6048460835111247, "grad_norm": 0.80078125, "learning_rate": 7.131267180062168e-05, "loss": 0.8689, "step": 3969 }, { "epoch": 0.6049984760743675, "grad_norm": 0.96484375, "learning_rate": 7.12653925486819e-05, "loss": 0.9444, "step": 3970 }, { "epoch": 0.6051508686376105, "grad_norm": 0.9375, "learning_rate": 7.121812029690572e-05, "loss": 1.0025, "step": 3971 }, { "epoch": 0.6053032612008534, "grad_norm": 0.66796875, "learning_rate": 7.11708550568093e-05, "loss": 0.9248, "step": 3972 }, { "epoch": 0.6054556537640963, "grad_norm": 1.109375, "learning_rate": 7.11235968399072e-05, "loss": 1.0393, "step": 3973 }, { "epoch": 0.6056080463273392, "grad_norm": 0.96484375, "learning_rate": 7.107634565771212e-05, "loss": 0.9511, "step": 3974 }, { "epoch": 0.6057604388905822, "grad_norm": 0.7890625, "learning_rate": 7.102910152173517e-05, "loss": 0.8437, "step": 3975 }, { "epoch": 0.605912831453825, "grad_norm": 0.86328125, "learning_rate": 7.098186444348571e-05, "loss": 0.9844, "step": 3976 }, { "epoch": 0.606065224017068, "grad_norm": 0.78515625, "learning_rate": 7.093463443447137e-05, "loss": 0.7285, "step": 3977 }, { "epoch": 0.6062176165803109, "grad_norm": 0.9765625, "learning_rate": 7.088741150619803e-05, "loss": 0.8267, "step": 3978 }, { "epoch": 0.6063700091435538, "grad_norm": 0.89453125, "learning_rate": 7.08401956701699e-05, "loss": 0.9363, "step": 3979 }, { "epoch": 0.6065224017067967, "grad_norm": 0.9921875, "learning_rate": 7.079298693788945e-05, "loss": 0.982, "step": 3980 }, { "epoch": 0.6066747942700397, "grad_norm": 1.0859375, "learning_rate": 7.074578532085736e-05, "loss": 1.0229, "step": 3981 }, { "epoch": 0.6068271868332825, "grad_norm": 2.59375, "learning_rate": 7.069859083057266e-05, "loss": 1.0282, "step": 3982 }, { "epoch": 0.6069795793965255, "grad_norm": 0.9296875, "learning_rate": 7.065140347853258e-05, "loss": 1.0143, "step": 3983 }, { "epoch": 0.6071319719597683, "grad_norm": 1.0078125, "learning_rate": 7.060422327623267e-05, "loss": 0.9154, "step": 3984 }, { "epoch": 0.6072843645230113, "grad_norm": 0.76171875, "learning_rate": 7.05570502351667e-05, "loss": 0.9234, "step": 3985 }, { "epoch": 0.6074367570862542, "grad_norm": 1.4140625, "learning_rate": 7.050988436682666e-05, "loss": 0.9002, "step": 3986 }, { "epoch": 0.6075891496494971, "grad_norm": 1.03125, "learning_rate": 7.046272568270288e-05, "loss": 1.079, "step": 3987 }, { "epoch": 0.60774154221274, "grad_norm": 1.03125, "learning_rate": 7.041557419428389e-05, "loss": 1.048, "step": 3988 }, { "epoch": 0.607893934775983, "grad_norm": 0.84765625, "learning_rate": 7.036842991305644e-05, "loss": 0.8781, "step": 3989 }, { "epoch": 0.6080463273392258, "grad_norm": 1.1484375, "learning_rate": 7.032129285050557e-05, "loss": 0.9433, "step": 3990 }, { "epoch": 0.6081987199024688, "grad_norm": 0.9296875, "learning_rate": 7.027416301811456e-05, "loss": 1.0532, "step": 3991 }, { "epoch": 0.6083511124657117, "grad_norm": 1.1171875, "learning_rate": 7.02270404273649e-05, "loss": 0.9314, "step": 3992 }, { "epoch": 0.6085035050289546, "grad_norm": 1.28125, "learning_rate": 7.017992508973635e-05, "loss": 1.0326, "step": 3993 }, { "epoch": 0.6086558975921975, "grad_norm": 0.89453125, "learning_rate": 7.013281701670684e-05, "loss": 0.9267, "step": 3994 }, { "epoch": 0.6088082901554405, "grad_norm": 0.84765625, "learning_rate": 7.008571621975262e-05, "loss": 0.8763, "step": 3995 }, { "epoch": 0.6089606827186833, "grad_norm": 1.09375, "learning_rate": 7.00386227103481e-05, "loss": 1.0264, "step": 3996 }, { "epoch": 0.6091130752819263, "grad_norm": 0.8359375, "learning_rate": 6.999153649996595e-05, "loss": 0.8562, "step": 3997 }, { "epoch": 0.6092654678451691, "grad_norm": 0.87890625, "learning_rate": 6.994445760007702e-05, "loss": 0.8381, "step": 3998 }, { "epoch": 0.6094178604084121, "grad_norm": 1.0, "learning_rate": 6.989738602215044e-05, "loss": 0.8984, "step": 3999 }, { "epoch": 0.609570252971655, "grad_norm": 0.703125, "learning_rate": 6.985032177765348e-05, "loss": 0.917, "step": 4000 }, { "epoch": 0.6097226455348979, "grad_norm": 1.171875, "learning_rate": 6.980326487805174e-05, "loss": 1.0559, "step": 4001 }, { "epoch": 0.6098750380981408, "grad_norm": 0.8046875, "learning_rate": 6.975621533480888e-05, "loss": 0.8656, "step": 4002 }, { "epoch": 0.6100274306613838, "grad_norm": 0.7578125, "learning_rate": 6.970917315938687e-05, "loss": 0.8404, "step": 4003 }, { "epoch": 0.6101798232246266, "grad_norm": 0.8828125, "learning_rate": 6.966213836324591e-05, "loss": 0.9891, "step": 4004 }, { "epoch": 0.6103322157878696, "grad_norm": 1.0625, "learning_rate": 6.961511095784429e-05, "loss": 0.8489, "step": 4005 }, { "epoch": 0.6104846083511125, "grad_norm": 0.9921875, "learning_rate": 6.956809095463856e-05, "loss": 0.9807, "step": 4006 }, { "epoch": 0.6106370009143554, "grad_norm": 0.953125, "learning_rate": 6.952107836508352e-05, "loss": 0.9944, "step": 4007 }, { "epoch": 0.6107893934775983, "grad_norm": 0.9453125, "learning_rate": 6.947407320063209e-05, "loss": 0.9524, "step": 4008 }, { "epoch": 0.6109417860408413, "grad_norm": 0.7890625, "learning_rate": 6.942707547273537e-05, "loss": 0.8872, "step": 4009 }, { "epoch": 0.6110941786040841, "grad_norm": 0.9609375, "learning_rate": 6.938008519284273e-05, "loss": 0.9363, "step": 4010 }, { "epoch": 0.6112465711673271, "grad_norm": 0.94921875, "learning_rate": 6.933310237240167e-05, "loss": 0.9529, "step": 4011 }, { "epoch": 0.6113989637305699, "grad_norm": 0.77734375, "learning_rate": 6.928612702285785e-05, "loss": 0.7765, "step": 4012 }, { "epoch": 0.6115513562938129, "grad_norm": 0.8125, "learning_rate": 6.923915915565517e-05, "loss": 1.0225, "step": 4013 }, { "epoch": 0.6117037488570558, "grad_norm": 0.74609375, "learning_rate": 6.919219878223568e-05, "loss": 0.8042, "step": 4014 }, { "epoch": 0.6118561414202986, "grad_norm": 0.69921875, "learning_rate": 6.914524591403957e-05, "loss": 0.8171, "step": 4015 }, { "epoch": 0.6120085339835416, "grad_norm": 0.90625, "learning_rate": 6.909830056250527e-05, "loss": 1.0006, "step": 4016 }, { "epoch": 0.6121609265467846, "grad_norm": 0.90234375, "learning_rate": 6.90513627390693e-05, "loss": 1.0267, "step": 4017 }, { "epoch": 0.6123133191100274, "grad_norm": 1.109375, "learning_rate": 6.900443245516646e-05, "loss": 1.0635, "step": 4018 }, { "epoch": 0.6124657116732704, "grad_norm": 1.0859375, "learning_rate": 6.895750972222958e-05, "loss": 1.2096, "step": 4019 }, { "epoch": 0.6126181042365133, "grad_norm": 0.71484375, "learning_rate": 6.891059455168972e-05, "loss": 1.0048, "step": 4020 }, { "epoch": 0.6127704967997561, "grad_norm": 1.0859375, "learning_rate": 6.886368695497609e-05, "loss": 0.9087, "step": 4021 }, { "epoch": 0.6129228893629991, "grad_norm": 1.390625, "learning_rate": 6.88167869435161e-05, "loss": 0.9535, "step": 4022 }, { "epoch": 0.6130752819262419, "grad_norm": 0.859375, "learning_rate": 6.87698945287352e-05, "loss": 0.9352, "step": 4023 }, { "epoch": 0.6132276744894849, "grad_norm": 0.8359375, "learning_rate": 6.872300972205712e-05, "loss": 1.0744, "step": 4024 }, { "epoch": 0.6133800670527279, "grad_norm": 1.0390625, "learning_rate": 6.867613253490364e-05, "loss": 0.9311, "step": 4025 }, { "epoch": 0.6135324596159707, "grad_norm": 0.75, "learning_rate": 6.862926297869468e-05, "loss": 0.9167, "step": 4026 }, { "epoch": 0.6136848521792136, "grad_norm": 1.1640625, "learning_rate": 6.858240106484841e-05, "loss": 0.935, "step": 4027 }, { "epoch": 0.6138372447424566, "grad_norm": 0.85546875, "learning_rate": 6.853554680478105e-05, "loss": 1.0715, "step": 4028 }, { "epoch": 0.6139896373056994, "grad_norm": 0.73828125, "learning_rate": 6.848870020990691e-05, "loss": 1.0145, "step": 4029 }, { "epoch": 0.6141420298689424, "grad_norm": 0.828125, "learning_rate": 6.844186129163859e-05, "loss": 0.9256, "step": 4030 }, { "epoch": 0.6142944224321853, "grad_norm": 1.1640625, "learning_rate": 6.839503006138663e-05, "loss": 1.1668, "step": 4031 }, { "epoch": 0.6144468149954282, "grad_norm": 1.0078125, "learning_rate": 6.834820653055987e-05, "loss": 0.8716, "step": 4032 }, { "epoch": 0.6145992075586711, "grad_norm": 0.9765625, "learning_rate": 6.830139071056513e-05, "loss": 1.0509, "step": 4033 }, { "epoch": 0.6147516001219141, "grad_norm": 0.90234375, "learning_rate": 6.825458261280746e-05, "loss": 1.0075, "step": 4034 }, { "epoch": 0.6149039926851569, "grad_norm": 1.0546875, "learning_rate": 6.820778224868998e-05, "loss": 0.8186, "step": 4035 }, { "epoch": 0.6150563852483999, "grad_norm": 0.8671875, "learning_rate": 6.816098962961393e-05, "loss": 1.0305, "step": 4036 }, { "epoch": 0.6152087778116427, "grad_norm": 0.91796875, "learning_rate": 6.811420476697863e-05, "loss": 0.9822, "step": 4037 }, { "epoch": 0.6153611703748857, "grad_norm": 0.85546875, "learning_rate": 6.806742767218159e-05, "loss": 1.0543, "step": 4038 }, { "epoch": 0.6155135629381286, "grad_norm": 0.796875, "learning_rate": 6.80206583566184e-05, "loss": 0.9359, "step": 4039 }, { "epoch": 0.6156659555013715, "grad_norm": 1.1796875, "learning_rate": 6.797389683168264e-05, "loss": 0.9975, "step": 4040 }, { "epoch": 0.6158183480646144, "grad_norm": 0.86328125, "learning_rate": 6.79271431087662e-05, "loss": 1.0435, "step": 4041 }, { "epoch": 0.6159707406278574, "grad_norm": 0.8984375, "learning_rate": 6.788039719925891e-05, "loss": 0.9451, "step": 4042 }, { "epoch": 0.6161231331911002, "grad_norm": 0.984375, "learning_rate": 6.783365911454875e-05, "loss": 0.7251, "step": 4043 }, { "epoch": 0.6162755257543432, "grad_norm": 0.7578125, "learning_rate": 6.778692886602178e-05, "loss": 1.0392, "step": 4044 }, { "epoch": 0.6164279183175861, "grad_norm": 0.9296875, "learning_rate": 6.774020646506222e-05, "loss": 1.0822, "step": 4045 }, { "epoch": 0.616580310880829, "grad_norm": 1.109375, "learning_rate": 6.769349192305218e-05, "loss": 1.112, "step": 4046 }, { "epoch": 0.6167327034440719, "grad_norm": 1.1171875, "learning_rate": 6.764678525137214e-05, "loss": 1.1057, "step": 4047 }, { "epoch": 0.6168850960073149, "grad_norm": 0.79296875, "learning_rate": 6.760008646140043e-05, "loss": 0.9882, "step": 4048 }, { "epoch": 0.6170374885705577, "grad_norm": 0.88671875, "learning_rate": 6.755339556451361e-05, "loss": 0.8414, "step": 4049 }, { "epoch": 0.6171898811338007, "grad_norm": 0.8046875, "learning_rate": 6.750671257208623e-05, "loss": 1.0684, "step": 4050 }, { "epoch": 0.6173422736970435, "grad_norm": 0.59765625, "learning_rate": 6.746003749549089e-05, "loss": 0.9017, "step": 4051 }, { "epoch": 0.6174946662602865, "grad_norm": 1.0234375, "learning_rate": 6.741337034609838e-05, "loss": 1.0801, "step": 4052 }, { "epoch": 0.6176470588235294, "grad_norm": 1.0625, "learning_rate": 6.736671113527745e-05, "loss": 1.0809, "step": 4053 }, { "epoch": 0.6177994513867723, "grad_norm": 1.5078125, "learning_rate": 6.732005987439494e-05, "loss": 1.1487, "step": 4054 }, { "epoch": 0.6179518439500152, "grad_norm": 0.9296875, "learning_rate": 6.727341657481581e-05, "loss": 1.0457, "step": 4055 }, { "epoch": 0.6181042365132582, "grad_norm": 1.078125, "learning_rate": 6.722678124790304e-05, "loss": 0.9764, "step": 4056 }, { "epoch": 0.618256629076501, "grad_norm": 1.265625, "learning_rate": 6.71801539050176e-05, "loss": 0.9304, "step": 4057 }, { "epoch": 0.618409021639744, "grad_norm": 0.953125, "learning_rate": 6.713353455751866e-05, "loss": 1.0785, "step": 4058 }, { "epoch": 0.6185614142029869, "grad_norm": 0.91796875, "learning_rate": 6.708692321676335e-05, "loss": 0.8593, "step": 4059 }, { "epoch": 0.6187138067662298, "grad_norm": 1.09375, "learning_rate": 6.704031989410678e-05, "loss": 0.9896, "step": 4060 }, { "epoch": 0.6188661993294727, "grad_norm": 1.0234375, "learning_rate": 6.69937246009023e-05, "loss": 1.0723, "step": 4061 }, { "epoch": 0.6190185918927157, "grad_norm": 1.0, "learning_rate": 6.694713734850115e-05, "loss": 1.0491, "step": 4062 }, { "epoch": 0.6191709844559585, "grad_norm": 0.99609375, "learning_rate": 6.69005581482526e-05, "loss": 0.9682, "step": 4063 }, { "epoch": 0.6193233770192015, "grad_norm": 0.859375, "learning_rate": 6.685398701150411e-05, "loss": 0.9138, "step": 4064 }, { "epoch": 0.6194757695824443, "grad_norm": 0.81640625, "learning_rate": 6.6807423949601e-05, "loss": 1.0322, "step": 4065 }, { "epoch": 0.6196281621456873, "grad_norm": 1.0078125, "learning_rate": 6.676086897388676e-05, "loss": 1.038, "step": 4066 }, { "epoch": 0.6197805547089302, "grad_norm": 1.21875, "learning_rate": 6.671432209570284e-05, "loss": 1.1604, "step": 4067 }, { "epoch": 0.6199329472721731, "grad_norm": 1.171875, "learning_rate": 6.666778332638866e-05, "loss": 0.892, "step": 4068 }, { "epoch": 0.620085339835416, "grad_norm": 1.21875, "learning_rate": 6.662125267728183e-05, "loss": 1.0183, "step": 4069 }, { "epoch": 0.620237732398659, "grad_norm": 0.89453125, "learning_rate": 6.657473015971785e-05, "loss": 1.0348, "step": 4070 }, { "epoch": 0.6203901249619018, "grad_norm": 1.4296875, "learning_rate": 6.652821578503022e-05, "loss": 1.0394, "step": 4071 }, { "epoch": 0.6205425175251448, "grad_norm": 1.015625, "learning_rate": 6.648170956455059e-05, "loss": 0.964, "step": 4072 }, { "epoch": 0.6206949100883877, "grad_norm": 0.984375, "learning_rate": 6.643521150960854e-05, "loss": 0.9798, "step": 4073 }, { "epoch": 0.6208473026516306, "grad_norm": 0.875, "learning_rate": 6.638872163153158e-05, "loss": 0.835, "step": 4074 }, { "epoch": 0.6209996952148735, "grad_norm": 0.953125, "learning_rate": 6.63422399416454e-05, "loss": 1.16, "step": 4075 }, { "epoch": 0.6211520877781165, "grad_norm": 0.796875, "learning_rate": 6.62957664512736e-05, "loss": 0.8307, "step": 4076 }, { "epoch": 0.6213044803413593, "grad_norm": 0.9375, "learning_rate": 6.62493011717377e-05, "loss": 1.112, "step": 4077 }, { "epoch": 0.6214568729046023, "grad_norm": 1.0078125, "learning_rate": 6.620284411435745e-05, "loss": 1.0052, "step": 4078 }, { "epoch": 0.6216092654678451, "grad_norm": 0.8125, "learning_rate": 6.615639529045036e-05, "loss": 0.9884, "step": 4079 }, { "epoch": 0.6217616580310881, "grad_norm": 1.1953125, "learning_rate": 6.610995471133203e-05, "loss": 1.2262, "step": 4080 }, { "epoch": 0.621914050594331, "grad_norm": 0.94921875, "learning_rate": 6.60635223883161e-05, "loss": 1.0948, "step": 4081 }, { "epoch": 0.6220664431575739, "grad_norm": 0.8046875, "learning_rate": 6.601709833271412e-05, "loss": 1.0106, "step": 4082 }, { "epoch": 0.6222188357208168, "grad_norm": 1.2109375, "learning_rate": 6.59706825558357e-05, "loss": 0.9223, "step": 4083 }, { "epoch": 0.6223712282840598, "grad_norm": 0.91015625, "learning_rate": 6.592427506898835e-05, "loss": 1.0246, "step": 4084 }, { "epoch": 0.6225236208473026, "grad_norm": 1.1640625, "learning_rate": 6.587787588347758e-05, "loss": 0.987, "step": 4085 }, { "epoch": 0.6226760134105456, "grad_norm": 0.87109375, "learning_rate": 6.583148501060697e-05, "loss": 0.9865, "step": 4086 }, { "epoch": 0.6228284059737885, "grad_norm": 0.9140625, "learning_rate": 6.578510246167797e-05, "loss": 0.9351, "step": 4087 }, { "epoch": 0.6229807985370314, "grad_norm": 1.1484375, "learning_rate": 6.573872824798997e-05, "loss": 1.053, "step": 4088 }, { "epoch": 0.6231331911002743, "grad_norm": 0.8984375, "learning_rate": 6.569236238084051e-05, "loss": 0.9342, "step": 4089 }, { "epoch": 0.6232855836635173, "grad_norm": 0.9140625, "learning_rate": 6.564600487152492e-05, "loss": 0.911, "step": 4090 }, { "epoch": 0.6234379762267601, "grad_norm": 0.82421875, "learning_rate": 6.559965573133653e-05, "loss": 0.7655, "step": 4091 }, { "epoch": 0.6235903687900031, "grad_norm": 1.1328125, "learning_rate": 6.555331497156672e-05, "loss": 1.0779, "step": 4092 }, { "epoch": 0.6237427613532459, "grad_norm": 1.03125, "learning_rate": 6.550698260350475e-05, "loss": 0.7817, "step": 4093 }, { "epoch": 0.6238951539164889, "grad_norm": 0.84375, "learning_rate": 6.546065863843778e-05, "loss": 0.9902, "step": 4094 }, { "epoch": 0.6240475464797318, "grad_norm": 0.98046875, "learning_rate": 6.541434308765108e-05, "loss": 0.9961, "step": 4095 }, { "epoch": 0.6241999390429747, "grad_norm": 0.90234375, "learning_rate": 6.536803596242775e-05, "loss": 0.9634, "step": 4096 }, { "epoch": 0.6243523316062176, "grad_norm": 1.1875, "learning_rate": 6.53217372740489e-05, "loss": 1.0167, "step": 4097 }, { "epoch": 0.6245047241694606, "grad_norm": 1.0546875, "learning_rate": 6.527544703379351e-05, "loss": 0.953, "step": 4098 }, { "epoch": 0.6246571167327034, "grad_norm": 1.0078125, "learning_rate": 6.522916525293857e-05, "loss": 1.0467, "step": 4099 }, { "epoch": 0.6248095092959464, "grad_norm": 1.0703125, "learning_rate": 6.518289194275899e-05, "loss": 0.9987, "step": 4100 }, { "epoch": 0.6249619018591893, "grad_norm": 1.078125, "learning_rate": 6.513662711452766e-05, "loss": 1.0714, "step": 4101 }, { "epoch": 0.6251142944224322, "grad_norm": 0.80078125, "learning_rate": 6.509037077951523e-05, "loss": 0.8982, "step": 4102 }, { "epoch": 0.6252666869856751, "grad_norm": 1.109375, "learning_rate": 6.504412294899053e-05, "loss": 0.992, "step": 4103 }, { "epoch": 0.6254190795489181, "grad_norm": 0.93359375, "learning_rate": 6.499788363422017e-05, "loss": 0.7757, "step": 4104 }, { "epoch": 0.6255714721121609, "grad_norm": 0.9765625, "learning_rate": 6.495165284646865e-05, "loss": 0.9254, "step": 4105 }, { "epoch": 0.6257238646754039, "grad_norm": 0.81640625, "learning_rate": 6.490543059699852e-05, "loss": 0.9247, "step": 4106 }, { "epoch": 0.6258762572386467, "grad_norm": 0.97265625, "learning_rate": 6.485921689707019e-05, "loss": 1.0428, "step": 4107 }, { "epoch": 0.6260286498018897, "grad_norm": 1.046875, "learning_rate": 6.481301175794193e-05, "loss": 0.977, "step": 4108 }, { "epoch": 0.6261810423651326, "grad_norm": 0.90234375, "learning_rate": 6.476681519087e-05, "loss": 0.8981, "step": 4109 }, { "epoch": 0.6263334349283755, "grad_norm": 1.140625, "learning_rate": 6.47206272071086e-05, "loss": 0.9788, "step": 4110 }, { "epoch": 0.6264858274916184, "grad_norm": 1.2421875, "learning_rate": 6.467444781790966e-05, "loss": 1.0624, "step": 4111 }, { "epoch": 0.6266382200548614, "grad_norm": 1.0546875, "learning_rate": 6.462827703452327e-05, "loss": 0.9606, "step": 4112 }, { "epoch": 0.6267906126181042, "grad_norm": 0.92578125, "learning_rate": 6.458211486819724e-05, "loss": 0.8436, "step": 4113 }, { "epoch": 0.6269430051813472, "grad_norm": 0.7578125, "learning_rate": 6.453596133017736e-05, "loss": 0.9154, "step": 4114 }, { "epoch": 0.6270953977445901, "grad_norm": 1.1328125, "learning_rate": 6.44898164317073e-05, "loss": 0.9739, "step": 4115 }, { "epoch": 0.627247790307833, "grad_norm": 1.15625, "learning_rate": 6.444368018402853e-05, "loss": 1.0182, "step": 4116 }, { "epoch": 0.6274001828710759, "grad_norm": 0.734375, "learning_rate": 6.439755259838063e-05, "loss": 0.9483, "step": 4117 }, { "epoch": 0.6275525754343189, "grad_norm": 0.87890625, "learning_rate": 6.435143368600091e-05, "loss": 0.9459, "step": 4118 }, { "epoch": 0.6277049679975617, "grad_norm": 0.7265625, "learning_rate": 6.430532345812452e-05, "loss": 0.8979, "step": 4119 }, { "epoch": 0.6278573605608047, "grad_norm": 0.8671875, "learning_rate": 6.425922192598468e-05, "loss": 0.9712, "step": 4120 }, { "epoch": 0.6280097531240475, "grad_norm": 1.15625, "learning_rate": 6.421312910081235e-05, "loss": 0.8896, "step": 4121 }, { "epoch": 0.6281621456872905, "grad_norm": 0.80078125, "learning_rate": 6.416704499383633e-05, "loss": 1.0579, "step": 4122 }, { "epoch": 0.6283145382505334, "grad_norm": 0.87890625, "learning_rate": 6.412096961628349e-05, "loss": 0.8151, "step": 4123 }, { "epoch": 0.6284669308137762, "grad_norm": 0.86328125, "learning_rate": 6.407490297937841e-05, "loss": 0.8946, "step": 4124 }, { "epoch": 0.6286193233770192, "grad_norm": 0.96875, "learning_rate": 6.402884509434352e-05, "loss": 0.8105, "step": 4125 }, { "epoch": 0.6287717159402622, "grad_norm": 0.90234375, "learning_rate": 6.398279597239929e-05, "loss": 0.9439, "step": 4126 }, { "epoch": 0.628924108503505, "grad_norm": 1.1875, "learning_rate": 6.393675562476391e-05, "loss": 0.9986, "step": 4127 }, { "epoch": 0.629076501066748, "grad_norm": 1.1875, "learning_rate": 6.38907240626534e-05, "loss": 0.9311, "step": 4128 }, { "epoch": 0.6292288936299909, "grad_norm": 0.9453125, "learning_rate": 6.384470129728182e-05, "loss": 1.0662, "step": 4129 }, { "epoch": 0.6293812861932337, "grad_norm": 0.78515625, "learning_rate": 6.379868733986089e-05, "loss": 0.8782, "step": 4130 }, { "epoch": 0.6295336787564767, "grad_norm": 0.921875, "learning_rate": 6.375268220160033e-05, "loss": 0.8111, "step": 4131 }, { "epoch": 0.6296860713197195, "grad_norm": 1.2734375, "learning_rate": 6.370668589370765e-05, "loss": 0.8524, "step": 4132 }, { "epoch": 0.6298384638829625, "grad_norm": 0.703125, "learning_rate": 6.366069842738813e-05, "loss": 0.913, "step": 4133 }, { "epoch": 0.6299908564462054, "grad_norm": 0.9296875, "learning_rate": 6.36147198138451e-05, "loss": 1.1668, "step": 4134 }, { "epoch": 0.6301432490094483, "grad_norm": 0.90234375, "learning_rate": 6.356875006427957e-05, "loss": 1.0, "step": 4135 }, { "epoch": 0.6302956415726912, "grad_norm": 0.83203125, "learning_rate": 6.352278918989034e-05, "loss": 0.8717, "step": 4136 }, { "epoch": 0.6304480341359342, "grad_norm": 1.2421875, "learning_rate": 6.347683720187426e-05, "loss": 1.0691, "step": 4137 }, { "epoch": 0.630600426699177, "grad_norm": 1.2734375, "learning_rate": 6.343089411142587e-05, "loss": 0.9258, "step": 4138 }, { "epoch": 0.63075281926242, "grad_norm": 0.87109375, "learning_rate": 6.338495992973749e-05, "loss": 1.1499, "step": 4139 }, { "epoch": 0.630905211825663, "grad_norm": 1.1171875, "learning_rate": 6.333903466799945e-05, "loss": 0.9343, "step": 4140 }, { "epoch": 0.6310576043889058, "grad_norm": 1.2421875, "learning_rate": 6.329311833739978e-05, "loss": 0.9067, "step": 4141 }, { "epoch": 0.6312099969521487, "grad_norm": 1.1171875, "learning_rate": 6.324721094912427e-05, "loss": 1.0066, "step": 4142 }, { "epoch": 0.6313623895153917, "grad_norm": 0.87109375, "learning_rate": 6.320131251435675e-05, "loss": 1.077, "step": 4143 }, { "epoch": 0.6315147820786345, "grad_norm": 0.94140625, "learning_rate": 6.315542304427866e-05, "loss": 0.926, "step": 4144 }, { "epoch": 0.6316671746418775, "grad_norm": 0.91015625, "learning_rate": 6.31095425500693e-05, "loss": 0.9065, "step": 4145 }, { "epoch": 0.6318195672051203, "grad_norm": 1.2265625, "learning_rate": 6.306367104290594e-05, "loss": 0.9525, "step": 4146 }, { "epoch": 0.6319719597683633, "grad_norm": 1.0390625, "learning_rate": 6.301780853396341e-05, "loss": 1.0481, "step": 4147 }, { "epoch": 0.6321243523316062, "grad_norm": 0.8828125, "learning_rate": 6.297195503441457e-05, "loss": 0.8695, "step": 4148 }, { "epoch": 0.6322767448948491, "grad_norm": 1.1953125, "learning_rate": 6.292611055542998e-05, "loss": 0.8544, "step": 4149 }, { "epoch": 0.632429137458092, "grad_norm": 0.85546875, "learning_rate": 6.28802751081779e-05, "loss": 0.8599, "step": 4150 }, { "epoch": 0.632581530021335, "grad_norm": 1.234375, "learning_rate": 6.28344487038247e-05, "loss": 0.9593, "step": 4151 }, { "epoch": 0.6327339225845778, "grad_norm": 1.1328125, "learning_rate": 6.278863135353421e-05, "loss": 1.0365, "step": 4152 }, { "epoch": 0.6328863151478208, "grad_norm": 1.0078125, "learning_rate": 6.274282306846819e-05, "loss": 0.9822, "step": 4153 }, { "epoch": 0.6330387077110637, "grad_norm": 0.9765625, "learning_rate": 6.269702385978627e-05, "loss": 1.0944, "step": 4154 }, { "epoch": 0.6331911002743066, "grad_norm": 0.8828125, "learning_rate": 6.26512337386458e-05, "loss": 0.9009, "step": 4155 }, { "epoch": 0.6333434928375495, "grad_norm": 0.94140625, "learning_rate": 6.260545271620181e-05, "loss": 1.0483, "step": 4156 }, { "epoch": 0.6334958854007925, "grad_norm": 1.171875, "learning_rate": 6.255968080360733e-05, "loss": 1.0348, "step": 4157 }, { "epoch": 0.6336482779640353, "grad_norm": 0.66796875, "learning_rate": 6.251391801201301e-05, "loss": 0.8636, "step": 4158 }, { "epoch": 0.6338006705272783, "grad_norm": 1.5078125, "learning_rate": 6.246816435256725e-05, "loss": 1.0245, "step": 4159 }, { "epoch": 0.6339530630905211, "grad_norm": 0.77734375, "learning_rate": 6.242241983641645e-05, "loss": 0.9503, "step": 4160 }, { "epoch": 0.6341054556537641, "grad_norm": 0.765625, "learning_rate": 6.237668447470451e-05, "loss": 0.9564, "step": 4161 }, { "epoch": 0.634257848217007, "grad_norm": 0.85546875, "learning_rate": 6.23309582785733e-05, "loss": 1.027, "step": 4162 }, { "epoch": 0.6344102407802499, "grad_norm": 0.921875, "learning_rate": 6.228524125916234e-05, "loss": 1.0883, "step": 4163 }, { "epoch": 0.6345626333434928, "grad_norm": 0.9140625, "learning_rate": 6.223953342760891e-05, "loss": 0.9465, "step": 4164 }, { "epoch": 0.6347150259067358, "grad_norm": 0.76953125, "learning_rate": 6.219383479504818e-05, "loss": 1.0289, "step": 4165 }, { "epoch": 0.6348674184699786, "grad_norm": 1.140625, "learning_rate": 6.214814537261297e-05, "loss": 0.9374, "step": 4166 }, { "epoch": 0.6350198110332216, "grad_norm": 1.2734375, "learning_rate": 6.210246517143384e-05, "loss": 1.0508, "step": 4167 }, { "epoch": 0.6351722035964645, "grad_norm": 0.90625, "learning_rate": 6.205679420263916e-05, "loss": 0.9357, "step": 4168 }, { "epoch": 0.6353245961597074, "grad_norm": 1.0390625, "learning_rate": 6.201113247735511e-05, "loss": 0.8576, "step": 4169 }, { "epoch": 0.6354769887229503, "grad_norm": 0.9375, "learning_rate": 6.196548000670538e-05, "loss": 0.9654, "step": 4170 }, { "epoch": 0.6356293812861933, "grad_norm": 0.953125, "learning_rate": 6.191983680181176e-05, "loss": 1.0692, "step": 4171 }, { "epoch": 0.6357817738494361, "grad_norm": 0.90234375, "learning_rate": 6.187420287379346e-05, "loss": 1.0567, "step": 4172 }, { "epoch": 0.6359341664126791, "grad_norm": 0.84765625, "learning_rate": 6.182857823376758e-05, "loss": 0.8926, "step": 4173 }, { "epoch": 0.6360865589759219, "grad_norm": 0.95703125, "learning_rate": 6.178296289284899e-05, "loss": 0.8675, "step": 4174 }, { "epoch": 0.6362389515391649, "grad_norm": 0.890625, "learning_rate": 6.17373568621502e-05, "loss": 1.0562, "step": 4175 }, { "epoch": 0.6363913441024078, "grad_norm": 0.85546875, "learning_rate": 6.169176015278146e-05, "loss": 0.7949, "step": 4176 }, { "epoch": 0.6365437366656507, "grad_norm": 1.140625, "learning_rate": 6.16461727758509e-05, "loss": 1.1476, "step": 4177 }, { "epoch": 0.6366961292288936, "grad_norm": 0.86328125, "learning_rate": 6.160059474246413e-05, "loss": 1.0888, "step": 4178 }, { "epoch": 0.6368485217921366, "grad_norm": 1.015625, "learning_rate": 6.155502606372471e-05, "loss": 0.9498, "step": 4179 }, { "epoch": 0.6370009143553794, "grad_norm": 0.69140625, "learning_rate": 6.150946675073382e-05, "loss": 0.8749, "step": 4180 }, { "epoch": 0.6371533069186224, "grad_norm": 0.9453125, "learning_rate": 6.146391681459025e-05, "loss": 0.8197, "step": 4181 }, { "epoch": 0.6373056994818653, "grad_norm": 1.078125, "learning_rate": 6.141837626639075e-05, "loss": 1.136, "step": 4182 }, { "epoch": 0.6374580920451082, "grad_norm": 0.96875, "learning_rate": 6.137284511722964e-05, "loss": 0.9917, "step": 4183 }, { "epoch": 0.6376104846083511, "grad_norm": 0.98828125, "learning_rate": 6.132732337819886e-05, "loss": 1.0051, "step": 4184 }, { "epoch": 0.6377628771715941, "grad_norm": 0.828125, "learning_rate": 6.128181106038828e-05, "loss": 0.8513, "step": 4185 }, { "epoch": 0.6379152697348369, "grad_norm": 0.94921875, "learning_rate": 6.123630817488529e-05, "loss": 0.8334, "step": 4186 }, { "epoch": 0.6380676622980799, "grad_norm": 0.9921875, "learning_rate": 6.119081473277501e-05, "loss": 1.1312, "step": 4187 }, { "epoch": 0.6382200548613227, "grad_norm": 1.1171875, "learning_rate": 6.114533074514043e-05, "loss": 1.1097, "step": 4188 }, { "epoch": 0.6383724474245657, "grad_norm": 0.91796875, "learning_rate": 6.1099856223062e-05, "loss": 0.9465, "step": 4189 }, { "epoch": 0.6385248399878086, "grad_norm": 1.6640625, "learning_rate": 6.105439117761793e-05, "loss": 1.0803, "step": 4190 }, { "epoch": 0.6386772325510515, "grad_norm": 0.921875, "learning_rate": 6.100893561988428e-05, "loss": 0.9136, "step": 4191 }, { "epoch": 0.6388296251142944, "grad_norm": 0.9375, "learning_rate": 6.09634895609346e-05, "loss": 0.9774, "step": 4192 }, { "epoch": 0.6389820176775374, "grad_norm": 1.2890625, "learning_rate": 6.091805301184017e-05, "loss": 0.8753, "step": 4193 }, { "epoch": 0.6391344102407802, "grad_norm": 0.91015625, "learning_rate": 6.087262598367011e-05, "loss": 1.0618, "step": 4194 }, { "epoch": 0.6392868028040232, "grad_norm": 0.71484375, "learning_rate": 6.082720848749095e-05, "loss": 0.8017, "step": 4195 }, { "epoch": 0.6394391953672661, "grad_norm": 1.0625, "learning_rate": 6.078180053436716e-05, "loss": 0.8793, "step": 4196 }, { "epoch": 0.639591587930509, "grad_norm": 1.1328125, "learning_rate": 6.073640213536076e-05, "loss": 1.2335, "step": 4197 }, { "epoch": 0.6397439804937519, "grad_norm": 0.96875, "learning_rate": 6.069101330153134e-05, "loss": 0.9537, "step": 4198 }, { "epoch": 0.6398963730569949, "grad_norm": 0.7890625, "learning_rate": 6.064563404393642e-05, "loss": 0.9275, "step": 4199 }, { "epoch": 0.6400487656202377, "grad_norm": 1.09375, "learning_rate": 6.0600264373630954e-05, "loss": 0.9147, "step": 4200 }, { "epoch": 0.6402011581834807, "grad_norm": 0.80859375, "learning_rate": 6.0554904301667636e-05, "loss": 0.8379, "step": 4201 }, { "epoch": 0.6403535507467235, "grad_norm": 0.81640625, "learning_rate": 6.050955383909691e-05, "loss": 0.9407, "step": 4202 }, { "epoch": 0.6405059433099665, "grad_norm": 0.72265625, "learning_rate": 6.046421299696674e-05, "loss": 0.9436, "step": 4203 }, { "epoch": 0.6406583358732094, "grad_norm": 1.1640625, "learning_rate": 6.0418881786322755e-05, "loss": 0.8343, "step": 4204 }, { "epoch": 0.6408107284364523, "grad_norm": 1.2578125, "learning_rate": 6.037356021820844e-05, "loss": 1.114, "step": 4205 }, { "epoch": 0.6409631209996952, "grad_norm": 1.046875, "learning_rate": 6.032824830366466e-05, "loss": 0.9214, "step": 4206 }, { "epoch": 0.6411155135629382, "grad_norm": 0.8515625, "learning_rate": 6.028294605373006e-05, "loss": 0.9813, "step": 4207 }, { "epoch": 0.641267906126181, "grad_norm": 1.0703125, "learning_rate": 6.023765347944099e-05, "loss": 1.1048, "step": 4208 }, { "epoch": 0.641420298689424, "grad_norm": 0.87890625, "learning_rate": 6.0192370591831307e-05, "loss": 0.9455, "step": 4209 }, { "epoch": 0.6415726912526669, "grad_norm": 1.2109375, "learning_rate": 6.014709740193254e-05, "loss": 1.1223, "step": 4210 }, { "epoch": 0.6417250838159098, "grad_norm": 0.90625, "learning_rate": 6.010183392077402e-05, "loss": 1.0183, "step": 4211 }, { "epoch": 0.6418774763791527, "grad_norm": 0.93359375, "learning_rate": 6.005658015938244e-05, "loss": 1.1076, "step": 4212 }, { "epoch": 0.6420298689423957, "grad_norm": 0.86328125, "learning_rate": 6.001133612878238e-05, "loss": 0.9109, "step": 4213 }, { "epoch": 0.6421822615056385, "grad_norm": 0.765625, "learning_rate": 5.996610183999587e-05, "loss": 0.8968, "step": 4214 }, { "epoch": 0.6423346540688815, "grad_norm": 0.90234375, "learning_rate": 5.992087730404261e-05, "loss": 0.9629, "step": 4215 }, { "epoch": 0.6424870466321243, "grad_norm": 0.8359375, "learning_rate": 5.9875662531940055e-05, "loss": 0.8748, "step": 4216 }, { "epoch": 0.6426394391953673, "grad_norm": 0.86328125, "learning_rate": 5.983045753470308e-05, "loss": 0.8613, "step": 4217 }, { "epoch": 0.6427918317586102, "grad_norm": 1.34375, "learning_rate": 5.978526232334425e-05, "loss": 0.9034, "step": 4218 }, { "epoch": 0.642944224321853, "grad_norm": 0.78125, "learning_rate": 5.974007690887389e-05, "loss": 0.7898, "step": 4219 }, { "epoch": 0.643096616885096, "grad_norm": 0.79296875, "learning_rate": 5.969490130229971e-05, "loss": 0.7723, "step": 4220 }, { "epoch": 0.643249009448339, "grad_norm": 0.85546875, "learning_rate": 5.9649735514627134e-05, "loss": 0.9354, "step": 4221 }, { "epoch": 0.6434014020115818, "grad_norm": 0.84765625, "learning_rate": 5.9604579556859305e-05, "loss": 0.9026, "step": 4222 }, { "epoch": 0.6435537945748248, "grad_norm": 0.953125, "learning_rate": 5.955943343999677e-05, "loss": 0.9937, "step": 4223 }, { "epoch": 0.6437061871380677, "grad_norm": 0.84375, "learning_rate": 5.9514297175037745e-05, "loss": 0.9252, "step": 4224 }, { "epoch": 0.6438585797013106, "grad_norm": 0.85546875, "learning_rate": 5.9469170772978186e-05, "loss": 0.9708, "step": 4225 }, { "epoch": 0.6440109722645535, "grad_norm": 0.9765625, "learning_rate": 5.9424054244811414e-05, "loss": 1.0273, "step": 4226 }, { "epoch": 0.6441633648277965, "grad_norm": 1.1328125, "learning_rate": 5.9378947601528576e-05, "loss": 0.9478, "step": 4227 }, { "epoch": 0.6443157573910393, "grad_norm": 1.0234375, "learning_rate": 5.933385085411824e-05, "loss": 1.0232, "step": 4228 }, { "epoch": 0.6444681499542823, "grad_norm": 0.7578125, "learning_rate": 5.928876401356657e-05, "loss": 0.9784, "step": 4229 }, { "epoch": 0.6446205425175251, "grad_norm": 0.87890625, "learning_rate": 5.924368709085748e-05, "loss": 0.8702, "step": 4230 }, { "epoch": 0.644772935080768, "grad_norm": 0.90625, "learning_rate": 5.919862009697229e-05, "loss": 0.8149, "step": 4231 }, { "epoch": 0.644925327644011, "grad_norm": 0.87109375, "learning_rate": 5.9153563042889934e-05, "loss": 1.0229, "step": 4232 }, { "epoch": 0.6450777202072538, "grad_norm": 1.0, "learning_rate": 5.910851593958707e-05, "loss": 0.9962, "step": 4233 }, { "epoch": 0.6452301127704968, "grad_norm": 0.99609375, "learning_rate": 5.906347879803773e-05, "loss": 1.1458, "step": 4234 }, { "epoch": 0.6453825053337398, "grad_norm": 1.015625, "learning_rate": 5.90184516292136e-05, "loss": 0.903, "step": 4235 }, { "epoch": 0.6455348978969826, "grad_norm": 0.84375, "learning_rate": 5.897343444408403e-05, "loss": 0.9154, "step": 4236 }, { "epoch": 0.6456872904602255, "grad_norm": 0.77734375, "learning_rate": 5.892842725361578e-05, "loss": 0.9438, "step": 4237 }, { "epoch": 0.6458396830234685, "grad_norm": 0.7421875, "learning_rate": 5.8883430068773236e-05, "loss": 0.8549, "step": 4238 }, { "epoch": 0.6459920755867113, "grad_norm": 0.83984375, "learning_rate": 5.8838442900518454e-05, "loss": 0.8328, "step": 4239 }, { "epoch": 0.6461444681499543, "grad_norm": 1.0234375, "learning_rate": 5.8793465759810864e-05, "loss": 1.1672, "step": 4240 }, { "epoch": 0.6462968607131971, "grad_norm": 0.98828125, "learning_rate": 5.8748498657607564e-05, "loss": 0.939, "step": 4241 }, { "epoch": 0.6464492532764401, "grad_norm": 1.0703125, "learning_rate": 5.870354160486322e-05, "loss": 0.977, "step": 4242 }, { "epoch": 0.646601645839683, "grad_norm": 1.0078125, "learning_rate": 5.865859461252996e-05, "loss": 1.0238, "step": 4243 }, { "epoch": 0.6467540384029259, "grad_norm": 1.0703125, "learning_rate": 5.86136576915576e-05, "loss": 0.8067, "step": 4244 }, { "epoch": 0.6469064309661688, "grad_norm": 0.94140625, "learning_rate": 5.856873085289336e-05, "loss": 0.9646, "step": 4245 }, { "epoch": 0.6470588235294118, "grad_norm": 0.70703125, "learning_rate": 5.8523814107482046e-05, "loss": 0.9037, "step": 4246 }, { "epoch": 0.6472112160926546, "grad_norm": 0.83984375, "learning_rate": 5.84789074662661e-05, "loss": 0.8376, "step": 4247 }, { "epoch": 0.6473636086558976, "grad_norm": 1.2265625, "learning_rate": 5.843401094018536e-05, "loss": 1.0754, "step": 4248 }, { "epoch": 0.6475160012191405, "grad_norm": 1.21875, "learning_rate": 5.838912454017724e-05, "loss": 0.9874, "step": 4249 }, { "epoch": 0.6476683937823834, "grad_norm": 1.015625, "learning_rate": 5.8344248277176835e-05, "loss": 1.0481, "step": 4250 }, { "epoch": 0.6478207863456263, "grad_norm": 0.94921875, "learning_rate": 5.829938216211653e-05, "loss": 1.0363, "step": 4251 }, { "epoch": 0.6479731789088693, "grad_norm": 0.91015625, "learning_rate": 5.8254526205926375e-05, "loss": 1.1204, "step": 4252 }, { "epoch": 0.6481255714721121, "grad_norm": 1.0390625, "learning_rate": 5.8209680419533973e-05, "loss": 0.9738, "step": 4253 }, { "epoch": 0.6482779640353551, "grad_norm": 0.83984375, "learning_rate": 5.816484481386438e-05, "loss": 0.8441, "step": 4254 }, { "epoch": 0.6484303565985979, "grad_norm": 1.0546875, "learning_rate": 5.81200193998402e-05, "loss": 0.8009, "step": 4255 }, { "epoch": 0.6485827491618409, "grad_norm": 0.9375, "learning_rate": 5.8075204188381524e-05, "loss": 0.9067, "step": 4256 }, { "epoch": 0.6487351417250838, "grad_norm": 0.98828125, "learning_rate": 5.8030399190406e-05, "loss": 0.9038, "step": 4257 }, { "epoch": 0.6488875342883267, "grad_norm": 1.1328125, "learning_rate": 5.798560441682874e-05, "loss": 1.0214, "step": 4258 }, { "epoch": 0.6490399268515696, "grad_norm": 1.2265625, "learning_rate": 5.794081987856246e-05, "loss": 0.9428, "step": 4259 }, { "epoch": 0.6491923194148126, "grad_norm": 1.1015625, "learning_rate": 5.7896045586517264e-05, "loss": 1.1085, "step": 4260 }, { "epoch": 0.6493447119780554, "grad_norm": 0.77734375, "learning_rate": 5.7851281551600846e-05, "loss": 1.0319, "step": 4261 }, { "epoch": 0.6494971045412984, "grad_norm": 1.0546875, "learning_rate": 5.7806527784718336e-05, "loss": 1.0999, "step": 4262 }, { "epoch": 0.6496494971045413, "grad_norm": 0.859375, "learning_rate": 5.7761784296772395e-05, "loss": 1.0411, "step": 4263 }, { "epoch": 0.6498018896677842, "grad_norm": 0.984375, "learning_rate": 5.7717051098663214e-05, "loss": 1.0052, "step": 4264 }, { "epoch": 0.6499542822310271, "grad_norm": 1.078125, "learning_rate": 5.7672328201288425e-05, "loss": 1.0767, "step": 4265 }, { "epoch": 0.6501066747942701, "grad_norm": 0.96484375, "learning_rate": 5.762761561554317e-05, "loss": 1.1239, "step": 4266 }, { "epoch": 0.6502590673575129, "grad_norm": 1.0390625, "learning_rate": 5.758291335232009e-05, "loss": 1.168, "step": 4267 }, { "epoch": 0.6504114599207559, "grad_norm": 0.88671875, "learning_rate": 5.753822142250928e-05, "loss": 0.8794, "step": 4268 }, { "epoch": 0.6505638524839987, "grad_norm": 0.68359375, "learning_rate": 5.74935398369983e-05, "loss": 0.9115, "step": 4269 }, { "epoch": 0.6507162450472417, "grad_norm": 1.0078125, "learning_rate": 5.744886860667231e-05, "loss": 0.8916, "step": 4270 }, { "epoch": 0.6508686376104846, "grad_norm": 1.109375, "learning_rate": 5.7404207742413815e-05, "loss": 1.0132, "step": 4271 }, { "epoch": 0.6510210301737275, "grad_norm": 0.96875, "learning_rate": 5.735955725510287e-05, "loss": 0.7195, "step": 4272 }, { "epoch": 0.6511734227369704, "grad_norm": 0.80859375, "learning_rate": 5.731491715561694e-05, "loss": 0.9501, "step": 4273 }, { "epoch": 0.6513258153002134, "grad_norm": 0.9453125, "learning_rate": 5.727028745483103e-05, "loss": 1.0459, "step": 4274 }, { "epoch": 0.6514782078634562, "grad_norm": 0.89453125, "learning_rate": 5.722566816361752e-05, "loss": 0.9695, "step": 4275 }, { "epoch": 0.6516306004266992, "grad_norm": 1.5234375, "learning_rate": 5.718105929284638e-05, "loss": 0.9802, "step": 4276 }, { "epoch": 0.6517829929899421, "grad_norm": 1.0234375, "learning_rate": 5.713646085338496e-05, "loss": 1.0023, "step": 4277 }, { "epoch": 0.651935385553185, "grad_norm": 1.390625, "learning_rate": 5.709187285609805e-05, "loss": 1.1041, "step": 4278 }, { "epoch": 0.6520877781164279, "grad_norm": 0.9140625, "learning_rate": 5.704729531184794e-05, "loss": 1.0665, "step": 4279 }, { "epoch": 0.6522401706796709, "grad_norm": 0.859375, "learning_rate": 5.700272823149433e-05, "loss": 0.8006, "step": 4280 }, { "epoch": 0.6523925632429137, "grad_norm": 1.1953125, "learning_rate": 5.695817162589447e-05, "loss": 1.1684, "step": 4281 }, { "epoch": 0.6525449558061567, "grad_norm": 0.7265625, "learning_rate": 5.691362550590297e-05, "loss": 1.0639, "step": 4282 }, { "epoch": 0.6526973483693995, "grad_norm": 1.25, "learning_rate": 5.686908988237187e-05, "loss": 0.8393, "step": 4283 }, { "epoch": 0.6528497409326425, "grad_norm": 1.0078125, "learning_rate": 5.6824564766150726e-05, "loss": 1.0792, "step": 4284 }, { "epoch": 0.6530021334958854, "grad_norm": 0.90234375, "learning_rate": 5.678005016808648e-05, "loss": 0.9282, "step": 4285 }, { "epoch": 0.6531545260591283, "grad_norm": 0.90234375, "learning_rate": 5.673554609902346e-05, "loss": 0.9869, "step": 4286 }, { "epoch": 0.6533069186223712, "grad_norm": 1.0703125, "learning_rate": 5.669105256980363e-05, "loss": 0.931, "step": 4287 }, { "epoch": 0.6534593111856142, "grad_norm": 0.86328125, "learning_rate": 5.6646569591266174e-05, "loss": 0.9028, "step": 4288 }, { "epoch": 0.653611703748857, "grad_norm": 0.92578125, "learning_rate": 5.6602097174247806e-05, "loss": 1.0369, "step": 4289 }, { "epoch": 0.6537640963121, "grad_norm": 1.0390625, "learning_rate": 5.6557635329582645e-05, "loss": 1.1231, "step": 4290 }, { "epoch": 0.6539164888753429, "grad_norm": 0.77734375, "learning_rate": 5.6513184068102224e-05, "loss": 0.9945, "step": 4291 }, { "epoch": 0.6540688814385858, "grad_norm": 0.8828125, "learning_rate": 5.646874340063547e-05, "loss": 0.9989, "step": 4292 }, { "epoch": 0.6542212740018287, "grad_norm": 0.98828125, "learning_rate": 5.642431333800886e-05, "loss": 1.2282, "step": 4293 }, { "epoch": 0.6543736665650717, "grad_norm": 1.046875, "learning_rate": 5.6379893891046154e-05, "loss": 1.0487, "step": 4294 }, { "epoch": 0.6545260591283145, "grad_norm": 0.90234375, "learning_rate": 5.633548507056856e-05, "loss": 1.1273, "step": 4295 }, { "epoch": 0.6546784516915575, "grad_norm": 0.89453125, "learning_rate": 5.6291086887394706e-05, "loss": 0.8272, "step": 4296 }, { "epoch": 0.6548308442548003, "grad_norm": 0.92578125, "learning_rate": 5.62466993523406e-05, "loss": 1.0205, "step": 4297 }, { "epoch": 0.6549832368180433, "grad_norm": 0.8984375, "learning_rate": 5.620232247621975e-05, "loss": 0.9679, "step": 4298 }, { "epoch": 0.6551356293812862, "grad_norm": 0.984375, "learning_rate": 5.615795626984297e-05, "loss": 1.0693, "step": 4299 }, { "epoch": 0.6552880219445291, "grad_norm": 0.9921875, "learning_rate": 5.61136007440185e-05, "loss": 0.9511, "step": 4300 }, { "epoch": 0.655440414507772, "grad_norm": 0.71484375, "learning_rate": 5.606925590955199e-05, "loss": 0.852, "step": 4301 }, { "epoch": 0.655592807071015, "grad_norm": 0.90234375, "learning_rate": 5.6024921777246476e-05, "loss": 0.9692, "step": 4302 }, { "epoch": 0.6557451996342578, "grad_norm": 1.1796875, "learning_rate": 5.5980598357902346e-05, "loss": 0.8901, "step": 4303 }, { "epoch": 0.6558975921975008, "grad_norm": 0.9375, "learning_rate": 5.5936285662317526e-05, "loss": 1.0272, "step": 4304 }, { "epoch": 0.6560499847607437, "grad_norm": 0.8984375, "learning_rate": 5.589198370128718e-05, "loss": 1.0801, "step": 4305 }, { "epoch": 0.6562023773239866, "grad_norm": 0.8828125, "learning_rate": 5.584769248560382e-05, "loss": 1.0309, "step": 4306 }, { "epoch": 0.6563547698872295, "grad_norm": 0.9375, "learning_rate": 5.580341202605752e-05, "loss": 1.0505, "step": 4307 }, { "epoch": 0.6565071624504725, "grad_norm": 0.9375, "learning_rate": 5.5759142333435585e-05, "loss": 0.8539, "step": 4308 }, { "epoch": 0.6566595550137153, "grad_norm": 1.0, "learning_rate": 5.571488341852281e-05, "loss": 0.8944, "step": 4309 }, { "epoch": 0.6568119475769583, "grad_norm": 0.9609375, "learning_rate": 5.567063529210126e-05, "loss": 1.0012, "step": 4310 }, { "epoch": 0.6569643401402011, "grad_norm": 1.0546875, "learning_rate": 5.562639796495043e-05, "loss": 1.1009, "step": 4311 }, { "epoch": 0.6571167327034441, "grad_norm": 0.7109375, "learning_rate": 5.558217144784714e-05, "loss": 0.8205, "step": 4312 }, { "epoch": 0.657269125266687, "grad_norm": 0.8828125, "learning_rate": 5.553795575156565e-05, "loss": 0.8605, "step": 4313 }, { "epoch": 0.6574215178299299, "grad_norm": 0.859375, "learning_rate": 5.549375088687746e-05, "loss": 1.0265, "step": 4314 }, { "epoch": 0.6575739103931728, "grad_norm": 1.1875, "learning_rate": 5.544955686455161e-05, "loss": 1.1376, "step": 4315 }, { "epoch": 0.6577263029564158, "grad_norm": 1.0546875, "learning_rate": 5.5405373695354353e-05, "loss": 1.0277, "step": 4316 }, { "epoch": 0.6578786955196586, "grad_norm": 0.64453125, "learning_rate": 5.5361201390049345e-05, "loss": 0.7974, "step": 4317 }, { "epoch": 0.6580310880829016, "grad_norm": 1.0078125, "learning_rate": 5.5317039959397606e-05, "loss": 0.9395, "step": 4318 }, { "epoch": 0.6581834806461445, "grad_norm": 0.98046875, "learning_rate": 5.527288941415747e-05, "loss": 0.8477, "step": 4319 }, { "epoch": 0.6583358732093874, "grad_norm": 0.796875, "learning_rate": 5.522874976508463e-05, "loss": 1.1312, "step": 4320 }, { "epoch": 0.6584882657726303, "grad_norm": 0.79296875, "learning_rate": 5.51846210229322e-05, "loss": 1.0501, "step": 4321 }, { "epoch": 0.6586406583358733, "grad_norm": 0.90234375, "learning_rate": 5.51405031984506e-05, "loss": 0.9168, "step": 4322 }, { "epoch": 0.6587930508991161, "grad_norm": 0.859375, "learning_rate": 5.5096396302387434e-05, "loss": 0.8754, "step": 4323 }, { "epoch": 0.6589454434623591, "grad_norm": 0.7421875, "learning_rate": 5.5052300345487875e-05, "loss": 1.0222, "step": 4324 }, { "epoch": 0.6590978360256019, "grad_norm": 1.015625, "learning_rate": 5.5008215338494275e-05, "loss": 1.015, "step": 4325 }, { "epoch": 0.6592502285888449, "grad_norm": 0.81640625, "learning_rate": 5.4964141292146464e-05, "loss": 0.9313, "step": 4326 }, { "epoch": 0.6594026211520878, "grad_norm": 0.984375, "learning_rate": 5.492007821718146e-05, "loss": 1.0051, "step": 4327 }, { "epoch": 0.6595550137153307, "grad_norm": 1.2109375, "learning_rate": 5.4876026124333654e-05, "loss": 1.1009, "step": 4328 }, { "epoch": 0.6597074062785736, "grad_norm": 1.0234375, "learning_rate": 5.483198502433479e-05, "loss": 0.8472, "step": 4329 }, { "epoch": 0.6598597988418166, "grad_norm": 0.7890625, "learning_rate": 5.4787954927913886e-05, "loss": 0.9097, "step": 4330 }, { "epoch": 0.6600121914050594, "grad_norm": 0.8046875, "learning_rate": 5.47439358457973e-05, "loss": 0.9587, "step": 4331 }, { "epoch": 0.6601645839683024, "grad_norm": 1.2265625, "learning_rate": 5.469992778870876e-05, "loss": 1.003, "step": 4332 }, { "epoch": 0.6603169765315453, "grad_norm": 0.765625, "learning_rate": 5.465593076736929e-05, "loss": 0.9696, "step": 4333 }, { "epoch": 0.6604693690947882, "grad_norm": 0.796875, "learning_rate": 5.461194479249706e-05, "loss": 0.9347, "step": 4334 }, { "epoch": 0.6606217616580311, "grad_norm": 0.87109375, "learning_rate": 5.4567969874807813e-05, "loss": 0.9861, "step": 4335 }, { "epoch": 0.6607741542212741, "grad_norm": 0.97265625, "learning_rate": 5.4524006025014426e-05, "loss": 1.082, "step": 4336 }, { "epoch": 0.6609265467845169, "grad_norm": 0.91015625, "learning_rate": 5.448005325382709e-05, "loss": 1.0244, "step": 4337 }, { "epoch": 0.6610789393477599, "grad_norm": 0.8125, "learning_rate": 5.443611157195341e-05, "loss": 0.8713, "step": 4338 }, { "epoch": 0.6612313319110027, "grad_norm": 0.8671875, "learning_rate": 5.439218099009822e-05, "loss": 0.9312, "step": 4339 }, { "epoch": 0.6613837244742456, "grad_norm": 0.95703125, "learning_rate": 5.4348261518963496e-05, "loss": 0.9813, "step": 4340 }, { "epoch": 0.6615361170374886, "grad_norm": 0.9765625, "learning_rate": 5.4304353169248804e-05, "loss": 0.9789, "step": 4341 }, { "epoch": 0.6616885096007314, "grad_norm": 0.83984375, "learning_rate": 5.426045595165075e-05, "loss": 0.9445, "step": 4342 }, { "epoch": 0.6618409021639744, "grad_norm": 0.8203125, "learning_rate": 5.421656987686341e-05, "loss": 0.9718, "step": 4343 }, { "epoch": 0.6619932947272174, "grad_norm": 0.93359375, "learning_rate": 5.417269495557802e-05, "loss": 0.8309, "step": 4344 }, { "epoch": 0.6621456872904602, "grad_norm": 0.94140625, "learning_rate": 5.412883119848315e-05, "loss": 0.8985, "step": 4345 }, { "epoch": 0.6622980798537031, "grad_norm": 0.859375, "learning_rate": 5.408497861626465e-05, "loss": 0.8903, "step": 4346 }, { "epoch": 0.6624504724169461, "grad_norm": 0.9296875, "learning_rate": 5.404113721960562e-05, "loss": 1.0117, "step": 4347 }, { "epoch": 0.6626028649801889, "grad_norm": 1.140625, "learning_rate": 5.3997307019186426e-05, "loss": 0.9901, "step": 4348 }, { "epoch": 0.6627552575434319, "grad_norm": 1.1953125, "learning_rate": 5.3953488025684815e-05, "loss": 1.1064, "step": 4349 }, { "epoch": 0.6629076501066747, "grad_norm": 0.8515625, "learning_rate": 5.390968024977572e-05, "loss": 0.9518, "step": 4350 }, { "epoch": 0.6630600426699177, "grad_norm": 0.94921875, "learning_rate": 5.386588370213124e-05, "loss": 0.9827, "step": 4351 }, { "epoch": 0.6632124352331606, "grad_norm": 0.7109375, "learning_rate": 5.382209839342093e-05, "loss": 0.8854, "step": 4352 }, { "epoch": 0.6633648277964035, "grad_norm": 0.7734375, "learning_rate": 5.3778324334311516e-05, "loss": 0.9739, "step": 4353 }, { "epoch": 0.6635172203596464, "grad_norm": 0.875, "learning_rate": 5.373456153546692e-05, "loss": 1.0709, "step": 4354 }, { "epoch": 0.6636696129228894, "grad_norm": 0.8203125, "learning_rate": 5.3690810007548485e-05, "loss": 1.1013, "step": 4355 }, { "epoch": 0.6638220054861322, "grad_norm": 1.109375, "learning_rate": 5.364706976121472e-05, "loss": 1.1039, "step": 4356 }, { "epoch": 0.6639743980493752, "grad_norm": 0.91796875, "learning_rate": 5.360334080712124e-05, "loss": 1.0319, "step": 4357 }, { "epoch": 0.6641267906126181, "grad_norm": 0.8671875, "learning_rate": 5.355962315592118e-05, "loss": 0.703, "step": 4358 }, { "epoch": 0.664279183175861, "grad_norm": 1.28125, "learning_rate": 5.351591681826471e-05, "loss": 0.9887, "step": 4359 }, { "epoch": 0.6644315757391039, "grad_norm": 0.7109375, "learning_rate": 5.347222180479938e-05, "loss": 0.9156, "step": 4360 }, { "epoch": 0.6645839683023469, "grad_norm": 1.046875, "learning_rate": 5.342853812616996e-05, "loss": 0.8573, "step": 4361 }, { "epoch": 0.6647363608655897, "grad_norm": 0.8984375, "learning_rate": 5.338486579301827e-05, "loss": 1.1177, "step": 4362 }, { "epoch": 0.6648887534288327, "grad_norm": 0.72265625, "learning_rate": 5.334120481598366e-05, "loss": 0.8453, "step": 4363 }, { "epoch": 0.6650411459920755, "grad_norm": 0.74609375, "learning_rate": 5.3297555205702507e-05, "loss": 0.9417, "step": 4364 }, { "epoch": 0.6651935385553185, "grad_norm": 0.94921875, "learning_rate": 5.325391697280847e-05, "loss": 0.9208, "step": 4365 }, { "epoch": 0.6653459311185614, "grad_norm": 0.86328125, "learning_rate": 5.3210290127932516e-05, "loss": 0.8745, "step": 4366 }, { "epoch": 0.6654983236818043, "grad_norm": 0.859375, "learning_rate": 5.316667468170278e-05, "loss": 0.8955, "step": 4367 }, { "epoch": 0.6656507162450472, "grad_norm": 0.83203125, "learning_rate": 5.3123070644744475e-05, "loss": 0.899, "step": 4368 }, { "epoch": 0.6658031088082902, "grad_norm": 0.91796875, "learning_rate": 5.307947802768032e-05, "loss": 0.9364, "step": 4369 }, { "epoch": 0.665955501371533, "grad_norm": 1.140625, "learning_rate": 5.303589684113003e-05, "loss": 0.9869, "step": 4370 }, { "epoch": 0.666107893934776, "grad_norm": 1.109375, "learning_rate": 5.29923270957106e-05, "loss": 1.1329, "step": 4371 }, { "epoch": 0.6662602864980189, "grad_norm": 1.1015625, "learning_rate": 5.29487688020363e-05, "loss": 1.1235, "step": 4372 }, { "epoch": 0.6664126790612618, "grad_norm": 0.6484375, "learning_rate": 5.2905221970718535e-05, "loss": 0.9461, "step": 4373 }, { "epoch": 0.6665650716245047, "grad_norm": 1.0625, "learning_rate": 5.286168661236592e-05, "loss": 1.1184, "step": 4374 }, { "epoch": 0.6667174641877477, "grad_norm": 1.1953125, "learning_rate": 5.281816273758433e-05, "loss": 1.201, "step": 4375 }, { "epoch": 0.6668698567509905, "grad_norm": 1.0703125, "learning_rate": 5.277465035697673e-05, "loss": 0.8836, "step": 4376 }, { "epoch": 0.6670222493142335, "grad_norm": 0.94140625, "learning_rate": 5.273114948114346e-05, "loss": 0.9149, "step": 4377 }, { "epoch": 0.6671746418774763, "grad_norm": 0.89453125, "learning_rate": 5.268766012068196e-05, "loss": 0.938, "step": 4378 }, { "epoch": 0.6673270344407193, "grad_norm": 0.94140625, "learning_rate": 5.264418228618673e-05, "loss": 1.1398, "step": 4379 }, { "epoch": 0.6674794270039622, "grad_norm": 0.85546875, "learning_rate": 5.260071598824974e-05, "loss": 1.0615, "step": 4380 }, { "epoch": 0.6676318195672051, "grad_norm": 1.1171875, "learning_rate": 5.255726123745992e-05, "loss": 0.9427, "step": 4381 }, { "epoch": 0.667784212130448, "grad_norm": 1.0703125, "learning_rate": 5.251381804440349e-05, "loss": 0.9301, "step": 4382 }, { "epoch": 0.667936604693691, "grad_norm": 0.73046875, "learning_rate": 5.247038641966386e-05, "loss": 1.0708, "step": 4383 }, { "epoch": 0.6680889972569338, "grad_norm": 0.95703125, "learning_rate": 5.242696637382165e-05, "loss": 0.9749, "step": 4384 }, { "epoch": 0.6682413898201768, "grad_norm": 1.0234375, "learning_rate": 5.238355791745446e-05, "loss": 0.9129, "step": 4385 }, { "epoch": 0.6683937823834197, "grad_norm": 1.0703125, "learning_rate": 5.2340161061137324e-05, "loss": 1.009, "step": 4386 }, { "epoch": 0.6685461749466626, "grad_norm": 0.8671875, "learning_rate": 5.2296775815442334e-05, "loss": 1.1514, "step": 4387 }, { "epoch": 0.6686985675099055, "grad_norm": 0.96484375, "learning_rate": 5.225340219093869e-05, "loss": 0.8692, "step": 4388 }, { "epoch": 0.6688509600731485, "grad_norm": 1.1484375, "learning_rate": 5.2210040198192956e-05, "loss": 1.067, "step": 4389 }, { "epoch": 0.6690033526363913, "grad_norm": 1.1328125, "learning_rate": 5.21666898477686e-05, "loss": 0.7778, "step": 4390 }, { "epoch": 0.6691557451996343, "grad_norm": 0.66796875, "learning_rate": 5.212335115022647e-05, "loss": 0.9345, "step": 4391 }, { "epoch": 0.6693081377628771, "grad_norm": 1.0390625, "learning_rate": 5.208002411612449e-05, "loss": 1.0448, "step": 4392 }, { "epoch": 0.6694605303261201, "grad_norm": 0.97265625, "learning_rate": 5.2036708756017696e-05, "loss": 1.2466, "step": 4393 }, { "epoch": 0.669612922889363, "grad_norm": 0.84375, "learning_rate": 5.199340508045842e-05, "loss": 0.7403, "step": 4394 }, { "epoch": 0.6697653154526059, "grad_norm": 0.9609375, "learning_rate": 5.1950113099996046e-05, "loss": 1.0688, "step": 4395 }, { "epoch": 0.6699177080158488, "grad_norm": 1.0546875, "learning_rate": 5.190683282517701e-05, "loss": 1.1098, "step": 4396 }, { "epoch": 0.6700701005790918, "grad_norm": 0.98828125, "learning_rate": 5.1863564266545125e-05, "loss": 0.9971, "step": 4397 }, { "epoch": 0.6702224931423346, "grad_norm": 0.796875, "learning_rate": 5.1820307434641216e-05, "loss": 0.9244, "step": 4398 }, { "epoch": 0.6703748857055776, "grad_norm": 0.77734375, "learning_rate": 5.177706234000319e-05, "loss": 0.899, "step": 4399 }, { "epoch": 0.6705272782688205, "grad_norm": 1.0625, "learning_rate": 5.17338289931663e-05, "loss": 1.1203, "step": 4400 }, { "epoch": 0.6706796708320634, "grad_norm": 0.78125, "learning_rate": 5.169060740466277e-05, "loss": 0.9709, "step": 4401 }, { "epoch": 0.6708320633953063, "grad_norm": 0.8359375, "learning_rate": 5.164739758502191e-05, "loss": 0.8882, "step": 4402 }, { "epoch": 0.6709844559585493, "grad_norm": 0.921875, "learning_rate": 5.160419954477036e-05, "loss": 0.9836, "step": 4403 }, { "epoch": 0.6711368485217921, "grad_norm": 1.015625, "learning_rate": 5.156101329443173e-05, "loss": 1.0681, "step": 4404 }, { "epoch": 0.6712892410850351, "grad_norm": 1.0625, "learning_rate": 5.15178388445268e-05, "loss": 1.0499, "step": 4405 }, { "epoch": 0.6714416336482779, "grad_norm": 1.03125, "learning_rate": 5.147467620557359e-05, "loss": 0.8915, "step": 4406 }, { "epoch": 0.6715940262115209, "grad_norm": 0.84375, "learning_rate": 5.1431525388086975e-05, "loss": 0.9441, "step": 4407 }, { "epoch": 0.6717464187747638, "grad_norm": 1.1171875, "learning_rate": 5.1388386402579255e-05, "loss": 1.1322, "step": 4408 }, { "epoch": 0.6718988113380067, "grad_norm": 1.2109375, "learning_rate": 5.1345259259559666e-05, "loss": 1.0276, "step": 4409 }, { "epoch": 0.6720512039012496, "grad_norm": 1.0078125, "learning_rate": 5.130214396953452e-05, "loss": 0.8943, "step": 4410 }, { "epoch": 0.6722035964644926, "grad_norm": 1.03125, "learning_rate": 5.125904054300745e-05, "loss": 1.0037, "step": 4411 }, { "epoch": 0.6723559890277354, "grad_norm": 0.703125, "learning_rate": 5.1215948990479044e-05, "loss": 0.9339, "step": 4412 }, { "epoch": 0.6725083815909784, "grad_norm": 1.1171875, "learning_rate": 5.117286932244692e-05, "loss": 0.8885, "step": 4413 }, { "epoch": 0.6726607741542213, "grad_norm": 0.68359375, "learning_rate": 5.112980154940601e-05, "loss": 0.8857, "step": 4414 }, { "epoch": 0.6728131667174642, "grad_norm": 0.8125, "learning_rate": 5.108674568184822e-05, "loss": 1.0512, "step": 4415 }, { "epoch": 0.6729655592807071, "grad_norm": 1.1875, "learning_rate": 5.1043701730262506e-05, "loss": 1.0887, "step": 4416 }, { "epoch": 0.6731179518439501, "grad_norm": 0.9765625, "learning_rate": 5.100066970513514e-05, "loss": 1.0959, "step": 4417 }, { "epoch": 0.6732703444071929, "grad_norm": 0.78515625, "learning_rate": 5.095764961694922e-05, "loss": 0.8675, "step": 4418 }, { "epoch": 0.6734227369704359, "grad_norm": 0.9921875, "learning_rate": 5.091464147618505e-05, "loss": 1.0217, "step": 4419 }, { "epoch": 0.6735751295336787, "grad_norm": 1.15625, "learning_rate": 5.0871645293320136e-05, "loss": 0.9237, "step": 4420 }, { "epoch": 0.6737275220969217, "grad_norm": 0.8125, "learning_rate": 5.082866107882889e-05, "loss": 0.8877, "step": 4421 }, { "epoch": 0.6738799146601646, "grad_norm": 1.1796875, "learning_rate": 5.078568884318287e-05, "loss": 1.0432, "step": 4422 }, { "epoch": 0.6740323072234075, "grad_norm": 1.421875, "learning_rate": 5.0742728596850844e-05, "loss": 0.9272, "step": 4423 }, { "epoch": 0.6741846997866504, "grad_norm": 1.015625, "learning_rate": 5.0699780350298374e-05, "loss": 0.8997, "step": 4424 }, { "epoch": 0.6743370923498934, "grad_norm": 0.92578125, "learning_rate": 5.065684411398841e-05, "loss": 0.9741, "step": 4425 }, { "epoch": 0.6744894849131362, "grad_norm": 1.0859375, "learning_rate": 5.061391989838078e-05, "loss": 1.2165, "step": 4426 }, { "epoch": 0.6746418774763792, "grad_norm": 1.0078125, "learning_rate": 5.057100771393242e-05, "loss": 1.0387, "step": 4427 }, { "epoch": 0.6747942700396221, "grad_norm": 0.95703125, "learning_rate": 5.05281075710974e-05, "loss": 1.0537, "step": 4428 }, { "epoch": 0.674946662602865, "grad_norm": 0.8515625, "learning_rate": 5.048521948032685e-05, "loss": 0.8983, "step": 4429 }, { "epoch": 0.6750990551661079, "grad_norm": 0.9609375, "learning_rate": 5.0442343452068796e-05, "loss": 1.0447, "step": 4430 }, { "epoch": 0.6752514477293509, "grad_norm": 1.0234375, "learning_rate": 5.0399479496768554e-05, "loss": 1.1976, "step": 4431 }, { "epoch": 0.6754038402925937, "grad_norm": 0.80859375, "learning_rate": 5.035662762486838e-05, "loss": 0.8116, "step": 4432 }, { "epoch": 0.6755562328558367, "grad_norm": 0.7890625, "learning_rate": 5.031378784680756e-05, "loss": 0.8615, "step": 4433 }, { "epoch": 0.6757086254190795, "grad_norm": 0.89453125, "learning_rate": 5.0270960173022604e-05, "loss": 0.7718, "step": 4434 }, { "epoch": 0.6758610179823225, "grad_norm": 0.97265625, "learning_rate": 5.022814461394683e-05, "loss": 0.9413, "step": 4435 }, { "epoch": 0.6760134105455654, "grad_norm": 0.91015625, "learning_rate": 5.0185341180010725e-05, "loss": 0.807, "step": 4436 }, { "epoch": 0.6761658031088082, "grad_norm": 1.21875, "learning_rate": 5.0142549881641884e-05, "loss": 1.0375, "step": 4437 }, { "epoch": 0.6763181956720512, "grad_norm": 1.0, "learning_rate": 5.0099770729264816e-05, "loss": 0.9423, "step": 4438 }, { "epoch": 0.6764705882352942, "grad_norm": 1.03125, "learning_rate": 5.0057003733301224e-05, "loss": 0.9681, "step": 4439 }, { "epoch": 0.676622980798537, "grad_norm": 0.9296875, "learning_rate": 5.001424890416975e-05, "loss": 1.1047, "step": 4440 }, { "epoch": 0.67677537336178, "grad_norm": 0.703125, "learning_rate": 4.997150625228597e-05, "loss": 0.7202, "step": 4441 }, { "epoch": 0.6769277659250229, "grad_norm": 1.078125, "learning_rate": 4.992877578806273e-05, "loss": 1.0377, "step": 4442 }, { "epoch": 0.6770801584882657, "grad_norm": 0.83984375, "learning_rate": 4.9886057521909744e-05, "loss": 0.9107, "step": 4443 }, { "epoch": 0.6772325510515087, "grad_norm": 0.96484375, "learning_rate": 4.9843351464233765e-05, "loss": 0.9936, "step": 4444 }, { "epoch": 0.6773849436147517, "grad_norm": 0.8515625, "learning_rate": 4.9800657625438674e-05, "loss": 0.8718, "step": 4445 }, { "epoch": 0.6775373361779945, "grad_norm": 0.9375, "learning_rate": 4.97579760159253e-05, "loss": 0.8216, "step": 4446 }, { "epoch": 0.6776897287412375, "grad_norm": 0.97265625, "learning_rate": 4.971530664609139e-05, "loss": 0.904, "step": 4447 }, { "epoch": 0.6778421213044803, "grad_norm": 1.0859375, "learning_rate": 4.967264952633192e-05, "loss": 1.1031, "step": 4448 }, { "epoch": 0.6779945138677232, "grad_norm": 1.375, "learning_rate": 4.9630004667038746e-05, "loss": 1.0274, "step": 4449 }, { "epoch": 0.6781469064309662, "grad_norm": 0.89453125, "learning_rate": 4.9587372078600736e-05, "loss": 0.8154, "step": 4450 }, { "epoch": 0.678299298994209, "grad_norm": 0.98046875, "learning_rate": 4.9544751771403896e-05, "loss": 0.8713, "step": 4451 }, { "epoch": 0.678451691557452, "grad_norm": 0.875, "learning_rate": 4.9502143755831066e-05, "loss": 1.0721, "step": 4452 }, { "epoch": 0.678604084120695, "grad_norm": 1.109375, "learning_rate": 4.945954804226214e-05, "loss": 1.0914, "step": 4453 }, { "epoch": 0.6787564766839378, "grad_norm": 1.4296875, "learning_rate": 4.9416964641074134e-05, "loss": 0.972, "step": 4454 }, { "epoch": 0.6789088692471807, "grad_norm": 0.94140625, "learning_rate": 4.93743935626409e-05, "loss": 1.0527, "step": 4455 }, { "epoch": 0.6790612618104237, "grad_norm": 1.015625, "learning_rate": 4.9331834817333445e-05, "loss": 1.0393, "step": 4456 }, { "epoch": 0.6792136543736665, "grad_norm": 0.9140625, "learning_rate": 4.9289288415519686e-05, "loss": 1.0478, "step": 4457 }, { "epoch": 0.6793660469369095, "grad_norm": 0.91796875, "learning_rate": 4.924675436756443e-05, "loss": 1.0457, "step": 4458 }, { "epoch": 0.6795184395001523, "grad_norm": 1.125, "learning_rate": 4.920423268382969e-05, "loss": 1.1506, "step": 4459 }, { "epoch": 0.6796708320633953, "grad_norm": 0.9375, "learning_rate": 4.9161723374674315e-05, "loss": 0.8436, "step": 4460 }, { "epoch": 0.6798232246266382, "grad_norm": 1.171875, "learning_rate": 4.911922645045417e-05, "loss": 1.0368, "step": 4461 }, { "epoch": 0.6799756171898811, "grad_norm": 1.1640625, "learning_rate": 4.907674192152222e-05, "loss": 0.9042, "step": 4462 }, { "epoch": 0.680128009753124, "grad_norm": 0.73046875, "learning_rate": 4.903426979822817e-05, "loss": 0.9615, "step": 4463 }, { "epoch": 0.680280402316367, "grad_norm": 0.8671875, "learning_rate": 4.899181009091888e-05, "loss": 1.1958, "step": 4464 }, { "epoch": 0.6804327948796098, "grad_norm": 0.734375, "learning_rate": 4.894936280993818e-05, "loss": 0.6819, "step": 4465 }, { "epoch": 0.6805851874428528, "grad_norm": 0.83984375, "learning_rate": 4.8906927965626835e-05, "loss": 0.9177, "step": 4466 }, { "epoch": 0.6807375800060957, "grad_norm": 0.8046875, "learning_rate": 4.8864505568322536e-05, "loss": 1.1727, "step": 4467 }, { "epoch": 0.6808899725693386, "grad_norm": 0.94140625, "learning_rate": 4.882209562836009e-05, "loss": 0.838, "step": 4468 }, { "epoch": 0.6810423651325815, "grad_norm": 1.15625, "learning_rate": 4.877969815607106e-05, "loss": 1.0041, "step": 4469 }, { "epoch": 0.6811947576958245, "grad_norm": 1.1640625, "learning_rate": 4.873731316178409e-05, "loss": 1.1463, "step": 4470 }, { "epoch": 0.6813471502590673, "grad_norm": 0.94921875, "learning_rate": 4.869494065582484e-05, "loss": 1.0538, "step": 4471 }, { "epoch": 0.6814995428223103, "grad_norm": 0.89453125, "learning_rate": 4.865258064851579e-05, "loss": 0.8811, "step": 4472 }, { "epoch": 0.6816519353855531, "grad_norm": 0.90234375, "learning_rate": 4.861023315017651e-05, "loss": 1.0536, "step": 4473 }, { "epoch": 0.6818043279487961, "grad_norm": 0.77734375, "learning_rate": 4.8567898171123485e-05, "loss": 0.8977, "step": 4474 }, { "epoch": 0.681956720512039, "grad_norm": 1.0078125, "learning_rate": 4.852557572166999e-05, "loss": 1.1163, "step": 4475 }, { "epoch": 0.6821091130752819, "grad_norm": 0.890625, "learning_rate": 4.84832658121265e-05, "loss": 0.8041, "step": 4476 }, { "epoch": 0.6822615056385248, "grad_norm": 0.8671875, "learning_rate": 4.8440968452800285e-05, "loss": 0.8342, "step": 4477 }, { "epoch": 0.6824138982017678, "grad_norm": 0.87890625, "learning_rate": 4.839868365399555e-05, "loss": 0.8554, "step": 4478 }, { "epoch": 0.6825662907650106, "grad_norm": 1.015625, "learning_rate": 4.8356411426013595e-05, "loss": 1.0438, "step": 4479 }, { "epoch": 0.6827186833282536, "grad_norm": 0.984375, "learning_rate": 4.831415177915243e-05, "loss": 1.0765, "step": 4480 }, { "epoch": 0.6828710758914965, "grad_norm": 0.75390625, "learning_rate": 4.82719047237071e-05, "loss": 0.8306, "step": 4481 }, { "epoch": 0.6830234684547394, "grad_norm": 0.95703125, "learning_rate": 4.822967026996968e-05, "loss": 0.8191, "step": 4482 }, { "epoch": 0.6831758610179823, "grad_norm": 1.390625, "learning_rate": 4.8187448428229064e-05, "loss": 0.8921, "step": 4483 }, { "epoch": 0.6833282535812253, "grad_norm": 0.74609375, "learning_rate": 4.8145239208771044e-05, "loss": 0.8572, "step": 4484 }, { "epoch": 0.6834806461444681, "grad_norm": 1.046875, "learning_rate": 4.810304262187852e-05, "loss": 0.9444, "step": 4485 }, { "epoch": 0.6836330387077111, "grad_norm": 0.9609375, "learning_rate": 4.806085867783105e-05, "loss": 0.941, "step": 4486 }, { "epoch": 0.6837854312709539, "grad_norm": 1.3046875, "learning_rate": 4.801868738690528e-05, "loss": 0.9869, "step": 4487 }, { "epoch": 0.6839378238341969, "grad_norm": 1.0390625, "learning_rate": 4.797652875937481e-05, "loss": 1.0272, "step": 4488 }, { "epoch": 0.6840902163974398, "grad_norm": 0.87109375, "learning_rate": 4.793438280551e-05, "loss": 0.887, "step": 4489 }, { "epoch": 0.6842426089606827, "grad_norm": 0.9375, "learning_rate": 4.789224953557834e-05, "loss": 0.9822, "step": 4490 }, { "epoch": 0.6843950015239256, "grad_norm": 1.109375, "learning_rate": 4.785012895984397e-05, "loss": 1.0233, "step": 4491 }, { "epoch": 0.6845473940871686, "grad_norm": 1.09375, "learning_rate": 4.780802108856807e-05, "loss": 0.9422, "step": 4492 }, { "epoch": 0.6846997866504114, "grad_norm": 0.93359375, "learning_rate": 4.7765925932008815e-05, "loss": 0.9647, "step": 4493 }, { "epoch": 0.6848521792136544, "grad_norm": 0.8125, "learning_rate": 4.772384350042114e-05, "loss": 0.6815, "step": 4494 }, { "epoch": 0.6850045717768973, "grad_norm": 0.9375, "learning_rate": 4.7681773804056895e-05, "loss": 0.9895, "step": 4495 }, { "epoch": 0.6851569643401402, "grad_norm": 0.90625, "learning_rate": 4.763971685316497e-05, "loss": 0.9361, "step": 4496 }, { "epoch": 0.6853093569033831, "grad_norm": 0.734375, "learning_rate": 4.7597672657990945e-05, "loss": 0.9425, "step": 4497 }, { "epoch": 0.6854617494666261, "grad_norm": 0.8828125, "learning_rate": 4.755564122877738e-05, "loss": 0.9708, "step": 4498 }, { "epoch": 0.6856141420298689, "grad_norm": 0.92578125, "learning_rate": 4.7513622575763814e-05, "loss": 0.9858, "step": 4499 }, { "epoch": 0.6857665345931119, "grad_norm": 1.1640625, "learning_rate": 4.747161670918656e-05, "loss": 1.1682, "step": 4500 }, { "epoch": 0.6859189271563547, "grad_norm": 0.8984375, "learning_rate": 4.74296236392788e-05, "loss": 0.9714, "step": 4501 }, { "epoch": 0.6860713197195977, "grad_norm": 0.80859375, "learning_rate": 4.7387643376270776e-05, "loss": 0.9482, "step": 4502 }, { "epoch": 0.6862237122828406, "grad_norm": 0.96484375, "learning_rate": 4.734567593038933e-05, "loss": 0.7831, "step": 4503 }, { "epoch": 0.6863761048460835, "grad_norm": 1.1328125, "learning_rate": 4.730372131185845e-05, "loss": 0.9605, "step": 4504 }, { "epoch": 0.6865284974093264, "grad_norm": 1.59375, "learning_rate": 4.726177953089884e-05, "loss": 1.2765, "step": 4505 }, { "epoch": 0.6866808899725694, "grad_norm": 0.91796875, "learning_rate": 4.72198505977281e-05, "loss": 0.9413, "step": 4506 }, { "epoch": 0.6868332825358122, "grad_norm": 0.88671875, "learning_rate": 4.717793452256081e-05, "loss": 0.9182, "step": 4507 }, { "epoch": 0.6869856750990552, "grad_norm": 0.9140625, "learning_rate": 4.713603131560823e-05, "loss": 0.9572, "step": 4508 }, { "epoch": 0.6871380676622981, "grad_norm": 0.79296875, "learning_rate": 4.7094140987078586e-05, "loss": 0.9313, "step": 4509 }, { "epoch": 0.687290460225541, "grad_norm": 0.91796875, "learning_rate": 4.705226354717703e-05, "loss": 0.9037, "step": 4510 }, { "epoch": 0.6874428527887839, "grad_norm": 0.84765625, "learning_rate": 4.701039900610548e-05, "loss": 0.9849, "step": 4511 }, { "epoch": 0.6875952453520269, "grad_norm": 0.8984375, "learning_rate": 4.69685473740627e-05, "loss": 0.8827, "step": 4512 }, { "epoch": 0.6877476379152697, "grad_norm": 1.1171875, "learning_rate": 4.692670866124446e-05, "loss": 1.0359, "step": 4513 }, { "epoch": 0.6879000304785127, "grad_norm": 1.359375, "learning_rate": 4.688488287784315e-05, "loss": 1.0965, "step": 4514 }, { "epoch": 0.6880524230417555, "grad_norm": 0.83984375, "learning_rate": 4.684307003404815e-05, "loss": 1.0609, "step": 4515 }, { "epoch": 0.6882048156049985, "grad_norm": 0.9609375, "learning_rate": 4.680127014004574e-05, "loss": 1.0089, "step": 4516 }, { "epoch": 0.6883572081682414, "grad_norm": 0.953125, "learning_rate": 4.675948320601894e-05, "loss": 1.0169, "step": 4517 }, { "epoch": 0.6885096007314843, "grad_norm": 1.1875, "learning_rate": 4.671770924214763e-05, "loss": 0.975, "step": 4518 }, { "epoch": 0.6886619932947272, "grad_norm": 0.8671875, "learning_rate": 4.6675948258608574e-05, "loss": 1.0009, "step": 4519 }, { "epoch": 0.6888143858579702, "grad_norm": 1.1328125, "learning_rate": 4.663420026557529e-05, "loss": 1.0721, "step": 4520 }, { "epoch": 0.688966778421213, "grad_norm": 0.80078125, "learning_rate": 4.659246527321828e-05, "loss": 0.9623, "step": 4521 }, { "epoch": 0.689119170984456, "grad_norm": 0.79296875, "learning_rate": 4.6550743291704736e-05, "loss": 0.7994, "step": 4522 }, { "epoch": 0.6892715635476989, "grad_norm": 1.0390625, "learning_rate": 4.650903433119871e-05, "loss": 1.149, "step": 4523 }, { "epoch": 0.6894239561109418, "grad_norm": 1.34375, "learning_rate": 4.646733840186121e-05, "loss": 1.2088, "step": 4524 }, { "epoch": 0.6895763486741847, "grad_norm": 0.9375, "learning_rate": 4.642565551384984e-05, "loss": 1.0603, "step": 4525 }, { "epoch": 0.6897287412374277, "grad_norm": 0.81640625, "learning_rate": 4.638398567731918e-05, "loss": 0.8777, "step": 4526 }, { "epoch": 0.6898811338006705, "grad_norm": 0.89453125, "learning_rate": 4.634232890242064e-05, "loss": 0.7865, "step": 4527 }, { "epoch": 0.6900335263639135, "grad_norm": 1.1171875, "learning_rate": 4.630068519930241e-05, "loss": 0.8973, "step": 4528 }, { "epoch": 0.6901859189271563, "grad_norm": 1.3203125, "learning_rate": 4.6259054578109426e-05, "loss": 0.8521, "step": 4529 }, { "epoch": 0.6903383114903993, "grad_norm": 0.75390625, "learning_rate": 4.621743704898363e-05, "loss": 0.9209, "step": 4530 }, { "epoch": 0.6904907040536422, "grad_norm": 0.77734375, "learning_rate": 4.6175832622063544e-05, "loss": 0.932, "step": 4531 }, { "epoch": 0.690643096616885, "grad_norm": 0.796875, "learning_rate": 4.6134241307484596e-05, "loss": 0.9812, "step": 4532 }, { "epoch": 0.690795489180128, "grad_norm": 0.96875, "learning_rate": 4.609266311537911e-05, "loss": 0.8626, "step": 4533 }, { "epoch": 0.690947881743371, "grad_norm": 1.2265625, "learning_rate": 4.6051098055876085e-05, "loss": 1.0406, "step": 4534 }, { "epoch": 0.6911002743066138, "grad_norm": 1.0, "learning_rate": 4.6009546139101377e-05, "loss": 0.9877, "step": 4535 }, { "epoch": 0.6912526668698568, "grad_norm": 1.171875, "learning_rate": 4.596800737517762e-05, "loss": 0.9377, "step": 4536 }, { "epoch": 0.6914050594330997, "grad_norm": 0.81640625, "learning_rate": 4.592648177422423e-05, "loss": 0.9942, "step": 4537 }, { "epoch": 0.6915574519963426, "grad_norm": 0.8046875, "learning_rate": 4.588496934635751e-05, "loss": 0.9541, "step": 4538 }, { "epoch": 0.6917098445595855, "grad_norm": 1.1328125, "learning_rate": 4.5843470101690446e-05, "loss": 1.0894, "step": 4539 }, { "epoch": 0.6918622371228285, "grad_norm": 0.83984375, "learning_rate": 4.58019840503328e-05, "loss": 0.879, "step": 4540 }, { "epoch": 0.6920146296860713, "grad_norm": 0.890625, "learning_rate": 4.576051120239131e-05, "loss": 0.9601, "step": 4541 }, { "epoch": 0.6921670222493143, "grad_norm": 1.453125, "learning_rate": 4.571905156796923e-05, "loss": 0.9817, "step": 4542 }, { "epoch": 0.6923194148125571, "grad_norm": 0.84765625, "learning_rate": 4.567760515716671e-05, "loss": 0.9264, "step": 4543 }, { "epoch": 0.6924718073758, "grad_norm": 1.09375, "learning_rate": 4.56361719800808e-05, "loss": 0.8931, "step": 4544 }, { "epoch": 0.692624199939043, "grad_norm": 0.92578125, "learning_rate": 4.559475204680516e-05, "loss": 0.9564, "step": 4545 }, { "epoch": 0.6927765925022858, "grad_norm": 0.8046875, "learning_rate": 4.555334536743027e-05, "loss": 0.8447, "step": 4546 }, { "epoch": 0.6929289850655288, "grad_norm": 0.84375, "learning_rate": 4.551195195204341e-05, "loss": 0.8382, "step": 4547 }, { "epoch": 0.6930813776287718, "grad_norm": 0.9140625, "learning_rate": 4.547057181072861e-05, "loss": 1.0053, "step": 4548 }, { "epoch": 0.6932337701920146, "grad_norm": 0.796875, "learning_rate": 4.542920495356663e-05, "loss": 0.8523, "step": 4549 }, { "epoch": 0.6933861627552576, "grad_norm": 1.1953125, "learning_rate": 4.5387851390635094e-05, "loss": 1.0198, "step": 4550 }, { "epoch": 0.6935385553185005, "grad_norm": 0.88671875, "learning_rate": 4.534651113200831e-05, "loss": 0.9937, "step": 4551 }, { "epoch": 0.6936909478817433, "grad_norm": 1.0859375, "learning_rate": 4.530518418775733e-05, "loss": 1.1003, "step": 4552 }, { "epoch": 0.6938433404449863, "grad_norm": 1.140625, "learning_rate": 4.5263870567950014e-05, "loss": 1.0308, "step": 4553 }, { "epoch": 0.6939957330082291, "grad_norm": 0.953125, "learning_rate": 4.522257028265091e-05, "loss": 1.0303, "step": 4554 }, { "epoch": 0.6941481255714721, "grad_norm": 0.953125, "learning_rate": 4.5181283341921445e-05, "loss": 1.0816, "step": 4555 }, { "epoch": 0.694300518134715, "grad_norm": 0.83984375, "learning_rate": 4.514000975581966e-05, "loss": 0.8444, "step": 4556 }, { "epoch": 0.6944529106979579, "grad_norm": 0.70703125, "learning_rate": 4.5098749534400354e-05, "loss": 0.8108, "step": 4557 }, { "epoch": 0.6946053032612008, "grad_norm": 1.296875, "learning_rate": 4.505750268771524e-05, "loss": 1.1919, "step": 4558 }, { "epoch": 0.6947576958244438, "grad_norm": 1.5, "learning_rate": 4.501626922581251e-05, "loss": 1.1244, "step": 4559 }, { "epoch": 0.6949100883876866, "grad_norm": 0.90234375, "learning_rate": 4.497504915873725e-05, "loss": 0.906, "step": 4560 }, { "epoch": 0.6950624809509296, "grad_norm": 0.875, "learning_rate": 4.493384249653132e-05, "loss": 0.9376, "step": 4561 }, { "epoch": 0.6952148735141725, "grad_norm": 1.1875, "learning_rate": 4.489264924923323e-05, "loss": 0.9244, "step": 4562 }, { "epoch": 0.6953672660774154, "grad_norm": 0.71875, "learning_rate": 4.4851469426878237e-05, "loss": 0.8578, "step": 4563 }, { "epoch": 0.6955196586406583, "grad_norm": 0.90234375, "learning_rate": 4.4810303039498334e-05, "loss": 0.7469, "step": 4564 }, { "epoch": 0.6956720512039013, "grad_norm": 1.0546875, "learning_rate": 4.476915009712227e-05, "loss": 1.0396, "step": 4565 }, { "epoch": 0.6958244437671441, "grad_norm": 1.21875, "learning_rate": 4.472801060977543e-05, "loss": 1.1638, "step": 4566 }, { "epoch": 0.6959768363303871, "grad_norm": 0.80859375, "learning_rate": 4.468688458748006e-05, "loss": 0.8377, "step": 4567 }, { "epoch": 0.6961292288936299, "grad_norm": 1.0625, "learning_rate": 4.4645772040255e-05, "loss": 0.7535, "step": 4568 }, { "epoch": 0.6962816214568729, "grad_norm": 0.9453125, "learning_rate": 4.460467297811594e-05, "loss": 0.8798, "step": 4569 }, { "epoch": 0.6964340140201158, "grad_norm": 0.96875, "learning_rate": 4.456358741107512e-05, "loss": 1.0061, "step": 4570 }, { "epoch": 0.6965864065833587, "grad_norm": 0.91796875, "learning_rate": 4.452251534914153e-05, "loss": 1.0061, "step": 4571 }, { "epoch": 0.6967387991466016, "grad_norm": 0.78125, "learning_rate": 4.448145680232105e-05, "loss": 0.9195, "step": 4572 }, { "epoch": 0.6968911917098446, "grad_norm": 0.92578125, "learning_rate": 4.4440411780616056e-05, "loss": 0.8887, "step": 4573 }, { "epoch": 0.6970435842730874, "grad_norm": 0.89453125, "learning_rate": 4.439938029402571e-05, "loss": 0.9755, "step": 4574 }, { "epoch": 0.6971959768363304, "grad_norm": 0.765625, "learning_rate": 4.435836235254587e-05, "loss": 0.8272, "step": 4575 }, { "epoch": 0.6973483693995733, "grad_norm": 1.1953125, "learning_rate": 4.431735796616911e-05, "loss": 0.9449, "step": 4576 }, { "epoch": 0.6975007619628162, "grad_norm": 0.92578125, "learning_rate": 4.4276367144884645e-05, "loss": 0.8302, "step": 4577 }, { "epoch": 0.6976531545260591, "grad_norm": 1.015625, "learning_rate": 4.4235389898678494e-05, "loss": 1.0073, "step": 4578 }, { "epoch": 0.6978055470893021, "grad_norm": 1.171875, "learning_rate": 4.419442623753327e-05, "loss": 1.0179, "step": 4579 }, { "epoch": 0.6979579396525449, "grad_norm": 0.83984375, "learning_rate": 4.415347617142832e-05, "loss": 0.9407, "step": 4580 }, { "epoch": 0.6981103322157879, "grad_norm": 0.86328125, "learning_rate": 4.411253971033967e-05, "loss": 0.9667, "step": 4581 }, { "epoch": 0.6982627247790307, "grad_norm": 1.171875, "learning_rate": 4.407161686424002e-05, "loss": 0.8118, "step": 4582 }, { "epoch": 0.6984151173422737, "grad_norm": 1.1328125, "learning_rate": 4.403070764309871e-05, "loss": 0.9757, "step": 4583 }, { "epoch": 0.6985675099055166, "grad_norm": 0.9609375, "learning_rate": 4.398981205688193e-05, "loss": 0.8922, "step": 4584 }, { "epoch": 0.6987199024687595, "grad_norm": 0.9453125, "learning_rate": 4.3948930115552334e-05, "loss": 1.034, "step": 4585 }, { "epoch": 0.6988722950320024, "grad_norm": 1.140625, "learning_rate": 4.390806182906946e-05, "loss": 0.973, "step": 4586 }, { "epoch": 0.6990246875952454, "grad_norm": 0.92578125, "learning_rate": 4.386720720738931e-05, "loss": 1.1013, "step": 4587 }, { "epoch": 0.6991770801584882, "grad_norm": 0.89453125, "learning_rate": 4.3826366260464635e-05, "loss": 0.8872, "step": 4588 }, { "epoch": 0.6993294727217312, "grad_norm": 0.859375, "learning_rate": 4.378553899824498e-05, "loss": 1.1202, "step": 4589 }, { "epoch": 0.6994818652849741, "grad_norm": 1.125, "learning_rate": 4.3744725430676406e-05, "loss": 1.1415, "step": 4590 }, { "epoch": 0.699634257848217, "grad_norm": 1.03125, "learning_rate": 4.3703925567701676e-05, "loss": 1.1152, "step": 4591 }, { "epoch": 0.6997866504114599, "grad_norm": 0.81640625, "learning_rate": 4.3663139419260233e-05, "loss": 0.8448, "step": 4592 }, { "epoch": 0.6999390429747029, "grad_norm": 0.890625, "learning_rate": 4.3622366995288166e-05, "loss": 1.1535, "step": 4593 }, { "epoch": 0.7000914355379457, "grad_norm": 1.1015625, "learning_rate": 4.358160830571818e-05, "loss": 0.9404, "step": 4594 }, { "epoch": 0.7002438281011887, "grad_norm": 0.9921875, "learning_rate": 4.354086336047977e-05, "loss": 0.9314, "step": 4595 }, { "epoch": 0.7003962206644315, "grad_norm": 1.015625, "learning_rate": 4.3500132169498945e-05, "loss": 1.149, "step": 4596 }, { "epoch": 0.7005486132276745, "grad_norm": 0.86328125, "learning_rate": 4.345941474269839e-05, "loss": 0.8618, "step": 4597 }, { "epoch": 0.7007010057909174, "grad_norm": 0.84765625, "learning_rate": 4.3418711089997476e-05, "loss": 1.025, "step": 4598 }, { "epoch": 0.7008533983541603, "grad_norm": 0.98828125, "learning_rate": 4.337802122131221e-05, "loss": 0.9362, "step": 4599 }, { "epoch": 0.7010057909174032, "grad_norm": 1.03125, "learning_rate": 4.3337345146555164e-05, "loss": 1.0261, "step": 4600 }, { "epoch": 0.7011581834806462, "grad_norm": 0.7890625, "learning_rate": 4.32966828756357e-05, "loss": 0.8493, "step": 4601 }, { "epoch": 0.701310576043889, "grad_norm": 0.921875, "learning_rate": 4.32560344184597e-05, "loss": 1.0273, "step": 4602 }, { "epoch": 0.701462968607132, "grad_norm": 0.9375, "learning_rate": 4.321539978492971e-05, "loss": 0.8348, "step": 4603 }, { "epoch": 0.7016153611703749, "grad_norm": 0.89453125, "learning_rate": 4.3174778984944905e-05, "loss": 1.0112, "step": 4604 }, { "epoch": 0.7017677537336178, "grad_norm": 1.1171875, "learning_rate": 4.313417202840106e-05, "loss": 1.4185, "step": 4605 }, { "epoch": 0.7019201462968607, "grad_norm": 0.671875, "learning_rate": 4.30935789251907e-05, "loss": 0.7373, "step": 4606 }, { "epoch": 0.7020725388601037, "grad_norm": 1.0234375, "learning_rate": 4.3052999685202844e-05, "loss": 1.0402, "step": 4607 }, { "epoch": 0.7022249314233465, "grad_norm": 1.3046875, "learning_rate": 4.301243431832317e-05, "loss": 0.9276, "step": 4608 }, { "epoch": 0.7023773239865895, "grad_norm": 1.390625, "learning_rate": 4.2971882834433995e-05, "loss": 0.9707, "step": 4609 }, { "epoch": 0.7025297165498323, "grad_norm": 0.91796875, "learning_rate": 4.2931345243414247e-05, "loss": 0.9601, "step": 4610 }, { "epoch": 0.7026821091130753, "grad_norm": 0.83203125, "learning_rate": 4.2890821555139414e-05, "loss": 0.908, "step": 4611 }, { "epoch": 0.7028345016763182, "grad_norm": 0.86328125, "learning_rate": 4.285031177948173e-05, "loss": 0.9368, "step": 4612 }, { "epoch": 0.7029868942395611, "grad_norm": 1.4453125, "learning_rate": 4.280981592630994e-05, "loss": 1.0937, "step": 4613 }, { "epoch": 0.703139286802804, "grad_norm": 0.859375, "learning_rate": 4.276933400548938e-05, "loss": 0.8175, "step": 4614 }, { "epoch": 0.703291679366047, "grad_norm": 0.88671875, "learning_rate": 4.2728866026882065e-05, "loss": 0.8516, "step": 4615 }, { "epoch": 0.7034440719292898, "grad_norm": 0.9453125, "learning_rate": 4.268841200034655e-05, "loss": 0.9568, "step": 4616 }, { "epoch": 0.7035964644925328, "grad_norm": 1.0390625, "learning_rate": 4.264797193573799e-05, "loss": 1.0601, "step": 4617 }, { "epoch": 0.7037488570557757, "grad_norm": 0.98828125, "learning_rate": 4.260754584290825e-05, "loss": 0.84, "step": 4618 }, { "epoch": 0.7039012496190186, "grad_norm": 1.1484375, "learning_rate": 4.256713373170564e-05, "loss": 0.9593, "step": 4619 }, { "epoch": 0.7040536421822615, "grad_norm": 0.94140625, "learning_rate": 4.2526735611975175e-05, "loss": 1.0383, "step": 4620 }, { "epoch": 0.7042060347455045, "grad_norm": 1.0078125, "learning_rate": 4.2486351493558387e-05, "loss": 1.1593, "step": 4621 }, { "epoch": 0.7043584273087473, "grad_norm": 0.9453125, "learning_rate": 4.24459813862934e-05, "loss": 0.8431, "step": 4622 }, { "epoch": 0.7045108198719903, "grad_norm": 1.0546875, "learning_rate": 4.2405625300015026e-05, "loss": 1.0056, "step": 4623 }, { "epoch": 0.7046632124352331, "grad_norm": 1.4453125, "learning_rate": 4.2365283244554545e-05, "loss": 1.2467, "step": 4624 }, { "epoch": 0.7048156049984761, "grad_norm": 1.3125, "learning_rate": 4.232495522973987e-05, "loss": 0.9997, "step": 4625 }, { "epoch": 0.704967997561719, "grad_norm": 0.8046875, "learning_rate": 4.22846412653955e-05, "loss": 0.7923, "step": 4626 }, { "epoch": 0.7051203901249619, "grad_norm": 0.84375, "learning_rate": 4.2244341361342456e-05, "loss": 0.8849, "step": 4627 }, { "epoch": 0.7052727826882048, "grad_norm": 0.79296875, "learning_rate": 4.220405552739838e-05, "loss": 0.9171, "step": 4628 }, { "epoch": 0.7054251752514478, "grad_norm": 0.97265625, "learning_rate": 4.216378377337752e-05, "loss": 0.8158, "step": 4629 }, { "epoch": 0.7055775678146906, "grad_norm": 1.2109375, "learning_rate": 4.2123526109090636e-05, "loss": 0.9015, "step": 4630 }, { "epoch": 0.7057299603779336, "grad_norm": 0.85546875, "learning_rate": 4.208328254434507e-05, "loss": 0.9856, "step": 4631 }, { "epoch": 0.7058823529411765, "grad_norm": 0.78515625, "learning_rate": 4.2043053088944736e-05, "loss": 0.9703, "step": 4632 }, { "epoch": 0.7060347455044194, "grad_norm": 0.86328125, "learning_rate": 4.200283775269007e-05, "loss": 0.9228, "step": 4633 }, { "epoch": 0.7061871380676623, "grad_norm": 1.140625, "learning_rate": 4.1962636545378184e-05, "loss": 0.958, "step": 4634 }, { "epoch": 0.7063395306309053, "grad_norm": 0.87109375, "learning_rate": 4.192244947680263e-05, "loss": 0.9197, "step": 4635 }, { "epoch": 0.7064919231941481, "grad_norm": 1.078125, "learning_rate": 4.188227655675354e-05, "loss": 0.8473, "step": 4636 }, { "epoch": 0.7066443157573911, "grad_norm": 0.81640625, "learning_rate": 4.1842117795017633e-05, "loss": 0.9391, "step": 4637 }, { "epoch": 0.7067967083206339, "grad_norm": 0.57421875, "learning_rate": 4.180197320137815e-05, "loss": 0.8469, "step": 4638 }, { "epoch": 0.7069491008838769, "grad_norm": 0.87890625, "learning_rate": 4.176184278561488e-05, "loss": 0.8168, "step": 4639 }, { "epoch": 0.7071014934471198, "grad_norm": 1.1171875, "learning_rate": 4.172172655750421e-05, "loss": 1.0284, "step": 4640 }, { "epoch": 0.7072538860103627, "grad_norm": 1.2109375, "learning_rate": 4.168162452681901e-05, "loss": 0.9967, "step": 4641 }, { "epoch": 0.7074062785736056, "grad_norm": 0.8046875, "learning_rate": 4.164153670332871e-05, "loss": 0.969, "step": 4642 }, { "epoch": 0.7075586711368486, "grad_norm": 0.95703125, "learning_rate": 4.1601463096799274e-05, "loss": 0.944, "step": 4643 }, { "epoch": 0.7077110637000914, "grad_norm": 0.84375, "learning_rate": 4.156140371699322e-05, "loss": 0.9076, "step": 4644 }, { "epoch": 0.7078634562633344, "grad_norm": 1.0546875, "learning_rate": 4.152135857366955e-05, "loss": 1.057, "step": 4645 }, { "epoch": 0.7080158488265773, "grad_norm": 0.70703125, "learning_rate": 4.1481327676583903e-05, "loss": 0.7476, "step": 4646 }, { "epoch": 0.7081682413898202, "grad_norm": 0.75, "learning_rate": 4.144131103548834e-05, "loss": 0.9754, "step": 4647 }, { "epoch": 0.7083206339530631, "grad_norm": 0.875, "learning_rate": 4.140130866013151e-05, "loss": 1.0544, "step": 4648 }, { "epoch": 0.7084730265163061, "grad_norm": 0.984375, "learning_rate": 4.136132056025854e-05, "loss": 1.0517, "step": 4649 }, { "epoch": 0.7086254190795489, "grad_norm": 0.9140625, "learning_rate": 4.13213467456111e-05, "loss": 0.9636, "step": 4650 }, { "epoch": 0.7087778116427919, "grad_norm": 0.90234375, "learning_rate": 4.1281387225927436e-05, "loss": 0.9411, "step": 4651 }, { "epoch": 0.7089302042060347, "grad_norm": 0.890625, "learning_rate": 4.124144201094223e-05, "loss": 1.0221, "step": 4652 }, { "epoch": 0.7090825967692777, "grad_norm": 1.0234375, "learning_rate": 4.1201511110386715e-05, "loss": 0.9037, "step": 4653 }, { "epoch": 0.7092349893325206, "grad_norm": 1.2421875, "learning_rate": 4.116159453398864e-05, "loss": 1.0125, "step": 4654 }, { "epoch": 0.7093873818957634, "grad_norm": 0.78125, "learning_rate": 4.112169229147224e-05, "loss": 1.0035, "step": 4655 }, { "epoch": 0.7095397744590064, "grad_norm": 1.1328125, "learning_rate": 4.108180439255824e-05, "loss": 1.0516, "step": 4656 }, { "epoch": 0.7096921670222494, "grad_norm": 1.0234375, "learning_rate": 4.104193084696399e-05, "loss": 0.8735, "step": 4657 }, { "epoch": 0.7098445595854922, "grad_norm": 1.1484375, "learning_rate": 4.100207166440323e-05, "loss": 1.0001, "step": 4658 }, { "epoch": 0.7099969521487351, "grad_norm": 1.0390625, "learning_rate": 4.096222685458621e-05, "loss": 0.8241, "step": 4659 }, { "epoch": 0.7101493447119781, "grad_norm": 1.34375, "learning_rate": 4.09223964272197e-05, "loss": 0.9881, "step": 4660 }, { "epoch": 0.710301737275221, "grad_norm": 1.09375, "learning_rate": 4.0882580392006976e-05, "loss": 0.988, "step": 4661 }, { "epoch": 0.7104541298384639, "grad_norm": 1.0625, "learning_rate": 4.084277875864776e-05, "loss": 1.1442, "step": 4662 }, { "epoch": 0.7106065224017067, "grad_norm": 0.796875, "learning_rate": 4.0802991536838366e-05, "loss": 0.9837, "step": 4663 }, { "epoch": 0.7107589149649497, "grad_norm": 1.0390625, "learning_rate": 4.07632187362715e-05, "loss": 0.945, "step": 4664 }, { "epoch": 0.7109113075281926, "grad_norm": 1.078125, "learning_rate": 4.07234603666364e-05, "loss": 1.0104, "step": 4665 }, { "epoch": 0.7110637000914355, "grad_norm": 0.88671875, "learning_rate": 4.0683716437618754e-05, "loss": 0.8363, "step": 4666 }, { "epoch": 0.7112160926546784, "grad_norm": 0.7890625, "learning_rate": 4.0643986958900745e-05, "loss": 0.8695, "step": 4667 }, { "epoch": 0.7113684852179214, "grad_norm": 0.9375, "learning_rate": 4.06042719401611e-05, "loss": 0.8545, "step": 4668 }, { "epoch": 0.7115208777811642, "grad_norm": 0.76171875, "learning_rate": 4.056457139107494e-05, "loss": 0.9217, "step": 4669 }, { "epoch": 0.7116732703444072, "grad_norm": 0.9609375, "learning_rate": 4.052488532131388e-05, "loss": 0.8404, "step": 4670 }, { "epoch": 0.7118256629076501, "grad_norm": 0.98046875, "learning_rate": 4.048521374054601e-05, "loss": 0.9938, "step": 4671 }, { "epoch": 0.711978055470893, "grad_norm": 0.98046875, "learning_rate": 4.0445556658435936e-05, "loss": 1.1865, "step": 4672 }, { "epoch": 0.7121304480341359, "grad_norm": 1.0078125, "learning_rate": 4.0405914084644616e-05, "loss": 1.028, "step": 4673 }, { "epoch": 0.7122828405973789, "grad_norm": 1.28125, "learning_rate": 4.036628602882965e-05, "loss": 1.124, "step": 4674 }, { "epoch": 0.7124352331606217, "grad_norm": 1.046875, "learning_rate": 4.032667250064495e-05, "loss": 0.9433, "step": 4675 }, { "epoch": 0.7125876257238647, "grad_norm": 1.1484375, "learning_rate": 4.028707350974093e-05, "loss": 1.0245, "step": 4676 }, { "epoch": 0.7127400182871075, "grad_norm": 0.9375, "learning_rate": 4.024748906576452e-05, "loss": 1.0456, "step": 4677 }, { "epoch": 0.7128924108503505, "grad_norm": 0.71875, "learning_rate": 4.020791917835901e-05, "loss": 0.9119, "step": 4678 }, { "epoch": 0.7130448034135934, "grad_norm": 0.71875, "learning_rate": 4.016836385716419e-05, "loss": 1.1312, "step": 4679 }, { "epoch": 0.7131971959768363, "grad_norm": 0.921875, "learning_rate": 4.012882311181636e-05, "loss": 1.0123, "step": 4680 }, { "epoch": 0.7133495885400792, "grad_norm": 1.015625, "learning_rate": 4.008929695194819e-05, "loss": 1.0053, "step": 4681 }, { "epoch": 0.7135019811033222, "grad_norm": 0.94140625, "learning_rate": 4.00497853871888e-05, "loss": 1.1766, "step": 4682 }, { "epoch": 0.713654373666565, "grad_norm": 0.87109375, "learning_rate": 4.00102884271638e-05, "loss": 0.852, "step": 4683 }, { "epoch": 0.713806766229808, "grad_norm": 1.1640625, "learning_rate": 3.997080608149516e-05, "loss": 1.1117, "step": 4684 }, { "epoch": 0.7139591587930509, "grad_norm": 1.015625, "learning_rate": 3.993133835980144e-05, "loss": 0.891, "step": 4685 }, { "epoch": 0.7141115513562938, "grad_norm": 0.74609375, "learning_rate": 3.9891885271697496e-05, "loss": 1.0039, "step": 4686 }, { "epoch": 0.7142639439195367, "grad_norm": 0.92578125, "learning_rate": 3.9852446826794664e-05, "loss": 0.9759, "step": 4687 }, { "epoch": 0.7144163364827797, "grad_norm": 1.1875, "learning_rate": 3.9813023034700734e-05, "loss": 0.9691, "step": 4688 }, { "epoch": 0.7145687290460225, "grad_norm": 1.1171875, "learning_rate": 3.977361390501989e-05, "loss": 1.0769, "step": 4689 }, { "epoch": 0.7147211216092655, "grad_norm": 1.1484375, "learning_rate": 3.973421944735274e-05, "loss": 0.9644, "step": 4690 }, { "epoch": 0.7148735141725083, "grad_norm": 1.0625, "learning_rate": 3.9694839671296404e-05, "loss": 0.9813, "step": 4691 }, { "epoch": 0.7150259067357513, "grad_norm": 0.9765625, "learning_rate": 3.965547458644432e-05, "loss": 0.906, "step": 4692 }, { "epoch": 0.7151782992989942, "grad_norm": 1.265625, "learning_rate": 3.961612420238641e-05, "loss": 1.106, "step": 4693 }, { "epoch": 0.7153306918622371, "grad_norm": 1.1640625, "learning_rate": 3.9576788528708964e-05, "loss": 0.895, "step": 4694 }, { "epoch": 0.71548308442548, "grad_norm": 0.77734375, "learning_rate": 3.9537467574994745e-05, "loss": 0.8901, "step": 4695 }, { "epoch": 0.715635476988723, "grad_norm": 0.9453125, "learning_rate": 3.9498161350822846e-05, "loss": 0.885, "step": 4696 }, { "epoch": 0.7157878695519658, "grad_norm": 0.96875, "learning_rate": 3.945886986576892e-05, "loss": 0.915, "step": 4697 }, { "epoch": 0.7159402621152088, "grad_norm": 1.234375, "learning_rate": 3.9419593129404895e-05, "loss": 1.0829, "step": 4698 }, { "epoch": 0.7160926546784517, "grad_norm": 0.98046875, "learning_rate": 3.9380331151299144e-05, "loss": 0.7676, "step": 4699 }, { "epoch": 0.7162450472416946, "grad_norm": 0.890625, "learning_rate": 3.9341083941016445e-05, "loss": 0.9792, "step": 4700 }, { "epoch": 0.7163974398049375, "grad_norm": 0.99609375, "learning_rate": 3.930185150811795e-05, "loss": 1.1152, "step": 4701 }, { "epoch": 0.7165498323681805, "grad_norm": 1.0078125, "learning_rate": 3.9262633862161324e-05, "loss": 0.9352, "step": 4702 }, { "epoch": 0.7167022249314233, "grad_norm": 0.9765625, "learning_rate": 3.9223431012700506e-05, "loss": 0.9351, "step": 4703 }, { "epoch": 0.7168546174946663, "grad_norm": 1.03125, "learning_rate": 3.9184242969285875e-05, "loss": 1.1068, "step": 4704 }, { "epoch": 0.7170070100579091, "grad_norm": 1.1328125, "learning_rate": 3.9145069741464216e-05, "loss": 0.9732, "step": 4705 }, { "epoch": 0.7171594026211521, "grad_norm": 1.015625, "learning_rate": 3.9105911338778665e-05, "loss": 1.0837, "step": 4706 }, { "epoch": 0.717311795184395, "grad_norm": 1.28125, "learning_rate": 3.906676777076875e-05, "loss": 1.0293, "step": 4707 }, { "epoch": 0.7174641877476379, "grad_norm": 0.79296875, "learning_rate": 3.902763904697049e-05, "loss": 1.0122, "step": 4708 }, { "epoch": 0.7176165803108808, "grad_norm": 1.1953125, "learning_rate": 3.8988525176916136e-05, "loss": 1.108, "step": 4709 }, { "epoch": 0.7177689728741238, "grad_norm": 1.125, "learning_rate": 3.894942617013443e-05, "loss": 1.2537, "step": 4710 }, { "epoch": 0.7179213654373666, "grad_norm": 0.90234375, "learning_rate": 3.891034203615042e-05, "loss": 1.0036, "step": 4711 }, { "epoch": 0.7180737580006096, "grad_norm": 1.0859375, "learning_rate": 3.8871272784485566e-05, "loss": 1.0717, "step": 4712 }, { "epoch": 0.7182261505638525, "grad_norm": 1.0703125, "learning_rate": 3.88322184246577e-05, "loss": 1.0431, "step": 4713 }, { "epoch": 0.7183785431270954, "grad_norm": 1.1328125, "learning_rate": 3.879317896618104e-05, "loss": 1.0427, "step": 4714 }, { "epoch": 0.7185309356903383, "grad_norm": 1.015625, "learning_rate": 3.875415441856618e-05, "loss": 0.9726, "step": 4715 }, { "epoch": 0.7186833282535813, "grad_norm": 0.828125, "learning_rate": 3.871514479132001e-05, "loss": 0.978, "step": 4716 }, { "epoch": 0.7188357208168241, "grad_norm": 0.9453125, "learning_rate": 3.8676150093945885e-05, "loss": 0.9257, "step": 4717 }, { "epoch": 0.7189881133800671, "grad_norm": 1.1171875, "learning_rate": 3.86371703359434e-05, "loss": 0.979, "step": 4718 }, { "epoch": 0.7191405059433099, "grad_norm": 0.88671875, "learning_rate": 3.859820552680867e-05, "loss": 0.976, "step": 4719 }, { "epoch": 0.7192928985065529, "grad_norm": 0.94921875, "learning_rate": 3.855925567603406e-05, "loss": 1.0626, "step": 4720 }, { "epoch": 0.7194452910697958, "grad_norm": 0.79296875, "learning_rate": 3.852032079310829e-05, "loss": 0.8739, "step": 4721 }, { "epoch": 0.7195976836330387, "grad_norm": 0.83984375, "learning_rate": 3.8481400887516474e-05, "loss": 1.0423, "step": 4722 }, { "epoch": 0.7197500761962816, "grad_norm": 1.046875, "learning_rate": 3.8442495968740045e-05, "loss": 1.1691, "step": 4723 }, { "epoch": 0.7199024687595246, "grad_norm": 0.91796875, "learning_rate": 3.840360604625679e-05, "loss": 0.9413, "step": 4724 }, { "epoch": 0.7200548613227674, "grad_norm": 0.91796875, "learning_rate": 3.8364731129540896e-05, "loss": 0.9697, "step": 4725 }, { "epoch": 0.7202072538860104, "grad_norm": 1.2109375, "learning_rate": 3.8325871228062834e-05, "loss": 1.1353, "step": 4726 }, { "epoch": 0.7203596464492533, "grad_norm": 0.9609375, "learning_rate": 3.828702635128944e-05, "loss": 0.9918, "step": 4727 }, { "epoch": 0.7205120390124962, "grad_norm": 0.9453125, "learning_rate": 3.824819650868387e-05, "loss": 0.9182, "step": 4728 }, { "epoch": 0.7206644315757391, "grad_norm": 1.0234375, "learning_rate": 3.820938170970564e-05, "loss": 0.9722, "step": 4729 }, { "epoch": 0.7208168241389821, "grad_norm": 1.0, "learning_rate": 3.8170581963810545e-05, "loss": 1.1321, "step": 4730 }, { "epoch": 0.7209692167022249, "grad_norm": 1.0390625, "learning_rate": 3.813179728045085e-05, "loss": 1.0613, "step": 4731 }, { "epoch": 0.7211216092654679, "grad_norm": 0.9375, "learning_rate": 3.809302766907502e-05, "loss": 1.0802, "step": 4732 }, { "epoch": 0.7212740018287107, "grad_norm": 1.5234375, "learning_rate": 3.8054273139127884e-05, "loss": 1.0763, "step": 4733 }, { "epoch": 0.7214263943919537, "grad_norm": 1.0390625, "learning_rate": 3.801553370005061e-05, "loss": 0.8014, "step": 4734 }, { "epoch": 0.7215787869551966, "grad_norm": 1.3984375, "learning_rate": 3.797680936128064e-05, "loss": 1.2878, "step": 4735 }, { "epoch": 0.7217311795184395, "grad_norm": 0.9765625, "learning_rate": 3.793810013225185e-05, "loss": 0.8804, "step": 4736 }, { "epoch": 0.7218835720816824, "grad_norm": 1.21875, "learning_rate": 3.7899406022394346e-05, "loss": 0.9705, "step": 4737 }, { "epoch": 0.7220359646449254, "grad_norm": 0.9609375, "learning_rate": 3.786072704113456e-05, "loss": 0.9685, "step": 4738 }, { "epoch": 0.7221883572081682, "grad_norm": 0.796875, "learning_rate": 3.782206319789523e-05, "loss": 0.8806, "step": 4739 }, { "epoch": 0.7223407497714112, "grad_norm": 1.0625, "learning_rate": 3.778341450209545e-05, "loss": 1.0538, "step": 4740 }, { "epoch": 0.7224931423346541, "grad_norm": 0.96875, "learning_rate": 3.774478096315055e-05, "loss": 0.9465, "step": 4741 }, { "epoch": 0.722645534897897, "grad_norm": 1.2578125, "learning_rate": 3.7706162590472296e-05, "loss": 1.131, "step": 4742 }, { "epoch": 0.7227979274611399, "grad_norm": 0.9609375, "learning_rate": 3.766755939346863e-05, "loss": 1.0929, "step": 4743 }, { "epoch": 0.7229503200243829, "grad_norm": 0.81640625, "learning_rate": 3.762897138154385e-05, "loss": 0.9247, "step": 4744 }, { "epoch": 0.7231027125876257, "grad_norm": 0.84765625, "learning_rate": 3.759039856409856e-05, "loss": 1.1784, "step": 4745 }, { "epoch": 0.7232551051508687, "grad_norm": 1.09375, "learning_rate": 3.755184095052964e-05, "loss": 1.1583, "step": 4746 }, { "epoch": 0.7234074977141115, "grad_norm": 1.125, "learning_rate": 3.7513298550230244e-05, "loss": 1.0896, "step": 4747 }, { "epoch": 0.7235598902773545, "grad_norm": 0.81640625, "learning_rate": 3.747477137258994e-05, "loss": 0.9748, "step": 4748 }, { "epoch": 0.7237122828405974, "grad_norm": 1.453125, "learning_rate": 3.7436259426994466e-05, "loss": 1.091, "step": 4749 }, { "epoch": 0.7238646754038403, "grad_norm": 0.9609375, "learning_rate": 3.739776272282587e-05, "loss": 0.9622, "step": 4750 }, { "epoch": 0.7240170679670832, "grad_norm": 1.296875, "learning_rate": 3.735928126946251e-05, "loss": 1.0516, "step": 4751 }, { "epoch": 0.7241694605303262, "grad_norm": 1.1328125, "learning_rate": 3.7320815076278994e-05, "loss": 0.9721, "step": 4752 }, { "epoch": 0.724321853093569, "grad_norm": 1.0, "learning_rate": 3.7282364152646297e-05, "loss": 0.9014, "step": 4753 }, { "epoch": 0.724474245656812, "grad_norm": 1.171875, "learning_rate": 3.724392850793159e-05, "loss": 0.8319, "step": 4754 }, { "epoch": 0.7246266382200549, "grad_norm": 1.015625, "learning_rate": 3.7205508151498344e-05, "loss": 1.1354, "step": 4755 }, { "epoch": 0.7247790307832978, "grad_norm": 1.0546875, "learning_rate": 3.7167103092706326e-05, "loss": 0.9386, "step": 4756 }, { "epoch": 0.7249314233465407, "grad_norm": 1.125, "learning_rate": 3.7128713340911535e-05, "loss": 0.9813, "step": 4757 }, { "epoch": 0.7250838159097837, "grad_norm": 1.25, "learning_rate": 3.709033890546625e-05, "loss": 1.0105, "step": 4758 }, { "epoch": 0.7252362084730265, "grad_norm": 0.765625, "learning_rate": 3.7051979795719095e-05, "loss": 0.9585, "step": 4759 }, { "epoch": 0.7253886010362695, "grad_norm": 0.96875, "learning_rate": 3.701363602101487e-05, "loss": 1.2457, "step": 4760 }, { "epoch": 0.7255409935995123, "grad_norm": 0.84765625, "learning_rate": 3.6975307590694686e-05, "loss": 0.9073, "step": 4761 }, { "epoch": 0.7256933861627552, "grad_norm": 0.73046875, "learning_rate": 3.6936994514095865e-05, "loss": 0.8987, "step": 4762 }, { "epoch": 0.7258457787259982, "grad_norm": 0.90625, "learning_rate": 3.689869680055206e-05, "loss": 0.9805, "step": 4763 }, { "epoch": 0.725998171289241, "grad_norm": 0.8671875, "learning_rate": 3.686041445939308e-05, "loss": 1.0063, "step": 4764 }, { "epoch": 0.726150563852484, "grad_norm": 0.8125, "learning_rate": 3.6822147499945134e-05, "loss": 0.748, "step": 4765 }, { "epoch": 0.726302956415727, "grad_norm": 1.078125, "learning_rate": 3.6783895931530574e-05, "loss": 1.0172, "step": 4766 }, { "epoch": 0.7264553489789698, "grad_norm": 0.90234375, "learning_rate": 3.674565976346804e-05, "loss": 0.9661, "step": 4767 }, { "epoch": 0.7266077415422127, "grad_norm": 1.546875, "learning_rate": 3.67074390050724e-05, "loss": 1.1555, "step": 4768 }, { "epoch": 0.7267601341054557, "grad_norm": 0.984375, "learning_rate": 3.6669233665654734e-05, "loss": 0.9725, "step": 4769 }, { "epoch": 0.7269125266686985, "grad_norm": 1.0078125, "learning_rate": 3.6631043754522495e-05, "loss": 1.0969, "step": 4770 }, { "epoch": 0.7270649192319415, "grad_norm": 1.09375, "learning_rate": 3.659286928097927e-05, "loss": 0.8264, "step": 4771 }, { "epoch": 0.7272173117951843, "grad_norm": 1.359375, "learning_rate": 3.65547102543249e-05, "loss": 0.9228, "step": 4772 }, { "epoch": 0.7273697043584273, "grad_norm": 1.0078125, "learning_rate": 3.651656668385547e-05, "loss": 1.0177, "step": 4773 }, { "epoch": 0.7275220969216702, "grad_norm": 1.1875, "learning_rate": 3.6478438578863296e-05, "loss": 1.0337, "step": 4774 }, { "epoch": 0.7276744894849131, "grad_norm": 1.4140625, "learning_rate": 3.644032594863691e-05, "loss": 1.0465, "step": 4775 }, { "epoch": 0.727826882048156, "grad_norm": 0.90625, "learning_rate": 3.640222880246117e-05, "loss": 0.8786, "step": 4776 }, { "epoch": 0.727979274611399, "grad_norm": 0.87890625, "learning_rate": 3.6364147149617035e-05, "loss": 1.0677, "step": 4777 }, { "epoch": 0.7281316671746418, "grad_norm": 1.21875, "learning_rate": 3.632608099938176e-05, "loss": 1.2219, "step": 4778 }, { "epoch": 0.7282840597378848, "grad_norm": 0.9375, "learning_rate": 3.6288030361028804e-05, "loss": 1.0739, "step": 4779 }, { "epoch": 0.7284364523011277, "grad_norm": 0.984375, "learning_rate": 3.6249995243827805e-05, "loss": 1.0649, "step": 4780 }, { "epoch": 0.7285888448643706, "grad_norm": 0.89453125, "learning_rate": 3.621197565704474e-05, "loss": 0.8527, "step": 4781 }, { "epoch": 0.7287412374276135, "grad_norm": 1.1015625, "learning_rate": 3.617397160994171e-05, "loss": 0.9278, "step": 4782 }, { "epoch": 0.7288936299908565, "grad_norm": 0.76953125, "learning_rate": 3.6135983111777e-05, "loss": 0.8397, "step": 4783 }, { "epoch": 0.7290460225540993, "grad_norm": 1.046875, "learning_rate": 3.609801017180521e-05, "loss": 1.1353, "step": 4784 }, { "epoch": 0.7291984151173423, "grad_norm": 0.98046875, "learning_rate": 3.6060052799277066e-05, "loss": 0.9865, "step": 4785 }, { "epoch": 0.7293508076805851, "grad_norm": 1.0, "learning_rate": 3.6022111003439495e-05, "loss": 0.8664, "step": 4786 }, { "epoch": 0.7295032002438281, "grad_norm": 1.1875, "learning_rate": 3.598418479353574e-05, "loss": 0.8862, "step": 4787 }, { "epoch": 0.729655592807071, "grad_norm": 0.8984375, "learning_rate": 3.5946274178805125e-05, "loss": 0.7954, "step": 4788 }, { "epoch": 0.7298079853703139, "grad_norm": 0.828125, "learning_rate": 3.590837916848323e-05, "loss": 0.8928, "step": 4789 }, { "epoch": 0.7299603779335568, "grad_norm": 1.2109375, "learning_rate": 3.587049977180184e-05, "loss": 0.9758, "step": 4790 }, { "epoch": 0.7301127704967998, "grad_norm": 0.73828125, "learning_rate": 3.5832635997988885e-05, "loss": 0.8111, "step": 4791 }, { "epoch": 0.7302651630600426, "grad_norm": 0.7734375, "learning_rate": 3.5794787856268516e-05, "loss": 0.8243, "step": 4792 }, { "epoch": 0.7304175556232856, "grad_norm": 0.7734375, "learning_rate": 3.5756955355861154e-05, "loss": 1.1616, "step": 4793 }, { "epoch": 0.7305699481865285, "grad_norm": 1.0625, "learning_rate": 3.5719138505983295e-05, "loss": 1.0172, "step": 4794 }, { "epoch": 0.7307223407497714, "grad_norm": 0.97265625, "learning_rate": 3.568133731584767e-05, "loss": 0.8217, "step": 4795 }, { "epoch": 0.7308747333130143, "grad_norm": 0.80078125, "learning_rate": 3.56435517946632e-05, "loss": 0.8129, "step": 4796 }, { "epoch": 0.7310271258762573, "grad_norm": 1.0546875, "learning_rate": 3.560578195163493e-05, "loss": 1.0605, "step": 4797 }, { "epoch": 0.7311795184395001, "grad_norm": 0.9609375, "learning_rate": 3.556802779596422e-05, "loss": 0.9345, "step": 4798 }, { "epoch": 0.7313319110027431, "grad_norm": 1.1640625, "learning_rate": 3.55302893368485e-05, "loss": 1.075, "step": 4799 }, { "epoch": 0.7314843035659859, "grad_norm": 0.984375, "learning_rate": 3.549256658348137e-05, "loss": 1.1312, "step": 4800 }, { "epoch": 0.7316366961292289, "grad_norm": 1.1328125, "learning_rate": 3.5454859545052664e-05, "loss": 1.0229, "step": 4801 }, { "epoch": 0.7317890886924718, "grad_norm": 0.7265625, "learning_rate": 3.5417168230748346e-05, "loss": 0.9718, "step": 4802 }, { "epoch": 0.7319414812557147, "grad_norm": 0.81640625, "learning_rate": 3.537949264975053e-05, "loss": 0.9325, "step": 4803 }, { "epoch": 0.7320938738189576, "grad_norm": 1.03125, "learning_rate": 3.534183281123761e-05, "loss": 1.145, "step": 4804 }, { "epoch": 0.7322462663822006, "grad_norm": 1.2421875, "learning_rate": 3.5304188724384e-05, "loss": 1.1011, "step": 4805 }, { "epoch": 0.7323986589454434, "grad_norm": 0.671875, "learning_rate": 3.526656039836037e-05, "loss": 0.8605, "step": 4806 }, { "epoch": 0.7325510515086864, "grad_norm": 0.67578125, "learning_rate": 3.522894784233351e-05, "loss": 0.8342, "step": 4807 }, { "epoch": 0.7327034440719293, "grad_norm": 1.3984375, "learning_rate": 3.519135106546638e-05, "loss": 1.0243, "step": 4808 }, { "epoch": 0.7328558366351722, "grad_norm": 1.0546875, "learning_rate": 3.515377007691806e-05, "loss": 1.0766, "step": 4809 }, { "epoch": 0.7330082291984151, "grad_norm": 0.6640625, "learning_rate": 3.5116204885843894e-05, "loss": 0.9277, "step": 4810 }, { "epoch": 0.7331606217616581, "grad_norm": 0.796875, "learning_rate": 3.5078655501395255e-05, "loss": 0.8638, "step": 4811 }, { "epoch": 0.7333130143249009, "grad_norm": 1.0703125, "learning_rate": 3.5041121932719725e-05, "loss": 0.8552, "step": 4812 }, { "epoch": 0.7334654068881439, "grad_norm": 0.9765625, "learning_rate": 3.500360418896101e-05, "loss": 1.0335, "step": 4813 }, { "epoch": 0.7336177994513867, "grad_norm": 0.76171875, "learning_rate": 3.496610227925896e-05, "loss": 0.8927, "step": 4814 }, { "epoch": 0.7337701920146297, "grad_norm": 0.75390625, "learning_rate": 3.492861621274963e-05, "loss": 0.8107, "step": 4815 }, { "epoch": 0.7339225845778726, "grad_norm": 1.046875, "learning_rate": 3.4891145998565134e-05, "loss": 1.2467, "step": 4816 }, { "epoch": 0.7340749771411155, "grad_norm": 0.8359375, "learning_rate": 3.485369164583375e-05, "loss": 1.1109, "step": 4817 }, { "epoch": 0.7342273697043584, "grad_norm": 0.9921875, "learning_rate": 3.4816253163679914e-05, "loss": 0.9641, "step": 4818 }, { "epoch": 0.7343797622676014, "grad_norm": 0.875, "learning_rate": 3.477883056122415e-05, "loss": 0.9705, "step": 4819 }, { "epoch": 0.7345321548308442, "grad_norm": 0.96484375, "learning_rate": 3.4741423847583134e-05, "loss": 1.0359, "step": 4820 }, { "epoch": 0.7346845473940872, "grad_norm": 1.1796875, "learning_rate": 3.470403303186973e-05, "loss": 1.1335, "step": 4821 }, { "epoch": 0.7348369399573301, "grad_norm": 1.046875, "learning_rate": 3.466665812319286e-05, "loss": 1.232, "step": 4822 }, { "epoch": 0.734989332520573, "grad_norm": 0.9375, "learning_rate": 3.462929913065758e-05, "loss": 1.0213, "step": 4823 }, { "epoch": 0.7351417250838159, "grad_norm": 0.96875, "learning_rate": 3.459195606336508e-05, "loss": 0.805, "step": 4824 }, { "epoch": 0.7352941176470589, "grad_norm": 0.9453125, "learning_rate": 3.455462893041267e-05, "loss": 0.8493, "step": 4825 }, { "epoch": 0.7354465102103017, "grad_norm": 1.140625, "learning_rate": 3.451731774089374e-05, "loss": 1.0302, "step": 4826 }, { "epoch": 0.7355989027735447, "grad_norm": 1.0078125, "learning_rate": 3.448002250389789e-05, "loss": 0.9602, "step": 4827 }, { "epoch": 0.7357512953367875, "grad_norm": 0.984375, "learning_rate": 3.44427432285108e-05, "loss": 1.0692, "step": 4828 }, { "epoch": 0.7359036879000305, "grad_norm": 1.1640625, "learning_rate": 3.440547992381411e-05, "loss": 0.9694, "step": 4829 }, { "epoch": 0.7360560804632734, "grad_norm": 0.83984375, "learning_rate": 3.436823259888582e-05, "loss": 0.9052, "step": 4830 }, { "epoch": 0.7362084730265163, "grad_norm": 0.953125, "learning_rate": 3.4331001262799836e-05, "loss": 1.1894, "step": 4831 }, { "epoch": 0.7363608655897592, "grad_norm": 1.1796875, "learning_rate": 3.429378592462631e-05, "loss": 1.0821, "step": 4832 }, { "epoch": 0.7365132581530022, "grad_norm": 0.87109375, "learning_rate": 3.4256586593431407e-05, "loss": 1.1483, "step": 4833 }, { "epoch": 0.736665650716245, "grad_norm": 1.046875, "learning_rate": 3.4219403278277416e-05, "loss": 0.9592, "step": 4834 }, { "epoch": 0.736818043279488, "grad_norm": 0.77734375, "learning_rate": 3.418223598822273e-05, "loss": 0.8115, "step": 4835 }, { "epoch": 0.7369704358427309, "grad_norm": 1.1484375, "learning_rate": 3.414508473232184e-05, "loss": 1.0314, "step": 4836 }, { "epoch": 0.7371228284059738, "grad_norm": 0.81640625, "learning_rate": 3.4107949519625284e-05, "loss": 0.7874, "step": 4837 }, { "epoch": 0.7372752209692167, "grad_norm": 0.84375, "learning_rate": 3.407083035917981e-05, "loss": 0.9863, "step": 4838 }, { "epoch": 0.7374276135324597, "grad_norm": 1.0234375, "learning_rate": 3.403372726002812e-05, "loss": 1.0419, "step": 4839 }, { "epoch": 0.7375800060957025, "grad_norm": 0.8984375, "learning_rate": 3.39966402312091e-05, "loss": 0.9519, "step": 4840 }, { "epoch": 0.7377323986589455, "grad_norm": 0.78515625, "learning_rate": 3.395956928175765e-05, "loss": 1.0564, "step": 4841 }, { "epoch": 0.7378847912221883, "grad_norm": 0.859375, "learning_rate": 3.392251442070481e-05, "loss": 0.8486, "step": 4842 }, { "epoch": 0.7380371837854313, "grad_norm": 1.09375, "learning_rate": 3.3885475657077634e-05, "loss": 1.0101, "step": 4843 }, { "epoch": 0.7381895763486742, "grad_norm": 1.171875, "learning_rate": 3.384845299989936e-05, "loss": 1.002, "step": 4844 }, { "epoch": 0.7383419689119171, "grad_norm": 0.94140625, "learning_rate": 3.3811446458189224e-05, "loss": 1.1223, "step": 4845 }, { "epoch": 0.73849436147516, "grad_norm": 1.5625, "learning_rate": 3.377445604096253e-05, "loss": 1.0715, "step": 4846 }, { "epoch": 0.738646754038403, "grad_norm": 0.94921875, "learning_rate": 3.3737481757230685e-05, "loss": 1.0709, "step": 4847 }, { "epoch": 0.7387991466016458, "grad_norm": 1.375, "learning_rate": 3.370052361600112e-05, "loss": 0.8891, "step": 4848 }, { "epoch": 0.7389515391648888, "grad_norm": 0.71875, "learning_rate": 3.3663581626277454e-05, "loss": 0.9372, "step": 4849 }, { "epoch": 0.7391039317281317, "grad_norm": 0.95703125, "learning_rate": 3.362665579705923e-05, "loss": 0.9467, "step": 4850 }, { "epoch": 0.7392563242913746, "grad_norm": 0.8828125, "learning_rate": 3.358974613734212e-05, "loss": 0.9794, "step": 4851 }, { "epoch": 0.7394087168546175, "grad_norm": 0.9609375, "learning_rate": 3.355285265611784e-05, "loss": 1.0233, "step": 4852 }, { "epoch": 0.7395611094178605, "grad_norm": 0.9453125, "learning_rate": 3.351597536237419e-05, "loss": 0.9262, "step": 4853 }, { "epoch": 0.7397135019811033, "grad_norm": 0.95703125, "learning_rate": 3.347911426509496e-05, "loss": 0.9321, "step": 4854 }, { "epoch": 0.7398658945443463, "grad_norm": 1.359375, "learning_rate": 3.344226937326012e-05, "loss": 1.2421, "step": 4855 }, { "epoch": 0.7400182871075891, "grad_norm": 1.1953125, "learning_rate": 3.34054406958456e-05, "loss": 1.0172, "step": 4856 }, { "epoch": 0.740170679670832, "grad_norm": 0.8203125, "learning_rate": 3.336862824182332e-05, "loss": 0.8269, "step": 4857 }, { "epoch": 0.740323072234075, "grad_norm": 0.890625, "learning_rate": 3.3331832020161416e-05, "loss": 0.9846, "step": 4858 }, { "epoch": 0.7404754647973179, "grad_norm": 0.85546875, "learning_rate": 3.3295052039823926e-05, "loss": 0.9293, "step": 4859 }, { "epoch": 0.7406278573605608, "grad_norm": 1.0078125, "learning_rate": 3.325828830977096e-05, "loss": 0.9434, "step": 4860 }, { "epoch": 0.7407802499238038, "grad_norm": 1.0859375, "learning_rate": 3.322154083895876e-05, "loss": 0.9917, "step": 4861 }, { "epoch": 0.7409326424870466, "grad_norm": 0.984375, "learning_rate": 3.318480963633951e-05, "loss": 0.9284, "step": 4862 }, { "epoch": 0.7410850350502896, "grad_norm": 0.69140625, "learning_rate": 3.314809471086144e-05, "loss": 0.8873, "step": 4863 }, { "epoch": 0.7412374276135325, "grad_norm": 0.890625, "learning_rate": 3.311139607146886e-05, "loss": 1.0545, "step": 4864 }, { "epoch": 0.7413898201767753, "grad_norm": 0.90234375, "learning_rate": 3.3074713727102026e-05, "loss": 1.0239, "step": 4865 }, { "epoch": 0.7415422127400183, "grad_norm": 1.171875, "learning_rate": 3.303804768669737e-05, "loss": 0.9874, "step": 4866 }, { "epoch": 0.7416946053032613, "grad_norm": 0.82421875, "learning_rate": 3.300139795918723e-05, "loss": 0.8666, "step": 4867 }, { "epoch": 0.7418469978665041, "grad_norm": 1.2734375, "learning_rate": 3.29647645535e-05, "loss": 1.2677, "step": 4868 }, { "epoch": 0.741999390429747, "grad_norm": 0.8359375, "learning_rate": 3.2928147478560126e-05, "loss": 1.035, "step": 4869 }, { "epoch": 0.7421517829929899, "grad_norm": 0.8515625, "learning_rate": 3.2891546743288014e-05, "loss": 0.9198, "step": 4870 }, { "epoch": 0.7423041755562328, "grad_norm": 0.82421875, "learning_rate": 3.2854962356600124e-05, "loss": 0.994, "step": 4871 }, { "epoch": 0.7424565681194758, "grad_norm": 1.03125, "learning_rate": 3.2818394327409006e-05, "loss": 0.9571, "step": 4872 }, { "epoch": 0.7426089606827186, "grad_norm": 0.9453125, "learning_rate": 3.2781842664623155e-05, "loss": 0.8702, "step": 4873 }, { "epoch": 0.7427613532459616, "grad_norm": 0.91015625, "learning_rate": 3.2745307377146975e-05, "loss": 0.9794, "step": 4874 }, { "epoch": 0.7429137458092046, "grad_norm": 0.76171875, "learning_rate": 3.270878847388109e-05, "loss": 0.9422, "step": 4875 }, { "epoch": 0.7430661383724474, "grad_norm": 1.1484375, "learning_rate": 3.2672285963721996e-05, "loss": 1.1916, "step": 4876 }, { "epoch": 0.7432185309356903, "grad_norm": 0.80859375, "learning_rate": 3.26357998555622e-05, "loss": 0.8723, "step": 4877 }, { "epoch": 0.7433709234989333, "grad_norm": 0.9765625, "learning_rate": 3.259933015829031e-05, "loss": 0.765, "step": 4878 }, { "epoch": 0.7435233160621761, "grad_norm": 0.8359375, "learning_rate": 3.256287688079083e-05, "loss": 0.9096, "step": 4879 }, { "epoch": 0.7436757086254191, "grad_norm": 1.3046875, "learning_rate": 3.2526440031944306e-05, "loss": 1.0013, "step": 4880 }, { "epoch": 0.7438281011886619, "grad_norm": 0.98046875, "learning_rate": 3.2490019620627276e-05, "loss": 1.1395, "step": 4881 }, { "epoch": 0.7439804937519049, "grad_norm": 0.99609375, "learning_rate": 3.2453615655712245e-05, "loss": 0.8104, "step": 4882 }, { "epoch": 0.7441328863151478, "grad_norm": 1.0703125, "learning_rate": 3.241722814606781e-05, "loss": 0.9716, "step": 4883 }, { "epoch": 0.7442852788783907, "grad_norm": 0.82421875, "learning_rate": 3.2380857100558494e-05, "loss": 0.8168, "step": 4884 }, { "epoch": 0.7444376714416336, "grad_norm": 0.8828125, "learning_rate": 3.23445025280447e-05, "loss": 0.9236, "step": 4885 }, { "epoch": 0.7445900640048766, "grad_norm": 0.78125, "learning_rate": 3.230816443738302e-05, "loss": 0.9611, "step": 4886 }, { "epoch": 0.7447424565681194, "grad_norm": 0.85546875, "learning_rate": 3.227184283742591e-05, "loss": 1.0378, "step": 4887 }, { "epoch": 0.7448948491313624, "grad_norm": 0.828125, "learning_rate": 3.223553773702181e-05, "loss": 1.011, "step": 4888 }, { "epoch": 0.7450472416946053, "grad_norm": 1.1640625, "learning_rate": 3.219924914501521e-05, "loss": 1.087, "step": 4889 }, { "epoch": 0.7451996342578482, "grad_norm": 1.078125, "learning_rate": 3.216297707024655e-05, "loss": 1.1362, "step": 4890 }, { "epoch": 0.7453520268210911, "grad_norm": 0.86328125, "learning_rate": 3.212672152155212e-05, "loss": 1.0017, "step": 4891 }, { "epoch": 0.7455044193843341, "grad_norm": 1.0078125, "learning_rate": 3.2090482507764395e-05, "loss": 0.997, "step": 4892 }, { "epoch": 0.7456568119475769, "grad_norm": 0.90234375, "learning_rate": 3.205426003771169e-05, "loss": 0.9873, "step": 4893 }, { "epoch": 0.7458092045108199, "grad_norm": 0.86328125, "learning_rate": 3.201805412021829e-05, "loss": 0.8538, "step": 4894 }, { "epoch": 0.7459615970740627, "grad_norm": 0.62109375, "learning_rate": 3.198186476410453e-05, "loss": 0.9236, "step": 4895 }, { "epoch": 0.7461139896373057, "grad_norm": 0.859375, "learning_rate": 3.194569197818663e-05, "loss": 0.9444, "step": 4896 }, { "epoch": 0.7462663822005486, "grad_norm": 1.1015625, "learning_rate": 3.190953577127681e-05, "loss": 0.9423, "step": 4897 }, { "epoch": 0.7464187747637915, "grad_norm": 1.1796875, "learning_rate": 3.187339615218323e-05, "loss": 1.0166, "step": 4898 }, { "epoch": 0.7465711673270344, "grad_norm": 0.8046875, "learning_rate": 3.183727312971e-05, "loss": 0.8722, "step": 4899 }, { "epoch": 0.7467235598902774, "grad_norm": 0.7890625, "learning_rate": 3.180116671265726e-05, "loss": 0.9405, "step": 4900 }, { "epoch": 0.7468759524535202, "grad_norm": 0.88671875, "learning_rate": 3.176507690982106e-05, "loss": 1.0042, "step": 4901 }, { "epoch": 0.7470283450167632, "grad_norm": 0.98046875, "learning_rate": 3.172900372999329e-05, "loss": 0.9039, "step": 4902 }, { "epoch": 0.7471807375800061, "grad_norm": 1.1328125, "learning_rate": 3.1692947181961994e-05, "loss": 0.9215, "step": 4903 }, { "epoch": 0.747333130143249, "grad_norm": 1.0703125, "learning_rate": 3.165690727451105e-05, "loss": 0.9205, "step": 4904 }, { "epoch": 0.7474855227064919, "grad_norm": 0.8984375, "learning_rate": 3.162088401642025e-05, "loss": 1.0675, "step": 4905 }, { "epoch": 0.7476379152697349, "grad_norm": 1.0078125, "learning_rate": 3.158487741646545e-05, "loss": 1.0028, "step": 4906 }, { "epoch": 0.7477903078329777, "grad_norm": 0.9609375, "learning_rate": 3.154888748341836e-05, "loss": 0.991, "step": 4907 }, { "epoch": 0.7479427003962207, "grad_norm": 0.78125, "learning_rate": 3.151291422604658e-05, "loss": 1.0813, "step": 4908 }, { "epoch": 0.7480950929594635, "grad_norm": 1.015625, "learning_rate": 3.147695765311377e-05, "loss": 1.1543, "step": 4909 }, { "epoch": 0.7482474855227065, "grad_norm": 0.96875, "learning_rate": 3.144101777337942e-05, "loss": 0.7955, "step": 4910 }, { "epoch": 0.7483998780859494, "grad_norm": 1.234375, "learning_rate": 3.140509459559908e-05, "loss": 0.8962, "step": 4911 }, { "epoch": 0.7485522706491923, "grad_norm": 0.88671875, "learning_rate": 3.136918812852413e-05, "loss": 0.9092, "step": 4912 }, { "epoch": 0.7487046632124352, "grad_norm": 0.921875, "learning_rate": 3.1333298380901834e-05, "loss": 1.0314, "step": 4913 }, { "epoch": 0.7488570557756782, "grad_norm": 0.81640625, "learning_rate": 3.129742536147551e-05, "loss": 0.8464, "step": 4914 }, { "epoch": 0.749009448338921, "grad_norm": 0.7890625, "learning_rate": 3.1261569078984346e-05, "loss": 0.9195, "step": 4915 }, { "epoch": 0.749161840902164, "grad_norm": 0.8046875, "learning_rate": 3.122572954216338e-05, "loss": 0.9739, "step": 4916 }, { "epoch": 0.7493142334654069, "grad_norm": 1.3671875, "learning_rate": 3.1189906759743736e-05, "loss": 1.1513, "step": 4917 }, { "epoch": 0.7494666260286498, "grad_norm": 0.9921875, "learning_rate": 3.1154100740452344e-05, "loss": 1.0455, "step": 4918 }, { "epoch": 0.7496190185918927, "grad_norm": 1.1015625, "learning_rate": 3.111831149301196e-05, "loss": 0.9805, "step": 4919 }, { "epoch": 0.7497714111551357, "grad_norm": 0.87890625, "learning_rate": 3.1082539026141476e-05, "loss": 0.7946, "step": 4920 }, { "epoch": 0.7499238037183785, "grad_norm": 0.7578125, "learning_rate": 3.104678334855553e-05, "loss": 0.8935, "step": 4921 }, { "epoch": 0.7500761962816215, "grad_norm": 1.0078125, "learning_rate": 3.10110444689647e-05, "loss": 0.8566, "step": 4922 }, { "epoch": 0.7502285888448643, "grad_norm": 0.9140625, "learning_rate": 3.0975322396075535e-05, "loss": 0.928, "step": 4923 }, { "epoch": 0.7503809814081073, "grad_norm": 0.9140625, "learning_rate": 3.093961713859047e-05, "loss": 0.8861, "step": 4924 }, { "epoch": 0.7505333739713502, "grad_norm": 1.09375, "learning_rate": 3.0903928705207706e-05, "loss": 1.0674, "step": 4925 }, { "epoch": 0.7506857665345931, "grad_norm": 0.96484375, "learning_rate": 3.0868257104621565e-05, "loss": 0.8418, "step": 4926 }, { "epoch": 0.750838159097836, "grad_norm": 0.78125, "learning_rate": 3.08326023455221e-05, "loss": 0.904, "step": 4927 }, { "epoch": 0.750990551661079, "grad_norm": 1.1015625, "learning_rate": 3.079696443659538e-05, "loss": 0.8269, "step": 4928 }, { "epoch": 0.7511429442243218, "grad_norm": 1.0703125, "learning_rate": 3.07613433865233e-05, "loss": 1.1142, "step": 4929 }, { "epoch": 0.7512953367875648, "grad_norm": 0.95703125, "learning_rate": 3.072573920398358e-05, "loss": 0.8424, "step": 4930 }, { "epoch": 0.7514477293508077, "grad_norm": 1.0078125, "learning_rate": 3.069015189765001e-05, "loss": 1.0043, "step": 4931 }, { "epoch": 0.7516001219140506, "grad_norm": 1.203125, "learning_rate": 3.0654581476192136e-05, "loss": 1.0522, "step": 4932 }, { "epoch": 0.7517525144772935, "grad_norm": 1.0546875, "learning_rate": 3.061902794827538e-05, "loss": 1.0019, "step": 4933 }, { "epoch": 0.7519049070405365, "grad_norm": 0.8515625, "learning_rate": 3.058349132256116e-05, "loss": 1.1072, "step": 4934 }, { "epoch": 0.7520572996037793, "grad_norm": 1.2734375, "learning_rate": 3.0547971607706716e-05, "loss": 0.9765, "step": 4935 }, { "epoch": 0.7522096921670223, "grad_norm": 1.0703125, "learning_rate": 3.051246881236507e-05, "loss": 1.0949, "step": 4936 }, { "epoch": 0.7523620847302651, "grad_norm": 0.88671875, "learning_rate": 3.0476982945185296e-05, "loss": 1.0202, "step": 4937 }, { "epoch": 0.7525144772935081, "grad_norm": 0.84375, "learning_rate": 3.0441514014812233e-05, "loss": 0.8548, "step": 4938 }, { "epoch": 0.752666869856751, "grad_norm": 0.9140625, "learning_rate": 3.0406062029886605e-05, "loss": 0.9647, "step": 4939 }, { "epoch": 0.7528192624199939, "grad_norm": 0.96875, "learning_rate": 3.0370626999045093e-05, "loss": 1.0265, "step": 4940 }, { "epoch": 0.7529716549832368, "grad_norm": 1.34375, "learning_rate": 3.033520893092011e-05, "loss": 0.816, "step": 4941 }, { "epoch": 0.7531240475464798, "grad_norm": 0.83984375, "learning_rate": 3.0299807834140005e-05, "loss": 0.8656, "step": 4942 }, { "epoch": 0.7532764401097226, "grad_norm": 0.92578125, "learning_rate": 3.026442371732904e-05, "loss": 0.9518, "step": 4943 }, { "epoch": 0.7534288326729656, "grad_norm": 0.7890625, "learning_rate": 3.0229056589107242e-05, "loss": 0.9687, "step": 4944 }, { "epoch": 0.7535812252362085, "grad_norm": 0.8515625, "learning_rate": 3.019370645809061e-05, "loss": 0.9355, "step": 4945 }, { "epoch": 0.7537336177994514, "grad_norm": 1.140625, "learning_rate": 3.0158373332890954e-05, "loss": 1.1312, "step": 4946 }, { "epoch": 0.7538860103626943, "grad_norm": 1.1015625, "learning_rate": 3.0123057222115836e-05, "loss": 1.0962, "step": 4947 }, { "epoch": 0.7540384029259373, "grad_norm": 0.72265625, "learning_rate": 3.008775813436886e-05, "loss": 0.8992, "step": 4948 }, { "epoch": 0.7541907954891801, "grad_norm": 0.91796875, "learning_rate": 3.005247607824936e-05, "loss": 0.9001, "step": 4949 }, { "epoch": 0.7543431880524231, "grad_norm": 0.88671875, "learning_rate": 3.0017211062352502e-05, "loss": 0.8396, "step": 4950 }, { "epoch": 0.7544955806156659, "grad_norm": 0.79296875, "learning_rate": 2.998196309526945e-05, "loss": 1.0568, "step": 4951 }, { "epoch": 0.7546479731789089, "grad_norm": 1.046875, "learning_rate": 2.994673218558709e-05, "loss": 1.0932, "step": 4952 }, { "epoch": 0.7548003657421518, "grad_norm": 0.91796875, "learning_rate": 2.991151834188809e-05, "loss": 1.0515, "step": 4953 }, { "epoch": 0.7549527583053947, "grad_norm": 1.0078125, "learning_rate": 2.9876321572751144e-05, "loss": 1.0198, "step": 4954 }, { "epoch": 0.7551051508686376, "grad_norm": 0.76953125, "learning_rate": 2.9841141886750668e-05, "loss": 0.981, "step": 4955 }, { "epoch": 0.7552575434318806, "grad_norm": 0.75, "learning_rate": 2.9805979292456887e-05, "loss": 0.8377, "step": 4956 }, { "epoch": 0.7554099359951234, "grad_norm": 1.0625, "learning_rate": 2.9770833798436028e-05, "loss": 0.9005, "step": 4957 }, { "epoch": 0.7555623285583664, "grad_norm": 0.79296875, "learning_rate": 2.9735705413249937e-05, "loss": 0.9757, "step": 4958 }, { "epoch": 0.7557147211216093, "grad_norm": 0.89453125, "learning_rate": 2.9700594145456396e-05, "loss": 1.0022, "step": 4959 }, { "epoch": 0.7558671136848522, "grad_norm": 1.046875, "learning_rate": 2.966550000360907e-05, "loss": 1.0877, "step": 4960 }, { "epoch": 0.7560195062480951, "grad_norm": 0.75, "learning_rate": 2.9630422996257344e-05, "loss": 0.8486, "step": 4961 }, { "epoch": 0.7561718988113381, "grad_norm": 0.9453125, "learning_rate": 2.959536313194655e-05, "loss": 0.8774, "step": 4962 }, { "epoch": 0.7563242913745809, "grad_norm": 1.0546875, "learning_rate": 2.9560320419217758e-05, "loss": 0.9633, "step": 4963 }, { "epoch": 0.7564766839378239, "grad_norm": 0.9609375, "learning_rate": 2.9525294866607787e-05, "loss": 1.0416, "step": 4964 }, { "epoch": 0.7566290765010667, "grad_norm": 1.046875, "learning_rate": 2.9490286482649466e-05, "loss": 0.9966, "step": 4965 }, { "epoch": 0.7567814690643097, "grad_norm": 0.83984375, "learning_rate": 2.9455295275871298e-05, "loss": 1.055, "step": 4966 }, { "epoch": 0.7569338616275526, "grad_norm": 1.0859375, "learning_rate": 2.9420321254797635e-05, "loss": 0.8891, "step": 4967 }, { "epoch": 0.7570862541907954, "grad_norm": 1.265625, "learning_rate": 2.9385364427948716e-05, "loss": 1.023, "step": 4968 }, { "epoch": 0.7572386467540384, "grad_norm": 1.1875, "learning_rate": 2.935042480384046e-05, "loss": 0.8427, "step": 4969 }, { "epoch": 0.7573910393172814, "grad_norm": 1.09375, "learning_rate": 2.9315502390984638e-05, "loss": 0.9583, "step": 4970 }, { "epoch": 0.7575434318805242, "grad_norm": 1.0234375, "learning_rate": 2.9280597197888937e-05, "loss": 1.1303, "step": 4971 }, { "epoch": 0.7576958244437672, "grad_norm": 1.1484375, "learning_rate": 2.9245709233056716e-05, "loss": 1.2296, "step": 4972 }, { "epoch": 0.7578482170070101, "grad_norm": 0.859375, "learning_rate": 2.921083850498717e-05, "loss": 0.7114, "step": 4973 }, { "epoch": 0.758000609570253, "grad_norm": 0.7578125, "learning_rate": 2.917598502217538e-05, "loss": 0.8751, "step": 4974 }, { "epoch": 0.7581530021334959, "grad_norm": 0.8203125, "learning_rate": 2.914114879311205e-05, "loss": 1.0313, "step": 4975 }, { "epoch": 0.7583053946967387, "grad_norm": 0.921875, "learning_rate": 2.9106329826283875e-05, "loss": 0.9587, "step": 4976 }, { "epoch": 0.7584577872599817, "grad_norm": 0.97265625, "learning_rate": 2.907152813017322e-05, "loss": 0.9297, "step": 4977 }, { "epoch": 0.7586101798232247, "grad_norm": 1.046875, "learning_rate": 2.9036743713258253e-05, "loss": 1.0747, "step": 4978 }, { "epoch": 0.7587625723864675, "grad_norm": 0.80078125, "learning_rate": 2.9001976584013024e-05, "loss": 0.8577, "step": 4979 }, { "epoch": 0.7589149649497104, "grad_norm": 1.1015625, "learning_rate": 2.8967226750907295e-05, "loss": 1.2111, "step": 4980 }, { "epoch": 0.7590673575129534, "grad_norm": 1.0390625, "learning_rate": 2.8932494222406537e-05, "loss": 0.8531, "step": 4981 }, { "epoch": 0.7592197500761962, "grad_norm": 1.109375, "learning_rate": 2.889777900697218e-05, "loss": 1.079, "step": 4982 }, { "epoch": 0.7593721426394392, "grad_norm": 0.953125, "learning_rate": 2.886308111306133e-05, "loss": 0.9966, "step": 4983 }, { "epoch": 0.7595245352026821, "grad_norm": 1.0546875, "learning_rate": 2.8828400549126865e-05, "loss": 1.0628, "step": 4984 }, { "epoch": 0.759676927765925, "grad_norm": 0.9375, "learning_rate": 2.8793737323617553e-05, "loss": 1.0038, "step": 4985 }, { "epoch": 0.759829320329168, "grad_norm": 0.828125, "learning_rate": 2.875909144497776e-05, "loss": 0.9594, "step": 4986 }, { "epoch": 0.7599817128924109, "grad_norm": 0.71484375, "learning_rate": 2.872446292164771e-05, "loss": 0.9232, "step": 4987 }, { "epoch": 0.7601341054556537, "grad_norm": 0.96875, "learning_rate": 2.8689851762063502e-05, "loss": 0.9709, "step": 4988 }, { "epoch": 0.7602864980188967, "grad_norm": 0.94921875, "learning_rate": 2.8655257974656857e-05, "loss": 0.8454, "step": 4989 }, { "epoch": 0.7604388905821395, "grad_norm": 0.87109375, "learning_rate": 2.862068156785529e-05, "loss": 0.9539, "step": 4990 }, { "epoch": 0.7605912831453825, "grad_norm": 0.7734375, "learning_rate": 2.8586122550082218e-05, "loss": 1.0651, "step": 4991 }, { "epoch": 0.7607436757086254, "grad_norm": 1.125, "learning_rate": 2.855158092975657e-05, "loss": 1.0896, "step": 4992 }, { "epoch": 0.7608960682718683, "grad_norm": 0.95703125, "learning_rate": 2.8517056715293288e-05, "loss": 1.1311, "step": 4993 }, { "epoch": 0.7610484608351112, "grad_norm": 0.63671875, "learning_rate": 2.8482549915102942e-05, "loss": 0.7478, "step": 4994 }, { "epoch": 0.7612008533983542, "grad_norm": 1.015625, "learning_rate": 2.8448060537591835e-05, "loss": 0.8779, "step": 4995 }, { "epoch": 0.761353245961597, "grad_norm": 1.0859375, "learning_rate": 2.841358859116219e-05, "loss": 1.0821, "step": 4996 }, { "epoch": 0.76150563852484, "grad_norm": 1.078125, "learning_rate": 2.8379134084211755e-05, "loss": 1.0859, "step": 4997 }, { "epoch": 0.7616580310880829, "grad_norm": 0.87109375, "learning_rate": 2.834469702513415e-05, "loss": 0.9764, "step": 4998 }, { "epoch": 0.7618104236513258, "grad_norm": 0.82421875, "learning_rate": 2.831027742231881e-05, "loss": 0.9608, "step": 4999 }, { "epoch": 0.7619628162145687, "grad_norm": 0.6875, "learning_rate": 2.827587528415081e-05, "loss": 0.9631, "step": 5000 }, { "epoch": 0.7621152087778117, "grad_norm": 1.0546875, "learning_rate": 2.8241490619010956e-05, "loss": 0.9942, "step": 5001 }, { "epoch": 0.7622676013410545, "grad_norm": 1.1171875, "learning_rate": 2.8207123435275974e-05, "loss": 0.986, "step": 5002 }, { "epoch": 0.7624199939042975, "grad_norm": 0.80078125, "learning_rate": 2.817277374131808e-05, "loss": 0.9891, "step": 5003 }, { "epoch": 0.7625723864675403, "grad_norm": 0.9765625, "learning_rate": 2.8138441545505366e-05, "loss": 0.9674, "step": 5004 }, { "epoch": 0.7627247790307833, "grad_norm": 1.0078125, "learning_rate": 2.810412685620172e-05, "loss": 0.9442, "step": 5005 }, { "epoch": 0.7628771715940262, "grad_norm": 0.91796875, "learning_rate": 2.8069829681766648e-05, "loss": 0.9341, "step": 5006 }, { "epoch": 0.7630295641572691, "grad_norm": 0.85546875, "learning_rate": 2.8035550030555402e-05, "loss": 1.033, "step": 5007 }, { "epoch": 0.763181956720512, "grad_norm": 0.85546875, "learning_rate": 2.8001287910919106e-05, "loss": 0.9011, "step": 5008 }, { "epoch": 0.763334349283755, "grad_norm": 0.85546875, "learning_rate": 2.796704333120437e-05, "loss": 1.0794, "step": 5009 }, { "epoch": 0.7634867418469978, "grad_norm": 0.95703125, "learning_rate": 2.793281629975376e-05, "loss": 1.0177, "step": 5010 }, { "epoch": 0.7636391344102408, "grad_norm": 0.78515625, "learning_rate": 2.789860682490545e-05, "loss": 0.9117, "step": 5011 }, { "epoch": 0.7637915269734837, "grad_norm": 0.78125, "learning_rate": 2.786441491499332e-05, "loss": 0.8863, "step": 5012 }, { "epoch": 0.7639439195367266, "grad_norm": 0.96875, "learning_rate": 2.783024057834711e-05, "loss": 0.7992, "step": 5013 }, { "epoch": 0.7640963120999695, "grad_norm": 1.234375, "learning_rate": 2.7796083823292084e-05, "loss": 1.1892, "step": 5014 }, { "epoch": 0.7642487046632125, "grad_norm": 0.9296875, "learning_rate": 2.776194465814932e-05, "loss": 0.9516, "step": 5015 }, { "epoch": 0.7644010972264553, "grad_norm": 1.6640625, "learning_rate": 2.7727823091235662e-05, "loss": 0.9727, "step": 5016 }, { "epoch": 0.7645534897896983, "grad_norm": 1.1796875, "learning_rate": 2.7693719130863606e-05, "loss": 1.0949, "step": 5017 }, { "epoch": 0.7647058823529411, "grad_norm": 1.1484375, "learning_rate": 2.7659632785341304e-05, "loss": 1.1829, "step": 5018 }, { "epoch": 0.7648582749161841, "grad_norm": 1.1171875, "learning_rate": 2.7625564062972797e-05, "loss": 0.9409, "step": 5019 }, { "epoch": 0.765010667479427, "grad_norm": 0.84765625, "learning_rate": 2.759151297205762e-05, "loss": 1.0116, "step": 5020 }, { "epoch": 0.7651630600426699, "grad_norm": 0.76171875, "learning_rate": 2.7557479520891104e-05, "loss": 0.8327, "step": 5021 }, { "epoch": 0.7653154526059128, "grad_norm": 1.0546875, "learning_rate": 2.7523463717764353e-05, "loss": 1.0704, "step": 5022 }, { "epoch": 0.7654678451691558, "grad_norm": 0.71875, "learning_rate": 2.748946557096407e-05, "loss": 0.7799, "step": 5023 }, { "epoch": 0.7656202377323986, "grad_norm": 1.0078125, "learning_rate": 2.7455485088772692e-05, "loss": 0.8416, "step": 5024 }, { "epoch": 0.7657726302956416, "grad_norm": 1.0, "learning_rate": 2.742152227946836e-05, "loss": 1.058, "step": 5025 }, { "epoch": 0.7659250228588845, "grad_norm": 1.1484375, "learning_rate": 2.738757715132486e-05, "loss": 0.9618, "step": 5026 }, { "epoch": 0.7660774154221274, "grad_norm": 0.69140625, "learning_rate": 2.7353649712611786e-05, "loss": 0.8513, "step": 5027 }, { "epoch": 0.7662298079853703, "grad_norm": 0.80078125, "learning_rate": 2.7319739971594314e-05, "loss": 0.8995, "step": 5028 }, { "epoch": 0.7663822005486133, "grad_norm": 0.80859375, "learning_rate": 2.7285847936533326e-05, "loss": 1.0247, "step": 5029 }, { "epoch": 0.7665345931118561, "grad_norm": 0.875, "learning_rate": 2.7251973615685476e-05, "loss": 0.887, "step": 5030 }, { "epoch": 0.7666869856750991, "grad_norm": 0.80859375, "learning_rate": 2.7218117017302968e-05, "loss": 0.911, "step": 5031 }, { "epoch": 0.7668393782383419, "grad_norm": 0.93359375, "learning_rate": 2.7184278149633737e-05, "loss": 0.9705, "step": 5032 }, { "epoch": 0.7669917708015849, "grad_norm": 0.7265625, "learning_rate": 2.7150457020921493e-05, "loss": 0.7977, "step": 5033 }, { "epoch": 0.7671441633648278, "grad_norm": 0.8984375, "learning_rate": 2.711665363940552e-05, "loss": 0.8445, "step": 5034 }, { "epoch": 0.7672965559280707, "grad_norm": 1.1796875, "learning_rate": 2.7082868013320762e-05, "loss": 1.182, "step": 5035 }, { "epoch": 0.7674489484913136, "grad_norm": 1.0859375, "learning_rate": 2.704910015089799e-05, "loss": 0.8797, "step": 5036 }, { "epoch": 0.7676013410545566, "grad_norm": 0.9375, "learning_rate": 2.7015350060363443e-05, "loss": 0.921, "step": 5037 }, { "epoch": 0.7677537336177994, "grad_norm": 0.57421875, "learning_rate": 2.6981617749939125e-05, "loss": 0.7819, "step": 5038 }, { "epoch": 0.7679061261810424, "grad_norm": 0.9375, "learning_rate": 2.6947903227842774e-05, "loss": 1.0812, "step": 5039 }, { "epoch": 0.7680585187442853, "grad_norm": 0.8671875, "learning_rate": 2.6914206502287685e-05, "loss": 0.9907, "step": 5040 }, { "epoch": 0.7682109113075282, "grad_norm": 0.73046875, "learning_rate": 2.688052758148294e-05, "loss": 0.9006, "step": 5041 }, { "epoch": 0.7683633038707711, "grad_norm": 0.9609375, "learning_rate": 2.6846866473633125e-05, "loss": 1.0144, "step": 5042 }, { "epoch": 0.7685156964340141, "grad_norm": 1.0, "learning_rate": 2.6813223186938564e-05, "loss": 1.0348, "step": 5043 }, { "epoch": 0.7686680889972569, "grad_norm": 0.7421875, "learning_rate": 2.6779597729595318e-05, "loss": 0.9642, "step": 5044 }, { "epoch": 0.7688204815604999, "grad_norm": 1.1328125, "learning_rate": 2.6745990109794983e-05, "loss": 1.2408, "step": 5045 }, { "epoch": 0.7689728741237427, "grad_norm": 1.1484375, "learning_rate": 2.671240033572484e-05, "loss": 0.8574, "step": 5046 }, { "epoch": 0.7691252666869857, "grad_norm": 0.8515625, "learning_rate": 2.6678828415567936e-05, "loss": 1.0381, "step": 5047 }, { "epoch": 0.7692776592502286, "grad_norm": 1.1875, "learning_rate": 2.664527435750278e-05, "loss": 0.7548, "step": 5048 }, { "epoch": 0.7694300518134715, "grad_norm": 0.7421875, "learning_rate": 2.6611738169703615e-05, "loss": 0.9841, "step": 5049 }, { "epoch": 0.7695824443767144, "grad_norm": 1.4140625, "learning_rate": 2.6578219860340402e-05, "loss": 0.938, "step": 5050 }, { "epoch": 0.7697348369399574, "grad_norm": 1.109375, "learning_rate": 2.654471943757866e-05, "loss": 1.1357, "step": 5051 }, { "epoch": 0.7698872295032002, "grad_norm": 0.8828125, "learning_rate": 2.6511236909579574e-05, "loss": 0.8925, "step": 5052 }, { "epoch": 0.7700396220664432, "grad_norm": 0.98828125, "learning_rate": 2.6477772284499945e-05, "loss": 1.1052, "step": 5053 }, { "epoch": 0.7701920146296861, "grad_norm": 0.80078125, "learning_rate": 2.6444325570492277e-05, "loss": 0.9229, "step": 5054 }, { "epoch": 0.770344407192929, "grad_norm": 0.90234375, "learning_rate": 2.6410896775704608e-05, "loss": 0.9983, "step": 5055 }, { "epoch": 0.7704967997561719, "grad_norm": 1.046875, "learning_rate": 2.6377485908280753e-05, "loss": 1.0623, "step": 5056 }, { "epoch": 0.7706491923194149, "grad_norm": 0.84375, "learning_rate": 2.6344092976360003e-05, "loss": 0.8559, "step": 5057 }, { "epoch": 0.7708015848826577, "grad_norm": 0.81640625, "learning_rate": 2.631071798807746e-05, "loss": 0.9237, "step": 5058 }, { "epoch": 0.7709539774459007, "grad_norm": 0.9296875, "learning_rate": 2.6277360951563658e-05, "loss": 0.7677, "step": 5059 }, { "epoch": 0.7711063700091435, "grad_norm": 0.9765625, "learning_rate": 2.6244021874944847e-05, "loss": 0.8607, "step": 5060 }, { "epoch": 0.7712587625723865, "grad_norm": 0.93359375, "learning_rate": 2.6210700766342965e-05, "loss": 0.8507, "step": 5061 }, { "epoch": 0.7714111551356294, "grad_norm": 0.76171875, "learning_rate": 2.617739763387549e-05, "loss": 0.9342, "step": 5062 }, { "epoch": 0.7715635476988723, "grad_norm": 0.8984375, "learning_rate": 2.6144112485655504e-05, "loss": 1.0725, "step": 5063 }, { "epoch": 0.7717159402621152, "grad_norm": 1.2421875, "learning_rate": 2.6110845329791845e-05, "loss": 0.9556, "step": 5064 }, { "epoch": 0.7718683328253582, "grad_norm": 1.046875, "learning_rate": 2.6077596174388785e-05, "loss": 0.9208, "step": 5065 }, { "epoch": 0.772020725388601, "grad_norm": 0.8125, "learning_rate": 2.6044365027546293e-05, "loss": 0.9772, "step": 5066 }, { "epoch": 0.772173117951844, "grad_norm": 0.87890625, "learning_rate": 2.6011151897359998e-05, "loss": 0.9316, "step": 5067 }, { "epoch": 0.7723255105150869, "grad_norm": 0.98828125, "learning_rate": 2.5977956791921088e-05, "loss": 0.992, "step": 5068 }, { "epoch": 0.7724779030783298, "grad_norm": 0.96875, "learning_rate": 2.594477971931636e-05, "loss": 0.941, "step": 5069 }, { "epoch": 0.7726302956415727, "grad_norm": 1.03125, "learning_rate": 2.5911620687628224e-05, "loss": 1.2294, "step": 5070 }, { "epoch": 0.7727826882048157, "grad_norm": 0.99609375, "learning_rate": 2.58784797049347e-05, "loss": 0.9939, "step": 5071 }, { "epoch": 0.7729350807680585, "grad_norm": 0.8515625, "learning_rate": 2.584535677930937e-05, "loss": 0.8934, "step": 5072 }, { "epoch": 0.7730874733313015, "grad_norm": 1.0078125, "learning_rate": 2.5812251918821527e-05, "loss": 0.9248, "step": 5073 }, { "epoch": 0.7732398658945443, "grad_norm": 1.171875, "learning_rate": 2.5779165131535922e-05, "loss": 0.917, "step": 5074 }, { "epoch": 0.7733922584577873, "grad_norm": 1.4453125, "learning_rate": 2.5746096425513066e-05, "loss": 1.2046, "step": 5075 }, { "epoch": 0.7735446510210302, "grad_norm": 1.0078125, "learning_rate": 2.571304580880889e-05, "loss": 1.0937, "step": 5076 }, { "epoch": 0.773697043584273, "grad_norm": 1.15625, "learning_rate": 2.568001328947498e-05, "loss": 1.1976, "step": 5077 }, { "epoch": 0.773849436147516, "grad_norm": 0.83984375, "learning_rate": 2.5646998875558613e-05, "loss": 1.0265, "step": 5078 }, { "epoch": 0.774001828710759, "grad_norm": 0.8125, "learning_rate": 2.5614002575102535e-05, "loss": 1.0045, "step": 5079 }, { "epoch": 0.7741542212740018, "grad_norm": 1.1796875, "learning_rate": 2.5581024396145116e-05, "loss": 1.1073, "step": 5080 }, { "epoch": 0.7743066138372448, "grad_norm": 1.03125, "learning_rate": 2.5548064346720315e-05, "loss": 0.9603, "step": 5081 }, { "epoch": 0.7744590064004877, "grad_norm": 0.890625, "learning_rate": 2.5515122434857685e-05, "loss": 1.0069, "step": 5082 }, { "epoch": 0.7746113989637305, "grad_norm": 1.078125, "learning_rate": 2.54821986685823e-05, "loss": 1.2146, "step": 5083 }, { "epoch": 0.7747637915269735, "grad_norm": 1.03125, "learning_rate": 2.5449293055914947e-05, "loss": 1.1336, "step": 5084 }, { "epoch": 0.7749161840902163, "grad_norm": 1.0625, "learning_rate": 2.5416405604871862e-05, "loss": 1.0927, "step": 5085 }, { "epoch": 0.7750685766534593, "grad_norm": 0.8203125, "learning_rate": 2.5383536323464895e-05, "loss": 1.0455, "step": 5086 }, { "epoch": 0.7752209692167022, "grad_norm": 0.8515625, "learning_rate": 2.5350685219701488e-05, "loss": 1.0843, "step": 5087 }, { "epoch": 0.7753733617799451, "grad_norm": 0.984375, "learning_rate": 2.5317852301584643e-05, "loss": 0.7811, "step": 5088 }, { "epoch": 0.775525754343188, "grad_norm": 1.40625, "learning_rate": 2.5285037577112892e-05, "loss": 0.9304, "step": 5089 }, { "epoch": 0.775678146906431, "grad_norm": 0.84375, "learning_rate": 2.5252241054280433e-05, "loss": 0.9264, "step": 5090 }, { "epoch": 0.7758305394696738, "grad_norm": 0.81640625, "learning_rate": 2.5219462741076926e-05, "loss": 0.9387, "step": 5091 }, { "epoch": 0.7759829320329168, "grad_norm": 0.87890625, "learning_rate": 2.5186702645487715e-05, "loss": 0.9646, "step": 5092 }, { "epoch": 0.7761353245961597, "grad_norm": 0.81640625, "learning_rate": 2.5153960775493535e-05, "loss": 1.0495, "step": 5093 }, { "epoch": 0.7762877171594026, "grad_norm": 1.21875, "learning_rate": 2.5121237139070796e-05, "loss": 1.1074, "step": 5094 }, { "epoch": 0.7764401097226455, "grad_norm": 0.828125, "learning_rate": 2.508853174419149e-05, "loss": 1.0308, "step": 5095 }, { "epoch": 0.7765925022858885, "grad_norm": 1.1171875, "learning_rate": 2.5055844598823107e-05, "loss": 1.0565, "step": 5096 }, { "epoch": 0.7767448948491313, "grad_norm": 0.91796875, "learning_rate": 2.502317571092869e-05, "loss": 0.9266, "step": 5097 }, { "epoch": 0.7768972874123743, "grad_norm": 1.0859375, "learning_rate": 2.4990525088466854e-05, "loss": 0.9448, "step": 5098 }, { "epoch": 0.7770496799756171, "grad_norm": 1.046875, "learning_rate": 2.4957892739391765e-05, "loss": 1.0666, "step": 5099 }, { "epoch": 0.7772020725388601, "grad_norm": 1.0078125, "learning_rate": 2.4925278671653095e-05, "loss": 1.129, "step": 5100 }, { "epoch": 0.777354465102103, "grad_norm": 0.94921875, "learning_rate": 2.4892682893196172e-05, "loss": 0.9961, "step": 5101 }, { "epoch": 0.7775068576653459, "grad_norm": 1.0546875, "learning_rate": 2.4860105411961755e-05, "loss": 1.0352, "step": 5102 }, { "epoch": 0.7776592502285888, "grad_norm": 1.0703125, "learning_rate": 2.482754623588619e-05, "loss": 1.0483, "step": 5103 }, { "epoch": 0.7778116427918318, "grad_norm": 1.2265625, "learning_rate": 2.479500537290137e-05, "loss": 1.0268, "step": 5104 }, { "epoch": 0.7779640353550746, "grad_norm": 0.8984375, "learning_rate": 2.4762482830934696e-05, "loss": 0.9597, "step": 5105 }, { "epoch": 0.7781164279183176, "grad_norm": 1.125, "learning_rate": 2.472997861790912e-05, "loss": 0.9897, "step": 5106 }, { "epoch": 0.7782688204815605, "grad_norm": 1.25, "learning_rate": 2.4697492741743177e-05, "loss": 1.0302, "step": 5107 }, { "epoch": 0.7784212130448034, "grad_norm": 0.89453125, "learning_rate": 2.4665025210350878e-05, "loss": 1.03, "step": 5108 }, { "epoch": 0.7785736056080463, "grad_norm": 0.84765625, "learning_rate": 2.4632576031641762e-05, "loss": 0.954, "step": 5109 }, { "epoch": 0.7787259981712893, "grad_norm": 1.046875, "learning_rate": 2.460014521352094e-05, "loss": 1.0685, "step": 5110 }, { "epoch": 0.7788783907345321, "grad_norm": 0.98828125, "learning_rate": 2.4567732763888985e-05, "loss": 1.0016, "step": 5111 }, { "epoch": 0.7790307832977751, "grad_norm": 0.76171875, "learning_rate": 2.4535338690642086e-05, "loss": 0.9352, "step": 5112 }, { "epoch": 0.7791831758610179, "grad_norm": 0.8125, "learning_rate": 2.4502963001671885e-05, "loss": 0.8366, "step": 5113 }, { "epoch": 0.7793355684242609, "grad_norm": 0.91796875, "learning_rate": 2.4470605704865568e-05, "loss": 0.9767, "step": 5114 }, { "epoch": 0.7794879609875038, "grad_norm": 0.85546875, "learning_rate": 2.4438266808105837e-05, "loss": 0.8985, "step": 5115 }, { "epoch": 0.7796403535507467, "grad_norm": 0.91015625, "learning_rate": 2.44059463192709e-05, "loss": 0.8357, "step": 5116 }, { "epoch": 0.7797927461139896, "grad_norm": 0.921875, "learning_rate": 2.4373644246234484e-05, "loss": 0.8885, "step": 5117 }, { "epoch": 0.7799451386772326, "grad_norm": 0.87109375, "learning_rate": 2.434136059686587e-05, "loss": 0.8839, "step": 5118 }, { "epoch": 0.7800975312404754, "grad_norm": 1.109375, "learning_rate": 2.4309095379029812e-05, "loss": 1.0336, "step": 5119 }, { "epoch": 0.7802499238037184, "grad_norm": 0.8359375, "learning_rate": 2.4276848600586576e-05, "loss": 0.7684, "step": 5120 }, { "epoch": 0.7804023163669613, "grad_norm": 0.83984375, "learning_rate": 2.4244620269391947e-05, "loss": 0.9259, "step": 5121 }, { "epoch": 0.7805547089302042, "grad_norm": 0.95703125, "learning_rate": 2.4212410393297156e-05, "loss": 1.0129, "step": 5122 }, { "epoch": 0.7807071014934471, "grad_norm": 0.78125, "learning_rate": 2.4180218980149083e-05, "loss": 0.964, "step": 5123 }, { "epoch": 0.7808594940566901, "grad_norm": 0.85546875, "learning_rate": 2.4148046037789974e-05, "loss": 0.7266, "step": 5124 }, { "epoch": 0.7810118866199329, "grad_norm": 0.88671875, "learning_rate": 2.4115891574057613e-05, "loss": 1.0919, "step": 5125 }, { "epoch": 0.7811642791831759, "grad_norm": 0.8359375, "learning_rate": 2.4083755596785294e-05, "loss": 1.0922, "step": 5126 }, { "epoch": 0.7813166717464187, "grad_norm": 0.859375, "learning_rate": 2.4051638113801823e-05, "loss": 0.9595, "step": 5127 }, { "epoch": 0.7814690643096617, "grad_norm": 1.109375, "learning_rate": 2.4019539132931422e-05, "loss": 1.0256, "step": 5128 }, { "epoch": 0.7816214568729046, "grad_norm": 1.1953125, "learning_rate": 2.3987458661993943e-05, "loss": 1.0896, "step": 5129 }, { "epoch": 0.7817738494361475, "grad_norm": 1.0078125, "learning_rate": 2.395539670880461e-05, "loss": 0.9387, "step": 5130 }, { "epoch": 0.7819262419993904, "grad_norm": 0.97265625, "learning_rate": 2.3923353281174186e-05, "loss": 0.9758, "step": 5131 }, { "epoch": 0.7820786345626334, "grad_norm": 1.3125, "learning_rate": 2.3891328386908895e-05, "loss": 1.0883, "step": 5132 }, { "epoch": 0.7822310271258762, "grad_norm": 0.98046875, "learning_rate": 2.3859322033810483e-05, "loss": 0.9149, "step": 5133 }, { "epoch": 0.7823834196891192, "grad_norm": 1.5703125, "learning_rate": 2.3827334229676112e-05, "loss": 1.095, "step": 5134 }, { "epoch": 0.7825358122523621, "grad_norm": 0.984375, "learning_rate": 2.379536498229853e-05, "loss": 0.8625, "step": 5135 }, { "epoch": 0.782688204815605, "grad_norm": 1.0546875, "learning_rate": 2.376341429946588e-05, "loss": 1.0646, "step": 5136 }, { "epoch": 0.7828405973788479, "grad_norm": 1.0703125, "learning_rate": 2.3731482188961818e-05, "loss": 1.0837, "step": 5137 }, { "epoch": 0.7829929899420909, "grad_norm": 0.84765625, "learning_rate": 2.3699568658565452e-05, "loss": 0.7698, "step": 5138 }, { "epoch": 0.7831453825053337, "grad_norm": 0.83984375, "learning_rate": 2.3667673716051353e-05, "loss": 0.7907, "step": 5139 }, { "epoch": 0.7832977750685767, "grad_norm": 0.92578125, "learning_rate": 2.3635797369189637e-05, "loss": 1.1927, "step": 5140 }, { "epoch": 0.7834501676318195, "grad_norm": 0.88671875, "learning_rate": 2.360393962574582e-05, "loss": 0.8848, "step": 5141 }, { "epoch": 0.7836025601950625, "grad_norm": 0.91015625, "learning_rate": 2.3572100493480908e-05, "loss": 0.9851, "step": 5142 }, { "epoch": 0.7837549527583054, "grad_norm": 0.80078125, "learning_rate": 2.354027998015138e-05, "loss": 0.9721, "step": 5143 }, { "epoch": 0.7839073453215483, "grad_norm": 0.765625, "learning_rate": 2.3508478093509144e-05, "loss": 0.9313, "step": 5144 }, { "epoch": 0.7840597378847912, "grad_norm": 0.8359375, "learning_rate": 2.3476694841301596e-05, "loss": 0.8687, "step": 5145 }, { "epoch": 0.7842121304480342, "grad_norm": 0.79296875, "learning_rate": 2.3444930231271635e-05, "loss": 0.8749, "step": 5146 }, { "epoch": 0.784364523011277, "grad_norm": 0.81640625, "learning_rate": 2.341318427115754e-05, "loss": 0.8786, "step": 5147 }, { "epoch": 0.78451691557452, "grad_norm": 1.09375, "learning_rate": 2.3381456968693105e-05, "loss": 0.9679, "step": 5148 }, { "epoch": 0.7846693081377629, "grad_norm": 0.95703125, "learning_rate": 2.3349748331607547e-05, "loss": 0.9929, "step": 5149 }, { "epoch": 0.7848217007010058, "grad_norm": 0.87890625, "learning_rate": 2.3318058367625538e-05, "loss": 0.8626, "step": 5150 }, { "epoch": 0.7849740932642487, "grad_norm": 0.92578125, "learning_rate": 2.32863870844672e-05, "loss": 1.0623, "step": 5151 }, { "epoch": 0.7851264858274917, "grad_norm": 0.984375, "learning_rate": 2.325473448984815e-05, "loss": 0.8687, "step": 5152 }, { "epoch": 0.7852788783907345, "grad_norm": 0.91015625, "learning_rate": 2.322310059147941e-05, "loss": 1.1094, "step": 5153 }, { "epoch": 0.7854312709539775, "grad_norm": 1.078125, "learning_rate": 2.3191485397067415e-05, "loss": 1.0512, "step": 5154 }, { "epoch": 0.7855836635172203, "grad_norm": 0.90625, "learning_rate": 2.315988891431412e-05, "loss": 1.0418, "step": 5155 }, { "epoch": 0.7857360560804633, "grad_norm": 0.9609375, "learning_rate": 2.3128311150916826e-05, "loss": 1.0497, "step": 5156 }, { "epoch": 0.7858884486437062, "grad_norm": 1.140625, "learning_rate": 2.309675211456841e-05, "loss": 1.0144, "step": 5157 }, { "epoch": 0.7860408412069491, "grad_norm": 0.78125, "learning_rate": 2.3065211812957067e-05, "loss": 0.8516, "step": 5158 }, { "epoch": 0.786193233770192, "grad_norm": 0.87109375, "learning_rate": 2.303369025376646e-05, "loss": 0.8171, "step": 5159 }, { "epoch": 0.786345626333435, "grad_norm": 1.1171875, "learning_rate": 2.3002187444675715e-05, "loss": 0.9172, "step": 5160 }, { "epoch": 0.7864980188966778, "grad_norm": 1.1953125, "learning_rate": 2.297070339335935e-05, "loss": 1.0331, "step": 5161 }, { "epoch": 0.7866504114599208, "grad_norm": 0.83984375, "learning_rate": 2.2939238107487305e-05, "loss": 1.0252, "step": 5162 }, { "epoch": 0.7868028040231637, "grad_norm": 0.98046875, "learning_rate": 2.2907791594725048e-05, "loss": 1.0181, "step": 5163 }, { "epoch": 0.7869551965864066, "grad_norm": 0.9296875, "learning_rate": 2.2876363862733362e-05, "loss": 1.0127, "step": 5164 }, { "epoch": 0.7871075891496495, "grad_norm": 0.95703125, "learning_rate": 2.2844954919168492e-05, "loss": 0.8168, "step": 5165 }, { "epoch": 0.7872599817128925, "grad_norm": 0.7890625, "learning_rate": 2.2813564771682115e-05, "loss": 1.0159, "step": 5166 }, { "epoch": 0.7874123742761353, "grad_norm": 0.88671875, "learning_rate": 2.2782193427921327e-05, "loss": 0.964, "step": 5167 }, { "epoch": 0.7875647668393783, "grad_norm": 1.078125, "learning_rate": 2.27508408955286e-05, "loss": 1.0263, "step": 5168 }, { "epoch": 0.7877171594026211, "grad_norm": 0.83984375, "learning_rate": 2.271950718214193e-05, "loss": 0.9365, "step": 5169 }, { "epoch": 0.7878695519658641, "grad_norm": 0.86328125, "learning_rate": 2.2688192295394618e-05, "loss": 0.9853, "step": 5170 }, { "epoch": 0.788021944529107, "grad_norm": 0.890625, "learning_rate": 2.2656896242915428e-05, "loss": 1.0787, "step": 5171 }, { "epoch": 0.7881743370923499, "grad_norm": 0.9609375, "learning_rate": 2.2625619032328514e-05, "loss": 0.8775, "step": 5172 }, { "epoch": 0.7883267296555928, "grad_norm": 1.0546875, "learning_rate": 2.2594360671253457e-05, "loss": 1.0338, "step": 5173 }, { "epoch": 0.7884791222188358, "grad_norm": 0.796875, "learning_rate": 2.256312116730528e-05, "loss": 1.1096, "step": 5174 }, { "epoch": 0.7886315147820786, "grad_norm": 0.91796875, "learning_rate": 2.253190052809434e-05, "loss": 1.1066, "step": 5175 }, { "epoch": 0.7887839073453216, "grad_norm": 0.8125, "learning_rate": 2.2500698761226447e-05, "loss": 0.9622, "step": 5176 }, { "epoch": 0.7889362999085645, "grad_norm": 0.96875, "learning_rate": 2.2469515874302793e-05, "loss": 1.1226, "step": 5177 }, { "epoch": 0.7890886924718074, "grad_norm": 0.89453125, "learning_rate": 2.2438351874919984e-05, "loss": 0.9822, "step": 5178 }, { "epoch": 0.7892410850350503, "grad_norm": 0.96484375, "learning_rate": 2.2407206770669987e-05, "loss": 0.9048, "step": 5179 }, { "epoch": 0.7893934775982933, "grad_norm": 0.7578125, "learning_rate": 2.2376080569140258e-05, "loss": 0.9541, "step": 5180 }, { "epoch": 0.7895458701615361, "grad_norm": 1.234375, "learning_rate": 2.234497327791354e-05, "loss": 1.1903, "step": 5181 }, { "epoch": 0.789698262724779, "grad_norm": 0.85546875, "learning_rate": 2.231388490456805e-05, "loss": 0.8263, "step": 5182 }, { "epoch": 0.7898506552880219, "grad_norm": 1.09375, "learning_rate": 2.228281545667733e-05, "loss": 1.1303, "step": 5183 }, { "epoch": 0.7900030478512649, "grad_norm": 0.96875, "learning_rate": 2.225176494181036e-05, "loss": 1.1455, "step": 5184 }, { "epoch": 0.7901554404145078, "grad_norm": 1.0234375, "learning_rate": 2.2220733367531464e-05, "loss": 1.0339, "step": 5185 }, { "epoch": 0.7903078329777506, "grad_norm": 1.40625, "learning_rate": 2.2189720741400434e-05, "loss": 1.0858, "step": 5186 }, { "epoch": 0.7904602255409936, "grad_norm": 0.95703125, "learning_rate": 2.215872707097236e-05, "loss": 1.0031, "step": 5187 }, { "epoch": 0.7906126181042366, "grad_norm": 0.94921875, "learning_rate": 2.212775236379776e-05, "loss": 1.0298, "step": 5188 }, { "epoch": 0.7907650106674794, "grad_norm": 0.94921875, "learning_rate": 2.20967966274225e-05, "loss": 0.8814, "step": 5189 }, { "epoch": 0.7909174032307223, "grad_norm": 1.3046875, "learning_rate": 2.2065859869387817e-05, "loss": 0.9902, "step": 5190 }, { "epoch": 0.7910697957939653, "grad_norm": 0.8671875, "learning_rate": 2.2034942097230417e-05, "loss": 0.9109, "step": 5191 }, { "epoch": 0.7912221883572081, "grad_norm": 0.87890625, "learning_rate": 2.200404331848228e-05, "loss": 1.1882, "step": 5192 }, { "epoch": 0.7913745809204511, "grad_norm": 1.0625, "learning_rate": 2.1973163540670794e-05, "loss": 1.1343, "step": 5193 }, { "epoch": 0.7915269734836939, "grad_norm": 0.76171875, "learning_rate": 2.1942302771318712e-05, "loss": 0.8873, "step": 5194 }, { "epoch": 0.7916793660469369, "grad_norm": 1.0234375, "learning_rate": 2.1911461017944168e-05, "loss": 1.1505, "step": 5195 }, { "epoch": 0.7918317586101798, "grad_norm": 0.95703125, "learning_rate": 2.1880638288060617e-05, "loss": 0.8583, "step": 5196 }, { "epoch": 0.7919841511734227, "grad_norm": 0.921875, "learning_rate": 2.1849834589176997e-05, "loss": 1.0608, "step": 5197 }, { "epoch": 0.7921365437366656, "grad_norm": 1.1328125, "learning_rate": 2.1819049928797485e-05, "loss": 1.0967, "step": 5198 }, { "epoch": 0.7922889362999086, "grad_norm": 0.99609375, "learning_rate": 2.1788284314421668e-05, "loss": 0.9707, "step": 5199 }, { "epoch": 0.7924413288631514, "grad_norm": 1.078125, "learning_rate": 2.17575377535445e-05, "loss": 0.9878, "step": 5200 }, { "epoch": 0.7925937214263944, "grad_norm": 0.98828125, "learning_rate": 2.1726810253656282e-05, "loss": 0.9723, "step": 5201 }, { "epoch": 0.7927461139896373, "grad_norm": 1.4609375, "learning_rate": 2.1696101822242654e-05, "loss": 0.9009, "step": 5202 }, { "epoch": 0.7928985065528802, "grad_norm": 0.78515625, "learning_rate": 2.1665412466784672e-05, "loss": 0.8681, "step": 5203 }, { "epoch": 0.7930508991161231, "grad_norm": 1.015625, "learning_rate": 2.1634742194758695e-05, "loss": 1.0097, "step": 5204 }, { "epoch": 0.7932032916793661, "grad_norm": 0.95703125, "learning_rate": 2.1604091013636418e-05, "loss": 0.7774, "step": 5205 }, { "epoch": 0.7933556842426089, "grad_norm": 0.87890625, "learning_rate": 2.1573458930884938e-05, "loss": 0.9736, "step": 5206 }, { "epoch": 0.7935080768058519, "grad_norm": 0.83203125, "learning_rate": 2.1542845953966618e-05, "loss": 1.0441, "step": 5207 }, { "epoch": 0.7936604693690947, "grad_norm": 0.9609375, "learning_rate": 2.1512252090339292e-05, "loss": 1.0087, "step": 5208 }, { "epoch": 0.7938128619323377, "grad_norm": 0.796875, "learning_rate": 2.148167734745602e-05, "loss": 0.9306, "step": 5209 }, { "epoch": 0.7939652544955806, "grad_norm": 0.796875, "learning_rate": 2.1451121732765268e-05, "loss": 0.8302, "step": 5210 }, { "epoch": 0.7941176470588235, "grad_norm": 0.96484375, "learning_rate": 2.1420585253710822e-05, "loss": 0.9748, "step": 5211 }, { "epoch": 0.7942700396220664, "grad_norm": 0.921875, "learning_rate": 2.1390067917731792e-05, "loss": 1.0292, "step": 5212 }, { "epoch": 0.7944224321853094, "grad_norm": 1.1875, "learning_rate": 2.135956973226262e-05, "loss": 1.3112, "step": 5213 }, { "epoch": 0.7945748247485522, "grad_norm": 0.7734375, "learning_rate": 2.132909070473317e-05, "loss": 0.8757, "step": 5214 }, { "epoch": 0.7947272173117952, "grad_norm": 0.9609375, "learning_rate": 2.1298630842568523e-05, "loss": 0.9566, "step": 5215 }, { "epoch": 0.7948796098750381, "grad_norm": 0.86328125, "learning_rate": 2.1268190153189148e-05, "loss": 0.9285, "step": 5216 }, { "epoch": 0.795032002438281, "grad_norm": 0.67578125, "learning_rate": 2.1237768644010836e-05, "loss": 0.7974, "step": 5217 }, { "epoch": 0.7951843950015239, "grad_norm": 1.1015625, "learning_rate": 2.12073663224447e-05, "loss": 0.8991, "step": 5218 }, { "epoch": 0.7953367875647669, "grad_norm": 0.78515625, "learning_rate": 2.117698319589717e-05, "loss": 0.8989, "step": 5219 }, { "epoch": 0.7954891801280097, "grad_norm": 0.9609375, "learning_rate": 2.114661927177005e-05, "loss": 0.976, "step": 5220 }, { "epoch": 0.7956415726912527, "grad_norm": 1.03125, "learning_rate": 2.11162745574604e-05, "loss": 1.0369, "step": 5221 }, { "epoch": 0.7957939652544955, "grad_norm": 0.953125, "learning_rate": 2.1085949060360654e-05, "loss": 0.9822, "step": 5222 }, { "epoch": 0.7959463578177385, "grad_norm": 1.140625, "learning_rate": 2.105564278785851e-05, "loss": 1.0795, "step": 5223 }, { "epoch": 0.7960987503809814, "grad_norm": 0.79296875, "learning_rate": 2.1025355747336994e-05, "loss": 1.1819, "step": 5224 }, { "epoch": 0.7962511429442243, "grad_norm": 1.0859375, "learning_rate": 2.099508794617453e-05, "loss": 1.0076, "step": 5225 }, { "epoch": 0.7964035355074672, "grad_norm": 0.640625, "learning_rate": 2.0964839391744752e-05, "loss": 0.901, "step": 5226 }, { "epoch": 0.7965559280707102, "grad_norm": 0.98046875, "learning_rate": 2.093461009141664e-05, "loss": 1.072, "step": 5227 }, { "epoch": 0.796708320633953, "grad_norm": 0.9453125, "learning_rate": 2.0904400052554484e-05, "loss": 0.9471, "step": 5228 }, { "epoch": 0.796860713197196, "grad_norm": 0.859375, "learning_rate": 2.0874209282517908e-05, "loss": 1.0629, "step": 5229 }, { "epoch": 0.7970131057604389, "grad_norm": 1.046875, "learning_rate": 2.0844037788661762e-05, "loss": 0.8712, "step": 5230 }, { "epoch": 0.7971654983236818, "grad_norm": 0.95703125, "learning_rate": 2.0813885578336322e-05, "loss": 1.0088, "step": 5231 }, { "epoch": 0.7973178908869247, "grad_norm": 1.0078125, "learning_rate": 2.0783752658887066e-05, "loss": 0.9499, "step": 5232 }, { "epoch": 0.7974702834501677, "grad_norm": 1.0, "learning_rate": 2.0753639037654827e-05, "loss": 1.0365, "step": 5233 }, { "epoch": 0.7976226760134105, "grad_norm": 0.6953125, "learning_rate": 2.0723544721975694e-05, "loss": 0.8065, "step": 5234 }, { "epoch": 0.7977750685766535, "grad_norm": 0.76171875, "learning_rate": 2.069346971918108e-05, "loss": 0.8572, "step": 5235 }, { "epoch": 0.7979274611398963, "grad_norm": 0.96484375, "learning_rate": 2.0663414036597662e-05, "loss": 0.9731, "step": 5236 }, { "epoch": 0.7980798537031393, "grad_norm": 1.0234375, "learning_rate": 2.0633377681547505e-05, "loss": 0.9352, "step": 5237 }, { "epoch": 0.7982322462663822, "grad_norm": 0.83984375, "learning_rate": 2.060336066134785e-05, "loss": 0.9287, "step": 5238 }, { "epoch": 0.7983846388296251, "grad_norm": 0.83984375, "learning_rate": 2.0573362983311283e-05, "loss": 0.8499, "step": 5239 }, { "epoch": 0.798537031392868, "grad_norm": 1.1953125, "learning_rate": 2.0543384654745667e-05, "loss": 1.0206, "step": 5240 }, { "epoch": 0.798689423956111, "grad_norm": 0.71875, "learning_rate": 2.0513425682954125e-05, "loss": 0.7092, "step": 5241 }, { "epoch": 0.7988418165193538, "grad_norm": 0.73828125, "learning_rate": 2.048348607523516e-05, "loss": 0.8547, "step": 5242 }, { "epoch": 0.7989942090825968, "grad_norm": 1.0078125, "learning_rate": 2.045356583888245e-05, "loss": 0.8104, "step": 5243 }, { "epoch": 0.7991466016458397, "grad_norm": 1.03125, "learning_rate": 2.0423664981185e-05, "loss": 1.1503, "step": 5244 }, { "epoch": 0.7992989942090826, "grad_norm": 1.015625, "learning_rate": 2.039378350942709e-05, "loss": 0.9334, "step": 5245 }, { "epoch": 0.7994513867723255, "grad_norm": 1.109375, "learning_rate": 2.0363921430888277e-05, "loss": 0.8258, "step": 5246 }, { "epoch": 0.7996037793355685, "grad_norm": 1.0546875, "learning_rate": 2.0334078752843367e-05, "loss": 1.0024, "step": 5247 }, { "epoch": 0.7997561718988113, "grad_norm": 0.97265625, "learning_rate": 2.0304255482562505e-05, "loss": 0.9927, "step": 5248 }, { "epoch": 0.7999085644620543, "grad_norm": 1.15625, "learning_rate": 2.0274451627311066e-05, "loss": 1.0371, "step": 5249 }, { "epoch": 0.8000609570252971, "grad_norm": 0.93359375, "learning_rate": 2.0244667194349676e-05, "loss": 0.9526, "step": 5250 }, { "epoch": 0.8002133495885401, "grad_norm": 0.90234375, "learning_rate": 2.021490219093426e-05, "loss": 1.1132, "step": 5251 }, { "epoch": 0.800365742151783, "grad_norm": 1.21875, "learning_rate": 2.0185156624315982e-05, "loss": 0.9451, "step": 5252 }, { "epoch": 0.8005181347150259, "grad_norm": 0.9765625, "learning_rate": 2.0155430501741324e-05, "loss": 0.9379, "step": 5253 }, { "epoch": 0.8006705272782688, "grad_norm": 0.8828125, "learning_rate": 2.0125723830451992e-05, "loss": 0.8723, "step": 5254 }, { "epoch": 0.8008229198415118, "grad_norm": 0.953125, "learning_rate": 2.0096036617684944e-05, "loss": 0.8568, "step": 5255 }, { "epoch": 0.8009753124047546, "grad_norm": 1.0625, "learning_rate": 2.0066368870672414e-05, "loss": 1.0758, "step": 5256 }, { "epoch": 0.8011277049679976, "grad_norm": 1.1171875, "learning_rate": 2.003672059664189e-05, "loss": 1.1357, "step": 5257 }, { "epoch": 0.8012800975312405, "grad_norm": 0.97265625, "learning_rate": 2.0007091802816102e-05, "loss": 1.0926, "step": 5258 }, { "epoch": 0.8014324900944834, "grad_norm": 0.90625, "learning_rate": 1.9977482496413092e-05, "loss": 0.9416, "step": 5259 }, { "epoch": 0.8015848826577263, "grad_norm": 0.83984375, "learning_rate": 1.9947892684646097e-05, "loss": 0.8265, "step": 5260 }, { "epoch": 0.8017372752209693, "grad_norm": 0.90625, "learning_rate": 1.9918322374723607e-05, "loss": 1.0403, "step": 5261 }, { "epoch": 0.8018896677842121, "grad_norm": 0.91015625, "learning_rate": 1.9888771573849384e-05, "loss": 0.9945, "step": 5262 }, { "epoch": 0.8020420603474551, "grad_norm": 0.80859375, "learning_rate": 1.9859240289222426e-05, "loss": 0.9414, "step": 5263 }, { "epoch": 0.8021944529106979, "grad_norm": 0.78515625, "learning_rate": 1.9829728528036952e-05, "loss": 0.8235, "step": 5264 }, { "epoch": 0.8023468454739409, "grad_norm": 0.921875, "learning_rate": 1.98002362974825e-05, "loss": 0.9949, "step": 5265 }, { "epoch": 0.8024992380371838, "grad_norm": 1.0234375, "learning_rate": 1.977076360474379e-05, "loss": 0.8562, "step": 5266 }, { "epoch": 0.8026516306004267, "grad_norm": 0.80859375, "learning_rate": 1.974131045700077e-05, "loss": 0.9465, "step": 5267 }, { "epoch": 0.8028040231636696, "grad_norm": 0.9140625, "learning_rate": 1.971187686142868e-05, "loss": 1.1478, "step": 5268 }, { "epoch": 0.8029564157269126, "grad_norm": 0.9453125, "learning_rate": 1.968246282519791e-05, "loss": 0.8771, "step": 5269 }, { "epoch": 0.8031088082901554, "grad_norm": 0.97265625, "learning_rate": 1.9653068355474214e-05, "loss": 1.1398, "step": 5270 }, { "epoch": 0.8032612008533984, "grad_norm": 0.96484375, "learning_rate": 1.962369345941848e-05, "loss": 1.0088, "step": 5271 }, { "epoch": 0.8034135934166413, "grad_norm": 1.0703125, "learning_rate": 1.959433814418684e-05, "loss": 0.8052, "step": 5272 }, { "epoch": 0.8035659859798842, "grad_norm": 1.1640625, "learning_rate": 1.95650024169307e-05, "loss": 1.0974, "step": 5273 }, { "epoch": 0.8037183785431271, "grad_norm": 1.1171875, "learning_rate": 1.953568628479664e-05, "loss": 1.0899, "step": 5274 }, { "epoch": 0.8038707711063701, "grad_norm": 1.640625, "learning_rate": 1.950638975492647e-05, "loss": 1.2246, "step": 5275 }, { "epoch": 0.8040231636696129, "grad_norm": 1.0, "learning_rate": 1.94771128344573e-05, "loss": 0.9883, "step": 5276 }, { "epoch": 0.8041755562328559, "grad_norm": 0.77734375, "learning_rate": 1.9447855530521388e-05, "loss": 0.9032, "step": 5277 }, { "epoch": 0.8043279487960987, "grad_norm": 1.65625, "learning_rate": 1.9418617850246225e-05, "loss": 1.0631, "step": 5278 }, { "epoch": 0.8044803413593417, "grad_norm": 0.7734375, "learning_rate": 1.9389399800754538e-05, "loss": 0.8824, "step": 5279 }, { "epoch": 0.8046327339225846, "grad_norm": 0.80078125, "learning_rate": 1.9360201389164257e-05, "loss": 0.8919, "step": 5280 }, { "epoch": 0.8047851264858275, "grad_norm": 1.046875, "learning_rate": 1.9331022622588514e-05, "loss": 0.9398, "step": 5281 }, { "epoch": 0.8049375190490704, "grad_norm": 0.75390625, "learning_rate": 1.930186350813573e-05, "loss": 0.8263, "step": 5282 }, { "epoch": 0.8050899116123134, "grad_norm": 1.2109375, "learning_rate": 1.927272405290945e-05, "loss": 0.965, "step": 5283 }, { "epoch": 0.8052423041755562, "grad_norm": 0.78125, "learning_rate": 1.9243604264008475e-05, "loss": 0.9582, "step": 5284 }, { "epoch": 0.8053946967387992, "grad_norm": 0.86328125, "learning_rate": 1.9214504148526802e-05, "loss": 0.762, "step": 5285 }, { "epoch": 0.8055470893020421, "grad_norm": 0.984375, "learning_rate": 1.9185423713553606e-05, "loss": 0.8635, "step": 5286 }, { "epoch": 0.805699481865285, "grad_norm": 0.96484375, "learning_rate": 1.9156362966173347e-05, "loss": 1.1748, "step": 5287 }, { "epoch": 0.8058518744285279, "grad_norm": 0.95703125, "learning_rate": 1.9127321913465636e-05, "loss": 1.1668, "step": 5288 }, { "epoch": 0.8060042669917709, "grad_norm": 0.71484375, "learning_rate": 1.9098300562505266e-05, "loss": 0.8535, "step": 5289 }, { "epoch": 0.8061566595550137, "grad_norm": 0.9921875, "learning_rate": 1.9069298920362265e-05, "loss": 1.0213, "step": 5290 }, { "epoch": 0.8063090521182567, "grad_norm": 0.84765625, "learning_rate": 1.904031699410186e-05, "loss": 0.9093, "step": 5291 }, { "epoch": 0.8064614446814995, "grad_norm": 1.1484375, "learning_rate": 1.901135479078443e-05, "loss": 0.9165, "step": 5292 }, { "epoch": 0.8066138372447424, "grad_norm": 0.90234375, "learning_rate": 1.8982412317465627e-05, "loss": 0.9453, "step": 5293 }, { "epoch": 0.8067662298079854, "grad_norm": 1.1484375, "learning_rate": 1.895348958119625e-05, "loss": 1.2131, "step": 5294 }, { "epoch": 0.8069186223712282, "grad_norm": 1.078125, "learning_rate": 1.8924586589022277e-05, "loss": 0.9307, "step": 5295 }, { "epoch": 0.8070710149344712, "grad_norm": 1.21875, "learning_rate": 1.8895703347984893e-05, "loss": 1.0731, "step": 5296 }, { "epoch": 0.8072234074977142, "grad_norm": 1.15625, "learning_rate": 1.886683986512048e-05, "loss": 0.8969, "step": 5297 }, { "epoch": 0.807375800060957, "grad_norm": 0.9140625, "learning_rate": 1.8837996147460556e-05, "loss": 0.8888, "step": 5298 }, { "epoch": 0.8075281926242, "grad_norm": 0.95703125, "learning_rate": 1.8809172202031933e-05, "loss": 1.0441, "step": 5299 }, { "epoch": 0.8076805851874429, "grad_norm": 1.125, "learning_rate": 1.878036803585651e-05, "loss": 1.0715, "step": 5300 }, { "epoch": 0.8078329777506857, "grad_norm": 0.72265625, "learning_rate": 1.8751583655951386e-05, "loss": 0.8975, "step": 5301 }, { "epoch": 0.8079853703139287, "grad_norm": 0.8828125, "learning_rate": 1.8722819069328856e-05, "loss": 1.0188, "step": 5302 }, { "epoch": 0.8081377628771715, "grad_norm": 0.8125, "learning_rate": 1.8694074282996378e-05, "loss": 0.83, "step": 5303 }, { "epoch": 0.8082901554404145, "grad_norm": 0.78515625, "learning_rate": 1.8665349303956614e-05, "loss": 1.0283, "step": 5304 }, { "epoch": 0.8084425480036574, "grad_norm": 1.578125, "learning_rate": 1.8636644139207382e-05, "loss": 0.9603, "step": 5305 }, { "epoch": 0.8085949405669003, "grad_norm": 0.9375, "learning_rate": 1.8607958795741654e-05, "loss": 1.0794, "step": 5306 }, { "epoch": 0.8087473331301432, "grad_norm": 0.89453125, "learning_rate": 1.8579293280547606e-05, "loss": 0.992, "step": 5307 }, { "epoch": 0.8088997256933862, "grad_norm": 0.80078125, "learning_rate": 1.8550647600608573e-05, "loss": 0.792, "step": 5308 }, { "epoch": 0.809052118256629, "grad_norm": 0.984375, "learning_rate": 1.852202176290302e-05, "loss": 1.1308, "step": 5309 }, { "epoch": 0.809204510819872, "grad_norm": 1.15625, "learning_rate": 1.8493415774404655e-05, "loss": 1.0057, "step": 5310 }, { "epoch": 0.809356903383115, "grad_norm": 0.83203125, "learning_rate": 1.8464829642082303e-05, "loss": 1.0869, "step": 5311 }, { "epoch": 0.8095092959463578, "grad_norm": 1.03125, "learning_rate": 1.8436263372899943e-05, "loss": 1.0087, "step": 5312 }, { "epoch": 0.8096616885096007, "grad_norm": 0.95703125, "learning_rate": 1.8407716973816734e-05, "loss": 0.8969, "step": 5313 }, { "epoch": 0.8098140810728437, "grad_norm": 0.97265625, "learning_rate": 1.8379190451787e-05, "loss": 0.9494, "step": 5314 }, { "epoch": 0.8099664736360865, "grad_norm": 1.09375, "learning_rate": 1.8350683813760162e-05, "loss": 1.0638, "step": 5315 }, { "epoch": 0.8101188661993295, "grad_norm": 0.72265625, "learning_rate": 1.8322197066680914e-05, "loss": 0.8916, "step": 5316 }, { "epoch": 0.8102712587625723, "grad_norm": 1.078125, "learning_rate": 1.8293730217489004e-05, "loss": 1.1521, "step": 5317 }, { "epoch": 0.8104236513258153, "grad_norm": 0.984375, "learning_rate": 1.826528327311937e-05, "loss": 0.9758, "step": 5318 }, { "epoch": 0.8105760438890582, "grad_norm": 0.984375, "learning_rate": 1.8236856240502108e-05, "loss": 0.8476, "step": 5319 }, { "epoch": 0.8107284364523011, "grad_norm": 1.1328125, "learning_rate": 1.820844912656241e-05, "loss": 0.9806, "step": 5320 }, { "epoch": 0.810880829015544, "grad_norm": 0.859375, "learning_rate": 1.8180061938220715e-05, "loss": 0.9929, "step": 5321 }, { "epoch": 0.811033221578787, "grad_norm": 0.82421875, "learning_rate": 1.815169468239252e-05, "loss": 0.9671, "step": 5322 }, { "epoch": 0.8111856141420298, "grad_norm": 1.2109375, "learning_rate": 1.8123347365988498e-05, "loss": 0.932, "step": 5323 }, { "epoch": 0.8113380067052728, "grad_norm": 1.3046875, "learning_rate": 1.8095019995914476e-05, "loss": 1.1545, "step": 5324 }, { "epoch": 0.8114903992685157, "grad_norm": 1.0390625, "learning_rate": 1.8066712579071388e-05, "loss": 1.1366, "step": 5325 }, { "epoch": 0.8116427918317586, "grad_norm": 1.078125, "learning_rate": 1.8038425122355317e-05, "loss": 1.0765, "step": 5326 }, { "epoch": 0.8117951843950015, "grad_norm": 1.0078125, "learning_rate": 1.8010157632657543e-05, "loss": 0.9029, "step": 5327 }, { "epoch": 0.8119475769582445, "grad_norm": 0.76953125, "learning_rate": 1.79819101168644e-05, "loss": 0.9519, "step": 5328 }, { "epoch": 0.8120999695214873, "grad_norm": 0.953125, "learning_rate": 1.795368258185739e-05, "loss": 1.0231, "step": 5329 }, { "epoch": 0.8122523620847303, "grad_norm": 1.0390625, "learning_rate": 1.7925475034513162e-05, "loss": 1.1273, "step": 5330 }, { "epoch": 0.8124047546479731, "grad_norm": 1.0390625, "learning_rate": 1.7897287481703463e-05, "loss": 1.0451, "step": 5331 }, { "epoch": 0.8125571472112161, "grad_norm": 0.875, "learning_rate": 1.7869119930295165e-05, "loss": 0.9877, "step": 5332 }, { "epoch": 0.812709539774459, "grad_norm": 0.9140625, "learning_rate": 1.7840972387150345e-05, "loss": 0.9953, "step": 5333 }, { "epoch": 0.8128619323377019, "grad_norm": 0.96484375, "learning_rate": 1.781284485912611e-05, "loss": 1.1101, "step": 5334 }, { "epoch": 0.8130143249009448, "grad_norm": 1.046875, "learning_rate": 1.7784737353074744e-05, "loss": 1.0062, "step": 5335 }, { "epoch": 0.8131667174641878, "grad_norm": 0.92578125, "learning_rate": 1.7756649875843644e-05, "loss": 0.9959, "step": 5336 }, { "epoch": 0.8133191100274306, "grad_norm": 1.046875, "learning_rate": 1.772858243427529e-05, "loss": 1.0866, "step": 5337 }, { "epoch": 0.8134715025906736, "grad_norm": 0.8984375, "learning_rate": 1.7700535035207355e-05, "loss": 1.1307, "step": 5338 }, { "epoch": 0.8136238951539165, "grad_norm": 1.171875, "learning_rate": 1.7672507685472573e-05, "loss": 1.0206, "step": 5339 }, { "epoch": 0.8137762877171594, "grad_norm": 0.9140625, "learning_rate": 1.764450039189881e-05, "loss": 0.9113, "step": 5340 }, { "epoch": 0.8139286802804023, "grad_norm": 0.9453125, "learning_rate": 1.7616513161309055e-05, "loss": 0.8323, "step": 5341 }, { "epoch": 0.8140810728436453, "grad_norm": 1.03125, "learning_rate": 1.7588546000521378e-05, "loss": 1.0595, "step": 5342 }, { "epoch": 0.8142334654068881, "grad_norm": 0.90234375, "learning_rate": 1.756059891634898e-05, "loss": 1.0188, "step": 5343 }, { "epoch": 0.8143858579701311, "grad_norm": 0.88671875, "learning_rate": 1.7532671915600196e-05, "loss": 0.8755, "step": 5344 }, { "epoch": 0.8145382505333739, "grad_norm": 1.0234375, "learning_rate": 1.750476500507845e-05, "loss": 1.0727, "step": 5345 }, { "epoch": 0.8146906430966169, "grad_norm": 1.0703125, "learning_rate": 1.7476878191582246e-05, "loss": 0.9557, "step": 5346 }, { "epoch": 0.8148430356598598, "grad_norm": 1.1953125, "learning_rate": 1.744901148190522e-05, "loss": 1.0651, "step": 5347 }, { "epoch": 0.8149954282231027, "grad_norm": 1.3515625, "learning_rate": 1.7421164882836095e-05, "loss": 1.181, "step": 5348 }, { "epoch": 0.8151478207863456, "grad_norm": 0.92578125, "learning_rate": 1.739333840115869e-05, "loss": 1.0516, "step": 5349 }, { "epoch": 0.8153002133495886, "grad_norm": 0.9921875, "learning_rate": 1.7365532043651978e-05, "loss": 0.8631, "step": 5350 }, { "epoch": 0.8154526059128314, "grad_norm": 0.859375, "learning_rate": 1.7337745817089968e-05, "loss": 0.8584, "step": 5351 }, { "epoch": 0.8156049984760744, "grad_norm": 0.98046875, "learning_rate": 1.730997972824179e-05, "loss": 0.9841, "step": 5352 }, { "epoch": 0.8157573910393173, "grad_norm": 1.015625, "learning_rate": 1.7282233783871637e-05, "loss": 0.8975, "step": 5353 }, { "epoch": 0.8159097836025602, "grad_norm": 1.359375, "learning_rate": 1.725450799073882e-05, "loss": 1.1523, "step": 5354 }, { "epoch": 0.8160621761658031, "grad_norm": 1.28125, "learning_rate": 1.7226802355597782e-05, "loss": 1.2313, "step": 5355 }, { "epoch": 0.8162145687290461, "grad_norm": 0.71484375, "learning_rate": 1.7199116885197995e-05, "loss": 0.7469, "step": 5356 }, { "epoch": 0.8163669612922889, "grad_norm": 1.09375, "learning_rate": 1.7171451586284025e-05, "loss": 1.0824, "step": 5357 }, { "epoch": 0.8165193538555319, "grad_norm": 1.0, "learning_rate": 1.714380646559556e-05, "loss": 0.8556, "step": 5358 }, { "epoch": 0.8166717464187747, "grad_norm": 1.0, "learning_rate": 1.7116181529867327e-05, "loss": 0.9704, "step": 5359 }, { "epoch": 0.8168241389820177, "grad_norm": 1.0859375, "learning_rate": 1.7088576785829123e-05, "loss": 1.2786, "step": 5360 }, { "epoch": 0.8169765315452606, "grad_norm": 0.82421875, "learning_rate": 1.7060992240205943e-05, "loss": 1.0438, "step": 5361 }, { "epoch": 0.8171289241085035, "grad_norm": 0.828125, "learning_rate": 1.7033427899717735e-05, "loss": 0.977, "step": 5362 }, { "epoch": 0.8172813166717464, "grad_norm": 1.0078125, "learning_rate": 1.7005883771079577e-05, "loss": 1.0792, "step": 5363 }, { "epoch": 0.8174337092349894, "grad_norm": 1.1171875, "learning_rate": 1.6978359861001603e-05, "loss": 1.0597, "step": 5364 }, { "epoch": 0.8175861017982322, "grad_norm": 1.03125, "learning_rate": 1.6950856176189033e-05, "loss": 0.8249, "step": 5365 }, { "epoch": 0.8177384943614752, "grad_norm": 0.71484375, "learning_rate": 1.6923372723342157e-05, "loss": 1.0914, "step": 5366 }, { "epoch": 0.8178908869247181, "grad_norm": 1.0625, "learning_rate": 1.689590950915636e-05, "loss": 0.9797, "step": 5367 }, { "epoch": 0.818043279487961, "grad_norm": 0.99609375, "learning_rate": 1.686846654032207e-05, "loss": 0.93, "step": 5368 }, { "epoch": 0.8181956720512039, "grad_norm": 0.73046875, "learning_rate": 1.6841043823524782e-05, "loss": 0.9465, "step": 5369 }, { "epoch": 0.8183480646144469, "grad_norm": 1.2265625, "learning_rate": 1.6813641365445077e-05, "loss": 1.0937, "step": 5370 }, { "epoch": 0.8185004571776897, "grad_norm": 0.98046875, "learning_rate": 1.6786259172758546e-05, "loss": 1.0661, "step": 5371 }, { "epoch": 0.8186528497409327, "grad_norm": 0.83984375, "learning_rate": 1.675889725213593e-05, "loss": 0.9309, "step": 5372 }, { "epoch": 0.8188052423041755, "grad_norm": 1.1015625, "learning_rate": 1.6731555610242987e-05, "loss": 1.1059, "step": 5373 }, { "epoch": 0.8189576348674185, "grad_norm": 0.80078125, "learning_rate": 1.6704234253740515e-05, "loss": 0.8672, "step": 5374 }, { "epoch": 0.8191100274306614, "grad_norm": 1.0390625, "learning_rate": 1.6676933189284382e-05, "loss": 1.296, "step": 5375 }, { "epoch": 0.8192624199939043, "grad_norm": 1.125, "learning_rate": 1.664965242352554e-05, "loss": 1.1071, "step": 5376 }, { "epoch": 0.8194148125571472, "grad_norm": 0.921875, "learning_rate": 1.662239196310995e-05, "loss": 1.024, "step": 5377 }, { "epoch": 0.8195672051203902, "grad_norm": 0.93359375, "learning_rate": 1.659515181467869e-05, "loss": 0.9688, "step": 5378 }, { "epoch": 0.819719597683633, "grad_norm": 0.8203125, "learning_rate": 1.6567931984867846e-05, "loss": 0.8762, "step": 5379 }, { "epoch": 0.819871990246876, "grad_norm": 0.96875, "learning_rate": 1.6540732480308508e-05, "loss": 0.9008, "step": 5380 }, { "epoch": 0.8200243828101189, "grad_norm": 0.921875, "learning_rate": 1.651355330762693e-05, "loss": 0.8991, "step": 5381 }, { "epoch": 0.8201767753733618, "grad_norm": 0.76953125, "learning_rate": 1.6486394473444287e-05, "loss": 0.9523, "step": 5382 }, { "epoch": 0.8203291679366047, "grad_norm": 1.0703125, "learning_rate": 1.645925598437693e-05, "loss": 0.9942, "step": 5383 }, { "epoch": 0.8204815604998477, "grad_norm": 1.015625, "learning_rate": 1.6432137847036145e-05, "loss": 0.9201, "step": 5384 }, { "epoch": 0.8206339530630905, "grad_norm": 0.8671875, "learning_rate": 1.6405040068028322e-05, "loss": 0.9917, "step": 5385 }, { "epoch": 0.8207863456263335, "grad_norm": 0.7421875, "learning_rate": 1.6377962653954837e-05, "loss": 0.8651, "step": 5386 }, { "epoch": 0.8209387381895763, "grad_norm": 0.88671875, "learning_rate": 1.635090561141217e-05, "loss": 0.9349, "step": 5387 }, { "epoch": 0.8210911307528193, "grad_norm": 0.8984375, "learning_rate": 1.632386894699177e-05, "loss": 1.0318, "step": 5388 }, { "epoch": 0.8212435233160622, "grad_norm": 1.1640625, "learning_rate": 1.6296852667280194e-05, "loss": 1.2213, "step": 5389 }, { "epoch": 0.821395915879305, "grad_norm": 0.97265625, "learning_rate": 1.6269856778858983e-05, "loss": 0.9784, "step": 5390 }, { "epoch": 0.821548308442548, "grad_norm": 0.984375, "learning_rate": 1.6242881288304724e-05, "loss": 1.0818, "step": 5391 }, { "epoch": 0.821700701005791, "grad_norm": 0.80859375, "learning_rate": 1.6215926202189034e-05, "loss": 0.9376, "step": 5392 }, { "epoch": 0.8218530935690338, "grad_norm": 0.9375, "learning_rate": 1.6188991527078556e-05, "loss": 1.1004, "step": 5393 }, { "epoch": 0.8220054861322768, "grad_norm": 0.94921875, "learning_rate": 1.6162077269534946e-05, "loss": 0.913, "step": 5394 }, { "epoch": 0.8221578786955197, "grad_norm": 0.60546875, "learning_rate": 1.613518343611494e-05, "loss": 0.8228, "step": 5395 }, { "epoch": 0.8223102712587625, "grad_norm": 0.82421875, "learning_rate": 1.6108310033370276e-05, "loss": 0.8355, "step": 5396 }, { "epoch": 0.8224626638220055, "grad_norm": 0.9453125, "learning_rate": 1.6081457067847628e-05, "loss": 0.9082, "step": 5397 }, { "epoch": 0.8226150563852483, "grad_norm": 1.046875, "learning_rate": 1.6054624546088825e-05, "loss": 0.9551, "step": 5398 }, { "epoch": 0.8227674489484913, "grad_norm": 0.7578125, "learning_rate": 1.602781247463062e-05, "loss": 0.9315, "step": 5399 }, { "epoch": 0.8229198415117343, "grad_norm": 0.7890625, "learning_rate": 1.6001020860004857e-05, "loss": 0.9252, "step": 5400 }, { "epoch": 0.8230722340749771, "grad_norm": 1.0, "learning_rate": 1.5974249708738343e-05, "loss": 0.7473, "step": 5401 }, { "epoch": 0.82322462663822, "grad_norm": 0.94921875, "learning_rate": 1.594749902735292e-05, "loss": 1.0564, "step": 5402 }, { "epoch": 0.823377019201463, "grad_norm": 0.765625, "learning_rate": 1.5920768822365418e-05, "loss": 0.8634, "step": 5403 }, { "epoch": 0.8235294117647058, "grad_norm": 1.234375, "learning_rate": 1.5894059100287717e-05, "loss": 1.1469, "step": 5404 }, { "epoch": 0.8236818043279488, "grad_norm": 0.92578125, "learning_rate": 1.5867369867626658e-05, "loss": 0.8267, "step": 5405 }, { "epoch": 0.8238341968911918, "grad_norm": 0.87109375, "learning_rate": 1.5840701130884173e-05, "loss": 0.965, "step": 5406 }, { "epoch": 0.8239865894544346, "grad_norm": 1.140625, "learning_rate": 1.581405289655715e-05, "loss": 0.987, "step": 5407 }, { "epoch": 0.8241389820176775, "grad_norm": 1.1171875, "learning_rate": 1.5787425171137416e-05, "loss": 0.9468, "step": 5408 }, { "epoch": 0.8242913745809205, "grad_norm": 1.1640625, "learning_rate": 1.576081796111193e-05, "loss": 0.9598, "step": 5409 }, { "epoch": 0.8244437671441633, "grad_norm": 1.1875, "learning_rate": 1.573423127296256e-05, "loss": 1.0174, "step": 5410 }, { "epoch": 0.8245961597074063, "grad_norm": 0.94140625, "learning_rate": 1.5707665113166203e-05, "loss": 1.0607, "step": 5411 }, { "epoch": 0.8247485522706491, "grad_norm": 0.80859375, "learning_rate": 1.5681119488194795e-05, "loss": 0.8709, "step": 5412 }, { "epoch": 0.8249009448338921, "grad_norm": 1.0859375, "learning_rate": 1.5654594404515222e-05, "loss": 0.8157, "step": 5413 }, { "epoch": 0.825053337397135, "grad_norm": 0.95703125, "learning_rate": 1.5628089868589313e-05, "loss": 0.9762, "step": 5414 }, { "epoch": 0.8252057299603779, "grad_norm": 0.69140625, "learning_rate": 1.5601605886874037e-05, "loss": 0.8653, "step": 5415 }, { "epoch": 0.8253581225236208, "grad_norm": 0.86328125, "learning_rate": 1.5575142465821203e-05, "loss": 0.8214, "step": 5416 }, { "epoch": 0.8255105150868638, "grad_norm": 0.83203125, "learning_rate": 1.554869961187775e-05, "loss": 1.0568, "step": 5417 }, { "epoch": 0.8256629076501066, "grad_norm": 1.2734375, "learning_rate": 1.552227733148549e-05, "loss": 0.902, "step": 5418 }, { "epoch": 0.8258153002133496, "grad_norm": 0.92578125, "learning_rate": 1.549587563108129e-05, "loss": 0.8249, "step": 5419 }, { "epoch": 0.8259676927765925, "grad_norm": 0.92578125, "learning_rate": 1.5469494517096984e-05, "loss": 1.0116, "step": 5420 }, { "epoch": 0.8261200853398354, "grad_norm": 0.69140625, "learning_rate": 1.544313399595938e-05, "loss": 1.0023, "step": 5421 }, { "epoch": 0.8262724779030783, "grad_norm": 1.0234375, "learning_rate": 1.5416794074090258e-05, "loss": 0.9635, "step": 5422 }, { "epoch": 0.8264248704663213, "grad_norm": 1.1640625, "learning_rate": 1.5390474757906446e-05, "loss": 1.0556, "step": 5423 }, { "epoch": 0.8265772630295641, "grad_norm": 0.76171875, "learning_rate": 1.5364176053819723e-05, "loss": 0.9318, "step": 5424 }, { "epoch": 0.8267296555928071, "grad_norm": 0.84765625, "learning_rate": 1.5337897968236748e-05, "loss": 0.8791, "step": 5425 }, { "epoch": 0.8268820481560499, "grad_norm": 0.96875, "learning_rate": 1.531164050755932e-05, "loss": 1.0176, "step": 5426 }, { "epoch": 0.8270344407192929, "grad_norm": 1.046875, "learning_rate": 1.5285403678184094e-05, "loss": 1.0465, "step": 5427 }, { "epoch": 0.8271868332825358, "grad_norm": 1.046875, "learning_rate": 1.5259187486502734e-05, "loss": 1.054, "step": 5428 }, { "epoch": 0.8273392258457787, "grad_norm": 0.8828125, "learning_rate": 1.523299193890193e-05, "loss": 0.9541, "step": 5429 }, { "epoch": 0.8274916184090216, "grad_norm": 0.84375, "learning_rate": 1.5206817041763266e-05, "loss": 1.0426, "step": 5430 }, { "epoch": 0.8276440109722646, "grad_norm": 0.83984375, "learning_rate": 1.5180662801463286e-05, "loss": 0.9132, "step": 5431 }, { "epoch": 0.8277964035355074, "grad_norm": 1.0703125, "learning_rate": 1.5154529224373593e-05, "loss": 1.0521, "step": 5432 }, { "epoch": 0.8279487960987504, "grad_norm": 0.8125, "learning_rate": 1.512841631686065e-05, "loss": 0.8926, "step": 5433 }, { "epoch": 0.8281011886619933, "grad_norm": 1.0234375, "learning_rate": 1.510232408528599e-05, "loss": 1.0697, "step": 5434 }, { "epoch": 0.8282535812252362, "grad_norm": 0.87109375, "learning_rate": 1.5076252536006063e-05, "loss": 0.8564, "step": 5435 }, { "epoch": 0.8284059737884791, "grad_norm": 1.421875, "learning_rate": 1.50502016753722e-05, "loss": 1.1253, "step": 5436 }, { "epoch": 0.8285583663517221, "grad_norm": 1.1640625, "learning_rate": 1.5024171509730833e-05, "loss": 1.0882, "step": 5437 }, { "epoch": 0.8287107589149649, "grad_norm": 1.171875, "learning_rate": 1.4998162045423247e-05, "loss": 0.9287, "step": 5438 }, { "epoch": 0.8288631514782079, "grad_norm": 0.80859375, "learning_rate": 1.4972173288785729e-05, "loss": 0.8541, "step": 5439 }, { "epoch": 0.8290155440414507, "grad_norm": 0.77734375, "learning_rate": 1.4946205246149536e-05, "loss": 0.881, "step": 5440 }, { "epoch": 0.8291679366046937, "grad_norm": 0.90625, "learning_rate": 1.4920257923840864e-05, "loss": 0.9992, "step": 5441 }, { "epoch": 0.8293203291679366, "grad_norm": 0.73046875, "learning_rate": 1.4894331328180778e-05, "loss": 0.8963, "step": 5442 }, { "epoch": 0.8294727217311795, "grad_norm": 0.76953125, "learning_rate": 1.4868425465485458e-05, "loss": 0.8961, "step": 5443 }, { "epoch": 0.8296251142944224, "grad_norm": 1.21875, "learning_rate": 1.4842540342065892e-05, "loss": 0.9919, "step": 5444 }, { "epoch": 0.8297775068576654, "grad_norm": 1.140625, "learning_rate": 1.4816675964228077e-05, "loss": 0.9238, "step": 5445 }, { "epoch": 0.8299298994209082, "grad_norm": 0.796875, "learning_rate": 1.4790832338272975e-05, "loss": 0.8956, "step": 5446 }, { "epoch": 0.8300822919841512, "grad_norm": 1.1796875, "learning_rate": 1.4765009470496437e-05, "loss": 1.0498, "step": 5447 }, { "epoch": 0.8302346845473941, "grad_norm": 1.046875, "learning_rate": 1.4739207367189301e-05, "loss": 0.8938, "step": 5448 }, { "epoch": 0.830387077110637, "grad_norm": 1.1875, "learning_rate": 1.4713426034637323e-05, "loss": 1.0534, "step": 5449 }, { "epoch": 0.8305394696738799, "grad_norm": 0.9453125, "learning_rate": 1.4687665479121182e-05, "loss": 1.0177, "step": 5450 }, { "epoch": 0.8306918622371229, "grad_norm": 0.859375, "learning_rate": 1.4661925706916568e-05, "loss": 0.907, "step": 5451 }, { "epoch": 0.8308442548003657, "grad_norm": 0.9296875, "learning_rate": 1.4636206724294065e-05, "loss": 0.9588, "step": 5452 }, { "epoch": 0.8309966473636087, "grad_norm": 0.8046875, "learning_rate": 1.4610508537519096e-05, "loss": 0.9315, "step": 5453 }, { "epoch": 0.8311490399268515, "grad_norm": 1.109375, "learning_rate": 1.4584831152852207e-05, "loss": 0.8947, "step": 5454 }, { "epoch": 0.8313014324900945, "grad_norm": 0.96484375, "learning_rate": 1.455917457654874e-05, "loss": 0.9485, "step": 5455 }, { "epoch": 0.8314538250533374, "grad_norm": 0.89453125, "learning_rate": 1.4533538814858971e-05, "loss": 0.7947, "step": 5456 }, { "epoch": 0.8316062176165803, "grad_norm": 0.79296875, "learning_rate": 1.4507923874028218e-05, "loss": 0.9558, "step": 5457 }, { "epoch": 0.8317586101798232, "grad_norm": 0.80859375, "learning_rate": 1.448232976029662e-05, "loss": 0.9585, "step": 5458 }, { "epoch": 0.8319110027430662, "grad_norm": 0.89453125, "learning_rate": 1.4456756479899213e-05, "loss": 0.924, "step": 5459 }, { "epoch": 0.832063395306309, "grad_norm": 0.9375, "learning_rate": 1.4431204039066082e-05, "loss": 0.8139, "step": 5460 }, { "epoch": 0.832215787869552, "grad_norm": 1.1953125, "learning_rate": 1.4405672444022155e-05, "loss": 0.9211, "step": 5461 }, { "epoch": 0.8323681804327949, "grad_norm": 0.95703125, "learning_rate": 1.438016170098726e-05, "loss": 0.8833, "step": 5462 }, { "epoch": 0.8325205729960378, "grad_norm": 0.8984375, "learning_rate": 1.4354671816176268e-05, "loss": 1.0709, "step": 5463 }, { "epoch": 0.8326729655592807, "grad_norm": 0.890625, "learning_rate": 1.4329202795798769e-05, "loss": 0.9061, "step": 5464 }, { "epoch": 0.8328253581225237, "grad_norm": 0.84765625, "learning_rate": 1.4303754646059464e-05, "loss": 0.9293, "step": 5465 }, { "epoch": 0.8329777506857665, "grad_norm": 0.81640625, "learning_rate": 1.4278327373157852e-05, "loss": 1.0094, "step": 5466 }, { "epoch": 0.8331301432490095, "grad_norm": 0.7421875, "learning_rate": 1.4252920983288377e-05, "loss": 0.9397, "step": 5467 }, { "epoch": 0.8332825358122523, "grad_norm": 1.1171875, "learning_rate": 1.4227535482640442e-05, "loss": 1.0259, "step": 5468 }, { "epoch": 0.8334349283754953, "grad_norm": 1.1328125, "learning_rate": 1.4202170877398313e-05, "loss": 0.9017, "step": 5469 }, { "epoch": 0.8335873209387382, "grad_norm": 0.94921875, "learning_rate": 1.4176827173741103e-05, "loss": 1.0588, "step": 5470 }, { "epoch": 0.8337397135019811, "grad_norm": 1.0390625, "learning_rate": 1.4151504377842984e-05, "loss": 0.9274, "step": 5471 }, { "epoch": 0.833892106065224, "grad_norm": 0.796875, "learning_rate": 1.4126202495872909e-05, "loss": 0.854, "step": 5472 }, { "epoch": 0.834044498628467, "grad_norm": 1.0625, "learning_rate": 1.4100921533994783e-05, "loss": 1.0015, "step": 5473 }, { "epoch": 0.8341968911917098, "grad_norm": 0.91796875, "learning_rate": 1.4075661498367443e-05, "loss": 0.9241, "step": 5474 }, { "epoch": 0.8343492837549528, "grad_norm": 1.03125, "learning_rate": 1.4050422395144602e-05, "loss": 0.9129, "step": 5475 }, { "epoch": 0.8345016763181957, "grad_norm": 0.703125, "learning_rate": 1.4025204230474787e-05, "loss": 0.7512, "step": 5476 }, { "epoch": 0.8346540688814386, "grad_norm": 0.8359375, "learning_rate": 1.4000007010501593e-05, "loss": 0.8639, "step": 5477 }, { "epoch": 0.8348064614446815, "grad_norm": 0.921875, "learning_rate": 1.3974830741363397e-05, "loss": 0.8822, "step": 5478 }, { "epoch": 0.8349588540079245, "grad_norm": 0.953125, "learning_rate": 1.3949675429193466e-05, "loss": 1.0443, "step": 5479 }, { "epoch": 0.8351112465711673, "grad_norm": 1.03125, "learning_rate": 1.3924541080120068e-05, "loss": 0.9533, "step": 5480 }, { "epoch": 0.8352636391344103, "grad_norm": 1.1640625, "learning_rate": 1.389942770026622e-05, "loss": 1.0708, "step": 5481 }, { "epoch": 0.8354160316976531, "grad_norm": 0.9375, "learning_rate": 1.387433529574994e-05, "loss": 1.2199, "step": 5482 }, { "epoch": 0.8355684242608961, "grad_norm": 0.87890625, "learning_rate": 1.3849263872684104e-05, "loss": 0.9444, "step": 5483 }, { "epoch": 0.835720816824139, "grad_norm": 0.89453125, "learning_rate": 1.3824213437176426e-05, "loss": 0.8287, "step": 5484 }, { "epoch": 0.8358732093873819, "grad_norm": 0.90625, "learning_rate": 1.3799183995329612e-05, "loss": 1.1481, "step": 5485 }, { "epoch": 0.8360256019506248, "grad_norm": 0.8515625, "learning_rate": 1.3774175553241187e-05, "loss": 0.8998, "step": 5486 }, { "epoch": 0.8361779945138678, "grad_norm": 1.1171875, "learning_rate": 1.3749188117003508e-05, "loss": 0.9374, "step": 5487 }, { "epoch": 0.8363303870771106, "grad_norm": 0.9765625, "learning_rate": 1.3724221692703931e-05, "loss": 1.1712, "step": 5488 }, { "epoch": 0.8364827796403536, "grad_norm": 1.015625, "learning_rate": 1.3699276286424622e-05, "loss": 0.8272, "step": 5489 }, { "epoch": 0.8366351722035965, "grad_norm": 0.79296875, "learning_rate": 1.3674351904242611e-05, "loss": 0.9698, "step": 5490 }, { "epoch": 0.8367875647668394, "grad_norm": 1.109375, "learning_rate": 1.3649448552229904e-05, "loss": 0.8869, "step": 5491 }, { "epoch": 0.8369399573300823, "grad_norm": 0.953125, "learning_rate": 1.362456623645325e-05, "loss": 0.8287, "step": 5492 }, { "epoch": 0.8370923498933253, "grad_norm": 0.8203125, "learning_rate": 1.3599704962974347e-05, "loss": 1.0179, "step": 5493 }, { "epoch": 0.8372447424565681, "grad_norm": 0.984375, "learning_rate": 1.3574864737849791e-05, "loss": 0.8897, "step": 5494 }, { "epoch": 0.8373971350198111, "grad_norm": 0.8046875, "learning_rate": 1.3550045567130998e-05, "loss": 0.9585, "step": 5495 }, { "epoch": 0.8375495275830539, "grad_norm": 1.2265625, "learning_rate": 1.3525247456864255e-05, "loss": 1.1453, "step": 5496 }, { "epoch": 0.8377019201462969, "grad_norm": 1.046875, "learning_rate": 1.3500470413090804e-05, "loss": 0.9262, "step": 5497 }, { "epoch": 0.8378543127095398, "grad_norm": 0.73046875, "learning_rate": 1.347571444184661e-05, "loss": 0.7524, "step": 5498 }, { "epoch": 0.8380067052727826, "grad_norm": 0.90234375, "learning_rate": 1.3450979549162645e-05, "loss": 0.9434, "step": 5499 }, { "epoch": 0.8381590978360256, "grad_norm": 0.98828125, "learning_rate": 1.3426265741064648e-05, "loss": 0.9513, "step": 5500 }, { "epoch": 0.8383114903992686, "grad_norm": 0.81640625, "learning_rate": 1.3401573023573256e-05, "loss": 0.894, "step": 5501 }, { "epoch": 0.8384638829625114, "grad_norm": 1.34375, "learning_rate": 1.3376901402704e-05, "loss": 0.8795, "step": 5502 }, { "epoch": 0.8386162755257544, "grad_norm": 0.82421875, "learning_rate": 1.3352250884467244e-05, "loss": 0.8694, "step": 5503 }, { "epoch": 0.8387686680889973, "grad_norm": 0.9453125, "learning_rate": 1.3327621474868158e-05, "loss": 1.007, "step": 5504 }, { "epoch": 0.8389210606522401, "grad_norm": 0.83984375, "learning_rate": 1.3303013179906864e-05, "loss": 0.9994, "step": 5505 }, { "epoch": 0.8390734532154831, "grad_norm": 1.375, "learning_rate": 1.3278426005578282e-05, "loss": 0.9929, "step": 5506 }, { "epoch": 0.8392258457787259, "grad_norm": 0.87109375, "learning_rate": 1.3253859957872184e-05, "loss": 0.9619, "step": 5507 }, { "epoch": 0.8393782383419689, "grad_norm": 1.1484375, "learning_rate": 1.3229315042773283e-05, "loss": 1.1728, "step": 5508 }, { "epoch": 0.8395306309052118, "grad_norm": 1.21875, "learning_rate": 1.3204791266260997e-05, "loss": 1.0762, "step": 5509 }, { "epoch": 0.8396830234684547, "grad_norm": 0.71875, "learning_rate": 1.318028863430968e-05, "loss": 0.9116, "step": 5510 }, { "epoch": 0.8398354160316976, "grad_norm": 1.0390625, "learning_rate": 1.3155807152888555e-05, "loss": 0.8256, "step": 5511 }, { "epoch": 0.8399878085949406, "grad_norm": 1.2109375, "learning_rate": 1.3131346827961643e-05, "loss": 0.9006, "step": 5512 }, { "epoch": 0.8401402011581834, "grad_norm": 1.03125, "learning_rate": 1.3106907665487833e-05, "loss": 0.9902, "step": 5513 }, { "epoch": 0.8402925937214264, "grad_norm": 0.8203125, "learning_rate": 1.3082489671420895e-05, "loss": 1.0671, "step": 5514 }, { "epoch": 0.8404449862846693, "grad_norm": 0.828125, "learning_rate": 1.3058092851709324e-05, "loss": 0.9776, "step": 5515 }, { "epoch": 0.8405973788479122, "grad_norm": 0.859375, "learning_rate": 1.3033717212296614e-05, "loss": 0.8134, "step": 5516 }, { "epoch": 0.8407497714111551, "grad_norm": 0.95703125, "learning_rate": 1.300936275912098e-05, "loss": 0.9199, "step": 5517 }, { "epoch": 0.8409021639743981, "grad_norm": 0.8125, "learning_rate": 1.2985029498115497e-05, "loss": 1.0132, "step": 5518 }, { "epoch": 0.8410545565376409, "grad_norm": 1.1875, "learning_rate": 1.296071743520818e-05, "loss": 1.0406, "step": 5519 }, { "epoch": 0.8412069491008839, "grad_norm": 0.77734375, "learning_rate": 1.2936426576321725e-05, "loss": 0.7679, "step": 5520 }, { "epoch": 0.8413593416641267, "grad_norm": 0.79296875, "learning_rate": 1.291215692737373e-05, "loss": 0.876, "step": 5521 }, { "epoch": 0.8415117342273697, "grad_norm": 1.0390625, "learning_rate": 1.2887908494276668e-05, "loss": 0.9058, "step": 5522 }, { "epoch": 0.8416641267906126, "grad_norm": 0.69140625, "learning_rate": 1.286368128293779e-05, "loss": 1.0034, "step": 5523 }, { "epoch": 0.8418165193538555, "grad_norm": 0.8671875, "learning_rate": 1.2839475299259184e-05, "loss": 0.9818, "step": 5524 }, { "epoch": 0.8419689119170984, "grad_norm": 1.0859375, "learning_rate": 1.2815290549137814e-05, "loss": 0.9779, "step": 5525 }, { "epoch": 0.8421213044803414, "grad_norm": 0.8359375, "learning_rate": 1.27911270384654e-05, "loss": 1.0428, "step": 5526 }, { "epoch": 0.8422736970435842, "grad_norm": 0.8828125, "learning_rate": 1.2766984773128499e-05, "loss": 0.9785, "step": 5527 }, { "epoch": 0.8424260896068272, "grad_norm": 1.3046875, "learning_rate": 1.274286375900856e-05, "loss": 1.0729, "step": 5528 }, { "epoch": 0.8425784821700701, "grad_norm": 0.71875, "learning_rate": 1.2718764001981765e-05, "loss": 0.858, "step": 5529 }, { "epoch": 0.842730874733313, "grad_norm": 0.8984375, "learning_rate": 1.2694685507919214e-05, "loss": 1.0921, "step": 5530 }, { "epoch": 0.8428832672965559, "grad_norm": 1.234375, "learning_rate": 1.2670628282686758e-05, "loss": 0.8992, "step": 5531 }, { "epoch": 0.8430356598597989, "grad_norm": 0.97265625, "learning_rate": 1.2646592332145036e-05, "loss": 1.0191, "step": 5532 }, { "epoch": 0.8431880524230417, "grad_norm": 1.0078125, "learning_rate": 1.2622577662149604e-05, "loss": 0.9701, "step": 5533 }, { "epoch": 0.8433404449862847, "grad_norm": 0.84375, "learning_rate": 1.2598584278550774e-05, "loss": 1.0749, "step": 5534 }, { "epoch": 0.8434928375495275, "grad_norm": 0.9921875, "learning_rate": 1.2574612187193635e-05, "loss": 0.9006, "step": 5535 }, { "epoch": 0.8436452301127705, "grad_norm": 1.09375, "learning_rate": 1.2550661393918217e-05, "loss": 1.0322, "step": 5536 }, { "epoch": 0.8437976226760134, "grad_norm": 1.1640625, "learning_rate": 1.2526731904559208e-05, "loss": 0.9993, "step": 5537 }, { "epoch": 0.8439500152392563, "grad_norm": 0.88671875, "learning_rate": 1.2502823724946166e-05, "loss": 0.9761, "step": 5538 }, { "epoch": 0.8441024078024992, "grad_norm": 0.9765625, "learning_rate": 1.2478936860903524e-05, "loss": 0.8196, "step": 5539 }, { "epoch": 0.8442548003657422, "grad_norm": 1.0078125, "learning_rate": 1.2455071318250434e-05, "loss": 1.0211, "step": 5540 }, { "epoch": 0.844407192928985, "grad_norm": 0.73046875, "learning_rate": 1.2431227102800868e-05, "loss": 0.9895, "step": 5541 }, { "epoch": 0.844559585492228, "grad_norm": 0.93359375, "learning_rate": 1.2407404220363694e-05, "loss": 0.952, "step": 5542 }, { "epoch": 0.8447119780554709, "grad_norm": 1.1015625, "learning_rate": 1.2383602676742423e-05, "loss": 0.8685, "step": 5543 }, { "epoch": 0.8448643706187138, "grad_norm": 0.9609375, "learning_rate": 1.2359822477735473e-05, "loss": 1.0265, "step": 5544 }, { "epoch": 0.8450167631819567, "grad_norm": 0.9296875, "learning_rate": 1.2336063629136074e-05, "loss": 0.8076, "step": 5545 }, { "epoch": 0.8451691557451997, "grad_norm": 1.0078125, "learning_rate": 1.2312326136732189e-05, "loss": 0.9036, "step": 5546 }, { "epoch": 0.8453215483084425, "grad_norm": 1.2265625, "learning_rate": 1.2288610006306667e-05, "loss": 0.9612, "step": 5547 }, { "epoch": 0.8454739408716855, "grad_norm": 0.8203125, "learning_rate": 1.2264915243637031e-05, "loss": 1.0481, "step": 5548 }, { "epoch": 0.8456263334349283, "grad_norm": 1.203125, "learning_rate": 1.2241241854495688e-05, "loss": 1.0676, "step": 5549 }, { "epoch": 0.8457787259981713, "grad_norm": 1.234375, "learning_rate": 1.2217589844649835e-05, "loss": 1.0925, "step": 5550 }, { "epoch": 0.8459311185614142, "grad_norm": 0.8828125, "learning_rate": 1.2193959219861428e-05, "loss": 0.8478, "step": 5551 }, { "epoch": 0.8460835111246571, "grad_norm": 1.0, "learning_rate": 1.217034998588722e-05, "loss": 1.1401, "step": 5552 }, { "epoch": 0.8462359036879, "grad_norm": 0.8828125, "learning_rate": 1.2146762148478797e-05, "loss": 0.9297, "step": 5553 }, { "epoch": 0.846388296251143, "grad_norm": 0.89453125, "learning_rate": 1.2123195713382452e-05, "loss": 0.9734, "step": 5554 }, { "epoch": 0.8465406888143858, "grad_norm": 1.1015625, "learning_rate": 1.2099650686339303e-05, "loss": 1.0282, "step": 5555 }, { "epoch": 0.8466930813776288, "grad_norm": 0.9765625, "learning_rate": 1.2076127073085298e-05, "loss": 1.0038, "step": 5556 }, { "epoch": 0.8468454739408717, "grad_norm": 0.83984375, "learning_rate": 1.2052624879351104e-05, "loss": 0.7453, "step": 5557 }, { "epoch": 0.8469978665041146, "grad_norm": 0.7265625, "learning_rate": 1.2029144110862168e-05, "loss": 1.0342, "step": 5558 }, { "epoch": 0.8471502590673575, "grad_norm": 0.9765625, "learning_rate": 1.2005684773338821e-05, "loss": 1.0386, "step": 5559 }, { "epoch": 0.8473026516306005, "grad_norm": 0.85546875, "learning_rate": 1.1982246872496028e-05, "loss": 0.9362, "step": 5560 }, { "epoch": 0.8474550441938433, "grad_norm": 0.94140625, "learning_rate": 1.1958830414043588e-05, "loss": 0.8891, "step": 5561 }, { "epoch": 0.8476074367570863, "grad_norm": 0.90625, "learning_rate": 1.1935435403686135e-05, "loss": 0.9709, "step": 5562 }, { "epoch": 0.8477598293203291, "grad_norm": 0.79296875, "learning_rate": 1.1912061847122979e-05, "loss": 1.041, "step": 5563 }, { "epoch": 0.8479122218835721, "grad_norm": 0.98046875, "learning_rate": 1.1888709750048344e-05, "loss": 0.8181, "step": 5564 }, { "epoch": 0.848064614446815, "grad_norm": 0.91015625, "learning_rate": 1.1865379118151043e-05, "loss": 0.9605, "step": 5565 }, { "epoch": 0.8482170070100579, "grad_norm": 1.046875, "learning_rate": 1.1842069957114777e-05, "loss": 0.9795, "step": 5566 }, { "epoch": 0.8483693995733008, "grad_norm": 0.8359375, "learning_rate": 1.1818782272618012e-05, "loss": 0.8936, "step": 5567 }, { "epoch": 0.8485217921365438, "grad_norm": 0.97265625, "learning_rate": 1.1795516070333966e-05, "loss": 0.9222, "step": 5568 }, { "epoch": 0.8486741846997866, "grad_norm": 0.8828125, "learning_rate": 1.1772271355930576e-05, "loss": 1.033, "step": 5569 }, { "epoch": 0.8488265772630296, "grad_norm": 0.82421875, "learning_rate": 1.1749048135070673e-05, "loss": 0.8671, "step": 5570 }, { "epoch": 0.8489789698262725, "grad_norm": 0.8828125, "learning_rate": 1.172584641341169e-05, "loss": 0.9392, "step": 5571 }, { "epoch": 0.8491313623895154, "grad_norm": 1.1796875, "learning_rate": 1.1702666196605904e-05, "loss": 0.9637, "step": 5572 }, { "epoch": 0.8492837549527583, "grad_norm": 0.98046875, "learning_rate": 1.1679507490300402e-05, "loss": 1.1592, "step": 5573 }, { "epoch": 0.8494361475160013, "grad_norm": 0.92578125, "learning_rate": 1.1656370300136943e-05, "loss": 1.0019, "step": 5574 }, { "epoch": 0.8495885400792441, "grad_norm": 1.265625, "learning_rate": 1.1633254631752077e-05, "loss": 1.0232, "step": 5575 }, { "epoch": 0.8497409326424871, "grad_norm": 1.421875, "learning_rate": 1.1610160490777122e-05, "loss": 1.0041, "step": 5576 }, { "epoch": 0.8498933252057299, "grad_norm": 1.0234375, "learning_rate": 1.1587087882838144e-05, "loss": 0.8978, "step": 5577 }, { "epoch": 0.8500457177689729, "grad_norm": 0.98046875, "learning_rate": 1.1564036813555933e-05, "loss": 1.0311, "step": 5578 }, { "epoch": 0.8501981103322158, "grad_norm": 0.8984375, "learning_rate": 1.1541007288546113e-05, "loss": 0.9692, "step": 5579 }, { "epoch": 0.8503505028954587, "grad_norm": 0.91796875, "learning_rate": 1.1517999313418948e-05, "loss": 1.0233, "step": 5580 }, { "epoch": 0.8505028954587016, "grad_norm": 1.09375, "learning_rate": 1.14950128937796e-05, "loss": 0.9413, "step": 5581 }, { "epoch": 0.8506552880219446, "grad_norm": 0.86328125, "learning_rate": 1.1472048035227812e-05, "loss": 0.9876, "step": 5582 }, { "epoch": 0.8508076805851874, "grad_norm": 0.69140625, "learning_rate": 1.1449104743358152e-05, "loss": 0.7341, "step": 5583 }, { "epoch": 0.8509600731484304, "grad_norm": 1.0546875, "learning_rate": 1.1426183023759985e-05, "loss": 1.0275, "step": 5584 }, { "epoch": 0.8511124657116733, "grad_norm": 0.8984375, "learning_rate": 1.1403282882017341e-05, "loss": 1.1008, "step": 5585 }, { "epoch": 0.8512648582749162, "grad_norm": 0.765625, "learning_rate": 1.1380404323709016e-05, "loss": 0.9308, "step": 5586 }, { "epoch": 0.8514172508381591, "grad_norm": 0.69140625, "learning_rate": 1.1357547354408615e-05, "loss": 0.9493, "step": 5587 }, { "epoch": 0.8515696434014021, "grad_norm": 1.2265625, "learning_rate": 1.1334711979684353e-05, "loss": 0.9544, "step": 5588 }, { "epoch": 0.8517220359646449, "grad_norm": 0.984375, "learning_rate": 1.1311898205099269e-05, "loss": 0.989, "step": 5589 }, { "epoch": 0.8518744285278879, "grad_norm": 0.94140625, "learning_rate": 1.1289106036211161e-05, "loss": 0.9087, "step": 5590 }, { "epoch": 0.8520268210911307, "grad_norm": 1.0703125, "learning_rate": 1.1266335478572499e-05, "loss": 1.063, "step": 5591 }, { "epoch": 0.8521792136543737, "grad_norm": 0.890625, "learning_rate": 1.1243586537730532e-05, "loss": 0.9394, "step": 5592 }, { "epoch": 0.8523316062176166, "grad_norm": 1.0390625, "learning_rate": 1.1220859219227232e-05, "loss": 1.0027, "step": 5593 }, { "epoch": 0.8524839987808595, "grad_norm": 0.94921875, "learning_rate": 1.1198153528599265e-05, "loss": 1.0061, "step": 5594 }, { "epoch": 0.8526363913441024, "grad_norm": 0.8515625, "learning_rate": 1.117546947137812e-05, "loss": 0.9681, "step": 5595 }, { "epoch": 0.8527887839073454, "grad_norm": 0.9453125, "learning_rate": 1.115280705308992e-05, "loss": 0.9432, "step": 5596 }, { "epoch": 0.8529411764705882, "grad_norm": 0.8671875, "learning_rate": 1.113016627925556e-05, "loss": 1.0561, "step": 5597 }, { "epoch": 0.8530935690338312, "grad_norm": 1.109375, "learning_rate": 1.1107547155390697e-05, "loss": 0.9568, "step": 5598 }, { "epoch": 0.8532459615970741, "grad_norm": 1.046875, "learning_rate": 1.108494968700563e-05, "loss": 0.9872, "step": 5599 }, { "epoch": 0.853398354160317, "grad_norm": 0.96875, "learning_rate": 1.1062373879605415e-05, "loss": 0.9743, "step": 5600 }, { "epoch": 0.8535507467235599, "grad_norm": 1.1015625, "learning_rate": 1.1039819738689894e-05, "loss": 0.9977, "step": 5601 }, { "epoch": 0.8537031392868029, "grad_norm": 0.9453125, "learning_rate": 1.101728726975355e-05, "loss": 1.0726, "step": 5602 }, { "epoch": 0.8538555318500457, "grad_norm": 1.0625, "learning_rate": 1.0994776478285618e-05, "loss": 1.0772, "step": 5603 }, { "epoch": 0.8540079244132887, "grad_norm": 0.91796875, "learning_rate": 1.0972287369770062e-05, "loss": 0.9646, "step": 5604 }, { "epoch": 0.8541603169765315, "grad_norm": 1.203125, "learning_rate": 1.0949819949685546e-05, "loss": 1.0655, "step": 5605 }, { "epoch": 0.8543127095397745, "grad_norm": 1.0390625, "learning_rate": 1.0927374223505427e-05, "loss": 1.0552, "step": 5606 }, { "epoch": 0.8544651021030174, "grad_norm": 1.015625, "learning_rate": 1.090495019669786e-05, "loss": 1.1305, "step": 5607 }, { "epoch": 0.8546174946662602, "grad_norm": 0.859375, "learning_rate": 1.0882547874725636e-05, "loss": 0.6881, "step": 5608 }, { "epoch": 0.8547698872295032, "grad_norm": 0.90625, "learning_rate": 1.0860167263046283e-05, "loss": 0.9635, "step": 5609 }, { "epoch": 0.8549222797927462, "grad_norm": 1.09375, "learning_rate": 1.0837808367112035e-05, "loss": 1.0671, "step": 5610 }, { "epoch": 0.855074672355989, "grad_norm": 0.7890625, "learning_rate": 1.0815471192369831e-05, "loss": 0.9076, "step": 5611 }, { "epoch": 0.855227064919232, "grad_norm": 1.046875, "learning_rate": 1.0793155744261351e-05, "loss": 0.9665, "step": 5612 }, { "epoch": 0.8553794574824749, "grad_norm": 0.91015625, "learning_rate": 1.077086202822295e-05, "loss": 0.961, "step": 5613 }, { "epoch": 0.8555318500457177, "grad_norm": 1.1328125, "learning_rate": 1.074859004968568e-05, "loss": 0.9189, "step": 5614 }, { "epoch": 0.8556842426089607, "grad_norm": 0.921875, "learning_rate": 1.072633981407538e-05, "loss": 0.9699, "step": 5615 }, { "epoch": 0.8558366351722035, "grad_norm": 0.7421875, "learning_rate": 1.070411132681245e-05, "loss": 0.8177, "step": 5616 }, { "epoch": 0.8559890277354465, "grad_norm": 1.0390625, "learning_rate": 1.0681904593312086e-05, "loss": 1.157, "step": 5617 }, { "epoch": 0.8561414202986894, "grad_norm": 0.8359375, "learning_rate": 1.06597196189842e-05, "loss": 1.0172, "step": 5618 }, { "epoch": 0.8562938128619323, "grad_norm": 0.87890625, "learning_rate": 1.063755640923334e-05, "loss": 0.7784, "step": 5619 }, { "epoch": 0.8564462054251752, "grad_norm": 1.0390625, "learning_rate": 1.0615414969458803e-05, "loss": 1.0682, "step": 5620 }, { "epoch": 0.8565985979884182, "grad_norm": 0.86328125, "learning_rate": 1.059329530505455e-05, "loss": 0.7792, "step": 5621 }, { "epoch": 0.856750990551661, "grad_norm": 0.8046875, "learning_rate": 1.0571197421409262e-05, "loss": 0.8116, "step": 5622 }, { "epoch": 0.856903383114904, "grad_norm": 0.98046875, "learning_rate": 1.0549121323906264e-05, "loss": 1.0371, "step": 5623 }, { "epoch": 0.857055775678147, "grad_norm": 0.671875, "learning_rate": 1.0527067017923654e-05, "loss": 0.8898, "step": 5624 }, { "epoch": 0.8572081682413898, "grad_norm": 0.8203125, "learning_rate": 1.0505034508834167e-05, "loss": 0.9695, "step": 5625 }, { "epoch": 0.8573605608046327, "grad_norm": 0.83203125, "learning_rate": 1.0483023802005242e-05, "loss": 0.9801, "step": 5626 }, { "epoch": 0.8575129533678757, "grad_norm": 0.80859375, "learning_rate": 1.0461034902798983e-05, "loss": 0.8961, "step": 5627 }, { "epoch": 0.8576653459311185, "grad_norm": 0.8671875, "learning_rate": 1.0439067816572202e-05, "loss": 0.955, "step": 5628 }, { "epoch": 0.8578177384943615, "grad_norm": 0.84765625, "learning_rate": 1.0417122548676428e-05, "loss": 1.0367, "step": 5629 }, { "epoch": 0.8579701310576043, "grad_norm": 1.1640625, "learning_rate": 1.0395199104457832e-05, "loss": 1.0246, "step": 5630 }, { "epoch": 0.8581225236208473, "grad_norm": 0.91796875, "learning_rate": 1.0373297489257272e-05, "loss": 0.9777, "step": 5631 }, { "epoch": 0.8582749161840902, "grad_norm": 1.015625, "learning_rate": 1.0351417708410293e-05, "loss": 1.1825, "step": 5632 }, { "epoch": 0.8584273087473331, "grad_norm": 0.97265625, "learning_rate": 1.0329559767247132e-05, "loss": 0.9841, "step": 5633 }, { "epoch": 0.858579701310576, "grad_norm": 1.0234375, "learning_rate": 1.0307723671092684e-05, "loss": 0.9297, "step": 5634 }, { "epoch": 0.858732093873819, "grad_norm": 0.80859375, "learning_rate": 1.028590942526656e-05, "loss": 0.9101, "step": 5635 }, { "epoch": 0.8588844864370618, "grad_norm": 1.015625, "learning_rate": 1.0264117035083009e-05, "loss": 1.0037, "step": 5636 }, { "epoch": 0.8590368790003048, "grad_norm": 0.91015625, "learning_rate": 1.0242346505850986e-05, "loss": 0.9569, "step": 5637 }, { "epoch": 0.8591892715635477, "grad_norm": 0.89453125, "learning_rate": 1.022059784287408e-05, "loss": 0.9054, "step": 5638 }, { "epoch": 0.8593416641267906, "grad_norm": 0.93359375, "learning_rate": 1.0198871051450598e-05, "loss": 0.9091, "step": 5639 }, { "epoch": 0.8594940566900335, "grad_norm": 0.828125, "learning_rate": 1.0177166136873472e-05, "loss": 0.9102, "step": 5640 }, { "epoch": 0.8596464492532765, "grad_norm": 0.7734375, "learning_rate": 1.0155483104430375e-05, "loss": 0.937, "step": 5641 }, { "epoch": 0.8597988418165193, "grad_norm": 0.80078125, "learning_rate": 1.0133821959403567e-05, "loss": 0.8785, "step": 5642 }, { "epoch": 0.8599512343797623, "grad_norm": 0.79296875, "learning_rate": 1.011218270707004e-05, "loss": 0.9855, "step": 5643 }, { "epoch": 0.8601036269430051, "grad_norm": 1.0234375, "learning_rate": 1.009056535270141e-05, "loss": 1.0289, "step": 5644 }, { "epoch": 0.8602560195062481, "grad_norm": 0.8828125, "learning_rate": 1.0068969901563963e-05, "loss": 1.0677, "step": 5645 }, { "epoch": 0.860408412069491, "grad_norm": 0.9140625, "learning_rate": 1.0047396358918704e-05, "loss": 0.8837, "step": 5646 }, { "epoch": 0.8605608046327339, "grad_norm": 1.3984375, "learning_rate": 1.0025844730021228e-05, "loss": 0.9601, "step": 5647 }, { "epoch": 0.8607131971959768, "grad_norm": 0.98046875, "learning_rate": 1.0004315020121835e-05, "loss": 0.9757, "step": 5648 }, { "epoch": 0.8608655897592198, "grad_norm": 0.90625, "learning_rate": 9.982807234465452e-06, "loss": 1.1106, "step": 5649 }, { "epoch": 0.8610179823224626, "grad_norm": 0.83203125, "learning_rate": 9.96132137829171e-06, "loss": 0.8327, "step": 5650 }, { "epoch": 0.8611703748857056, "grad_norm": 0.92578125, "learning_rate": 9.939857456834833e-06, "loss": 1.0278, "step": 5651 }, { "epoch": 0.8613227674489485, "grad_norm": 1.1484375, "learning_rate": 9.918415475323783e-06, "loss": 0.9308, "step": 5652 }, { "epoch": 0.8614751600121914, "grad_norm": 1.1875, "learning_rate": 9.896995438982126e-06, "loss": 0.8945, "step": 5653 }, { "epoch": 0.8616275525754343, "grad_norm": 1.4296875, "learning_rate": 9.875597353028088e-06, "loss": 1.1655, "step": 5654 }, { "epoch": 0.8617799451386773, "grad_norm": 1.1953125, "learning_rate": 9.854221222674542e-06, "loss": 0.8937, "step": 5655 }, { "epoch": 0.8619323377019201, "grad_norm": 0.88671875, "learning_rate": 9.832867053129035e-06, "loss": 0.7708, "step": 5656 }, { "epoch": 0.8620847302651631, "grad_norm": 0.86328125, "learning_rate": 9.811534849593706e-06, "loss": 1.0253, "step": 5657 }, { "epoch": 0.8622371228284059, "grad_norm": 0.9296875, "learning_rate": 9.790224617265454e-06, "loss": 1.0352, "step": 5658 }, { "epoch": 0.8623895153916489, "grad_norm": 0.671875, "learning_rate": 9.768936361335723e-06, "loss": 0.899, "step": 5659 }, { "epoch": 0.8625419079548918, "grad_norm": 0.953125, "learning_rate": 9.747670086990634e-06, "loss": 0.971, "step": 5660 }, { "epoch": 0.8626943005181347, "grad_norm": 1.28125, "learning_rate": 9.726425799410965e-06, "loss": 0.9722, "step": 5661 }, { "epoch": 0.8628466930813776, "grad_norm": 0.93359375, "learning_rate": 9.705203503772108e-06, "loss": 1.0125, "step": 5662 }, { "epoch": 0.8629990856446206, "grad_norm": 0.94140625, "learning_rate": 9.684003205244175e-06, "loss": 1.0855, "step": 5663 }, { "epoch": 0.8631514782078634, "grad_norm": 0.9296875, "learning_rate": 9.662824908991807e-06, "loss": 0.9319, "step": 5664 }, { "epoch": 0.8633038707711064, "grad_norm": 0.84765625, "learning_rate": 9.641668620174382e-06, "loss": 0.88, "step": 5665 }, { "epoch": 0.8634562633343493, "grad_norm": 1.171875, "learning_rate": 9.620534343945841e-06, "loss": 1.0486, "step": 5666 }, { "epoch": 0.8636086558975922, "grad_norm": 0.90234375, "learning_rate": 9.599422085454823e-06, "loss": 0.9671, "step": 5667 }, { "epoch": 0.8637610484608351, "grad_norm": 1.15625, "learning_rate": 9.578331849844547e-06, "loss": 0.9146, "step": 5668 }, { "epoch": 0.8639134410240781, "grad_norm": 1.1171875, "learning_rate": 9.557263642252945e-06, "loss": 0.9713, "step": 5669 }, { "epoch": 0.8640658335873209, "grad_norm": 0.83203125, "learning_rate": 9.536217467812502e-06, "loss": 0.8786, "step": 5670 }, { "epoch": 0.8642182261505639, "grad_norm": 0.9453125, "learning_rate": 9.515193331650385e-06, "loss": 0.9481, "step": 5671 }, { "epoch": 0.8643706187138067, "grad_norm": 1.046875, "learning_rate": 9.494191238888362e-06, "loss": 0.9619, "step": 5672 }, { "epoch": 0.8645230112770497, "grad_norm": 0.91015625, "learning_rate": 9.47321119464285e-06, "loss": 1.0824, "step": 5673 }, { "epoch": 0.8646754038402926, "grad_norm": 0.90234375, "learning_rate": 9.452253204024864e-06, "loss": 1.0373, "step": 5674 }, { "epoch": 0.8648277964035355, "grad_norm": 1.140625, "learning_rate": 9.431317272140128e-06, "loss": 1.1171, "step": 5675 }, { "epoch": 0.8649801889667784, "grad_norm": 1.2421875, "learning_rate": 9.410403404088896e-06, "loss": 1.0427, "step": 5676 }, { "epoch": 0.8651325815300214, "grad_norm": 1.078125, "learning_rate": 9.389511604966105e-06, "loss": 0.9861, "step": 5677 }, { "epoch": 0.8652849740932642, "grad_norm": 0.9140625, "learning_rate": 9.368641879861284e-06, "loss": 0.8994, "step": 5678 }, { "epoch": 0.8654373666565072, "grad_norm": 0.8359375, "learning_rate": 9.347794233858598e-06, "loss": 1.0241, "step": 5679 }, { "epoch": 0.8655897592197501, "grad_norm": 1.609375, "learning_rate": 9.32696867203684e-06, "loss": 1.0427, "step": 5680 }, { "epoch": 0.865742151782993, "grad_norm": 0.81640625, "learning_rate": 9.306165199469418e-06, "loss": 0.9216, "step": 5681 }, { "epoch": 0.8658945443462359, "grad_norm": 0.828125, "learning_rate": 9.28538382122437e-06, "loss": 0.815, "step": 5682 }, { "epoch": 0.8660469369094789, "grad_norm": 1.078125, "learning_rate": 9.264624542364309e-06, "loss": 1.0682, "step": 5683 }, { "epoch": 0.8661993294727217, "grad_norm": 1.09375, "learning_rate": 9.243887367946514e-06, "loss": 0.9227, "step": 5684 }, { "epoch": 0.8663517220359647, "grad_norm": 0.94140625, "learning_rate": 9.22317230302282e-06, "loss": 1.0053, "step": 5685 }, { "epoch": 0.8665041145992075, "grad_norm": 0.98046875, "learning_rate": 9.202479352639782e-06, "loss": 0.9657, "step": 5686 }, { "epoch": 0.8666565071624505, "grad_norm": 1.078125, "learning_rate": 9.181808521838465e-06, "loss": 0.8933, "step": 5687 }, { "epoch": 0.8668088997256934, "grad_norm": 0.97265625, "learning_rate": 9.161159815654574e-06, "loss": 1.06, "step": 5688 }, { "epoch": 0.8669612922889363, "grad_norm": 1.203125, "learning_rate": 9.140533239118443e-06, "loss": 0.9797, "step": 5689 }, { "epoch": 0.8671136848521792, "grad_norm": 1.203125, "learning_rate": 9.119928797254995e-06, "loss": 0.9591, "step": 5690 }, { "epoch": 0.8672660774154222, "grad_norm": 1.0390625, "learning_rate": 9.09934649508375e-06, "loss": 0.8813, "step": 5691 }, { "epoch": 0.867418469978665, "grad_norm": 1.1171875, "learning_rate": 9.078786337618895e-06, "loss": 1.0345, "step": 5692 }, { "epoch": 0.867570862541908, "grad_norm": 0.8046875, "learning_rate": 9.058248329869157e-06, "loss": 1.0611, "step": 5693 }, { "epoch": 0.8677232551051509, "grad_norm": 0.84765625, "learning_rate": 9.037732476837901e-06, "loss": 0.9756, "step": 5694 }, { "epoch": 0.8678756476683938, "grad_norm": 0.97265625, "learning_rate": 9.017238783523064e-06, "loss": 1.0462, "step": 5695 }, { "epoch": 0.8680280402316367, "grad_norm": 0.68359375, "learning_rate": 8.996767254917205e-06, "loss": 0.9386, "step": 5696 }, { "epoch": 0.8681804327948797, "grad_norm": 0.828125, "learning_rate": 8.976317896007502e-06, "loss": 0.9621, "step": 5697 }, { "epoch": 0.8683328253581225, "grad_norm": 0.87109375, "learning_rate": 8.955890711775705e-06, "loss": 0.8841, "step": 5698 }, { "epoch": 0.8684852179213655, "grad_norm": 1.1171875, "learning_rate": 8.935485707198177e-06, "loss": 0.9743, "step": 5699 }, { "epoch": 0.8686376104846083, "grad_norm": 1.0234375, "learning_rate": 8.915102887245863e-06, "loss": 1.057, "step": 5700 }, { "epoch": 0.8687900030478513, "grad_norm": 0.89453125, "learning_rate": 8.894742256884303e-06, "loss": 1.0111, "step": 5701 }, { "epoch": 0.8689423956110942, "grad_norm": 1.3671875, "learning_rate": 8.87440382107363e-06, "loss": 1.0749, "step": 5702 }, { "epoch": 0.869094788174337, "grad_norm": 0.96875, "learning_rate": 8.85408758476861e-06, "loss": 0.9137, "step": 5703 }, { "epoch": 0.86924718073758, "grad_norm": 1.1796875, "learning_rate": 8.833793552918557e-06, "loss": 1.2546, "step": 5704 }, { "epoch": 0.869399573300823, "grad_norm": 1.0703125, "learning_rate": 8.8135217304674e-06, "loss": 1.0598, "step": 5705 }, { "epoch": 0.8695519658640658, "grad_norm": 1.1015625, "learning_rate": 8.793272122353624e-06, "loss": 1.0454, "step": 5706 }, { "epoch": 0.8697043584273088, "grad_norm": 1.09375, "learning_rate": 8.773044733510338e-06, "loss": 1.0408, "step": 5707 }, { "epoch": 0.8698567509905517, "grad_norm": 0.87109375, "learning_rate": 8.752839568865202e-06, "loss": 0.8668, "step": 5708 }, { "epoch": 0.8700091435537946, "grad_norm": 1.0703125, "learning_rate": 8.732656633340531e-06, "loss": 1.0359, "step": 5709 }, { "epoch": 0.8701615361170375, "grad_norm": 0.88671875, "learning_rate": 8.712495931853137e-06, "loss": 0.9406, "step": 5710 }, { "epoch": 0.8703139286802805, "grad_norm": 0.73828125, "learning_rate": 8.692357469314482e-06, "loss": 1.0986, "step": 5711 }, { "epoch": 0.8704663212435233, "grad_norm": 0.80078125, "learning_rate": 8.672241250630575e-06, "loss": 0.9283, "step": 5712 }, { "epoch": 0.8706187138067663, "grad_norm": 0.640625, "learning_rate": 8.652147280702006e-06, "loss": 0.8141, "step": 5713 }, { "epoch": 0.8707711063700091, "grad_norm": 0.63671875, "learning_rate": 8.632075564423969e-06, "loss": 0.9401, "step": 5714 }, { "epoch": 0.870923498933252, "grad_norm": 0.80078125, "learning_rate": 8.612026106686233e-06, "loss": 0.8855, "step": 5715 }, { "epoch": 0.871075891496495, "grad_norm": 0.9296875, "learning_rate": 8.59199891237311e-06, "loss": 0.864, "step": 5716 }, { "epoch": 0.8712282840597378, "grad_norm": 0.9765625, "learning_rate": 8.571993986363524e-06, "loss": 0.9297, "step": 5717 }, { "epoch": 0.8713806766229808, "grad_norm": 0.88671875, "learning_rate": 8.552011333530963e-06, "loss": 0.9511, "step": 5718 }, { "epoch": 0.8715330691862238, "grad_norm": 0.9453125, "learning_rate": 8.532050958743465e-06, "loss": 1.1128, "step": 5719 }, { "epoch": 0.8716854617494666, "grad_norm": 1.109375, "learning_rate": 8.512112866863709e-06, "loss": 1.0264, "step": 5720 }, { "epoch": 0.8718378543127095, "grad_norm": 1.046875, "learning_rate": 8.492197062748885e-06, "loss": 0.7564, "step": 5721 }, { "epoch": 0.8719902468759525, "grad_norm": 1.15625, "learning_rate": 8.472303551250748e-06, "loss": 1.1842, "step": 5722 }, { "epoch": 0.8721426394391953, "grad_norm": 1.0625, "learning_rate": 8.452432337215666e-06, "loss": 1.0439, "step": 5723 }, { "epoch": 0.8722950320024383, "grad_norm": 0.953125, "learning_rate": 8.43258342548453e-06, "loss": 0.8609, "step": 5724 }, { "epoch": 0.8724474245656811, "grad_norm": 0.94921875, "learning_rate": 8.412756820892853e-06, "loss": 0.9116, "step": 5725 }, { "epoch": 0.8725998171289241, "grad_norm": 0.95703125, "learning_rate": 8.392952528270659e-06, "loss": 1.0028, "step": 5726 }, { "epoch": 0.872752209692167, "grad_norm": 0.86328125, "learning_rate": 8.373170552442555e-06, "loss": 1.0082, "step": 5727 }, { "epoch": 0.8729046022554099, "grad_norm": 1.140625, "learning_rate": 8.353410898227731e-06, "loss": 0.9934, "step": 5728 }, { "epoch": 0.8730569948186528, "grad_norm": 1.1796875, "learning_rate": 8.333673570439914e-06, "loss": 1.1175, "step": 5729 }, { "epoch": 0.8732093873818958, "grad_norm": 0.8125, "learning_rate": 8.313958573887382e-06, "loss": 0.9401, "step": 5730 }, { "epoch": 0.8733617799451386, "grad_norm": 0.9609375, "learning_rate": 8.294265913373022e-06, "loss": 0.9916, "step": 5731 }, { "epoch": 0.8735141725083816, "grad_norm": 0.87109375, "learning_rate": 8.274595593694246e-06, "loss": 0.9489, "step": 5732 }, { "epoch": 0.8736665650716245, "grad_norm": 0.78515625, "learning_rate": 8.254947619643017e-06, "loss": 0.8351, "step": 5733 }, { "epoch": 0.8738189576348674, "grad_norm": 0.78515625, "learning_rate": 8.235321996005863e-06, "loss": 0.8188, "step": 5734 }, { "epoch": 0.8739713501981103, "grad_norm": 0.92578125, "learning_rate": 8.21571872756387e-06, "loss": 0.8618, "step": 5735 }, { "epoch": 0.8741237427613533, "grad_norm": 0.93359375, "learning_rate": 8.196137819092665e-06, "loss": 0.9748, "step": 5736 }, { "epoch": 0.8742761353245961, "grad_norm": 0.80078125, "learning_rate": 8.17657927536246e-06, "loss": 0.9848, "step": 5737 }, { "epoch": 0.8744285278878391, "grad_norm": 0.859375, "learning_rate": 8.157043101137995e-06, "loss": 1.0159, "step": 5738 }, { "epoch": 0.8745809204510819, "grad_norm": 1.34375, "learning_rate": 8.137529301178559e-06, "loss": 1.0493, "step": 5739 }, { "epoch": 0.8747333130143249, "grad_norm": 0.76171875, "learning_rate": 8.118037880237983e-06, "loss": 0.9476, "step": 5740 }, { "epoch": 0.8748857055775678, "grad_norm": 1.0546875, "learning_rate": 8.098568843064647e-06, "loss": 1.1454, "step": 5741 }, { "epoch": 0.8750380981408107, "grad_norm": 0.8671875, "learning_rate": 8.079122194401534e-06, "loss": 0.9943, "step": 5742 }, { "epoch": 0.8751904907040536, "grad_norm": 0.8203125, "learning_rate": 8.059697938986099e-06, "loss": 0.9133, "step": 5743 }, { "epoch": 0.8753428832672966, "grad_norm": 1.078125, "learning_rate": 8.04029608155037e-06, "loss": 0.9776, "step": 5744 }, { "epoch": 0.8754952758305394, "grad_norm": 1.2421875, "learning_rate": 8.020916626820919e-06, "loss": 1.1249, "step": 5745 }, { "epoch": 0.8756476683937824, "grad_norm": 0.77734375, "learning_rate": 8.001559579518857e-06, "loss": 1.0446, "step": 5746 }, { "epoch": 0.8758000609570253, "grad_norm": 0.78515625, "learning_rate": 7.982224944359828e-06, "loss": 1.01, "step": 5747 }, { "epoch": 0.8759524535202682, "grad_norm": 1.0390625, "learning_rate": 7.962912726054061e-06, "loss": 0.9678, "step": 5748 }, { "epoch": 0.8761048460835111, "grad_norm": 1.015625, "learning_rate": 7.943622929306272e-06, "loss": 0.9676, "step": 5749 }, { "epoch": 0.8762572386467541, "grad_norm": 0.87109375, "learning_rate": 7.924355558815733e-06, "loss": 0.8303, "step": 5750 }, { "epoch": 0.8764096312099969, "grad_norm": 0.9765625, "learning_rate": 7.905110619276246e-06, "loss": 0.9041, "step": 5751 }, { "epoch": 0.8765620237732399, "grad_norm": 1.3046875, "learning_rate": 7.885888115376161e-06, "loss": 1.2715, "step": 5752 }, { "epoch": 0.8767144163364827, "grad_norm": 1.3125, "learning_rate": 7.866688051798342e-06, "loss": 1.0427, "step": 5753 }, { "epoch": 0.8768668088997257, "grad_norm": 0.953125, "learning_rate": 7.84751043322024e-06, "loss": 1.1035, "step": 5754 }, { "epoch": 0.8770192014629686, "grad_norm": 1.0234375, "learning_rate": 7.828355264313758e-06, "loss": 1.0044, "step": 5755 }, { "epoch": 0.8771715940262115, "grad_norm": 0.71484375, "learning_rate": 7.8092225497454e-06, "loss": 0.9417, "step": 5756 }, { "epoch": 0.8773239865894544, "grad_norm": 1.1640625, "learning_rate": 7.790112294176143e-06, "loss": 1.0205, "step": 5757 }, { "epoch": 0.8774763791526974, "grad_norm": 0.63671875, "learning_rate": 7.771024502261526e-06, "loss": 0.8866, "step": 5758 }, { "epoch": 0.8776287717159402, "grad_norm": 1.15625, "learning_rate": 7.751959178651635e-06, "loss": 0.9738, "step": 5759 }, { "epoch": 0.8777811642791832, "grad_norm": 0.99609375, "learning_rate": 7.732916327991024e-06, "loss": 1.0158, "step": 5760 }, { "epoch": 0.8779335568424261, "grad_norm": 0.9765625, "learning_rate": 7.713895954918838e-06, "loss": 0.9541, "step": 5761 }, { "epoch": 0.878085949405669, "grad_norm": 1.09375, "learning_rate": 7.694898064068689e-06, "loss": 1.0283, "step": 5762 }, { "epoch": 0.8782383419689119, "grad_norm": 0.84375, "learning_rate": 7.675922660068734e-06, "loss": 1.0464, "step": 5763 }, { "epoch": 0.8783907345321549, "grad_norm": 0.94140625, "learning_rate": 7.656969747541665e-06, "loss": 1.0043, "step": 5764 }, { "epoch": 0.8785431270953977, "grad_norm": 1.09375, "learning_rate": 7.638039331104685e-06, "loss": 1.0474, "step": 5765 }, { "epoch": 0.8786955196586407, "grad_norm": 1.2109375, "learning_rate": 7.619131415369518e-06, "loss": 1.0428, "step": 5766 }, { "epoch": 0.8788479122218835, "grad_norm": 1.2265625, "learning_rate": 7.600246004942402e-06, "loss": 0.9401, "step": 5767 }, { "epoch": 0.8790003047851265, "grad_norm": 1.2578125, "learning_rate": 7.581383104424078e-06, "loss": 1.095, "step": 5768 }, { "epoch": 0.8791526973483694, "grad_norm": 1.4140625, "learning_rate": 7.562542718409849e-06, "loss": 1.3811, "step": 5769 }, { "epoch": 0.8793050899116123, "grad_norm": 0.87890625, "learning_rate": 7.543724851489465e-06, "loss": 1.1066, "step": 5770 }, { "epoch": 0.8794574824748552, "grad_norm": 1.125, "learning_rate": 7.524929508247269e-06, "loss": 0.9889, "step": 5771 }, { "epoch": 0.8796098750380982, "grad_norm": 0.921875, "learning_rate": 7.5061566932620675e-06, "loss": 1.1795, "step": 5772 }, { "epoch": 0.879762267601341, "grad_norm": 1.2890625, "learning_rate": 7.487406411107168e-06, "loss": 0.8605, "step": 5773 }, { "epoch": 0.879914660164584, "grad_norm": 0.859375, "learning_rate": 7.468678666350426e-06, "loss": 0.9365, "step": 5774 }, { "epoch": 0.8800670527278269, "grad_norm": 1.1171875, "learning_rate": 7.449973463554172e-06, "loss": 1.0661, "step": 5775 }, { "epoch": 0.8802194452910698, "grad_norm": 0.73828125, "learning_rate": 7.431290807275293e-06, "loss": 0.8777, "step": 5776 }, { "epoch": 0.8803718378543127, "grad_norm": 0.96875, "learning_rate": 7.4126307020651374e-06, "loss": 1.0014, "step": 5777 }, { "epoch": 0.8805242304175557, "grad_norm": 0.921875, "learning_rate": 7.393993152469569e-06, "loss": 0.9937, "step": 5778 }, { "epoch": 0.8806766229807985, "grad_norm": 1.0078125, "learning_rate": 7.375378163028968e-06, "loss": 0.9951, "step": 5779 }, { "epoch": 0.8808290155440415, "grad_norm": 1.0625, "learning_rate": 7.356785738278216e-06, "loss": 1.1028, "step": 5780 }, { "epoch": 0.8809814081072843, "grad_norm": 0.890625, "learning_rate": 7.338215882746668e-06, "loss": 1.0132, "step": 5781 }, { "epoch": 0.8811338006705273, "grad_norm": 0.92578125, "learning_rate": 7.319668600958263e-06, "loss": 0.986, "step": 5782 }, { "epoch": 0.8812861932337702, "grad_norm": 0.94140625, "learning_rate": 7.301143897431339e-06, "loss": 0.9105, "step": 5783 }, { "epoch": 0.8814385857970131, "grad_norm": 0.796875, "learning_rate": 7.2826417766788e-06, "loss": 0.8819, "step": 5784 }, { "epoch": 0.881590978360256, "grad_norm": 0.91015625, "learning_rate": 7.264162243208028e-06, "loss": 0.9611, "step": 5785 }, { "epoch": 0.881743370923499, "grad_norm": 0.796875, "learning_rate": 7.245705301520888e-06, "loss": 1.0066, "step": 5786 }, { "epoch": 0.8818957634867418, "grad_norm": 0.7578125, "learning_rate": 7.227270956113763e-06, "loss": 0.8875, "step": 5787 }, { "epoch": 0.8820481560499848, "grad_norm": 1.4140625, "learning_rate": 7.208859211477537e-06, "loss": 1.1764, "step": 5788 }, { "epoch": 0.8822005486132277, "grad_norm": 1.015625, "learning_rate": 7.190470072097577e-06, "loss": 1.0292, "step": 5789 }, { "epoch": 0.8823529411764706, "grad_norm": 0.66015625, "learning_rate": 7.172103542453723e-06, "loss": 0.9503, "step": 5790 }, { "epoch": 0.8825053337397135, "grad_norm": 1.109375, "learning_rate": 7.15375962702034e-06, "loss": 0.9741, "step": 5791 }, { "epoch": 0.8826577263029565, "grad_norm": 0.9140625, "learning_rate": 7.135438330266242e-06, "loss": 0.9966, "step": 5792 }, { "epoch": 0.8828101188661993, "grad_norm": 1.015625, "learning_rate": 7.117139656654814e-06, "loss": 0.9489, "step": 5793 }, { "epoch": 0.8829625114294423, "grad_norm": 0.94140625, "learning_rate": 7.0988636106438355e-06, "loss": 1.01, "step": 5794 }, { "epoch": 0.8831149039926851, "grad_norm": 1.3046875, "learning_rate": 7.080610196685622e-06, "loss": 0.9873, "step": 5795 }, { "epoch": 0.8832672965559281, "grad_norm": 1.03125, "learning_rate": 7.062379419226972e-06, "loss": 1.0563, "step": 5796 }, { "epoch": 0.883419689119171, "grad_norm": 1.09375, "learning_rate": 7.0441712827091665e-06, "loss": 1.0602, "step": 5797 }, { "epoch": 0.8835720816824139, "grad_norm": 1.3125, "learning_rate": 7.025985791567946e-06, "loss": 1.23, "step": 5798 }, { "epoch": 0.8837244742456568, "grad_norm": 1.046875, "learning_rate": 7.0078229502336e-06, "loss": 1.0807, "step": 5799 }, { "epoch": 0.8838768668088998, "grad_norm": 1.125, "learning_rate": 6.989682763130823e-06, "loss": 0.9179, "step": 5800 }, { "epoch": 0.8840292593721426, "grad_norm": 1.40625, "learning_rate": 6.971565234678845e-06, "loss": 0.9705, "step": 5801 }, { "epoch": 0.8841816519353856, "grad_norm": 0.8359375, "learning_rate": 6.953470369291348e-06, "loss": 1.0336, "step": 5802 }, { "epoch": 0.8843340444986285, "grad_norm": 1.109375, "learning_rate": 6.935398171376506e-06, "loss": 0.939, "step": 5803 }, { "epoch": 0.8844864370618714, "grad_norm": 0.78125, "learning_rate": 6.91734864533694e-06, "loss": 1.0437, "step": 5804 }, { "epoch": 0.8846388296251143, "grad_norm": 0.921875, "learning_rate": 6.899321795569813e-06, "loss": 0.8508, "step": 5805 }, { "epoch": 0.8847912221883573, "grad_norm": 0.84375, "learning_rate": 6.88131762646671e-06, "loss": 0.9308, "step": 5806 }, { "epoch": 0.8849436147516001, "grad_norm": 0.8671875, "learning_rate": 6.863336142413701e-06, "loss": 1.0074, "step": 5807 }, { "epoch": 0.8850960073148431, "grad_norm": 0.91796875, "learning_rate": 6.845377347791349e-06, "loss": 0.8898, "step": 5808 }, { "epoch": 0.8852483998780859, "grad_norm": 0.9609375, "learning_rate": 6.827441246974631e-06, "loss": 0.9679, "step": 5809 }, { "epoch": 0.8854007924413289, "grad_norm": 0.703125, "learning_rate": 6.809527844333097e-06, "loss": 0.8987, "step": 5810 }, { "epoch": 0.8855531850045718, "grad_norm": 0.91796875, "learning_rate": 6.791637144230678e-06, "loss": 1.0148, "step": 5811 }, { "epoch": 0.8857055775678147, "grad_norm": 0.97265625, "learning_rate": 6.773769151025822e-06, "loss": 0.9724, "step": 5812 }, { "epoch": 0.8858579701310576, "grad_norm": 0.92578125, "learning_rate": 6.7559238690714126e-06, "loss": 1.0079, "step": 5813 }, { "epoch": 0.8860103626943006, "grad_norm": 0.75, "learning_rate": 6.738101302714817e-06, "loss": 0.8223, "step": 5814 }, { "epoch": 0.8861627552575434, "grad_norm": 0.7578125, "learning_rate": 6.720301456297862e-06, "loss": 0.7027, "step": 5815 }, { "epoch": 0.8863151478207864, "grad_norm": 1.078125, "learning_rate": 6.702524334156879e-06, "loss": 0.9645, "step": 5816 }, { "epoch": 0.8864675403840293, "grad_norm": 1.0390625, "learning_rate": 6.684769940622604e-06, "loss": 0.9536, "step": 5817 }, { "epoch": 0.8866199329472721, "grad_norm": 1.1953125, "learning_rate": 6.667038280020266e-06, "loss": 1.1355, "step": 5818 }, { "epoch": 0.8867723255105151, "grad_norm": 1.265625, "learning_rate": 6.6493293566695645e-06, "loss": 0.8135, "step": 5819 }, { "epoch": 0.8869247180737581, "grad_norm": 0.65625, "learning_rate": 6.631643174884627e-06, "loss": 0.8233, "step": 5820 }, { "epoch": 0.8870771106370009, "grad_norm": 0.9140625, "learning_rate": 6.613979738974074e-06, "loss": 0.9796, "step": 5821 }, { "epoch": 0.8872295032002439, "grad_norm": 1.296875, "learning_rate": 6.596339053240974e-06, "loss": 0.9118, "step": 5822 }, { "epoch": 0.8873818957634867, "grad_norm": 0.76953125, "learning_rate": 6.578721121982856e-06, "loss": 1.0153, "step": 5823 }, { "epoch": 0.8875342883267296, "grad_norm": 0.94140625, "learning_rate": 6.561125949491697e-06, "loss": 1.0232, "step": 5824 }, { "epoch": 0.8876866808899726, "grad_norm": 1.09375, "learning_rate": 6.543553540053926e-06, "loss": 0.7795, "step": 5825 }, { "epoch": 0.8878390734532154, "grad_norm": 0.7578125, "learning_rate": 6.526003897950428e-06, "loss": 0.7967, "step": 5826 }, { "epoch": 0.8879914660164584, "grad_norm": 1.03125, "learning_rate": 6.508477027456572e-06, "loss": 1.0229, "step": 5827 }, { "epoch": 0.8881438585797014, "grad_norm": 0.796875, "learning_rate": 6.490972932842143e-06, "loss": 0.8825, "step": 5828 }, { "epoch": 0.8882962511429442, "grad_norm": 0.82421875, "learning_rate": 6.473491618371408e-06, "loss": 0.9643, "step": 5829 }, { "epoch": 0.8884486437061871, "grad_norm": 0.84375, "learning_rate": 6.456033088303037e-06, "loss": 0.9565, "step": 5830 }, { "epoch": 0.8886010362694301, "grad_norm": 1.0078125, "learning_rate": 6.438597346890196e-06, "loss": 1.0399, "step": 5831 }, { "epoch": 0.8887534288326729, "grad_norm": 0.9140625, "learning_rate": 6.421184398380453e-06, "loss": 0.8869, "step": 5832 }, { "epoch": 0.8889058213959159, "grad_norm": 1.0859375, "learning_rate": 6.403794247015904e-06, "loss": 1.2435, "step": 5833 }, { "epoch": 0.8890582139591587, "grad_norm": 1.109375, "learning_rate": 6.386426897033016e-06, "loss": 1.0471, "step": 5834 }, { "epoch": 0.8892106065224017, "grad_norm": 0.9140625, "learning_rate": 6.369082352662714e-06, "loss": 0.9897, "step": 5835 }, { "epoch": 0.8893629990856446, "grad_norm": 0.734375, "learning_rate": 6.351760618130398e-06, "loss": 1.045, "step": 5836 }, { "epoch": 0.8895153916488875, "grad_norm": 1.0546875, "learning_rate": 6.33446169765588e-06, "loss": 1.0391, "step": 5837 }, { "epoch": 0.8896677842121304, "grad_norm": 1.0078125, "learning_rate": 6.317185595453412e-06, "loss": 0.8566, "step": 5838 }, { "epoch": 0.8898201767753734, "grad_norm": 0.98828125, "learning_rate": 6.299932315731727e-06, "loss": 0.8641, "step": 5839 }, { "epoch": 0.8899725693386162, "grad_norm": 1.0, "learning_rate": 6.2827018626939624e-06, "loss": 0.9241, "step": 5840 }, { "epoch": 0.8901249619018592, "grad_norm": 0.828125, "learning_rate": 6.265494240537706e-06, "loss": 0.988, "step": 5841 }, { "epoch": 0.8902773544651021, "grad_norm": 0.96875, "learning_rate": 6.2483094534549705e-06, "loss": 0.8779, "step": 5842 }, { "epoch": 0.890429747028345, "grad_norm": 0.87890625, "learning_rate": 6.231147505632218e-06, "loss": 0.9746, "step": 5843 }, { "epoch": 0.8905821395915879, "grad_norm": 1.0859375, "learning_rate": 6.214008401250371e-06, "loss": 0.8858, "step": 5844 }, { "epoch": 0.8907345321548309, "grad_norm": 0.93359375, "learning_rate": 6.196892144484745e-06, "loss": 0.9605, "step": 5845 }, { "epoch": 0.8908869247180737, "grad_norm": 1.140625, "learning_rate": 6.179798739505094e-06, "loss": 0.9053, "step": 5846 }, { "epoch": 0.8910393172813167, "grad_norm": 0.9375, "learning_rate": 6.1627281904756415e-06, "loss": 0.8531, "step": 5847 }, { "epoch": 0.8911917098445595, "grad_norm": 0.90234375, "learning_rate": 6.145680501555007e-06, "loss": 0.8716, "step": 5848 }, { "epoch": 0.8913441024078025, "grad_norm": 0.82421875, "learning_rate": 6.128655676896222e-06, "loss": 0.7902, "step": 5849 }, { "epoch": 0.8914964949710454, "grad_norm": 0.90625, "learning_rate": 6.111653720646837e-06, "loss": 1.0235, "step": 5850 }, { "epoch": 0.8916488875342883, "grad_norm": 0.734375, "learning_rate": 6.094674636948738e-06, "loss": 0.8954, "step": 5851 }, { "epoch": 0.8918012800975312, "grad_norm": 0.875, "learning_rate": 6.077718429938284e-06, "loss": 0.8938, "step": 5852 }, { "epoch": 0.8919536726607742, "grad_norm": 0.97265625, "learning_rate": 6.060785103746247e-06, "loss": 0.999, "step": 5853 }, { "epoch": 0.892106065224017, "grad_norm": 0.81640625, "learning_rate": 6.043874662497817e-06, "loss": 1.0995, "step": 5854 }, { "epoch": 0.89225845778726, "grad_norm": 0.76953125, "learning_rate": 6.026987110312643e-06, "loss": 0.8403, "step": 5855 }, { "epoch": 0.8924108503505029, "grad_norm": 1.0859375, "learning_rate": 6.010122451304767e-06, "loss": 0.9906, "step": 5856 }, { "epoch": 0.8925632429137458, "grad_norm": 0.8515625, "learning_rate": 5.993280689582659e-06, "loss": 0.9572, "step": 5857 }, { "epoch": 0.8927156354769887, "grad_norm": 0.8984375, "learning_rate": 5.976461829249225e-06, "loss": 0.946, "step": 5858 }, { "epoch": 0.8928680280402317, "grad_norm": 0.83984375, "learning_rate": 5.959665874401765e-06, "loss": 1.0913, "step": 5859 }, { "epoch": 0.8930204206034745, "grad_norm": 1.0703125, "learning_rate": 5.942892829132007e-06, "loss": 0.9498, "step": 5860 }, { "epoch": 0.8931728131667175, "grad_norm": 0.8203125, "learning_rate": 5.926142697526138e-06, "loss": 0.9021, "step": 5861 }, { "epoch": 0.8933252057299603, "grad_norm": 0.91796875, "learning_rate": 5.909415483664704e-06, "loss": 0.8263, "step": 5862 }, { "epoch": 0.8934775982932033, "grad_norm": 0.94921875, "learning_rate": 5.892711191622724e-06, "loss": 0.9361, "step": 5863 }, { "epoch": 0.8936299908564462, "grad_norm": 0.859375, "learning_rate": 5.876029825469576e-06, "loss": 0.9701, "step": 5864 }, { "epoch": 0.8937823834196891, "grad_norm": 1.0703125, "learning_rate": 5.859371389269086e-06, "loss": 0.9783, "step": 5865 }, { "epoch": 0.893934775982932, "grad_norm": 1.0625, "learning_rate": 5.842735887079476e-06, "loss": 1.1089, "step": 5866 }, { "epoch": 0.894087168546175, "grad_norm": 1.15625, "learning_rate": 5.826123322953436e-06, "loss": 0.8986, "step": 5867 }, { "epoch": 0.8942395611094178, "grad_norm": 0.79296875, "learning_rate": 5.8095337009379965e-06, "loss": 0.935, "step": 5868 }, { "epoch": 0.8943919536726608, "grad_norm": 0.984375, "learning_rate": 5.792967025074625e-06, "loss": 0.9295, "step": 5869 }, { "epoch": 0.8945443462359037, "grad_norm": 0.7734375, "learning_rate": 5.776423299399203e-06, "loss": 0.8504, "step": 5870 }, { "epoch": 0.8946967387991466, "grad_norm": 0.7734375, "learning_rate": 5.7599025279420184e-06, "loss": 0.9432, "step": 5871 }, { "epoch": 0.8948491313623895, "grad_norm": 0.98828125, "learning_rate": 5.743404714727796e-06, "loss": 0.9482, "step": 5872 }, { "epoch": 0.8950015239256325, "grad_norm": 1.0859375, "learning_rate": 5.72692986377561e-06, "loss": 1.0858, "step": 5873 }, { "epoch": 0.8951539164888753, "grad_norm": 0.828125, "learning_rate": 5.710477979098982e-06, "loss": 0.8058, "step": 5874 }, { "epoch": 0.8953063090521183, "grad_norm": 0.93359375, "learning_rate": 5.694049064705831e-06, "loss": 1.066, "step": 5875 }, { "epoch": 0.8954587016153611, "grad_norm": 0.8828125, "learning_rate": 5.677643124598464e-06, "loss": 0.9784, "step": 5876 }, { "epoch": 0.8956110941786041, "grad_norm": 0.96484375, "learning_rate": 5.6612601627736075e-06, "loss": 1.0555, "step": 5877 }, { "epoch": 0.895763486741847, "grad_norm": 0.9609375, "learning_rate": 5.6449001832223905e-06, "loss": 0.9482, "step": 5878 }, { "epoch": 0.8959158793050899, "grad_norm": 1.140625, "learning_rate": 5.628563189930347e-06, "loss": 1.0998, "step": 5879 }, { "epoch": 0.8960682718683328, "grad_norm": 0.87890625, "learning_rate": 5.612249186877406e-06, "loss": 0.8634, "step": 5880 }, { "epoch": 0.8962206644315758, "grad_norm": 0.8125, "learning_rate": 5.595958178037874e-06, "loss": 0.9659, "step": 5881 }, { "epoch": 0.8963730569948186, "grad_norm": 1.078125, "learning_rate": 5.57969016738048e-06, "loss": 1.1609, "step": 5882 }, { "epoch": 0.8965254495580616, "grad_norm": 1.1796875, "learning_rate": 5.563445158868341e-06, "loss": 1.0675, "step": 5883 }, { "epoch": 0.8966778421213045, "grad_norm": 0.78515625, "learning_rate": 5.547223156459003e-06, "loss": 0.756, "step": 5884 }, { "epoch": 0.8968302346845474, "grad_norm": 1.1796875, "learning_rate": 5.53102416410436e-06, "loss": 1.026, "step": 5885 }, { "epoch": 0.8969826272477903, "grad_norm": 0.77734375, "learning_rate": 5.514848185750709e-06, "loss": 0.952, "step": 5886 }, { "epoch": 0.8971350198110333, "grad_norm": 0.93359375, "learning_rate": 5.498695225338768e-06, "loss": 0.975, "step": 5887 }, { "epoch": 0.8972874123742761, "grad_norm": 1.1640625, "learning_rate": 5.482565286803609e-06, "loss": 1.0695, "step": 5888 }, { "epoch": 0.8974398049375191, "grad_norm": 0.80078125, "learning_rate": 5.466458374074746e-06, "loss": 0.955, "step": 5889 }, { "epoch": 0.8975921975007619, "grad_norm": 1.03125, "learning_rate": 5.4503744910760376e-06, "loss": 1.0207, "step": 5890 }, { "epoch": 0.8977445900640049, "grad_norm": 1.203125, "learning_rate": 5.434313641725753e-06, "loss": 1.0707, "step": 5891 }, { "epoch": 0.8978969826272478, "grad_norm": 1.125, "learning_rate": 5.418275829936537e-06, "loss": 1.1154, "step": 5892 }, { "epoch": 0.8980493751904907, "grad_norm": 0.9140625, "learning_rate": 5.402261059615443e-06, "loss": 0.9005, "step": 5893 }, { "epoch": 0.8982017677537336, "grad_norm": 0.87109375, "learning_rate": 5.3862693346638844e-06, "loss": 0.9241, "step": 5894 }, { "epoch": 0.8983541603169766, "grad_norm": 0.89453125, "learning_rate": 5.3703006589777e-06, "loss": 0.9726, "step": 5895 }, { "epoch": 0.8985065528802194, "grad_norm": 0.87890625, "learning_rate": 5.354355036447067e-06, "loss": 0.9763, "step": 5896 }, { "epoch": 0.8986589454434624, "grad_norm": 1.0546875, "learning_rate": 5.338432470956589e-06, "loss": 0.8965, "step": 5897 }, { "epoch": 0.8988113380067053, "grad_norm": 1.0, "learning_rate": 5.3225329663852075e-06, "loss": 0.7161, "step": 5898 }, { "epoch": 0.8989637305699482, "grad_norm": 0.98828125, "learning_rate": 5.3066565266062905e-06, "loss": 0.9617, "step": 5899 }, { "epoch": 0.8991161231331911, "grad_norm": 0.84375, "learning_rate": 5.2908031554875445e-06, "loss": 0.9009, "step": 5900 }, { "epoch": 0.8992685156964341, "grad_norm": 1.09375, "learning_rate": 5.2749728568911025e-06, "loss": 0.9973, "step": 5901 }, { "epoch": 0.8994209082596769, "grad_norm": 0.93359375, "learning_rate": 5.259165634673457e-06, "loss": 0.9965, "step": 5902 }, { "epoch": 0.8995733008229199, "grad_norm": 1.109375, "learning_rate": 5.243381492685428e-06, "loss": 1.0903, "step": 5903 }, { "epoch": 0.8997256933861627, "grad_norm": 1.015625, "learning_rate": 5.227620434772318e-06, "loss": 0.9532, "step": 5904 }, { "epoch": 0.8998780859494057, "grad_norm": 1.0078125, "learning_rate": 5.2118824647736985e-06, "loss": 1.0685, "step": 5905 }, { "epoch": 0.9000304785126486, "grad_norm": 0.94140625, "learning_rate": 5.196167586523604e-06, "loss": 1.0222, "step": 5906 }, { "epoch": 0.9001828710758915, "grad_norm": 0.7890625, "learning_rate": 5.180475803850393e-06, "loss": 0.8214, "step": 5907 }, { "epoch": 0.9003352636391344, "grad_norm": 0.88671875, "learning_rate": 5.164807120576809e-06, "loss": 0.9352, "step": 5908 }, { "epoch": 0.9004876562023774, "grad_norm": 0.8125, "learning_rate": 5.149161540519953e-06, "loss": 0.9309, "step": 5909 }, { "epoch": 0.9006400487656202, "grad_norm": 1.0703125, "learning_rate": 5.133539067491333e-06, "loss": 0.994, "step": 5910 }, { "epoch": 0.9007924413288632, "grad_norm": 1.0625, "learning_rate": 5.117939705296782e-06, "loss": 1.062, "step": 5911 }, { "epoch": 0.9009448338921061, "grad_norm": 0.92578125, "learning_rate": 5.10236345773657e-06, "loss": 1.039, "step": 5912 }, { "epoch": 0.901097226455349, "grad_norm": 1.0, "learning_rate": 5.086810328605263e-06, "loss": 0.9303, "step": 5913 }, { "epoch": 0.9012496190185919, "grad_norm": 1.0, "learning_rate": 5.0712803216918405e-06, "loss": 1.0045, "step": 5914 }, { "epoch": 0.9014020115818349, "grad_norm": 0.98828125, "learning_rate": 5.055773440779632e-06, "loss": 0.8613, "step": 5915 }, { "epoch": 0.9015544041450777, "grad_norm": 1.0546875, "learning_rate": 5.040289689646338e-06, "loss": 1.1128, "step": 5916 }, { "epoch": 0.9017067967083207, "grad_norm": 1.09375, "learning_rate": 5.024829072064008e-06, "loss": 0.9102, "step": 5917 }, { "epoch": 0.9018591892715635, "grad_norm": 1.203125, "learning_rate": 5.0093915917990955e-06, "loss": 1.0533, "step": 5918 }, { "epoch": 0.9020115818348065, "grad_norm": 0.89453125, "learning_rate": 4.993977252612369e-06, "loss": 1.095, "step": 5919 }, { "epoch": 0.9021639743980494, "grad_norm": 0.86328125, "learning_rate": 4.978586058259016e-06, "loss": 1.0674, "step": 5920 }, { "epoch": 0.9023163669612922, "grad_norm": 1.1328125, "learning_rate": 4.9632180124885134e-06, "loss": 0.9046, "step": 5921 }, { "epoch": 0.9024687595245352, "grad_norm": 0.9375, "learning_rate": 4.947873119044755e-06, "loss": 0.9655, "step": 5922 }, { "epoch": 0.9026211520877782, "grad_norm": 0.86328125, "learning_rate": 4.9325513816659845e-06, "loss": 0.8071, "step": 5923 }, { "epoch": 0.902773544651021, "grad_norm": 1.1171875, "learning_rate": 4.917252804084804e-06, "loss": 1.01, "step": 5924 }, { "epoch": 0.902925937214264, "grad_norm": 1.09375, "learning_rate": 4.901977390028145e-06, "loss": 1.0528, "step": 5925 }, { "epoch": 0.9030783297775069, "grad_norm": 1.0, "learning_rate": 4.88672514321733e-06, "loss": 1.2032, "step": 5926 }, { "epoch": 0.9032307223407497, "grad_norm": 1.2265625, "learning_rate": 4.871496067368031e-06, "loss": 1.0334, "step": 5927 }, { "epoch": 0.9033831149039927, "grad_norm": 0.765625, "learning_rate": 4.856290166190236e-06, "loss": 0.9741, "step": 5928 }, { "epoch": 0.9035355074672355, "grad_norm": 1.140625, "learning_rate": 4.841107443388371e-06, "loss": 1.0966, "step": 5929 }, { "epoch": 0.9036879000304785, "grad_norm": 1.1171875, "learning_rate": 4.825947902661154e-06, "loss": 0.9211, "step": 5930 }, { "epoch": 0.9038402925937215, "grad_norm": 0.76953125, "learning_rate": 4.810811547701621e-06, "loss": 0.9806, "step": 5931 }, { "epoch": 0.9039926851569643, "grad_norm": 1.21875, "learning_rate": 4.795698382197267e-06, "loss": 0.9077, "step": 5932 }, { "epoch": 0.9041450777202072, "grad_norm": 1.140625, "learning_rate": 4.780608409829846e-06, "loss": 0.9417, "step": 5933 }, { "epoch": 0.9042974702834502, "grad_norm": 0.96875, "learning_rate": 4.7655416342754725e-06, "loss": 0.997, "step": 5934 }, { "epoch": 0.904449862846693, "grad_norm": 0.8984375, "learning_rate": 4.750498059204677e-06, "loss": 1.0365, "step": 5935 }, { "epoch": 0.904602255409936, "grad_norm": 0.9140625, "learning_rate": 4.735477688282264e-06, "loss": 0.8431, "step": 5936 }, { "epoch": 0.904754647973179, "grad_norm": 0.76171875, "learning_rate": 4.720480525167415e-06, "loss": 0.9911, "step": 5937 }, { "epoch": 0.9049070405364218, "grad_norm": 0.8984375, "learning_rate": 4.705506573513652e-06, "loss": 0.9881, "step": 5938 }, { "epoch": 0.9050594330996647, "grad_norm": 0.9609375, "learning_rate": 4.690555836968835e-06, "loss": 0.9884, "step": 5939 }, { "epoch": 0.9052118256629077, "grad_norm": 1.0859375, "learning_rate": 4.675628319175207e-06, "loss": 0.9488, "step": 5940 }, { "epoch": 0.9053642182261505, "grad_norm": 0.7109375, "learning_rate": 4.660724023769303e-06, "loss": 0.9833, "step": 5941 }, { "epoch": 0.9055166107893935, "grad_norm": 1.09375, "learning_rate": 4.64584295438204e-06, "loss": 1.0537, "step": 5942 }, { "epoch": 0.9056690033526363, "grad_norm": 0.890625, "learning_rate": 4.630985114638642e-06, "loss": 0.92, "step": 5943 }, { "epoch": 0.9058213959158793, "grad_norm": 0.80859375, "learning_rate": 4.616150508158712e-06, "loss": 0.9645, "step": 5944 }, { "epoch": 0.9059737884791222, "grad_norm": 0.92578125, "learning_rate": 4.601339138556138e-06, "loss": 1.1034, "step": 5945 }, { "epoch": 0.9061261810423651, "grad_norm": 0.9296875, "learning_rate": 4.586551009439222e-06, "loss": 0.8219, "step": 5946 }, { "epoch": 0.906278573605608, "grad_norm": 0.91796875, "learning_rate": 4.5717861244105705e-06, "loss": 0.9699, "step": 5947 }, { "epoch": 0.906430966168851, "grad_norm": 0.9609375, "learning_rate": 4.557044487067075e-06, "loss": 0.9002, "step": 5948 }, { "epoch": 0.9065833587320938, "grad_norm": 1.1484375, "learning_rate": 4.542326101000038e-06, "loss": 0.9749, "step": 5949 }, { "epoch": 0.9067357512953368, "grad_norm": 0.85546875, "learning_rate": 4.527630969795082e-06, "loss": 0.7418, "step": 5950 }, { "epoch": 0.9068881438585797, "grad_norm": 0.90234375, "learning_rate": 4.512959097032121e-06, "loss": 0.8407, "step": 5951 }, { "epoch": 0.9070405364218226, "grad_norm": 1.1328125, "learning_rate": 4.498310486285473e-06, "loss": 1.0841, "step": 5952 }, { "epoch": 0.9071929289850655, "grad_norm": 1.0390625, "learning_rate": 4.483685141123717e-06, "loss": 1.0195, "step": 5953 }, { "epoch": 0.9073453215483085, "grad_norm": 0.91796875, "learning_rate": 4.469083065109825e-06, "loss": 0.9362, "step": 5954 }, { "epoch": 0.9074977141115513, "grad_norm": 0.84765625, "learning_rate": 4.45450426180104e-06, "loss": 0.8261, "step": 5955 }, { "epoch": 0.9076501066747943, "grad_norm": 1.21875, "learning_rate": 4.4399487347489885e-06, "loss": 1.297, "step": 5956 }, { "epoch": 0.9078024992380371, "grad_norm": 1.171875, "learning_rate": 4.425416487499612e-06, "loss": 1.2906, "step": 5957 }, { "epoch": 0.9079548918012801, "grad_norm": 0.97265625, "learning_rate": 4.410907523593177e-06, "loss": 0.9183, "step": 5958 }, { "epoch": 0.908107284364523, "grad_norm": 1.1484375, "learning_rate": 4.3964218465642355e-06, "loss": 1.2882, "step": 5959 }, { "epoch": 0.9082596769277659, "grad_norm": 0.828125, "learning_rate": 4.381959459941753e-06, "loss": 0.9854, "step": 5960 }, { "epoch": 0.9084120694910088, "grad_norm": 1.265625, "learning_rate": 4.3675203672489675e-06, "loss": 0.9913, "step": 5961 }, { "epoch": 0.9085644620542518, "grad_norm": 1.0546875, "learning_rate": 4.353104572003419e-06, "loss": 1.0592, "step": 5962 }, { "epoch": 0.9087168546174946, "grad_norm": 0.953125, "learning_rate": 4.338712077717033e-06, "loss": 0.9637, "step": 5963 }, { "epoch": 0.9088692471807376, "grad_norm": 0.8671875, "learning_rate": 4.324342887896038e-06, "loss": 0.9919, "step": 5964 }, { "epoch": 0.9090216397439805, "grad_norm": 0.9140625, "learning_rate": 4.3099970060409335e-06, "loss": 0.8561, "step": 5965 }, { "epoch": 0.9091740323072234, "grad_norm": 1.015625, "learning_rate": 4.295674435646613e-06, "loss": 1.2153, "step": 5966 }, { "epoch": 0.9093264248704663, "grad_norm": 1.09375, "learning_rate": 4.2813751802022515e-06, "loss": 0.9765, "step": 5967 }, { "epoch": 0.9094788174337093, "grad_norm": 1.015625, "learning_rate": 4.2670992431913414e-06, "loss": 1.0462, "step": 5968 }, { "epoch": 0.9096312099969521, "grad_norm": 0.72265625, "learning_rate": 4.252846628091733e-06, "loss": 0.8583, "step": 5969 }, { "epoch": 0.9097836025601951, "grad_norm": 1.2109375, "learning_rate": 4.238617338375561e-06, "loss": 1.1137, "step": 5970 }, { "epoch": 0.9099359951234379, "grad_norm": 0.92578125, "learning_rate": 4.224411377509274e-06, "loss": 0.9355, "step": 5971 }, { "epoch": 0.9100883876866809, "grad_norm": 0.87890625, "learning_rate": 4.21022874895366e-06, "loss": 0.8667, "step": 5972 }, { "epoch": 0.9102407802499238, "grad_norm": 0.84765625, "learning_rate": 4.196069456163787e-06, "loss": 1.0891, "step": 5973 }, { "epoch": 0.9103931728131667, "grad_norm": 1.40625, "learning_rate": 4.181933502589086e-06, "loss": 0.9589, "step": 5974 }, { "epoch": 0.9105455653764096, "grad_norm": 1.09375, "learning_rate": 4.167820891673302e-06, "loss": 1.0286, "step": 5975 }, { "epoch": 0.9106979579396526, "grad_norm": 0.91796875, "learning_rate": 4.153731626854396e-06, "loss": 1.0662, "step": 5976 }, { "epoch": 0.9108503505028954, "grad_norm": 1.0234375, "learning_rate": 4.139665711564788e-06, "loss": 1.0451, "step": 5977 }, { "epoch": 0.9110027430661384, "grad_norm": 1.0234375, "learning_rate": 4.125623149231106e-06, "loss": 0.8419, "step": 5978 }, { "epoch": 0.9111551356293813, "grad_norm": 0.9609375, "learning_rate": 4.1116039432743e-06, "loss": 0.9481, "step": 5979 }, { "epoch": 0.9113075281926242, "grad_norm": 1.3515625, "learning_rate": 4.097608097109695e-06, "loss": 1.0242, "step": 5980 }, { "epoch": 0.9114599207558671, "grad_norm": 0.96484375, "learning_rate": 4.083635614146875e-06, "loss": 0.988, "step": 5981 }, { "epoch": 0.9116123133191101, "grad_norm": 0.7578125, "learning_rate": 4.069686497789693e-06, "loss": 0.9025, "step": 5982 }, { "epoch": 0.9117647058823529, "grad_norm": 0.91015625, "learning_rate": 4.055760751436389e-06, "loss": 0.9548, "step": 5983 }, { "epoch": 0.9119170984455959, "grad_norm": 0.921875, "learning_rate": 4.04185837847948e-06, "loss": 1.0984, "step": 5984 }, { "epoch": 0.9120694910088387, "grad_norm": 1.0625, "learning_rate": 4.027979382305747e-06, "loss": 1.0714, "step": 5985 }, { "epoch": 0.9122218835720817, "grad_norm": 1.0, "learning_rate": 4.014123766296374e-06, "loss": 0.917, "step": 5986 }, { "epoch": 0.9123742761353246, "grad_norm": 0.8828125, "learning_rate": 4.000291533826728e-06, "loss": 0.9472, "step": 5987 }, { "epoch": 0.9125266686985675, "grad_norm": 0.98046875, "learning_rate": 3.98648268826658e-06, "loss": 0.8706, "step": 5988 }, { "epoch": 0.9126790612618104, "grad_norm": 0.84765625, "learning_rate": 3.9726972329799386e-06, "loss": 0.8664, "step": 5989 }, { "epoch": 0.9128314538250534, "grad_norm": 0.95703125, "learning_rate": 3.95893517132514e-06, "loss": 0.9449, "step": 5990 }, { "epoch": 0.9129838463882962, "grad_norm": 0.8359375, "learning_rate": 3.945196506654847e-06, "loss": 0.9428, "step": 5991 }, { "epoch": 0.9131362389515392, "grad_norm": 1.09375, "learning_rate": 3.931481242315993e-06, "loss": 0.9098, "step": 5992 }, { "epoch": 0.9132886315147821, "grad_norm": 0.9921875, "learning_rate": 3.917789381649761e-06, "loss": 1.0383, "step": 5993 }, { "epoch": 0.913441024078025, "grad_norm": 0.9140625, "learning_rate": 3.90412092799175e-06, "loss": 1.082, "step": 5994 }, { "epoch": 0.9135934166412679, "grad_norm": 1.0234375, "learning_rate": 3.890475884671752e-06, "loss": 0.9737, "step": 5995 }, { "epoch": 0.9137458092045109, "grad_norm": 1.1484375, "learning_rate": 3.8768542550139064e-06, "loss": 0.9998, "step": 5996 }, { "epoch": 0.9138982017677537, "grad_norm": 1.1328125, "learning_rate": 3.863256042336649e-06, "loss": 0.9062, "step": 5997 }, { "epoch": 0.9140505943309967, "grad_norm": 1.328125, "learning_rate": 3.849681249952697e-06, "loss": 1.0946, "step": 5998 }, { "epoch": 0.9142029868942395, "grad_norm": 0.7578125, "learning_rate": 3.836129881169037e-06, "loss": 0.7451, "step": 5999 }, { "epoch": 0.9143553794574825, "grad_norm": 1.0234375, "learning_rate": 3.822601939287018e-06, "loss": 0.826, "step": 6000 }, { "epoch": 0.9145077720207254, "grad_norm": 0.95703125, "learning_rate": 3.809097427602204e-06, "loss": 0.9434, "step": 6001 }, { "epoch": 0.9146601645839683, "grad_norm": 0.84765625, "learning_rate": 3.7956163494045294e-06, "loss": 1.0217, "step": 6002 }, { "epoch": 0.9148125571472112, "grad_norm": 0.9296875, "learning_rate": 3.782158707978156e-06, "loss": 1.0853, "step": 6003 }, { "epoch": 0.9149649497104542, "grad_norm": 0.87890625, "learning_rate": 3.7687245066015397e-06, "loss": 0.8662, "step": 6004 }, { "epoch": 0.915117342273697, "grad_norm": 0.82421875, "learning_rate": 3.755313748547473e-06, "loss": 0.9003, "step": 6005 }, { "epoch": 0.91526973483694, "grad_norm": 0.9921875, "learning_rate": 3.74192643708301e-06, "loss": 1.0036, "step": 6006 }, { "epoch": 0.9154221274001829, "grad_norm": 0.91015625, "learning_rate": 3.728562575469463e-06, "loss": 1.048, "step": 6007 }, { "epoch": 0.9155745199634258, "grad_norm": 0.77734375, "learning_rate": 3.7152221669624956e-06, "loss": 0.9177, "step": 6008 }, { "epoch": 0.9157269125266687, "grad_norm": 0.9453125, "learning_rate": 3.701905214812018e-06, "loss": 0.9388, "step": 6009 }, { "epoch": 0.9158793050899117, "grad_norm": 1.109375, "learning_rate": 3.688611722262203e-06, "loss": 0.8637, "step": 6010 }, { "epoch": 0.9160316976531545, "grad_norm": 0.89453125, "learning_rate": 3.6753416925515593e-06, "loss": 0.8731, "step": 6011 }, { "epoch": 0.9161840902163975, "grad_norm": 0.921875, "learning_rate": 3.662095128912857e-06, "loss": 1.0171, "step": 6012 }, { "epoch": 0.9163364827796403, "grad_norm": 0.8984375, "learning_rate": 3.6488720345731363e-06, "loss": 0.9012, "step": 6013 }, { "epoch": 0.9164888753428833, "grad_norm": 0.94140625, "learning_rate": 3.6356724127537654e-06, "loss": 1.1281, "step": 6014 }, { "epoch": 0.9166412679061262, "grad_norm": 1.3671875, "learning_rate": 3.6224962666703277e-06, "loss": 1.0657, "step": 6015 }, { "epoch": 0.916793660469369, "grad_norm": 0.83203125, "learning_rate": 3.6093435995327217e-06, "loss": 0.9176, "step": 6016 }, { "epoch": 0.916946053032612, "grad_norm": 0.7578125, "learning_rate": 3.59621441454514e-06, "loss": 0.9732, "step": 6017 }, { "epoch": 0.917098445595855, "grad_norm": 0.88671875, "learning_rate": 3.583108714906036e-06, "loss": 0.9617, "step": 6018 }, { "epoch": 0.9172508381590978, "grad_norm": 0.84765625, "learning_rate": 3.570026503808155e-06, "loss": 0.8662, "step": 6019 }, { "epoch": 0.9174032307223408, "grad_norm": 1.1953125, "learning_rate": 3.556967784438525e-06, "loss": 0.9749, "step": 6020 }, { "epoch": 0.9175556232855837, "grad_norm": 0.96484375, "learning_rate": 3.5439325599783914e-06, "loss": 0.8954, "step": 6021 }, { "epoch": 0.9177080158488266, "grad_norm": 0.77734375, "learning_rate": 3.5309208336033573e-06, "loss": 0.9577, "step": 6022 }, { "epoch": 0.9178604084120695, "grad_norm": 1.046875, "learning_rate": 3.5179326084832653e-06, "loss": 1.1893, "step": 6023 }, { "epoch": 0.9180128009753125, "grad_norm": 1.09375, "learning_rate": 3.504967887782207e-06, "loss": 0.9315, "step": 6024 }, { "epoch": 0.9181651935385553, "grad_norm": 1.015625, "learning_rate": 3.4920266746586106e-06, "loss": 0.9711, "step": 6025 }, { "epoch": 0.9183175861017983, "grad_norm": 0.8125, "learning_rate": 3.4791089722651436e-06, "loss": 0.8823, "step": 6026 }, { "epoch": 0.9184699786650411, "grad_norm": 1.1015625, "learning_rate": 3.4662147837487004e-06, "loss": 0.9821, "step": 6027 }, { "epoch": 0.918622371228284, "grad_norm": 0.84765625, "learning_rate": 3.4533441122505227e-06, "loss": 0.9214, "step": 6028 }, { "epoch": 0.918774763791527, "grad_norm": 1.1640625, "learning_rate": 3.4404969609060922e-06, "loss": 1.0274, "step": 6029 }, { "epoch": 0.9189271563547698, "grad_norm": 1.15625, "learning_rate": 3.427673332845138e-06, "loss": 1.0516, "step": 6030 }, { "epoch": 0.9190795489180128, "grad_norm": 0.8359375, "learning_rate": 3.4148732311917175e-06, "loss": 0.9829, "step": 6031 }, { "epoch": 0.9192319414812558, "grad_norm": 1.046875, "learning_rate": 3.4020966590640914e-06, "loss": 0.9355, "step": 6032 }, { "epoch": 0.9193843340444986, "grad_norm": 1.1484375, "learning_rate": 3.389343619574803e-06, "loss": 0.8901, "step": 6033 }, { "epoch": 0.9195367266077416, "grad_norm": 0.90234375, "learning_rate": 3.376614115830712e-06, "loss": 0.9103, "step": 6034 }, { "epoch": 0.9196891191709845, "grad_norm": 1.1328125, "learning_rate": 3.3639081509328706e-06, "loss": 1.0196, "step": 6035 }, { "epoch": 0.9198415117342273, "grad_norm": 0.97265625, "learning_rate": 3.351225727976681e-06, "loss": 0.9861, "step": 6036 }, { "epoch": 0.9199939042974703, "grad_norm": 0.9375, "learning_rate": 3.338566850051739e-06, "loss": 0.8687, "step": 6037 }, { "epoch": 0.9201462968607131, "grad_norm": 0.953125, "learning_rate": 3.3259315202419094e-06, "loss": 1.0186, "step": 6038 }, { "epoch": 0.9202986894239561, "grad_norm": 0.8125, "learning_rate": 3.313319741625376e-06, "loss": 0.9481, "step": 6039 }, { "epoch": 0.920451081987199, "grad_norm": 0.953125, "learning_rate": 3.3007315172745245e-06, "loss": 1.2117, "step": 6040 }, { "epoch": 0.9206034745504419, "grad_norm": 0.76171875, "learning_rate": 3.288166850256036e-06, "loss": 0.9489, "step": 6041 }, { "epoch": 0.9207558671136848, "grad_norm": 0.82421875, "learning_rate": 3.275625743630861e-06, "loss": 0.8988, "step": 6042 }, { "epoch": 0.9209082596769278, "grad_norm": 0.78125, "learning_rate": 3.263108200454168e-06, "loss": 0.8802, "step": 6043 }, { "epoch": 0.9210606522401706, "grad_norm": 1.03125, "learning_rate": 3.2506142237754056e-06, "loss": 0.9277, "step": 6044 }, { "epoch": 0.9212130448034136, "grad_norm": 1.0, "learning_rate": 3.238143816638306e-06, "loss": 0.8306, "step": 6045 }, { "epoch": 0.9213654373666565, "grad_norm": 0.87890625, "learning_rate": 3.2256969820808393e-06, "loss": 0.8837, "step": 6046 }, { "epoch": 0.9215178299298994, "grad_norm": 1.0234375, "learning_rate": 3.2132737231352126e-06, "loss": 0.8422, "step": 6047 }, { "epoch": 0.9216702224931423, "grad_norm": 0.9453125, "learning_rate": 3.2008740428279503e-06, "loss": 0.8954, "step": 6048 }, { "epoch": 0.9218226150563853, "grad_norm": 1.0234375, "learning_rate": 3.188497944179758e-06, "loss": 1.1514, "step": 6049 }, { "epoch": 0.9219750076196281, "grad_norm": 0.87890625, "learning_rate": 3.176145430205624e-06, "loss": 1.0299, "step": 6050 }, { "epoch": 0.9221274001828711, "grad_norm": 0.875, "learning_rate": 3.1638165039148294e-06, "loss": 0.9946, "step": 6051 }, { "epoch": 0.9222797927461139, "grad_norm": 1.0234375, "learning_rate": 3.15151116831085e-06, "loss": 0.9488, "step": 6052 }, { "epoch": 0.9224321853093569, "grad_norm": 0.94140625, "learning_rate": 3.1392294263914768e-06, "loss": 1.0298, "step": 6053 }, { "epoch": 0.9225845778725998, "grad_norm": 0.81640625, "learning_rate": 3.126971281148705e-06, "loss": 0.8702, "step": 6054 }, { "epoch": 0.9227369704358427, "grad_norm": 0.8046875, "learning_rate": 3.114736735568757e-06, "loss": 0.914, "step": 6055 }, { "epoch": 0.9228893629990856, "grad_norm": 0.87109375, "learning_rate": 3.102525792632205e-06, "loss": 0.9567, "step": 6056 }, { "epoch": 0.9230417555623286, "grad_norm": 0.87890625, "learning_rate": 3.0903384553137682e-06, "loss": 1.0153, "step": 6057 }, { "epoch": 0.9231941481255714, "grad_norm": 1.0546875, "learning_rate": 3.0781747265824614e-06, "loss": 1.0733, "step": 6058 }, { "epoch": 0.9233465406888144, "grad_norm": 0.953125, "learning_rate": 3.066034609401569e-06, "loss": 1.1461, "step": 6059 }, { "epoch": 0.9234989332520573, "grad_norm": 0.9765625, "learning_rate": 3.0539181067285817e-06, "loss": 0.9691, "step": 6060 }, { "epoch": 0.9236513258153002, "grad_norm": 0.828125, "learning_rate": 3.0418252215152376e-06, "loss": 0.8285, "step": 6061 }, { "epoch": 0.9238037183785431, "grad_norm": 0.98046875, "learning_rate": 3.0297559567075585e-06, "loss": 0.9766, "step": 6062 }, { "epoch": 0.9239561109417861, "grad_norm": 0.94921875, "learning_rate": 3.0177103152457918e-06, "loss": 0.9364, "step": 6063 }, { "epoch": 0.9241085035050289, "grad_norm": 0.9296875, "learning_rate": 3.0056883000644132e-06, "loss": 0.9584, "step": 6064 }, { "epoch": 0.9242608960682719, "grad_norm": 0.80078125, "learning_rate": 2.993689914092179e-06, "loss": 0.9361, "step": 6065 }, { "epoch": 0.9244132886315147, "grad_norm": 1.0078125, "learning_rate": 2.981715160252052e-06, "loss": 0.9339, "step": 6066 }, { "epoch": 0.9245656811947577, "grad_norm": 0.953125, "learning_rate": 2.9697640414612647e-06, "loss": 0.9198, "step": 6067 }, { "epoch": 0.9247180737580006, "grad_norm": 1.09375, "learning_rate": 2.9578365606312665e-06, "loss": 1.0387, "step": 6068 }, { "epoch": 0.9248704663212435, "grad_norm": 0.734375, "learning_rate": 2.9459327206677768e-06, "loss": 0.917, "step": 6069 }, { "epoch": 0.9250228588844864, "grad_norm": 0.83203125, "learning_rate": 2.934052524470754e-06, "loss": 1.1856, "step": 6070 }, { "epoch": 0.9251752514477294, "grad_norm": 1.015625, "learning_rate": 2.922195974934361e-06, "loss": 0.947, "step": 6071 }, { "epoch": 0.9253276440109722, "grad_norm": 0.90234375, "learning_rate": 2.91036307494702e-06, "loss": 1.0373, "step": 6072 }, { "epoch": 0.9254800365742152, "grad_norm": 1.3046875, "learning_rate": 2.8985538273914258e-06, "loss": 0.9462, "step": 6073 }, { "epoch": 0.9256324291374581, "grad_norm": 1.2109375, "learning_rate": 2.886768235144455e-06, "loss": 1.0778, "step": 6074 }, { "epoch": 0.925784821700701, "grad_norm": 0.75390625, "learning_rate": 2.8750063010772544e-06, "loss": 0.9547, "step": 6075 }, { "epoch": 0.9259372142639439, "grad_norm": 0.8671875, "learning_rate": 2.8632680280552103e-06, "loss": 1.0973, "step": 6076 }, { "epoch": 0.9260896068271869, "grad_norm": 1.2890625, "learning_rate": 2.8515534189379244e-06, "loss": 1.1791, "step": 6077 }, { "epoch": 0.9262419993904297, "grad_norm": 1.125, "learning_rate": 2.839862476579236e-06, "loss": 1.1171, "step": 6078 }, { "epoch": 0.9263943919536727, "grad_norm": 0.91015625, "learning_rate": 2.828195203827244e-06, "loss": 0.9092, "step": 6079 }, { "epoch": 0.9265467845169155, "grad_norm": 0.90234375, "learning_rate": 2.8165516035242534e-06, "loss": 0.9336, "step": 6080 }, { "epoch": 0.9266991770801585, "grad_norm": 0.97265625, "learning_rate": 2.804931678506806e-06, "loss": 1.0194, "step": 6081 }, { "epoch": 0.9268515696434014, "grad_norm": 0.80859375, "learning_rate": 2.793335431605715e-06, "loss": 1.06, "step": 6082 }, { "epoch": 0.9270039622066443, "grad_norm": 0.6640625, "learning_rate": 2.7817628656459427e-06, "loss": 0.9039, "step": 6083 }, { "epoch": 0.9271563547698872, "grad_norm": 0.83203125, "learning_rate": 2.7702139834467787e-06, "loss": 0.9219, "step": 6084 }, { "epoch": 0.9273087473331302, "grad_norm": 0.93359375, "learning_rate": 2.758688787821673e-06, "loss": 0.9168, "step": 6085 }, { "epoch": 0.927461139896373, "grad_norm": 0.78515625, "learning_rate": 2.747187281578334e-06, "loss": 0.9637, "step": 6086 }, { "epoch": 0.927613532459616, "grad_norm": 1.2578125, "learning_rate": 2.735709467518699e-06, "loss": 1.0226, "step": 6087 }, { "epoch": 0.9277659250228589, "grad_norm": 0.98828125, "learning_rate": 2.72425534843892e-06, "loss": 0.857, "step": 6088 }, { "epoch": 0.9279183175861018, "grad_norm": 0.79296875, "learning_rate": 2.7128249271293872e-06, "loss": 1.0149, "step": 6089 }, { "epoch": 0.9280707101493447, "grad_norm": 0.9765625, "learning_rate": 2.7014182063747175e-06, "loss": 1.0097, "step": 6090 }, { "epoch": 0.9282231027125877, "grad_norm": 1.015625, "learning_rate": 2.6900351889537547e-06, "loss": 1.0165, "step": 6091 }, { "epoch": 0.9283754952758305, "grad_norm": 1.0859375, "learning_rate": 2.678675877639547e-06, "loss": 1.1875, "step": 6092 }, { "epoch": 0.9285278878390735, "grad_norm": 0.84375, "learning_rate": 2.667340275199426e-06, "loss": 0.8381, "step": 6093 }, { "epoch": 0.9286802804023163, "grad_norm": 0.78515625, "learning_rate": 2.6560283843948708e-06, "loss": 0.8658, "step": 6094 }, { "epoch": 0.9288326729655593, "grad_norm": 0.7578125, "learning_rate": 2.644740207981622e-06, "loss": 1.0206, "step": 6095 }, { "epoch": 0.9289850655288022, "grad_norm": 0.83984375, "learning_rate": 2.6334757487096683e-06, "loss": 1.0478, "step": 6096 }, { "epoch": 0.9291374580920451, "grad_norm": 1.0234375, "learning_rate": 2.6222350093231705e-06, "loss": 0.9202, "step": 6097 }, { "epoch": 0.929289850655288, "grad_norm": 1.1640625, "learning_rate": 2.611017992560549e-06, "loss": 1.3946, "step": 6098 }, { "epoch": 0.929442243218531, "grad_norm": 1.078125, "learning_rate": 2.5998247011544186e-06, "loss": 1.1992, "step": 6099 }, { "epoch": 0.9295946357817738, "grad_norm": 0.796875, "learning_rate": 2.5886551378316306e-06, "loss": 0.9174, "step": 6100 }, { "epoch": 0.9297470283450168, "grad_norm": 0.8203125, "learning_rate": 2.5775093053132528e-06, "loss": 0.8925, "step": 6101 }, { "epoch": 0.9298994209082597, "grad_norm": 1.0390625, "learning_rate": 2.566387206314569e-06, "loss": 0.8255, "step": 6102 }, { "epoch": 0.9300518134715026, "grad_norm": 0.8046875, "learning_rate": 2.555288843545078e-06, "loss": 0.8703, "step": 6103 }, { "epoch": 0.9302042060347455, "grad_norm": 1.125, "learning_rate": 2.5442142197085182e-06, "loss": 0.8903, "step": 6104 }, { "epoch": 0.9303565985979885, "grad_norm": 0.7421875, "learning_rate": 2.5331633375028085e-06, "loss": 0.9251, "step": 6105 }, { "epoch": 0.9305089911612313, "grad_norm": 0.94921875, "learning_rate": 2.5221361996200955e-06, "loss": 1.2407, "step": 6106 }, { "epoch": 0.9306613837244743, "grad_norm": 1.0, "learning_rate": 2.5111328087467743e-06, "loss": 0.8138, "step": 6107 }, { "epoch": 0.9308137762877171, "grad_norm": 1.0390625, "learning_rate": 2.5001531675634127e-06, "loss": 1.0997, "step": 6108 }, { "epoch": 0.9309661688509601, "grad_norm": 0.80078125, "learning_rate": 2.4891972787448036e-06, "loss": 0.9921, "step": 6109 }, { "epoch": 0.931118561414203, "grad_norm": 0.97265625, "learning_rate": 2.47826514495999e-06, "loss": 1.034, "step": 6110 }, { "epoch": 0.9312709539774459, "grad_norm": 0.71484375, "learning_rate": 2.4673567688721646e-06, "loss": 0.7839, "step": 6111 }, { "epoch": 0.9314233465406888, "grad_norm": 0.75, "learning_rate": 2.4564721531387556e-06, "loss": 1.0126, "step": 6112 }, { "epoch": 0.9315757391039318, "grad_norm": 0.8515625, "learning_rate": 2.4456113004114546e-06, "loss": 0.9702, "step": 6113 }, { "epoch": 0.9317281316671746, "grad_norm": 0.78515625, "learning_rate": 2.4347742133360996e-06, "loss": 0.6921, "step": 6114 }, { "epoch": 0.9318805242304176, "grad_norm": 0.859375, "learning_rate": 2.423960894552746e-06, "loss": 1.0002, "step": 6115 }, { "epoch": 0.9320329167936605, "grad_norm": 0.88671875, "learning_rate": 2.4131713466956974e-06, "loss": 0.9717, "step": 6116 }, { "epoch": 0.9321853093569034, "grad_norm": 1.1796875, "learning_rate": 2.402405572393418e-06, "loss": 0.9487, "step": 6117 }, { "epoch": 0.9323377019201463, "grad_norm": 1.2578125, "learning_rate": 2.3916635742686324e-06, "loss": 1.2771, "step": 6118 }, { "epoch": 0.9324900944833893, "grad_norm": 0.70703125, "learning_rate": 2.3809453549382354e-06, "loss": 0.8547, "step": 6119 }, { "epoch": 0.9326424870466321, "grad_norm": 0.765625, "learning_rate": 2.370250917013328e-06, "loss": 0.9745, "step": 6120 }, { "epoch": 0.9327948796098751, "grad_norm": 0.95703125, "learning_rate": 2.359580263099259e-06, "loss": 0.9663, "step": 6121 }, { "epoch": 0.9329472721731179, "grad_norm": 1.2109375, "learning_rate": 2.348933395795516e-06, "loss": 1.0025, "step": 6122 }, { "epoch": 0.9330996647363609, "grad_norm": 1.03125, "learning_rate": 2.3383103176958356e-06, "loss": 0.9193, "step": 6123 }, { "epoch": 0.9332520572996038, "grad_norm": 0.75390625, "learning_rate": 2.32771103138818e-06, "loss": 0.9325, "step": 6124 }, { "epoch": 0.9334044498628467, "grad_norm": 0.88671875, "learning_rate": 2.3171355394546624e-06, "loss": 0.9844, "step": 6125 }, { "epoch": 0.9335568424260896, "grad_norm": 1.046875, "learning_rate": 2.3065838444716325e-06, "loss": 1.0063, "step": 6126 }, { "epoch": 0.9337092349893326, "grad_norm": 0.9296875, "learning_rate": 2.2960559490096457e-06, "loss": 0.9151, "step": 6127 }, { "epoch": 0.9338616275525754, "grad_norm": 0.9609375, "learning_rate": 2.2855518556334275e-06, "loss": 1.0146, "step": 6128 }, { "epoch": 0.9340140201158184, "grad_norm": 1.2890625, "learning_rate": 2.2750715669019317e-06, "loss": 1.0561, "step": 6129 }, { "epoch": 0.9341664126790613, "grad_norm": 0.85546875, "learning_rate": 2.2646150853683156e-06, "loss": 0.7689, "step": 6130 }, { "epoch": 0.9343188052423042, "grad_norm": 1.0703125, "learning_rate": 2.2541824135799194e-06, "loss": 1.1151, "step": 6131 }, { "epoch": 0.9344711978055471, "grad_norm": 0.95703125, "learning_rate": 2.2437735540783213e-06, "loss": 0.9233, "step": 6132 }, { "epoch": 0.9346235903687901, "grad_norm": 0.8984375, "learning_rate": 2.233388509399226e-06, "loss": 0.9886, "step": 6133 }, { "epoch": 0.9347759829320329, "grad_norm": 0.98828125, "learning_rate": 2.2230272820725986e-06, "loss": 0.8654, "step": 6134 }, { "epoch": 0.9349283754952759, "grad_norm": 1.0, "learning_rate": 2.2126898746225865e-06, "loss": 0.9855, "step": 6135 }, { "epoch": 0.9350807680585187, "grad_norm": 1.0625, "learning_rate": 2.2023762895675317e-06, "loss": 0.9623, "step": 6136 }, { "epoch": 0.9352331606217616, "grad_norm": 0.7578125, "learning_rate": 2.1920865294199567e-06, "loss": 0.7747, "step": 6137 }, { "epoch": 0.9353855531850046, "grad_norm": 0.98046875, "learning_rate": 2.1818205966866343e-06, "loss": 0.9572, "step": 6138 }, { "epoch": 0.9355379457482474, "grad_norm": 1.21875, "learning_rate": 2.1715784938684425e-06, "loss": 1.1163, "step": 6139 }, { "epoch": 0.9356903383114904, "grad_norm": 0.7890625, "learning_rate": 2.161360223460529e-06, "loss": 0.9501, "step": 6140 }, { "epoch": 0.9358427308747334, "grad_norm": 1.03125, "learning_rate": 2.151165787952214e-06, "loss": 1.0649, "step": 6141 }, { "epoch": 0.9359951234379762, "grad_norm": 1.0234375, "learning_rate": 2.140995189827e-06, "loss": 0.9935, "step": 6142 }, { "epoch": 0.9361475160012191, "grad_norm": 0.9296875, "learning_rate": 2.1308484315626044e-06, "loss": 0.8385, "step": 6143 }, { "epoch": 0.9362999085644621, "grad_norm": 1.0, "learning_rate": 2.120725515630906e-06, "loss": 1.0003, "step": 6144 }, { "epoch": 0.936452301127705, "grad_norm": 0.96484375, "learning_rate": 2.1106264444980096e-06, "loss": 0.9695, "step": 6145 }, { "epoch": 0.9366046936909479, "grad_norm": 1.046875, "learning_rate": 2.1005512206241696e-06, "loss": 1.0094, "step": 6146 }, { "epoch": 0.9367570862541907, "grad_norm": 0.8125, "learning_rate": 2.0904998464638892e-06, "loss": 0.8122, "step": 6147 }, { "epoch": 0.9369094788174337, "grad_norm": 0.75, "learning_rate": 2.0804723244657985e-06, "loss": 0.9398, "step": 6148 }, { "epoch": 0.9370618713806766, "grad_norm": 0.9375, "learning_rate": 2.070468657072777e-06, "loss": 1.0821, "step": 6149 }, { "epoch": 0.9372142639439195, "grad_norm": 0.8515625, "learning_rate": 2.060488846721831e-06, "loss": 0.9918, "step": 6150 }, { "epoch": 0.9373666565071624, "grad_norm": 0.98046875, "learning_rate": 2.0505328958441925e-06, "loss": 0.9428, "step": 6151 }, { "epoch": 0.9375190490704054, "grad_norm": 1.1484375, "learning_rate": 2.040600806865278e-06, "loss": 1.1071, "step": 6152 }, { "epoch": 0.9376714416336482, "grad_norm": 0.8125, "learning_rate": 2.0306925822046964e-06, "loss": 1.0315, "step": 6153 }, { "epoch": 0.9378238341968912, "grad_norm": 0.890625, "learning_rate": 2.020808224276227e-06, "loss": 1.0532, "step": 6154 }, { "epoch": 0.9379762267601341, "grad_norm": 0.828125, "learning_rate": 2.0109477354878335e-06, "loss": 0.8992, "step": 6155 }, { "epoch": 0.938128619323377, "grad_norm": 1.2890625, "learning_rate": 2.001111118241694e-06, "loss": 1.0188, "step": 6156 }, { "epoch": 0.9382810118866199, "grad_norm": 0.97265625, "learning_rate": 1.991298374934114e-06, "loss": 0.9936, "step": 6157 }, { "epoch": 0.9384334044498629, "grad_norm": 0.953125, "learning_rate": 1.981509507955659e-06, "loss": 1.0238, "step": 6158 }, { "epoch": 0.9385857970131057, "grad_norm": 0.80859375, "learning_rate": 1.97174451969101e-06, "loss": 1.0977, "step": 6159 }, { "epoch": 0.9387381895763487, "grad_norm": 1.078125, "learning_rate": 1.9620034125190644e-06, "loss": 1.1261, "step": 6160 }, { "epoch": 0.9388905821395915, "grad_norm": 0.79296875, "learning_rate": 1.952286188812913e-06, "loss": 0.9114, "step": 6161 }, { "epoch": 0.9390429747028345, "grad_norm": 1.1875, "learning_rate": 1.942592850939784e-06, "loss": 1.2325, "step": 6162 }, { "epoch": 0.9391953672660774, "grad_norm": 0.8671875, "learning_rate": 1.932923401261133e-06, "loss": 0.8011, "step": 6163 }, { "epoch": 0.9393477598293203, "grad_norm": 0.83203125, "learning_rate": 1.923277842132576e-06, "loss": 1.0859, "step": 6164 }, { "epoch": 0.9395001523925632, "grad_norm": 1.0625, "learning_rate": 1.9136561759038883e-06, "loss": 0.9327, "step": 6165 }, { "epoch": 0.9396525449558062, "grad_norm": 0.8671875, "learning_rate": 1.9040584049190845e-06, "loss": 1.0064, "step": 6166 }, { "epoch": 0.939804937519049, "grad_norm": 1.0078125, "learning_rate": 1.894484531516294e-06, "loss": 1.0407, "step": 6167 }, { "epoch": 0.939957330082292, "grad_norm": 0.90234375, "learning_rate": 1.88493455802784e-06, "loss": 1.046, "step": 6168 }, { "epoch": 0.9401097226455349, "grad_norm": 0.96875, "learning_rate": 1.8754084867802613e-06, "loss": 0.8915, "step": 6169 }, { "epoch": 0.9402621152087778, "grad_norm": 1.2265625, "learning_rate": 1.865906320094235e-06, "loss": 1.0776, "step": 6170 }, { "epoch": 0.9404145077720207, "grad_norm": 0.8046875, "learning_rate": 1.8564280602846207e-06, "loss": 0.9659, "step": 6171 }, { "epoch": 0.9405669003352637, "grad_norm": 0.83984375, "learning_rate": 1.8469737096604488e-06, "loss": 1.0606, "step": 6172 }, { "epoch": 0.9407192928985065, "grad_norm": 1.0703125, "learning_rate": 1.8375432705249663e-06, "loss": 0.9921, "step": 6173 }, { "epoch": 0.9408716854617495, "grad_norm": 0.98828125, "learning_rate": 1.8281367451755238e-06, "loss": 1.0182, "step": 6174 }, { "epoch": 0.9410240780249923, "grad_norm": 0.8671875, "learning_rate": 1.818754135903722e-06, "loss": 0.9338, "step": 6175 }, { "epoch": 0.9411764705882353, "grad_norm": 1.0078125, "learning_rate": 1.8093954449952766e-06, "loss": 0.919, "step": 6176 }, { "epoch": 0.9413288631514782, "grad_norm": 0.734375, "learning_rate": 1.8000606747301197e-06, "loss": 0.9536, "step": 6177 }, { "epoch": 0.9414812557147211, "grad_norm": 0.77734375, "learning_rate": 1.7907498273823099e-06, "loss": 1.043, "step": 6178 }, { "epoch": 0.941633648277964, "grad_norm": 0.93359375, "learning_rate": 1.7814629052201214e-06, "loss": 0.9261, "step": 6179 }, { "epoch": 0.941786040841207, "grad_norm": 0.8671875, "learning_rate": 1.7721999105059784e-06, "loss": 1.1286, "step": 6180 }, { "epoch": 0.9419384334044498, "grad_norm": 1.046875, "learning_rate": 1.7629608454964753e-06, "loss": 1.2189, "step": 6181 }, { "epoch": 0.9420908259676928, "grad_norm": 0.921875, "learning_rate": 1.7537457124423895e-06, "loss": 0.8965, "step": 6182 }, { "epoch": 0.9422432185309357, "grad_norm": 0.86328125, "learning_rate": 1.7445545135886477e-06, "loss": 1.063, "step": 6183 }, { "epoch": 0.9423956110941786, "grad_norm": 1.140625, "learning_rate": 1.7353872511743807e-06, "loss": 0.7525, "step": 6184 }, { "epoch": 0.9425480036574215, "grad_norm": 0.75390625, "learning_rate": 1.7262439274328246e-06, "loss": 0.8841, "step": 6185 }, { "epoch": 0.9427003962206645, "grad_norm": 0.875, "learning_rate": 1.7171245445914642e-06, "loss": 1.0708, "step": 6186 }, { "epoch": 0.9428527887839073, "grad_norm": 0.9609375, "learning_rate": 1.7080291048718998e-06, "loss": 0.9986, "step": 6187 }, { "epoch": 0.9430051813471503, "grad_norm": 0.87890625, "learning_rate": 1.698957610489915e-06, "loss": 1.1114, "step": 6188 }, { "epoch": 0.9431575739103931, "grad_norm": 1.0703125, "learning_rate": 1.6899100636554422e-06, "loss": 0.9023, "step": 6189 }, { "epoch": 0.9433099664736361, "grad_norm": 0.8984375, "learning_rate": 1.6808864665726065e-06, "loss": 0.8472, "step": 6190 }, { "epoch": 0.943462359036879, "grad_norm": 0.96484375, "learning_rate": 1.6718868214396721e-06, "loss": 1.008, "step": 6191 }, { "epoch": 0.9436147516001219, "grad_norm": 0.984375, "learning_rate": 1.6629111304491074e-06, "loss": 0.9327, "step": 6192 }, { "epoch": 0.9437671441633648, "grad_norm": 1.125, "learning_rate": 1.6539593957874966e-06, "loss": 1.0641, "step": 6193 }, { "epoch": 0.9439195367266078, "grad_norm": 0.8671875, "learning_rate": 1.6450316196356175e-06, "loss": 0.9438, "step": 6194 }, { "epoch": 0.9440719292898506, "grad_norm": 0.9140625, "learning_rate": 1.636127804168408e-06, "loss": 0.9951, "step": 6195 }, { "epoch": 0.9442243218530936, "grad_norm": 0.984375, "learning_rate": 1.6272479515549555e-06, "loss": 0.9384, "step": 6196 }, { "epoch": 0.9443767144163365, "grad_norm": 0.8046875, "learning_rate": 1.6183920639585403e-06, "loss": 0.9033, "step": 6197 }, { "epoch": 0.9445291069795794, "grad_norm": 0.9765625, "learning_rate": 1.6095601435365593e-06, "loss": 0.9357, "step": 6198 }, { "epoch": 0.9446814995428223, "grad_norm": 0.921875, "learning_rate": 1.6007521924406132e-06, "loss": 0.9354, "step": 6199 }, { "epoch": 0.9448338921060653, "grad_norm": 1.265625, "learning_rate": 1.5919682128164416e-06, "loss": 0.9102, "step": 6200 }, { "epoch": 0.9449862846693081, "grad_norm": 1.1796875, "learning_rate": 1.5832082068039544e-06, "loss": 1.0308, "step": 6201 }, { "epoch": 0.9451386772325511, "grad_norm": 0.80859375, "learning_rate": 1.5744721765371896e-06, "loss": 0.8861, "step": 6202 }, { "epoch": 0.9452910697957939, "grad_norm": 0.8671875, "learning_rate": 1.5657601241443997e-06, "loss": 0.9665, "step": 6203 }, { "epoch": 0.9454434623590369, "grad_norm": 0.8125, "learning_rate": 1.557072051747943e-06, "loss": 0.975, "step": 6204 }, { "epoch": 0.9455958549222798, "grad_norm": 0.7890625, "learning_rate": 1.548407961464382e-06, "loss": 1.0627, "step": 6205 }, { "epoch": 0.9457482474855227, "grad_norm": 0.953125, "learning_rate": 1.5397678554043947e-06, "loss": 1.0927, "step": 6206 }, { "epoch": 0.9459006400487656, "grad_norm": 1.0078125, "learning_rate": 1.5311517356728423e-06, "loss": 1.1234, "step": 6207 }, { "epoch": 0.9460530326120086, "grad_norm": 1.0078125, "learning_rate": 1.522559604368734e-06, "loss": 0.9983, "step": 6208 }, { "epoch": 0.9462054251752514, "grad_norm": 0.953125, "learning_rate": 1.5139914635852403e-06, "loss": 0.9159, "step": 6209 }, { "epoch": 0.9463578177384944, "grad_norm": 0.8046875, "learning_rate": 1.5054473154096804e-06, "loss": 0.9425, "step": 6210 }, { "epoch": 0.9465102103017373, "grad_norm": 0.87890625, "learning_rate": 1.4969271619235225e-06, "loss": 0.9623, "step": 6211 }, { "epoch": 0.9466626028649802, "grad_norm": 0.97265625, "learning_rate": 1.4884310052024175e-06, "loss": 0.9864, "step": 6212 }, { "epoch": 0.9468149954282231, "grad_norm": 1.2265625, "learning_rate": 1.4799588473161319e-06, "loss": 0.9914, "step": 6213 }, { "epoch": 0.9469673879914661, "grad_norm": 1.0078125, "learning_rate": 1.4715106903286257e-06, "loss": 0.9929, "step": 6214 }, { "epoch": 0.9471197805547089, "grad_norm": 1.015625, "learning_rate": 1.4630865362979863e-06, "loss": 0.9173, "step": 6215 }, { "epoch": 0.9472721731179519, "grad_norm": 0.9453125, "learning_rate": 1.4546863872764382e-06, "loss": 1.2422, "step": 6216 }, { "epoch": 0.9474245656811947, "grad_norm": 1.09375, "learning_rate": 1.4463102453104005e-06, "loss": 1.1722, "step": 6217 }, { "epoch": 0.9475769582444377, "grad_norm": 1.0390625, "learning_rate": 1.4379581124404185e-06, "loss": 0.9756, "step": 6218 }, { "epoch": 0.9477293508076806, "grad_norm": 0.88671875, "learning_rate": 1.4296299907011756e-06, "loss": 0.985, "step": 6219 }, { "epoch": 0.9478817433709235, "grad_norm": 0.6875, "learning_rate": 1.4213258821215381e-06, "loss": 0.8371, "step": 6220 }, { "epoch": 0.9480341359341664, "grad_norm": 0.81640625, "learning_rate": 1.4130457887245096e-06, "loss": 0.9365, "step": 6221 }, { "epoch": 0.9481865284974094, "grad_norm": 1.1015625, "learning_rate": 1.4047897125272325e-06, "loss": 1.061, "step": 6222 }, { "epoch": 0.9483389210606522, "grad_norm": 1.1171875, "learning_rate": 1.3965576555410088e-06, "loss": 0.9016, "step": 6223 }, { "epoch": 0.9484913136238952, "grad_norm": 1.046875, "learning_rate": 1.3883496197712786e-06, "loss": 0.9736, "step": 6224 }, { "epoch": 0.9486437061871381, "grad_norm": 1.609375, "learning_rate": 1.3801656072176316e-06, "loss": 1.1594, "step": 6225 }, { "epoch": 0.948796098750381, "grad_norm": 0.83203125, "learning_rate": 1.3720056198738395e-06, "loss": 0.8686, "step": 6226 }, { "epoch": 0.9489484913136239, "grad_norm": 0.97265625, "learning_rate": 1.3638696597277679e-06, "loss": 0.9163, "step": 6227 }, { "epoch": 0.9491008838768669, "grad_norm": 1.0703125, "learning_rate": 1.3557577287614532e-06, "loss": 0.879, "step": 6228 }, { "epoch": 0.9492532764401097, "grad_norm": 0.97265625, "learning_rate": 1.3476698289511037e-06, "loss": 1.1221, "step": 6229 }, { "epoch": 0.9494056690033527, "grad_norm": 0.921875, "learning_rate": 1.3396059622670099e-06, "loss": 1.0362, "step": 6230 }, { "epoch": 0.9495580615665955, "grad_norm": 0.8671875, "learning_rate": 1.3315661306736672e-06, "loss": 0.8494, "step": 6231 }, { "epoch": 0.9497104541298385, "grad_norm": 0.96875, "learning_rate": 1.3235503361297087e-06, "loss": 0.8853, "step": 6232 }, { "epoch": 0.9498628466930814, "grad_norm": 0.77734375, "learning_rate": 1.3155585805878723e-06, "loss": 0.7862, "step": 6233 }, { "epoch": 0.9500152392563243, "grad_norm": 1.0, "learning_rate": 1.3075908659950786e-06, "loss": 1.1376, "step": 6234 }, { "epoch": 0.9501676318195672, "grad_norm": 1.015625, "learning_rate": 1.299647194292364e-06, "loss": 1.0371, "step": 6235 }, { "epoch": 0.9503200243828102, "grad_norm": 0.8515625, "learning_rate": 1.2917275674149244e-06, "loss": 1.0214, "step": 6236 }, { "epoch": 0.950472416946053, "grad_norm": 1.03125, "learning_rate": 1.2838319872921167e-06, "loss": 0.9171, "step": 6237 }, { "epoch": 0.950624809509296, "grad_norm": 1.3046875, "learning_rate": 1.2759604558473914e-06, "loss": 1.0867, "step": 6238 }, { "epoch": 0.9507772020725389, "grad_norm": 0.8671875, "learning_rate": 1.268112974998381e-06, "loss": 0.9397, "step": 6239 }, { "epoch": 0.9509295946357817, "grad_norm": 1.1171875, "learning_rate": 1.2602895466568343e-06, "loss": 1.2939, "step": 6240 }, { "epoch": 0.9510819871990247, "grad_norm": 0.74609375, "learning_rate": 1.252490172728671e-06, "loss": 1.1359, "step": 6241 }, { "epoch": 0.9512343797622677, "grad_norm": 0.9296875, "learning_rate": 1.2447148551138932e-06, "loss": 1.0614, "step": 6242 }, { "epoch": 0.9513867723255105, "grad_norm": 0.859375, "learning_rate": 1.2369635957067193e-06, "loss": 0.9324, "step": 6243 }, { "epoch": 0.9515391648887535, "grad_norm": 0.80859375, "learning_rate": 1.2292363963954502e-06, "loss": 1.0183, "step": 6244 }, { "epoch": 0.9516915574519963, "grad_norm": 0.8671875, "learning_rate": 1.221533259062546e-06, "loss": 0.9366, "step": 6245 }, { "epoch": 0.9518439500152392, "grad_norm": 0.859375, "learning_rate": 1.2138541855845953e-06, "loss": 0.9236, "step": 6246 }, { "epoch": 0.9519963425784822, "grad_norm": 1.046875, "learning_rate": 1.2061991778323345e-06, "loss": 1.1572, "step": 6247 }, { "epoch": 0.952148735141725, "grad_norm": 1.0859375, "learning_rate": 1.1985682376706387e-06, "loss": 1.045, "step": 6248 }, { "epoch": 0.952301127704968, "grad_norm": 0.8359375, "learning_rate": 1.1909613669585096e-06, "loss": 1.1766, "step": 6249 }, { "epoch": 0.952453520268211, "grad_norm": 1.03125, "learning_rate": 1.1833785675490872e-06, "loss": 1.0189, "step": 6250 }, { "epoch": 0.9526059128314538, "grad_norm": 0.91015625, "learning_rate": 1.1758198412896604e-06, "loss": 1.0741, "step": 6251 }, { "epoch": 0.9527583053946967, "grad_norm": 1.3515625, "learning_rate": 1.168285190021634e-06, "loss": 1.0963, "step": 6252 }, { "epoch": 0.9529106979579397, "grad_norm": 0.875, "learning_rate": 1.160774615580551e-06, "loss": 0.8777, "step": 6253 }, { "epoch": 0.9530630905211825, "grad_norm": 0.7890625, "learning_rate": 1.1532881197961255e-06, "loss": 0.9732, "step": 6254 }, { "epoch": 0.9532154830844255, "grad_norm": 1.09375, "learning_rate": 1.1458257044921428e-06, "loss": 1.0441, "step": 6255 }, { "epoch": 0.9533678756476683, "grad_norm": 1.015625, "learning_rate": 1.138387371486571e-06, "loss": 1.1135, "step": 6256 }, { "epoch": 0.9535202682109113, "grad_norm": 0.91796875, "learning_rate": 1.1309731225914943e-06, "loss": 0.8866, "step": 6257 }, { "epoch": 0.9536726607741542, "grad_norm": 0.85546875, "learning_rate": 1.1235829596131232e-06, "loss": 1.022, "step": 6258 }, { "epoch": 0.9538250533373971, "grad_norm": 0.78125, "learning_rate": 1.116216884351806e-06, "loss": 1.0002, "step": 6259 }, { "epoch": 0.95397744590064, "grad_norm": 0.8828125, "learning_rate": 1.1088748986020415e-06, "loss": 0.9503, "step": 6260 }, { "epoch": 0.954129838463883, "grad_norm": 0.96484375, "learning_rate": 1.1015570041524315e-06, "loss": 0.8135, "step": 6261 }, { "epoch": 0.9542822310271258, "grad_norm": 1.0546875, "learning_rate": 1.0942632027857168e-06, "loss": 1.067, "step": 6262 }, { "epoch": 0.9544346235903688, "grad_norm": 0.828125, "learning_rate": 1.0869934962787764e-06, "loss": 0.7696, "step": 6263 }, { "epoch": 0.9545870161536117, "grad_norm": 0.7890625, "learning_rate": 1.0797478864026046e-06, "loss": 0.9284, "step": 6264 }, { "epoch": 0.9547394087168546, "grad_norm": 0.75, "learning_rate": 1.0725263749223558e-06, "loss": 0.8584, "step": 6265 }, { "epoch": 0.9548918012800975, "grad_norm": 0.8359375, "learning_rate": 1.0653289635972896e-06, "loss": 0.891, "step": 6266 }, { "epoch": 0.9550441938433405, "grad_norm": 0.88671875, "learning_rate": 1.0581556541807924e-06, "loss": 0.8711, "step": 6267 }, { "epoch": 0.9551965864065833, "grad_norm": 1.109375, "learning_rate": 1.0510064484203885e-06, "loss": 1.0245, "step": 6268 }, { "epoch": 0.9553489789698263, "grad_norm": 0.83984375, "learning_rate": 1.043881348057718e-06, "loss": 0.9395, "step": 6269 }, { "epoch": 0.9555013715330691, "grad_norm": 1.40625, "learning_rate": 1.0367803548285704e-06, "loss": 1.044, "step": 6270 }, { "epoch": 0.9556537640963121, "grad_norm": 0.90625, "learning_rate": 1.0297034704628395e-06, "loss": 1.1333, "step": 6271 }, { "epoch": 0.955806156659555, "grad_norm": 0.9609375, "learning_rate": 1.0226506966845683e-06, "loss": 0.8829, "step": 6272 }, { "epoch": 0.9559585492227979, "grad_norm": 0.984375, "learning_rate": 1.015622035211905e-06, "loss": 0.9424, "step": 6273 }, { "epoch": 0.9561109417860408, "grad_norm": 1.1015625, "learning_rate": 1.0086174877571464e-06, "loss": 0.8975, "step": 6274 }, { "epoch": 0.9562633343492838, "grad_norm": 1.1171875, "learning_rate": 1.0016370560266718e-06, "loss": 0.7636, "step": 6275 }, { "epoch": 0.9564157269125266, "grad_norm": 1.15625, "learning_rate": 9.946807417210435e-07, "loss": 0.887, "step": 6276 }, { "epoch": 0.9565681194757696, "grad_norm": 1.2890625, "learning_rate": 9.877485465349058e-07, "loss": 1.0337, "step": 6277 }, { "epoch": 0.9567205120390125, "grad_norm": 1.0703125, "learning_rate": 9.808404721570408e-07, "loss": 0.9422, "step": 6278 }, { "epoch": 0.9568729046022554, "grad_norm": 0.90625, "learning_rate": 9.739565202703693e-07, "loss": 0.9438, "step": 6279 }, { "epoch": 0.9570252971654983, "grad_norm": 1.078125, "learning_rate": 9.670966925518944e-07, "loss": 0.9827, "step": 6280 }, { "epoch": 0.9571776897287413, "grad_norm": 0.921875, "learning_rate": 9.6026099067279e-07, "loss": 1.0238, "step": 6281 }, { "epoch": 0.9573300822919841, "grad_norm": 0.84765625, "learning_rate": 9.534494162983354e-07, "loss": 1.0594, "step": 6282 }, { "epoch": 0.9574824748552271, "grad_norm": 0.9453125, "learning_rate": 9.466619710879033e-07, "loss": 1.1814, "step": 6283 }, { "epoch": 0.9576348674184699, "grad_norm": 0.76171875, "learning_rate": 9.398986566950374e-07, "loss": 0.9298, "step": 6284 }, { "epoch": 0.9577872599817129, "grad_norm": 0.8515625, "learning_rate": 9.331594747673644e-07, "loss": 1.03, "step": 6285 }, { "epoch": 0.9579396525449558, "grad_norm": 0.765625, "learning_rate": 9.264444269466598e-07, "loss": 1.0505, "step": 6286 }, { "epoch": 0.9580920451081987, "grad_norm": 1.1796875, "learning_rate": 9.197535148687819e-07, "loss": 1.1038, "step": 6287 }, { "epoch": 0.9582444376714416, "grad_norm": 0.90625, "learning_rate": 9.130867401637488e-07, "loss": 0.7287, "step": 6288 }, { "epoch": 0.9583968302346846, "grad_norm": 1.1953125, "learning_rate": 9.064441044556949e-07, "loss": 0.8949, "step": 6289 }, { "epoch": 0.9585492227979274, "grad_norm": 0.90234375, "learning_rate": 8.998256093628365e-07, "loss": 0.9521, "step": 6290 }, { "epoch": 0.9587016153611704, "grad_norm": 0.9140625, "learning_rate": 8.932312564975509e-07, "loss": 1.0036, "step": 6291 }, { "epoch": 0.9588540079244133, "grad_norm": 0.94921875, "learning_rate": 8.866610474663084e-07, "loss": 1.0293, "step": 6292 }, { "epoch": 0.9590064004876562, "grad_norm": 0.91015625, "learning_rate": 8.801149838697175e-07, "loss": 1.1275, "step": 6293 }, { "epoch": 0.9591587930508991, "grad_norm": 0.828125, "learning_rate": 8.735930673024806e-07, "loss": 0.9807, "step": 6294 }, { "epoch": 0.9593111856141421, "grad_norm": 0.8828125, "learning_rate": 8.670952993534375e-07, "loss": 1.0061, "step": 6295 }, { "epoch": 0.9594635781773849, "grad_norm": 0.890625, "learning_rate": 8.606216816055334e-07, "loss": 0.8939, "step": 6296 }, { "epoch": 0.9596159707406279, "grad_norm": 0.8125, "learning_rate": 8.5417221563584e-07, "loss": 0.9657, "step": 6297 }, { "epoch": 0.9597683633038707, "grad_norm": 1.0859375, "learning_rate": 8.477469030155338e-07, "loss": 1.0382, "step": 6298 }, { "epoch": 0.9599207558671137, "grad_norm": 0.953125, "learning_rate": 8.413457453099294e-07, "loss": 1.044, "step": 6299 }, { "epoch": 0.9600731484303566, "grad_norm": 1.015625, "learning_rate": 8.349687440784238e-07, "loss": 1.0829, "step": 6300 }, { "epoch": 0.9602255409935995, "grad_norm": 0.921875, "learning_rate": 8.286159008745408e-07, "loss": 0.9935, "step": 6301 }, { "epoch": 0.9603779335568424, "grad_norm": 0.8046875, "learning_rate": 8.222872172459428e-07, "loss": 0.9339, "step": 6302 }, { "epoch": 0.9605303261200854, "grad_norm": 0.9765625, "learning_rate": 8.159826947343852e-07, "loss": 1.0592, "step": 6303 }, { "epoch": 0.9606827186833282, "grad_norm": 1.2109375, "learning_rate": 8.097023348757283e-07, "loss": 0.9367, "step": 6304 }, { "epoch": 0.9608351112465712, "grad_norm": 0.99609375, "learning_rate": 8.034461391999593e-07, "loss": 1.1192, "step": 6305 }, { "epoch": 0.9609875038098141, "grad_norm": 1.0859375, "learning_rate": 7.972141092311925e-07, "loss": 1.1303, "step": 6306 }, { "epoch": 0.961139896373057, "grad_norm": 1.21875, "learning_rate": 7.910062464876244e-07, "loss": 1.0204, "step": 6307 }, { "epoch": 0.9612922889362999, "grad_norm": 1.09375, "learning_rate": 7.8482255248159e-07, "loss": 1.0435, "step": 6308 }, { "epoch": 0.9614446814995429, "grad_norm": 0.81640625, "learning_rate": 7.786630287195284e-07, "loss": 0.8421, "step": 6309 }, { "epoch": 0.9615970740627857, "grad_norm": 1.015625, "learning_rate": 7.725276767019729e-07, "loss": 1.1196, "step": 6310 }, { "epoch": 0.9617494666260287, "grad_norm": 1.0625, "learning_rate": 7.664164979235944e-07, "loss": 1.0482, "step": 6311 }, { "epoch": 0.9619018591892715, "grad_norm": 1.140625, "learning_rate": 7.603294938731576e-07, "loss": 0.953, "step": 6312 }, { "epoch": 0.9620542517525145, "grad_norm": 0.95703125, "learning_rate": 7.54266666033554e-07, "loss": 0.9024, "step": 6313 }, { "epoch": 0.9622066443157574, "grad_norm": 1.2109375, "learning_rate": 7.482280158817689e-07, "loss": 1.1048, "step": 6314 }, { "epoch": 0.9623590368790003, "grad_norm": 1.109375, "learning_rate": 7.422135448889033e-07, "loss": 1.0463, "step": 6315 }, { "epoch": 0.9625114294422432, "grad_norm": 1.0546875, "learning_rate": 7.36223254520163e-07, "loss": 1.1316, "step": 6316 }, { "epoch": 0.9626638220054862, "grad_norm": 0.875, "learning_rate": 7.302571462348918e-07, "loss": 0.9038, "step": 6317 }, { "epoch": 0.962816214568729, "grad_norm": 0.8203125, "learning_rate": 7.243152214865045e-07, "loss": 1.0069, "step": 6318 }, { "epoch": 0.962968607131972, "grad_norm": 0.98828125, "learning_rate": 7.183974817225325e-07, "loss": 0.949, "step": 6319 }, { "epoch": 0.9631209996952149, "grad_norm": 1.296875, "learning_rate": 7.125039283846335e-07, "loss": 1.0063, "step": 6320 }, { "epoch": 0.9632733922584578, "grad_norm": 1.1328125, "learning_rate": 7.06634562908548e-07, "loss": 1.0626, "step": 6321 }, { "epoch": 0.9634257848217007, "grad_norm": 0.9921875, "learning_rate": 7.007893867241544e-07, "loss": 1.0965, "step": 6322 }, { "epoch": 0.9635781773849437, "grad_norm": 0.88671875, "learning_rate": 6.949684012554136e-07, "loss": 0.9595, "step": 6323 }, { "epoch": 0.9637305699481865, "grad_norm": 0.9375, "learning_rate": 6.891716079204024e-07, "loss": 0.8354, "step": 6324 }, { "epoch": 0.9638829625114295, "grad_norm": 0.86328125, "learning_rate": 6.833990081313024e-07, "loss": 0.8738, "step": 6325 }, { "epoch": 0.9640353550746723, "grad_norm": 0.98046875, "learning_rate": 6.776506032943997e-07, "loss": 1.1379, "step": 6326 }, { "epoch": 0.9641877476379153, "grad_norm": 0.828125, "learning_rate": 6.719263948100963e-07, "loss": 0.9656, "step": 6327 }, { "epoch": 0.9643401402011582, "grad_norm": 0.640625, "learning_rate": 6.662263840728878e-07, "loss": 0.866, "step": 6328 }, { "epoch": 0.9644925327644011, "grad_norm": 1.1328125, "learning_rate": 6.605505724713745e-07, "loss": 1.1135, "step": 6329 }, { "epoch": 0.964644925327644, "grad_norm": 0.74609375, "learning_rate": 6.548989613882728e-07, "loss": 0.8995, "step": 6330 }, { "epoch": 0.964797317890887, "grad_norm": 0.98046875, "learning_rate": 6.492715522003922e-07, "loss": 1.0109, "step": 6331 }, { "epoch": 0.9649497104541298, "grad_norm": 0.82421875, "learning_rate": 6.436683462786475e-07, "loss": 1.0825, "step": 6332 }, { "epoch": 0.9651021030173728, "grad_norm": 0.83203125, "learning_rate": 6.380893449880687e-07, "loss": 0.9499, "step": 6333 }, { "epoch": 0.9652544955806157, "grad_norm": 0.95703125, "learning_rate": 6.325345496877688e-07, "loss": 0.9918, "step": 6334 }, { "epoch": 0.9654068881438586, "grad_norm": 0.90234375, "learning_rate": 6.270039617309875e-07, "loss": 1.0342, "step": 6335 }, { "epoch": 0.9655592807071015, "grad_norm": 0.78125, "learning_rate": 6.21497582465047e-07, "loss": 0.9924, "step": 6336 }, { "epoch": 0.9657116732703445, "grad_norm": 0.76171875, "learning_rate": 6.160154132313856e-07, "loss": 1.0138, "step": 6337 }, { "epoch": 0.9658640658335873, "grad_norm": 0.640625, "learning_rate": 6.105574553655347e-07, "loss": 0.8361, "step": 6338 }, { "epoch": 0.9660164583968303, "grad_norm": 1.0234375, "learning_rate": 6.051237101971419e-07, "loss": 1.0235, "step": 6339 }, { "epoch": 0.9661688509600731, "grad_norm": 0.9765625, "learning_rate": 5.997141790499261e-07, "loss": 1.064, "step": 6340 }, { "epoch": 0.966321243523316, "grad_norm": 1.15625, "learning_rate": 5.943288632417443e-07, "loss": 0.9282, "step": 6341 }, { "epoch": 0.966473636086559, "grad_norm": 1.2109375, "learning_rate": 5.88967764084536e-07, "loss": 0.9688, "step": 6342 }, { "epoch": 0.9666260286498018, "grad_norm": 1.0234375, "learning_rate": 5.836308828843228e-07, "loss": 0.9661, "step": 6343 }, { "epoch": 0.9667784212130448, "grad_norm": 0.9453125, "learning_rate": 5.783182209412763e-07, "loss": 0.9972, "step": 6344 }, { "epoch": 0.9669308137762878, "grad_norm": 1.203125, "learning_rate": 5.730297795496054e-07, "loss": 1.0853, "step": 6345 }, { "epoch": 0.9670832063395306, "grad_norm": 0.83203125, "learning_rate": 5.677655599976794e-07, "loss": 0.9646, "step": 6346 }, { "epoch": 0.9672355989027736, "grad_norm": 0.97265625, "learning_rate": 5.625255635679172e-07, "loss": 0.9839, "step": 6347 }, { "epoch": 0.9673879914660165, "grad_norm": 0.9609375, "learning_rate": 5.573097915368641e-07, "loss": 1.0615, "step": 6348 }, { "epoch": 0.9675403840292593, "grad_norm": 1.1328125, "learning_rate": 5.521182451751594e-07, "loss": 1.1833, "step": 6349 }, { "epoch": 0.9676927765925023, "grad_norm": 0.80859375, "learning_rate": 5.469509257475358e-07, "loss": 1.0011, "step": 6350 }, { "epoch": 0.9678451691557451, "grad_norm": 1.078125, "learning_rate": 5.418078345128308e-07, "loss": 0.8039, "step": 6351 }, { "epoch": 0.9679975617189881, "grad_norm": 0.80859375, "learning_rate": 5.366889727239644e-07, "loss": 1.1363, "step": 6352 }, { "epoch": 0.968149954282231, "grad_norm": 1.1328125, "learning_rate": 5.315943416279834e-07, "loss": 0.83, "step": 6353 }, { "epoch": 0.9683023468454739, "grad_norm": 0.8828125, "learning_rate": 5.265239424659952e-07, "loss": 0.9341, "step": 6354 }, { "epoch": 0.9684547394087168, "grad_norm": 1.0234375, "learning_rate": 5.214777764732226e-07, "loss": 1.0035, "step": 6355 }, { "epoch": 0.9686071319719598, "grad_norm": 1.0390625, "learning_rate": 5.164558448790047e-07, "loss": 0.8886, "step": 6356 }, { "epoch": 0.9687595245352026, "grad_norm": 0.84375, "learning_rate": 5.114581489067405e-07, "loss": 1.0689, "step": 6357 }, { "epoch": 0.9689119170984456, "grad_norm": 1.015625, "learning_rate": 5.064846897739339e-07, "loss": 1.0169, "step": 6358 }, { "epoch": 0.9690643096616885, "grad_norm": 0.90625, "learning_rate": 5.015354686922046e-07, "loss": 0.9626, "step": 6359 }, { "epoch": 0.9692167022249314, "grad_norm": 1.15625, "learning_rate": 4.966104868672439e-07, "loss": 0.9254, "step": 6360 }, { "epoch": 0.9693690947881743, "grad_norm": 0.921875, "learning_rate": 4.917097454988584e-07, "loss": 0.9332, "step": 6361 }, { "epoch": 0.9695214873514173, "grad_norm": 1.2265625, "learning_rate": 4.868332457809377e-07, "loss": 0.9176, "step": 6362 }, { "epoch": 0.9696738799146601, "grad_norm": 0.9375, "learning_rate": 4.819809889014537e-07, "loss": 1.0463, "step": 6363 }, { "epoch": 0.9698262724779031, "grad_norm": 1.1796875, "learning_rate": 4.771529760425053e-07, "loss": 1.0204, "step": 6364 }, { "epoch": 0.9699786650411459, "grad_norm": 0.671875, "learning_rate": 4.723492083802517e-07, "loss": 0.8352, "step": 6365 }, { "epoch": 0.9701310576043889, "grad_norm": 0.93359375, "learning_rate": 4.6756968708495664e-07, "loss": 1.0547, "step": 6366 }, { "epoch": 0.9702834501676318, "grad_norm": 1.2890625, "learning_rate": 4.6281441332099994e-07, "loss": 1.042, "step": 6367 }, { "epoch": 0.9704358427308747, "grad_norm": 1.0625, "learning_rate": 4.5808338824682163e-07, "loss": 1.1192, "step": 6368 }, { "epoch": 0.9705882352941176, "grad_norm": 1.4375, "learning_rate": 4.533766130149664e-07, "loss": 1.0277, "step": 6369 }, { "epoch": 0.9707406278573606, "grad_norm": 0.8515625, "learning_rate": 4.486940887720725e-07, "loss": 0.9464, "step": 6370 }, { "epoch": 0.9708930204206034, "grad_norm": 0.94921875, "learning_rate": 4.4403581665887206e-07, "loss": 0.8146, "step": 6371 }, { "epoch": 0.9710454129838464, "grad_norm": 0.96484375, "learning_rate": 4.3940179781019055e-07, "loss": 1.1542, "step": 6372 }, { "epoch": 0.9711978055470893, "grad_norm": 1.0703125, "learning_rate": 4.3479203335493603e-07, "loss": 0.942, "step": 6373 }, { "epoch": 0.9713501981103322, "grad_norm": 0.875, "learning_rate": 4.302065244161213e-07, "loss": 1.0218, "step": 6374 }, { "epoch": 0.9715025906735751, "grad_norm": 0.75390625, "learning_rate": 4.256452721108306e-07, "loss": 0.8515, "step": 6375 }, { "epoch": 0.9716549832368181, "grad_norm": 0.76171875, "learning_rate": 4.2110827755026393e-07, "loss": 0.7712, "step": 6376 }, { "epoch": 0.9718073758000609, "grad_norm": 1.0078125, "learning_rate": 4.1659554183968164e-07, "loss": 0.8418, "step": 6377 }, { "epoch": 0.9719597683633039, "grad_norm": 0.9140625, "learning_rate": 4.121070660784598e-07, "loss": 0.9035, "step": 6378 }, { "epoch": 0.9721121609265467, "grad_norm": 0.9453125, "learning_rate": 4.076428513600683e-07, "loss": 1.034, "step": 6379 }, { "epoch": 0.9722645534897897, "grad_norm": 0.74609375, "learning_rate": 4.032028987720371e-07, "loss": 0.8455, "step": 6380 }, { "epoch": 0.9724169460530326, "grad_norm": 1.3828125, "learning_rate": 3.98787209396001e-07, "loss": 1.0031, "step": 6381 }, { "epoch": 0.9725693386162755, "grad_norm": 0.76171875, "learning_rate": 3.9439578430769953e-07, "loss": 0.8545, "step": 6382 }, { "epoch": 0.9727217311795184, "grad_norm": 1.046875, "learning_rate": 3.9002862457693245e-07, "loss": 0.8983, "step": 6383 }, { "epoch": 0.9728741237427614, "grad_norm": 1.0234375, "learning_rate": 3.8568573126761543e-07, "loss": 0.8682, "step": 6384 }, { "epoch": 0.9730265163060042, "grad_norm": 0.921875, "learning_rate": 3.8136710543773547e-07, "loss": 0.9997, "step": 6385 }, { "epoch": 0.9731789088692472, "grad_norm": 1.03125, "learning_rate": 3.770727481393621e-07, "loss": 1.0336, "step": 6386 }, { "epoch": 0.9733313014324901, "grad_norm": 1.0234375, "learning_rate": 3.7280266041868073e-07, "loss": 1.0478, "step": 6387 }, { "epoch": 0.973483693995733, "grad_norm": 0.83203125, "learning_rate": 3.68556843315937e-07, "loss": 1.0871, "step": 6388 }, { "epoch": 0.9736360865589759, "grad_norm": 0.91796875, "learning_rate": 3.6433529786545907e-07, "loss": 0.9214, "step": 6389 }, { "epoch": 0.9737884791222189, "grad_norm": 0.9453125, "learning_rate": 3.6013802509570205e-07, "loss": 0.9482, "step": 6390 }, { "epoch": 0.9739408716854617, "grad_norm": 0.98828125, "learning_rate": 3.5596502602917027e-07, "loss": 0.9282, "step": 6391 }, { "epoch": 0.9740932642487047, "grad_norm": 0.9765625, "learning_rate": 3.518163016824616e-07, "loss": 1.0338, "step": 6392 }, { "epoch": 0.9742456568119475, "grad_norm": 1.0234375, "learning_rate": 3.4769185306626763e-07, "loss": 1.0276, "step": 6393 }, { "epoch": 0.9743980493751905, "grad_norm": 0.94140625, "learning_rate": 3.435916811853512e-07, "loss": 0.9196, "step": 6394 }, { "epoch": 0.9745504419384334, "grad_norm": 0.79296875, "learning_rate": 3.3951578703860234e-07, "loss": 0.9016, "step": 6395 }, { "epoch": 0.9747028345016763, "grad_norm": 0.9453125, "learning_rate": 3.3546417161894884e-07, "loss": 1.0228, "step": 6396 }, { "epoch": 0.9748552270649192, "grad_norm": 1.328125, "learning_rate": 3.3143683591342346e-07, "loss": 0.8044, "step": 6397 }, { "epoch": 0.9750076196281622, "grad_norm": 1.0078125, "learning_rate": 3.2743378090315244e-07, "loss": 1.0644, "step": 6398 }, { "epoch": 0.975160012191405, "grad_norm": 1.28125, "learning_rate": 3.234550075633225e-07, "loss": 0.9953, "step": 6399 }, { "epoch": 0.975312404754648, "grad_norm": 1.1640625, "learning_rate": 3.195005168632248e-07, "loss": 0.9731, "step": 6400 }, { "epoch": 0.9754647973178909, "grad_norm": 1.171875, "learning_rate": 3.155703097662332e-07, "loss": 1.0011, "step": 6401 }, { "epoch": 0.9756171898811338, "grad_norm": 0.91015625, "learning_rate": 3.116643872298153e-07, "loss": 0.8379, "step": 6402 }, { "epoch": 0.9757695824443767, "grad_norm": 0.90234375, "learning_rate": 3.0778275020548754e-07, "loss": 0.9955, "step": 6403 }, { "epoch": 0.9759219750076197, "grad_norm": 1.1953125, "learning_rate": 3.0392539963888247e-07, "loss": 0.949, "step": 6404 }, { "epoch": 0.9760743675708625, "grad_norm": 0.7109375, "learning_rate": 3.000923364697039e-07, "loss": 0.7812, "step": 6405 }, { "epoch": 0.9762267601341055, "grad_norm": 1.2109375, "learning_rate": 2.962835616317383e-07, "loss": 0.9632, "step": 6406 }, { "epoch": 0.9763791526973483, "grad_norm": 0.80078125, "learning_rate": 2.9249907605286564e-07, "loss": 1.0168, "step": 6407 }, { "epoch": 0.9765315452605913, "grad_norm": 0.92578125, "learning_rate": 2.887388806550373e-07, "loss": 0.9591, "step": 6408 }, { "epoch": 0.9766839378238342, "grad_norm": 0.765625, "learning_rate": 2.850029763542983e-07, "loss": 0.8911, "step": 6409 }, { "epoch": 0.9768363303870771, "grad_norm": 1.0703125, "learning_rate": 2.8129136406075394e-07, "loss": 1.1581, "step": 6410 }, { "epoch": 0.97698872295032, "grad_norm": 1.234375, "learning_rate": 2.776040446786143e-07, "loss": 0.9992, "step": 6411 }, { "epoch": 0.977141115513563, "grad_norm": 0.875, "learning_rate": 2.73941019106172e-07, "loss": 1.1254, "step": 6412 }, { "epoch": 0.9772935080768058, "grad_norm": 1.109375, "learning_rate": 2.7030228823577974e-07, "loss": 1.1307, "step": 6413 }, { "epoch": 0.9774459006400488, "grad_norm": 0.90234375, "learning_rate": 2.666878529538841e-07, "loss": 0.7181, "step": 6414 }, { "epoch": 0.9775982932032917, "grad_norm": 0.9765625, "learning_rate": 2.6309771414102515e-07, "loss": 0.9581, "step": 6415 }, { "epoch": 0.9777506857665346, "grad_norm": 1.0234375, "learning_rate": 2.5953187267180323e-07, "loss": 1.0867, "step": 6416 }, { "epoch": 0.9779030783297775, "grad_norm": 0.9453125, "learning_rate": 2.559903294149124e-07, "loss": 0.9815, "step": 6417 }, { "epoch": 0.9780554708930205, "grad_norm": 0.8359375, "learning_rate": 2.524730852331181e-07, "loss": 0.9704, "step": 6418 }, { "epoch": 0.9782078634562633, "grad_norm": 0.94140625, "learning_rate": 2.489801409832793e-07, "loss": 0.768, "step": 6419 }, { "epoch": 0.9783602560195063, "grad_norm": 1.0234375, "learning_rate": 2.455114975163264e-07, "loss": 1.0315, "step": 6420 }, { "epoch": 0.9785126485827491, "grad_norm": 0.9296875, "learning_rate": 2.4206715567726134e-07, "loss": 0.9984, "step": 6421 }, { "epoch": 0.9786650411459921, "grad_norm": 0.9375, "learning_rate": 2.3864711630519063e-07, "loss": 0.8553, "step": 6422 }, { "epoch": 0.978817433709235, "grad_norm": 0.875, "learning_rate": 2.352513802332701e-07, "loss": 0.9622, "step": 6423 }, { "epoch": 0.9789698262724779, "grad_norm": 0.8984375, "learning_rate": 2.3187994828876013e-07, "loss": 0.9209, "step": 6424 }, { "epoch": 0.9791222188357208, "grad_norm": 0.90625, "learning_rate": 2.285328212929816e-07, "loss": 1.2004, "step": 6425 }, { "epoch": 0.9792746113989638, "grad_norm": 0.7890625, "learning_rate": 2.2521000006134884e-07, "loss": 0.9433, "step": 6426 }, { "epoch": 0.9794270039622066, "grad_norm": 0.9140625, "learning_rate": 2.219114854033477e-07, "loss": 0.8551, "step": 6427 }, { "epoch": 0.9795793965254496, "grad_norm": 1.40625, "learning_rate": 2.1863727812254653e-07, "loss": 1.2268, "step": 6428 }, { "epoch": 0.9797317890886925, "grad_norm": 0.97265625, "learning_rate": 2.15387379016585e-07, "loss": 0.8086, "step": 6429 }, { "epoch": 0.9798841816519354, "grad_norm": 1.0859375, "learning_rate": 2.121617888771854e-07, "loss": 1.2063, "step": 6430 }, { "epoch": 0.9800365742151783, "grad_norm": 0.78125, "learning_rate": 2.0896050849015248e-07, "loss": 0.8835, "step": 6431 }, { "epoch": 0.9801889667784213, "grad_norm": 1.015625, "learning_rate": 2.0578353863535126e-07, "loss": 0.9961, "step": 6432 }, { "epoch": 0.9803413593416641, "grad_norm": 1.203125, "learning_rate": 2.0263088008675158e-07, "loss": 1.0243, "step": 6433 }, { "epoch": 0.9804937519049071, "grad_norm": 0.89453125, "learning_rate": 1.9950253361238347e-07, "loss": 0.9771, "step": 6434 }, { "epoch": 0.9806461444681499, "grad_norm": 1.453125, "learning_rate": 1.9639849997435954e-07, "loss": 1.0821, "step": 6435 }, { "epoch": 0.9807985370313929, "grad_norm": 1.1875, "learning_rate": 1.9331877992886383e-07, "loss": 0.88, "step": 6436 }, { "epoch": 0.9809509295946358, "grad_norm": 1.140625, "learning_rate": 1.902633742261517e-07, "loss": 1.0499, "step": 6437 }, { "epoch": 0.9811033221578787, "grad_norm": 0.890625, "learning_rate": 1.8723228361057222e-07, "loss": 0.8848, "step": 6438 }, { "epoch": 0.9812557147211216, "grad_norm": 1.0078125, "learning_rate": 1.842255088205458e-07, "loss": 0.8925, "step": 6439 }, { "epoch": 0.9814081072843646, "grad_norm": 1.0859375, "learning_rate": 1.8124305058855317e-07, "loss": 1.0033, "step": 6440 }, { "epoch": 0.9815604998476074, "grad_norm": 0.8359375, "learning_rate": 1.7828490964117983e-07, "loss": 0.9434, "step": 6441 }, { "epoch": 0.9817128924108504, "grad_norm": 1.015625, "learning_rate": 1.7535108669907153e-07, "loss": 0.9481, "step": 6442 }, { "epoch": 0.9818652849740933, "grad_norm": 0.95703125, "learning_rate": 1.724415824769343e-07, "loss": 0.8994, "step": 6443 }, { "epoch": 0.9820176775373362, "grad_norm": 1.15625, "learning_rate": 1.69556397683579e-07, "loss": 0.948, "step": 6444 }, { "epoch": 0.9821700701005791, "grad_norm": 0.96484375, "learning_rate": 1.6669553302186557e-07, "loss": 1.0316, "step": 6445 }, { "epoch": 0.9823224626638221, "grad_norm": 1.1796875, "learning_rate": 1.6385898918875875e-07, "loss": 1.0853, "step": 6446 }, { "epoch": 0.9824748552270649, "grad_norm": 0.99609375, "learning_rate": 1.6104676687526132e-07, "loss": 1.0253, "step": 6447 }, { "epoch": 0.9826272477903079, "grad_norm": 0.875, "learning_rate": 1.5825886676649192e-07, "loss": 0.8152, "step": 6448 }, { "epoch": 0.9827796403535507, "grad_norm": 0.81640625, "learning_rate": 1.554952895416073e-07, "loss": 0.8654, "step": 6449 }, { "epoch": 0.9829320329167937, "grad_norm": 0.85546875, "learning_rate": 1.5275603587386887e-07, "loss": 0.9372, "step": 6450 }, { "epoch": 0.9830844254800366, "grad_norm": 1.203125, "learning_rate": 1.5004110643057623e-07, "loss": 0.9909, "step": 6451 }, { "epoch": 0.9832368180432794, "grad_norm": 1.09375, "learning_rate": 1.473505018731447e-07, "loss": 1.0548, "step": 6452 }, { "epoch": 0.9833892106065224, "grad_norm": 1.2734375, "learning_rate": 1.4468422285703885e-07, "loss": 1.1612, "step": 6453 }, { "epoch": 0.9835416031697654, "grad_norm": 0.80078125, "learning_rate": 1.420422700317947e-07, "loss": 1.021, "step": 6454 }, { "epoch": 0.9836939957330082, "grad_norm": 1.1328125, "learning_rate": 1.3942464404104184e-07, "loss": 0.9502, "step": 6455 }, { "epoch": 0.9838463882962512, "grad_norm": 0.94140625, "learning_rate": 1.3683134552247012e-07, "loss": 0.9724, "step": 6456 }, { "epoch": 0.9839987808594941, "grad_norm": 1.09375, "learning_rate": 1.342623751078409e-07, "loss": 1.2509, "step": 6457 }, { "epoch": 0.984151173422737, "grad_norm": 1.078125, "learning_rate": 1.3171773342298687e-07, "loss": 1.2062, "step": 6458 }, { "epoch": 0.9843035659859799, "grad_norm": 0.95703125, "learning_rate": 1.2919742108783439e-07, "loss": 0.9351, "step": 6459 }, { "epoch": 0.9844559585492227, "grad_norm": 0.9921875, "learning_rate": 1.2670143871634788e-07, "loss": 0.9458, "step": 6460 }, { "epoch": 0.9846083511124657, "grad_norm": 1.3359375, "learning_rate": 1.2422978691659647e-07, "loss": 1.0012, "step": 6461 }, { "epoch": 0.9847607436757086, "grad_norm": 0.890625, "learning_rate": 1.2178246629070967e-07, "loss": 1.0318, "step": 6462 }, { "epoch": 0.9849131362389515, "grad_norm": 1.1640625, "learning_rate": 1.193594774348883e-07, "loss": 1.0884, "step": 6463 }, { "epoch": 0.9850655288021944, "grad_norm": 0.79296875, "learning_rate": 1.1696082093941574e-07, "loss": 0.8452, "step": 6464 }, { "epoch": 0.9852179213654374, "grad_norm": 1.21875, "learning_rate": 1.1458649738862459e-07, "loss": 0.9993, "step": 6465 }, { "epoch": 0.9853703139286802, "grad_norm": 0.94140625, "learning_rate": 1.1223650736094104e-07, "loss": 0.9429, "step": 6466 }, { "epoch": 0.9855227064919232, "grad_norm": 0.95703125, "learning_rate": 1.0991085142886271e-07, "loss": 0.8922, "step": 6467 }, { "epoch": 0.9856750990551661, "grad_norm": 0.91796875, "learning_rate": 1.0760953015893637e-07, "loss": 0.8954, "step": 6468 }, { "epoch": 0.985827491618409, "grad_norm": 1.03125, "learning_rate": 1.053325441118136e-07, "loss": 0.9719, "step": 6469 }, { "epoch": 0.9859798841816519, "grad_norm": 0.984375, "learning_rate": 1.0307989384219507e-07, "loss": 0.9776, "step": 6470 }, { "epoch": 0.9861322767448949, "grad_norm": 1.03125, "learning_rate": 1.0085157989885297e-07, "loss": 1.048, "step": 6471 }, { "epoch": 0.9862846693081377, "grad_norm": 0.81640625, "learning_rate": 9.864760282464192e-08, "loss": 1.0166, "step": 6472 }, { "epoch": 0.9864370618713807, "grad_norm": 1.1484375, "learning_rate": 9.646796315647688e-08, "loss": 0.9183, "step": 6473 }, { "epoch": 0.9865894544346235, "grad_norm": 0.86328125, "learning_rate": 9.431266142536643e-08, "loss": 0.9237, "step": 6474 }, { "epoch": 0.9867418469978665, "grad_norm": 0.9296875, "learning_rate": 9.218169815635724e-08, "loss": 1.1147, "step": 6475 }, { "epoch": 0.9868942395611094, "grad_norm": 0.91015625, "learning_rate": 9.00750738685896e-08, "loss": 0.8693, "step": 6476 }, { "epoch": 0.9870466321243523, "grad_norm": 0.76171875, "learning_rate": 8.799278907526409e-08, "loss": 0.8113, "step": 6477 }, { "epoch": 0.9871990246875952, "grad_norm": 0.9296875, "learning_rate": 8.593484428365272e-08, "loss": 0.8903, "step": 6478 }, { "epoch": 0.9873514172508382, "grad_norm": 0.9375, "learning_rate": 8.390123999509891e-08, "loss": 1.0863, "step": 6479 }, { "epoch": 0.987503809814081, "grad_norm": 0.859375, "learning_rate": 8.189197670502857e-08, "loss": 0.9614, "step": 6480 }, { "epoch": 0.987656202377324, "grad_norm": 0.8125, "learning_rate": 7.990705490292794e-08, "loss": 0.931, "step": 6481 }, { "epoch": 0.9878085949405669, "grad_norm": 0.875, "learning_rate": 7.794647507233244e-08, "loss": 1.1995, "step": 6482 }, { "epoch": 0.9879609875038098, "grad_norm": 1.25, "learning_rate": 7.601023769089333e-08, "loss": 0.9687, "step": 6483 }, { "epoch": 0.9881133800670527, "grad_norm": 1.0625, "learning_rate": 7.409834323027776e-08, "loss": 0.9548, "step": 6484 }, { "epoch": 0.9882657726302957, "grad_norm": 0.96875, "learning_rate": 7.22107921562798e-08, "loss": 1.2711, "step": 6485 }, { "epoch": 0.9884181651935385, "grad_norm": 1.0703125, "learning_rate": 7.034758492872052e-08, "loss": 1.0049, "step": 6486 }, { "epoch": 0.9885705577567815, "grad_norm": 1.140625, "learning_rate": 6.850872200149238e-08, "loss": 0.9996, "step": 6487 }, { "epoch": 0.9887229503200243, "grad_norm": 0.87109375, "learning_rate": 6.669420382259262e-08, "loss": 0.9998, "step": 6488 }, { "epoch": 0.9888753428832673, "grad_norm": 1.0234375, "learning_rate": 6.490403083404539e-08, "loss": 1.223, "step": 6489 }, { "epoch": 0.9890277354465102, "grad_norm": 0.89453125, "learning_rate": 6.313820347196853e-08, "loss": 0.9031, "step": 6490 }, { "epoch": 0.9891801280097531, "grad_norm": 0.78515625, "learning_rate": 6.139672216654014e-08, "loss": 0.8585, "step": 6491 }, { "epoch": 0.989332520572996, "grad_norm": 0.79296875, "learning_rate": 5.967958734202084e-08, "loss": 0.9712, "step": 6492 }, { "epoch": 0.989484913136239, "grad_norm": 1.078125, "learning_rate": 5.798679941672047e-08, "loss": 0.9927, "step": 6493 }, { "epoch": 0.9896373056994818, "grad_norm": 1.0625, "learning_rate": 5.631835880303138e-08, "loss": 0.991, "step": 6494 }, { "epoch": 0.9897896982627248, "grad_norm": 0.8046875, "learning_rate": 5.467426590739511e-08, "loss": 1.1465, "step": 6495 }, { "epoch": 0.9899420908259677, "grad_norm": 1.03125, "learning_rate": 5.305452113035792e-08, "loss": 0.9608, "step": 6496 }, { "epoch": 0.9900944833892106, "grad_norm": 0.9296875, "learning_rate": 5.145912486649307e-08, "loss": 0.9251, "step": 6497 }, { "epoch": 0.9902468759524535, "grad_norm": 0.87109375, "learning_rate": 4.988807750447855e-08, "loss": 0.9483, "step": 6498 }, { "epoch": 0.9903992685156965, "grad_norm": 0.97265625, "learning_rate": 4.8341379427041535e-08, "loss": 0.9291, "step": 6499 }, { "epoch": 0.9905516610789393, "grad_norm": 0.9609375, "learning_rate": 4.681903101096952e-08, "loss": 0.9649, "step": 6500 }, { "epoch": 0.9907040536421823, "grad_norm": 0.8125, "learning_rate": 4.5321032627132497e-08, "loss": 0.8492, "step": 6501 }, { "epoch": 0.9908564462054251, "grad_norm": 0.84765625, "learning_rate": 4.384738464047189e-08, "loss": 0.9379, "step": 6502 }, { "epoch": 0.9910088387686681, "grad_norm": 0.921875, "learning_rate": 4.2398087409978304e-08, "loss": 0.8799, "step": 6503 }, { "epoch": 0.991161231331911, "grad_norm": 1.0, "learning_rate": 4.097314128872487e-08, "loss": 1.0985, "step": 6504 }, { "epoch": 0.9913136238951539, "grad_norm": 1.0390625, "learning_rate": 3.9572546623856125e-08, "loss": 0.9444, "step": 6505 }, { "epoch": 0.9914660164583968, "grad_norm": 0.9140625, "learning_rate": 3.81963037565658e-08, "loss": 0.9473, "step": 6506 }, { "epoch": 0.9916184090216398, "grad_norm": 1.1171875, "learning_rate": 3.6844413022141254e-08, "loss": 0.9523, "step": 6507 }, { "epoch": 0.9917708015848826, "grad_norm": 0.890625, "learning_rate": 3.5516874749907947e-08, "loss": 0.8703, "step": 6508 }, { "epoch": 0.9919231941481256, "grad_norm": 0.87890625, "learning_rate": 3.421368926328494e-08, "loss": 1.0607, "step": 6509 }, { "epoch": 0.9920755867113685, "grad_norm": 0.77734375, "learning_rate": 3.293485687974052e-08, "loss": 1.0053, "step": 6510 }, { "epoch": 0.9922279792746114, "grad_norm": 1.0390625, "learning_rate": 3.168037791081435e-08, "loss": 0.906, "step": 6511 }, { "epoch": 0.9923803718378543, "grad_norm": 1.0390625, "learning_rate": 3.045025266211754e-08, "loss": 0.9956, "step": 6512 }, { "epoch": 0.9925327644010973, "grad_norm": 1.078125, "learning_rate": 2.924448143332148e-08, "loss": 1.0685, "step": 6513 }, { "epoch": 0.9926851569643401, "grad_norm": 0.8046875, "learning_rate": 2.8063064518191184e-08, "loss": 0.7952, "step": 6514 }, { "epoch": 0.9928375495275831, "grad_norm": 0.76171875, "learning_rate": 2.6906002204507562e-08, "loss": 0.8644, "step": 6515 }, { "epoch": 0.9929899420908259, "grad_norm": 0.8984375, "learning_rate": 2.5773294774167345e-08, "loss": 0.6781, "step": 6516 }, { "epoch": 0.9931423346540689, "grad_norm": 0.88671875, "learning_rate": 2.4664942503105358e-08, "loss": 0.8264, "step": 6517 }, { "epoch": 0.9932947272173118, "grad_norm": 0.96484375, "learning_rate": 2.358094566133895e-08, "loss": 0.9527, "step": 6518 }, { "epoch": 0.9934471197805547, "grad_norm": 1.0625, "learning_rate": 2.252130451293466e-08, "loss": 0.9186, "step": 6519 }, { "epoch": 0.9935995123437976, "grad_norm": 1.203125, "learning_rate": 2.148601931604155e-08, "loss": 0.7218, "step": 6520 }, { "epoch": 0.9937519049070406, "grad_norm": 0.93359375, "learning_rate": 2.0475090322880087e-08, "loss": 0.8232, "step": 6521 }, { "epoch": 0.9939042974702834, "grad_norm": 1.0234375, "learning_rate": 1.9488517779708838e-08, "loss": 0.9542, "step": 6522 }, { "epoch": 0.9940566900335264, "grad_norm": 0.84375, "learning_rate": 1.8526301926891088e-08, "loss": 0.9353, "step": 6523 }, { "epoch": 0.9942090825967693, "grad_norm": 1.046875, "learning_rate": 1.7588442998817122e-08, "loss": 0.9221, "step": 6524 }, { "epoch": 0.9943614751600122, "grad_norm": 0.83203125, "learning_rate": 1.667494122397084e-08, "loss": 0.9705, "step": 6525 }, { "epoch": 0.9945138677232551, "grad_norm": 0.87890625, "learning_rate": 1.5785796824896447e-08, "loss": 0.9995, "step": 6526 }, { "epoch": 0.9946662602864981, "grad_norm": 1.015625, "learning_rate": 1.492101001820956e-08, "loss": 1.1152, "step": 6527 }, { "epoch": 0.9948186528497409, "grad_norm": 1.1640625, "learning_rate": 1.4080581014563887e-08, "loss": 0.9727, "step": 6528 }, { "epoch": 0.9949710454129839, "grad_norm": 0.90234375, "learning_rate": 1.3264510018717869e-08, "loss": 0.8917, "step": 6529 }, { "epoch": 0.9951234379762267, "grad_norm": 0.96484375, "learning_rate": 1.2472797229479139e-08, "loss": 0.8746, "step": 6530 }, { "epoch": 0.9952758305394697, "grad_norm": 0.95703125, "learning_rate": 1.1705442839704539e-08, "loss": 0.7884, "step": 6531 }, { "epoch": 0.9954282231027126, "grad_norm": 0.76953125, "learning_rate": 1.0962447036344525e-08, "loss": 0.9864, "step": 6532 }, { "epoch": 0.9955806156659555, "grad_norm": 1.3203125, "learning_rate": 1.0243810000398757e-08, "loss": 1.1477, "step": 6533 }, { "epoch": 0.9957330082291984, "grad_norm": 1.0625, "learning_rate": 9.549531906938302e-09, "loss": 1.0232, "step": 6534 }, { "epoch": 0.9958854007924414, "grad_norm": 0.91015625, "learning_rate": 8.879612925105641e-09, "loss": 0.9906, "step": 6535 }, { "epoch": 0.9960377933556842, "grad_norm": 0.85546875, "learning_rate": 8.234053218092452e-09, "loss": 0.9697, "step": 6536 }, { "epoch": 0.9961901859189272, "grad_norm": 1.1015625, "learning_rate": 7.612852943184034e-09, "loss": 1.1492, "step": 6537 }, { "epoch": 0.9963425784821701, "grad_norm": 0.984375, "learning_rate": 7.016012251681581e-09, "loss": 1.1544, "step": 6538 }, { "epoch": 0.996494971045413, "grad_norm": 0.859375, "learning_rate": 6.443531289013205e-09, "loss": 0.7807, "step": 6539 }, { "epoch": 0.9966473636086559, "grad_norm": 1.171875, "learning_rate": 5.895410194645123e-09, "loss": 1.0381, "step": 6540 }, { "epoch": 0.9967997561718989, "grad_norm": 1.1015625, "learning_rate": 5.371649102081655e-09, "loss": 1.0075, "step": 6541 }, { "epoch": 0.9969521487351417, "grad_norm": 0.84375, "learning_rate": 4.872248138931834e-09, "loss": 0.965, "step": 6542 }, { "epoch": 0.9971045412983847, "grad_norm": 0.8671875, "learning_rate": 4.397207426865002e-09, "loss": 0.9207, "step": 6543 }, { "epoch": 0.9972569338616275, "grad_norm": 1.0859375, "learning_rate": 3.946527081599704e-09, "loss": 0.9477, "step": 6544 }, { "epoch": 0.9974093264248705, "grad_norm": 1.0703125, "learning_rate": 3.520207212936999e-09, "loss": 0.9703, "step": 6545 }, { "epoch": 0.9975617189881134, "grad_norm": 0.76171875, "learning_rate": 3.1182479247160444e-09, "loss": 0.8757, "step": 6546 }, { "epoch": 0.9977141115513563, "grad_norm": 0.97265625, "learning_rate": 2.740649314880717e-09, "loss": 0.9456, "step": 6547 }, { "epoch": 0.9978665041145992, "grad_norm": 0.96484375, "learning_rate": 2.3874114754018904e-09, "loss": 0.9417, "step": 6548 }, { "epoch": 0.9980188966778422, "grad_norm": 1.1484375, "learning_rate": 2.058534492344055e-09, "loss": 0.9483, "step": 6549 }, { "epoch": 0.998171289241085, "grad_norm": 0.79296875, "learning_rate": 1.754018445820904e-09, "loss": 0.7993, "step": 6550 }, { "epoch": 0.998323681804328, "grad_norm": 1.1328125, "learning_rate": 1.473863410028642e-09, "loss": 0.9051, "step": 6551 }, { "epoch": 0.9984760743675709, "grad_norm": 1.0234375, "learning_rate": 1.2180694532015758e-09, "loss": 1.1349, "step": 6552 }, { "epoch": 0.9986284669308138, "grad_norm": 0.81640625, "learning_rate": 9.866366376565239e-10, "loss": 0.9544, "step": 6553 }, { "epoch": 0.9987808594940567, "grad_norm": 1.15625, "learning_rate": 7.795650197928161e-10, "loss": 0.9809, "step": 6554 }, { "epoch": 0.9989332520572997, "grad_norm": 0.73828125, "learning_rate": 5.9685465002568e-10, "loss": 0.8528, "step": 6555 }, { "epoch": 0.9990856446205425, "grad_norm": 0.80078125, "learning_rate": 4.3850557289726315e-10, "loss": 1.0045, "step": 6556 }, { "epoch": 0.9992380371837855, "grad_norm": 0.87109375, "learning_rate": 3.04517826954509e-10, "loss": 1.0579, "step": 6557 }, { "epoch": 0.9993904297470283, "grad_norm": 0.890625, "learning_rate": 1.9489144486017906e-10, "loss": 0.8653, "step": 6558 }, { "epoch": 0.9995428223102713, "grad_norm": 0.61328125, "learning_rate": 1.0962645331513698e-10, "loss": 0.7667, "step": 6559 }, { "epoch": 0.9996952148735142, "grad_norm": 1.109375, "learning_rate": 4.872287308055334e-11, "loss": 1.1252, "step": 6560 }, { "epoch": 0.999847607436757, "grad_norm": 0.77734375, "learning_rate": 1.2180719011212205e-11, "loss": 0.787, "step": 6561 }, { "epoch": 1.0, "grad_norm": 1.03125, "learning_rate": 0.0, "loss": 1.122, "step": 6562 } ], "logging_steps": 1, "max_steps": 6562, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.299624826765312e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }