{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 6562,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00015239256324291374,
      "grad_norm": 0.7578125,
      "learning_rate": 1.015228426395939e-06,
      "loss": 0.7205,
      "step": 1
    },
    {
      "epoch": 0.00030478512648582747,
      "grad_norm": 1.171875,
      "learning_rate": 2.030456852791878e-06,
      "loss": 0.9582,
      "step": 2
    },
    {
      "epoch": 0.00045717768972874126,
      "grad_norm": 0.83203125,
      "learning_rate": 3.0456852791878177e-06,
      "loss": 0.9217,
      "step": 3
    },
    {
      "epoch": 0.0006095702529716549,
      "grad_norm": 0.984375,
      "learning_rate": 4.060913705583756e-06,
      "loss": 0.9203,
      "step": 4
    },
    {
      "epoch": 0.0007619628162145687,
      "grad_norm": 0.98046875,
      "learning_rate": 5.076142131979695e-06,
      "loss": 1.0185,
      "step": 5
    },
    {
      "epoch": 0.0009143553794574825,
      "grad_norm": 0.90234375,
      "learning_rate": 6.091370558375635e-06,
      "loss": 0.8611,
      "step": 6
    },
    {
      "epoch": 0.0010667479427003963,
      "grad_norm": 0.95703125,
      "learning_rate": 7.106598984771575e-06,
      "loss": 1.1762,
      "step": 7
    },
    {
      "epoch": 0.0012191405059433099,
      "grad_norm": 1.140625,
      "learning_rate": 8.121827411167512e-06,
      "loss": 0.917,
      "step": 8
    },
    {
      "epoch": 0.0013715330691862237,
      "grad_norm": 0.60546875,
      "learning_rate": 9.137055837563452e-06,
      "loss": 0.9471,
      "step": 9
    },
    {
      "epoch": 0.0015239256324291375,
      "grad_norm": 1.03125,
      "learning_rate": 1.015228426395939e-05,
      "loss": 0.9496,
      "step": 10
    },
    {
      "epoch": 0.0016763181956720513,
      "grad_norm": 0.86328125,
      "learning_rate": 1.116751269035533e-05,
      "loss": 0.9342,
      "step": 11
    },
    {
      "epoch": 0.001828710758914965,
      "grad_norm": 1.0546875,
      "learning_rate": 1.218274111675127e-05,
      "loss": 1.0556,
      "step": 12
    },
    {
      "epoch": 0.0019811033221578786,
      "grad_norm": 1.078125,
      "learning_rate": 1.3197969543147209e-05,
      "loss": 1.0586,
      "step": 13
    },
    {
      "epoch": 0.0021334958854007926,
      "grad_norm": 1.03125,
      "learning_rate": 1.421319796954315e-05,
      "loss": 0.9775,
      "step": 14
    },
    {
      "epoch": 0.002285888448643706,
      "grad_norm": 0.71875,
      "learning_rate": 1.5228426395939088e-05,
      "loss": 0.9953,
      "step": 15
    },
    {
      "epoch": 0.0024382810118866198,
      "grad_norm": 1.09375,
      "learning_rate": 1.6243654822335024e-05,
      "loss": 0.9753,
      "step": 16
    },
    {
      "epoch": 0.0025906735751295338,
      "grad_norm": 0.96484375,
      "learning_rate": 1.7258883248730966e-05,
      "loss": 0.9816,
      "step": 17
    },
    {
      "epoch": 0.0027430661383724473,
      "grad_norm": 1.109375,
      "learning_rate": 1.8274111675126904e-05,
      "loss": 1.0085,
      "step": 18
    },
    {
      "epoch": 0.0028954587016153614,
      "grad_norm": 0.828125,
      "learning_rate": 1.9289340101522843e-05,
      "loss": 1.0766,
      "step": 19
    },
    {
      "epoch": 0.003047851264858275,
      "grad_norm": 0.98046875,
      "learning_rate": 2.030456852791878e-05,
      "loss": 0.9647,
      "step": 20
    },
    {
      "epoch": 0.0032002438281011885,
      "grad_norm": 1.046875,
      "learning_rate": 2.1319796954314723e-05,
      "loss": 0.9234,
      "step": 21
    },
    {
      "epoch": 0.0033526363913441025,
      "grad_norm": 0.8828125,
      "learning_rate": 2.233502538071066e-05,
      "loss": 0.873,
      "step": 22
    },
    {
      "epoch": 0.003505028954587016,
      "grad_norm": 0.80078125,
      "learning_rate": 2.33502538071066e-05,
      "loss": 0.9569,
      "step": 23
    },
    {
      "epoch": 0.00365742151782993,
      "grad_norm": 0.8984375,
      "learning_rate": 2.436548223350254e-05,
      "loss": 1.048,
      "step": 24
    },
    {
      "epoch": 0.0038098140810728437,
      "grad_norm": 0.88671875,
      "learning_rate": 2.5380710659898476e-05,
      "loss": 0.9817,
      "step": 25
    },
    {
      "epoch": 0.003962206644315757,
      "grad_norm": 0.96484375,
      "learning_rate": 2.6395939086294418e-05,
      "loss": 0.9559,
      "step": 26
    },
    {
      "epoch": 0.004114599207558671,
      "grad_norm": 1.0546875,
      "learning_rate": 2.7411167512690357e-05,
      "loss": 1.0673,
      "step": 27
    },
    {
      "epoch": 0.004266991770801585,
      "grad_norm": 0.875,
      "learning_rate": 2.84263959390863e-05,
      "loss": 0.8704,
      "step": 28
    },
    {
      "epoch": 0.004419384334044499,
      "grad_norm": 1.328125,
      "learning_rate": 2.9441624365482233e-05,
      "loss": 1.1046,
      "step": 29
    },
    {
      "epoch": 0.004571776897287412,
      "grad_norm": 1.15625,
      "learning_rate": 3.0456852791878175e-05,
      "loss": 0.9479,
      "step": 30
    },
    {
      "epoch": 0.004724169460530326,
      "grad_norm": 0.91796875,
      "learning_rate": 3.147208121827411e-05,
      "loss": 0.8353,
      "step": 31
    },
    {
      "epoch": 0.0048765620237732395,
      "grad_norm": 0.95703125,
      "learning_rate": 3.248730964467005e-05,
      "loss": 0.9964,
      "step": 32
    },
    {
      "epoch": 0.005028954587016154,
      "grad_norm": 0.80078125,
      "learning_rate": 3.3502538071065994e-05,
      "loss": 0.9566,
      "step": 33
    },
    {
      "epoch": 0.0051813471502590676,
      "grad_norm": 0.94921875,
      "learning_rate": 3.451776649746193e-05,
      "loss": 0.9568,
      "step": 34
    },
    {
      "epoch": 0.005333739713501981,
      "grad_norm": 0.8359375,
      "learning_rate": 3.553299492385787e-05,
      "loss": 0.867,
      "step": 35
    },
    {
      "epoch": 0.005486132276744895,
      "grad_norm": 0.78515625,
      "learning_rate": 3.654822335025381e-05,
      "loss": 1.0921,
      "step": 36
    },
    {
      "epoch": 0.005638524839987808,
      "grad_norm": 0.96484375,
      "learning_rate": 3.756345177664975e-05,
      "loss": 0.9191,
      "step": 37
    },
    {
      "epoch": 0.005790917403230723,
      "grad_norm": 0.84375,
      "learning_rate": 3.8578680203045685e-05,
      "loss": 0.8852,
      "step": 38
    },
    {
      "epoch": 0.005943309966473636,
      "grad_norm": 0.875,
      "learning_rate": 3.959390862944163e-05,
      "loss": 0.9605,
      "step": 39
    },
    {
      "epoch": 0.00609570252971655,
      "grad_norm": 0.859375,
      "learning_rate": 4.060913705583756e-05,
      "loss": 1.0461,
      "step": 40
    },
    {
      "epoch": 0.006248095092959463,
      "grad_norm": 0.8359375,
      "learning_rate": 4.162436548223351e-05,
      "loss": 0.9165,
      "step": 41
    },
    {
      "epoch": 0.006400487656202377,
      "grad_norm": 1.0,
      "learning_rate": 4.2639593908629446e-05,
      "loss": 0.9774,
      "step": 42
    },
    {
      "epoch": 0.0065528802194452914,
      "grad_norm": 1.0859375,
      "learning_rate": 4.365482233502538e-05,
      "loss": 0.9915,
      "step": 43
    },
    {
      "epoch": 0.006705272782688205,
      "grad_norm": 0.9765625,
      "learning_rate": 4.467005076142132e-05,
      "loss": 0.8786,
      "step": 44
    },
    {
      "epoch": 0.006857665345931119,
      "grad_norm": 0.74609375,
      "learning_rate": 4.568527918781726e-05,
      "loss": 0.8457,
      "step": 45
    },
    {
      "epoch": 0.007010057909174032,
      "grad_norm": 1.2890625,
      "learning_rate": 4.67005076142132e-05,
      "loss": 0.9083,
      "step": 46
    },
    {
      "epoch": 0.007162450472416946,
      "grad_norm": 1.234375,
      "learning_rate": 4.771573604060914e-05,
      "loss": 1.0551,
      "step": 47
    },
    {
      "epoch": 0.00731484303565986,
      "grad_norm": 0.90625,
      "learning_rate": 4.873096446700508e-05,
      "loss": 1.0611,
      "step": 48
    },
    {
      "epoch": 0.007467235598902774,
      "grad_norm": 1.0703125,
      "learning_rate": 4.9746192893401014e-05,
      "loss": 0.9896,
      "step": 49
    },
    {
      "epoch": 0.007619628162145687,
      "grad_norm": 1.09375,
      "learning_rate": 5.076142131979695e-05,
      "loss": 0.9979,
      "step": 50
    },
    {
      "epoch": 0.007772020725388601,
      "grad_norm": 0.8046875,
      "learning_rate": 5.17766497461929e-05,
      "loss": 1.0679,
      "step": 51
    },
    {
      "epoch": 0.007924413288631514,
      "grad_norm": 0.80859375,
      "learning_rate": 5.2791878172588836e-05,
      "loss": 0.8748,
      "step": 52
    },
    {
      "epoch": 0.008076805851874428,
      "grad_norm": 0.87109375,
      "learning_rate": 5.380710659898477e-05,
      "loss": 0.9422,
      "step": 53
    },
    {
      "epoch": 0.008229198415117342,
      "grad_norm": 1.0078125,
      "learning_rate": 5.482233502538071e-05,
      "loss": 0.9792,
      "step": 54
    },
    {
      "epoch": 0.008381590978360255,
      "grad_norm": 1.15625,
      "learning_rate": 5.583756345177665e-05,
      "loss": 0.9485,
      "step": 55
    },
    {
      "epoch": 0.00853398354160317,
      "grad_norm": 1.0546875,
      "learning_rate": 5.68527918781726e-05,
      "loss": 0.9123,
      "step": 56
    },
    {
      "epoch": 0.008686376104846084,
      "grad_norm": 0.828125,
      "learning_rate": 5.786802030456853e-05,
      "loss": 0.8716,
      "step": 57
    },
    {
      "epoch": 0.008838768668088998,
      "grad_norm": 0.890625,
      "learning_rate": 5.8883248730964467e-05,
      "loss": 1.0111,
      "step": 58
    },
    {
      "epoch": 0.008991161231331911,
      "grad_norm": 1.0078125,
      "learning_rate": 5.989847715736041e-05,
      "loss": 0.8152,
      "step": 59
    },
    {
      "epoch": 0.009143553794574825,
      "grad_norm": 1.0390625,
      "learning_rate": 6.091370558375635e-05,
      "loss": 1.1772,
      "step": 60
    },
    {
      "epoch": 0.009295946357817738,
      "grad_norm": 0.83984375,
      "learning_rate": 6.192893401015228e-05,
      "loss": 0.9497,
      "step": 61
    },
    {
      "epoch": 0.009448338921060652,
      "grad_norm": 0.93359375,
      "learning_rate": 6.294416243654822e-05,
      "loss": 1.0744,
      "step": 62
    },
    {
      "epoch": 0.009600731484303566,
      "grad_norm": 1.109375,
      "learning_rate": 6.395939086294417e-05,
      "loss": 0.7621,
      "step": 63
    },
    {
      "epoch": 0.009753124047546479,
      "grad_norm": 1.171875,
      "learning_rate": 6.49746192893401e-05,
      "loss": 1.1644,
      "step": 64
    },
    {
      "epoch": 0.009905516610789393,
      "grad_norm": 1.0,
      "learning_rate": 6.598984771573604e-05,
      "loss": 0.8988,
      "step": 65
    },
    {
      "epoch": 0.010057909174032308,
      "grad_norm": 0.921875,
      "learning_rate": 6.700507614213199e-05,
      "loss": 1.0022,
      "step": 66
    },
    {
      "epoch": 0.010210301737275222,
      "grad_norm": 0.8359375,
      "learning_rate": 6.802030456852793e-05,
      "loss": 0.9092,
      "step": 67
    },
    {
      "epoch": 0.010362694300518135,
      "grad_norm": 1.09375,
      "learning_rate": 6.903553299492386e-05,
      "loss": 1.0944,
      "step": 68
    },
    {
      "epoch": 0.010515086863761049,
      "grad_norm": 0.8515625,
      "learning_rate": 7.00507614213198e-05,
      "loss": 0.9674,
      "step": 69
    },
    {
      "epoch": 0.010667479427003962,
      "grad_norm": 0.79296875,
      "learning_rate": 7.106598984771574e-05,
      "loss": 0.9559,
      "step": 70
    },
    {
      "epoch": 0.010819871990246876,
      "grad_norm": 1.0390625,
      "learning_rate": 7.208121827411168e-05,
      "loss": 1.0529,
      "step": 71
    },
    {
      "epoch": 0.01097226455348979,
      "grad_norm": 0.8671875,
      "learning_rate": 7.309644670050762e-05,
      "loss": 1.0138,
      "step": 72
    },
    {
      "epoch": 0.011124657116732703,
      "grad_norm": 0.765625,
      "learning_rate": 7.411167512690356e-05,
      "loss": 1.0677,
      "step": 73
    },
    {
      "epoch": 0.011277049679975617,
      "grad_norm": 0.8046875,
      "learning_rate": 7.51269035532995e-05,
      "loss": 0.9338,
      "step": 74
    },
    {
      "epoch": 0.01142944224321853,
      "grad_norm": 0.8359375,
      "learning_rate": 7.614213197969543e-05,
      "loss": 0.998,
      "step": 75
    },
    {
      "epoch": 0.011581834806461445,
      "grad_norm": 0.8984375,
      "learning_rate": 7.715736040609137e-05,
      "loss": 0.9141,
      "step": 76
    },
    {
      "epoch": 0.011734227369704359,
      "grad_norm": 1.0234375,
      "learning_rate": 7.817258883248731e-05,
      "loss": 1.081,
      "step": 77
    },
    {
      "epoch": 0.011886619932947273,
      "grad_norm": 0.8984375,
      "learning_rate": 7.918781725888326e-05,
      "loss": 0.9523,
      "step": 78
    },
    {
      "epoch": 0.012039012496190186,
      "grad_norm": 0.953125,
      "learning_rate": 8.020304568527919e-05,
      "loss": 1.0154,
      "step": 79
    },
    {
      "epoch": 0.0121914050594331,
      "grad_norm": 0.79296875,
      "learning_rate": 8.121827411167512e-05,
      "loss": 0.9038,
      "step": 80
    },
    {
      "epoch": 0.012343797622676013,
      "grad_norm": 1.09375,
      "learning_rate": 8.223350253807108e-05,
      "loss": 1.1407,
      "step": 81
    },
    {
      "epoch": 0.012496190185918927,
      "grad_norm": 0.8828125,
      "learning_rate": 8.324873096446701e-05,
      "loss": 1.1505,
      "step": 82
    },
    {
      "epoch": 0.01264858274916184,
      "grad_norm": 0.859375,
      "learning_rate": 8.426395939086294e-05,
      "loss": 1.0808,
      "step": 83
    },
    {
      "epoch": 0.012800975312404754,
      "grad_norm": 0.875,
      "learning_rate": 8.527918781725889e-05,
      "loss": 0.9784,
      "step": 84
    },
    {
      "epoch": 0.012953367875647668,
      "grad_norm": 0.66796875,
      "learning_rate": 8.629441624365483e-05,
      "loss": 0.9313,
      "step": 85
    },
    {
      "epoch": 0.013105760438890583,
      "grad_norm": 0.85546875,
      "learning_rate": 8.730964467005075e-05,
      "loss": 0.9209,
      "step": 86
    },
    {
      "epoch": 0.013258153002133496,
      "grad_norm": 1.0625,
      "learning_rate": 8.83248730964467e-05,
      "loss": 1.0023,
      "step": 87
    },
    {
      "epoch": 0.01341054556537641,
      "grad_norm": 1.0078125,
      "learning_rate": 8.934010152284265e-05,
      "loss": 0.9408,
      "step": 88
    },
    {
      "epoch": 0.013562938128619324,
      "grad_norm": 1.1953125,
      "learning_rate": 9.035532994923858e-05,
      "loss": 1.1023,
      "step": 89
    },
    {
      "epoch": 0.013715330691862237,
      "grad_norm": 0.91015625,
      "learning_rate": 9.137055837563452e-05,
      "loss": 0.957,
      "step": 90
    },
    {
      "epoch": 0.01386772325510515,
      "grad_norm": 0.9453125,
      "learning_rate": 9.238578680203046e-05,
      "loss": 1.061,
      "step": 91
    },
    {
      "epoch": 0.014020115818348064,
      "grad_norm": 1.2734375,
      "learning_rate": 9.34010152284264e-05,
      "loss": 0.9003,
      "step": 92
    },
    {
      "epoch": 0.014172508381590978,
      "grad_norm": 0.8359375,
      "learning_rate": 9.441624365482235e-05,
      "loss": 0.9065,
      "step": 93
    },
    {
      "epoch": 0.014324900944833891,
      "grad_norm": 0.9375,
      "learning_rate": 9.543147208121828e-05,
      "loss": 1.0612,
      "step": 94
    },
    {
      "epoch": 0.014477293508076805,
      "grad_norm": 1.0390625,
      "learning_rate": 9.644670050761421e-05,
      "loss": 1.0182,
      "step": 95
    },
    {
      "epoch": 0.01462968607131972,
      "grad_norm": 1.3515625,
      "learning_rate": 9.746192893401017e-05,
      "loss": 0.9418,
      "step": 96
    },
    {
      "epoch": 0.014782078634562634,
      "grad_norm": 0.796875,
      "learning_rate": 9.847715736040609e-05,
      "loss": 0.9644,
      "step": 97
    },
    {
      "epoch": 0.014934471197805547,
      "grad_norm": 0.93359375,
      "learning_rate": 9.949238578680203e-05,
      "loss": 0.9253,
      "step": 98
    },
    {
      "epoch": 0.015086863761048461,
      "grad_norm": 1.1171875,
      "learning_rate": 0.00010050761421319797,
      "loss": 1.0129,
      "step": 99
    },
    {
      "epoch": 0.015239256324291375,
      "grad_norm": 0.91796875,
      "learning_rate": 0.0001015228426395939,
      "loss": 1.186,
      "step": 100
    },
    {
      "epoch": 0.015391648887534288,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00010253807106598984,
      "loss": 1.0104,
      "step": 101
    },
    {
      "epoch": 0.015544041450777202,
      "grad_norm": 0.9296875,
      "learning_rate": 0.0001035532994923858,
      "loss": 0.9711,
      "step": 102
    },
    {
      "epoch": 0.015696434014020117,
      "grad_norm": 0.8984375,
      "learning_rate": 0.00010456852791878173,
      "loss": 0.8904,
      "step": 103
    },
    {
      "epoch": 0.01584882657726303,
      "grad_norm": 0.83203125,
      "learning_rate": 0.00010558375634517767,
      "loss": 0.9196,
      "step": 104
    },
    {
      "epoch": 0.016001219140505944,
      "grad_norm": 1.015625,
      "learning_rate": 0.00010659898477157362,
      "loss": 0.9542,
      "step": 105
    },
    {
      "epoch": 0.016153611703748856,
      "grad_norm": 0.70703125,
      "learning_rate": 0.00010761421319796954,
      "loss": 0.8944,
      "step": 106
    },
    {
      "epoch": 0.01630600426699177,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00010862944162436547,
      "loss": 0.9202,
      "step": 107
    },
    {
      "epoch": 0.016458396830234683,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00010964467005076143,
      "loss": 0.998,
      "step": 108
    },
    {
      "epoch": 0.0166107893934776,
      "grad_norm": 0.8046875,
      "learning_rate": 0.00011065989847715736,
      "loss": 0.9602,
      "step": 109
    },
    {
      "epoch": 0.01676318195672051,
      "grad_norm": 1.0546875,
      "learning_rate": 0.0001116751269035533,
      "loss": 0.9523,
      "step": 110
    },
    {
      "epoch": 0.016915574519963426,
      "grad_norm": 0.8359375,
      "learning_rate": 0.00011269035532994925,
      "loss": 1.1677,
      "step": 111
    },
    {
      "epoch": 0.01706796708320634,
      "grad_norm": 0.8359375,
      "learning_rate": 0.0001137055837563452,
      "loss": 0.961,
      "step": 112
    },
    {
      "epoch": 0.017220359646449253,
      "grad_norm": 0.8125,
      "learning_rate": 0.00011472081218274113,
      "loss": 1.1584,
      "step": 113
    },
    {
      "epoch": 0.017372752209692168,
      "grad_norm": 0.86328125,
      "learning_rate": 0.00011573604060913706,
      "loss": 0.9677,
      "step": 114
    },
    {
      "epoch": 0.01752514477293508,
      "grad_norm": 0.8515625,
      "learning_rate": 0.000116751269035533,
      "loss": 0.9164,
      "step": 115
    },
    {
      "epoch": 0.017677537336177995,
      "grad_norm": 0.80078125,
      "learning_rate": 0.00011776649746192893,
      "loss": 1.0596,
      "step": 116
    },
    {
      "epoch": 0.017829929899420907,
      "grad_norm": 0.953125,
      "learning_rate": 0.00011878172588832489,
      "loss": 1.0332,
      "step": 117
    },
    {
      "epoch": 0.017982322462663822,
      "grad_norm": 0.8125,
      "learning_rate": 0.00011979695431472082,
      "loss": 1.0497,
      "step": 118
    },
    {
      "epoch": 0.018134715025906734,
      "grad_norm": 0.796875,
      "learning_rate": 0.00012081218274111676,
      "loss": 0.9036,
      "step": 119
    },
    {
      "epoch": 0.01828710758914965,
      "grad_norm": 0.890625,
      "learning_rate": 0.0001218274111675127,
      "loss": 1.039,
      "step": 120
    },
    {
      "epoch": 0.018439500152392565,
      "grad_norm": 0.66796875,
      "learning_rate": 0.00012284263959390864,
      "loss": 0.9148,
      "step": 121
    },
    {
      "epoch": 0.018591892715635477,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00012385786802030456,
      "loss": 1.0124,
      "step": 122
    },
    {
      "epoch": 0.018744285278878392,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00012487309644670052,
      "loss": 1.0621,
      "step": 123
    },
    {
      "epoch": 0.018896677842121304,
      "grad_norm": 0.97265625,
      "learning_rate": 0.00012588832487309644,
      "loss": 0.8195,
      "step": 124
    },
    {
      "epoch": 0.01904907040536422,
      "grad_norm": 0.94921875,
      "learning_rate": 0.0001269035532994924,
      "loss": 0.9112,
      "step": 125
    },
    {
      "epoch": 0.01920146296860713,
      "grad_norm": 0.92578125,
      "learning_rate": 0.00012791878172588834,
      "loss": 0.9384,
      "step": 126
    },
    {
      "epoch": 0.019353855531850046,
      "grad_norm": 0.79296875,
      "learning_rate": 0.00012893401015228427,
      "loss": 0.8993,
      "step": 127
    },
    {
      "epoch": 0.019506248095092958,
      "grad_norm": 0.88671875,
      "learning_rate": 0.0001299492385786802,
      "loss": 1.1261,
      "step": 128
    },
    {
      "epoch": 0.019658640658335873,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00013096446700507615,
      "loss": 0.9578,
      "step": 129
    },
    {
      "epoch": 0.019811033221578785,
      "grad_norm": 0.828125,
      "learning_rate": 0.00013197969543147207,
      "loss": 0.8715,
      "step": 130
    },
    {
      "epoch": 0.0199634257848217,
      "grad_norm": 1.0546875,
      "learning_rate": 0.00013299492385786802,
      "loss": 1.1728,
      "step": 131
    },
    {
      "epoch": 0.020115818348064616,
      "grad_norm": 1.015625,
      "learning_rate": 0.00013401015228426397,
      "loss": 1.1392,
      "step": 132
    },
    {
      "epoch": 0.020268210911307528,
      "grad_norm": 0.98828125,
      "learning_rate": 0.0001350253807106599,
      "loss": 1.0943,
      "step": 133
    },
    {
      "epoch": 0.020420603474550443,
      "grad_norm": 1.109375,
      "learning_rate": 0.00013604060913705585,
      "loss": 1.0992,
      "step": 134
    },
    {
      "epoch": 0.020572996037793355,
      "grad_norm": 0.97265625,
      "learning_rate": 0.00013705583756345178,
      "loss": 1.0841,
      "step": 135
    },
    {
      "epoch": 0.02072538860103627,
      "grad_norm": 0.94921875,
      "learning_rate": 0.00013807106598984773,
      "loss": 1.0025,
      "step": 136
    },
    {
      "epoch": 0.020877781164279182,
      "grad_norm": 1.109375,
      "learning_rate": 0.00013908629441624365,
      "loss": 1.1857,
      "step": 137
    },
    {
      "epoch": 0.021030173727522097,
      "grad_norm": 0.85546875,
      "learning_rate": 0.0001401015228426396,
      "loss": 1.0023,
      "step": 138
    },
    {
      "epoch": 0.02118256629076501,
      "grad_norm": 1.1484375,
      "learning_rate": 0.00014111675126903553,
      "loss": 1.1159,
      "step": 139
    },
    {
      "epoch": 0.021334958854007924,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00014213197969543148,
      "loss": 0.9144,
      "step": 140
    },
    {
      "epoch": 0.02148735141725084,
      "grad_norm": 1.0390625,
      "learning_rate": 0.00014314720812182743,
      "loss": 1.1774,
      "step": 141
    },
    {
      "epoch": 0.02163974398049375,
      "grad_norm": 1.2265625,
      "learning_rate": 0.00014416243654822336,
      "loss": 1.1209,
      "step": 142
    },
    {
      "epoch": 0.021792136543736667,
      "grad_norm": 0.97265625,
      "learning_rate": 0.00014517766497461928,
      "loss": 0.7939,
      "step": 143
    },
    {
      "epoch": 0.02194452910697958,
      "grad_norm": 0.92578125,
      "learning_rate": 0.00014619289340101523,
      "loss": 0.9842,
      "step": 144
    },
    {
      "epoch": 0.022096921670222494,
      "grad_norm": 1.2890625,
      "learning_rate": 0.00014720812182741116,
      "loss": 1.031,
      "step": 145
    },
    {
      "epoch": 0.022249314233465406,
      "grad_norm": 1.15625,
      "learning_rate": 0.0001482233502538071,
      "loss": 1.1485,
      "step": 146
    },
    {
      "epoch": 0.02240170679670832,
      "grad_norm": 1.09375,
      "learning_rate": 0.00014923857868020306,
      "loss": 1.073,
      "step": 147
    },
    {
      "epoch": 0.022554099359951233,
      "grad_norm": 0.953125,
      "learning_rate": 0.000150253807106599,
      "loss": 1.0159,
      "step": 148
    },
    {
      "epoch": 0.02270649192319415,
      "grad_norm": 0.86328125,
      "learning_rate": 0.00015126903553299494,
      "loss": 1.0191,
      "step": 149
    },
    {
      "epoch": 0.02285888448643706,
      "grad_norm": 0.9609375,
      "learning_rate": 0.00015228426395939087,
      "loss": 0.9758,
      "step": 150
    },
    {
      "epoch": 0.023011277049679976,
      "grad_norm": 1.0859375,
      "learning_rate": 0.0001532994923857868,
      "loss": 1.0242,
      "step": 151
    },
    {
      "epoch": 0.02316366961292289,
      "grad_norm": 0.90625,
      "learning_rate": 0.00015431472081218274,
      "loss": 1.028,
      "step": 152
    },
    {
      "epoch": 0.023316062176165803,
      "grad_norm": 1.0,
      "learning_rate": 0.0001553299492385787,
      "loss": 0.8286,
      "step": 153
    },
    {
      "epoch": 0.023468454739408718,
      "grad_norm": 1.0546875,
      "learning_rate": 0.00015634517766497462,
      "loss": 1.0554,
      "step": 154
    },
    {
      "epoch": 0.02362084730265163,
      "grad_norm": 0.734375,
      "learning_rate": 0.00015736040609137057,
      "loss": 1.0287,
      "step": 155
    },
    {
      "epoch": 0.023773239865894545,
      "grad_norm": 0.84375,
      "learning_rate": 0.00015837563451776652,
      "loss": 0.8802,
      "step": 156
    },
    {
      "epoch": 0.023925632429137457,
      "grad_norm": 1.1796875,
      "learning_rate": 0.00015939086294416242,
      "loss": 0.949,
      "step": 157
    },
    {
      "epoch": 0.024078024992380372,
      "grad_norm": 1.09375,
      "learning_rate": 0.00016040609137055837,
      "loss": 1.1338,
      "step": 158
    },
    {
      "epoch": 0.024230417555623284,
      "grad_norm": 1.21875,
      "learning_rate": 0.00016142131979695432,
      "loss": 1.0915,
      "step": 159
    },
    {
      "epoch": 0.0243828101188662,
      "grad_norm": 0.84765625,
      "learning_rate": 0.00016243654822335025,
      "loss": 0.9156,
      "step": 160
    },
    {
      "epoch": 0.024535202682109115,
      "grad_norm": 0.79296875,
      "learning_rate": 0.0001634517766497462,
      "loss": 1.0043,
      "step": 161
    },
    {
      "epoch": 0.024687595245352027,
      "grad_norm": 1.265625,
      "learning_rate": 0.00016446700507614215,
      "loss": 1.0961,
      "step": 162
    },
    {
      "epoch": 0.024839987808594942,
      "grad_norm": 0.92578125,
      "learning_rate": 0.00016548223350253808,
      "loss": 0.9317,
      "step": 163
    },
    {
      "epoch": 0.024992380371837854,
      "grad_norm": 0.69140625,
      "learning_rate": 0.00016649746192893403,
      "loss": 0.8042,
      "step": 164
    },
    {
      "epoch": 0.02514477293508077,
      "grad_norm": 0.859375,
      "learning_rate": 0.00016751269035532995,
      "loss": 0.987,
      "step": 165
    },
    {
      "epoch": 0.02529716549832368,
      "grad_norm": 1.109375,
      "learning_rate": 0.00016852791878172588,
      "loss": 1.2074,
      "step": 166
    },
    {
      "epoch": 0.025449558061566596,
      "grad_norm": 0.92578125,
      "learning_rate": 0.00016954314720812183,
      "loss": 0.9698,
      "step": 167
    },
    {
      "epoch": 0.025601950624809508,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00017055837563451778,
      "loss": 1.1308,
      "step": 168
    },
    {
      "epoch": 0.025754343188052423,
      "grad_norm": 0.9296875,
      "learning_rate": 0.0001715736040609137,
      "loss": 1.0576,
      "step": 169
    },
    {
      "epoch": 0.025906735751295335,
      "grad_norm": 0.7578125,
      "learning_rate": 0.00017258883248730966,
      "loss": 0.8457,
      "step": 170
    },
    {
      "epoch": 0.02605912831453825,
      "grad_norm": 0.734375,
      "learning_rate": 0.0001736040609137056,
      "loss": 0.9251,
      "step": 171
    },
    {
      "epoch": 0.026211520877781166,
      "grad_norm": 1.109375,
      "learning_rate": 0.0001746192893401015,
      "loss": 1.0924,
      "step": 172
    },
    {
      "epoch": 0.026363913441024078,
      "grad_norm": 1.046875,
      "learning_rate": 0.00017563451776649746,
      "loss": 1.1698,
      "step": 173
    },
    {
      "epoch": 0.026516306004266993,
      "grad_norm": 0.96875,
      "learning_rate": 0.0001766497461928934,
      "loss": 0.958,
      "step": 174
    },
    {
      "epoch": 0.026668698567509905,
      "grad_norm": 1.1171875,
      "learning_rate": 0.00017766497461928934,
      "loss": 0.9896,
      "step": 175
    },
    {
      "epoch": 0.02682109113075282,
      "grad_norm": 1.0546875,
      "learning_rate": 0.0001786802030456853,
      "loss": 1.1044,
      "step": 176
    },
    {
      "epoch": 0.026973483693995732,
      "grad_norm": 0.7421875,
      "learning_rate": 0.00017969543147208124,
      "loss": 0.9807,
      "step": 177
    },
    {
      "epoch": 0.027125876257238647,
      "grad_norm": 1.296875,
      "learning_rate": 0.00018071065989847717,
      "loss": 1.1164,
      "step": 178
    },
    {
      "epoch": 0.02727826882048156,
      "grad_norm": 0.81640625,
      "learning_rate": 0.0001817258883248731,
      "loss": 1.0367,
      "step": 179
    },
    {
      "epoch": 0.027430661383724474,
      "grad_norm": 1.15625,
      "learning_rate": 0.00018274111675126904,
      "loss": 0.9836,
      "step": 180
    },
    {
      "epoch": 0.02758305394696739,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00018375634517766497,
      "loss": 0.8772,
      "step": 181
    },
    {
      "epoch": 0.0277354465102103,
      "grad_norm": 0.9296875,
      "learning_rate": 0.00018477157360406092,
      "loss": 1.0627,
      "step": 182
    },
    {
      "epoch": 0.027887839073453217,
      "grad_norm": 1.0546875,
      "learning_rate": 0.00018578680203045687,
      "loss": 0.9796,
      "step": 183
    },
    {
      "epoch": 0.02804023163669613,
      "grad_norm": 1.1953125,
      "learning_rate": 0.0001868020304568528,
      "loss": 0.9119,
      "step": 184
    },
    {
      "epoch": 0.028192624199939044,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00018781725888324875,
      "loss": 1.0308,
      "step": 185
    },
    {
      "epoch": 0.028345016763181956,
      "grad_norm": 0.921875,
      "learning_rate": 0.0001888324873096447,
      "loss": 1.0528,
      "step": 186
    },
    {
      "epoch": 0.02849740932642487,
      "grad_norm": 1.5703125,
      "learning_rate": 0.0001898477157360406,
      "loss": 0.9185,
      "step": 187
    },
    {
      "epoch": 0.028649801889667783,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00019086294416243655,
      "loss": 1.1204,
      "step": 188
    },
    {
      "epoch": 0.028802194452910698,
      "grad_norm": 1.3359375,
      "learning_rate": 0.0001918781725888325,
      "loss": 0.9493,
      "step": 189
    },
    {
      "epoch": 0.02895458701615361,
      "grad_norm": 1.4609375,
      "learning_rate": 0.00019289340101522843,
      "loss": 1.0063,
      "step": 190
    },
    {
      "epoch": 0.029106979579396525,
      "grad_norm": 0.86328125,
      "learning_rate": 0.00019390862944162438,
      "loss": 0.8969,
      "step": 191
    },
    {
      "epoch": 0.02925937214263944,
      "grad_norm": 1.015625,
      "learning_rate": 0.00019492385786802033,
      "loss": 1.284,
      "step": 192
    },
    {
      "epoch": 0.029411764705882353,
      "grad_norm": 0.8828125,
      "learning_rate": 0.00019593908629441626,
      "loss": 1.0236,
      "step": 193
    },
    {
      "epoch": 0.029564157269125268,
      "grad_norm": 1.1015625,
      "learning_rate": 0.00019695431472081218,
      "loss": 1.0283,
      "step": 194
    },
    {
      "epoch": 0.02971654983236818,
      "grad_norm": 1.1171875,
      "learning_rate": 0.00019796954314720813,
      "loss": 0.9227,
      "step": 195
    },
    {
      "epoch": 0.029868942395611095,
      "grad_norm": 0.984375,
      "learning_rate": 0.00019898477157360406,
      "loss": 1.0534,
      "step": 196
    },
    {
      "epoch": 0.030021334958854007,
      "grad_norm": 1.1328125,
      "learning_rate": 0.0002,
      "loss": 1.0342,
      "step": 197
    },
    {
      "epoch": 0.030173727522096922,
      "grad_norm": 0.95703125,
      "learning_rate": 0.000199999987819281,
      "loss": 1.0286,
      "step": 198
    },
    {
      "epoch": 0.030326120085339834,
      "grad_norm": 0.76171875,
      "learning_rate": 0.00019999995127712694,
      "loss": 0.823,
      "step": 199
    },
    {
      "epoch": 0.03047851264858275,
      "grad_norm": 0.8125,
      "learning_rate": 0.0001999998903735467,
      "loss": 0.7691,
      "step": 200
    },
    {
      "epoch": 0.030630905211825665,
      "grad_norm": 1.0390625,
      "learning_rate": 0.00019999980510855515,
      "loss": 1.0232,
      "step": 201
    },
    {
      "epoch": 0.030783297775068576,
      "grad_norm": 0.9296875,
      "learning_rate": 0.00019999969548217307,
      "loss": 1.0349,
      "step": 202
    },
    {
      "epoch": 0.03093569033831149,
      "grad_norm": 0.9296875,
      "learning_rate": 0.00019999956149442712,
      "loss": 1.1024,
      "step": 203
    },
    {
      "epoch": 0.031088082901554404,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00019999940314535,
      "loss": 1.066,
      "step": 204
    },
    {
      "epoch": 0.03124047546479732,
      "grad_norm": 0.74609375,
      "learning_rate": 0.00019999922043498024,
      "loss": 1.1117,
      "step": 205
    },
    {
      "epoch": 0.031392868028040234,
      "grad_norm": 0.61328125,
      "learning_rate": 0.00019999901336336236,
      "loss": 0.8319,
      "step": 206
    },
    {
      "epoch": 0.031545260591283146,
      "grad_norm": 0.76953125,
      "learning_rate": 0.0001999987819305468,
      "loss": 0.8868,
      "step": 207
    },
    {
      "epoch": 0.03169765315452606,
      "grad_norm": 0.87890625,
      "learning_rate": 0.00019999852613658998,
      "loss": 1.0696,
      "step": 208
    },
    {
      "epoch": 0.03185004571776897,
      "grad_norm": 1.0390625,
      "learning_rate": 0.0001999982459815542,
      "loss": 1.1077,
      "step": 209
    },
    {
      "epoch": 0.03200243828101189,
      "grad_norm": 0.88671875,
      "learning_rate": 0.00019999794146550767,
      "loss": 0.8409,
      "step": 210
    },
    {
      "epoch": 0.0321548308442548,
      "grad_norm": 0.97265625,
      "learning_rate": 0.0001999976125885246,
      "loss": 1.1005,
      "step": 211
    },
    {
      "epoch": 0.03230722340749771,
      "grad_norm": 0.875,
      "learning_rate": 0.00019999725935068515,
      "loss": 1.0082,
      "step": 212
    },
    {
      "epoch": 0.03245961597074063,
      "grad_norm": 1.0859375,
      "learning_rate": 0.0001999968817520753,
      "loss": 1.1599,
      "step": 213
    },
    {
      "epoch": 0.03261200853398354,
      "grad_norm": 1.015625,
      "learning_rate": 0.00019999647979278707,
      "loss": 0.9409,
      "step": 214
    },
    {
      "epoch": 0.032764401097226455,
      "grad_norm": 1.4609375,
      "learning_rate": 0.00019999605347291842,
      "loss": 0.9594,
      "step": 215
    },
    {
      "epoch": 0.032916793660469366,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00019999560279257314,
      "loss": 1.066,
      "step": 216
    },
    {
      "epoch": 0.033069186223712285,
      "grad_norm": 0.74609375,
      "learning_rate": 0.00019999512775186108,
      "loss": 0.7921,
      "step": 217
    },
    {
      "epoch": 0.0332215787869552,
      "grad_norm": 0.98046875,
      "learning_rate": 0.00019999462835089792,
      "loss": 1.0295,
      "step": 218
    },
    {
      "epoch": 0.03337397135019811,
      "grad_norm": 0.890625,
      "learning_rate": 0.00019999410458980537,
      "loss": 1.0372,
      "step": 219
    },
    {
      "epoch": 0.03352636391344102,
      "grad_norm": 0.6875,
      "learning_rate": 0.000199993556468711,
      "loss": 0.8258,
      "step": 220
    },
    {
      "epoch": 0.03367875647668394,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00019999298398774832,
      "loss": 1.0693,
      "step": 221
    },
    {
      "epoch": 0.03383114903992685,
      "grad_norm": 0.7109375,
      "learning_rate": 0.00019999238714705684,
      "loss": 0.9111,
      "step": 222
    },
    {
      "epoch": 0.03398354160316976,
      "grad_norm": 1.0546875,
      "learning_rate": 0.00019999176594678191,
      "loss": 1.0711,
      "step": 223
    },
    {
      "epoch": 0.03413593416641268,
      "grad_norm": 0.81640625,
      "learning_rate": 0.0001999911203870749,
      "loss": 0.9229,
      "step": 224
    },
    {
      "epoch": 0.034288326729655594,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00019999045046809307,
      "loss": 0.9824,
      "step": 225
    },
    {
      "epoch": 0.034440719292898506,
      "grad_norm": 0.9921875,
      "learning_rate": 0.00019998975618999961,
      "loss": 0.9255,
      "step": 226
    },
    {
      "epoch": 0.03459311185614142,
      "grad_norm": 0.9609375,
      "learning_rate": 0.00019998903755296367,
      "loss": 1.0798,
      "step": 227
    },
    {
      "epoch": 0.034745504419384336,
      "grad_norm": 1.1953125,
      "learning_rate": 0.0001999882945571603,
      "loss": 0.9189,
      "step": 228
    },
    {
      "epoch": 0.03489789698262725,
      "grad_norm": 0.9453125,
      "learning_rate": 0.00019998752720277053,
      "loss": 0.9213,
      "step": 229
    },
    {
      "epoch": 0.03505028954587016,
      "grad_norm": 0.85546875,
      "learning_rate": 0.0001999867354899813,
      "loss": 0.9826,
      "step": 230
    },
    {
      "epoch": 0.03520268210911308,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00019998591941898543,
      "loss": 1.0365,
      "step": 231
    },
    {
      "epoch": 0.03535507467235599,
      "grad_norm": 0.9921875,
      "learning_rate": 0.0001999850789899818,
      "loss": 1.0672,
      "step": 232
    },
    {
      "epoch": 0.0355074672355989,
      "grad_norm": 1.078125,
      "learning_rate": 0.0001999842142031751,
      "loss": 0.9586,
      "step": 233
    },
    {
      "epoch": 0.035659859798841814,
      "grad_norm": 0.828125,
      "learning_rate": 0.00019998332505877605,
      "loss": 0.9838,
      "step": 234
    },
    {
      "epoch": 0.03581225236208473,
      "grad_norm": 0.91015625,
      "learning_rate": 0.0001999824115570012,
      "loss": 0.8488,
      "step": 235
    },
    {
      "epoch": 0.035964644925327645,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00019998147369807313,
      "loss": 1.1119,
      "step": 236
    },
    {
      "epoch": 0.03611703748857056,
      "grad_norm": 0.8359375,
      "learning_rate": 0.0001999805114822203,
      "loss": 1.1544,
      "step": 237
    },
    {
      "epoch": 0.03626943005181347,
      "grad_norm": 1.1953125,
      "learning_rate": 0.00019997952490967712,
      "loss": 1.0722,
      "step": 238
    },
    {
      "epoch": 0.03642182261505639,
      "grad_norm": 0.9765625,
      "learning_rate": 0.00019997851398068396,
      "loss": 1.143,
      "step": 239
    },
    {
      "epoch": 0.0365742151782993,
      "grad_norm": 1.03125,
      "learning_rate": 0.00019997747869548706,
      "loss": 1.0654,
      "step": 240
    },
    {
      "epoch": 0.03672660774154221,
      "grad_norm": 1.078125,
      "learning_rate": 0.00019997641905433869,
      "loss": 1.1018,
      "step": 241
    },
    {
      "epoch": 0.03687900030478513,
      "grad_norm": 0.86328125,
      "learning_rate": 0.0001999753350574969,
      "loss": 0.9869,
      "step": 242
    },
    {
      "epoch": 0.03703139286802804,
      "grad_norm": 0.75390625,
      "learning_rate": 0.00019997422670522586,
      "loss": 1.0451,
      "step": 243
    },
    {
      "epoch": 0.03718378543127095,
      "grad_norm": 0.8359375,
      "learning_rate": 0.00019997309399779551,
      "loss": 0.9428,
      "step": 244
    },
    {
      "epoch": 0.037336177994513865,
      "grad_norm": 0.83203125,
      "learning_rate": 0.00019997193693548182,
      "loss": 0.7763,
      "step": 245
    },
    {
      "epoch": 0.037488570557756784,
      "grad_norm": 1.0390625,
      "learning_rate": 0.00019997075551856668,
      "loss": 1.0506,
      "step": 246
    },
    {
      "epoch": 0.037640963120999696,
      "grad_norm": 0.9453125,
      "learning_rate": 0.0001999695497473379,
      "loss": 1.0215,
      "step": 247
    },
    {
      "epoch": 0.03779335568424261,
      "grad_norm": 1.0625,
      "learning_rate": 0.0001999683196220892,
      "loss": 0.8834,
      "step": 248
    },
    {
      "epoch": 0.03794574824748552,
      "grad_norm": 0.81640625,
      "learning_rate": 0.00019996706514312026,
      "loss": 0.8865,
      "step": 249
    },
    {
      "epoch": 0.03809814081072844,
      "grad_norm": 1.4140625,
      "learning_rate": 0.00019996578631073674,
      "loss": 1.1014,
      "step": 250
    },
    {
      "epoch": 0.03825053337397135,
      "grad_norm": 0.72265625,
      "learning_rate": 0.0001999644831252501,
      "loss": 0.8861,
      "step": 251
    },
    {
      "epoch": 0.03840292593721426,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00019996315558697787,
      "loss": 0.998,
      "step": 252
    },
    {
      "epoch": 0.03855531850045718,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00019996180369624345,
      "loss": 0.9546,
      "step": 253
    },
    {
      "epoch": 0.03870771106370009,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00019996042745337617,
      "loss": 0.8575,
      "step": 254
    },
    {
      "epoch": 0.038860103626943004,
      "grad_norm": 0.78125,
      "learning_rate": 0.0001999590268587113,
      "loss": 0.9153,
      "step": 255
    },
    {
      "epoch": 0.039012496190185916,
      "grad_norm": 0.94921875,
      "learning_rate": 0.00019995760191259004,
      "loss": 1.0723,
      "step": 256
    },
    {
      "epoch": 0.039164888753428835,
      "grad_norm": 0.8828125,
      "learning_rate": 0.00019995615261535953,
      "loss": 0.9129,
      "step": 257
    },
    {
      "epoch": 0.03931728131667175,
      "grad_norm": 0.98046875,
      "learning_rate": 0.00019995467896737287,
      "loss": 0.9207,
      "step": 258
    },
    {
      "epoch": 0.03946967387991466,
      "grad_norm": 0.77734375,
      "learning_rate": 0.00019995318096898906,
      "loss": 0.998,
      "step": 259
    },
    {
      "epoch": 0.03962206644315757,
      "grad_norm": 0.69921875,
      "learning_rate": 0.00019995165862057298,
      "loss": 0.8226,
      "step": 260
    },
    {
      "epoch": 0.03977445900640049,
      "grad_norm": 0.921875,
      "learning_rate": 0.00019995011192249553,
      "loss": 1.0203,
      "step": 261
    },
    {
      "epoch": 0.0399268515696434,
      "grad_norm": 0.953125,
      "learning_rate": 0.0001999485408751335,
      "loss": 0.8918,
      "step": 262
    },
    {
      "epoch": 0.04007924413288631,
      "grad_norm": 0.98046875,
      "learning_rate": 0.00019994694547886965,
      "loss": 0.9566,
      "step": 263
    },
    {
      "epoch": 0.04023163669612923,
      "grad_norm": 1.2421875,
      "learning_rate": 0.00019994532573409262,
      "loss": 1.282,
      "step": 264
    },
    {
      "epoch": 0.040384029259372144,
      "grad_norm": 0.7890625,
      "learning_rate": 0.000199943681641197,
      "loss": 0.921,
      "step": 265
    },
    {
      "epoch": 0.040536421822615055,
      "grad_norm": 0.828125,
      "learning_rate": 0.00019994201320058328,
      "loss": 0.9392,
      "step": 266
    },
    {
      "epoch": 0.04068881438585797,
      "grad_norm": 0.9921875,
      "learning_rate": 0.00019994032041265798,
      "loss": 1.1203,
      "step": 267
    },
    {
      "epoch": 0.040841206949100886,
      "grad_norm": 1.0859375,
      "learning_rate": 0.00019993860327783346,
      "loss": 0.9084,
      "step": 268
    },
    {
      "epoch": 0.0409935995123438,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00019993686179652805,
      "loss": 0.8947,
      "step": 269
    },
    {
      "epoch": 0.04114599207558671,
      "grad_norm": 0.6953125,
      "learning_rate": 0.00019993509596916598,
      "loss": 0.9161,
      "step": 270
    },
    {
      "epoch": 0.04129838463882963,
      "grad_norm": 0.71875,
      "learning_rate": 0.00019993330579617743,
      "loss": 0.9358,
      "step": 271
    },
    {
      "epoch": 0.04145077720207254,
      "grad_norm": 0.9921875,
      "learning_rate": 0.00019993149127799851,
      "loss": 0.9136,
      "step": 272
    },
    {
      "epoch": 0.04160316976531545,
      "grad_norm": 0.8671875,
      "learning_rate": 0.0001999296524150713,
      "loss": 0.8133,
      "step": 273
    },
    {
      "epoch": 0.041755562328558364,
      "grad_norm": 1.046875,
      "learning_rate": 0.00019992778920784371,
      "loss": 0.9302,
      "step": 274
    },
    {
      "epoch": 0.04190795489180128,
      "grad_norm": 1.203125,
      "learning_rate": 0.00019992590165676975,
      "loss": 1.1677,
      "step": 275
    },
    {
      "epoch": 0.042060347455044195,
      "grad_norm": 1.1875,
      "learning_rate": 0.00019992398976230913,
      "loss": 1.0898,
      "step": 276
    },
    {
      "epoch": 0.04221274001828711,
      "grad_norm": 0.9140625,
      "learning_rate": 0.00019992205352492768,
      "loss": 0.8355,
      "step": 277
    },
    {
      "epoch": 0.04236513258153002,
      "grad_norm": 0.7734375,
      "learning_rate": 0.0001999200929450971,
      "loss": 0.9893,
      "step": 278
    },
    {
      "epoch": 0.04251752514477294,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00019991810802329497,
      "loss": 0.9623,
      "step": 279
    },
    {
      "epoch": 0.04266991770801585,
      "grad_norm": 1.2421875,
      "learning_rate": 0.0001999160987600049,
      "loss": 1.0089,
      "step": 280
    },
    {
      "epoch": 0.04282231027125876,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00019991406515571636,
      "loss": 1.0584,
      "step": 281
    },
    {
      "epoch": 0.04297470283450168,
      "grad_norm": 0.8828125,
      "learning_rate": 0.00019991200721092476,
      "loss": 1.1137,
      "step": 282
    },
    {
      "epoch": 0.04312709539774459,
      "grad_norm": 0.84375,
      "learning_rate": 0.0001999099249261314,
      "loss": 1.0398,
      "step": 283
    },
    {
      "epoch": 0.0432794879609875,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00019990781830184364,
      "loss": 0.8726,
      "step": 284
    },
    {
      "epoch": 0.043431880524230415,
      "grad_norm": 1.015625,
      "learning_rate": 0.00019990568733857465,
      "loss": 1.0769,
      "step": 285
    },
    {
      "epoch": 0.043584273087473334,
      "grad_norm": 0.87890625,
      "learning_rate": 0.00019990353203684354,
      "loss": 0.9464,
      "step": 286
    },
    {
      "epoch": 0.043736665650716246,
      "grad_norm": 1.03125,
      "learning_rate": 0.00019990135239717537,
      "loss": 0.9359,
      "step": 287
    },
    {
      "epoch": 0.04388905821395916,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00019989914842010115,
      "loss": 1.0602,
      "step": 288
    },
    {
      "epoch": 0.04404145077720207,
      "grad_norm": 1.078125,
      "learning_rate": 0.00019989692010615785,
      "loss": 0.9259,
      "step": 289
    },
    {
      "epoch": 0.04419384334044499,
      "grad_norm": 0.89453125,
      "learning_rate": 0.0001998946674558882,
      "loss": 0.9085,
      "step": 290
    },
    {
      "epoch": 0.0443462359036879,
      "grad_norm": 0.9609375,
      "learning_rate": 0.00019989239046984107,
      "loss": 1.1952,
      "step": 291
    },
    {
      "epoch": 0.04449862846693081,
      "grad_norm": 0.84375,
      "learning_rate": 0.00019989008914857116,
      "loss": 0.9005,
      "step": 292
    },
    {
      "epoch": 0.04465102103017373,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00019988776349263906,
      "loss": 0.906,
      "step": 293
    },
    {
      "epoch": 0.04480341359341664,
      "grad_norm": 0.99609375,
      "learning_rate": 0.0001998854135026114,
      "loss": 1.0398,
      "step": 294
    },
    {
      "epoch": 0.044955806156659554,
      "grad_norm": 0.89453125,
      "learning_rate": 0.0001998830391790606,
      "loss": 0.9734,
      "step": 295
    },
    {
      "epoch": 0.045108198719902466,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00019988064052256513,
      "loss": 0.9427,
      "step": 296
    },
    {
      "epoch": 0.045260591283145385,
      "grad_norm": 0.6875,
      "learning_rate": 0.0001998782175337093,
      "loss": 0.8479,
      "step": 297
    },
    {
      "epoch": 0.0454129838463883,
      "grad_norm": 0.8828125,
      "learning_rate": 0.0001998757702130834,
      "loss": 0.9326,
      "step": 298
    },
    {
      "epoch": 0.04556537640963121,
      "grad_norm": 0.9765625,
      "learning_rate": 0.00019987329856128368,
      "loss": 0.9602,
      "step": 299
    },
    {
      "epoch": 0.04571776897287412,
      "grad_norm": 1.1328125,
      "learning_rate": 0.00019987080257891218,
      "loss": 1.085,
      "step": 300
    },
    {
      "epoch": 0.04587016153611704,
      "grad_norm": 0.94140625,
      "learning_rate": 0.00019986828226657705,
      "loss": 0.8767,
      "step": 301
    },
    {
      "epoch": 0.04602255409935995,
      "grad_norm": 0.8828125,
      "learning_rate": 0.00019986573762489215,
      "loss": 0.943,
      "step": 302
    },
    {
      "epoch": 0.04617494666260286,
      "grad_norm": 1.03125,
      "learning_rate": 0.00019986316865447753,
      "loss": 1.1201,
      "step": 303
    },
    {
      "epoch": 0.04632733922584578,
      "grad_norm": 1.0625,
      "learning_rate": 0.00019986057535595898,
      "loss": 0.9256,
      "step": 304
    },
    {
      "epoch": 0.046479731789088694,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00019985795772996823,
      "loss": 1.0312,
      "step": 305
    },
    {
      "epoch": 0.046632124352331605,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00019985531577714296,
      "loss": 0.9609,
      "step": 306
    },
    {
      "epoch": 0.04678451691557452,
      "grad_norm": 1.0859375,
      "learning_rate": 0.00019985264949812684,
      "loss": 0.8905,
      "step": 307
    },
    {
      "epoch": 0.046936909478817436,
      "grad_norm": 1.015625,
      "learning_rate": 0.00019984995889356944,
      "loss": 0.9086,
      "step": 308
    },
    {
      "epoch": 0.04708930204206035,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00019984724396412615,
      "loss": 1.0363,
      "step": 309
    },
    {
      "epoch": 0.04724169460530326,
      "grad_norm": 1.1328125,
      "learning_rate": 0.00019984450471045842,
      "loss": 1.0319,
      "step": 310
    },
    {
      "epoch": 0.04739408716854617,
      "grad_norm": 1.0546875,
      "learning_rate": 0.00019984174113323353,
      "loss": 1.0892,
      "step": 311
    },
    {
      "epoch": 0.04754647973178909,
      "grad_norm": 0.98046875,
      "learning_rate": 0.00019983895323312477,
      "loss": 0.7884,
      "step": 312
    },
    {
      "epoch": 0.047698872295032,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00019983614101081124,
      "loss": 0.9175,
      "step": 313
    },
    {
      "epoch": 0.047851264858274914,
      "grad_norm": 1.1328125,
      "learning_rate": 0.00019983330446697814,
      "loss": 1.1251,
      "step": 314
    },
    {
      "epoch": 0.04800365742151783,
      "grad_norm": 1.0,
      "learning_rate": 0.00019983044360231644,
      "loss": 1.2414,
      "step": 315
    },
    {
      "epoch": 0.048156049984760745,
      "grad_norm": 0.9140625,
      "learning_rate": 0.00019982755841752307,
      "loss": 0.852,
      "step": 316
    },
    {
      "epoch": 0.048308442548003656,
      "grad_norm": 1.0390625,
      "learning_rate": 0.00019982464891330092,
      "loss": 1.0165,
      "step": 317
    },
    {
      "epoch": 0.04846083511124657,
      "grad_norm": 0.81640625,
      "learning_rate": 0.00019982171509035882,
      "loss": 0.8185,
      "step": 318
    },
    {
      "epoch": 0.04861322767448949,
      "grad_norm": 0.9921875,
      "learning_rate": 0.00019981875694941145,
      "loss": 1.0755,
      "step": 319
    },
    {
      "epoch": 0.0487656202377324,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00019981577449117947,
      "loss": 1.1725,
      "step": 320
    },
    {
      "epoch": 0.04891801280097531,
      "grad_norm": 0.82421875,
      "learning_rate": 0.00019981276771638943,
      "loss": 0.9712,
      "step": 321
    },
    {
      "epoch": 0.04907040536421823,
      "grad_norm": 0.9921875,
      "learning_rate": 0.00019980973662577384,
      "loss": 1.1417,
      "step": 322
    },
    {
      "epoch": 0.04922279792746114,
      "grad_norm": 0.87890625,
      "learning_rate": 0.00019980668122007115,
      "loss": 1.0349,
      "step": 323
    },
    {
      "epoch": 0.04937519049070405,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00019980360150002565,
      "loss": 1.0212,
      "step": 324
    },
    {
      "epoch": 0.049527583053946965,
      "grad_norm": 0.9375,
      "learning_rate": 0.00019980049746638761,
      "loss": 1.0339,
      "step": 325
    },
    {
      "epoch": 0.049679975617189884,
      "grad_norm": 0.8125,
      "learning_rate": 0.00019979736911991328,
      "loss": 0.7988,
      "step": 326
    },
    {
      "epoch": 0.049832368180432796,
      "grad_norm": 0.8828125,
      "learning_rate": 0.00019979421646136464,
      "loss": 1.0533,
      "step": 327
    },
    {
      "epoch": 0.04998476074367571,
      "grad_norm": 1.5625,
      "learning_rate": 0.00019979103949150986,
      "loss": 1.1661,
      "step": 328
    },
    {
      "epoch": 0.05013715330691862,
      "grad_norm": 1.109375,
      "learning_rate": 0.00019978783821112282,
      "loss": 0.9181,
      "step": 329
    },
    {
      "epoch": 0.05028954587016154,
      "grad_norm": 0.84375,
      "learning_rate": 0.00019978461262098343,
      "loss": 0.9991,
      "step": 330
    },
    {
      "epoch": 0.05044193843340445,
      "grad_norm": 1.046875,
      "learning_rate": 0.00019978136272187747,
      "loss": 1.0475,
      "step": 331
    },
    {
      "epoch": 0.05059433099664736,
      "grad_norm": 0.8125,
      "learning_rate": 0.00019977808851459665,
      "loss": 0.9103,
      "step": 332
    },
    {
      "epoch": 0.05074672355989028,
      "grad_norm": 1.015625,
      "learning_rate": 0.00019977478999993865,
      "loss": 0.922,
      "step": 333
    },
    {
      "epoch": 0.05089911612313319,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00019977146717870702,
      "loss": 1.0749,
      "step": 334
    },
    {
      "epoch": 0.051051508686376104,
      "grad_norm": 0.82421875,
      "learning_rate": 0.00019976812005171124,
      "loss": 0.9866,
      "step": 335
    },
    {
      "epoch": 0.051203901249619016,
      "grad_norm": 0.97265625,
      "learning_rate": 0.00019976474861976672,
      "loss": 0.8576,
      "step": 336
    },
    {
      "epoch": 0.051356293812861935,
      "grad_norm": 0.80078125,
      "learning_rate": 0.00019976135288369483,
      "loss": 0.8456,
      "step": 337
    },
    {
      "epoch": 0.05150868637610485,
      "grad_norm": 1.1171875,
      "learning_rate": 0.00019975793284432273,
      "loss": 1.1343,
      "step": 338
    },
    {
      "epoch": 0.05166107893934776,
      "grad_norm": 0.92578125,
      "learning_rate": 0.00019975448850248368,
      "loss": 1.1164,
      "step": 339
    },
    {
      "epoch": 0.05181347150259067,
      "grad_norm": 1.0390625,
      "learning_rate": 0.0001997510198590167,
      "loss": 0.9002,
      "step": 340
    },
    {
      "epoch": 0.05196586406583359,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00019974752691476687,
      "loss": 1.1543,
      "step": 341
    },
    {
      "epoch": 0.0521182566290765,
      "grad_norm": 1.015625,
      "learning_rate": 0.0001997440096705851,
      "loss": 1.002,
      "step": 342
    },
    {
      "epoch": 0.05227064919231941,
      "grad_norm": 0.87890625,
      "learning_rate": 0.0001997404681273282,
      "loss": 1.0487,
      "step": 343
    },
    {
      "epoch": 0.05242304175556233,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00019973690228585898,
      "loss": 1.0186,
      "step": 344
    },
    {
      "epoch": 0.05257543431880524,
      "grad_norm": 0.9765625,
      "learning_rate": 0.00019973331214704612,
      "loss": 1.0291,
      "step": 345
    },
    {
      "epoch": 0.052727826882048155,
      "grad_norm": 0.73046875,
      "learning_rate": 0.00019972969771176423,
      "loss": 0.9423,
      "step": 346
    },
    {
      "epoch": 0.05288021944529107,
      "grad_norm": 1.046875,
      "learning_rate": 0.00019972605898089385,
      "loss": 0.9147,
      "step": 347
    },
    {
      "epoch": 0.053032612008533986,
      "grad_norm": 1.1171875,
      "learning_rate": 0.0001997223959553214,
      "loss": 1.0007,
      "step": 348
    },
    {
      "epoch": 0.0531850045717769,
      "grad_norm": 1.015625,
      "learning_rate": 0.00019971870863593925,
      "loss": 0.9447,
      "step": 349
    },
    {
      "epoch": 0.05333739713501981,
      "grad_norm": 0.96875,
      "learning_rate": 0.00019971499702364572,
      "loss": 0.9947,
      "step": 350
    },
    {
      "epoch": 0.05348978969826272,
      "grad_norm": 1.0859375,
      "learning_rate": 0.00019971126111934496,
      "loss": 0.9578,
      "step": 351
    },
    {
      "epoch": 0.05364218226150564,
      "grad_norm": 1.078125,
      "learning_rate": 0.00019970750092394716,
      "loss": 0.924,
      "step": 352
    },
    {
      "epoch": 0.05379457482474855,
      "grad_norm": 0.8125,
      "learning_rate": 0.00019970371643836826,
      "loss": 0.9198,
      "step": 353
    },
    {
      "epoch": 0.053946967387991464,
      "grad_norm": 0.8125,
      "learning_rate": 0.00019969990766353031,
      "loss": 0.9998,
      "step": 354
    },
    {
      "epoch": 0.05409935995123438,
      "grad_norm": 0.9609375,
      "learning_rate": 0.00019969607460036115,
      "loss": 0.9067,
      "step": 355
    },
    {
      "epoch": 0.054251752514477294,
      "grad_norm": 1.0859375,
      "learning_rate": 0.00019969221724979453,
      "loss": 1.1258,
      "step": 356
    },
    {
      "epoch": 0.054404145077720206,
      "grad_norm": 1.109375,
      "learning_rate": 0.00019968833561277022,
      "loss": 1.1187,
      "step": 357
    },
    {
      "epoch": 0.05455653764096312,
      "grad_norm": 1.2890625,
      "learning_rate": 0.00019968442969023377,
      "loss": 1.1537,
      "step": 358
    },
    {
      "epoch": 0.05470893020420604,
      "grad_norm": 0.96875,
      "learning_rate": 0.00019968049948313678,
      "loss": 0.8835,
      "step": 359
    },
    {
      "epoch": 0.05486132276744895,
      "grad_norm": 1.140625,
      "learning_rate": 0.00019967654499243668,
      "loss": 1.0197,
      "step": 360
    },
    {
      "epoch": 0.05501371533069186,
      "grad_norm": 0.67578125,
      "learning_rate": 0.00019967256621909686,
      "loss": 0.9618,
      "step": 361
    },
    {
      "epoch": 0.05516610789393478,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00019966856316408659,
      "loss": 0.9653,
      "step": 362
    },
    {
      "epoch": 0.05531850045717769,
      "grad_norm": 0.94140625,
      "learning_rate": 0.00019966453582838107,
      "loss": 1.035,
      "step": 363
    },
    {
      "epoch": 0.0554708930204206,
      "grad_norm": 1.0390625,
      "learning_rate": 0.00019966048421296142,
      "loss": 0.8922,
      "step": 364
    },
    {
      "epoch": 0.055623285583663515,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00019965640831881465,
      "loss": 1.0036,
      "step": 365
    },
    {
      "epoch": 0.055775678146906434,
      "grad_norm": 0.9609375,
      "learning_rate": 0.00019965230814693373,
      "loss": 0.9913,
      "step": 366
    },
    {
      "epoch": 0.055928070710149345,
      "grad_norm": 0.875,
      "learning_rate": 0.00019964818369831754,
      "loss": 0.8536,
      "step": 367
    },
    {
      "epoch": 0.05608046327339226,
      "grad_norm": 0.82421875,
      "learning_rate": 0.00019964403497397084,
      "loss": 1.121,
      "step": 368
    },
    {
      "epoch": 0.05623285583663517,
      "grad_norm": 0.92578125,
      "learning_rate": 0.00019963986197490429,
      "loss": 1.0349,
      "step": 369
    },
    {
      "epoch": 0.05638524839987809,
      "grad_norm": 0.890625,
      "learning_rate": 0.00019963566470213454,
      "loss": 0.8498,
      "step": 370
    },
    {
      "epoch": 0.056537640963121,
      "grad_norm": 0.84765625,
      "learning_rate": 0.00019963144315668407,
      "loss": 0.8778,
      "step": 371
    },
    {
      "epoch": 0.05669003352636391,
      "grad_norm": 0.80078125,
      "learning_rate": 0.00019962719733958133,
      "loss": 1.0433,
      "step": 372
    },
    {
      "epoch": 0.05684242608960683,
      "grad_norm": 1.1171875,
      "learning_rate": 0.00019962292725186066,
      "loss": 1.0183,
      "step": 373
    },
    {
      "epoch": 0.05699481865284974,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00019961863289456226,
      "loss": 0.8186,
      "step": 374
    },
    {
      "epoch": 0.057147211216092654,
      "grad_norm": 1.0546875,
      "learning_rate": 0.00019961431426873238,
      "loss": 0.8383,
      "step": 375
    },
    {
      "epoch": 0.057299603779335566,
      "grad_norm": 0.875,
      "learning_rate": 0.00019960997137542308,
      "loss": 0.8188,
      "step": 376
    },
    {
      "epoch": 0.057451996342578485,
      "grad_norm": 0.70703125,
      "learning_rate": 0.00019960560421569231,
      "loss": 0.8916,
      "step": 377
    },
    {
      "epoch": 0.057604388905821396,
      "grad_norm": 0.8984375,
      "learning_rate": 0.000199601212790604,
      "loss": 1.0563,
      "step": 378
    },
    {
      "epoch": 0.05775678146906431,
      "grad_norm": 0.765625,
      "learning_rate": 0.00019959679710122798,
      "loss": 0.9432,
      "step": 379
    },
    {
      "epoch": 0.05790917403230722,
      "grad_norm": 1.0390625,
      "learning_rate": 0.00019959235714863996,
      "loss": 1.0996,
      "step": 380
    },
    {
      "epoch": 0.05806156659555014,
      "grad_norm": 0.90625,
      "learning_rate": 0.00019958789293392156,
      "loss": 0.9133,
      "step": 381
    },
    {
      "epoch": 0.05821395915879305,
      "grad_norm": 1.078125,
      "learning_rate": 0.00019958340445816033,
      "loss": 1.1101,
      "step": 382
    },
    {
      "epoch": 0.05836635172203596,
      "grad_norm": 0.9609375,
      "learning_rate": 0.00019957889172244974,
      "loss": 1.018,
      "step": 383
    },
    {
      "epoch": 0.05851874428527888,
      "grad_norm": 0.9140625,
      "learning_rate": 0.00019957435472788918,
      "loss": 1.0342,
      "step": 384
    },
    {
      "epoch": 0.05867113684852179,
      "grad_norm": 0.890625,
      "learning_rate": 0.00019956979347558388,
      "loss": 0.8097,
      "step": 385
    },
    {
      "epoch": 0.058823529411764705,
      "grad_norm": 0.84375,
      "learning_rate": 0.00019956520796664506,
      "loss": 0.9413,
      "step": 386
    },
    {
      "epoch": 0.05897592197500762,
      "grad_norm": 0.89453125,
      "learning_rate": 0.00019956059820218982,
      "loss": 0.824,
      "step": 387
    },
    {
      "epoch": 0.059128314538250536,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00019955596418334116,
      "loss": 0.9896,
      "step": 388
    },
    {
      "epoch": 0.05928070710149345,
      "grad_norm": 1.21875,
      "learning_rate": 0.00019955130591122795,
      "loss": 0.9605,
      "step": 389
    },
    {
      "epoch": 0.05943309966473636,
      "grad_norm": 0.921875,
      "learning_rate": 0.00019954662338698503,
      "loss": 1.0222,
      "step": 390
    },
    {
      "epoch": 0.05958549222797927,
      "grad_norm": 0.84375,
      "learning_rate": 0.00019954191661175318,
      "loss": 0.8537,
      "step": 391
    },
    {
      "epoch": 0.05973788479122219,
      "grad_norm": 1.0390625,
      "learning_rate": 0.000199537185586679,
      "loss": 0.9052,
      "step": 392
    },
    {
      "epoch": 0.0598902773544651,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00019953243031291503,
      "loss": 1.1847,
      "step": 393
    },
    {
      "epoch": 0.060042669917708014,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00019952765079161977,
      "loss": 1.0655,
      "step": 394
    },
    {
      "epoch": 0.06019506248095093,
      "grad_norm": 0.96484375,
      "learning_rate": 0.0001995228470239575,
      "loss": 0.9553,
      "step": 395
    },
    {
      "epoch": 0.060347455044193844,
      "grad_norm": 1.015625,
      "learning_rate": 0.00019951801901109856,
      "loss": 1.0424,
      "step": 396
    },
    {
      "epoch": 0.060499847607436756,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00019951316675421906,
      "loss": 0.9366,
      "step": 397
    },
    {
      "epoch": 0.06065224017067967,
      "grad_norm": 0.96484375,
      "learning_rate": 0.00019950829025450114,
      "loss": 0.9997,
      "step": 398
    },
    {
      "epoch": 0.06080463273392259,
      "grad_norm": 0.875,
      "learning_rate": 0.00019950338951313276,
      "loss": 0.9607,
      "step": 399
    },
    {
      "epoch": 0.0609570252971655,
      "grad_norm": 0.87109375,
      "learning_rate": 0.0001994984645313078,
      "loss": 0.9494,
      "step": 400
    },
    {
      "epoch": 0.06110941786040841,
      "grad_norm": 0.9453125,
      "learning_rate": 0.00019949351531022607,
      "loss": 1.097,
      "step": 401
    },
    {
      "epoch": 0.06126181042365133,
      "grad_norm": 1.1171875,
      "learning_rate": 0.00019948854185109326,
      "loss": 1.0597,
      "step": 402
    },
    {
      "epoch": 0.06141420298689424,
      "grad_norm": 1.0234375,
      "learning_rate": 0.000199483544155121,
      "loss": 1.1185,
      "step": 403
    },
    {
      "epoch": 0.06156659555013715,
      "grad_norm": 0.984375,
      "learning_rate": 0.0001994785222235268,
      "loss": 1.0865,
      "step": 404
    },
    {
      "epoch": 0.061718988113380065,
      "grad_norm": 0.953125,
      "learning_rate": 0.000199473476057534,
      "loss": 0.9695,
      "step": 405
    },
    {
      "epoch": 0.06187138067662298,
      "grad_norm": 0.984375,
      "learning_rate": 0.00019946840565837203,
      "loss": 0.9979,
      "step": 406
    },
    {
      "epoch": 0.062023773239865895,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00019946331102727605,
      "loss": 0.9028,
      "step": 407
    },
    {
      "epoch": 0.06217616580310881,
      "grad_norm": 1.203125,
      "learning_rate": 0.00019945819216548718,
      "loss": 1.0653,
      "step": 408
    },
    {
      "epoch": 0.06232855836635172,
      "grad_norm": 1.328125,
      "learning_rate": 0.00019945304907425246,
      "loss": 1.038,
      "step": 409
    },
    {
      "epoch": 0.06248095092959464,
      "grad_norm": 0.87890625,
      "learning_rate": 0.00019944788175482484,
      "loss": 0.9478,
      "step": 410
    },
    {
      "epoch": 0.06263334349283754,
      "grad_norm": 0.8046875,
      "learning_rate": 0.00019944269020846314,
      "loss": 0.9297,
      "step": 411
    },
    {
      "epoch": 0.06278573605608047,
      "grad_norm": 1.0625,
      "learning_rate": 0.0001994374744364321,
      "loss": 1.0548,
      "step": 412
    },
    {
      "epoch": 0.06293812861932338,
      "grad_norm": 0.703125,
      "learning_rate": 0.0001994322344400023,
      "loss": 1.0419,
      "step": 413
    },
    {
      "epoch": 0.06309052118256629,
      "grad_norm": 1.0,
      "learning_rate": 0.0001994269702204504,
      "loss": 0.842,
      "step": 414
    },
    {
      "epoch": 0.0632429137458092,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00019942168177905875,
      "loss": 0.9452,
      "step": 415
    },
    {
      "epoch": 0.06339530630905212,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00019941636911711567,
      "loss": 0.8936,
      "step": 416
    },
    {
      "epoch": 0.06354769887229503,
      "grad_norm": 0.89453125,
      "learning_rate": 0.0001994110322359155,
      "loss": 0.9496,
      "step": 417
    },
    {
      "epoch": 0.06370009143553794,
      "grad_norm": 1.03125,
      "learning_rate": 0.00019940567113675828,
      "loss": 1.0961,
      "step": 418
    },
    {
      "epoch": 0.06385248399878087,
      "grad_norm": 1.140625,
      "learning_rate": 0.0001994002858209501,
      "loss": 1.1257,
      "step": 419
    },
    {
      "epoch": 0.06400487656202378,
      "grad_norm": 1.359375,
      "learning_rate": 0.00019939487628980288,
      "loss": 1.1337,
      "step": 420
    },
    {
      "epoch": 0.06415726912526669,
      "grad_norm": 1.15625,
      "learning_rate": 0.00019938944254463447,
      "loss": 1.0699,
      "step": 421
    },
    {
      "epoch": 0.0643096616885096,
      "grad_norm": 0.8671875,
      "learning_rate": 0.0001993839845867686,
      "loss": 0.8975,
      "step": 422
    },
    {
      "epoch": 0.06446205425175251,
      "grad_norm": 0.9296875,
      "learning_rate": 0.00019937850241753499,
      "loss": 0.9068,
      "step": 423
    },
    {
      "epoch": 0.06461444681499542,
      "grad_norm": 0.94140625,
      "learning_rate": 0.00019937299603826902,
      "loss": 1.1045,
      "step": 424
    },
    {
      "epoch": 0.06476683937823834,
      "grad_norm": 0.8125,
      "learning_rate": 0.00019936746545031223,
      "loss": 0.8842,
      "step": 425
    },
    {
      "epoch": 0.06491923194148126,
      "grad_norm": 0.77734375,
      "learning_rate": 0.00019936191065501193,
      "loss": 0.837,
      "step": 426
    },
    {
      "epoch": 0.06507162450472417,
      "grad_norm": 0.87890625,
      "learning_rate": 0.00019935633165372137,
      "loss": 0.9144,
      "step": 427
    },
    {
      "epoch": 0.06522401706796709,
      "grad_norm": 0.9375,
      "learning_rate": 0.0001993507284477996,
      "loss": 0.9266,
      "step": 428
    },
    {
      "epoch": 0.06537640963121,
      "grad_norm": 0.9453125,
      "learning_rate": 0.00019934510103861172,
      "loss": 1.0872,
      "step": 429
    },
    {
      "epoch": 0.06552880219445291,
      "grad_norm": 0.796875,
      "learning_rate": 0.00019933944942752865,
      "loss": 0.9141,
      "step": 430
    },
    {
      "epoch": 0.06568119475769582,
      "grad_norm": 1.15625,
      "learning_rate": 0.00019933377361592712,
      "loss": 1.1848,
      "step": 431
    },
    {
      "epoch": 0.06583358732093873,
      "grad_norm": 0.82421875,
      "learning_rate": 0.0001993280736051899,
      "loss": 1.0234,
      "step": 432
    },
    {
      "epoch": 0.06598597988418166,
      "grad_norm": 0.80078125,
      "learning_rate": 0.00019932234939670562,
      "loss": 0.8515,
      "step": 433
    },
    {
      "epoch": 0.06613837244742457,
      "grad_norm": 1.0078125,
      "learning_rate": 0.0001993166009918687,
      "loss": 0.849,
      "step": 434
    },
    {
      "epoch": 0.06629076501066748,
      "grad_norm": 0.8125,
      "learning_rate": 0.0001993108283920796,
      "loss": 0.9821,
      "step": 435
    },
    {
      "epoch": 0.0664431575739104,
      "grad_norm": 1.140625,
      "learning_rate": 0.00019930503159874458,
      "loss": 1.0171,
      "step": 436
    },
    {
      "epoch": 0.0665955501371533,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00019929921061327585,
      "loss": 0.9689,
      "step": 437
    },
    {
      "epoch": 0.06674794270039622,
      "grad_norm": 1.125,
      "learning_rate": 0.00019929336543709147,
      "loss": 0.9135,
      "step": 438
    },
    {
      "epoch": 0.06690033526363913,
      "grad_norm": 0.89453125,
      "learning_rate": 0.00019928749607161538,
      "loss": 1.1167,
      "step": 439
    },
    {
      "epoch": 0.06705272782688204,
      "grad_norm": 1.09375,
      "learning_rate": 0.00019928160251827748,
      "loss": 1.0632,
      "step": 440
    },
    {
      "epoch": 0.06720512039012497,
      "grad_norm": 1.296875,
      "learning_rate": 0.0001992756847785135,
      "loss": 1.0173,
      "step": 441
    },
    {
      "epoch": 0.06735751295336788,
      "grad_norm": 0.984375,
      "learning_rate": 0.00019926974285376512,
      "loss": 1.0806,
      "step": 442
    },
    {
      "epoch": 0.06750990551661079,
      "grad_norm": 1.015625,
      "learning_rate": 0.00019926377674547985,
      "loss": 0.9264,
      "step": 443
    },
    {
      "epoch": 0.0676622980798537,
      "grad_norm": 1.046875,
      "learning_rate": 0.0001992577864551111,
      "loss": 1.105,
      "step": 444
    },
    {
      "epoch": 0.06781469064309661,
      "grad_norm": 1.03125,
      "learning_rate": 0.00019925177198411824,
      "loss": 1.1039,
      "step": 445
    },
    {
      "epoch": 0.06796708320633953,
      "grad_norm": 0.86328125,
      "learning_rate": 0.00019924573333396646,
      "loss": 1.1316,
      "step": 446
    },
    {
      "epoch": 0.06811947576958244,
      "grad_norm": 0.828125,
      "learning_rate": 0.00019923967050612684,
      "loss": 0.9086,
      "step": 447
    },
    {
      "epoch": 0.06827186833282536,
      "grad_norm": 0.92578125,
      "learning_rate": 0.0001992335835020764,
      "loss": 0.9488,
      "step": 448
    },
    {
      "epoch": 0.06842426089606828,
      "grad_norm": 0.890625,
      "learning_rate": 0.00019922747232329805,
      "loss": 0.9568,
      "step": 449
    },
    {
      "epoch": 0.06857665345931119,
      "grad_norm": 1.1640625,
      "learning_rate": 0.0001992213369712805,
      "loss": 1.0175,
      "step": 450
    },
    {
      "epoch": 0.0687290460225541,
      "grad_norm": 1.109375,
      "learning_rate": 0.00019921517744751844,
      "loss": 0.7923,
      "step": 451
    },
    {
      "epoch": 0.06888143858579701,
      "grad_norm": 1.0859375,
      "learning_rate": 0.00019920899375351238,
      "loss": 0.9906,
      "step": 452
    },
    {
      "epoch": 0.06903383114903992,
      "grad_norm": 0.76171875,
      "learning_rate": 0.00019920278589076882,
      "loss": 0.8194,
      "step": 453
    },
    {
      "epoch": 0.06918622371228283,
      "grad_norm": 1.1171875,
      "learning_rate": 0.00019919655386080006,
      "loss": 1.0738,
      "step": 454
    },
    {
      "epoch": 0.06933861627552576,
      "grad_norm": 1.109375,
      "learning_rate": 0.00019919029766512426,
      "loss": 0.956,
      "step": 455
    },
    {
      "epoch": 0.06949100883876867,
      "grad_norm": 1.1171875,
      "learning_rate": 0.00019918401730526563,
      "loss": 0.8875,
      "step": 456
    },
    {
      "epoch": 0.06964340140201158,
      "grad_norm": 0.81640625,
      "learning_rate": 0.00019917771278275405,
      "loss": 0.7414,
      "step": 457
    },
    {
      "epoch": 0.0697957939652545,
      "grad_norm": 0.953125,
      "learning_rate": 0.0001991713840991255,
      "loss": 1.0258,
      "step": 458
    },
    {
      "epoch": 0.06994818652849741,
      "grad_norm": 0.84375,
      "learning_rate": 0.00019916503125592158,
      "loss": 0.9117,
      "step": 459
    },
    {
      "epoch": 0.07010057909174032,
      "grad_norm": 1.0390625,
      "learning_rate": 0.00019915865425469006,
      "loss": 0.869,
      "step": 460
    },
    {
      "epoch": 0.07025297165498323,
      "grad_norm": 0.94140625,
      "learning_rate": 0.0001991522530969845,
      "loss": 0.9099,
      "step": 461
    },
    {
      "epoch": 0.07040536421822616,
      "grad_norm": 1.15625,
      "learning_rate": 0.00019914582778436416,
      "loss": 1.0339,
      "step": 462
    },
    {
      "epoch": 0.07055775678146907,
      "grad_norm": 0.95703125,
      "learning_rate": 0.0001991393783183945,
      "loss": 1.0592,
      "step": 463
    },
    {
      "epoch": 0.07071014934471198,
      "grad_norm": 0.84375,
      "learning_rate": 0.0001991329047006466,
      "loss": 1.0014,
      "step": 464
    },
    {
      "epoch": 0.07086254190795489,
      "grad_norm": 1.03125,
      "learning_rate": 0.00019912640693269752,
      "loss": 0.8788,
      "step": 465
    },
    {
      "epoch": 0.0710149344711978,
      "grad_norm": 0.87109375,
      "learning_rate": 0.0001991198850161303,
      "loss": 0.7926,
      "step": 466
    },
    {
      "epoch": 0.07116732703444072,
      "grad_norm": 0.93359375,
      "learning_rate": 0.0001991133389525337,
      "loss": 1.0245,
      "step": 467
    },
    {
      "epoch": 0.07131971959768363,
      "grad_norm": 0.953125,
      "learning_rate": 0.00019910676874350248,
      "loss": 0.9228,
      "step": 468
    },
    {
      "epoch": 0.07147211216092654,
      "grad_norm": 0.859375,
      "learning_rate": 0.00019910017439063717,
      "loss": 0.9084,
      "step": 469
    },
    {
      "epoch": 0.07162450472416947,
      "grad_norm": 1.171875,
      "learning_rate": 0.00019909355589554432,
      "loss": 1.0,
      "step": 470
    },
    {
      "epoch": 0.07177689728741238,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00019908691325983626,
      "loss": 0.8339,
      "step": 471
    },
    {
      "epoch": 0.07192928985065529,
      "grad_norm": 1.15625,
      "learning_rate": 0.0001990802464851312,
      "loss": 1.1061,
      "step": 472
    },
    {
      "epoch": 0.0720816824138982,
      "grad_norm": 0.74609375,
      "learning_rate": 0.00019907355557305335,
      "loss": 0.8941,
      "step": 473
    },
    {
      "epoch": 0.07223407497714111,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00019906684052523263,
      "loss": 1.0641,
      "step": 474
    },
    {
      "epoch": 0.07238646754038403,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00019906010134330499,
      "loss": 1.0931,
      "step": 475
    },
    {
      "epoch": 0.07253886010362694,
      "grad_norm": 0.96484375,
      "learning_rate": 0.0001990533380289121,
      "loss": 0.9464,
      "step": 476
    },
    {
      "epoch": 0.07269125266686986,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00019904655058370168,
      "loss": 0.9419,
      "step": 477
    },
    {
      "epoch": 0.07284364523011277,
      "grad_norm": 0.7421875,
      "learning_rate": 0.0001990397390093272,
      "loss": 0.9849,
      "step": 478
    },
    {
      "epoch": 0.07299603779335569,
      "grad_norm": 0.91796875,
      "learning_rate": 0.0001990329033074481,
      "loss": 1.031,
      "step": 479
    },
    {
      "epoch": 0.0731484303565986,
      "grad_norm": 0.7734375,
      "learning_rate": 0.00019902604347972965,
      "loss": 0.9186,
      "step": 480
    },
    {
      "epoch": 0.07330082291984151,
      "grad_norm": 0.9921875,
      "learning_rate": 0.00019901915952784296,
      "loss": 0.8674,
      "step": 481
    },
    {
      "epoch": 0.07345321548308442,
      "grad_norm": 0.76171875,
      "learning_rate": 0.0001990122514534651,
      "loss": 0.9273,
      "step": 482
    },
    {
      "epoch": 0.07360560804632733,
      "grad_norm": 1.40625,
      "learning_rate": 0.00019900531925827898,
      "loss": 1.0726,
      "step": 483
    },
    {
      "epoch": 0.07375800060957026,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00019899836294397333,
      "loss": 1.0304,
      "step": 484
    },
    {
      "epoch": 0.07391039317281317,
      "grad_norm": 0.9296875,
      "learning_rate": 0.00019899138251224286,
      "loss": 1.0895,
      "step": 485
    },
    {
      "epoch": 0.07406278573605608,
      "grad_norm": 1.1328125,
      "learning_rate": 0.0001989843779647881,
      "loss": 1.0304,
      "step": 486
    },
    {
      "epoch": 0.074215178299299,
      "grad_norm": 0.81640625,
      "learning_rate": 0.00019897734930331544,
      "loss": 1.0151,
      "step": 487
    },
    {
      "epoch": 0.0743675708625419,
      "grad_norm": 0.84765625,
      "learning_rate": 0.00019897029652953717,
      "loss": 0.9714,
      "step": 488
    },
    {
      "epoch": 0.07451996342578482,
      "grad_norm": 1.0546875,
      "learning_rate": 0.00019896321964517143,
      "loss": 0.8082,
      "step": 489
    },
    {
      "epoch": 0.07467235598902773,
      "grad_norm": 1.2265625,
      "learning_rate": 0.00019895611865194227,
      "loss": 1.0635,
      "step": 490
    },
    {
      "epoch": 0.07482474855227064,
      "grad_norm": 0.9765625,
      "learning_rate": 0.00019894899355157964,
      "loss": 0.8819,
      "step": 491
    },
    {
      "epoch": 0.07497714111551357,
      "grad_norm": 0.8359375,
      "learning_rate": 0.0001989418443458192,
      "loss": 1.0908,
      "step": 492
    },
    {
      "epoch": 0.07512953367875648,
      "grad_norm": 0.859375,
      "learning_rate": 0.00019893467103640272,
      "loss": 0.8612,
      "step": 493
    },
    {
      "epoch": 0.07528192624199939,
      "grad_norm": 1.1640625,
      "learning_rate": 0.00019892747362507764,
      "loss": 1.208,
      "step": 494
    },
    {
      "epoch": 0.0754343188052423,
      "grad_norm": 0.94921875,
      "learning_rate": 0.00019892025211359741,
      "loss": 1.1525,
      "step": 495
    },
    {
      "epoch": 0.07558671136848522,
      "grad_norm": 0.765625,
      "learning_rate": 0.00019891300650372125,
      "loss": 0.9456,
      "step": 496
    },
    {
      "epoch": 0.07573910393172813,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00019890573679721428,
      "loss": 0.9299,
      "step": 497
    },
    {
      "epoch": 0.07589149649497104,
      "grad_norm": 0.796875,
      "learning_rate": 0.00019889844299584758,
      "loss": 0.8221,
      "step": 498
    },
    {
      "epoch": 0.07604388905821396,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00019889112510139797,
      "loss": 0.9054,
      "step": 499
    },
    {
      "epoch": 0.07619628162145688,
      "grad_norm": 1.1640625,
      "learning_rate": 0.00019888378311564822,
      "loss": 1.127,
      "step": 500
    },
    {
      "epoch": 0.07634867418469979,
      "grad_norm": 1.0,
      "learning_rate": 0.00019887641704038688,
      "loss": 1.0342,
      "step": 501
    },
    {
      "epoch": 0.0765010667479427,
      "grad_norm": 0.93359375,
      "learning_rate": 0.0001988690268774085,
      "loss": 1.0887,
      "step": 502
    },
    {
      "epoch": 0.07665345931118561,
      "grad_norm": 1.1796875,
      "learning_rate": 0.00019886161262851345,
      "loss": 0.9269,
      "step": 503
    },
    {
      "epoch": 0.07680585187442852,
      "grad_norm": 1.2421875,
      "learning_rate": 0.00019885417429550787,
      "loss": 1.1444,
      "step": 504
    },
    {
      "epoch": 0.07695824443767144,
      "grad_norm": 0.9609375,
      "learning_rate": 0.0001988467118802039,
      "loss": 0.9132,
      "step": 505
    },
    {
      "epoch": 0.07711063700091436,
      "grad_norm": 1.1171875,
      "learning_rate": 0.00019883922538441946,
      "loss": 0.9949,
      "step": 506
    },
    {
      "epoch": 0.07726302956415727,
      "grad_norm": 0.8828125,
      "learning_rate": 0.0001988317148099784,
      "loss": 0.9268,
      "step": 507
    },
    {
      "epoch": 0.07741542212740019,
      "grad_norm": 0.96875,
      "learning_rate": 0.00019882418015871036,
      "loss": 1.0005,
      "step": 508
    },
    {
      "epoch": 0.0775678146906431,
      "grad_norm": 0.81640625,
      "learning_rate": 0.00019881662143245092,
      "loss": 1.053,
      "step": 509
    },
    {
      "epoch": 0.07772020725388601,
      "grad_norm": 1.046875,
      "learning_rate": 0.0001988090386330415,
      "loss": 1.0385,
      "step": 510
    },
    {
      "epoch": 0.07787259981712892,
      "grad_norm": 0.984375,
      "learning_rate": 0.00019880143176232936,
      "loss": 0.9572,
      "step": 511
    },
    {
      "epoch": 0.07802499238037183,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00019879380082216767,
      "loss": 1.0716,
      "step": 512
    },
    {
      "epoch": 0.07817738494361476,
      "grad_norm": 1.046875,
      "learning_rate": 0.00019878614581441542,
      "loss": 0.9244,
      "step": 513
    },
    {
      "epoch": 0.07832977750685767,
      "grad_norm": 0.82421875,
      "learning_rate": 0.00019877846674093747,
      "loss": 0.8527,
      "step": 514
    },
    {
      "epoch": 0.07848217007010058,
      "grad_norm": 1.1015625,
      "learning_rate": 0.00019877076360360455,
      "loss": 0.8301,
      "step": 515
    },
    {
      "epoch": 0.0786345626333435,
      "grad_norm": 0.87109375,
      "learning_rate": 0.0001987630364042933,
      "loss": 0.9355,
      "step": 516
    },
    {
      "epoch": 0.0787869551965864,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00019875528514488614,
      "loss": 0.8334,
      "step": 517
    },
    {
      "epoch": 0.07893934775982932,
      "grad_norm": 0.87890625,
      "learning_rate": 0.00019874750982727134,
      "loss": 0.987,
      "step": 518
    },
    {
      "epoch": 0.07909174032307223,
      "grad_norm": 1.109375,
      "learning_rate": 0.00019873971045334318,
      "loss": 1.106,
      "step": 519
    },
    {
      "epoch": 0.07924413288631514,
      "grad_norm": 0.96484375,
      "learning_rate": 0.00019873188702500163,
      "loss": 0.9208,
      "step": 520
    },
    {
      "epoch": 0.07939652544955807,
      "grad_norm": 1.03125,
      "learning_rate": 0.00019872403954415262,
      "loss": 1.1444,
      "step": 521
    },
    {
      "epoch": 0.07954891801280098,
      "grad_norm": 0.9921875,
      "learning_rate": 0.0001987161680127079,
      "loss": 0.9838,
      "step": 522
    },
    {
      "epoch": 0.07970131057604389,
      "grad_norm": 0.82421875,
      "learning_rate": 0.00019870827243258509,
      "loss": 0.9818,
      "step": 523
    },
    {
      "epoch": 0.0798537031392868,
      "grad_norm": 0.828125,
      "learning_rate": 0.00019870035280570764,
      "loss": 0.9842,
      "step": 524
    },
    {
      "epoch": 0.08000609570252971,
      "grad_norm": 1.1875,
      "learning_rate": 0.00019869240913400496,
      "loss": 1.0893,
      "step": 525
    },
    {
      "epoch": 0.08015848826577263,
      "grad_norm": 0.79296875,
      "learning_rate": 0.00019868444141941214,
      "loss": 0.9572,
      "step": 526
    },
    {
      "epoch": 0.08031088082901554,
      "grad_norm": 1.015625,
      "learning_rate": 0.0001986764496638703,
      "loss": 1.1139,
      "step": 527
    },
    {
      "epoch": 0.08046327339225846,
      "grad_norm": 0.765625,
      "learning_rate": 0.00019866843386932633,
      "loss": 0.8722,
      "step": 528
    },
    {
      "epoch": 0.08061566595550138,
      "grad_norm": 1.125,
      "learning_rate": 0.000198660394037733,
      "loss": 0.9633,
      "step": 529
    },
    {
      "epoch": 0.08076805851874429,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00019865233017104893,
      "loss": 1.0017,
      "step": 530
    },
    {
      "epoch": 0.0809204510819872,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00019864424227123854,
      "loss": 0.9469,
      "step": 531
    },
    {
      "epoch": 0.08107284364523011,
      "grad_norm": 0.88671875,
      "learning_rate": 0.00019863613034027224,
      "loss": 1.0051,
      "step": 532
    },
    {
      "epoch": 0.08122523620847302,
      "grad_norm": 1.03125,
      "learning_rate": 0.0001986279943801262,
      "loss": 1.022,
      "step": 533
    },
    {
      "epoch": 0.08137762877171593,
      "grad_norm": 1.046875,
      "learning_rate": 0.00019861983439278238,
      "loss": 0.9996,
      "step": 534
    },
    {
      "epoch": 0.08153002133495886,
      "grad_norm": 0.8359375,
      "learning_rate": 0.00019861165038022874,
      "loss": 0.992,
      "step": 535
    },
    {
      "epoch": 0.08168241389820177,
      "grad_norm": 1.1171875,
      "learning_rate": 0.00019860344234445902,
      "loss": 1.055,
      "step": 536
    },
    {
      "epoch": 0.08183480646144468,
      "grad_norm": 1.1328125,
      "learning_rate": 0.00019859521028747277,
      "loss": 1.1829,
      "step": 537
    },
    {
      "epoch": 0.0819871990246876,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00019858695421127548,
      "loss": 0.7837,
      "step": 538
    },
    {
      "epoch": 0.08213959158793051,
      "grad_norm": 1.125,
      "learning_rate": 0.00019857867411787847,
      "loss": 1.0323,
      "step": 539
    },
    {
      "epoch": 0.08229198415117342,
      "grad_norm": 0.984375,
      "learning_rate": 0.00019857037000929883,
      "loss": 1.1108,
      "step": 540
    },
    {
      "epoch": 0.08244437671441633,
      "grad_norm": 0.97265625,
      "learning_rate": 0.0001985620418875596,
      "loss": 1.0156,
      "step": 541
    },
    {
      "epoch": 0.08259676927765926,
      "grad_norm": 0.80859375,
      "learning_rate": 0.0001985536897546896,
      "loss": 1.0482,
      "step": 542
    },
    {
      "epoch": 0.08274916184090217,
      "grad_norm": 0.83203125,
      "learning_rate": 0.00019854531361272358,
      "loss": 1.042,
      "step": 543
    },
    {
      "epoch": 0.08290155440414508,
      "grad_norm": 0.80859375,
      "learning_rate": 0.00019853691346370203,
      "loss": 0.9866,
      "step": 544
    },
    {
      "epoch": 0.08305394696738799,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00019852848930967137,
      "loss": 0.9853,
      "step": 545
    },
    {
      "epoch": 0.0832063395306309,
      "grad_norm": 0.96875,
      "learning_rate": 0.00019852004115268387,
      "loss": 0.9792,
      "step": 546
    },
    {
      "epoch": 0.08335873209387382,
      "grad_norm": 1.0859375,
      "learning_rate": 0.0001985115689947976,
      "loss": 0.9715,
      "step": 547
    },
    {
      "epoch": 0.08351112465711673,
      "grad_norm": 0.875,
      "learning_rate": 0.00019850307283807647,
      "loss": 0.9555,
      "step": 548
    },
    {
      "epoch": 0.08366351722035964,
      "grad_norm": 0.95703125,
      "learning_rate": 0.00019849455268459033,
      "loss": 1.1281,
      "step": 549
    },
    {
      "epoch": 0.08381590978360257,
      "grad_norm": 1.1484375,
      "learning_rate": 0.00019848600853641476,
      "loss": 1.0023,
      "step": 550
    },
    {
      "epoch": 0.08396830234684548,
      "grad_norm": 0.921875,
      "learning_rate": 0.00019847744039563128,
      "loss": 1.0483,
      "step": 551
    },
    {
      "epoch": 0.08412069491008839,
      "grad_norm": 1.0859375,
      "learning_rate": 0.00019846884826432717,
      "loss": 1.061,
      "step": 552
    },
    {
      "epoch": 0.0842730874733313,
      "grad_norm": 0.921875,
      "learning_rate": 0.00019846023214459561,
      "loss": 1.0615,
      "step": 553
    },
    {
      "epoch": 0.08442548003657421,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00019845159203853562,
      "loss": 1.0384,
      "step": 554
    },
    {
      "epoch": 0.08457787259981712,
      "grad_norm": 1.6953125,
      "learning_rate": 0.00019844292794825207,
      "loss": 0.8072,
      "step": 555
    },
    {
      "epoch": 0.08473026516306004,
      "grad_norm": 0.9921875,
      "learning_rate": 0.0001984342398758556,
      "loss": 1.1538,
      "step": 556
    },
    {
      "epoch": 0.08488265772630296,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00019842552782346282,
      "loss": 1.1008,
      "step": 557
    },
    {
      "epoch": 0.08503505028954587,
      "grad_norm": 1.171875,
      "learning_rate": 0.00019841679179319606,
      "loss": 1.0507,
      "step": 558
    },
    {
      "epoch": 0.08518744285278879,
      "grad_norm": 1.078125,
      "learning_rate": 0.00019840803178718358,
      "loss": 1.1768,
      "step": 559
    },
    {
      "epoch": 0.0853398354160317,
      "grad_norm": 0.97265625,
      "learning_rate": 0.00019839924780755942,
      "loss": 1.0087,
      "step": 560
    },
    {
      "epoch": 0.08549222797927461,
      "grad_norm": 1.03125,
      "learning_rate": 0.00019839043985646346,
      "loss": 0.7699,
      "step": 561
    },
    {
      "epoch": 0.08564462054251752,
      "grad_norm": 0.8984375,
      "learning_rate": 0.00019838160793604148,
      "loss": 0.8262,
      "step": 562
    },
    {
      "epoch": 0.08579701310576043,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00019837275204844505,
      "loss": 1.0332,
      "step": 563
    },
    {
      "epoch": 0.08594940566900336,
      "grad_norm": 0.6796875,
      "learning_rate": 0.0001983638721958316,
      "loss": 0.9089,
      "step": 564
    },
    {
      "epoch": 0.08610179823224627,
      "grad_norm": 1.015625,
      "learning_rate": 0.0001983549683803644,
      "loss": 1.0308,
      "step": 565
    },
    {
      "epoch": 0.08625419079548918,
      "grad_norm": 1.3125,
      "learning_rate": 0.00019834604060421253,
      "loss": 1.0481,
      "step": 566
    },
    {
      "epoch": 0.0864065833587321,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00019833708886955091,
      "loss": 0.9434,
      "step": 567
    },
    {
      "epoch": 0.086558975921975,
      "grad_norm": 1.1171875,
      "learning_rate": 0.00019832811317856033,
      "loss": 1.0835,
      "step": 568
    },
    {
      "epoch": 0.08671136848521792,
      "grad_norm": 0.83203125,
      "learning_rate": 0.00019831911353342742,
      "loss": 1.0718,
      "step": 569
    },
    {
      "epoch": 0.08686376104846083,
      "grad_norm": 1.078125,
      "learning_rate": 0.00019831008993634458,
      "loss": 0.9544,
      "step": 570
    },
    {
      "epoch": 0.08701615361170374,
      "grad_norm": 0.875,
      "learning_rate": 0.0001983010423895101,
      "loss": 0.9481,
      "step": 571
    },
    {
      "epoch": 0.08716854617494667,
      "grad_norm": 1.1640625,
      "learning_rate": 0.00019829197089512812,
      "loss": 1.0705,
      "step": 572
    },
    {
      "epoch": 0.08732093873818958,
      "grad_norm": 1.046875,
      "learning_rate": 0.00019828287545540856,
      "loss": 1.0537,
      "step": 573
    },
    {
      "epoch": 0.08747333130143249,
      "grad_norm": 0.91015625,
      "learning_rate": 0.0001982737560725672,
      "loss": 0.9773,
      "step": 574
    },
    {
      "epoch": 0.0876257238646754,
      "grad_norm": 0.9921875,
      "learning_rate": 0.00019826461274882564,
      "loss": 0.8659,
      "step": 575
    },
    {
      "epoch": 0.08777811642791832,
      "grad_norm": 0.86328125,
      "learning_rate": 0.00019825544548641134,
      "loss": 1.0355,
      "step": 576
    },
    {
      "epoch": 0.08793050899116123,
      "grad_norm": 0.80078125,
      "learning_rate": 0.0001982462542875576,
      "loss": 1.0362,
      "step": 577
    },
    {
      "epoch": 0.08808290155440414,
      "grad_norm": 0.98046875,
      "learning_rate": 0.00019823703915450354,
      "loss": 1.0636,
      "step": 578
    },
    {
      "epoch": 0.08823529411764706,
      "grad_norm": 1.109375,
      "learning_rate": 0.00019822780008949402,
      "loss": 1.0967,
      "step": 579
    },
    {
      "epoch": 0.08838768668088998,
      "grad_norm": 0.69921875,
      "learning_rate": 0.0001982185370947799,
      "loss": 0.8737,
      "step": 580
    },
    {
      "epoch": 0.08854007924413289,
      "grad_norm": 1.0859375,
      "learning_rate": 0.0001982092501726177,
      "loss": 0.897,
      "step": 581
    },
    {
      "epoch": 0.0886924718073758,
      "grad_norm": 0.72265625,
      "learning_rate": 0.00019819993932526991,
      "loss": 1.0573,
      "step": 582
    },
    {
      "epoch": 0.08884486437061871,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00019819060455500474,
      "loss": 1.0215,
      "step": 583
    },
    {
      "epoch": 0.08899725693386162,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00019818124586409627,
      "loss": 1.0069,
      "step": 584
    },
    {
      "epoch": 0.08914964949710454,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00019817186325482447,
      "loss": 0.857,
      "step": 585
    },
    {
      "epoch": 0.08930204206034746,
      "grad_norm": 0.953125,
      "learning_rate": 0.00019816245672947503,
      "loss": 0.9841,
      "step": 586
    },
    {
      "epoch": 0.08945443462359037,
      "grad_norm": 0.859375,
      "learning_rate": 0.00019815302629033957,
      "loss": 1.0189,
      "step": 587
    },
    {
      "epoch": 0.08960682718683328,
      "grad_norm": 0.71875,
      "learning_rate": 0.0001981435719397154,
      "loss": 0.9995,
      "step": 588
    },
    {
      "epoch": 0.0897592197500762,
      "grad_norm": 0.73828125,
      "learning_rate": 0.00019813409367990578,
      "loss": 0.8471,
      "step": 589
    },
    {
      "epoch": 0.08991161231331911,
      "grad_norm": 0.984375,
      "learning_rate": 0.00019812459151321977,
      "loss": 1.1081,
      "step": 590
    },
    {
      "epoch": 0.09006400487656202,
      "grad_norm": 0.6484375,
      "learning_rate": 0.00019811506544197216,
      "loss": 0.8528,
      "step": 591
    },
    {
      "epoch": 0.09021639743980493,
      "grad_norm": 0.78125,
      "learning_rate": 0.00019810551546848372,
      "loss": 1.121,
      "step": 592
    },
    {
      "epoch": 0.09036879000304786,
      "grad_norm": 1.046875,
      "learning_rate": 0.00019809594159508092,
      "loss": 1.0512,
      "step": 593
    },
    {
      "epoch": 0.09052118256629077,
      "grad_norm": 0.7734375,
      "learning_rate": 0.00019808634382409613,
      "loss": 1.1485,
      "step": 594
    },
    {
      "epoch": 0.09067357512953368,
      "grad_norm": 1.1328125,
      "learning_rate": 0.00019807672215786743,
      "loss": 1.094,
      "step": 595
    },
    {
      "epoch": 0.0908259676927766,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00019806707659873887,
      "loss": 0.9736,
      "step": 596
    },
    {
      "epoch": 0.0909783602560195,
      "grad_norm": 0.921875,
      "learning_rate": 0.00019805740714906023,
      "loss": 0.9697,
      "step": 597
    },
    {
      "epoch": 0.09113075281926242,
      "grad_norm": 0.71484375,
      "learning_rate": 0.0001980477138111871,
      "loss": 1.0001,
      "step": 598
    },
    {
      "epoch": 0.09128314538250533,
      "grad_norm": 0.8359375,
      "learning_rate": 0.00019803799658748094,
      "loss": 0.9342,
      "step": 599
    },
    {
      "epoch": 0.09143553794574824,
      "grad_norm": 0.86328125,
      "learning_rate": 0.00019802825548030902,
      "loss": 0.9098,
      "step": 600
    },
    {
      "epoch": 0.09158793050899117,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00019801849049204436,
      "loss": 1.1722,
      "step": 601
    },
    {
      "epoch": 0.09174032307223408,
      "grad_norm": 0.921875,
      "learning_rate": 0.00019800870162506589,
      "loss": 0.9732,
      "step": 602
    },
    {
      "epoch": 0.09189271563547699,
      "grad_norm": 0.68359375,
      "learning_rate": 0.00019799888888175833,
      "loss": 0.871,
      "step": 603
    },
    {
      "epoch": 0.0920451081987199,
      "grad_norm": 0.7421875,
      "learning_rate": 0.00019798905226451217,
      "loss": 0.9361,
      "step": 604
    },
    {
      "epoch": 0.09219750076196281,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00019797919177572378,
      "loss": 1.0544,
      "step": 605
    },
    {
      "epoch": 0.09234989332520573,
      "grad_norm": 0.87109375,
      "learning_rate": 0.0001979693074177953,
      "loss": 0.9774,
      "step": 606
    },
    {
      "epoch": 0.09250228588844864,
      "grad_norm": 1.0546875,
      "learning_rate": 0.00019795939919313473,
      "loss": 1.0413,
      "step": 607
    },
    {
      "epoch": 0.09265467845169156,
      "grad_norm": 0.76953125,
      "learning_rate": 0.00019794946710415584,
      "loss": 0.9527,
      "step": 608
    },
    {
      "epoch": 0.09280707101493448,
      "grad_norm": 0.76953125,
      "learning_rate": 0.0001979395111532782,
      "loss": 0.8993,
      "step": 609
    },
    {
      "epoch": 0.09295946357817739,
      "grad_norm": 1.296875,
      "learning_rate": 0.00019792953134292724,
      "loss": 0.99,
      "step": 610
    },
    {
      "epoch": 0.0931118561414203,
      "grad_norm": 0.86328125,
      "learning_rate": 0.00019791952767553422,
      "loss": 0.9517,
      "step": 611
    },
    {
      "epoch": 0.09326424870466321,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00019790950015353612,
      "loss": 0.949,
      "step": 612
    },
    {
      "epoch": 0.09341664126790612,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00019789944877937585,
      "loss": 0.9717,
      "step": 613
    },
    {
      "epoch": 0.09356903383114903,
      "grad_norm": 0.859375,
      "learning_rate": 0.00019788937355550202,
      "loss": 0.9463,
      "step": 614
    },
    {
      "epoch": 0.09372142639439196,
      "grad_norm": 1.15625,
      "learning_rate": 0.0001978792744843691,
      "loss": 1.1192,
      "step": 615
    },
    {
      "epoch": 0.09387381895763487,
      "grad_norm": 0.96875,
      "learning_rate": 0.00019786915156843742,
      "loss": 1.0325,
      "step": 616
    },
    {
      "epoch": 0.09402621152087778,
      "grad_norm": 0.7421875,
      "learning_rate": 0.000197859004810173,
      "loss": 0.951,
      "step": 617
    },
    {
      "epoch": 0.0941786040841207,
      "grad_norm": 1.0,
      "learning_rate": 0.00019784883421204778,
      "loss": 0.8444,
      "step": 618
    },
    {
      "epoch": 0.09433099664736361,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00019783863977653948,
      "loss": 0.9385,
      "step": 619
    },
    {
      "epoch": 0.09448338921060652,
      "grad_norm": 1.25,
      "learning_rate": 0.00019782842150613158,
      "loss": 1.1502,
      "step": 620
    },
    {
      "epoch": 0.09463578177384943,
      "grad_norm": 0.953125,
      "learning_rate": 0.00019781817940331338,
      "loss": 1.1029,
      "step": 621
    },
    {
      "epoch": 0.09478817433709234,
      "grad_norm": 0.9453125,
      "learning_rate": 0.00019780791347058006,
      "loss": 0.9424,
      "step": 622
    },
    {
      "epoch": 0.09494056690033527,
      "grad_norm": 1.1328125,
      "learning_rate": 0.00019779762371043247,
      "loss": 0.905,
      "step": 623
    },
    {
      "epoch": 0.09509295946357818,
      "grad_norm": 0.60546875,
      "learning_rate": 0.00019778731012537741,
      "loss": 0.7809,
      "step": 624
    },
    {
      "epoch": 0.09524535202682109,
      "grad_norm": 0.890625,
      "learning_rate": 0.0001977769727179274,
      "loss": 0.8937,
      "step": 625
    },
    {
      "epoch": 0.095397744590064,
      "grad_norm": 1.0,
      "learning_rate": 0.0001977666114906008,
      "loss": 0.9387,
      "step": 626
    },
    {
      "epoch": 0.09555013715330692,
      "grad_norm": 1.171875,
      "learning_rate": 0.00019775622644592171,
      "loss": 1.3385,
      "step": 627
    },
    {
      "epoch": 0.09570252971654983,
      "grad_norm": 1.1171875,
      "learning_rate": 0.00019774581758642007,
      "loss": 1.134,
      "step": 628
    },
    {
      "epoch": 0.09585492227979274,
      "grad_norm": 0.9453125,
      "learning_rate": 0.00019773538491463168,
      "loss": 0.9335,
      "step": 629
    },
    {
      "epoch": 0.09600731484303567,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00019772492843309807,
      "loss": 0.9678,
      "step": 630
    },
    {
      "epoch": 0.09615970740627858,
      "grad_norm": 0.828125,
      "learning_rate": 0.0001977144481443666,
      "loss": 1.014,
      "step": 631
    },
    {
      "epoch": 0.09631209996952149,
      "grad_norm": 1.1484375,
      "learning_rate": 0.00019770394405099038,
      "loss": 0.9523,
      "step": 632
    },
    {
      "epoch": 0.0964644925327644,
      "grad_norm": 1.125,
      "learning_rate": 0.0001976934161555284,
      "loss": 0.8816,
      "step": 633
    },
    {
      "epoch": 0.09661688509600731,
      "grad_norm": 0.9296875,
      "learning_rate": 0.00019768286446054532,
      "loss": 0.8709,
      "step": 634
    },
    {
      "epoch": 0.09676927765925022,
      "grad_norm": 1.09375,
      "learning_rate": 0.00019767228896861182,
      "loss": 1.0031,
      "step": 635
    },
    {
      "epoch": 0.09692167022249314,
      "grad_norm": 1.140625,
      "learning_rate": 0.00019766168968230415,
      "loss": 1.1712,
      "step": 636
    },
    {
      "epoch": 0.09707406278573606,
      "grad_norm": 1.140625,
      "learning_rate": 0.0001976510666042045,
      "loss": 1.0196,
      "step": 637
    },
    {
      "epoch": 0.09722645534897897,
      "grad_norm": 1.3125,
      "learning_rate": 0.00019764041973690074,
      "loss": 1.0052,
      "step": 638
    },
    {
      "epoch": 0.09737884791222189,
      "grad_norm": 0.7890625,
      "learning_rate": 0.0001976297490829867,
      "loss": 0.904,
      "step": 639
    },
    {
      "epoch": 0.0975312404754648,
      "grad_norm": 1.2109375,
      "learning_rate": 0.00019761905464506176,
      "loss": 0.9643,
      "step": 640
    },
    {
      "epoch": 0.09768363303870771,
      "grad_norm": 1.0546875,
      "learning_rate": 0.00019760833642573137,
      "loss": 0.8407,
      "step": 641
    },
    {
      "epoch": 0.09783602560195062,
      "grad_norm": 1.125,
      "learning_rate": 0.0001975975944276066,
      "loss": 1.158,
      "step": 642
    },
    {
      "epoch": 0.09798841816519353,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00019758682865330434,
      "loss": 0.8986,
      "step": 643
    },
    {
      "epoch": 0.09814081072843646,
      "grad_norm": 0.9765625,
      "learning_rate": 0.00019757603910544727,
      "loss": 1.0501,
      "step": 644
    },
    {
      "epoch": 0.09829320329167937,
      "grad_norm": 0.77734375,
      "learning_rate": 0.0001975652257866639,
      "loss": 0.7535,
      "step": 645
    },
    {
      "epoch": 0.09844559585492228,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00019755438869958856,
      "loss": 0.9945,
      "step": 646
    },
    {
      "epoch": 0.0985979884181652,
      "grad_norm": 1.0390625,
      "learning_rate": 0.00019754352784686125,
      "loss": 0.9174,
      "step": 647
    },
    {
      "epoch": 0.0987503809814081,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00019753264323112786,
      "loss": 1.1219,
      "step": 648
    },
    {
      "epoch": 0.09890277354465102,
      "grad_norm": 0.921875,
      "learning_rate": 0.00019752173485504005,
      "loss": 1.0998,
      "step": 649
    },
    {
      "epoch": 0.09905516610789393,
      "grad_norm": 0.90234375,
      "learning_rate": 0.0001975108027212552,
      "loss": 1.0033,
      "step": 650
    },
    {
      "epoch": 0.09920755867113684,
      "grad_norm": 0.73828125,
      "learning_rate": 0.0001974998468324366,
      "loss": 0.8379,
      "step": 651
    },
    {
      "epoch": 0.09935995123437977,
      "grad_norm": 0.97265625,
      "learning_rate": 0.00019748886719125325,
      "loss": 0.8044,
      "step": 652
    },
    {
      "epoch": 0.09951234379762268,
      "grad_norm": 1.0703125,
      "learning_rate": 0.0001974778638003799,
      "loss": 0.9689,
      "step": 653
    },
    {
      "epoch": 0.09966473636086559,
      "grad_norm": 0.859375,
      "learning_rate": 0.00019746683666249721,
      "loss": 1.073,
      "step": 654
    },
    {
      "epoch": 0.0998171289241085,
      "grad_norm": 0.703125,
      "learning_rate": 0.0001974557857802915,
      "loss": 1.0065,
      "step": 655
    },
    {
      "epoch": 0.09996952148735141,
      "grad_norm": 0.84375,
      "learning_rate": 0.00019744471115645492,
      "loss": 1.0235,
      "step": 656
    },
    {
      "epoch": 0.10012191405059433,
      "grad_norm": 1.03125,
      "learning_rate": 0.00019743361279368543,
      "loss": 0.901,
      "step": 657
    },
    {
      "epoch": 0.10027430661383724,
      "grad_norm": 1.1953125,
      "learning_rate": 0.00019742249069468676,
      "loss": 1.2351,
      "step": 658
    },
    {
      "epoch": 0.10042669917708016,
      "grad_norm": 0.88671875,
      "learning_rate": 0.0001974113448621684,
      "loss": 0.9597,
      "step": 659
    },
    {
      "epoch": 0.10057909174032308,
      "grad_norm": 0.98046875,
      "learning_rate": 0.0001974001752988456,
      "loss": 1.0456,
      "step": 660
    },
    {
      "epoch": 0.10073148430356599,
      "grad_norm": 0.8125,
      "learning_rate": 0.00019738898200743945,
      "loss": 0.8654,
      "step": 661
    },
    {
      "epoch": 0.1008838768668089,
      "grad_norm": 0.9453125,
      "learning_rate": 0.00019737776499067683,
      "loss": 0.903,
      "step": 662
    },
    {
      "epoch": 0.10103626943005181,
      "grad_norm": 0.8125,
      "learning_rate": 0.00019736652425129034,
      "loss": 0.8555,
      "step": 663
    },
    {
      "epoch": 0.10118866199329472,
      "grad_norm": 0.90625,
      "learning_rate": 0.00019735525979201838,
      "loss": 1.0654,
      "step": 664
    },
    {
      "epoch": 0.10134105455653764,
      "grad_norm": 1.046875,
      "learning_rate": 0.00019734397161560514,
      "loss": 0.9203,
      "step": 665
    },
    {
      "epoch": 0.10149344711978056,
      "grad_norm": 1.0234375,
      "learning_rate": 0.0001973326597248006,
      "loss": 1.0565,
      "step": 666
    },
    {
      "epoch": 0.10164583968302347,
      "grad_norm": 1.546875,
      "learning_rate": 0.00019732132412236047,
      "loss": 1.0844,
      "step": 667
    },
    {
      "epoch": 0.10179823224626638,
      "grad_norm": 1.1328125,
      "learning_rate": 0.00019730996481104627,
      "loss": 1.0618,
      "step": 668
    },
    {
      "epoch": 0.1019506248095093,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00019729858179362531,
      "loss": 0.9479,
      "step": 669
    },
    {
      "epoch": 0.10210301737275221,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00019728717507287063,
      "loss": 0.9653,
      "step": 670
    },
    {
      "epoch": 0.10225540993599512,
      "grad_norm": 0.83203125,
      "learning_rate": 0.0001972757446515611,
      "loss": 0.8957,
      "step": 671
    },
    {
      "epoch": 0.10240780249923803,
      "grad_norm": 0.9921875,
      "learning_rate": 0.0001972642905324813,
      "loss": 1.1096,
      "step": 672
    },
    {
      "epoch": 0.10256019506248096,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00019725281271842167,
      "loss": 1.0335,
      "step": 673
    },
    {
      "epoch": 0.10271258762572387,
      "grad_norm": 0.90625,
      "learning_rate": 0.00019724131121217836,
      "loss": 0.892,
      "step": 674
    },
    {
      "epoch": 0.10286498018896678,
      "grad_norm": 1.015625,
      "learning_rate": 0.00019722978601655324,
      "loss": 1.02,
      "step": 675
    },
    {
      "epoch": 0.1030173727522097,
      "grad_norm": 1.1796875,
      "learning_rate": 0.00019721823713435404,
      "loss": 1.225,
      "step": 676
    },
    {
      "epoch": 0.1031697653154526,
      "grad_norm": 1.046875,
      "learning_rate": 0.00019720666456839433,
      "loss": 0.9743,
      "step": 677
    },
    {
      "epoch": 0.10332215787869552,
      "grad_norm": 1.15625,
      "learning_rate": 0.00019719506832149318,
      "loss": 0.9516,
      "step": 678
    },
    {
      "epoch": 0.10347455044193843,
      "grad_norm": 0.9609375,
      "learning_rate": 0.00019718344839647576,
      "loss": 1.1194,
      "step": 679
    },
    {
      "epoch": 0.10362694300518134,
      "grad_norm": 0.90625,
      "learning_rate": 0.00019717180479617277,
      "loss": 1.151,
      "step": 680
    },
    {
      "epoch": 0.10377933556842427,
      "grad_norm": 1.421875,
      "learning_rate": 0.00019716013752342078,
      "loss": 1.11,
      "step": 681
    },
    {
      "epoch": 0.10393172813166718,
      "grad_norm": 1.21875,
      "learning_rate": 0.0001971484465810621,
      "loss": 0.9864,
      "step": 682
    },
    {
      "epoch": 0.10408412069491009,
      "grad_norm": 1.2421875,
      "learning_rate": 0.00019713673197194483,
      "loss": 1.1759,
      "step": 683
    },
    {
      "epoch": 0.104236513258153,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00019712499369892275,
      "loss": 1.0367,
      "step": 684
    },
    {
      "epoch": 0.10438890582139591,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00019711323176485555,
      "loss": 0.8981,
      "step": 685
    },
    {
      "epoch": 0.10454129838463883,
      "grad_norm": 1.109375,
      "learning_rate": 0.00019710144617260858,
      "loss": 1.0765,
      "step": 686
    },
    {
      "epoch": 0.10469369094788174,
      "grad_norm": 0.93359375,
      "learning_rate": 0.000197089636925053,
      "loss": 1.0446,
      "step": 687
    },
    {
      "epoch": 0.10484608351112466,
      "grad_norm": 1.3671875,
      "learning_rate": 0.00019707780402506567,
      "loss": 0.9462,
      "step": 688
    },
    {
      "epoch": 0.10499847607436757,
      "grad_norm": 0.875,
      "learning_rate": 0.00019706594747552925,
      "loss": 0.9668,
      "step": 689
    },
    {
      "epoch": 0.10515086863761049,
      "grad_norm": 0.88671875,
      "learning_rate": 0.00019705406727933223,
      "loss": 1.2528,
      "step": 690
    },
    {
      "epoch": 0.1053032612008534,
      "grad_norm": 0.87890625,
      "learning_rate": 0.00019704216343936873,
      "loss": 1.0603,
      "step": 691
    },
    {
      "epoch": 0.10545565376409631,
      "grad_norm": 0.94921875,
      "learning_rate": 0.00019703023595853876,
      "loss": 1.0395,
      "step": 692
    },
    {
      "epoch": 0.10560804632733922,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00019701828483974796,
      "loss": 1.0723,
      "step": 693
    },
    {
      "epoch": 0.10576043889058213,
      "grad_norm": 0.6953125,
      "learning_rate": 0.00019700631008590783,
      "loss": 1.0096,
      "step": 694
    },
    {
      "epoch": 0.10591283145382506,
      "grad_norm": 0.87890625,
      "learning_rate": 0.0001969943116999356,
      "loss": 0.9585,
      "step": 695
    },
    {
      "epoch": 0.10606522401706797,
      "grad_norm": 1.296875,
      "learning_rate": 0.00019698228968475422,
      "loss": 0.9611,
      "step": 696
    },
    {
      "epoch": 0.10621761658031088,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00019697024404329244,
      "loss": 0.9423,
      "step": 697
    },
    {
      "epoch": 0.1063700091435538,
      "grad_norm": 0.8125,
      "learning_rate": 0.00019695817477848477,
      "loss": 0.8296,
      "step": 698
    },
    {
      "epoch": 0.10652240170679671,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00019694608189327144,
      "loss": 0.9179,
      "step": 699
    },
    {
      "epoch": 0.10667479427003962,
      "grad_norm": 1.1171875,
      "learning_rate": 0.00019693396539059843,
      "loss": 1.0248,
      "step": 700
    },
    {
      "epoch": 0.10682718683328253,
      "grad_norm": 0.78125,
      "learning_rate": 0.00019692182527341755,
      "loss": 0.8862,
      "step": 701
    },
    {
      "epoch": 0.10697957939652544,
      "grad_norm": 1.3125,
      "learning_rate": 0.00019690966154468624,
      "loss": 1.1074,
      "step": 702
    },
    {
      "epoch": 0.10713197195976837,
      "grad_norm": 0.8984375,
      "learning_rate": 0.0001968974742073678,
      "loss": 0.9496,
      "step": 703
    },
    {
      "epoch": 0.10728436452301128,
      "grad_norm": 0.8359375,
      "learning_rate": 0.00019688526326443127,
      "loss": 0.8793,
      "step": 704
    },
    {
      "epoch": 0.10743675708625419,
      "grad_norm": 0.87890625,
      "learning_rate": 0.00019687302871885132,
      "loss": 1.0884,
      "step": 705
    },
    {
      "epoch": 0.1075891496494971,
      "grad_norm": 1.28125,
      "learning_rate": 0.0001968607705736085,
      "loss": 0.9501,
      "step": 706
    },
    {
      "epoch": 0.10774154221274002,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00019684848883168914,
      "loss": 1.0861,
      "step": 707
    },
    {
      "epoch": 0.10789393477598293,
      "grad_norm": 0.88671875,
      "learning_rate": 0.0001968361834960852,
      "loss": 0.8762,
      "step": 708
    },
    {
      "epoch": 0.10804632733922584,
      "grad_norm": 0.86328125,
      "learning_rate": 0.00019682385456979437,
      "loss": 1.0265,
      "step": 709
    },
    {
      "epoch": 0.10819871990246877,
      "grad_norm": 0.984375,
      "learning_rate": 0.00019681150205582025,
      "loss": 1.0075,
      "step": 710
    },
    {
      "epoch": 0.10835111246571168,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00019679912595717207,
      "loss": 0.8974,
      "step": 711
    },
    {
      "epoch": 0.10850350502895459,
      "grad_norm": 0.9296875,
      "learning_rate": 0.00019678672627686478,
      "loss": 0.9946,
      "step": 712
    },
    {
      "epoch": 0.1086558975921975,
      "grad_norm": 0.80859375,
      "learning_rate": 0.00019677430301791917,
      "loss": 0.7898,
      "step": 713
    },
    {
      "epoch": 0.10880829015544041,
      "grad_norm": 1.125,
      "learning_rate": 0.00019676185618336173,
      "loss": 0.9676,
      "step": 714
    },
    {
      "epoch": 0.10896068271868332,
      "grad_norm": 1.015625,
      "learning_rate": 0.00019674938577622463,
      "loss": 0.9071,
      "step": 715
    },
    {
      "epoch": 0.10911307528192624,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00019673689179954584,
      "loss": 1.0219,
      "step": 716
    },
    {
      "epoch": 0.10926546784516916,
      "grad_norm": 1.15625,
      "learning_rate": 0.00019672437425636915,
      "loss": 0.9448,
      "step": 717
    },
    {
      "epoch": 0.10941786040841207,
      "grad_norm": 0.6953125,
      "learning_rate": 0.00019671183314974399,
      "loss": 0.7997,
      "step": 718
    },
    {
      "epoch": 0.10957025297165499,
      "grad_norm": 0.83203125,
      "learning_rate": 0.0001966992684827255,
      "loss": 0.8621,
      "step": 719
    },
    {
      "epoch": 0.1097226455348979,
      "grad_norm": 1.09375,
      "learning_rate": 0.00019668668025837462,
      "loss": 1.0998,
      "step": 720
    },
    {
      "epoch": 0.10987503809814081,
      "grad_norm": 0.76171875,
      "learning_rate": 0.0001966740684797581,
      "loss": 1.0077,
      "step": 721
    },
    {
      "epoch": 0.11002743066138372,
      "grad_norm": 0.82421875,
      "learning_rate": 0.0001966614331499483,
      "loss": 0.988,
      "step": 722
    },
    {
      "epoch": 0.11017982322462663,
      "grad_norm": 1.1171875,
      "learning_rate": 0.00019664877427202332,
      "loss": 0.9484,
      "step": 723
    },
    {
      "epoch": 0.11033221578786956,
      "grad_norm": 1.3515625,
      "learning_rate": 0.00019663609184906712,
      "loss": 1.0955,
      "step": 724
    },
    {
      "epoch": 0.11048460835111247,
      "grad_norm": 0.89453125,
      "learning_rate": 0.0001966233858841693,
      "loss": 1.0073,
      "step": 725
    },
    {
      "epoch": 0.11063700091435538,
      "grad_norm": 1.5546875,
      "learning_rate": 0.00019661065638042522,
      "loss": 1.1109,
      "step": 726
    },
    {
      "epoch": 0.1107893934775983,
      "grad_norm": 1.03125,
      "learning_rate": 0.00019659790334093592,
      "loss": 1.0143,
      "step": 727
    },
    {
      "epoch": 0.1109417860408412,
      "grad_norm": 0.78125,
      "learning_rate": 0.0001965851267688083,
      "loss": 0.9042,
      "step": 728
    },
    {
      "epoch": 0.11109417860408412,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00019657232666715486,
      "loss": 1.0038,
      "step": 729
    },
    {
      "epoch": 0.11124657116732703,
      "grad_norm": 1.28125,
      "learning_rate": 0.00019655950303909393,
      "loss": 1.0719,
      "step": 730
    },
    {
      "epoch": 0.11139896373056994,
      "grad_norm": 1.015625,
      "learning_rate": 0.00019654665588774947,
      "loss": 1.1603,
      "step": 731
    },
    {
      "epoch": 0.11155135629381287,
      "grad_norm": 0.87109375,
      "learning_rate": 0.0001965337852162513,
      "loss": 0.911,
      "step": 732
    },
    {
      "epoch": 0.11170374885705578,
      "grad_norm": 0.87890625,
      "learning_rate": 0.00019652089102773488,
      "loss": 0.9267,
      "step": 733
    },
    {
      "epoch": 0.11185614142029869,
      "grad_norm": 1.0234375,
      "learning_rate": 0.0001965079733253414,
      "loss": 0.9511,
      "step": 734
    },
    {
      "epoch": 0.1120085339835416,
      "grad_norm": 1.046875,
      "learning_rate": 0.0001964950321122178,
      "loss": 1.0968,
      "step": 735
    },
    {
      "epoch": 0.11216092654678451,
      "grad_norm": 0.6875,
      "learning_rate": 0.00019648206739151676,
      "loss": 0.9167,
      "step": 736
    },
    {
      "epoch": 0.11231331911002743,
      "grad_norm": 1.1015625,
      "learning_rate": 0.00019646907916639664,
      "loss": 0.9704,
      "step": 737
    },
    {
      "epoch": 0.11246571167327034,
      "grad_norm": 1.140625,
      "learning_rate": 0.00019645606744002163,
      "loss": 1.2013,
      "step": 738
    },
    {
      "epoch": 0.11261810423651326,
      "grad_norm": 1.09375,
      "learning_rate": 0.0001964430322155615,
      "loss": 0.8962,
      "step": 739
    },
    {
      "epoch": 0.11277049679975618,
      "grad_norm": 1.1875,
      "learning_rate": 0.00019642997349619186,
      "loss": 1.168,
      "step": 740
    },
    {
      "epoch": 0.11292288936299909,
      "grad_norm": 0.6953125,
      "learning_rate": 0.00019641689128509397,
      "loss": 0.98,
      "step": 741
    },
    {
      "epoch": 0.113075281926242,
      "grad_norm": 0.94921875,
      "learning_rate": 0.00019640378558545487,
      "loss": 0.9177,
      "step": 742
    },
    {
      "epoch": 0.11322767448948491,
      "grad_norm": 0.98046875,
      "learning_rate": 0.0001963906564004673,
      "loss": 0.895,
      "step": 743
    },
    {
      "epoch": 0.11338006705272782,
      "grad_norm": 1.109375,
      "learning_rate": 0.0001963775037333297,
      "loss": 1.0045,
      "step": 744
    },
    {
      "epoch": 0.11353245961597073,
      "grad_norm": 0.90625,
      "learning_rate": 0.00019636432758724626,
      "loss": 0.9518,
      "step": 745
    },
    {
      "epoch": 0.11368485217921366,
      "grad_norm": 0.89453125,
      "learning_rate": 0.00019635112796542687,
      "loss": 0.9831,
      "step": 746
    },
    {
      "epoch": 0.11383724474245657,
      "grad_norm": 1.046875,
      "learning_rate": 0.00019633790487108717,
      "loss": 1.284,
      "step": 747
    },
    {
      "epoch": 0.11398963730569948,
      "grad_norm": 1.09375,
      "learning_rate": 0.00019632465830744846,
      "loss": 1.0881,
      "step": 748
    },
    {
      "epoch": 0.1141420298689424,
      "grad_norm": 1.0390625,
      "learning_rate": 0.0001963113882777378,
      "loss": 1.0787,
      "step": 749
    },
    {
      "epoch": 0.11429442243218531,
      "grad_norm": 1.03125,
      "learning_rate": 0.00019629809478518802,
      "loss": 1.0236,
      "step": 750
    },
    {
      "epoch": 0.11444681499542822,
      "grad_norm": 0.7265625,
      "learning_rate": 0.0001962847778330375,
      "loss": 0.8943,
      "step": 751
    },
    {
      "epoch": 0.11459920755867113,
      "grad_norm": 0.94921875,
      "learning_rate": 0.00019627143742453055,
      "loss": 0.9604,
      "step": 752
    },
    {
      "epoch": 0.11475160012191406,
      "grad_norm": 1.1171875,
      "learning_rate": 0.000196258073562917,
      "loss": 0.9735,
      "step": 753
    },
    {
      "epoch": 0.11490399268515697,
      "grad_norm": 0.765625,
      "learning_rate": 0.00019624468625145254,
      "loss": 1.0016,
      "step": 754
    },
    {
      "epoch": 0.11505638524839988,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00019623127549339846,
      "loss": 0.9088,
      "step": 755
    },
    {
      "epoch": 0.11520877781164279,
      "grad_norm": 1.171875,
      "learning_rate": 0.00019621784129202188,
      "loss": 1.0631,
      "step": 756
    },
    {
      "epoch": 0.1153611703748857,
      "grad_norm": 0.90625,
      "learning_rate": 0.00019620438365059548,
      "loss": 0.8517,
      "step": 757
    },
    {
      "epoch": 0.11551356293812862,
      "grad_norm": 0.703125,
      "learning_rate": 0.0001961909025723978,
      "loss": 0.9287,
      "step": 758
    },
    {
      "epoch": 0.11566595550137153,
      "grad_norm": 0.83984375,
      "learning_rate": 0.000196177398060713,
      "loss": 0.9148,
      "step": 759
    },
    {
      "epoch": 0.11581834806461444,
      "grad_norm": 1.1953125,
      "learning_rate": 0.00019616387011883098,
      "loss": 1.1966,
      "step": 760
    },
    {
      "epoch": 0.11597074062785737,
      "grad_norm": 1.203125,
      "learning_rate": 0.00019615031875004732,
      "loss": 0.9035,
      "step": 761
    },
    {
      "epoch": 0.11612313319110028,
      "grad_norm": 0.95703125,
      "learning_rate": 0.00019613674395766334,
      "loss": 0.9665,
      "step": 762
    },
    {
      "epoch": 0.11627552575434319,
      "grad_norm": 0.984375,
      "learning_rate": 0.0001961231457449861,
      "loss": 0.9781,
      "step": 763
    },
    {
      "epoch": 0.1164279183175861,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00019610952411532826,
      "loss": 0.9164,
      "step": 764
    },
    {
      "epoch": 0.11658031088082901,
      "grad_norm": 1.203125,
      "learning_rate": 0.00019609587907200825,
      "loss": 1.159,
      "step": 765
    },
    {
      "epoch": 0.11673270344407193,
      "grad_norm": 1.0390625,
      "learning_rate": 0.00019608221061835025,
      "loss": 1.0007,
      "step": 766
    },
    {
      "epoch": 0.11688509600731484,
      "grad_norm": 1.046875,
      "learning_rate": 0.000196068518757684,
      "loss": 0.8915,
      "step": 767
    },
    {
      "epoch": 0.11703748857055776,
      "grad_norm": 0.98046875,
      "learning_rate": 0.00019605480349334516,
      "loss": 1.1156,
      "step": 768
    },
    {
      "epoch": 0.11718988113380067,
      "grad_norm": 0.8359375,
      "learning_rate": 0.00019604106482867486,
      "loss": 0.8345,
      "step": 769
    },
    {
      "epoch": 0.11734227369704359,
      "grad_norm": 0.875,
      "learning_rate": 0.00019602730276702007,
      "loss": 1.0439,
      "step": 770
    },
    {
      "epoch": 0.1174946662602865,
      "grad_norm": 0.96484375,
      "learning_rate": 0.00019601351731173344,
      "loss": 1.1254,
      "step": 771
    },
    {
      "epoch": 0.11764705882352941,
      "grad_norm": 1.0703125,
      "learning_rate": 0.0001959997084661733,
      "loss": 1.1987,
      "step": 772
    },
    {
      "epoch": 0.11779945138677232,
      "grad_norm": 0.8046875,
      "learning_rate": 0.00019598587623370362,
      "loss": 1.1002,
      "step": 773
    },
    {
      "epoch": 0.11795184395001523,
      "grad_norm": 1.0,
      "learning_rate": 0.00019597202061769425,
      "loss": 1.0355,
      "step": 774
    },
    {
      "epoch": 0.11810423651325816,
      "grad_norm": 0.7109375,
      "learning_rate": 0.00019595814162152056,
      "loss": 0.8295,
      "step": 775
    },
    {
      "epoch": 0.11825662907650107,
      "grad_norm": 0.9140625,
      "learning_rate": 0.00019594423924856362,
      "loss": 1.0598,
      "step": 776
    },
    {
      "epoch": 0.11840902163974398,
      "grad_norm": 1.078125,
      "learning_rate": 0.0001959303135022103,
      "loss": 1.1327,
      "step": 777
    },
    {
      "epoch": 0.1185614142029869,
      "grad_norm": 0.98828125,
      "learning_rate": 0.00019591636438585314,
      "loss": 1.0565,
      "step": 778
    },
    {
      "epoch": 0.1187138067662298,
      "grad_norm": 1.1328125,
      "learning_rate": 0.00019590239190289032,
      "loss": 1.0661,
      "step": 779
    },
    {
      "epoch": 0.11886619932947272,
      "grad_norm": 1.2265625,
      "learning_rate": 0.0001958883960567257,
      "loss": 1.2785,
      "step": 780
    },
    {
      "epoch": 0.11901859189271563,
      "grad_norm": 0.9921875,
      "learning_rate": 0.00019587437685076892,
      "loss": 0.9938,
      "step": 781
    },
    {
      "epoch": 0.11917098445595854,
      "grad_norm": 1.375,
      "learning_rate": 0.00019586033428843522,
      "loss": 1.1089,
      "step": 782
    },
    {
      "epoch": 0.11932337701920147,
      "grad_norm": 1.0,
      "learning_rate": 0.0001958462683731456,
      "loss": 1.1787,
      "step": 783
    },
    {
      "epoch": 0.11947576958244438,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00019583217910832673,
      "loss": 1.0588,
      "step": 784
    },
    {
      "epoch": 0.11962816214568729,
      "grad_norm": 1.0390625,
      "learning_rate": 0.00019581806649741093,
      "loss": 1.0881,
      "step": 785
    },
    {
      "epoch": 0.1197805547089302,
      "grad_norm": 0.87890625,
      "learning_rate": 0.00019580393054383622,
      "loss": 0.9047,
      "step": 786
    },
    {
      "epoch": 0.11993294727217312,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00019578977125104635,
      "loss": 1.0142,
      "step": 787
    },
    {
      "epoch": 0.12008533983541603,
      "grad_norm": 0.765625,
      "learning_rate": 0.00019577558862249076,
      "loss": 1.0895,
      "step": 788
    },
    {
      "epoch": 0.12023773239865894,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00019576138266162444,
      "loss": 0.9392,
      "step": 789
    },
    {
      "epoch": 0.12039012496190186,
      "grad_norm": 0.89453125,
      "learning_rate": 0.00019574715337190827,
      "loss": 1.0307,
      "step": 790
    },
    {
      "epoch": 0.12054251752514478,
      "grad_norm": 0.9375,
      "learning_rate": 0.0001957329007568087,
      "loss": 0.8018,
      "step": 791
    },
    {
      "epoch": 0.12069491008838769,
      "grad_norm": 0.81640625,
      "learning_rate": 0.00019571862481979776,
      "loss": 0.9165,
      "step": 792
    },
    {
      "epoch": 0.1208473026516306,
      "grad_norm": 0.93359375,
      "learning_rate": 0.0001957043255643534,
      "loss": 0.8394,
      "step": 793
    },
    {
      "epoch": 0.12099969521487351,
      "grad_norm": 0.92578125,
      "learning_rate": 0.0001956900029939591,
      "loss": 1.0938,
      "step": 794
    },
    {
      "epoch": 0.12115208777811642,
      "grad_norm": 1.46875,
      "learning_rate": 0.000195675657112104,
      "loss": 1.0086,
      "step": 795
    },
    {
      "epoch": 0.12130448034135934,
      "grad_norm": 0.9296875,
      "learning_rate": 0.000195661287922283,
      "loss": 1.1357,
      "step": 796
    },
    {
      "epoch": 0.12145687290460226,
      "grad_norm": 1.0859375,
      "learning_rate": 0.0001956468954279966,
      "loss": 1.1627,
      "step": 797
    },
    {
      "epoch": 0.12160926546784517,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00019563247963275108,
      "loss": 0.9345,
      "step": 798
    },
    {
      "epoch": 0.12176165803108809,
      "grad_norm": 1.203125,
      "learning_rate": 0.00019561804054005826,
      "loss": 1.0277,
      "step": 799
    },
    {
      "epoch": 0.121914050594331,
      "grad_norm": 1.03125,
      "learning_rate": 0.00019560357815343577,
      "loss": 0.9411,
      "step": 800
    },
    {
      "epoch": 0.12206644315757391,
      "grad_norm": 1.0859375,
      "learning_rate": 0.00019558909247640685,
      "loss": 1.03,
      "step": 801
    },
    {
      "epoch": 0.12221883572081682,
      "grad_norm": 0.86328125,
      "learning_rate": 0.0001955745835125004,
      "loss": 1.074,
      "step": 802
    },
    {
      "epoch": 0.12237122828405973,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00019556005126525103,
      "loss": 0.9837,
      "step": 803
    },
    {
      "epoch": 0.12252362084730266,
      "grad_norm": 0.7109375,
      "learning_rate": 0.00019554549573819898,
      "loss": 0.9615,
      "step": 804
    },
    {
      "epoch": 0.12267601341054557,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00019553091693489018,
      "loss": 0.8962,
      "step": 805
    },
    {
      "epoch": 0.12282840597378848,
      "grad_norm": 0.890625,
      "learning_rate": 0.0001955163148588763,
      "loss": 0.9434,
      "step": 806
    },
    {
      "epoch": 0.1229807985370314,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00019550168951371454,
      "loss": 1.1571,
      "step": 807
    },
    {
      "epoch": 0.1231331911002743,
      "grad_norm": 1.0625,
      "learning_rate": 0.00019548704090296788,
      "loss": 0.9729,
      "step": 808
    },
    {
      "epoch": 0.12328558366351722,
      "grad_norm": 0.99609375,
      "learning_rate": 0.00019547236903020494,
      "loss": 1.0205,
      "step": 809
    },
    {
      "epoch": 0.12343797622676013,
      "grad_norm": 1.1484375,
      "learning_rate": 0.00019545767389899998,
      "loss": 1.1246,
      "step": 810
    },
    {
      "epoch": 0.12359036879000304,
      "grad_norm": 0.84375,
      "learning_rate": 0.00019544295551293295,
      "loss": 0.8429,
      "step": 811
    },
    {
      "epoch": 0.12374276135324597,
      "grad_norm": 0.79296875,
      "learning_rate": 0.00019542821387558945,
      "loss": 0.9106,
      "step": 812
    },
    {
      "epoch": 0.12389515391648888,
      "grad_norm": 1.0078125,
      "learning_rate": 0.0001954134489905608,
      "loss": 0.9236,
      "step": 813
    },
    {
      "epoch": 0.12404754647973179,
      "grad_norm": 1.1015625,
      "learning_rate": 0.00019539866086144389,
      "loss": 1.1102,
      "step": 814
    },
    {
      "epoch": 0.1241999390429747,
      "grad_norm": 0.8984375,
      "learning_rate": 0.00019538384949184133,
      "loss": 0.9407,
      "step": 815
    },
    {
      "epoch": 0.12435233160621761,
      "grad_norm": 0.94921875,
      "learning_rate": 0.00019536901488536137,
      "loss": 0.9293,
      "step": 816
    },
    {
      "epoch": 0.12450472416946053,
      "grad_norm": 0.98828125,
      "learning_rate": 0.00019535415704561798,
      "loss": 1.0294,
      "step": 817
    },
    {
      "epoch": 0.12465711673270344,
      "grad_norm": 1.1796875,
      "learning_rate": 0.00019533927597623069,
      "loss": 1.2111,
      "step": 818
    },
    {
      "epoch": 0.12480950929594636,
      "grad_norm": 0.9765625,
      "learning_rate": 0.0001953243716808248,
      "loss": 1.066,
      "step": 819
    },
    {
      "epoch": 0.12496190185918928,
      "grad_norm": 0.88671875,
      "learning_rate": 0.00019530944416303115,
      "loss": 1.0595,
      "step": 820
    },
    {
      "epoch": 0.1251142944224322,
      "grad_norm": 0.9453125,
      "learning_rate": 0.00019529449342648637,
      "loss": 0.8964,
      "step": 821
    },
    {
      "epoch": 0.12526668698567509,
      "grad_norm": 0.796875,
      "learning_rate": 0.00019527951947483261,
      "loss": 0.7588,
      "step": 822
    },
    {
      "epoch": 0.125419079548918,
      "grad_norm": 1.1484375,
      "learning_rate": 0.00019526452231171775,
      "loss": 1.0798,
      "step": 823
    },
    {
      "epoch": 0.12557147211216094,
      "grad_norm": 0.9140625,
      "learning_rate": 0.00019524950194079534,
      "loss": 1.078,
      "step": 824
    },
    {
      "epoch": 0.12572386467540383,
      "grad_norm": 1.0390625,
      "learning_rate": 0.00019523445836572455,
      "loss": 1.1267,
      "step": 825
    },
    {
      "epoch": 0.12587625723864676,
      "grad_norm": 0.9375,
      "learning_rate": 0.00019521939159017018,
      "loss": 0.9032,
      "step": 826
    },
    {
      "epoch": 0.12602864980188966,
      "grad_norm": 0.98828125,
      "learning_rate": 0.00019520430161780277,
      "loss": 0.9799,
      "step": 827
    },
    {
      "epoch": 0.12618104236513258,
      "grad_norm": 0.8125,
      "learning_rate": 0.00019518918845229838,
      "loss": 1.0018,
      "step": 828
    },
    {
      "epoch": 0.12633343492837548,
      "grad_norm": 0.9140625,
      "learning_rate": 0.00019517405209733887,
      "loss": 0.8557,
      "step": 829
    },
    {
      "epoch": 0.1264858274916184,
      "grad_norm": 1.078125,
      "learning_rate": 0.00019515889255661165,
      "loss": 0.7873,
      "step": 830
    },
    {
      "epoch": 0.12663822005486133,
      "grad_norm": 0.78515625,
      "learning_rate": 0.00019514370983380976,
      "loss": 0.854,
      "step": 831
    },
    {
      "epoch": 0.12679061261810423,
      "grad_norm": 0.77734375,
      "learning_rate": 0.000195128503932632,
      "loss": 0.9887,
      "step": 832
    },
    {
      "epoch": 0.12694300518134716,
      "grad_norm": 0.859375,
      "learning_rate": 0.0001951132748567827,
      "loss": 0.9942,
      "step": 833
    },
    {
      "epoch": 0.12709539774459006,
      "grad_norm": 1.34375,
      "learning_rate": 0.00019509802260997186,
      "loss": 0.9857,
      "step": 834
    },
    {
      "epoch": 0.12724779030783298,
      "grad_norm": 0.6796875,
      "learning_rate": 0.0001950827471959152,
      "loss": 0.7831,
      "step": 835
    },
    {
      "epoch": 0.12740018287107588,
      "grad_norm": 0.8359375,
      "learning_rate": 0.00019506744861833402,
      "loss": 1.0726,
      "step": 836
    },
    {
      "epoch": 0.1275525754343188,
      "grad_norm": 0.9296875,
      "learning_rate": 0.00019505212688095526,
      "loss": 1.0748,
      "step": 837
    },
    {
      "epoch": 0.12770496799756173,
      "grad_norm": 0.62109375,
      "learning_rate": 0.0001950367819875115,
      "loss": 0.7029,
      "step": 838
    },
    {
      "epoch": 0.12785736056080463,
      "grad_norm": 1.3359375,
      "learning_rate": 0.000195021413941741,
      "loss": 1.09,
      "step": 839
    },
    {
      "epoch": 0.12800975312404755,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00019500602274738764,
      "loss": 1.0593,
      "step": 840
    },
    {
      "epoch": 0.12816214568729045,
      "grad_norm": 1.1015625,
      "learning_rate": 0.0001949906084082009,
      "loss": 1.0133,
      "step": 841
    },
    {
      "epoch": 0.12831453825053338,
      "grad_norm": 1.109375,
      "learning_rate": 0.000194975170927936,
      "loss": 0.9323,
      "step": 842
    },
    {
      "epoch": 0.12846693081377628,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00019495971031035367,
      "loss": 0.9327,
      "step": 843
    },
    {
      "epoch": 0.1286193233770192,
      "grad_norm": 1.1328125,
      "learning_rate": 0.00019494422655922037,
      "loss": 0.9288,
      "step": 844
    },
    {
      "epoch": 0.12877171594026213,
      "grad_norm": 0.78125,
      "learning_rate": 0.00019492871967830816,
      "loss": 1.0907,
      "step": 845
    },
    {
      "epoch": 0.12892410850350502,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00019491318967139476,
      "loss": 0.8812,
      "step": 846
    },
    {
      "epoch": 0.12907650106674795,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00019489763654226345,
      "loss": 0.828,
      "step": 847
    },
    {
      "epoch": 0.12922889362999085,
      "grad_norm": 1.125,
      "learning_rate": 0.0001948820602947032,
      "loss": 1.0374,
      "step": 848
    },
    {
      "epoch": 0.12938128619323377,
      "grad_norm": 0.765625,
      "learning_rate": 0.0001948664609325087,
      "loss": 0.9027,
      "step": 849
    },
    {
      "epoch": 0.12953367875647667,
      "grad_norm": 0.92578125,
      "learning_rate": 0.00019485083845948003,
      "loss": 0.9312,
      "step": 850
    },
    {
      "epoch": 0.1296860713197196,
      "grad_norm": 0.97265625,
      "learning_rate": 0.0001948351928794232,
      "loss": 1.0252,
      "step": 851
    },
    {
      "epoch": 0.12983846388296252,
      "grad_norm": 1.1328125,
      "learning_rate": 0.00019481952419614961,
      "loss": 1.0083,
      "step": 852
    },
    {
      "epoch": 0.12999085644620542,
      "grad_norm": 0.77734375,
      "learning_rate": 0.0001948038324134764,
      "loss": 0.8753,
      "step": 853
    },
    {
      "epoch": 0.13014324900944835,
      "grad_norm": 1.0625,
      "learning_rate": 0.0001947881175352263,
      "loss": 0.8963,
      "step": 854
    },
    {
      "epoch": 0.13029564157269125,
      "grad_norm": 1.09375,
      "learning_rate": 0.0001947723795652277,
      "loss": 0.955,
      "step": 855
    },
    {
      "epoch": 0.13044803413593417,
      "grad_norm": 0.96484375,
      "learning_rate": 0.0001947566185073146,
      "loss": 1.0398,
      "step": 856
    },
    {
      "epoch": 0.13060042669917707,
      "grad_norm": 1.015625,
      "learning_rate": 0.00019474083436532658,
      "loss": 1.0654,
      "step": 857
    },
    {
      "epoch": 0.13075281926242,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00019472502714310892,
      "loss": 1.0112,
      "step": 858
    },
    {
      "epoch": 0.13090521182566292,
      "grad_norm": 0.74609375,
      "learning_rate": 0.00019470919684451245,
      "loss": 1.0232,
      "step": 859
    },
    {
      "epoch": 0.13105760438890582,
      "grad_norm": 0.64453125,
      "learning_rate": 0.00019469334347339373,
      "loss": 0.805,
      "step": 860
    },
    {
      "epoch": 0.13120999695214874,
      "grad_norm": 1.015625,
      "learning_rate": 0.0001946774670336148,
      "loss": 1.1194,
      "step": 861
    },
    {
      "epoch": 0.13136238951539164,
      "grad_norm": 0.79296875,
      "learning_rate": 0.00019466156752904343,
      "loss": 0.926,
      "step": 862
    },
    {
      "epoch": 0.13151478207863457,
      "grad_norm": 1.1171875,
      "learning_rate": 0.00019464564496355293,
      "loss": 1.0574,
      "step": 863
    },
    {
      "epoch": 0.13166717464187747,
      "grad_norm": 1.2421875,
      "learning_rate": 0.0001946296993410223,
      "loss": 1.0389,
      "step": 864
    },
    {
      "epoch": 0.1318195672051204,
      "grad_norm": 0.7265625,
      "learning_rate": 0.00019461373066533613,
      "loss": 0.8993,
      "step": 865
    },
    {
      "epoch": 0.13197195976836332,
      "grad_norm": 1.03125,
      "learning_rate": 0.00019459773894038457,
      "loss": 1.0063,
      "step": 866
    },
    {
      "epoch": 0.13212435233160622,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00019458172417006347,
      "loss": 0.9811,
      "step": 867
    },
    {
      "epoch": 0.13227674489484914,
      "grad_norm": 0.92578125,
      "learning_rate": 0.00019456568635827428,
      "loss": 0.9502,
      "step": 868
    },
    {
      "epoch": 0.13242913745809204,
      "grad_norm": 1.078125,
      "learning_rate": 0.00019454962550892398,
      "loss": 1.0147,
      "step": 869
    },
    {
      "epoch": 0.13258153002133496,
      "grad_norm": 0.8828125,
      "learning_rate": 0.00019453354162592525,
      "loss": 0.907,
      "step": 870
    },
    {
      "epoch": 0.13273392258457786,
      "grad_norm": 0.8125,
      "learning_rate": 0.00019451743471319638,
      "loss": 0.9898,
      "step": 871
    },
    {
      "epoch": 0.1328863151478208,
      "grad_norm": 0.86328125,
      "learning_rate": 0.00019450130477466124,
      "loss": 0.8563,
      "step": 872
    },
    {
      "epoch": 0.13303870771106371,
      "grad_norm": 0.83203125,
      "learning_rate": 0.00019448515181424931,
      "loss": 0.9084,
      "step": 873
    },
    {
      "epoch": 0.1331911002743066,
      "grad_norm": 0.71875,
      "learning_rate": 0.00019446897583589565,
      "loss": 0.9052,
      "step": 874
    },
    {
      "epoch": 0.13334349283754954,
      "grad_norm": 0.80078125,
      "learning_rate": 0.000194452776843541,
      "loss": 1.0242,
      "step": 875
    },
    {
      "epoch": 0.13349588540079244,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00019443655484113165,
      "loss": 0.8958,
      "step": 876
    },
    {
      "epoch": 0.13364827796403536,
      "grad_norm": 1.1328125,
      "learning_rate": 0.00019442030983261952,
      "loss": 1.1671,
      "step": 877
    },
    {
      "epoch": 0.13380067052727826,
      "grad_norm": 0.84765625,
      "learning_rate": 0.00019440404182196214,
      "loss": 0.8973,
      "step": 878
    },
    {
      "epoch": 0.13395306309052118,
      "grad_norm": 0.75390625,
      "learning_rate": 0.0001943877508131226,
      "loss": 1.0688,
      "step": 879
    },
    {
      "epoch": 0.13410545565376408,
      "grad_norm": 0.734375,
      "learning_rate": 0.00019437143681006965,
      "loss": 0.8178,
      "step": 880
    },
    {
      "epoch": 0.134257848217007,
      "grad_norm": 1.0546875,
      "learning_rate": 0.00019435509981677762,
      "loss": 1.0363,
      "step": 881
    },
    {
      "epoch": 0.13441024078024993,
      "grad_norm": 1.5390625,
      "learning_rate": 0.0001943387398372264,
      "loss": 1.2216,
      "step": 882
    },
    {
      "epoch": 0.13456263334349283,
      "grad_norm": 0.68359375,
      "learning_rate": 0.00019432235687540157,
      "loss": 0.7484,
      "step": 883
    },
    {
      "epoch": 0.13471502590673576,
      "grad_norm": 0.8125,
      "learning_rate": 0.0001943059509352942,
      "loss": 0.8053,
      "step": 884
    },
    {
      "epoch": 0.13486741846997866,
      "grad_norm": 0.921875,
      "learning_rate": 0.00019428952202090103,
      "loss": 0.9504,
      "step": 885
    },
    {
      "epoch": 0.13501981103322158,
      "grad_norm": 1.0,
      "learning_rate": 0.0001942730701362244,
      "loss": 0.9844,
      "step": 886
    },
    {
      "epoch": 0.13517220359646448,
      "grad_norm": 0.8515625,
      "learning_rate": 0.0001942565952852722,
      "loss": 0.9433,
      "step": 887
    },
    {
      "epoch": 0.1353245961597074,
      "grad_norm": 0.734375,
      "learning_rate": 0.00019424009747205797,
      "loss": 0.845,
      "step": 888
    },
    {
      "epoch": 0.13547698872295033,
      "grad_norm": 1.0625,
      "learning_rate": 0.0001942235767006008,
      "loss": 1.0061,
      "step": 889
    },
    {
      "epoch": 0.13562938128619323,
      "grad_norm": 0.78515625,
      "learning_rate": 0.0001942070329749254,
      "loss": 0.993,
      "step": 890
    },
    {
      "epoch": 0.13578177384943615,
      "grad_norm": 1.1484375,
      "learning_rate": 0.00019419046629906204,
      "loss": 1.0914,
      "step": 891
    },
    {
      "epoch": 0.13593416641267905,
      "grad_norm": 1.0703125,
      "learning_rate": 0.0001941738766770466,
      "loss": 1.0396,
      "step": 892
    },
    {
      "epoch": 0.13608655897592198,
      "grad_norm": 0.95703125,
      "learning_rate": 0.00019415726411292053,
      "loss": 1.1547,
      "step": 893
    },
    {
      "epoch": 0.13623895153916488,
      "grad_norm": 0.8203125,
      "learning_rate": 0.0001941406286107309,
      "loss": 0.9051,
      "step": 894
    },
    {
      "epoch": 0.1363913441024078,
      "grad_norm": 0.74609375,
      "learning_rate": 0.00019412397017453046,
      "loss": 0.872,
      "step": 895
    },
    {
      "epoch": 0.13654373666565073,
      "grad_norm": 0.80078125,
      "learning_rate": 0.0001941072888083773,
      "loss": 0.9148,
      "step": 896
    },
    {
      "epoch": 0.13669612922889363,
      "grad_norm": 1.0078125,
      "learning_rate": 0.0001940905845163353,
      "loss": 0.9506,
      "step": 897
    },
    {
      "epoch": 0.13684852179213655,
      "grad_norm": 0.8359375,
      "learning_rate": 0.00019407385730247387,
      "loss": 0.9078,
      "step": 898
    },
    {
      "epoch": 0.13700091435537945,
      "grad_norm": 1.2578125,
      "learning_rate": 0.000194057107170868,
      "loss": 1.0408,
      "step": 899
    },
    {
      "epoch": 0.13715330691862238,
      "grad_norm": 0.84765625,
      "learning_rate": 0.00019404033412559826,
      "loss": 1.035,
      "step": 900
    },
    {
      "epoch": 0.13730569948186527,
      "grad_norm": 0.86328125,
      "learning_rate": 0.00019402353817075078,
      "loss": 0.8642,
      "step": 901
    },
    {
      "epoch": 0.1374580920451082,
      "grad_norm": 0.84375,
      "learning_rate": 0.00019400671931041737,
      "loss": 1.002,
      "step": 902
    },
    {
      "epoch": 0.13761048460835112,
      "grad_norm": 1.140625,
      "learning_rate": 0.00019398987754869524,
      "loss": 1.0066,
      "step": 903
    },
    {
      "epoch": 0.13776287717159402,
      "grad_norm": 0.82421875,
      "learning_rate": 0.00019397301288968737,
      "loss": 0.7368,
      "step": 904
    },
    {
      "epoch": 0.13791526973483695,
      "grad_norm": 0.8984375,
      "learning_rate": 0.0001939561253375022,
      "loss": 0.9958,
      "step": 905
    },
    {
      "epoch": 0.13806766229807985,
      "grad_norm": 0.69140625,
      "learning_rate": 0.00019393921489625377,
      "loss": 0.9181,
      "step": 906
    },
    {
      "epoch": 0.13822005486132277,
      "grad_norm": 0.7734375,
      "learning_rate": 0.00019392228157006175,
      "loss": 0.8753,
      "step": 907
    },
    {
      "epoch": 0.13837244742456567,
      "grad_norm": 1.03125,
      "learning_rate": 0.00019390532536305125,
      "loss": 1.0433,
      "step": 908
    },
    {
      "epoch": 0.1385248399878086,
      "grad_norm": 1.0859375,
      "learning_rate": 0.00019388834627935317,
      "loss": 0.9108,
      "step": 909
    },
    {
      "epoch": 0.13867723255105152,
      "grad_norm": 1.0546875,
      "learning_rate": 0.00019387134432310378,
      "loss": 1.1164,
      "step": 910
    },
    {
      "epoch": 0.13882962511429442,
      "grad_norm": 1.09375,
      "learning_rate": 0.000193854319498445,
      "loss": 0.9931,
      "step": 911
    },
    {
      "epoch": 0.13898201767753735,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00019383727180952439,
      "loss": 0.9602,
      "step": 912
    },
    {
      "epoch": 0.13913441024078024,
      "grad_norm": 0.859375,
      "learning_rate": 0.0001938202012604949,
      "loss": 1.0474,
      "step": 913
    },
    {
      "epoch": 0.13928680280402317,
      "grad_norm": 0.87890625,
      "learning_rate": 0.00019380310785551528,
      "loss": 0.8751,
      "step": 914
    },
    {
      "epoch": 0.13943919536726607,
      "grad_norm": 0.9140625,
      "learning_rate": 0.00019378599159874965,
      "loss": 1.0503,
      "step": 915
    },
    {
      "epoch": 0.139591587930509,
      "grad_norm": 0.7734375,
      "learning_rate": 0.00019376885249436777,
      "loss": 0.803,
      "step": 916
    },
    {
      "epoch": 0.13974398049375192,
      "grad_norm": 1.0859375,
      "learning_rate": 0.00019375169054654503,
      "loss": 1.1344,
      "step": 917
    },
    {
      "epoch": 0.13989637305699482,
      "grad_norm": 0.87890625,
      "learning_rate": 0.0001937345057594623,
      "loss": 1.0443,
      "step": 918
    },
    {
      "epoch": 0.14004876562023774,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00019371729813730606,
      "loss": 0.8192,
      "step": 919
    },
    {
      "epoch": 0.14020115818348064,
      "grad_norm": 0.7890625,
      "learning_rate": 0.00019370006768426828,
      "loss": 0.9286,
      "step": 920
    },
    {
      "epoch": 0.14035355074672357,
      "grad_norm": 0.96875,
      "learning_rate": 0.0001936828144045466,
      "loss": 1.0353,
      "step": 921
    },
    {
      "epoch": 0.14050594330996646,
      "grad_norm": 0.96484375,
      "learning_rate": 0.00019366553830234414,
      "loss": 0.9784,
      "step": 922
    },
    {
      "epoch": 0.1406583358732094,
      "grad_norm": 0.81640625,
      "learning_rate": 0.00019364823938186962,
      "loss": 0.9192,
      "step": 923
    },
    {
      "epoch": 0.14081072843645231,
      "grad_norm": 0.98828125,
      "learning_rate": 0.0001936309176473373,
      "loss": 1.0218,
      "step": 924
    },
    {
      "epoch": 0.1409631209996952,
      "grad_norm": 1.21875,
      "learning_rate": 0.000193613573102967,
      "loss": 1.0039,
      "step": 925
    },
    {
      "epoch": 0.14111551356293814,
      "grad_norm": 1.078125,
      "learning_rate": 0.0001935962057529841,
      "loss": 1.0078,
      "step": 926
    },
    {
      "epoch": 0.14126790612618104,
      "grad_norm": 0.90625,
      "learning_rate": 0.00019357881560161958,
      "loss": 1.0149,
      "step": 927
    },
    {
      "epoch": 0.14142029868942396,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00019356140265310983,
      "loss": 0.9545,
      "step": 928
    },
    {
      "epoch": 0.14157269125266686,
      "grad_norm": 1.0,
      "learning_rate": 0.000193543966911697,
      "loss": 0.8412,
      "step": 929
    },
    {
      "epoch": 0.14172508381590979,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00019352650838162861,
      "loss": 0.9287,
      "step": 930
    },
    {
      "epoch": 0.14187747637915268,
      "grad_norm": 0.77734375,
      "learning_rate": 0.0001935090270671579,
      "loss": 1.0268,
      "step": 931
    },
    {
      "epoch": 0.1420298689423956,
      "grad_norm": 1.09375,
      "learning_rate": 0.00019349152297254345,
      "loss": 1.0703,
      "step": 932
    },
    {
      "epoch": 0.14218226150563854,
      "grad_norm": 1.1796875,
      "learning_rate": 0.00019347399610204958,
      "loss": 0.9131,
      "step": 933
    },
    {
      "epoch": 0.14233465406888143,
      "grad_norm": 0.80078125,
      "learning_rate": 0.0001934564464599461,
      "loss": 0.9696,
      "step": 934
    },
    {
      "epoch": 0.14248704663212436,
      "grad_norm": 0.9375,
      "learning_rate": 0.00019343887405050834,
      "loss": 0.982,
      "step": 935
    },
    {
      "epoch": 0.14263943919536726,
      "grad_norm": 0.96875,
      "learning_rate": 0.00019342127887801716,
      "loss": 1.0061,
      "step": 936
    },
    {
      "epoch": 0.14279183175861018,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00019340366094675903,
      "loss": 0.9542,
      "step": 937
    },
    {
      "epoch": 0.14294422432185308,
      "grad_norm": 0.86328125,
      "learning_rate": 0.00019338602026102594,
      "loss": 0.9773,
      "step": 938
    },
    {
      "epoch": 0.143096616885096,
      "grad_norm": 0.90234375,
      "learning_rate": 0.0001933683568251154,
      "loss": 0.9621,
      "step": 939
    },
    {
      "epoch": 0.14324900944833893,
      "grad_norm": 0.97265625,
      "learning_rate": 0.00019335067064333046,
      "loss": 0.9616,
      "step": 940
    },
    {
      "epoch": 0.14340140201158183,
      "grad_norm": 0.828125,
      "learning_rate": 0.00019333296171997975,
      "loss": 0.8997,
      "step": 941
    },
    {
      "epoch": 0.14355379457482476,
      "grad_norm": 1.1640625,
      "learning_rate": 0.00019331523005937742,
      "loss": 0.9109,
      "step": 942
    },
    {
      "epoch": 0.14370618713806765,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00019329747566584313,
      "loss": 1.0113,
      "step": 943
    },
    {
      "epoch": 0.14385857970131058,
      "grad_norm": 0.8984375,
      "learning_rate": 0.00019327969854370216,
      "loss": 0.944,
      "step": 944
    },
    {
      "epoch": 0.14401097226455348,
      "grad_norm": 0.828125,
      "learning_rate": 0.00019326189869728523,
      "loss": 0.9849,
      "step": 945
    },
    {
      "epoch": 0.1441633648277964,
      "grad_norm": 0.984375,
      "learning_rate": 0.0001932440761309286,
      "loss": 0.7688,
      "step": 946
    },
    {
      "epoch": 0.14431575739103933,
      "grad_norm": 0.90625,
      "learning_rate": 0.0001932262308489742,
      "loss": 0.9942,
      "step": 947
    },
    {
      "epoch": 0.14446814995428223,
      "grad_norm": 0.953125,
      "learning_rate": 0.00019320836285576933,
      "loss": 1.0105,
      "step": 948
    },
    {
      "epoch": 0.14462054251752515,
      "grad_norm": 0.7890625,
      "learning_rate": 0.0001931904721556669,
      "loss": 0.9569,
      "step": 949
    },
    {
      "epoch": 0.14477293508076805,
      "grad_norm": 0.70703125,
      "learning_rate": 0.00019317255875302535,
      "loss": 0.8689,
      "step": 950
    },
    {
      "epoch": 0.14492532764401098,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00019315462265220867,
      "loss": 0.9971,
      "step": 951
    },
    {
      "epoch": 0.14507772020725387,
      "grad_norm": 0.96875,
      "learning_rate": 0.0001931366638575863,
      "loss": 1.0863,
      "step": 952
    },
    {
      "epoch": 0.1452301127704968,
      "grad_norm": 0.9765625,
      "learning_rate": 0.0001931186823735333,
      "loss": 0.9543,
      "step": 953
    },
    {
      "epoch": 0.14538250533373973,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00019310067820443017,
      "loss": 0.9417,
      "step": 954
    },
    {
      "epoch": 0.14553489789698262,
      "grad_norm": 1.21875,
      "learning_rate": 0.00019308265135466307,
      "loss": 1.052,
      "step": 955
    },
    {
      "epoch": 0.14568729046022555,
      "grad_norm": 1.1015625,
      "learning_rate": 0.0001930646018286235,
      "loss": 1.3072,
      "step": 956
    },
    {
      "epoch": 0.14583968302346845,
      "grad_norm": 0.91015625,
      "learning_rate": 0.0001930465296307087,
      "loss": 1.041,
      "step": 957
    },
    {
      "epoch": 0.14599207558671137,
      "grad_norm": 0.71875,
      "learning_rate": 0.00019302843476532117,
      "loss": 0.9825,
      "step": 958
    },
    {
      "epoch": 0.14614446814995427,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00019301031723686918,
      "loss": 1.049,
      "step": 959
    },
    {
      "epoch": 0.1462968607131972,
      "grad_norm": 0.72265625,
      "learning_rate": 0.00019299217704976643,
      "loss": 0.9242,
      "step": 960
    },
    {
      "epoch": 0.14644925327644012,
      "grad_norm": 0.92578125,
      "learning_rate": 0.00019297401420843206,
      "loss": 1.0237,
      "step": 961
    },
    {
      "epoch": 0.14660164583968302,
      "grad_norm": 0.984375,
      "learning_rate": 0.00019295582871729086,
      "loss": 1.0586,
      "step": 962
    },
    {
      "epoch": 0.14675403840292595,
      "grad_norm": 1.015625,
      "learning_rate": 0.00019293762058077306,
      "loss": 0.9352,
      "step": 963
    },
    {
      "epoch": 0.14690643096616884,
      "grad_norm": 0.7890625,
      "learning_rate": 0.00019291938980331438,
      "loss": 0.9039,
      "step": 964
    },
    {
      "epoch": 0.14705882352941177,
      "grad_norm": 0.9921875,
      "learning_rate": 0.00019290113638935615,
      "loss": 1.0679,
      "step": 965
    },
    {
      "epoch": 0.14721121609265467,
      "grad_norm": 0.875,
      "learning_rate": 0.0001928828603433452,
      "loss": 0.844,
      "step": 966
    },
    {
      "epoch": 0.1473636086558976,
      "grad_norm": 0.88671875,
      "learning_rate": 0.00019286456166973376,
      "loss": 0.9329,
      "step": 967
    },
    {
      "epoch": 0.14751600121914052,
      "grad_norm": 1.0859375,
      "learning_rate": 0.0001928462403729797,
      "loss": 1.0891,
      "step": 968
    },
    {
      "epoch": 0.14766839378238342,
      "grad_norm": 1.0859375,
      "learning_rate": 0.00019282789645754629,
      "loss": 0.928,
      "step": 969
    },
    {
      "epoch": 0.14782078634562634,
      "grad_norm": 1.2265625,
      "learning_rate": 0.00019280952992790245,
      "loss": 0.9278,
      "step": 970
    },
    {
      "epoch": 0.14797317890886924,
      "grad_norm": 0.96875,
      "learning_rate": 0.00019279114078852246,
      "loss": 1.058,
      "step": 971
    },
    {
      "epoch": 0.14812557147211217,
      "grad_norm": 0.9140625,
      "learning_rate": 0.00019277272904388623,
      "loss": 0.9232,
      "step": 972
    },
    {
      "epoch": 0.14827796403535506,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00019275429469847914,
      "loss": 0.9831,
      "step": 973
    },
    {
      "epoch": 0.148430356598598,
      "grad_norm": 1.171875,
      "learning_rate": 0.000192735837756792,
      "loss": 1.0071,
      "step": 974
    },
    {
      "epoch": 0.14858274916184092,
      "grad_norm": 0.95703125,
      "learning_rate": 0.00019271735822332122,
      "loss": 0.9855,
      "step": 975
    },
    {
      "epoch": 0.1487351417250838,
      "grad_norm": 0.82421875,
      "learning_rate": 0.00019269885610256865,
      "loss": 0.866,
      "step": 976
    },
    {
      "epoch": 0.14888753428832674,
      "grad_norm": 1.21875,
      "learning_rate": 0.00019268033139904173,
      "loss": 1.0646,
      "step": 977
    },
    {
      "epoch": 0.14903992685156964,
      "grad_norm": 0.921875,
      "learning_rate": 0.00019266178411725334,
      "loss": 0.9542,
      "step": 978
    },
    {
      "epoch": 0.14919231941481256,
      "grad_norm": 0.6171875,
      "learning_rate": 0.0001926432142617218,
      "loss": 0.7183,
      "step": 979
    },
    {
      "epoch": 0.14934471197805546,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00019262462183697104,
      "loss": 1.0146,
      "step": 980
    },
    {
      "epoch": 0.1494971045412984,
      "grad_norm": 0.9375,
      "learning_rate": 0.00019260600684753044,
      "loss": 1.0049,
      "step": 981
    },
    {
      "epoch": 0.14964949710454128,
      "grad_norm": 0.86328125,
      "learning_rate": 0.00019258736929793487,
      "loss": 0.9972,
      "step": 982
    },
    {
      "epoch": 0.1498018896677842,
      "grad_norm": 0.81640625,
      "learning_rate": 0.0001925687091927247,
      "loss": 1.1177,
      "step": 983
    },
    {
      "epoch": 0.14995428223102714,
      "grad_norm": 0.84375,
      "learning_rate": 0.00019255002653644583,
      "loss": 1.0845,
      "step": 984
    },
    {
      "epoch": 0.15010667479427003,
      "grad_norm": 0.890625,
      "learning_rate": 0.0001925313213336496,
      "loss": 0.8379,
      "step": 985
    },
    {
      "epoch": 0.15025906735751296,
      "grad_norm": 0.84765625,
      "learning_rate": 0.00019251259358889287,
      "loss": 0.9579,
      "step": 986
    },
    {
      "epoch": 0.15041145992075586,
      "grad_norm": 0.9453125,
      "learning_rate": 0.00019249384330673793,
      "loss": 0.9486,
      "step": 987
    },
    {
      "epoch": 0.15056385248399878,
      "grad_norm": 1.1015625,
      "learning_rate": 0.00019247507049175276,
      "loss": 1.0827,
      "step": 988
    },
    {
      "epoch": 0.15071624504724168,
      "grad_norm": 0.8984375,
      "learning_rate": 0.00019245627514851056,
      "loss": 1.0088,
      "step": 989
    },
    {
      "epoch": 0.1508686376104846,
      "grad_norm": 0.87890625,
      "learning_rate": 0.00019243745728159017,
      "loss": 1.0643,
      "step": 990
    },
    {
      "epoch": 0.15102103017372753,
      "grad_norm": 1.3359375,
      "learning_rate": 0.00019241861689557594,
      "loss": 0.8147,
      "step": 991
    },
    {
      "epoch": 0.15117342273697043,
      "grad_norm": 0.95703125,
      "learning_rate": 0.00019239975399505763,
      "loss": 1.0629,
      "step": 992
    },
    {
      "epoch": 0.15132581530021336,
      "grad_norm": 0.9921875,
      "learning_rate": 0.0001923808685846305,
      "loss": 0.8365,
      "step": 993
    },
    {
      "epoch": 0.15147820786345625,
      "grad_norm": 1.0625,
      "learning_rate": 0.00019236196066889534,
      "loss": 0.8663,
      "step": 994
    },
    {
      "epoch": 0.15163060042669918,
      "grad_norm": 1.1484375,
      "learning_rate": 0.00019234303025245835,
      "loss": 0.9553,
      "step": 995
    },
    {
      "epoch": 0.15178299298994208,
      "grad_norm": 1.25,
      "learning_rate": 0.0001923240773399313,
      "loss": 0.9782,
      "step": 996
    },
    {
      "epoch": 0.151935385553185,
      "grad_norm": 0.99609375,
      "learning_rate": 0.00019230510193593133,
      "loss": 0.7689,
      "step": 997
    },
    {
      "epoch": 0.15208777811642793,
      "grad_norm": 0.8046875,
      "learning_rate": 0.00019228610404508118,
      "loss": 1.0858,
      "step": 998
    },
    {
      "epoch": 0.15224017067967083,
      "grad_norm": 0.62890625,
      "learning_rate": 0.00019226708367200897,
      "loss": 0.7863,
      "step": 999
    },
    {
      "epoch": 0.15239256324291375,
      "grad_norm": 0.7734375,
      "learning_rate": 0.00019224804082134837,
      "loss": 0.8993,
      "step": 1000
    },
    {
      "epoch": 0.15254495580615665,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00019222897549773848,
      "loss": 0.8931,
      "step": 1001
    },
    {
      "epoch": 0.15269734836939958,
      "grad_norm": 1.0625,
      "learning_rate": 0.00019220988770582388,
      "loss": 0.986,
      "step": 1002
    },
    {
      "epoch": 0.15284974093264247,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00019219077745025463,
      "loss": 0.8914,
      "step": 1003
    },
    {
      "epoch": 0.1530021334958854,
      "grad_norm": 0.78125,
      "learning_rate": 0.00019217164473568624,
      "loss": 1.0879,
      "step": 1004
    },
    {
      "epoch": 0.15315452605912833,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00019215248956677976,
      "loss": 1.2107,
      "step": 1005
    },
    {
      "epoch": 0.15330691862237122,
      "grad_norm": 1.078125,
      "learning_rate": 0.00019213331194820166,
      "loss": 1.1035,
      "step": 1006
    },
    {
      "epoch": 0.15345931118561415,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00019211411188462386,
      "loss": 1.1458,
      "step": 1007
    },
    {
      "epoch": 0.15361170374885705,
      "grad_norm": 0.890625,
      "learning_rate": 0.00019209488938072377,
      "loss": 0.9162,
      "step": 1008
    },
    {
      "epoch": 0.15376409631209997,
      "grad_norm": 0.8359375,
      "learning_rate": 0.00019207564444118427,
      "loss": 0.8972,
      "step": 1009
    },
    {
      "epoch": 0.15391648887534287,
      "grad_norm": 0.734375,
      "learning_rate": 0.00019205637707069375,
      "loss": 0.9696,
      "step": 1010
    },
    {
      "epoch": 0.1540688814385858,
      "grad_norm": 1.0390625,
      "learning_rate": 0.00019203708727394596,
      "loss": 0.9442,
      "step": 1011
    },
    {
      "epoch": 0.15422127400182872,
      "grad_norm": 1.203125,
      "learning_rate": 0.0001920177750556402,
      "loss": 1.1905,
      "step": 1012
    },
    {
      "epoch": 0.15437366656507162,
      "grad_norm": 1.1953125,
      "learning_rate": 0.00019199844042048117,
      "loss": 0.9822,
      "step": 1013
    },
    {
      "epoch": 0.15452605912831455,
      "grad_norm": 0.82421875,
      "learning_rate": 0.0001919790833731791,
      "loss": 0.8916,
      "step": 1014
    },
    {
      "epoch": 0.15467845169155744,
      "grad_norm": 0.77734375,
      "learning_rate": 0.00019195970391844966,
      "loss": 0.8945,
      "step": 1015
    },
    {
      "epoch": 0.15483084425480037,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00019194030206101393,
      "loss": 1.0082,
      "step": 1016
    },
    {
      "epoch": 0.15498323681804327,
      "grad_norm": 1.28125,
      "learning_rate": 0.0001919208778055985,
      "loss": 0.9712,
      "step": 1017
    },
    {
      "epoch": 0.1551356293812862,
      "grad_norm": 0.953125,
      "learning_rate": 0.00019190143115693534,
      "loss": 0.793,
      "step": 1018
    },
    {
      "epoch": 0.15528802194452912,
      "grad_norm": 0.9609375,
      "learning_rate": 0.00019188196211976204,
      "loss": 0.9645,
      "step": 1019
    },
    {
      "epoch": 0.15544041450777202,
      "grad_norm": 0.890625,
      "learning_rate": 0.00019186247069882147,
      "loss": 1.0311,
      "step": 1020
    },
    {
      "epoch": 0.15559280707101494,
      "grad_norm": 1.1640625,
      "learning_rate": 0.000191842956898862,
      "loss": 1.0526,
      "step": 1021
    },
    {
      "epoch": 0.15574519963425784,
      "grad_norm": 1.0390625,
      "learning_rate": 0.00019182342072463754,
      "loss": 0.9388,
      "step": 1022
    },
    {
      "epoch": 0.15589759219750077,
      "grad_norm": 1.0390625,
      "learning_rate": 0.00019180386218090734,
      "loss": 1.1132,
      "step": 1023
    },
    {
      "epoch": 0.15604998476074367,
      "grad_norm": 0.875,
      "learning_rate": 0.00019178428127243616,
      "loss": 0.987,
      "step": 1024
    },
    {
      "epoch": 0.1562023773239866,
      "grad_norm": 0.71875,
      "learning_rate": 0.00019176467800399415,
      "loss": 0.9361,
      "step": 1025
    },
    {
      "epoch": 0.15635476988722952,
      "grad_norm": 0.73046875,
      "learning_rate": 0.000191745052380357,
      "loss": 0.8241,
      "step": 1026
    },
    {
      "epoch": 0.15650716245047241,
      "grad_norm": 1.1640625,
      "learning_rate": 0.00019172540440630576,
      "loss": 0.9696,
      "step": 1027
    },
    {
      "epoch": 0.15665955501371534,
      "grad_norm": 1.171875,
      "learning_rate": 0.00019170573408662698,
      "loss": 1.0731,
      "step": 1028
    },
    {
      "epoch": 0.15681194757695824,
      "grad_norm": 0.7734375,
      "learning_rate": 0.00019168604142611262,
      "loss": 1.0069,
      "step": 1029
    },
    {
      "epoch": 0.15696434014020116,
      "grad_norm": 1.1171875,
      "learning_rate": 0.00019166632642956012,
      "loss": 1.0517,
      "step": 1030
    },
    {
      "epoch": 0.15711673270344406,
      "grad_norm": 1.0078125,
      "learning_rate": 0.0001916465891017723,
      "loss": 1.0042,
      "step": 1031
    },
    {
      "epoch": 0.157269125266687,
      "grad_norm": 0.68359375,
      "learning_rate": 0.00019162682944755746,
      "loss": 0.7448,
      "step": 1032
    },
    {
      "epoch": 0.15742151782992989,
      "grad_norm": 1.5703125,
      "learning_rate": 0.00019160704747172934,
      "loss": 0.94,
      "step": 1033
    },
    {
      "epoch": 0.1575739103931728,
      "grad_norm": 0.796875,
      "learning_rate": 0.00019158724317910718,
      "loss": 0.8631,
      "step": 1034
    },
    {
      "epoch": 0.15772630295641574,
      "grad_norm": 0.8359375,
      "learning_rate": 0.00019156741657451546,
      "loss": 0.846,
      "step": 1035
    },
    {
      "epoch": 0.15787869551965864,
      "grad_norm": 1.0390625,
      "learning_rate": 0.00019154756766278435,
      "loss": 0.9594,
      "step": 1036
    },
    {
      "epoch": 0.15803108808290156,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00019152769644874927,
      "loss": 0.828,
      "step": 1037
    },
    {
      "epoch": 0.15818348064614446,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00019150780293725113,
      "loss": 0.9828,
      "step": 1038
    },
    {
      "epoch": 0.15833587320938738,
      "grad_norm": 0.79296875,
      "learning_rate": 0.0001914878871331363,
      "loss": 1.0389,
      "step": 1039
    },
    {
      "epoch": 0.15848826577263028,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00019146794904125654,
      "loss": 0.9034,
      "step": 1040
    },
    {
      "epoch": 0.1586406583358732,
      "grad_norm": 0.9375,
      "learning_rate": 0.00019144798866646906,
      "loss": 0.9639,
      "step": 1041
    },
    {
      "epoch": 0.15879305089911613,
      "grad_norm": 1.2109375,
      "learning_rate": 0.0001914280060136365,
      "loss": 0.9031,
      "step": 1042
    },
    {
      "epoch": 0.15894544346235903,
      "grad_norm": 0.8125,
      "learning_rate": 0.0001914080010876269,
      "loss": 0.8617,
      "step": 1043
    },
    {
      "epoch": 0.15909783602560196,
      "grad_norm": 1.2109375,
      "learning_rate": 0.0001913879738933138,
      "loss": 1.1093,
      "step": 1044
    },
    {
      "epoch": 0.15925022858884486,
      "grad_norm": 0.859375,
      "learning_rate": 0.00019136792443557603,
      "loss": 0.9238,
      "step": 1045
    },
    {
      "epoch": 0.15940262115208778,
      "grad_norm": 1.0625,
      "learning_rate": 0.000191347852719298,
      "loss": 1.1198,
      "step": 1046
    },
    {
      "epoch": 0.15955501371533068,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00019132775874936947,
      "loss": 0.888,
      "step": 1047
    },
    {
      "epoch": 0.1597074062785736,
      "grad_norm": 0.92578125,
      "learning_rate": 0.00019130764253068555,
      "loss": 1.1161,
      "step": 1048
    },
    {
      "epoch": 0.15985979884181653,
      "grad_norm": 0.90625,
      "learning_rate": 0.00019128750406814687,
      "loss": 1.0967,
      "step": 1049
    },
    {
      "epoch": 0.16001219140505943,
      "grad_norm": 0.9921875,
      "learning_rate": 0.0001912673433666595,
      "loss": 1.0028,
      "step": 1050
    },
    {
      "epoch": 0.16016458396830235,
      "grad_norm": 1.1796875,
      "learning_rate": 0.00019124716043113481,
      "loss": 1.0489,
      "step": 1051
    },
    {
      "epoch": 0.16031697653154525,
      "grad_norm": 0.8984375,
      "learning_rate": 0.00019122695526648968,
      "loss": 1.0155,
      "step": 1052
    },
    {
      "epoch": 0.16046936909478818,
      "grad_norm": 0.8515625,
      "learning_rate": 0.0001912067278776464,
      "loss": 0.8715,
      "step": 1053
    },
    {
      "epoch": 0.16062176165803108,
      "grad_norm": 0.859375,
      "learning_rate": 0.00019118647826953263,
      "loss": 1.3036,
      "step": 1054
    },
    {
      "epoch": 0.160774154221274,
      "grad_norm": 1.1015625,
      "learning_rate": 0.00019116620644708145,
      "loss": 1.1815,
      "step": 1055
    },
    {
      "epoch": 0.16092654678451693,
      "grad_norm": 0.7734375,
      "learning_rate": 0.0001911459124152314,
      "loss": 1.0127,
      "step": 1056
    },
    {
      "epoch": 0.16107893934775983,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00019112559617892637,
      "loss": 1.0939,
      "step": 1057
    },
    {
      "epoch": 0.16123133191100275,
      "grad_norm": 0.9609375,
      "learning_rate": 0.00019110525774311573,
      "loss": 1.0947,
      "step": 1058
    },
    {
      "epoch": 0.16138372447424565,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00019108489711275418,
      "loss": 0.9813,
      "step": 1059
    },
    {
      "epoch": 0.16153611703748857,
      "grad_norm": 1.09375,
      "learning_rate": 0.00019106451429280185,
      "loss": 0.8242,
      "step": 1060
    },
    {
      "epoch": 0.16168850960073147,
      "grad_norm": 0.71484375,
      "learning_rate": 0.0001910441092882243,
      "loss": 0.898,
      "step": 1061
    },
    {
      "epoch": 0.1618409021639744,
      "grad_norm": 0.61328125,
      "learning_rate": 0.0001910236821039925,
      "loss": 0.8634,
      "step": 1062
    },
    {
      "epoch": 0.16199329472721732,
      "grad_norm": 0.79296875,
      "learning_rate": 0.0001910032327450828,
      "loss": 0.9302,
      "step": 1063
    },
    {
      "epoch": 0.16214568729046022,
      "grad_norm": 0.84765625,
      "learning_rate": 0.00019098276121647695,
      "loss": 0.8795,
      "step": 1064
    },
    {
      "epoch": 0.16229807985370315,
      "grad_norm": 0.92578125,
      "learning_rate": 0.0001909622675231621,
      "loss": 0.8676,
      "step": 1065
    },
    {
      "epoch": 0.16245047241694605,
      "grad_norm": 1.1953125,
      "learning_rate": 0.00019094175167013084,
      "loss": 1.1053,
      "step": 1066
    },
    {
      "epoch": 0.16260286498018897,
      "grad_norm": 0.828125,
      "learning_rate": 0.00019092121366238112,
      "loss": 0.8968,
      "step": 1067
    },
    {
      "epoch": 0.16275525754343187,
      "grad_norm": 0.859375,
      "learning_rate": 0.00019090065350491626,
      "loss": 0.9281,
      "step": 1068
    },
    {
      "epoch": 0.1629076501066748,
      "grad_norm": 0.859375,
      "learning_rate": 0.00019088007120274502,
      "loss": 1.0041,
      "step": 1069
    },
    {
      "epoch": 0.16306004266991772,
      "grad_norm": 1.171875,
      "learning_rate": 0.00019085946676088158,
      "loss": 0.8957,
      "step": 1070
    },
    {
      "epoch": 0.16321243523316062,
      "grad_norm": 0.828125,
      "learning_rate": 0.00019083884018434547,
      "loss": 1.1072,
      "step": 1071
    },
    {
      "epoch": 0.16336482779640354,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00019081819147816155,
      "loss": 0.8413,
      "step": 1072
    },
    {
      "epoch": 0.16351722035964644,
      "grad_norm": 0.87890625,
      "learning_rate": 0.00019079752064736022,
      "loss": 0.9502,
      "step": 1073
    },
    {
      "epoch": 0.16366961292288937,
      "grad_norm": 1.0078125,
      "learning_rate": 0.0001907768276969772,
      "loss": 1.1205,
      "step": 1074
    },
    {
      "epoch": 0.16382200548613227,
      "grad_norm": 0.71875,
      "learning_rate": 0.00019075611263205352,
      "loss": 0.8727,
      "step": 1075
    },
    {
      "epoch": 0.1639743980493752,
      "grad_norm": 1.2734375,
      "learning_rate": 0.00019073537545763572,
      "loss": 0.901,
      "step": 1076
    },
    {
      "epoch": 0.16412679061261812,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00019071461617877565,
      "loss": 1.0619,
      "step": 1077
    },
    {
      "epoch": 0.16427918317586102,
      "grad_norm": 1.1328125,
      "learning_rate": 0.00019069383480053057,
      "loss": 1.0468,
      "step": 1078
    },
    {
      "epoch": 0.16443157573910394,
      "grad_norm": 1.171875,
      "learning_rate": 0.00019067303132796318,
      "loss": 1.1226,
      "step": 1079
    },
    {
      "epoch": 0.16458396830234684,
      "grad_norm": 1.015625,
      "learning_rate": 0.00019065220576614143,
      "loss": 0.9602,
      "step": 1080
    },
    {
      "epoch": 0.16473636086558976,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00019063135812013874,
      "loss": 1.0302,
      "step": 1081
    },
    {
      "epoch": 0.16488875342883266,
      "grad_norm": 0.9375,
      "learning_rate": 0.00019061048839503393,
      "loss": 0.9413,
      "step": 1082
    },
    {
      "epoch": 0.1650411459920756,
      "grad_norm": 0.859375,
      "learning_rate": 0.00019058959659591112,
      "loss": 0.9988,
      "step": 1083
    },
    {
      "epoch": 0.16519353855531851,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00019056868272785988,
      "loss": 1.037,
      "step": 1084
    },
    {
      "epoch": 0.1653459311185614,
      "grad_norm": 0.89453125,
      "learning_rate": 0.00019054774679597513,
      "loss": 1.0304,
      "step": 1085
    },
    {
      "epoch": 0.16549832368180434,
      "grad_norm": 1.1171875,
      "learning_rate": 0.00019052678880535719,
      "loss": 1.0393,
      "step": 1086
    },
    {
      "epoch": 0.16565071624504724,
      "grad_norm": 0.921875,
      "learning_rate": 0.00019050580876111165,
      "loss": 1.0502,
      "step": 1087
    },
    {
      "epoch": 0.16580310880829016,
      "grad_norm": 1.1796875,
      "learning_rate": 0.00019048480666834965,
      "loss": 1.0313,
      "step": 1088
    },
    {
      "epoch": 0.16595550137153306,
      "grad_norm": 0.87109375,
      "learning_rate": 0.0001904637825321875,
      "loss": 0.9175,
      "step": 1089
    },
    {
      "epoch": 0.16610789393477599,
      "grad_norm": 0.765625,
      "learning_rate": 0.00019044273635774705,
      "loss": 0.9655,
      "step": 1090
    },
    {
      "epoch": 0.16626028649801888,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00019042166815015548,
      "loss": 0.9924,
      "step": 1091
    },
    {
      "epoch": 0.1664126790612618,
      "grad_norm": 1.140625,
      "learning_rate": 0.0001904005779145452,
      "loss": 1.053,
      "step": 1092
    },
    {
      "epoch": 0.16656507162450473,
      "grad_norm": 0.96484375,
      "learning_rate": 0.00019037946565605418,
      "loss": 0.9933,
      "step": 1093
    },
    {
      "epoch": 0.16671746418774763,
      "grad_norm": 0.73828125,
      "learning_rate": 0.00019035833137982563,
      "loss": 1.1257,
      "step": 1094
    },
    {
      "epoch": 0.16686985675099056,
      "grad_norm": 1.171875,
      "learning_rate": 0.0001903371750910082,
      "loss": 0.8516,
      "step": 1095
    },
    {
      "epoch": 0.16702224931423346,
      "grad_norm": 1.078125,
      "learning_rate": 0.00019031599679475585,
      "loss": 0.9712,
      "step": 1096
    },
    {
      "epoch": 0.16717464187747638,
      "grad_norm": 0.91796875,
      "learning_rate": 0.0001902947964962279,
      "loss": 0.8825,
      "step": 1097
    },
    {
      "epoch": 0.16732703444071928,
      "grad_norm": 1.21875,
      "learning_rate": 0.00019027357420058904,
      "loss": 0.9668,
      "step": 1098
    },
    {
      "epoch": 0.1674794270039622,
      "grad_norm": 0.984375,
      "learning_rate": 0.0001902523299130094,
      "loss": 1.1545,
      "step": 1099
    },
    {
      "epoch": 0.16763181956720513,
      "grad_norm": 0.765625,
      "learning_rate": 0.0001902310636386643,
      "loss": 1.0589,
      "step": 1100
    },
    {
      "epoch": 0.16778421213044803,
      "grad_norm": 0.8359375,
      "learning_rate": 0.00019020977538273458,
      "loss": 0.9438,
      "step": 1101
    },
    {
      "epoch": 0.16793660469369096,
      "grad_norm": 0.8125,
      "learning_rate": 0.00019018846515040633,
      "loss": 0.928,
      "step": 1102
    },
    {
      "epoch": 0.16808899725693385,
      "grad_norm": 1.2421875,
      "learning_rate": 0.000190167132946871,
      "loss": 1.2064,
      "step": 1103
    },
    {
      "epoch": 0.16824138982017678,
      "grad_norm": 0.8203125,
      "learning_rate": 0.0001901457787773255,
      "loss": 0.9659,
      "step": 1104
    },
    {
      "epoch": 0.16839378238341968,
      "grad_norm": 1.484375,
      "learning_rate": 0.00019012440264697193,
      "loss": 1.1082,
      "step": 1105
    },
    {
      "epoch": 0.1685461749466626,
      "grad_norm": 0.97265625,
      "learning_rate": 0.00019010300456101788,
      "loss": 0.9724,
      "step": 1106
    },
    {
      "epoch": 0.16869856750990553,
      "grad_norm": 0.9453125,
      "learning_rate": 0.0001900815845246762,
      "loss": 1.0455,
      "step": 1107
    },
    {
      "epoch": 0.16885096007314843,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00019006014254316518,
      "loss": 1.0446,
      "step": 1108
    },
    {
      "epoch": 0.16900335263639135,
      "grad_norm": 0.7578125,
      "learning_rate": 0.00019003867862170832,
      "loss": 0.945,
      "step": 1109
    },
    {
      "epoch": 0.16915574519963425,
      "grad_norm": 1.015625,
      "learning_rate": 0.00019001719276553458,
      "loss": 1.0559,
      "step": 1110
    },
    {
      "epoch": 0.16930813776287718,
      "grad_norm": 0.79296875,
      "learning_rate": 0.0001899956849798782,
      "loss": 0.8827,
      "step": 1111
    },
    {
      "epoch": 0.16946053032612007,
      "grad_norm": 0.89453125,
      "learning_rate": 0.0001899741552699788,
      "loss": 0.913,
      "step": 1112
    },
    {
      "epoch": 0.169612922889363,
      "grad_norm": 1.0234375,
      "learning_rate": 0.0001899526036410813,
      "loss": 1.0238,
      "step": 1113
    },
    {
      "epoch": 0.16976531545260592,
      "grad_norm": 0.828125,
      "learning_rate": 0.00018993103009843604,
      "loss": 0.8947,
      "step": 1114
    },
    {
      "epoch": 0.16991770801584882,
      "grad_norm": 0.828125,
      "learning_rate": 0.00018990943464729864,
      "loss": 0.9042,
      "step": 1115
    },
    {
      "epoch": 0.17007010057909175,
      "grad_norm": 0.96484375,
      "learning_rate": 0.00018988781729292997,
      "loss": 1.0382,
      "step": 1116
    },
    {
      "epoch": 0.17022249314233465,
      "grad_norm": 0.875,
      "learning_rate": 0.00018986617804059644,
      "loss": 1.0855,
      "step": 1117
    },
    {
      "epoch": 0.17037488570557757,
      "grad_norm": 0.98046875,
      "learning_rate": 0.00018984451689556963,
      "loss": 0.9498,
      "step": 1118
    },
    {
      "epoch": 0.17052727826882047,
      "grad_norm": 0.890625,
      "learning_rate": 0.00018982283386312652,
      "loss": 0.9415,
      "step": 1119
    },
    {
      "epoch": 0.1706796708320634,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00018980112894854942,
      "loss": 0.9402,
      "step": 1120
    },
    {
      "epoch": 0.17083206339530632,
      "grad_norm": 0.98046875,
      "learning_rate": 0.00018977940215712593,
      "loss": 0.989,
      "step": 1121
    },
    {
      "epoch": 0.17098445595854922,
      "grad_norm": 0.80859375,
      "learning_rate": 0.00018975765349414902,
      "loss": 1.1541,
      "step": 1122
    },
    {
      "epoch": 0.17113684852179215,
      "grad_norm": 0.87890625,
      "learning_rate": 0.000189735882964917,
      "loss": 1.1358,
      "step": 1123
    },
    {
      "epoch": 0.17128924108503504,
      "grad_norm": 0.95703125,
      "learning_rate": 0.00018971409057473343,
      "loss": 0.8979,
      "step": 1124
    },
    {
      "epoch": 0.17144163364827797,
      "grad_norm": 0.7734375,
      "learning_rate": 0.00018969227632890733,
      "loss": 0.7288,
      "step": 1125
    },
    {
      "epoch": 0.17159402621152087,
      "grad_norm": 1.03125,
      "learning_rate": 0.00018967044023275289,
      "loss": 1.0126,
      "step": 1126
    },
    {
      "epoch": 0.1717464187747638,
      "grad_norm": 1.0859375,
      "learning_rate": 0.00018964858229158973,
      "loss": 0.9624,
      "step": 1127
    },
    {
      "epoch": 0.17189881133800672,
      "grad_norm": 0.859375,
      "learning_rate": 0.00018962670251074275,
      "loss": 1.0081,
      "step": 1128
    },
    {
      "epoch": 0.17205120390124962,
      "grad_norm": 0.94140625,
      "learning_rate": 0.00018960480089554217,
      "loss": 1.0686,
      "step": 1129
    },
    {
      "epoch": 0.17220359646449254,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00018958287745132358,
      "loss": 0.9985,
      "step": 1130
    },
    {
      "epoch": 0.17235598902773544,
      "grad_norm": 0.9375,
      "learning_rate": 0.0001895609321834278,
      "loss": 0.8536,
      "step": 1131
    },
    {
      "epoch": 0.17250838159097837,
      "grad_norm": 1.1953125,
      "learning_rate": 0.00018953896509720105,
      "loss": 1.0238,
      "step": 1132
    },
    {
      "epoch": 0.17266077415422126,
      "grad_norm": 0.96875,
      "learning_rate": 0.0001895169761979948,
      "loss": 0.9937,
      "step": 1133
    },
    {
      "epoch": 0.1728131667174642,
      "grad_norm": 0.83203125,
      "learning_rate": 0.00018949496549116584,
      "loss": 0.7983,
      "step": 1134
    },
    {
      "epoch": 0.17296555928070712,
      "grad_norm": 0.9296875,
      "learning_rate": 0.00018947293298207635,
      "loss": 0.9757,
      "step": 1135
    },
    {
      "epoch": 0.17311795184395,
      "grad_norm": 0.734375,
      "learning_rate": 0.00018945087867609374,
      "loss": 0.975,
      "step": 1136
    },
    {
      "epoch": 0.17327034440719294,
      "grad_norm": 0.88671875,
      "learning_rate": 0.00018942880257859077,
      "loss": 1.0993,
      "step": 1137
    },
    {
      "epoch": 0.17342273697043584,
      "grad_norm": 0.7734375,
      "learning_rate": 0.00018940670469494547,
      "loss": 0.9034,
      "step": 1138
    },
    {
      "epoch": 0.17357512953367876,
      "grad_norm": 0.6640625,
      "learning_rate": 0.00018938458503054122,
      "loss": 0.8546,
      "step": 1139
    },
    {
      "epoch": 0.17372752209692166,
      "grad_norm": 0.890625,
      "learning_rate": 0.00018936244359076668,
      "loss": 0.764,
      "step": 1140
    },
    {
      "epoch": 0.1738799146601646,
      "grad_norm": 0.94140625,
      "learning_rate": 0.00018934028038101582,
      "loss": 0.8334,
      "step": 1141
    },
    {
      "epoch": 0.17403230722340748,
      "grad_norm": 0.9609375,
      "learning_rate": 0.00018931809540668793,
      "loss": 0.905,
      "step": 1142
    },
    {
      "epoch": 0.1741846997866504,
      "grad_norm": 1.1640625,
      "learning_rate": 0.00018929588867318758,
      "loss": 0.8419,
      "step": 1143
    },
    {
      "epoch": 0.17433709234989334,
      "grad_norm": 0.875,
      "learning_rate": 0.00018927366018592462,
      "loss": 0.9498,
      "step": 1144
    },
    {
      "epoch": 0.17448948491313623,
      "grad_norm": 0.875,
      "learning_rate": 0.0001892514099503143,
      "loss": 0.9081,
      "step": 1145
    },
    {
      "epoch": 0.17464187747637916,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00018922913797177706,
      "loss": 1.1052,
      "step": 1146
    },
    {
      "epoch": 0.17479427003962206,
      "grad_norm": 1.265625,
      "learning_rate": 0.00018920684425573865,
      "loss": 0.9738,
      "step": 1147
    },
    {
      "epoch": 0.17494666260286498,
      "grad_norm": 0.84375,
      "learning_rate": 0.00018918452880763018,
      "loss": 0.9192,
      "step": 1148
    },
    {
      "epoch": 0.17509905516610788,
      "grad_norm": 0.8984375,
      "learning_rate": 0.000189162191632888,
      "loss": 0.9124,
      "step": 1149
    },
    {
      "epoch": 0.1752514477293508,
      "grad_norm": 1.0546875,
      "learning_rate": 0.00018913983273695375,
      "loss": 0.9969,
      "step": 1150
    },
    {
      "epoch": 0.17540384029259373,
      "grad_norm": 1.1171875,
      "learning_rate": 0.00018911745212527435,
      "loss": 1.0264,
      "step": 1151
    },
    {
      "epoch": 0.17555623285583663,
      "grad_norm": 1.140625,
      "learning_rate": 0.00018909504980330215,
      "loss": 1.0005,
      "step": 1152
    },
    {
      "epoch": 0.17570862541907956,
      "grad_norm": 1.0078125,
      "learning_rate": 0.0001890726257764946,
      "loss": 0.9895,
      "step": 1153
    },
    {
      "epoch": 0.17586101798232245,
      "grad_norm": 1.15625,
      "learning_rate": 0.00018905018005031446,
      "loss": 1.1643,
      "step": 1154
    },
    {
      "epoch": 0.17601341054556538,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00018902771263022995,
      "loss": 0.9143,
      "step": 1155
    },
    {
      "epoch": 0.17616580310880828,
      "grad_norm": 0.72265625,
      "learning_rate": 0.00018900522352171439,
      "loss": 0.8551,
      "step": 1156
    },
    {
      "epoch": 0.1763181956720512,
      "grad_norm": 0.875,
      "learning_rate": 0.00018898271273024646,
      "loss": 1.191,
      "step": 1157
    },
    {
      "epoch": 0.17647058823529413,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00018896018026131012,
      "loss": 0.9465,
      "step": 1158
    },
    {
      "epoch": 0.17662298079853703,
      "grad_norm": 0.8359375,
      "learning_rate": 0.0001889376261203946,
      "loss": 1.0077,
      "step": 1159
    },
    {
      "epoch": 0.17677537336177995,
      "grad_norm": 0.875,
      "learning_rate": 0.0001889150503129944,
      "loss": 0.8852,
      "step": 1160
    },
    {
      "epoch": 0.17692776592502285,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00018889245284460932,
      "loss": 1.0759,
      "step": 1161
    },
    {
      "epoch": 0.17708015848826578,
      "grad_norm": 1.015625,
      "learning_rate": 0.00018886983372074444,
      "loss": 1.1039,
      "step": 1162
    },
    {
      "epoch": 0.17723255105150867,
      "grad_norm": 0.7734375,
      "learning_rate": 0.0001888471929469101,
      "loss": 0.9051,
      "step": 1163
    },
    {
      "epoch": 0.1773849436147516,
      "grad_norm": 0.9453125,
      "learning_rate": 0.0001888245305286219,
      "loss": 1.0796,
      "step": 1164
    },
    {
      "epoch": 0.17753733617799453,
      "grad_norm": 1.8125,
      "learning_rate": 0.00018880184647140076,
      "loss": 0.9226,
      "step": 1165
    },
    {
      "epoch": 0.17768972874123742,
      "grad_norm": 1.0234375,
      "learning_rate": 0.0001887791407807728,
      "loss": 1.118,
      "step": 1166
    },
    {
      "epoch": 0.17784212130448035,
      "grad_norm": 0.859375,
      "learning_rate": 0.0001887564134622695,
      "loss": 0.9846,
      "step": 1167
    },
    {
      "epoch": 0.17799451386772325,
      "grad_norm": 0.98828125,
      "learning_rate": 0.0001887336645214275,
      "loss": 0.9231,
      "step": 1168
    },
    {
      "epoch": 0.17814690643096617,
      "grad_norm": 1.1171875,
      "learning_rate": 0.00018871089396378885,
      "loss": 1.0237,
      "step": 1169
    },
    {
      "epoch": 0.17829929899420907,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00018868810179490075,
      "loss": 1.0441,
      "step": 1170
    },
    {
      "epoch": 0.178451691557452,
      "grad_norm": 0.8984375,
      "learning_rate": 0.00018866528802031568,
      "loss": 0.9227,
      "step": 1171
    },
    {
      "epoch": 0.17860408412069492,
      "grad_norm": 1.0546875,
      "learning_rate": 0.0001886424526455914,
      "loss": 1.075,
      "step": 1172
    },
    {
      "epoch": 0.17875647668393782,
      "grad_norm": 0.7265625,
      "learning_rate": 0.00018861959567629099,
      "loss": 0.892,
      "step": 1173
    },
    {
      "epoch": 0.17890886924718075,
      "grad_norm": 0.8046875,
      "learning_rate": 0.00018859671711798267,
      "loss": 0.9809,
      "step": 1174
    },
    {
      "epoch": 0.17906126181042364,
      "grad_norm": 1.1015625,
      "learning_rate": 0.00018857381697624,
      "loss": 1.0507,
      "step": 1175
    },
    {
      "epoch": 0.17921365437366657,
      "grad_norm": 0.83203125,
      "learning_rate": 0.00018855089525664185,
      "loss": 1.0982,
      "step": 1176
    },
    {
      "epoch": 0.17936604693690947,
      "grad_norm": 0.8125,
      "learning_rate": 0.00018852795196477222,
      "loss": 0.8542,
      "step": 1177
    },
    {
      "epoch": 0.1795184395001524,
      "grad_norm": 0.88671875,
      "learning_rate": 0.00018850498710622042,
      "loss": 0.9351,
      "step": 1178
    },
    {
      "epoch": 0.17967083206339532,
      "grad_norm": 0.8046875,
      "learning_rate": 0.00018848200068658107,
      "loss": 0.9455,
      "step": 1179
    },
    {
      "epoch": 0.17982322462663822,
      "grad_norm": 1.1484375,
      "learning_rate": 0.0001884589927114539,
      "loss": 1.0946,
      "step": 1180
    },
    {
      "epoch": 0.17997561718988114,
      "grad_norm": 0.9296875,
      "learning_rate": 0.00018843596318644408,
      "loss": 1.0991,
      "step": 1181
    },
    {
      "epoch": 0.18012800975312404,
      "grad_norm": 0.7890625,
      "learning_rate": 0.00018841291211716188,
      "loss": 0.8554,
      "step": 1182
    },
    {
      "epoch": 0.18028040231636697,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00018838983950922292,
      "loss": 1.0232,
      "step": 1183
    },
    {
      "epoch": 0.18043279487960986,
      "grad_norm": 1.1875,
      "learning_rate": 0.00018836674536824795,
      "loss": 1.0277,
      "step": 1184
    },
    {
      "epoch": 0.1805851874428528,
      "grad_norm": 0.92578125,
      "learning_rate": 0.00018834362969986308,
      "loss": 0.8957,
      "step": 1185
    },
    {
      "epoch": 0.18073758000609572,
      "grad_norm": 1.2734375,
      "learning_rate": 0.00018832049250969962,
      "loss": 1.0761,
      "step": 1186
    },
    {
      "epoch": 0.18088997256933861,
      "grad_norm": 0.890625,
      "learning_rate": 0.0001882973338033941,
      "loss": 0.9916,
      "step": 1187
    },
    {
      "epoch": 0.18104236513258154,
      "grad_norm": 1.4375,
      "learning_rate": 0.00018827415358658832,
      "loss": 1.0994,
      "step": 1188
    },
    {
      "epoch": 0.18119475769582444,
      "grad_norm": 0.8125,
      "learning_rate": 0.00018825095186492935,
      "loss": 0.9744,
      "step": 1189
    },
    {
      "epoch": 0.18134715025906736,
      "grad_norm": 0.9921875,
      "learning_rate": 0.0001882277286440694,
      "loss": 1.1009,
      "step": 1190
    },
    {
      "epoch": 0.18149954282231026,
      "grad_norm": 0.94140625,
      "learning_rate": 0.00018820448392966606,
      "loss": 1.0372,
      "step": 1191
    },
    {
      "epoch": 0.1816519353855532,
      "grad_norm": 0.92578125,
      "learning_rate": 0.000188181217727382,
      "loss": 0.924,
      "step": 1192
    },
    {
      "epoch": 0.18180432794879609,
      "grad_norm": 1.1796875,
      "learning_rate": 0.00018815793004288526,
      "loss": 0.9697,
      "step": 1193
    },
    {
      "epoch": 0.181956720512039,
      "grad_norm": 0.67578125,
      "learning_rate": 0.00018813462088184898,
      "loss": 0.8572,
      "step": 1194
    },
    {
      "epoch": 0.18210911307528194,
      "grad_norm": 1.1015625,
      "learning_rate": 0.00018811129024995168,
      "loss": 1.1528,
      "step": 1195
    },
    {
      "epoch": 0.18226150563852483,
      "grad_norm": 0.76953125,
      "learning_rate": 0.00018808793815287703,
      "loss": 1.0202,
      "step": 1196
    },
    {
      "epoch": 0.18241389820176776,
      "grad_norm": 1.09375,
      "learning_rate": 0.0001880645645963139,
      "loss": 1.2665,
      "step": 1197
    },
    {
      "epoch": 0.18256629076501066,
      "grad_norm": 0.765625,
      "learning_rate": 0.00018804116958595643,
      "loss": 0.8104,
      "step": 1198
    },
    {
      "epoch": 0.18271868332825358,
      "grad_norm": 0.8671875,
      "learning_rate": 0.000188017753127504,
      "loss": 0.9092,
      "step": 1199
    },
    {
      "epoch": 0.18287107589149648,
      "grad_norm": 1.0234375,
      "learning_rate": 0.0001879943152266612,
      "loss": 0.9655,
      "step": 1200
    },
    {
      "epoch": 0.1830234684547394,
      "grad_norm": 0.734375,
      "learning_rate": 0.00018797085588913783,
      "loss": 0.944,
      "step": 1201
    },
    {
      "epoch": 0.18317586101798233,
      "grad_norm": 0.875,
      "learning_rate": 0.0001879473751206489,
      "loss": 1.018,
      "step": 1202
    },
    {
      "epoch": 0.18332825358122523,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00018792387292691472,
      "loss": 1.0058,
      "step": 1203
    },
    {
      "epoch": 0.18348064614446816,
      "grad_norm": 0.69921875,
      "learning_rate": 0.00018790034931366072,
      "loss": 0.8346,
      "step": 1204
    },
    {
      "epoch": 0.18363303870771105,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00018787680428661755,
      "loss": 1.238,
      "step": 1205
    },
    {
      "epoch": 0.18378543127095398,
      "grad_norm": 0.63671875,
      "learning_rate": 0.00018785323785152123,
      "loss": 0.8688,
      "step": 1206
    },
    {
      "epoch": 0.18393782383419688,
      "grad_norm": 1.0078125,
      "learning_rate": 0.0001878296500141128,
      "loss": 0.8741,
      "step": 1207
    },
    {
      "epoch": 0.1840902163974398,
      "grad_norm": 0.890625,
      "learning_rate": 0.0001878060407801386,
      "loss": 0.9348,
      "step": 1208
    },
    {
      "epoch": 0.18424260896068273,
      "grad_norm": 0.87890625,
      "learning_rate": 0.0001877824101553502,
      "loss": 0.804,
      "step": 1209
    },
    {
      "epoch": 0.18439500152392563,
      "grad_norm": 1.046875,
      "learning_rate": 0.00018775875814550433,
      "loss": 0.9711,
      "step": 1210
    },
    {
      "epoch": 0.18454739408716855,
      "grad_norm": 0.9921875,
      "learning_rate": 0.000187735084756363,
      "loss": 0.7455,
      "step": 1211
    },
    {
      "epoch": 0.18469978665041145,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00018771138999369336,
      "loss": 0.9603,
      "step": 1212
    },
    {
      "epoch": 0.18485217921365438,
      "grad_norm": 1.078125,
      "learning_rate": 0.00018768767386326783,
      "loss": 1.0724,
      "step": 1213
    },
    {
      "epoch": 0.18500457177689728,
      "grad_norm": 0.98828125,
      "learning_rate": 0.00018766393637086395,
      "loss": 1.0217,
      "step": 1214
    },
    {
      "epoch": 0.1851569643401402,
      "grad_norm": 0.90625,
      "learning_rate": 0.00018764017752226453,
      "loss": 0.9958,
      "step": 1215
    },
    {
      "epoch": 0.18530935690338313,
      "grad_norm": 1.234375,
      "learning_rate": 0.0001876163973232576,
      "loss": 0.9896,
      "step": 1216
    },
    {
      "epoch": 0.18546174946662602,
      "grad_norm": 1.3203125,
      "learning_rate": 0.00018759259577963633,
      "loss": 1.1134,
      "step": 1217
    },
    {
      "epoch": 0.18561414202986895,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00018756877289719914,
      "loss": 0.8647,
      "step": 1218
    },
    {
      "epoch": 0.18576653459311185,
      "grad_norm": 1.0546875,
      "learning_rate": 0.00018754492868174958,
      "loss": 1.1345,
      "step": 1219
    },
    {
      "epoch": 0.18591892715635477,
      "grad_norm": 0.98046875,
      "learning_rate": 0.0001875210631390965,
      "loss": 1.1088,
      "step": 1220
    },
    {
      "epoch": 0.18607131971959767,
      "grad_norm": 1.078125,
      "learning_rate": 0.00018749717627505384,
      "loss": 0.9981,
      "step": 1221
    },
    {
      "epoch": 0.1862237122828406,
      "grad_norm": 1.03125,
      "learning_rate": 0.00018747326809544083,
      "loss": 0.9704,
      "step": 1222
    },
    {
      "epoch": 0.18637610484608352,
      "grad_norm": 0.94140625,
      "learning_rate": 0.00018744933860608183,
      "loss": 1.0698,
      "step": 1223
    },
    {
      "epoch": 0.18652849740932642,
      "grad_norm": 1.1328125,
      "learning_rate": 0.00018742538781280635,
      "loss": 1.126,
      "step": 1224
    },
    {
      "epoch": 0.18668088997256935,
      "grad_norm": 0.77734375,
      "learning_rate": 0.00018740141572144925,
      "loss": 0.9323,
      "step": 1225
    },
    {
      "epoch": 0.18683328253581225,
      "grad_norm": 0.890625,
      "learning_rate": 0.0001873774223378504,
      "loss": 0.9845,
      "step": 1226
    },
    {
      "epoch": 0.18698567509905517,
      "grad_norm": 0.90625,
      "learning_rate": 0.00018735340766785495,
      "loss": 1.0732,
      "step": 1227
    },
    {
      "epoch": 0.18713806766229807,
      "grad_norm": 0.77734375,
      "learning_rate": 0.00018732937171731326,
      "loss": 0.8705,
      "step": 1228
    },
    {
      "epoch": 0.187290460225541,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00018730531449208078,
      "loss": 1.0518,
      "step": 1229
    },
    {
      "epoch": 0.18744285278878392,
      "grad_norm": 1.0625,
      "learning_rate": 0.00018728123599801824,
      "loss": 0.9513,
      "step": 1230
    },
    {
      "epoch": 0.18759524535202682,
      "grad_norm": 0.8359375,
      "learning_rate": 0.00018725713624099147,
      "loss": 1.0057,
      "step": 1231
    },
    {
      "epoch": 0.18774763791526974,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00018723301522687152,
      "loss": 0.9633,
      "step": 1232
    },
    {
      "epoch": 0.18790003047851264,
      "grad_norm": 1.03125,
      "learning_rate": 0.00018720887296153462,
      "loss": 1.0658,
      "step": 1233
    },
    {
      "epoch": 0.18805242304175557,
      "grad_norm": 0.8125,
      "learning_rate": 0.00018718470945086222,
      "loss": 0.7939,
      "step": 1234
    },
    {
      "epoch": 0.18820481560499847,
      "grad_norm": 1.03125,
      "learning_rate": 0.00018716052470074083,
      "loss": 0.9449,
      "step": 1235
    },
    {
      "epoch": 0.1883572081682414,
      "grad_norm": 0.921875,
      "learning_rate": 0.00018713631871706223,
      "loss": 1.0358,
      "step": 1236
    },
    {
      "epoch": 0.18850960073148432,
      "grad_norm": 0.703125,
      "learning_rate": 0.00018711209150572335,
      "loss": 0.8374,
      "step": 1237
    },
    {
      "epoch": 0.18866199329472721,
      "grad_norm": 0.96484375,
      "learning_rate": 0.0001870878430726263,
      "loss": 0.6703,
      "step": 1238
    },
    {
      "epoch": 0.18881438585797014,
      "grad_norm": 0.7578125,
      "learning_rate": 0.0001870635734236783,
      "loss": 0.8533,
      "step": 1239
    },
    {
      "epoch": 0.18896677842121304,
      "grad_norm": 0.6953125,
      "learning_rate": 0.00018703928256479183,
      "loss": 0.9645,
      "step": 1240
    },
    {
      "epoch": 0.18911917098445596,
      "grad_norm": 1.28125,
      "learning_rate": 0.0001870149705018845,
      "loss": 1.0156,
      "step": 1241
    },
    {
      "epoch": 0.18927156354769886,
      "grad_norm": 0.7421875,
      "learning_rate": 0.00018699063724087904,
      "loss": 0.9378,
      "step": 1242
    },
    {
      "epoch": 0.1894239561109418,
      "grad_norm": 0.90234375,
      "learning_rate": 0.0001869662827877034,
      "loss": 0.8677,
      "step": 1243
    },
    {
      "epoch": 0.18957634867418469,
      "grad_norm": 0.73046875,
      "learning_rate": 0.0001869419071482907,
      "loss": 0.8943,
      "step": 1244
    },
    {
      "epoch": 0.1897287412374276,
      "grad_norm": 0.78515625,
      "learning_rate": 0.00018691751032857913,
      "loss": 0.8189,
      "step": 1245
    },
    {
      "epoch": 0.18988113380067054,
      "grad_norm": 0.90625,
      "learning_rate": 0.00018689309233451215,
      "loss": 0.8926,
      "step": 1246
    },
    {
      "epoch": 0.19003352636391344,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00018686865317203838,
      "loss": 0.8235,
      "step": 1247
    },
    {
      "epoch": 0.19018591892715636,
      "grad_norm": 1.2421875,
      "learning_rate": 0.00018684419284711146,
      "loss": 0.9717,
      "step": 1248
    },
    {
      "epoch": 0.19033831149039926,
      "grad_norm": 1.109375,
      "learning_rate": 0.00018681971136569035,
      "loss": 1.2108,
      "step": 1249
    },
    {
      "epoch": 0.19049070405364218,
      "grad_norm": 0.81640625,
      "learning_rate": 0.00018679520873373902,
      "loss": 1.1191,
      "step": 1250
    },
    {
      "epoch": 0.19064309661688508,
      "grad_norm": 1.2109375,
      "learning_rate": 0.00018677068495722674,
      "loss": 1.2713,
      "step": 1251
    },
    {
      "epoch": 0.190795489180128,
      "grad_norm": 0.85546875,
      "learning_rate": 0.0001867461400421278,
      "loss": 1.0528,
      "step": 1252
    },
    {
      "epoch": 0.19094788174337093,
      "grad_norm": 0.9140625,
      "learning_rate": 0.00018672157399442173,
      "loss": 1.0539,
      "step": 1253
    },
    {
      "epoch": 0.19110027430661383,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00018669698682009315,
      "loss": 0.8696,
      "step": 1254
    },
    {
      "epoch": 0.19125266686985676,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00018667237852513187,
      "loss": 0.8748,
      "step": 1255
    },
    {
      "epoch": 0.19140505943309966,
      "grad_norm": 1.078125,
      "learning_rate": 0.00018664774911553278,
      "loss": 1.1254,
      "step": 1256
    },
    {
      "epoch": 0.19155745199634258,
      "grad_norm": 1.03125,
      "learning_rate": 0.000186623098597296,
      "loss": 1.1543,
      "step": 1257
    },
    {
      "epoch": 0.19170984455958548,
      "grad_norm": 0.7578125,
      "learning_rate": 0.00018659842697642677,
      "loss": 0.8906,
      "step": 1258
    },
    {
      "epoch": 0.1918622371228284,
      "grad_norm": 1.109375,
      "learning_rate": 0.00018657373425893535,
      "loss": 0.8176,
      "step": 1259
    },
    {
      "epoch": 0.19201462968607133,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00018654902045083739,
      "loss": 1.1175,
      "step": 1260
    },
    {
      "epoch": 0.19216702224931423,
      "grad_norm": 0.81640625,
      "learning_rate": 0.0001865242855581534,
      "loss": 0.9813,
      "step": 1261
    },
    {
      "epoch": 0.19231941481255715,
      "grad_norm": 0.99609375,
      "learning_rate": 0.00018649952958690923,
      "loss": 0.7435,
      "step": 1262
    },
    {
      "epoch": 0.19247180737580005,
      "grad_norm": 0.8828125,
      "learning_rate": 0.00018647475254313575,
      "loss": 0.9855,
      "step": 1263
    },
    {
      "epoch": 0.19262419993904298,
      "grad_norm": 0.98046875,
      "learning_rate": 0.00018644995443286902,
      "loss": 1.0771,
      "step": 1264
    },
    {
      "epoch": 0.19277659250228588,
      "grad_norm": 0.9140625,
      "learning_rate": 0.00018642513526215024,
      "loss": 1.1015,
      "step": 1265
    },
    {
      "epoch": 0.1929289850655288,
      "grad_norm": 0.9609375,
      "learning_rate": 0.00018640029503702568,
      "loss": 1.146,
      "step": 1266
    },
    {
      "epoch": 0.19308137762877173,
      "grad_norm": 0.890625,
      "learning_rate": 0.00018637543376354677,
      "loss": 1.0298,
      "step": 1267
    },
    {
      "epoch": 0.19323377019201463,
      "grad_norm": 1.1328125,
      "learning_rate": 0.00018635055144777013,
      "loss": 1.1742,
      "step": 1268
    },
    {
      "epoch": 0.19338616275525755,
      "grad_norm": 0.70703125,
      "learning_rate": 0.00018632564809575742,
      "loss": 0.8311,
      "step": 1269
    },
    {
      "epoch": 0.19353855531850045,
      "grad_norm": 0.7734375,
      "learning_rate": 0.0001863007237135754,
      "loss": 0.8678,
      "step": 1270
    },
    {
      "epoch": 0.19369094788174337,
      "grad_norm": 0.86328125,
      "learning_rate": 0.0001862757783072961,
      "loss": 0.8928,
      "step": 1271
    },
    {
      "epoch": 0.19384334044498627,
      "grad_norm": 1.0859375,
      "learning_rate": 0.00018625081188299652,
      "loss": 0.8986,
      "step": 1272
    },
    {
      "epoch": 0.1939957330082292,
      "grad_norm": 0.88671875,
      "learning_rate": 0.00018622582444675881,
      "loss": 0.8945,
      "step": 1273
    },
    {
      "epoch": 0.19414812557147212,
      "grad_norm": 0.9375,
      "learning_rate": 0.00018620081600467038,
      "loss": 0.865,
      "step": 1274
    },
    {
      "epoch": 0.19430051813471502,
      "grad_norm": 0.90625,
      "learning_rate": 0.00018617578656282357,
      "loss": 0.9607,
      "step": 1275
    },
    {
      "epoch": 0.19445291069795795,
      "grad_norm": 1.0078125,
      "learning_rate": 0.0001861507361273159,
      "loss": 1.0648,
      "step": 1276
    },
    {
      "epoch": 0.19460530326120085,
      "grad_norm": 0.9296875,
      "learning_rate": 0.00018612566470425007,
      "loss": 0.8589,
      "step": 1277
    },
    {
      "epoch": 0.19475769582444377,
      "grad_norm": 0.82421875,
      "learning_rate": 0.0001861005722997338,
      "loss": 1.0245,
      "step": 1278
    },
    {
      "epoch": 0.19491008838768667,
      "grad_norm": 0.94921875,
      "learning_rate": 0.00018607545891987997,
      "loss": 0.8495,
      "step": 1279
    },
    {
      "epoch": 0.1950624809509296,
      "grad_norm": 0.75,
      "learning_rate": 0.00018605032457080653,
      "loss": 0.8758,
      "step": 1280
    },
    {
      "epoch": 0.19521487351417252,
      "grad_norm": 1.234375,
      "learning_rate": 0.00018602516925863666,
      "loss": 0.9725,
      "step": 1281
    },
    {
      "epoch": 0.19536726607741542,
      "grad_norm": 0.828125,
      "learning_rate": 0.0001859999929894984,
      "loss": 0.8486,
      "step": 1282
    },
    {
      "epoch": 0.19551965864065834,
      "grad_norm": 0.80078125,
      "learning_rate": 0.00018597479576952522,
      "loss": 1.0588,
      "step": 1283
    },
    {
      "epoch": 0.19567205120390124,
      "grad_norm": 0.96875,
      "learning_rate": 0.00018594957760485544,
      "loss": 0.9923,
      "step": 1284
    },
    {
      "epoch": 0.19582444376714417,
      "grad_norm": 1.140625,
      "learning_rate": 0.00018592433850163256,
      "loss": 1.1754,
      "step": 1285
    },
    {
      "epoch": 0.19597683633038707,
      "grad_norm": 1.078125,
      "learning_rate": 0.0001858990784660052,
      "loss": 1.0674,
      "step": 1286
    },
    {
      "epoch": 0.19612922889363,
      "grad_norm": 1.0390625,
      "learning_rate": 0.0001858737975041271,
      "loss": 0.9182,
      "step": 1287
    },
    {
      "epoch": 0.19628162145687292,
      "grad_norm": 0.953125,
      "learning_rate": 0.00018584849562215702,
      "loss": 1.15,
      "step": 1288
    },
    {
      "epoch": 0.19643401402011582,
      "grad_norm": 0.90625,
      "learning_rate": 0.00018582317282625892,
      "loss": 1.0043,
      "step": 1289
    },
    {
      "epoch": 0.19658640658335874,
      "grad_norm": 1.203125,
      "learning_rate": 0.00018579782912260174,
      "loss": 1.0215,
      "step": 1290
    },
    {
      "epoch": 0.19673879914660164,
      "grad_norm": 1.609375,
      "learning_rate": 0.00018577246451735956,
      "loss": 1.1499,
      "step": 1291
    },
    {
      "epoch": 0.19689119170984457,
      "grad_norm": 1.171875,
      "learning_rate": 0.00018574707901671163,
      "loss": 0.9306,
      "step": 1292
    },
    {
      "epoch": 0.19704358427308746,
      "grad_norm": 1.21875,
      "learning_rate": 0.00018572167262684216,
      "loss": 1.1456,
      "step": 1293
    },
    {
      "epoch": 0.1971959768363304,
      "grad_norm": 0.94921875,
      "learning_rate": 0.00018569624535394054,
      "loss": 1.0198,
      "step": 1294
    },
    {
      "epoch": 0.19734836939957331,
      "grad_norm": 0.9453125,
      "learning_rate": 0.00018567079720420123,
      "loss": 1.0451,
      "step": 1295
    },
    {
      "epoch": 0.1975007619628162,
      "grad_norm": 1.1796875,
      "learning_rate": 0.00018564532818382377,
      "loss": 1.193,
      "step": 1296
    },
    {
      "epoch": 0.19765315452605914,
      "grad_norm": 0.74609375,
      "learning_rate": 0.00018561983829901274,
      "loss": 0.853,
      "step": 1297
    },
    {
      "epoch": 0.19780554708930204,
      "grad_norm": 1.1328125,
      "learning_rate": 0.00018559432755597786,
      "loss": 1.0439,
      "step": 1298
    },
    {
      "epoch": 0.19795793965254496,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00018556879596093393,
      "loss": 1.0729,
      "step": 1299
    },
    {
      "epoch": 0.19811033221578786,
      "grad_norm": 0.66015625,
      "learning_rate": 0.00018554324352010082,
      "loss": 0.917,
      "step": 1300
    },
    {
      "epoch": 0.19826272477903079,
      "grad_norm": 0.66015625,
      "learning_rate": 0.00018551767023970342,
      "loss": 0.789,
      "step": 1301
    },
    {
      "epoch": 0.19841511734227368,
      "grad_norm": 1.09375,
      "learning_rate": 0.00018549207612597178,
      "loss": 0.9334,
      "step": 1302
    },
    {
      "epoch": 0.1985675099055166,
      "grad_norm": 0.87109375,
      "learning_rate": 0.000185466461185141,
      "loss": 1.1137,
      "step": 1303
    },
    {
      "epoch": 0.19871990246875954,
      "grad_norm": 0.92578125,
      "learning_rate": 0.00018544082542345128,
      "loss": 0.8355,
      "step": 1304
    },
    {
      "epoch": 0.19887229503200243,
      "grad_norm": 0.80859375,
      "learning_rate": 0.0001854151688471478,
      "loss": 0.9141,
      "step": 1305
    },
    {
      "epoch": 0.19902468759524536,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00018538949146248092,
      "loss": 1.2592,
      "step": 1306
    },
    {
      "epoch": 0.19917708015848826,
      "grad_norm": 0.82421875,
      "learning_rate": 0.00018536379327570597,
      "loss": 0.8636,
      "step": 1307
    },
    {
      "epoch": 0.19932947272173118,
      "grad_norm": 1.1640625,
      "learning_rate": 0.00018533807429308344,
      "loss": 1.0123,
      "step": 1308
    },
    {
      "epoch": 0.19948186528497408,
      "grad_norm": 0.859375,
      "learning_rate": 0.00018531233452087883,
      "loss": 1.0902,
      "step": 1309
    },
    {
      "epoch": 0.199634257848217,
      "grad_norm": 0.66796875,
      "learning_rate": 0.00018528657396536268,
      "loss": 0.771,
      "step": 1310
    },
    {
      "epoch": 0.19978665041145993,
      "grad_norm": 1.0703125,
      "learning_rate": 0.0001852607926328107,
      "loss": 0.9508,
      "step": 1311
    },
    {
      "epoch": 0.19993904297470283,
      "grad_norm": 0.81640625,
      "learning_rate": 0.00018523499052950358,
      "loss": 0.9485,
      "step": 1312
    },
    {
      "epoch": 0.20009143553794576,
      "grad_norm": 0.94140625,
      "learning_rate": 0.00018520916766172704,
      "loss": 0.8849,
      "step": 1313
    },
    {
      "epoch": 0.20024382810118865,
      "grad_norm": 1.03125,
      "learning_rate": 0.00018518332403577193,
      "loss": 0.8742,
      "step": 1314
    },
    {
      "epoch": 0.20039622066443158,
      "grad_norm": 1.15625,
      "learning_rate": 0.00018515745965793414,
      "loss": 1.0485,
      "step": 1315
    },
    {
      "epoch": 0.20054861322767448,
      "grad_norm": 1.1015625,
      "learning_rate": 0.00018513157453451458,
      "loss": 0.9675,
      "step": 1316
    },
    {
      "epoch": 0.2007010057909174,
      "grad_norm": 0.83203125,
      "learning_rate": 0.00018510566867181925,
      "loss": 0.911,
      "step": 1317
    },
    {
      "epoch": 0.20085339835416033,
      "grad_norm": 1.0625,
      "learning_rate": 0.00018507974207615917,
      "loss": 1.0996,
      "step": 1318
    },
    {
      "epoch": 0.20100579091740323,
      "grad_norm": 0.88671875,
      "learning_rate": 0.00018505379475385048,
      "loss": 0.8614,
      "step": 1319
    },
    {
      "epoch": 0.20115818348064615,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00018502782671121429,
      "loss": 0.9244,
      "step": 1320
    },
    {
      "epoch": 0.20131057604388905,
      "grad_norm": 0.97265625,
      "learning_rate": 0.00018500183795457678,
      "loss": 0.8821,
      "step": 1321
    },
    {
      "epoch": 0.20146296860713198,
      "grad_norm": 0.67578125,
      "learning_rate": 0.00018497582849026918,
      "loss": 0.7634,
      "step": 1322
    },
    {
      "epoch": 0.20161536117037487,
      "grad_norm": 0.9921875,
      "learning_rate": 0.00018494979832462782,
      "loss": 0.8758,
      "step": 1323
    },
    {
      "epoch": 0.2017677537336178,
      "grad_norm": 0.98046875,
      "learning_rate": 0.00018492374746399397,
      "loss": 1.0884,
      "step": 1324
    },
    {
      "epoch": 0.20192014629686073,
      "grad_norm": 0.97265625,
      "learning_rate": 0.000184897675914714,
      "loss": 0.8277,
      "step": 1325
    },
    {
      "epoch": 0.20207253886010362,
      "grad_norm": 1.0546875,
      "learning_rate": 0.00018487158368313934,
      "loss": 1.1633,
      "step": 1326
    },
    {
      "epoch": 0.20222493142334655,
      "grad_norm": 1.375,
      "learning_rate": 0.00018484547077562643,
      "loss": 1.0727,
      "step": 1327
    },
    {
      "epoch": 0.20237732398658945,
      "grad_norm": 1.0546875,
      "learning_rate": 0.00018481933719853672,
      "loss": 0.9925,
      "step": 1328
    },
    {
      "epoch": 0.20252971654983237,
      "grad_norm": 0.80078125,
      "learning_rate": 0.00018479318295823677,
      "loss": 0.858,
      "step": 1329
    },
    {
      "epoch": 0.20268210911307527,
      "grad_norm": 0.9296875,
      "learning_rate": 0.0001847670080610981,
      "loss": 0.983,
      "step": 1330
    },
    {
      "epoch": 0.2028345016763182,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00018474081251349726,
      "loss": 0.8707,
      "step": 1331
    },
    {
      "epoch": 0.20298689423956112,
      "grad_norm": 0.9375,
      "learning_rate": 0.00018471459632181595,
      "loss": 1.0513,
      "step": 1332
    },
    {
      "epoch": 0.20313928680280402,
      "grad_norm": 0.76953125,
      "learning_rate": 0.0001846883594924407,
      "loss": 0.8628,
      "step": 1333
    },
    {
      "epoch": 0.20329167936604695,
      "grad_norm": 0.73046875,
      "learning_rate": 0.00018466210203176326,
      "loss": 0.9132,
      "step": 1334
    },
    {
      "epoch": 0.20344407192928984,
      "grad_norm": 0.8671875,
      "learning_rate": 0.0001846358239461803,
      "loss": 0.8785,
      "step": 1335
    },
    {
      "epoch": 0.20359646449253277,
      "grad_norm": 0.95703125,
      "learning_rate": 0.00018460952524209355,
      "loss": 1.0763,
      "step": 1336
    },
    {
      "epoch": 0.20374885705577567,
      "grad_norm": 1.0625,
      "learning_rate": 0.00018458320592590975,
      "loss": 1.1401,
      "step": 1337
    },
    {
      "epoch": 0.2039012496190186,
      "grad_norm": 1.203125,
      "learning_rate": 0.00018455686600404064,
      "loss": 0.877,
      "step": 1338
    },
    {
      "epoch": 0.20405364218226152,
      "grad_norm": 0.953125,
      "learning_rate": 0.00018453050548290304,
      "loss": 0.9289,
      "step": 1339
    },
    {
      "epoch": 0.20420603474550442,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00018450412436891872,
      "loss": 0.9893,
      "step": 1340
    },
    {
      "epoch": 0.20435842730874734,
      "grad_norm": 0.87109375,
      "learning_rate": 0.0001844777226685145,
      "loss": 0.8447,
      "step": 1341
    },
    {
      "epoch": 0.20451081987199024,
      "grad_norm": 1.125,
      "learning_rate": 0.00018445130038812225,
      "loss": 1.0009,
      "step": 1342
    },
    {
      "epoch": 0.20466321243523317,
      "grad_norm": 0.98828125,
      "learning_rate": 0.0001844248575341788,
      "loss": 1.0595,
      "step": 1343
    },
    {
      "epoch": 0.20481560499847606,
      "grad_norm": 0.80859375,
      "learning_rate": 0.00018439839411312598,
      "loss": 0.8842,
      "step": 1344
    },
    {
      "epoch": 0.204967997561719,
      "grad_norm": 0.71484375,
      "learning_rate": 0.0001843719101314107,
      "loss": 0.9226,
      "step": 1345
    },
    {
      "epoch": 0.20512039012496192,
      "grad_norm": 1.03125,
      "learning_rate": 0.0001843454055954848,
      "loss": 0.9231,
      "step": 1346
    },
    {
      "epoch": 0.2052727826882048,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00018431888051180522,
      "loss": 0.8647,
      "step": 1347
    },
    {
      "epoch": 0.20542517525144774,
      "grad_norm": 0.9453125,
      "learning_rate": 0.00018429233488683383,
      "loss": 1.0469,
      "step": 1348
    },
    {
      "epoch": 0.20557756781469064,
      "grad_norm": 0.9140625,
      "learning_rate": 0.00018426576872703745,
      "loss": 1.0339,
      "step": 1349
    },
    {
      "epoch": 0.20572996037793356,
      "grad_norm": 1.1953125,
      "learning_rate": 0.0001842391820388881,
      "loss": 0.9675,
      "step": 1350
    },
    {
      "epoch": 0.20588235294117646,
      "grad_norm": 0.84765625,
      "learning_rate": 0.0001842125748288626,
      "loss": 0.9072,
      "step": 1351
    },
    {
      "epoch": 0.2060347455044194,
      "grad_norm": 0.8828125,
      "learning_rate": 0.00018418594710344289,
      "loss": 0.9135,
      "step": 1352
    },
    {
      "epoch": 0.20618713806766228,
      "grad_norm": 0.92578125,
      "learning_rate": 0.00018415929886911583,
      "loss": 0.9233,
      "step": 1353
    },
    {
      "epoch": 0.2063395306309052,
      "grad_norm": 1.0546875,
      "learning_rate": 0.00018413263013237333,
      "loss": 1.0745,
      "step": 1354
    },
    {
      "epoch": 0.20649192319414814,
      "grad_norm": 1.0625,
      "learning_rate": 0.00018410594089971228,
      "loss": 0.9431,
      "step": 1355
    },
    {
      "epoch": 0.20664431575739103,
      "grad_norm": 0.78125,
      "learning_rate": 0.00018407923117763462,
      "loss": 0.9393,
      "step": 1356
    },
    {
      "epoch": 0.20679670832063396,
      "grad_norm": 1.2890625,
      "learning_rate": 0.0001840525009726471,
      "loss": 1.0608,
      "step": 1357
    },
    {
      "epoch": 0.20694910088387686,
      "grad_norm": 0.84765625,
      "learning_rate": 0.00018402575029126167,
      "loss": 0.9341,
      "step": 1358
    },
    {
      "epoch": 0.20710149344711978,
      "grad_norm": 1.015625,
      "learning_rate": 0.00018399897913999515,
      "loss": 1.0293,
      "step": 1359
    },
    {
      "epoch": 0.20725388601036268,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00018397218752536938,
      "loss": 0.8735,
      "step": 1360
    },
    {
      "epoch": 0.2074062785736056,
      "grad_norm": 0.9296875,
      "learning_rate": 0.0001839453754539112,
      "loss": 0.73,
      "step": 1361
    },
    {
      "epoch": 0.20755867113684853,
      "grad_norm": 0.95703125,
      "learning_rate": 0.00018391854293215237,
      "loss": 0.9709,
      "step": 1362
    },
    {
      "epoch": 0.20771106370009143,
      "grad_norm": 0.92578125,
      "learning_rate": 0.00018389168996662975,
      "loss": 1.0392,
      "step": 1363
    },
    {
      "epoch": 0.20786345626333436,
      "grad_norm": 0.95703125,
      "learning_rate": 0.00018386481656388509,
      "loss": 0.9755,
      "step": 1364
    },
    {
      "epoch": 0.20801584882657725,
      "grad_norm": 1.1640625,
      "learning_rate": 0.00018383792273046504,
      "loss": 1.0909,
      "step": 1365
    },
    {
      "epoch": 0.20816824138982018,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00018381100847292148,
      "loss": 0.8962,
      "step": 1366
    },
    {
      "epoch": 0.20832063395306308,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00018378407379781098,
      "loss": 0.8493,
      "step": 1367
    },
    {
      "epoch": 0.208473026516306,
      "grad_norm": 0.80078125,
      "learning_rate": 0.00018375711871169528,
      "loss": 1.0421,
      "step": 1368
    },
    {
      "epoch": 0.20862541907954893,
      "grad_norm": 1.171875,
      "learning_rate": 0.00018373014322114103,
      "loss": 1.2846,
      "step": 1369
    },
    {
      "epoch": 0.20877781164279183,
      "grad_norm": 1.0859375,
      "learning_rate": 0.0001837031473327198,
      "loss": 0.9815,
      "step": 1370
    },
    {
      "epoch": 0.20893020420603475,
      "grad_norm": 0.90625,
      "learning_rate": 0.00018367613105300823,
      "loss": 0.9537,
      "step": 1371
    },
    {
      "epoch": 0.20908259676927765,
      "grad_norm": 1.1953125,
      "learning_rate": 0.00018364909438858785,
      "loss": 1.0607,
      "step": 1372
    },
    {
      "epoch": 0.20923498933252058,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00018362203734604519,
      "loss": 1.0555,
      "step": 1373
    },
    {
      "epoch": 0.20938738189576347,
      "grad_norm": 1.0234375,
      "learning_rate": 0.0001835949599319717,
      "loss": 1.1281,
      "step": 1374
    },
    {
      "epoch": 0.2095397744590064,
      "grad_norm": 0.90625,
      "learning_rate": 0.00018356786215296386,
      "loss": 0.9561,
      "step": 1375
    },
    {
      "epoch": 0.20969216702224933,
      "grad_norm": 0.73046875,
      "learning_rate": 0.00018354074401562306,
      "loss": 0.867,
      "step": 1376
    },
    {
      "epoch": 0.20984455958549222,
      "grad_norm": 0.765625,
      "learning_rate": 0.0001835136055265557,
      "loss": 0.9094,
      "step": 1377
    },
    {
      "epoch": 0.20999695214873515,
      "grad_norm": 0.921875,
      "learning_rate": 0.0001834864466923731,
      "loss": 1.019,
      "step": 1378
    },
    {
      "epoch": 0.21014934471197805,
      "grad_norm": 0.94140625,
      "learning_rate": 0.0001834592675196915,
      "loss": 0.9964,
      "step": 1379
    },
    {
      "epoch": 0.21030173727522097,
      "grad_norm": 0.984375,
      "learning_rate": 0.00018343206801513218,
      "loss": 0.8834,
      "step": 1380
    },
    {
      "epoch": 0.21045412983846387,
      "grad_norm": 0.9453125,
      "learning_rate": 0.00018340484818532132,
      "loss": 1.0293,
      "step": 1381
    },
    {
      "epoch": 0.2106065224017068,
      "grad_norm": 0.9609375,
      "learning_rate": 0.00018337760803689005,
      "loss": 0.8056,
      "step": 1382
    },
    {
      "epoch": 0.21075891496494972,
      "grad_norm": 0.94140625,
      "learning_rate": 0.0001833503475764745,
      "loss": 0.9811,
      "step": 1383
    },
    {
      "epoch": 0.21091130752819262,
      "grad_norm": 1.171875,
      "learning_rate": 0.00018332306681071562,
      "loss": 0.9728,
      "step": 1384
    },
    {
      "epoch": 0.21106370009143555,
      "grad_norm": 1.3515625,
      "learning_rate": 0.00018329576574625952,
      "loss": 1.1979,
      "step": 1385
    },
    {
      "epoch": 0.21121609265467844,
      "grad_norm": 1.328125,
      "learning_rate": 0.00018326844438975703,
      "loss": 1.1107,
      "step": 1386
    },
    {
      "epoch": 0.21136848521792137,
      "grad_norm": 0.921875,
      "learning_rate": 0.00018324110274786407,
      "loss": 0.8826,
      "step": 1387
    },
    {
      "epoch": 0.21152087778116427,
      "grad_norm": 1.21875,
      "learning_rate": 0.00018321374082724145,
      "loss": 0.8894,
      "step": 1388
    },
    {
      "epoch": 0.2116732703444072,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00018318635863455495,
      "loss": 0.8778,
      "step": 1389
    },
    {
      "epoch": 0.21182566290765012,
      "grad_norm": 1.015625,
      "learning_rate": 0.00018315895617647523,
      "loss": 0.8801,
      "step": 1390
    },
    {
      "epoch": 0.21197805547089302,
      "grad_norm": 0.83203125,
      "learning_rate": 0.00018313153345967794,
      "loss": 1.0799,
      "step": 1391
    },
    {
      "epoch": 0.21213044803413594,
      "grad_norm": 1.140625,
      "learning_rate": 0.00018310409049084365,
      "loss": 0.9917,
      "step": 1392
    },
    {
      "epoch": 0.21228284059737884,
      "grad_norm": 0.9375,
      "learning_rate": 0.00018307662727665784,
      "loss": 1.0476,
      "step": 1393
    },
    {
      "epoch": 0.21243523316062177,
      "grad_norm": 1.015625,
      "learning_rate": 0.000183049143823811,
      "loss": 0.8822,
      "step": 1394
    },
    {
      "epoch": 0.21258762572386466,
      "grad_norm": 0.97265625,
      "learning_rate": 0.0001830216401389984,
      "loss": 0.9811,
      "step": 1395
    },
    {
      "epoch": 0.2127400182871076,
      "grad_norm": 0.98828125,
      "learning_rate": 0.00018299411622892044,
      "loss": 0.8537,
      "step": 1396
    },
    {
      "epoch": 0.21289241085035052,
      "grad_norm": 1.1328125,
      "learning_rate": 0.00018296657210028228,
      "loss": 0.8872,
      "step": 1397
    },
    {
      "epoch": 0.21304480341359341,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00018293900775979407,
      "loss": 0.847,
      "step": 1398
    },
    {
      "epoch": 0.21319719597683634,
      "grad_norm": 1.0859375,
      "learning_rate": 0.00018291142321417089,
      "loss": 1.0259,
      "step": 1399
    },
    {
      "epoch": 0.21334958854007924,
      "grad_norm": 0.86328125,
      "learning_rate": 0.00018288381847013268,
      "loss": 0.9937,
      "step": 1400
    },
    {
      "epoch": 0.21350198110332216,
      "grad_norm": 0.73828125,
      "learning_rate": 0.00018285619353440447,
      "loss": 0.8376,
      "step": 1401
    },
    {
      "epoch": 0.21365437366656506,
      "grad_norm": 0.75390625,
      "learning_rate": 0.000182828548413716,
      "loss": 0.7529,
      "step": 1402
    },
    {
      "epoch": 0.213806766229808,
      "grad_norm": 1.1640625,
      "learning_rate": 0.00018280088311480201,
      "loss": 1.0318,
      "step": 1403
    },
    {
      "epoch": 0.21395915879305089,
      "grad_norm": 0.73046875,
      "learning_rate": 0.0001827731976444022,
      "loss": 0.9232,
      "step": 1404
    },
    {
      "epoch": 0.2141115513562938,
      "grad_norm": 1.21875,
      "learning_rate": 0.00018274549200926119,
      "loss": 1.0498,
      "step": 1405
    },
    {
      "epoch": 0.21426394391953674,
      "grad_norm": 0.984375,
      "learning_rate": 0.0001827177662161284,
      "loss": 0.9736,
      "step": 1406
    },
    {
      "epoch": 0.21441633648277963,
      "grad_norm": 0.75390625,
      "learning_rate": 0.00018269002027175826,
      "loss": 1.0207,
      "step": 1407
    },
    {
      "epoch": 0.21456872904602256,
      "grad_norm": 0.92578125,
      "learning_rate": 0.00018266225418291004,
      "loss": 0.8419,
      "step": 1408
    },
    {
      "epoch": 0.21472112160926546,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00018263446795634806,
      "loss": 0.9465,
      "step": 1409
    },
    {
      "epoch": 0.21487351417250838,
      "grad_norm": 1.2421875,
      "learning_rate": 0.0001826066615988413,
      "loss": 1.0892,
      "step": 1410
    },
    {
      "epoch": 0.21502590673575128,
      "grad_norm": 0.75,
      "learning_rate": 0.00018257883511716392,
      "loss": 0.9674,
      "step": 1411
    },
    {
      "epoch": 0.2151782992989942,
      "grad_norm": 1.5390625,
      "learning_rate": 0.0001825509885180948,
      "loss": 0.9125,
      "step": 1412
    },
    {
      "epoch": 0.21533069186223713,
      "grad_norm": 0.65234375,
      "learning_rate": 0.00018252312180841776,
      "loss": 0.8395,
      "step": 1413
    },
    {
      "epoch": 0.21548308442548003,
      "grad_norm": 1.046875,
      "learning_rate": 0.00018249523499492157,
      "loss": 1.0138,
      "step": 1414
    },
    {
      "epoch": 0.21563547698872296,
      "grad_norm": 0.87109375,
      "learning_rate": 0.0001824673280843998,
      "loss": 1.0316,
      "step": 1415
    },
    {
      "epoch": 0.21578786955196586,
      "grad_norm": 1.1484375,
      "learning_rate": 0.00018243940108365103,
      "loss": 1.0335,
      "step": 1416
    },
    {
      "epoch": 0.21594026211520878,
      "grad_norm": 0.796875,
      "learning_rate": 0.00018241145399947866,
      "loss": 1.0603,
      "step": 1417
    },
    {
      "epoch": 0.21609265467845168,
      "grad_norm": 0.95703125,
      "learning_rate": 0.000182383486838691,
      "loss": 0.899,
      "step": 1418
    },
    {
      "epoch": 0.2162450472416946,
      "grad_norm": 0.890625,
      "learning_rate": 0.0001823554996081012,
      "loss": 0.8955,
      "step": 1419
    },
    {
      "epoch": 0.21639743980493753,
      "grad_norm": 1.234375,
      "learning_rate": 0.00018232749231452745,
      "loss": 1.1219,
      "step": 1420
    },
    {
      "epoch": 0.21654983236818043,
      "grad_norm": 0.96875,
      "learning_rate": 0.00018229946496479266,
      "loss": 1.0375,
      "step": 1421
    },
    {
      "epoch": 0.21670222493142335,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00018227141756572472,
      "loss": 0.9874,
      "step": 1422
    },
    {
      "epoch": 0.21685461749466625,
      "grad_norm": 0.7890625,
      "learning_rate": 0.00018224335012415638,
      "loss": 0.8251,
      "step": 1423
    },
    {
      "epoch": 0.21700701005790918,
      "grad_norm": 0.87890625,
      "learning_rate": 0.0001822152626469253,
      "loss": 0.8942,
      "step": 1424
    },
    {
      "epoch": 0.21715940262115208,
      "grad_norm": 0.6875,
      "learning_rate": 0.0001821871551408739,
      "loss": 0.9349,
      "step": 1425
    },
    {
      "epoch": 0.217311795184395,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00018215902761284966,
      "loss": 1.0122,
      "step": 1426
    },
    {
      "epoch": 0.21746418774763793,
      "grad_norm": 0.9453125,
      "learning_rate": 0.00018213088006970485,
      "loss": 0.9903,
      "step": 1427
    },
    {
      "epoch": 0.21761658031088082,
      "grad_norm": 0.75390625,
      "learning_rate": 0.00018210271251829657,
      "loss": 0.9684,
      "step": 1428
    },
    {
      "epoch": 0.21776897287412375,
      "grad_norm": 0.77734375,
      "learning_rate": 0.00018207452496548686,
      "loss": 0.8159,
      "step": 1429
    },
    {
      "epoch": 0.21792136543736665,
      "grad_norm": 0.75390625,
      "learning_rate": 0.00018204631741814263,
      "loss": 0.8811,
      "step": 1430
    },
    {
      "epoch": 0.21807375800060957,
      "grad_norm": 0.7890625,
      "learning_rate": 0.00018201808988313562,
      "loss": 0.8273,
      "step": 1431
    },
    {
      "epoch": 0.21822615056385247,
      "grad_norm": 1.390625,
      "learning_rate": 0.00018198984236734246,
      "loss": 1.16,
      "step": 1432
    },
    {
      "epoch": 0.2183785431270954,
      "grad_norm": 0.88671875,
      "learning_rate": 0.0001819615748776447,
      "loss": 0.9537,
      "step": 1433
    },
    {
      "epoch": 0.21853093569033832,
      "grad_norm": 0.79296875,
      "learning_rate": 0.00018193328742092865,
      "loss": 0.819,
      "step": 1434
    },
    {
      "epoch": 0.21868332825358122,
      "grad_norm": 0.69921875,
      "learning_rate": 0.00018190498000408554,
      "loss": 1.0011,
      "step": 1435
    },
    {
      "epoch": 0.21883572081682415,
      "grad_norm": 0.81640625,
      "learning_rate": 0.00018187665263401154,
      "loss": 0.9306,
      "step": 1436
    },
    {
      "epoch": 0.21898811338006705,
      "grad_norm": 0.82421875,
      "learning_rate": 0.00018184830531760748,
      "loss": 0.9615,
      "step": 1437
    },
    {
      "epoch": 0.21914050594330997,
      "grad_norm": 0.87890625,
      "learning_rate": 0.00018181993806177927,
      "loss": 0.9333,
      "step": 1438
    },
    {
      "epoch": 0.21929289850655287,
      "grad_norm": 1.3125,
      "learning_rate": 0.0001817915508734376,
      "loss": 1.058,
      "step": 1439
    },
    {
      "epoch": 0.2194452910697958,
      "grad_norm": 1.1953125,
      "learning_rate": 0.0001817631437594979,
      "loss": 1.0718,
      "step": 1440
    },
    {
      "epoch": 0.21959768363303872,
      "grad_norm": 0.89453125,
      "learning_rate": 0.00018173471672688064,
      "loss": 0.9299,
      "step": 1441
    },
    {
      "epoch": 0.21975007619628162,
      "grad_norm": 0.89453125,
      "learning_rate": 0.000181706269782511,
      "loss": 1.1096,
      "step": 1442
    },
    {
      "epoch": 0.21990246875952454,
      "grad_norm": 0.6875,
      "learning_rate": 0.00018167780293331908,
      "loss": 0.8553,
      "step": 1443
    },
    {
      "epoch": 0.22005486132276744,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00018164931618623985,
      "loss": 0.9137,
      "step": 1444
    },
    {
      "epoch": 0.22020725388601037,
      "grad_norm": 0.88671875,
      "learning_rate": 0.00018162080954821304,
      "loss": 0.9826,
      "step": 1445
    },
    {
      "epoch": 0.22035964644925327,
      "grad_norm": 0.90625,
      "learning_rate": 0.00018159228302618328,
      "loss": 0.9994,
      "step": 1446
    },
    {
      "epoch": 0.2205120390124962,
      "grad_norm": 1.2734375,
      "learning_rate": 0.00018156373662710007,
      "loss": 0.9771,
      "step": 1447
    },
    {
      "epoch": 0.22066443157573912,
      "grad_norm": 0.7734375,
      "learning_rate": 0.00018153517035791772,
      "loss": 0.8458,
      "step": 1448
    },
    {
      "epoch": 0.22081682413898202,
      "grad_norm": 1.1015625,
      "learning_rate": 0.00018150658422559537,
      "loss": 0.8468,
      "step": 1449
    },
    {
      "epoch": 0.22096921670222494,
      "grad_norm": 1.0546875,
      "learning_rate": 0.000181477978237097,
      "loss": 1.0287,
      "step": 1450
    },
    {
      "epoch": 0.22112160926546784,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00018144935239939144,
      "loss": 0.8802,
      "step": 1451
    },
    {
      "epoch": 0.22127400182871076,
      "grad_norm": 0.68359375,
      "learning_rate": 0.00018142070671945242,
      "loss": 0.8523,
      "step": 1452
    },
    {
      "epoch": 0.22142639439195366,
      "grad_norm": 1.1171875,
      "learning_rate": 0.00018139204120425838,
      "loss": 1.0805,
      "step": 1453
    },
    {
      "epoch": 0.2215787869551966,
      "grad_norm": 0.88671875,
      "learning_rate": 0.00018136335586079266,
      "loss": 1.0354,
      "step": 1454
    },
    {
      "epoch": 0.22173117951843951,
      "grad_norm": 0.89453125,
      "learning_rate": 0.0001813346506960434,
      "loss": 0.959,
      "step": 1455
    },
    {
      "epoch": 0.2218835720816824,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00018130592571700364,
      "loss": 1.0225,
      "step": 1456
    },
    {
      "epoch": 0.22203596464492534,
      "grad_norm": 0.97265625,
      "learning_rate": 0.00018127718093067117,
      "loss": 0.9242,
      "step": 1457
    },
    {
      "epoch": 0.22218835720816824,
      "grad_norm": 0.7265625,
      "learning_rate": 0.00018124841634404863,
      "loss": 0.8247,
      "step": 1458
    },
    {
      "epoch": 0.22234074977141116,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00018121963196414352,
      "loss": 0.9464,
      "step": 1459
    },
    {
      "epoch": 0.22249314233465406,
      "grad_norm": 0.72265625,
      "learning_rate": 0.00018119082779796808,
      "loss": 0.9566,
      "step": 1460
    },
    {
      "epoch": 0.22264553489789699,
      "grad_norm": 0.94921875,
      "learning_rate": 0.00018116200385253945,
      "loss": 0.9899,
      "step": 1461
    },
    {
      "epoch": 0.22279792746113988,
      "grad_norm": 0.94921875,
      "learning_rate": 0.00018113316013487955,
      "loss": 0.94,
      "step": 1462
    },
    {
      "epoch": 0.2229503200243828,
      "grad_norm": 0.77734375,
      "learning_rate": 0.00018110429665201515,
      "loss": 0.9018,
      "step": 1463
    },
    {
      "epoch": 0.22310271258762573,
      "grad_norm": 1.21875,
      "learning_rate": 0.00018107541341097773,
      "loss": 1.1917,
      "step": 1464
    },
    {
      "epoch": 0.22325510515086863,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00018104651041880377,
      "loss": 0.9443,
      "step": 1465
    },
    {
      "epoch": 0.22340749771411156,
      "grad_norm": 0.73828125,
      "learning_rate": 0.0001810175876825344,
      "loss": 0.9317,
      "step": 1466
    },
    {
      "epoch": 0.22355989027735446,
      "grad_norm": 1.2421875,
      "learning_rate": 0.0001809886452092156,
      "loss": 0.9975,
      "step": 1467
    },
    {
      "epoch": 0.22371228284059738,
      "grad_norm": 0.75,
      "learning_rate": 0.00018095968300589816,
      "loss": 0.8607,
      "step": 1468
    },
    {
      "epoch": 0.22386467540384028,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00018093070107963778,
      "loss": 0.9481,
      "step": 1469
    },
    {
      "epoch": 0.2240170679670832,
      "grad_norm": 1.6171875,
      "learning_rate": 0.00018090169943749476,
      "loss": 1.1182,
      "step": 1470
    },
    {
      "epoch": 0.22416946053032613,
      "grad_norm": 1.0625,
      "learning_rate": 0.00018087267808653438,
      "loss": 0.9998,
      "step": 1471
    },
    {
      "epoch": 0.22432185309356903,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00018084363703382668,
      "loss": 0.9167,
      "step": 1472
    },
    {
      "epoch": 0.22447424565681195,
      "grad_norm": 0.921875,
      "learning_rate": 0.0001808145762864464,
      "loss": 0.9564,
      "step": 1473
    },
    {
      "epoch": 0.22462663822005485,
      "grad_norm": 1.125,
      "learning_rate": 0.00018078549585147323,
      "loss": 0.9133,
      "step": 1474
    },
    {
      "epoch": 0.22477903078329778,
      "grad_norm": 0.875,
      "learning_rate": 0.00018075639573599155,
      "loss": 1.0478,
      "step": 1475
    },
    {
      "epoch": 0.22493142334654068,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00018072727594709056,
      "loss": 1.0209,
      "step": 1476
    },
    {
      "epoch": 0.2250838159097836,
      "grad_norm": 0.99609375,
      "learning_rate": 0.00018069813649186428,
      "loss": 0.9641,
      "step": 1477
    },
    {
      "epoch": 0.22523620847302653,
      "grad_norm": 0.83203125,
      "learning_rate": 0.00018066897737741152,
      "loss": 1.0433,
      "step": 1478
    },
    {
      "epoch": 0.22538860103626943,
      "grad_norm": 1.109375,
      "learning_rate": 0.00018063979861083576,
      "loss": 1.0085,
      "step": 1479
    },
    {
      "epoch": 0.22554099359951235,
      "grad_norm": 1.15625,
      "learning_rate": 0.0001806106001992455,
      "loss": 1.059,
      "step": 1480
    },
    {
      "epoch": 0.22569338616275525,
      "grad_norm": 0.953125,
      "learning_rate": 0.0001805813821497538,
      "loss": 0.9234,
      "step": 1481
    },
    {
      "epoch": 0.22584577872599818,
      "grad_norm": 0.80078125,
      "learning_rate": 0.00018055214446947862,
      "loss": 0.7988,
      "step": 1482
    },
    {
      "epoch": 0.22599817128924107,
      "grad_norm": 1.0390625,
      "learning_rate": 0.0001805228871655427,
      "loss": 1.0216,
      "step": 1483
    },
    {
      "epoch": 0.226150563852484,
      "grad_norm": 0.8125,
      "learning_rate": 0.00018049361024507354,
      "loss": 0.8471,
      "step": 1484
    },
    {
      "epoch": 0.22630295641572692,
      "grad_norm": 0.984375,
      "learning_rate": 0.0001804643137152034,
      "loss": 1.1273,
      "step": 1485
    },
    {
      "epoch": 0.22645534897896982,
      "grad_norm": 0.734375,
      "learning_rate": 0.00018043499758306933,
      "loss": 0.9263,
      "step": 1486
    },
    {
      "epoch": 0.22660774154221275,
      "grad_norm": 0.88671875,
      "learning_rate": 0.00018040566185581315,
      "loss": 0.8965,
      "step": 1487
    },
    {
      "epoch": 0.22676013410545565,
      "grad_norm": 0.68359375,
      "learning_rate": 0.00018037630654058154,
      "loss": 1.0788,
      "step": 1488
    },
    {
      "epoch": 0.22691252666869857,
      "grad_norm": 1.1484375,
      "learning_rate": 0.00018034693164452578,
      "loss": 1.0814,
      "step": 1489
    },
    {
      "epoch": 0.22706491923194147,
      "grad_norm": 1.1484375,
      "learning_rate": 0.00018031753717480208,
      "loss": 1.019,
      "step": 1490
    },
    {
      "epoch": 0.2272173117951844,
      "grad_norm": 0.98828125,
      "learning_rate": 0.00018028812313857135,
      "loss": 1.0758,
      "step": 1491
    },
    {
      "epoch": 0.22736970435842732,
      "grad_norm": 1.1875,
      "learning_rate": 0.00018025868954299923,
      "loss": 1.0697,
      "step": 1492
    },
    {
      "epoch": 0.22752209692167022,
      "grad_norm": 0.94140625,
      "learning_rate": 0.00018022923639525625,
      "loss": 0.7896,
      "step": 1493
    },
    {
      "epoch": 0.22767448948491315,
      "grad_norm": 0.859375,
      "learning_rate": 0.00018019976370251749,
      "loss": 0.9168,
      "step": 1494
    },
    {
      "epoch": 0.22782688204815604,
      "grad_norm": 0.7578125,
      "learning_rate": 0.00018017027147196302,
      "loss": 0.8469,
      "step": 1495
    },
    {
      "epoch": 0.22797927461139897,
      "grad_norm": 1.1640625,
      "learning_rate": 0.0001801407597107776,
      "loss": 1.0171,
      "step": 1496
    },
    {
      "epoch": 0.22813166717464187,
      "grad_norm": 0.921875,
      "learning_rate": 0.00018011122842615063,
      "loss": 0.9127,
      "step": 1497
    },
    {
      "epoch": 0.2282840597378848,
      "grad_norm": 1.0078125,
      "learning_rate": 0.0001800816776252764,
      "loss": 1.0635,
      "step": 1498
    },
    {
      "epoch": 0.22843645230112772,
      "grad_norm": 1.171875,
      "learning_rate": 0.0001800521073153539,
      "loss": 1.0792,
      "step": 1499
    },
    {
      "epoch": 0.22858884486437062,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00018002251750358692,
      "loss": 0.8944,
      "step": 1500
    },
    {
      "epoch": 0.22874123742761354,
      "grad_norm": 0.9921875,
      "learning_rate": 0.0001799929081971839,
      "loss": 0.9673,
      "step": 1501
    },
    {
      "epoch": 0.22889362999085644,
      "grad_norm": 1.1640625,
      "learning_rate": 0.00017996327940335814,
      "loss": 0.9549,
      "step": 1502
    },
    {
      "epoch": 0.22904602255409937,
      "grad_norm": 0.84375,
      "learning_rate": 0.0001799336311293276,
      "loss": 0.9763,
      "step": 1503
    },
    {
      "epoch": 0.22919841511734226,
      "grad_norm": 0.97265625,
      "learning_rate": 0.00017990396338231509,
      "loss": 0.9749,
      "step": 1504
    },
    {
      "epoch": 0.2293508076805852,
      "grad_norm": 0.7890625,
      "learning_rate": 0.000179874276169548,
      "loss": 0.902,
      "step": 1505
    },
    {
      "epoch": 0.22950320024382811,
      "grad_norm": 0.80859375,
      "learning_rate": 0.00017984456949825869,
      "loss": 0.914,
      "step": 1506
    },
    {
      "epoch": 0.229655592807071,
      "grad_norm": 1.5625,
      "learning_rate": 0.00017981484337568402,
      "loss": 1.035,
      "step": 1507
    },
    {
      "epoch": 0.22980798537031394,
      "grad_norm": 0.62890625,
      "learning_rate": 0.0001797850978090658,
      "loss": 0.8431,
      "step": 1508
    },
    {
      "epoch": 0.22996037793355684,
      "grad_norm": 1.0390625,
      "learning_rate": 0.00017975533280565036,
      "loss": 0.8997,
      "step": 1509
    },
    {
      "epoch": 0.23011277049679976,
      "grad_norm": 0.796875,
      "learning_rate": 0.00017972554837268896,
      "loss": 0.8353,
      "step": 1510
    },
    {
      "epoch": 0.23026516306004266,
      "grad_norm": 0.859375,
      "learning_rate": 0.0001796957445174375,
      "loss": 0.9948,
      "step": 1511
    },
    {
      "epoch": 0.23041755562328559,
      "grad_norm": 0.96484375,
      "learning_rate": 0.00017966592124715665,
      "loss": 0.9616,
      "step": 1512
    },
    {
      "epoch": 0.23056994818652848,
      "grad_norm": 1.1328125,
      "learning_rate": 0.00017963607856911175,
      "loss": 0.9121,
      "step": 1513
    },
    {
      "epoch": 0.2307223407497714,
      "grad_norm": 1.015625,
      "learning_rate": 0.00017960621649057293,
      "loss": 0.9786,
      "step": 1514
    },
    {
      "epoch": 0.23087473331301434,
      "grad_norm": 0.89453125,
      "learning_rate": 0.00017957633501881502,
      "loss": 0.9386,
      "step": 1515
    },
    {
      "epoch": 0.23102712587625723,
      "grad_norm": 1.046875,
      "learning_rate": 0.00017954643416111756,
      "loss": 1.0788,
      "step": 1516
    },
    {
      "epoch": 0.23117951843950016,
      "grad_norm": 1.2265625,
      "learning_rate": 0.00017951651392476484,
      "loss": 0.96,
      "step": 1517
    },
    {
      "epoch": 0.23133191100274306,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00017948657431704587,
      "loss": 1.1087,
      "step": 1518
    },
    {
      "epoch": 0.23148430356598598,
      "grad_norm": 1.328125,
      "learning_rate": 0.00017945661534525436,
      "loss": 1.2039,
      "step": 1519
    },
    {
      "epoch": 0.23163669612922888,
      "grad_norm": 0.88671875,
      "learning_rate": 0.00017942663701668875,
      "loss": 0.9487,
      "step": 1520
    },
    {
      "epoch": 0.2317890886924718,
      "grad_norm": 1.21875,
      "learning_rate": 0.00017939663933865218,
      "loss": 0.7033,
      "step": 1521
    },
    {
      "epoch": 0.23194148125571473,
      "grad_norm": 0.8359375,
      "learning_rate": 0.00017936662231845252,
      "loss": 0.979,
      "step": 1522
    },
    {
      "epoch": 0.23209387381895763,
      "grad_norm": 1.1015625,
      "learning_rate": 0.00017933658596340233,
      "loss": 0.8453,
      "step": 1523
    },
    {
      "epoch": 0.23224626638220056,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00017930653028081896,
      "loss": 0.9452,
      "step": 1524
    },
    {
      "epoch": 0.23239865894544345,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00017927645527802434,
      "loss": 1.1123,
      "step": 1525
    },
    {
      "epoch": 0.23255105150868638,
      "grad_norm": 0.890625,
      "learning_rate": 0.0001792463609623452,
      "loss": 0.9444,
      "step": 1526
    },
    {
      "epoch": 0.23270344407192928,
      "grad_norm": 0.84375,
      "learning_rate": 0.00017921624734111292,
      "loss": 0.8948,
      "step": 1527
    },
    {
      "epoch": 0.2328558366351722,
      "grad_norm": 0.80859375,
      "learning_rate": 0.00017918611442166367,
      "loss": 0.9186,
      "step": 1528
    },
    {
      "epoch": 0.23300822919841513,
      "grad_norm": 0.7421875,
      "learning_rate": 0.00017915596221133826,
      "loss": 0.8643,
      "step": 1529
    },
    {
      "epoch": 0.23316062176165803,
      "grad_norm": 0.796875,
      "learning_rate": 0.00017912579071748214,
      "loss": 1.0141,
      "step": 1530
    },
    {
      "epoch": 0.23331301432490095,
      "grad_norm": 0.796875,
      "learning_rate": 0.00017909559994744553,
      "loss": 0.9966,
      "step": 1531
    },
    {
      "epoch": 0.23346540688814385,
      "grad_norm": 0.7578125,
      "learning_rate": 0.00017906538990858337,
      "loss": 0.9346,
      "step": 1532
    },
    {
      "epoch": 0.23361779945138678,
      "grad_norm": 0.92578125,
      "learning_rate": 0.00017903516060825526,
      "loss": 0.882,
      "step": 1533
    },
    {
      "epoch": 0.23377019201462967,
      "grad_norm": 1.015625,
      "learning_rate": 0.00017900491205382547,
      "loss": 1.0081,
      "step": 1534
    },
    {
      "epoch": 0.2339225845778726,
      "grad_norm": 0.984375,
      "learning_rate": 0.000178974644252663,
      "loss": 0.9634,
      "step": 1535
    },
    {
      "epoch": 0.23407497714111553,
      "grad_norm": 0.92578125,
      "learning_rate": 0.0001789443572121415,
      "loss": 0.9923,
      "step": 1536
    },
    {
      "epoch": 0.23422736970435842,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00017891405093963938,
      "loss": 1.0083,
      "step": 1537
    },
    {
      "epoch": 0.23437976226760135,
      "grad_norm": 0.80859375,
      "learning_rate": 0.0001788837254425396,
      "loss": 1.0065,
      "step": 1538
    },
    {
      "epoch": 0.23453215483084425,
      "grad_norm": 0.89453125,
      "learning_rate": 0.00017885338072822998,
      "loss": 0.9055,
      "step": 1539
    },
    {
      "epoch": 0.23468454739408717,
      "grad_norm": 0.71484375,
      "learning_rate": 0.00017882301680410283,
      "loss": 0.7768,
      "step": 1540
    },
    {
      "epoch": 0.23483693995733007,
      "grad_norm": 0.984375,
      "learning_rate": 0.00017879263367755533,
      "loss": 1.2049,
      "step": 1541
    },
    {
      "epoch": 0.234989332520573,
      "grad_norm": 0.8828125,
      "learning_rate": 0.0001787622313559892,
      "loss": 0.8025,
      "step": 1542
    },
    {
      "epoch": 0.23514172508381592,
      "grad_norm": 1.1953125,
      "learning_rate": 0.00017873180984681088,
      "loss": 0.9629,
      "step": 1543
    },
    {
      "epoch": 0.23529411764705882,
      "grad_norm": 1.125,
      "learning_rate": 0.0001787013691574315,
      "loss": 1.0054,
      "step": 1544
    },
    {
      "epoch": 0.23544651021030175,
      "grad_norm": 1.1484375,
      "learning_rate": 0.00017867090929526684,
      "loss": 0.9629,
      "step": 1545
    },
    {
      "epoch": 0.23559890277354464,
      "grad_norm": 0.99609375,
      "learning_rate": 0.0001786404302677374,
      "loss": 1.0487,
      "step": 1546
    },
    {
      "epoch": 0.23575129533678757,
      "grad_norm": 0.7734375,
      "learning_rate": 0.00017860993208226823,
      "loss": 0.9368,
      "step": 1547
    },
    {
      "epoch": 0.23590368790003047,
      "grad_norm": 0.875,
      "learning_rate": 0.0001785794147462892,
      "loss": 0.8925,
      "step": 1548
    },
    {
      "epoch": 0.2360560804632734,
      "grad_norm": 1.3046875,
      "learning_rate": 0.00017854887826723473,
      "loss": 0.9874,
      "step": 1549
    },
    {
      "epoch": 0.23620847302651632,
      "grad_norm": 0.984375,
      "learning_rate": 0.00017851832265254397,
      "loss": 0.9363,
      "step": 1550
    },
    {
      "epoch": 0.23636086558975922,
      "grad_norm": 1.2421875,
      "learning_rate": 0.00017848774790966073,
      "loss": 1.0128,
      "step": 1551
    },
    {
      "epoch": 0.23651325815300214,
      "grad_norm": 0.91796875,
      "learning_rate": 0.0001784571540460334,
      "loss": 0.9005,
      "step": 1552
    },
    {
      "epoch": 0.23666565071624504,
      "grad_norm": 1.03125,
      "learning_rate": 0.0001784265410691151,
      "loss": 0.9022,
      "step": 1553
    },
    {
      "epoch": 0.23681804327948797,
      "grad_norm": 1.0859375,
      "learning_rate": 0.0001783959089863636,
      "loss": 1.0453,
      "step": 1554
    },
    {
      "epoch": 0.23697043584273086,
      "grad_norm": 1.3125,
      "learning_rate": 0.0001783652578052413,
      "loss": 0.9652,
      "step": 1555
    },
    {
      "epoch": 0.2371228284059738,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00017833458753321535,
      "loss": 1.1179,
      "step": 1556
    },
    {
      "epoch": 0.23727522096921672,
      "grad_norm": 1.109375,
      "learning_rate": 0.00017830389817775734,
      "loss": 1.1262,
      "step": 1557
    },
    {
      "epoch": 0.2374276135324596,
      "grad_norm": 0.99609375,
      "learning_rate": 0.00017827318974634375,
      "loss": 0.9353,
      "step": 1558
    },
    {
      "epoch": 0.23758000609570254,
      "grad_norm": 1.0390625,
      "learning_rate": 0.00017824246224645553,
      "loss": 1.0866,
      "step": 1559
    },
    {
      "epoch": 0.23773239865894544,
      "grad_norm": 0.97265625,
      "learning_rate": 0.00017821171568557835,
      "loss": 0.8879,
      "step": 1560
    },
    {
      "epoch": 0.23788479122218836,
      "grad_norm": 1.1875,
      "learning_rate": 0.00017818095007120251,
      "loss": 1.1616,
      "step": 1561
    },
    {
      "epoch": 0.23803718378543126,
      "grad_norm": 1.171875,
      "learning_rate": 0.000178150165410823,
      "loss": 1.0278,
      "step": 1562
    },
    {
      "epoch": 0.2381895763486742,
      "grad_norm": 1.109375,
      "learning_rate": 0.0001781193617119394,
      "loss": 0.9744,
      "step": 1563
    },
    {
      "epoch": 0.23834196891191708,
      "grad_norm": 0.92578125,
      "learning_rate": 0.00017808853898205587,
      "loss": 1.04,
      "step": 1564
    },
    {
      "epoch": 0.23849436147516,
      "grad_norm": 0.7890625,
      "learning_rate": 0.0001780576972286813,
      "loss": 0.9686,
      "step": 1565
    },
    {
      "epoch": 0.23864675403840294,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00017802683645932923,
      "loss": 0.9047,
      "step": 1566
    },
    {
      "epoch": 0.23879914660164583,
      "grad_norm": 0.9609375,
      "learning_rate": 0.00017799595668151774,
      "loss": 1.0812,
      "step": 1567
    },
    {
      "epoch": 0.23895153916488876,
      "grad_norm": 0.81640625,
      "learning_rate": 0.00017796505790276958,
      "loss": 0.9854,
      "step": 1568
    },
    {
      "epoch": 0.23910393172813166,
      "grad_norm": 0.80078125,
      "learning_rate": 0.00017793414013061217,
      "loss": 0.9088,
      "step": 1569
    },
    {
      "epoch": 0.23925632429137458,
      "grad_norm": 0.875,
      "learning_rate": 0.00017790320337257752,
      "loss": 0.8268,
      "step": 1570
    },
    {
      "epoch": 0.23940871685461748,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00017787224763620227,
      "loss": 0.8564,
      "step": 1571
    },
    {
      "epoch": 0.2395611094178604,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00017784127292902767,
      "loss": 0.9066,
      "step": 1572
    },
    {
      "epoch": 0.23971350198110333,
      "grad_norm": 1.1484375,
      "learning_rate": 0.00017781027925859957,
      "loss": 0.9623,
      "step": 1573
    },
    {
      "epoch": 0.23986589454434623,
      "grad_norm": 0.828125,
      "learning_rate": 0.00017777926663246855,
      "loss": 1.0596,
      "step": 1574
    },
    {
      "epoch": 0.24001828710758916,
      "grad_norm": 0.9765625,
      "learning_rate": 0.00017774823505818966,
      "loss": 1.0284,
      "step": 1575
    },
    {
      "epoch": 0.24017067967083205,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00017771718454332271,
      "loss": 1.0527,
      "step": 1576
    },
    {
      "epoch": 0.24032307223407498,
      "grad_norm": 1.109375,
      "learning_rate": 0.00017768611509543197,
      "loss": 0.906,
      "step": 1577
    },
    {
      "epoch": 0.24047546479731788,
      "grad_norm": 0.84375,
      "learning_rate": 0.00017765502672208646,
      "loss": 0.8957,
      "step": 1578
    },
    {
      "epoch": 0.2406278573605608,
      "grad_norm": 0.98046875,
      "learning_rate": 0.00017762391943085975,
      "loss": 0.9245,
      "step": 1579
    },
    {
      "epoch": 0.24078024992380373,
      "grad_norm": 0.765625,
      "learning_rate": 0.00017759279322933003,
      "loss": 0.9338,
      "step": 1580
    },
    {
      "epoch": 0.24093264248704663,
      "grad_norm": 0.94140625,
      "learning_rate": 0.00017756164812508004,
      "loss": 0.8542,
      "step": 1581
    },
    {
      "epoch": 0.24108503505028955,
      "grad_norm": 1.1484375,
      "learning_rate": 0.00017753048412569723,
      "loss": 1.0801,
      "step": 1582
    },
    {
      "epoch": 0.24123742761353245,
      "grad_norm": 0.7890625,
      "learning_rate": 0.00017749930123877355,
      "loss": 0.8874,
      "step": 1583
    },
    {
      "epoch": 0.24138982017677538,
      "grad_norm": 0.84375,
      "learning_rate": 0.0001774680994719057,
      "loss": 0.956,
      "step": 1584
    },
    {
      "epoch": 0.24154221274001828,
      "grad_norm": 0.87890625,
      "learning_rate": 0.00017743687883269474,
      "loss": 0.8918,
      "step": 1585
    },
    {
      "epoch": 0.2416946053032612,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00017740563932874655,
      "loss": 1.1423,
      "step": 1586
    },
    {
      "epoch": 0.24184699786650413,
      "grad_norm": 1.1171875,
      "learning_rate": 0.00017737438096767153,
      "loss": 1.1879,
      "step": 1587
    },
    {
      "epoch": 0.24199939042974702,
      "grad_norm": 0.9296875,
      "learning_rate": 0.0001773431037570846,
      "loss": 1.0423,
      "step": 1588
    },
    {
      "epoch": 0.24215178299298995,
      "grad_norm": 0.77734375,
      "learning_rate": 0.0001773118077046054,
      "loss": 1.0114,
      "step": 1589
    },
    {
      "epoch": 0.24230417555623285,
      "grad_norm": 0.9140625,
      "learning_rate": 0.0001772804928178581,
      "loss": 1.0791,
      "step": 1590
    },
    {
      "epoch": 0.24245656811947577,
      "grad_norm": 0.7109375,
      "learning_rate": 0.00017724915910447142,
      "loss": 0.8669,
      "step": 1591
    },
    {
      "epoch": 0.24260896068271867,
      "grad_norm": 0.87109375,
      "learning_rate": 0.0001772178065720787,
      "loss": 0.9045,
      "step": 1592
    },
    {
      "epoch": 0.2427613532459616,
      "grad_norm": 0.921875,
      "learning_rate": 0.0001771864352283179,
      "loss": 1.1583,
      "step": 1593
    },
    {
      "epoch": 0.24291374580920452,
      "grad_norm": 0.96875,
      "learning_rate": 0.0001771550450808315,
      "loss": 1.0271,
      "step": 1594
    },
    {
      "epoch": 0.24306613837244742,
      "grad_norm": 1.078125,
      "learning_rate": 0.00017712363613726665,
      "loss": 1.1676,
      "step": 1595
    },
    {
      "epoch": 0.24321853093569035,
      "grad_norm": 0.9375,
      "learning_rate": 0.00017709220840527495,
      "loss": 1.0027,
      "step": 1596
    },
    {
      "epoch": 0.24337092349893324,
      "grad_norm": 0.98828125,
      "learning_rate": 0.00017706076189251272,
      "loss": 0.8389,
      "step": 1597
    },
    {
      "epoch": 0.24352331606217617,
      "grad_norm": 0.75,
      "learning_rate": 0.00017702929660664069,
      "loss": 0.9726,
      "step": 1598
    },
    {
      "epoch": 0.24367570862541907,
      "grad_norm": 0.9375,
      "learning_rate": 0.0001769978125553243,
      "loss": 0.9564,
      "step": 1599
    },
    {
      "epoch": 0.243828101188662,
      "grad_norm": 0.9453125,
      "learning_rate": 0.00017696630974623357,
      "loss": 0.88,
      "step": 1600
    },
    {
      "epoch": 0.24398049375190492,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00017693478818704293,
      "loss": 1.1458,
      "step": 1601
    },
    {
      "epoch": 0.24413288631514782,
      "grad_norm": 0.83203125,
      "learning_rate": 0.0001769032478854316,
      "loss": 0.9292,
      "step": 1602
    },
    {
      "epoch": 0.24428527887839074,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00017687168884908316,
      "loss": 0.9296,
      "step": 1603
    },
    {
      "epoch": 0.24443767144163364,
      "grad_norm": 0.875,
      "learning_rate": 0.00017684011108568592,
      "loss": 1.0308,
      "step": 1604
    },
    {
      "epoch": 0.24459006400487657,
      "grad_norm": 0.88671875,
      "learning_rate": 0.00017680851460293262,
      "loss": 1.0195,
      "step": 1605
    },
    {
      "epoch": 0.24474245656811947,
      "grad_norm": 0.953125,
      "learning_rate": 0.0001767768994085206,
      "loss": 1.0507,
      "step": 1606
    },
    {
      "epoch": 0.2448948491313624,
      "grad_norm": 0.90625,
      "learning_rate": 0.00017674526551015186,
      "loss": 0.9862,
      "step": 1607
    },
    {
      "epoch": 0.24504724169460532,
      "grad_norm": 0.91015625,
      "learning_rate": 0.0001767136129155328,
      "loss": 0.912,
      "step": 1608
    },
    {
      "epoch": 0.24519963425784821,
      "grad_norm": 0.96484375,
      "learning_rate": 0.0001766819416323745,
      "loss": 0.9925,
      "step": 1609
    },
    {
      "epoch": 0.24535202682109114,
      "grad_norm": 0.82421875,
      "learning_rate": 0.00017665025166839246,
      "loss": 0.9867,
      "step": 1610
    },
    {
      "epoch": 0.24550441938433404,
      "grad_norm": 0.6796875,
      "learning_rate": 0.00017661854303130693,
      "loss": 0.8582,
      "step": 1611
    },
    {
      "epoch": 0.24565681194757696,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00017658681572884247,
      "loss": 0.8302,
      "step": 1612
    },
    {
      "epoch": 0.24580920451081986,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00017655506976872837,
      "loss": 0.9101,
      "step": 1613
    },
    {
      "epoch": 0.2459615970740628,
      "grad_norm": 0.72265625,
      "learning_rate": 0.0001765233051586984,
      "loss": 0.8911,
      "step": 1614
    },
    {
      "epoch": 0.24611398963730569,
      "grad_norm": 1.0859375,
      "learning_rate": 0.00017649152190649087,
      "loss": 1.0264,
      "step": 1615
    },
    {
      "epoch": 0.2462663822005486,
      "grad_norm": 0.984375,
      "learning_rate": 0.00017645972001984866,
      "loss": 0.8498,
      "step": 1616
    },
    {
      "epoch": 0.24641877476379154,
      "grad_norm": 0.796875,
      "learning_rate": 0.0001764278995065191,
      "loss": 0.7893,
      "step": 1617
    },
    {
      "epoch": 0.24657116732703444,
      "grad_norm": 1.0859375,
      "learning_rate": 0.00017639606037425418,
      "loss": 1.1634,
      "step": 1618
    },
    {
      "epoch": 0.24672355989027736,
      "grad_norm": 0.80078125,
      "learning_rate": 0.00017636420263081036,
      "loss": 0.9194,
      "step": 1619
    },
    {
      "epoch": 0.24687595245352026,
      "grad_norm": 1.1171875,
      "learning_rate": 0.00017633232628394864,
      "loss": 0.8371,
      "step": 1620
    },
    {
      "epoch": 0.24702834501676318,
      "grad_norm": 0.97265625,
      "learning_rate": 0.00017630043134143458,
      "loss": 1.0084,
      "step": 1621
    },
    {
      "epoch": 0.24718073758000608,
      "grad_norm": 1.046875,
      "learning_rate": 0.0001762685178110382,
      "loss": 0.8587,
      "step": 1622
    },
    {
      "epoch": 0.247333130143249,
      "grad_norm": 1.03125,
      "learning_rate": 0.00017623658570053413,
      "loss": 0.9402,
      "step": 1623
    },
    {
      "epoch": 0.24748552270649193,
      "grad_norm": 0.74609375,
      "learning_rate": 0.0001762046350177015,
      "loss": 0.912,
      "step": 1624
    },
    {
      "epoch": 0.24763791526973483,
      "grad_norm": 0.8046875,
      "learning_rate": 0.0001761726657703239,
      "loss": 0.8737,
      "step": 1625
    },
    {
      "epoch": 0.24779030783297776,
      "grad_norm": 0.8359375,
      "learning_rate": 0.00017614067796618957,
      "loss": 1.0421,
      "step": 1626
    },
    {
      "epoch": 0.24794270039622066,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00017610867161309114,
      "loss": 1.1541,
      "step": 1627
    },
    {
      "epoch": 0.24809509295946358,
      "grad_norm": 0.7890625,
      "learning_rate": 0.00017607664671882584,
      "loss": 0.9425,
      "step": 1628
    },
    {
      "epoch": 0.24824748552270648,
      "grad_norm": 0.90625,
      "learning_rate": 0.0001760446032911954,
      "loss": 0.9614,
      "step": 1629
    },
    {
      "epoch": 0.2483998780859494,
      "grad_norm": 0.921875,
      "learning_rate": 0.00017601254133800607,
      "loss": 0.9113,
      "step": 1630
    },
    {
      "epoch": 0.24855227064919233,
      "grad_norm": 1.7890625,
      "learning_rate": 0.00017598046086706858,
      "loss": 0.9731,
      "step": 1631
    },
    {
      "epoch": 0.24870466321243523,
      "grad_norm": 0.9921875,
      "learning_rate": 0.00017594836188619822,
      "loss": 0.9555,
      "step": 1632
    },
    {
      "epoch": 0.24885705577567815,
      "grad_norm": 1.0625,
      "learning_rate": 0.00017591624440321474,
      "loss": 0.9769,
      "step": 1633
    },
    {
      "epoch": 0.24900944833892105,
      "grad_norm": 1.1484375,
      "learning_rate": 0.00017588410842594242,
      "loss": 1.0964,
      "step": 1634
    },
    {
      "epoch": 0.24916184090216398,
      "grad_norm": 1.0390625,
      "learning_rate": 0.00017585195396221007,
      "loss": 0.9775,
      "step": 1635
    },
    {
      "epoch": 0.24931423346540688,
      "grad_norm": 0.9296875,
      "learning_rate": 0.00017581978101985092,
      "loss": 1.1604,
      "step": 1636
    },
    {
      "epoch": 0.2494666260286498,
      "grad_norm": 0.859375,
      "learning_rate": 0.00017578758960670285,
      "loss": 0.896,
      "step": 1637
    },
    {
      "epoch": 0.24961901859189273,
      "grad_norm": 0.76171875,
      "learning_rate": 0.00017575537973060808,
      "loss": 0.9939,
      "step": 1638
    },
    {
      "epoch": 0.24977141115513563,
      "grad_norm": 1.0,
      "learning_rate": 0.00017572315139941343,
      "loss": 1.0054,
      "step": 1639
    },
    {
      "epoch": 0.24992380371837855,
      "grad_norm": 2.453125,
      "learning_rate": 0.0001756909046209702,
      "loss": 0.9819,
      "step": 1640
    },
    {
      "epoch": 0.2500761962816215,
      "grad_norm": 0.95703125,
      "learning_rate": 0.00017565863940313415,
      "loss": 0.9971,
      "step": 1641
    },
    {
      "epoch": 0.2502285888448644,
      "grad_norm": 0.859375,
      "learning_rate": 0.00017562635575376555,
      "loss": 0.9418,
      "step": 1642
    },
    {
      "epoch": 0.2503809814081073,
      "grad_norm": 1.1171875,
      "learning_rate": 0.00017559405368072913,
      "loss": 1.1021,
      "step": 1643
    },
    {
      "epoch": 0.25053337397135017,
      "grad_norm": 0.9921875,
      "learning_rate": 0.00017556173319189418,
      "loss": 1.019,
      "step": 1644
    },
    {
      "epoch": 0.2506857665345931,
      "grad_norm": 0.7890625,
      "learning_rate": 0.00017552939429513447,
      "loss": 0.9083,
      "step": 1645
    },
    {
      "epoch": 0.250838159097836,
      "grad_norm": 0.93359375,
      "learning_rate": 0.0001754970369983281,
      "loss": 0.9524,
      "step": 1646
    },
    {
      "epoch": 0.2509905516610789,
      "grad_norm": 1.15625,
      "learning_rate": 0.00017546466130935793,
      "loss": 1.0573,
      "step": 1647
    },
    {
      "epoch": 0.2511429442243219,
      "grad_norm": 1.0,
      "learning_rate": 0.000175432267236111,
      "loss": 1.1177,
      "step": 1648
    },
    {
      "epoch": 0.25129533678756477,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00017539985478647909,
      "loss": 0.851,
      "step": 1649
    },
    {
      "epoch": 0.25144772935080767,
      "grad_norm": 0.80078125,
      "learning_rate": 0.00017536742396835825,
      "loss": 1.0222,
      "step": 1650
    },
    {
      "epoch": 0.25160012191405057,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00017533497478964914,
      "loss": 0.87,
      "step": 1651
    },
    {
      "epoch": 0.2517525144772935,
      "grad_norm": 0.80078125,
      "learning_rate": 0.00017530250725825687,
      "loss": 0.8178,
      "step": 1652
    },
    {
      "epoch": 0.2519049070405364,
      "grad_norm": 0.8515625,
      "learning_rate": 0.0001752700213820909,
      "loss": 0.9789,
      "step": 1653
    },
    {
      "epoch": 0.2520572996037793,
      "grad_norm": 0.80859375,
      "learning_rate": 0.00017523751716906535,
      "loss": 0.9663,
      "step": 1654
    },
    {
      "epoch": 0.25220969216702227,
      "grad_norm": 0.8984375,
      "learning_rate": 0.00017520499462709866,
      "loss": 0.9605,
      "step": 1655
    },
    {
      "epoch": 0.25236208473026517,
      "grad_norm": 1.453125,
      "learning_rate": 0.00017517245376411382,
      "loss": 1.0017,
      "step": 1656
    },
    {
      "epoch": 0.25251447729350807,
      "grad_norm": 0.84375,
      "learning_rate": 0.00017513989458803827,
      "loss": 0.9273,
      "step": 1657
    },
    {
      "epoch": 0.25266686985675096,
      "grad_norm": 1.0625,
      "learning_rate": 0.00017510731710680384,
      "loss": 1.1105,
      "step": 1658
    },
    {
      "epoch": 0.2528192624199939,
      "grad_norm": 1.0,
      "learning_rate": 0.00017507472132834693,
      "loss": 1.0021,
      "step": 1659
    },
    {
      "epoch": 0.2529716549832368,
      "grad_norm": 0.890625,
      "learning_rate": 0.00017504210726060828,
      "loss": 1.2184,
      "step": 1660
    },
    {
      "epoch": 0.2531240475464797,
      "grad_norm": 0.71875,
      "learning_rate": 0.00017500947491153317,
      "loss": 0.9095,
      "step": 1661
    },
    {
      "epoch": 0.25327644010972267,
      "grad_norm": 1.109375,
      "learning_rate": 0.00017497682428907135,
      "loss": 1.024,
      "step": 1662
    },
    {
      "epoch": 0.25342883267296556,
      "grad_norm": 1.03125,
      "learning_rate": 0.0001749441554011769,
      "loss": 1.1043,
      "step": 1663
    },
    {
      "epoch": 0.25358122523620846,
      "grad_norm": 0.77734375,
      "learning_rate": 0.00017491146825580854,
      "loss": 0.8228,
      "step": 1664
    },
    {
      "epoch": 0.25373361779945136,
      "grad_norm": 0.890625,
      "learning_rate": 0.00017487876286092919,
      "loss": 0.9716,
      "step": 1665
    },
    {
      "epoch": 0.2538860103626943,
      "grad_norm": 0.87109375,
      "learning_rate": 0.0001748460392245065,
      "loss": 0.9718,
      "step": 1666
    },
    {
      "epoch": 0.2540384029259372,
      "grad_norm": 0.859375,
      "learning_rate": 0.00017481329735451233,
      "loss": 1.0192,
      "step": 1667
    },
    {
      "epoch": 0.2541907954891801,
      "grad_norm": 0.953125,
      "learning_rate": 0.00017478053725892306,
      "loss": 0.9965,
      "step": 1668
    },
    {
      "epoch": 0.25434318805242306,
      "grad_norm": 0.96484375,
      "learning_rate": 0.0001747477589457196,
      "loss": 1.0117,
      "step": 1669
    },
    {
      "epoch": 0.25449558061566596,
      "grad_norm": 0.80078125,
      "learning_rate": 0.00017471496242288712,
      "loss": 0.8184,
      "step": 1670
    },
    {
      "epoch": 0.25464797317890886,
      "grad_norm": 0.84765625,
      "learning_rate": 0.0001746821476984154,
      "loss": 0.9865,
      "step": 1671
    },
    {
      "epoch": 0.25480036574215176,
      "grad_norm": 0.7734375,
      "learning_rate": 0.00017464931478029856,
      "loss": 0.8635,
      "step": 1672
    },
    {
      "epoch": 0.2549527583053947,
      "grad_norm": 1.0859375,
      "learning_rate": 0.00017461646367653512,
      "loss": 1.1878,
      "step": 1673
    },
    {
      "epoch": 0.2551051508686376,
      "grad_norm": 1.203125,
      "learning_rate": 0.00017458359439512816,
      "loss": 0.949,
      "step": 1674
    },
    {
      "epoch": 0.2552575434318805,
      "grad_norm": 0.94921875,
      "learning_rate": 0.00017455070694408507,
      "loss": 1.0258,
      "step": 1675
    },
    {
      "epoch": 0.25540993599512346,
      "grad_norm": 1.1796875,
      "learning_rate": 0.0001745178013314177,
      "loss": 0.9815,
      "step": 1676
    },
    {
      "epoch": 0.25556232855836636,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00017448487756514236,
      "loss": 1.0531,
      "step": 1677
    },
    {
      "epoch": 0.25571472112160926,
      "grad_norm": 0.80078125,
      "learning_rate": 0.0001744519356532797,
      "loss": 0.9735,
      "step": 1678
    },
    {
      "epoch": 0.25586711368485215,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00017441897560385491,
      "loss": 0.8354,
      "step": 1679
    },
    {
      "epoch": 0.2560195062480951,
      "grad_norm": 1.078125,
      "learning_rate": 0.0001743859974248975,
      "loss": 1.0019,
      "step": 1680
    },
    {
      "epoch": 0.256171898811338,
      "grad_norm": 1.046875,
      "learning_rate": 0.00017435300112444142,
      "loss": 0.8151,
      "step": 1681
    },
    {
      "epoch": 0.2563242913745809,
      "grad_norm": 0.91796875,
      "learning_rate": 0.000174319986710525,
      "loss": 1.0151,
      "step": 1682
    },
    {
      "epoch": 0.25647668393782386,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00017428695419119115,
      "loss": 1.1164,
      "step": 1683
    },
    {
      "epoch": 0.25662907650106676,
      "grad_norm": 1.546875,
      "learning_rate": 0.00017425390357448696,
      "loss": 1.1243,
      "step": 1684
    },
    {
      "epoch": 0.25678146906430965,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00017422083486846404,
      "loss": 1.0916,
      "step": 1685
    },
    {
      "epoch": 0.25693386162755255,
      "grad_norm": 0.8125,
      "learning_rate": 0.00017418774808117848,
      "loss": 1.0128,
      "step": 1686
    },
    {
      "epoch": 0.2570862541907955,
      "grad_norm": 1.0,
      "learning_rate": 0.00017415464322069065,
      "loss": 0.9906,
      "step": 1687
    },
    {
      "epoch": 0.2572386467540384,
      "grad_norm": 0.8984375,
      "learning_rate": 0.00017412152029506534,
      "loss": 1.1312,
      "step": 1688
    },
    {
      "epoch": 0.2573910393172813,
      "grad_norm": 0.671875,
      "learning_rate": 0.0001740883793123718,
      "loss": 0.9646,
      "step": 1689
    },
    {
      "epoch": 0.25754343188052425,
      "grad_norm": 0.96875,
      "learning_rate": 0.0001740552202806837,
      "loss": 1.003,
      "step": 1690
    },
    {
      "epoch": 0.25769582444376715,
      "grad_norm": 1.0,
      "learning_rate": 0.00017402204320807893,
      "loss": 0.9694,
      "step": 1691
    },
    {
      "epoch": 0.25784821700701005,
      "grad_norm": 1.09375,
      "learning_rate": 0.00017398884810264,
      "loss": 1.0183,
      "step": 1692
    },
    {
      "epoch": 0.25800060957025295,
      "grad_norm": 0.74609375,
      "learning_rate": 0.00017395563497245374,
      "loss": 0.8477,
      "step": 1693
    },
    {
      "epoch": 0.2581530021334959,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00017392240382561124,
      "loss": 0.9458,
      "step": 1694
    },
    {
      "epoch": 0.2583053946967388,
      "grad_norm": 0.96875,
      "learning_rate": 0.0001738891546702082,
      "loss": 0.7956,
      "step": 1695
    },
    {
      "epoch": 0.2584577872599817,
      "grad_norm": 1.2578125,
      "learning_rate": 0.00017385588751434448,
      "loss": 0.8941,
      "step": 1696
    },
    {
      "epoch": 0.25861017982322465,
      "grad_norm": 0.8984375,
      "learning_rate": 0.00017382260236612453,
      "loss": 1.1041,
      "step": 1697
    },
    {
      "epoch": 0.25876257238646755,
      "grad_norm": 1.140625,
      "learning_rate": 0.00017378929923365704,
      "loss": 1.0371,
      "step": 1698
    },
    {
      "epoch": 0.25891496494971045,
      "grad_norm": 1.265625,
      "learning_rate": 0.00017375597812505515,
      "loss": 1.035,
      "step": 1699
    },
    {
      "epoch": 0.25906735751295334,
      "grad_norm": 1.3125,
      "learning_rate": 0.00017372263904843636,
      "loss": 1.1454,
      "step": 1700
    },
    {
      "epoch": 0.2592197500761963,
      "grad_norm": 1.15625,
      "learning_rate": 0.00017368928201192256,
      "loss": 0.83,
      "step": 1701
    },
    {
      "epoch": 0.2593721426394392,
      "grad_norm": 0.80859375,
      "learning_rate": 0.00017365590702364,
      "loss": 0.7341,
      "step": 1702
    },
    {
      "epoch": 0.2595245352026821,
      "grad_norm": 0.90625,
      "learning_rate": 0.00017362251409171927,
      "loss": 1.1569,
      "step": 1703
    },
    {
      "epoch": 0.25967692776592505,
      "grad_norm": 0.8515625,
      "learning_rate": 0.0001735891032242954,
      "loss": 0.8649,
      "step": 1704
    },
    {
      "epoch": 0.25982932032916795,
      "grad_norm": 0.84375,
      "learning_rate": 0.00017355567442950775,
      "loss": 0.9246,
      "step": 1705
    },
    {
      "epoch": 0.25998171289241084,
      "grad_norm": 1.0546875,
      "learning_rate": 0.00017352222771550008,
      "loss": 1.0831,
      "step": 1706
    },
    {
      "epoch": 0.26013410545565374,
      "grad_norm": 0.80859375,
      "learning_rate": 0.00017348876309042046,
      "loss": 0.8186,
      "step": 1707
    },
    {
      "epoch": 0.2602864980188967,
      "grad_norm": 1.0625,
      "learning_rate": 0.00017345528056242134,
      "loss": 1.0597,
      "step": 1708
    },
    {
      "epoch": 0.2604388905821396,
      "grad_norm": 1.09375,
      "learning_rate": 0.00017342178013965962,
      "loss": 1.1535,
      "step": 1709
    },
    {
      "epoch": 0.2605912831453825,
      "grad_norm": 0.8515625,
      "learning_rate": 0.0001733882618302964,
      "loss": 1.105,
      "step": 1710
    },
    {
      "epoch": 0.26074367570862544,
      "grad_norm": 0.84375,
      "learning_rate": 0.00017335472564249726,
      "loss": 0.7851,
      "step": 1711
    },
    {
      "epoch": 0.26089606827186834,
      "grad_norm": 0.859375,
      "learning_rate": 0.00017332117158443212,
      "loss": 0.8551,
      "step": 1712
    },
    {
      "epoch": 0.26104846083511124,
      "grad_norm": 0.81640625,
      "learning_rate": 0.00017328759966427515,
      "loss": 1.0036,
      "step": 1713
    },
    {
      "epoch": 0.26120085339835414,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00017325400989020505,
      "loss": 1.0516,
      "step": 1714
    },
    {
      "epoch": 0.2613532459615971,
      "grad_norm": 0.90625,
      "learning_rate": 0.0001732204022704047,
      "loss": 1.132,
      "step": 1715
    },
    {
      "epoch": 0.26150563852484,
      "grad_norm": 1.015625,
      "learning_rate": 0.00017318677681306145,
      "loss": 1.1818,
      "step": 1716
    },
    {
      "epoch": 0.2616580310880829,
      "grad_norm": 0.859375,
      "learning_rate": 0.0001731531335263669,
      "loss": 1.1359,
      "step": 1717
    },
    {
      "epoch": 0.26181042365132584,
      "grad_norm": 0.96484375,
      "learning_rate": 0.0001731194724185171,
      "loss": 0.9953,
      "step": 1718
    },
    {
      "epoch": 0.26196281621456874,
      "grad_norm": 1.015625,
      "learning_rate": 0.00017308579349771232,
      "loss": 1.1109,
      "step": 1719
    },
    {
      "epoch": 0.26211520877781164,
      "grad_norm": 0.89453125,
      "learning_rate": 0.00017305209677215725,
      "loss": 0.9095,
      "step": 1720
    },
    {
      "epoch": 0.26226760134105453,
      "grad_norm": 0.98046875,
      "learning_rate": 0.00017301838225006088,
      "loss": 1.0344,
      "step": 1721
    },
    {
      "epoch": 0.2624199939042975,
      "grad_norm": 0.99609375,
      "learning_rate": 0.00017298464993963658,
      "loss": 0.9752,
      "step": 1722
    },
    {
      "epoch": 0.2625723864675404,
      "grad_norm": 1.0625,
      "learning_rate": 0.00017295089984910205,
      "loss": 0.9109,
      "step": 1723
    },
    {
      "epoch": 0.2627247790307833,
      "grad_norm": 0.859375,
      "learning_rate": 0.00017291713198667923,
      "loss": 0.9614,
      "step": 1724
    },
    {
      "epoch": 0.26287717159402624,
      "grad_norm": 1.1171875,
      "learning_rate": 0.0001728833463605945,
      "loss": 1.0007,
      "step": 1725
    },
    {
      "epoch": 0.26302956415726914,
      "grad_norm": 1.078125,
      "learning_rate": 0.00017284954297907854,
      "loss": 1.1386,
      "step": 1726
    },
    {
      "epoch": 0.26318195672051203,
      "grad_norm": 0.86328125,
      "learning_rate": 0.00017281572185036628,
      "loss": 1.1342,
      "step": 1727
    },
    {
      "epoch": 0.26333434928375493,
      "grad_norm": 0.80078125,
      "learning_rate": 0.00017278188298269708,
      "loss": 0.8078,
      "step": 1728
    },
    {
      "epoch": 0.2634867418469979,
      "grad_norm": 1.125,
      "learning_rate": 0.00017274802638431457,
      "loss": 1.0946,
      "step": 1729
    },
    {
      "epoch": 0.2636391344102408,
      "grad_norm": 0.87890625,
      "learning_rate": 0.00017271415206346666,
      "loss": 0.8825,
      "step": 1730
    },
    {
      "epoch": 0.2637915269734837,
      "grad_norm": 1.171875,
      "learning_rate": 0.0001726802600284057,
      "loss": 0.9463,
      "step": 1731
    },
    {
      "epoch": 0.26394391953672663,
      "grad_norm": 0.69921875,
      "learning_rate": 0.00017264635028738822,
      "loss": 0.856,
      "step": 1732
    },
    {
      "epoch": 0.26409631209996953,
      "grad_norm": 0.98046875,
      "learning_rate": 0.00017261242284867515,
      "loss": 0.845,
      "step": 1733
    },
    {
      "epoch": 0.26424870466321243,
      "grad_norm": 0.96484375,
      "learning_rate": 0.00017257847772053167,
      "loss": 1.0244,
      "step": 1734
    },
    {
      "epoch": 0.26440109722645533,
      "grad_norm": 0.796875,
      "learning_rate": 0.00017254451491122735,
      "loss": 0.9258,
      "step": 1735
    },
    {
      "epoch": 0.2645534897896983,
      "grad_norm": 0.96484375,
      "learning_rate": 0.00017251053442903595,
      "loss": 1.0125,
      "step": 1736
    },
    {
      "epoch": 0.2647058823529412,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00017247653628223568,
      "loss": 0.8394,
      "step": 1737
    },
    {
      "epoch": 0.2648582749161841,
      "grad_norm": 0.703125,
      "learning_rate": 0.00017244252047910892,
      "loss": 0.8561,
      "step": 1738
    },
    {
      "epoch": 0.26501066747942703,
      "grad_norm": 0.96875,
      "learning_rate": 0.00017240848702794242,
      "loss": 0.924,
      "step": 1739
    },
    {
      "epoch": 0.26516306004266993,
      "grad_norm": 0.8515625,
      "learning_rate": 0.0001723744359370272,
      "loss": 0.881,
      "step": 1740
    },
    {
      "epoch": 0.2653154526059128,
      "grad_norm": 1.125,
      "learning_rate": 0.00017234036721465868,
      "loss": 0.924,
      "step": 1741
    },
    {
      "epoch": 0.2654678451691557,
      "grad_norm": 0.88671875,
      "learning_rate": 0.00017230628086913643,
      "loss": 1.0039,
      "step": 1742
    },
    {
      "epoch": 0.2656202377323987,
      "grad_norm": 0.98828125,
      "learning_rate": 0.00017227217690876434,
      "loss": 0.8805,
      "step": 1743
    },
    {
      "epoch": 0.2657726302956416,
      "grad_norm": 0.9375,
      "learning_rate": 0.0001722380553418507,
      "loss": 0.9445,
      "step": 1744
    },
    {
      "epoch": 0.2659250228588845,
      "grad_norm": 1.03125,
      "learning_rate": 0.00017220391617670794,
      "loss": 1.153,
      "step": 1745
    },
    {
      "epoch": 0.26607741542212743,
      "grad_norm": 1.046875,
      "learning_rate": 0.00017216975942165293,
      "loss": 0.9761,
      "step": 1746
    },
    {
      "epoch": 0.2662298079853703,
      "grad_norm": 0.94921875,
      "learning_rate": 0.00017213558508500667,
      "loss": 1.0138,
      "step": 1747
    },
    {
      "epoch": 0.2663822005486132,
      "grad_norm": 0.8984375,
      "learning_rate": 0.00017210139317509456,
      "loss": 0.9009,
      "step": 1748
    },
    {
      "epoch": 0.2665345931118561,
      "grad_norm": 0.84765625,
      "learning_rate": 0.00017206718370024624,
      "loss": 0.9582,
      "step": 1749
    },
    {
      "epoch": 0.2666869856750991,
      "grad_norm": 0.97265625,
      "learning_rate": 0.00017203295666879564,
      "loss": 0.9337,
      "step": 1750
    },
    {
      "epoch": 0.266839378238342,
      "grad_norm": 0.78125,
      "learning_rate": 0.00017199871208908093,
      "loss": 0.9987,
      "step": 1751
    },
    {
      "epoch": 0.26699177080158487,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00017196444996944458,
      "loss": 0.9423,
      "step": 1752
    },
    {
      "epoch": 0.26714416336482777,
      "grad_norm": 1.0703125,
      "learning_rate": 0.0001719301703182334,
      "loss": 1.0518,
      "step": 1753
    },
    {
      "epoch": 0.2672965559280707,
      "grad_norm": 0.99609375,
      "learning_rate": 0.0001718958731437983,
      "loss": 0.9567,
      "step": 1754
    },
    {
      "epoch": 0.2674489484913136,
      "grad_norm": 0.83203125,
      "learning_rate": 0.00017186155845449466,
      "loss": 0.7597,
      "step": 1755
    },
    {
      "epoch": 0.2676013410545565,
      "grad_norm": 0.8984375,
      "learning_rate": 0.00017182722625868198,
      "loss": 0.8712,
      "step": 1756
    },
    {
      "epoch": 0.26775373361779947,
      "grad_norm": 0.77734375,
      "learning_rate": 0.00017179287656472406,
      "loss": 1.025,
      "step": 1757
    },
    {
      "epoch": 0.26790612618104237,
      "grad_norm": 0.7265625,
      "learning_rate": 0.00017175850938098904,
      "loss": 0.9549,
      "step": 1758
    },
    {
      "epoch": 0.26805851874428527,
      "grad_norm": 0.984375,
      "learning_rate": 0.0001717241247158492,
      "loss": 1.12,
      "step": 1759
    },
    {
      "epoch": 0.26821091130752817,
      "grad_norm": 0.765625,
      "learning_rate": 0.00017168972257768122,
      "loss": 1.0141,
      "step": 1760
    },
    {
      "epoch": 0.2683633038707711,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00017165530297486584,
      "loss": 0.9497,
      "step": 1761
    },
    {
      "epoch": 0.268515696434014,
      "grad_norm": 0.8828125,
      "learning_rate": 0.00017162086591578828,
      "loss": 0.9396,
      "step": 1762
    },
    {
      "epoch": 0.2686680889972569,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00017158641140883784,
      "loss": 1.0621,
      "step": 1763
    },
    {
      "epoch": 0.26882048156049987,
      "grad_norm": 1.09375,
      "learning_rate": 0.00017155193946240816,
      "loss": 0.9947,
      "step": 1764
    },
    {
      "epoch": 0.26897287412374277,
      "grad_norm": 1.1015625,
      "learning_rate": 0.00017151745008489708,
      "loss": 1.0051,
      "step": 1765
    },
    {
      "epoch": 0.26912526668698566,
      "grad_norm": 0.92578125,
      "learning_rate": 0.0001714829432847067,
      "loss": 1.0302,
      "step": 1766
    },
    {
      "epoch": 0.26927765925022856,
      "grad_norm": 0.70703125,
      "learning_rate": 0.00017144841907024345,
      "loss": 0.8841,
      "step": 1767
    },
    {
      "epoch": 0.2694300518134715,
      "grad_norm": 1.078125,
      "learning_rate": 0.00017141387744991783,
      "loss": 1.0695,
      "step": 1768
    },
    {
      "epoch": 0.2695824443767144,
      "grad_norm": 0.640625,
      "learning_rate": 0.0001713793184321447,
      "loss": 0.8797,
      "step": 1769
    },
    {
      "epoch": 0.2697348369399573,
      "grad_norm": 0.76953125,
      "learning_rate": 0.00017134474202534317,
      "loss": 0.955,
      "step": 1770
    },
    {
      "epoch": 0.26988722950320027,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00017131014823793653,
      "loss": 1.109,
      "step": 1771
    },
    {
      "epoch": 0.27003962206644316,
      "grad_norm": 0.90625,
      "learning_rate": 0.0001712755370783523,
      "loss": 0.9479,
      "step": 1772
    },
    {
      "epoch": 0.27019201462968606,
      "grad_norm": 0.75,
      "learning_rate": 0.00017124090855502228,
      "loss": 1.0118,
      "step": 1773
    },
    {
      "epoch": 0.27034440719292896,
      "grad_norm": 0.77734375,
      "learning_rate": 0.0001712062626763825,
      "loss": 1.0276,
      "step": 1774
    },
    {
      "epoch": 0.2704967997561719,
      "grad_norm": 0.9140625,
      "learning_rate": 0.00017117159945087313,
      "loss": 0.9108,
      "step": 1775
    },
    {
      "epoch": 0.2706491923194148,
      "grad_norm": 0.98828125,
      "learning_rate": 0.0001711369188869387,
      "loss": 0.9012,
      "step": 1776
    },
    {
      "epoch": 0.2708015848826577,
      "grad_norm": 0.90625,
      "learning_rate": 0.00017110222099302785,
      "loss": 0.9801,
      "step": 1777
    },
    {
      "epoch": 0.27095397744590066,
      "grad_norm": 1.1875,
      "learning_rate": 0.0001710675057775935,
      "loss": 0.9498,
      "step": 1778
    },
    {
      "epoch": 0.27110637000914356,
      "grad_norm": 1.171875,
      "learning_rate": 0.00017103277324909274,
      "loss": 0.9362,
      "step": 1779
    },
    {
      "epoch": 0.27125876257238646,
      "grad_norm": 1.2734375,
      "learning_rate": 0.00017099802341598698,
      "loss": 0.8974,
      "step": 1780
    },
    {
      "epoch": 0.27141115513562936,
      "grad_norm": 0.9921875,
      "learning_rate": 0.00017096325628674174,
      "loss": 0.8366,
      "step": 1781
    },
    {
      "epoch": 0.2715635476988723,
      "grad_norm": 0.8359375,
      "learning_rate": 0.00017092847186982678,
      "loss": 0.8378,
      "step": 1782
    },
    {
      "epoch": 0.2717159402621152,
      "grad_norm": 1.140625,
      "learning_rate": 0.00017089367017371614,
      "loss": 0.9529,
      "step": 1783
    },
    {
      "epoch": 0.2718683328253581,
      "grad_norm": 0.7734375,
      "learning_rate": 0.00017085885120688796,
      "loss": 0.9198,
      "step": 1784
    },
    {
      "epoch": 0.27202072538860106,
      "grad_norm": 0.95703125,
      "learning_rate": 0.00017082401497782467,
      "loss": 0.911,
      "step": 1785
    },
    {
      "epoch": 0.27217311795184396,
      "grad_norm": 0.71484375,
      "learning_rate": 0.00017078916149501283,
      "loss": 0.895,
      "step": 1786
    },
    {
      "epoch": 0.27232551051508685,
      "grad_norm": 0.83984375,
      "learning_rate": 0.0001707542907669433,
      "loss": 1.0,
      "step": 1787
    },
    {
      "epoch": 0.27247790307832975,
      "grad_norm": 1.0546875,
      "learning_rate": 0.00017071940280211106,
      "loss": 0.8715,
      "step": 1788
    },
    {
      "epoch": 0.2726302956415727,
      "grad_norm": 0.859375,
      "learning_rate": 0.00017068449760901537,
      "loss": 0.8875,
      "step": 1789
    },
    {
      "epoch": 0.2727826882048156,
      "grad_norm": 0.75,
      "learning_rate": 0.0001706495751961596,
      "loss": 0.9023,
      "step": 1790
    },
    {
      "epoch": 0.2729350807680585,
      "grad_norm": 0.9609375,
      "learning_rate": 0.00017061463557205132,
      "loss": 1.1746,
      "step": 1791
    },
    {
      "epoch": 0.27308747333130146,
      "grad_norm": 0.875,
      "learning_rate": 0.00017057967874520234,
      "loss": 0.8575,
      "step": 1792
    },
    {
      "epoch": 0.27323986589454435,
      "grad_norm": 1.15625,
      "learning_rate": 0.00017054470472412873,
      "loss": 1.1059,
      "step": 1793
    },
    {
      "epoch": 0.27339225845778725,
      "grad_norm": 1.1484375,
      "learning_rate": 0.00017050971351735057,
      "loss": 1.0152,
      "step": 1794
    },
    {
      "epoch": 0.27354465102103015,
      "grad_norm": 0.90625,
      "learning_rate": 0.00017047470513339224,
      "loss": 0.9222,
      "step": 1795
    },
    {
      "epoch": 0.2736970435842731,
      "grad_norm": 1.125,
      "learning_rate": 0.00017043967958078229,
      "loss": 0.9303,
      "step": 1796
    },
    {
      "epoch": 0.273849436147516,
      "grad_norm": 0.8125,
      "learning_rate": 0.00017040463686805347,
      "loss": 0.9972,
      "step": 1797
    },
    {
      "epoch": 0.2740018287107589,
      "grad_norm": 0.921875,
      "learning_rate": 0.00017036957700374266,
      "loss": 0.9347,
      "step": 1798
    },
    {
      "epoch": 0.27415422127400185,
      "grad_norm": 0.8046875,
      "learning_rate": 0.00017033449999639096,
      "loss": 1.0211,
      "step": 1799
    },
    {
      "epoch": 0.27430661383724475,
      "grad_norm": 1.0859375,
      "learning_rate": 0.00017029940585454363,
      "loss": 0.9329,
      "step": 1800
    },
    {
      "epoch": 0.27445900640048765,
      "grad_norm": 0.95703125,
      "learning_rate": 0.00017026429458675012,
      "loss": 1.1189,
      "step": 1801
    },
    {
      "epoch": 0.27461139896373055,
      "grad_norm": 0.78515625,
      "learning_rate": 0.000170229166201564,
      "loss": 1.0574,
      "step": 1802
    },
    {
      "epoch": 0.2747637915269735,
      "grad_norm": 1.0625,
      "learning_rate": 0.0001701940207075431,
      "loss": 1.0106,
      "step": 1803
    },
    {
      "epoch": 0.2749161840902164,
      "grad_norm": 0.81640625,
      "learning_rate": 0.00017015885811324936,
      "loss": 0.9786,
      "step": 1804
    },
    {
      "epoch": 0.2750685766534593,
      "grad_norm": 0.890625,
      "learning_rate": 0.00017012367842724887,
      "loss": 0.9726,
      "step": 1805
    },
    {
      "epoch": 0.27522096921670225,
      "grad_norm": 0.90625,
      "learning_rate": 0.0001700884816581119,
      "loss": 1.0195,
      "step": 1806
    },
    {
      "epoch": 0.27537336177994515,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00017005326781441296,
      "loss": 0.9355,
      "step": 1807
    },
    {
      "epoch": 0.27552575434318805,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00017001803690473054,
      "loss": 0.7751,
      "step": 1808
    },
    {
      "epoch": 0.27567814690643094,
      "grad_norm": 0.953125,
      "learning_rate": 0.00016998278893764747,
      "loss": 1.1155,
      "step": 1809
    },
    {
      "epoch": 0.2758305394696739,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00016994752392175067,
      "loss": 1.0138,
      "step": 1810
    },
    {
      "epoch": 0.2759829320329168,
      "grad_norm": 0.90625,
      "learning_rate": 0.00016991224186563116,
      "loss": 0.9881,
      "step": 1811
    },
    {
      "epoch": 0.2761353245961597,
      "grad_norm": 1.1328125,
      "learning_rate": 0.00016987694277788417,
      "loss": 1.0742,
      "step": 1812
    },
    {
      "epoch": 0.27628771715940265,
      "grad_norm": 0.984375,
      "learning_rate": 0.00016984162666710908,
      "loss": 0.9187,
      "step": 1813
    },
    {
      "epoch": 0.27644010972264554,
      "grad_norm": 0.80078125,
      "learning_rate": 0.0001698062935419094,
      "loss": 0.8582,
      "step": 1814
    },
    {
      "epoch": 0.27659250228588844,
      "grad_norm": 0.83203125,
      "learning_rate": 0.00016977094341089278,
      "loss": 0.9881,
      "step": 1815
    },
    {
      "epoch": 0.27674489484913134,
      "grad_norm": 1.1875,
      "learning_rate": 0.00016973557628267098,
      "loss": 1.1156,
      "step": 1816
    },
    {
      "epoch": 0.2768972874123743,
      "grad_norm": 0.953125,
      "learning_rate": 0.00016970019216586003,
      "loss": 0.9695,
      "step": 1817
    },
    {
      "epoch": 0.2770496799756172,
      "grad_norm": 0.84375,
      "learning_rate": 0.00016966479106907993,
      "loss": 0.9509,
      "step": 1818
    },
    {
      "epoch": 0.2772020725388601,
      "grad_norm": 0.92578125,
      "learning_rate": 0.00016962937300095494,
      "loss": 0.917,
      "step": 1819
    },
    {
      "epoch": 0.27735446510210304,
      "grad_norm": 1.1953125,
      "learning_rate": 0.0001695939379701134,
      "loss": 1.1265,
      "step": 1820
    },
    {
      "epoch": 0.27750685766534594,
      "grad_norm": 0.94921875,
      "learning_rate": 0.0001695584859851878,
      "loss": 0.8972,
      "step": 1821
    },
    {
      "epoch": 0.27765925022858884,
      "grad_norm": 1.109375,
      "learning_rate": 0.00016952301705481474,
      "loss": 1.0149,
      "step": 1822
    },
    {
      "epoch": 0.27781164279183174,
      "grad_norm": 1.0,
      "learning_rate": 0.00016948753118763493,
      "loss": 1.0568,
      "step": 1823
    },
    {
      "epoch": 0.2779640353550747,
      "grad_norm": 1.0390625,
      "learning_rate": 0.00016945202839229332,
      "loss": 0.8553,
      "step": 1824
    },
    {
      "epoch": 0.2781164279183176,
      "grad_norm": 0.9375,
      "learning_rate": 0.00016941650867743885,
      "loss": 1.0906,
      "step": 1825
    },
    {
      "epoch": 0.2782688204815605,
      "grad_norm": 0.98828125,
      "learning_rate": 0.00016938097205172463,
      "loss": 0.9797,
      "step": 1826
    },
    {
      "epoch": 0.27842121304480344,
      "grad_norm": 0.9609375,
      "learning_rate": 0.0001693454185238079,
      "loss": 1.0004,
      "step": 1827
    },
    {
      "epoch": 0.27857360560804634,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00016930984810235001,
      "loss": 0.8532,
      "step": 1828
    },
    {
      "epoch": 0.27872599817128924,
      "grad_norm": 1.1015625,
      "learning_rate": 0.00016927426079601642,
      "loss": 1.2421,
      "step": 1829
    },
    {
      "epoch": 0.27887839073453213,
      "grad_norm": 1.03125,
      "learning_rate": 0.00016923865661347672,
      "loss": 0.9807,
      "step": 1830
    },
    {
      "epoch": 0.2790307832977751,
      "grad_norm": 1.125,
      "learning_rate": 0.0001692030355634046,
      "loss": 0.8837,
      "step": 1831
    },
    {
      "epoch": 0.279183175861018,
      "grad_norm": 0.93359375,
      "learning_rate": 0.0001691673976544779,
      "loss": 1.0841,
      "step": 1832
    },
    {
      "epoch": 0.2793355684242609,
      "grad_norm": 1.203125,
      "learning_rate": 0.00016913174289537845,
      "loss": 1.1572,
      "step": 1833
    },
    {
      "epoch": 0.27948796098750384,
      "grad_norm": 0.94140625,
      "learning_rate": 0.0001690960712947923,
      "loss": 0.9817,
      "step": 1834
    },
    {
      "epoch": 0.27964035355074673,
      "grad_norm": 0.94921875,
      "learning_rate": 0.00016906038286140958,
      "loss": 0.9223,
      "step": 1835
    },
    {
      "epoch": 0.27979274611398963,
      "grad_norm": 1.1875,
      "learning_rate": 0.00016902467760392445,
      "loss": 0.9898,
      "step": 1836
    },
    {
      "epoch": 0.27994513867723253,
      "grad_norm": 0.7421875,
      "learning_rate": 0.0001689889555310353,
      "loss": 0.8672,
      "step": 1837
    },
    {
      "epoch": 0.2800975312404755,
      "grad_norm": 0.8125,
      "learning_rate": 0.0001689532166514445,
      "loss": 0.8578,
      "step": 1838
    },
    {
      "epoch": 0.2802499238037184,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00016891746097385854,
      "loss": 1.1878,
      "step": 1839
    },
    {
      "epoch": 0.2804023163669613,
      "grad_norm": 0.98828125,
      "learning_rate": 0.00016888168850698803,
      "loss": 0.9423,
      "step": 1840
    },
    {
      "epoch": 0.28055470893020423,
      "grad_norm": 0.89453125,
      "learning_rate": 0.0001688458992595477,
      "loss": 0.9706,
      "step": 1841
    },
    {
      "epoch": 0.28070710149344713,
      "grad_norm": 0.734375,
      "learning_rate": 0.00016881009324025626,
      "loss": 1.0178,
      "step": 1842
    },
    {
      "epoch": 0.28085949405669003,
      "grad_norm": 1.2421875,
      "learning_rate": 0.0001687742704578366,
      "loss": 1.3708,
      "step": 1843
    },
    {
      "epoch": 0.2810118866199329,
      "grad_norm": 1.0546875,
      "learning_rate": 0.00016873843092101568,
      "loss": 0.9918,
      "step": 1844
    },
    {
      "epoch": 0.2811642791831759,
      "grad_norm": 1.078125,
      "learning_rate": 0.0001687025746385245,
      "loss": 1.0189,
      "step": 1845
    },
    {
      "epoch": 0.2813166717464188,
      "grad_norm": 0.87890625,
      "learning_rate": 0.00016866670161909818,
      "loss": 0.9694,
      "step": 1846
    },
    {
      "epoch": 0.2814690643096617,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00016863081187147588,
      "loss": 1.0081,
      "step": 1847
    },
    {
      "epoch": 0.28162145687290463,
      "grad_norm": 0.75390625,
      "learning_rate": 0.00016859490540440094,
      "loss": 0.8408,
      "step": 1848
    },
    {
      "epoch": 0.2817738494361475,
      "grad_norm": 0.81640625,
      "learning_rate": 0.00016855898222662056,
      "loss": 1.0148,
      "step": 1849
    },
    {
      "epoch": 0.2819262419993904,
      "grad_norm": 1.140625,
      "learning_rate": 0.00016852304234688626,
      "loss": 1.0662,
      "step": 1850
    },
    {
      "epoch": 0.2820786345626333,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00016848708577395344,
      "loss": 1.0166,
      "step": 1851
    },
    {
      "epoch": 0.2822310271258763,
      "grad_norm": 1.125,
      "learning_rate": 0.00016845111251658168,
      "loss": 1.1271,
      "step": 1852
    },
    {
      "epoch": 0.2823834196891192,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00016841512258353457,
      "loss": 0.898,
      "step": 1853
    },
    {
      "epoch": 0.2825358122523621,
      "grad_norm": 0.84765625,
      "learning_rate": 0.00016837911598357975,
      "loss": 0.9668,
      "step": 1854
    },
    {
      "epoch": 0.28268820481560497,
      "grad_norm": 0.91015625,
      "learning_rate": 0.000168343092725489,
      "loss": 0.8432,
      "step": 1855
    },
    {
      "epoch": 0.2828405973788479,
      "grad_norm": 0.9375,
      "learning_rate": 0.00016830705281803803,
      "loss": 1.1934,
      "step": 1856
    },
    {
      "epoch": 0.2829929899420908,
      "grad_norm": 1.234375,
      "learning_rate": 0.00016827099627000672,
      "loss": 1.0353,
      "step": 1857
    },
    {
      "epoch": 0.2831453825053337,
      "grad_norm": 0.66015625,
      "learning_rate": 0.000168234923090179,
      "loss": 0.7536,
      "step": 1858
    },
    {
      "epoch": 0.2832977750685767,
      "grad_norm": 0.828125,
      "learning_rate": 0.00016819883328734276,
      "loss": 0.9744,
      "step": 1859
    },
    {
      "epoch": 0.28345016763181957,
      "grad_norm": 1.109375,
      "learning_rate": 0.00016816272687029,
      "loss": 1.0207,
      "step": 1860
    },
    {
      "epoch": 0.28360256019506247,
      "grad_norm": 0.8984375,
      "learning_rate": 0.0001681266038478168,
      "loss": 1.0395,
      "step": 1861
    },
    {
      "epoch": 0.28375495275830537,
      "grad_norm": 0.921875,
      "learning_rate": 0.00016809046422872322,
      "loss": 0.9806,
      "step": 1862
    },
    {
      "epoch": 0.2839073453215483,
      "grad_norm": 1.296875,
      "learning_rate": 0.00016805430802181338,
      "loss": 1.1544,
      "step": 1863
    },
    {
      "epoch": 0.2840597378847912,
      "grad_norm": 1.140625,
      "learning_rate": 0.00016801813523589546,
      "loss": 0.9385,
      "step": 1864
    },
    {
      "epoch": 0.2842121304480341,
      "grad_norm": 0.92578125,
      "learning_rate": 0.00016798194587978174,
      "loss": 0.9634,
      "step": 1865
    },
    {
      "epoch": 0.28436452301127707,
      "grad_norm": 1.1015625,
      "learning_rate": 0.00016794573996228835,
      "loss": 0.8097,
      "step": 1866
    },
    {
      "epoch": 0.28451691557451997,
      "grad_norm": 0.84765625,
      "learning_rate": 0.00016790951749223564,
      "loss": 0.9404,
      "step": 1867
    },
    {
      "epoch": 0.28466930813776287,
      "grad_norm": 0.8515625,
      "learning_rate": 0.0001678732784784479,
      "loss": 1.0695,
      "step": 1868
    },
    {
      "epoch": 0.28482170070100576,
      "grad_norm": 1.0078125,
      "learning_rate": 0.0001678370229297535,
      "loss": 1.03,
      "step": 1869
    },
    {
      "epoch": 0.2849740932642487,
      "grad_norm": 1.21875,
      "learning_rate": 0.00016780075085498478,
      "loss": 1.0956,
      "step": 1870
    },
    {
      "epoch": 0.2851264858274916,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00016776446226297818,
      "loss": 1.1086,
      "step": 1871
    },
    {
      "epoch": 0.2852788783907345,
      "grad_norm": 0.98828125,
      "learning_rate": 0.00016772815716257412,
      "loss": 0.933,
      "step": 1872
    },
    {
      "epoch": 0.28543127095397747,
      "grad_norm": 0.8671875,
      "learning_rate": 0.000167691835562617,
      "loss": 1.1177,
      "step": 1873
    },
    {
      "epoch": 0.28558366351722037,
      "grad_norm": 0.93359375,
      "learning_rate": 0.0001676554974719553,
      "loss": 0.8693,
      "step": 1874
    },
    {
      "epoch": 0.28573605608046326,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00016761914289944156,
      "loss": 0.9128,
      "step": 1875
    },
    {
      "epoch": 0.28588844864370616,
      "grad_norm": 0.6484375,
      "learning_rate": 0.0001675827718539322,
      "loss": 0.8868,
      "step": 1876
    },
    {
      "epoch": 0.2860408412069491,
      "grad_norm": 1.1171875,
      "learning_rate": 0.00016754638434428776,
      "loss": 1.1106,
      "step": 1877
    },
    {
      "epoch": 0.286193233770192,
      "grad_norm": 0.95703125,
      "learning_rate": 0.00016750998037937275,
      "loss": 0.9169,
      "step": 1878
    },
    {
      "epoch": 0.2863456263334349,
      "grad_norm": 0.87890625,
      "learning_rate": 0.00016747355996805572,
      "loss": 0.9442,
      "step": 1879
    },
    {
      "epoch": 0.28649801889667786,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00016743712311920918,
      "loss": 0.9695,
      "step": 1880
    },
    {
      "epoch": 0.28665041145992076,
      "grad_norm": 1.0078125,
      "learning_rate": 0.0001674006698417097,
      "loss": 1.1781,
      "step": 1881
    },
    {
      "epoch": 0.28680280402316366,
      "grad_norm": 0.9765625,
      "learning_rate": 0.0001673642001444378,
      "loss": 0.8693,
      "step": 1882
    },
    {
      "epoch": 0.28695519658640656,
      "grad_norm": 0.953125,
      "learning_rate": 0.00016732771403627804,
      "loss": 0.9957,
      "step": 1883
    },
    {
      "epoch": 0.2871075891496495,
      "grad_norm": 0.7421875,
      "learning_rate": 0.00016729121152611894,
      "loss": 0.9773,
      "step": 1884
    },
    {
      "epoch": 0.2872599817128924,
      "grad_norm": 0.8046875,
      "learning_rate": 0.00016725469262285304,
      "loss": 1.0515,
      "step": 1885
    },
    {
      "epoch": 0.2874123742761353,
      "grad_norm": 0.87890625,
      "learning_rate": 0.0001672181573353769,
      "loss": 0.9477,
      "step": 1886
    },
    {
      "epoch": 0.28756476683937826,
      "grad_norm": 1.0234375,
      "learning_rate": 0.000167181605672591,
      "loss": 0.919,
      "step": 1887
    },
    {
      "epoch": 0.28771715940262116,
      "grad_norm": 0.9375,
      "learning_rate": 0.00016714503764339987,
      "loss": 0.9373,
      "step": 1888
    },
    {
      "epoch": 0.28786955196586406,
      "grad_norm": 1.0859375,
      "learning_rate": 0.00016710845325671202,
      "loss": 0.7895,
      "step": 1889
    },
    {
      "epoch": 0.28802194452910695,
      "grad_norm": 0.8359375,
      "learning_rate": 0.00016707185252143992,
      "loss": 0.9105,
      "step": 1890
    },
    {
      "epoch": 0.2881743370923499,
      "grad_norm": 1.09375,
      "learning_rate": 0.0001670352354465,
      "loss": 1.0373,
      "step": 1891
    },
    {
      "epoch": 0.2883267296555928,
      "grad_norm": 1.0625,
      "learning_rate": 0.00016699860204081278,
      "loss": 1.1256,
      "step": 1892
    },
    {
      "epoch": 0.2884791222188357,
      "grad_norm": 0.7421875,
      "learning_rate": 0.00016696195231330263,
      "loss": 0.7764,
      "step": 1893
    },
    {
      "epoch": 0.28863151478207866,
      "grad_norm": 0.984375,
      "learning_rate": 0.00016692528627289797,
      "loss": 1.068,
      "step": 1894
    },
    {
      "epoch": 0.28878390734532156,
      "grad_norm": 1.0625,
      "learning_rate": 0.00016688860392853117,
      "loss": 0.9355,
      "step": 1895
    },
    {
      "epoch": 0.28893629990856445,
      "grad_norm": 1.03125,
      "learning_rate": 0.00016685190528913858,
      "loss": 0.9284,
      "step": 1896
    },
    {
      "epoch": 0.28908869247180735,
      "grad_norm": 1.0234375,
      "learning_rate": 0.0001668151903636605,
      "loss": 0.9395,
      "step": 1897
    },
    {
      "epoch": 0.2892410850350503,
      "grad_norm": 0.82421875,
      "learning_rate": 0.00016677845916104124,
      "loss": 0.9352,
      "step": 1898
    },
    {
      "epoch": 0.2893934775982932,
      "grad_norm": 0.8828125,
      "learning_rate": 0.00016674171169022906,
      "loss": 0.8999,
      "step": 1899
    },
    {
      "epoch": 0.2895458701615361,
      "grad_norm": 0.79296875,
      "learning_rate": 0.00016670494796017612,
      "loss": 0.941,
      "step": 1900
    },
    {
      "epoch": 0.28969826272477905,
      "grad_norm": 1.0,
      "learning_rate": 0.00016666816797983861,
      "loss": 1.122,
      "step": 1901
    },
    {
      "epoch": 0.28985065528802195,
      "grad_norm": 0.76953125,
      "learning_rate": 0.00016663137175817672,
      "loss": 0.9822,
      "step": 1902
    },
    {
      "epoch": 0.29000304785126485,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00016659455930415445,
      "loss": 0.9058,
      "step": 1903
    },
    {
      "epoch": 0.29015544041450775,
      "grad_norm": 0.76953125,
      "learning_rate": 0.0001665577306267399,
      "loss": 0.8858,
      "step": 1904
    },
    {
      "epoch": 0.2903078329777507,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00016652088573490504,
      "loss": 1.0109,
      "step": 1905
    },
    {
      "epoch": 0.2904602255409936,
      "grad_norm": 1.15625,
      "learning_rate": 0.00016648402463762584,
      "loss": 0.9863,
      "step": 1906
    },
    {
      "epoch": 0.2906126181042365,
      "grad_norm": 0.96484375,
      "learning_rate": 0.00016644714734388217,
      "loss": 1.0401,
      "step": 1907
    },
    {
      "epoch": 0.29076501066747945,
      "grad_norm": 1.171875,
      "learning_rate": 0.0001664102538626579,
      "loss": 1.0312,
      "step": 1908
    },
    {
      "epoch": 0.29091740323072235,
      "grad_norm": 0.859375,
      "learning_rate": 0.0001663733442029408,
      "loss": 0.912,
      "step": 1909
    },
    {
      "epoch": 0.29106979579396525,
      "grad_norm": 0.98828125,
      "learning_rate": 0.00016633641837372255,
      "loss": 1.0489,
      "step": 1910
    },
    {
      "epoch": 0.29122218835720814,
      "grad_norm": 1.0625,
      "learning_rate": 0.00016629947638399886,
      "loss": 0.992,
      "step": 1911
    },
    {
      "epoch": 0.2913745809204511,
      "grad_norm": 0.86328125,
      "learning_rate": 0.00016626251824276934,
      "loss": 1.1532,
      "step": 1912
    },
    {
      "epoch": 0.291526973483694,
      "grad_norm": 1.0390625,
      "learning_rate": 0.0001662255439590375,
      "loss": 0.9769,
      "step": 1913
    },
    {
      "epoch": 0.2916793660469369,
      "grad_norm": 1.078125,
      "learning_rate": 0.0001661885535418108,
      "loss": 1.0992,
      "step": 1914
    },
    {
      "epoch": 0.29183175861017985,
      "grad_norm": 1.0,
      "learning_rate": 0.00016615154700010064,
      "loss": 0.9795,
      "step": 1915
    },
    {
      "epoch": 0.29198415117342275,
      "grad_norm": 1.0390625,
      "learning_rate": 0.00016611452434292238,
      "loss": 0.924,
      "step": 1916
    },
    {
      "epoch": 0.29213654373666564,
      "grad_norm": 1.2890625,
      "learning_rate": 0.0001660774855792952,
      "loss": 1.1058,
      "step": 1917
    },
    {
      "epoch": 0.29228893629990854,
      "grad_norm": 0.94921875,
      "learning_rate": 0.00016604043071824238,
      "loss": 0.9733,
      "step": 1918
    },
    {
      "epoch": 0.2924413288631515,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00016600335976879095,
      "loss": 0.9201,
      "step": 1919
    },
    {
      "epoch": 0.2925937214263944,
      "grad_norm": 0.90625,
      "learning_rate": 0.00016596627273997189,
      "loss": 0.8746,
      "step": 1920
    },
    {
      "epoch": 0.2927461139896373,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00016592916964082018,
      "loss": 0.8913,
      "step": 1921
    },
    {
      "epoch": 0.29289850655288024,
      "grad_norm": 1.0546875,
      "learning_rate": 0.0001658920504803747,
      "loss": 1.1688,
      "step": 1922
    },
    {
      "epoch": 0.29305089911612314,
      "grad_norm": 1.0234375,
      "learning_rate": 0.0001658549152676782,
      "loss": 0.9868,
      "step": 1923
    },
    {
      "epoch": 0.29320329167936604,
      "grad_norm": 0.96875,
      "learning_rate": 0.00016581776401177728,
      "loss": 1.0985,
      "step": 1924
    },
    {
      "epoch": 0.29335568424260894,
      "grad_norm": 0.6875,
      "learning_rate": 0.0001657805967217226,
      "loss": 0.8165,
      "step": 1925
    },
    {
      "epoch": 0.2935080768058519,
      "grad_norm": 0.8515625,
      "learning_rate": 0.0001657434134065686,
      "loss": 0.8758,
      "step": 1926
    },
    {
      "epoch": 0.2936604693690948,
      "grad_norm": 0.8671875,
      "learning_rate": 0.0001657062140753737,
      "loss": 0.9521,
      "step": 1927
    },
    {
      "epoch": 0.2938128619323377,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00016566899873720017,
      "loss": 0.913,
      "step": 1928
    },
    {
      "epoch": 0.29396525449558064,
      "grad_norm": 0.63671875,
      "learning_rate": 0.0001656317674011142,
      "loss": 0.8441,
      "step": 1929
    },
    {
      "epoch": 0.29411764705882354,
      "grad_norm": 0.89453125,
      "learning_rate": 0.00016559452007618592,
      "loss": 0.8799,
      "step": 1930
    },
    {
      "epoch": 0.29427003962206644,
      "grad_norm": 1.2109375,
      "learning_rate": 0.00016555725677148926,
      "loss": 1.0562,
      "step": 1931
    },
    {
      "epoch": 0.29442243218530934,
      "grad_norm": 0.9296875,
      "learning_rate": 0.0001655199774961021,
      "loss": 1.1616,
      "step": 1932
    },
    {
      "epoch": 0.2945748247485523,
      "grad_norm": 0.7734375,
      "learning_rate": 0.00016548268225910628,
      "loss": 0.9565,
      "step": 1933
    },
    {
      "epoch": 0.2947272173117952,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00016544537106958734,
      "loss": 0.889,
      "step": 1934
    },
    {
      "epoch": 0.2948796098750381,
      "grad_norm": 1.0625,
      "learning_rate": 0.00016540804393663493,
      "loss": 0.9072,
      "step": 1935
    },
    {
      "epoch": 0.29503200243828104,
      "grad_norm": 0.921875,
      "learning_rate": 0.00016537070086934243,
      "loss": 0.8297,
      "step": 1936
    },
    {
      "epoch": 0.29518439500152394,
      "grad_norm": 1.078125,
      "learning_rate": 0.00016533334187680715,
      "loss": 1.2225,
      "step": 1937
    },
    {
      "epoch": 0.29533678756476683,
      "grad_norm": 0.7890625,
      "learning_rate": 0.00016529596696813028,
      "loss": 0.937,
      "step": 1938
    },
    {
      "epoch": 0.29548918012800973,
      "grad_norm": 1.0,
      "learning_rate": 0.00016525857615241687,
      "loss": 1.1855,
      "step": 1939
    },
    {
      "epoch": 0.2956415726912527,
      "grad_norm": 1.015625,
      "learning_rate": 0.00016522116943877587,
      "loss": 0.9357,
      "step": 1940
    },
    {
      "epoch": 0.2957939652544956,
      "grad_norm": 1.2265625,
      "learning_rate": 0.00016518374683632012,
      "loss": 0.9311,
      "step": 1941
    },
    {
      "epoch": 0.2959463578177385,
      "grad_norm": 1.0,
      "learning_rate": 0.00016514630835416628,
      "loss": 1.0427,
      "step": 1942
    },
    {
      "epoch": 0.29609875038098143,
      "grad_norm": 0.92578125,
      "learning_rate": 0.00016510885400143486,
      "loss": 0.9362,
      "step": 1943
    },
    {
      "epoch": 0.29625114294422433,
      "grad_norm": 1.0,
      "learning_rate": 0.00016507138378725039,
      "loss": 0.9166,
      "step": 1944
    },
    {
      "epoch": 0.29640353550746723,
      "grad_norm": 1.03125,
      "learning_rate": 0.00016503389772074104,
      "loss": 1.0222,
      "step": 1945
    },
    {
      "epoch": 0.29655592807071013,
      "grad_norm": 0.9609375,
      "learning_rate": 0.000164996395811039,
      "loss": 1.0539,
      "step": 1946
    },
    {
      "epoch": 0.2967083206339531,
      "grad_norm": 1.140625,
      "learning_rate": 0.0001649588780672803,
      "loss": 0.8941,
      "step": 1947
    },
    {
      "epoch": 0.296860713197196,
      "grad_norm": 1.140625,
      "learning_rate": 0.00016492134449860478,
      "loss": 0.9779,
      "step": 1948
    },
    {
      "epoch": 0.2970131057604389,
      "grad_norm": 0.93359375,
      "learning_rate": 0.0001648837951141561,
      "loss": 1.0436,
      "step": 1949
    },
    {
      "epoch": 0.29716549832368183,
      "grad_norm": 1.234375,
      "learning_rate": 0.00016484622992308197,
      "loss": 1.0463,
      "step": 1950
    },
    {
      "epoch": 0.29731789088692473,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00016480864893453364,
      "loss": 0.883,
      "step": 1951
    },
    {
      "epoch": 0.2974702834501676,
      "grad_norm": 0.83203125,
      "learning_rate": 0.0001647710521576665,
      "loss": 0.8843,
      "step": 1952
    },
    {
      "epoch": 0.2976226760134105,
      "grad_norm": 1.171875,
      "learning_rate": 0.00016473343960163965,
      "loss": 1.0599,
      "step": 1953
    },
    {
      "epoch": 0.2977750685766535,
      "grad_norm": 1.015625,
      "learning_rate": 0.00016469581127561602,
      "loss": 1.101,
      "step": 1954
    },
    {
      "epoch": 0.2979274611398964,
      "grad_norm": 1.1953125,
      "learning_rate": 0.0001646581671887624,
      "loss": 0.9951,
      "step": 1955
    },
    {
      "epoch": 0.2980798537031393,
      "grad_norm": 0.9453125,
      "learning_rate": 0.00016462050735024946,
      "loss": 1.0259,
      "step": 1956
    },
    {
      "epoch": 0.29823224626638223,
      "grad_norm": 0.89453125,
      "learning_rate": 0.00016458283176925168,
      "loss": 0.8938,
      "step": 1957
    },
    {
      "epoch": 0.2983846388296251,
      "grad_norm": 0.875,
      "learning_rate": 0.00016454514045494736,
      "loss": 1.0621,
      "step": 1958
    },
    {
      "epoch": 0.298537031392868,
      "grad_norm": 0.765625,
      "learning_rate": 0.00016450743341651865,
      "loss": 0.916,
      "step": 1959
    },
    {
      "epoch": 0.2986894239561109,
      "grad_norm": 1.1484375,
      "learning_rate": 0.00016446971066315152,
      "loss": 0.9685,
      "step": 1960
    },
    {
      "epoch": 0.2988418165193539,
      "grad_norm": 0.93359375,
      "learning_rate": 0.0001644319722040358,
      "loss": 0.9983,
      "step": 1961
    },
    {
      "epoch": 0.2989942090825968,
      "grad_norm": 0.98046875,
      "learning_rate": 0.0001643942180483651,
      "loss": 0.903,
      "step": 1962
    },
    {
      "epoch": 0.29914660164583967,
      "grad_norm": 0.890625,
      "learning_rate": 0.00016435644820533683,
      "loss": 1.1286,
      "step": 1963
    },
    {
      "epoch": 0.29929899420908257,
      "grad_norm": 1.046875,
      "learning_rate": 0.00016431866268415237,
      "loss": 1.0286,
      "step": 1964
    },
    {
      "epoch": 0.2994513867723255,
      "grad_norm": 0.859375,
      "learning_rate": 0.00016428086149401674,
      "loss": 0.8815,
      "step": 1965
    },
    {
      "epoch": 0.2996037793355684,
      "grad_norm": 0.859375,
      "learning_rate": 0.00016424304464413884,
      "loss": 1.0089,
      "step": 1966
    },
    {
      "epoch": 0.2997561718988113,
      "grad_norm": 0.9921875,
      "learning_rate": 0.00016420521214373151,
      "loss": 0.9408,
      "step": 1967
    },
    {
      "epoch": 0.2999085644620543,
      "grad_norm": 0.9609375,
      "learning_rate": 0.00016416736400201115,
      "loss": 0.9505,
      "step": 1968
    },
    {
      "epoch": 0.30006095702529717,
      "grad_norm": 0.9609375,
      "learning_rate": 0.0001641295002281982,
      "loss": 1.0111,
      "step": 1969
    },
    {
      "epoch": 0.30021334958854007,
      "grad_norm": 1.1328125,
      "learning_rate": 0.00016409162083151675,
      "loss": 0.9345,
      "step": 1970
    },
    {
      "epoch": 0.30036574215178297,
      "grad_norm": 1.25,
      "learning_rate": 0.0001640537258211949,
      "loss": 1.0944,
      "step": 1971
    },
    {
      "epoch": 0.3005181347150259,
      "grad_norm": 1.21875,
      "learning_rate": 0.00016401581520646428,
      "loss": 0.9986,
      "step": 1972
    },
    {
      "epoch": 0.3006705272782688,
      "grad_norm": 0.953125,
      "learning_rate": 0.0001639778889965605,
      "loss": 0.8688,
      "step": 1973
    },
    {
      "epoch": 0.3008229198415117,
      "grad_norm": 0.99609375,
      "learning_rate": 0.00016393994720072298,
      "loss": 0.9315,
      "step": 1974
    },
    {
      "epoch": 0.30097531240475467,
      "grad_norm": 1.078125,
      "learning_rate": 0.00016390198982819482,
      "loss": 0.9375,
      "step": 1975
    },
    {
      "epoch": 0.30112770496799757,
      "grad_norm": 1.078125,
      "learning_rate": 0.00016386401688822303,
      "loss": 1.1136,
      "step": 1976
    },
    {
      "epoch": 0.30128009753124046,
      "grad_norm": 1.171875,
      "learning_rate": 0.0001638260283900583,
      "loss": 1.0971,
      "step": 1977
    },
    {
      "epoch": 0.30143249009448336,
      "grad_norm": 0.953125,
      "learning_rate": 0.00016378802434295525,
      "loss": 1.0871,
      "step": 1978
    },
    {
      "epoch": 0.3015848826577263,
      "grad_norm": 1.2265625,
      "learning_rate": 0.0001637500047561722,
      "loss": 0.9948,
      "step": 1979
    },
    {
      "epoch": 0.3017372752209692,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00016371196963897125,
      "loss": 1.033,
      "step": 1980
    },
    {
      "epoch": 0.3018896677842121,
      "grad_norm": 1.2265625,
      "learning_rate": 0.00016367391900061828,
      "loss": 1.0619,
      "step": 1981
    },
    {
      "epoch": 0.30204206034745507,
      "grad_norm": 0.95703125,
      "learning_rate": 0.00016363585285038298,
      "loss": 1.0193,
      "step": 1982
    },
    {
      "epoch": 0.30219445291069796,
      "grad_norm": 1.2109375,
      "learning_rate": 0.00016359777119753885,
      "loss": 1.0346,
      "step": 1983
    },
    {
      "epoch": 0.30234684547394086,
      "grad_norm": 1.3515625,
      "learning_rate": 0.0001635596740513631,
      "loss": 0.9046,
      "step": 1984
    },
    {
      "epoch": 0.30249923803718376,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00016352156142113673,
      "loss": 0.9945,
      "step": 1985
    },
    {
      "epoch": 0.3026516306004267,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00016348343331614457,
      "loss": 0.9615,
      "step": 1986
    },
    {
      "epoch": 0.3028040231636696,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00016344528974567512,
      "loss": 0.9577,
      "step": 1987
    },
    {
      "epoch": 0.3029564157269125,
      "grad_norm": 0.9453125,
      "learning_rate": 0.00016340713071902077,
      "loss": 1.0418,
      "step": 1988
    },
    {
      "epoch": 0.30310880829015546,
      "grad_norm": 0.9765625,
      "learning_rate": 0.00016336895624547752,
      "loss": 1.2177,
      "step": 1989
    },
    {
      "epoch": 0.30326120085339836,
      "grad_norm": 0.94921875,
      "learning_rate": 0.00016333076633434526,
      "loss": 0.8353,
      "step": 1990
    },
    {
      "epoch": 0.30341359341664126,
      "grad_norm": 1.015625,
      "learning_rate": 0.00016329256099492764,
      "loss": 0.9843,
      "step": 1991
    },
    {
      "epoch": 0.30356598597988416,
      "grad_norm": 0.8359375,
      "learning_rate": 0.000163254340236532,
      "loss": 1.1242,
      "step": 1992
    },
    {
      "epoch": 0.3037183785431271,
      "grad_norm": 0.765625,
      "learning_rate": 0.00016321610406846944,
      "loss": 0.8839,
      "step": 1993
    },
    {
      "epoch": 0.30387077110637,
      "grad_norm": 1.0546875,
      "learning_rate": 0.00016317785250005487,
      "loss": 0.9226,
      "step": 1994
    },
    {
      "epoch": 0.3040231636696129,
      "grad_norm": 0.80859375,
      "learning_rate": 0.00016313958554060694,
      "loss": 0.9523,
      "step": 1995
    },
    {
      "epoch": 0.30417555623285586,
      "grad_norm": 0.91796875,
      "learning_rate": 0.000163101303199448,
      "loss": 1.0158,
      "step": 1996
    },
    {
      "epoch": 0.30432794879609876,
      "grad_norm": 0.98828125,
      "learning_rate": 0.00016306300548590415,
      "loss": 1.0271,
      "step": 1997
    },
    {
      "epoch": 0.30448034135934166,
      "grad_norm": 0.96875,
      "learning_rate": 0.00016302469240930532,
      "loss": 0.8991,
      "step": 1998
    },
    {
      "epoch": 0.30463273392258455,
      "grad_norm": 1.1015625,
      "learning_rate": 0.00016298636397898514,
      "loss": 0.8481,
      "step": 1999
    },
    {
      "epoch": 0.3047851264858275,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00016294802020428093,
      "loss": 1.1793,
      "step": 2000
    },
    {
      "epoch": 0.3049375190490704,
      "grad_norm": 0.875,
      "learning_rate": 0.00016290966109453375,
      "loss": 0.9343,
      "step": 2001
    },
    {
      "epoch": 0.3050899116123133,
      "grad_norm": 0.7265625,
      "learning_rate": 0.0001628712866590885,
      "loss": 0.8172,
      "step": 2002
    },
    {
      "epoch": 0.30524230417555626,
      "grad_norm": 0.79296875,
      "learning_rate": 0.0001628328969072937,
      "loss": 1.1185,
      "step": 2003
    },
    {
      "epoch": 0.30539469673879915,
      "grad_norm": 1.1875,
      "learning_rate": 0.0001627944918485017,
      "loss": 1.1392,
      "step": 2004
    },
    {
      "epoch": 0.30554708930204205,
      "grad_norm": 0.94921875,
      "learning_rate": 0.00016275607149206843,
      "loss": 0.9534,
      "step": 2005
    },
    {
      "epoch": 0.30569948186528495,
      "grad_norm": 0.890625,
      "learning_rate": 0.0001627176358473537,
      "loss": 0.8772,
      "step": 2006
    },
    {
      "epoch": 0.3058518744285279,
      "grad_norm": 0.8984375,
      "learning_rate": 0.000162679184923721,
      "loss": 0.9827,
      "step": 2007
    },
    {
      "epoch": 0.3060042669917708,
      "grad_norm": 0.84375,
      "learning_rate": 0.00016264071873053752,
      "loss": 1.0967,
      "step": 2008
    },
    {
      "epoch": 0.3061566595550137,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00016260223727717415,
      "loss": 0.8054,
      "step": 2009
    },
    {
      "epoch": 0.30630905211825665,
      "grad_norm": 1.0,
      "learning_rate": 0.00016256374057300557,
      "loss": 1.1867,
      "step": 2010
    },
    {
      "epoch": 0.30646144468149955,
      "grad_norm": 0.96484375,
      "learning_rate": 0.00016252522862741006,
      "loss": 0.988,
      "step": 2011
    },
    {
      "epoch": 0.30661383724474245,
      "grad_norm": 1.140625,
      "learning_rate": 0.00016248670144976976,
      "loss": 1.0303,
      "step": 2012
    },
    {
      "epoch": 0.30676622980798535,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00016244815904947039,
      "loss": 1.0632,
      "step": 2013
    },
    {
      "epoch": 0.3069186223712283,
      "grad_norm": 0.9375,
      "learning_rate": 0.00016240960143590146,
      "loss": 0.9314,
      "step": 2014
    },
    {
      "epoch": 0.3070710149344712,
      "grad_norm": 0.9140625,
      "learning_rate": 0.00016237102861845618,
      "loss": 1.0255,
      "step": 2015
    },
    {
      "epoch": 0.3072234074977141,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00016233244060653138,
      "loss": 1.1368,
      "step": 2016
    },
    {
      "epoch": 0.30737580006095705,
      "grad_norm": 0.92578125,
      "learning_rate": 0.0001622938374095277,
      "loss": 0.7816,
      "step": 2017
    },
    {
      "epoch": 0.30752819262419995,
      "grad_norm": 0.8359375,
      "learning_rate": 0.00016225521903684946,
      "loss": 0.9932,
      "step": 2018
    },
    {
      "epoch": 0.30768058518744285,
      "grad_norm": 0.9375,
      "learning_rate": 0.00016221658549790457,
      "loss": 0.8592,
      "step": 2019
    },
    {
      "epoch": 0.30783297775068574,
      "grad_norm": 1.0234375,
      "learning_rate": 0.0001621779368021048,
      "loss": 0.9947,
      "step": 2020
    },
    {
      "epoch": 0.3079853703139287,
      "grad_norm": 1.1796875,
      "learning_rate": 0.00016213927295886547,
      "loss": 0.9408,
      "step": 2021
    },
    {
      "epoch": 0.3081377628771716,
      "grad_norm": 0.8359375,
      "learning_rate": 0.0001621005939776057,
      "loss": 0.9678,
      "step": 2022
    },
    {
      "epoch": 0.3082901554404145,
      "grad_norm": 1.109375,
      "learning_rate": 0.00016206189986774815,
      "loss": 1.0459,
      "step": 2023
    },
    {
      "epoch": 0.30844254800365745,
      "grad_norm": 0.97265625,
      "learning_rate": 0.00016202319063871935,
      "loss": 0.8615,
      "step": 2024
    },
    {
      "epoch": 0.30859494056690034,
      "grad_norm": 0.9375,
      "learning_rate": 0.00016198446629994942,
      "loss": 0.9625,
      "step": 2025
    },
    {
      "epoch": 0.30874733313014324,
      "grad_norm": 0.94140625,
      "learning_rate": 0.00016194572686087215,
      "loss": 1.0553,
      "step": 2026
    },
    {
      "epoch": 0.30889972569338614,
      "grad_norm": 0.70703125,
      "learning_rate": 0.000161906972330925,
      "loss": 0.9512,
      "step": 2027
    },
    {
      "epoch": 0.3090521182566291,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00016186820271954914,
      "loss": 0.8452,
      "step": 2028
    },
    {
      "epoch": 0.309204510819872,
      "grad_norm": 0.80078125,
      "learning_rate": 0.00016182941803618946,
      "loss": 0.905,
      "step": 2029
    },
    {
      "epoch": 0.3093569033831149,
      "grad_norm": 1.125,
      "learning_rate": 0.0001617906182902944,
      "loss": 1.0977,
      "step": 2030
    },
    {
      "epoch": 0.30950929594635784,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00016175180349131617,
      "loss": 0.961,
      "step": 2031
    },
    {
      "epoch": 0.30966168850960074,
      "grad_norm": 1.0390625,
      "learning_rate": 0.0001617129736487106,
      "loss": 1.0412,
      "step": 2032
    },
    {
      "epoch": 0.30981408107284364,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00016167412877193718,
      "loss": 1.0149,
      "step": 2033
    },
    {
      "epoch": 0.30996647363608654,
      "grad_norm": 0.9375,
      "learning_rate": 0.00016163526887045913,
      "loss": 1.0476,
      "step": 2034
    },
    {
      "epoch": 0.3101188661993295,
      "grad_norm": 0.75,
      "learning_rate": 0.0001615963939537432,
      "loss": 0.7634,
      "step": 2035
    },
    {
      "epoch": 0.3102712587625724,
      "grad_norm": 1.1015625,
      "learning_rate": 0.00016155750403125998,
      "loss": 1.0055,
      "step": 2036
    },
    {
      "epoch": 0.3104236513258153,
      "grad_norm": 0.8828125,
      "learning_rate": 0.00016151859911248354,
      "loss": 1.0916,
      "step": 2037
    },
    {
      "epoch": 0.31057604388905824,
      "grad_norm": 0.9765625,
      "learning_rate": 0.00016147967920689173,
      "loss": 0.8763,
      "step": 2038
    },
    {
      "epoch": 0.31072843645230114,
      "grad_norm": 1.1171875,
      "learning_rate": 0.00016144074432396596,
      "loss": 1.013,
      "step": 2039
    },
    {
      "epoch": 0.31088082901554404,
      "grad_norm": 0.81640625,
      "learning_rate": 0.00016140179447319132,
      "loss": 0.9104,
      "step": 2040
    },
    {
      "epoch": 0.31103322157878693,
      "grad_norm": 0.9296875,
      "learning_rate": 0.0001613628296640566,
      "loss": 0.9601,
      "step": 2041
    },
    {
      "epoch": 0.3111856141420299,
      "grad_norm": 0.94140625,
      "learning_rate": 0.00016132384990605414,
      "loss": 0.9644,
      "step": 2042
    },
    {
      "epoch": 0.3113380067052728,
      "grad_norm": 1.0390625,
      "learning_rate": 0.00016128485520868,
      "loss": 1.0568,
      "step": 2043
    },
    {
      "epoch": 0.3114903992685157,
      "grad_norm": 0.82421875,
      "learning_rate": 0.00016124584558143386,
      "loss": 0.8978,
      "step": 2044
    },
    {
      "epoch": 0.31164279183175864,
      "grad_norm": 0.671875,
      "learning_rate": 0.00016120682103381898,
      "loss": 0.8126,
      "step": 2045
    },
    {
      "epoch": 0.31179518439500153,
      "grad_norm": 0.8125,
      "learning_rate": 0.00016116778157534233,
      "loss": 0.9017,
      "step": 2046
    },
    {
      "epoch": 0.31194757695824443,
      "grad_norm": 0.88671875,
      "learning_rate": 0.00016112872721551447,
      "loss": 0.9492,
      "step": 2047
    },
    {
      "epoch": 0.31209996952148733,
      "grad_norm": 0.70703125,
      "learning_rate": 0.00016108965796384964,
      "loss": 0.9074,
      "step": 2048
    },
    {
      "epoch": 0.3122523620847303,
      "grad_norm": 0.71875,
      "learning_rate": 0.00016105057382986558,
      "loss": 0.8914,
      "step": 2049
    },
    {
      "epoch": 0.3124047546479732,
      "grad_norm": 0.89453125,
      "learning_rate": 0.00016101147482308387,
      "loss": 0.9926,
      "step": 2050
    },
    {
      "epoch": 0.3125571472112161,
      "grad_norm": 0.86328125,
      "learning_rate": 0.0001609723609530295,
      "loss": 1.0754,
      "step": 2051
    },
    {
      "epoch": 0.31270953977445903,
      "grad_norm": 0.859375,
      "learning_rate": 0.00016093323222923125,
      "loss": 0.8807,
      "step": 2052
    },
    {
      "epoch": 0.31286193233770193,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00016089408866122137,
      "loss": 0.9558,
      "step": 2053
    },
    {
      "epoch": 0.31301432490094483,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00016085493025853583,
      "loss": 0.8841,
      "step": 2054
    },
    {
      "epoch": 0.3131667174641877,
      "grad_norm": 0.80859375,
      "learning_rate": 0.00016081575703071416,
      "loss": 1.0136,
      "step": 2055
    },
    {
      "epoch": 0.3133191100274307,
      "grad_norm": 1.0078125,
      "learning_rate": 0.0001607765689872995,
      "loss": 1.0448,
      "step": 2056
    },
    {
      "epoch": 0.3134715025906736,
      "grad_norm": 0.90625,
      "learning_rate": 0.00016073736613783868,
      "loss": 0.9715,
      "step": 2057
    },
    {
      "epoch": 0.3136238951539165,
      "grad_norm": 1.1328125,
      "learning_rate": 0.00016069814849188204,
      "loss": 1.0998,
      "step": 2058
    },
    {
      "epoch": 0.31377628771715943,
      "grad_norm": 0.9453125,
      "learning_rate": 0.00016065891605898357,
      "loss": 1.0848,
      "step": 2059
    },
    {
      "epoch": 0.31392868028040233,
      "grad_norm": 1.1171875,
      "learning_rate": 0.0001606196688487009,
      "loss": 0.875,
      "step": 2060
    },
    {
      "epoch": 0.3140810728436452,
      "grad_norm": 0.8984375,
      "learning_rate": 0.00016058040687059511,
      "loss": 0.8688,
      "step": 2061
    },
    {
      "epoch": 0.3142334654068881,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00016054113013423108,
      "loss": 1.0288,
      "step": 2062
    },
    {
      "epoch": 0.3143858579701311,
      "grad_norm": 0.94140625,
      "learning_rate": 0.00016050183864917716,
      "loss": 0.8908,
      "step": 2063
    },
    {
      "epoch": 0.314538250533374,
      "grad_norm": 1.1015625,
      "learning_rate": 0.0001604625324250053,
      "loss": 0.959,
      "step": 2064
    },
    {
      "epoch": 0.3146906430966169,
      "grad_norm": 1.2421875,
      "learning_rate": 0.00016042321147129106,
      "loss": 0.9567,
      "step": 2065
    },
    {
      "epoch": 0.31484303565985977,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00016038387579761363,
      "loss": 1.1774,
      "step": 2066
    },
    {
      "epoch": 0.3149954282231027,
      "grad_norm": 0.875,
      "learning_rate": 0.0001603445254135557,
      "loss": 0.9437,
      "step": 2067
    },
    {
      "epoch": 0.3151478207863456,
      "grad_norm": 1.125,
      "learning_rate": 0.00016030516032870362,
      "loss": 0.9041,
      "step": 2068
    },
    {
      "epoch": 0.3153002133495885,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00016026578055264724,
      "loss": 1.0676,
      "step": 2069
    },
    {
      "epoch": 0.3154526059128315,
      "grad_norm": 0.76171875,
      "learning_rate": 0.00016022638609498015,
      "loss": 0.9282,
      "step": 2070
    },
    {
      "epoch": 0.31560499847607437,
      "grad_norm": 0.7734375,
      "learning_rate": 0.00016018697696529928,
      "loss": 0.8832,
      "step": 2071
    },
    {
      "epoch": 0.31575739103931727,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00016014755317320535,
      "loss": 0.9052,
      "step": 2072
    },
    {
      "epoch": 0.31590978360256017,
      "grad_norm": 0.8828125,
      "learning_rate": 0.00016010811472830252,
      "loss": 1.0946,
      "step": 2073
    },
    {
      "epoch": 0.3160621761658031,
      "grad_norm": 1.15625,
      "learning_rate": 0.00016006866164019855,
      "loss": 1.0304,
      "step": 2074
    },
    {
      "epoch": 0.316214568729046,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00016002919391850483,
      "loss": 0.8853,
      "step": 2075
    },
    {
      "epoch": 0.3163669612922889,
      "grad_norm": 1.0859375,
      "learning_rate": 0.00015998971157283625,
      "loss": 1.175,
      "step": 2076
    },
    {
      "epoch": 0.31651935385553187,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00015995021461281123,
      "loss": 0.9044,
      "step": 2077
    },
    {
      "epoch": 0.31667174641877477,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00015991070304805183,
      "loss": 0.8973,
      "step": 2078
    },
    {
      "epoch": 0.31682413898201767,
      "grad_norm": 1.2578125,
      "learning_rate": 0.00015987117688818364,
      "loss": 1.0271,
      "step": 2079
    },
    {
      "epoch": 0.31697653154526056,
      "grad_norm": 1.2109375,
      "learning_rate": 0.00015983163614283582,
      "loss": 1.0796,
      "step": 2080
    },
    {
      "epoch": 0.3171289241085035,
      "grad_norm": 1.0234375,
      "learning_rate": 0.000159792080821641,
      "loss": 1.0021,
      "step": 2081
    },
    {
      "epoch": 0.3172813166717464,
      "grad_norm": 0.8046875,
      "learning_rate": 0.00015975251093423552,
      "loss": 0.9941,
      "step": 2082
    },
    {
      "epoch": 0.3174337092349893,
      "grad_norm": 0.90625,
      "learning_rate": 0.00015971292649025908,
      "loss": 0.8408,
      "step": 2083
    },
    {
      "epoch": 0.31758610179823227,
      "grad_norm": 1.265625,
      "learning_rate": 0.00015967332749935507,
      "loss": 0.8805,
      "step": 2084
    },
    {
      "epoch": 0.31773849436147517,
      "grad_norm": 0.9765625,
      "learning_rate": 0.00015963371397117036,
      "loss": 0.915,
      "step": 2085
    },
    {
      "epoch": 0.31789088692471806,
      "grad_norm": 0.85546875,
      "learning_rate": 0.0001595940859153554,
      "loss": 0.7712,
      "step": 2086
    },
    {
      "epoch": 0.31804327948796096,
      "grad_norm": 0.87890625,
      "learning_rate": 0.0001595544433415641,
      "loss": 0.8739,
      "step": 2087
    },
    {
      "epoch": 0.3181956720512039,
      "grad_norm": 0.98828125,
      "learning_rate": 0.000159514786259454,
      "loss": 1.0485,
      "step": 2088
    },
    {
      "epoch": 0.3183480646144468,
      "grad_norm": 1.0859375,
      "learning_rate": 0.00015947511467868613,
      "loss": 1.1365,
      "step": 2089
    },
    {
      "epoch": 0.3185004571776897,
      "grad_norm": 1.1015625,
      "learning_rate": 0.00015943542860892505,
      "loss": 1.0531,
      "step": 2090
    },
    {
      "epoch": 0.31865284974093266,
      "grad_norm": 0.8671875,
      "learning_rate": 0.0001593957280598389,
      "loss": 0.9169,
      "step": 2091
    },
    {
      "epoch": 0.31880524230417556,
      "grad_norm": 0.7578125,
      "learning_rate": 0.00015935601304109924,
      "loss": 0.7049,
      "step": 2092
    },
    {
      "epoch": 0.31895763486741846,
      "grad_norm": 0.98828125,
      "learning_rate": 0.00015931628356238126,
      "loss": 1.1038,
      "step": 2093
    },
    {
      "epoch": 0.31911002743066136,
      "grad_norm": 1.1640625,
      "learning_rate": 0.00015927653963336363,
      "loss": 1.0648,
      "step": 2094
    },
    {
      "epoch": 0.3192624199939043,
      "grad_norm": 0.875,
      "learning_rate": 0.0001592367812637285,
      "loss": 0.8967,
      "step": 2095
    },
    {
      "epoch": 0.3194148125571472,
      "grad_norm": 1.0546875,
      "learning_rate": 0.00015919700846316162,
      "loss": 1.0075,
      "step": 2096
    },
    {
      "epoch": 0.3195672051203901,
      "grad_norm": 0.9453125,
      "learning_rate": 0.00015915722124135227,
      "loss": 0.9284,
      "step": 2097
    },
    {
      "epoch": 0.31971959768363306,
      "grad_norm": 0.89453125,
      "learning_rate": 0.00015911741960799306,
      "loss": 0.8391,
      "step": 2098
    },
    {
      "epoch": 0.31987199024687596,
      "grad_norm": 0.89453125,
      "learning_rate": 0.00015907760357278033,
      "loss": 0.963,
      "step": 2099
    },
    {
      "epoch": 0.32002438281011886,
      "grad_norm": 0.99609375,
      "learning_rate": 0.00015903777314541382,
      "loss": 0.8015,
      "step": 2100
    },
    {
      "epoch": 0.32017677537336175,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00015899792833559679,
      "loss": 1.1391,
      "step": 2101
    },
    {
      "epoch": 0.3203291679366047,
      "grad_norm": 0.984375,
      "learning_rate": 0.000158958069153036,
      "loss": 1.1597,
      "step": 2102
    },
    {
      "epoch": 0.3204815604998476,
      "grad_norm": 0.84765625,
      "learning_rate": 0.00015891819560744176,
      "loss": 0.7975,
      "step": 2103
    },
    {
      "epoch": 0.3206339530630905,
      "grad_norm": 1.1328125,
      "learning_rate": 0.0001588783077085278,
      "loss": 1.1437,
      "step": 2104
    },
    {
      "epoch": 0.32078634562633346,
      "grad_norm": 0.99609375,
      "learning_rate": 0.0001588384054660114,
      "loss": 0.9916,
      "step": 2105
    },
    {
      "epoch": 0.32093873818957636,
      "grad_norm": 1.2734375,
      "learning_rate": 0.00015879848888961332,
      "loss": 0.7082,
      "step": 2106
    },
    {
      "epoch": 0.32109113075281925,
      "grad_norm": 1.171875,
      "learning_rate": 0.00015875855798905776,
      "loss": 1.0411,
      "step": 2107
    },
    {
      "epoch": 0.32124352331606215,
      "grad_norm": 1.125,
      "learning_rate": 0.00015871861277407255,
      "loss": 1.2797,
      "step": 2108
    },
    {
      "epoch": 0.3213959158793051,
      "grad_norm": 0.92578125,
      "learning_rate": 0.0001586786532543889,
      "loss": 0.8151,
      "step": 2109
    },
    {
      "epoch": 0.321548308442548,
      "grad_norm": 0.77734375,
      "learning_rate": 0.00015863867943974148,
      "loss": 0.9647,
      "step": 2110
    },
    {
      "epoch": 0.3217007010057909,
      "grad_norm": 0.8828125,
      "learning_rate": 0.00015859869133986852,
      "loss": 0.8596,
      "step": 2111
    },
    {
      "epoch": 0.32185309356903385,
      "grad_norm": 0.80859375,
      "learning_rate": 0.00015855868896451168,
      "loss": 1.0655,
      "step": 2112
    },
    {
      "epoch": 0.32200548613227675,
      "grad_norm": 1.078125,
      "learning_rate": 0.0001585186723234161,
      "loss": 1.2443,
      "step": 2113
    },
    {
      "epoch": 0.32215787869551965,
      "grad_norm": 1.34375,
      "learning_rate": 0.00015847864142633044,
      "loss": 0.9531,
      "step": 2114
    },
    {
      "epoch": 0.32231027125876255,
      "grad_norm": 1.265625,
      "learning_rate": 0.0001584385962830068,
      "loss": 1.1304,
      "step": 2115
    },
    {
      "epoch": 0.3224626638220055,
      "grad_norm": 1.2578125,
      "learning_rate": 0.00015839853690320074,
      "loss": 1.1347,
      "step": 2116
    },
    {
      "epoch": 0.3226150563852484,
      "grad_norm": 1.1640625,
      "learning_rate": 0.0001583584632966713,
      "loss": 1.0876,
      "step": 2117
    },
    {
      "epoch": 0.3227674489484913,
      "grad_norm": 1.078125,
      "learning_rate": 0.00015831837547318101,
      "loss": 0.8381,
      "step": 2118
    },
    {
      "epoch": 0.32291984151173425,
      "grad_norm": 1.1015625,
      "learning_rate": 0.00015827827344249578,
      "loss": 0.8716,
      "step": 2119
    },
    {
      "epoch": 0.32307223407497715,
      "grad_norm": 1.3203125,
      "learning_rate": 0.00015823815721438512,
      "loss": 0.9926,
      "step": 2120
    },
    {
      "epoch": 0.32322462663822005,
      "grad_norm": 0.890625,
      "learning_rate": 0.00015819802679862188,
      "loss": 0.9535,
      "step": 2121
    },
    {
      "epoch": 0.32337701920146295,
      "grad_norm": 1.03125,
      "learning_rate": 0.0001581578822049824,
      "loss": 1.1135,
      "step": 2122
    },
    {
      "epoch": 0.3235294117647059,
      "grad_norm": 1.0078125,
      "learning_rate": 0.0001581177234432465,
      "loss": 1.1836,
      "step": 2123
    },
    {
      "epoch": 0.3236818043279488,
      "grad_norm": 1.2890625,
      "learning_rate": 0.00015807755052319738,
      "loss": 1.0691,
      "step": 2124
    },
    {
      "epoch": 0.3238341968911917,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00015803736345462182,
      "loss": 0.8218,
      "step": 2125
    },
    {
      "epoch": 0.32398658945443465,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00015799716224730993,
      "loss": 1.0378,
      "step": 2126
    },
    {
      "epoch": 0.32413898201767755,
      "grad_norm": 0.9453125,
      "learning_rate": 0.0001579569469110553,
      "loss": 1.0978,
      "step": 2127
    },
    {
      "epoch": 0.32429137458092044,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00015791671745565497,
      "loss": 1.0454,
      "step": 2128
    },
    {
      "epoch": 0.32444376714416334,
      "grad_norm": 1.140625,
      "learning_rate": 0.0001578764738909094,
      "loss": 0.8685,
      "step": 2129
    },
    {
      "epoch": 0.3245961597074063,
      "grad_norm": 1.15625,
      "learning_rate": 0.00015783621622662247,
      "loss": 0.8576,
      "step": 2130
    },
    {
      "epoch": 0.3247485522706492,
      "grad_norm": 1.2734375,
      "learning_rate": 0.00015779594447260165,
      "loss": 1.0781,
      "step": 2131
    },
    {
      "epoch": 0.3249009448338921,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00015775565863865757,
      "loss": 0.9074,
      "step": 2132
    },
    {
      "epoch": 0.32505333739713504,
      "grad_norm": 0.90625,
      "learning_rate": 0.00015771535873460453,
      "loss": 0.914,
      "step": 2133
    },
    {
      "epoch": 0.32520572996037794,
      "grad_norm": 0.98046875,
      "learning_rate": 0.00015767504477026014,
      "loss": 1.1234,
      "step": 2134
    },
    {
      "epoch": 0.32535812252362084,
      "grad_norm": 1.109375,
      "learning_rate": 0.00015763471675544547,
      "loss": 0.9949,
      "step": 2135
    },
    {
      "epoch": 0.32551051508686374,
      "grad_norm": 0.9453125,
      "learning_rate": 0.00015759437469998498,
      "loss": 1.0163,
      "step": 2136
    },
    {
      "epoch": 0.3256629076501067,
      "grad_norm": 1.0859375,
      "learning_rate": 0.0001575540186137066,
      "loss": 0.9849,
      "step": 2137
    },
    {
      "epoch": 0.3258153002133496,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00015751364850644166,
      "loss": 0.972,
      "step": 2138
    },
    {
      "epoch": 0.3259676927765925,
      "grad_norm": 0.80859375,
      "learning_rate": 0.00015747326438802486,
      "loss": 0.9936,
      "step": 2139
    },
    {
      "epoch": 0.32612008533983544,
      "grad_norm": 1.171875,
      "learning_rate": 0.00015743286626829437,
      "loss": 1.0429,
      "step": 2140
    },
    {
      "epoch": 0.32627247790307834,
      "grad_norm": 0.984375,
      "learning_rate": 0.00015739245415709175,
      "loss": 1.118,
      "step": 2141
    },
    {
      "epoch": 0.32642487046632124,
      "grad_norm": 0.90625,
      "learning_rate": 0.00015735202806426202,
      "loss": 0.9165,
      "step": 2142
    },
    {
      "epoch": 0.32657726302956414,
      "grad_norm": 0.96484375,
      "learning_rate": 0.00015731158799965348,
      "loss": 1.158,
      "step": 2143
    },
    {
      "epoch": 0.3267296555928071,
      "grad_norm": 1.0625,
      "learning_rate": 0.00015727113397311798,
      "loss": 1.0067,
      "step": 2144
    },
    {
      "epoch": 0.32688204815605,
      "grad_norm": 1.0,
      "learning_rate": 0.00015723066599451064,
      "loss": 0.9667,
      "step": 2145
    },
    {
      "epoch": 0.3270344407192929,
      "grad_norm": 1.3359375,
      "learning_rate": 0.00015719018407369008,
      "loss": 1.0446,
      "step": 2146
    },
    {
      "epoch": 0.32718683328253584,
      "grad_norm": 0.7421875,
      "learning_rate": 0.00015714968822051826,
      "loss": 0.8185,
      "step": 2147
    },
    {
      "epoch": 0.32733922584577874,
      "grad_norm": 1.1015625,
      "learning_rate": 0.00015710917844486056,
      "loss": 1.237,
      "step": 2148
    },
    {
      "epoch": 0.32749161840902163,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00015706865475658582,
      "loss": 0.8021,
      "step": 2149
    },
    {
      "epoch": 0.32764401097226453,
      "grad_norm": 1.1484375,
      "learning_rate": 0.00015702811716556604,
      "loss": 1.0306,
      "step": 2150
    },
    {
      "epoch": 0.3277964035355075,
      "grad_norm": 0.83203125,
      "learning_rate": 0.00015698756568167685,
      "loss": 1.0783,
      "step": 2151
    },
    {
      "epoch": 0.3279487960987504,
      "grad_norm": 0.87890625,
      "learning_rate": 0.00015694700031479718,
      "loss": 0.9718,
      "step": 2152
    },
    {
      "epoch": 0.3281011886619933,
      "grad_norm": 1.2578125,
      "learning_rate": 0.0001569064210748093,
      "loss": 1.1749,
      "step": 2153
    },
    {
      "epoch": 0.32825358122523624,
      "grad_norm": 0.84375,
      "learning_rate": 0.00015686582797159893,
      "loss": 0.8589,
      "step": 2154
    },
    {
      "epoch": 0.32840597378847913,
      "grad_norm": 1.0703125,
      "learning_rate": 0.0001568252210150551,
      "loss": 0.9552,
      "step": 2155
    },
    {
      "epoch": 0.32855836635172203,
      "grad_norm": 1.1015625,
      "learning_rate": 0.00015678460021507034,
      "loss": 1.0692,
      "step": 2156
    },
    {
      "epoch": 0.32871075891496493,
      "grad_norm": 1.1171875,
      "learning_rate": 0.00015674396558154033,
      "loss": 1.0444,
      "step": 2157
    },
    {
      "epoch": 0.3288631514782079,
      "grad_norm": 0.953125,
      "learning_rate": 0.00015670331712436432,
      "loss": 1.0214,
      "step": 2158
    },
    {
      "epoch": 0.3290155440414508,
      "grad_norm": 0.7578125,
      "learning_rate": 0.00015666265485344484,
      "loss": 1.0308,
      "step": 2159
    },
    {
      "epoch": 0.3291679366046937,
      "grad_norm": 0.9765625,
      "learning_rate": 0.00015662197877868784,
      "loss": 0.8493,
      "step": 2160
    },
    {
      "epoch": 0.32932032916793663,
      "grad_norm": 0.96484375,
      "learning_rate": 0.00015658128891000254,
      "loss": 0.8779,
      "step": 2161
    },
    {
      "epoch": 0.32947272173117953,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00015654058525730165,
      "loss": 0.9064,
      "step": 2162
    },
    {
      "epoch": 0.3296251142944224,
      "grad_norm": 0.85546875,
      "learning_rate": 0.0001564998678305011,
      "loss": 1.0485,
      "step": 2163
    },
    {
      "epoch": 0.3297775068576653,
      "grad_norm": 1.3046875,
      "learning_rate": 0.00015645913663952024,
      "loss": 0.8863,
      "step": 2164
    },
    {
      "epoch": 0.3299298994209083,
      "grad_norm": 0.984375,
      "learning_rate": 0.00015641839169428182,
      "loss": 0.9996,
      "step": 2165
    },
    {
      "epoch": 0.3300822919841512,
      "grad_norm": 0.97265625,
      "learning_rate": 0.00015637763300471188,
      "loss": 0.9507,
      "step": 2166
    },
    {
      "epoch": 0.3302346845473941,
      "grad_norm": 0.921875,
      "learning_rate": 0.0001563368605807398,
      "loss": 1.0376,
      "step": 2167
    },
    {
      "epoch": 0.33038707711063703,
      "grad_norm": 0.96484375,
      "learning_rate": 0.00015629607443229835,
      "loss": 0.9564,
      "step": 2168
    },
    {
      "epoch": 0.3305394696738799,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00015625527456932363,
      "loss": 0.928,
      "step": 2169
    },
    {
      "epoch": 0.3306918622371228,
      "grad_norm": 0.9453125,
      "learning_rate": 0.00015621446100175503,
      "loss": 1.011,
      "step": 2170
    },
    {
      "epoch": 0.3308442548003657,
      "grad_norm": 0.8828125,
      "learning_rate": 0.00015617363373953536,
      "loss": 0.9395,
      "step": 2171
    },
    {
      "epoch": 0.3309966473636087,
      "grad_norm": 1.2265625,
      "learning_rate": 0.00015613279279261074,
      "loss": 1.0711,
      "step": 2172
    },
    {
      "epoch": 0.3311490399268516,
      "grad_norm": 0.7578125,
      "learning_rate": 0.00015609193817093058,
      "loss": 1.016,
      "step": 2173
    },
    {
      "epoch": 0.33130143249009447,
      "grad_norm": 0.84375,
      "learning_rate": 0.00015605106988444764,
      "loss": 0.9065,
      "step": 2174
    },
    {
      "epoch": 0.33145382505333737,
      "grad_norm": 0.9375,
      "learning_rate": 0.0001560101879431181,
      "loss": 0.9812,
      "step": 2175
    },
    {
      "epoch": 0.3316062176165803,
      "grad_norm": 1.03125,
      "learning_rate": 0.0001559692923569013,
      "loss": 1.0566,
      "step": 2176
    },
    {
      "epoch": 0.3317586101798232,
      "grad_norm": 0.92578125,
      "learning_rate": 0.00015592838313576004,
      "loss": 0.9399,
      "step": 2177
    },
    {
      "epoch": 0.3319110027430661,
      "grad_norm": 1.0,
      "learning_rate": 0.00015588746028966037,
      "loss": 1.1038,
      "step": 2178
    },
    {
      "epoch": 0.3320633953063091,
      "grad_norm": 0.90625,
      "learning_rate": 0.0001558465238285717,
      "loss": 0.9825,
      "step": 2179
    },
    {
      "epoch": 0.33221578786955197,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00015580557376246675,
      "loss": 0.9645,
      "step": 2180
    },
    {
      "epoch": 0.33236818043279487,
      "grad_norm": 0.953125,
      "learning_rate": 0.00015576461010132154,
      "loss": 0.92,
      "step": 2181
    },
    {
      "epoch": 0.33252057299603777,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00015572363285511534,
      "loss": 0.9519,
      "step": 2182
    },
    {
      "epoch": 0.3326729655592807,
      "grad_norm": 1.140625,
      "learning_rate": 0.00015568264203383094,
      "loss": 1.0871,
      "step": 2183
    },
    {
      "epoch": 0.3328253581225236,
      "grad_norm": 0.734375,
      "learning_rate": 0.00015564163764745416,
      "loss": 0.8753,
      "step": 2184
    },
    {
      "epoch": 0.3329777506857665,
      "grad_norm": 0.68359375,
      "learning_rate": 0.00015560061970597432,
      "loss": 0.8627,
      "step": 2185
    },
    {
      "epoch": 0.33313014324900947,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00015555958821938397,
      "loss": 0.8498,
      "step": 2186
    },
    {
      "epoch": 0.33328253581225237,
      "grad_norm": 1.0390625,
      "learning_rate": 0.00015551854319767897,
      "loss": 1.1254,
      "step": 2187
    },
    {
      "epoch": 0.33343492837549527,
      "grad_norm": 0.890625,
      "learning_rate": 0.00015547748465085848,
      "loss": 0.9991,
      "step": 2188
    },
    {
      "epoch": 0.33358732093873816,
      "grad_norm": 1.015625,
      "learning_rate": 0.00015543641258892492,
      "loss": 1.0083,
      "step": 2189
    },
    {
      "epoch": 0.3337397135019811,
      "grad_norm": 1.265625,
      "learning_rate": 0.0001553953270218841,
      "loss": 1.0025,
      "step": 2190
    },
    {
      "epoch": 0.333892106065224,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00015535422795974498,
      "loss": 0.9679,
      "step": 2191
    },
    {
      "epoch": 0.3340444986284669,
      "grad_norm": 1.21875,
      "learning_rate": 0.00015531311541251995,
      "loss": 0.9936,
      "step": 2192
    },
    {
      "epoch": 0.33419689119170987,
      "grad_norm": 0.8828125,
      "learning_rate": 0.00015527198939022457,
      "loss": 0.8933,
      "step": 2193
    },
    {
      "epoch": 0.33434928375495276,
      "grad_norm": 1.046875,
      "learning_rate": 0.0001552308499028778,
      "loss": 1.1582,
      "step": 2194
    },
    {
      "epoch": 0.33450167631819566,
      "grad_norm": 1.0859375,
      "learning_rate": 0.0001551896969605017,
      "loss": 1.016,
      "step": 2195
    },
    {
      "epoch": 0.33465406888143856,
      "grad_norm": 0.7578125,
      "learning_rate": 0.0001551485305731218,
      "loss": 0.8317,
      "step": 2196
    },
    {
      "epoch": 0.3348064614446815,
      "grad_norm": 0.9453125,
      "learning_rate": 0.00015510735075076681,
      "loss": 1.0317,
      "step": 2197
    },
    {
      "epoch": 0.3349588540079244,
      "grad_norm": 0.9296875,
      "learning_rate": 0.0001550661575034687,
      "loss": 0.8957,
      "step": 2198
    },
    {
      "epoch": 0.3351112465711673,
      "grad_norm": 1.0625,
      "learning_rate": 0.00015502495084126277,
      "loss": 0.957,
      "step": 2199
    },
    {
      "epoch": 0.33526363913441026,
      "grad_norm": 0.6875,
      "learning_rate": 0.00015498373077418753,
      "loss": 0.9671,
      "step": 2200
    },
    {
      "epoch": 0.33541603169765316,
      "grad_norm": 0.90625,
      "learning_rate": 0.0001549424973122848,
      "loss": 1.068,
      "step": 2201
    },
    {
      "epoch": 0.33556842426089606,
      "grad_norm": 0.80078125,
      "learning_rate": 0.00015490125046559964,
      "loss": 0.983,
      "step": 2202
    },
    {
      "epoch": 0.33572081682413896,
      "grad_norm": 1.2109375,
      "learning_rate": 0.00015485999024418039,
      "loss": 1.0785,
      "step": 2203
    },
    {
      "epoch": 0.3358732093873819,
      "grad_norm": 0.984375,
      "learning_rate": 0.00015481871665807856,
      "loss": 1.1641,
      "step": 2204
    },
    {
      "epoch": 0.3360256019506248,
      "grad_norm": 0.8984375,
      "learning_rate": 0.0001547774297173491,
      "loss": 1.1834,
      "step": 2205
    },
    {
      "epoch": 0.3361779945138677,
      "grad_norm": 1.109375,
      "learning_rate": 0.00015473612943205,
      "loss": 0.9978,
      "step": 2206
    },
    {
      "epoch": 0.33633038707711066,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00015469481581224272,
      "loss": 0.8564,
      "step": 2207
    },
    {
      "epoch": 0.33648277964035356,
      "grad_norm": 0.890625,
      "learning_rate": 0.00015465348886799173,
      "loss": 0.894,
      "step": 2208
    },
    {
      "epoch": 0.33663517220359646,
      "grad_norm": 0.87890625,
      "learning_rate": 0.00015461214860936493,
      "loss": 0.8472,
      "step": 2209
    },
    {
      "epoch": 0.33678756476683935,
      "grad_norm": 1.171875,
      "learning_rate": 0.0001545707950464334,
      "loss": 1.111,
      "step": 2210
    },
    {
      "epoch": 0.3369399573300823,
      "grad_norm": 0.9453125,
      "learning_rate": 0.00015452942818927143,
      "loss": 0.9173,
      "step": 2211
    },
    {
      "epoch": 0.3370923498933252,
      "grad_norm": 0.92578125,
      "learning_rate": 0.00015448804804795663,
      "loss": 0.9752,
      "step": 2212
    },
    {
      "epoch": 0.3372447424565681,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00015444665463256976,
      "loss": 1.0086,
      "step": 2213
    },
    {
      "epoch": 0.33739713501981106,
      "grad_norm": 0.98046875,
      "learning_rate": 0.00015440524795319485,
      "loss": 0.8777,
      "step": 2214
    },
    {
      "epoch": 0.33754952758305395,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00015436382801991923,
      "loss": 1.1377,
      "step": 2215
    },
    {
      "epoch": 0.33770192014629685,
      "grad_norm": 0.97265625,
      "learning_rate": 0.0001543223948428333,
      "loss": 1.0483,
      "step": 2216
    },
    {
      "epoch": 0.33785431270953975,
      "grad_norm": 1.1953125,
      "learning_rate": 0.00015428094843203082,
      "loss": 0.9766,
      "step": 2217
    },
    {
      "epoch": 0.3380067052727827,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00015423948879760872,
      "loss": 0.8302,
      "step": 2218
    },
    {
      "epoch": 0.3381590978360256,
      "grad_norm": 1.625,
      "learning_rate": 0.00015419801594966718,
      "loss": 0.9906,
      "step": 2219
    },
    {
      "epoch": 0.3383114903992685,
      "grad_norm": 0.8359375,
      "learning_rate": 0.00015415652989830955,
      "loss": 0.9379,
      "step": 2220
    },
    {
      "epoch": 0.33846388296251145,
      "grad_norm": 1.140625,
      "learning_rate": 0.00015411503065364252,
      "loss": 1.1853,
      "step": 2221
    },
    {
      "epoch": 0.33861627552575435,
      "grad_norm": 1.1328125,
      "learning_rate": 0.00015407351822577578,
      "loss": 1.1835,
      "step": 2222
    },
    {
      "epoch": 0.33876866808899725,
      "grad_norm": 0.95703125,
      "learning_rate": 0.00015403199262482243,
      "loss": 0.833,
      "step": 2223
    },
    {
      "epoch": 0.33892106065224015,
      "grad_norm": 0.90625,
      "learning_rate": 0.0001539904538608987,
      "loss": 1.0473,
      "step": 2224
    },
    {
      "epoch": 0.3390734532154831,
      "grad_norm": 0.89453125,
      "learning_rate": 0.00015394890194412396,
      "loss": 1.1203,
      "step": 2225
    },
    {
      "epoch": 0.339225845778726,
      "grad_norm": 0.86328125,
      "learning_rate": 0.00015390733688462092,
      "loss": 1.0145,
      "step": 2226
    },
    {
      "epoch": 0.3393782383419689,
      "grad_norm": 1.3046875,
      "learning_rate": 0.00015386575869251543,
      "loss": 1.1815,
      "step": 2227
    },
    {
      "epoch": 0.33953063090521185,
      "grad_norm": 0.79296875,
      "learning_rate": 0.0001538241673779365,
      "loss": 0.9595,
      "step": 2228
    },
    {
      "epoch": 0.33968302346845475,
      "grad_norm": 1.109375,
      "learning_rate": 0.0001537825629510164,
      "loss": 0.9956,
      "step": 2229
    },
    {
      "epoch": 0.33983541603169765,
      "grad_norm": 0.99609375,
      "learning_rate": 0.00015374094542189054,
      "loss": 0.9343,
      "step": 2230
    },
    {
      "epoch": 0.33998780859494054,
      "grad_norm": 0.828125,
      "learning_rate": 0.00015369931480069761,
      "loss": 1.0098,
      "step": 2231
    },
    {
      "epoch": 0.3401402011581835,
      "grad_norm": 1.359375,
      "learning_rate": 0.00015365767109757936,
      "loss": 0.8496,
      "step": 2232
    },
    {
      "epoch": 0.3402925937214264,
      "grad_norm": 0.83203125,
      "learning_rate": 0.00015361601432268082,
      "loss": 0.9637,
      "step": 2233
    },
    {
      "epoch": 0.3404449862846693,
      "grad_norm": 0.9453125,
      "learning_rate": 0.0001535743444861502,
      "loss": 1.0433,
      "step": 2234
    },
    {
      "epoch": 0.34059737884791225,
      "grad_norm": 1.0625,
      "learning_rate": 0.00015353266159813883,
      "loss": 1.0151,
      "step": 2235
    },
    {
      "epoch": 0.34074977141115514,
      "grad_norm": 0.73828125,
      "learning_rate": 0.00015349096566880127,
      "loss": 0.8536,
      "step": 2236
    },
    {
      "epoch": 0.34090216397439804,
      "grad_norm": 0.984375,
      "learning_rate": 0.00015344925670829527,
      "loss": 0.9937,
      "step": 2237
    },
    {
      "epoch": 0.34105455653764094,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00015340753472678172,
      "loss": 1.085,
      "step": 2238
    },
    {
      "epoch": 0.3412069491008839,
      "grad_norm": 1.078125,
      "learning_rate": 0.0001533657997344247,
      "loss": 1.152,
      "step": 2239
    },
    {
      "epoch": 0.3413593416641268,
      "grad_norm": 1.2890625,
      "learning_rate": 0.00015332405174139146,
      "loss": 0.9264,
      "step": 2240
    },
    {
      "epoch": 0.3415117342273697,
      "grad_norm": 0.74609375,
      "learning_rate": 0.00015328229075785242,
      "loss": 0.8176,
      "step": 2241
    },
    {
      "epoch": 0.34166412679061264,
      "grad_norm": 1.125,
      "learning_rate": 0.00015324051679398108,
      "loss": 1.1353,
      "step": 2242
    },
    {
      "epoch": 0.34181651935385554,
      "grad_norm": 0.81640625,
      "learning_rate": 0.0001531987298599543,
      "loss": 0.9197,
      "step": 2243
    },
    {
      "epoch": 0.34196891191709844,
      "grad_norm": 1.34375,
      "learning_rate": 0.00015315692996595187,
      "loss": 1.148,
      "step": 2244
    },
    {
      "epoch": 0.34212130448034134,
      "grad_norm": 0.9140625,
      "learning_rate": 0.0001531151171221569,
      "loss": 1.1638,
      "step": 2245
    },
    {
      "epoch": 0.3422736970435843,
      "grad_norm": 1.1953125,
      "learning_rate": 0.0001530732913387556,
      "loss": 1.1054,
      "step": 2246
    },
    {
      "epoch": 0.3424260896068272,
      "grad_norm": 1.0390625,
      "learning_rate": 0.00015303145262593728,
      "loss": 1.0542,
      "step": 2247
    },
    {
      "epoch": 0.3425784821700701,
      "grad_norm": 0.99609375,
      "learning_rate": 0.00015298960099389454,
      "loss": 1.0361,
      "step": 2248
    },
    {
      "epoch": 0.34273087473331304,
      "grad_norm": 1.140625,
      "learning_rate": 0.000152947736452823,
      "loss": 0.9025,
      "step": 2249
    },
    {
      "epoch": 0.34288326729655594,
      "grad_norm": 1.0546875,
      "learning_rate": 0.0001529058590129214,
      "loss": 1.0063,
      "step": 2250
    },
    {
      "epoch": 0.34303565985979884,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00015286396868439183,
      "loss": 0.9637,
      "step": 2251
    },
    {
      "epoch": 0.34318805242304173,
      "grad_norm": 0.96875,
      "learning_rate": 0.00015282206547743922,
      "loss": 1.052,
      "step": 2252
    },
    {
      "epoch": 0.3433404449862847,
      "grad_norm": 0.9296875,
      "learning_rate": 0.0001527801494022719,
      "loss": 0.9796,
      "step": 2253
    },
    {
      "epoch": 0.3434928375495276,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00015273822046910117,
      "loss": 0.8304,
      "step": 2254
    },
    {
      "epoch": 0.3436452301127705,
      "grad_norm": 0.81640625,
      "learning_rate": 0.0001526962786881416,
      "loss": 0.9468,
      "step": 2255
    },
    {
      "epoch": 0.34379762267601344,
      "grad_norm": 0.76953125,
      "learning_rate": 0.00015265432406961067,
      "loss": 0.8994,
      "step": 2256
    },
    {
      "epoch": 0.34395001523925633,
      "grad_norm": 0.82421875,
      "learning_rate": 0.00015261235662372927,
      "loss": 0.9554,
      "step": 2257
    },
    {
      "epoch": 0.34410240780249923,
      "grad_norm": 1.1484375,
      "learning_rate": 0.0001525703763607212,
      "loss": 0.9817,
      "step": 2258
    },
    {
      "epoch": 0.34425480036574213,
      "grad_norm": 1.109375,
      "learning_rate": 0.00015252838329081345,
      "loss": 0.9438,
      "step": 2259
    },
    {
      "epoch": 0.3444071929289851,
      "grad_norm": 0.9140625,
      "learning_rate": 0.00015248637742423622,
      "loss": 1.0409,
      "step": 2260
    },
    {
      "epoch": 0.344559585492228,
      "grad_norm": 0.828125,
      "learning_rate": 0.00015244435877122262,
      "loss": 0.777,
      "step": 2261
    },
    {
      "epoch": 0.3447119780554709,
      "grad_norm": 0.9375,
      "learning_rate": 0.00015240232734200907,
      "loss": 1.0022,
      "step": 2262
    },
    {
      "epoch": 0.34486437061871383,
      "grad_norm": 0.9296875,
      "learning_rate": 0.00015236028314683506,
      "loss": 1.0103,
      "step": 2263
    },
    {
      "epoch": 0.34501676318195673,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00015231822619594308,
      "loss": 1.0418,
      "step": 2264
    },
    {
      "epoch": 0.34516915574519963,
      "grad_norm": 1.1328125,
      "learning_rate": 0.00015227615649957888,
      "loss": 1.0695,
      "step": 2265
    },
    {
      "epoch": 0.3453215483084425,
      "grad_norm": 0.73828125,
      "learning_rate": 0.0001522340740679912,
      "loss": 1.1287,
      "step": 2266
    },
    {
      "epoch": 0.3454739408716855,
      "grad_norm": 0.74609375,
      "learning_rate": 0.00015219197891143193,
      "loss": 0.9707,
      "step": 2267
    },
    {
      "epoch": 0.3456263334349284,
      "grad_norm": 0.73046875,
      "learning_rate": 0.0001521498710401561,
      "loss": 0.8827,
      "step": 2268
    },
    {
      "epoch": 0.3457787259981713,
      "grad_norm": 1.046875,
      "learning_rate": 0.0001521077504644217,
      "loss": 1.0704,
      "step": 2269
    },
    {
      "epoch": 0.34593111856141423,
      "grad_norm": 1.03125,
      "learning_rate": 0.00015206561719449,
      "loss": 0.9346,
      "step": 2270
    },
    {
      "epoch": 0.34608351112465713,
      "grad_norm": 1.0078125,
      "learning_rate": 0.0001520234712406252,
      "loss": 1.0257,
      "step": 2271
    },
    {
      "epoch": 0.3462359036879,
      "grad_norm": 0.953125,
      "learning_rate": 0.00015198131261309473,
      "loss": 0.9917,
      "step": 2272
    },
    {
      "epoch": 0.3463882962511429,
      "grad_norm": 0.94140625,
      "learning_rate": 0.00015193914132216897,
      "loss": 0.8207,
      "step": 2273
    },
    {
      "epoch": 0.3465406888143859,
      "grad_norm": 0.9453125,
      "learning_rate": 0.00015189695737812152,
      "loss": 1.042,
      "step": 2274
    },
    {
      "epoch": 0.3466930813776288,
      "grad_norm": 0.765625,
      "learning_rate": 0.00015185476079122897,
      "loss": 0.9764,
      "step": 2275
    },
    {
      "epoch": 0.3468454739408717,
      "grad_norm": 0.77734375,
      "learning_rate": 0.00015181255157177095,
      "loss": 0.9183,
      "step": 2276
    },
    {
      "epoch": 0.34699786650411457,
      "grad_norm": 1.1875,
      "learning_rate": 0.00015177032973003033,
      "loss": 1.0437,
      "step": 2277
    },
    {
      "epoch": 0.3471502590673575,
      "grad_norm": 0.9375,
      "learning_rate": 0.0001517280952762929,
      "loss": 1.1609,
      "step": 2278
    },
    {
      "epoch": 0.3473026516306004,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00015168584822084763,
      "loss": 0.7888,
      "step": 2279
    },
    {
      "epoch": 0.3474550441938433,
      "grad_norm": 0.73046875,
      "learning_rate": 0.00015164358857398645,
      "loss": 0.9627,
      "step": 2280
    },
    {
      "epoch": 0.3476074367570863,
      "grad_norm": 0.9765625,
      "learning_rate": 0.00015160131634600442,
      "loss": 1.0505,
      "step": 2281
    },
    {
      "epoch": 0.3477598293203292,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00015155903154719975,
      "loss": 0.943,
      "step": 2282
    },
    {
      "epoch": 0.34791222188357207,
      "grad_norm": 0.9765625,
      "learning_rate": 0.0001515167341878735,
      "loss": 0.9968,
      "step": 2283
    },
    {
      "epoch": 0.34806461444681497,
      "grad_norm": 1.1015625,
      "learning_rate": 0.00015147442427833002,
      "loss": 0.9911,
      "step": 2284
    },
    {
      "epoch": 0.3482170070100579,
      "grad_norm": 1.109375,
      "learning_rate": 0.00015143210182887656,
      "loss": 0.9441,
      "step": 2285
    },
    {
      "epoch": 0.3483693995733008,
      "grad_norm": 1.046875,
      "learning_rate": 0.00015138976684982348,
      "loss": 1.0931,
      "step": 2286
    },
    {
      "epoch": 0.3485217921365437,
      "grad_norm": 1.09375,
      "learning_rate": 0.0001513474193514842,
      "loss": 1.1364,
      "step": 2287
    },
    {
      "epoch": 0.34867418469978667,
      "grad_norm": 0.953125,
      "learning_rate": 0.00015130505934417518,
      "loss": 1.0299,
      "step": 2288
    },
    {
      "epoch": 0.34882657726302957,
      "grad_norm": 0.77734375,
      "learning_rate": 0.00015126268683821593,
      "loss": 1.0118,
      "step": 2289
    },
    {
      "epoch": 0.34897896982627247,
      "grad_norm": 0.96484375,
      "learning_rate": 0.00015122030184392898,
      "loss": 0.8238,
      "step": 2290
    },
    {
      "epoch": 0.34913136238951537,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00015117790437163997,
      "loss": 0.922,
      "step": 2291
    },
    {
      "epoch": 0.3492837549527583,
      "grad_norm": 1.0,
      "learning_rate": 0.00015113549443167748,
      "loss": 1.0041,
      "step": 2292
    },
    {
      "epoch": 0.3494361475160012,
      "grad_norm": 0.7734375,
      "learning_rate": 0.0001510930720343732,
      "loss": 0.8364,
      "step": 2293
    },
    {
      "epoch": 0.3495885400792441,
      "grad_norm": 0.80078125,
      "learning_rate": 0.00015105063719006182,
      "loss": 1.0569,
      "step": 2294
    },
    {
      "epoch": 0.34974093264248707,
      "grad_norm": 1.0859375,
      "learning_rate": 0.0001510081899090811,
      "loss": 1.0083,
      "step": 2295
    },
    {
      "epoch": 0.34989332520572997,
      "grad_norm": 0.94921875,
      "learning_rate": 0.00015096573020177188,
      "loss": 1.0572,
      "step": 2296
    },
    {
      "epoch": 0.35004571776897286,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00015092325807847782,
      "loss": 0.8305,
      "step": 2297
    },
    {
      "epoch": 0.35019811033221576,
      "grad_norm": 1.015625,
      "learning_rate": 0.0001508807735495458,
      "loss": 1.0375,
      "step": 2298
    },
    {
      "epoch": 0.3503505028954587,
      "grad_norm": 1.1328125,
      "learning_rate": 0.0001508382766253257,
      "loss": 1.0209,
      "step": 2299
    },
    {
      "epoch": 0.3505028954587016,
      "grad_norm": 0.98046875,
      "learning_rate": 0.00015079576731617033,
      "loss": 0.8294,
      "step": 2300
    },
    {
      "epoch": 0.3506552880219445,
      "grad_norm": 0.88671875,
      "learning_rate": 0.00015075324563243556,
      "loss": 1.1599,
      "step": 2301
    },
    {
      "epoch": 0.35080768058518746,
      "grad_norm": 0.94921875,
      "learning_rate": 0.00015071071158448036,
      "loss": 1.0029,
      "step": 2302
    },
    {
      "epoch": 0.35096007314843036,
      "grad_norm": 0.921875,
      "learning_rate": 0.00015066816518266655,
      "loss": 0.9073,
      "step": 2303
    },
    {
      "epoch": 0.35111246571167326,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00015062560643735909,
      "loss": 1.0147,
      "step": 2304
    },
    {
      "epoch": 0.35126485827491616,
      "grad_norm": 1.0625,
      "learning_rate": 0.0001505830353589259,
      "loss": 0.8946,
      "step": 2305
    },
    {
      "epoch": 0.3514172508381591,
      "grad_norm": 1.2109375,
      "learning_rate": 0.0001505404519577379,
      "loss": 0.9264,
      "step": 2306
    },
    {
      "epoch": 0.351569643401402,
      "grad_norm": 1.046875,
      "learning_rate": 0.000150497856244169,
      "loss": 0.9504,
      "step": 2307
    },
    {
      "epoch": 0.3517220359646449,
      "grad_norm": 0.9609375,
      "learning_rate": 0.00015045524822859615,
      "loss": 1.2397,
      "step": 2308
    },
    {
      "epoch": 0.35187442852788786,
      "grad_norm": 1.0,
      "learning_rate": 0.00015041262792139928,
      "loss": 0.9998,
      "step": 2309
    },
    {
      "epoch": 0.35202682109113076,
      "grad_norm": 1.046875,
      "learning_rate": 0.0001503699953329613,
      "loss": 0.8013,
      "step": 2310
    },
    {
      "epoch": 0.35217921365437366,
      "grad_norm": 0.92578125,
      "learning_rate": 0.0001503273504736681,
      "loss": 0.984,
      "step": 2311
    },
    {
      "epoch": 0.35233160621761656,
      "grad_norm": 0.9296875,
      "learning_rate": 0.0001502846933539086,
      "loss": 0.9662,
      "step": 2312
    },
    {
      "epoch": 0.3524839987808595,
      "grad_norm": 1.09375,
      "learning_rate": 0.00015024202398407477,
      "loss": 0.9669,
      "step": 2313
    },
    {
      "epoch": 0.3526363913441024,
      "grad_norm": 1.3984375,
      "learning_rate": 0.00015019934237456132,
      "loss": 0.9737,
      "step": 2314
    },
    {
      "epoch": 0.3527887839073453,
      "grad_norm": 1.0859375,
      "learning_rate": 0.00015015664853576622,
      "loss": 0.9463,
      "step": 2315
    },
    {
      "epoch": 0.35294117647058826,
      "grad_norm": 0.9140625,
      "learning_rate": 0.00015011394247809029,
      "loss": 0.9389,
      "step": 2316
    },
    {
      "epoch": 0.35309356903383116,
      "grad_norm": 0.84765625,
      "learning_rate": 0.00015007122421193729,
      "loss": 1.0504,
      "step": 2317
    },
    {
      "epoch": 0.35324596159707405,
      "grad_norm": 1.125,
      "learning_rate": 0.00015002849374771401,
      "loss": 1.081,
      "step": 2318
    },
    {
      "epoch": 0.35339835416031695,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00014998575109583027,
      "loss": 0.9223,
      "step": 2319
    },
    {
      "epoch": 0.3535507467235599,
      "grad_norm": 0.97265625,
      "learning_rate": 0.00014994299626669877,
      "loss": 1.043,
      "step": 2320
    },
    {
      "epoch": 0.3537031392868028,
      "grad_norm": 1.09375,
      "learning_rate": 0.00014990022927073517,
      "loss": 0.9279,
      "step": 2321
    },
    {
      "epoch": 0.3538555318500457,
      "grad_norm": 1.21875,
      "learning_rate": 0.00014985745011835815,
      "loss": 1.1361,
      "step": 2322
    },
    {
      "epoch": 0.35400792441328865,
      "grad_norm": 0.7265625,
      "learning_rate": 0.0001498146588199893,
      "loss": 0.9451,
      "step": 2323
    },
    {
      "epoch": 0.35416031697653155,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00014977185538605323,
      "loss": 0.853,
      "step": 2324
    },
    {
      "epoch": 0.35431270953977445,
      "grad_norm": 1.1328125,
      "learning_rate": 0.00014972903982697744,
      "loss": 0.9508,
      "step": 2325
    },
    {
      "epoch": 0.35446510210301735,
      "grad_norm": 0.83203125,
      "learning_rate": 0.00014968621215319242,
      "loss": 1.1311,
      "step": 2326
    },
    {
      "epoch": 0.3546174946662603,
      "grad_norm": 1.1015625,
      "learning_rate": 0.00014964337237513166,
      "loss": 1.0668,
      "step": 2327
    },
    {
      "epoch": 0.3547698872295032,
      "grad_norm": 0.72265625,
      "learning_rate": 0.00014960052050323145,
      "loss": 0.9286,
      "step": 2328
    },
    {
      "epoch": 0.3549222797927461,
      "grad_norm": 0.8984375,
      "learning_rate": 0.0001495576565479312,
      "loss": 0.9911,
      "step": 2329
    },
    {
      "epoch": 0.35507467235598905,
      "grad_norm": 0.96484375,
      "learning_rate": 0.0001495147805196732,
      "loss": 0.9659,
      "step": 2330
    },
    {
      "epoch": 0.35522706491923195,
      "grad_norm": 0.90234375,
      "learning_rate": 0.0001494718924289026,
      "loss": 0.816,
      "step": 2331
    },
    {
      "epoch": 0.35537945748247485,
      "grad_norm": 0.87890625,
      "learning_rate": 0.00014942899228606758,
      "loss": 0.9509,
      "step": 2332
    },
    {
      "epoch": 0.35553185004571775,
      "grad_norm": 0.90625,
      "learning_rate": 0.00014938608010161926,
      "loss": 1.0248,
      "step": 2333
    },
    {
      "epoch": 0.3556842426089607,
      "grad_norm": 0.9921875,
      "learning_rate": 0.00014934315588601161,
      "loss": 0.9894,
      "step": 2334
    },
    {
      "epoch": 0.3558366351722036,
      "grad_norm": 0.9921875,
      "learning_rate": 0.00014930021964970164,
      "loss": 0.9037,
      "step": 2335
    },
    {
      "epoch": 0.3559890277354465,
      "grad_norm": 0.80078125,
      "learning_rate": 0.0001492572714031492,
      "loss": 0.957,
      "step": 2336
    },
    {
      "epoch": 0.35614142029868945,
      "grad_norm": 1.2265625,
      "learning_rate": 0.00014921431115681713,
      "loss": 0.8711,
      "step": 2337
    },
    {
      "epoch": 0.35629381286193235,
      "grad_norm": 1.140625,
      "learning_rate": 0.00014917133892117116,
      "loss": 1.0854,
      "step": 2338
    },
    {
      "epoch": 0.35644620542517524,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00014912835470667988,
      "loss": 0.8225,
      "step": 2339
    },
    {
      "epoch": 0.35659859798841814,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00014908535852381497,
      "loss": 0.9383,
      "step": 2340
    },
    {
      "epoch": 0.3567509905516611,
      "grad_norm": 0.828125,
      "learning_rate": 0.00014904235038305083,
      "loss": 1.0533,
      "step": 2341
    },
    {
      "epoch": 0.356903383114904,
      "grad_norm": 1.0703125,
      "learning_rate": 0.0001489993302948649,
      "loss": 1.0315,
      "step": 2342
    },
    {
      "epoch": 0.3570557756781469,
      "grad_norm": 0.859375,
      "learning_rate": 0.00014895629826973747,
      "loss": 0.8932,
      "step": 2343
    },
    {
      "epoch": 0.35720816824138985,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00014891325431815183,
      "loss": 0.87,
      "step": 2344
    },
    {
      "epoch": 0.35736056080463274,
      "grad_norm": 1.078125,
      "learning_rate": 0.00014887019845059401,
      "loss": 1.1612,
      "step": 2345
    },
    {
      "epoch": 0.35751295336787564,
      "grad_norm": 0.87109375,
      "learning_rate": 0.0001488271306775531,
      "loss": 0.9618,
      "step": 2346
    },
    {
      "epoch": 0.35766534593111854,
      "grad_norm": 0.6484375,
      "learning_rate": 0.00014878405100952102,
      "loss": 0.8885,
      "step": 2347
    },
    {
      "epoch": 0.3578177384943615,
      "grad_norm": 1.609375,
      "learning_rate": 0.00014874095945699255,
      "loss": 1.1048,
      "step": 2348
    },
    {
      "epoch": 0.3579701310576044,
      "grad_norm": 0.96875,
      "learning_rate": 0.00014869785603046544,
      "loss": 1.2317,
      "step": 2349
    },
    {
      "epoch": 0.3581225236208473,
      "grad_norm": 1.1328125,
      "learning_rate": 0.00014865474074044036,
      "loss": 0.9561,
      "step": 2350
    },
    {
      "epoch": 0.35827491618409024,
      "grad_norm": 0.9609375,
      "learning_rate": 0.00014861161359742075,
      "loss": 1.0932,
      "step": 2351
    },
    {
      "epoch": 0.35842730874733314,
      "grad_norm": 0.92578125,
      "learning_rate": 0.00014856847461191302,
      "loss": 1.0969,
      "step": 2352
    },
    {
      "epoch": 0.35857970131057604,
      "grad_norm": 1.125,
      "learning_rate": 0.00014852532379442646,
      "loss": 0.9148,
      "step": 2353
    },
    {
      "epoch": 0.35873209387381894,
      "grad_norm": 0.75390625,
      "learning_rate": 0.0001484821611554732,
      "loss": 1.034,
      "step": 2354
    },
    {
      "epoch": 0.3588844864370619,
      "grad_norm": 0.80859375,
      "learning_rate": 0.0001484389867055683,
      "loss": 0.9027,
      "step": 2355
    },
    {
      "epoch": 0.3590368790003048,
      "grad_norm": 0.90625,
      "learning_rate": 0.00014839580045522966,
      "loss": 0.7919,
      "step": 2356
    },
    {
      "epoch": 0.3591892715635477,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00014835260241497812,
      "loss": 0.7608,
      "step": 2357
    },
    {
      "epoch": 0.35934166412679064,
      "grad_norm": 0.828125,
      "learning_rate": 0.0001483093925953373,
      "loss": 0.8627,
      "step": 2358
    },
    {
      "epoch": 0.35949405669003354,
      "grad_norm": 0.93359375,
      "learning_rate": 0.0001482661710068337,
      "loss": 0.9469,
      "step": 2359
    },
    {
      "epoch": 0.35964644925327643,
      "grad_norm": 1.046875,
      "learning_rate": 0.00014822293765999678,
      "loss": 0.9006,
      "step": 2360
    },
    {
      "epoch": 0.35979884181651933,
      "grad_norm": 0.9765625,
      "learning_rate": 0.00014817969256535881,
      "loss": 0.76,
      "step": 2361
    },
    {
      "epoch": 0.3599512343797623,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00014813643573345487,
      "loss": 0.9835,
      "step": 2362
    },
    {
      "epoch": 0.3601036269430052,
      "grad_norm": 0.84765625,
      "learning_rate": 0.000148093167174823,
      "loss": 1.0365,
      "step": 2363
    },
    {
      "epoch": 0.3602560195062481,
      "grad_norm": 1.09375,
      "learning_rate": 0.00014804988690000402,
      "loss": 0.9249,
      "step": 2364
    },
    {
      "epoch": 0.36040841206949104,
      "grad_norm": 1.015625,
      "learning_rate": 0.0001480065949195416,
      "loss": 1.1759,
      "step": 2365
    },
    {
      "epoch": 0.36056080463273393,
      "grad_norm": 0.9765625,
      "learning_rate": 0.0001479632912439823,
      "loss": 1.045,
      "step": 2366
    },
    {
      "epoch": 0.36071319719597683,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00014791997588387554,
      "loss": 0.8358,
      "step": 2367
    },
    {
      "epoch": 0.36086558975921973,
      "grad_norm": 1.03125,
      "learning_rate": 0.00014787664884977354,
      "loss": 1.0797,
      "step": 2368
    },
    {
      "epoch": 0.3610179823224627,
      "grad_norm": 0.9765625,
      "learning_rate": 0.00014783331015223142,
      "loss": 0.98,
      "step": 2369
    },
    {
      "epoch": 0.3611703748857056,
      "grad_norm": 0.83203125,
      "learning_rate": 0.00014778995980180707,
      "loss": 1.0067,
      "step": 2370
    },
    {
      "epoch": 0.3613227674489485,
      "grad_norm": 0.8203125,
      "learning_rate": 0.0001477465978090613,
      "loss": 0.7713,
      "step": 2371
    },
    {
      "epoch": 0.36147516001219143,
      "grad_norm": 0.68359375,
      "learning_rate": 0.0001477032241845577,
      "loss": 0.8394,
      "step": 2372
    },
    {
      "epoch": 0.36162755257543433,
      "grad_norm": 0.890625,
      "learning_rate": 0.00014765983893886268,
      "loss": 0.836,
      "step": 2373
    },
    {
      "epoch": 0.36177994513867723,
      "grad_norm": 1.1640625,
      "learning_rate": 0.00014761644208254556,
      "loss": 1.0503,
      "step": 2374
    },
    {
      "epoch": 0.3619323377019201,
      "grad_norm": 0.7578125,
      "learning_rate": 0.0001475730336261784,
      "loss": 0.7296,
      "step": 2375
    },
    {
      "epoch": 0.3620847302651631,
      "grad_norm": 1.0625,
      "learning_rate": 0.00014752961358033612,
      "loss": 0.8972,
      "step": 2376
    },
    {
      "epoch": 0.362237122828406,
      "grad_norm": 1.125,
      "learning_rate": 0.00014748618195559648,
      "loss": 1.0307,
      "step": 2377
    },
    {
      "epoch": 0.3623895153916489,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00014744273876254012,
      "loss": 0.7788,
      "step": 2378
    },
    {
      "epoch": 0.36254190795489183,
      "grad_norm": 0.953125,
      "learning_rate": 0.0001473992840117503,
      "loss": 0.8983,
      "step": 2379
    },
    {
      "epoch": 0.3626943005181347,
      "grad_norm": 0.7734375,
      "learning_rate": 0.00014735581771381328,
      "loss": 0.8627,
      "step": 2380
    },
    {
      "epoch": 0.3628466930813776,
      "grad_norm": 0.84375,
      "learning_rate": 0.0001473123398793181,
      "loss": 0.9665,
      "step": 2381
    },
    {
      "epoch": 0.3629990856446205,
      "grad_norm": 0.97265625,
      "learning_rate": 0.00014726885051885653,
      "loss": 1.0167,
      "step": 2382
    },
    {
      "epoch": 0.3631514782078635,
      "grad_norm": 0.9140625,
      "learning_rate": 0.00014722534964302326,
      "loss": 1.013,
      "step": 2383
    },
    {
      "epoch": 0.3633038707711064,
      "grad_norm": 0.875,
      "learning_rate": 0.00014718183726241567,
      "loss": 0.9562,
      "step": 2384
    },
    {
      "epoch": 0.36345626333434927,
      "grad_norm": 1.1640625,
      "learning_rate": 0.0001471383133876341,
      "loss": 1.0942,
      "step": 2385
    },
    {
      "epoch": 0.36360865589759217,
      "grad_norm": 0.96875,
      "learning_rate": 0.00014709477802928147,
      "loss": 0.9725,
      "step": 2386
    },
    {
      "epoch": 0.3637610484608351,
      "grad_norm": 1.03125,
      "learning_rate": 0.0001470512311979637,
      "loss": 0.957,
      "step": 2387
    },
    {
      "epoch": 0.363913441024078,
      "grad_norm": 0.796875,
      "learning_rate": 0.00014700767290428942,
      "loss": 0.9019,
      "step": 2388
    },
    {
      "epoch": 0.3640658335873209,
      "grad_norm": 1.1171875,
      "learning_rate": 0.00014696410315887,
      "loss": 1.0607,
      "step": 2389
    },
    {
      "epoch": 0.3642182261505639,
      "grad_norm": 1.296875,
      "learning_rate": 0.0001469205219723197,
      "loss": 0.9371,
      "step": 2390
    },
    {
      "epoch": 0.36437061871380677,
      "grad_norm": 1.015625,
      "learning_rate": 0.00014687692935525553,
      "loss": 0.9223,
      "step": 2391
    },
    {
      "epoch": 0.36452301127704967,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00014683332531829728,
      "loss": 0.9638,
      "step": 2392
    },
    {
      "epoch": 0.36467540384029257,
      "grad_norm": 0.9375,
      "learning_rate": 0.0001467897098720675,
      "loss": 0.805,
      "step": 2393
    },
    {
      "epoch": 0.3648277964035355,
      "grad_norm": 0.890625,
      "learning_rate": 0.00014674608302719152,
      "loss": 0.8709,
      "step": 2394
    },
    {
      "epoch": 0.3649801889667784,
      "grad_norm": 1.03125,
      "learning_rate": 0.00014670244479429752,
      "loss": 0.9158,
      "step": 2395
    },
    {
      "epoch": 0.3651325815300213,
      "grad_norm": 0.9453125,
      "learning_rate": 0.00014665879518401638,
      "loss": 0.8652,
      "step": 2396
    },
    {
      "epoch": 0.36528497409326427,
      "grad_norm": 1.140625,
      "learning_rate": 0.00014661513420698174,
      "loss": 1.0997,
      "step": 2397
    },
    {
      "epoch": 0.36543736665650717,
      "grad_norm": 1.125,
      "learning_rate": 0.0001465714618738301,
      "loss": 1.0139,
      "step": 2398
    },
    {
      "epoch": 0.36558975921975007,
      "grad_norm": 0.8046875,
      "learning_rate": 0.0001465277781952006,
      "loss": 0.9091,
      "step": 2399
    },
    {
      "epoch": 0.36574215178299296,
      "grad_norm": 0.8828125,
      "learning_rate": 0.0001464840831817353,
      "loss": 0.8397,
      "step": 2400
    },
    {
      "epoch": 0.3658945443462359,
      "grad_norm": 1.1640625,
      "learning_rate": 0.00014644037684407882,
      "loss": 1.0972,
      "step": 2401
    },
    {
      "epoch": 0.3660469369094788,
      "grad_norm": 1.03125,
      "learning_rate": 0.00014639665919287878,
      "loss": 1.1362,
      "step": 2402
    },
    {
      "epoch": 0.3661993294727217,
      "grad_norm": 0.79296875,
      "learning_rate": 0.00014635293023878535,
      "loss": 0.9307,
      "step": 2403
    },
    {
      "epoch": 0.36635172203596467,
      "grad_norm": 1.25,
      "learning_rate": 0.00014630918999245152,
      "loss": 0.9596,
      "step": 2404
    },
    {
      "epoch": 0.36650411459920756,
      "grad_norm": 0.921875,
      "learning_rate": 0.0001462654384645331,
      "loss": 0.9947,
      "step": 2405
    },
    {
      "epoch": 0.36665650716245046,
      "grad_norm": 0.90625,
      "learning_rate": 0.00014622167566568853,
      "loss": 0.8423,
      "step": 2406
    },
    {
      "epoch": 0.36680889972569336,
      "grad_norm": 0.91796875,
      "learning_rate": 0.0001461779016065791,
      "loss": 0.9458,
      "step": 2407
    },
    {
      "epoch": 0.3669612922889363,
      "grad_norm": 1.171875,
      "learning_rate": 0.0001461341162978688,
      "loss": 0.8029,
      "step": 2408
    },
    {
      "epoch": 0.3671136848521792,
      "grad_norm": 0.8828125,
      "learning_rate": 0.00014609031975022432,
      "loss": 1.0172,
      "step": 2409
    },
    {
      "epoch": 0.3672660774154221,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00014604651197431517,
      "loss": 1.0966,
      "step": 2410
    },
    {
      "epoch": 0.36741846997866506,
      "grad_norm": 0.984375,
      "learning_rate": 0.00014600269298081358,
      "loss": 0.9816,
      "step": 2411
    },
    {
      "epoch": 0.36757086254190796,
      "grad_norm": 0.890625,
      "learning_rate": 0.00014595886278039442,
      "loss": 1.0814,
      "step": 2412
    },
    {
      "epoch": 0.36772325510515086,
      "grad_norm": 0.86328125,
      "learning_rate": 0.00014591502138373537,
      "loss": 1.0429,
      "step": 2413
    },
    {
      "epoch": 0.36787564766839376,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00014587116880151685,
      "loss": 1.0346,
      "step": 2414
    },
    {
      "epoch": 0.3680280402316367,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00014582730504442197,
      "loss": 0.9523,
      "step": 2415
    },
    {
      "epoch": 0.3681804327948796,
      "grad_norm": 1.4453125,
      "learning_rate": 0.0001457834301231366,
      "loss": 0.9995,
      "step": 2416
    },
    {
      "epoch": 0.3683328253581225,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00014573954404834927,
      "loss": 1.075,
      "step": 2417
    },
    {
      "epoch": 0.36848521792136546,
      "grad_norm": 0.9140625,
      "learning_rate": 0.00014569564683075123,
      "loss": 0.9546,
      "step": 2418
    },
    {
      "epoch": 0.36863761048460836,
      "grad_norm": 0.8359375,
      "learning_rate": 0.00014565173848103653,
      "loss": 0.9907,
      "step": 2419
    },
    {
      "epoch": 0.36879000304785126,
      "grad_norm": 1.0,
      "learning_rate": 0.00014560781900990185,
      "loss": 0.9488,
      "step": 2420
    },
    {
      "epoch": 0.36894239561109415,
      "grad_norm": 1.1953125,
      "learning_rate": 0.00014556388842804657,
      "loss": 1.0137,
      "step": 2421
    },
    {
      "epoch": 0.3690947881743371,
      "grad_norm": 1.09375,
      "learning_rate": 0.0001455199467461729,
      "loss": 0.9965,
      "step": 2422
    },
    {
      "epoch": 0.36924718073758,
      "grad_norm": 1.0390625,
      "learning_rate": 0.00014547599397498562,
      "loss": 0.9179,
      "step": 2423
    },
    {
      "epoch": 0.3693995733008229,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00014543203012519222,
      "loss": 1.0591,
      "step": 2424
    },
    {
      "epoch": 0.36955196586406586,
      "grad_norm": 1.1484375,
      "learning_rate": 0.00014538805520750297,
      "loss": 0.9815,
      "step": 2425
    },
    {
      "epoch": 0.36970435842730875,
      "grad_norm": 1.0546875,
      "learning_rate": 0.00014534406923263077,
      "loss": 1.1159,
      "step": 2426
    },
    {
      "epoch": 0.36985675099055165,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00014530007221129124,
      "loss": 0.9628,
      "step": 2427
    },
    {
      "epoch": 0.37000914355379455,
      "grad_norm": 0.98046875,
      "learning_rate": 0.0001452560641542027,
      "loss": 0.9323,
      "step": 2428
    },
    {
      "epoch": 0.3701615361170375,
      "grad_norm": 1.0390625,
      "learning_rate": 0.00014521204507208615,
      "loss": 0.9867,
      "step": 2429
    },
    {
      "epoch": 0.3703139286802804,
      "grad_norm": 1.140625,
      "learning_rate": 0.00014516801497566523,
      "loss": 1.2031,
      "step": 2430
    },
    {
      "epoch": 0.3704663212435233,
      "grad_norm": 0.86328125,
      "learning_rate": 0.00014512397387566638,
      "loss": 0.9587,
      "step": 2431
    },
    {
      "epoch": 0.37061871380676625,
      "grad_norm": 1.0546875,
      "learning_rate": 0.00014507992178281856,
      "loss": 0.935,
      "step": 2432
    },
    {
      "epoch": 0.37077110637000915,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00014503585870785353,
      "loss": 1.1898,
      "step": 2433
    },
    {
      "epoch": 0.37092349893325205,
      "grad_norm": 1.234375,
      "learning_rate": 0.00014499178466150571,
      "loss": 1.0886,
      "step": 2434
    },
    {
      "epoch": 0.37107589149649495,
      "grad_norm": 0.9296875,
      "learning_rate": 0.00014494769965451215,
      "loss": 0.9582,
      "step": 2435
    },
    {
      "epoch": 0.3712282840597379,
      "grad_norm": 1.0078125,
      "learning_rate": 0.0001449036036976126,
      "loss": 0.9235,
      "step": 2436
    },
    {
      "epoch": 0.3713806766229808,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00014485949680154947,
      "loss": 0.9187,
      "step": 2437
    },
    {
      "epoch": 0.3715330691862237,
      "grad_norm": 1.1796875,
      "learning_rate": 0.0001448153789770678,
      "loss": 0.9772,
      "step": 2438
    },
    {
      "epoch": 0.37168546174946665,
      "grad_norm": 0.8984375,
      "learning_rate": 0.00014477125023491536,
      "loss": 1.0885,
      "step": 2439
    },
    {
      "epoch": 0.37183785431270955,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00014472711058584255,
      "loss": 0.8823,
      "step": 2440
    },
    {
      "epoch": 0.37199024687595245,
      "grad_norm": 0.9296875,
      "learning_rate": 0.00014468296004060242,
      "loss": 1.0759,
      "step": 2441
    },
    {
      "epoch": 0.37214263943919534,
      "grad_norm": 0.890625,
      "learning_rate": 0.00014463879860995068,
      "loss": 0.7902,
      "step": 2442
    },
    {
      "epoch": 0.3722950320024383,
      "grad_norm": 0.953125,
      "learning_rate": 0.00014459462630464566,
      "loss": 0.7203,
      "step": 2443
    },
    {
      "epoch": 0.3724474245656812,
      "grad_norm": 1.109375,
      "learning_rate": 0.00014455044313544838,
      "loss": 1.0739,
      "step": 2444
    },
    {
      "epoch": 0.3725998171289241,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00014450624911312255,
      "loss": 0.9627,
      "step": 2445
    },
    {
      "epoch": 0.37275220969216705,
      "grad_norm": 0.9296875,
      "learning_rate": 0.00014446204424843438,
      "loss": 0.8684,
      "step": 2446
    },
    {
      "epoch": 0.37290460225540994,
      "grad_norm": 0.9765625,
      "learning_rate": 0.00014441782855215287,
      "loss": 0.7565,
      "step": 2447
    },
    {
      "epoch": 0.37305699481865284,
      "grad_norm": 0.94921875,
      "learning_rate": 0.0001443736020350496,
      "loss": 0.9713,
      "step": 2448
    },
    {
      "epoch": 0.37320938738189574,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00014432936470789876,
      "loss": 0.9381,
      "step": 2449
    },
    {
      "epoch": 0.3733617799451387,
      "grad_norm": 0.89453125,
      "learning_rate": 0.00014428511658147718,
      "loss": 0.9204,
      "step": 2450
    },
    {
      "epoch": 0.3735141725083816,
      "grad_norm": 1.140625,
      "learning_rate": 0.0001442408576665644,
      "loss": 0.8903,
      "step": 2451
    },
    {
      "epoch": 0.3736665650716245,
      "grad_norm": 0.8046875,
      "learning_rate": 0.00014419658797394248,
      "loss": 0.7762,
      "step": 2452
    },
    {
      "epoch": 0.37381895763486744,
      "grad_norm": 0.9296875,
      "learning_rate": 0.0001441523075143962,
      "loss": 1.145,
      "step": 2453
    },
    {
      "epoch": 0.37397135019811034,
      "grad_norm": 1.03125,
      "learning_rate": 0.0001441080162987129,
      "loss": 0.9947,
      "step": 2454
    },
    {
      "epoch": 0.37412374276135324,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00014406371433768248,
      "loss": 0.9629,
      "step": 2455
    },
    {
      "epoch": 0.37427613532459614,
      "grad_norm": 0.98046875,
      "learning_rate": 0.00014401940164209765,
      "loss": 0.7706,
      "step": 2456
    },
    {
      "epoch": 0.3744285278878391,
      "grad_norm": 0.9765625,
      "learning_rate": 0.00014397507822275357,
      "loss": 0.7983,
      "step": 2457
    },
    {
      "epoch": 0.374580920451082,
      "grad_norm": 0.70703125,
      "learning_rate": 0.00014393074409044802,
      "loss": 0.9731,
      "step": 2458
    },
    {
      "epoch": 0.3747333130143249,
      "grad_norm": 1.3046875,
      "learning_rate": 0.00014388639925598152,
      "loss": 1.0697,
      "step": 2459
    },
    {
      "epoch": 0.37488570557756784,
      "grad_norm": 0.73046875,
      "learning_rate": 0.00014384204373015704,
      "loss": 0.8745,
      "step": 2460
    },
    {
      "epoch": 0.37503809814081074,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00014379767752378025,
      "loss": 1.0431,
      "step": 2461
    },
    {
      "epoch": 0.37519049070405364,
      "grad_norm": 0.88671875,
      "learning_rate": 0.0001437533006476594,
      "loss": 1.0107,
      "step": 2462
    },
    {
      "epoch": 0.37534288326729653,
      "grad_norm": 0.98046875,
      "learning_rate": 0.00014370891311260532,
      "loss": 1.0669,
      "step": 2463
    },
    {
      "epoch": 0.3754952758305395,
      "grad_norm": 0.98046875,
      "learning_rate": 0.00014366451492943148,
      "loss": 0.9505,
      "step": 2464
    },
    {
      "epoch": 0.3756476683937824,
      "grad_norm": 1.078125,
      "learning_rate": 0.00014362010610895386,
      "loss": 0.9437,
      "step": 2465
    },
    {
      "epoch": 0.3758000609570253,
      "grad_norm": 0.9375,
      "learning_rate": 0.00014357568666199114,
      "loss": 0.9869,
      "step": 2466
    },
    {
      "epoch": 0.37595245352026824,
      "grad_norm": 1.09375,
      "learning_rate": 0.00014353125659936452,
      "loss": 1.0026,
      "step": 2467
    },
    {
      "epoch": 0.37610484608351114,
      "grad_norm": 1.0,
      "learning_rate": 0.00014348681593189783,
      "loss": 0.8941,
      "step": 2468
    },
    {
      "epoch": 0.37625723864675403,
      "grad_norm": 0.87890625,
      "learning_rate": 0.00014344236467041737,
      "loss": 1.1523,
      "step": 2469
    },
    {
      "epoch": 0.37640963120999693,
      "grad_norm": 0.9375,
      "learning_rate": 0.0001433979028257522,
      "loss": 0.8897,
      "step": 2470
    },
    {
      "epoch": 0.3765620237732399,
      "grad_norm": 0.76171875,
      "learning_rate": 0.00014335343040873384,
      "loss": 0.9929,
      "step": 2471
    },
    {
      "epoch": 0.3767144163364828,
      "grad_norm": 0.73828125,
      "learning_rate": 0.0001433089474301964,
      "loss": 0.8654,
      "step": 2472
    },
    {
      "epoch": 0.3768668088997257,
      "grad_norm": 1.3828125,
      "learning_rate": 0.00014326445390097653,
      "loss": 0.9943,
      "step": 2473
    },
    {
      "epoch": 0.37701920146296863,
      "grad_norm": 0.953125,
      "learning_rate": 0.00014321994983191357,
      "loss": 0.971,
      "step": 2474
    },
    {
      "epoch": 0.37717159402621153,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00014317543523384928,
      "loss": 1.1219,
      "step": 2475
    },
    {
      "epoch": 0.37732398658945443,
      "grad_norm": 0.7109375,
      "learning_rate": 0.00014313091011762815,
      "loss": 0.9225,
      "step": 2476
    },
    {
      "epoch": 0.37747637915269733,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00014308637449409706,
      "loss": 0.953,
      "step": 2477
    },
    {
      "epoch": 0.3776287717159403,
      "grad_norm": 1.0703125,
      "learning_rate": 0.0001430418283741055,
      "loss": 0.9247,
      "step": 2478
    },
    {
      "epoch": 0.3777811642791832,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00014299727176850566,
      "loss": 0.9244,
      "step": 2479
    },
    {
      "epoch": 0.3779335568424261,
      "grad_norm": 0.7578125,
      "learning_rate": 0.0001429527046881521,
      "loss": 0.989,
      "step": 2480
    },
    {
      "epoch": 0.37808594940566903,
      "grad_norm": 1.078125,
      "learning_rate": 0.00014290812714390198,
      "loss": 1.0629,
      "step": 2481
    },
    {
      "epoch": 0.37823834196891193,
      "grad_norm": 0.96875,
      "learning_rate": 0.00014286353914661507,
      "loss": 1.103,
      "step": 2482
    },
    {
      "epoch": 0.3783907345321548,
      "grad_norm": 1.09375,
      "learning_rate": 0.00014281894070715363,
      "loss": 0.9564,
      "step": 2483
    },
    {
      "epoch": 0.3785431270953977,
      "grad_norm": 1.046875,
      "learning_rate": 0.00014277433183638248,
      "loss": 0.9796,
      "step": 2484
    },
    {
      "epoch": 0.3786955196586407,
      "grad_norm": 1.0390625,
      "learning_rate": 0.000142729712545169,
      "loss": 1.0574,
      "step": 2485
    },
    {
      "epoch": 0.3788479122218836,
      "grad_norm": 0.74609375,
      "learning_rate": 0.00014268508284438305,
      "loss": 0.8665,
      "step": 2486
    },
    {
      "epoch": 0.3790003047851265,
      "grad_norm": 0.92578125,
      "learning_rate": 0.00014264044274489717,
      "loss": 0.9835,
      "step": 2487
    },
    {
      "epoch": 0.37915269734836937,
      "grad_norm": 1.078125,
      "learning_rate": 0.0001425957922575862,
      "loss": 1.0475,
      "step": 2488
    },
    {
      "epoch": 0.3793050899116123,
      "grad_norm": 1.03125,
      "learning_rate": 0.0001425511313933277,
      "loss": 0.9904,
      "step": 2489
    },
    {
      "epoch": 0.3794574824748552,
      "grad_norm": 0.85546875,
      "learning_rate": 0.0001425064601630017,
      "loss": 0.9477,
      "step": 2490
    },
    {
      "epoch": 0.3796098750380981,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00014246177857749076,
      "loss": 0.9912,
      "step": 2491
    },
    {
      "epoch": 0.3797622676013411,
      "grad_norm": 1.296875,
      "learning_rate": 0.00014241708664767993,
      "loss": 1.0108,
      "step": 2492
    },
    {
      "epoch": 0.379914660164584,
      "grad_norm": 1.2265625,
      "learning_rate": 0.00014237238438445684,
      "loss": 0.9743,
      "step": 2493
    },
    {
      "epoch": 0.38006705272782687,
      "grad_norm": 1.109375,
      "learning_rate": 0.0001423276717987116,
      "loss": 1.1406,
      "step": 2494
    },
    {
      "epoch": 0.38021944529106977,
      "grad_norm": 1.1328125,
      "learning_rate": 0.0001422829489013368,
      "loss": 1.0216,
      "step": 2495
    },
    {
      "epoch": 0.3803718378543127,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00014223821570322762,
      "loss": 0.8725,
      "step": 2496
    },
    {
      "epoch": 0.3805242304175556,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00014219347221528167,
      "loss": 0.9054,
      "step": 2497
    },
    {
      "epoch": 0.3806766229807985,
      "grad_norm": 0.94140625,
      "learning_rate": 0.00014214871844839917,
      "loss": 0.9068,
      "step": 2498
    },
    {
      "epoch": 0.38082901554404147,
      "grad_norm": 1.1015625,
      "learning_rate": 0.00014210395441348274,
      "loss": 0.9196,
      "step": 2499
    },
    {
      "epoch": 0.38098140810728437,
      "grad_norm": 0.99609375,
      "learning_rate": 0.00014205918012143756,
      "loss": 0.9604,
      "step": 2500
    },
    {
      "epoch": 0.38113380067052727,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00014201439558317127,
      "loss": 0.9538,
      "step": 2501
    },
    {
      "epoch": 0.38128619323377017,
      "grad_norm": 0.78515625,
      "learning_rate": 0.00014196960080959403,
      "loss": 0.9031,
      "step": 2502
    },
    {
      "epoch": 0.3814385857970131,
      "grad_norm": 0.83984375,
      "learning_rate": 0.0001419247958116185,
      "loss": 0.8151,
      "step": 2503
    },
    {
      "epoch": 0.381590978360256,
      "grad_norm": 0.76171875,
      "learning_rate": 0.00014187998060015983,
      "loss": 0.9285,
      "step": 2504
    },
    {
      "epoch": 0.3817433709234989,
      "grad_norm": 0.9296875,
      "learning_rate": 0.00014183515518613563,
      "loss": 0.9267,
      "step": 2505
    },
    {
      "epoch": 0.38189576348674187,
      "grad_norm": 0.953125,
      "learning_rate": 0.00014179031958046602,
      "loss": 1.0529,
      "step": 2506
    },
    {
      "epoch": 0.38204815604998477,
      "grad_norm": 0.765625,
      "learning_rate": 0.00014174547379407364,
      "loss": 0.9445,
      "step": 2507
    },
    {
      "epoch": 0.38220054861322766,
      "grad_norm": 0.83984375,
      "learning_rate": 0.0001417006178378835,
      "loss": 0.9583,
      "step": 2508
    },
    {
      "epoch": 0.38235294117647056,
      "grad_norm": 1.015625,
      "learning_rate": 0.0001416557517228232,
      "loss": 0.9259,
      "step": 2509
    },
    {
      "epoch": 0.3825053337397135,
      "grad_norm": 1.078125,
      "learning_rate": 0.00014161087545982274,
      "loss": 0.9478,
      "step": 2510
    },
    {
      "epoch": 0.3826577263029564,
      "grad_norm": 0.91796875,
      "learning_rate": 0.0001415659890598147,
      "loss": 0.9861,
      "step": 2511
    },
    {
      "epoch": 0.3828101188661993,
      "grad_norm": 0.78125,
      "learning_rate": 0.00014152109253373393,
      "loss": 0.9951,
      "step": 2512
    },
    {
      "epoch": 0.38296251142944226,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00014147618589251796,
      "loss": 1.0308,
      "step": 2513
    },
    {
      "epoch": 0.38311490399268516,
      "grad_norm": 1.046875,
      "learning_rate": 0.00014143126914710664,
      "loss": 0.9342,
      "step": 2514
    },
    {
      "epoch": 0.38326729655592806,
      "grad_norm": 0.86328125,
      "learning_rate": 0.0001413863423084424,
      "loss": 0.9999,
      "step": 2515
    },
    {
      "epoch": 0.38341968911917096,
      "grad_norm": 1.0859375,
      "learning_rate": 0.00014134140538747005,
      "loss": 0.9651,
      "step": 2516
    },
    {
      "epoch": 0.3835720816824139,
      "grad_norm": 0.8671875,
      "learning_rate": 0.0001412964583951368,
      "loss": 0.9673,
      "step": 2517
    },
    {
      "epoch": 0.3837244742456568,
      "grad_norm": 1.015625,
      "learning_rate": 0.00014125150134239247,
      "loss": 0.9824,
      "step": 2518
    },
    {
      "epoch": 0.3838768668088997,
      "grad_norm": 0.8125,
      "learning_rate": 0.00014120653424018917,
      "loss": 0.8927,
      "step": 2519
    },
    {
      "epoch": 0.38402925937214266,
      "grad_norm": 1.1875,
      "learning_rate": 0.0001411615570994816,
      "loss": 1.1488,
      "step": 2520
    },
    {
      "epoch": 0.38418165193538556,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00014111656993122677,
      "loss": 0.8138,
      "step": 2521
    },
    {
      "epoch": 0.38433404449862846,
      "grad_norm": 0.83203125,
      "learning_rate": 0.00014107157274638427,
      "loss": 0.9863,
      "step": 2522
    },
    {
      "epoch": 0.38448643706187136,
      "grad_norm": 1.125,
      "learning_rate": 0.000141026565555916,
      "loss": 0.9589,
      "step": 2523
    },
    {
      "epoch": 0.3846388296251143,
      "grad_norm": 0.9921875,
      "learning_rate": 0.0001409815483707864,
      "loss": 0.9947,
      "step": 2524
    },
    {
      "epoch": 0.3847912221883572,
      "grad_norm": 0.91796875,
      "learning_rate": 0.0001409365212019623,
      "loss": 1.0063,
      "step": 2525
    },
    {
      "epoch": 0.3849436147516001,
      "grad_norm": 1.078125,
      "learning_rate": 0.00014089148406041296,
      "loss": 0.9788,
      "step": 2526
    },
    {
      "epoch": 0.38509600731484306,
      "grad_norm": 0.734375,
      "learning_rate": 0.00014084643695711006,
      "loss": 0.916,
      "step": 2527
    },
    {
      "epoch": 0.38524839987808596,
      "grad_norm": 0.98046875,
      "learning_rate": 0.00014080137990302776,
      "loss": 1.062,
      "step": 2528
    },
    {
      "epoch": 0.38540079244132885,
      "grad_norm": 0.94140625,
      "learning_rate": 0.00014075631290914252,
      "loss": 0.9821,
      "step": 2529
    },
    {
      "epoch": 0.38555318500457175,
      "grad_norm": 1.2578125,
      "learning_rate": 0.00014071123598643343,
      "loss": 1.0989,
      "step": 2530
    },
    {
      "epoch": 0.3857055775678147,
      "grad_norm": 0.9453125,
      "learning_rate": 0.0001406661491458818,
      "loss": 1.0188,
      "step": 2531
    },
    {
      "epoch": 0.3858579701310576,
      "grad_norm": 0.8046875,
      "learning_rate": 0.00014062105239847142,
      "loss": 0.7585,
      "step": 2532
    },
    {
      "epoch": 0.3860103626943005,
      "grad_norm": 1.0234375,
      "learning_rate": 0.0001405759457551886,
      "loss": 1.1376,
      "step": 2533
    },
    {
      "epoch": 0.38616275525754346,
      "grad_norm": 0.98046875,
      "learning_rate": 0.00014053082922702183,
      "loss": 0.9885,
      "step": 2534
    },
    {
      "epoch": 0.38631514782078635,
      "grad_norm": 1.1171875,
      "learning_rate": 0.00014048570282496227,
      "loss": 1.1557,
      "step": 2535
    },
    {
      "epoch": 0.38646754038402925,
      "grad_norm": 0.953125,
      "learning_rate": 0.00014044056656000326,
      "loss": 1.0778,
      "step": 2536
    },
    {
      "epoch": 0.38661993294727215,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00014039542044314073,
      "loss": 0.8218,
      "step": 2537
    },
    {
      "epoch": 0.3867723255105151,
      "grad_norm": 1.03125,
      "learning_rate": 0.00014035026448537286,
      "loss": 0.8951,
      "step": 2538
    },
    {
      "epoch": 0.386924718073758,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00014030509869770031,
      "loss": 0.8219,
      "step": 2539
    },
    {
      "epoch": 0.3870771106370009,
      "grad_norm": 0.7734375,
      "learning_rate": 0.00014025992309112613,
      "loss": 0.8007,
      "step": 2540
    },
    {
      "epoch": 0.38722950320024385,
      "grad_norm": 1.0546875,
      "learning_rate": 0.00014021473767665573,
      "loss": 1.0143,
      "step": 2541
    },
    {
      "epoch": 0.38738189576348675,
      "grad_norm": 1.171875,
      "learning_rate": 0.00014016954246529696,
      "loss": 0.9174,
      "step": 2542
    },
    {
      "epoch": 0.38753428832672965,
      "grad_norm": 0.99609375,
      "learning_rate": 0.00014012433746805997,
      "loss": 0.8234,
      "step": 2543
    },
    {
      "epoch": 0.38768668088997255,
      "grad_norm": 0.73828125,
      "learning_rate": 0.00014007912269595738,
      "loss": 0.9309,
      "step": 2544
    },
    {
      "epoch": 0.3878390734532155,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00014003389816000415,
      "loss": 0.9535,
      "step": 2545
    },
    {
      "epoch": 0.3879914660164584,
      "grad_norm": 1.09375,
      "learning_rate": 0.00013998866387121764,
      "loss": 1.0296,
      "step": 2546
    },
    {
      "epoch": 0.3881438585797013,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00013994341984061755,
      "loss": 0.9002,
      "step": 2547
    },
    {
      "epoch": 0.38829625114294425,
      "grad_norm": 1.140625,
      "learning_rate": 0.000139898166079226,
      "loss": 1.1246,
      "step": 2548
    },
    {
      "epoch": 0.38844864370618715,
      "grad_norm": 0.77734375,
      "learning_rate": 0.00013985290259806748,
      "loss": 1.0726,
      "step": 2549
    },
    {
      "epoch": 0.38860103626943004,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00013980762940816874,
      "loss": 1.0137,
      "step": 2550
    },
    {
      "epoch": 0.38875342883267294,
      "grad_norm": 0.7421875,
      "learning_rate": 0.00013976234652055903,
      "loss": 0.9271,
      "step": 2551
    },
    {
      "epoch": 0.3889058213959159,
      "grad_norm": 0.88671875,
      "learning_rate": 0.00013971705394626998,
      "loss": 0.9875,
      "step": 2552
    },
    {
      "epoch": 0.3890582139591588,
      "grad_norm": 0.875,
      "learning_rate": 0.00013967175169633537,
      "loss": 0.9807,
      "step": 2553
    },
    {
      "epoch": 0.3892106065224017,
      "grad_norm": 0.828125,
      "learning_rate": 0.00013962643978179159,
      "loss": 1.1491,
      "step": 2554
    },
    {
      "epoch": 0.38936299908564465,
      "grad_norm": 0.86328125,
      "learning_rate": 0.00013958111821367724,
      "loss": 1.049,
      "step": 2555
    },
    {
      "epoch": 0.38951539164888754,
      "grad_norm": 1.078125,
      "learning_rate": 0.0001395357870030333,
      "loss": 1.2968,
      "step": 2556
    },
    {
      "epoch": 0.38966778421213044,
      "grad_norm": 1.1640625,
      "learning_rate": 0.0001394904461609031,
      "loss": 1.0331,
      "step": 2557
    },
    {
      "epoch": 0.38982017677537334,
      "grad_norm": 0.90625,
      "learning_rate": 0.00013944509569833237,
      "loss": 0.8674,
      "step": 2558
    },
    {
      "epoch": 0.3899725693386163,
      "grad_norm": 0.95703125,
      "learning_rate": 0.00013939973562636908,
      "loss": 1.1052,
      "step": 2559
    },
    {
      "epoch": 0.3901249619018592,
      "grad_norm": 0.8984375,
      "learning_rate": 0.00013935436595606358,
      "loss": 0.9734,
      "step": 2560
    },
    {
      "epoch": 0.3902773544651021,
      "grad_norm": 0.96484375,
      "learning_rate": 0.00013930898669846866,
      "loss": 0.9439,
      "step": 2561
    },
    {
      "epoch": 0.39042974702834504,
      "grad_norm": 0.7578125,
      "learning_rate": 0.00013926359786463926,
      "loss": 0.9133,
      "step": 2562
    },
    {
      "epoch": 0.39058213959158794,
      "grad_norm": 1.296875,
      "learning_rate": 0.00013921819946563285,
      "loss": 1.1832,
      "step": 2563
    },
    {
      "epoch": 0.39073453215483084,
      "grad_norm": 1.1328125,
      "learning_rate": 0.00013917279151250903,
      "loss": 0.9712,
      "step": 2564
    },
    {
      "epoch": 0.39088692471807374,
      "grad_norm": 1.0234375,
      "learning_rate": 0.0001391273740163299,
      "loss": 0.9683,
      "step": 2565
    },
    {
      "epoch": 0.3910393172813167,
      "grad_norm": 0.8984375,
      "learning_rate": 0.00013908194698815984,
      "loss": 0.9821,
      "step": 2566
    },
    {
      "epoch": 0.3911917098445596,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00013903651043906546,
      "loss": 1.0093,
      "step": 2567
    },
    {
      "epoch": 0.3913441024078025,
      "grad_norm": 0.90625,
      "learning_rate": 0.00013899106438011572,
      "loss": 1.1496,
      "step": 2568
    },
    {
      "epoch": 0.39149649497104544,
      "grad_norm": 1.03125,
      "learning_rate": 0.0001389456088223821,
      "loss": 0.9353,
      "step": 2569
    },
    {
      "epoch": 0.39164888753428834,
      "grad_norm": 0.98046875,
      "learning_rate": 0.00013890014377693804,
      "loss": 0.9602,
      "step": 2570
    },
    {
      "epoch": 0.39180128009753123,
      "grad_norm": 1.1484375,
      "learning_rate": 0.0001388546692548596,
      "loss": 0.9756,
      "step": 2571
    },
    {
      "epoch": 0.39195367266077413,
      "grad_norm": 1.0859375,
      "learning_rate": 0.00013880918526722497,
      "loss": 1.0824,
      "step": 2572
    },
    {
      "epoch": 0.3921060652240171,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00013876369182511475,
      "loss": 0.8989,
      "step": 2573
    },
    {
      "epoch": 0.39225845778726,
      "grad_norm": 1.171875,
      "learning_rate": 0.00013871818893961173,
      "loss": 1.1642,
      "step": 2574
    },
    {
      "epoch": 0.3924108503505029,
      "grad_norm": 1.03125,
      "learning_rate": 0.00013867267662180115,
      "loss": 0.8941,
      "step": 2575
    },
    {
      "epoch": 0.39256324291374584,
      "grad_norm": 0.8671875,
      "learning_rate": 0.0001386271548827704,
      "loss": 0.8829,
      "step": 2576
    },
    {
      "epoch": 0.39271563547698873,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00013858162373360928,
      "loss": 1.1891,
      "step": 2577
    },
    {
      "epoch": 0.39286802804023163,
      "grad_norm": 0.9296875,
      "learning_rate": 0.00013853608318540976,
      "loss": 0.889,
      "step": 2578
    },
    {
      "epoch": 0.39302042060347453,
      "grad_norm": 0.890625,
      "learning_rate": 0.0001384905332492662,
      "loss": 0.9549,
      "step": 2579
    },
    {
      "epoch": 0.3931728131667175,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00013844497393627531,
      "loss": 1.035,
      "step": 2580
    },
    {
      "epoch": 0.3933252057299604,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00013839940525753588,
      "loss": 0.9689,
      "step": 2581
    },
    {
      "epoch": 0.3934775982932033,
      "grad_norm": 0.86328125,
      "learning_rate": 0.00013835382722414912,
      "loss": 1.0161,
      "step": 2582
    },
    {
      "epoch": 0.39362999085644623,
      "grad_norm": 1.1015625,
      "learning_rate": 0.00013830823984721855,
      "loss": 0.9287,
      "step": 2583
    },
    {
      "epoch": 0.39378238341968913,
      "grad_norm": 0.9453125,
      "learning_rate": 0.00013826264313784983,
      "loss": 0.9002,
      "step": 2584
    },
    {
      "epoch": 0.39393477598293203,
      "grad_norm": 1.0625,
      "learning_rate": 0.00013821703710715103,
      "loss": 1.0784,
      "step": 2585
    },
    {
      "epoch": 0.3940871685461749,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00013817142176623243,
      "loss": 1.076,
      "step": 2586
    },
    {
      "epoch": 0.3942395611094179,
      "grad_norm": 1.171875,
      "learning_rate": 0.00013812579712620658,
      "loss": 0.7941,
      "step": 2587
    },
    {
      "epoch": 0.3943919536726608,
      "grad_norm": 0.7421875,
      "learning_rate": 0.00013808016319818826,
      "loss": 0.9426,
      "step": 2588
    },
    {
      "epoch": 0.3945443462359037,
      "grad_norm": 0.99609375,
      "learning_rate": 0.00013803451999329458,
      "loss": 1.0363,
      "step": 2589
    },
    {
      "epoch": 0.39469673879914663,
      "grad_norm": 1.1015625,
      "learning_rate": 0.00013798886752264491,
      "loss": 0.9339,
      "step": 2590
    },
    {
      "epoch": 0.3948491313623895,
      "grad_norm": 1.0859375,
      "learning_rate": 0.00013794320579736083,
      "loss": 1.1452,
      "step": 2591
    },
    {
      "epoch": 0.3950015239256324,
      "grad_norm": 1.046875,
      "learning_rate": 0.00013789753482856617,
      "loss": 0.9614,
      "step": 2592
    },
    {
      "epoch": 0.3951539164888753,
      "grad_norm": 0.76171875,
      "learning_rate": 0.00013785185462738707,
      "loss": 0.8936,
      "step": 2593
    },
    {
      "epoch": 0.3953063090521183,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00013780616520495185,
      "loss": 0.8573,
      "step": 2594
    },
    {
      "epoch": 0.3954587016153612,
      "grad_norm": 0.953125,
      "learning_rate": 0.0001377604665723911,
      "loss": 1.0282,
      "step": 2595
    },
    {
      "epoch": 0.3956110941786041,
      "grad_norm": 0.6953125,
      "learning_rate": 0.00013771475874083767,
      "loss": 0.9207,
      "step": 2596
    },
    {
      "epoch": 0.39576348674184697,
      "grad_norm": 1.3125,
      "learning_rate": 0.00013766904172142673,
      "loss": 0.9112,
      "step": 2597
    },
    {
      "epoch": 0.3959158793050899,
      "grad_norm": 0.75390625,
      "learning_rate": 0.0001376233155252955,
      "loss": 0.8249,
      "step": 2598
    },
    {
      "epoch": 0.3960682718683328,
      "grad_norm": 1.203125,
      "learning_rate": 0.00013757758016358354,
      "loss": 1.0368,
      "step": 2599
    },
    {
      "epoch": 0.3962206644315757,
      "grad_norm": 1.078125,
      "learning_rate": 0.00013753183564743274,
      "loss": 1.1124,
      "step": 2600
    },
    {
      "epoch": 0.3963730569948187,
      "grad_norm": 0.96875,
      "learning_rate": 0.00013748608198798702,
      "loss": 0.8413,
      "step": 2601
    },
    {
      "epoch": 0.39652544955806157,
      "grad_norm": 1.1875,
      "learning_rate": 0.0001374403191963927,
      "loss": 1.115,
      "step": 2602
    },
    {
      "epoch": 0.39667784212130447,
      "grad_norm": 0.67578125,
      "learning_rate": 0.0001373945472837982,
      "loss": 0.9804,
      "step": 2603
    },
    {
      "epoch": 0.39683023468454737,
      "grad_norm": 0.94921875,
      "learning_rate": 0.00013734876626135425,
      "loss": 0.8482,
      "step": 2604
    },
    {
      "epoch": 0.3969826272477903,
      "grad_norm": 1.34375,
      "learning_rate": 0.00013730297614021375,
      "loss": 1.1104,
      "step": 2605
    },
    {
      "epoch": 0.3971350198110332,
      "grad_norm": 1.1171875,
      "learning_rate": 0.00013725717693153182,
      "loss": 1.1033,
      "step": 2606
    },
    {
      "epoch": 0.3972874123742761,
      "grad_norm": 0.95703125,
      "learning_rate": 0.00013721136864646582,
      "loss": 0.9225,
      "step": 2607
    },
    {
      "epoch": 0.39743980493751907,
      "grad_norm": 0.8984375,
      "learning_rate": 0.00013716555129617534,
      "loss": 0.9519,
      "step": 2608
    },
    {
      "epoch": 0.39759219750076197,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00013711972489182208,
      "loss": 0.893,
      "step": 2609
    },
    {
      "epoch": 0.39774459006400487,
      "grad_norm": 0.84765625,
      "learning_rate": 0.00013707388944457006,
      "loss": 1.0119,
      "step": 2610
    },
    {
      "epoch": 0.39789698262724776,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00013702804496558546,
      "loss": 0.8298,
      "step": 2611
    },
    {
      "epoch": 0.3980493751904907,
      "grad_norm": 1.421875,
      "learning_rate": 0.00013698219146603659,
      "loss": 1.1866,
      "step": 2612
    },
    {
      "epoch": 0.3982017677537336,
      "grad_norm": 0.9140625,
      "learning_rate": 0.00013693632895709409,
      "loss": 1.0378,
      "step": 2613
    },
    {
      "epoch": 0.3983541603169765,
      "grad_norm": 1.046875,
      "learning_rate": 0.00013689045744993072,
      "loss": 1.0022,
      "step": 2614
    },
    {
      "epoch": 0.39850655288021947,
      "grad_norm": 0.7265625,
      "learning_rate": 0.0001368445769557214,
      "loss": 0.8973,
      "step": 2615
    },
    {
      "epoch": 0.39865894544346236,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00013679868748564327,
      "loss": 0.8979,
      "step": 2616
    },
    {
      "epoch": 0.39881133800670526,
      "grad_norm": 0.9140625,
      "learning_rate": 0.00013675278905087574,
      "loss": 0.9278,
      "step": 2617
    },
    {
      "epoch": 0.39896373056994816,
      "grad_norm": 0.98046875,
      "learning_rate": 0.00013670688166260027,
      "loss": 0.9797,
      "step": 2618
    },
    {
      "epoch": 0.3991161231331911,
      "grad_norm": 1.1328125,
      "learning_rate": 0.00013666096533200057,
      "loss": 0.9637,
      "step": 2619
    },
    {
      "epoch": 0.399268515696434,
      "grad_norm": 0.9765625,
      "learning_rate": 0.00013661504007026252,
      "loss": 0.9218,
      "step": 2620
    },
    {
      "epoch": 0.3994209082596769,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00013656910588857415,
      "loss": 1.115,
      "step": 2621
    },
    {
      "epoch": 0.39957330082291986,
      "grad_norm": 1.203125,
      "learning_rate": 0.00013652316279812575,
      "loss": 1.1055,
      "step": 2622
    },
    {
      "epoch": 0.39972569338616276,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00013647721081010966,
      "loss": 0.8166,
      "step": 2623
    },
    {
      "epoch": 0.39987808594940566,
      "grad_norm": 0.72265625,
      "learning_rate": 0.00013643124993572048,
      "loss": 0.7408,
      "step": 2624
    },
    {
      "epoch": 0.40003047851264856,
      "grad_norm": 0.96875,
      "learning_rate": 0.00013638528018615492,
      "loss": 0.8651,
      "step": 2625
    },
    {
      "epoch": 0.4001828710758915,
      "grad_norm": 0.8984375,
      "learning_rate": 0.00013633930157261185,
      "loss": 0.8461,
      "step": 2626
    },
    {
      "epoch": 0.4003352636391344,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00013629331410629236,
      "loss": 0.9633,
      "step": 2627
    },
    {
      "epoch": 0.4004876562023773,
      "grad_norm": 0.93359375,
      "learning_rate": 0.0001362473177983997,
      "loss": 0.8754,
      "step": 2628
    },
    {
      "epoch": 0.40064004876562026,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00013620131266013912,
      "loss": 1.0007,
      "step": 2629
    },
    {
      "epoch": 0.40079244132886316,
      "grad_norm": 0.9453125,
      "learning_rate": 0.00013615529870271819,
      "loss": 1.1162,
      "step": 2630
    },
    {
      "epoch": 0.40094483389210606,
      "grad_norm": 0.8046875,
      "learning_rate": 0.0001361092759373466,
      "loss": 0.8337,
      "step": 2631
    },
    {
      "epoch": 0.40109722645534895,
      "grad_norm": 0.84765625,
      "learning_rate": 0.00013606324437523613,
      "loss": 0.9829,
      "step": 2632
    },
    {
      "epoch": 0.4012496190185919,
      "grad_norm": 0.890625,
      "learning_rate": 0.00013601720402760075,
      "loss": 1.0746,
      "step": 2633
    },
    {
      "epoch": 0.4014020115818348,
      "grad_norm": 1.078125,
      "learning_rate": 0.00013597115490565647,
      "loss": 0.9423,
      "step": 2634
    },
    {
      "epoch": 0.4015544041450777,
      "grad_norm": 1.09375,
      "learning_rate": 0.0001359250970206216,
      "loss": 0.811,
      "step": 2635
    },
    {
      "epoch": 0.40170679670832066,
      "grad_norm": 0.8359375,
      "learning_rate": 0.00013587903038371653,
      "loss": 0.9198,
      "step": 2636
    },
    {
      "epoch": 0.40185918927156355,
      "grad_norm": 1.375,
      "learning_rate": 0.00013583295500616368,
      "loss": 0.9664,
      "step": 2637
    },
    {
      "epoch": 0.40201158183480645,
      "grad_norm": 1.2265625,
      "learning_rate": 0.0001357868708991877,
      "loss": 0.8848,
      "step": 2638
    },
    {
      "epoch": 0.40216397439804935,
      "grad_norm": 1.078125,
      "learning_rate": 0.00013574077807401534,
      "loss": 1.0481,
      "step": 2639
    },
    {
      "epoch": 0.4023163669612923,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00013569467654187548,
      "loss": 0.9389,
      "step": 2640
    },
    {
      "epoch": 0.4024687595245352,
      "grad_norm": 0.890625,
      "learning_rate": 0.00013564856631399913,
      "loss": 0.8395,
      "step": 2641
    },
    {
      "epoch": 0.4026211520877781,
      "grad_norm": 0.875,
      "learning_rate": 0.00013560244740161938,
      "loss": 0.913,
      "step": 2642
    },
    {
      "epoch": 0.40277354465102105,
      "grad_norm": 1.1015625,
      "learning_rate": 0.00013555631981597145,
      "loss": 1.1077,
      "step": 2643
    },
    {
      "epoch": 0.40292593721426395,
      "grad_norm": 0.9140625,
      "learning_rate": 0.00013551018356829274,
      "loss": 1.0462,
      "step": 2644
    },
    {
      "epoch": 0.40307832977750685,
      "grad_norm": 1.0625,
      "learning_rate": 0.00013546403866982268,
      "loss": 1.1024,
      "step": 2645
    },
    {
      "epoch": 0.40323072234074975,
      "grad_norm": 0.9296875,
      "learning_rate": 0.00013541788513180277,
      "loss": 0.9238,
      "step": 2646
    },
    {
      "epoch": 0.4033831149039927,
      "grad_norm": 1.1015625,
      "learning_rate": 0.00013537172296547673,
      "loss": 0.9894,
      "step": 2647
    },
    {
      "epoch": 0.4035355074672356,
      "grad_norm": 1.015625,
      "learning_rate": 0.00013532555218209036,
      "loss": 1.0509,
      "step": 2648
    },
    {
      "epoch": 0.4036879000304785,
      "grad_norm": 0.765625,
      "learning_rate": 0.00013527937279289145,
      "loss": 0.9502,
      "step": 2649
    },
    {
      "epoch": 0.40384029259372145,
      "grad_norm": 0.890625,
      "learning_rate": 0.00013523318480913,
      "loss": 0.9262,
      "step": 2650
    },
    {
      "epoch": 0.40399268515696435,
      "grad_norm": 1.1171875,
      "learning_rate": 0.00013518698824205812,
      "loss": 0.9501,
      "step": 2651
    },
    {
      "epoch": 0.40414507772020725,
      "grad_norm": 0.86328125,
      "learning_rate": 0.00013514078310292984,
      "loss": 0.9384,
      "step": 2652
    },
    {
      "epoch": 0.40429747028345014,
      "grad_norm": 0.9375,
      "learning_rate": 0.0001350945694030015,
      "loss": 0.8671,
      "step": 2653
    },
    {
      "epoch": 0.4044498628466931,
      "grad_norm": 0.890625,
      "learning_rate": 0.00013504834715353137,
      "loss": 1.1585,
      "step": 2654
    },
    {
      "epoch": 0.404602255409936,
      "grad_norm": 0.9296875,
      "learning_rate": 0.00013500211636577987,
      "loss": 0.7808,
      "step": 2655
    },
    {
      "epoch": 0.4047546479731789,
      "grad_norm": 0.921875,
      "learning_rate": 0.0001349558770510095,
      "loss": 0.9755,
      "step": 2656
    },
    {
      "epoch": 0.40490704053642185,
      "grad_norm": 1.1015625,
      "learning_rate": 0.00013490962922048478,
      "loss": 0.8666,
      "step": 2657
    },
    {
      "epoch": 0.40505943309966475,
      "grad_norm": 0.8515625,
      "learning_rate": 0.0001348633728854724,
      "loss": 0.9288,
      "step": 2658
    },
    {
      "epoch": 0.40521182566290764,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00013481710805724102,
      "loss": 0.9739,
      "step": 2659
    },
    {
      "epoch": 0.40536421822615054,
      "grad_norm": 1.265625,
      "learning_rate": 0.00013477083474706142,
      "loss": 1.0319,
      "step": 2660
    },
    {
      "epoch": 0.4055166107893935,
      "grad_norm": 0.734375,
      "learning_rate": 0.0001347245529662065,
      "loss": 0.8123,
      "step": 2661
    },
    {
      "epoch": 0.4056690033526364,
      "grad_norm": 0.94140625,
      "learning_rate": 0.00013467826272595113,
      "loss": 0.8879,
      "step": 2662
    },
    {
      "epoch": 0.4058213959158793,
      "grad_norm": 0.87890625,
      "learning_rate": 0.00013463196403757226,
      "loss": 1.0212,
      "step": 2663
    },
    {
      "epoch": 0.40597378847912224,
      "grad_norm": 0.95703125,
      "learning_rate": 0.00013458565691234893,
      "loss": 0.9394,
      "step": 2664
    },
    {
      "epoch": 0.40612618104236514,
      "grad_norm": 0.9453125,
      "learning_rate": 0.00013453934136156223,
      "loss": 0.9488,
      "step": 2665
    },
    {
      "epoch": 0.40627857360560804,
      "grad_norm": 0.9453125,
      "learning_rate": 0.00013449301739649528,
      "loss": 0.9936,
      "step": 2666
    },
    {
      "epoch": 0.40643096616885094,
      "grad_norm": 0.95703125,
      "learning_rate": 0.0001344466850284333,
      "loss": 0.9399,
      "step": 2667
    },
    {
      "epoch": 0.4065833587320939,
      "grad_norm": 0.703125,
      "learning_rate": 0.00013440034426866348,
      "loss": 0.879,
      "step": 2668
    },
    {
      "epoch": 0.4067357512953368,
      "grad_norm": 1.0703125,
      "learning_rate": 0.0001343539951284751,
      "loss": 0.9711,
      "step": 2669
    },
    {
      "epoch": 0.4068881438585797,
      "grad_norm": 0.7890625,
      "learning_rate": 0.00013430763761915952,
      "loss": 0.9779,
      "step": 2670
    },
    {
      "epoch": 0.40704053642182264,
      "grad_norm": 0.921875,
      "learning_rate": 0.00013426127175201002,
      "loss": 0.8108,
      "step": 2671
    },
    {
      "epoch": 0.40719292898506554,
      "grad_norm": 0.84765625,
      "learning_rate": 0.00013421489753832205,
      "loss": 0.9463,
      "step": 2672
    },
    {
      "epoch": 0.40734532154830844,
      "grad_norm": 1.1640625,
      "learning_rate": 0.00013416851498939306,
      "loss": 0.8616,
      "step": 2673
    },
    {
      "epoch": 0.40749771411155133,
      "grad_norm": 1.1875,
      "learning_rate": 0.0001341221241165224,
      "loss": 0.7334,
      "step": 2674
    },
    {
      "epoch": 0.4076501066747943,
      "grad_norm": 0.828125,
      "learning_rate": 0.00013407572493101167,
      "loss": 0.8764,
      "step": 2675
    },
    {
      "epoch": 0.4078024992380372,
      "grad_norm": 1.1328125,
      "learning_rate": 0.00013402931744416433,
      "loss": 0.8758,
      "step": 2676
    },
    {
      "epoch": 0.4079548918012801,
      "grad_norm": 1.2734375,
      "learning_rate": 0.00013398290166728586,
      "loss": 1.22,
      "step": 2677
    },
    {
      "epoch": 0.40810728436452304,
      "grad_norm": 0.9375,
      "learning_rate": 0.0001339364776116839,
      "loss": 1.2016,
      "step": 2678
    },
    {
      "epoch": 0.40825967692776594,
      "grad_norm": 1.171875,
      "learning_rate": 0.000133890045288668,
      "loss": 0.9817,
      "step": 2679
    },
    {
      "epoch": 0.40841206949100883,
      "grad_norm": 1.4453125,
      "learning_rate": 0.00013384360470954966,
      "loss": 1.0795,
      "step": 2680
    },
    {
      "epoch": 0.40856446205425173,
      "grad_norm": 0.87109375,
      "learning_rate": 0.0001337971558856426,
      "loss": 1.0107,
      "step": 2681
    },
    {
      "epoch": 0.4087168546174947,
      "grad_norm": 1.03125,
      "learning_rate": 0.00013375069882826232,
      "loss": 0.9341,
      "step": 2682
    },
    {
      "epoch": 0.4088692471807376,
      "grad_norm": 1.078125,
      "learning_rate": 0.00013370423354872643,
      "loss": 0.946,
      "step": 2683
    },
    {
      "epoch": 0.4090216397439805,
      "grad_norm": 1.2421875,
      "learning_rate": 0.00013365776005835463,
      "loss": 1.0018,
      "step": 2684
    },
    {
      "epoch": 0.40917403230722343,
      "grad_norm": 1.078125,
      "learning_rate": 0.00013361127836846842,
      "loss": 1.0675,
      "step": 2685
    },
    {
      "epoch": 0.40932642487046633,
      "grad_norm": 1.1015625,
      "learning_rate": 0.0001335647884903915,
      "loss": 0.9751,
      "step": 2686
    },
    {
      "epoch": 0.40947881743370923,
      "grad_norm": 0.82421875,
      "learning_rate": 0.0001335182904354494,
      "loss": 0.9222,
      "step": 2687
    },
    {
      "epoch": 0.40963120999695213,
      "grad_norm": 1.3203125,
      "learning_rate": 0.0001334717842149698,
      "loss": 0.9615,
      "step": 2688
    },
    {
      "epoch": 0.4097836025601951,
      "grad_norm": 1.4765625,
      "learning_rate": 0.00013342526984028218,
      "loss": 0.9654,
      "step": 2689
    },
    {
      "epoch": 0.409935995123438,
      "grad_norm": 0.79296875,
      "learning_rate": 0.0001333787473227182,
      "loss": 1.082,
      "step": 2690
    },
    {
      "epoch": 0.4100883876866809,
      "grad_norm": 0.859375,
      "learning_rate": 0.00013333221667361132,
      "loss": 1.1309,
      "step": 2691
    },
    {
      "epoch": 0.41024078024992383,
      "grad_norm": 0.921875,
      "learning_rate": 0.0001332856779042972,
      "loss": 0.9528,
      "step": 2692
    },
    {
      "epoch": 0.41039317281316673,
      "grad_norm": 0.984375,
      "learning_rate": 0.00013323913102611327,
      "loss": 0.7589,
      "step": 2693
    },
    {
      "epoch": 0.4105455653764096,
      "grad_norm": 0.6953125,
      "learning_rate": 0.000133192576050399,
      "loss": 1.0617,
      "step": 2694
    },
    {
      "epoch": 0.4106979579396525,
      "grad_norm": 0.89453125,
      "learning_rate": 0.00013314601298849591,
      "loss": 0.9783,
      "step": 2695
    },
    {
      "epoch": 0.4108503505028955,
      "grad_norm": 0.95703125,
      "learning_rate": 0.0001330994418517474,
      "loss": 0.9585,
      "step": 2696
    },
    {
      "epoch": 0.4110027430661384,
      "grad_norm": 0.94140625,
      "learning_rate": 0.0001330528626514989,
      "loss": 1.0938,
      "step": 2697
    },
    {
      "epoch": 0.4111551356293813,
      "grad_norm": 0.98828125,
      "learning_rate": 0.00013300627539909773,
      "loss": 1.1181,
      "step": 2698
    },
    {
      "epoch": 0.41130752819262417,
      "grad_norm": 1.0,
      "learning_rate": 0.00013295968010589325,
      "loss": 0.9956,
      "step": 2699
    },
    {
      "epoch": 0.4114599207558671,
      "grad_norm": 0.93359375,
      "learning_rate": 0.0001329130767832367,
      "loss": 0.849,
      "step": 2700
    },
    {
      "epoch": 0.41161231331911,
      "grad_norm": 0.9765625,
      "learning_rate": 0.00013286646544248136,
      "loss": 1.0224,
      "step": 2701
    },
    {
      "epoch": 0.4117647058823529,
      "grad_norm": 0.734375,
      "learning_rate": 0.00013281984609498238,
      "loss": 0.9477,
      "step": 2702
    },
    {
      "epoch": 0.4119170984455959,
      "grad_norm": 0.9921875,
      "learning_rate": 0.000132773218752097,
      "loss": 1.098,
      "step": 2703
    },
    {
      "epoch": 0.4120694910088388,
      "grad_norm": 0.76953125,
      "learning_rate": 0.0001327265834251842,
      "loss": 0.8712,
      "step": 2704
    },
    {
      "epoch": 0.41222188357208167,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00013267994012560504,
      "loss": 0.9289,
      "step": 2705
    },
    {
      "epoch": 0.41237427613532457,
      "grad_norm": 0.83203125,
      "learning_rate": 0.00013263328886472256,
      "loss": 0.7026,
      "step": 2706
    },
    {
      "epoch": 0.4125266686985675,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00013258662965390166,
      "loss": 0.8522,
      "step": 2707
    },
    {
      "epoch": 0.4126790612618104,
      "grad_norm": 0.859375,
      "learning_rate": 0.0001325399625045091,
      "loss": 0.9229,
      "step": 2708
    },
    {
      "epoch": 0.4128314538250533,
      "grad_norm": 1.0234375,
      "learning_rate": 0.0001324932874279138,
      "loss": 1.087,
      "step": 2709
    },
    {
      "epoch": 0.41298384638829627,
      "grad_norm": 1.3515625,
      "learning_rate": 0.0001324466044354864,
      "loss": 1.0907,
      "step": 2710
    },
    {
      "epoch": 0.41313623895153917,
      "grad_norm": 0.8125,
      "learning_rate": 0.00013239991353859956,
      "loss": 0.7897,
      "step": 2711
    },
    {
      "epoch": 0.41328863151478207,
      "grad_norm": 1.03125,
      "learning_rate": 0.00013235321474862788,
      "loss": 1.0074,
      "step": 2712
    },
    {
      "epoch": 0.41344102407802497,
      "grad_norm": 1.03125,
      "learning_rate": 0.00013230650807694784,
      "loss": 0.9984,
      "step": 2713
    },
    {
      "epoch": 0.4135934166412679,
      "grad_norm": 0.75,
      "learning_rate": 0.00013225979353493781,
      "loss": 0.8536,
      "step": 2714
    },
    {
      "epoch": 0.4137458092045108,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00013221307113397824,
      "loss": 0.9489,
      "step": 2715
    },
    {
      "epoch": 0.4138982017677537,
      "grad_norm": 1.0703125,
      "learning_rate": 0.0001321663408854513,
      "loss": 0.9762,
      "step": 2716
    },
    {
      "epoch": 0.41405059433099667,
      "grad_norm": 1.0546875,
      "learning_rate": 0.00013211960280074109,
      "loss": 0.9802,
      "step": 2717
    },
    {
      "epoch": 0.41420298689423957,
      "grad_norm": 1.0390625,
      "learning_rate": 0.0001320728568912338,
      "loss": 0.9779,
      "step": 2718
    },
    {
      "epoch": 0.41435537945748246,
      "grad_norm": 1.1796875,
      "learning_rate": 0.00013202610316831733,
      "loss": 1.339,
      "step": 2719
    },
    {
      "epoch": 0.41450777202072536,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00013197934164338164,
      "loss": 0.8383,
      "step": 2720
    },
    {
      "epoch": 0.4146601645839683,
      "grad_norm": 1.125,
      "learning_rate": 0.0001319325723278184,
      "loss": 0.9239,
      "step": 2721
    },
    {
      "epoch": 0.4148125571472112,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00013188579523302138,
      "loss": 0.9384,
      "step": 2722
    },
    {
      "epoch": 0.4149649497104541,
      "grad_norm": 0.8984375,
      "learning_rate": 0.0001318390103703861,
      "loss": 1.1353,
      "step": 2723
    },
    {
      "epoch": 0.41511734227369707,
      "grad_norm": 0.92578125,
      "learning_rate": 0.00013179221775131005,
      "loss": 1.077,
      "step": 2724
    },
    {
      "epoch": 0.41526973483693996,
      "grad_norm": 1.03125,
      "learning_rate": 0.00013174541738719255,
      "loss": 1.051,
      "step": 2725
    },
    {
      "epoch": 0.41542212740018286,
      "grad_norm": 0.84375,
      "learning_rate": 0.00013169860928943488,
      "loss": 0.9206,
      "step": 2726
    },
    {
      "epoch": 0.41557451996342576,
      "grad_norm": 0.98046875,
      "learning_rate": 0.00013165179346944015,
      "loss": 0.9632,
      "step": 2727
    },
    {
      "epoch": 0.4157269125266687,
      "grad_norm": 0.96875,
      "learning_rate": 0.00013160496993861338,
      "loss": 1.1154,
      "step": 2728
    },
    {
      "epoch": 0.4158793050899116,
      "grad_norm": 0.71875,
      "learning_rate": 0.00013155813870836144,
      "loss": 0.9825,
      "step": 2729
    },
    {
      "epoch": 0.4160316976531545,
      "grad_norm": 1.078125,
      "learning_rate": 0.0001315112997900931,
      "loss": 0.8919,
      "step": 2730
    },
    {
      "epoch": 0.41618409021639746,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00013146445319521898,
      "loss": 0.9921,
      "step": 2731
    },
    {
      "epoch": 0.41633648277964036,
      "grad_norm": 1.0078125,
      "learning_rate": 0.0001314175989351516,
      "loss": 0.9635,
      "step": 2732
    },
    {
      "epoch": 0.41648887534288326,
      "grad_norm": 1.109375,
      "learning_rate": 0.00013137073702130533,
      "loss": 1.0921,
      "step": 2733
    },
    {
      "epoch": 0.41664126790612616,
      "grad_norm": 1.328125,
      "learning_rate": 0.0001313238674650964,
      "loss": 1.2981,
      "step": 2734
    },
    {
      "epoch": 0.4167936604693691,
      "grad_norm": 0.81640625,
      "learning_rate": 0.00013127699027794292,
      "loss": 0.9114,
      "step": 2735
    },
    {
      "epoch": 0.416946053032612,
      "grad_norm": 0.88671875,
      "learning_rate": 0.0001312301054712648,
      "loss": 0.975,
      "step": 2736
    },
    {
      "epoch": 0.4170984455958549,
      "grad_norm": 1.15625,
      "learning_rate": 0.00013118321305648393,
      "loss": 0.9443,
      "step": 2737
    },
    {
      "epoch": 0.41725083815909786,
      "grad_norm": 0.9375,
      "learning_rate": 0.00013113631304502392,
      "loss": 0.8632,
      "step": 2738
    },
    {
      "epoch": 0.41740323072234076,
      "grad_norm": 1.0703125,
      "learning_rate": 0.0001310894054483103,
      "loss": 1.0267,
      "step": 2739
    },
    {
      "epoch": 0.41755562328558365,
      "grad_norm": 1.046875,
      "learning_rate": 0.00013104249027777045,
      "loss": 1.0578,
      "step": 2740
    },
    {
      "epoch": 0.41770801584882655,
      "grad_norm": 0.95703125,
      "learning_rate": 0.00013099556754483357,
      "loss": 0.9938,
      "step": 2741
    },
    {
      "epoch": 0.4178604084120695,
      "grad_norm": 0.82421875,
      "learning_rate": 0.0001309486372609307,
      "loss": 0.9157,
      "step": 2742
    },
    {
      "epoch": 0.4180128009753124,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00013090169943749476,
      "loss": 0.8717,
      "step": 2743
    },
    {
      "epoch": 0.4181651935385553,
      "grad_norm": 1.21875,
      "learning_rate": 0.00013085475408596047,
      "loss": 0.9831,
      "step": 2744
    },
    {
      "epoch": 0.41831758610179826,
      "grad_norm": 1.1015625,
      "learning_rate": 0.00013080780121776434,
      "loss": 1.1123,
      "step": 2745
    },
    {
      "epoch": 0.41846997866504115,
      "grad_norm": 0.75390625,
      "learning_rate": 0.00013076084084434486,
      "loss": 0.8621,
      "step": 2746
    },
    {
      "epoch": 0.41862237122828405,
      "grad_norm": 1.046875,
      "learning_rate": 0.00013071387297714217,
      "loss": 0.9572,
      "step": 2747
    },
    {
      "epoch": 0.41877476379152695,
      "grad_norm": 0.984375,
      "learning_rate": 0.00013066689762759835,
      "loss": 0.8324,
      "step": 2748
    },
    {
      "epoch": 0.4189271563547699,
      "grad_norm": 0.859375,
      "learning_rate": 0.00013061991480715728,
      "loss": 0.9316,
      "step": 2749
    },
    {
      "epoch": 0.4190795489180128,
      "grad_norm": 1.046875,
      "learning_rate": 0.0001305729245272646,
      "loss": 0.9934,
      "step": 2750
    },
    {
      "epoch": 0.4192319414812557,
      "grad_norm": 0.9765625,
      "learning_rate": 0.00013052592679936792,
      "loss": 0.9009,
      "step": 2751
    },
    {
      "epoch": 0.41938433404449865,
      "grad_norm": 0.9296875,
      "learning_rate": 0.00013047892163491648,
      "loss": 1.0095,
      "step": 2752
    },
    {
      "epoch": 0.41953672660774155,
      "grad_norm": 0.78125,
      "learning_rate": 0.00013043190904536145,
      "loss": 0.8862,
      "step": 2753
    },
    {
      "epoch": 0.41968911917098445,
      "grad_norm": 1.0546875,
      "learning_rate": 0.00013038488904215572,
      "loss": 1.0879,
      "step": 2754
    },
    {
      "epoch": 0.41984151173422735,
      "grad_norm": 1.03125,
      "learning_rate": 0.00013033786163675412,
      "loss": 1.1509,
      "step": 2755
    },
    {
      "epoch": 0.4199939042974703,
      "grad_norm": 0.68359375,
      "learning_rate": 0.00013029082684061312,
      "loss": 0.7908,
      "step": 2756
    },
    {
      "epoch": 0.4201462968607132,
      "grad_norm": 1.6328125,
      "learning_rate": 0.00013024378466519113,
      "loss": 0.9777,
      "step": 2757
    },
    {
      "epoch": 0.4202986894239561,
      "grad_norm": 0.82421875,
      "learning_rate": 0.0001301967351219483,
      "loss": 0.8974,
      "step": 2758
    },
    {
      "epoch": 0.42045108198719905,
      "grad_norm": 0.94921875,
      "learning_rate": 0.0001301496782223465,
      "loss": 1.0597,
      "step": 2759
    },
    {
      "epoch": 0.42060347455044195,
      "grad_norm": 0.734375,
      "learning_rate": 0.00013010261397784957,
      "loss": 0.9562,
      "step": 2760
    },
    {
      "epoch": 0.42075586711368484,
      "grad_norm": 0.9296875,
      "learning_rate": 0.000130055542399923,
      "loss": 0.9246,
      "step": 2761
    },
    {
      "epoch": 0.42090825967692774,
      "grad_norm": 0.9453125,
      "learning_rate": 0.0001300084635000341,
      "loss": 1.0485,
      "step": 2762
    },
    {
      "epoch": 0.4210606522401707,
      "grad_norm": 1.1015625,
      "learning_rate": 0.0001299613772896519,
      "loss": 1.0387,
      "step": 2763
    },
    {
      "epoch": 0.4212130448034136,
      "grad_norm": 0.94140625,
      "learning_rate": 0.00012991428378024742,
      "loss": 0.8516,
      "step": 2764
    },
    {
      "epoch": 0.4213654373666565,
      "grad_norm": 1.0,
      "learning_rate": 0.00012986718298329316,
      "loss": 0.8819,
      "step": 2765
    },
    {
      "epoch": 0.42151782992989945,
      "grad_norm": 0.74609375,
      "learning_rate": 0.0001298200749102637,
      "loss": 0.8206,
      "step": 2766
    },
    {
      "epoch": 0.42167022249314234,
      "grad_norm": 0.765625,
      "learning_rate": 0.0001297729595726351,
      "loss": 0.9652,
      "step": 2767
    },
    {
      "epoch": 0.42182261505638524,
      "grad_norm": 1.0,
      "learning_rate": 0.00012972583698188546,
      "loss": 0.9514,
      "step": 2768
    },
    {
      "epoch": 0.42197500761962814,
      "grad_norm": 0.8984375,
      "learning_rate": 0.00012967870714949444,
      "loss": 0.9413,
      "step": 2769
    },
    {
      "epoch": 0.4221274001828711,
      "grad_norm": 1.265625,
      "learning_rate": 0.0001296315700869436,
      "loss": 1.1572,
      "step": 2770
    },
    {
      "epoch": 0.422279792746114,
      "grad_norm": 0.7578125,
      "learning_rate": 0.00012958442580571612,
      "loss": 0.9225,
      "step": 2771
    },
    {
      "epoch": 0.4224321853093569,
      "grad_norm": 0.9765625,
      "learning_rate": 0.00012953727431729714,
      "loss": 0.7863,
      "step": 2772
    },
    {
      "epoch": 0.42258457787259984,
      "grad_norm": 1.09375,
      "learning_rate": 0.00012949011563317335,
      "loss": 1.0512,
      "step": 2773
    },
    {
      "epoch": 0.42273697043584274,
      "grad_norm": 0.97265625,
      "learning_rate": 0.00012944294976483333,
      "loss": 0.9116,
      "step": 2774
    },
    {
      "epoch": 0.42288936299908564,
      "grad_norm": 0.859375,
      "learning_rate": 0.00012939577672376734,
      "loss": 0.8859,
      "step": 2775
    },
    {
      "epoch": 0.42304175556232854,
      "grad_norm": 0.97265625,
      "learning_rate": 0.00012934859652146742,
      "loss": 1.1537,
      "step": 2776
    },
    {
      "epoch": 0.4231941481255715,
      "grad_norm": 0.99609375,
      "learning_rate": 0.00012930140916942736,
      "loss": 1.0658,
      "step": 2777
    },
    {
      "epoch": 0.4233465406888144,
      "grad_norm": 1.296875,
      "learning_rate": 0.00012925421467914266,
      "loss": 0.9389,
      "step": 2778
    },
    {
      "epoch": 0.4234989332520573,
      "grad_norm": 0.92578125,
      "learning_rate": 0.00012920701306211058,
      "loss": 0.8578,
      "step": 2779
    },
    {
      "epoch": 0.42365132581530024,
      "grad_norm": 0.69921875,
      "learning_rate": 0.0001291598043298301,
      "loss": 0.8518,
      "step": 2780
    },
    {
      "epoch": 0.42380371837854314,
      "grad_norm": 0.84765625,
      "learning_rate": 0.000129112588493802,
      "loss": 1.1169,
      "step": 2781
    },
    {
      "epoch": 0.42395611094178604,
      "grad_norm": 0.74609375,
      "learning_rate": 0.00012906536556552865,
      "loss": 0.8793,
      "step": 2782
    },
    {
      "epoch": 0.42410850350502893,
      "grad_norm": 0.70703125,
      "learning_rate": 0.0001290181355565143,
      "loss": 0.7914,
      "step": 2783
    },
    {
      "epoch": 0.4242608960682719,
      "grad_norm": 0.90625,
      "learning_rate": 0.00012897089847826483,
      "loss": 0.9027,
      "step": 2784
    },
    {
      "epoch": 0.4244132886315148,
      "grad_norm": 0.9375,
      "learning_rate": 0.0001289236543422879,
      "loss": 0.938,
      "step": 2785
    },
    {
      "epoch": 0.4245656811947577,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00012887640316009284,
      "loss": 1.0967,
      "step": 2786
    },
    {
      "epoch": 0.42471807375800064,
      "grad_norm": 0.88671875,
      "learning_rate": 0.0001288291449431907,
      "loss": 0.7544,
      "step": 2787
    },
    {
      "epoch": 0.42487046632124353,
      "grad_norm": 0.796875,
      "learning_rate": 0.00012878187970309432,
      "loss": 0.8522,
      "step": 2788
    },
    {
      "epoch": 0.42502285888448643,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00012873460745131812,
      "loss": 1.0869,
      "step": 2789
    },
    {
      "epoch": 0.42517525144772933,
      "grad_norm": 1.03125,
      "learning_rate": 0.00012868732819937832,
      "loss": 1.0435,
      "step": 2790
    },
    {
      "epoch": 0.4253276440109723,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00012864004195879287,
      "loss": 0.9393,
      "step": 2791
    },
    {
      "epoch": 0.4254800365742152,
      "grad_norm": 1.1484375,
      "learning_rate": 0.00012859274874108136,
      "loss": 0.8969,
      "step": 2792
    },
    {
      "epoch": 0.4256324291374581,
      "grad_norm": 0.99609375,
      "learning_rate": 0.00012854544855776502,
      "loss": 1.0291,
      "step": 2793
    },
    {
      "epoch": 0.42578482170070103,
      "grad_norm": 0.953125,
      "learning_rate": 0.00012849814142036698,
      "loss": 0.9055,
      "step": 2794
    },
    {
      "epoch": 0.42593721426394393,
      "grad_norm": 0.7890625,
      "learning_rate": 0.00012845082734041187,
      "loss": 1.0497,
      "step": 2795
    },
    {
      "epoch": 0.42608960682718683,
      "grad_norm": 0.875,
      "learning_rate": 0.00012840350632942608,
      "loss": 0.9526,
      "step": 2796
    },
    {
      "epoch": 0.4262419993904297,
      "grad_norm": 0.9609375,
      "learning_rate": 0.00012835617839893773,
      "loss": 0.9071,
      "step": 2797
    },
    {
      "epoch": 0.4263943919536727,
      "grad_norm": 0.9609375,
      "learning_rate": 0.0001283088435604765,
      "loss": 0.9788,
      "step": 2798
    },
    {
      "epoch": 0.4265467845169156,
      "grad_norm": 1.3125,
      "learning_rate": 0.00012826150182557392,
      "loss": 0.8933,
      "step": 2799
    },
    {
      "epoch": 0.4266991770801585,
      "grad_norm": 0.8203125,
      "learning_rate": 0.0001282141532057631,
      "loss": 0.9442,
      "step": 2800
    },
    {
      "epoch": 0.42685156964340143,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00012816679771257878,
      "loss": 0.8766,
      "step": 2801
    },
    {
      "epoch": 0.4270039622066443,
      "grad_norm": 0.96875,
      "learning_rate": 0.00012811943535755754,
      "loss": 1.0641,
      "step": 2802
    },
    {
      "epoch": 0.4271563547698872,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00012807206615223749,
      "loss": 0.9535,
      "step": 2803
    },
    {
      "epoch": 0.4273087473331301,
      "grad_norm": 0.94140625,
      "learning_rate": 0.00012802469010815838,
      "loss": 1.1641,
      "step": 2804
    },
    {
      "epoch": 0.4274611398963731,
      "grad_norm": 0.76171875,
      "learning_rate": 0.00012797730723686183,
      "loss": 0.9695,
      "step": 2805
    },
    {
      "epoch": 0.427613532459616,
      "grad_norm": 0.859375,
      "learning_rate": 0.00012792991754989087,
      "loss": 0.922,
      "step": 2806
    },
    {
      "epoch": 0.4277659250228589,
      "grad_norm": 1.2265625,
      "learning_rate": 0.00012788252105879036,
      "loss": 1.0129,
      "step": 2807
    },
    {
      "epoch": 0.42791831758610177,
      "grad_norm": 0.8046875,
      "learning_rate": 0.00012783511777510678,
      "loss": 0.9312,
      "step": 2808
    },
    {
      "epoch": 0.4280707101493447,
      "grad_norm": 0.90234375,
      "learning_rate": 0.0001277877077103882,
      "loss": 1.0043,
      "step": 2809
    },
    {
      "epoch": 0.4282231027125876,
      "grad_norm": 0.7734375,
      "learning_rate": 0.00012774029087618446,
      "loss": 0.8618,
      "step": 2810
    },
    {
      "epoch": 0.4283754952758305,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00012769286728404696,
      "loss": 0.8826,
      "step": 2811
    },
    {
      "epoch": 0.4285278878390735,
      "grad_norm": 0.80078125,
      "learning_rate": 0.00012764543694552874,
      "loss": 0.7664,
      "step": 2812
    },
    {
      "epoch": 0.42868028040231637,
      "grad_norm": 0.984375,
      "learning_rate": 0.00012759799987218452,
      "loss": 0.9701,
      "step": 2813
    },
    {
      "epoch": 0.42883267296555927,
      "grad_norm": 1.125,
      "learning_rate": 0.00012755055607557066,
      "loss": 0.9664,
      "step": 2814
    },
    {
      "epoch": 0.42898506552880217,
      "grad_norm": 0.9609375,
      "learning_rate": 0.0001275031055672452,
      "loss": 0.9338,
      "step": 2815
    },
    {
      "epoch": 0.4291374580920451,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00012745564835876772,
      "loss": 1.0566,
      "step": 2816
    },
    {
      "epoch": 0.429289850655288,
      "grad_norm": 0.83984375,
      "learning_rate": 0.0001274081844616994,
      "loss": 1.0235,
      "step": 2817
    },
    {
      "epoch": 0.4294422432185309,
      "grad_norm": 1.140625,
      "learning_rate": 0.00012736071388760327,
      "loss": 0.9051,
      "step": 2818
    },
    {
      "epoch": 0.42959463578177387,
      "grad_norm": 0.8515625,
      "learning_rate": 0.0001273132366480438,
      "loss": 0.8331,
      "step": 2819
    },
    {
      "epoch": 0.42974702834501677,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00012726575275458707,
      "loss": 1.0474,
      "step": 2820
    },
    {
      "epoch": 0.42989942090825967,
      "grad_norm": 1.1015625,
      "learning_rate": 0.00012721826221880089,
      "loss": 0.9763,
      "step": 2821
    },
    {
      "epoch": 0.43005181347150256,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00012717076505225464,
      "loss": 1.0285,
      "step": 2822
    },
    {
      "epoch": 0.4302042060347455,
      "grad_norm": 0.734375,
      "learning_rate": 0.00012712326126651933,
      "loss": 0.9053,
      "step": 2823
    },
    {
      "epoch": 0.4303565985979884,
      "grad_norm": 0.80078125,
      "learning_rate": 0.00012707575087316748,
      "loss": 0.8021,
      "step": 2824
    },
    {
      "epoch": 0.4305089911612313,
      "grad_norm": 0.98046875,
      "learning_rate": 0.00012702823388377338,
      "loss": 0.8303,
      "step": 2825
    },
    {
      "epoch": 0.43066138372447427,
      "grad_norm": 0.7890625,
      "learning_rate": 0.00012698071030991286,
      "loss": 0.7593,
      "step": 2826
    },
    {
      "epoch": 0.43081377628771717,
      "grad_norm": 0.65625,
      "learning_rate": 0.00012693318016316328,
      "loss": 0.8451,
      "step": 2827
    },
    {
      "epoch": 0.43096616885096006,
      "grad_norm": 1.0546875,
      "learning_rate": 0.0001268856434551037,
      "loss": 0.7518,
      "step": 2828
    },
    {
      "epoch": 0.43111856141420296,
      "grad_norm": 0.89453125,
      "learning_rate": 0.0001268381001973148,
      "loss": 0.8788,
      "step": 2829
    },
    {
      "epoch": 0.4312709539774459,
      "grad_norm": 0.78515625,
      "learning_rate": 0.00012679055040137871,
      "loss": 0.9135,
      "step": 2830
    },
    {
      "epoch": 0.4314233465406888,
      "grad_norm": 0.95703125,
      "learning_rate": 0.0001267429940788793,
      "loss": 1.0978,
      "step": 2831
    },
    {
      "epoch": 0.4315757391039317,
      "grad_norm": 0.828125,
      "learning_rate": 0.00012669543124140196,
      "loss": 0.8928,
      "step": 2832
    },
    {
      "epoch": 0.43172813166717466,
      "grad_norm": 0.96875,
      "learning_rate": 0.00012664786190053371,
      "loss": 0.9573,
      "step": 2833
    },
    {
      "epoch": 0.43188052423041756,
      "grad_norm": 0.7578125,
      "learning_rate": 0.00012660028606786304,
      "loss": 0.8788,
      "step": 2834
    },
    {
      "epoch": 0.43203291679366046,
      "grad_norm": 1.0390625,
      "learning_rate": 0.0001265527037549802,
      "loss": 0.9181,
      "step": 2835
    },
    {
      "epoch": 0.43218530935690336,
      "grad_norm": 1.03125,
      "learning_rate": 0.00012650511497347693,
      "loss": 0.9832,
      "step": 2836
    },
    {
      "epoch": 0.4323377019201463,
      "grad_norm": 1.1640625,
      "learning_rate": 0.00012645751973494644,
      "loss": 1.0878,
      "step": 2837
    },
    {
      "epoch": 0.4324900944833892,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00012640991805098367,
      "loss": 0.9116,
      "step": 2838
    },
    {
      "epoch": 0.4326424870466321,
      "grad_norm": 0.875,
      "learning_rate": 0.0001263623099331851,
      "loss": 0.9585,
      "step": 2839
    },
    {
      "epoch": 0.43279487960987506,
      "grad_norm": 1.078125,
      "learning_rate": 0.00012631469539314878,
      "loss": 1.1063,
      "step": 2840
    },
    {
      "epoch": 0.43294727217311796,
      "grad_norm": 0.88671875,
      "learning_rate": 0.0001262670744424742,
      "loss": 1.0928,
      "step": 2841
    },
    {
      "epoch": 0.43309966473636086,
      "grad_norm": 1.078125,
      "learning_rate": 0.00012621944709276256,
      "loss": 0.9645,
      "step": 2842
    },
    {
      "epoch": 0.43325205729960375,
      "grad_norm": 0.953125,
      "learning_rate": 0.0001261718133556166,
      "loss": 1.0493,
      "step": 2843
    },
    {
      "epoch": 0.4334044498628467,
      "grad_norm": 0.76953125,
      "learning_rate": 0.00012612417324264053,
      "loss": 0.8353,
      "step": 2844
    },
    {
      "epoch": 0.4335568424260896,
      "grad_norm": 1.0625,
      "learning_rate": 0.00012607652676544016,
      "loss": 1.0569,
      "step": 2845
    },
    {
      "epoch": 0.4337092349893325,
      "grad_norm": 0.77734375,
      "learning_rate": 0.00012602887393562293,
      "loss": 0.8886,
      "step": 2846
    },
    {
      "epoch": 0.43386162755257546,
      "grad_norm": 1.3203125,
      "learning_rate": 0.00012598121476479766,
      "loss": 1.1248,
      "step": 2847
    },
    {
      "epoch": 0.43401402011581836,
      "grad_norm": 0.953125,
      "learning_rate": 0.00012593354926457488,
      "loss": 1.0077,
      "step": 2848
    },
    {
      "epoch": 0.43416641267906125,
      "grad_norm": 0.859375,
      "learning_rate": 0.00012588587744656655,
      "loss": 1.0487,
      "step": 2849
    },
    {
      "epoch": 0.43431880524230415,
      "grad_norm": 0.81640625,
      "learning_rate": 0.00012583819932238628,
      "loss": 0.8828,
      "step": 2850
    },
    {
      "epoch": 0.4344711978055471,
      "grad_norm": 0.98828125,
      "learning_rate": 0.00012579051490364904,
      "loss": 0.9499,
      "step": 2851
    },
    {
      "epoch": 0.43462359036879,
      "grad_norm": 0.875,
      "learning_rate": 0.0001257428242019715,
      "loss": 1.1003,
      "step": 2852
    },
    {
      "epoch": 0.4347759829320329,
      "grad_norm": 0.66015625,
      "learning_rate": 0.00012569512722897182,
      "loss": 0.9348,
      "step": 2853
    },
    {
      "epoch": 0.43492837549527585,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00012564742399626962,
      "loss": 0.9253,
      "step": 2854
    },
    {
      "epoch": 0.43508076805851875,
      "grad_norm": 0.94921875,
      "learning_rate": 0.00012559971451548614,
      "loss": 1.0803,
      "step": 2855
    },
    {
      "epoch": 0.43523316062176165,
      "grad_norm": 0.765625,
      "learning_rate": 0.0001255519987982441,
      "loss": 1.1068,
      "step": 2856
    },
    {
      "epoch": 0.43538555318500455,
      "grad_norm": 0.86328125,
      "learning_rate": 0.00012550427685616765,
      "loss": 0.8649,
      "step": 2857
    },
    {
      "epoch": 0.4355379457482475,
      "grad_norm": 1.0625,
      "learning_rate": 0.00012545654870088264,
      "loss": 1.2188,
      "step": 2858
    },
    {
      "epoch": 0.4356903383114904,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00012540881434401627,
      "loss": 1.0728,
      "step": 2859
    },
    {
      "epoch": 0.4358427308747333,
      "grad_norm": 1.03125,
      "learning_rate": 0.00012536107379719735,
      "loss": 1.137,
      "step": 2860
    },
    {
      "epoch": 0.43599512343797625,
      "grad_norm": 1.0546875,
      "learning_rate": 0.00012531332707205618,
      "loss": 1.0184,
      "step": 2861
    },
    {
      "epoch": 0.43614751600121915,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00012526557418022447,
      "loss": 0.7076,
      "step": 2862
    },
    {
      "epoch": 0.43629990856446205,
      "grad_norm": 1.15625,
      "learning_rate": 0.00012521781513333556,
      "loss": 1.206,
      "step": 2863
    },
    {
      "epoch": 0.43645230112770494,
      "grad_norm": 1.2265625,
      "learning_rate": 0.0001251700499430243,
      "loss": 1.1403,
      "step": 2864
    },
    {
      "epoch": 0.4366046936909479,
      "grad_norm": 0.9296875,
      "learning_rate": 0.0001251222786209269,
      "loss": 0.9154,
      "step": 2865
    },
    {
      "epoch": 0.4367570862541908,
      "grad_norm": 0.82421875,
      "learning_rate": 0.00012507450117868113,
      "loss": 0.9322,
      "step": 2866
    },
    {
      "epoch": 0.4369094788174337,
      "grad_norm": 0.8125,
      "learning_rate": 0.00012502671762792638,
      "loss": 0.8789,
      "step": 2867
    },
    {
      "epoch": 0.43706187138067665,
      "grad_norm": 0.875,
      "learning_rate": 0.00012497892798030323,
      "loss": 0.9478,
      "step": 2868
    },
    {
      "epoch": 0.43721426394391955,
      "grad_norm": 1.09375,
      "learning_rate": 0.00012493113224745406,
      "loss": 0.8743,
      "step": 2869
    },
    {
      "epoch": 0.43736665650716244,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00012488333044102258,
      "loss": 0.9464,
      "step": 2870
    },
    {
      "epoch": 0.43751904907040534,
      "grad_norm": 1.0,
      "learning_rate": 0.00012483552257265394,
      "loss": 0.9167,
      "step": 2871
    },
    {
      "epoch": 0.4376714416336483,
      "grad_norm": 0.92578125,
      "learning_rate": 0.0001247877086539949,
      "loss": 0.8145,
      "step": 2872
    },
    {
      "epoch": 0.4378238341968912,
      "grad_norm": 0.875,
      "learning_rate": 0.00012473988869669353,
      "loss": 0.8897,
      "step": 2873
    },
    {
      "epoch": 0.4379762267601341,
      "grad_norm": 1.03125,
      "learning_rate": 0.00012469206271239954,
      "loss": 1.1792,
      "step": 2874
    },
    {
      "epoch": 0.43812861932337704,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00012464423071276398,
      "loss": 0.9874,
      "step": 2875
    },
    {
      "epoch": 0.43828101188661994,
      "grad_norm": 0.9609375,
      "learning_rate": 0.00012459639270943944,
      "loss": 1.0943,
      "step": 2876
    },
    {
      "epoch": 0.43843340444986284,
      "grad_norm": 1.171875,
      "learning_rate": 0.00012454854871407994,
      "loss": 0.9601,
      "step": 2877
    },
    {
      "epoch": 0.43858579701310574,
      "grad_norm": 1.1640625,
      "learning_rate": 0.00012450069873834094,
      "loss": 1.1033,
      "step": 2878
    },
    {
      "epoch": 0.4387381895763487,
      "grad_norm": 0.9765625,
      "learning_rate": 0.00012445284279387938,
      "loss": 1.0088,
      "step": 2879
    },
    {
      "epoch": 0.4388905821395916,
      "grad_norm": 0.953125,
      "learning_rate": 0.0001244049808923537,
      "loss": 1.119,
      "step": 2880
    },
    {
      "epoch": 0.4390429747028345,
      "grad_norm": 0.875,
      "learning_rate": 0.00012435711304542376,
      "loss": 0.8849,
      "step": 2881
    },
    {
      "epoch": 0.43919536726607744,
      "grad_norm": 1.21875,
      "learning_rate": 0.00012430923926475077,
      "loss": 0.9874,
      "step": 2882
    },
    {
      "epoch": 0.43934775982932034,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00012426135956199755,
      "loss": 1.0431,
      "step": 2883
    },
    {
      "epoch": 0.43950015239256324,
      "grad_norm": 0.7890625,
      "learning_rate": 0.00012421347394882826,
      "loss": 0.9871,
      "step": 2884
    },
    {
      "epoch": 0.43965254495580613,
      "grad_norm": 0.875,
      "learning_rate": 0.0001241655824369085,
      "loss": 0.994,
      "step": 2885
    },
    {
      "epoch": 0.4398049375190491,
      "grad_norm": 0.96484375,
      "learning_rate": 0.00012411768503790534,
      "loss": 0.958,
      "step": 2886
    },
    {
      "epoch": 0.439957330082292,
      "grad_norm": 0.9140625,
      "learning_rate": 0.00012406978176348734,
      "loss": 1.0146,
      "step": 2887
    },
    {
      "epoch": 0.4401097226455349,
      "grad_norm": 1.0625,
      "learning_rate": 0.00012402187262532433,
      "loss": 0.9942,
      "step": 2888
    },
    {
      "epoch": 0.44026211520877784,
      "grad_norm": 1.1171875,
      "learning_rate": 0.0001239739576350877,
      "loss": 0.9183,
      "step": 2889
    },
    {
      "epoch": 0.44041450777202074,
      "grad_norm": 1.03125,
      "learning_rate": 0.00012392603680445025,
      "loss": 1.0784,
      "step": 2890
    },
    {
      "epoch": 0.44056690033526363,
      "grad_norm": 0.81640625,
      "learning_rate": 0.0001238781101450862,
      "loss": 0.8418,
      "step": 2891
    },
    {
      "epoch": 0.44071929289850653,
      "grad_norm": 1.015625,
      "learning_rate": 0.0001238301776686711,
      "loss": 0.9663,
      "step": 2892
    },
    {
      "epoch": 0.4408716854617495,
      "grad_norm": 1.078125,
      "learning_rate": 0.00012378223938688205,
      "loss": 1.0224,
      "step": 2893
    },
    {
      "epoch": 0.4410240780249924,
      "grad_norm": 0.84765625,
      "learning_rate": 0.0001237342953113975,
      "loss": 0.8827,
      "step": 2894
    },
    {
      "epoch": 0.4411764705882353,
      "grad_norm": 0.9453125,
      "learning_rate": 0.00012368634545389733,
      "loss": 0.8454,
      "step": 2895
    },
    {
      "epoch": 0.44132886315147823,
      "grad_norm": 0.796875,
      "learning_rate": 0.00012363838982606273,
      "loss": 1.0191,
      "step": 2896
    },
    {
      "epoch": 0.44148125571472113,
      "grad_norm": 0.9140625,
      "learning_rate": 0.00012359042843957646,
      "loss": 0.922,
      "step": 2897
    },
    {
      "epoch": 0.44163364827796403,
      "grad_norm": 1.078125,
      "learning_rate": 0.00012354246130612265,
      "loss": 1.1164,
      "step": 2898
    },
    {
      "epoch": 0.44178604084120693,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00012349448843738665,
      "loss": 1.171,
      "step": 2899
    },
    {
      "epoch": 0.4419384334044499,
      "grad_norm": 0.984375,
      "learning_rate": 0.00012344650984505543,
      "loss": 0.9964,
      "step": 2900
    },
    {
      "epoch": 0.4420908259676928,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00012339852554081726,
      "loss": 1.1018,
      "step": 2901
    },
    {
      "epoch": 0.4422432185309357,
      "grad_norm": 1.03125,
      "learning_rate": 0.00012335053553636175,
      "loss": 1.0302,
      "step": 2902
    },
    {
      "epoch": 0.44239561109417863,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00012330253984338006,
      "loss": 0.8988,
      "step": 2903
    },
    {
      "epoch": 0.44254800365742153,
      "grad_norm": 0.859375,
      "learning_rate": 0.0001232545384735645,
      "loss": 1.069,
      "step": 2904
    },
    {
      "epoch": 0.4427003962206644,
      "grad_norm": 0.95703125,
      "learning_rate": 0.00012320653143860902,
      "loss": 1.0336,
      "step": 2905
    },
    {
      "epoch": 0.4428527887839073,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00012315851875020873,
      "loss": 1.0403,
      "step": 2906
    },
    {
      "epoch": 0.4430051813471503,
      "grad_norm": 0.9921875,
      "learning_rate": 0.00012311050042006024,
      "loss": 0.9451,
      "step": 2907
    },
    {
      "epoch": 0.4431575739103932,
      "grad_norm": 0.9765625,
      "learning_rate": 0.00012306247645986154,
      "loss": 1.0236,
      "step": 2908
    },
    {
      "epoch": 0.4433099664736361,
      "grad_norm": 0.79296875,
      "learning_rate": 0.00012301444688131191,
      "loss": 0.7988,
      "step": 2909
    },
    {
      "epoch": 0.44346235903687903,
      "grad_norm": 0.765625,
      "learning_rate": 0.00012296641169611206,
      "loss": 0.9046,
      "step": 2910
    },
    {
      "epoch": 0.4436147516001219,
      "grad_norm": 0.9140625,
      "learning_rate": 0.00012291837091596409,
      "loss": 1.017,
      "step": 2911
    },
    {
      "epoch": 0.4437671441633648,
      "grad_norm": 0.88671875,
      "learning_rate": 0.0001228703245525714,
      "loss": 0.9049,
      "step": 2912
    },
    {
      "epoch": 0.4439195367266077,
      "grad_norm": 1.078125,
      "learning_rate": 0.00012282227261763872,
      "loss": 0.8276,
      "step": 2913
    },
    {
      "epoch": 0.4440719292898507,
      "grad_norm": 0.890625,
      "learning_rate": 0.00012277421512287226,
      "loss": 1.047,
      "step": 2914
    },
    {
      "epoch": 0.4442243218530936,
      "grad_norm": 1.2890625,
      "learning_rate": 0.0001227261520799795,
      "loss": 1.015,
      "step": 2915
    },
    {
      "epoch": 0.44437671441633647,
      "grad_norm": 0.8046875,
      "learning_rate": 0.00012267808350066928,
      "loss": 0.8978,
      "step": 2916
    },
    {
      "epoch": 0.44452910697957937,
      "grad_norm": 0.8359375,
      "learning_rate": 0.00012263000939665182,
      "loss": 0.9853,
      "step": 2917
    },
    {
      "epoch": 0.4446814995428223,
      "grad_norm": 0.94140625,
      "learning_rate": 0.00012258192977963861,
      "loss": 0.9754,
      "step": 2918
    },
    {
      "epoch": 0.4448338921060652,
      "grad_norm": 0.86328125,
      "learning_rate": 0.0001225338446613426,
      "loss": 0.8024,
      "step": 2919
    },
    {
      "epoch": 0.4449862846693081,
      "grad_norm": 1.2109375,
      "learning_rate": 0.000122485754053478,
      "loss": 1.1133,
      "step": 2920
    },
    {
      "epoch": 0.44513867723255107,
      "grad_norm": 0.76953125,
      "learning_rate": 0.00012243765796776033,
      "loss": 1.0362,
      "step": 2921
    },
    {
      "epoch": 0.44529106979579397,
      "grad_norm": 1.3203125,
      "learning_rate": 0.00012238955641590655,
      "loss": 0.9365,
      "step": 2922
    },
    {
      "epoch": 0.44544346235903687,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00012234144940963485,
      "loss": 0.8966,
      "step": 2923
    },
    {
      "epoch": 0.44559585492227977,
      "grad_norm": 0.9296875,
      "learning_rate": 0.00012229333696066476,
      "loss": 0.9264,
      "step": 2924
    },
    {
      "epoch": 0.4457482474855227,
      "grad_norm": 0.890625,
      "learning_rate": 0.00012224521908071724,
      "loss": 0.9521,
      "step": 2925
    },
    {
      "epoch": 0.4459006400487656,
      "grad_norm": 0.94140625,
      "learning_rate": 0.00012219709578151447,
      "loss": 1.1265,
      "step": 2926
    },
    {
      "epoch": 0.4460530326120085,
      "grad_norm": 0.921875,
      "learning_rate": 0.00012214896707477994,
      "loss": 0.9163,
      "step": 2927
    },
    {
      "epoch": 0.44620542517525147,
      "grad_norm": 1.1796875,
      "learning_rate": 0.00012210083297223853,
      "loss": 0.9603,
      "step": 2928
    },
    {
      "epoch": 0.44635781773849437,
      "grad_norm": 0.8828125,
      "learning_rate": 0.00012205269348561641,
      "loss": 0.8162,
      "step": 2929
    },
    {
      "epoch": 0.44651021030173726,
      "grad_norm": 1.2265625,
      "learning_rate": 0.00012200454862664102,
      "loss": 1.003,
      "step": 2930
    },
    {
      "epoch": 0.44666260286498016,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00012195639840704115,
      "loss": 0.889,
      "step": 2931
    },
    {
      "epoch": 0.4468149954282231,
      "grad_norm": 0.8984375,
      "learning_rate": 0.00012190824283854689,
      "loss": 0.9406,
      "step": 2932
    },
    {
      "epoch": 0.446967387991466,
      "grad_norm": 1.1796875,
      "learning_rate": 0.00012186008193288962,
      "loss": 1.0405,
      "step": 2933
    },
    {
      "epoch": 0.4471197805547089,
      "grad_norm": 0.96875,
      "learning_rate": 0.00012181191570180205,
      "loss": 1.0018,
      "step": 2934
    },
    {
      "epoch": 0.44727217311795187,
      "grad_norm": 0.89453125,
      "learning_rate": 0.00012176374415701817,
      "loss": 0.9618,
      "step": 2935
    },
    {
      "epoch": 0.44742456568119476,
      "grad_norm": 1.078125,
      "learning_rate": 0.0001217155673102732,
      "loss": 0.9853,
      "step": 2936
    },
    {
      "epoch": 0.44757695824443766,
      "grad_norm": 0.8984375,
      "learning_rate": 0.0001216673851733038,
      "loss": 0.8764,
      "step": 2937
    },
    {
      "epoch": 0.44772935080768056,
      "grad_norm": 0.9453125,
      "learning_rate": 0.00012161919775784775,
      "loss": 1.0485,
      "step": 2938
    },
    {
      "epoch": 0.4478817433709235,
      "grad_norm": 0.87890625,
      "learning_rate": 0.00012157100507564427,
      "loss": 1.0121,
      "step": 2939
    },
    {
      "epoch": 0.4480341359341664,
      "grad_norm": 1.171875,
      "learning_rate": 0.00012152280713843379,
      "loss": 0.9348,
      "step": 2940
    },
    {
      "epoch": 0.4481865284974093,
      "grad_norm": 1.0859375,
      "learning_rate": 0.00012147460395795791,
      "loss": 0.8188,
      "step": 2941
    },
    {
      "epoch": 0.44833892106065226,
      "grad_norm": 1.03125,
      "learning_rate": 0.00012142639554595974,
      "loss": 1.0394,
      "step": 2942
    },
    {
      "epoch": 0.44849131362389516,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00012137818191418348,
      "loss": 0.888,
      "step": 2943
    },
    {
      "epoch": 0.44864370618713806,
      "grad_norm": 0.98046875,
      "learning_rate": 0.0001213299630743747,
      "loss": 1.0319,
      "step": 2944
    },
    {
      "epoch": 0.44879609875038096,
      "grad_norm": 1.3125,
      "learning_rate": 0.00012128173903828018,
      "loss": 1.0389,
      "step": 2945
    },
    {
      "epoch": 0.4489484913136239,
      "grad_norm": 0.89453125,
      "learning_rate": 0.00012123350981764799,
      "loss": 0.929,
      "step": 2946
    },
    {
      "epoch": 0.4491008838768668,
      "grad_norm": 1.0390625,
      "learning_rate": 0.00012118527542422744,
      "loss": 1.0017,
      "step": 2947
    },
    {
      "epoch": 0.4492532764401097,
      "grad_norm": 0.96875,
      "learning_rate": 0.0001211370358697692,
      "loss": 0.8748,
      "step": 2948
    },
    {
      "epoch": 0.44940566900335266,
      "grad_norm": 0.9375,
      "learning_rate": 0.00012108879116602503,
      "loss": 0.8368,
      "step": 2949
    },
    {
      "epoch": 0.44955806156659556,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00012104054132474804,
      "loss": 0.9607,
      "step": 2950
    },
    {
      "epoch": 0.44971045412983846,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00012099228635769268,
      "loss": 0.8877,
      "step": 2951
    },
    {
      "epoch": 0.44986284669308135,
      "grad_norm": 0.96875,
      "learning_rate": 0.00012094402627661447,
      "loss": 0.8974,
      "step": 2952
    },
    {
      "epoch": 0.4500152392563243,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00012089576109327025,
      "loss": 0.8878,
      "step": 2953
    },
    {
      "epoch": 0.4501676318195672,
      "grad_norm": 0.953125,
      "learning_rate": 0.00012084749081941815,
      "loss": 0.9363,
      "step": 2954
    },
    {
      "epoch": 0.4503200243828101,
      "grad_norm": 1.0625,
      "learning_rate": 0.00012079921546681749,
      "loss": 1.2199,
      "step": 2955
    },
    {
      "epoch": 0.45047241694605306,
      "grad_norm": 0.9609375,
      "learning_rate": 0.00012075093504722888,
      "loss": 1.1741,
      "step": 2956
    },
    {
      "epoch": 0.45062480950929595,
      "grad_norm": 0.8984375,
      "learning_rate": 0.00012070264957241406,
      "loss": 0.9586,
      "step": 2957
    },
    {
      "epoch": 0.45077720207253885,
      "grad_norm": 0.83203125,
      "learning_rate": 0.00012065435905413609,
      "loss": 0.9006,
      "step": 2958
    },
    {
      "epoch": 0.45092959463578175,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00012060606350415926,
      "loss": 1.0285,
      "step": 2959
    },
    {
      "epoch": 0.4510819871990247,
      "grad_norm": 1.0859375,
      "learning_rate": 0.00012055776293424905,
      "loss": 0.9179,
      "step": 2960
    },
    {
      "epoch": 0.4512343797622676,
      "grad_norm": 0.79296875,
      "learning_rate": 0.00012050945735617212,
      "loss": 0.8551,
      "step": 2961
    },
    {
      "epoch": 0.4513867723255105,
      "grad_norm": 1.1875,
      "learning_rate": 0.00012046114678169647,
      "loss": 1.0246,
      "step": 2962
    },
    {
      "epoch": 0.45153916488875345,
      "grad_norm": 0.9375,
      "learning_rate": 0.00012041283122259124,
      "loss": 1.2547,
      "step": 2963
    },
    {
      "epoch": 0.45169155745199635,
      "grad_norm": 0.73828125,
      "learning_rate": 0.00012036451069062675,
      "loss": 0.9337,
      "step": 2964
    },
    {
      "epoch": 0.45184395001523925,
      "grad_norm": 0.69140625,
      "learning_rate": 0.00012031618519757463,
      "loss": 0.8869,
      "step": 2965
    },
    {
      "epoch": 0.45199634257848215,
      "grad_norm": 0.703125,
      "learning_rate": 0.00012026785475520766,
      "loss": 0.975,
      "step": 2966
    },
    {
      "epoch": 0.4521487351417251,
      "grad_norm": 1.3125,
      "learning_rate": 0.00012021951937529979,
      "loss": 1.103,
      "step": 2967
    },
    {
      "epoch": 0.452301127704968,
      "grad_norm": 0.84375,
      "learning_rate": 0.00012017117906962625,
      "loss": 0.96,
      "step": 2968
    },
    {
      "epoch": 0.4524535202682109,
      "grad_norm": 0.7578125,
      "learning_rate": 0.00012012283384996339,
      "loss": 0.8625,
      "step": 2969
    },
    {
      "epoch": 0.45260591283145385,
      "grad_norm": 0.99609375,
      "learning_rate": 0.00012007448372808886,
      "loss": 0.9229,
      "step": 2970
    },
    {
      "epoch": 0.45275830539469675,
      "grad_norm": 1.0625,
      "learning_rate": 0.00012002612871578143,
      "loss": 1.0955,
      "step": 2971
    },
    {
      "epoch": 0.45291069795793965,
      "grad_norm": 1.0390625,
      "learning_rate": 0.00011997776882482103,
      "loss": 1.0479,
      "step": 2972
    },
    {
      "epoch": 0.45306309052118254,
      "grad_norm": 0.95703125,
      "learning_rate": 0.00011992940406698889,
      "loss": 0.9458,
      "step": 2973
    },
    {
      "epoch": 0.4532154830844255,
      "grad_norm": 1.1640625,
      "learning_rate": 0.00011988103445406733,
      "loss": 1.0501,
      "step": 2974
    },
    {
      "epoch": 0.4533678756476684,
      "grad_norm": 0.859375,
      "learning_rate": 0.00011983265999783986,
      "loss": 1.0747,
      "step": 2975
    },
    {
      "epoch": 0.4535202682109113,
      "grad_norm": 1.1015625,
      "learning_rate": 0.00011978428071009124,
      "loss": 0.9361,
      "step": 2976
    },
    {
      "epoch": 0.45367266077415425,
      "grad_norm": 0.84765625,
      "learning_rate": 0.00011973589660260734,
      "loss": 0.8621,
      "step": 2977
    },
    {
      "epoch": 0.45382505333739714,
      "grad_norm": 0.96484375,
      "learning_rate": 0.00011968750768717519,
      "loss": 1.0318,
      "step": 2978
    },
    {
      "epoch": 0.45397744590064004,
      "grad_norm": 1.15625,
      "learning_rate": 0.00011963911397558308,
      "loss": 1.1293,
      "step": 2979
    },
    {
      "epoch": 0.45412983846388294,
      "grad_norm": 0.92578125,
      "learning_rate": 0.00011959071547962037,
      "loss": 0.9482,
      "step": 2980
    },
    {
      "epoch": 0.4542822310271259,
      "grad_norm": 1.078125,
      "learning_rate": 0.00011954231221107766,
      "loss": 1.0528,
      "step": 2981
    },
    {
      "epoch": 0.4544346235903688,
      "grad_norm": 1.140625,
      "learning_rate": 0.00011949390418174666,
      "loss": 1.0679,
      "step": 2982
    },
    {
      "epoch": 0.4545870161536117,
      "grad_norm": 1.125,
      "learning_rate": 0.0001194454914034203,
      "loss": 0.8914,
      "step": 2983
    },
    {
      "epoch": 0.45473940871685464,
      "grad_norm": 1.0625,
      "learning_rate": 0.00011939707388789256,
      "loss": 0.9306,
      "step": 2984
    },
    {
      "epoch": 0.45489180128009754,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00011934865164695872,
      "loss": 0.9892,
      "step": 2985
    },
    {
      "epoch": 0.45504419384334044,
      "grad_norm": 1.046875,
      "learning_rate": 0.00011930022469241509,
      "loss": 0.93,
      "step": 2986
    },
    {
      "epoch": 0.45519658640658334,
      "grad_norm": 0.80078125,
      "learning_rate": 0.00011925179303605919,
      "loss": 1.0169,
      "step": 2987
    },
    {
      "epoch": 0.4553489789698263,
      "grad_norm": 0.9765625,
      "learning_rate": 0.00011920335668968967,
      "loss": 1.0091,
      "step": 2988
    },
    {
      "epoch": 0.4555013715330692,
      "grad_norm": 0.84765625,
      "learning_rate": 0.00011915491566510628,
      "loss": 0.8414,
      "step": 2989
    },
    {
      "epoch": 0.4556537640963121,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00011910646997411001,
      "loss": 0.972,
      "step": 2990
    },
    {
      "epoch": 0.45580615665955504,
      "grad_norm": 0.953125,
      "learning_rate": 0.0001190580196285029,
      "loss": 0.989,
      "step": 2991
    },
    {
      "epoch": 0.45595854922279794,
      "grad_norm": 1.0,
      "learning_rate": 0.00011900956464008813,
      "loss": 0.9742,
      "step": 2992
    },
    {
      "epoch": 0.45611094178604084,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00011896110502067003,
      "loss": 0.9032,
      "step": 2993
    },
    {
      "epoch": 0.45626333434928373,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00011891264078205413,
      "loss": 0.9099,
      "step": 2994
    },
    {
      "epoch": 0.4564157269125267,
      "grad_norm": 0.80859375,
      "learning_rate": 0.00011886417193604694,
      "loss": 0.8556,
      "step": 2995
    },
    {
      "epoch": 0.4565681194757696,
      "grad_norm": 1.296875,
      "learning_rate": 0.0001188156984944562,
      "loss": 0.8101,
      "step": 2996
    },
    {
      "epoch": 0.4567205120390125,
      "grad_norm": 1.2109375,
      "learning_rate": 0.00011876722046909075,
      "loss": 0.816,
      "step": 2997
    },
    {
      "epoch": 0.45687290460225544,
      "grad_norm": 0.89453125,
      "learning_rate": 0.00011871873787176046,
      "loss": 0.8747,
      "step": 2998
    },
    {
      "epoch": 0.45702529716549833,
      "grad_norm": 1.15625,
      "learning_rate": 0.00011867025071427652,
      "loss": 1.0319,
      "step": 2999
    },
    {
      "epoch": 0.45717768972874123,
      "grad_norm": 1.4296875,
      "learning_rate": 0.00011862175900845098,
      "loss": 0.8881,
      "step": 3000
    },
    {
      "epoch": 0.45733008229198413,
      "grad_norm": 0.92578125,
      "learning_rate": 0.00011857326276609715,
      "loss": 0.8188,
      "step": 3001
    },
    {
      "epoch": 0.4574824748552271,
      "grad_norm": 0.90625,
      "learning_rate": 0.00011852476199902945,
      "loss": 0.9109,
      "step": 3002
    },
    {
      "epoch": 0.45763486741847,
      "grad_norm": 1.09375,
      "learning_rate": 0.00011847625671906331,
      "loss": 0.9476,
      "step": 3003
    },
    {
      "epoch": 0.4577872599817129,
      "grad_norm": 1.171875,
      "learning_rate": 0.00011842774693801536,
      "loss": 0.8475,
      "step": 3004
    },
    {
      "epoch": 0.45793965254495583,
      "grad_norm": 0.95703125,
      "learning_rate": 0.00011837923266770326,
      "loss": 0.8558,
      "step": 3005
    },
    {
      "epoch": 0.45809204510819873,
      "grad_norm": 1.21875,
      "learning_rate": 0.00011833071391994577,
      "loss": 0.9178,
      "step": 3006
    },
    {
      "epoch": 0.45824443767144163,
      "grad_norm": 1.0859375,
      "learning_rate": 0.00011828219070656277,
      "loss": 1.1597,
      "step": 3007
    },
    {
      "epoch": 0.4583968302346845,
      "grad_norm": 0.9140625,
      "learning_rate": 0.00011823366303937523,
      "loss": 1.073,
      "step": 3008
    },
    {
      "epoch": 0.4585492227979275,
      "grad_norm": 0.96875,
      "learning_rate": 0.00011818513093020513,
      "loss": 0.9589,
      "step": 3009
    },
    {
      "epoch": 0.4587016153611704,
      "grad_norm": 0.90625,
      "learning_rate": 0.00011813659439087568,
      "loss": 1.0487,
      "step": 3010
    },
    {
      "epoch": 0.4588540079244133,
      "grad_norm": 0.9453125,
      "learning_rate": 0.000118088053433211,
      "loss": 0.9221,
      "step": 3011
    },
    {
      "epoch": 0.45900640048765623,
      "grad_norm": 0.828125,
      "learning_rate": 0.00011803950806903639,
      "loss": 0.8415,
      "step": 3012
    },
    {
      "epoch": 0.45915879305089913,
      "grad_norm": 0.79296875,
      "learning_rate": 0.00011799095831017821,
      "loss": 0.9182,
      "step": 3013
    },
    {
      "epoch": 0.459311185614142,
      "grad_norm": 0.890625,
      "learning_rate": 0.00011794240416846388,
      "loss": 1.0316,
      "step": 3014
    },
    {
      "epoch": 0.4594635781773849,
      "grad_norm": 0.9296875,
      "learning_rate": 0.00011789384565572183,
      "loss": 0.8025,
      "step": 3015
    },
    {
      "epoch": 0.4596159707406279,
      "grad_norm": 0.78515625,
      "learning_rate": 0.00011784528278378171,
      "loss": 0.9533,
      "step": 3016
    },
    {
      "epoch": 0.4597683633038708,
      "grad_norm": 1.1484375,
      "learning_rate": 0.0001177967155644741,
      "loss": 1.0288,
      "step": 3017
    },
    {
      "epoch": 0.4599207558671137,
      "grad_norm": 0.8359375,
      "learning_rate": 0.0001177481440096306,
      "loss": 0.9097,
      "step": 3018
    },
    {
      "epoch": 0.46007314843035657,
      "grad_norm": 0.78515625,
      "learning_rate": 0.00011769956813108402,
      "loss": 0.8404,
      "step": 3019
    },
    {
      "epoch": 0.4602255409935995,
      "grad_norm": 0.87890625,
      "learning_rate": 0.00011765098794066812,
      "loss": 0.8647,
      "step": 3020
    },
    {
      "epoch": 0.4603779335568424,
      "grad_norm": 0.97265625,
      "learning_rate": 0.00011760240345021774,
      "loss": 0.9574,
      "step": 3021
    },
    {
      "epoch": 0.4605303261200853,
      "grad_norm": 1.2265625,
      "learning_rate": 0.00011755381467156873,
      "loss": 1.0655,
      "step": 3022
    },
    {
      "epoch": 0.4606827186833283,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00011750522161655805,
      "loss": 0.9194,
      "step": 3023
    },
    {
      "epoch": 0.46083511124657117,
      "grad_norm": 0.8984375,
      "learning_rate": 0.00011745662429702369,
      "loss": 0.9249,
      "step": 3024
    },
    {
      "epoch": 0.46098750380981407,
      "grad_norm": 1.1328125,
      "learning_rate": 0.00011740802272480458,
      "loss": 0.9398,
      "step": 3025
    },
    {
      "epoch": 0.46113989637305697,
      "grad_norm": 0.921875,
      "learning_rate": 0.00011735941691174079,
      "loss": 1.0145,
      "step": 3026
    },
    {
      "epoch": 0.4612922889362999,
      "grad_norm": 0.83203125,
      "learning_rate": 0.00011731080686967344,
      "loss": 0.9235,
      "step": 3027
    },
    {
      "epoch": 0.4614446814995428,
      "grad_norm": 0.875,
      "learning_rate": 0.0001172621926104446,
      "loss": 1.0345,
      "step": 3028
    },
    {
      "epoch": 0.4615970740627857,
      "grad_norm": 0.76171875,
      "learning_rate": 0.00011721357414589737,
      "loss": 0.947,
      "step": 3029
    },
    {
      "epoch": 0.46174946662602867,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00011716495148787599,
      "loss": 0.9949,
      "step": 3030
    },
    {
      "epoch": 0.46190185918927157,
      "grad_norm": 0.796875,
      "learning_rate": 0.00011711632464822557,
      "loss": 0.9118,
      "step": 3031
    },
    {
      "epoch": 0.46205425175251447,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00011706769363879233,
      "loss": 1.0189,
      "step": 3032
    },
    {
      "epoch": 0.46220664431575736,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00011701905847142348,
      "loss": 1.1277,
      "step": 3033
    },
    {
      "epoch": 0.4623590368790003,
      "grad_norm": 0.84765625,
      "learning_rate": 0.00011697041915796723,
      "loss": 1.0955,
      "step": 3034
    },
    {
      "epoch": 0.4625114294422432,
      "grad_norm": 0.796875,
      "learning_rate": 0.00011692177571027287,
      "loss": 0.9897,
      "step": 3035
    },
    {
      "epoch": 0.4626638220054861,
      "grad_norm": 1.0625,
      "learning_rate": 0.0001168731281401906,
      "loss": 0.9858,
      "step": 3036
    },
    {
      "epoch": 0.46281621456872907,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00011682447645957164,
      "loss": 1.0565,
      "step": 3037
    },
    {
      "epoch": 0.46296860713197197,
      "grad_norm": 0.875,
      "learning_rate": 0.00011677582068026831,
      "loss": 0.9613,
      "step": 3038
    },
    {
      "epoch": 0.46312099969521486,
      "grad_norm": 0.84765625,
      "learning_rate": 0.00011672716081413381,
      "loss": 0.9228,
      "step": 3039
    },
    {
      "epoch": 0.46327339225845776,
      "grad_norm": 1.078125,
      "learning_rate": 0.00011667849687302239,
      "loss": 0.946,
      "step": 3040
    },
    {
      "epoch": 0.4634257848217007,
      "grad_norm": 0.90234375,
      "learning_rate": 0.0001166298288687893,
      "loss": 0.9388,
      "step": 3041
    },
    {
      "epoch": 0.4635781773849436,
      "grad_norm": 0.94921875,
      "learning_rate": 0.00011658115681329076,
      "loss": 1.0223,
      "step": 3042
    },
    {
      "epoch": 0.4637305699481865,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00011653248071838397,
      "loss": 0.889,
      "step": 3043
    },
    {
      "epoch": 0.46388296251142946,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00011648380059592713,
      "loss": 0.8471,
      "step": 3044
    },
    {
      "epoch": 0.46403535507467236,
      "grad_norm": 0.98046875,
      "learning_rate": 0.00011643511645777945,
      "loss": 1.0201,
      "step": 3045
    },
    {
      "epoch": 0.46418774763791526,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00011638642831580102,
      "loss": 1.2013,
      "step": 3046
    },
    {
      "epoch": 0.46434014020115816,
      "grad_norm": 0.9453125,
      "learning_rate": 0.00011633773618185302,
      "loss": 0.9966,
      "step": 3047
    },
    {
      "epoch": 0.4644925327644011,
      "grad_norm": 0.89453125,
      "learning_rate": 0.00011628904006779757,
      "loss": 0.8633,
      "step": 3048
    },
    {
      "epoch": 0.464644925327644,
      "grad_norm": 1.234375,
      "learning_rate": 0.00011624033998549771,
      "loss": 0.8375,
      "step": 3049
    },
    {
      "epoch": 0.4647973178908869,
      "grad_norm": 1.125,
      "learning_rate": 0.00011619163594681748,
      "loss": 1.0434,
      "step": 3050
    },
    {
      "epoch": 0.46494971045412986,
      "grad_norm": 0.87890625,
      "learning_rate": 0.00011614292796362188,
      "loss": 0.8614,
      "step": 3051
    },
    {
      "epoch": 0.46510210301737276,
      "grad_norm": 0.73828125,
      "learning_rate": 0.00011609421604777691,
      "loss": 0.8772,
      "step": 3052
    },
    {
      "epoch": 0.46525449558061566,
      "grad_norm": 0.96484375,
      "learning_rate": 0.00011604550021114948,
      "loss": 1.0656,
      "step": 3053
    },
    {
      "epoch": 0.46540688814385855,
      "grad_norm": 0.94140625,
      "learning_rate": 0.00011599678046560743,
      "loss": 0.9908,
      "step": 3054
    },
    {
      "epoch": 0.4655592807071015,
      "grad_norm": 1.0859375,
      "learning_rate": 0.00011594805682301963,
      "loss": 1.0415,
      "step": 3055
    },
    {
      "epoch": 0.4657116732703444,
      "grad_norm": 0.80859375,
      "learning_rate": 0.00011589932929525588,
      "loss": 0.9143,
      "step": 3056
    },
    {
      "epoch": 0.4658640658335873,
      "grad_norm": 1.0,
      "learning_rate": 0.00011585059789418683,
      "loss": 0.922,
      "step": 3057
    },
    {
      "epoch": 0.46601645839683026,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00011580186263168425,
      "loss": 0.8058,
      "step": 3058
    },
    {
      "epoch": 0.46616885096007316,
      "grad_norm": 0.90625,
      "learning_rate": 0.00011575312351962067,
      "loss": 1.0485,
      "step": 3059
    },
    {
      "epoch": 0.46632124352331605,
      "grad_norm": 0.953125,
      "learning_rate": 0.00011570438056986965,
      "loss": 1.0627,
      "step": 3060
    },
    {
      "epoch": 0.46647363608655895,
      "grad_norm": 0.88671875,
      "learning_rate": 0.00011565563379430572,
      "loss": 0.9674,
      "step": 3061
    },
    {
      "epoch": 0.4666260286498019,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00011560688320480426,
      "loss": 0.9146,
      "step": 3062
    },
    {
      "epoch": 0.4667784212130448,
      "grad_norm": 1.0,
      "learning_rate": 0.0001155581288132416,
      "loss": 0.759,
      "step": 3063
    },
    {
      "epoch": 0.4669308137762877,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00011550937063149505,
      "loss": 1.1457,
      "step": 3064
    },
    {
      "epoch": 0.46708320633953065,
      "grad_norm": 1.0546875,
      "learning_rate": 0.00011546060867144279,
      "loss": 0.9587,
      "step": 3065
    },
    {
      "epoch": 0.46723559890277355,
      "grad_norm": 0.99609375,
      "learning_rate": 0.00011541184294496392,
      "loss": 0.8652,
      "step": 3066
    },
    {
      "epoch": 0.46738799146601645,
      "grad_norm": 1.0390625,
      "learning_rate": 0.00011536307346393849,
      "loss": 1.0284,
      "step": 3067
    },
    {
      "epoch": 0.46754038402925935,
      "grad_norm": 0.80078125,
      "learning_rate": 0.0001153143002402474,
      "loss": 0.9025,
      "step": 3068
    },
    {
      "epoch": 0.4676927765925023,
      "grad_norm": 1.1796875,
      "learning_rate": 0.0001152655232857726,
      "loss": 1.0054,
      "step": 3069
    },
    {
      "epoch": 0.4678451691557452,
      "grad_norm": 0.97265625,
      "learning_rate": 0.00011521674261239679,
      "loss": 0.931,
      "step": 3070
    },
    {
      "epoch": 0.4679975617189881,
      "grad_norm": 0.95703125,
      "learning_rate": 0.00011516795823200364,
      "loss": 1.1748,
      "step": 3071
    },
    {
      "epoch": 0.46814995428223105,
      "grad_norm": 0.93359375,
      "learning_rate": 0.0001151191701564778,
      "loss": 1.0686,
      "step": 3072
    },
    {
      "epoch": 0.46830234684547395,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00011507037839770463,
      "loss": 0.8798,
      "step": 3073
    },
    {
      "epoch": 0.46845473940871685,
      "grad_norm": 1.21875,
      "learning_rate": 0.00011502158296757057,
      "loss": 1.1276,
      "step": 3074
    },
    {
      "epoch": 0.46860713197195974,
      "grad_norm": 0.9609375,
      "learning_rate": 0.0001149727838779629,
      "loss": 0.9737,
      "step": 3075
    },
    {
      "epoch": 0.4687595245352027,
      "grad_norm": 0.89453125,
      "learning_rate": 0.00011492398114076978,
      "loss": 1.0624,
      "step": 3076
    },
    {
      "epoch": 0.4689119170984456,
      "grad_norm": 1.09375,
      "learning_rate": 0.00011487517476788024,
      "loss": 1.0068,
      "step": 3077
    },
    {
      "epoch": 0.4690643096616885,
      "grad_norm": 0.80078125,
      "learning_rate": 0.0001148263647711842,
      "loss": 0.9826,
      "step": 3078
    },
    {
      "epoch": 0.46921670222493145,
      "grad_norm": 0.984375,
      "learning_rate": 0.00011477755116257252,
      "loss": 1.0542,
      "step": 3079
    },
    {
      "epoch": 0.46936909478817435,
      "grad_norm": 1.1171875,
      "learning_rate": 0.00011472873395393684,
      "loss": 1.113,
      "step": 3080
    },
    {
      "epoch": 0.46952148735141724,
      "grad_norm": 0.95703125,
      "learning_rate": 0.0001146799131571698,
      "loss": 1.0517,
      "step": 3081
    },
    {
      "epoch": 0.46967387991466014,
      "grad_norm": 1.0859375,
      "learning_rate": 0.0001146310887841648,
      "loss": 0.9462,
      "step": 3082
    },
    {
      "epoch": 0.4698262724779031,
      "grad_norm": 0.9140625,
      "learning_rate": 0.00011458226084681613,
      "loss": 0.902,
      "step": 3083
    },
    {
      "epoch": 0.469978665041146,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00011453342935701908,
      "loss": 1.0443,
      "step": 3084
    },
    {
      "epoch": 0.4701310576043889,
      "grad_norm": 0.71875,
      "learning_rate": 0.00011448459432666961,
      "loss": 0.9112,
      "step": 3085
    },
    {
      "epoch": 0.47028345016763184,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00011443575576766472,
      "loss": 0.9712,
      "step": 3086
    },
    {
      "epoch": 0.47043584273087474,
      "grad_norm": 1.0859375,
      "learning_rate": 0.00011438691369190208,
      "loss": 1.0708,
      "step": 3087
    },
    {
      "epoch": 0.47058823529411764,
      "grad_norm": 0.9453125,
      "learning_rate": 0.00011433806811128038,
      "loss": 1.0326,
      "step": 3088
    },
    {
      "epoch": 0.47074062785736054,
      "grad_norm": 1.0859375,
      "learning_rate": 0.00011428921903769914,
      "loss": 1.159,
      "step": 3089
    },
    {
      "epoch": 0.4708930204206035,
      "grad_norm": 1.1796875,
      "learning_rate": 0.00011424036648305863,
      "loss": 0.9103,
      "step": 3090
    },
    {
      "epoch": 0.4710454129838464,
      "grad_norm": 1.2421875,
      "learning_rate": 0.00011419151045926007,
      "loss": 0.9565,
      "step": 3091
    },
    {
      "epoch": 0.4711978055470893,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00011414265097820548,
      "loss": 0.9305,
      "step": 3092
    },
    {
      "epoch": 0.47135019811033224,
      "grad_norm": 1.25,
      "learning_rate": 0.00011409378805179776,
      "loss": 0.9366,
      "step": 3093
    },
    {
      "epoch": 0.47150259067357514,
      "grad_norm": 1.015625,
      "learning_rate": 0.00011404492169194056,
      "loss": 0.855,
      "step": 3094
    },
    {
      "epoch": 0.47165498323681804,
      "grad_norm": 1.0078125,
      "learning_rate": 0.0001139960519105385,
      "loss": 0.8178,
      "step": 3095
    },
    {
      "epoch": 0.47180737580006094,
      "grad_norm": 0.890625,
      "learning_rate": 0.00011394717871949692,
      "loss": 0.8856,
      "step": 3096
    },
    {
      "epoch": 0.4719597683633039,
      "grad_norm": 1.0625,
      "learning_rate": 0.00011389830213072204,
      "loss": 0.9152,
      "step": 3097
    },
    {
      "epoch": 0.4721121609265468,
      "grad_norm": 0.796875,
      "learning_rate": 0.00011384942215612089,
      "loss": 0.9906,
      "step": 3098
    },
    {
      "epoch": 0.4722645534897897,
      "grad_norm": 0.78125,
      "learning_rate": 0.00011380053880760133,
      "loss": 1.0937,
      "step": 3099
    },
    {
      "epoch": 0.47241694605303264,
      "grad_norm": 1.078125,
      "learning_rate": 0.00011375165209707213,
      "loss": 1.1267,
      "step": 3100
    },
    {
      "epoch": 0.47256933861627554,
      "grad_norm": 1.03125,
      "learning_rate": 0.00011370276203644266,
      "loss": 0.9704,
      "step": 3101
    },
    {
      "epoch": 0.47272173117951843,
      "grad_norm": 1.109375,
      "learning_rate": 0.0001136538686376233,
      "loss": 1.1606,
      "step": 3102
    },
    {
      "epoch": 0.47287412374276133,
      "grad_norm": 0.98046875,
      "learning_rate": 0.00011360497191252527,
      "loss": 0.9923,
      "step": 3103
    },
    {
      "epoch": 0.4730265163060043,
      "grad_norm": 0.9921875,
      "learning_rate": 0.00011355607187306037,
      "loss": 0.88,
      "step": 3104
    },
    {
      "epoch": 0.4731789088692472,
      "grad_norm": 0.77734375,
      "learning_rate": 0.00011350716853114143,
      "loss": 0.8746,
      "step": 3105
    },
    {
      "epoch": 0.4733313014324901,
      "grad_norm": 0.9140625,
      "learning_rate": 0.00011345826189868203,
      "loss": 1.0369,
      "step": 3106
    },
    {
      "epoch": 0.47348369399573303,
      "grad_norm": 0.81640625,
      "learning_rate": 0.00011340935198759646,
      "loss": 0.9055,
      "step": 3107
    },
    {
      "epoch": 0.47363608655897593,
      "grad_norm": 1.015625,
      "learning_rate": 0.00011336043880979992,
      "loss": 0.9799,
      "step": 3108
    },
    {
      "epoch": 0.47378847912221883,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00011331152237720838,
      "loss": 0.9332,
      "step": 3109
    },
    {
      "epoch": 0.47394087168546173,
      "grad_norm": 1.1953125,
      "learning_rate": 0.00011326260270173859,
      "loss": 0.9572,
      "step": 3110
    },
    {
      "epoch": 0.4740932642487047,
      "grad_norm": 1.078125,
      "learning_rate": 0.00011321367979530799,
      "loss": 0.7858,
      "step": 3111
    },
    {
      "epoch": 0.4742456568119476,
      "grad_norm": 0.80078125,
      "learning_rate": 0.00011316475366983504,
      "loss": 1.0612,
      "step": 3112
    },
    {
      "epoch": 0.4743980493751905,
      "grad_norm": 0.8828125,
      "learning_rate": 0.00011311582433723877,
      "loss": 1.0307,
      "step": 3113
    },
    {
      "epoch": 0.47455044193843343,
      "grad_norm": 1.2109375,
      "learning_rate": 0.00011306689180943909,
      "loss": 0.9088,
      "step": 3114
    },
    {
      "epoch": 0.47470283450167633,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00011301795609835662,
      "loss": 1.1087,
      "step": 3115
    },
    {
      "epoch": 0.4748552270649192,
      "grad_norm": 1.078125,
      "learning_rate": 0.00011296901721591284,
      "loss": 0.9388,
      "step": 3116
    },
    {
      "epoch": 0.4750076196281621,
      "grad_norm": 0.796875,
      "learning_rate": 0.00011292007517403003,
      "loss": 0.9456,
      "step": 3117
    },
    {
      "epoch": 0.4751600121914051,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00011287112998463107,
      "loss": 1.1169,
      "step": 3118
    },
    {
      "epoch": 0.475312404754648,
      "grad_norm": 1.125,
      "learning_rate": 0.00011282218165963976,
      "loss": 0.9727,
      "step": 3119
    },
    {
      "epoch": 0.4754647973178909,
      "grad_norm": 0.8125,
      "learning_rate": 0.00011277323021098064,
      "loss": 0.9313,
      "step": 3120
    },
    {
      "epoch": 0.47561718988113383,
      "grad_norm": 0.828125,
      "learning_rate": 0.0001127242756505789,
      "loss": 0.8985,
      "step": 3121
    },
    {
      "epoch": 0.4757695824443767,
      "grad_norm": 0.99609375,
      "learning_rate": 0.00011267531799036066,
      "loss": 1.0133,
      "step": 3122
    },
    {
      "epoch": 0.4759219750076196,
      "grad_norm": 0.92578125,
      "learning_rate": 0.00011262635724225272,
      "loss": 0.9498,
      "step": 3123
    },
    {
      "epoch": 0.4760743675708625,
      "grad_norm": 1.109375,
      "learning_rate": 0.00011257739341818254,
      "loss": 1.091,
      "step": 3124
    },
    {
      "epoch": 0.4762267601341055,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00011252842653007847,
      "loss": 1.0588,
      "step": 3125
    },
    {
      "epoch": 0.4763791526973484,
      "grad_norm": 0.796875,
      "learning_rate": 0.00011247945658986954,
      "loss": 1.0066,
      "step": 3126
    },
    {
      "epoch": 0.47653154526059127,
      "grad_norm": 1.0,
      "learning_rate": 0.00011243048360948554,
      "loss": 1.1727,
      "step": 3127
    },
    {
      "epoch": 0.47668393782383417,
      "grad_norm": 1.03125,
      "learning_rate": 0.00011238150760085697,
      "loss": 1.0021,
      "step": 3128
    },
    {
      "epoch": 0.4768363303870771,
      "grad_norm": 0.96875,
      "learning_rate": 0.00011233252857591507,
      "loss": 0.837,
      "step": 3129
    },
    {
      "epoch": 0.47698872295032,
      "grad_norm": 1.0859375,
      "learning_rate": 0.0001122835465465919,
      "loss": 1.0617,
      "step": 3130
    },
    {
      "epoch": 0.4771411155135629,
      "grad_norm": 1.359375,
      "learning_rate": 0.00011223456152482014,
      "loss": 0.8877,
      "step": 3131
    },
    {
      "epoch": 0.4772935080768059,
      "grad_norm": 1.25,
      "learning_rate": 0.00011218557352253325,
      "loss": 0.9946,
      "step": 3132
    },
    {
      "epoch": 0.47744590064004877,
      "grad_norm": 1.078125,
      "learning_rate": 0.00011213658255166539,
      "loss": 0.9417,
      "step": 3133
    },
    {
      "epoch": 0.47759829320329167,
      "grad_norm": 1.8359375,
      "learning_rate": 0.00011208758862415156,
      "loss": 0.9766,
      "step": 3134
    },
    {
      "epoch": 0.47775068576653457,
      "grad_norm": 1.0625,
      "learning_rate": 0.00011203859175192729,
      "loss": 0.8286,
      "step": 3135
    },
    {
      "epoch": 0.4779030783297775,
      "grad_norm": 0.890625,
      "learning_rate": 0.00011198959194692891,
      "loss": 1.112,
      "step": 3136
    },
    {
      "epoch": 0.4780554708930204,
      "grad_norm": 1.0859375,
      "learning_rate": 0.00011194058922109359,
      "loss": 0.9977,
      "step": 3137
    },
    {
      "epoch": 0.4782078634562633,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00011189158358635896,
      "loss": 0.8969,
      "step": 3138
    },
    {
      "epoch": 0.47836025601950627,
      "grad_norm": 0.78125,
      "learning_rate": 0.0001118425750546636,
      "loss": 0.9761,
      "step": 3139
    },
    {
      "epoch": 0.47851264858274917,
      "grad_norm": 1.03125,
      "learning_rate": 0.00011179356363794664,
      "loss": 0.8739,
      "step": 3140
    },
    {
      "epoch": 0.47866504114599207,
      "grad_norm": 1.078125,
      "learning_rate": 0.00011174454934814802,
      "loss": 0.9811,
      "step": 3141
    },
    {
      "epoch": 0.47881743370923496,
      "grad_norm": 1.046875,
      "learning_rate": 0.00011169553219720828,
      "loss": 0.8804,
      "step": 3142
    },
    {
      "epoch": 0.4789698262724779,
      "grad_norm": 1.0625,
      "learning_rate": 0.00011164651219706867,
      "loss": 1.0713,
      "step": 3143
    },
    {
      "epoch": 0.4791222188357208,
      "grad_norm": 1.1015625,
      "learning_rate": 0.00011159748935967124,
      "loss": 0.8403,
      "step": 3144
    },
    {
      "epoch": 0.4792746113989637,
      "grad_norm": 0.79296875,
      "learning_rate": 0.00011154846369695863,
      "loss": 1.0491,
      "step": 3145
    },
    {
      "epoch": 0.47942700396220667,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00011149943522087416,
      "loss": 1.0837,
      "step": 3146
    },
    {
      "epoch": 0.47957939652544956,
      "grad_norm": 1.0390625,
      "learning_rate": 0.00011145040394336195,
      "loss": 1.0661,
      "step": 3147
    },
    {
      "epoch": 0.47973178908869246,
      "grad_norm": 0.96484375,
      "learning_rate": 0.00011140136987636666,
      "loss": 0.9438,
      "step": 3148
    },
    {
      "epoch": 0.47988418165193536,
      "grad_norm": 1.046875,
      "learning_rate": 0.0001113523330318337,
      "loss": 0.9295,
      "step": 3149
    },
    {
      "epoch": 0.4800365742151783,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00011130329342170917,
      "loss": 0.9826,
      "step": 3150
    },
    {
      "epoch": 0.4801889667784212,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00011125425105793985,
      "loss": 0.8156,
      "step": 3151
    },
    {
      "epoch": 0.4803413593416641,
      "grad_norm": 1.15625,
      "learning_rate": 0.0001112052059524731,
      "loss": 1.0285,
      "step": 3152
    },
    {
      "epoch": 0.48049375190490706,
      "grad_norm": 0.90625,
      "learning_rate": 0.00011115615811725703,
      "loss": 0.976,
      "step": 3153
    },
    {
      "epoch": 0.48064614446814996,
      "grad_norm": 0.9609375,
      "learning_rate": 0.00011110710756424048,
      "loss": 0.8592,
      "step": 3154
    },
    {
      "epoch": 0.48079853703139286,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00011105805430537275,
      "loss": 0.9495,
      "step": 3155
    },
    {
      "epoch": 0.48095092959463576,
      "grad_norm": 1.3515625,
      "learning_rate": 0.00011100899835260399,
      "loss": 0.8794,
      "step": 3156
    },
    {
      "epoch": 0.4811033221578787,
      "grad_norm": 1.265625,
      "learning_rate": 0.00011095993971788492,
      "loss": 1.0079,
      "step": 3157
    },
    {
      "epoch": 0.4812557147211216,
      "grad_norm": 0.87890625,
      "learning_rate": 0.00011091087841316692,
      "loss": 0.9921,
      "step": 3158
    },
    {
      "epoch": 0.4814081072843645,
      "grad_norm": 0.95703125,
      "learning_rate": 0.00011086181445040205,
      "loss": 1.0678,
      "step": 3159
    },
    {
      "epoch": 0.48156049984760746,
      "grad_norm": 1.0625,
      "learning_rate": 0.00011081274784154296,
      "loss": 0.8219,
      "step": 3160
    },
    {
      "epoch": 0.48171289241085036,
      "grad_norm": 0.66015625,
      "learning_rate": 0.00011076367859854304,
      "loss": 0.9117,
      "step": 3161
    },
    {
      "epoch": 0.48186528497409326,
      "grad_norm": 0.7421875,
      "learning_rate": 0.00011071460673335621,
      "loss": 1.0226,
      "step": 3162
    },
    {
      "epoch": 0.48201767753733615,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00011066553225793708,
      "loss": 0.897,
      "step": 3163
    },
    {
      "epoch": 0.4821700701005791,
      "grad_norm": 0.90625,
      "learning_rate": 0.00011061645518424093,
      "loss": 0.9651,
      "step": 3164
    },
    {
      "epoch": 0.482322462663822,
      "grad_norm": 0.79296875,
      "learning_rate": 0.00011056737552422367,
      "loss": 1.0311,
      "step": 3165
    },
    {
      "epoch": 0.4824748552270649,
      "grad_norm": 1.046875,
      "learning_rate": 0.00011051829328984173,
      "loss": 1.0241,
      "step": 3166
    },
    {
      "epoch": 0.48262724779030786,
      "grad_norm": 0.66015625,
      "learning_rate": 0.00011046920849305229,
      "loss": 0.8404,
      "step": 3167
    },
    {
      "epoch": 0.48277964035355075,
      "grad_norm": 0.77734375,
      "learning_rate": 0.00011042012114581315,
      "loss": 0.8888,
      "step": 3168
    },
    {
      "epoch": 0.48293203291679365,
      "grad_norm": 0.7734375,
      "learning_rate": 0.00011037103126008261,
      "loss": 0.8694,
      "step": 3169
    },
    {
      "epoch": 0.48308442548003655,
      "grad_norm": 0.9140625,
      "learning_rate": 0.00011032193884781973,
      "loss": 0.9834,
      "step": 3170
    },
    {
      "epoch": 0.4832368180432795,
      "grad_norm": 0.890625,
      "learning_rate": 0.00011027284392098414,
      "loss": 1.0828,
      "step": 3171
    },
    {
      "epoch": 0.4833892106065224,
      "grad_norm": 0.9453125,
      "learning_rate": 0.00011022374649153599,
      "loss": 0.9883,
      "step": 3172
    },
    {
      "epoch": 0.4835416031697653,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00011017464657143621,
      "loss": 1.0528,
      "step": 3173
    },
    {
      "epoch": 0.48369399573300825,
      "grad_norm": 1.046875,
      "learning_rate": 0.0001101255441726462,
      "loss": 1.0925,
      "step": 3174
    },
    {
      "epoch": 0.48384638829625115,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00011007643930712804,
      "loss": 1.0121,
      "step": 3175
    },
    {
      "epoch": 0.48399878085949405,
      "grad_norm": 0.9765625,
      "learning_rate": 0.00011002733198684436,
      "loss": 0.9812,
      "step": 3176
    },
    {
      "epoch": 0.48415117342273695,
      "grad_norm": 1.3359375,
      "learning_rate": 0.0001099782222237584,
      "loss": 1.0764,
      "step": 3177
    },
    {
      "epoch": 0.4843035659859799,
      "grad_norm": 0.796875,
      "learning_rate": 0.00010992911002983401,
      "loss": 0.8668,
      "step": 3178
    },
    {
      "epoch": 0.4844559585492228,
      "grad_norm": 1.109375,
      "learning_rate": 0.00010987999541703566,
      "loss": 0.9637,
      "step": 3179
    },
    {
      "epoch": 0.4846083511124657,
      "grad_norm": 0.88671875,
      "learning_rate": 0.00010983087839732833,
      "loss": 0.9017,
      "step": 3180
    },
    {
      "epoch": 0.48476074367570865,
      "grad_norm": 0.9375,
      "learning_rate": 0.00010978175898267764,
      "loss": 0.9855,
      "step": 3181
    },
    {
      "epoch": 0.48491313623895155,
      "grad_norm": 1.1171875,
      "learning_rate": 0.00010973263718504983,
      "loss": 0.7606,
      "step": 3182
    },
    {
      "epoch": 0.48506552880219445,
      "grad_norm": 0.921875,
      "learning_rate": 0.00010968351301641162,
      "loss": 1.0428,
      "step": 3183
    },
    {
      "epoch": 0.48521792136543734,
      "grad_norm": 0.99609375,
      "learning_rate": 0.00010963438648873041,
      "loss": 1.1246,
      "step": 3184
    },
    {
      "epoch": 0.4853703139286803,
      "grad_norm": 0.6796875,
      "learning_rate": 0.00010958525761397409,
      "loss": 0.8494,
      "step": 3185
    },
    {
      "epoch": 0.4855227064919232,
      "grad_norm": 0.73046875,
      "learning_rate": 0.00010953612640411117,
      "loss": 0.8966,
      "step": 3186
    },
    {
      "epoch": 0.4856750990551661,
      "grad_norm": 0.9609375,
      "learning_rate": 0.00010948699287111073,
      "loss": 1.0317,
      "step": 3187
    },
    {
      "epoch": 0.48582749161840905,
      "grad_norm": 1.296875,
      "learning_rate": 0.00010943785702694243,
      "loss": 0.9538,
      "step": 3188
    },
    {
      "epoch": 0.48597988418165194,
      "grad_norm": 0.7734375,
      "learning_rate": 0.00010938871888357638,
      "loss": 0.9406,
      "step": 3189
    },
    {
      "epoch": 0.48613227674489484,
      "grad_norm": 0.828125,
      "learning_rate": 0.00010933957845298346,
      "loss": 0.7653,
      "step": 3190
    },
    {
      "epoch": 0.48628466930813774,
      "grad_norm": 0.9140625,
      "learning_rate": 0.00010929043574713487,
      "loss": 0.9894,
      "step": 3191
    },
    {
      "epoch": 0.4864370618713807,
      "grad_norm": 0.84765625,
      "learning_rate": 0.00010924129077800255,
      "loss": 0.874,
      "step": 3192
    },
    {
      "epoch": 0.4865894544346236,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00010919214355755893,
      "loss": 1.1028,
      "step": 3193
    },
    {
      "epoch": 0.4867418469978665,
      "grad_norm": 0.80078125,
      "learning_rate": 0.00010914299409777694,
      "loss": 0.9505,
      "step": 3194
    },
    {
      "epoch": 0.48689423956110944,
      "grad_norm": 1.015625,
      "learning_rate": 0.0001090938424106301,
      "loss": 0.9935,
      "step": 3195
    },
    {
      "epoch": 0.48704663212435234,
      "grad_norm": 1.2734375,
      "learning_rate": 0.00010904468850809248,
      "loss": 1.0609,
      "step": 3196
    },
    {
      "epoch": 0.48719902468759524,
      "grad_norm": 1.3203125,
      "learning_rate": 0.00010899553240213869,
      "loss": 1.1474,
      "step": 3197
    },
    {
      "epoch": 0.48735141725083814,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00010894637410474384,
      "loss": 1.0473,
      "step": 3198
    },
    {
      "epoch": 0.4875038098140811,
      "grad_norm": 1.078125,
      "learning_rate": 0.00010889721362788361,
      "loss": 0.9898,
      "step": 3199
    },
    {
      "epoch": 0.487656202377324,
      "grad_norm": 1.0625,
      "learning_rate": 0.00010884805098353417,
      "loss": 1.0033,
      "step": 3200
    },
    {
      "epoch": 0.4878085949405669,
      "grad_norm": 0.76953125,
      "learning_rate": 0.00010879888618367229,
      "loss": 0.8389,
      "step": 3201
    },
    {
      "epoch": 0.48796098750380984,
      "grad_norm": 0.92578125,
      "learning_rate": 0.00010874971924027522,
      "loss": 1.0108,
      "step": 3202
    },
    {
      "epoch": 0.48811338006705274,
      "grad_norm": 1.28125,
      "learning_rate": 0.0001087005501653207,
      "loss": 1.1676,
      "step": 3203
    },
    {
      "epoch": 0.48826577263029564,
      "grad_norm": 0.82421875,
      "learning_rate": 0.00010865137897078706,
      "loss": 0.9857,
      "step": 3204
    },
    {
      "epoch": 0.48841816519353853,
      "grad_norm": 0.75,
      "learning_rate": 0.00010860220566865306,
      "loss": 0.976,
      "step": 3205
    },
    {
      "epoch": 0.4885705577567815,
      "grad_norm": 1.03125,
      "learning_rate": 0.0001085530302708981,
      "loss": 0.8249,
      "step": 3206
    },
    {
      "epoch": 0.4887229503200244,
      "grad_norm": 1.0546875,
      "learning_rate": 0.00010850385278950195,
      "loss": 0.8328,
      "step": 3207
    },
    {
      "epoch": 0.4888753428832673,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00010845467323644496,
      "loss": 1.0382,
      "step": 3208
    },
    {
      "epoch": 0.48902773544651024,
      "grad_norm": 0.8125,
      "learning_rate": 0.00010840549162370801,
      "loss": 0.8975,
      "step": 3209
    },
    {
      "epoch": 0.48918012800975313,
      "grad_norm": 0.984375,
      "learning_rate": 0.00010835630796327243,
      "loss": 0.8588,
      "step": 3210
    },
    {
      "epoch": 0.48933252057299603,
      "grad_norm": 0.87890625,
      "learning_rate": 0.00010830712226712006,
      "loss": 1.1676,
      "step": 3211
    },
    {
      "epoch": 0.48948491313623893,
      "grad_norm": 1.296875,
      "learning_rate": 0.00010825793454723325,
      "loss": 1.0735,
      "step": 3212
    },
    {
      "epoch": 0.4896373056994819,
      "grad_norm": 1.0625,
      "learning_rate": 0.00010820874481559482,
      "loss": 1.0455,
      "step": 3213
    },
    {
      "epoch": 0.4897896982627248,
      "grad_norm": 0.72265625,
      "learning_rate": 0.0001081595530841881,
      "loss": 0.958,
      "step": 3214
    },
    {
      "epoch": 0.4899420908259677,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00010811035936499694,
      "loss": 1.0031,
      "step": 3215
    },
    {
      "epoch": 0.49009448338921063,
      "grad_norm": 0.96875,
      "learning_rate": 0.0001080611636700056,
      "loss": 0.9706,
      "step": 3216
    },
    {
      "epoch": 0.49024687595245353,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00010801196601119887,
      "loss": 0.9813,
      "step": 3217
    },
    {
      "epoch": 0.49039926851569643,
      "grad_norm": 1.2265625,
      "learning_rate": 0.000107962766400562,
      "loss": 1.2237,
      "step": 3218
    },
    {
      "epoch": 0.4905516610789393,
      "grad_norm": 1.1171875,
      "learning_rate": 0.00010791356485008075,
      "loss": 1.1279,
      "step": 3219
    },
    {
      "epoch": 0.4907040536421823,
      "grad_norm": 0.96875,
      "learning_rate": 0.00010786436137174126,
      "loss": 0.8925,
      "step": 3220
    },
    {
      "epoch": 0.4908564462054252,
      "grad_norm": 0.96875,
      "learning_rate": 0.00010781515597753027,
      "loss": 0.8685,
      "step": 3221
    },
    {
      "epoch": 0.4910088387686681,
      "grad_norm": 1.2109375,
      "learning_rate": 0.0001077659486794349,
      "loss": 0.978,
      "step": 3222
    },
    {
      "epoch": 0.49116123133191103,
      "grad_norm": 0.9296875,
      "learning_rate": 0.00010771673948944277,
      "loss": 1.0501,
      "step": 3223
    },
    {
      "epoch": 0.49131362389515393,
      "grad_norm": 1.421875,
      "learning_rate": 0.00010766752841954191,
      "loss": 0.9907,
      "step": 3224
    },
    {
      "epoch": 0.4914660164583968,
      "grad_norm": 1.0625,
      "learning_rate": 0.00010761831548172088,
      "loss": 0.9838,
      "step": 3225
    },
    {
      "epoch": 0.4916184090216397,
      "grad_norm": 0.84765625,
      "learning_rate": 0.00010756910068796864,
      "loss": 0.8156,
      "step": 3226
    },
    {
      "epoch": 0.4917708015848827,
      "grad_norm": 0.8125,
      "learning_rate": 0.00010751988405027462,
      "loss": 0.9766,
      "step": 3227
    },
    {
      "epoch": 0.4919231941481256,
      "grad_norm": 0.88671875,
      "learning_rate": 0.00010747066558062871,
      "loss": 1.0308,
      "step": 3228
    },
    {
      "epoch": 0.4920755867113685,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00010742144529102125,
      "loss": 1.0249,
      "step": 3229
    },
    {
      "epoch": 0.49222797927461137,
      "grad_norm": 1.1796875,
      "learning_rate": 0.00010737222319344298,
      "loss": 1.0641,
      "step": 3230
    },
    {
      "epoch": 0.4923803718378543,
      "grad_norm": 0.984375,
      "learning_rate": 0.00010732299929988512,
      "loss": 0.9952,
      "step": 3231
    },
    {
      "epoch": 0.4925327644010972,
      "grad_norm": 1.0,
      "learning_rate": 0.00010727377362233934,
      "loss": 0.9706,
      "step": 3232
    },
    {
      "epoch": 0.4926851569643401,
      "grad_norm": 0.84375,
      "learning_rate": 0.00010722454617279772,
      "loss": 0.8593,
      "step": 3233
    },
    {
      "epoch": 0.4928375495275831,
      "grad_norm": 0.9765625,
      "learning_rate": 0.00010717531696325271,
      "loss": 1.0011,
      "step": 3234
    },
    {
      "epoch": 0.49298994209082597,
      "grad_norm": 1.15625,
      "learning_rate": 0.00010712608600569733,
      "loss": 1.0293,
      "step": 3235
    },
    {
      "epoch": 0.49314233465406887,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00010707685331212496,
      "loss": 1.0042,
      "step": 3236
    },
    {
      "epoch": 0.49329472721731177,
      "grad_norm": 0.9609375,
      "learning_rate": 0.0001070276188945293,
      "loss": 0.9967,
      "step": 3237
    },
    {
      "epoch": 0.4934471197805547,
      "grad_norm": 1.3984375,
      "learning_rate": 0.00010697838276490466,
      "loss": 1.1284,
      "step": 3238
    },
    {
      "epoch": 0.4935995123437976,
      "grad_norm": 0.8671875,
      "learning_rate": 0.0001069291449352456,
      "loss": 1.0847,
      "step": 3239
    },
    {
      "epoch": 0.4937519049070405,
      "grad_norm": 1.0390625,
      "learning_rate": 0.0001068799054175472,
      "loss": 0.8199,
      "step": 3240
    },
    {
      "epoch": 0.49390429747028347,
      "grad_norm": 0.82421875,
      "learning_rate": 0.00010683066422380492,
      "loss": 0.8627,
      "step": 3241
    },
    {
      "epoch": 0.49405669003352637,
      "grad_norm": 1.09375,
      "learning_rate": 0.00010678142136601458,
      "loss": 0.8815,
      "step": 3242
    },
    {
      "epoch": 0.49420908259676927,
      "grad_norm": 0.875,
      "learning_rate": 0.0001067321768561725,
      "loss": 1.1312,
      "step": 3243
    },
    {
      "epoch": 0.49436147516001216,
      "grad_norm": 1.015625,
      "learning_rate": 0.00010668293070627533,
      "loss": 0.913,
      "step": 3244
    },
    {
      "epoch": 0.4945138677232551,
      "grad_norm": 0.7578125,
      "learning_rate": 0.00010663368292832012,
      "loss": 0.8835,
      "step": 3245
    },
    {
      "epoch": 0.494666260286498,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00010658443353430436,
      "loss": 1.074,
      "step": 3246
    },
    {
      "epoch": 0.4948186528497409,
      "grad_norm": 1.25,
      "learning_rate": 0.00010653518253622592,
      "loss": 1.3205,
      "step": 3247
    },
    {
      "epoch": 0.49497104541298387,
      "grad_norm": 0.99609375,
      "learning_rate": 0.00010648592994608302,
      "loss": 0.9068,
      "step": 3248
    },
    {
      "epoch": 0.49512343797622677,
      "grad_norm": 0.9375,
      "learning_rate": 0.00010643667577587433,
      "loss": 0.9949,
      "step": 3249
    },
    {
      "epoch": 0.49527583053946966,
      "grad_norm": 0.98828125,
      "learning_rate": 0.00010638742003759886,
      "loss": 0.8719,
      "step": 3250
    },
    {
      "epoch": 0.49542822310271256,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00010633816274325599,
      "loss": 0.7082,
      "step": 3251
    },
    {
      "epoch": 0.4955806156659555,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00010628890390484554,
      "loss": 1.0136,
      "step": 3252
    },
    {
      "epoch": 0.4957330082291984,
      "grad_norm": 1.0546875,
      "learning_rate": 0.00010623964353436768,
      "loss": 1.0089,
      "step": 3253
    },
    {
      "epoch": 0.4958854007924413,
      "grad_norm": 1.1640625,
      "learning_rate": 0.0001061903816438229,
      "loss": 0.9898,
      "step": 3254
    },
    {
      "epoch": 0.49603779335568426,
      "grad_norm": 1.1640625,
      "learning_rate": 0.00010614111824521215,
      "loss": 0.9099,
      "step": 3255
    },
    {
      "epoch": 0.49619018591892716,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00010609185335053669,
      "loss": 0.9882,
      "step": 3256
    },
    {
      "epoch": 0.49634257848217006,
      "grad_norm": 0.73046875,
      "learning_rate": 0.00010604258697179814,
      "loss": 0.9182,
      "step": 3257
    },
    {
      "epoch": 0.49649497104541296,
      "grad_norm": 1.078125,
      "learning_rate": 0.00010599331912099852,
      "loss": 0.9721,
      "step": 3258
    },
    {
      "epoch": 0.4966473636086559,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00010594404981014017,
      "loss": 0.9327,
      "step": 3259
    },
    {
      "epoch": 0.4967997561718988,
      "grad_norm": 0.8828125,
      "learning_rate": 0.00010589477905122582,
      "loss": 0.9913,
      "step": 3260
    },
    {
      "epoch": 0.4969521487351417,
      "grad_norm": 0.9921875,
      "learning_rate": 0.00010584550685625852,
      "loss": 0.9563,
      "step": 3261
    },
    {
      "epoch": 0.49710454129838466,
      "grad_norm": 1.03125,
      "learning_rate": 0.00010579623323724166,
      "loss": 1.0008,
      "step": 3262
    },
    {
      "epoch": 0.49725693386162756,
      "grad_norm": 1.171875,
      "learning_rate": 0.00010574695820617908,
      "loss": 1.0131,
      "step": 3263
    },
    {
      "epoch": 0.49740932642487046,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00010569768177507482,
      "loss": 1.1129,
      "step": 3264
    },
    {
      "epoch": 0.49756171898811336,
      "grad_norm": 0.859375,
      "learning_rate": 0.00010564840395593332,
      "loss": 0.8644,
      "step": 3265
    },
    {
      "epoch": 0.4977141115513563,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00010559912476075941,
      "loss": 0.985,
      "step": 3266
    },
    {
      "epoch": 0.4978665041145992,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00010554984420155823,
      "loss": 0.9231,
      "step": 3267
    },
    {
      "epoch": 0.4980188966778421,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00010550056229033513,
      "loss": 1.0174,
      "step": 3268
    },
    {
      "epoch": 0.49817128924108506,
      "grad_norm": 0.84375,
      "learning_rate": 0.00010545127903909599,
      "loss": 0.8889,
      "step": 3269
    },
    {
      "epoch": 0.49832368180432796,
      "grad_norm": 0.7421875,
      "learning_rate": 0.00010540199445984687,
      "loss": 0.9082,
      "step": 3270
    },
    {
      "epoch": 0.49847607436757085,
      "grad_norm": 1.3203125,
      "learning_rate": 0.00010535270856459422,
      "loss": 0.9428,
      "step": 3271
    },
    {
      "epoch": 0.49862846693081375,
      "grad_norm": 0.87890625,
      "learning_rate": 0.00010530342136534482,
      "loss": 1.1498,
      "step": 3272
    },
    {
      "epoch": 0.4987808594940567,
      "grad_norm": 0.96484375,
      "learning_rate": 0.00010525413287410569,
      "loss": 0.9527,
      "step": 3273
    },
    {
      "epoch": 0.4989332520572996,
      "grad_norm": 0.875,
      "learning_rate": 0.00010520484310288424,
      "loss": 0.9847,
      "step": 3274
    },
    {
      "epoch": 0.4990856446205425,
      "grad_norm": 0.8828125,
      "learning_rate": 0.00010515555206368815,
      "loss": 1.0257,
      "step": 3275
    },
    {
      "epoch": 0.49923803718378545,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00010510625976852544,
      "loss": 0.9323,
      "step": 3276
    },
    {
      "epoch": 0.49939042974702835,
      "grad_norm": 1.046875,
      "learning_rate": 0.00010505696622940443,
      "loss": 0.8702,
      "step": 3277
    },
    {
      "epoch": 0.49954282231027125,
      "grad_norm": 1.046875,
      "learning_rate": 0.00010500767145833372,
      "loss": 1.1368,
      "step": 3278
    },
    {
      "epoch": 0.49969521487351415,
      "grad_norm": 0.73828125,
      "learning_rate": 0.00010495837546732224,
      "loss": 0.823,
      "step": 3279
    },
    {
      "epoch": 0.4998476074367571,
      "grad_norm": 0.9765625,
      "learning_rate": 0.00010490907826837919,
      "loss": 0.9766,
      "step": 3280
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.15625,
      "learning_rate": 0.00010485977987351407,
      "loss": 1.1041,
      "step": 3281
    },
    {
      "epoch": 0.500152392563243,
      "grad_norm": 0.94921875,
      "learning_rate": 0.0001048104802947367,
      "loss": 0.8801,
      "step": 3282
    },
    {
      "epoch": 0.5003047851264858,
      "grad_norm": 0.98046875,
      "learning_rate": 0.00010476117954405715,
      "loss": 0.9352,
      "step": 3283
    },
    {
      "epoch": 0.5004571776897287,
      "grad_norm": 1.2109375,
      "learning_rate": 0.0001047118776334858,
      "loss": 0.9361,
      "step": 3284
    },
    {
      "epoch": 0.5006095702529717,
      "grad_norm": 0.890625,
      "learning_rate": 0.0001046625745750333,
      "loss": 0.9422,
      "step": 3285
    },
    {
      "epoch": 0.5007619628162145,
      "grad_norm": 1.109375,
      "learning_rate": 0.00010461327038071059,
      "loss": 1.1304,
      "step": 3286
    },
    {
      "epoch": 0.5009143553794575,
      "grad_norm": 0.9609375,
      "learning_rate": 0.00010456396506252884,
      "loss": 1.0162,
      "step": 3287
    },
    {
      "epoch": 0.5010667479427003,
      "grad_norm": 1.015625,
      "learning_rate": 0.00010451465863249961,
      "loss": 1.0568,
      "step": 3288
    },
    {
      "epoch": 0.5012191405059433,
      "grad_norm": 0.984375,
      "learning_rate": 0.0001044653511026346,
      "loss": 1.0605,
      "step": 3289
    },
    {
      "epoch": 0.5013715330691862,
      "grad_norm": 1.3203125,
      "learning_rate": 0.00010441604248494586,
      "loss": 0.9845,
      "step": 3290
    },
    {
      "epoch": 0.5015239256324291,
      "grad_norm": 0.9609375,
      "learning_rate": 0.00010436673279144564,
      "loss": 0.8914,
      "step": 3291
    },
    {
      "epoch": 0.501676318195672,
      "grad_norm": 1.140625,
      "learning_rate": 0.00010431742203414656,
      "loss": 1.1073,
      "step": 3292
    },
    {
      "epoch": 0.501828710758915,
      "grad_norm": 0.89453125,
      "learning_rate": 0.00010426811022506134,
      "loss": 1.0583,
      "step": 3293
    },
    {
      "epoch": 0.5019811033221578,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00010421879737620312,
      "loss": 0.8484,
      "step": 3294
    },
    {
      "epoch": 0.5021334958854008,
      "grad_norm": 0.859375,
      "learning_rate": 0.0001041694834995852,
      "loss": 1.0146,
      "step": 3295
    },
    {
      "epoch": 0.5022858884486437,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00010412016860722112,
      "loss": 1.0965,
      "step": 3296
    },
    {
      "epoch": 0.5024382810118866,
      "grad_norm": 1.1953125,
      "learning_rate": 0.00010407085271112472,
      "loss": 1.0726,
      "step": 3297
    },
    {
      "epoch": 0.5025906735751295,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00010402153582331006,
      "loss": 1.1279,
      "step": 3298
    },
    {
      "epoch": 0.5027430661383725,
      "grad_norm": 0.8828125,
      "learning_rate": 0.00010397221795579144,
      "loss": 0.9953,
      "step": 3299
    },
    {
      "epoch": 0.5028954587016153,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00010392289912058342,
      "loss": 0.9272,
      "step": 3300
    },
    {
      "epoch": 0.5030478512648583,
      "grad_norm": 0.88671875,
      "learning_rate": 0.00010387357932970076,
      "loss": 1.0196,
      "step": 3301
    },
    {
      "epoch": 0.5032002438281011,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00010382425859515846,
      "loss": 0.9606,
      "step": 3302
    },
    {
      "epoch": 0.5033526363913441,
      "grad_norm": 0.80078125,
      "learning_rate": 0.00010377493692897178,
      "loss": 0.9332,
      "step": 3303
    },
    {
      "epoch": 0.503505028954587,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00010372561434315618,
      "loss": 1.0389,
      "step": 3304
    },
    {
      "epoch": 0.5036574215178299,
      "grad_norm": 0.9296875,
      "learning_rate": 0.00010367629084972735,
      "loss": 1.0428,
      "step": 3305
    },
    {
      "epoch": 0.5038098140810728,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00010362696646070121,
      "loss": 1.0134,
      "step": 3306
    },
    {
      "epoch": 0.5039622066443158,
      "grad_norm": 1.078125,
      "learning_rate": 0.00010357764118809386,
      "loss": 0.9016,
      "step": 3307
    },
    {
      "epoch": 0.5041145992075586,
      "grad_norm": 0.90625,
      "learning_rate": 0.00010352831504392168,
      "loss": 0.8075,
      "step": 3308
    },
    {
      "epoch": 0.5042669917708016,
      "grad_norm": 1.0859375,
      "learning_rate": 0.00010347898804020124,
      "loss": 0.9259,
      "step": 3309
    },
    {
      "epoch": 0.5044193843340445,
      "grad_norm": 0.86328125,
      "learning_rate": 0.00010342966018894925,
      "loss": 0.7854,
      "step": 3310
    },
    {
      "epoch": 0.5045717768972874,
      "grad_norm": 1.0546875,
      "learning_rate": 0.00010338033150218274,
      "loss": 1.0284,
      "step": 3311
    },
    {
      "epoch": 0.5047241694605303,
      "grad_norm": 0.83203125,
      "learning_rate": 0.00010333100199191887,
      "loss": 1.0429,
      "step": 3312
    },
    {
      "epoch": 0.5048765620237733,
      "grad_norm": 0.9921875,
      "learning_rate": 0.000103281671670175,
      "loss": 0.9724,
      "step": 3313
    },
    {
      "epoch": 0.5050289545870161,
      "grad_norm": 1.140625,
      "learning_rate": 0.00010323234054896871,
      "loss": 1.2916,
      "step": 3314
    },
    {
      "epoch": 0.5051813471502591,
      "grad_norm": 0.8203125,
      "learning_rate": 0.0001031830086403178,
      "loss": 0.9024,
      "step": 3315
    },
    {
      "epoch": 0.5053337397135019,
      "grad_norm": 0.9296875,
      "learning_rate": 0.00010313367595624019,
      "loss": 1.0753,
      "step": 3316
    },
    {
      "epoch": 0.5054861322767449,
      "grad_norm": 1.1875,
      "learning_rate": 0.0001030843425087541,
      "loss": 0.8899,
      "step": 3317
    },
    {
      "epoch": 0.5056385248399878,
      "grad_norm": 0.87890625,
      "learning_rate": 0.0001030350083098778,
      "loss": 0.8718,
      "step": 3318
    },
    {
      "epoch": 0.5057909174032307,
      "grad_norm": 0.96484375,
      "learning_rate": 0.00010298567337162981,
      "loss": 0.9835,
      "step": 3319
    },
    {
      "epoch": 0.5059433099664736,
      "grad_norm": 1.015625,
      "learning_rate": 0.00010293633770602886,
      "loss": 1.0535,
      "step": 3320
    },
    {
      "epoch": 0.5060957025297166,
      "grad_norm": 1.3359375,
      "learning_rate": 0.00010288700132509381,
      "loss": 1.1357,
      "step": 3321
    },
    {
      "epoch": 0.5062480950929594,
      "grad_norm": 0.828125,
      "learning_rate": 0.00010283766424084376,
      "loss": 1.1553,
      "step": 3322
    },
    {
      "epoch": 0.5064004876562024,
      "grad_norm": 0.77734375,
      "learning_rate": 0.00010278832646529788,
      "loss": 0.9592,
      "step": 3323
    },
    {
      "epoch": 0.5065528802194453,
      "grad_norm": 0.8671875,
      "learning_rate": 0.00010273898801047555,
      "loss": 0.9319,
      "step": 3324
    },
    {
      "epoch": 0.5067052727826882,
      "grad_norm": 1.078125,
      "learning_rate": 0.0001026896488883964,
      "loss": 1.032,
      "step": 3325
    },
    {
      "epoch": 0.5068576653459311,
      "grad_norm": 0.9296875,
      "learning_rate": 0.00010264030911108007,
      "loss": 0.8497,
      "step": 3326
    },
    {
      "epoch": 0.5070100579091741,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00010259096869054646,
      "loss": 0.923,
      "step": 3327
    },
    {
      "epoch": 0.5071624504724169,
      "grad_norm": 1.03125,
      "learning_rate": 0.00010254162763881563,
      "loss": 0.9849,
      "step": 3328
    },
    {
      "epoch": 0.5073148430356599,
      "grad_norm": 0.890625,
      "learning_rate": 0.00010249228596790777,
      "loss": 1.0473,
      "step": 3329
    },
    {
      "epoch": 0.5074672355989027,
      "grad_norm": 0.75,
      "learning_rate": 0.0001024429436898432,
      "loss": 0.9708,
      "step": 3330
    },
    {
      "epoch": 0.5076196281621457,
      "grad_norm": 0.81640625,
      "learning_rate": 0.00010239360081664242,
      "loss": 0.9655,
      "step": 3331
    },
    {
      "epoch": 0.5077720207253886,
      "grad_norm": 0.76953125,
      "learning_rate": 0.00010234425736032607,
      "loss": 0.8732,
      "step": 3332
    },
    {
      "epoch": 0.5079244132886315,
      "grad_norm": 0.9375,
      "learning_rate": 0.00010229491333291488,
      "loss": 0.9399,
      "step": 3333
    },
    {
      "epoch": 0.5080768058518744,
      "grad_norm": 0.9765625,
      "learning_rate": 0.00010224556874642983,
      "loss": 1.1136,
      "step": 3334
    },
    {
      "epoch": 0.5082291984151174,
      "grad_norm": 1.21875,
      "learning_rate": 0.00010219622361289191,
      "loss": 1.0267,
      "step": 3335
    },
    {
      "epoch": 0.5083815909783602,
      "grad_norm": 0.98828125,
      "learning_rate": 0.00010214687794432238,
      "loss": 1.0695,
      "step": 3336
    },
    {
      "epoch": 0.5085339835416032,
      "grad_norm": 1.2265625,
      "learning_rate": 0.0001020975317527425,
      "loss": 0.983,
      "step": 3337
    },
    {
      "epoch": 0.5086863761048461,
      "grad_norm": 0.75,
      "learning_rate": 0.00010204818505017371,
      "loss": 0.8459,
      "step": 3338
    },
    {
      "epoch": 0.508838768668089,
      "grad_norm": 0.953125,
      "learning_rate": 0.00010199883784863762,
      "loss": 1.0004,
      "step": 3339
    },
    {
      "epoch": 0.5089911612313319,
      "grad_norm": 1.359375,
      "learning_rate": 0.00010194949016015584,
      "loss": 0.9174,
      "step": 3340
    },
    {
      "epoch": 0.5091435537945749,
      "grad_norm": 1.09375,
      "learning_rate": 0.00010190014199675022,
      "loss": 0.9722,
      "step": 3341
    },
    {
      "epoch": 0.5092959463578177,
      "grad_norm": 0.75390625,
      "learning_rate": 0.00010185079337044274,
      "loss": 1.0491,
      "step": 3342
    },
    {
      "epoch": 0.5094483389210607,
      "grad_norm": 1.296875,
      "learning_rate": 0.00010180144429325533,
      "loss": 1.0303,
      "step": 3343
    },
    {
      "epoch": 0.5096007314843035,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00010175209477721019,
      "loss": 0.9432,
      "step": 3344
    },
    {
      "epoch": 0.5097531240475465,
      "grad_norm": 0.8828125,
      "learning_rate": 0.0001017027448343296,
      "loss": 0.8959,
      "step": 3345
    },
    {
      "epoch": 0.5099055166107894,
      "grad_norm": 0.7109375,
      "learning_rate": 0.00010165339447663587,
      "loss": 0.8993,
      "step": 3346
    },
    {
      "epoch": 0.5100579091740323,
      "grad_norm": 1.0,
      "learning_rate": 0.00010160404371615144,
      "loss": 0.8917,
      "step": 3347
    },
    {
      "epoch": 0.5102103017372752,
      "grad_norm": 1.4140625,
      "learning_rate": 0.00010155469256489891,
      "loss": 1.0146,
      "step": 3348
    },
    {
      "epoch": 0.5103626943005182,
      "grad_norm": 0.98046875,
      "learning_rate": 0.00010150534103490095,
      "loss": 1.0802,
      "step": 3349
    },
    {
      "epoch": 0.510515086863761,
      "grad_norm": 1.015625,
      "learning_rate": 0.0001014559891381802,
      "loss": 0.874,
      "step": 3350
    },
    {
      "epoch": 0.510667479427004,
      "grad_norm": 0.94921875,
      "learning_rate": 0.0001014066368867596,
      "loss": 1.0443,
      "step": 3351
    },
    {
      "epoch": 0.5108198719902469,
      "grad_norm": 0.98046875,
      "learning_rate": 0.00010135728429266203,
      "loss": 0.9455,
      "step": 3352
    },
    {
      "epoch": 0.5109722645534898,
      "grad_norm": 0.9375,
      "learning_rate": 0.0001013079313679105,
      "loss": 0.9716,
      "step": 3353
    },
    {
      "epoch": 0.5111246571167327,
      "grad_norm": 0.96875,
      "learning_rate": 0.00010125857812452804,
      "loss": 1.0899,
      "step": 3354
    },
    {
      "epoch": 0.5112770496799757,
      "grad_norm": 1.0,
      "learning_rate": 0.00010120922457453785,
      "loss": 0.9318,
      "step": 3355
    },
    {
      "epoch": 0.5114294422432185,
      "grad_norm": 1.0546875,
      "learning_rate": 0.00010115987072996323,
      "loss": 0.9946,
      "step": 3356
    },
    {
      "epoch": 0.5115818348064615,
      "grad_norm": 0.859375,
      "learning_rate": 0.00010111051660282736,
      "loss": 0.7756,
      "step": 3357
    },
    {
      "epoch": 0.5117342273697043,
      "grad_norm": 0.71875,
      "learning_rate": 0.00010106116220515367,
      "loss": 0.8886,
      "step": 3358
    },
    {
      "epoch": 0.5118866199329473,
      "grad_norm": 0.9921875,
      "learning_rate": 0.00010101180754896564,
      "loss": 1.0331,
      "step": 3359
    },
    {
      "epoch": 0.5120390124961902,
      "grad_norm": 0.86328125,
      "learning_rate": 0.00010096245264628672,
      "loss": 0.9745,
      "step": 3360
    },
    {
      "epoch": 0.5121914050594331,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00010091309750914049,
      "loss": 0.8742,
      "step": 3361
    },
    {
      "epoch": 0.512343797622676,
      "grad_norm": 0.96875,
      "learning_rate": 0.00010086374214955061,
      "loss": 1.1317,
      "step": 3362
    },
    {
      "epoch": 0.512496190185919,
      "grad_norm": 0.8828125,
      "learning_rate": 0.0001008143865795407,
      "loss": 0.9828,
      "step": 3363
    },
    {
      "epoch": 0.5126485827491618,
      "grad_norm": 1.0,
      "learning_rate": 0.00010076503081113449,
      "loss": 0.99,
      "step": 3364
    },
    {
      "epoch": 0.5128009753124048,
      "grad_norm": 1.0859375,
      "learning_rate": 0.0001007156748563558,
      "loss": 0.9729,
      "step": 3365
    },
    {
      "epoch": 0.5129533678756477,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00010066631872722842,
      "loss": 1.0012,
      "step": 3366
    },
    {
      "epoch": 0.5131057604388906,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00010061696243577623,
      "loss": 0.9849,
      "step": 3367
    },
    {
      "epoch": 0.5132581530021335,
      "grad_norm": 1.078125,
      "learning_rate": 0.00010056760599402308,
      "loss": 0.9587,
      "step": 3368
    },
    {
      "epoch": 0.5134105455653765,
      "grad_norm": 0.71484375,
      "learning_rate": 0.00010051824941399297,
      "loss": 0.8969,
      "step": 3369
    },
    {
      "epoch": 0.5135629381286193,
      "grad_norm": 1.296875,
      "learning_rate": 0.00010046889270770987,
      "loss": 1.0334,
      "step": 3370
    },
    {
      "epoch": 0.5137153306918623,
      "grad_norm": 1.15625,
      "learning_rate": 0.00010041953588719772,
      "loss": 1.0097,
      "step": 3371
    },
    {
      "epoch": 0.5138677232551051,
      "grad_norm": 1.1640625,
      "learning_rate": 0.00010037017896448062,
      "loss": 1.0317,
      "step": 3372
    },
    {
      "epoch": 0.5140201158183481,
      "grad_norm": 0.98046875,
      "learning_rate": 0.0001003208219515826,
      "loss": 0.8734,
      "step": 3373
    },
    {
      "epoch": 0.514172508381591,
      "grad_norm": 0.94921875,
      "learning_rate": 0.00010027146486052775,
      "loss": 1.0323,
      "step": 3374
    },
    {
      "epoch": 0.5143249009448339,
      "grad_norm": 0.86328125,
      "learning_rate": 0.00010022210770334013,
      "loss": 1.2279,
      "step": 3375
    },
    {
      "epoch": 0.5144772935080768,
      "grad_norm": 0.94921875,
      "learning_rate": 0.00010017275049204389,
      "loss": 1.0011,
      "step": 3376
    },
    {
      "epoch": 0.5146296860713198,
      "grad_norm": 1.171875,
      "learning_rate": 0.00010012339323866315,
      "loss": 0.9495,
      "step": 3377
    },
    {
      "epoch": 0.5147820786345626,
      "grad_norm": 1.109375,
      "learning_rate": 0.00010007403595522204,
      "loss": 0.9149,
      "step": 3378
    },
    {
      "epoch": 0.5149344711978056,
      "grad_norm": 1.1953125,
      "learning_rate": 0.00010002467865374472,
      "loss": 0.9464,
      "step": 3379
    },
    {
      "epoch": 0.5150868637610485,
      "grad_norm": 0.98828125,
      "learning_rate": 9.997532134625531e-05,
      "loss": 0.9079,
      "step": 3380
    },
    {
      "epoch": 0.5152392563242914,
      "grad_norm": 0.95703125,
      "learning_rate": 9.992596404477797e-05,
      "loss": 0.8251,
      "step": 3381
    },
    {
      "epoch": 0.5153916488875343,
      "grad_norm": 0.81640625,
      "learning_rate": 9.987660676133687e-05,
      "loss": 1.0259,
      "step": 3382
    },
    {
      "epoch": 0.5155440414507773,
      "grad_norm": 1.0703125,
      "learning_rate": 9.982724950795613e-05,
      "loss": 1.1618,
      "step": 3383
    },
    {
      "epoch": 0.5156964340140201,
      "grad_norm": 0.91015625,
      "learning_rate": 9.977789229665988e-05,
      "loss": 0.883,
      "step": 3384
    },
    {
      "epoch": 0.515848826577263,
      "grad_norm": 1.140625,
      "learning_rate": 9.972853513947228e-05,
      "loss": 0.9713,
      "step": 3385
    },
    {
      "epoch": 0.5160012191405059,
      "grad_norm": 1.0703125,
      "learning_rate": 9.967917804841742e-05,
      "loss": 0.9567,
      "step": 3386
    },
    {
      "epoch": 0.5161536117037488,
      "grad_norm": 1.2421875,
      "learning_rate": 9.962982103551939e-05,
      "loss": 1.0192,
      "step": 3387
    },
    {
      "epoch": 0.5163060042669918,
      "grad_norm": 0.96484375,
      "learning_rate": 9.95804641128023e-05,
      "loss": 0.9767,
      "step": 3388
    },
    {
      "epoch": 0.5164583968302346,
      "grad_norm": 1.1953125,
      "learning_rate": 9.953110729229017e-05,
      "loss": 0.9294,
      "step": 3389
    },
    {
      "epoch": 0.5166107893934776,
      "grad_norm": 1.0,
      "learning_rate": 9.948175058600704e-05,
      "loss": 0.96,
      "step": 3390
    },
    {
      "epoch": 0.5167631819567206,
      "grad_norm": 0.9921875,
      "learning_rate": 9.943239400597694e-05,
      "loss": 0.7016,
      "step": 3391
    },
    {
      "epoch": 0.5169155745199634,
      "grad_norm": 1.0546875,
      "learning_rate": 9.938303756422382e-05,
      "loss": 0.9751,
      "step": 3392
    },
    {
      "epoch": 0.5170679670832063,
      "grad_norm": 0.828125,
      "learning_rate": 9.93336812727716e-05,
      "loss": 0.9379,
      "step": 3393
    },
    {
      "epoch": 0.5172203596464493,
      "grad_norm": 1.09375,
      "learning_rate": 9.928432514364422e-05,
      "loss": 1.021,
      "step": 3394
    },
    {
      "epoch": 0.5173727522096921,
      "grad_norm": 0.92578125,
      "learning_rate": 9.923496918886552e-05,
      "loss": 1.1649,
      "step": 3395
    },
    {
      "epoch": 0.5175251447729351,
      "grad_norm": 1.0546875,
      "learning_rate": 9.918561342045932e-05,
      "loss": 0.9033,
      "step": 3396
    },
    {
      "epoch": 0.5176775373361779,
      "grad_norm": 1.765625,
      "learning_rate": 9.913625785044941e-05,
      "loss": 1.0168,
      "step": 3397
    },
    {
      "epoch": 0.5178299298994209,
      "grad_norm": 0.8671875,
      "learning_rate": 9.90869024908595e-05,
      "loss": 1.112,
      "step": 3398
    },
    {
      "epoch": 0.5179823224626638,
      "grad_norm": 0.91015625,
      "learning_rate": 9.90375473537133e-05,
      "loss": 0.9342,
      "step": 3399
    },
    {
      "epoch": 0.5181347150259067,
      "grad_norm": 0.8828125,
      "learning_rate": 9.898819245103439e-05,
      "loss": 0.8916,
      "step": 3400
    },
    {
      "epoch": 0.5182871075891496,
      "grad_norm": 0.8203125,
      "learning_rate": 9.893883779484634e-05,
      "loss": 0.9249,
      "step": 3401
    },
    {
      "epoch": 0.5184395001523926,
      "grad_norm": 1.03125,
      "learning_rate": 9.888948339717268e-05,
      "loss": 0.8677,
      "step": 3402
    },
    {
      "epoch": 0.5185918927156354,
      "grad_norm": 0.9609375,
      "learning_rate": 9.884012927003684e-05,
      "loss": 0.9504,
      "step": 3403
    },
    {
      "epoch": 0.5187442852788784,
      "grad_norm": 0.89453125,
      "learning_rate": 9.879077542546214e-05,
      "loss": 1.0343,
      "step": 3404
    },
    {
      "epoch": 0.5188966778421213,
      "grad_norm": 0.85546875,
      "learning_rate": 9.874142187547198e-05,
      "loss": 1.1939,
      "step": 3405
    },
    {
      "epoch": 0.5190490704053642,
      "grad_norm": 0.83203125,
      "learning_rate": 9.869206863208955e-05,
      "loss": 0.8102,
      "step": 3406
    },
    {
      "epoch": 0.5192014629686071,
      "grad_norm": 1.6640625,
      "learning_rate": 9.864271570733798e-05,
      "loss": 1.011,
      "step": 3407
    },
    {
      "epoch": 0.5193538555318501,
      "grad_norm": 1.03125,
      "learning_rate": 9.859336311324041e-05,
      "loss": 0.8837,
      "step": 3408
    },
    {
      "epoch": 0.5195062480950929,
      "grad_norm": 0.7421875,
      "learning_rate": 9.854401086181981e-05,
      "loss": 0.9243,
      "step": 3409
    },
    {
      "epoch": 0.5196586406583359,
      "grad_norm": 0.9453125,
      "learning_rate": 9.849465896509909e-05,
      "loss": 1.0596,
      "step": 3410
    },
    {
      "epoch": 0.5198110332215787,
      "grad_norm": 1.1484375,
      "learning_rate": 9.84453074351011e-05,
      "loss": 1.0037,
      "step": 3411
    },
    {
      "epoch": 0.5199634257848217,
      "grad_norm": 0.99609375,
      "learning_rate": 9.839595628384858e-05,
      "loss": 0.9986,
      "step": 3412
    },
    {
      "epoch": 0.5201158183480646,
      "grad_norm": 1.1953125,
      "learning_rate": 9.834660552336415e-05,
      "loss": 0.867,
      "step": 3413
    },
    {
      "epoch": 0.5202682109113075,
      "grad_norm": 0.90234375,
      "learning_rate": 9.829725516567044e-05,
      "loss": 0.8309,
      "step": 3414
    },
    {
      "epoch": 0.5204206034745504,
      "grad_norm": 1.359375,
      "learning_rate": 9.82479052227898e-05,
      "loss": 0.9473,
      "step": 3415
    },
    {
      "epoch": 0.5205729960377934,
      "grad_norm": 0.8125,
      "learning_rate": 9.819855570674468e-05,
      "loss": 0.9537,
      "step": 3416
    },
    {
      "epoch": 0.5207253886010362,
      "grad_norm": 1.0078125,
      "learning_rate": 9.81492066295573e-05,
      "loss": 0.8887,
      "step": 3417
    },
    {
      "epoch": 0.5208777811642792,
      "grad_norm": 1.0078125,
      "learning_rate": 9.809985800324977e-05,
      "loss": 1.0111,
      "step": 3418
    },
    {
      "epoch": 0.5210301737275221,
      "grad_norm": 0.953125,
      "learning_rate": 9.805050983984417e-05,
      "loss": 0.931,
      "step": 3419
    },
    {
      "epoch": 0.521182566290765,
      "grad_norm": 0.9609375,
      "learning_rate": 9.800116215136242e-05,
      "loss": 0.8909,
      "step": 3420
    },
    {
      "epoch": 0.5213349588540079,
      "grad_norm": 1.125,
      "learning_rate": 9.795181494982629e-05,
      "loss": 0.8777,
      "step": 3421
    },
    {
      "epoch": 0.5214873514172509,
      "grad_norm": 1.1640625,
      "learning_rate": 9.790246824725752e-05,
      "loss": 1.1093,
      "step": 3422
    },
    {
      "epoch": 0.5216397439804937,
      "grad_norm": 0.875,
      "learning_rate": 9.785312205567763e-05,
      "loss": 0.9759,
      "step": 3423
    },
    {
      "epoch": 0.5217921365437367,
      "grad_norm": 0.8046875,
      "learning_rate": 9.780377638710807e-05,
      "loss": 1.084,
      "step": 3424
    },
    {
      "epoch": 0.5219445291069795,
      "grad_norm": 0.9921875,
      "learning_rate": 9.77544312535702e-05,
      "loss": 1.034,
      "step": 3425
    },
    {
      "epoch": 0.5220969216702225,
      "grad_norm": 1.3515625,
      "learning_rate": 9.770508666708514e-05,
      "loss": 0.8289,
      "step": 3426
    },
    {
      "epoch": 0.5222493142334654,
      "grad_norm": 0.74609375,
      "learning_rate": 9.765574263967396e-05,
      "loss": 0.8395,
      "step": 3427
    },
    {
      "epoch": 0.5224017067967083,
      "grad_norm": 0.8203125,
      "learning_rate": 9.76063991833576e-05,
      "loss": 0.9499,
      "step": 3428
    },
    {
      "epoch": 0.5225540993599512,
      "grad_norm": 1.2109375,
      "learning_rate": 9.755705631015682e-05,
      "loss": 1.0992,
      "step": 3429
    },
    {
      "epoch": 0.5227064919231942,
      "grad_norm": 1.2109375,
      "learning_rate": 9.750771403209224e-05,
      "loss": 0.8409,
      "step": 3430
    },
    {
      "epoch": 0.522858884486437,
      "grad_norm": 1.2578125,
      "learning_rate": 9.74583723611844e-05,
      "loss": 1.0776,
      "step": 3431
    },
    {
      "epoch": 0.52301127704968,
      "grad_norm": 1.015625,
      "learning_rate": 9.740903130945355e-05,
      "loss": 0.9161,
      "step": 3432
    },
    {
      "epoch": 0.5231636696129229,
      "grad_norm": 0.9296875,
      "learning_rate": 9.735969088891997e-05,
      "loss": 1.0436,
      "step": 3433
    },
    {
      "epoch": 0.5233160621761658,
      "grad_norm": 0.81640625,
      "learning_rate": 9.731035111160365e-05,
      "loss": 0.8222,
      "step": 3434
    },
    {
      "epoch": 0.5234684547394087,
      "grad_norm": 0.94921875,
      "learning_rate": 9.726101198952445e-05,
      "loss": 0.9617,
      "step": 3435
    },
    {
      "epoch": 0.5236208473026517,
      "grad_norm": 0.9921875,
      "learning_rate": 9.721167353470215e-05,
      "loss": 1.1295,
      "step": 3436
    },
    {
      "epoch": 0.5237732398658945,
      "grad_norm": 0.89453125,
      "learning_rate": 9.716233575915627e-05,
      "loss": 0.9471,
      "step": 3437
    },
    {
      "epoch": 0.5239256324291375,
      "grad_norm": 0.92578125,
      "learning_rate": 9.711299867490617e-05,
      "loss": 0.9561,
      "step": 3438
    },
    {
      "epoch": 0.5240780249923803,
      "grad_norm": 1.0078125,
      "learning_rate": 9.706366229397116e-05,
      "loss": 0.9454,
      "step": 3439
    },
    {
      "epoch": 0.5242304175556233,
      "grad_norm": 0.828125,
      "learning_rate": 9.701432662837023e-05,
      "loss": 0.8372,
      "step": 3440
    },
    {
      "epoch": 0.5243828101188662,
      "grad_norm": 0.9609375,
      "learning_rate": 9.696499169012223e-05,
      "loss": 1.3425,
      "step": 3441
    },
    {
      "epoch": 0.5245352026821091,
      "grad_norm": 1.015625,
      "learning_rate": 9.691565749124593e-05,
      "loss": 1.0062,
      "step": 3442
    },
    {
      "epoch": 0.524687595245352,
      "grad_norm": 1.0546875,
      "learning_rate": 9.686632404375982e-05,
      "loss": 1.0638,
      "step": 3443
    },
    {
      "epoch": 0.524839987808595,
      "grad_norm": 0.671875,
      "learning_rate": 9.681699135968221e-05,
      "loss": 0.9531,
      "step": 3444
    },
    {
      "epoch": 0.5249923803718378,
      "grad_norm": 0.90234375,
      "learning_rate": 9.67676594510313e-05,
      "loss": 1.0453,
      "step": 3445
    },
    {
      "epoch": 0.5251447729350808,
      "grad_norm": 1.1875,
      "learning_rate": 9.671832832982502e-05,
      "loss": 0.9877,
      "step": 3446
    },
    {
      "epoch": 0.5252971654983237,
      "grad_norm": 1.109375,
      "learning_rate": 9.666899800808116e-05,
      "loss": 0.997,
      "step": 3447
    },
    {
      "epoch": 0.5254495580615666,
      "grad_norm": 1.640625,
      "learning_rate": 9.661966849781729e-05,
      "loss": 1.0429,
      "step": 3448
    },
    {
      "epoch": 0.5256019506248095,
      "grad_norm": 1.0703125,
      "learning_rate": 9.657033981105076e-05,
      "loss": 1.0936,
      "step": 3449
    },
    {
      "epoch": 0.5257543431880525,
      "grad_norm": 0.8671875,
      "learning_rate": 9.652101195979879e-05,
      "loss": 0.9633,
      "step": 3450
    },
    {
      "epoch": 0.5259067357512953,
      "grad_norm": 0.78515625,
      "learning_rate": 9.647168495607834e-05,
      "loss": 0.9806,
      "step": 3451
    },
    {
      "epoch": 0.5260591283145383,
      "grad_norm": 0.93359375,
      "learning_rate": 9.642235881190614e-05,
      "loss": 1.0209,
      "step": 3452
    },
    {
      "epoch": 0.5262115208777811,
      "grad_norm": 0.98828125,
      "learning_rate": 9.637303353929881e-05,
      "loss": 1.0172,
      "step": 3453
    },
    {
      "epoch": 0.5263639134410241,
      "grad_norm": 1.0625,
      "learning_rate": 9.632370915027268e-05,
      "loss": 1.0745,
      "step": 3454
    },
    {
      "epoch": 0.526516306004267,
      "grad_norm": 0.89453125,
      "learning_rate": 9.627438565684383e-05,
      "loss": 0.8943,
      "step": 3455
    },
    {
      "epoch": 0.5266686985675099,
      "grad_norm": 1.0546875,
      "learning_rate": 9.622506307102824e-05,
      "loss": 1.0366,
      "step": 3456
    },
    {
      "epoch": 0.5268210911307528,
      "grad_norm": 1.53125,
      "learning_rate": 9.617574140484157e-05,
      "loss": 1.0289,
      "step": 3457
    },
    {
      "epoch": 0.5269734836939958,
      "grad_norm": 1.0703125,
      "learning_rate": 9.612642067029926e-05,
      "loss": 1.0152,
      "step": 3458
    },
    {
      "epoch": 0.5271258762572386,
      "grad_norm": 0.8046875,
      "learning_rate": 9.607710087941659e-05,
      "loss": 0.9265,
      "step": 3459
    },
    {
      "epoch": 0.5272782688204816,
      "grad_norm": 1.2578125,
      "learning_rate": 9.602778204420859e-05,
      "loss": 1.087,
      "step": 3460
    },
    {
      "epoch": 0.5274306613837245,
      "grad_norm": 1.0078125,
      "learning_rate": 9.597846417668996e-05,
      "loss": 1.054,
      "step": 3461
    },
    {
      "epoch": 0.5275830539469674,
      "grad_norm": 0.96875,
      "learning_rate": 9.592914728887529e-05,
      "loss": 1.1532,
      "step": 3462
    },
    {
      "epoch": 0.5277354465102103,
      "grad_norm": 1.1328125,
      "learning_rate": 9.587983139277889e-05,
      "loss": 1.2201,
      "step": 3463
    },
    {
      "epoch": 0.5278878390734533,
      "grad_norm": 1.1640625,
      "learning_rate": 9.583051650041482e-05,
      "loss": 1.0497,
      "step": 3464
    },
    {
      "epoch": 0.5280402316366961,
      "grad_norm": 1.0546875,
      "learning_rate": 9.57812026237969e-05,
      "loss": 0.8747,
      "step": 3465
    },
    {
      "epoch": 0.5281926241999391,
      "grad_norm": 0.9921875,
      "learning_rate": 9.573188977493866e-05,
      "loss": 0.9965,
      "step": 3466
    },
    {
      "epoch": 0.5283450167631819,
      "grad_norm": 1.0625,
      "learning_rate": 9.568257796585348e-05,
      "loss": 1.1161,
      "step": 3467
    },
    {
      "epoch": 0.5284974093264249,
      "grad_norm": 0.80078125,
      "learning_rate": 9.563326720855437e-05,
      "loss": 1.0446,
      "step": 3468
    },
    {
      "epoch": 0.5286498018896678,
      "grad_norm": 0.92578125,
      "learning_rate": 9.558395751505415e-05,
      "loss": 0.915,
      "step": 3469
    },
    {
      "epoch": 0.5288021944529107,
      "grad_norm": 1.2265625,
      "learning_rate": 9.55346488973654e-05,
      "loss": 0.9382,
      "step": 3470
    },
    {
      "epoch": 0.5289545870161536,
      "grad_norm": 1.203125,
      "learning_rate": 9.548534136750041e-05,
      "loss": 1.0144,
      "step": 3471
    },
    {
      "epoch": 0.5291069795793966,
      "grad_norm": 0.82421875,
      "learning_rate": 9.543603493747116e-05,
      "loss": 1.0516,
      "step": 3472
    },
    {
      "epoch": 0.5292593721426394,
      "grad_norm": 0.9296875,
      "learning_rate": 9.538672961928944e-05,
      "loss": 0.792,
      "step": 3473
    },
    {
      "epoch": 0.5294117647058824,
      "grad_norm": 1.0078125,
      "learning_rate": 9.533742542496672e-05,
      "loss": 0.9687,
      "step": 3474
    },
    {
      "epoch": 0.5295641572691253,
      "grad_norm": 0.7578125,
      "learning_rate": 9.52881223665142e-05,
      "loss": 0.9451,
      "step": 3475
    },
    {
      "epoch": 0.5297165498323682,
      "grad_norm": 1.1484375,
      "learning_rate": 9.523882045594286e-05,
      "loss": 1.0558,
      "step": 3476
    },
    {
      "epoch": 0.5298689423956111,
      "grad_norm": 0.88671875,
      "learning_rate": 9.518951970526332e-05,
      "loss": 1.1983,
      "step": 3477
    },
    {
      "epoch": 0.5300213349588541,
      "grad_norm": 0.79296875,
      "learning_rate": 9.514022012648594e-05,
      "loss": 0.9019,
      "step": 3478
    },
    {
      "epoch": 0.5301737275220969,
      "grad_norm": 0.81640625,
      "learning_rate": 9.509092173162082e-05,
      "loss": 0.8953,
      "step": 3479
    },
    {
      "epoch": 0.5303261200853399,
      "grad_norm": 0.86328125,
      "learning_rate": 9.504162453267777e-05,
      "loss": 0.8866,
      "step": 3480
    },
    {
      "epoch": 0.5304785126485827,
      "grad_norm": 0.90234375,
      "learning_rate": 9.499232854166629e-05,
      "loss": 1.0328,
      "step": 3481
    },
    {
      "epoch": 0.5306309052118257,
      "grad_norm": 0.859375,
      "learning_rate": 9.494303377059559e-05,
      "loss": 1.092,
      "step": 3482
    },
    {
      "epoch": 0.5307832977750686,
      "grad_norm": 0.8828125,
      "learning_rate": 9.489374023147457e-05,
      "loss": 0.9689,
      "step": 3483
    },
    {
      "epoch": 0.5309356903383115,
      "grad_norm": 0.90625,
      "learning_rate": 9.484444793631186e-05,
      "loss": 0.935,
      "step": 3484
    },
    {
      "epoch": 0.5310880829015544,
      "grad_norm": 0.90234375,
      "learning_rate": 9.47951568971158e-05,
      "loss": 0.7909,
      "step": 3485
    },
    {
      "epoch": 0.5312404754647974,
      "grad_norm": 0.8515625,
      "learning_rate": 9.474586712589432e-05,
      "loss": 0.9149,
      "step": 3486
    },
    {
      "epoch": 0.5313928680280402,
      "grad_norm": 0.8203125,
      "learning_rate": 9.46965786346552e-05,
      "loss": 0.9421,
      "step": 3487
    },
    {
      "epoch": 0.5315452605912832,
      "grad_norm": 0.93359375,
      "learning_rate": 9.464729143540579e-05,
      "loss": 1.2216,
      "step": 3488
    },
    {
      "epoch": 0.5316976531545261,
      "grad_norm": 1.0078125,
      "learning_rate": 9.459800554015314e-05,
      "loss": 1.0028,
      "step": 3489
    },
    {
      "epoch": 0.531850045717769,
      "grad_norm": 0.765625,
      "learning_rate": 9.454872096090403e-05,
      "loss": 0.9555,
      "step": 3490
    },
    {
      "epoch": 0.5320024382810119,
      "grad_norm": 0.94921875,
      "learning_rate": 9.449943770966489e-05,
      "loss": 1.0035,
      "step": 3491
    },
    {
      "epoch": 0.5321548308442549,
      "grad_norm": 1.0234375,
      "learning_rate": 9.445015579844181e-05,
      "loss": 1.028,
      "step": 3492
    },
    {
      "epoch": 0.5323072234074977,
      "grad_norm": 1.09375,
      "learning_rate": 9.44008752392406e-05,
      "loss": 1.1241,
      "step": 3493
    },
    {
      "epoch": 0.5324596159707407,
      "grad_norm": 1.03125,
      "learning_rate": 9.435159604406667e-05,
      "loss": 1.0541,
      "step": 3494
    },
    {
      "epoch": 0.5326120085339835,
      "grad_norm": 0.88671875,
      "learning_rate": 9.43023182249252e-05,
      "loss": 0.9814,
      "step": 3495
    },
    {
      "epoch": 0.5327644010972264,
      "grad_norm": 1.09375,
      "learning_rate": 9.425304179382095e-05,
      "loss": 1.0857,
      "step": 3496
    },
    {
      "epoch": 0.5329167936604694,
      "grad_norm": 1.0234375,
      "learning_rate": 9.420376676275833e-05,
      "loss": 1.1025,
      "step": 3497
    },
    {
      "epoch": 0.5330691862237122,
      "grad_norm": 0.78515625,
      "learning_rate": 9.415449314374151e-05,
      "loss": 0.9733,
      "step": 3498
    },
    {
      "epoch": 0.5332215787869552,
      "grad_norm": 0.98046875,
      "learning_rate": 9.41052209487742e-05,
      "loss": 0.9845,
      "step": 3499
    },
    {
      "epoch": 0.5333739713501982,
      "grad_norm": 0.890625,
      "learning_rate": 9.405595018985984e-05,
      "loss": 0.8783,
      "step": 3500
    },
    {
      "epoch": 0.533526363913441,
      "grad_norm": 1.1171875,
      "learning_rate": 9.40066808790015e-05,
      "loss": 0.8525,
      "step": 3501
    },
    {
      "epoch": 0.533678756476684,
      "grad_norm": 1.0390625,
      "learning_rate": 9.395741302820189e-05,
      "loss": 1.0382,
      "step": 3502
    },
    {
      "epoch": 0.5338311490399269,
      "grad_norm": 0.9765625,
      "learning_rate": 9.390814664946331e-05,
      "loss": 1.2717,
      "step": 3503
    },
    {
      "epoch": 0.5339835416031697,
      "grad_norm": 1.140625,
      "learning_rate": 9.385888175478786e-05,
      "loss": 0.8601,
      "step": 3504
    },
    {
      "epoch": 0.5341359341664127,
      "grad_norm": 0.91796875,
      "learning_rate": 9.380961835617712e-05,
      "loss": 0.9574,
      "step": 3505
    },
    {
      "epoch": 0.5342883267296555,
      "grad_norm": 0.953125,
      "learning_rate": 9.376035646563233e-05,
      "loss": 0.9506,
      "step": 3506
    },
    {
      "epoch": 0.5344407192928985,
      "grad_norm": 0.80078125,
      "learning_rate": 9.371109609515448e-05,
      "loss": 1.0108,
      "step": 3507
    },
    {
      "epoch": 0.5345931118561414,
      "grad_norm": 0.765625,
      "learning_rate": 9.366183725674406e-05,
      "loss": 0.8327,
      "step": 3508
    },
    {
      "epoch": 0.5347455044193843,
      "grad_norm": 1.046875,
      "learning_rate": 9.361257996240118e-05,
      "loss": 1.0295,
      "step": 3509
    },
    {
      "epoch": 0.5348978969826272,
      "grad_norm": 0.86328125,
      "learning_rate": 9.356332422412571e-05,
      "loss": 0.9341,
      "step": 3510
    },
    {
      "epoch": 0.5350502895458702,
      "grad_norm": 0.90625,
      "learning_rate": 9.351407005391699e-05,
      "loss": 1.0899,
      "step": 3511
    },
    {
      "epoch": 0.535202682109113,
      "grad_norm": 0.734375,
      "learning_rate": 9.34648174637741e-05,
      "loss": 0.9676,
      "step": 3512
    },
    {
      "epoch": 0.535355074672356,
      "grad_norm": 0.7578125,
      "learning_rate": 9.341556646569565e-05,
      "loss": 0.8109,
      "step": 3513
    },
    {
      "epoch": 0.5355074672355989,
      "grad_norm": 0.77734375,
      "learning_rate": 9.336631707167989e-05,
      "loss": 0.9586,
      "step": 3514
    },
    {
      "epoch": 0.5356598597988418,
      "grad_norm": 1.0,
      "learning_rate": 9.331706929372469e-05,
      "loss": 0.9568,
      "step": 3515
    },
    {
      "epoch": 0.5358122523620847,
      "grad_norm": 0.78125,
      "learning_rate": 9.326782314382751e-05,
      "loss": 0.8643,
      "step": 3516
    },
    {
      "epoch": 0.5359646449253277,
      "grad_norm": 0.890625,
      "learning_rate": 9.321857863398543e-05,
      "loss": 0.9309,
      "step": 3517
    },
    {
      "epoch": 0.5361170374885705,
      "grad_norm": 1.0,
      "learning_rate": 9.31693357761951e-05,
      "loss": 0.9479,
      "step": 3518
    },
    {
      "epoch": 0.5362694300518135,
      "grad_norm": 0.88671875,
      "learning_rate": 9.312009458245282e-05,
      "loss": 0.977,
      "step": 3519
    },
    {
      "epoch": 0.5364218226150563,
      "grad_norm": 0.828125,
      "learning_rate": 9.307085506475441e-05,
      "loss": 0.898,
      "step": 3520
    },
    {
      "epoch": 0.5365742151782993,
      "grad_norm": 0.92578125,
      "learning_rate": 9.302161723509537e-05,
      "loss": 1.0785,
      "step": 3521
    },
    {
      "epoch": 0.5367266077415422,
      "grad_norm": 0.9453125,
      "learning_rate": 9.297238110547074e-05,
      "loss": 0.9921,
      "step": 3522
    },
    {
      "epoch": 0.5368790003047851,
      "grad_norm": 1.1328125,
      "learning_rate": 9.292314668787507e-05,
      "loss": 1.0378,
      "step": 3523
    },
    {
      "epoch": 0.537031392868028,
      "grad_norm": 1.1640625,
      "learning_rate": 9.287391399430268e-05,
      "loss": 1.0437,
      "step": 3524
    },
    {
      "epoch": 0.537183785431271,
      "grad_norm": 0.97265625,
      "learning_rate": 9.282468303674732e-05,
      "loss": 1.1769,
      "step": 3525
    },
    {
      "epoch": 0.5373361779945138,
      "grad_norm": 0.953125,
      "learning_rate": 9.277545382720232e-05,
      "loss": 1.0163,
      "step": 3526
    },
    {
      "epoch": 0.5374885705577568,
      "grad_norm": 0.83203125,
      "learning_rate": 9.272622637766068e-05,
      "loss": 0.7927,
      "step": 3527
    },
    {
      "epoch": 0.5376409631209997,
      "grad_norm": 1.109375,
      "learning_rate": 9.267700070011489e-05,
      "loss": 0.9001,
      "step": 3528
    },
    {
      "epoch": 0.5377933556842426,
      "grad_norm": 0.828125,
      "learning_rate": 9.262777680655703e-05,
      "loss": 0.8935,
      "step": 3529
    },
    {
      "epoch": 0.5379457482474855,
      "grad_norm": 0.85546875,
      "learning_rate": 9.257855470897878e-05,
      "loss": 0.9775,
      "step": 3530
    },
    {
      "epoch": 0.5380981408107285,
      "grad_norm": 1.1171875,
      "learning_rate": 9.25293344193713e-05,
      "loss": 1.0671,
      "step": 3531
    },
    {
      "epoch": 0.5382505333739713,
      "grad_norm": 1.0703125,
      "learning_rate": 9.248011594972539e-05,
      "loss": 1.0012,
      "step": 3532
    },
    {
      "epoch": 0.5384029259372143,
      "grad_norm": 1.0390625,
      "learning_rate": 9.24308993120314e-05,
      "loss": 0.9687,
      "step": 3533
    },
    {
      "epoch": 0.5385553185004571,
      "grad_norm": 1.203125,
      "learning_rate": 9.238168451827913e-05,
      "loss": 0.9862,
      "step": 3534
    },
    {
      "epoch": 0.5387077110637001,
      "grad_norm": 0.9453125,
      "learning_rate": 9.23324715804581e-05,
      "loss": 1.1459,
      "step": 3535
    },
    {
      "epoch": 0.538860103626943,
      "grad_norm": 1.0625,
      "learning_rate": 9.228326051055728e-05,
      "loss": 1.1119,
      "step": 3536
    },
    {
      "epoch": 0.5390124961901859,
      "grad_norm": 1.1640625,
      "learning_rate": 9.22340513205651e-05,
      "loss": 0.9702,
      "step": 3537
    },
    {
      "epoch": 0.5391648887534288,
      "grad_norm": 1.046875,
      "learning_rate": 9.218484402246974e-05,
      "loss": 1.0568,
      "step": 3538
    },
    {
      "epoch": 0.5393172813166718,
      "grad_norm": 0.83203125,
      "learning_rate": 9.213563862825878e-05,
      "loss": 0.9456,
      "step": 3539
    },
    {
      "epoch": 0.5394696738799146,
      "grad_norm": 0.82421875,
      "learning_rate": 9.208643514991928e-05,
      "loss": 1.0485,
      "step": 3540
    },
    {
      "epoch": 0.5396220664431576,
      "grad_norm": 0.8515625,
      "learning_rate": 9.203723359943802e-05,
      "loss": 1.0305,
      "step": 3541
    },
    {
      "epoch": 0.5397744590064005,
      "grad_norm": 1.109375,
      "learning_rate": 9.198803398880118e-05,
      "loss": 0.9674,
      "step": 3542
    },
    {
      "epoch": 0.5399268515696434,
      "grad_norm": 0.859375,
      "learning_rate": 9.193883632999441e-05,
      "loss": 0.9437,
      "step": 3543
    },
    {
      "epoch": 0.5400792441328863,
      "grad_norm": 0.9453125,
      "learning_rate": 9.188964063500307e-05,
      "loss": 0.9365,
      "step": 3544
    },
    {
      "epoch": 0.5402316366961293,
      "grad_norm": 0.97265625,
      "learning_rate": 9.184044691581188e-05,
      "loss": 0.9702,
      "step": 3545
    },
    {
      "epoch": 0.5403840292593721,
      "grad_norm": 0.77734375,
      "learning_rate": 9.179125518440518e-05,
      "loss": 0.9079,
      "step": 3546
    },
    {
      "epoch": 0.5405364218226151,
      "grad_norm": 1.3671875,
      "learning_rate": 9.174206545276677e-05,
      "loss": 1.0101,
      "step": 3547
    },
    {
      "epoch": 0.5406888143858579,
      "grad_norm": 1.15625,
      "learning_rate": 9.169287773287995e-05,
      "loss": 0.9957,
      "step": 3548
    },
    {
      "epoch": 0.5408412069491009,
      "grad_norm": 1.0234375,
      "learning_rate": 9.164369203672758e-05,
      "loss": 1.0304,
      "step": 3549
    },
    {
      "epoch": 0.5409935995123438,
      "grad_norm": 0.84765625,
      "learning_rate": 9.159450837629202e-05,
      "loss": 0.8194,
      "step": 3550
    },
    {
      "epoch": 0.5411459920755867,
      "grad_norm": 1.1328125,
      "learning_rate": 9.154532676355502e-05,
      "loss": 0.8751,
      "step": 3551
    },
    {
      "epoch": 0.5412983846388296,
      "grad_norm": 0.94140625,
      "learning_rate": 9.149614721049806e-05,
      "loss": 0.8661,
      "step": 3552
    },
    {
      "epoch": 0.5414507772020726,
      "grad_norm": 0.97265625,
      "learning_rate": 9.144696972910196e-05,
      "loss": 1.0002,
      "step": 3553
    },
    {
      "epoch": 0.5416031697653154,
      "grad_norm": 0.84765625,
      "learning_rate": 9.139779433134693e-05,
      "loss": 0.8512,
      "step": 3554
    },
    {
      "epoch": 0.5417555623285584,
      "grad_norm": 1.1015625,
      "learning_rate": 9.134862102921297e-05,
      "loss": 1.0318,
      "step": 3555
    },
    {
      "epoch": 0.5419079548918013,
      "grad_norm": 1.09375,
      "learning_rate": 9.129944983467935e-05,
      "loss": 0.9455,
      "step": 3556
    },
    {
      "epoch": 0.5420603474550442,
      "grad_norm": 0.82421875,
      "learning_rate": 9.125028075972479e-05,
      "loss": 0.8797,
      "step": 3557
    },
    {
      "epoch": 0.5422127400182871,
      "grad_norm": 1.0234375,
      "learning_rate": 9.120111381632772e-05,
      "loss": 1.102,
      "step": 3558
    },
    {
      "epoch": 0.5423651325815301,
      "grad_norm": 0.7421875,
      "learning_rate": 9.115194901646583e-05,
      "loss": 0.9539,
      "step": 3559
    },
    {
      "epoch": 0.5425175251447729,
      "grad_norm": 0.79296875,
      "learning_rate": 9.110278637211643e-05,
      "loss": 0.9955,
      "step": 3560
    },
    {
      "epoch": 0.5426699177080159,
      "grad_norm": 0.6953125,
      "learning_rate": 9.10536258952562e-05,
      "loss": 0.8557,
      "step": 3561
    },
    {
      "epoch": 0.5428223102712587,
      "grad_norm": 0.90234375,
      "learning_rate": 9.100446759786132e-05,
      "loss": 0.932,
      "step": 3562
    },
    {
      "epoch": 0.5429747028345017,
      "grad_norm": 1.734375,
      "learning_rate": 9.095531149190753e-05,
      "loss": 1.1118,
      "step": 3563
    },
    {
      "epoch": 0.5431270953977446,
      "grad_norm": 0.71875,
      "learning_rate": 9.090615758936994e-05,
      "loss": 0.813,
      "step": 3564
    },
    {
      "epoch": 0.5432794879609875,
      "grad_norm": 1.0234375,
      "learning_rate": 9.085700590222307e-05,
      "loss": 1.0631,
      "step": 3565
    },
    {
      "epoch": 0.5434318805242304,
      "grad_norm": 1.234375,
      "learning_rate": 9.080785644244108e-05,
      "loss": 1.0049,
      "step": 3566
    },
    {
      "epoch": 0.5435842730874734,
      "grad_norm": 1.40625,
      "learning_rate": 9.075870922199747e-05,
      "loss": 1.0863,
      "step": 3567
    },
    {
      "epoch": 0.5437366656507162,
      "grad_norm": 1.0625,
      "learning_rate": 9.070956425286514e-05,
      "loss": 1.0225,
      "step": 3568
    },
    {
      "epoch": 0.5438890582139592,
      "grad_norm": 1.1796875,
      "learning_rate": 9.066042154701658e-05,
      "loss": 0.8675,
      "step": 3569
    },
    {
      "epoch": 0.5440414507772021,
      "grad_norm": 1.125,
      "learning_rate": 9.061128111642365e-05,
      "loss": 1.136,
      "step": 3570
    },
    {
      "epoch": 0.544193843340445,
      "grad_norm": 1.15625,
      "learning_rate": 9.05621429730576e-05,
      "loss": 0.9438,
      "step": 3571
    },
    {
      "epoch": 0.5443462359036879,
      "grad_norm": 0.7421875,
      "learning_rate": 9.051300712888928e-05,
      "loss": 0.8696,
      "step": 3572
    },
    {
      "epoch": 0.5444986284669309,
      "grad_norm": 1.0703125,
      "learning_rate": 9.046387359588888e-05,
      "loss": 1.0455,
      "step": 3573
    },
    {
      "epoch": 0.5446510210301737,
      "grad_norm": 0.796875,
      "learning_rate": 9.041474238602594e-05,
      "loss": 0.7583,
      "step": 3574
    },
    {
      "epoch": 0.5448034135934167,
      "grad_norm": 1.0078125,
      "learning_rate": 9.036561351126963e-05,
      "loss": 1.0884,
      "step": 3575
    },
    {
      "epoch": 0.5449558061566595,
      "grad_norm": 0.74609375,
      "learning_rate": 9.031648698358838e-05,
      "loss": 0.9585,
      "step": 3576
    },
    {
      "epoch": 0.5451081987199025,
      "grad_norm": 0.796875,
      "learning_rate": 9.026736281495018e-05,
      "loss": 1.0085,
      "step": 3577
    },
    {
      "epoch": 0.5452605912831454,
      "grad_norm": 0.67578125,
      "learning_rate": 9.021824101732239e-05,
      "loss": 0.8025,
      "step": 3578
    },
    {
      "epoch": 0.5454129838463883,
      "grad_norm": 0.84765625,
      "learning_rate": 9.016912160267168e-05,
      "loss": 1.036,
      "step": 3579
    },
    {
      "epoch": 0.5455653764096312,
      "grad_norm": 0.734375,
      "learning_rate": 9.012000458296436e-05,
      "loss": 0.8768,
      "step": 3580
    },
    {
      "epoch": 0.5457177689728742,
      "grad_norm": 1.0078125,
      "learning_rate": 9.007088997016603e-05,
      "loss": 1.0552,
      "step": 3581
    },
    {
      "epoch": 0.545870161536117,
      "grad_norm": 1.0078125,
      "learning_rate": 9.002177777624162e-05,
      "loss": 1.033,
      "step": 3582
    },
    {
      "epoch": 0.54602255409936,
      "grad_norm": 0.95703125,
      "learning_rate": 8.997266801315565e-05,
      "loss": 0.9212,
      "step": 3583
    },
    {
      "epoch": 0.5461749466626029,
      "grad_norm": 0.94921875,
      "learning_rate": 8.9923560692872e-05,
      "loss": 0.987,
      "step": 3584
    },
    {
      "epoch": 0.5463273392258458,
      "grad_norm": 0.8671875,
      "learning_rate": 8.98744558273538e-05,
      "loss": 0.8618,
      "step": 3585
    },
    {
      "epoch": 0.5464797317890887,
      "grad_norm": 1.0703125,
      "learning_rate": 8.98253534285638e-05,
      "loss": 1.2136,
      "step": 3586
    },
    {
      "epoch": 0.5466321243523317,
      "grad_norm": 0.8046875,
      "learning_rate": 8.977625350846405e-05,
      "loss": 0.948,
      "step": 3587
    },
    {
      "epoch": 0.5467845169155745,
      "grad_norm": 0.875,
      "learning_rate": 8.97271560790159e-05,
      "loss": 0.9081,
      "step": 3588
    },
    {
      "epoch": 0.5469369094788175,
      "grad_norm": 1.046875,
      "learning_rate": 8.967806115218027e-05,
      "loss": 1.0182,
      "step": 3589
    },
    {
      "epoch": 0.5470893020420603,
      "grad_norm": 1.1640625,
      "learning_rate": 8.962896873991744e-05,
      "loss": 1.0715,
      "step": 3590
    },
    {
      "epoch": 0.5472416946053033,
      "grad_norm": 1.1328125,
      "learning_rate": 8.957987885418689e-05,
      "loss": 1.0823,
      "step": 3591
    },
    {
      "epoch": 0.5473940871685462,
      "grad_norm": 1.171875,
      "learning_rate": 8.953079150694774e-05,
      "loss": 1.1815,
      "step": 3592
    },
    {
      "epoch": 0.547546479731789,
      "grad_norm": 0.75390625,
      "learning_rate": 8.948170671015828e-05,
      "loss": 0.964,
      "step": 3593
    },
    {
      "epoch": 0.547698872295032,
      "grad_norm": 0.87109375,
      "learning_rate": 8.943262447577635e-05,
      "loss": 0.7737,
      "step": 3594
    },
    {
      "epoch": 0.547851264858275,
      "grad_norm": 1.125,
      "learning_rate": 8.938354481575908e-05,
      "loss": 0.9191,
      "step": 3595
    },
    {
      "epoch": 0.5480036574215178,
      "grad_norm": 0.8046875,
      "learning_rate": 8.93344677420629e-05,
      "loss": 0.9752,
      "step": 3596
    },
    {
      "epoch": 0.5481560499847608,
      "grad_norm": 0.87109375,
      "learning_rate": 8.928539326664382e-05,
      "loss": 1.0838,
      "step": 3597
    },
    {
      "epoch": 0.5483084425480037,
      "grad_norm": 0.97265625,
      "learning_rate": 8.923632140145701e-05,
      "loss": 1.0297,
      "step": 3598
    },
    {
      "epoch": 0.5484608351112465,
      "grad_norm": 0.96484375,
      "learning_rate": 8.918725215845704e-05,
      "loss": 0.8041,
      "step": 3599
    },
    {
      "epoch": 0.5486132276744895,
      "grad_norm": 1.0,
      "learning_rate": 8.913818554959797e-05,
      "loss": 0.9533,
      "step": 3600
    },
    {
      "epoch": 0.5487656202377323,
      "grad_norm": 1.0859375,
      "learning_rate": 8.908912158683312e-05,
      "loss": 1.0597,
      "step": 3601
    },
    {
      "epoch": 0.5489180128009753,
      "grad_norm": 0.88671875,
      "learning_rate": 8.904006028211509e-05,
      "loss": 0.9778,
      "step": 3602
    },
    {
      "epoch": 0.5490704053642183,
      "grad_norm": 0.78125,
      "learning_rate": 8.899100164739602e-05,
      "loss": 0.8106,
      "step": 3603
    },
    {
      "epoch": 0.5492227979274611,
      "grad_norm": 0.87890625,
      "learning_rate": 8.894194569462729e-05,
      "loss": 1.0601,
      "step": 3604
    },
    {
      "epoch": 0.549375190490704,
      "grad_norm": 0.8828125,
      "learning_rate": 8.889289243575953e-05,
      "loss": 1.0254,
      "step": 3605
    },
    {
      "epoch": 0.549527583053947,
      "grad_norm": 1.015625,
      "learning_rate": 8.884384188274298e-05,
      "loss": 0.9623,
      "step": 3606
    },
    {
      "epoch": 0.5496799756171898,
      "grad_norm": 1.0078125,
      "learning_rate": 8.879479404752694e-05,
      "loss": 0.9539,
      "step": 3607
    },
    {
      "epoch": 0.5498323681804328,
      "grad_norm": 0.83203125,
      "learning_rate": 8.874574894206018e-05,
      "loss": 0.8512,
      "step": 3608
    },
    {
      "epoch": 0.5499847607436757,
      "grad_norm": 0.8984375,
      "learning_rate": 8.869670657829085e-05,
      "loss": 1.0493,
      "step": 3609
    },
    {
      "epoch": 0.5501371533069186,
      "grad_norm": 1.03125,
      "learning_rate": 8.864766696816629e-05,
      "loss": 0.9844,
      "step": 3610
    },
    {
      "epoch": 0.5502895458701615,
      "grad_norm": 0.8515625,
      "learning_rate": 8.859863012363335e-05,
      "loss": 1.0083,
      "step": 3611
    },
    {
      "epoch": 0.5504419384334045,
      "grad_norm": 0.984375,
      "learning_rate": 8.85495960566381e-05,
      "loss": 1.023,
      "step": 3612
    },
    {
      "epoch": 0.5505943309966473,
      "grad_norm": 1.1484375,
      "learning_rate": 8.850056477912583e-05,
      "loss": 0.9976,
      "step": 3613
    },
    {
      "epoch": 0.5507467235598903,
      "grad_norm": 0.80859375,
      "learning_rate": 8.845153630304139e-05,
      "loss": 1.09,
      "step": 3614
    },
    {
      "epoch": 0.5508991161231331,
      "grad_norm": 0.81640625,
      "learning_rate": 8.840251064032881e-05,
      "loss": 0.9207,
      "step": 3615
    },
    {
      "epoch": 0.5510515086863761,
      "grad_norm": 1.0703125,
      "learning_rate": 8.835348780293133e-05,
      "loss": 1.2033,
      "step": 3616
    },
    {
      "epoch": 0.551203901249619,
      "grad_norm": 1.2421875,
      "learning_rate": 8.830446780279176e-05,
      "loss": 1.0792,
      "step": 3617
    },
    {
      "epoch": 0.5513562938128619,
      "grad_norm": 0.953125,
      "learning_rate": 8.825545065185203e-05,
      "loss": 1.0155,
      "step": 3618
    },
    {
      "epoch": 0.5515086863761048,
      "grad_norm": 0.90625,
      "learning_rate": 8.820643636205335e-05,
      "loss": 0.9608,
      "step": 3619
    },
    {
      "epoch": 0.5516610789393478,
      "grad_norm": 0.8984375,
      "learning_rate": 8.815742494533644e-05,
      "loss": 0.9284,
      "step": 3620
    },
    {
      "epoch": 0.5518134715025906,
      "grad_norm": 0.6875,
      "learning_rate": 8.810841641364107e-05,
      "loss": 0.7877,
      "step": 3621
    },
    {
      "epoch": 0.5519658640658336,
      "grad_norm": 0.83203125,
      "learning_rate": 8.805941077890643e-05,
      "loss": 0.7871,
      "step": 3622
    },
    {
      "epoch": 0.5521182566290765,
      "grad_norm": 0.9375,
      "learning_rate": 8.801040805307112e-05,
      "loss": 0.9616,
      "step": 3623
    },
    {
      "epoch": 0.5522706491923194,
      "grad_norm": 0.86328125,
      "learning_rate": 8.796140824807272e-05,
      "loss": 0.9579,
      "step": 3624
    },
    {
      "epoch": 0.5524230417555623,
      "grad_norm": 0.8828125,
      "learning_rate": 8.791241137584846e-05,
      "loss": 0.9142,
      "step": 3625
    },
    {
      "epoch": 0.5525754343188053,
      "grad_norm": 0.82421875,
      "learning_rate": 8.786341744833463e-05,
      "loss": 0.8446,
      "step": 3626
    },
    {
      "epoch": 0.5527278268820481,
      "grad_norm": 0.59765625,
      "learning_rate": 8.781442647746676e-05,
      "loss": 0.9197,
      "step": 3627
    },
    {
      "epoch": 0.5528802194452911,
      "grad_norm": 1.1953125,
      "learning_rate": 8.776543847517988e-05,
      "loss": 1.0532,
      "step": 3628
    },
    {
      "epoch": 0.5530326120085339,
      "grad_norm": 0.83984375,
      "learning_rate": 8.771645345340815e-05,
      "loss": 0.9863,
      "step": 3629
    },
    {
      "epoch": 0.5531850045717769,
      "grad_norm": 1.0078125,
      "learning_rate": 8.766747142408494e-05,
      "loss": 0.952,
      "step": 3630
    },
    {
      "epoch": 0.5533373971350198,
      "grad_norm": 1.0703125,
      "learning_rate": 8.761849239914307e-05,
      "loss": 0.9009,
      "step": 3631
    },
    {
      "epoch": 0.5534897896982627,
      "grad_norm": 0.72265625,
      "learning_rate": 8.756951639051451e-05,
      "loss": 0.9464,
      "step": 3632
    },
    {
      "epoch": 0.5536421822615056,
      "grad_norm": 1.015625,
      "learning_rate": 8.752054341013047e-05,
      "loss": 1.0381,
      "step": 3633
    },
    {
      "epoch": 0.5537945748247486,
      "grad_norm": 0.9375,
      "learning_rate": 8.747157346992156e-05,
      "loss": 0.9689,
      "step": 3634
    },
    {
      "epoch": 0.5539469673879914,
      "grad_norm": 0.984375,
      "learning_rate": 8.742260658181751e-05,
      "loss": 0.9725,
      "step": 3635
    },
    {
      "epoch": 0.5540993599512344,
      "grad_norm": 0.89453125,
      "learning_rate": 8.73736427577473e-05,
      "loss": 0.9181,
      "step": 3636
    },
    {
      "epoch": 0.5542517525144773,
      "grad_norm": 1.0078125,
      "learning_rate": 8.732468200963936e-05,
      "loss": 0.919,
      "step": 3637
    },
    {
      "epoch": 0.5544041450777202,
      "grad_norm": 1.0546875,
      "learning_rate": 8.727572434942112e-05,
      "loss": 0.9085,
      "step": 3638
    },
    {
      "epoch": 0.5545565376409631,
      "grad_norm": 0.89453125,
      "learning_rate": 8.722676978901939e-05,
      "loss": 1.0985,
      "step": 3639
    },
    {
      "epoch": 0.5547089302042061,
      "grad_norm": 0.859375,
      "learning_rate": 8.717781834036027e-05,
      "loss": 0.9949,
      "step": 3640
    },
    {
      "epoch": 0.5548613227674489,
      "grad_norm": 0.94921875,
      "learning_rate": 8.712887001536893e-05,
      "loss": 0.8087,
      "step": 3641
    },
    {
      "epoch": 0.5550137153306919,
      "grad_norm": 1.2265625,
      "learning_rate": 8.707992482596998e-05,
      "loss": 1.1798,
      "step": 3642
    },
    {
      "epoch": 0.5551661078939347,
      "grad_norm": 1.1875,
      "learning_rate": 8.703098278408717e-05,
      "loss": 0.9647,
      "step": 3643
    },
    {
      "epoch": 0.5553185004571777,
      "grad_norm": 0.86328125,
      "learning_rate": 8.698204390164339e-05,
      "loss": 1.0993,
      "step": 3644
    },
    {
      "epoch": 0.5554708930204206,
      "grad_norm": 1.3125,
      "learning_rate": 8.693310819056094e-05,
      "loss": 1.1994,
      "step": 3645
    },
    {
      "epoch": 0.5556232855836635,
      "grad_norm": 1.28125,
      "learning_rate": 8.688417566276128e-05,
      "loss": 0.9916,
      "step": 3646
    },
    {
      "epoch": 0.5557756781469064,
      "grad_norm": 1.0859375,
      "learning_rate": 8.683524633016497e-05,
      "loss": 0.9592,
      "step": 3647
    },
    {
      "epoch": 0.5559280707101494,
      "grad_norm": 0.9921875,
      "learning_rate": 8.678632020469202e-05,
      "loss": 0.92,
      "step": 3648
    },
    {
      "epoch": 0.5560804632733922,
      "grad_norm": 1.0859375,
      "learning_rate": 8.673739729826148e-05,
      "loss": 0.8497,
      "step": 3649
    },
    {
      "epoch": 0.5562328558366352,
      "grad_norm": 1.1328125,
      "learning_rate": 8.668847762279163e-05,
      "loss": 0.9516,
      "step": 3650
    },
    {
      "epoch": 0.5563852483998781,
      "grad_norm": 0.8203125,
      "learning_rate": 8.663956119020009e-05,
      "loss": 1.0006,
      "step": 3651
    },
    {
      "epoch": 0.556537640963121,
      "grad_norm": 0.73046875,
      "learning_rate": 8.659064801240358e-05,
      "loss": 0.8822,
      "step": 3652
    },
    {
      "epoch": 0.5566900335263639,
      "grad_norm": 0.92578125,
      "learning_rate": 8.654173810131799e-05,
      "loss": 0.927,
      "step": 3653
    },
    {
      "epoch": 0.5568424260896069,
      "grad_norm": 1.234375,
      "learning_rate": 8.64928314688586e-05,
      "loss": 1.2534,
      "step": 3654
    },
    {
      "epoch": 0.5569948186528497,
      "grad_norm": 0.828125,
      "learning_rate": 8.644392812693968e-05,
      "loss": 0.9085,
      "step": 3655
    },
    {
      "epoch": 0.5571472112160927,
      "grad_norm": 1.1015625,
      "learning_rate": 8.639502808747477e-05,
      "loss": 0.8991,
      "step": 3656
    },
    {
      "epoch": 0.5572996037793355,
      "grad_norm": 1.1484375,
      "learning_rate": 8.634613136237671e-05,
      "loss": 1.2042,
      "step": 3657
    },
    {
      "epoch": 0.5574519963425785,
      "grad_norm": 0.99609375,
      "learning_rate": 8.629723796355735e-05,
      "loss": 1.2058,
      "step": 3658
    },
    {
      "epoch": 0.5576043889058214,
      "grad_norm": 1.140625,
      "learning_rate": 8.624834790292791e-05,
      "loss": 0.9005,
      "step": 3659
    },
    {
      "epoch": 0.5577567814690643,
      "grad_norm": 0.85546875,
      "learning_rate": 8.619946119239868e-05,
      "loss": 1.0209,
      "step": 3660
    },
    {
      "epoch": 0.5579091740323072,
      "grad_norm": 0.9453125,
      "learning_rate": 8.61505778438791e-05,
      "loss": 0.9614,
      "step": 3661
    },
    {
      "epoch": 0.5580615665955502,
      "grad_norm": 0.84765625,
      "learning_rate": 8.6101697869278e-05,
      "loss": 0.897,
      "step": 3662
    },
    {
      "epoch": 0.558213959158793,
      "grad_norm": 0.859375,
      "learning_rate": 8.605282128050311e-05,
      "loss": 1.0191,
      "step": 3663
    },
    {
      "epoch": 0.558366351722036,
      "grad_norm": 0.921875,
      "learning_rate": 8.600394808946151e-05,
      "loss": 0.9993,
      "step": 3664
    },
    {
      "epoch": 0.5585187442852789,
      "grad_norm": 0.75390625,
      "learning_rate": 8.595507830805946e-05,
      "loss": 0.903,
      "step": 3665
    },
    {
      "epoch": 0.5586711368485218,
      "grad_norm": 0.87109375,
      "learning_rate": 8.590621194820229e-05,
      "loss": 0.9489,
      "step": 3666
    },
    {
      "epoch": 0.5588235294117647,
      "grad_norm": 0.9453125,
      "learning_rate": 8.585734902179453e-05,
      "loss": 1.0892,
      "step": 3667
    },
    {
      "epoch": 0.5589759219750077,
      "grad_norm": 0.98046875,
      "learning_rate": 8.580848954073997e-05,
      "loss": 0.9623,
      "step": 3668
    },
    {
      "epoch": 0.5591283145382505,
      "grad_norm": 0.796875,
      "learning_rate": 8.575963351694141e-05,
      "loss": 0.8539,
      "step": 3669
    },
    {
      "epoch": 0.5592807071014935,
      "grad_norm": 0.9453125,
      "learning_rate": 8.571078096230087e-05,
      "loss": 1.0683,
      "step": 3670
    },
    {
      "epoch": 0.5594330996647363,
      "grad_norm": 0.8828125,
      "learning_rate": 8.566193188871965e-05,
      "loss": 0.9905,
      "step": 3671
    },
    {
      "epoch": 0.5595854922279793,
      "grad_norm": 0.75,
      "learning_rate": 8.561308630809797e-05,
      "loss": 0.9501,
      "step": 3672
    },
    {
      "epoch": 0.5597378847912222,
      "grad_norm": 1.0078125,
      "learning_rate": 8.55642442323353e-05,
      "loss": 0.9029,
      "step": 3673
    },
    {
      "epoch": 0.5598902773544651,
      "grad_norm": 0.86328125,
      "learning_rate": 8.55154056733304e-05,
      "loss": 1.0607,
      "step": 3674
    },
    {
      "epoch": 0.560042669917708,
      "grad_norm": 1.2265625,
      "learning_rate": 8.546657064298092e-05,
      "loss": 0.9962,
      "step": 3675
    },
    {
      "epoch": 0.560195062480951,
      "grad_norm": 0.984375,
      "learning_rate": 8.541773915318388e-05,
      "loss": 0.9591,
      "step": 3676
    },
    {
      "epoch": 0.5603474550441938,
      "grad_norm": 0.8671875,
      "learning_rate": 8.536891121583526e-05,
      "loss": 0.9632,
      "step": 3677
    },
    {
      "epoch": 0.5604998476074368,
      "grad_norm": 1.1328125,
      "learning_rate": 8.532008684283021e-05,
      "loss": 1.1506,
      "step": 3678
    },
    {
      "epoch": 0.5606522401706797,
      "grad_norm": 0.8671875,
      "learning_rate": 8.52712660460632e-05,
      "loss": 0.9771,
      "step": 3679
    },
    {
      "epoch": 0.5608046327339226,
      "grad_norm": 0.921875,
      "learning_rate": 8.522244883742753e-05,
      "loss": 0.9028,
      "step": 3680
    },
    {
      "epoch": 0.5609570252971655,
      "grad_norm": 0.984375,
      "learning_rate": 8.517363522881579e-05,
      "loss": 1.084,
      "step": 3681
    },
    {
      "epoch": 0.5611094178604085,
      "grad_norm": 0.84375,
      "learning_rate": 8.51248252321198e-05,
      "loss": 0.9307,
      "step": 3682
    },
    {
      "epoch": 0.5612618104236513,
      "grad_norm": 0.921875,
      "learning_rate": 8.507601885923025e-05,
      "loss": 0.8588,
      "step": 3683
    },
    {
      "epoch": 0.5614142029868943,
      "grad_norm": 1.046875,
      "learning_rate": 8.502721612203708e-05,
      "loss": 1.0059,
      "step": 3684
    },
    {
      "epoch": 0.5615665955501371,
      "grad_norm": 0.77734375,
      "learning_rate": 8.497841703242945e-05,
      "loss": 0.9753,
      "step": 3685
    },
    {
      "epoch": 0.5617189881133801,
      "grad_norm": 0.8359375,
      "learning_rate": 8.492962160229542e-05,
      "loss": 1.0801,
      "step": 3686
    },
    {
      "epoch": 0.561871380676623,
      "grad_norm": 0.921875,
      "learning_rate": 8.488082984352224e-05,
      "loss": 0.9489,
      "step": 3687
    },
    {
      "epoch": 0.5620237732398659,
      "grad_norm": 0.97265625,
      "learning_rate": 8.483204176799639e-05,
      "loss": 0.9665,
      "step": 3688
    },
    {
      "epoch": 0.5621761658031088,
      "grad_norm": 0.95703125,
      "learning_rate": 8.47832573876032e-05,
      "loss": 0.9054,
      "step": 3689
    },
    {
      "epoch": 0.5623285583663518,
      "grad_norm": 0.84765625,
      "learning_rate": 8.473447671422742e-05,
      "loss": 1.055,
      "step": 3690
    },
    {
      "epoch": 0.5624809509295946,
      "grad_norm": 1.1484375,
      "learning_rate": 8.468569975975262e-05,
      "loss": 1.0231,
      "step": 3691
    },
    {
      "epoch": 0.5626333434928376,
      "grad_norm": 1.0625,
      "learning_rate": 8.463692653606152e-05,
      "loss": 1.013,
      "step": 3692
    },
    {
      "epoch": 0.5627857360560805,
      "grad_norm": 1.0390625,
      "learning_rate": 8.458815705503611e-05,
      "loss": 0.907,
      "step": 3693
    },
    {
      "epoch": 0.5629381286193234,
      "grad_norm": 0.63671875,
      "learning_rate": 8.453939132855724e-05,
      "loss": 0.716,
      "step": 3694
    },
    {
      "epoch": 0.5630905211825663,
      "grad_norm": 0.90234375,
      "learning_rate": 8.449062936850494e-05,
      "loss": 0.9261,
      "step": 3695
    },
    {
      "epoch": 0.5632429137458093,
      "grad_norm": 1.359375,
      "learning_rate": 8.444187118675841e-05,
      "loss": 1.1537,
      "step": 3696
    },
    {
      "epoch": 0.5633953063090521,
      "grad_norm": 0.9453125,
      "learning_rate": 8.439311679519578e-05,
      "loss": 0.8989,
      "step": 3697
    },
    {
      "epoch": 0.563547698872295,
      "grad_norm": 0.703125,
      "learning_rate": 8.434436620569429e-05,
      "loss": 0.9732,
      "step": 3698
    },
    {
      "epoch": 0.5637000914355379,
      "grad_norm": 0.890625,
      "learning_rate": 8.429561943013036e-05,
      "loss": 0.8824,
      "step": 3699
    },
    {
      "epoch": 0.5638524839987809,
      "grad_norm": 0.8984375,
      "learning_rate": 8.424687648037937e-05,
      "loss": 1.0255,
      "step": 3700
    },
    {
      "epoch": 0.5640048765620238,
      "grad_norm": 1.03125,
      "learning_rate": 8.419813736831576e-05,
      "loss": 0.9873,
      "step": 3701
    },
    {
      "epoch": 0.5641572691252666,
      "grad_norm": 0.95703125,
      "learning_rate": 8.414940210581318e-05,
      "loss": 1.0746,
      "step": 3702
    },
    {
      "epoch": 0.5643096616885096,
      "grad_norm": 0.81640625,
      "learning_rate": 8.410067070474417e-05,
      "loss": 0.8515,
      "step": 3703
    },
    {
      "epoch": 0.5644620542517526,
      "grad_norm": 0.72265625,
      "learning_rate": 8.405194317698038e-05,
      "loss": 0.9518,
      "step": 3704
    },
    {
      "epoch": 0.5646144468149954,
      "grad_norm": 0.9453125,
      "learning_rate": 8.400321953439259e-05,
      "loss": 0.9061,
      "step": 3705
    },
    {
      "epoch": 0.5647668393782384,
      "grad_norm": 0.95703125,
      "learning_rate": 8.395449978885053e-05,
      "loss": 1.0439,
      "step": 3706
    },
    {
      "epoch": 0.5649192319414813,
      "grad_norm": 0.90625,
      "learning_rate": 8.39057839522231e-05,
      "loss": 0.9356,
      "step": 3707
    },
    {
      "epoch": 0.5650716245047241,
      "grad_norm": 0.84765625,
      "learning_rate": 8.385707203637814e-05,
      "loss": 0.8939,
      "step": 3708
    },
    {
      "epoch": 0.5652240170679671,
      "grad_norm": 0.75390625,
      "learning_rate": 8.380836405318253e-05,
      "loss": 0.9772,
      "step": 3709
    },
    {
      "epoch": 0.5653764096312099,
      "grad_norm": 1.1171875,
      "learning_rate": 8.375966001450233e-05,
      "loss": 1.1264,
      "step": 3710
    },
    {
      "epoch": 0.5655288021944529,
      "grad_norm": 1.0078125,
      "learning_rate": 8.371095993220248e-05,
      "loss": 0.9724,
      "step": 3711
    },
    {
      "epoch": 0.5656811947576958,
      "grad_norm": 0.69921875,
      "learning_rate": 8.366226381814697e-05,
      "loss": 0.9,
      "step": 3712
    },
    {
      "epoch": 0.5658335873209387,
      "grad_norm": 1.0546875,
      "learning_rate": 8.3613571684199e-05,
      "loss": 1.0179,
      "step": 3713
    },
    {
      "epoch": 0.5659859798841816,
      "grad_norm": 0.83203125,
      "learning_rate": 8.35648835422206e-05,
      "loss": 0.9162,
      "step": 3714
    },
    {
      "epoch": 0.5661383724474246,
      "grad_norm": 1.234375,
      "learning_rate": 8.351619940407288e-05,
      "loss": 1.1743,
      "step": 3715
    },
    {
      "epoch": 0.5662907650106674,
      "grad_norm": 0.78515625,
      "learning_rate": 8.346751928161607e-05,
      "loss": 1.0456,
      "step": 3716
    },
    {
      "epoch": 0.5664431575739104,
      "grad_norm": 0.93359375,
      "learning_rate": 8.341884318670929e-05,
      "loss": 0.8026,
      "step": 3717
    },
    {
      "epoch": 0.5665955501371533,
      "grad_norm": 0.88671875,
      "learning_rate": 8.337017113121073e-05,
      "loss": 1.0266,
      "step": 3718
    },
    {
      "epoch": 0.5667479427003962,
      "grad_norm": 0.90234375,
      "learning_rate": 8.332150312697765e-05,
      "loss": 0.9993,
      "step": 3719
    },
    {
      "epoch": 0.5669003352636391,
      "grad_norm": 0.7109375,
      "learning_rate": 8.327283918586624e-05,
      "loss": 0.9879,
      "step": 3720
    },
    {
      "epoch": 0.5670527278268821,
      "grad_norm": 1.0703125,
      "learning_rate": 8.322417931973173e-05,
      "loss": 0.9757,
      "step": 3721
    },
    {
      "epoch": 0.5672051203901249,
      "grad_norm": 1.1171875,
      "learning_rate": 8.317552354042838e-05,
      "loss": 1.2723,
      "step": 3722
    },
    {
      "epoch": 0.5673575129533679,
      "grad_norm": 0.79296875,
      "learning_rate": 8.312687185980941e-05,
      "loss": 0.8632,
      "step": 3723
    },
    {
      "epoch": 0.5675099055166107,
      "grad_norm": 0.85546875,
      "learning_rate": 8.307822428972715e-05,
      "loss": 1.0458,
      "step": 3724
    },
    {
      "epoch": 0.5676622980798537,
      "grad_norm": 1.0390625,
      "learning_rate": 8.302958084203278e-05,
      "loss": 0.8589,
      "step": 3725
    },
    {
      "epoch": 0.5678146906430966,
      "grad_norm": 0.8046875,
      "learning_rate": 8.298094152857652e-05,
      "loss": 0.9894,
      "step": 3726
    },
    {
      "epoch": 0.5679670832063395,
      "grad_norm": 0.79296875,
      "learning_rate": 8.29323063612077e-05,
      "loss": 0.8474,
      "step": 3727
    },
    {
      "epoch": 0.5681194757695824,
      "grad_norm": 0.78125,
      "learning_rate": 8.288367535177446e-05,
      "loss": 0.9603,
      "step": 3728
    },
    {
      "epoch": 0.5682718683328254,
      "grad_norm": 0.88671875,
      "learning_rate": 8.283504851212401e-05,
      "loss": 0.9205,
      "step": 3729
    },
    {
      "epoch": 0.5684242608960682,
      "grad_norm": 0.9921875,
      "learning_rate": 8.278642585410264e-05,
      "loss": 1.1211,
      "step": 3730
    },
    {
      "epoch": 0.5685766534593112,
      "grad_norm": 1.09375,
      "learning_rate": 8.273780738955544e-05,
      "loss": 1.0859,
      "step": 3731
    },
    {
      "epoch": 0.5687290460225541,
      "grad_norm": 0.76171875,
      "learning_rate": 8.268919313032659e-05,
      "loss": 0.952,
      "step": 3732
    },
    {
      "epoch": 0.568881438585797,
      "grad_norm": 1.09375,
      "learning_rate": 8.264058308825923e-05,
      "loss": 0.878,
      "step": 3733
    },
    {
      "epoch": 0.5690338311490399,
      "grad_norm": 0.8125,
      "learning_rate": 8.259197727519547e-05,
      "loss": 0.974,
      "step": 3734
    },
    {
      "epoch": 0.5691862237122829,
      "grad_norm": 0.91015625,
      "learning_rate": 8.254337570297634e-05,
      "loss": 0.7308,
      "step": 3735
    },
    {
      "epoch": 0.5693386162755257,
      "grad_norm": 1.0625,
      "learning_rate": 8.249477838344196e-05,
      "loss": 0.8779,
      "step": 3736
    },
    {
      "epoch": 0.5694910088387687,
      "grad_norm": 1.1484375,
      "learning_rate": 8.244618532843129e-05,
      "loss": 0.9168,
      "step": 3737
    },
    {
      "epoch": 0.5696434014020115,
      "grad_norm": 0.98046875,
      "learning_rate": 8.239759654978229e-05,
      "loss": 0.926,
      "step": 3738
    },
    {
      "epoch": 0.5697957939652545,
      "grad_norm": 0.765625,
      "learning_rate": 8.23490120593319e-05,
      "loss": 0.9124,
      "step": 3739
    },
    {
      "epoch": 0.5699481865284974,
      "grad_norm": 1.078125,
      "learning_rate": 8.230043186891598e-05,
      "loss": 0.9166,
      "step": 3740
    },
    {
      "epoch": 0.5701005790917403,
      "grad_norm": 0.85546875,
      "learning_rate": 8.225185599036942e-05,
      "loss": 0.9636,
      "step": 3741
    },
    {
      "epoch": 0.5702529716549832,
      "grad_norm": 1.1015625,
      "learning_rate": 8.220328443552597e-05,
      "loss": 1.0558,
      "step": 3742
    },
    {
      "epoch": 0.5704053642182262,
      "grad_norm": 0.69140625,
      "learning_rate": 8.215471721621829e-05,
      "loss": 1.111,
      "step": 3743
    },
    {
      "epoch": 0.570557756781469,
      "grad_norm": 1.0234375,
      "learning_rate": 8.210615434427818e-05,
      "loss": 1.15,
      "step": 3744
    },
    {
      "epoch": 0.570710149344712,
      "grad_norm": 1.171875,
      "learning_rate": 8.205759583153617e-05,
      "loss": 1.273,
      "step": 3745
    },
    {
      "epoch": 0.5708625419079549,
      "grad_norm": 0.99609375,
      "learning_rate": 8.200904168982182e-05,
      "loss": 1.1596,
      "step": 3746
    },
    {
      "epoch": 0.5710149344711978,
      "grad_norm": 0.7578125,
      "learning_rate": 8.196049193096364e-05,
      "loss": 0.9731,
      "step": 3747
    },
    {
      "epoch": 0.5711673270344407,
      "grad_norm": 0.79296875,
      "learning_rate": 8.191194656678904e-05,
      "loss": 1.004,
      "step": 3748
    },
    {
      "epoch": 0.5713197195976837,
      "grad_norm": 0.92578125,
      "learning_rate": 8.186340560912434e-05,
      "loss": 0.9245,
      "step": 3749
    },
    {
      "epoch": 0.5714721121609265,
      "grad_norm": 0.7734375,
      "learning_rate": 8.181486906979487e-05,
      "loss": 0.8986,
      "step": 3750
    },
    {
      "epoch": 0.5716245047241695,
      "grad_norm": 0.8515625,
      "learning_rate": 8.176633696062481e-05,
      "loss": 0.9869,
      "step": 3751
    },
    {
      "epoch": 0.5717768972874123,
      "grad_norm": 0.96484375,
      "learning_rate": 8.171780929343724e-05,
      "loss": 0.9002,
      "step": 3752
    },
    {
      "epoch": 0.5719292898506553,
      "grad_norm": 0.984375,
      "learning_rate": 8.166928608005427e-05,
      "loss": 1.0417,
      "step": 3753
    },
    {
      "epoch": 0.5720816824138982,
      "grad_norm": 0.8046875,
      "learning_rate": 8.162076733229674e-05,
      "loss": 0.9613,
      "step": 3754
    },
    {
      "epoch": 0.5722340749771411,
      "grad_norm": 0.99609375,
      "learning_rate": 8.157225306198465e-05,
      "loss": 0.9818,
      "step": 3755
    },
    {
      "epoch": 0.572386467540384,
      "grad_norm": 1.46875,
      "learning_rate": 8.152374328093672e-05,
      "loss": 1.1222,
      "step": 3756
    },
    {
      "epoch": 0.572538860103627,
      "grad_norm": 1.046875,
      "learning_rate": 8.147523800097055e-05,
      "loss": 1.1794,
      "step": 3757
    },
    {
      "epoch": 0.5726912526668698,
      "grad_norm": 0.9140625,
      "learning_rate": 8.142673723390288e-05,
      "loss": 1.056,
      "step": 3758
    },
    {
      "epoch": 0.5728436452301128,
      "grad_norm": 0.9140625,
      "learning_rate": 8.137824099154906e-05,
      "loss": 0.9968,
      "step": 3759
    },
    {
      "epoch": 0.5729960377933557,
      "grad_norm": 0.83984375,
      "learning_rate": 8.132974928572351e-05,
      "loss": 0.9196,
      "step": 3760
    },
    {
      "epoch": 0.5731484303565986,
      "grad_norm": 0.9453125,
      "learning_rate": 8.128126212823955e-05,
      "loss": 0.9214,
      "step": 3761
    },
    {
      "epoch": 0.5733008229198415,
      "grad_norm": 0.8046875,
      "learning_rate": 8.12327795309093e-05,
      "loss": 0.9382,
      "step": 3762
    },
    {
      "epoch": 0.5734532154830845,
      "grad_norm": 1.0859375,
      "learning_rate": 8.118430150554381e-05,
      "loss": 1.1229,
      "step": 3763
    },
    {
      "epoch": 0.5736056080463273,
      "grad_norm": 0.90234375,
      "learning_rate": 8.113582806395309e-05,
      "loss": 0.9204,
      "step": 3764
    },
    {
      "epoch": 0.5737580006095703,
      "grad_norm": 1.125,
      "learning_rate": 8.108735921794591e-05,
      "loss": 0.9712,
      "step": 3765
    },
    {
      "epoch": 0.5739103931728131,
      "grad_norm": 1.0859375,
      "learning_rate": 8.103889497932998e-05,
      "loss": 1.081,
      "step": 3766
    },
    {
      "epoch": 0.5740627857360561,
      "grad_norm": 0.91015625,
      "learning_rate": 8.099043535991191e-05,
      "loss": 0.7465,
      "step": 3767
    },
    {
      "epoch": 0.574215178299299,
      "grad_norm": 1.0546875,
      "learning_rate": 8.094198037149716e-05,
      "loss": 0.853,
      "step": 3768
    },
    {
      "epoch": 0.5743675708625419,
      "grad_norm": 0.73828125,
      "learning_rate": 8.089353002589001e-05,
      "loss": 0.9275,
      "step": 3769
    },
    {
      "epoch": 0.5745199634257848,
      "grad_norm": 1.09375,
      "learning_rate": 8.084508433489375e-05,
      "loss": 1.0202,
      "step": 3770
    },
    {
      "epoch": 0.5746723559890278,
      "grad_norm": 0.87109375,
      "learning_rate": 8.079664331031034e-05,
      "loss": 0.913,
      "step": 3771
    },
    {
      "epoch": 0.5748247485522706,
      "grad_norm": 1.0546875,
      "learning_rate": 8.074820696394082e-05,
      "loss": 1.0562,
      "step": 3772
    },
    {
      "epoch": 0.5749771411155136,
      "grad_norm": 1.046875,
      "learning_rate": 8.069977530758493e-05,
      "loss": 1.0541,
      "step": 3773
    },
    {
      "epoch": 0.5751295336787565,
      "grad_norm": 1.0,
      "learning_rate": 8.065134835304129e-05,
      "loss": 1.1194,
      "step": 3774
    },
    {
      "epoch": 0.5752819262419994,
      "grad_norm": 1.078125,
      "learning_rate": 8.060292611210745e-05,
      "loss": 1.1101,
      "step": 3775
    },
    {
      "epoch": 0.5754343188052423,
      "grad_norm": 0.8984375,
      "learning_rate": 8.055450859657974e-05,
      "loss": 1.0301,
      "step": 3776
    },
    {
      "epoch": 0.5755867113684853,
      "grad_norm": 1.15625,
      "learning_rate": 8.050609581825336e-05,
      "loss": 0.9556,
      "step": 3777
    },
    {
      "epoch": 0.5757391039317281,
      "grad_norm": 0.98046875,
      "learning_rate": 8.045768778892238e-05,
      "loss": 1.225,
      "step": 3778
    },
    {
      "epoch": 0.5758914964949711,
      "grad_norm": 0.890625,
      "learning_rate": 8.040928452037966e-05,
      "loss": 1.0213,
      "step": 3779
    },
    {
      "epoch": 0.5760438890582139,
      "grad_norm": 0.91796875,
      "learning_rate": 8.036088602441696e-05,
      "loss": 0.9056,
      "step": 3780
    },
    {
      "epoch": 0.5761962816214569,
      "grad_norm": 1.2421875,
      "learning_rate": 8.031249231282485e-05,
      "loss": 0.822,
      "step": 3781
    },
    {
      "epoch": 0.5763486741846998,
      "grad_norm": 1.015625,
      "learning_rate": 8.026410339739271e-05,
      "loss": 0.8517,
      "step": 3782
    },
    {
      "epoch": 0.5765010667479427,
      "grad_norm": 0.984375,
      "learning_rate": 8.021571928990878e-05,
      "loss": 0.8543,
      "step": 3783
    },
    {
      "epoch": 0.5766534593111856,
      "grad_norm": 0.9375,
      "learning_rate": 8.016734000216015e-05,
      "loss": 0.9516,
      "step": 3784
    },
    {
      "epoch": 0.5768058518744286,
      "grad_norm": 0.70703125,
      "learning_rate": 8.01189655459327e-05,
      "loss": 0.9051,
      "step": 3785
    },
    {
      "epoch": 0.5769582444376714,
      "grad_norm": 0.90234375,
      "learning_rate": 8.007059593301112e-05,
      "loss": 0.9714,
      "step": 3786
    },
    {
      "epoch": 0.5771106370009144,
      "grad_norm": 0.87109375,
      "learning_rate": 8.002223117517898e-05,
      "loss": 1.1937,
      "step": 3787
    },
    {
      "epoch": 0.5772630295641573,
      "grad_norm": 1.0625,
      "learning_rate": 7.997387128421858e-05,
      "loss": 0.9057,
      "step": 3788
    },
    {
      "epoch": 0.5774154221274002,
      "grad_norm": 0.78515625,
      "learning_rate": 7.992551627191115e-05,
      "loss": 0.9553,
      "step": 3789
    },
    {
      "epoch": 0.5775678146906431,
      "grad_norm": 0.9609375,
      "learning_rate": 7.987716615003662e-05,
      "loss": 1.2554,
      "step": 3790
    },
    {
      "epoch": 0.5777202072538861,
      "grad_norm": 1.3359375,
      "learning_rate": 7.982882093037378e-05,
      "loss": 1.059,
      "step": 3791
    },
    {
      "epoch": 0.5778725998171289,
      "grad_norm": 1.0703125,
      "learning_rate": 7.978048062470023e-05,
      "loss": 0.9592,
      "step": 3792
    },
    {
      "epoch": 0.5780249923803719,
      "grad_norm": 0.77734375,
      "learning_rate": 7.973214524479238e-05,
      "loss": 0.9313,
      "step": 3793
    },
    {
      "epoch": 0.5781773849436147,
      "grad_norm": 0.87890625,
      "learning_rate": 7.968381480242539e-05,
      "loss": 0.996,
      "step": 3794
    },
    {
      "epoch": 0.5783297775068577,
      "grad_norm": 0.8359375,
      "learning_rate": 7.963548930937327e-05,
      "loss": 1.0214,
      "step": 3795
    },
    {
      "epoch": 0.5784821700701006,
      "grad_norm": 0.98046875,
      "learning_rate": 7.95871687774088e-05,
      "loss": 0.995,
      "step": 3796
    },
    {
      "epoch": 0.5786345626333435,
      "grad_norm": 1.109375,
      "learning_rate": 7.953885321830354e-05,
      "loss": 0.9397,
      "step": 3797
    },
    {
      "epoch": 0.5787869551965864,
      "grad_norm": 0.921875,
      "learning_rate": 7.94905426438279e-05,
      "loss": 1.0726,
      "step": 3798
    },
    {
      "epoch": 0.5789393477598294,
      "grad_norm": 1.0625,
      "learning_rate": 7.9442237065751e-05,
      "loss": 0.8689,
      "step": 3799
    },
    {
      "epoch": 0.5790917403230722,
      "grad_norm": 0.99609375,
      "learning_rate": 7.939393649584076e-05,
      "loss": 0.939,
      "step": 3800
    },
    {
      "epoch": 0.5792441328863152,
      "grad_norm": 1.2109375,
      "learning_rate": 7.934564094586392e-05,
      "loss": 0.8324,
      "step": 3801
    },
    {
      "epoch": 0.5793965254495581,
      "grad_norm": 1.1484375,
      "learning_rate": 7.929735042758597e-05,
      "loss": 1.009,
      "step": 3802
    },
    {
      "epoch": 0.579548918012801,
      "grad_norm": 0.8203125,
      "learning_rate": 7.924906495277115e-05,
      "loss": 0.8477,
      "step": 3803
    },
    {
      "epoch": 0.5797013105760439,
      "grad_norm": 1.015625,
      "learning_rate": 7.920078453318252e-05,
      "loss": 0.7789,
      "step": 3804
    },
    {
      "epoch": 0.5798537031392869,
      "grad_norm": 0.96484375,
      "learning_rate": 7.915250918058186e-05,
      "loss": 1.0337,
      "step": 3805
    },
    {
      "epoch": 0.5800060957025297,
      "grad_norm": 0.79296875,
      "learning_rate": 7.910423890672977e-05,
      "loss": 0.8309,
      "step": 3806
    },
    {
      "epoch": 0.5801584882657727,
      "grad_norm": 0.9375,
      "learning_rate": 7.905597372338558e-05,
      "loss": 1.063,
      "step": 3807
    },
    {
      "epoch": 0.5803108808290155,
      "grad_norm": 0.984375,
      "learning_rate": 7.900771364230734e-05,
      "loss": 0.859,
      "step": 3808
    },
    {
      "epoch": 0.5804632733922584,
      "grad_norm": 0.85546875,
      "learning_rate": 7.895945867525197e-05,
      "loss": 1.1549,
      "step": 3809
    },
    {
      "epoch": 0.5806156659555014,
      "grad_norm": 0.7890625,
      "learning_rate": 7.891120883397502e-05,
      "loss": 0.9094,
      "step": 3810
    },
    {
      "epoch": 0.5807680585187442,
      "grad_norm": 0.91015625,
      "learning_rate": 7.886296413023084e-05,
      "loss": 1.0826,
      "step": 3811
    },
    {
      "epoch": 0.5809204510819872,
      "grad_norm": 0.83203125,
      "learning_rate": 7.881472457577257e-05,
      "loss": 1.0133,
      "step": 3812
    },
    {
      "epoch": 0.5810728436452302,
      "grad_norm": 0.984375,
      "learning_rate": 7.876649018235206e-05,
      "loss": 1.0477,
      "step": 3813
    },
    {
      "epoch": 0.581225236208473,
      "grad_norm": 1.0234375,
      "learning_rate": 7.871826096171985e-05,
      "loss": 0.9657,
      "step": 3814
    },
    {
      "epoch": 0.581377628771716,
      "grad_norm": 1.0,
      "learning_rate": 7.867003692562534e-05,
      "loss": 1.1637,
      "step": 3815
    },
    {
      "epoch": 0.5815300213349589,
      "grad_norm": 0.84375,
      "learning_rate": 7.862181808581654e-05,
      "loss": 1.0399,
      "step": 3816
    },
    {
      "epoch": 0.5816824138982017,
      "grad_norm": 1.0859375,
      "learning_rate": 7.857360445404028e-05,
      "loss": 0.9397,
      "step": 3817
    },
    {
      "epoch": 0.5818348064614447,
      "grad_norm": 0.91796875,
      "learning_rate": 7.852539604204211e-05,
      "loss": 0.9772,
      "step": 3818
    },
    {
      "epoch": 0.5819871990246875,
      "grad_norm": 1.0546875,
      "learning_rate": 7.847719286156628e-05,
      "loss": 0.967,
      "step": 3819
    },
    {
      "epoch": 0.5821395915879305,
      "grad_norm": 0.95703125,
      "learning_rate": 7.842899492435574e-05,
      "loss": 0.8536,
      "step": 3820
    },
    {
      "epoch": 0.5822919841511734,
      "grad_norm": 0.83984375,
      "learning_rate": 7.838080224215226e-05,
      "loss": 0.9406,
      "step": 3821
    },
    {
      "epoch": 0.5824443767144163,
      "grad_norm": 0.921875,
      "learning_rate": 7.833261482669621e-05,
      "loss": 1.002,
      "step": 3822
    },
    {
      "epoch": 0.5825967692776592,
      "grad_norm": 0.9765625,
      "learning_rate": 7.82844326897268e-05,
      "loss": 1.117,
      "step": 3823
    },
    {
      "epoch": 0.5827491618409022,
      "grad_norm": 1.1015625,
      "learning_rate": 7.823625584298188e-05,
      "loss": 1.1651,
      "step": 3824
    },
    {
      "epoch": 0.582901554404145,
      "grad_norm": 0.77734375,
      "learning_rate": 7.818808429819796e-05,
      "loss": 0.9493,
      "step": 3825
    },
    {
      "epoch": 0.583053946967388,
      "grad_norm": 1.0390625,
      "learning_rate": 7.813991806711039e-05,
      "loss": 0.8488,
      "step": 3826
    },
    {
      "epoch": 0.583206339530631,
      "grad_norm": 1.6796875,
      "learning_rate": 7.809175716145313e-05,
      "loss": 1.0473,
      "step": 3827
    },
    {
      "epoch": 0.5833587320938738,
      "grad_norm": 1.046875,
      "learning_rate": 7.804360159295886e-05,
      "loss": 1.1667,
      "step": 3828
    },
    {
      "epoch": 0.5835111246571167,
      "grad_norm": 0.77734375,
      "learning_rate": 7.799545137335902e-05,
      "loss": 0.8743,
      "step": 3829
    },
    {
      "epoch": 0.5836635172203597,
      "grad_norm": 0.7421875,
      "learning_rate": 7.794730651438364e-05,
      "loss": 0.859,
      "step": 3830
    },
    {
      "epoch": 0.5838159097836025,
      "grad_norm": 1.1796875,
      "learning_rate": 7.789916702776148e-05,
      "loss": 0.9273,
      "step": 3831
    },
    {
      "epoch": 0.5839683023468455,
      "grad_norm": 1.171875,
      "learning_rate": 7.78510329252201e-05,
      "loss": 0.9383,
      "step": 3832
    },
    {
      "epoch": 0.5841206949100883,
      "grad_norm": 0.9765625,
      "learning_rate": 7.780290421848557e-05,
      "loss": 0.8973,
      "step": 3833
    },
    {
      "epoch": 0.5842730874733313,
      "grad_norm": 1.0546875,
      "learning_rate": 7.775478091928277e-05,
      "loss": 0.9605,
      "step": 3834
    },
    {
      "epoch": 0.5844254800365742,
      "grad_norm": 1.09375,
      "learning_rate": 7.770666303933525e-05,
      "loss": 1.0715,
      "step": 3835
    },
    {
      "epoch": 0.5845778725998171,
      "grad_norm": 0.8828125,
      "learning_rate": 7.765855059036518e-05,
      "loss": 0.8273,
      "step": 3836
    },
    {
      "epoch": 0.58473026516306,
      "grad_norm": 0.7734375,
      "learning_rate": 7.761044358409349e-05,
      "loss": 0.9644,
      "step": 3837
    },
    {
      "epoch": 0.584882657726303,
      "grad_norm": 0.90625,
      "learning_rate": 7.756234203223969e-05,
      "loss": 1.0179,
      "step": 3838
    },
    {
      "epoch": 0.5850350502895458,
      "grad_norm": 0.8984375,
      "learning_rate": 7.751424594652202e-05,
      "loss": 1.0669,
      "step": 3839
    },
    {
      "epoch": 0.5851874428527888,
      "grad_norm": 0.9296875,
      "learning_rate": 7.746615533865742e-05,
      "loss": 1.1961,
      "step": 3840
    },
    {
      "epoch": 0.5853398354160317,
      "grad_norm": 0.8046875,
      "learning_rate": 7.741807022036141e-05,
      "loss": 1.0308,
      "step": 3841
    },
    {
      "epoch": 0.5854922279792746,
      "grad_norm": 0.9296875,
      "learning_rate": 7.736999060334821e-05,
      "loss": 0.8862,
      "step": 3842
    },
    {
      "epoch": 0.5856446205425175,
      "grad_norm": 0.9765625,
      "learning_rate": 7.732191649933073e-05,
      "loss": 0.9539,
      "step": 3843
    },
    {
      "epoch": 0.5857970131057605,
      "grad_norm": 0.93359375,
      "learning_rate": 7.727384792002054e-05,
      "loss": 1.078,
      "step": 3844
    },
    {
      "epoch": 0.5859494056690033,
      "grad_norm": 1.1328125,
      "learning_rate": 7.722578487712776e-05,
      "loss": 1.0008,
      "step": 3845
    },
    {
      "epoch": 0.5861017982322463,
      "grad_norm": 1.03125,
      "learning_rate": 7.71777273823613e-05,
      "loss": 0.9802,
      "step": 3846
    },
    {
      "epoch": 0.5862541907954891,
      "grad_norm": 0.99609375,
      "learning_rate": 7.712967544742866e-05,
      "loss": 0.9192,
      "step": 3847
    },
    {
      "epoch": 0.5864065833587321,
      "grad_norm": 1.0234375,
      "learning_rate": 7.708162908403594e-05,
      "loss": 1.0296,
      "step": 3848
    },
    {
      "epoch": 0.586558975921975,
      "grad_norm": 0.83984375,
      "learning_rate": 7.703358830388795e-05,
      "loss": 0.9351,
      "step": 3849
    },
    {
      "epoch": 0.5867113684852179,
      "grad_norm": 0.74609375,
      "learning_rate": 7.698555311868812e-05,
      "loss": 0.8599,
      "step": 3850
    },
    {
      "epoch": 0.5868637610484608,
      "grad_norm": 0.890625,
      "learning_rate": 7.693752354013849e-05,
      "loss": 0.772,
      "step": 3851
    },
    {
      "epoch": 0.5870161536117038,
      "grad_norm": 1.0859375,
      "learning_rate": 7.688949957993978e-05,
      "loss": 0.8923,
      "step": 3852
    },
    {
      "epoch": 0.5871685461749466,
      "grad_norm": 0.828125,
      "learning_rate": 7.68414812497913e-05,
      "loss": 1.0355,
      "step": 3853
    },
    {
      "epoch": 0.5873209387381896,
      "grad_norm": 1.0546875,
      "learning_rate": 7.679346856139103e-05,
      "loss": 0.9209,
      "step": 3854
    },
    {
      "epoch": 0.5874733313014325,
      "grad_norm": 0.93359375,
      "learning_rate": 7.674546152643553e-05,
      "loss": 1.1258,
      "step": 3855
    },
    {
      "epoch": 0.5876257238646754,
      "grad_norm": 0.98046875,
      "learning_rate": 7.669746015661998e-05,
      "loss": 1.0751,
      "step": 3856
    },
    {
      "epoch": 0.5877781164279183,
      "grad_norm": 0.83984375,
      "learning_rate": 7.664946446363825e-05,
      "loss": 0.8803,
      "step": 3857
    },
    {
      "epoch": 0.5879305089911613,
      "grad_norm": 1.046875,
      "learning_rate": 7.660147445918279e-05,
      "loss": 0.9607,
      "step": 3858
    },
    {
      "epoch": 0.5880829015544041,
      "grad_norm": 1.2265625,
      "learning_rate": 7.655349015494458e-05,
      "loss": 0.9266,
      "step": 3859
    },
    {
      "epoch": 0.5882352941176471,
      "grad_norm": 0.9453125,
      "learning_rate": 7.650551156261337e-05,
      "loss": 0.9421,
      "step": 3860
    },
    {
      "epoch": 0.5883876866808899,
      "grad_norm": 0.91015625,
      "learning_rate": 7.64575386938774e-05,
      "loss": 0.8951,
      "step": 3861
    },
    {
      "epoch": 0.5885400792441329,
      "grad_norm": 0.99609375,
      "learning_rate": 7.640957156042354e-05,
      "loss": 1.0166,
      "step": 3862
    },
    {
      "epoch": 0.5886924718073758,
      "grad_norm": 0.8046875,
      "learning_rate": 7.636161017393729e-05,
      "loss": 0.88,
      "step": 3863
    },
    {
      "epoch": 0.5888448643706187,
      "grad_norm": 0.921875,
      "learning_rate": 7.631365454610273e-05,
      "loss": 0.8392,
      "step": 3864
    },
    {
      "epoch": 0.5889972569338616,
      "grad_norm": 0.97265625,
      "learning_rate": 7.626570468860252e-05,
      "loss": 0.9823,
      "step": 3865
    },
    {
      "epoch": 0.5891496494971046,
      "grad_norm": 0.87890625,
      "learning_rate": 7.621776061311797e-05,
      "loss": 0.8637,
      "step": 3866
    },
    {
      "epoch": 0.5893020420603474,
      "grad_norm": 1.109375,
      "learning_rate": 7.616982233132895e-05,
      "loss": 1.0838,
      "step": 3867
    },
    {
      "epoch": 0.5894544346235904,
      "grad_norm": 0.8203125,
      "learning_rate": 7.612188985491385e-05,
      "loss": 0.9032,
      "step": 3868
    },
    {
      "epoch": 0.5896068271868333,
      "grad_norm": 1.140625,
      "learning_rate": 7.607396319554978e-05,
      "loss": 1.0583,
      "step": 3869
    },
    {
      "epoch": 0.5897592197500762,
      "grad_norm": 0.7734375,
      "learning_rate": 7.602604236491231e-05,
      "loss": 0.7704,
      "step": 3870
    },
    {
      "epoch": 0.5899116123133191,
      "grad_norm": 0.7109375,
      "learning_rate": 7.597812737467572e-05,
      "loss": 0.9181,
      "step": 3871
    },
    {
      "epoch": 0.5900640048765621,
      "grad_norm": 0.71484375,
      "learning_rate": 7.593021823651272e-05,
      "loss": 0.8736,
      "step": 3872
    },
    {
      "epoch": 0.5902163974398049,
      "grad_norm": 0.94921875,
      "learning_rate": 7.588231496209466e-05,
      "loss": 1.055,
      "step": 3873
    },
    {
      "epoch": 0.5903687900030479,
      "grad_norm": 0.87109375,
      "learning_rate": 7.583441756309153e-05,
      "loss": 1.0755,
      "step": 3874
    },
    {
      "epoch": 0.5905211825662907,
      "grad_norm": 0.8828125,
      "learning_rate": 7.578652605117178e-05,
      "loss": 0.9889,
      "step": 3875
    },
    {
      "epoch": 0.5906735751295337,
      "grad_norm": 0.8828125,
      "learning_rate": 7.573864043800246e-05,
      "loss": 0.8489,
      "step": 3876
    },
    {
      "epoch": 0.5908259676927766,
      "grad_norm": 0.953125,
      "learning_rate": 7.569076073524924e-05,
      "loss": 0.9471,
      "step": 3877
    },
    {
      "epoch": 0.5909783602560195,
      "grad_norm": 1.359375,
      "learning_rate": 7.564288695457629e-05,
      "loss": 0.9843,
      "step": 3878
    },
    {
      "epoch": 0.5911307528192624,
      "grad_norm": 0.98046875,
      "learning_rate": 7.559501910764629e-05,
      "loss": 1.1527,
      "step": 3879
    },
    {
      "epoch": 0.5912831453825054,
      "grad_norm": 0.83203125,
      "learning_rate": 7.554715720612063e-05,
      "loss": 0.856,
      "step": 3880
    },
    {
      "epoch": 0.5914355379457482,
      "grad_norm": 1.0078125,
      "learning_rate": 7.54993012616591e-05,
      "loss": 0.9877,
      "step": 3881
    },
    {
      "epoch": 0.5915879305089912,
      "grad_norm": 0.98828125,
      "learning_rate": 7.54514512859201e-05,
      "loss": 0.9735,
      "step": 3882
    },
    {
      "epoch": 0.5917403230722341,
      "grad_norm": 1.3515625,
      "learning_rate": 7.540360729056058e-05,
      "loss": 1.2914,
      "step": 3883
    },
    {
      "epoch": 0.591892715635477,
      "grad_norm": 0.76953125,
      "learning_rate": 7.535576928723604e-05,
      "loss": 0.9659,
      "step": 3884
    },
    {
      "epoch": 0.5920451081987199,
      "grad_norm": 0.875,
      "learning_rate": 7.530793728760048e-05,
      "loss": 0.8839,
      "step": 3885
    },
    {
      "epoch": 0.5921975007619629,
      "grad_norm": 1.1015625,
      "learning_rate": 7.526011130330649e-05,
      "loss": 0.9153,
      "step": 3886
    },
    {
      "epoch": 0.5923498933252057,
      "grad_norm": 0.8359375,
      "learning_rate": 7.521229134600513e-05,
      "loss": 0.9236,
      "step": 3887
    },
    {
      "epoch": 0.5925022858884487,
      "grad_norm": 1.453125,
      "learning_rate": 7.516447742734607e-05,
      "loss": 1.0368,
      "step": 3888
    },
    {
      "epoch": 0.5926546784516915,
      "grad_norm": 0.94140625,
      "learning_rate": 7.511666955897745e-05,
      "loss": 0.9824,
      "step": 3889
    },
    {
      "epoch": 0.5928070710149345,
      "grad_norm": 0.95703125,
      "learning_rate": 7.506886775254593e-05,
      "loss": 1.0721,
      "step": 3890
    },
    {
      "epoch": 0.5929594635781774,
      "grad_norm": 0.76953125,
      "learning_rate": 7.502107201969678e-05,
      "loss": 0.9492,
      "step": 3891
    },
    {
      "epoch": 0.5931118561414203,
      "grad_norm": 0.8203125,
      "learning_rate": 7.497328237207368e-05,
      "loss": 1.0424,
      "step": 3892
    },
    {
      "epoch": 0.5932642487046632,
      "grad_norm": 1.1484375,
      "learning_rate": 7.492549882131885e-05,
      "loss": 0.93,
      "step": 3893
    },
    {
      "epoch": 0.5934166412679062,
      "grad_norm": 1.046875,
      "learning_rate": 7.487772137907312e-05,
      "loss": 0.9269,
      "step": 3894
    },
    {
      "epoch": 0.593569033831149,
      "grad_norm": 0.91015625,
      "learning_rate": 7.482995005697573e-05,
      "loss": 1.022,
      "step": 3895
    },
    {
      "epoch": 0.593721426394392,
      "grad_norm": 0.875,
      "learning_rate": 7.478218486666443e-05,
      "loss": 0.8445,
      "step": 3896
    },
    {
      "epoch": 0.5938738189576349,
      "grad_norm": 0.91015625,
      "learning_rate": 7.473442581977556e-05,
      "loss": 0.8804,
      "step": 3897
    },
    {
      "epoch": 0.5940262115208778,
      "grad_norm": 1.09375,
      "learning_rate": 7.468667292794389e-05,
      "loss": 0.9606,
      "step": 3898
    },
    {
      "epoch": 0.5941786040841207,
      "grad_norm": 1.46875,
      "learning_rate": 7.463892620280267e-05,
      "loss": 1.2645,
      "step": 3899
    },
    {
      "epoch": 0.5943309966473637,
      "grad_norm": 1.03125,
      "learning_rate": 7.459118565598376e-05,
      "loss": 0.9789,
      "step": 3900
    },
    {
      "epoch": 0.5944833892106065,
      "grad_norm": 0.81640625,
      "learning_rate": 7.454345129911739e-05,
      "loss": 0.9435,
      "step": 3901
    },
    {
      "epoch": 0.5946357817738495,
      "grad_norm": 0.9609375,
      "learning_rate": 7.449572314383237e-05,
      "loss": 1.2477,
      "step": 3902
    },
    {
      "epoch": 0.5947881743370923,
      "grad_norm": 0.83984375,
      "learning_rate": 7.444800120175594e-05,
      "loss": 0.7523,
      "step": 3903
    },
    {
      "epoch": 0.5949405669003353,
      "grad_norm": 1.0859375,
      "learning_rate": 7.440028548451386e-05,
      "loss": 0.8926,
      "step": 3904
    },
    {
      "epoch": 0.5950929594635782,
      "grad_norm": 0.90234375,
      "learning_rate": 7.435257600373039e-05,
      "loss": 1.1871,
      "step": 3905
    },
    {
      "epoch": 0.595245352026821,
      "grad_norm": 0.78515625,
      "learning_rate": 7.43048727710282e-05,
      "loss": 0.97,
      "step": 3906
    },
    {
      "epoch": 0.595397744590064,
      "grad_norm": 0.77734375,
      "learning_rate": 7.42571757980285e-05,
      "loss": 0.7566,
      "step": 3907
    },
    {
      "epoch": 0.595550137153307,
      "grad_norm": 0.921875,
      "learning_rate": 7.420948509635097e-05,
      "loss": 0.956,
      "step": 3908
    },
    {
      "epoch": 0.5957025297165498,
      "grad_norm": 0.89453125,
      "learning_rate": 7.416180067761377e-05,
      "loss": 1.0305,
      "step": 3909
    },
    {
      "epoch": 0.5958549222797928,
      "grad_norm": 0.77734375,
      "learning_rate": 7.411412255343344e-05,
      "loss": 1.021,
      "step": 3910
    },
    {
      "epoch": 0.5960073148430357,
      "grad_norm": 1.0234375,
      "learning_rate": 7.406645073542514e-05,
      "loss": 0.9878,
      "step": 3911
    },
    {
      "epoch": 0.5961597074062785,
      "grad_norm": 0.79296875,
      "learning_rate": 7.401878523520236e-05,
      "loss": 0.939,
      "step": 3912
    },
    {
      "epoch": 0.5963120999695215,
      "grad_norm": 0.83984375,
      "learning_rate": 7.39711260643771e-05,
      "loss": 0.7252,
      "step": 3913
    },
    {
      "epoch": 0.5964644925327645,
      "grad_norm": 0.92578125,
      "learning_rate": 7.392347323455986e-05,
      "loss": 1.183,
      "step": 3914
    },
    {
      "epoch": 0.5966168850960073,
      "grad_norm": 1.2265625,
      "learning_rate": 7.38758267573595e-05,
      "loss": 1.0203,
      "step": 3915
    },
    {
      "epoch": 0.5967692776592503,
      "grad_norm": 0.9453125,
      "learning_rate": 7.382818664438342e-05,
      "loss": 0.9833,
      "step": 3916
    },
    {
      "epoch": 0.5969216702224931,
      "grad_norm": 0.765625,
      "learning_rate": 7.378055290723744e-05,
      "loss": 0.9311,
      "step": 3917
    },
    {
      "epoch": 0.597074062785736,
      "grad_norm": 1.0234375,
      "learning_rate": 7.37329255575258e-05,
      "loss": 1.1494,
      "step": 3918
    },
    {
      "epoch": 0.597226455348979,
      "grad_norm": 1.109375,
      "learning_rate": 7.368530460685124e-05,
      "loss": 1.0384,
      "step": 3919
    },
    {
      "epoch": 0.5973788479122218,
      "grad_norm": 0.859375,
      "learning_rate": 7.363769006681489e-05,
      "loss": 1.0286,
      "step": 3920
    },
    {
      "epoch": 0.5975312404754648,
      "grad_norm": 1.1796875,
      "learning_rate": 7.359008194901632e-05,
      "loss": 1.0647,
      "step": 3921
    },
    {
      "epoch": 0.5976836330387078,
      "grad_norm": 1.2109375,
      "learning_rate": 7.354248026505359e-05,
      "loss": 1.1266,
      "step": 3922
    },
    {
      "epoch": 0.5978360256019506,
      "grad_norm": 0.8828125,
      "learning_rate": 7.349488502652313e-05,
      "loss": 0.9542,
      "step": 3923
    },
    {
      "epoch": 0.5979884181651935,
      "grad_norm": 0.8046875,
      "learning_rate": 7.34472962450198e-05,
      "loss": 0.825,
      "step": 3924
    },
    {
      "epoch": 0.5981408107284365,
      "grad_norm": 1.03125,
      "learning_rate": 7.339971393213695e-05,
      "loss": 1.0699,
      "step": 3925
    },
    {
      "epoch": 0.5982932032916793,
      "grad_norm": 0.859375,
      "learning_rate": 7.335213809946634e-05,
      "loss": 0.8812,
      "step": 3926
    },
    {
      "epoch": 0.5984455958549223,
      "grad_norm": 0.87890625,
      "learning_rate": 7.330456875859805e-05,
      "loss": 1.0283,
      "step": 3927
    },
    {
      "epoch": 0.5985979884181651,
      "grad_norm": 1.0,
      "learning_rate": 7.325700592112072e-05,
      "loss": 0.973,
      "step": 3928
    },
    {
      "epoch": 0.5987503809814081,
      "grad_norm": 0.94921875,
      "learning_rate": 7.320944959862132e-05,
      "loss": 0.9973,
      "step": 3929
    },
    {
      "epoch": 0.598902773544651,
      "grad_norm": 0.8515625,
      "learning_rate": 7.316189980268524e-05,
      "loss": 0.9284,
      "step": 3930
    },
    {
      "epoch": 0.5990551661078939,
      "grad_norm": 0.87109375,
      "learning_rate": 7.311435654489631e-05,
      "loss": 0.9561,
      "step": 3931
    },
    {
      "epoch": 0.5992075586711368,
      "grad_norm": 0.84765625,
      "learning_rate": 7.306681983683676e-05,
      "loss": 0.9312,
      "step": 3932
    },
    {
      "epoch": 0.5993599512343798,
      "grad_norm": 1.21875,
      "learning_rate": 7.301928969008717e-05,
      "loss": 0.9758,
      "step": 3933
    },
    {
      "epoch": 0.5995123437976226,
      "grad_norm": 0.921875,
      "learning_rate": 7.297176611622664e-05,
      "loss": 1.0254,
      "step": 3934
    },
    {
      "epoch": 0.5996647363608656,
      "grad_norm": 0.9140625,
      "learning_rate": 7.292424912683253e-05,
      "loss": 1.0044,
      "step": 3935
    },
    {
      "epoch": 0.5998171289241085,
      "grad_norm": 0.62890625,
      "learning_rate": 7.28767387334807e-05,
      "loss": 0.781,
      "step": 3936
    },
    {
      "epoch": 0.5999695214873514,
      "grad_norm": 1.0078125,
      "learning_rate": 7.282923494774537e-05,
      "loss": 0.8754,
      "step": 3937
    },
    {
      "epoch": 0.6001219140505943,
      "grad_norm": 1.140625,
      "learning_rate": 7.27817377811991e-05,
      "loss": 0.8132,
      "step": 3938
    },
    {
      "epoch": 0.6002743066138373,
      "grad_norm": 0.546875,
      "learning_rate": 7.273424724541293e-05,
      "loss": 0.7118,
      "step": 3939
    },
    {
      "epoch": 0.6004266991770801,
      "grad_norm": 0.84765625,
      "learning_rate": 7.268676335195623e-05,
      "loss": 0.95,
      "step": 3940
    },
    {
      "epoch": 0.6005790917403231,
      "grad_norm": 1.390625,
      "learning_rate": 7.263928611239672e-05,
      "loss": 0.9423,
      "step": 3941
    },
    {
      "epoch": 0.6007314843035659,
      "grad_norm": 0.84765625,
      "learning_rate": 7.25918155383006e-05,
      "loss": 0.9581,
      "step": 3942
    },
    {
      "epoch": 0.6008838768668089,
      "grad_norm": 0.9375,
      "learning_rate": 7.254435164123234e-05,
      "loss": 0.9653,
      "step": 3943
    },
    {
      "epoch": 0.6010362694300518,
      "grad_norm": 0.88671875,
      "learning_rate": 7.249689443275482e-05,
      "loss": 0.832,
      "step": 3944
    },
    {
      "epoch": 0.6011886619932947,
      "grad_norm": 1.0625,
      "learning_rate": 7.244944392442935e-05,
      "loss": 0.9651,
      "step": 3945
    },
    {
      "epoch": 0.6013410545565376,
      "grad_norm": 0.8984375,
      "learning_rate": 7.240200012781551e-05,
      "loss": 1.058,
      "step": 3946
    },
    {
      "epoch": 0.6014934471197806,
      "grad_norm": 1.1953125,
      "learning_rate": 7.235456305447129e-05,
      "loss": 1.0359,
      "step": 3947
    },
    {
      "epoch": 0.6016458396830234,
      "grad_norm": 0.734375,
      "learning_rate": 7.230713271595307e-05,
      "loss": 0.8015,
      "step": 3948
    },
    {
      "epoch": 0.6017982322462664,
      "grad_norm": 0.91796875,
      "learning_rate": 7.225970912381556e-05,
      "loss": 0.9864,
      "step": 3949
    },
    {
      "epoch": 0.6019506248095093,
      "grad_norm": 1.03125,
      "learning_rate": 7.221229228961179e-05,
      "loss": 1.1419,
      "step": 3950
    },
    {
      "epoch": 0.6021030173727522,
      "grad_norm": 0.921875,
      "learning_rate": 7.216488222489325e-05,
      "loss": 1.0426,
      "step": 3951
    },
    {
      "epoch": 0.6022554099359951,
      "grad_norm": 0.78125,
      "learning_rate": 7.211747894120964e-05,
      "loss": 0.8224,
      "step": 3952
    },
    {
      "epoch": 0.6024078024992381,
      "grad_norm": 0.9609375,
      "learning_rate": 7.207008245010915e-05,
      "loss": 1.2136,
      "step": 3953
    },
    {
      "epoch": 0.6025601950624809,
      "grad_norm": 0.70703125,
      "learning_rate": 7.20226927631382e-05,
      "loss": 0.8924,
      "step": 3954
    },
    {
      "epoch": 0.6027125876257239,
      "grad_norm": 1.03125,
      "learning_rate": 7.197530989184161e-05,
      "loss": 1.0629,
      "step": 3955
    },
    {
      "epoch": 0.6028649801889667,
      "grad_norm": 0.91015625,
      "learning_rate": 7.192793384776255e-05,
      "loss": 1.038,
      "step": 3956
    },
    {
      "epoch": 0.6030173727522097,
      "grad_norm": 1.296875,
      "learning_rate": 7.188056464244249e-05,
      "loss": 0.8511,
      "step": 3957
    },
    {
      "epoch": 0.6031697653154526,
      "grad_norm": 0.9765625,
      "learning_rate": 7.183320228742122e-05,
      "loss": 1.1172,
      "step": 3958
    },
    {
      "epoch": 0.6033221578786955,
      "grad_norm": 0.77734375,
      "learning_rate": 7.178584679423695e-05,
      "loss": 0.8936,
      "step": 3959
    },
    {
      "epoch": 0.6034745504419384,
      "grad_norm": 0.671875,
      "learning_rate": 7.17384981744261e-05,
      "loss": 0.993,
      "step": 3960
    },
    {
      "epoch": 0.6036269430051814,
      "grad_norm": 0.91796875,
      "learning_rate": 7.169115643952351e-05,
      "loss": 1.1321,
      "step": 3961
    },
    {
      "epoch": 0.6037793355684242,
      "grad_norm": 1.078125,
      "learning_rate": 7.164382160106231e-05,
      "loss": 0.9836,
      "step": 3962
    },
    {
      "epoch": 0.6039317281316672,
      "grad_norm": 0.9609375,
      "learning_rate": 7.159649367057395e-05,
      "loss": 0.9716,
      "step": 3963
    },
    {
      "epoch": 0.6040841206949101,
      "grad_norm": 1.0703125,
      "learning_rate": 7.154917265958814e-05,
      "loss": 0.8576,
      "step": 3964
    },
    {
      "epoch": 0.604236513258153,
      "grad_norm": 0.9453125,
      "learning_rate": 7.150185857963303e-05,
      "loss": 1.0519,
      "step": 3965
    },
    {
      "epoch": 0.6043889058213959,
      "grad_norm": 0.88671875,
      "learning_rate": 7.145455144223496e-05,
      "loss": 1.0349,
      "step": 3966
    },
    {
      "epoch": 0.6045412983846389,
      "grad_norm": 1.375,
      "learning_rate": 7.140725125891868e-05,
      "loss": 1.0207,
      "step": 3967
    },
    {
      "epoch": 0.6046936909478817,
      "grad_norm": 0.91015625,
      "learning_rate": 7.135995804120715e-05,
      "loss": 0.8857,
      "step": 3968
    },
    {
      "epoch": 0.6048460835111247,
      "grad_norm": 0.80078125,
      "learning_rate": 7.131267180062168e-05,
      "loss": 0.8689,
      "step": 3969
    },
    {
      "epoch": 0.6049984760743675,
      "grad_norm": 0.96484375,
      "learning_rate": 7.12653925486819e-05,
      "loss": 0.9444,
      "step": 3970
    },
    {
      "epoch": 0.6051508686376105,
      "grad_norm": 0.9375,
      "learning_rate": 7.121812029690572e-05,
      "loss": 1.0025,
      "step": 3971
    },
    {
      "epoch": 0.6053032612008534,
      "grad_norm": 0.66796875,
      "learning_rate": 7.11708550568093e-05,
      "loss": 0.9248,
      "step": 3972
    },
    {
      "epoch": 0.6054556537640963,
      "grad_norm": 1.109375,
      "learning_rate": 7.11235968399072e-05,
      "loss": 1.0393,
      "step": 3973
    },
    {
      "epoch": 0.6056080463273392,
      "grad_norm": 0.96484375,
      "learning_rate": 7.107634565771212e-05,
      "loss": 0.9511,
      "step": 3974
    },
    {
      "epoch": 0.6057604388905822,
      "grad_norm": 0.7890625,
      "learning_rate": 7.102910152173517e-05,
      "loss": 0.8437,
      "step": 3975
    },
    {
      "epoch": 0.605912831453825,
      "grad_norm": 0.86328125,
      "learning_rate": 7.098186444348571e-05,
      "loss": 0.9844,
      "step": 3976
    },
    {
      "epoch": 0.606065224017068,
      "grad_norm": 0.78515625,
      "learning_rate": 7.093463443447137e-05,
      "loss": 0.7285,
      "step": 3977
    },
    {
      "epoch": 0.6062176165803109,
      "grad_norm": 0.9765625,
      "learning_rate": 7.088741150619803e-05,
      "loss": 0.8267,
      "step": 3978
    },
    {
      "epoch": 0.6063700091435538,
      "grad_norm": 0.89453125,
      "learning_rate": 7.08401956701699e-05,
      "loss": 0.9363,
      "step": 3979
    },
    {
      "epoch": 0.6065224017067967,
      "grad_norm": 0.9921875,
      "learning_rate": 7.079298693788945e-05,
      "loss": 0.982,
      "step": 3980
    },
    {
      "epoch": 0.6066747942700397,
      "grad_norm": 1.0859375,
      "learning_rate": 7.074578532085736e-05,
      "loss": 1.0229,
      "step": 3981
    },
    {
      "epoch": 0.6068271868332825,
      "grad_norm": 2.59375,
      "learning_rate": 7.069859083057266e-05,
      "loss": 1.0282,
      "step": 3982
    },
    {
      "epoch": 0.6069795793965255,
      "grad_norm": 0.9296875,
      "learning_rate": 7.065140347853258e-05,
      "loss": 1.0143,
      "step": 3983
    },
    {
      "epoch": 0.6071319719597683,
      "grad_norm": 1.0078125,
      "learning_rate": 7.060422327623267e-05,
      "loss": 0.9154,
      "step": 3984
    },
    {
      "epoch": 0.6072843645230113,
      "grad_norm": 0.76171875,
      "learning_rate": 7.05570502351667e-05,
      "loss": 0.9234,
      "step": 3985
    },
    {
      "epoch": 0.6074367570862542,
      "grad_norm": 1.4140625,
      "learning_rate": 7.050988436682666e-05,
      "loss": 0.9002,
      "step": 3986
    },
    {
      "epoch": 0.6075891496494971,
      "grad_norm": 1.03125,
      "learning_rate": 7.046272568270288e-05,
      "loss": 1.079,
      "step": 3987
    },
    {
      "epoch": 0.60774154221274,
      "grad_norm": 1.03125,
      "learning_rate": 7.041557419428389e-05,
      "loss": 1.048,
      "step": 3988
    },
    {
      "epoch": 0.607893934775983,
      "grad_norm": 0.84765625,
      "learning_rate": 7.036842991305644e-05,
      "loss": 0.8781,
      "step": 3989
    },
    {
      "epoch": 0.6080463273392258,
      "grad_norm": 1.1484375,
      "learning_rate": 7.032129285050557e-05,
      "loss": 0.9433,
      "step": 3990
    },
    {
      "epoch": 0.6081987199024688,
      "grad_norm": 0.9296875,
      "learning_rate": 7.027416301811456e-05,
      "loss": 1.0532,
      "step": 3991
    },
    {
      "epoch": 0.6083511124657117,
      "grad_norm": 1.1171875,
      "learning_rate": 7.02270404273649e-05,
      "loss": 0.9314,
      "step": 3992
    },
    {
      "epoch": 0.6085035050289546,
      "grad_norm": 1.28125,
      "learning_rate": 7.017992508973635e-05,
      "loss": 1.0326,
      "step": 3993
    },
    {
      "epoch": 0.6086558975921975,
      "grad_norm": 0.89453125,
      "learning_rate": 7.013281701670684e-05,
      "loss": 0.9267,
      "step": 3994
    },
    {
      "epoch": 0.6088082901554405,
      "grad_norm": 0.84765625,
      "learning_rate": 7.008571621975262e-05,
      "loss": 0.8763,
      "step": 3995
    },
    {
      "epoch": 0.6089606827186833,
      "grad_norm": 1.09375,
      "learning_rate": 7.00386227103481e-05,
      "loss": 1.0264,
      "step": 3996
    },
    {
      "epoch": 0.6091130752819263,
      "grad_norm": 0.8359375,
      "learning_rate": 6.999153649996595e-05,
      "loss": 0.8562,
      "step": 3997
    },
    {
      "epoch": 0.6092654678451691,
      "grad_norm": 0.87890625,
      "learning_rate": 6.994445760007702e-05,
      "loss": 0.8381,
      "step": 3998
    },
    {
      "epoch": 0.6094178604084121,
      "grad_norm": 1.0,
      "learning_rate": 6.989738602215044e-05,
      "loss": 0.8984,
      "step": 3999
    },
    {
      "epoch": 0.609570252971655,
      "grad_norm": 0.703125,
      "learning_rate": 6.985032177765348e-05,
      "loss": 0.917,
      "step": 4000
    },
    {
      "epoch": 0.6097226455348979,
      "grad_norm": 1.171875,
      "learning_rate": 6.980326487805174e-05,
      "loss": 1.0559,
      "step": 4001
    },
    {
      "epoch": 0.6098750380981408,
      "grad_norm": 0.8046875,
      "learning_rate": 6.975621533480888e-05,
      "loss": 0.8656,
      "step": 4002
    },
    {
      "epoch": 0.6100274306613838,
      "grad_norm": 0.7578125,
      "learning_rate": 6.970917315938687e-05,
      "loss": 0.8404,
      "step": 4003
    },
    {
      "epoch": 0.6101798232246266,
      "grad_norm": 0.8828125,
      "learning_rate": 6.966213836324591e-05,
      "loss": 0.9891,
      "step": 4004
    },
    {
      "epoch": 0.6103322157878696,
      "grad_norm": 1.0625,
      "learning_rate": 6.961511095784429e-05,
      "loss": 0.8489,
      "step": 4005
    },
    {
      "epoch": 0.6104846083511125,
      "grad_norm": 0.9921875,
      "learning_rate": 6.956809095463856e-05,
      "loss": 0.9807,
      "step": 4006
    },
    {
      "epoch": 0.6106370009143554,
      "grad_norm": 0.953125,
      "learning_rate": 6.952107836508352e-05,
      "loss": 0.9944,
      "step": 4007
    },
    {
      "epoch": 0.6107893934775983,
      "grad_norm": 0.9453125,
      "learning_rate": 6.947407320063209e-05,
      "loss": 0.9524,
      "step": 4008
    },
    {
      "epoch": 0.6109417860408413,
      "grad_norm": 0.7890625,
      "learning_rate": 6.942707547273537e-05,
      "loss": 0.8872,
      "step": 4009
    },
    {
      "epoch": 0.6110941786040841,
      "grad_norm": 0.9609375,
      "learning_rate": 6.938008519284273e-05,
      "loss": 0.9363,
      "step": 4010
    },
    {
      "epoch": 0.6112465711673271,
      "grad_norm": 0.94921875,
      "learning_rate": 6.933310237240167e-05,
      "loss": 0.9529,
      "step": 4011
    },
    {
      "epoch": 0.6113989637305699,
      "grad_norm": 0.77734375,
      "learning_rate": 6.928612702285785e-05,
      "loss": 0.7765,
      "step": 4012
    },
    {
      "epoch": 0.6115513562938129,
      "grad_norm": 0.8125,
      "learning_rate": 6.923915915565517e-05,
      "loss": 1.0225,
      "step": 4013
    },
    {
      "epoch": 0.6117037488570558,
      "grad_norm": 0.74609375,
      "learning_rate": 6.919219878223568e-05,
      "loss": 0.8042,
      "step": 4014
    },
    {
      "epoch": 0.6118561414202986,
      "grad_norm": 0.69921875,
      "learning_rate": 6.914524591403957e-05,
      "loss": 0.8171,
      "step": 4015
    },
    {
      "epoch": 0.6120085339835416,
      "grad_norm": 0.90625,
      "learning_rate": 6.909830056250527e-05,
      "loss": 1.0006,
      "step": 4016
    },
    {
      "epoch": 0.6121609265467846,
      "grad_norm": 0.90234375,
      "learning_rate": 6.90513627390693e-05,
      "loss": 1.0267,
      "step": 4017
    },
    {
      "epoch": 0.6123133191100274,
      "grad_norm": 1.109375,
      "learning_rate": 6.900443245516646e-05,
      "loss": 1.0635,
      "step": 4018
    },
    {
      "epoch": 0.6124657116732704,
      "grad_norm": 1.0859375,
      "learning_rate": 6.895750972222958e-05,
      "loss": 1.2096,
      "step": 4019
    },
    {
      "epoch": 0.6126181042365133,
      "grad_norm": 0.71484375,
      "learning_rate": 6.891059455168972e-05,
      "loss": 1.0048,
      "step": 4020
    },
    {
      "epoch": 0.6127704967997561,
      "grad_norm": 1.0859375,
      "learning_rate": 6.886368695497609e-05,
      "loss": 0.9087,
      "step": 4021
    },
    {
      "epoch": 0.6129228893629991,
      "grad_norm": 1.390625,
      "learning_rate": 6.88167869435161e-05,
      "loss": 0.9535,
      "step": 4022
    },
    {
      "epoch": 0.6130752819262419,
      "grad_norm": 0.859375,
      "learning_rate": 6.87698945287352e-05,
      "loss": 0.9352,
      "step": 4023
    },
    {
      "epoch": 0.6132276744894849,
      "grad_norm": 0.8359375,
      "learning_rate": 6.872300972205712e-05,
      "loss": 1.0744,
      "step": 4024
    },
    {
      "epoch": 0.6133800670527279,
      "grad_norm": 1.0390625,
      "learning_rate": 6.867613253490364e-05,
      "loss": 0.9311,
      "step": 4025
    },
    {
      "epoch": 0.6135324596159707,
      "grad_norm": 0.75,
      "learning_rate": 6.862926297869468e-05,
      "loss": 0.9167,
      "step": 4026
    },
    {
      "epoch": 0.6136848521792136,
      "grad_norm": 1.1640625,
      "learning_rate": 6.858240106484841e-05,
      "loss": 0.935,
      "step": 4027
    },
    {
      "epoch": 0.6138372447424566,
      "grad_norm": 0.85546875,
      "learning_rate": 6.853554680478105e-05,
      "loss": 1.0715,
      "step": 4028
    },
    {
      "epoch": 0.6139896373056994,
      "grad_norm": 0.73828125,
      "learning_rate": 6.848870020990691e-05,
      "loss": 1.0145,
      "step": 4029
    },
    {
      "epoch": 0.6141420298689424,
      "grad_norm": 0.828125,
      "learning_rate": 6.844186129163859e-05,
      "loss": 0.9256,
      "step": 4030
    },
    {
      "epoch": 0.6142944224321853,
      "grad_norm": 1.1640625,
      "learning_rate": 6.839503006138663e-05,
      "loss": 1.1668,
      "step": 4031
    },
    {
      "epoch": 0.6144468149954282,
      "grad_norm": 1.0078125,
      "learning_rate": 6.834820653055987e-05,
      "loss": 0.8716,
      "step": 4032
    },
    {
      "epoch": 0.6145992075586711,
      "grad_norm": 0.9765625,
      "learning_rate": 6.830139071056513e-05,
      "loss": 1.0509,
      "step": 4033
    },
    {
      "epoch": 0.6147516001219141,
      "grad_norm": 0.90234375,
      "learning_rate": 6.825458261280746e-05,
      "loss": 1.0075,
      "step": 4034
    },
    {
      "epoch": 0.6149039926851569,
      "grad_norm": 1.0546875,
      "learning_rate": 6.820778224868998e-05,
      "loss": 0.8186,
      "step": 4035
    },
    {
      "epoch": 0.6150563852483999,
      "grad_norm": 0.8671875,
      "learning_rate": 6.816098962961393e-05,
      "loss": 1.0305,
      "step": 4036
    },
    {
      "epoch": 0.6152087778116427,
      "grad_norm": 0.91796875,
      "learning_rate": 6.811420476697863e-05,
      "loss": 0.9822,
      "step": 4037
    },
    {
      "epoch": 0.6153611703748857,
      "grad_norm": 0.85546875,
      "learning_rate": 6.806742767218159e-05,
      "loss": 1.0543,
      "step": 4038
    },
    {
      "epoch": 0.6155135629381286,
      "grad_norm": 0.796875,
      "learning_rate": 6.80206583566184e-05,
      "loss": 0.9359,
      "step": 4039
    },
    {
      "epoch": 0.6156659555013715,
      "grad_norm": 1.1796875,
      "learning_rate": 6.797389683168264e-05,
      "loss": 0.9975,
      "step": 4040
    },
    {
      "epoch": 0.6158183480646144,
      "grad_norm": 0.86328125,
      "learning_rate": 6.79271431087662e-05,
      "loss": 1.0435,
      "step": 4041
    },
    {
      "epoch": 0.6159707406278574,
      "grad_norm": 0.8984375,
      "learning_rate": 6.788039719925891e-05,
      "loss": 0.9451,
      "step": 4042
    },
    {
      "epoch": 0.6161231331911002,
      "grad_norm": 0.984375,
      "learning_rate": 6.783365911454875e-05,
      "loss": 0.7251,
      "step": 4043
    },
    {
      "epoch": 0.6162755257543432,
      "grad_norm": 0.7578125,
      "learning_rate": 6.778692886602178e-05,
      "loss": 1.0392,
      "step": 4044
    },
    {
      "epoch": 0.6164279183175861,
      "grad_norm": 0.9296875,
      "learning_rate": 6.774020646506222e-05,
      "loss": 1.0822,
      "step": 4045
    },
    {
      "epoch": 0.616580310880829,
      "grad_norm": 1.109375,
      "learning_rate": 6.769349192305218e-05,
      "loss": 1.112,
      "step": 4046
    },
    {
      "epoch": 0.6167327034440719,
      "grad_norm": 1.1171875,
      "learning_rate": 6.764678525137214e-05,
      "loss": 1.1057,
      "step": 4047
    },
    {
      "epoch": 0.6168850960073149,
      "grad_norm": 0.79296875,
      "learning_rate": 6.760008646140043e-05,
      "loss": 0.9882,
      "step": 4048
    },
    {
      "epoch": 0.6170374885705577,
      "grad_norm": 0.88671875,
      "learning_rate": 6.755339556451361e-05,
      "loss": 0.8414,
      "step": 4049
    },
    {
      "epoch": 0.6171898811338007,
      "grad_norm": 0.8046875,
      "learning_rate": 6.750671257208623e-05,
      "loss": 1.0684,
      "step": 4050
    },
    {
      "epoch": 0.6173422736970435,
      "grad_norm": 0.59765625,
      "learning_rate": 6.746003749549089e-05,
      "loss": 0.9017,
      "step": 4051
    },
    {
      "epoch": 0.6174946662602865,
      "grad_norm": 1.0234375,
      "learning_rate": 6.741337034609838e-05,
      "loss": 1.0801,
      "step": 4052
    },
    {
      "epoch": 0.6176470588235294,
      "grad_norm": 1.0625,
      "learning_rate": 6.736671113527745e-05,
      "loss": 1.0809,
      "step": 4053
    },
    {
      "epoch": 0.6177994513867723,
      "grad_norm": 1.5078125,
      "learning_rate": 6.732005987439494e-05,
      "loss": 1.1487,
      "step": 4054
    },
    {
      "epoch": 0.6179518439500152,
      "grad_norm": 0.9296875,
      "learning_rate": 6.727341657481581e-05,
      "loss": 1.0457,
      "step": 4055
    },
    {
      "epoch": 0.6181042365132582,
      "grad_norm": 1.078125,
      "learning_rate": 6.722678124790304e-05,
      "loss": 0.9764,
      "step": 4056
    },
    {
      "epoch": 0.618256629076501,
      "grad_norm": 1.265625,
      "learning_rate": 6.71801539050176e-05,
      "loss": 0.9304,
      "step": 4057
    },
    {
      "epoch": 0.618409021639744,
      "grad_norm": 0.953125,
      "learning_rate": 6.713353455751866e-05,
      "loss": 1.0785,
      "step": 4058
    },
    {
      "epoch": 0.6185614142029869,
      "grad_norm": 0.91796875,
      "learning_rate": 6.708692321676335e-05,
      "loss": 0.8593,
      "step": 4059
    },
    {
      "epoch": 0.6187138067662298,
      "grad_norm": 1.09375,
      "learning_rate": 6.704031989410678e-05,
      "loss": 0.9896,
      "step": 4060
    },
    {
      "epoch": 0.6188661993294727,
      "grad_norm": 1.0234375,
      "learning_rate": 6.69937246009023e-05,
      "loss": 1.0723,
      "step": 4061
    },
    {
      "epoch": 0.6190185918927157,
      "grad_norm": 1.0,
      "learning_rate": 6.694713734850115e-05,
      "loss": 1.0491,
      "step": 4062
    },
    {
      "epoch": 0.6191709844559585,
      "grad_norm": 0.99609375,
      "learning_rate": 6.69005581482526e-05,
      "loss": 0.9682,
      "step": 4063
    },
    {
      "epoch": 0.6193233770192015,
      "grad_norm": 0.859375,
      "learning_rate": 6.685398701150411e-05,
      "loss": 0.9138,
      "step": 4064
    },
    {
      "epoch": 0.6194757695824443,
      "grad_norm": 0.81640625,
      "learning_rate": 6.6807423949601e-05,
      "loss": 1.0322,
      "step": 4065
    },
    {
      "epoch": 0.6196281621456873,
      "grad_norm": 1.0078125,
      "learning_rate": 6.676086897388676e-05,
      "loss": 1.038,
      "step": 4066
    },
    {
      "epoch": 0.6197805547089302,
      "grad_norm": 1.21875,
      "learning_rate": 6.671432209570284e-05,
      "loss": 1.1604,
      "step": 4067
    },
    {
      "epoch": 0.6199329472721731,
      "grad_norm": 1.171875,
      "learning_rate": 6.666778332638866e-05,
      "loss": 0.892,
      "step": 4068
    },
    {
      "epoch": 0.620085339835416,
      "grad_norm": 1.21875,
      "learning_rate": 6.662125267728183e-05,
      "loss": 1.0183,
      "step": 4069
    },
    {
      "epoch": 0.620237732398659,
      "grad_norm": 0.89453125,
      "learning_rate": 6.657473015971785e-05,
      "loss": 1.0348,
      "step": 4070
    },
    {
      "epoch": 0.6203901249619018,
      "grad_norm": 1.4296875,
      "learning_rate": 6.652821578503022e-05,
      "loss": 1.0394,
      "step": 4071
    },
    {
      "epoch": 0.6205425175251448,
      "grad_norm": 1.015625,
      "learning_rate": 6.648170956455059e-05,
      "loss": 0.964,
      "step": 4072
    },
    {
      "epoch": 0.6206949100883877,
      "grad_norm": 0.984375,
      "learning_rate": 6.643521150960854e-05,
      "loss": 0.9798,
      "step": 4073
    },
    {
      "epoch": 0.6208473026516306,
      "grad_norm": 0.875,
      "learning_rate": 6.638872163153158e-05,
      "loss": 0.835,
      "step": 4074
    },
    {
      "epoch": 0.6209996952148735,
      "grad_norm": 0.953125,
      "learning_rate": 6.63422399416454e-05,
      "loss": 1.16,
      "step": 4075
    },
    {
      "epoch": 0.6211520877781165,
      "grad_norm": 0.796875,
      "learning_rate": 6.62957664512736e-05,
      "loss": 0.8307,
      "step": 4076
    },
    {
      "epoch": 0.6213044803413593,
      "grad_norm": 0.9375,
      "learning_rate": 6.62493011717377e-05,
      "loss": 1.112,
      "step": 4077
    },
    {
      "epoch": 0.6214568729046023,
      "grad_norm": 1.0078125,
      "learning_rate": 6.620284411435745e-05,
      "loss": 1.0052,
      "step": 4078
    },
    {
      "epoch": 0.6216092654678451,
      "grad_norm": 0.8125,
      "learning_rate": 6.615639529045036e-05,
      "loss": 0.9884,
      "step": 4079
    },
    {
      "epoch": 0.6217616580310881,
      "grad_norm": 1.1953125,
      "learning_rate": 6.610995471133203e-05,
      "loss": 1.2262,
      "step": 4080
    },
    {
      "epoch": 0.621914050594331,
      "grad_norm": 0.94921875,
      "learning_rate": 6.60635223883161e-05,
      "loss": 1.0948,
      "step": 4081
    },
    {
      "epoch": 0.6220664431575739,
      "grad_norm": 0.8046875,
      "learning_rate": 6.601709833271412e-05,
      "loss": 1.0106,
      "step": 4082
    },
    {
      "epoch": 0.6222188357208168,
      "grad_norm": 1.2109375,
      "learning_rate": 6.59706825558357e-05,
      "loss": 0.9223,
      "step": 4083
    },
    {
      "epoch": 0.6223712282840598,
      "grad_norm": 0.91015625,
      "learning_rate": 6.592427506898835e-05,
      "loss": 1.0246,
      "step": 4084
    },
    {
      "epoch": 0.6225236208473026,
      "grad_norm": 1.1640625,
      "learning_rate": 6.587787588347758e-05,
      "loss": 0.987,
      "step": 4085
    },
    {
      "epoch": 0.6226760134105456,
      "grad_norm": 0.87109375,
      "learning_rate": 6.583148501060697e-05,
      "loss": 0.9865,
      "step": 4086
    },
    {
      "epoch": 0.6228284059737885,
      "grad_norm": 0.9140625,
      "learning_rate": 6.578510246167797e-05,
      "loss": 0.9351,
      "step": 4087
    },
    {
      "epoch": 0.6229807985370314,
      "grad_norm": 1.1484375,
      "learning_rate": 6.573872824798997e-05,
      "loss": 1.053,
      "step": 4088
    },
    {
      "epoch": 0.6231331911002743,
      "grad_norm": 0.8984375,
      "learning_rate": 6.569236238084051e-05,
      "loss": 0.9342,
      "step": 4089
    },
    {
      "epoch": 0.6232855836635173,
      "grad_norm": 0.9140625,
      "learning_rate": 6.564600487152492e-05,
      "loss": 0.911,
      "step": 4090
    },
    {
      "epoch": 0.6234379762267601,
      "grad_norm": 0.82421875,
      "learning_rate": 6.559965573133653e-05,
      "loss": 0.7655,
      "step": 4091
    },
    {
      "epoch": 0.6235903687900031,
      "grad_norm": 1.1328125,
      "learning_rate": 6.555331497156672e-05,
      "loss": 1.0779,
      "step": 4092
    },
    {
      "epoch": 0.6237427613532459,
      "grad_norm": 1.03125,
      "learning_rate": 6.550698260350475e-05,
      "loss": 0.7817,
      "step": 4093
    },
    {
      "epoch": 0.6238951539164889,
      "grad_norm": 0.84375,
      "learning_rate": 6.546065863843778e-05,
      "loss": 0.9902,
      "step": 4094
    },
    {
      "epoch": 0.6240475464797318,
      "grad_norm": 0.98046875,
      "learning_rate": 6.541434308765108e-05,
      "loss": 0.9961,
      "step": 4095
    },
    {
      "epoch": 0.6241999390429747,
      "grad_norm": 0.90234375,
      "learning_rate": 6.536803596242775e-05,
      "loss": 0.9634,
      "step": 4096
    },
    {
      "epoch": 0.6243523316062176,
      "grad_norm": 1.1875,
      "learning_rate": 6.53217372740489e-05,
      "loss": 1.0167,
      "step": 4097
    },
    {
      "epoch": 0.6245047241694606,
      "grad_norm": 1.0546875,
      "learning_rate": 6.527544703379351e-05,
      "loss": 0.953,
      "step": 4098
    },
    {
      "epoch": 0.6246571167327034,
      "grad_norm": 1.0078125,
      "learning_rate": 6.522916525293857e-05,
      "loss": 1.0467,
      "step": 4099
    },
    {
      "epoch": 0.6248095092959464,
      "grad_norm": 1.0703125,
      "learning_rate": 6.518289194275899e-05,
      "loss": 0.9987,
      "step": 4100
    },
    {
      "epoch": 0.6249619018591893,
      "grad_norm": 1.078125,
      "learning_rate": 6.513662711452766e-05,
      "loss": 1.0714,
      "step": 4101
    },
    {
      "epoch": 0.6251142944224322,
      "grad_norm": 0.80078125,
      "learning_rate": 6.509037077951523e-05,
      "loss": 0.8982,
      "step": 4102
    },
    {
      "epoch": 0.6252666869856751,
      "grad_norm": 1.109375,
      "learning_rate": 6.504412294899053e-05,
      "loss": 0.992,
      "step": 4103
    },
    {
      "epoch": 0.6254190795489181,
      "grad_norm": 0.93359375,
      "learning_rate": 6.499788363422017e-05,
      "loss": 0.7757,
      "step": 4104
    },
    {
      "epoch": 0.6255714721121609,
      "grad_norm": 0.9765625,
      "learning_rate": 6.495165284646865e-05,
      "loss": 0.9254,
      "step": 4105
    },
    {
      "epoch": 0.6257238646754039,
      "grad_norm": 0.81640625,
      "learning_rate": 6.490543059699852e-05,
      "loss": 0.9247,
      "step": 4106
    },
    {
      "epoch": 0.6258762572386467,
      "grad_norm": 0.97265625,
      "learning_rate": 6.485921689707019e-05,
      "loss": 1.0428,
      "step": 4107
    },
    {
      "epoch": 0.6260286498018897,
      "grad_norm": 1.046875,
      "learning_rate": 6.481301175794193e-05,
      "loss": 0.977,
      "step": 4108
    },
    {
      "epoch": 0.6261810423651326,
      "grad_norm": 0.90234375,
      "learning_rate": 6.476681519087e-05,
      "loss": 0.8981,
      "step": 4109
    },
    {
      "epoch": 0.6263334349283755,
      "grad_norm": 1.140625,
      "learning_rate": 6.47206272071086e-05,
      "loss": 0.9788,
      "step": 4110
    },
    {
      "epoch": 0.6264858274916184,
      "grad_norm": 1.2421875,
      "learning_rate": 6.467444781790966e-05,
      "loss": 1.0624,
      "step": 4111
    },
    {
      "epoch": 0.6266382200548614,
      "grad_norm": 1.0546875,
      "learning_rate": 6.462827703452327e-05,
      "loss": 0.9606,
      "step": 4112
    },
    {
      "epoch": 0.6267906126181042,
      "grad_norm": 0.92578125,
      "learning_rate": 6.458211486819724e-05,
      "loss": 0.8436,
      "step": 4113
    },
    {
      "epoch": 0.6269430051813472,
      "grad_norm": 0.7578125,
      "learning_rate": 6.453596133017736e-05,
      "loss": 0.9154,
      "step": 4114
    },
    {
      "epoch": 0.6270953977445901,
      "grad_norm": 1.1328125,
      "learning_rate": 6.44898164317073e-05,
      "loss": 0.9739,
      "step": 4115
    },
    {
      "epoch": 0.627247790307833,
      "grad_norm": 1.15625,
      "learning_rate": 6.444368018402853e-05,
      "loss": 1.0182,
      "step": 4116
    },
    {
      "epoch": 0.6274001828710759,
      "grad_norm": 0.734375,
      "learning_rate": 6.439755259838063e-05,
      "loss": 0.9483,
      "step": 4117
    },
    {
      "epoch": 0.6275525754343189,
      "grad_norm": 0.87890625,
      "learning_rate": 6.435143368600091e-05,
      "loss": 0.9459,
      "step": 4118
    },
    {
      "epoch": 0.6277049679975617,
      "grad_norm": 0.7265625,
      "learning_rate": 6.430532345812452e-05,
      "loss": 0.8979,
      "step": 4119
    },
    {
      "epoch": 0.6278573605608047,
      "grad_norm": 0.8671875,
      "learning_rate": 6.425922192598468e-05,
      "loss": 0.9712,
      "step": 4120
    },
    {
      "epoch": 0.6280097531240475,
      "grad_norm": 1.15625,
      "learning_rate": 6.421312910081235e-05,
      "loss": 0.8896,
      "step": 4121
    },
    {
      "epoch": 0.6281621456872905,
      "grad_norm": 0.80078125,
      "learning_rate": 6.416704499383633e-05,
      "loss": 1.0579,
      "step": 4122
    },
    {
      "epoch": 0.6283145382505334,
      "grad_norm": 0.87890625,
      "learning_rate": 6.412096961628349e-05,
      "loss": 0.8151,
      "step": 4123
    },
    {
      "epoch": 0.6284669308137762,
      "grad_norm": 0.86328125,
      "learning_rate": 6.407490297937841e-05,
      "loss": 0.8946,
      "step": 4124
    },
    {
      "epoch": 0.6286193233770192,
      "grad_norm": 0.96875,
      "learning_rate": 6.402884509434352e-05,
      "loss": 0.8105,
      "step": 4125
    },
    {
      "epoch": 0.6287717159402622,
      "grad_norm": 0.90234375,
      "learning_rate": 6.398279597239929e-05,
      "loss": 0.9439,
      "step": 4126
    },
    {
      "epoch": 0.628924108503505,
      "grad_norm": 1.1875,
      "learning_rate": 6.393675562476391e-05,
      "loss": 0.9986,
      "step": 4127
    },
    {
      "epoch": 0.629076501066748,
      "grad_norm": 1.1875,
      "learning_rate": 6.38907240626534e-05,
      "loss": 0.9311,
      "step": 4128
    },
    {
      "epoch": 0.6292288936299909,
      "grad_norm": 0.9453125,
      "learning_rate": 6.384470129728182e-05,
      "loss": 1.0662,
      "step": 4129
    },
    {
      "epoch": 0.6293812861932337,
      "grad_norm": 0.78515625,
      "learning_rate": 6.379868733986089e-05,
      "loss": 0.8782,
      "step": 4130
    },
    {
      "epoch": 0.6295336787564767,
      "grad_norm": 0.921875,
      "learning_rate": 6.375268220160033e-05,
      "loss": 0.8111,
      "step": 4131
    },
    {
      "epoch": 0.6296860713197195,
      "grad_norm": 1.2734375,
      "learning_rate": 6.370668589370765e-05,
      "loss": 0.8524,
      "step": 4132
    },
    {
      "epoch": 0.6298384638829625,
      "grad_norm": 0.703125,
      "learning_rate": 6.366069842738813e-05,
      "loss": 0.913,
      "step": 4133
    },
    {
      "epoch": 0.6299908564462054,
      "grad_norm": 0.9296875,
      "learning_rate": 6.36147198138451e-05,
      "loss": 1.1668,
      "step": 4134
    },
    {
      "epoch": 0.6301432490094483,
      "grad_norm": 0.90234375,
      "learning_rate": 6.356875006427957e-05,
      "loss": 1.0,
      "step": 4135
    },
    {
      "epoch": 0.6302956415726912,
      "grad_norm": 0.83203125,
      "learning_rate": 6.352278918989034e-05,
      "loss": 0.8717,
      "step": 4136
    },
    {
      "epoch": 0.6304480341359342,
      "grad_norm": 1.2421875,
      "learning_rate": 6.347683720187426e-05,
      "loss": 1.0691,
      "step": 4137
    },
    {
      "epoch": 0.630600426699177,
      "grad_norm": 1.2734375,
      "learning_rate": 6.343089411142587e-05,
      "loss": 0.9258,
      "step": 4138
    },
    {
      "epoch": 0.63075281926242,
      "grad_norm": 0.87109375,
      "learning_rate": 6.338495992973749e-05,
      "loss": 1.1499,
      "step": 4139
    },
    {
      "epoch": 0.630905211825663,
      "grad_norm": 1.1171875,
      "learning_rate": 6.333903466799945e-05,
      "loss": 0.9343,
      "step": 4140
    },
    {
      "epoch": 0.6310576043889058,
      "grad_norm": 1.2421875,
      "learning_rate": 6.329311833739978e-05,
      "loss": 0.9067,
      "step": 4141
    },
    {
      "epoch": 0.6312099969521487,
      "grad_norm": 1.1171875,
      "learning_rate": 6.324721094912427e-05,
      "loss": 1.0066,
      "step": 4142
    },
    {
      "epoch": 0.6313623895153917,
      "grad_norm": 0.87109375,
      "learning_rate": 6.320131251435675e-05,
      "loss": 1.077,
      "step": 4143
    },
    {
      "epoch": 0.6315147820786345,
      "grad_norm": 0.94140625,
      "learning_rate": 6.315542304427866e-05,
      "loss": 0.926,
      "step": 4144
    },
    {
      "epoch": 0.6316671746418775,
      "grad_norm": 0.91015625,
      "learning_rate": 6.31095425500693e-05,
      "loss": 0.9065,
      "step": 4145
    },
    {
      "epoch": 0.6318195672051203,
      "grad_norm": 1.2265625,
      "learning_rate": 6.306367104290594e-05,
      "loss": 0.9525,
      "step": 4146
    },
    {
      "epoch": 0.6319719597683633,
      "grad_norm": 1.0390625,
      "learning_rate": 6.301780853396341e-05,
      "loss": 1.0481,
      "step": 4147
    },
    {
      "epoch": 0.6321243523316062,
      "grad_norm": 0.8828125,
      "learning_rate": 6.297195503441457e-05,
      "loss": 0.8695,
      "step": 4148
    },
    {
      "epoch": 0.6322767448948491,
      "grad_norm": 1.1953125,
      "learning_rate": 6.292611055542998e-05,
      "loss": 0.8544,
      "step": 4149
    },
    {
      "epoch": 0.632429137458092,
      "grad_norm": 0.85546875,
      "learning_rate": 6.28802751081779e-05,
      "loss": 0.8599,
      "step": 4150
    },
    {
      "epoch": 0.632581530021335,
      "grad_norm": 1.234375,
      "learning_rate": 6.28344487038247e-05,
      "loss": 0.9593,
      "step": 4151
    },
    {
      "epoch": 0.6327339225845778,
      "grad_norm": 1.1328125,
      "learning_rate": 6.278863135353421e-05,
      "loss": 1.0365,
      "step": 4152
    },
    {
      "epoch": 0.6328863151478208,
      "grad_norm": 1.0078125,
      "learning_rate": 6.274282306846819e-05,
      "loss": 0.9822,
      "step": 4153
    },
    {
      "epoch": 0.6330387077110637,
      "grad_norm": 0.9765625,
      "learning_rate": 6.269702385978627e-05,
      "loss": 1.0944,
      "step": 4154
    },
    {
      "epoch": 0.6331911002743066,
      "grad_norm": 0.8828125,
      "learning_rate": 6.26512337386458e-05,
      "loss": 0.9009,
      "step": 4155
    },
    {
      "epoch": 0.6333434928375495,
      "grad_norm": 0.94140625,
      "learning_rate": 6.260545271620181e-05,
      "loss": 1.0483,
      "step": 4156
    },
    {
      "epoch": 0.6334958854007925,
      "grad_norm": 1.171875,
      "learning_rate": 6.255968080360733e-05,
      "loss": 1.0348,
      "step": 4157
    },
    {
      "epoch": 0.6336482779640353,
      "grad_norm": 0.66796875,
      "learning_rate": 6.251391801201301e-05,
      "loss": 0.8636,
      "step": 4158
    },
    {
      "epoch": 0.6338006705272783,
      "grad_norm": 1.5078125,
      "learning_rate": 6.246816435256725e-05,
      "loss": 1.0245,
      "step": 4159
    },
    {
      "epoch": 0.6339530630905211,
      "grad_norm": 0.77734375,
      "learning_rate": 6.242241983641645e-05,
      "loss": 0.9503,
      "step": 4160
    },
    {
      "epoch": 0.6341054556537641,
      "grad_norm": 0.765625,
      "learning_rate": 6.237668447470451e-05,
      "loss": 0.9564,
      "step": 4161
    },
    {
      "epoch": 0.634257848217007,
      "grad_norm": 0.85546875,
      "learning_rate": 6.23309582785733e-05,
      "loss": 1.027,
      "step": 4162
    },
    {
      "epoch": 0.6344102407802499,
      "grad_norm": 0.921875,
      "learning_rate": 6.228524125916234e-05,
      "loss": 1.0883,
      "step": 4163
    },
    {
      "epoch": 0.6345626333434928,
      "grad_norm": 0.9140625,
      "learning_rate": 6.223953342760891e-05,
      "loss": 0.9465,
      "step": 4164
    },
    {
      "epoch": 0.6347150259067358,
      "grad_norm": 0.76953125,
      "learning_rate": 6.219383479504818e-05,
      "loss": 1.0289,
      "step": 4165
    },
    {
      "epoch": 0.6348674184699786,
      "grad_norm": 1.140625,
      "learning_rate": 6.214814537261297e-05,
      "loss": 0.9374,
      "step": 4166
    },
    {
      "epoch": 0.6350198110332216,
      "grad_norm": 1.2734375,
      "learning_rate": 6.210246517143384e-05,
      "loss": 1.0508,
      "step": 4167
    },
    {
      "epoch": 0.6351722035964645,
      "grad_norm": 0.90625,
      "learning_rate": 6.205679420263916e-05,
      "loss": 0.9357,
      "step": 4168
    },
    {
      "epoch": 0.6353245961597074,
      "grad_norm": 1.0390625,
      "learning_rate": 6.201113247735511e-05,
      "loss": 0.8576,
      "step": 4169
    },
    {
      "epoch": 0.6354769887229503,
      "grad_norm": 0.9375,
      "learning_rate": 6.196548000670538e-05,
      "loss": 0.9654,
      "step": 4170
    },
    {
      "epoch": 0.6356293812861933,
      "grad_norm": 0.953125,
      "learning_rate": 6.191983680181176e-05,
      "loss": 1.0692,
      "step": 4171
    },
    {
      "epoch": 0.6357817738494361,
      "grad_norm": 0.90234375,
      "learning_rate": 6.187420287379346e-05,
      "loss": 1.0567,
      "step": 4172
    },
    {
      "epoch": 0.6359341664126791,
      "grad_norm": 0.84765625,
      "learning_rate": 6.182857823376758e-05,
      "loss": 0.8926,
      "step": 4173
    },
    {
      "epoch": 0.6360865589759219,
      "grad_norm": 0.95703125,
      "learning_rate": 6.178296289284899e-05,
      "loss": 0.8675,
      "step": 4174
    },
    {
      "epoch": 0.6362389515391649,
      "grad_norm": 0.890625,
      "learning_rate": 6.17373568621502e-05,
      "loss": 1.0562,
      "step": 4175
    },
    {
      "epoch": 0.6363913441024078,
      "grad_norm": 0.85546875,
      "learning_rate": 6.169176015278146e-05,
      "loss": 0.7949,
      "step": 4176
    },
    {
      "epoch": 0.6365437366656507,
      "grad_norm": 1.140625,
      "learning_rate": 6.16461727758509e-05,
      "loss": 1.1476,
      "step": 4177
    },
    {
      "epoch": 0.6366961292288936,
      "grad_norm": 0.86328125,
      "learning_rate": 6.160059474246413e-05,
      "loss": 1.0888,
      "step": 4178
    },
    {
      "epoch": 0.6368485217921366,
      "grad_norm": 1.015625,
      "learning_rate": 6.155502606372471e-05,
      "loss": 0.9498,
      "step": 4179
    },
    {
      "epoch": 0.6370009143553794,
      "grad_norm": 0.69140625,
      "learning_rate": 6.150946675073382e-05,
      "loss": 0.8749,
      "step": 4180
    },
    {
      "epoch": 0.6371533069186224,
      "grad_norm": 0.9453125,
      "learning_rate": 6.146391681459025e-05,
      "loss": 0.8197,
      "step": 4181
    },
    {
      "epoch": 0.6373056994818653,
      "grad_norm": 1.078125,
      "learning_rate": 6.141837626639075e-05,
      "loss": 1.136,
      "step": 4182
    },
    {
      "epoch": 0.6374580920451082,
      "grad_norm": 0.96875,
      "learning_rate": 6.137284511722964e-05,
      "loss": 0.9917,
      "step": 4183
    },
    {
      "epoch": 0.6376104846083511,
      "grad_norm": 0.98828125,
      "learning_rate": 6.132732337819886e-05,
      "loss": 1.0051,
      "step": 4184
    },
    {
      "epoch": 0.6377628771715941,
      "grad_norm": 0.828125,
      "learning_rate": 6.128181106038828e-05,
      "loss": 0.8513,
      "step": 4185
    },
    {
      "epoch": 0.6379152697348369,
      "grad_norm": 0.94921875,
      "learning_rate": 6.123630817488529e-05,
      "loss": 0.8334,
      "step": 4186
    },
    {
      "epoch": 0.6380676622980799,
      "grad_norm": 0.9921875,
      "learning_rate": 6.119081473277501e-05,
      "loss": 1.1312,
      "step": 4187
    },
    {
      "epoch": 0.6382200548613227,
      "grad_norm": 1.1171875,
      "learning_rate": 6.114533074514043e-05,
      "loss": 1.1097,
      "step": 4188
    },
    {
      "epoch": 0.6383724474245657,
      "grad_norm": 0.91796875,
      "learning_rate": 6.1099856223062e-05,
      "loss": 0.9465,
      "step": 4189
    },
    {
      "epoch": 0.6385248399878086,
      "grad_norm": 1.6640625,
      "learning_rate": 6.105439117761793e-05,
      "loss": 1.0803,
      "step": 4190
    },
    {
      "epoch": 0.6386772325510515,
      "grad_norm": 0.921875,
      "learning_rate": 6.100893561988428e-05,
      "loss": 0.9136,
      "step": 4191
    },
    {
      "epoch": 0.6388296251142944,
      "grad_norm": 0.9375,
      "learning_rate": 6.09634895609346e-05,
      "loss": 0.9774,
      "step": 4192
    },
    {
      "epoch": 0.6389820176775374,
      "grad_norm": 1.2890625,
      "learning_rate": 6.091805301184017e-05,
      "loss": 0.8753,
      "step": 4193
    },
    {
      "epoch": 0.6391344102407802,
      "grad_norm": 0.91015625,
      "learning_rate": 6.087262598367011e-05,
      "loss": 1.0618,
      "step": 4194
    },
    {
      "epoch": 0.6392868028040232,
      "grad_norm": 0.71484375,
      "learning_rate": 6.082720848749095e-05,
      "loss": 0.8017,
      "step": 4195
    },
    {
      "epoch": 0.6394391953672661,
      "grad_norm": 1.0625,
      "learning_rate": 6.078180053436716e-05,
      "loss": 0.8793,
      "step": 4196
    },
    {
      "epoch": 0.639591587930509,
      "grad_norm": 1.1328125,
      "learning_rate": 6.073640213536076e-05,
      "loss": 1.2335,
      "step": 4197
    },
    {
      "epoch": 0.6397439804937519,
      "grad_norm": 0.96875,
      "learning_rate": 6.069101330153134e-05,
      "loss": 0.9537,
      "step": 4198
    },
    {
      "epoch": 0.6398963730569949,
      "grad_norm": 0.7890625,
      "learning_rate": 6.064563404393642e-05,
      "loss": 0.9275,
      "step": 4199
    },
    {
      "epoch": 0.6400487656202377,
      "grad_norm": 1.09375,
      "learning_rate": 6.0600264373630954e-05,
      "loss": 0.9147,
      "step": 4200
    },
    {
      "epoch": 0.6402011581834807,
      "grad_norm": 0.80859375,
      "learning_rate": 6.0554904301667636e-05,
      "loss": 0.8379,
      "step": 4201
    },
    {
      "epoch": 0.6403535507467235,
      "grad_norm": 0.81640625,
      "learning_rate": 6.050955383909691e-05,
      "loss": 0.9407,
      "step": 4202
    },
    {
      "epoch": 0.6405059433099665,
      "grad_norm": 0.72265625,
      "learning_rate": 6.046421299696674e-05,
      "loss": 0.9436,
      "step": 4203
    },
    {
      "epoch": 0.6406583358732094,
      "grad_norm": 1.1640625,
      "learning_rate": 6.0418881786322755e-05,
      "loss": 0.8343,
      "step": 4204
    },
    {
      "epoch": 0.6408107284364523,
      "grad_norm": 1.2578125,
      "learning_rate": 6.037356021820844e-05,
      "loss": 1.114,
      "step": 4205
    },
    {
      "epoch": 0.6409631209996952,
      "grad_norm": 1.046875,
      "learning_rate": 6.032824830366466e-05,
      "loss": 0.9214,
      "step": 4206
    },
    {
      "epoch": 0.6411155135629382,
      "grad_norm": 0.8515625,
      "learning_rate": 6.028294605373006e-05,
      "loss": 0.9813,
      "step": 4207
    },
    {
      "epoch": 0.641267906126181,
      "grad_norm": 1.0703125,
      "learning_rate": 6.023765347944099e-05,
      "loss": 1.1048,
      "step": 4208
    },
    {
      "epoch": 0.641420298689424,
      "grad_norm": 0.87890625,
      "learning_rate": 6.0192370591831307e-05,
      "loss": 0.9455,
      "step": 4209
    },
    {
      "epoch": 0.6415726912526669,
      "grad_norm": 1.2109375,
      "learning_rate": 6.014709740193254e-05,
      "loss": 1.1223,
      "step": 4210
    },
    {
      "epoch": 0.6417250838159098,
      "grad_norm": 0.90625,
      "learning_rate": 6.010183392077402e-05,
      "loss": 1.0183,
      "step": 4211
    },
    {
      "epoch": 0.6418774763791527,
      "grad_norm": 0.93359375,
      "learning_rate": 6.005658015938244e-05,
      "loss": 1.1076,
      "step": 4212
    },
    {
      "epoch": 0.6420298689423957,
      "grad_norm": 0.86328125,
      "learning_rate": 6.001133612878238e-05,
      "loss": 0.9109,
      "step": 4213
    },
    {
      "epoch": 0.6421822615056385,
      "grad_norm": 0.765625,
      "learning_rate": 5.996610183999587e-05,
      "loss": 0.8968,
      "step": 4214
    },
    {
      "epoch": 0.6423346540688815,
      "grad_norm": 0.90234375,
      "learning_rate": 5.992087730404261e-05,
      "loss": 0.9629,
      "step": 4215
    },
    {
      "epoch": 0.6424870466321243,
      "grad_norm": 0.8359375,
      "learning_rate": 5.9875662531940055e-05,
      "loss": 0.8748,
      "step": 4216
    },
    {
      "epoch": 0.6426394391953673,
      "grad_norm": 0.86328125,
      "learning_rate": 5.983045753470308e-05,
      "loss": 0.8613,
      "step": 4217
    },
    {
      "epoch": 0.6427918317586102,
      "grad_norm": 1.34375,
      "learning_rate": 5.978526232334425e-05,
      "loss": 0.9034,
      "step": 4218
    },
    {
      "epoch": 0.642944224321853,
      "grad_norm": 0.78125,
      "learning_rate": 5.974007690887389e-05,
      "loss": 0.7898,
      "step": 4219
    },
    {
      "epoch": 0.643096616885096,
      "grad_norm": 0.79296875,
      "learning_rate": 5.969490130229971e-05,
      "loss": 0.7723,
      "step": 4220
    },
    {
      "epoch": 0.643249009448339,
      "grad_norm": 0.85546875,
      "learning_rate": 5.9649735514627134e-05,
      "loss": 0.9354,
      "step": 4221
    },
    {
      "epoch": 0.6434014020115818,
      "grad_norm": 0.84765625,
      "learning_rate": 5.9604579556859305e-05,
      "loss": 0.9026,
      "step": 4222
    },
    {
      "epoch": 0.6435537945748248,
      "grad_norm": 0.953125,
      "learning_rate": 5.955943343999677e-05,
      "loss": 0.9937,
      "step": 4223
    },
    {
      "epoch": 0.6437061871380677,
      "grad_norm": 0.84375,
      "learning_rate": 5.9514297175037745e-05,
      "loss": 0.9252,
      "step": 4224
    },
    {
      "epoch": 0.6438585797013106,
      "grad_norm": 0.85546875,
      "learning_rate": 5.9469170772978186e-05,
      "loss": 0.9708,
      "step": 4225
    },
    {
      "epoch": 0.6440109722645535,
      "grad_norm": 0.9765625,
      "learning_rate": 5.9424054244811414e-05,
      "loss": 1.0273,
      "step": 4226
    },
    {
      "epoch": 0.6441633648277965,
      "grad_norm": 1.1328125,
      "learning_rate": 5.9378947601528576e-05,
      "loss": 0.9478,
      "step": 4227
    },
    {
      "epoch": 0.6443157573910393,
      "grad_norm": 1.0234375,
      "learning_rate": 5.933385085411824e-05,
      "loss": 1.0232,
      "step": 4228
    },
    {
      "epoch": 0.6444681499542823,
      "grad_norm": 0.7578125,
      "learning_rate": 5.928876401356657e-05,
      "loss": 0.9784,
      "step": 4229
    },
    {
      "epoch": 0.6446205425175251,
      "grad_norm": 0.87890625,
      "learning_rate": 5.924368709085748e-05,
      "loss": 0.8702,
      "step": 4230
    },
    {
      "epoch": 0.644772935080768,
      "grad_norm": 0.90625,
      "learning_rate": 5.919862009697229e-05,
      "loss": 0.8149,
      "step": 4231
    },
    {
      "epoch": 0.644925327644011,
      "grad_norm": 0.87109375,
      "learning_rate": 5.9153563042889934e-05,
      "loss": 1.0229,
      "step": 4232
    },
    {
      "epoch": 0.6450777202072538,
      "grad_norm": 1.0,
      "learning_rate": 5.910851593958707e-05,
      "loss": 0.9962,
      "step": 4233
    },
    {
      "epoch": 0.6452301127704968,
      "grad_norm": 0.99609375,
      "learning_rate": 5.906347879803773e-05,
      "loss": 1.1458,
      "step": 4234
    },
    {
      "epoch": 0.6453825053337398,
      "grad_norm": 1.015625,
      "learning_rate": 5.90184516292136e-05,
      "loss": 0.903,
      "step": 4235
    },
    {
      "epoch": 0.6455348978969826,
      "grad_norm": 0.84375,
      "learning_rate": 5.897343444408403e-05,
      "loss": 0.9154,
      "step": 4236
    },
    {
      "epoch": 0.6456872904602255,
      "grad_norm": 0.77734375,
      "learning_rate": 5.892842725361578e-05,
      "loss": 0.9438,
      "step": 4237
    },
    {
      "epoch": 0.6458396830234685,
      "grad_norm": 0.7421875,
      "learning_rate": 5.8883430068773236e-05,
      "loss": 0.8549,
      "step": 4238
    },
    {
      "epoch": 0.6459920755867113,
      "grad_norm": 0.83984375,
      "learning_rate": 5.8838442900518454e-05,
      "loss": 0.8328,
      "step": 4239
    },
    {
      "epoch": 0.6461444681499543,
      "grad_norm": 1.0234375,
      "learning_rate": 5.8793465759810864e-05,
      "loss": 1.1672,
      "step": 4240
    },
    {
      "epoch": 0.6462968607131971,
      "grad_norm": 0.98828125,
      "learning_rate": 5.8748498657607564e-05,
      "loss": 0.939,
      "step": 4241
    },
    {
      "epoch": 0.6464492532764401,
      "grad_norm": 1.0703125,
      "learning_rate": 5.870354160486322e-05,
      "loss": 0.977,
      "step": 4242
    },
    {
      "epoch": 0.646601645839683,
      "grad_norm": 1.0078125,
      "learning_rate": 5.865859461252996e-05,
      "loss": 1.0238,
      "step": 4243
    },
    {
      "epoch": 0.6467540384029259,
      "grad_norm": 1.0703125,
      "learning_rate": 5.86136576915576e-05,
      "loss": 0.8067,
      "step": 4244
    },
    {
      "epoch": 0.6469064309661688,
      "grad_norm": 0.94140625,
      "learning_rate": 5.856873085289336e-05,
      "loss": 0.9646,
      "step": 4245
    },
    {
      "epoch": 0.6470588235294118,
      "grad_norm": 0.70703125,
      "learning_rate": 5.8523814107482046e-05,
      "loss": 0.9037,
      "step": 4246
    },
    {
      "epoch": 0.6472112160926546,
      "grad_norm": 0.83984375,
      "learning_rate": 5.84789074662661e-05,
      "loss": 0.8376,
      "step": 4247
    },
    {
      "epoch": 0.6473636086558976,
      "grad_norm": 1.2265625,
      "learning_rate": 5.843401094018536e-05,
      "loss": 1.0754,
      "step": 4248
    },
    {
      "epoch": 0.6475160012191405,
      "grad_norm": 1.21875,
      "learning_rate": 5.838912454017724e-05,
      "loss": 0.9874,
      "step": 4249
    },
    {
      "epoch": 0.6476683937823834,
      "grad_norm": 1.015625,
      "learning_rate": 5.8344248277176835e-05,
      "loss": 1.0481,
      "step": 4250
    },
    {
      "epoch": 0.6478207863456263,
      "grad_norm": 0.94921875,
      "learning_rate": 5.829938216211653e-05,
      "loss": 1.0363,
      "step": 4251
    },
    {
      "epoch": 0.6479731789088693,
      "grad_norm": 0.91015625,
      "learning_rate": 5.8254526205926375e-05,
      "loss": 1.1204,
      "step": 4252
    },
    {
      "epoch": 0.6481255714721121,
      "grad_norm": 1.0390625,
      "learning_rate": 5.8209680419533973e-05,
      "loss": 0.9738,
      "step": 4253
    },
    {
      "epoch": 0.6482779640353551,
      "grad_norm": 0.83984375,
      "learning_rate": 5.816484481386438e-05,
      "loss": 0.8441,
      "step": 4254
    },
    {
      "epoch": 0.6484303565985979,
      "grad_norm": 1.0546875,
      "learning_rate": 5.81200193998402e-05,
      "loss": 0.8009,
      "step": 4255
    },
    {
      "epoch": 0.6485827491618409,
      "grad_norm": 0.9375,
      "learning_rate": 5.8075204188381524e-05,
      "loss": 0.9067,
      "step": 4256
    },
    {
      "epoch": 0.6487351417250838,
      "grad_norm": 0.98828125,
      "learning_rate": 5.8030399190406e-05,
      "loss": 0.9038,
      "step": 4257
    },
    {
      "epoch": 0.6488875342883267,
      "grad_norm": 1.1328125,
      "learning_rate": 5.798560441682874e-05,
      "loss": 1.0214,
      "step": 4258
    },
    {
      "epoch": 0.6490399268515696,
      "grad_norm": 1.2265625,
      "learning_rate": 5.794081987856246e-05,
      "loss": 0.9428,
      "step": 4259
    },
    {
      "epoch": 0.6491923194148126,
      "grad_norm": 1.1015625,
      "learning_rate": 5.7896045586517264e-05,
      "loss": 1.1085,
      "step": 4260
    },
    {
      "epoch": 0.6493447119780554,
      "grad_norm": 0.77734375,
      "learning_rate": 5.7851281551600846e-05,
      "loss": 1.0319,
      "step": 4261
    },
    {
      "epoch": 0.6494971045412984,
      "grad_norm": 1.0546875,
      "learning_rate": 5.7806527784718336e-05,
      "loss": 1.0999,
      "step": 4262
    },
    {
      "epoch": 0.6496494971045413,
      "grad_norm": 0.859375,
      "learning_rate": 5.7761784296772395e-05,
      "loss": 1.0411,
      "step": 4263
    },
    {
      "epoch": 0.6498018896677842,
      "grad_norm": 0.984375,
      "learning_rate": 5.7717051098663214e-05,
      "loss": 1.0052,
      "step": 4264
    },
    {
      "epoch": 0.6499542822310271,
      "grad_norm": 1.078125,
      "learning_rate": 5.7672328201288425e-05,
      "loss": 1.0767,
      "step": 4265
    },
    {
      "epoch": 0.6501066747942701,
      "grad_norm": 0.96484375,
      "learning_rate": 5.762761561554317e-05,
      "loss": 1.1239,
      "step": 4266
    },
    {
      "epoch": 0.6502590673575129,
      "grad_norm": 1.0390625,
      "learning_rate": 5.758291335232009e-05,
      "loss": 1.168,
      "step": 4267
    },
    {
      "epoch": 0.6504114599207559,
      "grad_norm": 0.88671875,
      "learning_rate": 5.753822142250928e-05,
      "loss": 0.8794,
      "step": 4268
    },
    {
      "epoch": 0.6505638524839987,
      "grad_norm": 0.68359375,
      "learning_rate": 5.74935398369983e-05,
      "loss": 0.9115,
      "step": 4269
    },
    {
      "epoch": 0.6507162450472417,
      "grad_norm": 1.0078125,
      "learning_rate": 5.744886860667231e-05,
      "loss": 0.8916,
      "step": 4270
    },
    {
      "epoch": 0.6508686376104846,
      "grad_norm": 1.109375,
      "learning_rate": 5.7404207742413815e-05,
      "loss": 1.0132,
      "step": 4271
    },
    {
      "epoch": 0.6510210301737275,
      "grad_norm": 0.96875,
      "learning_rate": 5.735955725510287e-05,
      "loss": 0.7195,
      "step": 4272
    },
    {
      "epoch": 0.6511734227369704,
      "grad_norm": 0.80859375,
      "learning_rate": 5.731491715561694e-05,
      "loss": 0.9501,
      "step": 4273
    },
    {
      "epoch": 0.6513258153002134,
      "grad_norm": 0.9453125,
      "learning_rate": 5.727028745483103e-05,
      "loss": 1.0459,
      "step": 4274
    },
    {
      "epoch": 0.6514782078634562,
      "grad_norm": 0.89453125,
      "learning_rate": 5.722566816361752e-05,
      "loss": 0.9695,
      "step": 4275
    },
    {
      "epoch": 0.6516306004266992,
      "grad_norm": 1.5234375,
      "learning_rate": 5.718105929284638e-05,
      "loss": 0.9802,
      "step": 4276
    },
    {
      "epoch": 0.6517829929899421,
      "grad_norm": 1.0234375,
      "learning_rate": 5.713646085338496e-05,
      "loss": 1.0023,
      "step": 4277
    },
    {
      "epoch": 0.651935385553185,
      "grad_norm": 1.390625,
      "learning_rate": 5.709187285609805e-05,
      "loss": 1.1041,
      "step": 4278
    },
    {
      "epoch": 0.6520877781164279,
      "grad_norm": 0.9140625,
      "learning_rate": 5.704729531184794e-05,
      "loss": 1.0665,
      "step": 4279
    },
    {
      "epoch": 0.6522401706796709,
      "grad_norm": 0.859375,
      "learning_rate": 5.700272823149433e-05,
      "loss": 0.8006,
      "step": 4280
    },
    {
      "epoch": 0.6523925632429137,
      "grad_norm": 1.1953125,
      "learning_rate": 5.695817162589447e-05,
      "loss": 1.1684,
      "step": 4281
    },
    {
      "epoch": 0.6525449558061567,
      "grad_norm": 0.7265625,
      "learning_rate": 5.691362550590297e-05,
      "loss": 1.0639,
      "step": 4282
    },
    {
      "epoch": 0.6526973483693995,
      "grad_norm": 1.25,
      "learning_rate": 5.686908988237187e-05,
      "loss": 0.8393,
      "step": 4283
    },
    {
      "epoch": 0.6528497409326425,
      "grad_norm": 1.0078125,
      "learning_rate": 5.6824564766150726e-05,
      "loss": 1.0792,
      "step": 4284
    },
    {
      "epoch": 0.6530021334958854,
      "grad_norm": 0.90234375,
      "learning_rate": 5.678005016808648e-05,
      "loss": 0.9282,
      "step": 4285
    },
    {
      "epoch": 0.6531545260591283,
      "grad_norm": 0.90234375,
      "learning_rate": 5.673554609902346e-05,
      "loss": 0.9869,
      "step": 4286
    },
    {
      "epoch": 0.6533069186223712,
      "grad_norm": 1.0703125,
      "learning_rate": 5.669105256980363e-05,
      "loss": 0.931,
      "step": 4287
    },
    {
      "epoch": 0.6534593111856142,
      "grad_norm": 0.86328125,
      "learning_rate": 5.6646569591266174e-05,
      "loss": 0.9028,
      "step": 4288
    },
    {
      "epoch": 0.653611703748857,
      "grad_norm": 0.92578125,
      "learning_rate": 5.6602097174247806e-05,
      "loss": 1.0369,
      "step": 4289
    },
    {
      "epoch": 0.6537640963121,
      "grad_norm": 1.0390625,
      "learning_rate": 5.6557635329582645e-05,
      "loss": 1.1231,
      "step": 4290
    },
    {
      "epoch": 0.6539164888753429,
      "grad_norm": 0.77734375,
      "learning_rate": 5.6513184068102224e-05,
      "loss": 0.9945,
      "step": 4291
    },
    {
      "epoch": 0.6540688814385858,
      "grad_norm": 0.8828125,
      "learning_rate": 5.646874340063547e-05,
      "loss": 0.9989,
      "step": 4292
    },
    {
      "epoch": 0.6542212740018287,
      "grad_norm": 0.98828125,
      "learning_rate": 5.642431333800886e-05,
      "loss": 1.2282,
      "step": 4293
    },
    {
      "epoch": 0.6543736665650717,
      "grad_norm": 1.046875,
      "learning_rate": 5.6379893891046154e-05,
      "loss": 1.0487,
      "step": 4294
    },
    {
      "epoch": 0.6545260591283145,
      "grad_norm": 0.90234375,
      "learning_rate": 5.633548507056856e-05,
      "loss": 1.1273,
      "step": 4295
    },
    {
      "epoch": 0.6546784516915575,
      "grad_norm": 0.89453125,
      "learning_rate": 5.6291086887394706e-05,
      "loss": 0.8272,
      "step": 4296
    },
    {
      "epoch": 0.6548308442548003,
      "grad_norm": 0.92578125,
      "learning_rate": 5.62466993523406e-05,
      "loss": 1.0205,
      "step": 4297
    },
    {
      "epoch": 0.6549832368180433,
      "grad_norm": 0.8984375,
      "learning_rate": 5.620232247621975e-05,
      "loss": 0.9679,
      "step": 4298
    },
    {
      "epoch": 0.6551356293812862,
      "grad_norm": 0.984375,
      "learning_rate": 5.615795626984297e-05,
      "loss": 1.0693,
      "step": 4299
    },
    {
      "epoch": 0.6552880219445291,
      "grad_norm": 0.9921875,
      "learning_rate": 5.61136007440185e-05,
      "loss": 0.9511,
      "step": 4300
    },
    {
      "epoch": 0.655440414507772,
      "grad_norm": 0.71484375,
      "learning_rate": 5.606925590955199e-05,
      "loss": 0.852,
      "step": 4301
    },
    {
      "epoch": 0.655592807071015,
      "grad_norm": 0.90234375,
      "learning_rate": 5.6024921777246476e-05,
      "loss": 0.9692,
      "step": 4302
    },
    {
      "epoch": 0.6557451996342578,
      "grad_norm": 1.1796875,
      "learning_rate": 5.5980598357902346e-05,
      "loss": 0.8901,
      "step": 4303
    },
    {
      "epoch": 0.6558975921975008,
      "grad_norm": 0.9375,
      "learning_rate": 5.5936285662317526e-05,
      "loss": 1.0272,
      "step": 4304
    },
    {
      "epoch": 0.6560499847607437,
      "grad_norm": 0.8984375,
      "learning_rate": 5.589198370128718e-05,
      "loss": 1.0801,
      "step": 4305
    },
    {
      "epoch": 0.6562023773239866,
      "grad_norm": 0.8828125,
      "learning_rate": 5.584769248560382e-05,
      "loss": 1.0309,
      "step": 4306
    },
    {
      "epoch": 0.6563547698872295,
      "grad_norm": 0.9375,
      "learning_rate": 5.580341202605752e-05,
      "loss": 1.0505,
      "step": 4307
    },
    {
      "epoch": 0.6565071624504725,
      "grad_norm": 0.9375,
      "learning_rate": 5.5759142333435585e-05,
      "loss": 0.8539,
      "step": 4308
    },
    {
      "epoch": 0.6566595550137153,
      "grad_norm": 1.0,
      "learning_rate": 5.571488341852281e-05,
      "loss": 0.8944,
      "step": 4309
    },
    {
      "epoch": 0.6568119475769583,
      "grad_norm": 0.9609375,
      "learning_rate": 5.567063529210126e-05,
      "loss": 1.0012,
      "step": 4310
    },
    {
      "epoch": 0.6569643401402011,
      "grad_norm": 1.0546875,
      "learning_rate": 5.562639796495043e-05,
      "loss": 1.1009,
      "step": 4311
    },
    {
      "epoch": 0.6571167327034441,
      "grad_norm": 0.7109375,
      "learning_rate": 5.558217144784714e-05,
      "loss": 0.8205,
      "step": 4312
    },
    {
      "epoch": 0.657269125266687,
      "grad_norm": 0.8828125,
      "learning_rate": 5.553795575156565e-05,
      "loss": 0.8605,
      "step": 4313
    },
    {
      "epoch": 0.6574215178299299,
      "grad_norm": 0.859375,
      "learning_rate": 5.549375088687746e-05,
      "loss": 1.0265,
      "step": 4314
    },
    {
      "epoch": 0.6575739103931728,
      "grad_norm": 1.1875,
      "learning_rate": 5.544955686455161e-05,
      "loss": 1.1376,
      "step": 4315
    },
    {
      "epoch": 0.6577263029564158,
      "grad_norm": 1.0546875,
      "learning_rate": 5.5405373695354353e-05,
      "loss": 1.0277,
      "step": 4316
    },
    {
      "epoch": 0.6578786955196586,
      "grad_norm": 0.64453125,
      "learning_rate": 5.5361201390049345e-05,
      "loss": 0.7974,
      "step": 4317
    },
    {
      "epoch": 0.6580310880829016,
      "grad_norm": 1.0078125,
      "learning_rate": 5.5317039959397606e-05,
      "loss": 0.9395,
      "step": 4318
    },
    {
      "epoch": 0.6581834806461445,
      "grad_norm": 0.98046875,
      "learning_rate": 5.527288941415747e-05,
      "loss": 0.8477,
      "step": 4319
    },
    {
      "epoch": 0.6583358732093874,
      "grad_norm": 0.796875,
      "learning_rate": 5.522874976508463e-05,
      "loss": 1.1312,
      "step": 4320
    },
    {
      "epoch": 0.6584882657726303,
      "grad_norm": 0.79296875,
      "learning_rate": 5.51846210229322e-05,
      "loss": 1.0501,
      "step": 4321
    },
    {
      "epoch": 0.6586406583358733,
      "grad_norm": 0.90234375,
      "learning_rate": 5.51405031984506e-05,
      "loss": 0.9168,
      "step": 4322
    },
    {
      "epoch": 0.6587930508991161,
      "grad_norm": 0.859375,
      "learning_rate": 5.5096396302387434e-05,
      "loss": 0.8754,
      "step": 4323
    },
    {
      "epoch": 0.6589454434623591,
      "grad_norm": 0.7421875,
      "learning_rate": 5.5052300345487875e-05,
      "loss": 1.0222,
      "step": 4324
    },
    {
      "epoch": 0.6590978360256019,
      "grad_norm": 1.015625,
      "learning_rate": 5.5008215338494275e-05,
      "loss": 1.015,
      "step": 4325
    },
    {
      "epoch": 0.6592502285888449,
      "grad_norm": 0.81640625,
      "learning_rate": 5.4964141292146464e-05,
      "loss": 0.9313,
      "step": 4326
    },
    {
      "epoch": 0.6594026211520878,
      "grad_norm": 0.984375,
      "learning_rate": 5.492007821718146e-05,
      "loss": 1.0051,
      "step": 4327
    },
    {
      "epoch": 0.6595550137153307,
      "grad_norm": 1.2109375,
      "learning_rate": 5.4876026124333654e-05,
      "loss": 1.1009,
      "step": 4328
    },
    {
      "epoch": 0.6597074062785736,
      "grad_norm": 1.0234375,
      "learning_rate": 5.483198502433479e-05,
      "loss": 0.8472,
      "step": 4329
    },
    {
      "epoch": 0.6598597988418166,
      "grad_norm": 0.7890625,
      "learning_rate": 5.4787954927913886e-05,
      "loss": 0.9097,
      "step": 4330
    },
    {
      "epoch": 0.6600121914050594,
      "grad_norm": 0.8046875,
      "learning_rate": 5.47439358457973e-05,
      "loss": 0.9587,
      "step": 4331
    },
    {
      "epoch": 0.6601645839683024,
      "grad_norm": 1.2265625,
      "learning_rate": 5.469992778870876e-05,
      "loss": 1.003,
      "step": 4332
    },
    {
      "epoch": 0.6603169765315453,
      "grad_norm": 0.765625,
      "learning_rate": 5.465593076736929e-05,
      "loss": 0.9696,
      "step": 4333
    },
    {
      "epoch": 0.6604693690947882,
      "grad_norm": 0.796875,
      "learning_rate": 5.461194479249706e-05,
      "loss": 0.9347,
      "step": 4334
    },
    {
      "epoch": 0.6606217616580311,
      "grad_norm": 0.87109375,
      "learning_rate": 5.4567969874807813e-05,
      "loss": 0.9861,
      "step": 4335
    },
    {
      "epoch": 0.6607741542212741,
      "grad_norm": 0.97265625,
      "learning_rate": 5.4524006025014426e-05,
      "loss": 1.082,
      "step": 4336
    },
    {
      "epoch": 0.6609265467845169,
      "grad_norm": 0.91015625,
      "learning_rate": 5.448005325382709e-05,
      "loss": 1.0244,
      "step": 4337
    },
    {
      "epoch": 0.6610789393477599,
      "grad_norm": 0.8125,
      "learning_rate": 5.443611157195341e-05,
      "loss": 0.8713,
      "step": 4338
    },
    {
      "epoch": 0.6612313319110027,
      "grad_norm": 0.8671875,
      "learning_rate": 5.439218099009822e-05,
      "loss": 0.9312,
      "step": 4339
    },
    {
      "epoch": 0.6613837244742456,
      "grad_norm": 0.95703125,
      "learning_rate": 5.4348261518963496e-05,
      "loss": 0.9813,
      "step": 4340
    },
    {
      "epoch": 0.6615361170374886,
      "grad_norm": 0.9765625,
      "learning_rate": 5.4304353169248804e-05,
      "loss": 0.9789,
      "step": 4341
    },
    {
      "epoch": 0.6616885096007314,
      "grad_norm": 0.83984375,
      "learning_rate": 5.426045595165075e-05,
      "loss": 0.9445,
      "step": 4342
    },
    {
      "epoch": 0.6618409021639744,
      "grad_norm": 0.8203125,
      "learning_rate": 5.421656987686341e-05,
      "loss": 0.9718,
      "step": 4343
    },
    {
      "epoch": 0.6619932947272174,
      "grad_norm": 0.93359375,
      "learning_rate": 5.417269495557802e-05,
      "loss": 0.8309,
      "step": 4344
    },
    {
      "epoch": 0.6621456872904602,
      "grad_norm": 0.94140625,
      "learning_rate": 5.412883119848315e-05,
      "loss": 0.8985,
      "step": 4345
    },
    {
      "epoch": 0.6622980798537031,
      "grad_norm": 0.859375,
      "learning_rate": 5.408497861626465e-05,
      "loss": 0.8903,
      "step": 4346
    },
    {
      "epoch": 0.6624504724169461,
      "grad_norm": 0.9296875,
      "learning_rate": 5.404113721960562e-05,
      "loss": 1.0117,
      "step": 4347
    },
    {
      "epoch": 0.6626028649801889,
      "grad_norm": 1.140625,
      "learning_rate": 5.3997307019186426e-05,
      "loss": 0.9901,
      "step": 4348
    },
    {
      "epoch": 0.6627552575434319,
      "grad_norm": 1.1953125,
      "learning_rate": 5.3953488025684815e-05,
      "loss": 1.1064,
      "step": 4349
    },
    {
      "epoch": 0.6629076501066747,
      "grad_norm": 0.8515625,
      "learning_rate": 5.390968024977572e-05,
      "loss": 0.9518,
      "step": 4350
    },
    {
      "epoch": 0.6630600426699177,
      "grad_norm": 0.94921875,
      "learning_rate": 5.386588370213124e-05,
      "loss": 0.9827,
      "step": 4351
    },
    {
      "epoch": 0.6632124352331606,
      "grad_norm": 0.7109375,
      "learning_rate": 5.382209839342093e-05,
      "loss": 0.8854,
      "step": 4352
    },
    {
      "epoch": 0.6633648277964035,
      "grad_norm": 0.7734375,
      "learning_rate": 5.3778324334311516e-05,
      "loss": 0.9739,
      "step": 4353
    },
    {
      "epoch": 0.6635172203596464,
      "grad_norm": 0.875,
      "learning_rate": 5.373456153546692e-05,
      "loss": 1.0709,
      "step": 4354
    },
    {
      "epoch": 0.6636696129228894,
      "grad_norm": 0.8203125,
      "learning_rate": 5.3690810007548485e-05,
      "loss": 1.1013,
      "step": 4355
    },
    {
      "epoch": 0.6638220054861322,
      "grad_norm": 1.109375,
      "learning_rate": 5.364706976121472e-05,
      "loss": 1.1039,
      "step": 4356
    },
    {
      "epoch": 0.6639743980493752,
      "grad_norm": 0.91796875,
      "learning_rate": 5.360334080712124e-05,
      "loss": 1.0319,
      "step": 4357
    },
    {
      "epoch": 0.6641267906126181,
      "grad_norm": 0.8671875,
      "learning_rate": 5.355962315592118e-05,
      "loss": 0.703,
      "step": 4358
    },
    {
      "epoch": 0.664279183175861,
      "grad_norm": 1.28125,
      "learning_rate": 5.351591681826471e-05,
      "loss": 0.9887,
      "step": 4359
    },
    {
      "epoch": 0.6644315757391039,
      "grad_norm": 0.7109375,
      "learning_rate": 5.347222180479938e-05,
      "loss": 0.9156,
      "step": 4360
    },
    {
      "epoch": 0.6645839683023469,
      "grad_norm": 1.046875,
      "learning_rate": 5.342853812616996e-05,
      "loss": 0.8573,
      "step": 4361
    },
    {
      "epoch": 0.6647363608655897,
      "grad_norm": 0.8984375,
      "learning_rate": 5.338486579301827e-05,
      "loss": 1.1177,
      "step": 4362
    },
    {
      "epoch": 0.6648887534288327,
      "grad_norm": 0.72265625,
      "learning_rate": 5.334120481598366e-05,
      "loss": 0.8453,
      "step": 4363
    },
    {
      "epoch": 0.6650411459920755,
      "grad_norm": 0.74609375,
      "learning_rate": 5.3297555205702507e-05,
      "loss": 0.9417,
      "step": 4364
    },
    {
      "epoch": 0.6651935385553185,
      "grad_norm": 0.94921875,
      "learning_rate": 5.325391697280847e-05,
      "loss": 0.9208,
      "step": 4365
    },
    {
      "epoch": 0.6653459311185614,
      "grad_norm": 0.86328125,
      "learning_rate": 5.3210290127932516e-05,
      "loss": 0.8745,
      "step": 4366
    },
    {
      "epoch": 0.6654983236818043,
      "grad_norm": 0.859375,
      "learning_rate": 5.316667468170278e-05,
      "loss": 0.8955,
      "step": 4367
    },
    {
      "epoch": 0.6656507162450472,
      "grad_norm": 0.83203125,
      "learning_rate": 5.3123070644744475e-05,
      "loss": 0.899,
      "step": 4368
    },
    {
      "epoch": 0.6658031088082902,
      "grad_norm": 0.91796875,
      "learning_rate": 5.307947802768032e-05,
      "loss": 0.9364,
      "step": 4369
    },
    {
      "epoch": 0.665955501371533,
      "grad_norm": 1.140625,
      "learning_rate": 5.303589684113003e-05,
      "loss": 0.9869,
      "step": 4370
    },
    {
      "epoch": 0.666107893934776,
      "grad_norm": 1.109375,
      "learning_rate": 5.29923270957106e-05,
      "loss": 1.1329,
      "step": 4371
    },
    {
      "epoch": 0.6662602864980189,
      "grad_norm": 1.1015625,
      "learning_rate": 5.29487688020363e-05,
      "loss": 1.1235,
      "step": 4372
    },
    {
      "epoch": 0.6664126790612618,
      "grad_norm": 0.6484375,
      "learning_rate": 5.2905221970718535e-05,
      "loss": 0.9461,
      "step": 4373
    },
    {
      "epoch": 0.6665650716245047,
      "grad_norm": 1.0625,
      "learning_rate": 5.286168661236592e-05,
      "loss": 1.1184,
      "step": 4374
    },
    {
      "epoch": 0.6667174641877477,
      "grad_norm": 1.1953125,
      "learning_rate": 5.281816273758433e-05,
      "loss": 1.201,
      "step": 4375
    },
    {
      "epoch": 0.6668698567509905,
      "grad_norm": 1.0703125,
      "learning_rate": 5.277465035697673e-05,
      "loss": 0.8836,
      "step": 4376
    },
    {
      "epoch": 0.6670222493142335,
      "grad_norm": 0.94140625,
      "learning_rate": 5.273114948114346e-05,
      "loss": 0.9149,
      "step": 4377
    },
    {
      "epoch": 0.6671746418774763,
      "grad_norm": 0.89453125,
      "learning_rate": 5.268766012068196e-05,
      "loss": 0.938,
      "step": 4378
    },
    {
      "epoch": 0.6673270344407193,
      "grad_norm": 0.94140625,
      "learning_rate": 5.264418228618673e-05,
      "loss": 1.1398,
      "step": 4379
    },
    {
      "epoch": 0.6674794270039622,
      "grad_norm": 0.85546875,
      "learning_rate": 5.260071598824974e-05,
      "loss": 1.0615,
      "step": 4380
    },
    {
      "epoch": 0.6676318195672051,
      "grad_norm": 1.1171875,
      "learning_rate": 5.255726123745992e-05,
      "loss": 0.9427,
      "step": 4381
    },
    {
      "epoch": 0.667784212130448,
      "grad_norm": 1.0703125,
      "learning_rate": 5.251381804440349e-05,
      "loss": 0.9301,
      "step": 4382
    },
    {
      "epoch": 0.667936604693691,
      "grad_norm": 0.73046875,
      "learning_rate": 5.247038641966386e-05,
      "loss": 1.0708,
      "step": 4383
    },
    {
      "epoch": 0.6680889972569338,
      "grad_norm": 0.95703125,
      "learning_rate": 5.242696637382165e-05,
      "loss": 0.9749,
      "step": 4384
    },
    {
      "epoch": 0.6682413898201768,
      "grad_norm": 1.0234375,
      "learning_rate": 5.238355791745446e-05,
      "loss": 0.9129,
      "step": 4385
    },
    {
      "epoch": 0.6683937823834197,
      "grad_norm": 1.0703125,
      "learning_rate": 5.2340161061137324e-05,
      "loss": 1.009,
      "step": 4386
    },
    {
      "epoch": 0.6685461749466626,
      "grad_norm": 0.8671875,
      "learning_rate": 5.2296775815442334e-05,
      "loss": 1.1514,
      "step": 4387
    },
    {
      "epoch": 0.6686985675099055,
      "grad_norm": 0.96484375,
      "learning_rate": 5.225340219093869e-05,
      "loss": 0.8692,
      "step": 4388
    },
    {
      "epoch": 0.6688509600731485,
      "grad_norm": 1.1484375,
      "learning_rate": 5.2210040198192956e-05,
      "loss": 1.067,
      "step": 4389
    },
    {
      "epoch": 0.6690033526363913,
      "grad_norm": 1.1328125,
      "learning_rate": 5.21666898477686e-05,
      "loss": 0.7778,
      "step": 4390
    },
    {
      "epoch": 0.6691557451996343,
      "grad_norm": 0.66796875,
      "learning_rate": 5.212335115022647e-05,
      "loss": 0.9345,
      "step": 4391
    },
    {
      "epoch": 0.6693081377628771,
      "grad_norm": 1.0390625,
      "learning_rate": 5.208002411612449e-05,
      "loss": 1.0448,
      "step": 4392
    },
    {
      "epoch": 0.6694605303261201,
      "grad_norm": 0.97265625,
      "learning_rate": 5.2036708756017696e-05,
      "loss": 1.2466,
      "step": 4393
    },
    {
      "epoch": 0.669612922889363,
      "grad_norm": 0.84375,
      "learning_rate": 5.199340508045842e-05,
      "loss": 0.7403,
      "step": 4394
    },
    {
      "epoch": 0.6697653154526059,
      "grad_norm": 0.9609375,
      "learning_rate": 5.1950113099996046e-05,
      "loss": 1.0688,
      "step": 4395
    },
    {
      "epoch": 0.6699177080158488,
      "grad_norm": 1.0546875,
      "learning_rate": 5.190683282517701e-05,
      "loss": 1.1098,
      "step": 4396
    },
    {
      "epoch": 0.6700701005790918,
      "grad_norm": 0.98828125,
      "learning_rate": 5.1863564266545125e-05,
      "loss": 0.9971,
      "step": 4397
    },
    {
      "epoch": 0.6702224931423346,
      "grad_norm": 0.796875,
      "learning_rate": 5.1820307434641216e-05,
      "loss": 0.9244,
      "step": 4398
    },
    {
      "epoch": 0.6703748857055776,
      "grad_norm": 0.77734375,
      "learning_rate": 5.177706234000319e-05,
      "loss": 0.899,
      "step": 4399
    },
    {
      "epoch": 0.6705272782688205,
      "grad_norm": 1.0625,
      "learning_rate": 5.17338289931663e-05,
      "loss": 1.1203,
      "step": 4400
    },
    {
      "epoch": 0.6706796708320634,
      "grad_norm": 0.78125,
      "learning_rate": 5.169060740466277e-05,
      "loss": 0.9709,
      "step": 4401
    },
    {
      "epoch": 0.6708320633953063,
      "grad_norm": 0.8359375,
      "learning_rate": 5.164739758502191e-05,
      "loss": 0.8882,
      "step": 4402
    },
    {
      "epoch": 0.6709844559585493,
      "grad_norm": 0.921875,
      "learning_rate": 5.160419954477036e-05,
      "loss": 0.9836,
      "step": 4403
    },
    {
      "epoch": 0.6711368485217921,
      "grad_norm": 1.015625,
      "learning_rate": 5.156101329443173e-05,
      "loss": 1.0681,
      "step": 4404
    },
    {
      "epoch": 0.6712892410850351,
      "grad_norm": 1.0625,
      "learning_rate": 5.15178388445268e-05,
      "loss": 1.0499,
      "step": 4405
    },
    {
      "epoch": 0.6714416336482779,
      "grad_norm": 1.03125,
      "learning_rate": 5.147467620557359e-05,
      "loss": 0.8915,
      "step": 4406
    },
    {
      "epoch": 0.6715940262115209,
      "grad_norm": 0.84375,
      "learning_rate": 5.1431525388086975e-05,
      "loss": 0.9441,
      "step": 4407
    },
    {
      "epoch": 0.6717464187747638,
      "grad_norm": 1.1171875,
      "learning_rate": 5.1388386402579255e-05,
      "loss": 1.1322,
      "step": 4408
    },
    {
      "epoch": 0.6718988113380067,
      "grad_norm": 1.2109375,
      "learning_rate": 5.1345259259559666e-05,
      "loss": 1.0276,
      "step": 4409
    },
    {
      "epoch": 0.6720512039012496,
      "grad_norm": 1.0078125,
      "learning_rate": 5.130214396953452e-05,
      "loss": 0.8943,
      "step": 4410
    },
    {
      "epoch": 0.6722035964644926,
      "grad_norm": 1.03125,
      "learning_rate": 5.125904054300745e-05,
      "loss": 1.0037,
      "step": 4411
    },
    {
      "epoch": 0.6723559890277354,
      "grad_norm": 0.703125,
      "learning_rate": 5.1215948990479044e-05,
      "loss": 0.9339,
      "step": 4412
    },
    {
      "epoch": 0.6725083815909784,
      "grad_norm": 1.1171875,
      "learning_rate": 5.117286932244692e-05,
      "loss": 0.8885,
      "step": 4413
    },
    {
      "epoch": 0.6726607741542213,
      "grad_norm": 0.68359375,
      "learning_rate": 5.112980154940601e-05,
      "loss": 0.8857,
      "step": 4414
    },
    {
      "epoch": 0.6728131667174642,
      "grad_norm": 0.8125,
      "learning_rate": 5.108674568184822e-05,
      "loss": 1.0512,
      "step": 4415
    },
    {
      "epoch": 0.6729655592807071,
      "grad_norm": 1.1875,
      "learning_rate": 5.1043701730262506e-05,
      "loss": 1.0887,
      "step": 4416
    },
    {
      "epoch": 0.6731179518439501,
      "grad_norm": 0.9765625,
      "learning_rate": 5.100066970513514e-05,
      "loss": 1.0959,
      "step": 4417
    },
    {
      "epoch": 0.6732703444071929,
      "grad_norm": 0.78515625,
      "learning_rate": 5.095764961694922e-05,
      "loss": 0.8675,
      "step": 4418
    },
    {
      "epoch": 0.6734227369704359,
      "grad_norm": 0.9921875,
      "learning_rate": 5.091464147618505e-05,
      "loss": 1.0217,
      "step": 4419
    },
    {
      "epoch": 0.6735751295336787,
      "grad_norm": 1.15625,
      "learning_rate": 5.0871645293320136e-05,
      "loss": 0.9237,
      "step": 4420
    },
    {
      "epoch": 0.6737275220969217,
      "grad_norm": 0.8125,
      "learning_rate": 5.082866107882889e-05,
      "loss": 0.8877,
      "step": 4421
    },
    {
      "epoch": 0.6738799146601646,
      "grad_norm": 1.1796875,
      "learning_rate": 5.078568884318287e-05,
      "loss": 1.0432,
      "step": 4422
    },
    {
      "epoch": 0.6740323072234075,
      "grad_norm": 1.421875,
      "learning_rate": 5.0742728596850844e-05,
      "loss": 0.9272,
      "step": 4423
    },
    {
      "epoch": 0.6741846997866504,
      "grad_norm": 1.015625,
      "learning_rate": 5.0699780350298374e-05,
      "loss": 0.8997,
      "step": 4424
    },
    {
      "epoch": 0.6743370923498934,
      "grad_norm": 0.92578125,
      "learning_rate": 5.065684411398841e-05,
      "loss": 0.9741,
      "step": 4425
    },
    {
      "epoch": 0.6744894849131362,
      "grad_norm": 1.0859375,
      "learning_rate": 5.061391989838078e-05,
      "loss": 1.2165,
      "step": 4426
    },
    {
      "epoch": 0.6746418774763792,
      "grad_norm": 1.0078125,
      "learning_rate": 5.057100771393242e-05,
      "loss": 1.0387,
      "step": 4427
    },
    {
      "epoch": 0.6747942700396221,
      "grad_norm": 0.95703125,
      "learning_rate": 5.05281075710974e-05,
      "loss": 1.0537,
      "step": 4428
    },
    {
      "epoch": 0.674946662602865,
      "grad_norm": 0.8515625,
      "learning_rate": 5.048521948032685e-05,
      "loss": 0.8983,
      "step": 4429
    },
    {
      "epoch": 0.6750990551661079,
      "grad_norm": 0.9609375,
      "learning_rate": 5.0442343452068796e-05,
      "loss": 1.0447,
      "step": 4430
    },
    {
      "epoch": 0.6752514477293509,
      "grad_norm": 1.0234375,
      "learning_rate": 5.0399479496768554e-05,
      "loss": 1.1976,
      "step": 4431
    },
    {
      "epoch": 0.6754038402925937,
      "grad_norm": 0.80859375,
      "learning_rate": 5.035662762486838e-05,
      "loss": 0.8116,
      "step": 4432
    },
    {
      "epoch": 0.6755562328558367,
      "grad_norm": 0.7890625,
      "learning_rate": 5.031378784680756e-05,
      "loss": 0.8615,
      "step": 4433
    },
    {
      "epoch": 0.6757086254190795,
      "grad_norm": 0.89453125,
      "learning_rate": 5.0270960173022604e-05,
      "loss": 0.7718,
      "step": 4434
    },
    {
      "epoch": 0.6758610179823225,
      "grad_norm": 0.97265625,
      "learning_rate": 5.022814461394683e-05,
      "loss": 0.9413,
      "step": 4435
    },
    {
      "epoch": 0.6760134105455654,
      "grad_norm": 0.91015625,
      "learning_rate": 5.0185341180010725e-05,
      "loss": 0.807,
      "step": 4436
    },
    {
      "epoch": 0.6761658031088082,
      "grad_norm": 1.21875,
      "learning_rate": 5.0142549881641884e-05,
      "loss": 1.0375,
      "step": 4437
    },
    {
      "epoch": 0.6763181956720512,
      "grad_norm": 1.0,
      "learning_rate": 5.0099770729264816e-05,
      "loss": 0.9423,
      "step": 4438
    },
    {
      "epoch": 0.6764705882352942,
      "grad_norm": 1.03125,
      "learning_rate": 5.0057003733301224e-05,
      "loss": 0.9681,
      "step": 4439
    },
    {
      "epoch": 0.676622980798537,
      "grad_norm": 0.9296875,
      "learning_rate": 5.001424890416975e-05,
      "loss": 1.1047,
      "step": 4440
    },
    {
      "epoch": 0.67677537336178,
      "grad_norm": 0.703125,
      "learning_rate": 4.997150625228597e-05,
      "loss": 0.7202,
      "step": 4441
    },
    {
      "epoch": 0.6769277659250229,
      "grad_norm": 1.078125,
      "learning_rate": 4.992877578806273e-05,
      "loss": 1.0377,
      "step": 4442
    },
    {
      "epoch": 0.6770801584882657,
      "grad_norm": 0.83984375,
      "learning_rate": 4.9886057521909744e-05,
      "loss": 0.9107,
      "step": 4443
    },
    {
      "epoch": 0.6772325510515087,
      "grad_norm": 0.96484375,
      "learning_rate": 4.9843351464233765e-05,
      "loss": 0.9936,
      "step": 4444
    },
    {
      "epoch": 0.6773849436147517,
      "grad_norm": 0.8515625,
      "learning_rate": 4.9800657625438674e-05,
      "loss": 0.8718,
      "step": 4445
    },
    {
      "epoch": 0.6775373361779945,
      "grad_norm": 0.9375,
      "learning_rate": 4.97579760159253e-05,
      "loss": 0.8216,
      "step": 4446
    },
    {
      "epoch": 0.6776897287412375,
      "grad_norm": 0.97265625,
      "learning_rate": 4.971530664609139e-05,
      "loss": 0.904,
      "step": 4447
    },
    {
      "epoch": 0.6778421213044803,
      "grad_norm": 1.0859375,
      "learning_rate": 4.967264952633192e-05,
      "loss": 1.1031,
      "step": 4448
    },
    {
      "epoch": 0.6779945138677232,
      "grad_norm": 1.375,
      "learning_rate": 4.9630004667038746e-05,
      "loss": 1.0274,
      "step": 4449
    },
    {
      "epoch": 0.6781469064309662,
      "grad_norm": 0.89453125,
      "learning_rate": 4.9587372078600736e-05,
      "loss": 0.8154,
      "step": 4450
    },
    {
      "epoch": 0.678299298994209,
      "grad_norm": 0.98046875,
      "learning_rate": 4.9544751771403896e-05,
      "loss": 0.8713,
      "step": 4451
    },
    {
      "epoch": 0.678451691557452,
      "grad_norm": 0.875,
      "learning_rate": 4.9502143755831066e-05,
      "loss": 1.0721,
      "step": 4452
    },
    {
      "epoch": 0.678604084120695,
      "grad_norm": 1.109375,
      "learning_rate": 4.945954804226214e-05,
      "loss": 1.0914,
      "step": 4453
    },
    {
      "epoch": 0.6787564766839378,
      "grad_norm": 1.4296875,
      "learning_rate": 4.9416964641074134e-05,
      "loss": 0.972,
      "step": 4454
    },
    {
      "epoch": 0.6789088692471807,
      "grad_norm": 0.94140625,
      "learning_rate": 4.93743935626409e-05,
      "loss": 1.0527,
      "step": 4455
    },
    {
      "epoch": 0.6790612618104237,
      "grad_norm": 1.015625,
      "learning_rate": 4.9331834817333445e-05,
      "loss": 1.0393,
      "step": 4456
    },
    {
      "epoch": 0.6792136543736665,
      "grad_norm": 0.9140625,
      "learning_rate": 4.9289288415519686e-05,
      "loss": 1.0478,
      "step": 4457
    },
    {
      "epoch": 0.6793660469369095,
      "grad_norm": 0.91796875,
      "learning_rate": 4.924675436756443e-05,
      "loss": 1.0457,
      "step": 4458
    },
    {
      "epoch": 0.6795184395001523,
      "grad_norm": 1.125,
      "learning_rate": 4.920423268382969e-05,
      "loss": 1.1506,
      "step": 4459
    },
    {
      "epoch": 0.6796708320633953,
      "grad_norm": 0.9375,
      "learning_rate": 4.9161723374674315e-05,
      "loss": 0.8436,
      "step": 4460
    },
    {
      "epoch": 0.6798232246266382,
      "grad_norm": 1.171875,
      "learning_rate": 4.911922645045417e-05,
      "loss": 1.0368,
      "step": 4461
    },
    {
      "epoch": 0.6799756171898811,
      "grad_norm": 1.1640625,
      "learning_rate": 4.907674192152222e-05,
      "loss": 0.9042,
      "step": 4462
    },
    {
      "epoch": 0.680128009753124,
      "grad_norm": 0.73046875,
      "learning_rate": 4.903426979822817e-05,
      "loss": 0.9615,
      "step": 4463
    },
    {
      "epoch": 0.680280402316367,
      "grad_norm": 0.8671875,
      "learning_rate": 4.899181009091888e-05,
      "loss": 1.1958,
      "step": 4464
    },
    {
      "epoch": 0.6804327948796098,
      "grad_norm": 0.734375,
      "learning_rate": 4.894936280993818e-05,
      "loss": 0.6819,
      "step": 4465
    },
    {
      "epoch": 0.6805851874428528,
      "grad_norm": 0.83984375,
      "learning_rate": 4.8906927965626835e-05,
      "loss": 0.9177,
      "step": 4466
    },
    {
      "epoch": 0.6807375800060957,
      "grad_norm": 0.8046875,
      "learning_rate": 4.8864505568322536e-05,
      "loss": 1.1727,
      "step": 4467
    },
    {
      "epoch": 0.6808899725693386,
      "grad_norm": 0.94140625,
      "learning_rate": 4.882209562836009e-05,
      "loss": 0.838,
      "step": 4468
    },
    {
      "epoch": 0.6810423651325815,
      "grad_norm": 1.15625,
      "learning_rate": 4.877969815607106e-05,
      "loss": 1.0041,
      "step": 4469
    },
    {
      "epoch": 0.6811947576958245,
      "grad_norm": 1.1640625,
      "learning_rate": 4.873731316178409e-05,
      "loss": 1.1463,
      "step": 4470
    },
    {
      "epoch": 0.6813471502590673,
      "grad_norm": 0.94921875,
      "learning_rate": 4.869494065582484e-05,
      "loss": 1.0538,
      "step": 4471
    },
    {
      "epoch": 0.6814995428223103,
      "grad_norm": 0.89453125,
      "learning_rate": 4.865258064851579e-05,
      "loss": 0.8811,
      "step": 4472
    },
    {
      "epoch": 0.6816519353855531,
      "grad_norm": 0.90234375,
      "learning_rate": 4.861023315017651e-05,
      "loss": 1.0536,
      "step": 4473
    },
    {
      "epoch": 0.6818043279487961,
      "grad_norm": 0.77734375,
      "learning_rate": 4.8567898171123485e-05,
      "loss": 0.8977,
      "step": 4474
    },
    {
      "epoch": 0.681956720512039,
      "grad_norm": 1.0078125,
      "learning_rate": 4.852557572166999e-05,
      "loss": 1.1163,
      "step": 4475
    },
    {
      "epoch": 0.6821091130752819,
      "grad_norm": 0.890625,
      "learning_rate": 4.84832658121265e-05,
      "loss": 0.8041,
      "step": 4476
    },
    {
      "epoch": 0.6822615056385248,
      "grad_norm": 0.8671875,
      "learning_rate": 4.8440968452800285e-05,
      "loss": 0.8342,
      "step": 4477
    },
    {
      "epoch": 0.6824138982017678,
      "grad_norm": 0.87890625,
      "learning_rate": 4.839868365399555e-05,
      "loss": 0.8554,
      "step": 4478
    },
    {
      "epoch": 0.6825662907650106,
      "grad_norm": 1.015625,
      "learning_rate": 4.8356411426013595e-05,
      "loss": 1.0438,
      "step": 4479
    },
    {
      "epoch": 0.6827186833282536,
      "grad_norm": 0.984375,
      "learning_rate": 4.831415177915243e-05,
      "loss": 1.0765,
      "step": 4480
    },
    {
      "epoch": 0.6828710758914965,
      "grad_norm": 0.75390625,
      "learning_rate": 4.82719047237071e-05,
      "loss": 0.8306,
      "step": 4481
    },
    {
      "epoch": 0.6830234684547394,
      "grad_norm": 0.95703125,
      "learning_rate": 4.822967026996968e-05,
      "loss": 0.8191,
      "step": 4482
    },
    {
      "epoch": 0.6831758610179823,
      "grad_norm": 1.390625,
      "learning_rate": 4.8187448428229064e-05,
      "loss": 0.8921,
      "step": 4483
    },
    {
      "epoch": 0.6833282535812253,
      "grad_norm": 0.74609375,
      "learning_rate": 4.8145239208771044e-05,
      "loss": 0.8572,
      "step": 4484
    },
    {
      "epoch": 0.6834806461444681,
      "grad_norm": 1.046875,
      "learning_rate": 4.810304262187852e-05,
      "loss": 0.9444,
      "step": 4485
    },
    {
      "epoch": 0.6836330387077111,
      "grad_norm": 0.9609375,
      "learning_rate": 4.806085867783105e-05,
      "loss": 0.941,
      "step": 4486
    },
    {
      "epoch": 0.6837854312709539,
      "grad_norm": 1.3046875,
      "learning_rate": 4.801868738690528e-05,
      "loss": 0.9869,
      "step": 4487
    },
    {
      "epoch": 0.6839378238341969,
      "grad_norm": 1.0390625,
      "learning_rate": 4.797652875937481e-05,
      "loss": 1.0272,
      "step": 4488
    },
    {
      "epoch": 0.6840902163974398,
      "grad_norm": 0.87109375,
      "learning_rate": 4.793438280551e-05,
      "loss": 0.887,
      "step": 4489
    },
    {
      "epoch": 0.6842426089606827,
      "grad_norm": 0.9375,
      "learning_rate": 4.789224953557834e-05,
      "loss": 0.9822,
      "step": 4490
    },
    {
      "epoch": 0.6843950015239256,
      "grad_norm": 1.109375,
      "learning_rate": 4.785012895984397e-05,
      "loss": 1.0233,
      "step": 4491
    },
    {
      "epoch": 0.6845473940871686,
      "grad_norm": 1.09375,
      "learning_rate": 4.780802108856807e-05,
      "loss": 0.9422,
      "step": 4492
    },
    {
      "epoch": 0.6846997866504114,
      "grad_norm": 0.93359375,
      "learning_rate": 4.7765925932008815e-05,
      "loss": 0.9647,
      "step": 4493
    },
    {
      "epoch": 0.6848521792136544,
      "grad_norm": 0.8125,
      "learning_rate": 4.772384350042114e-05,
      "loss": 0.6815,
      "step": 4494
    },
    {
      "epoch": 0.6850045717768973,
      "grad_norm": 0.9375,
      "learning_rate": 4.7681773804056895e-05,
      "loss": 0.9895,
      "step": 4495
    },
    {
      "epoch": 0.6851569643401402,
      "grad_norm": 0.90625,
      "learning_rate": 4.763971685316497e-05,
      "loss": 0.9361,
      "step": 4496
    },
    {
      "epoch": 0.6853093569033831,
      "grad_norm": 0.734375,
      "learning_rate": 4.7597672657990945e-05,
      "loss": 0.9425,
      "step": 4497
    },
    {
      "epoch": 0.6854617494666261,
      "grad_norm": 0.8828125,
      "learning_rate": 4.755564122877738e-05,
      "loss": 0.9708,
      "step": 4498
    },
    {
      "epoch": 0.6856141420298689,
      "grad_norm": 0.92578125,
      "learning_rate": 4.7513622575763814e-05,
      "loss": 0.9858,
      "step": 4499
    },
    {
      "epoch": 0.6857665345931119,
      "grad_norm": 1.1640625,
      "learning_rate": 4.747161670918656e-05,
      "loss": 1.1682,
      "step": 4500
    },
    {
      "epoch": 0.6859189271563547,
      "grad_norm": 0.8984375,
      "learning_rate": 4.74296236392788e-05,
      "loss": 0.9714,
      "step": 4501
    },
    {
      "epoch": 0.6860713197195977,
      "grad_norm": 0.80859375,
      "learning_rate": 4.7387643376270776e-05,
      "loss": 0.9482,
      "step": 4502
    },
    {
      "epoch": 0.6862237122828406,
      "grad_norm": 0.96484375,
      "learning_rate": 4.734567593038933e-05,
      "loss": 0.7831,
      "step": 4503
    },
    {
      "epoch": 0.6863761048460835,
      "grad_norm": 1.1328125,
      "learning_rate": 4.730372131185845e-05,
      "loss": 0.9605,
      "step": 4504
    },
    {
      "epoch": 0.6865284974093264,
      "grad_norm": 1.59375,
      "learning_rate": 4.726177953089884e-05,
      "loss": 1.2765,
      "step": 4505
    },
    {
      "epoch": 0.6866808899725694,
      "grad_norm": 0.91796875,
      "learning_rate": 4.72198505977281e-05,
      "loss": 0.9413,
      "step": 4506
    },
    {
      "epoch": 0.6868332825358122,
      "grad_norm": 0.88671875,
      "learning_rate": 4.717793452256081e-05,
      "loss": 0.9182,
      "step": 4507
    },
    {
      "epoch": 0.6869856750990552,
      "grad_norm": 0.9140625,
      "learning_rate": 4.713603131560823e-05,
      "loss": 0.9572,
      "step": 4508
    },
    {
      "epoch": 0.6871380676622981,
      "grad_norm": 0.79296875,
      "learning_rate": 4.7094140987078586e-05,
      "loss": 0.9313,
      "step": 4509
    },
    {
      "epoch": 0.687290460225541,
      "grad_norm": 0.91796875,
      "learning_rate": 4.705226354717703e-05,
      "loss": 0.9037,
      "step": 4510
    },
    {
      "epoch": 0.6874428527887839,
      "grad_norm": 0.84765625,
      "learning_rate": 4.701039900610548e-05,
      "loss": 0.9849,
      "step": 4511
    },
    {
      "epoch": 0.6875952453520269,
      "grad_norm": 0.8984375,
      "learning_rate": 4.69685473740627e-05,
      "loss": 0.8827,
      "step": 4512
    },
    {
      "epoch": 0.6877476379152697,
      "grad_norm": 1.1171875,
      "learning_rate": 4.692670866124446e-05,
      "loss": 1.0359,
      "step": 4513
    },
    {
      "epoch": 0.6879000304785127,
      "grad_norm": 1.359375,
      "learning_rate": 4.688488287784315e-05,
      "loss": 1.0965,
      "step": 4514
    },
    {
      "epoch": 0.6880524230417555,
      "grad_norm": 0.83984375,
      "learning_rate": 4.684307003404815e-05,
      "loss": 1.0609,
      "step": 4515
    },
    {
      "epoch": 0.6882048156049985,
      "grad_norm": 0.9609375,
      "learning_rate": 4.680127014004574e-05,
      "loss": 1.0089,
      "step": 4516
    },
    {
      "epoch": 0.6883572081682414,
      "grad_norm": 0.953125,
      "learning_rate": 4.675948320601894e-05,
      "loss": 1.0169,
      "step": 4517
    },
    {
      "epoch": 0.6885096007314843,
      "grad_norm": 1.1875,
      "learning_rate": 4.671770924214763e-05,
      "loss": 0.975,
      "step": 4518
    },
    {
      "epoch": 0.6886619932947272,
      "grad_norm": 0.8671875,
      "learning_rate": 4.6675948258608574e-05,
      "loss": 1.0009,
      "step": 4519
    },
    {
      "epoch": 0.6888143858579702,
      "grad_norm": 1.1328125,
      "learning_rate": 4.663420026557529e-05,
      "loss": 1.0721,
      "step": 4520
    },
    {
      "epoch": 0.688966778421213,
      "grad_norm": 0.80078125,
      "learning_rate": 4.659246527321828e-05,
      "loss": 0.9623,
      "step": 4521
    },
    {
      "epoch": 0.689119170984456,
      "grad_norm": 0.79296875,
      "learning_rate": 4.6550743291704736e-05,
      "loss": 0.7994,
      "step": 4522
    },
    {
      "epoch": 0.6892715635476989,
      "grad_norm": 1.0390625,
      "learning_rate": 4.650903433119871e-05,
      "loss": 1.149,
      "step": 4523
    },
    {
      "epoch": 0.6894239561109418,
      "grad_norm": 1.34375,
      "learning_rate": 4.646733840186121e-05,
      "loss": 1.2088,
      "step": 4524
    },
    {
      "epoch": 0.6895763486741847,
      "grad_norm": 0.9375,
      "learning_rate": 4.642565551384984e-05,
      "loss": 1.0603,
      "step": 4525
    },
    {
      "epoch": 0.6897287412374277,
      "grad_norm": 0.81640625,
      "learning_rate": 4.638398567731918e-05,
      "loss": 0.8777,
      "step": 4526
    },
    {
      "epoch": 0.6898811338006705,
      "grad_norm": 0.89453125,
      "learning_rate": 4.634232890242064e-05,
      "loss": 0.7865,
      "step": 4527
    },
    {
      "epoch": 0.6900335263639135,
      "grad_norm": 1.1171875,
      "learning_rate": 4.630068519930241e-05,
      "loss": 0.8973,
      "step": 4528
    },
    {
      "epoch": 0.6901859189271563,
      "grad_norm": 1.3203125,
      "learning_rate": 4.6259054578109426e-05,
      "loss": 0.8521,
      "step": 4529
    },
    {
      "epoch": 0.6903383114903993,
      "grad_norm": 0.75390625,
      "learning_rate": 4.621743704898363e-05,
      "loss": 0.9209,
      "step": 4530
    },
    {
      "epoch": 0.6904907040536422,
      "grad_norm": 0.77734375,
      "learning_rate": 4.6175832622063544e-05,
      "loss": 0.932,
      "step": 4531
    },
    {
      "epoch": 0.690643096616885,
      "grad_norm": 0.796875,
      "learning_rate": 4.6134241307484596e-05,
      "loss": 0.9812,
      "step": 4532
    },
    {
      "epoch": 0.690795489180128,
      "grad_norm": 0.96875,
      "learning_rate": 4.609266311537911e-05,
      "loss": 0.8626,
      "step": 4533
    },
    {
      "epoch": 0.690947881743371,
      "grad_norm": 1.2265625,
      "learning_rate": 4.6051098055876085e-05,
      "loss": 1.0406,
      "step": 4534
    },
    {
      "epoch": 0.6911002743066138,
      "grad_norm": 1.0,
      "learning_rate": 4.6009546139101377e-05,
      "loss": 0.9877,
      "step": 4535
    },
    {
      "epoch": 0.6912526668698568,
      "grad_norm": 1.171875,
      "learning_rate": 4.596800737517762e-05,
      "loss": 0.9377,
      "step": 4536
    },
    {
      "epoch": 0.6914050594330997,
      "grad_norm": 0.81640625,
      "learning_rate": 4.592648177422423e-05,
      "loss": 0.9942,
      "step": 4537
    },
    {
      "epoch": 0.6915574519963426,
      "grad_norm": 0.8046875,
      "learning_rate": 4.588496934635751e-05,
      "loss": 0.9541,
      "step": 4538
    },
    {
      "epoch": 0.6917098445595855,
      "grad_norm": 1.1328125,
      "learning_rate": 4.5843470101690446e-05,
      "loss": 1.0894,
      "step": 4539
    },
    {
      "epoch": 0.6918622371228285,
      "grad_norm": 0.83984375,
      "learning_rate": 4.58019840503328e-05,
      "loss": 0.879,
      "step": 4540
    },
    {
      "epoch": 0.6920146296860713,
      "grad_norm": 0.890625,
      "learning_rate": 4.576051120239131e-05,
      "loss": 0.9601,
      "step": 4541
    },
    {
      "epoch": 0.6921670222493143,
      "grad_norm": 1.453125,
      "learning_rate": 4.571905156796923e-05,
      "loss": 0.9817,
      "step": 4542
    },
    {
      "epoch": 0.6923194148125571,
      "grad_norm": 0.84765625,
      "learning_rate": 4.567760515716671e-05,
      "loss": 0.9264,
      "step": 4543
    },
    {
      "epoch": 0.6924718073758,
      "grad_norm": 1.09375,
      "learning_rate": 4.56361719800808e-05,
      "loss": 0.8931,
      "step": 4544
    },
    {
      "epoch": 0.692624199939043,
      "grad_norm": 0.92578125,
      "learning_rate": 4.559475204680516e-05,
      "loss": 0.9564,
      "step": 4545
    },
    {
      "epoch": 0.6927765925022858,
      "grad_norm": 0.8046875,
      "learning_rate": 4.555334536743027e-05,
      "loss": 0.8447,
      "step": 4546
    },
    {
      "epoch": 0.6929289850655288,
      "grad_norm": 0.84375,
      "learning_rate": 4.551195195204341e-05,
      "loss": 0.8382,
      "step": 4547
    },
    {
      "epoch": 0.6930813776287718,
      "grad_norm": 0.9140625,
      "learning_rate": 4.547057181072861e-05,
      "loss": 1.0053,
      "step": 4548
    },
    {
      "epoch": 0.6932337701920146,
      "grad_norm": 0.796875,
      "learning_rate": 4.542920495356663e-05,
      "loss": 0.8523,
      "step": 4549
    },
    {
      "epoch": 0.6933861627552576,
      "grad_norm": 1.1953125,
      "learning_rate": 4.5387851390635094e-05,
      "loss": 1.0198,
      "step": 4550
    },
    {
      "epoch": 0.6935385553185005,
      "grad_norm": 0.88671875,
      "learning_rate": 4.534651113200831e-05,
      "loss": 0.9937,
      "step": 4551
    },
    {
      "epoch": 0.6936909478817433,
      "grad_norm": 1.0859375,
      "learning_rate": 4.530518418775733e-05,
      "loss": 1.1003,
      "step": 4552
    },
    {
      "epoch": 0.6938433404449863,
      "grad_norm": 1.140625,
      "learning_rate": 4.5263870567950014e-05,
      "loss": 1.0308,
      "step": 4553
    },
    {
      "epoch": 0.6939957330082291,
      "grad_norm": 0.953125,
      "learning_rate": 4.522257028265091e-05,
      "loss": 1.0303,
      "step": 4554
    },
    {
      "epoch": 0.6941481255714721,
      "grad_norm": 0.953125,
      "learning_rate": 4.5181283341921445e-05,
      "loss": 1.0816,
      "step": 4555
    },
    {
      "epoch": 0.694300518134715,
      "grad_norm": 0.83984375,
      "learning_rate": 4.514000975581966e-05,
      "loss": 0.8444,
      "step": 4556
    },
    {
      "epoch": 0.6944529106979579,
      "grad_norm": 0.70703125,
      "learning_rate": 4.5098749534400354e-05,
      "loss": 0.8108,
      "step": 4557
    },
    {
      "epoch": 0.6946053032612008,
      "grad_norm": 1.296875,
      "learning_rate": 4.505750268771524e-05,
      "loss": 1.1919,
      "step": 4558
    },
    {
      "epoch": 0.6947576958244438,
      "grad_norm": 1.5,
      "learning_rate": 4.501626922581251e-05,
      "loss": 1.1244,
      "step": 4559
    },
    {
      "epoch": 0.6949100883876866,
      "grad_norm": 0.90234375,
      "learning_rate": 4.497504915873725e-05,
      "loss": 0.906,
      "step": 4560
    },
    {
      "epoch": 0.6950624809509296,
      "grad_norm": 0.875,
      "learning_rate": 4.493384249653132e-05,
      "loss": 0.9376,
      "step": 4561
    },
    {
      "epoch": 0.6952148735141725,
      "grad_norm": 1.1875,
      "learning_rate": 4.489264924923323e-05,
      "loss": 0.9244,
      "step": 4562
    },
    {
      "epoch": 0.6953672660774154,
      "grad_norm": 0.71875,
      "learning_rate": 4.4851469426878237e-05,
      "loss": 0.8578,
      "step": 4563
    },
    {
      "epoch": 0.6955196586406583,
      "grad_norm": 0.90234375,
      "learning_rate": 4.4810303039498334e-05,
      "loss": 0.7469,
      "step": 4564
    },
    {
      "epoch": 0.6956720512039013,
      "grad_norm": 1.0546875,
      "learning_rate": 4.476915009712227e-05,
      "loss": 1.0396,
      "step": 4565
    },
    {
      "epoch": 0.6958244437671441,
      "grad_norm": 1.21875,
      "learning_rate": 4.472801060977543e-05,
      "loss": 1.1638,
      "step": 4566
    },
    {
      "epoch": 0.6959768363303871,
      "grad_norm": 0.80859375,
      "learning_rate": 4.468688458748006e-05,
      "loss": 0.8377,
      "step": 4567
    },
    {
      "epoch": 0.6961292288936299,
      "grad_norm": 1.0625,
      "learning_rate": 4.4645772040255e-05,
      "loss": 0.7535,
      "step": 4568
    },
    {
      "epoch": 0.6962816214568729,
      "grad_norm": 0.9453125,
      "learning_rate": 4.460467297811594e-05,
      "loss": 0.8798,
      "step": 4569
    },
    {
      "epoch": 0.6964340140201158,
      "grad_norm": 0.96875,
      "learning_rate": 4.456358741107512e-05,
      "loss": 1.0061,
      "step": 4570
    },
    {
      "epoch": 0.6965864065833587,
      "grad_norm": 0.91796875,
      "learning_rate": 4.452251534914153e-05,
      "loss": 1.0061,
      "step": 4571
    },
    {
      "epoch": 0.6967387991466016,
      "grad_norm": 0.78125,
      "learning_rate": 4.448145680232105e-05,
      "loss": 0.9195,
      "step": 4572
    },
    {
      "epoch": 0.6968911917098446,
      "grad_norm": 0.92578125,
      "learning_rate": 4.4440411780616056e-05,
      "loss": 0.8887,
      "step": 4573
    },
    {
      "epoch": 0.6970435842730874,
      "grad_norm": 0.89453125,
      "learning_rate": 4.439938029402571e-05,
      "loss": 0.9755,
      "step": 4574
    },
    {
      "epoch": 0.6971959768363304,
      "grad_norm": 0.765625,
      "learning_rate": 4.435836235254587e-05,
      "loss": 0.8272,
      "step": 4575
    },
    {
      "epoch": 0.6973483693995733,
      "grad_norm": 1.1953125,
      "learning_rate": 4.431735796616911e-05,
      "loss": 0.9449,
      "step": 4576
    },
    {
      "epoch": 0.6975007619628162,
      "grad_norm": 0.92578125,
      "learning_rate": 4.4276367144884645e-05,
      "loss": 0.8302,
      "step": 4577
    },
    {
      "epoch": 0.6976531545260591,
      "grad_norm": 1.015625,
      "learning_rate": 4.4235389898678494e-05,
      "loss": 1.0073,
      "step": 4578
    },
    {
      "epoch": 0.6978055470893021,
      "grad_norm": 1.171875,
      "learning_rate": 4.419442623753327e-05,
      "loss": 1.0179,
      "step": 4579
    },
    {
      "epoch": 0.6979579396525449,
      "grad_norm": 0.83984375,
      "learning_rate": 4.415347617142832e-05,
      "loss": 0.9407,
      "step": 4580
    },
    {
      "epoch": 0.6981103322157879,
      "grad_norm": 0.86328125,
      "learning_rate": 4.411253971033967e-05,
      "loss": 0.9667,
      "step": 4581
    },
    {
      "epoch": 0.6982627247790307,
      "grad_norm": 1.171875,
      "learning_rate": 4.407161686424002e-05,
      "loss": 0.8118,
      "step": 4582
    },
    {
      "epoch": 0.6984151173422737,
      "grad_norm": 1.1328125,
      "learning_rate": 4.403070764309871e-05,
      "loss": 0.9757,
      "step": 4583
    },
    {
      "epoch": 0.6985675099055166,
      "grad_norm": 0.9609375,
      "learning_rate": 4.398981205688193e-05,
      "loss": 0.8922,
      "step": 4584
    },
    {
      "epoch": 0.6987199024687595,
      "grad_norm": 0.9453125,
      "learning_rate": 4.3948930115552334e-05,
      "loss": 1.034,
      "step": 4585
    },
    {
      "epoch": 0.6988722950320024,
      "grad_norm": 1.140625,
      "learning_rate": 4.390806182906946e-05,
      "loss": 0.973,
      "step": 4586
    },
    {
      "epoch": 0.6990246875952454,
      "grad_norm": 0.92578125,
      "learning_rate": 4.386720720738931e-05,
      "loss": 1.1013,
      "step": 4587
    },
    {
      "epoch": 0.6991770801584882,
      "grad_norm": 0.89453125,
      "learning_rate": 4.3826366260464635e-05,
      "loss": 0.8872,
      "step": 4588
    },
    {
      "epoch": 0.6993294727217312,
      "grad_norm": 0.859375,
      "learning_rate": 4.378553899824498e-05,
      "loss": 1.1202,
      "step": 4589
    },
    {
      "epoch": 0.6994818652849741,
      "grad_norm": 1.125,
      "learning_rate": 4.3744725430676406e-05,
      "loss": 1.1415,
      "step": 4590
    },
    {
      "epoch": 0.699634257848217,
      "grad_norm": 1.03125,
      "learning_rate": 4.3703925567701676e-05,
      "loss": 1.1152,
      "step": 4591
    },
    {
      "epoch": 0.6997866504114599,
      "grad_norm": 0.81640625,
      "learning_rate": 4.3663139419260233e-05,
      "loss": 0.8448,
      "step": 4592
    },
    {
      "epoch": 0.6999390429747029,
      "grad_norm": 0.890625,
      "learning_rate": 4.3622366995288166e-05,
      "loss": 1.1535,
      "step": 4593
    },
    {
      "epoch": 0.7000914355379457,
      "grad_norm": 1.1015625,
      "learning_rate": 4.358160830571818e-05,
      "loss": 0.9404,
      "step": 4594
    },
    {
      "epoch": 0.7002438281011887,
      "grad_norm": 0.9921875,
      "learning_rate": 4.354086336047977e-05,
      "loss": 0.9314,
      "step": 4595
    },
    {
      "epoch": 0.7003962206644315,
      "grad_norm": 1.015625,
      "learning_rate": 4.3500132169498945e-05,
      "loss": 1.149,
      "step": 4596
    },
    {
      "epoch": 0.7005486132276745,
      "grad_norm": 0.86328125,
      "learning_rate": 4.345941474269839e-05,
      "loss": 0.8618,
      "step": 4597
    },
    {
      "epoch": 0.7007010057909174,
      "grad_norm": 0.84765625,
      "learning_rate": 4.3418711089997476e-05,
      "loss": 1.025,
      "step": 4598
    },
    {
      "epoch": 0.7008533983541603,
      "grad_norm": 0.98828125,
      "learning_rate": 4.337802122131221e-05,
      "loss": 0.9362,
      "step": 4599
    },
    {
      "epoch": 0.7010057909174032,
      "grad_norm": 1.03125,
      "learning_rate": 4.3337345146555164e-05,
      "loss": 1.0261,
      "step": 4600
    },
    {
      "epoch": 0.7011581834806462,
      "grad_norm": 0.7890625,
      "learning_rate": 4.32966828756357e-05,
      "loss": 0.8493,
      "step": 4601
    },
    {
      "epoch": 0.701310576043889,
      "grad_norm": 0.921875,
      "learning_rate": 4.32560344184597e-05,
      "loss": 1.0273,
      "step": 4602
    },
    {
      "epoch": 0.701462968607132,
      "grad_norm": 0.9375,
      "learning_rate": 4.321539978492971e-05,
      "loss": 0.8348,
      "step": 4603
    },
    {
      "epoch": 0.7016153611703749,
      "grad_norm": 0.89453125,
      "learning_rate": 4.3174778984944905e-05,
      "loss": 1.0112,
      "step": 4604
    },
    {
      "epoch": 0.7017677537336178,
      "grad_norm": 1.1171875,
      "learning_rate": 4.313417202840106e-05,
      "loss": 1.4185,
      "step": 4605
    },
    {
      "epoch": 0.7019201462968607,
      "grad_norm": 0.671875,
      "learning_rate": 4.30935789251907e-05,
      "loss": 0.7373,
      "step": 4606
    },
    {
      "epoch": 0.7020725388601037,
      "grad_norm": 1.0234375,
      "learning_rate": 4.3052999685202844e-05,
      "loss": 1.0402,
      "step": 4607
    },
    {
      "epoch": 0.7022249314233465,
      "grad_norm": 1.3046875,
      "learning_rate": 4.301243431832317e-05,
      "loss": 0.9276,
      "step": 4608
    },
    {
      "epoch": 0.7023773239865895,
      "grad_norm": 1.390625,
      "learning_rate": 4.2971882834433995e-05,
      "loss": 0.9707,
      "step": 4609
    },
    {
      "epoch": 0.7025297165498323,
      "grad_norm": 0.91796875,
      "learning_rate": 4.2931345243414247e-05,
      "loss": 0.9601,
      "step": 4610
    },
    {
      "epoch": 0.7026821091130753,
      "grad_norm": 0.83203125,
      "learning_rate": 4.2890821555139414e-05,
      "loss": 0.908,
      "step": 4611
    },
    {
      "epoch": 0.7028345016763182,
      "grad_norm": 0.86328125,
      "learning_rate": 4.285031177948173e-05,
      "loss": 0.9368,
      "step": 4612
    },
    {
      "epoch": 0.7029868942395611,
      "grad_norm": 1.4453125,
      "learning_rate": 4.280981592630994e-05,
      "loss": 1.0937,
      "step": 4613
    },
    {
      "epoch": 0.703139286802804,
      "grad_norm": 0.859375,
      "learning_rate": 4.276933400548938e-05,
      "loss": 0.8175,
      "step": 4614
    },
    {
      "epoch": 0.703291679366047,
      "grad_norm": 0.88671875,
      "learning_rate": 4.2728866026882065e-05,
      "loss": 0.8516,
      "step": 4615
    },
    {
      "epoch": 0.7034440719292898,
      "grad_norm": 0.9453125,
      "learning_rate": 4.268841200034655e-05,
      "loss": 0.9568,
      "step": 4616
    },
    {
      "epoch": 0.7035964644925328,
      "grad_norm": 1.0390625,
      "learning_rate": 4.264797193573799e-05,
      "loss": 1.0601,
      "step": 4617
    },
    {
      "epoch": 0.7037488570557757,
      "grad_norm": 0.98828125,
      "learning_rate": 4.260754584290825e-05,
      "loss": 0.84,
      "step": 4618
    },
    {
      "epoch": 0.7039012496190186,
      "grad_norm": 1.1484375,
      "learning_rate": 4.256713373170564e-05,
      "loss": 0.9593,
      "step": 4619
    },
    {
      "epoch": 0.7040536421822615,
      "grad_norm": 0.94140625,
      "learning_rate": 4.2526735611975175e-05,
      "loss": 1.0383,
      "step": 4620
    },
    {
      "epoch": 0.7042060347455045,
      "grad_norm": 1.0078125,
      "learning_rate": 4.2486351493558387e-05,
      "loss": 1.1593,
      "step": 4621
    },
    {
      "epoch": 0.7043584273087473,
      "grad_norm": 0.9453125,
      "learning_rate": 4.24459813862934e-05,
      "loss": 0.8431,
      "step": 4622
    },
    {
      "epoch": 0.7045108198719903,
      "grad_norm": 1.0546875,
      "learning_rate": 4.2405625300015026e-05,
      "loss": 1.0056,
      "step": 4623
    },
    {
      "epoch": 0.7046632124352331,
      "grad_norm": 1.4453125,
      "learning_rate": 4.2365283244554545e-05,
      "loss": 1.2467,
      "step": 4624
    },
    {
      "epoch": 0.7048156049984761,
      "grad_norm": 1.3125,
      "learning_rate": 4.232495522973987e-05,
      "loss": 0.9997,
      "step": 4625
    },
    {
      "epoch": 0.704967997561719,
      "grad_norm": 0.8046875,
      "learning_rate": 4.22846412653955e-05,
      "loss": 0.7923,
      "step": 4626
    },
    {
      "epoch": 0.7051203901249619,
      "grad_norm": 0.84375,
      "learning_rate": 4.2244341361342456e-05,
      "loss": 0.8849,
      "step": 4627
    },
    {
      "epoch": 0.7052727826882048,
      "grad_norm": 0.79296875,
      "learning_rate": 4.220405552739838e-05,
      "loss": 0.9171,
      "step": 4628
    },
    {
      "epoch": 0.7054251752514478,
      "grad_norm": 0.97265625,
      "learning_rate": 4.216378377337752e-05,
      "loss": 0.8158,
      "step": 4629
    },
    {
      "epoch": 0.7055775678146906,
      "grad_norm": 1.2109375,
      "learning_rate": 4.2123526109090636e-05,
      "loss": 0.9015,
      "step": 4630
    },
    {
      "epoch": 0.7057299603779336,
      "grad_norm": 0.85546875,
      "learning_rate": 4.208328254434507e-05,
      "loss": 0.9856,
      "step": 4631
    },
    {
      "epoch": 0.7058823529411765,
      "grad_norm": 0.78515625,
      "learning_rate": 4.2043053088944736e-05,
      "loss": 0.9703,
      "step": 4632
    },
    {
      "epoch": 0.7060347455044194,
      "grad_norm": 0.86328125,
      "learning_rate": 4.200283775269007e-05,
      "loss": 0.9228,
      "step": 4633
    },
    {
      "epoch": 0.7061871380676623,
      "grad_norm": 1.140625,
      "learning_rate": 4.1962636545378184e-05,
      "loss": 0.958,
      "step": 4634
    },
    {
      "epoch": 0.7063395306309053,
      "grad_norm": 0.87109375,
      "learning_rate": 4.192244947680263e-05,
      "loss": 0.9197,
      "step": 4635
    },
    {
      "epoch": 0.7064919231941481,
      "grad_norm": 1.078125,
      "learning_rate": 4.188227655675354e-05,
      "loss": 0.8473,
      "step": 4636
    },
    {
      "epoch": 0.7066443157573911,
      "grad_norm": 0.81640625,
      "learning_rate": 4.1842117795017633e-05,
      "loss": 0.9391,
      "step": 4637
    },
    {
      "epoch": 0.7067967083206339,
      "grad_norm": 0.57421875,
      "learning_rate": 4.180197320137815e-05,
      "loss": 0.8469,
      "step": 4638
    },
    {
      "epoch": 0.7069491008838769,
      "grad_norm": 0.87890625,
      "learning_rate": 4.176184278561488e-05,
      "loss": 0.8168,
      "step": 4639
    },
    {
      "epoch": 0.7071014934471198,
      "grad_norm": 1.1171875,
      "learning_rate": 4.172172655750421e-05,
      "loss": 1.0284,
      "step": 4640
    },
    {
      "epoch": 0.7072538860103627,
      "grad_norm": 1.2109375,
      "learning_rate": 4.168162452681901e-05,
      "loss": 0.9967,
      "step": 4641
    },
    {
      "epoch": 0.7074062785736056,
      "grad_norm": 0.8046875,
      "learning_rate": 4.164153670332871e-05,
      "loss": 0.969,
      "step": 4642
    },
    {
      "epoch": 0.7075586711368486,
      "grad_norm": 0.95703125,
      "learning_rate": 4.1601463096799274e-05,
      "loss": 0.944,
      "step": 4643
    },
    {
      "epoch": 0.7077110637000914,
      "grad_norm": 0.84375,
      "learning_rate": 4.156140371699322e-05,
      "loss": 0.9076,
      "step": 4644
    },
    {
      "epoch": 0.7078634562633344,
      "grad_norm": 1.0546875,
      "learning_rate": 4.152135857366955e-05,
      "loss": 1.057,
      "step": 4645
    },
    {
      "epoch": 0.7080158488265773,
      "grad_norm": 0.70703125,
      "learning_rate": 4.1481327676583903e-05,
      "loss": 0.7476,
      "step": 4646
    },
    {
      "epoch": 0.7081682413898202,
      "grad_norm": 0.75,
      "learning_rate": 4.144131103548834e-05,
      "loss": 0.9754,
      "step": 4647
    },
    {
      "epoch": 0.7083206339530631,
      "grad_norm": 0.875,
      "learning_rate": 4.140130866013151e-05,
      "loss": 1.0544,
      "step": 4648
    },
    {
      "epoch": 0.7084730265163061,
      "grad_norm": 0.984375,
      "learning_rate": 4.136132056025854e-05,
      "loss": 1.0517,
      "step": 4649
    },
    {
      "epoch": 0.7086254190795489,
      "grad_norm": 0.9140625,
      "learning_rate": 4.13213467456111e-05,
      "loss": 0.9636,
      "step": 4650
    },
    {
      "epoch": 0.7087778116427919,
      "grad_norm": 0.90234375,
      "learning_rate": 4.1281387225927436e-05,
      "loss": 0.9411,
      "step": 4651
    },
    {
      "epoch": 0.7089302042060347,
      "grad_norm": 0.890625,
      "learning_rate": 4.124144201094223e-05,
      "loss": 1.0221,
      "step": 4652
    },
    {
      "epoch": 0.7090825967692777,
      "grad_norm": 1.0234375,
      "learning_rate": 4.1201511110386715e-05,
      "loss": 0.9037,
      "step": 4653
    },
    {
      "epoch": 0.7092349893325206,
      "grad_norm": 1.2421875,
      "learning_rate": 4.116159453398864e-05,
      "loss": 1.0125,
      "step": 4654
    },
    {
      "epoch": 0.7093873818957634,
      "grad_norm": 0.78125,
      "learning_rate": 4.112169229147224e-05,
      "loss": 1.0035,
      "step": 4655
    },
    {
      "epoch": 0.7095397744590064,
      "grad_norm": 1.1328125,
      "learning_rate": 4.108180439255824e-05,
      "loss": 1.0516,
      "step": 4656
    },
    {
      "epoch": 0.7096921670222494,
      "grad_norm": 1.0234375,
      "learning_rate": 4.104193084696399e-05,
      "loss": 0.8735,
      "step": 4657
    },
    {
      "epoch": 0.7098445595854922,
      "grad_norm": 1.1484375,
      "learning_rate": 4.100207166440323e-05,
      "loss": 1.0001,
      "step": 4658
    },
    {
      "epoch": 0.7099969521487351,
      "grad_norm": 1.0390625,
      "learning_rate": 4.096222685458621e-05,
      "loss": 0.8241,
      "step": 4659
    },
    {
      "epoch": 0.7101493447119781,
      "grad_norm": 1.34375,
      "learning_rate": 4.09223964272197e-05,
      "loss": 0.9881,
      "step": 4660
    },
    {
      "epoch": 0.710301737275221,
      "grad_norm": 1.09375,
      "learning_rate": 4.0882580392006976e-05,
      "loss": 0.988,
      "step": 4661
    },
    {
      "epoch": 0.7104541298384639,
      "grad_norm": 1.0625,
      "learning_rate": 4.084277875864776e-05,
      "loss": 1.1442,
      "step": 4662
    },
    {
      "epoch": 0.7106065224017067,
      "grad_norm": 0.796875,
      "learning_rate": 4.0802991536838366e-05,
      "loss": 0.9837,
      "step": 4663
    },
    {
      "epoch": 0.7107589149649497,
      "grad_norm": 1.0390625,
      "learning_rate": 4.07632187362715e-05,
      "loss": 0.945,
      "step": 4664
    },
    {
      "epoch": 0.7109113075281926,
      "grad_norm": 1.078125,
      "learning_rate": 4.07234603666364e-05,
      "loss": 1.0104,
      "step": 4665
    },
    {
      "epoch": 0.7110637000914355,
      "grad_norm": 0.88671875,
      "learning_rate": 4.0683716437618754e-05,
      "loss": 0.8363,
      "step": 4666
    },
    {
      "epoch": 0.7112160926546784,
      "grad_norm": 0.7890625,
      "learning_rate": 4.0643986958900745e-05,
      "loss": 0.8695,
      "step": 4667
    },
    {
      "epoch": 0.7113684852179214,
      "grad_norm": 0.9375,
      "learning_rate": 4.06042719401611e-05,
      "loss": 0.8545,
      "step": 4668
    },
    {
      "epoch": 0.7115208777811642,
      "grad_norm": 0.76171875,
      "learning_rate": 4.056457139107494e-05,
      "loss": 0.9217,
      "step": 4669
    },
    {
      "epoch": 0.7116732703444072,
      "grad_norm": 0.9609375,
      "learning_rate": 4.052488532131388e-05,
      "loss": 0.8404,
      "step": 4670
    },
    {
      "epoch": 0.7118256629076501,
      "grad_norm": 0.98046875,
      "learning_rate": 4.048521374054601e-05,
      "loss": 0.9938,
      "step": 4671
    },
    {
      "epoch": 0.711978055470893,
      "grad_norm": 0.98046875,
      "learning_rate": 4.0445556658435936e-05,
      "loss": 1.1865,
      "step": 4672
    },
    {
      "epoch": 0.7121304480341359,
      "grad_norm": 1.0078125,
      "learning_rate": 4.0405914084644616e-05,
      "loss": 1.028,
      "step": 4673
    },
    {
      "epoch": 0.7122828405973789,
      "grad_norm": 1.28125,
      "learning_rate": 4.036628602882965e-05,
      "loss": 1.124,
      "step": 4674
    },
    {
      "epoch": 0.7124352331606217,
      "grad_norm": 1.046875,
      "learning_rate": 4.032667250064495e-05,
      "loss": 0.9433,
      "step": 4675
    },
    {
      "epoch": 0.7125876257238647,
      "grad_norm": 1.1484375,
      "learning_rate": 4.028707350974093e-05,
      "loss": 1.0245,
      "step": 4676
    },
    {
      "epoch": 0.7127400182871075,
      "grad_norm": 0.9375,
      "learning_rate": 4.024748906576452e-05,
      "loss": 1.0456,
      "step": 4677
    },
    {
      "epoch": 0.7128924108503505,
      "grad_norm": 0.71875,
      "learning_rate": 4.020791917835901e-05,
      "loss": 0.9119,
      "step": 4678
    },
    {
      "epoch": 0.7130448034135934,
      "grad_norm": 0.71875,
      "learning_rate": 4.016836385716419e-05,
      "loss": 1.1312,
      "step": 4679
    },
    {
      "epoch": 0.7131971959768363,
      "grad_norm": 0.921875,
      "learning_rate": 4.012882311181636e-05,
      "loss": 1.0123,
      "step": 4680
    },
    {
      "epoch": 0.7133495885400792,
      "grad_norm": 1.015625,
      "learning_rate": 4.008929695194819e-05,
      "loss": 1.0053,
      "step": 4681
    },
    {
      "epoch": 0.7135019811033222,
      "grad_norm": 0.94140625,
      "learning_rate": 4.00497853871888e-05,
      "loss": 1.1766,
      "step": 4682
    },
    {
      "epoch": 0.713654373666565,
      "grad_norm": 0.87109375,
      "learning_rate": 4.00102884271638e-05,
      "loss": 0.852,
      "step": 4683
    },
    {
      "epoch": 0.713806766229808,
      "grad_norm": 1.1640625,
      "learning_rate": 3.997080608149516e-05,
      "loss": 1.1117,
      "step": 4684
    },
    {
      "epoch": 0.7139591587930509,
      "grad_norm": 1.015625,
      "learning_rate": 3.993133835980144e-05,
      "loss": 0.891,
      "step": 4685
    },
    {
      "epoch": 0.7141115513562938,
      "grad_norm": 0.74609375,
      "learning_rate": 3.9891885271697496e-05,
      "loss": 1.0039,
      "step": 4686
    },
    {
      "epoch": 0.7142639439195367,
      "grad_norm": 0.92578125,
      "learning_rate": 3.9852446826794664e-05,
      "loss": 0.9759,
      "step": 4687
    },
    {
      "epoch": 0.7144163364827797,
      "grad_norm": 1.1875,
      "learning_rate": 3.9813023034700734e-05,
      "loss": 0.9691,
      "step": 4688
    },
    {
      "epoch": 0.7145687290460225,
      "grad_norm": 1.1171875,
      "learning_rate": 3.977361390501989e-05,
      "loss": 1.0769,
      "step": 4689
    },
    {
      "epoch": 0.7147211216092655,
      "grad_norm": 1.1484375,
      "learning_rate": 3.973421944735274e-05,
      "loss": 0.9644,
      "step": 4690
    },
    {
      "epoch": 0.7148735141725083,
      "grad_norm": 1.0625,
      "learning_rate": 3.9694839671296404e-05,
      "loss": 0.9813,
      "step": 4691
    },
    {
      "epoch": 0.7150259067357513,
      "grad_norm": 0.9765625,
      "learning_rate": 3.965547458644432e-05,
      "loss": 0.906,
      "step": 4692
    },
    {
      "epoch": 0.7151782992989942,
      "grad_norm": 1.265625,
      "learning_rate": 3.961612420238641e-05,
      "loss": 1.106,
      "step": 4693
    },
    {
      "epoch": 0.7153306918622371,
      "grad_norm": 1.1640625,
      "learning_rate": 3.9576788528708964e-05,
      "loss": 0.895,
      "step": 4694
    },
    {
      "epoch": 0.71548308442548,
      "grad_norm": 0.77734375,
      "learning_rate": 3.9537467574994745e-05,
      "loss": 0.8901,
      "step": 4695
    },
    {
      "epoch": 0.715635476988723,
      "grad_norm": 0.9453125,
      "learning_rate": 3.9498161350822846e-05,
      "loss": 0.885,
      "step": 4696
    },
    {
      "epoch": 0.7157878695519658,
      "grad_norm": 0.96875,
      "learning_rate": 3.945886986576892e-05,
      "loss": 0.915,
      "step": 4697
    },
    {
      "epoch": 0.7159402621152088,
      "grad_norm": 1.234375,
      "learning_rate": 3.9419593129404895e-05,
      "loss": 1.0829,
      "step": 4698
    },
    {
      "epoch": 0.7160926546784517,
      "grad_norm": 0.98046875,
      "learning_rate": 3.9380331151299144e-05,
      "loss": 0.7676,
      "step": 4699
    },
    {
      "epoch": 0.7162450472416946,
      "grad_norm": 0.890625,
      "learning_rate": 3.9341083941016445e-05,
      "loss": 0.9792,
      "step": 4700
    },
    {
      "epoch": 0.7163974398049375,
      "grad_norm": 0.99609375,
      "learning_rate": 3.930185150811795e-05,
      "loss": 1.1152,
      "step": 4701
    },
    {
      "epoch": 0.7165498323681805,
      "grad_norm": 1.0078125,
      "learning_rate": 3.9262633862161324e-05,
      "loss": 0.9352,
      "step": 4702
    },
    {
      "epoch": 0.7167022249314233,
      "grad_norm": 0.9765625,
      "learning_rate": 3.9223431012700506e-05,
      "loss": 0.9351,
      "step": 4703
    },
    {
      "epoch": 0.7168546174946663,
      "grad_norm": 1.03125,
      "learning_rate": 3.9184242969285875e-05,
      "loss": 1.1068,
      "step": 4704
    },
    {
      "epoch": 0.7170070100579091,
      "grad_norm": 1.1328125,
      "learning_rate": 3.9145069741464216e-05,
      "loss": 0.9732,
      "step": 4705
    },
    {
      "epoch": 0.7171594026211521,
      "grad_norm": 1.015625,
      "learning_rate": 3.9105911338778665e-05,
      "loss": 1.0837,
      "step": 4706
    },
    {
      "epoch": 0.717311795184395,
      "grad_norm": 1.28125,
      "learning_rate": 3.906676777076875e-05,
      "loss": 1.0293,
      "step": 4707
    },
    {
      "epoch": 0.7174641877476379,
      "grad_norm": 0.79296875,
      "learning_rate": 3.902763904697049e-05,
      "loss": 1.0122,
      "step": 4708
    },
    {
      "epoch": 0.7176165803108808,
      "grad_norm": 1.1953125,
      "learning_rate": 3.8988525176916136e-05,
      "loss": 1.108,
      "step": 4709
    },
    {
      "epoch": 0.7177689728741238,
      "grad_norm": 1.125,
      "learning_rate": 3.894942617013443e-05,
      "loss": 1.2537,
      "step": 4710
    },
    {
      "epoch": 0.7179213654373666,
      "grad_norm": 0.90234375,
      "learning_rate": 3.891034203615042e-05,
      "loss": 1.0036,
      "step": 4711
    },
    {
      "epoch": 0.7180737580006096,
      "grad_norm": 1.0859375,
      "learning_rate": 3.8871272784485566e-05,
      "loss": 1.0717,
      "step": 4712
    },
    {
      "epoch": 0.7182261505638525,
      "grad_norm": 1.0703125,
      "learning_rate": 3.88322184246577e-05,
      "loss": 1.0431,
      "step": 4713
    },
    {
      "epoch": 0.7183785431270954,
      "grad_norm": 1.1328125,
      "learning_rate": 3.879317896618104e-05,
      "loss": 1.0427,
      "step": 4714
    },
    {
      "epoch": 0.7185309356903383,
      "grad_norm": 1.015625,
      "learning_rate": 3.875415441856618e-05,
      "loss": 0.9726,
      "step": 4715
    },
    {
      "epoch": 0.7186833282535813,
      "grad_norm": 0.828125,
      "learning_rate": 3.871514479132001e-05,
      "loss": 0.978,
      "step": 4716
    },
    {
      "epoch": 0.7188357208168241,
      "grad_norm": 0.9453125,
      "learning_rate": 3.8676150093945885e-05,
      "loss": 0.9257,
      "step": 4717
    },
    {
      "epoch": 0.7189881133800671,
      "grad_norm": 1.1171875,
      "learning_rate": 3.86371703359434e-05,
      "loss": 0.979,
      "step": 4718
    },
    {
      "epoch": 0.7191405059433099,
      "grad_norm": 0.88671875,
      "learning_rate": 3.859820552680867e-05,
      "loss": 0.976,
      "step": 4719
    },
    {
      "epoch": 0.7192928985065529,
      "grad_norm": 0.94921875,
      "learning_rate": 3.855925567603406e-05,
      "loss": 1.0626,
      "step": 4720
    },
    {
      "epoch": 0.7194452910697958,
      "grad_norm": 0.79296875,
      "learning_rate": 3.852032079310829e-05,
      "loss": 0.8739,
      "step": 4721
    },
    {
      "epoch": 0.7195976836330387,
      "grad_norm": 0.83984375,
      "learning_rate": 3.8481400887516474e-05,
      "loss": 1.0423,
      "step": 4722
    },
    {
      "epoch": 0.7197500761962816,
      "grad_norm": 1.046875,
      "learning_rate": 3.8442495968740045e-05,
      "loss": 1.1691,
      "step": 4723
    },
    {
      "epoch": 0.7199024687595246,
      "grad_norm": 0.91796875,
      "learning_rate": 3.840360604625679e-05,
      "loss": 0.9413,
      "step": 4724
    },
    {
      "epoch": 0.7200548613227674,
      "grad_norm": 0.91796875,
      "learning_rate": 3.8364731129540896e-05,
      "loss": 0.9697,
      "step": 4725
    },
    {
      "epoch": 0.7202072538860104,
      "grad_norm": 1.2109375,
      "learning_rate": 3.8325871228062834e-05,
      "loss": 1.1353,
      "step": 4726
    },
    {
      "epoch": 0.7203596464492533,
      "grad_norm": 0.9609375,
      "learning_rate": 3.828702635128944e-05,
      "loss": 0.9918,
      "step": 4727
    },
    {
      "epoch": 0.7205120390124962,
      "grad_norm": 0.9453125,
      "learning_rate": 3.824819650868387e-05,
      "loss": 0.9182,
      "step": 4728
    },
    {
      "epoch": 0.7206644315757391,
      "grad_norm": 1.0234375,
      "learning_rate": 3.820938170970564e-05,
      "loss": 0.9722,
      "step": 4729
    },
    {
      "epoch": 0.7208168241389821,
      "grad_norm": 1.0,
      "learning_rate": 3.8170581963810545e-05,
      "loss": 1.1321,
      "step": 4730
    },
    {
      "epoch": 0.7209692167022249,
      "grad_norm": 1.0390625,
      "learning_rate": 3.813179728045085e-05,
      "loss": 1.0613,
      "step": 4731
    },
    {
      "epoch": 0.7211216092654679,
      "grad_norm": 0.9375,
      "learning_rate": 3.809302766907502e-05,
      "loss": 1.0802,
      "step": 4732
    },
    {
      "epoch": 0.7212740018287107,
      "grad_norm": 1.5234375,
      "learning_rate": 3.8054273139127884e-05,
      "loss": 1.0763,
      "step": 4733
    },
    {
      "epoch": 0.7214263943919537,
      "grad_norm": 1.0390625,
      "learning_rate": 3.801553370005061e-05,
      "loss": 0.8014,
      "step": 4734
    },
    {
      "epoch": 0.7215787869551966,
      "grad_norm": 1.3984375,
      "learning_rate": 3.797680936128064e-05,
      "loss": 1.2878,
      "step": 4735
    },
    {
      "epoch": 0.7217311795184395,
      "grad_norm": 0.9765625,
      "learning_rate": 3.793810013225185e-05,
      "loss": 0.8804,
      "step": 4736
    },
    {
      "epoch": 0.7218835720816824,
      "grad_norm": 1.21875,
      "learning_rate": 3.7899406022394346e-05,
      "loss": 0.9705,
      "step": 4737
    },
    {
      "epoch": 0.7220359646449254,
      "grad_norm": 0.9609375,
      "learning_rate": 3.786072704113456e-05,
      "loss": 0.9685,
      "step": 4738
    },
    {
      "epoch": 0.7221883572081682,
      "grad_norm": 0.796875,
      "learning_rate": 3.782206319789523e-05,
      "loss": 0.8806,
      "step": 4739
    },
    {
      "epoch": 0.7223407497714112,
      "grad_norm": 1.0625,
      "learning_rate": 3.778341450209545e-05,
      "loss": 1.0538,
      "step": 4740
    },
    {
      "epoch": 0.7224931423346541,
      "grad_norm": 0.96875,
      "learning_rate": 3.774478096315055e-05,
      "loss": 0.9465,
      "step": 4741
    },
    {
      "epoch": 0.722645534897897,
      "grad_norm": 1.2578125,
      "learning_rate": 3.7706162590472296e-05,
      "loss": 1.131,
      "step": 4742
    },
    {
      "epoch": 0.7227979274611399,
      "grad_norm": 0.9609375,
      "learning_rate": 3.766755939346863e-05,
      "loss": 1.0929,
      "step": 4743
    },
    {
      "epoch": 0.7229503200243829,
      "grad_norm": 0.81640625,
      "learning_rate": 3.762897138154385e-05,
      "loss": 0.9247,
      "step": 4744
    },
    {
      "epoch": 0.7231027125876257,
      "grad_norm": 0.84765625,
      "learning_rate": 3.759039856409856e-05,
      "loss": 1.1784,
      "step": 4745
    },
    {
      "epoch": 0.7232551051508687,
      "grad_norm": 1.09375,
      "learning_rate": 3.755184095052964e-05,
      "loss": 1.1583,
      "step": 4746
    },
    {
      "epoch": 0.7234074977141115,
      "grad_norm": 1.125,
      "learning_rate": 3.7513298550230244e-05,
      "loss": 1.0896,
      "step": 4747
    },
    {
      "epoch": 0.7235598902773545,
      "grad_norm": 0.81640625,
      "learning_rate": 3.747477137258994e-05,
      "loss": 0.9748,
      "step": 4748
    },
    {
      "epoch": 0.7237122828405974,
      "grad_norm": 1.453125,
      "learning_rate": 3.7436259426994466e-05,
      "loss": 1.091,
      "step": 4749
    },
    {
      "epoch": 0.7238646754038403,
      "grad_norm": 0.9609375,
      "learning_rate": 3.739776272282587e-05,
      "loss": 0.9622,
      "step": 4750
    },
    {
      "epoch": 0.7240170679670832,
      "grad_norm": 1.296875,
      "learning_rate": 3.735928126946251e-05,
      "loss": 1.0516,
      "step": 4751
    },
    {
      "epoch": 0.7241694605303262,
      "grad_norm": 1.1328125,
      "learning_rate": 3.7320815076278994e-05,
      "loss": 0.9721,
      "step": 4752
    },
    {
      "epoch": 0.724321853093569,
      "grad_norm": 1.0,
      "learning_rate": 3.7282364152646297e-05,
      "loss": 0.9014,
      "step": 4753
    },
    {
      "epoch": 0.724474245656812,
      "grad_norm": 1.171875,
      "learning_rate": 3.724392850793159e-05,
      "loss": 0.8319,
      "step": 4754
    },
    {
      "epoch": 0.7246266382200549,
      "grad_norm": 1.015625,
      "learning_rate": 3.7205508151498344e-05,
      "loss": 1.1354,
      "step": 4755
    },
    {
      "epoch": 0.7247790307832978,
      "grad_norm": 1.0546875,
      "learning_rate": 3.7167103092706326e-05,
      "loss": 0.9386,
      "step": 4756
    },
    {
      "epoch": 0.7249314233465407,
      "grad_norm": 1.125,
      "learning_rate": 3.7128713340911535e-05,
      "loss": 0.9813,
      "step": 4757
    },
    {
      "epoch": 0.7250838159097837,
      "grad_norm": 1.25,
      "learning_rate": 3.709033890546625e-05,
      "loss": 1.0105,
      "step": 4758
    },
    {
      "epoch": 0.7252362084730265,
      "grad_norm": 0.765625,
      "learning_rate": 3.7051979795719095e-05,
      "loss": 0.9585,
      "step": 4759
    },
    {
      "epoch": 0.7253886010362695,
      "grad_norm": 0.96875,
      "learning_rate": 3.701363602101487e-05,
      "loss": 1.2457,
      "step": 4760
    },
    {
      "epoch": 0.7255409935995123,
      "grad_norm": 0.84765625,
      "learning_rate": 3.6975307590694686e-05,
      "loss": 0.9073,
      "step": 4761
    },
    {
      "epoch": 0.7256933861627552,
      "grad_norm": 0.73046875,
      "learning_rate": 3.6936994514095865e-05,
      "loss": 0.8987,
      "step": 4762
    },
    {
      "epoch": 0.7258457787259982,
      "grad_norm": 0.90625,
      "learning_rate": 3.689869680055206e-05,
      "loss": 0.9805,
      "step": 4763
    },
    {
      "epoch": 0.725998171289241,
      "grad_norm": 0.8671875,
      "learning_rate": 3.686041445939308e-05,
      "loss": 1.0063,
      "step": 4764
    },
    {
      "epoch": 0.726150563852484,
      "grad_norm": 0.8125,
      "learning_rate": 3.6822147499945134e-05,
      "loss": 0.748,
      "step": 4765
    },
    {
      "epoch": 0.726302956415727,
      "grad_norm": 1.078125,
      "learning_rate": 3.6783895931530574e-05,
      "loss": 1.0172,
      "step": 4766
    },
    {
      "epoch": 0.7264553489789698,
      "grad_norm": 0.90234375,
      "learning_rate": 3.674565976346804e-05,
      "loss": 0.9661,
      "step": 4767
    },
    {
      "epoch": 0.7266077415422127,
      "grad_norm": 1.546875,
      "learning_rate": 3.67074390050724e-05,
      "loss": 1.1555,
      "step": 4768
    },
    {
      "epoch": 0.7267601341054557,
      "grad_norm": 0.984375,
      "learning_rate": 3.6669233665654734e-05,
      "loss": 0.9725,
      "step": 4769
    },
    {
      "epoch": 0.7269125266686985,
      "grad_norm": 1.0078125,
      "learning_rate": 3.6631043754522495e-05,
      "loss": 1.0969,
      "step": 4770
    },
    {
      "epoch": 0.7270649192319415,
      "grad_norm": 1.09375,
      "learning_rate": 3.659286928097927e-05,
      "loss": 0.8264,
      "step": 4771
    },
    {
      "epoch": 0.7272173117951843,
      "grad_norm": 1.359375,
      "learning_rate": 3.65547102543249e-05,
      "loss": 0.9228,
      "step": 4772
    },
    {
      "epoch": 0.7273697043584273,
      "grad_norm": 1.0078125,
      "learning_rate": 3.651656668385547e-05,
      "loss": 1.0177,
      "step": 4773
    },
    {
      "epoch": 0.7275220969216702,
      "grad_norm": 1.1875,
      "learning_rate": 3.6478438578863296e-05,
      "loss": 1.0337,
      "step": 4774
    },
    {
      "epoch": 0.7276744894849131,
      "grad_norm": 1.4140625,
      "learning_rate": 3.644032594863691e-05,
      "loss": 1.0465,
      "step": 4775
    },
    {
      "epoch": 0.727826882048156,
      "grad_norm": 0.90625,
      "learning_rate": 3.640222880246117e-05,
      "loss": 0.8786,
      "step": 4776
    },
    {
      "epoch": 0.727979274611399,
      "grad_norm": 0.87890625,
      "learning_rate": 3.6364147149617035e-05,
      "loss": 1.0677,
      "step": 4777
    },
    {
      "epoch": 0.7281316671746418,
      "grad_norm": 1.21875,
      "learning_rate": 3.632608099938176e-05,
      "loss": 1.2219,
      "step": 4778
    },
    {
      "epoch": 0.7282840597378848,
      "grad_norm": 0.9375,
      "learning_rate": 3.6288030361028804e-05,
      "loss": 1.0739,
      "step": 4779
    },
    {
      "epoch": 0.7284364523011277,
      "grad_norm": 0.984375,
      "learning_rate": 3.6249995243827805e-05,
      "loss": 1.0649,
      "step": 4780
    },
    {
      "epoch": 0.7285888448643706,
      "grad_norm": 0.89453125,
      "learning_rate": 3.621197565704474e-05,
      "loss": 0.8527,
      "step": 4781
    },
    {
      "epoch": 0.7287412374276135,
      "grad_norm": 1.1015625,
      "learning_rate": 3.617397160994171e-05,
      "loss": 0.9278,
      "step": 4782
    },
    {
      "epoch": 0.7288936299908565,
      "grad_norm": 0.76953125,
      "learning_rate": 3.6135983111777e-05,
      "loss": 0.8397,
      "step": 4783
    },
    {
      "epoch": 0.7290460225540993,
      "grad_norm": 1.046875,
      "learning_rate": 3.609801017180521e-05,
      "loss": 1.1353,
      "step": 4784
    },
    {
      "epoch": 0.7291984151173423,
      "grad_norm": 0.98046875,
      "learning_rate": 3.6060052799277066e-05,
      "loss": 0.9865,
      "step": 4785
    },
    {
      "epoch": 0.7293508076805851,
      "grad_norm": 1.0,
      "learning_rate": 3.6022111003439495e-05,
      "loss": 0.8664,
      "step": 4786
    },
    {
      "epoch": 0.7295032002438281,
      "grad_norm": 1.1875,
      "learning_rate": 3.598418479353574e-05,
      "loss": 0.8862,
      "step": 4787
    },
    {
      "epoch": 0.729655592807071,
      "grad_norm": 0.8984375,
      "learning_rate": 3.5946274178805125e-05,
      "loss": 0.7954,
      "step": 4788
    },
    {
      "epoch": 0.7298079853703139,
      "grad_norm": 0.828125,
      "learning_rate": 3.590837916848323e-05,
      "loss": 0.8928,
      "step": 4789
    },
    {
      "epoch": 0.7299603779335568,
      "grad_norm": 1.2109375,
      "learning_rate": 3.587049977180184e-05,
      "loss": 0.9758,
      "step": 4790
    },
    {
      "epoch": 0.7301127704967998,
      "grad_norm": 0.73828125,
      "learning_rate": 3.5832635997988885e-05,
      "loss": 0.8111,
      "step": 4791
    },
    {
      "epoch": 0.7302651630600426,
      "grad_norm": 0.7734375,
      "learning_rate": 3.5794787856268516e-05,
      "loss": 0.8243,
      "step": 4792
    },
    {
      "epoch": 0.7304175556232856,
      "grad_norm": 0.7734375,
      "learning_rate": 3.5756955355861154e-05,
      "loss": 1.1616,
      "step": 4793
    },
    {
      "epoch": 0.7305699481865285,
      "grad_norm": 1.0625,
      "learning_rate": 3.5719138505983295e-05,
      "loss": 1.0172,
      "step": 4794
    },
    {
      "epoch": 0.7307223407497714,
      "grad_norm": 0.97265625,
      "learning_rate": 3.568133731584767e-05,
      "loss": 0.8217,
      "step": 4795
    },
    {
      "epoch": 0.7308747333130143,
      "grad_norm": 0.80078125,
      "learning_rate": 3.56435517946632e-05,
      "loss": 0.8129,
      "step": 4796
    },
    {
      "epoch": 0.7310271258762573,
      "grad_norm": 1.0546875,
      "learning_rate": 3.560578195163493e-05,
      "loss": 1.0605,
      "step": 4797
    },
    {
      "epoch": 0.7311795184395001,
      "grad_norm": 0.9609375,
      "learning_rate": 3.556802779596422e-05,
      "loss": 0.9345,
      "step": 4798
    },
    {
      "epoch": 0.7313319110027431,
      "grad_norm": 1.1640625,
      "learning_rate": 3.55302893368485e-05,
      "loss": 1.075,
      "step": 4799
    },
    {
      "epoch": 0.7314843035659859,
      "grad_norm": 0.984375,
      "learning_rate": 3.549256658348137e-05,
      "loss": 1.1312,
      "step": 4800
    },
    {
      "epoch": 0.7316366961292289,
      "grad_norm": 1.1328125,
      "learning_rate": 3.5454859545052664e-05,
      "loss": 1.0229,
      "step": 4801
    },
    {
      "epoch": 0.7317890886924718,
      "grad_norm": 0.7265625,
      "learning_rate": 3.5417168230748346e-05,
      "loss": 0.9718,
      "step": 4802
    },
    {
      "epoch": 0.7319414812557147,
      "grad_norm": 0.81640625,
      "learning_rate": 3.537949264975053e-05,
      "loss": 0.9325,
      "step": 4803
    },
    {
      "epoch": 0.7320938738189576,
      "grad_norm": 1.03125,
      "learning_rate": 3.534183281123761e-05,
      "loss": 1.145,
      "step": 4804
    },
    {
      "epoch": 0.7322462663822006,
      "grad_norm": 1.2421875,
      "learning_rate": 3.5304188724384e-05,
      "loss": 1.1011,
      "step": 4805
    },
    {
      "epoch": 0.7323986589454434,
      "grad_norm": 0.671875,
      "learning_rate": 3.526656039836037e-05,
      "loss": 0.8605,
      "step": 4806
    },
    {
      "epoch": 0.7325510515086864,
      "grad_norm": 0.67578125,
      "learning_rate": 3.522894784233351e-05,
      "loss": 0.8342,
      "step": 4807
    },
    {
      "epoch": 0.7327034440719293,
      "grad_norm": 1.3984375,
      "learning_rate": 3.519135106546638e-05,
      "loss": 1.0243,
      "step": 4808
    },
    {
      "epoch": 0.7328558366351722,
      "grad_norm": 1.0546875,
      "learning_rate": 3.515377007691806e-05,
      "loss": 1.0766,
      "step": 4809
    },
    {
      "epoch": 0.7330082291984151,
      "grad_norm": 0.6640625,
      "learning_rate": 3.5116204885843894e-05,
      "loss": 0.9277,
      "step": 4810
    },
    {
      "epoch": 0.7331606217616581,
      "grad_norm": 0.796875,
      "learning_rate": 3.5078655501395255e-05,
      "loss": 0.8638,
      "step": 4811
    },
    {
      "epoch": 0.7333130143249009,
      "grad_norm": 1.0703125,
      "learning_rate": 3.5041121932719725e-05,
      "loss": 0.8552,
      "step": 4812
    },
    {
      "epoch": 0.7334654068881439,
      "grad_norm": 0.9765625,
      "learning_rate": 3.500360418896101e-05,
      "loss": 1.0335,
      "step": 4813
    },
    {
      "epoch": 0.7336177994513867,
      "grad_norm": 0.76171875,
      "learning_rate": 3.496610227925896e-05,
      "loss": 0.8927,
      "step": 4814
    },
    {
      "epoch": 0.7337701920146297,
      "grad_norm": 0.75390625,
      "learning_rate": 3.492861621274963e-05,
      "loss": 0.8107,
      "step": 4815
    },
    {
      "epoch": 0.7339225845778726,
      "grad_norm": 1.046875,
      "learning_rate": 3.4891145998565134e-05,
      "loss": 1.2467,
      "step": 4816
    },
    {
      "epoch": 0.7340749771411155,
      "grad_norm": 0.8359375,
      "learning_rate": 3.485369164583375e-05,
      "loss": 1.1109,
      "step": 4817
    },
    {
      "epoch": 0.7342273697043584,
      "grad_norm": 0.9921875,
      "learning_rate": 3.4816253163679914e-05,
      "loss": 0.9641,
      "step": 4818
    },
    {
      "epoch": 0.7343797622676014,
      "grad_norm": 0.875,
      "learning_rate": 3.477883056122415e-05,
      "loss": 0.9705,
      "step": 4819
    },
    {
      "epoch": 0.7345321548308442,
      "grad_norm": 0.96484375,
      "learning_rate": 3.4741423847583134e-05,
      "loss": 1.0359,
      "step": 4820
    },
    {
      "epoch": 0.7346845473940872,
      "grad_norm": 1.1796875,
      "learning_rate": 3.470403303186973e-05,
      "loss": 1.1335,
      "step": 4821
    },
    {
      "epoch": 0.7348369399573301,
      "grad_norm": 1.046875,
      "learning_rate": 3.466665812319286e-05,
      "loss": 1.232,
      "step": 4822
    },
    {
      "epoch": 0.734989332520573,
      "grad_norm": 0.9375,
      "learning_rate": 3.462929913065758e-05,
      "loss": 1.0213,
      "step": 4823
    },
    {
      "epoch": 0.7351417250838159,
      "grad_norm": 0.96875,
      "learning_rate": 3.459195606336508e-05,
      "loss": 0.805,
      "step": 4824
    },
    {
      "epoch": 0.7352941176470589,
      "grad_norm": 0.9453125,
      "learning_rate": 3.455462893041267e-05,
      "loss": 0.8493,
      "step": 4825
    },
    {
      "epoch": 0.7354465102103017,
      "grad_norm": 1.140625,
      "learning_rate": 3.451731774089374e-05,
      "loss": 1.0302,
      "step": 4826
    },
    {
      "epoch": 0.7355989027735447,
      "grad_norm": 1.0078125,
      "learning_rate": 3.448002250389789e-05,
      "loss": 0.9602,
      "step": 4827
    },
    {
      "epoch": 0.7357512953367875,
      "grad_norm": 0.984375,
      "learning_rate": 3.44427432285108e-05,
      "loss": 1.0692,
      "step": 4828
    },
    {
      "epoch": 0.7359036879000305,
      "grad_norm": 1.1640625,
      "learning_rate": 3.440547992381411e-05,
      "loss": 0.9694,
      "step": 4829
    },
    {
      "epoch": 0.7360560804632734,
      "grad_norm": 0.83984375,
      "learning_rate": 3.436823259888582e-05,
      "loss": 0.9052,
      "step": 4830
    },
    {
      "epoch": 0.7362084730265163,
      "grad_norm": 0.953125,
      "learning_rate": 3.4331001262799836e-05,
      "loss": 1.1894,
      "step": 4831
    },
    {
      "epoch": 0.7363608655897592,
      "grad_norm": 1.1796875,
      "learning_rate": 3.429378592462631e-05,
      "loss": 1.0821,
      "step": 4832
    },
    {
      "epoch": 0.7365132581530022,
      "grad_norm": 0.87109375,
      "learning_rate": 3.4256586593431407e-05,
      "loss": 1.1483,
      "step": 4833
    },
    {
      "epoch": 0.736665650716245,
      "grad_norm": 1.046875,
      "learning_rate": 3.4219403278277416e-05,
      "loss": 0.9592,
      "step": 4834
    },
    {
      "epoch": 0.736818043279488,
      "grad_norm": 0.77734375,
      "learning_rate": 3.418223598822273e-05,
      "loss": 0.8115,
      "step": 4835
    },
    {
      "epoch": 0.7369704358427309,
      "grad_norm": 1.1484375,
      "learning_rate": 3.414508473232184e-05,
      "loss": 1.0314,
      "step": 4836
    },
    {
      "epoch": 0.7371228284059738,
      "grad_norm": 0.81640625,
      "learning_rate": 3.4107949519625284e-05,
      "loss": 0.7874,
      "step": 4837
    },
    {
      "epoch": 0.7372752209692167,
      "grad_norm": 0.84375,
      "learning_rate": 3.407083035917981e-05,
      "loss": 0.9863,
      "step": 4838
    },
    {
      "epoch": 0.7374276135324597,
      "grad_norm": 1.0234375,
      "learning_rate": 3.403372726002812e-05,
      "loss": 1.0419,
      "step": 4839
    },
    {
      "epoch": 0.7375800060957025,
      "grad_norm": 0.8984375,
      "learning_rate": 3.39966402312091e-05,
      "loss": 0.9519,
      "step": 4840
    },
    {
      "epoch": 0.7377323986589455,
      "grad_norm": 0.78515625,
      "learning_rate": 3.395956928175765e-05,
      "loss": 1.0564,
      "step": 4841
    },
    {
      "epoch": 0.7378847912221883,
      "grad_norm": 0.859375,
      "learning_rate": 3.392251442070481e-05,
      "loss": 0.8486,
      "step": 4842
    },
    {
      "epoch": 0.7380371837854313,
      "grad_norm": 1.09375,
      "learning_rate": 3.3885475657077634e-05,
      "loss": 1.0101,
      "step": 4843
    },
    {
      "epoch": 0.7381895763486742,
      "grad_norm": 1.171875,
      "learning_rate": 3.384845299989936e-05,
      "loss": 1.002,
      "step": 4844
    },
    {
      "epoch": 0.7383419689119171,
      "grad_norm": 0.94140625,
      "learning_rate": 3.3811446458189224e-05,
      "loss": 1.1223,
      "step": 4845
    },
    {
      "epoch": 0.73849436147516,
      "grad_norm": 1.5625,
      "learning_rate": 3.377445604096253e-05,
      "loss": 1.0715,
      "step": 4846
    },
    {
      "epoch": 0.738646754038403,
      "grad_norm": 0.94921875,
      "learning_rate": 3.3737481757230685e-05,
      "loss": 1.0709,
      "step": 4847
    },
    {
      "epoch": 0.7387991466016458,
      "grad_norm": 1.375,
      "learning_rate": 3.370052361600112e-05,
      "loss": 0.8891,
      "step": 4848
    },
    {
      "epoch": 0.7389515391648888,
      "grad_norm": 0.71875,
      "learning_rate": 3.3663581626277454e-05,
      "loss": 0.9372,
      "step": 4849
    },
    {
      "epoch": 0.7391039317281317,
      "grad_norm": 0.95703125,
      "learning_rate": 3.362665579705923e-05,
      "loss": 0.9467,
      "step": 4850
    },
    {
      "epoch": 0.7392563242913746,
      "grad_norm": 0.8828125,
      "learning_rate": 3.358974613734212e-05,
      "loss": 0.9794,
      "step": 4851
    },
    {
      "epoch": 0.7394087168546175,
      "grad_norm": 0.9609375,
      "learning_rate": 3.355285265611784e-05,
      "loss": 1.0233,
      "step": 4852
    },
    {
      "epoch": 0.7395611094178605,
      "grad_norm": 0.9453125,
      "learning_rate": 3.351597536237419e-05,
      "loss": 0.9262,
      "step": 4853
    },
    {
      "epoch": 0.7397135019811033,
      "grad_norm": 0.95703125,
      "learning_rate": 3.347911426509496e-05,
      "loss": 0.9321,
      "step": 4854
    },
    {
      "epoch": 0.7398658945443463,
      "grad_norm": 1.359375,
      "learning_rate": 3.344226937326012e-05,
      "loss": 1.2421,
      "step": 4855
    },
    {
      "epoch": 0.7400182871075891,
      "grad_norm": 1.1953125,
      "learning_rate": 3.34054406958456e-05,
      "loss": 1.0172,
      "step": 4856
    },
    {
      "epoch": 0.740170679670832,
      "grad_norm": 0.8203125,
      "learning_rate": 3.336862824182332e-05,
      "loss": 0.8269,
      "step": 4857
    },
    {
      "epoch": 0.740323072234075,
      "grad_norm": 0.890625,
      "learning_rate": 3.3331832020161416e-05,
      "loss": 0.9846,
      "step": 4858
    },
    {
      "epoch": 0.7404754647973179,
      "grad_norm": 0.85546875,
      "learning_rate": 3.3295052039823926e-05,
      "loss": 0.9293,
      "step": 4859
    },
    {
      "epoch": 0.7406278573605608,
      "grad_norm": 1.0078125,
      "learning_rate": 3.325828830977096e-05,
      "loss": 0.9434,
      "step": 4860
    },
    {
      "epoch": 0.7407802499238038,
      "grad_norm": 1.0859375,
      "learning_rate": 3.322154083895876e-05,
      "loss": 0.9917,
      "step": 4861
    },
    {
      "epoch": 0.7409326424870466,
      "grad_norm": 0.984375,
      "learning_rate": 3.318480963633951e-05,
      "loss": 0.9284,
      "step": 4862
    },
    {
      "epoch": 0.7410850350502896,
      "grad_norm": 0.69140625,
      "learning_rate": 3.314809471086144e-05,
      "loss": 0.8873,
      "step": 4863
    },
    {
      "epoch": 0.7412374276135325,
      "grad_norm": 0.890625,
      "learning_rate": 3.311139607146886e-05,
      "loss": 1.0545,
      "step": 4864
    },
    {
      "epoch": 0.7413898201767753,
      "grad_norm": 0.90234375,
      "learning_rate": 3.3074713727102026e-05,
      "loss": 1.0239,
      "step": 4865
    },
    {
      "epoch": 0.7415422127400183,
      "grad_norm": 1.171875,
      "learning_rate": 3.303804768669737e-05,
      "loss": 0.9874,
      "step": 4866
    },
    {
      "epoch": 0.7416946053032613,
      "grad_norm": 0.82421875,
      "learning_rate": 3.300139795918723e-05,
      "loss": 0.8666,
      "step": 4867
    },
    {
      "epoch": 0.7418469978665041,
      "grad_norm": 1.2734375,
      "learning_rate": 3.29647645535e-05,
      "loss": 1.2677,
      "step": 4868
    },
    {
      "epoch": 0.741999390429747,
      "grad_norm": 0.8359375,
      "learning_rate": 3.2928147478560126e-05,
      "loss": 1.035,
      "step": 4869
    },
    {
      "epoch": 0.7421517829929899,
      "grad_norm": 0.8515625,
      "learning_rate": 3.2891546743288014e-05,
      "loss": 0.9198,
      "step": 4870
    },
    {
      "epoch": 0.7423041755562328,
      "grad_norm": 0.82421875,
      "learning_rate": 3.2854962356600124e-05,
      "loss": 0.994,
      "step": 4871
    },
    {
      "epoch": 0.7424565681194758,
      "grad_norm": 1.03125,
      "learning_rate": 3.2818394327409006e-05,
      "loss": 0.9571,
      "step": 4872
    },
    {
      "epoch": 0.7426089606827186,
      "grad_norm": 0.9453125,
      "learning_rate": 3.2781842664623155e-05,
      "loss": 0.8702,
      "step": 4873
    },
    {
      "epoch": 0.7427613532459616,
      "grad_norm": 0.91015625,
      "learning_rate": 3.2745307377146975e-05,
      "loss": 0.9794,
      "step": 4874
    },
    {
      "epoch": 0.7429137458092046,
      "grad_norm": 0.76171875,
      "learning_rate": 3.270878847388109e-05,
      "loss": 0.9422,
      "step": 4875
    },
    {
      "epoch": 0.7430661383724474,
      "grad_norm": 1.1484375,
      "learning_rate": 3.2672285963721996e-05,
      "loss": 1.1916,
      "step": 4876
    },
    {
      "epoch": 0.7432185309356903,
      "grad_norm": 0.80859375,
      "learning_rate": 3.26357998555622e-05,
      "loss": 0.8723,
      "step": 4877
    },
    {
      "epoch": 0.7433709234989333,
      "grad_norm": 0.9765625,
      "learning_rate": 3.259933015829031e-05,
      "loss": 0.765,
      "step": 4878
    },
    {
      "epoch": 0.7435233160621761,
      "grad_norm": 0.8359375,
      "learning_rate": 3.256287688079083e-05,
      "loss": 0.9096,
      "step": 4879
    },
    {
      "epoch": 0.7436757086254191,
      "grad_norm": 1.3046875,
      "learning_rate": 3.2526440031944306e-05,
      "loss": 1.0013,
      "step": 4880
    },
    {
      "epoch": 0.7438281011886619,
      "grad_norm": 0.98046875,
      "learning_rate": 3.2490019620627276e-05,
      "loss": 1.1395,
      "step": 4881
    },
    {
      "epoch": 0.7439804937519049,
      "grad_norm": 0.99609375,
      "learning_rate": 3.2453615655712245e-05,
      "loss": 0.8104,
      "step": 4882
    },
    {
      "epoch": 0.7441328863151478,
      "grad_norm": 1.0703125,
      "learning_rate": 3.241722814606781e-05,
      "loss": 0.9716,
      "step": 4883
    },
    {
      "epoch": 0.7442852788783907,
      "grad_norm": 0.82421875,
      "learning_rate": 3.2380857100558494e-05,
      "loss": 0.8168,
      "step": 4884
    },
    {
      "epoch": 0.7444376714416336,
      "grad_norm": 0.8828125,
      "learning_rate": 3.23445025280447e-05,
      "loss": 0.9236,
      "step": 4885
    },
    {
      "epoch": 0.7445900640048766,
      "grad_norm": 0.78125,
      "learning_rate": 3.230816443738302e-05,
      "loss": 0.9611,
      "step": 4886
    },
    {
      "epoch": 0.7447424565681194,
      "grad_norm": 0.85546875,
      "learning_rate": 3.227184283742591e-05,
      "loss": 1.0378,
      "step": 4887
    },
    {
      "epoch": 0.7448948491313624,
      "grad_norm": 0.828125,
      "learning_rate": 3.223553773702181e-05,
      "loss": 1.011,
      "step": 4888
    },
    {
      "epoch": 0.7450472416946053,
      "grad_norm": 1.1640625,
      "learning_rate": 3.219924914501521e-05,
      "loss": 1.087,
      "step": 4889
    },
    {
      "epoch": 0.7451996342578482,
      "grad_norm": 1.078125,
      "learning_rate": 3.216297707024655e-05,
      "loss": 1.1362,
      "step": 4890
    },
    {
      "epoch": 0.7453520268210911,
      "grad_norm": 0.86328125,
      "learning_rate": 3.212672152155212e-05,
      "loss": 1.0017,
      "step": 4891
    },
    {
      "epoch": 0.7455044193843341,
      "grad_norm": 1.0078125,
      "learning_rate": 3.2090482507764395e-05,
      "loss": 0.997,
      "step": 4892
    },
    {
      "epoch": 0.7456568119475769,
      "grad_norm": 0.90234375,
      "learning_rate": 3.205426003771169e-05,
      "loss": 0.9873,
      "step": 4893
    },
    {
      "epoch": 0.7458092045108199,
      "grad_norm": 0.86328125,
      "learning_rate": 3.201805412021829e-05,
      "loss": 0.8538,
      "step": 4894
    },
    {
      "epoch": 0.7459615970740627,
      "grad_norm": 0.62109375,
      "learning_rate": 3.198186476410453e-05,
      "loss": 0.9236,
      "step": 4895
    },
    {
      "epoch": 0.7461139896373057,
      "grad_norm": 0.859375,
      "learning_rate": 3.194569197818663e-05,
      "loss": 0.9444,
      "step": 4896
    },
    {
      "epoch": 0.7462663822005486,
      "grad_norm": 1.1015625,
      "learning_rate": 3.190953577127681e-05,
      "loss": 0.9423,
      "step": 4897
    },
    {
      "epoch": 0.7464187747637915,
      "grad_norm": 1.1796875,
      "learning_rate": 3.187339615218323e-05,
      "loss": 1.0166,
      "step": 4898
    },
    {
      "epoch": 0.7465711673270344,
      "grad_norm": 0.8046875,
      "learning_rate": 3.183727312971e-05,
      "loss": 0.8722,
      "step": 4899
    },
    {
      "epoch": 0.7467235598902774,
      "grad_norm": 0.7890625,
      "learning_rate": 3.180116671265726e-05,
      "loss": 0.9405,
      "step": 4900
    },
    {
      "epoch": 0.7468759524535202,
      "grad_norm": 0.88671875,
      "learning_rate": 3.176507690982106e-05,
      "loss": 1.0042,
      "step": 4901
    },
    {
      "epoch": 0.7470283450167632,
      "grad_norm": 0.98046875,
      "learning_rate": 3.172900372999329e-05,
      "loss": 0.9039,
      "step": 4902
    },
    {
      "epoch": 0.7471807375800061,
      "grad_norm": 1.1328125,
      "learning_rate": 3.1692947181961994e-05,
      "loss": 0.9215,
      "step": 4903
    },
    {
      "epoch": 0.747333130143249,
      "grad_norm": 1.0703125,
      "learning_rate": 3.165690727451105e-05,
      "loss": 0.9205,
      "step": 4904
    },
    {
      "epoch": 0.7474855227064919,
      "grad_norm": 0.8984375,
      "learning_rate": 3.162088401642025e-05,
      "loss": 1.0675,
      "step": 4905
    },
    {
      "epoch": 0.7476379152697349,
      "grad_norm": 1.0078125,
      "learning_rate": 3.158487741646545e-05,
      "loss": 1.0028,
      "step": 4906
    },
    {
      "epoch": 0.7477903078329777,
      "grad_norm": 0.9609375,
      "learning_rate": 3.154888748341836e-05,
      "loss": 0.991,
      "step": 4907
    },
    {
      "epoch": 0.7479427003962207,
      "grad_norm": 0.78125,
      "learning_rate": 3.151291422604658e-05,
      "loss": 1.0813,
      "step": 4908
    },
    {
      "epoch": 0.7480950929594635,
      "grad_norm": 1.015625,
      "learning_rate": 3.147695765311377e-05,
      "loss": 1.1543,
      "step": 4909
    },
    {
      "epoch": 0.7482474855227065,
      "grad_norm": 0.96875,
      "learning_rate": 3.144101777337942e-05,
      "loss": 0.7955,
      "step": 4910
    },
    {
      "epoch": 0.7483998780859494,
      "grad_norm": 1.234375,
      "learning_rate": 3.140509459559908e-05,
      "loss": 0.8962,
      "step": 4911
    },
    {
      "epoch": 0.7485522706491923,
      "grad_norm": 0.88671875,
      "learning_rate": 3.136918812852413e-05,
      "loss": 0.9092,
      "step": 4912
    },
    {
      "epoch": 0.7487046632124352,
      "grad_norm": 0.921875,
      "learning_rate": 3.1333298380901834e-05,
      "loss": 1.0314,
      "step": 4913
    },
    {
      "epoch": 0.7488570557756782,
      "grad_norm": 0.81640625,
      "learning_rate": 3.129742536147551e-05,
      "loss": 0.8464,
      "step": 4914
    },
    {
      "epoch": 0.749009448338921,
      "grad_norm": 0.7890625,
      "learning_rate": 3.1261569078984346e-05,
      "loss": 0.9195,
      "step": 4915
    },
    {
      "epoch": 0.749161840902164,
      "grad_norm": 0.8046875,
      "learning_rate": 3.122572954216338e-05,
      "loss": 0.9739,
      "step": 4916
    },
    {
      "epoch": 0.7493142334654069,
      "grad_norm": 1.3671875,
      "learning_rate": 3.1189906759743736e-05,
      "loss": 1.1513,
      "step": 4917
    },
    {
      "epoch": 0.7494666260286498,
      "grad_norm": 0.9921875,
      "learning_rate": 3.1154100740452344e-05,
      "loss": 1.0455,
      "step": 4918
    },
    {
      "epoch": 0.7496190185918927,
      "grad_norm": 1.1015625,
      "learning_rate": 3.111831149301196e-05,
      "loss": 0.9805,
      "step": 4919
    },
    {
      "epoch": 0.7497714111551357,
      "grad_norm": 0.87890625,
      "learning_rate": 3.1082539026141476e-05,
      "loss": 0.7946,
      "step": 4920
    },
    {
      "epoch": 0.7499238037183785,
      "grad_norm": 0.7578125,
      "learning_rate": 3.104678334855553e-05,
      "loss": 0.8935,
      "step": 4921
    },
    {
      "epoch": 0.7500761962816215,
      "grad_norm": 1.0078125,
      "learning_rate": 3.10110444689647e-05,
      "loss": 0.8566,
      "step": 4922
    },
    {
      "epoch": 0.7502285888448643,
      "grad_norm": 0.9140625,
      "learning_rate": 3.0975322396075535e-05,
      "loss": 0.928,
      "step": 4923
    },
    {
      "epoch": 0.7503809814081073,
      "grad_norm": 0.9140625,
      "learning_rate": 3.093961713859047e-05,
      "loss": 0.8861,
      "step": 4924
    },
    {
      "epoch": 0.7505333739713502,
      "grad_norm": 1.09375,
      "learning_rate": 3.0903928705207706e-05,
      "loss": 1.0674,
      "step": 4925
    },
    {
      "epoch": 0.7506857665345931,
      "grad_norm": 0.96484375,
      "learning_rate": 3.0868257104621565e-05,
      "loss": 0.8418,
      "step": 4926
    },
    {
      "epoch": 0.750838159097836,
      "grad_norm": 0.78125,
      "learning_rate": 3.08326023455221e-05,
      "loss": 0.904,
      "step": 4927
    },
    {
      "epoch": 0.750990551661079,
      "grad_norm": 1.1015625,
      "learning_rate": 3.079696443659538e-05,
      "loss": 0.8269,
      "step": 4928
    },
    {
      "epoch": 0.7511429442243218,
      "grad_norm": 1.0703125,
      "learning_rate": 3.07613433865233e-05,
      "loss": 1.1142,
      "step": 4929
    },
    {
      "epoch": 0.7512953367875648,
      "grad_norm": 0.95703125,
      "learning_rate": 3.072573920398358e-05,
      "loss": 0.8424,
      "step": 4930
    },
    {
      "epoch": 0.7514477293508077,
      "grad_norm": 1.0078125,
      "learning_rate": 3.069015189765001e-05,
      "loss": 1.0043,
      "step": 4931
    },
    {
      "epoch": 0.7516001219140506,
      "grad_norm": 1.203125,
      "learning_rate": 3.0654581476192136e-05,
      "loss": 1.0522,
      "step": 4932
    },
    {
      "epoch": 0.7517525144772935,
      "grad_norm": 1.0546875,
      "learning_rate": 3.061902794827538e-05,
      "loss": 1.0019,
      "step": 4933
    },
    {
      "epoch": 0.7519049070405365,
      "grad_norm": 0.8515625,
      "learning_rate": 3.058349132256116e-05,
      "loss": 1.1072,
      "step": 4934
    },
    {
      "epoch": 0.7520572996037793,
      "grad_norm": 1.2734375,
      "learning_rate": 3.0547971607706716e-05,
      "loss": 0.9765,
      "step": 4935
    },
    {
      "epoch": 0.7522096921670223,
      "grad_norm": 1.0703125,
      "learning_rate": 3.051246881236507e-05,
      "loss": 1.0949,
      "step": 4936
    },
    {
      "epoch": 0.7523620847302651,
      "grad_norm": 0.88671875,
      "learning_rate": 3.0476982945185296e-05,
      "loss": 1.0202,
      "step": 4937
    },
    {
      "epoch": 0.7525144772935081,
      "grad_norm": 0.84375,
      "learning_rate": 3.0441514014812233e-05,
      "loss": 0.8548,
      "step": 4938
    },
    {
      "epoch": 0.752666869856751,
      "grad_norm": 0.9140625,
      "learning_rate": 3.0406062029886605e-05,
      "loss": 0.9647,
      "step": 4939
    },
    {
      "epoch": 0.7528192624199939,
      "grad_norm": 0.96875,
      "learning_rate": 3.0370626999045093e-05,
      "loss": 1.0265,
      "step": 4940
    },
    {
      "epoch": 0.7529716549832368,
      "grad_norm": 1.34375,
      "learning_rate": 3.033520893092011e-05,
      "loss": 0.816,
      "step": 4941
    },
    {
      "epoch": 0.7531240475464798,
      "grad_norm": 0.83984375,
      "learning_rate": 3.0299807834140005e-05,
      "loss": 0.8656,
      "step": 4942
    },
    {
      "epoch": 0.7532764401097226,
      "grad_norm": 0.92578125,
      "learning_rate": 3.026442371732904e-05,
      "loss": 0.9518,
      "step": 4943
    },
    {
      "epoch": 0.7534288326729656,
      "grad_norm": 0.7890625,
      "learning_rate": 3.0229056589107242e-05,
      "loss": 0.9687,
      "step": 4944
    },
    {
      "epoch": 0.7535812252362085,
      "grad_norm": 0.8515625,
      "learning_rate": 3.019370645809061e-05,
      "loss": 0.9355,
      "step": 4945
    },
    {
      "epoch": 0.7537336177994514,
      "grad_norm": 1.140625,
      "learning_rate": 3.0158373332890954e-05,
      "loss": 1.1312,
      "step": 4946
    },
    {
      "epoch": 0.7538860103626943,
      "grad_norm": 1.1015625,
      "learning_rate": 3.0123057222115836e-05,
      "loss": 1.0962,
      "step": 4947
    },
    {
      "epoch": 0.7540384029259373,
      "grad_norm": 0.72265625,
      "learning_rate": 3.008775813436886e-05,
      "loss": 0.8992,
      "step": 4948
    },
    {
      "epoch": 0.7541907954891801,
      "grad_norm": 0.91796875,
      "learning_rate": 3.005247607824936e-05,
      "loss": 0.9001,
      "step": 4949
    },
    {
      "epoch": 0.7543431880524231,
      "grad_norm": 0.88671875,
      "learning_rate": 3.0017211062352502e-05,
      "loss": 0.8396,
      "step": 4950
    },
    {
      "epoch": 0.7544955806156659,
      "grad_norm": 0.79296875,
      "learning_rate": 2.998196309526945e-05,
      "loss": 1.0568,
      "step": 4951
    },
    {
      "epoch": 0.7546479731789089,
      "grad_norm": 1.046875,
      "learning_rate": 2.994673218558709e-05,
      "loss": 1.0932,
      "step": 4952
    },
    {
      "epoch": 0.7548003657421518,
      "grad_norm": 0.91796875,
      "learning_rate": 2.991151834188809e-05,
      "loss": 1.0515,
      "step": 4953
    },
    {
      "epoch": 0.7549527583053947,
      "grad_norm": 1.0078125,
      "learning_rate": 2.9876321572751144e-05,
      "loss": 1.0198,
      "step": 4954
    },
    {
      "epoch": 0.7551051508686376,
      "grad_norm": 0.76953125,
      "learning_rate": 2.9841141886750668e-05,
      "loss": 0.981,
      "step": 4955
    },
    {
      "epoch": 0.7552575434318806,
      "grad_norm": 0.75,
      "learning_rate": 2.9805979292456887e-05,
      "loss": 0.8377,
      "step": 4956
    },
    {
      "epoch": 0.7554099359951234,
      "grad_norm": 1.0625,
      "learning_rate": 2.9770833798436028e-05,
      "loss": 0.9005,
      "step": 4957
    },
    {
      "epoch": 0.7555623285583664,
      "grad_norm": 0.79296875,
      "learning_rate": 2.9735705413249937e-05,
      "loss": 0.9757,
      "step": 4958
    },
    {
      "epoch": 0.7557147211216093,
      "grad_norm": 0.89453125,
      "learning_rate": 2.9700594145456396e-05,
      "loss": 1.0022,
      "step": 4959
    },
    {
      "epoch": 0.7558671136848522,
      "grad_norm": 1.046875,
      "learning_rate": 2.966550000360907e-05,
      "loss": 1.0877,
      "step": 4960
    },
    {
      "epoch": 0.7560195062480951,
      "grad_norm": 0.75,
      "learning_rate": 2.9630422996257344e-05,
      "loss": 0.8486,
      "step": 4961
    },
    {
      "epoch": 0.7561718988113381,
      "grad_norm": 0.9453125,
      "learning_rate": 2.959536313194655e-05,
      "loss": 0.8774,
      "step": 4962
    },
    {
      "epoch": 0.7563242913745809,
      "grad_norm": 1.0546875,
      "learning_rate": 2.9560320419217758e-05,
      "loss": 0.9633,
      "step": 4963
    },
    {
      "epoch": 0.7564766839378239,
      "grad_norm": 0.9609375,
      "learning_rate": 2.9525294866607787e-05,
      "loss": 1.0416,
      "step": 4964
    },
    {
      "epoch": 0.7566290765010667,
      "grad_norm": 1.046875,
      "learning_rate": 2.9490286482649466e-05,
      "loss": 0.9966,
      "step": 4965
    },
    {
      "epoch": 0.7567814690643097,
      "grad_norm": 0.83984375,
      "learning_rate": 2.9455295275871298e-05,
      "loss": 1.055,
      "step": 4966
    },
    {
      "epoch": 0.7569338616275526,
      "grad_norm": 1.0859375,
      "learning_rate": 2.9420321254797635e-05,
      "loss": 0.8891,
      "step": 4967
    },
    {
      "epoch": 0.7570862541907954,
      "grad_norm": 1.265625,
      "learning_rate": 2.9385364427948716e-05,
      "loss": 1.023,
      "step": 4968
    },
    {
      "epoch": 0.7572386467540384,
      "grad_norm": 1.1875,
      "learning_rate": 2.935042480384046e-05,
      "loss": 0.8427,
      "step": 4969
    },
    {
      "epoch": 0.7573910393172814,
      "grad_norm": 1.09375,
      "learning_rate": 2.9315502390984638e-05,
      "loss": 0.9583,
      "step": 4970
    },
    {
      "epoch": 0.7575434318805242,
      "grad_norm": 1.0234375,
      "learning_rate": 2.9280597197888937e-05,
      "loss": 1.1303,
      "step": 4971
    },
    {
      "epoch": 0.7576958244437672,
      "grad_norm": 1.1484375,
      "learning_rate": 2.9245709233056716e-05,
      "loss": 1.2296,
      "step": 4972
    },
    {
      "epoch": 0.7578482170070101,
      "grad_norm": 0.859375,
      "learning_rate": 2.921083850498717e-05,
      "loss": 0.7114,
      "step": 4973
    },
    {
      "epoch": 0.758000609570253,
      "grad_norm": 0.7578125,
      "learning_rate": 2.917598502217538e-05,
      "loss": 0.8751,
      "step": 4974
    },
    {
      "epoch": 0.7581530021334959,
      "grad_norm": 0.8203125,
      "learning_rate": 2.914114879311205e-05,
      "loss": 1.0313,
      "step": 4975
    },
    {
      "epoch": 0.7583053946967387,
      "grad_norm": 0.921875,
      "learning_rate": 2.9106329826283875e-05,
      "loss": 0.9587,
      "step": 4976
    },
    {
      "epoch": 0.7584577872599817,
      "grad_norm": 0.97265625,
      "learning_rate": 2.907152813017322e-05,
      "loss": 0.9297,
      "step": 4977
    },
    {
      "epoch": 0.7586101798232247,
      "grad_norm": 1.046875,
      "learning_rate": 2.9036743713258253e-05,
      "loss": 1.0747,
      "step": 4978
    },
    {
      "epoch": 0.7587625723864675,
      "grad_norm": 0.80078125,
      "learning_rate": 2.9001976584013024e-05,
      "loss": 0.8577,
      "step": 4979
    },
    {
      "epoch": 0.7589149649497104,
      "grad_norm": 1.1015625,
      "learning_rate": 2.8967226750907295e-05,
      "loss": 1.2111,
      "step": 4980
    },
    {
      "epoch": 0.7590673575129534,
      "grad_norm": 1.0390625,
      "learning_rate": 2.8932494222406537e-05,
      "loss": 0.8531,
      "step": 4981
    },
    {
      "epoch": 0.7592197500761962,
      "grad_norm": 1.109375,
      "learning_rate": 2.889777900697218e-05,
      "loss": 1.079,
      "step": 4982
    },
    {
      "epoch": 0.7593721426394392,
      "grad_norm": 0.953125,
      "learning_rate": 2.886308111306133e-05,
      "loss": 0.9966,
      "step": 4983
    },
    {
      "epoch": 0.7595245352026821,
      "grad_norm": 1.0546875,
      "learning_rate": 2.8828400549126865e-05,
      "loss": 1.0628,
      "step": 4984
    },
    {
      "epoch": 0.759676927765925,
      "grad_norm": 0.9375,
      "learning_rate": 2.8793737323617553e-05,
      "loss": 1.0038,
      "step": 4985
    },
    {
      "epoch": 0.759829320329168,
      "grad_norm": 0.828125,
      "learning_rate": 2.875909144497776e-05,
      "loss": 0.9594,
      "step": 4986
    },
    {
      "epoch": 0.7599817128924109,
      "grad_norm": 0.71484375,
      "learning_rate": 2.872446292164771e-05,
      "loss": 0.9232,
      "step": 4987
    },
    {
      "epoch": 0.7601341054556537,
      "grad_norm": 0.96875,
      "learning_rate": 2.8689851762063502e-05,
      "loss": 0.9709,
      "step": 4988
    },
    {
      "epoch": 0.7602864980188967,
      "grad_norm": 0.94921875,
      "learning_rate": 2.8655257974656857e-05,
      "loss": 0.8454,
      "step": 4989
    },
    {
      "epoch": 0.7604388905821395,
      "grad_norm": 0.87109375,
      "learning_rate": 2.862068156785529e-05,
      "loss": 0.9539,
      "step": 4990
    },
    {
      "epoch": 0.7605912831453825,
      "grad_norm": 0.7734375,
      "learning_rate": 2.8586122550082218e-05,
      "loss": 1.0651,
      "step": 4991
    },
    {
      "epoch": 0.7607436757086254,
      "grad_norm": 1.125,
      "learning_rate": 2.855158092975657e-05,
      "loss": 1.0896,
      "step": 4992
    },
    {
      "epoch": 0.7608960682718683,
      "grad_norm": 0.95703125,
      "learning_rate": 2.8517056715293288e-05,
      "loss": 1.1311,
      "step": 4993
    },
    {
      "epoch": 0.7610484608351112,
      "grad_norm": 0.63671875,
      "learning_rate": 2.8482549915102942e-05,
      "loss": 0.7478,
      "step": 4994
    },
    {
      "epoch": 0.7612008533983542,
      "grad_norm": 1.015625,
      "learning_rate": 2.8448060537591835e-05,
      "loss": 0.8779,
      "step": 4995
    },
    {
      "epoch": 0.761353245961597,
      "grad_norm": 1.0859375,
      "learning_rate": 2.841358859116219e-05,
      "loss": 1.0821,
      "step": 4996
    },
    {
      "epoch": 0.76150563852484,
      "grad_norm": 1.078125,
      "learning_rate": 2.8379134084211755e-05,
      "loss": 1.0859,
      "step": 4997
    },
    {
      "epoch": 0.7616580310880829,
      "grad_norm": 0.87109375,
      "learning_rate": 2.834469702513415e-05,
      "loss": 0.9764,
      "step": 4998
    },
    {
      "epoch": 0.7618104236513258,
      "grad_norm": 0.82421875,
      "learning_rate": 2.831027742231881e-05,
      "loss": 0.9608,
      "step": 4999
    },
    {
      "epoch": 0.7619628162145687,
      "grad_norm": 0.6875,
      "learning_rate": 2.827587528415081e-05,
      "loss": 0.9631,
      "step": 5000
    },
    {
      "epoch": 0.7621152087778117,
      "grad_norm": 1.0546875,
      "learning_rate": 2.8241490619010956e-05,
      "loss": 0.9942,
      "step": 5001
    },
    {
      "epoch": 0.7622676013410545,
      "grad_norm": 1.1171875,
      "learning_rate": 2.8207123435275974e-05,
      "loss": 0.986,
      "step": 5002
    },
    {
      "epoch": 0.7624199939042975,
      "grad_norm": 0.80078125,
      "learning_rate": 2.817277374131808e-05,
      "loss": 0.9891,
      "step": 5003
    },
    {
      "epoch": 0.7625723864675403,
      "grad_norm": 0.9765625,
      "learning_rate": 2.8138441545505366e-05,
      "loss": 0.9674,
      "step": 5004
    },
    {
      "epoch": 0.7627247790307833,
      "grad_norm": 1.0078125,
      "learning_rate": 2.810412685620172e-05,
      "loss": 0.9442,
      "step": 5005
    },
    {
      "epoch": 0.7628771715940262,
      "grad_norm": 0.91796875,
      "learning_rate": 2.8069829681766648e-05,
      "loss": 0.9341,
      "step": 5006
    },
    {
      "epoch": 0.7630295641572691,
      "grad_norm": 0.85546875,
      "learning_rate": 2.8035550030555402e-05,
      "loss": 1.033,
      "step": 5007
    },
    {
      "epoch": 0.763181956720512,
      "grad_norm": 0.85546875,
      "learning_rate": 2.8001287910919106e-05,
      "loss": 0.9011,
      "step": 5008
    },
    {
      "epoch": 0.763334349283755,
      "grad_norm": 0.85546875,
      "learning_rate": 2.796704333120437e-05,
      "loss": 1.0794,
      "step": 5009
    },
    {
      "epoch": 0.7634867418469978,
      "grad_norm": 0.95703125,
      "learning_rate": 2.793281629975376e-05,
      "loss": 1.0177,
      "step": 5010
    },
    {
      "epoch": 0.7636391344102408,
      "grad_norm": 0.78515625,
      "learning_rate": 2.789860682490545e-05,
      "loss": 0.9117,
      "step": 5011
    },
    {
      "epoch": 0.7637915269734837,
      "grad_norm": 0.78125,
      "learning_rate": 2.786441491499332e-05,
      "loss": 0.8863,
      "step": 5012
    },
    {
      "epoch": 0.7639439195367266,
      "grad_norm": 0.96875,
      "learning_rate": 2.783024057834711e-05,
      "loss": 0.7992,
      "step": 5013
    },
    {
      "epoch": 0.7640963120999695,
      "grad_norm": 1.234375,
      "learning_rate": 2.7796083823292084e-05,
      "loss": 1.1892,
      "step": 5014
    },
    {
      "epoch": 0.7642487046632125,
      "grad_norm": 0.9296875,
      "learning_rate": 2.776194465814932e-05,
      "loss": 0.9516,
      "step": 5015
    },
    {
      "epoch": 0.7644010972264553,
      "grad_norm": 1.6640625,
      "learning_rate": 2.7727823091235662e-05,
      "loss": 0.9727,
      "step": 5016
    },
    {
      "epoch": 0.7645534897896983,
      "grad_norm": 1.1796875,
      "learning_rate": 2.7693719130863606e-05,
      "loss": 1.0949,
      "step": 5017
    },
    {
      "epoch": 0.7647058823529411,
      "grad_norm": 1.1484375,
      "learning_rate": 2.7659632785341304e-05,
      "loss": 1.1829,
      "step": 5018
    },
    {
      "epoch": 0.7648582749161841,
      "grad_norm": 1.1171875,
      "learning_rate": 2.7625564062972797e-05,
      "loss": 0.9409,
      "step": 5019
    },
    {
      "epoch": 0.765010667479427,
      "grad_norm": 0.84765625,
      "learning_rate": 2.759151297205762e-05,
      "loss": 1.0116,
      "step": 5020
    },
    {
      "epoch": 0.7651630600426699,
      "grad_norm": 0.76171875,
      "learning_rate": 2.7557479520891104e-05,
      "loss": 0.8327,
      "step": 5021
    },
    {
      "epoch": 0.7653154526059128,
      "grad_norm": 1.0546875,
      "learning_rate": 2.7523463717764353e-05,
      "loss": 1.0704,
      "step": 5022
    },
    {
      "epoch": 0.7654678451691558,
      "grad_norm": 0.71875,
      "learning_rate": 2.748946557096407e-05,
      "loss": 0.7799,
      "step": 5023
    },
    {
      "epoch": 0.7656202377323986,
      "grad_norm": 1.0078125,
      "learning_rate": 2.7455485088772692e-05,
      "loss": 0.8416,
      "step": 5024
    },
    {
      "epoch": 0.7657726302956416,
      "grad_norm": 1.0,
      "learning_rate": 2.742152227946836e-05,
      "loss": 1.058,
      "step": 5025
    },
    {
      "epoch": 0.7659250228588845,
      "grad_norm": 1.1484375,
      "learning_rate": 2.738757715132486e-05,
      "loss": 0.9618,
      "step": 5026
    },
    {
      "epoch": 0.7660774154221274,
      "grad_norm": 0.69140625,
      "learning_rate": 2.7353649712611786e-05,
      "loss": 0.8513,
      "step": 5027
    },
    {
      "epoch": 0.7662298079853703,
      "grad_norm": 0.80078125,
      "learning_rate": 2.7319739971594314e-05,
      "loss": 0.8995,
      "step": 5028
    },
    {
      "epoch": 0.7663822005486133,
      "grad_norm": 0.80859375,
      "learning_rate": 2.7285847936533326e-05,
      "loss": 1.0247,
      "step": 5029
    },
    {
      "epoch": 0.7665345931118561,
      "grad_norm": 0.875,
      "learning_rate": 2.7251973615685476e-05,
      "loss": 0.887,
      "step": 5030
    },
    {
      "epoch": 0.7666869856750991,
      "grad_norm": 0.80859375,
      "learning_rate": 2.7218117017302968e-05,
      "loss": 0.911,
      "step": 5031
    },
    {
      "epoch": 0.7668393782383419,
      "grad_norm": 0.93359375,
      "learning_rate": 2.7184278149633737e-05,
      "loss": 0.9705,
      "step": 5032
    },
    {
      "epoch": 0.7669917708015849,
      "grad_norm": 0.7265625,
      "learning_rate": 2.7150457020921493e-05,
      "loss": 0.7977,
      "step": 5033
    },
    {
      "epoch": 0.7671441633648278,
      "grad_norm": 0.8984375,
      "learning_rate": 2.711665363940552e-05,
      "loss": 0.8445,
      "step": 5034
    },
    {
      "epoch": 0.7672965559280707,
      "grad_norm": 1.1796875,
      "learning_rate": 2.7082868013320762e-05,
      "loss": 1.182,
      "step": 5035
    },
    {
      "epoch": 0.7674489484913136,
      "grad_norm": 1.0859375,
      "learning_rate": 2.704910015089799e-05,
      "loss": 0.8797,
      "step": 5036
    },
    {
      "epoch": 0.7676013410545566,
      "grad_norm": 0.9375,
      "learning_rate": 2.7015350060363443e-05,
      "loss": 0.921,
      "step": 5037
    },
    {
      "epoch": 0.7677537336177994,
      "grad_norm": 0.57421875,
      "learning_rate": 2.6981617749939125e-05,
      "loss": 0.7819,
      "step": 5038
    },
    {
      "epoch": 0.7679061261810424,
      "grad_norm": 0.9375,
      "learning_rate": 2.6947903227842774e-05,
      "loss": 1.0812,
      "step": 5039
    },
    {
      "epoch": 0.7680585187442853,
      "grad_norm": 0.8671875,
      "learning_rate": 2.6914206502287685e-05,
      "loss": 0.9907,
      "step": 5040
    },
    {
      "epoch": 0.7682109113075282,
      "grad_norm": 0.73046875,
      "learning_rate": 2.688052758148294e-05,
      "loss": 0.9006,
      "step": 5041
    },
    {
      "epoch": 0.7683633038707711,
      "grad_norm": 0.9609375,
      "learning_rate": 2.6846866473633125e-05,
      "loss": 1.0144,
      "step": 5042
    },
    {
      "epoch": 0.7685156964340141,
      "grad_norm": 1.0,
      "learning_rate": 2.6813223186938564e-05,
      "loss": 1.0348,
      "step": 5043
    },
    {
      "epoch": 0.7686680889972569,
      "grad_norm": 0.7421875,
      "learning_rate": 2.6779597729595318e-05,
      "loss": 0.9642,
      "step": 5044
    },
    {
      "epoch": 0.7688204815604999,
      "grad_norm": 1.1328125,
      "learning_rate": 2.6745990109794983e-05,
      "loss": 1.2408,
      "step": 5045
    },
    {
      "epoch": 0.7689728741237427,
      "grad_norm": 1.1484375,
      "learning_rate": 2.671240033572484e-05,
      "loss": 0.8574,
      "step": 5046
    },
    {
      "epoch": 0.7691252666869857,
      "grad_norm": 0.8515625,
      "learning_rate": 2.6678828415567936e-05,
      "loss": 1.0381,
      "step": 5047
    },
    {
      "epoch": 0.7692776592502286,
      "grad_norm": 1.1875,
      "learning_rate": 2.664527435750278e-05,
      "loss": 0.7548,
      "step": 5048
    },
    {
      "epoch": 0.7694300518134715,
      "grad_norm": 0.7421875,
      "learning_rate": 2.6611738169703615e-05,
      "loss": 0.9841,
      "step": 5049
    },
    {
      "epoch": 0.7695824443767144,
      "grad_norm": 1.4140625,
      "learning_rate": 2.6578219860340402e-05,
      "loss": 0.938,
      "step": 5050
    },
    {
      "epoch": 0.7697348369399574,
      "grad_norm": 1.109375,
      "learning_rate": 2.654471943757866e-05,
      "loss": 1.1357,
      "step": 5051
    },
    {
      "epoch": 0.7698872295032002,
      "grad_norm": 0.8828125,
      "learning_rate": 2.6511236909579574e-05,
      "loss": 0.8925,
      "step": 5052
    },
    {
      "epoch": 0.7700396220664432,
      "grad_norm": 0.98828125,
      "learning_rate": 2.6477772284499945e-05,
      "loss": 1.1052,
      "step": 5053
    },
    {
      "epoch": 0.7701920146296861,
      "grad_norm": 0.80078125,
      "learning_rate": 2.6444325570492277e-05,
      "loss": 0.9229,
      "step": 5054
    },
    {
      "epoch": 0.770344407192929,
      "grad_norm": 0.90234375,
      "learning_rate": 2.6410896775704608e-05,
      "loss": 0.9983,
      "step": 5055
    },
    {
      "epoch": 0.7704967997561719,
      "grad_norm": 1.046875,
      "learning_rate": 2.6377485908280753e-05,
      "loss": 1.0623,
      "step": 5056
    },
    {
      "epoch": 0.7706491923194149,
      "grad_norm": 0.84375,
      "learning_rate": 2.6344092976360003e-05,
      "loss": 0.8559,
      "step": 5057
    },
    {
      "epoch": 0.7708015848826577,
      "grad_norm": 0.81640625,
      "learning_rate": 2.631071798807746e-05,
      "loss": 0.9237,
      "step": 5058
    },
    {
      "epoch": 0.7709539774459007,
      "grad_norm": 0.9296875,
      "learning_rate": 2.6277360951563658e-05,
      "loss": 0.7677,
      "step": 5059
    },
    {
      "epoch": 0.7711063700091435,
      "grad_norm": 0.9765625,
      "learning_rate": 2.6244021874944847e-05,
      "loss": 0.8607,
      "step": 5060
    },
    {
      "epoch": 0.7712587625723865,
      "grad_norm": 0.93359375,
      "learning_rate": 2.6210700766342965e-05,
      "loss": 0.8507,
      "step": 5061
    },
    {
      "epoch": 0.7714111551356294,
      "grad_norm": 0.76171875,
      "learning_rate": 2.617739763387549e-05,
      "loss": 0.9342,
      "step": 5062
    },
    {
      "epoch": 0.7715635476988723,
      "grad_norm": 0.8984375,
      "learning_rate": 2.6144112485655504e-05,
      "loss": 1.0725,
      "step": 5063
    },
    {
      "epoch": 0.7717159402621152,
      "grad_norm": 1.2421875,
      "learning_rate": 2.6110845329791845e-05,
      "loss": 0.9556,
      "step": 5064
    },
    {
      "epoch": 0.7718683328253582,
      "grad_norm": 1.046875,
      "learning_rate": 2.6077596174388785e-05,
      "loss": 0.9208,
      "step": 5065
    },
    {
      "epoch": 0.772020725388601,
      "grad_norm": 0.8125,
      "learning_rate": 2.6044365027546293e-05,
      "loss": 0.9772,
      "step": 5066
    },
    {
      "epoch": 0.772173117951844,
      "grad_norm": 0.87890625,
      "learning_rate": 2.6011151897359998e-05,
      "loss": 0.9316,
      "step": 5067
    },
    {
      "epoch": 0.7723255105150869,
      "grad_norm": 0.98828125,
      "learning_rate": 2.5977956791921088e-05,
      "loss": 0.992,
      "step": 5068
    },
    {
      "epoch": 0.7724779030783298,
      "grad_norm": 0.96875,
      "learning_rate": 2.594477971931636e-05,
      "loss": 0.941,
      "step": 5069
    },
    {
      "epoch": 0.7726302956415727,
      "grad_norm": 1.03125,
      "learning_rate": 2.5911620687628224e-05,
      "loss": 1.2294,
      "step": 5070
    },
    {
      "epoch": 0.7727826882048157,
      "grad_norm": 0.99609375,
      "learning_rate": 2.58784797049347e-05,
      "loss": 0.9939,
      "step": 5071
    },
    {
      "epoch": 0.7729350807680585,
      "grad_norm": 0.8515625,
      "learning_rate": 2.584535677930937e-05,
      "loss": 0.8934,
      "step": 5072
    },
    {
      "epoch": 0.7730874733313015,
      "grad_norm": 1.0078125,
      "learning_rate": 2.5812251918821527e-05,
      "loss": 0.9248,
      "step": 5073
    },
    {
      "epoch": 0.7732398658945443,
      "grad_norm": 1.171875,
      "learning_rate": 2.5779165131535922e-05,
      "loss": 0.917,
      "step": 5074
    },
    {
      "epoch": 0.7733922584577873,
      "grad_norm": 1.4453125,
      "learning_rate": 2.5746096425513066e-05,
      "loss": 1.2046,
      "step": 5075
    },
    {
      "epoch": 0.7735446510210302,
      "grad_norm": 1.0078125,
      "learning_rate": 2.571304580880889e-05,
      "loss": 1.0937,
      "step": 5076
    },
    {
      "epoch": 0.773697043584273,
      "grad_norm": 1.15625,
      "learning_rate": 2.568001328947498e-05,
      "loss": 1.1976,
      "step": 5077
    },
    {
      "epoch": 0.773849436147516,
      "grad_norm": 0.83984375,
      "learning_rate": 2.5646998875558613e-05,
      "loss": 1.0265,
      "step": 5078
    },
    {
      "epoch": 0.774001828710759,
      "grad_norm": 0.8125,
      "learning_rate": 2.5614002575102535e-05,
      "loss": 1.0045,
      "step": 5079
    },
    {
      "epoch": 0.7741542212740018,
      "grad_norm": 1.1796875,
      "learning_rate": 2.5581024396145116e-05,
      "loss": 1.1073,
      "step": 5080
    },
    {
      "epoch": 0.7743066138372448,
      "grad_norm": 1.03125,
      "learning_rate": 2.5548064346720315e-05,
      "loss": 0.9603,
      "step": 5081
    },
    {
      "epoch": 0.7744590064004877,
      "grad_norm": 0.890625,
      "learning_rate": 2.5515122434857685e-05,
      "loss": 1.0069,
      "step": 5082
    },
    {
      "epoch": 0.7746113989637305,
      "grad_norm": 1.078125,
      "learning_rate": 2.54821986685823e-05,
      "loss": 1.2146,
      "step": 5083
    },
    {
      "epoch": 0.7747637915269735,
      "grad_norm": 1.03125,
      "learning_rate": 2.5449293055914947e-05,
      "loss": 1.1336,
      "step": 5084
    },
    {
      "epoch": 0.7749161840902163,
      "grad_norm": 1.0625,
      "learning_rate": 2.5416405604871862e-05,
      "loss": 1.0927,
      "step": 5085
    },
    {
      "epoch": 0.7750685766534593,
      "grad_norm": 0.8203125,
      "learning_rate": 2.5383536323464895e-05,
      "loss": 1.0455,
      "step": 5086
    },
    {
      "epoch": 0.7752209692167022,
      "grad_norm": 0.8515625,
      "learning_rate": 2.5350685219701488e-05,
      "loss": 1.0843,
      "step": 5087
    },
    {
      "epoch": 0.7753733617799451,
      "grad_norm": 0.984375,
      "learning_rate": 2.5317852301584643e-05,
      "loss": 0.7811,
      "step": 5088
    },
    {
      "epoch": 0.775525754343188,
      "grad_norm": 1.40625,
      "learning_rate": 2.5285037577112892e-05,
      "loss": 0.9304,
      "step": 5089
    },
    {
      "epoch": 0.775678146906431,
      "grad_norm": 0.84375,
      "learning_rate": 2.5252241054280433e-05,
      "loss": 0.9264,
      "step": 5090
    },
    {
      "epoch": 0.7758305394696738,
      "grad_norm": 0.81640625,
      "learning_rate": 2.5219462741076926e-05,
      "loss": 0.9387,
      "step": 5091
    },
    {
      "epoch": 0.7759829320329168,
      "grad_norm": 0.87890625,
      "learning_rate": 2.5186702645487715e-05,
      "loss": 0.9646,
      "step": 5092
    },
    {
      "epoch": 0.7761353245961597,
      "grad_norm": 0.81640625,
      "learning_rate": 2.5153960775493535e-05,
      "loss": 1.0495,
      "step": 5093
    },
    {
      "epoch": 0.7762877171594026,
      "grad_norm": 1.21875,
      "learning_rate": 2.5121237139070796e-05,
      "loss": 1.1074,
      "step": 5094
    },
    {
      "epoch": 0.7764401097226455,
      "grad_norm": 0.828125,
      "learning_rate": 2.508853174419149e-05,
      "loss": 1.0308,
      "step": 5095
    },
    {
      "epoch": 0.7765925022858885,
      "grad_norm": 1.1171875,
      "learning_rate": 2.5055844598823107e-05,
      "loss": 1.0565,
      "step": 5096
    },
    {
      "epoch": 0.7767448948491313,
      "grad_norm": 0.91796875,
      "learning_rate": 2.502317571092869e-05,
      "loss": 0.9266,
      "step": 5097
    },
    {
      "epoch": 0.7768972874123743,
      "grad_norm": 1.0859375,
      "learning_rate": 2.4990525088466854e-05,
      "loss": 0.9448,
      "step": 5098
    },
    {
      "epoch": 0.7770496799756171,
      "grad_norm": 1.046875,
      "learning_rate": 2.4957892739391765e-05,
      "loss": 1.0666,
      "step": 5099
    },
    {
      "epoch": 0.7772020725388601,
      "grad_norm": 1.0078125,
      "learning_rate": 2.4925278671653095e-05,
      "loss": 1.129,
      "step": 5100
    },
    {
      "epoch": 0.777354465102103,
      "grad_norm": 0.94921875,
      "learning_rate": 2.4892682893196172e-05,
      "loss": 0.9961,
      "step": 5101
    },
    {
      "epoch": 0.7775068576653459,
      "grad_norm": 1.0546875,
      "learning_rate": 2.4860105411961755e-05,
      "loss": 1.0352,
      "step": 5102
    },
    {
      "epoch": 0.7776592502285888,
      "grad_norm": 1.0703125,
      "learning_rate": 2.482754623588619e-05,
      "loss": 1.0483,
      "step": 5103
    },
    {
      "epoch": 0.7778116427918318,
      "grad_norm": 1.2265625,
      "learning_rate": 2.479500537290137e-05,
      "loss": 1.0268,
      "step": 5104
    },
    {
      "epoch": 0.7779640353550746,
      "grad_norm": 0.8984375,
      "learning_rate": 2.4762482830934696e-05,
      "loss": 0.9597,
      "step": 5105
    },
    {
      "epoch": 0.7781164279183176,
      "grad_norm": 1.125,
      "learning_rate": 2.472997861790912e-05,
      "loss": 0.9897,
      "step": 5106
    },
    {
      "epoch": 0.7782688204815605,
      "grad_norm": 1.25,
      "learning_rate": 2.4697492741743177e-05,
      "loss": 1.0302,
      "step": 5107
    },
    {
      "epoch": 0.7784212130448034,
      "grad_norm": 0.89453125,
      "learning_rate": 2.4665025210350878e-05,
      "loss": 1.03,
      "step": 5108
    },
    {
      "epoch": 0.7785736056080463,
      "grad_norm": 0.84765625,
      "learning_rate": 2.4632576031641762e-05,
      "loss": 0.954,
      "step": 5109
    },
    {
      "epoch": 0.7787259981712893,
      "grad_norm": 1.046875,
      "learning_rate": 2.460014521352094e-05,
      "loss": 1.0685,
      "step": 5110
    },
    {
      "epoch": 0.7788783907345321,
      "grad_norm": 0.98828125,
      "learning_rate": 2.4567732763888985e-05,
      "loss": 1.0016,
      "step": 5111
    },
    {
      "epoch": 0.7790307832977751,
      "grad_norm": 0.76171875,
      "learning_rate": 2.4535338690642086e-05,
      "loss": 0.9352,
      "step": 5112
    },
    {
      "epoch": 0.7791831758610179,
      "grad_norm": 0.8125,
      "learning_rate": 2.4502963001671885e-05,
      "loss": 0.8366,
      "step": 5113
    },
    {
      "epoch": 0.7793355684242609,
      "grad_norm": 0.91796875,
      "learning_rate": 2.4470605704865568e-05,
      "loss": 0.9767,
      "step": 5114
    },
    {
      "epoch": 0.7794879609875038,
      "grad_norm": 0.85546875,
      "learning_rate": 2.4438266808105837e-05,
      "loss": 0.8985,
      "step": 5115
    },
    {
      "epoch": 0.7796403535507467,
      "grad_norm": 0.91015625,
      "learning_rate": 2.44059463192709e-05,
      "loss": 0.8357,
      "step": 5116
    },
    {
      "epoch": 0.7797927461139896,
      "grad_norm": 0.921875,
      "learning_rate": 2.4373644246234484e-05,
      "loss": 0.8885,
      "step": 5117
    },
    {
      "epoch": 0.7799451386772326,
      "grad_norm": 0.87109375,
      "learning_rate": 2.434136059686587e-05,
      "loss": 0.8839,
      "step": 5118
    },
    {
      "epoch": 0.7800975312404754,
      "grad_norm": 1.109375,
      "learning_rate": 2.4309095379029812e-05,
      "loss": 1.0336,
      "step": 5119
    },
    {
      "epoch": 0.7802499238037184,
      "grad_norm": 0.8359375,
      "learning_rate": 2.4276848600586576e-05,
      "loss": 0.7684,
      "step": 5120
    },
    {
      "epoch": 0.7804023163669613,
      "grad_norm": 0.83984375,
      "learning_rate": 2.4244620269391947e-05,
      "loss": 0.9259,
      "step": 5121
    },
    {
      "epoch": 0.7805547089302042,
      "grad_norm": 0.95703125,
      "learning_rate": 2.4212410393297156e-05,
      "loss": 1.0129,
      "step": 5122
    },
    {
      "epoch": 0.7807071014934471,
      "grad_norm": 0.78125,
      "learning_rate": 2.4180218980149083e-05,
      "loss": 0.964,
      "step": 5123
    },
    {
      "epoch": 0.7808594940566901,
      "grad_norm": 0.85546875,
      "learning_rate": 2.4148046037789974e-05,
      "loss": 0.7266,
      "step": 5124
    },
    {
      "epoch": 0.7810118866199329,
      "grad_norm": 0.88671875,
      "learning_rate": 2.4115891574057613e-05,
      "loss": 1.0919,
      "step": 5125
    },
    {
      "epoch": 0.7811642791831759,
      "grad_norm": 0.8359375,
      "learning_rate": 2.4083755596785294e-05,
      "loss": 1.0922,
      "step": 5126
    },
    {
      "epoch": 0.7813166717464187,
      "grad_norm": 0.859375,
      "learning_rate": 2.4051638113801823e-05,
      "loss": 0.9595,
      "step": 5127
    },
    {
      "epoch": 0.7814690643096617,
      "grad_norm": 1.109375,
      "learning_rate": 2.4019539132931422e-05,
      "loss": 1.0256,
      "step": 5128
    },
    {
      "epoch": 0.7816214568729046,
      "grad_norm": 1.1953125,
      "learning_rate": 2.3987458661993943e-05,
      "loss": 1.0896,
      "step": 5129
    },
    {
      "epoch": 0.7817738494361475,
      "grad_norm": 1.0078125,
      "learning_rate": 2.395539670880461e-05,
      "loss": 0.9387,
      "step": 5130
    },
    {
      "epoch": 0.7819262419993904,
      "grad_norm": 0.97265625,
      "learning_rate": 2.3923353281174186e-05,
      "loss": 0.9758,
      "step": 5131
    },
    {
      "epoch": 0.7820786345626334,
      "grad_norm": 1.3125,
      "learning_rate": 2.3891328386908895e-05,
      "loss": 1.0883,
      "step": 5132
    },
    {
      "epoch": 0.7822310271258762,
      "grad_norm": 0.98046875,
      "learning_rate": 2.3859322033810483e-05,
      "loss": 0.9149,
      "step": 5133
    },
    {
      "epoch": 0.7823834196891192,
      "grad_norm": 1.5703125,
      "learning_rate": 2.3827334229676112e-05,
      "loss": 1.095,
      "step": 5134
    },
    {
      "epoch": 0.7825358122523621,
      "grad_norm": 0.984375,
      "learning_rate": 2.379536498229853e-05,
      "loss": 0.8625,
      "step": 5135
    },
    {
      "epoch": 0.782688204815605,
      "grad_norm": 1.0546875,
      "learning_rate": 2.376341429946588e-05,
      "loss": 1.0646,
      "step": 5136
    },
    {
      "epoch": 0.7828405973788479,
      "grad_norm": 1.0703125,
      "learning_rate": 2.3731482188961818e-05,
      "loss": 1.0837,
      "step": 5137
    },
    {
      "epoch": 0.7829929899420909,
      "grad_norm": 0.84765625,
      "learning_rate": 2.3699568658565452e-05,
      "loss": 0.7698,
      "step": 5138
    },
    {
      "epoch": 0.7831453825053337,
      "grad_norm": 0.83984375,
      "learning_rate": 2.3667673716051353e-05,
      "loss": 0.7907,
      "step": 5139
    },
    {
      "epoch": 0.7832977750685767,
      "grad_norm": 0.92578125,
      "learning_rate": 2.3635797369189637e-05,
      "loss": 1.1927,
      "step": 5140
    },
    {
      "epoch": 0.7834501676318195,
      "grad_norm": 0.88671875,
      "learning_rate": 2.360393962574582e-05,
      "loss": 0.8848,
      "step": 5141
    },
    {
      "epoch": 0.7836025601950625,
      "grad_norm": 0.91015625,
      "learning_rate": 2.3572100493480908e-05,
      "loss": 0.9851,
      "step": 5142
    },
    {
      "epoch": 0.7837549527583054,
      "grad_norm": 0.80078125,
      "learning_rate": 2.354027998015138e-05,
      "loss": 0.9721,
      "step": 5143
    },
    {
      "epoch": 0.7839073453215483,
      "grad_norm": 0.765625,
      "learning_rate": 2.3508478093509144e-05,
      "loss": 0.9313,
      "step": 5144
    },
    {
      "epoch": 0.7840597378847912,
      "grad_norm": 0.8359375,
      "learning_rate": 2.3476694841301596e-05,
      "loss": 0.8687,
      "step": 5145
    },
    {
      "epoch": 0.7842121304480342,
      "grad_norm": 0.79296875,
      "learning_rate": 2.3444930231271635e-05,
      "loss": 0.8749,
      "step": 5146
    },
    {
      "epoch": 0.784364523011277,
      "grad_norm": 0.81640625,
      "learning_rate": 2.341318427115754e-05,
      "loss": 0.8786,
      "step": 5147
    },
    {
      "epoch": 0.78451691557452,
      "grad_norm": 1.09375,
      "learning_rate": 2.3381456968693105e-05,
      "loss": 0.9679,
      "step": 5148
    },
    {
      "epoch": 0.7846693081377629,
      "grad_norm": 0.95703125,
      "learning_rate": 2.3349748331607547e-05,
      "loss": 0.9929,
      "step": 5149
    },
    {
      "epoch": 0.7848217007010058,
      "grad_norm": 0.87890625,
      "learning_rate": 2.3318058367625538e-05,
      "loss": 0.8626,
      "step": 5150
    },
    {
      "epoch": 0.7849740932642487,
      "grad_norm": 0.92578125,
      "learning_rate": 2.32863870844672e-05,
      "loss": 1.0623,
      "step": 5151
    },
    {
      "epoch": 0.7851264858274917,
      "grad_norm": 0.984375,
      "learning_rate": 2.325473448984815e-05,
      "loss": 0.8687,
      "step": 5152
    },
    {
      "epoch": 0.7852788783907345,
      "grad_norm": 0.91015625,
      "learning_rate": 2.322310059147941e-05,
      "loss": 1.1094,
      "step": 5153
    },
    {
      "epoch": 0.7854312709539775,
      "grad_norm": 1.078125,
      "learning_rate": 2.3191485397067415e-05,
      "loss": 1.0512,
      "step": 5154
    },
    {
      "epoch": 0.7855836635172203,
      "grad_norm": 0.90625,
      "learning_rate": 2.315988891431412e-05,
      "loss": 1.0418,
      "step": 5155
    },
    {
      "epoch": 0.7857360560804633,
      "grad_norm": 0.9609375,
      "learning_rate": 2.3128311150916826e-05,
      "loss": 1.0497,
      "step": 5156
    },
    {
      "epoch": 0.7858884486437062,
      "grad_norm": 1.140625,
      "learning_rate": 2.309675211456841e-05,
      "loss": 1.0144,
      "step": 5157
    },
    {
      "epoch": 0.7860408412069491,
      "grad_norm": 0.78125,
      "learning_rate": 2.3065211812957067e-05,
      "loss": 0.8516,
      "step": 5158
    },
    {
      "epoch": 0.786193233770192,
      "grad_norm": 0.87109375,
      "learning_rate": 2.303369025376646e-05,
      "loss": 0.8171,
      "step": 5159
    },
    {
      "epoch": 0.786345626333435,
      "grad_norm": 1.1171875,
      "learning_rate": 2.3002187444675715e-05,
      "loss": 0.9172,
      "step": 5160
    },
    {
      "epoch": 0.7864980188966778,
      "grad_norm": 1.1953125,
      "learning_rate": 2.297070339335935e-05,
      "loss": 1.0331,
      "step": 5161
    },
    {
      "epoch": 0.7866504114599208,
      "grad_norm": 0.83984375,
      "learning_rate": 2.2939238107487305e-05,
      "loss": 1.0252,
      "step": 5162
    },
    {
      "epoch": 0.7868028040231637,
      "grad_norm": 0.98046875,
      "learning_rate": 2.2907791594725048e-05,
      "loss": 1.0181,
      "step": 5163
    },
    {
      "epoch": 0.7869551965864066,
      "grad_norm": 0.9296875,
      "learning_rate": 2.2876363862733362e-05,
      "loss": 1.0127,
      "step": 5164
    },
    {
      "epoch": 0.7871075891496495,
      "grad_norm": 0.95703125,
      "learning_rate": 2.2844954919168492e-05,
      "loss": 0.8168,
      "step": 5165
    },
    {
      "epoch": 0.7872599817128925,
      "grad_norm": 0.7890625,
      "learning_rate": 2.2813564771682115e-05,
      "loss": 1.0159,
      "step": 5166
    },
    {
      "epoch": 0.7874123742761353,
      "grad_norm": 0.88671875,
      "learning_rate": 2.2782193427921327e-05,
      "loss": 0.964,
      "step": 5167
    },
    {
      "epoch": 0.7875647668393783,
      "grad_norm": 1.078125,
      "learning_rate": 2.27508408955286e-05,
      "loss": 1.0263,
      "step": 5168
    },
    {
      "epoch": 0.7877171594026211,
      "grad_norm": 0.83984375,
      "learning_rate": 2.271950718214193e-05,
      "loss": 0.9365,
      "step": 5169
    },
    {
      "epoch": 0.7878695519658641,
      "grad_norm": 0.86328125,
      "learning_rate": 2.2688192295394618e-05,
      "loss": 0.9853,
      "step": 5170
    },
    {
      "epoch": 0.788021944529107,
      "grad_norm": 0.890625,
      "learning_rate": 2.2656896242915428e-05,
      "loss": 1.0787,
      "step": 5171
    },
    {
      "epoch": 0.7881743370923499,
      "grad_norm": 0.9609375,
      "learning_rate": 2.2625619032328514e-05,
      "loss": 0.8775,
      "step": 5172
    },
    {
      "epoch": 0.7883267296555928,
      "grad_norm": 1.0546875,
      "learning_rate": 2.2594360671253457e-05,
      "loss": 1.0338,
      "step": 5173
    },
    {
      "epoch": 0.7884791222188358,
      "grad_norm": 0.796875,
      "learning_rate": 2.256312116730528e-05,
      "loss": 1.1096,
      "step": 5174
    },
    {
      "epoch": 0.7886315147820786,
      "grad_norm": 0.91796875,
      "learning_rate": 2.253190052809434e-05,
      "loss": 1.1066,
      "step": 5175
    },
    {
      "epoch": 0.7887839073453216,
      "grad_norm": 0.8125,
      "learning_rate": 2.2500698761226447e-05,
      "loss": 0.9622,
      "step": 5176
    },
    {
      "epoch": 0.7889362999085645,
      "grad_norm": 0.96875,
      "learning_rate": 2.2469515874302793e-05,
      "loss": 1.1226,
      "step": 5177
    },
    {
      "epoch": 0.7890886924718074,
      "grad_norm": 0.89453125,
      "learning_rate": 2.2438351874919984e-05,
      "loss": 0.9822,
      "step": 5178
    },
    {
      "epoch": 0.7892410850350503,
      "grad_norm": 0.96484375,
      "learning_rate": 2.2407206770669987e-05,
      "loss": 0.9048,
      "step": 5179
    },
    {
      "epoch": 0.7893934775982933,
      "grad_norm": 0.7578125,
      "learning_rate": 2.2376080569140258e-05,
      "loss": 0.9541,
      "step": 5180
    },
    {
      "epoch": 0.7895458701615361,
      "grad_norm": 1.234375,
      "learning_rate": 2.234497327791354e-05,
      "loss": 1.1903,
      "step": 5181
    },
    {
      "epoch": 0.789698262724779,
      "grad_norm": 0.85546875,
      "learning_rate": 2.231388490456805e-05,
      "loss": 0.8263,
      "step": 5182
    },
    {
      "epoch": 0.7898506552880219,
      "grad_norm": 1.09375,
      "learning_rate": 2.228281545667733e-05,
      "loss": 1.1303,
      "step": 5183
    },
    {
      "epoch": 0.7900030478512649,
      "grad_norm": 0.96875,
      "learning_rate": 2.225176494181036e-05,
      "loss": 1.1455,
      "step": 5184
    },
    {
      "epoch": 0.7901554404145078,
      "grad_norm": 1.0234375,
      "learning_rate": 2.2220733367531464e-05,
      "loss": 1.0339,
      "step": 5185
    },
    {
      "epoch": 0.7903078329777506,
      "grad_norm": 1.40625,
      "learning_rate": 2.2189720741400434e-05,
      "loss": 1.0858,
      "step": 5186
    },
    {
      "epoch": 0.7904602255409936,
      "grad_norm": 0.95703125,
      "learning_rate": 2.215872707097236e-05,
      "loss": 1.0031,
      "step": 5187
    },
    {
      "epoch": 0.7906126181042366,
      "grad_norm": 0.94921875,
      "learning_rate": 2.212775236379776e-05,
      "loss": 1.0298,
      "step": 5188
    },
    {
      "epoch": 0.7907650106674794,
      "grad_norm": 0.94921875,
      "learning_rate": 2.20967966274225e-05,
      "loss": 0.8814,
      "step": 5189
    },
    {
      "epoch": 0.7909174032307223,
      "grad_norm": 1.3046875,
      "learning_rate": 2.2065859869387817e-05,
      "loss": 0.9902,
      "step": 5190
    },
    {
      "epoch": 0.7910697957939653,
      "grad_norm": 0.8671875,
      "learning_rate": 2.2034942097230417e-05,
      "loss": 0.9109,
      "step": 5191
    },
    {
      "epoch": 0.7912221883572081,
      "grad_norm": 0.87890625,
      "learning_rate": 2.200404331848228e-05,
      "loss": 1.1882,
      "step": 5192
    },
    {
      "epoch": 0.7913745809204511,
      "grad_norm": 1.0625,
      "learning_rate": 2.1973163540670794e-05,
      "loss": 1.1343,
      "step": 5193
    },
    {
      "epoch": 0.7915269734836939,
      "grad_norm": 0.76171875,
      "learning_rate": 2.1942302771318712e-05,
      "loss": 0.8873,
      "step": 5194
    },
    {
      "epoch": 0.7916793660469369,
      "grad_norm": 1.0234375,
      "learning_rate": 2.1911461017944168e-05,
      "loss": 1.1505,
      "step": 5195
    },
    {
      "epoch": 0.7918317586101798,
      "grad_norm": 0.95703125,
      "learning_rate": 2.1880638288060617e-05,
      "loss": 0.8583,
      "step": 5196
    },
    {
      "epoch": 0.7919841511734227,
      "grad_norm": 0.921875,
      "learning_rate": 2.1849834589176997e-05,
      "loss": 1.0608,
      "step": 5197
    },
    {
      "epoch": 0.7921365437366656,
      "grad_norm": 1.1328125,
      "learning_rate": 2.1819049928797485e-05,
      "loss": 1.0967,
      "step": 5198
    },
    {
      "epoch": 0.7922889362999086,
      "grad_norm": 0.99609375,
      "learning_rate": 2.1788284314421668e-05,
      "loss": 0.9707,
      "step": 5199
    },
    {
      "epoch": 0.7924413288631514,
      "grad_norm": 1.078125,
      "learning_rate": 2.17575377535445e-05,
      "loss": 0.9878,
      "step": 5200
    },
    {
      "epoch": 0.7925937214263944,
      "grad_norm": 0.98828125,
      "learning_rate": 2.1726810253656282e-05,
      "loss": 0.9723,
      "step": 5201
    },
    {
      "epoch": 0.7927461139896373,
      "grad_norm": 1.4609375,
      "learning_rate": 2.1696101822242654e-05,
      "loss": 0.9009,
      "step": 5202
    },
    {
      "epoch": 0.7928985065528802,
      "grad_norm": 0.78515625,
      "learning_rate": 2.1665412466784672e-05,
      "loss": 0.8681,
      "step": 5203
    },
    {
      "epoch": 0.7930508991161231,
      "grad_norm": 1.015625,
      "learning_rate": 2.1634742194758695e-05,
      "loss": 1.0097,
      "step": 5204
    },
    {
      "epoch": 0.7932032916793661,
      "grad_norm": 0.95703125,
      "learning_rate": 2.1604091013636418e-05,
      "loss": 0.7774,
      "step": 5205
    },
    {
      "epoch": 0.7933556842426089,
      "grad_norm": 0.87890625,
      "learning_rate": 2.1573458930884938e-05,
      "loss": 0.9736,
      "step": 5206
    },
    {
      "epoch": 0.7935080768058519,
      "grad_norm": 0.83203125,
      "learning_rate": 2.1542845953966618e-05,
      "loss": 1.0441,
      "step": 5207
    },
    {
      "epoch": 0.7936604693690947,
      "grad_norm": 0.9609375,
      "learning_rate": 2.1512252090339292e-05,
      "loss": 1.0087,
      "step": 5208
    },
    {
      "epoch": 0.7938128619323377,
      "grad_norm": 0.796875,
      "learning_rate": 2.148167734745602e-05,
      "loss": 0.9306,
      "step": 5209
    },
    {
      "epoch": 0.7939652544955806,
      "grad_norm": 0.796875,
      "learning_rate": 2.1451121732765268e-05,
      "loss": 0.8302,
      "step": 5210
    },
    {
      "epoch": 0.7941176470588235,
      "grad_norm": 0.96484375,
      "learning_rate": 2.1420585253710822e-05,
      "loss": 0.9748,
      "step": 5211
    },
    {
      "epoch": 0.7942700396220664,
      "grad_norm": 0.921875,
      "learning_rate": 2.1390067917731792e-05,
      "loss": 1.0292,
      "step": 5212
    },
    {
      "epoch": 0.7944224321853094,
      "grad_norm": 1.1875,
      "learning_rate": 2.135956973226262e-05,
      "loss": 1.3112,
      "step": 5213
    },
    {
      "epoch": 0.7945748247485522,
      "grad_norm": 0.7734375,
      "learning_rate": 2.132909070473317e-05,
      "loss": 0.8757,
      "step": 5214
    },
    {
      "epoch": 0.7947272173117952,
      "grad_norm": 0.9609375,
      "learning_rate": 2.1298630842568523e-05,
      "loss": 0.9566,
      "step": 5215
    },
    {
      "epoch": 0.7948796098750381,
      "grad_norm": 0.86328125,
      "learning_rate": 2.1268190153189148e-05,
      "loss": 0.9285,
      "step": 5216
    },
    {
      "epoch": 0.795032002438281,
      "grad_norm": 0.67578125,
      "learning_rate": 2.1237768644010836e-05,
      "loss": 0.7974,
      "step": 5217
    },
    {
      "epoch": 0.7951843950015239,
      "grad_norm": 1.1015625,
      "learning_rate": 2.12073663224447e-05,
      "loss": 0.8991,
      "step": 5218
    },
    {
      "epoch": 0.7953367875647669,
      "grad_norm": 0.78515625,
      "learning_rate": 2.117698319589717e-05,
      "loss": 0.8989,
      "step": 5219
    },
    {
      "epoch": 0.7954891801280097,
      "grad_norm": 0.9609375,
      "learning_rate": 2.114661927177005e-05,
      "loss": 0.976,
      "step": 5220
    },
    {
      "epoch": 0.7956415726912527,
      "grad_norm": 1.03125,
      "learning_rate": 2.11162745574604e-05,
      "loss": 1.0369,
      "step": 5221
    },
    {
      "epoch": 0.7957939652544955,
      "grad_norm": 0.953125,
      "learning_rate": 2.1085949060360654e-05,
      "loss": 0.9822,
      "step": 5222
    },
    {
      "epoch": 0.7959463578177385,
      "grad_norm": 1.140625,
      "learning_rate": 2.105564278785851e-05,
      "loss": 1.0795,
      "step": 5223
    },
    {
      "epoch": 0.7960987503809814,
      "grad_norm": 0.79296875,
      "learning_rate": 2.1025355747336994e-05,
      "loss": 1.1819,
      "step": 5224
    },
    {
      "epoch": 0.7962511429442243,
      "grad_norm": 1.0859375,
      "learning_rate": 2.099508794617453e-05,
      "loss": 1.0076,
      "step": 5225
    },
    {
      "epoch": 0.7964035355074672,
      "grad_norm": 0.640625,
      "learning_rate": 2.0964839391744752e-05,
      "loss": 0.901,
      "step": 5226
    },
    {
      "epoch": 0.7965559280707102,
      "grad_norm": 0.98046875,
      "learning_rate": 2.093461009141664e-05,
      "loss": 1.072,
      "step": 5227
    },
    {
      "epoch": 0.796708320633953,
      "grad_norm": 0.9453125,
      "learning_rate": 2.0904400052554484e-05,
      "loss": 0.9471,
      "step": 5228
    },
    {
      "epoch": 0.796860713197196,
      "grad_norm": 0.859375,
      "learning_rate": 2.0874209282517908e-05,
      "loss": 1.0629,
      "step": 5229
    },
    {
      "epoch": 0.7970131057604389,
      "grad_norm": 1.046875,
      "learning_rate": 2.0844037788661762e-05,
      "loss": 0.8712,
      "step": 5230
    },
    {
      "epoch": 0.7971654983236818,
      "grad_norm": 0.95703125,
      "learning_rate": 2.0813885578336322e-05,
      "loss": 1.0088,
      "step": 5231
    },
    {
      "epoch": 0.7973178908869247,
      "grad_norm": 1.0078125,
      "learning_rate": 2.0783752658887066e-05,
      "loss": 0.9499,
      "step": 5232
    },
    {
      "epoch": 0.7974702834501677,
      "grad_norm": 1.0,
      "learning_rate": 2.0753639037654827e-05,
      "loss": 1.0365,
      "step": 5233
    },
    {
      "epoch": 0.7976226760134105,
      "grad_norm": 0.6953125,
      "learning_rate": 2.0723544721975694e-05,
      "loss": 0.8065,
      "step": 5234
    },
    {
      "epoch": 0.7977750685766535,
      "grad_norm": 0.76171875,
      "learning_rate": 2.069346971918108e-05,
      "loss": 0.8572,
      "step": 5235
    },
    {
      "epoch": 0.7979274611398963,
      "grad_norm": 0.96484375,
      "learning_rate": 2.0663414036597662e-05,
      "loss": 0.9731,
      "step": 5236
    },
    {
      "epoch": 0.7980798537031393,
      "grad_norm": 1.0234375,
      "learning_rate": 2.0633377681547505e-05,
      "loss": 0.9352,
      "step": 5237
    },
    {
      "epoch": 0.7982322462663822,
      "grad_norm": 0.83984375,
      "learning_rate": 2.060336066134785e-05,
      "loss": 0.9287,
      "step": 5238
    },
    {
      "epoch": 0.7983846388296251,
      "grad_norm": 0.83984375,
      "learning_rate": 2.0573362983311283e-05,
      "loss": 0.8499,
      "step": 5239
    },
    {
      "epoch": 0.798537031392868,
      "grad_norm": 1.1953125,
      "learning_rate": 2.0543384654745667e-05,
      "loss": 1.0206,
      "step": 5240
    },
    {
      "epoch": 0.798689423956111,
      "grad_norm": 0.71875,
      "learning_rate": 2.0513425682954125e-05,
      "loss": 0.7092,
      "step": 5241
    },
    {
      "epoch": 0.7988418165193538,
      "grad_norm": 0.73828125,
      "learning_rate": 2.048348607523516e-05,
      "loss": 0.8547,
      "step": 5242
    },
    {
      "epoch": 0.7989942090825968,
      "grad_norm": 1.0078125,
      "learning_rate": 2.045356583888245e-05,
      "loss": 0.8104,
      "step": 5243
    },
    {
      "epoch": 0.7991466016458397,
      "grad_norm": 1.03125,
      "learning_rate": 2.0423664981185e-05,
      "loss": 1.1503,
      "step": 5244
    },
    {
      "epoch": 0.7992989942090826,
      "grad_norm": 1.015625,
      "learning_rate": 2.039378350942709e-05,
      "loss": 0.9334,
      "step": 5245
    },
    {
      "epoch": 0.7994513867723255,
      "grad_norm": 1.109375,
      "learning_rate": 2.0363921430888277e-05,
      "loss": 0.8258,
      "step": 5246
    },
    {
      "epoch": 0.7996037793355685,
      "grad_norm": 1.0546875,
      "learning_rate": 2.0334078752843367e-05,
      "loss": 1.0024,
      "step": 5247
    },
    {
      "epoch": 0.7997561718988113,
      "grad_norm": 0.97265625,
      "learning_rate": 2.0304255482562505e-05,
      "loss": 0.9927,
      "step": 5248
    },
    {
      "epoch": 0.7999085644620543,
      "grad_norm": 1.15625,
      "learning_rate": 2.0274451627311066e-05,
      "loss": 1.0371,
      "step": 5249
    },
    {
      "epoch": 0.8000609570252971,
      "grad_norm": 0.93359375,
      "learning_rate": 2.0244667194349676e-05,
      "loss": 0.9526,
      "step": 5250
    },
    {
      "epoch": 0.8002133495885401,
      "grad_norm": 0.90234375,
      "learning_rate": 2.021490219093426e-05,
      "loss": 1.1132,
      "step": 5251
    },
    {
      "epoch": 0.800365742151783,
      "grad_norm": 1.21875,
      "learning_rate": 2.0185156624315982e-05,
      "loss": 0.9451,
      "step": 5252
    },
    {
      "epoch": 0.8005181347150259,
      "grad_norm": 0.9765625,
      "learning_rate": 2.0155430501741324e-05,
      "loss": 0.9379,
      "step": 5253
    },
    {
      "epoch": 0.8006705272782688,
      "grad_norm": 0.8828125,
      "learning_rate": 2.0125723830451992e-05,
      "loss": 0.8723,
      "step": 5254
    },
    {
      "epoch": 0.8008229198415118,
      "grad_norm": 0.953125,
      "learning_rate": 2.0096036617684944e-05,
      "loss": 0.8568,
      "step": 5255
    },
    {
      "epoch": 0.8009753124047546,
      "grad_norm": 1.0625,
      "learning_rate": 2.0066368870672414e-05,
      "loss": 1.0758,
      "step": 5256
    },
    {
      "epoch": 0.8011277049679976,
      "grad_norm": 1.1171875,
      "learning_rate": 2.003672059664189e-05,
      "loss": 1.1357,
      "step": 5257
    },
    {
      "epoch": 0.8012800975312405,
      "grad_norm": 0.97265625,
      "learning_rate": 2.0007091802816102e-05,
      "loss": 1.0926,
      "step": 5258
    },
    {
      "epoch": 0.8014324900944834,
      "grad_norm": 0.90625,
      "learning_rate": 1.9977482496413092e-05,
      "loss": 0.9416,
      "step": 5259
    },
    {
      "epoch": 0.8015848826577263,
      "grad_norm": 0.83984375,
      "learning_rate": 1.9947892684646097e-05,
      "loss": 0.8265,
      "step": 5260
    },
    {
      "epoch": 0.8017372752209693,
      "grad_norm": 0.90625,
      "learning_rate": 1.9918322374723607e-05,
      "loss": 1.0403,
      "step": 5261
    },
    {
      "epoch": 0.8018896677842121,
      "grad_norm": 0.91015625,
      "learning_rate": 1.9888771573849384e-05,
      "loss": 0.9945,
      "step": 5262
    },
    {
      "epoch": 0.8020420603474551,
      "grad_norm": 0.80859375,
      "learning_rate": 1.9859240289222426e-05,
      "loss": 0.9414,
      "step": 5263
    },
    {
      "epoch": 0.8021944529106979,
      "grad_norm": 0.78515625,
      "learning_rate": 1.9829728528036952e-05,
      "loss": 0.8235,
      "step": 5264
    },
    {
      "epoch": 0.8023468454739409,
      "grad_norm": 0.921875,
      "learning_rate": 1.98002362974825e-05,
      "loss": 0.9949,
      "step": 5265
    },
    {
      "epoch": 0.8024992380371838,
      "grad_norm": 1.0234375,
      "learning_rate": 1.977076360474379e-05,
      "loss": 0.8562,
      "step": 5266
    },
    {
      "epoch": 0.8026516306004267,
      "grad_norm": 0.80859375,
      "learning_rate": 1.974131045700077e-05,
      "loss": 0.9465,
      "step": 5267
    },
    {
      "epoch": 0.8028040231636696,
      "grad_norm": 0.9140625,
      "learning_rate": 1.971187686142868e-05,
      "loss": 1.1478,
      "step": 5268
    },
    {
      "epoch": 0.8029564157269126,
      "grad_norm": 0.9453125,
      "learning_rate": 1.968246282519791e-05,
      "loss": 0.8771,
      "step": 5269
    },
    {
      "epoch": 0.8031088082901554,
      "grad_norm": 0.97265625,
      "learning_rate": 1.9653068355474214e-05,
      "loss": 1.1398,
      "step": 5270
    },
    {
      "epoch": 0.8032612008533984,
      "grad_norm": 0.96484375,
      "learning_rate": 1.962369345941848e-05,
      "loss": 1.0088,
      "step": 5271
    },
    {
      "epoch": 0.8034135934166413,
      "grad_norm": 1.0703125,
      "learning_rate": 1.959433814418684e-05,
      "loss": 0.8052,
      "step": 5272
    },
    {
      "epoch": 0.8035659859798842,
      "grad_norm": 1.1640625,
      "learning_rate": 1.95650024169307e-05,
      "loss": 1.0974,
      "step": 5273
    },
    {
      "epoch": 0.8037183785431271,
      "grad_norm": 1.1171875,
      "learning_rate": 1.953568628479664e-05,
      "loss": 1.0899,
      "step": 5274
    },
    {
      "epoch": 0.8038707711063701,
      "grad_norm": 1.640625,
      "learning_rate": 1.950638975492647e-05,
      "loss": 1.2246,
      "step": 5275
    },
    {
      "epoch": 0.8040231636696129,
      "grad_norm": 1.0,
      "learning_rate": 1.94771128344573e-05,
      "loss": 0.9883,
      "step": 5276
    },
    {
      "epoch": 0.8041755562328559,
      "grad_norm": 0.77734375,
      "learning_rate": 1.9447855530521388e-05,
      "loss": 0.9032,
      "step": 5277
    },
    {
      "epoch": 0.8043279487960987,
      "grad_norm": 1.65625,
      "learning_rate": 1.9418617850246225e-05,
      "loss": 1.0631,
      "step": 5278
    },
    {
      "epoch": 0.8044803413593417,
      "grad_norm": 0.7734375,
      "learning_rate": 1.9389399800754538e-05,
      "loss": 0.8824,
      "step": 5279
    },
    {
      "epoch": 0.8046327339225846,
      "grad_norm": 0.80078125,
      "learning_rate": 1.9360201389164257e-05,
      "loss": 0.8919,
      "step": 5280
    },
    {
      "epoch": 0.8047851264858275,
      "grad_norm": 1.046875,
      "learning_rate": 1.9331022622588514e-05,
      "loss": 0.9398,
      "step": 5281
    },
    {
      "epoch": 0.8049375190490704,
      "grad_norm": 0.75390625,
      "learning_rate": 1.930186350813573e-05,
      "loss": 0.8263,
      "step": 5282
    },
    {
      "epoch": 0.8050899116123134,
      "grad_norm": 1.2109375,
      "learning_rate": 1.927272405290945e-05,
      "loss": 0.965,
      "step": 5283
    },
    {
      "epoch": 0.8052423041755562,
      "grad_norm": 0.78125,
      "learning_rate": 1.9243604264008475e-05,
      "loss": 0.9582,
      "step": 5284
    },
    {
      "epoch": 0.8053946967387992,
      "grad_norm": 0.86328125,
      "learning_rate": 1.9214504148526802e-05,
      "loss": 0.762,
      "step": 5285
    },
    {
      "epoch": 0.8055470893020421,
      "grad_norm": 0.984375,
      "learning_rate": 1.9185423713553606e-05,
      "loss": 0.8635,
      "step": 5286
    },
    {
      "epoch": 0.805699481865285,
      "grad_norm": 0.96484375,
      "learning_rate": 1.9156362966173347e-05,
      "loss": 1.1748,
      "step": 5287
    },
    {
      "epoch": 0.8058518744285279,
      "grad_norm": 0.95703125,
      "learning_rate": 1.9127321913465636e-05,
      "loss": 1.1668,
      "step": 5288
    },
    {
      "epoch": 0.8060042669917709,
      "grad_norm": 0.71484375,
      "learning_rate": 1.9098300562505266e-05,
      "loss": 0.8535,
      "step": 5289
    },
    {
      "epoch": 0.8061566595550137,
      "grad_norm": 0.9921875,
      "learning_rate": 1.9069298920362265e-05,
      "loss": 1.0213,
      "step": 5290
    },
    {
      "epoch": 0.8063090521182567,
      "grad_norm": 0.84765625,
      "learning_rate": 1.904031699410186e-05,
      "loss": 0.9093,
      "step": 5291
    },
    {
      "epoch": 0.8064614446814995,
      "grad_norm": 1.1484375,
      "learning_rate": 1.901135479078443e-05,
      "loss": 0.9165,
      "step": 5292
    },
    {
      "epoch": 0.8066138372447424,
      "grad_norm": 0.90234375,
      "learning_rate": 1.8982412317465627e-05,
      "loss": 0.9453,
      "step": 5293
    },
    {
      "epoch": 0.8067662298079854,
      "grad_norm": 1.1484375,
      "learning_rate": 1.895348958119625e-05,
      "loss": 1.2131,
      "step": 5294
    },
    {
      "epoch": 0.8069186223712282,
      "grad_norm": 1.078125,
      "learning_rate": 1.8924586589022277e-05,
      "loss": 0.9307,
      "step": 5295
    },
    {
      "epoch": 0.8070710149344712,
      "grad_norm": 1.21875,
      "learning_rate": 1.8895703347984893e-05,
      "loss": 1.0731,
      "step": 5296
    },
    {
      "epoch": 0.8072234074977142,
      "grad_norm": 1.15625,
      "learning_rate": 1.886683986512048e-05,
      "loss": 0.8969,
      "step": 5297
    },
    {
      "epoch": 0.807375800060957,
      "grad_norm": 0.9140625,
      "learning_rate": 1.8837996147460556e-05,
      "loss": 0.8888,
      "step": 5298
    },
    {
      "epoch": 0.8075281926242,
      "grad_norm": 0.95703125,
      "learning_rate": 1.8809172202031933e-05,
      "loss": 1.0441,
      "step": 5299
    },
    {
      "epoch": 0.8076805851874429,
      "grad_norm": 1.125,
      "learning_rate": 1.878036803585651e-05,
      "loss": 1.0715,
      "step": 5300
    },
    {
      "epoch": 0.8078329777506857,
      "grad_norm": 0.72265625,
      "learning_rate": 1.8751583655951386e-05,
      "loss": 0.8975,
      "step": 5301
    },
    {
      "epoch": 0.8079853703139287,
      "grad_norm": 0.8828125,
      "learning_rate": 1.8722819069328856e-05,
      "loss": 1.0188,
      "step": 5302
    },
    {
      "epoch": 0.8081377628771715,
      "grad_norm": 0.8125,
      "learning_rate": 1.8694074282996378e-05,
      "loss": 0.83,
      "step": 5303
    },
    {
      "epoch": 0.8082901554404145,
      "grad_norm": 0.78515625,
      "learning_rate": 1.8665349303956614e-05,
      "loss": 1.0283,
      "step": 5304
    },
    {
      "epoch": 0.8084425480036574,
      "grad_norm": 1.578125,
      "learning_rate": 1.8636644139207382e-05,
      "loss": 0.9603,
      "step": 5305
    },
    {
      "epoch": 0.8085949405669003,
      "grad_norm": 0.9375,
      "learning_rate": 1.8607958795741654e-05,
      "loss": 1.0794,
      "step": 5306
    },
    {
      "epoch": 0.8087473331301432,
      "grad_norm": 0.89453125,
      "learning_rate": 1.8579293280547606e-05,
      "loss": 0.992,
      "step": 5307
    },
    {
      "epoch": 0.8088997256933862,
      "grad_norm": 0.80078125,
      "learning_rate": 1.8550647600608573e-05,
      "loss": 0.792,
      "step": 5308
    },
    {
      "epoch": 0.809052118256629,
      "grad_norm": 0.984375,
      "learning_rate": 1.852202176290302e-05,
      "loss": 1.1308,
      "step": 5309
    },
    {
      "epoch": 0.809204510819872,
      "grad_norm": 1.15625,
      "learning_rate": 1.8493415774404655e-05,
      "loss": 1.0057,
      "step": 5310
    },
    {
      "epoch": 0.809356903383115,
      "grad_norm": 0.83203125,
      "learning_rate": 1.8464829642082303e-05,
      "loss": 1.0869,
      "step": 5311
    },
    {
      "epoch": 0.8095092959463578,
      "grad_norm": 1.03125,
      "learning_rate": 1.8436263372899943e-05,
      "loss": 1.0087,
      "step": 5312
    },
    {
      "epoch": 0.8096616885096007,
      "grad_norm": 0.95703125,
      "learning_rate": 1.8407716973816734e-05,
      "loss": 0.8969,
      "step": 5313
    },
    {
      "epoch": 0.8098140810728437,
      "grad_norm": 0.97265625,
      "learning_rate": 1.8379190451787e-05,
      "loss": 0.9494,
      "step": 5314
    },
    {
      "epoch": 0.8099664736360865,
      "grad_norm": 1.09375,
      "learning_rate": 1.8350683813760162e-05,
      "loss": 1.0638,
      "step": 5315
    },
    {
      "epoch": 0.8101188661993295,
      "grad_norm": 0.72265625,
      "learning_rate": 1.8322197066680914e-05,
      "loss": 0.8916,
      "step": 5316
    },
    {
      "epoch": 0.8102712587625723,
      "grad_norm": 1.078125,
      "learning_rate": 1.8293730217489004e-05,
      "loss": 1.1521,
      "step": 5317
    },
    {
      "epoch": 0.8104236513258153,
      "grad_norm": 0.984375,
      "learning_rate": 1.826528327311937e-05,
      "loss": 0.9758,
      "step": 5318
    },
    {
      "epoch": 0.8105760438890582,
      "grad_norm": 0.984375,
      "learning_rate": 1.8236856240502108e-05,
      "loss": 0.8476,
      "step": 5319
    },
    {
      "epoch": 0.8107284364523011,
      "grad_norm": 1.1328125,
      "learning_rate": 1.820844912656241e-05,
      "loss": 0.9806,
      "step": 5320
    },
    {
      "epoch": 0.810880829015544,
      "grad_norm": 0.859375,
      "learning_rate": 1.8180061938220715e-05,
      "loss": 0.9929,
      "step": 5321
    },
    {
      "epoch": 0.811033221578787,
      "grad_norm": 0.82421875,
      "learning_rate": 1.815169468239252e-05,
      "loss": 0.9671,
      "step": 5322
    },
    {
      "epoch": 0.8111856141420298,
      "grad_norm": 1.2109375,
      "learning_rate": 1.8123347365988498e-05,
      "loss": 0.932,
      "step": 5323
    },
    {
      "epoch": 0.8113380067052728,
      "grad_norm": 1.3046875,
      "learning_rate": 1.8095019995914476e-05,
      "loss": 1.1545,
      "step": 5324
    },
    {
      "epoch": 0.8114903992685157,
      "grad_norm": 1.0390625,
      "learning_rate": 1.8066712579071388e-05,
      "loss": 1.1366,
      "step": 5325
    },
    {
      "epoch": 0.8116427918317586,
      "grad_norm": 1.078125,
      "learning_rate": 1.8038425122355317e-05,
      "loss": 1.0765,
      "step": 5326
    },
    {
      "epoch": 0.8117951843950015,
      "grad_norm": 1.0078125,
      "learning_rate": 1.8010157632657543e-05,
      "loss": 0.9029,
      "step": 5327
    },
    {
      "epoch": 0.8119475769582445,
      "grad_norm": 0.76953125,
      "learning_rate": 1.79819101168644e-05,
      "loss": 0.9519,
      "step": 5328
    },
    {
      "epoch": 0.8120999695214873,
      "grad_norm": 0.953125,
      "learning_rate": 1.795368258185739e-05,
      "loss": 1.0231,
      "step": 5329
    },
    {
      "epoch": 0.8122523620847303,
      "grad_norm": 1.0390625,
      "learning_rate": 1.7925475034513162e-05,
      "loss": 1.1273,
      "step": 5330
    },
    {
      "epoch": 0.8124047546479731,
      "grad_norm": 1.0390625,
      "learning_rate": 1.7897287481703463e-05,
      "loss": 1.0451,
      "step": 5331
    },
    {
      "epoch": 0.8125571472112161,
      "grad_norm": 0.875,
      "learning_rate": 1.7869119930295165e-05,
      "loss": 0.9877,
      "step": 5332
    },
    {
      "epoch": 0.812709539774459,
      "grad_norm": 0.9140625,
      "learning_rate": 1.7840972387150345e-05,
      "loss": 0.9953,
      "step": 5333
    },
    {
      "epoch": 0.8128619323377019,
      "grad_norm": 0.96484375,
      "learning_rate": 1.781284485912611e-05,
      "loss": 1.1101,
      "step": 5334
    },
    {
      "epoch": 0.8130143249009448,
      "grad_norm": 1.046875,
      "learning_rate": 1.7784737353074744e-05,
      "loss": 1.0062,
      "step": 5335
    },
    {
      "epoch": 0.8131667174641878,
      "grad_norm": 0.92578125,
      "learning_rate": 1.7756649875843644e-05,
      "loss": 0.9959,
      "step": 5336
    },
    {
      "epoch": 0.8133191100274306,
      "grad_norm": 1.046875,
      "learning_rate": 1.772858243427529e-05,
      "loss": 1.0866,
      "step": 5337
    },
    {
      "epoch": 0.8134715025906736,
      "grad_norm": 0.8984375,
      "learning_rate": 1.7700535035207355e-05,
      "loss": 1.1307,
      "step": 5338
    },
    {
      "epoch": 0.8136238951539165,
      "grad_norm": 1.171875,
      "learning_rate": 1.7672507685472573e-05,
      "loss": 1.0206,
      "step": 5339
    },
    {
      "epoch": 0.8137762877171594,
      "grad_norm": 0.9140625,
      "learning_rate": 1.764450039189881e-05,
      "loss": 0.9113,
      "step": 5340
    },
    {
      "epoch": 0.8139286802804023,
      "grad_norm": 0.9453125,
      "learning_rate": 1.7616513161309055e-05,
      "loss": 0.8323,
      "step": 5341
    },
    {
      "epoch": 0.8140810728436453,
      "grad_norm": 1.03125,
      "learning_rate": 1.7588546000521378e-05,
      "loss": 1.0595,
      "step": 5342
    },
    {
      "epoch": 0.8142334654068881,
      "grad_norm": 0.90234375,
      "learning_rate": 1.756059891634898e-05,
      "loss": 1.0188,
      "step": 5343
    },
    {
      "epoch": 0.8143858579701311,
      "grad_norm": 0.88671875,
      "learning_rate": 1.7532671915600196e-05,
      "loss": 0.8755,
      "step": 5344
    },
    {
      "epoch": 0.8145382505333739,
      "grad_norm": 1.0234375,
      "learning_rate": 1.750476500507845e-05,
      "loss": 1.0727,
      "step": 5345
    },
    {
      "epoch": 0.8146906430966169,
      "grad_norm": 1.0703125,
      "learning_rate": 1.7476878191582246e-05,
      "loss": 0.9557,
      "step": 5346
    },
    {
      "epoch": 0.8148430356598598,
      "grad_norm": 1.1953125,
      "learning_rate": 1.744901148190522e-05,
      "loss": 1.0651,
      "step": 5347
    },
    {
      "epoch": 0.8149954282231027,
      "grad_norm": 1.3515625,
      "learning_rate": 1.7421164882836095e-05,
      "loss": 1.181,
      "step": 5348
    },
    {
      "epoch": 0.8151478207863456,
      "grad_norm": 0.92578125,
      "learning_rate": 1.739333840115869e-05,
      "loss": 1.0516,
      "step": 5349
    },
    {
      "epoch": 0.8153002133495886,
      "grad_norm": 0.9921875,
      "learning_rate": 1.7365532043651978e-05,
      "loss": 0.8631,
      "step": 5350
    },
    {
      "epoch": 0.8154526059128314,
      "grad_norm": 0.859375,
      "learning_rate": 1.7337745817089968e-05,
      "loss": 0.8584,
      "step": 5351
    },
    {
      "epoch": 0.8156049984760744,
      "grad_norm": 0.98046875,
      "learning_rate": 1.730997972824179e-05,
      "loss": 0.9841,
      "step": 5352
    },
    {
      "epoch": 0.8157573910393173,
      "grad_norm": 1.015625,
      "learning_rate": 1.7282233783871637e-05,
      "loss": 0.8975,
      "step": 5353
    },
    {
      "epoch": 0.8159097836025602,
      "grad_norm": 1.359375,
      "learning_rate": 1.725450799073882e-05,
      "loss": 1.1523,
      "step": 5354
    },
    {
      "epoch": 0.8160621761658031,
      "grad_norm": 1.28125,
      "learning_rate": 1.7226802355597782e-05,
      "loss": 1.2313,
      "step": 5355
    },
    {
      "epoch": 0.8162145687290461,
      "grad_norm": 0.71484375,
      "learning_rate": 1.7199116885197995e-05,
      "loss": 0.7469,
      "step": 5356
    },
    {
      "epoch": 0.8163669612922889,
      "grad_norm": 1.09375,
      "learning_rate": 1.7171451586284025e-05,
      "loss": 1.0824,
      "step": 5357
    },
    {
      "epoch": 0.8165193538555319,
      "grad_norm": 1.0,
      "learning_rate": 1.714380646559556e-05,
      "loss": 0.8556,
      "step": 5358
    },
    {
      "epoch": 0.8166717464187747,
      "grad_norm": 1.0,
      "learning_rate": 1.7116181529867327e-05,
      "loss": 0.9704,
      "step": 5359
    },
    {
      "epoch": 0.8168241389820177,
      "grad_norm": 1.0859375,
      "learning_rate": 1.7088576785829123e-05,
      "loss": 1.2786,
      "step": 5360
    },
    {
      "epoch": 0.8169765315452606,
      "grad_norm": 0.82421875,
      "learning_rate": 1.7060992240205943e-05,
      "loss": 1.0438,
      "step": 5361
    },
    {
      "epoch": 0.8171289241085035,
      "grad_norm": 0.828125,
      "learning_rate": 1.7033427899717735e-05,
      "loss": 0.977,
      "step": 5362
    },
    {
      "epoch": 0.8172813166717464,
      "grad_norm": 1.0078125,
      "learning_rate": 1.7005883771079577e-05,
      "loss": 1.0792,
      "step": 5363
    },
    {
      "epoch": 0.8174337092349894,
      "grad_norm": 1.1171875,
      "learning_rate": 1.6978359861001603e-05,
      "loss": 1.0597,
      "step": 5364
    },
    {
      "epoch": 0.8175861017982322,
      "grad_norm": 1.03125,
      "learning_rate": 1.6950856176189033e-05,
      "loss": 0.8249,
      "step": 5365
    },
    {
      "epoch": 0.8177384943614752,
      "grad_norm": 0.71484375,
      "learning_rate": 1.6923372723342157e-05,
      "loss": 1.0914,
      "step": 5366
    },
    {
      "epoch": 0.8178908869247181,
      "grad_norm": 1.0625,
      "learning_rate": 1.689590950915636e-05,
      "loss": 0.9797,
      "step": 5367
    },
    {
      "epoch": 0.818043279487961,
      "grad_norm": 0.99609375,
      "learning_rate": 1.686846654032207e-05,
      "loss": 0.93,
      "step": 5368
    },
    {
      "epoch": 0.8181956720512039,
      "grad_norm": 0.73046875,
      "learning_rate": 1.6841043823524782e-05,
      "loss": 0.9465,
      "step": 5369
    },
    {
      "epoch": 0.8183480646144469,
      "grad_norm": 1.2265625,
      "learning_rate": 1.6813641365445077e-05,
      "loss": 1.0937,
      "step": 5370
    },
    {
      "epoch": 0.8185004571776897,
      "grad_norm": 0.98046875,
      "learning_rate": 1.6786259172758546e-05,
      "loss": 1.0661,
      "step": 5371
    },
    {
      "epoch": 0.8186528497409327,
      "grad_norm": 0.83984375,
      "learning_rate": 1.675889725213593e-05,
      "loss": 0.9309,
      "step": 5372
    },
    {
      "epoch": 0.8188052423041755,
      "grad_norm": 1.1015625,
      "learning_rate": 1.6731555610242987e-05,
      "loss": 1.1059,
      "step": 5373
    },
    {
      "epoch": 0.8189576348674185,
      "grad_norm": 0.80078125,
      "learning_rate": 1.6704234253740515e-05,
      "loss": 0.8672,
      "step": 5374
    },
    {
      "epoch": 0.8191100274306614,
      "grad_norm": 1.0390625,
      "learning_rate": 1.6676933189284382e-05,
      "loss": 1.296,
      "step": 5375
    },
    {
      "epoch": 0.8192624199939043,
      "grad_norm": 1.125,
      "learning_rate": 1.664965242352554e-05,
      "loss": 1.1071,
      "step": 5376
    },
    {
      "epoch": 0.8194148125571472,
      "grad_norm": 0.921875,
      "learning_rate": 1.662239196310995e-05,
      "loss": 1.024,
      "step": 5377
    },
    {
      "epoch": 0.8195672051203902,
      "grad_norm": 0.93359375,
      "learning_rate": 1.659515181467869e-05,
      "loss": 0.9688,
      "step": 5378
    },
    {
      "epoch": 0.819719597683633,
      "grad_norm": 0.8203125,
      "learning_rate": 1.6567931984867846e-05,
      "loss": 0.8762,
      "step": 5379
    },
    {
      "epoch": 0.819871990246876,
      "grad_norm": 0.96875,
      "learning_rate": 1.6540732480308508e-05,
      "loss": 0.9008,
      "step": 5380
    },
    {
      "epoch": 0.8200243828101189,
      "grad_norm": 0.921875,
      "learning_rate": 1.651355330762693e-05,
      "loss": 0.8991,
      "step": 5381
    },
    {
      "epoch": 0.8201767753733618,
      "grad_norm": 0.76953125,
      "learning_rate": 1.6486394473444287e-05,
      "loss": 0.9523,
      "step": 5382
    },
    {
      "epoch": 0.8203291679366047,
      "grad_norm": 1.0703125,
      "learning_rate": 1.645925598437693e-05,
      "loss": 0.9942,
      "step": 5383
    },
    {
      "epoch": 0.8204815604998477,
      "grad_norm": 1.015625,
      "learning_rate": 1.6432137847036145e-05,
      "loss": 0.9201,
      "step": 5384
    },
    {
      "epoch": 0.8206339530630905,
      "grad_norm": 0.8671875,
      "learning_rate": 1.6405040068028322e-05,
      "loss": 0.9917,
      "step": 5385
    },
    {
      "epoch": 0.8207863456263335,
      "grad_norm": 0.7421875,
      "learning_rate": 1.6377962653954837e-05,
      "loss": 0.8651,
      "step": 5386
    },
    {
      "epoch": 0.8209387381895763,
      "grad_norm": 0.88671875,
      "learning_rate": 1.635090561141217e-05,
      "loss": 0.9349,
      "step": 5387
    },
    {
      "epoch": 0.8210911307528193,
      "grad_norm": 0.8984375,
      "learning_rate": 1.632386894699177e-05,
      "loss": 1.0318,
      "step": 5388
    },
    {
      "epoch": 0.8212435233160622,
      "grad_norm": 1.1640625,
      "learning_rate": 1.6296852667280194e-05,
      "loss": 1.2213,
      "step": 5389
    },
    {
      "epoch": 0.821395915879305,
      "grad_norm": 0.97265625,
      "learning_rate": 1.6269856778858983e-05,
      "loss": 0.9784,
      "step": 5390
    },
    {
      "epoch": 0.821548308442548,
      "grad_norm": 0.984375,
      "learning_rate": 1.6242881288304724e-05,
      "loss": 1.0818,
      "step": 5391
    },
    {
      "epoch": 0.821700701005791,
      "grad_norm": 0.80859375,
      "learning_rate": 1.6215926202189034e-05,
      "loss": 0.9376,
      "step": 5392
    },
    {
      "epoch": 0.8218530935690338,
      "grad_norm": 0.9375,
      "learning_rate": 1.6188991527078556e-05,
      "loss": 1.1004,
      "step": 5393
    },
    {
      "epoch": 0.8220054861322768,
      "grad_norm": 0.94921875,
      "learning_rate": 1.6162077269534946e-05,
      "loss": 0.913,
      "step": 5394
    },
    {
      "epoch": 0.8221578786955197,
      "grad_norm": 0.60546875,
      "learning_rate": 1.613518343611494e-05,
      "loss": 0.8228,
      "step": 5395
    },
    {
      "epoch": 0.8223102712587625,
      "grad_norm": 0.82421875,
      "learning_rate": 1.6108310033370276e-05,
      "loss": 0.8355,
      "step": 5396
    },
    {
      "epoch": 0.8224626638220055,
      "grad_norm": 0.9453125,
      "learning_rate": 1.6081457067847628e-05,
      "loss": 0.9082,
      "step": 5397
    },
    {
      "epoch": 0.8226150563852483,
      "grad_norm": 1.046875,
      "learning_rate": 1.6054624546088825e-05,
      "loss": 0.9551,
      "step": 5398
    },
    {
      "epoch": 0.8227674489484913,
      "grad_norm": 0.7578125,
      "learning_rate": 1.602781247463062e-05,
      "loss": 0.9315,
      "step": 5399
    },
    {
      "epoch": 0.8229198415117343,
      "grad_norm": 0.7890625,
      "learning_rate": 1.6001020860004857e-05,
      "loss": 0.9252,
      "step": 5400
    },
    {
      "epoch": 0.8230722340749771,
      "grad_norm": 1.0,
      "learning_rate": 1.5974249708738343e-05,
      "loss": 0.7473,
      "step": 5401
    },
    {
      "epoch": 0.82322462663822,
      "grad_norm": 0.94921875,
      "learning_rate": 1.594749902735292e-05,
      "loss": 1.0564,
      "step": 5402
    },
    {
      "epoch": 0.823377019201463,
      "grad_norm": 0.765625,
      "learning_rate": 1.5920768822365418e-05,
      "loss": 0.8634,
      "step": 5403
    },
    {
      "epoch": 0.8235294117647058,
      "grad_norm": 1.234375,
      "learning_rate": 1.5894059100287717e-05,
      "loss": 1.1469,
      "step": 5404
    },
    {
      "epoch": 0.8236818043279488,
      "grad_norm": 0.92578125,
      "learning_rate": 1.5867369867626658e-05,
      "loss": 0.8267,
      "step": 5405
    },
    {
      "epoch": 0.8238341968911918,
      "grad_norm": 0.87109375,
      "learning_rate": 1.5840701130884173e-05,
      "loss": 0.965,
      "step": 5406
    },
    {
      "epoch": 0.8239865894544346,
      "grad_norm": 1.140625,
      "learning_rate": 1.581405289655715e-05,
      "loss": 0.987,
      "step": 5407
    },
    {
      "epoch": 0.8241389820176775,
      "grad_norm": 1.1171875,
      "learning_rate": 1.5787425171137416e-05,
      "loss": 0.9468,
      "step": 5408
    },
    {
      "epoch": 0.8242913745809205,
      "grad_norm": 1.1640625,
      "learning_rate": 1.576081796111193e-05,
      "loss": 0.9598,
      "step": 5409
    },
    {
      "epoch": 0.8244437671441633,
      "grad_norm": 1.1875,
      "learning_rate": 1.573423127296256e-05,
      "loss": 1.0174,
      "step": 5410
    },
    {
      "epoch": 0.8245961597074063,
      "grad_norm": 0.94140625,
      "learning_rate": 1.5707665113166203e-05,
      "loss": 1.0607,
      "step": 5411
    },
    {
      "epoch": 0.8247485522706491,
      "grad_norm": 0.80859375,
      "learning_rate": 1.5681119488194795e-05,
      "loss": 0.8709,
      "step": 5412
    },
    {
      "epoch": 0.8249009448338921,
      "grad_norm": 1.0859375,
      "learning_rate": 1.5654594404515222e-05,
      "loss": 0.8157,
      "step": 5413
    },
    {
      "epoch": 0.825053337397135,
      "grad_norm": 0.95703125,
      "learning_rate": 1.5628089868589313e-05,
      "loss": 0.9762,
      "step": 5414
    },
    {
      "epoch": 0.8252057299603779,
      "grad_norm": 0.69140625,
      "learning_rate": 1.5601605886874037e-05,
      "loss": 0.8653,
      "step": 5415
    },
    {
      "epoch": 0.8253581225236208,
      "grad_norm": 0.86328125,
      "learning_rate": 1.5575142465821203e-05,
      "loss": 0.8214,
      "step": 5416
    },
    {
      "epoch": 0.8255105150868638,
      "grad_norm": 0.83203125,
      "learning_rate": 1.554869961187775e-05,
      "loss": 1.0568,
      "step": 5417
    },
    {
      "epoch": 0.8256629076501066,
      "grad_norm": 1.2734375,
      "learning_rate": 1.552227733148549e-05,
      "loss": 0.902,
      "step": 5418
    },
    {
      "epoch": 0.8258153002133496,
      "grad_norm": 0.92578125,
      "learning_rate": 1.549587563108129e-05,
      "loss": 0.8249,
      "step": 5419
    },
    {
      "epoch": 0.8259676927765925,
      "grad_norm": 0.92578125,
      "learning_rate": 1.5469494517096984e-05,
      "loss": 1.0116,
      "step": 5420
    },
    {
      "epoch": 0.8261200853398354,
      "grad_norm": 0.69140625,
      "learning_rate": 1.544313399595938e-05,
      "loss": 1.0023,
      "step": 5421
    },
    {
      "epoch": 0.8262724779030783,
      "grad_norm": 1.0234375,
      "learning_rate": 1.5416794074090258e-05,
      "loss": 0.9635,
      "step": 5422
    },
    {
      "epoch": 0.8264248704663213,
      "grad_norm": 1.1640625,
      "learning_rate": 1.5390474757906446e-05,
      "loss": 1.0556,
      "step": 5423
    },
    {
      "epoch": 0.8265772630295641,
      "grad_norm": 0.76171875,
      "learning_rate": 1.5364176053819723e-05,
      "loss": 0.9318,
      "step": 5424
    },
    {
      "epoch": 0.8267296555928071,
      "grad_norm": 0.84765625,
      "learning_rate": 1.5337897968236748e-05,
      "loss": 0.8791,
      "step": 5425
    },
    {
      "epoch": 0.8268820481560499,
      "grad_norm": 0.96875,
      "learning_rate": 1.531164050755932e-05,
      "loss": 1.0176,
      "step": 5426
    },
    {
      "epoch": 0.8270344407192929,
      "grad_norm": 1.046875,
      "learning_rate": 1.5285403678184094e-05,
      "loss": 1.0465,
      "step": 5427
    },
    {
      "epoch": 0.8271868332825358,
      "grad_norm": 1.046875,
      "learning_rate": 1.5259187486502734e-05,
      "loss": 1.054,
      "step": 5428
    },
    {
      "epoch": 0.8273392258457787,
      "grad_norm": 0.8828125,
      "learning_rate": 1.523299193890193e-05,
      "loss": 0.9541,
      "step": 5429
    },
    {
      "epoch": 0.8274916184090216,
      "grad_norm": 0.84375,
      "learning_rate": 1.5206817041763266e-05,
      "loss": 1.0426,
      "step": 5430
    },
    {
      "epoch": 0.8276440109722646,
      "grad_norm": 0.83984375,
      "learning_rate": 1.5180662801463286e-05,
      "loss": 0.9132,
      "step": 5431
    },
    {
      "epoch": 0.8277964035355074,
      "grad_norm": 1.0703125,
      "learning_rate": 1.5154529224373593e-05,
      "loss": 1.0521,
      "step": 5432
    },
    {
      "epoch": 0.8279487960987504,
      "grad_norm": 0.8125,
      "learning_rate": 1.512841631686065e-05,
      "loss": 0.8926,
      "step": 5433
    },
    {
      "epoch": 0.8281011886619933,
      "grad_norm": 1.0234375,
      "learning_rate": 1.510232408528599e-05,
      "loss": 1.0697,
      "step": 5434
    },
    {
      "epoch": 0.8282535812252362,
      "grad_norm": 0.87109375,
      "learning_rate": 1.5076252536006063e-05,
      "loss": 0.8564,
      "step": 5435
    },
    {
      "epoch": 0.8284059737884791,
      "grad_norm": 1.421875,
      "learning_rate": 1.50502016753722e-05,
      "loss": 1.1253,
      "step": 5436
    },
    {
      "epoch": 0.8285583663517221,
      "grad_norm": 1.1640625,
      "learning_rate": 1.5024171509730833e-05,
      "loss": 1.0882,
      "step": 5437
    },
    {
      "epoch": 0.8287107589149649,
      "grad_norm": 1.171875,
      "learning_rate": 1.4998162045423247e-05,
      "loss": 0.9287,
      "step": 5438
    },
    {
      "epoch": 0.8288631514782079,
      "grad_norm": 0.80859375,
      "learning_rate": 1.4972173288785729e-05,
      "loss": 0.8541,
      "step": 5439
    },
    {
      "epoch": 0.8290155440414507,
      "grad_norm": 0.77734375,
      "learning_rate": 1.4946205246149536e-05,
      "loss": 0.881,
      "step": 5440
    },
    {
      "epoch": 0.8291679366046937,
      "grad_norm": 0.90625,
      "learning_rate": 1.4920257923840864e-05,
      "loss": 0.9992,
      "step": 5441
    },
    {
      "epoch": 0.8293203291679366,
      "grad_norm": 0.73046875,
      "learning_rate": 1.4894331328180778e-05,
      "loss": 0.8963,
      "step": 5442
    },
    {
      "epoch": 0.8294727217311795,
      "grad_norm": 0.76953125,
      "learning_rate": 1.4868425465485458e-05,
      "loss": 0.8961,
      "step": 5443
    },
    {
      "epoch": 0.8296251142944224,
      "grad_norm": 1.21875,
      "learning_rate": 1.4842540342065892e-05,
      "loss": 0.9919,
      "step": 5444
    },
    {
      "epoch": 0.8297775068576654,
      "grad_norm": 1.140625,
      "learning_rate": 1.4816675964228077e-05,
      "loss": 0.9238,
      "step": 5445
    },
    {
      "epoch": 0.8299298994209082,
      "grad_norm": 0.796875,
      "learning_rate": 1.4790832338272975e-05,
      "loss": 0.8956,
      "step": 5446
    },
    {
      "epoch": 0.8300822919841512,
      "grad_norm": 1.1796875,
      "learning_rate": 1.4765009470496437e-05,
      "loss": 1.0498,
      "step": 5447
    },
    {
      "epoch": 0.8302346845473941,
      "grad_norm": 1.046875,
      "learning_rate": 1.4739207367189301e-05,
      "loss": 0.8938,
      "step": 5448
    },
    {
      "epoch": 0.830387077110637,
      "grad_norm": 1.1875,
      "learning_rate": 1.4713426034637323e-05,
      "loss": 1.0534,
      "step": 5449
    },
    {
      "epoch": 0.8305394696738799,
      "grad_norm": 0.9453125,
      "learning_rate": 1.4687665479121182e-05,
      "loss": 1.0177,
      "step": 5450
    },
    {
      "epoch": 0.8306918622371229,
      "grad_norm": 0.859375,
      "learning_rate": 1.4661925706916568e-05,
      "loss": 0.907,
      "step": 5451
    },
    {
      "epoch": 0.8308442548003657,
      "grad_norm": 0.9296875,
      "learning_rate": 1.4636206724294065e-05,
      "loss": 0.9588,
      "step": 5452
    },
    {
      "epoch": 0.8309966473636087,
      "grad_norm": 0.8046875,
      "learning_rate": 1.4610508537519096e-05,
      "loss": 0.9315,
      "step": 5453
    },
    {
      "epoch": 0.8311490399268515,
      "grad_norm": 1.109375,
      "learning_rate": 1.4584831152852207e-05,
      "loss": 0.8947,
      "step": 5454
    },
    {
      "epoch": 0.8313014324900945,
      "grad_norm": 0.96484375,
      "learning_rate": 1.455917457654874e-05,
      "loss": 0.9485,
      "step": 5455
    },
    {
      "epoch": 0.8314538250533374,
      "grad_norm": 0.89453125,
      "learning_rate": 1.4533538814858971e-05,
      "loss": 0.7947,
      "step": 5456
    },
    {
      "epoch": 0.8316062176165803,
      "grad_norm": 0.79296875,
      "learning_rate": 1.4507923874028218e-05,
      "loss": 0.9558,
      "step": 5457
    },
    {
      "epoch": 0.8317586101798232,
      "grad_norm": 0.80859375,
      "learning_rate": 1.448232976029662e-05,
      "loss": 0.9585,
      "step": 5458
    },
    {
      "epoch": 0.8319110027430662,
      "grad_norm": 0.89453125,
      "learning_rate": 1.4456756479899213e-05,
      "loss": 0.924,
      "step": 5459
    },
    {
      "epoch": 0.832063395306309,
      "grad_norm": 0.9375,
      "learning_rate": 1.4431204039066082e-05,
      "loss": 0.8139,
      "step": 5460
    },
    {
      "epoch": 0.832215787869552,
      "grad_norm": 1.1953125,
      "learning_rate": 1.4405672444022155e-05,
      "loss": 0.9211,
      "step": 5461
    },
    {
      "epoch": 0.8323681804327949,
      "grad_norm": 0.95703125,
      "learning_rate": 1.438016170098726e-05,
      "loss": 0.8833,
      "step": 5462
    },
    {
      "epoch": 0.8325205729960378,
      "grad_norm": 0.8984375,
      "learning_rate": 1.4354671816176268e-05,
      "loss": 1.0709,
      "step": 5463
    },
    {
      "epoch": 0.8326729655592807,
      "grad_norm": 0.890625,
      "learning_rate": 1.4329202795798769e-05,
      "loss": 0.9061,
      "step": 5464
    },
    {
      "epoch": 0.8328253581225237,
      "grad_norm": 0.84765625,
      "learning_rate": 1.4303754646059464e-05,
      "loss": 0.9293,
      "step": 5465
    },
    {
      "epoch": 0.8329777506857665,
      "grad_norm": 0.81640625,
      "learning_rate": 1.4278327373157852e-05,
      "loss": 1.0094,
      "step": 5466
    },
    {
      "epoch": 0.8331301432490095,
      "grad_norm": 0.7421875,
      "learning_rate": 1.4252920983288377e-05,
      "loss": 0.9397,
      "step": 5467
    },
    {
      "epoch": 0.8332825358122523,
      "grad_norm": 1.1171875,
      "learning_rate": 1.4227535482640442e-05,
      "loss": 1.0259,
      "step": 5468
    },
    {
      "epoch": 0.8334349283754953,
      "grad_norm": 1.1328125,
      "learning_rate": 1.4202170877398313e-05,
      "loss": 0.9017,
      "step": 5469
    },
    {
      "epoch": 0.8335873209387382,
      "grad_norm": 0.94921875,
      "learning_rate": 1.4176827173741103e-05,
      "loss": 1.0588,
      "step": 5470
    },
    {
      "epoch": 0.8337397135019811,
      "grad_norm": 1.0390625,
      "learning_rate": 1.4151504377842984e-05,
      "loss": 0.9274,
      "step": 5471
    },
    {
      "epoch": 0.833892106065224,
      "grad_norm": 0.796875,
      "learning_rate": 1.4126202495872909e-05,
      "loss": 0.854,
      "step": 5472
    },
    {
      "epoch": 0.834044498628467,
      "grad_norm": 1.0625,
      "learning_rate": 1.4100921533994783e-05,
      "loss": 1.0015,
      "step": 5473
    },
    {
      "epoch": 0.8341968911917098,
      "grad_norm": 0.91796875,
      "learning_rate": 1.4075661498367443e-05,
      "loss": 0.9241,
      "step": 5474
    },
    {
      "epoch": 0.8343492837549528,
      "grad_norm": 1.03125,
      "learning_rate": 1.4050422395144602e-05,
      "loss": 0.9129,
      "step": 5475
    },
    {
      "epoch": 0.8345016763181957,
      "grad_norm": 0.703125,
      "learning_rate": 1.4025204230474787e-05,
      "loss": 0.7512,
      "step": 5476
    },
    {
      "epoch": 0.8346540688814386,
      "grad_norm": 0.8359375,
      "learning_rate": 1.4000007010501593e-05,
      "loss": 0.8639,
      "step": 5477
    },
    {
      "epoch": 0.8348064614446815,
      "grad_norm": 0.921875,
      "learning_rate": 1.3974830741363397e-05,
      "loss": 0.8822,
      "step": 5478
    },
    {
      "epoch": 0.8349588540079245,
      "grad_norm": 0.953125,
      "learning_rate": 1.3949675429193466e-05,
      "loss": 1.0443,
      "step": 5479
    },
    {
      "epoch": 0.8351112465711673,
      "grad_norm": 1.03125,
      "learning_rate": 1.3924541080120068e-05,
      "loss": 0.9533,
      "step": 5480
    },
    {
      "epoch": 0.8352636391344103,
      "grad_norm": 1.1640625,
      "learning_rate": 1.389942770026622e-05,
      "loss": 1.0708,
      "step": 5481
    },
    {
      "epoch": 0.8354160316976531,
      "grad_norm": 0.9375,
      "learning_rate": 1.387433529574994e-05,
      "loss": 1.2199,
      "step": 5482
    },
    {
      "epoch": 0.8355684242608961,
      "grad_norm": 0.87890625,
      "learning_rate": 1.3849263872684104e-05,
      "loss": 0.9444,
      "step": 5483
    },
    {
      "epoch": 0.835720816824139,
      "grad_norm": 0.89453125,
      "learning_rate": 1.3824213437176426e-05,
      "loss": 0.8287,
      "step": 5484
    },
    {
      "epoch": 0.8358732093873819,
      "grad_norm": 0.90625,
      "learning_rate": 1.3799183995329612e-05,
      "loss": 1.1481,
      "step": 5485
    },
    {
      "epoch": 0.8360256019506248,
      "grad_norm": 0.8515625,
      "learning_rate": 1.3774175553241187e-05,
      "loss": 0.8998,
      "step": 5486
    },
    {
      "epoch": 0.8361779945138678,
      "grad_norm": 1.1171875,
      "learning_rate": 1.3749188117003508e-05,
      "loss": 0.9374,
      "step": 5487
    },
    {
      "epoch": 0.8363303870771106,
      "grad_norm": 0.9765625,
      "learning_rate": 1.3724221692703931e-05,
      "loss": 1.1712,
      "step": 5488
    },
    {
      "epoch": 0.8364827796403536,
      "grad_norm": 1.015625,
      "learning_rate": 1.3699276286424622e-05,
      "loss": 0.8272,
      "step": 5489
    },
    {
      "epoch": 0.8366351722035965,
      "grad_norm": 0.79296875,
      "learning_rate": 1.3674351904242611e-05,
      "loss": 0.9698,
      "step": 5490
    },
    {
      "epoch": 0.8367875647668394,
      "grad_norm": 1.109375,
      "learning_rate": 1.3649448552229904e-05,
      "loss": 0.8869,
      "step": 5491
    },
    {
      "epoch": 0.8369399573300823,
      "grad_norm": 0.953125,
      "learning_rate": 1.362456623645325e-05,
      "loss": 0.8287,
      "step": 5492
    },
    {
      "epoch": 0.8370923498933253,
      "grad_norm": 0.8203125,
      "learning_rate": 1.3599704962974347e-05,
      "loss": 1.0179,
      "step": 5493
    },
    {
      "epoch": 0.8372447424565681,
      "grad_norm": 0.984375,
      "learning_rate": 1.3574864737849791e-05,
      "loss": 0.8897,
      "step": 5494
    },
    {
      "epoch": 0.8373971350198111,
      "grad_norm": 0.8046875,
      "learning_rate": 1.3550045567130998e-05,
      "loss": 0.9585,
      "step": 5495
    },
    {
      "epoch": 0.8375495275830539,
      "grad_norm": 1.2265625,
      "learning_rate": 1.3525247456864255e-05,
      "loss": 1.1453,
      "step": 5496
    },
    {
      "epoch": 0.8377019201462969,
      "grad_norm": 1.046875,
      "learning_rate": 1.3500470413090804e-05,
      "loss": 0.9262,
      "step": 5497
    },
    {
      "epoch": 0.8378543127095398,
      "grad_norm": 0.73046875,
      "learning_rate": 1.347571444184661e-05,
      "loss": 0.7524,
      "step": 5498
    },
    {
      "epoch": 0.8380067052727826,
      "grad_norm": 0.90234375,
      "learning_rate": 1.3450979549162645e-05,
      "loss": 0.9434,
      "step": 5499
    },
    {
      "epoch": 0.8381590978360256,
      "grad_norm": 0.98828125,
      "learning_rate": 1.3426265741064648e-05,
      "loss": 0.9513,
      "step": 5500
    },
    {
      "epoch": 0.8383114903992686,
      "grad_norm": 0.81640625,
      "learning_rate": 1.3401573023573256e-05,
      "loss": 0.894,
      "step": 5501
    },
    {
      "epoch": 0.8384638829625114,
      "grad_norm": 1.34375,
      "learning_rate": 1.3376901402704e-05,
      "loss": 0.8795,
      "step": 5502
    },
    {
      "epoch": 0.8386162755257544,
      "grad_norm": 0.82421875,
      "learning_rate": 1.3352250884467244e-05,
      "loss": 0.8694,
      "step": 5503
    },
    {
      "epoch": 0.8387686680889973,
      "grad_norm": 0.9453125,
      "learning_rate": 1.3327621474868158e-05,
      "loss": 1.007,
      "step": 5504
    },
    {
      "epoch": 0.8389210606522401,
      "grad_norm": 0.83984375,
      "learning_rate": 1.3303013179906864e-05,
      "loss": 0.9994,
      "step": 5505
    },
    {
      "epoch": 0.8390734532154831,
      "grad_norm": 1.375,
      "learning_rate": 1.3278426005578282e-05,
      "loss": 0.9929,
      "step": 5506
    },
    {
      "epoch": 0.8392258457787259,
      "grad_norm": 0.87109375,
      "learning_rate": 1.3253859957872184e-05,
      "loss": 0.9619,
      "step": 5507
    },
    {
      "epoch": 0.8393782383419689,
      "grad_norm": 1.1484375,
      "learning_rate": 1.3229315042773283e-05,
      "loss": 1.1728,
      "step": 5508
    },
    {
      "epoch": 0.8395306309052118,
      "grad_norm": 1.21875,
      "learning_rate": 1.3204791266260997e-05,
      "loss": 1.0762,
      "step": 5509
    },
    {
      "epoch": 0.8396830234684547,
      "grad_norm": 0.71875,
      "learning_rate": 1.318028863430968e-05,
      "loss": 0.9116,
      "step": 5510
    },
    {
      "epoch": 0.8398354160316976,
      "grad_norm": 1.0390625,
      "learning_rate": 1.3155807152888555e-05,
      "loss": 0.8256,
      "step": 5511
    },
    {
      "epoch": 0.8399878085949406,
      "grad_norm": 1.2109375,
      "learning_rate": 1.3131346827961643e-05,
      "loss": 0.9006,
      "step": 5512
    },
    {
      "epoch": 0.8401402011581834,
      "grad_norm": 1.03125,
      "learning_rate": 1.3106907665487833e-05,
      "loss": 0.9902,
      "step": 5513
    },
    {
      "epoch": 0.8402925937214264,
      "grad_norm": 0.8203125,
      "learning_rate": 1.3082489671420895e-05,
      "loss": 1.0671,
      "step": 5514
    },
    {
      "epoch": 0.8404449862846693,
      "grad_norm": 0.828125,
      "learning_rate": 1.3058092851709324e-05,
      "loss": 0.9776,
      "step": 5515
    },
    {
      "epoch": 0.8405973788479122,
      "grad_norm": 0.859375,
      "learning_rate": 1.3033717212296614e-05,
      "loss": 0.8134,
      "step": 5516
    },
    {
      "epoch": 0.8407497714111551,
      "grad_norm": 0.95703125,
      "learning_rate": 1.300936275912098e-05,
      "loss": 0.9199,
      "step": 5517
    },
    {
      "epoch": 0.8409021639743981,
      "grad_norm": 0.8125,
      "learning_rate": 1.2985029498115497e-05,
      "loss": 1.0132,
      "step": 5518
    },
    {
      "epoch": 0.8410545565376409,
      "grad_norm": 1.1875,
      "learning_rate": 1.296071743520818e-05,
      "loss": 1.0406,
      "step": 5519
    },
    {
      "epoch": 0.8412069491008839,
      "grad_norm": 0.77734375,
      "learning_rate": 1.2936426576321725e-05,
      "loss": 0.7679,
      "step": 5520
    },
    {
      "epoch": 0.8413593416641267,
      "grad_norm": 0.79296875,
      "learning_rate": 1.291215692737373e-05,
      "loss": 0.876,
      "step": 5521
    },
    {
      "epoch": 0.8415117342273697,
      "grad_norm": 1.0390625,
      "learning_rate": 1.2887908494276668e-05,
      "loss": 0.9058,
      "step": 5522
    },
    {
      "epoch": 0.8416641267906126,
      "grad_norm": 0.69140625,
      "learning_rate": 1.286368128293779e-05,
      "loss": 1.0034,
      "step": 5523
    },
    {
      "epoch": 0.8418165193538555,
      "grad_norm": 0.8671875,
      "learning_rate": 1.2839475299259184e-05,
      "loss": 0.9818,
      "step": 5524
    },
    {
      "epoch": 0.8419689119170984,
      "grad_norm": 1.0859375,
      "learning_rate": 1.2815290549137814e-05,
      "loss": 0.9779,
      "step": 5525
    },
    {
      "epoch": 0.8421213044803414,
      "grad_norm": 0.8359375,
      "learning_rate": 1.27911270384654e-05,
      "loss": 1.0428,
      "step": 5526
    },
    {
      "epoch": 0.8422736970435842,
      "grad_norm": 0.8828125,
      "learning_rate": 1.2766984773128499e-05,
      "loss": 0.9785,
      "step": 5527
    },
    {
      "epoch": 0.8424260896068272,
      "grad_norm": 1.3046875,
      "learning_rate": 1.274286375900856e-05,
      "loss": 1.0729,
      "step": 5528
    },
    {
      "epoch": 0.8425784821700701,
      "grad_norm": 0.71875,
      "learning_rate": 1.2718764001981765e-05,
      "loss": 0.858,
      "step": 5529
    },
    {
      "epoch": 0.842730874733313,
      "grad_norm": 0.8984375,
      "learning_rate": 1.2694685507919214e-05,
      "loss": 1.0921,
      "step": 5530
    },
    {
      "epoch": 0.8428832672965559,
      "grad_norm": 1.234375,
      "learning_rate": 1.2670628282686758e-05,
      "loss": 0.8992,
      "step": 5531
    },
    {
      "epoch": 0.8430356598597989,
      "grad_norm": 0.97265625,
      "learning_rate": 1.2646592332145036e-05,
      "loss": 1.0191,
      "step": 5532
    },
    {
      "epoch": 0.8431880524230417,
      "grad_norm": 1.0078125,
      "learning_rate": 1.2622577662149604e-05,
      "loss": 0.9701,
      "step": 5533
    },
    {
      "epoch": 0.8433404449862847,
      "grad_norm": 0.84375,
      "learning_rate": 1.2598584278550774e-05,
      "loss": 1.0749,
      "step": 5534
    },
    {
      "epoch": 0.8434928375495275,
      "grad_norm": 0.9921875,
      "learning_rate": 1.2574612187193635e-05,
      "loss": 0.9006,
      "step": 5535
    },
    {
      "epoch": 0.8436452301127705,
      "grad_norm": 1.09375,
      "learning_rate": 1.2550661393918217e-05,
      "loss": 1.0322,
      "step": 5536
    },
    {
      "epoch": 0.8437976226760134,
      "grad_norm": 1.1640625,
      "learning_rate": 1.2526731904559208e-05,
      "loss": 0.9993,
      "step": 5537
    },
    {
      "epoch": 0.8439500152392563,
      "grad_norm": 0.88671875,
      "learning_rate": 1.2502823724946166e-05,
      "loss": 0.9761,
      "step": 5538
    },
    {
      "epoch": 0.8441024078024992,
      "grad_norm": 0.9765625,
      "learning_rate": 1.2478936860903524e-05,
      "loss": 0.8196,
      "step": 5539
    },
    {
      "epoch": 0.8442548003657422,
      "grad_norm": 1.0078125,
      "learning_rate": 1.2455071318250434e-05,
      "loss": 1.0211,
      "step": 5540
    },
    {
      "epoch": 0.844407192928985,
      "grad_norm": 0.73046875,
      "learning_rate": 1.2431227102800868e-05,
      "loss": 0.9895,
      "step": 5541
    },
    {
      "epoch": 0.844559585492228,
      "grad_norm": 0.93359375,
      "learning_rate": 1.2407404220363694e-05,
      "loss": 0.952,
      "step": 5542
    },
    {
      "epoch": 0.8447119780554709,
      "grad_norm": 1.1015625,
      "learning_rate": 1.2383602676742423e-05,
      "loss": 0.8685,
      "step": 5543
    },
    {
      "epoch": 0.8448643706187138,
      "grad_norm": 0.9609375,
      "learning_rate": 1.2359822477735473e-05,
      "loss": 1.0265,
      "step": 5544
    },
    {
      "epoch": 0.8450167631819567,
      "grad_norm": 0.9296875,
      "learning_rate": 1.2336063629136074e-05,
      "loss": 0.8076,
      "step": 5545
    },
    {
      "epoch": 0.8451691557451997,
      "grad_norm": 1.0078125,
      "learning_rate": 1.2312326136732189e-05,
      "loss": 0.9036,
      "step": 5546
    },
    {
      "epoch": 0.8453215483084425,
      "grad_norm": 1.2265625,
      "learning_rate": 1.2288610006306667e-05,
      "loss": 0.9612,
      "step": 5547
    },
    {
      "epoch": 0.8454739408716855,
      "grad_norm": 0.8203125,
      "learning_rate": 1.2264915243637031e-05,
      "loss": 1.0481,
      "step": 5548
    },
    {
      "epoch": 0.8456263334349283,
      "grad_norm": 1.203125,
      "learning_rate": 1.2241241854495688e-05,
      "loss": 1.0676,
      "step": 5549
    },
    {
      "epoch": 0.8457787259981713,
      "grad_norm": 1.234375,
      "learning_rate": 1.2217589844649835e-05,
      "loss": 1.0925,
      "step": 5550
    },
    {
      "epoch": 0.8459311185614142,
      "grad_norm": 0.8828125,
      "learning_rate": 1.2193959219861428e-05,
      "loss": 0.8478,
      "step": 5551
    },
    {
      "epoch": 0.8460835111246571,
      "grad_norm": 1.0,
      "learning_rate": 1.217034998588722e-05,
      "loss": 1.1401,
      "step": 5552
    },
    {
      "epoch": 0.8462359036879,
      "grad_norm": 0.8828125,
      "learning_rate": 1.2146762148478797e-05,
      "loss": 0.9297,
      "step": 5553
    },
    {
      "epoch": 0.846388296251143,
      "grad_norm": 0.89453125,
      "learning_rate": 1.2123195713382452e-05,
      "loss": 0.9734,
      "step": 5554
    },
    {
      "epoch": 0.8465406888143858,
      "grad_norm": 1.1015625,
      "learning_rate": 1.2099650686339303e-05,
      "loss": 1.0282,
      "step": 5555
    },
    {
      "epoch": 0.8466930813776288,
      "grad_norm": 0.9765625,
      "learning_rate": 1.2076127073085298e-05,
      "loss": 1.0038,
      "step": 5556
    },
    {
      "epoch": 0.8468454739408717,
      "grad_norm": 0.83984375,
      "learning_rate": 1.2052624879351104e-05,
      "loss": 0.7453,
      "step": 5557
    },
    {
      "epoch": 0.8469978665041146,
      "grad_norm": 0.7265625,
      "learning_rate": 1.2029144110862168e-05,
      "loss": 1.0342,
      "step": 5558
    },
    {
      "epoch": 0.8471502590673575,
      "grad_norm": 0.9765625,
      "learning_rate": 1.2005684773338821e-05,
      "loss": 1.0386,
      "step": 5559
    },
    {
      "epoch": 0.8473026516306005,
      "grad_norm": 0.85546875,
      "learning_rate": 1.1982246872496028e-05,
      "loss": 0.9362,
      "step": 5560
    },
    {
      "epoch": 0.8474550441938433,
      "grad_norm": 0.94140625,
      "learning_rate": 1.1958830414043588e-05,
      "loss": 0.8891,
      "step": 5561
    },
    {
      "epoch": 0.8476074367570863,
      "grad_norm": 0.90625,
      "learning_rate": 1.1935435403686135e-05,
      "loss": 0.9709,
      "step": 5562
    },
    {
      "epoch": 0.8477598293203291,
      "grad_norm": 0.79296875,
      "learning_rate": 1.1912061847122979e-05,
      "loss": 1.041,
      "step": 5563
    },
    {
      "epoch": 0.8479122218835721,
      "grad_norm": 0.98046875,
      "learning_rate": 1.1888709750048344e-05,
      "loss": 0.8181,
      "step": 5564
    },
    {
      "epoch": 0.848064614446815,
      "grad_norm": 0.91015625,
      "learning_rate": 1.1865379118151043e-05,
      "loss": 0.9605,
      "step": 5565
    },
    {
      "epoch": 0.8482170070100579,
      "grad_norm": 1.046875,
      "learning_rate": 1.1842069957114777e-05,
      "loss": 0.9795,
      "step": 5566
    },
    {
      "epoch": 0.8483693995733008,
      "grad_norm": 0.8359375,
      "learning_rate": 1.1818782272618012e-05,
      "loss": 0.8936,
      "step": 5567
    },
    {
      "epoch": 0.8485217921365438,
      "grad_norm": 0.97265625,
      "learning_rate": 1.1795516070333966e-05,
      "loss": 0.9222,
      "step": 5568
    },
    {
      "epoch": 0.8486741846997866,
      "grad_norm": 0.8828125,
      "learning_rate": 1.1772271355930576e-05,
      "loss": 1.033,
      "step": 5569
    },
    {
      "epoch": 0.8488265772630296,
      "grad_norm": 0.82421875,
      "learning_rate": 1.1749048135070673e-05,
      "loss": 0.8671,
      "step": 5570
    },
    {
      "epoch": 0.8489789698262725,
      "grad_norm": 0.8828125,
      "learning_rate": 1.172584641341169e-05,
      "loss": 0.9392,
      "step": 5571
    },
    {
      "epoch": 0.8491313623895154,
      "grad_norm": 1.1796875,
      "learning_rate": 1.1702666196605904e-05,
      "loss": 0.9637,
      "step": 5572
    },
    {
      "epoch": 0.8492837549527583,
      "grad_norm": 0.98046875,
      "learning_rate": 1.1679507490300402e-05,
      "loss": 1.1592,
      "step": 5573
    },
    {
      "epoch": 0.8494361475160013,
      "grad_norm": 0.92578125,
      "learning_rate": 1.1656370300136943e-05,
      "loss": 1.0019,
      "step": 5574
    },
    {
      "epoch": 0.8495885400792441,
      "grad_norm": 1.265625,
      "learning_rate": 1.1633254631752077e-05,
      "loss": 1.0232,
      "step": 5575
    },
    {
      "epoch": 0.8497409326424871,
      "grad_norm": 1.421875,
      "learning_rate": 1.1610160490777122e-05,
      "loss": 1.0041,
      "step": 5576
    },
    {
      "epoch": 0.8498933252057299,
      "grad_norm": 1.0234375,
      "learning_rate": 1.1587087882838144e-05,
      "loss": 0.8978,
      "step": 5577
    },
    {
      "epoch": 0.8500457177689729,
      "grad_norm": 0.98046875,
      "learning_rate": 1.1564036813555933e-05,
      "loss": 1.0311,
      "step": 5578
    },
    {
      "epoch": 0.8501981103322158,
      "grad_norm": 0.8984375,
      "learning_rate": 1.1541007288546113e-05,
      "loss": 0.9692,
      "step": 5579
    },
    {
      "epoch": 0.8503505028954587,
      "grad_norm": 0.91796875,
      "learning_rate": 1.1517999313418948e-05,
      "loss": 1.0233,
      "step": 5580
    },
    {
      "epoch": 0.8505028954587016,
      "grad_norm": 1.09375,
      "learning_rate": 1.14950128937796e-05,
      "loss": 0.9413,
      "step": 5581
    },
    {
      "epoch": 0.8506552880219446,
      "grad_norm": 0.86328125,
      "learning_rate": 1.1472048035227812e-05,
      "loss": 0.9876,
      "step": 5582
    },
    {
      "epoch": 0.8508076805851874,
      "grad_norm": 0.69140625,
      "learning_rate": 1.1449104743358152e-05,
      "loss": 0.7341,
      "step": 5583
    },
    {
      "epoch": 0.8509600731484304,
      "grad_norm": 1.0546875,
      "learning_rate": 1.1426183023759985e-05,
      "loss": 1.0275,
      "step": 5584
    },
    {
      "epoch": 0.8511124657116733,
      "grad_norm": 0.8984375,
      "learning_rate": 1.1403282882017341e-05,
      "loss": 1.1008,
      "step": 5585
    },
    {
      "epoch": 0.8512648582749162,
      "grad_norm": 0.765625,
      "learning_rate": 1.1380404323709016e-05,
      "loss": 0.9308,
      "step": 5586
    },
    {
      "epoch": 0.8514172508381591,
      "grad_norm": 0.69140625,
      "learning_rate": 1.1357547354408615e-05,
      "loss": 0.9493,
      "step": 5587
    },
    {
      "epoch": 0.8515696434014021,
      "grad_norm": 1.2265625,
      "learning_rate": 1.1334711979684353e-05,
      "loss": 0.9544,
      "step": 5588
    },
    {
      "epoch": 0.8517220359646449,
      "grad_norm": 0.984375,
      "learning_rate": 1.1311898205099269e-05,
      "loss": 0.989,
      "step": 5589
    },
    {
      "epoch": 0.8518744285278879,
      "grad_norm": 0.94140625,
      "learning_rate": 1.1289106036211161e-05,
      "loss": 0.9087,
      "step": 5590
    },
    {
      "epoch": 0.8520268210911307,
      "grad_norm": 1.0703125,
      "learning_rate": 1.1266335478572499e-05,
      "loss": 1.063,
      "step": 5591
    },
    {
      "epoch": 0.8521792136543737,
      "grad_norm": 0.890625,
      "learning_rate": 1.1243586537730532e-05,
      "loss": 0.9394,
      "step": 5592
    },
    {
      "epoch": 0.8523316062176166,
      "grad_norm": 1.0390625,
      "learning_rate": 1.1220859219227232e-05,
      "loss": 1.0027,
      "step": 5593
    },
    {
      "epoch": 0.8524839987808595,
      "grad_norm": 0.94921875,
      "learning_rate": 1.1198153528599265e-05,
      "loss": 1.0061,
      "step": 5594
    },
    {
      "epoch": 0.8526363913441024,
      "grad_norm": 0.8515625,
      "learning_rate": 1.117546947137812e-05,
      "loss": 0.9681,
      "step": 5595
    },
    {
      "epoch": 0.8527887839073454,
      "grad_norm": 0.9453125,
      "learning_rate": 1.115280705308992e-05,
      "loss": 0.9432,
      "step": 5596
    },
    {
      "epoch": 0.8529411764705882,
      "grad_norm": 0.8671875,
      "learning_rate": 1.113016627925556e-05,
      "loss": 1.0561,
      "step": 5597
    },
    {
      "epoch": 0.8530935690338312,
      "grad_norm": 1.109375,
      "learning_rate": 1.1107547155390697e-05,
      "loss": 0.9568,
      "step": 5598
    },
    {
      "epoch": 0.8532459615970741,
      "grad_norm": 1.046875,
      "learning_rate": 1.108494968700563e-05,
      "loss": 0.9872,
      "step": 5599
    },
    {
      "epoch": 0.853398354160317,
      "grad_norm": 0.96875,
      "learning_rate": 1.1062373879605415e-05,
      "loss": 0.9743,
      "step": 5600
    },
    {
      "epoch": 0.8535507467235599,
      "grad_norm": 1.1015625,
      "learning_rate": 1.1039819738689894e-05,
      "loss": 0.9977,
      "step": 5601
    },
    {
      "epoch": 0.8537031392868029,
      "grad_norm": 0.9453125,
      "learning_rate": 1.101728726975355e-05,
      "loss": 1.0726,
      "step": 5602
    },
    {
      "epoch": 0.8538555318500457,
      "grad_norm": 1.0625,
      "learning_rate": 1.0994776478285618e-05,
      "loss": 1.0772,
      "step": 5603
    },
    {
      "epoch": 0.8540079244132887,
      "grad_norm": 0.91796875,
      "learning_rate": 1.0972287369770062e-05,
      "loss": 0.9646,
      "step": 5604
    },
    {
      "epoch": 0.8541603169765315,
      "grad_norm": 1.203125,
      "learning_rate": 1.0949819949685546e-05,
      "loss": 1.0655,
      "step": 5605
    },
    {
      "epoch": 0.8543127095397745,
      "grad_norm": 1.0390625,
      "learning_rate": 1.0927374223505427e-05,
      "loss": 1.0552,
      "step": 5606
    },
    {
      "epoch": 0.8544651021030174,
      "grad_norm": 1.015625,
      "learning_rate": 1.090495019669786e-05,
      "loss": 1.1305,
      "step": 5607
    },
    {
      "epoch": 0.8546174946662602,
      "grad_norm": 0.859375,
      "learning_rate": 1.0882547874725636e-05,
      "loss": 0.6881,
      "step": 5608
    },
    {
      "epoch": 0.8547698872295032,
      "grad_norm": 0.90625,
      "learning_rate": 1.0860167263046283e-05,
      "loss": 0.9635,
      "step": 5609
    },
    {
      "epoch": 0.8549222797927462,
      "grad_norm": 1.09375,
      "learning_rate": 1.0837808367112035e-05,
      "loss": 1.0671,
      "step": 5610
    },
    {
      "epoch": 0.855074672355989,
      "grad_norm": 0.7890625,
      "learning_rate": 1.0815471192369831e-05,
      "loss": 0.9076,
      "step": 5611
    },
    {
      "epoch": 0.855227064919232,
      "grad_norm": 1.046875,
      "learning_rate": 1.0793155744261351e-05,
      "loss": 0.9665,
      "step": 5612
    },
    {
      "epoch": 0.8553794574824749,
      "grad_norm": 0.91015625,
      "learning_rate": 1.077086202822295e-05,
      "loss": 0.961,
      "step": 5613
    },
    {
      "epoch": 0.8555318500457177,
      "grad_norm": 1.1328125,
      "learning_rate": 1.074859004968568e-05,
      "loss": 0.9189,
      "step": 5614
    },
    {
      "epoch": 0.8556842426089607,
      "grad_norm": 0.921875,
      "learning_rate": 1.072633981407538e-05,
      "loss": 0.9699,
      "step": 5615
    },
    {
      "epoch": 0.8558366351722035,
      "grad_norm": 0.7421875,
      "learning_rate": 1.070411132681245e-05,
      "loss": 0.8177,
      "step": 5616
    },
    {
      "epoch": 0.8559890277354465,
      "grad_norm": 1.0390625,
      "learning_rate": 1.0681904593312086e-05,
      "loss": 1.157,
      "step": 5617
    },
    {
      "epoch": 0.8561414202986894,
      "grad_norm": 0.8359375,
      "learning_rate": 1.06597196189842e-05,
      "loss": 1.0172,
      "step": 5618
    },
    {
      "epoch": 0.8562938128619323,
      "grad_norm": 0.87890625,
      "learning_rate": 1.063755640923334e-05,
      "loss": 0.7784,
      "step": 5619
    },
    {
      "epoch": 0.8564462054251752,
      "grad_norm": 1.0390625,
      "learning_rate": 1.0615414969458803e-05,
      "loss": 1.0682,
      "step": 5620
    },
    {
      "epoch": 0.8565985979884182,
      "grad_norm": 0.86328125,
      "learning_rate": 1.059329530505455e-05,
      "loss": 0.7792,
      "step": 5621
    },
    {
      "epoch": 0.856750990551661,
      "grad_norm": 0.8046875,
      "learning_rate": 1.0571197421409262e-05,
      "loss": 0.8116,
      "step": 5622
    },
    {
      "epoch": 0.856903383114904,
      "grad_norm": 0.98046875,
      "learning_rate": 1.0549121323906264e-05,
      "loss": 1.0371,
      "step": 5623
    },
    {
      "epoch": 0.857055775678147,
      "grad_norm": 0.671875,
      "learning_rate": 1.0527067017923654e-05,
      "loss": 0.8898,
      "step": 5624
    },
    {
      "epoch": 0.8572081682413898,
      "grad_norm": 0.8203125,
      "learning_rate": 1.0505034508834167e-05,
      "loss": 0.9695,
      "step": 5625
    },
    {
      "epoch": 0.8573605608046327,
      "grad_norm": 0.83203125,
      "learning_rate": 1.0483023802005242e-05,
      "loss": 0.9801,
      "step": 5626
    },
    {
      "epoch": 0.8575129533678757,
      "grad_norm": 0.80859375,
      "learning_rate": 1.0461034902798983e-05,
      "loss": 0.8961,
      "step": 5627
    },
    {
      "epoch": 0.8576653459311185,
      "grad_norm": 0.8671875,
      "learning_rate": 1.0439067816572202e-05,
      "loss": 0.955,
      "step": 5628
    },
    {
      "epoch": 0.8578177384943615,
      "grad_norm": 0.84765625,
      "learning_rate": 1.0417122548676428e-05,
      "loss": 1.0367,
      "step": 5629
    },
    {
      "epoch": 0.8579701310576043,
      "grad_norm": 1.1640625,
      "learning_rate": 1.0395199104457832e-05,
      "loss": 1.0246,
      "step": 5630
    },
    {
      "epoch": 0.8581225236208473,
      "grad_norm": 0.91796875,
      "learning_rate": 1.0373297489257272e-05,
      "loss": 0.9777,
      "step": 5631
    },
    {
      "epoch": 0.8582749161840902,
      "grad_norm": 1.015625,
      "learning_rate": 1.0351417708410293e-05,
      "loss": 1.1825,
      "step": 5632
    },
    {
      "epoch": 0.8584273087473331,
      "grad_norm": 0.97265625,
      "learning_rate": 1.0329559767247132e-05,
      "loss": 0.9841,
      "step": 5633
    },
    {
      "epoch": 0.858579701310576,
      "grad_norm": 1.0234375,
      "learning_rate": 1.0307723671092684e-05,
      "loss": 0.9297,
      "step": 5634
    },
    {
      "epoch": 0.858732093873819,
      "grad_norm": 0.80859375,
      "learning_rate": 1.028590942526656e-05,
      "loss": 0.9101,
      "step": 5635
    },
    {
      "epoch": 0.8588844864370618,
      "grad_norm": 1.015625,
      "learning_rate": 1.0264117035083009e-05,
      "loss": 1.0037,
      "step": 5636
    },
    {
      "epoch": 0.8590368790003048,
      "grad_norm": 0.91015625,
      "learning_rate": 1.0242346505850986e-05,
      "loss": 0.9569,
      "step": 5637
    },
    {
      "epoch": 0.8591892715635477,
      "grad_norm": 0.89453125,
      "learning_rate": 1.022059784287408e-05,
      "loss": 0.9054,
      "step": 5638
    },
    {
      "epoch": 0.8593416641267906,
      "grad_norm": 0.93359375,
      "learning_rate": 1.0198871051450598e-05,
      "loss": 0.9091,
      "step": 5639
    },
    {
      "epoch": 0.8594940566900335,
      "grad_norm": 0.828125,
      "learning_rate": 1.0177166136873472e-05,
      "loss": 0.9102,
      "step": 5640
    },
    {
      "epoch": 0.8596464492532765,
      "grad_norm": 0.7734375,
      "learning_rate": 1.0155483104430375e-05,
      "loss": 0.937,
      "step": 5641
    },
    {
      "epoch": 0.8597988418165193,
      "grad_norm": 0.80078125,
      "learning_rate": 1.0133821959403567e-05,
      "loss": 0.8785,
      "step": 5642
    },
    {
      "epoch": 0.8599512343797623,
      "grad_norm": 0.79296875,
      "learning_rate": 1.011218270707004e-05,
      "loss": 0.9855,
      "step": 5643
    },
    {
      "epoch": 0.8601036269430051,
      "grad_norm": 1.0234375,
      "learning_rate": 1.009056535270141e-05,
      "loss": 1.0289,
      "step": 5644
    },
    {
      "epoch": 0.8602560195062481,
      "grad_norm": 0.8828125,
      "learning_rate": 1.0068969901563963e-05,
      "loss": 1.0677,
      "step": 5645
    },
    {
      "epoch": 0.860408412069491,
      "grad_norm": 0.9140625,
      "learning_rate": 1.0047396358918704e-05,
      "loss": 0.8837,
      "step": 5646
    },
    {
      "epoch": 0.8605608046327339,
      "grad_norm": 1.3984375,
      "learning_rate": 1.0025844730021228e-05,
      "loss": 0.9601,
      "step": 5647
    },
    {
      "epoch": 0.8607131971959768,
      "grad_norm": 0.98046875,
      "learning_rate": 1.0004315020121835e-05,
      "loss": 0.9757,
      "step": 5648
    },
    {
      "epoch": 0.8608655897592198,
      "grad_norm": 0.90625,
      "learning_rate": 9.982807234465452e-06,
      "loss": 1.1106,
      "step": 5649
    },
    {
      "epoch": 0.8610179823224626,
      "grad_norm": 0.83203125,
      "learning_rate": 9.96132137829171e-06,
      "loss": 0.8327,
      "step": 5650
    },
    {
      "epoch": 0.8611703748857056,
      "grad_norm": 0.92578125,
      "learning_rate": 9.939857456834833e-06,
      "loss": 1.0278,
      "step": 5651
    },
    {
      "epoch": 0.8613227674489485,
      "grad_norm": 1.1484375,
      "learning_rate": 9.918415475323783e-06,
      "loss": 0.9308,
      "step": 5652
    },
    {
      "epoch": 0.8614751600121914,
      "grad_norm": 1.1875,
      "learning_rate": 9.896995438982126e-06,
      "loss": 0.8945,
      "step": 5653
    },
    {
      "epoch": 0.8616275525754343,
      "grad_norm": 1.4296875,
      "learning_rate": 9.875597353028088e-06,
      "loss": 1.1655,
      "step": 5654
    },
    {
      "epoch": 0.8617799451386773,
      "grad_norm": 1.1953125,
      "learning_rate": 9.854221222674542e-06,
      "loss": 0.8937,
      "step": 5655
    },
    {
      "epoch": 0.8619323377019201,
      "grad_norm": 0.88671875,
      "learning_rate": 9.832867053129035e-06,
      "loss": 0.7708,
      "step": 5656
    },
    {
      "epoch": 0.8620847302651631,
      "grad_norm": 0.86328125,
      "learning_rate": 9.811534849593706e-06,
      "loss": 1.0253,
      "step": 5657
    },
    {
      "epoch": 0.8622371228284059,
      "grad_norm": 0.9296875,
      "learning_rate": 9.790224617265454e-06,
      "loss": 1.0352,
      "step": 5658
    },
    {
      "epoch": 0.8623895153916489,
      "grad_norm": 0.671875,
      "learning_rate": 9.768936361335723e-06,
      "loss": 0.899,
      "step": 5659
    },
    {
      "epoch": 0.8625419079548918,
      "grad_norm": 0.953125,
      "learning_rate": 9.747670086990634e-06,
      "loss": 0.971,
      "step": 5660
    },
    {
      "epoch": 0.8626943005181347,
      "grad_norm": 1.28125,
      "learning_rate": 9.726425799410965e-06,
      "loss": 0.9722,
      "step": 5661
    },
    {
      "epoch": 0.8628466930813776,
      "grad_norm": 0.93359375,
      "learning_rate": 9.705203503772108e-06,
      "loss": 1.0125,
      "step": 5662
    },
    {
      "epoch": 0.8629990856446206,
      "grad_norm": 0.94140625,
      "learning_rate": 9.684003205244175e-06,
      "loss": 1.0855,
      "step": 5663
    },
    {
      "epoch": 0.8631514782078634,
      "grad_norm": 0.9296875,
      "learning_rate": 9.662824908991807e-06,
      "loss": 0.9319,
      "step": 5664
    },
    {
      "epoch": 0.8633038707711064,
      "grad_norm": 0.84765625,
      "learning_rate": 9.641668620174382e-06,
      "loss": 0.88,
      "step": 5665
    },
    {
      "epoch": 0.8634562633343493,
      "grad_norm": 1.171875,
      "learning_rate": 9.620534343945841e-06,
      "loss": 1.0486,
      "step": 5666
    },
    {
      "epoch": 0.8636086558975922,
      "grad_norm": 0.90234375,
      "learning_rate": 9.599422085454823e-06,
      "loss": 0.9671,
      "step": 5667
    },
    {
      "epoch": 0.8637610484608351,
      "grad_norm": 1.15625,
      "learning_rate": 9.578331849844547e-06,
      "loss": 0.9146,
      "step": 5668
    },
    {
      "epoch": 0.8639134410240781,
      "grad_norm": 1.1171875,
      "learning_rate": 9.557263642252945e-06,
      "loss": 0.9713,
      "step": 5669
    },
    {
      "epoch": 0.8640658335873209,
      "grad_norm": 0.83203125,
      "learning_rate": 9.536217467812502e-06,
      "loss": 0.8786,
      "step": 5670
    },
    {
      "epoch": 0.8642182261505639,
      "grad_norm": 0.9453125,
      "learning_rate": 9.515193331650385e-06,
      "loss": 0.9481,
      "step": 5671
    },
    {
      "epoch": 0.8643706187138067,
      "grad_norm": 1.046875,
      "learning_rate": 9.494191238888362e-06,
      "loss": 0.9619,
      "step": 5672
    },
    {
      "epoch": 0.8645230112770497,
      "grad_norm": 0.91015625,
      "learning_rate": 9.47321119464285e-06,
      "loss": 1.0824,
      "step": 5673
    },
    {
      "epoch": 0.8646754038402926,
      "grad_norm": 0.90234375,
      "learning_rate": 9.452253204024864e-06,
      "loss": 1.0373,
      "step": 5674
    },
    {
      "epoch": 0.8648277964035355,
      "grad_norm": 1.140625,
      "learning_rate": 9.431317272140128e-06,
      "loss": 1.1171,
      "step": 5675
    },
    {
      "epoch": 0.8649801889667784,
      "grad_norm": 1.2421875,
      "learning_rate": 9.410403404088896e-06,
      "loss": 1.0427,
      "step": 5676
    },
    {
      "epoch": 0.8651325815300214,
      "grad_norm": 1.078125,
      "learning_rate": 9.389511604966105e-06,
      "loss": 0.9861,
      "step": 5677
    },
    {
      "epoch": 0.8652849740932642,
      "grad_norm": 0.9140625,
      "learning_rate": 9.368641879861284e-06,
      "loss": 0.8994,
      "step": 5678
    },
    {
      "epoch": 0.8654373666565072,
      "grad_norm": 0.8359375,
      "learning_rate": 9.347794233858598e-06,
      "loss": 1.0241,
      "step": 5679
    },
    {
      "epoch": 0.8655897592197501,
      "grad_norm": 1.609375,
      "learning_rate": 9.32696867203684e-06,
      "loss": 1.0427,
      "step": 5680
    },
    {
      "epoch": 0.865742151782993,
      "grad_norm": 0.81640625,
      "learning_rate": 9.306165199469418e-06,
      "loss": 0.9216,
      "step": 5681
    },
    {
      "epoch": 0.8658945443462359,
      "grad_norm": 0.828125,
      "learning_rate": 9.28538382122437e-06,
      "loss": 0.815,
      "step": 5682
    },
    {
      "epoch": 0.8660469369094789,
      "grad_norm": 1.078125,
      "learning_rate": 9.264624542364309e-06,
      "loss": 1.0682,
      "step": 5683
    },
    {
      "epoch": 0.8661993294727217,
      "grad_norm": 1.09375,
      "learning_rate": 9.243887367946514e-06,
      "loss": 0.9227,
      "step": 5684
    },
    {
      "epoch": 0.8663517220359647,
      "grad_norm": 0.94140625,
      "learning_rate": 9.22317230302282e-06,
      "loss": 1.0053,
      "step": 5685
    },
    {
      "epoch": 0.8665041145992075,
      "grad_norm": 0.98046875,
      "learning_rate": 9.202479352639782e-06,
      "loss": 0.9657,
      "step": 5686
    },
    {
      "epoch": 0.8666565071624505,
      "grad_norm": 1.078125,
      "learning_rate": 9.181808521838465e-06,
      "loss": 0.8933,
      "step": 5687
    },
    {
      "epoch": 0.8668088997256934,
      "grad_norm": 0.97265625,
      "learning_rate": 9.161159815654574e-06,
      "loss": 1.06,
      "step": 5688
    },
    {
      "epoch": 0.8669612922889363,
      "grad_norm": 1.203125,
      "learning_rate": 9.140533239118443e-06,
      "loss": 0.9797,
      "step": 5689
    },
    {
      "epoch": 0.8671136848521792,
      "grad_norm": 1.203125,
      "learning_rate": 9.119928797254995e-06,
      "loss": 0.9591,
      "step": 5690
    },
    {
      "epoch": 0.8672660774154222,
      "grad_norm": 1.0390625,
      "learning_rate": 9.09934649508375e-06,
      "loss": 0.8813,
      "step": 5691
    },
    {
      "epoch": 0.867418469978665,
      "grad_norm": 1.1171875,
      "learning_rate": 9.078786337618895e-06,
      "loss": 1.0345,
      "step": 5692
    },
    {
      "epoch": 0.867570862541908,
      "grad_norm": 0.8046875,
      "learning_rate": 9.058248329869157e-06,
      "loss": 1.0611,
      "step": 5693
    },
    {
      "epoch": 0.8677232551051509,
      "grad_norm": 0.84765625,
      "learning_rate": 9.037732476837901e-06,
      "loss": 0.9756,
      "step": 5694
    },
    {
      "epoch": 0.8678756476683938,
      "grad_norm": 0.97265625,
      "learning_rate": 9.017238783523064e-06,
      "loss": 1.0462,
      "step": 5695
    },
    {
      "epoch": 0.8680280402316367,
      "grad_norm": 0.68359375,
      "learning_rate": 8.996767254917205e-06,
      "loss": 0.9386,
      "step": 5696
    },
    {
      "epoch": 0.8681804327948797,
      "grad_norm": 0.828125,
      "learning_rate": 8.976317896007502e-06,
      "loss": 0.9621,
      "step": 5697
    },
    {
      "epoch": 0.8683328253581225,
      "grad_norm": 0.87109375,
      "learning_rate": 8.955890711775705e-06,
      "loss": 0.8841,
      "step": 5698
    },
    {
      "epoch": 0.8684852179213655,
      "grad_norm": 1.1171875,
      "learning_rate": 8.935485707198177e-06,
      "loss": 0.9743,
      "step": 5699
    },
    {
      "epoch": 0.8686376104846083,
      "grad_norm": 1.0234375,
      "learning_rate": 8.915102887245863e-06,
      "loss": 1.057,
      "step": 5700
    },
    {
      "epoch": 0.8687900030478513,
      "grad_norm": 0.89453125,
      "learning_rate": 8.894742256884303e-06,
      "loss": 1.0111,
      "step": 5701
    },
    {
      "epoch": 0.8689423956110942,
      "grad_norm": 1.3671875,
      "learning_rate": 8.87440382107363e-06,
      "loss": 1.0749,
      "step": 5702
    },
    {
      "epoch": 0.869094788174337,
      "grad_norm": 0.96875,
      "learning_rate": 8.85408758476861e-06,
      "loss": 0.9137,
      "step": 5703
    },
    {
      "epoch": 0.86924718073758,
      "grad_norm": 1.1796875,
      "learning_rate": 8.833793552918557e-06,
      "loss": 1.2546,
      "step": 5704
    },
    {
      "epoch": 0.869399573300823,
      "grad_norm": 1.0703125,
      "learning_rate": 8.8135217304674e-06,
      "loss": 1.0598,
      "step": 5705
    },
    {
      "epoch": 0.8695519658640658,
      "grad_norm": 1.1015625,
      "learning_rate": 8.793272122353624e-06,
      "loss": 1.0454,
      "step": 5706
    },
    {
      "epoch": 0.8697043584273088,
      "grad_norm": 1.09375,
      "learning_rate": 8.773044733510338e-06,
      "loss": 1.0408,
      "step": 5707
    },
    {
      "epoch": 0.8698567509905517,
      "grad_norm": 0.87109375,
      "learning_rate": 8.752839568865202e-06,
      "loss": 0.8668,
      "step": 5708
    },
    {
      "epoch": 0.8700091435537946,
      "grad_norm": 1.0703125,
      "learning_rate": 8.732656633340531e-06,
      "loss": 1.0359,
      "step": 5709
    },
    {
      "epoch": 0.8701615361170375,
      "grad_norm": 0.88671875,
      "learning_rate": 8.712495931853137e-06,
      "loss": 0.9406,
      "step": 5710
    },
    {
      "epoch": 0.8703139286802805,
      "grad_norm": 0.73828125,
      "learning_rate": 8.692357469314482e-06,
      "loss": 1.0986,
      "step": 5711
    },
    {
      "epoch": 0.8704663212435233,
      "grad_norm": 0.80078125,
      "learning_rate": 8.672241250630575e-06,
      "loss": 0.9283,
      "step": 5712
    },
    {
      "epoch": 0.8706187138067663,
      "grad_norm": 0.640625,
      "learning_rate": 8.652147280702006e-06,
      "loss": 0.8141,
      "step": 5713
    },
    {
      "epoch": 0.8707711063700091,
      "grad_norm": 0.63671875,
      "learning_rate": 8.632075564423969e-06,
      "loss": 0.9401,
      "step": 5714
    },
    {
      "epoch": 0.870923498933252,
      "grad_norm": 0.80078125,
      "learning_rate": 8.612026106686233e-06,
      "loss": 0.8855,
      "step": 5715
    },
    {
      "epoch": 0.871075891496495,
      "grad_norm": 0.9296875,
      "learning_rate": 8.59199891237311e-06,
      "loss": 0.864,
      "step": 5716
    },
    {
      "epoch": 0.8712282840597378,
      "grad_norm": 0.9765625,
      "learning_rate": 8.571993986363524e-06,
      "loss": 0.9297,
      "step": 5717
    },
    {
      "epoch": 0.8713806766229808,
      "grad_norm": 0.88671875,
      "learning_rate": 8.552011333530963e-06,
      "loss": 0.9511,
      "step": 5718
    },
    {
      "epoch": 0.8715330691862238,
      "grad_norm": 0.9453125,
      "learning_rate": 8.532050958743465e-06,
      "loss": 1.1128,
      "step": 5719
    },
    {
      "epoch": 0.8716854617494666,
      "grad_norm": 1.109375,
      "learning_rate": 8.512112866863709e-06,
      "loss": 1.0264,
      "step": 5720
    },
    {
      "epoch": 0.8718378543127095,
      "grad_norm": 1.046875,
      "learning_rate": 8.492197062748885e-06,
      "loss": 0.7564,
      "step": 5721
    },
    {
      "epoch": 0.8719902468759525,
      "grad_norm": 1.15625,
      "learning_rate": 8.472303551250748e-06,
      "loss": 1.1842,
      "step": 5722
    },
    {
      "epoch": 0.8721426394391953,
      "grad_norm": 1.0625,
      "learning_rate": 8.452432337215666e-06,
      "loss": 1.0439,
      "step": 5723
    },
    {
      "epoch": 0.8722950320024383,
      "grad_norm": 0.953125,
      "learning_rate": 8.43258342548453e-06,
      "loss": 0.8609,
      "step": 5724
    },
    {
      "epoch": 0.8724474245656811,
      "grad_norm": 0.94921875,
      "learning_rate": 8.412756820892853e-06,
      "loss": 0.9116,
      "step": 5725
    },
    {
      "epoch": 0.8725998171289241,
      "grad_norm": 0.95703125,
      "learning_rate": 8.392952528270659e-06,
      "loss": 1.0028,
      "step": 5726
    },
    {
      "epoch": 0.872752209692167,
      "grad_norm": 0.86328125,
      "learning_rate": 8.373170552442555e-06,
      "loss": 1.0082,
      "step": 5727
    },
    {
      "epoch": 0.8729046022554099,
      "grad_norm": 1.140625,
      "learning_rate": 8.353410898227731e-06,
      "loss": 0.9934,
      "step": 5728
    },
    {
      "epoch": 0.8730569948186528,
      "grad_norm": 1.1796875,
      "learning_rate": 8.333673570439914e-06,
      "loss": 1.1175,
      "step": 5729
    },
    {
      "epoch": 0.8732093873818958,
      "grad_norm": 0.8125,
      "learning_rate": 8.313958573887382e-06,
      "loss": 0.9401,
      "step": 5730
    },
    {
      "epoch": 0.8733617799451386,
      "grad_norm": 0.9609375,
      "learning_rate": 8.294265913373022e-06,
      "loss": 0.9916,
      "step": 5731
    },
    {
      "epoch": 0.8735141725083816,
      "grad_norm": 0.87109375,
      "learning_rate": 8.274595593694246e-06,
      "loss": 0.9489,
      "step": 5732
    },
    {
      "epoch": 0.8736665650716245,
      "grad_norm": 0.78515625,
      "learning_rate": 8.254947619643017e-06,
      "loss": 0.8351,
      "step": 5733
    },
    {
      "epoch": 0.8738189576348674,
      "grad_norm": 0.78515625,
      "learning_rate": 8.235321996005863e-06,
      "loss": 0.8188,
      "step": 5734
    },
    {
      "epoch": 0.8739713501981103,
      "grad_norm": 0.92578125,
      "learning_rate": 8.21571872756387e-06,
      "loss": 0.8618,
      "step": 5735
    },
    {
      "epoch": 0.8741237427613533,
      "grad_norm": 0.93359375,
      "learning_rate": 8.196137819092665e-06,
      "loss": 0.9748,
      "step": 5736
    },
    {
      "epoch": 0.8742761353245961,
      "grad_norm": 0.80078125,
      "learning_rate": 8.17657927536246e-06,
      "loss": 0.9848,
      "step": 5737
    },
    {
      "epoch": 0.8744285278878391,
      "grad_norm": 0.859375,
      "learning_rate": 8.157043101137995e-06,
      "loss": 1.0159,
      "step": 5738
    },
    {
      "epoch": 0.8745809204510819,
      "grad_norm": 1.34375,
      "learning_rate": 8.137529301178559e-06,
      "loss": 1.0493,
      "step": 5739
    },
    {
      "epoch": 0.8747333130143249,
      "grad_norm": 0.76171875,
      "learning_rate": 8.118037880237983e-06,
      "loss": 0.9476,
      "step": 5740
    },
    {
      "epoch": 0.8748857055775678,
      "grad_norm": 1.0546875,
      "learning_rate": 8.098568843064647e-06,
      "loss": 1.1454,
      "step": 5741
    },
    {
      "epoch": 0.8750380981408107,
      "grad_norm": 0.8671875,
      "learning_rate": 8.079122194401534e-06,
      "loss": 0.9943,
      "step": 5742
    },
    {
      "epoch": 0.8751904907040536,
      "grad_norm": 0.8203125,
      "learning_rate": 8.059697938986099e-06,
      "loss": 0.9133,
      "step": 5743
    },
    {
      "epoch": 0.8753428832672966,
      "grad_norm": 1.078125,
      "learning_rate": 8.04029608155037e-06,
      "loss": 0.9776,
      "step": 5744
    },
    {
      "epoch": 0.8754952758305394,
      "grad_norm": 1.2421875,
      "learning_rate": 8.020916626820919e-06,
      "loss": 1.1249,
      "step": 5745
    },
    {
      "epoch": 0.8756476683937824,
      "grad_norm": 0.77734375,
      "learning_rate": 8.001559579518857e-06,
      "loss": 1.0446,
      "step": 5746
    },
    {
      "epoch": 0.8758000609570253,
      "grad_norm": 0.78515625,
      "learning_rate": 7.982224944359828e-06,
      "loss": 1.01,
      "step": 5747
    },
    {
      "epoch": 0.8759524535202682,
      "grad_norm": 1.0390625,
      "learning_rate": 7.962912726054061e-06,
      "loss": 0.9678,
      "step": 5748
    },
    {
      "epoch": 0.8761048460835111,
      "grad_norm": 1.015625,
      "learning_rate": 7.943622929306272e-06,
      "loss": 0.9676,
      "step": 5749
    },
    {
      "epoch": 0.8762572386467541,
      "grad_norm": 0.87109375,
      "learning_rate": 7.924355558815733e-06,
      "loss": 0.8303,
      "step": 5750
    },
    {
      "epoch": 0.8764096312099969,
      "grad_norm": 0.9765625,
      "learning_rate": 7.905110619276246e-06,
      "loss": 0.9041,
      "step": 5751
    },
    {
      "epoch": 0.8765620237732399,
      "grad_norm": 1.3046875,
      "learning_rate": 7.885888115376161e-06,
      "loss": 1.2715,
      "step": 5752
    },
    {
      "epoch": 0.8767144163364827,
      "grad_norm": 1.3125,
      "learning_rate": 7.866688051798342e-06,
      "loss": 1.0427,
      "step": 5753
    },
    {
      "epoch": 0.8768668088997257,
      "grad_norm": 0.953125,
      "learning_rate": 7.84751043322024e-06,
      "loss": 1.1035,
      "step": 5754
    },
    {
      "epoch": 0.8770192014629686,
      "grad_norm": 1.0234375,
      "learning_rate": 7.828355264313758e-06,
      "loss": 1.0044,
      "step": 5755
    },
    {
      "epoch": 0.8771715940262115,
      "grad_norm": 0.71484375,
      "learning_rate": 7.8092225497454e-06,
      "loss": 0.9417,
      "step": 5756
    },
    {
      "epoch": 0.8773239865894544,
      "grad_norm": 1.1640625,
      "learning_rate": 7.790112294176143e-06,
      "loss": 1.0205,
      "step": 5757
    },
    {
      "epoch": 0.8774763791526974,
      "grad_norm": 0.63671875,
      "learning_rate": 7.771024502261526e-06,
      "loss": 0.8866,
      "step": 5758
    },
    {
      "epoch": 0.8776287717159402,
      "grad_norm": 1.15625,
      "learning_rate": 7.751959178651635e-06,
      "loss": 0.9738,
      "step": 5759
    },
    {
      "epoch": 0.8777811642791832,
      "grad_norm": 0.99609375,
      "learning_rate": 7.732916327991024e-06,
      "loss": 1.0158,
      "step": 5760
    },
    {
      "epoch": 0.8779335568424261,
      "grad_norm": 0.9765625,
      "learning_rate": 7.713895954918838e-06,
      "loss": 0.9541,
      "step": 5761
    },
    {
      "epoch": 0.878085949405669,
      "grad_norm": 1.09375,
      "learning_rate": 7.694898064068689e-06,
      "loss": 1.0283,
      "step": 5762
    },
    {
      "epoch": 0.8782383419689119,
      "grad_norm": 0.84375,
      "learning_rate": 7.675922660068734e-06,
      "loss": 1.0464,
      "step": 5763
    },
    {
      "epoch": 0.8783907345321549,
      "grad_norm": 0.94140625,
      "learning_rate": 7.656969747541665e-06,
      "loss": 1.0043,
      "step": 5764
    },
    {
      "epoch": 0.8785431270953977,
      "grad_norm": 1.09375,
      "learning_rate": 7.638039331104685e-06,
      "loss": 1.0474,
      "step": 5765
    },
    {
      "epoch": 0.8786955196586407,
      "grad_norm": 1.2109375,
      "learning_rate": 7.619131415369518e-06,
      "loss": 1.0428,
      "step": 5766
    },
    {
      "epoch": 0.8788479122218835,
      "grad_norm": 1.2265625,
      "learning_rate": 7.600246004942402e-06,
      "loss": 0.9401,
      "step": 5767
    },
    {
      "epoch": 0.8790003047851265,
      "grad_norm": 1.2578125,
      "learning_rate": 7.581383104424078e-06,
      "loss": 1.095,
      "step": 5768
    },
    {
      "epoch": 0.8791526973483694,
      "grad_norm": 1.4140625,
      "learning_rate": 7.562542718409849e-06,
      "loss": 1.3811,
      "step": 5769
    },
    {
      "epoch": 0.8793050899116123,
      "grad_norm": 0.87890625,
      "learning_rate": 7.543724851489465e-06,
      "loss": 1.1066,
      "step": 5770
    },
    {
      "epoch": 0.8794574824748552,
      "grad_norm": 1.125,
      "learning_rate": 7.524929508247269e-06,
      "loss": 0.9889,
      "step": 5771
    },
    {
      "epoch": 0.8796098750380982,
      "grad_norm": 0.921875,
      "learning_rate": 7.5061566932620675e-06,
      "loss": 1.1795,
      "step": 5772
    },
    {
      "epoch": 0.879762267601341,
      "grad_norm": 1.2890625,
      "learning_rate": 7.487406411107168e-06,
      "loss": 0.8605,
      "step": 5773
    },
    {
      "epoch": 0.879914660164584,
      "grad_norm": 0.859375,
      "learning_rate": 7.468678666350426e-06,
      "loss": 0.9365,
      "step": 5774
    },
    {
      "epoch": 0.8800670527278269,
      "grad_norm": 1.1171875,
      "learning_rate": 7.449973463554172e-06,
      "loss": 1.0661,
      "step": 5775
    },
    {
      "epoch": 0.8802194452910698,
      "grad_norm": 0.73828125,
      "learning_rate": 7.431290807275293e-06,
      "loss": 0.8777,
      "step": 5776
    },
    {
      "epoch": 0.8803718378543127,
      "grad_norm": 0.96875,
      "learning_rate": 7.4126307020651374e-06,
      "loss": 1.0014,
      "step": 5777
    },
    {
      "epoch": 0.8805242304175557,
      "grad_norm": 0.921875,
      "learning_rate": 7.393993152469569e-06,
      "loss": 0.9937,
      "step": 5778
    },
    {
      "epoch": 0.8806766229807985,
      "grad_norm": 1.0078125,
      "learning_rate": 7.375378163028968e-06,
      "loss": 0.9951,
      "step": 5779
    },
    {
      "epoch": 0.8808290155440415,
      "grad_norm": 1.0625,
      "learning_rate": 7.356785738278216e-06,
      "loss": 1.1028,
      "step": 5780
    },
    {
      "epoch": 0.8809814081072843,
      "grad_norm": 0.890625,
      "learning_rate": 7.338215882746668e-06,
      "loss": 1.0132,
      "step": 5781
    },
    {
      "epoch": 0.8811338006705273,
      "grad_norm": 0.92578125,
      "learning_rate": 7.319668600958263e-06,
      "loss": 0.986,
      "step": 5782
    },
    {
      "epoch": 0.8812861932337702,
      "grad_norm": 0.94140625,
      "learning_rate": 7.301143897431339e-06,
      "loss": 0.9105,
      "step": 5783
    },
    {
      "epoch": 0.8814385857970131,
      "grad_norm": 0.796875,
      "learning_rate": 7.2826417766788e-06,
      "loss": 0.8819,
      "step": 5784
    },
    {
      "epoch": 0.881590978360256,
      "grad_norm": 0.91015625,
      "learning_rate": 7.264162243208028e-06,
      "loss": 0.9611,
      "step": 5785
    },
    {
      "epoch": 0.881743370923499,
      "grad_norm": 0.796875,
      "learning_rate": 7.245705301520888e-06,
      "loss": 1.0066,
      "step": 5786
    },
    {
      "epoch": 0.8818957634867418,
      "grad_norm": 0.7578125,
      "learning_rate": 7.227270956113763e-06,
      "loss": 0.8875,
      "step": 5787
    },
    {
      "epoch": 0.8820481560499848,
      "grad_norm": 1.4140625,
      "learning_rate": 7.208859211477537e-06,
      "loss": 1.1764,
      "step": 5788
    },
    {
      "epoch": 0.8822005486132277,
      "grad_norm": 1.015625,
      "learning_rate": 7.190470072097577e-06,
      "loss": 1.0292,
      "step": 5789
    },
    {
      "epoch": 0.8823529411764706,
      "grad_norm": 0.66015625,
      "learning_rate": 7.172103542453723e-06,
      "loss": 0.9503,
      "step": 5790
    },
    {
      "epoch": 0.8825053337397135,
      "grad_norm": 1.109375,
      "learning_rate": 7.15375962702034e-06,
      "loss": 0.9741,
      "step": 5791
    },
    {
      "epoch": 0.8826577263029565,
      "grad_norm": 0.9140625,
      "learning_rate": 7.135438330266242e-06,
      "loss": 0.9966,
      "step": 5792
    },
    {
      "epoch": 0.8828101188661993,
      "grad_norm": 1.015625,
      "learning_rate": 7.117139656654814e-06,
      "loss": 0.9489,
      "step": 5793
    },
    {
      "epoch": 0.8829625114294423,
      "grad_norm": 0.94140625,
      "learning_rate": 7.0988636106438355e-06,
      "loss": 1.01,
      "step": 5794
    },
    {
      "epoch": 0.8831149039926851,
      "grad_norm": 1.3046875,
      "learning_rate": 7.080610196685622e-06,
      "loss": 0.9873,
      "step": 5795
    },
    {
      "epoch": 0.8832672965559281,
      "grad_norm": 1.03125,
      "learning_rate": 7.062379419226972e-06,
      "loss": 1.0563,
      "step": 5796
    },
    {
      "epoch": 0.883419689119171,
      "grad_norm": 1.09375,
      "learning_rate": 7.0441712827091665e-06,
      "loss": 1.0602,
      "step": 5797
    },
    {
      "epoch": 0.8835720816824139,
      "grad_norm": 1.3125,
      "learning_rate": 7.025985791567946e-06,
      "loss": 1.23,
      "step": 5798
    },
    {
      "epoch": 0.8837244742456568,
      "grad_norm": 1.046875,
      "learning_rate": 7.0078229502336e-06,
      "loss": 1.0807,
      "step": 5799
    },
    {
      "epoch": 0.8838768668088998,
      "grad_norm": 1.125,
      "learning_rate": 6.989682763130823e-06,
      "loss": 0.9179,
      "step": 5800
    },
    {
      "epoch": 0.8840292593721426,
      "grad_norm": 1.40625,
      "learning_rate": 6.971565234678845e-06,
      "loss": 0.9705,
      "step": 5801
    },
    {
      "epoch": 0.8841816519353856,
      "grad_norm": 0.8359375,
      "learning_rate": 6.953470369291348e-06,
      "loss": 1.0336,
      "step": 5802
    },
    {
      "epoch": 0.8843340444986285,
      "grad_norm": 1.109375,
      "learning_rate": 6.935398171376506e-06,
      "loss": 0.939,
      "step": 5803
    },
    {
      "epoch": 0.8844864370618714,
      "grad_norm": 0.78125,
      "learning_rate": 6.91734864533694e-06,
      "loss": 1.0437,
      "step": 5804
    },
    {
      "epoch": 0.8846388296251143,
      "grad_norm": 0.921875,
      "learning_rate": 6.899321795569813e-06,
      "loss": 0.8508,
      "step": 5805
    },
    {
      "epoch": 0.8847912221883573,
      "grad_norm": 0.84375,
      "learning_rate": 6.88131762646671e-06,
      "loss": 0.9308,
      "step": 5806
    },
    {
      "epoch": 0.8849436147516001,
      "grad_norm": 0.8671875,
      "learning_rate": 6.863336142413701e-06,
      "loss": 1.0074,
      "step": 5807
    },
    {
      "epoch": 0.8850960073148431,
      "grad_norm": 0.91796875,
      "learning_rate": 6.845377347791349e-06,
      "loss": 0.8898,
      "step": 5808
    },
    {
      "epoch": 0.8852483998780859,
      "grad_norm": 0.9609375,
      "learning_rate": 6.827441246974631e-06,
      "loss": 0.9679,
      "step": 5809
    },
    {
      "epoch": 0.8854007924413289,
      "grad_norm": 0.703125,
      "learning_rate": 6.809527844333097e-06,
      "loss": 0.8987,
      "step": 5810
    },
    {
      "epoch": 0.8855531850045718,
      "grad_norm": 0.91796875,
      "learning_rate": 6.791637144230678e-06,
      "loss": 1.0148,
      "step": 5811
    },
    {
      "epoch": 0.8857055775678147,
      "grad_norm": 0.97265625,
      "learning_rate": 6.773769151025822e-06,
      "loss": 0.9724,
      "step": 5812
    },
    {
      "epoch": 0.8858579701310576,
      "grad_norm": 0.92578125,
      "learning_rate": 6.7559238690714126e-06,
      "loss": 1.0079,
      "step": 5813
    },
    {
      "epoch": 0.8860103626943006,
      "grad_norm": 0.75,
      "learning_rate": 6.738101302714817e-06,
      "loss": 0.8223,
      "step": 5814
    },
    {
      "epoch": 0.8861627552575434,
      "grad_norm": 0.7578125,
      "learning_rate": 6.720301456297862e-06,
      "loss": 0.7027,
      "step": 5815
    },
    {
      "epoch": 0.8863151478207864,
      "grad_norm": 1.078125,
      "learning_rate": 6.702524334156879e-06,
      "loss": 0.9645,
      "step": 5816
    },
    {
      "epoch": 0.8864675403840293,
      "grad_norm": 1.0390625,
      "learning_rate": 6.684769940622604e-06,
      "loss": 0.9536,
      "step": 5817
    },
    {
      "epoch": 0.8866199329472721,
      "grad_norm": 1.1953125,
      "learning_rate": 6.667038280020266e-06,
      "loss": 1.1355,
      "step": 5818
    },
    {
      "epoch": 0.8867723255105151,
      "grad_norm": 1.265625,
      "learning_rate": 6.6493293566695645e-06,
      "loss": 0.8135,
      "step": 5819
    },
    {
      "epoch": 0.8869247180737581,
      "grad_norm": 0.65625,
      "learning_rate": 6.631643174884627e-06,
      "loss": 0.8233,
      "step": 5820
    },
    {
      "epoch": 0.8870771106370009,
      "grad_norm": 0.9140625,
      "learning_rate": 6.613979738974074e-06,
      "loss": 0.9796,
      "step": 5821
    },
    {
      "epoch": 0.8872295032002439,
      "grad_norm": 1.296875,
      "learning_rate": 6.596339053240974e-06,
      "loss": 0.9118,
      "step": 5822
    },
    {
      "epoch": 0.8873818957634867,
      "grad_norm": 0.76953125,
      "learning_rate": 6.578721121982856e-06,
      "loss": 1.0153,
      "step": 5823
    },
    {
      "epoch": 0.8875342883267296,
      "grad_norm": 0.94140625,
      "learning_rate": 6.561125949491697e-06,
      "loss": 1.0232,
      "step": 5824
    },
    {
      "epoch": 0.8876866808899726,
      "grad_norm": 1.09375,
      "learning_rate": 6.543553540053926e-06,
      "loss": 0.7795,
      "step": 5825
    },
    {
      "epoch": 0.8878390734532154,
      "grad_norm": 0.7578125,
      "learning_rate": 6.526003897950428e-06,
      "loss": 0.7967,
      "step": 5826
    },
    {
      "epoch": 0.8879914660164584,
      "grad_norm": 1.03125,
      "learning_rate": 6.508477027456572e-06,
      "loss": 1.0229,
      "step": 5827
    },
    {
      "epoch": 0.8881438585797014,
      "grad_norm": 0.796875,
      "learning_rate": 6.490972932842143e-06,
      "loss": 0.8825,
      "step": 5828
    },
    {
      "epoch": 0.8882962511429442,
      "grad_norm": 0.82421875,
      "learning_rate": 6.473491618371408e-06,
      "loss": 0.9643,
      "step": 5829
    },
    {
      "epoch": 0.8884486437061871,
      "grad_norm": 0.84375,
      "learning_rate": 6.456033088303037e-06,
      "loss": 0.9565,
      "step": 5830
    },
    {
      "epoch": 0.8886010362694301,
      "grad_norm": 1.0078125,
      "learning_rate": 6.438597346890196e-06,
      "loss": 1.0399,
      "step": 5831
    },
    {
      "epoch": 0.8887534288326729,
      "grad_norm": 0.9140625,
      "learning_rate": 6.421184398380453e-06,
      "loss": 0.8869,
      "step": 5832
    },
    {
      "epoch": 0.8889058213959159,
      "grad_norm": 1.0859375,
      "learning_rate": 6.403794247015904e-06,
      "loss": 1.2435,
      "step": 5833
    },
    {
      "epoch": 0.8890582139591587,
      "grad_norm": 1.109375,
      "learning_rate": 6.386426897033016e-06,
      "loss": 1.0471,
      "step": 5834
    },
    {
      "epoch": 0.8892106065224017,
      "grad_norm": 0.9140625,
      "learning_rate": 6.369082352662714e-06,
      "loss": 0.9897,
      "step": 5835
    },
    {
      "epoch": 0.8893629990856446,
      "grad_norm": 0.734375,
      "learning_rate": 6.351760618130398e-06,
      "loss": 1.045,
      "step": 5836
    },
    {
      "epoch": 0.8895153916488875,
      "grad_norm": 1.0546875,
      "learning_rate": 6.33446169765588e-06,
      "loss": 1.0391,
      "step": 5837
    },
    {
      "epoch": 0.8896677842121304,
      "grad_norm": 1.0078125,
      "learning_rate": 6.317185595453412e-06,
      "loss": 0.8566,
      "step": 5838
    },
    {
      "epoch": 0.8898201767753734,
      "grad_norm": 0.98828125,
      "learning_rate": 6.299932315731727e-06,
      "loss": 0.8641,
      "step": 5839
    },
    {
      "epoch": 0.8899725693386162,
      "grad_norm": 1.0,
      "learning_rate": 6.2827018626939624e-06,
      "loss": 0.9241,
      "step": 5840
    },
    {
      "epoch": 0.8901249619018592,
      "grad_norm": 0.828125,
      "learning_rate": 6.265494240537706e-06,
      "loss": 0.988,
      "step": 5841
    },
    {
      "epoch": 0.8902773544651021,
      "grad_norm": 0.96875,
      "learning_rate": 6.2483094534549705e-06,
      "loss": 0.8779,
      "step": 5842
    },
    {
      "epoch": 0.890429747028345,
      "grad_norm": 0.87890625,
      "learning_rate": 6.231147505632218e-06,
      "loss": 0.9746,
      "step": 5843
    },
    {
      "epoch": 0.8905821395915879,
      "grad_norm": 1.0859375,
      "learning_rate": 6.214008401250371e-06,
      "loss": 0.8858,
      "step": 5844
    },
    {
      "epoch": 0.8907345321548309,
      "grad_norm": 0.93359375,
      "learning_rate": 6.196892144484745e-06,
      "loss": 0.9605,
      "step": 5845
    },
    {
      "epoch": 0.8908869247180737,
      "grad_norm": 1.140625,
      "learning_rate": 6.179798739505094e-06,
      "loss": 0.9053,
      "step": 5846
    },
    {
      "epoch": 0.8910393172813167,
      "grad_norm": 0.9375,
      "learning_rate": 6.1627281904756415e-06,
      "loss": 0.8531,
      "step": 5847
    },
    {
      "epoch": 0.8911917098445595,
      "grad_norm": 0.90234375,
      "learning_rate": 6.145680501555007e-06,
      "loss": 0.8716,
      "step": 5848
    },
    {
      "epoch": 0.8913441024078025,
      "grad_norm": 0.82421875,
      "learning_rate": 6.128655676896222e-06,
      "loss": 0.7902,
      "step": 5849
    },
    {
      "epoch": 0.8914964949710454,
      "grad_norm": 0.90625,
      "learning_rate": 6.111653720646837e-06,
      "loss": 1.0235,
      "step": 5850
    },
    {
      "epoch": 0.8916488875342883,
      "grad_norm": 0.734375,
      "learning_rate": 6.094674636948738e-06,
      "loss": 0.8954,
      "step": 5851
    },
    {
      "epoch": 0.8918012800975312,
      "grad_norm": 0.875,
      "learning_rate": 6.077718429938284e-06,
      "loss": 0.8938,
      "step": 5852
    },
    {
      "epoch": 0.8919536726607742,
      "grad_norm": 0.97265625,
      "learning_rate": 6.060785103746247e-06,
      "loss": 0.999,
      "step": 5853
    },
    {
      "epoch": 0.892106065224017,
      "grad_norm": 0.81640625,
      "learning_rate": 6.043874662497817e-06,
      "loss": 1.0995,
      "step": 5854
    },
    {
      "epoch": 0.89225845778726,
      "grad_norm": 0.76953125,
      "learning_rate": 6.026987110312643e-06,
      "loss": 0.8403,
      "step": 5855
    },
    {
      "epoch": 0.8924108503505029,
      "grad_norm": 1.0859375,
      "learning_rate": 6.010122451304767e-06,
      "loss": 0.9906,
      "step": 5856
    },
    {
      "epoch": 0.8925632429137458,
      "grad_norm": 0.8515625,
      "learning_rate": 5.993280689582659e-06,
      "loss": 0.9572,
      "step": 5857
    },
    {
      "epoch": 0.8927156354769887,
      "grad_norm": 0.8984375,
      "learning_rate": 5.976461829249225e-06,
      "loss": 0.946,
      "step": 5858
    },
    {
      "epoch": 0.8928680280402317,
      "grad_norm": 0.83984375,
      "learning_rate": 5.959665874401765e-06,
      "loss": 1.0913,
      "step": 5859
    },
    {
      "epoch": 0.8930204206034745,
      "grad_norm": 1.0703125,
      "learning_rate": 5.942892829132007e-06,
      "loss": 0.9498,
      "step": 5860
    },
    {
      "epoch": 0.8931728131667175,
      "grad_norm": 0.8203125,
      "learning_rate": 5.926142697526138e-06,
      "loss": 0.9021,
      "step": 5861
    },
    {
      "epoch": 0.8933252057299603,
      "grad_norm": 0.91796875,
      "learning_rate": 5.909415483664704e-06,
      "loss": 0.8263,
      "step": 5862
    },
    {
      "epoch": 0.8934775982932033,
      "grad_norm": 0.94921875,
      "learning_rate": 5.892711191622724e-06,
      "loss": 0.9361,
      "step": 5863
    },
    {
      "epoch": 0.8936299908564462,
      "grad_norm": 0.859375,
      "learning_rate": 5.876029825469576e-06,
      "loss": 0.9701,
      "step": 5864
    },
    {
      "epoch": 0.8937823834196891,
      "grad_norm": 1.0703125,
      "learning_rate": 5.859371389269086e-06,
      "loss": 0.9783,
      "step": 5865
    },
    {
      "epoch": 0.893934775982932,
      "grad_norm": 1.0625,
      "learning_rate": 5.842735887079476e-06,
      "loss": 1.1089,
      "step": 5866
    },
    {
      "epoch": 0.894087168546175,
      "grad_norm": 1.15625,
      "learning_rate": 5.826123322953436e-06,
      "loss": 0.8986,
      "step": 5867
    },
    {
      "epoch": 0.8942395611094178,
      "grad_norm": 0.79296875,
      "learning_rate": 5.8095337009379965e-06,
      "loss": 0.935,
      "step": 5868
    },
    {
      "epoch": 0.8943919536726608,
      "grad_norm": 0.984375,
      "learning_rate": 5.792967025074625e-06,
      "loss": 0.9295,
      "step": 5869
    },
    {
      "epoch": 0.8945443462359037,
      "grad_norm": 0.7734375,
      "learning_rate": 5.776423299399203e-06,
      "loss": 0.8504,
      "step": 5870
    },
    {
      "epoch": 0.8946967387991466,
      "grad_norm": 0.7734375,
      "learning_rate": 5.7599025279420184e-06,
      "loss": 0.9432,
      "step": 5871
    },
    {
      "epoch": 0.8948491313623895,
      "grad_norm": 0.98828125,
      "learning_rate": 5.743404714727796e-06,
      "loss": 0.9482,
      "step": 5872
    },
    {
      "epoch": 0.8950015239256325,
      "grad_norm": 1.0859375,
      "learning_rate": 5.72692986377561e-06,
      "loss": 1.0858,
      "step": 5873
    },
    {
      "epoch": 0.8951539164888753,
      "grad_norm": 0.828125,
      "learning_rate": 5.710477979098982e-06,
      "loss": 0.8058,
      "step": 5874
    },
    {
      "epoch": 0.8953063090521183,
      "grad_norm": 0.93359375,
      "learning_rate": 5.694049064705831e-06,
      "loss": 1.066,
      "step": 5875
    },
    {
      "epoch": 0.8954587016153611,
      "grad_norm": 0.8828125,
      "learning_rate": 5.677643124598464e-06,
      "loss": 0.9784,
      "step": 5876
    },
    {
      "epoch": 0.8956110941786041,
      "grad_norm": 0.96484375,
      "learning_rate": 5.6612601627736075e-06,
      "loss": 1.0555,
      "step": 5877
    },
    {
      "epoch": 0.895763486741847,
      "grad_norm": 0.9609375,
      "learning_rate": 5.6449001832223905e-06,
      "loss": 0.9482,
      "step": 5878
    },
    {
      "epoch": 0.8959158793050899,
      "grad_norm": 1.140625,
      "learning_rate": 5.628563189930347e-06,
      "loss": 1.0998,
      "step": 5879
    },
    {
      "epoch": 0.8960682718683328,
      "grad_norm": 0.87890625,
      "learning_rate": 5.612249186877406e-06,
      "loss": 0.8634,
      "step": 5880
    },
    {
      "epoch": 0.8962206644315758,
      "grad_norm": 0.8125,
      "learning_rate": 5.595958178037874e-06,
      "loss": 0.9659,
      "step": 5881
    },
    {
      "epoch": 0.8963730569948186,
      "grad_norm": 1.078125,
      "learning_rate": 5.57969016738048e-06,
      "loss": 1.1609,
      "step": 5882
    },
    {
      "epoch": 0.8965254495580616,
      "grad_norm": 1.1796875,
      "learning_rate": 5.563445158868341e-06,
      "loss": 1.0675,
      "step": 5883
    },
    {
      "epoch": 0.8966778421213045,
      "grad_norm": 0.78515625,
      "learning_rate": 5.547223156459003e-06,
      "loss": 0.756,
      "step": 5884
    },
    {
      "epoch": 0.8968302346845474,
      "grad_norm": 1.1796875,
      "learning_rate": 5.53102416410436e-06,
      "loss": 1.026,
      "step": 5885
    },
    {
      "epoch": 0.8969826272477903,
      "grad_norm": 0.77734375,
      "learning_rate": 5.514848185750709e-06,
      "loss": 0.952,
      "step": 5886
    },
    {
      "epoch": 0.8971350198110333,
      "grad_norm": 0.93359375,
      "learning_rate": 5.498695225338768e-06,
      "loss": 0.975,
      "step": 5887
    },
    {
      "epoch": 0.8972874123742761,
      "grad_norm": 1.1640625,
      "learning_rate": 5.482565286803609e-06,
      "loss": 1.0695,
      "step": 5888
    },
    {
      "epoch": 0.8974398049375191,
      "grad_norm": 0.80078125,
      "learning_rate": 5.466458374074746e-06,
      "loss": 0.955,
      "step": 5889
    },
    {
      "epoch": 0.8975921975007619,
      "grad_norm": 1.03125,
      "learning_rate": 5.4503744910760376e-06,
      "loss": 1.0207,
      "step": 5890
    },
    {
      "epoch": 0.8977445900640049,
      "grad_norm": 1.203125,
      "learning_rate": 5.434313641725753e-06,
      "loss": 1.0707,
      "step": 5891
    },
    {
      "epoch": 0.8978969826272478,
      "grad_norm": 1.125,
      "learning_rate": 5.418275829936537e-06,
      "loss": 1.1154,
      "step": 5892
    },
    {
      "epoch": 0.8980493751904907,
      "grad_norm": 0.9140625,
      "learning_rate": 5.402261059615443e-06,
      "loss": 0.9005,
      "step": 5893
    },
    {
      "epoch": 0.8982017677537336,
      "grad_norm": 0.87109375,
      "learning_rate": 5.3862693346638844e-06,
      "loss": 0.9241,
      "step": 5894
    },
    {
      "epoch": 0.8983541603169766,
      "grad_norm": 0.89453125,
      "learning_rate": 5.3703006589777e-06,
      "loss": 0.9726,
      "step": 5895
    },
    {
      "epoch": 0.8985065528802194,
      "grad_norm": 0.87890625,
      "learning_rate": 5.354355036447067e-06,
      "loss": 0.9763,
      "step": 5896
    },
    {
      "epoch": 0.8986589454434624,
      "grad_norm": 1.0546875,
      "learning_rate": 5.338432470956589e-06,
      "loss": 0.8965,
      "step": 5897
    },
    {
      "epoch": 0.8988113380067053,
      "grad_norm": 1.0,
      "learning_rate": 5.3225329663852075e-06,
      "loss": 0.7161,
      "step": 5898
    },
    {
      "epoch": 0.8989637305699482,
      "grad_norm": 0.98828125,
      "learning_rate": 5.3066565266062905e-06,
      "loss": 0.9617,
      "step": 5899
    },
    {
      "epoch": 0.8991161231331911,
      "grad_norm": 0.84375,
      "learning_rate": 5.2908031554875445e-06,
      "loss": 0.9009,
      "step": 5900
    },
    {
      "epoch": 0.8992685156964341,
      "grad_norm": 1.09375,
      "learning_rate": 5.2749728568911025e-06,
      "loss": 0.9973,
      "step": 5901
    },
    {
      "epoch": 0.8994209082596769,
      "grad_norm": 0.93359375,
      "learning_rate": 5.259165634673457e-06,
      "loss": 0.9965,
      "step": 5902
    },
    {
      "epoch": 0.8995733008229199,
      "grad_norm": 1.109375,
      "learning_rate": 5.243381492685428e-06,
      "loss": 1.0903,
      "step": 5903
    },
    {
      "epoch": 0.8997256933861627,
      "grad_norm": 1.015625,
      "learning_rate": 5.227620434772318e-06,
      "loss": 0.9532,
      "step": 5904
    },
    {
      "epoch": 0.8998780859494057,
      "grad_norm": 1.0078125,
      "learning_rate": 5.2118824647736985e-06,
      "loss": 1.0685,
      "step": 5905
    },
    {
      "epoch": 0.9000304785126486,
      "grad_norm": 0.94140625,
      "learning_rate": 5.196167586523604e-06,
      "loss": 1.0222,
      "step": 5906
    },
    {
      "epoch": 0.9001828710758915,
      "grad_norm": 0.7890625,
      "learning_rate": 5.180475803850393e-06,
      "loss": 0.8214,
      "step": 5907
    },
    {
      "epoch": 0.9003352636391344,
      "grad_norm": 0.88671875,
      "learning_rate": 5.164807120576809e-06,
      "loss": 0.9352,
      "step": 5908
    },
    {
      "epoch": 0.9004876562023774,
      "grad_norm": 0.8125,
      "learning_rate": 5.149161540519953e-06,
      "loss": 0.9309,
      "step": 5909
    },
    {
      "epoch": 0.9006400487656202,
      "grad_norm": 1.0703125,
      "learning_rate": 5.133539067491333e-06,
      "loss": 0.994,
      "step": 5910
    },
    {
      "epoch": 0.9007924413288632,
      "grad_norm": 1.0625,
      "learning_rate": 5.117939705296782e-06,
      "loss": 1.062,
      "step": 5911
    },
    {
      "epoch": 0.9009448338921061,
      "grad_norm": 0.92578125,
      "learning_rate": 5.10236345773657e-06,
      "loss": 1.039,
      "step": 5912
    },
    {
      "epoch": 0.901097226455349,
      "grad_norm": 1.0,
      "learning_rate": 5.086810328605263e-06,
      "loss": 0.9303,
      "step": 5913
    },
    {
      "epoch": 0.9012496190185919,
      "grad_norm": 1.0,
      "learning_rate": 5.0712803216918405e-06,
      "loss": 1.0045,
      "step": 5914
    },
    {
      "epoch": 0.9014020115818349,
      "grad_norm": 0.98828125,
      "learning_rate": 5.055773440779632e-06,
      "loss": 0.8613,
      "step": 5915
    },
    {
      "epoch": 0.9015544041450777,
      "grad_norm": 1.0546875,
      "learning_rate": 5.040289689646338e-06,
      "loss": 1.1128,
      "step": 5916
    },
    {
      "epoch": 0.9017067967083207,
      "grad_norm": 1.09375,
      "learning_rate": 5.024829072064008e-06,
      "loss": 0.9102,
      "step": 5917
    },
    {
      "epoch": 0.9018591892715635,
      "grad_norm": 1.203125,
      "learning_rate": 5.0093915917990955e-06,
      "loss": 1.0533,
      "step": 5918
    },
    {
      "epoch": 0.9020115818348065,
      "grad_norm": 0.89453125,
      "learning_rate": 4.993977252612369e-06,
      "loss": 1.095,
      "step": 5919
    },
    {
      "epoch": 0.9021639743980494,
      "grad_norm": 0.86328125,
      "learning_rate": 4.978586058259016e-06,
      "loss": 1.0674,
      "step": 5920
    },
    {
      "epoch": 0.9023163669612922,
      "grad_norm": 1.1328125,
      "learning_rate": 4.9632180124885134e-06,
      "loss": 0.9046,
      "step": 5921
    },
    {
      "epoch": 0.9024687595245352,
      "grad_norm": 0.9375,
      "learning_rate": 4.947873119044755e-06,
      "loss": 0.9655,
      "step": 5922
    },
    {
      "epoch": 0.9026211520877782,
      "grad_norm": 0.86328125,
      "learning_rate": 4.9325513816659845e-06,
      "loss": 0.8071,
      "step": 5923
    },
    {
      "epoch": 0.902773544651021,
      "grad_norm": 1.1171875,
      "learning_rate": 4.917252804084804e-06,
      "loss": 1.01,
      "step": 5924
    },
    {
      "epoch": 0.902925937214264,
      "grad_norm": 1.09375,
      "learning_rate": 4.901977390028145e-06,
      "loss": 1.0528,
      "step": 5925
    },
    {
      "epoch": 0.9030783297775069,
      "grad_norm": 1.0,
      "learning_rate": 4.88672514321733e-06,
      "loss": 1.2032,
      "step": 5926
    },
    {
      "epoch": 0.9032307223407497,
      "grad_norm": 1.2265625,
      "learning_rate": 4.871496067368031e-06,
      "loss": 1.0334,
      "step": 5927
    },
    {
      "epoch": 0.9033831149039927,
      "grad_norm": 0.765625,
      "learning_rate": 4.856290166190236e-06,
      "loss": 0.9741,
      "step": 5928
    },
    {
      "epoch": 0.9035355074672355,
      "grad_norm": 1.140625,
      "learning_rate": 4.841107443388371e-06,
      "loss": 1.0966,
      "step": 5929
    },
    {
      "epoch": 0.9036879000304785,
      "grad_norm": 1.1171875,
      "learning_rate": 4.825947902661154e-06,
      "loss": 0.9211,
      "step": 5930
    },
    {
      "epoch": 0.9038402925937215,
      "grad_norm": 0.76953125,
      "learning_rate": 4.810811547701621e-06,
      "loss": 0.9806,
      "step": 5931
    },
    {
      "epoch": 0.9039926851569643,
      "grad_norm": 1.21875,
      "learning_rate": 4.795698382197267e-06,
      "loss": 0.9077,
      "step": 5932
    },
    {
      "epoch": 0.9041450777202072,
      "grad_norm": 1.140625,
      "learning_rate": 4.780608409829846e-06,
      "loss": 0.9417,
      "step": 5933
    },
    {
      "epoch": 0.9042974702834502,
      "grad_norm": 0.96875,
      "learning_rate": 4.7655416342754725e-06,
      "loss": 0.997,
      "step": 5934
    },
    {
      "epoch": 0.904449862846693,
      "grad_norm": 0.8984375,
      "learning_rate": 4.750498059204677e-06,
      "loss": 1.0365,
      "step": 5935
    },
    {
      "epoch": 0.904602255409936,
      "grad_norm": 0.9140625,
      "learning_rate": 4.735477688282264e-06,
      "loss": 0.8431,
      "step": 5936
    },
    {
      "epoch": 0.904754647973179,
      "grad_norm": 0.76171875,
      "learning_rate": 4.720480525167415e-06,
      "loss": 0.9911,
      "step": 5937
    },
    {
      "epoch": 0.9049070405364218,
      "grad_norm": 0.8984375,
      "learning_rate": 4.705506573513652e-06,
      "loss": 0.9881,
      "step": 5938
    },
    {
      "epoch": 0.9050594330996647,
      "grad_norm": 0.9609375,
      "learning_rate": 4.690555836968835e-06,
      "loss": 0.9884,
      "step": 5939
    },
    {
      "epoch": 0.9052118256629077,
      "grad_norm": 1.0859375,
      "learning_rate": 4.675628319175207e-06,
      "loss": 0.9488,
      "step": 5940
    },
    {
      "epoch": 0.9053642182261505,
      "grad_norm": 0.7109375,
      "learning_rate": 4.660724023769303e-06,
      "loss": 0.9833,
      "step": 5941
    },
    {
      "epoch": 0.9055166107893935,
      "grad_norm": 1.09375,
      "learning_rate": 4.64584295438204e-06,
      "loss": 1.0537,
      "step": 5942
    },
    {
      "epoch": 0.9056690033526363,
      "grad_norm": 0.890625,
      "learning_rate": 4.630985114638642e-06,
      "loss": 0.92,
      "step": 5943
    },
    {
      "epoch": 0.9058213959158793,
      "grad_norm": 0.80859375,
      "learning_rate": 4.616150508158712e-06,
      "loss": 0.9645,
      "step": 5944
    },
    {
      "epoch": 0.9059737884791222,
      "grad_norm": 0.92578125,
      "learning_rate": 4.601339138556138e-06,
      "loss": 1.1034,
      "step": 5945
    },
    {
      "epoch": 0.9061261810423651,
      "grad_norm": 0.9296875,
      "learning_rate": 4.586551009439222e-06,
      "loss": 0.8219,
      "step": 5946
    },
    {
      "epoch": 0.906278573605608,
      "grad_norm": 0.91796875,
      "learning_rate": 4.5717861244105705e-06,
      "loss": 0.9699,
      "step": 5947
    },
    {
      "epoch": 0.906430966168851,
      "grad_norm": 0.9609375,
      "learning_rate": 4.557044487067075e-06,
      "loss": 0.9002,
      "step": 5948
    },
    {
      "epoch": 0.9065833587320938,
      "grad_norm": 1.1484375,
      "learning_rate": 4.542326101000038e-06,
      "loss": 0.9749,
      "step": 5949
    },
    {
      "epoch": 0.9067357512953368,
      "grad_norm": 0.85546875,
      "learning_rate": 4.527630969795082e-06,
      "loss": 0.7418,
      "step": 5950
    },
    {
      "epoch": 0.9068881438585797,
      "grad_norm": 0.90234375,
      "learning_rate": 4.512959097032121e-06,
      "loss": 0.8407,
      "step": 5951
    },
    {
      "epoch": 0.9070405364218226,
      "grad_norm": 1.1328125,
      "learning_rate": 4.498310486285473e-06,
      "loss": 1.0841,
      "step": 5952
    },
    {
      "epoch": 0.9071929289850655,
      "grad_norm": 1.0390625,
      "learning_rate": 4.483685141123717e-06,
      "loss": 1.0195,
      "step": 5953
    },
    {
      "epoch": 0.9073453215483085,
      "grad_norm": 0.91796875,
      "learning_rate": 4.469083065109825e-06,
      "loss": 0.9362,
      "step": 5954
    },
    {
      "epoch": 0.9074977141115513,
      "grad_norm": 0.84765625,
      "learning_rate": 4.45450426180104e-06,
      "loss": 0.8261,
      "step": 5955
    },
    {
      "epoch": 0.9076501066747943,
      "grad_norm": 1.21875,
      "learning_rate": 4.4399487347489885e-06,
      "loss": 1.297,
      "step": 5956
    },
    {
      "epoch": 0.9078024992380371,
      "grad_norm": 1.171875,
      "learning_rate": 4.425416487499612e-06,
      "loss": 1.2906,
      "step": 5957
    },
    {
      "epoch": 0.9079548918012801,
      "grad_norm": 0.97265625,
      "learning_rate": 4.410907523593177e-06,
      "loss": 0.9183,
      "step": 5958
    },
    {
      "epoch": 0.908107284364523,
      "grad_norm": 1.1484375,
      "learning_rate": 4.3964218465642355e-06,
      "loss": 1.2882,
      "step": 5959
    },
    {
      "epoch": 0.9082596769277659,
      "grad_norm": 0.828125,
      "learning_rate": 4.381959459941753e-06,
      "loss": 0.9854,
      "step": 5960
    },
    {
      "epoch": 0.9084120694910088,
      "grad_norm": 1.265625,
      "learning_rate": 4.3675203672489675e-06,
      "loss": 0.9913,
      "step": 5961
    },
    {
      "epoch": 0.9085644620542518,
      "grad_norm": 1.0546875,
      "learning_rate": 4.353104572003419e-06,
      "loss": 1.0592,
      "step": 5962
    },
    {
      "epoch": 0.9087168546174946,
      "grad_norm": 0.953125,
      "learning_rate": 4.338712077717033e-06,
      "loss": 0.9637,
      "step": 5963
    },
    {
      "epoch": 0.9088692471807376,
      "grad_norm": 0.8671875,
      "learning_rate": 4.324342887896038e-06,
      "loss": 0.9919,
      "step": 5964
    },
    {
      "epoch": 0.9090216397439805,
      "grad_norm": 0.9140625,
      "learning_rate": 4.3099970060409335e-06,
      "loss": 0.8561,
      "step": 5965
    },
    {
      "epoch": 0.9091740323072234,
      "grad_norm": 1.015625,
      "learning_rate": 4.295674435646613e-06,
      "loss": 1.2153,
      "step": 5966
    },
    {
      "epoch": 0.9093264248704663,
      "grad_norm": 1.09375,
      "learning_rate": 4.2813751802022515e-06,
      "loss": 0.9765,
      "step": 5967
    },
    {
      "epoch": 0.9094788174337093,
      "grad_norm": 1.015625,
      "learning_rate": 4.2670992431913414e-06,
      "loss": 1.0462,
      "step": 5968
    },
    {
      "epoch": 0.9096312099969521,
      "grad_norm": 0.72265625,
      "learning_rate": 4.252846628091733e-06,
      "loss": 0.8583,
      "step": 5969
    },
    {
      "epoch": 0.9097836025601951,
      "grad_norm": 1.2109375,
      "learning_rate": 4.238617338375561e-06,
      "loss": 1.1137,
      "step": 5970
    },
    {
      "epoch": 0.9099359951234379,
      "grad_norm": 0.92578125,
      "learning_rate": 4.224411377509274e-06,
      "loss": 0.9355,
      "step": 5971
    },
    {
      "epoch": 0.9100883876866809,
      "grad_norm": 0.87890625,
      "learning_rate": 4.21022874895366e-06,
      "loss": 0.8667,
      "step": 5972
    },
    {
      "epoch": 0.9102407802499238,
      "grad_norm": 0.84765625,
      "learning_rate": 4.196069456163787e-06,
      "loss": 1.0891,
      "step": 5973
    },
    {
      "epoch": 0.9103931728131667,
      "grad_norm": 1.40625,
      "learning_rate": 4.181933502589086e-06,
      "loss": 0.9589,
      "step": 5974
    },
    {
      "epoch": 0.9105455653764096,
      "grad_norm": 1.09375,
      "learning_rate": 4.167820891673302e-06,
      "loss": 1.0286,
      "step": 5975
    },
    {
      "epoch": 0.9106979579396526,
      "grad_norm": 0.91796875,
      "learning_rate": 4.153731626854396e-06,
      "loss": 1.0662,
      "step": 5976
    },
    {
      "epoch": 0.9108503505028954,
      "grad_norm": 1.0234375,
      "learning_rate": 4.139665711564788e-06,
      "loss": 1.0451,
      "step": 5977
    },
    {
      "epoch": 0.9110027430661384,
      "grad_norm": 1.0234375,
      "learning_rate": 4.125623149231106e-06,
      "loss": 0.8419,
      "step": 5978
    },
    {
      "epoch": 0.9111551356293813,
      "grad_norm": 0.9609375,
      "learning_rate": 4.1116039432743e-06,
      "loss": 0.9481,
      "step": 5979
    },
    {
      "epoch": 0.9113075281926242,
      "grad_norm": 1.3515625,
      "learning_rate": 4.097608097109695e-06,
      "loss": 1.0242,
      "step": 5980
    },
    {
      "epoch": 0.9114599207558671,
      "grad_norm": 0.96484375,
      "learning_rate": 4.083635614146875e-06,
      "loss": 0.988,
      "step": 5981
    },
    {
      "epoch": 0.9116123133191101,
      "grad_norm": 0.7578125,
      "learning_rate": 4.069686497789693e-06,
      "loss": 0.9025,
      "step": 5982
    },
    {
      "epoch": 0.9117647058823529,
      "grad_norm": 0.91015625,
      "learning_rate": 4.055760751436389e-06,
      "loss": 0.9548,
      "step": 5983
    },
    {
      "epoch": 0.9119170984455959,
      "grad_norm": 0.921875,
      "learning_rate": 4.04185837847948e-06,
      "loss": 1.0984,
      "step": 5984
    },
    {
      "epoch": 0.9120694910088387,
      "grad_norm": 1.0625,
      "learning_rate": 4.027979382305747e-06,
      "loss": 1.0714,
      "step": 5985
    },
    {
      "epoch": 0.9122218835720817,
      "grad_norm": 1.0,
      "learning_rate": 4.014123766296374e-06,
      "loss": 0.917,
      "step": 5986
    },
    {
      "epoch": 0.9123742761353246,
      "grad_norm": 0.8828125,
      "learning_rate": 4.000291533826728e-06,
      "loss": 0.9472,
      "step": 5987
    },
    {
      "epoch": 0.9125266686985675,
      "grad_norm": 0.98046875,
      "learning_rate": 3.98648268826658e-06,
      "loss": 0.8706,
      "step": 5988
    },
    {
      "epoch": 0.9126790612618104,
      "grad_norm": 0.84765625,
      "learning_rate": 3.9726972329799386e-06,
      "loss": 0.8664,
      "step": 5989
    },
    {
      "epoch": 0.9128314538250534,
      "grad_norm": 0.95703125,
      "learning_rate": 3.95893517132514e-06,
      "loss": 0.9449,
      "step": 5990
    },
    {
      "epoch": 0.9129838463882962,
      "grad_norm": 0.8359375,
      "learning_rate": 3.945196506654847e-06,
      "loss": 0.9428,
      "step": 5991
    },
    {
      "epoch": 0.9131362389515392,
      "grad_norm": 1.09375,
      "learning_rate": 3.931481242315993e-06,
      "loss": 0.9098,
      "step": 5992
    },
    {
      "epoch": 0.9132886315147821,
      "grad_norm": 0.9921875,
      "learning_rate": 3.917789381649761e-06,
      "loss": 1.0383,
      "step": 5993
    },
    {
      "epoch": 0.913441024078025,
      "grad_norm": 0.9140625,
      "learning_rate": 3.90412092799175e-06,
      "loss": 1.082,
      "step": 5994
    },
    {
      "epoch": 0.9135934166412679,
      "grad_norm": 1.0234375,
      "learning_rate": 3.890475884671752e-06,
      "loss": 0.9737,
      "step": 5995
    },
    {
      "epoch": 0.9137458092045109,
      "grad_norm": 1.1484375,
      "learning_rate": 3.8768542550139064e-06,
      "loss": 0.9998,
      "step": 5996
    },
    {
      "epoch": 0.9138982017677537,
      "grad_norm": 1.1328125,
      "learning_rate": 3.863256042336649e-06,
      "loss": 0.9062,
      "step": 5997
    },
    {
      "epoch": 0.9140505943309967,
      "grad_norm": 1.328125,
      "learning_rate": 3.849681249952697e-06,
      "loss": 1.0946,
      "step": 5998
    },
    {
      "epoch": 0.9142029868942395,
      "grad_norm": 0.7578125,
      "learning_rate": 3.836129881169037e-06,
      "loss": 0.7451,
      "step": 5999
    },
    {
      "epoch": 0.9143553794574825,
      "grad_norm": 1.0234375,
      "learning_rate": 3.822601939287018e-06,
      "loss": 0.826,
      "step": 6000
    },
    {
      "epoch": 0.9145077720207254,
      "grad_norm": 0.95703125,
      "learning_rate": 3.809097427602204e-06,
      "loss": 0.9434,
      "step": 6001
    },
    {
      "epoch": 0.9146601645839683,
      "grad_norm": 0.84765625,
      "learning_rate": 3.7956163494045294e-06,
      "loss": 1.0217,
      "step": 6002
    },
    {
      "epoch": 0.9148125571472112,
      "grad_norm": 0.9296875,
      "learning_rate": 3.782158707978156e-06,
      "loss": 1.0853,
      "step": 6003
    },
    {
      "epoch": 0.9149649497104542,
      "grad_norm": 0.87890625,
      "learning_rate": 3.7687245066015397e-06,
      "loss": 0.8662,
      "step": 6004
    },
    {
      "epoch": 0.915117342273697,
      "grad_norm": 0.82421875,
      "learning_rate": 3.755313748547473e-06,
      "loss": 0.9003,
      "step": 6005
    },
    {
      "epoch": 0.91526973483694,
      "grad_norm": 0.9921875,
      "learning_rate": 3.74192643708301e-06,
      "loss": 1.0036,
      "step": 6006
    },
    {
      "epoch": 0.9154221274001829,
      "grad_norm": 0.91015625,
      "learning_rate": 3.728562575469463e-06,
      "loss": 1.048,
      "step": 6007
    },
    {
      "epoch": 0.9155745199634258,
      "grad_norm": 0.77734375,
      "learning_rate": 3.7152221669624956e-06,
      "loss": 0.9177,
      "step": 6008
    },
    {
      "epoch": 0.9157269125266687,
      "grad_norm": 0.9453125,
      "learning_rate": 3.701905214812018e-06,
      "loss": 0.9388,
      "step": 6009
    },
    {
      "epoch": 0.9158793050899117,
      "grad_norm": 1.109375,
      "learning_rate": 3.688611722262203e-06,
      "loss": 0.8637,
      "step": 6010
    },
    {
      "epoch": 0.9160316976531545,
      "grad_norm": 0.89453125,
      "learning_rate": 3.6753416925515593e-06,
      "loss": 0.8731,
      "step": 6011
    },
    {
      "epoch": 0.9161840902163975,
      "grad_norm": 0.921875,
      "learning_rate": 3.662095128912857e-06,
      "loss": 1.0171,
      "step": 6012
    },
    {
      "epoch": 0.9163364827796403,
      "grad_norm": 0.8984375,
      "learning_rate": 3.6488720345731363e-06,
      "loss": 0.9012,
      "step": 6013
    },
    {
      "epoch": 0.9164888753428833,
      "grad_norm": 0.94140625,
      "learning_rate": 3.6356724127537654e-06,
      "loss": 1.1281,
      "step": 6014
    },
    {
      "epoch": 0.9166412679061262,
      "grad_norm": 1.3671875,
      "learning_rate": 3.6224962666703277e-06,
      "loss": 1.0657,
      "step": 6015
    },
    {
      "epoch": 0.916793660469369,
      "grad_norm": 0.83203125,
      "learning_rate": 3.6093435995327217e-06,
      "loss": 0.9176,
      "step": 6016
    },
    {
      "epoch": 0.916946053032612,
      "grad_norm": 0.7578125,
      "learning_rate": 3.59621441454514e-06,
      "loss": 0.9732,
      "step": 6017
    },
    {
      "epoch": 0.917098445595855,
      "grad_norm": 0.88671875,
      "learning_rate": 3.583108714906036e-06,
      "loss": 0.9617,
      "step": 6018
    },
    {
      "epoch": 0.9172508381590978,
      "grad_norm": 0.84765625,
      "learning_rate": 3.570026503808155e-06,
      "loss": 0.8662,
      "step": 6019
    },
    {
      "epoch": 0.9174032307223408,
      "grad_norm": 1.1953125,
      "learning_rate": 3.556967784438525e-06,
      "loss": 0.9749,
      "step": 6020
    },
    {
      "epoch": 0.9175556232855837,
      "grad_norm": 0.96484375,
      "learning_rate": 3.5439325599783914e-06,
      "loss": 0.8954,
      "step": 6021
    },
    {
      "epoch": 0.9177080158488266,
      "grad_norm": 0.77734375,
      "learning_rate": 3.5309208336033573e-06,
      "loss": 0.9577,
      "step": 6022
    },
    {
      "epoch": 0.9178604084120695,
      "grad_norm": 1.046875,
      "learning_rate": 3.5179326084832653e-06,
      "loss": 1.1893,
      "step": 6023
    },
    {
      "epoch": 0.9180128009753125,
      "grad_norm": 1.09375,
      "learning_rate": 3.504967887782207e-06,
      "loss": 0.9315,
      "step": 6024
    },
    {
      "epoch": 0.9181651935385553,
      "grad_norm": 1.015625,
      "learning_rate": 3.4920266746586106e-06,
      "loss": 0.9711,
      "step": 6025
    },
    {
      "epoch": 0.9183175861017983,
      "grad_norm": 0.8125,
      "learning_rate": 3.4791089722651436e-06,
      "loss": 0.8823,
      "step": 6026
    },
    {
      "epoch": 0.9184699786650411,
      "grad_norm": 1.1015625,
      "learning_rate": 3.4662147837487004e-06,
      "loss": 0.9821,
      "step": 6027
    },
    {
      "epoch": 0.918622371228284,
      "grad_norm": 0.84765625,
      "learning_rate": 3.4533441122505227e-06,
      "loss": 0.9214,
      "step": 6028
    },
    {
      "epoch": 0.918774763791527,
      "grad_norm": 1.1640625,
      "learning_rate": 3.4404969609060922e-06,
      "loss": 1.0274,
      "step": 6029
    },
    {
      "epoch": 0.9189271563547698,
      "grad_norm": 1.15625,
      "learning_rate": 3.427673332845138e-06,
      "loss": 1.0516,
      "step": 6030
    },
    {
      "epoch": 0.9190795489180128,
      "grad_norm": 0.8359375,
      "learning_rate": 3.4148732311917175e-06,
      "loss": 0.9829,
      "step": 6031
    },
    {
      "epoch": 0.9192319414812558,
      "grad_norm": 1.046875,
      "learning_rate": 3.4020966590640914e-06,
      "loss": 0.9355,
      "step": 6032
    },
    {
      "epoch": 0.9193843340444986,
      "grad_norm": 1.1484375,
      "learning_rate": 3.389343619574803e-06,
      "loss": 0.8901,
      "step": 6033
    },
    {
      "epoch": 0.9195367266077416,
      "grad_norm": 0.90234375,
      "learning_rate": 3.376614115830712e-06,
      "loss": 0.9103,
      "step": 6034
    },
    {
      "epoch": 0.9196891191709845,
      "grad_norm": 1.1328125,
      "learning_rate": 3.3639081509328706e-06,
      "loss": 1.0196,
      "step": 6035
    },
    {
      "epoch": 0.9198415117342273,
      "grad_norm": 0.97265625,
      "learning_rate": 3.351225727976681e-06,
      "loss": 0.9861,
      "step": 6036
    },
    {
      "epoch": 0.9199939042974703,
      "grad_norm": 0.9375,
      "learning_rate": 3.338566850051739e-06,
      "loss": 0.8687,
      "step": 6037
    },
    {
      "epoch": 0.9201462968607131,
      "grad_norm": 0.953125,
      "learning_rate": 3.3259315202419094e-06,
      "loss": 1.0186,
      "step": 6038
    },
    {
      "epoch": 0.9202986894239561,
      "grad_norm": 0.8125,
      "learning_rate": 3.313319741625376e-06,
      "loss": 0.9481,
      "step": 6039
    },
    {
      "epoch": 0.920451081987199,
      "grad_norm": 0.953125,
      "learning_rate": 3.3007315172745245e-06,
      "loss": 1.2117,
      "step": 6040
    },
    {
      "epoch": 0.9206034745504419,
      "grad_norm": 0.76171875,
      "learning_rate": 3.288166850256036e-06,
      "loss": 0.9489,
      "step": 6041
    },
    {
      "epoch": 0.9207558671136848,
      "grad_norm": 0.82421875,
      "learning_rate": 3.275625743630861e-06,
      "loss": 0.8988,
      "step": 6042
    },
    {
      "epoch": 0.9209082596769278,
      "grad_norm": 0.78125,
      "learning_rate": 3.263108200454168e-06,
      "loss": 0.8802,
      "step": 6043
    },
    {
      "epoch": 0.9210606522401706,
      "grad_norm": 1.03125,
      "learning_rate": 3.2506142237754056e-06,
      "loss": 0.9277,
      "step": 6044
    },
    {
      "epoch": 0.9212130448034136,
      "grad_norm": 1.0,
      "learning_rate": 3.238143816638306e-06,
      "loss": 0.8306,
      "step": 6045
    },
    {
      "epoch": 0.9213654373666565,
      "grad_norm": 0.87890625,
      "learning_rate": 3.2256969820808393e-06,
      "loss": 0.8837,
      "step": 6046
    },
    {
      "epoch": 0.9215178299298994,
      "grad_norm": 1.0234375,
      "learning_rate": 3.2132737231352126e-06,
      "loss": 0.8422,
      "step": 6047
    },
    {
      "epoch": 0.9216702224931423,
      "grad_norm": 0.9453125,
      "learning_rate": 3.2008740428279503e-06,
      "loss": 0.8954,
      "step": 6048
    },
    {
      "epoch": 0.9218226150563853,
      "grad_norm": 1.0234375,
      "learning_rate": 3.188497944179758e-06,
      "loss": 1.1514,
      "step": 6049
    },
    {
      "epoch": 0.9219750076196281,
      "grad_norm": 0.87890625,
      "learning_rate": 3.176145430205624e-06,
      "loss": 1.0299,
      "step": 6050
    },
    {
      "epoch": 0.9221274001828711,
      "grad_norm": 0.875,
      "learning_rate": 3.1638165039148294e-06,
      "loss": 0.9946,
      "step": 6051
    },
    {
      "epoch": 0.9222797927461139,
      "grad_norm": 1.0234375,
      "learning_rate": 3.15151116831085e-06,
      "loss": 0.9488,
      "step": 6052
    },
    {
      "epoch": 0.9224321853093569,
      "grad_norm": 0.94140625,
      "learning_rate": 3.1392294263914768e-06,
      "loss": 1.0298,
      "step": 6053
    },
    {
      "epoch": 0.9225845778725998,
      "grad_norm": 0.81640625,
      "learning_rate": 3.126971281148705e-06,
      "loss": 0.8702,
      "step": 6054
    },
    {
      "epoch": 0.9227369704358427,
      "grad_norm": 0.8046875,
      "learning_rate": 3.114736735568757e-06,
      "loss": 0.914,
      "step": 6055
    },
    {
      "epoch": 0.9228893629990856,
      "grad_norm": 0.87109375,
      "learning_rate": 3.102525792632205e-06,
      "loss": 0.9567,
      "step": 6056
    },
    {
      "epoch": 0.9230417555623286,
      "grad_norm": 0.87890625,
      "learning_rate": 3.0903384553137682e-06,
      "loss": 1.0153,
      "step": 6057
    },
    {
      "epoch": 0.9231941481255714,
      "grad_norm": 1.0546875,
      "learning_rate": 3.0781747265824614e-06,
      "loss": 1.0733,
      "step": 6058
    },
    {
      "epoch": 0.9233465406888144,
      "grad_norm": 0.953125,
      "learning_rate": 3.066034609401569e-06,
      "loss": 1.1461,
      "step": 6059
    },
    {
      "epoch": 0.9234989332520573,
      "grad_norm": 0.9765625,
      "learning_rate": 3.0539181067285817e-06,
      "loss": 0.9691,
      "step": 6060
    },
    {
      "epoch": 0.9236513258153002,
      "grad_norm": 0.828125,
      "learning_rate": 3.0418252215152376e-06,
      "loss": 0.8285,
      "step": 6061
    },
    {
      "epoch": 0.9238037183785431,
      "grad_norm": 0.98046875,
      "learning_rate": 3.0297559567075585e-06,
      "loss": 0.9766,
      "step": 6062
    },
    {
      "epoch": 0.9239561109417861,
      "grad_norm": 0.94921875,
      "learning_rate": 3.0177103152457918e-06,
      "loss": 0.9364,
      "step": 6063
    },
    {
      "epoch": 0.9241085035050289,
      "grad_norm": 0.9296875,
      "learning_rate": 3.0056883000644132e-06,
      "loss": 0.9584,
      "step": 6064
    },
    {
      "epoch": 0.9242608960682719,
      "grad_norm": 0.80078125,
      "learning_rate": 2.993689914092179e-06,
      "loss": 0.9361,
      "step": 6065
    },
    {
      "epoch": 0.9244132886315147,
      "grad_norm": 1.0078125,
      "learning_rate": 2.981715160252052e-06,
      "loss": 0.9339,
      "step": 6066
    },
    {
      "epoch": 0.9245656811947577,
      "grad_norm": 0.953125,
      "learning_rate": 2.9697640414612647e-06,
      "loss": 0.9198,
      "step": 6067
    },
    {
      "epoch": 0.9247180737580006,
      "grad_norm": 1.09375,
      "learning_rate": 2.9578365606312665e-06,
      "loss": 1.0387,
      "step": 6068
    },
    {
      "epoch": 0.9248704663212435,
      "grad_norm": 0.734375,
      "learning_rate": 2.9459327206677768e-06,
      "loss": 0.917,
      "step": 6069
    },
    {
      "epoch": 0.9250228588844864,
      "grad_norm": 0.83203125,
      "learning_rate": 2.934052524470754e-06,
      "loss": 1.1856,
      "step": 6070
    },
    {
      "epoch": 0.9251752514477294,
      "grad_norm": 1.015625,
      "learning_rate": 2.922195974934361e-06,
      "loss": 0.947,
      "step": 6071
    },
    {
      "epoch": 0.9253276440109722,
      "grad_norm": 0.90234375,
      "learning_rate": 2.91036307494702e-06,
      "loss": 1.0373,
      "step": 6072
    },
    {
      "epoch": 0.9254800365742152,
      "grad_norm": 1.3046875,
      "learning_rate": 2.8985538273914258e-06,
      "loss": 0.9462,
      "step": 6073
    },
    {
      "epoch": 0.9256324291374581,
      "grad_norm": 1.2109375,
      "learning_rate": 2.886768235144455e-06,
      "loss": 1.0778,
      "step": 6074
    },
    {
      "epoch": 0.925784821700701,
      "grad_norm": 0.75390625,
      "learning_rate": 2.8750063010772544e-06,
      "loss": 0.9547,
      "step": 6075
    },
    {
      "epoch": 0.9259372142639439,
      "grad_norm": 0.8671875,
      "learning_rate": 2.8632680280552103e-06,
      "loss": 1.0973,
      "step": 6076
    },
    {
      "epoch": 0.9260896068271869,
      "grad_norm": 1.2890625,
      "learning_rate": 2.8515534189379244e-06,
      "loss": 1.1791,
      "step": 6077
    },
    {
      "epoch": 0.9262419993904297,
      "grad_norm": 1.125,
      "learning_rate": 2.839862476579236e-06,
      "loss": 1.1171,
      "step": 6078
    },
    {
      "epoch": 0.9263943919536727,
      "grad_norm": 0.91015625,
      "learning_rate": 2.828195203827244e-06,
      "loss": 0.9092,
      "step": 6079
    },
    {
      "epoch": 0.9265467845169155,
      "grad_norm": 0.90234375,
      "learning_rate": 2.8165516035242534e-06,
      "loss": 0.9336,
      "step": 6080
    },
    {
      "epoch": 0.9266991770801585,
      "grad_norm": 0.97265625,
      "learning_rate": 2.804931678506806e-06,
      "loss": 1.0194,
      "step": 6081
    },
    {
      "epoch": 0.9268515696434014,
      "grad_norm": 0.80859375,
      "learning_rate": 2.793335431605715e-06,
      "loss": 1.06,
      "step": 6082
    },
    {
      "epoch": 0.9270039622066443,
      "grad_norm": 0.6640625,
      "learning_rate": 2.7817628656459427e-06,
      "loss": 0.9039,
      "step": 6083
    },
    {
      "epoch": 0.9271563547698872,
      "grad_norm": 0.83203125,
      "learning_rate": 2.7702139834467787e-06,
      "loss": 0.9219,
      "step": 6084
    },
    {
      "epoch": 0.9273087473331302,
      "grad_norm": 0.93359375,
      "learning_rate": 2.758688787821673e-06,
      "loss": 0.9168,
      "step": 6085
    },
    {
      "epoch": 0.927461139896373,
      "grad_norm": 0.78515625,
      "learning_rate": 2.747187281578334e-06,
      "loss": 0.9637,
      "step": 6086
    },
    {
      "epoch": 0.927613532459616,
      "grad_norm": 1.2578125,
      "learning_rate": 2.735709467518699e-06,
      "loss": 1.0226,
      "step": 6087
    },
    {
      "epoch": 0.9277659250228589,
      "grad_norm": 0.98828125,
      "learning_rate": 2.72425534843892e-06,
      "loss": 0.857,
      "step": 6088
    },
    {
      "epoch": 0.9279183175861018,
      "grad_norm": 0.79296875,
      "learning_rate": 2.7128249271293872e-06,
      "loss": 1.0149,
      "step": 6089
    },
    {
      "epoch": 0.9280707101493447,
      "grad_norm": 0.9765625,
      "learning_rate": 2.7014182063747175e-06,
      "loss": 1.0097,
      "step": 6090
    },
    {
      "epoch": 0.9282231027125877,
      "grad_norm": 1.015625,
      "learning_rate": 2.6900351889537547e-06,
      "loss": 1.0165,
      "step": 6091
    },
    {
      "epoch": 0.9283754952758305,
      "grad_norm": 1.0859375,
      "learning_rate": 2.678675877639547e-06,
      "loss": 1.1875,
      "step": 6092
    },
    {
      "epoch": 0.9285278878390735,
      "grad_norm": 0.84375,
      "learning_rate": 2.667340275199426e-06,
      "loss": 0.8381,
      "step": 6093
    },
    {
      "epoch": 0.9286802804023163,
      "grad_norm": 0.78515625,
      "learning_rate": 2.6560283843948708e-06,
      "loss": 0.8658,
      "step": 6094
    },
    {
      "epoch": 0.9288326729655593,
      "grad_norm": 0.7578125,
      "learning_rate": 2.644740207981622e-06,
      "loss": 1.0206,
      "step": 6095
    },
    {
      "epoch": 0.9289850655288022,
      "grad_norm": 0.83984375,
      "learning_rate": 2.6334757487096683e-06,
      "loss": 1.0478,
      "step": 6096
    },
    {
      "epoch": 0.9291374580920451,
      "grad_norm": 1.0234375,
      "learning_rate": 2.6222350093231705e-06,
      "loss": 0.9202,
      "step": 6097
    },
    {
      "epoch": 0.929289850655288,
      "grad_norm": 1.1640625,
      "learning_rate": 2.611017992560549e-06,
      "loss": 1.3946,
      "step": 6098
    },
    {
      "epoch": 0.929442243218531,
      "grad_norm": 1.078125,
      "learning_rate": 2.5998247011544186e-06,
      "loss": 1.1992,
      "step": 6099
    },
    {
      "epoch": 0.9295946357817738,
      "grad_norm": 0.796875,
      "learning_rate": 2.5886551378316306e-06,
      "loss": 0.9174,
      "step": 6100
    },
    {
      "epoch": 0.9297470283450168,
      "grad_norm": 0.8203125,
      "learning_rate": 2.5775093053132528e-06,
      "loss": 0.8925,
      "step": 6101
    },
    {
      "epoch": 0.9298994209082597,
      "grad_norm": 1.0390625,
      "learning_rate": 2.566387206314569e-06,
      "loss": 0.8255,
      "step": 6102
    },
    {
      "epoch": 0.9300518134715026,
      "grad_norm": 0.8046875,
      "learning_rate": 2.555288843545078e-06,
      "loss": 0.8703,
      "step": 6103
    },
    {
      "epoch": 0.9302042060347455,
      "grad_norm": 1.125,
      "learning_rate": 2.5442142197085182e-06,
      "loss": 0.8903,
      "step": 6104
    },
    {
      "epoch": 0.9303565985979885,
      "grad_norm": 0.7421875,
      "learning_rate": 2.5331633375028085e-06,
      "loss": 0.9251,
      "step": 6105
    },
    {
      "epoch": 0.9305089911612313,
      "grad_norm": 0.94921875,
      "learning_rate": 2.5221361996200955e-06,
      "loss": 1.2407,
      "step": 6106
    },
    {
      "epoch": 0.9306613837244743,
      "grad_norm": 1.0,
      "learning_rate": 2.5111328087467743e-06,
      "loss": 0.8138,
      "step": 6107
    },
    {
      "epoch": 0.9308137762877171,
      "grad_norm": 1.0390625,
      "learning_rate": 2.5001531675634127e-06,
      "loss": 1.0997,
      "step": 6108
    },
    {
      "epoch": 0.9309661688509601,
      "grad_norm": 0.80078125,
      "learning_rate": 2.4891972787448036e-06,
      "loss": 0.9921,
      "step": 6109
    },
    {
      "epoch": 0.931118561414203,
      "grad_norm": 0.97265625,
      "learning_rate": 2.47826514495999e-06,
      "loss": 1.034,
      "step": 6110
    },
    {
      "epoch": 0.9312709539774459,
      "grad_norm": 0.71484375,
      "learning_rate": 2.4673567688721646e-06,
      "loss": 0.7839,
      "step": 6111
    },
    {
      "epoch": 0.9314233465406888,
      "grad_norm": 0.75,
      "learning_rate": 2.4564721531387556e-06,
      "loss": 1.0126,
      "step": 6112
    },
    {
      "epoch": 0.9315757391039318,
      "grad_norm": 0.8515625,
      "learning_rate": 2.4456113004114546e-06,
      "loss": 0.9702,
      "step": 6113
    },
    {
      "epoch": 0.9317281316671746,
      "grad_norm": 0.78515625,
      "learning_rate": 2.4347742133360996e-06,
      "loss": 0.6921,
      "step": 6114
    },
    {
      "epoch": 0.9318805242304176,
      "grad_norm": 0.859375,
      "learning_rate": 2.423960894552746e-06,
      "loss": 1.0002,
      "step": 6115
    },
    {
      "epoch": 0.9320329167936605,
      "grad_norm": 0.88671875,
      "learning_rate": 2.4131713466956974e-06,
      "loss": 0.9717,
      "step": 6116
    },
    {
      "epoch": 0.9321853093569034,
      "grad_norm": 1.1796875,
      "learning_rate": 2.402405572393418e-06,
      "loss": 0.9487,
      "step": 6117
    },
    {
      "epoch": 0.9323377019201463,
      "grad_norm": 1.2578125,
      "learning_rate": 2.3916635742686324e-06,
      "loss": 1.2771,
      "step": 6118
    },
    {
      "epoch": 0.9324900944833893,
      "grad_norm": 0.70703125,
      "learning_rate": 2.3809453549382354e-06,
      "loss": 0.8547,
      "step": 6119
    },
    {
      "epoch": 0.9326424870466321,
      "grad_norm": 0.765625,
      "learning_rate": 2.370250917013328e-06,
      "loss": 0.9745,
      "step": 6120
    },
    {
      "epoch": 0.9327948796098751,
      "grad_norm": 0.95703125,
      "learning_rate": 2.359580263099259e-06,
      "loss": 0.9663,
      "step": 6121
    },
    {
      "epoch": 0.9329472721731179,
      "grad_norm": 1.2109375,
      "learning_rate": 2.348933395795516e-06,
      "loss": 1.0025,
      "step": 6122
    },
    {
      "epoch": 0.9330996647363609,
      "grad_norm": 1.03125,
      "learning_rate": 2.3383103176958356e-06,
      "loss": 0.9193,
      "step": 6123
    },
    {
      "epoch": 0.9332520572996038,
      "grad_norm": 0.75390625,
      "learning_rate": 2.32771103138818e-06,
      "loss": 0.9325,
      "step": 6124
    },
    {
      "epoch": 0.9334044498628467,
      "grad_norm": 0.88671875,
      "learning_rate": 2.3171355394546624e-06,
      "loss": 0.9844,
      "step": 6125
    },
    {
      "epoch": 0.9335568424260896,
      "grad_norm": 1.046875,
      "learning_rate": 2.3065838444716325e-06,
      "loss": 1.0063,
      "step": 6126
    },
    {
      "epoch": 0.9337092349893326,
      "grad_norm": 0.9296875,
      "learning_rate": 2.2960559490096457e-06,
      "loss": 0.9151,
      "step": 6127
    },
    {
      "epoch": 0.9338616275525754,
      "grad_norm": 0.9609375,
      "learning_rate": 2.2855518556334275e-06,
      "loss": 1.0146,
      "step": 6128
    },
    {
      "epoch": 0.9340140201158184,
      "grad_norm": 1.2890625,
      "learning_rate": 2.2750715669019317e-06,
      "loss": 1.0561,
      "step": 6129
    },
    {
      "epoch": 0.9341664126790613,
      "grad_norm": 0.85546875,
      "learning_rate": 2.2646150853683156e-06,
      "loss": 0.7689,
      "step": 6130
    },
    {
      "epoch": 0.9343188052423042,
      "grad_norm": 1.0703125,
      "learning_rate": 2.2541824135799194e-06,
      "loss": 1.1151,
      "step": 6131
    },
    {
      "epoch": 0.9344711978055471,
      "grad_norm": 0.95703125,
      "learning_rate": 2.2437735540783213e-06,
      "loss": 0.9233,
      "step": 6132
    },
    {
      "epoch": 0.9346235903687901,
      "grad_norm": 0.8984375,
      "learning_rate": 2.233388509399226e-06,
      "loss": 0.9886,
      "step": 6133
    },
    {
      "epoch": 0.9347759829320329,
      "grad_norm": 0.98828125,
      "learning_rate": 2.2230272820725986e-06,
      "loss": 0.8654,
      "step": 6134
    },
    {
      "epoch": 0.9349283754952759,
      "grad_norm": 1.0,
      "learning_rate": 2.2126898746225865e-06,
      "loss": 0.9855,
      "step": 6135
    },
    {
      "epoch": 0.9350807680585187,
      "grad_norm": 1.0625,
      "learning_rate": 2.2023762895675317e-06,
      "loss": 0.9623,
      "step": 6136
    },
    {
      "epoch": 0.9352331606217616,
      "grad_norm": 0.7578125,
      "learning_rate": 2.1920865294199567e-06,
      "loss": 0.7747,
      "step": 6137
    },
    {
      "epoch": 0.9353855531850046,
      "grad_norm": 0.98046875,
      "learning_rate": 2.1818205966866343e-06,
      "loss": 0.9572,
      "step": 6138
    },
    {
      "epoch": 0.9355379457482474,
      "grad_norm": 1.21875,
      "learning_rate": 2.1715784938684425e-06,
      "loss": 1.1163,
      "step": 6139
    },
    {
      "epoch": 0.9356903383114904,
      "grad_norm": 0.7890625,
      "learning_rate": 2.161360223460529e-06,
      "loss": 0.9501,
      "step": 6140
    },
    {
      "epoch": 0.9358427308747334,
      "grad_norm": 1.03125,
      "learning_rate": 2.151165787952214e-06,
      "loss": 1.0649,
      "step": 6141
    },
    {
      "epoch": 0.9359951234379762,
      "grad_norm": 1.0234375,
      "learning_rate": 2.140995189827e-06,
      "loss": 0.9935,
      "step": 6142
    },
    {
      "epoch": 0.9361475160012191,
      "grad_norm": 0.9296875,
      "learning_rate": 2.1308484315626044e-06,
      "loss": 0.8385,
      "step": 6143
    },
    {
      "epoch": 0.9362999085644621,
      "grad_norm": 1.0,
      "learning_rate": 2.120725515630906e-06,
      "loss": 1.0003,
      "step": 6144
    },
    {
      "epoch": 0.936452301127705,
      "grad_norm": 0.96484375,
      "learning_rate": 2.1106264444980096e-06,
      "loss": 0.9695,
      "step": 6145
    },
    {
      "epoch": 0.9366046936909479,
      "grad_norm": 1.046875,
      "learning_rate": 2.1005512206241696e-06,
      "loss": 1.0094,
      "step": 6146
    },
    {
      "epoch": 0.9367570862541907,
      "grad_norm": 0.8125,
      "learning_rate": 2.0904998464638892e-06,
      "loss": 0.8122,
      "step": 6147
    },
    {
      "epoch": 0.9369094788174337,
      "grad_norm": 0.75,
      "learning_rate": 2.0804723244657985e-06,
      "loss": 0.9398,
      "step": 6148
    },
    {
      "epoch": 0.9370618713806766,
      "grad_norm": 0.9375,
      "learning_rate": 2.070468657072777e-06,
      "loss": 1.0821,
      "step": 6149
    },
    {
      "epoch": 0.9372142639439195,
      "grad_norm": 0.8515625,
      "learning_rate": 2.060488846721831e-06,
      "loss": 0.9918,
      "step": 6150
    },
    {
      "epoch": 0.9373666565071624,
      "grad_norm": 0.98046875,
      "learning_rate": 2.0505328958441925e-06,
      "loss": 0.9428,
      "step": 6151
    },
    {
      "epoch": 0.9375190490704054,
      "grad_norm": 1.1484375,
      "learning_rate": 2.040600806865278e-06,
      "loss": 1.1071,
      "step": 6152
    },
    {
      "epoch": 0.9376714416336482,
      "grad_norm": 0.8125,
      "learning_rate": 2.0306925822046964e-06,
      "loss": 1.0315,
      "step": 6153
    },
    {
      "epoch": 0.9378238341968912,
      "grad_norm": 0.890625,
      "learning_rate": 2.020808224276227e-06,
      "loss": 1.0532,
      "step": 6154
    },
    {
      "epoch": 0.9379762267601341,
      "grad_norm": 0.828125,
      "learning_rate": 2.0109477354878335e-06,
      "loss": 0.8992,
      "step": 6155
    },
    {
      "epoch": 0.938128619323377,
      "grad_norm": 1.2890625,
      "learning_rate": 2.001111118241694e-06,
      "loss": 1.0188,
      "step": 6156
    },
    {
      "epoch": 0.9382810118866199,
      "grad_norm": 0.97265625,
      "learning_rate": 1.991298374934114e-06,
      "loss": 0.9936,
      "step": 6157
    },
    {
      "epoch": 0.9384334044498629,
      "grad_norm": 0.953125,
      "learning_rate": 1.981509507955659e-06,
      "loss": 1.0238,
      "step": 6158
    },
    {
      "epoch": 0.9385857970131057,
      "grad_norm": 0.80859375,
      "learning_rate": 1.97174451969101e-06,
      "loss": 1.0977,
      "step": 6159
    },
    {
      "epoch": 0.9387381895763487,
      "grad_norm": 1.078125,
      "learning_rate": 1.9620034125190644e-06,
      "loss": 1.1261,
      "step": 6160
    },
    {
      "epoch": 0.9388905821395915,
      "grad_norm": 0.79296875,
      "learning_rate": 1.952286188812913e-06,
      "loss": 0.9114,
      "step": 6161
    },
    {
      "epoch": 0.9390429747028345,
      "grad_norm": 1.1875,
      "learning_rate": 1.942592850939784e-06,
      "loss": 1.2325,
      "step": 6162
    },
    {
      "epoch": 0.9391953672660774,
      "grad_norm": 0.8671875,
      "learning_rate": 1.932923401261133e-06,
      "loss": 0.8011,
      "step": 6163
    },
    {
      "epoch": 0.9393477598293203,
      "grad_norm": 0.83203125,
      "learning_rate": 1.923277842132576e-06,
      "loss": 1.0859,
      "step": 6164
    },
    {
      "epoch": 0.9395001523925632,
      "grad_norm": 1.0625,
      "learning_rate": 1.9136561759038883e-06,
      "loss": 0.9327,
      "step": 6165
    },
    {
      "epoch": 0.9396525449558062,
      "grad_norm": 0.8671875,
      "learning_rate": 1.9040584049190845e-06,
      "loss": 1.0064,
      "step": 6166
    },
    {
      "epoch": 0.939804937519049,
      "grad_norm": 1.0078125,
      "learning_rate": 1.894484531516294e-06,
      "loss": 1.0407,
      "step": 6167
    },
    {
      "epoch": 0.939957330082292,
      "grad_norm": 0.90234375,
      "learning_rate": 1.88493455802784e-06,
      "loss": 1.046,
      "step": 6168
    },
    {
      "epoch": 0.9401097226455349,
      "grad_norm": 0.96875,
      "learning_rate": 1.8754084867802613e-06,
      "loss": 0.8915,
      "step": 6169
    },
    {
      "epoch": 0.9402621152087778,
      "grad_norm": 1.2265625,
      "learning_rate": 1.865906320094235e-06,
      "loss": 1.0776,
      "step": 6170
    },
    {
      "epoch": 0.9404145077720207,
      "grad_norm": 0.8046875,
      "learning_rate": 1.8564280602846207e-06,
      "loss": 0.9659,
      "step": 6171
    },
    {
      "epoch": 0.9405669003352637,
      "grad_norm": 0.83984375,
      "learning_rate": 1.8469737096604488e-06,
      "loss": 1.0606,
      "step": 6172
    },
    {
      "epoch": 0.9407192928985065,
      "grad_norm": 1.0703125,
      "learning_rate": 1.8375432705249663e-06,
      "loss": 0.9921,
      "step": 6173
    },
    {
      "epoch": 0.9408716854617495,
      "grad_norm": 0.98828125,
      "learning_rate": 1.8281367451755238e-06,
      "loss": 1.0182,
      "step": 6174
    },
    {
      "epoch": 0.9410240780249923,
      "grad_norm": 0.8671875,
      "learning_rate": 1.818754135903722e-06,
      "loss": 0.9338,
      "step": 6175
    },
    {
      "epoch": 0.9411764705882353,
      "grad_norm": 1.0078125,
      "learning_rate": 1.8093954449952766e-06,
      "loss": 0.919,
      "step": 6176
    },
    {
      "epoch": 0.9413288631514782,
      "grad_norm": 0.734375,
      "learning_rate": 1.8000606747301197e-06,
      "loss": 0.9536,
      "step": 6177
    },
    {
      "epoch": 0.9414812557147211,
      "grad_norm": 0.77734375,
      "learning_rate": 1.7907498273823099e-06,
      "loss": 1.043,
      "step": 6178
    },
    {
      "epoch": 0.941633648277964,
      "grad_norm": 0.93359375,
      "learning_rate": 1.7814629052201214e-06,
      "loss": 0.9261,
      "step": 6179
    },
    {
      "epoch": 0.941786040841207,
      "grad_norm": 0.8671875,
      "learning_rate": 1.7721999105059784e-06,
      "loss": 1.1286,
      "step": 6180
    },
    {
      "epoch": 0.9419384334044498,
      "grad_norm": 1.046875,
      "learning_rate": 1.7629608454964753e-06,
      "loss": 1.2189,
      "step": 6181
    },
    {
      "epoch": 0.9420908259676928,
      "grad_norm": 0.921875,
      "learning_rate": 1.7537457124423895e-06,
      "loss": 0.8965,
      "step": 6182
    },
    {
      "epoch": 0.9422432185309357,
      "grad_norm": 0.86328125,
      "learning_rate": 1.7445545135886477e-06,
      "loss": 1.063,
      "step": 6183
    },
    {
      "epoch": 0.9423956110941786,
      "grad_norm": 1.140625,
      "learning_rate": 1.7353872511743807e-06,
      "loss": 0.7525,
      "step": 6184
    },
    {
      "epoch": 0.9425480036574215,
      "grad_norm": 0.75390625,
      "learning_rate": 1.7262439274328246e-06,
      "loss": 0.8841,
      "step": 6185
    },
    {
      "epoch": 0.9427003962206645,
      "grad_norm": 0.875,
      "learning_rate": 1.7171245445914642e-06,
      "loss": 1.0708,
      "step": 6186
    },
    {
      "epoch": 0.9428527887839073,
      "grad_norm": 0.9609375,
      "learning_rate": 1.7080291048718998e-06,
      "loss": 0.9986,
      "step": 6187
    },
    {
      "epoch": 0.9430051813471503,
      "grad_norm": 0.87890625,
      "learning_rate": 1.698957610489915e-06,
      "loss": 1.1114,
      "step": 6188
    },
    {
      "epoch": 0.9431575739103931,
      "grad_norm": 1.0703125,
      "learning_rate": 1.6899100636554422e-06,
      "loss": 0.9023,
      "step": 6189
    },
    {
      "epoch": 0.9433099664736361,
      "grad_norm": 0.8984375,
      "learning_rate": 1.6808864665726065e-06,
      "loss": 0.8472,
      "step": 6190
    },
    {
      "epoch": 0.943462359036879,
      "grad_norm": 0.96484375,
      "learning_rate": 1.6718868214396721e-06,
      "loss": 1.008,
      "step": 6191
    },
    {
      "epoch": 0.9436147516001219,
      "grad_norm": 0.984375,
      "learning_rate": 1.6629111304491074e-06,
      "loss": 0.9327,
      "step": 6192
    },
    {
      "epoch": 0.9437671441633648,
      "grad_norm": 1.125,
      "learning_rate": 1.6539593957874966e-06,
      "loss": 1.0641,
      "step": 6193
    },
    {
      "epoch": 0.9439195367266078,
      "grad_norm": 0.8671875,
      "learning_rate": 1.6450316196356175e-06,
      "loss": 0.9438,
      "step": 6194
    },
    {
      "epoch": 0.9440719292898506,
      "grad_norm": 0.9140625,
      "learning_rate": 1.636127804168408e-06,
      "loss": 0.9951,
      "step": 6195
    },
    {
      "epoch": 0.9442243218530936,
      "grad_norm": 0.984375,
      "learning_rate": 1.6272479515549555e-06,
      "loss": 0.9384,
      "step": 6196
    },
    {
      "epoch": 0.9443767144163365,
      "grad_norm": 0.8046875,
      "learning_rate": 1.6183920639585403e-06,
      "loss": 0.9033,
      "step": 6197
    },
    {
      "epoch": 0.9445291069795794,
      "grad_norm": 0.9765625,
      "learning_rate": 1.6095601435365593e-06,
      "loss": 0.9357,
      "step": 6198
    },
    {
      "epoch": 0.9446814995428223,
      "grad_norm": 0.921875,
      "learning_rate": 1.6007521924406132e-06,
      "loss": 0.9354,
      "step": 6199
    },
    {
      "epoch": 0.9448338921060653,
      "grad_norm": 1.265625,
      "learning_rate": 1.5919682128164416e-06,
      "loss": 0.9102,
      "step": 6200
    },
    {
      "epoch": 0.9449862846693081,
      "grad_norm": 1.1796875,
      "learning_rate": 1.5832082068039544e-06,
      "loss": 1.0308,
      "step": 6201
    },
    {
      "epoch": 0.9451386772325511,
      "grad_norm": 0.80859375,
      "learning_rate": 1.5744721765371896e-06,
      "loss": 0.8861,
      "step": 6202
    },
    {
      "epoch": 0.9452910697957939,
      "grad_norm": 0.8671875,
      "learning_rate": 1.5657601241443997e-06,
      "loss": 0.9665,
      "step": 6203
    },
    {
      "epoch": 0.9454434623590369,
      "grad_norm": 0.8125,
      "learning_rate": 1.557072051747943e-06,
      "loss": 0.975,
      "step": 6204
    },
    {
      "epoch": 0.9455958549222798,
      "grad_norm": 0.7890625,
      "learning_rate": 1.548407961464382e-06,
      "loss": 1.0627,
      "step": 6205
    },
    {
      "epoch": 0.9457482474855227,
      "grad_norm": 0.953125,
      "learning_rate": 1.5397678554043947e-06,
      "loss": 1.0927,
      "step": 6206
    },
    {
      "epoch": 0.9459006400487656,
      "grad_norm": 1.0078125,
      "learning_rate": 1.5311517356728423e-06,
      "loss": 1.1234,
      "step": 6207
    },
    {
      "epoch": 0.9460530326120086,
      "grad_norm": 1.0078125,
      "learning_rate": 1.522559604368734e-06,
      "loss": 0.9983,
      "step": 6208
    },
    {
      "epoch": 0.9462054251752514,
      "grad_norm": 0.953125,
      "learning_rate": 1.5139914635852403e-06,
      "loss": 0.9159,
      "step": 6209
    },
    {
      "epoch": 0.9463578177384944,
      "grad_norm": 0.8046875,
      "learning_rate": 1.5054473154096804e-06,
      "loss": 0.9425,
      "step": 6210
    },
    {
      "epoch": 0.9465102103017373,
      "grad_norm": 0.87890625,
      "learning_rate": 1.4969271619235225e-06,
      "loss": 0.9623,
      "step": 6211
    },
    {
      "epoch": 0.9466626028649802,
      "grad_norm": 0.97265625,
      "learning_rate": 1.4884310052024175e-06,
      "loss": 0.9864,
      "step": 6212
    },
    {
      "epoch": 0.9468149954282231,
      "grad_norm": 1.2265625,
      "learning_rate": 1.4799588473161319e-06,
      "loss": 0.9914,
      "step": 6213
    },
    {
      "epoch": 0.9469673879914661,
      "grad_norm": 1.0078125,
      "learning_rate": 1.4715106903286257e-06,
      "loss": 0.9929,
      "step": 6214
    },
    {
      "epoch": 0.9471197805547089,
      "grad_norm": 1.015625,
      "learning_rate": 1.4630865362979863e-06,
      "loss": 0.9173,
      "step": 6215
    },
    {
      "epoch": 0.9472721731179519,
      "grad_norm": 0.9453125,
      "learning_rate": 1.4546863872764382e-06,
      "loss": 1.2422,
      "step": 6216
    },
    {
      "epoch": 0.9474245656811947,
      "grad_norm": 1.09375,
      "learning_rate": 1.4463102453104005e-06,
      "loss": 1.1722,
      "step": 6217
    },
    {
      "epoch": 0.9475769582444377,
      "grad_norm": 1.0390625,
      "learning_rate": 1.4379581124404185e-06,
      "loss": 0.9756,
      "step": 6218
    },
    {
      "epoch": 0.9477293508076806,
      "grad_norm": 0.88671875,
      "learning_rate": 1.4296299907011756e-06,
      "loss": 0.985,
      "step": 6219
    },
    {
      "epoch": 0.9478817433709235,
      "grad_norm": 0.6875,
      "learning_rate": 1.4213258821215381e-06,
      "loss": 0.8371,
      "step": 6220
    },
    {
      "epoch": 0.9480341359341664,
      "grad_norm": 0.81640625,
      "learning_rate": 1.4130457887245096e-06,
      "loss": 0.9365,
      "step": 6221
    },
    {
      "epoch": 0.9481865284974094,
      "grad_norm": 1.1015625,
      "learning_rate": 1.4047897125272325e-06,
      "loss": 1.061,
      "step": 6222
    },
    {
      "epoch": 0.9483389210606522,
      "grad_norm": 1.1171875,
      "learning_rate": 1.3965576555410088e-06,
      "loss": 0.9016,
      "step": 6223
    },
    {
      "epoch": 0.9484913136238952,
      "grad_norm": 1.046875,
      "learning_rate": 1.3883496197712786e-06,
      "loss": 0.9736,
      "step": 6224
    },
    {
      "epoch": 0.9486437061871381,
      "grad_norm": 1.609375,
      "learning_rate": 1.3801656072176316e-06,
      "loss": 1.1594,
      "step": 6225
    },
    {
      "epoch": 0.948796098750381,
      "grad_norm": 0.83203125,
      "learning_rate": 1.3720056198738395e-06,
      "loss": 0.8686,
      "step": 6226
    },
    {
      "epoch": 0.9489484913136239,
      "grad_norm": 0.97265625,
      "learning_rate": 1.3638696597277679e-06,
      "loss": 0.9163,
      "step": 6227
    },
    {
      "epoch": 0.9491008838768669,
      "grad_norm": 1.0703125,
      "learning_rate": 1.3557577287614532e-06,
      "loss": 0.879,
      "step": 6228
    },
    {
      "epoch": 0.9492532764401097,
      "grad_norm": 0.97265625,
      "learning_rate": 1.3476698289511037e-06,
      "loss": 1.1221,
      "step": 6229
    },
    {
      "epoch": 0.9494056690033527,
      "grad_norm": 0.921875,
      "learning_rate": 1.3396059622670099e-06,
      "loss": 1.0362,
      "step": 6230
    },
    {
      "epoch": 0.9495580615665955,
      "grad_norm": 0.8671875,
      "learning_rate": 1.3315661306736672e-06,
      "loss": 0.8494,
      "step": 6231
    },
    {
      "epoch": 0.9497104541298385,
      "grad_norm": 0.96875,
      "learning_rate": 1.3235503361297087e-06,
      "loss": 0.8853,
      "step": 6232
    },
    {
      "epoch": 0.9498628466930814,
      "grad_norm": 0.77734375,
      "learning_rate": 1.3155585805878723e-06,
      "loss": 0.7862,
      "step": 6233
    },
    {
      "epoch": 0.9500152392563243,
      "grad_norm": 1.0,
      "learning_rate": 1.3075908659950786e-06,
      "loss": 1.1376,
      "step": 6234
    },
    {
      "epoch": 0.9501676318195672,
      "grad_norm": 1.015625,
      "learning_rate": 1.299647194292364e-06,
      "loss": 1.0371,
      "step": 6235
    },
    {
      "epoch": 0.9503200243828102,
      "grad_norm": 0.8515625,
      "learning_rate": 1.2917275674149244e-06,
      "loss": 1.0214,
      "step": 6236
    },
    {
      "epoch": 0.950472416946053,
      "grad_norm": 1.03125,
      "learning_rate": 1.2838319872921167e-06,
      "loss": 0.9171,
      "step": 6237
    },
    {
      "epoch": 0.950624809509296,
      "grad_norm": 1.3046875,
      "learning_rate": 1.2759604558473914e-06,
      "loss": 1.0867,
      "step": 6238
    },
    {
      "epoch": 0.9507772020725389,
      "grad_norm": 0.8671875,
      "learning_rate": 1.268112974998381e-06,
      "loss": 0.9397,
      "step": 6239
    },
    {
      "epoch": 0.9509295946357817,
      "grad_norm": 1.1171875,
      "learning_rate": 1.2602895466568343e-06,
      "loss": 1.2939,
      "step": 6240
    },
    {
      "epoch": 0.9510819871990247,
      "grad_norm": 0.74609375,
      "learning_rate": 1.252490172728671e-06,
      "loss": 1.1359,
      "step": 6241
    },
    {
      "epoch": 0.9512343797622677,
      "grad_norm": 0.9296875,
      "learning_rate": 1.2447148551138932e-06,
      "loss": 1.0614,
      "step": 6242
    },
    {
      "epoch": 0.9513867723255105,
      "grad_norm": 0.859375,
      "learning_rate": 1.2369635957067193e-06,
      "loss": 0.9324,
      "step": 6243
    },
    {
      "epoch": 0.9515391648887535,
      "grad_norm": 0.80859375,
      "learning_rate": 1.2292363963954502e-06,
      "loss": 1.0183,
      "step": 6244
    },
    {
      "epoch": 0.9516915574519963,
      "grad_norm": 0.8671875,
      "learning_rate": 1.221533259062546e-06,
      "loss": 0.9366,
      "step": 6245
    },
    {
      "epoch": 0.9518439500152392,
      "grad_norm": 0.859375,
      "learning_rate": 1.2138541855845953e-06,
      "loss": 0.9236,
      "step": 6246
    },
    {
      "epoch": 0.9519963425784822,
      "grad_norm": 1.046875,
      "learning_rate": 1.2061991778323345e-06,
      "loss": 1.1572,
      "step": 6247
    },
    {
      "epoch": 0.952148735141725,
      "grad_norm": 1.0859375,
      "learning_rate": 1.1985682376706387e-06,
      "loss": 1.045,
      "step": 6248
    },
    {
      "epoch": 0.952301127704968,
      "grad_norm": 0.8359375,
      "learning_rate": 1.1909613669585096e-06,
      "loss": 1.1766,
      "step": 6249
    },
    {
      "epoch": 0.952453520268211,
      "grad_norm": 1.03125,
      "learning_rate": 1.1833785675490872e-06,
      "loss": 1.0189,
      "step": 6250
    },
    {
      "epoch": 0.9526059128314538,
      "grad_norm": 0.91015625,
      "learning_rate": 1.1758198412896604e-06,
      "loss": 1.0741,
      "step": 6251
    },
    {
      "epoch": 0.9527583053946967,
      "grad_norm": 1.3515625,
      "learning_rate": 1.168285190021634e-06,
      "loss": 1.0963,
      "step": 6252
    },
    {
      "epoch": 0.9529106979579397,
      "grad_norm": 0.875,
      "learning_rate": 1.160774615580551e-06,
      "loss": 0.8777,
      "step": 6253
    },
    {
      "epoch": 0.9530630905211825,
      "grad_norm": 0.7890625,
      "learning_rate": 1.1532881197961255e-06,
      "loss": 0.9732,
      "step": 6254
    },
    {
      "epoch": 0.9532154830844255,
      "grad_norm": 1.09375,
      "learning_rate": 1.1458257044921428e-06,
      "loss": 1.0441,
      "step": 6255
    },
    {
      "epoch": 0.9533678756476683,
      "grad_norm": 1.015625,
      "learning_rate": 1.138387371486571e-06,
      "loss": 1.1135,
      "step": 6256
    },
    {
      "epoch": 0.9535202682109113,
      "grad_norm": 0.91796875,
      "learning_rate": 1.1309731225914943e-06,
      "loss": 0.8866,
      "step": 6257
    },
    {
      "epoch": 0.9536726607741542,
      "grad_norm": 0.85546875,
      "learning_rate": 1.1235829596131232e-06,
      "loss": 1.022,
      "step": 6258
    },
    {
      "epoch": 0.9538250533373971,
      "grad_norm": 0.78125,
      "learning_rate": 1.116216884351806e-06,
      "loss": 1.0002,
      "step": 6259
    },
    {
      "epoch": 0.95397744590064,
      "grad_norm": 0.8828125,
      "learning_rate": 1.1088748986020415e-06,
      "loss": 0.9503,
      "step": 6260
    },
    {
      "epoch": 0.954129838463883,
      "grad_norm": 0.96484375,
      "learning_rate": 1.1015570041524315e-06,
      "loss": 0.8135,
      "step": 6261
    },
    {
      "epoch": 0.9542822310271258,
      "grad_norm": 1.0546875,
      "learning_rate": 1.0942632027857168e-06,
      "loss": 1.067,
      "step": 6262
    },
    {
      "epoch": 0.9544346235903688,
      "grad_norm": 0.828125,
      "learning_rate": 1.0869934962787764e-06,
      "loss": 0.7696,
      "step": 6263
    },
    {
      "epoch": 0.9545870161536117,
      "grad_norm": 0.7890625,
      "learning_rate": 1.0797478864026046e-06,
      "loss": 0.9284,
      "step": 6264
    },
    {
      "epoch": 0.9547394087168546,
      "grad_norm": 0.75,
      "learning_rate": 1.0725263749223558e-06,
      "loss": 0.8584,
      "step": 6265
    },
    {
      "epoch": 0.9548918012800975,
      "grad_norm": 0.8359375,
      "learning_rate": 1.0653289635972896e-06,
      "loss": 0.891,
      "step": 6266
    },
    {
      "epoch": 0.9550441938433405,
      "grad_norm": 0.88671875,
      "learning_rate": 1.0581556541807924e-06,
      "loss": 0.8711,
      "step": 6267
    },
    {
      "epoch": 0.9551965864065833,
      "grad_norm": 1.109375,
      "learning_rate": 1.0510064484203885e-06,
      "loss": 1.0245,
      "step": 6268
    },
    {
      "epoch": 0.9553489789698263,
      "grad_norm": 0.83984375,
      "learning_rate": 1.043881348057718e-06,
      "loss": 0.9395,
      "step": 6269
    },
    {
      "epoch": 0.9555013715330691,
      "grad_norm": 1.40625,
      "learning_rate": 1.0367803548285704e-06,
      "loss": 1.044,
      "step": 6270
    },
    {
      "epoch": 0.9556537640963121,
      "grad_norm": 0.90625,
      "learning_rate": 1.0297034704628395e-06,
      "loss": 1.1333,
      "step": 6271
    },
    {
      "epoch": 0.955806156659555,
      "grad_norm": 0.9609375,
      "learning_rate": 1.0226506966845683e-06,
      "loss": 0.8829,
      "step": 6272
    },
    {
      "epoch": 0.9559585492227979,
      "grad_norm": 0.984375,
      "learning_rate": 1.015622035211905e-06,
      "loss": 0.9424,
      "step": 6273
    },
    {
      "epoch": 0.9561109417860408,
      "grad_norm": 1.1015625,
      "learning_rate": 1.0086174877571464e-06,
      "loss": 0.8975,
      "step": 6274
    },
    {
      "epoch": 0.9562633343492838,
      "grad_norm": 1.1171875,
      "learning_rate": 1.0016370560266718e-06,
      "loss": 0.7636,
      "step": 6275
    },
    {
      "epoch": 0.9564157269125266,
      "grad_norm": 1.15625,
      "learning_rate": 9.946807417210435e-07,
      "loss": 0.887,
      "step": 6276
    },
    {
      "epoch": 0.9565681194757696,
      "grad_norm": 1.2890625,
      "learning_rate": 9.877485465349058e-07,
      "loss": 1.0337,
      "step": 6277
    },
    {
      "epoch": 0.9567205120390125,
      "grad_norm": 1.0703125,
      "learning_rate": 9.808404721570408e-07,
      "loss": 0.9422,
      "step": 6278
    },
    {
      "epoch": 0.9568729046022554,
      "grad_norm": 0.90625,
      "learning_rate": 9.739565202703693e-07,
      "loss": 0.9438,
      "step": 6279
    },
    {
      "epoch": 0.9570252971654983,
      "grad_norm": 1.078125,
      "learning_rate": 9.670966925518944e-07,
      "loss": 0.9827,
      "step": 6280
    },
    {
      "epoch": 0.9571776897287413,
      "grad_norm": 0.921875,
      "learning_rate": 9.6026099067279e-07,
      "loss": 1.0238,
      "step": 6281
    },
    {
      "epoch": 0.9573300822919841,
      "grad_norm": 0.84765625,
      "learning_rate": 9.534494162983354e-07,
      "loss": 1.0594,
      "step": 6282
    },
    {
      "epoch": 0.9574824748552271,
      "grad_norm": 0.9453125,
      "learning_rate": 9.466619710879033e-07,
      "loss": 1.1814,
      "step": 6283
    },
    {
      "epoch": 0.9576348674184699,
      "grad_norm": 0.76171875,
      "learning_rate": 9.398986566950374e-07,
      "loss": 0.9298,
      "step": 6284
    },
    {
      "epoch": 0.9577872599817129,
      "grad_norm": 0.8515625,
      "learning_rate": 9.331594747673644e-07,
      "loss": 1.03,
      "step": 6285
    },
    {
      "epoch": 0.9579396525449558,
      "grad_norm": 0.765625,
      "learning_rate": 9.264444269466598e-07,
      "loss": 1.0505,
      "step": 6286
    },
    {
      "epoch": 0.9580920451081987,
      "grad_norm": 1.1796875,
      "learning_rate": 9.197535148687819e-07,
      "loss": 1.1038,
      "step": 6287
    },
    {
      "epoch": 0.9582444376714416,
      "grad_norm": 0.90625,
      "learning_rate": 9.130867401637488e-07,
      "loss": 0.7287,
      "step": 6288
    },
    {
      "epoch": 0.9583968302346846,
      "grad_norm": 1.1953125,
      "learning_rate": 9.064441044556949e-07,
      "loss": 0.8949,
      "step": 6289
    },
    {
      "epoch": 0.9585492227979274,
      "grad_norm": 0.90234375,
      "learning_rate": 8.998256093628365e-07,
      "loss": 0.9521,
      "step": 6290
    },
    {
      "epoch": 0.9587016153611704,
      "grad_norm": 0.9140625,
      "learning_rate": 8.932312564975509e-07,
      "loss": 1.0036,
      "step": 6291
    },
    {
      "epoch": 0.9588540079244133,
      "grad_norm": 0.94921875,
      "learning_rate": 8.866610474663084e-07,
      "loss": 1.0293,
      "step": 6292
    },
    {
      "epoch": 0.9590064004876562,
      "grad_norm": 0.91015625,
      "learning_rate": 8.801149838697175e-07,
      "loss": 1.1275,
      "step": 6293
    },
    {
      "epoch": 0.9591587930508991,
      "grad_norm": 0.828125,
      "learning_rate": 8.735930673024806e-07,
      "loss": 0.9807,
      "step": 6294
    },
    {
      "epoch": 0.9593111856141421,
      "grad_norm": 0.8828125,
      "learning_rate": 8.670952993534375e-07,
      "loss": 1.0061,
      "step": 6295
    },
    {
      "epoch": 0.9594635781773849,
      "grad_norm": 0.890625,
      "learning_rate": 8.606216816055334e-07,
      "loss": 0.8939,
      "step": 6296
    },
    {
      "epoch": 0.9596159707406279,
      "grad_norm": 0.8125,
      "learning_rate": 8.5417221563584e-07,
      "loss": 0.9657,
      "step": 6297
    },
    {
      "epoch": 0.9597683633038707,
      "grad_norm": 1.0859375,
      "learning_rate": 8.477469030155338e-07,
      "loss": 1.0382,
      "step": 6298
    },
    {
      "epoch": 0.9599207558671137,
      "grad_norm": 0.953125,
      "learning_rate": 8.413457453099294e-07,
      "loss": 1.044,
      "step": 6299
    },
    {
      "epoch": 0.9600731484303566,
      "grad_norm": 1.015625,
      "learning_rate": 8.349687440784238e-07,
      "loss": 1.0829,
      "step": 6300
    },
    {
      "epoch": 0.9602255409935995,
      "grad_norm": 0.921875,
      "learning_rate": 8.286159008745408e-07,
      "loss": 0.9935,
      "step": 6301
    },
    {
      "epoch": 0.9603779335568424,
      "grad_norm": 0.8046875,
      "learning_rate": 8.222872172459428e-07,
      "loss": 0.9339,
      "step": 6302
    },
    {
      "epoch": 0.9605303261200854,
      "grad_norm": 0.9765625,
      "learning_rate": 8.159826947343852e-07,
      "loss": 1.0592,
      "step": 6303
    },
    {
      "epoch": 0.9606827186833282,
      "grad_norm": 1.2109375,
      "learning_rate": 8.097023348757283e-07,
      "loss": 0.9367,
      "step": 6304
    },
    {
      "epoch": 0.9608351112465712,
      "grad_norm": 0.99609375,
      "learning_rate": 8.034461391999593e-07,
      "loss": 1.1192,
      "step": 6305
    },
    {
      "epoch": 0.9609875038098141,
      "grad_norm": 1.0859375,
      "learning_rate": 7.972141092311925e-07,
      "loss": 1.1303,
      "step": 6306
    },
    {
      "epoch": 0.961139896373057,
      "grad_norm": 1.21875,
      "learning_rate": 7.910062464876244e-07,
      "loss": 1.0204,
      "step": 6307
    },
    {
      "epoch": 0.9612922889362999,
      "grad_norm": 1.09375,
      "learning_rate": 7.8482255248159e-07,
      "loss": 1.0435,
      "step": 6308
    },
    {
      "epoch": 0.9614446814995429,
      "grad_norm": 0.81640625,
      "learning_rate": 7.786630287195284e-07,
      "loss": 0.8421,
      "step": 6309
    },
    {
      "epoch": 0.9615970740627857,
      "grad_norm": 1.015625,
      "learning_rate": 7.725276767019729e-07,
      "loss": 1.1196,
      "step": 6310
    },
    {
      "epoch": 0.9617494666260287,
      "grad_norm": 1.0625,
      "learning_rate": 7.664164979235944e-07,
      "loss": 1.0482,
      "step": 6311
    },
    {
      "epoch": 0.9619018591892715,
      "grad_norm": 1.140625,
      "learning_rate": 7.603294938731576e-07,
      "loss": 0.953,
      "step": 6312
    },
    {
      "epoch": 0.9620542517525145,
      "grad_norm": 0.95703125,
      "learning_rate": 7.54266666033554e-07,
      "loss": 0.9024,
      "step": 6313
    },
    {
      "epoch": 0.9622066443157574,
      "grad_norm": 1.2109375,
      "learning_rate": 7.482280158817689e-07,
      "loss": 1.1048,
      "step": 6314
    },
    {
      "epoch": 0.9623590368790003,
      "grad_norm": 1.109375,
      "learning_rate": 7.422135448889033e-07,
      "loss": 1.0463,
      "step": 6315
    },
    {
      "epoch": 0.9625114294422432,
      "grad_norm": 1.0546875,
      "learning_rate": 7.36223254520163e-07,
      "loss": 1.1316,
      "step": 6316
    },
    {
      "epoch": 0.9626638220054862,
      "grad_norm": 0.875,
      "learning_rate": 7.302571462348918e-07,
      "loss": 0.9038,
      "step": 6317
    },
    {
      "epoch": 0.962816214568729,
      "grad_norm": 0.8203125,
      "learning_rate": 7.243152214865045e-07,
      "loss": 1.0069,
      "step": 6318
    },
    {
      "epoch": 0.962968607131972,
      "grad_norm": 0.98828125,
      "learning_rate": 7.183974817225325e-07,
      "loss": 0.949,
      "step": 6319
    },
    {
      "epoch": 0.9631209996952149,
      "grad_norm": 1.296875,
      "learning_rate": 7.125039283846335e-07,
      "loss": 1.0063,
      "step": 6320
    },
    {
      "epoch": 0.9632733922584578,
      "grad_norm": 1.1328125,
      "learning_rate": 7.06634562908548e-07,
      "loss": 1.0626,
      "step": 6321
    },
    {
      "epoch": 0.9634257848217007,
      "grad_norm": 0.9921875,
      "learning_rate": 7.007893867241544e-07,
      "loss": 1.0965,
      "step": 6322
    },
    {
      "epoch": 0.9635781773849437,
      "grad_norm": 0.88671875,
      "learning_rate": 6.949684012554136e-07,
      "loss": 0.9595,
      "step": 6323
    },
    {
      "epoch": 0.9637305699481865,
      "grad_norm": 0.9375,
      "learning_rate": 6.891716079204024e-07,
      "loss": 0.8354,
      "step": 6324
    },
    {
      "epoch": 0.9638829625114295,
      "grad_norm": 0.86328125,
      "learning_rate": 6.833990081313024e-07,
      "loss": 0.8738,
      "step": 6325
    },
    {
      "epoch": 0.9640353550746723,
      "grad_norm": 0.98046875,
      "learning_rate": 6.776506032943997e-07,
      "loss": 1.1379,
      "step": 6326
    },
    {
      "epoch": 0.9641877476379153,
      "grad_norm": 0.828125,
      "learning_rate": 6.719263948100963e-07,
      "loss": 0.9656,
      "step": 6327
    },
    {
      "epoch": 0.9643401402011582,
      "grad_norm": 0.640625,
      "learning_rate": 6.662263840728878e-07,
      "loss": 0.866,
      "step": 6328
    },
    {
      "epoch": 0.9644925327644011,
      "grad_norm": 1.1328125,
      "learning_rate": 6.605505724713745e-07,
      "loss": 1.1135,
      "step": 6329
    },
    {
      "epoch": 0.964644925327644,
      "grad_norm": 0.74609375,
      "learning_rate": 6.548989613882728e-07,
      "loss": 0.8995,
      "step": 6330
    },
    {
      "epoch": 0.964797317890887,
      "grad_norm": 0.98046875,
      "learning_rate": 6.492715522003922e-07,
      "loss": 1.0109,
      "step": 6331
    },
    {
      "epoch": 0.9649497104541298,
      "grad_norm": 0.82421875,
      "learning_rate": 6.436683462786475e-07,
      "loss": 1.0825,
      "step": 6332
    },
    {
      "epoch": 0.9651021030173728,
      "grad_norm": 0.83203125,
      "learning_rate": 6.380893449880687e-07,
      "loss": 0.9499,
      "step": 6333
    },
    {
      "epoch": 0.9652544955806157,
      "grad_norm": 0.95703125,
      "learning_rate": 6.325345496877688e-07,
      "loss": 0.9918,
      "step": 6334
    },
    {
      "epoch": 0.9654068881438586,
      "grad_norm": 0.90234375,
      "learning_rate": 6.270039617309875e-07,
      "loss": 1.0342,
      "step": 6335
    },
    {
      "epoch": 0.9655592807071015,
      "grad_norm": 0.78125,
      "learning_rate": 6.21497582465047e-07,
      "loss": 0.9924,
      "step": 6336
    },
    {
      "epoch": 0.9657116732703445,
      "grad_norm": 0.76171875,
      "learning_rate": 6.160154132313856e-07,
      "loss": 1.0138,
      "step": 6337
    },
    {
      "epoch": 0.9658640658335873,
      "grad_norm": 0.640625,
      "learning_rate": 6.105574553655347e-07,
      "loss": 0.8361,
      "step": 6338
    },
    {
      "epoch": 0.9660164583968303,
      "grad_norm": 1.0234375,
      "learning_rate": 6.051237101971419e-07,
      "loss": 1.0235,
      "step": 6339
    },
    {
      "epoch": 0.9661688509600731,
      "grad_norm": 0.9765625,
      "learning_rate": 5.997141790499261e-07,
      "loss": 1.064,
      "step": 6340
    },
    {
      "epoch": 0.966321243523316,
      "grad_norm": 1.15625,
      "learning_rate": 5.943288632417443e-07,
      "loss": 0.9282,
      "step": 6341
    },
    {
      "epoch": 0.966473636086559,
      "grad_norm": 1.2109375,
      "learning_rate": 5.88967764084536e-07,
      "loss": 0.9688,
      "step": 6342
    },
    {
      "epoch": 0.9666260286498018,
      "grad_norm": 1.0234375,
      "learning_rate": 5.836308828843228e-07,
      "loss": 0.9661,
      "step": 6343
    },
    {
      "epoch": 0.9667784212130448,
      "grad_norm": 0.9453125,
      "learning_rate": 5.783182209412763e-07,
      "loss": 0.9972,
      "step": 6344
    },
    {
      "epoch": 0.9669308137762878,
      "grad_norm": 1.203125,
      "learning_rate": 5.730297795496054e-07,
      "loss": 1.0853,
      "step": 6345
    },
    {
      "epoch": 0.9670832063395306,
      "grad_norm": 0.83203125,
      "learning_rate": 5.677655599976794e-07,
      "loss": 0.9646,
      "step": 6346
    },
    {
      "epoch": 0.9672355989027736,
      "grad_norm": 0.97265625,
      "learning_rate": 5.625255635679172e-07,
      "loss": 0.9839,
      "step": 6347
    },
    {
      "epoch": 0.9673879914660165,
      "grad_norm": 0.9609375,
      "learning_rate": 5.573097915368641e-07,
      "loss": 1.0615,
      "step": 6348
    },
    {
      "epoch": 0.9675403840292593,
      "grad_norm": 1.1328125,
      "learning_rate": 5.521182451751594e-07,
      "loss": 1.1833,
      "step": 6349
    },
    {
      "epoch": 0.9676927765925023,
      "grad_norm": 0.80859375,
      "learning_rate": 5.469509257475358e-07,
      "loss": 1.0011,
      "step": 6350
    },
    {
      "epoch": 0.9678451691557451,
      "grad_norm": 1.078125,
      "learning_rate": 5.418078345128308e-07,
      "loss": 0.8039,
      "step": 6351
    },
    {
      "epoch": 0.9679975617189881,
      "grad_norm": 0.80859375,
      "learning_rate": 5.366889727239644e-07,
      "loss": 1.1363,
      "step": 6352
    },
    {
      "epoch": 0.968149954282231,
      "grad_norm": 1.1328125,
      "learning_rate": 5.315943416279834e-07,
      "loss": 0.83,
      "step": 6353
    },
    {
      "epoch": 0.9683023468454739,
      "grad_norm": 0.8828125,
      "learning_rate": 5.265239424659952e-07,
      "loss": 0.9341,
      "step": 6354
    },
    {
      "epoch": 0.9684547394087168,
      "grad_norm": 1.0234375,
      "learning_rate": 5.214777764732226e-07,
      "loss": 1.0035,
      "step": 6355
    },
    {
      "epoch": 0.9686071319719598,
      "grad_norm": 1.0390625,
      "learning_rate": 5.164558448790047e-07,
      "loss": 0.8886,
      "step": 6356
    },
    {
      "epoch": 0.9687595245352026,
      "grad_norm": 0.84375,
      "learning_rate": 5.114581489067405e-07,
      "loss": 1.0689,
      "step": 6357
    },
    {
      "epoch": 0.9689119170984456,
      "grad_norm": 1.015625,
      "learning_rate": 5.064846897739339e-07,
      "loss": 1.0169,
      "step": 6358
    },
    {
      "epoch": 0.9690643096616885,
      "grad_norm": 0.90625,
      "learning_rate": 5.015354686922046e-07,
      "loss": 0.9626,
      "step": 6359
    },
    {
      "epoch": 0.9692167022249314,
      "grad_norm": 1.15625,
      "learning_rate": 4.966104868672439e-07,
      "loss": 0.9254,
      "step": 6360
    },
    {
      "epoch": 0.9693690947881743,
      "grad_norm": 0.921875,
      "learning_rate": 4.917097454988584e-07,
      "loss": 0.9332,
      "step": 6361
    },
    {
      "epoch": 0.9695214873514173,
      "grad_norm": 1.2265625,
      "learning_rate": 4.868332457809377e-07,
      "loss": 0.9176,
      "step": 6362
    },
    {
      "epoch": 0.9696738799146601,
      "grad_norm": 0.9375,
      "learning_rate": 4.819809889014537e-07,
      "loss": 1.0463,
      "step": 6363
    },
    {
      "epoch": 0.9698262724779031,
      "grad_norm": 1.1796875,
      "learning_rate": 4.771529760425053e-07,
      "loss": 1.0204,
      "step": 6364
    },
    {
      "epoch": 0.9699786650411459,
      "grad_norm": 0.671875,
      "learning_rate": 4.723492083802517e-07,
      "loss": 0.8352,
      "step": 6365
    },
    {
      "epoch": 0.9701310576043889,
      "grad_norm": 0.93359375,
      "learning_rate": 4.6756968708495664e-07,
      "loss": 1.0547,
      "step": 6366
    },
    {
      "epoch": 0.9702834501676318,
      "grad_norm": 1.2890625,
      "learning_rate": 4.6281441332099994e-07,
      "loss": 1.042,
      "step": 6367
    },
    {
      "epoch": 0.9704358427308747,
      "grad_norm": 1.0625,
      "learning_rate": 4.5808338824682163e-07,
      "loss": 1.1192,
      "step": 6368
    },
    {
      "epoch": 0.9705882352941176,
      "grad_norm": 1.4375,
      "learning_rate": 4.533766130149664e-07,
      "loss": 1.0277,
      "step": 6369
    },
    {
      "epoch": 0.9707406278573606,
      "grad_norm": 0.8515625,
      "learning_rate": 4.486940887720725e-07,
      "loss": 0.9464,
      "step": 6370
    },
    {
      "epoch": 0.9708930204206034,
      "grad_norm": 0.94921875,
      "learning_rate": 4.4403581665887206e-07,
      "loss": 0.8146,
      "step": 6371
    },
    {
      "epoch": 0.9710454129838464,
      "grad_norm": 0.96484375,
      "learning_rate": 4.3940179781019055e-07,
      "loss": 1.1542,
      "step": 6372
    },
    {
      "epoch": 0.9711978055470893,
      "grad_norm": 1.0703125,
      "learning_rate": 4.3479203335493603e-07,
      "loss": 0.942,
      "step": 6373
    },
    {
      "epoch": 0.9713501981103322,
      "grad_norm": 0.875,
      "learning_rate": 4.302065244161213e-07,
      "loss": 1.0218,
      "step": 6374
    },
    {
      "epoch": 0.9715025906735751,
      "grad_norm": 0.75390625,
      "learning_rate": 4.256452721108306e-07,
      "loss": 0.8515,
      "step": 6375
    },
    {
      "epoch": 0.9716549832368181,
      "grad_norm": 0.76171875,
      "learning_rate": 4.2110827755026393e-07,
      "loss": 0.7712,
      "step": 6376
    },
    {
      "epoch": 0.9718073758000609,
      "grad_norm": 1.0078125,
      "learning_rate": 4.1659554183968164e-07,
      "loss": 0.8418,
      "step": 6377
    },
    {
      "epoch": 0.9719597683633039,
      "grad_norm": 0.9140625,
      "learning_rate": 4.121070660784598e-07,
      "loss": 0.9035,
      "step": 6378
    },
    {
      "epoch": 0.9721121609265467,
      "grad_norm": 0.9453125,
      "learning_rate": 4.076428513600683e-07,
      "loss": 1.034,
      "step": 6379
    },
    {
      "epoch": 0.9722645534897897,
      "grad_norm": 0.74609375,
      "learning_rate": 4.032028987720371e-07,
      "loss": 0.8455,
      "step": 6380
    },
    {
      "epoch": 0.9724169460530326,
      "grad_norm": 1.3828125,
      "learning_rate": 3.98787209396001e-07,
      "loss": 1.0031,
      "step": 6381
    },
    {
      "epoch": 0.9725693386162755,
      "grad_norm": 0.76171875,
      "learning_rate": 3.9439578430769953e-07,
      "loss": 0.8545,
      "step": 6382
    },
    {
      "epoch": 0.9727217311795184,
      "grad_norm": 1.046875,
      "learning_rate": 3.9002862457693245e-07,
      "loss": 0.8983,
      "step": 6383
    },
    {
      "epoch": 0.9728741237427614,
      "grad_norm": 1.0234375,
      "learning_rate": 3.8568573126761543e-07,
      "loss": 0.8682,
      "step": 6384
    },
    {
      "epoch": 0.9730265163060042,
      "grad_norm": 0.921875,
      "learning_rate": 3.8136710543773547e-07,
      "loss": 0.9997,
      "step": 6385
    },
    {
      "epoch": 0.9731789088692472,
      "grad_norm": 1.03125,
      "learning_rate": 3.770727481393621e-07,
      "loss": 1.0336,
      "step": 6386
    },
    {
      "epoch": 0.9733313014324901,
      "grad_norm": 1.0234375,
      "learning_rate": 3.7280266041868073e-07,
      "loss": 1.0478,
      "step": 6387
    },
    {
      "epoch": 0.973483693995733,
      "grad_norm": 0.83203125,
      "learning_rate": 3.68556843315937e-07,
      "loss": 1.0871,
      "step": 6388
    },
    {
      "epoch": 0.9736360865589759,
      "grad_norm": 0.91796875,
      "learning_rate": 3.6433529786545907e-07,
      "loss": 0.9214,
      "step": 6389
    },
    {
      "epoch": 0.9737884791222189,
      "grad_norm": 0.9453125,
      "learning_rate": 3.6013802509570205e-07,
      "loss": 0.9482,
      "step": 6390
    },
    {
      "epoch": 0.9739408716854617,
      "grad_norm": 0.98828125,
      "learning_rate": 3.5596502602917027e-07,
      "loss": 0.9282,
      "step": 6391
    },
    {
      "epoch": 0.9740932642487047,
      "grad_norm": 0.9765625,
      "learning_rate": 3.518163016824616e-07,
      "loss": 1.0338,
      "step": 6392
    },
    {
      "epoch": 0.9742456568119475,
      "grad_norm": 1.0234375,
      "learning_rate": 3.4769185306626763e-07,
      "loss": 1.0276,
      "step": 6393
    },
    {
      "epoch": 0.9743980493751905,
      "grad_norm": 0.94140625,
      "learning_rate": 3.435916811853512e-07,
      "loss": 0.9196,
      "step": 6394
    },
    {
      "epoch": 0.9745504419384334,
      "grad_norm": 0.79296875,
      "learning_rate": 3.3951578703860234e-07,
      "loss": 0.9016,
      "step": 6395
    },
    {
      "epoch": 0.9747028345016763,
      "grad_norm": 0.9453125,
      "learning_rate": 3.3546417161894884e-07,
      "loss": 1.0228,
      "step": 6396
    },
    {
      "epoch": 0.9748552270649192,
      "grad_norm": 1.328125,
      "learning_rate": 3.3143683591342346e-07,
      "loss": 0.8044,
      "step": 6397
    },
    {
      "epoch": 0.9750076196281622,
      "grad_norm": 1.0078125,
      "learning_rate": 3.2743378090315244e-07,
      "loss": 1.0644,
      "step": 6398
    },
    {
      "epoch": 0.975160012191405,
      "grad_norm": 1.28125,
      "learning_rate": 3.234550075633225e-07,
      "loss": 0.9953,
      "step": 6399
    },
    {
      "epoch": 0.975312404754648,
      "grad_norm": 1.1640625,
      "learning_rate": 3.195005168632248e-07,
      "loss": 0.9731,
      "step": 6400
    },
    {
      "epoch": 0.9754647973178909,
      "grad_norm": 1.171875,
      "learning_rate": 3.155703097662332e-07,
      "loss": 1.0011,
      "step": 6401
    },
    {
      "epoch": 0.9756171898811338,
      "grad_norm": 0.91015625,
      "learning_rate": 3.116643872298153e-07,
      "loss": 0.8379,
      "step": 6402
    },
    {
      "epoch": 0.9757695824443767,
      "grad_norm": 0.90234375,
      "learning_rate": 3.0778275020548754e-07,
      "loss": 0.9955,
      "step": 6403
    },
    {
      "epoch": 0.9759219750076197,
      "grad_norm": 1.1953125,
      "learning_rate": 3.0392539963888247e-07,
      "loss": 0.949,
      "step": 6404
    },
    {
      "epoch": 0.9760743675708625,
      "grad_norm": 0.7109375,
      "learning_rate": 3.000923364697039e-07,
      "loss": 0.7812,
      "step": 6405
    },
    {
      "epoch": 0.9762267601341055,
      "grad_norm": 1.2109375,
      "learning_rate": 2.962835616317383e-07,
      "loss": 0.9632,
      "step": 6406
    },
    {
      "epoch": 0.9763791526973483,
      "grad_norm": 0.80078125,
      "learning_rate": 2.9249907605286564e-07,
      "loss": 1.0168,
      "step": 6407
    },
    {
      "epoch": 0.9765315452605913,
      "grad_norm": 0.92578125,
      "learning_rate": 2.887388806550373e-07,
      "loss": 0.9591,
      "step": 6408
    },
    {
      "epoch": 0.9766839378238342,
      "grad_norm": 0.765625,
      "learning_rate": 2.850029763542983e-07,
      "loss": 0.8911,
      "step": 6409
    },
    {
      "epoch": 0.9768363303870771,
      "grad_norm": 1.0703125,
      "learning_rate": 2.8129136406075394e-07,
      "loss": 1.1581,
      "step": 6410
    },
    {
      "epoch": 0.97698872295032,
      "grad_norm": 1.234375,
      "learning_rate": 2.776040446786143e-07,
      "loss": 0.9992,
      "step": 6411
    },
    {
      "epoch": 0.977141115513563,
      "grad_norm": 0.875,
      "learning_rate": 2.73941019106172e-07,
      "loss": 1.1254,
      "step": 6412
    },
    {
      "epoch": 0.9772935080768058,
      "grad_norm": 1.109375,
      "learning_rate": 2.7030228823577974e-07,
      "loss": 1.1307,
      "step": 6413
    },
    {
      "epoch": 0.9774459006400488,
      "grad_norm": 0.90234375,
      "learning_rate": 2.666878529538841e-07,
      "loss": 0.7181,
      "step": 6414
    },
    {
      "epoch": 0.9775982932032917,
      "grad_norm": 0.9765625,
      "learning_rate": 2.6309771414102515e-07,
      "loss": 0.9581,
      "step": 6415
    },
    {
      "epoch": 0.9777506857665346,
      "grad_norm": 1.0234375,
      "learning_rate": 2.5953187267180323e-07,
      "loss": 1.0867,
      "step": 6416
    },
    {
      "epoch": 0.9779030783297775,
      "grad_norm": 0.9453125,
      "learning_rate": 2.559903294149124e-07,
      "loss": 0.9815,
      "step": 6417
    },
    {
      "epoch": 0.9780554708930205,
      "grad_norm": 0.8359375,
      "learning_rate": 2.524730852331181e-07,
      "loss": 0.9704,
      "step": 6418
    },
    {
      "epoch": 0.9782078634562633,
      "grad_norm": 0.94140625,
      "learning_rate": 2.489801409832793e-07,
      "loss": 0.768,
      "step": 6419
    },
    {
      "epoch": 0.9783602560195063,
      "grad_norm": 1.0234375,
      "learning_rate": 2.455114975163264e-07,
      "loss": 1.0315,
      "step": 6420
    },
    {
      "epoch": 0.9785126485827491,
      "grad_norm": 0.9296875,
      "learning_rate": 2.4206715567726134e-07,
      "loss": 0.9984,
      "step": 6421
    },
    {
      "epoch": 0.9786650411459921,
      "grad_norm": 0.9375,
      "learning_rate": 2.3864711630519063e-07,
      "loss": 0.8553,
      "step": 6422
    },
    {
      "epoch": 0.978817433709235,
      "grad_norm": 0.875,
      "learning_rate": 2.352513802332701e-07,
      "loss": 0.9622,
      "step": 6423
    },
    {
      "epoch": 0.9789698262724779,
      "grad_norm": 0.8984375,
      "learning_rate": 2.3187994828876013e-07,
      "loss": 0.9209,
      "step": 6424
    },
    {
      "epoch": 0.9791222188357208,
      "grad_norm": 0.90625,
      "learning_rate": 2.285328212929816e-07,
      "loss": 1.2004,
      "step": 6425
    },
    {
      "epoch": 0.9792746113989638,
      "grad_norm": 0.7890625,
      "learning_rate": 2.2521000006134884e-07,
      "loss": 0.9433,
      "step": 6426
    },
    {
      "epoch": 0.9794270039622066,
      "grad_norm": 0.9140625,
      "learning_rate": 2.219114854033477e-07,
      "loss": 0.8551,
      "step": 6427
    },
    {
      "epoch": 0.9795793965254496,
      "grad_norm": 1.40625,
      "learning_rate": 2.1863727812254653e-07,
      "loss": 1.2268,
      "step": 6428
    },
    {
      "epoch": 0.9797317890886925,
      "grad_norm": 0.97265625,
      "learning_rate": 2.15387379016585e-07,
      "loss": 0.8086,
      "step": 6429
    },
    {
      "epoch": 0.9798841816519354,
      "grad_norm": 1.0859375,
      "learning_rate": 2.121617888771854e-07,
      "loss": 1.2063,
      "step": 6430
    },
    {
      "epoch": 0.9800365742151783,
      "grad_norm": 0.78125,
      "learning_rate": 2.0896050849015248e-07,
      "loss": 0.8835,
      "step": 6431
    },
    {
      "epoch": 0.9801889667784213,
      "grad_norm": 1.015625,
      "learning_rate": 2.0578353863535126e-07,
      "loss": 0.9961,
      "step": 6432
    },
    {
      "epoch": 0.9803413593416641,
      "grad_norm": 1.203125,
      "learning_rate": 2.0263088008675158e-07,
      "loss": 1.0243,
      "step": 6433
    },
    {
      "epoch": 0.9804937519049071,
      "grad_norm": 0.89453125,
      "learning_rate": 1.9950253361238347e-07,
      "loss": 0.9771,
      "step": 6434
    },
    {
      "epoch": 0.9806461444681499,
      "grad_norm": 1.453125,
      "learning_rate": 1.9639849997435954e-07,
      "loss": 1.0821,
      "step": 6435
    },
    {
      "epoch": 0.9807985370313929,
      "grad_norm": 1.1875,
      "learning_rate": 1.9331877992886383e-07,
      "loss": 0.88,
      "step": 6436
    },
    {
      "epoch": 0.9809509295946358,
      "grad_norm": 1.140625,
      "learning_rate": 1.902633742261517e-07,
      "loss": 1.0499,
      "step": 6437
    },
    {
      "epoch": 0.9811033221578787,
      "grad_norm": 0.890625,
      "learning_rate": 1.8723228361057222e-07,
      "loss": 0.8848,
      "step": 6438
    },
    {
      "epoch": 0.9812557147211216,
      "grad_norm": 1.0078125,
      "learning_rate": 1.842255088205458e-07,
      "loss": 0.8925,
      "step": 6439
    },
    {
      "epoch": 0.9814081072843646,
      "grad_norm": 1.0859375,
      "learning_rate": 1.8124305058855317e-07,
      "loss": 1.0033,
      "step": 6440
    },
    {
      "epoch": 0.9815604998476074,
      "grad_norm": 0.8359375,
      "learning_rate": 1.7828490964117983e-07,
      "loss": 0.9434,
      "step": 6441
    },
    {
      "epoch": 0.9817128924108504,
      "grad_norm": 1.015625,
      "learning_rate": 1.7535108669907153e-07,
      "loss": 0.9481,
      "step": 6442
    },
    {
      "epoch": 0.9818652849740933,
      "grad_norm": 0.95703125,
      "learning_rate": 1.724415824769343e-07,
      "loss": 0.8994,
      "step": 6443
    },
    {
      "epoch": 0.9820176775373362,
      "grad_norm": 1.15625,
      "learning_rate": 1.69556397683579e-07,
      "loss": 0.948,
      "step": 6444
    },
    {
      "epoch": 0.9821700701005791,
      "grad_norm": 0.96484375,
      "learning_rate": 1.6669553302186557e-07,
      "loss": 1.0316,
      "step": 6445
    },
    {
      "epoch": 0.9823224626638221,
      "grad_norm": 1.1796875,
      "learning_rate": 1.6385898918875875e-07,
      "loss": 1.0853,
      "step": 6446
    },
    {
      "epoch": 0.9824748552270649,
      "grad_norm": 0.99609375,
      "learning_rate": 1.6104676687526132e-07,
      "loss": 1.0253,
      "step": 6447
    },
    {
      "epoch": 0.9826272477903079,
      "grad_norm": 0.875,
      "learning_rate": 1.5825886676649192e-07,
      "loss": 0.8152,
      "step": 6448
    },
    {
      "epoch": 0.9827796403535507,
      "grad_norm": 0.81640625,
      "learning_rate": 1.554952895416073e-07,
      "loss": 0.8654,
      "step": 6449
    },
    {
      "epoch": 0.9829320329167937,
      "grad_norm": 0.85546875,
      "learning_rate": 1.5275603587386887e-07,
      "loss": 0.9372,
      "step": 6450
    },
    {
      "epoch": 0.9830844254800366,
      "grad_norm": 1.203125,
      "learning_rate": 1.5004110643057623e-07,
      "loss": 0.9909,
      "step": 6451
    },
    {
      "epoch": 0.9832368180432794,
      "grad_norm": 1.09375,
      "learning_rate": 1.473505018731447e-07,
      "loss": 1.0548,
      "step": 6452
    },
    {
      "epoch": 0.9833892106065224,
      "grad_norm": 1.2734375,
      "learning_rate": 1.4468422285703885e-07,
      "loss": 1.1612,
      "step": 6453
    },
    {
      "epoch": 0.9835416031697654,
      "grad_norm": 0.80078125,
      "learning_rate": 1.420422700317947e-07,
      "loss": 1.021,
      "step": 6454
    },
    {
      "epoch": 0.9836939957330082,
      "grad_norm": 1.1328125,
      "learning_rate": 1.3942464404104184e-07,
      "loss": 0.9502,
      "step": 6455
    },
    {
      "epoch": 0.9838463882962512,
      "grad_norm": 0.94140625,
      "learning_rate": 1.3683134552247012e-07,
      "loss": 0.9724,
      "step": 6456
    },
    {
      "epoch": 0.9839987808594941,
      "grad_norm": 1.09375,
      "learning_rate": 1.342623751078409e-07,
      "loss": 1.2509,
      "step": 6457
    },
    {
      "epoch": 0.984151173422737,
      "grad_norm": 1.078125,
      "learning_rate": 1.3171773342298687e-07,
      "loss": 1.2062,
      "step": 6458
    },
    {
      "epoch": 0.9843035659859799,
      "grad_norm": 0.95703125,
      "learning_rate": 1.2919742108783439e-07,
      "loss": 0.9351,
      "step": 6459
    },
    {
      "epoch": 0.9844559585492227,
      "grad_norm": 0.9921875,
      "learning_rate": 1.2670143871634788e-07,
      "loss": 0.9458,
      "step": 6460
    },
    {
      "epoch": 0.9846083511124657,
      "grad_norm": 1.3359375,
      "learning_rate": 1.2422978691659647e-07,
      "loss": 1.0012,
      "step": 6461
    },
    {
      "epoch": 0.9847607436757086,
      "grad_norm": 0.890625,
      "learning_rate": 1.2178246629070967e-07,
      "loss": 1.0318,
      "step": 6462
    },
    {
      "epoch": 0.9849131362389515,
      "grad_norm": 1.1640625,
      "learning_rate": 1.193594774348883e-07,
      "loss": 1.0884,
      "step": 6463
    },
    {
      "epoch": 0.9850655288021944,
      "grad_norm": 0.79296875,
      "learning_rate": 1.1696082093941574e-07,
      "loss": 0.8452,
      "step": 6464
    },
    {
      "epoch": 0.9852179213654374,
      "grad_norm": 1.21875,
      "learning_rate": 1.1458649738862459e-07,
      "loss": 0.9993,
      "step": 6465
    },
    {
      "epoch": 0.9853703139286802,
      "grad_norm": 0.94140625,
      "learning_rate": 1.1223650736094104e-07,
      "loss": 0.9429,
      "step": 6466
    },
    {
      "epoch": 0.9855227064919232,
      "grad_norm": 0.95703125,
      "learning_rate": 1.0991085142886271e-07,
      "loss": 0.8922,
      "step": 6467
    },
    {
      "epoch": 0.9856750990551661,
      "grad_norm": 0.91796875,
      "learning_rate": 1.0760953015893637e-07,
      "loss": 0.8954,
      "step": 6468
    },
    {
      "epoch": 0.985827491618409,
      "grad_norm": 1.03125,
      "learning_rate": 1.053325441118136e-07,
      "loss": 0.9719,
      "step": 6469
    },
    {
      "epoch": 0.9859798841816519,
      "grad_norm": 0.984375,
      "learning_rate": 1.0307989384219507e-07,
      "loss": 0.9776,
      "step": 6470
    },
    {
      "epoch": 0.9861322767448949,
      "grad_norm": 1.03125,
      "learning_rate": 1.0085157989885297e-07,
      "loss": 1.048,
      "step": 6471
    },
    {
      "epoch": 0.9862846693081377,
      "grad_norm": 0.81640625,
      "learning_rate": 9.864760282464192e-08,
      "loss": 1.0166,
      "step": 6472
    },
    {
      "epoch": 0.9864370618713807,
      "grad_norm": 1.1484375,
      "learning_rate": 9.646796315647688e-08,
      "loss": 0.9183,
      "step": 6473
    },
    {
      "epoch": 0.9865894544346235,
      "grad_norm": 0.86328125,
      "learning_rate": 9.431266142536643e-08,
      "loss": 0.9237,
      "step": 6474
    },
    {
      "epoch": 0.9867418469978665,
      "grad_norm": 0.9296875,
      "learning_rate": 9.218169815635724e-08,
      "loss": 1.1147,
      "step": 6475
    },
    {
      "epoch": 0.9868942395611094,
      "grad_norm": 0.91015625,
      "learning_rate": 9.00750738685896e-08,
      "loss": 0.8693,
      "step": 6476
    },
    {
      "epoch": 0.9870466321243523,
      "grad_norm": 0.76171875,
      "learning_rate": 8.799278907526409e-08,
      "loss": 0.8113,
      "step": 6477
    },
    {
      "epoch": 0.9871990246875952,
      "grad_norm": 0.9296875,
      "learning_rate": 8.593484428365272e-08,
      "loss": 0.8903,
      "step": 6478
    },
    {
      "epoch": 0.9873514172508382,
      "grad_norm": 0.9375,
      "learning_rate": 8.390123999509891e-08,
      "loss": 1.0863,
      "step": 6479
    },
    {
      "epoch": 0.987503809814081,
      "grad_norm": 0.859375,
      "learning_rate": 8.189197670502857e-08,
      "loss": 0.9614,
      "step": 6480
    },
    {
      "epoch": 0.987656202377324,
      "grad_norm": 0.8125,
      "learning_rate": 7.990705490292794e-08,
      "loss": 0.931,
      "step": 6481
    },
    {
      "epoch": 0.9878085949405669,
      "grad_norm": 0.875,
      "learning_rate": 7.794647507233244e-08,
      "loss": 1.1995,
      "step": 6482
    },
    {
      "epoch": 0.9879609875038098,
      "grad_norm": 1.25,
      "learning_rate": 7.601023769089333e-08,
      "loss": 0.9687,
      "step": 6483
    },
    {
      "epoch": 0.9881133800670527,
      "grad_norm": 1.0625,
      "learning_rate": 7.409834323027776e-08,
      "loss": 0.9548,
      "step": 6484
    },
    {
      "epoch": 0.9882657726302957,
      "grad_norm": 0.96875,
      "learning_rate": 7.22107921562798e-08,
      "loss": 1.2711,
      "step": 6485
    },
    {
      "epoch": 0.9884181651935385,
      "grad_norm": 1.0703125,
      "learning_rate": 7.034758492872052e-08,
      "loss": 1.0049,
      "step": 6486
    },
    {
      "epoch": 0.9885705577567815,
      "grad_norm": 1.140625,
      "learning_rate": 6.850872200149238e-08,
      "loss": 0.9996,
      "step": 6487
    },
    {
      "epoch": 0.9887229503200243,
      "grad_norm": 0.87109375,
      "learning_rate": 6.669420382259262e-08,
      "loss": 0.9998,
      "step": 6488
    },
    {
      "epoch": 0.9888753428832673,
      "grad_norm": 1.0234375,
      "learning_rate": 6.490403083404539e-08,
      "loss": 1.223,
      "step": 6489
    },
    {
      "epoch": 0.9890277354465102,
      "grad_norm": 0.89453125,
      "learning_rate": 6.313820347196853e-08,
      "loss": 0.9031,
      "step": 6490
    },
    {
      "epoch": 0.9891801280097531,
      "grad_norm": 0.78515625,
      "learning_rate": 6.139672216654014e-08,
      "loss": 0.8585,
      "step": 6491
    },
    {
      "epoch": 0.989332520572996,
      "grad_norm": 0.79296875,
      "learning_rate": 5.967958734202084e-08,
      "loss": 0.9712,
      "step": 6492
    },
    {
      "epoch": 0.989484913136239,
      "grad_norm": 1.078125,
      "learning_rate": 5.798679941672047e-08,
      "loss": 0.9927,
      "step": 6493
    },
    {
      "epoch": 0.9896373056994818,
      "grad_norm": 1.0625,
      "learning_rate": 5.631835880303138e-08,
      "loss": 0.991,
      "step": 6494
    },
    {
      "epoch": 0.9897896982627248,
      "grad_norm": 0.8046875,
      "learning_rate": 5.467426590739511e-08,
      "loss": 1.1465,
      "step": 6495
    },
    {
      "epoch": 0.9899420908259677,
      "grad_norm": 1.03125,
      "learning_rate": 5.305452113035792e-08,
      "loss": 0.9608,
      "step": 6496
    },
    {
      "epoch": 0.9900944833892106,
      "grad_norm": 0.9296875,
      "learning_rate": 5.145912486649307e-08,
      "loss": 0.9251,
      "step": 6497
    },
    {
      "epoch": 0.9902468759524535,
      "grad_norm": 0.87109375,
      "learning_rate": 4.988807750447855e-08,
      "loss": 0.9483,
      "step": 6498
    },
    {
      "epoch": 0.9903992685156965,
      "grad_norm": 0.97265625,
      "learning_rate": 4.8341379427041535e-08,
      "loss": 0.9291,
      "step": 6499
    },
    {
      "epoch": 0.9905516610789393,
      "grad_norm": 0.9609375,
      "learning_rate": 4.681903101096952e-08,
      "loss": 0.9649,
      "step": 6500
    },
    {
      "epoch": 0.9907040536421823,
      "grad_norm": 0.8125,
      "learning_rate": 4.5321032627132497e-08,
      "loss": 0.8492,
      "step": 6501
    },
    {
      "epoch": 0.9908564462054251,
      "grad_norm": 0.84765625,
      "learning_rate": 4.384738464047189e-08,
      "loss": 0.9379,
      "step": 6502
    },
    {
      "epoch": 0.9910088387686681,
      "grad_norm": 0.921875,
      "learning_rate": 4.2398087409978304e-08,
      "loss": 0.8799,
      "step": 6503
    },
    {
      "epoch": 0.991161231331911,
      "grad_norm": 1.0,
      "learning_rate": 4.097314128872487e-08,
      "loss": 1.0985,
      "step": 6504
    },
    {
      "epoch": 0.9913136238951539,
      "grad_norm": 1.0390625,
      "learning_rate": 3.9572546623856125e-08,
      "loss": 0.9444,
      "step": 6505
    },
    {
      "epoch": 0.9914660164583968,
      "grad_norm": 0.9140625,
      "learning_rate": 3.81963037565658e-08,
      "loss": 0.9473,
      "step": 6506
    },
    {
      "epoch": 0.9916184090216398,
      "grad_norm": 1.1171875,
      "learning_rate": 3.6844413022141254e-08,
      "loss": 0.9523,
      "step": 6507
    },
    {
      "epoch": 0.9917708015848826,
      "grad_norm": 0.890625,
      "learning_rate": 3.5516874749907947e-08,
      "loss": 0.8703,
      "step": 6508
    },
    {
      "epoch": 0.9919231941481256,
      "grad_norm": 0.87890625,
      "learning_rate": 3.421368926328494e-08,
      "loss": 1.0607,
      "step": 6509
    },
    {
      "epoch": 0.9920755867113685,
      "grad_norm": 0.77734375,
      "learning_rate": 3.293485687974052e-08,
      "loss": 1.0053,
      "step": 6510
    },
    {
      "epoch": 0.9922279792746114,
      "grad_norm": 1.0390625,
      "learning_rate": 3.168037791081435e-08,
      "loss": 0.906,
      "step": 6511
    },
    {
      "epoch": 0.9923803718378543,
      "grad_norm": 1.0390625,
      "learning_rate": 3.045025266211754e-08,
      "loss": 0.9956,
      "step": 6512
    },
    {
      "epoch": 0.9925327644010973,
      "grad_norm": 1.078125,
      "learning_rate": 2.924448143332148e-08,
      "loss": 1.0685,
      "step": 6513
    },
    {
      "epoch": 0.9926851569643401,
      "grad_norm": 0.8046875,
      "learning_rate": 2.8063064518191184e-08,
      "loss": 0.7952,
      "step": 6514
    },
    {
      "epoch": 0.9928375495275831,
      "grad_norm": 0.76171875,
      "learning_rate": 2.6906002204507562e-08,
      "loss": 0.8644,
      "step": 6515
    },
    {
      "epoch": 0.9929899420908259,
      "grad_norm": 0.8984375,
      "learning_rate": 2.5773294774167345e-08,
      "loss": 0.6781,
      "step": 6516
    },
    {
      "epoch": 0.9931423346540689,
      "grad_norm": 0.88671875,
      "learning_rate": 2.4664942503105358e-08,
      "loss": 0.8264,
      "step": 6517
    },
    {
      "epoch": 0.9932947272173118,
      "grad_norm": 0.96484375,
      "learning_rate": 2.358094566133895e-08,
      "loss": 0.9527,
      "step": 6518
    },
    {
      "epoch": 0.9934471197805547,
      "grad_norm": 1.0625,
      "learning_rate": 2.252130451293466e-08,
      "loss": 0.9186,
      "step": 6519
    },
    {
      "epoch": 0.9935995123437976,
      "grad_norm": 1.203125,
      "learning_rate": 2.148601931604155e-08,
      "loss": 0.7218,
      "step": 6520
    },
    {
      "epoch": 0.9937519049070406,
      "grad_norm": 0.93359375,
      "learning_rate": 2.0475090322880087e-08,
      "loss": 0.8232,
      "step": 6521
    },
    {
      "epoch": 0.9939042974702834,
      "grad_norm": 1.0234375,
      "learning_rate": 1.9488517779708838e-08,
      "loss": 0.9542,
      "step": 6522
    },
    {
      "epoch": 0.9940566900335264,
      "grad_norm": 0.84375,
      "learning_rate": 1.8526301926891088e-08,
      "loss": 0.9353,
      "step": 6523
    },
    {
      "epoch": 0.9942090825967693,
      "grad_norm": 1.046875,
      "learning_rate": 1.7588442998817122e-08,
      "loss": 0.9221,
      "step": 6524
    },
    {
      "epoch": 0.9943614751600122,
      "grad_norm": 0.83203125,
      "learning_rate": 1.667494122397084e-08,
      "loss": 0.9705,
      "step": 6525
    },
    {
      "epoch": 0.9945138677232551,
      "grad_norm": 0.87890625,
      "learning_rate": 1.5785796824896447e-08,
      "loss": 0.9995,
      "step": 6526
    },
    {
      "epoch": 0.9946662602864981,
      "grad_norm": 1.015625,
      "learning_rate": 1.492101001820956e-08,
      "loss": 1.1152,
      "step": 6527
    },
    {
      "epoch": 0.9948186528497409,
      "grad_norm": 1.1640625,
      "learning_rate": 1.4080581014563887e-08,
      "loss": 0.9727,
      "step": 6528
    },
    {
      "epoch": 0.9949710454129839,
      "grad_norm": 0.90234375,
      "learning_rate": 1.3264510018717869e-08,
      "loss": 0.8917,
      "step": 6529
    },
    {
      "epoch": 0.9951234379762267,
      "grad_norm": 0.96484375,
      "learning_rate": 1.2472797229479139e-08,
      "loss": 0.8746,
      "step": 6530
    },
    {
      "epoch": 0.9952758305394697,
      "grad_norm": 0.95703125,
      "learning_rate": 1.1705442839704539e-08,
      "loss": 0.7884,
      "step": 6531
    },
    {
      "epoch": 0.9954282231027126,
      "grad_norm": 0.76953125,
      "learning_rate": 1.0962447036344525e-08,
      "loss": 0.9864,
      "step": 6532
    },
    {
      "epoch": 0.9955806156659555,
      "grad_norm": 1.3203125,
      "learning_rate": 1.0243810000398757e-08,
      "loss": 1.1477,
      "step": 6533
    },
    {
      "epoch": 0.9957330082291984,
      "grad_norm": 1.0625,
      "learning_rate": 9.549531906938302e-09,
      "loss": 1.0232,
      "step": 6534
    },
    {
      "epoch": 0.9958854007924414,
      "grad_norm": 0.91015625,
      "learning_rate": 8.879612925105641e-09,
      "loss": 0.9906,
      "step": 6535
    },
    {
      "epoch": 0.9960377933556842,
      "grad_norm": 0.85546875,
      "learning_rate": 8.234053218092452e-09,
      "loss": 0.9697,
      "step": 6536
    },
    {
      "epoch": 0.9961901859189272,
      "grad_norm": 1.1015625,
      "learning_rate": 7.612852943184034e-09,
      "loss": 1.1492,
      "step": 6537
    },
    {
      "epoch": 0.9963425784821701,
      "grad_norm": 0.984375,
      "learning_rate": 7.016012251681581e-09,
      "loss": 1.1544,
      "step": 6538
    },
    {
      "epoch": 0.996494971045413,
      "grad_norm": 0.859375,
      "learning_rate": 6.443531289013205e-09,
      "loss": 0.7807,
      "step": 6539
    },
    {
      "epoch": 0.9966473636086559,
      "grad_norm": 1.171875,
      "learning_rate": 5.895410194645123e-09,
      "loss": 1.0381,
      "step": 6540
    },
    {
      "epoch": 0.9967997561718989,
      "grad_norm": 1.1015625,
      "learning_rate": 5.371649102081655e-09,
      "loss": 1.0075,
      "step": 6541
    },
    {
      "epoch": 0.9969521487351417,
      "grad_norm": 0.84375,
      "learning_rate": 4.872248138931834e-09,
      "loss": 0.965,
      "step": 6542
    },
    {
      "epoch": 0.9971045412983847,
      "grad_norm": 0.8671875,
      "learning_rate": 4.397207426865002e-09,
      "loss": 0.9207,
      "step": 6543
    },
    {
      "epoch": 0.9972569338616275,
      "grad_norm": 1.0859375,
      "learning_rate": 3.946527081599704e-09,
      "loss": 0.9477,
      "step": 6544
    },
    {
      "epoch": 0.9974093264248705,
      "grad_norm": 1.0703125,
      "learning_rate": 3.520207212936999e-09,
      "loss": 0.9703,
      "step": 6545
    },
    {
      "epoch": 0.9975617189881134,
      "grad_norm": 0.76171875,
      "learning_rate": 3.1182479247160444e-09,
      "loss": 0.8757,
      "step": 6546
    },
    {
      "epoch": 0.9977141115513563,
      "grad_norm": 0.97265625,
      "learning_rate": 2.740649314880717e-09,
      "loss": 0.9456,
      "step": 6547
    },
    {
      "epoch": 0.9978665041145992,
      "grad_norm": 0.96484375,
      "learning_rate": 2.3874114754018904e-09,
      "loss": 0.9417,
      "step": 6548
    },
    {
      "epoch": 0.9980188966778422,
      "grad_norm": 1.1484375,
      "learning_rate": 2.058534492344055e-09,
      "loss": 0.9483,
      "step": 6549
    },
    {
      "epoch": 0.998171289241085,
      "grad_norm": 0.79296875,
      "learning_rate": 1.754018445820904e-09,
      "loss": 0.7993,
      "step": 6550
    },
    {
      "epoch": 0.998323681804328,
      "grad_norm": 1.1328125,
      "learning_rate": 1.473863410028642e-09,
      "loss": 0.9051,
      "step": 6551
    },
    {
      "epoch": 0.9984760743675709,
      "grad_norm": 1.0234375,
      "learning_rate": 1.2180694532015758e-09,
      "loss": 1.1349,
      "step": 6552
    },
    {
      "epoch": 0.9986284669308138,
      "grad_norm": 0.81640625,
      "learning_rate": 9.866366376565239e-10,
      "loss": 0.9544,
      "step": 6553
    },
    {
      "epoch": 0.9987808594940567,
      "grad_norm": 1.15625,
      "learning_rate": 7.795650197928161e-10,
      "loss": 0.9809,
      "step": 6554
    },
    {
      "epoch": 0.9989332520572997,
      "grad_norm": 0.73828125,
      "learning_rate": 5.9685465002568e-10,
      "loss": 0.8528,
      "step": 6555
    },
    {
      "epoch": 0.9990856446205425,
      "grad_norm": 0.80078125,
      "learning_rate": 4.3850557289726315e-10,
      "loss": 1.0045,
      "step": 6556
    },
    {
      "epoch": 0.9992380371837855,
      "grad_norm": 0.87109375,
      "learning_rate": 3.04517826954509e-10,
      "loss": 1.0579,
      "step": 6557
    },
    {
      "epoch": 0.9993904297470283,
      "grad_norm": 0.890625,
      "learning_rate": 1.9489144486017906e-10,
      "loss": 0.8653,
      "step": 6558
    },
    {
      "epoch": 0.9995428223102713,
      "grad_norm": 0.61328125,
      "learning_rate": 1.0962645331513698e-10,
      "loss": 0.7667,
      "step": 6559
    },
    {
      "epoch": 0.9996952148735142,
      "grad_norm": 1.109375,
      "learning_rate": 4.872287308055334e-11,
      "loss": 1.1252,
      "step": 6560
    },
    {
      "epoch": 0.999847607436757,
      "grad_norm": 0.77734375,
      "learning_rate": 1.2180719011212205e-11,
      "loss": 0.787,
      "step": 6561
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.03125,
      "learning_rate": 0.0,
      "loss": 1.122,
      "step": 6562
    }
  ],
  "logging_steps": 1,
  "max_steps": 6562,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 9.299624826765312e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}