{
  "best_metric": 3.9716358184814453,
  "best_model_checkpoint": "/mmfs1/gscratch/stf/abhinavp/corpus-filtering/outputs/full/lstm/2/checkpoints/checkpoint-3052726",
  "epoch": 0.024980623875185654,
  "eval_steps": 10,
  "global_step": 3052726,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 4.999998362119627e-05,
      "loss": 10.8204,
      "step": 1
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.999161405248948e-05,
      "loss": 7.5681,
      "step": 512
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.998322810497896e-05,
      "loss": 7.0604,
      "step": 1024
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.997484215746844e-05,
      "loss": 6.9893,
      "step": 1536
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.996645620995792e-05,
      "loss": 6.9585,
      "step": 2048
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.99580702624474e-05,
      "loss": 6.8857,
      "step": 2560
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.994968431493688e-05,
      "loss": 6.7423,
      "step": 3072
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.994129836742636e-05,
      "loss": 6.6348,
      "step": 3584
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.993291241991584e-05,
      "loss": 6.5475,
      "step": 4096
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.992452647240532e-05,
      "loss": 6.4551,
      "step": 4608
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.99161405248948e-05,
      "loss": 6.406,
      "step": 5120
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.990775457738428e-05,
      "loss": 6.3398,
      "step": 5632
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.989938500867749e-05,
      "loss": 6.2764,
      "step": 6144
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.989099906116697e-05,
      "loss": 6.2064,
      "step": 6656
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.988261311365645e-05,
      "loss": 6.1503,
      "step": 7168
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.987422716614593e-05,
      "loss": 6.103,
      "step": 7680
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.986584121863541e-05,
      "loss": 6.0533,
      "step": 8192
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.985745527112489e-05,
      "loss": 5.9976,
      "step": 8704
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.984906932361437e-05,
      "loss": 5.9613,
      "step": 9216
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.984068337610385e-05,
      "loss": 5.9256,
      "step": 9728
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.983229742859333e-05,
      "loss": 5.8795,
      "step": 10240
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.982391148108281e-05,
      "loss": 5.8432,
      "step": 10752
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9815541912376026e-05,
      "loss": 5.8223,
      "step": 11264
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9807155964865506e-05,
      "loss": 5.7856,
      "step": 11776
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9798770017354986e-05,
      "loss": 5.7637,
      "step": 12288
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9790384069844466e-05,
      "loss": 5.7367,
      "step": 12800
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9781998122333946e-05,
      "loss": 5.7145,
      "step": 13312
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9773628553627155e-05,
      "loss": 5.6845,
      "step": 13824
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9765242606116635e-05,
      "loss": 5.6507,
      "step": 14336
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9756856658606115e-05,
      "loss": 5.645,
      "step": 14848
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9748470711095595e-05,
      "loss": 5.6112,
      "step": 15360
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.974008476358507e-05,
      "loss": 5.5953,
      "step": 15872
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.973169881607455e-05,
      "loss": 5.5703,
      "step": 16384
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.972331286856403e-05,
      "loss": 5.5656,
      "step": 16896
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9714943299857244e-05,
      "loss": 5.5349,
      "step": 17408
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.970657373115046e-05,
      "loss": 5.5254,
      "step": 17920
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.969818778363994e-05,
      "loss": 5.5123,
      "step": 18432
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.968980183612942e-05,
      "loss": 5.4926,
      "step": 18944
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.96814158886189e-05,
      "loss": 5.4825,
      "step": 19456
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.967302994110837e-05,
      "loss": 5.4603,
      "step": 19968
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.966464399359785e-05,
      "loss": 5.4357,
      "step": 20480
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.965625804608733e-05,
      "loss": 5.4296,
      "step": 20992
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.964788847738054e-05,
      "loss": 5.4028,
      "step": 21504
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.963950252987002e-05,
      "loss": 5.4151,
      "step": 22016
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.96311165823595e-05,
      "loss": 5.3867,
      "step": 22528
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.962274701365272e-05,
      "loss": 5.3832,
      "step": 23040
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.96143610661422e-05,
      "loss": 5.3678,
      "step": 23552
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.960597511863168e-05,
      "loss": 5.3657,
      "step": 24064
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.959758917112116e-05,
      "loss": 5.3572,
      "step": 24576
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.958920322361064e-05,
      "loss": 5.3291,
      "step": 25088
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.958081727610012e-05,
      "loss": 5.3171,
      "step": 25600
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.957244770739333e-05,
      "loss": 5.3093,
      "step": 26112
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.956406175988281e-05,
      "loss": 5.316,
      "step": 26624
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.955567581237229e-05,
      "loss": 5.2922,
      "step": 27136
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.954728986486177e-05,
      "loss": 5.3001,
      "step": 27648
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.953890391735125e-05,
      "loss": 5.2701,
      "step": 28160
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9530517969840727e-05,
      "loss": 5.2667,
      "step": 28672
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9522132022330207e-05,
      "loss": 5.2549,
      "step": 29184
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.951376245362342e-05,
      "loss": 5.2482,
      "step": 29696
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.95053765061129e-05,
      "loss": 5.2455,
      "step": 30208
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.949699055860238e-05,
      "loss": 5.2247,
      "step": 30720
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.948860461109186e-05,
      "loss": 5.2189,
      "step": 31232
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.94802514211888e-05,
      "loss": 5.2231,
      "step": 31744
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.947186547367828e-05,
      "loss": 5.202,
      "step": 32256
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.946347952616776e-05,
      "loss": 5.1997,
      "step": 32768
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.945509357865724e-05,
      "loss": 5.1885,
      "step": 33280
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.944670763114672e-05,
      "loss": 5.1889,
      "step": 33792
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.94383216836362e-05,
      "loss": 5.1736,
      "step": 34304
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.942993573612568e-05,
      "loss": 5.1751,
      "step": 34816
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.942156616741889e-05,
      "loss": 5.1639,
      "step": 35328
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.941318021990837e-05,
      "loss": 5.1534,
      "step": 35840
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9404794272397856e-05,
      "loss": 5.1556,
      "step": 36352
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9396408324887336e-05,
      "loss": 5.1407,
      "step": 36864
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9388022377376816e-05,
      "loss": 5.1494,
      "step": 37376
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9379636429866296e-05,
      "loss": 5.1466,
      "step": 37888
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9371250482355776e-05,
      "loss": 5.1375,
      "step": 38400
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9362880913648985e-05,
      "loss": 5.1181,
      "step": 38912
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9354511344942194e-05,
      "loss": 5.1031,
      "step": 39424
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9346125397431674e-05,
      "loss": 5.0962,
      "step": 39936
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9337739449921154e-05,
      "loss": 5.1009,
      "step": 40448
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9329353502410634e-05,
      "loss": 5.105,
      "step": 40960
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9320967554900114e-05,
      "loss": 5.0984,
      "step": 41472
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9312581607389594e-05,
      "loss": 5.0732,
      "step": 41984
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9304195659879074e-05,
      "loss": 5.0678,
      "step": 42496
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9295809712368554e-05,
      "loss": 5.0758,
      "step": 43008
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.928744014366177e-05,
      "loss": 5.0612,
      "step": 43520
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.927907057495498e-05,
      "loss": 5.0598,
      "step": 44032
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.927068462744446e-05,
      "loss": 5.0538,
      "step": 44544
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.926229867993394e-05,
      "loss": 5.0557,
      "step": 45056
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.925391273242342e-05,
      "loss": 5.0432,
      "step": 45568
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.92455267849129e-05,
      "loss": 5.0361,
      "step": 46080
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.923714083740238e-05,
      "loss": 5.0219,
      "step": 46592
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.922877126869559e-05,
      "loss": 5.0354,
      "step": 47104
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.922038532118507e-05,
      "loss": 5.0204,
      "step": 47616
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.921199937367455e-05,
      "loss": 5.0045,
      "step": 48128
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.920361342616403e-05,
      "loss": 5.004,
      "step": 48640
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.919524385745724e-05,
      "loss": 5.0162,
      "step": 49152
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.918685790994672e-05,
      "loss": 5.0063,
      "step": 49664
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.91784719624362e-05,
      "loss": 4.9989,
      "step": 50176
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9170086014925676e-05,
      "loss": 4.9854,
      "step": 50688
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9161700067415156e-05,
      "loss": 4.9901,
      "step": 51200
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.915333049870837e-05,
      "loss": 4.9692,
      "step": 51712
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.914494455119785e-05,
      "loss": 4.9842,
      "step": 52224
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9136558603687325e-05,
      "loss": 4.9856,
      "step": 52736
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9128172656176805e-05,
      "loss": 4.9622,
      "step": 53248
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9119786708666285e-05,
      "loss": 4.9544,
      "step": 53760
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9111400761155765e-05,
      "loss": 4.9322,
      "step": 54272
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9103014813645245e-05,
      "loss": 4.943,
      "step": 54784
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.909462886613473e-05,
      "loss": 4.9443,
      "step": 55296
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.908625929742794e-05,
      "loss": 4.9379,
      "step": 55808
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.907787334991742e-05,
      "loss": 4.9413,
      "step": 56320
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.90694874024069e-05,
      "loss": 4.9229,
      "step": 56832
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.906110145489638e-05,
      "loss": 4.9241,
      "step": 57344
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.905271550738586e-05,
      "loss": 4.9263,
      "step": 57856
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.904434593867907e-05,
      "loss": 4.9277,
      "step": 58368
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.903595999116855e-05,
      "loss": 4.9146,
      "step": 58880
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.902757404365803e-05,
      "loss": 4.9041,
      "step": 59392
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.901918809614751e-05,
      "loss": 4.9048,
      "step": 59904
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.901080214863699e-05,
      "loss": 4.9077,
      "step": 60416
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.900241620112647e-05,
      "loss": 4.9035,
      "step": 60928
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8994046632419686e-05,
      "loss": 4.8985,
      "step": 61440
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8985660684909166e-05,
      "loss": 4.8885,
      "step": 61952
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8977274737398646e-05,
      "loss": 4.8801,
      "step": 62464
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8968888789888125e-05,
      "loss": 4.8719,
      "step": 62976
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8960502842377605e-05,
      "loss": 4.8796,
      "step": 63488
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8952116894867085e-05,
      "loss": 4.8783,
      "step": 64000
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8943730947356565e-05,
      "loss": 4.88,
      "step": 64512
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.893534499984604e-05,
      "loss": 4.8544,
      "step": 65024
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8926991809942984e-05,
      "loss": 4.8673,
      "step": 65536
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8918605862432463e-05,
      "loss": 4.8672,
      "step": 66048
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8910219914921943e-05,
      "loss": 4.8537,
      "step": 66560
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8901833967411423e-05,
      "loss": 4.8684,
      "step": 67072
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.88934480199009e-05,
      "loss": 4.8469,
      "step": 67584
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.888506207239038e-05,
      "loss": 4.8611,
      "step": 68096
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.887667612487986e-05,
      "loss": 4.8507,
      "step": 68608
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.886830655617308e-05,
      "loss": 4.8302,
      "step": 69120
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.885992060866256e-05,
      "loss": 4.8292,
      "step": 69632
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.885153466115204e-05,
      "loss": 4.8293,
      "step": 70144
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.884316509244525e-05,
      "loss": 4.8315,
      "step": 70656
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.883477914493473e-05,
      "loss": 4.8373,
      "step": 71168
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.882639319742421e-05,
      "loss": 4.8324,
      "step": 71680
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.881800724991369e-05,
      "loss": 4.8258,
      "step": 72192
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.880962130240316e-05,
      "loss": 4.8156,
      "step": 72704
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.880123535489264e-05,
      "loss": 4.8045,
      "step": 73216
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.879284940738212e-05,
      "loss": 4.8034,
      "step": 73728
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.87844634598716e-05,
      "loss": 4.811,
      "step": 74240
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.877607751236109e-05,
      "loss": 4.8169,
      "step": 74752
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.876769156485057e-05,
      "loss": 4.8081,
      "step": 75264
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.875930561734005e-05,
      "loss": 4.7918,
      "step": 75776
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.875091966982953e-05,
      "loss": 4.7917,
      "step": 76288
    },
    {
      "epoch": 0.03,
      "eval_loss": 4.756728172302246,
      "eval_runtime": 527.8044,
      "eval_samples_per_second": 722.978,
      "eval_steps_per_second": 22.594,
      "step": 76319
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.874255010112274e-05,
      "loss": 4.7996,
      "step": 76800
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.873416415361222e-05,
      "loss": 4.7917,
      "step": 77312
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.8725794584905426e-05,
      "loss": 4.7837,
      "step": 77824
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.8717408637394906e-05,
      "loss": 4.7826,
      "step": 78336
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.8709022689884386e-05,
      "loss": 4.7726,
      "step": 78848
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.8700636742373866e-05,
      "loss": 4.7569,
      "step": 79360
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.8692250794863346e-05,
      "loss": 4.7672,
      "step": 79872
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.8683864847352826e-05,
      "loss": 4.7749,
      "step": 80384
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.8675478899842306e-05,
      "loss": 4.7578,
      "step": 80896
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.866710933113552e-05,
      "loss": 4.7657,
      "step": 81408
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.8658723383625e-05,
      "loss": 4.7682,
      "step": 81920
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.865033743611448e-05,
      "loss": 4.7687,
      "step": 82432
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.864195148860396e-05,
      "loss": 4.7568,
      "step": 82944
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.863356554109344e-05,
      "loss": 4.7542,
      "step": 83456
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.862519597238665e-05,
      "loss": 4.7512,
      "step": 83968
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.861681002487613e-05,
      "loss": 4.7415,
      "step": 84480
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.860842407736561e-05,
      "loss": 4.7437,
      "step": 84992
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.860003812985509e-05,
      "loss": 4.7457,
      "step": 85504
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.859165218234457e-05,
      "loss": 4.7443,
      "step": 86016
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.858326623483405e-05,
      "loss": 4.7428,
      "step": 86528
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.857488028732352e-05,
      "loss": 4.7293,
      "step": 87040
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.856649433981301e-05,
      "loss": 4.7408,
      "step": 87552
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.8558141149909955e-05,
      "loss": 4.7278,
      "step": 88064
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.8549755202399435e-05,
      "loss": 4.7368,
      "step": 88576
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.8541369254888915e-05,
      "loss": 4.726,
      "step": 89088
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.8532983307378395e-05,
      "loss": 4.7245,
      "step": 89600
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.8524613738671604e-05,
      "loss": 4.7266,
      "step": 90112
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.8516227791161084e-05,
      "loss": 4.7079,
      "step": 90624
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.8507841843650564e-05,
      "loss": 4.7216,
      "step": 91136
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.849947227494377e-05,
      "loss": 4.7026,
      "step": 91648
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.849108632743325e-05,
      "loss": 4.7089,
      "step": 92160
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.848270037992273e-05,
      "loss": 4.7054,
      "step": 92672
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.847431443241221e-05,
      "loss": 4.7136,
      "step": 93184
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.846592848490169e-05,
      "loss": 4.7009,
      "step": 93696
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.845754253739117e-05,
      "loss": 4.6935,
      "step": 94208
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.844915658988065e-05,
      "loss": 4.7101,
      "step": 94720
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.844077064237013e-05,
      "loss": 4.6965,
      "step": 95232
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.843238469485961e-05,
      "loss": 4.7036,
      "step": 95744
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.842399874734909e-05,
      "loss": 4.692,
      "step": 96256
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.841561279983857e-05,
      "loss": 4.6775,
      "step": 96768
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.840722685232805e-05,
      "loss": 4.6814,
      "step": 97280
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.839885728362126e-05,
      "loss": 4.6729,
      "step": 97792
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.839047133611074e-05,
      "loss": 4.6871,
      "step": 98304
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.838208538860022e-05,
      "loss": 4.6764,
      "step": 98816
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.837371581989343e-05,
      "loss": 4.6829,
      "step": 99328
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.836532987238292e-05,
      "loss": 4.6749,
      "step": 99840
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.83569439248724e-05,
      "loss": 4.6811,
      "step": 100352
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.834855797736188e-05,
      "loss": 4.6767,
      "step": 100864
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.834017202985136e-05,
      "loss": 4.6531,
      "step": 101376
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.8331802461144566e-05,
      "loss": 4.6575,
      "step": 101888
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.8323416513634046e-05,
      "loss": 4.6556,
      "step": 102400
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.8315030566123526e-05,
      "loss": 4.6685,
      "step": 102912
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.8306644618613006e-05,
      "loss": 4.6476,
      "step": 103424
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.8298275049906215e-05,
      "loss": 4.67,
      "step": 103936
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.8289889102395695e-05,
      "loss": 4.6461,
      "step": 104448
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.8281503154885175e-05,
      "loss": 4.6456,
      "step": 104960
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.8273117207374655e-05,
      "loss": 4.6474,
      "step": 105472
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.8264731259864135e-05,
      "loss": 4.643,
      "step": 105984
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.8256345312353615e-05,
      "loss": 4.6445,
      "step": 106496
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.8247959364843095e-05,
      "loss": 4.6368,
      "step": 107008
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.823957341733258e-05,
      "loss": 4.6331,
      "step": 107520
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.823120384862579e-05,
      "loss": 4.6438,
      "step": 108032
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.822281790111527e-05,
      "loss": 4.6294,
      "step": 108544
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.821443195360475e-05,
      "loss": 4.63,
      "step": 109056
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.820607876370169e-05,
      "loss": 4.6273,
      "step": 109568
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.819769281619117e-05,
      "loss": 4.6335,
      "step": 110080
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.818930686868065e-05,
      "loss": 4.6161,
      "step": 110592
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.818092092117013e-05,
      "loss": 4.6358,
      "step": 111104
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.817253497365961e-05,
      "loss": 4.6224,
      "step": 111616
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.816414902614909e-05,
      "loss": 4.6219,
      "step": 112128
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.815576307863857e-05,
      "loss": 4.6259,
      "step": 112640
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.814737713112805e-05,
      "loss": 4.6146,
      "step": 113152
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.8138991183617535e-05,
      "loss": 4.6332,
      "step": 113664
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.8130621614910745e-05,
      "loss": 4.6339,
      "step": 114176
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.8122235667400224e-05,
      "loss": 4.6308,
      "step": 114688
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.8113849719889704e-05,
      "loss": 4.6108,
      "step": 115200
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.8105463772379184e-05,
      "loss": 4.6034,
      "step": 115712
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.809707782486866e-05,
      "loss": 4.6007,
      "step": 116224
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.808869187735814e-05,
      "loss": 4.6107,
      "step": 116736
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.808033868745508e-05,
      "loss": 4.6221,
      "step": 117248
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.807195273994456e-05,
      "loss": 4.6134,
      "step": 117760
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.806356679243404e-05,
      "loss": 4.6005,
      "step": 118272
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.805518084492352e-05,
      "loss": 4.5877,
      "step": 118784
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.8046794897413e-05,
      "loss": 4.6079,
      "step": 119296
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.803840894990248e-05,
      "loss": 4.5907,
      "step": 119808
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.803002300239196e-05,
      "loss": 4.5951,
      "step": 120320
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.802163705488144e-05,
      "loss": 4.5976,
      "step": 120832
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.801325110737092e-05,
      "loss": 4.6065,
      "step": 121344
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.80048651598604e-05,
      "loss": 4.5929,
      "step": 121856
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.799649559115361e-05,
      "loss": 4.5897,
      "step": 122368
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.798810964364309e-05,
      "loss": 4.5805,
      "step": 122880
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.797972369613257e-05,
      "loss": 4.5923,
      "step": 123392
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.797133774862205e-05,
      "loss": 4.5917,
      "step": 123904
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.796295180111153e-05,
      "loss": 4.568,
      "step": 124416
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.795456585360101e-05,
      "loss": 4.5767,
      "step": 124928
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.794617990609049e-05,
      "loss": 4.591,
      "step": 125440
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.793779395857997e-05,
      "loss": 4.5918,
      "step": 125952
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.792942438987319e-05,
      "loss": 4.5789,
      "step": 126464
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.7921054821166396e-05,
      "loss": 4.5736,
      "step": 126976
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.7912668873655876e-05,
      "loss": 4.5832,
      "step": 127488
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.7904282926145356e-05,
      "loss": 4.5626,
      "step": 128000
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.7895896978634836e-05,
      "loss": 4.5787,
      "step": 128512
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.7887511031124316e-05,
      "loss": 4.5832,
      "step": 129024
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.7879125083613796e-05,
      "loss": 4.57,
      "step": 129536
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.7870755514907005e-05,
      "loss": 4.5682,
      "step": 130048
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.7862369567396485e-05,
      "loss": 4.5469,
      "step": 130560
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.7853983619885965e-05,
      "loss": 4.5514,
      "step": 131072
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.7845597672375445e-05,
      "loss": 4.5592,
      "step": 131584
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.7837211724864925e-05,
      "loss": 4.5602,
      "step": 132096
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.782882577735441e-05,
      "loss": 4.5639,
      "step": 132608
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.782043982984389e-05,
      "loss": 4.5505,
      "step": 133120
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.781205388233337e-05,
      "loss": 4.5495,
      "step": 133632
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.780370069243031e-05,
      "loss": 4.5566,
      "step": 134144
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.779531474491979e-05,
      "loss": 4.5633,
      "step": 134656
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.778692879740927e-05,
      "loss": 4.5511,
      "step": 135168
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.777854284989875e-05,
      "loss": 4.5509,
      "step": 135680
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.777015690238823e-05,
      "loss": 4.5482,
      "step": 136192
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.776177095487771e-05,
      "loss": 4.5518,
      "step": 136704
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.775338500736719e-05,
      "loss": 4.5514,
      "step": 137216
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.77450154386604e-05,
      "loss": 4.5454,
      "step": 137728
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.773662949114988e-05,
      "loss": 4.5423,
      "step": 138240
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.772824354363936e-05,
      "loss": 4.5387,
      "step": 138752
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.7719857596128845e-05,
      "loss": 4.5325,
      "step": 139264
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.7711488027422054e-05,
      "loss": 4.5394,
      "step": 139776
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.7703102079911534e-05,
      "loss": 4.5425,
      "step": 140288
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.7694716132401014e-05,
      "loss": 4.5439,
      "step": 140800
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.7686330184890494e-05,
      "loss": 4.5257,
      "step": 141312
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.76779606161837e-05,
      "loss": 4.5343,
      "step": 141824
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.766957466867318e-05,
      "loss": 4.5462,
      "step": 142336
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.766120509996639e-05,
      "loss": 4.53,
      "step": 142848
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.765281915245587e-05,
      "loss": 4.543,
      "step": 143360
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.764443320494535e-05,
      "loss": 4.5281,
      "step": 143872
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.763604725743483e-05,
      "loss": 4.5447,
      "step": 144384
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.762766130992431e-05,
      "loss": 4.5322,
      "step": 144896
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.761927536241379e-05,
      "loss": 4.5179,
      "step": 145408
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.761088941490327e-05,
      "loss": 4.5205,
      "step": 145920
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.760250346739275e-05,
      "loss": 4.5174,
      "step": 146432
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.759413389868596e-05,
      "loss": 4.5278,
      "step": 146944
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.758574795117544e-05,
      "loss": 4.5284,
      "step": 147456
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.757736200366492e-05,
      "loss": 4.5304,
      "step": 147968
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.75689760561544e-05,
      "loss": 4.5245,
      "step": 148480
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.756060648744761e-05,
      "loss": 4.5162,
      "step": 148992
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.755222053993709e-05,
      "loss": 4.511,
      "step": 149504
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.754383459242657e-05,
      "loss": 4.5078,
      "step": 150016
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.753544864491605e-05,
      "loss": 4.5183,
      "step": 150528
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.7527079076209266e-05,
      "loss": 4.5301,
      "step": 151040
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.7518693128698745e-05,
      "loss": 4.5147,
      "step": 151552
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.7510307181188225e-05,
      "loss": 4.5066,
      "step": 152064
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.7501921233677705e-05,
      "loss": 4.5064,
      "step": 152576
    },
    {
      "epoch": 1.03,
      "eval_loss": 4.475635528564453,
      "eval_runtime": 527.4249,
      "eval_samples_per_second": 723.498,
      "eval_steps_per_second": 22.61,
      "step": 152638
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.7493535286167185e-05,
      "loss": 4.5181,
      "step": 153088
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.7485149338656665e-05,
      "loss": 4.5126,
      "step": 153600
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.7476763391146145e-05,
      "loss": 4.5005,
      "step": 154112
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.7468377443635625e-05,
      "loss": 4.5026,
      "step": 154624
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.7459991496125105e-05,
      "loss": 4.4975,
      "step": 155136
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.7451605548614585e-05,
      "loss": 4.4844,
      "step": 155648
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.7443219601104065e-05,
      "loss": 4.4942,
      "step": 156160
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.7434833653593545e-05,
      "loss": 4.5052,
      "step": 156672
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.7426447706083025e-05,
      "loss": 4.4844,
      "step": 157184
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.7418061758572505e-05,
      "loss": 4.4965,
      "step": 157696
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.7409675811061985e-05,
      "loss": 4.5059,
      "step": 158208
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.7401289863551465e-05,
      "loss": 4.5019,
      "step": 158720
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.7392903916040945e-05,
      "loss": 4.4908,
      "step": 159232
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.7384517968530425e-05,
      "loss": 4.4909,
      "step": 159744
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.7376148399823634e-05,
      "loss": 4.49,
      "step": 160256
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.7367762452313114e-05,
      "loss": 4.4848,
      "step": 160768
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.7359376504802594e-05,
      "loss": 4.4835,
      "step": 161280
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.7350990557292074e-05,
      "loss": 4.4901,
      "step": 161792
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.7342604609781554e-05,
      "loss": 4.4911,
      "step": 162304
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.7334218662271034e-05,
      "loss": 4.489,
      "step": 162816
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.7325832714760514e-05,
      "loss": 4.4792,
      "step": 163328
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.7317446767249994e-05,
      "loss": 4.4901,
      "step": 163840
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.7309060819739474e-05,
      "loss": 4.4802,
      "step": 164352
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.730069125103269e-05,
      "loss": 4.4865,
      "step": 164864
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.729230530352217e-05,
      "loss": 4.4808,
      "step": 165376
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.728391935601165e-05,
      "loss": 4.4781,
      "step": 165888
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.727553340850113e-05,
      "loss": 4.4836,
      "step": 166400
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.726716383979434e-05,
      "loss": 4.4667,
      "step": 166912
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.725877789228382e-05,
      "loss": 4.482,
      "step": 167424
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.72503919447733e-05,
      "loss": 4.4673,
      "step": 167936
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.724202237606651e-05,
      "loss": 4.4696,
      "step": 168448
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.723363642855599e-05,
      "loss": 4.47,
      "step": 168960
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.722525048104547e-05,
      "loss": 4.475,
      "step": 169472
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.721686453353495e-05,
      "loss": 4.4687,
      "step": 169984
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.720847858602443e-05,
      "loss": 4.4592,
      "step": 170496
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.720009263851391e-05,
      "loss": 4.4793,
      "step": 171008
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.719170669100339e-05,
      "loss": 4.466,
      "step": 171520
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.718332074349287e-05,
      "loss": 4.4791,
      "step": 172032
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.717493479598235e-05,
      "loss": 4.4662,
      "step": 172544
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.716656522727556e-05,
      "loss": 4.4512,
      "step": 173056
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.715817927976504e-05,
      "loss": 4.4521,
      "step": 173568
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.7149793332254516e-05,
      "loss": 4.4527,
      "step": 174080
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.7141407384743996e-05,
      "loss": 4.4696,
      "step": 174592
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.7133021437233476e-05,
      "loss": 4.448,
      "step": 175104
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.7124635489722956e-05,
      "loss": 4.4625,
      "step": 175616
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.7116265921016165e-05,
      "loss": 4.4579,
      "step": 176128
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.7107879973505645e-05,
      "loss": 4.4562,
      "step": 176640
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.7099494025995125e-05,
      "loss": 4.4631,
      "step": 177152
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.7091108078484605e-05,
      "loss": 4.4376,
      "step": 177664
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.708272213097409e-05,
      "loss": 4.4436,
      "step": 178176
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.707433618346357e-05,
      "loss": 4.4363,
      "step": 178688
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.706595023595305e-05,
      "loss": 4.4587,
      "step": 179200
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.705756428844253e-05,
      "loss": 4.4349,
      "step": 179712
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.704921109853947e-05,
      "loss": 4.4607,
      "step": 180224
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.704082515102895e-05,
      "loss": 4.4351,
      "step": 180736
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.703243920351843e-05,
      "loss": 4.4369,
      "step": 181248
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.702405325600791e-05,
      "loss": 4.4373,
      "step": 181760
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.701566730849739e-05,
      "loss": 4.4338,
      "step": 182272
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.700728136098687e-05,
      "loss": 4.4405,
      "step": 182784
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.699889541347635e-05,
      "loss": 4.4347,
      "step": 183296
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.699052584476956e-05,
      "loss": 4.4247,
      "step": 183808
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.6982139897259045e-05,
      "loss": 4.4414,
      "step": 184320
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.6973753949748525e-05,
      "loss": 4.4276,
      "step": 184832
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.6965368002238005e-05,
      "loss": 4.4317,
      "step": 185344
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.6956982054727485e-05,
      "loss": 4.4276,
      "step": 185856
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.6948612486020694e-05,
      "loss": 4.4352,
      "step": 186368
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.6940226538510174e-05,
      "loss": 4.4146,
      "step": 186880
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.6931840590999654e-05,
      "loss": 4.4395,
      "step": 187392
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.6923454643489134e-05,
      "loss": 4.4249,
      "step": 187904
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.6915068695978614e-05,
      "loss": 4.4289,
      "step": 188416
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.6906682748468094e-05,
      "loss": 4.4304,
      "step": 188928
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.6898296800957574e-05,
      "loss": 4.4219,
      "step": 189440
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.688991085344705e-05,
      "loss": 4.439,
      "step": 189952
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.688154128474026e-05,
      "loss": 4.4408,
      "step": 190464
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.687315533722974e-05,
      "loss": 4.4419,
      "step": 190976
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.686476938971923e-05,
      "loss": 4.426,
      "step": 191488
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.68563834422087e-05,
      "loss": 4.4157,
      "step": 192000
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.684801387350192e-05,
      "loss": 4.4074,
      "step": 192512
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.68396279259914e-05,
      "loss": 4.426,
      "step": 193024
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.683125835728461e-05,
      "loss": 4.4331,
      "step": 193536
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.682287240977409e-05,
      "loss": 4.4286,
      "step": 194048
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.681448646226357e-05,
      "loss": 4.4152,
      "step": 194560
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.680610051475305e-05,
      "loss": 4.4053,
      "step": 195072
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.679771456724252e-05,
      "loss": 4.4199,
      "step": 195584
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.6789328619732e-05,
      "loss": 4.4066,
      "step": 196096
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.678095905102522e-05,
      "loss": 4.4112,
      "step": 196608
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.6772573103514697e-05,
      "loss": 4.4196,
      "step": 197120
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.6764187156004177e-05,
      "loss": 4.4196,
      "step": 197632
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.6755801208493657e-05,
      "loss": 4.4123,
      "step": 198144
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.674743163978687e-05,
      "loss": 4.416,
      "step": 198656
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.6739045692276346e-05,
      "loss": 4.4005,
      "step": 199168
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.6730659744765826e-05,
      "loss": 4.4153,
      "step": 199680
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.6722273797255305e-05,
      "loss": 4.4108,
      "step": 200192
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.6713887849744785e-05,
      "loss": 4.3975,
      "step": 200704
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.6705501902234265e-05,
      "loss": 4.3998,
      "step": 201216
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.6697115954723745e-05,
      "loss": 4.416,
      "step": 201728
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.6688730007213225e-05,
      "loss": 4.4191,
      "step": 202240
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.6680360438506434e-05,
      "loss": 4.4027,
      "step": 202752
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.667199086979965e-05,
      "loss": 4.4049,
      "step": 203264
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.666360492228913e-05,
      "loss": 4.411,
      "step": 203776
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.665521897477861e-05,
      "loss": 4.3933,
      "step": 204288
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.664683302726809e-05,
      "loss": 4.4082,
      "step": 204800
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.663844707975757e-05,
      "loss": 4.4144,
      "step": 205312
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.663006113224705e-05,
      "loss": 4.3996,
      "step": 205824
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.662169156354026e-05,
      "loss": 4.4042,
      "step": 206336
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.661330561602974e-05,
      "loss": 4.3807,
      "step": 206848
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.660491966851922e-05,
      "loss": 4.3808,
      "step": 207360
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.65965337210087e-05,
      "loss": 4.3933,
      "step": 207872
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.658814777349818e-05,
      "loss": 4.3957,
      "step": 208384
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.657977820479139e-05,
      "loss": 4.402,
      "step": 208896
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.657139225728087e-05,
      "loss": 4.3859,
      "step": 209408
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.6563006309770355e-05,
      "loss": 4.3847,
      "step": 209920
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.6554620362259835e-05,
      "loss": 4.39,
      "step": 210432
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.6546234414749315e-05,
      "loss": 4.4018,
      "step": 210944
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.6537848467238795e-05,
      "loss": 4.3924,
      "step": 211456
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.6529478898532004e-05,
      "loss": 4.3956,
      "step": 211968
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.6521092951021484e-05,
      "loss": 4.3859,
      "step": 212480
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.6512707003510964e-05,
      "loss": 4.3877,
      "step": 212992
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.6504321056000444e-05,
      "loss": 4.3978,
      "step": 213504
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.6495935108489924e-05,
      "loss": 4.3853,
      "step": 214016
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.648756553978313e-05,
      "loss": 4.3843,
      "step": 214528
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.647917959227261e-05,
      "loss": 4.3835,
      "step": 215040
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.647079364476209e-05,
      "loss": 4.3748,
      "step": 215552
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.646240769725157e-05,
      "loss": 4.3827,
      "step": 216064
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.645402174974105e-05,
      "loss": 4.3824,
      "step": 216576
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.644565218103427e-05,
      "loss": 4.3906,
      "step": 217088
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.643726623352375e-05,
      "loss": 4.371,
      "step": 217600
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.642888028601323e-05,
      "loss": 4.3779,
      "step": 218112
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.642051071730644e-05,
      "loss": 4.3958,
      "step": 218624
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.641212476979592e-05,
      "loss": 4.3758,
      "step": 219136
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.64037388222854e-05,
      "loss": 4.3907,
      "step": 219648
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.639535287477488e-05,
      "loss": 4.3774,
      "step": 220160
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.638696692726436e-05,
      "loss": 4.3943,
      "step": 220672
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.6378597358557566e-05,
      "loss": 4.3798,
      "step": 221184
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.6370211411047046e-05,
      "loss": 4.3713,
      "step": 221696
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.6361825463536526e-05,
      "loss": 4.3721,
      "step": 222208
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.6353439516026006e-05,
      "loss": 4.3668,
      "step": 222720
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.6345053568515486e-05,
      "loss": 4.3782,
      "step": 223232
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.6336667621004966e-05,
      "loss": 4.3863,
      "step": 223744
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.6328281673494446e-05,
      "loss": 4.3806,
      "step": 224256
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.6319895725983926e-05,
      "loss": 4.3766,
      "step": 224768
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.631154253608087e-05,
      "loss": 4.3699,
      "step": 225280
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.630315658857035e-05,
      "loss": 4.3681,
      "step": 225792
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.629477064105983e-05,
      "loss": 4.3624,
      "step": 226304
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.6286384693549304e-05,
      "loss": 4.3741,
      "step": 226816
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.6277998746038784e-05,
      "loss": 4.3833,
      "step": 227328
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.6269612798528264e-05,
      "loss": 4.3719,
      "step": 227840
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.6261226851017744e-05,
      "loss": 4.3658,
      "step": 228352
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.625284090350723e-05,
      "loss": 4.3631,
      "step": 228864
    },
    {
      "epoch": 0.03,
      "eval_loss": 4.340713977813721,
      "eval_runtime": 523.7183,
      "eval_samples_per_second": 728.619,
      "eval_steps_per_second": 22.77,
      "step": 228957
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.624447133480044e-05,
      "loss": 4.3742,
      "step": 229376
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.623608538728992e-05,
      "loss": 4.3753,
      "step": 229888
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.62276994397794e-05,
      "loss": 4.3591,
      "step": 230400
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.621932987107261e-05,
      "loss": 4.3596,
      "step": 230912
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.621094392356209e-05,
      "loss": 4.3603,
      "step": 231424
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.620255797605157e-05,
      "loss": 4.3431,
      "step": 231936
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.619417202854105e-05,
      "loss": 4.3551,
      "step": 232448
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.618578608103053e-05,
      "loss": 4.3639,
      "step": 232960
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.617740013352001e-05,
      "loss": 4.3539,
      "step": 233472
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.6169046943616954e-05,
      "loss": 4.3605,
      "step": 233984
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.6160660996106434e-05,
      "loss": 4.3676,
      "step": 234496
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.6152275048595913e-05,
      "loss": 4.3628,
      "step": 235008
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.6143889101085393e-05,
      "loss": 4.3554,
      "step": 235520
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.613550315357487e-05,
      "loss": 4.3596,
      "step": 236032
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.612711720606435e-05,
      "loss": 4.3533,
      "step": 236544
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.611873125855383e-05,
      "loss": 4.3505,
      "step": 237056
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.611034531104331e-05,
      "loss": 4.3468,
      "step": 237568
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.610195936353279e-05,
      "loss": 4.3551,
      "step": 238080
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.609357341602227e-05,
      "loss": 4.3561,
      "step": 238592
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.608518746851175e-05,
      "loss": 4.3599,
      "step": 239104
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.607680152100123e-05,
      "loss": 4.3478,
      "step": 239616
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.606843195229444e-05,
      "loss": 4.3599,
      "step": 240128
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.606004600478392e-05,
      "loss": 4.3492,
      "step": 240640
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.60516600572734e-05,
      "loss": 4.3508,
      "step": 241152
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.604329048856662e-05,
      "loss": 4.3516,
      "step": 241664
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.60349045410561e-05,
      "loss": 4.3469,
      "step": 242176
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.602651859354558e-05,
      "loss": 4.3526,
      "step": 242688
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.601813264603506e-05,
      "loss": 4.3395,
      "step": 243200
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.600974669852454e-05,
      "loss": 4.3536,
      "step": 243712
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.600136075101402e-05,
      "loss": 4.3393,
      "step": 244224
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.599297480350349e-05,
      "loss": 4.3403,
      "step": 244736
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.598460523479671e-05,
      "loss": 4.3454,
      "step": 245248
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.597621928728619e-05,
      "loss": 4.3508,
      "step": 245760
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.596783333977567e-05,
      "loss": 4.3389,
      "step": 246272
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.595944739226514e-05,
      "loss": 4.338,
      "step": 246784
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.595106144475462e-05,
      "loss": 4.3473,
      "step": 247296
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.59426754972441e-05,
      "loss": 4.3405,
      "step": 247808
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.5934305928537316e-05,
      "loss": 4.3583,
      "step": 248320
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.5925919981026796e-05,
      "loss": 4.3417,
      "step": 248832
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.5917534033516276e-05,
      "loss": 4.3272,
      "step": 249344
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.5909148086005756e-05,
      "loss": 4.3291,
      "step": 249856
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.5900778517298965e-05,
      "loss": 4.3258,
      "step": 250368
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.5892392569788445e-05,
      "loss": 4.3472,
      "step": 250880
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.5884006622277925e-05,
      "loss": 4.3292,
      "step": 251392
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.5875620674767404e-05,
      "loss": 4.3399,
      "step": 251904
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.5867234727256884e-05,
      "loss": 4.3351,
      "step": 252416
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.5858848779746364e-05,
      "loss": 4.3356,
      "step": 252928
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.5850462832235844e-05,
      "loss": 4.3423,
      "step": 253440
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.5842076884725324e-05,
      "loss": 4.3197,
      "step": 253952
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.583370731601854e-05,
      "loss": 4.3215,
      "step": 254464
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.582532136850802e-05,
      "loss": 4.3232,
      "step": 254976
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.58169354209975e-05,
      "loss": 4.337,
      "step": 255488
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.580856585229071e-05,
      "loss": 4.3169,
      "step": 256000
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.580017990478019e-05,
      "loss": 4.3377,
      "step": 256512
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.579179395726967e-05,
      "loss": 4.3177,
      "step": 257024
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.578340800975915e-05,
      "loss": 4.3145,
      "step": 257536
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.577502206224863e-05,
      "loss": 4.3224,
      "step": 258048
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.576665249354184e-05,
      "loss": 4.3115,
      "step": 258560
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.575826654603132e-05,
      "loss": 4.3289,
      "step": 259072
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.57498805985208e-05,
      "loss": 4.3211,
      "step": 259584
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.574149465101028e-05,
      "loss": 4.3067,
      "step": 260096
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.573310870349976e-05,
      "loss": 4.3241,
      "step": 260608
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.572472275598924e-05,
      "loss": 4.3137,
      "step": 261120
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.5716353187282454e-05,
      "loss": 4.3189,
      "step": 261632
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.5707967239771934e-05,
      "loss": 4.3109,
      "step": 262144
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.5699581292261414e-05,
      "loss": 4.3239,
      "step": 262656
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.5691195344750894e-05,
      "loss": 4.3028,
      "step": 263168
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.5682809397240374e-05,
      "loss": 4.324,
      "step": 263680
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.567443982853358e-05,
      "loss": 4.3108,
      "step": 264192
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.566605388102306e-05,
      "loss": 4.312,
      "step": 264704
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.565768431231627e-05,
      "loss": 4.3182,
      "step": 265216
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.564929836480575e-05,
      "loss": 4.3149,
      "step": 265728
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.564091241729523e-05,
      "loss": 4.3217,
      "step": 266240
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.563252646978471e-05,
      "loss": 4.3314,
      "step": 266752
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.562414052227419e-05,
      "loss": 4.328,
      "step": 267264
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.561575457476368e-05,
      "loss": 4.3127,
      "step": 267776
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.560736862725315e-05,
      "loss": 4.3074,
      "step": 268288
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.559899905854637e-05,
      "loss": 4.2982,
      "step": 268800
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.559061311103585e-05,
      "loss": 4.3164,
      "step": 269312
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.558222716352533e-05,
      "loss": 4.3236,
      "step": 269824
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.55738412160148e-05,
      "loss": 4.3192,
      "step": 270336
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.556545526850428e-05,
      "loss": 4.3055,
      "step": 270848
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.555706932099376e-05,
      "loss": 4.2964,
      "step": 271360
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.554868337348324e-05,
      "loss": 4.316,
      "step": 271872
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.554029742597272e-05,
      "loss": 4.2925,
      "step": 272384
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.55319114784622e-05,
      "loss": 4.3055,
      "step": 272896
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.552352553095168e-05,
      "loss": 4.3092,
      "step": 273408
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.5515155962244896e-05,
      "loss": 4.3126,
      "step": 273920
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.5506770014734376e-05,
      "loss": 4.3075,
      "step": 274432
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.5498384067223856e-05,
      "loss": 4.3124,
      "step": 274944
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.5489998119713336e-05,
      "loss": 4.2941,
      "step": 275456
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.5481612172202816e-05,
      "loss": 4.306,
      "step": 275968
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.5473226224692296e-05,
      "loss": 4.303,
      "step": 276480
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.5464856655985505e-05,
      "loss": 4.2953,
      "step": 276992
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.5456470708474985e-05,
      "loss": 4.293,
      "step": 277504
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.5448084760964465e-05,
      "loss": 4.3142,
      "step": 278016
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.5439698813453945e-05,
      "loss": 4.3074,
      "step": 278528
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.5431312865943425e-05,
      "loss": 4.305,
      "step": 279040
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.5422926918432905e-05,
      "loss": 4.2991,
      "step": 279552
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.5414557349726114e-05,
      "loss": 4.3083,
      "step": 280064
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.5406171402215594e-05,
      "loss": 4.292,
      "step": 280576
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.539778545470508e-05,
      "loss": 4.3053,
      "step": 281088
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.538939950719456e-05,
      "loss": 4.3114,
      "step": 281600
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.538102993848777e-05,
      "loss": 4.2922,
      "step": 282112
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.537264399097725e-05,
      "loss": 4.3087,
      "step": 282624
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.536425804346673e-05,
      "loss": 4.2751,
      "step": 283136
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.535587209595621e-05,
      "loss": 4.2808,
      "step": 283648
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.534748614844569e-05,
      "loss": 4.293,
      "step": 284160
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.533910020093516e-05,
      "loss": 4.2938,
      "step": 284672
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.533071425342464e-05,
      "loss": 4.3025,
      "step": 285184
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.532232830591412e-05,
      "loss": 4.2848,
      "step": 285696
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.531395873720733e-05,
      "loss": 4.2846,
      "step": 286208
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.530557278969682e-05,
      "loss": 4.2898,
      "step": 286720
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.52971868421863e-05,
      "loss": 4.3017,
      "step": 287232
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.528880089467578e-05,
      "loss": 4.2964,
      "step": 287744
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.528041494716526e-05,
      "loss": 4.2966,
      "step": 288256
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.527202899965474e-05,
      "loss": 4.2852,
      "step": 288768
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.526364305214422e-05,
      "loss": 4.2874,
      "step": 289280
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.52552571046337e-05,
      "loss": 4.3014,
      "step": 289792
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.524688753592691e-05,
      "loss": 4.2855,
      "step": 290304
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.523850158841639e-05,
      "loss": 4.2827,
      "step": 290816
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.523011564090587e-05,
      "loss": 4.2894,
      "step": 291328
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.5221746072199076e-05,
      "loss": 4.2742,
      "step": 291840
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.5213360124688556e-05,
      "loss": 4.2885,
      "step": 292352
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.5204974177178036e-05,
      "loss": 4.2869,
      "step": 292864
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.5196588229667516e-05,
      "loss": 4.2958,
      "step": 293376
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.518821866096073e-05,
      "loss": 4.2752,
      "step": 293888
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.517983271345021e-05,
      "loss": 4.2784,
      "step": 294400
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.517144676593969e-05,
      "loss": 4.3017,
      "step": 294912
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.51630771972329e-05,
      "loss": 4.2809,
      "step": 295424
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.515469124972238e-05,
      "loss": 4.2911,
      "step": 295936
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.514630530221186e-05,
      "loss": 4.2875,
      "step": 296448
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.513791935470134e-05,
      "loss": 4.2976,
      "step": 296960
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.512953340719082e-05,
      "loss": 4.2873,
      "step": 297472
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.512116383848403e-05,
      "loss": 4.2804,
      "step": 297984
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.511277789097351e-05,
      "loss": 4.2735,
      "step": 298496
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.510439194346299e-05,
      "loss": 4.2728,
      "step": 299008
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.509600599595247e-05,
      "loss": 4.2822,
      "step": 299520
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.5087620048441956e-05,
      "loss": 4.2928,
      "step": 300032
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.5079250479735165e-05,
      "loss": 4.286,
      "step": 300544
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.5070864532224645e-05,
      "loss": 4.2881,
      "step": 301056
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.5062478584714125e-05,
      "loss": 4.2753,
      "step": 301568
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.5054092637203605e-05,
      "loss": 4.2772,
      "step": 302080
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.5045706689693085e-05,
      "loss": 4.2684,
      "step": 302592
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.5037320742182565e-05,
      "loss": 4.2839,
      "step": 303104
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.5028934794672045e-05,
      "loss": 4.2856,
      "step": 303616
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.5020565225965254e-05,
      "loss": 4.2871,
      "step": 304128
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.5012179278454734e-05,
      "loss": 4.2713,
      "step": 304640
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.5003793330944214e-05,
      "loss": 4.2711,
      "step": 305152
    },
    {
      "epoch": 1.03,
      "eval_loss": 4.257774829864502,
      "eval_runtime": 522.7433,
      "eval_samples_per_second": 729.978,
      "eval_steps_per_second": 22.812,
      "step": 305276
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.4995407383433694e-05,
      "loss": 4.2871,
      "step": 305664
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.4987021435923174e-05,
      "loss": 4.2815,
      "step": 306176
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.4978635488412654e-05,
      "loss": 4.2698,
      "step": 306688
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.4970249540902134e-05,
      "loss": 4.2694,
      "step": 307200
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.4961863593391614e-05,
      "loss": 4.2706,
      "step": 307712
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.4953477645881094e-05,
      "loss": 4.2574,
      "step": 308224
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.4945091698370574e-05,
      "loss": 4.2649,
      "step": 308736
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.4936705750860054e-05,
      "loss": 4.2687,
      "step": 309248
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.4928319803349534e-05,
      "loss": 4.2686,
      "step": 309760
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.4919933855839014e-05,
      "loss": 4.2742,
      "step": 310272
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.4911547908328494e-05,
      "loss": 4.2765,
      "step": 310784
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.4903161960817974e-05,
      "loss": 4.2713,
      "step": 311296
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.4894776013307454e-05,
      "loss": 4.2698,
      "step": 311808
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.4886390065796934e-05,
      "loss": 4.2732,
      "step": 312320
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.487800411828641e-05,
      "loss": 4.2674,
      "step": 312832
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.486961817077589e-05,
      "loss": 4.2615,
      "step": 313344
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.48612486020691e-05,
      "loss": 4.2575,
      "step": 313856
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.485286265455859e-05,
      "loss": 4.2716,
      "step": 314368
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.484447670704806e-05,
      "loss": 4.2664,
      "step": 314880
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.483609075953754e-05,
      "loss": 4.2773,
      "step": 315392
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.482772119083076e-05,
      "loss": 4.2605,
      "step": 315904
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.481933524332024e-05,
      "loss": 4.2716,
      "step": 316416
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.481094929580971e-05,
      "loss": 4.2679,
      "step": 316928
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.480256334829919e-05,
      "loss": 4.2676,
      "step": 317440
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.479417740078867e-05,
      "loss": 4.265,
      "step": 317952
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.478579145327815e-05,
      "loss": 4.262,
      "step": 318464
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.477740550576763e-05,
      "loss": 4.2672,
      "step": 318976
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.476901955825711e-05,
      "loss": 4.2534,
      "step": 319488
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.476063361074659e-05,
      "loss": 4.2692,
      "step": 320000
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.475226404203981e-05,
      "loss": 4.2575,
      "step": 320512
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.474387809452929e-05,
      "loss": 4.2546,
      "step": 321024
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.473549214701877e-05,
      "loss": 4.2621,
      "step": 321536
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.472710619950825e-05,
      "loss": 4.2664,
      "step": 322048
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.471872025199773e-05,
      "loss": 4.259,
      "step": 322560
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.471033430448721e-05,
      "loss": 4.2543,
      "step": 323072
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.4701964735780416e-05,
      "loss": 4.2618,
      "step": 323584
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.4693578788269896e-05,
      "loss": 4.2629,
      "step": 324096
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.4685192840759376e-05,
      "loss": 4.2747,
      "step": 324608
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.4676806893248856e-05,
      "loss": 4.2588,
      "step": 325120
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.4668420945738336e-05,
      "loss": 4.2462,
      "step": 325632
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.4660034998227816e-05,
      "loss": 4.2463,
      "step": 326144
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.4651649050717296e-05,
      "loss": 4.248,
      "step": 326656
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.464327948201051e-05,
      "loss": 4.2619,
      "step": 327168
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.463489353449999e-05,
      "loss": 4.249,
      "step": 327680
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.462650758698947e-05,
      "loss": 4.2603,
      "step": 328192
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.461812163947895e-05,
      "loss": 4.2549,
      "step": 328704
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.4609735691968425e-05,
      "loss": 4.2522,
      "step": 329216
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.460136612326164e-05,
      "loss": 4.262,
      "step": 329728
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.459298017575112e-05,
      "loss": 4.2414,
      "step": 330240
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.4584594228240594e-05,
      "loss": 4.237,
      "step": 330752
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.4576208280730074e-05,
      "loss": 4.2465,
      "step": 331264
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.4567822333219554e-05,
      "loss": 4.2548,
      "step": 331776
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.455945276451277e-05,
      "loss": 4.2363,
      "step": 332288
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.455106681700225e-05,
      "loss": 4.2605,
      "step": 332800
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.4542697248295465e-05,
      "loss": 4.2377,
      "step": 333312
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.4534311300784945e-05,
      "loss": 4.234,
      "step": 333824
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.4525925353274425e-05,
      "loss": 4.2435,
      "step": 334336
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.45175394057639e-05,
      "loss": 4.2345,
      "step": 334848
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.450915345825338e-05,
      "loss": 4.2522,
      "step": 335360
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.450076751074286e-05,
      "loss": 4.2448,
      "step": 335872
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.449238156323234e-05,
      "loss": 4.2274,
      "step": 336384
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.448399561572182e-05,
      "loss": 4.2439,
      "step": 336896
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.44756096682113e-05,
      "loss": 4.2376,
      "step": 337408
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.446722372070078e-05,
      "loss": 4.2476,
      "step": 337920
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.445883777319026e-05,
      "loss": 4.2323,
      "step": 338432
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.445045182567974e-05,
      "loss": 4.2453,
      "step": 338944
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.444208225697295e-05,
      "loss": 4.2265,
      "step": 339456
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.443369630946243e-05,
      "loss": 4.2457,
      "step": 339968
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.4425310361951914e-05,
      "loss": 4.2364,
      "step": 340480
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.4416924414441394e-05,
      "loss": 4.2357,
      "step": 340992
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.4408538466930874e-05,
      "loss": 4.2431,
      "step": 341504
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.440016889822408e-05,
      "loss": 4.2362,
      "step": 342016
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.439178295071356e-05,
      "loss": 4.2465,
      "step": 342528
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.438339700320304e-05,
      "loss": 4.2537,
      "step": 343040
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.437502743449625e-05,
      "loss": 4.2536,
      "step": 343552
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.436664148698573e-05,
      "loss": 4.2369,
      "step": 344064
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.435825553947521e-05,
      "loss": 4.233,
      "step": 344576
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.434986959196469e-05,
      "loss": 4.2247,
      "step": 345088
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.434148364445417e-05,
      "loss": 4.2392,
      "step": 345600
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.433309769694365e-05,
      "loss": 4.2528,
      "step": 346112
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.432472812823687e-05,
      "loss": 4.2406,
      "step": 346624
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.431634218072635e-05,
      "loss": 4.2342,
      "step": 347136
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.430795623321583e-05,
      "loss": 4.2201,
      "step": 347648
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.429957028570531e-05,
      "loss": 4.2461,
      "step": 348160
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.429118433819479e-05,
      "loss": 4.2203,
      "step": 348672
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.428279839068426e-05,
      "loss": 4.2298,
      "step": 349184
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.427441244317374e-05,
      "loss": 4.2334,
      "step": 349696
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.426602649566322e-05,
      "loss": 4.2359,
      "step": 350208
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.425765692695643e-05,
      "loss": 4.2377,
      "step": 350720
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.4249287358249645e-05,
      "loss": 4.2401,
      "step": 351232
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.4240901410739125e-05,
      "loss": 4.2226,
      "step": 351744
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.4232531842032334e-05,
      "loss": 4.2325,
      "step": 352256
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.422414589452182e-05,
      "loss": 4.2313,
      "step": 352768
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.42157599470113e-05,
      "loss": 4.224,
      "step": 353280
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.420737399950078e-05,
      "loss": 4.2205,
      "step": 353792
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.419898805199026e-05,
      "loss": 4.2427,
      "step": 354304
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.4190602104479734e-05,
      "loss": 4.2346,
      "step": 354816
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.4182216156969214e-05,
      "loss": 4.234,
      "step": 355328
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.4173830209458694e-05,
      "loss": 4.2269,
      "step": 355840
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.4165444261948174e-05,
      "loss": 4.2335,
      "step": 356352
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.4157058314437654e-05,
      "loss": 4.2212,
      "step": 356864
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.4148672366927134e-05,
      "loss": 4.2354,
      "step": 357376
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.4140286419416614e-05,
      "loss": 4.2373,
      "step": 357888
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.413191685070982e-05,
      "loss": 4.225,
      "step": 358400
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.41235309031993e-05,
      "loss": 4.2401,
      "step": 358912
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.411516133449252e-05,
      "loss": 4.206,
      "step": 359424
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.4106775386982e-05,
      "loss": 4.212,
      "step": 359936
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.409838943947148e-05,
      "loss": 4.2195,
      "step": 360448
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.409000349196096e-05,
      "loss": 4.2222,
      "step": 360960
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.408161754445044e-05,
      "loss": 4.2329,
      "step": 361472
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.407323159693992e-05,
      "loss": 4.22,
      "step": 361984
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.40648456494294e-05,
      "loss": 4.2113,
      "step": 362496
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.405647608072261e-05,
      "loss": 4.2192,
      "step": 363008
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.404809013321209e-05,
      "loss": 4.2311,
      "step": 363520
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.403970418570157e-05,
      "loss": 4.2269,
      "step": 364032
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.403131823819105e-05,
      "loss": 4.2283,
      "step": 364544
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.402293229068053e-05,
      "loss": 4.2172,
      "step": 365056
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.401456272197374e-05,
      "loss": 4.2187,
      "step": 365568
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.400617677446322e-05,
      "loss": 4.2323,
      "step": 366080
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.39977908269527e-05,
      "loss": 4.2222,
      "step": 366592
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.398940487944218e-05,
      "loss": 4.2155,
      "step": 367104
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.398103531073539e-05,
      "loss": 4.2189,
      "step": 367616
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.397264936322487e-05,
      "loss": 4.2072,
      "step": 368128
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.396426341571435e-05,
      "loss": 4.2193,
      "step": 368640
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.395587746820383e-05,
      "loss": 4.2214,
      "step": 369152
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.394749152069331e-05,
      "loss": 4.2251,
      "step": 369664
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.393912195198652e-05,
      "loss": 4.2075,
      "step": 370176
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.3930736004476e-05,
      "loss": 4.2131,
      "step": 370688
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.392235005696548e-05,
      "loss": 4.2296,
      "step": 371200
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.391396410945496e-05,
      "loss": 4.2179,
      "step": 371712
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.390557816194444e-05,
      "loss": 4.2238,
      "step": 372224
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.389719221443392e-05,
      "loss": 4.2242,
      "step": 372736
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.38888062669234e-05,
      "loss": 4.2256,
      "step": 373248
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.388043669821662e-05,
      "loss": 4.2236,
      "step": 373760
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.387205075070609e-05,
      "loss": 4.2104,
      "step": 374272
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.386366480319557e-05,
      "loss": 4.2072,
      "step": 374784
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.385527885568505e-05,
      "loss": 4.2116,
      "step": 375296
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.3846909286978266e-05,
      "loss": 4.2169,
      "step": 375808
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.383852333946774e-05,
      "loss": 4.2224,
      "step": 376320
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.383013739195722e-05,
      "loss": 4.223,
      "step": 376832
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.38217514444467e-05,
      "loss": 4.2209,
      "step": 377344
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.381336549693618e-05,
      "loss": 4.2113,
      "step": 377856
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.380497954942566e-05,
      "loss": 4.2108,
      "step": 378368
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.3796593601915145e-05,
      "loss": 4.2002,
      "step": 378880
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.3788224033208355e-05,
      "loss": 4.2184,
      "step": 379392
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.3779838085697835e-05,
      "loss": 4.2206,
      "step": 379904
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.3771452138187314e-05,
      "loss": 4.2214,
      "step": 380416
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.3763066190676794e-05,
      "loss": 4.2044,
      "step": 380928
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.3754680243166274e-05,
      "loss": 4.2077,
      "step": 381440
    },
    {
      "epoch": 0.03,
      "eval_loss": 4.20143985748291,
      "eval_runtime": 556.6591,
      "eval_samples_per_second": 685.502,
      "eval_steps_per_second": 21.422,
      "step": 381595
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.3746294295655754e-05,
      "loss": 4.2238,
      "step": 381952
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.3737908348145234e-05,
      "loss": 4.2128,
      "step": 382464
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.3729522400634714e-05,
      "loss": 4.2077,
      "step": 382976
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.3721136453124194e-05,
      "loss": 4.2025,
      "step": 383488
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.37127668844174e-05,
      "loss": 4.2049,
      "step": 384000
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.370439731571061e-05,
      "loss": 4.1953,
      "step": 384512
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.36960113682001e-05,
      "loss": 4.2037,
      "step": 385024
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.368762542068958e-05,
      "loss": 4.2003,
      "step": 385536
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.367923947317906e-05,
      "loss": 4.2066,
      "step": 386048
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.3670886283276e-05,
      "loss": 4.2152,
      "step": 386560
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.366250033576548e-05,
      "loss": 4.2079,
      "step": 387072
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.365411438825496e-05,
      "loss": 4.2068,
      "step": 387584
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.364572844074444e-05,
      "loss": 4.2091,
      "step": 388096
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.363734249323392e-05,
      "loss": 4.2091,
      "step": 388608
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.36289565457234e-05,
      "loss": 4.2019,
      "step": 389120
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.3620586977016606e-05,
      "loss": 4.1978,
      "step": 389632
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.3612201029506086e-05,
      "loss": 4.1959,
      "step": 390144
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.3603815081995566e-05,
      "loss": 4.2058,
      "step": 390656
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.359542913448505e-05,
      "loss": 4.2056,
      "step": 391168
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.358704318697453e-05,
      "loss": 4.2153,
      "step": 391680
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.357865723946401e-05,
      "loss": 4.1998,
      "step": 392192
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.357028767075722e-05,
      "loss": 4.2093,
      "step": 392704
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.35619017232467e-05,
      "loss": 4.2075,
      "step": 393216
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.355351577573618e-05,
      "loss": 4.2025,
      "step": 393728
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.354512982822566e-05,
      "loss": 4.2037,
      "step": 394240
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.353674388071514e-05,
      "loss": 4.1995,
      "step": 394752
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.352835793320462e-05,
      "loss": 4.2063,
      "step": 395264
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.35199719856941e-05,
      "loss": 4.1896,
      "step": 395776
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.3511586038183575e-05,
      "loss": 4.2106,
      "step": 396288
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.3503200090673055e-05,
      "loss": 4.195,
      "step": 396800
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.3494814143162535e-05,
      "loss": 4.1892,
      "step": 397312
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.348642819565202e-05,
      "loss": 4.2042,
      "step": 397824
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.34780422481415e-05,
      "loss": 4.2019,
      "step": 398336
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.346967267943471e-05,
      "loss": 4.2047,
      "step": 398848
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.346128673192419e-05,
      "loss": 4.1945,
      "step": 399360
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.345290078441367e-05,
      "loss": 4.197,
      "step": 399872
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.344453121570688e-05,
      "loss": 4.2001,
      "step": 400384
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.343614526819636e-05,
      "loss": 4.215,
      "step": 400896
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.342775932068584e-05,
      "loss": 4.1978,
      "step": 401408
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.341937337317532e-05,
      "loss": 4.1864,
      "step": 401920
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.34109874256648e-05,
      "loss": 4.1881,
      "step": 402432
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.340260147815428e-05,
      "loss": 4.187,
      "step": 402944
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.339421553064376e-05,
      "loss": 4.2002,
      "step": 403456
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.338582958313324e-05,
      "loss": 4.192,
      "step": 403968
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.3377460014426455e-05,
      "loss": 4.198,
      "step": 404480
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.3369074066915935e-05,
      "loss": 4.1995,
      "step": 404992
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.3360688119405415e-05,
      "loss": 4.1916,
      "step": 405504
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.3352302171894895e-05,
      "loss": 4.2036,
      "step": 406016
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.3343932603188104e-05,
      "loss": 4.1819,
      "step": 406528
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.3335546655677584e-05,
      "loss": 4.1767,
      "step": 407040
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.3327160708167064e-05,
      "loss": 4.1915,
      "step": 407552
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.3318774760656544e-05,
      "loss": 4.1893,
      "step": 408064
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.331040519194975e-05,
      "loss": 4.1812,
      "step": 408576
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.330201924443923e-05,
      "loss": 4.2001,
      "step": 409088
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.329364967573244e-05,
      "loss": 4.1789,
      "step": 409600
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.328526372822193e-05,
      "loss": 4.1792,
      "step": 410112
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.327687778071141e-05,
      "loss": 4.184,
      "step": 410624
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.326849183320089e-05,
      "loss": 4.1787,
      "step": 411136
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.326010588569037e-05,
      "loss": 4.1927,
      "step": 411648
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.325173631698358e-05,
      "loss": 4.1837,
      "step": 412160
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.324335036947306e-05,
      "loss": 4.1783,
      "step": 412672
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.323496442196254e-05,
      "loss": 4.1814,
      "step": 413184
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.322657847445202e-05,
      "loss": 4.1723,
      "step": 413696
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.32181925269415e-05,
      "loss": 4.195,
      "step": 414208
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.320980657943098e-05,
      "loss": 4.1761,
      "step": 414720
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.320142063192046e-05,
      "loss": 4.1939,
      "step": 415232
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.3193051063213666e-05,
      "loss": 4.1671,
      "step": 415744
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.3184665115703146e-05,
      "loss": 4.1862,
      "step": 416256
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.3176279168192626e-05,
      "loss": 4.181,
      "step": 416768
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.3167893220682106e-05,
      "loss": 4.1774,
      "step": 417280
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.3159507273171586e-05,
      "loss": 4.186,
      "step": 417792
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.3151121325661066e-05,
      "loss": 4.1818,
      "step": 418304
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.3142735378150546e-05,
      "loss": 4.1859,
      "step": 418816
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.3134349430640026e-05,
      "loss": 4.1961,
      "step": 419328
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.3125979861933235e-05,
      "loss": 4.1953,
      "step": 419840
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.3117593914422715e-05,
      "loss": 4.1876,
      "step": 420352
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.3109207966912195e-05,
      "loss": 4.1777,
      "step": 420864
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.3100822019401675e-05,
      "loss": 4.1704,
      "step": 421376
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.3092452450694884e-05,
      "loss": 4.1819,
      "step": 421888
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.3084066503184364e-05,
      "loss": 4.1935,
      "step": 422400
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.3075680555673844e-05,
      "loss": 4.1866,
      "step": 422912
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.306729460816333e-05,
      "loss": 4.179,
      "step": 423424
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.305890866065281e-05,
      "loss": 4.1645,
      "step": 423936
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.305052271314229e-05,
      "loss": 4.1917,
      "step": 424448
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.304213676563177e-05,
      "loss": 4.1614,
      "step": 424960
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.303375081812125e-05,
      "loss": 4.1776,
      "step": 425472
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.302538124941446e-05,
      "loss": 4.178,
      "step": 425984
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.301699530190394e-05,
      "loss": 4.1811,
      "step": 426496
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.300860935439342e-05,
      "loss": 4.1812,
      "step": 427008
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.30002234068829e-05,
      "loss": 4.1893,
      "step": 427520
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.299183745937238e-05,
      "loss": 4.1647,
      "step": 428032
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.298345151186186e-05,
      "loss": 4.175,
      "step": 428544
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.297506556435134e-05,
      "loss": 4.1797,
      "step": 429056
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.296669599564455e-05,
      "loss": 4.1702,
      "step": 429568
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.295831004813403e-05,
      "loss": 4.1658,
      "step": 430080
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.2949924100623515e-05,
      "loss": 4.1879,
      "step": 430592
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.2941538153112995e-05,
      "loss": 4.1793,
      "step": 431104
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.2933168584406204e-05,
      "loss": 4.1872,
      "step": 431616
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.2924782636895684e-05,
      "loss": 4.1748,
      "step": 432128
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.2916396689385164e-05,
      "loss": 4.1778,
      "step": 432640
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.2908010741874644e-05,
      "loss": 4.167,
      "step": 433152
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.2899624794364124e-05,
      "loss": 4.1815,
      "step": 433664
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.28912388468536e-05,
      "loss": 4.1862,
      "step": 434176
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.288286927814681e-05,
      "loss": 4.1696,
      "step": 434688
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.287448333063629e-05,
      "loss": 4.1858,
      "step": 435200
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.2866097383125766e-05,
      "loss": 4.1515,
      "step": 435712
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.285771143561525e-05,
      "loss": 4.1628,
      "step": 436224
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.284932548810473e-05,
      "loss": 4.1622,
      "step": 436736
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.284093954059421e-05,
      "loss": 4.171,
      "step": 437248
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.283255359308369e-05,
      "loss": 4.1798,
      "step": 437760
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.282416764557317e-05,
      "loss": 4.1737,
      "step": 438272
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.281579807686638e-05,
      "loss": 4.1513,
      "step": 438784
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.280741212935586e-05,
      "loss": 4.1716,
      "step": 439296
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.279902618184534e-05,
      "loss": 4.1781,
      "step": 439808
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.279064023433482e-05,
      "loss": 4.1773,
      "step": 440320
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.27822542868243e-05,
      "loss": 4.173,
      "step": 440832
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.277386833931378e-05,
      "loss": 4.1659,
      "step": 441344
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.276548239180326e-05,
      "loss": 4.1645,
      "step": 441856
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.275711282309647e-05,
      "loss": 4.182,
      "step": 442368
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.274874325438969e-05,
      "loss": 4.1675,
      "step": 442880
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.274035730687917e-05,
      "loss": 4.1657,
      "step": 443392
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.273197135936865e-05,
      "loss": 4.1648,
      "step": 443904
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.2723585411858127e-05,
      "loss": 4.1596,
      "step": 444416
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.2715199464347607e-05,
      "loss": 4.1683,
      "step": 444928
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.2706813516837087e-05,
      "loss": 4.1703,
      "step": 445440
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.2698427569326566e-05,
      "loss": 4.174,
      "step": 445952
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.2690041621816046e-05,
      "loss": 4.1607,
      "step": 446464
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.2681655674305526e-05,
      "loss": 4.1619,
      "step": 446976
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.2673269726795006e-05,
      "loss": 4.1781,
      "step": 447488
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.2664883779284486e-05,
      "loss": 4.165,
      "step": 448000
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.265649783177396e-05,
      "loss": 4.1715,
      "step": 448512
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.2648128263067175e-05,
      "loss": 4.174,
      "step": 449024
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.2639742315556655e-05,
      "loss": 4.1787,
      "step": 449536
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.2631356368046135e-05,
      "loss": 4.1726,
      "step": 450048
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.262298679933935e-05,
      "loss": 4.1631,
      "step": 450560
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.261460085182883e-05,
      "loss": 4.1526,
      "step": 451072
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.260621490431831e-05,
      "loss": 4.1612,
      "step": 451584
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.2597828956807784e-05,
      "loss": 4.167,
      "step": 452096
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.2589459388101e-05,
      "loss": 4.176,
      "step": 452608
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.258107344059048e-05,
      "loss": 4.1705,
      "step": 453120
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.257268749307996e-05,
      "loss": 4.166,
      "step": 453632
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.256430154556943e-05,
      "loss": 4.1656,
      "step": 454144
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.255593197686265e-05,
      "loss": 4.1644,
      "step": 454656
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.254754602935213e-05,
      "loss": 4.1532,
      "step": 455168
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.253916008184161e-05,
      "loss": 4.1621,
      "step": 455680
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.253077413433109e-05,
      "loss": 4.1744,
      "step": 456192
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.252238818682057e-05,
      "loss": 4.1736,
      "step": 456704
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.251400223931005e-05,
      "loss": 4.1548,
      "step": 457216
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.250561629179953e-05,
      "loss": 4.157,
      "step": 457728
    },
    {
      "epoch": 1.03,
      "eval_loss": 4.159981727600098,
      "eval_runtime": 552.1831,
      "eval_samples_per_second": 691.059,
      "eval_steps_per_second": 21.596,
      "step": 457914
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.249723034428901e-05,
      "loss": 4.1816,
      "step": 458240
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.248884439677849e-05,
      "loss": 4.1651,
      "step": 458752
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.248045844926797e-05,
      "loss": 4.1576,
      "step": 459264
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.247207250175745e-05,
      "loss": 4.1582,
      "step": 459776
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.246368655424693e-05,
      "loss": 4.1521,
      "step": 460288
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.245530060673641e-05,
      "loss": 4.1487,
      "step": 460800
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.244691465922589e-05,
      "loss": 4.1503,
      "step": 461312
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.243852871171537e-05,
      "loss": 4.1561,
      "step": 461824
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.243014276420485e-05,
      "loss": 4.1586,
      "step": 462336
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.242175681669432e-05,
      "loss": 4.1638,
      "step": 462848
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.241337086918381e-05,
      "loss": 4.1624,
      "step": 463360
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.2405001300477024e-05,
      "loss": 4.154,
      "step": 463872
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.23966153529665e-05,
      "loss": 4.1687,
      "step": 464384
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.238822940545598e-05,
      "loss": 4.1573,
      "step": 464896
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.237984345794546e-05,
      "loss": 4.1593,
      "step": 465408
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.237145751043494e-05,
      "loss": 4.1464,
      "step": 465920
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.236307156292442e-05,
      "loss": 4.1449,
      "step": 466432
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.23546856154139e-05,
      "loss": 4.1612,
      "step": 466944
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.234629966790338e-05,
      "loss": 4.1551,
      "step": 467456
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.233791372039286e-05,
      "loss": 4.1683,
      "step": 467968
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.232952777288234e-05,
      "loss": 4.1527,
      "step": 468480
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.232114182537182e-05,
      "loss": 4.163,
      "step": 468992
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.23127558778613e-05,
      "loss": 4.1597,
      "step": 469504
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.230436993035078e-05,
      "loss": 4.1576,
      "step": 470016
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.229598398284026e-05,
      "loss": 4.1535,
      "step": 470528
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.228759803532974e-05,
      "loss": 4.1529,
      "step": 471040
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.227921208781922e-05,
      "loss": 4.1608,
      "step": 471552
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.227084251911243e-05,
      "loss": 4.1442,
      "step": 472064
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.226245657160191e-05,
      "loss": 4.1619,
      "step": 472576
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.2254070624091386e-05,
      "loss": 4.1507,
      "step": 473088
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.2245684676580866e-05,
      "loss": 4.1434,
      "step": 473600
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.223731510787408e-05,
      "loss": 4.1557,
      "step": 474112
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.222892916036356e-05,
      "loss": 4.157,
      "step": 474624
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.2220543212853035e-05,
      "loss": 4.1613,
      "step": 475136
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.2212157265342515e-05,
      "loss": 4.1468,
      "step": 475648
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.220378769663573e-05,
      "loss": 4.1531,
      "step": 476160
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.219541812792894e-05,
      "loss": 4.1535,
      "step": 476672
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.2187032180418426e-05,
      "loss": 4.1662,
      "step": 477184
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.2178646232907906e-05,
      "loss": 4.154,
      "step": 477696
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.2170260285397386e-05,
      "loss": 4.1405,
      "step": 478208
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.216187433788686e-05,
      "loss": 4.1411,
      "step": 478720
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.2153504769180075e-05,
      "loss": 4.1395,
      "step": 479232
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.2145118821669555e-05,
      "loss": 4.1574,
      "step": 479744
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.2136732874159035e-05,
      "loss": 4.1476,
      "step": 480256
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.212834692664851e-05,
      "loss": 4.1477,
      "step": 480768
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.211996097913799e-05,
      "loss": 4.159,
      "step": 481280
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.211157503162747e-05,
      "loss": 4.1471,
      "step": 481792
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.210318908411695e-05,
      "loss": 4.1579,
      "step": 482304
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.209480313660643e-05,
      "loss": 4.1398,
      "step": 482816
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.2086433567899644e-05,
      "loss": 4.1344,
      "step": 483328
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.2078047620389124e-05,
      "loss": 4.1448,
      "step": 483840
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.2069661672878604e-05,
      "loss": 4.1414,
      "step": 484352
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.2061275725368084e-05,
      "loss": 4.141,
      "step": 484864
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.205290615666129e-05,
      "loss": 4.1488,
      "step": 485376
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.204453658795451e-05,
      "loss": 4.1388,
      "step": 485888
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.203615064044398e-05,
      "loss": 4.1342,
      "step": 486400
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.202776469293346e-05,
      "loss": 4.1362,
      "step": 486912
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.201939512422668e-05,
      "loss": 4.136,
      "step": 487424
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.201100917671615e-05,
      "loss": 4.1486,
      "step": 487936
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.200262322920563e-05,
      "loss": 4.1393,
      "step": 488448
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.199423728169512e-05,
      "loss": 4.1345,
      "step": 488960
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.19858513341846e-05,
      "loss": 4.1367,
      "step": 489472
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.197746538667408e-05,
      "loss": 4.126,
      "step": 489984
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.196907943916356e-05,
      "loss": 4.1537,
      "step": 490496
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.196069349165304e-05,
      "loss": 4.1349,
      "step": 491008
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.195230754414252e-05,
      "loss": 4.144,
      "step": 491520
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.1943921596632e-05,
      "loss": 4.1306,
      "step": 492032
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.193553564912148e-05,
      "loss": 4.1368,
      "step": 492544
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.192714970161096e-05,
      "loss": 4.1388,
      "step": 493056
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.1918780132904167e-05,
      "loss": 4.1325,
      "step": 493568
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.1910394185393647e-05,
      "loss": 4.1447,
      "step": 494080
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.1902008237883126e-05,
      "loss": 4.136,
      "step": 494592
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.1893622290372606e-05,
      "loss": 4.1456,
      "step": 495104
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.1885252721665816e-05,
      "loss": 4.1513,
      "step": 495616
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.18768667741553e-05,
      "loss": 4.1486,
      "step": 496128
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.186849720544851e-05,
      "loss": 4.1473,
      "step": 496640
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.186011125793799e-05,
      "loss": 4.1346,
      "step": 497152
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.185172531042747e-05,
      "loss": 4.1289,
      "step": 497664
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.184333936291695e-05,
      "loss": 4.1379,
      "step": 498176
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.183495341540643e-05,
      "loss": 4.1483,
      "step": 498688
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.182656746789591e-05,
      "loss": 4.1433,
      "step": 499200
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.181818152038539e-05,
      "loss": 4.1359,
      "step": 499712
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.18098119516786e-05,
      "loss": 4.1211,
      "step": 500224
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.180142600416808e-05,
      "loss": 4.1501,
      "step": 500736
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.179304005665756e-05,
      "loss": 4.1162,
      "step": 501248
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.178467048795077e-05,
      "loss": 4.1354,
      "step": 501760
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.177628454044025e-05,
      "loss": 4.1383,
      "step": 502272
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.1767898592929736e-05,
      "loss": 4.1344,
      "step": 502784
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.1759512645419216e-05,
      "loss": 4.1408,
      "step": 503296
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1751126697908696e-05,
      "loss": 4.1471,
      "step": 503808
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1742757129201905e-05,
      "loss": 4.1207,
      "step": 504320
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1734371181691385e-05,
      "loss": 4.1357,
      "step": 504832
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1725985234180865e-05,
      "loss": 4.1376,
      "step": 505344
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.171759928667034e-05,
      "loss": 4.1297,
      "step": 505856
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1709229717963554e-05,
      "loss": 4.1227,
      "step": 506368
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1700843770453034e-05,
      "loss": 4.1439,
      "step": 506880
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1692457822942514e-05,
      "loss": 4.1381,
      "step": 507392
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.168407187543199e-05,
      "loss": 4.1471,
      "step": 507904
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1675685927921474e-05,
      "loss": 4.1297,
      "step": 508416
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.166731635921469e-05,
      "loss": 4.1379,
      "step": 508928
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.165893041170417e-05,
      "loss": 4.1234,
      "step": 509440
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.165054446419364e-05,
      "loss": 4.1404,
      "step": 509952
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.164215851668312e-05,
      "loss": 4.1431,
      "step": 510464
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.16337725691726e-05,
      "loss": 4.136,
      "step": 510976
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.162538662166208e-05,
      "loss": 4.1407,
      "step": 511488
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.161700067415156e-05,
      "loss": 4.1143,
      "step": 512000
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.160863110544477e-05,
      "loss": 4.1171,
      "step": 512512
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.160024515793425e-05,
      "loss": 4.1183,
      "step": 513024
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.159185921042373e-05,
      "loss": 4.1294,
      "step": 513536
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.158347326291321e-05,
      "loss": 4.1383,
      "step": 514048
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.157510369420643e-05,
      "loss": 4.1371,
      "step": 514560
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.156671774669591e-05,
      "loss": 4.1093,
      "step": 515072
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.155833179918539e-05,
      "loss": 4.1286,
      "step": 515584
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.154994585167487e-05,
      "loss": 4.1383,
      "step": 516096
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.154155990416435e-05,
      "loss": 4.1375,
      "step": 516608
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1533190335457556e-05,
      "loss": 4.1346,
      "step": 517120
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1524804387947036e-05,
      "loss": 4.1263,
      "step": 517632
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1516418440436516e-05,
      "loss": 4.1276,
      "step": 518144
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1508032492925996e-05,
      "loss": 4.1374,
      "step": 518656
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1499662924219205e-05,
      "loss": 4.129,
      "step": 519168
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1491276976708685e-05,
      "loss": 4.1251,
      "step": 519680
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1482891029198165e-05,
      "loss": 4.1244,
      "step": 520192
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1474505081687645e-05,
      "loss": 4.1187,
      "step": 520704
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1466119134177125e-05,
      "loss": 4.1312,
      "step": 521216
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.145773318666661e-05,
      "loss": 4.131,
      "step": 521728
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.144934723915609e-05,
      "loss": 4.1317,
      "step": 522240
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.144096129164557e-05,
      "loss": 4.1239,
      "step": 522752
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.143259172293878e-05,
      "loss": 4.1193,
      "step": 523264
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.142422215423199e-05,
      "loss": 4.1386,
      "step": 523776
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.141583620672147e-05,
      "loss": 4.121,
      "step": 524288
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.140745025921095e-05,
      "loss": 4.1345,
      "step": 524800
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.139906431170043e-05,
      "loss": 4.1379,
      "step": 525312
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.139067836418991e-05,
      "loss": 4.1351,
      "step": 525824
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.138229241667939e-05,
      "loss": 4.1361,
      "step": 526336
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.137390646916887e-05,
      "loss": 4.1266,
      "step": 526848
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.136552052165835e-05,
      "loss": 4.1103,
      "step": 527360
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1357150952951565e-05,
      "loss": 4.1212,
      "step": 527872
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1348781384244775e-05,
      "loss": 4.1259,
      "step": 528384
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1340395436734254e-05,
      "loss": 4.1404,
      "step": 528896
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1332009489223734e-05,
      "loss": 4.1289,
      "step": 529408
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1323623541713214e-05,
      "loss": 4.1322,
      "step": 529920
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1315237594202694e-05,
      "loss": 4.1245,
      "step": 530432
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.1306851646692174e-05,
      "loss": 4.1263,
      "step": 530944
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.129846569918165e-05,
      "loss": 4.1138,
      "step": 531456
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.129009613047486e-05,
      "loss": 4.1239,
      "step": 531968
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.128171018296434e-05,
      "loss": 4.136,
      "step": 532480
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.127332423545382e-05,
      "loss": 4.1377,
      "step": 532992
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.12649382879433e-05,
      "loss": 4.1134,
      "step": 533504
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.125656871923652e-05,
      "loss": 4.1224,
      "step": 534016
    },
    {
      "epoch": 0.03,
      "eval_loss": 4.129609107971191,
      "eval_runtime": 546.8154,
      "eval_samples_per_second": 697.842,
      "eval_steps_per_second": 21.808,
      "step": 534233
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.1248182771726e-05,
      "loss": 4.1447,
      "step": 534528
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.123979682421548e-05,
      "loss": 4.1253,
      "step": 535040
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.123141087670495e-05,
      "loss": 4.1198,
      "step": 535552
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.122302492919443e-05,
      "loss": 4.1222,
      "step": 536064
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.121463898168391e-05,
      "loss": 4.1102,
      "step": 536576
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.120625303417339e-05,
      "loss": 4.1154,
      "step": 537088
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.119786708666287e-05,
      "loss": 4.1073,
      "step": 537600
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.118948113915235e-05,
      "loss": 4.1209,
      "step": 538112
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.118109519164183e-05,
      "loss": 4.117,
      "step": 538624
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.117270924413131e-05,
      "loss": 4.1261,
      "step": 539136
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.116432329662079e-05,
      "loss": 4.1228,
      "step": 539648
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.115593734911027e-05,
      "loss": 4.1196,
      "step": 540160
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.114755140159975e-05,
      "loss": 4.1311,
      "step": 540672
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.113918183289297e-05,
      "loss": 4.1171,
      "step": 541184
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.113079588538245e-05,
      "loss": 4.1267,
      "step": 541696
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.112240993787193e-05,
      "loss": 4.1051,
      "step": 542208
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.111402399036141e-05,
      "loss": 4.1108,
      "step": 542720
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.110563804285089e-05,
      "loss": 4.1225,
      "step": 543232
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.109725209534036e-05,
      "loss": 4.1166,
      "step": 543744
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.108886614782984e-05,
      "loss": 4.1321,
      "step": 544256
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.108048020031932e-05,
      "loss": 4.1174,
      "step": 544768
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.10720942528088e-05,
      "loss": 4.1228,
      "step": 545280
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.106372468410201e-05,
      "loss": 4.123,
      "step": 545792
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.1055355115395225e-05,
      "loss": 4.1162,
      "step": 546304
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.1046969167884705e-05,
      "loss": 4.1208,
      "step": 546816
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.1038583220374185e-05,
      "loss": 4.1121,
      "step": 547328
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.1030197272863665e-05,
      "loss": 4.1227,
      "step": 547840
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.1021811325353145e-05,
      "loss": 4.1136,
      "step": 548352
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.1013425377842625e-05,
      "loss": 4.1232,
      "step": 548864
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.1005039430332105e-05,
      "loss": 4.1103,
      "step": 549376
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0996653482821585e-05,
      "loss": 4.1098,
      "step": 549888
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0988283914114794e-05,
      "loss": 4.1202,
      "step": 550400
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0979897966604274e-05,
      "loss": 4.1179,
      "step": 550912
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0971512019093754e-05,
      "loss": 4.1221,
      "step": 551424
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0963126071583234e-05,
      "loss": 4.118,
      "step": 551936
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0954740124072714e-05,
      "loss": 4.1108,
      "step": 552448
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.094637055536592e-05,
      "loss": 4.1207,
      "step": 552960
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.09379846078554e-05,
      "loss": 4.1276,
      "step": 553472
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.092959866034489e-05,
      "loss": 4.1188,
      "step": 553984
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.092121271283437e-05,
      "loss": 4.1081,
      "step": 554496
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.091284314412758e-05,
      "loss": 4.102,
      "step": 555008
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.090445719661706e-05,
      "loss": 4.1054,
      "step": 555520
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.089607124910654e-05,
      "loss": 4.121,
      "step": 556032
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.088768530159602e-05,
      "loss": 4.1125,
      "step": 556544
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.08792993540855e-05,
      "loss": 4.1084,
      "step": 557056
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.087091340657498e-05,
      "loss": 4.1265,
      "step": 557568
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.086254383786819e-05,
      "loss": 4.1099,
      "step": 558080
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.085415789035767e-05,
      "loss": 4.1222,
      "step": 558592
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.084577194284715e-05,
      "loss": 4.1051,
      "step": 559104
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.083738599533663e-05,
      "loss": 4.0974,
      "step": 559616
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.082900004782611e-05,
      "loss": 4.1076,
      "step": 560128
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.082061410031559e-05,
      "loss": 4.1089,
      "step": 560640
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0812228152805074e-05,
      "loss": 4.1015,
      "step": 561152
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0803858584098283e-05,
      "loss": 4.1156,
      "step": 561664
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.079547263658776e-05,
      "loss": 4.1059,
      "step": 562176
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.078708668907724e-05,
      "loss": 4.097,
      "step": 562688
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.077870074156672e-05,
      "loss": 4.1056,
      "step": 563200
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0770314794056196e-05,
      "loss": 4.0959,
      "step": 563712
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.076194522534941e-05,
      "loss": 4.1106,
      "step": 564224
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.075355927783889e-05,
      "loss": 4.1039,
      "step": 564736
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.074517333032837e-05,
      "loss": 4.102,
      "step": 565248
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0736787382817845e-05,
      "loss": 4.1018,
      "step": 565760
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0728401435307325e-05,
      "loss": 4.0917,
      "step": 566272
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.072001548779681e-05,
      "loss": 4.117,
      "step": 566784
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.071162954028629e-05,
      "loss": 4.0981,
      "step": 567296
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.070324359277577e-05,
      "loss": 4.1091,
      "step": 567808
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.069487402406898e-05,
      "loss": 4.0972,
      "step": 568320
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.068648807655846e-05,
      "loss": 4.1022,
      "step": 568832
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.067810212904794e-05,
      "loss": 4.1001,
      "step": 569344
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.066971618153742e-05,
      "loss": 4.1031,
      "step": 569856
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.06613302340269e-05,
      "loss": 4.1069,
      "step": 570368
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.065294428651638e-05,
      "loss": 4.1018,
      "step": 570880
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.064455833900586e-05,
      "loss": 4.1078,
      "step": 571392
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.063617239149534e-05,
      "loss": 4.1167,
      "step": 571904
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.062781920159228e-05,
      "loss": 4.116,
      "step": 572416
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0619433254081766e-05,
      "loss": 4.1081,
      "step": 572928
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0611047306571246e-05,
      "loss": 4.1063,
      "step": 573440
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0602661359060726e-05,
      "loss": 4.0881,
      "step": 573952
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0594275411550206e-05,
      "loss": 4.1041,
      "step": 574464
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0585889464039686e-05,
      "loss": 4.1164,
      "step": 574976
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0577503516529166e-05,
      "loss": 4.1072,
      "step": 575488
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0569117569018645e-05,
      "loss": 4.107,
      "step": 576000
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0560731621508125e-05,
      "loss": 4.0882,
      "step": 576512
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0552362052801335e-05,
      "loss": 4.1109,
      "step": 577024
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0543992484094544e-05,
      "loss": 4.0866,
      "step": 577536
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0535606536584024e-05,
      "loss": 4.0994,
      "step": 578048
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0527220589073504e-05,
      "loss": 4.0989,
      "step": 578560
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0518834641562984e-05,
      "loss": 4.1032,
      "step": 579072
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.0510448694052463e-05,
      "loss": 4.1082,
      "step": 579584
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0502062746541943e-05,
      "loss": 4.108,
      "step": 580096
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.049369317783516e-05,
      "loss": 4.0902,
      "step": 580608
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.048530723032464e-05,
      "loss": 4.1072,
      "step": 581120
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.047692128281412e-05,
      "loss": 4.1015,
      "step": 581632
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.04685353353036e-05,
      "loss": 4.0956,
      "step": 582144
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.046014938779308e-05,
      "loss": 4.087,
      "step": 582656
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.045176344028256e-05,
      "loss": 4.1082,
      "step": 583168
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.044337749277203e-05,
      "loss": 4.105,
      "step": 583680
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.043499154526151e-05,
      "loss": 4.1122,
      "step": 584192
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.042660559775099e-05,
      "loss": 4.1005,
      "step": 584704
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.041823602904421e-05,
      "loss": 4.1041,
      "step": 585216
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.040985008153368e-05,
      "loss": 4.0881,
      "step": 585728
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.040146413402317e-05,
      "loss": 4.1107,
      "step": 586240
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.039307818651265e-05,
      "loss": 4.106,
      "step": 586752
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.038470861780586e-05,
      "loss": 4.1036,
      "step": 587264
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.037632267029534e-05,
      "loss": 4.1035,
      "step": 587776
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.036793672278482e-05,
      "loss": 4.0849,
      "step": 588288
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.03595507752743e-05,
      "loss": 4.0829,
      "step": 588800
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0351181206567506e-05,
      "loss": 4.0867,
      "step": 589312
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0342795259056986e-05,
      "loss": 4.0941,
      "step": 589824
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0334409311546466e-05,
      "loss": 4.1065,
      "step": 590336
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0326023364035946e-05,
      "loss": 4.1022,
      "step": 590848
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0317653795329155e-05,
      "loss": 4.0734,
      "step": 591360
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0309267847818635e-05,
      "loss": 4.099,
      "step": 591872
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.030088190030812e-05,
      "loss": 4.1055,
      "step": 592384
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.029251233160133e-05,
      "loss": 4.1006,
      "step": 592896
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.028412638409081e-05,
      "loss": 4.1023,
      "step": 593408
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.027574043658029e-05,
      "loss": 4.0955,
      "step": 593920
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.026735448906977e-05,
      "loss": 4.0886,
      "step": 594432
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.025898492036298e-05,
      "loss": 4.105,
      "step": 594944
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0250615351656196e-05,
      "loss": 4.0977,
      "step": 595456
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0242229404145675e-05,
      "loss": 4.092,
      "step": 595968
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0233843456635155e-05,
      "loss": 4.087,
      "step": 596480
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.022545750912463e-05,
      "loss": 4.0896,
      "step": 596992
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.021707156161411e-05,
      "loss": 4.0953,
      "step": 597504
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.020868561410359e-05,
      "loss": 4.0954,
      "step": 598016
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0200299666593075e-05,
      "loss": 4.1026,
      "step": 598528
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0191913719082555e-05,
      "loss": 4.0892,
      "step": 599040
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0183527771572035e-05,
      "loss": 4.0864,
      "step": 599552
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0175141824061515e-05,
      "loss": 4.1063,
      "step": 600064
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0166755876550995e-05,
      "loss": 4.086,
      "step": 600576
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0158369929040475e-05,
      "loss": 4.1017,
      "step": 601088
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0150000360333684e-05,
      "loss": 4.1061,
      "step": 601600
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0141614412823164e-05,
      "loss": 4.1006,
      "step": 602112
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0133228465312644e-05,
      "loss": 4.1038,
      "step": 602624
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0124842517802124e-05,
      "loss": 4.0946,
      "step": 603136
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0116456570291604e-05,
      "loss": 4.082,
      "step": 603648
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.0108070622781084e-05,
      "loss": 4.0859,
      "step": 604160
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.009971743287803e-05,
      "loss": 4.0961,
      "step": 604672
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.009133148536751e-05,
      "loss": 4.1015,
      "step": 605184
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.008294553785699e-05,
      "loss": 4.1004,
      "step": 605696
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.00745759691502e-05,
      "loss": 4.0992,
      "step": 606208
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.006619002163968e-05,
      "loss": 4.0909,
      "step": 606720
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.005780407412916e-05,
      "loss": 4.0937,
      "step": 607232
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.004941812661864e-05,
      "loss": 4.0811,
      "step": 607744
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.004103217910812e-05,
      "loss": 4.094,
      "step": 608256
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.00326462315976e-05,
      "loss": 4.1018,
      "step": 608768
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.002426028408708e-05,
      "loss": 4.106,
      "step": 609280
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.001587433657656e-05,
      "loss": 4.0783,
      "step": 609792
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.000750476786977e-05,
      "loss": 4.0928,
      "step": 610304
    },
    {
      "epoch": 0.03,
      "eval_loss": 4.105195999145508,
      "eval_runtime": 543.7497,
      "eval_samples_per_second": 701.777,
      "eval_steps_per_second": 21.931,
      "step": 610552
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.999911882035925e-05,
      "loss": 4.1015,
      "step": 610816
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.999073287284873e-05,
      "loss": 4.0976,
      "step": 611328
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.9982346925338207e-05,
      "loss": 4.0886,
      "step": 611840
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.997396097782769e-05,
      "loss": 4.0907,
      "step": 612352
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.99655914091209e-05,
      "loss": 4.078,
      "step": 612864
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.995720546161038e-05,
      "loss": 4.082,
      "step": 613376
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.994881951409986e-05,
      "loss": 4.0812,
      "step": 613888
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.994043356658934e-05,
      "loss": 4.0831,
      "step": 614400
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.9932047619078815e-05,
      "loss": 4.0848,
      "step": 614912
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.9923661671568295e-05,
      "loss": 4.0933,
      "step": 615424
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.991529210286151e-05,
      "loss": 4.0914,
      "step": 615936
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.9906906155350984e-05,
      "loss": 4.0896,
      "step": 616448
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.9898520207840464e-05,
      "loss": 4.0996,
      "step": 616960
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.9890134260329944e-05,
      "loss": 4.0811,
      "step": 617472
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.988174831281943e-05,
      "loss": 4.0961,
      "step": 617984
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.987336236530891e-05,
      "loss": 4.0757,
      "step": 618496
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.986497641779839e-05,
      "loss": 4.0775,
      "step": 619008
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.98566068490916e-05,
      "loss": 4.0904,
      "step": 619520
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.984822090158108e-05,
      "loss": 4.0876,
      "step": 620032
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.983983495407056e-05,
      "loss": 4.0983,
      "step": 620544
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.983144900656004e-05,
      "loss": 4.0889,
      "step": 621056
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.982306305904952e-05,
      "loss": 4.0904,
      "step": 621568
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.9814677111539e-05,
      "loss": 4.0917,
      "step": 622080
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.980629116402848e-05,
      "loss": 4.0846,
      "step": 622592
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.979790521651796e-05,
      "loss": 4.0917,
      "step": 623104
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.97895520266149e-05,
      "loss": 4.0809,
      "step": 623616
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.9781166079104385e-05,
      "loss": 4.0918,
      "step": 624128
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.9772780131593865e-05,
      "loss": 4.0813,
      "step": 624640
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.9764394184083345e-05,
      "loss": 4.097,
      "step": 625152
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.9756008236572825e-05,
      "loss": 4.0773,
      "step": 625664
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.9747622289062305e-05,
      "loss": 4.0784,
      "step": 626176
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.9739236341551785e-05,
      "loss": 4.0897,
      "step": 626688
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.9730850394041265e-05,
      "loss": 4.0876,
      "step": 627200
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.97224972041382e-05,
      "loss": 4.0921,
      "step": 627712
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.971411125662768e-05,
      "loss": 4.0842,
      "step": 628224
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.970572530911716e-05,
      "loss": 4.086,
      "step": 628736
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.969733936160664e-05,
      "loss": 4.0871,
      "step": 629248
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.968895341409612e-05,
      "loss": 4.0954,
      "step": 629760
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.968058384538934e-05,
      "loss": 4.0893,
      "step": 630272
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.967219789787882e-05,
      "loss": 4.0761,
      "step": 630784
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.96638119503683e-05,
      "loss": 4.0755,
      "step": 631296
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.965542600285778e-05,
      "loss": 4.0738,
      "step": 631808
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.964704005534726e-05,
      "loss": 4.0889,
      "step": 632320
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.963865410783674e-05,
      "loss": 4.0858,
      "step": 632832
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.963026816032622e-05,
      "loss": 4.0719,
      "step": 633344
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.96218822128157e-05,
      "loss": 4.0965,
      "step": 633856
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.961351264410891e-05,
      "loss": 4.0771,
      "step": 634368
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.960512669659839e-05,
      "loss": 4.0917,
      "step": 634880
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.959674074908787e-05,
      "loss": 4.0764,
      "step": 635392
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.9588371180381076e-05,
      "loss": 4.0707,
      "step": 635904
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.9579985232870556e-05,
      "loss": 4.0771,
      "step": 636416
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.9571599285360036e-05,
      "loss": 4.0757,
      "step": 636928
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.956321333784952e-05,
      "loss": 4.0786,
      "step": 637440
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.9554827390339e-05,
      "loss": 4.0789,
      "step": 637952
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.9546441442828476e-05,
      "loss": 4.0783,
      "step": 638464
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.953807187412169e-05,
      "loss": 4.0646,
      "step": 638976
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.952968592661117e-05,
      "loss": 4.0759,
      "step": 639488
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.952129997910065e-05,
      "loss": 4.0678,
      "step": 640000
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.9512914031590125e-05,
      "loss": 4.0799,
      "step": 640512
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.950454446288334e-05,
      "loss": 4.0712,
      "step": 641024
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.949615851537282e-05,
      "loss": 4.0759,
      "step": 641536
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.9487772567862294e-05,
      "loss": 4.0733,
      "step": 642048
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.9479386620351774e-05,
      "loss": 4.064,
      "step": 642560
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.947100067284126e-05,
      "loss": 4.084,
      "step": 643072
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.946261472533074e-05,
      "loss": 4.068,
      "step": 643584
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.945422877782022e-05,
      "loss": 4.0807,
      "step": 644096
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.94458428303097e-05,
      "loss": 4.071,
      "step": 644608
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.943747326160291e-05,
      "loss": 4.0705,
      "step": 645120
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.942908731409239e-05,
      "loss": 4.0713,
      "step": 645632
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.942070136658187e-05,
      "loss": 4.0739,
      "step": 646144
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.941231541907135e-05,
      "loss": 4.0767,
      "step": 646656
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.940394585036456e-05,
      "loss": 4.0739,
      "step": 647168
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.939557628165777e-05,
      "loss": 4.0786,
      "step": 647680
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.938719033414725e-05,
      "loss": 4.0934,
      "step": 648192
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.9378820765440464e-05,
      "loss": 4.0826,
      "step": 648704
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.9370434817929943e-05,
      "loss": 4.0776,
      "step": 649216
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.9362048870419423e-05,
      "loss": 4.0771,
      "step": 649728
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.9353662922908903e-05,
      "loss": 4.0617,
      "step": 650240
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.934527697539838e-05,
      "loss": 4.073,
      "step": 650752
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.933689102788786e-05,
      "loss": 4.0859,
      "step": 651264
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.932852145918107e-05,
      "loss": 4.0837,
      "step": 651776
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.932013551167055e-05,
      "loss": 4.0747,
      "step": 652288
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.931174956416003e-05,
      "loss": 4.0604,
      "step": 652800
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.930336361664951e-05,
      "loss": 4.0795,
      "step": 653312
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.929497766913899e-05,
      "loss": 4.0577,
      "step": 653824
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.92866081004322e-05,
      "loss": 4.0737,
      "step": 654336
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.927822215292168e-05,
      "loss": 4.0718,
      "step": 654848
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.926983620541116e-05,
      "loss": 4.0718,
      "step": 655360
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.926145025790065e-05,
      "loss": 4.0815,
      "step": 655872
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.925306431039013e-05,
      "loss": 4.0824,
      "step": 656384
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.924467836287961e-05,
      "loss": 4.0639,
      "step": 656896
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.923629241536909e-05,
      "loss": 4.071,
      "step": 657408
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.922790646785857e-05,
      "loss": 4.0738,
      "step": 657920
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.921952052034805e-05,
      "loss": 4.066,
      "step": 658432
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.921113457283753e-05,
      "loss": 4.0637,
      "step": 658944
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.920276500413074e-05,
      "loss": 4.0753,
      "step": 659456
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.919437905662022e-05,
      "loss": 4.0796,
      "step": 659968
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.91859931091097e-05,
      "loss": 4.0853,
      "step": 660480
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.917760716159918e-05,
      "loss": 4.0702,
      "step": 660992
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.916922121408866e-05,
      "loss": 4.0758,
      "step": 661504
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.9160851645381866e-05,
      "loss": 4.0659,
      "step": 662016
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.9152465697871346e-05,
      "loss": 4.0756,
      "step": 662528
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.914407975036083e-05,
      "loss": 4.0829,
      "step": 663040
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.913569380285031e-05,
      "loss": 4.0749,
      "step": 663552
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.9127307855339786e-05,
      "loss": 4.0783,
      "step": 664064
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.9118921907829266e-05,
      "loss": 4.0555,
      "step": 664576
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.9110535960318745e-05,
      "loss": 4.0567,
      "step": 665088
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.9102166391611955e-05,
      "loss": 4.0606,
      "step": 665600
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.9093780444101435e-05,
      "loss": 4.0693,
      "step": 666112
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.9085394496590914e-05,
      "loss": 4.0736,
      "step": 666624
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.9077008549080394e-05,
      "loss": 4.0755,
      "step": 667136
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.9068622601569874e-05,
      "loss": 4.0463,
      "step": 667648
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.9060236654059354e-05,
      "loss": 4.0715,
      "step": 668160
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.9051850706548834e-05,
      "loss": 4.0793,
      "step": 668672
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.9043464759038314e-05,
      "loss": 4.0713,
      "step": 669184
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.90350788115278e-05,
      "loss": 4.0739,
      "step": 669696
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.902670924282101e-05,
      "loss": 4.0707,
      "step": 670208
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.901833967411422e-05,
      "loss": 4.0582,
      "step": 670720
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.90099537266037e-05,
      "loss": 4.076,
      "step": 671232
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.900156777909318e-05,
      "loss": 4.0785,
      "step": 671744
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.899318183158266e-05,
      "loss": 4.0629,
      "step": 672256
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.898479588407214e-05,
      "loss": 4.0621,
      "step": 672768
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.897642631536535e-05,
      "loss": 4.0608,
      "step": 673280
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.896804036785483e-05,
      "loss": 4.0681,
      "step": 673792
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.895965442034431e-05,
      "loss": 4.0664,
      "step": 674304
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.895126847283379e-05,
      "loss": 4.0733,
      "step": 674816
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.894288252532327e-05,
      "loss": 4.0671,
      "step": 675328
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.8934496577812755e-05,
      "loss": 4.0572,
      "step": 675840
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.8926110630302235e-05,
      "loss": 4.0802,
      "step": 676352
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.8917724682791715e-05,
      "loss": 4.0597,
      "step": 676864
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.8909338735281195e-05,
      "loss": 4.0744,
      "step": 677376
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.8900969166574404e-05,
      "loss": 4.0808,
      "step": 677888
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.8892583219063884e-05,
      "loss": 4.0729,
      "step": 678400
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.8884197271553364e-05,
      "loss": 4.0777,
      "step": 678912
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.8875811324042843e-05,
      "loss": 4.0679,
      "step": 679424
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.886745813413978e-05,
      "loss": 4.0567,
      "step": 679936
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.885907218662926e-05,
      "loss": 4.0638,
      "step": 680448
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.885068623911874e-05,
      "loss": 4.0657,
      "step": 680960
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.884230029160822e-05,
      "loss": 4.0789,
      "step": 681472
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.88339143440977e-05,
      "loss": 4.0742,
      "step": 681984
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.882552839658719e-05,
      "loss": 4.0703,
      "step": 682496
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.88171588278804e-05,
      "loss": 4.0686,
      "step": 683008
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.880877288036988e-05,
      "loss": 4.064,
      "step": 683520
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.880038693285936e-05,
      "loss": 4.055,
      "step": 684032
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.879200098534884e-05,
      "loss": 4.0674,
      "step": 684544
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.878361503783832e-05,
      "loss": 4.0757,
      "step": 685056
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.877522909032779e-05,
      "loss": 4.0808,
      "step": 685568
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.876684314281727e-05,
      "loss": 4.0568,
      "step": 686080
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.875845719530675e-05,
      "loss": 4.0627,
      "step": 686592
    },
    {
      "epoch": 1.03,
      "eval_loss": 4.086565971374512,
      "eval_runtime": 543.9251,
      "eval_samples_per_second": 701.551,
      "eval_steps_per_second": 21.924,
      "step": 686871
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.875007124779623e-05,
      "loss": 4.0849,
      "step": 687104
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.874168530028571e-05,
      "loss": 4.069,
      "step": 687616
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.873329935277519e-05,
      "loss": 4.0652,
      "step": 688128
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.872491340526467e-05,
      "loss": 4.0623,
      "step": 688640
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.871652745775416e-05,
      "loss": 4.0575,
      "step": 689152
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.870814151024364e-05,
      "loss": 4.0547,
      "step": 689664
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.869975556273312e-05,
      "loss": 4.0528,
      "step": 690176
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.86913696152226e-05,
      "loss": 4.0593,
      "step": 690688
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.868298366771208e-05,
      "loss": 4.0562,
      "step": 691200
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.8674597720201557e-05,
      "loss": 4.0706,
      "step": 691712
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.866621177269103e-05,
      "loss": 4.062,
      "step": 692224
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.865782582518051e-05,
      "loss": 4.0628,
      "step": 692736
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.864943987766999e-05,
      "loss": 4.074,
      "step": 693248
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.864105393015947e-05,
      "loss": 4.058,
      "step": 693760
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.863266798264895e-05,
      "loss": 4.0657,
      "step": 694272
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.862429841394216e-05,
      "loss": 4.0497,
      "step": 694784
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.861591246643164e-05,
      "loss": 4.0524,
      "step": 695296
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.8607526518921125e-05,
      "loss": 4.0671,
      "step": 695808
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.8599140571410605e-05,
      "loss": 4.0606,
      "step": 696320
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.8590754623900085e-05,
      "loss": 4.0745,
      "step": 696832
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.8582368676389565e-05,
      "loss": 4.0584,
      "step": 697344
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.8573982728879045e-05,
      "loss": 4.0736,
      "step": 697856
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.8565596781368525e-05,
      "loss": 4.0601,
      "step": 698368
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.8557210833858005e-05,
      "loss": 4.0596,
      "step": 698880
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.8548841265151214e-05,
      "loss": 4.0643,
      "step": 699392
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.8540455317640694e-05,
      "loss": 4.0609,
      "step": 699904
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.8532069370130174e-05,
      "loss": 4.0647,
      "step": 700416
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.8523683422619654e-05,
      "loss": 4.0559,
      "step": 700928
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.8515297475109134e-05,
      "loss": 4.0697,
      "step": 701440
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.850692790640234e-05,
      "loss": 4.0563,
      "step": 701952
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.849854195889182e-05,
      "loss": 4.0465,
      "step": 702464
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.849015601138131e-05,
      "loss": 4.0678,
      "step": 702976
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.848177006387079e-05,
      "loss": 4.0615,
      "step": 703488
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.847338411636027e-05,
      "loss": 4.0655,
      "step": 704000
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.846501454765348e-05,
      "loss": 4.0592,
      "step": 704512
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.845662860014296e-05,
      "loss": 4.0573,
      "step": 705024
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.844824265263244e-05,
      "loss": 4.063,
      "step": 705536
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.843985670512192e-05,
      "loss": 4.0661,
      "step": 706048
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.843147075761139e-05,
      "loss": 4.0692,
      "step": 706560
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.842308481010087e-05,
      "loss": 4.0459,
      "step": 707072
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.841469886259035e-05,
      "loss": 4.0527,
      "step": 707584
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.840631291507983e-05,
      "loss": 4.0505,
      "step": 708096
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.839794334637305e-05,
      "loss": 4.0576,
      "step": 708608
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.838955739886253e-05,
      "loss": 4.0635,
      "step": 709120
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.838117145135201e-05,
      "loss": 4.0488,
      "step": 709632
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.837278550384149e-05,
      "loss": 4.0691,
      "step": 710144
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.836439955633097e-05,
      "loss": 4.0501,
      "step": 710656
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.8356029987624177e-05,
      "loss": 4.0689,
      "step": 711168
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.8347644040113656e-05,
      "loss": 4.0531,
      "step": 711680
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.8339258092603136e-05,
      "loss": 4.0465,
      "step": 712192
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.8330888523896346e-05,
      "loss": 4.0551,
      "step": 712704
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.8322502576385826e-05,
      "loss": 4.0449,
      "step": 713216
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.8314116628875305e-05,
      "loss": 4.0588,
      "step": 713728
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.8305730681364785e-05,
      "loss": 4.0502,
      "step": 714240
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.8297344733854265e-05,
      "loss": 4.0555,
      "step": 714752
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.8288958786343745e-05,
      "loss": 4.0357,
      "step": 715264
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.8280572838833225e-05,
      "loss": 4.0531,
      "step": 715776
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.827218689132271e-05,
      "loss": 4.0399,
      "step": 716288
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.826380094381219e-05,
      "loss": 4.0572,
      "step": 716800
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.82554313751054e-05,
      "loss": 4.0474,
      "step": 717312
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.824704542759488e-05,
      "loss": 4.0512,
      "step": 717824
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.823865948008436e-05,
      "loss": 4.0475,
      "step": 718336
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.823027353257384e-05,
      "loss": 4.0385,
      "step": 718848
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.822190396386705e-05,
      "loss": 4.0587,
      "step": 719360
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.821351801635653e-05,
      "loss": 4.0465,
      "step": 719872
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.820513206884601e-05,
      "loss": 4.0589,
      "step": 720384
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.819674612133549e-05,
      "loss": 4.0415,
      "step": 720896
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.818836017382497e-05,
      "loss": 4.0494,
      "step": 721408
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.817997422631445e-05,
      "loss": 4.0454,
      "step": 721920
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.817158827880393e-05,
      "loss": 4.0502,
      "step": 722432
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.8163218710097146e-05,
      "loss": 4.0479,
      "step": 722944
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.8154832762586626e-05,
      "loss": 4.0503,
      "step": 723456
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.8146446815076106e-05,
      "loss": 4.0535,
      "step": 723968
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.813806086756558e-05,
      "loss": 4.0691,
      "step": 724480
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.812967492005506e-05,
      "loss": 4.055,
      "step": 724992
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.812128897254454e-05,
      "loss": 4.0551,
      "step": 725504
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.811290302503402e-05,
      "loss": 4.0527,
      "step": 726016
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.81045170775235e-05,
      "loss": 4.0412,
      "step": 726528
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.809614750881671e-05,
      "loss": 4.0432,
      "step": 727040
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.808776156130619e-05,
      "loss": 4.0577,
      "step": 727552
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.807937561379567e-05,
      "loss": 4.062,
      "step": 728064
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.807098966628515e-05,
      "loss": 4.0533,
      "step": 728576
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.806262009757836e-05,
      "loss": 4.0368,
      "step": 729088
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.805423415006784e-05,
      "loss": 4.0497,
      "step": 729600
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.804584820255732e-05,
      "loss": 4.0357,
      "step": 730112
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.803747863385053e-05,
      "loss": 4.0463,
      "step": 730624
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.802909268634001e-05,
      "loss": 4.048,
      "step": 731136
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.802070673882949e-05,
      "loss": 4.0465,
      "step": 731648
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.801232079131897e-05,
      "loss": 4.0572,
      "step": 732160
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.800393484380845e-05,
      "loss": 4.0535,
      "step": 732672
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.799554889629793e-05,
      "loss": 4.0421,
      "step": 733184
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.798716294878741e-05,
      "loss": 4.0475,
      "step": 733696
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.797877700127689e-05,
      "loss": 4.0515,
      "step": 734208
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.797042381137384e-05,
      "loss": 4.0399,
      "step": 734720
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.796203786386332e-05,
      "loss": 4.0414,
      "step": 735232
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.79536519163528e-05,
      "loss": 4.0504,
      "step": 735744
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.794526596884228e-05,
      "loss": 4.0548,
      "step": 736256
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.793688002133176e-05,
      "loss": 4.0605,
      "step": 736768
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.792849407382124e-05,
      "loss": 4.0438,
      "step": 737280
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.792010812631072e-05,
      "loss": 4.0527,
      "step": 737792
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.7911738557603926e-05,
      "loss": 4.0427,
      "step": 738304
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.7903352610093406e-05,
      "loss": 4.0519,
      "step": 738816
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.7894966662582886e-05,
      "loss": 4.0514,
      "step": 739328
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.7886580715072366e-05,
      "loss": 4.0517,
      "step": 739840
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.7878194767561846e-05,
      "loss": 4.0554,
      "step": 740352
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.7869825198855055e-05,
      "loss": 4.0321,
      "step": 740864
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.7861439251344535e-05,
      "loss": 4.0341,
      "step": 741376
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.785305330383402e-05,
      "loss": 4.0354,
      "step": 741888
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.78446673563235e-05,
      "loss": 4.0395,
      "step": 742400
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.783628140881298e-05,
      "loss": 4.0494,
      "step": 742912
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.782789546130246e-05,
      "loss": 4.0505,
      "step": 743424
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.781950951379194e-05,
      "loss": 4.0279,
      "step": 743936
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.7811123566281415e-05,
      "loss": 4.047,
      "step": 744448
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.780275399757463e-05,
      "loss": 4.0544,
      "step": 744960
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.779436805006411e-05,
      "loss": 4.0483,
      "step": 745472
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.778598210255359e-05,
      "loss": 4.0509,
      "step": 745984
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.77776125338468e-05,
      "loss": 4.0434,
      "step": 746496
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.776922658633628e-05,
      "loss": 4.0385,
      "step": 747008
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.776084063882576e-05,
      "loss": 4.0478,
      "step": 747520
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.775245469131524e-05,
      "loss": 4.054,
      "step": 748032
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.774406874380472e-05,
      "loss": 4.0441,
      "step": 748544
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.77356827962942e-05,
      "loss": 4.0325,
      "step": 749056
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.7727313227587415e-05,
      "loss": 4.0395,
      "step": 749568
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.771892728007689e-05,
      "loss": 4.0437,
      "step": 750080
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.771054133256637e-05,
      "loss": 4.0472,
      "step": 750592
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.770215538505585e-05,
      "loss": 4.044,
      "step": 751104
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.769380219515279e-05,
      "loss": 4.0494,
      "step": 751616
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.768541624764227e-05,
      "loss": 4.0337,
      "step": 752128
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.767703030013175e-05,
      "loss": 4.0559,
      "step": 752640
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.766864435262123e-05,
      "loss": 4.0398,
      "step": 753152
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.766025840511071e-05,
      "loss": 4.0501,
      "step": 753664
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.765187245760019e-05,
      "loss": 4.0565,
      "step": 754176
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.764348651008967e-05,
      "loss": 4.0491,
      "step": 754688
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.763510056257915e-05,
      "loss": 4.0536,
      "step": 755200
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.762671461506863e-05,
      "loss": 4.048,
      "step": 755712
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.761832866755811e-05,
      "loss": 4.0335,
      "step": 756224
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.760994272004759e-05,
      "loss": 4.0408,
      "step": 756736
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.760155677253707e-05,
      "loss": 4.0376,
      "step": 757248
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.759320358263401e-05,
      "loss": 4.0562,
      "step": 757760
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.758481763512349e-05,
      "loss": 4.0501,
      "step": 758272
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.757643168761297e-05,
      "loss": 4.0482,
      "step": 758784
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.756804574010245e-05,
      "loss": 4.0498,
      "step": 759296
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.755965979259193e-05,
      "loss": 4.0403,
      "step": 759808
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.755127384508141e-05,
      "loss": 4.0289,
      "step": 760320
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.75428878975709e-05,
      "loss": 4.0447,
      "step": 760832
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.753450195006038e-05,
      "loss": 4.0535,
      "step": 761344
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.7526132381353586e-05,
      "loss": 4.0529,
      "step": 761856
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.7517746433843066e-05,
      "loss": 4.0324,
      "step": 762368
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.7509360486332546e-05,
      "loss": 4.0409,
      "step": 762880
    },
    {
      "epoch": 0.03,
      "eval_loss": 4.070580005645752,
      "eval_runtime": 563.8279,
      "eval_samples_per_second": 676.786,
      "eval_steps_per_second": 21.15,
      "step": 763190
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.7500974538822026e-05,
      "loss": 4.0571,
      "step": 763392
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.7492588591311506e-05,
      "loss": 4.0416,
      "step": 763904
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.7484202643800986e-05,
      "loss": 4.0453,
      "step": 764416
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.7475833075094195e-05,
      "loss": 4.0361,
      "step": 764928
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.7467447127583675e-05,
      "loss": 4.0406,
      "step": 765440
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.7459061180073155e-05,
      "loss": 4.0302,
      "step": 765952
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.7450675232562635e-05,
      "loss": 4.0338,
      "step": 766464
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.744230566385585e-05,
      "loss": 4.0307,
      "step": 766976
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.743391971634533e-05,
      "loss": 4.0358,
      "step": 767488
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.742556652644227e-05,
      "loss": 4.0453,
      "step": 768000
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.741718057893175e-05,
      "loss": 4.0442,
      "step": 768512
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.740879463142123e-05,
      "loss": 4.0397,
      "step": 769024
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.740040868391071e-05,
      "loss": 4.0484,
      "step": 769536
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.739202273640019e-05,
      "loss": 4.0351,
      "step": 770048
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.738363678888967e-05,
      "loss": 4.0468,
      "step": 770560
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.737525084137915e-05,
      "loss": 4.0278,
      "step": 771072
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.736686489386863e-05,
      "loss": 4.0259,
      "step": 771584
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.735847894635811e-05,
      "loss": 4.0469,
      "step": 772096
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.735009299884759e-05,
      "loss": 4.0379,
      "step": 772608
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.734170705133707e-05,
      "loss": 4.0521,
      "step": 773120
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.733332110382655e-05,
      "loss": 4.0325,
      "step": 773632
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.7324951535119765e-05,
      "loss": 4.0534,
      "step": 774144
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.7316581966412974e-05,
      "loss": 4.0397,
      "step": 774656
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.7308196018902454e-05,
      "loss": 4.0393,
      "step": 775168
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.7299810071391934e-05,
      "loss": 4.0407,
      "step": 775680
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.7291424123881414e-05,
      "loss": 4.0383,
      "step": 776192
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.7283038176370894e-05,
      "loss": 4.0422,
      "step": 776704
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.7274652228860374e-05,
      "loss": 4.0311,
      "step": 777216
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.726626628134985e-05,
      "loss": 4.046,
      "step": 777728
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.725788033383933e-05,
      "loss": 4.0388,
      "step": 778240
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.724949438632881e-05,
      "loss": 4.0236,
      "step": 778752
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.724112481762202e-05,
      "loss": 4.0437,
      "step": 779264
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.72327388701115e-05,
      "loss": 4.0399,
      "step": 779776
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.722436930140472e-05,
      "loss": 4.0432,
      "step": 780288
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.72159833538942e-05,
      "loss": 4.0351,
      "step": 780800
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.720759740638367e-05,
      "loss": 4.0395,
      "step": 781312
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.719921145887315e-05,
      "loss": 4.0398,
      "step": 781824
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.719082551136263e-05,
      "loss": 4.0429,
      "step": 782336
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.718243956385211e-05,
      "loss": 4.0459,
      "step": 782848
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.717406999514532e-05,
      "loss": 4.0274,
      "step": 783360
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.71656840476348e-05,
      "loss": 4.0306,
      "step": 783872
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.715729810012428e-05,
      "loss": 4.0281,
      "step": 784384
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.714891215261376e-05,
      "loss": 4.0381,
      "step": 784896
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.714052620510324e-05,
      "loss": 4.0395,
      "step": 785408
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.713214025759272e-05,
      "loss": 4.0245,
      "step": 785920
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.712375431008221e-05,
      "loss": 4.0482,
      "step": 786432
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.711536836257169e-05,
      "loss": 4.0299,
      "step": 786944
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.7106998793864896e-05,
      "loss": 4.0462,
      "step": 787456
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.7098612846354376e-05,
      "loss": 4.0379,
      "step": 787968
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.7090243277647585e-05,
      "loss": 4.0191,
      "step": 788480
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.7081857330137065e-05,
      "loss": 4.0313,
      "step": 788992
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.7073471382626545e-05,
      "loss": 4.0251,
      "step": 789504
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.7065085435116025e-05,
      "loss": 4.0371,
      "step": 790016
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.7056699487605505e-05,
      "loss": 4.0292,
      "step": 790528
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.7048313540094985e-05,
      "loss": 4.0305,
      "step": 791040
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.7039943971388194e-05,
      "loss": 4.0136,
      "step": 791552
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.7031558023877674e-05,
      "loss": 4.0321,
      "step": 792064
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.702317207636716e-05,
      "loss": 4.0206,
      "step": 792576
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.701478612885664e-05,
      "loss": 4.0361,
      "step": 793088
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.700640018134612e-05,
      "loss": 4.0267,
      "step": 793600
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.69980142338356e-05,
      "loss": 4.0273,
      "step": 794112
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.698962828632508e-05,
      "loss": 4.0275,
      "step": 794624
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.698124233881456e-05,
      "loss": 4.0151,
      "step": 795136
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.697287277010777e-05,
      "loss": 4.0343,
      "step": 795648
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.696448682259725e-05,
      "loss": 4.0256,
      "step": 796160
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.695610087508673e-05,
      "loss": 4.0392,
      "step": 796672
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.69477149275762e-05,
      "loss": 4.0212,
      "step": 797184
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.693934535886942e-05,
      "loss": 4.0271,
      "step": 797696
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.69309594113589e-05,
      "loss": 4.0233,
      "step": 798208
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.692257346384838e-05,
      "loss": 4.0274,
      "step": 798720
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.691418751633786e-05,
      "loss": 4.0258,
      "step": 799232
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.6905817947631074e-05,
      "loss": 4.0315,
      "step": 799744
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.6897432000120554e-05,
      "loss": 4.0284,
      "step": 800256
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.6889046052610034e-05,
      "loss": 4.0486,
      "step": 800768
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.688066010509951e-05,
      "loss": 4.0387,
      "step": 801280
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.687227415758899e-05,
      "loss": 4.0323,
      "step": 801792
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.68639045888822e-05,
      "loss": 4.0335,
      "step": 802304
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.685555139897914e-05,
      "loss": 4.0178,
      "step": 802816
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.684716545146862e-05,
      "loss": 4.0293,
      "step": 803328
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.68387795039581e-05,
      "loss": 4.0307,
      "step": 803840
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.683039355644758e-05,
      "loss": 4.0408,
      "step": 804352
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.682200760893707e-05,
      "loss": 4.0338,
      "step": 804864
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.681362166142655e-05,
      "loss": 4.0186,
      "step": 805376
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.680523571391603e-05,
      "loss": 4.0265,
      "step": 805888
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.679684976640551e-05,
      "loss": 4.0167,
      "step": 806400
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.678848019769872e-05,
      "loss": 4.027,
      "step": 806912
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.67800942501882e-05,
      "loss": 4.0234,
      "step": 807424
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.677170830267768e-05,
      "loss": 4.0301,
      "step": 807936
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.676332235516715e-05,
      "loss": 4.0374,
      "step": 808448
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.675493640765663e-05,
      "loss": 4.0345,
      "step": 808960
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.6746566838949846e-05,
      "loss": 4.0189,
      "step": 809472
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.6738180891439326e-05,
      "loss": 4.0277,
      "step": 809984
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.6729794943928806e-05,
      "loss": 4.0298,
      "step": 810496
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.6721408996418286e-05,
      "loss": 4.0192,
      "step": 811008
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.6713023048907766e-05,
      "loss": 4.0197,
      "step": 811520
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.6704637101397246e-05,
      "loss": 4.0289,
      "step": 812032
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.6696251153886726e-05,
      "loss": 4.0379,
      "step": 812544
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.6687881585179935e-05,
      "loss": 4.0378,
      "step": 813056
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.6679495637669415e-05,
      "loss": 4.0261,
      "step": 813568
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.6671109690158895e-05,
      "loss": 4.0335,
      "step": 814080
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.6662723742648375e-05,
      "loss": 4.023,
      "step": 814592
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.6654337795137854e-05,
      "loss": 4.032,
      "step": 815104
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.6645951847627334e-05,
      "loss": 4.0266,
      "step": 815616
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.6637565900116814e-05,
      "loss": 4.0371,
      "step": 816128
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.6629179952606294e-05,
      "loss": 4.033,
      "step": 816640
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.6620794005095774e-05,
      "loss": 4.0107,
      "step": 817152
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.6612424436388983e-05,
      "loss": 4.0145,
      "step": 817664
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.660403848887847e-05,
      "loss": 4.0146,
      "step": 818176
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.659565254136795e-05,
      "loss": 4.0207,
      "step": 818688
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.658726659385743e-05,
      "loss": 4.028,
      "step": 819200
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.657889702515064e-05,
      "loss": 4.0324,
      "step": 819712
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.657052745644385e-05,
      "loss": 4.0084,
      "step": 820224
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.656214150893333e-05,
      "loss": 4.0226,
      "step": 820736
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.655375556142281e-05,
      "loss": 4.034,
      "step": 821248
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.654536961391229e-05,
      "loss": 4.0291,
      "step": 821760
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.653698366640177e-05,
      "loss": 4.0322,
      "step": 822272
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.652859771889125e-05,
      "loss": 4.0247,
      "step": 822784
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.652021177138073e-05,
      "loss": 4.0121,
      "step": 823296
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.651184220267394e-05,
      "loss": 4.0279,
      "step": 823808
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.6503456255163424e-05,
      "loss": 4.0358,
      "step": 824320
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.6495070307652904e-05,
      "loss": 4.0213,
      "step": 824832
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.6486684360142384e-05,
      "loss": 4.0139,
      "step": 825344
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.6478298412631864e-05,
      "loss": 4.0196,
      "step": 825856
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.6469912465121344e-05,
      "loss": 4.0212,
      "step": 826368
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.646152651761082e-05,
      "loss": 4.0276,
      "step": 826880
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.64531405701003e-05,
      "loss": 4.0215,
      "step": 827392
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.644477100139351e-05,
      "loss": 4.0333,
      "step": 827904
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.6436385053882986e-05,
      "loss": 4.0098,
      "step": 828416
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.64280154851762e-05,
      "loss": 4.0337,
      "step": 828928
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.641962953766568e-05,
      "loss": 4.0199,
      "step": 829440
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.641124359015516e-05,
      "loss": 4.0313,
      "step": 829952
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.640285764264464e-05,
      "loss": 4.0418,
      "step": 830464
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.639447169513412e-05,
      "loss": 4.0246,
      "step": 830976
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.63860857476236e-05,
      "loss": 4.0305,
      "step": 831488
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.637771617891681e-05,
      "loss": 4.0295,
      "step": 832000
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.636933023140629e-05,
      "loss": 4.0138,
      "step": 832512
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.636094428389577e-05,
      "loss": 4.0192,
      "step": 833024
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.635255833638525e-05,
      "loss": 4.0193,
      "step": 833536
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.634417238887473e-05,
      "loss": 4.0314,
      "step": 834048
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.633578644136421e-05,
      "loss": 4.0322,
      "step": 834560
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.632741687265742e-05,
      "loss": 4.0299,
      "step": 835072
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.6319047303950635e-05,
      "loss": 4.0264,
      "step": 835584
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.6310661356440115e-05,
      "loss": 4.0189,
      "step": 836096
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.6302275408929595e-05,
      "loss": 4.01,
      "step": 836608
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.6293889461419075e-05,
      "loss": 4.025,
      "step": 837120
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.6285503513908555e-05,
      "loss": 4.0317,
      "step": 837632
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.6277117566398035e-05,
      "loss": 4.0337,
      "step": 838144
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.6268731618887515e-05,
      "loss": 4.0159,
      "step": 838656
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.6260345671376995e-05,
      "loss": 4.0209,
      "step": 839168
    },
    {
      "epoch": 1.03,
      "eval_loss": 4.05756139755249,
      "eval_runtime": 562.3468,
      "eval_samples_per_second": 678.569,
      "eval_steps_per_second": 21.206,
      "step": 839509
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.6251976102670204e-05,
      "loss": 4.0473,
      "step": 839680
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.6243590155159684e-05,
      "loss": 4.0262,
      "step": 840192
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.6235204207649164e-05,
      "loss": 4.0227,
      "step": 840704
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.6226818260138644e-05,
      "loss": 4.0178,
      "step": 841216
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.6218432312628124e-05,
      "loss": 4.0225,
      "step": 841728
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.6210046365117604e-05,
      "loss": 4.0097,
      "step": 842240
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.6201660417607084e-05,
      "loss": 4.0117,
      "step": 842752
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.6193274470096564e-05,
      "loss": 4.0128,
      "step": 843264
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.6184888522586044e-05,
      "loss": 4.0222,
      "step": 843776
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.617650257507553e-05,
      "loss": 4.021,
      "step": 844288
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.6168116627565004e-05,
      "loss": 4.0192,
      "step": 844800
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.6159730680054484e-05,
      "loss": 4.0222,
      "step": 845312
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.6151344732543964e-05,
      "loss": 4.0273,
      "step": 845824
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.6142958785033444e-05,
      "loss": 4.0169,
      "step": 846336
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.6134572837522923e-05,
      "loss": 4.028,
      "step": 846848
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.6126186890012403e-05,
      "loss": 4.0082,
      "step": 847360
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.611780094250188e-05,
      "loss": 4.0058,
      "step": 847872
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.610941499499136e-05,
      "loss": 4.0283,
      "step": 848384
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.610104542628457e-05,
      "loss": 4.0197,
      "step": 848896
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.609265947877405e-05,
      "loss": 4.0281,
      "step": 849408
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.608427353126353e-05,
      "loss": 4.0164,
      "step": 849920
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.607588758375301e-05,
      "loss": 4.0301,
      "step": 850432
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.606751801504623e-05,
      "loss": 4.0186,
      "step": 850944
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.605913206753571e-05,
      "loss": 4.0196,
      "step": 851456
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.605074612002519e-05,
      "loss": 4.0194,
      "step": 851968
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.604236017251467e-05,
      "loss": 4.021,
      "step": 852480
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.603397422500415e-05,
      "loss": 4.0191,
      "step": 852992
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.602558827749363e-05,
      "loss": 4.0115,
      "step": 853504
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.601721870878684e-05,
      "loss": 4.0264,
      "step": 854016
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.600883276127632e-05,
      "loss": 4.016,
      "step": 854528
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.60004468137658e-05,
      "loss": 4.0096,
      "step": 855040
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.599206086625528e-05,
      "loss": 4.0186,
      "step": 855552
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.598367491874476e-05,
      "loss": 4.0187,
      "step": 856064
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.597528897123424e-05,
      "loss": 4.0265,
      "step": 856576
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.596690302372372e-05,
      "loss": 4.0138,
      "step": 857088
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.595853345501693e-05,
      "loss": 4.0192,
      "step": 857600
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.595014750750641e-05,
      "loss": 4.0213,
      "step": 858112
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.594176155999589e-05,
      "loss": 4.0215,
      "step": 858624
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.5933375612485366e-05,
      "loss": 4.028,
      "step": 859136
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.5924989664974846e-05,
      "loss": 4.0066,
      "step": 859648
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.5916603717464326e-05,
      "loss": 4.0133,
      "step": 860160
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.5908217769953806e-05,
      "loss": 4.0088,
      "step": 860672
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.5899848201247015e-05,
      "loss": 4.0136,
      "step": 861184
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.5891462253736495e-05,
      "loss": 4.02,
      "step": 861696
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.5883076306225975e-05,
      "loss": 4.0075,
      "step": 862208
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.5874690358715455e-05,
      "loss": 4.0258,
      "step": 862720
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.5866304411204935e-05,
      "loss": 4.0119,
      "step": 863232
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.5857918463694414e-05,
      "loss": 4.0274,
      "step": 863744
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.58495325161839e-05,
      "loss": 4.018,
      "step": 864256
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.584114656867338e-05,
      "loss": 3.9986,
      "step": 864768
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.583277699996659e-05,
      "loss": 4.0099,
      "step": 865280
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.582439105245607e-05,
      "loss": 4.0063,
      "step": 865792
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.581600510494555e-05,
      "loss": 4.0163,
      "step": 866304
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.580763553623876e-05,
      "loss": 4.0076,
      "step": 866816
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.579924958872824e-05,
      "loss": 4.0129,
      "step": 867328
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.579086364121772e-05,
      "loss": 3.9963,
      "step": 867840
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.57824776937072e-05,
      "loss": 4.0119,
      "step": 868352
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.577409174619668e-05,
      "loss": 4.0058,
      "step": 868864
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.576570579868616e-05,
      "loss": 4.0161,
      "step": 869376
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.575731985117564e-05,
      "loss": 3.9997,
      "step": 869888
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.5748950282468855e-05,
      "loss": 4.0143,
      "step": 870400
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.5740564334958335e-05,
      "loss": 4.0061,
      "step": 870912
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.5732178387447815e-05,
      "loss": 3.9993,
      "step": 871424
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.5723792439937295e-05,
      "loss": 4.0135,
      "step": 871936
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.5715406492426775e-05,
      "loss": 4.0102,
      "step": 872448
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.570702054491625e-05,
      "loss": 4.018,
      "step": 872960
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.569863459740573e-05,
      "loss": 4.0015,
      "step": 873472
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.569024864989521e-05,
      "loss": 4.0119,
      "step": 873984
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.5681879081188424e-05,
      "loss": 4.0034,
      "step": 874496
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.56734931336779e-05,
      "loss": 4.0095,
      "step": 875008
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.566510718616738e-05,
      "loss": 4.0052,
      "step": 875520
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.565672123865686e-05,
      "loss": 4.0133,
      "step": 876032
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.564835166995007e-05,
      "loss": 4.0073,
      "step": 876544
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.563996572243955e-05,
      "loss": 4.0299,
      "step": 877056
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.563159615373277e-05,
      "loss": 4.0217,
      "step": 877568
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.562321020622225e-05,
      "loss": 4.0106,
      "step": 878080
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.561482425871172e-05,
      "loss": 4.0117,
      "step": 878592
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.56064383112012e-05,
      "loss": 4.0036,
      "step": 879104
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.559805236369068e-05,
      "loss": 4.0055,
      "step": 879616
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.558966641618016e-05,
      "loss": 4.0146,
      "step": 880128
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.558128046866964e-05,
      "loss": 4.0176,
      "step": 880640
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.557291089996285e-05,
      "loss": 4.021,
      "step": 881152
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.556452495245233e-05,
      "loss": 3.9997,
      "step": 881664
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.5556155383745546e-05,
      "loss": 4.0057,
      "step": 882176
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.5547769436235026e-05,
      "loss": 3.9958,
      "step": 882688
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.5539383488724506e-05,
      "loss": 4.0134,
      "step": 883200
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.5530997541213986e-05,
      "loss": 4.0033,
      "step": 883712
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.5522611593703466e-05,
      "loss": 4.0107,
      "step": 884224
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.5514225646192946e-05,
      "loss": 4.0174,
      "step": 884736
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.5505856077486155e-05,
      "loss": 4.017,
      "step": 885248
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.5497470129975635e-05,
      "loss": 3.9999,
      "step": 885760
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.5489084182465115e-05,
      "loss": 4.0052,
      "step": 886272
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.5480698234954595e-05,
      "loss": 4.0145,
      "step": 886784
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.5472312287444075e-05,
      "loss": 4.0027,
      "step": 887296
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.5463926339933555e-05,
      "loss": 4.0005,
      "step": 887808
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.5455540392423035e-05,
      "loss": 4.0135,
      "step": 888320
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.5447154444912515e-05,
      "loss": 4.0176,
      "step": 888832
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.5438784876205724e-05,
      "loss": 4.0208,
      "step": 889344
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.543039892869521e-05,
      "loss": 4.011,
      "step": 889856
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.542201298118469e-05,
      "loss": 4.0122,
      "step": 890368
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.541362703367417e-05,
      "loss": 4.002,
      "step": 890880
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.540524108616365e-05,
      "loss": 4.0127,
      "step": 891392
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.539687151745686e-05,
      "loss": 4.0099,
      "step": 891904
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.538848556994634e-05,
      "loss": 4.019,
      "step": 892416
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.538009962243582e-05,
      "loss": 4.0093,
      "step": 892928
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.53717136749253e-05,
      "loss": 3.9958,
      "step": 893440
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.536332772741478e-05,
      "loss": 3.9973,
      "step": 893952
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.535495815870799e-05,
      "loss": 3.9912,
      "step": 894464
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.534657221119747e-05,
      "loss": 4.0042,
      "step": 894976
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.533818626368695e-05,
      "loss": 4.012,
      "step": 895488
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.532980031617643e-05,
      "loss": 4.0103,
      "step": 896000
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.532141436866591e-05,
      "loss": 3.9932,
      "step": 896512
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.531302842115539e-05,
      "loss": 4.001,
      "step": 897024
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.530464247364487e-05,
      "loss": 4.015,
      "step": 897536
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.529625652613435e-05,
      "loss": 4.0139,
      "step": 898048
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.528788695742756e-05,
      "loss": 4.016,
      "step": 898560
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.527950100991704e-05,
      "loss": 4.0053,
      "step": 899072
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.527111506240652e-05,
      "loss": 3.9984,
      "step": 899584
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.5262729114896e-05,
      "loss": 4.0085,
      "step": 900096
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.5254359546189206e-05,
      "loss": 4.0142,
      "step": 900608
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.524598997748242e-05,
      "loss": 4.005,
      "step": 901120
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.52376040299719e-05,
      "loss": 3.9951,
      "step": 901632
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.522921808246138e-05,
      "loss": 4.0012,
      "step": 902144
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.522083213495086e-05,
      "loss": 4.0065,
      "step": 902656
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.521244618744034e-05,
      "loss": 4.0098,
      "step": 903168
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.520406023992982e-05,
      "loss": 4.003,
      "step": 903680
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.51956742924193e-05,
      "loss": 4.0162,
      "step": 904192
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.518730472371251e-05,
      "loss": 3.9942,
      "step": 904704
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.517891877620199e-05,
      "loss": 4.0162,
      "step": 905216
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.517053282869147e-05,
      "loss": 4.0025,
      "step": 905728
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.516214688118095e-05,
      "loss": 4.0148,
      "step": 906240
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.515377731247416e-05,
      "loss": 4.0209,
      "step": 906752
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.514539136496364e-05,
      "loss": 4.0125,
      "step": 907264
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.513700541745312e-05,
      "loss": 4.0087,
      "step": 907776
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.51286194699426e-05,
      "loss": 4.0143,
      "step": 908288
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.512023352243209e-05,
      "loss": 3.9932,
      "step": 908800
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.5111863953725296e-05,
      "loss": 4.0028,
      "step": 909312
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.5103478006214776e-05,
      "loss": 3.999,
      "step": 909824
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.5095092058704256e-05,
      "loss": 4.0172,
      "step": 910336
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.5086706111193736e-05,
      "loss": 4.0142,
      "step": 910848
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.5078320163683216e-05,
      "loss": 4.0167,
      "step": 911360
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.5069950594976425e-05,
      "loss": 4.0099,
      "step": 911872
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.5061564647465905e-05,
      "loss": 4.0005,
      "step": 912384
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.5053178699955385e-05,
      "loss": 3.9941,
      "step": 912896
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.5044792752444865e-05,
      "loss": 4.0084,
      "step": 913408
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.5036423183738074e-05,
      "loss": 4.0124,
      "step": 913920
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.5028037236227554e-05,
      "loss": 4.0196,
      "step": 914432
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.501965128871704e-05,
      "loss": 3.9953,
      "step": 914944
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.501126534120652e-05,
      "loss": 4.0041,
      "step": 915456
    },
    {
      "epoch": 0.03,
      "eval_loss": 4.046953201293945,
      "eval_runtime": 553.0572,
      "eval_samples_per_second": 689.967,
      "eval_steps_per_second": 21.562,
      "step": 915828
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.5002879393696e-05,
      "loss": 4.0287,
      "step": 915968
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.499449344618548e-05,
      "loss": 4.0096,
      "step": 916480
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.498610749867496e-05,
      "loss": 4.0052,
      "step": 916992
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.497772155116444e-05,
      "loss": 4.0028,
      "step": 917504
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.496933560365392e-05,
      "loss": 4.0059,
      "step": 918016
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.496094965614339e-05,
      "loss": 3.9932,
      "step": 918528
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.495256370863287e-05,
      "loss": 3.9964,
      "step": 919040
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.494417776112235e-05,
      "loss": 3.9907,
      "step": 919552
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.493579181361183e-05,
      "loss": 4.009,
      "step": 920064
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.492740586610131e-05,
      "loss": 4.0005,
      "step": 920576
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.491901991859079e-05,
      "loss": 4.0034,
      "step": 921088
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.491063397108027e-05,
      "loss": 4.0053,
      "step": 921600
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.490224802356975e-05,
      "loss": 4.0072,
      "step": 922112
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.489386207605923e-05,
      "loss": 4.0029,
      "step": 922624
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.488547612854872e-05,
      "loss": 4.0093,
      "step": 923136
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.48770901810382e-05,
      "loss": 3.9937,
      "step": 923648
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.486870423352768e-05,
      "loss": 3.9875,
      "step": 924160
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.486031828601716e-05,
      "loss": 4.0085,
      "step": 924672
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.485194871731037e-05,
      "loss": 4.001,
      "step": 925184
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.484356276979985e-05,
      "loss": 4.0141,
      "step": 925696
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.483517682228933e-05,
      "loss": 3.9965,
      "step": 926208
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.482679087477881e-05,
      "loss": 4.0142,
      "step": 926720
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.481840492726828e-05,
      "loss": 4.0032,
      "step": 927232
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.481001897975776e-05,
      "loss": 3.9991,
      "step": 927744
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.480164941105098e-05,
      "loss": 4.0038,
      "step": 928256
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.479326346354046e-05,
      "loss": 4.003,
      "step": 928768
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.478487751602994e-05,
      "loss": 4.0071,
      "step": 929280
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.477649156851942e-05,
      "loss": 3.999,
      "step": 929792
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.47681056210089e-05,
      "loss": 4.0027,
      "step": 930304
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.475971967349838e-05,
      "loss": 4.0055,
      "step": 930816
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.475133372598786e-05,
      "loss": 3.9904,
      "step": 931328
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.474294777847734e-05,
      "loss": 3.9988,
      "step": 931840
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.473456183096682e-05,
      "loss": 4.0027,
      "step": 932352
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.4726192262260026e-05,
      "loss": 4.0125,
      "step": 932864
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.4717806314749506e-05,
      "loss": 3.9968,
      "step": 933376
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.4709420367238986e-05,
      "loss": 4.0006,
      "step": 933888
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.4701050798532195e-05,
      "loss": 4.0085,
      "step": 934400
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.4692664851021675e-05,
      "loss": 4.0021,
      "step": 934912
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.4684278903511155e-05,
      "loss": 4.0149,
      "step": 935424
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.467589295600064e-05,
      "loss": 3.9906,
      "step": 935936
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.466750700849012e-05,
      "loss": 3.9987,
      "step": 936448
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.46591210609796e-05,
      "loss": 3.9917,
      "step": 936960
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.465075149227281e-05,
      "loss": 3.9991,
      "step": 937472
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.464236554476229e-05,
      "loss": 4.0044,
      "step": 937984
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.463397959725177e-05,
      "loss": 3.9941,
      "step": 938496
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.462559364974125e-05,
      "loss": 4.0059,
      "step": 939008
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.461720770223073e-05,
      "loss": 3.997,
      "step": 939520
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.460882175472021e-05,
      "loss": 4.0118,
      "step": 940032
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.460043580720969e-05,
      "loss": 4.0021,
      "step": 940544
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.459204985969917e-05,
      "loss": 3.9777,
      "step": 941056
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.4583663912188644e-05,
      "loss": 3.9943,
      "step": 941568
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.4575310722285595e-05,
      "loss": 3.9904,
      "step": 942080
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.4566924774775075e-05,
      "loss": 4.0008,
      "step": 942592
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.4558538827264555e-05,
      "loss": 3.9919,
      "step": 943104
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.4550152879754035e-05,
      "loss": 3.9979,
      "step": 943616
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.4541766932243515e-05,
      "loss": 3.9781,
      "step": 944128
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.4533380984732995e-05,
      "loss": 3.9962,
      "step": 944640
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.452499503722247e-05,
      "loss": 3.99,
      "step": 945152
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.451660908971195e-05,
      "loss": 4.0012,
      "step": 945664
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.4508239521005164e-05,
      "loss": 3.9831,
      "step": 946176
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.449986995229837e-05,
      "loss": 3.9918,
      "step": 946688
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.449148400478785e-05,
      "loss": 3.9932,
      "step": 947200
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.448309805727733e-05,
      "loss": 3.9858,
      "step": 947712
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.447471210976681e-05,
      "loss": 3.9938,
      "step": 948224
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.446632616225629e-05,
      "loss": 3.9963,
      "step": 948736
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.445795659354951e-05,
      "loss": 4.0007,
      "step": 949248
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.444957064603899e-05,
      "loss": 3.9866,
      "step": 949760
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.444118469852847e-05,
      "loss": 3.9926,
      "step": 950272
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.443279875101794e-05,
      "loss": 3.9888,
      "step": 950784
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.442441280350742e-05,
      "loss": 3.9905,
      "step": 951296
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.44160268559969e-05,
      "loss": 3.9899,
      "step": 951808
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.440764090848638e-05,
      "loss": 4.0,
      "step": 952320
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.439925496097586e-05,
      "loss": 3.9907,
      "step": 952832
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.439088539226907e-05,
      "loss": 4.0128,
      "step": 953344
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.438249944475855e-05,
      "loss": 4.0056,
      "step": 953856
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.437411349724803e-05,
      "loss": 3.9976,
      "step": 954368
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.436572754973751e-05,
      "loss": 3.9959,
      "step": 954880
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.435735798103073e-05,
      "loss": 3.9895,
      "step": 955392
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.434897203352021e-05,
      "loss": 3.9849,
      "step": 955904
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.434058608600969e-05,
      "loss": 4.0019,
      "step": 956416
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.4332216517302896e-05,
      "loss": 4.005,
      "step": 956928
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.4323830569792376e-05,
      "loss": 4.0001,
      "step": 957440
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.4315444622281856e-05,
      "loss": 3.9855,
      "step": 957952
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.4307058674771336e-05,
      "loss": 3.9927,
      "step": 958464
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.4298672727260816e-05,
      "loss": 3.9815,
      "step": 958976
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.4290286779750296e-05,
      "loss": 3.9964,
      "step": 959488
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.4281917211043505e-05,
      "loss": 3.9859,
      "step": 960000
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.4273531263532985e-05,
      "loss": 3.9933,
      "step": 960512
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.4265145316022465e-05,
      "loss": 4.0024,
      "step": 961024
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.425675936851195e-05,
      "loss": 4.0008,
      "step": 961536
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.424837342100143e-05,
      "loss": 3.9836,
      "step": 962048
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.423998747349091e-05,
      "loss": 3.992,
      "step": 962560
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.423160152598039e-05,
      "loss": 3.9995,
      "step": 963072
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.422321557846987e-05,
      "loss": 3.9841,
      "step": 963584
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.421482963095935e-05,
      "loss": 3.9822,
      "step": 964096
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.420646006225256e-05,
      "loss": 3.9949,
      "step": 964608
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.419807411474204e-05,
      "loss": 3.9999,
      "step": 965120
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.418968816723152e-05,
      "loss": 4.0044,
      "step": 965632
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.4181302219721e-05,
      "loss": 3.9955,
      "step": 966144
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.417293265101421e-05,
      "loss": 4.0001,
      "step": 966656
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.416454670350369e-05,
      "loss": 3.9839,
      "step": 967168
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.415616075599317e-05,
      "loss": 3.9962,
      "step": 967680
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.414777480848265e-05,
      "loss": 3.996,
      "step": 968192
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.4139405239775865e-05,
      "loss": 4.0068,
      "step": 968704
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.4131035671069074e-05,
      "loss": 3.992,
      "step": 969216
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.4122649723558554e-05,
      "loss": 3.9869,
      "step": 969728
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.4114263776048034e-05,
      "loss": 3.9787,
      "step": 970240
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.4105877828537514e-05,
      "loss": 3.9785,
      "step": 970752
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.4097491881026994e-05,
      "loss": 3.9853,
      "step": 971264
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.4089105933516474e-05,
      "loss": 3.9993,
      "step": 971776
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.408071998600595e-05,
      "loss": 3.9937,
      "step": 972288
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.407233403849543e-05,
      "loss": 3.9784,
      "step": 972800
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.406394809098491e-05,
      "loss": 3.9864,
      "step": 973312
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.405557852227812e-05,
      "loss": 3.9983,
      "step": 973824
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.40471925747676e-05,
      "loss": 3.9996,
      "step": 974336
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.403880662725708e-05,
      "loss": 3.9988,
      "step": 974848
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.403042067974656e-05,
      "loss": 3.9924,
      "step": 975360
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.402205111103978e-05,
      "loss": 3.9841,
      "step": 975872
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.401366516352925e-05,
      "loss": 3.9921,
      "step": 976384
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.400527921601873e-05,
      "loss": 3.9989,
      "step": 976896
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.399690964731195e-05,
      "loss": 3.9907,
      "step": 977408
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.398852369980142e-05,
      "loss": 3.9783,
      "step": 977920
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.39801377522909e-05,
      "loss": 3.9884,
      "step": 978432
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.397175180478038e-05,
      "loss": 3.9858,
      "step": 978944
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.3963382236073596e-05,
      "loss": 3.9954,
      "step": 979456
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.3954996288563076e-05,
      "loss": 3.9884,
      "step": 979968
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.3946610341052556e-05,
      "loss": 3.9982,
      "step": 980480
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.3938224393542036e-05,
      "loss": 3.9813,
      "step": 980992
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.3929838446031516e-05,
      "loss": 4.0015,
      "step": 981504
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.3921452498520996e-05,
      "loss": 3.9866,
      "step": 982016
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.3913066551010476e-05,
      "loss": 3.9986,
      "step": 982528
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.3904680603499956e-05,
      "loss": 4.0009,
      "step": 983040
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.3896294655989436e-05,
      "loss": 4.0009,
      "step": 983552
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.3887908708478916e-05,
      "loss": 3.9946,
      "step": 984064
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.3879522760968396e-05,
      "loss": 3.9997,
      "step": 984576
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.3871136813457876e-05,
      "loss": 3.9802,
      "step": 985088
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.3862767244751085e-05,
      "loss": 3.9829,
      "step": 985600
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.3854381297240565e-05,
      "loss": 3.9853,
      "step": 986112
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.3846011728533774e-05,
      "loss": 3.9966,
      "step": 986624
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.383762578102326e-05,
      "loss": 4.003,
      "step": 987136
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.382923983351274e-05,
      "loss": 3.9978,
      "step": 987648
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.382085388600222e-05,
      "loss": 3.9968,
      "step": 988160
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.38124679384917e-05,
      "loss": 3.9831,
      "step": 988672
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.380408199098118e-05,
      "loss": 3.9775,
      "step": 989184
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.379569604347066e-05,
      "loss": 3.9921,
      "step": 989696
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.378731009596014e-05,
      "loss": 3.997,
      "step": 990208
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.3778924148449614e-05,
      "loss": 4.005,
      "step": 990720
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.377055457974283e-05,
      "loss": 3.9834,
      "step": 991232
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.376216863223231e-05,
      "loss": 3.9838,
      "step": 991744
    },
    {
      "epoch": 0.03,
      "eval_loss": 4.03767728805542,
      "eval_runtime": 540.9831,
      "eval_samples_per_second": 705.366,
      "eval_steps_per_second": 22.043,
      "step": 992147
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.375379906352552e-05,
      "loss": 3.9709,
      "step": 992256
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.3745413116015e-05,
      "loss": 4.0075,
      "step": 992768
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.373702716850448e-05,
      "loss": 3.9834,
      "step": 993280
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.372864122099396e-05,
      "loss": 3.9716,
      "step": 993792
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.372025527348344e-05,
      "loss": 3.9949,
      "step": 994304
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.371186932597292e-05,
      "loss": 3.9867,
      "step": 994816
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.37034833784624e-05,
      "loss": 3.9916,
      "step": 995328
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.369509743095188e-05,
      "loss": 3.9813,
      "step": 995840
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.368671148344136e-05,
      "loss": 3.9862,
      "step": 996352
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.367832553593084e-05,
      "loss": 3.9707,
      "step": 996864
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.366993958842032e-05,
      "loss": 4.0025,
      "step": 997376
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.36615536409098e-05,
      "loss": 3.9834,
      "step": 997888
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.365316769339928e-05,
      "loss": 3.9916,
      "step": 998400
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.364478174588876e-05,
      "loss": 3.9972,
      "step": 998912
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.363639579837824e-05,
      "loss": 3.9873,
      "step": 999424
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.362800985086772e-05,
      "loss": 3.9775,
      "step": 999936
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.36196239033572e-05,
      "loss": 3.9974,
      "step": 1000448
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.3611254334650414e-05,
      "loss": 3.9894,
      "step": 1000960
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.3602868387139894e-05,
      "loss": 3.9817,
      "step": 1001472
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.3594482439629374e-05,
      "loss": 4.0012,
      "step": 1001984
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.3586096492118854e-05,
      "loss": 4.0094,
      "step": 1002496
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.357771054460833e-05,
      "loss": 3.991,
      "step": 1003008
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.356934097590154e-05,
      "loss": 3.9815,
      "step": 1003520
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.356095502839102e-05,
      "loss": 3.9918,
      "step": 1004032
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.35525690808805e-05,
      "loss": 3.9836,
      "step": 1004544
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.3544183133369976e-05,
      "loss": 3.9827,
      "step": 1005056
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.353581356466319e-05,
      "loss": 3.9881,
      "step": 1005568
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.352742761715267e-05,
      "loss": 3.9813,
      "step": 1006080
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.351904166964215e-05,
      "loss": 3.9861,
      "step": 1006592
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.351065572213163e-05,
      "loss": 3.9933,
      "step": 1007104
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.350226977462111e-05,
      "loss": 3.9933,
      "step": 1007616
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.349388382711059e-05,
      "loss": 3.9876,
      "step": 1008128
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.34855142584038e-05,
      "loss": 3.9884,
      "step": 1008640
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.347712831089328e-05,
      "loss": 3.9996,
      "step": 1009152
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.346874236338276e-05,
      "loss": 3.9786,
      "step": 1009664
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.346035641587224e-05,
      "loss": 3.9876,
      "step": 1010176
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.345197046836172e-05,
      "loss": 3.9829,
      "step": 1010688
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.34435845208512e-05,
      "loss": 3.9976,
      "step": 1011200
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.343519857334068e-05,
      "loss": 3.9975,
      "step": 1011712
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.342681262583016e-05,
      "loss": 3.9996,
      "step": 1012224
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.341842667831964e-05,
      "loss": 3.993,
      "step": 1012736
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.341004073080912e-05,
      "loss": 3.9925,
      "step": 1013248
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.34016547832986e-05,
      "loss": 3.9815,
      "step": 1013760
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.339326883578808e-05,
      "loss": 3.9799,
      "step": 1014272
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.3384899267081296e-05,
      "loss": 3.9743,
      "step": 1014784
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.3376513319570776e-05,
      "loss": 3.9721,
      "step": 1015296
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.3368127372060256e-05,
      "loss": 3.9801,
      "step": 1015808
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.3359741424549736e-05,
      "loss": 3.9844,
      "step": 1016320
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.3351371855842945e-05,
      "loss": 3.9841,
      "step": 1016832
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.3343002287136154e-05,
      "loss": 3.9855,
      "step": 1017344
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.3334616339625634e-05,
      "loss": 3.9717,
      "step": 1017856
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.3326230392115114e-05,
      "loss": 3.9846,
      "step": 1018368
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.3317844444604594e-05,
      "loss": 3.9857,
      "step": 1018880
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.3309458497094074e-05,
      "loss": 3.9688,
      "step": 1019392
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.3301072549583554e-05,
      "loss": 3.9726,
      "step": 1019904
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.3292686602073034e-05,
      "loss": 3.9812,
      "step": 1020416
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.3284300654562514e-05,
      "loss": 3.9948,
      "step": 1020928
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.3275914707051994e-05,
      "loss": 3.9659,
      "step": 1021440
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.326754513834521e-05,
      "loss": 3.9863,
      "step": 1021952
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.325915919083469e-05,
      "loss": 3.9878,
      "step": 1022464
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.325077324332416e-05,
      "loss": 3.9739,
      "step": 1022976
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.324240367461738e-05,
      "loss": 3.9718,
      "step": 1023488
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.323401772710686e-05,
      "loss": 3.9678,
      "step": 1024000
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.322563177959633e-05,
      "loss": 3.9712,
      "step": 1024512
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.321724583208581e-05,
      "loss": 3.9926,
      "step": 1025024
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.320885988457529e-05,
      "loss": 3.994,
      "step": 1025536
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.320047393706477e-05,
      "loss": 3.9766,
      "step": 1026048
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.319208798955425e-05,
      "loss": 3.983,
      "step": 1026560
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.318370204204374e-05,
      "loss": 3.9886,
      "step": 1027072
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.317533247333695e-05,
      "loss": 3.976,
      "step": 1027584
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.316694652582643e-05,
      "loss": 3.9898,
      "step": 1028096
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.315856057831591e-05,
      "loss": 3.9799,
      "step": 1028608
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.315017463080539e-05,
      "loss": 3.9805,
      "step": 1029120
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.314178868329487e-05,
      "loss": 3.9757,
      "step": 1029632
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.3133419114588076e-05,
      "loss": 3.9926,
      "step": 1030144
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.3125049545881285e-05,
      "loss": 3.9801,
      "step": 1030656
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.3116663598370765e-05,
      "loss": 3.99,
      "step": 1031168
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.3108277650860245e-05,
      "loss": 3.9852,
      "step": 1031680
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.3099891703349725e-05,
      "loss": 3.984,
      "step": 1032192
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.3091505755839205e-05,
      "loss": 3.9782,
      "step": 1032704
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.308311980832869e-05,
      "loss": 3.984,
      "step": 1033216
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.307473386081817e-05,
      "loss": 3.9751,
      "step": 1033728
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.306636429211138e-05,
      "loss": 3.9748,
      "step": 1034240
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.305797834460086e-05,
      "loss": 3.9778,
      "step": 1034752
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.304959239709034e-05,
      "loss": 3.9703,
      "step": 1035264
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.304120644957982e-05,
      "loss": 3.9846,
      "step": 1035776
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.30328205020693e-05,
      "loss": 3.9711,
      "step": 1036288
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.302443455455878e-05,
      "loss": 3.9805,
      "step": 1036800
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.301604860704826e-05,
      "loss": 3.9672,
      "step": 1037312
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.300766265953774e-05,
      "loss": 3.9872,
      "step": 1037824
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.299927671202722e-05,
      "loss": 3.9873,
      "step": 1038336
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.299090714332043e-05,
      "loss": 3.9889,
      "step": 1038848
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.298252119580991e-05,
      "loss": 3.9852,
      "step": 1039360
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.297413524829939e-05,
      "loss": 3.973,
      "step": 1039872
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.296574930078887e-05,
      "loss": 3.9834,
      "step": 1040384
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.295736335327835e-05,
      "loss": 3.9783,
      "step": 1040896
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.2948993784571565e-05,
      "loss": 3.9774,
      "step": 1041408
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.2940624215864775e-05,
      "loss": 3.9683,
      "step": 1041920
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.2932238268354254e-05,
      "loss": 3.973,
      "step": 1042432
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.2923852320843734e-05,
      "loss": 3.9888,
      "step": 1042944
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.2915466373333214e-05,
      "loss": 3.9801,
      "step": 1043456
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.2907080425822694e-05,
      "loss": 3.9879,
      "step": 1043968
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.289869447831217e-05,
      "loss": 3.9793,
      "step": 1044480
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.289030853080165e-05,
      "loss": 3.9793,
      "step": 1044992
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.288192258329113e-05,
      "loss": 3.9695,
      "step": 1045504
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.287355301458434e-05,
      "loss": 3.9731,
      "step": 1046016
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.286516706707382e-05,
      "loss": 3.9854,
      "step": 1046528
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.28567811195633e-05,
      "loss": 3.9848,
      "step": 1047040
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.284839517205278e-05,
      "loss": 3.973,
      "step": 1047552
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.284000922454226e-05,
      "loss": 3.966,
      "step": 1048064
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.283162327703174e-05,
      "loss": 3.98,
      "step": 1048576
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.282323732952122e-05,
      "loss": 3.984,
      "step": 1049088
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.28148513820107e-05,
      "loss": 3.9662,
      "step": 1049600
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.280648181330391e-05,
      "loss": 3.9796,
      "step": 1050112
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.279811224459712e-05,
      "loss": 3.9718,
      "step": 1050624
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.27897262970866e-05,
      "loss": 3.9734,
      "step": 1051136
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.278134034957608e-05,
      "loss": 3.9681,
      "step": 1051648
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.277295440206556e-05,
      "loss": 3.9799,
      "step": 1052160
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.276456845455505e-05,
      "loss": 3.9786,
      "step": 1052672
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.275618250704453e-05,
      "loss": 3.9757,
      "step": 1053184
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.274779655953401e-05,
      "loss": 3.9865,
      "step": 1053696
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.273941061202349e-05,
      "loss": 3.9937,
      "step": 1054208
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.27310410433167e-05,
      "loss": 3.9775,
      "step": 1054720
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.272265509580618e-05,
      "loss": 3.9811,
      "step": 1055232
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.2714285527099386e-05,
      "loss": 3.9783,
      "step": 1055744
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.2705899579588866e-05,
      "loss": 3.981,
      "step": 1056256
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.2697513632078346e-05,
      "loss": 3.9788,
      "step": 1056768
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.2689127684567826e-05,
      "loss": 3.9737,
      "step": 1057280
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.2680741737057306e-05,
      "loss": 3.9782,
      "step": 1057792
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.2672355789546786e-05,
      "loss": 3.9753,
      "step": 1058304
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.266398622084e-05,
      "loss": 3.9914,
      "step": 1058816
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.265560027332948e-05,
      "loss": 3.9759,
      "step": 1059328
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.264721432581896e-05,
      "loss": 3.9909,
      "step": 1059840
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.263882837830844e-05,
      "loss": 3.9764,
      "step": 1060352
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.263044243079792e-05,
      "loss": 3.9846,
      "step": 1060864
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.26220564832874e-05,
      "loss": 3.9781,
      "step": 1061376
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.261367053577688e-05,
      "loss": 3.9829,
      "step": 1061888
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.260530096707009e-05,
      "loss": 3.9893,
      "step": 1062400
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.259691501955957e-05,
      "loss": 3.9745,
      "step": 1062912
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.258852907204905e-05,
      "loss": 3.9807,
      "step": 1063424
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.258014312453853e-05,
      "loss": 3.9843,
      "step": 1063936
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.2571757177028e-05,
      "loss": 3.9735,
      "step": 1064448
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.256337122951748e-05,
      "loss": 3.9757,
      "step": 1064960
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.255498528200697e-05,
      "loss": 3.9836,
      "step": 1065472
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.254659933449645e-05,
      "loss": 3.9799,
      "step": 1065984
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.253822976578966e-05,
      "loss": 3.9809,
      "step": 1066496
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.252984381827914e-05,
      "loss": 3.9799,
      "step": 1067008
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.252145787076862e-05,
      "loss": 3.9752,
      "step": 1067520
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.25130719232581e-05,
      "loss": 3.9707,
      "step": 1068032
    },
    {
      "epoch": 0.03,
      "eval_loss": 4.028290271759033,
      "eval_runtime": 293.3806,
      "eval_samples_per_second": 1300.669,
      "eval_steps_per_second": 40.647,
      "step": 1068467
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.250468597574758e-05,
      "loss": 3.9771,
      "step": 1068544
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.249631640704079e-05,
      "loss": 3.9926,
      "step": 1069056
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.2487946838334004e-05,
      "loss": 3.9735,
      "step": 1069568
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.247956089082348e-05,
      "loss": 3.9582,
      "step": 1070080
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.247117494331296e-05,
      "loss": 3.9766,
      "step": 1070592
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.246278899580244e-05,
      "loss": 3.9758,
      "step": 1071104
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.2454403048291924e-05,
      "loss": 3.9773,
      "step": 1071616
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.2446017100781404e-05,
      "loss": 3.9681,
      "step": 1072128
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.2437631153270884e-05,
      "loss": 3.9681,
      "step": 1072640
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.2429245205760364e-05,
      "loss": 3.964,
      "step": 1073152
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.242087563705357e-05,
      "loss": 3.9851,
      "step": 1073664
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.241248968954305e-05,
      "loss": 3.9702,
      "step": 1074176
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.240410374203253e-05,
      "loss": 3.9795,
      "step": 1074688
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.239571779452201e-05,
      "loss": 3.9823,
      "step": 1075200
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.238734822581522e-05,
      "loss": 3.978,
      "step": 1075712
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.23789622783047e-05,
      "loss": 3.9636,
      "step": 1076224
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.237059270959791e-05,
      "loss": 3.9793,
      "step": 1076736
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.236220676208739e-05,
      "loss": 3.9765,
      "step": 1077248
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.235382081457688e-05,
      "loss": 3.9654,
      "step": 1077760
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.234543486706636e-05,
      "loss": 3.9866,
      "step": 1078272
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.233704891955584e-05,
      "loss": 3.9971,
      "step": 1078784
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.232866297204532e-05,
      "loss": 3.9829,
      "step": 1079296
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.23202770245348e-05,
      "loss": 3.9673,
      "step": 1079808
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.231189107702428e-05,
      "loss": 3.9741,
      "step": 1080320
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.2303521508317486e-05,
      "loss": 3.9703,
      "step": 1080832
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.2295135560806966e-05,
      "loss": 3.9714,
      "step": 1081344
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.2286782370903904e-05,
      "loss": 3.9767,
      "step": 1081856
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.2278396423393384e-05,
      "loss": 3.9663,
      "step": 1082368
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.2270010475882864e-05,
      "loss": 3.9692,
      "step": 1082880
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.2261624528372344e-05,
      "loss": 3.9824,
      "step": 1083392
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.2253238580861824e-05,
      "loss": 3.9764,
      "step": 1083904
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.224485263335131e-05,
      "loss": 3.9774,
      "step": 1084416
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.223648306464452e-05,
      "loss": 3.9743,
      "step": 1084928
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.2228097117134e-05,
      "loss": 3.9839,
      "step": 1085440
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.221971116962348e-05,
      "loss": 3.9631,
      "step": 1085952
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.221132522211296e-05,
      "loss": 3.9749,
      "step": 1086464
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.220293927460244e-05,
      "loss": 3.9665,
      "step": 1086976
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.219455332709192e-05,
      "loss": 3.9888,
      "step": 1087488
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.21861673795814e-05,
      "loss": 3.9793,
      "step": 1088000
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.217778143207088e-05,
      "loss": 3.9883,
      "step": 1088512
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.216939548456036e-05,
      "loss": 3.9823,
      "step": 1089024
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.216102591585357e-05,
      "loss": 3.9765,
      "step": 1089536
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.215263996834305e-05,
      "loss": 3.9701,
      "step": 1090048
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.214425402083253e-05,
      "loss": 3.9639,
      "step": 1090560
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.213586807332201e-05,
      "loss": 3.9662,
      "step": 1091072
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.212748212581149e-05,
      "loss": 3.9595,
      "step": 1091584
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.211909617830097e-05,
      "loss": 3.9694,
      "step": 1092096
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.211071023079045e-05,
      "loss": 3.9712,
      "step": 1092608
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.210232428327993e-05,
      "loss": 3.9704,
      "step": 1093120
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.209393833576941e-05,
      "loss": 3.969,
      "step": 1093632
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.208556876706262e-05,
      "loss": 3.962,
      "step": 1094144
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.20771828195521e-05,
      "loss": 3.9692,
      "step": 1094656
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.206879687204158e-05,
      "loss": 3.9773,
      "step": 1095168
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.206041092453106e-05,
      "loss": 3.9474,
      "step": 1095680
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.2052041355824267e-05,
      "loss": 3.9651,
      "step": 1096192
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.204367178711748e-05,
      "loss": 3.9628,
      "step": 1096704
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.203528583960696e-05,
      "loss": 3.982,
      "step": 1097216
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.202689989209644e-05,
      "loss": 3.9508,
      "step": 1097728
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.201851394458592e-05,
      "loss": 3.9757,
      "step": 1098240
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.20101279970754e-05,
      "loss": 3.9686,
      "step": 1098752
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.200174204956488e-05,
      "loss": 3.9644,
      "step": 1099264
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.199337248085809e-05,
      "loss": 3.9634,
      "step": 1099776
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.198498653334757e-05,
      "loss": 3.9553,
      "step": 1100288
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.197660058583705e-05,
      "loss": 3.9559,
      "step": 1100800
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.196821463832653e-05,
      "loss": 3.9795,
      "step": 1101312
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.195982869081601e-05,
      "loss": 3.9761,
      "step": 1101824
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.195144274330549e-05,
      "loss": 3.9638,
      "step": 1102336
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.194305679579497e-05,
      "loss": 3.972,
      "step": 1102848
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.193467084828445e-05,
      "loss": 3.9731,
      "step": 1103360
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.192630127957767e-05,
      "loss": 3.9665,
      "step": 1103872
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.191791533206715e-05,
      "loss": 3.9726,
      "step": 1104384
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.190952938455663e-05,
      "loss": 3.9699,
      "step": 1104896
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.190114343704611e-05,
      "loss": 3.9634,
      "step": 1105408
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.1892773868339316e-05,
      "loss": 3.9648,
      "step": 1105920
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.1884387920828796e-05,
      "loss": 3.9785,
      "step": 1106432
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.1876001973318276e-05,
      "loss": 3.9695,
      "step": 1106944
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.1867616025807756e-05,
      "loss": 3.9789,
      "step": 1107456
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.1859246457100965e-05,
      "loss": 3.9741,
      "step": 1107968
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.1850860509590445e-05,
      "loss": 3.9683,
      "step": 1108480
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.1842474562079925e-05,
      "loss": 3.9651,
      "step": 1108992
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.1834088614569405e-05,
      "loss": 3.9722,
      "step": 1109504
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.182571904586262e-05,
      "loss": 3.9639,
      "step": 1110016
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.18173330983521e-05,
      "loss": 3.9615,
      "step": 1110528
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.180894715084158e-05,
      "loss": 3.9651,
      "step": 1111040
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.180056120333106e-05,
      "loss": 3.9577,
      "step": 1111552
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.179217525582054e-05,
      "loss": 3.9682,
      "step": 1112064
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.178378930831002e-05,
      "loss": 3.961,
      "step": 1112576
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.177541973960323e-05,
      "loss": 3.9686,
      "step": 1113088
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.176703379209271e-05,
      "loss": 3.953,
      "step": 1113600
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.175864784458219e-05,
      "loss": 3.9714,
      "step": 1114112
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.175026189707167e-05,
      "loss": 3.9759,
      "step": 1114624
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.174189232836488e-05,
      "loss": 3.9746,
      "step": 1115136
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.173350638085436e-05,
      "loss": 3.9719,
      "step": 1115648
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.172512043334384e-05,
      "loss": 3.9637,
      "step": 1116160
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.171673448583332e-05,
      "loss": 3.9645,
      "step": 1116672
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.17083485383228e-05,
      "loss": 3.9684,
      "step": 1117184
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.1699978969616014e-05,
      "loss": 3.9737,
      "step": 1117696
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.169160940090922e-05,
      "loss": 3.953,
      "step": 1118208
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.16832234533987e-05,
      "loss": 3.96,
      "step": 1118720
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.167483750588818e-05,
      "loss": 3.9769,
      "step": 1119232
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.166645155837766e-05,
      "loss": 3.9637,
      "step": 1119744
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.165806561086714e-05,
      "loss": 3.9799,
      "step": 1120256
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.164969604216035e-05,
      "loss": 3.9628,
      "step": 1120768
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.164131009464983e-05,
      "loss": 3.9724,
      "step": 1121280
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.163292414713931e-05,
      "loss": 3.9536,
      "step": 1121792
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.162453819962879e-05,
      "loss": 3.96,
      "step": 1122304
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.161615225211827e-05,
      "loss": 3.972,
      "step": 1122816
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.160776630460775e-05,
      "loss": 3.9726,
      "step": 1123328
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.159938035709723e-05,
      "loss": 3.962,
      "step": 1123840
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.159099440958671e-05,
      "loss": 3.9505,
      "step": 1124352
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.158262484087992e-05,
      "loss": 3.9679,
      "step": 1124864
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.15742388933694e-05,
      "loss": 3.9694,
      "step": 1125376
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.156585294585888e-05,
      "loss": 3.9592,
      "step": 1125888
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.155746699834836e-05,
      "loss": 3.9651,
      "step": 1126400
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.154908105083784e-05,
      "loss": 3.9578,
      "step": 1126912
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.154069510332732e-05,
      "loss": 3.962,
      "step": 1127424
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.15323091558168e-05,
      "loss": 3.9602,
      "step": 1127936
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.152392320830628e-05,
      "loss": 3.9662,
      "step": 1128448
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.1515553639599496e-05,
      "loss": 3.9649,
      "step": 1128960
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.1507184070892705e-05,
      "loss": 3.9643,
      "step": 1129472
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.1498798123382185e-05,
      "loss": 3.97,
      "step": 1129984
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.1490412175871665e-05,
      "loss": 3.9859,
      "step": 1130496
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.1482026228361145e-05,
      "loss": 3.9622,
      "step": 1131008
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.1473640280850625e-05,
      "loss": 3.9699,
      "step": 1131520
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.1465270712143834e-05,
      "loss": 3.9674,
      "step": 1132032
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.1456884764633314e-05,
      "loss": 3.9638,
      "step": 1132544
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.1448498817122794e-05,
      "loss": 3.9664,
      "step": 1133056
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.1440112869612274e-05,
      "loss": 3.9606,
      "step": 1133568
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.1431726922101754e-05,
      "loss": 3.97,
      "step": 1134080
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.1423340974591234e-05,
      "loss": 3.9567,
      "step": 1134592
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.1414955027080714e-05,
      "loss": 3.9822,
      "step": 1135104
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.1406569079570194e-05,
      "loss": 3.9668,
      "step": 1135616
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.139821588966714e-05,
      "loss": 3.9761,
      "step": 1136128
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.138982994215662e-05,
      "loss": 3.9659,
      "step": 1136640
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.13814439946461e-05,
      "loss": 3.9734,
      "step": 1137152
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.137305804713558e-05,
      "loss": 3.9643,
      "step": 1137664
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.136468847842879e-05,
      "loss": 3.9707,
      "step": 1138176
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.135630253091827e-05,
      "loss": 3.9774,
      "step": 1138688
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.134791658340775e-05,
      "loss": 3.9613,
      "step": 1139200
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.133953063589723e-05,
      "loss": 3.9706,
      "step": 1139712
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.133114468838671e-05,
      "loss": 3.971,
      "step": 1140224
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.132275874087619e-05,
      "loss": 3.9647,
      "step": 1140736
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.131437279336567e-05,
      "loss": 3.9604,
      "step": 1141248
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.130598684585515e-05,
      "loss": 3.9717,
      "step": 1141760
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.1297600898344634e-05,
      "loss": 3.9691,
      "step": 1142272
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.1289231329637844e-05,
      "loss": 3.9672,
      "step": 1142784
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.1280845382127324e-05,
      "loss": 3.9667,
      "step": 1143296
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.1272459434616803e-05,
      "loss": 3.9687,
      "step": 1143808
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.1264073487106283e-05,
      "loss": 3.96,
      "step": 1144320
    },
    {
      "epoch": 1.03,
      "eval_loss": 4.021714210510254,
      "eval_runtime": 293.5152,
      "eval_samples_per_second": 1300.072,
      "eval_steps_per_second": 40.628,
      "step": 1144787
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.125568753959576e-05,
      "loss": 3.9269,
      "step": 1144832
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.1247301592085237e-05,
      "loss": 3.96,
      "step": 1145344
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.1238915644574717e-05,
      "loss": 3.9556,
      "step": 1145856
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.1230529697064196e-05,
      "loss": 3.9752,
      "step": 1146368
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.1222143749553676e-05,
      "loss": 3.9662,
      "step": 1146880
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.1213757802043156e-05,
      "loss": 3.9695,
      "step": 1147392
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.1205371854532636e-05,
      "loss": 3.9614,
      "step": 1147904
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.1196985907022116e-05,
      "loss": 3.9644,
      "step": 1148416
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.1188599959511596e-05,
      "loss": 3.9541,
      "step": 1148928
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.118021401200108e-05,
      "loss": 3.9608,
      "step": 1149440
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.117182806449056e-05,
      "loss": 3.9638,
      "step": 1149952
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.116344211698004e-05,
      "loss": 3.9794,
      "step": 1150464
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.115507254827325e-05,
      "loss": 3.9725,
      "step": 1150976
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.114668660076273e-05,
      "loss": 3.9456,
      "step": 1151488
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.113830065325221e-05,
      "loss": 3.9624,
      "step": 1152000
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.112991470574169e-05,
      "loss": 3.954,
      "step": 1152512
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.1121528758231165e-05,
      "loss": 3.9652,
      "step": 1153024
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.1113142810720645e-05,
      "loss": 3.958,
      "step": 1153536
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.1104756863210125e-05,
      "loss": 3.9643,
      "step": 1154048
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.1096370915699605e-05,
      "loss": 3.9589,
      "step": 1154560
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.1087984968189085e-05,
      "loss": 3.9769,
      "step": 1155072
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.1079599020678565e-05,
      "loss": 3.9659,
      "step": 1155584
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.107121307316805e-05,
      "loss": 3.9641,
      "step": 1156096
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.106282712565753e-05,
      "loss": 3.9591,
      "step": 1156608
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.105445755695074e-05,
      "loss": 3.9734,
      "step": 1157120
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.104607160944022e-05,
      "loss": 3.9612,
      "step": 1157632
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.10376856619297e-05,
      "loss": 3.9526,
      "step": 1158144
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.102929971441918e-05,
      "loss": 3.963,
      "step": 1158656
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.102091376690866e-05,
      "loss": 3.9584,
      "step": 1159168
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.101254419820187e-05,
      "loss": 3.9566,
      "step": 1159680
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.100415825069135e-05,
      "loss": 3.9633,
      "step": 1160192
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.099577230318083e-05,
      "loss": 3.9569,
      "step": 1160704
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.098740273447404e-05,
      "loss": 3.9685,
      "step": 1161216
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.097901678696352e-05,
      "loss": 3.967,
      "step": 1161728
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.0970630839453005e-05,
      "loss": 3.9686,
      "step": 1162240
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.0962261270746214e-05,
      "loss": 3.9636,
      "step": 1162752
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.0953875323235694e-05,
      "loss": 3.9715,
      "step": 1163264
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.0945489375725174e-05,
      "loss": 3.9523,
      "step": 1163776
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.0937103428214654e-05,
      "loss": 3.9581,
      "step": 1164288
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.0928717480704134e-05,
      "loss": 3.9592,
      "step": 1164800
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.0920331533193614e-05,
      "loss": 3.9505,
      "step": 1165312
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.0911945585683094e-05,
      "loss": 3.9632,
      "step": 1165824
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.0903559638172574e-05,
      "loss": 3.9619,
      "step": 1166336
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.0895173690662054e-05,
      "loss": 3.9535,
      "step": 1166848
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.088678774315153e-05,
      "loss": 3.9639,
      "step": 1167360
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.087840179564101e-05,
      "loss": 3.9656,
      "step": 1167872
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.087001584813049e-05,
      "loss": 3.9641,
      "step": 1168384
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.086166265822744e-05,
      "loss": 3.9627,
      "step": 1168896
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.085327671071692e-05,
      "loss": 3.9432,
      "step": 1169408
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.08448907632064e-05,
      "loss": 3.9617,
      "step": 1169920
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.083650481569588e-05,
      "loss": 3.9646,
      "step": 1170432
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.082813524698909e-05,
      "loss": 3.9624,
      "step": 1170944
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.081974929947857e-05,
      "loss": 3.9551,
      "step": 1171456
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.081136335196805e-05,
      "loss": 3.9539,
      "step": 1171968
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.080297740445753e-05,
      "loss": 3.9486,
      "step": 1172480
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.0794591456947e-05,
      "loss": 3.9502,
      "step": 1172992
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.078620550943648e-05,
      "loss": 3.9592,
      "step": 1173504
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.077781956192596e-05,
      "loss": 3.9441,
      "step": 1174016
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.076943361441544e-05,
      "loss": 3.9653,
      "step": 1174528
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.0761064045708657e-05,
      "loss": 3.9642,
      "step": 1175040
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.0752678098198137e-05,
      "loss": 3.9482,
      "step": 1175552
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.0744292150687617e-05,
      "loss": 3.9682,
      "step": 1176064
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.0735906203177096e-05,
      "loss": 3.9449,
      "step": 1176576
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.0727520255666576e-05,
      "loss": 3.9498,
      "step": 1177088
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.0719150686959786e-05,
      "loss": 3.9492,
      "step": 1177600
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.0710764739449265e-05,
      "loss": 3.9617,
      "step": 1178112
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.0702378791938745e-05,
      "loss": 3.9477,
      "step": 1178624
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.0693992844428225e-05,
      "loss": 3.9636,
      "step": 1179136
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.0685623275721434e-05,
      "loss": 3.9389,
      "step": 1179648
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.0677237328210914e-05,
      "loss": 3.9499,
      "step": 1180160
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.066886775950413e-05,
      "loss": 3.9663,
      "step": 1180672
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.066048181199361e-05,
      "loss": 3.9572,
      "step": 1181184
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.065209586448309e-05,
      "loss": 3.962,
      "step": 1181696
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.064370991697257e-05,
      "loss": 3.9632,
      "step": 1182208
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.063532396946205e-05,
      "loss": 3.9728,
      "step": 1182720
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.062693802195153e-05,
      "loss": 3.9521,
      "step": 1183232
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.061855207444101e-05,
      "loss": 3.9694,
      "step": 1183744
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.061016612693049e-05,
      "loss": 3.9493,
      "step": 1184256
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.060181293702743e-05,
      "loss": 3.9543,
      "step": 1184768
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.059342698951691e-05,
      "loss": 3.9562,
      "step": 1185280
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.058504104200639e-05,
      "loss": 3.9631,
      "step": 1185792
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.057665509449587e-05,
      "loss": 3.9623,
      "step": 1186304
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.056826914698535e-05,
      "loss": 3.9687,
      "step": 1186816
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.055988319947483e-05,
      "loss": 3.9492,
      "step": 1187328
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.0551497251964315e-05,
      "loss": 3.9452,
      "step": 1187840
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.0543111304453795e-05,
      "loss": 3.9472,
      "step": 1188352
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.0534741735747004e-05,
      "loss": 3.9589,
      "step": 1188864
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.052637216704021e-05,
      "loss": 3.9522,
      "step": 1189376
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.051798621952969e-05,
      "loss": 3.9525,
      "step": 1189888
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.0509600272019173e-05,
      "loss": 3.959,
      "step": 1190400
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.0501214324508653e-05,
      "loss": 3.9523,
      "step": 1190912
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.0492828376998133e-05,
      "loss": 3.9548,
      "step": 1191424
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.0484442429487613e-05,
      "loss": 3.951,
      "step": 1191936
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.0476056481977096e-05,
      "loss": 3.9639,
      "step": 1192448
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.0467670534466576e-05,
      "loss": 3.9676,
      "step": 1192960
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.0459284586956056e-05,
      "loss": 3.9628,
      "step": 1193472
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.0450915018249265e-05,
      "loss": 3.9421,
      "step": 1193984
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.0442545449542474e-05,
      "loss": 3.954,
      "step": 1194496
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.0434159502031957e-05,
      "loss": 3.9522,
      "step": 1195008
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.0425773554521437e-05,
      "loss": 3.9598,
      "step": 1195520
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.0417387607010917e-05,
      "loss": 3.9648,
      "step": 1196032
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.0409001659500397e-05,
      "loss": 3.9481,
      "step": 1196544
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.0400615711989877e-05,
      "loss": 3.9582,
      "step": 1197056
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.0392229764479357e-05,
      "loss": 3.9601,
      "step": 1197568
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.0383843816968837e-05,
      "loss": 3.9565,
      "step": 1198080
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.037547424826205e-05,
      "loss": 3.9494,
      "step": 1198592
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.036710467955526e-05,
      "loss": 3.9516,
      "step": 1199104
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.035871873204474e-05,
      "loss": 3.9445,
      "step": 1199616
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.035033278453422e-05,
      "loss": 3.955,
      "step": 1200128
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.03419468370237e-05,
      "loss": 3.949,
      "step": 1200640
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.033357726831691e-05,
      "loss": 3.949,
      "step": 1201152
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.032519132080639e-05,
      "loss": 3.9492,
      "step": 1201664
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.031680537329587e-05,
      "loss": 3.9575,
      "step": 1202176
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.030841942578535e-05,
      "loss": 3.9491,
      "step": 1202688
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.030003347827483e-05,
      "loss": 3.9544,
      "step": 1203200
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.029164753076431e-05,
      "loss": 3.9653,
      "step": 1203712
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.0283261583253787e-05,
      "loss": 3.9608,
      "step": 1204224
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.0274875635743267e-05,
      "loss": 3.9613,
      "step": 1204736
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.0266506067036483e-05,
      "loss": 3.9502,
      "step": 1205248
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.0258120119525963e-05,
      "loss": 3.9559,
      "step": 1205760
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.0249734172015436e-05,
      "loss": 3.9539,
      "step": 1206272
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.0241364603308652e-05,
      "loss": 3.945,
      "step": 1206784
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.0232978655798132e-05,
      "loss": 3.9567,
      "step": 1207296
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.022459270828761e-05,
      "loss": 3.9492,
      "step": 1207808
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.021620676077709e-05,
      "loss": 3.9589,
      "step": 1208320
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.020782081326657e-05,
      "loss": 3.9514,
      "step": 1208832
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.019943486575605e-05,
      "loss": 3.9579,
      "step": 1209344
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.019104891824553e-05,
      "loss": 3.9515,
      "step": 1209856
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.018266297073501e-05,
      "loss": 3.9503,
      "step": 1210368
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.017429340202822e-05,
      "loss": 3.9581,
      "step": 1210880
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.01659074545177e-05,
      "loss": 3.9594,
      "step": 1211392
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.015752150700718e-05,
      "loss": 3.9633,
      "step": 1211904
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.014913555949666e-05,
      "loss": 3.9544,
      "step": 1212416
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.014076599078987e-05,
      "loss": 3.9593,
      "step": 1212928
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.013238004327935e-05,
      "loss": 3.9589,
      "step": 1213440
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.012399409576883e-05,
      "loss": 3.9597,
      "step": 1213952
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.0115608148258313e-05,
      "loss": 3.9582,
      "step": 1214464
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.0107222200747793e-05,
      "loss": 3.9542,
      "step": 1214976
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.0098836253237273e-05,
      "loss": 3.9546,
      "step": 1215488
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.0090466684530482e-05,
      "loss": 3.9633,
      "step": 1216000
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.008209711582369e-05,
      "loss": 3.9516,
      "step": 1216512
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.0073711168313175e-05,
      "loss": 3.9498,
      "step": 1217024
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.0065325220802655e-05,
      "loss": 3.9566,
      "step": 1217536
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.0056939273292135e-05,
      "loss": 3.9545,
      "step": 1218048
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.0048553325781615e-05,
      "loss": 3.9529,
      "step": 1218560
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.0040167378271095e-05,
      "loss": 3.9586,
      "step": 1219072
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.0031781430760575e-05,
      "loss": 3.9521,
      "step": 1219584
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.0023395483250055e-05,
      "loss": 3.9659,
      "step": 1220096
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.0015009535739534e-05,
      "loss": 3.9578,
      "step": 1220608
    },
    {
      "epoch": 0.03,
      "eval_loss": 4.01444149017334,
      "eval_runtime": 342.8971,
      "eval_samples_per_second": 1112.844,
      "eval_steps_per_second": 34.777,
      "step": 1221107
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.0006623588229014e-05,
      "loss": 3.9518,
      "step": 1221120
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.9998237640718498e-05,
      "loss": 3.9453,
      "step": 1221632
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.9989868072011707e-05,
      "loss": 3.9451,
      "step": 1222144
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.9981482124501187e-05,
      "loss": 3.9626,
      "step": 1222656
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.9973096176990667e-05,
      "loss": 3.9592,
      "step": 1223168
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.9964710229480147e-05,
      "loss": 3.9549,
      "step": 1223680
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.9956324281969623e-05,
      "loss": 3.9507,
      "step": 1224192
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.994795471326284e-05,
      "loss": 3.9537,
      "step": 1224704
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.993956876575232e-05,
      "loss": 3.94,
      "step": 1225216
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.9931182818241792e-05,
      "loss": 3.9523,
      "step": 1225728
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.9922796870731272e-05,
      "loss": 3.9506,
      "step": 1226240
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.9914427302024488e-05,
      "loss": 3.9706,
      "step": 1226752
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.99060577333177e-05,
      "loss": 3.9642,
      "step": 1227264
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.989767178580718e-05,
      "loss": 3.9335,
      "step": 1227776
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.988928583829666e-05,
      "loss": 3.9489,
      "step": 1228288
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.988089989078614e-05,
      "loss": 3.9464,
      "step": 1228800
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.987251394327562e-05,
      "loss": 3.9472,
      "step": 1229312
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.9864127995765097e-05,
      "loss": 3.9525,
      "step": 1229824
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.9855758427058313e-05,
      "loss": 3.9507,
      "step": 1230336
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.9847372479547793e-05,
      "loss": 3.9454,
      "step": 1230848
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.9838986532037266e-05,
      "loss": 3.9681,
      "step": 1231360
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.9830600584526746e-05,
      "loss": 3.9559,
      "step": 1231872
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.9822214637016226e-05,
      "loss": 3.9502,
      "step": 1232384
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.9813828689505706e-05,
      "loss": 3.9462,
      "step": 1232896
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.980544274199519e-05,
      "loss": 3.9658,
      "step": 1233408
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.979705679448467e-05,
      "loss": 3.9498,
      "step": 1233920
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.978867084697415e-05,
      "loss": 3.9408,
      "step": 1234432
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.978028489946363e-05,
      "loss": 3.9507,
      "step": 1234944
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.977189895195311e-05,
      "loss": 3.9442,
      "step": 1235456
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.9763529383246318e-05,
      "loss": 3.9476,
      "step": 1235968
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9755143435735798e-05,
      "loss": 3.9496,
      "step": 1236480
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.974675748822528e-05,
      "loss": 3.9473,
      "step": 1236992
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.973837154071476e-05,
      "loss": 3.9564,
      "step": 1237504
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.973000197200797e-05,
      "loss": 3.9518,
      "step": 1238016
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.972161602449745e-05,
      "loss": 3.9608,
      "step": 1238528
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.971324645579066e-05,
      "loss": 3.9549,
      "step": 1239040
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9704860508280143e-05,
      "loss": 3.9591,
      "step": 1239552
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9696474560769623e-05,
      "loss": 3.939,
      "step": 1240064
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9688088613259103e-05,
      "loss": 3.9485,
      "step": 1240576
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9679702665748583e-05,
      "loss": 3.9416,
      "step": 1241088
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9671316718238063e-05,
      "loss": 3.9468,
      "step": 1241600
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9662930770727543e-05,
      "loss": 3.9461,
      "step": 1242112
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9654544823217023e-05,
      "loss": 3.9527,
      "step": 1242624
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9646175254510232e-05,
      "loss": 3.9432,
      "step": 1243136
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9637805685803444e-05,
      "loss": 3.9552,
      "step": 1243648
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9629419738292924e-05,
      "loss": 3.9505,
      "step": 1244160
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9621033790782404e-05,
      "loss": 3.956,
      "step": 1244672
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9612647843271884e-05,
      "loss": 3.9517,
      "step": 1245184
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9604261895761364e-05,
      "loss": 3.9324,
      "step": 1245696
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9595875948250844e-05,
      "loss": 3.9475,
      "step": 1246208
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9587490000740324e-05,
      "loss": 3.9582,
      "step": 1246720
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9579104053229807e-05,
      "loss": 3.953,
      "step": 1247232
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9570734484523016e-05,
      "loss": 3.9416,
      "step": 1247744
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9562348537012496e-05,
      "loss": 3.9419,
      "step": 1248256
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9553962589501976e-05,
      "loss": 3.9405,
      "step": 1248768
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9545593020795185e-05,
      "loss": 3.936,
      "step": 1249280
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.953720707328467e-05,
      "loss": 3.9538,
      "step": 1249792
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.952882112577415e-05,
      "loss": 3.9318,
      "step": 1250304
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.952043517826363e-05,
      "loss": 3.9528,
      "step": 1250816
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9512049230753102e-05,
      "loss": 3.9518,
      "step": 1251328
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9503663283242582e-05,
      "loss": 3.937,
      "step": 1251840
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9495277335732062e-05,
      "loss": 3.9572,
      "step": 1252352
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9486891388221545e-05,
      "loss": 3.9356,
      "step": 1252864
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9478505440711025e-05,
      "loss": 3.9424,
      "step": 1253376
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9470135872004234e-05,
      "loss": 3.934,
      "step": 1253888
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9461749924493714e-05,
      "loss": 3.9547,
      "step": 1254400
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9453363976983194e-05,
      "loss": 3.9397,
      "step": 1254912
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9444978029472674e-05,
      "loss": 3.9476,
      "step": 1255424
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9436592081962154e-05,
      "loss": 3.9308,
      "step": 1255936
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9428222513255366e-05,
      "loss": 3.9378,
      "step": 1256448
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9419836565744846e-05,
      "loss": 3.9534,
      "step": 1256960
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9411450618234326e-05,
      "loss": 3.9509,
      "step": 1257472
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9403064670723806e-05,
      "loss": 3.9448,
      "step": 1257984
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9394678723213286e-05,
      "loss": 3.9526,
      "step": 1258496
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9386292775702766e-05,
      "loss": 3.9612,
      "step": 1259008
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.937792320699598e-05,
      "loss": 3.9413,
      "step": 1259520
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.936953725948546e-05,
      "loss": 3.9594,
      "step": 1260032
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.936115131197494e-05,
      "loss": 3.9366,
      "step": 1260544
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.935276536446442e-05,
      "loss": 3.9454,
      "step": 1261056
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.93443794169539e-05,
      "loss": 3.9449,
      "step": 1261568
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9336009848247108e-05,
      "loss": 3.951,
      "step": 1262080
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.932762390073659e-05,
      "loss": 3.9501,
      "step": 1262592
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.931923795322607e-05,
      "loss": 3.9599,
      "step": 1263104
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.931086838451928e-05,
      "loss": 3.9437,
      "step": 1263616
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.930248243700876e-05,
      "loss": 3.928,
      "step": 1264128
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.929409648949824e-05,
      "loss": 3.9366,
      "step": 1264640
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.928571054198772e-05,
      "loss": 3.9474,
      "step": 1265152
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9277340973280932e-05,
      "loss": 3.9398,
      "step": 1265664
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9268955025770412e-05,
      "loss": 3.9429,
      "step": 1266176
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.9260569078259892e-05,
      "loss": 3.9486,
      "step": 1266688
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.9252183130749372e-05,
      "loss": 3.939,
      "step": 1267200
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.9243797183238852e-05,
      "loss": 3.9435,
      "step": 1267712
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.9235411235728332e-05,
      "loss": 3.9387,
      "step": 1268224
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.9227025288217812e-05,
      "loss": 3.9566,
      "step": 1268736
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.921863934070729e-05,
      "loss": 3.9551,
      "step": 1269248
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.9210269772000505e-05,
      "loss": 3.9508,
      "step": 1269760
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.9201883824489984e-05,
      "loss": 3.9344,
      "step": 1270272
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.9193497876979464e-05,
      "loss": 3.9441,
      "step": 1270784
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.9185111929468938e-05,
      "loss": 3.9455,
      "step": 1271296
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.917672598195842e-05,
      "loss": 3.9455,
      "step": 1271808
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.9168356413251637e-05,
      "loss": 3.9538,
      "step": 1272320
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.9159970465741117e-05,
      "loss": 3.9389,
      "step": 1272832
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.915158451823059e-05,
      "loss": 3.9451,
      "step": 1273344
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.914319857072007e-05,
      "loss": 3.9479,
      "step": 1273856
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.9134829002013286e-05,
      "loss": 3.95,
      "step": 1274368
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.9126443054502762e-05,
      "loss": 3.935,
      "step": 1274880
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.9118073485795978e-05,
      "loss": 3.9432,
      "step": 1275392
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.9109687538285458e-05,
      "loss": 3.934,
      "step": 1275904
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.9101301590774938e-05,
      "loss": 3.9377,
      "step": 1276416
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.909291564326441e-05,
      "loss": 3.94,
      "step": 1276928
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.908452969575389e-05,
      "loss": 3.9402,
      "step": 1277440
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.9076143748243375e-05,
      "loss": 3.9338,
      "step": 1277952
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.9067757800732855e-05,
      "loss": 3.9467,
      "step": 1278464
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.9059371853222335e-05,
      "loss": 3.94,
      "step": 1278976
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.9051002284515544e-05,
      "loss": 3.9411,
      "step": 1279488
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.9042616337005024e-05,
      "loss": 3.953,
      "step": 1280000
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.9034230389494504e-05,
      "loss": 3.9549,
      "step": 1280512
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.9025844441983984e-05,
      "loss": 3.9509,
      "step": 1281024
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.9017474873277196e-05,
      "loss": 3.9414,
      "step": 1281536
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.9009088925766676e-05,
      "loss": 3.9451,
      "step": 1282048
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.9000719357059885e-05,
      "loss": 3.9433,
      "step": 1282560
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.89923497883531e-05,
      "loss": 3.9364,
      "step": 1283072
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.898396384084258e-05,
      "loss": 3.9424,
      "step": 1283584
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.897557789333206e-05,
      "loss": 3.9392,
      "step": 1284096
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.8967191945821537e-05,
      "loss": 3.9468,
      "step": 1284608
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.8958805998311017e-05,
      "loss": 3.942,
      "step": 1285120
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.8950420050800497e-05,
      "loss": 3.9482,
      "step": 1285632
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.8942034103289977e-05,
      "loss": 3.9454,
      "step": 1286144
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.8933648155779457e-05,
      "loss": 3.9384,
      "step": 1286656
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.8925262208268937e-05,
      "loss": 3.9371,
      "step": 1287168
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.891689263956215e-05,
      "loss": 3.9562,
      "step": 1287680
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.890850669205163e-05,
      "loss": 3.9477,
      "step": 1288192
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.890012074454111e-05,
      "loss": 3.9481,
      "step": 1288704
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.889173479703059e-05,
      "loss": 3.9467,
      "step": 1289216
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.888334884952007e-05,
      "loss": 3.95,
      "step": 1289728
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.887496290200955e-05,
      "loss": 3.9509,
      "step": 1290240
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.886657695449903e-05,
      "loss": 3.95,
      "step": 1290752
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.885819100698851e-05,
      "loss": 3.9417,
      "step": 1291264
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.8849821438281722e-05,
      "loss": 3.9476,
      "step": 1291776
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.8841435490771202e-05,
      "loss": 3.9523,
      "step": 1292288
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.883306592206441e-05,
      "loss": 3.9413,
      "step": 1292800
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.882467997455389e-05,
      "loss": 3.9373,
      "step": 1293312
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.881629402704337e-05,
      "loss": 3.9433,
      "step": 1293824
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.8807908079532854e-05,
      "loss": 3.9499,
      "step": 1294336
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.8799522132022334e-05,
      "loss": 3.937,
      "step": 1294848
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.8791136184511814e-05,
      "loss": 3.9492,
      "step": 1295360
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.8782750237001294e-05,
      "loss": 3.9422,
      "step": 1295872
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.8774380668294503e-05,
      "loss": 3.952,
      "step": 1296384
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.8765994720783983e-05,
      "loss": 3.949,
      "step": 1296896
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.8757608773273463e-05,
      "loss": 3.9374,
      "step": 1297408
    },
    {
      "epoch": 1.03,
      "eval_loss": 4.009064674377441,
      "eval_runtime": 310.5769,
      "eval_samples_per_second": 1228.652,
      "eval_steps_per_second": 38.396,
      "step": 1297427
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.8749222825762946e-05,
      "loss": 3.9344,
      "step": 1297920
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.8740836878252426e-05,
      "loss": 3.9359,
      "step": 1298432
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.87324509307419e-05,
      "loss": 3.9525,
      "step": 1298944
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.872406498323138e-05,
      "loss": 3.9472,
      "step": 1299456
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.871567903572086e-05,
      "loss": 3.9472,
      "step": 1299968
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.870729308821034e-05,
      "loss": 3.9449,
      "step": 1300480
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.8698907140699823e-05,
      "loss": 3.9383,
      "step": 1300992
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.8690521193189303e-05,
      "loss": 3.9316,
      "step": 1301504
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.8682135245678783e-05,
      "loss": 3.946,
      "step": 1302016
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.8673749298168263e-05,
      "loss": 3.9347,
      "step": 1302528
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.8665363350657743e-05,
      "loss": 3.9628,
      "step": 1303040
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.8656977403147222e-05,
      "loss": 3.9502,
      "step": 1303552
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.8648591455636702e-05,
      "loss": 3.9305,
      "step": 1304064
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.8640221886929915e-05,
      "loss": 3.9358,
      "step": 1304576
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.8631835939419395e-05,
      "loss": 3.9369,
      "step": 1305088
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.8623449991908875e-05,
      "loss": 3.9353,
      "step": 1305600
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.8615064044398355e-05,
      "loss": 3.9425,
      "step": 1306112
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.8606678096887835e-05,
      "loss": 3.9455,
      "step": 1306624
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.8598292149377308e-05,
      "loss": 3.9309,
      "step": 1307136
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.858990620186679e-05,
      "loss": 3.9618,
      "step": 1307648
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.858152025435627e-05,
      "loss": 3.9386,
      "step": 1308160
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.857313430684575e-05,
      "loss": 3.946,
      "step": 1308672
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.856476473813896e-05,
      "loss": 3.9379,
      "step": 1309184
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.855637879062844e-05,
      "loss": 3.9571,
      "step": 1309696
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.854799284311792e-05,
      "loss": 3.9374,
      "step": 1310208
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.85396068956074e-05,
      "loss": 3.939,
      "step": 1310720
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.853122094809688e-05,
      "loss": 3.9366,
      "step": 1311232
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.8522835000586363e-05,
      "loss": 3.9373,
      "step": 1311744
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.8514449053075843e-05,
      "loss": 3.9382,
      "step": 1312256
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8506063105565323e-05,
      "loss": 3.9394,
      "step": 1312768
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8497693536858532e-05,
      "loss": 3.9409,
      "step": 1313280
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8489307589348012e-05,
      "loss": 3.9421,
      "step": 1313792
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8480921641837492e-05,
      "loss": 3.9448,
      "step": 1314304
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8472535694326972e-05,
      "loss": 3.9496,
      "step": 1314816
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8464166125620185e-05,
      "loss": 3.9423,
      "step": 1315328
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8455780178109665e-05,
      "loss": 3.9468,
      "step": 1315840
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8447394230599145e-05,
      "loss": 3.9344,
      "step": 1316352
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8439024661892354e-05,
      "loss": 3.9383,
      "step": 1316864
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8430638714381834e-05,
      "loss": 3.9321,
      "step": 1317376
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8422252766871317e-05,
      "loss": 3.9398,
      "step": 1317888
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8413866819360797e-05,
      "loss": 3.9358,
      "step": 1318400
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8405480871850277e-05,
      "loss": 3.9414,
      "step": 1318912
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8397094924339757e-05,
      "loss": 3.933,
      "step": 1319424
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8388708976829237e-05,
      "loss": 3.9468,
      "step": 1319936
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8380323029318717e-05,
      "loss": 3.9396,
      "step": 1320448
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8371937081808197e-05,
      "loss": 3.9451,
      "step": 1320960
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8363551134297673e-05,
      "loss": 3.9415,
      "step": 1321472
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8355165186787153e-05,
      "loss": 3.9259,
      "step": 1321984
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8346779239276633e-05,
      "loss": 3.9328,
      "step": 1322496
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.833840967056985e-05,
      "loss": 3.9511,
      "step": 1323008
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8330023723059322e-05,
      "loss": 3.9392,
      "step": 1323520
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8321637775548802e-05,
      "loss": 3.9324,
      "step": 1324032
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8313251828038286e-05,
      "loss": 3.9284,
      "step": 1324544
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8304865880527766e-05,
      "loss": 3.9377,
      "step": 1325056
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8296496311820975e-05,
      "loss": 3.9194,
      "step": 1325568
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8288110364310455e-05,
      "loss": 3.9489,
      "step": 1326080
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8279724416799935e-05,
      "loss": 3.919,
      "step": 1326592
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8271338469289415e-05,
      "loss": 3.944,
      "step": 1327104
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8262968900582627e-05,
      "loss": 3.9443,
      "step": 1327616
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8254582953072107e-05,
      "loss": 3.9265,
      "step": 1328128
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8246213384365323e-05,
      "loss": 3.9423,
      "step": 1328640
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8237827436854796e-05,
      "loss": 3.9301,
      "step": 1329152
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8229441489344276e-05,
      "loss": 3.9325,
      "step": 1329664
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8221055541833756e-05,
      "loss": 3.9232,
      "step": 1330176
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.821266959432324e-05,
      "loss": 3.9454,
      "step": 1330688
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.820430002561645e-05,
      "loss": 3.929,
      "step": 1331200
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.819591407810593e-05,
      "loss": 3.9351,
      "step": 1331712
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.818752813059541e-05,
      "loss": 3.9214,
      "step": 1332224
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8179142183084888e-05,
      "loss": 3.9303,
      "step": 1332736
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8170756235574368e-05,
      "loss": 3.9394,
      "step": 1333248
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8162370288063848e-05,
      "loss": 3.9423,
      "step": 1333760
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.815398434055333e-05,
      "loss": 3.9389,
      "step": 1334272
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.814559839304281e-05,
      "loss": 3.9429,
      "step": 1334784
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.813721244553229e-05,
      "loss": 3.9448,
      "step": 1335296
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.81288428768255e-05,
      "loss": 3.9347,
      "step": 1335808
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.812045692931498e-05,
      "loss": 3.9469,
      "step": 1336320
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8112087360608193e-05,
      "loss": 3.9309,
      "step": 1336832
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8103701413097673e-05,
      "loss": 3.9362,
      "step": 1337344
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8095315465587153e-05,
      "loss": 3.9321,
      "step": 1337856
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8086929518076633e-05,
      "loss": 3.9422,
      "step": 1338368
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8078543570566113e-05,
      "loss": 3.9416,
      "step": 1338880
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8070174001859322e-05,
      "loss": 3.9493,
      "step": 1339392
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8061788054348802e-05,
      "loss": 3.9398,
      "step": 1339904
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8053402106838282e-05,
      "loss": 3.9174,
      "step": 1340416
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8045016159327765e-05,
      "loss": 3.9242,
      "step": 1340928
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8036630211817245e-05,
      "loss": 3.9378,
      "step": 1341440
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8028244264306725e-05,
      "loss": 3.9322,
      "step": 1341952
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8019858316796205e-05,
      "loss": 3.9323,
      "step": 1342464
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8011472369285685e-05,
      "loss": 3.94,
      "step": 1342976
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.8003119179382627e-05,
      "loss": 3.928,
      "step": 1343488
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7994733231872107e-05,
      "loss": 3.9354,
      "step": 1344000
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7986347284361587e-05,
      "loss": 3.9336,
      "step": 1344512
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7977961336851066e-05,
      "loss": 3.9424,
      "step": 1345024
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7969575389340546e-05,
      "loss": 3.9419,
      "step": 1345536
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7961189441830026e-05,
      "loss": 3.9474,
      "step": 1346048
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7952803494319506e-05,
      "loss": 3.9203,
      "step": 1346560
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7944417546808983e-05,
      "loss": 3.9359,
      "step": 1347072
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.79360479781022e-05,
      "loss": 3.9332,
      "step": 1347584
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.792766203059168e-05,
      "loss": 3.9361,
      "step": 1348096
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.791927608308116e-05,
      "loss": 3.9494,
      "step": 1348608
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7910890135570632e-05,
      "loss": 3.9237,
      "step": 1349120
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7902520566863848e-05,
      "loss": 3.9339,
      "step": 1349632
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7894134619353328e-05,
      "loss": 3.9389,
      "step": 1350144
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.788576505064654e-05,
      "loss": 3.9403,
      "step": 1350656
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.787737910313602e-05,
      "loss": 3.9249,
      "step": 1351168
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.78689931556255e-05,
      "loss": 3.9316,
      "step": 1351680
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.786060720811498e-05,
      "loss": 3.9258,
      "step": 1352192
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7852221260604457e-05,
      "loss": 3.928,
      "step": 1352704
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7843835313093937e-05,
      "loss": 3.9305,
      "step": 1353216
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7835449365583417e-05,
      "loss": 3.9335,
      "step": 1353728
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7827063418072897e-05,
      "loss": 3.9242,
      "step": 1354240
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7818677470562376e-05,
      "loss": 3.9354,
      "step": 1354752
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7810307901855586e-05,
      "loss": 3.9283,
      "step": 1355264
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7801921954345066e-05,
      "loss": 3.933,
      "step": 1355776
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7793552385638278e-05,
      "loss": 3.948,
      "step": 1356288
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7785166438127758e-05,
      "loss": 3.9422,
      "step": 1356800
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7776780490617238e-05,
      "loss": 3.936,
      "step": 1357312
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7768394543106718e-05,
      "loss": 3.933,
      "step": 1357824
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7760008595596198e-05,
      "loss": 3.9347,
      "step": 1358336
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7751622648085678e-05,
      "loss": 3.934,
      "step": 1358848
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7743236700575158e-05,
      "loss": 3.925,
      "step": 1359360
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.773485075306464e-05,
      "loss": 3.933,
      "step": 1359872
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.772648118435785e-05,
      "loss": 3.9337,
      "step": 1360384
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.771811161565106e-05,
      "loss": 3.9364,
      "step": 1360896
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.770972566814054e-05,
      "loss": 3.9348,
      "step": 1361408
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.770133972063002e-05,
      "loss": 3.9367,
      "step": 1361920
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7692953773119503e-05,
      "loss": 3.9322,
      "step": 1362432
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7684567825608982e-05,
      "loss": 3.9332,
      "step": 1362944
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7676181878098462e-05,
      "loss": 3.9254,
      "step": 1363456
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.766781230939167e-05,
      "loss": 3.9492,
      "step": 1363968
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.765942636188115e-05,
      "loss": 3.94,
      "step": 1364480
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.765104041437063e-05,
      "loss": 3.9388,
      "step": 1364992
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.764265446686011e-05,
      "loss": 3.9355,
      "step": 1365504
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7634268519349595e-05,
      "loss": 3.9378,
      "step": 1366016
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7625898950642804e-05,
      "loss": 3.9415,
      "step": 1366528
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7617513003132284e-05,
      "loss": 3.9438,
      "step": 1367040
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7609127055621764e-05,
      "loss": 3.9325,
      "step": 1367552
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7600741108111244e-05,
      "loss": 3.9371,
      "step": 1368064
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7592355160600724e-05,
      "loss": 3.9355,
      "step": 1368576
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7583969213090204e-05,
      "loss": 3.9343,
      "step": 1369088
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7575599644383416e-05,
      "loss": 3.9317,
      "step": 1369600
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7567213696872896e-05,
      "loss": 3.934,
      "step": 1370112
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7558827749362376e-05,
      "loss": 3.9395,
      "step": 1370624
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7550441801851856e-05,
      "loss": 3.9259,
      "step": 1371136
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7542055854341336e-05,
      "loss": 3.9424,
      "step": 1371648
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7533669906830816e-05,
      "loss": 3.9346,
      "step": 1372160
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7525283959320292e-05,
      "loss": 3.9384,
      "step": 1372672
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7516898011809772e-05,
      "loss": 3.9381,
      "step": 1373184
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7508512064299252e-05,
      "loss": 3.9288,
      "step": 1373696
    },
    {
      "epoch": 0.03,
      "eval_loss": 4.004461288452148,
      "eval_runtime": 295.3464,
      "eval_samples_per_second": 1292.012,
      "eval_steps_per_second": 40.376,
      "step": 1373747
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.750014249559246e-05,
      "loss": 3.9285,
      "step": 1374208
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.749175654808194e-05,
      "loss": 3.9246,
      "step": 1374720
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.7483370600571425e-05,
      "loss": 3.9447,
      "step": 1375232
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.7474984653060905e-05,
      "loss": 3.9323,
      "step": 1375744
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.7466615084354114e-05,
      "loss": 3.9431,
      "step": 1376256
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.7458229136843594e-05,
      "loss": 3.9302,
      "step": 1376768
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.7449843189333074e-05,
      "loss": 3.9304,
      "step": 1377280
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.744147362062629e-05,
      "loss": 3.9199,
      "step": 1377792
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.7433087673115766e-05,
      "loss": 3.9351,
      "step": 1378304
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.7424701725605246e-05,
      "loss": 3.9317,
      "step": 1378816
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.7416315778094726e-05,
      "loss": 3.9509,
      "step": 1379328
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.7407929830584206e-05,
      "loss": 3.9389,
      "step": 1379840
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.739957664068115e-05,
      "loss": 3.9212,
      "step": 1380352
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.739119069317063e-05,
      "loss": 3.9263,
      "step": 1380864
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.738280474566011e-05,
      "loss": 3.9361,
      "step": 1381376
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.7374418798149588e-05,
      "loss": 3.9158,
      "step": 1381888
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.7366032850639067e-05,
      "loss": 3.9366,
      "step": 1382400
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.7357646903128547e-05,
      "loss": 3.9319,
      "step": 1382912
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.7349260955618027e-05,
      "loss": 3.9232,
      "step": 1383424
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.7340875008107507e-05,
      "loss": 3.9533,
      "step": 1383936
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.733250543940072e-05,
      "loss": 3.9303,
      "step": 1384448
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.73241194918902e-05,
      "loss": 3.9371,
      "step": 1384960
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.731573354437968e-05,
      "loss": 3.9301,
      "step": 1385472
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.730734759686916e-05,
      "loss": 3.944,
      "step": 1385984
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.729896164935864e-05,
      "loss": 3.9232,
      "step": 1386496
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.729057570184812e-05,
      "loss": 3.9319,
      "step": 1387008
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.72821897543376e-05,
      "loss": 3.9238,
      "step": 1387520
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.727380380682708e-05,
      "loss": 3.9318,
      "step": 1388032
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.726541785931656e-05,
      "loss": 3.9305,
      "step": 1388544
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.7257048290609772e-05,
      "loss": 3.93,
      "step": 1389056
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.724867872190298e-05,
      "loss": 3.9324,
      "step": 1389568
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.724029277439246e-05,
      "loss": 3.9358,
      "step": 1390080
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.723190682688194e-05,
      "loss": 3.9288,
      "step": 1390592
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.722352087937142e-05,
      "loss": 3.944,
      "step": 1391104
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.7215167689468363e-05,
      "loss": 3.9311,
      "step": 1391616
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.7206781741957842e-05,
      "loss": 3.9342,
      "step": 1392128
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.7198395794447322e-05,
      "loss": 3.9272,
      "step": 1392640
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.7190009846936802e-05,
      "loss": 3.9294,
      "step": 1393152
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.7181623899426282e-05,
      "loss": 3.9232,
      "step": 1393664
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.7173237951915766e-05,
      "loss": 3.9294,
      "step": 1394176
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.7164852004405246e-05,
      "loss": 3.9258,
      "step": 1394688
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.7156466056894726e-05,
      "loss": 3.9374,
      "step": 1395200
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.7148080109384206e-05,
      "loss": 3.9225,
      "step": 1395712
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.7139694161873686e-05,
      "loss": 3.9342,
      "step": 1396224
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.7131308214363165e-05,
      "loss": 3.9343,
      "step": 1396736
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.7122922266852645e-05,
      "loss": 3.9331,
      "step": 1397248
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.7114569076949587e-05,
      "loss": 3.9344,
      "step": 1397760
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.7106183129439067e-05,
      "loss": 3.9153,
      "step": 1398272
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.7097797181928547e-05,
      "loss": 3.9241,
      "step": 1398784
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.7089411234418027e-05,
      "loss": 3.9388,
      "step": 1399296
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.7081025286907507e-05,
      "loss": 3.9297,
      "step": 1399808
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.7072639339396987e-05,
      "loss": 3.9275,
      "step": 1400320
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.7064253391886467e-05,
      "loss": 3.9161,
      "step": 1400832
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.705588382317968e-05,
      "loss": 3.9307,
      "step": 1401344
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.704749787566916e-05,
      "loss": 3.9089,
      "step": 1401856
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.703911192815864e-05,
      "loss": 3.9421,
      "step": 1402368
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.703072598064812e-05,
      "loss": 3.9076,
      "step": 1402880
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.7022356411941328e-05,
      "loss": 3.9398,
      "step": 1403392
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.701397046443081e-05,
      "loss": 3.9291,
      "step": 1403904
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.700558451692029e-05,
      "loss": 3.9204,
      "step": 1404416
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.699719856940977e-05,
      "loss": 3.9343,
      "step": 1404928
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.6988812621899245e-05,
      "loss": 3.9217,
      "step": 1405440
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.6980426674388725e-05,
      "loss": 3.9214,
      "step": 1405952
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.6972040726878205e-05,
      "loss": 3.9159,
      "step": 1406464
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.6963654779367688e-05,
      "loss": 3.9376,
      "step": 1406976
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.6955268831857168e-05,
      "loss": 3.92,
      "step": 1407488
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.6946882884346648e-05,
      "loss": 3.9289,
      "step": 1408000
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.6938496936836128e-05,
      "loss": 3.9106,
      "step": 1408512
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.6930110989325608e-05,
      "loss": 3.9196,
      "step": 1409024
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.692175779942255e-05,
      "loss": 3.9305,
      "step": 1409536
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.691337185191203e-05,
      "loss": 3.9349,
      "step": 1410048
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.690498590440151e-05,
      "loss": 3.9257,
      "step": 1410560
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.689659995689099e-05,
      "loss": 3.935,
      "step": 1411072
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.688821400938047e-05,
      "loss": 3.9351,
      "step": 1411584
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.687982806186995e-05,
      "loss": 3.9292,
      "step": 1412096
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.6871458493163158e-05,
      "loss": 3.9333,
      "step": 1412608
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.686307254565264e-05,
      "loss": 3.9262,
      "step": 1413120
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.685468659814212e-05,
      "loss": 3.9256,
      "step": 1413632
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.68463006506316e-05,
      "loss": 3.9253,
      "step": 1414144
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.683791470312108e-05,
      "loss": 3.9314,
      "step": 1414656
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.682954513441429e-05,
      "loss": 3.9331,
      "step": 1415168
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.682115918690377e-05,
      "loss": 3.9424,
      "step": 1415680
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.681277323939325e-05,
      "loss": 3.9284,
      "step": 1416192
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.680438729188273e-05,
      "loss": 3.9079,
      "step": 1416704
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.6796001344372214e-05,
      "loss": 3.9144,
      "step": 1417216
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.6787615396861694e-05,
      "loss": 3.9315,
      "step": 1417728
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.6779245828154903e-05,
      "loss": 3.9231,
      "step": 1418240
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.6770859880644383e-05,
      "loss": 3.9235,
      "step": 1418752
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.6762473933133863e-05,
      "loss": 3.9294,
      "step": 1419264
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.6754087985623343e-05,
      "loss": 3.9229,
      "step": 1419776
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.6745702038112823e-05,
      "loss": 3.9192,
      "step": 1420288
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.6737316090602306e-05,
      "loss": 3.9245,
      "step": 1420800
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.6728930143091786e-05,
      "loss": 3.9402,
      "step": 1421312
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.672054419558126e-05,
      "loss": 3.9309,
      "step": 1421824
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.671215824807074e-05,
      "loss": 3.9364,
      "step": 1422336
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.6703788679363955e-05,
      "loss": 3.9178,
      "step": 1422848
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.669540273185343e-05,
      "loss": 3.9271,
      "step": 1423360
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.668701678434291e-05,
      "loss": 3.9265,
      "step": 1423872
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.6678647215636127e-05,
      "loss": 3.9273,
      "step": 1424384
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.6670261268125607e-05,
      "loss": 3.9387,
      "step": 1424896
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.666187532061508e-05,
      "loss": 3.9193,
      "step": 1425408
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.665348937310456e-05,
      "loss": 3.9236,
      "step": 1425920
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.6645103425594044e-05,
      "loss": 3.9311,
      "step": 1426432
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.6636717478083524e-05,
      "loss": 3.9331,
      "step": 1426944
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.6628347909376733e-05,
      "loss": 3.9159,
      "step": 1427456
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.661997834066995e-05,
      "loss": 3.9233,
      "step": 1427968
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.661159239315943e-05,
      "loss": 3.9209,
      "step": 1428480
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.6603206445648905e-05,
      "loss": 3.9216,
      "step": 1428992
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.6594820498138385e-05,
      "loss": 3.9187,
      "step": 1429504
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.6586434550627865e-05,
      "loss": 3.9218,
      "step": 1430016
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.6578048603117345e-05,
      "loss": 3.9199,
      "step": 1430528
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.6569662655606825e-05,
      "loss": 3.9279,
      "step": 1431040
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.6561276708096305e-05,
      "loss": 3.9169,
      "step": 1431552
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.6552890760585785e-05,
      "loss": 3.9239,
      "step": 1432064
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.6544504813075265e-05,
      "loss": 3.9388,
      "step": 1432576
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.6536118865564745e-05,
      "loss": 3.9355,
      "step": 1433088
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.6527749296857957e-05,
      "loss": 3.9292,
      "step": 1433600
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.6519363349347437e-05,
      "loss": 3.9284,
      "step": 1434112
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.6510977401836917e-05,
      "loss": 3.9253,
      "step": 1434624
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.6502591454326397e-05,
      "loss": 3.9232,
      "step": 1435136
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.6494205506815877e-05,
      "loss": 3.9212,
      "step": 1435648
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.648583593810909e-05,
      "loss": 3.92,
      "step": 1436160
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.647744999059857e-05,
      "loss": 3.9216,
      "step": 1436672
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.646906404308805e-05,
      "loss": 3.9327,
      "step": 1437184
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.646067809557753e-05,
      "loss": 3.9243,
      "step": 1437696
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.645229214806701e-05,
      "loss": 3.9262,
      "step": 1438208
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.644393895816395e-05,
      "loss": 3.9269,
      "step": 1438720
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.643555301065343e-05,
      "loss": 3.9204,
      "step": 1439232
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.642716706314291e-05,
      "loss": 3.9172,
      "step": 1439744
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.641878111563239e-05,
      "loss": 3.9414,
      "step": 1440256
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.641039516812187e-05,
      "loss": 3.934,
      "step": 1440768
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.640200922061135e-05,
      "loss": 3.9331,
      "step": 1441280
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.639363965190456e-05,
      "loss": 3.9258,
      "step": 1441792
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.6385253704394043e-05,
      "loss": 3.9257,
      "step": 1442304
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.6376867756883523e-05,
      "loss": 3.9338,
      "step": 1442816
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.6368481809373003e-05,
      "loss": 3.9398,
      "step": 1443328
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.6360095861862483e-05,
      "loss": 3.9272,
      "step": 1443840
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.6351709914351963e-05,
      "loss": 3.9231,
      "step": 1444352
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.6343323966841443e-05,
      "loss": 3.9292,
      "step": 1444864
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.633493801933092e-05,
      "loss": 3.9263,
      "step": 1445376
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.63265520718204e-05,
      "loss": 3.9234,
      "step": 1445888
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.631816612430988e-05,
      "loss": 3.9261,
      "step": 1446400
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.630978017679936e-05,
      "loss": 3.932,
      "step": 1446912
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.630141060809257e-05,
      "loss": 3.9181,
      "step": 1447424
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.6293041039385785e-05,
      "loss": 3.9329,
      "step": 1447936
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.6284655091875264e-05,
      "loss": 3.9293,
      "step": 1448448
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.627626914436474e-05,
      "loss": 3.927,
      "step": 1448960
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.626788319685422e-05,
      "loss": 3.9321,
      "step": 1449472
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.62594972493437e-05,
      "loss": 3.9178,
      "step": 1449984
    },
    {
      "epoch": 1.03,
      "eval_loss": 4.000810623168945,
      "eval_runtime": 303.8438,
      "eval_samples_per_second": 1255.879,
      "eval_steps_per_second": 39.247,
      "step": 1450067
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.625111130183318e-05,
      "loss": 3.9179,
      "step": 1450496
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.624272535432266e-05,
      "loss": 3.9156,
      "step": 1451008
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.623433940681214e-05,
      "loss": 3.9378,
      "step": 1451520
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.622595345930162e-05,
      "loss": 3.9199,
      "step": 1452032
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.62175675117911e-05,
      "loss": 3.9379,
      "step": 1452544
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.6209181564280584e-05,
      "loss": 3.9211,
      "step": 1453056
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.6200795616770064e-05,
      "loss": 3.9253,
      "step": 1453568
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.6192409669259544e-05,
      "loss": 3.914,
      "step": 1454080
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.6184023721749024e-05,
      "loss": 3.9309,
      "step": 1454592
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.6175637774238504e-05,
      "loss": 3.9182,
      "step": 1455104
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.6167251826727977e-05,
      "loss": 3.9427,
      "step": 1455616
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.615886587921746e-05,
      "loss": 3.9297,
      "step": 1456128
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.6150496310510676e-05,
      "loss": 3.9109,
      "step": 1456640
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.6142110363000156e-05,
      "loss": 3.9167,
      "step": 1457152
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.613372441548963e-05,
      "loss": 3.9283,
      "step": 1457664
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.612533846797911e-05,
      "loss": 3.9125,
      "step": 1458176
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.611695252046859e-05,
      "loss": 3.9259,
      "step": 1458688
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.610856657295807e-05,
      "loss": 3.923,
      "step": 1459200
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.6100180625447553e-05,
      "loss": 3.9179,
      "step": 1459712
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.6091794677937033e-05,
      "loss": 3.9479,
      "step": 1460224
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.6083408730426513e-05,
      "loss": 3.9244,
      "step": 1460736
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.607503916171972e-05,
      "loss": 3.9274,
      "step": 1461248
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.60666532142092e-05,
      "loss": 3.9231,
      "step": 1461760
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.605826726669868e-05,
      "loss": 3.9383,
      "step": 1462272
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.604988131918816e-05,
      "loss": 3.9165,
      "step": 1462784
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.6041511750481374e-05,
      "loss": 3.9198,
      "step": 1463296
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.6033125802970854e-05,
      "loss": 3.9188,
      "step": 1463808
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.6024739855460334e-05,
      "loss": 3.9239,
      "step": 1464320
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.6016353907949814e-05,
      "loss": 3.9202,
      "step": 1464832
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.6007967960439294e-05,
      "loss": 3.9194,
      "step": 1465344
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5999582012928774e-05,
      "loss": 3.9262,
      "step": 1465856
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5991196065418254e-05,
      "loss": 3.9241,
      "step": 1466368
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5982810117907737e-05,
      "loss": 3.9238,
      "step": 1466880
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5974456928004675e-05,
      "loss": 3.9344,
      "step": 1467392
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5966087359297884e-05,
      "loss": 3.9235,
      "step": 1467904
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5957701411787368e-05,
      "loss": 3.9265,
      "step": 1468416
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5949315464276848e-05,
      "loss": 3.9139,
      "step": 1468928
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5940929516766328e-05,
      "loss": 3.926,
      "step": 1469440
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5932543569255808e-05,
      "loss": 3.9125,
      "step": 1469952
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5924157621745288e-05,
      "loss": 3.9227,
      "step": 1470464
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5915771674234768e-05,
      "loss": 3.9196,
      "step": 1470976
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5907385726724247e-05,
      "loss": 3.9302,
      "step": 1471488
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5898999779213727e-05,
      "loss": 3.9123,
      "step": 1472000
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5890613831703207e-05,
      "loss": 3.9235,
      "step": 1472512
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.588222788419269e-05,
      "loss": 3.9293,
      "step": 1473024
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.587384193668217e-05,
      "loss": 3.9242,
      "step": 1473536
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.586547236797538e-05,
      "loss": 3.9296,
      "step": 1474048
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.585708642046486e-05,
      "loss": 3.9071,
      "step": 1474560
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.584870047295434e-05,
      "loss": 3.9139,
      "step": 1475072
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5840314525443816e-05,
      "loss": 3.929,
      "step": 1475584
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5831944956737032e-05,
      "loss": 3.9201,
      "step": 1476096
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5823559009226512e-05,
      "loss": 3.9193,
      "step": 1476608
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.581518944051972e-05,
      "loss": 3.9102,
      "step": 1477120
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.58068034930092e-05,
      "loss": 3.9238,
      "step": 1477632
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.579841754549868e-05,
      "loss": 3.9003,
      "step": 1478144
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.579003159798816e-05,
      "loss": 3.9276,
      "step": 1478656
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5781645650477638e-05,
      "loss": 3.9069,
      "step": 1479168
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5773276081770854e-05,
      "loss": 3.9231,
      "step": 1479680
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5764890134260333e-05,
      "loss": 3.9246,
      "step": 1480192
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5756504186749813e-05,
      "loss": 3.9118,
      "step": 1480704
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.574811823923929e-05,
      "loss": 3.9264,
      "step": 1481216
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.573973229172877e-05,
      "loss": 3.9151,
      "step": 1481728
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.573134634421825e-05,
      "loss": 3.91,
      "step": 1482240
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.572296039670773e-05,
      "loss": 3.9085,
      "step": 1482752
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.571459082800094e-05,
      "loss": 3.9261,
      "step": 1483264
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.570620488049042e-05,
      "loss": 3.9139,
      "step": 1483776
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.56978189329799e-05,
      "loss": 3.919,
      "step": 1484288
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.568943298546938e-05,
      "loss": 3.905,
      "step": 1484800
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5681047037958862e-05,
      "loss": 3.9111,
      "step": 1485312
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5672661090448342e-05,
      "loss": 3.9215,
      "step": 1485824
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5664275142937822e-05,
      "loss": 3.9271,
      "step": 1486336
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5655889195427302e-05,
      "loss": 3.913,
      "step": 1486848
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.564751962672051e-05,
      "loss": 3.9324,
      "step": 1487360
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5639150058013724e-05,
      "loss": 3.9264,
      "step": 1487872
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5630764110503204e-05,
      "loss": 3.9198,
      "step": 1488384
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5622394541796413e-05,
      "loss": 3.9236,
      "step": 1488896
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5614008594285893e-05,
      "loss": 3.9182,
      "step": 1489408
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5605622646775373e-05,
      "loss": 3.9198,
      "step": 1489920
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5597236699264853e-05,
      "loss": 3.9175,
      "step": 1490432
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5588850751754332e-05,
      "loss": 3.9221,
      "step": 1490944
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5580464804243816e-05,
      "loss": 3.9284,
      "step": 1491456
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5572078856733296e-05,
      "loss": 3.9322,
      "step": 1491968
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5563692909222776e-05,
      "loss": 3.9214,
      "step": 1492480
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5555306961712256e-05,
      "loss": 3.8994,
      "step": 1492992
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5546921014201736e-05,
      "loss": 3.9112,
      "step": 1493504
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5538551445494945e-05,
      "loss": 3.9162,
      "step": 1494016
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5530165497984425e-05,
      "loss": 3.9195,
      "step": 1494528
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5521779550473908e-05,
      "loss": 3.9138,
      "step": 1495040
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.5513393602963388e-05,
      "loss": 3.9216,
      "step": 1495552
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5505007655452868e-05,
      "loss": 3.9161,
      "step": 1496064
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5496621707942348e-05,
      "loss": 3.9118,
      "step": 1496576
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5488235760431828e-05,
      "loss": 3.919,
      "step": 1497088
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5479866191725037e-05,
      "loss": 3.9252,
      "step": 1497600
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5471480244214517e-05,
      "loss": 3.9218,
      "step": 1498112
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5463094296704e-05,
      "loss": 3.9349,
      "step": 1498624
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5454708349193473e-05,
      "loss": 3.9114,
      "step": 1499136
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5446322401682953e-05,
      "loss": 3.9173,
      "step": 1499648
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5437936454172433e-05,
      "loss": 3.918,
      "step": 1500160
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5429550506661913e-05,
      "loss": 3.9212,
      "step": 1500672
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5421164559151393e-05,
      "loss": 3.9295,
      "step": 1501184
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5412794990444606e-05,
      "loss": 3.9124,
      "step": 1501696
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5404409042934086e-05,
      "loss": 3.9184,
      "step": 1502208
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5396023095423566e-05,
      "loss": 3.9205,
      "step": 1502720
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5387637147913046e-05,
      "loss": 3.925,
      "step": 1503232
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5379267579206255e-05,
      "loss": 3.9057,
      "step": 1503744
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5370881631695738e-05,
      "loss": 3.917,
      "step": 1504256
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5362495684185218e-05,
      "loss": 3.9165,
      "step": 1504768
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5354109736674698e-05,
      "loss": 3.91,
      "step": 1505280
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5345723789164178e-05,
      "loss": 3.9093,
      "step": 1505792
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5337337841653658e-05,
      "loss": 3.9149,
      "step": 1506304
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5328968272946867e-05,
      "loss": 3.9178,
      "step": 1506816
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5320582325436347e-05,
      "loss": 3.915,
      "step": 1507328
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.531219637792583e-05,
      "loss": 3.9102,
      "step": 1507840
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.530381043041531e-05,
      "loss": 3.9154,
      "step": 1508352
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.529542448290479e-05,
      "loss": 3.9309,
      "step": 1508864
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.528703853539427e-05,
      "loss": 3.9266,
      "step": 1509376
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.527865258788375e-05,
      "loss": 3.9206,
      "step": 1509888
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.527026664037323e-05,
      "loss": 3.9172,
      "step": 1510400
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.526191345047017e-05,
      "loss": 3.9179,
      "step": 1510912
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.525352750295965e-05,
      "loss": 3.9188,
      "step": 1511424
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.524514155544913e-05,
      "loss": 3.9094,
      "step": 1511936
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.523675560793861e-05,
      "loss": 3.9128,
      "step": 1512448
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.522836966042809e-05,
      "loss": 3.9131,
      "step": 1512960
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.521998371291757e-05,
      "loss": 3.9249,
      "step": 1513472
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.521159776540705e-05,
      "loss": 3.9159,
      "step": 1513984
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.520321181789653e-05,
      "loss": 3.923,
      "step": 1514496
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5194842249189744e-05,
      "loss": 3.9153,
      "step": 1515008
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5186456301679224e-05,
      "loss": 3.9124,
      "step": 1515520
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5178070354168704e-05,
      "loss": 3.911,
      "step": 1516032
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5169684406658184e-05,
      "loss": 3.9314,
      "step": 1516544
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5161314837951393e-05,
      "loss": 3.9303,
      "step": 1517056
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5152928890440873e-05,
      "loss": 3.9203,
      "step": 1517568
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5144542942930356e-05,
      "loss": 3.9183,
      "step": 1518080
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5136156995419836e-05,
      "loss": 3.92,
      "step": 1518592
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.512777104790931e-05,
      "loss": 3.9214,
      "step": 1519104
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.511938510039879e-05,
      "loss": 3.9338,
      "step": 1519616
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.511099915288827e-05,
      "loss": 3.9173,
      "step": 1520128
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.510261320537775e-05,
      "loss": 3.9194,
      "step": 1520640
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.509424363667096e-05,
      "loss": 3.9157,
      "step": 1521152
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5085874067964177e-05,
      "loss": 3.9219,
      "step": 1521664
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5077488120453657e-05,
      "loss": 3.9153,
      "step": 1522176
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.506910217294313e-05,
      "loss": 3.918,
      "step": 1522688
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.506071622543261e-05,
      "loss": 3.9249,
      "step": 1523200
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5052330277922094e-05,
      "loss": 3.914,
      "step": 1523712
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.504396070921531e-05,
      "loss": 3.923,
      "step": 1524224
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5035574761704783e-05,
      "loss": 3.927,
      "step": 1524736
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5027188814194263e-05,
      "loss": 3.9162,
      "step": 1525248
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5018802866683743e-05,
      "loss": 3.9222,
      "step": 1525760
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5010416919173223e-05,
      "loss": 3.9111,
      "step": 1526272
    },
    {
      "epoch": 0.03,
      "eval_loss": 3.997249126434326,
      "eval_runtime": 295.1895,
      "eval_samples_per_second": 1292.698,
      "eval_steps_per_second": 40.398,
      "step": 1526387
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.5002047350466435e-05,
      "loss": 3.9165,
      "step": 1526784
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.499366140295592e-05,
      "loss": 3.9074,
      "step": 1527296
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.49852754554454e-05,
      "loss": 3.9269,
      "step": 1527808
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.4976889507934875e-05,
      "loss": 3.9157,
      "step": 1528320
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.4968503560424355e-05,
      "loss": 3.9333,
      "step": 1528832
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.4960117612913835e-05,
      "loss": 3.9084,
      "step": 1529344
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.4951731665403315e-05,
      "loss": 3.9198,
      "step": 1529856
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.4943345717892795e-05,
      "loss": 3.904,
      "step": 1530368
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.4934976149186007e-05,
      "loss": 3.9219,
      "step": 1530880
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.4926590201675487e-05,
      "loss": 3.9063,
      "step": 1531392
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.4918204254164967e-05,
      "loss": 3.9328,
      "step": 1531904
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.4909818306654447e-05,
      "loss": 3.9269,
      "step": 1532416
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.4901448737947656e-05,
      "loss": 3.9014,
      "step": 1532928
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.489306279043714e-05,
      "loss": 3.9118,
      "step": 1533440
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.488467684292662e-05,
      "loss": 3.9175,
      "step": 1533952
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.48762908954161e-05,
      "loss": 3.9074,
      "step": 1534464
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.486790494790558e-05,
      "loss": 3.9145,
      "step": 1534976
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.4859519000395056e-05,
      "loss": 3.9154,
      "step": 1535488
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.4851133052884536e-05,
      "loss": 3.9121,
      "step": 1536000
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.4842747105374016e-05,
      "loss": 3.9361,
      "step": 1536512
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.4834361157863496e-05,
      "loss": 3.9158,
      "step": 1537024
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.482597521035298e-05,
      "loss": 3.917,
      "step": 1537536
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.481758926284246e-05,
      "loss": 3.9187,
      "step": 1538048
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.480921969413567e-05,
      "loss": 3.9257,
      "step": 1538560
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.480083374662515e-05,
      "loss": 3.9109,
      "step": 1539072
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.479244779911463e-05,
      "loss": 3.9205,
      "step": 1539584
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.478406185160411e-05,
      "loss": 3.9063,
      "step": 1540096
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.477567590409359e-05,
      "loss": 3.9135,
      "step": 1540608
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.4767289956583068e-05,
      "loss": 3.9133,
      "step": 1541120
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.4758904009072548e-05,
      "loss": 3.9151,
      "step": 1541632
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4750518061562028e-05,
      "loss": 3.9121,
      "step": 1542144
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4742148492855237e-05,
      "loss": 3.9204,
      "step": 1542656
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4733762545344717e-05,
      "loss": 3.9205,
      "step": 1543168
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.47253765978342e-05,
      "loss": 3.9251,
      "step": 1543680
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4717007029127413e-05,
      "loss": 3.9124,
      "step": 1544192
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.470862108161689e-05,
      "loss": 3.9222,
      "step": 1544704
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.470023513410637e-05,
      "loss": 3.9104,
      "step": 1545216
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.469184918659585e-05,
      "loss": 3.9181,
      "step": 1545728
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.468346323908533e-05,
      "loss": 3.9055,
      "step": 1546240
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4675093670378542e-05,
      "loss": 3.9135,
      "step": 1546752
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4666707722868022e-05,
      "loss": 3.9095,
      "step": 1547264
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4658321775357502e-05,
      "loss": 3.9235,
      "step": 1547776
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4649935827846982e-05,
      "loss": 3.9102,
      "step": 1548288
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4641549880336462e-05,
      "loss": 3.9121,
      "step": 1548800
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4633163932825942e-05,
      "loss": 3.9241,
      "step": 1549312
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.462477798531542e-05,
      "loss": 3.9171,
      "step": 1549824
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4616408416608634e-05,
      "loss": 3.9224,
      "step": 1550336
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4608022469098114e-05,
      "loss": 3.9042,
      "step": 1550848
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.459963652158759e-05,
      "loss": 3.903,
      "step": 1551360
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.459125057407707e-05,
      "loss": 3.9207,
      "step": 1551872
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4582881005370283e-05,
      "loss": 3.9154,
      "step": 1552384
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4574495057859763e-05,
      "loss": 3.9081,
      "step": 1552896
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4566109110349243e-05,
      "loss": 3.9057,
      "step": 1553408
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4557723162838723e-05,
      "loss": 3.9164,
      "step": 1553920
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4549337215328203e-05,
      "loss": 3.895,
      "step": 1554432
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4540951267817683e-05,
      "loss": 3.9172,
      "step": 1554944
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4532581699110892e-05,
      "loss": 3.8995,
      "step": 1555456
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4524195751600372e-05,
      "loss": 3.9113,
      "step": 1555968
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4515809804089855e-05,
      "loss": 3.9198,
      "step": 1556480
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4507423856579335e-05,
      "loss": 3.9064,
      "step": 1556992
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4499037909068815e-05,
      "loss": 3.9152,
      "step": 1557504
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4490668340362024e-05,
      "loss": 3.9099,
      "step": 1558016
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4482282392851504e-05,
      "loss": 3.9034,
      "step": 1558528
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4473896445340984e-05,
      "loss": 3.9077,
      "step": 1559040
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4465510497830464e-05,
      "loss": 3.9135,
      "step": 1559552
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4457124550319948e-05,
      "loss": 3.9068,
      "step": 1560064
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4448738602809424e-05,
      "loss": 3.9109,
      "step": 1560576
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4440369034102637e-05,
      "loss": 3.9013,
      "step": 1561088
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4431983086592117e-05,
      "loss": 3.9013,
      "step": 1561600
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4423597139081597e-05,
      "loss": 3.9148,
      "step": 1562112
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4415211191571073e-05,
      "loss": 3.921,
      "step": 1562624
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4406825244060556e-05,
      "loss": 3.9066,
      "step": 1563136
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4398439296550036e-05,
      "loss": 3.9219,
      "step": 1563648
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4390069727843245e-05,
      "loss": 3.9178,
      "step": 1564160
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4381683780332725e-05,
      "loss": 3.9109,
      "step": 1564672
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4373297832822205e-05,
      "loss": 3.9133,
      "step": 1565184
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4364911885311685e-05,
      "loss": 3.918,
      "step": 1565696
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4356525937801165e-05,
      "loss": 3.9105,
      "step": 1566208
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.434813999029065e-05,
      "loss": 3.9118,
      "step": 1566720
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.433975404278013e-05,
      "loss": 3.9167,
      "step": 1567232
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4331368095269605e-05,
      "loss": 3.9176,
      "step": 1567744
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4322998526562818e-05,
      "loss": 3.9194,
      "step": 1568256
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4314628957856027e-05,
      "loss": 3.9165,
      "step": 1568768
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.430624301034551e-05,
      "loss": 3.8984,
      "step": 1569280
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.429785706283499e-05,
      "loss": 3.8987,
      "step": 1569792
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.428947111532447e-05,
      "loss": 3.9115,
      "step": 1570304
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.428108516781395e-05,
      "loss": 3.9136,
      "step": 1570816
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4272699220303427e-05,
      "loss": 3.9093,
      "step": 1571328
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.426432965159664e-05,
      "loss": 3.9128,
      "step": 1571840
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.425594370408612e-05,
      "loss": 3.9089,
      "step": 1572352
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.4247557756575602e-05,
      "loss": 3.9062,
      "step": 1572864
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.423917180906508e-05,
      "loss": 3.912,
      "step": 1573376
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.423078586155456e-05,
      "loss": 3.9129,
      "step": 1573888
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.422239991404404e-05,
      "loss": 3.9182,
      "step": 1574400
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.421403034533725e-05,
      "loss": 3.9185,
      "step": 1574912
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.4205644397826728e-05,
      "loss": 3.9148,
      "step": 1575424
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.419725845031621e-05,
      "loss": 3.9031,
      "step": 1575936
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.418887250280569e-05,
      "loss": 3.9143,
      "step": 1576448
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.418048655529517e-05,
      "loss": 3.9117,
      "step": 1576960
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.417210060778465e-05,
      "loss": 3.9218,
      "step": 1577472
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.416371466027413e-05,
      "loss": 3.9078,
      "step": 1577984
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.4155328712763608e-05,
      "loss": 3.9059,
      "step": 1578496
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.414695914405682e-05,
      "loss": 3.9152,
      "step": 1579008
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.4138573196546303e-05,
      "loss": 3.9173,
      "step": 1579520
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.4130187249035783e-05,
      "loss": 3.9051,
      "step": 1580032
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.4121817680328992e-05,
      "loss": 3.91,
      "step": 1580544
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.4113431732818472e-05,
      "loss": 3.908,
      "step": 1581056
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.4105045785307952e-05,
      "loss": 3.8998,
      "step": 1581568
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.4096659837797432e-05,
      "loss": 3.9003,
      "step": 1582080
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.4088273890286912e-05,
      "loss": 3.9133,
      "step": 1582592
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.4079887942776392e-05,
      "loss": 3.9073,
      "step": 1583104
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.4071518374069605e-05,
      "loss": 3.9076,
      "step": 1583616
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.406313242655908e-05,
      "loss": 3.9032,
      "step": 1584128
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.405474647904856e-05,
      "loss": 3.9078,
      "step": 1584640
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.404636053153804e-05,
      "loss": 3.9238,
      "step": 1585152
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.403797458402752e-05,
      "loss": 3.9252,
      "step": 1585664
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.4029588636517004e-05,
      "loss": 3.9121,
      "step": 1586176
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.4021202689006484e-05,
      "loss": 3.9132,
      "step": 1586688
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.4012816741495964e-05,
      "loss": 3.9077,
      "step": 1587200
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.4004447172789174e-05,
      "loss": 3.9094,
      "step": 1587712
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.3996077604082383e-05,
      "loss": 3.9038,
      "step": 1588224
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.3987691656571866e-05,
      "loss": 3.9078,
      "step": 1588736
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.3979305709061346e-05,
      "loss": 3.9092,
      "step": 1589248
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.3970919761550826e-05,
      "loss": 3.9162,
      "step": 1589760
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.3962533814040306e-05,
      "loss": 3.9079,
      "step": 1590272
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.3954147866529786e-05,
      "loss": 3.9162,
      "step": 1590784
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.3945761919019262e-05,
      "loss": 3.9071,
      "step": 1591296
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.3937375971508742e-05,
      "loss": 3.9058,
      "step": 1591808
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.3928990023998226e-05,
      "loss": 3.9025,
      "step": 1592320
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.3920620455291438e-05,
      "loss": 3.9221,
      "step": 1592832
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.3912234507780915e-05,
      "loss": 3.9225,
      "step": 1593344
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.3903864939074127e-05,
      "loss": 3.9186,
      "step": 1593856
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.3895478991563607e-05,
      "loss": 3.9051,
      "step": 1594368
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.3887093044053087e-05,
      "loss": 3.9157,
      "step": 1594880
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.3878707096542567e-05,
      "loss": 3.9128,
      "step": 1595392
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.3870321149032047e-05,
      "loss": 3.9244,
      "step": 1595904
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.3861935201521527e-05,
      "loss": 3.9087,
      "step": 1596416
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.3853549254011007e-05,
      "loss": 3.9108,
      "step": 1596928
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.3845163306500487e-05,
      "loss": 3.9147,
      "step": 1597440
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.383681011659743e-05,
      "loss": 3.9175,
      "step": 1597952
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.3828424169086912e-05,
      "loss": 3.9055,
      "step": 1598464
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.382003822157639e-05,
      "loss": 3.9084,
      "step": 1598976
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.381165227406587e-05,
      "loss": 3.9154,
      "step": 1599488
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.380326632655535e-05,
      "loss": 3.9068,
      "step": 1600000
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.3794896757848557e-05,
      "loss": 3.9146,
      "step": 1600512
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.378651081033804e-05,
      "loss": 3.9193,
      "step": 1601024
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.377812486282752e-05,
      "loss": 3.911,
      "step": 1601536
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.3769738915317e-05,
      "loss": 3.9109,
      "step": 1602048
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.376135296780648e-05,
      "loss": 3.9123,
      "step": 1602560
    },
    {
      "epoch": 1.03,
      "eval_loss": 3.9945480823516846,
      "eval_runtime": 294.3847,
      "eval_samples_per_second": 1296.232,
      "eval_steps_per_second": 40.508,
      "step": 1602707
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.375296702029596e-05,
      "loss": 3.9016,
      "step": 1603072
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.374458107278544e-05,
      "loss": 3.9003,
      "step": 1603584
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.3736195125274917e-05,
      "loss": 3.9198,
      "step": 1604096
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.3727809177764397e-05,
      "loss": 3.9082,
      "step": 1604608
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.371942323025388e-05,
      "loss": 3.9214,
      "step": 1605120
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.371103728274336e-05,
      "loss": 3.9093,
      "step": 1605632
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.370265133523284e-05,
      "loss": 3.9117,
      "step": 1606144
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.369426538772232e-05,
      "loss": 3.8982,
      "step": 1606656
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.36858794402118e-05,
      "loss": 3.9117,
      "step": 1607168
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.3677493492701277e-05,
      "loss": 3.9036,
      "step": 1607680
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.3669107545190757e-05,
      "loss": 3.9199,
      "step": 1608192
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.3660721597680237e-05,
      "loss": 3.9188,
      "step": 1608704
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.365235202897345e-05,
      "loss": 3.8941,
      "step": 1609216
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.364396608146293e-05,
      "loss": 3.9069,
      "step": 1609728
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.363558013395241e-05,
      "loss": 3.9155,
      "step": 1610240
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.362719418644189e-05,
      "loss": 3.8981,
      "step": 1610752
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.361880823893137e-05,
      "loss": 3.9081,
      "step": 1611264
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.361042229142085e-05,
      "loss": 3.9096,
      "step": 1611776
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.360203634391033e-05,
      "loss": 3.9067,
      "step": 1612288
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.359365039639981e-05,
      "loss": 3.9252,
      "step": 1612800
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.358526444888929e-05,
      "loss": 3.9102,
      "step": 1613312
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.357687850137877e-05,
      "loss": 3.9102,
      "step": 1613824
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.356849255386825e-05,
      "loss": 3.9151,
      "step": 1614336
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.356010660635773e-05,
      "loss": 3.9133,
      "step": 1614848
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.3551737037650938e-05,
      "loss": 3.9063,
      "step": 1615360
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.354335109014042e-05,
      "loss": 3.9119,
      "step": 1615872
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.35349651426299e-05,
      "loss": 3.9028,
      "step": 1616384
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.352657919511938e-05,
      "loss": 3.9051,
      "step": 1616896
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.351819324760886e-05,
      "loss": 3.9052,
      "step": 1617408
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.350982367890207e-05,
      "loss": 3.9106,
      "step": 1617920
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.350143773139155e-05,
      "loss": 3.9058,
      "step": 1618432
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.349305178388103e-05,
      "loss": 3.912,
      "step": 1618944
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.3484665836370513e-05,
      "loss": 3.9127,
      "step": 1619456
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.3476296267663722e-05,
      "loss": 3.9175,
      "step": 1619968
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.346792669895693e-05,
      "loss": 3.9077,
      "step": 1620480
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.345954075144641e-05,
      "loss": 3.9123,
      "step": 1620992
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.345115480393589e-05,
      "loss": 3.9057,
      "step": 1621504
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.3442768856425375e-05,
      "loss": 3.9069,
      "step": 1622016
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.3434382908914855e-05,
      "loss": 3.8978,
      "step": 1622528
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.3425996961404335e-05,
      "loss": 3.9052,
      "step": 1623040
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.341761101389381e-05,
      "loss": 3.905,
      "step": 1623552
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.340922506638329e-05,
      "loss": 3.915,
      "step": 1624064
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.340083911887277e-05,
      "loss": 3.9013,
      "step": 1624576
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.339245317136225e-05,
      "loss": 3.9064,
      "step": 1625088
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.338406722385173e-05,
      "loss": 3.9181,
      "step": 1625600
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.3375681276341214e-05,
      "loss": 3.9101,
      "step": 1626112
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.3367311707634424e-05,
      "loss": 3.9131,
      "step": 1626624
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.3358925760123903e-05,
      "loss": 3.9001,
      "step": 1627136
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.3350539812613383e-05,
      "loss": 3.8908,
      "step": 1627648
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.3342153865102863e-05,
      "loss": 3.9178,
      "step": 1628160
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.3333767917592343e-05,
      "loss": 3.9068,
      "step": 1628672
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.3325414727689285e-05,
      "loss": 3.9017,
      "step": 1629184
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.3317028780178765e-05,
      "loss": 3.9033,
      "step": 1629696
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.3308642832668245e-05,
      "loss": 3.9116,
      "step": 1630208
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.3300256885157725e-05,
      "loss": 3.8868,
      "step": 1630720
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.3291870937647205e-05,
      "loss": 3.913,
      "step": 1631232
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.3283484990136685e-05,
      "loss": 3.8912,
      "step": 1631744
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.3275099042626168e-05,
      "loss": 3.9065,
      "step": 1632256
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.3266713095115645e-05,
      "loss": 3.9085,
      "step": 1632768
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.3258343526408857e-05,
      "loss": 3.9067,
      "step": 1633280
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.3249973957702066e-05,
      "loss": 3.9038,
      "step": 1633792
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.3241588010191546e-05,
      "loss": 3.9071,
      "step": 1634304
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.323320206268103e-05,
      "loss": 3.8919,
      "step": 1634816
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.322481611517051e-05,
      "loss": 3.901,
      "step": 1635328
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.321643016765999e-05,
      "loss": 3.906,
      "step": 1635840
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.3208044220149466e-05,
      "loss": 3.9043,
      "step": 1636352
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.3199658272638946e-05,
      "loss": 3.9012,
      "step": 1636864
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.3191272325128426e-05,
      "loss": 3.8942,
      "step": 1637376
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.318290275642164e-05,
      "loss": 3.895,
      "step": 1637888
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.317451680891112e-05,
      "loss": 3.9047,
      "step": 1638400
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.31661308614006e-05,
      "loss": 3.9141,
      "step": 1638912
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.315776129269381e-05,
      "loss": 3.8989,
      "step": 1639424
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.3149375345183287e-05,
      "loss": 3.917,
      "step": 1639936
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.3140989397672767e-05,
      "loss": 3.9147,
      "step": 1640448
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.3132619828965983e-05,
      "loss": 3.9072,
      "step": 1640960
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.3124233881455463e-05,
      "loss": 3.9046,
      "step": 1641472
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.311584793394494e-05,
      "loss": 3.9101,
      "step": 1641984
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.310746198643442e-05,
      "loss": 3.9049,
      "step": 1642496
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.30990760389239e-05,
      "loss": 3.9042,
      "step": 1643008
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.309069009141338e-05,
      "loss": 3.91,
      "step": 1643520
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.308230414390286e-05,
      "loss": 3.9104,
      "step": 1644032
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.307391819639234e-05,
      "loss": 3.9169,
      "step": 1644544
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.3065532248881823e-05,
      "loss": 3.9101,
      "step": 1645056
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.30571463013713e-05,
      "loss": 3.8921,
      "step": 1645568
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.304876035386078e-05,
      "loss": 3.8943,
      "step": 1646080
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.3040390785153992e-05,
      "loss": 3.8988,
      "step": 1646592
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.303200483764347e-05,
      "loss": 3.9077,
      "step": 1647104
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.3023618890132952e-05,
      "loss": 3.9041,
      "step": 1647616
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.3015232942622432e-05,
      "loss": 3.9045,
      "step": 1648128
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.3006846995111912e-05,
      "loss": 3.9005,
      "step": 1648640
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.299846104760139e-05,
      "loss": 3.9004,
      "step": 1649152
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.299007510009087e-05,
      "loss": 3.9055,
      "step": 1649664
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.298170553138408e-05,
      "loss": 3.9019,
      "step": 1650176
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.297331958387356e-05,
      "loss": 3.9119,
      "step": 1650688
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.2964950015166773e-05,
      "loss": 3.9139,
      "step": 1651200
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.2956564067656253e-05,
      "loss": 3.91,
      "step": 1651712
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.2948178120145733e-05,
      "loss": 3.8953,
      "step": 1652224
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.2939792172635213e-05,
      "loss": 3.9054,
      "step": 1652736
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.2931406225124693e-05,
      "loss": 3.906,
      "step": 1653248
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.2923020277614173e-05,
      "loss": 3.9173,
      "step": 1653760
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.2914634330103653e-05,
      "loss": 3.9014,
      "step": 1654272
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.2906248382593133e-05,
      "loss": 3.8967,
      "step": 1654784
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.2897862435082613e-05,
      "loss": 3.916,
      "step": 1655296
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.2889492866375825e-05,
      "loss": 3.9038,
      "step": 1655808
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.2881106918865302e-05,
      "loss": 3.9008,
      "step": 1656320
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.2872720971354782e-05,
      "loss": 3.9026,
      "step": 1656832
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.2864351402647998e-05,
      "loss": 3.9011,
      "step": 1657344
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.2855965455137474e-05,
      "loss": 3.8924,
      "step": 1657856
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.2847579507626954e-05,
      "loss": 3.8905,
      "step": 1658368
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.2839193560116434e-05,
      "loss": 3.9073,
      "step": 1658880
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.2830807612605914e-05,
      "loss": 3.9032,
      "step": 1659392
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.2822438043899123e-05,
      "loss": 3.8999,
      "step": 1659904
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.2814052096388607e-05,
      "loss": 3.8963,
      "step": 1660416
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.2805666148878086e-05,
      "loss": 3.8967,
      "step": 1660928
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.2797280201367566e-05,
      "loss": 3.9171,
      "step": 1661440
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.2788894253857046e-05,
      "loss": 3.9163,
      "step": 1661952
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.2780508306346526e-05,
      "loss": 3.9106,
      "step": 1662464
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.2772122358836006e-05,
      "loss": 3.9063,
      "step": 1662976
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.2763752790129215e-05,
      "loss": 3.8967,
      "step": 1663488
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.27553668426187e-05,
      "loss": 3.9011,
      "step": 1664000
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.274698089510818e-05,
      "loss": 3.8991,
      "step": 1664512
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.2738594947597655e-05,
      "loss": 3.9004,
      "step": 1665024
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.2730209000087135e-05,
      "loss": 3.901,
      "step": 1665536
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.2721823052576615e-05,
      "loss": 3.9068,
      "step": 1666048
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.2713437105066095e-05,
      "loss": 3.9042,
      "step": 1666560
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.2705067536359308e-05,
      "loss": 3.9047,
      "step": 1667072
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.2696681588848788e-05,
      "loss": 3.9014,
      "step": 1667584
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.2688295641338268e-05,
      "loss": 3.9008,
      "step": 1668096
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.2679909693827748e-05,
      "loss": 3.8992,
      "step": 1668608
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.2671540125120957e-05,
      "loss": 3.9104,
      "step": 1669120
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.266317055641417e-05,
      "loss": 3.9162,
      "step": 1669632
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.2654784608903652e-05,
      "loss": 3.9128,
      "step": 1670144
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.264639866139313e-05,
      "loss": 3.9002,
      "step": 1670656
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.263801271388261e-05,
      "loss": 3.9121,
      "step": 1671168
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.262962676637209e-05,
      "loss": 3.9043,
      "step": 1671680
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.262124081886157e-05,
      "loss": 3.9164,
      "step": 1672192
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.261285487135105e-05,
      "loss": 3.9062,
      "step": 1672704
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.260446892384053e-05,
      "loss": 3.9034,
      "step": 1673216
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.259609935513374e-05,
      "loss": 3.9045,
      "step": 1673728
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.258771340762322e-05,
      "loss": 3.9102,
      "step": 1674240
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.25793274601127e-05,
      "loss": 3.9025,
      "step": 1674752
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.257094151260218e-05,
      "loss": 3.902,
      "step": 1675264
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.256255556509166e-05,
      "loss": 3.9016,
      "step": 1675776
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.2554169617581138e-05,
      "loss": 3.9079,
      "step": 1676288
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.254578367007062e-05,
      "loss": 3.9078,
      "step": 1676800
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.25373977225601e-05,
      "loss": 3.9073,
      "step": 1677312
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.252902815385331e-05,
      "loss": 3.9095,
      "step": 1677824
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.252064220634279e-05,
      "loss": 3.9022,
      "step": 1678336
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.2512272637636002e-05,
      "loss": 3.9072,
      "step": 1678848
    },
    {
      "epoch": 0.03,
      "eval_loss": 3.9921131134033203,
      "eval_runtime": 315.453,
      "eval_samples_per_second": 1209.66,
      "eval_steps_per_second": 37.803,
      "step": 1679027
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.2503886690125482e-05,
      "loss": 3.9052,
      "step": 1679360
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.2495500742614962e-05,
      "loss": 3.8935,
      "step": 1679872
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.2487114795104442e-05,
      "loss": 3.91,
      "step": 1680384
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.2478728847593922e-05,
      "loss": 3.9068,
      "step": 1680896
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.2470342900083402e-05,
      "loss": 3.9168,
      "step": 1681408
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.2461956952572882e-05,
      "loss": 3.9005,
      "step": 1681920
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.2453571005062362e-05,
      "loss": 3.8988,
      "step": 1682432
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.2445185057551842e-05,
      "loss": 3.8995,
      "step": 1682944
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.2436815488845055e-05,
      "loss": 3.8999,
      "step": 1683456
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.2428429541334535e-05,
      "loss": 3.8987,
      "step": 1683968
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.2420043593824015e-05,
      "loss": 3.9132,
      "step": 1684480
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.241165764631349e-05,
      "loss": 3.9125,
      "step": 1684992
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.2403304456410433e-05,
      "loss": 3.8913,
      "step": 1685504
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.2394918508899916e-05,
      "loss": 3.8984,
      "step": 1686016
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.2386532561389396e-05,
      "loss": 3.9083,
      "step": 1686528
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.2378146613878876e-05,
      "loss": 3.8903,
      "step": 1687040
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.2369760666368356e-05,
      "loss": 3.9,
      "step": 1687552
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.2361374718857836e-05,
      "loss": 3.9004,
      "step": 1688064
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.2352988771347316e-05,
      "loss": 3.8998,
      "step": 1688576
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.2344602823836792e-05,
      "loss": 3.9183,
      "step": 1689088
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.2336216876326276e-05,
      "loss": 3.9047,
      "step": 1689600
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.2327830928815756e-05,
      "loss": 3.907,
      "step": 1690112
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.2319444981305236e-05,
      "loss": 3.905,
      "step": 1690624
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.2311059033794716e-05,
      "loss": 3.9074,
      "step": 1691136
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.2302689465087925e-05,
      "loss": 3.9013,
      "step": 1691648
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.2294319896381137e-05,
      "loss": 3.9038,
      "step": 1692160
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.2285933948870617e-05,
      "loss": 3.8987,
      "step": 1692672
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.227756438016383e-05,
      "loss": 3.893,
      "step": 1693184
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.226917843265331e-05,
      "loss": 3.8988,
      "step": 1693696
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.226079248514279e-05,
      "loss": 3.9015,
      "step": 1694208
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.2252406537632266e-05,
      "loss": 3.905,
      "step": 1694720
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.2244020590121746e-05,
      "loss": 3.9033,
      "step": 1695232
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.223563464261123e-05,
      "loss": 3.909,
      "step": 1695744
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.222724869510071e-05,
      "loss": 3.9104,
      "step": 1696256
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.221887912639392e-05,
      "loss": 3.8998,
      "step": 1696768
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.22104931788834e-05,
      "loss": 3.9031,
      "step": 1697280
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.220210723137288e-05,
      "loss": 3.9029,
      "step": 1697792
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.219372128386236e-05,
      "loss": 3.9004,
      "step": 1698304
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.2185335336351838e-05,
      "loss": 3.892,
      "step": 1698816
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.2176949388841318e-05,
      "loss": 3.8975,
      "step": 1699328
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.2168563441330798e-05,
      "loss": 3.8994,
      "step": 1699840
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.2160177493820278e-05,
      "loss": 3.9075,
      "step": 1700352
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.2151791546309758e-05,
      "loss": 3.8958,
      "step": 1700864
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.2143421977602967e-05,
      "loss": 3.9001,
      "step": 1701376
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.2135036030092447e-05,
      "loss": 3.9156,
      "step": 1701888
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.212665008258193e-05,
      "loss": 3.9005,
      "step": 1702400
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.211826413507141e-05,
      "loss": 3.9058,
      "step": 1702912
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.210987818756089e-05,
      "loss": 3.8942,
      "step": 1703424
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.210149224005037e-05,
      "loss": 3.8854,
      "step": 1703936
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.209310629253985e-05,
      "loss": 3.911,
      "step": 1704448
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.2084720345029327e-05,
      "loss": 3.8994,
      "step": 1704960
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.2076334397518807e-05,
      "loss": 3.8927,
      "step": 1705472
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.2067948450008287e-05,
      "loss": 3.8954,
      "step": 1705984
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.205956250249777e-05,
      "loss": 3.9074,
      "step": 1706496
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.2051209312594712e-05,
      "loss": 3.8796,
      "step": 1707008
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.2042823365084192e-05,
      "loss": 3.8975,
      "step": 1707520
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.203443741757367e-05,
      "loss": 3.8925,
      "step": 1708032
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.2026051470063148e-05,
      "loss": 3.8945,
      "step": 1708544
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.201766552255263e-05,
      "loss": 3.91,
      "step": 1709056
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.200927957504211e-05,
      "loss": 3.8965,
      "step": 1709568
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.200089362753159e-05,
      "loss": 3.8943,
      "step": 1710080
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.199250768002107e-05,
      "loss": 3.9037,
      "step": 1710592
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.198413811131428e-05,
      "loss": 3.8863,
      "step": 1711104
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.197575216380376e-05,
      "loss": 3.8912,
      "step": 1711616
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.196736621629324e-05,
      "loss": 3.8975,
      "step": 1712128
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.1958980268782724e-05,
      "loss": 3.8967,
      "step": 1712640
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.1950610700075933e-05,
      "loss": 3.8991,
      "step": 1713152
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.1942224752565413e-05,
      "loss": 3.8894,
      "step": 1713664
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.1933838805054893e-05,
      "loss": 3.8829,
      "step": 1714176
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.1925452857544373e-05,
      "loss": 3.8947,
      "step": 1714688
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.1917066910033853e-05,
      "loss": 3.9107,
      "step": 1715200
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.1908697341327065e-05,
      "loss": 3.8936,
      "step": 1715712
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.1900311393816545e-05,
      "loss": 3.9124,
      "step": 1716224
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.1891925446306025e-05,
      "loss": 3.9084,
      "step": 1716736
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.1883539498795505e-05,
      "loss": 3.9017,
      "step": 1717248
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.1875169930088714e-05,
      "loss": 3.8906,
      "step": 1717760
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.1866783982578194e-05,
      "loss": 3.9083,
      "step": 1718272
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.1858398035067677e-05,
      "loss": 3.8962,
      "step": 1718784
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.1850012087557157e-05,
      "loss": 3.8949,
      "step": 1719296
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.1841642518850367e-05,
      "loss": 3.9043,
      "step": 1719808
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.1833272950143576e-05,
      "loss": 3.902,
      "step": 1720320
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.1824887002633056e-05,
      "loss": 3.9106,
      "step": 1720832
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.181650105512254e-05,
      "loss": 3.9012,
      "step": 1721344
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.180811510761202e-05,
      "loss": 3.8849,
      "step": 1721856
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.1799745538905228e-05,
      "loss": 3.8852,
      "step": 1722368
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.1791359591394708e-05,
      "loss": 3.8936,
      "step": 1722880
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.1782973643884188e-05,
      "loss": 3.9017,
      "step": 1723392
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.1774587696373668e-05,
      "loss": 3.8969,
      "step": 1723904
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.1766201748863148e-05,
      "loss": 3.8963,
      "step": 1724416
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.175781580135263e-05,
      "loss": 3.8987,
      "step": 1724928
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.1749429853842108e-05,
      "loss": 3.8908,
      "step": 1725440
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.1741043906331588e-05,
      "loss": 3.8973,
      "step": 1725952
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.1732657958821068e-05,
      "loss": 3.8929,
      "step": 1726464
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.1724272011310548e-05,
      "loss": 3.9122,
      "step": 1726976
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.1715886063800028e-05,
      "loss": 3.9058,
      "step": 1727488
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.1707500116289507e-05,
      "loss": 3.902,
      "step": 1728000
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.169913054758272e-05,
      "loss": 3.8921,
      "step": 1728512
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.16907446000722e-05,
      "loss": 3.8991,
      "step": 1729024
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.168235865256168e-05,
      "loss": 3.8951,
      "step": 1729536
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.167397270505116e-05,
      "loss": 3.9111,
      "step": 1730048
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.166560313634437e-05,
      "loss": 3.896,
      "step": 1730560
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.165721718883385e-05,
      "loss": 3.8912,
      "step": 1731072
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.1648831241323332e-05,
      "loss": 3.9044,
      "step": 1731584
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.164044529381281e-05,
      "loss": 3.9001,
      "step": 1732096
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.163207572510602e-05,
      "loss": 3.8925,
      "step": 1732608
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.162370615639923e-05,
      "loss": 3.8985,
      "step": 1733120
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.161532020888871e-05,
      "loss": 3.8905,
      "step": 1733632
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.1606934261378194e-05,
      "loss": 3.8885,
      "step": 1734144
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.1598548313867674e-05,
      "loss": 3.8827,
      "step": 1734656
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.1590162366357154e-05,
      "loss": 3.9019,
      "step": 1735168
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.1581776418846634e-05,
      "loss": 3.8941,
      "step": 1735680
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.157339047133611e-05,
      "loss": 3.8942,
      "step": 1736192
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.156500452382559e-05,
      "loss": 3.8935,
      "step": 1736704
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.155661857631507e-05,
      "loss": 3.8891,
      "step": 1737216
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.1548249007608283e-05,
      "loss": 3.9076,
      "step": 1737728
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.1539863060097762e-05,
      "loss": 3.9103,
      "step": 1738240
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.1531477112587242e-05,
      "loss": 3.9053,
      "step": 1738752
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.1523091165076722e-05,
      "loss": 3.8977,
      "step": 1739264
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.151472159636993e-05,
      "loss": 3.8921,
      "step": 1739776
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.1506352027663147e-05,
      "loss": 3.8961,
      "step": 1740288
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.1497966080152627e-05,
      "loss": 3.8884,
      "step": 1740800
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.1489580132642107e-05,
      "loss": 3.8963,
      "step": 1741312
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.1481194185131584e-05,
      "loss": 3.8951,
      "step": 1741824
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.1472808237621064e-05,
      "loss": 3.8977,
      "step": 1742336
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.1464438668914276e-05,
      "loss": 3.9028,
      "step": 1742848
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.1456052721403756e-05,
      "loss": 3.8934,
      "step": 1743360
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.1447666773893236e-05,
      "loss": 3.9011,
      "step": 1743872
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.1439280826382716e-05,
      "loss": 3.892,
      "step": 1744384
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.1430894878872196e-05,
      "loss": 3.8948,
      "step": 1744896
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.1422525310165405e-05,
      "loss": 3.8983,
      "step": 1745408
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.1414139362654885e-05,
      "loss": 3.9109,
      "step": 1745920
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.1405753415144365e-05,
      "loss": 3.9061,
      "step": 1746432
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.139736746763385e-05,
      "loss": 3.8956,
      "step": 1746944
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.138898152012333e-05,
      "loss": 3.9039,
      "step": 1747456
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.138059557261281e-05,
      "loss": 3.9005,
      "step": 1747968
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.137220962510229e-05,
      "loss": 3.9082,
      "step": 1748480
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.1363823677591765e-05,
      "loss": 3.9008,
      "step": 1748992
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.1355454108884977e-05,
      "loss": 3.9016,
      "step": 1749504
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.1347068161374457e-05,
      "loss": 3.8958,
      "step": 1750016
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.1338682213863937e-05,
      "loss": 3.9025,
      "step": 1750528
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.1330296266353417e-05,
      "loss": 3.8937,
      "step": 1751040
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.1321910318842897e-05,
      "loss": 3.8958,
      "step": 1751552
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.131354075013611e-05,
      "loss": 3.8988,
      "step": 1752064
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.1305154802625586e-05,
      "loss": 3.8993,
      "step": 1752576
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.129676885511507e-05,
      "loss": 3.898,
      "step": 1753088
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.128838290760455e-05,
      "loss": 3.8987,
      "step": 1753600
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.1280013338897762e-05,
      "loss": 3.9061,
      "step": 1754112
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.127162739138724e-05,
      "loss": 3.8938,
      "step": 1754624
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.126324144387672e-05,
      "loss": 3.9062,
      "step": 1755136
    },
    {
      "epoch": 1.03,
      "eval_loss": 3.9889142513275146,
      "eval_runtime": 292.5433,
      "eval_samples_per_second": 1304.391,
      "eval_steps_per_second": 40.763,
      "step": 1755347
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.12548554963662e-05,
      "loss": 3.893,
      "step": 1755648
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.124646954885568e-05,
      "loss": 3.8852,
      "step": 1756160
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.123808360134516e-05,
      "loss": 3.9032,
      "step": 1756672
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.1229697653834642e-05,
      "loss": 3.8984,
      "step": 1757184
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.122131170632412e-05,
      "loss": 3.9059,
      "step": 1757696
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.1212925758813598e-05,
      "loss": 3.8942,
      "step": 1758208
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.1204539811303078e-05,
      "loss": 3.8933,
      "step": 1758720
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.1196153863792558e-05,
      "loss": 3.893,
      "step": 1759232
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.1187767916282038e-05,
      "loss": 3.8976,
      "step": 1759744
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.1179381968771518e-05,
      "loss": 3.8893,
      "step": 1760256
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.1170996021261e-05,
      "loss": 3.9093,
      "step": 1760768
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.1162610073750478e-05,
      "loss": 3.9058,
      "step": 1761280
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.115424050504369e-05,
      "loss": 3.8882,
      "step": 1761792
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.114585455753317e-05,
      "loss": 3.8892,
      "step": 1762304
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.113746861002265e-05,
      "loss": 3.9012,
      "step": 1762816
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.1129082662512127e-05,
      "loss": 3.8856,
      "step": 1763328
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.112069671500161e-05,
      "loss": 3.8933,
      "step": 1763840
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.111231076749109e-05,
      "loss": 3.8908,
      "step": 1764352
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.110392481998057e-05,
      "loss": 3.8984,
      "step": 1764864
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.109553887247005e-05,
      "loss": 3.9105,
      "step": 1765376
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.108715292495953e-05,
      "loss": 3.9029,
      "step": 1765888
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.1078766977449007e-05,
      "loss": 3.8994,
      "step": 1766400
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.1070381029938487e-05,
      "loss": 3.8964,
      "step": 1766912
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.1062011461231703e-05,
      "loss": 3.9007,
      "step": 1767424
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.1053625513721183e-05,
      "loss": 3.8907,
      "step": 1767936
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.104523956621066e-05,
      "loss": 3.9006,
      "step": 1768448
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.103685361870014e-05,
      "loss": 3.8902,
      "step": 1768960
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.102848404999335e-05,
      "loss": 3.8867,
      "step": 1769472
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.1020098102482828e-05,
      "loss": 3.8948,
      "step": 1769984
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.101171215497231e-05,
      "loss": 3.8989,
      "step": 1770496
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.100332620746179e-05,
      "loss": 3.894,
      "step": 1771008
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.099494025995127e-05,
      "loss": 3.8983,
      "step": 1771520
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.098655431244075e-05,
      "loss": 3.9016,
      "step": 1772032
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.097816836493023e-05,
      "loss": 3.9059,
      "step": 1772544
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0969815175027173e-05,
      "loss": 3.8896,
      "step": 1773056
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0961429227516656e-05,
      "loss": 3.8984,
      "step": 1773568
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0953043280006133e-05,
      "loss": 3.901,
      "step": 1774080
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0944657332495613e-05,
      "loss": 3.89,
      "step": 1774592
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0936271384985093e-05,
      "loss": 3.8847,
      "step": 1775104
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0927885437474573e-05,
      "loss": 3.8902,
      "step": 1775616
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0919499489964053e-05,
      "loss": 3.8944,
      "step": 1776128
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0911113542453533e-05,
      "loss": 3.9019,
      "step": 1776640
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0902727594943013e-05,
      "loss": 3.8872,
      "step": 1777152
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0894341647432492e-05,
      "loss": 3.8908,
      "step": 1777664
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0885988457529434e-05,
      "loss": 3.9104,
      "step": 1778176
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0877602510018914e-05,
      "loss": 3.8973,
      "step": 1778688
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0869216562508394e-05,
      "loss": 3.9032,
      "step": 1779200
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0860830614997874e-05,
      "loss": 3.8831,
      "step": 1779712
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0852444667487357e-05,
      "loss": 3.8778,
      "step": 1780224
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0844058719976837e-05,
      "loss": 3.9076,
      "step": 1780736
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0835689151270046e-05,
      "loss": 3.8919,
      "step": 1781248
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0827303203759526e-05,
      "loss": 3.8878,
      "step": 1781760
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0818917256249006e-05,
      "loss": 3.8927,
      "step": 1782272
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0810531308738486e-05,
      "loss": 3.8937,
      "step": 1782784
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0802145361227966e-05,
      "loss": 3.8794,
      "step": 1783296
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0793759413717446e-05,
      "loss": 3.8879,
      "step": 1783808
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0785373466206926e-05,
      "loss": 3.8863,
      "step": 1784320
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0776987518696406e-05,
      "loss": 3.8882,
      "step": 1784832
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0768601571185886e-05,
      "loss": 3.9019,
      "step": 1785344
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0760215623675366e-05,
      "loss": 3.8904,
      "step": 1785856
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0751829676164843e-05,
      "loss": 3.8867,
      "step": 1786368
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0743443728654326e-05,
      "loss": 3.9003,
      "step": 1786880
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.073507415994754e-05,
      "loss": 3.8794,
      "step": 1787392
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.072668821243702e-05,
      "loss": 3.8892,
      "step": 1787904
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0718302264926495e-05,
      "loss": 3.8904,
      "step": 1788416
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0709916317415975e-05,
      "loss": 3.8897,
      "step": 1788928
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.070156312751292e-05,
      "loss": 3.8913,
      "step": 1789440
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.06931771800024e-05,
      "loss": 3.8905,
      "step": 1789952
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.068479123249188e-05,
      "loss": 3.8736,
      "step": 1790464
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.067640528498136e-05,
      "loss": 3.8872,
      "step": 1790976
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.066801933747084e-05,
      "loss": 3.9043,
      "step": 1791488
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0659633389960316e-05,
      "loss": 3.8828,
      "step": 1792000
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0651247442449796e-05,
      "loss": 3.909,
      "step": 1792512
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.064286149493928e-05,
      "loss": 3.8982,
      "step": 1793024
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0634491926232492e-05,
      "loss": 3.8993,
      "step": 1793536
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.062610597872197e-05,
      "loss": 3.8841,
      "step": 1794048
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.061773641001518e-05,
      "loss": 3.8978,
      "step": 1794560
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.060935046250466e-05,
      "loss": 3.8977,
      "step": 1795072
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.060096451499414e-05,
      "loss": 3.8826,
      "step": 1795584
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.059257856748362e-05,
      "loss": 3.9026,
      "step": 1796096
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0584208998776833e-05,
      "loss": 3.897,
      "step": 1796608
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0575823051266313e-05,
      "loss": 3.9013,
      "step": 1797120
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.056743710375579e-05,
      "loss": 3.8982,
      "step": 1797632
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.055905115624527e-05,
      "loss": 3.8781,
      "step": 1798144
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.055066520873475e-05,
      "loss": 3.8791,
      "step": 1798656
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0542295640027966e-05,
      "loss": 3.8831,
      "step": 1799168
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0533909692517442e-05,
      "loss": 3.8962,
      "step": 1799680
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0525523745006922e-05,
      "loss": 3.8925,
      "step": 1800192
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0517137797496402e-05,
      "loss": 3.8897,
      "step": 1800704
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0508751849985882e-05,
      "loss": 3.8909,
      "step": 1801216
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0500365902475362e-05,
      "loss": 3.8884,
      "step": 1801728
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0491996333768575e-05,
      "loss": 3.8884,
      "step": 1802240
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0483610386258055e-05,
      "loss": 3.8891,
      "step": 1802752
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0475224438747535e-05,
      "loss": 3.9036,
      "step": 1803264
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0466838491237014e-05,
      "loss": 3.9062,
      "step": 1803776
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0458468922530224e-05,
      "loss": 3.8904,
      "step": 1804288
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0450082975019704e-05,
      "loss": 3.8863,
      "step": 1804800
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0441697027509183e-05,
      "loss": 3.8913,
      "step": 1805312
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0433311079998667e-05,
      "loss": 3.8906,
      "step": 1805824
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0424925132488143e-05,
      "loss": 3.9048,
      "step": 1806336
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0416555563781356e-05,
      "loss": 3.8902,
      "step": 1806848
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0408169616270836e-05,
      "loss": 3.8884,
      "step": 1807360
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0399783668760316e-05,
      "loss": 3.897,
      "step": 1807872
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0391397721249796e-05,
      "loss": 3.8964,
      "step": 1808384
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0383011773739276e-05,
      "loss": 3.8816,
      "step": 1808896
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0374625826228756e-05,
      "loss": 3.892,
      "step": 1809408
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0366256257521968e-05,
      "loss": 3.8884,
      "step": 1809920
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0357870310011445e-05,
      "loss": 3.8797,
      "step": 1810432
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0349484362500925e-05,
      "loss": 3.884,
      "step": 1810944
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0341098414990405e-05,
      "loss": 3.8925,
      "step": 1811456
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0332712467479888e-05,
      "loss": 3.8911,
      "step": 1811968
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0324326519969368e-05,
      "loss": 3.8847,
      "step": 1812480
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0315940572458848e-05,
      "loss": 3.8875,
      "step": 1812992
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0307554624948324e-05,
      "loss": 3.889,
      "step": 1813504
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0299185056241537e-05,
      "loss": 3.8977,
      "step": 1814016
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0290799108731017e-05,
      "loss": 3.9062,
      "step": 1814528
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0282413161220497e-05,
      "loss": 3.8987,
      "step": 1815040
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0274027213709977e-05,
      "loss": 3.8907,
      "step": 1815552
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.026565764500319e-05,
      "loss": 3.8858,
      "step": 1816064
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.025727169749267e-05,
      "loss": 3.8953,
      "step": 1816576
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.024888574998215e-05,
      "loss": 3.8806,
      "step": 1817088
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0240499802471626e-05,
      "loss": 3.8926,
      "step": 1817600
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.023213023376484e-05,
      "loss": 3.8915,
      "step": 1818112
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.022374428625432e-05,
      "loss": 3.8891,
      "step": 1818624
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0215358338743798e-05,
      "loss": 3.8936,
      "step": 1819136
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0206972391233278e-05,
      "loss": 3.8914,
      "step": 1819648
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.019860282252649e-05,
      "loss": 3.8976,
      "step": 1820160
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.019021687501597e-05,
      "loss": 3.8884,
      "step": 1820672
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.018183092750545e-05,
      "loss": 3.8874,
      "step": 1821184
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.017344497999493e-05,
      "loss": 3.8943,
      "step": 1821696
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0165075411288143e-05,
      "loss": 3.8994,
      "step": 1822208
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0156689463777623e-05,
      "loss": 3.904,
      "step": 1822720
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.01483035162671e-05,
      "loss": 3.8887,
      "step": 1823232
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.013991756875658e-05,
      "loss": 3.8973,
      "step": 1823744
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0131548000049792e-05,
      "loss": 3.8958,
      "step": 1824256
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0123178431343004e-05,
      "loss": 3.9003,
      "step": 1824768
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0114792483832484e-05,
      "loss": 3.8912,
      "step": 1825280
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0106406536321964e-05,
      "loss": 3.8953,
      "step": 1825792
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0098020588811444e-05,
      "loss": 3.8887,
      "step": 1826304
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.008963464130092e-05,
      "loss": 3.9024,
      "step": 1826816
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0081248693790404e-05,
      "loss": 3.8842,
      "step": 1827328
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0072862746279884e-05,
      "loss": 3.8925,
      "step": 1827840
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0064476798769364e-05,
      "loss": 3.8931,
      "step": 1828352
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0056107230062573e-05,
      "loss": 3.8954,
      "step": 1828864
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0047737661355786e-05,
      "loss": 3.89,
      "step": 1829376
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0039351713845266e-05,
      "loss": 3.8923,
      "step": 1829888
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0030965766334746e-05,
      "loss": 3.9016,
      "step": 1830400
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0022579818824226e-05,
      "loss": 3.8859,
      "step": 1830912
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0014193871313705e-05,
      "loss": 3.9022,
      "step": 1831424
    },
    {
      "epoch": 0.03,
      "eval_loss": 3.9869699478149414,
      "eval_runtime": 297.3932,
      "eval_samples_per_second": 1283.119,
      "eval_steps_per_second": 40.098,
      "step": 1831667
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.0005807923803185e-05,
      "loss": 3.8848,
      "step": 1831936
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.9997421976292665e-05,
      "loss": 3.8821,
      "step": 1832448
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.9989036028782145e-05,
      "loss": 3.8934,
      "step": 1832960
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.9980650081271625e-05,
      "loss": 3.8934,
      "step": 1833472
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.9972264133761105e-05,
      "loss": 3.9046,
      "step": 1833984
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.9963878186250585e-05,
      "loss": 3.8868,
      "step": 1834496
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.9955492238740065e-05,
      "loss": 3.8925,
      "step": 1835008
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.9947106291229545e-05,
      "loss": 3.8887,
      "step": 1835520
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.9938720343719025e-05,
      "loss": 3.8875,
      "step": 1836032
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.9930334396208505e-05,
      "loss": 3.8885,
      "step": 1836544
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.9921948448697985e-05,
      "loss": 3.8973,
      "step": 1837056
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.991356250118746e-05,
      "loss": 3.8987,
      "step": 1837568
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.9905192932480677e-05,
      "loss": 3.8836,
      "step": 1838080
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.9896806984970157e-05,
      "loss": 3.8808,
      "step": 1838592
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.9888421037459634e-05,
      "loss": 3.8979,
      "step": 1839104
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.9880035089949114e-05,
      "loss": 3.879,
      "step": 1839616
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.9871649142438594e-05,
      "loss": 3.8913,
      "step": 1840128
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.9863263194928074e-05,
      "loss": 3.8868,
      "step": 1840640
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.9854877247417554e-05,
      "loss": 3.8927,
      "step": 1841152
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.9846491299907037e-05,
      "loss": 3.899,
      "step": 1841664
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.9838105352396517e-05,
      "loss": 3.897,
      "step": 1842176
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.9829719404885994e-05,
      "loss": 3.8942,
      "step": 1842688
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.9821333457375474e-05,
      "loss": 3.8902,
      "step": 1843200
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.9812963888668686e-05,
      "loss": 3.8908,
      "step": 1843712
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.9804577941158166e-05,
      "loss": 3.8891,
      "step": 1844224
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.9796191993647646e-05,
      "loss": 3.8969,
      "step": 1844736
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.9787806046137126e-05,
      "loss": 3.8835,
      "step": 1845248
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.9779420098626606e-05,
      "loss": 3.882,
      "step": 1845760
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.9771034151116086e-05,
      "loss": 3.8927,
      "step": 1846272
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.9762648203605566e-05,
      "loss": 3.889,
      "step": 1846784
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9754262256095046e-05,
      "loss": 3.887,
      "step": 1847296
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9745892687388258e-05,
      "loss": 3.8961,
      "step": 1847808
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9737506739877738e-05,
      "loss": 3.8979,
      "step": 1848320
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9729120792367218e-05,
      "loss": 3.8927,
      "step": 1848832
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9720751223660427e-05,
      "loss": 3.886,
      "step": 1849344
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9712365276149907e-05,
      "loss": 3.8972,
      "step": 1849856
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9703979328639387e-05,
      "loss": 3.8943,
      "step": 1850368
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9695593381128867e-05,
      "loss": 3.8848,
      "step": 1850880
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9687207433618347e-05,
      "loss": 3.8806,
      "step": 1851392
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9678821486107827e-05,
      "loss": 3.8831,
      "step": 1851904
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9670435538597307e-05,
      "loss": 3.8863,
      "step": 1852416
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9662049591086787e-05,
      "loss": 3.8981,
      "step": 1852928
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9653663643576267e-05,
      "loss": 3.8807,
      "step": 1853440
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9645277696065747e-05,
      "loss": 3.8853,
      "step": 1853952
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.963690812735896e-05,
      "loss": 3.9043,
      "step": 1854464
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.962852217984844e-05,
      "loss": 3.8867,
      "step": 1854976
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.962013623233792e-05,
      "loss": 3.8999,
      "step": 1855488
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.96117502848274e-05,
      "loss": 3.8826,
      "step": 1856000
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.960338071612061e-05,
      "loss": 3.8637,
      "step": 1856512
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9594994768610088e-05,
      "loss": 3.9038,
      "step": 1857024
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.95866251999033e-05,
      "loss": 3.8897,
      "step": 1857536
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.957823925239278e-05,
      "loss": 3.8846,
      "step": 1858048
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.956985330488226e-05,
      "loss": 3.887,
      "step": 1858560
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.956146735737174e-05,
      "loss": 3.8848,
      "step": 1859072
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.955308140986122e-05,
      "loss": 3.8769,
      "step": 1859584
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.95446954623507e-05,
      "loss": 3.8803,
      "step": 1860096
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9536309514840177e-05,
      "loss": 3.8822,
      "step": 1860608
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9527939946133393e-05,
      "loss": 3.8835,
      "step": 1861120
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9519553998622873e-05,
      "loss": 3.8959,
      "step": 1861632
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9511168051112353e-05,
      "loss": 3.8857,
      "step": 1862144
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.950278210360183e-05,
      "loss": 3.8841,
      "step": 1862656
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.949439615609131e-05,
      "loss": 3.8971,
      "step": 1863168
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.948601020858079e-05,
      "loss": 3.8708,
      "step": 1863680
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.947762426107027e-05,
      "loss": 3.8823,
      "step": 1864192
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9469254692363482e-05,
      "loss": 3.8834,
      "step": 1864704
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9460868744852962e-05,
      "loss": 3.8871,
      "step": 1865216
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9452482797342442e-05,
      "loss": 3.885,
      "step": 1865728
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.944409684983192e-05,
      "loss": 3.8866,
      "step": 1866240
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.94357109023214e-05,
      "loss": 3.8624,
      "step": 1866752
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.942732495481088e-05,
      "loss": 3.8847,
      "step": 1867264
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9418955386104094e-05,
      "loss": 3.8959,
      "step": 1867776
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9410569438593574e-05,
      "loss": 3.8792,
      "step": 1868288
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9402183491083054e-05,
      "loss": 3.9008,
      "step": 1868800
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9393797543572534e-05,
      "loss": 3.8929,
      "step": 1869312
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.938541159606201e-05,
      "loss": 3.9002,
      "step": 1869824
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9377042027355223e-05,
      "loss": 3.8753,
      "step": 1870336
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9368656079844706e-05,
      "loss": 3.8922,
      "step": 1870848
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9360270132334183e-05,
      "loss": 3.8915,
      "step": 1871360
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9351884184823663e-05,
      "loss": 3.8776,
      "step": 1871872
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9343498237313143e-05,
      "loss": 3.8982,
      "step": 1872384
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9335112289802623e-05,
      "loss": 3.8863,
      "step": 1872896
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9326726342292103e-05,
      "loss": 3.8972,
      "step": 1873408
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9318356773585315e-05,
      "loss": 3.8941,
      "step": 1873920
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9309970826074795e-05,
      "loss": 3.8706,
      "step": 1874432
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9301584878564275e-05,
      "loss": 3.8766,
      "step": 1874944
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9293198931053755e-05,
      "loss": 3.881,
      "step": 1875456
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9284812983543235e-05,
      "loss": 3.8906,
      "step": 1875968
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.927642703603271e-05,
      "loss": 3.8847,
      "step": 1876480
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.926804108852219e-05,
      "loss": 3.8823,
      "step": 1876992
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.9259655141011675e-05,
      "loss": 3.8826,
      "step": 1877504
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.9251269193501155e-05,
      "loss": 3.8853,
      "step": 1878016
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.9242899624794364e-05,
      "loss": 3.8839,
      "step": 1878528
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.9234530056087576e-05,
      "loss": 3.8793,
      "step": 1879040
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.9226144108577056e-05,
      "loss": 3.9021,
      "step": 1879552
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.9217758161066536e-05,
      "loss": 3.8975,
      "step": 1880064
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.9209372213556016e-05,
      "loss": 3.8903,
      "step": 1880576
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.9200986266045496e-05,
      "loss": 3.879,
      "step": 1881088
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.9192600318534976e-05,
      "loss": 3.8878,
      "step": 1881600
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.9184214371024456e-05,
      "loss": 3.8826,
      "step": 1882112
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.9175828423513936e-05,
      "loss": 3.9005,
      "step": 1882624
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.9167442476003416e-05,
      "loss": 3.8838,
      "step": 1883136
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.9159072907296625e-05,
      "loss": 3.8843,
      "step": 1883648
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.915068695978611e-05,
      "loss": 3.8912,
      "step": 1884160
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.914230101227559e-05,
      "loss": 3.8906,
      "step": 1884672
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.913391506476507e-05,
      "loss": 3.8757,
      "step": 1885184
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.9125545496058278e-05,
      "loss": 3.8841,
      "step": 1885696
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.911717592735149e-05,
      "loss": 3.8816,
      "step": 1886208
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.910878997984097e-05,
      "loss": 3.8771,
      "step": 1886720
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.910040403233045e-05,
      "loss": 3.8768,
      "step": 1887232
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.909201808481993e-05,
      "loss": 3.8837,
      "step": 1887744
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.908363213730941e-05,
      "loss": 3.8852,
      "step": 1888256
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.907524618979889e-05,
      "loss": 3.8841,
      "step": 1888768
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.9066860242288366e-05,
      "loss": 3.8838,
      "step": 1889280
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.9058474294777846e-05,
      "loss": 3.8796,
      "step": 1889792
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.9050104726071062e-05,
      "loss": 3.8917,
      "step": 1890304
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.9041718778560542e-05,
      "loss": 3.9007,
      "step": 1890816
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.903334920985375e-05,
      "loss": 3.8914,
      "step": 1891328
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.902496326234323e-05,
      "loss": 3.8891,
      "step": 1891840
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.901657731483271e-05,
      "loss": 3.8747,
      "step": 1892352
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.900819136732219e-05,
      "loss": 3.8881,
      "step": 1892864
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.899980541981167e-05,
      "loss": 3.8816,
      "step": 1893376
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.8991435851104884e-05,
      "loss": 3.8851,
      "step": 1893888
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.8983049903594363e-05,
      "loss": 3.8789,
      "step": 1894400
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.897466395608384e-05,
      "loss": 3.8833,
      "step": 1894912
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.896627800857332e-05,
      "loss": 3.8882,
      "step": 1895424
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.89578920610628e-05,
      "loss": 3.8844,
      "step": 1895936
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.8949522492356016e-05,
      "loss": 3.8924,
      "step": 1896448
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.8941136544845492e-05,
      "loss": 3.8871,
      "step": 1896960
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.8932750597334972e-05,
      "loss": 3.8781,
      "step": 1897472
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.8924364649824452e-05,
      "loss": 3.8928,
      "step": 1897984
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.8915978702313932e-05,
      "loss": 3.8923,
      "step": 1898496
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.8907592754803412e-05,
      "loss": 3.9006,
      "step": 1899008
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.8899206807292892e-05,
      "loss": 3.8816,
      "step": 1899520
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.8890820859782372e-05,
      "loss": 3.8959,
      "step": 1900032
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.8882451291075585e-05,
      "loss": 3.8876,
      "step": 1900544
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.8874065343565065e-05,
      "loss": 3.8931,
      "step": 1901056
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.8865695774858274e-05,
      "loss": 3.8904,
      "step": 1901568
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.8857309827347754e-05,
      "loss": 3.8875,
      "step": 1902080
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.8848923879837234e-05,
      "loss": 3.8818,
      "step": 1902592
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.8840537932326717e-05,
      "loss": 3.8972,
      "step": 1903104
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.8832151984816197e-05,
      "loss": 3.878,
      "step": 1903616
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.8823766037305673e-05,
      "loss": 3.8851,
      "step": 1904128
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.8815380089795153e-05,
      "loss": 3.8896,
      "step": 1904640
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.8807010521088366e-05,
      "loss": 3.8942,
      "step": 1905152
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.8798624573577846e-05,
      "loss": 3.8794,
      "step": 1905664
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.8790238626067326e-05,
      "loss": 3.8872,
      "step": 1906176
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.8781852678556806e-05,
      "loss": 3.8936,
      "step": 1906688
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.8773466731046286e-05,
      "loss": 3.8843,
      "step": 1907200
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.8765080783535766e-05,
      "loss": 3.8944,
      "step": 1907712
    },
    {
      "epoch": 0.03,
      "eval_loss": 3.985036611557007,
      "eval_runtime": 294.0646,
      "eval_samples_per_second": 1297.643,
      "eval_steps_per_second": 40.552,
      "step": 1907987
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.8756694836025246e-05,
      "loss": 3.8776,
      "step": 1908224
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.8748325267318455e-05,
      "loss": 3.8841,
      "step": 1908736
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.8739939319807938e-05,
      "loss": 3.8861,
      "step": 1909248
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.8731553372297418e-05,
      "loss": 3.8897,
      "step": 1909760
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.8723167424786898e-05,
      "loss": 3.8973,
      "step": 1910272
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.8714781477276378e-05,
      "loss": 3.8811,
      "step": 1910784
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.8706395529765855e-05,
      "loss": 3.8885,
      "step": 1911296
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.8698009582255334e-05,
      "loss": 3.8829,
      "step": 1911808
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.8689623634744814e-05,
      "loss": 3.879,
      "step": 1912320
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.8681254066038027e-05,
      "loss": 3.8831,
      "step": 1912832
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.8672868118527507e-05,
      "loss": 3.8921,
      "step": 1913344
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.8664482171016987e-05,
      "loss": 3.8914,
      "step": 1913856
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.86561126023102e-05,
      "loss": 3.8855,
      "step": 1914368
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.8647726654799676e-05,
      "loss": 3.8732,
      "step": 1914880
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.8639340707289156e-05,
      "loss": 3.8879,
      "step": 1915392
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.863095475977864e-05,
      "loss": 3.875,
      "step": 1915904
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.862256881226812e-05,
      "loss": 3.8841,
      "step": 1916416
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.86141828647576e-05,
      "loss": 3.8833,
      "step": 1916928
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.860579691724708e-05,
      "loss": 3.8835,
      "step": 1917440
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.859741096973656e-05,
      "loss": 3.8917,
      "step": 1917952
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.8589025022226036e-05,
      "loss": 3.8946,
      "step": 1918464
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.8580639074715516e-05,
      "loss": 3.8912,
      "step": 1918976
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.8572253127204995e-05,
      "loss": 3.8848,
      "step": 1919488
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.856386717969448e-05,
      "loss": 3.8819,
      "step": 1920000
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.8555497610987688e-05,
      "loss": 3.8884,
      "step": 1920512
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.8547111663477168e-05,
      "loss": 3.8843,
      "step": 1921024
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.8538725715966648e-05,
      "loss": 3.8823,
      "step": 1921536
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.8530356147259857e-05,
      "loss": 3.8811,
      "step": 1922048
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.852197019974934e-05,
      "loss": 3.8822,
      "step": 1922560
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.851358425223882e-05,
      "loss": 3.8865,
      "step": 1923072
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.85051983047283e-05,
      "loss": 3.8769,
      "step": 1923584
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.849682873602151e-05,
      "loss": 3.8928,
      "step": 1924096
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.848844278851099e-05,
      "loss": 3.8908,
      "step": 1924608
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.848005684100047e-05,
      "loss": 3.8895,
      "step": 1925120
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.847167089348995e-05,
      "loss": 3.8811,
      "step": 1925632
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.846330132478316e-05,
      "loss": 3.8912,
      "step": 1926144
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.845491537727264e-05,
      "loss": 3.8877,
      "step": 1926656
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.844652942976212e-05,
      "loss": 3.881,
      "step": 1927168
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.84381434822516e-05,
      "loss": 3.8762,
      "step": 1927680
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.842975753474108e-05,
      "loss": 3.8781,
      "step": 1928192
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.842137158723056e-05,
      "loss": 3.8804,
      "step": 1928704
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.841298563972004e-05,
      "loss": 3.8894,
      "step": 1929216
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.840459969220952e-05,
      "loss": 3.8769,
      "step": 1929728
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8396213744699e-05,
      "loss": 3.8811,
      "step": 1930240
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.838782779718848e-05,
      "loss": 3.8956,
      "step": 1930752
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.837944184967796e-05,
      "loss": 3.8833,
      "step": 1931264
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.837105590216744e-05,
      "loss": 3.8941,
      "step": 1931776
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.836268633346065e-05,
      "loss": 3.8767,
      "step": 1932288
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8354316764753863e-05,
      "loss": 3.86,
      "step": 1932800
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8345930817243343e-05,
      "loss": 3.8995,
      "step": 1933312
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8337544869732823e-05,
      "loss": 3.8811,
      "step": 1933824
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8329158922222303e-05,
      "loss": 3.8842,
      "step": 1934336
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8320772974711783e-05,
      "loss": 3.8792,
      "step": 1934848
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8312387027201262e-05,
      "loss": 3.883,
      "step": 1935360
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8304001079690742e-05,
      "loss": 3.8716,
      "step": 1935872
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8295631510983955e-05,
      "loss": 3.8734,
      "step": 1936384
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8287245563473435e-05,
      "loss": 3.8748,
      "step": 1936896
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8278859615962915e-05,
      "loss": 3.8759,
      "step": 1937408
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8270473668452395e-05,
      "loss": 3.8907,
      "step": 1937920
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.826208772094187e-05,
      "loss": 3.8799,
      "step": 1938432
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8253701773431355e-05,
      "loss": 3.8761,
      "step": 1938944
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8245315825920835e-05,
      "loss": 3.897,
      "step": 1939456
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8236946257214044e-05,
      "loss": 3.8626,
      "step": 1939968
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8228560309703524e-05,
      "loss": 3.8725,
      "step": 1940480
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8220174362193004e-05,
      "loss": 3.8826,
      "step": 1940992
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8211788414682484e-05,
      "loss": 3.8825,
      "step": 1941504
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8203402467171964e-05,
      "loss": 3.8786,
      "step": 1942016
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8195016519661447e-05,
      "loss": 3.8822,
      "step": 1942528
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8186630572150927e-05,
      "loss": 3.8604,
      "step": 1943040
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8178244624640403e-05,
      "loss": 3.8742,
      "step": 1943552
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8169875055933616e-05,
      "loss": 3.8952,
      "step": 1944064
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8161489108423096e-05,
      "loss": 3.8742,
      "step": 1944576
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.815311953971631e-05,
      "loss": 3.8927,
      "step": 1945088
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.814473359220579e-05,
      "loss": 3.8886,
      "step": 1945600
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8136347644695268e-05,
      "loss": 3.8961,
      "step": 1946112
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8127961697184748e-05,
      "loss": 3.8717,
      "step": 1946624
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8119575749674225e-05,
      "loss": 3.8862,
      "step": 1947136
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8111206180967437e-05,
      "loss": 3.8857,
      "step": 1947648
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.810283661226065e-05,
      "loss": 3.8711,
      "step": 1948160
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.809445066475013e-05,
      "loss": 3.8923,
      "step": 1948672
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.808606471723961e-05,
      "loss": 3.884,
      "step": 1949184
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.807767876972909e-05,
      "loss": 3.8876,
      "step": 1949696
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.806929282221857e-05,
      "loss": 3.8918,
      "step": 1950208
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8060906874708046e-05,
      "loss": 3.8683,
      "step": 1950720
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8052520927197526e-05,
      "loss": 3.8715,
      "step": 1951232
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.804413497968701e-05,
      "loss": 3.8748,
      "step": 1951744
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.803574903217649e-05,
      "loss": 3.8831,
      "step": 1952256
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.802736308466597e-05,
      "loss": 3.8828,
      "step": 1952768
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.801899351595918e-05,
      "loss": 3.8762,
      "step": 1953280
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.801060756844866e-05,
      "loss": 3.8768,
      "step": 1953792
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.800222162093814e-05,
      "loss": 3.8851,
      "step": 1954304
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.799383567342762e-05,
      "loss": 3.8736,
      "step": 1954816
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.798546610472083e-05,
      "loss": 3.876,
      "step": 1955328
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.797708015721031e-05,
      "loss": 3.8988,
      "step": 1955840
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.796869420969979e-05,
      "loss": 3.8905,
      "step": 1956352
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.796030826218927e-05,
      "loss": 3.8872,
      "step": 1956864
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.795192231467875e-05,
      "loss": 3.8726,
      "step": 1957376
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7943536367168227e-05,
      "loss": 3.8821,
      "step": 1957888
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.793515041965771e-05,
      "loss": 3.8776,
      "step": 1958400
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.792676447214719e-05,
      "loss": 3.8952,
      "step": 1958912
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7918427661047865e-05,
      "loss": 3.8819,
      "step": 1959424
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7910041713537345e-05,
      "loss": 3.8766,
      "step": 1959936
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7901655766026825e-05,
      "loss": 3.883,
      "step": 1960448
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7893269818516305e-05,
      "loss": 3.8897,
      "step": 1960960
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7884883871005784e-05,
      "loss": 3.8697,
      "step": 1961472
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7876497923495264e-05,
      "loss": 3.8831,
      "step": 1961984
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7868111975984744e-05,
      "loss": 3.8703,
      "step": 1962496
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7859726028474224e-05,
      "loss": 3.8721,
      "step": 1963008
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.78513400809637e-05,
      "loss": 3.8735,
      "step": 1963520
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.784295413345318e-05,
      "loss": 3.8778,
      "step": 1964032
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7834568185942664e-05,
      "loss": 3.8794,
      "step": 1964544
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7826182238432144e-05,
      "loss": 3.8846,
      "step": 1965056
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7817812669725353e-05,
      "loss": 3.8739,
      "step": 1965568
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7809443101018566e-05,
      "loss": 3.8755,
      "step": 1966080
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7801057153508046e-05,
      "loss": 3.8872,
      "step": 1966592
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7792671205997526e-05,
      "loss": 3.8956,
      "step": 1967104
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7784285258487006e-05,
      "loss": 3.8842,
      "step": 1967616
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7775899310976486e-05,
      "loss": 3.8873,
      "step": 1968128
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7767529742269698e-05,
      "loss": 3.8721,
      "step": 1968640
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7759143794759175e-05,
      "loss": 3.8809,
      "step": 1969152
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7750757847248655e-05,
      "loss": 3.8803,
      "step": 1969664
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7742371899738135e-05,
      "loss": 3.8739,
      "step": 1970176
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7733985952227618e-05,
      "loss": 3.8773,
      "step": 1970688
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7725600004717098e-05,
      "loss": 3.8795,
      "step": 1971200
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7717214057206578e-05,
      "loss": 3.8824,
      "step": 1971712
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7708828109696058e-05,
      "loss": 3.8787,
      "step": 1972224
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7700458540989267e-05,
      "loss": 3.8845,
      "step": 1972736
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7692072593478747e-05,
      "loss": 3.8823,
      "step": 1973248
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7683686645968227e-05,
      "loss": 3.8743,
      "step": 1973760
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.767530069845771e-05,
      "loss": 3.8836,
      "step": 1974272
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.766693112975092e-05,
      "loss": 3.8912,
      "step": 1974784
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.765856156104413e-05,
      "loss": 3.889,
      "step": 1975296
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7650175613533608e-05,
      "loss": 3.8781,
      "step": 1975808
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7641789666023088e-05,
      "loss": 3.8928,
      "step": 1976320
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.763340371851257e-05,
      "loss": 3.8865,
      "step": 1976832
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.762501777100205e-05,
      "loss": 3.8881,
      "step": 1977344
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.761663182349153e-05,
      "loss": 3.8841,
      "step": 1977856
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7608245875981008e-05,
      "loss": 3.88,
      "step": 1978368
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7599859928470488e-05,
      "loss": 3.8792,
      "step": 1978880
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7591473980959968e-05,
      "loss": 3.8899,
      "step": 1979392
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.758310441225318e-05,
      "loss": 3.8732,
      "step": 1979904
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.757471846474266e-05,
      "loss": 3.8814,
      "step": 1980416
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.756633251723214e-05,
      "loss": 3.8857,
      "step": 1980928
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.755794656972162e-05,
      "loss": 3.8871,
      "step": 1981440
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.754957700101483e-05,
      "loss": 3.8765,
      "step": 1981952
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7541207432308042e-05,
      "loss": 3.8796,
      "step": 1982464
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7532821484797525e-05,
      "loss": 3.8872,
      "step": 1982976
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7524435537287005e-05,
      "loss": 3.8822,
      "step": 1983488
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7516049589776482e-05,
      "loss": 3.8882,
      "step": 1984000
    },
    {
      "epoch": 1.03,
      "eval_loss": 3.9835500717163086,
      "eval_runtime": 295.0853,
      "eval_samples_per_second": 1293.155,
      "eval_steps_per_second": 40.412,
      "step": 1984307
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.7507663642265962e-05,
      "loss": 3.863,
      "step": 1984512
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.749927769475544e-05,
      "loss": 3.8764,
      "step": 1985024
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.749089174724492e-05,
      "loss": 3.8845,
      "step": 1985536
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.74825057997344e-05,
      "loss": 3.882,
      "step": 1986048
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.747411985222388e-05,
      "loss": 3.8938,
      "step": 1986560
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.746573390471336e-05,
      "loss": 3.8758,
      "step": 1987072
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.745734795720284e-05,
      "loss": 3.8844,
      "step": 1987584
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.744896200969232e-05,
      "loss": 3.8791,
      "step": 1988096
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.74405760621818e-05,
      "loss": 3.8737,
      "step": 1988608
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.743219011467128e-05,
      "loss": 3.8788,
      "step": 1989120
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.742380416716076e-05,
      "loss": 3.8867,
      "step": 1989632
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.741541821965024e-05,
      "loss": 3.8878,
      "step": 1990144
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.7407048650943454e-05,
      "loss": 3.8864,
      "step": 1990656
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.7398662703432934e-05,
      "loss": 3.8618,
      "step": 1991168
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.7390276755922414e-05,
      "loss": 3.8831,
      "step": 1991680
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.7381890808411894e-05,
      "loss": 3.8677,
      "step": 1992192
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.737350486090137e-05,
      "loss": 3.8788,
      "step": 1992704
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.736511891339085e-05,
      "loss": 3.8759,
      "step": 1993216
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.7356732965880333e-05,
      "loss": 3.8789,
      "step": 1993728
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.7348363397173543e-05,
      "loss": 3.8846,
      "step": 1994240
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.7339977449663022e-05,
      "loss": 3.8939,
      "step": 1994752
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.7331591502152502e-05,
      "loss": 3.8854,
      "step": 1995264
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.7323205554641982e-05,
      "loss": 3.8799,
      "step": 1995776
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.7314819607131462e-05,
      "loss": 3.8752,
      "step": 1996288
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.7306433659620942e-05,
      "loss": 3.8849,
      "step": 1996800
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.7298047712110426e-05,
      "loss": 3.8828,
      "step": 1997312
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.7289661764599902e-05,
      "loss": 3.8766,
      "step": 1997824
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.7281275817089382e-05,
      "loss": 3.8739,
      "step": 1998336
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.7272889869578862e-05,
      "loss": 3.8784,
      "step": 1998848
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.7264503922068342e-05,
      "loss": 3.8787,
      "step": 1999360
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.725613435336155e-05,
      "loss": 3.8693,
      "step": 1999872
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.7247748405851035e-05,
      "loss": 3.891,
      "step": 2000384
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.7239378837144247e-05,
      "loss": 3.8879,
      "step": 2000896
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.7230992889633724e-05,
      "loss": 3.8823,
      "step": 2001408
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.7222606942123204e-05,
      "loss": 3.8796,
      "step": 2001920
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.7214237373416416e-05,
      "loss": 3.8788,
      "step": 2002432
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.7205851425905896e-05,
      "loss": 3.8859,
      "step": 2002944
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.7197465478395376e-05,
      "loss": 3.8768,
      "step": 2003456
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.7189079530884856e-05,
      "loss": 3.873,
      "step": 2003968
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.7180693583374336e-05,
      "loss": 3.8735,
      "step": 2004480
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.7172307635863816e-05,
      "loss": 3.8781,
      "step": 2004992
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.7163921688353296e-05,
      "loss": 3.8828,
      "step": 2005504
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.7155535740842776e-05,
      "loss": 3.8723,
      "step": 2006016
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.7147149793332256e-05,
      "loss": 3.8779,
      "step": 2006528
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.7138763845821736e-05,
      "loss": 3.8878,
      "step": 2007040
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.7130377898311216e-05,
      "loss": 3.8783,
      "step": 2007552
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.7121991950800696e-05,
      "loss": 3.885,
      "step": 2008064
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.7113606003290175e-05,
      "loss": 3.8784,
      "step": 2008576
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.7105220055779655e-05,
      "loss": 3.8575,
      "step": 2009088
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.7096834108269135e-05,
      "loss": 3.8901,
      "step": 2009600
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.7088464539562344e-05,
      "loss": 3.8778,
      "step": 2010112
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.7080078592051828e-05,
      "loss": 3.8785,
      "step": 2010624
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.7071692644541308e-05,
      "loss": 3.8777,
      "step": 2011136
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.7063306697030788e-05,
      "loss": 3.8759,
      "step": 2011648
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.7054920749520264e-05,
      "loss": 3.8632,
      "step": 2012160
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.7046534802009744e-05,
      "loss": 3.8709,
      "step": 2012672
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.7038148854499224e-05,
      "loss": 3.8698,
      "step": 2013184
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.7029762906988704e-05,
      "loss": 3.8715,
      "step": 2013696
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.7021393338281917e-05,
      "loss": 3.8866,
      "step": 2014208
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.701302376957513e-05,
      "loss": 3.8757,
      "step": 2014720
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.700463782206461e-05,
      "loss": 3.8725,
      "step": 2015232
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.6996251874554086e-05,
      "loss": 3.8906,
      "step": 2015744
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.6987865927043566e-05,
      "loss": 3.8604,
      "step": 2016256
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.6979479979533046e-05,
      "loss": 3.8663,
      "step": 2016768
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.697109403202253e-05,
      "loss": 3.8807,
      "step": 2017280
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.696270808451201e-05,
      "loss": 3.8738,
      "step": 2017792
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.695432213700149e-05,
      "loss": 3.873,
      "step": 2018304
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.6945952568294698e-05,
      "loss": 3.8784,
      "step": 2018816
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.6937566620784178e-05,
      "loss": 3.8609,
      "step": 2019328
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.6929180673273658e-05,
      "loss": 3.8704,
      "step": 2019840
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.6920794725763138e-05,
      "loss": 3.8852,
      "step": 2020352
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.691242515705635e-05,
      "loss": 3.8718,
      "step": 2020864
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.690405558834956e-05,
      "loss": 3.8849,
      "step": 2021376
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.689566964083904e-05,
      "loss": 3.8783,
      "step": 2021888
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.688728369332852e-05,
      "loss": 3.8958,
      "step": 2022400
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.6878897745818e-05,
      "loss": 3.8625,
      "step": 2022912
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.6870511798307483e-05,
      "loss": 3.8848,
      "step": 2023424
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.6862142229600692e-05,
      "loss": 3.8805,
      "step": 2023936
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.685375628209017e-05,
      "loss": 3.8643,
      "step": 2024448
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.684537033457965e-05,
      "loss": 3.8885,
      "step": 2024960
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.683698438706913e-05,
      "loss": 3.8798,
      "step": 2025472
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.682859843955861e-05,
      "loss": 3.8806,
      "step": 2025984
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.6820228870851824e-05,
      "loss": 3.8866,
      "step": 2026496
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.6811842923341304e-05,
      "loss": 3.8651,
      "step": 2027008
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.6803456975830784e-05,
      "loss": 3.8655,
      "step": 2027520
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.6795071028320264e-05,
      "loss": 3.8674,
      "step": 2028032
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.678668508080974e-05,
      "loss": 3.8799,
      "step": 2028544
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.677829913329922e-05,
      "loss": 3.8742,
      "step": 2029056
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.6769913185788704e-05,
      "loss": 3.8719,
      "step": 2029568
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.6761527238278184e-05,
      "loss": 3.8727,
      "step": 2030080
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.6753141290767664e-05,
      "loss": 3.881,
      "step": 2030592
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.6744771722060873e-05,
      "loss": 3.8734,
      "step": 2031104
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.6736385774550353e-05,
      "loss": 3.87,
      "step": 2031616
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.6727999827039833e-05,
      "loss": 3.8914,
      "step": 2032128
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.6719613879529313e-05,
      "loss": 3.8842,
      "step": 2032640
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.6711244310822525e-05,
      "loss": 3.8834,
      "step": 2033152
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.6702874742115738e-05,
      "loss": 3.8659,
      "step": 2033664
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.6694488794605214e-05,
      "loss": 3.8772,
      "step": 2034176
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.6686102847094694e-05,
      "loss": 3.871,
      "step": 2034688
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.6677716899584174e-05,
      "loss": 3.8889,
      "step": 2035200
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.6669330952073654e-05,
      "loss": 3.8772,
      "step": 2035712
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.6660945004563137e-05,
      "loss": 3.8735,
      "step": 2036224
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.6652575435856346e-05,
      "loss": 3.8796,
      "step": 2036736
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.6644189488345826e-05,
      "loss": 3.8819,
      "step": 2037248
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.6635803540835306e-05,
      "loss": 3.8717,
      "step": 2037760
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.6627417593324786e-05,
      "loss": 3.8772,
      "step": 2038272
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.6619031645814266e-05,
      "loss": 3.8652,
      "step": 2038784
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.6610645698303746e-05,
      "loss": 3.8718,
      "step": 2039296
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.660227612959696e-05,
      "loss": 3.8679,
      "step": 2039808
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.659389018208644e-05,
      "loss": 3.8681,
      "step": 2040320
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.658550423457592e-05,
      "loss": 3.8748,
      "step": 2040832
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.6577118287065395e-05,
      "loss": 3.8756,
      "step": 2041344
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.6568732339554875e-05,
      "loss": 3.8727,
      "step": 2041856
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.656034639204436e-05,
      "loss": 3.8688,
      "step": 2042368
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.655197682333757e-05,
      "loss": 3.8835,
      "step": 2042880
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.6543590875827048e-05,
      "loss": 3.8886,
      "step": 2043392
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.653522130712026e-05,
      "loss": 3.878,
      "step": 2043904
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.652683535960974e-05,
      "loss": 3.8818,
      "step": 2044416
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.651844941209922e-05,
      "loss": 3.868,
      "step": 2044928
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.65100634645887e-05,
      "loss": 3.8784,
      "step": 2045440
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.650167751707818e-05,
      "loss": 3.8761,
      "step": 2045952
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.649329156956766e-05,
      "loss": 3.8657,
      "step": 2046464
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.648490562205714e-05,
      "loss": 3.8759,
      "step": 2046976
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.647651967454662e-05,
      "loss": 3.876,
      "step": 2047488
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.64681337270361e-05,
      "loss": 3.8804,
      "step": 2048000
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.6459747779525576e-05,
      "loss": 3.8754,
      "step": 2048512
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.645136183201506e-05,
      "loss": 3.8778,
      "step": 2049024
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.644297588450454e-05,
      "loss": 3.8769,
      "step": 2049536
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.6434606315797752e-05,
      "loss": 3.8688,
      "step": 2050048
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.642622036828723e-05,
      "loss": 3.8802,
      "step": 2050560
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.641785079958044e-05,
      "loss": 3.8839,
      "step": 2051072
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.640946485206992e-05,
      "loss": 3.8834,
      "step": 2051584
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.64010789045594e-05,
      "loss": 3.8728,
      "step": 2052096
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.639269295704888e-05,
      "loss": 3.8904,
      "step": 2052608
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.638430700953836e-05,
      "loss": 3.8798,
      "step": 2053120
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.637592106202784e-05,
      "loss": 3.8852,
      "step": 2053632
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.636753511451732e-05,
      "loss": 3.884,
      "step": 2054144
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.63591491670068e-05,
      "loss": 3.8708,
      "step": 2054656
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.6350779598300013e-05,
      "loss": 3.8747,
      "step": 2055168
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.6342393650789493e-05,
      "loss": 3.8897,
      "step": 2055680
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.6334024082082702e-05,
      "loss": 3.8649,
      "step": 2056192
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.6325638134572182e-05,
      "loss": 3.8764,
      "step": 2056704
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.6317252187061662e-05,
      "loss": 3.8814,
      "step": 2057216
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.6308866239551142e-05,
      "loss": 3.8802,
      "step": 2057728
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.6300480292040622e-05,
      "loss": 3.8711,
      "step": 2058240
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.6292094344530105e-05,
      "loss": 3.8772,
      "step": 2058752
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.6283708397019582e-05,
      "loss": 3.881,
      "step": 2059264
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.6275322449509062e-05,
      "loss": 3.8826,
      "step": 2059776
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.6266952880802274e-05,
      "loss": 3.883,
      "step": 2060288
    },
    {
      "epoch": 0.03,
      "eval_loss": 3.9817380905151367,
      "eval_runtime": 298.9818,
      "eval_samples_per_second": 1276.302,
      "eval_steps_per_second": 39.885,
      "step": 2060627
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.6258583312095484e-05,
      "loss": 3.8664,
      "step": 2060800
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.6250197364584967e-05,
      "loss": 3.8734,
      "step": 2061312
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.6241811417074447e-05,
      "loss": 3.8747,
      "step": 2061824
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.6233425469563927e-05,
      "loss": 3.8773,
      "step": 2062336
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.6225039522053403e-05,
      "loss": 3.8909,
      "step": 2062848
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.6216653574542883e-05,
      "loss": 3.8758,
      "step": 2063360
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.6208267627032363e-05,
      "loss": 3.8759,
      "step": 2063872
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.6199881679521843e-05,
      "loss": 3.8767,
      "step": 2064384
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.6191512110815056e-05,
      "loss": 3.8704,
      "step": 2064896
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.6183126163304536e-05,
      "loss": 3.8702,
      "step": 2065408
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.6174740215794016e-05,
      "loss": 3.88,
      "step": 2065920
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.6166354268283496e-05,
      "loss": 3.8878,
      "step": 2066432
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.6157984699576705e-05,
      "loss": 3.8785,
      "step": 2066944
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.6149598752066185e-05,
      "loss": 3.8602,
      "step": 2067456
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.6141212804555668e-05,
      "loss": 3.8786,
      "step": 2067968
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.6132826857045148e-05,
      "loss": 3.8629,
      "step": 2068480
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.6124440909534628e-05,
      "loss": 3.8747,
      "step": 2068992
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.6116054962024108e-05,
      "loss": 3.8721,
      "step": 2069504
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.6107669014513584e-05,
      "loss": 3.8755,
      "step": 2070016
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.6099283067003064e-05,
      "loss": 3.8777,
      "step": 2070528
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.6090913498296277e-05,
      "loss": 3.8867,
      "step": 2071040
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.608252755078576e-05,
      "loss": 3.8808,
      "step": 2071552
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.6074141603275237e-05,
      "loss": 3.8786,
      "step": 2072064
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.6065755655764717e-05,
      "loss": 3.8689,
      "step": 2072576
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.605738608705793e-05,
      "loss": 3.8809,
      "step": 2073088
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.604900013954741e-05,
      "loss": 3.8769,
      "step": 2073600
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.6040614192036886e-05,
      "loss": 3.8701,
      "step": 2074112
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.603222824452637e-05,
      "loss": 3.8744,
      "step": 2074624
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.602384229701585e-05,
      "loss": 3.871,
      "step": 2075136
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.601545634950533e-05,
      "loss": 3.8761,
      "step": 2075648
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.600707040199481e-05,
      "loss": 3.8699,
      "step": 2076160
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.599868445448429e-05,
      "loss": 3.8782,
      "step": 2076672
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.5990298506973765e-05,
      "loss": 3.8847,
      "step": 2077184
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.5981928938266978e-05,
      "loss": 3.8774,
      "step": 2077696
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.597354299075646e-05,
      "loss": 3.877,
      "step": 2078208
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.596517342204967e-05,
      "loss": 3.8745,
      "step": 2078720
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.595678747453915e-05,
      "loss": 3.8837,
      "step": 2079232
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.594840152702863e-05,
      "loss": 3.8695,
      "step": 2079744
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.594001557951811e-05,
      "loss": 3.866,
      "step": 2080256
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.5931646010811323e-05,
      "loss": 3.8734,
      "step": 2080768
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.5923260063300803e-05,
      "loss": 3.8683,
      "step": 2081280
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.5914874115790283e-05,
      "loss": 3.8758,
      "step": 2081792
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.5906488168279763e-05,
      "loss": 3.8692,
      "step": 2082304
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.589810222076924e-05,
      "loss": 3.874,
      "step": 2082816
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.5889732652062452e-05,
      "loss": 3.8795,
      "step": 2083328
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.588134670455193e-05,
      "loss": 3.8787,
      "step": 2083840
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.5872960757041415e-05,
      "loss": 3.8814,
      "step": 2084352
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.586457480953089e-05,
      "loss": 3.8707,
      "step": 2084864
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.585618886202037e-05,
      "loss": 3.8499,
      "step": 2085376
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.584780291450985e-05,
      "loss": 3.8836,
      "step": 2085888
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.583941696699933e-05,
      "loss": 3.8715,
      "step": 2086400
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.583103101948881e-05,
      "loss": 3.8759,
      "step": 2086912
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.5822677829585756e-05,
      "loss": 3.8735,
      "step": 2087424
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.5814291882075236e-05,
      "loss": 3.872,
      "step": 2087936
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.5805905934564713e-05,
      "loss": 3.8599,
      "step": 2088448
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.5797519987054193e-05,
      "loss": 3.862,
      "step": 2088960
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.5789134039543673e-05,
      "loss": 3.8683,
      "step": 2089472
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.5780748092033153e-05,
      "loss": 3.8635,
      "step": 2089984
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.5772362144522633e-05,
      "loss": 3.886,
      "step": 2090496
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.5763976197012116e-05,
      "loss": 3.8724,
      "step": 2091008
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.5755590249501596e-05,
      "loss": 3.8628,
      "step": 2091520
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.5747220680794805e-05,
      "loss": 3.8852,
      "step": 2092032
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.5738834733284285e-05,
      "loss": 3.8612,
      "step": 2092544
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.5730448785773765e-05,
      "loss": 3.8616,
      "step": 2093056
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.5722079217066978e-05,
      "loss": 3.8776,
      "step": 2093568
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.5713693269556457e-05,
      "loss": 3.8678,
      "step": 2094080
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.5705307322045937e-05,
      "loss": 3.8665,
      "step": 2094592
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.5696921374535417e-05,
      "loss": 3.8734,
      "step": 2095104
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.5688535427024894e-05,
      "loss": 3.8552,
      "step": 2095616
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.5680149479514374e-05,
      "loss": 3.8665,
      "step": 2096128
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.5671763532003854e-05,
      "loss": 3.88,
      "step": 2096640
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.5663377584493337e-05,
      "loss": 3.8707,
      "step": 2097152
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.5654991636982817e-05,
      "loss": 3.882,
      "step": 2097664
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.5646622068276026e-05,
      "loss": 3.8732,
      "step": 2098176
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.5638236120765506e-05,
      "loss": 3.8887,
      "step": 2098688
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.5629850173254986e-05,
      "loss": 3.8591,
      "step": 2099200
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.5621464225744466e-05,
      "loss": 3.8805,
      "step": 2099712
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.561309465703768e-05,
      "loss": 3.877,
      "step": 2100224
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.560470870952716e-05,
      "loss": 3.8601,
      "step": 2100736
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.559632276201664e-05,
      "loss": 3.8793,
      "step": 2101248
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.558793681450612e-05,
      "loss": 3.8773,
      "step": 2101760
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.55795508669956e-05,
      "loss": 3.8795,
      "step": 2102272
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.557119767709254e-05,
      "loss": 3.8821,
      "step": 2102784
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.556281172958202e-05,
      "loss": 3.8623,
      "step": 2103296
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.55544257820715e-05,
      "loss": 3.855,
      "step": 2103808
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.554603983456098e-05,
      "loss": 3.8656,
      "step": 2104320
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.553765388705046e-05,
      "loss": 3.8743,
      "step": 2104832
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.552926793953994e-05,
      "loss": 3.8708,
      "step": 2105344
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.552088199202942e-05,
      "loss": 3.8646,
      "step": 2105856
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.55124960445189e-05,
      "loss": 3.8665,
      "step": 2106368
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5504126475812112e-05,
      "loss": 3.8736,
      "step": 2106880
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5495740528301592e-05,
      "loss": 3.8706,
      "step": 2107392
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5487354580791072e-05,
      "loss": 3.8709,
      "step": 2107904
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.547896863328055e-05,
      "loss": 3.8815,
      "step": 2108416
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.547058268577003e-05,
      "loss": 3.885,
      "step": 2108928
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.546219673825951e-05,
      "loss": 3.8744,
      "step": 2109440
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5453810790748992e-05,
      "loss": 3.867,
      "step": 2109952
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5445424843238472e-05,
      "loss": 3.8681,
      "step": 2110464
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.543705527453168e-05,
      "loss": 3.8663,
      "step": 2110976
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.542866932702116e-05,
      "loss": 3.8799,
      "step": 2111488
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.542028337951064e-05,
      "loss": 3.8746,
      "step": 2112000
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.541189743200012e-05,
      "loss": 3.8695,
      "step": 2112512
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.54035114844896e-05,
      "loss": 3.8723,
      "step": 2113024
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5395158294586546e-05,
      "loss": 3.8751,
      "step": 2113536
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5386788725879755e-05,
      "loss": 3.8717,
      "step": 2114048
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5378402778369235e-05,
      "loss": 3.869,
      "step": 2114560
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5370033209662447e-05,
      "loss": 3.8636,
      "step": 2115072
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5361647262151927e-05,
      "loss": 3.8617,
      "step": 2115584
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5353261314641407e-05,
      "loss": 3.8688,
      "step": 2116096
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5344875367130887e-05,
      "loss": 3.8623,
      "step": 2116608
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5336489419620367e-05,
      "loss": 3.8698,
      "step": 2117120
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5328103472109844e-05,
      "loss": 3.868,
      "step": 2117632
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5319717524599324e-05,
      "loss": 3.8673,
      "step": 2118144
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5311331577088807e-05,
      "loss": 3.8637,
      "step": 2118656
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5302945629578287e-05,
      "loss": 3.883,
      "step": 2119168
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5294559682067767e-05,
      "loss": 3.8869,
      "step": 2119680
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5286173734557247e-05,
      "loss": 3.8723,
      "step": 2120192
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5277787787046727e-05,
      "loss": 3.8784,
      "step": 2120704
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5269401839536203e-05,
      "loss": 3.8674,
      "step": 2121216
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5261015892025683e-05,
      "loss": 3.8723,
      "step": 2121728
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5252629944515165e-05,
      "loss": 3.8696,
      "step": 2122240
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5244260375808378e-05,
      "loss": 3.8618,
      "step": 2122752
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5235874428297856e-05,
      "loss": 3.868,
      "step": 2123264
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5227488480787336e-05,
      "loss": 3.871,
      "step": 2123776
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5219102533276816e-05,
      "loss": 3.8787,
      "step": 2124288
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5210716585766296e-05,
      "loss": 3.872,
      "step": 2124800
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5202347017059506e-05,
      "loss": 3.874,
      "step": 2125312
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5193961069548986e-05,
      "loss": 3.8715,
      "step": 2125824
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5185575122038468e-05,
      "loss": 3.8643,
      "step": 2126336
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5177189174527948e-05,
      "loss": 3.8745,
      "step": 2126848
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5168803227017428e-05,
      "loss": 3.8799,
      "step": 2127360
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5160417279506908e-05,
      "loss": 3.8816,
      "step": 2127872
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5152031331996386e-05,
      "loss": 3.8655,
      "step": 2128384
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5143645384485866e-05,
      "loss": 3.8882,
      "step": 2128896
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5135259436975346e-05,
      "loss": 3.8699,
      "step": 2129408
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5126889868268557e-05,
      "loss": 3.8829,
      "step": 2129920
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5118503920758037e-05,
      "loss": 3.8741,
      "step": 2130432
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5110117973247517e-05,
      "loss": 3.8694,
      "step": 2130944
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5101732025736997e-05,
      "loss": 3.8679,
      "step": 2131456
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5093346078226478e-05,
      "loss": 3.8871,
      "step": 2131968
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5084976509519688e-05,
      "loss": 3.861,
      "step": 2132480
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.507659056200917e-05,
      "loss": 3.8736,
      "step": 2132992
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5068204614498649e-05,
      "loss": 3.8742,
      "step": 2133504
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5059818666988129e-05,
      "loss": 3.8751,
      "step": 2134016
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.505144909828134e-05,
      "loss": 3.8687,
      "step": 2134528
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5043079529574552e-05,
      "loss": 3.8719,
      "step": 2135040
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.503469358206403e-05,
      "loss": 3.8737,
      "step": 2135552
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.502630763455351e-05,
      "loss": 3.8848,
      "step": 2136064
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.501792168704299e-05,
      "loss": 3.8755,
      "step": 2136576
    },
    {
      "epoch": 1.03,
      "eval_loss": 3.980809211730957,
      "eval_runtime": 303.474,
      "eval_samples_per_second": 1257.409,
      "eval_steps_per_second": 39.295,
      "step": 2136947
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.500953573953247e-05,
      "loss": 3.8547,
      "step": 2137088
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.500114979202195e-05,
      "loss": 3.8675,
      "step": 2137600
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.4992763844511432e-05,
      "loss": 3.8701,
      "step": 2138112
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.4984377897000912e-05,
      "loss": 3.8743,
      "step": 2138624
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.4975991949490389e-05,
      "loss": 3.8824,
      "step": 2139136
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.496760600197987e-05,
      "loss": 3.8678,
      "step": 2139648
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.495922005446935e-05,
      "loss": 3.8769,
      "step": 2140160
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.495083410695883e-05,
      "loss": 3.8718,
      "step": 2140672
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.494244815944831e-05,
      "loss": 3.8647,
      "step": 2141184
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.4934062211937792e-05,
      "loss": 3.8608,
      "step": 2141696
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.4925676264427272e-05,
      "loss": 3.8803,
      "step": 2142208
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.4917290316916748e-05,
      "loss": 3.8836,
      "step": 2142720
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.4908920748209963e-05,
      "loss": 3.8744,
      "step": 2143232
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.4900534800699442e-05,
      "loss": 3.8553,
      "step": 2143744
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.4892148853188919e-05,
      "loss": 3.8738,
      "step": 2144256
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.48837629056784e-05,
      "loss": 3.8584,
      "step": 2144768
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.487537695816788e-05,
      "loss": 3.8761,
      "step": 2145280
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.486699101065736e-05,
      "loss": 3.8696,
      "step": 2145792
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.485860506314684e-05,
      "loss": 3.8682,
      "step": 2146304
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.485021911563632e-05,
      "loss": 3.873,
      "step": 2146816
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.4841833168125802e-05,
      "loss": 3.8811,
      "step": 2147328
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.4833447220615279e-05,
      "loss": 3.8736,
      "step": 2147840
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.4825061273104759e-05,
      "loss": 3.8776,
      "step": 2148352
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.481667532559424e-05,
      "loss": 3.8692,
      "step": 2148864
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.4808305756887453e-05,
      "loss": 3.874,
      "step": 2149376
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.4799919809376931e-05,
      "loss": 3.875,
      "step": 2149888
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.4791533861866411e-05,
      "loss": 3.8608,
      "step": 2150400
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.4783147914355891e-05,
      "loss": 3.869,
      "step": 2150912
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.4774761966845371e-05,
      "loss": 3.8696,
      "step": 2151424
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.4766376019334851e-05,
      "loss": 3.8694,
      "step": 2151936
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4757990071824333e-05,
      "loss": 3.867,
      "step": 2152448
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4749604124313809e-05,
      "loss": 3.875,
      "step": 2152960
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4741234555607023e-05,
      "loss": 3.8743,
      "step": 2153472
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4732848608096503e-05,
      "loss": 3.8788,
      "step": 2153984
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4724462660585983e-05,
      "loss": 3.8705,
      "step": 2154496
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4716093091879194e-05,
      "loss": 3.8707,
      "step": 2155008
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4707707144368674e-05,
      "loss": 3.8818,
      "step": 2155520
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4699321196858154e-05,
      "loss": 3.8649,
      "step": 2156032
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4690935249347634e-05,
      "loss": 3.8632,
      "step": 2156544
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4682549301837112e-05,
      "loss": 3.8672,
      "step": 2157056
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4674163354326592e-05,
      "loss": 3.8641,
      "step": 2157568
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4665777406816072e-05,
      "loss": 3.87,
      "step": 2158080
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4657391459305552e-05,
      "loss": 3.8626,
      "step": 2158592
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4649021890598763e-05,
      "loss": 3.8707,
      "step": 2159104
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4640635943088243e-05,
      "loss": 3.8736,
      "step": 2159616
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4632249995577724e-05,
      "loss": 3.8756,
      "step": 2160128
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4623864048067204e-05,
      "loss": 3.875,
      "step": 2160640
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4615494479360413e-05,
      "loss": 3.8728,
      "step": 2161152
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4607108531849895e-05,
      "loss": 3.8479,
      "step": 2161664
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4598722584339375e-05,
      "loss": 3.8803,
      "step": 2162176
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4590336636828855e-05,
      "loss": 3.8648,
      "step": 2162688
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4581967068122066e-05,
      "loss": 3.8703,
      "step": 2163200
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4573581120611546e-05,
      "loss": 3.8669,
      "step": 2163712
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4565211551904757e-05,
      "loss": 3.8666,
      "step": 2164224
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4556825604394236e-05,
      "loss": 3.8569,
      "step": 2164736
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4548439656883716e-05,
      "loss": 3.8614,
      "step": 2165248
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4540053709373196e-05,
      "loss": 3.8642,
      "step": 2165760
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4531667761862678e-05,
      "loss": 3.8547,
      "step": 2166272
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4523298193155887e-05,
      "loss": 3.8799,
      "step": 2166784
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4514912245645367e-05,
      "loss": 3.8703,
      "step": 2167296
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4506526298134849e-05,
      "loss": 3.8581,
      "step": 2167808
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4498140350624329e-05,
      "loss": 3.8799,
      "step": 2168320
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4489754403113809e-05,
      "loss": 3.857,
      "step": 2168832
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4481368455603287e-05,
      "loss": 3.8578,
      "step": 2169344
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4472982508092767e-05,
      "loss": 3.8693,
      "step": 2169856
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4464596560582247e-05,
      "loss": 3.8647,
      "step": 2170368
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.445622699187546e-05,
      "loss": 3.8601,
      "step": 2170880
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4447841044364938e-05,
      "loss": 3.867,
      "step": 2171392
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4439455096854418e-05,
      "loss": 3.8566,
      "step": 2171904
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4431069149343897e-05,
      "loss": 3.8567,
      "step": 2172416
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4422683201833379e-05,
      "loss": 3.8768,
      "step": 2172928
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4414297254322859e-05,
      "loss": 3.868,
      "step": 2173440
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4405911306812339e-05,
      "loss": 3.8734,
      "step": 2173952
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4397525359301819e-05,
      "loss": 3.8718,
      "step": 2174464
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.438915579059503e-05,
      "loss": 3.8849,
      "step": 2174976
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.438078622188824e-05,
      "loss": 3.8566,
      "step": 2175488
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.437240027437772e-05,
      "loss": 3.8766,
      "step": 2176000
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.43640143268672e-05,
      "loss": 3.8677,
      "step": 2176512
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.435562837935668e-05,
      "loss": 3.8609,
      "step": 2177024
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.434724243184616e-05,
      "loss": 3.8705,
      "step": 2177536
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4338856484335642e-05,
      "loss": 3.8775,
      "step": 2178048
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4330470536825119e-05,
      "loss": 3.8752,
      "step": 2178560
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4322084589314599e-05,
      "loss": 3.882,
      "step": 2179072
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.431369864180408e-05,
      "loss": 3.8596,
      "step": 2179584
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4305329073097293e-05,
      "loss": 3.8499,
      "step": 2180096
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4296943125586771e-05,
      "loss": 3.8603,
      "step": 2180608
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4288557178076251e-05,
      "loss": 3.8729,
      "step": 2181120
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4280171230565731e-05,
      "loss": 3.8684,
      "step": 2181632
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4271801661858942e-05,
      "loss": 3.8629,
      "step": 2182144
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.4263415714348422e-05,
      "loss": 3.8625,
      "step": 2182656
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.4255029766837902e-05,
      "loss": 3.8711,
      "step": 2183168
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.4246643819327382e-05,
      "loss": 3.863,
      "step": 2183680
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.4238257871816863e-05,
      "loss": 3.8661,
      "step": 2184192
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.4229888303110072e-05,
      "loss": 3.8757,
      "step": 2184704
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.4221502355599552e-05,
      "loss": 3.8817,
      "step": 2185216
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.4213116408089034e-05,
      "loss": 3.8716,
      "step": 2185728
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.4204730460578514e-05,
      "loss": 3.8626,
      "step": 2186240
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.4196360891871725e-05,
      "loss": 3.8649,
      "step": 2186752
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.4187974944361205e-05,
      "loss": 3.8631,
      "step": 2187264
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.4179588996850685e-05,
      "loss": 3.8761,
      "step": 2187776
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.4171203049340165e-05,
      "loss": 3.8734,
      "step": 2188288
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.4162817101829644e-05,
      "loss": 3.8657,
      "step": 2188800
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.4154431154319123e-05,
      "loss": 3.8703,
      "step": 2189312
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.4146045206808603e-05,
      "loss": 3.8663,
      "step": 2189824
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.4137675638101817e-05,
      "loss": 3.8684,
      "step": 2190336
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.4129289690591297e-05,
      "loss": 3.8627,
      "step": 2190848
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.4120903743080773e-05,
      "loss": 3.8634,
      "step": 2191360
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.4112517795570255e-05,
      "loss": 3.8557,
      "step": 2191872
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.4104131848059735e-05,
      "loss": 3.8631,
      "step": 2192384
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.4095745900549215e-05,
      "loss": 3.8657,
      "step": 2192896
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.4087359953038695e-05,
      "loss": 3.8622,
      "step": 2193408
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.4078974005528175e-05,
      "loss": 3.8632,
      "step": 2193920
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.4070604436821386e-05,
      "loss": 3.8641,
      "step": 2194432
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.4062218489310866e-05,
      "loss": 3.8588,
      "step": 2194944
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.4053848920604076e-05,
      "loss": 3.8767,
      "step": 2195456
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.4045462973093556e-05,
      "loss": 3.8804,
      "step": 2195968
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.4037077025583036e-05,
      "loss": 3.8705,
      "step": 2196480
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.4028691078072518e-05,
      "loss": 3.8705,
      "step": 2196992
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.4020305130561998e-05,
      "loss": 3.8663,
      "step": 2197504
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.4011935561855207e-05,
      "loss": 3.867,
      "step": 2198016
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.4003549614344689e-05,
      "loss": 3.8711,
      "step": 2198528
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.3995163666834169e-05,
      "loss": 3.8549,
      "step": 2199040
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.3986777719323649e-05,
      "loss": 3.8671,
      "step": 2199552
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.3978391771813127e-05,
      "loss": 3.8665,
      "step": 2200064
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.3970005824302607e-05,
      "loss": 3.8738,
      "step": 2200576
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.3961619876792087e-05,
      "loss": 3.8661,
      "step": 2201088
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.39532503080853e-05,
      "loss": 3.8704,
      "step": 2201600
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.3944864360574777e-05,
      "loss": 3.8675,
      "step": 2202112
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.3936478413064257e-05,
      "loss": 3.8596,
      "step": 2202624
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.3928092465553737e-05,
      "loss": 3.8717,
      "step": 2203136
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.3919722896846952e-05,
      "loss": 3.8723,
      "step": 2203648
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.3911336949336428e-05,
      "loss": 3.876,
      "step": 2204160
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.390295100182591e-05,
      "loss": 3.8653,
      "step": 2204672
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.389456505431539e-05,
      "loss": 3.8814,
      "step": 2205184
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.388617910680487e-05,
      "loss": 3.869,
      "step": 2205696
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.387780953809808e-05,
      "loss": 3.8755,
      "step": 2206208
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.386942359058756e-05,
      "loss": 3.8741,
      "step": 2206720
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.386103764307704e-05,
      "loss": 3.8667,
      "step": 2207232
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.385265169556652e-05,
      "loss": 3.8636,
      "step": 2207744
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.3844265748056002e-05,
      "loss": 3.8825,
      "step": 2208256
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.3835879800545482e-05,
      "loss": 3.8591,
      "step": 2208768
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.3827493853034959e-05,
      "loss": 3.8662,
      "step": 2209280
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.381910790552444e-05,
      "loss": 3.872,
      "step": 2209792
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.381072195801392e-05,
      "loss": 3.873,
      "step": 2210304
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.3802352389307133e-05,
      "loss": 3.8662,
      "step": 2210816
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.3793966441796611e-05,
      "loss": 3.868,
      "step": 2211328
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.378558049428609e-05,
      "loss": 3.8695,
      "step": 2211840
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.3777210925579303e-05,
      "loss": 3.8762,
      "step": 2212352
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.3768824978068782e-05,
      "loss": 3.8735,
      "step": 2212864
    },
    {
      "epoch": 0.03,
      "eval_loss": 3.9795851707458496,
      "eval_runtime": 310.6169,
      "eval_samples_per_second": 1228.494,
      "eval_steps_per_second": 38.391,
      "step": 2213267
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.3760455409361994e-05,
      "loss": 3.8622,
      "step": 2213376
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.3752069461851474e-05,
      "loss": 3.8564,
      "step": 2213888
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.3743683514340954e-05,
      "loss": 3.8659,
      "step": 2214400
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.3735297566830432e-05,
      "loss": 3.8766,
      "step": 2214912
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.3726911619319912e-05,
      "loss": 3.8732,
      "step": 2215424
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.3718525671809392e-05,
      "loss": 3.8676,
      "step": 2215936
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.3710139724298874e-05,
      "loss": 3.8707,
      "step": 2216448
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.3701753776788354e-05,
      "loss": 3.8693,
      "step": 2216960
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.3693367829277834e-05,
      "loss": 3.859,
      "step": 2217472
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.3684998260571045e-05,
      "loss": 3.8594,
      "step": 2217984
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.3676612313060524e-05,
      "loss": 3.8738,
      "step": 2218496
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.3668226365550004e-05,
      "loss": 3.874,
      "step": 2219008
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.3659856796843215e-05,
      "loss": 3.878,
      "step": 2219520
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.3651470849332695e-05,
      "loss": 3.8468,
      "step": 2220032
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.3643084901822175e-05,
      "loss": 3.8679,
      "step": 2220544
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.3634698954311657e-05,
      "loss": 3.8566,
      "step": 2221056
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.3626313006801137e-05,
      "loss": 3.8691,
      "step": 2221568
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.3617927059290613e-05,
      "loss": 3.8661,
      "step": 2222080
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.3609541111780095e-05,
      "loss": 3.8639,
      "step": 2222592
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.3601155164269575e-05,
      "loss": 3.8663,
      "step": 2223104
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.3592769216759055e-05,
      "loss": 3.8831,
      "step": 2223616
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.3584399648052266e-05,
      "loss": 3.8676,
      "step": 2224128
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.3576013700541746e-05,
      "loss": 3.8703,
      "step": 2224640
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.3567627753031226e-05,
      "loss": 3.8631,
      "step": 2225152
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.3559241805520706e-05,
      "loss": 3.8739,
      "step": 2225664
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.3550872236813916e-05,
      "loss": 3.8678,
      "step": 2226176
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.3542486289303396e-05,
      "loss": 3.8578,
      "step": 2226688
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.3534100341792876e-05,
      "loss": 3.8666,
      "step": 2227200
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.3525714394282358e-05,
      "loss": 3.863,
      "step": 2227712
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.3517328446771838e-05,
      "loss": 3.8621,
      "step": 2228224
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3508958878065049e-05,
      "loss": 3.867,
      "step": 2228736
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3500572930554529e-05,
      "loss": 3.8692,
      "step": 2229248
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3492186983044009e-05,
      "loss": 3.8705,
      "step": 2229760
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3483801035533488e-05,
      "loss": 3.8703,
      "step": 2230272
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3475415088022968e-05,
      "loss": 3.8699,
      "step": 2230784
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.346704551931618e-05,
      "loss": 3.8668,
      "step": 2231296
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.345865957180566e-05,
      "loss": 3.8739,
      "step": 2231808
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3450273624295139e-05,
      "loss": 3.8577,
      "step": 2232320
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3441887676784617e-05,
      "loss": 3.8582,
      "step": 2232832
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3433501729274097e-05,
      "loss": 3.8657,
      "step": 2233344
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3425115781763577e-05,
      "loss": 3.8592,
      "step": 2233856
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3416729834253059e-05,
      "loss": 3.8662,
      "step": 2234368
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3408343886742539e-05,
      "loss": 3.8617,
      "step": 2234880
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.339997431803575e-05,
      "loss": 3.8675,
      "step": 2235392
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.339158837052523e-05,
      "loss": 3.8715,
      "step": 2235904
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.338320242301471e-05,
      "loss": 3.869,
      "step": 2236416
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.337481647550419e-05,
      "loss": 3.8703,
      "step": 2236928
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.33664469067974e-05,
      "loss": 3.8669,
      "step": 2237440
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.335806095928688e-05,
      "loss": 3.8451,
      "step": 2237952
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.334967501177636e-05,
      "loss": 3.8713,
      "step": 2238464
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3341305443069571e-05,
      "loss": 3.8639,
      "step": 2238976
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3332919495559051e-05,
      "loss": 3.8679,
      "step": 2239488
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3324533548048531e-05,
      "loss": 3.8624,
      "step": 2240000
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3316147600538013e-05,
      "loss": 3.859,
      "step": 2240512
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3307761653027493e-05,
      "loss": 3.8542,
      "step": 2241024
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3299375705516973e-05,
      "loss": 3.8566,
      "step": 2241536
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.329098975800645e-05,
      "loss": 3.8631,
      "step": 2242048
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3282620189299663e-05,
      "loss": 3.8505,
      "step": 2242560
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3274234241789143e-05,
      "loss": 3.877,
      "step": 2243072
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3265848294278621e-05,
      "loss": 3.8682,
      "step": 2243584
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3257462346768101e-05,
      "loss": 3.8486,
      "step": 2244096
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3249076399257581e-05,
      "loss": 3.8769,
      "step": 2244608
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3240690451747061e-05,
      "loss": 3.8534,
      "step": 2245120
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3232304504236543e-05,
      "loss": 3.8522,
      "step": 2245632
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3223918556726023e-05,
      "loss": 3.8647,
      "step": 2246144
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3215548988019234e-05,
      "loss": 3.8658,
      "step": 2246656
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3207163040508714e-05,
      "loss": 3.8545,
      "step": 2247168
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3198777092998194e-05,
      "loss": 3.8695,
      "step": 2247680
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3190391145487674e-05,
      "loss": 3.8473,
      "step": 2248192
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3182005197977154e-05,
      "loss": 3.8536,
      "step": 2248704
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3173619250466632e-05,
      "loss": 3.8732,
      "step": 2249216
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3165249681759844e-05,
      "loss": 3.8591,
      "step": 2249728
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3156863734249324e-05,
      "loss": 3.8703,
      "step": 2250240
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3148477786738803e-05,
      "loss": 3.8633,
      "step": 2250752
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3140091839228283e-05,
      "loss": 3.8816,
      "step": 2251264
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3131705891717762e-05,
      "loss": 3.8554,
      "step": 2251776
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3123336323010977e-05,
      "loss": 3.8708,
      "step": 2252288
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3114950375500453e-05,
      "loss": 3.8621,
      "step": 2252800
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3106564427989935e-05,
      "loss": 3.8592,
      "step": 2253312
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3098178480479415e-05,
      "loss": 3.8655,
      "step": 2253824
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3089792532968895e-05,
      "loss": 3.8767,
      "step": 2254336
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3081406585458375e-05,
      "loss": 3.8636,
      "step": 2254848
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3073020637947855e-05,
      "loss": 3.8789,
      "step": 2255360
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3064634690437336e-05,
      "loss": 3.8517,
      "step": 2255872
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3056265121730545e-05,
      "loss": 3.851,
      "step": 2256384
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3047879174220027e-05,
      "loss": 3.8561,
      "step": 2256896
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3039509605513236e-05,
      "loss": 3.8625,
      "step": 2257408
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3031123658002716e-05,
      "loss": 3.8639,
      "step": 2257920
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3022737710492198e-05,
      "loss": 3.8616,
      "step": 2258432
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3014351762981678e-05,
      "loss": 3.8589,
      "step": 2258944
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.3005965815471158e-05,
      "loss": 3.8632,
      "step": 2259456
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2997579867960636e-05,
      "loss": 3.8617,
      "step": 2259968
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2989193920450116e-05,
      "loss": 3.8594,
      "step": 2260480
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2980824351743328e-05,
      "loss": 3.8733,
      "step": 2260992
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2972438404232808e-05,
      "loss": 3.8743,
      "step": 2261504
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2964052456722287e-05,
      "loss": 3.8717,
      "step": 2262016
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2955666509211767e-05,
      "loss": 3.8551,
      "step": 2262528
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2947280561701247e-05,
      "loss": 3.8613,
      "step": 2263040
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2938894614190728e-05,
      "loss": 3.8615,
      "step": 2263552
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2930508666680208e-05,
      "loss": 3.8689,
      "step": 2264064
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2922122719169688e-05,
      "loss": 3.8687,
      "step": 2264576
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2913753150462899e-05,
      "loss": 3.8613,
      "step": 2265088
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2905367202952379e-05,
      "loss": 3.8646,
      "step": 2265600
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2896981255441859e-05,
      "loss": 3.8665,
      "step": 2266112
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2888595307931339e-05,
      "loss": 3.8647,
      "step": 2266624
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.288022573922455e-05,
      "loss": 3.8573,
      "step": 2267136
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.287185617051776e-05,
      "loss": 3.8592,
      "step": 2267648
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.286347022300724e-05,
      "loss": 3.8492,
      "step": 2268160
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.285508427549672e-05,
      "loss": 3.8614,
      "step": 2268672
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.28466983279862e-05,
      "loss": 3.8583,
      "step": 2269184
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2838328759279411e-05,
      "loss": 3.8596,
      "step": 2269696
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2829942811768891e-05,
      "loss": 3.8596,
      "step": 2270208
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2821556864258371e-05,
      "loss": 3.8631,
      "step": 2270720
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2813170916747853e-05,
      "loss": 3.8531,
      "step": 2271232
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2804784969237332e-05,
      "loss": 3.8729,
      "step": 2271744
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2796399021726812e-05,
      "loss": 3.8739,
      "step": 2272256
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.278801307421629e-05,
      "loss": 3.8677,
      "step": 2272768
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.277962712670577e-05,
      "loss": 3.8675,
      "step": 2273280
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2771257557998983e-05,
      "loss": 3.8632,
      "step": 2273792
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2762871610488461e-05,
      "loss": 3.861,
      "step": 2274304
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2754485662977941e-05,
      "loss": 3.8672,
      "step": 2274816
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2746099715467421e-05,
      "loss": 3.8492,
      "step": 2275328
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2737713767956901e-05,
      "loss": 3.8657,
      "step": 2275840
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2729344199250112e-05,
      "loss": 3.8558,
      "step": 2276352
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2720958251739592e-05,
      "loss": 3.873,
      "step": 2276864
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2712588683032806e-05,
      "loss": 3.865,
      "step": 2277376
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2704202735522286e-05,
      "loss": 3.8658,
      "step": 2277888
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2695816788011763e-05,
      "loss": 3.859,
      "step": 2278400
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2687430840501244e-05,
      "loss": 3.8564,
      "step": 2278912
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2679044892990724e-05,
      "loss": 3.8662,
      "step": 2279424
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2670658945480204e-05,
      "loss": 3.8714,
      "step": 2279936
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2662272997969684e-05,
      "loss": 3.8749,
      "step": 2280448
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2653887050459164e-05,
      "loss": 3.8591,
      "step": 2280960
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2645517481752375e-05,
      "loss": 3.8765,
      "step": 2281472
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2637131534241855e-05,
      "loss": 3.8642,
      "step": 2281984
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2628745586731337e-05,
      "loss": 3.8721,
      "step": 2282496
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2620359639220817e-05,
      "loss": 3.8712,
      "step": 2283008
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2611990070514026e-05,
      "loss": 3.8619,
      "step": 2283520
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2603604123003507e-05,
      "loss": 3.8591,
      "step": 2284032
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2595218175492987e-05,
      "loss": 3.8775,
      "step": 2284544
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2586832227982467e-05,
      "loss": 3.8576,
      "step": 2285056
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2578446280471945e-05,
      "loss": 3.8586,
      "step": 2285568
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2570076711765158e-05,
      "loss": 3.8713,
      "step": 2286080
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2561690764254638e-05,
      "loss": 3.8691,
      "step": 2286592
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2553304816744116e-05,
      "loss": 3.8616,
      "step": 2287104
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2544918869233596e-05,
      "loss": 3.8643,
      "step": 2287616
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2536532921723076e-05,
      "loss": 3.8644,
      "step": 2288128
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.252816335301629e-05,
      "loss": 3.8727,
      "step": 2288640
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2519777405505767e-05,
      "loss": 3.8684,
      "step": 2289152
    },
    {
      "epoch": 1.03,
      "eval_loss": 3.9781768321990967,
      "eval_runtime": 311.7696,
      "eval_samples_per_second": 1223.952,
      "eval_steps_per_second": 38.249,
      "step": 2289587
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.2511391457995247e-05,
      "loss": 3.8478,
      "step": 2289664
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.2503005510484728e-05,
      "loss": 3.8528,
      "step": 2290176
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.2494619562974208e-05,
      "loss": 3.8631,
      "step": 2290688
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.2486233615463688e-05,
      "loss": 3.875,
      "step": 2291200
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.2477847667953167e-05,
      "loss": 3.8687,
      "step": 2291712
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.2469461720442648e-05,
      "loss": 3.8654,
      "step": 2292224
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.2461075772932128e-05,
      "loss": 3.8645,
      "step": 2292736
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.2452689825421606e-05,
      "loss": 3.8641,
      "step": 2293248
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.2444303877911086e-05,
      "loss": 3.8551,
      "step": 2293760
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.2435917930400568e-05,
      "loss": 3.8618,
      "step": 2294272
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.2427531982890048e-05,
      "loss": 3.8613,
      "step": 2294784
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.2419146035379526e-05,
      "loss": 3.8763,
      "step": 2295296
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.2410776466672739e-05,
      "loss": 3.8745,
      "step": 2295808
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.2402390519162219e-05,
      "loss": 3.8431,
      "step": 2296320
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.2394004571651697e-05,
      "loss": 3.8657,
      "step": 2296832
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.2385618624141179e-05,
      "loss": 3.8509,
      "step": 2297344
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.2377232676630659e-05,
      "loss": 3.8658,
      "step": 2297856
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.2368846729120139e-05,
      "loss": 3.8578,
      "step": 2298368
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.2360460781609617e-05,
      "loss": 3.8651,
      "step": 2298880
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.235209121290283e-05,
      "loss": 3.8573,
      "step": 2299392
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.234370526539231e-05,
      "loss": 3.8801,
      "step": 2299904
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.2335319317881788e-05,
      "loss": 3.8627,
      "step": 2300416
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.232693337037127e-05,
      "loss": 3.8681,
      "step": 2300928
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.2318547422860749e-05,
      "loss": 3.86,
      "step": 2301440
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.2310161475350229e-05,
      "loss": 3.8735,
      "step": 2301952
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.2301775527839707e-05,
      "loss": 3.863,
      "step": 2302464
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.2293389580329189e-05,
      "loss": 3.8537,
      "step": 2302976
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.2285003632818669e-05,
      "loss": 3.8604,
      "step": 2303488
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.2276617685308147e-05,
      "loss": 3.8644,
      "step": 2304000
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.226824811660136e-05,
      "loss": 3.8574,
      "step": 2304512
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.225986216909084e-05,
      "loss": 3.8625,
      "step": 2305024
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.2251476221580318e-05,
      "loss": 3.8616,
      "step": 2305536
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.22430902740698e-05,
      "loss": 3.8675,
      "step": 2306048
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.223470432655928e-05,
      "loss": 3.8656,
      "step": 2306560
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.222631837904876e-05,
      "loss": 3.8699,
      "step": 2307072
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.221794881034197e-05,
      "loss": 3.8647,
      "step": 2307584
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.220956286283145e-05,
      "loss": 3.8736,
      "step": 2308096
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.220117691532093e-05,
      "loss": 3.8503,
      "step": 2308608
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.219279096781041e-05,
      "loss": 3.8596,
      "step": 2309120
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.218440502029989e-05,
      "loss": 3.8612,
      "step": 2309632
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.217601907278937e-05,
      "loss": 3.8516,
      "step": 2310144
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.216764950408258e-05,
      "loss": 3.8645,
      "step": 2310656
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.215926355657206e-05,
      "loss": 3.8624,
      "step": 2311168
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.215087760906154e-05,
      "loss": 3.8595,
      "step": 2311680
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.214249166155102e-05,
      "loss": 3.8672,
      "step": 2312192
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.21341057140405e-05,
      "loss": 3.865,
      "step": 2312704
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.212571976652998e-05,
      "loss": 3.8661,
      "step": 2313216
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.211733381901946e-05,
      "loss": 3.8648,
      "step": 2313728
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.210894787150894e-05,
      "loss": 3.8446,
      "step": 2314240
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.2100578302802151e-05,
      "loss": 3.8643,
      "step": 2314752
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.2092192355291631e-05,
      "loss": 3.8639,
      "step": 2315264
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.2083806407781111e-05,
      "loss": 3.8655,
      "step": 2315776
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.2075420460270591e-05,
      "loss": 3.8547,
      "step": 2316288
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.2067050891563802e-05,
      "loss": 3.8567,
      "step": 2316800
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.2058681322857014e-05,
      "loss": 3.8542,
      "step": 2317312
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.2050295375346494e-05,
      "loss": 3.853,
      "step": 2317824
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.2041909427835973e-05,
      "loss": 3.8606,
      "step": 2318336
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.2033523480325454e-05,
      "loss": 3.8432,
      "step": 2318848
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.2025137532814934e-05,
      "loss": 3.8737,
      "step": 2319360
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.2016751585304414e-05,
      "loss": 3.8677,
      "step": 2319872
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.2008365637793892e-05,
      "loss": 3.8417,
      "step": 2320384
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.1999979690283374e-05,
      "loss": 3.8728,
      "step": 2320896
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.1991610121576585e-05,
      "loss": 3.8464,
      "step": 2321408
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.1983224174066065e-05,
      "loss": 3.8531,
      "step": 2321920
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.1974838226555545e-05,
      "loss": 3.8534,
      "step": 2322432
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.1966452279045025e-05,
      "loss": 3.8616,
      "step": 2322944
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.1958066331534505e-05,
      "loss": 3.8519,
      "step": 2323456
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.1949680384023985e-05,
      "loss": 3.8661,
      "step": 2323968
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.1941294436513465e-05,
      "loss": 3.8401,
      "step": 2324480
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.1932908489002945e-05,
      "loss": 3.8524,
      "step": 2324992
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.1924538920296155e-05,
      "loss": 3.8662,
      "step": 2325504
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.1916152972785635e-05,
      "loss": 3.8568,
      "step": 2326016
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.1907767025275115e-05,
      "loss": 3.8668,
      "step": 2326528
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.1899397456568328e-05,
      "loss": 3.8635,
      "step": 2327040
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.1891011509057806e-05,
      "loss": 3.8755,
      "step": 2327552
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.1882625561547286e-05,
      "loss": 3.8517,
      "step": 2328064
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.1874239614036766e-05,
      "loss": 3.8715,
      "step": 2328576
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.1865853666526246e-05,
      "loss": 3.8533,
      "step": 2329088
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.1857484097819457e-05,
      "loss": 3.8573,
      "step": 2329600
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.1849098150308938e-05,
      "loss": 3.8618,
      "step": 2330112
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.1840712202798418e-05,
      "loss": 3.8705,
      "step": 2330624
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.1832326255287897e-05,
      "loss": 3.8638,
      "step": 2331136
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.1823940307777377e-05,
      "loss": 3.8741,
      "step": 2331648
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.1815554360266858e-05,
      "loss": 3.8495,
      "step": 2332160
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.1807168412756336e-05,
      "loss": 3.8438,
      "step": 2332672
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.1798782465245816e-05,
      "loss": 3.8533,
      "step": 2333184
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.1790396517735296e-05,
      "loss": 3.8606,
      "step": 2333696
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.1782026949028509e-05,
      "loss": 3.8589,
      "step": 2334208
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.1773641001517987e-05,
      "loss": 3.8583,
      "step": 2334720
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.1765255054007469e-05,
      "loss": 3.858,
      "step": 2335232
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.175688548530068e-05,
      "loss": 3.8577,
      "step": 2335744
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.174849953779016e-05,
      "loss": 3.8582,
      "step": 2336256
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.174011359027964e-05,
      "loss": 3.8541,
      "step": 2336768
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.173172764276912e-05,
      "loss": 3.868,
      "step": 2337280
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.17233416952586e-05,
      "loss": 3.8732,
      "step": 2337792
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.171497212655181e-05,
      "loss": 3.8666,
      "step": 2338304
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.170658617904129e-05,
      "loss": 3.8521,
      "step": 2338816
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.169820023153077e-05,
      "loss": 3.8589,
      "step": 2339328
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.168981428402025e-05,
      "loss": 3.8571,
      "step": 2339840
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.168142833650973e-05,
      "loss": 3.8659,
      "step": 2340352
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.167304238899921e-05,
      "loss": 3.8691,
      "step": 2340864
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.166465644148869e-05,
      "loss": 3.8522,
      "step": 2341376
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.165627049397817e-05,
      "loss": 3.8619,
      "step": 2341888
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1647917304075111e-05,
      "loss": 3.8631,
      "step": 2342400
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1639531356564593e-05,
      "loss": 3.861,
      "step": 2342912
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1631145409054073e-05,
      "loss": 3.8556,
      "step": 2343424
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1622775840347284e-05,
      "loss": 3.857,
      "step": 2343936
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1614389892836764e-05,
      "loss": 3.845,
      "step": 2344448
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1606003945326244e-05,
      "loss": 3.8563,
      "step": 2344960
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1597617997815722e-05,
      "loss": 3.8525,
      "step": 2345472
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1589232050305204e-05,
      "loss": 3.8559,
      "step": 2345984
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1580846102794684e-05,
      "loss": 3.8542,
      "step": 2346496
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1572460155284164e-05,
      "loss": 3.8584,
      "step": 2347008
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1564074207773642e-05,
      "loss": 3.8551,
      "step": 2347520
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1555688260263124e-05,
      "loss": 3.8645,
      "step": 2348032
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1547302312752603e-05,
      "loss": 3.8691,
      "step": 2348544
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1538916365242082e-05,
      "loss": 3.8677,
      "step": 2349056
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1530530417731562e-05,
      "loss": 3.8658,
      "step": 2349568
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1522160849024774e-05,
      "loss": 3.8544,
      "step": 2350080
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1513791280317985e-05,
      "loss": 3.8606,
      "step": 2350592
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1505405332807465e-05,
      "loss": 3.8637,
      "step": 2351104
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1497019385296945e-05,
      "loss": 3.845,
      "step": 2351616
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1488633437786425e-05,
      "loss": 3.861,
      "step": 2352128
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1480247490275905e-05,
      "loss": 3.8552,
      "step": 2352640
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1471861542765385e-05,
      "loss": 3.8691,
      "step": 2353152
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1463475595254865e-05,
      "loss": 3.8603,
      "step": 2353664
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1455089647744345e-05,
      "loss": 3.8645,
      "step": 2354176
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1446720079037555e-05,
      "loss": 3.8552,
      "step": 2354688
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1438334131527035e-05,
      "loss": 3.8548,
      "step": 2355200
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1429948184016515e-05,
      "loss": 3.8655,
      "step": 2355712
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1421578615309728e-05,
      "loss": 3.8636,
      "step": 2356224
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1413192667799206e-05,
      "loss": 3.8712,
      "step": 2356736
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1404806720288686e-05,
      "loss": 3.8573,
      "step": 2357248
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1396420772778168e-05,
      "loss": 3.8698,
      "step": 2357760
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1388034825267646e-05,
      "loss": 3.8639,
      "step": 2358272
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1379648877757126e-05,
      "loss": 3.8642,
      "step": 2358784
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1371262930246606e-05,
      "loss": 3.869,
      "step": 2359296
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1362876982736086e-05,
      "loss": 3.8572,
      "step": 2359808
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1354507414029297e-05,
      "loss": 3.8619,
      "step": 2360320
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1346121466518778e-05,
      "loss": 3.8728,
      "step": 2360832
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1337751897811989e-05,
      "loss": 3.8551,
      "step": 2361344
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1329365950301469e-05,
      "loss": 3.8544,
      "step": 2361856
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1320980002790949e-05,
      "loss": 3.8627,
      "step": 2362368
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1312594055280429e-05,
      "loss": 3.8622,
      "step": 2362880
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1304208107769909e-05,
      "loss": 3.8581,
      "step": 2363392
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1295822160259389e-05,
      "loss": 3.8627,
      "step": 2363904
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1287436212748869e-05,
      "loss": 3.8558,
      "step": 2364416
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1279083022845812e-05,
      "loss": 3.8749,
      "step": 2364928
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1270697075335292e-05,
      "loss": 3.86,
      "step": 2365440
    },
    {
      "epoch": 0.03,
      "eval_loss": 3.977307081222534,
      "eval_runtime": 286.2031,
      "eval_samples_per_second": 1333.288,
      "eval_steps_per_second": 41.666,
      "step": 2365907
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.126231112782477e-05,
      "loss": 3.8531,
      "step": 2365952
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.125392518031425e-05,
      "loss": 3.8556,
      "step": 2366464
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.1245539232803732e-05,
      "loss": 3.8511,
      "step": 2366976
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.123715328529321e-05,
      "loss": 3.8711,
      "step": 2367488
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.122876733778269e-05,
      "loss": 3.8623,
      "step": 2368000
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.122038139027217e-05,
      "loss": 3.8657,
      "step": 2368512
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.121199544276165e-05,
      "loss": 3.8582,
      "step": 2369024
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.120360949525113e-05,
      "loss": 3.8609,
      "step": 2369536
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.119522354774061e-05,
      "loss": 3.85,
      "step": 2370048
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.118683760023009e-05,
      "loss": 3.8591,
      "step": 2370560
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.11784680315233e-05,
      "loss": 3.8595,
      "step": 2371072
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.1170098462816513e-05,
      "loss": 3.8751,
      "step": 2371584
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.1161728894109724e-05,
      "loss": 3.8687,
      "step": 2372096
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.1153342946599204e-05,
      "loss": 3.8412,
      "step": 2372608
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.1144956999088684e-05,
      "loss": 3.8586,
      "step": 2373120
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.1136571051578164e-05,
      "loss": 3.8509,
      "step": 2373632
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.1128185104067644e-05,
      "loss": 3.8605,
      "step": 2374144
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.1119799156557124e-05,
      "loss": 3.8533,
      "step": 2374656
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.1111413209046604e-05,
      "loss": 3.8591,
      "step": 2375168
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.1103027261536084e-05,
      "loss": 3.8536,
      "step": 2375680
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.1094641314025564e-05,
      "loss": 3.8749,
      "step": 2376192
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.1086255366515044e-05,
      "loss": 3.8625,
      "step": 2376704
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.1077869419004524e-05,
      "loss": 3.8616,
      "step": 2377216
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.1069499850297734e-05,
      "loss": 3.8561,
      "step": 2377728
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.1061113902787214e-05,
      "loss": 3.8716,
      "step": 2378240
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.1052727955276694e-05,
      "loss": 3.8569,
      "step": 2378752
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.1044342007766174e-05,
      "loss": 3.8498,
      "step": 2379264
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.1035956060255652e-05,
      "loss": 3.8614,
      "step": 2379776
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.1027570112745134e-05,
      "loss": 3.8556,
      "step": 2380288
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.1019200544038345e-05,
      "loss": 3.8516,
      "step": 2380800
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.1010814596527825e-05,
      "loss": 3.8602,
      "step": 2381312
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.1002428649017305e-05,
      "loss": 3.8555,
      "step": 2381824
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0994042701506785e-05,
      "loss": 3.8646,
      "step": 2382336
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0985656753996265e-05,
      "loss": 3.8641,
      "step": 2382848
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0977270806485745e-05,
      "loss": 3.8661,
      "step": 2383360
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0968901237778955e-05,
      "loss": 3.8612,
      "step": 2383872
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0960515290268435e-05,
      "loss": 3.8679,
      "step": 2384384
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0952129342757917e-05,
      "loss": 3.849,
      "step": 2384896
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0943743395247395e-05,
      "loss": 3.8561,
      "step": 2385408
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0935357447736875e-05,
      "loss": 3.8569,
      "step": 2385920
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0926971500226355e-05,
      "loss": 3.8497,
      "step": 2386432
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0918585552715835e-05,
      "loss": 3.8607,
      "step": 2386944
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0910199605205315e-05,
      "loss": 3.8602,
      "step": 2387456
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0901830036498526e-05,
      "loss": 3.8516,
      "step": 2387968
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0893460467791738e-05,
      "loss": 3.8631,
      "step": 2388480
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0885074520281217e-05,
      "loss": 3.8623,
      "step": 2388992
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0876688572770698e-05,
      "loss": 3.8618,
      "step": 2389504
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0868302625260178e-05,
      "loss": 3.8579,
      "step": 2390016
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0859916677749658e-05,
      "loss": 3.8413,
      "step": 2390528
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0851547109042869e-05,
      "loss": 3.8602,
      "step": 2391040
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0843161161532349e-05,
      "loss": 3.8642,
      "step": 2391552
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0834775214021829e-05,
      "loss": 3.8594,
      "step": 2392064
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0826389266511309e-05,
      "loss": 3.8528,
      "step": 2392576
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0818003319000789e-05,
      "loss": 3.8513,
      "step": 2393088
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0809617371490269e-05,
      "loss": 3.847,
      "step": 2393600
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0801231423979749e-05,
      "loss": 3.8477,
      "step": 2394112
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0792845476469229e-05,
      "loss": 3.8568,
      "step": 2394624
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.078449228656617e-05,
      "loss": 3.8447,
      "step": 2395136
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0776106339055652e-05,
      "loss": 3.8647,
      "step": 2395648
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0767720391545132e-05,
      "loss": 3.8622,
      "step": 2396160
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.075933444403461e-05,
      "loss": 3.8468,
      "step": 2396672
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.075094849652409e-05,
      "loss": 3.8667,
      "step": 2397184
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0742562549013572e-05,
      "loss": 3.8422,
      "step": 2397696
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.073417660150305e-05,
      "loss": 3.8505,
      "step": 2398208
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.072579065399253e-05,
      "loss": 3.8501,
      "step": 2398720
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0717421085285743e-05,
      "loss": 3.8621,
      "step": 2399232
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0709035137775223e-05,
      "loss": 3.8459,
      "step": 2399744
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.07006491902647e-05,
      "loss": 3.8632,
      "step": 2400256
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0692263242754182e-05,
      "loss": 3.8369,
      "step": 2400768
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0683877295243662e-05,
      "loss": 3.8505,
      "step": 2401280
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.067549134773314e-05,
      "loss": 3.8636,
      "step": 2401792
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.066710540022262e-05,
      "loss": 3.855,
      "step": 2402304
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0658719452712102e-05,
      "loss": 3.8581,
      "step": 2402816
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0650349884005313e-05,
      "loss": 3.8633,
      "step": 2403328
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0641963936494791e-05,
      "loss": 3.871,
      "step": 2403840
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0633577988984273e-05,
      "loss": 3.8506,
      "step": 2404352
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0625192041473753e-05,
      "loss": 3.8687,
      "step": 2404864
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0616822472766964e-05,
      "loss": 3.8493,
      "step": 2405376
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0608436525256444e-05,
      "loss": 3.8558,
      "step": 2405888
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0600050577745924e-05,
      "loss": 3.8551,
      "step": 2406400
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0591664630235404e-05,
      "loss": 3.8635,
      "step": 2406912
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0583278682724884e-05,
      "loss": 3.8629,
      "step": 2407424
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0574892735214363e-05,
      "loss": 3.8701,
      "step": 2407936
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0566506787703843e-05,
      "loss": 3.8503,
      "step": 2408448
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0558120840193322e-05,
      "loss": 3.844,
      "step": 2408960
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0549734892682803e-05,
      "loss": 3.8483,
      "step": 2409472
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0541381702779745e-05,
      "loss": 3.8577,
      "step": 2409984
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0532995755269227e-05,
      "loss": 3.8531,
      "step": 2410496
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0524609807758705e-05,
      "loss": 3.8539,
      "step": 2411008
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.0516223860248185e-05,
      "loss": 3.8587,
      "step": 2411520
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0507837912737665e-05,
      "loss": 3.8525,
      "step": 2412032
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0499451965227145e-05,
      "loss": 3.8544,
      "step": 2412544
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0491066017716625e-05,
      "loss": 3.8518,
      "step": 2413056
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0482680070206105e-05,
      "loss": 3.8665,
      "step": 2413568
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0474310501499317e-05,
      "loss": 3.8689,
      "step": 2414080
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0465924553988795e-05,
      "loss": 3.8646,
      "step": 2414592
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0457538606478275e-05,
      "loss": 3.8453,
      "step": 2415104
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0449152658967757e-05,
      "loss": 3.8566,
      "step": 2415616
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0440799469064699e-05,
      "loss": 3.8563,
      "step": 2416128
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0432413521554179e-05,
      "loss": 3.8604,
      "step": 2416640
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0424027574043659e-05,
      "loss": 3.8642,
      "step": 2417152
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0415641626533138e-05,
      "loss": 3.8497,
      "step": 2417664
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0407255679022618e-05,
      "loss": 3.8582,
      "step": 2418176
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.039888611031583e-05,
      "loss": 3.8616,
      "step": 2418688
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.039050016280531e-05,
      "loss": 3.8573,
      "step": 2419200
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0382114215294791e-05,
      "loss": 3.8533,
      "step": 2419712
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0373744646588e-05,
      "loss": 3.8516,
      "step": 2420224
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.036535869907748e-05,
      "loss": 3.8441,
      "step": 2420736
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0356972751566962e-05,
      "loss": 3.8547,
      "step": 2421248
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.034858680405644e-05,
      "loss": 3.8474,
      "step": 2421760
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.034020085654592e-05,
      "loss": 3.8497,
      "step": 2422272
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.03318149090354e-05,
      "loss": 3.8521,
      "step": 2422784
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0323428961524881e-05,
      "loss": 3.8591,
      "step": 2423296
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.031504301401436e-05,
      "loss": 3.8491,
      "step": 2423808
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.030665706650384e-05,
      "loss": 3.8565,
      "step": 2424320
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0298287497797052e-05,
      "loss": 3.8681,
      "step": 2424832
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.028990155028653e-05,
      "loss": 3.863,
      "step": 2425344
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.028151560277601e-05,
      "loss": 3.8627,
      "step": 2425856
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0273129655265492e-05,
      "loss": 3.8521,
      "step": 2426368
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0264760086558703e-05,
      "loss": 3.856,
      "step": 2426880
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0256374139048181e-05,
      "loss": 3.8562,
      "step": 2427392
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0247988191537663e-05,
      "loss": 3.8439,
      "step": 2427904
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0239602244027143e-05,
      "loss": 3.8572,
      "step": 2428416
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0231216296516621e-05,
      "loss": 3.8514,
      "step": 2428928
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0222830349006103e-05,
      "loss": 3.862,
      "step": 2429440
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0214444401495582e-05,
      "loss": 3.8553,
      "step": 2429952
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0206058453985062e-05,
      "loss": 3.8616,
      "step": 2430464
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0197688885278273e-05,
      "loss": 3.8522,
      "step": 2430976
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0189302937767753e-05,
      "loss": 3.8519,
      "step": 2431488
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0180916990257233e-05,
      "loss": 3.8597,
      "step": 2432000
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0172531042746711e-05,
      "loss": 3.8609,
      "step": 2432512
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0164145095236193e-05,
      "loss": 3.8676,
      "step": 2433024
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0155775526529404e-05,
      "loss": 3.8561,
      "step": 2433536
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0147389579018884e-05,
      "loss": 3.8626,
      "step": 2434048
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0139003631508364e-05,
      "loss": 3.8625,
      "step": 2434560
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0130617683997844e-05,
      "loss": 3.864,
      "step": 2435072
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0122231736487324e-05,
      "loss": 3.8593,
      "step": 2435584
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0113862167780536e-05,
      "loss": 3.8561,
      "step": 2436096
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0105476220270014e-05,
      "loss": 3.86,
      "step": 2436608
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0097090272759494e-05,
      "loss": 3.865,
      "step": 2437120
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0088704325248976e-05,
      "loss": 3.8556,
      "step": 2437632
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0080318377738454e-05,
      "loss": 3.8499,
      "step": 2438144
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0071932430227934e-05,
      "loss": 3.8594,
      "step": 2438656
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0063562861521147e-05,
      "loss": 3.8589,
      "step": 2439168
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0055176914010627e-05,
      "loss": 3.858,
      "step": 2439680
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0046790966500105e-05,
      "loss": 3.8605,
      "step": 2440192
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0038405018989585e-05,
      "loss": 3.8541,
      "step": 2440704
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0030019071479067e-05,
      "loss": 3.8677,
      "step": 2441216
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0021649502772276e-05,
      "loss": 3.8611,
      "step": 2441728
    },
    {
      "epoch": 1.03,
      "eval_loss": 3.976402521133423,
      "eval_runtime": 285.9742,
      "eval_samples_per_second": 1334.355,
      "eval_steps_per_second": 41.7,
      "step": 2442227
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.0013263555261757e-05,
      "loss": 3.8228,
      "step": 2442240
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.0004877607751237e-05,
      "loss": 3.8508,
      "step": 2442752
    },
    {
      "epoch": 0.0,
      "learning_rate": 9.996491660240717e-06,
      "loss": 3.8469,
      "step": 2443264
    },
    {
      "epoch": 0.0,
      "learning_rate": 9.988105712730195e-06,
      "loss": 3.8648,
      "step": 2443776
    },
    {
      "epoch": 0.0,
      "learning_rate": 9.979719765219677e-06,
      "loss": 3.8625,
      "step": 2444288
    },
    {
      "epoch": 0.0,
      "learning_rate": 9.971333817709157e-06,
      "loss": 3.8597,
      "step": 2444800
    },
    {
      "epoch": 0.0,
      "learning_rate": 9.962947870198635e-06,
      "loss": 3.8543,
      "step": 2445312
    },
    {
      "epoch": 0.0,
      "learning_rate": 9.954561922688115e-06,
      "loss": 3.8578,
      "step": 2445824
    },
    {
      "epoch": 0.0,
      "learning_rate": 9.946175975177597e-06,
      "loss": 3.8466,
      "step": 2446336
    },
    {
      "epoch": 0.0,
      "learning_rate": 9.937790027667075e-06,
      "loss": 3.8566,
      "step": 2446848
    },
    {
      "epoch": 0.0,
      "learning_rate": 9.929404080156555e-06,
      "loss": 3.8549,
      "step": 2447360
    },
    {
      "epoch": 0.0,
      "learning_rate": 9.921018132646035e-06,
      "loss": 3.8724,
      "step": 2447872
    },
    {
      "epoch": 0.0,
      "learning_rate": 9.912648563939248e-06,
      "loss": 3.8679,
      "step": 2448384
    },
    {
      "epoch": 0.0,
      "learning_rate": 9.904262616428726e-06,
      "loss": 3.8382,
      "step": 2448896
    },
    {
      "epoch": 0.0,
      "learning_rate": 9.895876668918207e-06,
      "loss": 3.8535,
      "step": 2449408
    },
    {
      "epoch": 0.0,
      "learning_rate": 9.887490721407687e-06,
      "loss": 3.8511,
      "step": 2449920
    },
    {
      "epoch": 0.0,
      "learning_rate": 9.879104773897166e-06,
      "loss": 3.8528,
      "step": 2450432
    },
    {
      "epoch": 0.0,
      "learning_rate": 9.870718826386646e-06,
      "loss": 3.8592,
      "step": 2450944
    },
    {
      "epoch": 0.0,
      "learning_rate": 9.862332878876127e-06,
      "loss": 3.8536,
      "step": 2451456
    },
    {
      "epoch": 0.0,
      "learning_rate": 9.853946931365606e-06,
      "loss": 3.8491,
      "step": 2451968
    },
    {
      "epoch": 0.0,
      "learning_rate": 9.845560983855086e-06,
      "loss": 3.8739,
      "step": 2452480
    },
    {
      "epoch": 0.0,
      "learning_rate": 9.837175036344565e-06,
      "loss": 3.8601,
      "step": 2452992
    },
    {
      "epoch": 0.0,
      "learning_rate": 9.828789088834047e-06,
      "loss": 3.8551,
      "step": 2453504
    },
    {
      "epoch": 0.0,
      "learning_rate": 9.820403141323525e-06,
      "loss": 3.8504,
      "step": 2454016
    },
    {
      "epoch": 0.0,
      "learning_rate": 9.812033572616736e-06,
      "loss": 3.869,
      "step": 2454528
    },
    {
      "epoch": 0.0,
      "learning_rate": 9.803647625106218e-06,
      "loss": 3.8548,
      "step": 2455040
    },
    {
      "epoch": 0.0,
      "learning_rate": 9.795261677595696e-06,
      "loss": 3.8483,
      "step": 2455552
    },
    {
      "epoch": 0.0,
      "learning_rate": 9.786875730085176e-06,
      "loss": 3.8557,
      "step": 2456064
    },
    {
      "epoch": 0.0,
      "learning_rate": 9.778489782574658e-06,
      "loss": 3.8497,
      "step": 2456576
    },
    {
      "epoch": 0.0,
      "learning_rate": 9.770120213867868e-06,
      "loss": 3.8518,
      "step": 2457088
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.761734266357347e-06,
      "loss": 3.8557,
      "step": 2457600
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.753348318846828e-06,
      "loss": 3.8537,
      "step": 2458112
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.744962371336308e-06,
      "loss": 3.8605,
      "step": 2458624
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.736576423825787e-06,
      "loss": 3.8572,
      "step": 2459136
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.728206855118999e-06,
      "loss": 3.8635,
      "step": 2459648
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.71983728641221e-06,
      "loss": 3.8584,
      "step": 2460160
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.71145133890169e-06,
      "loss": 3.8644,
      "step": 2460672
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.70306539139117e-06,
      "loss": 3.8469,
      "step": 2461184
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.69467944388065e-06,
      "loss": 3.8559,
      "step": 2461696
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.68629349637013e-06,
      "loss": 3.8469,
      "step": 2462208
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.67790754885961e-06,
      "loss": 3.852,
      "step": 2462720
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.66952160134909e-06,
      "loss": 3.8534,
      "step": 2463232
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.66113565383857e-06,
      "loss": 3.8598,
      "step": 2463744
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.65274970632805e-06,
      "loss": 3.8501,
      "step": 2464256
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.64436375881753e-06,
      "loss": 3.8586,
      "step": 2464768
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.63597781130701e-06,
      "loss": 3.858,
      "step": 2465280
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.62759186379649e-06,
      "loss": 3.8619,
      "step": 2465792
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.619222295089702e-06,
      "loss": 3.8546,
      "step": 2466304
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.61083634757918e-06,
      "loss": 3.8387,
      "step": 2466816
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.60245040006866e-06,
      "loss": 3.8508,
      "step": 2467328
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.59406445255814e-06,
      "loss": 3.8647,
      "step": 2467840
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.585694883851351e-06,
      "loss": 3.8572,
      "step": 2468352
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.57730893634083e-06,
      "loss": 3.8465,
      "step": 2468864
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.568922988830312e-06,
      "loss": 3.8495,
      "step": 2469376
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.560537041319792e-06,
      "loss": 3.8463,
      "step": 2469888
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.552167472613002e-06,
      "loss": 3.8429,
      "step": 2470400
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.543781525102483e-06,
      "loss": 3.861,
      "step": 2470912
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.535411956395694e-06,
      "loss": 3.8398,
      "step": 2471424
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.527026008885174e-06,
      "loss": 3.8591,
      "step": 2471936
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.518640061374654e-06,
      "loss": 3.8578,
      "step": 2472448
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.510254113864134e-06,
      "loss": 3.8453,
      "step": 2472960
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.501868166353614e-06,
      "loss": 3.862,
      "step": 2473472
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.493482218843094e-06,
      "loss": 3.8428,
      "step": 2473984
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.485112650136305e-06,
      "loss": 3.8505,
      "step": 2474496
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.476726702625784e-06,
      "loss": 3.8408,
      "step": 2475008
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.468340755115266e-06,
      "loss": 3.8602,
      "step": 2475520
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.459954807604744e-06,
      "loss": 3.8475,
      "step": 2476032
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.451568860094224e-06,
      "loss": 3.8533,
      "step": 2476544
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.443199291387437e-06,
      "loss": 3.8368,
      "step": 2477056
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.434813343876915e-06,
      "loss": 3.845,
      "step": 2477568
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.426427396366395e-06,
      "loss": 3.8607,
      "step": 2478080
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.418041448855875e-06,
      "loss": 3.8579,
      "step": 2478592
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.409655501345357e-06,
      "loss": 3.8512,
      "step": 2479104
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.401269553834835e-06,
      "loss": 3.8582,
      "step": 2479616
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.392883606324315e-06,
      "loss": 3.868,
      "step": 2480128
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.384514037617527e-06,
      "loss": 3.8475,
      "step": 2480640
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.376128090107006e-06,
      "loss": 3.8664,
      "step": 2481152
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.367742142596486e-06,
      "loss": 3.8439,
      "step": 2481664
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.359356195085967e-06,
      "loss": 3.8539,
      "step": 2482176
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.350986626379178e-06,
      "loss": 3.8549,
      "step": 2482688
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.342600678868656e-06,
      "loss": 3.8574,
      "step": 2483200
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.334214731358138e-06,
      "loss": 3.8589,
      "step": 2483712
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.325828783847618e-06,
      "loss": 3.8677,
      "step": 2484224
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.317459215140829e-06,
      "loss": 3.8519,
      "step": 2484736
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.30908964643404e-06,
      "loss": 3.8343,
      "step": 2485248
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.30070369892352e-06,
      "loss": 3.843,
      "step": 2485760
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.292317751413001e-06,
      "loss": 3.8547,
      "step": 2486272
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.28393180390248e-06,
      "loss": 3.8481,
      "step": 2486784
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.27556223519569e-06,
      "loss": 3.8509,
      "step": 2487296
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.267176287685172e-06,
      "loss": 3.8569,
      "step": 2487808
    },
    {
      "epoch": 0.02,
      "learning_rate": 9.258790340174652e-06,
      "loss": 3.8493,
      "step": 2488320
    },
    {
      "epoch": 0.02,
      "learning_rate": 9.25040439266413e-06,
      "loss": 3.8519,
      "step": 2488832
    },
    {
      "epoch": 0.02,
      "learning_rate": 9.24201844515361e-06,
      "loss": 3.8488,
      "step": 2489344
    },
    {
      "epoch": 0.02,
      "learning_rate": 9.233632497643092e-06,
      "loss": 3.8627,
      "step": 2489856
    },
    {
      "epoch": 0.02,
      "learning_rate": 9.22524655013257e-06,
      "loss": 3.8639,
      "step": 2490368
    },
    {
      "epoch": 0.02,
      "learning_rate": 9.21686060262205e-06,
      "loss": 3.8587,
      "step": 2490880
    },
    {
      "epoch": 0.02,
      "learning_rate": 9.208491033915262e-06,
      "loss": 3.8412,
      "step": 2491392
    },
    {
      "epoch": 0.02,
      "learning_rate": 9.200105086404742e-06,
      "loss": 3.8524,
      "step": 2491904
    },
    {
      "epoch": 0.02,
      "learning_rate": 9.19171913889422e-06,
      "loss": 3.8543,
      "step": 2492416
    },
    {
      "epoch": 0.02,
      "learning_rate": 9.183333191383702e-06,
      "loss": 3.8564,
      "step": 2492928
    },
    {
      "epoch": 0.02,
      "learning_rate": 9.174947243873182e-06,
      "loss": 3.8623,
      "step": 2493440
    },
    {
      "epoch": 0.02,
      "learning_rate": 9.16656129636266e-06,
      "loss": 3.8471,
      "step": 2493952
    },
    {
      "epoch": 0.02,
      "learning_rate": 9.15817534885214e-06,
      "loss": 3.8519,
      "step": 2494464
    },
    {
      "epoch": 0.02,
      "learning_rate": 9.149805780145353e-06,
      "loss": 3.8558,
      "step": 2494976
    },
    {
      "epoch": 0.02,
      "learning_rate": 9.141419832634833e-06,
      "loss": 3.8581,
      "step": 2495488
    },
    {
      "epoch": 0.02,
      "learning_rate": 9.133033885124313e-06,
      "loss": 3.8446,
      "step": 2496000
    },
    {
      "epoch": 0.02,
      "learning_rate": 9.124664316417524e-06,
      "loss": 3.8521,
      "step": 2496512
    },
    {
      "epoch": 0.02,
      "learning_rate": 9.116278368907003e-06,
      "loss": 3.8422,
      "step": 2497024
    },
    {
      "epoch": 0.02,
      "learning_rate": 9.107892421396483e-06,
      "loss": 3.846,
      "step": 2497536
    },
    {
      "epoch": 0.02,
      "learning_rate": 9.099506473885963e-06,
      "loss": 3.8489,
      "step": 2498048
    },
    {
      "epoch": 0.02,
      "learning_rate": 9.091120526375443e-06,
      "loss": 3.8483,
      "step": 2498560
    },
    {
      "epoch": 0.02,
      "learning_rate": 9.082734578864923e-06,
      "loss": 3.8447,
      "step": 2499072
    },
    {
      "epoch": 0.02,
      "learning_rate": 9.074348631354403e-06,
      "loss": 3.8552,
      "step": 2499584
    },
    {
      "epoch": 0.02,
      "learning_rate": 9.065962683843883e-06,
      "loss": 3.8495,
      "step": 2500096
    },
    {
      "epoch": 0.02,
      "learning_rate": 9.057576736333363e-06,
      "loss": 3.8513,
      "step": 2500608
    },
    {
      "epoch": 0.02,
      "learning_rate": 9.049190788822841e-06,
      "loss": 3.8628,
      "step": 2501120
    },
    {
      "epoch": 0.02,
      "learning_rate": 9.040821220116054e-06,
      "loss": 3.8642,
      "step": 2501632
    },
    {
      "epoch": 0.02,
      "learning_rate": 9.032435272605534e-06,
      "loss": 3.8572,
      "step": 2502144
    },
    {
      "epoch": 0.02,
      "learning_rate": 9.024049325095014e-06,
      "loss": 3.8491,
      "step": 2502656
    },
    {
      "epoch": 0.02,
      "learning_rate": 9.015663377584494e-06,
      "loss": 3.8541,
      "step": 2503168
    },
    {
      "epoch": 0.02,
      "learning_rate": 9.007277430073974e-06,
      "loss": 3.8516,
      "step": 2503680
    },
    {
      "epoch": 0.02,
      "learning_rate": 8.998891482563454e-06,
      "loss": 3.8453,
      "step": 2504192
    },
    {
      "epoch": 0.02,
      "learning_rate": 8.990505535052934e-06,
      "loss": 3.8497,
      "step": 2504704
    },
    {
      "epoch": 0.02,
      "learning_rate": 8.982119587542414e-06,
      "loss": 3.8499,
      "step": 2505216
    },
    {
      "epoch": 0.02,
      "learning_rate": 8.973750018835624e-06,
      "loss": 3.8559,
      "step": 2505728
    },
    {
      "epoch": 0.02,
      "learning_rate": 8.965364071325106e-06,
      "loss": 3.8518,
      "step": 2506240
    },
    {
      "epoch": 0.02,
      "learning_rate": 8.956978123814584e-06,
      "loss": 3.8568,
      "step": 2506752
    },
    {
      "epoch": 0.02,
      "learning_rate": 8.948608555107795e-06,
      "loss": 3.8557,
      "step": 2507264
    },
    {
      "epoch": 0.02,
      "learning_rate": 8.940222607597277e-06,
      "loss": 3.8478,
      "step": 2507776
    },
    {
      "epoch": 0.02,
      "learning_rate": 8.931836660086755e-06,
      "loss": 3.8467,
      "step": 2508288
    },
    {
      "epoch": 0.02,
      "learning_rate": 8.923467091379967e-06,
      "loss": 3.867,
      "step": 2508800
    },
    {
      "epoch": 0.02,
      "learning_rate": 8.915081143869447e-06,
      "loss": 3.8567,
      "step": 2509312
    },
    {
      "epoch": 0.02,
      "learning_rate": 8.906695196358927e-06,
      "loss": 3.8576,
      "step": 2509824
    },
    {
      "epoch": 0.02,
      "learning_rate": 8.898309248848406e-06,
      "loss": 3.8555,
      "step": 2510336
    },
    {
      "epoch": 0.02,
      "learning_rate": 8.889923301337887e-06,
      "loss": 3.8613,
      "step": 2510848
    },
    {
      "epoch": 0.02,
      "learning_rate": 8.881537353827367e-06,
      "loss": 3.8603,
      "step": 2511360
    },
    {
      "epoch": 0.02,
      "learning_rate": 8.873151406316846e-06,
      "loss": 3.8598,
      "step": 2511872
    },
    {
      "epoch": 0.02,
      "learning_rate": 8.864765458806325e-06,
      "loss": 3.8506,
      "step": 2512384
    },
    {
      "epoch": 0.02,
      "learning_rate": 8.856379511295807e-06,
      "loss": 3.8561,
      "step": 2512896
    },
    {
      "epoch": 0.02,
      "learning_rate": 8.847993563785287e-06,
      "loss": 3.8615,
      "step": 2513408
    },
    {
      "epoch": 0.02,
      "learning_rate": 8.839623995078498e-06,
      "loss": 3.8519,
      "step": 2513920
    },
    {
      "epoch": 0.02,
      "learning_rate": 8.831238047567978e-06,
      "loss": 3.849,
      "step": 2514432
    },
    {
      "epoch": 0.02,
      "learning_rate": 8.822852100057458e-06,
      "loss": 3.8526,
      "step": 2514944
    },
    {
      "epoch": 0.02,
      "learning_rate": 8.814466152546936e-06,
      "loss": 3.8623,
      "step": 2515456
    },
    {
      "epoch": 0.02,
      "learning_rate": 8.806080205036418e-06,
      "loss": 3.8476,
      "step": 2515968
    },
    {
      "epoch": 0.02,
      "learning_rate": 8.797694257525898e-06,
      "loss": 3.8599,
      "step": 2516480
    },
    {
      "epoch": 0.02,
      "learning_rate": 8.789324688819108e-06,
      "loss": 3.8542,
      "step": 2516992
    },
    {
      "epoch": 0.02,
      "learning_rate": 8.780938741308588e-06,
      "loss": 3.8632,
      "step": 2517504
    },
    {
      "epoch": 0.02,
      "learning_rate": 8.772552793798068e-06,
      "loss": 3.8597,
      "step": 2518016
    },
    {
      "epoch": 0.02,
      "learning_rate": 8.764166846287548e-06,
      "loss": 3.8488,
      "step": 2518528
    },
    {
      "epoch": 0.03,
      "eval_loss": 3.9756507873535156,
      "eval_runtime": 290.1673,
      "eval_samples_per_second": 1315.073,
      "eval_steps_per_second": 41.097,
      "step": 2518547
    },
    {
      "epoch": 1.0,
      "learning_rate": 8.755780898777027e-06,
      "loss": 3.8469,
      "step": 2519040
    },
    {
      "epoch": 1.0,
      "learning_rate": 8.747394951266508e-06,
      "loss": 3.8448,
      "step": 2519552
    },
    {
      "epoch": 1.0,
      "learning_rate": 8.739025382559719e-06,
      "loss": 3.8638,
      "step": 2520064
    },
    {
      "epoch": 1.0,
      "learning_rate": 8.730639435049199e-06,
      "loss": 3.8561,
      "step": 2520576
    },
    {
      "epoch": 1.0,
      "learning_rate": 8.722253487538679e-06,
      "loss": 3.8586,
      "step": 2521088
    },
    {
      "epoch": 1.0,
      "learning_rate": 8.713867540028159e-06,
      "loss": 3.8545,
      "step": 2521600
    },
    {
      "epoch": 1.0,
      "learning_rate": 8.705481592517639e-06,
      "loss": 3.8477,
      "step": 2522112
    },
    {
      "epoch": 1.0,
      "learning_rate": 8.697095645007119e-06,
      "loss": 3.8427,
      "step": 2522624
    },
    {
      "epoch": 1.0,
      "learning_rate": 8.688709697496599e-06,
      "loss": 3.8577,
      "step": 2523136
    },
    {
      "epoch": 1.0,
      "learning_rate": 8.680323749986079e-06,
      "loss": 3.8451,
      "step": 2523648
    },
    {
      "epoch": 1.0,
      "learning_rate": 8.671970560083022e-06,
      "loss": 3.8745,
      "step": 2524160
    },
    {
      "epoch": 1.0,
      "learning_rate": 8.6635846125725e-06,
      "loss": 3.8601,
      "step": 2524672
    },
    {
      "epoch": 1.0,
      "learning_rate": 8.655215043865713e-06,
      "loss": 3.8415,
      "step": 2525184
    },
    {
      "epoch": 1.0,
      "learning_rate": 8.646829096355193e-06,
      "loss": 3.8458,
      "step": 2525696
    },
    {
      "epoch": 1.0,
      "learning_rate": 8.638443148844673e-06,
      "loss": 3.8472,
      "step": 2526208
    },
    {
      "epoch": 1.0,
      "learning_rate": 8.630057201334153e-06,
      "loss": 3.8461,
      "step": 2526720
    },
    {
      "epoch": 1.0,
      "learning_rate": 8.621671253823633e-06,
      "loss": 3.8534,
      "step": 2527232
    },
    {
      "epoch": 1.0,
      "learning_rate": 8.613285306313113e-06,
      "loss": 3.8553,
      "step": 2527744
    },
    {
      "epoch": 1.0,
      "learning_rate": 8.60489935880259e-06,
      "loss": 3.8405,
      "step": 2528256
    },
    {
      "epoch": 1.0,
      "learning_rate": 8.596513411292072e-06,
      "loss": 3.8722,
      "step": 2528768
    },
    {
      "epoch": 1.0,
      "learning_rate": 8.588127463781552e-06,
      "loss": 3.8488,
      "step": 2529280
    },
    {
      "epoch": 1.0,
      "learning_rate": 8.579757895074763e-06,
      "loss": 3.8569,
      "step": 2529792
    },
    {
      "epoch": 1.0,
      "learning_rate": 8.571371947564243e-06,
      "loss": 3.8483,
      "step": 2530304
    },
    {
      "epoch": 1.0,
      "learning_rate": 8.562986000053723e-06,
      "loss": 3.8674,
      "step": 2530816
    },
    {
      "epoch": 1.0,
      "learning_rate": 8.554600052543203e-06,
      "loss": 3.8482,
      "step": 2531328
    },
    {
      "epoch": 1.0,
      "learning_rate": 8.546214105032681e-06,
      "loss": 3.8508,
      "step": 2531840
    },
    {
      "epoch": 1.0,
      "learning_rate": 8.537828157522163e-06,
      "loss": 3.8487,
      "step": 2532352
    },
    {
      "epoch": 1.0,
      "learning_rate": 8.529442210011643e-06,
      "loss": 3.8482,
      "step": 2532864
    },
    {
      "epoch": 1.0,
      "learning_rate": 8.521056262501121e-06,
      "loss": 3.8473,
      "step": 2533376
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.512686693794334e-06,
      "loss": 3.8508,
      "step": 2533888
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.504300746283814e-06,
      "loss": 3.8546,
      "step": 2534400
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.495914798773294e-06,
      "loss": 3.8548,
      "step": 2534912
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.487528851262774e-06,
      "loss": 3.8563,
      "step": 2535424
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.479159282555984e-06,
      "loss": 3.8602,
      "step": 2535936
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.470789713849197e-06,
      "loss": 3.8552,
      "step": 2536448
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.462403766338677e-06,
      "loss": 3.8571,
      "step": 2536960
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.454017818828155e-06,
      "loss": 3.8471,
      "step": 2537472
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.445631871317635e-06,
      "loss": 3.8518,
      "step": 2537984
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.437245923807117e-06,
      "loss": 3.8435,
      "step": 2538496
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.428859976296595e-06,
      "loss": 3.8518,
      "step": 2539008
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.420474028786075e-06,
      "loss": 3.8512,
      "step": 2539520
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.412088081275555e-06,
      "loss": 3.8554,
      "step": 2540032
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.403702133765036e-06,
      "loss": 3.8461,
      "step": 2540544
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.395316186254515e-06,
      "loss": 3.8579,
      "step": 2541056
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.386930238743995e-06,
      "loss": 3.8521,
      "step": 2541568
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.378544291233476e-06,
      "loss": 3.8578,
      "step": 2542080
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.370158343722955e-06,
      "loss": 3.8521,
      "step": 2542592
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.361805153819898e-06,
      "loss": 3.8388,
      "step": 2543104
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.353419206309378e-06,
      "loss": 3.8432,
      "step": 2543616
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.345033258798858e-06,
      "loss": 3.864,
      "step": 2544128
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.336647311288338e-06,
      "loss": 3.851,
      "step": 2544640
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.328261363777818e-06,
      "loss": 3.8437,
      "step": 2545152
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.319875416267298e-06,
      "loss": 3.8406,
      "step": 2545664
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.311489468756776e-06,
      "loss": 3.8503,
      "step": 2546176
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.303103521246258e-06,
      "loss": 3.8314,
      "step": 2546688
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.294733952539468e-06,
      "loss": 3.8618,
      "step": 2547200
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.286348005028948e-06,
      "loss": 3.8325,
      "step": 2547712
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.277978436322159e-06,
      "loss": 3.8565,
      "step": 2548224
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.269592488811639e-06,
      "loss": 3.8556,
      "step": 2548736
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.261206541301119e-06,
      "loss": 3.8404,
      "step": 2549248
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.2528205937906e-06,
      "loss": 3.8558,
      "step": 2549760
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.244434646280079e-06,
      "loss": 3.8424,
      "step": 2550272
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.236048698769559e-06,
      "loss": 3.8472,
      "step": 2550784
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.227662751259039e-06,
      "loss": 3.836,
      "step": 2551296
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.219276803748519e-06,
      "loss": 3.8583,
      "step": 2551808
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.21090723504173e-06,
      "loss": 3.8428,
      "step": 2552320
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.202537666334942e-06,
      "loss": 3.8494,
      "step": 2552832
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.194151718824422e-06,
      "loss": 3.8336,
      "step": 2553344
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.1857657713139e-06,
      "loss": 3.8444,
      "step": 2553856
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.177379823803382e-06,
      "loss": 3.855,
      "step": 2554368
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.169010255096593e-06,
      "loss": 3.8561,
      "step": 2554880
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.160624307586073e-06,
      "loss": 3.8523,
      "step": 2555392
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.152238360075553e-06,
      "loss": 3.8566,
      "step": 2555904
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.143852412565033e-06,
      "loss": 3.8593,
      "step": 2556416
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.135466465054513e-06,
      "loss": 3.8494,
      "step": 2556928
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.127096896347723e-06,
      "loss": 3.8605,
      "step": 2557440
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.118710948837203e-06,
      "loss": 3.8431,
      "step": 2557952
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.110325001326683e-06,
      "loss": 3.8511,
      "step": 2558464
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.101939053816163e-06,
      "loss": 3.8455,
      "step": 2558976
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.093569485109374e-06,
      "loss": 3.8565,
      "step": 2559488
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.085199916402587e-06,
      "loss": 3.8535,
      "step": 2560000
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.076813968892066e-06,
      "loss": 3.863,
      "step": 2560512
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.068428021381545e-06,
      "loss": 3.8536,
      "step": 2561024
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.060042073871026e-06,
      "loss": 3.8297,
      "step": 2561536
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.051656126360506e-06,
      "loss": 3.8402,
      "step": 2562048
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.043270178849986e-06,
      "loss": 3.8514,
      "step": 2562560
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.034884231339465e-06,
      "loss": 3.8476,
      "step": 2563072
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.026498283828946e-06,
      "loss": 3.8463,
      "step": 2563584
    },
    {
      "epoch": 1.01,
      "learning_rate": 8.018112336318426e-06,
      "loss": 3.8555,
      "step": 2564096
    },
    {
      "epoch": 1.02,
      "learning_rate": 8.009726388807904e-06,
      "loss": 3.8423,
      "step": 2564608
    },
    {
      "epoch": 1.02,
      "learning_rate": 8.001340441297384e-06,
      "loss": 3.8481,
      "step": 2565120
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.992970872590597e-06,
      "loss": 3.8492,
      "step": 2565632
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.984601303883808e-06,
      "loss": 3.8603,
      "step": 2566144
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.976215356373288e-06,
      "loss": 3.8579,
      "step": 2566656
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.967829408862768e-06,
      "loss": 3.862,
      "step": 2567168
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.959443461352248e-06,
      "loss": 3.8367,
      "step": 2567680
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.951057513841727e-06,
      "loss": 3.8519,
      "step": 2568192
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.942687945134938e-06,
      "loss": 3.8505,
      "step": 2568704
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.934301997624418e-06,
      "loss": 3.8517,
      "step": 2569216
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.9259160501139e-06,
      "loss": 3.8631,
      "step": 2569728
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.917530102603378e-06,
      "loss": 3.8397,
      "step": 2570240
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.909144155092858e-06,
      "loss": 3.8483,
      "step": 2570752
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.900758207582338e-06,
      "loss": 3.8541,
      "step": 2571264
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.892372260071818e-06,
      "loss": 3.8568,
      "step": 2571776
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.883986312561298e-06,
      "loss": 3.8405,
      "step": 2572288
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.875616743854509e-06,
      "loss": 3.8492,
      "step": 2572800
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.86723079634399e-06,
      "loss": 3.8424,
      "step": 2573312
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.8588612276372e-06,
      "loss": 3.8426,
      "step": 2573824
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.850491658930412e-06,
      "loss": 3.8442,
      "step": 2574336
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.842105711419892e-06,
      "loss": 3.8468,
      "step": 2574848
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.833719763909372e-06,
      "loss": 3.8414,
      "step": 2575360
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.825333816398852e-06,
      "loss": 3.8496,
      "step": 2575872
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.816947868888332e-06,
      "loss": 3.8425,
      "step": 2576384
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.808561921377812e-06,
      "loss": 3.8476,
      "step": 2576896
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.80017597386729e-06,
      "loss": 3.8613,
      "step": 2577408
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.791790026356772e-06,
      "loss": 3.8597,
      "step": 2577920
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.783404078846252e-06,
      "loss": 3.8524,
      "step": 2578432
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.775018131335732e-06,
      "loss": 3.8497,
      "step": 2578944
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.766648562628942e-06,
      "loss": 3.8505,
      "step": 2579456
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.758278993922153e-06,
      "loss": 3.85,
      "step": 2579968
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.749893046411635e-06,
      "loss": 3.8389,
      "step": 2580480
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.741507098901115e-06,
      "loss": 3.8473,
      "step": 2580992
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.733121151390593e-06,
      "loss": 3.8486,
      "step": 2581504
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.724735203880073e-06,
      "loss": 3.8524,
      "step": 2582016
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.716349256369555e-06,
      "loss": 3.8505,
      "step": 2582528
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.707963308859033e-06,
      "loss": 3.8531,
      "step": 2583040
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.699577361348513e-06,
      "loss": 3.8502,
      "step": 2583552
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.691191413837993e-06,
      "loss": 3.8457,
      "step": 2584064
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.682821845131205e-06,
      "loss": 3.8415,
      "step": 2584576
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.674435897620684e-06,
      "loss": 3.8641,
      "step": 2585088
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.666049950110164e-06,
      "loss": 3.8559,
      "step": 2585600
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.657664002599645e-06,
      "loss": 3.8544,
      "step": 2586112
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.649278055089123e-06,
      "loss": 3.8522,
      "step": 2586624
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.640908486382336e-06,
      "loss": 3.8547,
      "step": 2587136
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.632522538871816e-06,
      "loss": 3.8575,
      "step": 2587648
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.624136591361296e-06,
      "loss": 3.8598,
      "step": 2588160
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.615750643850775e-06,
      "loss": 3.85,
      "step": 2588672
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.607364696340255e-06,
      "loss": 3.8551,
      "step": 2589184
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.598978748829736e-06,
      "loss": 3.8543,
      "step": 2589696
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.590592801319214e-06,
      "loss": 3.8492,
      "step": 2590208
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.582206853808695e-06,
      "loss": 3.8478,
      "step": 2590720
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.573837285101906e-06,
      "loss": 3.8539,
      "step": 2591232
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.565451337591386e-06,
      "loss": 3.8563,
      "step": 2591744
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.5570653900808654e-06,
      "loss": 3.8439,
      "step": 2592256
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.548679442570345e-06,
      "loss": 3.858,
      "step": 2592768
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.540293495059826e-06,
      "loss": 3.8505,
      "step": 2593280
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.531923926353036e-06,
      "loss": 3.8571,
      "step": 2593792
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.523554357646248e-06,
      "loss": 3.8562,
      "step": 2594304
    },
    {
      "epoch": 1.02,
      "learning_rate": 7.515168410135729e-06,
      "loss": 3.8468,
      "step": 2594816
    },
    {
      "epoch": 1.03,
      "eval_loss": 3.974666118621826,
      "eval_runtime": 291.2535,
      "eval_samples_per_second": 1310.168,
      "eval_steps_per_second": 40.944,
      "step": 2594867
    },
    {
      "epoch": 0.0,
      "learning_rate": 7.5067824626252085e-06,
      "loss": 3.8429,
      "step": 2595328
    },
    {
      "epoch": 0.0,
      "learning_rate": 7.498396515114688e-06,
      "loss": 3.8422,
      "step": 2595840
    },
    {
      "epoch": 0.0,
      "learning_rate": 7.490010567604168e-06,
      "loss": 3.8615,
      "step": 2596352
    },
    {
      "epoch": 0.0,
      "learning_rate": 7.481624620093648e-06,
      "loss": 3.8503,
      "step": 2596864
    },
    {
      "epoch": 0.0,
      "learning_rate": 7.4732386725831275e-06,
      "loss": 3.8596,
      "step": 2597376
    },
    {
      "epoch": 0.0,
      "learning_rate": 7.4648527250726075e-06,
      "loss": 3.8466,
      "step": 2597888
    },
    {
      "epoch": 0.0,
      "learning_rate": 7.4564667775620874e-06,
      "loss": 3.851,
      "step": 2598400
    },
    {
      "epoch": 0.0,
      "learning_rate": 7.4480808300515665e-06,
      "loss": 3.8401,
      "step": 2598912
    },
    {
      "epoch": 0.0,
      "learning_rate": 7.439694882541047e-06,
      "loss": 3.852,
      "step": 2599424
    },
    {
      "epoch": 0.0,
      "learning_rate": 7.431308935030527e-06,
      "loss": 3.8478,
      "step": 2599936
    },
    {
      "epoch": 0.0,
      "learning_rate": 7.422922987520007e-06,
      "loss": 3.8693,
      "step": 2600448
    },
    {
      "epoch": 0.0,
      "learning_rate": 7.414537040009486e-06,
      "loss": 3.8538,
      "step": 2600960
    },
    {
      "epoch": 0.0,
      "learning_rate": 7.406167471302698e-06,
      "loss": 3.8389,
      "step": 2601472
    },
    {
      "epoch": 0.0,
      "learning_rate": 7.397781523792179e-06,
      "loss": 3.8441,
      "step": 2601984
    },
    {
      "epoch": 0.0,
      "learning_rate": 7.389395576281657e-06,
      "loss": 3.8525,
      "step": 2602496
    },
    {
      "epoch": 0.0,
      "learning_rate": 7.381009628771138e-06,
      "loss": 3.8325,
      "step": 2603008
    },
    {
      "epoch": 0.0,
      "learning_rate": 7.372623681260618e-06,
      "loss": 3.8542,
      "step": 2603520
    },
    {
      "epoch": 0.0,
      "learning_rate": 7.364237733750099e-06,
      "loss": 3.8504,
      "step": 2604032
    },
    {
      "epoch": 0.0,
      "learning_rate": 7.355851786239578e-06,
      "loss": 3.8425,
      "step": 2604544
    },
    {
      "epoch": 0.0,
      "learning_rate": 7.347465838729058e-06,
      "loss": 3.8719,
      "step": 2605056
    },
    {
      "epoch": 0.0,
      "learning_rate": 7.339079891218538e-06,
      "loss": 3.8455,
      "step": 2605568
    },
    {
      "epoch": 0.0,
      "learning_rate": 7.330693943708017e-06,
      "loss": 3.8561,
      "step": 2606080
    },
    {
      "epoch": 0.0,
      "learning_rate": 7.322324375001228e-06,
      "loss": 3.8468,
      "step": 2606592
    },
    {
      "epoch": 0.0,
      "learning_rate": 7.313938427490709e-06,
      "loss": 3.8628,
      "step": 2607104
    },
    {
      "epoch": 0.0,
      "learning_rate": 7.305552479980189e-06,
      "loss": 3.8409,
      "step": 2607616
    },
    {
      "epoch": 0.0,
      "learning_rate": 7.297166532469668e-06,
      "loss": 3.8489,
      "step": 2608128
    },
    {
      "epoch": 0.0,
      "learning_rate": 7.288780584959148e-06,
      "loss": 3.8421,
      "step": 2608640
    },
    {
      "epoch": 0.0,
      "learning_rate": 7.280394637448629e-06,
      "loss": 3.8477,
      "step": 2609152
    },
    {
      "epoch": 0.0,
      "learning_rate": 7.272025068741839e-06,
      "loss": 3.8473,
      "step": 2609664
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.263639121231319e-06,
      "loss": 3.8467,
      "step": 2610176
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.2552531737208e-06,
      "loss": 3.8512,
      "step": 2610688
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.24686722621028e-06,
      "loss": 3.854,
      "step": 2611200
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.238481278699759e-06,
      "loss": 3.8482,
      "step": 2611712
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.230095331189239e-06,
      "loss": 3.8625,
      "step": 2612224
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.221725762482451e-06,
      "loss": 3.85,
      "step": 2612736
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.2133398149719295e-06,
      "loss": 3.8522,
      "step": 2613248
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.20495386746141e-06,
      "loss": 3.8443,
      "step": 2613760
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.19656791995089e-06,
      "loss": 3.8465,
      "step": 2614272
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.188181972440371e-06,
      "loss": 3.8405,
      "step": 2614784
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.179812403733581e-06,
      "loss": 3.8493,
      "step": 2615296
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.171426456223061e-06,
      "loss": 3.8459,
      "step": 2615808
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.163040508712542e-06,
      "loss": 3.8566,
      "step": 2616320
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.154654561202021e-06,
      "loss": 3.8428,
      "step": 2616832
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.146268613691501e-06,
      "loss": 3.852,
      "step": 2617344
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.137882666180981e-06,
      "loss": 3.854,
      "step": 2617856
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.1294967186704616e-06,
      "loss": 3.8539,
      "step": 2618368
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.121110771159941e-06,
      "loss": 3.8504,
      "step": 2618880
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.112741202453152e-06,
      "loss": 3.8354,
      "step": 2619392
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.104355254942632e-06,
      "loss": 3.8423,
      "step": 2619904
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.095969307432111e-06,
      "loss": 3.8574,
      "step": 2620416
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.087583359921591e-06,
      "loss": 3.8495,
      "step": 2620928
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.079213791214803e-06,
      "loss": 3.8462,
      "step": 2621440
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.070827843704284e-06,
      "loss": 3.8356,
      "step": 2621952
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.062441896193762e-06,
      "loss": 3.8477,
      "step": 2622464
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.054055948683243e-06,
      "loss": 3.8277,
      "step": 2622976
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.045670001172723e-06,
      "loss": 3.8608,
      "step": 2623488
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.037284053662202e-06,
      "loss": 3.8269,
      "step": 2624000
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.028898106151683e-06,
      "loss": 3.8578,
      "step": 2624512
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.020528537444894e-06,
      "loss": 3.8491,
      "step": 2625024
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.012142589934374e-06,
      "loss": 3.8399,
      "step": 2625536
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.003756642423853e-06,
      "loss": 3.8533,
      "step": 2626048
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.995370694913333e-06,
      "loss": 3.8412,
      "step": 2626560
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.986984747402814e-06,
      "loss": 3.8426,
      "step": 2627072
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.978615178696026e-06,
      "loss": 3.8345,
      "step": 2627584
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.970229231185504e-06,
      "loss": 3.8556,
      "step": 2628096
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.961843283674985e-06,
      "loss": 3.8385,
      "step": 2628608
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.953457336164465e-06,
      "loss": 3.8475,
      "step": 2629120
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.945087767457676e-06,
      "loss": 3.8294,
      "step": 2629632
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.9367018199471556e-06,
      "loss": 3.8393,
      "step": 2630144
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.9283158724366355e-06,
      "loss": 3.8523,
      "step": 2630656
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.919946303729848e-06,
      "loss": 3.8545,
      "step": 2631168
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.911560356219326e-06,
      "loss": 3.8445,
      "step": 2631680
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.903174408708807e-06,
      "loss": 3.8556,
      "step": 2632192
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.894788461198287e-06,
      "loss": 3.8542,
      "step": 2632704
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.886402513687766e-06,
      "loss": 3.8483,
      "step": 2633216
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.878016566177246e-06,
      "loss": 3.8527,
      "step": 2633728
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.869646997470458e-06,
      "loss": 3.8458,
      "step": 2634240
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.8612610499599385e-06,
      "loss": 3.8469,
      "step": 2634752
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.852875102449418e-06,
      "loss": 3.8442,
      "step": 2635264
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.844489154938898e-06,
      "loss": 3.8522,
      "step": 2635776
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.836119586232109e-06,
      "loss": 3.8536,
      "step": 2636288
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.827733638721588e-06,
      "loss": 3.86,
      "step": 2636800
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.819347691211068e-06,
      "loss": 3.8501,
      "step": 2637312
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.810961743700549e-06,
      "loss": 3.8303,
      "step": 2637824
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.802575796190029e-06,
      "loss": 3.8327,
      "step": 2638336
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.794189848679508e-06,
      "loss": 3.8487,
      "step": 2638848
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.785803901168988e-06,
      "loss": 3.8448,
      "step": 2639360
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.7774343324622e-06,
      "loss": 3.8422,
      "step": 2639872
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.769048384951679e-06,
      "loss": 3.8504,
      "step": 2640384
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.760662437441159e-06,
      "loss": 3.8444,
      "step": 2640896
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.75227648993064e-06,
      "loss": 3.84,
      "step": 2641408
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.74389054242012e-06,
      "loss": 3.8438,
      "step": 2641920
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.735504594909599e-06,
      "loss": 3.8608,
      "step": 2642432
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.7271186473990795e-06,
      "loss": 3.8514,
      "step": 2642944
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.7187326998885594e-06,
      "loss": 3.8574,
      "step": 2643456
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.710363131181769e-06,
      "loss": 3.837,
      "step": 2643968
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.70197718367125e-06,
      "loss": 3.8472,
      "step": 2644480
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.69359123616073e-06,
      "loss": 3.8484,
      "step": 2644992
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.685205288650211e-06,
      "loss": 3.8466,
      "step": 2645504
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.676819341139689e-06,
      "loss": 3.8589,
      "step": 2646016
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.668449772432901e-06,
      "loss": 3.8409,
      "step": 2646528
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.660063824922382e-06,
      "loss": 3.8437,
      "step": 2647040
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.651677877411861e-06,
      "loss": 3.8518,
      "step": 2647552
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.643291929901341e-06,
      "loss": 3.8546,
      "step": 2648064
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.634905982390821e-06,
      "loss": 3.8376,
      "step": 2648576
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.626536413684032e-06,
      "loss": 3.8459,
      "step": 2649088
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.618150466173511e-06,
      "loss": 3.8423,
      "step": 2649600
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.609764518662992e-06,
      "loss": 3.8416,
      "step": 2650112
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.601378571152472e-06,
      "loss": 3.8378,
      "step": 2650624
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.592992623641951e-06,
      "loss": 3.8432,
      "step": 2651136
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.584606676131431e-06,
      "loss": 3.8402,
      "step": 2651648
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.576220728620912e-06,
      "loss": 3.8488,
      "step": 2652160
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.567834781110392e-06,
      "loss": 3.839,
      "step": 2652672
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.559465212403603e-06,
      "loss": 3.8454,
      "step": 2653184
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.551079264893083e-06,
      "loss": 3.8588,
      "step": 2653696
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.542693317382563e-06,
      "loss": 3.8559,
      "step": 2654208
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.534307369872042e-06,
      "loss": 3.8477,
      "step": 2654720
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.5259378011652534e-06,
      "loss": 3.8479,
      "step": 2655232
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.517551853654734e-06,
      "loss": 3.8446,
      "step": 2655744
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.509165906144214e-06,
      "loss": 3.8464,
      "step": 2656256
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.500779958633693e-06,
      "loss": 3.8421,
      "step": 2656768
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.492410389926905e-06,
      "loss": 3.842,
      "step": 2657280
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.484024442416385e-06,
      "loss": 3.8433,
      "step": 2657792
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.4756548737095965e-06,
      "loss": 3.8553,
      "step": 2658304
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.467268926199076e-06,
      "loss": 3.8482,
      "step": 2658816
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.458882978688556e-06,
      "loss": 3.8505,
      "step": 2659328
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.450497031178036e-06,
      "loss": 3.8489,
      "step": 2659840
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.4421110836675155e-06,
      "loss": 3.842,
      "step": 2660352
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.4337251361569955e-06,
      "loss": 3.8395,
      "step": 2660864
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.425339188646476e-06,
      "loss": 3.8616,
      "step": 2661376
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.416953241135956e-06,
      "loss": 3.8547,
      "step": 2661888
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.408583672429166e-06,
      "loss": 3.8554,
      "step": 2662400
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.400214103722378e-06,
      "loss": 3.8466,
      "step": 2662912
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.391828156211859e-06,
      "loss": 3.8482,
      "step": 2663424
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.3834422087013386e-06,
      "loss": 3.8549,
      "step": 2663936
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.375056261190818e-06,
      "loss": 3.8614,
      "step": 2664448
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.366670313680298e-06,
      "loss": 3.8488,
      "step": 2664960
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.3582843661697784e-06,
      "loss": 3.8451,
      "step": 2665472
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.3498984186592575e-06,
      "loss": 3.8505,
      "step": 2665984
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.341528849952469e-06,
      "loss": 3.8495,
      "step": 2666496
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.333142902441949e-06,
      "loss": 3.8453,
      "step": 2667008
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.324756954931428e-06,
      "loss": 3.8486,
      "step": 2667520
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.316371007420908e-06,
      "loss": 3.8564,
      "step": 2668032
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.307985059910389e-06,
      "loss": 3.8391,
      "step": 2668544
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.299599112399869e-06,
      "loss": 3.8531,
      "step": 2669056
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.291213164889348e-06,
      "loss": 3.8522,
      "step": 2669568
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.282827217378828e-06,
      "loss": 3.8503,
      "step": 2670080
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.274441269868309e-06,
      "loss": 3.8541,
      "step": 2670592
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.266071701161519e-06,
      "loss": 3.8413,
      "step": 2671104
    },
    {
      "epoch": 0.03,
      "eval_loss": 3.97413969039917,
      "eval_runtime": 298.1698,
      "eval_samples_per_second": 1279.777,
      "eval_steps_per_second": 39.994,
      "step": 2671187
    },
    {
      "epoch": 0.0,
      "learning_rate": 6.257685753651e-06,
      "loss": 3.8388,
      "step": 2671616
    },
    {
      "epoch": 0.0,
      "learning_rate": 6.2492998061404795e-06,
      "loss": 3.8393,
      "step": 2672128
    },
    {
      "epoch": 0.0,
      "learning_rate": 6.2409138586299595e-06,
      "loss": 3.86,
      "step": 2672640
    },
    {
      "epoch": 0.0,
      "learning_rate": 6.2325279111194394e-06,
      "loss": 3.8436,
      "step": 2673152
    },
    {
      "epoch": 0.0,
      "learning_rate": 6.224141963608919e-06,
      "loss": 3.8615,
      "step": 2673664
    },
    {
      "epoch": 0.0,
      "learning_rate": 6.2157560160983985e-06,
      "loss": 3.8447,
      "step": 2674176
    },
    {
      "epoch": 0.0,
      "learning_rate": 6.207370068587879e-06,
      "loss": 3.8478,
      "step": 2674688
    },
    {
      "epoch": 0.0,
      "learning_rate": 6.198984121077358e-06,
      "loss": 3.8347,
      "step": 2675200
    },
    {
      "epoch": 0.0,
      "learning_rate": 6.190598173566839e-06,
      "loss": 3.8534,
      "step": 2675712
    },
    {
      "epoch": 0.0,
      "learning_rate": 6.182212226056318e-06,
      "loss": 3.8398,
      "step": 2676224
    },
    {
      "epoch": 0.0,
      "learning_rate": 6.173826278545798e-06,
      "loss": 3.8662,
      "step": 2676736
    },
    {
      "epoch": 0.0,
      "learning_rate": 6.165440331035278e-06,
      "loss": 3.851,
      "step": 2677248
    },
    {
      "epoch": 0.0,
      "learning_rate": 6.157070762328489e-06,
      "loss": 3.8356,
      "step": 2677760
    },
    {
      "epoch": 0.0,
      "learning_rate": 6.14868481481797e-06,
      "loss": 3.8405,
      "step": 2678272
    },
    {
      "epoch": 0.0,
      "learning_rate": 6.140298867307449e-06,
      "loss": 3.853,
      "step": 2678784
    },
    {
      "epoch": 0.0,
      "learning_rate": 6.13191291979693e-06,
      "loss": 3.8356,
      "step": 2679296
    },
    {
      "epoch": 0.0,
      "learning_rate": 6.1235433510901405e-06,
      "loss": 3.8486,
      "step": 2679808
    },
    {
      "epoch": 0.0,
      "learning_rate": 6.115157403579621e-06,
      "loss": 3.8467,
      "step": 2680320
    },
    {
      "epoch": 0.0,
      "learning_rate": 6.1067714560691004e-06,
      "loss": 3.8411,
      "step": 2680832
    },
    {
      "epoch": 0.0,
      "learning_rate": 6.09838550855858e-06,
      "loss": 3.8698,
      "step": 2681344
    },
    {
      "epoch": 0.0,
      "learning_rate": 6.08999956104806e-06,
      "loss": 3.8475,
      "step": 2681856
    },
    {
      "epoch": 0.0,
      "learning_rate": 6.08161361353754e-06,
      "loss": 3.8483,
      "step": 2682368
    },
    {
      "epoch": 0.0,
      "learning_rate": 6.073227666027019e-06,
      "loss": 3.8453,
      "step": 2682880
    },
    {
      "epoch": 0.0,
      "learning_rate": 6.0648417185165e-06,
      "loss": 3.8608,
      "step": 2683392
    },
    {
      "epoch": 0.0,
      "learning_rate": 6.056472149809711e-06,
      "loss": 3.8396,
      "step": 2683904
    },
    {
      "epoch": 0.0,
      "learning_rate": 6.048086202299191e-06,
      "loss": 3.844,
      "step": 2684416
    },
    {
      "epoch": 0.0,
      "learning_rate": 6.039700254788671e-06,
      "loss": 3.8428,
      "step": 2684928
    },
    {
      "epoch": 0.0,
      "learning_rate": 6.031314307278151e-06,
      "loss": 3.8448,
      "step": 2685440
    },
    {
      "epoch": 0.0,
      "learning_rate": 6.022928359767631e-06,
      "loss": 3.843,
      "step": 2685952
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.014542412257111e-06,
      "loss": 3.8432,
      "step": 2686464
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.006156464746591e-06,
      "loss": 3.8505,
      "step": 2686976
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.9977868960398015e-06,
      "loss": 3.8477,
      "step": 2687488
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.989400948529282e-06,
      "loss": 3.8481,
      "step": 2688000
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.981031379822493e-06,
      "loss": 3.8572,
      "step": 2688512
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.972661811115705e-06,
      "loss": 3.8468,
      "step": 2689024
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.964275863605185e-06,
      "loss": 3.8513,
      "step": 2689536
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.955889916094665e-06,
      "loss": 3.8402,
      "step": 2690048
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.947503968584145e-06,
      "loss": 3.8485,
      "step": 2690560
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.939118021073625e-06,
      "loss": 3.8368,
      "step": 2691072
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.9307320735631045e-06,
      "loss": 3.8448,
      "step": 2691584
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.922346126052584e-06,
      "loss": 3.8447,
      "step": 2692096
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.9139601785420645e-06,
      "loss": 3.8527,
      "step": 2692608
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.9055742310315436e-06,
      "loss": 3.8364,
      "step": 2693120
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.897188283521024e-06,
      "loss": 3.8482,
      "step": 2693632
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.8888023360105035e-06,
      "loss": 3.8537,
      "step": 2694144
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.8804163884999834e-06,
      "loss": 3.8504,
      "step": 2694656
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.872030440989463e-06,
      "loss": 3.8532,
      "step": 2695168
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.863644493478943e-06,
      "loss": 3.832,
      "step": 2695680
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.855274924772155e-06,
      "loss": 3.838,
      "step": 2696192
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.846888977261634e-06,
      "loss": 3.8516,
      "step": 2696704
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.838503029751115e-06,
      "loss": 3.8448,
      "step": 2697216
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.830117082240594e-06,
      "loss": 3.8432,
      "step": 2697728
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.821731134730074e-06,
      "loss": 3.8342,
      "step": 2698240
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.813345187219555e-06,
      "loss": 3.8474,
      "step": 2698752
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.804959239709034e-06,
      "loss": 3.8263,
      "step": 2699264
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.796573292198515e-06,
      "loss": 3.8525,
      "step": 2699776
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.7882037234917255e-06,
      "loss": 3.8301,
      "step": 2700288
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.779817775981205e-06,
      "loss": 3.848,
      "step": 2700800
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.771431828470685e-06,
      "loss": 3.8494,
      "step": 2701312
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.7630458809601645e-06,
      "loss": 3.836,
      "step": 2701824
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.754676312253376e-06,
      "loss": 3.8511,
      "step": 2702336
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.746290364742856e-06,
      "loss": 3.8395,
      "step": 2702848
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.737904417232336e-06,
      "loss": 3.8361,
      "step": 2703360
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.729518469721816e-06,
      "loss": 3.8335,
      "step": 2703872
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.721165279818759e-06,
      "loss": 3.8504,
      "step": 2704384
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.712779332308238e-06,
      "loss": 3.8389,
      "step": 2704896
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.704393384797719e-06,
      "loss": 3.8445,
      "step": 2705408
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.696007437287198e-06,
      "loss": 3.8289,
      "step": 2705920
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.687621489776679e-06,
      "loss": 3.8349,
      "step": 2706432
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.679235542266158e-06,
      "loss": 3.8481,
      "step": 2706944
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.670849594755638e-06,
      "loss": 3.8533,
      "step": 2707456
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.662463647245118e-06,
      "loss": 3.8383,
      "step": 2707968
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.6541104573420615e-06,
      "loss": 3.8579,
      "step": 2708480
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.6457245098315414e-06,
      "loss": 3.8525,
      "step": 2708992
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.6373385623210205e-06,
      "loss": 3.844,
      "step": 2709504
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.628952614810501e-06,
      "loss": 3.8483,
      "step": 2710016
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.6205666672999804e-06,
      "loss": 3.8422,
      "step": 2710528
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.612180719789461e-06,
      "loss": 3.8449,
      "step": 2711040
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.60379477227894e-06,
      "loss": 3.8428,
      "step": 2711552
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.59540882476842e-06,
      "loss": 3.8486,
      "step": 2712064
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.587039256061632e-06,
      "loss": 3.8559,
      "step": 2712576
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.578653308551111e-06,
      "loss": 3.8575,
      "step": 2713088
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.570283739844323e-06,
      "loss": 3.8462,
      "step": 2713600
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.561897792333803e-06,
      "loss": 3.826,
      "step": 2714112
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.553511844823283e-06,
      "loss": 3.8368,
      "step": 2714624
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.5451258973127626e-06,
      "loss": 3.8419,
      "step": 2715136
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.5367399498022425e-06,
      "loss": 3.8457,
      "step": 2715648
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.528370381095454e-06,
      "loss": 3.8395,
      "step": 2716160
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.519984433584934e-06,
      "loss": 3.8483,
      "step": 2716672
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.511598486074414e-06,
      "loss": 3.8406,
      "step": 2717184
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.503212538563893e-06,
      "loss": 3.8379,
      "step": 2717696
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.494826591053374e-06,
      "loss": 3.8459,
      "step": 2718208
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.486457022346585e-06,
      "loss": 3.8524,
      "step": 2718720
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.478071074836065e-06,
      "loss": 3.8475,
      "step": 2719232
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.469685127325545e-06,
      "loss": 3.8575,
      "step": 2719744
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.461299179815025e-06,
      "loss": 3.8387,
      "step": 2720256
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.452913232304505e-06,
      "loss": 3.8425,
      "step": 2720768
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.4445272847939845e-06,
      "loss": 3.8447,
      "step": 2721280
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.4361413372834645e-06,
      "loss": 3.8465,
      "step": 2721792
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.427771768576675e-06,
      "loss": 3.854,
      "step": 2722304
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.419385821066156e-06,
      "loss": 3.8377,
      "step": 2722816
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.410999873555635e-06,
      "loss": 3.8427,
      "step": 2723328
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.402613926045115e-06,
      "loss": 3.8466,
      "step": 2723840
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.394227978534595e-06,
      "loss": 3.8509,
      "step": 2724352
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.385858409827806e-06,
      "loss": 3.8328,
      "step": 2724864
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.377472462317287e-06,
      "loss": 3.8448,
      "step": 2725376
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.369086514806766e-06,
      "loss": 3.8423,
      "step": 2725888
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.360700567296247e-06,
      "loss": 3.8348,
      "step": 2726400
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.352314619785726e-06,
      "loss": 3.8361,
      "step": 2726912
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.343928672275206e-06,
      "loss": 3.8413,
      "step": 2727424
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.3355427247646865e-06,
      "loss": 3.8432,
      "step": 2727936
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.327173156057897e-06,
      "loss": 3.8415,
      "step": 2728448
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.318787208547377e-06,
      "loss": 3.8382,
      "step": 2728960
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.310401261036857e-06,
      "loss": 3.8414,
      "step": 2729472
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.302015313526337e-06,
      "loss": 3.8568,
      "step": 2729984
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.293645744819548e-06,
      "loss": 3.8549,
      "step": 2730496
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.285259797309029e-06,
      "loss": 3.8473,
      "step": 2731008
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.276873849798508e-06,
      "loss": 3.8427,
      "step": 2731520
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.268487902287988e-06,
      "loss": 3.8438,
      "step": 2732032
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.260101954777468e-06,
      "loss": 3.845,
      "step": 2732544
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.251716007266948e-06,
      "loss": 3.8361,
      "step": 2733056
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.243330059756428e-06,
      "loss": 3.8374,
      "step": 2733568
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.234944112245908e-06,
      "loss": 3.8409,
      "step": 2734080
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.226574543539119e-06,
      "loss": 3.8514,
      "step": 2734592
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.218188596028599e-06,
      "loss": 3.8437,
      "step": 2735104
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.209802648518078e-06,
      "loss": 3.8502,
      "step": 2735616
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.20143307981129e-06,
      "loss": 3.8416,
      "step": 2736128
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.19304713230077e-06,
      "loss": 3.8377,
      "step": 2736640
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.18466118479025e-06,
      "loss": 3.8369,
      "step": 2737152
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.17627523727973e-06,
      "loss": 3.8586,
      "step": 2737664
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.16788928976921e-06,
      "loss": 3.856,
      "step": 2738176
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.15950334225869e-06,
      "loss": 3.8477,
      "step": 2738688
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.151133773551901e-06,
      "loss": 3.8466,
      "step": 2739200
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.142747826041381e-06,
      "loss": 3.8462,
      "step": 2739712
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.1343618785308604e-06,
      "loss": 3.848,
      "step": 2740224
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.125975931020341e-06,
      "loss": 3.86,
      "step": 2740736
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.11758998350982e-06,
      "loss": 3.8449,
      "step": 2741248
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.109204035999301e-06,
      "loss": 3.8457,
      "step": 2741760
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.10081808848878e-06,
      "loss": 3.8428,
      "step": 2742272
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.09243214097826e-06,
      "loss": 3.8494,
      "step": 2742784
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.084062572271472e-06,
      "loss": 3.8415,
      "step": 2743296
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.075676624760951e-06,
      "loss": 3.8447,
      "step": 2743808
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.067290677250432e-06,
      "loss": 3.8496,
      "step": 2744320
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.0589211085436426e-06,
      "loss": 3.8404,
      "step": 2744832
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.0505351610331225e-06,
      "loss": 3.8498,
      "step": 2745344
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.0421492135226025e-06,
      "loss": 3.8529,
      "step": 2745856
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.033763266012083e-06,
      "loss": 3.8449,
      "step": 2746368
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.025377318501562e-06,
      "loss": 3.8508,
      "step": 2746880
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.016991370991042e-06,
      "loss": 3.8399,
      "step": 2747392
    },
    {
      "epoch": 0.03,
      "eval_loss": 3.973403215408325,
      "eval_runtime": 296.9745,
      "eval_samples_per_second": 1284.929,
      "eval_steps_per_second": 40.155,
      "step": 2747507
    },
    {
      "epoch": 1.0,
      "learning_rate": 5.008605423480522e-06,
      "loss": 3.8438,
      "step": 2747904
    },
    {
      "epoch": 1.0,
      "learning_rate": 5.000219475970002e-06,
      "loss": 3.836,
      "step": 2748416
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.991833528459482e-06,
      "loss": 3.8547,
      "step": 2748928
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.983463959752693e-06,
      "loss": 3.8446,
      "step": 2749440
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.975094391045905e-06,
      "loss": 3.8609,
      "step": 2749952
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.966708443535385e-06,
      "loss": 3.8372,
      "step": 2750464
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.9583224960248645e-06,
      "loss": 3.8485,
      "step": 2750976
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.9499365485143445e-06,
      "loss": 3.833,
      "step": 2751488
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.9415506010038245e-06,
      "loss": 3.8497,
      "step": 2752000
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.933164653493304e-06,
      "loss": 3.8348,
      "step": 2752512
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.924778705982784e-06,
      "loss": 3.8623,
      "step": 2753024
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.916409137275996e-06,
      "loss": 3.8525,
      "step": 2753536
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.908039568569207e-06,
      "loss": 3.8317,
      "step": 2754048
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.899653621058687e-06,
      "loss": 3.8396,
      "step": 2754560
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.891267673548167e-06,
      "loss": 3.8461,
      "step": 2755072
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.882881726037647e-06,
      "loss": 3.8356,
      "step": 2755584
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.874495778527127e-06,
      "loss": 3.8417,
      "step": 2756096
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.8661098310166066e-06,
      "loss": 3.8451,
      "step": 2756608
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.8577238835060865e-06,
      "loss": 3.8408,
      "step": 2757120
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.849354314799297e-06,
      "loss": 3.8638,
      "step": 2757632
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.840968367288778e-06,
      "loss": 3.8441,
      "step": 2758144
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.832582419778257e-06,
      "loss": 3.8457,
      "step": 2758656
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.824196472267737e-06,
      "loss": 3.8466,
      "step": 2759168
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.815810524757217e-06,
      "loss": 3.8541,
      "step": 2759680
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.807424577246697e-06,
      "loss": 3.8379,
      "step": 2760192
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.799038629736177e-06,
      "loss": 3.8482,
      "step": 2760704
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.790652682225657e-06,
      "loss": 3.8348,
      "step": 2761216
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.782283113518869e-06,
      "loss": 3.8424,
      "step": 2761728
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.773897166008348e-06,
      "loss": 3.8406,
      "step": 2762240
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.765511218497828e-06,
      "loss": 3.8425,
      "step": 2762752
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.757125270987308e-06,
      "loss": 3.8418,
      "step": 2763264
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.748739323476788e-06,
      "loss": 3.8491,
      "step": 2763776
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.740353375966268e-06,
      "loss": 3.848,
      "step": 2764288
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.7319674284557475e-06,
      "loss": 3.8564,
      "step": 2764800
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.723597859748959e-06,
      "loss": 3.841,
      "step": 2765312
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.715211912238439e-06,
      "loss": 3.8498,
      "step": 2765824
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.706825964727918e-06,
      "loss": 3.8396,
      "step": 2766336
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.698440017217399e-06,
      "loss": 3.8476,
      "step": 2766848
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.690054069706878e-06,
      "loss": 3.8342,
      "step": 2767360
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.681668122196359e-06,
      "loss": 3.8431,
      "step": 2767872
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.673282174685838e-06,
      "loss": 3.8395,
      "step": 2768384
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.664896227175318e-06,
      "loss": 3.8529,
      "step": 2768896
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.656510279664798e-06,
      "loss": 3.8378,
      "step": 2769408
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.648124332154278e-06,
      "loss": 3.8398,
      "step": 2769920
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.639738384643758e-06,
      "loss": 3.8552,
      "step": 2770432
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.631352437133238e-06,
      "loss": 3.8463,
      "step": 2770944
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.6229828684264495e-06,
      "loss": 3.8507,
      "step": 2771456
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.6145969209159294e-06,
      "loss": 3.8338,
      "step": 2771968
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.6062109734054085e-06,
      "loss": 3.832,
      "step": 2772480
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.597825025894889e-06,
      "loss": 3.8495,
      "step": 2772992
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.5894554571881e-06,
      "loss": 3.8456,
      "step": 2773504
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.581085888481312e-06,
      "loss": 3.8359,
      "step": 2774016
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.572699940970791e-06,
      "loss": 3.8347,
      "step": 2774528
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.564313993460272e-06,
      "loss": 3.8467,
      "step": 2775040
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.555928045949752e-06,
      "loss": 3.8271,
      "step": 2775552
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.547558477242963e-06,
      "loss": 3.8472,
      "step": 2776064
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.539172529732442e-06,
      "loss": 3.8298,
      "step": 2776576
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.530802961025654e-06,
      "loss": 3.8415,
      "step": 2777088
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.522417013515134e-06,
      "loss": 3.8498,
      "step": 2777600
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.514031066004614e-06,
      "loss": 3.8358,
      "step": 2778112
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.505645118494094e-06,
      "loss": 3.8447,
      "step": 2778624
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.497259170983573e-06,
      "loss": 3.8402,
      "step": 2779136
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.488873223473054e-06,
      "loss": 3.835,
      "step": 2779648
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.480487275962533e-06,
      "loss": 3.837,
      "step": 2780160
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.472101328452014e-06,
      "loss": 3.8437,
      "step": 2780672
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.463715380941493e-06,
      "loss": 3.8365,
      "step": 2781184
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.455329433430973e-06,
      "loss": 3.8406,
      "step": 2781696
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.446943485920453e-06,
      "loss": 3.8314,
      "step": 2782208
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.438557538409933e-06,
      "loss": 3.831,
      "step": 2782720
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.430187969703144e-06,
      "loss": 3.8444,
      "step": 2783232
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.421802022192624e-06,
      "loss": 3.8507,
      "step": 2783744
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.413416074682104e-06,
      "loss": 3.8368,
      "step": 2784256
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.405046505975315e-06,
      "loss": 3.8523,
      "step": 2784768
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.396660558464796e-06,
      "loss": 3.8477,
      "step": 2785280
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.388274610954275e-06,
      "loss": 3.841,
      "step": 2785792
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.379888663443755e-06,
      "loss": 3.8453,
      "step": 2786304
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.371502715933235e-06,
      "loss": 3.847,
      "step": 2786816
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.363116768422715e-06,
      "loss": 3.8397,
      "step": 2787328
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.354730820912195e-06,
      "loss": 3.8413,
      "step": 2787840
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.346344873401675e-06,
      "loss": 3.8469,
      "step": 2788352
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.337975304694886e-06,
      "loss": 3.8489,
      "step": 2788864
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.329589357184366e-06,
      "loss": 3.8503,
      "step": 2789376
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.321203409673845e-06,
      "loss": 3.8471,
      "step": 2789888
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.312817462163326e-06,
      "loss": 3.8295,
      "step": 2790400
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.304431514652805e-06,
      "loss": 3.8283,
      "step": 2790912
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.296045567142285e-06,
      "loss": 3.8423,
      "step": 2791424
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.287659619631765e-06,
      "loss": 3.8441,
      "step": 2791936
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.279273672121245e-06,
      "loss": 3.8394,
      "step": 2792448
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.270904103414457e-06,
      "loss": 3.8433,
      "step": 2792960
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.262518155903936e-06,
      "loss": 3.8378,
      "step": 2793472
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.254132208393417e-06,
      "loss": 3.8362,
      "step": 2793984
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.245746260882897e-06,
      "loss": 3.8431,
      "step": 2794496
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.237376692176108e-06,
      "loss": 3.8441,
      "step": 2795008
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.2289907446655874e-06,
      "loss": 3.8487,
      "step": 2795520
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.220604797155067e-06,
      "loss": 3.8486,
      "step": 2796032
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.212218849644547e-06,
      "loss": 3.8459,
      "step": 2796544
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.203849280937758e-06,
      "loss": 3.8348,
      "step": 2797056
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.195463333427239e-06,
      "loss": 3.8466,
      "step": 2797568
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.187077385916718e-06,
      "loss": 3.8432,
      "step": 2798080
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.178691438406199e-06,
      "loss": 3.8525,
      "step": 2798592
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.17032186969941e-06,
      "loss": 3.8396,
      "step": 2799104
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.16193592218889e-06,
      "loss": 3.8354,
      "step": 2799616
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.1535499746783696e-06,
      "loss": 3.8466,
      "step": 2800128
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.1451640271678495e-06,
      "loss": 3.8488,
      "step": 2800640
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.1367780796573295e-06,
      "loss": 3.836,
      "step": 2801152
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.12840851095054e-06,
      "loss": 3.8421,
      "step": 2801664
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.120022563440021e-06,
      "loss": 3.839,
      "step": 2802176
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.1116366159295e-06,
      "loss": 3.8309,
      "step": 2802688
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.103250668418981e-06,
      "loss": 3.8319,
      "step": 2803200
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.09486472090846e-06,
      "loss": 3.8425,
      "step": 2803712
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.08647877339794e-06,
      "loss": 3.8373,
      "step": 2804224
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.07809282588742e-06,
      "loss": 3.8415,
      "step": 2804736
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.0697068783769e-06,
      "loss": 3.8337,
      "step": 2805248
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.061337309670112e-06,
      "loss": 3.8368,
      "step": 2805760
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.0529513621595915e-06,
      "loss": 3.8551,
      "step": 2806272
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.0445654146490715e-06,
      "loss": 3.8554,
      "step": 2806784
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.0361794671385515e-06,
      "loss": 3.8438,
      "step": 2807296
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.027809898431763e-06,
      "loss": 3.8437,
      "step": 2807808
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.019423950921242e-06,
      "loss": 3.8374,
      "step": 2808320
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.011038003410722e-06,
      "loss": 3.8412,
      "step": 2808832
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.002652055900202e-06,
      "loss": 3.8356,
      "step": 2809344
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.994266108389682e-06,
      "loss": 3.8401,
      "step": 2809856
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.985896539682894e-06,
      "loss": 3.8403,
      "step": 2810368
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.977510592172373e-06,
      "loss": 3.8472,
      "step": 2810880
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.969124644661854e-06,
      "loss": 3.842,
      "step": 2811392
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.960738697151333e-06,
      "loss": 3.8483,
      "step": 2811904
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.952369128444545e-06,
      "loss": 3.8372,
      "step": 2812416
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.943983180934024e-06,
      "loss": 3.8384,
      "step": 2812928
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.935597233423504e-06,
      "loss": 3.8363,
      "step": 2813440
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.927211285912984e-06,
      "loss": 3.8541,
      "step": 2813952
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.918825338402464e-06,
      "loss": 3.8558,
      "step": 2814464
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.910455769695676e-06,
      "loss": 3.8508,
      "step": 2814976
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.902069822185155e-06,
      "loss": 3.839,
      "step": 2815488
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.893683874674636e-06,
      "loss": 3.8457,
      "step": 2816000
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.885297927164115e-06,
      "loss": 3.847,
      "step": 2816512
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.876911979653595e-06,
      "loss": 3.8563,
      "step": 2817024
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.868526032143075e-06,
      "loss": 3.8427,
      "step": 2817536
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.860140084632555e-06,
      "loss": 3.8414,
      "step": 2818048
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.851770515925766e-06,
      "loss": 3.845,
      "step": 2818560
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.843384568415246e-06,
      "loss": 3.8493,
      "step": 2819072
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.834998620904726e-06,
      "loss": 3.838,
      "step": 2819584
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.826612673394206e-06,
      "loss": 3.84,
      "step": 2820096
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.818226725883685e-06,
      "loss": 3.8485,
      "step": 2820608
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.8098407783731657e-06,
      "loss": 3.84,
      "step": 2821120
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.8014548308626457e-06,
      "loss": 3.8468,
      "step": 2821632
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.7930688833521256e-06,
      "loss": 3.8524,
      "step": 2822144
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.784699314645337e-06,
      "loss": 3.8426,
      "step": 2822656
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.776313367134817e-06,
      "loss": 3.8439,
      "step": 2823168
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.767943798428028e-06,
      "loss": 3.845,
      "step": 2823680
    },
    {
      "epoch": 1.03,
      "eval_loss": 3.9730207920074463,
      "eval_runtime": 303.5083,
      "eval_samples_per_second": 1257.267,
      "eval_steps_per_second": 39.291,
      "step": 2823827
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.7595578509175084e-06,
      "loss": 3.8339,
      "step": 2824192
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.751171903406988e-06,
      "loss": 3.832,
      "step": 2824704
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.7427859558964674e-06,
      "loss": 3.8523,
      "step": 2825216
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.734400008385948e-06,
      "loss": 3.8409,
      "step": 2825728
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.7260140608754274e-06,
      "loss": 3.8553,
      "step": 2826240
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.7176281133649077e-06,
      "loss": 3.8404,
      "step": 2826752
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.7092421658543873e-06,
      "loss": 3.8454,
      "step": 2827264
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.7008562183438672e-06,
      "loss": 3.832,
      "step": 2827776
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.692470270833347e-06,
      "loss": 3.8456,
      "step": 2828288
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.684084323322827e-06,
      "loss": 3.8352,
      "step": 2828800
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.6756983758123067e-06,
      "loss": 3.8514,
      "step": 2829312
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.667312428301787e-06,
      "loss": 3.8515,
      "step": 2829824
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.658942859594998e-06,
      "loss": 3.8278,
      "step": 2830336
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.650556912084478e-06,
      "loss": 3.8395,
      "step": 2830848
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.6421709645739577e-06,
      "loss": 3.8471,
      "step": 2831360
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.633785017063438e-06,
      "loss": 3.8332,
      "step": 2831872
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.6253990695529177e-06,
      "loss": 3.8424,
      "step": 2832384
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.617013122042397e-06,
      "loss": 3.8406,
      "step": 2832896
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.6086271745318776e-06,
      "loss": 3.8407,
      "step": 2833408
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.6002576058250888e-06,
      "loss": 3.855,
      "step": 2833920
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.591871658314569e-06,
      "loss": 3.8425,
      "step": 2834432
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.5834857108040487e-06,
      "loss": 3.8441,
      "step": 2834944
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.575099763293529e-06,
      "loss": 3.8487,
      "step": 2835456
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.5667138157830086e-06,
      "loss": 3.8454,
      "step": 2835968
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.558327868272488e-06,
      "loss": 3.8388,
      "step": 2836480
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.5499419207619685e-06,
      "loss": 3.8443,
      "step": 2836992
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.541555973251448e-06,
      "loss": 3.8352,
      "step": 2837504
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.5331700257409284e-06,
      "loss": 3.84,
      "step": 2838016
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.5248004570341392e-06,
      "loss": 3.8385,
      "step": 2838528
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.5164145095236196e-06,
      "loss": 3.8436,
      "step": 2839040
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.508044940816831e-06,
      "loss": 3.8383,
      "step": 2839552
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.4996589933063108e-06,
      "loss": 3.8469,
      "step": 2840064
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.4912730457957907e-06,
      "loss": 3.8484,
      "step": 2840576
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.4828870982852703e-06,
      "loss": 3.8487,
      "step": 2841088
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.474517529578482e-06,
      "loss": 3.8434,
      "step": 2841600
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.4661315820679614e-06,
      "loss": 3.8466,
      "step": 2842112
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.457745634557442e-06,
      "loss": 3.8386,
      "step": 2842624
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.4493596870469213e-06,
      "loss": 3.8416,
      "step": 2843136
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.4409737395364017e-06,
      "loss": 3.8307,
      "step": 2843648
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.4325877920258812e-06,
      "loss": 3.8375,
      "step": 2844160
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.4242018445153608e-06,
      "loss": 3.8399,
      "step": 2844672
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.415815897004841e-06,
      "loss": 3.8506,
      "step": 2845184
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.4074299494943207e-06,
      "loss": 3.8337,
      "step": 2845696
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.3990603807875323e-06,
      "loss": 3.8415,
      "step": 2846208
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.3906744332770123e-06,
      "loss": 3.8538,
      "step": 2846720
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.3822884857664922e-06,
      "loss": 3.8455,
      "step": 2847232
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.373902538255972e-06,
      "loss": 3.846,
      "step": 2847744
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.3655165907454517e-06,
      "loss": 3.8339,
      "step": 2848256
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.357130643234932e-06,
      "loss": 3.8229,
      "step": 2848768
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.3487446957244116e-06,
      "loss": 3.8495,
      "step": 2849280
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.340358748213892e-06,
      "loss": 3.8408,
      "step": 2849792
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.331989179507103e-06,
      "loss": 3.8351,
      "step": 2850304
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.323603231996583e-06,
      "loss": 3.8366,
      "step": 2850816
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.3152172844860627e-06,
      "loss": 3.845,
      "step": 2851328
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.3068313369755422e-06,
      "loss": 3.8199,
      "step": 2851840
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.2984453894650226e-06,
      "loss": 3.8454,
      "step": 2852352
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.290059441954502e-06,
      "loss": 3.826,
      "step": 2852864
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.2816898732477142e-06,
      "loss": 3.8421,
      "step": 2853376
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.2733039257371938e-06,
      "loss": 3.8442,
      "step": 2853888
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.264917978226674e-06,
      "loss": 3.8416,
      "step": 2854400
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.2565320307161537e-06,
      "loss": 3.8379,
      "step": 2854912
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.248146083205633e-06,
      "loss": 3.842,
      "step": 2855424
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.2397601356951136e-06,
      "loss": 3.8283,
      "step": 2855936
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.231374188184593e-06,
      "loss": 3.8349,
      "step": 2856448
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.2229882406740735e-06,
      "loss": 3.8418,
      "step": 2856960
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.2146186719672843e-06,
      "loss": 3.8392,
      "step": 2857472
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.2062327244567647e-06,
      "loss": 3.8375,
      "step": 2857984
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.197846776946244e-06,
      "loss": 3.8291,
      "step": 2858496
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.189460829435724e-06,
      "loss": 3.8294,
      "step": 2859008
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.1810912607289358e-06,
      "loss": 3.8381,
      "step": 2859520
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.1727216920221474e-06,
      "loss": 3.8489,
      "step": 2860032
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.164335744511627e-06,
      "loss": 3.8338,
      "step": 2860544
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.1559497970011065e-06,
      "loss": 3.851,
      "step": 2861056
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.147563849490587e-06,
      "loss": 3.8488,
      "step": 2861568
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.1391779019800664e-06,
      "loss": 3.8413,
      "step": 2862080
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.130791954469546e-06,
      "loss": 3.8395,
      "step": 2862592
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.1224223857627576e-06,
      "loss": 3.8442,
      "step": 2863104
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.1140364382522375e-06,
      "loss": 3.8369,
      "step": 2863616
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.1056504907417175e-06,
      "loss": 3.8387,
      "step": 2864128
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.0972645432311974e-06,
      "loss": 3.8467,
      "step": 2864640
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.088894974524409e-06,
      "loss": 3.8438,
      "step": 2865152
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.080509027013889e-06,
      "loss": 3.8499,
      "step": 2865664
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.0721230795033686e-06,
      "loss": 3.8454,
      "step": 2866176
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.0637371319928485e-06,
      "loss": 3.827,
      "step": 2866688
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.0553511844823285e-06,
      "loss": 3.8278,
      "step": 2867200
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.0469652369718084e-06,
      "loss": 3.8356,
      "step": 2867712
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.0385956682650196e-06,
      "loss": 3.8411,
      "step": 2868224
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.0302097207544996e-06,
      "loss": 3.8393,
      "step": 2868736
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.0218237732439795e-06,
      "loss": 3.8401,
      "step": 2869248
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.013437825733459e-06,
      "loss": 3.8365,
      "step": 2869760
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.005051878222939e-06,
      "loss": 3.8358,
      "step": 2870272
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.996665930712419e-06,
      "loss": 3.8404,
      "step": 2870784
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.9882963620056306e-06,
      "loss": 3.8374,
      "step": 2871296
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.9799104144951106e-06,
      "loss": 3.8485,
      "step": 2871808
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.9715244669845905e-06,
      "loss": 3.8488,
      "step": 2872320
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.96313851947407e-06,
      "loss": 3.8451,
      "step": 2872832
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.95475257196355e-06,
      "loss": 3.8324,
      "step": 2873344
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.94636662445303e-06,
      "loss": 3.841,
      "step": 2873856
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.93798067694251e-06,
      "loss": 3.8408,
      "step": 2874368
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.92959472943199e-06,
      "loss": 3.8492,
      "step": 2874880
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.9212087819214694e-06,
      "loss": 3.8397,
      "step": 2875392
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.912839213214681e-06,
      "loss": 3.831,
      "step": 2875904
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.9044532657041606e-06,
      "loss": 3.8512,
      "step": 2876416
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.8960673181936405e-06,
      "loss": 3.84,
      "step": 2876928
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.887681370683121e-06,
      "loss": 3.8357,
      "step": 2877440
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.879311801976332e-06,
      "loss": 3.8404,
      "step": 2877952
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.870925854465812e-06,
      "loss": 3.8374,
      "step": 2878464
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.862539906955292e-06,
      "loss": 3.8297,
      "step": 2878976
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.854153959444772e-06,
      "loss": 3.826,
      "step": 2879488
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.8457680119342515e-06,
      "loss": 3.8416,
      "step": 2880000
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.8373820644237315e-06,
      "loss": 3.8396,
      "step": 2880512
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.8289961169132115e-06,
      "loss": 3.8373,
      "step": 2881024
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.8206101694026914e-06,
      "loss": 3.8314,
      "step": 2881536
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.8122406006959026e-06,
      "loss": 3.8326,
      "step": 2882048
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.8038546531853826e-06,
      "loss": 3.8513,
      "step": 2882560
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7954850844785942e-06,
      "loss": 3.8518,
      "step": 2883072
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.787099136968074e-06,
      "loss": 3.847,
      "step": 2883584
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.778713189457554e-06,
      "loss": 3.8396,
      "step": 2884096
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7703272419470337e-06,
      "loss": 3.8334,
      "step": 2884608
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7619412944365136e-06,
      "loss": 3.8382,
      "step": 2885120
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7535553469259936e-06,
      "loss": 3.8356,
      "step": 2885632
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7451693994154735e-06,
      "loss": 3.8378,
      "step": 2886144
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7367998307086847e-06,
      "loss": 3.8371,
      "step": 2886656
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7284138831981647e-06,
      "loss": 3.8441,
      "step": 2887168
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.7200279356876447e-06,
      "loss": 3.8419,
      "step": 2887680
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.711641988177124e-06,
      "loss": 3.8423,
      "step": 2888192
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.703256040666604e-06,
      "loss": 3.8381,
      "step": 2888704
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.694870093156084e-06,
      "loss": 3.8376,
      "step": 2889216
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.686484145645564e-06,
      "loss": 3.8359,
      "step": 2889728
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.678098198135044e-06,
      "loss": 3.8465,
      "step": 2890240
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.6697286294282556e-06,
      "loss": 3.8532,
      "step": 2890752
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.661342681917735e-06,
      "loss": 3.8486,
      "step": 2891264
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.652956734407215e-06,
      "loss": 3.8381,
      "step": 2891776
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.644570786896695e-06,
      "loss": 3.8465,
      "step": 2892288
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.636184839386175e-06,
      "loss": 3.842,
      "step": 2892800
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.627798891875655e-06,
      "loss": 3.8521,
      "step": 2893312
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.6194129443651345e-06,
      "loss": 3.8428,
      "step": 2893824
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.6110269968546145e-06,
      "loss": 3.84,
      "step": 2894336
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.6026574281478257e-06,
      "loss": 3.8409,
      "step": 2894848
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5942714806373057e-06,
      "loss": 3.8463,
      "step": 2895360
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5858855331267856e-06,
      "loss": 3.838,
      "step": 2895872
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.577499585616266e-06,
      "loss": 3.8391,
      "step": 2896384
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.569113638105746e-06,
      "loss": 3.8404,
      "step": 2896896
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.560744069398957e-06,
      "loss": 3.8438,
      "step": 2897408
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.552358121888437e-06,
      "loss": 3.8423,
      "step": 2897920
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5439721743779166e-06,
      "loss": 3.8439,
      "step": 2898432
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5355862268673966e-06,
      "loss": 3.8485,
      "step": 2898944
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.527216658160608e-06,
      "loss": 3.8405,
      "step": 2899456
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5188307106500878e-06,
      "loss": 3.8433,
      "step": 2899968
    },
    {
      "epoch": 0.03,
      "eval_loss": 3.9723987579345703,
      "eval_runtime": 301.5668,
      "eval_samples_per_second": 1265.361,
      "eval_steps_per_second": 39.543,
      "step": 2900147
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.5104447631395677e-06,
      "loss": 3.8398,
      "step": 2900480
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.5020588156290477e-06,
      "loss": 3.8297,
      "step": 2900992
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.4936892469222593e-06,
      "loss": 3.8477,
      "step": 2901504
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.4853032994117393e-06,
      "loss": 3.8426,
      "step": 2902016
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.4769173519012192e-06,
      "loss": 3.8529,
      "step": 2902528
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.4685314043906988e-06,
      "loss": 3.8367,
      "step": 2903040
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.4601454568801787e-06,
      "loss": 3.8359,
      "step": 2903552
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.4517595093696587e-06,
      "loss": 3.8372,
      "step": 2904064
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.44338994066287e-06,
      "loss": 3.8388,
      "step": 2904576
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.43500399315235e-06,
      "loss": 3.8368,
      "step": 2905088
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.42661804564183e-06,
      "loss": 3.8511,
      "step": 2905600
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.4182320981313098e-06,
      "loss": 3.8505,
      "step": 2906112
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.409862529424521e-06,
      "loss": 3.8287,
      "step": 2906624
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.401476581914001e-06,
      "loss": 3.8353,
      "step": 2907136
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.393090634403481e-06,
      "loss": 3.8462,
      "step": 2907648
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.384704686892961e-06,
      "loss": 3.8302,
      "step": 2908160
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.376318739382441e-06,
      "loss": 3.8373,
      "step": 2908672
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.3679327918719207e-06,
      "loss": 3.8381,
      "step": 2909184
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.3595468443614003e-06,
      "loss": 3.8372,
      "step": 2909696
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.3511608968508802e-06,
      "loss": 3.8548,
      "step": 2910208
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.34277494934036e-06,
      "loss": 3.8419,
      "step": 2910720
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.3344053806335714e-06,
      "loss": 3.8451,
      "step": 2911232
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.3260194331230514e-06,
      "loss": 3.8411,
      "step": 2911744
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.3176334856125313e-06,
      "loss": 3.8443,
      "step": 2912256
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.3092639169057425e-06,
      "loss": 3.8403,
      "step": 2912768
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.3008779693952225e-06,
      "loss": 3.8423,
      "step": 2913280
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.2924920218847024e-06,
      "loss": 3.8364,
      "step": 2913792
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.2841060743741824e-06,
      "loss": 3.8312,
      "step": 2914304
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.2757201268636624e-06,
      "loss": 3.8365,
      "step": 2914816
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.2673341793531423e-06,
      "loss": 3.8381,
      "step": 2915328
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.2589482318426223e-06,
      "loss": 3.8419,
      "step": 2915840
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.2505622843321022e-06,
      "loss": 3.8408,
      "step": 2916352
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.2421763368215818e-06,
      "loss": 3.8442,
      "step": 2916864
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.2337903893110617e-06,
      "loss": 3.8487,
      "step": 2917376
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.225420820604273e-06,
      "loss": 3.8393,
      "step": 2917888
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.217034873093753e-06,
      "loss": 3.8416,
      "step": 2918400
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.208648925583233e-06,
      "loss": 3.841,
      "step": 2918912
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.200262978072713e-06,
      "loss": 3.8382,
      "step": 2919424
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.1918934093659244e-06,
      "loss": 3.8294,
      "step": 2919936
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.1835074618554044e-06,
      "loss": 3.8338,
      "step": 2920448
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.1751215143448843e-06,
      "loss": 3.8368,
      "step": 2920960
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.166735566834364e-06,
      "loss": 3.847,
      "step": 2921472
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.158349619323844e-06,
      "loss": 3.835,
      "step": 2921984
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.1499636718133238e-06,
      "loss": 3.8388,
      "step": 2922496
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.1415777243028037e-06,
      "loss": 3.8542,
      "step": 2923008
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.133208155596015e-06,
      "loss": 3.8406,
      "step": 2923520
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.124822208085495e-06,
      "loss": 3.8429,
      "step": 2924032
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.1164362605749744e-06,
      "loss": 3.8309,
      "step": 2924544
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.1080503130644544e-06,
      "loss": 3.8234,
      "step": 2925056
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.0996643655539343e-06,
      "loss": 3.8496,
      "step": 2925568
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.0912784180434143e-06,
      "loss": 3.8398,
      "step": 2926080
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.082908849336626e-06,
      "loss": 3.8314,
      "step": 2926592
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.074522901826106e-06,
      "loss": 3.834,
      "step": 2927104
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.066136954315586e-06,
      "loss": 3.8467,
      "step": 2927616
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.0577510068050654e-06,
      "loss": 3.8192,
      "step": 2928128
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.049381438098277e-06,
      "loss": 3.8365,
      "step": 2928640
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.0409954905877566e-06,
      "loss": 3.831,
      "step": 2929152
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.0326095430772365e-06,
      "loss": 3.8319,
      "step": 2929664
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.0242235955667165e-06,
      "loss": 3.8465,
      "step": 2930176
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.0158376480561964e-06,
      "loss": 3.8345,
      "step": 2930688
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.0074517005456764e-06,
      "loss": 3.8356,
      "step": 2931200
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.999065753035156e-06,
      "loss": 3.8429,
      "step": 2931712
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.990679805524636e-06,
      "loss": 3.8262,
      "step": 2932224
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.982293858014116e-06,
      "loss": 3.8329,
      "step": 2932736
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.9739242893073275e-06,
      "loss": 3.839,
      "step": 2933248
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.9655383417968074e-06,
      "loss": 3.8358,
      "step": 2933760
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.9571523942862874e-06,
      "loss": 3.838,
      "step": 2934272
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.9487664467757673e-06,
      "loss": 3.829,
      "step": 2934784
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.9403968780689785e-06,
      "loss": 3.8225,
      "step": 2935296
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.9320109305584585e-06,
      "loss": 3.8348,
      "step": 2935808
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.923624983047938e-06,
      "loss": 3.8496,
      "step": 2936320
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.915239035537418e-06,
      "loss": 3.8328,
      "step": 2936832
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.9068694668306294e-06,
      "loss": 3.8508,
      "step": 2937344
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8984835193201094e-06,
      "loss": 3.8461,
      "step": 2937856
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8900975718095893e-06,
      "loss": 3.8402,
      "step": 2938368
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8817280031028005e-06,
      "loss": 3.8317,
      "step": 2938880
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8733420555922805e-06,
      "loss": 3.8469,
      "step": 2939392
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8649561080817604e-06,
      "loss": 3.8357,
      "step": 2939904
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8565701605712406e-06,
      "loss": 3.8363,
      "step": 2940416
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8481842130607201e-06,
      "loss": 3.8465,
      "step": 2940928
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8398146443539318e-06,
      "loss": 3.8429,
      "step": 2941440
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8314286968434113e-06,
      "loss": 3.8503,
      "step": 2941952
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8230427493328913e-06,
      "loss": 3.8415,
      "step": 2942464
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8146568018223714e-06,
      "loss": 3.8263,
      "step": 2942976
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.8062708543118514e-06,
      "loss": 3.823,
      "step": 2943488
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.797884906801331e-06,
      "loss": 3.833,
      "step": 2944000
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.7894989592908109e-06,
      "loss": 3.8405,
      "step": 2944512
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.781129390584022e-06,
      "loss": 3.8366,
      "step": 2945024
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.7727434430735023e-06,
      "loss": 3.8349,
      "step": 2945536
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7643574955629822e-06,
      "loss": 3.8377,
      "step": 2946048
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7559879268561934e-06,
      "loss": 3.8302,
      "step": 2946560
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7476019793456734e-06,
      "loss": 3.8381,
      "step": 2947072
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7392160318351533e-06,
      "loss": 3.836,
      "step": 2947584
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7308300843246333e-06,
      "loss": 3.8527,
      "step": 2948096
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.722444136814113e-06,
      "loss": 3.8472,
      "step": 2948608
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.714058189303593e-06,
      "loss": 3.8419,
      "step": 2949120
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.7056886205968042e-06,
      "loss": 3.8311,
      "step": 2949632
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.6973026730862842e-06,
      "loss": 3.8402,
      "step": 2950144
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.6889167255757641e-06,
      "loss": 3.8348,
      "step": 2950656
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.680530778065244e-06,
      "loss": 3.8497,
      "step": 2951168
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.672144830554724e-06,
      "loss": 3.8373,
      "step": 2951680
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.6637588830442038e-06,
      "loss": 3.8316,
      "step": 2952192
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.6553729355336837e-06,
      "loss": 3.843,
      "step": 2952704
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.6469869880231637e-06,
      "loss": 3.8398,
      "step": 2953216
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.6386010405126436e-06,
      "loss": 3.8314,
      "step": 2953728
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.6302150930021236e-06,
      "loss": 3.8395,
      "step": 2954240
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.6218455242953348e-06,
      "loss": 3.8305,
      "step": 2954752
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.6134595767848148e-06,
      "loss": 3.8296,
      "step": 2955264
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.6050736292742945e-06,
      "loss": 3.8235,
      "step": 2955776
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5966876817637745e-06,
      "loss": 3.8439,
      "step": 2956288
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5883017342532544e-06,
      "loss": 3.8343,
      "step": 2956800
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5799321655464656e-06,
      "loss": 3.8359,
      "step": 2957312
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.571562596839677e-06,
      "loss": 3.8329,
      "step": 2957824
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.563176649329157e-06,
      "loss": 3.8304,
      "step": 2958336
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.554790701818637e-06,
      "loss": 3.8491,
      "step": 2958848
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5464047543081167e-06,
      "loss": 3.8527,
      "step": 2959360
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5380188067975967e-06,
      "loss": 3.8454,
      "step": 2959872
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5296328592870766e-06,
      "loss": 3.8373,
      "step": 2960384
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5212469117765564e-06,
      "loss": 3.8322,
      "step": 2960896
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5128609642660363e-06,
      "loss": 3.8374,
      "step": 2961408
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5044750167555163e-06,
      "loss": 3.8287,
      "step": 2961920
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.4960890692449962e-06,
      "loss": 3.8362,
      "step": 2962432
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.4877031217344762e-06,
      "loss": 3.8364,
      "step": 2962944
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.479317174223956e-06,
      "loss": 3.8395,
      "step": 2963456
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.4709476055171674e-06,
      "loss": 3.8446,
      "step": 2963968
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.4625616580066473e-06,
      "loss": 3.8344,
      "step": 2964480
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.4541920892998588e-06,
      "loss": 3.8431,
      "step": 2964992
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.4458061417893385e-06,
      "loss": 3.8315,
      "step": 2965504
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.4374201942788184e-06,
      "loss": 3.8363,
      "step": 2966016
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.4290342467682982e-06,
      "loss": 3.8414,
      "step": 2966528
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.4206482992577781e-06,
      "loss": 3.8518,
      "step": 2967040
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.412262351747258e-06,
      "loss": 3.8479,
      "step": 2967552
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.403876404236738e-06,
      "loss": 3.8377,
      "step": 2968064
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.3955068355299493e-06,
      "loss": 3.8452,
      "step": 2968576
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.3871208880194292e-06,
      "loss": 3.8417,
      "step": 2969088
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.3787349405089092e-06,
      "loss": 3.8495,
      "step": 2969600
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.370348992998389e-06,
      "loss": 3.8416,
      "step": 2970112
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.3619630454878689e-06,
      "loss": 3.8434,
      "step": 2970624
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.3535770979773488e-06,
      "loss": 3.8379,
      "step": 2971136
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.3451911504668288e-06,
      "loss": 3.8442,
      "step": 2971648
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.3368052029563088e-06,
      "loss": 3.8329,
      "step": 2972160
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.32843563424952e-06,
      "loss": 3.8379,
      "step": 2972672
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.320049686739e-06,
      "loss": 3.8389,
      "step": 2973184
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.3116637392284799e-06,
      "loss": 3.8404,
      "step": 2973696
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.303294170521691e-06,
      "loss": 3.8408,
      "step": 2974208
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.294908223011171e-06,
      "loss": 3.8389,
      "step": 2974720
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.286522275500651e-06,
      "loss": 3.849,
      "step": 2975232
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2781363279901307e-06,
      "loss": 3.8342,
      "step": 2975744
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.2697503804796107e-06,
      "loss": 3.8469,
      "step": 2976256
    },
    {
      "epoch": 1.03,
      "eval_loss": 3.972022533416748,
      "eval_runtime": 301.4089,
      "eval_samples_per_second": 1266.025,
      "eval_steps_per_second": 39.564,
      "step": 2976467
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.2613644329690907e-06,
      "loss": 3.8366,
      "step": 2976768
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.2529784854585706e-06,
      "loss": 3.8263,
      "step": 2977280
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.2445925379480506e-06,
      "loss": 3.8455,
      "step": 2977792
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.2362065904375303e-06,
      "loss": 3.8412,
      "step": 2978304
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.2278206429270103e-06,
      "loss": 3.8495,
      "step": 2978816
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.21943469541649e-06,
      "loss": 3.8343,
      "step": 2979328
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.21104874790597e-06,
      "loss": 3.8357,
      "step": 2979840
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.20266280039545e-06,
      "loss": 3.8353,
      "step": 2980352
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.1942768528849297e-06,
      "loss": 3.8383,
      "step": 2980864
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.1858909053744098e-06,
      "loss": 3.832,
      "step": 2981376
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.1775049578638896e-06,
      "loss": 3.8506,
      "step": 2981888
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.1691190103533695e-06,
      "loss": 3.8468,
      "step": 2982400
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.1607494416465807e-06,
      "loss": 3.8299,
      "step": 2982912
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.1523634941360607e-06,
      "loss": 3.832,
      "step": 2983424
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.1439775466255407e-06,
      "loss": 3.8441,
      "step": 2983936
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.1355915991150206e-06,
      "loss": 3.8285,
      "step": 2984448
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.1272056516045006e-06,
      "loss": 3.8356,
      "step": 2984960
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.1188197040939803e-06,
      "loss": 3.8342,
      "step": 2985472
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.1104337565834603e-06,
      "loss": 3.8399,
      "step": 2985984
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.10204780907294e-06,
      "loss": 3.8501,
      "step": 2986496
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.09366186156242e-06,
      "loss": 3.8425,
      "step": 2987008
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.0852759140519e-06,
      "loss": 3.8425,
      "step": 2987520
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.0768899665413797e-06,
      "loss": 3.8404,
      "step": 2988032
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.0685040190308598e-06,
      "loss": 3.8433,
      "step": 2988544
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.060134450324071e-06,
      "loss": 3.8328,
      "step": 2989056
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.0517648816172825e-06,
      "loss": 3.8435,
      "step": 2989568
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.0433789341067622e-06,
      "loss": 3.8327,
      "step": 2990080
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.0349929865962424e-06,
      "loss": 3.8315,
      "step": 2990592
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.0266070390857221e-06,
      "loss": 3.8361,
      "step": 2991104
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.0182374703789336e-06,
      "loss": 3.8406,
      "step": 2991616
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.0098515228684133e-06,
      "loss": 3.8374,
      "step": 2992128
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.0014655753578933e-06,
      "loss": 3.8407,
      "step": 2992640
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.930796278473732e-07,
      "loss": 3.8437,
      "step": 2993152
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.846936803368532e-07,
      "loss": 3.847,
      "step": 2993664
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.763241116300644e-07,
      "loss": 3.8323,
      "step": 2994176
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.679381641195443e-07,
      "loss": 3.8405,
      "step": 2994688
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.595522166090243e-07,
      "loss": 3.8441,
      "step": 2995200
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.511662690985041e-07,
      "loss": 3.8339,
      "step": 2995712
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.427803215879841e-07,
      "loss": 3.8281,
      "step": 2996224
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.343943740774638e-07,
      "loss": 3.8325,
      "step": 2996736
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.260084265669438e-07,
      "loss": 3.8378,
      "step": 2997248
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.176224790564237e-07,
      "loss": 3.8464,
      "step": 2997760
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.092529103496351e-07,
      "loss": 3.8301,
      "step": 2998272
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.00866962839115e-07,
      "loss": 3.8349,
      "step": 2998784
    },
    {
      "epoch": 0.01,
      "learning_rate": 8.924810153285949e-07,
      "loss": 3.8528,
      "step": 2999296
    },
    {
      "epoch": 0.01,
      "learning_rate": 8.840950678180748e-07,
      "loss": 3.8417,
      "step": 2999808
    },
    {
      "epoch": 0.01,
      "learning_rate": 8.757254991112862e-07,
      "loss": 3.8456,
      "step": 3000320
    },
    {
      "epoch": 0.01,
      "learning_rate": 8.673395516007661e-07,
      "loss": 3.8269,
      "step": 3000832
    },
    {
      "epoch": 0.01,
      "learning_rate": 8.589536040902459e-07,
      "loss": 3.8212,
      "step": 3001344
    },
    {
      "epoch": 0.01,
      "learning_rate": 8.505676565797259e-07,
      "loss": 3.851,
      "step": 3001856
    },
    {
      "epoch": 0.01,
      "learning_rate": 8.421817090692057e-07,
      "loss": 3.8344,
      "step": 3002368
    },
    {
      "epoch": 0.01,
      "learning_rate": 8.337957615586856e-07,
      "loss": 3.8312,
      "step": 3002880
    },
    {
      "epoch": 0.01,
      "learning_rate": 8.254098140481656e-07,
      "loss": 3.8365,
      "step": 3003392
    },
    {
      "epoch": 0.01,
      "learning_rate": 8.170238665376454e-07,
      "loss": 3.8359,
      "step": 3003904
    },
    {
      "epoch": 0.01,
      "learning_rate": 8.086542978308568e-07,
      "loss": 3.8218,
      "step": 3004416
    },
    {
      "epoch": 0.01,
      "learning_rate": 8.002847291240682e-07,
      "loss": 3.8324,
      "step": 3004928
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.918987816135481e-07,
      "loss": 3.8297,
      "step": 3005440
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.83512834103028e-07,
      "loss": 3.8308,
      "step": 3005952
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.751268865925078e-07,
      "loss": 3.846,
      "step": 3006464
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.667573178857192e-07,
      "loss": 3.8352,
      "step": 3006976
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.583713703751991e-07,
      "loss": 3.831,
      "step": 3007488
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.49985422864679e-07,
      "loss": 3.8418,
      "step": 3008000
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.415994753541589e-07,
      "loss": 3.8236,
      "step": 3008512
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.332299066473703e-07,
      "loss": 3.8324,
      "step": 3009024
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.248439591368502e-07,
      "loss": 3.8334,
      "step": 3009536
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.164580116263301e-07,
      "loss": 3.8318,
      "step": 3010048
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.0807206411581e-07,
      "loss": 3.8358,
      "step": 3010560
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.996861166052898e-07,
      "loss": 3.8318,
      "step": 3011072
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.913165478985013e-07,
      "loss": 3.8158,
      "step": 3011584
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.829306003879811e-07,
      "loss": 3.833,
      "step": 3012096
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.74544652877461e-07,
      "loss": 3.8495,
      "step": 3012608
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.661587053669409e-07,
      "loss": 3.8276,
      "step": 3013120
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.577727578564209e-07,
      "loss": 3.8515,
      "step": 3013632
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.493868103459007e-07,
      "loss": 3.8419,
      "step": 3014144
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.410008628353806e-07,
      "loss": 3.8436,
      "step": 3014656
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.326149153248605e-07,
      "loss": 3.8292,
      "step": 3015168
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.242453466180718e-07,
      "loss": 3.8424,
      "step": 3015680
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.158593991075518e-07,
      "loss": 3.8395,
      "step": 3016192
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.074734515970316e-07,
      "loss": 3.8279,
      "step": 3016704
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.990875040865116e-07,
      "loss": 3.8466,
      "step": 3017216
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.907343141834544e-07,
      "loss": 3.8391,
      "step": 3017728
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.823483666729343e-07,
      "loss": 3.8424,
      "step": 3018240
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.739624191624142e-07,
      "loss": 3.843,
      "step": 3018752
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.655764716518942e-07,
      "loss": 3.8215,
      "step": 3019264
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.57190524141374e-07,
      "loss": 3.8218,
      "step": 3019776
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.488209554345854e-07,
      "loss": 3.8297,
      "step": 3020288
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.404350079240653e-07,
      "loss": 3.8406,
      "step": 3020800
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.320490604135451e-07,
      "loss": 3.8374,
      "step": 3021312
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.236631129030251e-07,
      "loss": 3.8345,
      "step": 3021824
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.152771653925049e-07,
      "loss": 3.8349,
      "step": 3022336
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.068912178819848e-07,
      "loss": 3.8328,
      "step": 3022848
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.985052703714647e-07,
      "loss": 3.833,
      "step": 3023360
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.901357016646761e-07,
      "loss": 3.8333,
      "step": 3023872
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.81749754154156e-07,
      "loss": 3.8489,
      "step": 3024384
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.733638066436359e-07,
      "loss": 3.8509,
      "step": 3024896
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.649778591331158e-07,
      "loss": 3.8349,
      "step": 3025408
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.5659191162259567e-07,
      "loss": 3.8305,
      "step": 3025920
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.4820596411207557e-07,
      "loss": 3.8382,
      "step": 3026432
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.398200166015554e-07,
      "loss": 3.8354,
      "step": 3026944
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.3143406909103543e-07,
      "loss": 3.848,
      "step": 3027456
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.230645003842467e-07,
      "loss": 3.836,
      "step": 3027968
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.146785528737267e-07,
      "loss": 3.8326,
      "step": 3028480
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.06308984166938e-07,
      "loss": 3.8416,
      "step": 3028992
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.97923036656418e-07,
      "loss": 3.8411,
      "step": 3029504
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.895370891458978e-07,
      "loss": 3.8276,
      "step": 3030016
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.8115114163537774e-07,
      "loss": 3.8367,
      "step": 3030528
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.727651941248576e-07,
      "loss": 3.8328,
      "step": 3031040
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.643792466143375e-07,
      "loss": 3.8236,
      "step": 3031552
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.559932991038174e-07,
      "loss": 3.827,
      "step": 3032064
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.476073515932973e-07,
      "loss": 3.8363,
      "step": 3032576
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.3923778288650867e-07,
      "loss": 3.8357,
      "step": 3033088
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.3085183537598857e-07,
      "loss": 3.8298,
      "step": 3033600
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.224658878654684e-07,
      "loss": 3.8326,
      "step": 3034112
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.140799403549484e-07,
      "loss": 3.8321,
      "step": 3034624
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.056939928444282e-07,
      "loss": 3.8438,
      "step": 3035136
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.973080453339081e-07,
      "loss": 3.851,
      "step": 3035648
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.8892209782338803e-07,
      "loss": 3.8447,
      "step": 3036160
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.8053615031286793e-07,
      "loss": 3.8362,
      "step": 3036672
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.721665816060793e-07,
      "loss": 3.8301,
      "step": 3037184
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.637806340955592e-07,
      "loss": 3.8404,
      "step": 3037696
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.5539468658503906e-07,
      "loss": 3.8248,
      "step": 3038208
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.47008739074519e-07,
      "loss": 3.835,
      "step": 3038720
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.3862279156399886e-07,
      "loss": 3.8365,
      "step": 3039232
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.3025322285721026e-07,
      "loss": 3.8333,
      "step": 3039744
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.218836541504216e-07,
      "loss": 3.8385,
      "step": 3040256
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.1349770663990154e-07,
      "loss": 3.8368,
      "step": 3040768
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0511175912938141e-07,
      "loss": 3.8432,
      "step": 3041280
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.9674219042259281e-07,
      "loss": 3.8332,
      "step": 3041792
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.883562429120727e-07,
      "loss": 3.8328,
      "step": 3042304
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.799702954015526e-07,
      "loss": 3.8402,
      "step": 3042816
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.7158434789103247e-07,
      "loss": 3.8465,
      "step": 3043328
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.6319840038051237e-07,
      "loss": 3.8481,
      "step": 3043840
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.5481245286999227e-07,
      "loss": 3.8341,
      "step": 3044352
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.4642650535947215e-07,
      "loss": 3.8434,
      "step": 3044864
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.3804055784895205e-07,
      "loss": 3.841,
      "step": 3045376
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.2965461033843195e-07,
      "loss": 3.8461,
      "step": 3045888
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.2126866282791183e-07,
      "loss": 3.8382,
      "step": 3046400
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1288271531739173e-07,
      "loss": 3.8392,
      "step": 3046912
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.0449676780687164e-07,
      "loss": 3.8364,
      "step": 3047424
    },
    {
      "epoch": 0.02,
      "learning_rate": 9.612719910008301e-08,
      "loss": 3.8482,
      "step": 3047936
    },
    {
      "epoch": 0.02,
      "learning_rate": 8.77412515895629e-08,
      "loss": 3.829,
      "step": 3048448
    },
    {
      "epoch": 0.02,
      "learning_rate": 7.937168288277429e-08,
      "loss": 3.8372,
      "step": 3048960
    },
    {
      "epoch": 0.02,
      "learning_rate": 7.100211417598566e-08,
      "loss": 3.8382,
      "step": 3049472
    },
    {
      "epoch": 0.02,
      "learning_rate": 6.261616666546555e-08,
      "loss": 3.8416,
      "step": 3049984
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.423021915494545e-08,
      "loss": 3.8363,
      "step": 3050496
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.584427164442534e-08,
      "loss": 3.8382,
      "step": 3051008
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.7458324133905236e-08,
      "loss": 3.847,
      "step": 3051520
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.9072376623385135e-08,
      "loss": 3.8321,
      "step": 3052032
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0686429112865028e-08,
      "loss": 3.8461,
      "step": 3052544
    },
    {
      "epoch": 0.02,
      "eval_loss": 3.9716358184814453,
      "eval_runtime": 347.769,
      "eval_samples_per_second": 1097.254,
      "eval_steps_per_second": 34.29,
      "step": 3052726
    }
  ],
  "logging_steps": 512,
  "max_steps": 3052726,
  "num_train_epochs": 9223372036854775807,
  "save_steps": 10,
  "total_flos": 1.2607898917747707e+18,
  "trial_name": null,
  "trial_params": null
}