diff --git "a/checkpoint-2365920/trainer_state.json" "b/checkpoint-2365920/trainer_state.json"
new file mode 100644--- /dev/null
+++ "b/checkpoint-2365920/trainer_state.json"
@@ -0,0 +1,27993 @@
+{
+  "best_metric": 3.995774269104004,
+  "best_model_checkpoint": "/mmfs1/gscratch/stf/abhinavp/corpus-filtering/outputs/superlative-quantifier/lstm/4/checkpoints/checkpoint-2365920",
+  "epoch": 1.0250006060157382,
+  "eval_steps": 10,
+  "global_step": 2365920,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.999998362119627e-05,
+      "loss": 10.8207,
+      "step": 1
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.999161405248948e-05,
+      "loss": 7.5574,
+      "step": 512
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.998322810497896e-05,
+      "loss": 7.0552,
+      "step": 1024
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.997484215746844e-05,
+      "loss": 6.9906,
+      "step": 1536
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.996645620995792e-05,
+      "loss": 6.946,
+      "step": 2048
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.99580702624474e-05,
+      "loss": 6.8958,
+      "step": 2560
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.994968431493688e-05,
+      "loss": 6.7246,
+      "step": 3072
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.994129836742636e-05,
+      "loss": 6.6133,
+      "step": 3584
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.993291241991584e-05,
+      "loss": 6.5249,
+      "step": 4096
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.992452647240532e-05,
+      "loss": 6.4571,
+      "step": 4608
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.99161405248948e-05,
+      "loss": 6.3993,
+      "step": 5120
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.990777095618801e-05,
+      "loss": 6.3338,
+      "step": 5632
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.989938500867749e-05,
+      "loss": 6.2675,
+      "step": 6144
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.989099906116697e-05,
+      "loss": 6.2045,
+      "step": 6656
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.988261311365645e-05,
+      "loss": 6.1463,
+      "step": 7168
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.987422716614593e-05,
+      "loss": 6.0968,
+      "step": 7680
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.986584121863541e-05,
+      "loss": 6.049,
+      "step": 8192
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.985745527112489e-05,
+      "loss": 6.0115,
+      "step": 8704
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.984906932361437e-05,
+      "loss": 5.9629,
+      "step": 9216
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.984068337610385e-05,
+      "loss": 5.9354,
+      "step": 9728
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.983229742859333e-05,
+      "loss": 5.8991,
+      "step": 10240
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.982391148108281e-05,
+      "loss": 5.8529,
+      "step": 10752
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.981552553357229e-05,
+      "loss": 5.8222,
+      "step": 11264
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.980713958606178e-05,
+      "loss": 5.7872,
+      "step": 11776
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.979875363855125e-05,
+      "loss": 5.767,
+      "step": 12288
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.979036769104073e-05,
+      "loss": 5.7332,
+      "step": 12800
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.9781998122333946e-05,
+      "loss": 5.7032,
+      "step": 13312
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.9773612174823426e-05,
+      "loss": 5.6743,
+      "step": 13824
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.97652262273129e-05,
+      "loss": 5.6674,
+      "step": 14336
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.975684027980238e-05,
+      "loss": 5.6273,
+      "step": 14848
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.974845433229186e-05,
+      "loss": 5.6144,
+      "step": 15360
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.974008476358507e-05,
+      "loss": 5.6057,
+      "step": 15872
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.973169881607455e-05,
+      "loss": 5.5758,
+      "step": 16384
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.972331286856403e-05,
+      "loss": 5.5575,
+      "step": 16896
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9714926921053515e-05,
+      "loss": 5.5534,
+      "step": 17408
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9706557352346724e-05,
+      "loss": 5.5314,
+      "step": 17920
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9698171404836204e-05,
+      "loss": 5.5222,
+      "step": 18432
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9689785457325684e-05,
+      "loss": 5.483,
+      "step": 18944
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9681399509815164e-05,
+      "loss": 5.4775,
+      "step": 19456
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9673013562304644e-05,
+      "loss": 5.4499,
+      "step": 19968
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.966464399359785e-05,
+      "loss": 5.447,
+      "step": 20480
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.965625804608733e-05,
+      "loss": 5.4338,
+      "step": 20992
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.964787209857681e-05,
+      "loss": 5.4286,
+      "step": 21504
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.963948615106629e-05,
+      "loss": 5.3972,
+      "step": 22016
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.963110020355577e-05,
+      "loss": 5.3972,
+      "step": 22528
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.962273063484898e-05,
+      "loss": 5.3838,
+      "step": 23040
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.961434468733847e-05,
+      "loss": 5.3851,
+      "step": 23552
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.960595873982795e-05,
+      "loss": 5.3776,
+      "step": 24064
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.959757279231743e-05,
+      "loss": 5.3492,
+      "step": 24576
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.958920322361064e-05,
+      "loss": 5.3342,
+      "step": 25088
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.958081727610012e-05,
+      "loss": 5.3436,
+      "step": 25600
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.95724313285896e-05,
+      "loss": 5.3374,
+      "step": 26112
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.956404538107908e-05,
+      "loss": 5.3125,
+      "step": 26624
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.955565943356856e-05,
+      "loss": 5.3033,
+      "step": 27136
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.954728986486177e-05,
+      "loss": 5.3002,
+      "step": 27648
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.953890391735125e-05,
+      "loss": 5.2821,
+      "step": 28160
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9530517969840727e-05,
+      "loss": 5.2895,
+      "step": 28672
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9522132022330207e-05,
+      "loss": 5.2555,
+      "step": 29184
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9513746074819686e-05,
+      "loss": 5.2601,
+      "step": 29696
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.95053765061129e-05,
+      "loss": 5.25,
+      "step": 30208
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.949699055860238e-05,
+      "loss": 5.2477,
+      "step": 30720
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.948860461109186e-05,
+      "loss": 5.2243,
+      "step": 31232
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.948021866358134e-05,
+      "loss": 5.2321,
+      "step": 31744
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.947183271607082e-05,
+      "loss": 5.2042,
+      "step": 32256
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.94634467685603e-05,
+      "loss": 5.2114,
+      "step": 32768
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.945507719985351e-05,
+      "loss": 5.2097,
+      "step": 33280
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.944669125234299e-05,
+      "loss": 5.2035,
+      "step": 33792
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.943830530483247e-05,
+      "loss": 5.1832,
+      "step": 34304
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.942991935732195e-05,
+      "loss": 5.1654,
+      "step": 34816
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.942154978861516e-05,
+      "loss": 5.1598,
+      "step": 35328
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.941316384110464e-05,
+      "loss": 5.175,
+      "step": 35840
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.940477789359412e-05,
+      "loss": 5.1631,
+      "step": 36352
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.93963919460836e-05,
+      "loss": 5.1481,
+      "step": 36864
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9388038756180545e-05,
+      "loss": 5.1584,
+      "step": 37376
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9379652808670025e-05,
+      "loss": 5.1567,
+      "step": 37888
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9371266861159505e-05,
+      "loss": 5.1405,
+      "step": 38400
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9362880913648985e-05,
+      "loss": 5.1299,
+      "step": 38912
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9354494966138465e-05,
+      "loss": 5.1196,
+      "step": 39424
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9346109018627945e-05,
+      "loss": 5.1157,
+      "step": 39936
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9337723071117425e-05,
+      "loss": 5.1023,
+      "step": 40448
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9329337123606905e-05,
+      "loss": 5.1036,
+      "step": 40960
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.932095117609638e-05,
+      "loss": 5.0931,
+      "step": 41472
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9312581607389594e-05,
+      "loss": 5.1048,
+      "step": 41984
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9304195659879074e-05,
+      "loss": 5.0914,
+      "step": 42496
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9295809712368554e-05,
+      "loss": 5.0688,
+      "step": 43008
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.9287423764858034e-05,
+      "loss": 5.0707,
+      "step": 43520
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.927905419615125e-05,
+      "loss": 5.07,
+      "step": 44032
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.927066824864073e-05,
+      "loss": 5.0695,
+      "step": 44544
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.92622823011302e-05,
+      "loss": 5.0631,
+      "step": 45056
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.925389635361968e-05,
+      "loss": 5.0464,
+      "step": 45568
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.924554316371663e-05,
+      "loss": 5.044,
+      "step": 46080
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.923715721620611e-05,
+      "loss": 5.0398,
+      "step": 46592
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.922877126869559e-05,
+      "loss": 5.0253,
+      "step": 47104
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.922038532118507e-05,
+      "loss": 5.0173,
+      "step": 47616
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.921199937367455e-05,
+      "loss": 5.0157,
+      "step": 48128
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.920361342616403e-05,
+      "loss": 5.0239,
+      "step": 48640
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.919524385745724e-05,
+      "loss": 5.0111,
+      "step": 49152
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.918685790994672e-05,
+      "loss": 4.9996,
+      "step": 49664
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.91784719624362e-05,
+      "loss": 4.9906,
+      "step": 50176
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.9170086014925676e-05,
+      "loss": 4.9931,
+      "step": 50688
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.9161700067415156e-05,
+      "loss": 4.9954,
+      "step": 51200
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.9153314119904636e-05,
+      "loss": 4.9883,
+      "step": 51712
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.9144928172394116e-05,
+      "loss": 4.9821,
+      "step": 52224
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.9136558603687325e-05,
+      "loss": 4.9697,
+      "step": 52736
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.9128172656176805e-05,
+      "loss": 4.9795,
+      "step": 53248
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.9119786708666285e-05,
+      "loss": 4.9571,
+      "step": 53760
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.9111400761155765e-05,
+      "loss": 4.9607,
+      "step": 54272
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.9103014813645245e-05,
+      "loss": 4.9518,
+      "step": 54784
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.909462886613473e-05,
+      "loss": 4.9462,
+      "step": 55296
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.908624291862421e-05,
+      "loss": 4.9414,
+      "step": 55808
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.907785697111369e-05,
+      "loss": 4.946,
+      "step": 56320
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.906947102360317e-05,
+      "loss": 4.9338,
+      "step": 56832
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.906110145489638e-05,
+      "loss": 4.9318,
+      "step": 57344
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.905271550738586e-05,
+      "loss": 4.9294,
+      "step": 57856
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.904432955987534e-05,
+      "loss": 4.9234,
+      "step": 58368
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.903594361236482e-05,
+      "loss": 4.9276,
+      "step": 58880
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.90275576648543e-05,
+      "loss": 4.9204,
+      "step": 59392
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.901918809614751e-05,
+      "loss": 4.9169,
+      "step": 59904
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.901080214863699e-05,
+      "loss": 4.9141,
+      "step": 60416
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.900241620112647e-05,
+      "loss": 4.9064,
+      "step": 60928
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.899403025361595e-05,
+      "loss": 4.9058,
+      "step": 61440
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.898564430610543e-05,
+      "loss": 4.8941,
+      "step": 61952
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.8977274737398646e-05,
+      "loss": 4.8901,
+      "step": 62464
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.8968888789888125e-05,
+      "loss": 4.888,
+      "step": 62976
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.8960502842377605e-05,
+      "loss": 4.8866,
+      "step": 63488
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.8952133273670815e-05,
+      "loss": 4.8816,
+      "step": 64000
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.8943747326160294e-05,
+      "loss": 4.8786,
+      "step": 64512
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.8935361378649774e-05,
+      "loss": 4.8764,
+      "step": 65024
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.8926975431139254e-05,
+      "loss": 4.8636,
+      "step": 65536
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.8918589483628734e-05,
+      "loss": 4.8734,
+      "step": 66048
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.8910203536118214e-05,
+      "loss": 4.8709,
+      "step": 66560
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.890181758860769e-05,
+      "loss": 4.8526,
+      "step": 67072
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.889343164109717e-05,
+      "loss": 4.8398,
+      "step": 67584
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.888506207239038e-05,
+      "loss": 4.8469,
+      "step": 68096
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.887667612487986e-05,
+      "loss": 4.8546,
+      "step": 68608
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.886829017736934e-05,
+      "loss": 4.861,
+      "step": 69120
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.885990422985882e-05,
+      "loss": 4.8445,
+      "step": 69632
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.88515182823483e-05,
+      "loss": 4.8428,
+      "step": 70144
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.884316509244525e-05,
+      "loss": 4.842,
+      "step": 70656
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.883477914493473e-05,
+      "loss": 4.8422,
+      "step": 71168
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.882639319742421e-05,
+      "loss": 4.829,
+      "step": 71680
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.881800724991369e-05,
+      "loss": 4.8211,
+      "step": 72192
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.88096376812069e-05,
+      "loss": 4.8289,
+      "step": 72704
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.880125173369638e-05,
+      "loss": 4.8184,
+      "step": 73216
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.879286578618586e-05,
+      "loss": 4.8181,
+      "step": 73728
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.878447983867534e-05,
+      "loss": 4.8206,
+      "step": 74240
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.877609389116482e-05,
+      "loss": 4.8042,
+      "step": 74752
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.87677079436543e-05,
+      "loss": 4.8157,
+      "step": 75264
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.875932199614378e-05,
+      "loss": 4.8113,
+      "step": 75776
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.875093604863326e-05,
+      "loss": 4.7903,
+      "step": 76288
+    },
+    {
+      "epoch": 0.03,
+      "eval_loss": 4.776516914367676,
+      "eval_runtime": 292.6321,
+      "eval_samples_per_second": 1303.996,
+      "eval_steps_per_second": 40.751,
+      "step": 76320
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.8742566479926466e-05,
+      "loss": 4.8033,
+      "step": 76800
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.873419691121968e-05,
+      "loss": 4.794,
+      "step": 77312
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.872581096370916e-05,
+      "loss": 4.8041,
+      "step": 77824
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.8717425016198635e-05,
+      "loss": 4.7906,
+      "step": 78336
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.8709039068688115e-05,
+      "loss": 4.7918,
+      "step": 78848
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.8700653121177595e-05,
+      "loss": 4.7772,
+      "step": 79360
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.8692267173667075e-05,
+      "loss": 4.7778,
+      "step": 79872
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.8683881226156555e-05,
+      "loss": 4.7584,
+      "step": 80384
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.867549527864604e-05,
+      "loss": 4.7853,
+      "step": 80896
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.866710933113552e-05,
+      "loss": 4.7773,
+      "step": 81408
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.865873976242873e-05,
+      "loss": 4.7728,
+      "step": 81920
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.865035381491821e-05,
+      "loss": 4.7767,
+      "step": 82432
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.864198424621142e-05,
+      "loss": 4.7598,
+      "step": 82944
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.86335982987009e-05,
+      "loss": 4.7594,
+      "step": 83456
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.862521235119038e-05,
+      "loss": 4.7544,
+      "step": 83968
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.861682640367986e-05,
+      "loss": 4.7521,
+      "step": 84480
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.860844045616934e-05,
+      "loss": 4.753,
+      "step": 84992
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.860007088746255e-05,
+      "loss": 4.7452,
+      "step": 85504
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.859168493995203e-05,
+      "loss": 4.7542,
+      "step": 86016
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.858329899244151e-05,
+      "loss": 4.7572,
+      "step": 86528
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.8574913044930995e-05,
+      "loss": 4.7434,
+      "step": 87040
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.8566527097420475e-05,
+      "loss": 4.7367,
+      "step": 87552
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.8558141149909955e-05,
+      "loss": 4.734,
+      "step": 88064
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.8549771581203164e-05,
+      "loss": 4.7401,
+      "step": 88576
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.8541385633692644e-05,
+      "loss": 4.7311,
+      "step": 89088
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.8532999686182124e-05,
+      "loss": 4.7187,
+      "step": 89600
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.8524613738671604e-05,
+      "loss": 4.7188,
+      "step": 90112
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.8516227791161084e-05,
+      "loss": 4.7325,
+      "step": 90624
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.8507841843650564e-05,
+      "loss": 4.7052,
+      "step": 91136
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.8499455896140044e-05,
+      "loss": 4.7121,
+      "step": 91648
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.8491069948629524e-05,
+      "loss": 4.7216,
+      "step": 92160
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.8482684001119e-05,
+      "loss": 4.7172,
+      "step": 92672
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.847433081121595e-05,
+      "loss": 4.7167,
+      "step": 93184
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.846594486370543e-05,
+      "loss": 4.715,
+      "step": 93696
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.845755891619491e-05,
+      "loss": 4.712,
+      "step": 94208
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.844917296868439e-05,
+      "loss": 4.7131,
+      "step": 94720
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.844078702117387e-05,
+      "loss": 4.6901,
+      "step": 95232
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.843240107366335e-05,
+      "loss": 4.699,
+      "step": 95744
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.842401512615282e-05,
+      "loss": 4.6822,
+      "step": 96256
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.84156291786423e-05,
+      "loss": 4.6882,
+      "step": 96768
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.840724323113178e-05,
+      "loss": 4.6896,
+      "step": 97280
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.839885728362126e-05,
+      "loss": 4.6874,
+      "step": 97792
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.839048771491447e-05,
+      "loss": 4.6832,
+      "step": 98304
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.838210176740395e-05,
+      "loss": 4.6832,
+      "step": 98816
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.837371581989343e-05,
+      "loss": 4.6822,
+      "step": 99328
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.836532987238292e-05,
+      "loss": 4.6858,
+      "step": 99840
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.83569439248724e-05,
+      "loss": 4.6883,
+      "step": 100352
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.834855797736188e-05,
+      "loss": 4.6697,
+      "step": 100864
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.834017202985136e-05,
+      "loss": 4.662,
+      "step": 101376
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.833178608234084e-05,
+      "loss": 4.6782,
+      "step": 101888
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.8323416513634046e-05,
+      "loss": 4.6823,
+      "step": 102400
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.8315030566123526e-05,
+      "loss": 4.6651,
+      "step": 102912
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.8306644618613006e-05,
+      "loss": 4.662,
+      "step": 103424
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.8298258671102486e-05,
+      "loss": 4.6617,
+      "step": 103936
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.8289889102395695e-05,
+      "loss": 4.6559,
+      "step": 104448
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.8281503154885175e-05,
+      "loss": 4.6652,
+      "step": 104960
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.8273117207374655e-05,
+      "loss": 4.6451,
+      "step": 105472
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.8264731259864135e-05,
+      "loss": 4.6572,
+      "step": 105984
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.8256345312353615e-05,
+      "loss": 4.6529,
+      "step": 106496
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.824797574364683e-05,
+      "loss": 4.654,
+      "step": 107008
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.823958979613631e-05,
+      "loss": 4.6349,
+      "step": 107520
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.823120384862579e-05,
+      "loss": 4.6561,
+      "step": 108032
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.822281790111527e-05,
+      "loss": 4.6299,
+      "step": 108544
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.821443195360475e-05,
+      "loss": 4.645,
+      "step": 109056
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.820604600609423e-05,
+      "loss": 4.6447,
+      "step": 109568
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.819767643738744e-05,
+      "loss": 4.6458,
+      "step": 110080
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.818929048987692e-05,
+      "loss": 4.6293,
+      "step": 110592
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.81809045423664e-05,
+      "loss": 4.6246,
+      "step": 111104
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.817251859485588e-05,
+      "loss": 4.623,
+      "step": 111616
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.816413264734535e-05,
+      "loss": 4.6347,
+      "step": 112128
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.815574669983483e-05,
+      "loss": 4.6347,
+      "step": 112640
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.814736075232432e-05,
+      "loss": 4.6272,
+      "step": 113152
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.81389748048138e-05,
+      "loss": 4.6337,
+      "step": 113664
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.813060523610701e-05,
+      "loss": 4.6387,
+      "step": 114176
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.812221928859649e-05,
+      "loss": 4.635,
+      "step": 114688
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.811383334108597e-05,
+      "loss": 4.6181,
+      "step": 115200
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.810544739357545e-05,
+      "loss": 4.6217,
+      "step": 115712
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.809707782486866e-05,
+      "loss": 4.6231,
+      "step": 116224
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.808869187735814e-05,
+      "loss": 4.6027,
+      "step": 116736
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.808030592984762e-05,
+      "loss": 4.6243,
+      "step": 117248
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.80719199823371e-05,
+      "loss": 4.606,
+      "step": 117760
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.806353403482658e-05,
+      "loss": 4.6272,
+      "step": 118272
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.805514808731606e-05,
+      "loss": 4.6163,
+      "step": 118784
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.804677851860927e-05,
+      "loss": 4.5943,
+      "step": 119296
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.803839257109875e-05,
+      "loss": 4.6105,
+      "step": 119808
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.803000662358823e-05,
+      "loss": 4.5993,
+      "step": 120320
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.802162067607771e-05,
+      "loss": 4.6137,
+      "step": 120832
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.801325110737092e-05,
+      "loss": 4.6062,
+      "step": 121344
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.800488153866413e-05,
+      "loss": 4.6037,
+      "step": 121856
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.799649559115361e-05,
+      "loss": 4.6002,
+      "step": 122368
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.798812602244683e-05,
+      "loss": 4.5935,
+      "step": 122880
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.79797400749363e-05,
+      "loss": 4.5893,
+      "step": 123392
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.797135412742578e-05,
+      "loss": 4.5834,
+      "step": 123904
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.796296817991526e-05,
+      "loss": 4.5852,
+      "step": 124416
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.795458223240474e-05,
+      "loss": 4.5972,
+      "step": 124928
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.7946212663697956e-05,
+      "loss": 4.5869,
+      "step": 125440
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.7937826716187436e-05,
+      "loss": 4.5812,
+      "step": 125952
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.7929440768676916e-05,
+      "loss": 4.5766,
+      "step": 126464
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.7921054821166396e-05,
+      "loss": 4.5839,
+      "step": 126976
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.7912668873655876e-05,
+      "loss": 4.5887,
+      "step": 127488
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.7904282926145356e-05,
+      "loss": 4.582,
+      "step": 128000
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.7895896978634836e-05,
+      "loss": 4.5777,
+      "step": 128512
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.7887511031124316e-05,
+      "loss": 4.5763,
+      "step": 129024
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.7879125083613796e-05,
+      "loss": 4.5817,
+      "step": 129536
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.7870739136103276e-05,
+      "loss": 4.5626,
+      "step": 130048
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.7862353188592756e-05,
+      "loss": 4.5719,
+      "step": 130560
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.7853967241082236e-05,
+      "loss": 4.5696,
+      "step": 131072
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.7845581293571715e-05,
+      "loss": 4.5638,
+      "step": 131584
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.7837211724864925e-05,
+      "loss": 4.5637,
+      "step": 132096
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.782882577735441e-05,
+      "loss": 4.5671,
+      "step": 132608
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.782043982984389e-05,
+      "loss": 4.5628,
+      "step": 133120
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.781205388233337e-05,
+      "loss": 4.5604,
+      "step": 133632
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.7803667934822844e-05,
+      "loss": 4.5609,
+      "step": 134144
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.779529836611606e-05,
+      "loss": 4.5576,
+      "step": 134656
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.778691241860554e-05,
+      "loss": 4.5689,
+      "step": 135168
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.777852647109502e-05,
+      "loss": 4.563,
+      "step": 135680
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.777014052358449e-05,
+      "loss": 4.5598,
+      "step": 136192
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.776175457607397e-05,
+      "loss": 4.5604,
+      "step": 136704
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.775336862856345e-05,
+      "loss": 4.5582,
+      "step": 137216
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.774498268105293e-05,
+      "loss": 4.5565,
+      "step": 137728
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.773659673354241e-05,
+      "loss": 4.5474,
+      "step": 138240
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.772822716483563e-05,
+      "loss": 4.5458,
+      "step": 138752
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.771984121732511e-05,
+      "loss": 4.5502,
+      "step": 139264
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.771145526981459e-05,
+      "loss": 4.5512,
+      "step": 139776
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.770306932230407e-05,
+      "loss": 4.5495,
+      "step": 140288
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.769469975359728e-05,
+      "loss": 4.5481,
+      "step": 140800
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.768631380608676e-05,
+      "loss": 4.542,
+      "step": 141312
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.767794423737997e-05,
+      "loss": 4.5436,
+      "step": 141824
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.766955828986945e-05,
+      "loss": 4.5426,
+      "step": 142336
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.766117234235893e-05,
+      "loss": 4.551,
+      "step": 142848
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.765278639484841e-05,
+      "loss": 4.5318,
+      "step": 143360
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.764440044733789e-05,
+      "loss": 4.5237,
+      "step": 143872
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.763601449982737e-05,
+      "loss": 4.5292,
+      "step": 144384
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.762764493112058e-05,
+      "loss": 4.542,
+      "step": 144896
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.761925898361006e-05,
+      "loss": 4.5461,
+      "step": 145408
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.761087303609954e-05,
+      "loss": 4.5364,
+      "step": 145920
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.760248708858902e-05,
+      "loss": 4.5355,
+      "step": 146432
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.75941011410785e-05,
+      "loss": 4.5376,
+      "step": 146944
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.758571519356798e-05,
+      "loss": 4.5372,
+      "step": 147456
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.757734562486119e-05,
+      "loss": 4.5283,
+      "step": 147968
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.756895967735067e-05,
+      "loss": 4.5244,
+      "step": 148480
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.756057372984015e-05,
+      "loss": 4.5295,
+      "step": 148992
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.755218778232963e-05,
+      "loss": 4.5261,
+      "step": 149504
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.754383459242657e-05,
+      "loss": 4.5192,
+      "step": 150016
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.753544864491605e-05,
+      "loss": 4.5314,
+      "step": 150528
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.7527062697405536e-05,
+      "loss": 4.515,
+      "step": 151040
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.7518676749895016e-05,
+      "loss": 4.525,
+      "step": 151552
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.7510290802384496e-05,
+      "loss": 4.5232,
+      "step": 152064
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.7501904854873976e-05,
+      "loss": 4.5105,
+      "step": 152576
+    },
+    {
+      "epoch": 1.03,
+      "eval_loss": 4.495769500732422,
+      "eval_runtime": 291.918,
+      "eval_samples_per_second": 1307.186,
+      "eval_steps_per_second": 40.851,
+      "step": 152640
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.7493518907363456e-05,
+      "loss": 4.5214,
+      "step": 153088
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.7485132959852936e-05,
+      "loss": 4.5117,
+      "step": 153600
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.7476747012342416e-05,
+      "loss": 4.5216,
+      "step": 154112
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.7468361064831896e-05,
+      "loss": 4.5149,
+      "step": 154624
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.7459975117321376e-05,
+      "loss": 4.5181,
+      "step": 155136
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.745158916981085e-05,
+      "loss": 4.5047,
+      "step": 155648
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.744320322230033e-05,
+      "loss": 4.5042,
+      "step": 156160
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.743481727478981e-05,
+      "loss": 4.4898,
+      "step": 156672
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.742643132727929e-05,
+      "loss": 4.5199,
+      "step": 157184
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.741804537976877e-05,
+      "loss": 4.511,
+      "step": 157696
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.740965943225825e-05,
+      "loss": 4.5028,
+      "step": 158208
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.7401273484747736e-05,
+      "loss": 4.5177,
+      "step": 158720
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.7392887537237216e-05,
+      "loss": 4.4953,
+      "step": 159232
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.7384501589726696e-05,
+      "loss": 4.502,
+      "step": 159744
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.7376115642216176e-05,
+      "loss": 4.4931,
+      "step": 160256
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.7367729694705656e-05,
+      "loss": 4.4899,
+      "step": 160768
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.7359360125998865e-05,
+      "loss": 4.5008,
+      "step": 161280
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.7350974178488345e-05,
+      "loss": 4.4872,
+      "step": 161792
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.7342588230977825e-05,
+      "loss": 4.501,
+      "step": 162304
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.7334202283467305e-05,
+      "loss": 4.5052,
+      "step": 162816
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.7325816335956784e-05,
+      "loss": 4.4955,
+      "step": 163328
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.7317430388446264e-05,
+      "loss": 4.4906,
+      "step": 163840
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.730904444093574e-05,
+      "loss": 4.489,
+      "step": 164352
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.7300674872228953e-05,
+      "loss": 4.4939,
+      "step": 164864
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.7292288924718433e-05,
+      "loss": 4.4816,
+      "step": 165376
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.728390297720791e-05,
+      "loss": 4.4834,
+      "step": 165888
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.727551702969739e-05,
+      "loss": 4.4717,
+      "step": 166400
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.726713108218687e-05,
+      "loss": 4.4936,
+      "step": 166912
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.725874513467635e-05,
+      "loss": 4.4653,
+      "step": 167424
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.725035918716583e-05,
+      "loss": 4.4797,
+      "step": 167936
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.724197323965531e-05,
+      "loss": 4.4828,
+      "step": 168448
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.723360367094852e-05,
+      "loss": 4.4839,
+      "step": 168960
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.722523410224174e-05,
+      "loss": 4.4809,
+      "step": 169472
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.721684815473121e-05,
+      "loss": 4.4819,
+      "step": 169984
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.720847858602443e-05,
+      "loss": 4.4794,
+      "step": 170496
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.720009263851391e-05,
+      "loss": 4.4828,
+      "step": 171008
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.719170669100339e-05,
+      "loss": 4.466,
+      "step": 171520
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.718332074349287e-05,
+      "loss": 4.4711,
+      "step": 172032
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.717493479598235e-05,
+      "loss": 4.455,
+      "step": 172544
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.716654884847183e-05,
+      "loss": 4.4652,
+      "step": 173056
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.715816290096131e-05,
+      "loss": 4.4634,
+      "step": 173568
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.714977695345079e-05,
+      "loss": 4.4702,
+      "step": 174080
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.714139100594027e-05,
+      "loss": 4.4615,
+      "step": 174592
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.713300505842975e-05,
+      "loss": 4.4635,
+      "step": 175104
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.712461911091923e-05,
+      "loss": 4.4642,
+      "step": 175616
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.711623316340871e-05,
+      "loss": 4.4678,
+      "step": 176128
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.7107863594701916e-05,
+      "loss": 4.4699,
+      "step": 176640
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.7099494025995125e-05,
+      "loss": 4.456,
+      "step": 177152
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.7091108078484605e-05,
+      "loss": 4.4444,
+      "step": 177664
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.708272213097409e-05,
+      "loss": 4.4687,
+      "step": 178176
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.707433618346357e-05,
+      "loss": 4.4655,
+      "step": 178688
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.706595023595305e-05,
+      "loss": 4.4567,
+      "step": 179200
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.705756428844253e-05,
+      "loss": 4.4552,
+      "step": 179712
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.704917834093201e-05,
+      "loss": 4.4483,
+      "step": 180224
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.704079239342149e-05,
+      "loss": 4.4509,
+      "step": 180736
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.70324228247147e-05,
+      "loss": 4.4568,
+      "step": 181248
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.702403687720418e-05,
+      "loss": 4.4372,
+      "step": 181760
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.701565092969366e-05,
+      "loss": 4.4514,
+      "step": 182272
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.700726498218314e-05,
+      "loss": 4.4481,
+      "step": 182784
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.699889541347635e-05,
+      "loss": 4.4471,
+      "step": 183296
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.699050946596583e-05,
+      "loss": 4.4326,
+      "step": 183808
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.698212351845531e-05,
+      "loss": 4.456,
+      "step": 184320
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.6973753949748525e-05,
+      "loss": 4.433,
+      "step": 184832
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.6965368002238005e-05,
+      "loss": 4.4457,
+      "step": 185344
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.6956982054727485e-05,
+      "loss": 4.4421,
+      "step": 185856
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.6948596107216965e-05,
+      "loss": 4.4501,
+      "step": 186368
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.6940226538510174e-05,
+      "loss": 4.4313,
+      "step": 186880
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.6931840590999654e-05,
+      "loss": 4.4337,
+      "step": 187392
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.6923454643489134e-05,
+      "loss": 4.4226,
+      "step": 187904
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.6915068695978614e-05,
+      "loss": 4.4421,
+      "step": 188416
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.6906682748468094e-05,
+      "loss": 4.4416,
+      "step": 188928
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.6898296800957574e-05,
+      "loss": 4.4362,
+      "step": 189440
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.688991085344705e-05,
+      "loss": 4.4398,
+      "step": 189952
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.688152490593653e-05,
+      "loss": 4.4479,
+      "step": 190464
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.6873138958426014e-05,
+      "loss": 4.446,
+      "step": 190976
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.686476938971923e-05,
+      "loss": 4.426,
+      "step": 191488
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.685639982101244e-05,
+      "loss": 4.4333,
+      "step": 192000
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.684801387350192e-05,
+      "loss": 4.4408,
+      "step": 192512
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.68396279259914e-05,
+      "loss": 4.4181,
+      "step": 193024
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.683124197848087e-05,
+      "loss": 4.4355,
+      "step": 193536
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.682285603097035e-05,
+      "loss": 4.419,
+      "step": 194048
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.681447008345983e-05,
+      "loss": 4.445,
+      "step": 194560
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.680608413594931e-05,
+      "loss": 4.4346,
+      "step": 195072
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.679769818843879e-05,
+      "loss": 4.4117,
+      "step": 195584
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.6789328619732e-05,
+      "loss": 4.4256,
+      "step": 196096
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.678094267222148e-05,
+      "loss": 4.4229,
+      "step": 196608
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.677255672471097e-05,
+      "loss": 4.4328,
+      "step": 197120
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.676417077720045e-05,
+      "loss": 4.428,
+      "step": 197632
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.6755801208493657e-05,
+      "loss": 4.425,
+      "step": 198144
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6747415260983136e-05,
+      "loss": 4.4209,
+      "step": 198656
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6739029313472616e-05,
+      "loss": 4.4159,
+      "step": 199168
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6730643365962096e-05,
+      "loss": 4.4136,
+      "step": 199680
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6722257418451576e-05,
+      "loss": 4.4052,
+      "step": 200192
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6713871470941056e-05,
+      "loss": 4.414,
+      "step": 200704
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6705501902234265e-05,
+      "loss": 4.4223,
+      "step": 201216
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6697115954723745e-05,
+      "loss": 4.4144,
+      "step": 201728
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6688730007213225e-05,
+      "loss": 4.4093,
+      "step": 202240
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6680344059702705e-05,
+      "loss": 4.4069,
+      "step": 202752
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6671958112192185e-05,
+      "loss": 4.4104,
+      "step": 203264
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6663572164681665e-05,
+      "loss": 4.4207,
+      "step": 203776
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6655186217171145e-05,
+      "loss": 4.4108,
+      "step": 204288
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.664680026966063e-05,
+      "loss": 4.4108,
+      "step": 204800
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.663843070095384e-05,
+      "loss": 4.4019,
+      "step": 205312
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.663004475344332e-05,
+      "loss": 4.4148,
+      "step": 205824
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.662167518473653e-05,
+      "loss": 4.3939,
+      "step": 206336
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.661328923722601e-05,
+      "loss": 4.4071,
+      "step": 206848
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.660490328971549e-05,
+      "loss": 4.3999,
+      "step": 207360
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.659651734220497e-05,
+      "loss": 4.4018,
+      "step": 207872
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.658813139469445e-05,
+      "loss": 4.3967,
+      "step": 208384
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.657974544718393e-05,
+      "loss": 4.4026,
+      "step": 208896
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.657135949967341e-05,
+      "loss": 4.4003,
+      "step": 209408
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.656297355216288e-05,
+      "loss": 4.3957,
+      "step": 209920
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.65546039834561e-05,
+      "loss": 4.4037,
+      "step": 210432
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6546234414749315e-05,
+      "loss": 4.3956,
+      "step": 210944
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6537848467238795e-05,
+      "loss": 4.4055,
+      "step": 211456
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6529462519728275e-05,
+      "loss": 4.4044,
+      "step": 211968
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6521076572217755e-05,
+      "loss": 4.4014,
+      "step": 212480
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6512690624707234e-05,
+      "loss": 4.4017,
+      "step": 212992
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.650430467719671e-05,
+      "loss": 4.398,
+      "step": 213504
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.649591872968619e-05,
+      "loss": 4.3938,
+      "step": 214016
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6487549160979403e-05,
+      "loss": 4.3933,
+      "step": 214528
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6479163213468883e-05,
+      "loss": 4.3929,
+      "step": 215040
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.647077726595836e-05,
+      "loss": 4.39,
+      "step": 215552
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6462391318447837e-05,
+      "loss": 4.4001,
+      "step": 216064
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.645400537093732e-05,
+      "loss": 4.3914,
+      "step": 216576
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.64456194234268e-05,
+      "loss": 4.3951,
+      "step": 217088
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.643723347591628e-05,
+      "loss": 4.3905,
+      "step": 217600
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.642886390720949e-05,
+      "loss": 4.3891,
+      "step": 218112
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.642047795969897e-05,
+      "loss": 4.387,
+      "step": 218624
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.641209201218845e-05,
+      "loss": 4.4012,
+      "step": 219136
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.640370606467793e-05,
+      "loss": 4.379,
+      "step": 219648
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.639532011716741e-05,
+      "loss": 4.3769,
+      "step": 220160
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.638693416965689e-05,
+      "loss": 4.3742,
+      "step": 220672
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.637854822214637e-05,
+      "loss": 4.3873,
+      "step": 221184
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.637016227463585e-05,
+      "loss": 4.3944,
+      "step": 221696
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.636179270592906e-05,
+      "loss": 4.3919,
+      "step": 222208
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.635340675841854e-05,
+      "loss": 4.3837,
+      "step": 222720
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.634502081090802e-05,
+      "loss": 4.3908,
+      "step": 223232
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.633663486339751e-05,
+      "loss": 4.3849,
+      "step": 223744
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.632826529469072e-05,
+      "loss": 4.3854,
+      "step": 224256
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.63198793471802e-05,
+      "loss": 4.3764,
+      "step": 224768
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.631149339966968e-05,
+      "loss": 4.3856,
+      "step": 225280
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.630310745215916e-05,
+      "loss": 4.3815,
+      "step": 225792
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6294737883452366e-05,
+      "loss": 4.3706,
+      "step": 226304
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6286351935941846e-05,
+      "loss": 4.387,
+      "step": 226816
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6277965988431326e-05,
+      "loss": 4.3751,
+      "step": 227328
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6269596419724535e-05,
+      "loss": 4.3772,
+      "step": 227840
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6261210472214015e-05,
+      "loss": 4.3786,
+      "step": 228352
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.6252824524703495e-05,
+      "loss": 4.3672,
+      "step": 228864
+    },
+    {
+      "epoch": 0.03,
+      "eval_loss": 4.3605637550354,
+      "eval_runtime": 316.5441,
+      "eval_samples_per_second": 1205.491,
+      "eval_steps_per_second": 37.672,
+      "step": 228960
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.6244438577192975e-05,
+      "loss": 4.3802,
+      "step": 229376
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.623605262968246e-05,
+      "loss": 4.3693,
+      "step": 229888
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.62276994397794e-05,
+      "loss": 4.3784,
+      "step": 230400
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.621931349226888e-05,
+      "loss": 4.3708,
+      "step": 230912
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.621092754475836e-05,
+      "loss": 4.3825,
+      "step": 231424
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.620254159724784e-05,
+      "loss": 4.3659,
+      "step": 231936
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.619415564973732e-05,
+      "loss": 4.3619,
+      "step": 232448
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.61857697022268e-05,
+      "loss": 4.3538,
+      "step": 232960
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.617738375471628e-05,
+      "loss": 4.3782,
+      "step": 233472
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.616899780720576e-05,
+      "loss": 4.3774,
+      "step": 233984
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.616061185969524e-05,
+      "loss": 4.3619,
+      "step": 234496
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.615222591218472e-05,
+      "loss": 4.3776,
+      "step": 235008
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.614385634347793e-05,
+      "loss": 4.3606,
+      "step": 235520
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.6135470395967415e-05,
+      "loss": 4.3643,
+      "step": 236032
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.6127084448456895e-05,
+      "loss": 4.3642,
+      "step": 236544
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.611869850094637e-05,
+      "loss": 4.3546,
+      "step": 237056
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.611031255343585e-05,
+      "loss": 4.3638,
+      "step": 237568
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.610192660592533e-05,
+      "loss": 4.3554,
+      "step": 238080
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.609354065841481e-05,
+      "loss": 4.367,
+      "step": 238592
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.608515471090429e-05,
+      "loss": 4.3686,
+      "step": 239104
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.60767851421975e-05,
+      "loss": 4.3669,
+      "step": 239616
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.606839919468698e-05,
+      "loss": 4.3587,
+      "step": 240128
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.606001324717646e-05,
+      "loss": 4.3523,
+      "step": 240640
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.605162729966594e-05,
+      "loss": 4.3689,
+      "step": 241152
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.604325773095915e-05,
+      "loss": 4.3477,
+      "step": 241664
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.60349045410561e-05,
+      "loss": 4.3542,
+      "step": 242176
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.602651859354558e-05,
+      "loss": 4.3428,
+      "step": 242688
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.601813264603506e-05,
+      "loss": 4.3628,
+      "step": 243200
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.600974669852454e-05,
+      "loss": 4.3394,
+      "step": 243712
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.600136075101402e-05,
+      "loss": 4.348,
+      "step": 244224
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.599297480350349e-05,
+      "loss": 4.3563,
+      "step": 244736
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.598458885599297e-05,
+      "loss": 4.3523,
+      "step": 245248
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.597620290848245e-05,
+      "loss": 4.3535,
+      "step": 245760
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.596781696097193e-05,
+      "loss": 4.3545,
+      "step": 246272
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.595944739226514e-05,
+      "loss": 4.3546,
+      "step": 246784
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.595106144475462e-05,
+      "loss": 4.3554,
+      "step": 247296
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.59426754972441e-05,
+      "loss": 4.3457,
+      "step": 247808
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.5934289549733587e-05,
+      "loss": 4.3388,
+      "step": 248320
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.5925919981026796e-05,
+      "loss": 4.3332,
+      "step": 248832
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.5917534033516276e-05,
+      "loss": 4.34,
+      "step": 249344
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.5909148086005756e-05,
+      "loss": 4.3391,
+      "step": 249856
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.5900762138495235e-05,
+      "loss": 4.3491,
+      "step": 250368
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.5892392569788445e-05,
+      "loss": 4.337,
+      "step": 250880
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.5884006622277925e-05,
+      "loss": 4.3408,
+      "step": 251392
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.5875620674767404e-05,
+      "loss": 4.3461,
+      "step": 251904
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.5867234727256884e-05,
+      "loss": 4.3394,
+      "step": 252416
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.5858865158550094e-05,
+      "loss": 4.3528,
+      "step": 252928
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.5850479211039573e-05,
+      "loss": 4.3341,
+      "step": 253440
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.5842093263529053e-05,
+      "loss": 4.3198,
+      "step": 253952
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.583370731601854e-05,
+      "loss": 4.3507,
+      "step": 254464
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.582532136850802e-05,
+      "loss": 4.3423,
+      "step": 254976
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.58169354209975e-05,
+      "loss": 4.3413,
+      "step": 255488
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.580854947348698e-05,
+      "loss": 4.3341,
+      "step": 256000
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.580016352597646e-05,
+      "loss": 4.3301,
+      "step": 256512
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.57918103360734e-05,
+      "loss": 4.3309,
+      "step": 257024
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.578344076736661e-05,
+      "loss": 4.3365,
+      "step": 257536
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.577505481985609e-05,
+      "loss": 4.3227,
+      "step": 258048
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.576666887234557e-05,
+      "loss": 4.3283,
+      "step": 258560
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.575828292483505e-05,
+      "loss": 4.336,
+      "step": 259072
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.574989697732453e-05,
+      "loss": 4.3279,
+      "step": 259584
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.574151102981401e-05,
+      "loss": 4.3152,
+      "step": 260096
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.5733125082303494e-05,
+      "loss": 4.3415,
+      "step": 260608
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.5724739134792974e-05,
+      "loss": 4.3151,
+      "step": 261120
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.571636956608618e-05,
+      "loss": 4.325,
+      "step": 261632
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.570798361857566e-05,
+      "loss": 4.3325,
+      "step": 262144
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.569959767106514e-05,
+      "loss": 4.3343,
+      "step": 262656
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.569121172355462e-05,
+      "loss": 4.3172,
+      "step": 263168
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.56828257760441e-05,
+      "loss": 4.3215,
+      "step": 263680
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.567443982853358e-05,
+      "loss": 4.3054,
+      "step": 264192
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.566605388102306e-05,
+      "loss": 4.3323,
+      "step": 264704
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.565766793351254e-05,
+      "loss": 4.326,
+      "step": 265216
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.564928198600202e-05,
+      "loss": 4.3252,
+      "step": 265728
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.5640896038491496e-05,
+      "loss": 4.3228,
+      "step": 266240
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.5632510090980976e-05,
+      "loss": 4.3345,
+      "step": 266752
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.562414052227419e-05,
+      "loss": 4.337,
+      "step": 267264
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.561575457476368e-05,
+      "loss": 4.3145,
+      "step": 267776
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.560736862725315e-05,
+      "loss": 4.3232,
+      "step": 268288
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.559898267974263e-05,
+      "loss": 4.3308,
+      "step": 268800
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.559059673223211e-05,
+      "loss": 4.3069,
+      "step": 269312
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.558222716352533e-05,
+      "loss": 4.3266,
+      "step": 269824
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.55738412160148e-05,
+      "loss": 4.3121,
+      "step": 270336
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.556545526850428e-05,
+      "loss": 4.329,
+      "step": 270848
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.555706932099376e-05,
+      "loss": 4.3269,
+      "step": 271360
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.554868337348324e-05,
+      "loss": 4.3048,
+      "step": 271872
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.554029742597272e-05,
+      "loss": 4.3177,
+      "step": 272384
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.55319114784622e-05,
+      "loss": 4.3127,
+      "step": 272896
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.5523541909755416e-05,
+      "loss": 4.3255,
+      "step": 273408
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.5515155962244896e-05,
+      "loss": 4.3143,
+      "step": 273920
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.5506786393538105e-05,
+      "loss": 4.3144,
+      "step": 274432
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5498400446027585e-05,
+      "loss": 4.3127,
+      "step": 274944
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5490014498517065e-05,
+      "loss": 4.311,
+      "step": 275456
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5481628551006545e-05,
+      "loss": 4.3083,
+      "step": 275968
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5473258982299754e-05,
+      "loss": 4.298,
+      "step": 276480
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5464873034789234e-05,
+      "loss": 4.3075,
+      "step": 276992
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5456487087278714e-05,
+      "loss": 4.3158,
+      "step": 277504
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5448101139768194e-05,
+      "loss": 4.3159,
+      "step": 278016
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5439715192257674e-05,
+      "loss": 4.2966,
+      "step": 278528
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5431329244747154e-05,
+      "loss": 4.3034,
+      "step": 279040
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5422943297236634e-05,
+      "loss": 4.3066,
+      "step": 279552
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5414557349726114e-05,
+      "loss": 4.3165,
+      "step": 280064
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5406171402215594e-05,
+      "loss": 4.306,
+      "step": 280576
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.539780183350881e-05,
+      "loss": 4.3067,
+      "step": 281088
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.538941588599829e-05,
+      "loss": 4.2983,
+      "step": 281600
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.538102993848777e-05,
+      "loss": 4.3146,
+      "step": 282112
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.537264399097725e-05,
+      "loss": 4.2893,
+      "step": 282624
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.536425804346673e-05,
+      "loss": 4.3055,
+      "step": 283136
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.535588847475994e-05,
+      "loss": 4.2964,
+      "step": 283648
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.534750252724942e-05,
+      "loss": 4.3009,
+      "step": 284160
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.53391165797389e-05,
+      "loss": 4.2945,
+      "step": 284672
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.533073063222838e-05,
+      "loss": 4.3065,
+      "step": 285184
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.532234468471786e-05,
+      "loss": 4.2937,
+      "step": 285696
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.531395873720733e-05,
+      "loss": 4.2953,
+      "step": 286208
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.530557278969682e-05,
+      "loss": 4.3004,
+      "step": 286720
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.52971868421863e-05,
+      "loss": 4.2963,
+      "step": 287232
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.528883365228324e-05,
+      "loss": 4.3062,
+      "step": 287744
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.528044770477272e-05,
+      "loss": 4.3051,
+      "step": 288256
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.52720617572622e-05,
+      "loss": 4.3008,
+      "step": 288768
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.526367580975168e-05,
+      "loss": 4.3055,
+      "step": 289280
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.525528986224116e-05,
+      "loss": 4.2982,
+      "step": 289792
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5246903914730636e-05,
+      "loss": 4.2921,
+      "step": 290304
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5238517967220116e-05,
+      "loss": 4.2933,
+      "step": 290816
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.523014839851333e-05,
+      "loss": 4.2946,
+      "step": 291328
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5221762451002805e-05,
+      "loss": 4.295,
+      "step": 291840
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5213376503492285e-05,
+      "loss": 4.3009,
+      "step": 292352
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.520499055598177e-05,
+      "loss": 4.2919,
+      "step": 292864
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.519660460847125e-05,
+      "loss": 4.2984,
+      "step": 293376
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.518821866096073e-05,
+      "loss": 4.2884,
+      "step": 293888
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.517983271345021e-05,
+      "loss": 4.2921,
+      "step": 294400
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.517144676593969e-05,
+      "loss": 4.2905,
+      "step": 294912
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.51630771972329e-05,
+      "loss": 4.3043,
+      "step": 295424
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.515469124972238e-05,
+      "loss": 4.2869,
+      "step": 295936
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.514630530221186e-05,
+      "loss": 4.2809,
+      "step": 296448
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.513793573350507e-05,
+      "loss": 4.2752,
+      "step": 296960
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.512954978599455e-05,
+      "loss": 4.2922,
+      "step": 297472
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.512116383848403e-05,
+      "loss": 4.2975,
+      "step": 297984
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.511277789097351e-05,
+      "loss": 4.2972,
+      "step": 298496
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.510439194346299e-05,
+      "loss": 4.2917,
+      "step": 299008
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.509600599595247e-05,
+      "loss": 4.2965,
+      "step": 299520
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5087620048441956e-05,
+      "loss": 4.2912,
+      "step": 300032
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5079250479735165e-05,
+      "loss": 4.2903,
+      "step": 300544
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5070864532224645e-05,
+      "loss": 4.2843,
+      "step": 301056
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5062478584714125e-05,
+      "loss": 4.2906,
+      "step": 301568
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5054092637203605e-05,
+      "loss": 4.2887,
+      "step": 302080
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5045706689693085e-05,
+      "loss": 4.2763,
+      "step": 302592
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5037337120986294e-05,
+      "loss": 4.294,
+      "step": 303104
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5028951173475774e-05,
+      "loss": 4.2856,
+      "step": 303616
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5020565225965254e-05,
+      "loss": 4.2812,
+      "step": 304128
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.5012179278454734e-05,
+      "loss": 4.2843,
+      "step": 304640
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.500380970974794e-05,
+      "loss": 4.282,
+      "step": 305152
+    },
+    {
+      "epoch": 1.03,
+      "eval_loss": 4.276997089385986,
+      "eval_runtime": 289.737,
+      "eval_samples_per_second": 1317.026,
+      "eval_steps_per_second": 41.158,
+      "step": 305280
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.499542376223742e-05,
+      "loss": 4.2866,
+      "step": 305664
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.498703781472691e-05,
+      "loss": 4.2803,
+      "step": 306176
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.497865186721639e-05,
+      "loss": 4.2901,
+      "step": 306688
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.497026591970587e-05,
+      "loss": 4.2787,
+      "step": 307200
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.496187997219535e-05,
+      "loss": 4.2915,
+      "step": 307712
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.495349402468482e-05,
+      "loss": 4.28,
+      "step": 308224
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.49451080771743e-05,
+      "loss": 4.2694,
+      "step": 308736
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.493672212966378e-05,
+      "loss": 4.2689,
+      "step": 309248
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.492833618215326e-05,
+      "loss": 4.2835,
+      "step": 309760
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.491996661344647e-05,
+      "loss": 4.2875,
+      "step": 310272
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.491158066593595e-05,
+      "loss": 4.2762,
+      "step": 310784
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.490319471842543e-05,
+      "loss": 4.2852,
+      "step": 311296
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.489480877091491e-05,
+      "loss": 4.2737,
+      "step": 311808
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.488642282340439e-05,
+      "loss": 4.2773,
+      "step": 312320
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.487803687589387e-05,
+      "loss": 4.2725,
+      "step": 312832
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.486965092838336e-05,
+      "loss": 4.2673,
+      "step": 313344
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.486126498087284e-05,
+      "loss": 4.2728,
+      "step": 313856
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.485287903336232e-05,
+      "loss": 4.2676,
+      "step": 314368
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.48444930858518e-05,
+      "loss": 4.2809,
+      "step": 314880
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.483610713834128e-05,
+      "loss": 4.281,
+      "step": 315392
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.482772119083076e-05,
+      "loss": 4.2816,
+      "step": 315904
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.481933524332024e-05,
+      "loss": 4.2744,
+      "step": 316416
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.481094929580971e-05,
+      "loss": 4.2673,
+      "step": 316928
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.480256334829919e-05,
+      "loss": 4.2789,
+      "step": 317440
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.479417740078867e-05,
+      "loss": 4.2667,
+      "step": 317952
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.478580783208188e-05,
+      "loss": 4.2675,
+      "step": 318464
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.4777438263375096e-05,
+      "loss": 4.2564,
+      "step": 318976
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.4769052315864576e-05,
+      "loss": 4.2725,
+      "step": 319488
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.4760666368354056e-05,
+      "loss": 4.2596,
+      "step": 320000
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.4752280420843536e-05,
+      "loss": 4.258,
+      "step": 320512
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.4743894473333016e-05,
+      "loss": 4.2736,
+      "step": 321024
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.473552490462623e-05,
+      "loss": 4.267,
+      "step": 321536
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.472713895711571e-05,
+      "loss": 4.2733,
+      "step": 322048
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.4718753009605185e-05,
+      "loss": 4.273,
+      "step": 322560
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.4710367062094665e-05,
+      "loss": 4.2642,
+      "step": 323072
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.470199749338788e-05,
+      "loss": 4.2742,
+      "step": 323584
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.4693611545877354e-05,
+      "loss": 4.2655,
+      "step": 324096
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.4685225598366834e-05,
+      "loss": 4.2539,
+      "step": 324608
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.4676839650856314e-05,
+      "loss": 4.2503,
+      "step": 325120
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.4668453703345794e-05,
+      "loss": 4.256,
+      "step": 325632
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.466006775583528e-05,
+      "loss": 4.2573,
+      "step": 326144
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.465168180832476e-05,
+      "loss": 4.2681,
+      "step": 326656
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.464329586081424e-05,
+      "loss": 4.2547,
+      "step": 327168
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.463492629210745e-05,
+      "loss": 4.2572,
+      "step": 327680
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.462654034459693e-05,
+      "loss": 4.2654,
+      "step": 328192
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.461815439708641e-05,
+      "loss": 4.2611,
+      "step": 328704
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.460976844957589e-05,
+      "loss": 4.2667,
+      "step": 329216
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.460138250206537e-05,
+      "loss": 4.2506,
+      "step": 329728
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.459299655455485e-05,
+      "loss": 4.2357,
+      "step": 330240
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.458461060704433e-05,
+      "loss": 4.2686,
+      "step": 330752
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.457622465953381e-05,
+      "loss": 4.2673,
+      "step": 331264
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.456787146963075e-05,
+      "loss": 4.2592,
+      "step": 331776
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.4559485522120234e-05,
+      "loss": 4.2527,
+      "step": 332288
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.4551099574609714e-05,
+      "loss": 4.2495,
+      "step": 332800
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.4542730005902923e-05,
+      "loss": 4.2502,
+      "step": 333312
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.453436043719613e-05,
+      "loss": 4.2548,
+      "step": 333824
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.452597448968561e-05,
+      "loss": 4.2439,
+      "step": 334336
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.451758854217509e-05,
+      "loss": 4.249,
+      "step": 334848
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.450920259466457e-05,
+      "loss": 4.2584,
+      "step": 335360
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.450081664715405e-05,
+      "loss": 4.2522,
+      "step": 335872
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.449243069964353e-05,
+      "loss": 4.2373,
+      "step": 336384
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.448404475213301e-05,
+      "loss": 4.2626,
+      "step": 336896
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.447565880462249e-05,
+      "loss": 4.2382,
+      "step": 337408
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.446727285711197e-05,
+      "loss": 4.2408,
+      "step": 337920
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.445888690960145e-05,
+      "loss": 4.2561,
+      "step": 338432
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.445050096209093e-05,
+      "loss": 4.2592,
+      "step": 338944
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.444211501458041e-05,
+      "loss": 4.2356,
+      "step": 339456
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.44337290670699e-05,
+      "loss": 4.245,
+      "step": 339968
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.442534311955937e-05,
+      "loss": 4.2278,
+      "step": 340480
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.441697355085259e-05,
+      "loss": 4.2552,
+      "step": 340992
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.440858760334207e-05,
+      "loss": 4.2531,
+      "step": 341504
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.440020165583154e-05,
+      "loss": 4.2437,
+      "step": 342016
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.439181570832102e-05,
+      "loss": 4.2481,
+      "step": 342528
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.43834297608105e-05,
+      "loss": 4.2561,
+      "step": 343040
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.437504381329998e-05,
+      "loss": 4.265,
+      "step": 343552
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.436667424459319e-05,
+      "loss": 4.237,
+      "step": 344064
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.435828829708267e-05,
+      "loss": 4.2481,
+      "step": 344576
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.434990234957216e-05,
+      "loss": 4.2536,
+      "step": 345088
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.434151640206164e-05,
+      "loss": 4.2301,
+      "step": 345600
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.4333130454551117e-05,
+      "loss": 4.2527,
+      "step": 346112
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.4324744507040597e-05,
+      "loss": 4.2373,
+      "step": 346624
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.4316358559530076e-05,
+      "loss": 4.254,
+      "step": 347136
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.4307972612019556e-05,
+      "loss": 4.2522,
+      "step": 347648
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.4299586664509036e-05,
+      "loss": 4.233,
+      "step": 348160
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.4291217095802245e-05,
+      "loss": 4.2377,
+      "step": 348672
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.4282831148291725e-05,
+      "loss": 4.2398,
+      "step": 349184
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.4274445200781205e-05,
+      "loss": 4.2547,
+      "step": 349696
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.4266059253270685e-05,
+      "loss": 4.2409,
+      "step": 350208
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.4257689684563894e-05,
+      "loss": 4.2409,
+      "step": 350720
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.4249320115857104e-05,
+      "loss": 4.236,
+      "step": 351232
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.424093416834659e-05,
+      "loss": 4.2425,
+      "step": 351744
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.423254822083607e-05,
+      "loss": 4.2357,
+      "step": 352256
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.422416227332555e-05,
+      "loss": 4.2221,
+      "step": 352768
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.421577632581503e-05,
+      "loss": 4.2366,
+      "step": 353280
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.420739037830451e-05,
+      "loss": 4.2406,
+      "step": 353792
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.419900443079399e-05,
+      "loss": 4.2426,
+      "step": 354304
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.419061848328347e-05,
+      "loss": 4.2244,
+      "step": 354816
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.418224891457668e-05,
+      "loss": 4.2312,
+      "step": 355328
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.417386296706616e-05,
+      "loss": 4.2365,
+      "step": 355840
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.416547701955564e-05,
+      "loss": 4.2405,
+      "step": 356352
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.415709107204512e-05,
+      "loss": 4.2402,
+      "step": 356864
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.41487051245346e-05,
+      "loss": 4.2336,
+      "step": 357376
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.414033555582781e-05,
+      "loss": 4.2292,
+      "step": 357888
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.413194960831729e-05,
+      "loss": 4.241,
+      "step": 358400
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.412359641841423e-05,
+      "loss": 4.2176,
+      "step": 358912
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.411521047090371e-05,
+      "loss": 4.2343,
+      "step": 359424
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.410684090219692e-05,
+      "loss": 4.223,
+      "step": 359936
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.40984549546864e-05,
+      "loss": 4.2338,
+      "step": 360448
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.409006900717588e-05,
+      "loss": 4.2203,
+      "step": 360960
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.408168305966536e-05,
+      "loss": 4.2354,
+      "step": 361472
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.407329711215484e-05,
+      "loss": 4.2268,
+      "step": 361984
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.406491116464432e-05,
+      "loss": 4.2241,
+      "step": 362496
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.40565252171338e-05,
+      "loss": 4.2287,
+      "step": 363008
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.404813926962328e-05,
+      "loss": 4.2271,
+      "step": 363520
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.403975332211276e-05,
+      "loss": 4.2349,
+      "step": 364032
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.403136737460224e-05,
+      "loss": 4.2393,
+      "step": 364544
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.402299780589546e-05,
+      "loss": 4.2323,
+      "step": 365056
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.401462823718867e-05,
+      "loss": 4.2335,
+      "step": 365568
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.4006242289678147e-05,
+      "loss": 4.2283,
+      "step": 366080
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.3997856342167627e-05,
+      "loss": 4.2244,
+      "step": 366592
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.3989470394657106e-05,
+      "loss": 4.2213,
+      "step": 367104
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.3981084447146586e-05,
+      "loss": 4.2316,
+      "step": 367616
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.3972698499636066e-05,
+      "loss": 4.2212,
+      "step": 368128
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.3964312552125546e-05,
+      "loss": 4.2299,
+      "step": 368640
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.3955942983418755e-05,
+      "loss": 4.2243,
+      "step": 369152
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.3947557035908235e-05,
+      "loss": 4.2309,
+      "step": 369664
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.3939171088397715e-05,
+      "loss": 4.2217,
+      "step": 370176
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.3930785140887195e-05,
+      "loss": 4.2255,
+      "step": 370688
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.3922399193376675e-05,
+      "loss": 4.2203,
+      "step": 371200
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.3914013245866155e-05,
+      "loss": 4.2334,
+      "step": 371712
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.3905627298355635e-05,
+      "loss": 4.2226,
+      "step": 372224
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.3897241350845115e-05,
+      "loss": 4.2142,
+      "step": 372736
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.3888855403334595e-05,
+      "loss": 4.2068,
+      "step": 373248
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.3880485834627804e-05,
+      "loss": 4.2253,
+      "step": 373760
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.3872099887117284e-05,
+      "loss": 4.2295,
+      "step": 374272
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.3863713939606764e-05,
+      "loss": 4.2296,
+      "step": 374784
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.3855327992096244e-05,
+      "loss": 4.2278,
+      "step": 375296
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.3846942044585724e-05,
+      "loss": 4.2274,
+      "step": 375808
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.3838556097075204e-05,
+      "loss": 4.2236,
+      "step": 376320
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.3830170149564684e-05,
+      "loss": 4.228,
+      "step": 376832
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.3821784202054164e-05,
+      "loss": 4.2155,
+      "step": 377344
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.3813398254543644e-05,
+      "loss": 4.224,
+      "step": 377856
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.380501230703313e-05,
+      "loss": 4.2203,
+      "step": 378368
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.379662635952261e-05,
+      "loss": 4.2155,
+      "step": 378880
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.378824041201209e-05,
+      "loss": 4.2281,
+      "step": 379392
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.37798708433053e-05,
+      "loss": 4.2196,
+      "step": 379904
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.377148489579478e-05,
+      "loss": 4.2173,
+      "step": 380416
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.376309894828426e-05,
+      "loss": 4.2188,
+      "step": 380928
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.375471300077374e-05,
+      "loss": 4.2196,
+      "step": 381440
+    },
+    {
+      "epoch": 0.03,
+      "eval_loss": 4.220510005950928,
+      "eval_runtime": 294.3224,
+      "eval_samples_per_second": 1296.507,
+      "eval_steps_per_second": 40.517,
+      "step": 381600
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.374632705326321e-05,
+      "loss": 4.2171,
+      "step": 381952
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.373795748455643e-05,
+      "loss": 4.2172,
+      "step": 382464
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.372957153704591e-05,
+      "loss": 4.2232,
+      "step": 382976
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.372118558953539e-05,
+      "loss": 4.2136,
+      "step": 383488
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.371279964202487e-05,
+      "loss": 4.2267,
+      "step": 384000
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.370446283092554e-05,
+      "loss": 4.217,
+      "step": 384512
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.369607688341502e-05,
+      "loss": 4.2056,
+      "step": 385024
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.36876909359045e-05,
+      "loss": 4.2043,
+      "step": 385536
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.367930498839398e-05,
+      "loss": 4.2201,
+      "step": 386048
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.367091904088346e-05,
+      "loss": 4.2225,
+      "step": 386560
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.366253309337294e-05,
+      "loss": 4.2148,
+      "step": 387072
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.365414714586242e-05,
+      "loss": 4.2192,
+      "step": 387584
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.364577757715563e-05,
+      "loss": 4.2112,
+      "step": 388096
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.363739162964511e-05,
+      "loss": 4.2144,
+      "step": 388608
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.362900568213459e-05,
+      "loss": 4.2092,
+      "step": 389120
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.362061973462407e-05,
+      "loss": 4.2005,
+      "step": 389632
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.361223378711355e-05,
+      "loss": 4.2122,
+      "step": 390144
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.360384783960304e-05,
+      "loss": 4.2033,
+      "step": 390656
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.359546189209251e-05,
+      "loss": 4.2174,
+      "step": 391168
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.358707594458199e-05,
+      "loss": 4.2188,
+      "step": 391680
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.357868999707147e-05,
+      "loss": 4.2215,
+      "step": 392192
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.357030404956095e-05,
+      "loss": 4.211,
+      "step": 392704
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.356191810205043e-05,
+      "loss": 4.2077,
+      "step": 393216
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.355353215453991e-05,
+      "loss": 4.2139,
+      "step": 393728
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.354516258583312e-05,
+      "loss": 4.2028,
+      "step": 394240
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.35367766383226e-05,
+      "loss": 4.2081,
+      "step": 394752
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.352839069081208e-05,
+      "loss": 4.1919,
+      "step": 395264
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.352000474330156e-05,
+      "loss": 4.2143,
+      "step": 395776
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.3511635174594776e-05,
+      "loss": 4.2017,
+      "step": 396288
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.3503249227084256e-05,
+      "loss": 4.197,
+      "step": 396800
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3494863279573736e-05,
+      "loss": 4.207,
+      "step": 397312
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3486477332063216e-05,
+      "loss": 4.2111,
+      "step": 397824
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3478091384552696e-05,
+      "loss": 4.2137,
+      "step": 398336
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3469721815845905e-05,
+      "loss": 4.2089,
+      "step": 398848
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3461335868335385e-05,
+      "loss": 4.2043,
+      "step": 399360
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3452966299628594e-05,
+      "loss": 4.2122,
+      "step": 399872
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3444580352118074e-05,
+      "loss": 4.2072,
+      "step": 400384
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3436194404607554e-05,
+      "loss": 4.192,
+      "step": 400896
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3427808457097034e-05,
+      "loss": 4.1919,
+      "step": 401408
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3419422509586514e-05,
+      "loss": 4.1934,
+      "step": 401920
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3411036562075993e-05,
+      "loss": 4.1982,
+      "step": 402432
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.340266699336921e-05,
+      "loss": 4.2056,
+      "step": 402944
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.339428104585869e-05,
+      "loss": 4.1973,
+      "step": 403456
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.338589509834817e-05,
+      "loss": 4.1943,
+      "step": 403968
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.337750915083765e-05,
+      "loss": 4.2071,
+      "step": 404480
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.336912320332713e-05,
+      "loss": 4.1985,
+      "step": 404992
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.336073725581661e-05,
+      "loss": 4.2132,
+      "step": 405504
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.335235130830609e-05,
+      "loss": 4.1928,
+      "step": 406016
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.334396536079557e-05,
+      "loss": 4.1737,
+      "step": 406528
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.333559579208878e-05,
+      "loss": 4.2109,
+      "step": 407040
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.332720984457826e-05,
+      "loss": 4.205,
+      "step": 407552
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.331882389706774e-05,
+      "loss": 4.203,
+      "step": 408064
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.331043794955722e-05,
+      "loss": 4.1974,
+      "step": 408576
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.330206838085043e-05,
+      "loss": 4.1926,
+      "step": 409088
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3293682433339914e-05,
+      "loss": 4.1906,
+      "step": 409600
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3285296485829394e-05,
+      "loss": 4.1944,
+      "step": 410112
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3276910538318874e-05,
+      "loss": 4.1885,
+      "step": 410624
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.326854096961208e-05,
+      "loss": 4.1897,
+      "step": 411136
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.326015502210156e-05,
+      "loss": 4.1977,
+      "step": 411648
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.325178545339477e-05,
+      "loss": 4.1925,
+      "step": 412160
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.324339950588425e-05,
+      "loss": 4.1827,
+      "step": 412672
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.323501355837373e-05,
+      "loss": 4.2019,
+      "step": 413184
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.322664398966694e-05,
+      "loss": 4.1837,
+      "step": 413696
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.321825804215642e-05,
+      "loss": 4.1837,
+      "step": 414208
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.320988847344964e-05,
+      "loss": 4.1964,
+      "step": 414720
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.320150252593912e-05,
+      "loss": 4.199,
+      "step": 415232
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.31931165784286e-05,
+      "loss": 4.1796,
+      "step": 415744
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3184730630918077e-05,
+      "loss": 4.1893,
+      "step": 416256
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3176344683407557e-05,
+      "loss": 4.1719,
+      "step": 416768
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3167958735897036e-05,
+      "loss": 4.1948,
+      "step": 417280
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3159572788386516e-05,
+      "loss": 4.1949,
+      "step": 417792
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3151186840875996e-05,
+      "loss": 4.1927,
+      "step": 418304
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.314280089336547e-05,
+      "loss": 4.1908,
+      "step": 418816
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.313441494585495e-05,
+      "loss": 4.2001,
+      "step": 419328
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.312602899834443e-05,
+      "loss": 4.207,
+      "step": 419840
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.311764305083391e-05,
+      "loss": 4.18,
+      "step": 420352
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.310925710332339e-05,
+      "loss": 4.1955,
+      "step": 420864
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.310087115581287e-05,
+      "loss": 4.1975,
+      "step": 421376
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3092501587106085e-05,
+      "loss": 4.1727,
+      "step": 421888
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3084115639595565e-05,
+      "loss": 4.1961,
+      "step": 422400
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3075729692085045e-05,
+      "loss": 4.1802,
+      "step": 422912
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3067343744574525e-05,
+      "loss": 4.197,
+      "step": 423424
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3058957797064005e-05,
+      "loss": 4.1961,
+      "step": 423936
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3050571849553485e-05,
+      "loss": 4.1801,
+      "step": 424448
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3042202280846694e-05,
+      "loss": 4.1796,
+      "step": 424960
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3033816333336174e-05,
+      "loss": 4.1847,
+      "step": 425472
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3025430385825654e-05,
+      "loss": 4.2001,
+      "step": 425984
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.3017044438315134e-05,
+      "loss": 4.1849,
+      "step": 426496
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.300869124841207e-05,
+      "loss": 4.1874,
+      "step": 427008
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.300030530090155e-05,
+      "loss": 4.1813,
+      "step": 427520
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.299191935339104e-05,
+      "loss": 4.1877,
+      "step": 428032
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.298353340588052e-05,
+      "loss": 4.1827,
+      "step": 428544
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.297514745837e-05,
+      "loss": 4.1653,
+      "step": 429056
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.296676151085948e-05,
+      "loss": 4.1818,
+      "step": 429568
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.295837556334896e-05,
+      "loss": 4.1898,
+      "step": 430080
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.294998961583844e-05,
+      "loss": 4.1891,
+      "step": 430592
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.294162004713165e-05,
+      "loss": 4.1721,
+      "step": 431104
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.293325047842486e-05,
+      "loss": 4.1759,
+      "step": 431616
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.292486453091434e-05,
+      "loss": 4.1812,
+      "step": 432128
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.291647858340382e-05,
+      "loss": 4.1839,
+      "step": 432640
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.29080926358933e-05,
+      "loss": 4.1911,
+      "step": 433152
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.289970668838278e-05,
+      "loss": 4.1743,
+      "step": 433664
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.289132074087226e-05,
+      "loss": 4.1807,
+      "step": 434176
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.2882934793361737e-05,
+      "loss": 4.1873,
+      "step": 434688
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.287454884585122e-05,
+      "loss": 4.1677,
+      "step": 435200
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.28661628983407e-05,
+      "loss": 4.1786,
+      "step": 435712
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.285779332963391e-05,
+      "loss": 4.1687,
+      "step": 436224
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.284940738212339e-05,
+      "loss": 4.1827,
+      "step": 436736
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.28410378134166e-05,
+      "loss": 4.1686,
+      "step": 437248
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.283265186590608e-05,
+      "loss": 4.184,
+      "step": 437760
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.282426591839556e-05,
+      "loss": 4.1726,
+      "step": 438272
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.281587997088504e-05,
+      "loss": 4.1742,
+      "step": 438784
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.280749402337452e-05,
+      "loss": 4.1767,
+      "step": 439296
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.2799108075864e-05,
+      "loss": 4.1769,
+      "step": 439808
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.2790722128353474e-05,
+      "loss": 4.1762,
+      "step": 440320
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.278233618084296e-05,
+      "loss": 4.1903,
+      "step": 440832
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.277396661213618e-05,
+      "loss": 4.1805,
+      "step": 441344
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.276558066462566e-05,
+      "loss": 4.1827,
+      "step": 441856
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.275719471711513e-05,
+      "loss": 4.1722,
+      "step": 442368
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.274880876960461e-05,
+      "loss": 4.1769,
+      "step": 442880
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.274042282209409e-05,
+      "loss": 4.171,
+      "step": 443392
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.273203687458357e-05,
+      "loss": 4.1759,
+      "step": 443904
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.272365092707305e-05,
+      "loss": 4.1732,
+      "step": 444416
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.271528135836626e-05,
+      "loss": 4.1763,
+      "step": 444928
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.270689541085574e-05,
+      "loss": 4.1767,
+      "step": 445440
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.269850946334522e-05,
+      "loss": 4.1756,
+      "step": 445952
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.26901235158347e-05,
+      "loss": 4.1704,
+      "step": 446464
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.2681753947127915e-05,
+      "loss": 4.1781,
+      "step": 446976
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.2673367999617395e-05,
+      "loss": 4.1707,
+      "step": 447488
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.2664982052106875e-05,
+      "loss": 4.1785,
+      "step": 448000
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.2656596104596355e-05,
+      "loss": 4.1716,
+      "step": 448512
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.2648226535889564e-05,
+      "loss": 4.1675,
+      "step": 449024
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.263985696718277e-05,
+      "loss": 4.1526,
+      "step": 449536
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.263147101967225e-05,
+      "loss": 4.1793,
+      "step": 450048
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.262310145096547e-05,
+      "loss": 4.1798,
+      "step": 450560
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.261471550345495e-05,
+      "loss": 4.1787,
+      "step": 451072
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.260632955594442e-05,
+      "loss": 4.1751,
+      "step": 451584
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.25979436084339e-05,
+      "loss": 4.1765,
+      "step": 452096
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.258955766092338e-05,
+      "loss": 4.1699,
+      "step": 452608
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.258117171341287e-05,
+      "loss": 4.183,
+      "step": 453120
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.257278576590235e-05,
+      "loss": 4.164,
+      "step": 453632
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.256439981839183e-05,
+      "loss": 4.1721,
+      "step": 454144
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.255601387088131e-05,
+      "loss": 4.1691,
+      "step": 454656
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.254762792337079e-05,
+      "loss": 4.1678,
+      "step": 455168
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.253924197586027e-05,
+      "loss": 4.1775,
+      "step": 455680
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.253085602834975e-05,
+      "loss": 4.1655,
+      "step": 456192
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.252248645964296e-05,
+      "loss": 4.1748,
+      "step": 456704
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.251410051213244e-05,
+      "loss": 4.165,
+      "step": 457216
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 4.250571456462192e-05,
+      "loss": 4.1715,
+      "step": 457728
+    },
+    {
+      "epoch": 1.03,
+      "eval_loss": 4.179671764373779,
+      "eval_runtime": 297.5058,
+      "eval_samples_per_second": 1282.634,
+      "eval_steps_per_second": 40.083,
+      "step": 457920
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 4.24973286171114e-05,
+      "loss": 4.1706,
+      "step": 458240
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 4.248894266960088e-05,
+      "loss": 4.1666,
+      "step": 458752
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 4.248055672209036e-05,
+      "loss": 4.1742,
+      "step": 459264
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 4.247217077457984e-05,
+      "loss": 4.1638,
+      "step": 459776
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 4.246378482706932e-05,
+      "loss": 4.1788,
+      "step": 460288
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 4.24553988795588e-05,
+      "loss": 4.1672,
+      "step": 460800
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 4.244704568965574e-05,
+      "loss": 4.1606,
+      "step": 461312
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 4.243865974214522e-05,
+      "loss": 4.1544,
+      "step": 461824
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 4.24302737946347e-05,
+      "loss": 4.1723,
+      "step": 462336
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 4.242188784712418e-05,
+      "loss": 4.1757,
+      "step": 462848
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 4.241350189961366e-05,
+      "loss": 4.166,
+      "step": 463360
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 4.2405115952103135e-05,
+      "loss": 4.1712,
+      "step": 463872
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 4.239674638339635e-05,
+      "loss": 4.1647,
+      "step": 464384
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 4.238836043588583e-05,
+      "loss": 4.1639,
+      "step": 464896
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 4.237997448837531e-05,
+      "loss": 4.1585,
+      "step": 465408
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 4.2371588540864784e-05,
+      "loss": 4.1553,
+      "step": 465920
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 4.236320259335427e-05,
+      "loss": 4.1639,
+      "step": 466432
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 4.235481664584375e-05,
+      "loss": 4.1599,
+      "step": 466944
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 4.234643069833323e-05,
+      "loss": 4.1666,
+      "step": 467456
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 4.233806112962644e-05,
+      "loss": 4.1709,
+      "step": 467968
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 4.232967518211592e-05,
+      "loss": 4.1748,
+      "step": 468480
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 4.23212892346054e-05,
+      "loss": 4.1656,
+      "step": 468992
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 4.231290328709488e-05,
+      "loss": 4.161,
+      "step": 469504
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 4.230451733958436e-05,
+      "loss": 4.1642,
+      "step": 470016
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 4.229613139207384e-05,
+      "loss": 4.159,
+      "step": 470528
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 4.228774544456332e-05,
+      "loss": 4.1608,
+      "step": 471040
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 4.22793594970528e-05,
+      "loss": 4.1416,
+      "step": 471552
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 4.227098992834601e-05,
+      "loss": 4.1693,
+      "step": 472064
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 4.226260398083549e-05,
+      "loss": 4.1579,
+      "step": 472576
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 4.2254234412128704e-05,
+      "loss": 4.1487,
+      "step": 473088
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.2245848464618184e-05,
+      "loss": 4.1584,
+      "step": 473600
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.2237462517107664e-05,
+      "loss": 4.1663,
+      "step": 474112
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.2229076569597144e-05,
+      "loss": 4.1614,
+      "step": 474624
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.2220690622086624e-05,
+      "loss": 4.1662,
+      "step": 475136
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.221232105337983e-05,
+      "loss": 4.1552,
+      "step": 475648
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.220393510586931e-05,
+      "loss": 4.1672,
+      "step": 476160
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.219554915835879e-05,
+      "loss": 4.1606,
+      "step": 476672
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.218716321084827e-05,
+      "loss": 4.1445,
+      "step": 477184
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.217877726333775e-05,
+      "loss": 4.1466,
+      "step": 477696
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.217039131582723e-05,
+      "loss": 4.1466,
+      "step": 478208
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.216200536831671e-05,
+      "loss": 4.1532,
+      "step": 478720
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.215361942080619e-05,
+      "loss": 4.162,
+      "step": 479232
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.214524985209941e-05,
+      "loss": 4.149,
+      "step": 479744
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.213688028339262e-05,
+      "loss": 4.1517,
+      "step": 480256
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.21284943358821e-05,
+      "loss": 4.1563,
+      "step": 480768
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.212010838837158e-05,
+      "loss": 4.1509,
+      "step": 481280
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.211172244086106e-05,
+      "loss": 4.167,
+      "step": 481792
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.210335287215427e-05,
+      "loss": 4.1489,
+      "step": 482304
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.209496692464375e-05,
+      "loss": 4.1242,
+      "step": 482816
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.208658097713323e-05,
+      "loss": 4.1663,
+      "step": 483328
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.207819502962271e-05,
+      "loss": 4.1598,
+      "step": 483840
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.206980908211219e-05,
+      "loss": 4.1606,
+      "step": 484352
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.2061439513405396e-05,
+      "loss": 4.1513,
+      "step": 484864
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.2053053565894876e-05,
+      "loss": 4.1472,
+      "step": 485376
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.204466761838436e-05,
+      "loss": 4.1457,
+      "step": 485888
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.203628167087384e-05,
+      "loss": 4.153,
+      "step": 486400
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.202789572336332e-05,
+      "loss": 4.1451,
+      "step": 486912
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.201952615465653e-05,
+      "loss": 4.1426,
+      "step": 487424
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.201114020714601e-05,
+      "loss": 4.1532,
+      "step": 487936
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.200275425963549e-05,
+      "loss": 4.1498,
+      "step": 488448
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.199436831212497e-05,
+      "loss": 4.1358,
+      "step": 488960
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.1985982364614444e-05,
+      "loss": 4.1575,
+      "step": 489472
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.197761279590766e-05,
+      "loss": 4.1361,
+      "step": 489984
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.196922684839714e-05,
+      "loss": 4.1409,
+      "step": 490496
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.196084090088662e-05,
+      "loss": 4.1538,
+      "step": 491008
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.19524549533761e-05,
+      "loss": 4.1536,
+      "step": 491520
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.194406900586558e-05,
+      "loss": 4.1338,
+      "step": 492032
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.193568305835506e-05,
+      "loss": 4.145,
+      "step": 492544
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.192729711084454e-05,
+      "loss": 4.1269,
+      "step": 493056
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.191891116333402e-05,
+      "loss": 4.1509,
+      "step": 493568
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.19105252158235e-05,
+      "loss": 4.1501,
+      "step": 494080
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.190215564711671e-05,
+      "loss": 4.1503,
+      "step": 494592
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.189376969960619e-05,
+      "loss": 4.1448,
+      "step": 495104
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.188538375209567e-05,
+      "loss": 4.1538,
+      "step": 495616
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.187701418338888e-05,
+      "loss": 4.1617,
+      "step": 496128
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.186862823587836e-05,
+      "loss": 4.1376,
+      "step": 496640
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.186024228836784e-05,
+      "loss": 4.1497,
+      "step": 497152
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.185185634085732e-05,
+      "loss": 4.1535,
+      "step": 497664
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.18434703933468e-05,
+      "loss": 4.1336,
+      "step": 498176
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.183508444583628e-05,
+      "loss": 4.1489,
+      "step": 498688
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.1826714877129494e-05,
+      "loss": 4.144,
+      "step": 499200
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.1818328929618974e-05,
+      "loss": 4.1504,
+      "step": 499712
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.1809942982108454e-05,
+      "loss": 4.1536,
+      "step": 500224
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.1801557034597934e-05,
+      "loss": 4.1385,
+      "step": 500736
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.1793171087087414e-05,
+      "loss": 4.137,
+      "step": 501248
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.1784785139576894e-05,
+      "loss": 4.1438,
+      "step": 501760
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.1776399192066373e-05,
+      "loss": 4.1492,
+      "step": 502272
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.1768013244555853e-05,
+      "loss": 4.1441,
+      "step": 502784
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 4.175966005465279e-05,
+      "loss": 4.1427,
+      "step": 503296
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.175127410714227e-05,
+      "loss": 4.1407,
+      "step": 503808
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.174288815963175e-05,
+      "loss": 4.1458,
+      "step": 504320
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.173450221212123e-05,
+      "loss": 4.1391,
+      "step": 504832
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.172611626461072e-05,
+      "loss": 4.1232,
+      "step": 505344
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.17177303171002e-05,
+      "loss": 4.1405,
+      "step": 505856
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.170936074839341e-05,
+      "loss": 4.1435,
+      "step": 506368
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.170097480088289e-05,
+      "loss": 4.1455,
+      "step": 506880
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.169258885337237e-05,
+      "loss": 4.1316,
+      "step": 507392
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.168420290586185e-05,
+      "loss": 4.1321,
+      "step": 507904
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.167581695835133e-05,
+      "loss": 4.1404,
+      "step": 508416
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.1667447389644536e-05,
+      "loss": 4.1408,
+      "step": 508928
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.1659061442134016e-05,
+      "loss": 4.1497,
+      "step": 509440
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.1650675494623496e-05,
+      "loss": 4.1335,
+      "step": 509952
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.1642289547112976e-05,
+      "loss": 4.1378,
+      "step": 510464
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.1633903599602456e-05,
+      "loss": 4.1441,
+      "step": 510976
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.1625517652091936e-05,
+      "loss": 4.1251,
+      "step": 511488
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.1617131704581416e-05,
+      "loss": 4.1381,
+      "step": 512000
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.160876213587463e-05,
+      "loss": 4.1222,
+      "step": 512512
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.1600376188364105e-05,
+      "loss": 4.1447,
+      "step": 513024
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.1591990240853585e-05,
+      "loss": 4.125,
+      "step": 513536
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.1583604293343065e-05,
+      "loss": 4.1413,
+      "step": 514048
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.1575218345832545e-05,
+      "loss": 4.1311,
+      "step": 514560
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.1566832398322025e-05,
+      "loss": 4.1306,
+      "step": 515072
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.1558446450811505e-05,
+      "loss": 4.1349,
+      "step": 515584
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.1550076882104714e-05,
+      "loss": 4.1384,
+      "step": 516096
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.1541690934594194e-05,
+      "loss": 4.1346,
+      "step": 516608
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.1533304987083674e-05,
+      "loss": 4.1484,
+      "step": 517120
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.152493541837689e-05,
+      "loss": 4.1337,
+      "step": 517632
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.151654947086637e-05,
+      "loss": 4.1492,
+      "step": 518144
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.150816352335585e-05,
+      "loss": 4.1308,
+      "step": 518656
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.149977757584533e-05,
+      "loss": 4.1327,
+      "step": 519168
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.149139162833481e-05,
+      "loss": 4.1335,
+      "step": 519680
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.148300568082429e-05,
+      "loss": 4.1311,
+      "step": 520192
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.147461973331377e-05,
+      "loss": 4.1302,
+      "step": 520704
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.146623378580325e-05,
+      "loss": 4.1366,
+      "step": 521216
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.145784783829273e-05,
+      "loss": 4.1321,
+      "step": 521728
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.144946189078221e-05,
+      "loss": 4.1372,
+      "step": 522240
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.144107594327169e-05,
+      "loss": 4.1323,
+      "step": 522752
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.14327063745649e-05,
+      "loss": 4.1379,
+      "step": 523264
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.142432042705438e-05,
+      "loss": 4.1318,
+      "step": 523776
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.141593447954386e-05,
+      "loss": 4.1336,
+      "step": 524288
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.140754853203334e-05,
+      "loss": 4.13,
+      "step": 524800
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.139916258452282e-05,
+      "loss": 4.1302,
+      "step": 525312
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.1390793015816034e-05,
+      "loss": 4.1128,
+      "step": 525824
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.1382407068305514e-05,
+      "loss": 4.1385,
+      "step": 526336
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.1374021120794994e-05,
+      "loss": 4.1379,
+      "step": 526848
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.136563517328447e-05,
+      "loss": 4.1389,
+      "step": 527360
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.135724922577395e-05,
+      "loss": 4.1312,
+      "step": 527872
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.134886327826343e-05,
+      "loss": 4.1394,
+      "step": 528384
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.134047733075291e-05,
+      "loss": 4.1325,
+      "step": 528896
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.133209138324239e-05,
+      "loss": 4.1419,
+      "step": 529408
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.1323721814535596e-05,
+      "loss": 4.1219,
+      "step": 529920
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.1315335867025076e-05,
+      "loss": 4.1282,
+      "step": 530432
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.1306949919514556e-05,
+      "loss": 4.1313,
+      "step": 530944
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.129856397200404e-05,
+      "loss": 4.1307,
+      "step": 531456
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.129019440329725e-05,
+      "loss": 4.134,
+      "step": 531968
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.128182483459047e-05,
+      "loss": 4.1287,
+      "step": 532480
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.127343888707994e-05,
+      "loss": 4.1334,
+      "step": 532992
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.126505293956942e-05,
+      "loss": 4.1258,
+      "step": 533504
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 4.12566669920589e-05,
+      "loss": 4.1356,
+      "step": 534016
+    },
+    {
+      "epoch": 2.03,
+      "eval_loss": 4.148338317871094,
+      "eval_runtime": 296.5235,
+      "eval_samples_per_second": 1286.883,
+      "eval_steps_per_second": 40.216,
+      "step": 534240
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.124828104454838e-05,
+      "loss": 4.1339,
+      "step": 534528
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.123989509703786e-05,
+      "loss": 4.1319,
+      "step": 535040
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.123150914952734e-05,
+      "loss": 4.1295,
+      "step": 535552
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.122312320201682e-05,
+      "loss": 4.1242,
+      "step": 536064
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.12147372545063e-05,
+      "loss": 4.1371,
+      "step": 536576
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.120635130699578e-05,
+      "loss": 4.1311,
+      "step": 537088
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.119796535948526e-05,
+      "loss": 4.1226,
+      "step": 537600
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.118957941197474e-05,
+      "loss": 4.1189,
+      "step": 538112
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.118119346446423e-05,
+      "loss": 4.1268,
+      "step": 538624
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.117280751695371e-05,
+      "loss": 4.1389,
+      "step": 539136
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.116442156944318e-05,
+      "loss": 4.1245,
+      "step": 539648
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.115603562193266e-05,
+      "loss": 4.1312,
+      "step": 540160
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.1147666053225876e-05,
+      "loss": 4.131,
+      "step": 540672
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.1139280105715356e-05,
+      "loss": 4.1208,
+      "step": 541184
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.113089415820483e-05,
+      "loss": 4.1233,
+      "step": 541696
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.112250821069431e-05,
+      "loss": 4.1159,
+      "step": 542208
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.111412226318379e-05,
+      "loss": 4.124,
+      "step": 542720
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.110573631567327e-05,
+      "loss": 4.1198,
+      "step": 543232
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.109735036816275e-05,
+      "loss": 4.1236,
+      "step": 543744
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.108896442065223e-05,
+      "loss": 4.1351,
+      "step": 544256
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.108057847314171e-05,
+      "loss": 4.1359,
+      "step": 544768
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.1072192525631196e-05,
+      "loss": 4.1268,
+      "step": 545280
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.1063806578120676e-05,
+      "loss": 4.1203,
+      "step": 545792
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.1055420630610156e-05,
+      "loss": 4.1223,
+      "step": 546304
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.1047051061903365e-05,
+      "loss": 4.123,
+      "step": 546816
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.1038681493196574e-05,
+      "loss": 4.1217,
+      "step": 547328
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.1030295545686054e-05,
+      "loss": 4.1064,
+      "step": 547840
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.1021909598175534e-05,
+      "loss": 4.1288,
+      "step": 548352
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.1013523650665014e-05,
+      "loss": 4.1221,
+      "step": 548864
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 4.1005137703154494e-05,
+      "loss": 4.114,
+      "step": 549376
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.0996751755643974e-05,
+      "loss": 4.1162,
+      "step": 549888
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.0988365808133454e-05,
+      "loss": 4.131,
+      "step": 550400
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.0979979860622933e-05,
+      "loss": 4.1256,
+      "step": 550912
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.0971593913112413e-05,
+      "loss": 4.1249,
+      "step": 551424
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.096322434440563e-05,
+      "loss": 4.1183,
+      "step": 551936
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.095485477569884e-05,
+      "loss": 4.125,
+      "step": 552448
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.094646882818832e-05,
+      "loss": 4.1254,
+      "step": 552960
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.09380828806778e-05,
+      "loss": 4.108,
+      "step": 553472
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.092969693316728e-05,
+      "loss": 4.1087,
+      "step": 553984
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.092131098565676e-05,
+      "loss": 4.1088,
+      "step": 554496
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.091292503814624e-05,
+      "loss": 4.1147,
+      "step": 555008
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.090453909063572e-05,
+      "loss": 4.1213,
+      "step": 555520
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.089616952192893e-05,
+      "loss": 4.1151,
+      "step": 556032
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.088778357441841e-05,
+      "loss": 4.1133,
+      "step": 556544
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.087939762690789e-05,
+      "loss": 4.1195,
+      "step": 557056
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.087101167939737e-05,
+      "loss": 4.1144,
+      "step": 557568
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.086264211069058e-05,
+      "loss": 4.1264,
+      "step": 558080
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.085425616318006e-05,
+      "loss": 4.118,
+      "step": 558592
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.084588659447327e-05,
+      "loss": 4.089,
+      "step": 559104
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.083750064696275e-05,
+      "loss": 4.1257,
+      "step": 559616
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.082911469945223e-05,
+      "loss": 4.1216,
+      "step": 560128
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.082072875194171e-05,
+      "loss": 4.1264,
+      "step": 560640
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.081234280443119e-05,
+      "loss": 4.1131,
+      "step": 561152
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.0803956856920665e-05,
+      "loss": 4.1089,
+      "step": 561664
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.079558728821388e-05,
+      "loss": 4.1119,
+      "step": 562176
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.078720134070336e-05,
+      "loss": 4.1137,
+      "step": 562688
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.0778815393192834e-05,
+      "loss": 4.1129,
+      "step": 563200
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.077042944568232e-05,
+      "loss": 4.1017,
+      "step": 563712
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.07620434981718e-05,
+      "loss": 4.1187,
+      "step": 564224
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.075365755066128e-05,
+      "loss": 4.116,
+      "step": 564736
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.074527160315076e-05,
+      "loss": 4.0988,
+      "step": 565248
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.073690203444397e-05,
+      "loss": 4.1218,
+      "step": 565760
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.072851608693345e-05,
+      "loss": 4.1003,
+      "step": 566272
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.072013013942293e-05,
+      "loss": 4.1031,
+      "step": 566784
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.071174419191241e-05,
+      "loss": 4.1174,
+      "step": 567296
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.070335824440189e-05,
+      "loss": 4.1172,
+      "step": 567808
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.069497229689137e-05,
+      "loss": 4.1012,
+      "step": 568320
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.068658634938085e-05,
+      "loss": 4.1086,
+      "step": 568832
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.067820040187033e-05,
+      "loss": 4.0902,
+      "step": 569344
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.066983083316354e-05,
+      "loss": 4.1144,
+      "step": 569856
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.066144488565302e-05,
+      "loss": 4.1139,
+      "step": 570368
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.0653075316946234e-05,
+      "loss": 4.1156,
+      "step": 570880
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.0644689369435714e-05,
+      "loss": 4.1073,
+      "step": 571392
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.0636303421925194e-05,
+      "loss": 4.1166,
+      "step": 571904
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.0627917474414674e-05,
+      "loss": 4.13,
+      "step": 572416
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.0619531526904154e-05,
+      "loss": 4.0994,
+      "step": 572928
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.0611145579393634e-05,
+      "loss": 4.1151,
+      "step": 573440
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.0602759631883114e-05,
+      "loss": 4.1179,
+      "step": 573952
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.0594373684372594e-05,
+      "loss": 4.1003,
+      "step": 574464
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.0585987736862074e-05,
+      "loss": 4.1129,
+      "step": 574976
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.057761816815528e-05,
+      "loss": 4.11,
+      "step": 575488
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.056923222064476e-05,
+      "loss": 4.1134,
+      "step": 576000
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.056084627313424e-05,
+      "loss": 4.1216,
+      "step": 576512
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.055246032562372e-05,
+      "loss": 4.0985,
+      "step": 577024
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.054410713572067e-05,
+      "loss": 4.1042,
+      "step": 577536
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.053572118821015e-05,
+      "loss": 4.1074,
+      "step": 578048
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.052733524069963e-05,
+      "loss": 4.1159,
+      "step": 578560
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.051894929318911e-05,
+      "loss": 4.1064,
+      "step": 579072
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.051057972448232e-05,
+      "loss": 4.1125,
+      "step": 579584
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.05021937769718e-05,
+      "loss": 4.1085,
+      "step": 580096
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.049380782946128e-05,
+      "loss": 4.1058,
+      "step": 580608
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.048542188195076e-05,
+      "loss": 4.1074,
+      "step": 581120
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.047703593444024e-05,
+      "loss": 4.0871,
+      "step": 581632
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.0468666365733446e-05,
+      "loss": 4.1039,
+      "step": 582144
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.0460280418222926e-05,
+      "loss": 4.1095,
+      "step": 582656
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.045189447071241e-05,
+      "loss": 4.1126,
+      "step": 583168
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.044350852320189e-05,
+      "loss": 4.0984,
+      "step": 583680
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.043512257569137e-05,
+      "loss": 4.0955,
+      "step": 584192
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.042675300698458e-05,
+      "loss": 4.1049,
+      "step": 584704
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.041836705947406e-05,
+      "loss": 4.1068,
+      "step": 585216
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.040998111196354e-05,
+      "loss": 4.1128,
+      "step": 585728
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.040159516445302e-05,
+      "loss": 4.1043,
+      "step": 586240
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.0393209216942495e-05,
+      "loss": 4.1063,
+      "step": 586752
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.0384823269431975e-05,
+      "loss": 4.104,
+      "step": 587264
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.0376437321921454e-05,
+      "loss": 4.0966,
+      "step": 587776
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.0368051374410934e-05,
+      "loss": 4.099,
+      "step": 588288
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.035968180570415e-05,
+      "loss": 4.0899,
+      "step": 588800
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.035129585819363e-05,
+      "loss": 4.1118,
+      "step": 589312
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.0342926289486846e-05,
+      "loss": 4.091,
+      "step": 589824
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.0334540341976326e-05,
+      "loss": 4.1068,
+      "step": 590336
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.03261543944658e-05,
+      "loss": 4.0982,
+      "step": 590848
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.031776844695528e-05,
+      "loss": 4.1014,
+      "step": 591360
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.030938249944476e-05,
+      "loss": 4.099,
+      "step": 591872
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.030099655193424e-05,
+      "loss": 4.1027,
+      "step": 592384
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.029261060442372e-05,
+      "loss": 4.1011,
+      "step": 592896
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.02842246569132e-05,
+      "loss": 4.1161,
+      "step": 593408
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.027585508820641e-05,
+      "loss": 4.0989,
+      "step": 593920
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.026748551949962e-05,
+      "loss": 4.1145,
+      "step": 594432
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.0259099571989104e-05,
+      "loss": 4.0958,
+      "step": 594944
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.0250713624478584e-05,
+      "loss": 4.0994,
+      "step": 595456
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.0242327676968064e-05,
+      "loss": 4.1002,
+      "step": 595968
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.0233941729457544e-05,
+      "loss": 4.1017,
+      "step": 596480
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.0225555781947024e-05,
+      "loss": 4.093,
+      "step": 596992
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.0217169834436504e-05,
+      "loss": 4.1038,
+      "step": 597504
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.0208783886925984e-05,
+      "loss": 4.0958,
+      "step": 598016
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.0200397939415464e-05,
+      "loss": 4.1036,
+      "step": 598528
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.0192011991904944e-05,
+      "loss": 4.1027,
+      "step": 599040
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.018364242319815e-05,
+      "loss": 4.0979,
+      "step": 599552
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.017525647568763e-05,
+      "loss": 4.1015,
+      "step": 600064
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.016687052817711e-05,
+      "loss": 4.1066,
+      "step": 600576
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.015848458066659e-05,
+      "loss": 4.0934,
+      "step": 601088
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.015013139076354e-05,
+      "loss": 4.0985,
+      "step": 601600
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.014174544325302e-05,
+      "loss": 4.0779,
+      "step": 602112
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.01333594957425e-05,
+      "loss": 4.1052,
+      "step": 602624
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.012497354823198e-05,
+      "loss": 4.1058,
+      "step": 603136
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.011658760072146e-05,
+      "loss": 4.1074,
+      "step": 603648
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.010820165321094e-05,
+      "loss": 4.0996,
+      "step": 604160
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.0099832084504146e-05,
+      "loss": 4.1054,
+      "step": 604672
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.0091446136993626e-05,
+      "loss": 4.1002,
+      "step": 605184
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.0083060189483106e-05,
+      "loss": 4.1069,
+      "step": 605696
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.0074674241972586e-05,
+      "loss": 4.0898,
+      "step": 606208
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.0066288294462066e-05,
+      "loss": 4.0984,
+      "step": 606720
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.0057902346951546e-05,
+      "loss": 4.0963,
+      "step": 607232
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.0049516399441026e-05,
+      "loss": 4.1004,
+      "step": 607744
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.0041146830734235e-05,
+      "loss": 4.1017,
+      "step": 608256
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.003276088322372e-05,
+      "loss": 4.0923,
+      "step": 608768
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.00243749357132e-05,
+      "loss": 4.1032,
+      "step": 609280
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.001598898820268e-05,
+      "loss": 4.0923,
+      "step": 609792
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.000760304069216e-05,
+      "loss": 4.1053,
+      "step": 610304
+    },
+    {
+      "epoch": 0.03,
+      "eval_loss": 4.123390197753906,
+      "eval_runtime": 301.8228,
+      "eval_samples_per_second": 1264.288,
+      "eval_steps_per_second": 39.51,
+      "step": 610560
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.9999217093181635e-05,
+      "loss": 4.0888,
+      "step": 610816
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.9990831145671115e-05,
+      "loss": 4.0986,
+      "step": 611328
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.9982445198160595e-05,
+      "loss": 4.0979,
+      "step": 611840
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.9974075629453804e-05,
+      "loss": 4.095,
+      "step": 612352
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.9965689681943284e-05,
+      "loss": 4.1072,
+      "step": 612864
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.99573201132365e-05,
+      "loss": 4.0991,
+      "step": 613376
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.994893416572598e-05,
+      "loss": 4.0886,
+      "step": 613888
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.994054821821546e-05,
+      "loss": 4.0852,
+      "step": 614400
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.993216227070494e-05,
+      "loss": 4.094,
+      "step": 614912
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.992377632319442e-05,
+      "loss": 4.1068,
+      "step": 615424
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.99153903756839e-05,
+      "loss": 4.0974,
+      "step": 615936
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.990702080697711e-05,
+      "loss": 4.096,
+      "step": 616448
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.9898651238270325e-05,
+      "loss": 4.0979,
+      "step": 616960
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.9890265290759805e-05,
+      "loss": 4.0889,
+      "step": 617472
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.988187934324928e-05,
+      "loss": 4.0875,
+      "step": 617984
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.987349339573876e-05,
+      "loss": 4.084,
+      "step": 618496
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.986510744822824e-05,
+      "loss": 4.0945,
+      "step": 619008
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.985672150071772e-05,
+      "loss": 4.0881,
+      "step": 619520
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.98483355532072e-05,
+      "loss": 4.0947,
+      "step": 620032
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.983994960569668e-05,
+      "loss": 4.1029,
+      "step": 620544
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.983156365818616e-05,
+      "loss": 4.1034,
+      "step": 621056
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.9823177710675644e-05,
+      "loss": 4.0928,
+      "step": 621568
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.9814791763165124e-05,
+      "loss": 4.0934,
+      "step": 622080
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.980642219445833e-05,
+      "loss": 4.0905,
+      "step": 622592
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.979803624694781e-05,
+      "loss": 4.0893,
+      "step": 623104
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.978965029943729e-05,
+      "loss": 4.0902,
+      "step": 623616
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.97812807307305e-05,
+      "loss": 4.0777,
+      "step": 624128
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.977289478321998e-05,
+      "loss": 4.0994,
+      "step": 624640
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.976450883570946e-05,
+      "loss": 4.0909,
+      "step": 625152
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.975612288819894e-05,
+      "loss": 4.0786,
+      "step": 625664
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.974773694068842e-05,
+      "loss": 4.0844,
+      "step": 626176
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.97393509931779e-05,
+      "loss": 4.0965,
+      "step": 626688
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.973096504566738e-05,
+      "loss": 4.0985,
+      "step": 627200
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.972257909815686e-05,
+      "loss": 4.0939,
+      "step": 627712
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.971420952945008e-05,
+      "loss": 4.0885,
+      "step": 628224
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.970582358193956e-05,
+      "loss": 4.0878,
+      "step": 628736
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.969743763442904e-05,
+      "loss": 4.0998,
+      "step": 629248
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.968905168691852e-05,
+      "loss": 4.0775,
+      "step": 629760
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.968068211821173e-05,
+      "loss": 4.0764,
+      "step": 630272
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.967229617070121e-05,
+      "loss": 4.0811,
+      "step": 630784
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.966391022319069e-05,
+      "loss": 4.0823,
+      "step": 631296
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.965552427568017e-05,
+      "loss": 4.0905,
+      "step": 631808
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.964713832816964e-05,
+      "loss": 4.0797,
+      "step": 632320
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.963875238065912e-05,
+      "loss": 4.0906,
+      "step": 632832
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.96303664331486e-05,
+      "loss": 4.0853,
+      "step": 633344
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.962198048563808e-05,
+      "loss": 4.0823,
+      "step": 633856
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.9613610916931296e-05,
+      "loss": 4.0998,
+      "step": 634368
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.9605224969420776e-05,
+      "loss": 4.0853,
+      "step": 634880
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.9596839021910256e-05,
+      "loss": 4.0625,
+      "step": 635392
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.9588453074399736e-05,
+      "loss": 4.0935,
+      "step": 635904
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.9580083505692945e-05,
+      "loss": 4.0877,
+      "step": 636416
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.957174669459362e-05,
+      "loss": 4.0966,
+      "step": 636928
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.95633607470831e-05,
+      "loss": 4.0841,
+      "step": 637440
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.955497479957258e-05,
+      "loss": 4.0784,
+      "step": 637952
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.954658885206206e-05,
+      "loss": 4.0809,
+      "step": 638464
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.953820290455154e-05,
+      "loss": 4.0803,
+      "step": 638976
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.952981695704102e-05,
+      "loss": 4.0865,
+      "step": 639488
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.95214310095305e-05,
+      "loss": 4.0693,
+      "step": 640000
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.9513045062019985e-05,
+      "loss": 4.0914,
+      "step": 640512
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.9504659114509465e-05,
+      "loss": 4.0852,
+      "step": 641024
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.949627316699894e-05,
+      "loss": 4.0703,
+      "step": 641536
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.948788721948842e-05,
+      "loss": 4.0911,
+      "step": 642048
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.94795012719779e-05,
+      "loss": 4.0703,
+      "step": 642560
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.947111532446738e-05,
+      "loss": 4.0728,
+      "step": 643072
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.946274575576059e-05,
+      "loss": 4.0851,
+      "step": 643584
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.945435980825007e-05,
+      "loss": 4.0907,
+      "step": 644096
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.944597386073955e-05,
+      "loss": 4.067,
+      "step": 644608
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.943758791322903e-05,
+      "loss": 4.0828,
+      "step": 645120
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.942920196571851e-05,
+      "loss": 4.0626,
+      "step": 645632
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.942081601820799e-05,
+      "loss": 4.0851,
+      "step": 646144
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.941243007069747e-05,
+      "loss": 4.0851,
+      "step": 646656
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.9404044123186954e-05,
+      "loss": 4.0773,
+      "step": 647168
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.9395658175676434e-05,
+      "loss": 4.084,
+      "step": 647680
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.938728860696964e-05,
+      "loss": 4.0846,
+      "step": 648192
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.937890265945912e-05,
+      "loss": 4.1034,
+      "step": 648704
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.93705167119486e-05,
+      "loss": 4.0679,
+      "step": 649216
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.936213076443808e-05,
+      "loss": 4.0859,
+      "step": 649728
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.935376119573129e-05,
+      "loss": 4.084,
+      "step": 650240
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.934537524822077e-05,
+      "loss": 4.0761,
+      "step": 650752
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.933700567951398e-05,
+      "loss": 4.0821,
+      "step": 651264
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.932861973200346e-05,
+      "loss": 4.0785,
+      "step": 651776
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.932023378449294e-05,
+      "loss": 4.0842,
+      "step": 652288
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.931184783698242e-05,
+      "loss": 4.0935,
+      "step": 652800
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.930346188947191e-05,
+      "loss": 4.0694,
+      "step": 653312
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.929507594196139e-05,
+      "loss": 4.075,
+      "step": 653824
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.928668999445087e-05,
+      "loss": 4.0771,
+      "step": 654336
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.9278320425744076e-05,
+      "loss": 4.0894,
+      "step": 654848
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.9269934478233556e-05,
+      "loss": 4.073,
+      "step": 655360
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.9261564909526766e-05,
+      "loss": 4.0824,
+      "step": 655872
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.9253178962016245e-05,
+      "loss": 4.0818,
+      "step": 656384
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.9244793014505725e-05,
+      "loss": 4.0772,
+      "step": 656896
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.9236407066995205e-05,
+      "loss": 4.077,
+      "step": 657408
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.9228021119484685e-05,
+      "loss": 4.0594,
+      "step": 657920
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.9219635171974165e-05,
+      "loss": 4.0743,
+      "step": 658432
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.9211249224463645e-05,
+      "loss": 4.0791,
+      "step": 658944
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.9202863276953125e-05,
+      "loss": 4.0844,
+      "step": 659456
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.9194477329442605e-05,
+      "loss": 4.072,
+      "step": 659968
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.9186091381932085e-05,
+      "loss": 4.0656,
+      "step": 660480
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.91777218132253e-05,
+      "loss": 4.0735,
+      "step": 660992
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.9169335865714774e-05,
+      "loss": 4.0798,
+      "step": 661504
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.9160949918204254e-05,
+      "loss": 4.0842,
+      "step": 662016
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.9152563970693734e-05,
+      "loss": 4.0749,
+      "step": 662528
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.9144178023183214e-05,
+      "loss": 4.0799,
+      "step": 663040
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.913580845447642e-05,
+      "loss": 4.0728,
+      "step": 663552
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.91274225069659e-05,
+      "loss": 4.0735,
+      "step": 664064
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.911903655945538e-05,
+      "loss": 4.0661,
+      "step": 664576
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.91106669907486e-05,
+      "loss": 4.0681,
+      "step": 665088
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.910228104323808e-05,
+      "loss": 4.0846,
+      "step": 665600
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.909389509572756e-05,
+      "loss": 4.0627,
+      "step": 666112
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.908550914821704e-05,
+      "loss": 4.0761,
+      "step": 666624
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.907712320070652e-05,
+      "loss": 4.0656,
+      "step": 667136
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.9068737253196e-05,
+      "loss": 4.0745,
+      "step": 667648
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.906035130568548e-05,
+      "loss": 4.0714,
+      "step": 668160
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.905196535817496e-05,
+      "loss": 4.074,
+      "step": 668672
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.904357941066444e-05,
+      "loss": 4.0731,
+      "step": 669184
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.903519346315392e-05,
+      "loss": 4.0863,
+      "step": 669696
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.90268075156434e-05,
+      "loss": 4.0737,
+      "step": 670208
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.901842156813288e-05,
+      "loss": 4.0827,
+      "step": 670720
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.901005199942609e-05,
+      "loss": 4.0701,
+      "step": 671232
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.900166605191557e-05,
+      "loss": 4.0733,
+      "step": 671744
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.899328010440505e-05,
+      "loss": 4.0747,
+      "step": 672256
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.898489415689453e-05,
+      "loss": 4.0742,
+      "step": 672768
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.897652458818774e-05,
+      "loss": 4.0654,
+      "step": 673280
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.896813864067722e-05,
+      "loss": 4.0748,
+      "step": 673792
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.89597526931667e-05,
+      "loss": 4.0691,
+      "step": 674304
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.895136674565618e-05,
+      "loss": 4.0766,
+      "step": 674816
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.894298079814566e-05,
+      "loss": 4.072,
+      "step": 675328
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.893461122943887e-05,
+      "loss": 4.0777,
+      "step": 675840
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.892622528192835e-05,
+      "loss": 4.0712,
+      "step": 676352
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.891783933441783e-05,
+      "loss": 4.081,
+      "step": 676864
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.890945338690731e-05,
+      "loss": 4.0635,
+      "step": 677376
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.890108381820052e-05,
+      "loss": 4.0723,
+      "step": 677888
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.889269787069e-05,
+      "loss": 4.0468,
+      "step": 678400
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.888431192317948e-05,
+      "loss": 4.0801,
+      "step": 678912
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.887592597566896e-05,
+      "loss": 4.0785,
+      "step": 679424
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.886755640696218e-05,
+      "loss": 4.0814,
+      "step": 679936
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.885917045945166e-05,
+      "loss": 4.0693,
+      "step": 680448
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.885078451194114e-05,
+      "loss": 4.0809,
+      "step": 680960
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.8842414943234346e-05,
+      "loss": 4.071,
+      "step": 681472
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.8834028995723826e-05,
+      "loss": 4.084,
+      "step": 681984
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.8825643048213306e-05,
+      "loss": 4.0639,
+      "step": 682496
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.8817257100702786e-05,
+      "loss": 4.0689,
+      "step": 683008
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.880887115319226e-05,
+      "loss": 4.0687,
+      "step": 683520
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.880048520568174e-05,
+      "loss": 4.0794,
+      "step": 684032
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.879209925817122e-05,
+      "loss": 4.0675,
+      "step": 684544
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.87837133106607e-05,
+      "loss": 4.0656,
+      "step": 685056
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.8775343741953915e-05,
+      "loss": 4.0752,
+      "step": 685568
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.8766957794443395e-05,
+      "loss": 4.0682,
+      "step": 686080
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.875858822573661e-05,
+      "loss": 4.0757,
+      "step": 686592
+    },
+    {
+      "epoch": 1.03,
+      "eval_loss": 4.1041579246521,
+      "eval_runtime": 301.1615,
+      "eval_samples_per_second": 1267.064,
+      "eval_steps_per_second": 39.597,
+      "step": 686880
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 3.8750202278226084e-05,
+      "loss": 4.0618,
+      "step": 687104
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 3.8741816330715564e-05,
+      "loss": 4.0752,
+      "step": 687616
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 3.8733430383205044e-05,
+      "loss": 4.0678,
+      "step": 688128
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 3.8725044435694524e-05,
+      "loss": 4.0712,
+      "step": 688640
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 3.8716658488184004e-05,
+      "loss": 4.0821,
+      "step": 689152
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 3.8708272540673483e-05,
+      "loss": 4.0701,
+      "step": 689664
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 3.8699886593162963e-05,
+      "loss": 4.0644,
+      "step": 690176
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 3.869153340325991e-05,
+      "loss": 4.0595,
+      "step": 690688
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 3.868314745574939e-05,
+      "loss": 4.069,
+      "step": 691200
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 3.867476150823887e-05,
+      "loss": 4.0807,
+      "step": 691712
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 3.866637556072835e-05,
+      "loss": 4.0679,
+      "step": 692224
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 3.865798961321783e-05,
+      "loss": 4.0708,
+      "step": 692736
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 3.864962004451104e-05,
+      "loss": 4.073,
+      "step": 693248
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 3.864123409700052e-05,
+      "loss": 4.0634,
+      "step": 693760
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 3.863284814949e-05,
+      "loss": 4.0618,
+      "step": 694272
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 3.862446220197948e-05,
+      "loss": 4.0569,
+      "step": 694784
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 3.861607625446896e-05,
+      "loss": 4.0686,
+      "step": 695296
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 3.860769030695844e-05,
+      "loss": 4.0634,
+      "step": 695808
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 3.859930435944792e-05,
+      "loss": 4.062,
+      "step": 696320
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 3.8590934790741126e-05,
+      "loss": 4.0748,
+      "step": 696832
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 3.8582548843230606e-05,
+      "loss": 4.0759,
+      "step": 697344
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 3.857416289572009e-05,
+      "loss": 4.0682,
+      "step": 697856
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 3.856577694820957e-05,
+      "loss": 4.0668,
+      "step": 698368
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 3.855739100069905e-05,
+      "loss": 4.0655,
+      "step": 698880
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 3.854900505318853e-05,
+      "loss": 4.0651,
+      "step": 699392
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 3.854061910567801e-05,
+      "loss": 4.0641,
+      "step": 699904
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 3.853224953697122e-05,
+      "loss": 4.0492,
+      "step": 700416
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 3.85238635894607e-05,
+      "loss": 4.0699,
+      "step": 700928
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 3.851549402075391e-05,
+      "loss": 4.0692,
+      "step": 701440
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 3.850710807324339e-05,
+      "loss": 4.0527,
+      "step": 701952
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.849872212573287e-05,
+      "loss": 4.0599,
+      "step": 702464
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.849033617822235e-05,
+      "loss": 4.0664,
+      "step": 702976
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.848195023071183e-05,
+      "loss": 4.0714,
+      "step": 703488
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.847356428320131e-05,
+      "loss": 4.0718,
+      "step": 704000
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.846517833569079e-05,
+      "loss": 4.0667,
+      "step": 704512
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.845679238818027e-05,
+      "loss": 4.0583,
+      "step": 705024
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.844840644066975e-05,
+      "loss": 4.0755,
+      "step": 705536
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.844002049315923e-05,
+      "loss": 4.0499,
+      "step": 706048
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.843163454564871e-05,
+      "loss": 4.0512,
+      "step": 706560
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.842324859813819e-05,
+      "loss": 4.0548,
+      "step": 707072
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.841486265062767e-05,
+      "loss": 4.0581,
+      "step": 707584
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.840649308192088e-05,
+      "loss": 4.063,
+      "step": 708096
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.839810713441036e-05,
+      "loss": 4.0558,
+      "step": 708608
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.838972118689984e-05,
+      "loss": 4.0645,
+      "step": 709120
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.838133523938932e-05,
+      "loss": 4.0614,
+      "step": 709632
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.83729492918788e-05,
+      "loss": 4.0564,
+      "step": 710144
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.836457972317201e-05,
+      "loss": 4.0689,
+      "step": 710656
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.8356193775661495e-05,
+      "loss": 4.065,
+      "step": 711168
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.8347807828150975e-05,
+      "loss": 4.0378,
+      "step": 711680
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.8339438259444184e-05,
+      "loss": 4.0635,
+      "step": 712192
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.8331052311933664e-05,
+      "loss": 4.0644,
+      "step": 712704
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.8322666364423144e-05,
+      "loss": 4.0722,
+      "step": 713216
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.8314280416912624e-05,
+      "loss": 4.0584,
+      "step": 713728
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.830591084820583e-05,
+      "loss": 4.0562,
+      "step": 714240
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.829752490069531e-05,
+      "loss": 4.0515,
+      "step": 714752
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.828915533198852e-05,
+      "loss": 4.0571,
+      "step": 715264
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.8280769384478e-05,
+      "loss": 4.0599,
+      "step": 715776
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.827238343696748e-05,
+      "loss": 4.0465,
+      "step": 716288
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.826399748945696e-05,
+      "loss": 4.0637,
+      "step": 716800
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.825561154194645e-05,
+      "loss": 4.0605,
+      "step": 717312
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.824722559443593e-05,
+      "loss": 4.0504,
+      "step": 717824
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.823883964692541e-05,
+      "loss": 4.061,
+      "step": 718336
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.823045369941489e-05,
+      "loss": 4.0471,
+      "step": 718848
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.822206775190437e-05,
+      "loss": 4.0485,
+      "step": 719360
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.821368180439385e-05,
+      "loss": 4.0549,
+      "step": 719872
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.820529585688333e-05,
+      "loss": 4.0658,
+      "step": 720384
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.819690990937281e-05,
+      "loss": 4.0469,
+      "step": 720896
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.818854034066602e-05,
+      "loss": 4.0591,
+      "step": 721408
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.81801543931555e-05,
+      "loss": 4.0339,
+      "step": 721920
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.8171784824448707e-05,
+      "loss": 4.06,
+      "step": 722432
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.8163398876938187e-05,
+      "loss": 4.0591,
+      "step": 722944
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.8155012929427666e-05,
+      "loss": 4.0596,
+      "step": 723456
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.8146626981917146e-05,
+      "loss": 4.0547,
+      "step": 723968
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.813824103440663e-05,
+      "loss": 4.0606,
+      "step": 724480
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.8129855086896106e-05,
+      "loss": 4.0756,
+      "step": 724992
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.812148551818932e-05,
+      "loss": 4.0485,
+      "step": 725504
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.81130995706788e-05,
+      "loss": 4.0606,
+      "step": 726016
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.810471362316828e-05,
+      "loss": 4.0521,
+      "step": 726528
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.8096327675657755e-05,
+      "loss": 4.0545,
+      "step": 727040
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.8087941728147235e-05,
+      "loss": 4.0559,
+      "step": 727552
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.8079555780636715e-05,
+      "loss": 4.053,
+      "step": 728064
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.8071186211929924e-05,
+      "loss": 4.0626,
+      "step": 728576
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.8062800264419404e-05,
+      "loss": 4.0695,
+      "step": 729088
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.805443069571262e-05,
+      "loss": 4.0435,
+      "step": 729600
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.80460447482021e-05,
+      "loss": 4.052,
+      "step": 730112
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.803765880069158e-05,
+      "loss": 4.0514,
+      "step": 730624
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.802927285318106e-05,
+      "loss": 4.065,
+      "step": 731136
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.802088690567054e-05,
+      "loss": 4.048,
+      "step": 731648
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 3.801251733696375e-05,
+      "loss": 4.0623,
+      "step": 732160
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.800413138945323e-05,
+      "loss": 4.0538,
+      "step": 732672
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.799574544194271e-05,
+      "loss": 4.0519,
+      "step": 733184
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.798735949443219e-05,
+      "loss": 4.051,
+      "step": 733696
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.797897354692167e-05,
+      "loss": 4.0368,
+      "step": 734208
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.797058759941115e-05,
+      "loss": 4.0484,
+      "step": 734720
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.796220165190063e-05,
+      "loss": 4.057,
+      "step": 735232
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.795381570439011e-05,
+      "loss": 4.064,
+      "step": 735744
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.794542975687959e-05,
+      "loss": 4.0421,
+      "step": 736256
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.793704380936907e-05,
+      "loss": 4.0461,
+      "step": 736768
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.792865786185855e-05,
+      "loss": 4.0492,
+      "step": 737280
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.7920271914348035e-05,
+      "loss": 4.0541,
+      "step": 737792
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.7911902345641244e-05,
+      "loss": 4.0593,
+      "step": 738304
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.7903516398130724e-05,
+      "loss": 4.0496,
+      "step": 738816
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.7895130450620204e-05,
+      "loss": 4.0545,
+      "step": 739328
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.7886744503109684e-05,
+      "loss": 4.0514,
+      "step": 739840
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.7878358555599164e-05,
+      "loss": 4.0499,
+      "step": 740352
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.786997260808864e-05,
+      "loss": 4.0393,
+      "step": 740864
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.786160303938185e-05,
+      "loss": 4.0443,
+      "step": 741376
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.785323347067506e-05,
+      "loss": 4.0554,
+      "step": 741888
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.784484752316454e-05,
+      "loss": 4.0452,
+      "step": 742400
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.783646157565402e-05,
+      "loss": 4.0505,
+      "step": 742912
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.78280756281435e-05,
+      "loss": 4.0399,
+      "step": 743424
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.781968968063299e-05,
+      "loss": 4.0501,
+      "step": 743936
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.78113201119262e-05,
+      "loss": 4.048,
+      "step": 744448
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.780293416441568e-05,
+      "loss": 4.0503,
+      "step": 744960
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.779454821690516e-05,
+      "loss": 4.0526,
+      "step": 745472
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.778616226939464e-05,
+      "loss": 4.0639,
+      "step": 745984
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.777777632188411e-05,
+      "loss": 4.0505,
+      "step": 746496
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.776940675317733e-05,
+      "loss": 4.0543,
+      "step": 747008
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.776102080566681e-05,
+      "loss": 4.0525,
+      "step": 747520
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.775263485815629e-05,
+      "loss": 4.044,
+      "step": 748032
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.774424891064576e-05,
+      "loss": 4.055,
+      "step": 748544
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.773586296313524e-05,
+      "loss": 4.0454,
+      "step": 749056
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.772747701562473e-05,
+      "loss": 4.0437,
+      "step": 749568
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.771909106811421e-05,
+      "loss": 4.0516,
+      "step": 750080
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.771070512060369e-05,
+      "loss": 4.0474,
+      "step": 750592
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.770235193070063e-05,
+      "loss": 4.0523,
+      "step": 751104
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.769396598319011e-05,
+      "loss": 4.0454,
+      "step": 751616
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.768559641448332e-05,
+      "loss": 4.0567,
+      "step": 752128
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.76772104669728e-05,
+      "loss": 4.0445,
+      "step": 752640
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.766882451946228e-05,
+      "loss": 4.0583,
+      "step": 753152
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.766043857195176e-05,
+      "loss": 4.0433,
+      "step": 753664
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.7652052624441234e-05,
+      "loss": 4.0461,
+      "step": 754176
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.7643666676930714e-05,
+      "loss": 4.023,
+      "step": 754688
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.7635280729420194e-05,
+      "loss": 4.0579,
+      "step": 755200
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.762689478190968e-05,
+      "loss": 4.0509,
+      "step": 755712
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.761850883439916e-05,
+      "loss": 4.06,
+      "step": 756224
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.761012288688864e-05,
+      "loss": 4.0448,
+      "step": 756736
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.760173693937812e-05,
+      "loss": 4.057,
+      "step": 757248
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.75933509918676e-05,
+      "loss": 4.0471,
+      "step": 757760
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.758498142316081e-05,
+      "loss": 4.0642,
+      "step": 758272
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.757659547565029e-05,
+      "loss": 4.0389,
+      "step": 758784
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.756820952813977e-05,
+      "loss": 4.046,
+      "step": 759296
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.755982358062925e-05,
+      "loss": 4.0442,
+      "step": 759808
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.755143763311873e-05,
+      "loss": 4.0547,
+      "step": 760320
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.754306806441194e-05,
+      "loss": 4.0442,
+      "step": 760832
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.753468211690142e-05,
+      "loss": 4.0437,
+      "step": 761344
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.7526312548194634e-05,
+      "loss": 4.0481,
+      "step": 761856
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.7517926600684114e-05,
+      "loss": 4.0502,
+      "step": 762368
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 3.7509540653173594e-05,
+      "loss": 4.0511,
+      "step": 762880
+    },
+    {
+      "epoch": 2.03,
+      "eval_loss": 4.087827205657959,
+      "eval_runtime": 300.3169,
+      "eval_samples_per_second": 1270.628,
+      "eval_steps_per_second": 39.708,
+      "step": 763200
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.7501154705663074e-05,
+      "loss": 4.0383,
+      "step": 763392
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.7492768758152554e-05,
+      "loss": 4.0522,
+      "step": 763904
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.7484382810642034e-05,
+      "loss": 4.0478,
+      "step": 764416
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.7475996863131514e-05,
+      "loss": 4.0462,
+      "step": 764928
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.7467610915620994e-05,
+      "loss": 4.0573,
+      "step": 765440
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.7459224968110474e-05,
+      "loss": 4.0474,
+      "step": 765952
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.745083902059995e-05,
+      "loss": 4.0459,
+      "step": 766464
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.744245307308943e-05,
+      "loss": 4.0352,
+      "step": 766976
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.743406712557891e-05,
+      "loss": 4.0428,
+      "step": 767488
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.742568117806839e-05,
+      "loss": 4.0521,
+      "step": 768000
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.741729523055787e-05,
+      "loss": 4.0503,
+      "step": 768512
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.740890928304735e-05,
+      "loss": 4.0481,
+      "step": 769024
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.740053971434056e-05,
+      "loss": 4.0514,
+      "step": 769536
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.739215376683004e-05,
+      "loss": 4.0392,
+      "step": 770048
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.738376781931952e-05,
+      "loss": 4.0397,
+      "step": 770560
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.7375381871809e-05,
+      "loss": 4.0316,
+      "step": 771072
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.736699592429848e-05,
+      "loss": 4.0501,
+      "step": 771584
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.735860997678796e-05,
+      "loss": 4.0418,
+      "step": 772096
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.735022402927744e-05,
+      "loss": 4.0382,
+      "step": 772608
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.734185446057065e-05,
+      "loss": 4.05,
+      "step": 773120
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.733346851306013e-05,
+      "loss": 4.0584,
+      "step": 773632
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.732508256554961e-05,
+      "loss": 4.0451,
+      "step": 774144
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.731669661803909e-05,
+      "loss": 4.0425,
+      "step": 774656
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.730831067052857e-05,
+      "loss": 4.0455,
+      "step": 775168
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.729992472301805e-05,
+      "loss": 4.0425,
+      "step": 775680
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.729153877550753e-05,
+      "loss": 4.0422,
+      "step": 776192
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.728315282799701e-05,
+      "loss": 4.0254,
+      "step": 776704
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.727476688048649e-05,
+      "loss": 4.045,
+      "step": 777216
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.726638093297597e-05,
+      "loss": 4.0475,
+      "step": 777728
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.725799498546545e-05,
+      "loss": 4.0302,
+      "step": 778240
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.724960903795493e-05,
+      "loss": 4.0385,
+      "step": 778752
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.724123946924814e-05,
+      "loss": 4.0462,
+      "step": 779264
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.723285352173762e-05,
+      "loss": 4.0437,
+      "step": 779776
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.72244675742271e-05,
+      "loss": 4.0523,
+      "step": 780288
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.721608162671658e-05,
+      "loss": 4.0433,
+      "step": 780800
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.720771205800979e-05,
+      "loss": 4.0365,
+      "step": 781312
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.719932611049927e-05,
+      "loss": 4.0567,
+      "step": 781824
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.719094016298875e-05,
+      "loss": 4.0236,
+      "step": 782336
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.7182554215478236e-05,
+      "loss": 4.0313,
+      "step": 782848
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.7174168267967716e-05,
+      "loss": 4.0366,
+      "step": 783360
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.7165782320457196e-05,
+      "loss": 4.0339,
+      "step": 783872
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.7157396372946676e-05,
+      "loss": 4.0386,
+      "step": 784384
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.7149010425436155e-05,
+      "loss": 4.0363,
+      "step": 784896
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.7140640856729365e-05,
+      "loss": 4.0416,
+      "step": 785408
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.7132254909218845e-05,
+      "loss": 4.0377,
+      "step": 785920
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.7123868961708324e-05,
+      "loss": 4.0349,
+      "step": 786432
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.7115483014197804e-05,
+      "loss": 4.0463,
+      "step": 786944
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.7107097066687284e-05,
+      "loss": 4.042,
+      "step": 787456
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.7098727497980494e-05,
+      "loss": 4.0185,
+      "step": 787968
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.7090341550469973e-05,
+      "loss": 4.0408,
+      "step": 788480
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.7081955602959453e-05,
+      "loss": 4.0436,
+      "step": 788992
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.707356965544893e-05,
+      "loss": 4.0514,
+      "step": 789504
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.706518370793842e-05,
+      "loss": 4.0357,
+      "step": 790016
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.705681413923163e-05,
+      "loss": 4.0367,
+      "step": 790528
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.704842819172111e-05,
+      "loss": 4.0285,
+      "step": 791040
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.704004224421059e-05,
+      "loss": 4.0322,
+      "step": 791552
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.703165629670007e-05,
+      "loss": 4.0414,
+      "step": 792064
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.702327034918955e-05,
+      "loss": 4.0242,
+      "step": 792576
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.701488440167902e-05,
+      "loss": 4.0391,
+      "step": 793088
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.700651483297224e-05,
+      "loss": 4.0391,
+      "step": 793600
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.699812888546172e-05,
+      "loss": 4.032,
+      "step": 794112
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.69897429379512e-05,
+      "loss": 4.0376,
+      "step": 794624
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.698135699044067e-05,
+      "loss": 4.0254,
+      "step": 795136
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.697297104293016e-05,
+      "loss": 4.0298,
+      "step": 795648
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.696458509541964e-05,
+      "loss": 4.0331,
+      "step": 796160
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.695619914790912e-05,
+      "loss": 4.0434,
+      "step": 796672
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.694782957920233e-05,
+      "loss": 4.0259,
+      "step": 797184
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.693944363169181e-05,
+      "loss": 4.0386,
+      "step": 797696
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.693105768418129e-05,
+      "loss": 4.0148,
+      "step": 798208
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.692267173667077e-05,
+      "loss": 4.0356,
+      "step": 798720
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.691428578916025e-05,
+      "loss": 4.0358,
+      "step": 799232
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.690589984164973e-05,
+      "loss": 4.0415,
+      "step": 799744
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.689751389413921e-05,
+      "loss": 4.0333,
+      "step": 800256
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.6889127946628687e-05,
+      "loss": 4.0379,
+      "step": 800768
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.6880741999118167e-05,
+      "loss": 4.0546,
+      "step": 801280
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.6872372430411376e-05,
+      "loss": 4.0282,
+      "step": 801792
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.6863986482900856e-05,
+      "loss": 4.044,
+      "step": 802304
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.6855600535390336e-05,
+      "loss": 4.0285,
+      "step": 802816
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.684723096668355e-05,
+      "loss": 4.0347,
+      "step": 803328
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.683884501917303e-05,
+      "loss": 4.0354,
+      "step": 803840
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.683045907166251e-05,
+      "loss": 4.0289,
+      "step": 804352
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.682207312415199e-05,
+      "loss": 4.042,
+      "step": 804864
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.681368717664147e-05,
+      "loss": 4.0481,
+      "step": 805376
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.680530122913095e-05,
+      "loss": 4.0304,
+      "step": 805888
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.679691528162043e-05,
+      "loss": 4.0263,
+      "step": 806400
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.678852933410991e-05,
+      "loss": 4.0296,
+      "step": 806912
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.678015976540312e-05,
+      "loss": 4.0403,
+      "step": 807424
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.67717738178926e-05,
+      "loss": 4.0288,
+      "step": 807936
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.6763420627989545e-05,
+      "loss": 4.0421,
+      "step": 808448
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.6755034680479025e-05,
+      "loss": 4.0338,
+      "step": 808960
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.6746648732968505e-05,
+      "loss": 4.0336,
+      "step": 809472
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.6738262785457985e-05,
+      "loss": 4.0286,
+      "step": 809984
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.6729876837947465e-05,
+      "loss": 4.0183,
+      "step": 810496
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.6721490890436945e-05,
+      "loss": 4.0255,
+      "step": 811008
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.6713104942926425e-05,
+      "loss": 4.0362,
+      "step": 811520
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.6704718995415905e-05,
+      "loss": 4.0389,
+      "step": 812032
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.6696333047905385e-05,
+      "loss": 4.024,
+      "step": 812544
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.668794710039486e-05,
+      "loss": 4.0238,
+      "step": 813056
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.667956115288434e-05,
+      "loss": 4.0296,
+      "step": 813568
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.667117520537382e-05,
+      "loss": 4.0356,
+      "step": 814080
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.6662805636667034e-05,
+      "loss": 4.042,
+      "step": 814592
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.6654419689156514e-05,
+      "loss": 4.0263,
+      "step": 815104
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.6646033741645994e-05,
+      "loss": 4.0348,
+      "step": 815616
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.6637647794135474e-05,
+      "loss": 4.0282,
+      "step": 816128
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.6629261846624954e-05,
+      "loss": 4.0286,
+      "step": 816640
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.662089227791816e-05,
+      "loss": 4.0227,
+      "step": 817152
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.661250633040764e-05,
+      "loss": 4.0268,
+      "step": 817664
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.660412038289712e-05,
+      "loss": 4.0319,
+      "step": 818176
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.65957344353866e-05,
+      "loss": 4.0238,
+      "step": 818688
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.658734848787608e-05,
+      "loss": 4.0285,
+      "step": 819200
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.657896254036556e-05,
+      "loss": 4.022,
+      "step": 819712
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.657059297165877e-05,
+      "loss": 4.0279,
+      "step": 820224
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.656220702414825e-05,
+      "loss": 4.0301,
+      "step": 820736
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.655382107663773e-05,
+      "loss": 4.0274,
+      "step": 821248
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.654545150793095e-05,
+      "loss": 4.0306,
+      "step": 821760
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.653706556042043e-05,
+      "loss": 4.0442,
+      "step": 822272
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.652867961290991e-05,
+      "loss": 4.0335,
+      "step": 822784
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.652029366539939e-05,
+      "loss": 4.0337,
+      "step": 823296
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.651190771788887e-05,
+      "loss": 4.0307,
+      "step": 823808
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.650352177037835e-05,
+      "loss": 4.0247,
+      "step": 824320
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.649513582286783e-05,
+      "loss": 4.0388,
+      "step": 824832
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.648674987535731e-05,
+      "loss": 4.0227,
+      "step": 825344
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.647836392784679e-05,
+      "loss": 4.0233,
+      "step": 825856
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.6469994359139996e-05,
+      "loss": 4.0298,
+      "step": 826368
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.6461608411629476e-05,
+      "loss": 4.0297,
+      "step": 826880
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.6453222464118956e-05,
+      "loss": 4.0298,
+      "step": 827392
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.6444836516608436e-05,
+      "loss": 4.0293,
+      "step": 827904
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.643646694790165e-05,
+      "loss": 4.034,
+      "step": 828416
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.642809737919486e-05,
+      "loss": 4.0251,
+      "step": 828928
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.641971143168434e-05,
+      "loss": 4.0361,
+      "step": 829440
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.641132548417382e-05,
+      "loss": 4.0275,
+      "step": 829952
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.64029395366633e-05,
+      "loss": 4.0227,
+      "step": 830464
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.639455358915278e-05,
+      "loss": 4.0041,
+      "step": 830976
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.638616764164226e-05,
+      "loss": 4.0392,
+      "step": 831488
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.637778169413174e-05,
+      "loss": 4.0306,
+      "step": 832000
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.636941212542495e-05,
+      "loss": 4.0419,
+      "step": 832512
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.636102617791443e-05,
+      "loss": 4.0265,
+      "step": 833024
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.635264023040391e-05,
+      "loss": 4.0324,
+      "step": 833536
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.634425428289339e-05,
+      "loss": 4.028,
+      "step": 834048
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.633586833538287e-05,
+      "loss": 4.0455,
+      "step": 834560
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.632748238787235e-05,
+      "loss": 4.0158,
+      "step": 835072
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.631909644036183e-05,
+      "loss": 4.0281,
+      "step": 835584
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.631071049285131e-05,
+      "loss": 4.0283,
+      "step": 836096
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.630232454534079e-05,
+      "loss": 4.0319,
+      "step": 836608
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.6293954976634e-05,
+      "loss": 4.0257,
+      "step": 837120
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.628556902912348e-05,
+      "loss": 4.026,
+      "step": 837632
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.627718308161296e-05,
+      "loss": 4.0266,
+      "step": 838144
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.626881351290617e-05,
+      "loss": 4.0346,
+      "step": 838656
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.626042756539565e-05,
+      "loss": 4.0272,
+      "step": 839168
+    },
+    {
+      "epoch": 0.03,
+      "eval_loss": 4.075066089630127,
+      "eval_runtime": 292.4706,
+      "eval_samples_per_second": 1304.716,
+      "eval_steps_per_second": 40.773,
+      "step": 839520
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.625204161788513e-05,
+      "loss": 4.021,
+      "step": 839680
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.6243672049178337e-05,
+      "loss": 4.0297,
+      "step": 840192
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.623528610166782e-05,
+      "loss": 4.0255,
+      "step": 840704
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.62269001541573e-05,
+      "loss": 4.0276,
+      "step": 841216
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.621851420664678e-05,
+      "loss": 4.0336,
+      "step": 841728
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.621012825913626e-05,
+      "loss": 4.0261,
+      "step": 842240
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.620174231162574e-05,
+      "loss": 4.0283,
+      "step": 842752
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.619335636411522e-05,
+      "loss": 4.0174,
+      "step": 843264
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.61849704166047e-05,
+      "loss": 4.0219,
+      "step": 843776
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.617658446909418e-05,
+      "loss": 4.0316,
+      "step": 844288
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.616819852158366e-05,
+      "loss": 4.034,
+      "step": 844800
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.615982895287687e-05,
+      "loss": 4.026,
+      "step": 845312
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.615145938417008e-05,
+      "loss": 4.039,
+      "step": 845824
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.614307343665956e-05,
+      "loss": 4.0149,
+      "step": 846336
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.613468748914904e-05,
+      "loss": 4.0209,
+      "step": 846848
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.612630154163852e-05,
+      "loss": 4.014,
+      "step": 847360
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.611791559412801e-05,
+      "loss": 4.0311,
+      "step": 847872
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.610952964661749e-05,
+      "loss": 4.0205,
+      "step": 848384
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.610114369910697e-05,
+      "loss": 4.0218,
+      "step": 848896
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.609275775159645e-05,
+      "loss": 4.0281,
+      "step": 849408
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.608437180408593e-05,
+      "loss": 4.0398,
+      "step": 849920
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.607598585657541e-05,
+      "loss": 4.028,
+      "step": 850432
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.606759990906488e-05,
+      "loss": 4.0219,
+      "step": 850944
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.605921396155436e-05,
+      "loss": 4.0264,
+      "step": 851456
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.605082801404384e-05,
+      "loss": 4.0261,
+      "step": 851968
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.6042458445337056e-05,
+      "loss": 4.0231,
+      "step": 852480
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.603407249782653e-05,
+      "loss": 4.0091,
+      "step": 852992
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.602568655031601e-05,
+      "loss": 4.0236,
+      "step": 853504
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.601730060280549e-05,
+      "loss": 4.028,
+      "step": 854016
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.6008914655294976e-05,
+      "loss": 4.0094,
+      "step": 854528
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.6000528707784456e-05,
+      "loss": 4.0211,
+      "step": 855040
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5992142760273936e-05,
+      "loss": 4.028,
+      "step": 855552
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5983773191567145e-05,
+      "loss": 4.0239,
+      "step": 856064
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5975387244056625e-05,
+      "loss": 4.0333,
+      "step": 856576
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5967001296546105e-05,
+      "loss": 4.0237,
+      "step": 857088
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5958615349035585e-05,
+      "loss": 4.0198,
+      "step": 857600
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5950229401525065e-05,
+      "loss": 4.0415,
+      "step": 858112
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5941843454014545e-05,
+      "loss": 4.0017,
+      "step": 858624
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5933457506504025e-05,
+      "loss": 4.0134,
+      "step": 859136
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5925071558993505e-05,
+      "loss": 4.0216,
+      "step": 859648
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5916685611482985e-05,
+      "loss": 4.0123,
+      "step": 860160
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5908299663972465e-05,
+      "loss": 4.0203,
+      "step": 860672
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5899930095265674e-05,
+      "loss": 4.022,
+      "step": 861184
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5891544147755154e-05,
+      "loss": 4.019,
+      "step": 861696
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.588315820024464e-05,
+      "loss": 4.0217,
+      "step": 862208
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.587478863153785e-05,
+      "loss": 4.0143,
+      "step": 862720
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.586640268402733e-05,
+      "loss": 4.023,
+      "step": 863232
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.585801673651681e-05,
+      "loss": 4.0271,
+      "step": 863744
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.584963078900629e-05,
+      "loss": 4.0026,
+      "step": 864256
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.584124484149577e-05,
+      "loss": 4.0209,
+      "step": 864768
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.583285889398524e-05,
+      "loss": 4.0257,
+      "step": 865280
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.582447294647472e-05,
+      "loss": 4.031,
+      "step": 865792
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.58160869989642e-05,
+      "loss": 4.0164,
+      "step": 866304
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.580771743025742e-05,
+      "loss": 4.0149,
+      "step": 866816
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.579934786155063e-05,
+      "loss": 4.016,
+      "step": 867328
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.579096191404011e-05,
+      "loss": 4.0115,
+      "step": 867840
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5782575966529594e-05,
+      "loss": 4.023,
+      "step": 868352
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.577419001901907e-05,
+      "loss": 4.0047,
+      "step": 868864
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.576580407150855e-05,
+      "loss": 4.0231,
+      "step": 869376
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.575743450280176e-05,
+      "loss": 4.0172,
+      "step": 869888
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.574904855529124e-05,
+      "loss": 4.0151,
+      "step": 870400
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5740662607780716e-05,
+      "loss": 4.0146,
+      "step": 870912
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5732276660270196e-05,
+      "loss": 4.0145,
+      "step": 871424
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5723890712759676e-05,
+      "loss": 4.0121,
+      "step": 871936
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5715504765249156e-05,
+      "loss": 4.0068,
+      "step": 872448
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5707118817738636e-05,
+      "loss": 4.0291,
+      "step": 872960
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5698749249031845e-05,
+      "loss": 4.0115,
+      "step": 873472
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.569036330152133e-05,
+      "loss": 4.0102,
+      "step": 873984
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.568199373281454e-05,
+      "loss": 4.0006,
+      "step": 874496
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.567360778530402e-05,
+      "loss": 4.0164,
+      "step": 875008
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.56652218377935e-05,
+      "loss": 4.018,
+      "step": 875520
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.565685226908672e-05,
+      "loss": 4.0239,
+      "step": 876032
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.564846632157619e-05,
+      "loss": 4.0177,
+      "step": 876544
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.564008037406567e-05,
+      "loss": 4.0207,
+      "step": 877056
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.563169442655515e-05,
+      "loss": 4.0323,
+      "step": 877568
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.562330847904463e-05,
+      "loss": 4.0125,
+      "step": 878080
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.561493891033784e-05,
+      "loss": 4.0277,
+      "step": 878592
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.560655296282732e-05,
+      "loss": 4.0126,
+      "step": 879104
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.55981670153168e-05,
+      "loss": 4.0146,
+      "step": 879616
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5589781067806286e-05,
+      "loss": 4.0169,
+      "step": 880128
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5581395120295766e-05,
+      "loss": 4.0115,
+      "step": 880640
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5573009172785246e-05,
+      "loss": 4.0241,
+      "step": 881152
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5564623225274726e-05,
+      "loss": 4.0311,
+      "step": 881664
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5556237277764206e-05,
+      "loss": 4.0177,
+      "step": 882176
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5547851330253686e-05,
+      "loss": 4.0025,
+      "step": 882688
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5539465382743166e-05,
+      "loss": 4.0133,
+      "step": 883200
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5531079435232645e-05,
+      "loss": 4.0224,
+      "step": 883712
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5522709866525855e-05,
+      "loss": 4.0101,
+      "step": 884224
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5514340297819064e-05,
+      "loss": 4.0213,
+      "step": 884736
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.5505954350308544e-05,
+      "loss": 4.0166,
+      "step": 885248
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.5497568402798024e-05,
+      "loss": 4.0175,
+      "step": 885760
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.5489182455287504e-05,
+      "loss": 4.0117,
+      "step": 886272
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.5480796507776983e-05,
+      "loss": 3.9997,
+      "step": 886784
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.547241056026647e-05,
+      "loss": 4.009,
+      "step": 887296
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.546402461275595e-05,
+      "loss": 4.0131,
+      "step": 887808
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.545563866524543e-05,
+      "loss": 4.0288,
+      "step": 888320
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.54472527177349e-05,
+      "loss": 4.0034,
+      "step": 888832
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.543886677022438e-05,
+      "loss": 4.0093,
+      "step": 889344
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.543048082271386e-05,
+      "loss": 4.0133,
+      "step": 889856
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.542211125400708e-05,
+      "loss": 4.0137,
+      "step": 890368
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.541372530649655e-05,
+      "loss": 4.023,
+      "step": 890880
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.540533935898603e-05,
+      "loss": 4.0107,
+      "step": 891392
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.539695341147551e-05,
+      "loss": 4.0197,
+      "step": 891904
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.538856746396499e-05,
+      "loss": 4.0105,
+      "step": 892416
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.538018151645447e-05,
+      "loss": 4.0102,
+      "step": 892928
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.537181194774769e-05,
+      "loss": 4.0063,
+      "step": 893440
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.536342600023717e-05,
+      "loss": 4.0092,
+      "step": 893952
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.535504005272665e-05,
+      "loss": 4.0159,
+      "step": 894464
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.534665410521613e-05,
+      "loss": 4.0076,
+      "step": 894976
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.533826815770561e-05,
+      "loss": 4.0093,
+      "step": 895488
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.532989858899882e-05,
+      "loss": 4.0028,
+      "step": 896000
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.53215126414883e-05,
+      "loss": 4.008,
+      "step": 896512
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.531312669397778e-05,
+      "loss": 4.0141,
+      "step": 897024
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.530474074646726e-05,
+      "loss": 4.0134,
+      "step": 897536
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.5296371177760466e-05,
+      "loss": 4.0125,
+      "step": 898048
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.5287985230249946e-05,
+      "loss": 4.0177,
+      "step": 898560
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.5279599282739426e-05,
+      "loss": 4.0183,
+      "step": 899072
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.5271213335228906e-05,
+      "loss": 4.0191,
+      "step": 899584
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.5262827387718386e-05,
+      "loss": 4.0164,
+      "step": 900096
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.525444144020787e-05,
+      "loss": 4.0088,
+      "step": 900608
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.524605549269735e-05,
+      "loss": 4.0162,
+      "step": 901120
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.523766954518683e-05,
+      "loss": 4.0055,
+      "step": 901632
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.522928359767631e-05,
+      "loss": 4.0074,
+      "step": 902144
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.522089765016579e-05,
+      "loss": 4.0133,
+      "step": 902656
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.5212511702655265e-05,
+      "loss": 4.0127,
+      "step": 903168
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.5204125755144745e-05,
+      "loss": 4.0109,
+      "step": 903680
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.519575618643796e-05,
+      "loss": 4.01,
+      "step": 904192
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.518738661773117e-05,
+      "loss": 4.0175,
+      "step": 904704
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.517900067022065e-05,
+      "loss": 4.0086,
+      "step": 905216
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.517061472271013e-05,
+      "loss": 4.019,
+      "step": 905728
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.516222877519961e-05,
+      "loss": 4.01,
+      "step": 906240
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.515384282768909e-05,
+      "loss": 4.0087,
+      "step": 906752
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.5145473258982306e-05,
+      "loss": 3.9845,
+      "step": 907264
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.5137087311471786e-05,
+      "loss": 4.0201,
+      "step": 907776
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.5128701363961266e-05,
+      "loss": 4.0122,
+      "step": 908288
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.512031541645074e-05,
+      "loss": 4.0251,
+      "step": 908800
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.511192946894022e-05,
+      "loss": 4.0127,
+      "step": 909312
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.51035435214297e-05,
+      "loss": 4.013,
+      "step": 909824
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.509515757391918e-05,
+      "loss": 4.0152,
+      "step": 910336
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.508677162640866e-05,
+      "loss": 4.0257,
+      "step": 910848
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.507838567889814e-05,
+      "loss": 3.9989,
+      "step": 911360
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.507001611019135e-05,
+      "loss": 4.0118,
+      "step": 911872
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.506163016268083e-05,
+      "loss": 4.0078,
+      "step": 912384
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.505324421517031e-05,
+      "loss": 4.015,
+      "step": 912896
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.5044874646463524e-05,
+      "loss": 4.0098,
+      "step": 913408
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.5036488698953004e-05,
+      "loss": 4.01,
+      "step": 913920
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.5028102751442484e-05,
+      "loss": 4.0078,
+      "step": 914432
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.5019716803931964e-05,
+      "loss": 4.0161,
+      "step": 914944
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.5011330856421444e-05,
+      "loss": 4.009,
+      "step": 915456
+    },
+    {
+      "epoch": 1.03,
+      "eval_loss": 4.0643205642700195,
+      "eval_runtime": 294.1127,
+      "eval_samples_per_second": 1297.431,
+      "eval_steps_per_second": 40.546,
+      "step": 915840
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.5002944908910924e-05,
+      "loss": 4.0169,
+      "step": 915968
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.4994558961400404e-05,
+      "loss": 4.0128,
+      "step": 916480
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.4986173013889883e-05,
+      "loss": 4.0066,
+      "step": 916992
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.4977787066379363e-05,
+      "loss": 4.0137,
+      "step": 917504
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.496940111886884e-05,
+      "loss": 4.0136,
+      "step": 918016
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.496101517135832e-05,
+      "loss": 4.0149,
+      "step": 918528
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.4952629223847797e-05,
+      "loss": 4.0107,
+      "step": 919040
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.4944243276337276e-05,
+      "loss": 3.9995,
+      "step": 919552
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.493585732882676e-05,
+      "loss": 4.0021,
+      "step": 920064
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.492747138131624e-05,
+      "loss": 4.0224,
+      "step": 920576
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.491908543380572e-05,
+      "loss": 4.0132,
+      "step": 921088
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.49106994862952e-05,
+      "loss": 4.0039,
+      "step": 921600
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.490232991758841e-05,
+      "loss": 4.0237,
+      "step": 922112
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.489394397007789e-05,
+      "loss": 3.9975,
+      "step": 922624
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.488555802256737e-05,
+      "loss": 4.0041,
+      "step": 923136
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.487717207505685e-05,
+      "loss": 3.9976,
+      "step": 923648
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.486878612754633e-05,
+      "loss": 4.0112,
+      "step": 924160
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.486040018003581e-05,
+      "loss": 4.0045,
+      "step": 924672
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.485201423252529e-05,
+      "loss": 4.0055,
+      "step": 925184
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.484362828501477e-05,
+      "loss": 4.0122,
+      "step": 925696
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.483524233750425e-05,
+      "loss": 4.0164,
+      "step": 926208
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.482685638999373e-05,
+      "loss": 4.0143,
+      "step": 926720
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.481847044248321e-05,
+      "loss": 4.0018,
+      "step": 927232
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.481008449497269e-05,
+      "loss": 4.0106,
+      "step": 927744
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.480169854746217e-05,
+      "loss": 4.0092,
+      "step": 928256
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.479334535755912e-05,
+      "loss": 4.005,
+      "step": 928768
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.4784959410048597e-05,
+      "loss": 3.9931,
+      "step": 929280
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.4776573462538077e-05,
+      "loss": 4.003,
+      "step": 929792
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.4768187515027557e-05,
+      "loss": 4.0109,
+      "step": 930304
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.4759801567517036e-05,
+      "loss": 3.9915,
+      "step": 930816
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.4751415620006516e-05,
+      "loss": 4.0029,
+      "step": 931328
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.474302967249599e-05,
+      "loss": 4.0049,
+      "step": 931840
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.473464372498547e-05,
+      "loss": 4.0109,
+      "step": 932352
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.4726274156278685e-05,
+      "loss": 4.013,
+      "step": 932864
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.4717888208768165e-05,
+      "loss": 4.0084,
+      "step": 933376
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.470951864006138e-05,
+      "loss": 4.0086,
+      "step": 933888
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.470113269255086e-05,
+      "loss": 4.02,
+      "step": 934400
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.469274674504034e-05,
+      "loss": 3.984,
+      "step": 934912
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.4684360797529814e-05,
+      "loss": 3.9979,
+      "step": 935424
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.4675974850019294e-05,
+      "loss": 4.0044,
+      "step": 935936
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.4667588902508774e-05,
+      "loss": 3.9919,
+      "step": 936448
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.4659202954998254e-05,
+      "loss": 4.0047,
+      "step": 936960
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.4650817007487734e-05,
+      "loss": 4.0036,
+      "step": 937472
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.464244743878094e-05,
+      "loss": 4.0013,
+      "step": 937984
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.463406149127042e-05,
+      "loss": 4.0044,
+      "step": 938496
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.46256755437599e-05,
+      "loss": 4.0001,
+      "step": 939008
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.461728959624938e-05,
+      "loss": 4.004,
+      "step": 939520
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.460890364873886e-05,
+      "loss": 4.0151,
+      "step": 940032
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.460053408003208e-05,
+      "loss": 3.9808,
+      "step": 940544
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.459214813252156e-05,
+      "loss": 4.0006,
+      "step": 941056
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.458376218501104e-05,
+      "loss": 4.0112,
+      "step": 941568
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.457537623750052e-05,
+      "loss": 4.0134,
+      "step": 942080
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.456699028999e-05,
+      "loss": 4.0029,
+      "step": 942592
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.455860434247948e-05,
+      "loss": 3.997,
+      "step": 943104
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.455021839496896e-05,
+      "loss": 4.0011,
+      "step": 943616
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.454183244745844e-05,
+      "loss": 3.9933,
+      "step": 944128
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.453346287875165e-05,
+      "loss": 4.0073,
+      "step": 944640
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.452507693124113e-05,
+      "loss": 3.9883,
+      "step": 945152
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.451669098373061e-05,
+      "loss": 4.0092,
+      "step": 945664
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.450832141502382e-05,
+      "loss": 3.9991,
+      "step": 946176
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.4499935467513303e-05,
+      "loss": 4.0005,
+      "step": 946688
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.4491549520002783e-05,
+      "loss": 3.9926,
+      "step": 947200
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.4483163572492263e-05,
+      "loss": 4.0002,
+      "step": 947712
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.447477762498174e-05,
+      "loss": 3.9945,
+      "step": 948224
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.446640805627495e-05,
+      "loss": 3.9921,
+      "step": 948736
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.445802210876443e-05,
+      "loss": 4.01,
+      "step": 949248
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.444963616125391e-05,
+      "loss": 4.0005,
+      "step": 949760
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.444125021374339e-05,
+      "loss": 3.9921,
+      "step": 950272
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.443286426623287e-05,
+      "loss": 3.9835,
+      "step": 950784
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.4424478318722345e-05,
+      "loss": 4.0036,
+      "step": 951296
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.4416092371211825e-05,
+      "loss": 3.9953,
+      "step": 951808
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.4407706423701305e-05,
+      "loss": 4.0119,
+      "step": 952320
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.439935323379826e-05,
+      "loss": 3.9941,
+      "step": 952832
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.439096728628774e-05,
+      "loss": 4.0071,
+      "step": 953344
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.438258133877722e-05,
+      "loss": 4.0138,
+      "step": 953856
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.43741953912667e-05,
+      "loss": 3.9987,
+      "step": 954368
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.436580944375618e-05,
+      "loss": 4.0068,
+      "step": 954880
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.435742349624565e-05,
+      "loss": 3.9971,
+      "step": 955392
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.434903754873513e-05,
+      "loss": 3.9964,
+      "step": 955904
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.434065160122461e-05,
+      "loss": 4.0025,
+      "step": 956416
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.433228203251782e-05,
+      "loss": 3.9964,
+      "step": 956928
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.4323912463811035e-05,
+      "loss": 4.0009,
+      "step": 957440
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.4315526516300515e-05,
+      "loss": 4.018,
+      "step": 957952
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.4307140568789995e-05,
+      "loss": 4.0041,
+      "step": 958464
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.4298754621279475e-05,
+      "loss": 3.9858,
+      "step": 958976
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.4290368673768955e-05,
+      "loss": 3.995,
+      "step": 959488
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.4281982726258435e-05,
+      "loss": 4.0024,
+      "step": 960000
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.427361315755165e-05,
+      "loss": 3.9996,
+      "step": 960512
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.4265227210041124e-05,
+      "loss": 4.0017,
+      "step": 961024
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.4256841262530604e-05,
+      "loss": 4.0016,
+      "step": 961536
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.4248455315020084e-05,
+      "loss": 4.0023,
+      "step": 962048
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.4240069367509564e-05,
+      "loss": 3.9954,
+      "step": 962560
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.4231683419999044e-05,
+      "loss": 3.9823,
+      "step": 963072
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.422331385129225e-05,
+      "loss": 3.9933,
+      "step": 963584
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.421492790378173e-05,
+      "loss": 3.9948,
+      "step": 964096
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.420654195627121e-05,
+      "loss": 4.0134,
+      "step": 964608
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.419815600876069e-05,
+      "loss": 3.9866,
+      "step": 965120
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.418977006125017e-05,
+      "loss": 3.9929,
+      "step": 965632
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.418140049254339e-05,
+      "loss": 3.9977,
+      "step": 966144
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.417301454503287e-05,
+      "loss": 3.9993,
+      "step": 966656
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.416462859752235e-05,
+      "loss": 4.006,
+      "step": 967168
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.415624265001183e-05,
+      "loss": 3.9956,
+      "step": 967680
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.414785670250131e-05,
+      "loss": 3.9966,
+      "step": 968192
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.413947075499079e-05,
+      "loss": 3.9968,
+      "step": 968704
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.413108480748027e-05,
+      "loss": 3.9936,
+      "step": 969216
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.412269885996975e-05,
+      "loss": 3.9904,
+      "step": 969728
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.411432929126296e-05,
+      "loss": 3.9949,
+      "step": 970240
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.410594334375244e-05,
+      "loss": 3.999,
+      "step": 970752
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.409755739624192e-05,
+      "loss": 3.9915,
+      "step": 971264
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.4089187827535126e-05,
+      "loss": 3.993,
+      "step": 971776
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.408080188002461e-05,
+      "loss": 3.9884,
+      "step": 972288
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.407241593251409e-05,
+      "loss": 3.9923,
+      "step": 972800
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.406402998500357e-05,
+      "loss": 3.9965,
+      "step": 973312
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.405564403749305e-05,
+      "loss": 3.9954,
+      "step": 973824
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.404727446878626e-05,
+      "loss": 3.9959,
+      "step": 974336
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.403888852127574e-05,
+      "loss": 4.0007,
+      "step": 974848
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.403050257376522e-05,
+      "loss": 4.008,
+      "step": 975360
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.40221166262547e-05,
+      "loss": 3.9978,
+      "step": 975872
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.401373067874418e-05,
+      "loss": 4.0007,
+      "step": 976384
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.4005344731233655e-05,
+      "loss": 3.995,
+      "step": 976896
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.3996958783723135e-05,
+      "loss": 3.9975,
+      "step": 977408
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.3988572836212615e-05,
+      "loss": 3.9905,
+      "step": 977920
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.398020326750583e-05,
+      "loss": 3.9934,
+      "step": 978432
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.397181731999531e-05,
+      "loss": 3.9972,
+      "step": 978944
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.396343137248479e-05,
+      "loss": 3.9965,
+      "step": 979456
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.395504542497427e-05,
+      "loss": 3.9976,
+      "step": 979968
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.394665947746375e-05,
+      "loss": 3.9946,
+      "step": 980480
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.393827352995323e-05,
+      "loss": 3.9985,
+      "step": 980992
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.392988758244271e-05,
+      "loss": 3.9905,
+      "step": 981504
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.392151801373592e-05,
+      "loss": 4.0024,
+      "step": 982016
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.39131320662254e-05,
+      "loss": 3.9927,
+      "step": 982528
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.390474611871488e-05,
+      "loss": 3.9945,
+      "step": 983040
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.389636017120436e-05,
+      "loss": 3.9716,
+      "step": 983552
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.388797422369384e-05,
+      "loss": 3.9985,
+      "step": 984064
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.387958827618332e-05,
+      "loss": 4.0015,
+      "step": 984576
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.3871218707476535e-05,
+      "loss": 4.0076,
+      "step": 985088
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.3862832759966015e-05,
+      "loss": 3.998,
+      "step": 985600
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.3854446812455495e-05,
+      "loss": 3.9923,
+      "step": 986112
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.3846060864944975e-05,
+      "loss": 3.9993,
+      "step": 986624
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.3837674917434455e-05,
+      "loss": 4.01,
+      "step": 987136
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.3829288969923935e-05,
+      "loss": 3.9836,
+      "step": 987648
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.3820903022413415e-05,
+      "loss": 4.0004,
+      "step": 988160
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.3812517074902895e-05,
+      "loss": 3.9931,
+      "step": 988672
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.380413112739237e-05,
+      "loss": 3.9979,
+      "step": 989184
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.3795761558685584e-05,
+      "loss": 3.9945,
+      "step": 989696
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.3787375611175064e-05,
+      "loss": 3.9933,
+      "step": 990208
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.3778989663664544e-05,
+      "loss": 3.9923,
+      "step": 990720
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.377062009495775e-05,
+      "loss": 4.0009,
+      "step": 991232
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.376223414744723e-05,
+      "loss": 3.9972,
+      "step": 991744
+    },
+    {
+      "epoch": 0.03,
+      "eval_loss": 4.054955959320068,
+      "eval_runtime": 314.0724,
+      "eval_samples_per_second": 1214.978,
+      "eval_steps_per_second": 37.969,
+      "step": 992160
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.375384819993671e-05,
+      "loss": 3.9886,
+      "step": 992256
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.37454622524262e-05,
+      "loss": 3.9979,
+      "step": 992768
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.373707630491567e-05,
+      "loss": 3.9896,
+      "step": 993280
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.372869035740515e-05,
+      "loss": 4.0,
+      "step": 993792
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.372032078869837e-05,
+      "loss": 4.001,
+      "step": 994304
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.371193484118784e-05,
+      "loss": 3.9962,
+      "step": 994816
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.370354889367732e-05,
+      "loss": 3.9956,
+      "step": 995328
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.36951629461668e-05,
+      "loss": 3.983,
+      "step": 995840
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.368677699865628e-05,
+      "loss": 3.9909,
+      "step": 996352
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.367839105114576e-05,
+      "loss": 4.0022,
+      "step": 996864
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.367000510363524e-05,
+      "loss": 3.9959,
+      "step": 997376
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.366161915612472e-05,
+      "loss": 3.9961,
+      "step": 997888
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.36532823450254e-05,
+      "loss": 4.0044,
+      "step": 998400
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.364489639751488e-05,
+      "loss": 3.9867,
+      "step": 998912
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.363651045000436e-05,
+      "loss": 3.9878,
+      "step": 999424
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.362812450249384e-05,
+      "loss": 3.9838,
+      "step": 999936
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.3619738554983316e-05,
+      "loss": 3.9974,
+      "step": 1000448
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.3611352607472795e-05,
+      "loss": 3.9888,
+      "step": 1000960
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.360298303876601e-05,
+      "loss": 3.9877,
+      "step": 1001472
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.359459709125549e-05,
+      "loss": 3.9964,
+      "step": 1001984
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.3586211143744964e-05,
+      "loss": 4.0041,
+      "step": 1002496
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.3577825196234444e-05,
+      "loss": 3.9994,
+      "step": 1003008
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.3569439248723924e-05,
+      "loss": 3.9907,
+      "step": 1003520
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.3561053301213404e-05,
+      "loss": 3.9914,
+      "step": 1004032
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.355266735370289e-05,
+      "loss": 3.9987,
+      "step": 1004544
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.354428140619237e-05,
+      "loss": 3.992,
+      "step": 1005056
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.353589545868185e-05,
+      "loss": 3.9784,
+      "step": 1005568
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.352750951117133e-05,
+      "loss": 3.9888,
+      "step": 1006080
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.351913994246454e-05,
+      "loss": 3.9888,
+      "step": 1006592
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.351075399495402e-05,
+      "loss": 3.9816,
+      "step": 1007104
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.35023680474435e-05,
+      "loss": 3.9901,
+      "step": 1007616
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.349398209993298e-05,
+      "loss": 3.9889,
+      "step": 1008128
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.348559615242246e-05,
+      "loss": 3.9959,
+      "step": 1008640
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.347721020491194e-05,
+      "loss": 3.9984,
+      "step": 1009152
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.346884063620515e-05,
+      "loss": 3.9959,
+      "step": 1009664
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.346047106749836e-05,
+      "loss": 3.9946,
+      "step": 1010176
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3452085119987845e-05,
+      "loss": 4.0043,
+      "step": 1010688
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3443699172477325e-05,
+      "loss": 3.9712,
+      "step": 1011200
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3435313224966805e-05,
+      "loss": 3.9856,
+      "step": 1011712
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3426927277456285e-05,
+      "loss": 3.9882,
+      "step": 1012224
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3418541329945765e-05,
+      "loss": 3.9738,
+      "step": 1012736
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3410155382435245e-05,
+      "loss": 3.9959,
+      "step": 1013248
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3401769434924724e-05,
+      "loss": 3.9896,
+      "step": 1013760
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3393383487414204e-05,
+      "loss": 3.9868,
+      "step": 1014272
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.338499753990368e-05,
+      "loss": 3.9865,
+      "step": 1014784
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3376627971196893e-05,
+      "loss": 3.9902,
+      "step": 1015296
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3368242023686373e-05,
+      "loss": 3.9876,
+      "step": 1015808
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3359856076175853e-05,
+      "loss": 3.9992,
+      "step": 1016320
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3351470128665327e-05,
+      "loss": 3.9682,
+      "step": 1016832
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.334310055995854e-05,
+      "loss": 3.981,
+      "step": 1017344
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.333473099125176e-05,
+      "loss": 4.0016,
+      "step": 1017856
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.332634504374124e-05,
+      "loss": 3.9987,
+      "step": 1018368
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.331795909623072e-05,
+      "loss": 3.9878,
+      "step": 1018880
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.33095731487202e-05,
+      "loss": 3.9799,
+      "step": 1019392
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.330118720120968e-05,
+      "loss": 3.9862,
+      "step": 1019904
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.329280125369915e-05,
+      "loss": 3.9768,
+      "step": 1020416
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.328441530618863e-05,
+      "loss": 3.9935,
+      "step": 1020928
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.327602935867811e-05,
+      "loss": 3.975,
+      "step": 1021440
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.326764341116759e-05,
+      "loss": 3.9902,
+      "step": 1021952
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.325925746365707e-05,
+      "loss": 3.9874,
+      "step": 1022464
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.325087151614655e-05,
+      "loss": 3.9885,
+      "step": 1022976
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.324248556863603e-05,
+      "loss": 3.9791,
+      "step": 1023488
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.323411599992925e-05,
+      "loss": 3.9873,
+      "step": 1024000
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.322573005241873e-05,
+      "loss": 3.9761,
+      "step": 1024512
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.321734410490821e-05,
+      "loss": 3.9758,
+      "step": 1025024
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.320895815739769e-05,
+      "loss": 3.9959,
+      "step": 1025536
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.320057220988717e-05,
+      "loss": 3.9878,
+      "step": 1026048
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3192202641180376e-05,
+      "loss": 3.9793,
+      "step": 1026560
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3183833072473585e-05,
+      "loss": 3.9698,
+      "step": 1027072
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3175447124963065e-05,
+      "loss": 3.986,
+      "step": 1027584
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3167061177452545e-05,
+      "loss": 3.9831,
+      "step": 1028096
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3158675229942025e-05,
+      "loss": 4.0009,
+      "step": 1028608
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3150289282431505e-05,
+      "loss": 3.9785,
+      "step": 1029120
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3141903334920985e-05,
+      "loss": 3.9917,
+      "step": 1029632
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3133517387410465e-05,
+      "loss": 4.0003,
+      "step": 1030144
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.312514781870368e-05,
+      "loss": 3.9804,
+      "step": 1030656
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.311676187119316e-05,
+      "loss": 3.9893,
+      "step": 1031168
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.310837592368264e-05,
+      "loss": 3.9873,
+      "step": 1031680
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.309998997617212e-05,
+      "loss": 3.9858,
+      "step": 1032192
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.30916040286616e-05,
+      "loss": 3.985,
+      "step": 1032704
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.308323445995481e-05,
+      "loss": 3.9822,
+      "step": 1033216
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.307484851244429e-05,
+      "loss": 3.9895,
+      "step": 1033728
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.306646256493377e-05,
+      "loss": 4.0024,
+      "step": 1034240
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.305807661742325e-05,
+      "loss": 3.9872,
+      "step": 1034752
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.304969066991273e-05,
+      "loss": 3.9697,
+      "step": 1035264
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.304130472240221e-05,
+      "loss": 3.9833,
+      "step": 1035776
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.303293515369542e-05,
+      "loss": 3.9888,
+      "step": 1036288
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.30245492061849e-05,
+      "loss": 3.9843,
+      "step": 1036800
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3016163258674385e-05,
+      "loss": 3.9906,
+      "step": 1037312
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.3007793689967594e-05,
+      "loss": 3.9861,
+      "step": 1037824
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2999407742457074e-05,
+      "loss": 3.9883,
+      "step": 1038336
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2991021794946554e-05,
+      "loss": 3.9779,
+      "step": 1038848
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2982635847436034e-05,
+      "loss": 3.9705,
+      "step": 1039360
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2974249899925514e-05,
+      "loss": 3.9792,
+      "step": 1039872
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.296586395241499e-05,
+      "loss": 3.9779,
+      "step": 1040384
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.295747800490447e-05,
+      "loss": 3.9967,
+      "step": 1040896
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.294909205739395e-05,
+      "loss": 3.981,
+      "step": 1041408
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.294070610988343e-05,
+      "loss": 3.9751,
+      "step": 1041920
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.293232016237291e-05,
+      "loss": 3.9857,
+      "step": 1042432
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.292395059366612e-05,
+      "loss": 3.9818,
+      "step": 1042944
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.29155646461556e-05,
+      "loss": 3.9904,
+      "step": 1043456
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.290717869864508e-05,
+      "loss": 3.9832,
+      "step": 1043968
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.289879275113456e-05,
+      "loss": 3.9831,
+      "step": 1044480
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.289040680362404e-05,
+      "loss": 3.9777,
+      "step": 1044992
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.288202085611352e-05,
+      "loss": 3.9847,
+      "step": 1045504
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2873634908603e-05,
+      "loss": 3.9747,
+      "step": 1046016
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.286524896109248e-05,
+      "loss": 3.9785,
+      "step": 1046528
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.285689577118942e-05,
+      "loss": 3.9886,
+      "step": 1047040
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.28485098236789e-05,
+      "loss": 3.9777,
+      "step": 1047552
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.284012387616838e-05,
+      "loss": 3.9793,
+      "step": 1048064
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.283173792865786e-05,
+      "loss": 3.9748,
+      "step": 1048576
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2823368359951076e-05,
+      "loss": 3.9784,
+      "step": 1049088
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2814982412440556e-05,
+      "loss": 3.9825,
+      "step": 1049600
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2806596464930036e-05,
+      "loss": 3.9803,
+      "step": 1050112
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2798210517419516e-05,
+      "loss": 3.9803,
+      "step": 1050624
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2789824569908996e-05,
+      "loss": 3.9896,
+      "step": 1051136
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2781438622398476e-05,
+      "loss": 3.9912,
+      "step": 1051648
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2773052674887956e-05,
+      "loss": 3.9851,
+      "step": 1052160
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2764666727377436e-05,
+      "loss": 3.9881,
+      "step": 1052672
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2756280779866916e-05,
+      "loss": 3.9795,
+      "step": 1053184
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2747894832356396e-05,
+      "loss": 3.9851,
+      "step": 1053696
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2739525263649605e-05,
+      "loss": 3.9763,
+      "step": 1054208
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2731139316139085e-05,
+      "loss": 3.9837,
+      "step": 1054720
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2722753368628565e-05,
+      "loss": 3.9747,
+      "step": 1055232
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2714367421118045e-05,
+      "loss": 3.9861,
+      "step": 1055744
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2705981473607525e-05,
+      "loss": 3.9857,
+      "step": 1056256
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2697595526097005e-05,
+      "loss": 3.9849,
+      "step": 1056768
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2689209578586485e-05,
+      "loss": 3.9812,
+      "step": 1057280
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2680823631075965e-05,
+      "loss": 3.9759,
+      "step": 1057792
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2672437683565445e-05,
+      "loss": 3.9927,
+      "step": 1058304
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2664068114858654e-05,
+      "loss": 3.9821,
+      "step": 1058816
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2655682167348134e-05,
+      "loss": 3.9777,
+      "step": 1059328
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2647296219837614e-05,
+      "loss": 3.9586,
+      "step": 1059840
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.263892665113082e-05,
+      "loss": 3.9804,
+      "step": 1060352
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.26305407036203e-05,
+      "loss": 3.9881,
+      "step": 1060864
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.262215475610978e-05,
+      "loss": 3.9939,
+      "step": 1061376
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.261376880859926e-05,
+      "loss": 3.9846,
+      "step": 1061888
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.260538286108874e-05,
+      "loss": 3.9788,
+      "step": 1062400
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.259699691357822e-05,
+      "loss": 3.9836,
+      "step": 1062912
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.258862734487144e-05,
+      "loss": 3.9983,
+      "step": 1063424
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.258025777616465e-05,
+      "loss": 3.9722,
+      "step": 1063936
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.257187182865413e-05,
+      "loss": 3.9851,
+      "step": 1064448
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.256348588114361e-05,
+      "loss": 3.9806,
+      "step": 1064960
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.255509993363309e-05,
+      "loss": 3.9839,
+      "step": 1065472
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.254671398612257e-05,
+      "loss": 3.9834,
+      "step": 1065984
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2538344417415777e-05,
+      "loss": 3.9793,
+      "step": 1066496
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2529958469905257e-05,
+      "loss": 3.9728,
+      "step": 1067008
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2521572522394737e-05,
+      "loss": 3.987,
+      "step": 1067520
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.2513186574884216e-05,
+      "loss": 3.9818,
+      "step": 1068032
+    },
+    {
+      "epoch": 1.03,
+      "eval_loss": 4.047435760498047,
+      "eval_runtime": 306.5853,
+      "eval_samples_per_second": 1244.649,
+      "eval_steps_per_second": 38.896,
+      "step": 1068480
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.2504800627373696e-05,
+      "loss": 3.9777,
+      "step": 1068544
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.2496414679863176e-05,
+      "loss": 3.9874,
+      "step": 1069056
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.248802873235266e-05,
+      "loss": 3.9765,
+      "step": 1069568
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.247964278484214e-05,
+      "loss": 3.9866,
+      "step": 1070080
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.247125683733162e-05,
+      "loss": 3.9843,
+      "step": 1070592
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.24628708898211e-05,
+      "loss": 3.9865,
+      "step": 1071104
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.245448494231058e-05,
+      "loss": 3.9781,
+      "step": 1071616
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.244609899480006e-05,
+      "loss": 3.9699,
+      "step": 1072128
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.2437713047289536e-05,
+      "loss": 3.9765,
+      "step": 1072640
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.2429327099779016e-05,
+      "loss": 3.9904,
+      "step": 1073152
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.2420941152268496e-05,
+      "loss": 3.981,
+      "step": 1073664
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.2412555204757976e-05,
+      "loss": 3.9813,
+      "step": 1074176
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.2404185636051185e-05,
+      "loss": 3.9923,
+      "step": 1074688
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.2395799688540665e-05,
+      "loss": 3.9771,
+      "step": 1075200
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.2387413741030145e-05,
+      "loss": 3.9737,
+      "step": 1075712
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.237902779351963e-05,
+      "loss": 3.9706,
+      "step": 1076224
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.237064184600911e-05,
+      "loss": 3.9796,
+      "step": 1076736
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.236225589849859e-05,
+      "loss": 3.9768,
+      "step": 1077248
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.235386995098807e-05,
+      "loss": 3.9744,
+      "step": 1077760
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.234548400347755e-05,
+      "loss": 3.9807,
+      "step": 1078272
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.233709805596703e-05,
+      "loss": 3.9949,
+      "step": 1078784
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.232871210845651e-05,
+      "loss": 3.9812,
+      "step": 1079296
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.232032616094599e-05,
+      "loss": 3.9766,
+      "step": 1079808
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.231194021343547e-05,
+      "loss": 3.9803,
+      "step": 1080320
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.230355426592495e-05,
+      "loss": 3.9791,
+      "step": 1080832
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.229518469721816e-05,
+      "loss": 3.9812,
+      "step": 1081344
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.228679874970764e-05,
+      "loss": 3.965,
+      "step": 1081856
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.227841280219712e-05,
+      "loss": 3.9736,
+      "step": 1082368
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.22700268546866e-05,
+      "loss": 3.9778,
+      "step": 1082880
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 3.2261657285979816e-05,
+      "loss": 3.9687,
+      "step": 1083392
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.2253271338469296e-05,
+      "loss": 3.9697,
+      "step": 1083904
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.2244885390958776e-05,
+      "loss": 3.9831,
+      "step": 1084416
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.223649944344825e-05,
+      "loss": 3.9771,
+      "step": 1084928
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.222811349593773e-05,
+      "loss": 3.9875,
+      "step": 1085440
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.2219743927230945e-05,
+      "loss": 3.9808,
+      "step": 1085952
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.221139073732788e-05,
+      "loss": 3.979,
+      "step": 1086464
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.220300478981736e-05,
+      "loss": 3.9869,
+      "step": 1086976
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.219461884230684e-05,
+      "loss": 3.9629,
+      "step": 1087488
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.218623289479632e-05,
+      "loss": 3.9742,
+      "step": 1088000
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.21778469472858e-05,
+      "loss": 3.972,
+      "step": 1088512
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.216946099977528e-05,
+      "loss": 3.9641,
+      "step": 1089024
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.216107505226476e-05,
+      "loss": 3.9803,
+      "step": 1089536
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.215268910475425e-05,
+      "loss": 3.9751,
+      "step": 1090048
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.214430315724372e-05,
+      "loss": 3.9715,
+      "step": 1090560
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.213593358853694e-05,
+      "loss": 3.9764,
+      "step": 1091072
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.212754764102642e-05,
+      "loss": 3.9736,
+      "step": 1091584
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.21191616935159e-05,
+      "loss": 3.9785,
+      "step": 1092096
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.211077574600537e-05,
+      "loss": 3.9789,
+      "step": 1092608
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.210238979849485e-05,
+      "loss": 3.9583,
+      "step": 1093120
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.209400385098433e-05,
+      "loss": 3.9679,
+      "step": 1093632
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.208561790347381e-05,
+      "loss": 3.9863,
+      "step": 1094144
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.207723195596329e-05,
+      "loss": 3.9843,
+      "step": 1094656
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.20688623872565e-05,
+      "loss": 3.9756,
+      "step": 1095168
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.206047643974599e-05,
+      "loss": 3.967,
+      "step": 1095680
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.205209049223547e-05,
+      "loss": 3.9754,
+      "step": 1096192
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.204370454472495e-05,
+      "loss": 3.9668,
+      "step": 1096704
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.2035334976018157e-05,
+      "loss": 3.9744,
+      "step": 1097216
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.2026949028507636e-05,
+      "loss": 3.9615,
+      "step": 1097728
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.2018563080997116e-05,
+      "loss": 3.9775,
+      "step": 1098240
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.2010177133486596e-05,
+      "loss": 3.9754,
+      "step": 1098752
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.2001791185976076e-05,
+      "loss": 3.9737,
+      "step": 1099264
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.1993405238465556e-05,
+      "loss": 3.9629,
+      "step": 1099776
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.1985019290955036e-05,
+      "loss": 3.9745,
+      "step": 1100288
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.1976633343444516e-05,
+      "loss": 3.9612,
+      "step": 1100800
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.1968247395933996e-05,
+      "loss": 3.9672,
+      "step": 1101312
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.195989420603094e-05,
+      "loss": 3.9792,
+      "step": 1101824
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.195150825852042e-05,
+      "loss": 3.9764,
+      "step": 1102336
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.19431223110099e-05,
+      "loss": 3.9664,
+      "step": 1102848
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.193473636349938e-05,
+      "loss": 3.9561,
+      "step": 1103360
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.192635041598886e-05,
+      "loss": 3.972,
+      "step": 1103872
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.191798084728207e-05,
+      "loss": 3.9716,
+      "step": 1104384
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.190961127857528e-05,
+      "loss": 3.9833,
+      "step": 1104896
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.190122533106476e-05,
+      "loss": 3.9674,
+      "step": 1105408
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.189283938355424e-05,
+      "loss": 3.9829,
+      "step": 1105920
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.188445343604372e-05,
+      "loss": 3.9802,
+      "step": 1106432
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.18760674885332e-05,
+      "loss": 3.973,
+      "step": 1106944
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.186768154102268e-05,
+      "loss": 3.9748,
+      "step": 1107456
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.185929559351216e-05,
+      "loss": 3.9704,
+      "step": 1107968
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.185090964600164e-05,
+      "loss": 3.9768,
+      "step": 1108480
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.1842540077294855e-05,
+      "loss": 3.9696,
+      "step": 1108992
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.1834154129784335e-05,
+      "loss": 3.9683,
+      "step": 1109504
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.1825768182273815e-05,
+      "loss": 3.9743,
+      "step": 1110016
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.1817382234763295e-05,
+      "loss": 3.9887,
+      "step": 1110528
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.1808996287252775e-05,
+      "loss": 3.9745,
+      "step": 1111040
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.1800610339742255e-05,
+      "loss": 3.9592,
+      "step": 1111552
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.1792224392231735e-05,
+      "loss": 3.973,
+      "step": 1112064
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.178383844472121e-05,
+      "loss": 3.9754,
+      "step": 1112576
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.177545249721069e-05,
+      "loss": 3.9749,
+      "step": 1113088
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.176706654970017e-05,
+      "loss": 3.9769,
+      "step": 1113600
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 3.175871335979711e-05,
+      "loss": 3.9713,
+      "step": 1114112
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.175032741228659e-05,
+      "loss": 3.9748,
+      "step": 1114624
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.174194146477608e-05,
+      "loss": 3.9637,
+      "step": 1115136
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.173357189606929e-05,
+      "loss": 3.9596,
+      "step": 1115648
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.172518594855877e-05,
+      "loss": 3.9624,
+      "step": 1116160
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.171680000104825e-05,
+      "loss": 3.964,
+      "step": 1116672
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.170841405353773e-05,
+      "loss": 3.9826,
+      "step": 1117184
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.170002810602721e-05,
+      "loss": 3.9645,
+      "step": 1117696
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.169164215851668e-05,
+      "loss": 3.9606,
+      "step": 1118208
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.168325621100616e-05,
+      "loss": 3.9759,
+      "step": 1118720
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.167487026349564e-05,
+      "loss": 3.9692,
+      "step": 1119232
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.166648431598512e-05,
+      "loss": 3.9801,
+      "step": 1119744
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.16580983684746e-05,
+      "loss": 3.9721,
+      "step": 1120256
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.164971242096408e-05,
+      "loss": 3.9675,
+      "step": 1120768
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.16413428522573e-05,
+      "loss": 3.9687,
+      "step": 1121280
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.163295690474678e-05,
+      "loss": 3.9718,
+      "step": 1121792
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.162457095723626e-05,
+      "loss": 3.9591,
+      "step": 1122304
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.161618500972574e-05,
+      "loss": 3.9694,
+      "step": 1122816
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.160779906221522e-05,
+      "loss": 3.9709,
+      "step": 1123328
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.15994131147047e-05,
+      "loss": 3.9668,
+      "step": 1123840
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.159102716719418e-05,
+      "loss": 3.9651,
+      "step": 1124352
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.158264121968366e-05,
+      "loss": 3.9653,
+      "step": 1124864
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.1574271650976866e-05,
+      "loss": 3.9635,
+      "step": 1125376
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.1565885703466346e-05,
+      "loss": 3.9689,
+      "step": 1125888
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.1557516134759555e-05,
+      "loss": 3.9698,
+      "step": 1126400
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.1549130187249035e-05,
+      "loss": 3.9674,
+      "step": 1126912
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.1540744239738515e-05,
+      "loss": 3.9765,
+      "step": 1127424
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.1532358292227995e-05,
+      "loss": 3.9804,
+      "step": 1127936
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.152398872352121e-05,
+      "loss": 3.9726,
+      "step": 1128448
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.151560277601069e-05,
+      "loss": 3.9723,
+      "step": 1128960
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.150721682850017e-05,
+      "loss": 3.969,
+      "step": 1129472
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.149883088098965e-05,
+      "loss": 3.9695,
+      "step": 1129984
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.149044493347913e-05,
+      "loss": 3.9609,
+      "step": 1130496
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.148205898596861e-05,
+      "loss": 3.9697,
+      "step": 1131008
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.147367303845809e-05,
+      "loss": 3.9656,
+      "step": 1131520
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.1465287090947564e-05,
+      "loss": 3.972,
+      "step": 1132032
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.1456901143437043e-05,
+      "loss": 3.9697,
+      "step": 1132544
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.144853157473026e-05,
+      "loss": 3.9748,
+      "step": 1133056
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.144014562721974e-05,
+      "loss": 3.9669,
+      "step": 1133568
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.143177605851295e-05,
+      "loss": 3.9611,
+      "step": 1134080
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.1423390111002435e-05,
+      "loss": 3.9795,
+      "step": 1134592
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.1415004163491915e-05,
+      "loss": 3.9732,
+      "step": 1135104
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.1406618215981395e-05,
+      "loss": 3.9638,
+      "step": 1135616
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.139823226847087e-05,
+      "loss": 3.9458,
+      "step": 1136128
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.138984632096035e-05,
+      "loss": 3.9662,
+      "step": 1136640
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.138146037344983e-05,
+      "loss": 3.9769,
+      "step": 1137152
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.137309080474304e-05,
+      "loss": 3.9818,
+      "step": 1137664
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.136470485723252e-05,
+      "loss": 3.9699,
+      "step": 1138176
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.1356318909722e-05,
+      "loss": 3.965,
+      "step": 1138688
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.134793296221148e-05,
+      "loss": 3.9695,
+      "step": 1139200
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.133954701470096e-05,
+      "loss": 3.9858,
+      "step": 1139712
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.133116106719044e-05,
+      "loss": 3.964,
+      "step": 1140224
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.132277511967992e-05,
+      "loss": 3.9683,
+      "step": 1140736
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.1314389172169404e-05,
+      "loss": 3.9697,
+      "step": 1141248
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.130601960346261e-05,
+      "loss": 3.9719,
+      "step": 1141760
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.129763365595209e-05,
+      "loss": 3.9687,
+      "step": 1142272
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.128924770844157e-05,
+      "loss": 3.968,
+      "step": 1142784
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.128086176093105e-05,
+      "loss": 3.9602,
+      "step": 1143296
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.127249219222426e-05,
+      "loss": 3.9786,
+      "step": 1143808
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 3.126410624471374e-05,
+      "loss": 3.9665,
+      "step": 1144320
+    },
+    {
+      "epoch": 0.03,
+      "eval_loss": 4.040378093719482,
+      "eval_runtime": 290.287,
+      "eval_samples_per_second": 1314.53,
+      "eval_steps_per_second": 41.08,
+      "step": 1144800
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.125572029720322e-05,
+      "loss": 3.9629,
+      "step": 1144832
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.12473343496927e-05,
+      "loss": 3.9731,
+      "step": 1145344
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.123894840218218e-05,
+      "loss": 3.9619,
+      "step": 1145856
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.123057883347539e-05,
+      "loss": 3.9747,
+      "step": 1146368
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.122219288596487e-05,
+      "loss": 3.9708,
+      "step": 1146880
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.121380693845436e-05,
+      "loss": 3.977,
+      "step": 1147392
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.120542099094384e-05,
+      "loss": 3.9644,
+      "step": 1147904
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.1197051422237046e-05,
+      "loss": 3.9548,
+      "step": 1148416
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.1188681853530256e-05,
+      "loss": 3.9578,
+      "step": 1148928
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.1180295906019735e-05,
+      "loss": 3.9844,
+      "step": 1149440
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.1171909958509215e-05,
+      "loss": 3.9657,
+      "step": 1149952
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.1163524010998695e-05,
+      "loss": 3.9688,
+      "step": 1150464
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.1155138063488175e-05,
+      "loss": 3.9761,
+      "step": 1150976
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.1146768494781384e-05,
+      "loss": 3.9695,
+      "step": 1151488
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.1138382547270864e-05,
+      "loss": 3.9595,
+      "step": 1152000
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.1129996599760344e-05,
+      "loss": 3.9607,
+      "step": 1152512
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.1121610652249824e-05,
+      "loss": 3.9608,
+      "step": 1153024
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.111322470473931e-05,
+      "loss": 3.9649,
+      "step": 1153536
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.110483875722879e-05,
+      "loss": 3.9631,
+      "step": 1154048
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.109645280971827e-05,
+      "loss": 3.9701,
+      "step": 1154560
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.108808324101148e-05,
+      "loss": 3.9827,
+      "step": 1155072
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.107969729350096e-05,
+      "loss": 3.9692,
+      "step": 1155584
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.107131134599044e-05,
+      "loss": 3.9671,
+      "step": 1156096
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.106292539847992e-05,
+      "loss": 3.9646,
+      "step": 1156608
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.105455582977313e-05,
+      "loss": 3.9695,
+      "step": 1157120
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.104616988226261e-05,
+      "loss": 3.9635,
+      "step": 1157632
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.103778393475209e-05,
+      "loss": 3.9553,
+      "step": 1158144
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.102939798724157e-05,
+      "loss": 3.9593,
+      "step": 1158656
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.102101203973105e-05,
+      "loss": 3.965,
+      "step": 1159168
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.101262609222053e-05,
+      "loss": 3.9571,
+      "step": 1159680
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.100424014471001e-05,
+      "loss": 3.956,
+      "step": 1160192
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.099585419719949e-05,
+      "loss": 3.9704,
+      "step": 1160704
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.09874846284927e-05,
+      "loss": 3.966,
+      "step": 1161216
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.097909868098218e-05,
+      "loss": 3.9721,
+      "step": 1161728
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.097071273347166e-05,
+      "loss": 3.9737,
+      "step": 1162240
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.0962343164764874e-05,
+      "loss": 3.9675,
+      "step": 1162752
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.095395721725435e-05,
+      "loss": 3.9689,
+      "step": 1163264
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.094557126974383e-05,
+      "loss": 3.9551,
+      "step": 1163776
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.093718532223331e-05,
+      "loss": 3.9608,
+      "step": 1164288
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.092879937472279e-05,
+      "loss": 3.958,
+      "step": 1164800
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.0920413427212267e-05,
+      "loss": 3.9563,
+      "step": 1165312
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.0912027479701747e-05,
+      "loss": 3.9612,
+      "step": 1165824
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.0903641532191227e-05,
+      "loss": 3.9677,
+      "step": 1166336
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.089527196348444e-05,
+      "loss": 3.9549,
+      "step": 1166848
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.088688601597392e-05,
+      "loss": 3.9657,
+      "step": 1167360
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.087851644726713e-05,
+      "loss": 3.9601,
+      "step": 1167872
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.087013049975661e-05,
+      "loss": 3.9653,
+      "step": 1168384
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.086174455224609e-05,
+      "loss": 3.968,
+      "step": 1168896
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.085335860473557e-05,
+      "loss": 3.9479,
+      "step": 1169408
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.084498903602878e-05,
+      "loss": 3.9555,
+      "step": 1169920
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.083660308851826e-05,
+      "loss": 3.9696,
+      "step": 1170432
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.082821714100774e-05,
+      "loss": 3.9729,
+      "step": 1170944
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.081983119349722e-05,
+      "loss": 3.9579,
+      "step": 1171456
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.08114452459867e-05,
+      "loss": 3.9571,
+      "step": 1171968
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.080305929847618e-05,
+      "loss": 3.9649,
+      "step": 1172480
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.079467335096567e-05,
+      "loss": 3.9521,
+      "step": 1172992
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.078628740345515e-05,
+      "loss": 3.9625,
+      "step": 1173504
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.077790145594463e-05,
+      "loss": 3.9492,
+      "step": 1174016
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.0769531887237836e-05,
+      "loss": 3.9675,
+      "step": 1174528
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.0761145939727316e-05,
+      "loss": 3.9596,
+      "step": 1175040
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.0752776371020525e-05,
+      "loss": 3.963,
+      "step": 1175552
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.0744390423510005e-05,
+      "loss": 3.9504,
+      "step": 1176064
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.0736004475999485e-05,
+      "loss": 3.9644,
+      "step": 1176576
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.0727618528488965e-05,
+      "loss": 3.9468,
+      "step": 1177088
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.0719232580978445e-05,
+      "loss": 3.9589,
+      "step": 1177600
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.0710846633467925e-05,
+      "loss": 3.9651,
+      "step": 1178112
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.0702477064761134e-05,
+      "loss": 3.9638,
+      "step": 1178624
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.069409111725062e-05,
+      "loss": 3.9517,
+      "step": 1179136
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.06857051697401e-05,
+      "loss": 3.9459,
+      "step": 1179648
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.067731922222958e-05,
+      "loss": 3.9584,
+      "step": 1180160
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.066893327471906e-05,
+      "loss": 3.9572,
+      "step": 1180672
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.0660547327208534e-05,
+      "loss": 3.9737,
+      "step": 1181184
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.065217775850175e-05,
+      "loss": 3.9534,
+      "step": 1181696
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.064379181099123e-05,
+      "loss": 3.9711,
+      "step": 1182208
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.063540586348071e-05,
+      "loss": 3.9657,
+      "step": 1182720
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.062701991597018e-05,
+      "loss": 3.9635,
+      "step": 1183232
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.061863396845966e-05,
+      "loss": 3.9634,
+      "step": 1183744
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.061026439975288e-05,
+      "loss": 3.9612,
+      "step": 1184256
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.060187845224236e-05,
+      "loss": 3.963,
+      "step": 1184768
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.059349250473184e-05,
+      "loss": 3.9575,
+      "step": 1185280
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.058510655722132e-05,
+      "loss": 3.9571,
+      "step": 1185792
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.05767206097108e-05,
+      "loss": 3.9613,
+      "step": 1186304
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.056833466220028e-05,
+      "loss": 3.9756,
+      "step": 1186816
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.055994871468976e-05,
+      "loss": 3.9604,
+      "step": 1187328
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.055157914598297e-05,
+      "loss": 3.9539,
+      "step": 1187840
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.054319319847245e-05,
+      "loss": 3.9529,
+      "step": 1188352
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.053480725096193e-05,
+      "loss": 3.9632,
+      "step": 1188864
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.052642130345141e-05,
+      "loss": 3.9588,
+      "step": 1189376
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.051803535594089e-05,
+      "loss": 3.9692,
+      "step": 1189888
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 3.05096657872341e-05,
+      "loss": 3.9578,
+      "step": 1190400
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.050127983972358e-05,
+      "loss": 3.9633,
+      "step": 1190912
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.049289389221306e-05,
+      "loss": 3.9501,
+      "step": 1191424
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.048450794470254e-05,
+      "loss": 3.9504,
+      "step": 1191936
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.047612199719202e-05,
+      "loss": 3.9468,
+      "step": 1192448
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.04677360496815e-05,
+      "loss": 3.9518,
+      "step": 1192960
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.045935010217098e-05,
+      "loss": 3.9695,
+      "step": 1193472
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.045096415466046e-05,
+      "loss": 3.9597,
+      "step": 1193984
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0442594585953672e-05,
+      "loss": 3.9485,
+      "step": 1194496
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.043422501724688e-05,
+      "loss": 3.9587,
+      "step": 1195008
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.042583906973636e-05,
+      "loss": 3.9608,
+      "step": 1195520
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.041745312222584e-05,
+      "loss": 3.9661,
+      "step": 1196032
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.040906717471532e-05,
+      "loss": 3.9591,
+      "step": 1196544
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0400681227204804e-05,
+      "loss": 3.955,
+      "step": 1197056
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0392295279694284e-05,
+      "loss": 3.9568,
+      "step": 1197568
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0383909332183764e-05,
+      "loss": 3.9653,
+      "step": 1198080
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0375523384673244e-05,
+      "loss": 3.9442,
+      "step": 1198592
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0367153815966453e-05,
+      "loss": 3.9545,
+      "step": 1199104
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0358767868455933e-05,
+      "loss": 3.96,
+      "step": 1199616
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0350381920945413e-05,
+      "loss": 3.9512,
+      "step": 1200128
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0341995973434896e-05,
+      "loss": 3.953,
+      "step": 1200640
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.033361002592437e-05,
+      "loss": 3.9564,
+      "step": 1201152
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0325240457217585e-05,
+      "loss": 3.9497,
+      "step": 1201664
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0316854509707065e-05,
+      "loss": 3.9604,
+      "step": 1202176
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0308468562196545e-05,
+      "loss": 3.9566,
+      "step": 1202688
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0300082614686022e-05,
+      "loss": 3.9509,
+      "step": 1203200
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0291713045979238e-05,
+      "loss": 3.9675,
+      "step": 1203712
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0283327098468718e-05,
+      "loss": 3.9653,
+      "step": 1204224
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.027494115095819e-05,
+      "loss": 3.9601,
+      "step": 1204736
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0266571582251407e-05,
+      "loss": 3.9633,
+      "step": 1205248
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0258185634740887e-05,
+      "loss": 3.9555,
+      "step": 1205760
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0249799687230367e-05,
+      "loss": 3.9576,
+      "step": 1206272
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0241413739719843e-05,
+      "loss": 3.9548,
+      "step": 1206784
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.023304417101306e-05,
+      "loss": 3.9517,
+      "step": 1207296
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.022465822350254e-05,
+      "loss": 3.9557,
+      "step": 1207808
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.021627227599202e-05,
+      "loss": 3.958,
+      "step": 1208320
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0207886328481495e-05,
+      "loss": 3.952,
+      "step": 1208832
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0199500380970975e-05,
+      "loss": 3.9657,
+      "step": 1209344
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0191114433460455e-05,
+      "loss": 3.9557,
+      "step": 1209856
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0182744864753664e-05,
+      "loss": 3.9492,
+      "step": 1210368
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.017437529604688e-05,
+      "loss": 3.9665,
+      "step": 1210880
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.016598934853636e-05,
+      "loss": 3.9612,
+      "step": 1211392
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.015760340102584e-05,
+      "loss": 3.9515,
+      "step": 1211904
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0149217453515317e-05,
+      "loss": 3.9389,
+      "step": 1212416
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0140831506004797e-05,
+      "loss": 3.9525,
+      "step": 1212928
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0132445558494277e-05,
+      "loss": 3.9657,
+      "step": 1213440
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0124059610983757e-05,
+      "loss": 3.9674,
+      "step": 1213952
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0115673663473237e-05,
+      "loss": 3.9568,
+      "step": 1214464
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.0107287715962717e-05,
+      "loss": 3.9516,
+      "step": 1214976
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.009891814725593e-05,
+      "loss": 3.961,
+      "step": 1215488
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.009053219974541e-05,
+      "loss": 3.9709,
+      "step": 1216000
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.008214625223489e-05,
+      "loss": 3.9528,
+      "step": 1216512
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.007376030472437e-05,
+      "loss": 3.9585,
+      "step": 1217024
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.006537435721385e-05,
+      "loss": 3.955,
+      "step": 1217536
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.005698840970333e-05,
+      "loss": 3.9572,
+      "step": 1218048
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.004860246219281e-05,
+      "loss": 3.9562,
+      "step": 1218560
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.004021651468229e-05,
+      "loss": 3.9625,
+      "step": 1219072
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.00318469459755e-05,
+      "loss": 3.9486,
+      "step": 1219584
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.002346099846498e-05,
+      "loss": 3.964,
+      "step": 1220096
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 3.001509142975819e-05,
+      "loss": 3.9563,
+      "step": 1220608
+    },
+    {
+      "epoch": 1.03,
+      "learning_rate": 3.000670548224767e-05,
+      "loss": 3.9515,
+      "step": 1221120
+    },
+    {
+      "epoch": 1.03,
+      "eval_loss": 4.034036159515381,
+      "eval_runtime": 304.592,
+      "eval_samples_per_second": 1252.794,
+      "eval_steps_per_second": 39.151,
+      "step": 1221120
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.999831953473715e-05,
+      "loss": 3.9597,
+      "step": 1221632
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.998993358722663e-05,
+      "loss": 3.9508,
+      "step": 1222144
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.9981547639716114e-05,
+      "loss": 3.9632,
+      "step": 1222656
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.9973161692205593e-05,
+      "loss": 3.9576,
+      "step": 1223168
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.9964775744695073e-05,
+      "loss": 3.965,
+      "step": 1223680
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.9956389797184553e-05,
+      "loss": 3.9535,
+      "step": 1224192
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.9948003849674027e-05,
+      "loss": 3.9443,
+      "step": 1224704
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.9939617902163507e-05,
+      "loss": 3.9478,
+      "step": 1225216
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.993123195465299e-05,
+      "loss": 3.9719,
+      "step": 1225728
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.992284600714247e-05,
+      "loss": 3.9572,
+      "step": 1226240
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.991446005963195e-05,
+      "loss": 3.952,
+      "step": 1226752
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.990607411212143e-05,
+      "loss": 3.9615,
+      "step": 1227264
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.989770454341464e-05,
+      "loss": 3.9581,
+      "step": 1227776
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.988931859590412e-05,
+      "loss": 3.9481,
+      "step": 1228288
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.98809326483936e-05,
+      "loss": 3.9515,
+      "step": 1228800
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.9872546700883082e-05,
+      "loss": 3.9468,
+      "step": 1229312
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.9864160753372562e-05,
+      "loss": 3.9542,
+      "step": 1229824
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.9855774805862042e-05,
+      "loss": 3.9476,
+      "step": 1230336
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.9847388858351522e-05,
+      "loss": 3.9603,
+      "step": 1230848
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.9839002910841002e-05,
+      "loss": 3.9724,
+      "step": 1231360
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.983063334213421e-05,
+      "loss": 3.9567,
+      "step": 1231872
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.982224739462369e-05,
+      "loss": 3.9509,
+      "step": 1232384
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.9813861447113174e-05,
+      "loss": 3.956,
+      "step": 1232896
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.9805491878406383e-05,
+      "loss": 3.9602,
+      "step": 1233408
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.9797105930895863e-05,
+      "loss": 3.9481,
+      "step": 1233920
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.9788719983385343e-05,
+      "loss": 3.9448,
+      "step": 1234432
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.9780334035874823e-05,
+      "loss": 3.9458,
+      "step": 1234944
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.9771948088364303e-05,
+      "loss": 3.9604,
+      "step": 1235456
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.9763562140853783e-05,
+      "loss": 3.9401,
+      "step": 1235968
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9755176193343267e-05,
+      "loss": 3.945,
+      "step": 1236480
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.974679024583274e-05,
+      "loss": 3.9588,
+      "step": 1236992
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.973840429832222e-05,
+      "loss": 3.9573,
+      "step": 1237504
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.97300183508117e-05,
+      "loss": 3.9569,
+      "step": 1238016
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9721648782104916e-05,
+      "loss": 3.961,
+      "step": 1238528
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9713262834594392e-05,
+      "loss": 3.9533,
+      "step": 1239040
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9704876887083872e-05,
+      "loss": 3.9574,
+      "step": 1239552
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9696490939573352e-05,
+      "loss": 3.941,
+      "step": 1240064
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9688104992062832e-05,
+      "loss": 3.9507,
+      "step": 1240576
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9679719044552312e-05,
+      "loss": 3.946,
+      "step": 1241088
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9671333097041792e-05,
+      "loss": 3.9435,
+      "step": 1241600
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9662947149531272e-05,
+      "loss": 3.952,
+      "step": 1242112
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9654561202020752e-05,
+      "loss": 3.9551,
+      "step": 1242624
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9646191633313964e-05,
+      "loss": 3.9441,
+      "step": 1243136
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9637805685803444e-05,
+      "loss": 3.9528,
+      "step": 1243648
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9629419738292924e-05,
+      "loss": 3.948,
+      "step": 1244160
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9621033790782404e-05,
+      "loss": 3.9535,
+      "step": 1244672
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9612680600879346e-05,
+      "loss": 3.958,
+      "step": 1245184
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9604294653368826e-05,
+      "loss": 3.9395,
+      "step": 1245696
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9595908705858306e-05,
+      "loss": 3.9399,
+      "step": 1246208
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9587522758347786e-05,
+      "loss": 3.9568,
+      "step": 1246720
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9579136810837266e-05,
+      "loss": 3.9628,
+      "step": 1247232
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9570750863326746e-05,
+      "loss": 3.9465,
+      "step": 1247744
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9562364915816225e-05,
+      "loss": 3.9487,
+      "step": 1248256
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9553978968305705e-05,
+      "loss": 3.9534,
+      "step": 1248768
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9545593020795185e-05,
+      "loss": 3.9385,
+      "step": 1249280
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9537223452088398e-05,
+      "loss": 3.9497,
+      "step": 1249792
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9528837504577878e-05,
+      "loss": 3.9379,
+      "step": 1250304
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9520467935871087e-05,
+      "loss": 3.955,
+      "step": 1250816
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9512081988360567e-05,
+      "loss": 3.9504,
+      "step": 1251328
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9503696040850047e-05,
+      "loss": 3.9515,
+      "step": 1251840
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.949531009333953e-05,
+      "loss": 3.9357,
+      "step": 1252352
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.948694052463274e-05,
+      "loss": 3.9562,
+      "step": 1252864
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.947855457712222e-05,
+      "loss": 3.9334,
+      "step": 1253376
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.94701686296117e-05,
+      "loss": 3.9475,
+      "step": 1253888
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.946178268210118e-05,
+      "loss": 3.9537,
+      "step": 1254400
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.945339673459066e-05,
+      "loss": 3.9549,
+      "step": 1254912
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.944501078708014e-05,
+      "loss": 3.9402,
+      "step": 1255424
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9436624839569622e-05,
+      "loss": 3.9373,
+      "step": 1255936
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9428238892059102e-05,
+      "loss": 3.9407,
+      "step": 1256448
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.941986932335231e-05,
+      "loss": 3.9521,
+      "step": 1256960
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.941148337584179e-05,
+      "loss": 3.9537,
+      "step": 1257472
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.940309742833127e-05,
+      "loss": 3.947,
+      "step": 1257984
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.939471148082075e-05,
+      "loss": 3.9568,
+      "step": 1258496
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9386341912113964e-05,
+      "loss": 3.9607,
+      "step": 1259008
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9377955964603444e-05,
+      "loss": 3.9484,
+      "step": 1259520
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9369570017092924e-05,
+      "loss": 3.9511,
+      "step": 1260032
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9361184069582404e-05,
+      "loss": 3.9506,
+      "step": 1260544
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9352814500875613e-05,
+      "loss": 3.9559,
+      "step": 1261056
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9344444932168825e-05,
+      "loss": 3.943,
+      "step": 1261568
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9336058984658305e-05,
+      "loss": 3.9479,
+      "step": 1262080
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9327673037147785e-05,
+      "loss": 3.9467,
+      "step": 1262592
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9319287089637265e-05,
+      "loss": 3.9643,
+      "step": 1263104
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9310901142126745e-05,
+      "loss": 3.9515,
+      "step": 1263616
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9302515194616225e-05,
+      "loss": 3.9373,
+      "step": 1264128
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.92941292471057e-05,
+      "loss": 3.944,
+      "step": 1264640
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.928574329959518e-05,
+      "loss": 3.9469,
+      "step": 1265152
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.927735735208466e-05,
+      "loss": 3.9542,
+      "step": 1265664
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.926898778337787e-05,
+      "loss": 3.9558,
+      "step": 1266176
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.9260618214671086e-05,
+      "loss": 3.9469,
+      "step": 1266688
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.9252232267160566e-05,
+      "loss": 3.9488,
+      "step": 1267200
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.9243846319650046e-05,
+      "loss": 3.9422,
+      "step": 1267712
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.9235460372139523e-05,
+      "loss": 3.9364,
+      "step": 1268224
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.922709080343274e-05,
+      "loss": 3.935,
+      "step": 1268736
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.921870485592222e-05,
+      "loss": 3.9446,
+      "step": 1269248
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.92103189084117e-05,
+      "loss": 3.9576,
+      "step": 1269760
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.9201932960901175e-05,
+      "loss": 3.9487,
+      "step": 1270272
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.9193547013390655e-05,
+      "loss": 3.9408,
+      "step": 1270784
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.9185161065880135e-05,
+      "loss": 3.945,
+      "step": 1271296
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.9176791497173344e-05,
+      "loss": 3.9491,
+      "step": 1271808
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.9168405549662824e-05,
+      "loss": 3.955,
+      "step": 1272320
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.9160019602152304e-05,
+      "loss": 3.9511,
+      "step": 1272832
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.9151633654641784e-05,
+      "loss": 3.9446,
+      "step": 1273344
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.9143247707131268e-05,
+      "loss": 3.9417,
+      "step": 1273856
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.9134878138424477e-05,
+      "loss": 3.955,
+      "step": 1274368
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.9126492190913957e-05,
+      "loss": 3.9333,
+      "step": 1274880
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.9118106243403437e-05,
+      "loss": 3.9409,
+      "step": 1275392
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.9109720295892916e-05,
+      "loss": 3.9519,
+      "step": 1275904
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.9101334348382396e-05,
+      "loss": 3.9436,
+      "step": 1276416
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.909296477967561e-05,
+      "loss": 3.9399,
+      "step": 1276928
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.908457883216509e-05,
+      "loss": 3.9459,
+      "step": 1277440
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.907619288465457e-05,
+      "loss": 3.9412,
+      "step": 1277952
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.906780693714405e-05,
+      "loss": 3.945,
+      "step": 1278464
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.905942098963353e-05,
+      "loss": 3.9432,
+      "step": 1278976
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.905103504212301e-05,
+      "loss": 3.9417,
+      "step": 1279488
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.904264909461249e-05,
+      "loss": 3.9537,
+      "step": 1280000
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.903426314710197e-05,
+      "loss": 3.9552,
+      "step": 1280512
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.902589357839518e-05,
+      "loss": 3.9458,
+      "step": 1281024
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.901750763088466e-05,
+      "loss": 3.9545,
+      "step": 1281536
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.900912168337414e-05,
+      "loss": 3.9438,
+      "step": 1282048
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.900073573586362e-05,
+      "loss": 3.9464,
+      "step": 1282560
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.89923497883531e-05,
+      "loss": 3.9403,
+      "step": 1283072
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.8983980219646313e-05,
+      "loss": 3.9437,
+      "step": 1283584
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.8975594272135793e-05,
+      "loss": 3.9451,
+      "step": 1284096
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.8967208324625273e-05,
+      "loss": 3.9477,
+      "step": 1284608
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.8958822377114753e-05,
+      "loss": 3.946,
+      "step": 1285120
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.8950436429604233e-05,
+      "loss": 3.9499,
+      "step": 1285632
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.8942066860897442e-05,
+      "loss": 3.9497,
+      "step": 1286144
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.8933697292190655e-05,
+      "loss": 3.9373,
+      "step": 1286656
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.8925311344680135e-05,
+      "loss": 3.9515,
+      "step": 1287168
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.8916925397169615e-05,
+      "loss": 3.9521,
+      "step": 1287680
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.8908539449659095e-05,
+      "loss": 3.9385,
+      "step": 1288192
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.8900153502148575e-05,
+      "loss": 3.9314,
+      "step": 1288704
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.8891767554638055e-05,
+      "loss": 3.9398,
+      "step": 1289216
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.8883381607127535e-05,
+      "loss": 3.9544,
+      "step": 1289728
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.887499565961701e-05,
+      "loss": 3.9561,
+      "step": 1290240
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.886660971210649e-05,
+      "loss": 3.9405,
+      "step": 1290752
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.8858240143399707e-05,
+      "loss": 3.9451,
+      "step": 1291264
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.884985419588918e-05,
+      "loss": 3.9549,
+      "step": 1291776
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.884146824837866e-05,
+      "loss": 3.956,
+      "step": 1292288
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.883308230086814e-05,
+      "loss": 3.9467,
+      "step": 1292800
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.8824696353357623e-05,
+      "loss": 3.9446,
+      "step": 1293312
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.8816326784650832e-05,
+      "loss": 3.9483,
+      "step": 1293824
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.8807940837140312e-05,
+      "loss": 3.9476,
+      "step": 1294336
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.8799554889629792e-05,
+      "loss": 3.9416,
+      "step": 1294848
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.8791168942119272e-05,
+      "loss": 3.9532,
+      "step": 1295360
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.8782799373412485e-05,
+      "loss": 3.9378,
+      "step": 1295872
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.87744298047057e-05,
+      "loss": 3.9532,
+      "step": 1296384
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.876604385719518e-05,
+      "loss": 3.9471,
+      "step": 1296896
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.8757657909684654e-05,
+      "loss": 3.9376,
+      "step": 1297408
+    },
+    {
+      "epoch": 0.03,
+      "eval_loss": 4.028563499450684,
+      "eval_runtime": 294.3251,
+      "eval_samples_per_second": 1296.495,
+      "eval_steps_per_second": 40.516,
+      "step": 1297440
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.8749271962174134e-05,
+      "loss": 3.9509,
+      "step": 1297920
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.874090239346735e-05,
+      "loss": 3.9416,
+      "step": 1298432
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.873251644595683e-05,
+      "loss": 3.9522,
+      "step": 1298944
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.8724146877250042e-05,
+      "loss": 3.9468,
+      "step": 1299456
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.8715760929739522e-05,
+      "loss": 3.9544,
+      "step": 1299968
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.8707374982229002e-05,
+      "loss": 3.9427,
+      "step": 1300480
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.8698989034718482e-05,
+      "loss": 3.9372,
+      "step": 1300992
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.8690603087207955e-05,
+      "loss": 3.9307,
+      "step": 1301504
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.868221713969744e-05,
+      "loss": 3.9612,
+      "step": 1302016
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.867383119218692e-05,
+      "loss": 3.9509,
+      "step": 1302528
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.86654452446764e-05,
+      "loss": 3.9434,
+      "step": 1303040
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.8657092054773343e-05,
+      "loss": 3.9523,
+      "step": 1303552
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.8648706107262823e-05,
+      "loss": 3.9417,
+      "step": 1304064
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.8640336538556032e-05,
+      "loss": 3.9409,
+      "step": 1304576
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.8631950591045516e-05,
+      "loss": 3.9376,
+      "step": 1305088
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.8623564643534996e-05,
+      "loss": 3.9399,
+      "step": 1305600
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.8615178696024476e-05,
+      "loss": 3.942,
+      "step": 1306112
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.860679274851395e-05,
+      "loss": 3.9372,
+      "step": 1306624
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.859840680100343e-05,
+      "loss": 3.9514,
+      "step": 1307136
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.859002085349291e-05,
+      "loss": 3.9606,
+      "step": 1307648
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.8581634905982392e-05,
+      "loss": 3.9464,
+      "step": 1308160
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.8573248958471872e-05,
+      "loss": 3.9386,
+      "step": 1308672
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.8564863010961352e-05,
+      "loss": 3.9453,
+      "step": 1309184
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.8556477063450832e-05,
+      "loss": 3.9477,
+      "step": 1309696
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.854810749474404e-05,
+      "loss": 3.9416,
+      "step": 1310208
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.853972154723352e-05,
+      "loss": 3.9355,
+      "step": 1310720
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.8531335599723e-05,
+      "loss": 3.938,
+      "step": 1311232
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.8522949652212484e-05,
+      "loss": 3.9514,
+      "step": 1311744
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.8514563704701964e-05,
+      "loss": 3.9268,
+      "step": 1312256
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8506177757191444e-05,
+      "loss": 3.9351,
+      "step": 1312768
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8497791809680924e-05,
+      "loss": 3.9482,
+      "step": 1313280
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8489405862170404e-05,
+      "loss": 3.9479,
+      "step": 1313792
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8481019914659884e-05,
+      "loss": 3.9524,
+      "step": 1314304
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8472633967149364e-05,
+      "loss": 3.9454,
+      "step": 1314816
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8464280777246306e-05,
+      "loss": 3.9454,
+      "step": 1315328
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8455894829735786e-05,
+      "loss": 3.9495,
+      "step": 1315840
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8447508882225266e-05,
+      "loss": 3.9316,
+      "step": 1316352
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8439122934714746e-05,
+      "loss": 3.9449,
+      "step": 1316864
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8430736987204226e-05,
+      "loss": 3.9317,
+      "step": 1317376
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8422351039693706e-05,
+      "loss": 3.9347,
+      "step": 1317888
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8413965092183185e-05,
+      "loss": 3.9407,
+      "step": 1318400
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.840557914467267e-05,
+      "loss": 3.9414,
+      "step": 1318912
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8397193197162142e-05,
+      "loss": 3.9403,
+      "step": 1319424
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8388807249651622e-05,
+      "loss": 3.9386,
+      "step": 1319936
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8380421302141102e-05,
+      "loss": 3.9423,
+      "step": 1320448
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8372035354630582e-05,
+      "loss": 3.9446,
+      "step": 1320960
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8363665785923794e-05,
+      "loss": 3.9463,
+      "step": 1321472
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8355279838413274e-05,
+      "loss": 3.9332,
+      "step": 1321984
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8346893890902754e-05,
+      "loss": 3.9298,
+      "step": 1322496
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8338507943392234e-05,
+      "loss": 3.945,
+      "step": 1323008
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8330138374685443e-05,
+      "loss": 3.9539,
+      "step": 1323520
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.832176880597866e-05,
+      "loss": 3.939,
+      "step": 1324032
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.831338285846814e-05,
+      "loss": 3.9397,
+      "step": 1324544
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8304996910957616e-05,
+      "loss": 3.9391,
+      "step": 1325056
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8296610963447096e-05,
+      "loss": 3.9335,
+      "step": 1325568
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8288225015936576e-05,
+      "loss": 3.9391,
+      "step": 1326080
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8279839068426056e-05,
+      "loss": 3.9277,
+      "step": 1326592
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8271453120915536e-05,
+      "loss": 3.9434,
+      "step": 1327104
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8263083552208748e-05,
+      "loss": 3.9416,
+      "step": 1327616
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8254697604698228e-05,
+      "loss": 3.9419,
+      "step": 1328128
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8246311657187708e-05,
+      "loss": 3.9258,
+      "step": 1328640
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8237925709677188e-05,
+      "loss": 3.9481,
+      "step": 1329152
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8229539762166668e-05,
+      "loss": 3.9234,
+      "step": 1329664
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8221186572263613e-05,
+      "loss": 3.9405,
+      "step": 1330176
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.821280062475309e-05,
+      "loss": 3.9451,
+      "step": 1330688
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.820441467724257e-05,
+      "loss": 3.9438,
+      "step": 1331200
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.819602872973205e-05,
+      "loss": 3.9314,
+      "step": 1331712
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.818765916102526e-05,
+      "loss": 3.9291,
+      "step": 1332224
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.817927321351474e-05,
+      "loss": 3.931,
+      "step": 1332736
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.817088726600422e-05,
+      "loss": 3.9385,
+      "step": 1333248
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.81625013184937e-05,
+      "loss": 3.9433,
+      "step": 1333760
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.815411537098318e-05,
+      "loss": 3.9393,
+      "step": 1334272
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.814572942347266e-05,
+      "loss": 3.946,
+      "step": 1334784
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.813734347596214e-05,
+      "loss": 3.9464,
+      "step": 1335296
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.812895752845162e-05,
+      "loss": 3.9446,
+      "step": 1335808
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.812058795974483e-05,
+      "loss": 3.9311,
+      "step": 1336320
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.811220201223431e-05,
+      "loss": 3.9444,
+      "step": 1336832
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8103816064723794e-05,
+      "loss": 3.9448,
+      "step": 1337344
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8095430117213274e-05,
+      "loss": 3.9293,
+      "step": 1337856
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8087060548506483e-05,
+      "loss": 3.9455,
+      "step": 1338368
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8078674600995963e-05,
+      "loss": 3.9339,
+      "step": 1338880
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8070288653485443e-05,
+      "loss": 3.9567,
+      "step": 1339392
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8061902705974923e-05,
+      "loss": 3.9424,
+      "step": 1339904
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8053516758464403e-05,
+      "loss": 3.9239,
+      "step": 1340416
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8045163568561344e-05,
+      "loss": 3.9407,
+      "step": 1340928
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8036793999854553e-05,
+      "loss": 3.933,
+      "step": 1341440
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8028408052344033e-05,
+      "loss": 3.946,
+      "step": 1341952
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8020022104833517e-05,
+      "loss": 3.9417,
+      "step": 1342464
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.8011636157322997e-05,
+      "loss": 3.9392,
+      "step": 1342976
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.8003250209812477e-05,
+      "loss": 3.9394,
+      "step": 1343488
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7994864262301957e-05,
+      "loss": 3.9321,
+      "step": 1344000
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7986478314791437e-05,
+      "loss": 3.9291,
+      "step": 1344512
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7978092367280917e-05,
+      "loss": 3.9253,
+      "step": 1345024
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7969722798574126e-05,
+      "loss": 3.9377,
+      "step": 1345536
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.796133685106361e-05,
+      "loss": 3.945,
+      "step": 1346048
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.795295090355309e-05,
+      "loss": 3.9379,
+      "step": 1346560
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.794456495604257e-05,
+      "loss": 3.9321,
+      "step": 1347072
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.793617900853205e-05,
+      "loss": 3.9323,
+      "step": 1347584
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7927809439825258e-05,
+      "loss": 3.9393,
+      "step": 1348096
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7919423492314738e-05,
+      "loss": 3.9452,
+      "step": 1348608
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7911037544804218e-05,
+      "loss": 3.938,
+      "step": 1349120
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.79026515972937e-05,
+      "loss": 3.9364,
+      "step": 1349632
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.789426564978318e-05,
+      "loss": 3.9369,
+      "step": 1350144
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.788589608107639e-05,
+      "loss": 3.9454,
+      "step": 1350656
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.787751013356587e-05,
+      "loss": 3.9228,
+      "step": 1351168
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.786914056485908e-05,
+      "loss": 3.9342,
+      "step": 1351680
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7860754617348563e-05,
+      "loss": 3.9396,
+      "step": 1352192
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7852368669838043e-05,
+      "loss": 3.9345,
+      "step": 1352704
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7843982722327523e-05,
+      "loss": 3.9289,
+      "step": 1353216
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7835596774817003e-05,
+      "loss": 3.9354,
+      "step": 1353728
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7827210827306482e-05,
+      "loss": 3.9331,
+      "step": 1354240
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7818824879795962e-05,
+      "loss": 3.9334,
+      "step": 1354752
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7810438932285442e-05,
+      "loss": 3.9351,
+      "step": 1355264
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.780205298477492e-05,
+      "loss": 3.9326,
+      "step": 1355776
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.77936670372644e-05,
+      "loss": 3.9453,
+      "step": 1356288
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7785297468557615e-05,
+      "loss": 3.9457,
+      "step": 1356800
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7776927899850824e-05,
+      "loss": 3.938,
+      "step": 1357312
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7768541952340304e-05,
+      "loss": 3.9414,
+      "step": 1357824
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7760156004829784e-05,
+      "loss": 3.9378,
+      "step": 1358336
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7751770057319264e-05,
+      "loss": 3.9354,
+      "step": 1358848
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7743384109808747e-05,
+      "loss": 3.9312,
+      "step": 1359360
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7735014541101956e-05,
+      "loss": 3.932,
+      "step": 1359872
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7726628593591436e-05,
+      "loss": 3.9355,
+      "step": 1360384
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7718242646080916e-05,
+      "loss": 3.9408,
+      "step": 1360896
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7709856698570393e-05,
+      "loss": 3.9372,
+      "step": 1361408
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7701470751059873e-05,
+      "loss": 3.9401,
+      "step": 1361920
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7693084803549353e-05,
+      "loss": 3.9342,
+      "step": 1362432
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7684698856038833e-05,
+      "loss": 3.9373,
+      "step": 1362944
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7676312908528313e-05,
+      "loss": 3.9381,
+      "step": 1363456
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7667926961017792e-05,
+      "loss": 3.9458,
+      "step": 1363968
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7659541013507272e-05,
+      "loss": 3.9311,
+      "step": 1364480
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7651155065996752e-05,
+      "loss": 3.9209,
+      "step": 1364992
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7642769118486232e-05,
+      "loss": 3.9292,
+      "step": 1365504
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7634415928583174e-05,
+      "loss": 3.9468,
+      "step": 1366016
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7626029981072654e-05,
+      "loss": 3.9459,
+      "step": 1366528
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7617644033562134e-05,
+      "loss": 3.9373,
+      "step": 1367040
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7609258086051614e-05,
+      "loss": 3.935,
+      "step": 1367552
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7600872138541094e-05,
+      "loss": 3.9474,
+      "step": 1368064
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7592486191030574e-05,
+      "loss": 3.9437,
+      "step": 1368576
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7584116622323786e-05,
+      "loss": 3.9374,
+      "step": 1369088
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7575730674813266e-05,
+      "loss": 3.935,
+      "step": 1369600
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7567344727302746e-05,
+      "loss": 3.9369,
+      "step": 1370112
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7558958779792226e-05,
+      "loss": 3.9384,
+      "step": 1370624
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.755058921108544e-05,
+      "loss": 3.9295,
+      "step": 1371136
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7542219642378648e-05,
+      "loss": 3.9446,
+      "step": 1371648
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7533833694868128e-05,
+      "loss": 3.9302,
+      "step": 1372160
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7525447747357608e-05,
+      "loss": 3.9422,
+      "step": 1372672
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7517061799847088e-05,
+      "loss": 3.9373,
+      "step": 1373184
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.7508675852336567e-05,
+      "loss": 3.9283,
+      "step": 1373696
+    },
+    {
+      "epoch": 1.03,
+      "eval_loss": 4.023937702178955,
+      "eval_runtime": 293.5365,
+      "eval_samples_per_second": 1299.978,
+      "eval_steps_per_second": 40.625,
+      "step": 1373760
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.7500289904826047e-05,
+      "loss": 3.9442,
+      "step": 1374208
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.7491903957315527e-05,
+      "loss": 3.9345,
+      "step": 1374720
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.748351800980501e-05,
+      "loss": 3.9405,
+      "step": 1375232
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.747513206229449e-05,
+      "loss": 3.9377,
+      "step": 1375744
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.746674611478397e-05,
+      "loss": 3.9462,
+      "step": 1376256
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.745836016727345e-05,
+      "loss": 3.9348,
+      "step": 1376768
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.744997421976293e-05,
+      "loss": 3.9285,
+      "step": 1377280
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.7441588272252404e-05,
+      "loss": 3.9195,
+      "step": 1377792
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.7433202324741887e-05,
+      "loss": 3.9526,
+      "step": 1378304
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.7424816377231367e-05,
+      "loss": 3.9422,
+      "step": 1378816
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.7416430429720847e-05,
+      "loss": 3.931,
+      "step": 1379328
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.7408044482210327e-05,
+      "loss": 3.9476,
+      "step": 1379840
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.7399674913503536e-05,
+      "loss": 3.9305,
+      "step": 1380352
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.7391288965993016e-05,
+      "loss": 3.9362,
+      "step": 1380864
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.7382903018482496e-05,
+      "loss": 3.93,
+      "step": 1381376
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.737451707097198e-05,
+      "loss": 3.9288,
+      "step": 1381888
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.736613112346146e-05,
+      "loss": 3.936,
+      "step": 1382400
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.735774517595094e-05,
+      "loss": 3.9243,
+      "step": 1382912
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.734935922844042e-05,
+      "loss": 3.9411,
+      "step": 1383424
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.73409732809299e-05,
+      "loss": 3.9476,
+      "step": 1383936
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.733258733341938e-05,
+      "loss": 3.9362,
+      "step": 1384448
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.732420138590886e-05,
+      "loss": 3.9336,
+      "step": 1384960
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.731583181720207e-05,
+      "loss": 3.9369,
+      "step": 1385472
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.730744586969155e-05,
+      "loss": 3.9372,
+      "step": 1385984
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.729905992218103e-05,
+      "loss": 3.9306,
+      "step": 1386496
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.729067397467051e-05,
+      "loss": 3.9313,
+      "step": 1387008
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.728228802715999e-05,
+      "loss": 3.9232,
+      "step": 1387520
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.7273902079649465e-05,
+      "loss": 3.9439,
+      "step": 1388032
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.7265516132138948e-05,
+      "loss": 3.9159,
+      "step": 1388544
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7257130184628428e-05,
+      "loss": 3.9297,
+      "step": 1389056
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7248760615921644e-05,
+      "loss": 3.9366,
+      "step": 1389568
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7240374668411117e-05,
+      "loss": 3.9403,
+      "step": 1390080
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7231988720900597e-05,
+      "loss": 3.9379,
+      "step": 1390592
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7223602773390077e-05,
+      "loss": 3.9369,
+      "step": 1391104
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7215233204683293e-05,
+      "loss": 3.9344,
+      "step": 1391616
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7206863635976505e-05,
+      "loss": 3.9411,
+      "step": 1392128
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7198477688465985e-05,
+      "loss": 3.9247,
+      "step": 1392640
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7190091740955465e-05,
+      "loss": 3.934,
+      "step": 1393152
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7181705793444938e-05,
+      "loss": 3.921,
+      "step": 1393664
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7173319845934418e-05,
+      "loss": 3.9283,
+      "step": 1394176
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.71649338984239e-05,
+      "loss": 3.9267,
+      "step": 1394688
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.715654795091338e-05,
+      "loss": 3.9373,
+      "step": 1395200
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.714816200340286e-05,
+      "loss": 3.9311,
+      "step": 1395712
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.713979243469607e-05,
+      "loss": 3.931,
+      "step": 1396224
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.713140648718555e-05,
+      "loss": 3.935,
+      "step": 1396736
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.712302053967503e-05,
+      "loss": 3.9338,
+      "step": 1397248
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.711463459216451e-05,
+      "loss": 3.9355,
+      "step": 1397760
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.710624864465399e-05,
+      "loss": 3.9252,
+      "step": 1398272
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7097862697143474e-05,
+      "loss": 3.9166,
+      "step": 1398784
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7089493128436683e-05,
+      "loss": 3.9396,
+      "step": 1399296
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7081107180926163e-05,
+      "loss": 3.9402,
+      "step": 1399808
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7072721233415643e-05,
+      "loss": 3.9319,
+      "step": 1400320
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7064335285905123e-05,
+      "loss": 3.9321,
+      "step": 1400832
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7055949338394603e-05,
+      "loss": 3.9248,
+      "step": 1401344
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7047563390884083e-05,
+      "loss": 3.9267,
+      "step": 1401856
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7039193822177295e-05,
+      "loss": 3.9307,
+      "step": 1402368
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7030807874666775e-05,
+      "loss": 3.9193,
+      "step": 1402880
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7022421927156255e-05,
+      "loss": 3.934,
+      "step": 1403392
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7014035979645735e-05,
+      "loss": 3.9331,
+      "step": 1403904
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.7005650032135215e-05,
+      "loss": 3.9285,
+      "step": 1404416
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.6997264084624695e-05,
+      "loss": 3.9181,
+      "step": 1404928
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.6988878137114175e-05,
+      "loss": 3.9383,
+      "step": 1405440
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.6980492189603658e-05,
+      "loss": 3.917,
+      "step": 1405952
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.6972122620896867e-05,
+      "loss": 3.9309,
+      "step": 1406464
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.6963736673386347e-05,
+      "loss": 3.9315,
+      "step": 1406976
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.6955350725875827e-05,
+      "loss": 3.9353,
+      "step": 1407488
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.6946964778365304e-05,
+      "loss": 3.9229,
+      "step": 1408000
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.6938578830854784e-05,
+      "loss": 3.9239,
+      "step": 1408512
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.6930209262148e-05,
+      "loss": 3.9144,
+      "step": 1409024
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.692182331463748e-05,
+      "loss": 3.9321,
+      "step": 1409536
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.6913437367126953e-05,
+      "loss": 3.9343,
+      "step": 1410048
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.6905051419616433e-05,
+      "loss": 3.9312,
+      "step": 1410560
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.689668185090965e-05,
+      "loss": 3.935,
+      "step": 1411072
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.6888295903399125e-05,
+      "loss": 3.9379,
+      "step": 1411584
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.687992633469234e-05,
+      "loss": 3.9416,
+      "step": 1412096
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.687155676598555e-05,
+      "loss": 3.9219,
+      "step": 1412608
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.686317081847503e-05,
+      "loss": 3.936,
+      "step": 1413120
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.685478487096451e-05,
+      "loss": 3.9381,
+      "step": 1413632
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.684639892345399e-05,
+      "loss": 3.9219,
+      "step": 1414144
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.6838012975943473e-05,
+      "loss": 3.9348,
+      "step": 1414656
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.6829627028432953e-05,
+      "loss": 3.9234,
+      "step": 1415168
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.6821241080922426e-05,
+      "loss": 3.9483,
+      "step": 1415680
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.6812855133411906e-05,
+      "loss": 3.9354,
+      "step": 1416192
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.6804485564705122e-05,
+      "loss": 3.9146,
+      "step": 1416704
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.67960996171946e-05,
+      "loss": 3.929,
+      "step": 1417216
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.678771366968408e-05,
+      "loss": 3.927,
+      "step": 1417728
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.677932772217356e-05,
+      "loss": 3.936,
+      "step": 1418240
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.677094177466304e-05,
+      "loss": 3.9338,
+      "step": 1418752
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.6762572205956248e-05,
+      "loss": 3.9295,
+      "step": 1419264
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6754186258445728e-05,
+      "loss": 3.929,
+      "step": 1419776
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.674580031093521e-05,
+      "loss": 3.9251,
+      "step": 1420288
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.673741436342469e-05,
+      "loss": 3.9227,
+      "step": 1420800
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.672902841591417e-05,
+      "loss": 3.9142,
+      "step": 1421312
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.672064246840365e-05,
+      "loss": 3.9293,
+      "step": 1421824
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.671225652089313e-05,
+      "loss": 3.9383,
+      "step": 1422336
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.670388695218634e-05,
+      "loss": 3.9318,
+      "step": 1422848
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.669550100467582e-05,
+      "loss": 3.9211,
+      "step": 1423360
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6687115057165303e-05,
+      "loss": 3.9251,
+      "step": 1423872
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6678729109654783e-05,
+      "loss": 3.9288,
+      "step": 1424384
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6670343162144263e-05,
+      "loss": 3.9406,
+      "step": 1424896
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6661973593437472e-05,
+      "loss": 3.9309,
+      "step": 1425408
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6653587645926952e-05,
+      "loss": 3.9283,
+      "step": 1425920
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6645201698416432e-05,
+      "loss": 3.9249,
+      "step": 1426432
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6636815750905912e-05,
+      "loss": 3.9366,
+      "step": 1426944
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6628446182199125e-05,
+      "loss": 3.9111,
+      "step": 1427456
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6620060234688605e-05,
+      "loss": 3.9283,
+      "step": 1427968
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6611674287178085e-05,
+      "loss": 3.9255,
+      "step": 1428480
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6603288339667565e-05,
+      "loss": 3.9307,
+      "step": 1428992
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6594902392157044e-05,
+      "loss": 3.9197,
+      "step": 1429504
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6586532823450257e-05,
+      "loss": 3.9276,
+      "step": 1430016
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6578146875939737e-05,
+      "loss": 3.9241,
+      "step": 1430528
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6569760928429217e-05,
+      "loss": 3.9248,
+      "step": 1431040
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6561374980918697e-05,
+      "loss": 3.9271,
+      "step": 1431552
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6552989033408177e-05,
+      "loss": 3.9249,
+      "step": 1432064
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6544619464701386e-05,
+      "loss": 3.9341,
+      "step": 1432576
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6536233517190866e-05,
+      "loss": 3.9369,
+      "step": 1433088
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6527847569680346e-05,
+      "loss": 3.9309,
+      "step": 1433600
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.651946162216983e-05,
+      "loss": 3.9329,
+      "step": 1434112
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.651107567465931e-05,
+      "loss": 3.9271,
+      "step": 1434624
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6502706105952518e-05,
+      "loss": 3.9243,
+      "step": 1435136
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6494320158441998e-05,
+      "loss": 3.9257,
+      "step": 1435648
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6485934210931478e-05,
+      "loss": 3.926,
+      "step": 1436160
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.647756464222469e-05,
+      "loss": 3.9232,
+      "step": 1436672
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.646917869471417e-05,
+      "loss": 3.935,
+      "step": 1437184
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.646079274720365e-05,
+      "loss": 3.9245,
+      "step": 1437696
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.645240679969313e-05,
+      "loss": 3.9335,
+      "step": 1438208
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.644402085218261e-05,
+      "loss": 3.9282,
+      "step": 1438720
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6435634904672084e-05,
+      "loss": 3.9251,
+      "step": 1439232
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6427248957161567e-05,
+      "loss": 3.9261,
+      "step": 1439744
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6418863009651047e-05,
+      "loss": 3.9389,
+      "step": 1440256
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6410477062140527e-05,
+      "loss": 3.9213,
+      "step": 1440768
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6402091114630007e-05,
+      "loss": 3.9182,
+      "step": 1441280
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6393705167119487e-05,
+      "loss": 3.9155,
+      "step": 1441792
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6385319219608967e-05,
+      "loss": 3.9328,
+      "step": 1442304
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.637696602970591e-05,
+      "loss": 3.9369,
+      "step": 1442816
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6368580082195388e-05,
+      "loss": 3.9328,
+      "step": 1443328
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6360194134684868e-05,
+      "loss": 3.9265,
+      "step": 1443840
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6351808187174348e-05,
+      "loss": 3.9391,
+      "step": 1444352
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6343422239663828e-05,
+      "loss": 3.9295,
+      "step": 1444864
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6335036292153308e-05,
+      "loss": 3.9321,
+      "step": 1445376
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6326650344642788e-05,
+      "loss": 3.9232,
+      "step": 1445888
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6318264397132268e-05,
+      "loss": 3.9308,
+      "step": 1446400
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.630989482842548e-05,
+      "loss": 3.9296,
+      "step": 1446912
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.630150888091496e-05,
+      "loss": 3.9199,
+      "step": 1447424
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.629312293340444e-05,
+      "loss": 3.9356,
+      "step": 1447936
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.628473698589392e-05,
+      "loss": 3.927,
+      "step": 1448448
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.62763510383834e-05,
+      "loss": 3.9285,
+      "step": 1448960
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6267997848480342e-05,
+      "loss": 3.9291,
+      "step": 1449472
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.6259611900969822e-05,
+      "loss": 3.9186,
+      "step": 1449984
+    },
+    {
+      "epoch": 0.03,
+      "eval_loss": 4.020461082458496,
+      "eval_runtime": 286.1154,
+      "eval_samples_per_second": 1333.696,
+      "eval_steps_per_second": 41.679,
+      "step": 1450080
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6251225953459302e-05,
+      "loss": 3.9339,
+      "step": 1450496
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6242840005948782e-05,
+      "loss": 3.9269,
+      "step": 1451008
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.623447043724199e-05,
+      "loss": 3.9299,
+      "step": 1451520
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6226084489731474e-05,
+      "loss": 3.9283,
+      "step": 1452032
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6217698542220954e-05,
+      "loss": 3.9429,
+      "step": 1452544
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6209312594710434e-05,
+      "loss": 3.9251,
+      "step": 1453056
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6200926647199914e-05,
+      "loss": 3.9203,
+      "step": 1453568
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6192557078493123e-05,
+      "loss": 3.9126,
+      "step": 1454080
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6184171130982603e-05,
+      "loss": 3.9384,
+      "step": 1454592
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6175785183472083e-05,
+      "loss": 3.9374,
+      "step": 1455104
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6167399235961566e-05,
+      "loss": 3.9182,
+      "step": 1455616
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6159013288451046e-05,
+      "loss": 3.9374,
+      "step": 1456128
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6150643719744256e-05,
+      "loss": 3.9233,
+      "step": 1456640
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6142257772233735e-05,
+      "loss": 3.9253,
+      "step": 1457152
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6133871824723215e-05,
+      "loss": 3.9239,
+      "step": 1457664
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6125485877212695e-05,
+      "loss": 3.9199,
+      "step": 1458176
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6117116308505908e-05,
+      "loss": 3.926,
+      "step": 1458688
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6108730360995388e-05,
+      "loss": 3.9183,
+      "step": 1459200
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6100344413484868e-05,
+      "loss": 3.9324,
+      "step": 1459712
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6091958465974348e-05,
+      "loss": 3.9378,
+      "step": 1460224
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6083572518463828e-05,
+      "loss": 3.9335,
+      "step": 1460736
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6075186570953308e-05,
+      "loss": 3.9257,
+      "step": 1461248
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6066800623442788e-05,
+      "loss": 3.9248,
+      "step": 1461760
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6058414675932268e-05,
+      "loss": 3.9344,
+      "step": 1462272
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6050028728421744e-05,
+      "loss": 3.9205,
+      "step": 1462784
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6041642780911224e-05,
+      "loss": 3.9229,
+      "step": 1463296
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.603327321220444e-05,
+      "loss": 3.9146,
+      "step": 1463808
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.602488726469392e-05,
+      "loss": 3.9346,
+      "step": 1464320
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.6016501317183396e-05,
+      "loss": 3.9115,
+      "step": 1464832
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.6008115369672876e-05,
+      "loss": 3.9202,
+      "step": 1465344
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5999729422162356e-05,
+      "loss": 3.9306,
+      "step": 1465856
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5991343474651836e-05,
+      "loss": 3.928,
+      "step": 1466368
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5982957527141316e-05,
+      "loss": 3.9305,
+      "step": 1466880
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5974571579630796e-05,
+      "loss": 3.9309,
+      "step": 1467392
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.596621838972774e-05,
+      "loss": 3.9267,
+      "step": 1467904
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5957832442217218e-05,
+      "loss": 3.9325,
+      "step": 1468416
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5949446494706698e-05,
+      "loss": 3.9189,
+      "step": 1468928
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5941060547196178e-05,
+      "loss": 3.9178,
+      "step": 1469440
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5932674599685658e-05,
+      "loss": 3.9159,
+      "step": 1469952
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5924305030978867e-05,
+      "loss": 3.9183,
+      "step": 1470464
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.591591908346835e-05,
+      "loss": 3.9179,
+      "step": 1470976
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.590753313595783e-05,
+      "loss": 3.9307,
+      "step": 1471488
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.589914718844731e-05,
+      "loss": 3.9204,
+      "step": 1472000
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.589076124093679e-05,
+      "loss": 3.9219,
+      "step": 1472512
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.588237529342627e-05,
+      "loss": 3.9309,
+      "step": 1473024
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.587398934591575e-05,
+      "loss": 3.9195,
+      "step": 1473536
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.586560339840523e-05,
+      "loss": 3.933,
+      "step": 1474048
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.585723382969844e-05,
+      "loss": 3.9169,
+      "step": 1474560
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5848847882187922e-05,
+      "loss": 3.9022,
+      "step": 1475072
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5840461934677402e-05,
+      "loss": 3.9378,
+      "step": 1475584
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5832075987166882e-05,
+      "loss": 3.9293,
+      "step": 1476096
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.582370641846009e-05,
+      "loss": 3.9289,
+      "step": 1476608
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.581532047094957e-05,
+      "loss": 3.9198,
+      "step": 1477120
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.580693452343905e-05,
+      "loss": 3.9177,
+      "step": 1477632
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5798564954732264e-05,
+      "loss": 3.9177,
+      "step": 1478144
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5790195386025473e-05,
+      "loss": 3.9228,
+      "step": 1478656
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5781809438514953e-05,
+      "loss": 3.9134,
+      "step": 1479168
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5773423491004433e-05,
+      "loss": 3.9204,
+      "step": 1479680
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5765037543493913e-05,
+      "loss": 3.9304,
+      "step": 1480192
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5756651595983393e-05,
+      "loss": 3.9189,
+      "step": 1480704
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5748265648472876e-05,
+      "loss": 3.9097,
+      "step": 1481216
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5739879700962356e-05,
+      "loss": 3.9321,
+      "step": 1481728
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5731493753451836e-05,
+      "loss": 3.9087,
+      "step": 1482240
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5723107805941316e-05,
+      "loss": 3.9168,
+      "step": 1482752
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5714738237234525e-05,
+      "loss": 3.9279,
+      "step": 1483264
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5706352289724005e-05,
+      "loss": 3.9272,
+      "step": 1483776
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5697966342213485e-05,
+      "loss": 3.9124,
+      "step": 1484288
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5689580394702968e-05,
+      "loss": 3.9165,
+      "step": 1484800
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5681194447192448e-05,
+      "loss": 3.9017,
+      "step": 1485312
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5672808499681928e-05,
+      "loss": 3.9296,
+      "step": 1485824
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.56644225521714e-05,
+      "loss": 3.9234,
+      "step": 1486336
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.565603660466088e-05,
+      "loss": 3.9233,
+      "step": 1486848
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5647667035954097e-05,
+      "loss": 3.9227,
+      "step": 1487360
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5639281088443577e-05,
+      "loss": 3.9292,
+      "step": 1487872
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5630895140933054e-05,
+      "loss": 3.9353,
+      "step": 1488384
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.562252557222627e-05,
+      "loss": 3.9126,
+      "step": 1488896
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.561413962471575e-05,
+      "loss": 3.9263,
+      "step": 1489408
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5605753677205223e-05,
+      "loss": 3.9313,
+      "step": 1489920
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5597367729694706e-05,
+      "loss": 3.9105,
+      "step": 1490432
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5588981782184186e-05,
+      "loss": 3.9266,
+      "step": 1490944
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5580595834673666e-05,
+      "loss": 3.917,
+      "step": 1491456
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5572209887163146e-05,
+      "loss": 3.9351,
+      "step": 1491968
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5563823939652626e-05,
+      "loss": 3.9292,
+      "step": 1492480
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5555454370945835e-05,
+      "loss": 3.9095,
+      "step": 1492992
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5547068423435315e-05,
+      "loss": 3.9215,
+      "step": 1493504
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5538682475924798e-05,
+      "loss": 3.9165,
+      "step": 1494016
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5530296528414278e-05,
+      "loss": 3.9293,
+      "step": 1494528
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5521910580903758e-05,
+      "loss": 3.922,
+      "step": 1495040
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.5513557391000696e-05,
+      "loss": 3.9206,
+      "step": 1495552
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5505171443490176e-05,
+      "loss": 3.9203,
+      "step": 1496064
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.549678549597966e-05,
+      "loss": 3.9205,
+      "step": 1496576
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.548839954846914e-05,
+      "loss": 3.9154,
+      "step": 1497088
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.548002997976235e-05,
+      "loss": 3.9063,
+      "step": 1497600
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.547164403225183e-05,
+      "loss": 3.9184,
+      "step": 1498112
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.546325808474131e-05,
+      "loss": 3.9294,
+      "step": 1498624
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.545487213723079e-05,
+      "loss": 3.9277,
+      "step": 1499136
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.544648618972027e-05,
+      "loss": 3.9095,
+      "step": 1499648
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5438100242209752e-05,
+      "loss": 3.9179,
+      "step": 1500160
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5429714294699232e-05,
+      "loss": 3.9185,
+      "step": 1500672
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.542134472599244e-05,
+      "loss": 3.9299,
+      "step": 1501184
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.541295877848192e-05,
+      "loss": 3.9231,
+      "step": 1501696
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.54045728309714e-05,
+      "loss": 3.9211,
+      "step": 1502208
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.539618688346088e-05,
+      "loss": 3.914,
+      "step": 1502720
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5387817314754093e-05,
+      "loss": 3.9297,
+      "step": 1503232
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5379431367243573e-05,
+      "loss": 3.9053,
+      "step": 1503744
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5371045419733053e-05,
+      "loss": 3.9207,
+      "step": 1504256
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5362659472222533e-05,
+      "loss": 3.9151,
+      "step": 1504768
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5354273524712013e-05,
+      "loss": 3.9205,
+      "step": 1505280
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5345903956005222e-05,
+      "loss": 3.9134,
+      "step": 1505792
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5337518008494706e-05,
+      "loss": 3.9246,
+      "step": 1506304
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5329132060984185e-05,
+      "loss": 3.9139,
+      "step": 1506816
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5320746113473665e-05,
+      "loss": 3.9161,
+      "step": 1507328
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5312360165963145e-05,
+      "loss": 3.9174,
+      "step": 1507840
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5303990597256355e-05,
+      "loss": 3.9174,
+      "step": 1508352
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5295604649745834e-05,
+      "loss": 3.9267,
+      "step": 1508864
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5287218702235314e-05,
+      "loss": 3.9297,
+      "step": 1509376
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5278832754724794e-05,
+      "loss": 3.9216,
+      "step": 1509888
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5270446807214278e-05,
+      "loss": 3.9263,
+      "step": 1510400
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5262077238507487e-05,
+      "loss": 3.9204,
+      "step": 1510912
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5253707669800696e-05,
+      "loss": 3.9155,
+      "step": 1511424
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5245321722290176e-05,
+      "loss": 3.9161,
+      "step": 1511936
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.523693577477966e-05,
+      "loss": 3.9194,
+      "step": 1512448
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.522854982726914e-05,
+      "loss": 3.9173,
+      "step": 1512960
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.522016387975862e-05,
+      "loss": 3.9272,
+      "step": 1513472
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.52117779322481e-05,
+      "loss": 3.9175,
+      "step": 1513984
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.520339198473758e-05,
+      "loss": 3.9263,
+      "step": 1514496
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.519500603722706e-05,
+      "loss": 3.9164,
+      "step": 1515008
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5186636468520268e-05,
+      "loss": 3.9175,
+      "step": 1515520
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5178250521009748e-05,
+      "loss": 3.9169,
+      "step": 1516032
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.516986457349923e-05,
+      "loss": 3.9306,
+      "step": 1516544
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.516147862598871e-05,
+      "loss": 3.9169,
+      "step": 1517056
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5153092678478185e-05,
+      "loss": 3.9113,
+      "step": 1517568
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5144706730967664e-05,
+      "loss": 3.9065,
+      "step": 1518080
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5136320783457144e-05,
+      "loss": 3.9239,
+      "step": 1518592
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.512795121475036e-05,
+      "loss": 3.9281,
+      "step": 1519104
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5119565267239837e-05,
+      "loss": 3.9268,
+      "step": 1519616
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5111179319729317e-05,
+      "loss": 3.9225,
+      "step": 1520128
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5102793372218797e-05,
+      "loss": 3.9312,
+      "step": 1520640
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5094407424708277e-05,
+      "loss": 3.9224,
+      "step": 1521152
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5086021477197757e-05,
+      "loss": 3.922,
+      "step": 1521664
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5077635529687237e-05,
+      "loss": 3.9176,
+      "step": 1522176
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5069249582176717e-05,
+      "loss": 3.9217,
+      "step": 1522688
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.506088001346993e-05,
+      "loss": 3.9231,
+      "step": 1523200
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.505249406595941e-05,
+      "loss": 3.9097,
+      "step": 1523712
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5044124497252618e-05,
+      "loss": 3.9279,
+      "step": 1524224
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5035738549742098e-05,
+      "loss": 3.9225,
+      "step": 1524736
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.5027352602231578e-05,
+      "loss": 3.9159,
+      "step": 1525248
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.501896665472106e-05,
+      "loss": 3.9183,
+      "step": 1525760
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.501058070721054e-05,
+      "loss": 3.9169,
+      "step": 1526272
+    },
+    {
+      "epoch": 1.03,
+      "eval_loss": 4.017267227172852,
+      "eval_runtime": 286.8235,
+      "eval_samples_per_second": 1330.403,
+      "eval_steps_per_second": 41.576,
+      "step": 1526400
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.500219475970002e-05,
+      "loss": 3.9233,
+      "step": 1526784
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.49938088121895e-05,
+      "loss": 3.9198,
+      "step": 1527296
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.498542286467898e-05,
+      "loss": 3.9237,
+      "step": 1527808
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.4977036917168458e-05,
+      "loss": 3.9177,
+      "step": 1528320
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.4968650969657938e-05,
+      "loss": 3.9339,
+      "step": 1528832
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.4960265022147418e-05,
+      "loss": 3.9197,
+      "step": 1529344
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.49518790746369e-05,
+      "loss": 3.911,
+      "step": 1529856
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.494349312712638e-05,
+      "loss": 3.9093,
+      "step": 1530368
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.493510717961586e-05,
+      "loss": 3.925,
+      "step": 1530880
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.4926721232105338e-05,
+      "loss": 3.9275,
+      "step": 1531392
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.4918335284594817e-05,
+      "loss": 3.9135,
+      "step": 1531904
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.4909949337084297e-05,
+      "loss": 3.9264,
+      "step": 1532416
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.4901596147181242e-05,
+      "loss": 3.9184,
+      "step": 1532928
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.4893210199670722e-05,
+      "loss": 3.9189,
+      "step": 1533440
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.4884824252160202e-05,
+      "loss": 3.9138,
+      "step": 1533952
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.4876438304649682e-05,
+      "loss": 3.9125,
+      "step": 1534464
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.4868052357139162e-05,
+      "loss": 3.9163,
+      "step": 1534976
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.485966640962864e-05,
+      "loss": 3.9095,
+      "step": 1535488
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.4851280462118122e-05,
+      "loss": 3.925,
+      "step": 1536000
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.4842894514607602e-05,
+      "loss": 3.9279,
+      "step": 1536512
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.4834508567097082e-05,
+      "loss": 3.9271,
+      "step": 1537024
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.4826122619586562e-05,
+      "loss": 3.9208,
+      "step": 1537536
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.4817736672076042e-05,
+      "loss": 3.9163,
+      "step": 1538048
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.480935072456552e-05,
+      "loss": 3.9236,
+      "step": 1538560
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.4800964777055e-05,
+      "loss": 3.9156,
+      "step": 1539072
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.479257882954448e-05,
+      "loss": 3.9154,
+      "step": 1539584
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.4784192882033962e-05,
+      "loss": 3.9061,
+      "step": 1540096
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.4775806934523442e-05,
+      "loss": 3.9212,
+      "step": 1540608
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.4767453744620383e-05,
+      "loss": 3.9086,
+      "step": 1541120
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.4759067797109863e-05,
+      "loss": 3.906,
+      "step": 1541632
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4750681849599343e-05,
+      "loss": 3.9234,
+      "step": 1542144
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4742295902088823e-05,
+      "loss": 3.9179,
+      "step": 1542656
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4733909954578303e-05,
+      "loss": 3.9251,
+      "step": 1543168
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4725540385871516e-05,
+      "loss": 3.9243,
+      "step": 1543680
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4717154438360992e-05,
+      "loss": 3.9141,
+      "step": 1544192
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4708768490850472e-05,
+      "loss": 3.9262,
+      "step": 1544704
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4700382543339952e-05,
+      "loss": 3.9151,
+      "step": 1545216
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4691996595829432e-05,
+      "loss": 3.9079,
+      "step": 1545728
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4683610648318915e-05,
+      "loss": 3.9052,
+      "step": 1546240
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4675224700808395e-05,
+      "loss": 3.908,
+      "step": 1546752
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4666838753297875e-05,
+      "loss": 3.9114,
+      "step": 1547264
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4658469184591084e-05,
+      "loss": 3.9247,
+      "step": 1547776
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4650083237080564e-05,
+      "loss": 3.9122,
+      "step": 1548288
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4641713668373777e-05,
+      "loss": 3.9124,
+      "step": 1548800
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4633327720863257e-05,
+      "loss": 3.9235,
+      "step": 1549312
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4624941773352737e-05,
+      "loss": 3.9159,
+      "step": 1549824
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4616555825842217e-05,
+      "loss": 3.9219,
+      "step": 1550336
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4608169878331697e-05,
+      "loss": 3.9097,
+      "step": 1550848
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4599783930821173e-05,
+      "loss": 3.893,
+      "step": 1551360
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4591414362114386e-05,
+      "loss": 3.9279,
+      "step": 1551872
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.458302841460387e-05,
+      "loss": 3.9256,
+      "step": 1552384
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.457464246709335e-05,
+      "loss": 3.9201,
+      "step": 1552896
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4566256519582826e-05,
+      "loss": 3.9125,
+      "step": 1553408
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4557870572072306e-05,
+      "loss": 3.911,
+      "step": 1553920
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4549501003365518e-05,
+      "loss": 3.9078,
+      "step": 1554432
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4541115055854998e-05,
+      "loss": 3.9129,
+      "step": 1554944
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4532729108344478e-05,
+      "loss": 3.9089,
+      "step": 1555456
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4524343160833958e-05,
+      "loss": 3.9122,
+      "step": 1555968
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4515957213323438e-05,
+      "loss": 3.9234,
+      "step": 1556480
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4507571265812918e-05,
+      "loss": 3.9135,
+      "step": 1556992
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4499185318302398e-05,
+      "loss": 3.902,
+      "step": 1557504
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4490799370791878e-05,
+      "loss": 3.9245,
+      "step": 1558016
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4482429802085087e-05,
+      "loss": 3.9009,
+      "step": 1558528
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.447404385457457e-05,
+      "loss": 3.9047,
+      "step": 1559040
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.446565790706405e-05,
+      "loss": 3.9233,
+      "step": 1559552
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.445727195955353e-05,
+      "loss": 3.9232,
+      "step": 1560064
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4448886012043007e-05,
+      "loss": 3.9021,
+      "step": 1560576
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.444051644333622e-05,
+      "loss": 3.91,
+      "step": 1561088
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.44321304958257e-05,
+      "loss": 3.8955,
+      "step": 1561600
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.442374454831518e-05,
+      "loss": 3.9185,
+      "step": 1562112
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.441537497960839e-05,
+      "loss": 3.9204,
+      "step": 1562624
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.440698903209787e-05,
+      "loss": 3.9106,
+      "step": 1563136
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.439860308458735e-05,
+      "loss": 3.9174,
+      "step": 1563648
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4390217137076828e-05,
+      "loss": 3.9202,
+      "step": 1564160
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4381831189566308e-05,
+      "loss": 3.9329,
+      "step": 1564672
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4373445242055788e-05,
+      "loss": 3.9044,
+      "step": 1565184
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.436505929454527e-05,
+      "loss": 3.919,
+      "step": 1565696
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.435667334703475e-05,
+      "loss": 3.9229,
+      "step": 1566208
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.434828739952423e-05,
+      "loss": 3.9012,
+      "step": 1566720
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.433991783081744e-05,
+      "loss": 3.9217,
+      "step": 1567232
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.433153188330692e-05,
+      "loss": 3.9091,
+      "step": 1567744
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4323162314600133e-05,
+      "loss": 3.9263,
+      "step": 1568256
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4314776367089613e-05,
+      "loss": 3.9209,
+      "step": 1568768
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4306390419579093e-05,
+      "loss": 3.9035,
+      "step": 1569280
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4298004472068573e-05,
+      "loss": 3.9088,
+      "step": 1569792
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4289618524558053e-05,
+      "loss": 3.9096,
+      "step": 1570304
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4281232577047533e-05,
+      "loss": 3.9269,
+      "step": 1570816
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.427286300834074e-05,
+      "loss": 3.9137,
+      "step": 1571328
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.4264477060830225e-05,
+      "loss": 3.9113,
+      "step": 1571840
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4256091113319705e-05,
+      "loss": 3.911,
+      "step": 1572352
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4247705165809185e-05,
+      "loss": 3.9159,
+      "step": 1572864
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.423931921829866e-05,
+      "loss": 3.9098,
+      "step": 1573376
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.423093327078814e-05,
+      "loss": 3.8956,
+      "step": 1573888
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.422254732327762e-05,
+      "loss": 3.9135,
+      "step": 1574400
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4214177754570834e-05,
+      "loss": 3.9181,
+      "step": 1574912
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4205791807060314e-05,
+      "loss": 3.9198,
+      "step": 1575424
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4197405859549794e-05,
+      "loss": 3.8997,
+      "step": 1575936
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4189019912039274e-05,
+      "loss": 3.9107,
+      "step": 1576448
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4180650343332483e-05,
+      "loss": 3.9126,
+      "step": 1576960
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4172264395821963e-05,
+      "loss": 3.9188,
+      "step": 1577472
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4163878448311443e-05,
+      "loss": 3.9183,
+      "step": 1577984
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4155492500800926e-05,
+      "loss": 3.9126,
+      "step": 1578496
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4147106553290406e-05,
+      "loss": 3.9072,
+      "step": 1579008
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4138736984583615e-05,
+      "loss": 3.9214,
+      "step": 1579520
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4130351037073095e-05,
+      "loss": 3.8983,
+      "step": 1580032
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4121965089562575e-05,
+      "loss": 3.913,
+      "step": 1580544
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4113579142052055e-05,
+      "loss": 3.904,
+      "step": 1581056
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4105193194541535e-05,
+      "loss": 3.9167,
+      "step": 1581568
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4096823625834747e-05,
+      "loss": 3.9001,
+      "step": 1582080
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4088437678324227e-05,
+      "loss": 3.9188,
+      "step": 1582592
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4080051730813707e-05,
+      "loss": 3.9057,
+      "step": 1583104
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4071665783303187e-05,
+      "loss": 3.9086,
+      "step": 1583616
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4063279835792664e-05,
+      "loss": 3.9091,
+      "step": 1584128
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.405491026708588e-05,
+      "loss": 3.9119,
+      "step": 1584640
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.404652431957536e-05,
+      "loss": 3.9164,
+      "step": 1585152
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.403813837206484e-05,
+      "loss": 3.925,
+      "step": 1585664
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4029752424554316e-05,
+      "loss": 3.9171,
+      "step": 1586176
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.4021366477043796e-05,
+      "loss": 3.9177,
+      "step": 1586688
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.401299690833701e-05,
+      "loss": 3.912,
+      "step": 1587200
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.400462733963022e-05,
+      "loss": 3.9088,
+      "step": 1587712
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.39962413921197e-05,
+      "loss": 3.9042,
+      "step": 1588224
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.398785544460918e-05,
+      "loss": 3.9171,
+      "step": 1588736
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.397946949709866e-05,
+      "loss": 3.9071,
+      "step": 1589248
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.3971083549588138e-05,
+      "loss": 3.9176,
+      "step": 1589760
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.3962697602077618e-05,
+      "loss": 3.9112,
+      "step": 1590272
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.39543116545671e-05,
+      "loss": 3.9161,
+      "step": 1590784
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.394592570705658e-05,
+      "loss": 3.9113,
+      "step": 1591296
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.393753975954606e-05,
+      "loss": 3.9118,
+      "step": 1591808
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.392915381203554e-05,
+      "loss": 3.9068,
+      "step": 1592320
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.392076786452502e-05,
+      "loss": 3.9221,
+      "step": 1592832
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.391239829581823e-05,
+      "loss": 3.9131,
+      "step": 1593344
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.390401234830771e-05,
+      "loss": 3.9047,
+      "step": 1593856
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.389562640079719e-05,
+      "loss": 3.8978,
+      "step": 1594368
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.388724045328667e-05,
+      "loss": 3.9168,
+      "step": 1594880
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.387885450577615e-05,
+      "loss": 3.9202,
+      "step": 1595392
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.387046855826563e-05,
+      "loss": 3.9177,
+      "step": 1595904
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.3862098989558842e-05,
+      "loss": 3.9155,
+      "step": 1596416
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.385371304204832e-05,
+      "loss": 3.9217,
+      "step": 1596928
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.3845327094537802e-05,
+      "loss": 3.9139,
+      "step": 1597440
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.3836941147027282e-05,
+      "loss": 3.9195,
+      "step": 1597952
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.3828555199516762e-05,
+      "loss": 3.9073,
+      "step": 1598464
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.3820169252006242e-05,
+      "loss": 3.9153,
+      "step": 1598976
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.3811783304495722e-05,
+      "loss": 3.9118,
+      "step": 1599488
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.38033973569852e-05,
+      "loss": 3.906,
+      "step": 1600000
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.379502778827841e-05,
+      "loss": 3.9182,
+      "step": 1600512
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.3786641840767894e-05,
+      "loss": 3.9154,
+      "step": 1601024
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.3778272272061103e-05,
+      "loss": 3.911,
+      "step": 1601536
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.3769886324550583e-05,
+      "loss": 3.9104,
+      "step": 1602048
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.3761500377040063e-05,
+      "loss": 3.9113,
+      "step": 1602560
+    },
+    {
+      "epoch": 0.03,
+      "eval_loss": 4.0143280029296875,
+      "eval_runtime": 310.8282,
+      "eval_samples_per_second": 1227.659,
+      "eval_steps_per_second": 38.365,
+      "step": 1602720
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.3753114429529543e-05,
+      "loss": 3.9143,
+      "step": 1603072
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.3744728482019023e-05,
+      "loss": 3.9132,
+      "step": 1603584
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.3736342534508503e-05,
+      "loss": 3.9157,
+      "step": 1604096
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.3727956586997983e-05,
+      "loss": 3.9086,
+      "step": 1604608
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.3719570639487463e-05,
+      "loss": 3.9251,
+      "step": 1605120
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.3711201070780672e-05,
+      "loss": 3.9132,
+      "step": 1605632
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.3702831502073885e-05,
+      "loss": 3.9041,
+      "step": 1606144
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.3694445554563365e-05,
+      "loss": 3.9005,
+      "step": 1606656
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.3686059607052848e-05,
+      "loss": 3.9169,
+      "step": 1607168
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.3677673659542324e-05,
+      "loss": 3.9204,
+      "step": 1607680
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.3669287712031804e-05,
+      "loss": 3.9095,
+      "step": 1608192
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.3660901764521284e-05,
+      "loss": 3.9172,
+      "step": 1608704
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.3652532195814497e-05,
+      "loss": 3.9126,
+      "step": 1609216
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.3644146248303973e-05,
+      "loss": 3.9103,
+      "step": 1609728
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.3635760300793457e-05,
+      "loss": 3.9074,
+      "step": 1610240
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.3627374353282937e-05,
+      "loss": 3.9013,
+      "step": 1610752
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.3618988405772417e-05,
+      "loss": 3.9107,
+      "step": 1611264
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.3610618837065626e-05,
+      "loss": 3.9025,
+      "step": 1611776
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.3602232889555106e-05,
+      "loss": 3.9172,
+      "step": 1612288
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.3593846942044586e-05,
+      "loss": 3.9202,
+      "step": 1612800
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.3585460994534066e-05,
+      "loss": 3.9227,
+      "step": 1613312
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.357707504702355e-05,
+      "loss": 3.9113,
+      "step": 1613824
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.356868909951303e-05,
+      "loss": 3.9114,
+      "step": 1614336
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.3560303152002505e-05,
+      "loss": 3.9134,
+      "step": 1614848
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.3551933583295718e-05,
+      "loss": 3.9066,
+      "step": 1615360
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.3543547635785198e-05,
+      "loss": 3.9117,
+      "step": 1615872
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.3535161688274678e-05,
+      "loss": 3.8953,
+      "step": 1616384
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.3526775740764158e-05,
+      "loss": 3.9177,
+      "step": 1616896
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.3518389793253638e-05,
+      "loss": 3.9031,
+      "step": 1617408
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.351002022454685e-05,
+      "loss": 3.8981,
+      "step": 1617920
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.3501634277036327e-05,
+      "loss": 3.9096,
+      "step": 1618432
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.3493248329525807e-05,
+      "loss": 3.9185,
+      "step": 1618944
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.3484862382015287e-05,
+      "loss": 3.9197,
+      "step": 1619456
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.3476476434504767e-05,
+      "loss": 3.9156,
+      "step": 1619968
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.3468123244601712e-05,
+      "loss": 3.9058,
+      "step": 1620480
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.345973729709119e-05,
+      "loss": 3.9188,
+      "step": 1620992
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.345135134958067e-05,
+      "loss": 3.9102,
+      "step": 1621504
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.344296540207015e-05,
+      "loss": 3.8982,
+      "step": 1622016
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.3434579454559628e-05,
+      "loss": 3.8995,
+      "step": 1622528
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.342619350704911e-05,
+      "loss": 3.8985,
+      "step": 1623040
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.341780755953859e-05,
+      "loss": 3.9054,
+      "step": 1623552
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.340942161202807e-05,
+      "loss": 3.9153,
+      "step": 1624064
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.340103566451755e-05,
+      "loss": 3.9079,
+      "step": 1624576
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.339264971700703e-05,
+      "loss": 3.9024,
+      "step": 1625088
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.3384263769496508e-05,
+      "loss": 3.9155,
+      "step": 1625600
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.3375877821985988e-05,
+      "loss": 3.9053,
+      "step": 1626112
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.3367491874475468e-05,
+      "loss": 3.9191,
+      "step": 1626624
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.3359122305768684e-05,
+      "loss": 3.9014,
+      "step": 1627136
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.335073635825816e-05,
+      "loss": 3.8819,
+      "step": 1627648
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.334235041074764e-05,
+      "loss": 3.9234,
+      "step": 1628160
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.3333980842040853e-05,
+      "loss": 3.9147,
+      "step": 1628672
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.3325594894530333e-05,
+      "loss": 3.914,
+      "step": 1629184
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.3317208947019813e-05,
+      "loss": 3.9091,
+      "step": 1629696
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.3308822999509293e-05,
+      "loss": 3.9044,
+      "step": 1630208
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.3300453430802505e-05,
+      "loss": 3.9016,
+      "step": 1630720
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.329206748329198e-05,
+      "loss": 3.905,
+      "step": 1631232
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.328368153578146e-05,
+      "loss": 3.9027,
+      "step": 1631744
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.327529558827094e-05,
+      "loss": 3.9055,
+      "step": 1632256
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.326690964076042e-05,
+      "loss": 3.913,
+      "step": 1632768
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.3258523693249905e-05,
+      "loss": 3.9059,
+      "step": 1633280
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.3250137745739385e-05,
+      "loss": 3.8959,
+      "step": 1633792
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.3241751798228865e-05,
+      "loss": 3.9143,
+      "step": 1634304
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.323336585071834e-05,
+      "loss": 3.8972,
+      "step": 1634816
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.322497990320782e-05,
+      "loss": 3.8964,
+      "step": 1635328
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.3216610334501034e-05,
+      "loss": 3.916,
+      "step": 1635840
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.3208224386990514e-05,
+      "loss": 3.9138,
+      "step": 1636352
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.3199838439479994e-05,
+      "loss": 3.8958,
+      "step": 1636864
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.3191452491969474e-05,
+      "loss": 3.9027,
+      "step": 1637376
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.3183066544458954e-05,
+      "loss": 3.8882,
+      "step": 1637888
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.3174680596948434e-05,
+      "loss": 3.9079,
+      "step": 1638400
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.3166311028241643e-05,
+      "loss": 3.9108,
+      "step": 1638912
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.3157925080731126e-05,
+      "loss": 3.9082,
+      "step": 1639424
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.3149539133220606e-05,
+      "loss": 3.9077,
+      "step": 1639936
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.3141153185710086e-05,
+      "loss": 3.9152,
+      "step": 1640448
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.3132783617003295e-05,
+      "loss": 3.9213,
+      "step": 1640960
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.3124397669492775e-05,
+      "loss": 3.8966,
+      "step": 1641472
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.3116011721982255e-05,
+      "loss": 3.9155,
+      "step": 1641984
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.3107625774471735e-05,
+      "loss": 3.9136,
+      "step": 1642496
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.3099256205764947e-05,
+      "loss": 3.8962,
+      "step": 1643008
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.3090870258254427e-05,
+      "loss": 3.9158,
+      "step": 1643520
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.3082500689547636e-05,
+      "loss": 3.9003,
+      "step": 1644032
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.3074114742037116e-05,
+      "loss": 3.9192,
+      "step": 1644544
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.3065728794526596e-05,
+      "loss": 3.9135,
+      "step": 1645056
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.305734284701608e-05,
+      "loss": 3.8987,
+      "step": 1645568
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.304895689950556e-05,
+      "loss": 3.8987,
+      "step": 1646080
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.304057095199504e-05,
+      "loss": 3.9033,
+      "step": 1646592
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.303218500448452e-05,
+      "loss": 3.9195,
+      "step": 1647104
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.3023799056973996e-05,
+      "loss": 3.9042,
+      "step": 1647616
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 2.301542948826721e-05,
+      "loss": 3.9055,
+      "step": 1648128
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.300704354075669e-05,
+      "loss": 3.9033,
+      "step": 1648640
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.299865759324617e-05,
+      "loss": 3.9071,
+      "step": 1649152
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.299027164573565e-05,
+      "loss": 3.9022,
+      "step": 1649664
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.298188569822513e-05,
+      "loss": 3.8874,
+      "step": 1650176
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.297349975071461e-05,
+      "loss": 3.9064,
+      "step": 1650688
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2965130182007817e-05,
+      "loss": 3.9127,
+      "step": 1651200
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2956744234497297e-05,
+      "loss": 3.9125,
+      "step": 1651712
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.294835828698678e-05,
+      "loss": 3.8978,
+      "step": 1652224
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.293997233947626e-05,
+      "loss": 3.9006,
+      "step": 1652736
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.293158639196574e-05,
+      "loss": 3.9032,
+      "step": 1653248
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.292321682325895e-05,
+      "loss": 3.9092,
+      "step": 1653760
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.291483087574843e-05,
+      "loss": 3.9154,
+      "step": 1654272
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.290644492823791e-05,
+      "loss": 3.9,
+      "step": 1654784
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.289805898072739e-05,
+      "loss": 3.905,
+      "step": 1655296
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2889673033216873e-05,
+      "loss": 3.9112,
+      "step": 1655808
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2881303464510082e-05,
+      "loss": 3.8926,
+      "step": 1656320
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2872917516999562e-05,
+      "loss": 3.9027,
+      "step": 1656832
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2864531569489042e-05,
+      "loss": 3.8957,
+      "step": 1657344
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2856145621978522e-05,
+      "loss": 3.9101,
+      "step": 1657856
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2847776053271734e-05,
+      "loss": 3.893,
+      "step": 1658368
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2839390105761214e-05,
+      "loss": 3.9107,
+      "step": 1658880
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2831004158250694e-05,
+      "loss": 3.8993,
+      "step": 1659392
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2822634589543903e-05,
+      "loss": 3.9033,
+      "step": 1659904
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2814248642033383e-05,
+      "loss": 3.902,
+      "step": 1660416
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2805862694522863e-05,
+      "loss": 3.9063,
+      "step": 1660928
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2797476747012343e-05,
+      "loss": 3.9046,
+      "step": 1661440
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2789090799501823e-05,
+      "loss": 3.9201,
+      "step": 1661952
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2780721230795036e-05,
+      "loss": 3.9106,
+      "step": 1662464
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2772335283284516e-05,
+      "loss": 3.9116,
+      "step": 1662976
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2763949335773996e-05,
+      "loss": 3.9014,
+      "step": 1663488
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2755563388263472e-05,
+      "loss": 3.9067,
+      "step": 1664000
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2747177440752952e-05,
+      "loss": 3.8994,
+      "step": 1664512
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2738791493242435e-05,
+      "loss": 3.9049,
+      "step": 1665024
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2730405545731915e-05,
+      "loss": 3.9031,
+      "step": 1665536
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2722019598221395e-05,
+      "loss": 3.9097,
+      "step": 1666048
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2713650029514604e-05,
+      "loss": 3.9049,
+      "step": 1666560
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2705264082004084e-05,
+      "loss": 3.9042,
+      "step": 1667072
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2696878134493564e-05,
+      "loss": 3.9033,
+      "step": 1667584
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2688492186983044e-05,
+      "loss": 3.908,
+      "step": 1668096
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2680106239472528e-05,
+      "loss": 3.9002,
+      "step": 1668608
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2671720291962004e-05,
+      "loss": 3.9103,
+      "step": 1669120
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2663350723255217e-05,
+      "loss": 3.9054,
+      "step": 1669632
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2654964775744697e-05,
+      "loss": 3.8995,
+      "step": 1670144
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2646578828234177e-05,
+      "loss": 3.8853,
+      "step": 1670656
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2638192880723653e-05,
+      "loss": 3.913,
+      "step": 1671168
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.262982331201687e-05,
+      "loss": 3.911,
+      "step": 1671680
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.262143736450635e-05,
+      "loss": 3.9106,
+      "step": 1672192
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2613051416995826e-05,
+      "loss": 3.9072,
+      "step": 1672704
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2604665469485306e-05,
+      "loss": 3.9109,
+      "step": 1673216
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2596279521974786e-05,
+      "loss": 3.9044,
+      "step": 1673728
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2587893574464265e-05,
+      "loss": 3.9169,
+      "step": 1674240
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2579507626953745e-05,
+      "loss": 3.8992,
+      "step": 1674752
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2571138058246958e-05,
+      "loss": 3.9068,
+      "step": 1675264
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2562752110736438e-05,
+      "loss": 3.9047,
+      "step": 1675776
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2554366163225918e-05,
+      "loss": 3.9025,
+      "step": 1676288
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2545980215715398e-05,
+      "loss": 3.9131,
+      "step": 1676800
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2537594268204878e-05,
+      "loss": 3.9032,
+      "step": 1677312
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2529208320694358e-05,
+      "loss": 3.9099,
+      "step": 1677824
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2520822373183838e-05,
+      "loss": 3.8989,
+      "step": 1678336
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 2.2512436425673318e-05,
+      "loss": 3.9058,
+      "step": 1678848
+    },
+    {
+      "epoch": 1.03,
+      "eval_loss": 4.011335372924805,
+      "eval_runtime": 295.1907,
+      "eval_samples_per_second": 1292.693,
+      "eval_steps_per_second": 40.398,
+      "step": 1679040
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 2.250406685696653e-05,
+      "loss": 3.9088,
+      "step": 1679360
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 2.249569728825974e-05,
+      "loss": 3.9056,
+      "step": 1679872
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 2.248731134074922e-05,
+      "loss": 3.9087,
+      "step": 1680384
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 2.24789253932387e-05,
+      "loss": 3.9021,
+      "step": 1680896
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 2.2470539445728182e-05,
+      "loss": 3.9181,
+      "step": 1681408
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 2.246215349821766e-05,
+      "loss": 3.9063,
+      "step": 1681920
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 2.245376755070714e-05,
+      "loss": 3.8993,
+      "step": 1682432
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 2.244538160319662e-05,
+      "loss": 3.8921,
+      "step": 1682944
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 2.24369956556861e-05,
+      "loss": 3.9114,
+      "step": 1683456
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 2.2428626086979308e-05,
+      "loss": 3.9125,
+      "step": 1683968
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 2.242024013946879e-05,
+      "loss": 3.9019,
+      "step": 1684480
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 2.241185419195827e-05,
+      "loss": 3.9101,
+      "step": 1684992
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 2.240348462325148e-05,
+      "loss": 3.9048,
+      "step": 1685504
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 2.239509867574096e-05,
+      "loss": 3.9061,
+      "step": 1686016
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 2.238671272823044e-05,
+      "loss": 3.8974,
+      "step": 1686528
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 2.237832678071992e-05,
+      "loss": 3.8961,
+      "step": 1687040
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 2.23699408332094e-05,
+      "loss": 3.9037,
+      "step": 1687552
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 2.2361571264502613e-05,
+      "loss": 3.8982,
+      "step": 1688064
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 2.2353185316992093e-05,
+      "loss": 3.9077,
+      "step": 1688576
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 2.2344799369481573e-05,
+      "loss": 3.913,
+      "step": 1689088
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 2.2336413421971053e-05,
+      "loss": 3.9158,
+      "step": 1689600
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 2.2328027474460533e-05,
+      "loss": 3.9074,
+      "step": 1690112
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 2.2319641526950012e-05,
+      "loss": 3.904,
+      "step": 1690624
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 2.2311255579439492e-05,
+      "loss": 3.9042,
+      "step": 1691136
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 2.2302869631928972e-05,
+      "loss": 3.9032,
+      "step": 1691648
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 2.2294483684418452e-05,
+      "loss": 3.9043,
+      "step": 1692160
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 2.2286097736907932e-05,
+      "loss": 3.8849,
+      "step": 1692672
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 2.227772816820114e-05,
+      "loss": 3.9128,
+      "step": 1693184
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 2.226934222069062e-05,
+      "loss": 3.8986,
+      "step": 1693696
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 2.2260956273180105e-05,
+      "loss": 3.892,
+      "step": 1694208
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.2252570325669585e-05,
+      "loss": 3.9017,
+      "step": 1694720
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.2244184378159065e-05,
+      "loss": 3.912,
+      "step": 1695232
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.2235798430648545e-05,
+      "loss": 3.9075,
+      "step": 1695744
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.222741248313802e-05,
+      "loss": 3.9082,
+      "step": 1696256
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.2219042914431234e-05,
+      "loss": 3.9002,
+      "step": 1696768
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.2210673345724446e-05,
+      "loss": 3.911,
+      "step": 1697280
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.2202287398213926e-05,
+      "loss": 3.9023,
+      "step": 1697792
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.2193901450703406e-05,
+      "loss": 3.892,
+      "step": 1698304
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.2185515503192886e-05,
+      "loss": 3.8964,
+      "step": 1698816
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.2177129555682366e-05,
+      "loss": 3.8943,
+      "step": 1699328
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.2168743608171842e-05,
+      "loss": 3.9,
+      "step": 1699840
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.2160357660661322e-05,
+      "loss": 3.9113,
+      "step": 1700352
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.2151971713150806e-05,
+      "loss": 3.8978,
+      "step": 1700864
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.2143585765640286e-05,
+      "loss": 3.8979,
+      "step": 1701376
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.2135199818129766e-05,
+      "loss": 3.9028,
+      "step": 1701888
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.2126813870619246e-05,
+      "loss": 3.8973,
+      "step": 1702400
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.2118427923108726e-05,
+      "loss": 3.9136,
+      "step": 1702912
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.2110058354401935e-05,
+      "loss": 3.8992,
+      "step": 1703424
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.2101672406891415e-05,
+      "loss": 3.8721,
+      "step": 1703936
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.2093302838184627e-05,
+      "loss": 3.9166,
+      "step": 1704448
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.2084916890674107e-05,
+      "loss": 3.9085,
+      "step": 1704960
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.2076530943163587e-05,
+      "loss": 3.9069,
+      "step": 1705472
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.2068144995653067e-05,
+      "loss": 3.9006,
+      "step": 1705984
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.2059759048142547e-05,
+      "loss": 3.8967,
+      "step": 1706496
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.2051373100632024e-05,
+      "loss": 3.8926,
+      "step": 1707008
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.204300353192524e-05,
+      "loss": 3.8999,
+      "step": 1707520
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.203461758441472e-05,
+      "loss": 3.8955,
+      "step": 1708032
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.20262316369042e-05,
+      "loss": 3.8941,
+      "step": 1708544
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.2017845689393676e-05,
+      "loss": 3.9064,
+      "step": 1709056
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.2009459741883156e-05,
+      "loss": 3.9015,
+      "step": 1709568
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.2001073794372636e-05,
+      "loss": 3.8889,
+      "step": 1710080
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.1992687846862116e-05,
+      "loss": 3.9076,
+      "step": 1710592
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.19843018993516e-05,
+      "loss": 3.8871,
+      "step": 1711104
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.1975932330644808e-05,
+      "loss": 3.893,
+      "step": 1711616
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.196756276193802e-05,
+      "loss": 3.907,
+      "step": 1712128
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.1959176814427497e-05,
+      "loss": 3.9069,
+      "step": 1712640
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.1950807245720713e-05,
+      "loss": 3.8874,
+      "step": 1713152
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.1942421298210193e-05,
+      "loss": 3.8934,
+      "step": 1713664
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.1934035350699673e-05,
+      "loss": 3.8817,
+      "step": 1714176
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.192564940318915e-05,
+      "loss": 3.9031,
+      "step": 1714688
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.191726345567863e-05,
+      "loss": 3.9048,
+      "step": 1715200
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.190887750816811e-05,
+      "loss": 3.9025,
+      "step": 1715712
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.190049156065759e-05,
+      "loss": 3.8996,
+      "step": 1716224
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.189210561314707e-05,
+      "loss": 3.9047,
+      "step": 1716736
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.1883719665636553e-05,
+      "loss": 3.915,
+      "step": 1717248
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.187533371812603e-05,
+      "loss": 3.8937,
+      "step": 1717760
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.186694777061551e-05,
+      "loss": 3.9058,
+      "step": 1718272
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.185856182310499e-05,
+      "loss": 3.9055,
+      "step": 1718784
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.1850192254398202e-05,
+      "loss": 3.8904,
+      "step": 1719296
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.1841806306887678e-05,
+      "loss": 3.9037,
+      "step": 1719808
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.183342035937716e-05,
+      "loss": 3.9008,
+      "step": 1720320
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.182503441186664e-05,
+      "loss": 3.9061,
+      "step": 1720832
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.181664846435612e-05,
+      "loss": 3.9086,
+      "step": 1721344
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.18082625168456e-05,
+      "loss": 3.8936,
+      "step": 1721856
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.179989294813881e-05,
+      "loss": 3.8931,
+      "step": 1722368
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.179150700062829e-05,
+      "loss": 3.8977,
+      "step": 1722880
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.1783137431921503e-05,
+      "loss": 3.9069,
+      "step": 1723392
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.1774751484410983e-05,
+      "loss": 3.8992,
+      "step": 1723904
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.1766365536900463e-05,
+      "loss": 3.9002,
+      "step": 1724416
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1757979589389943e-05,
+      "loss": 3.8991,
+      "step": 1724928
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1749593641879423e-05,
+      "loss": 3.9026,
+      "step": 1725440
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1741207694368903e-05,
+      "loss": 3.897,
+      "step": 1725952
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1732821746858383e-05,
+      "loss": 3.8808,
+      "step": 1726464
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1724435799347863e-05,
+      "loss": 3.8997,
+      "step": 1726976
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1716066230641075e-05,
+      "loss": 3.9023,
+      "step": 1727488
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1707680283130555e-05,
+      "loss": 3.9052,
+      "step": 1728000
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1699294335620035e-05,
+      "loss": 3.8915,
+      "step": 1728512
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.169090838810951e-05,
+      "loss": 3.8932,
+      "step": 1729024
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.168252244059899e-05,
+      "loss": 3.9003,
+      "step": 1729536
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.167413649308847e-05,
+      "loss": 3.9,
+      "step": 1730048
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1665766924381684e-05,
+      "loss": 3.9108,
+      "step": 1730560
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1657397355674897e-05,
+      "loss": 3.8941,
+      "step": 1731072
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1649011408164377e-05,
+      "loss": 3.8991,
+      "step": 1731584
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1640625460653856e-05,
+      "loss": 3.9037,
+      "step": 1732096
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1632239513143336e-05,
+      "loss": 3.8866,
+      "step": 1732608
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1623853565632816e-05,
+      "loss": 3.8971,
+      "step": 1733120
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1615467618122296e-05,
+      "loss": 3.8854,
+      "step": 1733632
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1607081670611776e-05,
+      "loss": 3.9081,
+      "step": 1734144
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1598695723101256e-05,
+      "loss": 3.8872,
+      "step": 1734656
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1590326154394465e-05,
+      "loss": 3.9028,
+      "step": 1735168
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1581940206883945e-05,
+      "loss": 3.8931,
+      "step": 1735680
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1573554259373425e-05,
+      "loss": 3.8957,
+      "step": 1736192
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.156516831186291e-05,
+      "loss": 3.8961,
+      "step": 1736704
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.155678236435239e-05,
+      "loss": 3.9023,
+      "step": 1737216
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1548396416841865e-05,
+      "loss": 3.8962,
+      "step": 1737728
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1540026848135078e-05,
+      "loss": 3.9145,
+      "step": 1738240
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1531640900624558e-05,
+      "loss": 3.8962,
+      "step": 1738752
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1523254953114038e-05,
+      "loss": 3.9118,
+      "step": 1739264
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1514869005603517e-05,
+      "loss": 3.8962,
+      "step": 1739776
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1506483058092997e-05,
+      "loss": 3.897,
+      "step": 1740288
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1498097110582477e-05,
+      "loss": 3.8974,
+      "step": 1740800
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1489711163071957e-05,
+      "loss": 3.8956,
+      "step": 1741312
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1481325215561437e-05,
+      "loss": 3.894,
+      "step": 1741824
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1472939268050917e-05,
+      "loss": 3.9034,
+      "step": 1742336
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1464586078147862e-05,
+      "loss": 3.8972,
+      "step": 1742848
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.145620013063734e-05,
+      "loss": 3.9021,
+      "step": 1743360
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.144781418312682e-05,
+      "loss": 3.8974,
+      "step": 1743872
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.14394282356163e-05,
+      "loss": 3.9022,
+      "step": 1744384
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.143104228810578e-05,
+      "loss": 3.8972,
+      "step": 1744896
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.142265634059526e-05,
+      "loss": 3.9013,
+      "step": 1745408
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.141427039308474e-05,
+      "loss": 3.8971,
+      "step": 1745920
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.140588444557422e-05,
+      "loss": 3.8963,
+      "step": 1746432
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.139751487686743e-05,
+      "loss": 3.8792,
+      "step": 1746944
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.138912892935691e-05,
+      "loss": 3.9058,
+      "step": 1747456
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.138074298184639e-05,
+      "loss": 3.9034,
+      "step": 1747968
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1372357034335868e-05,
+      "loss": 3.9058,
+      "step": 1748480
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.136398746562908e-05,
+      "loss": 3.8976,
+      "step": 1748992
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1355617896922293e-05,
+      "loss": 3.9077,
+      "step": 1749504
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1347231949411772e-05,
+      "loss": 3.9026,
+      "step": 1750016
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1338846001901252e-05,
+      "loss": 3.9096,
+      "step": 1750528
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1330460054390732e-05,
+      "loss": 3.8903,
+      "step": 1751040
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1322090485683945e-05,
+      "loss": 3.8963,
+      "step": 1751552
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1313704538173425e-05,
+      "loss": 3.8997,
+      "step": 1752064
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1305318590662905e-05,
+      "loss": 3.8999,
+      "step": 1752576
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1296932643152385e-05,
+      "loss": 3.9016,
+      "step": 1753088
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1288546695641865e-05,
+      "loss": 3.8991,
+      "step": 1753600
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1280177126935074e-05,
+      "loss": 3.9018,
+      "step": 1754112
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1271791179424554e-05,
+      "loss": 3.8927,
+      "step": 1754624
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 2.1263405231914034e-05,
+      "loss": 3.9044,
+      "step": 1755136
+    },
+    {
+      "epoch": 2.03,
+      "eval_loss": 4.008820056915283,
+      "eval_runtime": 316.8047,
+      "eval_samples_per_second": 1204.499,
+      "eval_steps_per_second": 37.641,
+      "step": 1755360
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.1255019284403517e-05,
+      "loss": 3.9052,
+      "step": 1755648
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.1246633336892994e-05,
+      "loss": 3.9033,
+      "step": 1756160
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.1238247389382474e-05,
+      "loss": 3.8958,
+      "step": 1756672
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.1229861441871954e-05,
+      "loss": 3.8959,
+      "step": 1757184
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.1221475494361433e-05,
+      "loss": 3.9088,
+      "step": 1757696
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.1213089546850913e-05,
+      "loss": 3.9018,
+      "step": 1758208
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.1204703599340393e-05,
+      "loss": 3.8957,
+      "step": 1758720
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.1196317651829877e-05,
+      "loss": 3.8898,
+      "step": 1759232
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.1187931704319353e-05,
+      "loss": 3.9004,
+      "step": 1759744
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.1179545756808833e-05,
+      "loss": 3.9091,
+      "step": 1760256
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.1171159809298313e-05,
+      "loss": 3.8954,
+      "step": 1760768
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.1162773861787793e-05,
+      "loss": 3.9019,
+      "step": 1761280
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.1154420671884738e-05,
+      "loss": 3.9042,
+      "step": 1761792
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.1146034724374218e-05,
+      "loss": 3.8955,
+      "step": 1762304
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.1137648776863698e-05,
+      "loss": 3.8925,
+      "step": 1762816
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.1129262829353175e-05,
+      "loss": 3.8903,
+      "step": 1763328
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.1120876881842655e-05,
+      "loss": 3.8977,
+      "step": 1763840
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.1112490934332135e-05,
+      "loss": 3.8927,
+      "step": 1764352
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.1104104986821615e-05,
+      "loss": 3.8973,
+      "step": 1764864
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.1095719039311094e-05,
+      "loss": 3.9089,
+      "step": 1765376
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.1087333091800578e-05,
+      "loss": 3.9083,
+      "step": 1765888
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.1078947144290058e-05,
+      "loss": 3.9017,
+      "step": 1766400
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.1070561196779534e-05,
+      "loss": 3.8954,
+      "step": 1766912
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.1062175249269014e-05,
+      "loss": 3.8951,
+      "step": 1767424
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.1053789301758494e-05,
+      "loss": 3.8977,
+      "step": 1767936
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.1045419733051703e-05,
+      "loss": 3.8968,
+      "step": 1768448
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.1037033785541187e-05,
+      "loss": 3.8801,
+      "step": 1768960
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.1028647838030667e-05,
+      "loss": 3.9041,
+      "step": 1769472
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.1020261890520147e-05,
+      "loss": 3.8943,
+      "step": 1769984
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 2.1011875943009627e-05,
+      "loss": 3.8871,
+      "step": 1770496
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.1003506374302836e-05,
+      "loss": 3.8897,
+      "step": 1771008
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.0995120426792316e-05,
+      "loss": 3.9077,
+      "step": 1771520
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.0986734479281796e-05,
+      "loss": 3.9024,
+      "step": 1772032
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.097834853177128e-05,
+      "loss": 3.9004,
+      "step": 1772544
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.0969978963064488e-05,
+      "loss": 3.8933,
+      "step": 1773056
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.09616093943577e-05,
+      "loss": 3.9025,
+      "step": 1773568
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.0953223446847177e-05,
+      "loss": 3.8993,
+      "step": 1774080
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.0944837499336657e-05,
+      "loss": 3.8865,
+      "step": 1774592
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.093645155182614e-05,
+      "loss": 3.8873,
+      "step": 1775104
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.092806560431562e-05,
+      "loss": 3.8844,
+      "step": 1775616
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.09196796568051e-05,
+      "loss": 3.8927,
+      "step": 1776128
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.091129370929458e-05,
+      "loss": 3.9011,
+      "step": 1776640
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.090290776178406e-05,
+      "loss": 3.8949,
+      "step": 1777152
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.0894521814273537e-05,
+      "loss": 3.8903,
+      "step": 1777664
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.0886135866763017e-05,
+      "loss": 3.8985,
+      "step": 1778176
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.0877766298056233e-05,
+      "loss": 3.8921,
+      "step": 1778688
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.086938035054571e-05,
+      "loss": 3.9022,
+      "step": 1779200
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.086099440303519e-05,
+      "loss": 3.8957,
+      "step": 1779712
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.085260845552467e-05,
+      "loss": 3.8674,
+      "step": 1780224
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.084422250801415e-05,
+      "loss": 3.9065,
+      "step": 1780736
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.083585293930736e-05,
+      "loss": 3.9031,
+      "step": 1781248
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.082746699179684e-05,
+      "loss": 3.9048,
+      "step": 1781760
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.081908104428632e-05,
+      "loss": 3.8928,
+      "step": 1782272
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.08106950967758e-05,
+      "loss": 3.887,
+      "step": 1782784
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.080232552806901e-05,
+      "loss": 3.8912,
+      "step": 1783296
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.079393958055849e-05,
+      "loss": 3.8942,
+      "step": 1783808
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.078555363304797e-05,
+      "loss": 3.8936,
+      "step": 1784320
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.077716768553745e-05,
+      "loss": 3.8836,
+      "step": 1784832
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.0768781738026934e-05,
+      "loss": 3.9013,
+      "step": 1785344
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.0760395790516414e-05,
+      "loss": 3.8968,
+      "step": 1785856
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.075200984300589e-05,
+      "loss": 3.8799,
+      "step": 1786368
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.074362389549537e-05,
+      "loss": 3.9009,
+      "step": 1786880
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.073523794798485e-05,
+      "loss": 3.8822,
+      "step": 1787392
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.0726868379278063e-05,
+      "loss": 3.8845,
+      "step": 1787904
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.0718498810571275e-05,
+      "loss": 3.898,
+      "step": 1788416
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.0710112863060755e-05,
+      "loss": 3.9024,
+      "step": 1788928
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.0701726915550235e-05,
+      "loss": 3.8853,
+      "step": 1789440
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.0693340968039715e-05,
+      "loss": 3.8897,
+      "step": 1789952
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.068495502052919e-05,
+      "loss": 3.8745,
+      "step": 1790464
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.067656907301867e-05,
+      "loss": 3.8967,
+      "step": 1790976
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.0668215883115616e-05,
+      "loss": 3.897,
+      "step": 1791488
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.0659829935605096e-05,
+      "loss": 3.8955,
+      "step": 1792000
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.0651443988094576e-05,
+      "loss": 3.8919,
+      "step": 1792512
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.0643058040584056e-05,
+      "loss": 3.8982,
+      "step": 1793024
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.0634672093073536e-05,
+      "loss": 3.9125,
+      "step": 1793536
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.0626286145563016e-05,
+      "loss": 3.8833,
+      "step": 1794048
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.0617900198052496e-05,
+      "loss": 3.9017,
+      "step": 1794560
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.0609514250541976e-05,
+      "loss": 3.8998,
+      "step": 1795072
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.0601128303031456e-05,
+      "loss": 3.8844,
+      "step": 1795584
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.0592742355520936e-05,
+      "loss": 3.9002,
+      "step": 1796096
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.0584356408010416e-05,
+      "loss": 3.8957,
+      "step": 1796608
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.0575986839303625e-05,
+      "loss": 3.8992,
+      "step": 1797120
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.056760089179311e-05,
+      "loss": 3.9063,
+      "step": 1797632
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.055921494428259e-05,
+      "loss": 3.882,
+      "step": 1798144
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.055082899677207e-05,
+      "loss": 3.889,
+      "step": 1798656
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.0542443049261545e-05,
+      "loss": 3.8895,
+      "step": 1799168
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.0534057101751025e-05,
+      "loss": 3.8995,
+      "step": 1799680
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.0525671154240505e-05,
+      "loss": 3.8915,
+      "step": 1800192
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 2.0517301585533717e-05,
+      "loss": 3.8992,
+      "step": 1800704
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.0508915638023197e-05,
+      "loss": 3.894,
+      "step": 1801216
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.0500529690512677e-05,
+      "loss": 3.8911,
+      "step": 1801728
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.0492143743002157e-05,
+      "loss": 3.8918,
+      "step": 1802240
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.0483757795491637e-05,
+      "loss": 3.8746,
+      "step": 1802752
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.0475371847981117e-05,
+      "loss": 3.8912,
+      "step": 1803264
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.0467002279274326e-05,
+      "loss": 3.8969,
+      "step": 1803776
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.045861633176381e-05,
+      "loss": 3.9008,
+      "step": 1804288
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.045023038425329e-05,
+      "loss": 3.8849,
+      "step": 1804800
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.044184443674277e-05,
+      "loss": 3.887,
+      "step": 1805312
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.043345848923225e-05,
+      "loss": 3.8918,
+      "step": 1805824
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.042508892052546e-05,
+      "loss": 3.894,
+      "step": 1806336
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.041670297301494e-05,
+      "loss": 3.9022,
+      "step": 1806848
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.040831702550442e-05,
+      "loss": 3.8925,
+      "step": 1807360
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.0399931077993902e-05,
+      "loss": 3.8949,
+      "step": 1807872
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.039154513048338e-05,
+      "loss": 3.8924,
+      "step": 1808384
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.0383159182972858e-05,
+      "loss": 3.8869,
+      "step": 1808896
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.0374773235462338e-05,
+      "loss": 3.8881,
+      "step": 1809408
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.0366387287951818e-05,
+      "loss": 3.8812,
+      "step": 1809920
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.0358017719245027e-05,
+      "loss": 3.9029,
+      "step": 1810432
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.034963177173451e-05,
+      "loss": 3.8826,
+      "step": 1810944
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.034124582422399e-05,
+      "loss": 3.8937,
+      "step": 1811456
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.033285987671347e-05,
+      "loss": 3.8871,
+      "step": 1811968
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.032449030800668e-05,
+      "loss": 3.8944,
+      "step": 1812480
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.031610436049616e-05,
+      "loss": 3.8888,
+      "step": 1812992
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.030771841298564e-05,
+      "loss": 3.8925,
+      "step": 1813504
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.029933246547512e-05,
+      "loss": 3.8912,
+      "step": 1814016
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.0290946517964603e-05,
+      "loss": 3.9081,
+      "step": 1814528
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.0282576949257812e-05,
+      "loss": 3.8888,
+      "step": 1815040
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.0274191001747292e-05,
+      "loss": 3.9052,
+      "step": 1815552
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.0265805054236772e-05,
+      "loss": 3.8882,
+      "step": 1816064
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.0257419106726252e-05,
+      "loss": 3.8907,
+      "step": 1816576
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.024903315921573e-05,
+      "loss": 3.8913,
+      "step": 1817088
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.0240663590508944e-05,
+      "loss": 3.8919,
+      "step": 1817600
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.0232277642998424e-05,
+      "loss": 3.8855,
+      "step": 1818112
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.0223891695487904e-05,
+      "loss": 3.8971,
+      "step": 1818624
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.021550574797738e-05,
+      "loss": 3.8876,
+      "step": 1819136
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.020711980046686e-05,
+      "loss": 3.8954,
+      "step": 1819648
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.019873385295634e-05,
+      "loss": 3.895,
+      "step": 1820160
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.019034790544582e-05,
+      "loss": 3.8923,
+      "step": 1820672
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.0181978336739033e-05,
+      "loss": 3.8923,
+      "step": 1821184
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.0173608768032246e-05,
+      "loss": 3.8983,
+      "step": 1821696
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.0165222820521726e-05,
+      "loss": 3.8863,
+      "step": 1822208
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.0156836873011202e-05,
+      "loss": 3.8907,
+      "step": 1822720
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.0148450925500682e-05,
+      "loss": 3.8723,
+      "step": 1823232
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.0140064977990165e-05,
+      "loss": 3.8989,
+      "step": 1823744
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.0131679030479645e-05,
+      "loss": 3.8983,
+      "step": 1824256
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.0123293082969125e-05,
+      "loss": 3.9002,
+      "step": 1824768
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.0114907135458605e-05,
+      "loss": 3.8903,
+      "step": 1825280
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.0106537566751814e-05,
+      "loss": 3.8994,
+      "step": 1825792
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.0098151619241294e-05,
+      "loss": 3.8973,
+      "step": 1826304
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.0089782050534507e-05,
+      "loss": 3.9014,
+      "step": 1826816
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.0081396103023987e-05,
+      "loss": 3.8836,
+      "step": 1827328
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.0073010155513467e-05,
+      "loss": 3.8916,
+      "step": 1827840
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.0064624208002947e-05,
+      "loss": 3.8908,
+      "step": 1828352
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.0056238260492427e-05,
+      "loss": 3.8962,
+      "step": 1828864
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.0047852312981907e-05,
+      "loss": 3.8925,
+      "step": 1829376
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.0039466365471387e-05,
+      "loss": 3.8905,
+      "step": 1829888
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.0031080417960867e-05,
+      "loss": 3.8973,
+      "step": 1830400
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.002271084925408e-05,
+      "loss": 3.8845,
+      "step": 1830912
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 2.001432490174356e-05,
+      "loss": 3.8991,
+      "step": 1831424
+    },
+    {
+      "epoch": 0.03,
+      "eval_loss": 4.0060834884643555,
+      "eval_runtime": 297.743,
+      "eval_samples_per_second": 1281.612,
+      "eval_steps_per_second": 40.051,
+      "step": 1831680
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 2.0005938954233036e-05,
+      "loss": 3.8885,
+      "step": 1831936
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.9997553006722515e-05,
+      "loss": 3.8953,
+      "step": 1832448
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.9989167059211995e-05,
+      "loss": 3.8929,
+      "step": 1832960
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.998079749050521e-05,
+      "loss": 3.8903,
+      "step": 1833472
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.9972411542994688e-05,
+      "loss": 3.9027,
+      "step": 1833984
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.9964025595484168e-05,
+      "loss": 3.8947,
+      "step": 1834496
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.9955639647973648e-05,
+      "loss": 3.8872,
+      "step": 1835008
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.9947253700463128e-05,
+      "loss": 3.8828,
+      "step": 1835520
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.9938867752952608e-05,
+      "loss": 3.8913,
+      "step": 1836032
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.9930481805442088e-05,
+      "loss": 3.9034,
+      "step": 1836544
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.9922095857931568e-05,
+      "loss": 3.8947,
+      "step": 1837056
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.991372628922478e-05,
+      "loss": 3.8933,
+      "step": 1837568
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.990535672051799e-05,
+      "loss": 3.8958,
+      "step": 1838080
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.989697077300747e-05,
+      "loss": 3.8887,
+      "step": 1838592
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.988858482549695e-05,
+      "loss": 3.8857,
+      "step": 1839104
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.988019887798643e-05,
+      "loss": 3.8828,
+      "step": 1839616
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.9871812930475912e-05,
+      "loss": 3.8914,
+      "step": 1840128
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.986344336176912e-05,
+      "loss": 3.8862,
+      "step": 1840640
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.98550574142586e-05,
+      "loss": 3.8928,
+      "step": 1841152
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.984667146674808e-05,
+      "loss": 3.9032,
+      "step": 1841664
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.983828551923756e-05,
+      "loss": 3.9015,
+      "step": 1842176
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.9829915950530774e-05,
+      "loss": 3.8918,
+      "step": 1842688
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.9821530003020254e-05,
+      "loss": 3.8924,
+      "step": 1843200
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.9813144055509734e-05,
+      "loss": 3.8904,
+      "step": 1843712
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.9804758107999214e-05,
+      "loss": 3.8874,
+      "step": 1844224
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.979637216048869e-05,
+      "loss": 3.8905,
+      "step": 1844736
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.978798621297817e-05,
+      "loss": 3.876,
+      "step": 1845248
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.977960026546765e-05,
+      "loss": 3.8985,
+      "step": 1845760
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.9771214317957134e-05,
+      "loss": 3.8897,
+      "step": 1846272
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.9762844749250343e-05,
+      "loss": 3.8775,
+      "step": 1846784
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.9754458801739823e-05,
+      "loss": 3.8855,
+      "step": 1847296
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.9746089233033035e-05,
+      "loss": 3.8994,
+      "step": 1847808
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.973770328552251e-05,
+      "loss": 3.8996,
+      "step": 1848320
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.9729317338011995e-05,
+      "loss": 3.8944,
+      "step": 1848832
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.9720947769305207e-05,
+      "loss": 3.8878,
+      "step": 1849344
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.9712578200598417e-05,
+      "loss": 3.89,
+      "step": 1849856
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.9704192253087897e-05,
+      "loss": 3.8995,
+      "step": 1850368
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.9695806305577376e-05,
+      "loss": 3.8792,
+      "step": 1850880
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.9687420358066856e-05,
+      "loss": 3.8809,
+      "step": 1851392
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.9679034410556336e-05,
+      "loss": 3.882,
+      "step": 1851904
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.9670648463045816e-05,
+      "loss": 3.8853,
+      "step": 1852416
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.9662262515535296e-05,
+      "loss": 3.8944,
+      "step": 1852928
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.9653876568024776e-05,
+      "loss": 3.8843,
+      "step": 1853440
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.9645490620514256e-05,
+      "loss": 3.8932,
+      "step": 1853952
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.9637121051807465e-05,
+      "loss": 3.8863,
+      "step": 1854464
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.962873510429695e-05,
+      "loss": 3.8854,
+      "step": 1854976
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.962034915678643e-05,
+      "loss": 3.9003,
+      "step": 1855488
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.961196320927591e-05,
+      "loss": 3.8881,
+      "step": 1856000
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.960357726176539e-05,
+      "loss": 3.8652,
+      "step": 1856512
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.959519131425487e-05,
+      "loss": 3.8964,
+      "step": 1857024
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.9586805366744345e-05,
+      "loss": 3.8927,
+      "step": 1857536
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.9578419419233825e-05,
+      "loss": 3.9009,
+      "step": 1858048
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.9570033471723305e-05,
+      "loss": 3.8887,
+      "step": 1858560
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.9561647524212788e-05,
+      "loss": 3.8824,
+      "step": 1859072
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.9553261576702268e-05,
+      "loss": 3.884,
+      "step": 1859584
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.9544875629191748e-05,
+      "loss": 3.8852,
+      "step": 1860096
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.9536506060484957e-05,
+      "loss": 3.8907,
+      "step": 1860608
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.9528120112974437e-05,
+      "loss": 3.8756,
+      "step": 1861120
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.9519734165463917e-05,
+      "loss": 3.8953,
+      "step": 1861632
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.951136459675713e-05,
+      "loss": 3.8902,
+      "step": 1862144
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.950297864924661e-05,
+      "loss": 3.8757,
+      "step": 1862656
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.949459270173609e-05,
+      "loss": 3.8945,
+      "step": 1863168
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.948620675422557e-05,
+      "loss": 3.8771,
+      "step": 1863680
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.947782080671505e-05,
+      "loss": 3.8783,
+      "step": 1864192
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.9469434859204526e-05,
+      "loss": 3.8913,
+      "step": 1864704
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.9461048911694006e-05,
+      "loss": 3.8975,
+      "step": 1865216
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.9452679342987222e-05,
+      "loss": 3.8733,
+      "step": 1865728
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.94442933954767e-05,
+      "loss": 3.8876,
+      "step": 1866240
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.943590744796618e-05,
+      "loss": 3.8692,
+      "step": 1866752
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.942752150045566e-05,
+      "loss": 3.8902,
+      "step": 1867264
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.941913555294514e-05,
+      "loss": 3.8927,
+      "step": 1867776
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.941076598423835e-05,
+      "loss": 3.8854,
+      "step": 1868288
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.940238003672783e-05,
+      "loss": 3.8921,
+      "step": 1868800
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.939399408921731e-05,
+      "loss": 3.8885,
+      "step": 1869312
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.938560814170679e-05,
+      "loss": 3.9087,
+      "step": 1869824
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.9377238573e-05,
+      "loss": 3.8756,
+      "step": 1870336
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.936885262548948e-05,
+      "loss": 3.895,
+      "step": 1870848
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.936046667797896e-05,
+      "loss": 3.8896,
+      "step": 1871360
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.9352080730468443e-05,
+      "loss": 3.8836,
+      "step": 1871872
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.9343694782957923e-05,
+      "loss": 3.8918,
+      "step": 1872384
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.9335325214251132e-05,
+      "loss": 3.886,
+      "step": 1872896
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.9326939266740612e-05,
+      "loss": 3.8934,
+      "step": 1873408
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.9318553319230092e-05,
+      "loss": 3.8999,
+      "step": 1873920
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.9310167371719572e-05,
+      "loss": 3.877,
+      "step": 1874432
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.9301781424209052e-05,
+      "loss": 3.8834,
+      "step": 1874944
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.9293395476698532e-05,
+      "loss": 3.8858,
+      "step": 1875456
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.9285009529188012e-05,
+      "loss": 3.8958,
+      "step": 1875968
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.9276623581677492e-05,
+      "loss": 3.8828,
+      "step": 1876480
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.9268254012970704e-05,
+      "loss": 3.8915,
+      "step": 1876992
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.925986806546018e-05,
+      "loss": 3.8907,
+      "step": 1877504
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.925148211794966e-05,
+      "loss": 3.8846,
+      "step": 1878016
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.9243096170439144e-05,
+      "loss": 3.8856,
+      "step": 1878528
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.9234726601732353e-05,
+      "loss": 3.8691,
+      "step": 1879040
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.9226340654221833e-05,
+      "loss": 3.8839,
+      "step": 1879552
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.9217954706711313e-05,
+      "loss": 3.89,
+      "step": 1880064
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.9209568759200793e-05,
+      "loss": 3.8947,
+      "step": 1880576
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.9201182811690273e-05,
+      "loss": 3.8829,
+      "step": 1881088
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.9192813242983486e-05,
+      "loss": 3.879,
+      "step": 1881600
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.9184443674276698e-05,
+      "loss": 3.8835,
+      "step": 1882112
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.9176057726766178e-05,
+      "loss": 3.8915,
+      "step": 1882624
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.9167671779255655e-05,
+      "loss": 3.8943,
+      "step": 1883136
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.9159285831745135e-05,
+      "loss": 3.8854,
+      "step": 1883648
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.9150899884234614e-05,
+      "loss": 3.8912,
+      "step": 1884160
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.9142530315527827e-05,
+      "loss": 3.8831,
+      "step": 1884672
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.9134144368017307e-05,
+      "loss": 3.885,
+      "step": 1885184
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.9125758420506787e-05,
+      "loss": 3.8777,
+      "step": 1885696
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.9117372472996267e-05,
+      "loss": 3.8785,
+      "step": 1886208
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.9108986525485747e-05,
+      "loss": 3.8971,
+      "step": 1886720
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.9100600577975227e-05,
+      "loss": 3.8761,
+      "step": 1887232
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.909223100926844e-05,
+      "loss": 3.8868,
+      "step": 1887744
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.908384506175792e-05,
+      "loss": 3.8779,
+      "step": 1888256
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.90754591142474e-05,
+      "loss": 3.8889,
+      "step": 1888768
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.906707316673688e-05,
+      "loss": 3.8822,
+      "step": 1889280
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.9058703598030088e-05,
+      "loss": 3.8883,
+      "step": 1889792
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.9050317650519568e-05,
+      "loss": 3.8859,
+      "step": 1890304
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.904193170300905e-05,
+      "loss": 3.8984,
+      "step": 1890816
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.903354575549853e-05,
+      "loss": 3.8856,
+      "step": 1891328
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.902517618679174e-05,
+      "loss": 3.8943,
+      "step": 1891840
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.901679023928122e-05,
+      "loss": 3.8829,
+      "step": 1892352
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.90084042917707e-05,
+      "loss": 3.8838,
+      "step": 1892864
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.900001834426018e-05,
+      "loss": 3.8855,
+      "step": 1893376
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.8991648775553393e-05,
+      "loss": 3.8872,
+      "step": 1893888
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.8983262828042873e-05,
+      "loss": 3.8777,
+      "step": 1894400
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.8974876880532353e-05,
+      "loss": 3.8908,
+      "step": 1894912
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.896649093302183e-05,
+      "loss": 3.8807,
+      "step": 1895424
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.8958121364315042e-05,
+      "loss": 3.8903,
+      "step": 1895936
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.8949735416804522e-05,
+      "loss": 3.8864,
+      "step": 1896448
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.8941349469294005e-05,
+      "loss": 3.8895,
+      "step": 1896960
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.8932963521783482e-05,
+      "loss": 3.8837,
+      "step": 1897472
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.892457757427296e-05,
+      "loss": 3.8957,
+      "step": 1897984
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.891619162676244e-05,
+      "loss": 3.8794,
+      "step": 1898496
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.890780567925192e-05,
+      "loss": 3.8868,
+      "step": 1899008
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.88994197317414e-05,
+      "loss": 3.8628,
+      "step": 1899520
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.889103378423088e-05,
+      "loss": 3.8939,
+      "step": 1900032
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.888264783672036e-05,
+      "loss": 3.8927,
+      "step": 1900544
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.887426188920984e-05,
+      "loss": 3.8961,
+      "step": 1901056
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.886587594169932e-05,
+      "loss": 3.8844,
+      "step": 1901568
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.8857506372992534e-05,
+      "loss": 3.8947,
+      "step": 1902080
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.884912042548201e-05,
+      "loss": 3.8897,
+      "step": 1902592
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.884073447797149e-05,
+      "loss": 3.8999,
+      "step": 1903104
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.8832348530460974e-05,
+      "loss": 3.8783,
+      "step": 1903616
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.8823978961754186e-05,
+      "loss": 3.8839,
+      "step": 1904128
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.8815593014243663e-05,
+      "loss": 3.8841,
+      "step": 1904640
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.8807207066733143e-05,
+      "loss": 3.8975,
+      "step": 1905152
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.8798821119222623e-05,
+      "loss": 3.8824,
+      "step": 1905664
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.8790435171712103e-05,
+      "loss": 3.883,
+      "step": 1906176
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.8782049224201583e-05,
+      "loss": 3.8912,
+      "step": 1906688
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.8773679655494795e-05,
+      "loss": 3.8844,
+      "step": 1907200
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.8765293707984275e-05,
+      "loss": 3.891,
+      "step": 1907712
+    },
+    {
+      "epoch": 1.03,
+      "eval_loss": 4.003902435302734,
+      "eval_runtime": 297.3597,
+      "eval_samples_per_second": 1283.264,
+      "eval_steps_per_second": 40.103,
+      "step": 1908000
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.8756907760473755e-05,
+      "loss": 3.885,
+      "step": 1908224
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.8748521812963235e-05,
+      "loss": 3.8931,
+      "step": 1908736
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.8740135865452715e-05,
+      "loss": 3.8833,
+      "step": 1909248
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.873174991794219e-05,
+      "loss": 3.8878,
+      "step": 1909760
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.8723363970431675e-05,
+      "loss": 3.8996,
+      "step": 1910272
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.8714978022921155e-05,
+      "loss": 3.8878,
+      "step": 1910784
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.8706592075410635e-05,
+      "loss": 3.8819,
+      "step": 1911296
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.8698206127900115e-05,
+      "loss": 3.8765,
+      "step": 1911808
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.8689820180389595e-05,
+      "loss": 3.8861,
+      "step": 1912320
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.8681434232879075e-05,
+      "loss": 3.8953,
+      "step": 1912832
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.867304828536855e-05,
+      "loss": 3.8853,
+      "step": 1913344
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.866466233785803e-05,
+      "loss": 3.8881,
+      "step": 1913856
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.8656292769151247e-05,
+      "loss": 3.892,
+      "step": 1914368
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.8647906821640727e-05,
+      "loss": 3.8826,
+      "step": 1914880
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.8639520874130203e-05,
+      "loss": 3.8801,
+      "step": 1915392
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.8631134926619683e-05,
+      "loss": 3.876,
+      "step": 1915904
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.8622748979109163e-05,
+      "loss": 3.8884,
+      "step": 1916416
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.8614363031598643e-05,
+      "loss": 3.8825,
+      "step": 1916928
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.8605977084088123e-05,
+      "loss": 3.8799,
+      "step": 1917440
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.8597591136577607e-05,
+      "loss": 3.8945,
+      "step": 1917952
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.8589205189067083e-05,
+      "loss": 3.8959,
+      "step": 1918464
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.8580819241556563e-05,
+      "loss": 3.8869,
+      "step": 1918976
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.8572433294046043e-05,
+      "loss": 3.8876,
+      "step": 1919488
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.8564047346535523e-05,
+      "loss": 3.885,
+      "step": 1920000
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.8555661399025003e-05,
+      "loss": 3.8847,
+      "step": 1920512
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.8547291830318216e-05,
+      "loss": 3.8827,
+      "step": 1921024
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.8538905882807695e-05,
+      "loss": 3.8692,
+      "step": 1921536
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.8530519935297175e-05,
+      "loss": 3.8904,
+      "step": 1922048
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.8522133987786655e-05,
+      "loss": 3.8877,
+      "step": 1922560
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.8513764419079864e-05,
+      "loss": 3.8714,
+      "step": 1923072
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.8505378471569344e-05,
+      "loss": 3.8789,
+      "step": 1923584
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.8496992524058824e-05,
+      "loss": 3.8886,
+      "step": 1924096
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.8488606576548308e-05,
+      "loss": 3.892,
+      "step": 1924608
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.8480237007841517e-05,
+      "loss": 3.8904,
+      "step": 1925120
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.8471851060330997e-05,
+      "loss": 3.8862,
+      "step": 1925632
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.8463481491624206e-05,
+      "loss": 3.8804,
+      "step": 1926144
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.8455095544113686e-05,
+      "loss": 3.8956,
+      "step": 1926656
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.844670959660317e-05,
+      "loss": 3.8723,
+      "step": 1927168
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.843832364909265e-05,
+      "loss": 3.8749,
+      "step": 1927680
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.842993770158213e-05,
+      "loss": 3.8761,
+      "step": 1928192
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.842155175407161e-05,
+      "loss": 3.8808,
+      "step": 1928704
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.8413165806561086e-05,
+      "loss": 3.8854,
+      "step": 1929216
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.8404779859050566e-05,
+      "loss": 3.879,
+      "step": 1929728
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.8396410290343778e-05,
+      "loss": 3.886,
+      "step": 1930240
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.838804072163699e-05,
+      "loss": 3.8832,
+      "step": 1930752
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.837965477412647e-05,
+      "loss": 3.8794,
+      "step": 1931264
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.837126882661595e-05,
+      "loss": 3.8916,
+      "step": 1931776
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.836288287910543e-05,
+      "loss": 3.8871,
+      "step": 1932288
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.835451331039864e-05,
+      "loss": 3.8603,
+      "step": 1932800
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.8346127362888123e-05,
+      "loss": 3.8894,
+      "step": 1933312
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.8337741415377603e-05,
+      "loss": 3.8901,
+      "step": 1933824
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.8329355467867083e-05,
+      "loss": 3.8945,
+      "step": 1934336
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.832096952035656e-05,
+      "loss": 3.8794,
+      "step": 1934848
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.831258357284604e-05,
+      "loss": 3.8767,
+      "step": 1935360
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.830419762533552e-05,
+      "loss": 3.874,
+      "step": 1935872
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.8295811677825e-05,
+      "loss": 3.8805,
+      "step": 1936384
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.8287442109118212e-05,
+      "loss": 3.8827,
+      "step": 1936896
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.827905616160769e-05,
+      "loss": 3.8712,
+      "step": 1937408
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.827067021409717e-05,
+      "loss": 3.8868,
+      "step": 1937920
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.826228426658665e-05,
+      "loss": 3.8862,
+      "step": 1938432
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.825391469787986e-05,
+      "loss": 3.8749,
+      "step": 1938944
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.824552875036934e-05,
+      "loss": 3.8837,
+      "step": 1939456
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.8237159181662556e-05,
+      "loss": 3.8728,
+      "step": 1939968
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.8228789612955766e-05,
+      "loss": 3.8748,
+      "step": 1940480
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.8220403665445246e-05,
+      "loss": 3.8796,
+      "step": 1940992
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.8212017717934725e-05,
+      "loss": 3.8922,
+      "step": 1941504
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.8203631770424205e-05,
+      "loss": 3.8716,
+      "step": 1942016
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.8195245822913685e-05,
+      "loss": 3.8834,
+      "step": 1942528
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.8186859875403165e-05,
+      "loss": 3.8596,
+      "step": 1943040
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.8178473927892645e-05,
+      "loss": 3.884,
+      "step": 1943552
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.8170087980382125e-05,
+      "loss": 3.8857,
+      "step": 1944064
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.8161718411675334e-05,
+      "loss": 3.8848,
+      "step": 1944576
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.8153332464164814e-05,
+      "loss": 3.8799,
+      "step": 1945088
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.8144946516654294e-05,
+      "loss": 3.8874,
+      "step": 1945600
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.8136560569143778e-05,
+      "loss": 3.8987,
+      "step": 1946112
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.8128191000436987e-05,
+      "loss": 3.8731,
+      "step": 1946624
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.8119805052926467e-05,
+      "loss": 3.8905,
+      "step": 1947136
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.8111419105415947e-05,
+      "loss": 3.8779,
+      "step": 1947648
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.8103033157905427e-05,
+      "loss": 3.8822,
+      "step": 1948160
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.8094647210394907e-05,
+      "loss": 3.8841,
+      "step": 1948672
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.808627764168812e-05,
+      "loss": 3.8774,
+      "step": 1949184
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.80778916941776e-05,
+      "loss": 3.8901,
+      "step": 1949696
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.806950574666708e-05,
+      "loss": 3.8977,
+      "step": 1950208
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.806111979915656e-05,
+      "loss": 3.8683,
+      "step": 1950720
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.805273385164604e-05,
+      "loss": 3.8788,
+      "step": 1951232
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.8044347904135515e-05,
+      "loss": 3.8774,
+      "step": 1951744
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.8035961956625e-05,
+      "loss": 3.8892,
+      "step": 1952256
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.802757600911448e-05,
+      "loss": 3.8744,
+      "step": 1952768
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.801922281921142e-05,
+      "loss": 3.8893,
+      "step": 1953280
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.80108368717009e-05,
+      "loss": 3.8825,
+      "step": 1953792
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.800245092419038e-05,
+      "loss": 3.8787,
+      "step": 1954304
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.799406497667986e-05,
+      "loss": 3.8779,
+      "step": 1954816
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.798567902916934e-05,
+      "loss": 3.8669,
+      "step": 1955328
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.797729308165882e-05,
+      "loss": 3.875,
+      "step": 1955840
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.7968923512952033e-05,
+      "loss": 3.8856,
+      "step": 1956352
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.796053756544151e-05,
+      "loss": 3.8923,
+      "step": 1956864
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.795215161793099e-05,
+      "loss": 3.8734,
+      "step": 1957376
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.794376567042047e-05,
+      "loss": 3.8761,
+      "step": 1957888
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.793537972290995e-05,
+      "loss": 3.8773,
+      "step": 1958400
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.7926993775399432e-05,
+      "loss": 3.8835,
+      "step": 1958912
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.791862420669264e-05,
+      "loss": 3.8891,
+      "step": 1959424
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.791023825918212e-05,
+      "loss": 3.8798,
+      "step": 1959936
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.79018523116716e-05,
+      "loss": 3.8842,
+      "step": 1960448
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.789346636416108e-05,
+      "loss": 3.8784,
+      "step": 1960960
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.788508041665056e-05,
+      "loss": 3.8813,
+      "step": 1961472
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.787669446914004e-05,
+      "loss": 3.8688,
+      "step": 1961984
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.786830852162952e-05,
+      "loss": 3.8741,
+      "step": 1962496
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.7859938952922734e-05,
+      "loss": 3.8866,
+      "step": 1963008
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.7851569384215943e-05,
+      "loss": 3.8749,
+      "step": 1963520
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.7843183436705423e-05,
+      "loss": 3.8792,
+      "step": 1964032
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.7834797489194903e-05,
+      "loss": 3.8735,
+      "step": 1964544
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.7826411541684386e-05,
+      "loss": 3.8812,
+      "step": 1965056
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.7818025594173866e-05,
+      "loss": 3.8766,
+      "step": 1965568
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.7809639646663343e-05,
+      "loss": 3.8805,
+      "step": 1966080
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.7801253699152823e-05,
+      "loss": 3.8826,
+      "step": 1966592
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.7792867751642302e-05,
+      "loss": 3.8951,
+      "step": 1967104
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.7784498182935515e-05,
+      "loss": 3.8817,
+      "step": 1967616
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.7776128614228727e-05,
+      "loss": 3.8847,
+      "step": 1968128
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.7767742666718207e-05,
+      "loss": 3.8831,
+      "step": 1968640
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.7759356719207687e-05,
+      "loss": 3.8746,
+      "step": 1969152
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.7750970771697164e-05,
+      "loss": 3.8858,
+      "step": 1969664
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.7742601202990376e-05,
+      "loss": 3.8759,
+      "step": 1970176
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.7734215255479856e-05,
+      "loss": 3.8747,
+      "step": 1970688
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.772582930796934e-05,
+      "loss": 3.8834,
+      "step": 1971200
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.7717443360458816e-05,
+      "loss": 3.8813,
+      "step": 1971712
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.7709057412948296e-05,
+      "loss": 3.8828,
+      "step": 1972224
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.7700671465437776e-05,
+      "loss": 3.878,
+      "step": 1972736
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.769230189673099e-05,
+      "loss": 3.8886,
+      "step": 1973248
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.768391594922047e-05,
+      "loss": 3.8745,
+      "step": 1973760
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.767553000170995e-05,
+      "loss": 3.8903,
+      "step": 1974272
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.766714405419943e-05,
+      "loss": 3.8769,
+      "step": 1974784
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.765875810668891e-05,
+      "loss": 3.8795,
+      "step": 1975296
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.7650388537982118e-05,
+      "loss": 3.8564,
+      "step": 1975808
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.7642002590471598e-05,
+      "loss": 3.89,
+      "step": 1976320
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.7633616642961078e-05,
+      "loss": 3.8834,
+      "step": 1976832
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.7625230695450557e-05,
+      "loss": 3.8949,
+      "step": 1977344
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.761684474794004e-05,
+      "loss": 3.8785,
+      "step": 1977856
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.760845880042952e-05,
+      "loss": 3.8861,
+      "step": 1978368
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.7600072852918997e-05,
+      "loss": 3.8817,
+      "step": 1978880
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.759170328421221e-05,
+      "loss": 3.8985,
+      "step": 1979392
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.758331733670169e-05,
+      "loss": 3.8729,
+      "step": 1979904
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.7574947767994902e-05,
+      "loss": 3.8786,
+      "step": 1980416
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.756657819928811e-05,
+      "loss": 3.8793,
+      "step": 1980928
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.755819225177759e-05,
+      "loss": 3.889,
+      "step": 1981440
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.754980630426707e-05,
+      "loss": 3.8781,
+      "step": 1981952
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.754142035675655e-05,
+      "loss": 3.8802,
+      "step": 1982464
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.753303440924603e-05,
+      "loss": 3.8845,
+      "step": 1982976
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.752464846173551e-05,
+      "loss": 3.8826,
+      "step": 1983488
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.7516262514224994e-05,
+      "loss": 3.8847,
+      "step": 1984000
+    },
+    {
+      "epoch": 0.03,
+      "eval_loss": 4.001920223236084,
+      "eval_runtime": 304.8997,
+      "eval_samples_per_second": 1251.529,
+      "eval_steps_per_second": 39.111,
+      "step": 1984320
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.750787656671447e-05,
+      "loss": 3.874,
+      "step": 1984512
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.749949061920395e-05,
+      "loss": 3.8873,
+      "step": 1985024
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.749110467169343e-05,
+      "loss": 3.8818,
+      "step": 1985536
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.748271872418291e-05,
+      "loss": 3.8807,
+      "step": 1986048
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.747433277667239e-05,
+      "loss": 3.8946,
+      "step": 1986560
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.746594682916187e-05,
+      "loss": 3.8807,
+      "step": 1987072
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.7457560881651354e-05,
+      "loss": 3.8813,
+      "step": 1987584
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.744917493414083e-05,
+      "loss": 3.8722,
+      "step": 1988096
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.744078898663031e-05,
+      "loss": 3.8788,
+      "step": 1988608
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.743240303911979e-05,
+      "loss": 3.8883,
+      "step": 1989120
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.742401709160927e-05,
+      "loss": 3.8859,
+      "step": 1989632
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.741563114409875e-05,
+      "loss": 3.8826,
+      "step": 1990144
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.7407261575391963e-05,
+      "loss": 3.888,
+      "step": 1990656
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.7398875627881443e-05,
+      "loss": 3.8773,
+      "step": 1991168
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.7390489680370923e-05,
+      "loss": 3.8755,
+      "step": 1991680
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.7382103732860403e-05,
+      "loss": 3.8691,
+      "step": 1992192
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.7373717785349883e-05,
+      "loss": 3.8857,
+      "step": 1992704
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.736533183783936e-05,
+      "loss": 3.8777,
+      "step": 1993216
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.735694589032884e-05,
+      "loss": 3.8767,
+      "step": 1993728
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.734855994281832e-05,
+      "loss": 3.8872,
+      "step": 1994240
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.7340173995307803e-05,
+      "loss": 3.8922,
+      "step": 1994752
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.7331788047797283e-05,
+      "loss": 3.883,
+      "step": 1995264
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.7323418479090492e-05,
+      "loss": 3.8799,
+      "step": 1995776
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.7315032531579972e-05,
+      "loss": 3.8805,
+      "step": 1996288
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.730664658406945e-05,
+      "loss": 3.8808,
+      "step": 1996800
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.729826063655893e-05,
+      "loss": 3.8805,
+      "step": 1997312
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.728987468904841e-05,
+      "loss": 3.8626,
+      "step": 1997824
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.728148874153789e-05,
+      "loss": 3.8845,
+      "step": 1998336
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.727310279402737e-05,
+      "loss": 3.8837,
+      "step": 1998848
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.7264733225320584e-05,
+      "loss": 3.8664,
+      "step": 1999360
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.7256347277810064e-05,
+      "loss": 3.8763,
+      "step": 1999872
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.724796133029954e-05,
+      "loss": 3.8853,
+      "step": 2000384
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.7239591761592756e-05,
+      "loss": 3.8818,
+      "step": 2000896
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.7231205814082236e-05,
+      "loss": 3.8911,
+      "step": 2001408
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.7222819866571713e-05,
+      "loss": 3.8815,
+      "step": 2001920
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.7214450297864925e-05,
+      "loss": 3.8758,
+      "step": 2002432
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.7206064350354405e-05,
+      "loss": 3.8954,
+      "step": 2002944
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.7197678402843885e-05,
+      "loss": 3.8634,
+      "step": 2003456
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.7189292455333365e-05,
+      "loss": 3.8713,
+      "step": 2003968
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.7180906507822845e-05,
+      "loss": 3.8751,
+      "step": 2004480
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.7172520560312325e-05,
+      "loss": 3.8735,
+      "step": 2004992
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.7164134612801805e-05,
+      "loss": 3.8783,
+      "step": 2005504
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.7155748665291285e-05,
+      "loss": 3.8779,
+      "step": 2006016
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.7147362717780765e-05,
+      "loss": 3.8802,
+      "step": 2006528
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.7138976770270245e-05,
+      "loss": 3.8768,
+      "step": 2007040
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.7130590822759725e-05,
+      "loss": 3.873,
+      "step": 2007552
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.7122204875249205e-05,
+      "loss": 3.882,
+      "step": 2008064
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.7113835306542417e-05,
+      "loss": 3.8817,
+      "step": 2008576
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.7105449359031894e-05,
+      "loss": 3.8585,
+      "step": 2009088
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.7097063411521374e-05,
+      "loss": 3.8827,
+      "step": 2009600
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.7088677464010854e-05,
+      "loss": 3.8848,
+      "step": 2010112
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.7080307895304066e-05,
+      "loss": 3.8917,
+      "step": 2010624
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.707193832659728e-05,
+      "loss": 3.8742,
+      "step": 2011136
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.706355237908676e-05,
+      "loss": 3.8751,
+      "step": 2011648
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.7055182810379968e-05,
+      "loss": 3.868,
+      "step": 2012160
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.7046796862869448e-05,
+      "loss": 3.8722,
+      "step": 2012672
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.7038410915358928e-05,
+      "loss": 3.8832,
+      "step": 2013184
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.703002496784841e-05,
+      "loss": 3.8671,
+      "step": 2013696
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.702163902033789e-05,
+      "loss": 3.881,
+      "step": 2014208
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.7013253072827368e-05,
+      "loss": 3.8801,
+      "step": 2014720
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.7004867125316848e-05,
+      "loss": 3.8738,
+      "step": 2015232
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.6996481177806328e-05,
+      "loss": 3.8776,
+      "step": 2015744
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.698811160909954e-05,
+      "loss": 3.8675,
+      "step": 2016256
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.697972566158902e-05,
+      "loss": 3.871,
+      "step": 2016768
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.69713397140785e-05,
+      "loss": 3.8749,
+      "step": 2017280
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.6962970145371712e-05,
+      "loss": 3.8845,
+      "step": 2017792
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.6954584197861192e-05,
+      "loss": 3.8682,
+      "step": 2018304
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.694619825035067e-05,
+      "loss": 3.8784,
+      "step": 2018816
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.693781230284015e-05,
+      "loss": 3.8587,
+      "step": 2019328
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.6929426355329632e-05,
+      "loss": 3.8776,
+      "step": 2019840
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.6921040407819112e-05,
+      "loss": 3.8788,
+      "step": 2020352
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.6912654460308592e-05,
+      "loss": 3.8841,
+      "step": 2020864
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.6904268512798072e-05,
+      "loss": 3.8751,
+      "step": 2021376
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.689588256528755e-05,
+      "loss": 3.8774,
+      "step": 2021888
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.688749661777703e-05,
+      "loss": 3.8956,
+      "step": 2022400
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.687911067026651e-05,
+      "loss": 3.8695,
+      "step": 2022912
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.687072472275599e-05,
+      "loss": 3.8884,
+      "step": 2023424
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.68623551540492e-05,
+      "loss": 3.8722,
+      "step": 2023936
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.6853985585342414e-05,
+      "loss": 3.8756,
+      "step": 2024448
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.6845599637831893e-05,
+      "loss": 3.882,
+      "step": 2024960
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.6837213690321373e-05,
+      "loss": 3.874,
+      "step": 2025472
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.6828844121614586e-05,
+      "loss": 3.8862,
+      "step": 2025984
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.6820458174104066e-05,
+      "loss": 3.8904,
+      "step": 2026496
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.6812072226593546e-05,
+      "loss": 3.8722,
+      "step": 2027008
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.6803686279083022e-05,
+      "loss": 3.8695,
+      "step": 2027520
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.6795300331572502e-05,
+      "loss": 3.8727,
+      "step": 2028032
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.6786914384061982e-05,
+      "loss": 3.8826,
+      "step": 2028544
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.6778528436551462e-05,
+      "loss": 3.8713,
+      "step": 2029056
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.6770158867844675e-05,
+      "loss": 3.8834,
+      "step": 2029568
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.6761772920334155e-05,
+      "loss": 3.8764,
+      "step": 2030080
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6753386972823635e-05,
+      "loss": 3.8763,
+      "step": 2030592
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6745001025313115e-05,
+      "loss": 3.8715,
+      "step": 2031104
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6736615077802595e-05,
+      "loss": 3.8618,
+      "step": 2031616
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6728229130292075e-05,
+      "loss": 3.8692,
+      "step": 2032128
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.671984318278155e-05,
+      "loss": 3.8809,
+      "step": 2032640
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6711473614074767e-05,
+      "loss": 3.8842,
+      "step": 2033152
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6703087666564247e-05,
+      "loss": 3.8692,
+      "step": 2033664
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6694701719053727e-05,
+      "loss": 3.8713,
+      "step": 2034176
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6686315771543203e-05,
+      "loss": 3.8738,
+      "step": 2034688
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6677929824032683e-05,
+      "loss": 3.8791,
+      "step": 2035200
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6669543876522163e-05,
+      "loss": 3.8868,
+      "step": 2035712
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6661157929011643e-05,
+      "loss": 3.8721,
+      "step": 2036224
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6652771981501127e-05,
+      "loss": 3.88,
+      "step": 2036736
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6644402412794336e-05,
+      "loss": 3.8739,
+      "step": 2037248
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6636016465283816e-05,
+      "loss": 3.8744,
+      "step": 2037760
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6627630517773296e-05,
+      "loss": 3.8677,
+      "step": 2038272
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6619260949066505e-05,
+      "loss": 3.8723,
+      "step": 2038784
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6610875001555988e-05,
+      "loss": 3.8779,
+      "step": 2039296
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6602489054045468e-05,
+      "loss": 3.8709,
+      "step": 2039808
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6594103106534948e-05,
+      "loss": 3.8728,
+      "step": 2040320
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6585717159024428e-05,
+      "loss": 3.8684,
+      "step": 2040832
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6577331211513908e-05,
+      "loss": 3.875,
+      "step": 2041344
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6568945264003384e-05,
+      "loss": 3.876,
+      "step": 2041856
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6560559316492864e-05,
+      "loss": 3.8737,
+      "step": 2042368
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.655220612658981e-05,
+      "loss": 3.878,
+      "step": 2042880
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.654382017907929e-05,
+      "loss": 3.8915,
+      "step": 2043392
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.653543423156877e-05,
+      "loss": 3.879,
+      "step": 2043904
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.652704828405825e-05,
+      "loss": 3.8791,
+      "step": 2044416
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.651866233654773e-05,
+      "loss": 3.8784,
+      "step": 2044928
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6510292767840942e-05,
+      "loss": 3.8709,
+      "step": 2045440
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.650192319913415e-05,
+      "loss": 3.8841,
+      "step": 2045952
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.649353725162363e-05,
+      "loss": 3.8683,
+      "step": 2046464
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.648515130411311e-05,
+      "loss": 3.8701,
+      "step": 2046976
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.647676535660259e-05,
+      "loss": 3.8791,
+      "step": 2047488
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.646837940909207e-05,
+      "loss": 3.8784,
+      "step": 2048000
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.645999346158155e-05,
+      "loss": 3.8756,
+      "step": 2048512
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6451607514071034e-05,
+      "loss": 3.8764,
+      "step": 2049024
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.644322156656051e-05,
+      "loss": 3.88,
+      "step": 2049536
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.643483561904999e-05,
+      "loss": 3.8736,
+      "step": 2050048
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.642644967153947e-05,
+      "loss": 3.884,
+      "step": 2050560
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.641806372402895e-05,
+      "loss": 3.8782,
+      "step": 2051072
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.640967777651843e-05,
+      "loss": 3.8713,
+      "step": 2051584
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6401308207811643e-05,
+      "loss": 3.852,
+      "step": 2052096
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6392922260301123e-05,
+      "loss": 3.8871,
+      "step": 2052608
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6384536312790603e-05,
+      "loss": 3.8773,
+      "step": 2053120
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6376150365280083e-05,
+      "loss": 3.8902,
+      "step": 2053632
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6367780796573292e-05,
+      "loss": 3.8761,
+      "step": 2054144
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6359394849062772e-05,
+      "loss": 3.8795,
+      "step": 2054656
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6351008901552252e-05,
+      "loss": 3.8789,
+      "step": 2055168
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6342622954041735e-05,
+      "loss": 3.894,
+      "step": 2055680
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6334253385334944e-05,
+      "loss": 3.8646,
+      "step": 2056192
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6325867437824424e-05,
+      "loss": 3.8748,
+      "step": 2056704
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6317481490313904e-05,
+      "loss": 3.8772,
+      "step": 2057216
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6309095542803384e-05,
+      "loss": 3.8843,
+      "step": 2057728
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6300725974096597e-05,
+      "loss": 3.8739,
+      "step": 2058240
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6292340026586076e-05,
+      "loss": 3.8749,
+      "step": 2058752
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6283954079075556e-05,
+      "loss": 3.8752,
+      "step": 2059264
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6275568131565036e-05,
+      "loss": 3.8806,
+      "step": 2059776
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.6267182184054513e-05,
+      "loss": 3.8766,
+      "step": 2060288
+    },
+    {
+      "epoch": 0.03,
+      "eval_loss": 4.0003437995910645,
+      "eval_runtime": 295.9646,
+      "eval_samples_per_second": 1289.313,
+      "eval_steps_per_second": 40.292,
+      "step": 2060640
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.6258796236543993e-05,
+      "loss": 3.871,
+      "step": 2060800
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.6250410289033473e-05,
+      "loss": 3.8794,
+      "step": 2061312
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.6242024341522953e-05,
+      "loss": 3.8765,
+      "step": 2061824
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.6233654772816165e-05,
+      "loss": 3.8782,
+      "step": 2062336
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.6225268825305645e-05,
+      "loss": 3.8837,
+      "step": 2062848
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.6216882877795125e-05,
+      "loss": 3.8754,
+      "step": 2063360
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.6208496930284605e-05,
+      "loss": 3.8795,
+      "step": 2063872
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.6200110982774085e-05,
+      "loss": 3.8684,
+      "step": 2064384
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.6191741414067298e-05,
+      "loss": 3.872,
+      "step": 2064896
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.618337184536051e-05,
+      "loss": 3.882,
+      "step": 2065408
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.6174985897849987e-05,
+      "loss": 3.8839,
+      "step": 2065920
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.6166599950339467e-05,
+      "loss": 3.8762,
+      "step": 2066432
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.615823038163268e-05,
+      "loss": 3.8895,
+      "step": 2066944
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.614984443412216e-05,
+      "loss": 3.8652,
+      "step": 2067456
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.614145848661164e-05,
+      "loss": 3.8721,
+      "step": 2067968
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.613307253910112e-05,
+      "loss": 3.8663,
+      "step": 2068480
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.61246865915906e-05,
+      "loss": 3.8799,
+      "step": 2068992
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.611630064408008e-05,
+      "loss": 3.8721,
+      "step": 2069504
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.6107931075373288e-05,
+      "loss": 3.8728,
+      "step": 2070016
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.6099545127862768e-05,
+      "loss": 3.8796,
+      "step": 2070528
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.609115918035225e-05,
+      "loss": 3.886,
+      "step": 2071040
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.608277323284173e-05,
+      "loss": 3.8795,
+      "step": 2071552
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.607438728533121e-05,
+      "loss": 3.8732,
+      "step": 2072064
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.606600133782069e-05,
+      "loss": 3.8782,
+      "step": 2072576
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.6057615390310168e-05,
+      "loss": 3.8768,
+      "step": 2073088
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.6049229442799648e-05,
+      "loss": 3.8751,
+      "step": 2073600
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.6040843495289128e-05,
+      "loss": 3.8606,
+      "step": 2074112
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.603245754777861e-05,
+      "loss": 3.8752,
+      "step": 2074624
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.602407160026809e-05,
+      "loss": 3.8783,
+      "step": 2075136
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.601568565275757e-05,
+      "loss": 3.8607,
+      "step": 2075648
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.6007332462854513e-05,
+      "loss": 3.8724,
+      "step": 2076160
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.599894651534399e-05,
+      "loss": 3.8815,
+      "step": 2076672
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5990560567833472e-05,
+      "loss": 3.8764,
+      "step": 2077184
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5982174620322952e-05,
+      "loss": 3.8862,
+      "step": 2077696
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5973788672812432e-05,
+      "loss": 3.8741,
+      "step": 2078208
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5965435482909374e-05,
+      "loss": 3.8729,
+      "step": 2078720
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5957049535398854e-05,
+      "loss": 3.8933,
+      "step": 2079232
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5948663587888334e-05,
+      "loss": 3.8563,
+      "step": 2079744
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5940277640377814e-05,
+      "loss": 3.8681,
+      "step": 2080256
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5931891692867294e-05,
+      "loss": 3.8736,
+      "step": 2080768
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5923505745356774e-05,
+      "loss": 3.8671,
+      "step": 2081280
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5915119797846254e-05,
+      "loss": 3.8725,
+      "step": 2081792
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5906733850335734e-05,
+      "loss": 3.8785,
+      "step": 2082304
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5898347902825214e-05,
+      "loss": 3.8722,
+      "step": 2082816
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5889961955314694e-05,
+      "loss": 3.8739,
+      "step": 2083328
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5881576007804174e-05,
+      "loss": 3.8685,
+      "step": 2083840
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5873206439097386e-05,
+      "loss": 3.8774,
+      "step": 2084352
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5864820491586866e-05,
+      "loss": 3.8796,
+      "step": 2084864
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5856434544076346e-05,
+      "loss": 3.8554,
+      "step": 2085376
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5848048596565822e-05,
+      "loss": 3.8769,
+      "step": 2085888
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5839662649055302e-05,
+      "loss": 3.8808,
+      "step": 2086400
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5831276701544782e-05,
+      "loss": 3.8836,
+      "step": 2086912
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5822890754034266e-05,
+      "loss": 3.8697,
+      "step": 2087424
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5814504806523746e-05,
+      "loss": 3.869,
+      "step": 2087936
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5806135237816955e-05,
+      "loss": 3.8681,
+      "step": 2088448
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5797749290306435e-05,
+      "loss": 3.866,
+      "step": 2088960
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5789363342795915e-05,
+      "loss": 3.8789,
+      "step": 2089472
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5780977395285395e-05,
+      "loss": 3.8598,
+      "step": 2089984
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5772607826578607e-05,
+      "loss": 3.8781,
+      "step": 2090496
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5764221879068087e-05,
+      "loss": 3.8738,
+      "step": 2091008
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5755835931557567e-05,
+      "loss": 3.8702,
+      "step": 2091520
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5747449984047047e-05,
+      "loss": 3.8682,
+      "step": 2092032
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5739064036536527e-05,
+      "loss": 3.8655,
+      "step": 2092544
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5730678089026004e-05,
+      "loss": 3.8682,
+      "step": 2093056
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5722292141515483e-05,
+      "loss": 3.861,
+      "step": 2093568
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.57139225728087e-05,
+      "loss": 3.8873,
+      "step": 2094080
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5705536625298176e-05,
+      "loss": 3.8678,
+      "step": 2094592
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5697150677787656e-05,
+      "loss": 3.865,
+      "step": 2095104
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5688764730277136e-05,
+      "loss": 3.8558,
+      "step": 2095616
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5680378782766616e-05,
+      "loss": 3.8725,
+      "step": 2096128
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5671992835256096e-05,
+      "loss": 3.8742,
+      "step": 2096640
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5663606887745576e-05,
+      "loss": 3.8784,
+      "step": 2097152
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.565522094023506e-05,
+      "loss": 3.8726,
+      "step": 2097664
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5646851371528268e-05,
+      "loss": 3.8767,
+      "step": 2098176
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5638465424017748e-05,
+      "loss": 3.8884,
+      "step": 2098688
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5630079476507228e-05,
+      "loss": 3.8668,
+      "step": 2099200
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5621693528996708e-05,
+      "loss": 3.8848,
+      "step": 2099712
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.561332396028992e-05,
+      "loss": 3.868,
+      "step": 2100224
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.56049380127794e-05,
+      "loss": 3.8697,
+      "step": 2100736
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.559655206526888e-05,
+      "loss": 3.8736,
+      "step": 2101248
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5588166117758357e-05,
+      "loss": 3.867,
+      "step": 2101760
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.557979654905157e-05,
+      "loss": 3.8792,
+      "step": 2102272
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.557141060154105e-05,
+      "loss": 3.8882,
+      "step": 2102784
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5563041032834262e-05,
+      "loss": 3.8737,
+      "step": 2103296
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5554655085323742e-05,
+      "loss": 3.8605,
+      "step": 2103808
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5546269137813222e-05,
+      "loss": 3.8707,
+      "step": 2104320
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5537883190302702e-05,
+      "loss": 3.8793,
+      "step": 2104832
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.552949724279218e-05,
+      "loss": 3.8684,
+      "step": 2105344
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5521111295281658e-05,
+      "loss": 3.8793,
+      "step": 2105856
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.5512725347771138e-05,
+      "loss": 3.8737,
+      "step": 2106368
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.550433940026062e-05,
+      "loss": 3.8754,
+      "step": 2106880
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5495986210357563e-05,
+      "loss": 3.8667,
+      "step": 2107392
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5487600262847043e-05,
+      "loss": 3.8582,
+      "step": 2107904
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5479214315336523e-05,
+      "loss": 3.8664,
+      "step": 2108416
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5470828367826003e-05,
+      "loss": 3.8725,
+      "step": 2108928
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5462442420315483e-05,
+      "loss": 3.8883,
+      "step": 2109440
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5454056472804963e-05,
+      "loss": 3.8608,
+      "step": 2109952
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5445670525294443e-05,
+      "loss": 3.8669,
+      "step": 2110464
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5437284577783923e-05,
+      "loss": 3.8721,
+      "step": 2110976
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5428915009077132e-05,
+      "loss": 3.8733,
+      "step": 2111488
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5420529061566612e-05,
+      "loss": 3.8786,
+      "step": 2112000
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5412143114056092e-05,
+      "loss": 3.8692,
+      "step": 2112512
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5403757166545575e-05,
+      "loss": 3.8787,
+      "step": 2113024
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5395387597838784e-05,
+      "loss": 3.8668,
+      "step": 2113536
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5387001650328264e-05,
+      "loss": 3.8712,
+      "step": 2114048
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5378615702817744e-05,
+      "loss": 3.8642,
+      "step": 2114560
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5370229755307224e-05,
+      "loss": 3.8683,
+      "step": 2115072
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5361843807796704e-05,
+      "loss": 3.8748,
+      "step": 2115584
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5353474239089917e-05,
+      "loss": 3.8678,
+      "step": 2116096
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5345088291579397e-05,
+      "loss": 3.8684,
+      "step": 2116608
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5336702344068877e-05,
+      "loss": 3.8607,
+      "step": 2117120
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5328316396558357e-05,
+      "loss": 3.8696,
+      "step": 2117632
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5319930449047833e-05,
+      "loss": 3.8737,
+      "step": 2118144
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5311544501537313e-05,
+      "loss": 3.8725,
+      "step": 2118656
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5303158554026793e-05,
+      "loss": 3.872,
+      "step": 2119168
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5294772606516276e-05,
+      "loss": 3.8794,
+      "step": 2119680
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5286403037809485e-05,
+      "loss": 3.8777,
+      "step": 2120192
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5278017090298965e-05,
+      "loss": 3.8765,
+      "step": 2120704
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5269631142788445e-05,
+      "loss": 3.8754,
+      "step": 2121216
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5261261574081658e-05,
+      "loss": 3.867,
+      "step": 2121728
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5252875626571136e-05,
+      "loss": 3.8763,
+      "step": 2122240
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.524450605786435e-05,
+      "loss": 3.8667,
+      "step": 2122752
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.523612011035383e-05,
+      "loss": 3.8669,
+      "step": 2123264
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5227734162843307e-05,
+      "loss": 3.873,
+      "step": 2123776
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5219348215332788e-05,
+      "loss": 3.8741,
+      "step": 2124288
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5210962267822268e-05,
+      "loss": 3.8724,
+      "step": 2124800
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5202576320311748e-05,
+      "loss": 3.8718,
+      "step": 2125312
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5194190372801228e-05,
+      "loss": 3.8756,
+      "step": 2125824
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5185804425290708e-05,
+      "loss": 3.8678,
+      "step": 2126336
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.517741847778019e-05,
+      "loss": 3.878,
+      "step": 2126848
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5169032530269666e-05,
+      "loss": 3.8735,
+      "step": 2127360
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5160646582759146e-05,
+      "loss": 3.8702,
+      "step": 2127872
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5152260635248628e-05,
+      "loss": 3.8443,
+      "step": 2128384
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.514389106654184e-05,
+      "loss": 3.8801,
+      "step": 2128896
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5135505119031317e-05,
+      "loss": 3.8721,
+      "step": 2129408
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5127135550324531e-05,
+      "loss": 3.8856,
+      "step": 2129920
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5118749602814011e-05,
+      "loss": 3.8741,
+      "step": 2130432
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.511036365530349e-05,
+      "loss": 3.8733,
+      "step": 2130944
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.510197770779297e-05,
+      "loss": 3.8783,
+      "step": 2131456
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5093608139086182e-05,
+      "loss": 3.8869,
+      "step": 2131968
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5085222191575662e-05,
+      "loss": 3.8601,
+      "step": 2132480
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.507683624406514e-05,
+      "loss": 3.872,
+      "step": 2132992
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.506845029655462e-05,
+      "loss": 3.8712,
+      "step": 2133504
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.50600643490441e-05,
+      "loss": 3.8763,
+      "step": 2134016
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5051694780337314e-05,
+      "loss": 3.8731,
+      "step": 2134528
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5043308832826791e-05,
+      "loss": 3.8716,
+      "step": 2135040
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.503492288531627e-05,
+      "loss": 3.8714,
+      "step": 2135552
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5026553316609485e-05,
+      "loss": 3.8776,
+      "step": 2136064
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.5018167369098962e-05,
+      "loss": 3.8712,
+      "step": 2136576
+    },
+    {
+      "epoch": 1.03,
+      "eval_loss": 3.9988739490509033,
+      "eval_runtime": 302.3299,
+      "eval_samples_per_second": 1262.167,
+      "eval_steps_per_second": 39.444,
+      "step": 2136960
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.5009781421588443e-05,
+      "loss": 3.8794,
+      "step": 2137088
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.5001395474077923e-05,
+      "loss": 3.8748,
+      "step": 2137600
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.4993009526567403e-05,
+      "loss": 3.8713,
+      "step": 2138112
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.4984623579056883e-05,
+      "loss": 3.8758,
+      "step": 2138624
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.4976237631546363e-05,
+      "loss": 3.8759,
+      "step": 2139136
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.4967851684035845e-05,
+      "loss": 3.877,
+      "step": 2139648
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.4959465736525321e-05,
+      "loss": 3.8718,
+      "step": 2140160
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.4951079789014801e-05,
+      "loss": 3.8632,
+      "step": 2140672
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.4942693841504283e-05,
+      "loss": 3.8667,
+      "step": 2141184
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.4934307893993763e-05,
+      "loss": 3.8843,
+      "step": 2141696
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.4925921946483243e-05,
+      "loss": 3.8752,
+      "step": 2142208
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.4917535998972723e-05,
+      "loss": 3.8683,
+      "step": 2142720
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.4909166430265934e-05,
+      "loss": 3.8865,
+      "step": 2143232
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.4900780482755413e-05,
+      "loss": 3.864,
+      "step": 2143744
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.4892394535244893e-05,
+      "loss": 3.8673,
+      "step": 2144256
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.4884008587734375e-05,
+      "loss": 3.8639,
+      "step": 2144768
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.4875622640223852e-05,
+      "loss": 3.8758,
+      "step": 2145280
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.4867236692713332e-05,
+      "loss": 3.8669,
+      "step": 2145792
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.4858850745202813e-05,
+      "loss": 3.8704,
+      "step": 2146304
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.4850464797692293e-05,
+      "loss": 3.8764,
+      "step": 2146816
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.4842078850181773e-05,
+      "loss": 3.8792,
+      "step": 2147328
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.4833692902671253e-05,
+      "loss": 3.8782,
+      "step": 2147840
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.4825306955160733e-05,
+      "loss": 3.8659,
+      "step": 2148352
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.4816921007650211e-05,
+      "loss": 3.8731,
+      "step": 2148864
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.4808551438943424e-05,
+      "loss": 3.8731,
+      "step": 2149376
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.4800181870236635e-05,
+      "loss": 3.8692,
+      "step": 2149888
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.4791795922726115e-05,
+      "loss": 3.8564,
+      "step": 2150400
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.4783409975215595e-05,
+      "loss": 3.868,
+      "step": 2150912
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.4775024027705076e-05,
+      "loss": 3.8752,
+      "step": 2151424
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.4766638080194556e-05,
+      "loss": 3.8564,
+      "step": 2151936
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4758252132684033e-05,
+      "loss": 3.8688,
+      "step": 2152448
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4749882563977247e-05,
+      "loss": 3.8709,
+      "step": 2152960
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4741496616466727e-05,
+      "loss": 3.8766,
+      "step": 2153472
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4733110668956207e-05,
+      "loss": 3.8791,
+      "step": 2153984
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4724724721445685e-05,
+      "loss": 3.871,
+      "step": 2154496
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4716355152738898e-05,
+      "loss": 3.8756,
+      "step": 2155008
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4707969205228377e-05,
+      "loss": 3.8845,
+      "step": 2155520
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4699583257717856e-05,
+      "loss": 3.8493,
+      "step": 2156032
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4691197310207336e-05,
+      "loss": 3.8639,
+      "step": 2156544
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4682827741500548e-05,
+      "loss": 3.8723,
+      "step": 2157056
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.467444179399003e-05,
+      "loss": 3.8571,
+      "step": 2157568
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4666055846479506e-05,
+      "loss": 3.8723,
+      "step": 2158080
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.465768627777272e-05,
+      "loss": 3.8719,
+      "step": 2158592
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.46493003302622e-05,
+      "loss": 3.8686,
+      "step": 2159104
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.464091438275168e-05,
+      "loss": 3.8682,
+      "step": 2159616
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4632528435241159e-05,
+      "loss": 3.8671,
+      "step": 2160128
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4624142487730639e-05,
+      "loss": 3.8705,
+      "step": 2160640
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4615756540220119e-05,
+      "loss": 3.8819,
+      "step": 2161152
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4607370592709599e-05,
+      "loss": 3.8462,
+      "step": 2161664
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4598984645199079e-05,
+      "loss": 3.8695,
+      "step": 2162176
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.459059869768856e-05,
+      "loss": 3.8796,
+      "step": 2162688
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.458222912898177e-05,
+      "loss": 3.8808,
+      "step": 2163200
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.457384318147125e-05,
+      "loss": 3.869,
+      "step": 2163712
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4565457233960731e-05,
+      "loss": 3.8646,
+      "step": 2164224
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4557071286450211e-05,
+      "loss": 3.8669,
+      "step": 2164736
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4548718096547154e-05,
+      "loss": 3.8585,
+      "step": 2165248
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.454033214903663e-05,
+      "loss": 3.8752,
+      "step": 2165760
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.453194620152611e-05,
+      "loss": 3.8556,
+      "step": 2166272
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4523560254015592e-05,
+      "loss": 3.8776,
+      "step": 2166784
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4515174306505072e-05,
+      "loss": 3.8669,
+      "step": 2167296
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4506788358994552e-05,
+      "loss": 3.8697,
+      "step": 2167808
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4498418790287763e-05,
+      "loss": 3.8604,
+      "step": 2168320
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4490032842777243e-05,
+      "loss": 3.8665,
+      "step": 2168832
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4481646895266723e-05,
+      "loss": 3.8615,
+      "step": 2169344
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4473260947756203e-05,
+      "loss": 3.8588,
+      "step": 2169856
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4464875000245685e-05,
+      "loss": 3.8792,
+      "step": 2170368
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4456489052735161e-05,
+      "loss": 3.8697,
+      "step": 2170880
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4448103105224641e-05,
+      "loss": 3.8585,
+      "step": 2171392
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4439717157714123e-05,
+      "loss": 3.8501,
+      "step": 2171904
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4431331210203603e-05,
+      "loss": 3.8716,
+      "step": 2172416
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4422961641496814e-05,
+      "loss": 3.865,
+      "step": 2172928
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4414575693986293e-05,
+      "loss": 3.88,
+      "step": 2173440
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4406189746475773e-05,
+      "loss": 3.8641,
+      "step": 2173952
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4397803798965253e-05,
+      "loss": 3.8743,
+      "step": 2174464
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4389417851454733e-05,
+      "loss": 3.8807,
+      "step": 2174976
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4381048282747944e-05,
+      "loss": 3.8649,
+      "step": 2175488
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4372662335237424e-05,
+      "loss": 3.8759,
+      "step": 2176000
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4364276387726904e-05,
+      "loss": 3.8659,
+      "step": 2176512
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4355890440216386e-05,
+      "loss": 3.8644,
+      "step": 2177024
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4347504492705866e-05,
+      "loss": 3.8721,
+      "step": 2177536
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4339118545195342e-05,
+      "loss": 3.8659,
+      "step": 2178048
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4330732597684824e-05,
+      "loss": 3.8702,
+      "step": 2178560
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4322363028978036e-05,
+      "loss": 3.8868,
+      "step": 2179072
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4313977081467515e-05,
+      "loss": 3.8716,
+      "step": 2179584
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4305591133956995e-05,
+      "loss": 3.8554,
+      "step": 2180096
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4297205186446475e-05,
+      "loss": 3.8645,
+      "step": 2180608
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4288819238935954e-05,
+      "loss": 3.8707,
+      "step": 2181120
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4280433291425434e-05,
+      "loss": 3.8704,
+      "step": 2181632
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4272047343914916e-05,
+      "loss": 3.8719,
+      "step": 2182144
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.4263677775208125e-05,
+      "loss": 3.8711,
+      "step": 2182656
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.4255291827697607e-05,
+      "loss": 3.8709,
+      "step": 2183168
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.4246905880187087e-05,
+      "loss": 3.863,
+      "step": 2183680
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.4238519932676567e-05,
+      "loss": 3.8525,
+      "step": 2184192
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.4230133985166047e-05,
+      "loss": 3.8625,
+      "step": 2184704
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.4221748037655525e-05,
+      "loss": 3.8661,
+      "step": 2185216
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.4213394847752468e-05,
+      "loss": 3.8852,
+      "step": 2185728
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.4205008900241948e-05,
+      "loss": 3.8584,
+      "step": 2186240
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.4196622952731428e-05,
+      "loss": 3.8629,
+      "step": 2186752
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.4188237005220908e-05,
+      "loss": 3.8692,
+      "step": 2187264
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.4179851057710388e-05,
+      "loss": 3.8703,
+      "step": 2187776
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.417146511019987e-05,
+      "loss": 3.8763,
+      "step": 2188288
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.4163079162689346e-05,
+      "loss": 3.867,
+      "step": 2188800
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.4154693215178826e-05,
+      "loss": 3.8674,
+      "step": 2189312
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.414632364647204e-05,
+      "loss": 3.8659,
+      "step": 2189824
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.413793769896152e-05,
+      "loss": 3.8668,
+      "step": 2190336
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.4129551751450999e-05,
+      "loss": 3.8606,
+      "step": 2190848
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.4121165803940479e-05,
+      "loss": 3.8654,
+      "step": 2191360
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.4112779856429959e-05,
+      "loss": 3.8699,
+      "step": 2191872
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.4104393908919439e-05,
+      "loss": 3.8624,
+      "step": 2192384
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.4096007961408918e-05,
+      "loss": 3.8618,
+      "step": 2192896
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.40876220138984e-05,
+      "loss": 3.8592,
+      "step": 2193408
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.407925244519161e-05,
+      "loss": 3.8674,
+      "step": 2193920
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.407086649768109e-05,
+      "loss": 3.8675,
+      "step": 2194432
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.406248055017057e-05,
+      "loss": 3.8686,
+      "step": 2194944
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.405409460266005e-05,
+      "loss": 3.8671,
+      "step": 2195456
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.4045725033953262e-05,
+      "loss": 3.8721,
+      "step": 2195968
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.403735546524647e-05,
+      "loss": 3.878,
+      "step": 2196480
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.402896951773595e-05,
+      "loss": 3.87,
+      "step": 2196992
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.4020583570225432e-05,
+      "loss": 3.8713,
+      "step": 2197504
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.4012197622714912e-05,
+      "loss": 3.8675,
+      "step": 2198016
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.4003811675204392e-05,
+      "loss": 3.8684,
+      "step": 2198528
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.3995425727693872e-05,
+      "loss": 3.8624,
+      "step": 2199040
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.398703978018335e-05,
+      "loss": 3.8642,
+      "step": 2199552
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.3978670211476563e-05,
+      "loss": 3.8703,
+      "step": 2200064
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.3970284263966043e-05,
+      "loss": 3.8699,
+      "step": 2200576
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.3961898316455525e-05,
+      "loss": 3.8702,
+      "step": 2201088
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.3953512368945001e-05,
+      "loss": 3.8662,
+      "step": 2201600
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.3945142800238215e-05,
+      "loss": 3.8682,
+      "step": 2202112
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.3936756852727695e-05,
+      "loss": 3.8618,
+      "step": 2202624
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.3928370905217175e-05,
+      "loss": 3.8776,
+      "step": 2203136
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.3919984957706653e-05,
+      "loss": 3.8675,
+      "step": 2203648
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.3911599010196133e-05,
+      "loss": 3.8668,
+      "step": 2204160
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.3903213062685613e-05,
+      "loss": 3.8438,
+      "step": 2204672
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.3894827115175093e-05,
+      "loss": 3.8706,
+      "step": 2205184
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.3886441167664573e-05,
+      "loss": 3.8732,
+      "step": 2205696
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.3878071598957784e-05,
+      "loss": 3.881,
+      "step": 2206208
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.3869685651447264e-05,
+      "loss": 3.8711,
+      "step": 2206720
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.3861299703936746e-05,
+      "loss": 3.8629,
+      "step": 2207232
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.3852913756426226e-05,
+      "loss": 3.8737,
+      "step": 2207744
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.3844527808915706e-05,
+      "loss": 3.8851,
+      "step": 2208256
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.3836141861405184e-05,
+      "loss": 3.8583,
+      "step": 2208768
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.3827755913894664e-05,
+      "loss": 3.8717,
+      "step": 2209280
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.3819402723991607e-05,
+      "loss": 3.8666,
+      "step": 2209792
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.3811016776481087e-05,
+      "loss": 3.8725,
+      "step": 2210304
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.3802630828970567e-05,
+      "loss": 3.8679,
+      "step": 2210816
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.3794244881460047e-05,
+      "loss": 3.8659,
+      "step": 2211328
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.3785858933949527e-05,
+      "loss": 3.8662,
+      "step": 2211840
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.3777472986439005e-05,
+      "loss": 3.8716,
+      "step": 2212352
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.3769087038928485e-05,
+      "loss": 3.8716,
+      "step": 2212864
+    },
+    {
+      "epoch": 0.03,
+      "eval_loss": 3.997922897338867,
+      "eval_runtime": 302.3397,
+      "eval_samples_per_second": 1262.127,
+      "eval_steps_per_second": 39.442,
+      "step": 2213280
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.3760701091417965e-05,
+      "loss": 3.8704,
+      "step": 2213376
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.3752315143907447e-05,
+      "loss": 3.8734,
+      "step": 2213888
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.3743929196396927e-05,
+      "loss": 3.8638,
+      "step": 2214400
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.3735543248886407e-05,
+      "loss": 3.8728,
+      "step": 2214912
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.3727157301375887e-05,
+      "loss": 3.8744,
+      "step": 2215424
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.3718771353865365e-05,
+      "loss": 3.8709,
+      "step": 2215936
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.3710385406354845e-05,
+      "loss": 3.8707,
+      "step": 2216448
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.3701999458844325e-05,
+      "loss": 3.856,
+      "step": 2216960
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.3693613511333805e-05,
+      "loss": 3.8655,
+      "step": 2217472
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.3685227563823286e-05,
+      "loss": 3.8751,
+      "step": 2217984
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.3676841616312766e-05,
+      "loss": 3.8703,
+      "step": 2218496
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.3668455668802243e-05,
+      "loss": 3.8711,
+      "step": 2219008
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.3660086100095457e-05,
+      "loss": 3.8791,
+      "step": 2219520
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.3651700152584937e-05,
+      "loss": 3.8615,
+      "step": 2220032
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.3643314205074417e-05,
+      "loss": 3.8626,
+      "step": 2220544
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.3634928257563895e-05,
+      "loss": 3.8569,
+      "step": 2221056
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.3626542310053375e-05,
+      "loss": 3.8714,
+      "step": 2221568
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.3618156362542855e-05,
+      "loss": 3.863,
+      "step": 2222080
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.3609770415032335e-05,
+      "loss": 3.8638,
+      "step": 2222592
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.3601400846325546e-05,
+      "loss": 3.8704,
+      "step": 2223104
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.3593014898815026e-05,
+      "loss": 3.8775,
+      "step": 2223616
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.3584628951304506e-05,
+      "loss": 3.8709,
+      "step": 2224128
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.3576243003793987e-05,
+      "loss": 3.8659,
+      "step": 2224640
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.3567857056283467e-05,
+      "loss": 3.8672,
+      "step": 2225152
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.3559471108772947e-05,
+      "loss": 3.8736,
+      "step": 2225664
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.3551085161262426e-05,
+      "loss": 3.8666,
+      "step": 2226176
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.3542699213751906e-05,
+      "loss": 3.8527,
+      "step": 2226688
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.3534313266241386e-05,
+      "loss": 3.8674,
+      "step": 2227200
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.3525927318730866e-05,
+      "loss": 3.864,
+      "step": 2227712
+    },
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.3517541371220347e-05,
+      "loss": 3.8584,
+      "step": 2228224
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3509155423709827e-05,
+      "loss": 3.8652,
+      "step": 2228736
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3500785855003036e-05,
+      "loss": 3.8673,
+      "step": 2229248
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3492399907492518e-05,
+      "loss": 3.8711,
+      "step": 2229760
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3484013959981998e-05,
+      "loss": 3.8746,
+      "step": 2230272
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3475628012471478e-05,
+      "loss": 3.8719,
+      "step": 2230784
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3467258443764689e-05,
+      "loss": 3.8725,
+      "step": 2231296
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3458872496254169e-05,
+      "loss": 3.879,
+      "step": 2231808
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3450486548743648e-05,
+      "loss": 3.85,
+      "step": 2232320
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3442100601233128e-05,
+      "loss": 3.8612,
+      "step": 2232832
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.343373103252634e-05,
+      "loss": 3.8641,
+      "step": 2233344
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.342534508501582e-05,
+      "loss": 3.8486,
+      "step": 2233856
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.34169591375053e-05,
+      "loss": 3.8694,
+      "step": 2234368
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.340857318999478e-05,
+      "loss": 3.8674,
+      "step": 2234880
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.340020362128799e-05,
+      "loss": 3.866,
+      "step": 2235392
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3391817673777472e-05,
+      "loss": 3.8639,
+      "step": 2235904
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3383431726266951e-05,
+      "loss": 3.8665,
+      "step": 2236416
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3375045778756431e-05,
+      "loss": 3.8652,
+      "step": 2236928
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.336665983124591e-05,
+      "loss": 3.8758,
+      "step": 2237440
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.335827388373539e-05,
+      "loss": 3.8438,
+      "step": 2237952
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.334988793622487e-05,
+      "loss": 3.8592,
+      "step": 2238464
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.334150198871435e-05,
+      "loss": 3.8803,
+      "step": 2238976
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.333313242000756e-05,
+      "loss": 3.8766,
+      "step": 2239488
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3324762851300773e-05,
+      "loss": 3.8652,
+      "step": 2240000
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3316376903790253e-05,
+      "loss": 3.8549,
+      "step": 2240512
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3307990956279731e-05,
+      "loss": 3.8648,
+      "step": 2241024
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3299621387572944e-05,
+      "loss": 3.8535,
+      "step": 2241536
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3291235440062425e-05,
+      "loss": 3.8721,
+      "step": 2242048
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3282849492551905e-05,
+      "loss": 3.8533,
+      "step": 2242560
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3274463545041382e-05,
+      "loss": 3.8693,
+      "step": 2243072
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3266077597530863e-05,
+      "loss": 3.865,
+      "step": 2243584
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3257691650020343e-05,
+      "loss": 3.8674,
+      "step": 2244096
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3249305702509823e-05,
+      "loss": 3.857,
+      "step": 2244608
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3240919754999303e-05,
+      "loss": 3.8645,
+      "step": 2245120
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3232533807488783e-05,
+      "loss": 3.8559,
+      "step": 2245632
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3224164238781994e-05,
+      "loss": 3.8529,
+      "step": 2246144
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3215778291271474e-05,
+      "loss": 3.8722,
+      "step": 2246656
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3207392343760956e-05,
+      "loss": 3.8669,
+      "step": 2247168
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3199006396250436e-05,
+      "loss": 3.8557,
+      "step": 2247680
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3190620448739912e-05,
+      "loss": 3.8475,
+      "step": 2248192
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3182234501229394e-05,
+      "loss": 3.8641,
+      "step": 2248704
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3173864932522606e-05,
+      "loss": 3.8625,
+      "step": 2249216
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3165478985012083e-05,
+      "loss": 3.8788,
+      "step": 2249728
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3157093037501564e-05,
+      "loss": 3.8592,
+      "step": 2250240
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3148707089991044e-05,
+      "loss": 3.8704,
+      "step": 2250752
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3140321142480524e-05,
+      "loss": 3.8763,
+      "step": 2251264
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3131935194970004e-05,
+      "loss": 3.8589,
+      "step": 2251776
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3123549247459484e-05,
+      "loss": 3.87,
+      "step": 2252288
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3115179678752695e-05,
+      "loss": 3.8672,
+      "step": 2252800
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3106793731242175e-05,
+      "loss": 3.865,
+      "step": 2253312
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3098407783731657e-05,
+      "loss": 3.8645,
+      "step": 2253824
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3090021836221137e-05,
+      "loss": 3.8608,
+      "step": 2254336
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3081652267514347e-05,
+      "loss": 3.8694,
+      "step": 2254848
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3073266320003827e-05,
+      "loss": 3.883,
+      "step": 2255360
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3064880372493307e-05,
+      "loss": 3.8675,
+      "step": 2255872
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3056494424982787e-05,
+      "loss": 3.8499,
+      "step": 2256384
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3048108477472266e-05,
+      "loss": 3.862,
+      "step": 2256896
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3039722529961746e-05,
+      "loss": 3.868,
+      "step": 2257408
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3031336582451225e-05,
+      "loss": 3.864,
+      "step": 2257920
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3022950634940705e-05,
+      "loss": 3.8703,
+      "step": 2258432
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 1.3014581066233916e-05,
+      "loss": 3.8643,
+      "step": 2258944
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.3006211497527129e-05,
+      "loss": 3.8682,
+      "step": 2259456
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.299782555001661e-05,
+      "loss": 3.8568,
+      "step": 2259968
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.298943960250609e-05,
+      "loss": 3.8507,
+      "step": 2260480
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2981053654995567e-05,
+      "loss": 3.8594,
+      "step": 2260992
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2972667707485049e-05,
+      "loss": 3.8582,
+      "step": 2261504
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2964298138778261e-05,
+      "loss": 3.8757,
+      "step": 2262016
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2955912191267738e-05,
+      "loss": 3.8602,
+      "step": 2262528
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.294752624375722e-05,
+      "loss": 3.8575,
+      "step": 2263040
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.29391402962467e-05,
+      "loss": 3.8665,
+      "step": 2263552
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2930754348736179e-05,
+      "loss": 3.8618,
+      "step": 2264064
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2922368401225659e-05,
+      "loss": 3.8713,
+      "step": 2264576
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.291398245371514e-05,
+      "loss": 3.8658,
+      "step": 2265088
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.290559650620462e-05,
+      "loss": 3.8621,
+      "step": 2265600
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2897243316301564e-05,
+      "loss": 3.8617,
+      "step": 2266112
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.288885736879104e-05,
+      "loss": 3.8664,
+      "step": 2266624
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.288047142128052e-05,
+      "loss": 3.8562,
+      "step": 2267136
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2872085473770002e-05,
+      "loss": 3.859,
+      "step": 2267648
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2863699526259482e-05,
+      "loss": 3.8678,
+      "step": 2268160
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2855313578748962e-05,
+      "loss": 3.8599,
+      "step": 2268672
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2846927631238442e-05,
+      "loss": 3.8595,
+      "step": 2269184
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.283854168372792e-05,
+      "loss": 3.8557,
+      "step": 2269696
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2830172115021133e-05,
+      "loss": 3.8605,
+      "step": 2270208
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2821786167510613e-05,
+      "loss": 3.8652,
+      "step": 2270720
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2813400220000093e-05,
+      "loss": 3.863,
+      "step": 2271232
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2805014272489571e-05,
+      "loss": 3.8606,
+      "step": 2271744
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2796644703782783e-05,
+      "loss": 3.8707,
+      "step": 2272256
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2788258756272265e-05,
+      "loss": 3.8722,
+      "step": 2272768
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2779872808761745e-05,
+      "loss": 3.866,
+      "step": 2273280
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2771486861251222e-05,
+      "loss": 3.8681,
+      "step": 2273792
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2763117292544436e-05,
+      "loss": 3.8628,
+      "step": 2274304
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2754747723837645e-05,
+      "loss": 3.8643,
+      "step": 2274816
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2746361776327127e-05,
+      "loss": 3.8575,
+      "step": 2275328
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2737975828816607e-05,
+      "loss": 3.8629,
+      "step": 2275840
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2729589881306086e-05,
+      "loss": 3.858,
+      "step": 2276352
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2721203933795566e-05,
+      "loss": 3.8686,
+      "step": 2276864
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2712817986285045e-05,
+      "loss": 3.8671,
+      "step": 2277376
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2704432038774525e-05,
+      "loss": 3.8663,
+      "step": 2277888
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2696046091264005e-05,
+      "loss": 3.8632,
+      "step": 2278400
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2687676522557219e-05,
+      "loss": 3.8563,
+      "step": 2278912
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2679290575046695e-05,
+      "loss": 3.8735,
+      "step": 2279424
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2670904627536175e-05,
+      "loss": 3.8651,
+      "step": 2279936
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2662518680025657e-05,
+      "loss": 3.8625,
+      "step": 2280448
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2654149111318866e-05,
+      "loss": 3.8404,
+      "step": 2280960
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2645763163808346e-05,
+      "loss": 3.863,
+      "step": 2281472
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2637377216297828e-05,
+      "loss": 3.8694,
+      "step": 2281984
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2628991268787308e-05,
+      "loss": 3.8768,
+      "step": 2282496
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2620621700080518e-05,
+      "loss": 3.8663,
+      "step": 2283008
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2612235752569998e-05,
+      "loss": 3.8605,
+      "step": 2283520
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2603849805059478e-05,
+      "loss": 3.8678,
+      "step": 2284032
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2595463857548958e-05,
+      "loss": 3.8804,
+      "step": 2284544
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2587077910038438e-05,
+      "loss": 3.8559,
+      "step": 2285056
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2578724720135382e-05,
+      "loss": 3.8668,
+      "step": 2285568
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2570338772624861e-05,
+      "loss": 3.8648,
+      "step": 2286080
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.256195282511434e-05,
+      "loss": 3.8694,
+      "step": 2286592
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.255356687760382e-05,
+      "loss": 3.8651,
+      "step": 2287104
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.25451809300933e-05,
+      "loss": 3.8624,
+      "step": 2287616
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2536794982582781e-05,
+      "loss": 3.8592,
+      "step": 2288128
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.252842541387599e-05,
+      "loss": 3.8715,
+      "step": 2288640
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 1.2520039466365472e-05,
+      "loss": 3.8644,
+      "step": 2289152
+    },
+    {
+      "epoch": 0.03,
+      "eval_loss": 3.9966635704040527,
+      "eval_runtime": 295.2939,
+      "eval_samples_per_second": 1292.241,
+      "eval_steps_per_second": 40.383,
+      "step": 2289600
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.2511653518854952e-05,
+      "loss": 3.8576,
+      "step": 2289664
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.2503267571344432e-05,
+      "loss": 3.8694,
+      "step": 2290176
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.2494881623833912e-05,
+      "loss": 3.861,
+      "step": 2290688
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.2486512055127123e-05,
+      "loss": 3.8678,
+      "step": 2291200
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.2478126107616603e-05,
+      "loss": 3.8701,
+      "step": 2291712
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.2469740160106083e-05,
+      "loss": 3.8709,
+      "step": 2292224
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.2461354212595563e-05,
+      "loss": 3.8629,
+      "step": 2292736
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.2452968265085043e-05,
+      "loss": 3.853,
+      "step": 2293248
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.2444598696378253e-05,
+      "loss": 3.8602,
+      "step": 2293760
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.2436212748867735e-05,
+      "loss": 3.8752,
+      "step": 2294272
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.2427826801357213e-05,
+      "loss": 3.8658,
+      "step": 2294784
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.2419440853846693e-05,
+      "loss": 3.8648,
+      "step": 2295296
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.2411071285139906e-05,
+      "loss": 3.8756,
+      "step": 2295808
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.2402685337629386e-05,
+      "loss": 3.8605,
+      "step": 2296320
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.2394299390118864e-05,
+      "loss": 3.8579,
+      "step": 2296832
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.2385913442608346e-05,
+      "loss": 3.8532,
+      "step": 2297344
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.2377527495097826e-05,
+      "loss": 3.8649,
+      "step": 2297856
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.2369157926391035e-05,
+      "loss": 3.862,
+      "step": 2298368
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.2360771978880516e-05,
+      "loss": 3.8595,
+      "step": 2298880
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.2352386031369996e-05,
+      "loss": 3.8652,
+      "step": 2299392
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.2344000083859476e-05,
+      "loss": 3.8784,
+      "step": 2299904
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.2335614136348954e-05,
+      "loss": 3.8666,
+      "step": 2300416
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.2327228188838436e-05,
+      "loss": 3.8633,
+      "step": 2300928
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.2318842241327916e-05,
+      "loss": 3.8647,
+      "step": 2301440
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.2310456293817394e-05,
+      "loss": 3.8635,
+      "step": 2301952
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.2302086725110607e-05,
+      "loss": 3.8666,
+      "step": 2302464
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.2293700777600087e-05,
+      "loss": 3.8502,
+      "step": 2302976
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.22853312088933e-05,
+      "loss": 3.8585,
+      "step": 2303488
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.2276945261382777e-05,
+      "loss": 3.8606,
+      "step": 2304000
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 1.2268559313872257e-05,
+      "loss": 3.8547,
+      "step": 2304512
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.2260173366361737e-05,
+      "loss": 3.8565,
+      "step": 2305024
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.2251787418851217e-05,
+      "loss": 3.8677,
+      "step": 2305536
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.2243401471340697e-05,
+      "loss": 3.8622,
+      "step": 2306048
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.2235015523830177e-05,
+      "loss": 3.8717,
+      "step": 2306560
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.2226629576319657e-05,
+      "loss": 3.8657,
+      "step": 2307072
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.2218276386416599e-05,
+      "loss": 3.8651,
+      "step": 2307584
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.220989043890608e-05,
+      "loss": 3.8731,
+      "step": 2308096
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.220150449139556e-05,
+      "loss": 3.849,
+      "step": 2308608
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.219311854388504e-05,
+      "loss": 3.8593,
+      "step": 2309120
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.2184732596374519e-05,
+      "loss": 3.8572,
+      "step": 2309632
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.2176346648864e-05,
+      "loss": 3.8504,
+      "step": 2310144
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.216796070135348e-05,
+      "loss": 3.8657,
+      "step": 2310656
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.2159574753842959e-05,
+      "loss": 3.8642,
+      "step": 2311168
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.2151205185136171e-05,
+      "loss": 3.8581,
+      "step": 2311680
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.2142819237625651e-05,
+      "loss": 3.8621,
+      "step": 2312192
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.2134433290115131e-05,
+      "loss": 3.863,
+      "step": 2312704
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.2126063721408342e-05,
+      "loss": 3.8653,
+      "step": 2313216
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.2117677773897822e-05,
+      "loss": 3.8672,
+      "step": 2313728
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.2109291826387302e-05,
+      "loss": 3.8452,
+      "step": 2314240
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.2100905878876782e-05,
+      "loss": 3.8582,
+      "step": 2314752
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.2092519931366262e-05,
+      "loss": 3.8731,
+      "step": 2315264
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.2084133983855741e-05,
+      "loss": 3.8723,
+      "step": 2315776
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.2075748036345221e-05,
+      "loss": 3.8617,
+      "step": 2316288
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.2067378467638432e-05,
+      "loss": 3.8525,
+      "step": 2316800
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.2058992520127912e-05,
+      "loss": 3.8617,
+      "step": 2317312
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.2050606572617392e-05,
+      "loss": 3.8527,
+      "step": 2317824
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.2042237003910605e-05,
+      "loss": 3.8615,
+      "step": 2318336
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.2033851056400083e-05,
+      "loss": 3.851,
+      "step": 2318848
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.2025465108889563e-05,
+      "loss": 3.8671,
+      "step": 2319360
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.2017079161379044e-05,
+      "loss": 3.8643,
+      "step": 2319872
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.2008709592672254e-05,
+      "loss": 3.8607,
+      "step": 2320384
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.2000323645161735e-05,
+      "loss": 3.8495,
+      "step": 2320896
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.1991937697651215e-05,
+      "loss": 3.8619,
+      "step": 2321408
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.1983551750140693e-05,
+      "loss": 3.8517,
+      "step": 2321920
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.1975198560237637e-05,
+      "loss": 3.8535,
+      "step": 2322432
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.1966812612727117e-05,
+      "loss": 3.8671,
+      "step": 2322944
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.1958426665216597e-05,
+      "loss": 3.8633,
+      "step": 2323456
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.1950040717706077e-05,
+      "loss": 3.8551,
+      "step": 2323968
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.1941654770195557e-05,
+      "loss": 3.8422,
+      "step": 2324480
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.1933268822685037e-05,
+      "loss": 3.8614,
+      "step": 2324992
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.1924882875174517e-05,
+      "loss": 3.8589,
+      "step": 2325504
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.1916496927663996e-05,
+      "loss": 3.8714,
+      "step": 2326016
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.1908110980153476e-05,
+      "loss": 3.8567,
+      "step": 2326528
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.1899725032642956e-05,
+      "loss": 3.869,
+      "step": 2327040
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.1891339085132436e-05,
+      "loss": 3.8681,
+      "step": 2327552
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.1882953137621916e-05,
+      "loss": 3.8607,
+      "step": 2328064
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.187459994771886e-05,
+      "loss": 3.8647,
+      "step": 2328576
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.186621400020834e-05,
+      "loss": 3.8597,
+      "step": 2329088
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.1857828052697818e-05,
+      "loss": 3.8641,
+      "step": 2329600
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.18494421051873e-05,
+      "loss": 3.8595,
+      "step": 2330112
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.184105615767678e-05,
+      "loss": 3.8596,
+      "step": 2330624
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.1832670210166258e-05,
+      "loss": 3.8621,
+      "step": 2331136
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.1824284262655738e-05,
+      "loss": 3.8772,
+      "step": 2331648
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.181591469394895e-05,
+      "loss": 3.8624,
+      "step": 2332160
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.180752874643843e-05,
+      "loss": 3.8484,
+      "step": 2332672
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.1799142798927908e-05,
+      "loss": 3.8596,
+      "step": 2333184
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.179075685141739e-05,
+      "loss": 3.8638,
+      "step": 2333696
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.178237090390687e-05,
+      "loss": 3.8635,
+      "step": 2334208
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.1773984956396348e-05,
+      "loss": 3.8659,
+      "step": 2334720
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 1.176561538768956e-05,
+      "loss": 3.8615,
+      "step": 2335232
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.175722944017904e-05,
+      "loss": 3.863,
+      "step": 2335744
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.174884349266852e-05,
+      "loss": 3.852,
+      "step": 2336256
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1740473923961731e-05,
+      "loss": 3.8504,
+      "step": 2336768
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1732087976451211e-05,
+      "loss": 3.8516,
+      "step": 2337280
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1723702028940691e-05,
+      "loss": 3.8541,
+      "step": 2337792
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1715316081430171e-05,
+      "loss": 3.8741,
+      "step": 2338304
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1706930133919651e-05,
+      "loss": 3.857,
+      "step": 2338816
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1698544186409131e-05,
+      "loss": 3.8506,
+      "step": 2339328
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1690158238898611e-05,
+      "loss": 3.8651,
+      "step": 2339840
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1681772291388091e-05,
+      "loss": 3.8589,
+      "step": 2340352
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1673386343877571e-05,
+      "loss": 3.8692,
+      "step": 2340864
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1665000396367051e-05,
+      "loss": 3.8617,
+      "step": 2341376
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1656614448856531e-05,
+      "loss": 3.8558,
+      "step": 2341888
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1648228501346011e-05,
+      "loss": 3.8606,
+      "step": 2342400
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1639858932639222e-05,
+      "loss": 3.8627,
+      "step": 2342912
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1631472985128702e-05,
+      "loss": 3.8488,
+      "step": 2343424
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1623087037618182e-05,
+      "loss": 3.8603,
+      "step": 2343936
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1614701090107662e-05,
+      "loss": 3.8584,
+      "step": 2344448
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1606331521400874e-05,
+      "loss": 3.8573,
+      "step": 2344960
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1597945573890354e-05,
+      "loss": 3.8534,
+      "step": 2345472
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1589576005183563e-05,
+      "loss": 3.8571,
+      "step": 2345984
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1581190057673045e-05,
+      "loss": 3.8545,
+      "step": 2346496
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1572820488966256e-05,
+      "loss": 3.8607,
+      "step": 2347008
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1564434541455736e-05,
+      "loss": 3.8615,
+      "step": 2347520
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1556048593945215e-05,
+      "loss": 3.8566,
+      "step": 2348032
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1547662646434695e-05,
+      "loss": 3.8671,
+      "step": 2348544
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1539276698924175e-05,
+      "loss": 3.8702,
+      "step": 2349056
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1530890751413655e-05,
+      "loss": 3.8609,
+      "step": 2349568
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1522504803903135e-05,
+      "loss": 3.8626,
+      "step": 2350080
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1514118856392615e-05,
+      "loss": 3.8607,
+      "step": 2350592
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1505732908882094e-05,
+      "loss": 3.8616,
+      "step": 2351104
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1497346961371575e-05,
+      "loss": 3.8519,
+      "step": 2351616
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1488961013861055e-05,
+      "loss": 3.861,
+      "step": 2352128
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1480591445154266e-05,
+      "loss": 3.8567,
+      "step": 2352640
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1472205497643746e-05,
+      "loss": 3.8637,
+      "step": 2353152
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1463835928936957e-05,
+      "loss": 3.8599,
+      "step": 2353664
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1455449981426437e-05,
+      "loss": 3.8662,
+      "step": 2354176
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1447064033915918e-05,
+      "loss": 3.8587,
+      "step": 2354688
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1438678086405397e-05,
+      "loss": 3.852,
+      "step": 2355200
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1430292138894876e-05,
+      "loss": 3.8715,
+      "step": 2355712
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1421906191384356e-05,
+      "loss": 3.8642,
+      "step": 2356224
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1413536622677567e-05,
+      "loss": 3.856,
+      "step": 2356736
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1405150675167047e-05,
+      "loss": 3.8411,
+      "step": 2357248
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1396764727656529e-05,
+      "loss": 3.8562,
+      "step": 2357760
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1388378780146007e-05,
+      "loss": 3.8676,
+      "step": 2358272
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1379992832635487e-05,
+      "loss": 3.8723,
+      "step": 2358784
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1371606885124967e-05,
+      "loss": 3.863,
+      "step": 2359296
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1363220937614449e-05,
+      "loss": 3.855,
+      "step": 2359808
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1354851368907658e-05,
+      "loss": 3.864,
+      "step": 2360320
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.134646542139714e-05,
+      "loss": 3.8771,
+      "step": 2360832
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.133807947388662e-05,
+      "loss": 3.8567,
+      "step": 2361344
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1329693526376098e-05,
+      "loss": 3.8608,
+      "step": 2361856
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.132132395766931e-05,
+      "loss": 3.8619,
+      "step": 2362368
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1312954388962521e-05,
+      "loss": 3.8638,
+      "step": 2362880
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1304568441452e-05,
+      "loss": 3.8612,
+      "step": 2363392
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.129618249394148e-05,
+      "loss": 3.8598,
+      "step": 2363904
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.128779654643096e-05,
+      "loss": 3.8523,
+      "step": 2364416
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1279426977724172e-05,
+      "loss": 3.8707,
+      "step": 2364928
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.1271041030213653e-05,
+      "loss": 3.8586,
+      "step": 2365440
+    },
+    {
+      "epoch": 1.03,
+      "eval_loss": 3.995774269104004,
+      "eval_runtime": 297.2083,
+      "eval_samples_per_second": 1283.918,
+      "eval_steps_per_second": 40.123,
+      "step": 2365920
+    }
+  ],
+  "logging_steps": 512,
+  "max_steps": 3052726,
+  "num_train_epochs": 9223372036854775807,
+  "save_steps": 10,
+  "total_flos": 9.381286249225097e+17,
+  "trial_name": null,
+  "trial_params": null
+}